Skip to content

Commit

Permalink
introduce safe regex matcher based on re2 engine (#7878)
Browse files Browse the repository at this point in the history
The libstdc++ std::regex implementation is not safe in all cases
for user provided input. This change deprecates the use of std::regex
in all user facing paths and introduces a new safe regex matcher with
an explicitly configurable engine, right now limited to Google's re2
regex engine. This is not a drop in replacement for std::regex as not
all language features are supported. As such we will go through a
deprecation period for the old regex engine.

Fixes #7728

Signed-off-by: Matt Klein <mklein@lyft.com>
  • Loading branch information
mattklein123 authored and htuch committed Aug 23, 2019
1 parent fd19af8 commit eff0201
Show file tree
Hide file tree
Showing 78 changed files with 1,004 additions and 386 deletions.
1 change: 1 addition & 0 deletions api/docs/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ proto_library(
"//envoy/type:range",
"//envoy/type/matcher:metadata",
"//envoy/type/matcher:number",
"//envoy/type/matcher:regex",
"//envoy/type/matcher:string",
],
)
4 changes: 4 additions & 0 deletions api/envoy/api/v2/route/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ api_proto_library_internal(
"//envoy/api/v2/core:base",
"//envoy/type:percent",
"//envoy/type:range",
"//envoy/type/matcher:regex",
"//envoy/type/matcher:string",
],
)

Expand All @@ -20,5 +22,7 @@ api_go_proto_library(
"//envoy/api/v2/core:base_go_proto",
"//envoy/type:percent_go_proto",
"//envoy/type:range_go_proto",
"//envoy/type/matcher:regex_go_proto",
"//envoy/type/matcher:string_go_proto",
],
)
88 changes: 77 additions & 11 deletions api/envoy/api/v2/route/route.proto
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ option go_package = "route";
option java_generic_services = true;

import "envoy/api/v2/core/base.proto";
import "envoy/type/matcher/regex.proto";
import "envoy/type/matcher/string.proto";
import "envoy/type/percent.proto";
import "envoy/type/range.proto";

Expand Down Expand Up @@ -349,7 +351,25 @@ message RouteMatch {
// * The regex */b[io]t* matches the path */bot*
// * The regex */b[io]t* does not match the path */bite*
// * The regex */b[io]t* does not match the path */bit/bot*
string regex = 3 [(validate.rules).string.max_bytes = 1024];
//
// .. attention::
// This field has been deprecated in favor of `safe_regex` as it is not safe for use with
// untrusted input in all cases.
string regex = 3 [(validate.rules).string.max_bytes = 1024, deprecated = true];

// If specified, the route is a regular expression rule meaning that the
// regex must match the *:path* header once the query string is removed. The entire path
// (without the query string) must match the regex. The rule will not match if only a
// subsequence of the *:path* header matches the regex.
//
// [#next-major-version: In the v3 API we should redo how path specification works such
// that we utilize StringMatcher, and additionally have consistent options around whether we
// strip query strings, do a case sensitive match, etc. In the interim it will be too disruptive
// to deprecate the existing options. We should even consider whether we want to do away with
// path_specifier entirely and just rely on a set of header matchers which can already match
// on :path, etc. The issue with that is it is unclear how to generically deal with query string
// stripping. This needs more thought.]
type.matcher.RegexMatcher safe_regex = 10 [(validate.rules).message.required = true];
}

// Indicates that prefix/path matching should be case insensitive. The default
Expand Down Expand Up @@ -404,12 +424,24 @@ message CorsPolicy {
// Specifies the origins that will be allowed to do CORS requests.
//
// An origin is allowed if either allow_origin or allow_origin_regex match.
repeated string allow_origin = 1;
//
// .. attention::
// This field has been deprecated in favor of `allow_origin_string_match`.
repeated string allow_origin = 1 [deprecated = true];

// Specifies regex patterns that match allowed origins.
//
// An origin is allowed if either allow_origin or allow_origin_regex match.
repeated string allow_origin_regex = 8 [(validate.rules).repeated .items.string.max_bytes = 1024];
//
// .. attention::
// This field has been deprecated in favor of `allow_origin_string_match` as it is not safe for
// use with untrusted input in all cases.
repeated string allow_origin_regex = 8
[(validate.rules).repeated .items.string.max_bytes = 1024, deprecated = true];

// Specifies string patterns that match allowed origins. An origin is allowed if any of the
// string matchers match.
repeated type.matcher.StringMatcher allow_origin_string_match = 11;

// Specifies the content for the *access-control-allow-methods* header.
string allow_methods = 2;
Expand Down Expand Up @@ -1077,18 +1109,28 @@ message VirtualCluster {
// * The regex */rides/\d+* matches the path */rides/0*
// * The regex */rides/\d+* matches the path */rides/123*
// * The regex */rides/\d+* does not match the path */rides/123/456*
string pattern = 1 [(validate.rules).string = {min_bytes: 1, max_bytes: 1024}];
//
// .. attention::
// This field has been deprecated in favor of `headers` as it is not safe for use with
// untrusted input in all cases.
string pattern = 1 [(validate.rules).string.max_bytes = 1024, deprecated = true];

// Specifies a list of header matchers to use for matching requests. Each specified header must
// match. The pseudo-headers `:path` and `:method` can be used to match the request path and
// method, respectively.
repeated HeaderMatcher headers = 4;

// Specifies the name of the virtual cluster. The virtual cluster name as well
// Specifies the name of the virtual cluster. The virtual cluster name as well
// as the virtual host name are used when emitting statistics. The statistics are emitted by the
// router filter and are documented :ref:`here <config_http_filters_router_stats>`.
string name = 2 [(validate.rules).string.min_bytes = 1];

// Optionally specifies the HTTP method to match on. For example GET, PUT,
// etc.
// [#comment:TODO(htuch): add (validate.rules).enum.defined_only = true once
// https://github.com/lyft/protoc-gen-validate/issues/42 is resolved.]
core.RequestMethod method = 3;
//
// .. attention::
// This field has been deprecated in favor of `headers`.
core.RequestMethod method = 3 [deprecated = true];
}

// Global rate limiting :ref:`architecture overview <arch_overview_rate_limit>`.
Expand Down Expand Up @@ -1248,6 +1290,7 @@ message RateLimit {
// <envoy_api_field_route.HeaderMatcher.name>` header will match, regardless of the header's
// value.
//
// [#next-major-version: HeaderMatcher should be refactored to use StringMatcher.]
message HeaderMatcher {
// Specifies the name of the header in the request.
string name = 1 [(validate.rules).string.min_bytes = 1];
Expand All @@ -1273,7 +1316,16 @@ message HeaderMatcher {
// * The regex *\d{3}* matches the value *123*
// * The regex *\d{3}* does not match the value *1234*
// * The regex *\d{3}* does not match the value *123.456*
string regex_match = 5 [(validate.rules).string.max_bytes = 1024];
//
// .. attention::
// This field has been deprecated in favor of `safe_regex_match` as it is not safe for use
// with untrusted input in all cases.
string regex_match = 5 [(validate.rules).string.max_bytes = 1024, deprecated = true];

// If specified, this regex string is a regular expression rule which implies the entire request
// header value must match the regex. The rule will not match if only a subsequence of the
// request header value matches the regex.
type.matcher.RegexMatcher safe_regex_match = 11;

// If specified, header match will be performed based on range.
// The rule will match if the request header value is within this range.
Expand Down Expand Up @@ -1328,11 +1380,25 @@ message QueryParameterMatcher {
// Specifies the value of the key. If the value is absent, a request
// that contains the key in its query string will match, whether the
// key appears with a value (e.g., "?debug=true") or not (e.g., "?debug")
string value = 3;
//
// .. attention::
// This field is deprecated. Use an `exact` match inside the `string_match` field.
string value = 3 [deprecated = true];

// Specifies whether the query parameter value is a regular expression.
// Defaults to false. The entire query parameter value (i.e., the part to
// the right of the equals sign in "key=value") must match the regex.
// E.g., the regex "\d+$" will match "123" but not "a123" or "123a".
google.protobuf.BoolValue regex = 4;
//
// .. attention::
// This field is deprecated. Use a `safe_regex` match inside the `string_match` field.
google.protobuf.BoolValue regex = 4 [deprecated = true];

oneof query_parameter_match_specifier {
// Specifies whether a query parameter value should match against a string.
type.matcher.StringMatcher string_match = 5 [(validate.rules).message.required = true];

// Specifies whether a query parameter should be present.
bool present_match = 6;
}
}
17 changes: 17 additions & 0 deletions api/envoy/type/matcher/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,17 @@ api_proto_library_internal(
name = "string",
srcs = ["string.proto"],
visibility = ["//visibility:public"],
deps = [
":regex",
],
)

api_go_proto_library(
name = "string",
proto = ":string",
deps = [
":regex_go_proto",
],
)

api_proto_library_internal(
Expand All @@ -65,3 +71,14 @@ api_go_proto_library(
":string_go_proto",
],
)

# Proto library target for regex.proto (the RegexMatcher message); publicly visible.
api_proto_library_internal(
name = "regex",
srcs = ["regex.proto"],
visibility = ["//visibility:public"],
)

# Go proto library counterpart of the ":regex" proto target above.
api_go_proto_library(
name = "regex",
proto = ":regex",
)
37 changes: 37 additions & 0 deletions api/envoy/type/matcher/regex.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
syntax = "proto3";

package envoy.type.matcher;

// The Java outer class is named after this file. It must be unique within the java_package below,
// which string.proto also uses.
// NOTE(review): string.proto presumably declares "StringProto" as its outer class; confirm.
option java_outer_classname = "RegexProto";
option java_multiple_files = true;
option java_package = "io.envoyproxy.envoy.type.matcher";
option go_package = "matcher";

import "google/protobuf/wrappers.proto";
import "validate/validate.proto";

// [#protodoc-title: RegexMatcher]

// A regex matcher designed for safety when used with untrusted input.
message RegexMatcher {
  // Google's `RE2 <https://github.com/google/re2>`_ regex engine. The regex string must adhere to
  // the documented `syntax <https://github.com/google/re2/wiki/Syntax>`_. The engine is designed
  // to complete execution in linear time as well as limit the amount of memory used.
  message GoogleRE2 {
    // This field controls the RE2 "program size" which is a rough estimate of how complex a
    // compiled regex is to evaluate. A regex that has a program size greater than the configured
    // value will fail to compile. In this case, the configured max program size can be increased
    // or the regex can be simplified. If not specified, the default is 100.
    google.protobuf.UInt32Value max_program_size = 1;
  }

  // Selects the regex engine. Exactly one engine must be set.
  oneof engine_type {
    option (validate.required) = true;

    // Google's RE2 regex engine.
    GoogleRE2 google_re2 = 1 [(validate.rules).message.required = true];
  }

  // The regex match string. The string must be supported by the configured engine and must not be
  // empty.
  string regex = 2 [(validate.rules).string.min_bytes = 1];
}
11 changes: 10 additions & 1 deletion api/envoy/type/matcher/string.proto
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ option java_multiple_files = true;
option java_package = "io.envoyproxy.envoy.type.matcher";
option go_package = "matcher";

import "envoy/type/matcher/regex.proto";

import "validate/validate.proto";

// [#protodoc-title: StringMatcher]
Expand Down Expand Up @@ -48,7 +50,14 @@ message StringMatcher {
// * The regex *\d{3}* matches the value *123*
// * The regex *\d{3}* does not match the value *1234*
// * The regex *\d{3}* does not match the value *123.456*
string regex = 4 [(validate.rules).string.max_bytes = 1024];
//
// .. attention::
// This field has been deprecated in favor of `safe_regex` as it is not safe for use with
// untrusted input in all cases.
string regex = 4 [(validate.rules).string.max_bytes = 1024, deprecated = true];

// The input string must match the regular expression specified here.
RegexMatcher safe_regex = 5 [(validate.rules).message.required = true];
}
}

Expand Down
6 changes: 3 additions & 3 deletions bazel/repository_locations.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -253,8 +253,8 @@ REPOSITORY_LOCATIONS = dict(
urls = ["https://github.com/google/cel-cpp/archive/d9d02b20ab85da2444dbdd03410bac6822141364.tar.gz"],
),
com_googlesource_code_re2 = dict(
sha256 = "f31db9cd224d018a7e4fe88ef84aaa874b0b3ed91d4d98ee5a1531101d3fdc64",
strip_prefix = "re2-87e2ad45e7b18738e1551474f7ee5886ff572059",
urls = ["https://github.com/google/re2/archive/87e2ad45e7b18738e1551474f7ee5886ff572059.tar.gz"],
sha256 = "38bc0426ee15b5ed67957017fd18201965df0721327be13f60496f2b356e3e01",
strip_prefix = "re2-2019-08-01",
urls = ["https://github.com/google/re2/archive/2019-08-01.tar.gz"],
),
)
1 change: 1 addition & 0 deletions docs/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ PROTO_RST="
/envoy/type/matcher/metadata/envoy/type/matcher/metadata.proto.rst
/envoy/type/matcher/value/envoy/type/matcher/value.proto.rst
/envoy/type/matcher/number/envoy/type/matcher/number.proto.rst
/envoy/type/matcher/regex/envoy/type/matcher/regex.proto.rst
/envoy/type/matcher/string/envoy/type/matcher/string.proto.rst
"

Expand Down
1 change: 1 addition & 0 deletions docs/root/api-v2/types/types.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,6 @@ Types
../type/range.proto
../type/matcher/metadata.proto
../type/matcher/number.proto
../type/matcher/regex.proto
../type/matcher/string.proto
../type/matcher/value.proto
21 changes: 19 additions & 2 deletions docs/root/intro/deprecated.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,25 @@ Deprecated items below are listed in chronological order.

Version 1.12.0 (pending)
========================
* The ORIGINAL_DST_LB :ref:`load balancing policy <envoy_api_field_Cluster.lb_policy>` is deprecated, use CLUSTER_PROVIDED policy instead when configuring an :ref:`original destination cluster <envoy_api_field_Cluster.type>`.
* The :option:`--allow-unknown-fields` command-line option, use :option:`--allow-unknown-static-fields` instead.
* The ORIGINAL_DST_LB :ref:`load balancing policy <envoy_api_field_Cluster.lb_policy>` is
deprecated, use CLUSTER_PROVIDED policy instead when configuring an :ref:`original destination
cluster <envoy_api_field_Cluster.type>`.
* The `regex` field in :ref:`StringMatcher <envoy_api_msg_type.matcher.StringMatcher>` has been
deprecated in favor of the `safe_regex` field.
* The `regex` field in :ref:`RouteMatch <envoy_api_msg_route.RouteMatch>` has been
deprecated in favor of the `safe_regex` field.
* The `allow_origin` and `allow_origin_regex` fields in :ref:`CorsPolicy
<envoy_api_msg_route.CorsPolicy>` have been deprecated in favor of the
`allow_origin_string_match` field.
* The `pattern` and `method` fields in :ref:`VirtualCluster <envoy_api_msg_route.VirtualCluster>`
have been deprecated in favor of the `headers` field.
* The `regex_match` field in :ref:`HeaderMatcher <envoy_api_msg_route.HeaderMatcher>` has been
deprecated in favor of the `safe_regex_match` field.
* The `value` and `regex` fields in :ref:`QueryParameterMatcher
<envoy_api_msg_route.QueryParameterMatcher>` have been deprecated in favor of the `string_match`
and `present_match` fields.
* The :option:`--allow-unknown-fields` command-line option is deprecated,
use :option:`--allow-unknown-static-fields` instead.

Version 1.11.0 (July 11, 2019)
==============================
Expand Down
12 changes: 8 additions & 4 deletions docs/root/intro/version_history.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ Version history
* admin: added ability to configure listener :ref:`socket options <envoy_api_field_config.bootstrap.v2.Admin.socket_options>`.
* admin: added config dump support for Secret Discovery Service :ref:`SecretConfigDump <envoy_api_msg_admin.v2alpha.SecretsConfigDump>`.
* api: added :ref:`set_node_on_first_message_only <envoy_api_field_core.ApiConfigSource.set_node_on_first_message_only>` option to omit the node identifier from the subsequent discovery requests on the same stream.
* config: enforcing that terminal filters (e.g. HttpConnectionManager for L4, router for L7) be the last in their respective filter chains.
* buffer filter: the buffer filter populates content-length header if not present, behavior can be disabled using the runtime feature `envoy.reloadable_features.buffer_filter_populate_content_length`.
* config: enforcing that terminal filters (e.g. HttpConnectionManager for L4, router for L7) be the last in their respective filter chains.
* config: added access log :ref:`extension filter<envoy_api_field_config.filter.accesslog.v2.AccessLogFilter.extension_filter>`.
* config: added support for :option:`--reject-unknown-dynamic-fields`, providing independent control
over whether unknown fields are rejected in static and dynamic configuration. By default, unknown
Expand All @@ -29,11 +29,15 @@ Version history
* http: added the ability to :ref:`merge adjacent slashes<envoy_api_field_config.filter.network.http_connection_manager.v2.HttpConnectionManager.merge_slashes>` in the path.
* listeners: added :ref:`continue_on_listener_filters_timeout <envoy_api_field_Listener.continue_on_listener_filters_timeout>` to configure whether a listener will still create a connection when listener filters time out.
* listeners: added :ref:`HTTP inspector listener filter <config_listener_filters_http_inspector>`.
* performance: stats symbol table implementation (disabled by default; to test it, add "--use-fake-symbol-table 0" to the command-line arguments when starting Envoy).
* redis: added :ref:`read_policy <envoy_api_field_config.filter.network.redis_proxy.v2.RedisProxy.ConnPoolSettings.read_policy>` to allow reading from redis replicas for Redis Cluster deployments.
* rbac: added support for DNS SAN as :ref:`principal_name <envoy_api_field_config.rbac.v2.Principal.Authenticated.principal_name>`.
* lua: extended `httpCall()` and `respond()` APIs to accept headers with entry values that can be a string or table of strings.
* performance: new buffer implementation enabled by default (to disable add "--use-libevent-buffers 1" to the command-line arguments when starting Envoy).
* performance: stats symbol table implementation (disabled by default; to test it, add "--use-fake-symbol-table 0" to the command-line arguments when starting Envoy).
* rbac: added support for DNS SAN as :ref:`principal_name <envoy_api_field_config.rbac.v2.Principal.Authenticated.principal_name>`.
* redis: added :ref:`read_policy <envoy_api_field_config.filter.network.redis_proxy.v2.RedisProxy.ConnPoolSettings.read_policy>` to allow reading from redis replicas for Redis Cluster deployments.
* regex: introduce new :ref:`RegexMatcher <envoy_api_msg_type.matcher.RegexMatcher>` type that
provides a safe regex implementation for untrusted user input. This type is now used in all
configuration that processes user provided input. See :ref:`deprecated configuration details
<deprecated>` for more information.
* rbac: added conditions to the policy, see :ref:`condition <envoy_api_field_config.rbac.v2.Policy.condition>`.
* router: added :ref:`rq_retry_skipped_request_not_complete <config_http_filters_router_stats>` counter stat to router stats.
* router check tool: add coverage reporting & enforcement.
Expand Down
13 changes: 13 additions & 0 deletions include/envoy/common/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,19 @@ envoy_cc_library(
hdrs = ["time.h"],
)

# Library target exposing the matchers.h header; it declares no sources or dependencies.
envoy_cc_library(
name = "matchers_interface",
hdrs = ["matchers.h"],
)

# Library target exposing the regex.h header; depends on ":matchers_interface".
envoy_cc_library(
name = "regex_interface",
hdrs = ["regex.h"],
deps = [
":matchers_interface",
],
)

envoy_cc_library(
name = "token_bucket_interface",
hdrs = ["token_bucket.h"],
Expand Down
Loading

0 comments on commit eff0201

Please sign in to comment.