Skip to content

Commit

Permalink
introduce safe regex matcher based on re2 engine
Browse files Browse the repository at this point in the history
The libstdc++ std::regex implementation is not safe in all cases
for user provided input. This change deprecates the use of std::regex
in all user facing paths and introduces a new safe regex matcher with
an explicitly configurable engine, right now limited to Google's re2
regex engine. This is not a drop in replacement for std::regex as all
language features are not supported. As such we will go through a
deprecation period for the old regex engine.

Fixes #7728

Signed-off-by: Matt Klein <[email protected]>
  • Loading branch information
mattklein123 committed Aug 8, 2019
1 parent e03936e commit ec6e30a
Show file tree
Hide file tree
Showing 45 changed files with 605 additions and 136 deletions.
1 change: 1 addition & 0 deletions api/docs/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ proto_library(
"//envoy/type:range",
"//envoy/type/matcher:metadata",
"//envoy/type/matcher:number",
"//envoy/type/matcher:regex",
"//envoy/type/matcher:string",
],
)
2 changes: 2 additions & 0 deletions api/envoy/api/v2/route/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ api_proto_library_internal(
"//envoy/api/v2/core:base",
"//envoy/type:percent",
"//envoy/type:range",
"//envoy/type/matcher:regex",
"//envoy/type/matcher:string",
],
)

Expand Down
67 changes: 60 additions & 7 deletions api/envoy/api/v2/route/route.proto
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ option go_package = "route";
option java_generic_services = true;

import "envoy/api/v2/core/base.proto";
import "envoy/type/matcher/regex.proto";
import "envoy/type/matcher/string.proto";
import "envoy/type/percent.proto";
import "envoy/type/range.proto";

Expand Down Expand Up @@ -349,7 +351,17 @@ message RouteMatch {
// * The regex */b[io]t* matches the path */bot*
// * The regex */b[io]t* does not match the path */bite*
// * The regex */b[io]t* does not match the path */bit/bot*
string regex = 3 [(validate.rules).string.max_bytes = 1024];
//
// .. attention::
// This field has been deprecated in favor of `safe_regex` as it is not safe for use with
// untrusted input in all cases.
string regex = 3 [(validate.rules).string.max_bytes = 1024, deprecated = true];

// If specified, the route is a regular expression rule meaning that the
// regex must match the *:path* header once the query string is removed. The entire path
// (without the query string) must match the regex. The rule will not match if only a
// subsequence of the *:path* header matches the regex.
type.matcher.RegexMatcher safe_regex = 10 [(validate.rules).message.required = true];
}

// Indicates that prefix/path matching should be case insensitive. The default
Expand Down Expand Up @@ -409,7 +421,17 @@ message CorsPolicy {
// Specifies regex patterns that match allowed origins.
//
// An origin is allowed if either allow_origin or allow_origin_regex match.
repeated string allow_origin_regex = 8 [(validate.rules).repeated .items.string.max_bytes = 1024];
//
// .. attention::
// This field has been deprecated in favor of `allow_origin_safe_regex` as it is not safe for
// use with untrusted input in all cases.
repeated string allow_origin_regex = 8
[(validate.rules).repeated .items.string.max_bytes = 1024, deprecated = true];

// Specifies regex patterns that match allowed origins.
//
// An origin is allowed if either allow_origin or allow_origin_safe_regex match.
repeated type.matcher.RegexMatcher allow_origin_safe_regex = 11;

// Specifies the content for the *access-control-allow-methods* header.
string allow_methods = 2;
Expand Down Expand Up @@ -1076,9 +1098,17 @@ message VirtualCluster {
// * The regex */rides/\d+* matches the path */rides/0*
// * The regex */rides/\d+* matches the path */rides/123*
// * The regex */rides/\d+* does not match the path */rides/123/456*
string pattern = 1 [(validate.rules).string = {min_bytes: 1, max_bytes: 1024}];
//
// .. attention::
// This field has been deprecated in favor of `regex` as it is not safe for use with
// untrusted input in all cases.
string pattern = 1 [(validate.rules).string.max_bytes = 1024, deprecated = true];

// Specifies the name of the virtual cluster. The virtual cluster name as well
// Specifies a regex pattern to use for matching requests. The entire path of the request
// must match the regex.
type.matcher.RegexMatcher regex = 4;

// Specifies the name of the virtual cluster. The virtual cluster name as well
// as the virtual host name are used when emitting statistics. The statistics are emitted by the
// router filter and are documented :ref:`here <config_http_filters_router_stats>`.
string name = 2 [(validate.rules).string.min_bytes = 1];
Expand Down Expand Up @@ -1272,7 +1302,16 @@ message HeaderMatcher {
// * The regex *\d{3}* matches the value *123*
// * The regex *\d{3}* does not match the value *1234*
// * The regex *\d{3}* does not match the value *123.456*
string regex_match = 5 [(validate.rules).string.max_bytes = 1024];
//
// .. attention::
// This field has been deprecated in favor of `safe_regex_match` as it is not safe for use
// with untrusted input in all cases.
string regex_match = 5 [(validate.rules).string.max_bytes = 1024, deprecated = true];

// If specified, this regex string is a regular expression rule which implies the entire request
// header value must match the regex. The rule will not match if only a subsequence of the
// request header value matches the regex.
type.matcher.RegexMatcher safe_regex_match = 11;

// If specified, header match will be performed based on range.
// The rule will match if the request header value is within this range.
Expand Down Expand Up @@ -1327,11 +1366,25 @@ message QueryParameterMatcher {
// Specifies the value of the key. If the value is absent, a request
// that contains the key in its query string will match, whether the
// key appears with a value (e.g., "?debug=true") or not (e.g., "?debug")
string value = 3;
//
// .. attention::
// This field is deprecated. Use an `exact` match inside the `string_match` field.
string value = 3 [deprecated = true];

// Specifies whether the query parameter value is a regular expression.
// Defaults to false. The entire query parameter value (i.e., the part to
// the right of the equals sign in "key=value") must match the regex.
// E.g., the regex "\d+$" will match "123" but not "a123" or "123a".
google.protobuf.BoolValue regex = 4;
//
// .. attention::
// This field is deprecated. Use a `safe_regex` match inside the `string_match` field.
google.protobuf.BoolValue regex = 4 [deprecated = true];

oneof query_parameter_match_specifier {
// Specifies whether a query parameter value should match against a string.
type.matcher.StringMatcher string_match = 5;

// Specifies whether a query parameter should be present.
bool present_match = 6;
}
}
9 changes: 9 additions & 0 deletions api/envoy/type/matcher/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ api_proto_library_internal(
name = "string",
srcs = ["string.proto"],
visibility = ["//visibility:public"],
deps = [
":regex",
],
)

api_go_proto_library(
Expand All @@ -65,3 +68,9 @@ api_go_proto_library(
":string_go_proto",
],
)

# Proto library target for regex.proto; visible to all packages.
api_proto_library_internal(
name = "regex",
srcs = ["regex.proto"],
visibility = ["//visibility:public"],
)
32 changes: 32 additions & 0 deletions api/envoy/type/matcher/regex.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
syntax = "proto3";

package envoy.type.matcher;

// The Java outer class is named after this file (regex.proto). It must not reuse the outer class
// name of another .proto in the same java_package, otherwise the generated Java classes collide.
option java_outer_classname = "RegexProto";
option java_multiple_files = true;
option java_package = "io.envoyproxy.envoy.type.matcher";
option go_package = "matcher";

import "validate/validate.proto";

// [#protodoc-title: RegexMatcher]

// A regex matcher designed for safety when used with untrusted input.
//
// Exactly one engine must be selected through `engine_type`, and `regex` must be non-empty.
message RegexMatcher {
  // Google's `re2 <https://github.com/google/re2>`_ regex engine. The regex string must adhere to
  // the documented `syntax <https://github.com/google/re2/wiki/Syntax>`_. The engine is designed
  // to complete execution in linear time as well as limit the amount of memory used. In the future
  // different aspects of the engine may be made configurable.
  message GoogleReEngine {
  }

  // The engine used to compile and evaluate `regex`. Setting one is mandatory.
  oneof engine_type {
    option (validate.required) = true;

    // Google's re2 regex engine.
    GoogleReEngine google_re_engine = 1 [(validate.rules).message.required = true];
  }

  // The regex match string. The string must be supported by the configured engine.
  string regex = 2 [(validate.rules).string.min_bytes = 1];
}
11 changes: 10 additions & 1 deletion api/envoy/type/matcher/string.proto
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ option java_multiple_files = true;
option java_package = "io.envoyproxy.envoy.type.matcher";
option go_package = "matcher";

import "envoy/type/matcher/regex.proto";

import "validate/validate.proto";

// [#protodoc-title: StringMatcher]
Expand Down Expand Up @@ -48,7 +50,14 @@ message StringMatcher {
// * The regex *\d{3}* matches the value *123*
// * The regex *\d{3}* does not match the value *1234*
// * The regex *\d{3}* does not match the value *123.456*
string regex = 4 [(validate.rules).string.max_bytes = 1024];
//
// .. attention::
// This field has been deprecated in favor of `safe_regex` as it is not safe for use with
// untrusted input in all cases.
string regex = 4 [(validate.rules).string.max_bytes = 1024, deprecated = true];

// The input string must match the regular expression specified here.
RegexMatcher safe_regex = 5;
}
}

Expand Down
4 changes: 4 additions & 0 deletions bazel/repositories.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ def envoy_dependencies(skip_targets = []):
_io_opentracing_cpp()
_net_zlib()
_repository_impl("bazel_toolchains")
_com_googlesource_code_re2()

_python_deps()
_cc_deps()
Expand Down Expand Up @@ -336,6 +337,9 @@ def _io_opentracing_cpp():
actual = "@io_opentracing_cpp//:opentracing",
)

# Sets up the "com_googlesource_code_re2" external repository by delegating to the shared
# _repository_impl helper (presumably resolved from the matching REPOSITORY_LOCATIONS entry;
# confirm against _repository_impl).
def _com_googlesource_code_re2():
_repository_impl("com_googlesource_code_re2")

def _com_lightstep_tracer_cpp():
location = REPOSITORY_LOCATIONS["com_lightstep_tracer_cpp"]
_repository_impl("com_lightstep_tracer_cpp")
Expand Down
5 changes: 5 additions & 0 deletions bazel/repository_locations.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -248,4 +248,9 @@ REPOSITORY_LOCATIONS = dict(
sha256 = "fcdebf54c89d839ffa7eefae166c8e4b551c765559db13ff15bff98047f344fb",
urls = ["https://storage.googleapis.com/quiche-envoy-integration/2a930469533c3b541443488a629fe25cd8ff53d0.tar.gz"],
),
com_googlesource_code_re2 = dict(
sha256 = "de6c3ee49b2cecdfd2936af18d6947db36726590e566b5915db3746784c55745",
strip_prefix = "re2-2019-07-01",
urls = ["https://github.com/google/re2/archive/2019-07-01.tar.gz"],
),
)
1 change: 1 addition & 0 deletions docs/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ PROTO_RST="
/envoy/type/matcher/metadata/envoy/type/matcher/metadata.proto.rst
/envoy/type/matcher/value/envoy/type/matcher/value.proto.rst
/envoy/type/matcher/number/envoy/type/matcher/number.proto.rst
/envoy/type/matcher/regex/envoy/type/matcher/regex.proto.rst
/envoy/type/matcher/string/envoy/type/matcher/string.proto.rst
"

Expand Down
1 change: 1 addition & 0 deletions docs/root/api-v2/types/types.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,6 @@ Types
../type/range.proto
../type/matcher/metadata.proto
../type/matcher/number.proto
../type/matcher/regex.proto
../type/matcher/string.proto
../type/matcher/value.proto
17 changes: 16 additions & 1 deletion docs/root/intro/deprecated.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,22 @@ Deprecated items below are listed in chronological order.

Version 1.12.0 (pending)
========================
* The ORIGINAL_DST_LB :ref:`load balancing policy <envoy_api_field_Cluster.lb_policy>` is deprecated, use CLUSTER_PROVIDED policy instead when configuring an :ref:`original destination cluster <envoy_api_field_Cluster.type>`.
* The ORIGINAL_DST_LB :ref:`load balancing policy <envoy_api_field_Cluster.lb_policy>` is
deprecated, use CLUSTER_PROVIDED policy instead when configuring an :ref:`original destination
cluster <envoy_api_field_Cluster.type>`.
* The `regex` field in :ref:`StringMatcher <envoy_api_msg_type.matcher.StringMatcher>` has been
deprecated in favor of the `safe_regex` field.
* The `regex` field in :ref:`RouteMatch <envoy_api_msg_route.RouteMatch>` has been
deprecated in favor of the `safe_regex` field.
* The `allow_origin_regex` field in :ref:`CorsPolicy <envoy_api_msg_route.CorsPolicy>` has been
deprecated in favor of the `allow_origin_safe_regex` field.
* The `pattern` field in :ref:`VirtualCluster <envoy_api_msg_route.VirtualCluster>` has been
deprecated in favor of the `regex` field.
* The `regex_match` field in :ref:`HeaderMatcher <envoy_api_msg_route.HeaderMatcher>` has been
deprecated in favor of the `safe_regex_match` field.
* The `value` and `regex` fields in :ref:`QueryParameterMatcher
<envoy_api_msg_route.QueryParameterMatcher>` have been deprecated in favor of the `string_match`
and `present_match` fields.

Version 1.11.0 (July 11, 2019)
==============================
Expand Down
5 changes: 5 additions & 0 deletions include/envoy/common/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,11 @@ envoy_cc_library(
hdrs = ["time.h"],
)

# Header-only target exposing the regex interface header (regex.h).
envoy_cc_library(
name = "regex_interface",
hdrs = ["regex.h"],
)

envoy_cc_library(
name = "token_bucket_interface",
hdrs = ["token_bucket.h"],
Expand Down
28 changes: 28 additions & 0 deletions include/envoy/common/regex.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#pragma once

#include <memory>

#include "envoy/common/pure.h"

#include "absl/strings/string_view.h"

namespace Envoy {
namespace Regex {

/**
* A compiled regex expression matcher which uses an abstract regex engine.
*
* This is an interface: concrete engines derive from it and implement match(). Instances are
* handed out through CompiledMatcherPtr, a unique_ptr to a const matcher, so match() is const.
*/
class CompiledMatcher {
public:
// Virtual so that an implementation can be destroyed through a pointer to this interface.
virtual ~CompiledMatcher() = default;

/**
* Test a value against the compiled expression.
* @param value supplies the string to test; taken as a non-owning view.
* @return whether the value matches the compiled regex expression.
*/
virtual bool match(absl::string_view value) const PURE;
};

using CompiledMatcherPtr = std::unique_ptr<const CompiledMatcher>;

} // namespace Regex
} // namespace Envoy
1 change: 1 addition & 0 deletions include/envoy/router/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ envoy_cc_library(
external_deps = ["abseil_optional"],
deps = [
"//include/envoy/access_log:access_log_interface",
"//include/envoy/common:regex_interface",
"//include/envoy/config:typed_metadata_interface",
"//include/envoy/http:codec_interface",
"//include/envoy/http:codes_interface",
Expand Down
7 changes: 4 additions & 3 deletions include/envoy/router/router.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

#include "envoy/access_log/access_log.h"
#include "envoy/api/v2/core/base.pb.h"
#include "envoy/common/regex.h"
#include "envoy/config/typed_metadata.h"
#include "envoy/http/codec.h"
#include "envoy/http/codes.h"
Expand Down Expand Up @@ -105,10 +106,10 @@ class CorsPolicy {
*/
virtual const std::list<std::string>& allowOrigins() const PURE;

/*
* @return std::list<std::regex>& regexes that match allowed origins.
/**
* @return std::list<Regex::CompiledMatcherPtr>& regexes that match allowed origins.
*/
virtual const std::list<std::regex>& allowOriginRegexes() const PURE;
virtual const std::list<Regex::CompiledMatcherPtr>& allowOriginRegexes() const PURE;

/**
* @return std::string access-control-allow-methods value.
Expand Down
1 change: 1 addition & 0 deletions source/common/access_log/access_log_formatter.cc
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "common/access_log/access_log_formatter.h"

#include <cstdint>
#include <regex>
#include <string>
#include <vector>

Expand Down
13 changes: 13 additions & 0 deletions source/common/common/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ envoy_cc_library(
external_deps = ["abseil_optional"],
deps = [
":utility_lib",
"//source/common/common:regex_lib",
"//source/common/config:metadata_lib",
"//source/common/protobuf",
"@envoy_api//envoy/type/matcher:metadata_cc",
Expand All @@ -174,6 +175,18 @@ envoy_cc_library(
],
)

# Regex library (regex.cc / regex.h). Depends on the regex interface header, the re2 engine and
# the C++ target generated from the RegexMatcher proto.
envoy_cc_library(
name = "regex_lib",
srcs = ["regex.cc"],
hdrs = ["regex.h"],
deps = [
":assert_lib",
"//include/envoy/common:regex_interface",
"@com_googlesource_code_re2//:re2",
"@envoy_api//envoy/type/matcher:regex_cc",
],
)

envoy_cc_library(
name = "non_copyable",
hdrs = ["non_copyable.h"],
Expand Down
Loading

0 comments on commit ec6e30a

Please sign in to comment.