From b9ad8c4c42b3d1ea9cc8ad49f2a1a6b4dd1781f7 Mon Sep 17 00:00:00 2001 From: Thibault Charbonnier Date: Wed, 3 Jul 2019 10:56:30 -0700 Subject: [PATCH] fix(router) ensure regex path always takes precedence over plain path This is the third commit of a series of fixes to the router. Background ---------- As established a couple of commits ago, the rules for routing priorities are: 1. Routes with more matching attributes take precedence over Routes with fewer matching attributes 2. Plain hosts take precedence over wildcard hosts 3. Regex path always takes precedence over plain paths 4. Longer plain paths take precedence over shorter plain paths **Note:** as of today, our documentation is not reflecting the router's behavior. As of July 3rd, 2019, the proxy guide (https://docs.konghq.com/1.2.x/proxy/#evaluation-order) states that plain paths are evaluated before regex paths, and implies that the former have priority over the latter, which is not correct as per the router behavior, and asserted as per a series of established test cases. As per the above rules, consider the following simplified scenario: r1: paths = { "/pat" } r2: paths = { "/(path)" } local matched_route = router.select("GET", "/path", "route.com") -- matched_route == r2 Issue ----- Now, consider the following scenario: r1: paths = { "/pat" } hosts = { "route.com" } r2: paths = { "/(path)" } hosts = { "route.com" } local matched_route = router.select("GET", "/path", "route.com") -- /!\ matched_route should still be r2, but prior to this patch, was r1 The above case highlights a flaw in the router: that categorized Routes are not sorted as per rules 2., 3., and 4. (aforementioned). This would not be an issue if regex paths and wildcard hosts were matching categories of their own, but for historical reasons, they are not, at least at the moment. Note: said reasons include the lack of dynamic categories lists support in the initial router implementation, something that was fixed in 1ba8ab26afb3beafb5a26ef0309483d828e84b7a prior to the introduction of SNI/src/dst routing. This is outside the scope of this patch. Therefore, this patch ensures that all categorized Routes are thus sorted appropriately relative to each other within their respective categories. Note ---- It is worth noting that this patch can be breaking in its nature, given that the router behavior highlighted by the new test will differ before and after the patch. This patch replaces the one in https://github.com/Kong/kong/pull/4153, which is more expensive and relies on indexes to fix the issue, instead of fixing the ordering. It was also lacking unit tests, which are more appropriate (and mandatory) for router changes. Replaces #4153 --- kong/router.lua | 77 +++++++++++++++++++++++++++++---- kong/runloop/handler.lua | 14 ------ spec/01-unit/08-router_spec.lua | 72 ++++++++++++++++++++++++++++-- 3 files changed, 138 insertions(+), 25 deletions(-) diff --git a/kong/router.lua b/kong/router.lua index b6332847b16..c214cfe228d 100644 --- a/kong/router.lua +++ b/kong/router.lua @@ -23,6 +23,7 @@ local ipairs = ipairs local pairs = pairs local error = error local type = type +local max = math.max local band = bit.band local bor = bit.bor @@ -71,6 +72,10 @@ local MATCH_RULES = { DST = 0x00000001, } +local MATCH_SUBRULES = { + HAS_REGEX_URI = 0x01, + PLAIN_HOSTS_ONLY = 0x02, +} local EMPTY_T = {} @@ -168,6 +173,8 @@ local function marshall_route(r) preserve_host = route.preserve_host == true, match_rules = 0x00, match_weight = 0, + match_weight2 = 0, + max_uri_length = 0, hosts = {}, uris = {}, methods = {}, @@ -204,6 +211,8 @@ local function marshall_route(r) route_t.match_rules = bor(route_t.match_rules, MATCH_RULES.HOST) route_t.match_weight = route_t.match_weight + 1 + local has_wildcard + for _, host_value in ipairs(host_values) do if find(host_value, "*", nil, true) then -- wildcard host matching @@ -215,6 +224,8 @@ local function marshall_route(r) regex = wildcard_host_regex, }) + has_wildcard = true + else insert(route_t.hosts, { value = host_value, @@ -223,6 +234,11 @@ local function marshall_route(r) route_t.hosts[host_value] = host_value end + + if not has_wildcard then + route_t.match_weight2 = bor(route_t.match_weight2, + MATCH_SUBRULES.PLAIN_HOSTS_ONLY) + end end end @@ -250,6 +266,7 @@ local function marshall_route(r) route_t.uris[path] = uri_t insert(route_t.uris, uri_t) + route_t.max_uri_length = max(route_t.max_uri_length, #path) else -- regex URI @@ -266,6 +283,9 @@ local function marshall_route(r) route_t.uris[path] = uri_t insert(route_t.uris, uri_t) + + route_t.match_weight2 = bor(route_t.match_weight2, + MATCH_SUBRULES.HAS_REGEX_URI) end end end @@ -782,7 +802,9 @@ do end, [MATCH_RULES.URI] = function(category, ctx) - return category.routes_by_uris[ctx.hits.uri or ctx.req_uri] + -- no ctx.req_uri indexing since regex URIs have a higher priority than + -- plain URIs + return category.routes_by_uris[ctx.hits.uri] end, [MATCH_RULES.METHOD] = function(category, ctx) @@ -913,16 +935,55 @@ function _M.new(routes) -- index routes + do + local marshalled_routes = {} - for i = 1, #routes do - local route_t, err = marshall_route(routes[i]) - if not route_t then - return nil, err + for i = 1, #routes do + local route_t, err = marshall_route(routes[i]) + if not route_t then + return nil, err + end + + marshalled_routes[i] = route_t end - categorize_route_t(route_t, route_t.match_rules, categories) - index_route_t(route_t, plain_indexes, prefix_uris, regex_uris, - wildcard_hosts, src_trust_funcs, dst_trust_funcs) + -- sort wildcard hosts and uri regexes since those rules + -- don't have their own matching category + -- + -- * plain hosts > wildcard hosts + -- * regex uris > plain uris + -- * longer plain URIs > shorter plain URIs + + sort(marshalled_routes, function(r1, r2) + if r1.match_weight2 ~= r2.match_weight2 then + return r1.match_weight2 > r2.match_weight2 + end + + do + local rp1 = r1.route.regex_priority or 0 + local rp2 = r2.route.regex_priority or 0 + + if rp1 ~= rp2 then + return rp1 > rp2 + end + end + + if r1.max_uri_length ~= r2.max_uri_length then + return r1.max_uri_length > r2.max_uri_length + end + + if r1.route.created_at ~= nil and r2.route.created_at ~= nil then + return r1.route.created_at < r2.route.created_at + end + end) + + for i = 1, #marshalled_routes do + local route_t = marshalled_routes[i] + + categorize_route_t(route_t, route_t.match_rules, categories) + index_route_t(route_t, plain_indexes, prefix_uris, regex_uris, + wildcard_hosts, src_trust_funcs, dst_trust_funcs) + end end diff --git a/kong/runloop/handler.lua b/kong/runloop/handler.lua index 48376f10ae8..a98642b8df6 100644 --- a/kong/runloop/handler.lua +++ b/kong/runloop/handler.lua @@ -30,7 +30,6 @@ local sub = string.sub local find = string.find local lower = string.lower local fmt = string.format -local sort = table.sort local ngx = ngx local arg = ngx.arg local var = ngx.var @@ -609,19 +608,6 @@ do end end - sort(routes, function(r1, r2) - r1, r2 = r1.route, r2.route - - local rp1 = r1.regex_priority or 0 - local rp2 = r2.regex_priority or 0 - - if rp1 == rp2 then - return r1.created_at < r2.created_at - end - - return rp1 > rp2 - end) - local new_router, err = Router.new(routes) if not new_router then return nil, "could not create router: " .. err diff --git a/spec/01-unit/08-router_spec.lua b/spec/01-unit/08-router_spec.lua index 41eaff0f586..a098b7fb6c2 100644 --- a/spec/01-unit/08-router_spec.lua +++ b/spec/01-unit/08-router_spec.lua @@ -493,6 +493,74 @@ describe("Router", function() assert.same([[/route/persons/\d+/profile]], match_t.matches.uri) assert.same(nil, match_t.matches.uri_captures) end) + + it("matches a [uri regex] even if a [uri] got an exact match", function() + local use_case = { + { + service = service, + route = { + paths = { "/route/fixture" }, + }, + }, + { + service = service, + route = { + paths = { "/route/(fixture)" }, + }, + }, + } + + local router = assert(Router.new(use_case)) + + local match_t = router.select("GET", "/route/fixture", "domain.org") + assert.truthy(match_t) + assert.equal(use_case[2].route, match_t.route) + assert.same(nil, match_t.matches.host) + assert.same(nil, match_t.matches.method) + assert.same("/route/(fixture)", match_t.matches.uri) + end) + + it("matches a [uri regex + host] even if a [prefix uri] got a match", function() + local use_case = { + { + service = service, + route = { + paths = { "/pat" }, + }, + headers = { + host = { "route.com" }, + }, + }, + { + service = service, + route = { + paths = { "/path" }, + methods = { "POST" }, + }, + headers = { + host = { "route.com" }, + }, + }, + { + service = service, + route = { + paths = { "/(path)" }, + }, + headers = { + host = { "route.com" }, + }, + }, + } + + local router = assert(Router.new(use_case)) + + local match_t = router.select("GET", "/path", "route.com") + assert.truthy(match_t) + assert.equal(use_case[3].route, match_t.route) + assert.same("route.com", match_t.matches.host) + assert.same(nil, match_t.matches.method) + assert.same("/(path)", match_t.matches.uri) + end) end) describe("[wildcard host]", function() @@ -533,9 +601,7 @@ describe("Router", function() assert.equal(use_case[2].route, match_t.route) end) - pending("does not take precedence over a plain host", function() - -- Pending: temporarily pending in the current commit, awaiting a fix - -- in a subsequent commit. + it("does not take precedence over a plain host", function() table.insert(use_case, 1, { service = service, route = {