Skip to content

Commit

Permalink
nexthop: support for fdb ecmp nexthops
Browse files Browse the repository at this point in the history
This patch introduces ecmp nexthops and nexthop groups
for mac fdb entries. In subsequent patches this is used
by the vxlan driver fdb entries. The use case is
E-VPN multihoming [1,2,3] which requires bridged vxlan traffic
to be load balanced to remote switches (vteps) belonging to
the same multi-homed ethernet segment (This is analogous to
a multi-homed LAG but over vxlan).

Changes include new nexthop flag NHA_FDB for nexthops
referenced by fdb entries. These nexthops only have ip.
This patch includes appropriate checks to avoid routes
referencing such nexthops.

example:
$ip nexthop add id 12 via 172.16.1.2 fdb
$ip nexthop add id 13 via 172.16.1.3 fdb
$ip nexthop add id 102 group 12/13 fdb

$bridge fdb add 02:02:00:00:00:13 dev vxlan1000 nhid 101 self

[1] E-VPN https://tools.ietf.org/html/rfc7432
[2] E-VPN VxLAN: https://tools.ietf.org/html/rfc8365
[3] LPC talk with mention of nexthop groups for L2 ecmp
http://vger.kernel.org/lpc_net2018_talks/scaling_bridge_fdb_database_slidesV3.pdf

v4 - fixed uninitialized variable reported by kernel test robot
Reported-by: kernel test robot <[email protected]>

Signed-off-by: Roopa Prabhu <[email protected]>
Reviewed-by: David Ahern <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
  • Loading branch information
roopa-prabhu authored and davem330 committed May 22, 2020
1 parent 7b1b843 commit 38428d6
Show file tree
Hide file tree
Showing 5 changed files with 148 additions and 25 deletions.
1 change: 1 addition & 0 deletions include/net/ip6_fib.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ struct fib6_config {
struct nl_info fc_nlinfo;
struct nlattr *fc_encap;
u16 fc_encap_type;
bool fc_is_fdb;
};

struct fib6_node {
Expand Down
32 changes: 32 additions & 0 deletions include/net/nexthop.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ struct nh_config {
u8 nh_family;
u8 nh_protocol;
u8 nh_blackhole;
u8 nh_fdb;
u32 nh_flags;

int nh_ifindex;
Expand All @@ -52,6 +53,7 @@ struct nh_info {

u8 family;
bool reject_nh;
bool fdb_nh;

union {
struct fib_nh_common fib_nhc;
Expand Down Expand Up @@ -80,6 +82,7 @@ struct nexthop {
struct rb_node rb_node; /* entry on netns rbtree */
struct list_head fi_list; /* v4 entries using nh */
struct list_head f6i_list; /* v6 entries using nh */
struct list_head fdb_list; /* fdb entries using this nh */
struct list_head grp_list; /* nh group entries using this nh */
struct net *net;

Expand All @@ -88,6 +91,7 @@ struct nexthop {
u8 protocol; /* app managing this nh */
u8 nh_flags;
bool is_group;
bool is_fdb_nh;

refcount_t refcnt;
struct rcu_head rcu;
Expand Down Expand Up @@ -304,4 +308,32 @@ static inline void nexthop_path_fib6_result(struct fib6_result *res, int hash)
int nexthop_for_each_fib6_nh(struct nexthop *nh,
int (*cb)(struct fib6_nh *nh, void *arg),
void *arg);

static inline int nexthop_get_family(struct nexthop *nh)
{
struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info);

return nhi->family;
}

static inline
struct fib_nh_common *nexthop_fdb_nhc(struct nexthop *nh)
{
struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info);

return &nhi->fib_nhc;
}

static inline struct fib_nh_common *nexthop_path_fdb_result(struct nexthop *nh,
int hash)
{
struct nh_info *nhi;
struct nexthop *nhp;

nhp = nexthop_select_path(nh, hash);
if (unlikely(!nhp))
return NULL;
nhi = rcu_dereference(nhp->nh_info);
return &nhi->fib_nhc;
}
#endif
3 changes: 3 additions & 0 deletions include/uapi/linux/nexthop.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ enum {
NHA_GROUPS, /* flag; only return nexthop groups in dump */
NHA_MASTER, /* u32; only return nexthops with given master dev */

NHA_FDB, /* flag; nexthop belongs to a bridge fdb */
/* if NHA_FDB is added, OIF, BLACKHOLE, ENCAP cannot be set */

__NHA_MAX,
};

Expand Down
132 changes: 107 additions & 25 deletions net/ipv4/nexthop.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ static const struct nla_policy rtm_nh_policy[NHA_MAX + 1] = {
[NHA_ENCAP] = { .type = NLA_NESTED },
[NHA_GROUPS] = { .type = NLA_FLAG },
[NHA_MASTER] = { .type = NLA_U32 },
[NHA_FDB] = { .type = NLA_FLAG },
};

static unsigned int nh_dev_hashfn(unsigned int val)
Expand Down Expand Up @@ -107,6 +108,7 @@ static struct nexthop *nexthop_alloc(void)
INIT_LIST_HEAD(&nh->fi_list);
INIT_LIST_HEAD(&nh->f6i_list);
INIT_LIST_HEAD(&nh->grp_list);
INIT_LIST_HEAD(&nh->fdb_list);
}
return nh;
}
Expand Down Expand Up @@ -227,6 +229,9 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh,
if (nla_put_u32(skb, NHA_ID, nh->id))
goto nla_put_failure;

if (nh->is_fdb_nh && nla_put_flag(skb, NHA_FDB))
goto nla_put_failure;

if (nh->is_group) {
struct nh_group *nhg = rtnl_dereference(nh->nh_grp);

Expand All @@ -241,7 +246,7 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh,
if (nla_put_flag(skb, NHA_BLACKHOLE))
goto nla_put_failure;
goto out;
} else {
} else if (!nh->is_fdb_nh) {
const struct net_device *dev;

dev = nhi->fib_nhc.nhc_dev;
Expand Down Expand Up @@ -387,12 +392,35 @@ static bool valid_group_nh(struct nexthop *nh, unsigned int npaths,
return true;
}

static int nh_check_attr_fdb_group(struct nexthop *nh, u8 *nh_family,
struct netlink_ext_ack *extack)
{
struct nh_info *nhi;

if (!nh->is_fdb_nh) {
NL_SET_ERR_MSG(extack, "FDB nexthop group can only have fdb nexthops");
return -EINVAL;
}

nhi = rtnl_dereference(nh->nh_info);
if (*nh_family == AF_UNSPEC) {
*nh_family = nhi->family;
} else if (*nh_family != nhi->family) {
NL_SET_ERR_MSG(extack, "FDB nexthop group cannot have mixed family nexthops");
return -EINVAL;
}

return 0;
}

static int nh_check_attr_group(struct net *net, struct nlattr *tb[],
struct netlink_ext_ack *extack)
{
unsigned int len = nla_len(tb[NHA_GROUP]);
u8 nh_family = AF_UNSPEC;
struct nexthop_grp *nhg;
unsigned int i, j;
u8 nhg_fdb = 0;

if (len & (sizeof(struct nexthop_grp) - 1)) {
NL_SET_ERR_MSG(extack,
Expand Down Expand Up @@ -421,6 +449,8 @@ static int nh_check_attr_group(struct net *net, struct nlattr *tb[],
}
}

if (tb[NHA_FDB])
nhg_fdb = 1;
nhg = nla_data(tb[NHA_GROUP]);
for (i = 0; i < len; ++i) {
struct nexthop *nh;
Expand All @@ -432,11 +462,20 @@ static int nh_check_attr_group(struct net *net, struct nlattr *tb[],
}
if (!valid_group_nh(nh, len, extack))
return -EINVAL;

if (nhg_fdb && nh_check_attr_fdb_group(nh, &nh_family, extack))
return -EINVAL;

if (!nhg_fdb && nh->is_fdb_nh) {
NL_SET_ERR_MSG(extack, "Non FDB nexthop group cannot have fdb nexthops");
return -EINVAL;
}
}
for (i = NHA_GROUP + 1; i < __NHA_MAX; ++i) {
if (!tb[i])
continue;

if (tb[NHA_FDB])
continue;
NL_SET_ERR_MSG(extack,
"No other attributes can be set in nexthop groups");
return -EINVAL;
Expand Down Expand Up @@ -495,6 +534,9 @@ struct nexthop *nexthop_select_path(struct nexthop *nh, int hash)
if (hash > atomic_read(&nhge->upper_bound))
continue;

if (nhge->nh->is_fdb_nh)
return nhge->nh;

/* nexthops always check if it is good and does
* not rely on a sysctl for this behavior
*/
Expand Down Expand Up @@ -564,6 +606,11 @@ int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
{
struct nh_info *nhi;

if (nh->is_fdb_nh) {
NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop");
return -EINVAL;
}

/* fib6_src is unique to a fib6_info and limits the ability to cache
* routes in fib6_nh within a nexthop that is potentially shared
* across multiple fib entries. If the config wants to use source
Expand Down Expand Up @@ -640,6 +687,12 @@ int fib_check_nexthop(struct nexthop *nh, u8 scope,
{
int err = 0;

if (nh->is_fdb_nh) {
NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop");
err = -EINVAL;
goto out;
}

if (nh->is_group) {
struct nh_group *nhg;

Expand Down Expand Up @@ -1125,6 +1178,9 @@ static struct nexthop *nexthop_create_group(struct net *net,
nh_group_rebalance(nhg);
}

if (cfg->nh_fdb)
nh->is_fdb_nh = 1;

rcu_assign_pointer(nh->nh_grp, nhg);

return nh;
Expand Down Expand Up @@ -1152,7 +1208,7 @@ static int nh_create_ipv4(struct net *net, struct nexthop *nh,
.fc_encap = cfg->nh_encap,
.fc_encap_type = cfg->nh_encap_type,
};
u32 tb_id = l3mdev_fib_table(cfg->dev);
u32 tb_id = (cfg->dev ? l3mdev_fib_table(cfg->dev) : RT_TABLE_MAIN);
int err;

err = fib_nh_init(net, fib_nh, &fib_cfg, 1, extack);
Expand All @@ -1161,6 +1217,9 @@ static int nh_create_ipv4(struct net *net, struct nexthop *nh,
goto out;
}

if (nh->is_fdb_nh)
goto out;

/* sets nh_dev if successful */
err = fib_check_nh(net, fib_nh, tb_id, 0, extack);
if (!err) {
Expand All @@ -1186,6 +1245,7 @@ static int nh_create_ipv6(struct net *net, struct nexthop *nh,
.fc_flags = cfg->nh_flags,
.fc_encap = cfg->nh_encap,
.fc_encap_type = cfg->nh_encap_type,
.fc_is_fdb = cfg->nh_fdb,
};
int err;

Expand Down Expand Up @@ -1227,6 +1287,9 @@ static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg,
nhi->family = cfg->nh_family;
nhi->fib_nhc.nhc_scope = RT_SCOPE_LINK;

if (cfg->nh_fdb)
nh->is_fdb_nh = 1;

if (cfg->nh_blackhole) {
nhi->reject_nh = 1;
cfg->nh_ifindex = net->loopback_dev->ifindex;
Expand All @@ -1248,7 +1311,8 @@ static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg,
}

/* add the entry to the device based hash */
nexthop_devhash_add(net, nhi);
if (!nh->is_fdb_nh)
nexthop_devhash_add(net, nhi);

rcu_assign_pointer(nh->nh_info, nhi);

Expand Down Expand Up @@ -1352,6 +1416,19 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
if (tb[NHA_ID])
cfg->nh_id = nla_get_u32(tb[NHA_ID]);

if (tb[NHA_FDB]) {
if (tb[NHA_OIF] || tb[NHA_BLACKHOLE] ||
tb[NHA_ENCAP] || tb[NHA_ENCAP_TYPE]) {
NL_SET_ERR_MSG(extack, "Fdb attribute can not be used with encap, oif or blackhole");
goto out;
}
if (nhm->nh_flags) {
NL_SET_ERR_MSG(extack, "Unsupported nexthop flags in ancillary header");
goto out;
}
cfg->nh_fdb = nla_get_flag(tb[NHA_FDB]);
}

if (tb[NHA_GROUP]) {
if (nhm->nh_family != AF_UNSPEC) {
NL_SET_ERR_MSG(extack, "Invalid family for group");
Expand All @@ -1375,8 +1452,8 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,

if (tb[NHA_BLACKHOLE]) {
if (tb[NHA_GATEWAY] || tb[NHA_OIF] ||
tb[NHA_ENCAP] || tb[NHA_ENCAP_TYPE]) {
NL_SET_ERR_MSG(extack, "Blackhole attribute can not be used with gateway or oif");
tb[NHA_ENCAP] || tb[NHA_ENCAP_TYPE] || tb[NHA_FDB]) {
NL_SET_ERR_MSG(extack, "Blackhole attribute can not be used with gateway, oif, encap or fdb");
goto out;
}

Expand All @@ -1385,26 +1462,28 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
goto out;
}

if (!tb[NHA_OIF]) {
NL_SET_ERR_MSG(extack, "Device attribute required for non-blackhole nexthops");
if (!cfg->nh_fdb && !tb[NHA_OIF]) {
NL_SET_ERR_MSG(extack, "Device attribute required for non-blackhole and non-fdb nexthops");
goto out;
}

cfg->nh_ifindex = nla_get_u32(tb[NHA_OIF]);
if (cfg->nh_ifindex)
cfg->dev = __dev_get_by_index(net, cfg->nh_ifindex);
if (!cfg->nh_fdb && tb[NHA_OIF]) {
cfg->nh_ifindex = nla_get_u32(tb[NHA_OIF]);
if (cfg->nh_ifindex)
cfg->dev = __dev_get_by_index(net, cfg->nh_ifindex);

if (!cfg->dev) {
NL_SET_ERR_MSG(extack, "Invalid device index");
goto out;
} else if (!(cfg->dev->flags & IFF_UP)) {
NL_SET_ERR_MSG(extack, "Nexthop device is not up");
err = -ENETDOWN;
goto out;
} else if (!netif_carrier_ok(cfg->dev)) {
NL_SET_ERR_MSG(extack, "Carrier for nexthop device is down");
err = -ENETDOWN;
goto out;
if (!cfg->dev) {
NL_SET_ERR_MSG(extack, "Invalid device index");
goto out;
} else if (!(cfg->dev->flags & IFF_UP)) {
NL_SET_ERR_MSG(extack, "Nexthop device is not up");
err = -ENETDOWN;
goto out;
} else if (!netif_carrier_ok(cfg->dev)) {
NL_SET_ERR_MSG(extack, "Carrier for nexthop device is down");
err = -ENETDOWN;
goto out;
}
}

err = -EINVAL;
Expand Down Expand Up @@ -1633,7 +1712,7 @@ static bool nh_dump_filtered(struct nexthop *nh, int dev_idx, int master_idx,

static int nh_valid_dump_req(const struct nlmsghdr *nlh, int *dev_idx,
int *master_idx, bool *group_filter,
struct netlink_callback *cb)
bool *fdb_filter, struct netlink_callback *cb)
{
struct netlink_ext_ack *extack = cb->extack;
struct nlattr *tb[NHA_MAX + 1];
Expand Down Expand Up @@ -1670,6 +1749,9 @@ static int nh_valid_dump_req(const struct nlmsghdr *nlh, int *dev_idx,
case NHA_GROUPS:
*group_filter = true;
break;
case NHA_FDB:
*fdb_filter = true;
break;
default:
NL_SET_ERR_MSG(extack, "Unsupported attribute in dump request");
return -EINVAL;
Expand All @@ -1688,17 +1770,17 @@ static int nh_valid_dump_req(const struct nlmsghdr *nlh, int *dev_idx,
/* rtnl */
static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb)
{
bool group_filter = false, fdb_filter = false;
struct nhmsg *nhm = nlmsg_data(cb->nlh);
int dev_filter_idx = 0, master_idx = 0;
struct net *net = sock_net(skb->sk);
struct rb_root *root = &net->nexthop.rb_root;
bool group_filter = false;
struct rb_node *node;
int idx = 0, s_idx;
int err;

err = nh_valid_dump_req(cb->nlh, &dev_filter_idx, &master_idx,
&group_filter, cb);
&group_filter, &fdb_filter, cb);
if (err < 0)
return err;

Expand Down
Loading

0 comments on commit 38428d6

Please sign in to comment.