Skip to content

Commit 4832c30

Browse files
dsahern authored and davem330 committed
net: ipv6: put host and anycast routes on device with address
One nagging difference between ipv4 and ipv6 is that host routes for ipv6 addresses are installed using the loopback device or VRF / L3 Master device, e.g.:

    2001:db8:1::/120 dev veth0 proto kernel metric 256 pref medium
    local 2001:db8:1::1 dev lo table local proto kernel metric 0 pref medium

Using the loopback device is convenient -- necessary for local tx, but has some nasty side effects, most notably setting the 'lo' device down causes all host routes for all local IPv6 addresses to be removed from the FIB and completely breaks IPv6 networking across all interfaces.

This patch puts FIB entries for IPv6 routes against the device. This simplifies the routes in the FIB, for example by making dst->dev and rt6i_idev->dev the same (a future patch can look at removing the device reference taken for rt6i_idev for FIB entries).

When copies are made on FIB lookups, the cloned route has dst->dev set to loopback (or the L3 master device). This is needed for the local Tx of packets to local addresses.

With fib entries allocated against the real network device, the addrconf code that reinserts host routes on admin up of 'lo' is no longer needed.

Signed-off-by: David Ahern <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent 89e4950 commit 4832c30

File tree

3 files changed

+47
-56
lines changed

3 files changed

+47
-56
lines changed

net/ipv6/addrconf.c

-42
Original file line numberDiff line numberDiff line change
@@ -3030,9 +3030,6 @@ static void sit_add_v4_addrs(struct inet6_dev *idev)
30303030
static void init_loopback(struct net_device *dev)
30313031
{
30323032
struct inet6_dev *idev;
3033-
struct net_device *sp_dev;
3034-
struct inet6_ifaddr *sp_ifa;
3035-
struct rt6_info *sp_rt;
30363033

30373034
/* ::1 */
30383035

@@ -3045,45 +3042,6 @@ static void init_loopback(struct net_device *dev)
30453042
}
30463043

30473044
add_addr(idev, &in6addr_loopback, 128, IFA_HOST);
3048-
3049-
/* Add routes to other interface's IPv6 addresses */
3050-
for_each_netdev(dev_net(dev), sp_dev) {
3051-
if (!strcmp(sp_dev->name, dev->name))
3052-
continue;
3053-
3054-
idev = __in6_dev_get(sp_dev);
3055-
if (!idev)
3056-
continue;
3057-
3058-
read_lock_bh(&idev->lock);
3059-
list_for_each_entry(sp_ifa, &idev->addr_list, if_list) {
3060-
3061-
if (sp_ifa->flags & (IFA_F_DADFAILED | IFA_F_TENTATIVE))
3062-
continue;
3063-
3064-
if (sp_ifa->rt) {
3065-
/* This dst has been added to garbage list when
3066-
* lo device down, release this obsolete dst and
3067-
* reallocate a new router for ifa.
3068-
*/
3069-
if (!sp_ifa->rt->rt6i_node) {
3070-
ip6_rt_put(sp_ifa->rt);
3071-
sp_ifa->rt = NULL;
3072-
} else {
3073-
continue;
3074-
}
3075-
}
3076-
3077-
sp_rt = addrconf_dst_alloc(idev, &sp_ifa->addr, false);
3078-
3079-
/* Failure cases are ignored */
3080-
if (!IS_ERR(sp_rt)) {
3081-
sp_ifa->rt = sp_rt;
3082-
ip6_ins_rt(sp_rt);
3083-
}
3084-
}
3085-
read_unlock_bh(&idev->lock);
3086-
}
30873045
}
30883046

30893047
void addrconf_add_linklocal(struct inet6_dev *idev,

net/ipv6/icmp.c

+13-2
Original file line numberDiff line numberDiff line change
@@ -459,9 +459,20 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
459459
* Source addr check
460460
*/
461461

462-
if (__ipv6_addr_needs_scope_id(addr_type))
462+
if (__ipv6_addr_needs_scope_id(addr_type)) {
463463
iif = skb->dev->ifindex;
464-
else {
464+
465+
/* for local packets, get the real device index */
466+
if (iif == LOOPBACK_IFINDEX) {
467+
dst = skb_dst(skb);
468+
if (dst) {
469+
struct rt6_info *rt;
470+
471+
rt = container_of(dst, struct rt6_info, dst);
472+
iif = rt->rt6i_idev->dev->ifindex;
473+
}
474+
}
475+
} else {
465476
dst = skb_dst(skb);
466477
iif = l3mdev_master_ifindex(dst ? dst->dev : skb->dev);
467478
}

net/ipv6/route.c

+34-12
Original file line numberDiff line numberDiff line change
@@ -958,10 +958,34 @@ int ip6_ins_rt(struct rt6_info *rt)
958958
return __ip6_ins_rt(rt, &info, &mxc, NULL);
959959
}
960960

961+
/* called with rcu_lock held */
962+
static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt)
963+
{
964+
struct net_device *dev = rt->dst.dev;
965+
966+
if (rt->rt6i_flags & RTF_LOCAL) {
967+
/* for copies of local routes, dst->dev needs to be the
968+
* device if it is a master device, the master device if
969+
* device is enslaved, and the loopback as the default
970+
*/
971+
if (netif_is_l3_slave(dev) &&
972+
!rt6_need_strict(&rt->rt6i_dst.addr))
973+
dev = l3mdev_master_dev_rcu(dev);
974+
else if (!netif_is_l3_master(dev))
975+
dev = dev_net(dev)->loopback_dev;
976+
/* last case is netif_is_l3_master(dev) is true in which
977+
* case we want dev returned to be dev
978+
*/
979+
}
980+
981+
return dev;
982+
}
983+
961984
static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
962985
const struct in6_addr *daddr,
963986
const struct in6_addr *saddr)
964987
{
988+
struct net_device *dev;
965989
struct rt6_info *rt;
966990

967991
/*
@@ -971,8 +995,10 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
971995
if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
972996
ort = (struct rt6_info *)ort->dst.from;
973997

974-
rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
975-
998+
rcu_read_lock();
999+
dev = ip6_rt_get_dev_rcu(ort);
1000+
rt = __ip6_dst_alloc(dev_net(dev), dev, 0);
1001+
rcu_read_unlock();
9761002
if (!rt)
9771003
return NULL;
9781004

@@ -1000,11 +1026,13 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
10001026

10011027
static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
10021028
{
1029+
struct net_device *dev;
10031030
struct rt6_info *pcpu_rt;
10041031

1005-
pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
1006-
rt->dst.dev, rt->dst.flags);
1007-
1032+
rcu_read_lock();
1033+
dev = ip6_rt_get_dev_rcu(rt);
1034+
pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, rt->dst.flags);
1035+
rcu_read_unlock();
10081036
if (!pcpu_rt)
10091037
return NULL;
10101038
ip6_rt_copy_init(pcpu_rt, rt);
@@ -2688,15 +2716,9 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
26882716
{
26892717
u32 tb_id;
26902718
struct net *net = dev_net(idev->dev);
2691-
struct net_device *dev = net->loopback_dev;
2719+
struct net_device *dev = idev->dev;
26922720
struct rt6_info *rt;
26932721

2694-
/* use L3 Master device as loopback for host routes if device
2695-
* is enslaved and address is not link local or multicast
2696-
*/
2697-
if (!rt6_need_strict(addr))
2698-
dev = l3mdev_master_dev_rcu(idev->dev) ? : dev;
2699-
27002722
rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
27012723
if (!rt)
27022724
return ERR_PTR(-ENOMEM);

0 commit comments

Comments
 (0)