--- sys/net/if.c.orig
+++ sys/net/if.c
@@ -274,6 +274,8 @@
 static void if_delgroups(struct ifnet *);
 static void if_attach_internal(struct ifnet *, int, struct if_clone *);
 static int if_detach_internal(struct ifnet *, int, struct if_clone **);
+static void if_link_ifnet(struct ifnet *);
+static bool if_unlink_ifnet(struct ifnet *, bool);
 #ifdef VIMAGE
 static void if_vmove(struct ifnet *, struct vnet *);
 #endif
@@ -305,12 +307,8 @@
 
 /*
  * The global network interface list (V_ifnet) and related state (such as
- * if_index, if_indexlim, and ifindex_table) are protected by an sxlock and
- * an rwlock. Either may be acquired shared to stablize the list, but both
- * must be acquired writable to modify the list. This model allows us to
- * both stablize the interface list during interrupt thread processing, but
- * also to stablize it over long-running ioctls, without introducing priority
- * inversions and deadlocks.
+ * if_index, if_indexlim, and ifindex_table) are protected by an sxlock.
+ * This may be acquired to stabilise the list, or we may rely on NET_EPOCH.
  */
 struct rwlock ifnet_rwlock;
 RW_SYSINIT_FLAGS(ifnet_rw, &ifnet_rwlock, "ifnet_rw", RW_RECURSE);
@@ -317,6 +315,9 @@
 struct sx ifnet_sxlock;
 SX_SYSINIT_FLAGS(ifnet_sx, &ifnet_sxlock, "ifnet_sx", SX_RECURSE);
 
+struct sx ifnet_detach_sxlock;
+SX_SYSINIT(ifnet_detach, &ifnet_detach_sxlock, "ifnet_detach_sx");
+
 /*
  * The allocation of network interfaces is a rather non-atomic affair; we
  * need to select an index before we are ready to expose the interface for
@@ -476,17 +477,87 @@
 }
 VNET_SYSUNINIT(vnet_if_uninit, SI_SUB_INIT_IF, SI_ORDER_FIRST,
     vnet_if_uninit, NULL);
+#endif
 
 static void
+if_link_ifnet(struct ifnet *ifp)
+{
+
+    IFNET_WLOCK();
+    CK_STAILQ_INSERT_TAIL(&V_ifnet, ifp, if_link);
+#ifdef VIMAGE
+    curvnet->vnet_ifcnt++;
+#endif
+    IFNET_WUNLOCK();
+}
+
+static bool
+if_unlink_ifnet(struct ifnet *ifp, bool vmove)
+{
+    struct ifnet *iter;
+    int found = 0;
+
+    IFNET_WLOCK();
+    CK_STAILQ_FOREACH(iter, &V_ifnet, if_link)
+        if (iter == ifp) {
+            CK_STAILQ_REMOVE(&V_ifnet, ifp, ifnet, if_link);
+            if (!vmove)
+                ifp->if_flags |= IFF_DYING;
+            found = 1;
+            break;
+        }
+#ifdef VIMAGE
+    curvnet->vnet_ifcnt--;
+#endif
+    IFNET_WUNLOCK();
+
+    return (found);
+}
+
+#ifdef VIMAGE
+static void
 vnet_if_return(const void *unused __unused)
 {
     struct ifnet *ifp, *nifp;
+    struct ifnet **pending;
+    int found, i;
 
+    i = 0;
+
+    /*
+     * We need to protect our access to the V_ifnet tailq. Ordinarily we'd
+     * enter NET_EPOCH, but that's not possible, because if_vmove() calls
+     * if_detach_internal(), which waits for NET_EPOCH callbacks to
+     * complete. We can't do that from within NET_EPOCH.
+     *
+     * However, we can also use the IFNET_xLOCK, which is the V_ifnet
+     * read/write lock. We cannot hold the lock as we call if_vmove()
+     * though, as that presents LOR w.r.t ifnet_sx, in_multi_sx and iflib
+     * ctx lock.
+     */
+    IFNET_WLOCK();
+
+    pending = malloc(sizeof(struct ifnet *) * curvnet->vnet_ifcnt,
+        M_IFNET, M_WAITOK | M_ZERO);
+
     /* Return all inherited interfaces to their parent vnets. */
     CK_STAILQ_FOREACH_SAFE(ifp, &V_ifnet, if_link, nifp) {
-        if (ifp->if_home_vnet != ifp->if_vnet)
-            if_vmove(ifp, ifp->if_home_vnet);
+        if (ifp->if_home_vnet != ifp->if_vnet) {
+            found = if_unlink_ifnet(ifp, true);
+            MPASS(found);
+
+            pending[i++] = ifp;
+        }
     }
+    IFNET_WUNLOCK();
+
+    for (int j = 0; j < i; j++) {
+        sx_xlock(&ifnet_detach_sxlock);
+        if_vmove(pending[j], pending[j]->if_home_vnet);
+        sx_xunlock(&ifnet_detach_sxlock);
+    }
+
+    free(pending, M_IFNET);
 }
 VNET_SYSUNINIT(vnet_if_return, SI_SUB_VNET_DONE, SI_ORDER_ANY,
     vnet_if_return, NULL);
@@ -894,12 +965,7 @@
     }
 #endif
 
-    IFNET_WLOCK();
-    CK_STAILQ_INSERT_TAIL(&V_ifnet, ifp, if_link);
-#ifdef VIMAGE
-    curvnet->vnet_ifcnt++;
-#endif
-    IFNET_WUNLOCK();
+    if_link_ifnet(ifp);
 
     if (domain_init_status >= 2)
         if_attachdomain1(ifp);
@@ -1037,9 +1103,15 @@
 void
 if_detach(struct ifnet *ifp)
 {
+    bool found;
 
     CURVNET_SET_QUIET(ifp->if_vnet);
-    if_detach_internal(ifp, 0, NULL);
+    found = if_unlink_ifnet(ifp, false);
+    if (found) {
+        sx_slock(&ifnet_detach_sxlock);
+        if_detach_internal(ifp, 0, NULL);
+        sx_sunlock(&ifnet_detach_sxlock);
+    }
     CURVNET_RESTORE();
 }
 
@@ -1059,8 +1131,6 @@
     struct ifaddr *ifa;
     int i;
     struct domain *dp;
-    struct ifnet *iter;
-    int found = 0;
 #ifdef VIMAGE
     int shutdown;
 
@@ -1067,39 +1137,11 @@
     shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET &&
         ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0;
 #endif
-    IFNET_WLOCK();
-    CK_STAILQ_FOREACH(iter, &V_ifnet, if_link)
-        if (iter == ifp) {
-            CK_STAILQ_REMOVE(&V_ifnet, ifp, ifnet, if_link);
-            if (!vmove)
-                ifp->if_flags |= IFF_DYING;
-            found = 1;
-            break;
-        }
-    IFNET_WUNLOCK();
-    if (!found) {
-        /*
-         * While we would want to panic here, we cannot
-         * guarantee that the interface is indeed still on
-         * the list given we don't hold locks all the way.
-         */
-        return (ENOENT);
-#if 0
-        if (vmove)
-            panic("%s: ifp=%p not on the ifnet tailq %p",
-                __func__, ifp, &V_ifnet);
-        else
-            return; /* XXX this should panic as well? */
-#endif
-    }
 
     /*
      * At this point we know the interface still was on the ifnet list
      * and we removed it so we are in a stable state.
      */
-#ifdef VIMAGE
-    curvnet->vnet_ifcnt--;
-#endif
     epoch_wait_preempt(net_epoch_preempt);
 
     /*
@@ -1326,6 +1368,7 @@
     struct prison *pr;
     struct ifnet *difp;
     int shutdown;
+    bool found;
 
     /* Try to find the prison within our visibility. */
     sx_slock(&allprison_lock);
@@ -1362,6 +1405,9 @@
     }
     CURVNET_RESTORE();
 
+    found = if_unlink_ifnet(ifp, true);
+    MPASS(found);
+
     /* Move the interface into the child jail/vnet. */
     if_vmove(ifp, pr->pr_vnet);
 
@@ -1378,7 +1424,8 @@
     struct prison *pr;
     struct vnet *vnet_dst;
     struct ifnet *ifp;
-    int shutdown;
+    int shutdown;
+    bool found;
 
     /* Try to find the prison within our visibility. */
     sx_slock(&allprison_lock);
@@ -1416,6 +1463,8 @@
     }
 
     /* Get interface back from child jail/vnet. */
+    found = if_unlink_ifnet(ifp, true);
+    MPASS(found);
     if_vmove(ifp, vnet_dst);
     CURVNET_RESTORE();
 
@@ -3100,8 +3149,12 @@
         goto out_noref;
     case SIOCIFDESTROY:
         error = priv_check(td, PRIV_NET_IFDESTROY);
-        if (error == 0)
+
+        if (error == 0) {
+            sx_slock(&ifnet_detach_sxlock);
             error = if_clone_destroy(ifr->ifr_name);
+            sx_sunlock(&ifnet_detach_sxlock);
+        }
         goto out_noref;
 
     case SIOCIFGCLONERS:
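Note on the if.c hunks above (editor's illustration, not part of the patch): the change establishes a pattern for moving an interface out of a vnet. The interface is first unlinked from V_ifnet with if_unlink_ifnet(), which takes IFNET_WLOCK internally and adjusts vnet_ifcnt; the caller asserts that it was found; and only then is if_vmove() called, serialized against if_detach() and SIOCIFDESTROY by the new ifnet_detach_sxlock, as vnet_if_return() now does. A minimal sketch of that shape follows, assuming VIMAGE and placement inside if.c where the static helpers are visible; the wrapper name example_return_one() is made up for illustration.

/*
 * Illustrative sketch only, not part of this change: return a single
 * interface to its home vnet the way vnet_if_return() does above.
 * example_return_one() is a hypothetical name.
 */
static void
example_return_one(struct ifnet *ifp)
{
    bool found;

    /* Unlink from the current vnet's V_ifnet under IFNET_WLOCK(). */
    found = if_unlink_ifnet(ifp, true);
    MPASS(found);

    /* Serialize the move against if_detach() and SIOCIFDESTROY. */
    sx_xlock(&ifnet_detach_sxlock);
    if_vmove(ifp, ifp->if_home_vnet);
    sx_xunlock(&ifnet_detach_sxlock);
}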
--- sys/net/if_var.h.orig
+++ sys/net/if_var.h
@@ -569,27 +569,11 @@
 extern struct rwlock ifnet_rwlock;
 extern struct sx ifnet_sxlock;
 
-#define IFNET_WLOCK() do { \
-    sx_xlock(&ifnet_sxlock); \
-    rw_wlock(&ifnet_rwlock); \
-} while (0)
-
-#define IFNET_WUNLOCK() do { \
-    rw_wunlock(&ifnet_rwlock); \
-    sx_xunlock(&ifnet_sxlock); \
-} while (0)
-
-/*
- * To assert the ifnet lock, you must know not only whether it's for read or
- * write, but also whether it was acquired with sleep support or not.
- */
-#define IFNET_RLOCK_ASSERT() sx_assert(&ifnet_sxlock, SA_SLOCKED)
+#define IFNET_WLOCK() sx_xlock(&ifnet_sxlock)
+#define IFNET_WUNLOCK() sx_xunlock(&ifnet_sxlock)
+#define IFNET_RLOCK_ASSERT() sx_assert(&ifnet_sxlock, SA_SLOCKED)
 #define IFNET_RLOCK_NOSLEEP_ASSERT() MPASS(in_epoch(net_epoch_preempt))
-#define IFNET_WLOCK_ASSERT() do { \
-    sx_assert(&ifnet_sxlock, SA_XLOCKED); \
-    rw_assert(&ifnet_rwlock, RA_WLOCKED); \
-} while (0)
-
+#define IFNET_WLOCK_ASSERT() sx_assert(&ifnet_sxlock, SA_XLOCKED)
 #define IFNET_RLOCK() sx_slock(&ifnet_sxlock)
 #define IFNET_RLOCK_NOSLEEP() struct epoch_tracker ifnet_rlock_et; epoch_enter_preempt(net_epoch_preempt, &ifnet_rlock_et)
 #define IFNET_RUNLOCK() sx_sunlock(&ifnet_sxlock)
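Note on the if_var.h hunk above (editor's illustration, not part of the patch): with the rwlock dropped, writers of the interface list take ifnet_sxlock exclusively through IFNET_WLOCK(), while readers either take it shared via IFNET_RLOCK() or stay within the network epoch, which is what IFNET_RLOCK_NOSLEEP_ASSERT() now checks with in_epoch(net_epoch_preempt). Below is a minimal sketch of an epoch-stabilised read of V_ifnet, built from the same epoch_enter_preempt()/epoch_exit_preempt() calls the IFNET_RLOCK_NOSLEEP() macro above uses, similar in spirit to existing lookups such as ifunit_ref(); the function name example_lookup() is made up for illustration.

/*
 * Illustrative sketch only, not part of this change: walk V_ifnet under
 * the network epoch and hand back a referenced ifp (or NULL).  The
 * caller is expected to if_rele() the result.  example_lookup() is a
 * hypothetical name.
 */
static struct ifnet *
example_lookup(const char *name)
{
    struct epoch_tracker et;
    struct ifnet *ifp;

    epoch_enter_preempt(net_epoch_preempt, &et);
    CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
        if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0 &&
            !(ifp->if_flags & IFF_DYING))
            break;
    }
    if (ifp != NULL)
        if_ref(ifp);    /* take the reference before leaving the epoch */
    epoch_exit_preempt(net_epoch_preempt, &et);

    return (ifp);
}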