net/core/net-sysfs.c

Source file repositories/reference/linux-study-clean/net/core/net-sysfs.c

File Facts

System
Linux kernel
Corpus path
net/core/net-sysfs.c
Extension
.c
Size
59907 bytes
Lines
2402
Domain
Networking Core
Bucket
Sockets, Protocols, Packet Path, And Network Policy
Inferred role
Networking Core: exported/initcall integration point
Status
integration implementation candidate

Why This File Exists

Networking stack implementation surface: socket APIs, protocol dispatch, packet flow, routing, filtering, and network namespaces.

Dependency Surface

Detected Declarations

Annotated Snippet

*    rtnl_lock                                   vfs_read
 *    unregister_netdevice_many                   kernfs_seq_start
 *    device_del / kobject_put                      kernfs_get_active (kn->active++)
 *    kernfs_drain                                sysfs_kf_seq_show
 *    wait_event(                                 rtnl_lock
 *       kn->active == KN_DEACTIVATED_BIAS)       -> waits on CPU 0 to release
 *    -> waits on CPU 1 to decrease kn->active       the rtnl lock.
 *
 * The historical fix was to use rtnl_trylock with restart_syscall to bail out
 * of sysfs operations when the lock couldn't be taken. This fixed the above
 * issue as it allowed CPU 1 to bail out of the ABBA situation.
 *
 * But it came with performance issues, as syscalls were being restarted in
 * loops when there was contention on the rtnl lock, with huge slow downs in
 * specific scenarios (e.g. lots of virtual interfaces created and userspace
 * daemons querying their attributes).
 *
 * The idea below is to bail out of the active kernfs_node protection
 * (kn->active) while trying to take the rtnl lock.
 *
 * This replaces rtnl_lock() and still has to be used with rtnl_unlock(). The
 * net device is guaranteed to be alive if this returns successfully.
 */
static int sysfs_rtnl_lock(struct kobject *kobj, struct attribute *attr,
			   struct net_device *ndev)
{
	struct kernfs_node *active_kn;
	int err;

	/* Pin the net device: unregistration may be running concurrently, and
	 * the reference keeps the device and its sysfs objects from being
	 * freed while we wait for the rtnl lock.
	 */
	dev_hold(ndev);

	/* Step out of the sysfs/kernfs active protection so the unregistration
	 * path can drain this kernfs_node instead of waiting on us. This also
	 * takes a reference on the kobject and on the kernfs_node being
	 * accessed. It is safe because of the device reference held above:
	 * unregistration eventually waits for us in netdev_run_todo, outside
	 * of any rtnl lock section.
	 */
	active_kn = sysfs_break_active_protection(kobj, attr);

	/* With the protection broken, blocking on the rtnl lock can no longer
	 * deadlock against the unregistration path.
	 */
	if (rtnl_lock_interruptible()) {
		err = -ERESTARTSYS;
	} else if (!dev_isalive(ndev)) {
		/* Dismantle already started: drop the lock and refuse. */
		rtnl_unlock();
		err = -ENODEV;
	} else {
		/* rtnl is held and the device is alive, so dismantle can't
		 * start until the caller calls rtnl_unlock().
		 */
		err = 0;
	}

	/* Success or failure, the broken protection and the device reference
	 * were only needed while acquiring the rtnl lock; undo both.
	 */
	sysfs_unbreak_active_protection(active_kn);
	dev_put(ndev);

	return err;
}

/* Format one attribute of the net device behind @dev into @buf through the
 * caller-supplied @format callback. The callback only runs inside an RCU
 * read-side section and only while dev_isalive() reports the device alive;
 * otherwise -EINVAL is returned. @attr is not used here.
 *
 * Uses the same locking rules as GIF* ioctl's.
 */
static ssize_t netdev_show(const struct device *dev,
			   struct device_attribute *attr, char *buf,
			   ssize_t (*format)(const struct net_device *, char *))
{
	struct net_device *netdev = to_net_dev(dev);
	ssize_t len;

	rcu_read_lock();
	len = dev_isalive(netdev) ? format(netdev, buf) : -EINVAL;
	rcu_read_unlock();

	return len;
}

Annotation

Implementation Notes