kernel/events/ring_buffer.c

Source file repositories/reference/linux-study-clean/kernel/events/ring_buffer.c

File Facts

System
Linux kernel
Corpus path
kernel/events/ring_buffer.c
Extension
.c
Size
24217 bytes
Lines
978
Domain
Core OS
Bucket
Scheduler, Processes, Timers, Sync, And Syscalls
Inferred role
Core OS: exported/initcall integration point
Status
integration implementation candidate

Why This File Exists

Core operating-system implementation surface: boot, tasks, memory, VFS, syscall-facing interfaces, synchronization, credentials, and isolation.

Dependency Surface

Detected Declarations

Annotated Snippet

*   if (LOAD ->data_tail) {		LOAD ->data_head
	 *			(A)		smp_rmb()	(C)
	 *	STORE $data			LOAD $data
	 *	smp_wmb()	(B)		smp_mb()	(D)
	 *	STORE ->data_head		STORE ->data_tail
	 *   }
	 *
	 * Where A pairs with D, and B pairs with C.
	 *
	 * In our case (A) is a control dependency that separates the load of
	 * the ->data_tail and the stores of $data. In case ->data_tail
	 * indicates there is no room in the buffer to store $data we do not.
	 *
	 * D needs to be a full barrier since it separates the data READ
	 * from the tail WRITE.
	 *
	 * For B a WMB is sufficient since it separates two WRITEs, and for C
	 * an RMB is sufficient since it separates two READs.
	 *
	 * See perf_output_begin().
	 */
	smp_wmb(); /* B, matches C */
	WRITE_ONCE(rb->user_page->data_head, head);

	/*
	 * We must publish the head before decrementing the nest count,
	 * otherwise an IRQ/NMI can publish a more recent head value and our
	 * write will (temporarily) publish a stale value.
	 */
	barrier();
	WRITE_ONCE(rb->nest, 0);

	/*
	 * Ensure we decrement @rb->nest before we validate the @rb->head.
	 * Otherwise we cannot be sure we caught the 'last' nested update.
	 */
	barrier();
	if (unlikely(head != local_read(&rb->head))) {
		WRITE_ONCE(rb->nest, 1);
		goto again;
	}

	if (handle->wakeup != local_read(&rb->wakeup))
		perf_output_wakeup(handle);

out:
	preempt_enable();
}

static __always_inline bool
ring_buffer_has_space(unsigned long head, unsigned long tail,
		      unsigned long data_size, unsigned int size,
		      bool backward)
{
	if (!backward)
		return CIRC_SPACE(head, tail, data_size) >= size;
	else
		return CIRC_SPACE(tail, head, data_size) >= size;
}

static __always_inline int
__perf_output_begin(struct perf_output_handle *handle,
		    struct perf_sample_data *data,
		    struct perf_event *event, unsigned int size,
		    bool backward)
{
	struct perf_buffer *rb;
	unsigned long tail, offset, head;
	int have_lost, page_shift;
	struct {
		struct perf_event_header header;
		u64			 id;
		u64			 lost;
	} lost_event;

	rcu_read_lock();
	/*
	 * For inherited events we send all the output towards the parent.
	 */
	if (event->parent)
		event = event->parent;

	rb = rcu_dereference(event->rb);
	if (unlikely(!rb))
		goto out;

	if (unlikely(rb->paused)) {
		if (rb->nr_pages) {
			local_inc(&rb->lost);
			atomic64_inc(&event->lost_samples);

Annotation

Implementation Notes