kernel/watchdog.c

Source file repositories/reference/linux-study-clean/kernel/watchdog.c

File Facts

System
Linux kernel
Corpus path
kernel/watchdog.c
Extension
.c
Size
38848 bytes
Lines
1401
Domain
Core OS
Bucket
Scheduler, Processes, Timers, Sync, And Syscalls
Inferred role
Core OS: exported/initcall integration point
Status
integration implementation candidate

Why This File Exists

Core operating-system implementation surface: boot, tasks, memory, VFS, syscall-facing interfaces, synchronization, credentials, and isolation.

Dependency Surface

Detected Declarations

Annotated Snippet

struct irq_counts {
	int irq;
	u32 counts;
};

static DEFINE_PER_CPU(bool, snapshot_taken);

/* Tabulate the most frequent interrupts. */
static void tabulate_irq_count(struct irq_counts *irq_counts, int irq, u32 counts, int rank)
{
	int i;
	struct irq_counts new_count = {irq, counts};

	for (i = 0; i < rank; i++) {
		if (counts > irq_counts[i].counts)
			swap(new_count, irq_counts[i]);
	}
}

/*
 * If the hardirq time exceeds HARDIRQ_PERCENT_THRESH% of the sample_period,
 * then the cause of softlockup might be interrupt storm. In this case, it
 * would be useful to start interrupt counting.
 */
static bool need_counting_irqs(void)
{
	u8 util;
	int tail = __this_cpu_read(cpustat_tail);

	tail = (tail + NUM_SAMPLE_PERIODS - 1) % NUM_SAMPLE_PERIODS;
	util = __this_cpu_read(cpustat_util[tail][STATS_HARDIRQ]);
	return util > HARDIRQ_PERCENT_THRESH;
}

static void start_counting_irqs(void)
{
	if (!__this_cpu_read(snapshot_taken)) {
		kstat_snapshot_irqs();
		__this_cpu_write(snapshot_taken, true);
	}
}

static void stop_counting_irqs(void)
{
	__this_cpu_write(snapshot_taken, false);
}

static void print_irq_counts(void)
{
	unsigned int i, count;
	struct irq_counts irq_counts_sorted[NUM_HARDIRQ_REPORT] = {
		{-1, 0}, {-1, 0}, {-1, 0}, {-1, 0}, {-1, 0}
	};

	if (__this_cpu_read(snapshot_taken)) {
		for_each_active_irq(i) {
			count = kstat_get_irq_since_snapshot(i);
			tabulate_irq_count(irq_counts_sorted, i, count, NUM_HARDIRQ_REPORT);
		}

		/*
		 * Outputting the "watchdog" prefix on every line is redundant and not
		 * concise, and the original alarm information is sufficient for
		 * positioning in logs, hence here printk() is used instead of pr_crit().
		 */
		printk(KERN_CRIT "CPU#%d Detect HardIRQ Time exceeds %d%%. Most frequent HardIRQs:\n",
		       smp_processor_id(), HARDIRQ_PERCENT_THRESH);

		for (i = 0; i < NUM_HARDIRQ_REPORT; i++) {
			if (irq_counts_sorted[i].irq == -1)
				break;

			printk(KERN_CRIT "\t#%u: %-10u\tirq#%d\n",
			       i + 1, irq_counts_sorted[i].counts,
			       irq_counts_sorted[i].irq);
		}

		/*
		 * If the hardirq time is less than HARDIRQ_PERCENT_THRESH% in the last
		 * sample_period, then we suspect the interrupt storm might be subsiding.
		 */
		if (!need_counting_irqs())
			stop_counting_irqs();
	}
}

static void report_cpu_status(void)
{
	print_cpustat();
	print_irq_counts();

Annotation

Implementation Notes