Skip to content

Commit

Permalink
performance counters: core code
Browse files Browse the repository at this point in the history
Implement the core kernel bits of Performance Counters subsystem.

The Linux Performance Counter subsystem provides an abstraction of
performance counter hardware capabilities. It provides per task and per
CPU counters, and it provides event capabilities on top of those.

Performance counters are accessed via special file descriptors.
There's one file descriptor per virtual counter used.

The special file descriptor is opened via the perf_counter_open()
system call:

 int
 perf_counter_open(u32 hw_event_type,
                   u32 hw_event_period,
                   u32 record_type,
                   pid_t pid,
                   int cpu);

The syscall returns the new fd. The fd can be used via the normal
VFS system calls: read() can be used to read the counter, fcntl()
can be used to set the blocking mode, etc.

Multiple counters can be kept open at a time, and the counters
can be poll()ed.

See more details in Documentation/perf-counters.txt.

Signed-off-by: Thomas Gleixner <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
  • Loading branch information
KAGA-KOKO authored and Ingo Molnar committed Dec 8, 2008
1 parent b5aa97e commit 0793a61
Show file tree
Hide file tree
Showing 10 changed files with 1,189 additions and 0 deletions.
2 changes: 2 additions & 0 deletions drivers/char/sysrq.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include <linux/kbd_kern.h>
#include <linux/proc_fs.h>
#include <linux/quotaops.h>
#include <linux/perf_counter.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/suspend.h>
Expand Down Expand Up @@ -244,6 +245,7 @@ static void sysrq_handle_showregs(int key, struct tty_struct *tty)
struct pt_regs *regs = get_irq_regs();
if (regs)
show_regs(regs);
perf_counter_print_debug();
}
static struct sysrq_key_op sysrq_showregs_op = {
.handler = sysrq_handle_showregs,
Expand Down
171 changes: 171 additions & 0 deletions include/linux/perf_counter.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
/*
* Performance counters:
*
* Copyright(C) 2008, Thomas Gleixner <[email protected]>
* Copyright(C) 2008, Red Hat, Inc., Ingo Molnar
*
* Data type definitions, declarations, prototypes.
*
* Started by: Thomas Gleixner and Ingo Molnar
*
* For licencing details see kernel-base/COPYING
*/
#ifndef _LINUX_PERF_COUNTER_H
#define _LINUX_PERF_COUNTER_H

#include <asm/atomic.h>

#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/spinlock.h>

struct task_struct;

/*
* Generalized hardware event types, used by the hw_event_type parameter
* of the sys_perf_counter_open() syscall:
*/
enum hw_event_types {
PERF_COUNT_CYCLES,
PERF_COUNT_INSTRUCTIONS,
PERF_COUNT_CACHE_REFERENCES,
PERF_COUNT_CACHE_MISSES,
PERF_COUNT_BRANCH_INSTRUCTIONS,
PERF_COUNT_BRANCH_MISSES,
/*
* If this bit is set in the type, then trigger NMI sampling:
*/
PERF_COUNT_NMI = (1 << 30),
};

/*
* IRQ-notification data record type:
*/
enum perf_record_type {
PERF_RECORD_SIMPLE,
PERF_RECORD_IRQ,
PERF_RECORD_GROUP,
};

/**
* struct hw_perf_counter - performance counter hardware details
*/
struct hw_perf_counter {
u64 config;
unsigned long config_base;
unsigned long counter_base;
int nmi;
unsigned int idx;
u64 prev_count;
s32 next_count;
u64 irq_period;
};

/*
* Hardcoded buffer length limit for now, for IRQ-fed events:
*/
#define PERF_DATA_BUFLEN 2048

/**
* struct perf_data - performance counter IRQ data sampling ...
*/
struct perf_data {
int len;
int rd_idx;
int overrun;
u8 data[PERF_DATA_BUFLEN];
};

/**
* struct perf_counter - performance counter kernel representation:
*/
struct perf_counter {
struct list_head list;
int active;
#if BITS_PER_LONG == 64
atomic64_t count;
#else
atomic_t count32[2];
#endif
u64 __irq_period;

struct hw_perf_counter hw;

struct perf_counter_context *ctx;
struct task_struct *task;

/*
* Protect attach/detach:
*/
struct mutex mutex;

int oncpu;
int cpu;

s32 hw_event_type;
enum perf_record_type record_type;

/* read() / irq related data */
wait_queue_head_t waitq;
/* optional: for NMIs */
int wakeup_pending;
struct perf_data *irqdata;
struct perf_data *usrdata;
struct perf_data data[2];
};

/**
* struct perf_counter_context - counter context structure
*
* Used as a container for task counters and CPU counters as well:
*/
struct perf_counter_context {
#ifdef CONFIG_PERF_COUNTERS
/*
* Protect the list of counters:
*/
spinlock_t lock;
struct list_head counters;
int nr_counters;
int nr_active;
struct task_struct *task;
#endif
};

/**
* struct perf_counter_cpu_context - per cpu counter context structure
*/
struct perf_cpu_context {
struct perf_counter_context ctx;
struct perf_counter_context *task_ctx;
int active_oncpu;
int max_pertask;
};

/*
* Set by architecture code:
*/
extern int perf_max_counters;

#ifdef CONFIG_PERF_COUNTERS
extern void perf_counter_task_sched_in(struct task_struct *task, int cpu);
extern void perf_counter_task_sched_out(struct task_struct *task, int cpu);
extern void perf_counter_task_tick(struct task_struct *task, int cpu);
extern void perf_counter_init_task(struct task_struct *task);
extern void perf_counter_notify(struct pt_regs *regs);
extern void perf_counter_print_debug(void);
#else
static inline void
perf_counter_task_sched_in(struct task_struct *task, int cpu) { }
static inline void
perf_counter_task_sched_out(struct task_struct *task, int cpu) { }
static inline void
perf_counter_task_tick(struct task_struct *task, int cpu) { }
static inline void perf_counter_init_task(struct task_struct *task) { }
static inline void perf_counter_notify(struct pt_regs *regs) { }
static inline void perf_counter_print_debug(void) { }
#endif

#endif /* _LINUX_PERF_COUNTER_H */
9 changes: 9 additions & 0 deletions include/linux/sched.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ struct sched_param {
#include <linux/fs_struct.h>
#include <linux/compiler.h>
#include <linux/completion.h>
#include <linux/perf_counter.h>
#include <linux/pid.h>
#include <linux/percpu.h>
#include <linux/topology.h>
Expand Down Expand Up @@ -1326,6 +1327,7 @@ struct task_struct {
struct list_head pi_state_list;
struct futex_pi_state *pi_state_cache;
#endif
struct perf_counter_context perf_counter_ctx;
#ifdef CONFIG_NUMA
struct mempolicy *mempolicy;
short il_next;
Expand Down Expand Up @@ -2285,6 +2287,13 @@ static inline void inc_syscw(struct task_struct *tsk)
#define TASK_SIZE_OF(tsk) TASK_SIZE
#endif

/*
* Call the function if the target task is executing on a CPU right now:
*/
extern void task_oncpu_function_call(struct task_struct *p,
void (*func) (void *info), void *info);


#ifdef CONFIG_MM_OWNER
extern void mm_update_next_owner(struct mm_struct *mm);
extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p);
Expand Down
6 changes: 6 additions & 0 deletions include/linux/syscalls.h
Original file line number Diff line number Diff line change
Expand Up @@ -624,4 +624,10 @@ asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len);

int kernel_execve(const char *filename, char *const argv[], char *const envp[]);

asmlinkage int
sys_perf_counter_open(u32 hw_event_type,
u32 hw_event_period,
u32 record_type,
pid_t pid,
int cpu);
#endif
29 changes: 29 additions & 0 deletions init/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -732,6 +732,35 @@ config AIO
by some high performance threaded applications. Disabling
this option saves about 7k.

config HAVE_PERF_COUNTERS
bool

menu "Performance Counters"

config PERF_COUNTERS
bool "Kernel Performance Counters"
depends on HAVE_PERF_COUNTERS
default y
help
Enable kernel support for performance counter hardware.

Performance counters are special hardware registers available
on most modern CPUs. These registers count the number of certain
types of hw events: such as instructions executed, cachemisses
suffered, or branches mis-predicted - without slowing down the
kernel or applications. These registers can also trigger interrupts
when a threshold number of events have passed - and can thus be
used to profile the code that runs on that CPU.

The Linux Performance Counter subsystem provides an abstraction of
these hardware capabilities, available via a system call. It
provides per task and per CPU counters, and it provides event
capabilities on top of those.

Say Y if unsure.

endmenu

config VM_EVENT_COUNTERS
default y
bool "Enable VM event counters for /proc/vmstat" if EMBEDDED
Expand Down
1 change: 1 addition & 0 deletions kernel/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
obj-$(CONFIG_FUNCTION_TRACER) += trace/
obj-$(CONFIG_TRACING) += trace/
obj-$(CONFIG_SMP) += sched_cpupri.o
obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o

ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
# According to Alan Modra <[email protected]>, the -fno-omit-frame-pointer is
Expand Down
1 change: 1 addition & 0 deletions kernel/fork.c
Original file line number Diff line number Diff line change
Expand Up @@ -975,6 +975,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
goto fork_out;

rt_mutex_init_task(p);
perf_counter_init_task(p);

#ifdef CONFIG_PROVE_LOCKING
DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
Expand Down
Loading

0 comments on commit 0793a61

Please sign in to comment.