perf-utils-0.5.1/.gitignore
*.swp
vendor
tags

perf-utils-0.5.1/LICENSE
The MIT License (MIT)

Copyright (c) 2019 Daniel Hodges

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

perf-utils-0.5.1/README.md
# Perf
[![GoDoc](https://godoc.org/github.com/hodgesds/perf-utils?status.svg)](https://godoc.org/github.com/hodgesds/perf-utils)

This package is a Go library for interacting with the `perf` subsystem in
Linux. I had trouble finding a Go perf library, so I decided to write this
one using Linux's `perf` tool as a reference. This library allows you to do
things like see how many CPU instructions a function takes (roughly), profile
a process for various hardware events, and other interesting things. Note
that because the Go scheduler can schedule a goroutine across many OS threads
it becomes rather difficult to get an _exact_ profile of an individual
goroutine. However, a few tricks can be used: first, a call to
[`runtime.LockOSThread`](https://golang.org/pkg/runtime/#LockOSThread) to
lock the current goroutine to an OS thread; second, a call to
[`unix.SchedSetaffinity`](https://godoc.org/golang.org/x/sys/unix#SchedSetaffinity)
with a CPU set mask set. Note that if the pid argument is set to 0 the
calling thread is used (the thread that was just locked).

Before using this library you should probably read the
[`perf_event_open`](http://www.man7.org/linux/man-pages/man2/perf_event_open.2.html)
man page, which this library uses heavily. See this
[kernel guide](https://perf.wiki.kernel.org/index.php/Tutorial) for a
tutorial on how to use perf and some of its limitations.

# Use Cases
If you are looking to interact with the perf subsystem directly with the
`perf_event_open` syscall, then this library is most likely for you. A large
number of the utility methods in this package should only be used for testing
and/or debugging performance issues. This is due to the Go runtime being
extremely tricky to profile at the goroutine level, with the exception of a
long running worker goroutine locked to an OS thread.
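For example, here is a minimal sketch (not part of the library itself) of pinning the calling goroutine to a single CPU before profiling; the CPU number and the helper name are illustrative:

```
package main

import (
	"runtime"

	"golang.org/x/sys/unix"
)

// pinToCPU locks the calling goroutine to its OS thread and then
// restricts that thread to the given CPU.
func pinToCPU(cpu int) error {
	runtime.LockOSThread()
	var set unix.CPUSet
	set.Zero()
	set.Set(cpu)
	// pid 0 means the calling thread, i.e. the one just locked.
	return unix.SchedSetaffinity(0, &set)
}

func main() {
	if err := pinToCPU(0); err != nil {
		panic(err)
	}
	// ... profile the pinned goroutine here ...
}
```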
Eventually this library could be used to implement many of the features of
`perf`, but in pure Go. Currently this library is used in
[node_exporter](https://github.com/prometheus/node_exporter) as well as
[perf_exporter](https://github.com/hodgesds/perf_exporter), which is a
Prometheus exporter for perf related metrics.

## Caveats
* Some utility functions will call
  [`runtime.LockOSThread`](https://golang.org/pkg/runtime/#LockOSThread) for
  you; they will also unlock the thread after profiling. ***Note*** that
  using these utility functions will incur significant overhead (~0.4ms).
* Overflow handling is not implemented.

# Setup
Most likely you will need to tweak some system settings unless you are
running as root. From `man perf_event_open`:

```
perf_event related configuration files

    Files in /proc/sys/kernel/

    /proc/sys/kernel/perf_event_paranoid
        The perf_event_paranoid file can be set to restrict access to the
        performance counters.

        2   allow only user-space measurements (default since Linux 4.6).
        1   allow both kernel and user measurements (default before Linux 4.6).
        0   allow access to CPU-specific data but not raw tracepoint samples.
        -1  no restrictions.

        The existence of the perf_event_paranoid file is the official method
        for determining if a kernel supports perf_event_open().

    /proc/sys/kernel/perf_event_max_sample_rate
        This sets the maximum sample rate. Setting this too high can allow
        users to sample at a rate that impacts overall machine performance
        and potentially lock up the machine. The default value is 100000
        (samples per second).

    /proc/sys/kernel/perf_event_max_stack
        This file sets the maximum depth of stack frame entries reported
        when generating a call trace.

    /proc/sys/kernel/perf_event_mlock_kb
        Maximum number of pages an unprivileged user can mlock(2). The
        default is 516 (kB).
```

# Example
Say you wanted to see how many CPU instructions a particular function took:

```
package main

import (
	"fmt"
	"log"

	"github.com/hodgesds/perf-utils"
)

func foo() error {
	var total int
	for i := 0; i < 1000; i++ {
		total++
	}
	return nil
}

func main() {
	profileValue, err := perf.CPUInstructions(foo)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("CPU instructions: %+v\n", profileValue)
}
```

# Benchmarks
To profile a single function call there is an overhead of ~0.4ms.

```
$ go test -bench=BenchmarkCPUCycles .
goos: linux
goarch: amd64
pkg: github.com/hodgesds/perf-utils
BenchmarkCPUCycles-8       3000    397924 ns/op      32 B/op       1 allocs/op
PASS
ok      github.com/hodgesds/perf-utils  1.255s
```

The `Profiler` interface has low overhead and is suitable for many use cases:

```
$ go test -bench=BenchmarkProfiler .
goos: linux
goarch: amd64
pkg: github.com/hodgesds/perf-utils
BenchmarkProfiler-8     3000000       488 ns/op      32 B/op       1 allocs/op
PASS
ok      github.com/hodgesds/perf-utils  1.981s
```

The
[`RunBenchmarks`](https://godoc.org/github.com/hodgesds/perf-utils#RunBenchmarks)
helper function can be used to run a function as a benchmark and report
results from the `PerfEventAttr`s:

```
func BenchmarkRunBenchmarks(b *testing.B) {
	eventAttrs := []unix.PerfEventAttr{
		CPUInstructionsEventAttr(),
		CPUCyclesEventAttr(),
	}
	RunBenchmarks(
		b,
		func(b *testing.B) {
			for n := 1; n < b.N; n++ {
				a := 42
				for i := 0; i < 1000; i++ {
					a += i
				}
			}
		},
		BenchLock|BenchStrict,
		eventAttrs...,
	)
}

go test -bench=BenchmarkRunBenchmarks
goos: linux
goarch: amd64
pkg: github.com/hodgesds/iouring-go/go/src/github.com/hodgesds/perf-utils
BenchmarkRunBenchmarks-8   3119304   388 ns/op   1336 hw_cycles/op   3314 hw_instr/op   0 B/op   0 allocs/op
```

If you want to benchmark tracepoints (i.e. those listed by `perf list` or
`cat /sys/kernel/debug/tracing/available_events`) you can use the
[`BenchmarkTracepoints`](https://godoc.org/github.com/hodgesds/perf-utils#BenchmarkTracepoints)
helper:

```
func BenchmarkBenchmarkTracepoints(b *testing.B) {
	tracepoints := []string{
		"syscalls:sys_enter_getrusage",
	}
	BenchmarkTracepoints(
		b,
		func(b *testing.B) {
			for n := 1; n < b.N; n++ {
				unix.Getrusage(0, &unix.Rusage{})
			}
		},
		BenchLock|BenchStrict,
		tracepoints...,
	)
}

go test -bench=.
goos: linux
goarch: amd64
pkg: github.com/hodgesds/perf-utils
BenchmarkProfiler-8                     1983320       596 ns/op        32 B/op   1 allocs/op
BenchmarkCPUCycles-8                       2335    484068 ns/op        32 B/op   1 allocs/op
BenchmarkThreadLocking-8              253319848      4.70 ns/op         0 B/op   0 allocs/op
BenchmarkRunBenchmarks-8                1906320       627 ns/op   1023 hw_cycles/op   3007 hw_instr/op
BenchmarkRunBenchmarksLocked-8          1903527       632 ns/op   1025 hw_cycles/op   3007 hw_instr/op
BenchmarkBenchmarkTracepointsLocked-8    986607      1221 ns/op   2.00 syscalls:sys_enter_getrusage/op   0 B/op   0 allocs/op
BenchmarkBenchmarkTracepoints-8          906022      1258 ns/op   2.00 syscalls:sys_enter_getrusage/op   0 B/op   0 allocs/op
```

# BPF Support
BPF is supported by using the `BPFProfiler`, which is available via the
`ProfileTracepoint` function. To use BPF you need to create the BPF program
and then call `AttachBPF` with the file descriptor of the BPF program.

# Misc
Originally I set out to use `go generate` to build Go structs that were
compatible with perf, and I found a really good
[article](https://utcc.utoronto.ca/~cks/space/blog/programming/GoCGoCompatibleStructs)
on how to do so. Eventually, after digging through some of the `/x/sys/unix`
code, I found pretty much what I needed. However, I think if you are
interested in interacting with the kernel it is a worthwhile read.

- [Concurrent Hardware Monitoring](https://stackoverflow.com/questions/61879227/perf-type-hardware-and-perf-type-hw-cache-concurrent-monitoring)
- [Perf event scheduling](https://hadibrais.wordpress.com/2019/09/06/the-linux-perf-event-scheduling-algorithm/)

perf-utils-0.5.1/bpf.go
//go:build linux
// +build linux

package perf

import (
	"golang.org/x/sys/unix"
)

// BPFProfiler is a Profiler that allows attaching a Berkeley
// Packet Filter (BPF) program to an existing kprobe tracepoint event.
// You need CAP_SYS_ADMIN privileges to use this interface. See:
// https://lwn.net/Articles/683504/
type BPFProfiler interface {
	Profiler
	AttachBPF(int) error
}
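// Example (illustrative sketch, not part of the original file): pairing
// ProfileTracepoint with AttachBPF. The tracepoint name is an assumption and
// bpfFD is assumed to be the fd of a BPF program loaded elsewhere.
//
//	p, err := ProfileTracepoint("syscalls", "sys_enter_write", os.Getpid(), -1)
//	if err != nil {
//		log.Fatal(err)
//	}
//	defer p.Close()
//	if err := p.AttachBPF(bpfFD); err != nil {
//		log.Fatal(err)
//	}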
// AttachBPF is used to attach a BPF program to a profiler by using the file
// descriptor of the BPF program.
func (p *profiler) AttachBPF(fd int) error {
	return unix.IoctlSetInt(p.fd, unix.PERF_EVENT_IOC_SET_BPF, fd)
}

perf-utils-0.5.1/cache_profiler.go
//go:build linux
// +build linux

package perf

import (
	"fmt"
	"sync"

	"go.uber.org/multierr"
	"golang.org/x/sys/unix"
)

type CacheProfilerType int

const (
	// AllCacheProfilers is used to try to configure all cache profilers.
	AllCacheProfilers CacheProfilerType = 0
	L1DataReadHitProfiler CacheProfilerType = 1 << iota
	L1DataReadMissProfiler CacheProfilerType = 1 << iota
	L1DataWriteHitProfiler CacheProfilerType = 1 << iota
	L1InstrReadMissProfiler CacheProfilerType = 1 << iota
	L1InstrReadHitProfiler CacheProfilerType = 1 << iota
	LLReadHitProfiler CacheProfilerType = 1 << iota
	LLReadMissProfiler CacheProfilerType = 1 << iota
	LLWriteHitProfiler CacheProfilerType = 1 << iota
	LLWriteMissProfiler CacheProfilerType = 1 << iota
	DataTLBReadHitProfiler CacheProfilerType = 1 << iota
	DataTLBReadMissProfiler CacheProfilerType = 1 << iota
	DataTLBWriteHitProfiler CacheProfilerType = 1 << iota
	DataTLBWriteMissProfiler CacheProfilerType = 1 << iota
	InstrTLBReadHitProfiler CacheProfilerType = 1 << iota
	InstrTLBReadMissProfiler CacheProfilerType = 1 << iota
	BPUReadHitProfiler CacheProfilerType = 1 << iota
	BPUReadMissProfiler CacheProfilerType = 1 << iota
	NodeCacheReadHitProfiler CacheProfilerType = 1 << iota
	NodeCacheReadMissProfiler CacheProfilerType = 1 << iota
	NodeCacheWriteHitProfiler CacheProfilerType = 1 << iota
	NodeCacheWriteMissProfiler CacheProfilerType = 1 << iota
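	// The cache event configs below follow the perf_event_open(2) encoding
	// for PERF_TYPE_HW_CACHE events:
	//
	//	config = (perf_hw_cache_id) |
	//		(perf_hw_cache_op_id << 8) |
	//		(perf_hw_cache_op_result_id << 16)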
	// L1DataReadHit is a constant...
	L1DataReadHit = (unix.PERF_COUNT_HW_CACHE_L1D) | (unix.PERF_COUNT_HW_CACHE_OP_READ << 8) | (unix.PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16)
	// L1DataReadMiss is a constant...
	L1DataReadMiss = (unix.PERF_COUNT_HW_CACHE_L1D) | (unix.PERF_COUNT_HW_CACHE_OP_READ << 8) | (unix.PERF_COUNT_HW_CACHE_RESULT_MISS << 16)
	// L1DataWriteHit is a constant...
	L1DataWriteHit = (unix.PERF_COUNT_HW_CACHE_L1D) | (unix.PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (unix.PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16)
	// L1InstrReadMiss is a constant...
	L1InstrReadMiss = (unix.PERF_COUNT_HW_CACHE_L1I) | (unix.PERF_COUNT_HW_CACHE_OP_READ << 8) | (unix.PERF_COUNT_HW_CACHE_RESULT_MISS << 16)
	// L1InstrReadHit is a constant...
	L1InstrReadHit = (unix.PERF_COUNT_HW_CACHE_L1I) | (unix.PERF_COUNT_HW_CACHE_OP_READ << 8) | (unix.PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16)
	// LLReadHit is a constant...
	LLReadHit = (unix.PERF_COUNT_HW_CACHE_LL) | (unix.PERF_COUNT_HW_CACHE_OP_READ << 8) | (unix.PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16)
	// LLReadMiss is a constant...
	LLReadMiss = (unix.PERF_COUNT_HW_CACHE_LL) | (unix.PERF_COUNT_HW_CACHE_OP_READ << 8) | (unix.PERF_COUNT_HW_CACHE_RESULT_MISS << 16)
	// LLWriteHit is a constant...
	LLWriteHit = (unix.PERF_COUNT_HW_CACHE_LL) | (unix.PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (unix.PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16)
	// LLWriteMiss is a constant...
	LLWriteMiss = (unix.PERF_COUNT_HW_CACHE_LL) | (unix.PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (unix.PERF_COUNT_HW_CACHE_RESULT_MISS << 16)
	// DataTLBReadHit is a constant...
	DataTLBReadHit = (unix.PERF_COUNT_HW_CACHE_DTLB) | (unix.PERF_COUNT_HW_CACHE_OP_READ << 8) | (unix.PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16)
	// DataTLBReadMiss is a constant...
	DataTLBReadMiss = (unix.PERF_COUNT_HW_CACHE_DTLB) | (unix.PERF_COUNT_HW_CACHE_OP_READ << 8) | (unix.PERF_COUNT_HW_CACHE_RESULT_MISS << 16)
	// DataTLBWriteHit is a constant...
	DataTLBWriteHit = (unix.PERF_COUNT_HW_CACHE_DTLB) | (unix.PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (unix.PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16)
	// DataTLBWriteMiss is a constant...
	DataTLBWriteMiss = (unix.PERF_COUNT_HW_CACHE_DTLB) | (unix.PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (unix.PERF_COUNT_HW_CACHE_RESULT_MISS << 16)
	// InstrTLBReadHit is a constant...
	InstrTLBReadHit = (unix.PERF_COUNT_HW_CACHE_ITLB) | (unix.PERF_COUNT_HW_CACHE_OP_READ << 8) | (unix.PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16)
	// InstrTLBReadMiss is a constant...
	InstrTLBReadMiss = (unix.PERF_COUNT_HW_CACHE_ITLB) | (unix.PERF_COUNT_HW_CACHE_OP_READ << 8) | (unix.PERF_COUNT_HW_CACHE_RESULT_MISS << 16)
	// BPUReadHit is a constant...
	BPUReadHit = (unix.PERF_COUNT_HW_CACHE_BPU) | (unix.PERF_COUNT_HW_CACHE_OP_READ << 8) | (unix.PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16)
	// BPUReadMiss is a constant...
	BPUReadMiss = (unix.PERF_COUNT_HW_CACHE_BPU) | (unix.PERF_COUNT_HW_CACHE_OP_READ << 8) | (unix.PERF_COUNT_HW_CACHE_RESULT_MISS << 16)
	// NodeCacheReadHit is a constant...
	NodeCacheReadHit = (unix.PERF_COUNT_HW_CACHE_NODE) | (unix.PERF_COUNT_HW_CACHE_OP_READ << 8) | (unix.PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16)
	// NodeCacheReadMiss is a constant...
	NodeCacheReadMiss = (unix.PERF_COUNT_HW_CACHE_NODE) | (unix.PERF_COUNT_HW_CACHE_OP_READ << 8) | (unix.PERF_COUNT_HW_CACHE_RESULT_MISS << 16)
	// NodeCacheWriteHit is a constant...
	NodeCacheWriteHit = (unix.PERF_COUNT_HW_CACHE_NODE) | (unix.PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (unix.PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16)
	// NodeCacheWriteMiss is a constant...
	NodeCacheWriteMiss = (unix.PERF_COUNT_HW_CACHE_NODE) | (unix.PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (unix.PERF_COUNT_HW_CACHE_RESULT_MISS << 16)
)

type cacheProfiler struct {
	// map of perf counter type to file descriptor
	profilers   map[int]Profiler
	profilersMu sync.RWMutex
}

// NewCacheProfiler returns a new cache profiler.
func NewCacheProfiler(pid, cpu int, profilerSet CacheProfilerType, opts ...int) (CacheProfiler, error) {
	profilers := map[int]Profiler{}
	var e error

	// L1 data
	if profilerSet&L1DataReadHitProfiler > 0 || profilerSet == AllCacheProfilers {
		op := unix.PERF_COUNT_HW_CACHE_OP_READ
		result := unix.PERF_COUNT_HW_CACHE_RESULT_ACCESS
		l1dataReadHit, err := NewL1DataProfiler(pid, cpu, op, result, opts...)
		if err != nil {
			e = multierr.Append(e, fmt.Errorf("Failed to setup L1 data read hit profiler: pid (%d) cpu (%d) %q", pid, cpu, err))
		} else {
			profilers[L1DataReadHit] = l1dataReadHit
		}
	}
	if profilerSet&L1DataReadMissProfiler > 0 || profilerSet == AllCacheProfilers {
		op := unix.PERF_COUNT_HW_CACHE_OP_READ
		result := unix.PERF_COUNT_HW_CACHE_RESULT_MISS
		l1dataReadMiss, err := NewL1DataProfiler(pid, cpu, op, result, opts...)
		if err != nil {
			e = multierr.Append(e, fmt.Errorf("Failed to setup L1 data read miss profiler: pid (%d) cpu (%d) %q", pid, cpu, err))
		} else {
			profilers[L1DataReadMiss] = l1dataReadMiss
		}
	}
	if profilerSet&L1DataWriteHitProfiler > 0 || profilerSet == AllCacheProfilers {
		op := unix.PERF_COUNT_HW_CACHE_OP_WRITE
		result := unix.PERF_COUNT_HW_CACHE_RESULT_ACCESS
		l1dataWriteHit, err := NewL1DataProfiler(pid, cpu, op, result, opts...)
		if err != nil {
			e = multierr.Append(e, fmt.Errorf("Failed to setup L1 data write profiler: pid (%d) cpu (%d) %q", pid, cpu, err))
		} else {
			profilers[L1DataWriteHit] = l1dataWriteHit
		}
	}

	// L1 instruction
	if profilerSet&L1InstrReadHitProfiler > 0 || profilerSet == AllCacheProfilers {
		op := unix.PERF_COUNT_HW_CACHE_OP_READ
		result := unix.PERF_COUNT_HW_CACHE_RESULT_ACCESS
		l1instrReadHit, err := NewL1InstrProfiler(pid, cpu, op, result, opts...)
		if err != nil {
			e = multierr.Append(e, fmt.Errorf("Failed to setup L1 instruction read hit profiler: pid (%d) cpu (%d) %q", pid, cpu, err))
		} else {
			profilers[L1InstrReadHit] = l1instrReadHit
		}
	}
	if profilerSet&L1InstrReadMissProfiler > 0 || profilerSet == AllCacheProfilers {
		op := unix.PERF_COUNT_HW_CACHE_OP_READ
		result := unix.PERF_COUNT_HW_CACHE_RESULT_MISS
		l1InstrReadMiss, err := NewL1InstrProfiler(pid, cpu, op, result, opts...)
		if err != nil {
			e = multierr.Append(e, fmt.Errorf("Failed to setup L1 instruction read miss profiler: pid (%d) cpu (%d) %q", pid, cpu, err))
		} else {
			profilers[L1InstrReadMiss] = l1InstrReadMiss
		}
	}

	// Last Level
	if profilerSet&LLReadHitProfiler > 0 || profilerSet == AllCacheProfilers {
		op := unix.PERF_COUNT_HW_CACHE_OP_READ
		result := unix.PERF_COUNT_HW_CACHE_RESULT_ACCESS
		llReadHit, err := NewLLCacheProfiler(pid, cpu, op, result, opts...)
		if err != nil {
			e = multierr.Append(e, fmt.Errorf("Failed to setup last level read hit profiler: pid (%d) cpu (%d) %q", pid, cpu, err))
		} else {
			profilers[LLReadHit] = llReadHit
		}
	}
	if profilerSet&LLReadMissProfiler > 0 || profilerSet == AllCacheProfilers {
		op := unix.PERF_COUNT_HW_CACHE_OP_READ
		result := unix.PERF_COUNT_HW_CACHE_RESULT_MISS
		llReadMiss, err := NewLLCacheProfiler(pid, cpu, op, result, opts...)
		if err != nil {
			e = multierr.Append(e, fmt.Errorf("Failed to setup last level read miss profiler: pid (%d) cpu (%d) %q", pid, cpu, err))
		} else {
			profilers[LLReadMiss] = llReadMiss
		}
	}
	if profilerSet&LLWriteHitProfiler > 0 || profilerSet == AllCacheProfilers {
		op := unix.PERF_COUNT_HW_CACHE_OP_WRITE
		result := unix.PERF_COUNT_HW_CACHE_RESULT_ACCESS
		llWriteHit, err := NewLLCacheProfiler(pid, cpu, op, result, opts...)
		if err != nil {
			e = multierr.Append(e, fmt.Errorf("Failed to setup last level write hit profiler: pid (%d) cpu (%d) %q", pid, cpu, err))
		} else {
			profilers[LLWriteHit] = llWriteHit
		}
	}
	if profilerSet&LLWriteMissProfiler > 0 || profilerSet == AllCacheProfilers {
		op := unix.PERF_COUNT_HW_CACHE_OP_WRITE
		result := unix.PERF_COUNT_HW_CACHE_RESULT_MISS
		llWriteMiss, err := NewLLCacheProfiler(pid, cpu, op, result, opts...)
		if err != nil {
			e = multierr.Append(e, fmt.Errorf("Failed to setup last level write miss profiler: pid (%d) cpu (%d) %q", pid, cpu, err))
		} else {
			profilers[LLWriteMiss] = llWriteMiss
		}
	}

	// dTLB
	if profilerSet&DataTLBReadHitProfiler > 0 || profilerSet == AllCacheProfilers {
		op := unix.PERF_COUNT_HW_CACHE_OP_READ
		result := unix.PERF_COUNT_HW_CACHE_RESULT_ACCESS
		dTLBReadHit, err := NewDataTLBProfiler(pid, cpu, op, result, opts...)
		if err != nil {
			e = multierr.Append(e, fmt.Errorf("Failed to setup dTLB read hit profiler: pid (%d) cpu (%d) %q", pid, cpu, err))
		} else {
			profilers[DataTLBReadHit] = dTLBReadHit
		}
	}
	if profilerSet&DataTLBReadMissProfiler > 0 || profilerSet == AllCacheProfilers {
		op := unix.PERF_COUNT_HW_CACHE_OP_READ
		result := unix.PERF_COUNT_HW_CACHE_RESULT_MISS
		dTLBReadMiss, err := NewDataTLBProfiler(pid, cpu, op, result, opts...)
		if err != nil {
			e = multierr.Append(e, fmt.Errorf(
				"Failed to setup dTLB read miss profiler: pid (%d) cpu (%d) %q", pid, cpu, err))
		} else {
			profilers[DataTLBReadMiss] = dTLBReadMiss
		}
	}
	if profilerSet&DataTLBWriteHitProfiler > 0 || profilerSet == AllCacheProfilers {
		op := unix.PERF_COUNT_HW_CACHE_OP_WRITE
		result := unix.PERF_COUNT_HW_CACHE_RESULT_ACCESS
		dTLBWriteHit, err := NewDataTLBProfiler(pid, cpu, op, result, opts...)
		if err != nil {
			e = multierr.Append(e, fmt.Errorf(
				"Failed to setup dTLB write hit profiler: pid (%d) cpu (%d) %q", pid, cpu, err))
		} else {
			profilers[DataTLBWriteHit] = dTLBWriteHit
		}
	}
	if profilerSet&DataTLBWriteMissProfiler > 0 || profilerSet == AllCacheProfilers {
		op := unix.PERF_COUNT_HW_CACHE_OP_WRITE
		result := unix.PERF_COUNT_HW_CACHE_RESULT_MISS
		dTLBWriteMiss, err := NewDataTLBProfiler(pid, cpu, op, result, opts...)
		if err != nil {
			e = multierr.Append(e, fmt.Errorf("Failed to setup dTLB write miss profiler: pid (%d) cpu (%d) %q", pid, cpu, err))
		} else {
			profilers[DataTLBWriteMiss] = dTLBWriteMiss
		}
	}

	// iTLB
	if profilerSet&InstrTLBReadHitProfiler > 0 || profilerSet == AllCacheProfilers {
		op := unix.PERF_COUNT_HW_CACHE_OP_READ
		result := unix.PERF_COUNT_HW_CACHE_RESULT_ACCESS
		iTLBReadHit, err := NewInstrTLBProfiler(pid, cpu, op, result, opts...)
		if err != nil {
			e = multierr.Append(e, fmt.Errorf(
				"Failed to setup iTLB read hit profiler: pid (%d) cpu (%d) %q", pid, cpu, err))
		} else {
			profilers[InstrTLBReadHit] = iTLBReadHit
		}
	}
	if profilerSet&InstrTLBReadMissProfiler > 0 || profilerSet == AllCacheProfilers {
		op := unix.PERF_COUNT_HW_CACHE_OP_READ
		result := unix.PERF_COUNT_HW_CACHE_RESULT_MISS
		iTLBReadMiss, err := NewInstrTLBProfiler(pid, cpu, op, result, opts...)
		if err != nil {
			e = multierr.Append(e, fmt.Errorf("Failed to setup iTLB read miss profiler: pid (%d) cpu (%d) %q", pid, cpu, err))
		} else {
			profilers[InstrTLBReadMiss] = iTLBReadMiss
		}
	}

	// BPU
	if profilerSet&BPUReadHitProfiler > 0 || profilerSet == AllCacheProfilers {
		op := unix.PERF_COUNT_HW_CACHE_OP_READ
		result := unix.PERF_COUNT_HW_CACHE_RESULT_ACCESS
		bpuReadHit, err := NewBPUProfiler(pid, cpu, op, result, opts...)
		if err != nil {
			e = multierr.Append(e, fmt.Errorf(
				"Failed to setup BPU read hit profiler: pid (%d) cpu (%d) %q", pid, cpu, err))
		} else {
			profilers[BPUReadHit] = bpuReadHit
		}
	}
	if profilerSet&BPUReadMissProfiler > 0 || profilerSet == AllCacheProfilers {
		op := unix.PERF_COUNT_HW_CACHE_OP_READ
		result := unix.PERF_COUNT_HW_CACHE_RESULT_MISS
		bpuReadMiss, err := NewBPUProfiler(pid, cpu, op, result, opts...)
		if err != nil {
			e = multierr.Append(e, fmt.Errorf(
				"Failed to setup BPU read miss profiler: pid (%d) cpu (%d) %q", pid, cpu, err))
		} else {
			profilers[BPUReadMiss] = bpuReadMiss
		}
	}

	// Node
	if profilerSet&NodeCacheReadHitProfiler > 0 || profilerSet == AllCacheProfilers {
		op := unix.PERF_COUNT_HW_CACHE_OP_READ
		result := unix.PERF_COUNT_HW_CACHE_RESULT_ACCESS
		nodeReadHit, err := NewNodeCacheProfiler(pid, cpu, op, result, opts...)
		if err != nil {
			e = multierr.Append(e, fmt.Errorf(
				"Failed to setup node cache read hit profiler: pid (%d) cpu (%d) %q", pid, cpu, err))
		} else {
			profilers[NodeCacheReadHit] = nodeReadHit
		}
	}
	if profilerSet&NodeCacheReadMissProfiler > 0 || profilerSet == AllCacheProfilers {
		op := unix.PERF_COUNT_HW_CACHE_OP_READ
		result := unix.PERF_COUNT_HW_CACHE_RESULT_MISS
		nodeReadMiss, err := NewNodeCacheProfiler(pid, cpu, op, result, opts...)
		if err != nil {
			e = multierr.Append(e, fmt.Errorf(
				"Failed to setup node cache read miss profiler: pid (%d) cpu (%d) %q", pid, cpu, err))
		} else {
			profilers[NodeCacheReadMiss] = nodeReadMiss
		}
	}

	return &cacheProfiler{
		profilers: profilers,
	}, e
}

// HasProfilers returns if there are any configured profilers.
func (p *cacheProfiler) HasProfilers() bool {
	p.profilersMu.RLock()
	defer p.profilersMu.RUnlock()
	return len(p.profilers) > 0
}

// Start is used to start the CacheProfiler; it will return an error if no
// profilers are configured.
func (p *cacheProfiler) Start() error {
	if !p.HasProfilers() {
		return ErrNoProfiler
	}
	var err error
	p.profilersMu.RLock()
	for _, profiler := range p.profilers {
		err = multierr.Append(err, profiler.Start())
	}
	p.profilersMu.RUnlock()
	return err
}

// Reset is used to reset the CacheProfiler.
func (p *cacheProfiler) Reset() error {
	var err error
	p.profilersMu.RLock()
	for _, profiler := range p.profilers {
		err = multierr.Append(err, profiler.Reset())
	}
	p.profilersMu.RUnlock()
	return err
}

// Stop is used to stop the CacheProfiler.
func (p *cacheProfiler) Stop() error {
	var err error
	p.profilersMu.RLock()
	for _, profiler := range p.profilers {
		err = multierr.Append(err, profiler.Stop())
	}
	p.profilersMu.RUnlock()
	return err
}

// Close is used to close the CacheProfiler.
func (p *cacheProfiler) Close() error {
	var err error
	p.profilersMu.RLock()
	for _, profiler := range p.profilers {
		err = multierr.Append(err, profiler.Close())
	}
	p.profilersMu.RUnlock()
	return err
}

// Profile is used to read the CacheProfiler's CacheProfile; it returns an
// error only if all profiles fail.
func (p *cacheProfiler) Profile(cacheProfile *CacheProfile) error {
	var err error
	cacheProfile.Reset()
	p.profilersMu.RLock()
	for profilerType, profiler := range p.profilers {
		profileVal := ProfileValuePool.Get().(*ProfileValue)
		err2 := profiler.Profile(profileVal)
		err = multierr.Append(err, err2)
		if err2 == nil {
			if cacheProfile.TimeEnabled == nil {
				cacheProfile.TimeEnabled = &profileVal.TimeEnabled
			}
			if cacheProfile.TimeRunning == nil {
				cacheProfile.TimeRunning = &profileVal.TimeRunning
			}
			switch {
			// L1 data
			case (profilerType ^ L1DataReadHit) == 0:
				cacheProfile.L1DataReadHit = &profileVal.Value
			case (profilerType ^ L1DataReadMiss) == 0:
				cacheProfile.L1DataReadMiss = &profileVal.Value
			case (profilerType ^ L1DataWriteHit) == 0:
				cacheProfile.L1DataWriteHit = &profileVal.Value
			// L1 instruction
			case (profilerType ^ L1InstrReadMiss) == 0:
				cacheProfile.L1InstrReadMiss = &profileVal.Value
			// Last Level
			case (profilerType ^ LLReadHit) == 0:
				cacheProfile.LastLevelReadHit = &profileVal.Value
			case (profilerType ^ LLReadMiss) == 0:
				cacheProfile.LastLevelReadMiss = &profileVal.Value
			case (profilerType ^ LLWriteHit) == 0:
				cacheProfile.LastLevelWriteHit = &profileVal.Value
			case (profilerType ^ LLWriteMiss) == 0:
				cacheProfile.LastLevelWriteMiss = &profileVal.Value
			// dTLB
			case (profilerType ^ DataTLBReadHit) == 0:
				cacheProfile.DataTLBReadHit = &profileVal.Value
			case (profilerType ^ DataTLBReadMiss) == 0:
				cacheProfile.DataTLBReadMiss = &profileVal.Value
			case (profilerType ^ DataTLBWriteHit) == 0:
				cacheProfile.DataTLBWriteHit = &profileVal.Value
			case (profilerType ^ DataTLBWriteMiss) == 0:
				cacheProfile.DataTLBWriteMiss = &profileVal.Value
			// iTLB
			case (profilerType ^ InstrTLBReadHit) == 0:
				cacheProfile.InstrTLBReadHit = &profileVal.Value
			case (profilerType ^ InstrTLBReadMiss) == 0:
				cacheProfile.InstrTLBReadMiss = &profileVal.Value
			// BPU
			case (profilerType ^ BPUReadHit) == 0:
				cacheProfile.BPUReadHit = &profileVal.Value
			case (profilerType ^ BPUReadMiss) == 0:
				cacheProfile.BPUReadMiss = &profileVal.Value
			// node
			case (profilerType ^ NodeCacheReadHit) == 0:
				cacheProfile.NodeReadHit = &profileVal.Value
			case (profilerType ^ NodeCacheReadMiss) == 0:
				cacheProfile.NodeReadMiss = &profileVal.Value
			case (profilerType ^ NodeCacheWriteHit) == 0:
				cacheProfile.NodeWriteHit = &profileVal.Value
			case (profilerType ^ NodeCacheWriteMiss) == 0:
				cacheProfile.NodeWriteMiss = &profileVal.Value
			}
		}
	}
	p.profilersMu.RUnlock()
	return err
}

perf-utils-0.5.1/cache_profiler_test.go
package perf

import (
	"os"
	"testing"
)

func TestCacheProfiler(t *testing.T) {
	p, err := NewCacheProfiler(os.Getpid(), 0, AllCacheProfilers)
	if err != nil && !p.HasProfilers() {
		t.Fatal(err)
	}
	defer func() {
		if err := p.Close(); err != nil {
			t.Fatal(err)
		}
	}()
	if err := p.Reset(); err != nil {
		t.Fatal(err)
	}
	if err := p.Start(); err != nil {
		t.Fatal(err)
	}
	err = p.Profile(&CacheProfile{})
	if err != nil {
		t.Fatal(err)
	}
	if err := p.Stop(); err != nil {
		t.Fatal(err)
	}
}

perf-utils-0.5.1/events.go
//go:build linux
// +build linux

package perf

import (
	"fmt"
	"strconv"
	"strings"
	"unsafe"

	"golang.org/x/sys/unix"
)

const (
	// PERF_TYPE_TRACEPOINT is a kernel tracepoint.
	PERF_TYPE_TRACEPOINT = 2
)

// AvailableEvents returns a mapping of available subsystems and their
// corresponding list of available events.
func AvailableEvents() (map[string][]string, error) {
	events := map[string][]string{}
	// BUG(hodgesds): this should ideally check mounts for debugfs
	rawEvents, err := fileToStrings(TracingDir + "/available_events")
	// Events are colon delimited by type so parse the type and add sub
	// events appropriately.
	if err != nil {
		return events, err
	}
	for _, rawEvent := range rawEvents {
		splits := strings.Split(rawEvent, ":")
		if len(splits) <= 1 {
			continue
		}
		eventTypeEvents, found := events[splits[0]]
		if found {
			events[splits[0]] = append(eventTypeEvents, splits[1])
			continue
		}
		events[splits[0]] = []string{splits[1]}
	}
	return events, err
}

// AvailableSubsystems returns a slice of available subsystems.
func AvailableSubsystems() ([]string, error) {
	subsystems := []string{}
	// BUG(hodgesds): this should ideally check mounts for debugfs
	rawEvents, err := fileToStrings(TracingDir + "/available_events")
	// Events are colon delimited by type so parse the type and add sub
	// events appropriately.
	if err != nil {
		return subsystems, err
	}
	for _, rawEvent := range rawEvents {
		splits := strings.Split(rawEvent, ":")
		if len(splits) <= 1 {
			continue
		}
		subsystems = append(subsystems, splits[0])
	}
	return subsystems, nil
}

// AvailableTracers returns the list of available tracers.
func AvailableTracers() ([]string, error) {
	return fileToStrings(TracingDir + "/available_tracers")
}

// CurrentTracer returns the current tracer.
func CurrentTracer() (string, error) {
	res, err := fileToStrings(TracingDir + "/current_tracer")
	return res[0], err
}

// GetTracepointConfig is used to get the configuration for a trace event.
func GetTracepointConfig(subsystem, event string) (uint64, error) {
	res, err := fileToStrings(
		TracingDir + fmt.Sprintf("/events/%s/%s/id", subsystem, event))
	if err != nil {
		return 0, err
	}
	return strconv.ParseUint(res[0], 10, 64)
}
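// For example (illustrative values), GetTracepointConfig("syscalls",
// "sys_enter_getrusage") reads
// /sys/kernel/debug/tracing/events/syscalls/sys_enter_getrusage/id and parses
// the id into the perf_event_attr config value.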
// ProfileTracepoint is used to profile a kernel tracepoint event for a
// specific PID. Events can be listed with `perf list` for Tracepoint Events
// or in the /sys/kernel/debug/tracing/events directory with the kind being
// the directory and the event being the subdirectory.
func ProfileTracepoint(subsystem, event string, pid, cpu int, opts ...int) (BPFProfiler, error) {
	config, err := GetTracepointConfig(subsystem, event)
	if err != nil {
		return nil, err
	}
	eventAttr := &unix.PerfEventAttr{
		Type:        PERF_TYPE_TRACEPOINT,
		Config:      config,
		Size:        uint32(unsafe.Sizeof(unix.PerfEventAttr{})),
		Bits:        unix.PerfBitDisabled | unix.PerfBitExcludeHv,
		Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED,
		Sample_type: PERF_SAMPLE_IDENTIFIER,
	}
	var eventOps int
	if len(opts) > 0 {
		eventOps = opts[0]
	}
	fd, err := unix.PerfEventOpen(
		eventAttr,
		pid,
		cpu,
		-1,
		eventOps,
	)
	if err != nil {
		return nil, err
	}

	return &profiler{
		fd: fd,
	}, nil
}

// TracepointEventAttr is used to return a PerfEventAttr for a trace event.
func TracepointEventAttr(subsystem, event string) (*unix.PerfEventAttr, error) {
	config, err := GetTracepointConfig(subsystem, event)
	if err != nil {
		return nil, err
	}
	return &unix.PerfEventAttr{
		Type:        PERF_TYPE_TRACEPOINT,
		Config:      config,
		Size:        uint32(unsafe.Sizeof(unix.PerfEventAttr{})),
		Bits:        unix.PerfBitDisabled | unix.PerfBitExcludeHv,
		Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED,
		Sample_type: PERF_SAMPLE_IDENTIFIER,
	}, nil
}

perf-utils-0.5.1/events_test.go
package perf

import (
	"testing"
)

func TestAvailableEvents(t *testing.T) {
	events, err := AvailableEvents()
	if err != nil {
		t.Fatal(err)
	}
	if len(events) == 0 {
		t.Fatalf("Expected available events, got: %v", events)
	}
}

perf-utils-0.5.1/fs_utils.go
//go:build linux
// +build linux

package perf

import (
	"bufio"
	"fmt"
	"os"
	"strings"
)

const (
	// DebugFS is the filesystem type for debugfs.
	DebugFS = "debugfs"
	// TraceFS is the filesystem type for tracefs.
	TraceFS = "tracefs"
	// ProcMounts is the mount point for file systems in procfs.
	ProcMounts = "/proc/mounts"
	// PerfMaxStack is the mount point for the max perf event size.
	PerfMaxStack = "/proc/sys/kernel/perf_event_max_stack"
	// PerfMaxContexts is a sysfs mount that contains the max perf contexts.
	PerfMaxContexts = "/proc/sys/kernel/perf_event_max_contexts_per_stack"
	// SyscallsDir is a constant of the default tracing event syscalls directory.
	SyscallsDir = "/sys/kernel/debug/tracing/events/syscalls/"
	// TracingDir is a constant of the default tracing directory.
	TracingDir = "/sys/kernel/debug/tracing"
)

var (
	// ErrNoMount is when there is no such mount.
	ErrNoMount = fmt.Errorf("no such mount")
)

// TraceFSMount returns the first found mount point of a tracefs file system.
func TraceFSMount() (string, error) {
	mounts, err := GetFSMount(TraceFS)
	if err != nil {
		return "", err
	}
	if len(mounts) == 0 {
		return "", ErrNoMount
	}
	return mounts[0], nil
}

// DebugFSMount returns the first found mount point of a debugfs file system.
func DebugFSMount() (string, error) {
	mounts, err := GetFSMount(DebugFS)
	if err != nil {
		return "", err
	}
	if len(mounts) == 0 {
		return "", ErrNoMount
	}
	return mounts[0], nil
}
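// For example (illustrative), on many systems DebugFSMount returns
// "/sys/kernel/debug" and TraceFSMount returns a path such as
// "/sys/kernel/debug/tracing", depending on where the kernel mounted them.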
// GetFSMount is a helper function to get the mount points of a file system
// type.
func GetFSMount(mountType string) ([]string, error) {
	mounts := []string{}
	file, err := os.Open(ProcMounts)
	if err != nil {
		return mounts, err
	}
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		mountInfo := strings.Split(scanner.Text(), " ")
		if len(mountInfo) > 3 && mountInfo[2] == mountType {
			mounts = append(mounts, mountInfo[1])
		}
	}
	if err := scanner.Err(); err != nil {
		return mounts, err
	}
	return mounts, file.Close()
}

// fileToStrings is a helper method that reads a file line by line and returns
// a slice of strings.
func fileToStrings(path string) ([]string, error) {
	res := []string{}
	f, err := os.Open(path)
	if err != nil {
		return res, err
	}
	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		res = append(res, scanner.Text())
	}
	if err := scanner.Err(); err != nil {
		return res, err
	}
	return res, nil
}

perf-utils-0.5.1/fs_utils_test.go
//go:build linux
// +build linux

package perf

import (
	"testing"
)

func TestTraceFSMount(t *testing.T) {
	mount, err := TraceFSMount()
	if err != nil {
		t.Fatal(err)
	}
	if len(mount) == 0 {
		t.Fatal("tracefs not mounted")
	}
}

func TestDebugFSMount(t *testing.T) {
	mount, err := DebugFSMount()
	if err != nil {
		t.Fatal(err)
	}
	if len(mount) == 0 {
		t.Fatal("debugfs not mounted")
	}
}

perf-utils-0.5.1/go.mod
module github.com/hodgesds/perf-utils

go 1.14

require (
	github.com/stretchr/testify v1.3.0
	go.uber.org/multierr v1.6.0
	golang.org/x/sys v0.0.0-20211031064116-611d5d643895
)

perf-utils-0.5.1/go.sum
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
go.uber.org/atomic v1.7.0 h1:ADUqmZGgLDDfbSL9ZmPxKTybcoEYHgpYfELNoN+7hsw=
go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc=
go.uber.org/multierr v1.6.0 h1:y6IPFStTAIT5Ytl7/XYmHvzXQ7S3g/IeZW9hyZ5thw4=
go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU=
golang.org/x/sys v0.0.0-20211031064116-611d5d643895 h1:iaNpwpnrgL5jzWS0vCNnfa8HqzxveCFpFx3uC/X4Tps=
golang.org/x/sys v0.0.0-20211031064116-611d5d643895/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=

perf-utils-0.5.1/group_profiler.go
//go:build linux
// +build linux

package perf

import (
	"encoding/binary"
	"fmt"
	"sync"
	"syscall"

	"go.uber.org/multierr"
	"golang.org/x/sys/unix"
)

// ErrNoLeader is returned when a leader of a GroupProfiler is not defined.
var ErrNoLeader = fmt.Errorf("No leader defined")

// GroupProfileValue is returned from a GroupProfiler.
type GroupProfileValue struct {
	Events      uint64
	TimeEnabled uint64
	TimeRunning uint64
	Values      []uint64
}

// GroupProfiler is used to setup a group profiler.
type GroupProfiler interface {
	Start() error
	Reset() error
	Stop() error
	Close() error
	HasProfilers() bool
	Profile(*GroupProfileValue) error
}

// groupProfiler implements the GroupProfiler interface.
type groupProfiler struct {
	fds         []int // leader is always element 0
	profilersMu sync.RWMutex
	bufPool     sync.Pool
}

// NewGroupProfiler returns a GroupProfiler.
func NewGroupProfiler(pid, cpu, opts int, eventAttrs ...unix.PerfEventAttr) (GroupProfiler, error) {
	fds := make([]int, len(eventAttrs))

	for i, eventAttr := range eventAttrs {
		// common configs
		eventAttr.Size = EventAttrSize
		eventAttr.Sample_type = PERF_SAMPLE_IDENTIFIER

		// Leader fd must be opened first
		if i == 0 {
			// leader specific configs
			eventAttr.Bits = unix.PerfBitDisabled | unix.PerfBitExcludeHv
			eventAttr.Read_format = unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED | unix.PERF_FORMAT_GROUP

			fd, err := unix.PerfEventOpen(
				&eventAttr,
				pid,
				cpu,
				-1,
				opts,
			)
			if err != nil {
				return nil, err
			}
			fds[i] = fd
			continue
		}

		// non leader configs
		eventAttr.Read_format = unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED | unix.PERF_FORMAT_GROUP
		eventAttr.Bits = unix.PerfBitExcludeHv

		fd, err := unix.PerfEventOpen(
			&eventAttr,
			pid,
			cpu,
			fds[0],
			opts,
		)
		if err != nil {
			// cleanup any old Fds
			for ii, fd2 := range fds {
				if ii == i {
					break
				}
				err = multierr.Append(err, unix.Close(fd2))
			}
			return nil, err
		}
		fds[i] = fd
	}

	bufPool := sync.Pool{
		New: func() interface{} {
			return make([]byte, 24+8*len(fds))
		},
	}

	return &groupProfiler{
		fds:     fds,
		bufPool: bufPool,
	}, nil
}

// HasProfilers returns if there are any configured profilers.
func (p *groupProfiler) HasProfilers() bool {
	p.profilersMu.RLock()
	defer p.profilersMu.RUnlock()
	return len(p.fds) > 0
}

// Start is used to start the GroupProfiler.
func (p *groupProfiler) Start() error {
	if !p.HasProfilers() {
		return ErrNoLeader
	}
	p.profilersMu.RLock()
	defer p.profilersMu.RUnlock()
	return unix.IoctlSetInt(p.fds[0], unix.PERF_EVENT_IOC_ENABLE, unix.PERF_IOC_FLAG_GROUP)
}

// Reset is used to reset the GroupProfiler.
func (p *groupProfiler) Reset() error {
	if !p.HasProfilers() {
		return ErrNoLeader
	}
	p.profilersMu.RLock()
	defer p.profilersMu.RUnlock()
	return unix.IoctlSetInt(p.fds[0], unix.PERF_EVENT_IOC_RESET, unix.PERF_IOC_FLAG_GROUP)
}

// Stop is used to stop the GroupProfiler.
func (p *groupProfiler) Stop() error {
	if !p.HasProfilers() {
		return ErrNoLeader
	}
	p.profilersMu.RLock()
	defer p.profilersMu.RUnlock()
	return unix.IoctlSetInt(p.fds[0], unix.PERF_EVENT_IOC_DISABLE, unix.PERF_IOC_FLAG_GROUP)
}

// Close is used to close the GroupProfiler.
func (p *groupProfiler) Close() error {
	var err error
	p.profilersMu.RLock()
	for _, fd := range p.fds {
		err = multierr.Append(err, unix.Close(fd))
	}
	p.profilersMu.RUnlock()
	return err
}
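// Example (illustrative sketch): profiling two hardware events in one group
// so the kernel schedules them onto the PMU together; the event attr helpers
// are the same ones used in the README benchmarks.
//
//	p, err := NewGroupProfiler(os.Getpid(), -1, 0,
//		CPUCyclesEventAttr(),
//		CPUInstructionsEventAttr(),
//	)
//	if err != nil {
//		log.Fatal(err)
//	}
//	defer p.Close()
//	_ = p.Start()
//	// ... workload to measure ...
//	val := &GroupProfileValue{}
//	_ = p.Profile(val)
//	_ = p.Stop()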
// Profile is used to return the GroupProfileValue of the GroupProfiler.
func (p *groupProfiler) Profile(val *GroupProfileValue) error {
	p.profilersMu.RLock()
	defer p.profilersMu.RUnlock()
	nEvents := len(p.fds)
	if nEvents == 0 {
		return ErrNoLeader
	}

	// read format of the raw event looks like this:
	/*
		struct read_format {
			u64 nr;           // The number of events
			u64 time_enabled; // if PERF_FORMAT_TOTAL_TIME_ENABLED
			u64 time_running; // if PERF_FORMAT_TOTAL_TIME_RUNNING
			struct {
				u64 value; // The value of the event
				u64 id;    // if PERF_FORMAT_ID
			} values[nr];
		};
	*/

	buf := p.bufPool.Get().([]byte)
	_, err := syscall.Read(p.fds[0], buf)
	if err != nil {
		zero(buf)
		p.bufPool.Put(buf)
		return err
	}

	val.Events = binary.LittleEndian.Uint64(buf[0:8])
	val.TimeEnabled = binary.LittleEndian.Uint64(buf[8:16])
	val.TimeRunning = binary.LittleEndian.Uint64(buf[16:24])
	val.Values = make([]uint64, len(p.fds))

	offset := 24
	for i := range p.fds {
		val.Values[i] = binary.LittleEndian.Uint64(buf[offset : offset+8])
		offset += 8
	}

	zero(buf)
	p.bufPool.Put(buf)
	return nil
}

perf-utils-0.5.1/group_profiler_test.go
package perf

import (
	"os"
	"testing"
)

func TestGroupProfiler(t *testing.T) {
	p, err := NewGroupProfiler(
		os.Getpid(),
		-1,
		0,
		CPUMigrationsEventAttr(),
		MinorPageFaultsEventAttr(),
	)
	if err != nil {
		t.Fatal(err)
	}
	if err := p.Reset(); err != nil {
		t.Fatal(err)
	}
	if err := p.Start(); err != nil {
		t.Fatal(err)
	}
	err = p.Profile(&GroupProfileValue{})
	if err != nil {
		t.Fatal(err)
	}
	if err := p.Stop(); err != nil {
		t.Fatal(err)
	}
	if err := p.Close(); err != nil {
		t.Fatal(err)
	}
}

perf-utils-0.5.1/hardware_profiler.go
//go:build linux
// +build linux

package perf

import (
	"fmt"
	"sync"

	"go.uber.org/multierr"
	"golang.org/x/sys/unix"
)

type HardwareProfilerType int

const (
	AllHardwareProfilers HardwareProfilerType = 0
	CpuCyclesProfiler HardwareProfilerType = 1 << iota
	CpuInstrProfiler HardwareProfilerType = 1 << iota
	CacheRefProfiler HardwareProfilerType = 1 << iota
	CacheMissesProfiler HardwareProfilerType = 1 << iota
	BranchInstrProfiler HardwareProfilerType = 1 << iota
	BranchMissesProfiler HardwareProfilerType = 1 << iota
	BusCyclesProfiler HardwareProfilerType = 1 << iota
	StalledCyclesBackendProfiler HardwareProfilerType = 1 << iota
	StalledCyclesFrontendProfiler HardwareProfilerType = 1 << iota
	RefCpuCyclesProfiler HardwareProfilerType = 1 << iota
)

type hardwareProfiler struct {
	// map of perf counter type to file descriptor
	profilers   map[int]Profiler
	profilersMu sync.RWMutex
}

// NewHardwareProfiler returns a new hardware profiler.
func NewHardwareProfiler(pid, cpu int, profilerSet HardwareProfilerType, opts ...int) (HardwareProfiler, error) {
	var e error
	profilers := map[int]Profiler{}

	if profilerSet&CpuCyclesProfiler > 0 || profilerSet == AllHardwareProfilers {
		cpuCycleProfiler, err := NewCPUCycleProfiler(pid, cpu, opts...)
		if err != nil {
			e = multierr.Append(e, fmt.Errorf("Failed to setup CPU cycle profiler: pid (%d) cpu (%d) %q", pid, cpu, err))
		} else {
			profilers[unix.PERF_COUNT_HW_CPU_CYCLES] = cpuCycleProfiler
		}
	}
	if profilerSet&CpuInstrProfiler > 0 || profilerSet == AllHardwareProfilers {
		instrProfiler, err := NewInstrProfiler(pid, cpu, opts...)
		if err != nil {
			e = multierr.Append(e, fmt.Errorf("Failed to setup CPU instruction profiler: pid (%d) cpu (%d) %q", pid, cpu, err))
		} else {
			profilers[unix.PERF_COUNT_HW_INSTRUCTIONS] = instrProfiler
		}
	}
	if profilerSet&CacheRefProfiler > 0 || profilerSet == AllHardwareProfilers {
		cacheRefProfiler, err := NewCacheRefProfiler(pid, cpu, opts...)
		if err != nil {
			e = multierr.Append(e, fmt.Errorf("Failed to setup cache ref profiler: pid (%d) cpu (%d) %q", pid, cpu, err))
		} else {
			profilers[unix.PERF_COUNT_HW_CACHE_REFERENCES] = cacheRefProfiler
		}
	}
	if profilerSet&CacheMissesProfiler > 0 || profilerSet == AllHardwareProfilers {
		cacheMissesProfiler, err := NewCacheMissesProfiler(pid, cpu, opts...)
		if err != nil {
			e = multierr.Append(e, fmt.Errorf("Failed to setup cache misses profiler: pid (%d) cpu (%d) %q", pid, cpu, err))
		} else {
			profilers[unix.PERF_COUNT_HW_CACHE_MISSES] = cacheMissesProfiler
		}
	}
	if profilerSet&BranchInstrProfiler > 0 || profilerSet == AllHardwareProfilers {
		branchInstrProfiler, err := NewBranchInstrProfiler(pid, cpu, opts...)
		if err != nil {
			e = multierr.Append(e, fmt.Errorf("Failed to setup branch instruction profiler: pid (%d) cpu (%d) %q", pid, cpu, err))
		} else {
			profilers[unix.PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = branchInstrProfiler
		}
	}
	if profilerSet&BranchMissesProfiler > 0 || profilerSet == AllHardwareProfilers {
		branchMissesProfiler, err := NewBranchMissesProfiler(pid, cpu, opts...)
		if err != nil {
			e = multierr.Append(e, fmt.Errorf("Failed to setup branch miss profiler: pid (%d) cpu (%d) %q", pid, cpu, err))
		} else {
			profilers[unix.PERF_COUNT_HW_BRANCH_MISSES] = branchMissesProfiler
		}
	}
	if profilerSet&BusCyclesProfiler > 0 || profilerSet == AllHardwareProfilers {
		busCyclesProfiler, err := NewBusCyclesProfiler(pid, cpu, opts...)
		if err != nil {
			e = multierr.Append(e, fmt.Errorf("Failed to setup bus cycles profiler: pid (%d) cpu (%d) %q", pid, cpu, err))
		} else {
			profilers[unix.PERF_COUNT_HW_BUS_CYCLES] = busCyclesProfiler
		}
	}
	if profilerSet&StalledCyclesFrontendProfiler > 0 || profilerSet == AllHardwareProfilers {
		stalledCyclesFrontProfiler, err := NewStalledCyclesFrontProfiler(pid, cpu, opts...)
		if err != nil {
			e = multierr.Append(e, fmt.Errorf("Failed to setup stalled frontend cycles profiler: pid (%d) cpu (%d) %q", pid, cpu, err))
		} else {
			profilers[unix.PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = stalledCyclesFrontProfiler
		}
	}
	if profilerSet&StalledCyclesBackendProfiler > 0 || profilerSet == AllHardwareProfilers {
		stalledCyclesBackProfiler, err := NewStalledCyclesBackProfiler(pid, cpu, opts...)
		if err != nil {
			e = multierr.Append(e, fmt.Errorf("Failed to setup stalled backend cycles profiler: pid (%d) cpu (%d) %q", pid, cpu, err))
		} else {
			profilers[unix.PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = stalledCyclesBackProfiler
		}
	}
	if profilerSet&RefCpuCyclesProfiler > 0 || profilerSet == AllHardwareProfilers {
		refCPUCyclesProfiler, err := NewRefCPUCyclesProfiler(pid, cpu, opts...)
		if err != nil {
			e = multierr.Append(e, fmt.Errorf("Failed to setup ref CPU cycles profiler: pid (%d) cpu (%d) %q", pid, cpu, err))
		} else {
			profilers[unix.PERF_COUNT_HW_REF_CPU_CYCLES] = refCPUCyclesProfiler
		}
	}

	return &hardwareProfiler{
		profilers: profilers,
	}, e
}

// HasProfilers returns if there are any configured profilers.
func (p *hardwareProfiler) HasProfilers() bool {
	p.profilersMu.RLock()
	defer p.profilersMu.RUnlock()
	return len(p.profilers) > 0
}
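// Example (illustrative): HardwareProfilerType values are bit flags, so
// profilers can be combined, e.g.:
//
//	p, err := NewHardwareProfiler(os.Getpid(), -1, CpuCyclesProfiler|CacheMissesProfiler)
//
// which configures only those two counters instead of AllHardwareProfilers.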
// Start is used to start the HardwareProfiler.
func (p *hardwareProfiler) Start() error {
	if !p.HasProfilers() {
		return ErrNoProfiler
	}
	var err error
	p.profilersMu.RLock()
	for _, profiler := range p.profilers {
		err = multierr.Append(err, profiler.Start())
	}
	p.profilersMu.RUnlock()
	return err
}

// Reset is used to reset the HardwareProfiler.
func (p *hardwareProfiler) Reset() error {
	var err error
	p.profilersMu.RLock()
	for _, profiler := range p.profilers {
		err = multierr.Append(err, profiler.Reset())
	}
	p.profilersMu.RUnlock()
	return err
}

// Stop is used to stop the HardwareProfiler.
func (p *hardwareProfiler) Stop() error {
	var err error
	p.profilersMu.RLock()
	for _, profiler := range p.profilers {
		err = multierr.Append(err, profiler.Stop())
	}
	p.profilersMu.RUnlock()
	return err
}

// Close is used to close the HardwareProfiler.
func (p *hardwareProfiler) Close() error {
	var err error
	p.profilersMu.RLock()
	for _, profiler := range p.profilers {
		err = multierr.Append(err, profiler.Close())
	}
	p.profilersMu.RUnlock()
	return err
}

// Profile is used to read the HardwareProfiler's HardwareProfile; it returns
// an error only if all profiles fail.
func (p *hardwareProfiler) Profile(hwProfile *HardwareProfile) error {
	var err error
	hwProfile.Reset()
	p.profilersMu.RLock()
	for profilerType, profiler := range p.profilers {
		profileVal := ProfileValuePool.Get().(*ProfileValue)
		err2 := profiler.Profile(profileVal)
		err = multierr.Append(err, err2)
		if err2 == nil {
			if hwProfile.TimeEnabled == nil {
				hwProfile.TimeEnabled = &profileVal.TimeEnabled
			}
			if hwProfile.TimeRunning == nil {
				hwProfile.TimeRunning = &profileVal.TimeRunning
			}
			switch profilerType {
			case unix.PERF_COUNT_HW_CPU_CYCLES:
				hwProfile.CPUCycles = &profileVal.Value
			case unix.PERF_COUNT_HW_INSTRUCTIONS:
				hwProfile.Instructions = &profileVal.Value
			case unix.PERF_COUNT_HW_CACHE_REFERENCES:
				hwProfile.CacheRefs = &profileVal.Value
			case unix.PERF_COUNT_HW_CACHE_MISSES:
				hwProfile.CacheMisses = &profileVal.Value
			case unix.PERF_COUNT_HW_BRANCH_INSTRUCTIONS:
				hwProfile.BranchInstr = &profileVal.Value
			case unix.PERF_COUNT_HW_BRANCH_MISSES:
				hwProfile.BranchMisses = &profileVal.Value
			case unix.PERF_COUNT_HW_BUS_CYCLES:
				hwProfile.BusCycles = &profileVal.Value
			case unix.PERF_COUNT_HW_STALLED_CYCLES_FRONTEND:
				hwProfile.StalledCyclesFrontend = &profileVal.Value
			case unix.PERF_COUNT_HW_STALLED_CYCLES_BACKEND:
				hwProfile.StalledCyclesBackend = &profileVal.Value
			case unix.PERF_COUNT_HW_REF_CPU_CYCLES:
				hwProfile.RefCPUCycles = &profileVal.Value
			}
		}
	}
	p.profilersMu.RUnlock()
	return err
}

perf-utils-0.5.1/hardware_profiler_test.go
package perf

import (
	"encoding/json"
	"os"
	"testing"
)

func TestHardwareProfiler(t *testing.T) {
	hwProfiler, err := NewHardwareProfiler(os.Getpid(), -1, AllHardwareProfilers)
	if err != nil && !hwProfiler.HasProfilers() {
		t.Fatal(err)
	}
	defer func() {
		if err := hwProfiler.Close(); err != nil {
			t.Fatal(err)
		}
	}()
	if err := hwProfiler.Start(); err != nil {
		t.Fatal(err)
	}
	profile := &HardwareProfile{}
	err = hwProfiler.Profile(profile)
	if err != nil {
		t.Fatal(err)
	}
	data, err := json.Marshal(profile)
	if err != nil {
		t.Fatal(err)
	}
	if len(data) == 0 {
		t.Fatalf("Expected data, got: %+v\n", data)
	}
	if err := hwProfiler.Stop(); err != nil {
		t.Fatal(err)
	}
}

perf-utils-0.5.1/msr.go
package perf

import (
	"os"
	"path/filepath"
)

const (
	// MSRBaseDir is the base dir for MSRs.
	MSRBaseDir = "/dev/cpu"
)
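// Example (illustrative sketch): reading an MSR on CPU 0. The register
// offset 0x10 (IA32_TIME_STAMP_COUNTER on x86) is an assumption; reading
// MSRs requires the msr kernel module and root privileges.
//
//	msr, err := NewMSR("/dev/cpu/0/msr")
//	if err != nil {
//		log.Fatal(err)
//	}
//	defer msr.Close()
//	b, err := msr.Read(0x10)
//	if err != nil {
//		log.Fatal(err)
//	}
//	fmt.Printf("TSC: %x\n", b)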
// MSRPaths returns the set of MSR paths.
func MSRPaths() ([]string, error) {
	msrs := []string{}
	err := filepath.Walk(MSRBaseDir, func(path string, info os.FileInfo, err error) error {
		if !info.IsDir() {
			return nil
		}
		if path != MSRBaseDir {
			// TODO: replace this with a real recursive walk.
			msrs = append(msrs, path+"/msr")
		}
		return nil
	})
	return msrs, err
}

// MSRs attempts to return all available MSRs.
func MSRs(onErr func(error)) []*MSR {
	paths, err := MSRPaths()
	if err != nil {
		onErr(err)
		return nil
	}
	msrs := []*MSR{}
	for _, path := range paths {
		msr, err := NewMSR(path)
		if err != nil {
			onErr(err)
			continue
		}
		msrs = append(msrs, msr)
	}
	return msrs
}

// MSR represents a Model Specific Register.
type MSR struct {
	f *os.File
}

// NewMSR returns a MSR.
func NewMSR(path string) (*MSR, error) {
	f, err := os.OpenFile(path, os.O_RDWR, 0660)
	if err != nil {
		return nil, err
	}
	return &MSR{
		f: f,
	}, nil
}

// Read is used to read a MSR value.
func (m *MSR) Read(off int64) ([]byte, error) {
	b := make([]byte, 8)
	_, err := m.f.ReadAt(b, off)
	return b, err
}

// Close is used to close the MSR.
func (m *MSR) Close() error {
	if m.f != nil {
		return m.f.Close()
	}
	return nil
}

perf-utils-0.5.1/msr/skylake/skylake_msr.go
package skylake

const (
	EVENT_TEMP_CORE = 0x00
	UMASK_TEMP_CORE = 0x00
	EVENT_PWR_PKG_ENERGY = 0x02
	UMASK_PWR_PKG_ENERGY = 0x00
	EVENT_PWR_PP0_ENERGY = 0x01
	UMASK_PWR_PP0_ENERGY = 0x00
	EVENT_PWR_PP1_ENERGY = 0x04
	UMASK_PWR_PP1_ENERGY = 0x00
	EVENT_PWR_DRAM_ENERGY = 0x03
	UMASK_PWR_DRAM_ENERGY = 0x00
	EVENT_PWR_PLATFORM_ENERGY = 0x05
	UMASK_PWR_PLATFORM_ENERGY = 0x00
	EVENT_INSTR_RETIRED = 0x00
	UMASK_INSTR_RETIRED_ANY = 0x00
	EVENT_CPU_CLK_UNHALTED = 0x00
	UMASK_CPU_CLK_UNHALTED_CORE = 0x00
	UMASK_CPU_CLK_UNHALTED_REF = 0x00
	EVENT_ICACHE_16B_IFDATA_STALL = 0x80
	UMASK_ICACHE_16B_IFDATA_STALL = 0x04
	EVENT_ICACHE_64B_IFTAG = 0x83
	UMASK_ICACHE_64B_IFTAG_HIT = 0x01
	UMASK_ICACHE_64B_IFTAG_MISS = 0x02
	UMASK_ICACHE_64B_IFTAG_ALL = 0x03
	UMASK_ICACHE_64B_IFTAG_STALL = 0x04
	EVENT_CPU_CLOCK_UNHALTED = 0x3C
	UMASK_CPU_CLOCK_UNHALTED_THREAD_P = 0x00
	UMASK_CPU_CLOCK_UNHALTED_THREAD_P_ANY = 0x00
	UMASK_CPU_CLOCK_UNHALTED_REF_XCLK = 0x01
	UMASK_CPU_CLOCK_UNHALTED_REF_XCLK_ANY = 0x01
	UMASK_CPU_CLOCK_UNHALTED_ONE_THREAD_ACTIVE = 0x02
	EVENT_OPTION_INVERT = 0x1
	UMASK_CPU_CLOCK_UNHALTED_TOTAL_CYCLES = 0x00
	EVENT_BACLEARS = 0xE6
	UMASK_BACLEARS_ANY = 0x01
	EVENT_ITLB_FLUSH = 0xAE
	UMASK_ITLB_FLUSH = 0x01
	EVENT_ILD_STALL_LCP = 0x87
	UMASK_ILD_STALL_LCP = 0x01
	EVENT_IDQ_UOPS_NOT_DELIVERED = 0x9C
	UMASK_IDQ_UOPS_NOT_DELIVERED_CORE = 0x01
	UMASK_IDQ_UOPS_NOT_DELIVERED_CYCLES_0_UOPS_DELIV_CORE = 0x01
	UMASK_IDQ_UOPS_NOT_DELIVERED_CYCLES_LE_1_UOP_DELIV_CORE = 0x01
	UMASK_IDQ_UOPS_NOT_DELIVERED_CYCLES_LE_2_UOP_DELIV_CORE = 0x01
	UMASK_IDQ_UOPS_NOT_DELIVERED_CYCLES_LE_3_UOP_DELIV_CORE = 0x01
	UMASK_IDQ_UOPS_NOT_DELIVERED_CYCLES_FE_WAS_OK = 0x01
	EVENT_DSB2MITE_SWITCHES_PENALTY_CYCLES = 0xAB
	UMASK_DSB2MITE_SWITCHES_PENALTY_CYCLES = 0x02
	EVENT_INT_MISC = 0x0D
	UMASK_INT_MISC_RECOVERY_CYCLES = 0x01
	UMASK_INT_MISC_RECOVERY_COUNT = 0x01
	UMASK_INT_MISC_RECOVERY_CYCLES_ANY = 0x01
	EVENT_OPTION_EDGE = 1
	UMASK_INT_MISC_RECOVERY_COUNT_ANY = 0x01
	UMASK_INT_MISC_CLEAR_RESTEER_CYCLES = 0x80
	UMASK_INT_MISC_CLEAR_RESTEER_COUNT = 0x80
	EVENT_RESOURCE_STALLS = 0xA2
	UMASK_RESOURCE_STALLS_ANY = 0x01
	UMASK_RESOURCE_STALLS_SB = 0x08
	EVENT_PARTIAL_RAT_STALLS_SCOREBOARD = 0x59
	UMASK_PARTIAL_RAT_STALLS_SCOREBOARD = 0x01
	EVENT_UOPS_ISSUED = 0x0E
	UMASK_UOPS_ISSUED_ANY = 0x01
	UMASK_UOPS_ISSUED_VECTOR_WIDTH_MISMATCH = 0x02
	UMASK_UOPS_ISSUED_SLOW_LEA = 0x20
	UMASK_UOPS_ISSUED_USED_CYCLES = 0x01
	UMASK_UOPS_ISSUED_STALL_CYCLES = 0x01
	UMASK_UOPS_ISSUED_TOTAL_CYCLES = 0x01
	EVENT_OPTION_ANYTHREAD = 1
	UMASK_UOPS_ISSUED_CORE_USED_CYCLES = 0x01
	UMASK_UOPS_ISSUED_CORE_STALL_CYCLES = 0x01
	UMASK_UOPS_ISSUED_CORE_TOTAL_CYCLES = 0x01
	UMASK_UOPS_ISSUED_CYCLES_GE_1_UOPS_EXEC = 0x01
	UMASK_UOPS_ISSUED_CYCLES_GE_2_UOPS_EXEC = 0x01
	UMASK_UOPS_ISSUED_CYCLES_GE_3_UOPS_EXEC = 0x01
	UMASK_UOPS_ISSUED_CYCLES_GE_4_UOPS_EXEC = 0x01
	UMASK_UOPS_ISSUED_CYCLES_GE_5_UOPS_EXEC = 0x01
	EVENT_TX_EXEC = 0x5D
	UMASK_TX_EXEC_MISC1 = 0x01
	UMASK_TX_EXEC_MISC2 = 0x02
	UMASK_TX_EXEC_MISC3 = 0x04
	UMASK_TX_EXEC_MISC4 = 0x08
	UMASK_TX_EXEC_MISC5 = 0x10
	EVENT_RS_EVENTS_EMPTY = 0x5E
	UMASK_RS_EVENTS_EMPTY_CYCLES = 0x01
	UMASK_RS_EVENTS_EMPTY_END = 0x01
	EVENT_HLE_RETIRED = 0xC8
	UMASK_HLE_RETIRED_START = 0x01
	UMASK_HLE_RETIRED_COMMIT = 0x02
	UMASK_HLE_RETIRED_ABORTED = 0x04
	UMASK_HLE_RETIRED_ABORTED_MEM = 0x08
	UMASK_HLE_RETIRED_ABORTED_TIMER = 0x10
	UMASK_HLE_RETIRED_ABORTED_UNFRIENDLY = 0x20
	UMASK_HLE_RETIRED_ABORTED_MEMTYPE = 0x40
	UMASK_HLE_RETIRED_ABORTED_EVENTS = 0x80
	EVENT_RTM_RETIRED = 0xC9
	UMASK_RTM_RETIRED_START = 0x01
	UMASK_RTM_RETIRED_COMMIT = 0x02
	UMASK_RTM_RETIRED_ABORTED = 0x04
	UMASK_RTM_RETIRED_ABORTED_MEM = 0x08
	UMASK_RTM_RETIRED_ABORTED_TIMER = 0x10
	UMASK_RTM_RETIRED_ABORTED_UNFRIENDLY = 0x20
	UMASK_RTM_RETIRED_ABORTED_MEMTYPE = 0x40
	UMASK_RTM_RETIRED_ABORTED_EVENTS = 0x80
	EVENT_MACHINE_CLEARS = 0xC3
	UMASK_MACHINE_CLEARS_COUNT = 0x01
	UMASK_MACHINE_CLEARS_MEMORY_ORDERING = 0x02
	UMASK_MACHINE_CLEARS_SMC = 0x04
	EVENT_HW_INTERRUPTS_RECEIVED = 0xCB
	UMASK_HW_INTERRUPTS_RECEIVED = 0x01
	EVENT_INST_RETIRED = 0xC0
	UMASK_INST_RETIRED_ANY = 0x00
	EVENT_UOPS_RETIRED = 0xC2
	UMASK_UOPS_RETIRED_ALL = 0x01
	UMASK_UOPS_RETIRED_CORE_ALL = 0x01
	UMASK_UOPS_RETIRED_RETIRE_SLOTS = 0x02
	UMASK_UOPS_RETIRED_USED_CYCLES = 0x01
	UMASK_UOPS_RETIRED_STALL_CYCLES = 0x01
	UMASK_UOPS_RETIRED_TOTAL_CYCLES = 0x01
	UMASK_UOPS_RETIRED_CORE_RETIRE_SLOTS = 0x02
	UMASK_UOPS_RETIRED_CORE_USED_CYCLES = 0x01
	UMASK_UOPS_RETIRED_CORE_STALL_CYCLES = 0x01
	UMASK_UOPS_RETIRED_CORE_TOTAL_CYCLES = 0x01
	UMASK_UOPS_RETIRED_CYCLES_GE_1_UOPS_EXEC = 0x01
	UMASK_UOPS_RETIRED_CYCLES_GE_2_UOPS_EXEC = 0x01
	UMASK_UOPS_RETIRED_CYCLES_GE_3_UOPS_EXEC = 0x01
	UMASK_UOPS_RETIRED_CYCLES_GE_4_UOPS_EXEC = 0x01
	UMASK_UOPS_RETIRED_CYCLES_GE_5_UOPS_EXEC = 0x01
	UMASK_UOPS_RETIRED_CYCLES_GE_6_UOPS_EXEC = 0x01
	UMASK_UOPS_RETIRED_CYCLES_GE_7_UOPS_EXEC = 0x01
	UMASK_UOPS_RETIRED_CYCLES_GE_8_UOPS_EXEC = 0x01
	EVENT_BR_INST_RETIRED = 0xC4
	UMASK_BR_INST_RETIRED_ALL_BRANCHES = 0x00
	UMASK_BR_INST_RETIRED_CONDITIONAL = 0x01
	UMASK_BR_INST_RETIRED_NEAR_CALL = 0x02
	UMASK_BR_INST_RETIRED_NEAR_RETURN = 0x08
	UMASK_BR_INST_RETIRED_NOT_TAKEN = 0x10
	UMASK_BR_INST_RETIRED_NEAR_TAKEN = 0x20
	UMASK_BR_INST_RETIRED_FAR_BRANCH = 0x40
	EVENT_BR_MISP_RETIRED = 0xC5
	UMASK_BR_MISP_RETIRED_ALL_BRANCHES = 0x00
	UMASK_BR_MISP_RETIRED_CONDITIONAL = 0x01
	UMASK_BR_MISP_RETIRED_NEAR_TAKEN = 0x20
	EVENT_FP_ARITH_INST_RETIRED = 0xC7
	UMASK_FP_ARITH_INST_RETIRED_SCALAR_DOUBLE = 0x01
	UMASK_FP_ARITH_INST_RETIRED_SCALAR_SINGLE = 0x02
	UMASK_FP_ARITH_INST_RETIRED_128B_PACKED_DOUBLE = 0x04
	UMASK_FP_ARITH_INST_RETIRED_128B_PACKED_SINGLE = 0x08
	UMASK_FP_ARITH_INST_RETIRED_256B_PACKED_DOUBLE = 0x10
	UMASK_FP_ARITH_INST_RETIRED_256B_PACKED_SINGLE = 0x20
	EVENT_FP_ASSIST_ANY = 0xCA
	UMASK_FP_ASSIST_ANY = 0x1E
0x1E EVENT_MEM_INST_RETIRED = 0xD0 UMASK_MEM_INST_RETIRED_STLB_MISS_LOADS = 0x11 UMASK_MEM_INST_RETIRED_STLB_MISS_STORES = 0x12 UMASK_MEM_INST_RETIRED_LOCK_LOADS = 0x21 UMASK_MEM_INST_RETIRED_SPLIT_LOADS = 0x41 UMASK_MEM_INST_RETIRED_SPLIT_STORES = 0x42 UMASK_MEM_INST_RETIRED_ALL_LOADS = 0x81 UMASK_MEM_INST_RETIRED_ALL_STORES = 0x82 UMASK_MEM_INST_RETIRED_ALL = 0x83 EVENT_MEM_LOAD_RETIRED = 0xD1 UMASK_MEM_LOAD_RETIRED_L1_HIT = 0x01 UMASK_MEM_LOAD_RETIRED_L2_HIT = 0x02 UMASK_MEM_LOAD_RETIRED_L3_HIT = 0x04 UMASK_MEM_LOAD_RETIRED_L1_MISS = 0x08 UMASK_MEM_LOAD_RETIRED_L2_MISS = 0x10 UMASK_MEM_LOAD_RETIRED_L3_MISS = 0x20 UMASK_MEM_LOAD_RETIRED_FB_HIT = 0x40 UMASK_MEM_LOAD_RETIRED_L1_ALL = 0x09 UMASK_MEM_LOAD_RETIRED_L2_ALL = 0x12 UMASK_MEM_LOAD_RETIRED_L3_ALL = 0x24 EVENT_MEM_LOAD_L3_HIT_RETIRED = 0xD2 UMASK_MEM_LOAD_L3_HIT_RETIRED_XSNP_MISS = 0x01 UMASK_MEM_LOAD_L3_HIT_RETIRED_XSNP_HIT = 0x02 UMASK_MEM_LOAD_L3_HIT_RETIRED_XSNP_HITM = 0x04 UMASK_MEM_LOAD_L3_HIT_RETIRED_XSNP_NONE = 0x08 EVENT_FRONTEND_RETIRED = 0xC6 //UMASK_FRONTEND_RETIRED_DSB_MISS=0x01 0x00 0x11 //UMASK_FRONTEND_RETIRED_L1I_MISS=0x01 0x00 0x12 //UMASK_FRONTEND_RETIRED_L2_MISS=0x01 0x00 0x13 //UMASK_FRONTEND_RETIRED_ITLB_MISS=0x01 0x00 0x14 //UMASK_FRONTEND_RETIRED_STLB_MISS=0x01 0x00 0x15 //UMASK_FRONTEND_RETIRED_LATENCY_GE_2=0x01 0x00 0x400206 //UMASK_FRONTEND_RETIRED_LATENCY_GE_2_BUBBLES_GE_2=0x01 0x00 0x200206 //UMASK_FRONTEND_RETIRED_LATENCY_GE_4=0x01 0x00 0x400406 EVENT_UOPS_EXECUTED = 0xB1 UMASK_UOPS_EXECUTED_THREAD = 0x01 UMASK_UOPS_EXECUTED_USED_CYCLES = 0x01 UMASK_UOPS_EXECUTED_STALL_CYCLES = 0x01 UMASK_UOPS_EXECUTED_TOTAL_CYCLES = 0x01 UMASK_UOPS_EXECUTED_CYCLES_GE_1_UOPS_EXEC = 0x01 UMASK_UOPS_EXECUTED_CYCLES_GE_2_UOPS_EXEC = 0x01 UMASK_UOPS_EXECUTED_CYCLES_GE_3_UOPS_EXEC = 0x01 UMASK_UOPS_EXECUTED_CYCLES_GE_4_UOPS_EXEC = 0x01 UMASK_UOPS_EXECUTED_CYCLES_GE_5_UOPS_EXEC = 0x01 UMASK_UOPS_EXECUTED_CYCLES_GE_6_UOPS_EXEC = 0x01 UMASK_UOPS_EXECUTED_CYCLES_GE_7_UOPS_EXEC = 0x01 UMASK_UOPS_EXECUTED_CYCLES_GE_8_UOPS_EXEC = 0x01 UMASK_UOPS_EXECUTED_CORE = 0x02 UMASK_UOPS_EXECUTED_CORE_USED_CYCLES = 0x02 UMASK_UOPS_EXECUTED_CORE_STALL_CYCLES = 0x02 UMASK_UOPS_EXECUTED_CORE_TOTAL_CYCLES = 0x02 UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_1_UOPS_EXEC = 0x02 UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_2_UOPS_EXEC = 0x02 UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_3_UOPS_EXEC = 0x02 UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_4_UOPS_EXEC = 0x02 UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_5_UOPS_EXEC = 0x02 UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_6_UOPS_EXEC = 0x02 UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_7_UOPS_EXEC = 0x02 UMASK_UOPS_EXECUTED_CORE_CYCLES_GE_8_UOPS_EXEC = 0x02 UMASK_UOPS_EXECUTED_X87 = 0x10 EVENT_EXE_ACTIVITY = 0xA6 UMASK_EXE_ACTIVITY_EXE_BOUND_0_PORTS = 0x01 UMASK_EXE_ACTIVITY_1_PORTS_UTIL = 0x02 UMASK_EXE_ACTIVITY_2_PORTS_UTIL = 0x04 UMASK_EXE_ACTIVITY_3_PORTS_UTIL = 0x08 UMASK_EXE_ACTIVITY_4_PORTS_UTIL = 0x10 UMASK_EXE_ACTIVITY_BOUND_ON_STORES = 0x40 EVENT_UOPS_DISPATCHED_PORT = 0xA1 UMASK_UOPS_DISPATCHED_PORT_PORT_0 = 0x01 UMASK_UOPS_DISPATCHED_PORT_PORT_1 = 0x02 UMASK_UOPS_DISPATCHED_PORT_PORT_2 = 0x04 UMASK_UOPS_DISPATCHED_PORT_PORT_3 = 0x08 UMASK_UOPS_DISPATCHED_PORT_PORT_4 = 0x10 UMASK_UOPS_DISPATCHED_PORT_PORT_5 = 0x20 UMASK_UOPS_DISPATCHED_PORT_PORT_6 = 0x40 UMASK_UOPS_DISPATCHED_PORT_PORT_7 = 0x80 UMASK_UOPS_DISPATCHED_PORT_ARITH_PORTS = 0x63 UMASK_UOPS_DISPATCHED_PORT_ARITH_PORTS_CORE = 0x63 UMASK_UOPS_DISPATCHED_PORT_DATA_PORTS = 0x9C EVENT_CYCLE_ACTIVITY = 0xA3 UMASK_CYCLE_ACTIVITY_STALLS_TOTAL = 0x04 UMASK_CYCLE_ACTIVITY_CYCLES_NO_EXECUTE = 0x04 
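// NOTE (editorial sketch): on Intel parts these EVENT_*/UMASK_* pairs are
// combined into the raw config expected by perf_event_open, where for simple
// events (no cmask/inv/edge modifiers) the low byte is the event select and
// the next byte is the unit mask, i.e. config = (umask << 8) | event. For
// example, the accompanying skylake_msr_test.go opens Config 0x1B0, which is
// UMASK_OFFCORE_REQUESTS_DEMAND_DATA_RD<<8 | EVENT_OFFCORE_REQUESTS:
//
//	config := uint64(UMASK_OFFCORE_REQUESTS_DEMAND_DATA_RD)<<8 |
//		uint64(EVENT_OFFCORE_REQUESTS) // 0x1B0
//	p, _ := perf.NewProfiler(unix.PERF_TYPE_RAW, config, os.Getpid(), -1)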
UMASK_CYCLE_ACTIVITY_CYCLES_L2_MISS = 0x01 UMASK_CYCLE_ACTIVITY_STALLS_L2_MISS = 0x05 UMASK_CYCLE_ACTIVITY_CYCLES_L2_PENDING = 0x01 UMASK_CYCLE_ACTIVITY_STALLS_L2_PENDING = 0x05 UMASK_CYCLE_ACTIVITY_CYCLES_L3_MISS = 0x02 UMASK_CYCLE_ACTIVITY_STALLS_L3_MISS = 0x06 UMASK_CYCLE_ACTIVITY_CYCLES_L3_PENDING = 0x02 UMASK_CYCLE_ACTIVITY_STALLS_L3_PENDING = 0x06 UMASK_CYCLE_ACTIVITY_CYCLES_MEM_ANY = 0x10 UMASK_CYCLE_ACTIVITY_STALLS_MEM_ANY = 0x14 UMASK_CYCLE_ACTIVITY_CYCLES_LDM_PENDING = 0x10 UMASK_CYCLE_ACTIVITY_STALLS_LDM_PENDING = 0x14 EVENT_CYCLE_ACTIVITY_CYCLES_L1D_MISS = 0xA3 UMASK_CYCLE_ACTIVITY_CYCLES_L1D_MISS = 0x08 EVENT_CYCLE_ACTIVITY_STALLS_L1D_MISS = 0xA3 UMASK_CYCLE_ACTIVITY_STALLS_L1D_MISS = 0x0C EVENT_CYCLE_ACTIVITY_CYCLES_L1D_PENDING = 0xA3 UMASK_CYCLE_ACTIVITY_CYCLES_L1D_PENDING = 0x08 EVENT_CYCLE_ACTIVITY_STALLS_L1D_PENDING = 0xA3 UMASK_CYCLE_ACTIVITY_STALLS_L1D_PENDING = 0x0C EVENT_EPT_WALK_PENDING = 0x4F UMASK_EPT_WALK_PENDING = 0x10 EVENT_ITLB_MISSES = 0x85 UMASK_ITLB_MISSES_CAUSES_A_WALK = 0x01 UMASK_ITLB_MISSES_WALK_PENDING = 0x10 UMASK_ITLB_MISSES_STLB_HIT = 0x20 UMASK_ITLB_MISSES_WALK_COMPLETED = 0x0E UMASK_ITLB_MISSES_WALK_COMPLETED_4K = 0x02 UMASK_ITLB_MISSES_WALK_COMPLETED_2M_4M = 0x04 UMASK_ITLB_MISSES_WALK_COMPLETED_1G = 0x08 UMASK_ITLB_MISSES_WALK_ACTIVE = 0x10 EVENT_DTLB_LOAD_MISSES = 0x08 UMASK_DTLB_LOAD_MISSES_CAUSES_A_WALK = 0x01 UMASK_DTLB_LOAD_MISSES_WALK_PENDING = 0x10 UMASK_DTLB_LOAD_MISSES_STLB_HIT = 0x20 UMASK_DTLB_LOAD_MISSES_WALK_COMPLETED = 0x0E UMASK_DTLB_LOAD_MISSES_WALK_COMPLETED_4K = 0x02 UMASK_DTLB_LOAD_MISSES_WALK_COMPLETED_2M_4M = 0x04 UMASK_DTLB_LOAD_MISSES_WALK_COMPLETED_1G = 0x08 UMASK_DTLB_LOAD_MISSES_WALK_ACTIVE = 0x10 EVENT_DTLB_STORE_MISSES = 0x49 UMASK_DTLB_STORE_MISSES_CAUSES_A_WALK = 0x01 UMASK_DTLB_STORE_MISSES_WALK_PENDING = 0x10 UMASK_DTLB_STORE_MISSES_STLB_HIT = 0x20 UMASK_DTLB_STORE_MISSES_WALK_COMPLETED = 0x0E UMASK_DTLB_STORE_MISSES_WALK_COMPLETED_4K = 0x02 UMASK_DTLB_STORE_MISSES_WALK_COMPLETED_2M_4M = 0x04 UMASK_DTLB_STORE_MISSES_WALK_COMPLETED_1G = 0x08 UMASK_DTLB_STORE_MISSES_WALK_ACTIVE = 0x10 EVENT_TLB_FLUSH = 0xBD UMASK_TLB_FLUSH_DTLB_THREAD = 0x01 UMASK_TLB_FLUSH_STLB_ANY = 0x20 EVENT_L1D = 0x51 UMASK_L1D_REPLACEMENT = 0x01 UMASK_L1D_M_EVICT = 0x04 EVENT_TX_MEM = 0x54 UMASK_TX_MEM_ABORT_CONFLICT = 0x01 UMASK_TX_MEM_ABORT_CAPACITY = 0x02 UMASK_TX_MEM_ABORT_HLE_STORE_TO_ELIDED_LOCK = 0x04 UMASK_TX_MEM_ABORT_HLE_ELISION_BUFFER_NOT_EMPTY = 0x08 UMASK_TX_MEM_ABORT_HLE_ELISION_BUFFER_MISMATCH = 0x10 UMASK_TX_MEM_ABORT_HLE_ELISION_BUFFER_UNSUPPORTED_ALIGNMENT = 0x20 UMASK_TX_MEM_HLE_ELISION_BUFFER_FULL = 0x40 EVENT_L1D_PEND_MISS = 0x48 UMASK_L1D_PEND_MISS_PENDING = 0x01 UMASK_L1D_PEND_MISS_FB_FULL = 0x02 UMASK_L1D_PEND_MISS_PENDING_CYCLES = 0x01 UMASK_L1D_PEND_MISS_PENDING_CYCLES_ANY = 0x01 EVENT_LOAD_HIT_PRE_SW_PF = 0x4C UMASK_LOAD_HIT_PRE_SW_PF = 0x01 EVENT_LOCK_CYCLES_CACHE_LOCK = 0x63 UMASK_LOCK_CYCLES_CACHE_LOCK_DURATION = 0x02 UMASK_LOCK_CYCLES_CACHE_LOCK_COUNT = 0x02 EVENT_LD_BLOCKS = 0x03 UMASK_LD_BLOCKS_STORE_FORWARD = 0x02 UMASK_LD_BLOCKS_NO_SR = 0x08 EVENT_LD_BLOCKS_PARTIAL_ADDRESS_ALIAS = 0x07 UMASK_LD_BLOCKS_PARTIAL_ADDRESS_ALIAS = 0x01 EVENT_OFFCORE_REQUESTS = 0xB0 UMASK_OFFCORE_REQUESTS_DEMAND_DATA_RD = 0x01 UMASK_OFFCORE_REQUESTS_DEMAND_CODE_RD = 0x02 UMASK_OFFCORE_REQUESTS_DEMAND_RFO = 0x04 UMASK_OFFCORE_REQUESTS_ALL_DATA_RD = 0x08 UMASK_OFFCORE_REQUESTS_L3_MISS_DEMAND_DATA_RD = 0x10 UMASK_OFFCORE_REQUESTS_ALL_REQUESTS = 0x80 EVENT_OFFCORE_REQUESTS_OUTSTANDING = 0x60 
UMASK_OFFCORE_REQUESTS_OUTSTANDING_DEMAND_DATA_RD = 0x01 UMASK_OFFCORE_REQUESTS_OUTSTANDING_DEMAND_DATA_RD_GE_6 = 0x01 UMASK_OFFCORE_REQUESTS_OUTSTANDING_DEMAND_CODE_RD = 0x02 UMASK_OFFCORE_REQUESTS_OUTSTANDING_DEMAND_RFO = 0x04 UMASK_OFFCORE_REQUESTS_OUTSTANDING_ALL_DATA_RD = 0x08 UMASK_OFFCORE_REQUESTS_OUTSTANDING_L3_MISS_DEMAND_DATA_RD = 0x10 UMASK_OFFCORE_REQUESTS_OUTSTANDING_CYCLES_WITH_DEMAND_DATA_RD = 0x01 UMASK_OFFCORE_REQUESTS_OUTSTANDING_CYCLES_WITH_DATA_RD = 0x08 UMASK_OFFCORE_REQUESTS_OUTSTANDING_CYCLES_WITH_DEMAND_CODE_RD = 0x02 UMASK_OFFCORE_REQUESTS_OUTSTANDING_CYCLES_WITH_DEMAND_RFO = 0x04 UMASK_OFFCORE_REQUESTS_OUTSTANDING_CYCLES_WITH_L3_MISS_DEMAND_DATA_RD = 0x10 UMASK_OFFCORE_REQUESTS_OUTSTANDING_L3_MISS_DEMAND_DATA_RD_GE_6 = 0x10 EVENT_OFFCORE_REQUESTS_BUFFER_SQ_FULL = 0xB2 UMASK_OFFCORE_REQUESTS_BUFFER_SQ_FULL = 0x01 EVENT_L2_TRANS = 0xF0 UMASK_L2_TRANS_DEMAND_DATA_RD = 0x01 UMASK_L2_TRANS_RFO = 0x02 UMASK_L2_TRANS_CODE_RD = 0x04 UMASK_L2_TRANS_ALL_PF = 0x08 UMASK_L2_TRANS_L1D_WB = 0x10 UMASK_L2_TRANS_L2_FILL = 0x20 UMASK_L2_TRANS_L2_WB = 0x40 UMASK_L2_TRANS_ALL_REQUESTS = 0x80 EVENT_LONGEST_LAT_CACHE = 0x2E UMASK_LONGEST_LAT_CACHE_MISS = 0x41 UMASK_LONGEST_LAT_CACHE_REFERENCE = 0x4F EVENT_L2_RQSTS = 0x24 UMASK_L2_RQSTS_DEMAND_DATA_RD_MISS = 0x21 UMASK_L2_RQSTS_DEMAND_DATA_RD_HIT = 0x41 UMASK_L2_RQSTS_ALL_DEMAND_DATA_RD = 0xE1 UMASK_L2_RQSTS_ALL_RFO = 0xE2 UMASK_L2_RQSTS_ALL_CODE_RD = 0xE4 UMASK_L2_RQSTS_ALL_PF = 0xF8 UMASK_L2_RQSTS_PF_MISS = 0x38 UMASK_L2_RQSTS_PF_HIT = 0xD8 UMASK_L2_RQSTS_RFO_HIT = 0x42 UMASK_L2_RQSTS_RFO_MISS = 0x22 UMASK_L2_RQSTS_CODE_RD_HIT = 0x44 UMASK_L2_RQSTS_CODE_RD_MISS = 0x24 UMASK_L2_RQSTS_ALL_DEMAND_MISS = 0x27 UMASK_L2_RQSTS_ALL_DEMAND_REFERENCES = 0xE7 UMASK_L2_RQSTS_MISS = 0x3F UMASK_L2_RQSTS_REFERENCES = 0xFF EVENT_IDQ = 0x79 UMASK_IDQ_MITE_UOPS = 0x04 UMASK_IDQ_DSB_UOPS = 0x08 UMASK_IDQ_MS_DSB_UOPS = 0x10 UMASK_IDQ_MS_MITE_UOPS = 0x20 UMASK_IDQ_MS_UOPS = 0x30 UMASK_IDQ_MITE_CYCLES = 0x04 UMASK_IDQ_MITE_CYCLES_1_UOPS = 0x04 UMASK_IDQ_MITE_CYCLES_2_UOPS = 0x04 UMASK_IDQ_MITE_CYCLES_3_UOPS = 0x04 UMASK_IDQ_MITE_CYCLES_4_UOPS = 0x04 UMASK_IDQ_MITE_CYCLES_5_UOPS = 0x04 UMASK_IDQ_MITE_CYCLES_6_UOPS = 0x04 UMASK_IDQ_DSB_CYCLES = 0x08 UMASK_IDQ_DSB_CYCLES_1_UOPS = 0x08 UMASK_IDQ_DSB_CYCLES_2_UOPS = 0x08 UMASK_IDQ_DSB_CYCLES_3_UOPS = 0x08 UMASK_IDQ_DSB_CYCLES_4_UOPS = 0x08 UMASK_IDQ_DSB_CYCLES_5_UOPS = 0x08 UMASK_IDQ_DSB_CYCLES_6_UOPS = 0x08 UMASK_IDQ_MS_DSB_CYCLES = 0x10 UMASK_IDQ_MS_DSB_CYCLES_1_UOPS = 0x10 UMASK_IDQ_MS_DSB_CYCLES_2_UOPS = 0x10 UMASK_IDQ_MS_DSB_CYCLES_3_UOPS = 0x10 UMASK_IDQ_MS_DSB_CYCLES_4_UOPS = 0x10 UMASK_IDQ_MS_DSB_CYCLES_5_UOPS = 0x10 UMASK_IDQ_MS_DSB_CYCLES_6_UOPS = 0x10 UMASK_IDQ_MS_DSB_OCCUR = 0x10 UMASK_IDQ_MS_MITE_CYCLES = 0x20 UMASK_IDQ_MS_MITE_CYCLES_1_UOPS = 0x20 UMASK_IDQ_MS_MITE_CYCLES_2_UOPS = 0x20 UMASK_IDQ_MS_MITE_CYCLES_3_UOPS = 0x20 UMASK_IDQ_MS_MITE_CYCLES_4_UOPS = 0x20 UMASK_IDQ_MS_MITE_CYCLES_5_UOPS = 0x20 UMASK_IDQ_MS_MITE_CYCLES_6_UOPS = 0x20 UMASK_IDQ_MS_CYCLES = 0x30 UMASK_IDQ_MS_CYCLES_1_UOPS = 0x30 UMASK_IDQ_MS_CYCLES_2_UOPS = 0x30 UMASK_IDQ_MS_CYCLES_3_UOPS = 0x30 UMASK_IDQ_MS_CYCLES_4_UOPS = 0x30 UMASK_IDQ_MS_CYCLES_5_UOPS = 0x30 UMASK_IDQ_MS_CYCLES_6_UOPS = 0x30 UMASK_IDQ_MS_SWITCHES = 0x30 UMASK_IDQ_ALL_DSB_CYCLES_ANY_UOPS = 0x18 UMASK_IDQ_ALL_DSB_CYCLES_1_UOPS = 0x18 UMASK_IDQ_ALL_DSB_CYCLES_2_UOPS = 0x18 UMASK_IDQ_ALL_DSB_CYCLES_3_UOPS = 0x18 UMASK_IDQ_ALL_DSB_CYCLES_4_UOPS = 0x18 UMASK_IDQ_ALL_DSB_CYCLES_5_UOPS = 0x18 UMASK_IDQ_ALL_DSB_CYCLES_6_UOPS = 0x18 
UMASK_IDQ_ALL_MITE_CYCLES_ANY_UOPS = 0x24 UMASK_IDQ_ALL_MITE_CYCLES_1_UOPS = 0x24 UMASK_IDQ_ALL_MITE_CYCLES_2_UOPS = 0x24 UMASK_IDQ_ALL_MITE_CYCLES_3_UOPS = 0x24 UMASK_IDQ_ALL_MITE_CYCLES_4_UOPS = 0x24 UMASK_IDQ_ALL_MITE_CYCLES_5_UOPS = 0x24 UMASK_IDQ_ALL_MITE_CYCLES_6_UOPS = 0x24 UMASK_IDQ_ALL_CYCLES_ANY_UOPS = 0x3C UMASK_IDQ_ALL_CYCLES_1_UOPS = 0x3C UMASK_IDQ_ALL_CYCLES_2_UOPS = 0x3C UMASK_IDQ_ALL_CYCLES_3_UOPS = 0x3C UMASK_IDQ_ALL_CYCLES_4_UOPS = 0x3C UMASK_IDQ_ALL_CYCLES_5_UOPS = 0x3C UMASK_IDQ_ALL_CYCLES_6_UOPS = 0x3C EVENT_L2_LINES_IN = 0xF1 UMASK_L2_LINES_IN_I = 0x01 UMASK_L2_LINES_IN_S = 0x02 UMASK_L2_LINES_IN_E = 0x04 UMASK_L2_LINES_IN_ALL = 0x07 EVENT_L2_LINES_OUT = 0xF2 UMASK_L2_LINES_OUT_SILENT = 0x01 UMASK_L2_LINES_OUT_NON_SILENT = 0x02 UMASK_L2_LINES_OUT_USELESS_PREF = 0x04 EVENT_ARITH_DIVIDER = 0x14 UMASK_ARITH_DIVIDER_ACTIVE = 0x01 UMASK_ARITH_DIVIDER_COUNT = 0x01 EVENT_LSD_UOPS = 0xA8 UMASK_LSD_UOPS = 0x01 UMASK_LSD_UOPS_CYCLES_1 = 0x01 UMASK_LSD_UOPS_CYCLES_2 = 0x01 UMASK_LSD_UOPS_CYCLES_3 = 0x01 UMASK_LSD_UOPS_CYCLES_4 = 0x01 UMASK_LSD_UOPS_CYCLES_ACTIVE = 0x01 UMASK_LSD_UOPS_CYCLES_INACTIVE = 0x01 EVENT_OTHER_ASSISTS_ANY = 0xC1 UMASK_OTHER_ASSISTS_ANY = 0x3F EVENT_FRONTEND_RETIRED_LATENCY = 0xC6 //UMASK_FRONTEND_RETIRED_LATENCY_GE_8=0x01 0x00 0x400806 //UMASK_FRONTEND_RETIRED_LATENCY_GE_16=0x01 0x00 0x401006 //UMASK_FRONTEND_RETIRED_LATENCY_GE_32=0x01 0x00 0x402006 //UMASK_FRONTEND_RETIRED_LATENCY_GE_64=0x01 0x00 0x404006 //UMASK_FRONTEND_RETIRED_LATENCY_GE_128=0x01 0x00 0x408006 //UMASK_FRONTEND_RETIRED_LATENCY_GE_256=0x01 0x00 0x410006 //UMASK_FRONTEND_RETIRED_LATENCY_GE_512=0x01 0x00 0x420006 //UMASK_FRONTEND_RETIRED_LATENCY_GE_2_BUBBLES_GE_1=0x01 0x00 0x100206 //UMASK_FRONTEND_RETIRED_LATENCY_GE_2_BUBBLES_GE_3=0x01 0x00 0x300206 EVENT_OFFCORE_RESPONSE_0 = 0xB7 EVENT_OFFCORE_RESPONSE_1 = 0xBB EVENT_CACHE_LOOKUP = 0x34 UMASK_CACHE_LOOKUP_M = 0x01 UMASK_CACHE_LOOKUP_E = 0x02 UMASK_CACHE_LOOKUP_S = 0x04 UMASK_CACHE_LOOKUP_I = 0x08 UMASK_CACHE_LOOKUP_READ_FILTER = 0x10 UMASK_CACHE_LOOKUP_WRITE_FILTER = 0x20 UMASK_CACHE_LOOKUP_EXTSNP_FILTER = 0x40 UMASK_CACHE_LOOKUP_ANY_REQUEST_FILTER = 0x80 UMASK_CACHE_LOOKUP_READ_M = 0x11 UMASK_CACHE_LOOKUP_WRITE_M = 0x21 UMASK_CACHE_LOOKUP_EXTSNP_M = 0x41 UMASK_CACHE_LOOKUP_ANY_M = 0x81 UMASK_CACHE_LOOKUP_READ_E = 0x12 UMASK_CACHE_LOOKUP_WRITE_E = 0x22 UMASK_CACHE_LOOKUP_EXTSNP_E = 0x42 UMASK_CACHE_LOOKUP_ANY_E = 0x82 UMASK_CACHE_LOOKUP_READ_S = 0x14 UMASK_CACHE_LOOKUP_WRITE_S = 0x24 UMASK_CACHE_LOOKUP_EXTSNP_S = 0x44 UMASK_CACHE_LOOKUP_ANY_S = 0x84 UMASK_CACHE_LOOKUP_READ_ES = 0x16 UMASK_CACHE_LOOKUP_WRITE_ES = 0x26 UMASK_CACHE_LOOKUP_EXTSNP_ES = 0x46 UMASK_CACHE_LOOKUP_ANY_ES = 0x86 UMASK_CACHE_LOOKUP_READ_I = 0x18 UMASK_CACHE_LOOKUP_WRITE_I = 0x28 UMASK_CACHE_LOOKUP_EXTSNP_I = 0x48 UMASK_CACHE_LOOKUP_ANY_I = 0x88 UMASK_CACHE_LOOKUP_READ_MESI = 0x1F UMASK_CACHE_LOOKUP_WRITE_MESI = 0x2F UMASK_CACHE_LOOKUP_EXTSNP_MESI = 0x4F UMASK_CACHE_LOOKUP_ANY_MESI = 0x8F EVENT_XSNP_RESPONSE = 0x22 UMASK_XSNP_RESPONSE_MISS_EXTERNAL = 0x21 UMASK_XSNP_RESPONSE_MISS_XCORE = 0x41 UMASK_XSNP_RESPONSE_MISS_EVICTION = 0x81 UMASK_XSNP_RESPONSE_HIT_EXTERNAL = 0x24 UMASK_XSNP_RESPONSE_HIT_XCORE = 0x44 UMASK_XSNP_RESPONSE_HIT_EVICTION = 0x84 UMASK_XSNP_RESPONSE_HITM_EXTERNAL = 0x28 UMASK_XSNP_RESPONSE_HITM_XCORE = 0x48 UMASK_XSNP_RESPONSE_HITM_EVICTION = 0x88 EVENT_TRK_OCCUPANCY_ALL = 0x80 UMASK_TRK_OCCUPANCY_ALL = 0x01 EVENT_TRK_REQUESTS = 0x81 UMASK_TRK_REQUESTS_ALL = 0x01 UMASK_TRK_REQUESTS_WRITES = 0x20 EVENT_COH_TRK_OCCUPANCY = 0x83 
UMASK_COH_TRK_OCCUPANCY = 0x01 EVENT_COH_TRK_REQUESTS = 0x84 UMASK_COH_TRK_REQUESTS_ALL = 0x01 EVENT_UNCORE_CLOCK = 0x00 UMASK_UNCORE_CLOCK = 0x01 EVENT_IO_REQUESTS = 0x00 UMASK_IO_REQUESTS = 0x00 EVENT_DRAM_READS = 0x01 UMASK_DRAM_READS = 0x01 EVENT_DRAM_WRITES = 0x02 UMASK_DRAM_WRITES = 0x02 EVENT_PP0_TEMP = 0x00 UMASK_PP0_TEMP = 0x00 EVENT_PP1_TEMP = 0x01 UMASK_PP1_TEMP = 0x01 EVENT_VOLTAGE_CORE = 0x00 UMASK_VOLTAGE_CORE = 0x00 ) perf-utils-0.5.1/msr/skylake/skylake_msr_test.go000066400000000000000000000010311413762255200217670ustar00rootroot00000000000000package skylake import ( "testing" "github.com/hodgesds/perf-utils" "golang.org/x/sys/unix" ) func TestSkylake(t *testing.T) { eventAttr := &unix.PerfEventAttr{ Type: unix.PERF_TYPE_RAW, Config: 0x1B0, Size: perf.EventAttrSize, Bits: unix.PerfBitExcludeKernel | unix.PerfBitExcludeHv, Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED, } _, err := unix.PerfEventOpen( eventAttr, unix.Gettid(), -1, -1, 0, ) if err != nil { t.Fatal(err) } } perf-utils-0.5.1/msr/zen/000077500000000000000000000000001413762255200152135ustar00rootroot00000000000000perf-utils-0.5.1/msr/zen/zen_msr.go000066400000000000000000000320341413762255200172210ustar00rootroot00000000000000package zen const ( EVENT_ACTUAL_CPU_CLOCK = 0x01 UMASK_ACTUAL_CPU_CLOCK = 0x00 EVENT_APERF = 0x01 UMASK_APERF = 0x00 EVENT_MAX_CPU_CLOCK = 0x02 UMASK_MAX_CPU_CLOCK = 0x00 EVENT_MPERF = 0x02 UMASK_MPERF = 0x00 EVENT_MERGE = 0xFFF UMASK_MERGE = 0x00 EVENT_FPU_PIPE_ASSIGNMENT = 0x00 UMASK_FPU_PIPE_ASSIGNMENT_UOPS_PIPE_0 = 0x01 UMASK_FPU_PIPE_ASSIGNMENT_UOPS_PIPE_1 = 0x02 UMASK_FPU_PIPE_ASSIGNMENT_UOPS_PIPE_2 = 0x04 UMASK_FPU_PIPE_ASSIGNMENT_UOPS_PIPE_3 = 0x08 UMASK_FPU_PIPE_ASSIGNMENT_UOPS_PIPE_ALL = 0x0F UMASK_FPU_PIPE_ASSIGNMENT_MULTI_PIPE_UOPS_PIPE_0 = 0x10 UMASK_FPU_PIPE_ASSIGNMENT_MULTI_PIPE_UOPS_PIPE_1 = 0x20 UMASK_FPU_PIPE_ASSIGNMENT_MULTI_PIPE_UOPS_PIPE_2 = 0x40 UMASK_FPU_PIPE_ASSIGNMENT_MULTI_PIPE_UOPS_PIPE_3 = 0x80 UMASK_FPU_PIPE_ASSIGNMENT_MULTI_PIPE_UOPS_PIPE_ALL = 0xF0 EVENT_FP_SCHEDULER_EMPTY = 0x01 UMASK_FP_SCHEDULER_EMPTY = 0x00 EVENT_RETIRED_X87_FLOPS = 0x02 UMASK_RETIRED_X87_FLOPS_ADD_SUB = 0x01 UMASK_RETIRED_X87_FLOPS_MULT = 0x02 UMASK_RETIRED_X87_FLOPS_DIV = 0x04 UMASK_RETIRED_X87_FLOPS_ALL = 0x07 EVENT_RETIRED_SSE_AVX_FLOPS = 0x03 UMASK_RETIRED_SSE_AVX_FLOPS_SINGLE_ADD_SUB = 0x01 UMASK_RETIRED_SSE_AVX_FLOPS_SINGLE_MULT = 0x02 UMASK_RETIRED_SSE_AVX_FLOPS_SINGLE_DIV = 0x04 UMASK_RETIRED_SSE_AVX_FLOPS_SINGLE_ADD_MULT_DIV = 0x07 UMASK_RETIRED_SSE_AVX_FLOPS_SINGLE_FMA = 0x08 UMASK_RETIRED_SSE_AVX_FLOPS_SINGLE_ALL = 0x0F UMASK_RETIRED_SSE_AVX_FLOPS_DOUBLE_ADD_SUB = 0x10 UMASK_RETIRED_SSE_AVX_FLOPS_DOUBLE_MULT = 0x20 UMASK_RETIRED_SSE_AVX_FLOPS_DOUBLE_DIV = 0x40 UMASK_RETIRED_SSE_AVX_FLOPS_DOUBLE_ADD_MULT_DIV = 0x70 UMASK_RETIRED_SSE_AVX_FLOPS_DOUBLE_FMA = 0x80 UMASK_RETIRED_SSE_AVX_FLOPS_DOUBLE_ALL = 0xF0 UMASK_RETIRED_SSE_AVX_FLOPS_ALL = 0xFF EVENT_MOVE_ELIMINATION = 0x04 UMASK_MOVE_ELIMINATION_SSE_MOVES = 0x01 UMASK_MOVE_ELIMINATION_SSE_MOVE_ELIMS = 0x02 EVENT_SCALAR_OP_OPTIMIZATIONS = 0x04 UMASK_SCALAR_OP_OPTIMIZATIONS_POTENTIAL = 0x04 UMASK_SCALAR_OP_OPTIMIZATIONS_DONE = 0x08 EVENT_RETIRED_SERIALIZING_OPS = 0x05 UMASK_RETIRED_SERIALIZING_OPS_SSE_BOTTOM = 0x00 UMASK_RETIRED_SERIALIZING_OPS_SSE_CONTROL = 0x01 UMASK_RETIRED_SERIALIZING_OPS_X87_BOTTOM = 0x02 UMASK_RETIRED_SERIALIZING_OPS_X87_CONTROL = 0x04 EVENT_LS_BAD_STATUS = 0x24 UMASK_LS_BAD_STATUS_STLI_NO_STATE = 0x01 UMASK_LS_BAD_STATUS_STLI_OTHER = 0x02 
UMASK_LS_BAD_STATUS_STLF_NO_DATA = 0x04 EVENT_LOCKS = 0x25 UMASK_LOCKS_BUS_LOCK = 0x01 UMASK_LOCKS_NON_SPEC_LOCK = 0x02 UMASK_LOCKS_SPEC_LOCK = 0x04 UMASK_LOCKS_SPEC_LOCK_MAP_COMMIT = 0x08 EVENT_RETIRED_CLFLUSH = 0x26 UMASK_RETIRED_CLFLUSH = 0x00 EVENT_RETIRED_CPUID = 0x27 UMASK_RETIRED_CPUID = 0x00 EVENT_LS_DISPATCH = 0x29 UMASK_LS_DISPATCH_LOADS = 0x01 UMASK_LS_DISPATCH_STORES = 0x02 UMASK_LS_DISPATCH_LOAD_OP_STORES = 0x04 UMASK_LS_DISPATCH_ANY = 0x07 EVENT_SMIS_RECEIVED = 0x2B UMASK_SMIS_RECEIVED = 0x00 EVENT_ST_TO_LD_FWD = 0x35 UMASK_ST_TO_LD_FWD = 0x00 EVENT_ST_COMMIT_CANCELS = 0x37 UMASK_ST_COMMIT_CANCELS = 0x01 EVENT_DATA_CACHE_ACCESSES = 0x40 UMASK_DATA_CACHE_ACCESSES = 0x00 EVENT_MAB_ALLOC_PIPE = 0x41 UMASK_MAB_ALLOC_PIPE_DATA = 0x01 UMASK_MAB_ALLOC_PIPE_STORE = 0x02 UMASK_MAB_ALLOC_PIPE_TLB_LATE = 0x04 UMASK_MAB_ALLOC_PIPE_HW_PF = 0x08 UMASK_MAB_ALLOC_PIPE_TLB_EARLY = 0x10 EVENT_DATA_CACHE_REFILLS = 0x43 UMASK_DATA_CACHE_REFILLS_LOCAL_L2 = 0x01 UMASK_DATA_CACHE_REFILLS_LOCAL_CACHE = 0x02 UMASK_DATA_CACHE_REFILLS_LOCAL_DRAM = 0x08 UMASK_DATA_CACHE_REFILLS_LOCAL_ALL = 0x0B UMASK_DATA_CACHE_REFILLS_REMOTE_CACHE = 0x10 UMASK_DATA_CACHE_REFILLS_REMOTE_DRAM = 0x40 UMASK_DATA_CACHE_REFILLS_REMOTE_ALL = 0x50 UMASK_DATA_CACHE_REFILLS_ALL = 0x5B EVENT_L1_DTLB_MISS = 0x45 UMASK_L1_DTLB_MISS_4K_L2_HIT = 0x01 UMASK_L1_DTLB_MISS_32K_L2_HIT = 0x02 UMASK_L1_DTLB_MISS_2M_L2_HIT = 0x04 UMASK_L1_DTLB_MISS_1G_L2_HIT = 0x08 UMASK_L1_DTLB_MISS_ANY_L2_HIT = 0x0F UMASK_L1_DTLB_MISS_4K_L2_MISS = 0x10 UMASK_L1_DTLB_MISS_32K_L2_MISS = 0x20 UMASK_L1_DTLB_MISS_2M_L2_MISS = 0x40 UMASK_L1_DTLB_MISS_1G_L2_MISS = 0x80 UMASK_L1_DTLB_MISS_ANY_L2_MISS = 0xF0 EVENT_TABLEWALKER_ALLOC = 0x46 UMASK_TABLEWALKER_ALLOC_DSIDE0 = 0x01 UMASK_TABLEWALKER_ALLOC_DSIDE1 = 0x02 UMASK_TABLEWALKER_ALLOC_ISIDE0 = 0x04 UMASK_TABLEWALKER_ALLOC_ISIDE1 = 0x08 EVENT_MISALIGNED_LOADS = 0x47 UMASK_MISALIGNED_LOADS = 0x00 EVENT_PREF_INSTR_DISPATCHED = 0x4B UMASK_PREF_INSTR_DISPATCHED_LOAD = 0x01 UMASK_PREF_INSTR_DISPATCHED_STORE = 0x02 UMASK_PREF_INSTR_DISPATCHED_NTA = 0x04 EVENT_INEFFECTIVE_SW_PREF = 0x52 UMASK_INEFFECTIVE_SW_PREF_DATA_CACHE_HIT = 0x01 UMASK_INEFFECTIVE_SW_PREF_MAB_MATCH = 0x02 EVENT_SWPREF_DATA_CACHE_FILLS = 0x59 UMASK_SWPREF_DATA_CACHE_FILLS_LOCAL_L2 = 0x01 UMASK_SWPREF_DATA_CACHE_FILLS_LOCAL_CACHE = 0x02 UMASK_SWPREF_DATA_CACHE_FILLS_LOCAL_DRAM = 0x08 UMASK_SWPREF_DATA_CACHE_FILLS_LOCAL_ALL = 0x0B UMASK_SWPREF_DATA_CACHE_FILLS_REMOTE_CACHE = 0x10 UMASK_SWPREF_DATA_CACHE_FILLS_REMOTE_DRAM = 0x40 UMASK_SWPREF_DATA_CACHE_FILLS_REMOTE_ALL = 0x50 UMASK_SWPREF_DATA_CACHE_FILLS_ALL = 0x5B EVENT_HWPREF_DATA_CACHE_FILLS = 0x5A UMASK_HWPREF_DATA_CACHE_FILLS_LOCAL_L2 = 0x01 UMASK_HWPREF_DATA_CACHE_FILLS_LOCAL_CACHE = 0x02 UMASK_HWPREF_DATA_CACHE_FILLS_LOCAL_DRAM = 0x08 UMASK_HWPREF_DATA_CACHE_FILLS_LOCAL_ALL = 0x0B UMASK_HWPREF_DATA_CACHE_FILLS_REMOTE_CACHE = 0x10 UMASK_HWPREF_DATA_CACHE_FILLS_REMOTE_DRAM = 0x40 UMASK_HWPREF_DATA_CACHE_FILLS_REMOTE_ALL = 0x50 UMASK_HWPREF_DATA_CACHE_FILLS_ALL = 0x5B EVENT_TABLEWALKER_DATA_CACHE_FILLS = 0x5B UMASK_TABLEWALKER_DATA_CACHE_FILLS_LOCAL_L2 = 0x01 UMASK_TABLEWALKER_DATA_CACHE_FILLS_LOCAL_CACHE = 0x02 UMASK_TABLEWALKER_DATA_CACHE_FILLS_LOCAL_DRAM = 0x08 UMASK_TABLEWALKER_DATA_CACHE_FILLS_LOCAL_ALL = 0x0B UMASK_TABLEWALKER_DATA_CACHE_FILLS_REMOTE_CACHE = 0x10 UMASK_TABLEWALKER_DATA_CACHE_FILLS_REMOTE_DRAM = 0x40 UMASK_TABLEWALKER_DATA_CACHE_FILLS_REMOTE_ALL = 0x50 UMASK_TABLEWALKER_DATA_CACHE_FILLS_ALL = 0x5B EVENT_CPU_CLOCKS_UNHALTED = 0x76 UMASK_CPU_CLOCKS_UNHALTED = 0x00 
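// NOTE (editorial sketch): some AMD event selects in this block are wider
// than 8 bits (e.g. EVENT_TAGGED_IBS_OPS = 0x1CF, EVENT_MERGE = 0xFFF). For
// raw perf events the usual AMD PERF_CTL-style packing puts event[7:0] in
// config bits 0-7, the unit mask in bits 8-15, and event[11:8] in bits
// 32-35; treat this as an assumption to verify against
// /sys/bus/event_source/devices/cpu/format on the target machine:
//
//	event, umask := uint64(EVENT_TAGGED_IBS_OPS), uint64(UMASK_TAGGED_IBS_OPS_COUNT)
//	config := (event & 0xFF) | (umask << 8) | ((event >> 8) << 32)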
EVENT_ICACHE_FETCHES = 0x80 UMASK_ICACHE_FETCHES = 0x00 EVENT_ICACHE_MISSES = 0x81 UMASK_ICACHE_MISSES = 0x00 EVENT_ICACHE_L2_REFILLS = 0x82 UMASK_ICACHE_L2_REFILLS = 0x00 EVENT_ICACHE_SYSTEM_REFILLS = 0x83 UMASK_ICACHE_SYSTEM_REFILLS = 0x00 EVENT_L1_ITLB_L2_ITLB_HIT = 0x84 UMASK_L1_ITLB_L2_ITLB_HIT = 0x00 EVENT_L1_ITLB_L2_ITLB_MISS = 0x85 UMASK_L1_ITLB_L2_ITLB_MISS = 0x00 EVENT_PIPELINE_RESTART_DUE_INSTR_STREAM_PROBE = 0x86 UMASK_PIPELINE_RESTART_DUE_INSTR_STREAM_PROBE = 0x00 EVENT_INSTR_PIPE_STALL = 0x87 UMASK_INSTR_PIPE_STALL_BACK_PRESSURE = 0x01 UMASK_INSTR_PIPE_STALL_DQ_EMPTY = 0x02 UMASK_INSTR_PIPE_STALL_ANY = 0x04 EVENT_L1_BTB_CORRECTION = 0x8A UMASK_L1_BTB_CORRECTION = 0x00 EVENT_L2_BTB_CORRECTION = 0x8B UMASK_L2_BTB_CORRECTION = 0x00 EVENT_ICACHE_LINES_INVALIDATED = 0x8C UMASK_ICACHE_LINES_INVALIDATED_FILL = 0x01 UMASK_ICACHE_LINES_INVALIDATED_L2_PROBE = 0x02 EVENT_ITLB_RELOADS = 0x99 UMASK_ITLB_RELOADS = 0x00 EVENT_OC_MODE_SWITCH = 0x28A UMASK_OC_MODE_SWITCH_IC_OC = 0x01 UMASK_OC_MODE_SWITCH_OC_IC = 0x02 EVENT_DYN_TOKENS_DISP_STALL_CYCLES = 0xAF UMASK_DYN_TOKENS_DISP_STALL_CYCLES_ALSQ1_TOKEN_STALL = 0x01 UMASK_DYN_TOKENS_DISP_STALL_CYCLES_ALSQ2_TOKEN_STALL = 0x02 UMASK_DYN_TOKENS_DISP_STALL_CYCLES_ALSQ3_TOKEN_STALL = 0x04 UMASK_DYN_TOKENS_DISP_STALL_CYCLES_ALSQ3_0_TOKEN_STALL = 0x08 UMASK_DYN_TOKENS_DISP_STALL_CYCLES_ALSQ_ANY_TOKEN_STALL = 0x0F UMASK_DYN_TOKENS_DISP_STALL_CYCLES_ALU_TOKEN_STALL = 0x10 UMASK_DYN_TOKENS_DISP_STALL_CYCLES_AGSQ_TOKEN_STALL = 0x20 UMASK_DYN_TOKENS_DISP_STALL_CYCLES_RETIRE_TOKEN_STALL = 0x40 EVENT_RETIRED_INSTRUCTIONS = 0xC0 UMASK_RETIRED_INSTRUCTIONS = 0x00 EVENT_RETIRED_UOPS = 0xC1 UMASK_RETIRED_UOPS = 0x00 EVENT_RETIRED_BRANCH_INSTR = 0xC2 UMASK_RETIRED_BRANCH_INSTR = 0x00 EVENT_RETIRED_MISP_BRANCH_INSTR = 0xC3 UMASK_RETIRED_MISP_BRANCH_INSTR = 0x00 EVENT_RETIRED_TAKEN_BRANCH_INSTR = 0xC4 UMASK_RETIRED_TAKEN_BRANCH_INSTR = 0x00 EVENT_RETIRED_TAKEN_MISP_BRANCH_INSTR = 0xC5 UMASK_RETIRED_TAKEN_MISP_BRANCH_INSTR = 0x00 EVENT_RETIRED_FAR_CONTROL_TRANSFERS = 0xC6 UMASK_RETIRED_FAR_CONTROL_TRANSFERS = 0x00 EVENT_RETIRED_BRANCH_RESYNCS = 0xC7 UMASK_RETIRED_BRANCH_RESYNCS = 0x00 EVENT_RETIRED_NEAR_RETURNS = 0xC8 UMASK_RETIRED_NEAR_RETURNS = 0x00 EVENT_RETIRED_NEAR_RETURNS_MISP = 0xC9 UMASK_RETIRED_NEAR_RETURNS_MISP = 0x00 EVENT_RETIRED_INDIRECT_BRANCHES_MISP = 0xCA UMASK_RETIRED_INDIRECT_BRANCHES_MISP = 0x00 EVENT_RETIRED_MMX_FP_INSTR = 0xCB UMASK_RETIRED_MMX_FP_INSTR_X87 = 0x01 UMASK_RETIRED_MMX_FP_INSTR_MMX = 0x02 UMASK_RETIRED_MMX_FP_INSTR_SSE = 0x04 UMASK_RETIRED_MMX_FP_INSTR_ALL = 0x07 EVENT_RETIRED_COND_BRANCH_INSTR = 0xD1 UMASK_RETIRED_COND_BRANCH_INSTR = 0x00 EVENT_RETIRED_COND_BRANCH_INSTR_MISP = 0xD2 UMASK_RETIRED_COND_BRANCH_INSTR_MISP = 0x00 EVENT_DIV_BUSY_CYCLES = 0xD3 UMASK_DIV_BUSY_CYCLES = 0x00 EVENT_DIV_OP_COUNT = 0xD4 UMASK_DIV_OP_COUNT = 0x00 EVENT_TAGGED_IBS_OPS = 0x1CF UMASK_TAGGED_IBS_OPS_COUNT = 0x01 UMASK_TAGGED_IBS_OPS_COUNT_RETIRED = 0x02 UMASK_TAGGED_IBS_OPS_IBS_COUNT_ROLLOVER = 0x04 EVENT_RETIRED_FUSED_BRANCH_INSTR = 0x1D0 UMASK_RETIRED_FUSED_BRANCH_INSTR = 0x00 EVENT_REQUESTS_TO_L2_GRP1 = 0x60 UMASK_REQUESTS_TO_L2_GRP1_OTHER = 0x01 UMASK_REQUESTS_TO_L2_GRP1_L2_HW_PREF = 0x02 UMASK_REQUESTS_TO_L2_GRP1_PREF_L2 = 0x04 UMASK_REQUESTS_TO_L2_GRP1_CHANGE_TO_X = 0x08 UMASK_REQUESTS_TO_L2_GRP1_CACHEABLE_IC_READ = 0x10 UMASK_REQUESTS_TO_L2_GRP1_LS_RD_BLOCK_C_S = 0x20 UMASK_REQUESTS_TO_L2_GRP1_RD_BLOCK_X = 0x40 UMASK_REQUESTS_TO_L2_GRP1_RD_BLOCK_L = 0x80 UMASK_REQUESTS_TO_L2_GRP1_ALL = 0xFF UMASK_REQUESTS_TO_L2_GRP1_ALL_NO_PF = 0xF9 
UMASK_REQUESTS_TO_L2_GRP1_DATA_CACHE_MISS = 0xC8 EVENT_REQUESTS_TO_L2_GRP2 = 0x61 UMASK_REQUESTS_TO_L2_GRP2_BUS_LOCK_RESP = 0x01 UMASK_REQUESTS_TO_L2_GRP2_BUS_LOCK_ORIG = 0x02 UMASK_REQUESTS_TO_L2_GRP2_SMC_INVAL = 0x04 UMASK_REQUESTS_TO_L2_GRP2_IC_READ_SIZED_NC = 0x08 UMASK_REQUESTS_TO_L2_GRP2_IC_READ_SIZED = 0x10 UMASK_REQUESTS_TO_L2_GRP2_LS_READ_SIZED_NC = 0x20 UMASK_REQUESTS_TO_L2_GRP2_LS_READ_SIZED = 0x40 UMASK_REQUESTS_TO_L2_GRP2_GRP1 = 0x80 UMASK_REQUESTS_TO_L2_GRP2_IC_READ = 0x18 EVENT_L2_LATENCY_CYCLES = 0x62 UMASK_L2_LATENCY_CYCLES_WAIT_ON_FILLS = 0x01 EVENT_LS_TO_L2_WBC_REQUESTS = 0x63 UMASK_LS_TO_L2_WBC_REQUESTS_CL_ZERO = 0x01 UMASK_LS_TO_L2_WBC_REQUESTS_LOCAL_IC_CLEAR = 0x02 UMASK_LS_TO_L2_WBC_REQUESTS_ZERO_BYTE_STORE = 0x04 UMASK_LS_TO_L2_WBC_REQUESTS_I_LINE_FLUSH = 0x08 UMASK_LS_TO_L2_WBC_REQUESTS_CACHELINE_FLUSH = 0x10 UMASK_LS_TO_L2_WBC_REQUESTS_WCB_CLOSE = 0x20 UMASK_LS_TO_L2_WBC_REQUESTS_WCB_WRITE = 0x40 UMASK_LS_TO_L2_WBC_REQUESTS_ALL = 0x7F EVENT_CORE_TO_L2_CACHE_REQUESTS = 0x64 UMASK_CORE_TO_L2_CACHE_REQUESTS_IC_FILL_MISS = 0x01 UMASK_CORE_TO_L2_CACHE_REQUESTS_IC_FILL_HIT_S = 0x02 UMASK_CORE_TO_L2_CACHE_REQUESTS_IC_FILL_HIT_X = 0x04 UMASK_CORE_TO_L2_CACHE_REQUESTS_LD_READ_BLK_C = 0x08 UMASK_CORE_TO_L2_CACHE_REQUESTS_LD_READ_BLK_X = 0x10 UMASK_CORE_TO_L2_CACHE_REQUESTS_LD_READ_BLK_L_HIT_S = 0x20 UMASK_CORE_TO_L2_CACHE_REQUESTS_LD_READ_BLK_L_HIT_X = 0x40 UMASK_CORE_TO_L2_CACHE_REQUESTS_LD_READ_BLK_CS = 0x80 EVENT_CYCLES_FILL_PEND_FROM_L2 = 0x6D UMASK_CYCLES_FILL_PEND_FROM_L2_BUSY = 0x01 EVENT_L2_INST_CACHE_MISS = 0x64 UMASK_L2_INST_CACHE_MISS = 0x01 EVENT_L2_DATA_CACHE_MISS = 0x64 UMASK_L2_DATA_CACHE_MISS = 0x08 EVENT_L1_INST_MISS_L2_HIT = 0x64 UMASK_L1_INST_MISS_L2_HIT = 0x06 EVENT_L1_DATA_MISS_L2_HIT = 0x64 UMASK_L1_DATA_MISS_L2_HIT = 0x70 EVENT_L3_ACCESS = 0x01 UMASK_L3_ACCESS = 0x80 EVENT_L3_MISS = 0x06 UMASK_L3_MISS = 0x01 EVENT_RAPL_CORE_ENERGY = 0x01 UMASK_RAPL_CORE_ENERGY = 0x00 EVENT_RAPL_PKG_ENERGY = 0x02 UMASK_RAPL_PKG_ENERGY = 0x00 EVENT_DATA_FROM_LOCAL_DRAM_CHANNEL = 0x07 UMASK_DATA_FROM_LOCAL_DRAM_CHANNEL = 0x38 EVENT_DATA_TO_LOCAL_DRAM_CHANNEL = 0x47 UMASK_DATA_TO_LOCAL_DRAM_CHANNEL = 0x38 EVENT_DATA_OUT_TO_REMOTE_0 = 0x187 UMASK_DATA_OUT_TO_REMOTE_0 = 0x02 EVENT_DATA_OUT_TO_REMOTE_1 = 0x1C7 UMASK_DATA_OUT_TO_REMOTE_1 = 0x02 EVENT_DATA_OUT_TO_REMOTE_2 = 0x207 UMASK_DATA_OUT_TO_REMOTE_2 = 0x02 EVENT_DATA_OUT_TO_REMOTE_3 = 0x287 UMASK_DATA_OUT_TO_REMOTE_3 = 0x02 EVENT_DATA_OUT_TO_REMOTE_4 = 0x247 UMASK_DATA_OUT_TO_REMOTE_4 = 0x02 EVENT_DATA_OUT_TO_REMOTE_5 = 0x2C7 UMASK_DATA_OUT_TO_REMOTE_5 = 0x02 ) perf-utils-0.5.1/msr/zen2/000077500000000000000000000000001413762255200152755ustar00rootroot00000000000000perf-utils-0.5.1/msr/zen2/zen2_msr.go000066400000000000000000000240041413762255200173630ustar00rootroot00000000000000package zen2 const ( EVENT_ACTUAL_CPU_CLOCK = 0x01 UMASK_ACTUAL_CPU_CLOCK = 0x00 EVENT_APERF = 0x01 UMASK_APERF = 0x00 EVENT_MAX_CPU_CLOCK = 0x02 UMASK_MAX_CPU_CLOCK = 0x00 EVENT_MPERF = 0x02 UMASK_MPERF = 0x00 EVENT_MERGE = 0xFFF UMASK_MERGE = 0x00 EVENT_RETIRED_SSE_AVX_FLOPS = 0x03 UMASK_RETIRED_SSE_AVX_FLOPS_ADD_SUB = 0x01 UMASK_RETIRED_SSE_AVX_FLOPS_MULT = 0x02 UMASK_RETIRED_SSE_AVX_FLOPS_DIV = 0x04 UMASK_RETIRED_SSE_AVX_FLOPS_ADD_MULT_DIV = 0x07 UMASK_RETIRED_SSE_AVX_FLOPS_FMA = 0x08 UMASK_RETIRED_SSE_AVX_FLOPS_ALL = 0x0F EVENT_RETIRED_SERIALIZING_OPS = 0x05 UMASK_RETIRED_SERIALIZING_OPS_SSE_BOTTOM = 0x00 UMASK_RETIRED_SERIALIZING_OPS_SSE_CONTROL = 0x01 UMASK_RETIRED_SERIALIZING_OPS_X87_BOTTOM = 0x02 
UMASK_RETIRED_SERIALIZING_OPS_X87_CONTROL = 0x04 EVENT_FP_DISPATCH_FAULTS = 0x0E UMASK_FP_DISPATCH_FAULTS_X86_FILL = 0x01 UMASK_FP_DISPATCH_FAULTS_XMM_FILL = 0x02 UMASK_FP_DISPATCH_FAULTS_YMM_FILL = 0x04 UMASK_FP_DISPATCH_FAULTS_YMM_SPILL = 0x08 EVENT_RETIRED_LOCK_INSTR = 0x25 UMASK_RETIRED_LOCK_INSTR = 0x00 EVENT_RETIRED_CLFLUSH = 0x26 UMASK_RETIRED_CLFLUSH = 0x00 EVENT_RETIRED_CPUID = 0x27 UMASK_RETIRED_CPUID = 0x00 EVENT_LS_DISPATCH = 0x29 UMASK_LS_DISPATCH_LOADS = 0x01 UMASK_LS_DISPATCH_STORES = 0x02 UMASK_LS_DISPATCH_LOAD_OP_STORES = 0x04 EVENT_SMIS_RECEIVED = 0x2B UMASK_SMIS_RECEIVED = 0x00 EVENT_INTERRUPTS_TAKEN = 0x2C UMASK_INTERRUPTS_TAKEN = 0x00 EVENT_ST_TO_LD_FWD = 0x35 UMASK_ST_TO_LD_FWD = 0x00 EVENT_ST_COMMIT_CANCELS = 0x37 UMASK_ST_COMMIT_CANCELS = 0x01 EVENT_DATA_CACHE_ACCESSES = 0x40 UMASK_DATA_CACHE_ACCESSES = 0x00 EVENT_LS_MAB_ALLOC = 0x41 UMASK_LS_MAB_ALLOC_LOADS = 0x01 UMASK_LS_MAB_ALLOC_STORES = 0x02 UMASK_LS_MAB_ALLOC_HW_PF = 0x08 EVENT_DATA_CACHE_REFILLS = 0x43 UMASK_DATA_CACHE_REFILLS_LOCAL_L2 = 0x01 UMASK_DATA_CACHE_REFILLS_LOCAL_CACHE = 0x02 UMASK_DATA_CACHE_REFILLS_LOCAL_DRAM = 0x08 UMASK_DATA_CACHE_REFILLS_LOCAL_ALL = 0x0B UMASK_DATA_CACHE_REFILLS_REMOTE_CACHE = 0x10 UMASK_DATA_CACHE_REFILLS_REMOTE_DRAM = 0x40 UMASK_DATA_CACHE_REFILLS_REMOTE_ALL = 0x50 UMASK_DATA_CACHE_REFILLS_ALL = 0x5B EVENT_L1_DTLB_MISS = 0x45 UMASK_L1_DTLB_MISS_4K_L2_HIT = 0x01 UMASK_L1_DTLB_MISS_32K_L2_HIT = 0x02 UMASK_L1_DTLB_MISS_2M_L2_HIT = 0x04 UMASK_L1_DTLB_MISS_1G_L2_HIT = 0x08 UMASK_L1_DTLB_MISS_ANY_L2_HIT = 0x0F UMASK_L1_DTLB_MISS_4K_L2_MISS = 0x10 UMASK_L1_DTLB_MISS_32K_L2_MISS = 0x20 UMASK_L1_DTLB_MISS_2M_L2_MISS = 0x40 UMASK_L1_DTLB_MISS_1G_L2_MISS = 0x80 UMASK_L1_DTLB_MISS_ANY_L2_MISS = 0xF0 EVENT_TABLEWALKER_ALLOC = 0x46 UMASK_TABLEWALKER_ALLOC_DSIDE0 = 0x01 UMASK_TABLEWALKER_ALLOC_DSIDE1 = 0x02 UMASK_TABLEWALKER_ALLOC_ISIDE0 = 0x04 UMASK_TABLEWALKER_ALLOC_ISIDE1 = 0x08 EVENT_MISALIGNED_LOADS = 0x47 UMASK_MISALIGNED_LOADS = 0x00 EVENT_PREF_INSTR_DISPATCHED = 0x4B UMASK_PREF_INSTR_DISPATCHED = 0x00 EVENT_INEFFECTIVE_SW_PREF = 0x52 UMASK_INEFFECTIVE_SW_PREF_DATA_CACHE_HIT = 0x01 UMASK_INEFFECTIVE_SW_PREF_MAB_MATCH = 0x02 EVENT_SWPREF_DATA_CACHE_FILLS = 0x59 UMASK_SWPREF_DATA_CACHE_FILLS_LOCAL_L2 = 0x01 UMASK_SWPREF_DATA_CACHE_FILLS_LOCAL_CACHE = 0x02 UMASK_SWPREF_DATA_CACHE_FILLS_LOCAL_DRAM = 0x08 UMASK_SWPREF_DATA_CACHE_FILLS_LOCAL_ALL = 0x0B UMASK_SWPREF_DATA_CACHE_FILLS_REMOTE_CACHE = 0x10 UMASK_SWPREF_DATA_CACHE_FILLS_REMOTE_DRAM = 0x40 UMASK_SWPREF_DATA_CACHE_FILLS_REMOTE_ALL = 0x50 UMASK_SWPREF_DATA_CACHE_FILLS_ALL = 0x5B EVENT_HWPREF_DATA_CACHE_FILLS = 0x5A UMASK_HWPREF_DATA_CACHE_FILLS_LOCAL_L2 = 0x01 UMASK_HWPREF_DATA_CACHE_FILLS_LOCAL_CACHE = 0x02 UMASK_HWPREF_DATA_CACHE_FILLS_LOCAL_DRAM = 0x08 UMASK_HWPREF_DATA_CACHE_FILLS_LOCAL_ALL = 0x0B UMASK_HWPREF_DATA_CACHE_FILLS_REMOTE_CACHE = 0x10 UMASK_HWPREF_DATA_CACHE_FILLS_REMOTE_DRAM = 0x40 UMASK_HWPREF_DATA_CACHE_FILLS_REMOTE_ALL = 0x50 UMASK_HWPREF_DATA_CACHE_FILLS_ALL = 0x5B EVENT_CPU_CLOCKS_UNHALTED = 0x76 UMASK_CPU_CLOCKS_UNHALTED = 0x00 EVENT_TLB_FLUSHES = 0x78 UMASK_TLB_FLUSHES = 0x00 EVENT_ICACHE_FETCHES = 0x80 UMASK_ICACHE_FETCHES = 0x00 EVENT_ICACHE_MISSES = 0x81 UMASK_ICACHE_MISSES = 0x00 EVENT_ICACHE_L2_REFILLS = 0x82 UMASK_ICACHE_L2_REFILLS = 0x00 EVENT_ICACHE_SYSTEM_REFILLS = 0x83 UMASK_ICACHE_SYSTEM_REFILLS = 0x00 EVENT_L1_ITLB_MISS_L2_ITLB_HIT = 0x84 UMASK_L1_ITLB_MISS_L2_ITLB_HIT = 0x00 EVENT_L1_ITLB_MISS_L2_ITLB_MISS = 0x85 UMASK_L1_ITLB_MISS_L2_ITLB_MISS = 0x00 EVENT_L1_BTB_CORRECTION = 0x8A 
UMASK_L1_BTB_CORRECTION = 0x00 EVENT_L2_BTB_CORRECTION = 0x8B UMASK_L2_BTB_CORRECTION = 0x00 EVENT_ICACHE_LINES_INVALIDATED = 0x8C UMASK_ICACHE_LINES_INVALIDATED_FILL = 0x01 UMASK_ICACHE_LINES_INVALIDATED_L2_PROBE = 0x02 EVENT_DEC_OVERRIDE_BTB = 0x91 UMASK_DEC_OVERRIDE_BTB = 0x00 EVENT_UOPS_DISP = 0xAA UMASK_UOPS_DISP_FROM_DEC = 0x01 UMASK_UOPS_DISP_FROM_OPCACHE = 0x02 EVENT_DYN_TOKENS_DISP_STALL_CYCLES0 = 0xAF UMASK_DYN_TOKENS_DISP_STALL_CYCLES0_ALU_TOKEN_STALL = 0x08 EVENT_DYN_TOKENS_DISP_STALL_CYCLES1 = 0xAE UMASK_DYN_TOKENS_DISP_STALL_CYCLES1_INT_REG_FILE_STALL = 0x01 UMASK_DYN_TOKENS_DISP_STALL_CYCLES1_LD_QUEUE_STALL = 0x02 UMASK_DYN_TOKENS_DISP_STALL_CYCLES1_ST_QUEUE_STALL = 0x04 UMASK_DYN_TOKENS_DISP_STALL_CYCLES1_INT_SCHED_MISC_STALL = 0x08 UMASK_DYN_TOKENS_DISP_STALL_CYCLES1_TAKEN_BRANCH_BUFFER_STALL = 0x10 UMASK_DYN_TOKENS_DISP_STALL_CYCLES1_FP_REG_FILE_STALL = 0x20 UMASK_DYN_TOKENS_DISP_STALL_CYCLES1_FP_SCHED_STALL = 0x40 UMASK_DYN_TOKENS_DISP_STALL_CYCLES1_FP_MISC_UNAVAIL = 0x80 EVENT_RETIRED_INSTRUCTIONS = 0xC0 UMASK_RETIRED_INSTRUCTIONS = 0x00 EVENT_RETIRED_UOPS = 0xC1 UMASK_RETIRED_UOPS = 0x00 EVENT_RETIRED_BRANCH_INSTR = 0xC2 UMASK_RETIRED_BRANCH_INSTR = 0x00 EVENT_RETIRED_MISP_BRANCH_INSTR = 0xC3 UMASK_RETIRED_MISP_BRANCH_INSTR = 0x00 EVENT_RETIRED_TAKEN_BRANCH_INSTR = 0xC4 UMASK_RETIRED_TAKEN_BRANCH_INSTR = 0x00 EVENT_RETIRED_TAKEN_MISP_BRANCH_INSTR = 0xC5 UMASK_RETIRED_TAKEN_MISP_BRANCH_INSTR = 0x00 EVENT_RETIRED_FAR_CONTROL_TRANSFERS = 0xC6 UMASK_RETIRED_FAR_CONTROL_TRANSFERS = 0x00 EVENT_RETIRED_NEAR_RETURNS = 0xC8 UMASK_RETIRED_NEAR_RETURNS = 0x00 EVENT_RETIRED_NEAR_RETURNS_MISP = 0xC9 UMASK_RETIRED_NEAR_RETURNS_MISP = 0x00 EVENT_RETIRED_INDIRECT_BRANCHES_MISP = 0xCA UMASK_RETIRED_INDIRECT_BRANCHES_MISP = 0x00 EVENT_RETIRED_MMX_FP_INSTR = 0xCB UMASK_RETIRED_MMX_FP_INSTR_X87 = 0x01 UMASK_RETIRED_MMX_FP_INSTR_MMX = 0x02 UMASK_RETIRED_MMX_FP_INSTR_SSE = 0x04 UMASK_RETIRED_MMX_FP_INSTR_ALL = 0x07 EVENT_RETIRED_COND_BRANCH_INSTR = 0xD1 UMASK_RETIRED_COND_BRANCH_INSTR = 0x00 EVENT_DIV_BUSY_CYCLES = 0xD3 UMASK_DIV_BUSY_CYCLES = 0x00 EVENT_DIV_OP_COUNT = 0xD4 UMASK_DIV_OP_COUNT = 0x00 EVENT_TAGGED_IBS_OPS = 0x1CF UMASK_TAGGED_IBS_OPS_COUNT = 0x01 UMASK_TAGGED_IBS_OPS_COUNT_RETIRED = 0x02 UMASK_TAGGED_IBS_OPS_COUNT_ROLLOVER = 0x04 EVENT_RETIRED_FUSED_BRANCH_INSTR = 0x1D0 UMASK_RETIRED_FUSED_BRANCH_INSTR = 0x00 EVENT_REQUESTS_TO_L2_GRP1 = 0x60 UMASK_REQUESTS_TO_L2_GRP1_GRP2 = 0x01 UMASK_REQUESTS_TO_L2_GRP1_L2_HW_PREF = 0x02 UMASK_REQUESTS_TO_L2_GRP1_PREF_L2 = 0x04 UMASK_REQUESTS_TO_L2_GRP1_CHANGE_TO_X = 0x08 UMASK_REQUESTS_TO_L2_GRP1_CACHEABLE_IC_READ = 0x10 UMASK_REQUESTS_TO_L2_GRP1_LS_RD_BLOCK_C_S = 0x20 UMASK_REQUESTS_TO_L2_GRP1_RD_BLOCK_X = 0x40 UMASK_REQUESTS_TO_L2_GRP1_RD_BLOCK_L = 0x80 UMASK_REQUESTS_TO_L2_GRP1_DATA_CACHE_MISSES = 0xC8 UMASK_REQUESTS_TO_L2_GRP1_L1_CACHES_MISS = 0x88 UMASK_REQUESTS_TO_L2_GRP1_ALL_NO_PF = 0xF9 UMASK_REQUESTS_TO_L2_GRP1_ALL = 0xFF EVENT_REQUESTS_TO_L2_GRP2 = 0x61 UMASK_REQUESTS_TO_L2_GRP2_BUS_LOCK_RESP = 0x01 UMASK_REQUESTS_TO_L2_GRP2_BUS_LOCK_ORIG = 0x02 UMASK_REQUESTS_TO_L2_GRP2_SMC_INVAL = 0x04 UMASK_REQUESTS_TO_L2_GRP2_IC_READ_SIZED_NC = 0x08 UMASK_REQUESTS_TO_L2_GRP2_IC_READ_SIZED = 0x10 UMASK_REQUESTS_TO_L2_GRP2_IC_READ = 0x18 UMASK_REQUESTS_TO_L2_GRP2_LS_READ_SIZED_NC = 0x20 UMASK_REQUESTS_TO_L2_GRP2_LS_READ_SIZED = 0x40 UMASK_REQUESTS_TO_L2_GRP2_LS_READ = 0x60 UMASK_REQUESTS_TO_L2_GRP2_GRP1 = 0x80 EVENT_CORE_TO_L2_CACHE_REQUESTS = 0x64 UMASK_CORE_TO_L2_CACHE_REQUESTS_IC_FILL_MISS = 0x01 UMASK_CORE_TO_L2_CACHE_REQUESTS_IC_FILL_HIT_S 
= 0x02 UMASK_CORE_TO_L2_CACHE_REQUESTS_IC_FILL_HIT_X = 0x04 UMASK_CORE_TO_L2_CACHE_REQUESTS_LD_READ_BLK_C = 0x08 UMASK_CORE_TO_L2_CACHE_REQUESTS_LD_READ_BLK_X = 0x10 UMASK_CORE_TO_L2_CACHE_REQUESTS_LD_READ_BLK_L_HIT_S = 0x20 UMASK_CORE_TO_L2_CACHE_REQUESTS_LD_READ_BLK_L_HIT_X = 0x40 UMASK_CORE_TO_L2_CACHE_REQUESTS_LD_READ_BLK_CS = 0x80 EVENT_L2_PF_HIT_IN_L2 = 0x70 UMASK_L2_PF_HIT_IN_L2 = 0x1F EVENT_L2_PF_HIT_IN_L3 = 0x71 UMASK_L2_PF_HIT_IN_L3 = 0x1F EVENT_L2_PF_MISS_IN_L3 = 0x72 UMASK_L2_PF_MISS_IN_L3 = 0x1F EVENT_L3_ACCESS = 0x01 UMASK_L3_ACCESS = 0x80 EVENT_L3_MISS = 0x06 UMASK_L3_MISS = 0x01 EVENT_L3_CACHE_REQ = 0x04 UMASK_L3_CACHE_REQ = 0xFF EVENT_L3_MISS_LAT = 0x90 UMASK_L3_MISS_LAT = 0x00 EVENT_L3_MISS_REQ = 0x9A UMASK_L3_MISS_REQ = 0x1F EVENT_RAPL_CORE_ENERGY = 0x01 UMASK_RAPL_CORE_ENERGY = 0x00 EVENT_RAPL_PKG_ENERGY = 0x02 UMASK_RAPL_PKG_ENERGY = 0x00 EVENT_DATA_FROM_LOCAL_DRAM_CHANNEL = 0x07 UMASK_DATA_FROM_LOCAL_DRAM_CHANNEL = 0x38 EVENT_DATA_TO_LOCAL_DRAM_CHANNEL = 0x47 UMASK_DATA_TO_LOCAL_DRAM_CHANNEL = 0x38 EVENT_DATA_OUT_TO_REMOTE_0 = 0x187 UMASK_DATA_OUT_TO_REMOTE_0 = 0x02 EVENT_DATA_OUT_TO_REMOTE_1 = 0x1C7 UMASK_DATA_OUT_TO_REMOTE_1 = 0x02 EVENT_DATA_OUT_TO_REMOTE_2 = 0x207 UMASK_DATA_OUT_TO_REMOTE_2 = 0x02 EVENT_DATA_OUT_TO_REMOTE_3 = 0x287 UMASK_DATA_OUT_TO_REMOTE_3 = 0x02 EVENT_DATA_OUT_TO_REMOTE_4 = 0x247 UMASK_DATA_OUT_TO_REMOTE_4 = 0x02 EVENT_DATA_OUT_TO_REMOTE_5 = 0x2C7 UMASK_DATA_OUT_TO_REMOTE_5 = 0x02 ) perf-utils-0.5.1/msr/zen3/000077500000000000000000000000001413762255200152765ustar00rootroot00000000000000perf-utils-0.5.1/msr/zen3/zen3_msr.go000066400000000000000000000000151413762255200173610ustar00rootroot00000000000000package zen3 perf-utils-0.5.1/msr_test.go000066400000000000000000000007671413762255200160170ustar00rootroot00000000000000package perf import ( "testing" "github.com/stretchr/testify/require" ) func TestMSRPaths(t *testing.T) { _, err := MSRPaths() require.Nil(t, err) } func TestMSR(t *testing.T) { msrs, err := MSRPaths() require.Nil(t, err) msr, err := NewMSR(msrs[0]) require.Nil(t, err) // TODO: This may only work on certain architectures :( _, err = msr.Read(0x00) require.Nil(t, err) require.Nil(t, msr.Close()) } func TestMSRs(t *testing.T) { MSRs(func(err error) { require.Nil(t, err) }) } perf-utils-0.5.1/pmu.go000066400000000000000000000013621413762255200147500ustar00rootroot00000000000000//go:build linux // +build linux package perf import ( "io/ioutil" "strconv" ) const ( PMUEventBaseDir = "/sys/bus/event_source/devices" ) // AvailablePMUs returns a mapping of available PMUs from // /sys/bus/event_source/devices to the PMU event type (number).
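//
// The returned type numbers can be used directly as the Type field of a
// unix.PerfEventAttr. An illustrative sketch ("cpu" is a common but not
// guaranteed entry):
//
//	pmus, err := AvailablePMUs()
//	if err == nil {
//		attr := &unix.PerfEventAttr{Type: uint32(pmus["cpu"])}
//		_ = attr
//	}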
func AvailablePMUs() (map[string]int, error) { pmus := make(map[string]int) pmuTypes, err := ioutil.ReadDir(PMUEventBaseDir) if err != nil { return nil, err } for _, pmuFileInfo := range pmuTypes { pmu := pmuFileInfo.Name() pmuEventStr, err := fileToStrings(PMUEventBaseDir + "/" + pmu + "/type") if err != nil { return nil, err } pmuEvent, err := strconv.Atoi(pmuEventStr[0]) if err != nil { return nil, err } pmus[pmu] = pmuEvent } return pmus, nil } perf-utils-0.5.1/pmu_test.go000066400000000000000000000003441413762255200160060ustar00rootroot00000000000000//go:build linux // +build linux package perf import ( "testing" ) func TestAvailablePMUs(t *testing.T) { pmus, err := AvailablePMUs() if err != nil { t.Fatal(err) } if len(pmus) == 0 { t.Fatal("no PMU events") } } perf-utils-0.5.1/process_profile.go000066400000000000000000000366101413762255200173510ustar00rootroot00000000000000//go:build linux // +build linux package perf import ( "encoding/binary" "fmt" "sync" "syscall" "unsafe" "golang.org/x/sys/unix" ) const ( // PERF_SAMPLE_IDENTIFIER is not defined in x/sys/unix. PERF_SAMPLE_IDENTIFIER = 1 << 16 // PERF_IOC_FLAG_GROUP is not defined in x/sys/unix. PERF_IOC_FLAG_GROUP = 1 << 0 ) var ( // ErrNoProfiler is returned when no profiler is available for profiling. ErrNoProfiler = fmt.Errorf("No profiler available") bufPool = sync.Pool{ New: func() interface{} { return make([]byte, 24, 24) }, } // ProfileValuePool is a sync.Pool of ProfileValue structs. ProfileValuePool = sync.Pool{ New: func() interface{} { return &ProfileValue{} }, } ) // Profiler is a profiler. type Profiler interface { Start() error Reset() error Stop() error Close() error Profile(*ProfileValue) error } // HardwareProfiler is a hardware profiler. type HardwareProfiler interface { Start() error Reset() error Stop() error Close() error Profile(*HardwareProfile) error HasProfilers() bool } // HardwareProfile is returned by a HardwareProfiler. Depending on kernel // configuration some fields may return nil. type HardwareProfile struct { CPUCycles *uint64 `json:"cpu_cycles,omitempty"` Instructions *uint64 `json:"instructions,omitempty"` CacheRefs *uint64 `json:"cache_refs,omitempty"` CacheMisses *uint64 `json:"cache_misses,omitempty"` BranchInstr *uint64 `json:"branch_instr,omitempty"` BranchMisses *uint64 `json:"branch_misses,omitempty"` BusCycles *uint64 `json:"bus_cycles,omitempty"` StalledCyclesFrontend *uint64 `json:"stalled_cycles_frontend,omitempty"` StalledCyclesBackend *uint64 `json:"stalled_cycles_backend,omitempty"` RefCPUCycles *uint64 `json:"ref_cpu_cycles,omitempty"` TimeEnabled *uint64 `json:"time_enabled,omitempty"` TimeRunning *uint64 `json:"time_running,omitempty"` } // Reset sets all values to defaults and will nil any pointers. func (p *HardwareProfile) Reset() { p.CPUCycles = nil p.Instructions = nil p.CacheRefs = nil p.CacheMisses = nil p.BranchInstr = nil p.BranchMisses = nil p.BusCycles = nil p.StalledCyclesFrontend = nil p.StalledCyclesBackend = nil p.RefCPUCycles = nil p.TimeEnabled = nil p.TimeRunning = nil } // SoftwareProfiler is a software profiler. type SoftwareProfiler interface { Start() error Reset() error Stop() error Close() error Profile(*SoftwareProfile) error HasProfilers() bool } // SoftwareProfile is returned by a SoftwareProfiler. 
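//
// Fields are nil for counters that were not configured or whose reads
// failed, so callers should nil-check before dereferencing (illustrative):
//
//	if sp.PageFaults != nil {
//		fmt.Println("page faults:", *sp.PageFaults)
//	}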
type SoftwareProfile struct { CPUClock *uint64 `json:"cpu_clock,omitempty"` TaskClock *uint64 `json:"task_clock,omitempty"` PageFaults *uint64 `json:"page_faults,omitempty"` ContextSwitches *uint64 `json:"context_switches,omitempty"` CPUMigrations *uint64 `json:"cpu_migrations,omitempty"` MinorPageFaults *uint64 `json:"minor_page_faults,omitempty"` MajorPageFaults *uint64 `json:"major_page_faults,omitempty"` AlignmentFaults *uint64 `json:"alignment_faults,omitempty"` EmulationFaults *uint64 `json:"emulation_faults,omitempty"` TimeEnabled *uint64 `json:"time_enabled,omitempty"` TimeRunning *uint64 `json:"time_running,omitempty"` } // Reset sets all values to defaults and will nil any pointers. func (p *SoftwareProfile) Reset() { p.CPUClock = nil p.TaskClock = nil p.PageFaults = nil p.ContextSwitches = nil p.CPUMigrations = nil p.MinorPageFaults = nil p.MajorPageFaults = nil p.AlignmentFaults = nil p.EmulationFaults = nil p.TimeEnabled = nil p.TimeRunning = nil } // CacheProfiler is a cache profiler. type CacheProfiler interface { Start() error Reset() error Stop() error Close() error Profile(*CacheProfile) error HasProfilers() bool } // CacheProfile is returned by a CacheProfiler. type CacheProfile struct { L1DataReadHit *uint64 `json:"l1_data_read_hit,omitempty"` L1DataReadMiss *uint64 `json:"l1_data_read_miss,omitempty"` L1DataWriteHit *uint64 `json:"l1_data_write_hit,omitempty"` L1InstrReadMiss *uint64 `json:"l1_instr_read_miss,omitempty"` LastLevelReadHit *uint64 `json:"last_level_read_hit,omitempty"` LastLevelReadMiss *uint64 `json:"last_level_read_miss,omitempty"` LastLevelWriteHit *uint64 `json:"last_level_write_hit,omitempty"` LastLevelWriteMiss *uint64 `json:"last_level_write_miss,omitempty"` DataTLBReadHit *uint64 `json:"data_tlb_read_hit,omitempty"` DataTLBReadMiss *uint64 `json:"data_tlb_read_miss,omitempty"` DataTLBWriteHit *uint64 `json:"data_tlb_write_hit,omitempty"` DataTLBWriteMiss *uint64 `json:"data_tlb_write_miss,omitempty"` InstrTLBReadHit *uint64 `json:"instr_tlb_read_hit,omitempty"` InstrTLBReadMiss *uint64 `json:"instr_tlb_read_miss,omitempty"` BPUReadHit *uint64 `json:"bpu_read_hit,omitempty"` BPUReadMiss *uint64 `json:"bpu_read_miss,omitempty"` NodeReadHit *uint64 `json:"node_read_hit,omitempty"` NodeReadMiss *uint64 `json:"node_read_miss,omitempty"` NodeWriteHit *uint64 `json:"node_write_hit,omitempty"` NodeWriteMiss *uint64 `json:"node_write_miss,omitempty"` TimeEnabled *uint64 `json:"time_enabled,omitempty"` TimeRunning *uint64 `json:"time_running,omitempty"` } // Reset sets all values to defaults and will nil any pointers. func (p *CacheProfile) Reset() { p.L1DataReadHit = nil p.L1DataReadMiss = nil p.L1DataWriteHit = nil p.L1InstrReadMiss = nil p.LastLevelReadHit = nil p.LastLevelReadMiss = nil p.LastLevelWriteHit = nil p.LastLevelWriteMiss = nil p.DataTLBReadHit = nil p.DataTLBReadMiss = nil p.DataTLBWriteHit = nil p.DataTLBWriteMiss = nil p.InstrTLBReadHit = nil p.InstrTLBReadMiss = nil p.BPUReadHit = nil p.BPUReadMiss = nil p.NodeReadHit = nil p.NodeReadMiss = nil p.NodeWriteHit = nil p.NodeWriteMiss = nil p.TimeEnabled = nil p.TimeRunning = nil } // ProfileValue is a value returned by a profiler. type ProfileValue struct { Value uint64 TimeEnabled uint64 TimeRunning uint64 } // profiler is used to profile a process. type profiler struct { fd int } // NewProfiler creates a new hardware profiler. It does not support grouping. 
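//
// A minimal end-to-end sketch, mirroring process_profile_test.go (error
// handling elided; when counters are multiplexed, TimeRunning may be less
// than TimeEnabled and the raw value is conventionally rescaled):
//
//	p, _ := NewProfiler(unix.PERF_TYPE_HARDWARE, unix.PERF_COUNT_HW_INSTRUCTIONS, os.Getpid(), -1)
//	_ = p.Start()
//	v := &ProfileValue{}
//	_ = p.Profile(v)
//	_ = p.Stop()
//	_ = p.Close()
//	est := float64(v.Value) * float64(v.TimeEnabled) / float64(v.TimeRunning)
//	_ = est // guard against v.TimeRunning == 0 in real code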
func NewProfiler(profilerType uint32, config uint64, pid, cpu int, opts ...int) (Profiler, error) { eventAttr := &unix.PerfEventAttr{ Type: profilerType, Config: config, Size: uint32(unsafe.Sizeof(unix.PerfEventAttr{})), Bits: unix.PerfBitDisabled | unix.PerfBitExcludeHv | unix.PerfBitInherit, Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED, Sample_type: PERF_SAMPLE_IDENTIFIER, } var eventOps int if len(opts) > 0 { eventOps = opts[0] } fd, err := unix.PerfEventOpen( eventAttr, pid, cpu, -1, eventOps, ) if err != nil { return nil, err } return &profiler{ fd: fd, }, nil } // NewCPUCycleProfiler returns a Profiler that profiles CPU cycles. func NewCPUCycleProfiler(pid, cpu int, opts ...int) (Profiler, error) { return NewProfiler( unix.PERF_TYPE_HARDWARE, unix.PERF_COUNT_HW_CPU_CYCLES, pid, cpu, opts..., ) } // NewInstrProfiler returns a Profiler that profiles CPU instructions. func NewInstrProfiler(pid, cpu int, opts ...int) (Profiler, error) { return NewProfiler( unix.PERF_TYPE_HARDWARE, unix.PERF_COUNT_HW_INSTRUCTIONS, pid, cpu, opts..., ) } // NewCacheRefProfiler returns a Profiler that profiles cache references. func NewCacheRefProfiler(pid, cpu int, opts ...int) (Profiler, error) { return NewProfiler( unix.PERF_TYPE_HARDWARE, unix.PERF_COUNT_HW_CACHE_REFERENCES, pid, cpu, opts..., ) } // NewCacheMissesProfiler returns a Profiler that profiles cache misses. func NewCacheMissesProfiler(pid, cpu int, opts ...int) (Profiler, error) { return NewProfiler( unix.PERF_TYPE_HARDWARE, unix.PERF_COUNT_HW_CACHE_MISSES, pid, cpu, opts..., ) } // NewBranchInstrProfiler returns a Profiler that profiles branch instructions. func NewBranchInstrProfiler(pid, cpu int, opts ...int) (Profiler, error) { return NewProfiler( unix.PERF_TYPE_HARDWARE, unix.PERF_COUNT_HW_BRANCH_INSTRUCTIONS, pid, cpu, opts..., ) } // NewBranchMissesProfiler returns a Profiler that profiles branch misses. func NewBranchMissesProfiler(pid, cpu int, opts ...int) (Profiler, error) { return NewProfiler( unix.PERF_TYPE_HARDWARE, unix.PERF_COUNT_HW_BRANCH_MISSES, pid, cpu, opts..., ) } // NewBusCyclesProfiler returns a Profiler that profiles bus cycles. func NewBusCyclesProfiler(pid, cpu int, opts ...int) (Profiler, error) { return NewProfiler( unix.PERF_TYPE_HARDWARE, unix.PERF_COUNT_HW_BUS_CYCLES, pid, cpu, opts..., ) } // NewStalledCyclesFrontProfiler returns a Profiler that profiles stalled // frontend cycles. func NewStalledCyclesFrontProfiler(pid, cpu int, opts ...int) (Profiler, error) { return NewProfiler( unix.PERF_TYPE_HARDWARE, unix.PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, pid, cpu, opts..., ) } // NewStalledCyclesBackProfiler returns a Profiler that profiles stalled // backend cycles. func NewStalledCyclesBackProfiler(pid, cpu int, opts ...int) (Profiler, error) { return NewProfiler( unix.PERF_TYPE_HARDWARE, unix.PERF_COUNT_HW_STALLED_CYCLES_BACKEND, pid, cpu, opts..., ) } // NewRefCPUCyclesProfiler returns a Profiler that profiles CPU cycles; it // is not affected by frequency scaling. func NewRefCPUCyclesProfiler(pid, cpu int, opts ...int) (Profiler, error) { return NewProfiler( unix.PERF_TYPE_HARDWARE, unix.PERF_COUNT_HW_REF_CPU_CYCLES, pid, cpu, opts..., ) } // NewCPUClockProfiler returns a Profiler that profiles CPU clock speed.
func NewCPUClockProfiler(pid, cpu int, opts ...int) (Profiler, error) { return NewProfiler( unix.PERF_TYPE_SOFTWARE, unix.PERF_COUNT_SW_CPU_CLOCK, pid, cpu, opts..., ) } // NewTaskClockProfiler returns a Profiler that profiles clock count of the // running task. func NewTaskClockProfiler(pid, cpu int, opts ...int) (Profiler, error) { return NewProfiler( unix.PERF_TYPE_SOFTWARE, unix.PERF_COUNT_SW_TASK_CLOCK, pid, cpu, opts..., ) } // NewPageFaultProfiler returns a Profiler that profiles the number of page // faults. func NewPageFaultProfiler(pid, cpu int, opts ...int) (Profiler, error) { return NewProfiler( unix.PERF_TYPE_SOFTWARE, unix.PERF_COUNT_SW_PAGE_FAULTS, pid, cpu, opts..., ) } // NewCtxSwitchesProfiler returns a Profiler that profiles the number of context // switches. func NewCtxSwitchesProfiler(pid, cpu int, opts ...int) (Profiler, error) { return NewProfiler( unix.PERF_TYPE_SOFTWARE, unix.PERF_COUNT_SW_CONTEXT_SWITCHES, pid, cpu, opts..., ) } // NewCPUMigrationsProfiler returns a Profiler that profiles the number of times // the process has migrated to a new CPU. func NewCPUMigrationsProfiler(pid, cpu int, opts ...int) (Profiler, error) { return NewProfiler( unix.PERF_TYPE_SOFTWARE, unix.PERF_COUNT_SW_CPU_MIGRATIONS, pid, cpu, opts..., ) } // NewMinorFaultsProfiler returns a Profiler that profiles the number of minor // page faults. func NewMinorFaultsProfiler(pid, cpu int, opts ...int) (Profiler, error) { return NewProfiler( unix.PERF_TYPE_SOFTWARE, unix.PERF_COUNT_SW_PAGE_FAULTS_MIN, pid, cpu, opts..., ) } // NewMajorFaultsProfiler returns a Profiler that profiles the number of major // page faults. func NewMajorFaultsProfiler(pid, cpu int, opts ...int) (Profiler, error) { return NewProfiler( unix.PERF_TYPE_SOFTWARE, unix.PERF_COUNT_SW_PAGE_FAULTS_MAJ, pid, cpu, opts..., ) } // NewAlignFaultsProfiler returns a Profiler that profiles the number of // alignment faults. func NewAlignFaultsProfiler(pid, cpu int, opts ...int) (Profiler, error) { return NewProfiler( unix.PERF_TYPE_SOFTWARE, unix.PERF_COUNT_SW_ALIGNMENT_FAULTS, pid, cpu, opts..., ) } // NewEmulationFaultsProfiler returns a Profiler that profiles the number of // emulation faults. func NewEmulationFaultsProfiler(pid, cpu int, opts ...int) (Profiler, error) { return NewProfiler( unix.PERF_TYPE_SOFTWARE, unix.PERF_COUNT_SW_EMULATION_FAULTS, pid, cpu, opts..., ) } // NewL1DataProfiler returns a Profiler that profiles L1 cache data. func NewL1DataProfiler(pid, cpu, op, result int, opts ...int) (Profiler, error) { return NewProfiler( unix.PERF_TYPE_HW_CACHE, uint64((unix.PERF_COUNT_HW_CACHE_L1D)|(op<<8)|(result<<16)), pid, cpu, opts..., ) } // NewL1InstrProfiler returns a Profiler that profiles L1 instruction data. func NewL1InstrProfiler(pid, cpu, op, result int, opts ...int) (Profiler, error) { return NewProfiler( unix.PERF_TYPE_HW_CACHE, uint64((unix.PERF_COUNT_HW_CACHE_L1I)|(op<<8)|(result<<16)), pid, cpu, opts..., ) } // NewLLCacheProfiler returns a Profiler that profiles last level cache. func NewLLCacheProfiler(pid, cpu, op, result int, opts ...int) (Profiler, error) { return NewProfiler( unix.PERF_TYPE_HW_CACHE, uint64((unix.PERF_COUNT_HW_CACHE_LL)|(op<<8)|(result<<16)), pid, cpu, opts..., ) } // NewDataTLBProfiler returns a Profiler that profiles the data TLB.
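//
// As with the other PERF_TYPE_HW_CACHE constructors above, op and result use
// the perf_event_open cache encoding (config = cache_id | op<<8 |
// result<<16). An illustrative call counting data-TLB read misses:
//
//	p, _ := NewDataTLBProfiler(os.Getpid(), -1,
//		unix.PERF_COUNT_HW_CACHE_OP_READ,
//		unix.PERF_COUNT_HW_CACHE_RESULT_MISS)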
func NewDataTLBProfiler(pid, cpu, op, result int, opts ...int) (Profiler, error) { return NewProfiler( unix.PERF_TYPE_HW_CACHE, uint64((unix.PERF_COUNT_HW_CACHE_DTLB)|(op<<8)|(result<<16)), pid, cpu, opts..., ) } // NewInstrTLBProfiler returns a Profiler that profiles the instruction TLB. func NewInstrTLBProfiler(pid, cpu, op, result int, opts ...int) (Profiler, error) { return NewProfiler( unix.PERF_TYPE_HW_CACHE, uint64((unix.PERF_COUNT_HW_CACHE_ITLB)|(op<<8)|(result<<16)), pid, cpu, opts..., ) } // NewBPUProfiler returns a Profiler that profiles the BPU (branch prediction unit). func NewBPUProfiler(pid, cpu, op, result int, opts ...int) (Profiler, error) { return NewProfiler( unix.PERF_TYPE_HW_CACHE, uint64((unix.PERF_COUNT_HW_CACHE_BPU)|(op<<8)|(result<<16)), pid, cpu, opts..., ) } // NewNodeCacheProfiler returns a Profiler that profiles the node cache accesses. func NewNodeCacheProfiler(pid, cpu, op, result int, opts ...int) (Profiler, error) { return NewProfiler( unix.PERF_TYPE_HW_CACHE, uint64((unix.PERF_COUNT_HW_CACHE_NODE)|(op<<8)|(result<<16)), pid, cpu, opts..., ) } // Reset is used to reset the counters of the profiler. func (p *profiler) Reset() error { return unix.IoctlSetInt(p.fd, unix.PERF_EVENT_IOC_RESET, 0) } // Start is used to Start the profiler. func (p *profiler) Start() error { return unix.IoctlSetInt(p.fd, unix.PERF_EVENT_IOC_ENABLE, 0) } // Stop is used to stop the profiler. func (p *profiler) Stop() error { return unix.IoctlSetInt(p.fd, unix.PERF_EVENT_IOC_DISABLE, 0) } // Profile returns the current Profile. func (p *profiler) Profile(val *ProfileValue) error { // The underlying struct that gets read from the profiler looks like: /* struct read_format { u64 value; // The value of the event u64 time_enabled; // if PERF_FORMAT_TOTAL_TIME_ENABLED u64 time_running; // if PERF_FORMAT_TOTAL_TIME_RUNNING u64 id; // if PERF_FORMAT_ID }; */ // read 24 bytes since PERF_FORMAT_TOTAL_TIME_ENABLED and // PERF_FORMAT_TOTAL_TIME_RUNNING are always set. // XXX: allow profile ids? buf := bufPool.Get().([]byte) _, err := syscall.Read(p.fd, buf) if err != nil { zero(buf) bufPool.Put(buf) return err } val.Value = binary.LittleEndian.Uint64(buf[0:8]) val.TimeEnabled = binary.LittleEndian.Uint64(buf[8:16]) val.TimeRunning = binary.LittleEndian.Uint64(buf[16:24]) zero(buf) bufPool.Put(buf) return nil } // Close is used to close the perf context. 
func (p *profiler) Close() error { return unix.Close(p.fd) } perf-utils-0.5.1/process_profile_test.go000066400000000000000000000020501413762255200203770ustar00rootroot00000000000000package perf import ( "os" "testing" "golang.org/x/sys/unix" ) func TestProfiler(t *testing.T) { profiler, err := NewProfiler( unix.PERF_TYPE_HARDWARE, unix.PERF_COUNT_HW_INSTRUCTIONS, os.Getpid(), -1, ) if err != nil { t.Fatal(err) } defer func() { if err := profiler.Close(); err != nil { t.Fatal(err) } }() if err := profiler.Start(); err != nil { t.Fatal(err) } profile := &ProfileValue{} err = profiler.Profile(profile) if err != nil { t.Fatal(err) } if err := profiler.Stop(); err != nil { t.Fatal(err) } } func BenchmarkProfiler(b *testing.B) { profiler, err := NewProfiler( unix.PERF_TYPE_HARDWARE, unix.PERF_COUNT_HW_INSTRUCTIONS, os.Getpid(), -1, ) if err != nil { b.Fatal(err) } defer func() { if err := profiler.Close(); err != nil { b.Fatal(err) } }() profile := &ProfileValue{} b.ResetTimer() b.ReportAllocs() if err := profiler.Start(); err != nil { b.Fatal(err) } for n := 0; n < b.N; n++ { if err := profiler.Profile(profile); err != nil { b.Fatal(err) } } } perf-utils-0.5.1/software_profiler.go000066400000000000000000000146051413762255200177070ustar00rootroot00000000000000//go:build linux // +build linux package perf import ( "sync" "go.uber.org/multierr" "golang.org/x/sys/unix" ) type SoftwareProfilerType int const ( AllSoftwareProfilers SoftwareProfilerType = 0 CpuClockProfiler SoftwareProfilerType = 1 << iota TaskClockProfiler SoftwareProfilerType = 1 << iota PageFaultProfiler SoftwareProfilerType = 1 << iota ContextSwitchProfiler SoftwareProfilerType = 1 << iota CpuMigrationProfiler SoftwareProfilerType = 1 << iota MinorFaultProfiler SoftwareProfilerType = 1 << iota MajorFaultProfiler SoftwareProfilerType = 1 << iota AlignFaultProfiler SoftwareProfilerType = 1 << iota EmuFaultProfiler SoftwareProfilerType = 1 << iota ) type softwareProfiler struct { // map of perf counter type to file descriptor profilers map[int]Profiler profilersMu sync.RWMutex } // NewSoftwareProfiler returns a new software profiler. func NewSoftwareProfiler(pid, cpu int, profilerSet SoftwareProfilerType, opts ...int) (SoftwareProfiler, error) { var e error profilers := map[int]Profiler{} if profilerSet&CpuClockProfiler > 0 || profilerSet == AllSoftwareProfilers { cpuClockProfiler, err := NewCPUClockProfiler(pid, cpu, opts...) if err != nil { e = multierr.Append(e, err) } else { profilers[unix.PERF_COUNT_SW_CPU_CLOCK] = cpuClockProfiler } } if profilerSet&TaskClockProfiler > 0 || profilerSet == AllSoftwareProfilers { taskClockProfiler, err := NewTaskClockProfiler(pid, cpu, opts...) if err != nil { e = multierr.Append(e, err) } else { profilers[unix.PERF_COUNT_SW_TASK_CLOCK] = taskClockProfiler } } if profilerSet&PageFaultProfiler > 0 || profilerSet == AllSoftwareProfilers { pageFaultProfiler, err := NewPageFaultProfiler(pid, cpu, opts...) if err != nil { e = multierr.Append(e, err) } else { profilers[unix.PERF_COUNT_SW_PAGE_FAULTS] = pageFaultProfiler } } if profilerSet&ContextSwitchProfiler > 0 || profilerSet == AllSoftwareProfilers { ctxSwitchesProfiler, err := NewCtxSwitchesProfiler(pid, cpu, opts...) if err != nil { e = multierr.Append(e, err) } else { profilers[unix.PERF_COUNT_SW_CONTEXT_SWITCHES] = ctxSwitchesProfiler } } if profilerSet&CpuMigrationProfiler > 0 || profilerSet == AllSoftwareProfilers { cpuMigrationsProfiler, err := NewCPUMigrationsProfiler(pid, cpu, opts...) 
		if err != nil {
			e = multierr.Append(e, err)
		} else {
			profilers[unix.PERF_COUNT_SW_CPU_MIGRATIONS] = cpuMigrationsProfiler
		}
	}
	if profilerSet&MinorFaultProfiler > 0 || profilerSet == AllSoftwareProfilers {
		minorFaultProfiler, err := NewMinorFaultsProfiler(pid, cpu, opts...)
		if err != nil {
			e = multierr.Append(e, err)
		} else {
			profilers[unix.PERF_COUNT_SW_PAGE_FAULTS_MIN] = minorFaultProfiler
		}
	}
	if profilerSet&MajorFaultProfiler > 0 || profilerSet == AllSoftwareProfilers {
		majorFaultProfiler, err := NewMajorFaultsProfiler(pid, cpu, opts...)
		if err != nil {
			e = multierr.Append(e, err)
		} else {
			profilers[unix.PERF_COUNT_SW_PAGE_FAULTS_MAJ] = majorFaultProfiler
		}
	}
	if profilerSet&AlignFaultProfiler > 0 || profilerSet == AllSoftwareProfilers {
		alignFaultsFrontProfiler, err := NewAlignFaultsProfiler(pid, cpu, opts...)
		if err != nil {
			e = multierr.Append(e, err)
		} else {
			profilers[unix.PERF_COUNT_SW_ALIGNMENT_FAULTS] = alignFaultsFrontProfiler
		}
	}
	if profilerSet&EmuFaultProfiler > 0 || profilerSet == AllSoftwareProfilers {
		emuFaultProfiler, err := NewEmulationFaultsProfiler(pid, cpu, opts...)
		if err != nil {
			e = multierr.Append(e, err)
		} else {
			profilers[unix.PERF_COUNT_SW_EMULATION_FAULTS] = emuFaultProfiler
		}
	}
	return &softwareProfiler{
		profilers: profilers,
	}, e
}

// HasProfilers returns if there are any configured profilers.
func (p *softwareProfiler) HasProfilers() bool {
	p.profilersMu.RLock()
	defer p.profilersMu.RUnlock()
	return len(p.profilers) > 0
}

// Start is used to start the SoftwareProfiler.
func (p *softwareProfiler) Start() error {
	if !p.HasProfilers() {
		return ErrNoProfiler
	}
	var err error
	p.profilersMu.RLock()
	for _, profiler := range p.profilers {
		err = multierr.Append(err, profiler.Start())
	}
	p.profilersMu.RUnlock()
	return err
}

// Reset is used to reset the SoftwareProfiler.
func (p *softwareProfiler) Reset() error {
	var err error
	p.profilersMu.RLock()
	for _, profiler := range p.profilers {
		err = multierr.Append(err, profiler.Reset())
	}
	p.profilersMu.RUnlock()
	return err
}

// Stop is used to stop the SoftwareProfiler.
func (p *softwareProfiler) Stop() error {
	var err error
	p.profilersMu.RLock()
	for _, profiler := range p.profilers {
		err = multierr.Append(err, profiler.Stop())
	}
	p.profilersMu.RUnlock()
	return err
}

// Close is used to close the SoftwareProfiler.
func (p *softwareProfiler) Close() error {
	var err error
	p.profilersMu.RLock()
	for _, profiler := range p.profilers {
		err = multierr.Append(err, profiler.Close())
	}
	p.profilersMu.RUnlock()
	return err
}

// Profile is used to read the SoftwareProfiler's SoftwareProfile; it returns
// an error only if all profiles fail.
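//
// A hedged usage sketch (error handling elided; this mirrors
// TestSoftwareProfiler in software_profiler_test.go):
//
//	swp, _ := NewSoftwareProfiler(os.Getpid(), -1, AllSoftwareProfilers)
//	_ = swp.Start()
//	profile := &SoftwareProfile{}
//	_ = swp.Profile(profile)
//	_ = swp.Stop()
//	_ = swp.Close()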
func (p *softwareProfiler) Profile(swProfile *SoftwareProfile) error {
	var err error
	swProfile.Reset()
	p.profilersMu.RLock()
	for profilerType, profiler := range p.profilers {
		profileVal := ProfileValuePool.Get().(*ProfileValue)
		err2 := profiler.Profile(profileVal)
		err = multierr.Append(err, err2)
		if err2 == nil {
			if swProfile.TimeEnabled == nil {
				swProfile.TimeEnabled = &profileVal.TimeEnabled
			}
			if swProfile.TimeRunning == nil {
				swProfile.TimeRunning = &profileVal.TimeRunning
			}
			switch profilerType {
			case unix.PERF_COUNT_SW_CPU_CLOCK:
				swProfile.CPUClock = &profileVal.Value
			case unix.PERF_COUNT_SW_TASK_CLOCK:
				swProfile.TaskClock = &profileVal.Value
			case unix.PERF_COUNT_SW_PAGE_FAULTS:
				swProfile.PageFaults = &profileVal.Value
			case unix.PERF_COUNT_SW_CONTEXT_SWITCHES:
				swProfile.ContextSwitches = &profileVal.Value
			case unix.PERF_COUNT_SW_CPU_MIGRATIONS:
				swProfile.CPUMigrations = &profileVal.Value
			case unix.PERF_COUNT_SW_PAGE_FAULTS_MIN:
				swProfile.MinorPageFaults = &profileVal.Value
			case unix.PERF_COUNT_SW_PAGE_FAULTS_MAJ:
				swProfile.MajorPageFaults = &profileVal.Value
			case unix.PERF_COUNT_SW_ALIGNMENT_FAULTS:
				swProfile.AlignmentFaults = &profileVal.Value
			case unix.PERF_COUNT_SW_EMULATION_FAULTS:
				swProfile.EmulationFaults = &profileVal.Value
			default:
			}
		}
	}
	p.profilersMu.RUnlock()
	// Per the documented contract, only surface an error if every profile
	// read failed.
	if len(multierr.Errors(err)) == len(p.profilers) {
		return err
	}
	return nil
}
perf-utils-0.5.1/software_profiler_test.go000066400000000000000000000011261413762255200207400ustar00rootroot00000000000000package perf

import (
	"encoding/json"
	"os"
	"testing"
)

func TestSoftwareProfiler(t *testing.T) {
	swProfiler, err := NewSoftwareProfiler(os.Getpid(), -1, AllSoftwareProfilers)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if err := swProfiler.Close(); err != nil {
			t.Fatal(err)
		}
	}()
	if err := swProfiler.Start(); err != nil {
		t.Fatal(err)
	}
	profile := &SoftwareProfile{}
	err = swProfiler.Profile(profile)
	if err != nil {
		t.Fatal(err)
	}
	_, err = json.Marshal(profile)
	if err != nil {
		t.Fatal(err)
	}
	if err := swProfiler.Stop(); err != nil {
		t.Fatal(err)
	}
}
perf-utils-0.5.1/utils.go000066400000000000000000001172161413762255200153130ustar00rootroot00000000000000//go:build linux
// +build linux

package perf

import (
	"encoding/binary"
	"fmt"
	"io/ioutil"
	"math/rand"
	"os"
	"runtime"
	"strconv"
	"strings"
	"syscall"
	"testing"
	"unsafe"

	"golang.org/x/sys/unix"
)

// BenchOpt is a benchmark option.
type BenchOpt uint8

var (
	// EventAttrSize is the size of a PerfEventAttr
	EventAttrSize = uint32(unsafe.Sizeof(unix.PerfEventAttr{}))
)

const (
	// BenchLock is used to lock a benchmark to a goroutine.
	BenchLock BenchOpt = 1 << iota
	// BenchStrict is used to fail a benchmark if one or more events cannot
	// be profiled.
	BenchStrict
)

func zero(b []byte) {
	for i := range b {
		b[i] = 0
	}
}

// MaxOpenFiles returns the RLIMIT_NOFILE from getrlimit.
func MaxOpenFiles() (uint64, error) {
	rlimit := &unix.Rlimit{}
	err := unix.Getrlimit(unix.RLIMIT_NOFILE, rlimit)
	return rlimit.Max, err
}

// LockThread locks a goroutine to an OS thread and then sets the affinity of
// the thread to a processor core.
func LockThread(core int) (func(), error) {
	runtime.LockOSThread()
	cpuSet := unix.CPUSet{}
	cpuSet.Set(core)
	return runtime.UnlockOSThread, unix.SchedSetaffinity(0, &cpuSet)
}

// failBenchmark is a helper function for RunBenchmarks: if an error occurs
// while setting up performance counters, evaluate strict. If strict mode is
// on, mark the benchmark as skipped and log err. If it is off, silently
// ignore the failure.
func failBenchmark(options BenchOpt, b *testing.B, msg ...interface{}) {
	b.Helper()
	if options&BenchStrict > 0 {
		b.Skip(msg...)
	}
}

func setupBenchmarkProfiler(fd int) error {
	if err := unix.IoctlSetInt(fd, unix.PERF_EVENT_IOC_RESET, unix.PERF_IOC_FLAG_GROUP); err != nil {
		return err
	}
	if err := unix.IoctlSetInt(fd, unix.PERF_EVENT_IOC_ENABLE, unix.PERF_IOC_FLAG_GROUP); err != nil {
		return err
	}
	return nil
}

func readBenchmarkProfiler(fd int) (uint64, error) {
	if err := unix.IoctlSetInt(fd, unix.PERF_EVENT_IOC_DISABLE, unix.PERF_IOC_FLAG_GROUP); err != nil {
		return 0, err
	}
	buf := make([]byte, 24)
	if _, err := syscall.Read(fd, buf); err != nil {
		return 0, err
	}
	return binary.LittleEndian.Uint64(buf[0:8]), nil
}

// BenchmarkTracepoints runs a benchmark and reports the count of each of the
// given tracepoints per benchmark op.
func BenchmarkTracepoints(
	b *testing.B,
	f func(b *testing.B),
	options BenchOpt,
	tracepoints ...string,
) {
	pidOrTid := os.Getpid()
	if options&BenchLock > 0 {
		cb, err := LockThread(rand.Intn(runtime.NumCPU()))
		if err != nil {
			b.Fatal(err)
		}
		defer cb()
		pidOrTid = unix.Gettid()
	}
	var (
		attrMap  = map[string]int{}
		tidToPid = map[int]int{}
		childFds = map[int][]int{}
	)
	setupCb, err := LockThread(rand.Intn(runtime.NumCPU()))
	if err != nil {
		failBenchmark(options, b, err)
	}
	for _, tracepoint := range tracepoints {
		split := strings.Split(tracepoint, ":")
		if len(split) != 2 {
			b.Fatalf("Expected <subsystem>:<event>, got: %q", tracepoint)
		}
		eventAttr, err := TracepointEventAttr(split[0], split[1])
		if err != nil {
			failBenchmark(options, b, err)
			continue
		}
		eventAttr.Bits |= unix.PerfBitDisabled | unix.PerfBitPinned | unix.PerfBitInherit | unix.PerfBitInheritStat | unix.PerfBitEnableOnExec
		fd, err := unix.PerfEventOpen(
			eventAttr,
			pidOrTid,
			-1,
			-1,
			0,
		)
		if err != nil {
			failBenchmark(options, b, err)
			continue
		}
		attrMap[tracepoint] = fd
		if options&BenchLock > 0 {
			continue
		}
		childFds[fd] = []int{}
		// Setup a profiler for all the threads in the current
		// process with the inherit bit set. If the runtime
		// spins up new threads they should get profiled.
		tids, err := getTids(pidOrTid)
		if err != nil {
			b.Fatal(err)
		}
		for _, tid := range tids {
			tfd, err := unix.PerfEventOpen(
				eventAttr,
				tid,
				-1,
				-1,
				0,
			)
			if err != nil {
				failBenchmark(options, b, err)
				continue
			}
			childFds[fd] = append(childFds[fd], tfd)
			tidToPid[tfd] = fd
		}
	}
	setupCb()

	b.ReportAllocs()
	b.StartTimer()
	f(b)
	b.StopTimer()

	for key, fd := range attrMap {
		if err := setupBenchmarkProfiler(fd); err != nil {
			failBenchmark(options, b, err)
			continue
		}
		if options&BenchLock == 0 {
			for _, child := range childFds[fd] {
				if err := setupBenchmarkProfiler(child); err != nil {
					failBenchmark(options, b, err)
					continue
				}
			}
		}
		f(b)
		count, err := readBenchmarkProfiler(fd)
		if err != nil {
			failBenchmark(options, b, err)
			continue
		}
		if options&BenchLock == 0 {
			for _, child := range childFds[fd] {
				childCount, err := readBenchmarkProfiler(child)
				if err != nil {
					failBenchmark(options, b, err)
					continue
				}
				count += childCount
			}
		}
		b.ReportMetric(float64(count)/float64(b.N), key+"/op")
	}
	for _, fd := range attrMap {
		_ = unix.Close(fd)
		for _, childFd := range childFds[fd] {
			_ = unix.Close(childFd)
		}
	}
}

// RunBenchmarks runs a series of benchmarks for a set of PerfEventAttrs.
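//
// A minimal sketch of typical usage from a Go benchmark (it mirrors
// BenchmarkRunBenchmarks in utils_test.go; doWork is a hypothetical function
// under test):
//
//	func BenchmarkWork(b *testing.B) {
//		RunBenchmarks(
//			b,
//			func(b *testing.B) {
//				for n := 0; n < b.N; n++ {
//					doWork()
//				}
//			},
//			BenchStrict,
//			CPUInstructionsEventAttr(),
//			CPUCyclesEventAttr(),
//		)
//	}
//
// Each event is reported as an extra benchmark metric keyed by
// EventAttrString, e.g. "hw_instr/op".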
func RunBenchmarks( b *testing.B, f func(b *testing.B), options BenchOpt, eventAttrs ...unix.PerfEventAttr, ) { pidOrTid := os.Getpid() if options&BenchLock > 0 { cb, err := LockThread(rand.Intn(runtime.NumCPU())) if err != nil { b.Fatal(err) } defer cb() pidOrTid = unix.Gettid() } var ( attrMap = map[string]int{} tidToPid = map[int]int{} childFds = map[int][]int{} ) setupCb, err := LockThread(rand.Intn(runtime.NumCPU())) if err != nil { failBenchmark(options, b, err) } for _, eventAttr := range eventAttrs { eventAttr.Bits |= unix.PerfBitDisabled | unix.PerfBitPinned | unix.PerfBitInherit | unix.PerfBitInheritStat | unix.PerfBitEnableOnExec fd, err := unix.PerfEventOpen( &eventAttr, pidOrTid, -1, -1, 0, ) if err != nil { b.Fatal(err) } key := EventAttrString(&eventAttr) attrMap[key] = fd if options&BenchLock > 0 { continue } childFds[fd] = []int{} // Setup a profiler for all the threads in the current // process with the inherit bit set. If the runtime // spins up new threads they should get profiled. tids, err := getTids(pidOrTid) if err != nil { b.Fatal(err) } for _, tid := range tids { tfd, err := unix.PerfEventOpen( &eventAttr, tid, -1, -1, 0, ) if err != nil { failBenchmark(options, b, err) continue } childFds[fd] = append(childFds[fd], tfd) tidToPid[tfd] = fd } } setupCb() b.ReportAllocs() b.StartTimer() f(b) b.StopTimer() for key, fd := range attrMap { if err := setupBenchmarkProfiler(fd); err != nil { failBenchmark(options, b, err) continue } if options&BenchLock == 0 { for _, child := range childFds[fd] { if err := setupBenchmarkProfiler(child); err != nil { failBenchmark(options, b, err) continue } } } f(b) count, err := readBenchmarkProfiler(fd) if err != nil { failBenchmark(options, b, err) continue } if options&BenchLock == 0 { for _, child := range childFds[fd] { childCount, err := readBenchmarkProfiler(child) if err != nil { failBenchmark(options, b, err) continue } count += childCount } } b.ReportMetric(float64(count)/float64(b.N), key+"/op") } for _, fd := range attrMap { _ = unix.Close(fd) for _, childFd := range childFds[fd] { _ = unix.Close(childFd) } } } // profileFn is a helper function to profile a function, it will randomly choose a core to run on. func profileFn(eventAttr *unix.PerfEventAttr, f func() error) (*ProfileValue, error) { cb, err := LockThread(rand.Intn(runtime.NumCPU())) if err != nil { return nil, err } defer cb() fd, err := unix.PerfEventOpen( eventAttr, unix.Gettid(), -1, -1, 0, ) if err != nil { return nil, err } if err := unix.IoctlSetInt(fd, unix.PERF_EVENT_IOC_RESET, 0); err != nil { return nil, err } if err := unix.IoctlSetInt(fd, unix.PERF_EVENT_IOC_ENABLE, 0); err != nil { return nil, err } if err := f(); err != nil { return nil, err } if err := unix.IoctlSetInt(fd, unix.PERF_EVENT_IOC_DISABLE, 0); err != nil { return nil, err } buf := make([]byte, 24) if _, err := syscall.Read(fd, buf); err != nil { return nil, err } return &ProfileValue{ Value: binary.LittleEndian.Uint64(buf[0:8]), TimeEnabled: binary.LittleEndian.Uint64(buf[8:16]), TimeRunning: binary.LittleEndian.Uint64(buf[16:24]), }, unix.Close(fd) } // EventAttrString returns a short string representation of a unix.PerfEventAttr. 
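//
// For example, the attr returned by CPUInstructionsEventAttr maps to the
// short key "hw_instr" (a sketch):
//
//	attr := CPUInstructionsEventAttr()
//	key := EventAttrString(&attr) // "hw_instr"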
func EventAttrString(eventAttr *unix.PerfEventAttr) string { var b strings.Builder switch eventAttr.Type { case unix.PERF_TYPE_HARDWARE: b.WriteString("hw_") switch eventAttr.Config { case unix.PERF_COUNT_HW_INSTRUCTIONS: b.WriteString("instr") case unix.PERF_COUNT_HW_CPU_CYCLES: b.WriteString("cycles") case unix.PERF_COUNT_HW_CACHE_REFERENCES: b.WriteString("cache_ref") case unix.PERF_COUNT_HW_CACHE_MISSES: b.WriteString("cache_miss") case unix.PERF_COUNT_HW_BUS_CYCLES: b.WriteString("bus_cycles") case unix.PERF_COUNT_HW_STALLED_CYCLES_FRONTEND: b.WriteString("stalled_cycles_front") case unix.PERF_COUNT_HW_STALLED_CYCLES_BACKEND: b.WriteString("stalled_cycles_back") case unix.PERF_COUNT_HW_REF_CPU_CYCLES: b.WriteString("ref_cycles") default: b.WriteString("unknown") } case unix.PERF_TYPE_SOFTWARE: b.WriteString("sw_") switch eventAttr.Config { case unix.PERF_COUNT_SW_CPU_CLOCK: b.WriteString("cpu_clock") case unix.PERF_COUNT_SW_TASK_CLOCK: b.WriteString("task_clock") case unix.PERF_COUNT_SW_PAGE_FAULTS: b.WriteString("page_faults") case unix.PERF_COUNT_SW_CONTEXT_SWITCHES: b.WriteString("ctx_switches") case unix.PERF_COUNT_SW_CPU_MIGRATIONS: b.WriteString("migrations") case unix.PERF_COUNT_SW_PAGE_FAULTS_MIN: b.WriteString("minor_faults") case unix.PERF_COUNT_SW_PAGE_FAULTS_MAJ: b.WriteString("major_faults") case unix.PERF_COUNT_SW_ALIGNMENT_FAULTS: b.WriteString("align_faults") case unix.PERF_COUNT_SW_EMULATION_FAULTS: b.WriteString("emul_faults") default: b.WriteString("unknown") } case unix.PERF_TYPE_BREAKPOINT: b.WriteString("breakpoint") case unix.PERF_TYPE_TRACEPOINT: b.WriteString("tracepoint") case unix.PERF_TYPE_HW_CACHE: b.WriteString("cache_") switch eventAttr.Config { case (unix.PERF_COUNT_HW_CACHE_L1D) | (unix.PERF_COUNT_HW_CACHE_OP_READ << 8) | (unix.PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16): b.WriteString("l1d_read") case (unix.PERF_COUNT_HW_CACHE_L1D) | (unix.PERF_COUNT_HW_CACHE_OP_READ << 8) | (unix.PERF_COUNT_HW_CACHE_RESULT_MISS << 16): b.WriteString("l1d_miss") case (unix.PERF_COUNT_HW_CACHE_L1D) | (unix.PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (unix.PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16): b.WriteString("l1d_write") case (unix.PERF_COUNT_HW_CACHE_L1I) | (unix.PERF_COUNT_HW_CACHE_OP_READ << 8) | (unix.PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16): b.WriteString("l1i_read") case (unix.PERF_COUNT_HW_CACHE_L1I) | (unix.PERF_COUNT_HW_CACHE_OP_READ << 8) | (unix.PERF_COUNT_HW_CACHE_RESULT_MISS << 16): b.WriteString("l1i_miss") case (unix.PERF_COUNT_HW_CACHE_LL) | (unix.PERF_COUNT_HW_CACHE_OP_READ << 8) | (unix.PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16): b.WriteString("ll_read") case (unix.PERF_COUNT_HW_CACHE_LL) | (unix.PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (unix.PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16): b.WriteString("ll_write") case (unix.PERF_COUNT_HW_CACHE_LL) | (unix.PERF_COUNT_HW_CACHE_OP_READ << 8) | (unix.PERF_COUNT_HW_CACHE_RESULT_MISS << 16): b.WriteString("ll_read_miss") case (unix.PERF_COUNT_HW_CACHE_DTLB) | (unix.PERF_COUNT_HW_CACHE_OP_READ << 8) | (unix.PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16): b.WriteString("dtlb_read") case (unix.PERF_COUNT_HW_CACHE_DTLB) | (unix.PERF_COUNT_HW_CACHE_OP_READ << 8) | (unix.PERF_COUNT_HW_CACHE_RESULT_MISS << 16): b.WriteString("dtlb_miss") case (unix.PERF_COUNT_HW_CACHE_DTLB) | (unix.PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (unix.PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16): b.WriteString("dtlb_write") case (unix.PERF_COUNT_HW_CACHE_ITLB) | (unix.PERF_COUNT_HW_CACHE_OP_READ << 8) | (unix.PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16): 
b.WriteString("itlb_read") case (unix.PERF_COUNT_HW_CACHE_ITLB) | (unix.PERF_COUNT_HW_CACHE_OP_READ << 8) | (unix.PERF_COUNT_HW_CACHE_RESULT_MISS << 16): b.WriteString("itlb_miss") case (unix.PERF_COUNT_HW_CACHE_BPU) | (unix.PERF_COUNT_HW_CACHE_OP_READ << 8) | (unix.PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16): b.WriteString("bpu_read") case (unix.PERF_COUNT_HW_CACHE_BPU) | (unix.PERF_COUNT_HW_CACHE_OP_READ << 8) | (unix.PERF_COUNT_HW_CACHE_RESULT_MISS << 16): b.WriteString("bpu_miss") case (unix.PERF_COUNT_HW_CACHE_NODE) | (unix.PERF_COUNT_HW_CACHE_OP_READ << 8) | (unix.PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16): b.WriteString("node_read") case (unix.PERF_COUNT_HW_CACHE_NODE) | (unix.PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (unix.PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16): b.WriteString("node_write") default: b.WriteString("unknown") } case unix.PERF_TYPE_RAW: b.WriteString("raw") default: b.WriteString("unknown") } return b.String() } // CPUInstructions is used to profile a function and return the number of CPU instructions. // Note that it will call runtime.LockOSThread to ensure accurate profilng. func CPUInstructions(f func() error) (*ProfileValue, error) { eventAttr := &unix.PerfEventAttr{ Type: unix.PERF_TYPE_HARDWARE, Config: unix.PERF_COUNT_HW_INSTRUCTIONS, Size: EventAttrSize, Bits: unix.PerfBitDisabled | unix.PerfBitExcludeKernel | unix.PerfBitExcludeHv, Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED, } return profileFn(eventAttr, f) } // CPUInstructionsEventAttr returns a unix.PerfEventAttr configured for CPUInstructions. func CPUInstructionsEventAttr() unix.PerfEventAttr { return unix.PerfEventAttr{ Type: unix.PERF_TYPE_HARDWARE, Config: unix.PERF_COUNT_HW_INSTRUCTIONS, Size: EventAttrSize, Bits: unix.PerfBitExcludeKernel | unix.PerfBitExcludeHv, Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED, } } // CPUCycles is used to profile a function and return the number of CPU cycles. // Note that it will call runtime.LockOSThread to ensure accurate profilng. func CPUCycles(f func() error) (*ProfileValue, error) { eventAttr := &unix.PerfEventAttr{ Type: unix.PERF_TYPE_HARDWARE, Config: unix.PERF_COUNT_HW_CPU_CYCLES, Size: EventAttrSize, Bits: unix.PerfBitDisabled | unix.PerfBitExcludeKernel | unix.PerfBitExcludeHv, Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED, } return profileFn(eventAttr, f) } // CPUCyclesEventAttr returns a unix.PerfEventAttr configured for CPUCycles. func CPUCyclesEventAttr() unix.PerfEventAttr { return unix.PerfEventAttr{ Type: unix.PERF_TYPE_HARDWARE, Config: unix.PERF_COUNT_HW_CPU_CYCLES, Size: EventAttrSize, Bits: unix.PerfBitExcludeKernel | unix.PerfBitExcludeHv, Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED, } } // CacheRef is used to profile a function and return the number of cache // references. Note that it will call runtime.LockOSThread to ensure accurate // profilng. func CacheRef(f func() error) (*ProfileValue, error) { eventAttr := &unix.PerfEventAttr{ Type: unix.PERF_TYPE_HARDWARE, Config: unix.PERF_COUNT_HW_CACHE_REFERENCES, Size: EventAttrSize, Bits: unix.PerfBitDisabled | unix.PerfBitExcludeKernel | unix.PerfBitExcludeHv, Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED, } return profileFn(eventAttr, f) } // CacheRefEventAttr returns a unix.PerfEventAttr configured for CacheRef. 
func CacheRefEventAttr() unix.PerfEventAttr {
	return unix.PerfEventAttr{
		Type:        unix.PERF_TYPE_HARDWARE,
		Config:      unix.PERF_COUNT_HW_CACHE_REFERENCES,
		Size:        EventAttrSize,
		Bits:        unix.PerfBitExcludeKernel | unix.PerfBitExcludeHv,
		Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED,
	}
}

// CacheMiss is used to profile a function and return the number of cache
// misses. Note that it will call runtime.LockOSThread to ensure accurate
// profiling.
func CacheMiss(f func() error) (*ProfileValue, error) {
	eventAttr := &unix.PerfEventAttr{
		Type:        unix.PERF_TYPE_HARDWARE,
		Config:      unix.PERF_COUNT_HW_CACHE_MISSES,
		Size:        EventAttrSize,
		Bits:        unix.PerfBitDisabled | unix.PerfBitExcludeKernel | unix.PerfBitExcludeHv,
		Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED,
	}
	return profileFn(eventAttr, f)
}

// CacheMissEventAttr returns a unix.PerfEventAttr configured for CacheMiss.
func CacheMissEventAttr() unix.PerfEventAttr {
	return unix.PerfEventAttr{
		Type:        unix.PERF_TYPE_HARDWARE,
		Config:      unix.PERF_COUNT_HW_CACHE_MISSES,
		Size:        EventAttrSize,
		Bits:        unix.PerfBitExcludeKernel | unix.PerfBitExcludeHv,
		Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED,
	}
}

// BusCycles is used to profile a function and return the number of bus
// cycles. Note that it will call runtime.LockOSThread to ensure accurate
// profiling.
func BusCycles(f func() error) (*ProfileValue, error) {
	eventAttr := &unix.PerfEventAttr{
		Type:        unix.PERF_TYPE_HARDWARE,
		Config:      unix.PERF_COUNT_HW_BUS_CYCLES,
		Size:        EventAttrSize,
		Bits:        unix.PerfBitDisabled | unix.PerfBitExcludeKernel | unix.PerfBitExcludeHv,
		Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED,
	}
	return profileFn(eventAttr, f)
}

// BusCyclesEventAttr returns a unix.PerfEventAttr configured for BusCycles.
func BusCyclesEventAttr() unix.PerfEventAttr {
	return unix.PerfEventAttr{
		Type:        unix.PERF_TYPE_HARDWARE,
		Config:      unix.PERF_COUNT_HW_BUS_CYCLES,
		Size:        EventAttrSize,
		Bits:        unix.PerfBitExcludeKernel | unix.PerfBitExcludeHv,
		Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED,
	}
}

// StalledFrontendCycles is used to profile a function and return the number of
// stalled frontend cycles. Note that it will call runtime.LockOSThread to
// ensure accurate profiling.
func StalledFrontendCycles(f func() error) (*ProfileValue, error) {
	eventAttr := &unix.PerfEventAttr{
		Type:        unix.PERF_TYPE_HARDWARE,
		Config:      unix.PERF_COUNT_HW_STALLED_CYCLES_FRONTEND,
		Size:        EventAttrSize,
		Bits:        unix.PerfBitDisabled | unix.PerfBitExcludeKernel | unix.PerfBitExcludeHv,
		Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED,
	}
	return profileFn(eventAttr, f)
}

// StalledFrontendCyclesEventAttr returns a unix.PerfEventAttr configured for StalledFrontendCycles.
func StalledFrontendCyclesEventAttr() unix.PerfEventAttr {
	return unix.PerfEventAttr{
		Type:        unix.PERF_TYPE_HARDWARE,
		Config:      unix.PERF_COUNT_HW_STALLED_CYCLES_FRONTEND,
		Size:        EventAttrSize,
		Bits:        unix.PerfBitExcludeKernel | unix.PerfBitExcludeHv,
		Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED,
	}
}

// StalledBackendCycles is used to profile a function and return the number of
// stalled backend cycles. Note that it will call runtime.LockOSThread to
// ensure accurate profiling.
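//
// A sketch of the call pattern shared by all of these profiling helpers
// (the closure wraps the code to be measured):
//
//	pv, err := StalledBackendCycles(func() error {
//		// ... code to profile ...
//		return nil
//	})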
func StalledBackendCycles(f func() error) (*ProfileValue, error) {
	eventAttr := &unix.PerfEventAttr{
		Type:        unix.PERF_TYPE_HARDWARE,
		Config:      unix.PERF_COUNT_HW_STALLED_CYCLES_BACKEND,
		Size:        EventAttrSize,
		Bits:        unix.PerfBitDisabled | unix.PerfBitExcludeKernel | unix.PerfBitExcludeHv,
		Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED,
	}
	return profileFn(eventAttr, f)
}

// StalledBackendCyclesEventAttr returns a unix.PerfEventAttr configured for StalledBackendCycles.
func StalledBackendCyclesEventAttr() unix.PerfEventAttr {
	return unix.PerfEventAttr{
		Type:        unix.PERF_TYPE_HARDWARE,
		Config:      unix.PERF_COUNT_HW_STALLED_CYCLES_BACKEND,
		Size:        EventAttrSize,
		Bits:        unix.PerfBitExcludeKernel | unix.PerfBitExcludeHv,
		Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED,
	}
}

// CPURefCycles is used to profile a function and return the number of CPU
// reference cycles, which are not affected by frequency scaling. Note that it
// will call runtime.LockOSThread to ensure accurate profiling.
func CPURefCycles(f func() error) (*ProfileValue, error) {
	eventAttr := &unix.PerfEventAttr{
		Type:        unix.PERF_TYPE_HARDWARE,
		Config:      unix.PERF_COUNT_HW_REF_CPU_CYCLES,
		Size:        EventAttrSize,
		Bits:        unix.PerfBitDisabled | unix.PerfBitExcludeKernel | unix.PerfBitExcludeHv,
		Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED,
	}
	return profileFn(eventAttr, f)
}

// CPURefCyclesEventAttr returns a unix.PerfEventAttr configured for CPURefCycles.
func CPURefCyclesEventAttr() unix.PerfEventAttr {
	return unix.PerfEventAttr{
		Type:        unix.PERF_TYPE_HARDWARE,
		Config:      unix.PERF_COUNT_HW_REF_CPU_CYCLES,
		Size:        EventAttrSize,
		Bits:        unix.PerfBitExcludeKernel | unix.PerfBitExcludeHv,
		Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED,
	}
}

// CPUClock is used to profile a function and return the CPU clock timer. Note
// that it will call runtime.LockOSThread to ensure accurate profiling.
func CPUClock(f func() error) (*ProfileValue, error) {
	eventAttr := &unix.PerfEventAttr{
		Type:        unix.PERF_TYPE_SOFTWARE,
		Config:      unix.PERF_COUNT_SW_CPU_CLOCK,
		Size:        EventAttrSize,
		Bits:        unix.PerfBitDisabled | unix.PerfBitExcludeKernel | unix.PerfBitExcludeHv,
		Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED,
	}
	return profileFn(eventAttr, f)
}

// CPUClockEventAttr returns a unix.PerfEventAttr configured for CPUClock.
func CPUClockEventAttr() unix.PerfEventAttr {
	return unix.PerfEventAttr{
		Type:        unix.PERF_TYPE_SOFTWARE,
		Config:      unix.PERF_COUNT_SW_CPU_CLOCK,
		Size:        EventAttrSize,
		Bits:        unix.PerfBitExcludeKernel | unix.PerfBitExcludeHv,
		Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED,
	}
}

// CPUTaskClock is used to profile a function and return the CPU clock timer
// for the running task. Note that it will call runtime.LockOSThread to ensure
// accurate profiling.
func CPUTaskClock(f func() error) (*ProfileValue, error) {
	eventAttr := &unix.PerfEventAttr{
		Type:        unix.PERF_TYPE_SOFTWARE,
		Config:      unix.PERF_COUNT_SW_TASK_CLOCK,
		Size:        EventAttrSize,
		Bits:        unix.PerfBitDisabled | unix.PerfBitExcludeKernel | unix.PerfBitExcludeHv,
		Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED,
	}
	return profileFn(eventAttr, f)
}

// CPUTaskClockEventAttr returns a unix.PerfEventAttr configured for CPUTaskClock.
func CPUTaskClockEventAttr() unix.PerfEventAttr {
	return unix.PerfEventAttr{
		Type:        unix.PERF_TYPE_SOFTWARE,
		Config:      unix.PERF_COUNT_SW_TASK_CLOCK,
		Size:        EventAttrSize,
		Bits:        unix.PerfBitExcludeKernel | unix.PerfBitExcludeHv,
		Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED,
	}
}

// PageFaults is used to profile a function and return the number of page
// faults. Note that it will call runtime.LockOSThread to ensure accurate
// profiling.
func PageFaults(f func() error) (*ProfileValue, error) {
	eventAttr := &unix.PerfEventAttr{
		Type:        unix.PERF_TYPE_SOFTWARE,
		Config:      unix.PERF_COUNT_SW_PAGE_FAULTS,
		Size:        EventAttrSize,
		Bits:        unix.PerfBitDisabled | unix.PerfBitExcludeKernel | unix.PerfBitExcludeHv,
		Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED,
	}
	return profileFn(eventAttr, f)
}

// PageFaultsEventAttr returns a unix.PerfEventAttr configured for PageFaults.
func PageFaultsEventAttr() unix.PerfEventAttr {
	return unix.PerfEventAttr{
		Type:        unix.PERF_TYPE_SOFTWARE,
		Config:      unix.PERF_COUNT_SW_PAGE_FAULTS,
		Size:        EventAttrSize,
		Bits:        unix.PerfBitExcludeKernel | unix.PerfBitExcludeHv,
		Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED,
	}
}

// ContextSwitches is used to profile a function and return the number of
// context switches. Note that it will call runtime.LockOSThread to ensure
// accurate profiling.
func ContextSwitches(f func() error) (*ProfileValue, error) {
	eventAttr := &unix.PerfEventAttr{
		Type:        unix.PERF_TYPE_SOFTWARE,
		Config:      unix.PERF_COUNT_SW_CONTEXT_SWITCHES,
		Size:        EventAttrSize,
		Bits:        unix.PerfBitDisabled | unix.PerfBitExcludeKernel | unix.PerfBitExcludeHv,
		Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED,
	}
	return profileFn(eventAttr, f)
}

// ContextSwitchesEventAttr returns a unix.PerfEventAttr configured for ContextSwitches.
func ContextSwitchesEventAttr() unix.PerfEventAttr {
	return unix.PerfEventAttr{
		Type:        unix.PERF_TYPE_SOFTWARE,
		Config:      unix.PERF_COUNT_SW_CONTEXT_SWITCHES,
		Size:        EventAttrSize,
		Bits:        unix.PerfBitExcludeKernel | unix.PerfBitExcludeHv,
		Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED,
	}
}

// CPUMigrations is used to profile a function and return the number of times
// the thread has been migrated to a new CPU. Note that it will call
// runtime.LockOSThread to ensure accurate profiling.
func CPUMigrations(f func() error) (*ProfileValue, error) {
	eventAttr := &unix.PerfEventAttr{
		Type:        unix.PERF_TYPE_SOFTWARE,
		Config:      unix.PERF_COUNT_SW_CPU_MIGRATIONS,
		Size:        EventAttrSize,
		Bits:        unix.PerfBitDisabled | unix.PerfBitExcludeKernel | unix.PerfBitExcludeHv,
		Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED,
	}
	return profileFn(eventAttr, f)
}

// CPUMigrationsEventAttr returns a unix.PerfEventAttr configured for CPUMigrations.
func CPUMigrationsEventAttr() unix.PerfEventAttr {
	return unix.PerfEventAttr{
		Type:   unix.PERF_TYPE_SOFTWARE,
		Config: unix.PERF_COUNT_SW_CPU_MIGRATIONS,
		Size:   EventAttrSize,
		// Note: PerfBitDisabled is left unset here to match the other
		// *EventAttr helpers; RunBenchmarks sets it before opening the event.
		Bits:        unix.PerfBitExcludeKernel | unix.PerfBitExcludeHv,
		Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED,
	}
}

// MinorPageFaults is used to profile a function and return the number of minor
// page faults. Note that it will call runtime.LockOSThread to ensure accurate
// profiling.
func MinorPageFaults(f func() error) (*ProfileValue, error) {
	eventAttr := &unix.PerfEventAttr{
		Type:        unix.PERF_TYPE_SOFTWARE,
		Config:      unix.PERF_COUNT_SW_PAGE_FAULTS_MIN,
		Size:        EventAttrSize,
		Bits:        unix.PerfBitDisabled | unix.PerfBitExcludeKernel | unix.PerfBitExcludeHv,
		Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED,
	}
	return profileFn(eventAttr, f)
}

// MinorPageFaultsEventAttr returns a unix.PerfEventAttr configured for MinorPageFaults.
func MinorPageFaultsEventAttr() unix.PerfEventAttr {
	return unix.PerfEventAttr{
		Type:        unix.PERF_TYPE_SOFTWARE,
		Config:      unix.PERF_COUNT_SW_PAGE_FAULTS_MIN,
		Size:        EventAttrSize,
		Bits:        unix.PerfBitExcludeKernel | unix.PerfBitExcludeHv,
		Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED,
	}
}

// MajorPageFaults is used to profile a function and return the number of major
// page faults. Note that it will call runtime.LockOSThread to ensure accurate
// profiling.
func MajorPageFaults(f func() error) (*ProfileValue, error) {
	eventAttr := &unix.PerfEventAttr{
		Type:        unix.PERF_TYPE_SOFTWARE,
		Config:      unix.PERF_COUNT_SW_PAGE_FAULTS_MAJ,
		Size:        EventAttrSize,
		Bits:        unix.PerfBitDisabled | unix.PerfBitExcludeKernel | unix.PerfBitExcludeHv,
		Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED,
	}
	return profileFn(eventAttr, f)
}

// MajorPageFaultsEventAttr returns a unix.PerfEventAttr configured for MajorPageFaults.
func MajorPageFaultsEventAttr() unix.PerfEventAttr {
	return unix.PerfEventAttr{
		Type:        unix.PERF_TYPE_SOFTWARE,
		Config:      unix.PERF_COUNT_SW_PAGE_FAULTS_MAJ,
		Size:        EventAttrSize,
		Bits:        unix.PerfBitExcludeKernel | unix.PerfBitExcludeHv,
		Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED,
	}
}

// AlignmentFaults is used to profile a function and return the number of alignment
// faults. Note that it will call runtime.LockOSThread to ensure accurate
// profiling.
func AlignmentFaults(f func() error) (*ProfileValue, error) {
	eventAttr := &unix.PerfEventAttr{
		Type:        unix.PERF_TYPE_SOFTWARE,
		Config:      unix.PERF_COUNT_SW_ALIGNMENT_FAULTS,
		Size:        EventAttrSize,
		Bits:        unix.PerfBitDisabled | unix.PerfBitExcludeKernel | unix.PerfBitExcludeHv,
		Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED,
	}
	return profileFn(eventAttr, f)
}

// AlignmentFaultsEventAttr returns a unix.PerfEventAttr configured for AlignmentFaults.
func AlignmentFaultsEventAttr() unix.PerfEventAttr {
	return unix.PerfEventAttr{
		Type:        unix.PERF_TYPE_SOFTWARE,
		Config:      unix.PERF_COUNT_SW_ALIGNMENT_FAULTS,
		Size:        EventAttrSize,
		Bits:        unix.PerfBitExcludeKernel | unix.PerfBitExcludeHv,
		Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED,
	}
}

// EmulationFaults is used to profile a function and return the number of emulation
// faults. Note that it will call runtime.LockOSThread to ensure accurate
// profiling.
func EmulationFaults(f func() error) (*ProfileValue, error) {
	eventAttr := &unix.PerfEventAttr{
		Type:        unix.PERF_TYPE_SOFTWARE,
		Config:      unix.PERF_COUNT_SW_EMULATION_FAULTS,
		Size:        EventAttrSize,
		Bits:        unix.PerfBitDisabled | unix.PerfBitExcludeKernel | unix.PerfBitExcludeHv,
		Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED,
	}
	return profileFn(eventAttr, f)
}

// EmulationFaultsEventAttr returns a unix.PerfEventAttr configured for EmulationFaults.
func EmulationFaultsEventAttr() unix.PerfEventAttr {
	return unix.PerfEventAttr{
		Type:        unix.PERF_TYPE_SOFTWARE,
		Config:      unix.PERF_COUNT_SW_EMULATION_FAULTS,
		Size:        EventAttrSize,
		Bits:        unix.PerfBitExcludeKernel | unix.PerfBitExcludeHv,
		Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED,
	}
}

// L1Data is used to profile a function and return the number of L1 data cache
// operations. Use PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_OP_WRITE,
// or PERF_COUNT_HW_CACHE_OP_PREFETCH for the op and
// PERF_COUNT_HW_CACHE_RESULT_ACCESS or PERF_COUNT_HW_CACHE_RESULT_MISS for the
// result. Note that it will call runtime.LockOSThread to ensure accurate
// profiling.
func L1Data(op, result int, f func() error) (*ProfileValue, error) {
	eventAttr := &unix.PerfEventAttr{
		Type:        unix.PERF_TYPE_HW_CACHE,
		Config:      uint64((unix.PERF_COUNT_HW_CACHE_L1D) | (op << 8) | (result << 16)),
		Size:        EventAttrSize,
		Bits:        unix.PerfBitDisabled | unix.PerfBitExcludeKernel | unix.PerfBitExcludeHv,
		Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED,
	}
	return profileFn(eventAttr, f)
}

// L1DataEventAttr returns a unix.PerfEventAttr configured for L1Data.
func L1DataEventAttr(op, result int) unix.PerfEventAttr {
	return unix.PerfEventAttr{
		Type:        unix.PERF_TYPE_HW_CACHE,
		Config:      uint64((unix.PERF_COUNT_HW_CACHE_L1D) | (op << 8) | (result << 16)),
		Size:        EventAttrSize,
		Bits:        unix.PerfBitExcludeKernel | unix.PerfBitExcludeHv,
		Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED,
	}
}

// L1Instructions is used to profile a function for the instruction level L1
// cache. Use PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_OP_WRITE, or
// PERF_COUNT_HW_CACHE_OP_PREFETCH for the op and
// PERF_COUNT_HW_CACHE_RESULT_ACCESS or PERF_COUNT_HW_CACHE_RESULT_MISS for the
// result. Note that it will call runtime.LockOSThread to ensure accurate
// profiling.
func L1Instructions(op, result int, f func() error) (*ProfileValue, error) {
	eventAttr := &unix.PerfEventAttr{
		Type:        unix.PERF_TYPE_HW_CACHE,
		Config:      uint64((unix.PERF_COUNT_HW_CACHE_L1I) | (op << 8) | (result << 16)),
		Size:        EventAttrSize,
		Bits:        unix.PerfBitDisabled | unix.PerfBitExcludeKernel | unix.PerfBitExcludeHv,
		Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED,
	}
	return profileFn(eventAttr, f)
}

// L1InstructionsEventAttr returns a unix.PerfEventAttr configured for L1Instructions.
func L1InstructionsEventAttr(op, result int) unix.PerfEventAttr {
	return unix.PerfEventAttr{
		Type:        unix.PERF_TYPE_HW_CACHE,
		Config:      uint64((unix.PERF_COUNT_HW_CACHE_L1I) | (op << 8) | (result << 16)),
		Size:        EventAttrSize,
		Bits:        unix.PerfBitExcludeKernel | unix.PerfBitExcludeHv,
		Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED,
	}
}

// LLCache is used to profile a function for the last level cache. Use
// PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_OP_WRITE, or
// PERF_COUNT_HW_CACHE_OP_PREFETCH for the op and
// PERF_COUNT_HW_CACHE_RESULT_ACCESS or PERF_COUNT_HW_CACHE_RESULT_MISS for the
// result. Note that it will call runtime.LockOSThread to ensure accurate
// profiling.
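//
// The op and result selectors are packed into the perf config value as
// cache | (op << 8) | (result << 16), per perf_event_open(2). A usage
// sketch (mirrors TestLLCache in utils_test.go):
//
//	pv, err := LLCache(
//		unix.PERF_COUNT_HW_CACHE_OP_READ,
//		unix.PERF_COUNT_HW_CACHE_RESULT_ACCESS,
//		func() error { return nil },
//	)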
func LLCache(op, result int, f func() error) (*ProfileValue, error) {
	eventAttr := &unix.PerfEventAttr{
		Type:        unix.PERF_TYPE_HW_CACHE,
		Config:      uint64((unix.PERF_COUNT_HW_CACHE_LL) | (op << 8) | (result << 16)),
		Size:        EventAttrSize,
		Bits:        unix.PerfBitDisabled | unix.PerfBitExcludeKernel | unix.PerfBitExcludeHv,
		Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED,
	}
	return profileFn(eventAttr, f)
}

// LLCacheEventAttr returns a unix.PerfEventAttr configured for LLCache.
func LLCacheEventAttr(op, result int) unix.PerfEventAttr {
	return unix.PerfEventAttr{
		Type:        unix.PERF_TYPE_HW_CACHE,
		Config:      uint64((unix.PERF_COUNT_HW_CACHE_LL) | (op << 8) | (result << 16)),
		Size:        EventAttrSize,
		Bits:        unix.PerfBitExcludeKernel | unix.PerfBitExcludeHv,
		Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED,
	}
}

// DataTLB is used to profile the data TLB. Use PERF_COUNT_HW_CACHE_OP_READ,
// PERF_COUNT_HW_CACHE_OP_WRITE, or PERF_COUNT_HW_CACHE_OP_PREFETCH for the op
// and PERF_COUNT_HW_CACHE_RESULT_ACCESS or PERF_COUNT_HW_CACHE_RESULT_MISS for
// the result. Note that it will call runtime.LockOSThread to ensure accurate
// profiling.
func DataTLB(op, result int, f func() error) (*ProfileValue, error) {
	eventAttr := &unix.PerfEventAttr{
		Type:        unix.PERF_TYPE_HW_CACHE,
		Config:      uint64((unix.PERF_COUNT_HW_CACHE_DTLB) | (op << 8) | (result << 16)),
		Size:        EventAttrSize,
		Bits:        unix.PerfBitDisabled | unix.PerfBitExcludeKernel | unix.PerfBitExcludeHv,
		Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED,
	}
	return profileFn(eventAttr, f)
}

// DataTLBEventAttr returns a unix.PerfEventAttr configured for DataTLB.
func DataTLBEventAttr(op, result int) unix.PerfEventAttr {
	return unix.PerfEventAttr{
		Type:        unix.PERF_TYPE_HW_CACHE,
		Config:      uint64((unix.PERF_COUNT_HW_CACHE_DTLB) | (op << 8) | (result << 16)),
		Size:        EventAttrSize,
		Bits:        unix.PerfBitExcludeKernel | unix.PerfBitExcludeHv,
		Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED,
	}
}

// InstructionTLB is used to profile the instruction TLB. Use
// PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_OP_WRITE, or
// PERF_COUNT_HW_CACHE_OP_PREFETCH for the op and
// PERF_COUNT_HW_CACHE_RESULT_ACCESS or PERF_COUNT_HW_CACHE_RESULT_MISS for the
// result. Note that it will call runtime.LockOSThread to ensure accurate
// profiling.
func InstructionTLB(op, result int, f func() error) (*ProfileValue, error) {
	eventAttr := &unix.PerfEventAttr{
		Type:        unix.PERF_TYPE_HW_CACHE,
		Config:      uint64((unix.PERF_COUNT_HW_CACHE_ITLB) | (op << 8) | (result << 16)),
		Size:        EventAttrSize,
		Bits:        unix.PerfBitDisabled | unix.PerfBitExcludeKernel | unix.PerfBitExcludeHv,
		Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED,
	}
	return profileFn(eventAttr, f)
}

// InstructionTLBEventAttr returns a unix.PerfEventAttr configured for InstructionTLB.
func InstructionTLBEventAttr(op, result int) unix.PerfEventAttr {
	return unix.PerfEventAttr{
		Type:        unix.PERF_TYPE_HW_CACHE,
		Config:      uint64((unix.PERF_COUNT_HW_CACHE_ITLB) | (op << 8) | (result << 16)),
		Size:        EventAttrSize,
		Bits:        unix.PerfBitExcludeKernel | unix.PerfBitExcludeHv,
		Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED,
	}
}

// BPU is used to profile a function for the Branch Predictor Unit.
// Use PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_OP_WRITE, or
// PERF_COUNT_HW_CACHE_OP_PREFETCH for the op and
// PERF_COUNT_HW_CACHE_RESULT_ACCESS or PERF_COUNT_HW_CACHE_RESULT_MISS for the
// result. Note that it will call runtime.LockOSThread to ensure accurate
// profiling.
func BPU(op, result int, f func() error) (*ProfileValue, error) {
	eventAttr := &unix.PerfEventAttr{
		Type:        unix.PERF_TYPE_HW_CACHE,
		Config:      uint64((unix.PERF_COUNT_HW_CACHE_BPU) | (op << 8) | (result << 16)),
		Size:        EventAttrSize,
		Bits:        unix.PerfBitDisabled | unix.PerfBitExcludeKernel | unix.PerfBitExcludeHv,
		Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED,
	}
	return profileFn(eventAttr, f)
}

// BPUEventAttr returns a unix.PerfEventAttr configured for BPU events.
func BPUEventAttr(op, result int) unix.PerfEventAttr {
	return unix.PerfEventAttr{
		Type:        unix.PERF_TYPE_HW_CACHE,
		Config:      uint64((unix.PERF_COUNT_HW_CACHE_BPU) | (op << 8) | (result << 16)),
		Size:        EventAttrSize,
		Bits:        unix.PerfBitExcludeKernel | unix.PerfBitExcludeHv,
		Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED,
	}
}

// NodeCache is used to profile a function for NUMA operations. Use
// PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_OP_WRITE, or
// PERF_COUNT_HW_CACHE_OP_PREFETCH for the op and
// PERF_COUNT_HW_CACHE_RESULT_ACCESS or PERF_COUNT_HW_CACHE_RESULT_MISS for the
// result. Note that it will call runtime.LockOSThread to ensure accurate
// profiling.
func NodeCache(op, result int, f func() error) (*ProfileValue, error) {
	eventAttr := &unix.PerfEventAttr{
		Type:        unix.PERF_TYPE_HW_CACHE,
		Config:      uint64((unix.PERF_COUNT_HW_CACHE_NODE) | (op << 8) | (result << 16)),
		Size:        EventAttrSize,
		Bits:        unix.PerfBitDisabled | unix.PerfBitExcludeKernel | unix.PerfBitExcludeHv,
		Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED,
	}
	return profileFn(eventAttr, f)
}

// NodeCacheEventAttr returns a unix.PerfEventAttr configured for NUMA cache operations.
func NodeCacheEventAttr(op, result int) unix.PerfEventAttr {
	return unix.PerfEventAttr{
		Type:        unix.PERF_TYPE_HW_CACHE,
		Config:      uint64((unix.PERF_COUNT_HW_CACHE_NODE) | (op << 8) | (result << 16)),
		Size:        EventAttrSize,
		Bits:        unix.PerfBitExcludeKernel | unix.PerfBitExcludeHv,
		Read_format: unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_TOTAL_TIME_ENABLED,
	}
}

// getTids returns the set of all thread ids for a process.
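//
// The ids are read from /proc/<pid>/task, so e.g. getTids(os.Getpid())
// lists the OS threads of the calling process (see TestGetTids).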
func getTids(pid int) ([]int, error) { fileInfo, err := ioutil.ReadDir(fmt.Sprintf("/proc/%d/task", pid)) if err != nil { return nil, err } tids := []int{} for _, file := range fileInfo { tid, err := strconv.Atoi(file.Name()) if err != nil { return nil, err } tids = append(tids, tid) } return tids, nil } perf-utils-0.5.1/utils_test.go000066400000000000000000000135741413762255200163560ustar00rootroot00000000000000package perf import ( "os" "runtime" "testing" "golang.org/x/sys/unix" ) func TestMaxOpenFiles(t *testing.T) { _, err := MaxOpenFiles() if err != nil { t.Fatal(err) } } func TestCPUInstructions(t *testing.T) { _, err := CPUInstructions( func() error { return nil }, ) if err != nil { t.Fatal(err) } } func TestCPUCycles(t *testing.T) { _, err := CPUCycles( func() error { return nil }, ) if err != nil { t.Fatal(err) } } func TestCacheRef(t *testing.T) { _, err := CacheRef( func() error { return nil }, ) if err != nil { t.Fatal(err) } } func TestCacheMiss(t *testing.T) { _, err := CacheMiss( func() error { return nil }, ) if err != nil { t.Fatal(err) } } func TestBusCycles(t *testing.T) { _, err := BusCycles( func() error { return nil }, ) if err != nil { t.Fatal(err) } } func TestStalledFrontendCycles(t *testing.T) { t.Skip() _, err := StalledFrontendCycles( func() error { return nil }, ) if err != nil { t.Fatal(err) } } func TestStalledBackendCycles(t *testing.T) { t.Skip() _, err := StalledBackendCycles( func() error { return nil }, ) if err != nil { t.Fatal(err) } } func TestCPURefCycles(t *testing.T) { _, err := CPURefCycles( func() error { return nil }, ) if err != nil { t.Fatal(err) } } func TestCPUClock(t *testing.T) { _, err := CPUClock( func() error { return nil }, ) if err != nil { t.Fatal(err) } } func TestCPUTaskClock(t *testing.T) { _, err := CPUTaskClock( func() error { return nil }, ) if err != nil { t.Fatal(err) } } func TestPageFaults(t *testing.T) { _, err := PageFaults( func() error { return nil }, ) if err != nil { t.Fatal(err) } } func TestContextSwitches(t *testing.T) { _, err := ContextSwitches( func() error { return nil }, ) if err != nil { t.Fatal(err) } } func TestCPUMigrations(t *testing.T) { _, err := CPUMigrations( func() error { return nil }, ) if err != nil { t.Fatal(err) } } func TestMinorPageFaults(t *testing.T) { _, err := MinorPageFaults( func() error { return nil }, ) if err != nil { t.Fatal(err) } } func TestMajorPageFaults(t *testing.T) { _, err := MajorPageFaults( func() error { return nil }, ) if err != nil { t.Fatal(err) } } func TestAlignmentFaults(t *testing.T) { _, err := AlignmentFaults( func() error { return nil }, ) if err != nil { t.Fatal(err) } } func TestEmulationFaults(t *testing.T) { _, err := EmulationFaults( func() error { return nil }, ) if err != nil { t.Fatal(err) } } func TestL1Data(t *testing.T) { _, err := L1Data( unix.PERF_COUNT_HW_CACHE_OP_READ, unix.PERF_COUNT_HW_CACHE_RESULT_ACCESS, func() error { return nil }, ) if err != nil { t.Fatal(err) } } func TestL1Instructions(t *testing.T) { _, err := L1Instructions( unix.PERF_COUNT_HW_CACHE_OP_READ, unix.PERF_COUNT_HW_CACHE_RESULT_MISS, func() error { return nil }, ) if err != nil { t.Fatal(err) } } func TestLLCache(t *testing.T) { _, err := LLCache( unix.PERF_COUNT_HW_CACHE_OP_READ, unix.PERF_COUNT_HW_CACHE_RESULT_ACCESS, func() error { return nil }, ) if err != nil { t.Fatal(err) } } func TestDataTLB(t *testing.T) { _, err := DataTLB( unix.PERF_COUNT_HW_CACHE_OP_READ, unix.PERF_COUNT_HW_CACHE_RESULT_ACCESS, func() error { return nil }, ) if err != nil { t.Fatal(err) } } func 
TestInstructionTLB(t *testing.T) { _, err := InstructionTLB( unix.PERF_COUNT_HW_CACHE_OP_READ, unix.PERF_COUNT_HW_CACHE_RESULT_ACCESS, func() error { return nil }, ) if err != nil { t.Fatal(err) } } func TestBPU(t *testing.T) { _, err := BPU( unix.PERF_COUNT_HW_CACHE_OP_READ, unix.PERF_COUNT_HW_CACHE_RESULT_ACCESS, func() error { return nil }, ) if err != nil { t.Fatal(err) } } func TestNodeCache(t *testing.T) { _, err := NodeCache( unix.PERF_COUNT_HW_CACHE_OP_READ, unix.PERF_COUNT_HW_CACHE_RESULT_ACCESS, func() error { return nil }, ) if err != nil { t.Fatal(err) } } func TestGetTids(t *testing.T) { tids, err := getTids(os.Getpid()) if err != nil { t.Fatal(err) } if len(tids) == 1 { t.Fatalf("expected multiple threads, got: %+v", tids) } } func BenchmarkCPUCycles(b *testing.B) { b.ResetTimer() b.ReportAllocs() for n := 0; n < b.N; n++ { CPUCycles( func() error { return nil }, ) } } func BenchmarkThreadLocking(b *testing.B) { b.ResetTimer() b.ReportAllocs() for n := 0; n < b.N; n++ { runtime.LockOSThread() runtime.UnlockOSThread() } } func BenchmarkRunBenchmarks(b *testing.B) { eventAttrs := []unix.PerfEventAttr{ CPUInstructionsEventAttr(), CPUCyclesEventAttr(), } RunBenchmarks( b, func(b *testing.B) { for n := 1; n < b.N; n++ { a := 42 for i := 0; i < 1000; i++ { a += i } } }, BenchStrict, eventAttrs..., ) } func BenchmarkRunBenchmarksLocked(b *testing.B) { eventAttrs := []unix.PerfEventAttr{ CPUInstructionsEventAttr(), CPUCyclesEventAttr(), } RunBenchmarks( b, func(b *testing.B) { for n := 1; n < b.N; n++ { a := 42 for i := 0; i < 1000; i++ { a += i } } }, BenchLock|BenchStrict, eventAttrs..., ) } func BenchmarkBenchmarkTracepointsLocked(b *testing.B) { tracepoints := []string{ "syscalls:sys_enter_getrusage", } BenchmarkTracepoints( b, func(b *testing.B) { for n := 1; n < b.N; n++ { if err := unix.Getrusage(0, &unix.Rusage{}); err != nil { b.Fatal(err) } if err := unix.Getrusage(0, &unix.Rusage{}); err != nil { b.Fatal(err) } } }, BenchLock|BenchStrict, tracepoints..., ) } func BenchmarkBenchmarkTracepoints(b *testing.B) { tracepoints := []string{ "syscalls:sys_enter_getrusage", } BenchmarkTracepoints( b, func(b *testing.B) { for n := 1; n < b.N; n++ { if err := unix.Getrusage(0, &unix.Rusage{}); err != nil { b.Fatal(err) } if err := unix.Getrusage(0, &unix.Rusage{}); err != nil { b.Fatal(err) } } }, BenchStrict, tracepoints..., ) }