papi-5.6.0/src/libpfm4/lib/events/amd64_events_fam15h.h000664 001750 001750 00000111243 13216244364 024547 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2011 Google, Inc * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. * * This file has been automatically generated. 
* * PMU: amd64_fam15h (AMD64 Fam15h Interlagos) * * Based on libpfm patch by Robert Richter : * Family 15h Microarchitecture performance monitor events * * History: * * Apr 29 2011 -- Robert Richter, robert.richter@amd.com: * Source: BKDG for AMD Family 15h Models 00h-0Fh Processors, * 42301, Rev 1.15, April 18, 2011 * * Dec 09 2010 -- Robert Richter, robert.richter@amd.com: * Source: BIOS and Kernel Developer's Guide for the AMD Family 15h * Processors, Rev 0.90, May 18, 2010 */ /* CORE_SELECT(b): expands to nine unit-mask initializers, CORE_0 through CORE_7 plus ANY_CORE, all assigned to unit-mask group b (.grpid = b). The core index n is encoded in .ucode as n << 4; ANY_CORE uses 0xf << 4 and additionally carries AMD64_FL_DFL. Every entry carries AMD64_FL_NCOMBO. No table visible in this part of the file references the macro. */ #define CORE_SELECT(b) \ { .uname = "CORE_0",\ .udesc = "Measure on Core0",\ .ucode = 0 << 4,\ .grpid = b,\ .uflags= AMD64_FL_NCOMBO,\ },\ { .uname = "CORE_1",\ .udesc = "Measure on Core1",\ .ucode = 1 << 4,\ .grpid = b,\ .uflags= AMD64_FL_NCOMBO,\ },\ { .uname = "CORE_2",\ .udesc = "Measure on Core2",\ .ucode = 2 << 4,\ .grpid = b,\ .uflags= AMD64_FL_NCOMBO,\ },\ { .uname = "CORE_3",\ .udesc = "Measure on Core3",\ .ucode = 3 << 4,\ .grpid = b,\ .uflags= AMD64_FL_NCOMBO,\ },\ { .uname = "CORE_4",\ .udesc = "Measure on Core4",\ .ucode = 4 << 4,\ .grpid = b,\ .uflags= AMD64_FL_NCOMBO,\ },\ { .uname = "CORE_5",\ .udesc = "Measure on Core5",\ .ucode = 5 << 4,\ .grpid = b,\ .uflags= AMD64_FL_NCOMBO,\ },\ { .uname = "CORE_6",\ .udesc = "Measure on Core6",\ .ucode = 6 << 4,\ .grpid = b,\ .uflags= AMD64_FL_NCOMBO,\ },\ { .uname = "CORE_7",\ .udesc = "Measure on Core7",\ .ucode = 7 << 4,\ .grpid = b,\ .uflags= AMD64_FL_NCOMBO,\ },\ { .uname = "ANY_CORE",\ .udesc = "Measure on any core",\ .ucode = 0xf << 4,\ .grpid = b,\ .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL,\ } /* Unit-mask tables, one per event that has sub-events. Each amd64_umask_t entry gives the sub-event name (.uname), its description (.udesc) and its bit value (.ucode). In every table the last entry is named ALL, has a .ucode equal to the bitwise OR of the entries before it, and is flagged AMD64_FL_NCOMBO | AMD64_FL_DFL. */ static const amd64_umask_t amd64_fam15h_dispatched_fpu_ops[]={ { .uname = "OPS_PIPE0", .udesc = "Total number uops assigned to Pipe 0", .ucode = 0x1, }, { .uname = "OPS_PIPE1", .udesc = "Total number uops assigned to Pipe 1", .ucode = 0x2, }, { .uname = "OPS_PIPE2", .udesc = "Total number uops assigned to Pipe 2", .ucode = 0x4, }, { .uname = "OPS_PIPE3", .udesc = "Total number uops assigned to Pipe 3", .ucode = 0x8, }, { .uname = 
"OPS_DUAL_PIPE0", .udesc = "Total number dual-pipe uops assigned to Pipe 0", .ucode = 0x10, }, { .uname = "OPS_DUAL_PIPE1", .udesc = "Total number dual-pipe uops assigned to Pipe 1", .ucode = 0x20, }, { .uname = "OPS_DUAL_PIPE2", .udesc = "Total number dual-pipe uops assigned to Pipe 2", .ucode = 0x40, }, { .uname = "OPS_DUAL_PIPE3", .udesc = "Total number dual-pipe uops assigned to Pipe 3", .ucode = 0x80, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0xff, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam15h_retired_sse_ops[]={ { .uname = "SINGLE_ADD_SUB_OPS", .udesc = "Single-precision add/subtract FLOPS", .ucode = 0x1, }, { .uname = "SINGLE_MUL_OPS", .udesc = "Single-precision multiply FLOPS", .ucode = 0x2, }, { .uname = "SINGLE_DIV_OPS", .udesc = "Single-precision divide/square root FLOPS", .ucode = 0x4, }, { .uname = "SINGLE_MUL_ADD_OPS", .udesc = "Single precision multiply-add FLOPS. Multiply-add counts as 2 FLOPS", .ucode = 0x8, }, { .uname = "DOUBLE_ADD_SUB_OPS", .udesc = "Double precision add/subtract FLOPS", .ucode = 0x10, }, { .uname = "DOUBLE_MUL_OPS", .udesc = "Double precision multiply FLOPS", .ucode = 0x20, }, { .uname = "DOUBLE_DIV_OPS", .udesc = "Double precision divide/square root FLOPS", .ucode = 0x40, }, { .uname = "DOUBLE_MUL_ADD_OPS", .udesc = "Double precision multiply-add FLOPS. 
Multiply-add counts as 2 FLOPS", .ucode = 0x80, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0xff, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam15h_move_scalar_optimization[]={ { .uname = "SSE_MOVE_OPS", .udesc = "Number of SSE Move Ops", .ucode = 0x1, }, { .uname = "SSE_MOVE_OPS_ELIM", .udesc = "Number of SSE Move Ops eliminated", .ucode = 0x2, }, { .uname = "OPT_CAND", .udesc = "Number of Ops that are candidates for optimization (Z-bit set or pass)", .ucode = 0x4, }, { .uname = "SCALAR_OPS_OPTIMIZED", .udesc = "Number of Scalar ops optimized", .ucode = 0x8, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0xf, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam15h_retired_serializing_ops[]={ { .uname = "SSE_RETIRED", .udesc = "SSE bottom-executing uops retired", .ucode = 0x1, }, { .uname = "SSE_MISPREDICTED", .udesc = "SSE control word mispredict traps due to mispredictions", .ucode = 0x2, }, { .uname = "X87_RETIRED", .udesc = "X87 bottom-executing uops retired", .ucode = 0x4, }, { .uname = "X87_MISPREDICTED", .udesc = "X87 control word mispredict traps due to mispredictions", .ucode = 0x8, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0xf, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam15h_segment_register_loads[]={ { .uname = "ES", .udesc = "ES", .ucode = 0x1, }, { .uname = "CS", .udesc = "CS", .ucode = 0x2, }, { .uname = "SS", .udesc = "SS", .ucode = 0x4, }, { .uname = "DS", .udesc = "DS", .ucode = 0x8, }, { .uname = "FS", .udesc = "FS", .ucode = 0x10, }, { .uname = "GS", .udesc = "GS", .ucode = 0x20, }, { .uname = "HS", .udesc = "HS", .ucode = 0x40, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x7f, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam15h_load_q_store_q_full[]={ { .uname = "LOAD_QUEUE", .udesc = "The number of cycles that the load 
buffer is full", .ucode = 0x1, }, { .uname = "STORE_QUEUE", .udesc = "The number of cycles that the store buffer is full", .ucode = 0x2, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x3, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam15h_locked_ops[]={ { .uname = "EXECUTED", .udesc = "Number of locked instructions executed", .ucode = 0x1, }, { .uname = "CYCLES_NON_SPECULATIVE_PHASE", .udesc = "Number of cycles spent in non-speculative phase, excluding cache miss penalty", .ucode = 0x4, }, { .uname = "CYCLES_WAITING", .udesc = "Number of cycles spent in non-speculative phase, including the cache miss penalty", .ucode = 0x8, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0xd, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam15h_cancelled_store_to_load[]={ { .uname = "SIZE_ADDRESS_MISMATCHES", .udesc = "Store is smaller than load or different starting byte but partial overlap", .ucode = 0x1, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x1, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam15h_data_cache_misses[]={ { .uname = "DC_MISS_STREAMING_STORE", .udesc = "First data cache miss or streaming store to a 64B cache line", .ucode = 0x1, }, { .uname = "STREAMING_STORE", .udesc = "First streaming store to a 64B cache line", .ucode = 0x2, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x3, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam15h_data_cache_refills_from_l2_or_northbridge[]={ { .uname = "GOOD", .udesc = "Fill with good data. 
(Final valid status is valid)", .ucode = 0x1, }, { .uname = "INVALID", .udesc = "Early valid status turned out to be invalid", .ucode = 0x2, }, { .uname = "POISON", .udesc = "Fill with poison data", .ucode = 0x4, }, { .uname = "READ_ERROR", .udesc = "Fill with read data error", .ucode = 0x8, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0xf, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam15h_unified_tlb_hit[]={ { .uname = "4K_DATA", .udesc = "4 KB unified TLB hit for data", .ucode = 0x1, }, { .uname = "2M_DATA", .udesc = "2 MB unified TLB hit for data", .ucode = 0x2, }, { .uname = "1G_DATA", .udesc = "1 GB unified TLB hit for data", .ucode = 0x4, }, { .uname = "4K_INST", .udesc = "4 KB unified TLB hit for instruction", .ucode = 0x10, }, { .uname = "2M_INST", .udesc = "2 MB unified TLB hit for instruction", .ucode = 0x20, }, { .uname = "1G_INST", .udesc = "1 GB unified TLB hit for instruction", .ucode = 0x40, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x77, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam15h_unified_tlb_miss[]={ { .uname = "4K_DATA", .udesc = "4 KB unified TLB miss for data", .ucode = 0x1, }, { .uname = "2M_DATA", .udesc = "2 MB unified TLB miss for data", .ucode = 0x2, }, /* NOTE(review): the next entry is spelled 1GB_DATA, whereas amd64_fam15h_unified_tlb_hit uses 1G_DATA and this table uses 1G_INST for the instruction side. The name is presumably matched against user-supplied event strings, so confirm with callers before normalizing it. */ { .uname = "1GB_DATA", .udesc = "1 GB unified TLB miss for data", .ucode = 0x4, }, { .uname = "4K_INST", .udesc = "4 KB unified TLB miss for instruction", .ucode = 0x10, }, { .uname = "2M_INST", .udesc = "2 MB unified TLB miss for instruction", .ucode = 0x20, }, { .uname = "1G_INST", .udesc = "1 GB unified TLB miss for instruction", .ucode = 0x40, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x77, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam15h_prefetch_instructions_dispatched[]={ { .uname = "LOAD", .udesc = "Load (Prefetch, PrefetchT0/T1/T2)", .ucode = 0x1, }, { .uname = "STORE", .udesc = "Store (PrefetchW)", 
.ucode = 0x2, }, { .uname = "NTA", .udesc = "NTA (PrefetchNTA)", .ucode = 0x4, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x7, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam15h_ineffective_sw_prefetches[]={ { .uname = "SW_PREFETCH_HIT_IN_L1", .udesc = "Software prefetch hit in the L1", .ucode = 0x1, }, { .uname = "SW_PREFETCH_HIT_IN_L2", .udesc = "Software prefetch hit in the L2", .ucode = 0x8, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x9, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam15h_memory_requests[]={ { .uname = "NON_CACHEABLE", .udesc = "Requests to non-cacheable (UC) memory", .ucode = 0x1, }, { .uname = "WRITE_COMBINING", .udesc = "Requests to non-cacheable (WC, but not WC+/SS) memory", .ucode = 0x2, }, { .uname = "STREAMING_STORE", .udesc = "Requests to non-cacheable (WC+/SS, but not WC) memory", .ucode = 0x80, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x83, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam15h_data_prefetcher[]={ { .uname = "ATTEMPTED", .udesc = "Prefetch attempts", .ucode = 0x2, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x2, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam15h_mab_reqs[]={ { .uname = "BUFFER_BIT_0", .udesc = "Buffer entry index bit 0", .ucode = 0x1, }, { .uname = "BUFFER_BIT_1", .udesc = "Buffer entry index bit 1", .ucode = 0x2, }, { .uname = "BUFFER_BIT_2", .udesc = "Buffer entry index bit 2", .ucode = 0x4, }, { .uname = "BUFFER_BIT_3", .udesc = "Buffer entry index bit 3", .ucode = 0x8, }, { .uname = "BUFFER_BIT_4", .udesc = "Buffer entry index bit 4", .ucode = 0x10, }, { .uname = "BUFFER_BIT_5", .udesc = "Buffer entry index bit 5", .ucode = 0x20, }, { .uname = "BUFFER_BIT_6", .udesc = "Buffer entry index bit 6", .ucode = 0x40, }, { .uname = "BUFFER_BIT_7", .udesc = "Buffer entry index 
bit 7", .ucode = 0x80, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0xff, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam15h_system_read_responses[]={ { .uname = "EXCLUSIVE", .udesc = "Exclusive", .ucode = 0x1, }, { .uname = "MODIFIED", .udesc = "Modified (D18F0x68[ATMModeEn]==0), Modified written (D18F0x68[ATMModeEn]==1)", .ucode = 0x2, }, { .uname = "SHARED", .udesc = "Shared", .ucode = 0x4, }, { .uname = "OWNED", .udesc = "Owned", .ucode = 0x8, }, { .uname = "DATA_ERROR", .udesc = "Data Error", .ucode = 0x10, }, { .uname = "MODIFIED_UNWRITTEN", .udesc = "Modified unwritten", .ucode = 0x20, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x3f, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam15h_octword_write_transfers[]={ { .uname = "OCTWORD_WRITE_TRANSFER", .udesc = "OW write transfer", .ucode = 0x1, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x1, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam15h_requests_to_l2[]={ { .uname = "INSTRUCTIONS", .udesc = "IC fill", .ucode = 0x1, }, { .uname = "DATA", .udesc = "DC fill", .ucode = 0x2, }, { .uname = "TLB_WALK", .udesc = "TLB fill (page table walks)", .ucode = 0x4, }, { .uname = "SNOOP", .udesc = "NB probe request", .ucode = 0x8, }, { .uname = "CANCELLED", .udesc = "Canceled request", .ucode = 0x10, }, { .uname = "PREFETCHER", .udesc = "L2 cache prefetcher request", .ucode = 0x40, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x5f, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam15h_l2_cache_miss[]={ { .uname = "INSTRUCTIONS", .udesc = "IC fill", .ucode = 0x1, }, { .uname = "DATA", .udesc = "DC fill (includes possible replays, whereas PMCx041 does not)", .ucode = 0x2, }, { .uname = "TLB_WALK", .udesc = "TLB page table walk", .ucode = 0x4, }, { .uname = "PREFETCHER", .udesc = "L2 Cache Prefetcher 
request", .ucode = 0x10, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x17, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam15h_l2_cache_fill_writeback[]={ { .uname = "L2_FILLS", .udesc = "L2 fills from system", .ucode = 0x1, }, { .uname = "L2_WRITEBACKS", .udesc = "L2 Writebacks to system (Clean and Dirty)", .ucode = 0x2, }, { .uname = "L2_WRITEBACKS_CLEAN", .udesc = "L2 Clean Writebacks to system", .ucode = 0x4, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x7, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam15h_page_splintering[]={ { .uname = "GUEST_LARGER", .udesc = "Guest page size is larger than host page size when nested paging is enabled", .ucode = 0x1, }, { .uname = "MTRR_MISMATCH", .udesc = "Splintering due to MTRRs, IORRs, APIC, TOMs or other special address region", .ucode = 0x2, }, { .uname = "HOST_LARGER", .udesc = "Host page size is larger than the guest page size", .ucode = 0x4, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x7, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam15h_l1_itlb_miss_and_l2_itlb_miss[]={ { .uname = "4K_PAGE_FETCHES", .udesc = "Instruction fetches to a 4 KB page", .ucode = 0x1, }, { .uname = "2M_PAGE_FETCHES", .udesc = "Instruction fetches to a 2 MB page", .ucode = 0x2, }, { .uname = "1G_PAGE_FETCHES", .udesc = "Instruction fetches to a 1 GB page", .ucode = 0x4, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x7, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam15h_instruction_cache_invalidated[]={ { .uname = "NON_SMC_PROBE_MISS", .udesc = "Non-SMC invalidating probe that missed on in-flight instructions", .ucode = 0x1, }, { .uname = "NON_SMC_PROBE_HIT", .udesc = "Non-SMC invalidating probe that hit on in-flight instructions", .ucode = 0x2, }, { .uname = "SMC_PROBE_MISS", .udesc = "SMC invalidating probe that 
missed on in-flight instructions", .ucode = 0x4, }, { .uname = "SMC_PROBE_HIT", .udesc = "SMC invalidating probe that hit on in-flight instructions", .ucode = 0x8, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0xf, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam15h_retired_mmx_fp_instructions[]={ { .uname = "X87", .udesc = "X87 instructions", .ucode = 0x1, }, { .uname = "MMX", .udesc = "MMX(tm) instructions", .ucode = 0x2, }, { .uname = "SSE", .udesc = "SSE instructions (SSE,SSE2,SSE3,SSSE3,SSE4A,SSE4.1,SSE4.2,AVX,XOP,FMA4)", .ucode = 0x4, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x7, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam15h_fpu_exceptions[]={ { .uname = "TOTAL_FAULTS", .udesc = "Total microfaults", .ucode = 0x1, }, { .uname = "TOTAL_TRAPS", .udesc = "Total microtraps", .ucode = 0x2, }, { .uname = "INT2EXT_FAULTS", .udesc = "Int2Ext faults", .ucode = 0x4, }, { .uname = "EXT2INT_FAULTS", .udesc = "Ext2Int faults", .ucode = 0x8, }, { .uname = "BYPASS_FAULTS", .udesc = "Bypass faults", .ucode = 0x10, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x1f, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam15h_ibs_ops_tagged[]={ { .uname = "TAGGED", .udesc = "Number of ops tagged by IBS", .ucode = 0x1, }, { .uname = "RETIRED", .udesc = "Number of ops tagged by IBS that retired", .ucode = 0x2, }, { .uname = "IGNORED", .udesc = "Number of times an op could not be tagged by IBS because of a previous tagged op that has not retired", .ucode = 0x4, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x7, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam15h_ls_dispatch[]={ { .uname = "LOADS", .udesc = "Loads", .ucode = 0x1, }, { .uname = "STORES", .udesc = "Stores", .ucode = 0x2, }, { .uname = "LOAD_OP_STORES", .udesc = "Load-op-Stores", .ucode = 0x4, }, { 
.uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x7, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam15h_l2_prefetcher_trigger_events[]={ { .uname = "LOAD_L1_MISS_SEEN_BY_PREFETCHER", .udesc = "Load L1 miss seen by prefetcher", .ucode = 0x1, }, { .uname = "STORE_L1_MISS_SEEN_BY_PREFETCHER", .udesc = "Store L1 miss seen by prefetcher", .ucode = 0x2, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x3, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; /* Event table for the amd64_fam15h PMU. Each amd64_entry_t gives the event name (.name), description (.desc), numeric code (.code) and modifier mask (.modmsk, AMD64_FAM15H_ATTRS for every entry). Events that have sub-events also set .umasks to one of the unit-mask tables above, .numasks to that table's element count via LIBPFM_ARRAY_SIZE, and .ngrp = 1; the remaining events omit those three fields. Entries are not strictly ordered by .code: 0x165 sits between 0x7f and 0x80, and 0x1cf, 0x29, 0x30, 0x16c and 0x1d8 come last. */ static const amd64_entry_t amd64_fam15h_pe[]={ { .name = "DISPATCHED_FPU_OPS", .desc = "FPU Pipe Assignment", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x0, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_dispatched_fpu_ops), .ngrp = 1, .umasks = amd64_fam15h_dispatched_fpu_ops, }, { .name = "CYCLES_FPU_EMPTY", .desc = "FP Scheduler Empty", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x1, }, { .name = "RETIRED_SSE_OPS", .desc = "Retired SSE/BNI Ops", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x3, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_retired_sse_ops), .ngrp = 1, .umasks = amd64_fam15h_retired_sse_ops, }, { .name = "MOVE_SCALAR_OPTIMIZATION", .desc = "Number of Move Elimination and Scalar Op Optimization", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x4, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_move_scalar_optimization), .ngrp = 1, .umasks = amd64_fam15h_move_scalar_optimization, }, { .name = "RETIRED_SERIALIZING_OPS", .desc = "Retired Serializing Ops", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x5, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_retired_serializing_ops), .ngrp = 1, .umasks = amd64_fam15h_retired_serializing_ops, }, { .name = "BOTTOM_EXECUTE_OP", .desc = "Number of Cycles that a Bottom-Execute uop is in the FP Scheduler", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x6, }, { .name = "SEGMENT_REGISTER_LOADS", .desc = "Segment Register Loads", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x20, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_segment_register_loads), .ngrp = 1, .umasks = 
amd64_fam15h_segment_register_loads, }, { .name = "PIPELINE_RESTART_DUE_TO_SELF_MODIFYING_CODE", .desc = "Pipeline Restart Due to Self-Modifying Code", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x21, }, { .name = "PIPELINE_RESTART_DUE_TO_PROBE_HIT", .desc = "Pipeline Restart Due to Probe Hit", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x22, }, { .name = "LOAD_Q_STORE_Q_FULL", .desc = "Load Queue/Store Queue Full", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x23, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_load_q_store_q_full), .ngrp = 1, .umasks = amd64_fam15h_load_q_store_q_full, }, { .name = "LOCKED_OPS", .desc = "Locked Operations", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x24, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_locked_ops), .ngrp = 1, .umasks = amd64_fam15h_locked_ops, }, { .name = "RETIRED_CLFLUSH_INSTRUCTIONS", .desc = "Retired CLFLUSH Instructions", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x26, }, { .name = "RETIRED_CPUID_INSTRUCTIONS", .desc = "Retired CPUID Instructions", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x27, }, { .name = "CANCELLED_STORE_TO_LOAD", .desc = "Canceled Store to Load Forward Operations", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x2a, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_cancelled_store_to_load), .ngrp = 1, .umasks = amd64_fam15h_cancelled_store_to_load, }, { .name = "SMIS_RECEIVED", .desc = "SMIs Received", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x2b, }, { .name = "DATA_CACHE_ACCESSES", .desc = "Data Cache Accesses", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x40, }, { .name = "DATA_CACHE_MISSES", .desc = "Data Cache Misses", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x41, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_data_cache_misses), .ngrp = 1, .umasks = amd64_fam15h_data_cache_misses, }, { .name = "DATA_CACHE_REFILLS_FROM_L2_OR_NORTHBRIDGE", .desc = "Data Cache Refills from L2 or System", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x42, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_data_cache_refills_from_l2_or_northbridge), .ngrp = 1, .umasks = 
amd64_fam15h_data_cache_refills_from_l2_or_northbridge, }, { .name = "DATA_CACHE_REFILLS_FROM_NORTHBRIDGE", .desc = "Data Cache Refills from System", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x43, }, { .name = "UNIFIED_TLB_HIT", .desc = "Unified TLB Hit", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x45, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_unified_tlb_hit), .ngrp = 1, .umasks = amd64_fam15h_unified_tlb_hit, }, { .name = "UNIFIED_TLB_MISS", .desc = "Unified TLB Miss", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x46, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_unified_tlb_miss), .ngrp = 1, .umasks = amd64_fam15h_unified_tlb_miss, }, { .name = "MISALIGNED_ACCESSES", .desc = "Misaligned Accesses", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x47, }, { .name = "PREFETCH_INSTRUCTIONS_DISPATCHED", .desc = "Prefetch Instructions Dispatched", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x4b, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_prefetch_instructions_dispatched), .ngrp = 1, .umasks = amd64_fam15h_prefetch_instructions_dispatched, }, { .name = "INEFFECTIVE_SW_PREFETCHES", .desc = "Ineffective Software Prefetches", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x52, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_ineffective_sw_prefetches), .ngrp = 1, .umasks = amd64_fam15h_ineffective_sw_prefetches, }, { .name = "MEMORY_REQUESTS", .desc = "Memory Requests by Type", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x65, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_memory_requests), .ngrp = 1, .umasks = amd64_fam15h_memory_requests, }, { .name = "DATA_PREFETCHER", .desc = "Data Prefetcher", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x67, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_data_prefetcher), .ngrp = 1, .umasks = amd64_fam15h_data_prefetcher, }, { .name = "MAB_REQS", .desc = "MAB Requests", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x68, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_mab_reqs), .ngrp = 1, .umasks = amd64_fam15h_mab_reqs, }, { .name = "MAB_WAIT", .desc = "MAB Wait Cycles", .modmsk = AMD64_FAM15H_ATTRS, .code = 
0x69, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_mab_reqs), .ngrp = 1, .umasks = amd64_fam15h_mab_reqs, /* identical to actual umasks list for this event */ }, { .name = "SYSTEM_READ_RESPONSES", .desc = "Response From System on Cache Refills", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x6c, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_system_read_responses), .ngrp = 1, .umasks = amd64_fam15h_system_read_responses, }, { .name = "OCTWORD_WRITE_TRANSFERS", .desc = "Octwords Written to System", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x6d, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_octword_write_transfers), .ngrp = 1, .umasks = amd64_fam15h_octword_write_transfers, }, { .name = "CPU_CLK_UNHALTED", .desc = "CPU Clocks not Halted", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x76, }, { .name = "REQUESTS_TO_L2", .desc = "Requests to L2 Cache", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x7d, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_requests_to_l2), .ngrp = 1, .umasks = amd64_fam15h_requests_to_l2, }, { .name = "L2_CACHE_MISS", .desc = "L2 Cache Misses", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x7e, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_l2_cache_miss), .ngrp = 1, .umasks = amd64_fam15h_l2_cache_miss, }, { .name = "L2_CACHE_FILL_WRITEBACK", .desc = "L2 Fill/Writeback", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x7f, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_l2_cache_fill_writeback), .ngrp = 1, .umasks = amd64_fam15h_l2_cache_fill_writeback, }, { .name = "PAGE_SPLINTERING", .desc = "Page Splintering", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x165, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_page_splintering), .ngrp = 1, .umasks = amd64_fam15h_page_splintering, }, { .name = "INSTRUCTION_CACHE_FETCHES", .desc = "Instruction Cache Fetches", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x80, }, { .name = "INSTRUCTION_CACHE_MISSES", .desc = "Instruction Cache Misses", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x81, }, { .name = "INSTRUCTION_CACHE_REFILLS_FROM_L2", .desc = "Instruction Cache Refills from L2", .modmsk 
= AMD64_FAM15H_ATTRS, .code = 0x82, }, { .name = "INSTRUCTION_CACHE_REFILLS_FROM_SYSTEM", .desc = "Instruction Cache Refills from System", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x83, }, { .name = "L1_ITLB_MISS_AND_L2_ITLB_HIT", .desc = "L1 ITLB Miss, L2 ITLB Hit", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x84, }, { .name = "L1_ITLB_MISS_AND_L2_ITLB_MISS", .desc = "L1 ITLB Miss, L2 ITLB Miss", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x85, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_l1_itlb_miss_and_l2_itlb_miss), .ngrp = 1, .umasks = amd64_fam15h_l1_itlb_miss_and_l2_itlb_miss, }, { .name = "PIPELINE_RESTART_DUE_TO_INSTRUCTION_STREAM_PROBE", .desc = "Pipeline Restart Due to Instruction Stream Probe", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x86, }, { .name = "INSTRUCTION_FETCH_STALL", .desc = "Instruction Fetch Stall", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x87, }, { .name = "RETURN_STACK_HITS", .desc = "Return Stack Hits", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x88, }, { .name = "RETURN_STACK_OVERFLOWS", .desc = "Return Stack Overflows", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x89, }, { .name = "INSTRUCTION_CACHE_VICTIMS", .desc = "Instruction Cache Victims", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x8b, }, { .name = "INSTRUCTION_CACHE_INVALIDATED", .desc = "Instruction Cache Lines Invalidated", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x8c, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_instruction_cache_invalidated), .ngrp = 1, .umasks = amd64_fam15h_instruction_cache_invalidated, }, { .name = "ITLB_RELOADS", .desc = "ITLB Reloads", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x99, }, { .name = "ITLB_RELOADS_ABORTED", .desc = "ITLB Reloads Aborted", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x9a, }, { .name = "RETIRED_INSTRUCTIONS", .desc = "Retired Instructions", .modmsk = AMD64_FAM15H_ATTRS, .code = 0xc0, }, { .name = "RETIRED_UOPS", .desc = "Retired uops", .modmsk = AMD64_FAM15H_ATTRS, .code = 0xc1, }, { .name = "RETIRED_BRANCH_INSTRUCTIONS", .desc = "Retired Branch Instructions", .modmsk = 
AMD64_FAM15H_ATTRS, .code = 0xc2, }, { .name = "RETIRED_MISPREDICTED_BRANCH_INSTRUCTIONS", .desc = "Retired Mispredicted Branch Instructions", .modmsk = AMD64_FAM15H_ATTRS, .code = 0xc3, }, { .name = "RETIRED_TAKEN_BRANCH_INSTRUCTIONS", .desc = "Retired Taken Branch Instructions", .modmsk = AMD64_FAM15H_ATTRS, .code = 0xc4, }, { .name = "RETIRED_TAKEN_BRANCH_INSTRUCTIONS_MISPREDICTED", .desc = "Retired Taken Branch Instructions Mispredicted", .modmsk = AMD64_FAM15H_ATTRS, .code = 0xc5, }, { .name = "RETIRED_FAR_CONTROL_TRANSFERS", .desc = "Retired Far Control Transfers", .modmsk = AMD64_FAM15H_ATTRS, .code = 0xc6, }, { .name = "RETIRED_BRANCH_RESYNCS", .desc = "Retired Branch Resyncs", .modmsk = AMD64_FAM15H_ATTRS, .code = 0xc7, }, { .name = "RETIRED_NEAR_RETURNS", .desc = "Retired Near Returns", .modmsk = AMD64_FAM15H_ATTRS, .code = 0xc8, }, { .name = "RETIRED_NEAR_RETURNS_MISPREDICTED", .desc = "Retired Near Returns Mispredicted", .modmsk = AMD64_FAM15H_ATTRS, .code = 0xc9, }, { .name = "RETIRED_INDIRECT_BRANCHES_MISPREDICTED", .desc = "Retired Indirect Branches Mispredicted", .modmsk = AMD64_FAM15H_ATTRS, .code = 0xca, }, { .name = "RETIRED_MMX_FP_INSTRUCTIONS", .desc = "Retired MMX/FP Instructions", .modmsk = AMD64_FAM15H_ATTRS, .code = 0xcb, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_retired_mmx_fp_instructions), .ngrp = 1, .umasks = amd64_fam15h_retired_mmx_fp_instructions, }, { .name = "INTERRUPTS_MASKED_CYCLES", .desc = "Interrupts-Masked Cycles", .modmsk = AMD64_FAM15H_ATTRS, .code = 0xcd, }, { .name = "INTERRUPTS_MASKED_CYCLES_WITH_INTERRUPT_PENDING", .desc = "Interrupts-Masked Cycles with Interrupt Pending", .modmsk = AMD64_FAM15H_ATTRS, .code = 0xce, }, { .name = "INTERRUPTS_TAKEN", .desc = "Interrupts Taken", .modmsk = AMD64_FAM15H_ATTRS, .code = 0xcf, }, { .name = "DECODER_EMPTY", .desc = "Decoder Empty", .modmsk = AMD64_FAM15H_ATTRS, .code = 0xd0, }, { .name = "DISPATCH_STALLS", .desc = "Dispatch Stalls", .modmsk = AMD64_FAM15H_ATTRS, .code = 0xd1, }, 
{ .name = "DISPATCH_STALL_FOR_SERIALIZATION", .desc = "Microsequencer Stall due to Serialization", .modmsk = AMD64_FAM15H_ATTRS, .code = 0xd3, }, { .name = "DISPATCH_STALL_FOR_RETIRE_QUEUE_FULL", .desc = "Dispatch Stall for Instruction Retire Q Full", .modmsk = AMD64_FAM15H_ATTRS, .code = 0xd5, }, { .name = "DISPATCH_STALL_FOR_INT_SCHED_QUEUE_FULL", .desc = "Dispatch Stall for Integer Scheduler Queue Full", .modmsk = AMD64_FAM15H_ATTRS, .code = 0xd6, }, { .name = "DISPATCH_STALL_FOR_FPU_FULL", .desc = "Dispatch Stall for FP Scheduler Queue Full", .modmsk = AMD64_FAM15H_ATTRS, .code = 0xd7, }, { .name = "DISPATCH_STALL_FOR_LDQ_FULL", .desc = "Dispatch Stall for LDQ Full", .modmsk = AMD64_FAM15H_ATTRS, .code = 0xd8, }, { .name = "MICROSEQ_STALL_WAITING_FOR_ALL_QUIET", .desc = "Microsequencer Stall Waiting for All Quiet", .modmsk = AMD64_FAM15H_ATTRS, .code = 0xd9, }, { .name = "FPU_EXCEPTIONS", .desc = "FPU Exceptions", .modmsk = AMD64_FAM15H_ATTRS, .code = 0xdb, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_fpu_exceptions), .ngrp = 1, .umasks = amd64_fam15h_fpu_exceptions, }, { .name = "DR0_BREAKPOINTS", .desc = "DR0 Breakpoint Match", .modmsk = AMD64_FAM15H_ATTRS, .code = 0xdc, }, { .name = "DR1_BREAKPOINTS", .desc = "DR1 Breakpoint Match", .modmsk = AMD64_FAM15H_ATTRS, .code = 0xdd, }, { .name = "DR2_BREAKPOINTS", .desc = "DR2 Breakpoint Match", .modmsk = AMD64_FAM15H_ATTRS, .code = 0xde, }, { .name = "DR3_BREAKPOINTS", .desc = "DR3 Breakpoint Match", .modmsk = AMD64_FAM15H_ATTRS, .code = 0xdf, }, { .name = "IBS_OPS_TAGGED", .desc = "Tagged IBS Ops", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x1cf, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_ibs_ops_tagged), .ngrp = 1, .umasks = amd64_fam15h_ibs_ops_tagged, }, { .name = "LS_DISPATCH", .desc = "LS Dispatch", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x29, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_ls_dispatch), .ngrp = 1, .umasks = amd64_fam15h_ls_dispatch, }, { .name = "EXECUTED_CLFLUSH_INSTRUCTIONS", .desc = "Executed CLFLUSH 
Instructions", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x30, }, { .name = "L2_PREFETCHER_TRIGGER_EVENTS", .desc = "L2 Prefetcher Trigger Events", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x16c, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam15h_l2_prefetcher_trigger_events), .ngrp = 1, .umasks = amd64_fam15h_l2_prefetcher_trigger_events, }, { .name = "DISPATCH_STALL_FOR_STQ_FULL", .desc = "Dispatch Stall for STQ Full", .modmsk = AMD64_FAM15H_ATTRS, .code = 0x1d8, }, }; papi-5.6.0/src/aix-lock.h000664 001750 001750 00000000571 13216244356 017211 0ustar00jshenry1963jshenry1963000000 000000 /* NOTE(review): the #include below has no header name in this copy; the target appears to have been lost. */ #include /* Locks */ extern atomic_p lock[]; /* _papi_hwd_lock(lck): spins in an empty loop, calling _check_lock(lock[lck],0,1) until it no longer returns TRUE. Expands to a bare brace block rather than do { } while (0). */ #define _papi_hwd_lock(lck) \ { \ while(_check_lock(lock[lck],0,1) == TRUE) { ; } \ } /* _papi_hwd_unlock(lck): calls _clear_lock(lock[lck], 0). Also expands to a bare brace block. */ #define _papi_hwd_unlock(lck) \ { \ _clear_lock(lock[lck], 0); \ } papi-5.6.0/man/man3/PAPIF_set_domain.3000664 001750 001750 00000001022 13216244355 021267 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPIF_set_domain" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPIF_set_domain \- .PP Set the default counting domain for new event sets bound to the cpu component\&. .SH SYNOPSIS .br .PP .SH "Detailed Description" .PP .PP \fBFortran Prototype:\fP .RS 4 #include 'fpapi\&.h' .br \fBPAPIF_set_domain( C_INT domain, C_INT check )\fP .RE .PP \fBSee Also:\fP .RS 4 \fBPAPI_set_domain\fP .RE .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. papi-5.6.0/src/libpfm-3.y/examples_ia64_v2.0/task.c000664 001750 001750 00000017514 13216244362 023527 0ustar00jshenry1963jshenry1963000000 000000 /* * task.c - example of a task monitoring another one * * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. 
* Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux/ia64. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #define NUM_PMCS PFMLIB_MAX_PMCS #define NUM_PMDS PFMLIB_MAX_PMDS #define MAX_EVT_NAME_LEN 128 static void fatal_error(char *fmt,...) __attribute__((noreturn)); static void fatal_error(char *fmt, ...) { va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); exit(1); } int child(char **arg) { /* * will cause the program to stop before executing the first * user level instruction. We can only attach (load) a context * if the task is in the STOPPED state. 
*/ ptrace(PTRACE_TRACEME, 0, NULL, NULL); /* * execute the requested command */ execvp(arg[0], arg); fatal_error("cannot exec: %s\n", arg[0]); /* not reached */ } int parent(char **arg) { pfmlib_input_param_t inp; pfmlib_output_param_t outp; pfarg_context_t ctx[1]; pfarg_reg_t pc[NUM_PMCS]; pfarg_reg_t pd[NUM_PMDS]; pfarg_load_t load_args; unsigned int i, num_counters; int status, ret; int ctx_fd; pid_t pid; char name[MAX_EVT_NAME_LEN]; memset(pc, 0, sizeof(ctx)); memset(pd, 0, sizeof(ctx)); memset(ctx, 0, sizeof(ctx)); memset(&inp,0, sizeof(inp)); memset(&outp,0, sizeof(outp)); memset(&load_args,0, sizeof(load_args)); pfm_get_num_counters(&num_counters); if (pfm_get_cycle_event(&inp.pfp_events[0]) != PFMLIB_SUCCESS) fatal_error("cannot find cycle event\n"); if (pfm_get_inst_retired_event(&inp.pfp_events[1]) != PFMLIB_SUCCESS) fatal_error("cannot find inst retired event\n"); i = 2; if (num_counters < i) { i = num_counters; printf("too many events provided (max=%d events), using first %d event(s)\n", num_counters, i); } /* * set the privilege mode: * PFM_PLM3 : user level * PFM_PLM0 : kernel level */ inp.pfp_dfl_plm = PFM_PLM3; /* * how many counters we use */ inp.pfp_event_count = i; /* * let the library figure out the values for the PMCS */ if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) { fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); } /* * now create a context. we will later attach it to the task we are creating. */ if (perfmonctl(0, PFM_CREATE_CONTEXT, ctx, 1) == -1) { if (errno == ENOSYS) { fatal_error("Your kernel does not have performance monitoring support!\n"); } fatal_error("Can't create PFM context %s\n", strerror(errno)); } /* * extract the identifier for our context */ ctx_fd = ctx[0].ctx_fd; /* * Now prepare the argument to initialize the PMDs and PMCS. * We must pfp_pmc_count to determine the number of PMC to intialize. * We must use pfp_event_count to determine the number of PMD to initialize. 
* Some events causes extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. * * This step is new compared to libpfm-2.x. It is necessary because the library no * longer knows about the kernel data structures. */ for (i=0; i < outp.pfp_pmc_count; i++) { pc[i].reg_num = outp.pfp_pmcs[i].reg_num; pc[i].reg_value = outp.pfp_pmcs[i].reg_value; } /* * the PMC controlling the event ALWAYS come first, that's why this loop * is safe even when extra PMC are needed to support a particular event. */ for (i=0; i < inp.pfp_event_count; i++) { pd[i].reg_num = pc[i].reg_num; } /* * Now program the registers * * We don't use the same variable to indicate the number of elements passed to * the kernel because, as we said earlier, pc may contain more elements than * the number of events we specified, i.e., contains more than counting monitors. */ if (perfmonctl(ctx_fd, PFM_WRITE_PMCS, pc, outp.pfp_pmc_count) == -1) { fatal_error("perfmonctl error PFM_WRITE_PMCS errno %d\n",errno); } if (perfmonctl(ctx_fd, PFM_WRITE_PMDS, pd, inp.pfp_event_count) == -1) { fatal_error("perfmonctl error PFM_WRITE_PMDS errno %d\n",errno); } /* * Create the child task */ if ((pid=fork()) == -1) fatal_error("Cannot fork process\n"); /* * and launch the child code */ if (pid == 0) exit(child(arg)); /* * wait for the child to exec */ waitpid(pid, &status, WUNTRACED); /* * check if process exited early */ if (WIFEXITED(status)) { fatal_error("command %s exited too early with status %d\n", arg[0], WEXITSTATUS(status)); } /* * the task is stopped at this point */ /* * now we load (i.e., attach) the context to the stopped child task (load_pid = pid) */ load_args.load_pid = pid; if (perfmonctl(ctx_fd, PFM_LOAD_CONTEXT, &load_args, 1) == -1) { fatal_error("perfmonctl error PFM_LOAD_CONTEXT errno %d\n",errno); } /* * activate monitoring. The task is still STOPPED at this point. Monitoring * will not take effect until the execution of the task is resumed. 
*/ if (perfmonctl(ctx_fd, PFM_START, NULL, 0) == -1) { fatal_error("perfmonctl error PFM_START errno %d\n",errno); } /* * now resume execution of the task, effectively activating * monitoring. */ ptrace(PTRACE_DETACH, pid, NULL, 0); /* * now the task is running */ /* * simply wait for completion */ waitpid(pid, &status, 0); /* * the task has disappeared at this point but our context is still * present and contains all the latest counts. */ /* * now simply read the results. */ if (perfmonctl(ctx_fd, PFM_READ_PMDS, pd, inp.pfp_event_count) == -1) { fatal_error("perfmonctl error READ_PMDS errno %d\n",errno); return -1; } /* * print the results * * It is important to realize, that the first event we specified may not * be in PMD4. Not all events can be measured by any monitor. That's why * we need to use the pc[] array to figure out where event i was allocated. * */ for (i=0; i < inp.pfp_event_count; i++) { pfm_get_full_event_name(&inp.pfp_events[i], name, MAX_EVT_NAME_LEN); printf("PMD%u %20"PRIu64" %s\n", pd[i].reg_num, pd[i].reg_value, name); } /* * free the context */ close(ctx_fd); return 0; } int main(int argc, char **argv) { pfmlib_options_t pfmlib_options; if (argc < 2) { fatal_error("You must specify a command to execute\n"); } /* * Initialize pfm library (required before we can use it) */ if (pfm_initialize() != PFMLIB_SUCCESS) { printf("Can't initialize library\n"); exit(1); } /* * pass options to library (optional) */ memset(&pfmlib_options, 0, sizeof(pfmlib_options)); pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ pfm_set_options(&pfmlib_options); return parent(argv+1); } papi-5.6.0/src/libpfm4/docs/man3/pfm_get_pmu_info.3000664 001750 001750 00000010642 13216244364 024054 0ustar00jshenry1963jshenry1963000000 000000 .TH LIBPFM 3 "December, 2009" "" "Linux Programmer's Manual" .SH NAME pfm_get_pmu_info \- get PMU information .SH SYNOPSIS .nf .B #include .sp .BI "int pfm_get_pmu_info(pfm_pmu_t " pmu ", pfm_pmu_info_t *" info ");" .sp .SH DESCRIPTION 
This function returns in \fBinfo\fR information about a PMU model designated by its identifier in \fBpmu\fR. The \fBpfm_pmu_info_t\fR structure is defined as follows: .nf typedef struct { const char *name; const char *desc; pfm_pmu_t pmu; pfm_pmu_type_t type; int size; int nevents; int first_event; int max_encoding; int num_cntrs; int num_fixed_cntrs; struct { int is_present:1; int is_arch_default:1; int is_core:1; int is_uncore:1; int reserved:28; }; } pfm_pmu_info_t; .fi The fields of this structure are defined as follows: .TP .B name This is the symbolic name of the PMU. This name can be used as a prefix in an event string. This is a read-only string. .TP .B desc This is the description of the PMU. This is a read-only string. .TP .B pmu This is the unique PMU identification code. It is identical to the value passed in \fBpmu\fR and is provided only for completeness. .TP .B type This field contains the type of the PMU. The following types are defined: .RS .TP .B PFM_PMU_TYPE_UNKNOWN The type of the PMU could not be determined. .TP .B PFM_PMU_TYPE_CORE This type is used when the PMU is implemented by the processor core. .TP .B PFM_PMU_TYPE_UNCORE This type is used when the PMU is implemented on the processor die but at the socket level, i.e., capturing events for all cores. .PP .RE .TP .B nevents This is the number of available events for this PMU model based on the host processor. It is \fBonly\fR valid if the \fBis_present\fR field is set to 1. Event identifiers are not guaranteed to be contiguous. In other words, \fBnevents\fR being equal to 100 does not imply that event identifiers go from 0 to 99. The iterator function \fBpfm_get_event_next()\fR must be used to go from one identifier to the next. .TP .B first_event This field returns the opaque index of the first event for this PMU model. The index can be used with \fBpfm_get_event_info()\fR or \fBpfm_get_event_next()\fR functions. In case no event is available, this field contains \fB-1\fR. 
.TP .B num_cntrs This field contains the number of generic counters supported by the PMU. A counter is generic if it can count more than one event. When it is not possible to determine the number of generic counters, this field contains \fB-1\fR. .TP .B num_fixed_cntrs This field contains the number of fixed counters supported by the PMU. A counter is fixed if it is hardwired to count only one event. When it is not possible to determine the number of fixed counters, this field contains \fB-1\fR. .TP .B size This field contains the size of the struct passed. This field is used to provide for extensibility of the struct without compromising backward compatibility. The value should be set to \fBsizeof(pfm_pmu_info_t)\fR. If instead, a value of \fB0\fR is specified, the library assumes the struct passed is identical to the first ABI version whose size is \fBPFM_PMU_INFO_ABI0\fR. Thus, if fields were added after the first ABI, they will not be set by the library. The library does check that bytes beyond what is implemented are zeroes. .TP .B max_encoding This field returns the number of event codes returned by \fBpfm_get_event_encoding()\fR. .TP .B is_present This field is set to one if the PMU model has been detected on the host system. .TP .B is_arch_default This field is set to one if the PMU is the default PMU for this architecture. Otherwise this field is zero. .PP .SH RETURN If successful, the function returns \fBPFM_SUCCESS\fR and PMU information in \fBinfo\fR, otherwise it returns an error code. .SH ERRORS .TP .B PFM_ERR_NOINIT Library has not been initialized properly. .TP .B PFM_ERR_NOTSUPP PMU model is not supported by the library. .TP .B PFM_ERR_INVAL The \fBpmu\fR argument is invalid or \fBinfo\fR is \fBNULL\fR or \fBsize\fR is not zero. 
.SH SEE ALSO pfm_get_event_next(3) .SH AUTHOR Stephane Eranian .PP papi-5.6.0/src/components/mx/utils/000775 001750 001750 00000000000 13216244357 021300 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/ctests/kufrin.c000664 001750 001750 00000011305 13216244360 020270 0ustar00jshenry1963jshenry1963000000 000000 /* * File: multiplex1_pthreads.c * Author: Rick Kufrin * rkufrin@ncsa.uiuc.edu * Mods: Philip Mucci * mucci@cs.utk.edu */ /* This file really bangs on the multiplex pthread functionality */ #include #include #include #include #include "papi.h" #include "papi_test.h" #include "do_loops.h" static int *events; static int numevents = 0; static int max_events=0; double loop( long n ) { long i; double a = 0.0012; for ( i = 0; i < n; i++ ) { a += 0.01; } return a; } void * thread( void *arg ) { ( void ) arg; /*unused */ int eventset = PAPI_NULL; long long *values; int ret = PAPI_register_thread( ); if ( ret != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_register_thread", ret ); ret = PAPI_create_eventset( &eventset ); if ( ret != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_create_eventset", ret ); values=calloc(max_events,sizeof(long long)); if (!TESTS_QUIET) printf( "Event set %d created\n", eventset ); /* In Component PAPI, EventSets must be assigned a component index before you can fiddle with their internals. 
0 is always the cpu component */ ret = PAPI_assign_eventset_component( eventset, 0 ); if ( ret != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_assign_eventset_component", ret ); } ret = PAPI_set_multiplex( eventset ); if ( ret == PAPI_ENOSUPP) { test_skip( __FILE__, __LINE__, "Multiplexing not supported", 1 ); } else if ( ret != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_set_multiplex", ret ); } ret = PAPI_add_events( eventset, events, numevents ); if ( ret < PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_add_events", ret ); } ret = PAPI_start( eventset ); if ( ret != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_start", ret ); } do_stuff( ); ret = PAPI_stop( eventset, values ); if ( ret != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_stop", ret ); } ret = PAPI_cleanup_eventset( eventset ); if ( ret != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset", ret ); } ret = PAPI_destroy_eventset( &eventset ); if ( ret != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset", ret ); } ret = PAPI_unregister_thread( ); if ( ret != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_unregister_thread", ret ); return ( NULL ); } int main( int argc, char **argv ) { int nthreads = 8, retval, i; PAPI_event_info_t info; pthread_t *threads; int quiet; /* Set TESTS_QUIET variable */ quiet = tests_quiet( argc, argv ); if ( !quiet ) { if ( argc > 1 ) { int tmp = atoi( argv[1] ); if ( tmp >= 1 ) nthreads = tmp; } } retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) { test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); } retval = PAPI_thread_init( ( unsigned long ( * )( void ) ) pthread_self ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_thread_init", retval ); } retval = PAPI_multiplex_init( ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_multiplex_init", retval ); } if ((max_events = PAPI_get_cmp_opt(PAPI_MAX_MPX_CTRS,NULL,0)) <= 0) { test_fail( __FILE__, 
__LINE__, "PAPI_get_cmp_opt", max_events ); } if ((events = calloc(max_events,sizeof(int))) == NULL) { test_fail( __FILE__, __LINE__, "calloc", PAPI_ESYS ); } /* Fill up the event set with as many non-derived events as we can */ i = PAPI_PRESET_MASK; do { if ( PAPI_get_event_info( i, &info ) == PAPI_OK ) { if ( info.count == 1 ) { events[numevents++] = ( int ) info.event_code; if (!quiet) printf( "Added %s\n", info.symbol ); } else { if (!quiet) printf( "Skipping derived event %s\n", info.symbol ); } } } while ( ( PAPI_enum_event( &i, PAPI_PRESET_ENUM_AVAIL ) == PAPI_OK ) && ( numevents < max_events ) ); if (!quiet) printf( "Found %d events\n", numevents ); if (numevents==0) { test_skip(__FILE__,__LINE__,"No events found",0); } do_stuff( ); if (!quiet) printf( "Creating %d threads:\n", nthreads ); threads = ( pthread_t * ) malloc( ( size_t ) nthreads * sizeof ( pthread_t ) ); if ( threads == NULL ) { test_fail( __FILE__, __LINE__, "malloc", PAPI_ENOMEM ); } /* Create the threads */ for ( i = 0; i < nthreads; i++ ) { retval = pthread_create( &threads[i], NULL, thread, NULL ); if ( retval != 0 ) { test_fail( __FILE__, __LINE__, "pthread_create", PAPI_ESYS ); } } /* Wait for thread completion */ for ( i = 0; i < nthreads; i++ ) { retval = pthread_join( threads[i], NULL ); if ( retval != 0 ) { test_fail( __FILE__, __LINE__, "pthread_join", PAPI_ESYS ); } } if (!quiet) printf( "Done." ); test_pass( __FILE__ ); pthread_exit( NULL ); return 0; } papi-5.6.0/src/ctests/calibrate.c000664 001750 001750 00000030210 13216244360 020714 0ustar00jshenry1963jshenry1963000000 000000 /* Calibrate.c A program to perform one or all of three tests to count flops. Test 1. Inner Product: 2*n operations for i = 1:n; a = a + x(i)*y(i); end Test 2. Matrix Vector Product: 2*n^2 operations for i = 1:n; for j = 1:n; x(i) = x(i) + a(i,j)*y(j); end; end; Test 3. 
Matrix Matrix Multiply: 2*n^3 operations for i = 1:n; for j = 1:n; for k = 1:n; c(i,j) = c(i,j) + a(i,k)*b(k,j); end; end; end; Supply a command line argument of 1, 2, or 3 to perform each test, or no argument to perform all three. Each test initializes PAPI and presents a header with processor information. Then it performs 500 iterations, printing result lines containing: n, measured counts, theoretical counts, (measured - theory), % error */ #include #include #include #include "papi.h" #include "papi_test.h" #define INDEX1 100 #define INDEX5 500 #define MAX_WARN 10 #define MAX_ERROR 80 #define MAX_DIFF 14 /* Extract and display hardware information for this processor. (Re)Initialize PAPI_flops() and begin counting floating ops. */ static void headerlines( const char *title, int quiet ) { if ( !quiet ) { printf( "\n%s:\n%8s %12s %12s %8s %8s\n", title, "i", "papi", "theory", "diff", "%error" ); printf( "-------------------------------------------------------------------------\n" ); } } /* Read PAPI_flops. Format and display results. Compute error without using floating ops. 
*/ #if defined(mips) #define FMA 1 #elif (defined(sparc) && defined(sun)) #define FMA 1 #else #define FMA 0 #endif static void resultline( int i, int j, int EventSet, int fail, int quiet ) { float ferror = 0; long long flpins = 0; long long papi, theory; int diff, retval; char err_str[PAPI_MAX_STR_LEN]; retval = PAPI_stop( EventSet, &flpins ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); i++; /* convert to 1s base */ theory = 2; while ( j-- ) theory *= i; /* theoretical ops */ papi = flpins << FMA; diff = ( int ) ( papi - theory ); ferror = ( ( float ) abs( diff ) ) / ( ( float ) theory ) * 100; if (!quiet) { printf( "%8d %12lld %12lld %8d %10.4f\n", i, papi, theory, diff, ferror ); } if ( ferror > MAX_WARN && abs( diff ) > MAX_DIFF && i > 20 ) { sprintf( err_str, "Calibrate: difference exceeds %d percent", MAX_WARN ); test_warn( __FILE__, __LINE__, err_str, 0 ); } if (fail) { if ( ferror > MAX_ERROR && abs( diff ) > MAX_DIFF && i > 20 ) { sprintf( err_str, "Calibrate: error exceeds %d percent", MAX_ERROR ); test_fail( __FILE__, __LINE__, err_str, PAPI_EMISC ); } } } static void print_help( char **argv ) { printf( "Usage: %s [-ivmdh] [-e event]\n", argv[0] ); printf( "Options:\n\n" ); printf( "\t-i Inner Product test.\n" ); printf( "\t-v Matrix-Vector multiply test.\n" ); printf( "\t-m Matrix-Matrix multiply test.\n" ); printf( "\t-d Double precision data. 
Default is float.\n" ); printf( "\t-e event Use as PAPI event instead of PAPI_FP_OPS\n" ); printf( "\t-f Suppress failures\n" ); printf( "\t-h Print this help message\n" ); printf( "\n" ); printf( "This test measures floating point operations for the specified test.\n" ); printf( "Operations can be performed in single or double precision.\n" ); printf( "Default operation is all three tests in single precision.\n" ); } static float inner_single( int n, float *x, float *y ) { float aa = 0.0; int i; for ( i = 0; i <= n; i++ ) aa = aa + x[i] * y[i]; return ( aa ); } static double inner_double( int n, double *x, double *y ) { double aa = 0.0; int i; for ( i = 0; i <= n; i++ ) aa = aa + x[i] * y[i]; return ( aa ); } static void vector_single( int n, float *a, float *x, float *y ) { int i, j; for ( i = 0; i <= n; i++ ) for ( j = 0; j <= n; j++ ) y[i] = y[i] + a[i * n + j] * x[i]; } static void vector_double( int n, double *a, double *x, double *y ) { int i, j; for ( i = 0; i <= n; i++ ) for ( j = 0; j <= n; j++ ) y[i] = y[i] + a[i * n + j] * x[i]; } static void matrix_single( int n, float *c, float *a, float *b ) { int i, j, k; for ( i = 0; i <= n; i++ ) for ( j = 0; j <= n; j++ ) for ( k = 0; k <= n; k++ ) c[i * n + j] = c[i * n + j] + a[i * n + k] * b[k * n + j]; } static void matrix_double( int n, double *c, double *a, double *b ) { int i, j, k; for ( i = 0; i <= n; i++ ) for ( j = 0; j <= n; j++ ) for ( k = 0; k <= n; k++ ) c[i * n + j] = c[i * n + j] + a[i * n + k] * b[k * n + j]; } static void reset_flops( const char *title, int EventSet ) { int retval; char err_str[PAPI_MAX_STR_LEN]; retval = PAPI_start( EventSet ); if ( retval != PAPI_OK ) { sprintf( err_str, "%s: PAPI_start", title ); test_fail( __FILE__, __LINE__, err_str, retval ); } } int main( int argc, char *argv[] ) { extern void dummy( void * ); float aa, *a=NULL, *b=NULL, *c=NULL, *x=NULL, *y=NULL; double aad, *ad=NULL, *bd=NULL, *cd=NULL, *xd=NULL, *yd=NULL; int i, j, n; int inner = 0; int vector = 0; 
int matrix = 0; int double_precision = 0; int fail = 1; int retval = PAPI_OK; char papi_event_str[PAPI_MIN_STR_LEN] = "PAPI_FP_OPS"; int papi_event; int EventSet = PAPI_NULL; int quiet; /* Parse the input arguments */ for ( i = 0; i < argc; i++ ) { if ( strstr( argv[i], "-i" ) ) inner = 1; else if ( strstr( argv[i], "-f" ) ) fail = 0; else if ( strstr( argv[i], "-v" ) ) vector = 1; else if ( strstr( argv[i], "-m" ) ) matrix = 1; else if ( strstr( argv[i], "-e" ) ) { if ( ( argv[i + 1] == NULL ) || ( strlen( argv[i + 1] ) == 0 ) ) { print_help( argv ); exit( 1 ); } strncpy( papi_event_str, argv[i + 1], sizeof ( papi_event_str ) - 1); papi_event_str[sizeof ( papi_event_str )-1] = '\0'; i++; } else if ( strstr( argv[i], "-d" ) ) double_precision = 1; else if ( strstr( argv[i], "-h" ) ) { print_help( argv ); exit( 1 ); } } /* if no options specified, set all tests to TRUE */ if ( inner + vector + matrix == 0 ) inner = vector = matrix = 1; /* Set TESTS_QUIET variable */ quiet = tests_quiet( argc, argv ); if ( !quiet ) { printf( "Initializing..." 
); } /* Initialize PAPI */ retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) { test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); } /* Translate name */ retval = PAPI_event_name_to_code( papi_event_str, &papi_event ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_event_name_to_code", retval ); } if ( PAPI_query_event( papi_event ) != PAPI_OK ) { test_skip( __FILE__, __LINE__, "PAPI_query_event", PAPI_ENOEVNT ); } if ( ( retval = PAPI_create_eventset( &EventSet ) ) != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); } if ( ( retval = PAPI_add_event( EventSet, papi_event ) ) != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_add_event", retval ); } if (!quiet) printf( "\n" ); retval = PAPI_OK; /* Inner Product test */ if ( inner ) { /* Allocate the linear arrays */ if (double_precision) { xd = malloc( INDEX5 * sizeof(double) ); yd = malloc( INDEX5 * sizeof(double) ); if ( !( xd && yd ) ) retval = PAPI_ENOMEM; } else { x = malloc( INDEX5 * sizeof(float) ); y = malloc( INDEX5 * sizeof(float) ); if ( !( x && y ) ) retval = PAPI_ENOMEM; } if ( retval == PAPI_OK ) { headerlines( "Inner Product Test", quiet ); /* step through the different array sizes */ for ( n = 0; n < INDEX5; n++ ) { if ( n < INDEX1 || ( ( n + 1 ) % 50 ) == 0 ) { /* Initialize the needed arrays at this size */ if ( double_precision ) { for ( i = 0; i <= n; i++ ) { xd[i] = ( double ) rand( ) * ( double ) 1.1; yd[i] = ( double ) rand( ) * ( double ) 1.1; } } else { for ( i = 0; i <= n; i++ ) { x[i] = ( float ) rand( ) * ( float ) 1.1; y[i] = ( float ) rand( ) * ( float ) 1.1; } } /* reset PAPI flops count */ reset_flops( "Inner Product Test", EventSet ); /* do the multiplication */ if ( double_precision ) { aad = inner_double( n, xd, yd ); dummy( ( void * ) &aad ); } else { aa = inner_single( n, x, y ); dummy( ( void * ) &aa ); } resultline( n, 1, EventSet, fail, quiet ); } } } if (double_precision) { free( xd ); 
free( yd ); } else { free( x ); free( y ); } } /* Matrix Vector test */ if ( vector && retval != PAPI_ENOMEM ) { /* Allocate the needed arrays */ if (double_precision) { ad = malloc( INDEX5 * INDEX5 * sizeof(double) ); xd = malloc( INDEX5 * sizeof(double) ); yd = malloc( INDEX5 * sizeof(double) ); if ( !( ad && xd && yd ) ) retval = PAPI_ENOMEM; } else { a = malloc( INDEX5 * INDEX5 * sizeof(float) ); x = malloc( INDEX5 * sizeof(float) ); y = malloc( INDEX5 * sizeof(float) ); if ( !( a && x && y ) ) retval = PAPI_ENOMEM; } if ( retval == PAPI_OK ) { headerlines( "Matrix Vector Test", quiet ); /* step through the different array sizes */ for ( n = 0; n < INDEX5; n++ ) { if ( n < INDEX1 || ( ( n + 1 ) % 50 ) == 0 ) { /* Initialize the needed arrays at this size */ if ( double_precision ) { for ( i = 0; i <= n; i++ ) { yd[i] = 0.0; xd[i] = ( double ) rand( ) * ( double ) 1.1; for ( j = 0; j <= n; j++ ) ad[i * n + j] = ( double ) rand( ) * ( double ) 1.1; } } else { for ( i = 0; i <= n; i++ ) { y[i] = 0.0; x[i] = ( float ) rand( ) * ( float ) 1.1; for ( j = 0; j <= n; j++ ) a[i * n + j] = ( float ) rand( ) * ( float ) 1.1; } } /* reset PAPI flops count */ reset_flops( "Matrix Vector Test", EventSet ); /* compute the resultant vector */ if ( double_precision ) { vector_double( n, ad, xd, yd ); dummy( ( void * ) yd ); } else { vector_single( n, a, x, y ); dummy( ( void * ) y ); } resultline( n, 2, EventSet, fail, quiet ); } } } if (double_precision) { free( ad ); free( xd ); free( yd ); } else { free( a ); free( x ); free( y ); } } /* Matrix Multiply test */ if ( matrix && retval != PAPI_ENOMEM ) { /* Allocate the needed arrays */ if (double_precision) { ad = malloc( INDEX5 * INDEX5 * sizeof(double) ); bd = malloc( INDEX5 * INDEX5 * sizeof(double) ); cd = malloc( INDEX5 * INDEX5 * sizeof(double) ); if ( !( ad && bd && cd ) ) retval = PAPI_ENOMEM; } else { a = malloc( INDEX5 * INDEX5 * sizeof(float) ); b = malloc( INDEX5 * INDEX5 * sizeof(float) ); c = malloc( INDEX5 * 
INDEX5 * sizeof(float) ); if ( !( a && b && c ) ) retval = PAPI_ENOMEM; } if ( retval == PAPI_OK ) { headerlines( "Matrix Multiply Test", quiet ); /* step through the different array sizes */ for ( n = 0; n < INDEX5; n++ ) { if ( n < INDEX1 || ( ( n + 1 ) % 50 ) == 0 ) { /* Initialize the needed arrays at this size */ if ( double_precision ) { for ( i = 0; i <= n * n + n; i++ ) { cd[i] = 0.0; ad[i] = ( double ) rand( ) * ( double ) 1.1; bd[i] = ( double ) rand( ) * ( double ) 1.1; } } else { for ( i = 0; i <= n * n + n; i++ ) { c[i] = 0.0; a[i] = ( float ) rand( ) * ( float ) 1.1; b[i] = ( float ) rand( ) * ( float ) 1.1; } } /* reset PAPI flops count */ reset_flops( "Matrix Multiply Test", EventSet ); /* compute the resultant matrix */ if ( double_precision ) { matrix_double( n, cd, ad, bd ); /* hand dummy() the double-precision result cd, not the float array c which is not allocated in this mode (mirrors dummy(yd) in the vector test) */ dummy( ( void * ) cd ); } else { matrix_single( n, c, a, b ); dummy( ( void * ) c ); } resultline( n, 3, EventSet, fail, quiet ); } } } if (double_precision) { free( ad ); free( bd ); free( cd ); } else { free( a ); free( b ); free( c ); } } /* exit with status code */ if ( retval == PAPI_ENOMEM ) { test_fail( __FILE__, __LINE__, "malloc", retval ); } test_pass( __FILE__ ); return 0; } papi-5.6.0/src/components/cuda/Rules.cuda000664 001750 001750 00000001157 13216244357 022364 0ustar00jshenry1963jshenry1963000000 000000 # $Id$ CUDA_DIR ?= /opt/cuda CUPTI_DIR ?= $(CUDA_DIR)/extras/CUPTI COMPSRCS += components/cuda/linux-cuda.c COMPOBJS += linux-cuda.o CFLAGS += -I$(CUDA_DIR)/include -I$(CUPTI_DIR)/include -g LDFLAGS += -L$(CUPTI_DIR)/lib64 -lcupti $(LDL) -Wl,-rpath=$(CUPTI_DIR)/lib64 -g linux-cuda.o: components/cuda/linux-cuda.c $(HEADERS) cuda_sampling $(CC) -E $(LIBCFLAGS) $(OPTFLAGS) -c components/cuda/linux-cuda.c -o linux-cuda.pre $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/cuda/linux-cuda.c -o linux-cuda.o cuda_sampling: cd components/cuda/sampling; $(MAKE); native_clean: cd components/cuda/sampling; $(MAKE) clean 
papi-5.6.0/src/libpfm-3.y/examples_v3.x/ia64/mont_etb.c000664 001750 001750 00000031174 13216244362 024423 0ustar00jshenry1963jshenry1963000000 000000 /* * mont_etb.c - example of how to use the ETB with the Dual-Core Itanium 2 PMU * * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. */ #include #include #include #include #include #include #include #include #include #include #include #include #include typedef pfm_dfl_smpl_hdr_t etb_hdr_t; typedef pfm_dfl_smpl_entry_t etb_entry_t; typedef pfm_dfl_smpl_arg_t smpl_arg_t; #define NUM_PMCS PFMLIB_MAX_PMCS #define NUM_PMDS PFMLIB_MAX_PMDS #define MAX_EVT_NAME_LEN 128 #define MAX_PMU_NAME_LEN 32 /* * The ETB_EVENT is incremented by 1 for each branch event. Such event is composed of * two entries in the ETB: a source and a target entry. 
The ETB is full after 4 branch * events. */ #define SMPL_PERIOD (4UL*256) /* * We use a small buffer size to exercise the overflow handler */ #define SMPL_BUF_NENTRIES 64 static void *smpl_vaddr; static size_t entry_size; static int id; #define BPL (sizeof(uint64_t)<<3) #define LBPL 6 static inline void pfm_bv_set(uint64_t *bv, uint16_t rnum) { bv[rnum>>LBPL] |= 1UL << (rnum&(BPL-1)); } /* * we don't use static to make sure the compiler does not inline the function */ long func1(void) { return random();} long func2(void) { return random();} long do_test(unsigned long loop) { long sum = 0; while(loop--) { if (loop & 0x1) sum += func1(); else sum += loop + func2(); } return sum; } static void fatal_error(char *fmt,...) __attribute__((noreturn)); static void fatal_error(char *fmt, ...) { va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); exit(1); } /* * print content of sampling buffer * * XXX: using stdio to print from a signal handler is not safe with multi-threaded * applications */ #define safe_printf printf static void show_etb_reg(int j, pfm_mont_pmd_reg_t reg, pfm_mont_pmd_reg_t pmd39) { unsigned long bruflush, b1, etb_ext; unsigned long addr; int is_valid; is_valid = reg.pmd48_63_etb_mont_reg.etb_s == 0 && reg.pmd48_63_etb_mont_reg.etb_mp == 0 ? 0 : 1; /* * the joy of the ETB extension register layout! */ if (j < 8) etb_ext = (pmd39.pmd_val>>(8*j)) & 0xf; else etb_ext = (pmd39.pmd_val>>(4+8*(j-8))) & 0xf; b1 = etb_ext & 0x1; bruflush = (etb_ext >> 1) & 0x1; safe_printf("\tPMD%-2d: 0x%016lx s=%d mp=%d bru=%ld b1=%ld valid=%c\n", j+48, reg.pmd_val, reg.pmd48_63_etb_mont_reg.etb_s, reg.pmd48_63_etb_mont_reg.etb_mp, bruflush, b1, is_valid ? 'Y' : 'N'); if (!is_valid) return; if (reg.pmd48_63_etb_mont_reg.etb_s) { addr = (reg.pmd48_63_etb_mont_reg.etb_addr+b1)<<4; addr |= reg.pmd48_63_etb_mont_reg.etb_slot < 3 ? 
reg.pmd48_63_etb_mont_reg.etb_slot : 0; safe_printf("\t Source Address: 0x%016lx\n" "\t Taken=%c Prediction:%s\n\n", addr, reg.pmd48_63_etb_mont_reg.etb_slot < 3 ? 'Y' : 'N', reg.pmd48_63_etb_mont_reg.etb_mp ? "FE Failure" : bruflush ? "BE Failure" : "Success"); } else { safe_printf("\t Target Address:0x%016lx\n\n", (unsigned long)(reg.pmd48_63_etb_mont_reg.etb_addr<<4)); } } static void show_etb(pfm_mont_pmd_reg_t *etb) { int i, last; pfm_mont_pmd_reg_t pmd38, pmd39; pmd38.pmd_val = etb[0].pmd_val; pmd39.pmd_val = etb[1].pmd_val; i = pmd38.pmd38_mont_reg.etbi_full ? pmd38.pmd38_mont_reg.etbi_ebi : 0; last = pmd38.pmd38_mont_reg.etbi_ebi; safe_printf("btb_trace: i=%d last=%d bbi=%d full=%d\n", i, last, pmd38.pmd38_mont_reg.etbi_ebi, pmd38.pmd38_mont_reg.etbi_full); /* * i+2 = skip over PMD38/pmd39 */ do { show_etb_reg(i, etb[i+2], pmd39); i = (i+1) % 16; } while (i != last); } void process_smpl_buffer(void) { etb_hdr_t *hdr; etb_entry_t *ent; unsigned long pos; unsigned long smpl_entry = 0; pfm_mont_pmd_reg_t *reg; size_t count; static unsigned long last_ovfl = ~0UL; hdr = (etb_hdr_t *)smpl_vaddr; /* * check that we are not diplaying the previous set of samples again. * Required to take care of the last batch of samples. */ if (hdr->hdr_overflows <= last_ovfl && last_ovfl != ~0UL) { printf("skipping identical set of samples %lu <= %lu\n", hdr->hdr_overflows, last_ovfl); return; } pos = (unsigned long)(hdr+1); count = hdr->hdr_count; /* * walk through all the entries recored in the buffer */ while(count--) { ent = (etb_entry_t *)pos; /* * print entry header */ safe_printf("Entry %ld PID:%d TID:%d CPU:%d STAMP:0x%lx IIP:0x%016lx\n", smpl_entry++, ent->tgid, ent->pid, ent->cpu, ent->tstamp, ent->ip); /* * point to first recorded register (always contiguous with entry header) */ reg = (pfm_mont_pmd_reg_t*)(ent+1); /* * in this particular example, we have pmd48-pmd63 has the ETB. 
We have also * included pmd38/pmd39 (ETB index and extenseion) has part of the registers * to record. This trick allows us to get the index to decode the sequential * order of the ETB. * * Recorded registers are always recorded in increasing index order. So we know * that where to find pmd38/pmd39. */ show_etb(reg); /* * move to next entry */ pos += entry_size; } } static void overflow_handler(int n, struct siginfo *info, struct sigcontext *sc) { process_smpl_buffer(); /* * And resume monitoring */ if (pfm_restart(id)) fatal_error("pfm_restart errno %d\n", errno); } int main(void) { int ret; int type = 0; pfarg_pmr_t pc[NUM_PMCS]; pfarg_pmd_attr_t pd[NUM_PMDS]; pfmlib_input_param_t inp; pfmlib_output_param_t outp; pfmlib_mont_input_param_t mont_inp; smpl_arg_t buf_arg; pfmlib_options_t pfmlib_options; struct sigaction act; unsigned int i; /* * Initialize pfm library (required before we can use it) */ if (pfm_initialize() != PFMLIB_SUCCESS) fatal_error("Can't initialize library\n"); /* * Let's make sure we run this on the right CPU */ pfm_get_pmu_type(&type); if (type != PFMLIB_MONTECITO_PMU) { char model[MAX_PMU_NAME_LEN]; pfm_get_pmu_name(model, MAX_PMU_NAME_LEN); fatal_error("this program does not work with %s PMU\n", model); } /* * Install the overflow handler (SIGIO) */ memset(&act, 0, sizeof(act)); act.sa_handler = (sig_t)overflow_handler; sigaction (SIGIO, &act, 0); /* * pass options to library (optional) */ memset(&pfmlib_options, 0, sizeof(pfmlib_options)); pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ pfmlib_options.pfm_verbose = 0; /* set to 1 for debug */ pfm_set_options(&pfmlib_options); memset(pc, 0, sizeof(pc)); memset(pd, 0, sizeof(pd)); memset(&buf_arg, 0, sizeof(buf_arg)); /* * prepare parameters to library. we don't use any Itanium * specific features here. so the pfp_model is NULL. 
*/ memset(&inp,0, sizeof(inp)); memset(&outp,0, sizeof(outp)); memset(&mont_inp,0, sizeof(mont_inp)); /* * Before calling pfm_find_dispatch(), we must specify what kind * of branches we want to capture. We are interested in all taken * branches * therefore we program we set the various fields to: */ mont_inp.pfp_mont_etb.etb_used = 1; mont_inp.pfp_mont_etb.etb_tm = 0x2; mont_inp.pfp_mont_etb.etb_ptm = 0x3; mont_inp.pfp_mont_etb.etb_ppm = 0x3; mont_inp.pfp_mont_etb.etb_brt = 0x0; mont_inp.pfp_mont_etb.etb_plm = PFM_PLM3; if (pfm_find_full_event("ETB_EVENT", &inp.pfp_events[0]) != PFMLIB_SUCCESS) fatal_error("cannot find event ETB_EVENT\n"); /* * set the (global) privilege mode: * PFM_PLM3 : user level only */ inp.pfp_dfl_plm = PFM_PLM3; /* * how many counters we use */ inp.pfp_event_count = 1; /* * let the library figure out the values for the PMCS */ if ((ret=pfm_dispatch_events(&inp, &mont_inp, &outp, NULL)) != PFMLIB_SUCCESS) fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); /* * the size of the buffer is indicated in bytes (not entries). * * The kernel will record into the buffer up to a certain point. * No partial samples are ever recorded. */ buf_arg.buf_size = getpagesize(); /* * now create the session */ id = pfm_create(PFM_FL_SMPL_FMT, NULL, "default", &buf_arg, sizeof(buf_arg)); if (id == -1) { if (errno == ENOSYS) fatal_error("Your kernel does not have performance monitoring support!\n"); fatal_error("cannot create session %s\n", strerror(errno)); } /* * retrieve the virtual address at which the sampling * buffer has been mapped */ smpl_vaddr = mmap(NULL, (size_t)buf_arg.buf_size, PROT_READ, MAP_PRIVATE, id, 0); if (smpl_vaddr == MAP_FAILED) fatal_error("cannot mmap sampling buffer errno %d\n", errno); printf("Sampling buffer mapped at %p\n", smpl_vaddr); /* * Now prepare the argument to initialize the PMDs and PMCS. * We must pfp_pmc_count to determine the number of PMC to intialize. 
* We must use pfp_event_count to determine the number of PMD to initialize. * Some events cause extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. */ for (i=0; i < outp.pfp_pmc_count; i++) { pc[i].reg_num = outp.pfp_pmcs[i].reg_num; pc[i].reg_value = outp.pfp_pmcs[i].reg_value; } /* * figure out pmd mapping from output pmc * PMD38 is part of the set of used PMD returned by libpfm. * It will be reset automatically */ for (i=0; i < outp.pfp_pmd_count; i++) pd[i].reg_num = outp.pfp_pmds[i].reg_num; /* * indicate we want notification when buffer is full and randomization */ pd[0].reg_flags |= PFM_REGFL_OVFL_NOTIFY | PFM_REGFL_RANDOM; /* * Now prepare the argument to initialize the PMD and the sampling period * We know we use only one PMD in this case, therefore pmd[0] corresponds * to our first event which is our sampling period. */ pd[0].reg_value = - SMPL_PERIOD; pd[0].reg_long_reset = - SMPL_PERIOD; pd[0].reg_short_reset = - SMPL_PERIOD; /* * populate our smpl_pmds bitmask to include all of the ETB PMDs, * including index, extensions */ pfm_bv_set(pd[0].reg_smpl_pmds, 38); pfm_bv_set(pd[0].reg_smpl_pmds, 39); entry_size = sizeof(etb_entry_t) + 2 * 8; for(i=48; i < 64; i++) { pfm_bv_set(pd[0].reg_smpl_pmds, i); entry_size += 8; } /* * When our counter overflows, we want to ETB index to be reset, so that we keep * in sync. 
*/ pfm_bv_set(pd[0].reg_reset_pmds, 38); /* * Now program the registers */ if (pfm_write(id, 0, PFM_RW_PMC, pc, outp.pfp_pmc_count * sizeof(*pc))) fatal_error("pfm_write error errno %d\n",errno); if (pfm_write(id, 0, PFM_RW_PMD_ATTR, pd, outp.pfp_pmd_count * sizeof(*pd))) fatal_error("pfm_write(PMD) error errno %d\n",errno); /* * now we attach session */ if (pfm_attach(id, 0, getpid())) fatal_error("pfm_attach error errno %d\n",errno); /* * setup asynchronous notification on the file descriptor */ ret = fcntl(id, F_SETFL, fcntl(id, F_GETFL, 0) | O_ASYNC); if (ret == -1) fatal_error("cannot set ASYNC: %s\n", strerror(errno)); /* * get ownership of the descriptor */ ret = fcntl(id, F_SETOWN, getpid()); if (ret == -1) fatal_error("cannot setown: %s\n", strerror(errno)); /* * Let's roll now. */ if (pfm_set_state(id, 0, PFM_ST_START)) fatal_error("pfm_set_state error errno %d\n",errno); do_test(1000); if (pfm_set_state(id, 0, PFM_ST_STOP)) fatal_error("pfm_set_state error errno %d\n",errno); /* * We must call the processing routine to cover the last entries recorded * in the sampling buffer. Note that the buffer may not be full at this point. * */ process_smpl_buffer(); /* * let's stop this now */ munmap(smpl_vaddr, (size_t)buf_arg.buf_size); close(id); return 0; } papi-5.6.0/src/libpfm-3.y/docs/man3/pfm_get_impl_counters.3000664 001750 001750 00000000035 13216244361 025436 0ustar00jshenry1963jshenry1963000000 000000 .so man3/pfm_get_impl_pmcs.3 papi-5.6.0/src/perfctr-2.6.x/README000775 001750 001750 00000011557 13216244366 020441 0ustar00jshenry1963jshenry1963000000 000000 $Id: README,v 1.46.2.7 2010/01/30 11:50:30 mikpe Exp $ Linux Performance-Monitoring Counters Driver Mikael Pettersson ======================================================================== Overview -------- This package adds support to the Linux kernel (2.6.0 or newer) for using the Performance-Monitoring Counters (PMCs) found in many modern processors. 
Supported processors are: - All Intel x86 family 5, 6, and 15 processors, i.e., Pentium, Pentium MMX, Pentium Pro, Pentium II, Pentium III, Pentium M, Pentium 4, Core, Core 2, Atom, and Core i7, including Celeron and Xeon versions. - The AMD K7, K8, Family 10h, and Family 11h processor families. - Cyrix 6x86MX, MII, and III. - VIA C3 (Cyrix III). - Centaur WinChip C6/2/3. - PowerPC 604, 7xx, and 74xx processors. - Intel XScale 1 and 2 processors (ARM). PMCs are "event counters" capable of recording any of a large number of performance-related events during execution. These events typically include instructions executed, cache misses, TLB misses, stalls, and other events specific to the microarchitecture of the processor being used. PMCs are primarily used to identify low-level performance problems, and to validate code changes intended to improve performance. Limited support is available for generic x86 processors with a Time-Stamp Counter but no PMCs, such as the AMD K6 family. For these processors, only TSC-based cycle-count measurements are possible. However, all high-level facilities implemented by the driver are still available. Features -------- Each Linux process has its own set of "virtual" PMCs. That is, to a process the PMCs appear to be private and unrelated to the activities of other processes in the system. The virtual PMCs have 64-bit precision, even though current processors only implement 32, 40, or 48-bit PMCs. Each process also has a virtual Time-Stamp Counter (TSC). On most machines, the virtual PMCs can be sampled entirely in user-space without incurring the overhead of a system call. A process accesses its virtual PMCs by opening /dev/perfctr and issuing system calls on the resulting file descriptor. A user-space library is included which provides a more high-level interface. The driver also supports global-mode or system-wide PMCs. In this mode, each PMC on each processor can be controlled and read. 
The PMCs and TSC on active processors are sampled periodically and the accumulated sums have 64-bit precision. Global-mode PMCs are accessed via the /dev/perfctr device file; the user-space library provides a more high-level interface. The user-space library is accompanied by several example programs that illustrate how the driver and the library can be used. Support for performance-counter overflow interrupts is provided where such support exists in the processors. Limitations ----------- - Kernels older than 2.6.0 are not supported. - The performance counter interrupt facility on x86 requires SMP or uniprocessor APIC support. In the latter case, the BIOS must be reasonably non-buggy. Unfortunately, this is often not the case. - Almost no documentation. "Use the source, Luke." - Neither the kernel driver nor the sample user-space library attempts to hide any processor-specific details from the user. - This package makes it possible to compute aggregate event and cycle counts for sections of code. Since many x86-type processors use out-of-order execution, it is impossible to attribute exact event or cycle counts to individual instructions. See the "Continuous Profiling" and "ProfileMe" papers at Compaq's DCPI web site for more information on this issue. (The URL is listed in the OTHERS file.) - Centaur WinChip C6/2/3 support requires that the TSC is disabled. See linux/drivers/perfctr/x86.c for further information. Availability ------------ This and future versions of this package can be downloaded from <http://user.it.uu.se/~mikpe/linux/perfctr/>. The perfctr-devel mailing list is an open forum for driver update announcements and general discussions about the perfctr driver and its usage. To subscribe to perfctr-devel, visit <http://lists.sourceforge.net/lists/listinfo/perfctr-devel>.
Licensing --------- Copyright (C) 1999-2010 Mikael Pettersson This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA papi-5.6.0/src/libpfm4/docs/man3/libpfm_intel_ivb_unc.3000664 001750 001750 00000003601 13216244364 024705 0ustar00jshenry1963jshenry1963000000 000000 .TH LIBPFM 3 "June, 2013" "" "Linux Programmer's Manual" .SH NAME libpfm_intel_ivb_unc - support for Intel Ivy Bridge uncore PMU .SH SYNOPSIS .nf .B #include <perfmon/pfmlib.h> .sp .B PMU name: ivb_unc_cbo0, ivb_unc_cbo1, ivb_unc_cbo2, ivb_unc_cbo3 .B PMU desc: Intel Ivy Bridge C-box uncore .sp .SH DESCRIPTION The library supports the Intel Ivy Bridge client part (model 58) uncore PMU. The support is currently limited to the Coherency Box, so called C-Box for up to 4 physical cores. Each physical core has an associated C-Box which it uses to communicate with the L3 cache. The C-boxes all support the same set of events. However, Core 0 C-box (ivb_unc_cbo0) supports an additional uncore clock ticks event: \fBUNC_CLOCKTICKS\fR. .SH MODIFIERS The following modifiers are supported on Intel Ivy Bridge C-Box uncore PMU: .TP .B i Invert the meaning of the event. The counter will now count cycles in which the event is \fBnot\fR occurring.
This is a boolean modifier. .TP .B e Enable edge detection, i.e., count only when there is a state transition from no occurrence of the event to at least one occurrence. This modifier must be combined with a counter mask modifier (m) with a value greater or equal to one. This is a boolean modifier. .TP .B c Set the counter mask value. The mask acts as a threshold. The counter will count the number of cycles in which the number of occurrences of the event is greater or equal to the threshold. This is an integer modifier with values in the range [0:255]. .P Both the \fBUNC_CBO_CACHE_LOOKUP\fR and \fBUNC_CBO_XSNP_RESPONSE\fR require two umasks to be valid. For \fBUNC_CBO_CACHE_LOOKUP\fR the first umask must be one of the MESI state umasks, the second has to be one of the filters. For \fBUNC_CBO_XSNP_RESPONSE\fR the first umask must be one of the snoop types, the second has to be one of the filters. .SH AUTHORS .nf Stephane Eranian .fi .PP papi-5.6.0/src/libpfm-3.y/lib/pfmlib_power4_priv.h000664 001750 001750 00000001173 13216244363 023736 0ustar00jshenry1963jshenry1963000000 000000 /****************************/ /* THIS IS OPEN SOURCE CODE */ /****************************/ #ifndef __PFMLIB_POWER4_PRIV_H__ #define __PFMLIB_POWER4_PRIV_H__ /* * File: pfmlib_power4_priv.h * CVS: * Author: Corey Ashford * cjashfor@us.ibm.com * Mods: * * * (C) Copyright IBM Corporation, 2007. All Rights Reserved. * Contributed by Corey Ashford * * Note: This code was automatically generated and should not be modified by * hand. * */ #define POWER4_NUM_EVENT_COUNTERS 8 #define POWER4_NUM_GROUP_VEC 1 #define POWER4_NUM_CONTROL_REGS 3 #endif papi-5.6.0/src/libpfm-3.y/include/perfmon/pfmlib_gen_ia32.h000664 001750 001750 00000006504 13216244362 025372 0ustar00jshenry1963jshenry1963000000 000000 /* * Intel architectural PMU v1, v2, v3 * * Copyright (c) 2006-2007 Hewlett-Packard Development Company, L.P.
* Contributed by Stephane Eranian
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do so,
 * subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
 * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
 * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
 * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
 * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#ifndef __PFMLIB_GEN_IA32_H__
#define __PFMLIB_GEN_IA32_H__

#include

/*
 * privilege level mask usage for architected PMU:
 *
 * PFM_PLM0 = OS (kernel, hypervisor, ..)
 * PFM_PLM1 = unused (ignored)
 * PFM_PLM2 = unused (ignored)
 * PFM_PLM3 = USR (user level)
 */

#ifdef __cplusplus
extern "C" {
#endif

/*
 * upper limit, actual number determined dynamically
 */
#define PMU_GEN_IA32_MAX_COUNTERS	PFMLIB_MAX_PMCS

/*
 * Even though CPUID 0xa returns in eax the actual counter
 * width, the architecture specifies that writes are limited
 * to the lower 32 bits. As such, only the lower 32 bits have full
 * degree of freedom. That is the "useable" counter width.
 */
#define PMU_GEN_IA32_COUNTER_WIDTH	32

/*
 * Layout of an event select register. The register can be accessed either
 * as a whole (val) or field by field (perfevtsel). The bit-fields below
 * add up to 64 bits.
 */
typedef union {
	unsigned long long val;			/* complete register value */
	struct {
		unsigned long sel_event_select:8;	/* event select code */
		unsigned long sel_unit_mask:8;		/* unit mask */
		unsigned long sel_usr:1;		/* user level */
		unsigned long sel_os:1;			/* system level */
		unsigned long sel_edge:1;		/* edge detect */
		unsigned long sel_pc:1;			/* pin control */
		unsigned long sel_int:1;		/* enable APIC intr */
		unsigned long sel_any:1;		/* any thread (v3) */
		unsigned long sel_en:1;			/* enable */
		unsigned long sel_inv:1;		/* invert counter mask */
		unsigned long sel_cnt_mask:8;		/* counter mask */
		unsigned long sel_res2:32;		/* upper 32 bits, not named by any other field */
	} perfevtsel;
} pfm_gen_ia32_sel_reg_t;

/*
 * Per-counter settings supplied by the caller.
 */
typedef struct {
	unsigned long		cnt_mask;	/* threshold (cnt_mask) */
	unsigned int		flags;		/* counter specific flag, see PFM_GEN_IA32_SEL_* below */
} pfmlib_gen_ia32_counter_t;

/* values for pfmlib_gen_ia32_counter_t.flags */
#define PFM_GEN_IA32_SEL_INV	0x1	/* inverse */
#define PFM_GEN_IA32_SEL_EDGE	0x2	/* edge detect */
#define PFM_GEN_IA32_SEL_ANYTHR	0x4	/* measure on any thread (v3 and up) */

/*
 * model-specific parameters for the library
 */
typedef struct {
	pfmlib_gen_ia32_counter_t	pfp_gen_ia32_counters[PMU_GEN_IA32_MAX_COUNTERS];	/* one entry per counter */
	uint64_t			reserved[4];		/* for future use */
} pfmlib_gen_ia32_input_param_t;

typedef struct {
	uint64_t	reserved[8];		/* for future use */
} pfmlib_gen_ia32_output_param_t;

#ifdef __cplusplus /* extern C */
}
#endif

#endif /* __PFMLIB_GEN_IA32_H__ */
papi-5.6.0/src/libpfm-3.y/examples_v3.x/task_attach.c000664 001750 001750 00000020540 13216244362 024332 0ustar00jshenry1963jshenry1963000000 000000
/*
 * task_attach.c - example of how to attach to another task for monitoring
 *
 * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P.
* Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "detect_pmcs.h" #define NUM_PMCS PFMLIB_MAX_PMCS #define NUM_PMDS PFMLIB_MAX_PMDS #define MAX_EVT_NAME_LEN 128 static void fatal_error(char *fmt,...) __attribute__((noreturn)); static void fatal_error(char *fmt, ...) 
{ va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); exit(1); } int parent(pid_t pid) { pfmlib_input_param_t inp; pfmlib_output_param_t outp; pfarg_pmr_t pc[NUM_PMCS]; pfarg_pmr_t pd[NUM_PMDS]; pfarg_sinfo_t sif; pfarg_msg_t msg; unsigned int i, num_counters; int status, ret; int ctx_fd; char name[MAX_EVT_NAME_LEN]; memset(pc, 0, sizeof(pc)); memset(pd, 0, sizeof(pd)); memset(&inp,0, sizeof(inp)); memset(&outp,0, sizeof(outp)); memset(&sif,0, sizeof(sif)); pfm_get_num_counters(&num_counters); if (pfm_get_cycle_event(&inp.pfp_events[0]) != PFMLIB_SUCCESS) fatal_error("cannot find cycle event\n"); if (pfm_get_inst_retired_event(&inp.pfp_events[1]) != PFMLIB_SUCCESS) fatal_error("cannot find inst retired event\n"); i = 2; /* * set the privilege mode: * PFM_PLM3 : user level * PFM_PLM0 : kernel level */ inp.pfp_dfl_plm = PFM_PLM3; if (i > num_counters) { i = num_counters; printf("too many events provided (max=%d events), using first %d event(s)\n", num_counters, i); } /* * how many counters we use */ inp.pfp_event_count = i; /* * now create a session. we will later attach it to the task we are creating. */ ctx_fd = pfm_create(0, &sif); if (ctx_fd == -1) { if (errno == ENOSYS) { fatal_error("Your kernel does not have performance monitoring support!\n"); } fatal_error("cannot create session %s\n", strerror(errno)); } /* * build the pfp_unavail_pmcs bitmask by looking * at what perfmon has available. It is not always * the case that all PMU registers are actually available * to applications. For instance, on IA-32 platforms, some * registers may be reserved for the NMI watchdog timer. * * With this bitmap, the library knows which registers NOT to * use. Of source, it is possible that no valid assignement may * be possible if certina PMU registers are not available. 
*/ detect_unavail_pmu_regs(&sif, &inp.pfp_unavail_pmcs, NULL); /* * let the library figure out the values for the PMCS */ if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) { fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); } /* * Now prepare the argument to initialize the PMDs and PMCS. * We must pfp_pmc_count to determine the number of PMC to intialize. * We must use pfp_event_count to determine the number of PMD to initialize. * Some events causes extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. * * This step is new compared to libpfm-2.x. It is necessary because the library no * longer knows about the kernel data structures. */ for (i=0; i < outp.pfp_pmc_count; i++) { pc[i].reg_num = outp.pfp_pmcs[i].reg_num; pc[i].reg_value = outp.pfp_pmcs[i].reg_value; } for(i=0; i < outp.pfp_pmd_count; i++) pd[i].reg_num = outp.pfp_pmds[i].reg_num; /* * Now program the registers * * We don't use the save variable to indicate the number of elements passed to * the kernel because, as we said earlier, pc may contain more elements than * the number of events we specified, i.e., contains more thann counting monitors. 
*/ if (pfm_write(ctx_fd, 0, PFM_RW_PMC, pc, outp.pfp_pmc_count * sizeof(*pc)) == -1) fatal_error("pfm_write error errno %d\n",errno); /* * To be read, each PMD must be either written or declared * as being part of a sample (reg_smpl_pmds) */ if (pfm_write(ctx_fd, 0, PFM_RW_PMD, pd, outp.pfp_pmd_count * sizeof(*pd)) == -1) fatal_error("pfm_write(PMD) error errno %d\n",errno); ret = ptrace(PTRACE_ATTACH, pid, NULL, 0); if (ret == -1) { fatal_error("cannot attach to %d: %s\n", pid, strerror(errno)); } /* * wait for the child to be actually stopped */ waitpid(pid, &status, WUNTRACED); /* * check if process exited early */ if (WIFEXITED(status)) fatal_error("command process %d exited too early with status %d\n", pid, WEXITSTATUS(status)); /* * the task is stopped at this point */ /* * now we attach the session to ourself */ if (pfm_attach(ctx_fd, 0, pid) == -1) fatal_error("pfm_attach error errno %d\n",errno); /* * activate monitoring. The task is still STOPPED at this point. Monitoring * will not take effect until the execution of the task is resumed. */ if (pfm_set_state(ctx_fd, 0, PFM_ST_START) == -1) fatal_error("pfm_set_state(start) error errno %d\n",errno); /* * now resume execution of the task, effectively activating * monitoring. */ ptrace(PTRACE_DETACH, pid, NULL, 0); /* * now the task is running */ /* * We cannot simply do a waitpid() because we may be attaching to a process * totally unrelated to our program. Instead we use a perfmon facility that * notifies us when the monitoring task is exiting. * * When a task with a monitoring session attached to it exits, a PFM_MSG_END * is generated. It can be retrieve with a simple read() on the session's descriptor. * * Another reason why you might return from the read is if there was a counter * overflow, unlikely in this example. 
* * To measure only for short period of time, use select or poll with a timeout, * see task_attach_timeout.c * */ ret = read(ctx_fd, &msg, sizeof(msg)); if (ret == -1) fatal_error("cannot read from descriptor: %s\n", strerror(errno)); if (msg.type != PFM_MSG_END) fatal_error("unexpected msg type : %d\n", msg.type); /* * the task has exited, we can simply read the results */ /* * now simply read the results. */ if (pfm_read(ctx_fd, 0, PFM_RW_PMD, pd, inp.pfp_event_count * sizeof(*pd)) == -1) fatal_error("pfm_read error errno %d\n",errno); /* * print the results * * It is important to realize, that the first event we specified may not * be in PMD4. Not all events can be measured by any monitor. That's why * we need to use the pc[] array to figure out where event i was allocated. * */ for (i=0; i < inp.pfp_event_count; i++) { pfm_get_full_event_name(&inp.pfp_events[i], name, MAX_EVT_NAME_LEN); printf("PMD%-3u %20"PRIu64" %s\n", pd[i].reg_num, pd[i].reg_value, name); } /* * free the session */ close(ctx_fd); return 0; } int main(int argc, char **argv) { pfmlib_options_t pfmlib_options; pid_t pid; int ret; if (argc < 2) { fatal_error("usage: %s pid\n", argv[0]); } pid = atoi(argv[1]); /* * pass options to library (optional) */ memset(&pfmlib_options, 0, sizeof(pfmlib_options)); pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ pfm_set_options(&pfmlib_options); /* * Initialize pfm library (required before we can use it) */ ret = pfm_initialize(); if (ret != PFMLIB_SUCCESS) fatal_error("Cannot initialize library: %s\n", pfm_strerror(ret)); return parent(pid); } papi-5.6.0/src/libpfm-3.y/examples_v3.x/ia64/ita2_irr.c000664 001750 001750 00000025501 13216244362 024324 0ustar00jshenry1963jshenry1963000000 000000 /* * ita2_irr.c - example of how to use code range restriction with the Itanium2 PMU * * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. 
* Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux/ia64. */ #include #include #include #include #include #include #include #include #include #include #define NUM_PMCS PFMLIB_MAX_PMCS #define NUM_PMDS PFMLIB_MAX_PMDS #define MAX_EVT_NAME_LEN 128 #define MAX_PMU_NAME_LEN 32 #define VECTOR_SIZE 1000000UL typedef struct { char *event_name; unsigned long expected_value; } event_desc_t; static event_desc_t event_list[]={ { "fp_ops_retired", VECTOR_SIZE<<1 }, { NULL, 0UL } }; static void fatal_error(char *fmt,...) __attribute__((noreturn)); static void fatal_error(char *fmt, ...) 
{ va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); exit(1); } void saxpy(double *a, double *b, double *c, unsigned long size) { unsigned long i; for(i=0; i < size; i++) { c[i] = 2*a[i] + b[i]; } printf("done saxpy\n"); } void saxpy2(double *a, double *b, double *c, unsigned long size) { unsigned long i; for(i=0; i < size; i++) { c[i] = 2*a[i] + b[i]; } printf("done saxpy2\n"); } static int do_test(void) { unsigned long size; double *a, *b, *c; size = VECTOR_SIZE; a = malloc(size*sizeof(double)); b = malloc(size*sizeof(double)); c = malloc(size*sizeof(double)); if (a == NULL || b == NULL || c == NULL) fatal_error("Cannot allocate vectors\n"); memset(a, 0, size*sizeof(double)); memset(b, 0, size*sizeof(double)); memset(c, 0, size*sizeof(double)); saxpy(a,b,c, size); saxpy2(a,b,c, size); return 0; } int main(int argc, char **argv) { event_desc_t *p; unsigned long range_start, range_end; int ret, type = 0; pfmlib_input_param_t inp; pfmlib_output_param_t outp; pfmlib_ita2_input_param_t ita2_inp; pfmlib_ita2_output_param_t ita2_outp; pfarg_pmr_t pd[NUM_PMDS]; pfarg_pmr_t pc[NUM_PMCS]; pfarg_pmr_t ibrs[8]; pfmlib_options_t pfmlib_options; struct fd { /* function descriptor */ unsigned long addr; unsigned long gp; } *fd; unsigned int i; int id; char name[MAX_EVT_NAME_LEN]; /* * Initialize pfm library (required before we can use it) */ ret = pfm_initialize(); if (ret != PFMLIB_SUCCESS) fatal_error("Cannot initialize library: %s\n", pfm_strerror(ret)); /* * Let's make sure we run this on the right CPU family */ pfm_get_pmu_type(&type); if (type != PFMLIB_ITANIUM2_PMU) { char model[MAX_PMU_NAME_LEN]; pfm_get_pmu_name(model, MAX_PMU_NAME_LEN); fatal_error("this program does not work with %s PMU\n", model); } /* * pass options to library (optional) */ memset(&pfmlib_options, 0, sizeof(pfmlib_options)); pfmlib_options.pfm_debug = 1; /* set to 1 for debug */ pfmlib_options.pfm_verbose = 1; /* set to 1 for verbose */ pfm_set_options(&pfmlib_options); /* * 
Compute the range we are interested in * * On IA-64, the function pointer does not point directly * to the function but to a descriptor which contains two * unsigned long: the first one is the actual start address * of the function, the second is the gp (global pointer) * to load into r1 before jumping into the function. Unlesss * we're jumping into a shared library the gp is the same as * the current gp. * * In the artificial example, we also rely on the compiler/linker * NOT reordering code layout. We depend on saxpy2() being just * after saxpy(). * */ fd = (struct fd *)saxpy; range_start = fd->addr; fd = (struct fd *)saxpy2; range_end = fd->addr; /* * linker may reorder saxpy() and saxpy2() */ if (range_end < range_start) { unsigned long tmp; tmp = range_start; range_start = range_end; range_end = tmp; } memset(pc, 0, sizeof(pc)); memset(pd, 0, sizeof(pd)); memset(ibrs,0, sizeof(ibrs)); memset(&inp,0, sizeof(inp)); memset(&outp,0, sizeof(outp)); memset(&ita2_inp,0, sizeof(ita2_inp)); memset(&ita2_outp,0, sizeof(ita2_outp)); /* * find requested event */ p = event_list; for (i=0; p->event_name ; i++, p++) { if (pfm_find_event(p->event_name, &inp.pfp_events[i].event) != PFMLIB_SUCCESS) { fatal_error("cannot find %s event\n", p->event_name); } } /* * set the privilege mode: * PFM_PLM3 : user level only */ inp.pfp_dfl_plm = PFM_PLM3; /* * how many counters we use */ inp.pfp_event_count = i; /* * We use the library to figure out how to program the debug registers * to cover the data range we are interested in. The rr_end parameter * must point to the byte after the last element of the range (C-style range). * * Because of the masking mechanism and therefore alignment constraints used to implement * this feature, it may not be possible to exactly cover a given range. It may be that * the coverage exceeds the desired range. So it is possible to capture noise if * the surrounding addresses are also heavily used. 
You can figure out by how much the * actual range is off compared to the requested range by checking the rr_soff and rr_eoff * fields on return from the library call. * * Upon return, the rr_dbr array is programmed and the number of debug registers (not pairs) * used to cover the range is in rr_nbr_used. * * In the case of code range restriction on Itanium 2, the library will try to use the fine * mode first and then it will default to using multiple pairs to cover the range. */ ita2_inp.pfp_ita2_irange.rr_used = 1; /* indicate we use code range restriction */ ita2_inp.pfp_ita2_irange.rr_limits[0].rr_start = range_start; ita2_inp.pfp_ita2_irange.rr_limits[0].rr_end = range_end; /* * let the library figure out the values for the PMCS */ if ((ret=pfm_dispatch_events(&inp, &ita2_inp, &outp, &ita2_outp)) != PFMLIB_SUCCESS) { fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); } /* * print offsets */ printf("code range : [0x%016lx-0x%016lx)\n" "start_offset:-0x%lx end_offset:+0x%lx\n" "%d pairs of debug registers used\n", range_start, range_end, ita2_outp.pfp_ita2_irange.rr_infos[0].rr_soff, ita2_outp.pfp_ita2_irange.rr_infos[0].rr_eoff, ita2_outp.pfp_ita2_irange.rr_nbr_used >> 1); /* * create session */ id = pfm_create(0, NULL); if (id == -1) { if (errno == ENOSYS) { fatal_error("Your kernel does not have performance monitoring support!\n"); } fatal_error("cannot create session %s\n", strerror(errno)); } /* * Now prepare the argument to initialize the PMDs and PMCS. * We must pfp_pmc_count to determine the number of PMC to intialize. * We must use pfp_event_count to determine the number of PMD to initialize. * Some events cause extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. * * This step is new compared to libpfm-2.x. It is necessary because the library no * longer knows about the kernel data structures. 
*/ for (i=0; i < outp.pfp_pmc_count; i++) { pc[i].reg_num = outp.pfp_pmcs[i].reg_num; pc[i].reg_value = outp.pfp_pmcs[i].reg_value; } /* * figure out pmd mapping from output pmc */ for (i=0; i < outp.pfp_pmd_count; i++) pd[i].reg_num = outp.pfp_pmds[i].reg_num; /* * propagate the setup for the debug registers from the library to the arguments * to the syscall. The library does not know the type of the syscall * anymore. The code debug registers start at PMC256 on all Itanium processors. */ for (i=0; i < ita2_outp.pfp_ita2_irange.rr_nbr_used; i++) { ibrs[i].reg_num = 256+ita2_outp.pfp_ita2_irange.rr_br[i].reg_num; ibrs[i].reg_value = ita2_outp.pfp_ita2_irange.rr_br[i].reg_value; } /* * Now program the registers * * We don't use the save variable to indicate the number of elements passed to * the kernel because, as we said earlier, pc may contain more elements than * the number of events we specified, i.e., contains more than coutning monitors. */ if (pfm_write(id, 0, PFM_RW_PMC, pc, outp.pfp_pmc_count * sizeof(*pc)) == -1) fatal_error("pfm_write error errno %d\n",errno); /* * Program the code debug registers. */ if (pfm_write(id, 0, PFM_RW_PMC, ibrs, ita2_outp.pfp_ita2_irange.rr_nbr_used * sizeof(*ibrs)) == -1) fatal_error("pfm_write error errno %d\n",errno); if (pfm_write(id, 0, PFM_RW_PMD, pd, outp.pfp_pmd_count * sizeof(*pd)) == -1) fatal_error("pfm_write(PMD) error errno %d\n",errno); /* * now we attach the session to pid */ if (pfm_attach(id, 0, getpid()) == -1) fatal_error("pfm_attach error errno %d\n",errno); /* * Let's roll now. * * We run two distinct copies of the same function but we restrict measurement * to the first one (saxpy). Therefore the expected count is half what you would * get if code range restriction was not used. The core loop in both case uses * two floating point operation per iteration. 
*/ if (pfm_set_state(id, 0, PFM_ST_START)) fatal_error("pfm_set_state error errno %d\n",errno); do_test(); if (pfm_set_state(id, 0, PFM_ST_STOP)) fatal_error("pfm_set_state error errno %d\n",errno); /* * now read the results */ if (pfm_read(id, 0, PFM_RW_PMD, pd, inp.pfp_event_count * sizeof(*pd)) == -1) fatal_error("pfm_read(PMD) error errno %d\n",errno); /* * print the results * * It is important to realize, that the first event we specified may not * be in PMD4. Not all events can be measured by any monitor. That's why * we need to use the pc[] array to figure out where event i was allocated. */ for (i=0; i < inp.pfp_event_count; i++) { pfm_get_full_event_name(&inp.pfp_events[i], name, MAX_EVT_NAME_LEN); printf("PMD%-3u %20lu %s (expected %lu)\n", pd[i].reg_num, pd[i].reg_value, name, event_list[i].expected_value); } /* * let's stop this now */ close(id); return 0; } papi-5.6.0/src/libpfm-3.y/examples_v2.x/task_attach_timeout.c000664 001750 001750 00000023276 13216244362 026110 0ustar00jshenry1963jshenry1963000000 000000 /* * task_attach_timeout.c - attach to another task for monitoring for a short while * * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "detect_pmcs.h" #define NUM_PMCS PFMLIB_MAX_PMCS #define NUM_PMDS PFMLIB_MAX_PMDS #define MAX_EVT_NAME_LEN 128 static void fatal_error(char *fmt,...) __attribute__((noreturn)); static void fatal_error(char *fmt, ...) { va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); exit(1); } int parent(pid_t pid, unsigned long delay) { pfmlib_input_param_t inp; pfmlib_output_param_t outp; pfarg_ctx_t ctx[1]; pfarg_pmc_t pc[NUM_PMCS]; pfarg_pmd_t pd[NUM_PMDS]; pfarg_load_t load_args; struct pollfd pollfd; pfarg_msg_t msg; unsigned int i, num_counters; int status, ret; int ctx_fd; char name[MAX_EVT_NAME_LEN]; memset(pc, 0, sizeof(pc)); memset(pd, 0, sizeof(pd)); memset(ctx, 0, sizeof(ctx)); memset(&inp,0, sizeof(inp)); memset(&outp,0, sizeof(outp)); memset(&load_args,0, sizeof(load_args)); pfm_get_num_counters(&num_counters); if (pfm_get_cycle_event(&inp.pfp_events[0]) != PFMLIB_SUCCESS) { fatal_error("cannot find cycle event\n"); } if (pfm_get_inst_retired_event(&inp.pfp_events[1]) != PFMLIB_SUCCESS) { fatal_error("cannot find inst retired event\n"); } i = 2; /* * set the privilege mode: * PFM_PLM3 : user level * PFM_PLM0 : kernel level */ inp.pfp_dfl_plm = PFM_PLM3; if (i > num_counters) { i = num_counters; printf("too many events provided (max=%d events), using first %d event(s)\n", num_counters, i); } /* * how many counters we use */ inp.pfp_event_count = 
i; /* * now create a context. we will later attach it to the task we are creating. */ ctx_fd = pfm_create_context(ctx, NULL, NULL, 0); if (ctx_fd == -1) { if (errno == ENOSYS) { fatal_error("Your kernel does not have performance monitoring support!\n"); } fatal_error("Can't create PFM context %s\n", strerror(errno)); } /* * build the pfp_unavail_pmcs bitmask by looking * at what perfmon has available. It is not always * the case that all PMU registers are actually available * to applications. For instance, on IA-32 platforms, some * registers may be reserved for the NMI watchdog timer. * * With this bitmap, the library knows which registers NOT to * use. Of source, it is possible that no valid assignement may * be possible if certina PMU registers are not available. */ detect_unavail_pmcs(ctx_fd, &inp.pfp_unavail_pmcs); /* * let the library figure out the values for the PMCS */ if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) { fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); } /* * use our file descriptor for the poll. * we are interested in read events only. */ pollfd.fd = ctx_fd; pollfd.events = POLLIN; /* * Now prepare the argument to initialize the PMDs and PMCS. * We must pfp_pmc_count to determine the number of PMC to intialize. * We must use pfp_event_count to determine the number of PMD to initialize. * Some events causes extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. * * This step is new compared to libpfm-2.x. It is necessary because the library no * longer knows about the kernel data structures. 
*/ for (i=0; i < outp.pfp_pmc_count; i++) { pc[i].reg_num = outp.pfp_pmcs[i].reg_num; pc[i].reg_value = outp.pfp_pmcs[i].reg_value; } for (i=0; i < outp.pfp_pmd_count; i++) pd[i].reg_num = outp.pfp_pmds[i].reg_num; /* * Now program the registers * * We don't use the save variable to indicate the number of elements passed to * the kernel because, as we said earlier, pc may contain more elements than * the number of events we specified, i.e., contains more thann counting monitors. */ if (pfm_write_pmcs(ctx_fd, pc, outp.pfp_pmc_count) == -1) fatal_error("pfm_write_pmcs error errno %d\n",errno); /* * To be read, each PMD must be either written or declared * as being part of a sample (reg_smpl_pmds) */ if (pfm_write_pmds(ctx_fd, pd, outp.pfp_pmd_count) == -1) fatal_error("pfm_write_pmds error errno %d\n",errno); ret = ptrace(PTRACE_ATTACH, pid, NULL, 0); if (ret == -1) { fatal_error("cannot attach to %d: %s\n", pid, strerror(errno)); } /* * wait for the child to be actually stopped */ waitpid(pid, &status, WUNTRACED); /* * check if process exited early */ if (WIFEXITED(status)) { fatal_error("command process %d exited too early with status %d\n", pid, WEXITSTATUS(status)); } /* * the task is stopped at this point */ /* * now we load (i.e., attach) the context to ourself */ load_args.load_pid = pid; if (pfm_load_context(ctx_fd, &load_args) == -1) { fatal_error("pfm_load_context error errno %d\n",errno); } /* * activate monitoring. The task is still STOPPED at this point. Monitoring * will not take effect until the execution of the task is resumed. */ if (pfm_start(ctx_fd, NULL) == -1) { fatal_error("pfm_start error errno %d\n",errno); } /* * now resume execution of the task, effectively activating * monitoring. */ ptrace(PTRACE_DETACH, pid, NULL, 0); printf("attached to [%d], timeout set to %lu seconds\n", pid, delay); /* * now the task is running */ /* * We cannot simply do a waitpid() because we may be attaching to a process * totally unrelated to our program. 
Instead we use a perfmon facility that * notifies us when the monitoring task is exiting. * * When a task with a monitoring context attached to it exits, a PFM_MSG_END * is generated. It can be retrieve with a simple read() on the context's descriptor. * * Another reason why you might return from the read is if there was a counter * overflow, unlikely in this example. * * To measure only for short period of time, use select or poll with a timeout, * see task_attach_timeout.c * */ ret = poll(&pollfd, 1, delay*1000); switch( ret ) { case -1: fatal_error("cannot read from descriptor: %s\n", strerror(errno)); /* no return */ case 1: /* * there is a message, i.e., the program exited before our timeout */ if (ret == 1) { /* * extract message */ ret = read(ctx_fd, &msg, sizeof(msg)); if (msg.type != PFM_MSG_END) { fatal_error("unexpected msg type : %d\n", msg.type); } } break; case 0: /* * we timed out, we need to stop the task to unload */ ret = ptrace(PTRACE_ATTACH, pid, NULL, 0); if (ret == -1) { fatal_error("cannot attach to %d: %s\n", pid, strerror(errno)); } /* * wait for task to be actually stopped */ waitpid(pid, &status, WUNTRACED); /* * check if process exited, then no need to unload */ if (WIFEXITED(status)) goto read_results; if (pfm_unload_context(ctx_fd) == -1) { fatal_error("pfm_unload_context error errno %d\n",errno); } /* * let it run free again */ ptrace(PTRACE_DETACH, pid, NULL, 0); break; default: fatal_error("unexpected return from poll: %d\n", ret); } read_results: /* * now simply read the results. */ if (pfm_read_pmds(ctx_fd, pd, inp.pfp_event_count) == -1) { fatal_error("pfm_read_pmds error errno %d\n",errno); return -1; } /* * print the results * * It is important to realize, that the first event we specified may not * be in PMD4. Not all events can be measured by any monitor. That's why * we need to use the pc[] array to figure out where event i was allocated. 
* */ for (i=0; i < inp.pfp_event_count; i++) { pfm_get_full_event_name(&inp.pfp_events[i], name, MAX_EVT_NAME_LEN); printf("PMD%-3u %20"PRIu64" %s\n", pd[i].reg_num, pd[i].reg_value, name); } /* * free the context */ close(ctx_fd); return 0; } int main(int argc, char **argv) { pfmlib_options_t pfmlib_options; unsigned long delay; pid_t pid; int ret; if (argc < 2) fatal_error("usage: %s pid [timeout]\n", argv[0]); pid = atoi(argv[1]); delay = argc > 2 ? strtoul(argv[2], NULL, 10) : 10; /* * pass options to library (optional) */ memset(&pfmlib_options, 0, sizeof(pfmlib_options)); pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ pfm_set_options(&pfmlib_options); /* * Initialize pfm library (required before we can use it) */ ret = pfm_initialize(); if (ret != PFMLIB_SUCCESS) fatal_error("Cannot initialize library: %s\n", pfm_strerror(ret)); return parent(pid, delay); } papi-5.6.0/src/libpfm-3.y/libpfms/include/000775 001750 001750 00000000000 13216244363 022263 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/components/appio/tests/iozone/Generate_Graphs000775 001750 001750 00000001377 13216244356 026264 0ustar00jshenry1963jshenry1963000000 000000 # # This script will create the Iozone graphs using # gnuplot. # # # # ------------------------------------------------ # YOU MUST PROVIDE A FILE NAME FOR IT TO PROCESS. # This filename is the name of the file where you # sent Iozone's output. # ------------------------------------------------ # Generate data base for all of the operation types. ./gengnuplot.sh $1 write ./gengnuplot.sh $1 rewrite ./gengnuplot.sh $1 read ./gengnuplot.sh $1 reread ./gengnuplot.sh $1 randread ./gengnuplot.sh $1 randwrite ./gengnuplot.sh $1 bkwdread ./gengnuplot.sh $1 recrewrite ./gengnuplot.sh $1 strideread ./gengnuplot.sh $1 fwrite ./gengnuplot.sh $1 frewrite ./gengnuplot.sh $1 fread ./gengnuplot.sh $1 freread # Produce graphs and postscript results. 
gnuplot gnu3d.dem
papi-5.6.0/src/freebsd/map-core2.h000664 001750 001750 00000015112 13216244361 020672 0ustar00jshenry1963jshenry1963000000 000000
/****************************/
/* THIS IS OPEN SOURCE CODE */
/****************************/

/*
 * File: map-core2.h
 * CVS: $Id$
 * Author: George Neville-Neil
 * gnn@freebsd.org
 */

#ifndef FreeBSD_MAP_CORE2
#define FreeBSD_MAP_CORE2

/*
 * Native event identifiers for the Core2 processor.
 *
 * The first enumerator is PAPI_NATIVE_MASK and every following one is the
 * previous value plus one, so the declaration order defines the numeric
 * codes. PNE_CORE2_NATNAME_GUARD is the last enumerator and marks the end
 * of the list.
 *
 * NOTE(review): the order presumably has to match the entries of
 * Core2Processor_info[] -- confirm against its definition before
 * inserting or reordering names.
 */
enum NativeEvent_Value_Core2Processor {
	PNE_CORE2_BACLEARS = PAPI_NATIVE_MASK ,
	PNE_CORE2_BOGUS_BR,
	PNE_CORE2_BR_BAC_MISSP_EXEC,
	PNE_CORE2_BR_CALL_MISSP_EXEC,
	PNE_CORE2_BR_CALL_EXEC,
	PNE_CORE2_BR_CND_EXEC,
	PNE_CORE2_BR_CND_MISSP_EXEC,
	PNE_CORE2_BR_IND_CALL_EXEC,
	PNE_CORE2_BR_IND_EXEC,
	PNE_CORE2_BR_IND_MISSP_EXEC,
	PNE_CORE2_BR_INST_DECODED,
	PNE_CORE2_BR_INST_EXEC,
	PNE_CORE2_BR_INST_RETIRED_ANY,
	PNE_CORE2_BR_INST_RETIRED_MISPRED,
	PNE_CORE2_BR_INST_RETIRED_MISPRED_NOT_TAKEN,
	PNE_CORE2_BR_INST_RETIRED_MISPRED_TAKEN,
	PNE_CORE2_BR_INST_RETIRED_PRED_NOT_TAKEN,
	PNE_CORE2_BR_INST_RETIRED_PRED_TAKEN,
	PNE_CORE2_BR_INST_RETIRED_TAKEN,
	PNE_CORE2_BR_MISSP_EXEC,
	PNE_CORE2_BR_RET_MISSP_EXEC,
	PNE_CORE2_BR_RET_BAC_MISSP_EXEC,
	PNE_CORE2_BR_RET_EXEC,
	PNE_CORE2_BR_TKN_BUBBLE_1,
	PNE_CORE2_BR_TKN_BUBBLE_2,
	PNE_CORE2_BUSQ_EMPTY,
	PNE_CORE2_BUS_BNR_DRV,
	PNE_CORE2_BUS_DATA_RCV,
	PNE_CORE2_BUS_DRDY_CLOCKS,
	PNE_CORE2_BUS_HIT_DRV,
	PNE_CORE2_BUS_HITM_DRV,
	PNE_CORE2_BUS_IO_WAIT,
	PNE_CORE2_BUS_LOCK_CLOCKS,
	PNE_CORE2_BUS_REQUEST_OUTSTANDING,
	PNE_CORE2_BUS_TRANS_ANY,
	PNE_CORE2_BUS_TRANS_BRD,
	PNE_CORE2_BUS_TRANS_BURST,
	PNE_CORE2_BUS_TRANS_DEF,
	PNE_CORE2_BUS_TRANS_IFETCH,
	PNE_CORE2_BUS_TRANS_INVAL,
	PNE_CORE2_BUS_TRANS_IO,
	PNE_CORE2_BUS_TRANS_MEM,
	PNE_CORE2_BUS_TRANS_P,
	PNE_CORE2_BUS_TRANS_PWR,
	PNE_CORE2_BUS_TRANS_RFO,
	PNE_CORE2_BUS_TRANS_WB,
	PNE_CORE2_CMP_SNOOP,
	PNE_CORE2_CPU_CLK_UNHALTED_BUS,
	PNE_CORE2_CPU_CLK_UNHALTED_CORE_P,
	PNE_CORE2_CPU_CLK_UNHALTED_NO_OTHER,
	PNE_CORE2_CYCLES_DIV_BUSY,
	PNE_CORE2_CYCLES_INT_MASKED,
	PNE_CORE2_CYCLES_INT_PENDING_AND_MASKED,
	PNE_CORE2_CYCLES_L1I_MEM_STALLED,
	PNE_CORE2_DELAYED_BYPASS_FP,
	PNE_CORE2_DELAYED_BYPASS_LOAD,
	PNE_CORE2_DELAYED_BYPASS_SIMD,
	PNE_CORE2_DIV,
	PNE_CORE2_DTLB_MISSES_ANY,
	PNE_CORE2_DTLB_MISSES_L0_MISS_LD,
	PNE_CORE2_DTLB_MISSES_MISS_LD,
	PNE_CORE2_DTLB_MISSES_MISS_ST,
	PNE_CORE2_EIST_TRANS,
	PNE_CORE2_ESP_ADDITIONS,
	PNE_CORE2_ESP_SYNCH,
	PNE_CORE2_EXT_SNOOP,
	PNE_CORE2_FP_ASSIST,
	PNE_CORE2_FP_COMP_OPS_EXE,
	PNE_CORE2_FP_MMX_TRANS_TO_FP,
	PNE_CORE2_FP_MMX_TRANS_TO_MMX,
	PNE_CORE2_HW_INT_RCV,
	PNE_CORE2_IDLE_DURING_DIV,
	PNE_CORE2_ILD_STALL,
	PNE_CORE2_INST_QUEUE_FULL,
	PNE_CORE2_INST_RETIRED_ANY_P,
	PNE_CORE2_INST_RETIRED_LOADS,
	PNE_CORE2_INST_RETIRED_OTHER,
	PNE_CORE2_INST_RETIRED_STORES,
	PNE_CORE2_ITLB_FLUSH,
	PNE_CORE2_ITLB_LARGE_MISS,
	PNE_CORE2_ITLB_MISSES,
	PNE_CORE2_ITLB_SMALL_MISS,
	PNE_CORE2_ITLB_MISS_RETIRED,
	PNE_CORE2_L1D_ALL_CACHE_REF,
	PNE_CORE2_L1D_ALL_REF,
	PNE_CORE2_L1D_CACHE_LD,
	PNE_CORE2_L1D_CACHE_LOCK,
	PNE_CORE2_L1D_CACHE_LOCK_DURATION,
	PNE_CORE2_L1D_CACHE_ST,
	PNE_CORE2_L1D_M_EVICT,
	PNE_CORE2_L1D_M_REPL,
	PNE_CORE2_L1D_PEND_MISS,
	PNE_CORE2_L1D_PREFETCH_REQUESTS,
	PNE_CORE2_L1D_REPL,
	PNE_CORE2_L1D_SPLIT_LOADS,
	PNE_CORE2_L1D_SPLIT_STORES,
	PNE_CORE2_L1I_MISSES,
	PNE_CORE2_L1I_READS,
	PNE_CORE2_L2_ADS,
	PNE_CORE2_L2_DBUS_BUSY_RD,
	PNE_CORE2_L2_IFETCH,
	PNE_CORE2_L2_LD,
	PNE_CORE2_L2_LINES_IN,
	PNE_CORE2_L2_LINES_OUT,
	PNE_CORE2_L2_LOCK,
	PNE_CORE2_L2_M_LINES_IN,
	PNE_CORE2_L2_M_LINES_OUT,
	PNE_CORE2_L2_NO_REQ,
	PNE_CORE2_L2_REJECT_BUSQ,
	PNE_CORE2_L2_RQSTS,
	PNE_CORE2_L2_RQSTS_SELF_DEMAND_I_STATE,
	PNE_CORE2_L2_RQSTS_SELF_DEMAND_MESI,
	PNE_CORE2_L2_ST,
	PNE_CORE2_LOAD_BLOCK_L1D,
	PNE_CORE2_LOAD_BLOCK_OVERLAP_STORE,
	PNE_CORE2_LOAD_BLOCK_STA,
	PNE_CORE2_LOAD_BLOCK_STD,
	PNE_CORE2_LOAD_BLOCK_UNTIL_RETIRE,
	PNE_CORE2_LOAD_HIT_PRE,
	PNE_CORE2_MACHINE_NUKES_MEM_ORDER,
	PNE_CORE2_MACHINE_NUKES_SMC,
	PNE_CORE2_MACRO_INSTS_CISC_DECODED,
	PNE_CORE2_MACRO_INSTS_DECODED,
	PNE_CORE2_MEMORY_DISAMBIGUATION_RESET,
	PNE_CORE2_MEMORY_DISAMBIGUATION_SUCCESS,
	PNE_CORE2_MEM_LOAD_RETIRED_DTLB_MISS,
	PNE_CORE2_MEM_LOAD_RETIRED_L1D_LINE_MISS,
	PNE_CORE2_MEM_LOAD_RETIRED_L1D_MISS,
	PNE_CORE2_MEM_LOAD_RETIRED_L2_LINE_MISS,
	PNE_CORE2_MEM_LOAD_RETIRED_L2_MISS,
	PNE_CORE2_MUL,
	PNE_CORE2_PAGE_WALKS_COUNT,
	PNE_CORE2_PAGE_WALKS_CYCLES,
	PNE_CORE2_PREF_RQSTS_DN,
	PNE_CORE2_PREF_RQSTS_UP,
	PNE_CORE2_RAT_STALLS_ANY,
	PNE_CORE2_RAT_STALLS_FLAGS,
	PNE_CORE2_RAT_STALLS_FPSW,
	PNE_CORE2_RAT_STALLS_PARTIAL_CYCLES,
	PNE_CORE2_RAT_STALLS_ROB_READ_PORT,
	PNE_CORE2_RESOURCE_STALLS_ANY,
	PNE_CORE2_RESOURCE_STALLS_BR_MISS_CLEAR,
	PNE_CORE2_RESOURCE_STALLS_FPCW,
	PNE_CORE2_RESOURCE_STALLS_LD_ST,
	PNE_CORE2_RESOURCE_STALLS_ROB_FULL,
	PNE_CORE2_RESOURCE_STALLS_RS_FULL,
	PNE_CORE2_RS_UOPS_DISPATCHED,
	PNE_CORE2_RS_UOPS_DISPATCHED_PORT0,
	PNE_CORE2_RS_UOPS_DISPATCHED_PORT1,
	PNE_CORE2_RS_UOPS_DISPATCHED_PORT2,
	PNE_CORE2_RS_UOPS_DISPATCHED_PORT3,
	PNE_CORE2_RS_UOPS_DISPATCHED_PORT4,
	PNE_CORE2_RS_UOPS_DISPATCHED_PORT5,
	PNE_CORE2_SB_DRAIN_CYCLES,
	PNE_CORE2_SEGMENT_REG_LOADS,
	PNE_CORE2_SEG_REG_RENAMES_ANY,
	PNE_CORE2_SEG_REG_RENAMES_DS,
	PNE_CORE2_SEG_REG_RENAMES_ES,
	PNE_CORE2_SEG_REG_RENAMES_FS,
	PNE_CORE2_SEG_REG_RENAMES_GS,
	PNE_CORE2_SEG_RENAME_STALLS_ANY,
	PNE_CORE2_SEG_RENAME_STALLS_DS,
	PNE_CORE2_SEG_RENAME_STALLS_ES,
	PNE_CORE2_SEG_RENAME_STALLS_FS,
	PNE_CORE2_SEG_RENAME_STALLS_GS,
	PNE_CORE2_SIMD_ASSIST,
	PNE_CORE2_SIMD_COMP_INST_RETIRED_PACKED_DOUBLE,
	PNE_CORE2_SIMD_COMP_INST_RETIRED_PACKED_SINGLE,
	PNE_CORE2_SIMD_COMP_INST_RETIRED_SCALAR_DOUBLE,
	PNE_CORE2_SIMD_COMP_INST_RETIRED_SCALAR_SINGLE,
	PNE_CORE2_SIMD_INSTR_RETIRED,
	PNE_CORE2_SIMD_INST_RETIRED_ANY,
	PNE_CORE2_SIMD_INST_RETIRED_PACKED_DOUBLE,
	PNE_CORE2_SIMD_INST_RETIRED_PACKED_SINGLE,
	PNE_CORE2_SIMD_INST_RETIRED_SCALAR_DOUBLE,
	PNE_CORE2_SIMD_INST_RETIRED_SCALAR_SINGLE,
	PNE_CORE2_SIMD_INST_RETIRED_VECTOR,
	PNE_CORE2_SIMD_SAT_INSTR_RETIRED,
	PNE_CORE2_SIMD_SAT_UOP_EXEC,
	PNE_CORE2_SIMD_UOPS_EXEC,
	PNE_CORE2_SIMD_UOP_TYPE_EXEC_ARITHMETIC,
	PNE_CORE2_SIMD_UOP_TYPE_EXEC_LOGICAL,
	PNE_CORE2_SIMD_UOP_TYPE_EXEC_MUL,
	PNE_CORE2_SIMD_UOP_TYPE_EXEC_PACK,
	PNE_CORE2_SIMD_UOP_TYPE_EXEC_SHIFT,
	PNE_CORE2_SIMD_UOP_TYPE_EXEC_UNPACK,
	PNE_CORE2_SNOOP_STALL_DRV,
	PNE_CORE2_SSE_PRE_EXEC_L1,
	PNE_CORE2_SSE_PRE_EXEC_L2,
	PNE_CORE2_SSE_PRE_EXEC_NTA,
	PNE_CORE2_SSE_PRE_EXEC_STORES,
	PNE_CORE2_SSE_PRE_MISS_L1,
	PNE_CORE2_SSE_PRE_MISS_L2,
	PNE_CORE2_SSE_PRE_MISS_NTA,
	PNE_CORE2_STORE_BLOCK_ORDER,
	PNE_CORE2_STORE_BLOCK_SNOOP,
	PNE_CORE2_THERMAL_TRIP,
	PNE_CORE2_UOPS_RETIRED_ANY,
	PNE_CORE2_UOPS_RETIRED_FUSED,
	PNE_CORE2_UOPS_RETIRED_LD_IND_BR,
	PNE_CORE2_UOPS_RETIRED_MACRO_FUSION,
	PNE_CORE2_UOPS_RETIRED_NON_FUSED,
	PNE_CORE2_UOPS_RETIRED_STD_STA,
	PNE_CORE2_X87_OPS_RETIRED_ANY,
	PNE_CORE2_X87_OPS_RETIRED_FXCH,
	PNE_CORE2_NATNAME_GUARD
};

/* Core2 tables, defined in another translation unit */
extern Native_Event_LabelDescription_t Core2Processor_info[];
extern hwi_search_t Core2Processor_map[];

#endif
papi-5.6.0/src/libpfm4/python/src/000775 001750 001750 00000000000 13216244366 020774 5ustar00jshenry1963jshenry1963000000 000000
papi-5.6.0/src/libpfm4/lib/events/intel_skl_events.h000664 001750 001750 00000301704 13216244364 024462 0ustar00jshenry1963jshenry1963000000 000000
/*
 * Contributed by Stephane Eranian
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do so,
 * subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
 * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
 * PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
 * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
 * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * This file is part of libpfm, a performance monitoring support library for
 * applications on Linux.
 *
 * PMU: skl (Intel SkyLake)
 */

/*
 * Unit masks for BACLEARS: a single umask, ANY, flagged as the default
 * (INTEL_X86_DFL).
 */
static const intel_x86_umask_t skl_baclears[]={
	{ .uname  = "ANY",
	  .udesc  = "Number of front-end re-steers due to BPU misprediction",
	  .ucode  = 0x0100,
	  .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL,
	},
};

/*
 * Unit masks for BR_INST_RETIRED. Every entry carries INTEL_X86_NCOMBO and
 * INTEL_X86_PEBS. ALL_BRANCHES (ucode 0x0) is the default; COND is declared
 * through .uequiv as another name for CONDITIONAL and has the same ucode.
 */
static const intel_x86_umask_t skl_br_inst_retired[]={
	{ .uname  = "CONDITIONAL",
	  .udesc  = "Counts all taken and not taken macro conditional branch instructions",
	  .ucode  = 0x100,
	  .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS,
	},
	{ .uname  = "COND",
	  .udesc  = "Counts all taken and not taken macro conditional branch instructions",
	  .ucode  = 0x100,
	  .uequiv = "CONDITIONAL",
	  .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS,
	},
	{ .uname  = "NEAR_CALL",
	  .udesc  = "Counts all macro direct and indirect near calls",
	  .ucode  = 0x200,
	  .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS,
	},
	{ .uname  = "ALL_BRANCHES",
	  .udesc  = "Counts all taken and not taken macro branches including far branches (architectural event)",
	  .ucode  = 0x0,
	  .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL | INTEL_X86_PEBS,
	},
	{ .uname  = "NEAR_RETURN",
	  .udesc  = "Counts the number of near ret instructions retired",
	  .ucode  = 0x800,
	  .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS,
	},
	{ .uname  = "NOT_TAKEN",
	  .udesc  = "Counts all not taken macro branch instructions retired",
	  .ucode  = 0x1000,
	  .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS,
	},
	{ .uname  = "NEAR_TAKEN",
	  .udesc  = "Counts the number of near branch taken instructions retired",
	  .ucode  = 0x2000,
	  .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS,
	},
	{ .uname  = "FAR_BRANCH",
	  .udesc  = "Counts the number of far branch instructions retired",
	  .ucode  = 0x4000,
	  .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS,
	},
};
static const intel_x86_umask_t skl_br_misp_retired[]={ { .uname = "CONDITIONAL", .udesc = "All mispredicted macro conditional branch instructions", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "COND", .udesc = "All mispredicted macro conditional branch instructions", .ucode = 0x100, .uequiv = "CONDITIONAL", .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "ALL_BRANCHES", .udesc = "All mispredicted macro branches (architectural event)", .ucode = 0x0, /* architectural encoding */ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS | INTEL_X86_DFL, }, { .uname = "NEAR_TAKEN", .udesc = "Number of near branch instructions retired that were mispredicted and taken", .ucode = 0x2000, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "NEAR_CALL", .udesc = "Counts both taken and not taken retired mispredicted direct and indirect near calls, including both register and memory indirect.", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, }; static const intel_x86_umask_t skl_cpu_clk_thread_unhalted[]={ { .uname = "REF_XCLK", .udesc = "Count Xclk pulses (100Mhz) when the core is unhalted", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "REF_XCLK_ANY", .udesc = "Count Xclk pulses (100Mhz) when the at least one thread on the physical core is unhalted", .ucode = 0x100 | INTEL_X86_MOD_ANY, /* any=1 */ .uequiv = "REF_XCLK:t", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_T, }, { .uname = "REF_P", .udesc = "Cycles when the core is unhalted (count at 100 Mhz)", .ucode = 0x100, .uequiv = "REF_XCLK", .uflags= INTEL_X86_NCOMBO, }, { .uname = "THREAD_P", .udesc = "Cycles when thread is not halted", .ucode = 0x000, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "ONE_THREAD_ACTIVE", .udesc = "Counts Xclk (100Mhz) pulses when this thread is unhalted and the other thread is halted", .ucode = 0x200, .uflags= INTEL_X86_NCOMBO, }, { .uname = "RING0_TRANS", .udesc = "Counts when the current privilege level 
transitions from ring 1, 2 or 3 to ring 0 (kernel)", .ucode = 0x000 | INTEL_X86_MOD_EDGE | (1 << INTEL_X86_CMASK_BIT), /* edge=1 cnt=1 */ .uequiv = "THREAD_P:e:c=1", .uflags= INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t skl_cycle_activity[]={ { .uname = "CYCLES_L2_MISS", .udesc = "Cycles with pending L2 miss demand loads outstanding", .ucode = 0x0100 | (0x1 << INTEL_X86_CMASK_BIT), .modhw = _INTEL_X86_ATTR_C, .uflags = INTEL_X86_NCOMBO, }, { .uname = "CYCLES_L2_PENDING", .udesc = "Cycles with pending L2 miss demand loads outstanding", .ucode = 0x0100 | (0x1 << INTEL_X86_CMASK_BIT), .modhw = _INTEL_X86_ATTR_C, .uflags = INTEL_X86_NCOMBO, .uequiv = "CYCLES_L2_MISS", }, { .uname = "CYCLES_L3_MISS", .udesc = "Cycles with L3 cache miss demand loads outstanding", .ucode = 0x0200 | (0x2 << INTEL_X86_CMASK_BIT), .modhw = _INTEL_X86_ATTR_C, .uflags = INTEL_X86_NCOMBO, }, { .uname = "CYCLES_LDM_PENDING", .udesc = "Cycles with L3 cache miss demand loads outstanding", .ucode = 0x0200 | (0x2 << INTEL_X86_CMASK_BIT), .uequiv = "CYCLES_L3_MISS", .modhw = _INTEL_X86_ATTR_C, .uflags = INTEL_X86_NCOMBO, }, { .uname = "CYCLES_L1D_MISS", .udesc = "Cycles with pending L1D load cache misses", .ucode = 0x0800 | (0x8 << INTEL_X86_CMASK_BIT), .modhw = _INTEL_X86_ATTR_C, .uflags = INTEL_X86_NCOMBO, }, { .uname = "CYCLES_L1D_PENDING", .udesc = "Cycles with pending L1D load cache misses", .ucode = 0x0800 | (0x8 << INTEL_X86_CMASK_BIT), .uequiv = "CYCLES_L1D_MISS", .modhw = _INTEL_X86_ATTR_C, .uflags = INTEL_X86_NCOMBO, }, { .uname = "CYCLES_MEM_ANY", .udesc = "Cycles when memory subsystem has at least one outstanding load", .ucode = 0x1000 | (0x10 << INTEL_X86_CMASK_BIT), .modhw = _INTEL_X86_ATTR_C, .uflags = INTEL_X86_NCOMBO, }, { .uname = "STALLS_L1D_MISS", .udesc = "Execution stalls while at least one L1D demand load cache miss is outstanding", .ucode = 0x0c00 | (0xc << INTEL_X86_CMASK_BIT), .modhw = _INTEL_X86_ATTR_C, .ucntmsk= 0x4, .uflags = INTEL_X86_NCOMBO, }, { .uname = 
"STALLS_L2_MISS", .udesc = "Execution stalls while at least one L2 demand load is outstanding", .ucode = 0x0500 | (0x5 << INTEL_X86_CMASK_BIT), .modhw = _INTEL_X86_ATTR_C, .ucntmsk= 0xf, .uflags = INTEL_X86_NCOMBO, }, { .uname = "STALLS_L3_MISS", .udesc = "Execution stalls while at least one L3 demand load is outstanding", .ucode = 0x0600 | (0x6 << INTEL_X86_CMASK_BIT), .modhw = _INTEL_X86_ATTR_C, .uflags = INTEL_X86_NCOMBO, }, { .uname = "STALLS_MEM_ANY", .udesc = "Execution stalls while at least one demand load is outstanding in the memory subsystem", .ucode = 0x1400 | (20 << INTEL_X86_CMASK_BIT), /* cnt=20 */ .modhw = _INTEL_X86_ATTR_C, .uflags = INTEL_X86_NCOMBO, }, { .uname = "STALLS_TOTAL", .udesc = "Total execution stalls in cycles", .ucode = 0x0400 | (0x4 << INTEL_X86_CMASK_BIT), .modhw = _INTEL_X86_ATTR_C, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t skl_dtlb_load_misses[]={ { .uname = "MISS_CAUSES_A_WALK", .udesc = "Misses in all DTLB levels that cause page walks", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "WALK_COMPLETED", .udesc = "Number of misses in all TLB levels causing a page walk of any page size that completes", .ucode = 0xe00, .uflags = INTEL_X86_NCOMBO, }, { .uname = "WALK_COMPLETED_4K", .udesc = "Number of misses in all TLB levels causing a page walk of 4KB page size that completes", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "WALK_COMPLETED_2M_4M", .udesc = "Number of misses in all TLB levels causing a page walk of 2MB/4MB page size that completes", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO, }, { .uname = "WALK_COMPLETED_1G", .udesc = "Number of misses in all TLB levels causing a page walk of 1GB page size that completes", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO, }, { .uname = "WALK_ACTIVE", .udesc = "Cycles with at least one hardware walker active for a load", .ucode = 0x1000 | (0x1 << INTEL_X86_CMASK_BIT), .uflags = INTEL_X86_NCOMBO, }, { .uname = "WALK_DURATION", .udesc = "Cycles when 
hardware page walker is busy with page walks", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "WALK_PENDING", .udesc = "Cycles when hardware page walker is busy with page walks", .ucode = 0x1000, .uequiv = "WALK_DURATION", .uflags = INTEL_X86_NCOMBO, }, { .uname = "STLB_HIT", .udesc = "Number of cache load STLB hits. No page walk", .ucode = 0x2000, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t skl_itlb_misses[]={ { .uname = "MISS_CAUSES_A_WALK", .udesc = "Misses in all DTLB levels that cause page walks", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "WALK_COMPLETED", .udesc = "Number of misses in all TLB levels causing a page walk of any page size that completes", .ucode = 0xe00, .uflags = INTEL_X86_NCOMBO, }, { .uname = "WALK_COMPLETED_4K", .udesc = "Number of misses in all TLB levels causing a page walk of 4KB page size that completes", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "WALK_COMPLETED_2M_4M", .udesc = "Number of misses in all TLB levels causing a page walk of 2MB/4MB page size that completes", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO, }, { .uname = "WALK_COMPLETED_1G", .udesc = "Number of misses in all TLB levels causing a page walk of 1GB page size that completes", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO, }, { .uname = "WALK_DURATION", .udesc = "Cycles when PMH is busy with page walks", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "WALK_PENDING", .udesc = "Cycles when PMH is busy with page walks", .ucode = 0x1000, .uequiv = "WALK_DURATION", .uflags = INTEL_X86_NCOMBO, }, { .uname = "STLB_HIT", .udesc = "Number of cache load STLB hits. 
No page walk", .ucode = 0x2000, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t skl_fp_assist[]={ { .uname = "ANY", .udesc = "Cycles with any input/output SEE or FP assists", .ucode = 0x1e00 | (1 << INTEL_X86_CMASK_BIT), /* cnt=1 */ .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, .modhw = _INTEL_X86_ATTR_C, }, }; static const intel_x86_umask_t skl_icache_16b[]={ { .uname = "IFDATA_STALL", .udesc = "Cycles where a code fetch is stalled due to L1 instruction cache miss", .ucode = 0x400, .uflags = INTEL_X86_DFL, }, }; static const intel_x86_umask_t skl_icache_64b[]={ { .uname = "IFTAG_HIT", .udesc = "Number of instruction fetch tag lookups that hit in the instruction cache (L1I). Counts at 64-byte cache-line granularity", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "IFTAG_MISS", .udesc = "Number of instruction fetch tag lookups that miss in the instruction cache (L1I). Counts at 64-byte cache-line granularity", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "IFTAG_STALL", .udesc = "Cycles where a code fetch is stalled due to L1 instruction cache tag miss", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t skl_idq[]={ { .uname = "MITE_UOPS", .udesc = "Number of uops delivered to Instruction Decode Queue (IDQ) from MITE path", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO, }, { .uname = "DSB_UOPS", .udesc = "Number of uops delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO, }, { .uname = "MS_DSB_UOPS", .udesc = "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "MS_MITE_UOPS", .udesc = "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy", .ucode = 0x2000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "MS_UOPS", .udesc = 
"Number of Uops were delivered into Instruction Decode Queue (IDQ) from MS, initiated by Decode Stream Buffer (DSB) or MITE", .ucode = 0x3000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "MS_UOPS_CYCLES", .udesc = "Number of cycles that Uops were delivered into Instruction Decode Queue (IDQ) when MS_Busy, initiated by Decode Stream Buffer (DSB) or MITE", .ucode = 0x3000 | (1 << INTEL_X86_CMASK_BIT), /* cnt=1 */ .uequiv = "MS_UOPS:c=1", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_C, }, { .uname = "MS_SWITCHES", .udesc = "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer", .ucode = 0x3000 | INTEL_X86_MOD_EDGE | (1 << INTEL_X86_CMASK_BIT), /* edge=1 cnt=1 */ .uequiv = "MS_UOPS:c=1:e", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_E | _INTEL_X86_ATTR_C, }, { .uname = "MITE_UOPS_CYCLES", .udesc = "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path", .ucode = 0x400 | (1 << INTEL_X86_CMASK_BIT), /* cnt=1 */ .uequiv = "MITE_UOPS:c=1", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_C, }, { .uname = "DSB_UOPS_CYCLES", .udesc = "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path", .ucode = 0x800 | (1 << INTEL_X86_CMASK_BIT), /* cnt=1 */ .uequiv = "DSB_UOPS:c=1", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_C, }, { .uname = "MS_DSB_UOPS_CYCLES", .udesc = "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy", .ucode = 0x1000 | (1 << INTEL_X86_CMASK_BIT), /* cnt=1 */ .uequiv = "MS_DSB_UOPS:c=1", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_C, }, { .uname = "MS_DSB_OCCUR", .udesc = "Deliveries to Instruction Decode Queue (IDQ) initiated by Decode Stream Buffer (DSB) while Microcode Sequencer (MS) is busy", .ucode = 0x1000 | INTEL_X86_MOD_EDGE | (1 << INTEL_X86_CMASK_BIT), /* edge=1 cnt=1 */ .uequiv 
= "MS_DSB_UOPS:c=1:e=1", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_E | _INTEL_X86_ATTR_C, }, { .uname = "ALL_DSB_CYCLES_4_UOPS", .udesc = "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops", .ucode = 0x1800 | (4 << INTEL_X86_CMASK_BIT), /* cnt=4 */ .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_C, }, { .uname = "ALL_DSB_CYCLES_ANY_UOPS", .udesc = "Cycles Decode Stream Buffer (DSB) is delivering any Uop", .ucode = 0x1800 | (1 << INTEL_X86_CMASK_BIT), /* cnt=1 */ .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_C, }, { .uname = "ALL_MITE_CYCLES_4_UOPS", .udesc = "Cycles MITE is delivering 4 Uops", .ucode = 0x2400 | (4 << INTEL_X86_CMASK_BIT), /* cnt=4 */ .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_C, }, { .uname = "ALL_MITE_CYCLES_ANY_UOPS", .udesc = "Cycles MITE is delivering any Uop", .ucode = 0x2400 | (1 << INTEL_X86_CMASK_BIT), /* cnt=1 */ .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_C, }, { .uname = "ALL_MITE_UOPS", .udesc = "Number of uops delivered to Instruction Decode Queue (IDQ) from any path", .ucode = 0x3c00, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t skl_idq_uops_not_delivered[]={ { .uname = "CORE", .udesc = "Count number of non-delivered uops to Resource Allocation Table (RAT)", .ucode = 0x100, .uflags = INTEL_X86_DFL, }, { .uname = "CYCLES_0_UOPS_DELIV_CORE", .udesc = "Number of uops not delivered to Resource Allocation Table (RAT) per thread when backend is not stalled", .ucode = 0x100 | (4 << INTEL_X86_CMASK_BIT), .modhw = _INTEL_X86_ATTR_C, .uflags = INTEL_X86_NCOMBO, }, { .uname = "CYCLES_FE_WAS_OK", .udesc = "Count cycles front-end (FE) delivered 4 uops or Resource Allocation Table (RAT) was stalling front-end", .ucode = 0x100 | INTEL_X86_MOD_INV | (1 << INTEL_X86_CMASK_BIT), /* cnt=1 inv=1 */ .uequiv = "CORE:c=1:i", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_C | _INTEL_X86_ATTR_I, }, { .uname = "CYCLES_LE_1_UOPS_DELIV_CORE", .udesc = "Count cycles per thread when 3 or more 
uops are not delivered to Resource Allocation Table (RAT) when backend is not stalled", .ucode = 0x100 | (3 << INTEL_X86_CMASK_BIT), .modhw = _INTEL_X86_ATTR_C, .uflags = INTEL_X86_NCOMBO, }, { .uname = "CYCLES_LE_2_UOPS_DELIV_CORE", .udesc = "Count cycles with less than 2 uops delivered by the front-end", .ucode = 0x100 | (2 << INTEL_X86_CMASK_BIT), .modhw = _INTEL_X86_ATTR_C, .uflags = INTEL_X86_NCOMBO, }, { .uname = "CYCLES_LE_3_UOPS_DELIV_CORE", .udesc = "Count cycles with less then 3 uops delivered by the front-end", .ucode = 0x100 | (1 << INTEL_X86_CMASK_BIT), .modhw = _INTEL_X86_ATTR_C, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t skl_inst_retired[]={ { .uname = "ANY_P", .udesc = "Number of instructions retired. General Counter - architectural event", .ucode = 0x000, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "ALL", .udesc = "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution (Precise Event)", .ucode = 0x100, .uequiv = "PREC_DIST", .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "TOTAL_CYCLES", .udesc = "Number of cycles using always true condition", .ucode = 0x100 | INTEL_X86_MOD_INV | (10 << INTEL_X86_CMASK_BIT), /* inv=1 cnt=10 */ .uequiv = "PREC_DIST:i=1:c=10", .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, .modhw = _INTEL_X86_ATTR_I | _INTEL_X86_ATTR_C, }, { .uname = "PREC_DIST", .udesc = "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution (Precise event)", .ucode = 0x100, .ucntmsk= 0x2, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, }; static const intel_x86_umask_t skl_int_misc[]={ { .uname = "RECOVERY_CYCLES", .udesc = "Cycles waiting for the checkpoints in Resource Allocation Table (RAT) to be recovered after Nuke due to all other cases except JEClear (e.g. 
whenever a ucode assist is needed like SSE exception, memory disambiguation, etc...)", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "RECOVERY_CYCLES_ANY", .udesc = "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke)", .ucode = 0x100 | INTEL_X86_MOD_ANY, /* any=1 */ .uequiv = "RECOVERY_CYCLES:t", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_T, }, { .uname = "RECOVERY_STALLS_COUNT", .udesc = "Number of occurrences waiting for Machine Clears", .ucode = 0x100 | INTEL_X86_MOD_EDGE | (1 << INTEL_X86_CMASK_BIT), /* edge=1 cnt=1 */ .uequiv = "RECOVERY_CYCLES:e:c=1", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_E, }, { .uname = "CLEAR_RESTEER_CYCLES", .udesc = "Number of cycles the issue-stage is waiting for front-end to fetch from resteered path following branch misprediction or machine clear events", .ucode = 0x8000, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t skl_itlb[]={ { .uname = "ITLB_FLUSH", .udesc = "Flushing of the Instruction TLB (ITLB) pages independent of page size", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, }; static const intel_x86_umask_t skl_l1d[]={ { .uname = "REPLACEMENT", .udesc = "L1D Data line replacements", .ucode = 0x100, .uflags = INTEL_X86_DFL, }, }; static const intel_x86_umask_t skl_sq_misc[]={ { .uname = "SPLIT_LOCK", .udesc = "Number of split locks in the super queue (SQ)", .ucode = 0x1000, .uflags = INTEL_X86_DFL, }, }; static const intel_x86_umask_t skl_l1d_pend_miss[]={ { .uname = "PENDING", .udesc = "Cycles with L1D load misses outstanding", .ucode = 0x100, .ucntmsk = 0x4, .uflags = INTEL_X86_DFL, }, { .uname = "FB_FULL", .udesc = "Number of times a request needed a fill buffer (FB) entry but there was no entry available for it. That is the FB unavailability was dominant reason for blocking the request. 
A request includes cacheable/uncacheable demands load, store or SW prefetch", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "PENDING_CYCLES", .udesc = "Cycles with L1D load misses outstanding", .ucode = 0x100 | (1 << INTEL_X86_CMASK_BIT), /* cnt=1 */ .uequiv = "PENDING:c=1", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_C, }, { .uname = "PENDING_CYCLES_ANY", .udesc = "Cycles with L1D load misses outstanding from any thread", .ucode = 0x100 | (1 << INTEL_X86_CMASK_BIT) | INTEL_X86_MOD_ANY, /* cnt=1 any=1 */ .uequiv = "PENDING:c=1:t", .ucntmsk = 0x4, .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_C | _INTEL_X86_ATTR_T, }, { .uname = "OCCURRENCES", .udesc = "Number L1D miss outstanding", .ucode = 0x100 | INTEL_X86_MOD_EDGE | (1 << INTEL_X86_CMASK_BIT), /* edge=1 cnt=1 */ .uequiv = "PENDING:c=1:e=1", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_E | _INTEL_X86_ATTR_C, }, { .uname = "EDGE", .udesc = "Number L1D miss outstanding", .ucode = 0x100 | INTEL_X86_MOD_EDGE | (1 << INTEL_X86_CMASK_BIT), /* edge=1 cnt=1 */ .uequiv = "PENDING:c=1:e=1", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_E | _INTEL_X86_ATTR_C, }, }; static const intel_x86_umask_t skl_l2_lines_in[]={ { .uname = "ALL", .udesc = "L2 cache lines filling L2", .ucode = 0x1f00, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "ANY", .udesc = "L2 cache lines filling L2", .uequiv = "ALL", .ucode = 0x1f00, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t skl_l2_lines_out[]={ { .uname = "NON_SILENT", .udesc = "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines can be either in modified state or clean state. Modified lines may either be written back to L3 or directly written to memory and not allocated in L3. 
Clean lines may either be allocated in L3 or dropped ", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "USELESS_HWPREF", .udesc = "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache", .ucode = 0x400, .uequiv = "USELESS_HWPF", .uflags = INTEL_X86_NCOMBO, }, { .uname = "USELESS_HWPF", .udesc = "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO, }, { .uname = "SILENT", .udesc = "Counts the number of lines that are silently dropped by L2 cache when triggered by an L2 cache fill. These lines are typically in Shared state. This is a per-core event.", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t skl_l2_rqsts[]={ { .uname = "DEMAND_DATA_RD_MISS", .udesc = "Demand Data Read requests that miss L2 cache", .ucode = 0x2100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "DEMAND_DATA_RD_HIT", .udesc = "Demand Data Read requests that hit L2 cache", .ucode = 0x4100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "DEMAND_RFO_MISS", .udesc = "RFO requests that miss L2 cache", .ucode = 0x2200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "RFO_MISS", .udesc = "RFO requests that miss L2 cache", .ucode = 0x2200, .uequiv = "DEMAND_RFO_MISS", .uflags = INTEL_X86_NCOMBO, }, { .uname = "DEMAND_RFO_HIT", .udesc = "RFO requests that hit L2 cache", .ucode = 0x4200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "RFO_HIT", .udesc = "RFO requests that hit L2 cache", .ucode = 0x4200, .uequiv = "DEMAND_RFO_HIT", .uflags = INTEL_X86_NCOMBO, }, { .uname = "CODE_RD_MISS", .udesc = "L2 cache misses when fetching instructions", .ucode = 0x2400, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ALL_DEMAND_MISS", .udesc = "All demand requests that miss the L2 cache", .ucode = 0x2700, .uflags = INTEL_X86_NCOMBO, }, { .uname = "CODE_RD_HIT", .udesc = "L2 cache hits when fetching instructions, code reads", .ucode = 0x4400, .uflags = 
INTEL_X86_NCOMBO, }, { .uname = "MISS", .udesc = "All requests that miss the L2 cache", .ucode = 0x3f00, .uflags = INTEL_X86_NCOMBO, }, { .uname = "PF_MISS", .udesc = "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache", .ucode = 0x3800, .uflags = INTEL_X86_NCOMBO, }, { .uname = "PF_HIT", .udesc = "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache", .ucode = 0xd800, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ALL_DEMAND_DATA_RD", .udesc = "Any data read request to L2 cache", .ucode = 0xe100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ALL_RFO", .udesc = "Any data RFO request to L2 cache", .ucode = 0xe200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ALL_CODE_RD", .udesc = "Any code read request to L2 cache", .ucode = 0xe400, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ALL_DEMAND_REFERENCES", .udesc = "All demand requests to L2 cache ", .ucode = 0xe700, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ALL_PF", .udesc = "Any L2 HW prefetch request to L2 cache", .ucode = 0xf800, .uflags = INTEL_X86_NCOMBO, }, { .uname = "REFERENCES", .udesc = "All requests to L2 cache", .ucode = 0xff00, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, }; static const intel_x86_umask_t skl_l2_trans[]={ { .uname = "L2_WB", .udesc = "L2 writebacks that access L2 cache", .ucode = 0x4000, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, }; static const intel_x86_umask_t skl_ld_blocks[]={ { .uname = "STORE_FORWARD", .udesc = "Counts the number of loads blocked by overlapping with store buffer entries that cannot be forwarded", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "NO_SR", .udesc = "number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t skl_ld_blocks_partial[]={ { .uname = "ADDRESS_ALIAS", .udesc = "False dependencies in MOB due to partial 
compare on address", .ucode = 0x100, .uflags = INTEL_X86_DFL, }, }; static const intel_x86_umask_t skl_load_hit_pre[]={ { .uname = "SW_PF", .udesc = "Demand load dispatches that hit L1D fill buffer (FB) allocated for software prefetch", .ucode = 0x100, .uflags = INTEL_X86_DFL, }, }; static const intel_x86_umask_t skl_lock_cycles[]={ { .uname = "CACHE_LOCK_DURATION", .udesc = "cycles that the L1D is locked", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, }; static const intel_x86_umask_t skl_longest_lat_cache[]={ { .uname = "MISS", .udesc = "Core-originated cacheable demand requests missed LLC - architectural event", .ucode = 0x4100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "REFERENCE", .udesc = "Core-originated cacheable demand requests that refer to LLC - architectural event", .ucode = 0x4f00, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t skl_machine_clears[]={ { .uname = "COUNT", .udesc = "Number of machine clears (Nukes) of any type", .ucode = 0x100| (1 << INTEL_X86_CMASK_BIT) | (1 << INTEL_X86_EDGE_BIT), .modhw = _INTEL_X86_ATTR_E | _INTEL_X86_ATTR_C, .uflags = INTEL_X86_NCOMBO, }, { .uname = "MEMORY_ORDERING", .udesc = "Number of Memory Ordering Machine Clears detected", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "SMC", .udesc = "Number of Self-modifying code (SMC) Machine Clears detected", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t skl_mem_load_l3_hit_retired[]={ { .uname = "XSNP_MISS", .udesc = "Retired load uops which data sources were L3 hit and cross-core snoop missed in on-pkg core cache", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "XSNP_HIT", .udesc = "Retired load uops which data sources were L3 and cross-core snoop hits in on-pkg core cache", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "XSNP_HITM", .udesc = "Load had HitM Response from a core on same socket (shared L3). 
(Non PEBS", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "XSNP_NONE", .udesc = "Retired load uops which data sources were hits in L3 without snoops required", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, }; static const intel_x86_umask_t skl_mem_load_l3_miss_retired[]={ { .uname = "LOCAL_DRAM", .udesc = "Retired load instructions which data sources missed L3 but serviced from local dram", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "REMOTE_DRAM", .udesc = "Retired load instructions which data sources missed L3 but serviced from remote dram", .ucode = 0x200, .umodel = PFM_PMU_INTEL_SKX, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "REMOTE_HITM", .udesc = "Retired load instructions whose data sources was remote HITM", .ucode = 0x400, .umodel = PFM_PMU_INTEL_SKX, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "REMOTE_FWD", .udesc = "Retired load instructions whose data sources was remote HITM", .ucode = 0x800, .umodel = PFM_PMU_INTEL_SKX, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, }; static const intel_x86_umask_t skl_mem_load_retired[]={ { .uname = "L1_HIT", .udesc = "Retired load uops with L1 cache hits as data source", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "L2_HIT", .udesc = "Retired load uops with L2 cache hits as data source", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "L3_HIT", .udesc = "Retired load uops with L3 cache hits as data source", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "L1_MISS", .udesc = "Retired load uops which missed the L1D", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "L2_MISS", .udesc = "Retired load uops which missed the L2. 
Unknown data source excluded", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "L3_MISS", .udesc = "Retired load uops which missed the L3", .ucode = 0x2000, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "HIT_LFB", .udesc = "Retired load uops which missed L1 but hit line fill buffer (LFB)", .ucode = 0x4000, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "FB_HIT", .udesc = "Retired load uops which missed L1 but hit line fill buffer (LFB)", .ucode = 0x4000, .uequiv = "HIT_LFB", .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, }; static const intel_x86_umask_t skl_mem_trans_retired[]={ { .uname = "LOAD_LATENCY", .udesc = "Memory load instructions retired above programmed clocks, minimum threshold value is 3 (Precise Event and ldlat required)", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS | INTEL_X86_LDLAT | INTEL_X86_DFL, }, { .uname = "LATENCY_ABOVE_THRESHOLD", .udesc = "Memory load instructions retired above programmed clocks, minimum threshold value is 3 (Precise Event and ldlat required)", .ucode = 0x100, .uequiv = "LOAD_LATENCY", .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS | INTEL_X86_LDLAT | INTEL_X86_NO_AUTOENCODE, }, }; static const intel_x86_umask_t skl_mem_inst_retired[]={ { .uname = "STLB_MISS_LOADS", .udesc = "Load uops with true STLB miss retired to architected path", .ucode = 0x1100, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "STLB_MISS_STORES", .udesc = "Store uops with true STLB miss retired to architected path", .ucode = 0x1200, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "LOCK_LOADS", .udesc = "Load uops with locked access retired", .ucode = 0x2100, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "SPLIT_LOADS", .udesc = "Line-splitted load uops retired", .ucode = 0x4100, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "SPLIT_STORES", .udesc = "Line-splitted store uops retired", .ucode = 0x4200, .uflags = INTEL_X86_NCOMBO | 
INTEL_X86_PEBS, }, { .uname = "ALL_LOADS", .udesc = "All load uops retired", .ucode = 0x8100, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "ALL_STORES", .udesc = "All store uops retired", .ucode = 0x8200, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, }; static const intel_x86_umask_t skl_misalign_mem_ref[]={ { .uname = "LOADS", .udesc = "Speculative cache-line split load uops dispatched to the L1D", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "STORES", .udesc = "Speculative cache-line split store-address uops dispatched to L1D", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t skl_move_elimination[]={ { .uname = "INT_ELIMINATED", .udesc = "Number of integer Move Elimination candidate uops that were eliminated", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "SIMD_ELIMINATED", .udesc = "Number of SIMD Move Elimination candidate uops that were eliminated", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "INT_NOT_ELIMINATED", .udesc = "Number of integer Move Elimination candidate uops that were not eliminated", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO, }, { .uname = "SIMD_NOT_ELIMINATED", .udesc = "Number of SIMD Move Elimination candidate uops that were not eliminated", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t skl_offcore_requests[]={ { .uname = "DEMAND_DATA_RD", .udesc = "Demand data read requests sent to uncore (use with HT off only)", .ucode = 0x100, .uflags= INTEL_X86_NCOMBO, }, { .uname = "DEMAND_CODE_RD", .udesc = "Demand code read requests sent to uncore (use with HT off only)", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "DEMAND_RFO", .udesc = "Demand RFOs requests sent to uncore (use with HT off only)", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ALL_DATA_RD", .udesc = "Data read requests sent to uncore (use with HT off only)", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ALL_REQUESTS", .udesc 
= "Number of memory transactions that reached the superqueue (SQ)", .ucode = 0x8000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "L3_MISS_DEMAND_DATA_RD", .udesc = "Number of demand data read requests which missed the L3 cache", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t skl_other_assists[]={ { .uname = "ANY", .udesc = "Number of times a microcode assist is invoked by HW other than FP-assist. Examples include AD (page Access Dirty) and AVX* related assists", .ucode = 0x3f00, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, }; static const intel_x86_umask_t skl_resource_stalls[]={ { .uname = "ANY", .udesc = "Cycles Allocation is stalled due to Resource Related reason", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "ALL", .udesc = "Cycles Allocation is stalled due to Resource Related reason", .ucode = 0x100, .uequiv = "ANY", .uflags = INTEL_X86_NCOMBO, }, { .uname = "RS", .udesc = "Stall cycles caused by absence of eligible entries in Reservation Station (RS)", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO, }, { .uname = "SB", .udesc = "Cycles Allocator is stalled due to Store Buffer full (not including draining from synch)", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ROB", .udesc = "ROB full stall cycles", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t skl_rob_misc_events[]={ { .uname = "LBR_INSERTS", .udesc = "Count each time an new Last Branch Record (LBR) is inserted", .ucode = 0x2000, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, }; static const intel_x86_umask_t skl_rs_events[]={ { .uname = "EMPTY_CYCLES", .udesc = "Cycles the Reservation Station (RS) is empty for this thread", .ucode = 0x100, .uflags = INTEL_X86_DFL, }, { .uname = "EMPTY_END", .udesc = "Number of times the reservation station (RS) was empty", .ucode = 0x100 | INTEL_X86_MOD_INV | (1 << INTEL_X86_CMASK_BIT) | INTEL_X86_MOD_EDGE, /* inv=1, cmask=1,edge=1 */ .modhw = _INTEL_X86_ATTR_I | 
_INTEL_X86_ATTR_C | _INTEL_X86_ATTR_E, }, }; static const intel_x86_umask_t skl_tlb_flush[]={ { .uname = "DTLB_THREAD", .udesc = "Count number of DTLB flushes of thread-specific entries", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "STLB_ANY", .udesc = "Count number of any STLB flushes", .ucode = 0x2000, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t skl_uops_executed[]={ { .uname = "THREAD", .udesc = "Number of uops executed per thread in each cycle", .ucode = 0x100, .uflags = INTEL_X86_DFL, }, { .uname = "THREAD_CYCLES_GE_1", .udesc = "Number of cycles with at least 1 uop is executed per thread", .ucode = 0x100 | (0x1 << INTEL_X86_CMASK_BIT), .modhw = _INTEL_X86_ATTR_C, .uflags = INTEL_X86_NCOMBO, }, { .uname = "THREAD_CYCLES_GE_2", .udesc = "Number of cycles with at least 2 uops are executed per thread", .ucode = 0x100 | (0x2 << INTEL_X86_CMASK_BIT), .modhw = _INTEL_X86_ATTR_C, .uflags = INTEL_X86_NCOMBO }, { .uname = "THREAD_CYCLES_GE_3", .udesc = "Number of cycles with at least 3 uops are executed per thread", .ucode = 0x100 | (0x3 << INTEL_X86_CMASK_BIT), .modhw = _INTEL_X86_ATTR_C, .uflags = INTEL_X86_NCOMBO, }, { .uname = "THREAD_CYCLES_GE_4", .udesc = "Number of cycles with at least 4 uops are executed per thread", .ucode = 0x100 | (0x4 << INTEL_X86_CMASK_BIT), .modhw = _INTEL_X86_ATTR_C, .uflags = INTEL_X86_NCOMBO, }, { .uname = "CORE", .udesc = "Number of uops executed from any thread in each cycle", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "CORE_CYCLES_GE_1", .udesc = "Number of cycles with at least 1 uop is executed for any thread", .ucode = 0x200 | (0x1 << INTEL_X86_CMASK_BIT), .modhw = _INTEL_X86_ATTR_C, .uflags = INTEL_X86_NCOMBO }, { .uname = "CORE_CYCLES_GE_2", .udesc = "Number of cycles with at least 2 uops are executed for any thread", .ucode = 0x200 | (0x2 << INTEL_X86_CMASK_BIT), .modhw = _INTEL_X86_ATTR_C, .uflags = INTEL_X86_NCOMBO }, { .uname = "CORE_CYCLES_GE_3", .udesc = "Number of cycles 
with at least 3 uops are executed for any thread", .ucode = 0x200 | (0x3 << INTEL_X86_CMASK_BIT), .modhw = _INTEL_X86_ATTR_C, .uflags = INTEL_X86_NCOMBO, }, { .uname = "CORE_CYCLES_GE_4", .udesc = "Number of cycles with at least 4 uops are executed for any thread", .ucode = 0x200 | (0x4 << INTEL_X86_CMASK_BIT), .modhw = _INTEL_X86_ATTR_C, .uflags = INTEL_X86_NCOMBO, }, { .uname = "STALL_CYCLES", .udesc = "Number of cycles with no uops executed by thread", .ucode = 0x100 | INTEL_X86_MOD_INV | (1 << INTEL_X86_CMASK_BIT), /* inv=1 cnt=1 */ .uequiv = "THREAD:c=1:i", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_I | _INTEL_X86_ATTR_C, }, { .uname = "CORE_CYCLES_NONE", .udesc = "Number of cycles with no uops executed from any thread", .ucode = 0x200 | INTEL_X86_MOD_INV | (1 << INTEL_X86_CMASK_BIT), /* inv=1 cnt=1 */ .uequiv = "CORE:c=1:i", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_I | _INTEL_X86_ATTR_C, }, { .uname = "X87", .udesc = "Number of x87 uops executed per thread", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO }, }; static const intel_x86_umask_t skl_uops_dispatched_port[]={ { .uname = "PORT_0", .udesc = "Cycles which a Uop is executed on port 0", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "PORT_1", .udesc = "Cycles which a Uop is executed on port 1", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "PORT_2", .udesc = "Cycles which a Uop is executed on port 2", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO, }, { .uname = "PORT_3", .udesc = "Cycles which a Uop is executed on port 3", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO, }, { .uname = "PORT_4", .udesc = "Cycles which a Uop is executed on port 4", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "PORT_5", .udesc = "Cycles which a Uop is executed on port 5", .ucode = 0x2000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "PORT_6", .udesc = "Cycles which a Uop is executed on port 6", .ucode = 0x4000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "PORT_7", .udesc = "Cycles 
which a Uop is executed on port 7", .ucode = 0x8000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "PORT_0_CORE", .udesc = "tbd", .ucode = 0x100 | INTEL_X86_MOD_ANY, /* any=1 */ .uequiv = "PORT_0:t=1", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_T, }, { .uname = "PORT_1_CORE", .udesc = "tbd", .ucode = 0x200 | INTEL_X86_MOD_ANY, /* any=1 */ .uequiv = "PORT_1:t=1", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_T, }, { .uname = "PORT_2_CORE", .udesc = "tbd", .ucode = 0x400 | INTEL_X86_MOD_ANY, /* any=1 */ .uequiv = "PORT_2:t=1", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_T, }, { .uname = "PORT_3_CORE", .udesc = "tbd", .ucode = 0x800 | INTEL_X86_MOD_ANY, /* any=1 */ .uequiv = "PORT_3:t=1", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_T, }, { .uname = "PORT_4_CORE", .udesc = "tbd", .ucode = 0x1000 | INTEL_X86_MOD_ANY, /* any=1 */ .uequiv = "PORT_4:t=1", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_T, }, { .uname = "PORT_5_CORE", .udesc = "tbd", .ucode = 0x2000 | INTEL_X86_MOD_ANY, /* any=1 */ .uequiv = "PORT_5:t=1", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_T, }, { .uname = "PORT_6_CORE", .udesc = "tbd", .ucode = 0x4000 | INTEL_X86_MOD_ANY, /* any=1 */ .uequiv = "PORT_6:t=1", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_T, }, { .uname = "PORT_7_CORE", .udesc = "tbd", .ucode = 0x8000 | INTEL_X86_MOD_ANY, /* any=1 */ .uequiv = "PORT_7:t=1", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_T, }, }; static const intel_x86_umask_t skl_uops_issued[]={ { .uname = "ANY", .udesc = "Number of Uops issued by the Resource Allocation Table (RAT) to the Reservation Station (RS)", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "ALL", .udesc = "Number of Uops issued by the Resource Allocation Table (RAT) to the Reservation Station (RS)", .ucode = 0x100, .uequiv = "ANY", .uflags = INTEL_X86_NCOMBO, }, { .uname = "VECTOR_WIDTH_MISMATCH", .udesc = "Number of blend uops issued by the Resource Allocation 
table (RAT) to the Reservation Station (RS) in order to preserve upper bits of vector registers", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "FLAGS_MERGE", .udesc = "Number of flags-merge uops being allocated. Such uops adds delay", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "SLOW_LEA", .udesc = "Number of slow LEA or similar uops allocated. Such uop has 3 sources regardless if result of LEA instruction or not", .ucode = 0x2000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "SINGLE_MUL", .udesc = "Number of Multiply packed/scalar single precision uops allocated", .ucode = 0x4000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "STALL_CYCLES", .udesc = "Counts the number of cycles no uops issued by this thread", .ucode = 0x100 | INTEL_X86_MOD_INV | (1 << INTEL_X86_CMASK_BIT), /* inv=1 cnt=1 */ .uequiv = "ANY:c=1:i=1", .uflags = INTEL_X86_NCOMBO, .ucntmsk = 0xf, .modhw = _INTEL_X86_ATTR_I | _INTEL_X86_ATTR_C, }, { .uname = "CORE_STALL_CYCLES", .udesc = "Counts the number of cycles no uops issued on this core", .ucode = 0x100 | INTEL_X86_MOD_ANY | INTEL_X86_MOD_INV | (1 << INTEL_X86_CMASK_BIT), /* any=1 inv=1 cnt=1 */ .uequiv = "ANY:c=1:i=1:t=1", .ucntmsk = 0xf, .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_T | _INTEL_X86_ATTR_I | _INTEL_X86_ATTR_C, }, }; static const intel_x86_umask_t skl_uops_retired[]={ { .uname = "ALL", .udesc = "All uops that actually retired", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS | INTEL_X86_DFL, }, { .uname = "ANY", .udesc = "All uops that actually retired", .ucode = 0x100, .uequiv = "ALL", .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "RETIRE_SLOTS", .udesc = "number of retirement slots used non PEBS", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "STALL_CYCLES", .udesc = "Cycles no executable uops retired (Precise Event)", .ucode = 0x100 | INTEL_X86_MOD_INV | (1 << INTEL_X86_CMASK_BIT), /* inv=1 cnt=1 */ .uequiv = "ALL:c=1:i", .uflags = 
INTEL_X86_NCOMBO | INTEL_X86_PEBS, .modhw = _INTEL_X86_ATTR_I | _INTEL_X86_ATTR_C, }, { .uname = "TOTAL_CYCLES", .udesc = "Number of cycles using always true condition applied to PEBS uops retired event", .ucode = 0x100 | INTEL_X86_MOD_INV | (10 << INTEL_X86_CMASK_BIT), /* inv=1 cnt=10 */ .uequiv = "ALL:c=10:i", .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, .modhw = _INTEL_X86_ATTR_I | _INTEL_X86_ATTR_C, }, { .uname = "CORE_STALL_CYCLES", .udesc = "Cycles no executable uops retired on core (Precise Event)", .ucode = 0x100 | INTEL_X86_MOD_INV | (1 << INTEL_X86_CMASK_BIT), /* inv=1 cnt=1 */ .uequiv = "ALL:c=1:i:t=1", .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, .modhw = _INTEL_X86_ATTR_I | _INTEL_X86_ATTR_C, }, { .uname = "STALL_OCCURRENCES", .udesc = "Number of transitions from stalled to unstalled execution (Precise Event)", .ucode = 0x100 | INTEL_X86_MOD_INV | INTEL_X86_MOD_EDGE| (1 << INTEL_X86_CMASK_BIT), /* inv=1 edge=1 cnt=1 */ .uequiv = "ALL:c=1:i=1:e=1", .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, .modhw = _INTEL_X86_ATTR_I | _INTEL_X86_ATTR_C | _INTEL_X86_ATTR_E, }, }; static const intel_x86_umask_t skl_offcore_response[]={ { .uname = "DMND_DATA_RD", .udesc = "Request: number of demand and DCU prefetch data reads of full and partial cachelines as well as demand data page table entry cacheline reads. Does not count L2 data read prefetches or instruction fetches", .ucode = 1ULL << (0 + 8), .grpid = 0, }, { .uname = "DMND_RFO", .udesc = "Request: number of demand and DCU prefetch reads for ownership (RFO) requests generated by a write to data cacheline. Does not count L2 RFO prefetches", .ucode = 1ULL << (1 + 8), .grpid = 0, }, { .uname = "DMND_CODE_RD", .udesc = "Request: number of demand and DCU prefetch instruction cacheline reads. 
Does not count L2 code read prefetches", .ucode = 1ULL << (2 + 8), .grpid = 0, }, { .uname = "PF_L2_DATA_RD", .udesc = "Request: number of data prefetch requests to L2", .ucode = 1ULL << (4 + 8), .umodel = PFM_PMU_INTEL_SKX, .grpid = 0, }, { .uname = "PF_L2_RFO", .udesc = "Request: number of RFO prefetch requests to L2", .ucode = 1ULL << (5 + 8), .umodel = PFM_PMU_INTEL_SKX, .grpid = 0, }, { .uname = "PF_L3_DATA_RD", .udesc = "Request: number of data prefetch requests for loads that end up in L3", .ucode = 1ULL << (7 + 8), .umodel = PFM_PMU_INTEL_SKX, .grpid = 0, }, { .uname = "PF_L3_RFO", .udesc = "Request: number of RFO prefetch requests that end up in L3", .ucode = 1ULL << (8 + 8), .umodel = PFM_PMU_INTEL_SKX, .grpid = 0, }, { .uname = "PF_L1D_AND_SW", .udesc = "Request: number of L1 data cache hardware prefetch requests and software prefetch requests", .ucode = 1ULL << (10 + 8), .umodel = PFM_PMU_INTEL_SKX, .grpid = 0, }, { .uname = "OTHER", .udesc = "Request: counts one of the following transaction types, including L3 invalidate, I/O, full or partial writes, WC or non-temporal stores, CLFLUSH, Fences, lock, unlock, split lock", .ucode = 1ULL << (15+8), .grpid = 0, }, { .uname = "ANY_REQUEST", .udesc = "Request: combination of all request umasks", .uequiv = "DMND_DATA_RD:DMND_RFO:DMND_CODE_RD:OTHER", .ucode = 0x1800700, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, .umodel = PFM_PMU_INTEL_SKL, .grpid = 0, }, { .uname = "ANY_REQUEST", .udesc = "Request: combination of all request umasks", .uequiv = "DMND_DATA_RD:DMND_RFO:DMND_CODE_RD:PF_L2_DATA_RD:PF_L2_RFO:PF_L3_DATA_RD:PF_L3_RFO:PF_L1D_AND_SW:OTHER", .ucode = 0x85b700, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, .umodel = PFM_PMU_INTEL_SKX, .grpid = 0, }, { .uname = "ANY_DATA_RD", .udesc = "Request: combination of DMND_DATA_RD | PF_L2_DATA_RD | PF_L3_DATA_RD | PF_L1D_AND_SW", .uequiv = "DMND_DATA_RD:PF_L2_DATA_RD:PF_L3_DATA_RD:PF_L1D_AND_SW", .ucode = 0x1049100, .umodel = PFM_PMU_INTEL_SKX, .grpid = 0, }, { .uname 
= "ANY_DATA", .udesc = "Request: combination of ANY_DATA_RD | PF_L2_RFO | PF_L3_RFO | DMND_RFO", .uequiv = "ANY_DATA_RD:DMND_RFO:PF_L2_RFO:PF_L3_RFO", .ucode = 0x105b300, .umodel = PFM_PMU_INTEL_SKX, .grpid = 0, }, { .uname = "ANY_DATA_PF", .udesc = "Request: combination of PF_L2_DATA_RD | PF_L3_DATA_RD | PF_L1D_AND_SW", .uequiv = "PF_L2_DATA_RD:PF_L3_DATA_RD:PF_L1D_AND_SW", .ucode = 0x1049000, .umodel = PFM_PMU_INTEL_SKX, .grpid = 0, }, { .uname = "ANY_RFO", .udesc = "Request: combination of DMND_RFO | PF_L2_RFO | PF_L3_RFO", .uequiv = "DMND_RFO:PF_L2_RFO:PF_L3_RFO", .ucode = 0x1012200, .umodel = PFM_PMU_INTEL_SKX, .grpid = 0, }, { .uname = "ANY_RESPONSE", .udesc = "Response: count any response type", .ucode = 1ULL << (16+8), .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL | INTEL_X86_EXCL_GRP_GT, .grpid = 1, }, { .uname = "SUPPLIER_NONE", .udesc = "Supplier: counts number of times supplier information is not available", .ucode = 1ULL << (17+8), .grpid = 1, }, { .uname = "NO_SUPP", .udesc = "Supplier: counts number of times supplier information is not available", .ucode = 1ULL << (17+8), .uequiv = "SUPPLIER_NONE", .grpid = 1, }, { .uname = "L3_HITM", .udesc = "Supplier: counts L3 hits in M-state (initial lookup)", .ucode = 1ULL << (18+8), .grpid = 1, }, { .uname = "L3_HITE", .udesc = "Supplier: counts L3 hits in E-state", .ucode = 1ULL << (19+8), .grpid = 1, }, { .uname = "L3_HITS", .udesc = "Supplier: counts L3 hits in S-state", .ucode = 1ULL << (20+8), .grpid = 1, }, { .uname = "L3_HITF", .udesc = "Supplier: counts L3 hits in F-state", .ucode = 1ULL << (21+8), .umodel = PFM_PMU_INTEL_SKX, .grpid = 1, }, { .uname = "L3_HITMES", .udesc = "Supplier: counts L3 hits in any state (M, E, S)", .ucode = 0x3ULL << (18+8), .uequiv = "L3_HITM:L3_HITE:L3_HITS", .umodel = PFM_PMU_INTEL_SKL, .grpid = 1, }, { .uname = "L3_HIT", .udesc = "Alias for L3_HITMES", .ucode = 0x3ULL << (18+8), .uequiv = "L3_HITMES", .umodel = PFM_PMU_INTEL_SKL, .grpid = 1, }, { .uname = "L3_HITMESF", .udesc 
= "Supplier: counts L3 hits in any state (M, E, S, F)", .ucode = 0xfULL << (18+8), .uequiv = "L3_HITM:L3_HITE:L3_HITS:L3_HITF", .umodel = PFM_PMU_INTEL_SKX, .grpid = 1, }, { .uname = "L3_HIT", .udesc = "Alias for L3_HITMES", .ucode = 0x3ULL << (18+8), .uequiv = "L3_HITMESF", .umodel = PFM_PMU_INTEL_SKX, .grpid = 1, }, { .uname = "L4_HIT_LOCAL_L4", .udesc = "Supplier: L4 local hit", .ucode = 0x1ULL << (22+8), .umodel = PFM_PMU_INTEL_SKL, .grpid = 1, }, { .uname = "L3_MISS_LOCAL", .udesc = "Supplier: counts L3 misses to local DRAM", .ucode = 1ULL << (26+8), .grpid = 1, }, { .uname = "L3_MISS_MISS_REMOTE_HOP1_DRAM", .udesc = "Supplier: counts L3 misses to remote DRAM with 1 hop", .ucode = 1ULL << (28+8), .grpid = 1, }, { .uname = "L3_MISS", .udesc = "Supplier: counts L3 misses", .ucode = 0x1ULL << (26+8), .uequiv = "L3_MISS_LOCAL", .umodel = PFM_PMU_INTEL_SKL, .grpid = 1, }, { .uname = "L3_MISS", .udesc = "Supplier: counts L3 misses (local or remote)", .ucode = 0xfULL << (26+8), .uequiv = "L3_MISS_LOCAL", .umodel = PFM_PMU_INTEL_SKX, .grpid = 1, }, { .uname = "SPL_HIT", .udesc = "Snoop: counts L3 supplier hit", .ucode = 0x1ULL << (30+8), .umodel = PFM_PMU_INTEL_SKL, .grpid = 1, }, { .uname = "SNP_NONE", .udesc = "Snoop: counts number of times no snoop-related information is available", .ucode = 1ULL << (31+8), .grpid = 2, }, { .uname = "SNP_NOT_NEEDED", .udesc = "Snoop: counts the number of times no snoop was needed to satisfy the request", .ucode = 1ULL << (32+8), .grpid = 2, }, { .uname = "SNP_MISS", .udesc = "Snoop: counts number of times a snoop was needed and it missed all snooped caches", .ucode = 1ULL << (33+8), .grpid = 2, }, { .uname = "SNP_HIT_NO_FWD", .udesc = "Snoop: counts number of times a snoop was needed and it hit in at leas one snooped cache", .ucode = 1ULL << (34+8), .grpid = 2, }, { .uname = "SNP_HIT_WITH_FWD", .udesc = "Snoop: counts number of times a snoop was needed and data was forwarded from a remote socket", .ucode = 1ULL << (35+8), .grpid = 
2, }, { .uname = "SNP_HITM", .udesc = "Snoop: counts number of times a snoop was needed and it hitM-ed in local or remote cache", .ucode = 1ULL << (36+8), .grpid = 2, }, { .uname = "SNP_NON_DRAM", .udesc = "Snoop: counts number of times target was a non-DRAM system address. This includes MMIO transactions", .ucode = 1ULL << (37+8), .grpid = 2, }, { .uname = "SNP_ANY", .udesc = "Snoop: any snoop reason", .ucode = 0x7fULL << (31+8), .uequiv = "SNP_NONE:SNP_NOT_NEEDED:SNP_MISS:SNP_HIT_NO_FWD:SNP_HIT_WITH_FWD:SNP_HITM:SNP_NON_DRAM", .uflags= INTEL_X86_DFL, .grpid = 2, }, }; static const intel_x86_umask_t skl_hle_retired[]={ { .uname = "START", .udesc = "Number of times an HLE execution started", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "COMMIT", .udesc = "Number of times an HLE execution successfully committed", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ABORTED", .udesc = "Number of times an HLE execution aborted due to any reasons (multiple categories may count as one) (Precise Event)", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "ABORTED_MEM", .udesc = "Number of times an HLE execution aborted due to various memory events", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ABORTED_TMR", .udesc = "Number of times an HLE execution aborted due to hardware timer expiration", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ABORTED_UNFRIENDLY", .udesc = "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain events such as AD-assists", .ucode = 0x2000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ABORTED_MEMTYPE", .udesc = "Number of times an HLE execution aborted due to incompatible memory type", .ucode = 0x4000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ABORTED_EVENTS", .udesc = "Number of times an HLE execution aborted due to none of the other 4 reasons (e.g., interrupt)", .ucode = 0x8000, .uflags = INTEL_X86_NCOMBO, }, }; static const 
intel_x86_umask_t skl_rtm_retired[]={ { .uname = "START", .udesc = "Number of times an RTM execution started", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "COMMIT", .udesc = "Number of times an RTM execution successfully committed", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ABORTED", .udesc = "Number of times an RTM execution aborted due to any reasons (multiple categories may count as one) (Precise Event)", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "ABORTED_MEM", .udesc = "Number of times an RTM execution aborted due to various memory events", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ABORTED_TMR", .udesc = "Number of times an RTM execution aborted due to uncommon conditions", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ABORTED_UNFRIENDLY", .udesc = "Number of times an RTM execution aborted due to RTM-unfriendly instructions", .ucode = 0x2000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ABORTED_MEMTYPE", .udesc = "Number of times an RTM execution aborted due to incompatible memory type", .ucode = 0x4000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ABORTED_EVENTS", .udesc = "Number of times an RTM execution aborted due to none of the other 4 reasons (e.g., interrupt)", .ucode = 0x8000, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t skl_tx_mem[]={ { .uname = "ABORT_CONFLICT", .udesc = "Number of times a transactional abort was signaled due to data conflict on a transactionally accessed address", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ABORT_CAPACITY", .udesc = "Number of times a transactional abort was signaled due to data capacity limitation", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ABORT_HLE_STORE_TO_ELIDED_LOCK", .udesc = "Number of times a HLE transactional execution aborted due to a non xrelease prefixed instruction writing to an elided lock in the elision buffer", .ucode = 0x400, .uflags = 
INTEL_X86_NCOMBO, }, { .uname = "ABORT_HLE_ELISION_BUFFER_NOT_EMPTY", .udesc = "Number of times a HLE transactional execution aborted due to NoAllocatedElisionBuffer being non-zero", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ABORT_HLE_ELISION_BUFFER_MISMATCH", .udesc = "Number of times a HLE transaction execution aborted due to xrelease lock not satisfying the address and value requirements in the elision buffer", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ABORT_HLE_ELISION_BUFFER_UNSUPPORTED_ALIGNMENT", .udesc = "Number of times a HLE transaction execution aborted due to an unsupported read alignment from the elision buffer", .ucode = 0x2000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ABORT_HLE_ELISION_BUFFER_FULL", .udesc = "Number of times a HLE clock could not be elided due to ElisionBufferAvailable being zero", .ucode = 0x4000, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t skl_tx_exec[]={ { .uname = "MISC1", .udesc = "Number of times a class of instructions that may cause a transactional abort was executed. 
Since this is the count of execution, it may not always cause a transactional abort", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "MISC2", .udesc = "Number of times a class of instructions that may cause a transactional abort was executed inside a transactional region", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "MISC3", .udesc = "Number of times an instruction execution caused the supported nest count to be exceeded", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO, }, { .uname = "MISC4", .udesc = "Number of times an instruction a xbegin instruction was executed inside HLE transactional region", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO, }, { .uname = "MISC5", .udesc = "Number of times an instruction with HLE xacquire prefix was executed inside a RTM transactional region", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t skl_offcore_requests_outstanding[]={ { .uname = "ALL_DATA_RD_CYCLES", .udesc = "Cycles with cacheable data read transactions in the superQ (use with HT off only)", .uequiv = "ALL_DATA_RD:c=1", .ucode = 0x800 | (0x1 << INTEL_X86_CMASK_BIT), .uflags= INTEL_X86_NCOMBO, }, { .uname = "DEMAND_CODE_RD_CYCLES", .udesc = "Cycles with demand code reads transactions in the superQ (use with HT off only)", .uequiv = "DEMAND_CODE_RD:c=1", .ucode = 0x200 | (0x1 << INTEL_X86_CMASK_BIT), .uflags= INTEL_X86_NCOMBO, }, { .uname = "CYCLES_WITH_DEMAND_CODE_RD", .udesc = "Cycles with demand code reads transactions in the superQ (use with HT off only)", .uequiv = "DEMAND_CODE_RD:c=1", .ucode = 0x200 | (0x1 << INTEL_X86_CMASK_BIT), .uflags= INTEL_X86_NCOMBO, }, { .uname = "DEMAND_DATA_RD_CYCLES", .udesc = "Cycles with demand data read transactions in the superQ (use with HT off only)", .uequiv = "DEMAND_DATA_RD:c=1", .ucode = 0x100 | (0x1 << INTEL_X86_CMASK_BIT), .uflags= INTEL_X86_NCOMBO, }, { .uname = "CYCLES_WITH_DEMAND_DATA_RD", .udesc = "Cycles with demand data read transactions in the superQ (use with HT 
off only)", .uequiv = "DEMAND_DATA_RD:c=1", .ucode = 0x100 | (0x1 << INTEL_X86_CMASK_BIT), .uflags= INTEL_X86_NCOMBO, }, { .uname = "ALL_DATA_RD", .udesc = "Cacheable data read transactions in the superQ every cycle (use with HT off only)", .ucode = 0x800, .uflags= INTEL_X86_NCOMBO, }, { .uname = "DEMAND_CODE_RD", .udesc = "Code read transactions in the superQ every cycle (use with HT off only)", .ucode = 0x200, .uflags= INTEL_X86_NCOMBO, }, { .uname = "DEMAND_DATA_RD", .udesc = "Demand data read transactions in the superQ every cycle (use with HT off only)", .ucode = 0x100, .uflags= INTEL_X86_NCOMBO, }, { .uname = "DEMAND_DATA_RD_GE_6", .udesc = "Cycles with at lesat 6 offcore outstanding demand data read requests in the uncore queue", .uequiv = "DEMAND_DATA_RD:c=6", .ucode = 0x100 | (6 << INTEL_X86_CMASK_BIT), /* cnt=1 */ .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_C, }, { .uname = "DEMAND_RFO", .udesc = "Outstanding RFO (store) transactions in the superQ every cycle (use with HT off only)", .ucode = 0x400, .uflags= INTEL_X86_NCOMBO, }, { .uname = "DEMAND_RFO_CYCLES", .udesc = "Cycles with outstanding RFO (store) transactions in the superQ (use with HT off only)", .uequiv = "DEMAND_RFO:c=1", .ucode = 0x400 | (0x1 << INTEL_X86_CMASK_BIT), /* cnt=1 */ .uflags= INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_C, }, { .uname = "CYCLES_WITH_DEMAND_RFO", .udesc = "Cycles with outstanding RFO (store) transactions in the superQ (use with HT off only)", .uequiv = "DEMAND_RFO:c=1", .ucode = 0x400 | (0x1 << INTEL_X86_CMASK_BIT), /* cnt=1 */ .uflags= INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_C, }, { .uname = "L3_MISS_DEMAND_DATA_RD", .udesc = "Number of offcore outstanding demand data read requests missing the L3 cache every cycle", .ucode = 0x1000, .uflags= INTEL_X86_NCOMBO, }, { .uname = "L3_MISS_DEMAND_DATA_RD_GE_6", .udesc = "Number of cycles in which at least 6 demand data read requests missing the L3", .ucode = 0x1000 | (0x6 << INTEL_X86_CMASK_BIT), .uflags= 
INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_C, }, { .uname = "CYCLES_WITH_L3_MISS_DEMAND_DATA_RD", .udesc = "Cycles with at least 1 Demand Data Read requests who miss L3 cache in the superQ", .ucode = 0x1000 | (0x1 << INTEL_X86_CMASK_BIT), /* cnt=1 */ .uflags= INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_C, }, }; static const intel_x86_umask_t skl_ild_stall[]={ { .uname = "LCP", .udesc = "Stall caused by changing prefix length of the instruction", .ucode = 0x100, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, }; static const intel_x86_umask_t skl_lsd[]={ { .uname = "UOPS", .udesc = "Number of uops delivered by the Loop Stream Detector (LSD)", .ucode = 0x100, .uflags= INTEL_X86_DFL | INTEL_X86_NCOMBO, }, { .uname = "CYCLES_4_UOPS", .udesc = "Number of cycles the LSD delivered 4 uops which did not come from the decoder", .ucode = 0x100| (0x4 << INTEL_X86_CMASK_BIT), .modhw = _INTEL_X86_ATTR_C, .uflags= INTEL_X86_NCOMBO, }, { .uname = "CYCLES_ACTIVE", .udesc = "Number of cycles the LSD delivered uops which did not come from the decoder", .ucode = 0x100| (0x1 << INTEL_X86_CMASK_BIT), .modhw = _INTEL_X86_ATTR_C, .uflags= INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t skl_dsb2mite_switches[]={ { .uname = "PENALTY_CYCLES", .udesc = "Number of DSB to MITE switch true penalty cycles", .ucode = 0x0200, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, }; static const intel_x86_umask_t skl_ept[]={ { .uname = "WALK_DURATION", .udesc = "Cycles for an extended page table walk of any type", .ucode = 0x1000, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "WALK_PENDING", .udesc = "Cycles for an extended page table walk of any type", .ucode = 0x1000, .uequiv = "WALK_DURATION", .uflags= INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t skl_arith[]={ { .uname = "DIVIDER_ACTIVE", .udesc = "Cycles when divider is busy executing divide or square root operations on integers or floating-points", .ucode = 0x100 | (1 << INTEL_X86_CMASK_BIT), .uflags = INTEL_X86_DFL | 
INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_C, }, { .uname = "FPU_DIV_ACTIVE", .udesc = "Cycles when divider is busy executing divide or square root operations on integers or floating-points", .ucode = 0x100 | (1 << INTEL_X86_CMASK_BIT), .uequiv = "DIVIDER_ACTIVE", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_C, }, }; static const intel_x86_umask_t skl_fp_arith[]={ { .uname = "SCALAR_DOUBLE", .udesc = "Number of scalar double precision floating-point arithmetic instructions (multiply by 1 to get flops)", .ucode = 0x0100, }, { .uname = "SCALAR_SINGLE", .udesc = "Number of scalar single precision floating-point arithmetic instructions (multiply by 1 to get flops)", .ucode = 0x0200, }, { .uname = "128B_PACKED_DOUBLE", .udesc = "Number of scalar 128-bit packed double precision floating-point arithmetic instructions (multiply by 2 to get flops)", .ucode = 0x0400, }, { .uname = "128B_PACKED_SINGLE", .udesc = "Number of scalar 128-bit packed single precision floating-point arithmetic instructions (multiply by 4 to get flops)", .ucode = 0x0800, }, { .uname = "256B_PACKED_DOUBLE", .udesc = "Number of scalar 256-bit packed double precision floating-point arithmetic instructions (multiply by 4 to get flops)", .ucode = 0x1000, }, { .uname = "256B_PACKED_SINGLE", .udesc = "Number of scalar 256-bit packed single precision floating-point arithmetic instructions (multiply by 8 to get flops)", .ucode = 0x2000, }, { .uname = "512B_PACKED_DOUBLE", .udesc = "Number of scalar 512-bit packed double precision floating-point arithmetic instructions (multiply by 8 to get flops)", .ucode = 0x4000, }, { .uname = "512B_PACKED_SINGLE", .udesc = "Number of scalar 512-bit packed single precision floating-point arithmetic instructions (multiply by 16 to get flops)", .ucode = 0x8000, }, }; static const intel_x86_umask_t skl_exe_activity[]={ { .uname = "1_PORTS_UTIL", .udesc = "Cycles with 1 uop executing across all ports and Reservation Station is not empty", .ucode = 0x0200, .uflags= 
INTEL_X86_NCOMBO, }, { .uname = "2_PORTS_UTIL", .udesc = "Cycles with 2 uops executing across all ports and Reservation Station is not empty", .ucode = 0x0400, .uflags= INTEL_X86_NCOMBO, }, { .uname = "3_PORTS_UTIL", .udesc = "Cycles with 3 uops executing across all ports and Reservation Station is not empty", .ucode = 0x0800, .uflags= INTEL_X86_NCOMBO, }, { .uname = "4_PORTS_UTIL", .udesc = "Cycles with 4 uops executing across all ports and Reservation Station is not empty", .ucode = 0x1000, .uflags= INTEL_X86_NCOMBO, }, { .uname = "BOUND_ON_STORES", .udesc = "Cycles where the store buffer is full and no outstanding load", .ucode = 0x4000, .uflags= INTEL_X86_NCOMBO, }, { .uname = "EXE_BOUND_0_PORTS", .udesc = "Cycles where no uop is executed and the Reservation Station was not empty", .ucode = 0x0100, .uflags= INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t skl_frontend_retired[]={ { .uname = "DSB_MISS", .udesc = "Retired instructions experiencing decode stream buffer (DSB) miss", .ucode = 0x11 << 8, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "ITLB_MISS", .udesc = "Retired instructions experiencing ITLB true miss", .ucode = 0x14 << 8, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "L1I_MISS", .udesc = "Retired instructions experiencing L1I cache true miss", .ucode = 0x12 << 8, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "L2_MISS", .udesc = "Retired instructions experiencing instruction L2 cache true miss", .ucode = 0x13 << 8, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "STLB_MISS", .udesc = "Retired instructions experiencing STLB (2nd level TLB) true miss", .ucode = 0x15 << 8, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "IDQ_4_BUBBLES", .udesc = "Retired instructions after an interval where the front-end did not deliver any uops (4 bubbles) for a period determined by the fe_thres modifier and which was not interrupted by a back-end stall", .ucode = (4 << 20 | 0x6) << 8, .uflags= 
INTEL_X86_NCOMBO | INTEL_X86_FETHR | INTEL_X86_PEBS, }, { .uname = "IDQ_3_BUBBLES", .udesc = "Counts instructions retired after an interval where the front-end did not deliver more than 1 uop (3 bubbles) for a period determined by the fe_thres modifier and which was not interrupted by a back-end stall", .ucode = (3 << 20 | 0x6) << 8, .uflags= INTEL_X86_NCOMBO | INTEL_X86_FETHR | INTEL_X86_PEBS, }, { .uname = "IDQ_2_BUBBLES", .udesc = "Counts instructions retired after an interval where the front-end did not deliver more than 2 uops (2 bubbles) for a period determined by the fe_thres modifier and which was not interrupted by a back-end stall", .ucode = (2 << 20 | 0x6) << 8, .uflags= INTEL_X86_NCOMBO | INTEL_X86_FETHR | INTEL_X86_PEBS, }, { .uname = "IDQ_1_BUBBLE", .udesc = "Counts instructions retired after an interval where the front-end did not deliver more than 3 uops (1 bubble) for a period determined by the fe_thres modifier and which was not interrupted by a back-end stall", .ucode = (1 << 20 | 0x6) << 8, .uflags= INTEL_X86_NCOMBO | INTEL_X86_FETHR | INTEL_X86_PEBS, }, }; static const intel_x86_umask_t skl_hw_interrupts[]={ { .uname = "RECEIVED", .udesc = "Number of hardware interrupts received by the processor", .ucode = 0x100, .uflags= INTEL_X86_DFL, }, }; static const intel_x86_umask_t skl_offcore_requests_buffer[]={ { .uname = "SQ_FULL", .udesc = "Number of requests for which the offcore buffer (SQ) is full", .ucode = 0x100, .uflags= INTEL_X86_DFL, }, }; static const intel_x86_umask_t skl_mem_load_misc_retired[]={ { .uname = "UC", .udesc = "Number of uncached load retired", .ucode = 0x400, .uflags= INTEL_X86_PEBS | INTEL_X86_DFL, }, }; static const intel_x86_umask_t skl_idi_misc[]={ { .uname = "WB_UPGRADE", .udesc = "Counts number of cache lines that are allocated and written back to L3 with the intention that they are more likely to be reused shortly", .ucode = 0x200, .uflags= INTEL_X86_NCOMBO, }, { .uname = "WB_DOWNGRADE", .udesc = "Counts number of 
cache lines that are dropped and not written back to L3 as they are deemed to be less likely to be reused shortly", .ucode = 0x400, .uflags= INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t skl_core_power[]={ { .uname = "LVL0_TURBO_LICENSE", .udesc = "Number of core cycles where the core was running in a manner where Turbo may be clipped to the Non-AVX turbo schedule.", .ucode = 0x700, .umodel = PFM_PMU_INTEL_SKX, .uflags= INTEL_X86_NCOMBO, }, { .uname = "LVL1_TURBO_LICENSE", .udesc = "Number of core cycles where the core was running in a manner where Turbo may be clipped to the AVX2 turbo schedule.", .ucode = 0x1800, .umodel = PFM_PMU_INTEL_SKX, .uflags= INTEL_X86_NCOMBO, }, { .uname = "LVL2_TURBO_LICENSE", .udesc = "Number of core cycles where the core was running in a manner where Turbo may be clipped to the AVX512 turbo schedule.", .ucode = 0x2000, .umodel = PFM_PMU_INTEL_SKX, .uflags= INTEL_X86_NCOMBO, }, { .uname = "THROTTLE", .udesc = "Number of core cycles where the core was throttled due to a pending power level request.", .ucode = 0x4000, .umodel = PFM_PMU_INTEL_SKX, .uflags= INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t skl_sw_prefetch[]={ { .uname = "NTA", .udesc = "Number of prefetch.nta instructions executed", .ucode = 0x100, .uflags= INTEL_X86_NCOMBO, }, { .uname = "T0", .udesc = "Number of prefetch.t0 instructions executed", .ucode = 0x200, .uflags= INTEL_X86_NCOMBO, }, { .uname = "T1_T2", .udesc = "Number prefetch.t1 or prefetch.t2 instructions executed", .ucode = 0x4000, .uflags= INTEL_X86_NCOMBO, }, { .uname = "PREFETCHW", .udesc = "Number prefetch.w instructions executed", .ucode = 0x8000, .uflags= INTEL_X86_NCOMBO, }, }; static const intel_x86_entry_t intel_skl_pe[]={ { .name = "UNHALTED_CORE_CYCLES", .desc = "Count core clock cycles whenever the clock signal on the specific core is running (not halted)", .modmsk = INTEL_V4_ATTRS, .cntmsk = 0x20000000full, .code = 0x3c, }, { .name = "UNHALTED_REFERENCE_CYCLES", .desc = "Unhalted 
reference cycles", .modmsk = INTEL_FIXED3_ATTRS, .cntmsk = 0x400000000ull, .code = 0x0300, /* pseudo encoding */ .flags = INTEL_X86_FIXED, }, { .name = "INSTRUCTION_RETIRED", .desc = "Number of instructions at retirement", .modmsk = INTEL_V4_ATTRS, .cntmsk = 0x10000000full, .code = 0xc0, }, { .name = "INSTRUCTIONS_RETIRED", .desc = "This is an alias for INSTRUCTION_RETIRED", .modmsk = INTEL_V4_ATTRS, .equiv = "INSTRUCTION_RETIRED", .cntmsk = 0x10000000full, .code = 0xc0, }, { .name = "BRANCH_INSTRUCTIONS_RETIRED", .desc = "Count branch instructions at retirement. Specifically, this event counts the retirement of the last micro-op of a branch instruction", .modmsk = INTEL_V4_ATTRS, .equiv = "BR_INST_RETIRED:ALL_BRANCHES", .cntmsk = 0xff, .code = 0xc4, }, { .name = "MISPREDICTED_BRANCH_RETIRED", .desc = "Count mispredicted branch instructions at retirement. Specifically, this event counts at retirement of the last micro-op of a branch instruction in the architectural path of the execution and experienced misprediction in the branch prediction hardware", .modmsk = INTEL_V4_ATTRS, .equiv = "BR_MISP_RETIRED:ALL_BRANCHES", .cntmsk = 0xff, .code = 0xc5, }, { .name = "BACLEARS", .desc = "Branch re-steered", .code = 0xe6, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(skl_baclears), .umasks = skl_baclears }, { .name = "BR_INST_RETIRED", .desc = "Branch instructions retired (Precise Event)", .code = 0xc4, .cntmsk = 0xff, .ngrp = 1, .flags = INTEL_X86_PEBS, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(skl_br_inst_retired), .umasks = skl_br_inst_retired }, { .name = "BR_MISP_RETIRED", .desc = "Mispredicted retired branches (Precise Event)", .code = 0xc5, .cntmsk = 0xff, .ngrp = 1, .flags = INTEL_X86_PEBS, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(skl_br_misp_retired), .umasks = skl_br_misp_retired }, { .name = "CPU_CLK_THREAD_UNHALTED", .desc = "Count core clock cycles whenever the clock signal on the specific core is 
running (not halted)", .code = 0x3c, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(skl_cpu_clk_thread_unhalted), .umasks = skl_cpu_clk_thread_unhalted }, { .name = "CPU_CLK_UNHALTED", .desc = "Count core clock cycles whenever the clock signal on the specific core is running (not halted)", .code = 0x3c, .cntmsk = 0xff, .modmsk = INTEL_V4_ATTRS, .equiv = "CPU_CLK_THREAD_UNHALTED", }, { .name = "CYCLE_ACTIVITY", .desc = "Stalled cycles", .code = 0xa3, .cntmsk = 0xf, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(skl_cycle_activity), .umasks = skl_cycle_activity }, { .name = "DTLB_LOAD_MISSES", .desc = "Data TLB load misses", .code = 0x8, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(skl_dtlb_load_misses), .umasks = skl_dtlb_load_misses }, { .name = "DTLB_STORE_MISSES", .desc = "Data TLB store misses", .code = 0x49, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(skl_dtlb_load_misses), .umasks = skl_dtlb_load_misses /* shared */ }, { .name = "FP_ASSIST", .desc = "X87 floating-point assists", .code = 0xca, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(skl_fp_assist), .umasks = skl_fp_assist }, { .name = "HLE_RETIRED", .desc = "HLE execution (Precise Event)", .code = 0xc8, .cntmsk = 0xff, .ngrp = 1, .flags = INTEL_X86_PEBS, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(skl_hle_retired), .umasks = skl_hle_retired }, { .name = "ICACHE_16B", .desc = "Instruction Cache", .code = 0x80, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(skl_icache_16b), .umasks = skl_icache_16b }, { .name = "ICACHE_64B", .desc = "Instruction Cache", .code = 0x83, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(skl_icache_64b), .umasks = skl_icache_64b }, { .name = "IDQ", .desc = "IDQ operations", .code = 0x79, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, 
.numasks = LIBPFM_ARRAY_SIZE(skl_idq), .umasks = skl_idq }, { .name = "IDQ_UOPS_NOT_DELIVERED", .desc = "Uops not delivered", .code = 0x9c, .cntmsk = 0xf, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(skl_idq_uops_not_delivered), .umasks = skl_idq_uops_not_delivered }, { .name = "INST_RETIRED", .desc = "Number of instructions retired (Precise Event)", .code = 0xc0, .cntmsk = 0xff, .ngrp = 1, .flags = INTEL_X86_PEBS, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(skl_inst_retired), .umasks = skl_inst_retired }, { .name = "INT_MISC", .desc = "Miscellaneous interruptions", .code = 0xd, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(skl_int_misc), .umasks = skl_int_misc }, { .name = "ITLB", .desc = "Instruction TLB", .code = 0xae, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(skl_itlb), .umasks = skl_itlb }, { .name = "ITLB_MISSES", .desc = "Instruction TLB misses", .code = 0x85, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(skl_itlb_misses), .umasks = skl_itlb_misses }, { .name = "L1D", .desc = "L1D cache", .code = 0x51, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(skl_l1d), .umasks = skl_l1d }, { .name = "L1D_PEND_MISS", .desc = "L1D pending misses", .code = 0x48, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(skl_l1d_pend_miss), .umasks = skl_l1d_pend_miss }, { .name = "L2_LINES_IN", .desc = "L2 lines allocated", .code = 0xf1, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(skl_l2_lines_in), .umasks = skl_l2_lines_in }, { .name = "L2_LINES_OUT", .desc = "L2 lines evicted", .code = 0xf2, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(skl_l2_lines_out), .umasks = skl_l2_lines_out }, { .name = "L2_RQSTS", .desc = "L2 requests", .code = 0x24, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, 
.numasks = LIBPFM_ARRAY_SIZE(skl_l2_rqsts), .umasks = skl_l2_rqsts }, { .name = "L2_TRANS", .desc = "L2 transactions", .code = 0xf0, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(skl_l2_trans), .umasks = skl_l2_trans }, { .name = "LD_BLOCKS", .desc = "Blocking loads", .code = 0x3, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(skl_ld_blocks), .umasks = skl_ld_blocks }, { .name = "LD_BLOCKS_PARTIAL", .desc = "Partial load blocks", .code = 0x7, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(skl_ld_blocks_partial), .umasks = skl_ld_blocks_partial }, { .name = "LOAD_HIT_PRE", .desc = "Load dispatches", .code = 0x4c, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(skl_load_hit_pre), .umasks = skl_load_hit_pre }, { .name = "LOCK_CYCLES", .desc = "Locked cycles in L1D and L2", .code = 0x63, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(skl_lock_cycles), .umasks = skl_lock_cycles }, { .name = "LONGEST_LAT_CACHE", .desc = "L3 cache", .code = 0x2e, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(skl_longest_lat_cache), .umasks = skl_longest_lat_cache }, { .name = "MACHINE_CLEARS", .desc = "Machine clear asserted", .code = 0xc3, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(skl_machine_clears), .umasks = skl_machine_clears }, { .name = "MEM_LOAD_L3_HIT_RETIRED", .desc = "L3 hit load uops retired (Precise Event)", .code = 0xd2, .cntmsk = 0xf, .ngrp = 1, .flags = INTEL_X86_PEBS, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(skl_mem_load_l3_hit_retired), .umasks = skl_mem_load_l3_hit_retired }, { .name = "MEM_LOAD_UOPS_L3_HIT_RETIRED", .desc = "L3 hit load uops retired (Precise Event)", .equiv = "MEM_LOAD_L3_HIT_RETIRED", .code = 0xd2, .cntmsk = 0xf, .ngrp = 1, .flags = INTEL_X86_PEBS, .modmsk = INTEL_V4_ATTRS, .numasks = 
LIBPFM_ARRAY_SIZE(skl_mem_load_l3_hit_retired), .umasks = skl_mem_load_l3_hit_retired }, { .name = "MEM_LOAD_UOPS_L3_MISS_RETIRED", .desc = "L3 miss load uops retired (Precise Event)", .code = 0xd3, .cntmsk = 0xf, .ngrp = 1, .flags = INTEL_X86_PEBS, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(skl_mem_load_l3_miss_retired), .umasks = skl_mem_load_l3_miss_retired }, { .name = "MEM_LOAD_UOPS_LLC_HIT_RETIRED", .desc = "L3 hit load uops retired (Precise Event)", .equiv = "MEM_LOAD_L3_HIT_RETIRED", .code = 0xd2, .cntmsk = 0xf, .ngrp = 1, .flags = INTEL_X86_PEBS, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(skl_mem_load_l3_hit_retired), .umasks = skl_mem_load_l3_hit_retired }, { .name = "MEM_LOAD_RETIRED", .desc = "Retired load uops (Precise Event)", .code = 0xd1, .cntmsk = 0xf, .ngrp = 1, .flags = INTEL_X86_PEBS, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(skl_mem_load_retired), .umasks = skl_mem_load_retired }, { .name = "MEM_LOAD_UOPS_RETIRED", .desc = "Retired load uops (Precise Event)", .code = 0xd1, .equiv = "MEM_LOAD_RETIRED", .cntmsk = 0xf, .ngrp = 1, .flags = INTEL_X86_PEBS, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(skl_mem_load_retired), .umasks = skl_mem_load_retired }, { .name = "MEM_TRANS_RETIRED", .desc = "Memory transactions retired (Precise Event)", .code = 0xcd, .cntmsk = 0x8, .ngrp = 1, .flags = INTEL_X86_PEBS, .modmsk = INTEL_V4_ATTRS | _INTEL_X86_ATTR_LDLAT, .numasks = LIBPFM_ARRAY_SIZE(skl_mem_trans_retired), .umasks = skl_mem_trans_retired }, { .name = "MEM_INST_RETIRED", .desc = "Memory instructions retired (Precise Event)", .code = 0xd0, .cntmsk = 0xf, .ngrp = 1, .flags = INTEL_X86_PEBS, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(skl_mem_inst_retired), .umasks = skl_mem_inst_retired }, { .name = "MEM_UOPS_RETIRED", .desc = "Memory instructions retired (Precise Event)", .code = 0xd0, .cntmsk = 0xf, .equiv = "MEM_INST_RETIRED", .ngrp = 1, .flags = INTEL_X86_PEBS, .modmsk = INTEL_V4_ATTRS, 
.numasks = LIBPFM_ARRAY_SIZE(skl_mem_inst_retired), .umasks = skl_mem_inst_retired }, { .name = "MISALIGN_MEM_REF", .desc = "Misaligned memory references", .code = 0x5, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(skl_misalign_mem_ref), .umasks = skl_misalign_mem_ref }, { .name = "MOVE_ELIMINATION", .desc = "Move Elimination", .code = 0x58, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(skl_move_elimination), .umasks = skl_move_elimination }, { .name = "OFFCORE_REQUESTS", .desc = "Demand Data Read requests sent to uncore", .code = 0xb0, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(skl_offcore_requests), .umasks = skl_offcore_requests }, { .name = "OTHER_ASSISTS", .desc = "Software assist", .code = 0xc1, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(skl_other_assists), .umasks = skl_other_assists }, { .name = "RESOURCE_STALLS", .desc = "Cycles Allocation is stalled due to Resource Related reason", .code = 0xa2, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(skl_resource_stalls), .umasks = skl_resource_stalls }, { .name = "ROB_MISC_EVENTS", .desc = "ROB miscellaneous events", .code = 0xcc, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(skl_rob_misc_events), .umasks = skl_rob_misc_events }, { .name = "RS_EVENTS", .desc = "Reservation Station", .code = 0x5e, .cntmsk = 0xf, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(skl_rs_events), .umasks = skl_rs_events }, { .name = "RTM_RETIRED", .desc = "Restricted Transaction Memory execution (Precise Event)", .code = 0xc9, .cntmsk = 0xf, .ngrp = 1, .flags = INTEL_X86_PEBS, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(skl_rtm_retired), .umasks = skl_rtm_retired }, { .name = "TLB_FLUSH", .desc = "TLB flushes", .code = 0xbd, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = 
LIBPFM_ARRAY_SIZE(skl_tlb_flush), .umasks = skl_tlb_flush }, { .name = "UOPS_EXECUTED", .desc = "Uops executed", .code = 0xb1, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(skl_uops_executed), .umasks = skl_uops_executed }, { .name = "LSD", .desc = "Loop stream detector", .code = 0xa8, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(skl_lsd), .umasks = skl_lsd, }, { .name = "UOPS_DISPATCHED_PORT", .desc = "Uops dispatched to specific ports", .code = 0xa1, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(skl_uops_dispatched_port), .umasks = skl_uops_dispatched_port, }, { .name = "UOPS_DISPATCHED", .desc = "Uops dispatched to specific ports", .equiv = "UOPS_DISPATCHED_PORT", .code = 0xa1, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(skl_uops_dispatched_port), .umasks = skl_uops_dispatched_port, }, { .name = "UOPS_ISSUED", .desc = "Uops issued", .code = 0xe, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(skl_uops_issued), .umasks = skl_uops_issued }, { .name = "ARITH", .desc = "Arithmetic uop", .code = 0x14, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(skl_arith), .umasks = skl_arith }, { .name = "UOPS_RETIRED", .desc = "Uops retired (Precise Event)", .code = 0xc2, .cntmsk = 0xff, .ngrp = 1, .flags = INTEL_X86_PEBS, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(skl_uops_retired), .umasks = skl_uops_retired }, { .name = "TX_MEM", .desc = "Transactional memory aborts", .code = 0x54, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(skl_tx_mem), .umasks = skl_tx_mem, }, { .name = "TX_EXEC", .desc = "Transactional execution", .code = 0x5d, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(skl_tx_exec), .umasks = skl_tx_exec }, { .name = "OFFCORE_REQUESTS_OUTSTANDING", .desc = "Outstanding 
offcore requests", .modmsk = INTEL_V4_ATTRS, .cntmsk = 0xff, .code = 0x60, .numasks = LIBPFM_ARRAY_SIZE(skl_offcore_requests_outstanding), .ngrp = 1, .umasks = skl_offcore_requests_outstanding, }, { .name = "ILD_STALL", .desc = "Instruction Length Decoder stalls", .modmsk = INTEL_V4_ATTRS, .cntmsk = 0xff, .code = 0x87, .numasks = LIBPFM_ARRAY_SIZE(skl_ild_stall), .ngrp = 1, .umasks = skl_ild_stall, }, { .name = "DSB2MITE_SWITCHES", .desc = "Number of DSB to MITE switches", .modmsk = INTEL_V4_ATTRS, .cntmsk = 0xff, .code = 0xab, .numasks = LIBPFM_ARRAY_SIZE(skl_dsb2mite_switches), .ngrp = 1, .umasks = skl_dsb2mite_switches, }, { .name = "EPT", .desc = "Extended page table", .modmsk = INTEL_V4_ATTRS, .cntmsk = 0xff, .code = 0x4f, .numasks = LIBPFM_ARRAY_SIZE(skl_ept), .ngrp = 1, .umasks = skl_ept, }, { .name = "FP_ARITH", .desc = "Floating-point", .modmsk = INTEL_V4_ATTRS, .cntmsk = 0xf, .code = 0xc7, .numasks = LIBPFM_ARRAY_SIZE(skl_fp_arith), .ngrp = 1, .umasks = skl_fp_arith, }, { .name = "EXE_ACTIVITY", .desc = "Execution activity", .modmsk = INTEL_V4_ATTRS, .cntmsk = 0xf, .code = 0xa6, .numasks = LIBPFM_ARRAY_SIZE(skl_exe_activity), .ngrp = 1, .umasks = skl_exe_activity, }, { .name = "FRONTEND_RETIRED", .desc = "Precise Front-End activity", .modmsk = INTEL_SKL_FE_ATTRS, .cntmsk = 0xf, .code = 0x1c6, .flags = INTEL_X86_FRONTEND | INTEL_X86_PEBS, .numasks = LIBPFM_ARRAY_SIZE(skl_frontend_retired), .ngrp = 1, .umasks = skl_frontend_retired, }, { .name = "HW_INTERRUPTS", .desc = "Number of hardware interrupts received by the processor", .modmsk = INTEL_V4_ATTRS, .cntmsk = 0xff, .code = 0xcb, .numasks = LIBPFM_ARRAY_SIZE(skl_hw_interrupts), .ngrp = 1, .umasks = skl_hw_interrupts, }, { .name = "SQ_MISC", .desc = "SuperQueue miscellaneous", .modmsk = INTEL_V4_ATTRS, .cntmsk = 0xf, .code = 0xf4, .numasks = LIBPFM_ARRAY_SIZE(skl_sq_misc), .ngrp = 1, .umasks = skl_sq_misc, }, { .name = "MEM_LOAD_MISC_RETIRED", .desc = "Load retired miscellaneous", .modmsk = 
INTEL_V4_ATTRS, .flags = INTEL_X86_PEBS, .cntmsk = 0xf, .code = 0xd4, .numasks = LIBPFM_ARRAY_SIZE(skl_mem_load_misc_retired), .ngrp = 1, .umasks = skl_mem_load_misc_retired, }, { .name = "IDI_MISC", .desc = "Miscellaneous", .modmsk = INTEL_V4_ATTRS, .cntmsk = 0xf, .code = 0xfe, .numasks = LIBPFM_ARRAY_SIZE(skl_idi_misc), .model = PFM_PMU_INTEL_SKX, .ngrp = 1, .umasks = skl_idi_misc, }, { .name = "CORE_POWER", .desc = "Power power cycles", .modmsk = INTEL_V4_ATTRS, .cntmsk = 0xf, .code = 0x28, .numasks = LIBPFM_ARRAY_SIZE(skl_core_power), .model = PFM_PMU_INTEL_SKX, .ngrp = 1, .umasks = skl_core_power, }, { .name = "SW_PREFETCH", .desc = "Software prefetches", .modmsk = INTEL_V4_ATTRS, .equiv = "SW_PREFETCH_ACCESS", .cntmsk = 0xf, .code = 0x32, .numasks = LIBPFM_ARRAY_SIZE(skl_sw_prefetch), .ngrp = 1, .umasks = skl_sw_prefetch, }, { .name = "SW_PREFETCH_ACCESS", .desc = "Software prefetches", .modmsk = INTEL_V4_ATTRS, .cntmsk = 0xf, .code = 0x32, .numasks = LIBPFM_ARRAY_SIZE(skl_sw_prefetch), .ngrp = 1, .umasks = skl_sw_prefetch, }, { .name = "OFFCORE_REQUESTS_BUFFER", .desc = "Offcore requests buffer", .modmsk = INTEL_V4_ATTRS, .cntmsk = 0xff, .code = 0xb2, .numasks = LIBPFM_ARRAY_SIZE(skl_offcore_requests_buffer), .ngrp = 1, .umasks = skl_offcore_requests_buffer, }, { .name = "OFFCORE_RESPONSE_0", .desc = "Offcore response event (must provide at least one request type and either any_response or any combination of supplier + snoop)", .modmsk = INTEL_V4_ATTRS, .cntmsk = 0xf, .code = 0x1b7, .flags= INTEL_X86_NHM_OFFCORE, .numasks = LIBPFM_ARRAY_SIZE(skl_offcore_response), .ngrp = 3, .umasks = skl_offcore_response, }, { .name = "OFFCORE_RESPONSE_1", .desc = "Offcore response event (must provide at least one request type and either any_response or any combination of supplier + snoop)", .modmsk = INTEL_V4_ATTRS, .cntmsk = 0xf, .code = 0x1bb, .flags= INTEL_X86_NHM_OFFCORE, .numasks = LIBPFM_ARRAY_SIZE(skl_offcore_response), .ngrp = 3, .umasks = skl_offcore_response, /* 
identical to actual umasks list for this event */ }, }; papi-5.6.0/src/freebsd/map-core2.c000664 001750 001750 00000047672 13216244361 020705 0ustar00jshenry1963jshenry1963000000 000000 /****************************/ /* THIS IS OPEN SOURCE CODE */ /****************************/ /* * File: map-core2.c * Author: George Neville-Neil * gnn@freebsd.org * Harald Servat * redcrash@gmail.com */ #include "freebsd.h" #include "papiStdEventDefs.h" #include "map.h" /**************************************************************************** CORE2 SUBSTRATE CORE2 SUBSTRATE CORE2 SUBSTRATE CORE2 SUBSTRATE CORE2 SUBSTRATE ****************************************************************************/ /* NativeEvent_Value_Core2Processor must match Core2Processor_info */ Native_Event_LabelDescription_t Core2Processor_info[] = { {"BACLEARS", "The number of times the front end is resteered."}, {"BOGUS_BR", "The number of byte sequences mistakenly detected as taken branch instructions."}, {"BR_BAC_MISSP_EXEC", "The number of branch instructions that were mispredicted when decoded."}, {"BR_CALL_MISSP_EXEC", "The number of mispredicted CALL instructions that were executed."}, {"BR_CALL_EXEC", "The number of CALL instructions executed."}, {"BR_CND_EXEC", "The number of conditional branches executed, but not necessarily retired."}, {"BR_CND_MISSP_EXEC", "The number of mispredicted conditional branches executed."}, {"BR_IND_CALL_EXEC", "The number of indirect CALL instructions executed."}, {"BR_IND_EXEC", "The number of indirect branch instructions executed."}, {"BR_IND_MISSP_EXEC", "The number of mispredicted indirect branch instructions executed."}, {"BR_INST_DECODED", "The number of branch instructions decoded."}, {"BR_INST_EXEC", "The number of branches executed, but not necessarily retired."}, {"BR_INST_RETIRED.ANY", "The number of branch instructions retired. 
This is an architectural performance event."}, {"BR_INST_RETIRED.MISPRED", "The number of mispredicted branch instructions retired. This is an architectural performance event."}, {"BR_INST_RETIRED.MISPRED_NOT_TAKEN", "The number of not taken branch instructions retired that were mispredicted."}, {"BR_INST_RETIRED.MISPRED_TAKEN", "The number taken branch instructions retired that were mispredicted."}, {"BR_INST_RETIRED.PRED_NOT_TAKEN", "The number of not taken branch instructions retired that were correctly predicted."}, {"BR_INST_RETIRED.PRED_TAKEN", "The number of taken branch instructions retired that were correctly predicted."}, {"BR_INST_RETIRED.TAKEN", "The number of taken branch instructions retired."}, {"BR_MISSP_EXEC", "The number of mispredicted branch instructions that were executed."}, {"BR_RET_MISSP_EXEC", "The number of mispredicted RET instructions executed."}, {"BR_RET_BAC_MISSP_EXEC", "The number of RET instructions executed that were mispredicted at decode time."}, {"BR_RET_EXEC", "The number of RET instructions executed."}, {"BR_TKN_BUBBLE_1", "The number of branch predicted taken with bubble 1."}, {"BR_TKN_BUBBLE_2", "The number of branch predicted taken with bubble 2."}, {"BUSQ_EMPTY", "The number of cycles during which the core did not have any pending transactions in the bus queue."}, {"BUS_BNR_DRV", "Number of Bus Not Ready signals asserted on the bus."}, {"BUS_DATA_RCV", "Number of bus cycles during which the processor is receiving data."}, {"BUS_DRDY_CLOCKS", "The number of bus cycles during which the Data Ready signal is asserted on the bus."}, {"BUS_HIT_DRV", "The number of bus cycles during which the processor drives the HIT# pin."}, {"BUS_HITM_DRV", "The number of bus cycles during which the processor drives the HITM# pin."}, {"BUS_IO_WAIT", "The number of core cycles during which I/O requests wait in the bus queue."}, {"BUS_LOCK_CLOCKS", "The number of bus cycles during which the LOCK signal was asserted on the bus."}, 
{"BUS_REQUEST_OUTSTANDING", "The number of pending full cache line read transactions on the bus occuring in each cycle."}, {"BUS_TRANS_ANY", "The number of bus transactions of any kind."}, {"BUS_TRANS_BRD", "The number of burst read transactions."}, {"BUS_TRANS_BURST", "The number of burst transactions."}, {"BUS_TRANS_DEF", "The number of deferred bus transactions."}, {"BUS_TRANS_IFETCH", "The number of instruction fetch full cache line bus transactions."}, {"BUS_TRANS_INVAL", "The number of invalidate bus transactions."}, {"BUS_TRANS_IO", "The number of completed I/O bus transaactions due to IN and OUT instructions."}, {"BUS_TRANS_MEM", "The number of memory bus transactions."}, {"BUS_TRANS_P", "The number of partial bus transactions."}, {"BUS_TRANS_PWR", "The number of partial write bus transactions."}, {"BUS_TRANS_RFO", "The number of Read For Ownership bus transactions."}, {"BUS_TRANS_WB", "The number of explicit writeback bus transactions due to dirty line evictions."}, {"CMP_SNOOP", "The number of times the L1 data cache is snooped by the other core in the same processor."}, {"CPU_CLK_UNHALTED.BUS", "The number of bus cycles when the core is not in the halt state. This is an architectural performance event."}, {"CPU_CLK_UNHALTED.CORE_P", "The number of core cycles while the core is not in a halt state. This is an architectural performance event."}, {"CPU_CLK_UNHALTED.NO_OTHER", "The number of bus cycles during which the core remains unhalted and the other core is halted."}, {"CYCLES_DIV_BUSY", "The number of cycles the divider is busy. 
This event is only available on PMC0."}, {"CYCLES_INT_MASKED", "The number of cycles during which interrupts are disabled."}, {"CYCLES_INT_PENDING_AND_MASKED", "The number of cycles during which there were pending interrupts while interrupts were disabled."}, {"CYCLES_L1I_MEM_STALLED", "The number of cycles for which an instruction fetch stalls."}, {"DELAYED_BYPASS.FP", "The number of floating point operations that used data immediately after the data was generated by a non floating point execution unit."}, {"DELAYED_BYPASS.LOAD", "The number of delayed bypass penalty cycles that a load operation incurred."}, {"DELAYED_BYPASS.SIMD", "The number of times SIMD operations use data immediately after data, was generated by a non-SIMD execution unit."}, {"DIV", "The number of divide operations executed."}, {"DTLB_MISSES.ANY", "The number of Data TLB misses, including misses that result from speculative accesses."}, {"DTLB_MISSES.L0_MISS_LD", "The number of level 0 DTLB misses due to load operations."}, {"DTLB_MISSES.MISS_LD", "The number of Data TLB misses due to load operations."}, {"DTLB_MISSES.MISS_ST", "The number of Data TLB misses due to store operations."}, {"EIST_TRANS", "The number of Enhanced Intel SpeedStep Technology transitions."}, {"ESP.ADDITIONS", "The number of automatic additions to the esp register."}, {"ESP.SYNCH", "The number of times the esp register was explicitly used in an address expression after it is implicitly used by a PUSH or POP instruction."}, {"EXT_SNOOP", "The number of snoop responses to bus transactions."}, {"FP_ASSIST", "The number of floating point operations executed that needed a microcode assist."}, {"FP_COMP_OPS_EXE", "The number of floating point computational micro-ops executed. 
The event is available only on PMC0."}, {"FP_MMX_TRANS_TO_FP", "The number of transitions from MMX instructions to floating point instructions."}, {"FP_MMX_TRANS_TO_MMX", "The number of transitions from floating point instructions to MMX instructions."}, {"HW_INT_RCV", "The number of hardware interrupts recieved."}, {"IDLE_DURING_DIV", "The number of cycles the divider is busy and no other execution unit or load operation was in progress. This event is available only on PMC0."}, {"ILD_STALL", "The number of cycles the instruction length decoder stalled due to a length changing prefix."}, {"INST_QUEUE.FULL", "The number of cycles during which the instruction queue is full."}, {"INST_RETIRED.ANY_P", "The number of instructions retired. This is an architectural performance event."}, {"INST_RETIRED.LOADS", "The number of instructions retired that contained a load operation."}, {"INST_RETIRED.OTHER", "The number of instructions retired that did not contain a load or a store operation."}, {"INST_RETIRED.STORES", "The number of instructions retired that contained a store operation."}, {"ITLB.FLUSH", "The number of ITLB flushes."}, {"ITLB.LARGE_MISS", "The number of instruction fetches from large pages that miss the ITLB."}, {"ITLB.MISSES", "The number of instruction fetches from both large and small pages that miss the ITLB."}, {"ITLB.SMALL_MISS", "The number of instruction fetches from small pages that miss the ITLB."}, {"ITLB_MISS_RETIRED", "The number of retired instructions that missed the ITLB when they were fetched."}, {"L1D_ALL_CACHE_REF", "The number of data reads and writes to cacheable memory."}, {"L1D_ALL_REF", "The number of references to L1 data cache counting loads and stores of to all memory types."}, {"L1D_CACHE_LD", "Number of data reads from cacheable memory excluding locked reads."}, {"L1D_CACHE_LOCK", "Number of locked reads from cacheable memory."}, {"L1D_CACHE_LOCK_DURATION", "The number of cycles during which any cache line is locked by any locking 
instruction."}, {"L1D_CACHE_ST", "The number of data writes to cacheable memory excluding locked writes."}, {"L1D_M_EVICT", "The number of modified cache lines evicted from L1 data cache."}, {"L1D_M_REPL", "The number of modified lines allocated in L1 data cache."}, {"L1D_PEND_MISS", "The total number of outstanding L1 data cache misses at any clock."}, {"L1D_PREFETCH.REQUESTS", "The number of times L1 data cache requested to prefetch a data cache line."}, {"L1D_REPL", "The number of lines brought into L1 data cache."}, {"L1D_SPLIT.LOADS", "The number of load operations that span two cache lines."}, {"L1D_SPLIT.STORES", "The number of store operations that span two cache lines."}, {"L1I_MISSES", "The number of instruction fetch unit misses."}, {"L1I_READS", "The number of instruction fetches."}, {"L2_ADS", "The number of cycles that the L2 address bus is in use."}, {"L2_DBUS_BUSY_RD", "The number of cycles during which the L2 data bus is busy transferring data to the core."}, {"L2_IFETCH", "The number of instruction cache line requests from the instruction fetch unit."}, {"L2_LD", "The number of L2 cache read requests from L1 cache and L2 prefetchers."}, {"L2_LINES_IN", "The number of cache lines allocated in L2 cache."}, {"L2_LINES_OUT", "The number of L2 cache lines evicted."}, {"L2_LOCK", "The number of locked accesses to cache lines that miss L1 data cache."}, {"L2_M_LINES_IN", "The number of L2 cache line modifications."}, {"L2_M_LINES_OUT", "The number of modified lines evicted from L2 cache."}, {"L2_NO_REQ", "Number of cycles during which no L2 cache requests were pending from a core."}, {"L2_REJECT_BUSQ", "Number of L2 cache requests that were rejected."}, {"L2_RQSTS", "The number of completed L2 cache requests."}, {"L2_RQSTS.SELF.DEMAND.I_STATE", "The number of completed L2 cache demand requests from this core that missed the L2 cache. 
This is an architectural performance event."}, {"L2_RQSTS.SELF.DEMAND.MESI", "The number of completed L2 cache demand requests from this core. This is an architectural performance event."}, {"L2_ST", "The number of store operations that miss the L1 cache and request data from the L2 cache."}, {"LOAD_BLOCK.L1D", "The number of loads blocked by the L1 data cache."}, {"LOAD_BLOCK.OVERLAP_STORE", "The number of loads that partially overlap an earlier store or are aliased with a previous store."}, {"LOAD_BLOCK.STA", "The number of loads blocked by preceding stores whose address is yet to be calculated."}, {"LOAD_BLOCK.STD", "The number of loads blocked by preceding stores to the same address whose data value is not known."}, {"LOAD_BLOCK.UNTIL_RETIRE", "The numer of load operations that were blocked until retirement."}, {"LOAD_HIT_PRE", "The number of load operations that conflicted with an prefetch to the same cache line."}, {"MACHINE_NUKES.MEM_ORDER", "The number of times the execution pipeline was restarted due to a memory ordering conflict or memory disambiguation misprediction."}, {"MACHINE_NUKES.SMC", "The number of times a program writes to a code section."}, {"MACRO_INSTS.CISC_DECODED", "The number of complex instructions decoded."}, {"MACRO_INSTS.DECODED", "The number of instructions decoded."}, {"MEMORY_DISAMBIGUATION.RESET", "The number of cycles during which memory disambiguation misprediction occurs."}, {"MEMORY_DISAMBIGUATION.SUCCESS", "The number of load operations that were successfully disambiguated."}, {"MEM_LOAD_RETIRED.DTLB_MISS", "The number of retired loads that missed the DTLB."}, {"MEM_LOAD_RETIRED.L1D_LINE_MISS", "The number of retired load operations that missed L1 data cache and that sent a request to L2 cache. This event is only available on PMC0."}, {"MEM_LOAD_RETIRED.L1D_MISS", "The number of retired load operations that missed L1 data cache. 
This event is only available on PMC0."}, {"MEM_LOAD_RETIRED.L2_LINE_MISS", "The number of load operations that missed L2 cache and that caused a bus request."}, {"MEM_LOAD_RETIRED.L2_MISS", "The number of load operations that missed L2 cache."}, {"MUL","The number of multiply operations executed (only available on PMC1.)"}, {"PAGE_WALKS.COUNT", "The number of page walks executed due to an ITLB or DTLB miss."}, {"PAGE_WALKS.CYCLES", "The number of cycles spent in a page walk caused by an ITLB or DTLB miss."}, {"PREF_RQSTS_DN", "The number of downward prefetches issued from the Data Prefetch Logic unit to L2 cache."}, {"PREF_RQSTS_UP", "The number of upward prefetches issued from the Data Prefetch Logic unit to L2 cache."}, {"RAT_STALLS.ANY", "The number of stall cycles due to any of RAT_STALLS.FLAGS RAT_STALLS.FPSW, RAT_STALLS.PARTIAL and RAT_STALLS.ROB_READ_PORT."}, {"RAT_STALLS.FLAGS", "The number of cycles execution stalled due to a flag register induced stall."}, {"RAT_STALLS.FPSW", "The number of times the floating point status word was written."}, {"RAT_STALLS.PARTIAL_CYCLES", "The number of cycles of added instruction execution latency due to the use of a register that was partially written by previous instructions."}, {"RAT_STALLS.ROB_READ_PORT", "The number of cycles when ROB read port stalls occurred."}, {"RESOURCE_STALLS.ANY", "The number of cycles during which any resource related stall occurred."}, {"RESOURCE_STALLS.BR_MISS_CLEAR", "The number of cycles stalled due to branch misprediction."}, {"RESOURCE_STALLS.FPCW", "The number of cycles stalled due to writing the floating point control word."}, {"RESOURCE_STALLS.LD_ST", "The number of cycles during which the number of loads and stores in the pipeline exceeded their limits."}, {"RESOURCE_STALLS.ROB_FULL", "The number of cycles when the reorder buffer was full."}, {"RESOURCE_STALLS.RS_FULL", "The number of cycles during which the RS was full."}, {"RS_UOPS_DISPATCHED", "The number of micro-ops dispatched 
for execution."}, {"RS_UOPS_DISPATCHED.PORT0", "The number of cycles micro-ops were dispatched for execution on port 0."}, {"RS_UOPS_DISPATCHED.PORT1", "The number of cycles micro-ops were dispatched for execution on port 1."}, {"RS_UOPS_DISPATCHED.PORT2", "The number of cycles micro-ops were dispatched for execution on port 2."}, {"RS_UOPS_DISPATCHED.PORT3", "The number of cycles micro-ops were dispatched for execution on port 3."}, {"RS_UOPS_DISPATCHED.PORT4", "The number of cycles micro-ops were dispatched for execution on port 4."}, {"RS_UOPS_DISPATCHED.PORT5", "The number of cycles micro-ops were dispatched for execution on port 5."}, {"SB_DRAIN_CYCLES", "The number of cycles while the store buffer is draining."}, {"SEGMENT_REG_LOADS", "The number of segment register loads."}, {"SEG_REG_RENAMES.ANY", "The number of times the any segment register was renamed."}, {"SEG_REG_RENAMES.DS", "The number of times the ds register is renamed."}, {"SEG_REG_RENAMES.ES", "The number of times the es register is renamed."}, {"SEG_REG_RENAMES.FS", "The number of times the fs register is renamed."}, {"SEG_REG_RENAMES.GS", "The number of times the gs register is renamed."}, {"SEG_RENAME_STALLS.ANY", "The number of stalls due to lack of resource to rename any segment register."}, {"SEG_RENAME_STALLS.DS", "The number of stalls due to lack of renaming resources for the ds register."}, {"SEG_RENAME_STALLS.ES", "The number of stalls due to lack of renaming resources for the es register."}, {"SEG_RENAME_STALLS.FS", "The number of stalls due to lack of renaming resources for the fs register."}, {"SEG_RENAME_STALLS.GS", "The number of stalls due to lack of renaming resources for the gs register."}, {"SIMD_ASSIST", "The number SIMD assists invoked."}, {"SIMD_COMP_INST_RETIRED.PACKED_DOUBLE", "Then number of computational SSE2 packed double precision instructions retired."}, {"SIMD_COMP_INST_RETIRED.PACKED_SINGLE", "Then number of computational SSE2 packed single precision instructions 
retired."}, {"SIMD_COMP_INST_RETIRED.SCALAR_DOUBLE", "Then number of computational SSE2 scalar double precision instructions retired."}, {"SIMD_COMP_INST_RETIRED.SCALAR_SINGLE", "Then number of computational SSE2 scalar single precision instructions retired."}, {"SIMD_INSTR_RETIRED", "The number of retired SIMD instructions that use MMX registers."}, {"SIMD_INST_RETIRED.ANY", "The number of streaming SIMD instructions retired."}, {"SIMD_INST_RETIRED.PACKED_DOUBLE", "The number of SSE2 packed double precision instructions retired."}, {"SIMD_INST_RETIRED.PACKED_SINGLE", "The number of SSE packed single precision instructions retired."}, {"SIMD_INST_RETIRED.SCALAR_DOUBLE", "The number of SSE2 scalar double precision instructions retired."}, {"SIMD_INST_RETIRED.SCALAR_SINGLE", "The number of SSE scalar single precision instructions retired."}, {"SIMD_INST_RETIRED.VECTOR", "The number of SSE2 vector instructions retired."}, {"SIMD_SAT_INSTR_RETIRED", "The number of saturated arithmetic SIMD instructions retired."}, {"SIMD_SAT_UOP_EXEC", "The number of SIMD saturated arithmetic micro-ops executed."}, {"SIMD_UOPS_EXEC", "The number of SIMD micro-ops executed."}, {"SIMD_UOP_TYPE_EXEC.ARITHMETIC", "The number of SIMD packed arithmetic micro-ops executed."}, {"SIMD_UOP_TYPE_EXEC.LOGICAL", "The number of SIMD packed logical micro-ops executed."}, {"SIMD_UOP_TYPE_EXEC.MUL", "The number of SIMD packed multiply micro-ops executed."}, {"SIMD_UOP_TYPE_EXEC.PACK", "The number of SIMD pack micro-ops executed."}, {"SIMD_UOP_TYPE_EXEC.SHIFT", "The number of SIMD packed shift micro-ops executed."}, {"SIMD_UOP_TYPE_EXEC.UNPACK", "The number of SIMD unpack micro-ops executed."}, {"SNOOP_STALL_DRV", "The number of times the bus stalled for snoops."}, {"SSE_PRE_EXEC.L1", "The number of PREFETCHT0 instructions executed."}, {"SSE_PRE_EXEC.L2", "The number of PREFETCHT1 instructions executed."}, {"SSE_PRE_EXEC.NTA", "The number of PREFETCHNTA instructions executed."}, {"SSE_PRE_EXEC.STORES", 
"The number of times SSE non-temporal store instructions were executed."}, {"SSE_PRE_MISS.L1", "The number of times the PREFETCHT0 instruction executed and missed all cache levels."}, {"SSE_PRE_MISS.L2", "The number of times the PREFETCHT1 instruction executed and missed all cache levels."}, {"SSE_PRE_MISS.NTA", "The number of times the PREFETCHNTA instruction executed and missed all cache levels."}, {"STORE_BLOCK.ORDER", "The number of cycles while a store was waiting for another store to be globally observed."}, {"STORE_BLOCK.SNOOP", "The number of cycles while a store was blocked due to a conflict with an internal or external snoop."}, {"THERMAL_TRIP", "The number of thermal trips."}, {"UOPS_RETIRED.ANY", "The number of micro-ops retired."}, {"UOPS_RETIRED.FUSED", "The number of fused micro-ops retired."}, {"UOPS_RETIRED.LD_IND_BR", "The number of micro-ops retired that fused a load with another operation."}, {"UOPS_RETIRED.MACRO_FUSION", "The number of times retired instruction pairs were fused into one micro-op."}, {"UOPS_RETIRED.NON_FUSED", "he number of non-fused micro-ops retired."}, {"UOPS_RETIRED.STD_STA", "The number of store address calculations that fused into one micro-op."}, {"X87_OPS_RETIRED.ANY", "The number of floating point computational instructions retired."}, {"X87_OPS_RETIRED.FXCH", "The number of FXCH instructions retired."}, { NULL, NULL } }; papi-5.6.0/src/components/coretemp/tests/Makefile000664 001750 001750 00000001021 13216244357 024126 0ustar00jshenry1963jshenry1963000000 000000 NAME=coretemp include ../../Makefile_comp_tests.target %.o:%.c $(CC) $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c -o $@ $< TESTS = coretemp_basic coretemp_pretty coretemp_tests: $(TESTS) coretemp_basic: coretemp_basic.o $(UTILOBJS) $(PAPILIB) $(CC) $(CFLAGS) $(INCLUDE) -o coretemp_basic coretemp_basic.o $(UTILOBJS) $(PAPILIB) $(LDFLAGS) coretemp_pretty: coretemp_pretty.o $(UTILOBJS) $(PAPILIB) $(CC) $(CFLAGS) $(INCLUDE) -o coretemp_pretty coretemp_pretty.o $(UTILOBJS) 
$(PAPILIB) $(LDFLAGS) clean: rm -f $(TESTS) *.o papi-5.6.0/src/libpfm-3.y/lib/pfmlib_os_macos.c000664 001750 001750 00000005541 13216244363 023257 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_os_macos.c: set of functions for MacOS (Tiger) * * Copyright (c) 2008 Stephane Eranian * Contributed by Stephane Eranian * As a sign of friendship to my friend Eric, big fan of MacOS * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include #include #include #include #include #include #include "pfmlib_priv.h" typedef enum { TYPE_NONE, TYPE_STR, TYPE_INT } mib_name_t; /* * helper function to retrieve one value from /proc/cpuinfo * for internal libpfm use only * attr: the attribute (line) to look for * ret_buf: a buffer to store the value of the attribute (as a string) * maxlen : number of bytes of capacity in ret_buf * * ret_buf is null terminated. 
* * Return: * 0 : attribute found, ret_buf populated * -1: attribute not found */ int __pfm_getcpuinfo_attr(const char *attr, char *ret_buf, size_t maxlen) { mib_name_t type = TYPE_NONE; union { char str[32]; int val; } value; char *name = NULL; int mib[16]; int ret = -1; size_t len, mib_len; if (attr == NULL || ret_buf == NULL || maxlen < 1) return -1; *ret_buf = '\0'; if (!strcmp(attr, "vendor_id")) { name = "machdep.cpu.vendor"; type = TYPE_STR; } else if (!strcmp(attr, "model")) { name = "machdep.cpu.model"; type = TYPE_INT; } else if (!strcmp(attr, "cpu family")) { name = "machdep.cpu.family"; type = TYPE_INT; } mib_len = 16; ret = sysctlnametomib(name, mib, &mib_len); if (ret) return -1; len = sizeof(value); ret = sysctl(mib, mib_len, &value, &len, NULL, 0); if (ret) return ret; if (type == TYPE_STR) strncpy(ret_buf, value.str, maxlen); else if (type == TYPE_INT) snprintf(ret_buf, maxlen, "%d", value.val); __pfm_vbprintf("attr=%s ret=%d ret_buf=%s\n", attr, ret, ret_buf); return ret; } void pfm_init_syscalls(void) { } papi-5.6.0/src/solaris-niagara2.h000664 001750 001750 00000011326 13216244370 020634 0ustar00jshenry1963jshenry1963000000 000000 /******************************************************************************* * >>>>>> "Development of a PAPI Backend for the Sun Niagara 2 Processor" <<<<<< * ----------------------------------------------------------------------------- * * Fabian Gorsler * * Hochschule Bonn-Rhein-Sieg, Sankt Augustin, Germany * University of Applied Sciences * * ----------------------------------------------------------------------------- * * File: solaris-niagara2.c * Author: fg215045 * * Description: Data structures used for the communication between PAPI and the * component. Additionally some macros are defined here. See solaris-niagara2.c. 
* * ***** Feel free to convert this header to the PAPI default ***** * * ----------------------------------------------------------------------------- * Created on April 23, 2009, 7:31 PM ******************************************************************************/ #ifndef _SOLARIS_NIAGARA2_H #define _SOLARIS_NIAGARA2_H #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "papi_defines.h" //////////////////////////////////////////////////////////////////////////////// /// COPIED ITEMS FROM THE OLD PORT TO SOLARIS ////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// /* DESCRIPTION: * ----------------------------------------------------------------------------- * The following lines are taken from the old Solaris port of PAPI. If changes * have been made there are (additional) comments. * ******************************************************************************/ #define MAX_COUNTERS 2 #define MAX_COUNTER_TERMS MAX_COUNTERS #define PAPI_MAX_NATIVE_EVENTS 71 #define MAX_NATIVE_EVENT PAPI_MAX_NATIVE_EVENTS typedef int niagara2_reg_alloc_t; /* libcpc 2 does not need any bit masks */ typedef struct _niagara2_register { int event_code; } _niagara2_register_t; #define BUF_T0 0 #define BUF_T1 1 #define EVENT_NOT_SET -1; #define SYNTHETIC_EVENTS_SUPPORTED 1 /* This structured bundles everything needed for sampling up to MAX_COUNTERS */ typedef struct _niagara2_control_state { /* A set instruments the hardware counters */ cpc_set_t *set; /* A buffer stores the events counted. For measuring a start of measurment and an end is needed as measurement does not always start from 0. This is done by using an array of bufs, accessed by the indexes BUF_T0 as start and BUF_T1 as end. 
*/ cpc_buf_t *counter_buffer; /* The indexes are needed for accessing the single counter events, if the value of these indexes is equal to EVENT_NOT_SET this means it is unused */ int idx[MAX_COUNTERS]; /* The event codes applied to this set */ _niagara2_register_t code[MAX_COUNTERS]; /* The total number of events being counted */ int count; /* The values retrieved from the counter */ uint64_t result[MAX_COUNTERS]; /* Flags for controlling overflow handling and binding, see cpc_set_create(3CPC) for more details on this topic. */ uint_t flags[MAX_COUNTERS]; /* Preset values for the counters */ uint64_t preset[MAX_COUNTERS]; /* Memory to store values when an overflow occours */ long_long threshold[MAX_COUNTERS]; long_long hangover[MAX_COUNTERS]; #ifdef SYNTHETIC_EVENTS_SUPPORTED int syn_count; uint64_t syn_hangover[MAX_COUNTERS]; #endif } _niagara2_control_state_t; #define GET_OVERFLOW_ADDRESS(ctx) (void*)(ctx->ucontext->uc_mcontext.gregs[REG_PC]) typedef int hwd_register_map_t; #include "solaris-context.h" typedef _niagara2_control_state_t _niagara2_context_t; // Needs an explicit declaration, no longer externally found. rwlock_t lock[PAPI_MAX_LOCK]; // For setting and releasing locks. 
/* Lock helpers. They expand to plain expressions (no trailing semicolon) so
   they can be used safely in unbraced if/else bodies; the caller supplies
   the terminating ';' as for an ordinary function call. */
#define _papi_hwd_lock(lck)   rw_wrlock(&lock[(lck)])
#define _papi_hwd_unlock(lck) rw_unlock(&lock[(lck)])

/* Initial value programmed into a counter */
#define DEFAULT_CNTR_PRESET (0)

/* Flag value telling the read routine that it is called internally and not
   through the PAPI read path */
#define NOT_A_PAPI_HWD_READ (-666)

#define CPC_COUNTING_DOMAINS (CPC_COUNT_USER|CPC_COUNT_SYSTEM|CPC_COUNT_HV)

/* Marker for an unused counter slot. Redefined here as a pure expression so
   that it also works in comparisons, e.g. (idx == EVENT_NOT_SET). */
#undef EVENT_NOT_SET
#define EVENT_NOT_SET (-1)

/* Clean the stubbed data structures from framework initialization */
#undef hwd_context_t
#define hwd_context_t _niagara2_context_t

#undef hwd_control_state_t
#define hwd_control_state_t _niagara2_control_state_t

#undef hwd_register_t
#define hwd_register_t _niagara2_register_t
*/ int multiplex(void) { int retval, i, EventSet = PAPI_NULL, j = 0; long long *values; PAPI_event_info_t pset; int events[MAX_TO_ADD], number; /* Initialize the library */ retval = PAPI_library_init(PAPI_VER_CURRENT); if (retval != PAPI_VER_CURRENT) { printf("Library initialization error! \n"); exit(1); } /* initialize multiplex support */ retval = PAPI_multiplex_init(); if (retval != PAPI_OK) ERROR_RETURN(retval); retval = PAPI_create_eventset(&EventSet); if (retval != PAPI_OK) ERROR_RETURN(retval); /* convert the event set to a multiplex event set */ retval = PAPI_set_multiplex(EventSet); if (retval != PAPI_OK) ERROR_RETURN(retval); /* retval = PAPI_add_event(EventSet, PAPI_TOT_INS); if ((retval != PAPI_OK) && (retval != PAPI_ECNFLCT)) ERROR_RETURN(retval); printf("Adding %s\n", "PAPI_TOT_INS"); */ for (i = 0; i < PAPI_MAX_PRESET_EVENTS; i++) { retval = PAPI_get_event_info(i | PAPI_PRESET_MASK, &pset); if (retval != PAPI_OK) ERROR_RETURN(retval); if ((pset.count) && (pset.event_code != PAPI_TOT_CYC)) { printf("Adding %s\n", pset.symbol); retval = PAPI_add_event(EventSet, pset.event_code); if ((retval != PAPI_OK) && (retval != PAPI_ECNFLCT)) ERROR_RETURN(retval); if (retval == PAPI_OK) printf("Added %s\n", pset.symbol); else printf("Could not add %s due to resource limitation.\n", pset.symbol); if (retval == PAPI_OK) { if (++j >= MAX_TO_ADD) break; } } } values = (long long *) malloc(MAX_TO_ADD * sizeof(long long)); if (values == NULL) { printf("Not enough memory available. 
\n"); exit(1); } if ((retval=PAPI_start(EventSet)) != PAPI_OK) ERROR_RETURN(retval); do_flops(NUM_ITERS); retval = PAPI_stop(EventSet, values); if (retval != PAPI_OK) ERROR_RETURN(retval); /* get the number of events in the event set */ number=MAX_TO_ADD; if ( (retval = PAPI_list_events(EventSet, events, &number)) != PAPI_OK) ERROR_RETURN(retval); /* print the read result */ for (i = 0; i < MAX_TO_ADD; i++) { retval = PAPI_get_event_info(events[i], &pset); if (retval != PAPI_OK) ERROR_RETURN(retval); printf("Event name: %s value: %lld \n", pset.symbol, values[i]); } retval = PAPI_cleanup_eventset(EventSet); if (retval != PAPI_OK) ERROR_RETURN(retval); retval = PAPI_destroy_eventset(&EventSet); if (retval != PAPI_OK) ERROR_RETURN(retval); /* free the resources used by PAPI */ PAPI_shutdown(); return (0); } int main(int argc, char **argv) { printf("Using %d iterations\n\n", NUM_ITERS); printf("Does PAPI_multiplex_init() handle lots of events?\n"); multiplex(); exit(0); } papi-5.6.0/src/libpfm4/debian/libpfm4-dev.manpages000664 001750 001750 00000000016 13216244363 023726 0ustar00jshenry1963jshenry1963000000 000000 docs/man3/*.3 papi-5.6.0/src/solaris-niagara2.c000664 001750 001750 00000163244 13216244370 020636 0ustar00jshenry1963jshenry1963000000 000000 /******************************************************************************* * >>>>>> "Development of a PAPI Backend for the Sun Niagara 2 Processor" <<<<<< * ----------------------------------------------------------------------------- * * Fabian Gorsler * * Hochschule Bonn-Rhein-Sieg, Sankt Augustin, Germany * University of Applied Sciences * * ----------------------------------------------------------------------------- * * File: solaris-niagara2.c * Author: fg215045 * * Description: This source file is the implementation of a PAPI * component for the Sun Niagara 2 processor (aka UltraSPARC T2) * running on Solaris 10 with libcpc 2. 
/* Simple error handlers for convenience.
 *
 * Every macro checks its argument and, when the check fails, logs through
 * SUBDBG and returns from the *calling* function with the listed PAPI error
 * code. The argument is evaluated exactly once, so passing a function call
 * does not execute that call a second time for the debug output. The
 * do { } while (0) wrapper makes each macro a single statement; terminate
 * the invocation with ';'.
 *
 *   __CHECK_ERR_DFLT: value != 0        -> PAPI_ECMP
 *   __CHECK_ERR_NULL: pointer == NULL   -> PAPI_ECMP
 *   __CHECK_ERR_PAPI: value != PAPI_OK  -> PAPI_ECMP
 *   __CHECK_ERR_INVA: value != 0        -> PAPI_EINVAL
 *   __CHECK_ERR_NEGV: value < 0         -> PAPI_ECMP
 */
#define __CHECK_ERR_DFLT(retval) \
	do { \
		const int _chk_rv = (retval); \
		if (_chk_rv != 0) { \
			SUBDBG("RETVAL: %d\n", _chk_rv); \
			return PAPI_ECMP; \
		} \
	} while (0)

#define __CHECK_ERR_NULL(retval) \
	do { \
		if ((retval) == NULL) { \
			SUBDBG("RETVAL: NULL\n"); \
			return PAPI_ECMP; \
		} \
	} while (0)

#define __CHECK_ERR_PAPI(retval) \
	do { \
		const int _chk_rv = (retval); \
		if (_chk_rv != PAPI_OK) { \
			SUBDBG("RETVAL: %d\n", _chk_rv); \
			return PAPI_ECMP; \
		} \
	} while (0)

#define __CHECK_ERR_INVA(retval) \
	do { \
		const int _chk_rv = (retval); \
		if (_chk_rv != 0) { \
			SUBDBG("RETVAL: %d\n", _chk_rv); \
			return PAPI_EINVAL; \
		} \
	} while (0)

#define __CHECK_ERR_NEGV(retval) \
	do { \
		const int _chk_rv = (retval); \
		if (_chk_rv < 0) { \
			SUBDBG("RETVAL: %d\n", _chk_rv); \
			return PAPI_ECMP; \
		} \
	} while (0)
{PAPI_L2_TCW, {"CPU_st_to_PCX", NULL}, 1, NOT_DERIVED}, /* Derived presets found, should be checked twice */ {PAPI_L1_TCM, {"IC_miss", "DC_miss"}, 2, DERIVED_ADD}, {PAPI_BR_CN, {"Br_completed", "Br_taken"}, 2, DERIVED_ADD}, {PAPI_BR_PRC, {"Br_completed", "Br_taken"}, 2, DERIVED_SUB}, {PAPI_LST_INS, {"Instr_st", "Instr_ld"}, 2, DERIVED_ADD}, #ifdef SYNTHETIC_EVENTS_SUPPORTED /* This preset does exist in order to support multiplexing */ {PAPI_TOT_CYC, {"_syn_cycles_elapsed", "DC_miss"}, 1, NOT_DERIVED}, #endif {0, {NULL, NULL}, 0, 0}, }; hwi_search_t *preset_table; #ifdef SYNTHETIC_EVENTS_SUPPORTED enum { SYNTHETIC_CYCLES_ELAPSED = 1, SYNTHETIC_RETURN_ONE, SYNTHETIC_RETURN_TWO, } __int_synthetic_enum; #endif #ifdef SYNTHETIC_EVENTS_SUPPORTED typedef struct __int_synthetic_table { int code; char *name; } __int_syn_table_t; #endif #ifdef SYNTHETIC_EVENTS_SUPPORTED static __int_syn_table_t __int_syn_table[] = { {SYNTHETIC_CYCLES_ELAPSED, "_syn_cycles_elapsed"}, {SYNTHETIC_RETURN_ONE, "_syn_return_one"}, {SYNTHETIC_RETURN_TWO, "_syn_return_two"}, {-1, NULL}, }; #endif //////////////////////////////////////////////////////////////////////////////// /// PAPI HWD LAYER RELATED FUNCTIONS /////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// /* DESCRIPTION: * ----------------------------------------------------------------------------- * Functions in this section are related to the PAPI hardware dependend layer, * also known as "HWD". In this case the HWD layer is the interface from PAPI * to libcpc 2/Solaris 10. 
******************************************************************************/ int _niagara2_set_domain( hwd_control_state_t * ctrl, int domain ) { int i; #ifdef DEBUG SUBDBG( "ENTERING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif /* Clean and set the new flag for each counter */ for ( i = 0; i < MAX_COUNTERS; i++ ) { #ifdef DEBUG SUBDBG( " -> %s: Setting flags for PIC#%d, old value: %p\n", __func__, i, ctrl->flags[i] ); #endif ctrl->flags[i] &= ~( CPC_COUNTING_DOMAINS ); #ifdef DEBUG SUBDBG( " -> %s: +++ cleaned value: %p\n", __func__, ctrl->flags[i] ); #endif ctrl->flags[i] |= __cpc_domain_translator( domain ); #ifdef DEBUG SUBDBG( " -> %s: +++ new value: %p\n", __func__, ctrl->flags[i] ); #endif } /* Recreate the set */ __CHECK_ERR_PAPI( __cpc_recreate_set( ctrl ) ); #ifdef DEBUG SUBDBG( "LEAVING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif return PAPI_OK; } int _niagara2_ctl( hwd_context_t * ctx, int code, _papi_int_option_t * option ) { #ifdef DEBUG SUBDBG( "ENTERING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); SUBDBG( " -> %s: Option #%d requested\n", __func__, code ); #endif /* Only these options are handled which are handled in PAPI_set_opt, as many of the left out options are not settable, like PAPI_MAX_CPUS. */ switch ( code ) { case PAPI_DEFDOM: /* From papi.h: Domain for all new eventsets. Takes non-NULL option pointer. 
*/ _niagara2_vector.cmp_info.default_domain = option->domain.domain; return PAPI_OK; case PAPI_DOMAIN: /* From papi.h: Domain for an eventset */ return _niagara2_set_domain( ctx, option->domain.domain ); case PAPI_DEFGRN: /* From papi.h: Granularity for all new eventsets */ _niagara2_vector.cmp_info.default_granularity = option->granularity.granularity; return PAPI_OK; case PAPI_GRANUL: /* From papi.h: Granularity for an eventset */ /* Only supported granularity is PAPI_GRN_THREAD */ return PAPI_OK; case PAPI_DEF_MPX_NS: /* From papi.h: Multiplexing/overflowing interval in ns, same as PAPI_DEF_ITIMER_NS */ /* From the old component */ option->itimer.ns = __sol_get_itimer_ns( option->itimer.ns ); #ifdef DEBUG SUBDBG( " -> %s: PAPI_DEF_MPX_NS, option->itimer.ns=%d\n", __func__, option->itimer.ns ); #endif return PAPI_OK; case PAPI_DEF_ITIMER: // IN THE OLD COMPONENT // USED /* From papi.h: Option to set the type of itimer used in both software multiplexing, overflowing and profiling */ /* These tests are taken from the old component. For Solaris 10 the same rules apply as documented in getitimer(2). */ if ( ( option->itimer.itimer_num == ITIMER_REAL ) && ( option->itimer.itimer_sig != SIGALRM ) ) { #ifdef DEBUG SUBDBG( " -> %s: PAPI_DEF_ITIMER, ITIMER_REAL needs SIGALRM\n", __func__ ); #endif return PAPI_EINVAL; } if ( ( option->itimer.itimer_num == ITIMER_VIRTUAL ) && ( option->itimer.itimer_sig != SIGVTALRM ) ) { #ifdef DEBUG SUBDBG( " -> %s: PAPI_DEF_ITIMER, ITIMER_VIRTUAL needs SIGVTALRM\n", __func__ ); #endif return PAPI_EINVAL; } if ( ( option->itimer.itimer_num == ITIMER_PROF ) && ( option->itimer.itimer_sig != SIGPROF ) ) { #ifdef DEBUG SUBDBG( " -> %s: PAPI_DEF_ITIMER, ITIMER_PROF needs SIGPROF\n", __func__ ); #endif return PAPI_EINVAL; } /* As in the old component defined, timer values below 0 are NOT filtered out, but timer values greater than 0 are rounded, either to a value which is at least itimer_res_ns or padded to a multiple of itimer_res_ns. 
*/ if ( option->itimer.ns > 0 ) { option->itimer.ns = __sol_get_itimer_ns( option->itimer.ns ); #ifdef DEBUG SUBDBG( " -> %s: PAPI_DEF_ITIMER, option->itimer.ns=%d\n", __func__, option->itimer.ns ); #endif } return PAPI_OK; case PAPI_DEF_ITIMER_NS: // IN THE OLD COMPONENT // USED /* From papi.h: Multiplexing/overflowing interval in ns, same as PAPI_DEF_MPX_NS */ /* From the old component */ option->itimer.ns = __sol_get_itimer_ns( option->itimer.ns ); #ifdef DEBUG SUBDBG( " -> %s: PAPI_DEF_ITIMER_NS, option->itimer.ns=%d\n", __func__, option->itimer.ns ); #endif return PAPI_OK; } #ifdef DEBUG SUBDBG( " -> %s: Option not found\n", __func__ ); SUBDBG( "LEAVING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif /* This place should never be reached */ return PAPI_EINVAL; } void _niagara2_dispatch_timer( int signal, siginfo_t * si, void *info ) { EventSetInfo_t *ESI = NULL; ThreadInfo_t *thread = NULL; int overflow_vector = 0; hwd_control_state_t *ctrl = NULL; long_long results[MAX_COUNTERS]; int i; // Hint from perf_events.c int cidx = _niagara2_vector.cmp_info.CmpIdx; #ifdef DEBUG SUBDBG( "ENTERING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); SUBDBG( " -> %s: Overflow handler called by signal #%d\n", __func__, signal ); #endif /* From the old component */ thread = _papi_hwi_lookup_thread( 0 ); ESI = ( EventSetInfo_t * ) thread->running_eventset[cidx]; /* From the old component, modified */ // if ( ESI == NULL || ESI->master != thread || ESI->ctl_state == NULL || ( ( ESI->state & PAPI_OVERFLOWING ) == 0 ) ) { #ifdef DEBUG SUBDBG( " -> %s: Problems with ESI, not necessarily serious\n", __func__ ); if ( ESI == NULL ) { SUBDBG( " -> %s: +++ ESI is NULL\n", __func__ ); } if ( ESI->master != thread ) { SUBDBG( " -> %s: +++ Thread mismatch, ESI->master=%#x thread=%#x\n", __func__, ESI->master, thread ); } if ( ESI->ctl_state == NULL ) { SUBDBG( " -> %s: +++ Counter state invalid\n", __func__ ); } if ( ( ( ESI->state & PAPI_OVERFLOWING ) == 0 
) ) { SUBDBG ( " -> %s: +++ Overflow flag missing, ESI->overflow.flags=%#x\n", __func__, ESI->overflow.flags ); } #endif return; } #ifdef DEBUG printf( " -> %s: Preconditions valid, trying to read counters\n", __func__ ); #endif ctrl = ESI->ctl_state; if ( _niagara2_read ( ctrl, ctrl, ( long_long ** ) & results, NOT_A_PAPI_HWD_READ ) != PAPI_OK ) { /* Failure */ #ifdef DEBUG printf( "%s: Failed to read counters\n", __func__ ); #endif return; } else { /* Success */ #ifdef DEBUG SUBDBG( " -> %s: Counters read\n", __func__ ); #endif /* Iterate over all available counters in order to detect which counter overflowed (counter value should be 0 if an hw overflow happened), store the position in the overflow_vector, calculte the offset and shift (value range signed long long vs. unsigned long long). */ for ( i = 0; i < ctrl->count; i++ ) { if ( results[i] >= 0 ) { #ifdef DEBUG SUBDBG( " -> %s: Overflow detected at PIC #%d\n", __func__, i ); #endif /* Set the bit in the overflow_vector */ overflow_vector = overflow_vector | ( 1 << i ); /* hoose which method to use depending on the overflow signal. */ if ( signal == SIGEMT ) { /* Store the counter value, but only if we have a real * hardware overflow counting with libcpc/SIGEMT. */ ctrl->preset[i] = UINT64_MAX - ctrl->threshold[i]; ctrl->hangover[i] += ctrl->threshold[i]; } else { /* Push the value back, this time PAPI does the work. This is software overflow handling. */ cpc_request_preset( cpc, ctrl->idx[i], ctrl->result[i] ); } } else { #ifdef DEBUG SUBDBG( " -> %s: No overflow detected at PIC #%d, value=%ld\n", __func__, i, results[i] ); #endif /* Save the results read from the counter as we can not store the temporary value in hardware or libcpc. 
*/ if ( signal == SIGEMT ) { ctrl->preset[i] += results[i]; ctrl->hangover[i] = results[i]; } } } #ifdef DEBUG SUBDBG( " -> %s: Restarting set to push values back\n", __func__ ); #endif /* Push all values back to the counter as preset */ cpc_set_restart( cpc, ctrl->set ); } #ifdef DEBUG SUBDBG( " -> %s: Passing overflow to PAPI with overflow_vector=%p\n", __func__, overflow_vector ); #endif { /* hw is used as pointer in the dispatching routine of PAPI and might be changed. For safety it is not a pseudo pointer to NULL. */ int hw; if ( signal == SIGEMT ) { /* This is a hardware overflow */ hw = 1; _papi_hwi_dispatch_overflow_signal( ctrl, ( caddr_t ) _niagara2_get_overflow_address ( info ), &hw, overflow_vector, 1, &thread, ESI->CmpIdx ); } else { /* This is a software overflow */ hw = 0; _papi_hwi_dispatch_overflow_signal( ctrl, ( caddr_t ) _niagara2_get_overflow_address ( info ), &hw, overflow_vector, 1, &thread, ESI->CmpIdx ); } } #ifdef DEBUG SUBDBG( "LEAVING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif } static inline void * _niagara2_get_overflow_address( void *context ) { ucontext_t *ctx = ( ucontext_t * ) context; #ifdef DEBUG SUBDBG( "ENTERING/LEAVING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif return ( void * ) ctx->uc_mcontext.gregs[REG_PC]; } /** Although the created set in this function will be destroyed by * _papi_update_control_state later, at least the functionality of the * underlying CPU driver will be tested completly. 
*/ int _niagara2_init_control_state( hwd_control_state_t * ctrl ) { int i; #ifdef DEBUG SUBDBG( "ENTERING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif // cpc_seterrhndlr(cpc, myapp_errfn); /* Clear the buffer */ if ( ctrl->counter_buffer != NULL ) { #ifdef DEBUG SUBDBG( " -> %s: Cleaning buffer\n", __func__ ); #endif cpc_buf_destroy( cpc, ctrl->counter_buffer ); ctrl->counter_buffer = NULL; } /* Clear the set */ if ( ctrl->set != NULL ) { #ifdef DEBUG SUBDBG( " -> %s: Cleaning set\n", __func__ ); #endif cpc_set_destroy( cpc, ctrl->set ); ctrl->set = NULL; } /* Indicate this idx has no request associated, this counter is unused. */ for ( i = 0; i < MAX_COUNTERS; i++ ) { #ifdef DEBUG SUBDBG( " -> %s: Cleaning counter state #%d\n", __func__, i ); #endif /* Indicate missing setup values */ ctrl->idx[i] = EVENT_NOT_SET; ctrl->code[i].event_code = EVENT_NOT_SET; /* No flags yet set, this is for overflow and binding */ ctrl->flags[i] = 0; /* Preset value for counting results */ ctrl->preset[i] = DEFAULT_CNTR_PRESET; /* Needed for overflow handling, will be set later */ ctrl->threshold[i] = 0; ctrl->hangover[i] = 0; #ifdef SYNTHETIC_EVENTS_SUPPORTED ctrl->syn_hangover[i] = 0; #endif } /* No counters active in this set */ ctrl->count = 0; #ifdef SYNTHETIC_EVENTS_SUPPORTED ctrl->syn_count = 0; #endif #ifdef DEBUG SUBDBG( "LEAVING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif return PAPI_OK; } int _niagara2_init_component( int cidx ) { #ifdef DEBUG SUBDBG( "ENTERING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif /* Create an instance of libcpc */ #ifdef DEBUG SUBDBG( " -> %s: Trying to initalize libcpc\n", __func__ ); #endif cpc = cpc_open( CPC_VER_CURRENT ); __CHECK_ERR_NULL( cpc ); #ifdef DEBUG SUBDBG( " -> %s: Registering libcpc error handler\n", __func__ ); #endif cpc_seterrhndlr( cpc, __cpc_error_handler ); #ifdef DEBUG SUBDBG( " -> %s: Detecting supported PICs", __func__ ); #endif __t2_store.npic = 
cpc_npic( cpc ); #ifdef DEBUG SUBDBG( " -> %s: Storing component index, cidx=%d\n", __func__, cidx ); #endif _niagara2_vector.cmp_info.CmpIdx = cidx; #ifdef DEBUG SUBDBG( " -> %s: Gathering system information for PAPI\n", __func__ ); #endif /* Store system info in central data structure */ __CHECK_ERR_PAPI( _niagara2_get_system_info( &_papi_hwi_system_info ) ); #ifdef DEBUG SUBDBG( " -> %s: Initializing locks\n", __func__ ); #endif /* Set up the lock after initialization */ _niagara2_lock_init( ); // Copied from the old component, _papi_init_component() SUBDBG( "Found %d %s %s CPUs at %d Mhz.\n", _papi_hwi_system_info.hw_info.totalcpus, _papi_hwi_system_info.hw_info.vendor_string, _papi_hwi_system_info.hw_info.model_string, _papi_hwi_system_info.hw_info.cpu_max_mhz ); /* Build native event table */ #ifdef DEBUG SUBDBG( " -> %s: Building native event table\n", __func__ ); #endif __CHECK_ERR_PAPI( __cpc_build_ntv_table( ) ); /* Build preset event table */ #ifdef DEBUG SUBDBG( " -> %s: Building PAPI preset table\n", __func__ ); #endif __CHECK_ERR_PAPI( __cpc_build_pst_table( ) ); /* Register presets and finish event related setup */ #ifdef DEBUG SUBDBG( " -> %s: Registering presets in PAPI\n", __func__ ); #endif __CHECK_ERR_PAPI( _papi_hwi_setup_all_presets( preset_table, NULL ) ); #ifdef DEBUG SUBDBG( "LEAVING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif /* Everything is ok */ return PAPI_OK; } static void _niagara2_lock_init( void ) { #ifdef DEBUG SUBDBG( "ENTERING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif /* Copied from old component, lock_init() */ memset( lock, 0x0, sizeof ( rwlock_t ) * PAPI_MAX_LOCK ); #ifdef DEBUG SUBDBG( "LEAVING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif } int _niagara2_ntv_code_to_bits( unsigned int EventCode, hwd_register_t * bits ) { int event_code = EventCode & PAPI_NATIVE_AND_MASK; #ifdef DEBUG SUBDBG( "ENTERING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, 
__LINE__ ); #endif if ( event_code >= 0 && event_code <= _niagara2_vector.cmp_info.num_native_events ) { return PAPI_ENOEVNT; } bits->event_code = event_code; #ifdef DEBUG SUBDBG( "LEAVING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif return PAPI_OK; } int _niagara2_ntv_code_to_descr( unsigned int EventCode, char *ntv_descr, int len ) { #ifdef DEBUG SUBDBG( "ENTERING/LEAVING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif /* libcpc offers no descriptions, just a link to the reference manual */ return _niagara2_ntv_code_to_name( EventCode, ntv_descr, len ); } int _niagara2_ntv_code_to_name( unsigned int EventCode, char *ntv_name, int len ) { int event_code = EventCode & PAPI_NATIVE_AND_MASK; #ifdef DEBUG SUBDBG( "ENTERING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif if ( event_code >= 0 && event_code <= _niagara2_vector.cmp_info.num_native_events ) { strlcpy( ntv_name, __t2_ntv_events[event_code], len ); if ( strlen( __t2_ntv_events[event_code] ) > len - 1 ) { #ifdef DEBUG SUBDBG( "LEAVING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif /* It's not a real error, but at least a hint */ return PAPI_EBUF; } #ifdef DEBUG SUBDBG( "LEAVING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif return PAPI_OK; } #ifdef DEBUG SUBDBG( "LEAVING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif return PAPI_ENOEVNT; } int _niagara2_ntv_enum_events( unsigned int *EventCode, int modifier ) { /* This code is very similar to the code from the old component. 
*/ int event_code = *EventCode & PAPI_NATIVE_AND_MASK; #ifdef DEBUG SUBDBG( "ENTERING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif if ( modifier == PAPI_ENUM_FIRST ) { *EventCode = PAPI_NATIVE_MASK + 1; #ifdef DEBUG SUBDBG( "LEAVING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif return PAPI_OK; } /* The table needs to be shifted by one position (starting index 1), as PAPI expects native event codes not to be 0 (papi_internal.c:744). */ if ( event_code >= 1 && event_code <= _niagara2_vector.cmp_info.num_native_events - 1 ) { *EventCode = *EventCode + 1; #ifdef DEBUG SUBDBG( "LEAVING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif return PAPI_OK; } #ifdef DEBUG SUBDBG( "LEAVING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif // If nothing found report an error return PAPI_ENOEVNT; } int _niagara2_read( hwd_context_t * ctx, hwd_control_state_t * ctrl, long_long ** events, int flags ) { int i; #ifdef DEBUG SUBDBG( "ENTERING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); SUBDBG( " -> %s: called with flags=%p\n", __func__, flags ); #endif /* Take a new sample from the PIC to the buffer */ __CHECK_ERR_DFLT( cpc_set_sample( cpc, ctrl->set, ctrl->counter_buffer ) ); /* Copy the buffer values from all active counters */ for ( i = 0; i < ctrl->count; i++ ) { /* Retrieve the counting results of libcpc */ __CHECK_ERR_DFLT( cpc_buf_get( cpc, ctrl->counter_buffer, ctrl->idx[i], &ctrl->result[i] ) ); /* As libcpc uses uint64_t and PAPI uses int64_t, we need to normalize the result back to a value that PAPI can handle, otherwise the result is not usable as its in the negative range of int64_t and the result becomes useless for PAPI. 
*/ if ( ctrl->threshold[i] > 0 ) { #ifdef DEBUG SUBDBG( " -> %s: Normalizing result on PIC#%d to %lld\n", __func__, i, ctrl->result[i] ); #endif /* DEBUG */ /* This shifts the retrieved value back to the PAPI value range */ ctrl->result[i] = ctrl->result[i] - ( UINT64_MAX - ctrl->threshold[i] ) - 1; /* Needed if called internally if a PIC didn't really overflow, but was programmed in the same set. */ if ( flags != NOT_A_PAPI_HWD_READ ) { ctrl->result[i] = ctrl->hangover[i]; } #ifdef DEBUG SUBDBG( " -> %s: Overflow scaling on PIC#%d:\n", __func__, i ); SUBDBG( " -> %s: +++ ctrl->result[%d]=%llu\n", __func__, i, ctrl->result[i] ); SUBDBG( " -> %s: +++ ctrl->threshold[%d]=%lld\n", __func__, i, ctrl->threshold[i] ); SUBDBG( " -> %s: +++ ctrl->hangover[%d]=%lld\n", __func__, i, ctrl->hangover[i] ); #endif } #ifdef DEBUG SUBDBG( " -> %s: +++ ctrl->result[%d]=%llu\n", __func__, i, ctrl->result[i] ); #endif } #ifdef SYNTHETIC_EVENTS_SUPPORTED { int i; const int syn_barrier = _niagara2_vector.cmp_info.num_native_events - __t2_store.syn_evt_count; for ( i = 0; i < ctrl->count; i++ ) { if ( ctrl->code[i].event_code >= syn_barrier ) { ctrl->result[i] = __int_get_synthetic_event( ctrl->code[i].event_code - syn_barrier, ctrl, &i ); } } } #endif /* Pass the address of the results back to the calling function */ *events = ( long_long * ) & ctrl->result[0]; #ifdef DEBUG SUBDBG( "LEAVING: %s\n", "_papi_read" ); #endif return PAPI_OK; } int _niagara2_reset( hwd_context_t * ctx, hwd_control_state_t * ctrl ) { #ifdef DEBUG SUBDBG( "ENTERING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif /* This does a restart of the whole set, setting the internal counters back to the value passed as preset of the last call of cpc_set_add_request or cpc_request_preset. 
*/ cpc_set_restart( cpc, ctrl->set ); #ifdef SYNTHETIC_EVENTS_SUPPORTED { const int syn_barrier = _niagara2_vector.cmp_info.num_native_events - __t2_store.syn_evt_count; int i; if ( ctrl->syn_count > 0 ) { for ( i = 0; i < MAX_COUNTERS; i++ ) { if ( ctrl->code[i].event_code >= syn_barrier ) { ctrl->syn_hangover[i] += __int_get_synthetic_event( ctrl->code[i].event_code - syn_barrier, ctrl, &i ); } } } } #endif #ifdef DEBUG SUBDBG( "LEAVING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif return PAPI_OK; } int _niagara2_set_profile( EventSetInfo_t * ESI, int EventIndex, int threshold ) { /* Seems not to be used. */ #ifdef DEBUG SUBDBG( "ENTERING/LEAVING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif return PAPI_ENOSUPP; } int _niagara2_set_overflow( EventSetInfo_t * ESI, int EventIndex, int threshold ) { hwd_control_state_t *ctrl = ESI->ctl_state; struct sigaction sigact; #ifdef DEBUG SUBDBG( "ENTERING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); SUBDBG( " -> %s: Overflow handling for %#x on PIC#%d requested\n", __func__, ctrl, EventIndex ); SUBDBG( " -> %s: ESI->overflow.flags=%#x\n\n", __func__, ctrl, ESI->overflow.flags ); #endif /* If threshold > 0, then activate hardware overflow handling, otherwise disable it. */ if ( threshold > 0 ) { #ifdef DEBUG SUBDBG( " -> %s: Activating overflow handling\n", __func__ ); #endif ctrl->preset[EventIndex] = UINT64_MAX - threshold; ctrl->threshold[EventIndex] = threshold; /* If SIGEMT is not yet enabled, enable it. In libcpc this means to re- recreate the used set. In order not to break PAPI operations only the event referred by EventIndex will be updated to use SIGEMT. 
*/ if ( !( ctrl->flags[EventIndex] & CPC_OVF_NOTIFY_EMT ) ) { #ifdef DEBUG SUBDBG( " -> %s: Need to activate SIGEMT on PIC %d\n", __func__, EventIndex ); #endif /* Enable overflow handling */ if ( __cpc_enable_sigemt( ctrl, EventIndex ) != PAPI_OK ) { #ifdef DEBUG SUBDBG( " -> %s: Activating SIGEMT failed for PIC %d\n", __func__, EventIndex ); #endif return PAPI_ESYS; } } #ifdef DEBUG SUBDBG( " -> %s: SIGEMT activated, will install signal handler\n", __func__ ); #endif // FIXME: Not really sure that this construct is working return _papi_hwi_start_signal( SIGEMT, 1, 0 ); } else { #ifdef DEBUG SUBDBG( " -> %s: Disabling overflow handling\n", __func__ ); #endif /* Resetting values which were used for overflow handling */ ctrl->preset[EventIndex] = DEFAULT_CNTR_PRESET; ctrl->flags[EventIndex] &= ~( CPC_OVF_NOTIFY_EMT ); ctrl->threshold[EventIndex] = 0; ctrl->hangover[EventIndex] = 0; #ifdef DEBUG SUBDBG( " -> %s:ctrl->preset[%d]=%d, ctrl->flags[%d]=%p\n", __func__, EventIndex, ctrl->preset[EventIndex], EventIndex, ctrl->flags[EventIndex] ); #endif /* Recreate the undelying set and disable the signal handler */ __CHECK_ERR_PAPI( __cpc_recreate_set( ctrl ) ); __CHECK_ERR_PAPI( _papi_hwi_stop_signal( SIGEMT ) ); } #ifdef DEBUG SUBDBG( "LEAVING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif return PAPI_OK; } int _niagara2_shutdown( hwd_context_t * ctx ) { #ifdef DEBUG SUBDBG( "ENTERING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif cpc_buf_destroy( cpc, ctx->counter_buffer ); cpc_set_destroy( cpc, ctx->set ); #ifdef DEBUG SUBDBG( "LEAVING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif return PAPI_OK; } int _niagara2_shutdown_global( void ) { #ifdef DEBUG SUBDBG( "ENTERING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif /* Free allocated memory */ // papi_calloc in __cpc_build_ntv_table papi_free( __t2_store.pic_ntv_count ); // papi_calloc in __cpc_build_ntv_table papi_free( __t2_ntv_events 
); // papi_calloc in __cpc_build_pst_table papi_free( preset_table ); /* Shutdown libcpc */ // cpc_open in _papi_init_component cpc_close( cpc ); #ifdef DEBUG SUBDBG( "LEAVING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif return PAPI_OK; } int _niagara2_start( hwd_context_t * ctx, hwd_control_state_t * ctrl ) { int retval; #ifdef DEBUG SUBDBG( "ENTERING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); SUBDBG( " -> %s: Starting EventSet %p\n", __func__, ctrl ); #endif #ifdef SYNTHETIC_EVENTS_SUPPORTED { #ifdef DEBUG SUBDBG( " -> %s: Event count: ctrl->count=%d, ctrl->syn_count=%d\n", __func__, ctrl->count, ctrl->syn_count ); #endif if ( ctrl->count > 0 && ctrl->count == ctrl->syn_count ) { ctrl->idx[0] = cpc_set_add_request( cpc, ctrl->set, "Instr_cnt", ctrl->preset[0], ctrl->flags[0], 0, NULL ); ctrl->counter_buffer = cpc_buf_create( cpc, ctrl->set ); } } #endif #ifdef DEBUG { int i; for ( i = 0; i < MAX_COUNTERS; i++ ) { SUBDBG( " -> %s: Flags for PIC#%d: ctrl->flags[%d]=%d\n", __func__, i, i, ctrl->flags[i] ); } } #endif __CHECK_ERR_DFLT( cpc_bind_curlwp( cpc, ctrl->set, CPC_BIND_LWP_INHERIT ) ); /* Ensure the set is working properly */ retval = cpc_set_sample( cpc, ctrl->set, ctrl->counter_buffer ); if ( retval != 0 ) { printf( "%s: cpc_set_sample failed, return=%d, errno=%d\n", __func__, retval, errno ); return PAPI_ECMP; } #ifdef DEBUG SUBDBG( "LEAVING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif return PAPI_OK; } int _niagara2_stop( hwd_context_t * ctx, hwd_control_state_t * ctrl ) { #ifdef DEBUG SUBDBG( "ENTERING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif __CHECK_ERR_DFLT( cpc_unbind( cpc, ctrl->set ) ); #ifdef DEBUG SUBDBG( "LEAVING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif return PAPI_OK; } int _niagara2_update_control_state( hwd_control_state_t * ctrl, NativeInfo_t * native, int count, hwd_context_t * ctx ) { int i; #ifdef DEBUG SUBDBG( "ENTERING 
FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif /* Delete everything as we can't change an existing set */ if ( ctrl->counter_buffer != NULL ) { __CHECK_ERR_DFLT( cpc_buf_destroy( cpc, ctrl->counter_buffer ) ); } if ( ctrl->set != NULL ) { __CHECK_ERR_DFLT( cpc_set_destroy( cpc, ctrl->set ) ); } for ( i = 0; i < MAX_COUNTERS; i++ ) { ctrl->idx[i] = EVENT_NOT_SET; } /* New setup */ ctrl->set = cpc_set_create( cpc ); __CHECK_ERR_NULL( ctrl->set ); ctrl->count = count; ctrl->syn_count = 0; for ( i = 0; i < count; i++ ) { /* Store the active event */ ctrl->code[i].event_code = native[i].ni_event & PAPI_NATIVE_AND_MASK; ctrl->flags[i] = __cpc_domain_translator( PAPI_DOM_USER ); ctrl->preset[i] = DEFAULT_CNTR_PRESET; #ifdef DEBUG SUBDBG ( " -> %s: EventSet@%p/PIC#%d - ntv request >>%s<< (%d), flags=%#x\n", __func__, ctrl, i, __t2_ntv_events[ctrl->code[i].event_code], ctrl->code[i].event_code, ctrl->flags[i] ); #endif /* Store the counter position (???) */ native[i].ni_position = i; #ifdef SYNTHETIC_EVENTS_SUPPORTED { int syn_code = ctrl->code[i].event_code - ( _niagara2_vector.cmp_info.num_native_events - __t2_store.syn_evt_count ) - 1; /* Check if the event code is bigger than the CPC provided events. 
*/ if ( syn_code >= 0 ) { #ifdef DEBUG SUBDBG ( " -> %s: Adding synthetic event %#x (%s) on position %d\n", __func__, native[i].ni_event, __t2_ntv_events[ctrl->code[i].event_code], i ); #endif /* Call the setup routine */ __int_setup_synthetic_event( syn_code, ctrl, NULL ); /* Clean the hangover count as this event is new */ ctrl->syn_hangover[i] = 0; /* Register this event as being synthetic, as an event set only based on synthetic events can not be actived through libcpc */ ctrl->syn_count++; /* Jump to next iteration */ continue; } } #endif #ifdef DEBUG SUBDBG( " -> %s: Adding native event %#x (%s) on position %d\n", __func__, native[i].ni_event, __t2_ntv_events[ctrl->code[i].event_code], i ); #endif /* Pass the event as request to libcpc */ ctrl->idx[i] = cpc_set_add_request( cpc, ctrl->set, __t2_ntv_events[ctrl->code[i]. event_code], ctrl->preset[i], ctrl->flags[i], 0, NULL ); __CHECK_ERR_NEGV( ctrl->idx[i] ); } #ifdef DEBUG if ( i == 0 ) { SUBDBG( " -> %s: nothing added\n", __func__ ); } #endif ctrl->counter_buffer = cpc_buf_create( cpc, ctrl->set ); __CHECK_ERR_NULL( ctrl->counter_buffer ); /* Finished the new setup */ /* Linking to context (same data type by typedef!) 
*/ ctx = ctrl; #ifdef DEBUG SUBDBG( "LEAVING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif return PAPI_OK; } int _niagara2_update_shlib_info( papi_mdi_t *mdi ) { char *file = "/proc/self/map"; char *resolve_pattern = "/proc/self/path/%s"; char lastobject[PRMAPSZ]; char link[PAPI_HUGE_STR_LEN]; char path[PAPI_HUGE_STR_LEN]; prmap_t mapping; int fd, count = 0, total = 0, position = -1, first = 1; caddr_t t_min, t_max, d_min, d_max; PAPI_address_map_t *pam, *cur; #ifdef DEBUG SUBDBG( "ENTERING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif fd = open( file, O_RDONLY ); if ( fd == -1 ) { return PAPI_ESYS; } memset( lastobject, 0, PRMAPSZ ); #ifdef DEBUG SUBDBG( " -> %s: Preprocessing memory maps from procfs\n", __func__ ); #endif /* Search through the list of mappings in order to identify a) how many mappings are available and b) how many unique mappings are available. */ while ( read( fd, &mapping, sizeof ( prmap_t ) ) > 0 ) { #ifdef DEBUG SUBDBG( " -> %s: Found a new memory map entry\n", __func__ ); #endif /* Another entry found, just the total count of entries. */ total++; /* Is the mapping accessible and not anonymous? */ if ( mapping.pr_mflags & ( MA_READ | MA_WRITE | MA_EXEC ) && !( mapping.pr_mflags & MA_ANON ) ) { /* Test if a new library has been found. If a new library has been found a new entry needs to be counted. 
*/ if ( strcmp( lastobject, mapping.pr_mapname ) != 0 ) { strncpy( lastobject, mapping.pr_mapname, PRMAPSZ ); count++; #ifdef DEBUG SUBDBG( " -> %s: Memory mapping entry valid for %s\n", __func__, mapping.pr_mapname ); #endif } } } #ifdef DEBUG SUBDBG( " -> %s: Preprocessing done, starting to analyze\n", __func__ ); #endif /* Start from the beginning, now fill in the found mappings */ if ( lseek( fd, 0, SEEK_SET ) == -1 ) { return PAPI_ESYS; } memset( lastobject, 0, PRMAPSZ ); /* Allocate memory */ pam = ( PAPI_address_map_t * ) papi_calloc( count, sizeof ( PAPI_address_map_t ) ); while ( read( fd, &mapping, sizeof ( prmap_t ) ) > 0 ) { if ( mapping.pr_mflags & MA_ANON ) { #ifdef DEBUG SUBDBG ( " -> %s: Anonymous mapping (MA_ANON) found for %s, skipping\n", __func__, mapping.pr_mapname ); #endif continue; } /* Check for a new entry */ if ( strcmp( mapping.pr_mapname, lastobject ) != 0 ) { #ifdef DEBUG SUBDBG( " -> %s: Analyzing mapping for %s\n", __func__, mapping.pr_mapname ); #endif cur = &( pam[++position] ); strncpy( lastobject, mapping.pr_mapname, PRMAPSZ ); snprintf( link, PAPI_HUGE_STR_LEN, resolve_pattern, lastobject ); memset( path, 0, PAPI_HUGE_STR_LEN ); readlink( link, path, PAPI_HUGE_STR_LEN ); strncpy( cur->name, path, PAPI_HUGE_STR_LEN ); #ifdef DEBUG SUBDBG( " -> %s: Resolved name for %s: %s\n", __func__, mapping.pr_mapname, cur->name ); #endif } if ( mapping.pr_mflags & MA_READ ) { /* Data (MA_WRITE) or text (MA_READ) segment? 
*/ if ( mapping.pr_mflags & MA_WRITE ) { cur->data_start = ( caddr_t ) mapping.pr_vaddr; cur->data_end = ( caddr_t ) ( mapping.pr_vaddr + mapping.pr_size ); if ( strcmp ( cur->name, _papi_hwi_system_info.exe_info.fullname ) == 0 ) { _papi_hwi_system_info.exe_info.address_info.data_start = cur->data_start; _papi_hwi_system_info.exe_info.address_info.data_end = cur->data_end; } if ( first ) d_min = cur->data_start; if ( first ) d_max = cur->data_end; if ( cur->data_start < d_min ) { d_min = cur->data_start; } if ( cur->data_end > d_max ) { d_max = cur->data_end; } } else if ( mapping.pr_mflags & MA_EXEC ) { cur->text_start = ( caddr_t ) mapping.pr_vaddr; cur->text_end = ( caddr_t ) ( mapping.pr_vaddr + mapping.pr_size ); if ( strcmp ( cur->name, _papi_hwi_system_info.exe_info.fullname ) == 0 ) { _papi_hwi_system_info.exe_info.address_info.text_start = cur->text_start; _papi_hwi_system_info.exe_info.address_info.text_end = cur->text_end; } if ( first ) t_min = cur->text_start; if ( first ) t_max = cur->text_end; if ( cur->text_start < t_min ) { t_min = cur->text_start; } if ( cur->text_end > t_max ) { t_max = cur->text_end; } } } first = 0; } close( fd ); /* During the walk of shared objects the upper and lower bound of the segments could be discovered. The bounds are stored in the PAPI info structure. The information is important for the profiling functions of PAPI. 
*/ /* This variant would pass the addresses of all text and data segments _papi_hwi_system_info.exe_info.address_info.text_start = t_min; _papi_hwi_system_info.exe_info.address_info.text_end = t_max; _papi_hwi_system_info.exe_info.address_info.data_start = d_min; _papi_hwi_system_info.exe_info.address_info.data_end = d_max; */ #ifdef DEBUG SUBDBG( " -> %s: Analysis of memory maps done, results:\n", __func__ ); SUBDBG( " -> %s: text_start=%#x, text_end=%#x, text_size=%lld\n", __func__, _papi_hwi_system_info.exe_info.address_info.text_start, _papi_hwi_system_info.exe_info.address_info.text_end, _papi_hwi_system_info.exe_info.address_info.text_end - _papi_hwi_system_info.exe_info.address_info.text_start ); SUBDBG( " -> %s: data_start=%#x, data_end=%#x, data_size=%lld\n", __func__, _papi_hwi_system_info.exe_info.address_info.data_start, _papi_hwi_system_info.exe_info.address_info.data_end, _papi_hwi_system_info.exe_info.address_info.data_end - _papi_hwi_system_info.exe_info.address_info.data_start ); #endif /* Store the map read and the total count of shlibs found */ _papi_hwi_system_info.shlib_info.map = pam; _papi_hwi_system_info.shlib_info.count = count; #ifdef DEBUG SUBDBG( "LEAVING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif return PAPI_OK; } ////////////////////////////////////////////////////////////////////////////////// /// UTILITY FUNCTIONS FOR ACCESS TO LIBCPC AND SOLARIS ///////////////////////// //////////////////////////////////////////////////////////////////////////////// /* DESCRIPTION: * ----------------------------------------------------------------------------- * The following functions are for accessing libcpc 2 and Solaris related stuff * needed for PAPI. 
******************************************************************************/ static inline int __cpc_build_ntv_table( void ) { int i, tmp; #ifdef DEBUG SUBDBG( "ENTERING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif __t2_store.pic_ntv_count = papi_calloc( __t2_store.npic, sizeof ( int ) ); __CHECK_ERR_NULL( __t2_store.pic_ntv_count ); #ifdef DEBUG SUBDBG( " -> %s: Checking PICs for functionality\n", __func__ ); #endif for ( i = 0; i < __t2_store.npic; i++ ) { cpc_walk_events_pic( cpc, i, NULL, __cpc_walk_events_pic_action_count ); #ifdef DEBUG SUBDBG( " -> %s: Found %d events on PIC#%d\n", __func__, __t2_store.pic_ntv_count[i], i ); #endif } tmp = __t2_store.pic_ntv_count[0]; /* There should be at least one counter... */ if ( tmp == 0 ) { #ifdef DEBUG SUBDBG( " -> %s: PIC#0 has 0 events\n", __func__ ); #endif return PAPI_ECMP; } /* Check if all PICs have the same number of counters */ for ( i = 0; i < __t2_store.npic; i++ ) { if ( __t2_store.pic_ntv_count[i] != tmp ) { #ifdef DEBUG SUBDBG( " -> %s: PIC#%d has %d events, should have %d\n", __func__, i, __t2_store.pic_ntv_count[i], tmp ); #endif return PAPI_ECMP; } } /* Count synthetic events which add functionality to libcpc */ #ifdef SYNTHETIC_EVENTS_SUPPORTED __t2_store.syn_evt_count = 0; __int_walk_synthetic_events_action_count( ); #endif /* Store the count of events available in central data structure */ #ifndef SYNTHETIC_EVENTS_SUPPORTED _niagara2_vector.cmp_info.num_native_events = __t2_store.pic_ntv_count[0]; #else _niagara2_vector.cmp_info.num_native_events = __t2_store.pic_ntv_count[0] + __t2_store.syn_evt_count; #endif /* Allocate memory for storing all events found, including the first empty slot */ __t2_ntv_events = papi_calloc( _niagara2_vector.cmp_info.num_native_events + 1, sizeof ( char * ) ); __t2_ntv_events[0] = "THIS IS A BUG!"; tmp = 1; cpc_walk_events_pic( cpc, 0, ( void * ) &tmp, __cpc_walk_events_pic_action_store ); #ifdef SYNTHETIC_EVENTS_SUPPORTED 
__int_walk_synthetic_events_action_store( ); #endif #ifdef DEBUG for ( i = 1; i < __t2_store.pic_ntv_count[0]; i++ ) { SUBDBG( " -> %s: Event #%d: %s\n", __func__, i, __t2_ntv_events[i] ); } #endif #ifdef DEBUG SUBDBG( "LEAVING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif return PAPI_OK; } /* Return event code for event_name */ static inline int __cpc_search_ntv_event( char *event_name, int *event_code ) { int i; for ( i = 0; i < _niagara2_vector.cmp_info.num_native_events; i++ ) { if ( strcmp( event_name, __t2_ntv_events[i] ) == 0 ) { *event_code = i; return PAPI_OK; } } return PAPI_ENOEVNT; } static inline int __cpc_build_pst_table( void ) { int num_psts, i, j, event_code, pst_events; hwi_search_t tmp; #ifdef DEBUG SUBDBG( "ENTERING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif num_psts = 0; while ( __t2_table[num_psts].papi_pst != 0 ) { num_psts++; } #ifdef DEBUG SUBDBG( " -> %s: Found %d presets\n", __func__, num_psts ); #endif preset_table = papi_calloc( num_psts + 1, sizeof ( hwi_search_t ) ); __CHECK_ERR_NULL( preset_table ); pst_events = 0; for ( i = 0; i < num_psts; i++ ) { memset( &tmp, PAPI_NULL, sizeof ( tmp ) ); /* Mark counters as unused. If they are needed, they will be overwritten later. See papi_preset.c:51 for more details. 
*/ for ( j = 0; j < PAPI_EVENTS_IN_DERIVED_EVENT; j++ ) { tmp.data.native[j] = PAPI_NULL; } tmp.event_code = __t2_table[i].papi_pst; tmp.data.derived = __t2_table[i].ntv_opcode; tmp.data.operation[0] = '\0'; switch ( __t2_table[i].ntv_opcode ) { case DERIVED_ADD: tmp.data.operation[0] = '+'; break; case DERIVED_SUB: tmp.data.operation[0] = '-'; break; } for ( j = 0; j < __t2_table[i].ntv_ctrs; j++ ) { if ( __cpc_search_ntv_event ( __t2_table[i].ntv_event[j], &event_code ) >= PAPI_OK ) { tmp.data.native[j] = event_code; } else { continue; } } #ifdef DEBUG SUBDBG( " -> %s: pst row %d - event_code=%d\n", __func__, i, tmp.event_code ); SUBDBG( " -> %s: pst row %d - data.derived=%d, data.operation=%c\n", __func__, i, tmp.data.derived, tmp.data.operation[0] ); SUBDBG( " -> %s: pst row %d - native event codes:\n", __func__, i ); { int d_i; for ( d_i = 0; d_i < PAPI_EVENTS_IN_DERIVED_EVENT; d_i++ ) { SUBDBG( " -> %s: pst row %d - +++ data.native[%d]=%d\n", __func__, i, d_i, tmp.data.native[d_i] ); } } #endif memcpy( &preset_table[i], &tmp, sizeof ( tmp ) ); pst_events++; } // Check! 
memset( &preset_table[num_psts], 0, sizeof ( hwi_search_t ) ); _niagara2_vector.cmp_info.num_preset_events = pst_events; #ifdef DEBUG SUBDBG( "LEAVING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif return PAPI_OK; } static inline int __cpc_recreate_set( hwd_control_state_t * ctrl ) { #ifdef SYNTHETIC_EVENTS_SUPPORTED const int syn_barrier = _niagara2_vector.cmp_info.num_native_events - __t2_store.syn_evt_count; #endif int i; #ifdef DEBUG SUBDBG( "ENTERING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif /* Destroy the old buffer and the old set if they exist, we need to do a full recreate as changing flags or events through libcpc is not possible */ if ( ctrl->counter_buffer != NULL ) { __CHECK_ERR_DFLT( cpc_buf_destroy( cpc, ctrl->counter_buffer ) ); } if ( ctrl->set != NULL ) { __CHECK_ERR_DFLT( cpc_set_destroy( cpc, ctrl->set ) ); } /* Create a new set */ ctrl->set = cpc_set_create( cpc ); __CHECK_ERR_NULL( ctrl->set ); for ( i = 0; i < ctrl->count; i++ ) { #ifdef DEBUG SUBDBG( " -> %s: Adding native event %#x (%s) on position %d\n", __func__, ctrl->code[i].event_code, __t2_ntv_events[ctrl->code[i].event_code], i ); SUBDBG( " -> %s: Event setup: ctrl->code[%d].event_code=%#x\n", __func__, i, ctrl->code[i].event_code ); SUBDBG( " -> %s: Event setup: ctrl->preset[%d]=%d\n", __func__, i, ctrl->preset[i] ); SUBDBG( " -> %s: Event setup: ctrl->flags[%d]=%#x\n", __func__, i, ctrl->flags[i] ); #endif #ifdef SYNTHETIC_EVENTS_SUPPORTED /* Ensure that synthetic events are skipped */ if ( ctrl->code[i].event_code >= syn_barrier ) { #ifdef DEBUG SUBDBG( " -> %s: Skipping counter %d, synthetic event found\n", __func__, i ); #endif /* Next iteration */ continue; } #endif ctrl->idx[i] = cpc_set_add_request( cpc, ctrl->set, __t2_ntv_events[ctrl->code[i]. 
event_code], ctrl->preset[i], ctrl->flags[i], 0, NULL ); __CHECK_ERR_NEGV( ctrl->idx[i] ); } ctrl->counter_buffer = cpc_buf_create( cpc, ctrl->set ); __CHECK_ERR_NULL( ctrl->counter_buffer ); #ifdef DEBUG SUBDBG( "LEAVING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif return PAPI_OK; } static inline int __cpc_domain_translator( const int papi_domain ) { int domain = 0; #ifdef DEBUG SUBDBG( "ENTERING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); SUBDBG( " -> %s: papi_domain=%d requested\n", __func__, papi_domain ); #endif if ( papi_domain & PAPI_DOM_USER ) { #ifdef DEBUG SUBDBG( " -> %s: Domain PAPI_DOM_USER/CPC_COUNT_USER selected\n", __func__ ); #endif domain |= CPC_COUNT_USER; } if ( papi_domain & PAPI_DOM_KERNEL ) { #ifdef DEBUG SUBDBG( " -> %s: Domain PAPI_DOM_KERNEL/CPC_COUNT_SYSTEM selected\n", __func__ ); #endif domain |= CPC_COUNT_SYSTEM; } if ( papi_domain & PAPI_DOM_SUPERVISOR ) { #ifdef DEBUG SUBDBG( " -> %s: Domain PAPI_DOM_SUPERVISOR/CPC_COUNT_HV selected\n", __func__ ); #endif domain |= CPC_COUNT_HV; } #ifdef DEBUG SUBDBG( " -> %s: domain=%d\n", __func__, domain ); #endif return domain; } void __cpc_error_handler( const char *fn, int subcode, const char *fmt, va_list ap ) { #ifdef DEBUG SUBDBG( "ENTERING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif /* From the libcpc manpages */ fprintf( stderr, "ERROR - libcpc error handler in %s() called!\n", fn ); vfprintf( stderr, fmt, ap ); #ifdef DEBUG SUBDBG( "LEAVING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif } static inline int __cpc_enable_sigemt( hwd_control_state_t * ctrl, int position ) { #ifdef DEBUG SUBDBG( "ENTERING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif if ( position >= MAX_COUNTERS ) { #ifdef DEBUG SUBDBG( " -> %s: Position of the counter does not exist\n", __func__ ); #endif return PAPI_EINVAL; } ctrl->flags[position] = ctrl->flags[position] | CPC_OVF_NOTIFY_EMT; #ifdef DEBUG SUBDBG( 
"ENTERING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif return __cpc_recreate_set( ctrl ); } void __cpc_walk_events_pic_action_count( void *arg, uint_t picno, const char *event ) { #ifdef DEBUG SUBDBG( "ENTERING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif __t2_store.pic_ntv_count[picno]++; #ifdef DEBUG SUBDBG ( " -> %s: Found one native event on PIC#%d (now totally %d events)\n", __func__, picno, __t2_store.pic_ntv_count[picno] ); #endif #ifdef DEBUG SUBDBG( "LEAVING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif } void __cpc_walk_events_pic_action_store( void *arg, uint_t picno, const char *event ) { int *tmp = ( int * ) arg; #ifdef DEBUG SUBDBG( "ENTERING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif __t2_ntv_events[*tmp] = papi_strdup( event ); #ifdef DEBUG SUBDBG( " -> %s: Native event >>%s<< registered\n", __func__, __t2_ntv_events[*tmp] ); #endif *tmp = *tmp + 1; #ifdef DEBUG SUBDBG( "LEAVING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif } static inline int __sol_get_processor_clock( void ) { processor_info_t pinfo; #ifdef DEBUG SUBDBG( "ENTERING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif // Fetch information from the first processor in the system if ( processor_info( getcpuid( ), &pinfo ) == 0 ) { #ifdef DEBUG SUBDBG( " -> %s: Clock at %d MHz\n", __func__, pinfo.pi_clock ); #endif return pinfo.pi_clock; } #ifdef DEBUG SUBDBG( "LEAVING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif return PAPI_ESYS; } /* This function either increases the ns supplied to itimer_res_ns or pads it up * to a multiple of itimer_res_ns if the value is bigger than itimer_res_ns. * * The source is taken from the old component. 
*/ static inline int __sol_get_itimer_ns( int ns ) { if ( ns < _papi_os_info.itimer_res_ns ) { return _papi_os_info.itimer_res_ns; } else { int leftover_ns = ns % _papi_os_info.itimer_res_ns; return ns + leftover_ns; } } static inline lwpstatus_t * __sol_get_lwp_status( const pid_t pid, const lwpid_t lwpid ) { char *pattern = "/proc/%d/lwp/%d/lwpstatus"; char filename[PAPI_MIN_STR_LEN]; int fd; static lwpstatus_t lwp; #ifdef DEBUG SUBDBG( "ENTERING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif memset( &lwp, 0, sizeof ( lwp ) ); snprintf( filename, PAPI_MIN_STR_LEN, pattern, pid, lwpid ); fd = open( filename, O_RDONLY ); if ( fd == -1 ) return NULL; read( fd, ( void * ) &lwp, sizeof ( lwp ) ); close( fd ); #ifdef DEBUG SUBDBG( "LEAVING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif return &lwp; } static inline psinfo_t * __sol_get_proc_info( const pid_t pid ) { char *pattern = "/proc/%d/psinfo"; char filename[PAPI_MIN_STR_LEN]; int fd; static psinfo_t proc; #ifdef DEBUG SUBDBG( "ENTERING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif memset( &proc, 0, sizeof ( proc ) ); snprintf( filename, PAPI_MIN_STR_LEN, pattern, pid ); fd = open( filename, O_RDONLY ); if ( fd == -1 ) return NULL; read( fd, ( void * ) &proc, sizeof ( proc ) ); close( fd ); #ifdef DEBUG SUBDBG( "LEAVING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif return &proc; } static inline pstatus_t * __sol_get_proc_status( const pid_t pid ) { char *pattern = "/proc/%d/status"; char filename[PAPI_MIN_STR_LEN]; int fd; static pstatus_t proc; #ifdef DEBUG SUBDBG( "ENTERING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif memset( &proc, 0, sizeof ( proc ) ); snprintf( filename, PAPI_MIN_STR_LEN, pattern, pid ); fd = open( filename, O_RDONLY ); if ( fd == -1 ) return NULL; read( fd, ( void * ) &proc, sizeof ( proc ) ); close( fd ); #ifdef DEBUG SUBDBG( "LEAVING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, 
__LINE__ ); #endif return &proc; } /* This function handles synthetic events and returns their result. Synthetic * events are events retrieved from outside of libcpc, e.g. all events which * can not be retrieved using cpc_set_add_request/cpc_buf_get. */ #ifdef SYNTHETIC_EVENTS_SUPPORTED uint64_t __int_get_synthetic_event( int code, hwd_control_state_t * ctrl, void *arg ) { #ifdef DEBUG SUBDBG( "ENTERING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif switch ( code ) { case SYNTHETIC_CYCLES_ELAPSED: /* Return the count of ticks this set was bound. If a reset of the set has been executed the last count will be subtracted. */ { int *i = ( int * ) arg; return cpc_buf_tick( cpc, ctrl->counter_buffer ) - ctrl->syn_hangover[*i]; } case SYNTHETIC_RETURN_ONE: // The name says it - only for testing purposes. #ifdef DEBUG SUBDBG( "LEAVING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif return 1; case SYNTHETIC_RETURN_TWO: // The name says it - only for testing purposes. 
#ifdef DEBUG SUBDBG( "LEAVING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif return 2; default: #ifdef DEBUG SUBDBG( "LEAVING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif return PAPI_EINVAL; } } #endif #ifdef SYNTHETIC_EVENTS_SUPPORTED int __int_setup_synthetic_event( int code, hwd_control_state_t * ctrl, void *arg ) { #ifdef DEBUG SUBDBG( "ENTERING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif switch ( code ) { case SYNTHETIC_CYCLES_ELAPSED: #ifdef DEBUG SUBDBG( "LEAVING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif return PAPI_OK; default: #ifdef DEBUG SUBDBG( "LEAVING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif return PAPI_EINVAL; } #ifdef DEBUG SUBDBG( "LEAVING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif } #endif #ifdef SYNTHETIC_EVENTS_SUPPORTED void __int_walk_synthetic_events_action_count( void ) { int i = 0; #ifdef DEBUG SUBDBG( "ENTERING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif /* Count all synthetic events in __int_syn_table, the last event is marked with an event code of -1. 
*/ while ( __int_syn_table[i].code != -1 ) { __t2_store.syn_evt_count++; i++; } #ifdef DEBUG SUBDBG( "LEAVING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif } #endif #ifdef SYNTHETIC_EVENTS_SUPPORTED void __int_walk_synthetic_events_action_store( void ) { /* The first index of a synthetic event starts after last native event */ int i = 0; int offset = _niagara2_vector.cmp_info.num_native_events + 1 - __t2_store.syn_evt_count; #ifdef DEBUG SUBDBG( "ENTERING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif while ( i < __t2_store.syn_evt_count ) { __t2_ntv_events[i + offset] = papi_strdup( __int_syn_table[i].name ); i++; } #ifdef DEBUG SUBDBG( "LEAVING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif } #endif papi_vector_t _niagara2_vector = { /************* COMPONENT CAPABILITIES/INFORMATION/ETC ************************/ .cmp_info = { .name = "solaris-niagara2", .description = "Solaris Counters", .num_cntrs = MAX_COUNTERS, .num_mpx_cntrs = MAX_COUNTERS, .default_domain = PAPI_DOM_USER, .available_domains = ( PAPI_DOM_USER | PAPI_DOM_KERNEL | PAPI_DOM_SUPERVISOR ), .default_granularity = PAPI_GRN_THR, .available_granularities = PAPI_GRN_THR, .fast_real_timer = 1, .fast_virtual_timer = 1, .attach = 1, .attach_must_ptrace = 1, .hardware_intr = 1, .hardware_intr_sig = SIGEMT, .precise_intr = 1, } , /************* COMPONENT DATA STRUCTURE SIZES ********************************/ .size = { .context = sizeof ( hwd_context_t ), .control_state = sizeof ( hwd_control_state_t ), .reg_value = sizeof ( hwd_register_t ), .reg_alloc = sizeof ( niagara2_reg_alloc_t ), } , /************* COMPONENT INTERFACE FUNCTIONS *********************************/ .init_control_state = _niagara2_init_control_state, .start = _niagara2_start, .stop = _niagara2_stop, .read = _niagara2_read, .write = NULL, /* NOT IMPLEMENTED */ .shutdown_thread = _niagara2_shutdown, .shutdown_component = _niagara2_shutdown_global, .ctl = _niagara2_ctl, 
.update_control_state = _niagara2_update_control_state, .set_domain = _niagara2_set_domain, .reset = _niagara2_reset, .set_overflow = _niagara2_set_overflow, .set_profile = _niagara2_set_profile, .stop_profiling = NULL, /* NOT IMPLEMENTED */ .ntv_enum_events = _niagara2_ntv_enum_events, .ntv_name_to_code = NULL, /* NOT IMPLEMENTED */ .ntv_code_to_name = _niagara2_ntv_code_to_name, .ntv_code_to_descr = _niagara2_ntv_code_to_descr, .ntv_code_to_bits = _niagara2_ntv_code_to_bits, .init_component = _niagara2_init_component, .dispatch_timer = _niagara2_dispatch_timer, }; papi_os_vector_t _papi_os_vector = { .get_memory_info = _niagara2_get_memory_info, .get_dmem_info = _solaris_get_dmem_info, .get_real_usec = _solaris_get_real_usec, .get_real_cycles = _solaris_get_real_cycles, .get_virt_usec = _solaris_get_virt_usec, .update_shlib_info = _solaris_update_shlib_info, .get_system_info = _solaris_get_system_info, }; papi-5.6.0/src/perfctr-2.6.x/examples/global/arm.c000664 001750 001750 00000001120 13216244366 023540 0ustar00jshenry1963jshenry1963000000 000000 /* $Id: arm.c,v 1.1.2.1 2007/02/11 20:14:31 mikpe Exp $ * ARM-specific code. 
* * Copyright (C) 2005-2007 Mikael Pettersson */ #include #include #include #include "libperfctr.h" #include "arch.h" void setup_control(const struct perfctr_info *info, struct perfctr_cpu_control *control) { memset(control, 0, sizeof *control); switch (info->cpu_type) { case PERFCTR_ARM_XSC1: case PERFCTR_ARM_XSC2: control->nractrs = 1; control->pmc_map[0] = 0; control->evntsel[0] = 0x07; /* INSTRUCTIONS_EXECUTED */ counting_mips = 1; } } papi-5.6.0/man/000775 001750 001750 00000000000 13216244355 015311 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/man/man3/PAPI_exe_info_t.3000664 001750 001750 00000001113 13216244356 021160 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPI_exe_info_t" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPI_exe_info_t \- .PP get the executable's info .SH SYNOPSIS .br .PP .SS "Data Fields" .in +1c .ti -1c .RI "char \fBfullname\fP [1024]" .br .ti -1c .RI "\fBPAPI_address_map_t\fP \fBaddress_info\fP" .br .in -1c .SH "Field Documentation" .PP .SS "\fBPAPI_address_map_t\fP PAPI_exe_info_t::address_info" executable's address space info .SS "char PAPI_exe_info_t::fullname[1024]" path + name .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. papi-5.6.0/man/man3/PAPIF_ipc.3000664 001750 001750 00000001022 13216244355 017720 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPIF_ipc" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPIF_ipc \- .PP Get instructions per cycle, real and processor time\&. .SH SYNOPSIS .br .PP .SH "Detailed Description" .PP .PP \fBFortran Interface:\fP .RS 4 #include 'fpapi\&.h' .br \fBPAPIF_ipc( C_FLOAT real_time, C_FLOAT proc_time, C_LONG_LONG ins, C_FLOAT ipc, C_INT check )\fP .RE .PP \fBSee Also:\fP .RS 4 \fBPAPI_ipc\fP .RE .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. 
papi-5.6.0/src/libpfm-3.y/lib/pfmlib_cell.c000664 001750 001750 00000043545 13216244363 022401 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_cell.c : support for the Cell PMU family * * Copyright (c) 2007 TOSHIBA CORPORATION based on code from * Copyright (c) 2001-2006 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include #include #include #include #include /* public headers */ #include /* private headers */ #include "pfmlib_priv.h" /* library private */ #include "pfmlib_cell_priv.h" /* architecture private */ #include "cell_events.h" /* PMU private */ #define SIGNAL_TYPE_CYCLES 0 #define PM_COUNTER_CTRL_CYLES 0x42C00000U #define PFM_CELL_NUM_PMCS 24 #define PFM_CELL_EVENT_MIN 1 #define PFM_CELL_EVENT_MAX 8 #define PMX_MIN_NUM 1 #define PMX_MAX_NUM 8 #define PFM_CELL_16BIT_CNTR_EVENT_MAX 8 #define PFM_CELL_32BIT_CNTR_EVENT_MAX 4 #define COMMON_REG_NUMS 8 #define ENABLE_WORD0 0 #define ENABLE_WORD1 1 #define ENABLE_WORD2 2 #define PFM_CELL_GRP_CONTROL_REG_GRP0_BIT 30 #define PFM_CELL_GRP_CONTROL_REG_GRP1_BIT 28 #define PFM_CELL_BASE_WORD_UNIT_FIELD_BIT 24 #define PFM_CELL_WORD_UNIT_FIELD_WIDTH 2 #define PFM_CELL_MAX_WORD_NUMBER 3 #define PFM_CELL_COUNTER_CONTROL_GRP1 0x80000000U #define PFM_CELL_DEFAULT_TRIGGER_EVENT_UNIT 0x00555500U #define PFM_CELL_PM_CONTROL_16BIT_CNTR_MASK 0x01E00000U #define PFM_CELL_PM_CONTROL_PPU_CNTR_MODE_PROBLEM 0x00080000U #define PFM_CELL_PM_CONTROL_PPU_CNTR_MODE_SUPERVISOR 0x00000000U #define PFM_CELL_PM_CONTROL_PPU_CNTR_MODE_HYPERVISOR 0x00040000U #define PFM_CELL_PM_CONTROL_PPU_CNTR_MODE_ALL 0x000C0000U #define PFM_CELL_PM_CONTROL_PPU_CNTR_MODE_MASK 0x000C0000U #define ONLY_WORD(x) \ ((x == WORD_0_ONLY)||(x == WORD_2_ONLY)) ? 
x : 0 struct pfm_cell_signal_group_desc { unsigned int signal_type; unsigned int word_type; unsigned long long word; unsigned long long freq; unsigned int subunit; }; #define swap_int(num1, num2) do { \ int tmp = num1; \ num1 = num2; \ num2 = tmp; \ } while(0) static int pfm_cell_detect(void) { int ret; char buffer[128]; ret = __pfm_getcpuinfo_attr("cpu", buffer, sizeof(buffer)); if (ret == -1) { return PFMLIB_ERR_NOTSUPP; } if (strcmp(buffer, "Cell Broadband Engine, altivec supported")) { return PFMLIB_ERR_NOTSUPP; } return PFMLIB_SUCCESS; } static int get_pmx_offset(int pmx_num, unsigned int *pmx_ctrl_bits) { /* pmx_num==0 -> not specified * pmx_num==1 -> pm0 * : * pmx_num==8 -> pm7 */ int i = 0; int offset; if ((pmx_num >= PMX_MIN_NUM) && (pmx_num <= PMX_MAX_NUM)) { /* offset is specified */ offset = (pmx_num - 1); if ((~*pmx_ctrl_bits >> offset) & 0x1) { *pmx_ctrl_bits |= (0x1 << offset); return offset; } else { /* offset is used */ return PFMLIB_ERR_INVAL; } } else if (pmx_num == 0){ /* offset is not specified */ while (((*pmx_ctrl_bits >> i) & 0x1) && (i < PMX_MAX_NUM)) { i++; } *pmx_ctrl_bits |= (0x1 << i); return i; } /* pmx_num is invalid */ return PFMLIB_ERR_INVAL; } static unsigned long long search_enable_word(int word) { unsigned long long count = 0; while ((~word) & 0x1) { count++; word >>= 1; } return count; } static int get_count_bit(unsigned int type) { int count = 0; while(type) { if (type & 1) { count++; } type >>= 1; } return count; } static int get_debug_bus_word(struct pfm_cell_signal_group_desc *group0, struct pfm_cell_signal_group_desc *group1) { unsigned int word_type0, word_type1; /* search enable word */ word_type0 = group0->word_type; word_type1 = group1->word_type; if (group1->signal_type == NONE_SIGNAL) { group0->word = search_enable_word(word_type0); goto found; } /* swap */ if ((get_count_bit(word_type0) > get_count_bit(word_type1)) || (group0->freq == PFM_CELL_PME_FREQ_SPU)) { swap_int(group0->signal_type, group1->signal_type); 
swap_int(group0->freq, group1->freq); swap_int(group0->word_type, group1->word_type); swap_int(group0->subunit, group1->subunit); swap_int(word_type0, word_type1); } if ((ONLY_WORD(word_type0) != 0) && (word_type0 == word_type1)) { return PFMLIB_ERR_INVAL; } if (ONLY_WORD(word_type0)) { group0->word = search_enable_word(ONLY_WORD(word_type0)); word_type1 &= ~(1UL << (group0->word)); group1->word = search_enable_word(word_type1); } else if (ONLY_WORD(word_type1)) { group1->word = search_enable_word(ONLY_WORD(word_type1)); word_type0 &= ~(1UL << (group1->word)); group0->word = search_enable_word(word_type0); } else { group0->word = ENABLE_WORD0; if (word_type1 == WORD_0_AND_1) { group1->word = ENABLE_WORD1; } else if(word_type1 == WORD_0_AND_2) { group1->word = ENABLE_WORD2; } else { return PFMLIB_ERR_INVAL; } } found: return PFMLIB_SUCCESS; } static unsigned int get_signal_type(unsigned long long event_code) { return (event_code & 0x00000000FFFFFFFFULL) / 100; } static unsigned int get_signal_bit(unsigned long long event_code) { return (event_code & 0x00000000FFFFFFFFULL) % 100; } static int is_spe_signal_group(unsigned int signal_type) { if (41 <= signal_type && signal_type <= 56) { return 1; } else { return 0; } } static int check_signal_type(pfmlib_input_param_t *inp, pfmlib_cell_input_param_t *mod_in, struct pfm_cell_signal_group_desc *group0, struct pfm_cell_signal_group_desc *group1) { pfmlib_event_t *e; unsigned int event_cnt; int signal_cnt = 0; int i; int cycles_signal_cnt = 0; unsigned int signal_type, subunit; e = inp->pfp_events; event_cnt = inp->pfp_event_count; for(i = 0; i < event_cnt; i++) { signal_type = get_signal_type(cell_pe[e[i].event].pme_code); if ((signal_type == SIGNAL_SPU_TRIGGER) || (signal_type == SIGNAL_SPU_EVENT)) { continue; } if (signal_type == SIGNAL_TYPE_CYCLES) { cycles_signal_cnt = 1; continue; } subunit = 0; if (is_spe_signal_group(signal_type)) { subunit = mod_in->pfp_cell_counters[i].spe_subunit; } switch(signal_cnt) { case 0: 
group0->signal_type = signal_type; group0->word_type = cell_pe[e[i].event].pme_enable_word; group0->freq = cell_pe[e[i].event].pme_freq; group0->subunit = subunit; signal_cnt++; break; case 1: if ((group0->signal_type != signal_type) || (is_spe_signal_group(signal_type) && group0->subunit != subunit)) { group1->signal_type = signal_type; group1->word_type = cell_pe[e[i].event].pme_enable_word; group1->freq = cell_pe[e[i].event].pme_freq; group1->subunit = subunit; signal_cnt++; } break; case 2: if ((group0->signal_type != signal_type) && (group1->signal_type != signal_type)) { DPRINT("signal count is invalid\n"); return PFMLIB_ERR_INVAL; } break; default: DPRINT("signal count is invalid\n"); return PFMLIB_ERR_INVAL; } } return (signal_cnt + cycles_signal_cnt); } /* * The assignment between the privilege leve options * and ppu-count-mode field in pm_control register. * * option ppu count mode(pm_control) * --------------------------------- * -u(-3) 0b10 : Problem mode * -k(-0) 0b00 : Supervisor mode * -1 0b00 : Supervisor mode * -2 0b01 : Hypervisor mode * two options 0b11 : Any mode * * Note : Hypervisor-mode and Any-mode don't work on PS3. 
* */ static unsigned int get_ppu_count_mode(unsigned int plm) { unsigned int ppu_count_mode = 0; switch (plm) { case PFM_PLM0: case PFM_PLM1: ppu_count_mode = PFM_CELL_PM_CONTROL_PPU_CNTR_MODE_SUPERVISOR; break; case PFM_PLM2: ppu_count_mode = PFM_CELL_PM_CONTROL_PPU_CNTR_MODE_HYPERVISOR; break; case PFM_PLM3: ppu_count_mode = PFM_CELL_PM_CONTROL_PPU_CNTR_MODE_PROBLEM; break; default : ppu_count_mode = PFM_CELL_PM_CONTROL_PPU_CNTR_MODE_ALL; break; } return ppu_count_mode; } static int pfm_cell_dispatch_counters(pfmlib_input_param_t *inp, pfmlib_cell_input_param_t *mod_in, pfmlib_output_param_t *outp) { pfmlib_event_t *e; pfmlib_reg_t *pc, *pd; unsigned int event_cnt; unsigned int signal_cnt = 0, pmcs_cnt = 0; unsigned int signal_type; unsigned long long signal_bit; struct pfm_cell_signal_group_desc group[2]; int pmx_offset = 0; int i, ret; int input_control, polarity, count_cycle, count_enable; unsigned long long subunit; int shift0, shift1; unsigned int pmx_ctrl_bits; int max_event_cnt = PFM_CELL_32BIT_CNTR_EVENT_MAX; count_enable = 1; group[0].signal_type = group[1].signal_type = NONE_SIGNAL; group[0].word = group[1].word = 0L; group[0].freq = group[1].freq = 0L; group[0].subunit = group[1].subunit = 0; group[0].word_type = group[1].word_type = WORD_NONE; event_cnt = inp->pfp_event_count; e = inp->pfp_events; pc = outp->pfp_pmcs; pd = outp->pfp_pmds; /* check event_cnt */ if (mod_in->control & PFM_CELL_PM_CONTROL_16BIT_CNTR_MASK) max_event_cnt = PFM_CELL_16BIT_CNTR_EVENT_MAX; if (event_cnt < PFM_CELL_EVENT_MIN) return PFMLIB_ERR_NOTFOUND; if (event_cnt > max_event_cnt) return PFMLIB_ERR_TOOMANY; /* check signal type */ signal_cnt = check_signal_type(inp, mod_in, &group[0], &group[1]); if (signal_cnt == PFMLIB_ERR_INVAL) return PFMLIB_ERR_NOASSIGN; /* decide debug_bus word */ if (signal_cnt != 0 && group[0].signal_type != NONE_SIGNAL) { ret = get_debug_bus_word(&group[0], &group[1]); if (ret != PFMLIB_SUCCESS) return PFMLIB_ERR_NOASSIGN; } /* common register 
setting */ pc[pmcs_cnt].reg_num = REG_GROUP_CONTROL; if (signal_cnt == 1) { pc[pmcs_cnt].reg_value = group[0].word << PFM_CELL_GRP_CONTROL_REG_GRP0_BIT; } else if (signal_cnt == 2) { pc[pmcs_cnt].reg_value = (group[0].word << PFM_CELL_GRP_CONTROL_REG_GRP0_BIT) | (group[1].word << PFM_CELL_GRP_CONTROL_REG_GRP1_BIT); } pmcs_cnt++; pc[pmcs_cnt].reg_num = REG_DEBUG_BUS_CONTROL; if (signal_cnt == 1) { shift0 = PFM_CELL_BASE_WORD_UNIT_FIELD_BIT + ((PFM_CELL_MAX_WORD_NUMBER - group[0].word) * PFM_CELL_WORD_UNIT_FIELD_WIDTH); pc[pmcs_cnt].reg_value = group[0].freq << shift0; } else if (signal_cnt == 2) { shift0 = PFM_CELL_BASE_WORD_UNIT_FIELD_BIT + ((PFM_CELL_MAX_WORD_NUMBER - group[0].word) * PFM_CELL_WORD_UNIT_FIELD_WIDTH); shift1 = PFM_CELL_BASE_WORD_UNIT_FIELD_BIT + ((PFM_CELL_MAX_WORD_NUMBER - group[1].word) * PFM_CELL_WORD_UNIT_FIELD_WIDTH); pc[pmcs_cnt].reg_value = (group[0].freq << shift0) | (group[1].freq << shift1); } pc[pmcs_cnt].reg_value |= PFM_CELL_DEFAULT_TRIGGER_EVENT_UNIT; pmcs_cnt++; pc[pmcs_cnt].reg_num = REG_TRACE_ADDRESS; pc[pmcs_cnt].reg_value = 0; pmcs_cnt++; pc[pmcs_cnt].reg_num = REG_EXT_TRACE_TIMER; pc[pmcs_cnt].reg_value = 0; pmcs_cnt++; pc[pmcs_cnt].reg_num = REG_PM_STATUS; pc[pmcs_cnt].reg_value = 0; pmcs_cnt++; pc[pmcs_cnt].reg_num = REG_PM_CONTROL; pc[pmcs_cnt].reg_value = (mod_in->control & ~PFM_CELL_PM_CONTROL_PPU_CNTR_MODE_MASK) | get_ppu_count_mode(inp->pfp_dfl_plm); pmcs_cnt++; pc[pmcs_cnt].reg_num = REG_PM_INTERVAL; pc[pmcs_cnt].reg_value = mod_in->interval; pmcs_cnt++; pc[pmcs_cnt].reg_num = REG_PM_START_STOP; pc[pmcs_cnt].reg_value = mod_in->triggers; pmcs_cnt++; pmx_ctrl_bits = 0; /* pmX register setting */ for(i = 0; i < event_cnt; i++) { /* PMX_CONTROL */ pmx_offset = get_pmx_offset(mod_in->pfp_cell_counters[i].pmX_control_num, &pmx_ctrl_bits); if (pmx_offset == PFMLIB_ERR_INVAL) { DPRINT("pmX already used\n"); return PFMLIB_ERR_INVAL; } signal_type = get_signal_type(cell_pe[e[i].event].pme_code); if (signal_type == 
SIGNAL_TYPE_CYCLES) { pc[pmcs_cnt].reg_value = PM_COUNTER_CTRL_CYLES; pc[pmcs_cnt].reg_num = REG_PM0_CONTROL + pmx_offset; pmcs_cnt++; pc[pmcs_cnt].reg_value = cell_pe[e[i].event].pme_code; pc[pmcs_cnt].reg_num = REG_PM0_EVENT + pmx_offset; pmcs_cnt++; pd[i].reg_num = pmx_offset; pd[i].reg_value = 0; continue; } switch(cell_pe[e[i].event].pme_type) { case COUNT_TYPE_BOTH_TYPE: case COUNT_TYPE_CUMULATIVE_LEN: case COUNT_TYPE_MULTI_CYCLE: case COUNT_TYPE_SINGLE_CYCLE: count_cycle = 1; break; case COUNT_TYPE_OCCURRENCE: count_cycle = 0; break; default: return PFMLIB_ERR_INVAL; } signal_bit = get_signal_bit(cell_pe[e[i].event].pme_code); polarity = mod_in->pfp_cell_counters[i].polarity; input_control = mod_in->pfp_cell_counters[i].input_control; subunit = 0; if (is_spe_signal_group(signal_type)) { subunit = mod_in->pfp_cell_counters[i].spe_subunit; } pc[pmcs_cnt].reg_value = ( (signal_bit << (31 - 5)) | (input_control << (31 - 6)) | (polarity << (31 - 7)) | (count_cycle << (31 - 8)) | (count_enable << (31 - 9)) ); pc[pmcs_cnt].reg_num = REG_PM0_CONTROL + pmx_offset; if (signal_type == group[1].signal_type && subunit == group[1].subunit) { pc[pmcs_cnt].reg_value |= PFM_CELL_COUNTER_CONTROL_GRP1; } pmcs_cnt++; /* PMX_EVENT */ pc[pmcs_cnt].reg_num = REG_PM0_EVENT + pmx_offset; /* debug bus word setting */ if (signal_type == group[0].signal_type && subunit == group[0].subunit) { pc[pmcs_cnt].reg_value = (cell_pe[e[i].event].pme_code | (group[0].word << 48) | (subunit << 32)); } else if (signal_type == group[1].signal_type && subunit == group[1].subunit) { pc[pmcs_cnt].reg_value = (cell_pe[e[i].event].pme_code | (group[1].word << 48) | (subunit << 32)); } else if ((signal_type == SIGNAL_SPU_TRIGGER) || (signal_type == SIGNAL_SPU_EVENT)) { pc[pmcs_cnt].reg_value = cell_pe[e[i].event].pme_code | (subunit << 32); } else { return PFMLIB_ERR_INVAL; } pmcs_cnt++; /* pmd setting */ pd[i].reg_num = pmx_offset; pd[i].reg_value = 0; } outp->pfp_pmc_count = pmcs_cnt; 
outp->pfp_pmd_count = event_cnt; return PFMLIB_SUCCESS; } static int pfm_cell_dispatch_events(pfmlib_input_param_t *inp, void *model_in, pfmlib_output_param_t *outp, void *model_out) { pfmlib_cell_input_param_t *mod_in = (pfmlib_cell_input_param_t *)model_in; pfmlib_cell_input_param_t default_model_in; int i; if (model_in) { mod_in = (pfmlib_cell_input_param_t *)model_in; } else { mod_in = &default_model_in; mod_in->control = 0x80000000; mod_in->interval = 0; mod_in->triggers = 0; for (i = 0; i < PMU_CELL_NUM_COUNTERS; i++) { mod_in->pfp_cell_counters[i].pmX_control_num = 0; mod_in->pfp_cell_counters[i].spe_subunit = 0; mod_in->pfp_cell_counters[i].polarity = 1; mod_in->pfp_cell_counters[i].input_control = 0; mod_in->pfp_cell_counters[i].cnt_mask = 0; mod_in->pfp_cell_counters[i].flags = 0; } } return pfm_cell_dispatch_counters(inp, mod_in, outp); } static int pfm_cell_get_event_code(unsigned int i, unsigned int cnt, int *code) { // if (cnt != PFMLIB_CNT_FIRST && cnt > 2) { if (cnt != PFMLIB_CNT_FIRST && cnt > cell_support.num_cnt) { return PFMLIB_ERR_INVAL; } *code = cell_pe[i].pme_code; return PFMLIB_SUCCESS; } static void pfm_cell_get_event_counters(unsigned int j, pfmlib_regmask_t *counters) { unsigned int i; memset(counters, 0, sizeof(*counters)); for(i=0; i < PMU_CELL_NUM_COUNTERS; i++) { pfm_regmask_set(counters, i); } } static void pfm_cell_get_impl_pmcs(pfmlib_regmask_t *impl_pmcs) { unsigned int i; memset(impl_pmcs, 0, sizeof(*impl_pmcs)); for(i=0; i < PFM_CELL_NUM_PMCS; i++) { pfm_regmask_set(impl_pmcs, i); } } static void pfm_cell_get_impl_pmds(pfmlib_regmask_t *impl_pmds) { unsigned int i; memset(impl_pmds, 0, sizeof(*impl_pmds)); for(i=0; i < PMU_CELL_NUM_PERFCTR; i++) { pfm_regmask_set(impl_pmds, i); } } static void pfm_cell_get_impl_counters(pfmlib_regmask_t *impl_counters) { unsigned int i; for(i=0; i < PMU_CELL_NUM_COUNTERS; i++) { pfm_regmask_set(impl_counters, i); } } static char* pfm_cell_get_event_name(unsigned int i) { return 
cell_pe[i].pme_name; } static int pfm_cell_get_event_desc(unsigned int ev, char **str) { char *s; s = cell_pe[ev].pme_desc; if (s) { *str = strdup(s); } else { *str = NULL; } return PFMLIB_SUCCESS; } static int pfm_cell_get_cycle_event(pfmlib_event_t *e) { int i; for (i = 0; i < PME_CELL_EVENT_COUNT; i++) { if (!strcmp(cell_pe[i].pme_name, "CYCLES")) { e->event = i; return PFMLIB_SUCCESS; } } return PFMLIB_ERR_NOTFOUND; } int pfm_cell_spe_event(unsigned int event_index) { if (event_index >= PME_CELL_EVENT_COUNT) return 0; return is_spe_signal_group(get_signal_type(cell_pe[event_index].pme_code)); } pfm_pmu_support_t cell_support={ .pmu_name = "CELL", .pmu_type = PFMLIB_CELL_PMU, .pme_count = PME_CELL_EVENT_COUNT, .pmc_count = PFM_CELL_NUM_PMCS, .pmd_count = PMU_CELL_NUM_PERFCTR, .num_cnt = PMU_CELL_NUM_COUNTERS, .get_event_code = pfm_cell_get_event_code, .get_event_name = pfm_cell_get_event_name, .get_event_counters = pfm_cell_get_event_counters, .dispatch_events = pfm_cell_dispatch_events, .pmu_detect = pfm_cell_detect, .get_impl_pmcs = pfm_cell_get_impl_pmcs, .get_impl_pmds = pfm_cell_get_impl_pmds, .get_impl_counters = pfm_cell_get_impl_counters, .get_event_desc = pfm_cell_get_event_desc, .get_cycle_event = pfm_cell_get_cycle_event }; papi-5.6.0/src/perfctr-2.6.x/etc/costs/PentiumII-350000775 001750 001750 00000001315 13216244366 023551 0ustar00jshenry1963jshenry1963000000 000000 [data from a 350 MHz Pentium II (Deschutes)] PERFCTR INIT: vendor 0, family 6, model 5, stepping 1, clock 349410 kHz PERFCTR INIT: NITER == 64 PERFCTR INIT: loop overhead is 88 cycles PERFCTR INIT: rdtsc cost is 33.7 cycles (2250 total) PERFCTR INIT: rdpmc cost is 29.6 cycles (1986 total) PERFCTR INIT: rdmsr (counter) cost is 81.2 cycles (5289 total) PERFCTR INIT: rdmsr (evntsel) cost is 69.4 cycles (4534 total) PERFCTR INIT: wrmsr (counter) cost is 87.4 cycles (5684 total) PERFCTR INIT: wrmsr (evntsel) cost is 79.1 cycles (5153 total) PERFCTR INIT: read cr4 cost is 1.9 cycles (211 
total) PERFCTR INIT: write cr4 cost is 42.2 cycles (2792 total) perfctr: driver 2.3.4, cpu type Intel Pentium II at 349410 kHz papi-5.6.0/src/libpfm-3.y/examples_v3.x/task.c000664 001750 001750 00000017176 13216244362 023021 0ustar00jshenry1963jshenry1963000000 000000 /* * task.c - example of a task monitoring another one * * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "detect_pmcs.h" #define NUM_PMCS PFMLIB_MAX_PMCS #define NUM_PMDS PFMLIB_MAX_PMDS #define MAX_EVT_NAME_LEN 128 static void fatal_error(char *fmt,...) __attribute__((noreturn)); static void fatal_error(char *fmt, ...) 
{ va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); exit(1); } int child(char **arg) { /* * will cause the program to stop before executing the first * user level instruction. We can only attach a session * if the task is in the STOPPED state. */ ptrace(PTRACE_TRACEME, 0, NULL, NULL); /* * execute the requested command */ execvp(arg[0], arg); fatal_error("cannot exec: %s\n", arg[0]); /* not reached */ } int parent(char **arg) { pfmlib_input_param_t inp; pfmlib_output_param_t outp; pfarg_pmr_t pc[NUM_PMCS]; pfarg_pmr_t pd[NUM_PMDS]; pfarg_sinfo_t sif; unsigned int i, num_counters; int status, ret; int ctx_fd; pid_t pid; char name[MAX_EVT_NAME_LEN]; memset(pc, 0, sizeof(pc)); memset(pd, 0, sizeof(pd)); memset(&inp,0, sizeof(inp)); memset(&outp,0, sizeof(outp)); memset(&sif,0, sizeof(sif)); pfm_get_num_counters(&num_counters); if (pfm_get_cycle_event(&inp.pfp_events[0]) != PFMLIB_SUCCESS) fatal_error("cannot find cycle event\n"); if (pfm_get_inst_retired_event(&inp.pfp_events[1]) != PFMLIB_SUCCESS) fatal_error("cannot find inst retired event\n"); i = 2; /* * set the privilege mode: * PFM_PLM3 : user level * PFM_PLM0 : kernel level */ inp.pfp_dfl_plm = PFM_PLM3; /* * how many counters we use */ if (num_counters < i) { i = num_counters; printf("too many events provided (max=%d events), using first %d event(s)\n", num_counters, i); } inp.pfp_event_count = i; /* * now create the session */ ctx_fd = pfm_create(0, &sif); if (ctx_fd == -1) { if (errno == ENOSYS) { fatal_error("Your kernel does not have performance monitoring support!\n"); } fatal_error("cannot create session %s\n", strerror(errno)); } /* * build the pfp_unavail_pmcs bitmask by looking * at what perfmon has available. It is not always * the case that all PMU registers are actually available * to applications. For instance, on IA-32 platforms, some * registers may be reserved for the NMI watchdog timer. * * With this bitmap, the library knows which registers NOT to * use. 
Of course, it is possible that no valid assignment may * be possible if certain PMU registers are not available. */ detect_unavail_pmu_regs(&sif, &inp.pfp_unavail_pmcs, NULL); /* * let the library figure out the values for the PMCS */ if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) { fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); } /* * Now prepare the argument to initialize the PMDs and PMCS. * We use pfp_pmc_count to determine the number of PMC to initialize. * We use pfp_pmd_count to determine the number of PMD to initialize. * Some events/features may cause extra PMCs to be used, leading to: * - pfp_pmc_count may be >= pfp_event_count * - pfp_pmd_count may be >= pfp_event_count */ for (i=0; i < outp.pfp_pmc_count; i++) { pc[i].reg_num = outp.pfp_pmcs[i].reg_num; pc[i].reg_value = outp.pfp_pmcs[i].reg_value; } for(i=0; i < outp.pfp_pmd_count; i++) pd[i].reg_num = outp.pfp_pmds[i].reg_num; /* * Now program the registers */ if (pfm_write(ctx_fd, 0, PFM_RW_PMC, pc, outp.pfp_pmc_count * sizeof(*pc)) == -1) fatal_error("pfm_write error errno %d\n",errno); if (pfm_write(ctx_fd, 0, PFM_RW_PMD, pd, outp.pfp_pmd_count * sizeof(*pd)) == -1) fatal_error("pfm_write(PMD) error errno %d\n",errno); /* * Create the child task */ if ((pid=fork()) == -1) fatal_error("Cannot fork process\n"); /* * and launch the child code */ if (pid == 0) { /* no need to have fd in child */ close(ctx_fd); exit(child(arg)); } /* * wait for the child to exec */ waitpid(pid, &status, WUNTRACED); /* * check if process exited early */ if (WIFEXITED(status)) fatal_error("command %s exited too early with status %d\n", arg[0], WEXITSTATUS(status)); /* * the task is stopped at this point */ /* * now we load (i.e., attach) the session */ if (pfm_attach(ctx_fd, 0, pid) == -1) fatal_error("pfm_attach error errno %d\n",errno); /* * activate monitoring. The task is still STOPPED at this point. 
Monitoring * will not take effect until the execution of the task is resumed. */ if (pfm_set_state(ctx_fd, 0, PFM_ST_START) == -1) fatal_error("pfm_set_state(start) error errno %d\n",errno); /* * now resume execution of the task, effectively activating * monitoring. */ ptrace(PTRACE_DETACH, pid, NULL, 0); /* * now the task is running */ /* * simply wait for completion */ waitpid(pid, &status, 0); /* * the task has disappeared at this point but our session is still * present and contains all the latest counts. */ /* * now simply read the results. */ if (pfm_read(ctx_fd, 0, PFM_RW_PMD, pd, inp.pfp_event_count * sizeof(*pd)) == -1) fatal_error("pfm_read error errno %d\n",errno); /* * print the results * * It is important to realize, that the first event we specified may not * be in PMD4. Not all events can be measured by any monitor. That's why * we need to use the pc[] array to figure out where event i was allocated. * */ for (i=0; i < inp.pfp_event_count; i++) { pfm_get_full_event_name(&inp.pfp_events[i], name, MAX_EVT_NAME_LEN); printf("PMD%-3u %20"PRIu64" %s\n", pd[i].reg_num, pd[i].reg_value, name); } /* * free the session */ close(ctx_fd); return 0; } int main(int argc, char **argv) { pfmlib_options_t pfmlib_options; int ret; if (argc < 2) { fatal_error("You must specify a command to execute\n"); } /* * pass options to library (optional) */ memset(&pfmlib_options, 0, sizeof(pfmlib_options)); pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ pfmlib_options.pfm_verbose= 1; /* set to 1 for verbose */ pfm_set_options(&pfmlib_options); /* * Initialize pfm library (required before we can use it) */ ret = pfm_initialize(); if (ret != PFMLIB_SUCCESS) fatal_error("Cannot initialize library: %s\n", pfm_strerror(ret)); return parent(argv+1); } papi-5.6.0/man/man3/PAPI_register_thread.3000664 001750 001750 00000002345 13216244356 022224 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPI_register_thread" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- 
.ad l .nh .SH NAME PAPI_register_thread \- .PP Notify PAPI that a thread has 'appeared'\&. .SH SYNOPSIS .br .PP .SH "Detailed Description" .PP .PP \fBC Interface:\fP .RS 4 #include <\fBpapi\&.h\fP> .br int \fBPAPI_register_thread\fP (void); .RE .PP \fBPAPI_register_thread()\fP should be called when the user wants to force PAPI to initialize a thread that PAPI has not seen before\&. .PP Usually this is not necessary as PAPI implicitly detects the thread when an eventset is created or other thread local PAPI functions are called\&. However, it can be useful for debugging and performance enhancements in the run-time systems of performance tools\&. .PP \fBReturn values:\fP .RS 4 \fIPAPI_ENOMEM\fP Space could not be allocated to store the new thread information\&. .br \fIPAPI_ESYS\fP A system or C library call failed inside PAPI, see the errno variable\&. .br \fIPAPI_ECMP\fP Hardware counters for this thread could not be initialized\&. .RE .PP \fBSee Also:\fP .RS 4 \fBPAPI_unregister_thread\fP .PP \fBPAPI_thread_id\fP .PP \fBPAPI_thread_init\fP .RE .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. papi-5.6.0/src/libpfm-3.y/examples_v3.x/notify_self2.c000664 001750 001750 00000022153 13216244362 024451 0ustar00jshenry1963jshenry1963000000 000000 /* * notify_self2.c - example of how you can use overflow notifications with F_SETSIG * * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. 
* Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. */ #ifndef _GNU_SOURCE #define _GNU_SOURCE /* for getline */ #endif #include #include #include #include #include #include #include #include #include #include #include #include #include "detect_pmcs.h" #define SMPL_PERIOD 1000000000ULL static volatile unsigned long notification_received; #define NUM_PMCS PFMLIB_MAX_PMCS #define NUM_PMDS PFMLIB_MAX_PMDS static pfarg_pmr_t pdx[1]; static int ctx_fd; static char *event1_name; static void fatal_error(char *fmt,...) __attribute__((noreturn)); static void fatal_error(char *fmt, ...) { va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); exit(1); } static void warning(char *fmt, ...) 
{ va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); } static void sigio_handler(int n, struct siginfo *info, void *data) { pfarg_msg_t msg; int fd; int r =0; if (info == NULL) fatal_error("info is NULL\n"); fd = info->si_fd; if (fd != ctx_fd) fatal_error("handler does not get valid file descriptor\n"); if (event1_name && pfm_read(fd, 0, PFM_RW_PMD, pdx, sizeof(pdx))) fatal_error("pfm_read: %s", strerror(errno)); retry: r = read(fd, &msg, sizeof(msg)); if (r != sizeof(msg)) { if(r == -1 && errno == EINTR) { warning("read interrupted, retrying\n"); goto retry; } fatal_error("cannot read overflow message: %s\n", strerror(errno)); } if (msg.type != PFM_MSG_OVFL) fatal_error("unexpected msg type: %d\n",msg.type); /* * increment our notification counter */ notification_received++; /* * XXX: risky to do printf() in signal handler! */ if (event1_name) printf("Notification %lu: %"PRIu64" %s\n", notification_received, pdx[0].reg_value, event1_name); else printf("Notification %lu\n", notification_received); /* * And resume monitoring */ if (pfm_set_state(fd, 0, PFM_ST_RESTART)) fatal_error("pfm_set_state(restart): %d\n", errno); } /* * infinite loop waiting for notification to get out */ void busyloop(void) { /* * busy loop to burn CPU cycles */ for(;notification_received < 20;) ; } #define BPL (sizeof(uint64_t)<<3) #define LBPL 6 static inline void pfm_bv_set(uint64_t *bv, uint16_t rnum) { bv[rnum>>LBPL] |= 1UL << (rnum&(BPL-1)); } int main(int argc, char **argv) { pfmlib_input_param_t inp; pfmlib_output_param_t outp; pfarg_pmr_t pc[NUM_PMCS]; pfarg_pmd_attr_t pd[NUM_PMDS]; pfarg_sinfo_t sif; pfmlib_options_t pfmlib_options; struct sigaction act; unsigned int i, num_counters; size_t len; int ret; /* * pass options to library (optional) */ memset(&pfmlib_options, 0, sizeof(pfmlib_options)); pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ pfmlib_options.pfm_verbose = 1; /* set to 1 for verbose */ pfm_set_options(&pfmlib_options); /* * Initialize pfm 
library (required before we can use it) */ ret = pfm_initialize(); if (ret != PFMLIB_SUCCESS) fatal_error("Cannot initialize library: %s\n", pfm_strerror(ret)); /* * Install the signal handler (SIGIO) * * SA_SIGINFO required on some platforms * to get siginfo passed to handler. */ memset(&act, 0, sizeof(act)); act.sa_handler = (sig_t)sigio_handler; act.sa_flags = SA_SIGINFO; sigaction (SIGIO, &act, 0); memset(pc, 0, sizeof(pc)); memset(pd, 0, sizeof(pd)); memset(&inp,0, sizeof(inp)); memset(&outp,0, sizeof(outp)); memset(&sif,0, sizeof(sif)); pfm_get_num_counters(&num_counters); if (pfm_get_cycle_event(&inp.pfp_events[0]) != PFMLIB_SUCCESS) fatal_error("cannot find cycle event\n"); if (pfm_get_inst_retired_event(&inp.pfp_events[1]) != PFMLIB_SUCCESS) fatal_error("cannot find inst retired event\n"); i = 2; /* * set the default privilege mode for all counters: * PFM_PLM3 : user level only */ inp.pfp_dfl_plm = PFM_PLM3; if (i > num_counters) { i = num_counters; printf("too many events provided (max=%d events), using first %d event(s)\n", num_counters, i); } inp.pfp_event_count = i; /* * how many counters we use */ if (i > 1) { pfm_get_max_event_name_len(&len); event1_name = malloc(len+1); if (event1_name == NULL) fatal_error("cannot allocate event name\n"); pfm_get_full_event_name(&inp.pfp_events[1], event1_name, len+1); } /* * now create the session for self monitoring/per-task */ ctx_fd = pfm_create(0, &sif); if (ctx_fd == -1) { if (errno == ENOSYS) { fatal_error("Your kernel does not have performance monitoring support!\n"); } fatal_error("cannot create session %s\n", strerror(errno)); } /* * build the pfp_unavail_pmcs bitmask by looking * at what perfmon has available. It is not always * the case that all PMU registers are actually available * to applications. For instance, on IA-32 platforms, some * registers may be reserved for the NMI watchdog timer. * * With this bitmap, the library knows which registers NOT to * use. 
Of source, it is possible that no valid assignement may * be possible if certina PMU registers are not available. */ detect_unavail_pmu_regs(&sif, &inp.pfp_unavail_pmcs, NULL); /* * let the library figure out the values for the PMCS */ if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) fatal_error("Cannot configure events: %s\n", pfm_strerror(ret)); /* * Now prepare the argument to initialize the PMDs and PMCS. */ for (i=0; i < outp.pfp_pmc_count; i++) { pc[i].reg_num = outp.pfp_pmcs[i].reg_num; pc[i].reg_value = outp.pfp_pmcs[i].reg_value; } for (i=0; i < outp.pfp_pmd_count; i++) pd[i].reg_num = outp.pfp_pmds[i].reg_num; /* * We want to get notified when the counter used for our first * event overflows */ pd[0].reg_flags |= PFM_REGFL_OVFL_NOTIFY; if (inp.pfp_event_count > 1) { pfm_bv_set(pd[0].reg_reset_pmds, pd[1].reg_num); pdx[0].reg_num = pd[1].reg_num; } /* * we arm the first counter, such that it will overflow * after SMPL_PERIOD events have been observed */ pd[0].reg_value = - SMPL_PERIOD; pd[0].reg_long_reset = - SMPL_PERIOD; pd[0].reg_short_reset = - SMPL_PERIOD; /* * Now program the registers */ if (pfm_write(ctx_fd, 0, PFM_RW_PMC, pc, outp.pfp_pmc_count * sizeof(*pc))) fatal_error("pfm_write error errno %d\n",errno); if (pfm_write(ctx_fd, 0, PFM_RW_PMD_ATTR, pd, outp.pfp_pmd_count * sizeof(*pd))) fatal_error("pfm_write(PMD) error errno %d\n",errno); /* * we want to monitor ourself */ if (pfm_attach(ctx_fd, 0, getpid())) fatal_error("pfm_attach error errno %d\n",errno); /* * setup asynchronous notification on the file descriptor */ ret = fcntl(ctx_fd, F_SETFL, fcntl(ctx_fd, F_GETFL, 0) | O_ASYNC); if (ret == -1) fatal_error("cannot set ASYNC: %s\n", strerror(errno)); /* * get ownership of the descriptor */ ret = fcntl(ctx_fd, F_SETOWN, getpid()); if (ret == -1) fatal_error("cannot setown: %s\n", strerror(errno)); #ifndef _GNU_SOURCE #error "this program must be compiled with -D_GNU_SOURCE" #else /* * when you explicitely declare 
that you want a particular signal, * even with you use the default signal, the kernel will send more * information concerning the event to the signal handler. * * In particular, it will send the file descriptor from which the * event is originating which can be quite useful when monitoring * multiple tasks from a single thread. */ ret = fcntl(ctx_fd, F_SETSIG, SIGIO); if (ret == -1) fatal_error("cannot setsig: %s\n", strerror(errno)); #endif /* * Let's roll now */ if (pfm_set_state(ctx_fd, 0, PFM_ST_START)) fatal_error("pfm_set_state(start) error errno %d\n", errno); busyloop(); if (pfm_set_state(ctx_fd, 0, PFM_ST_STOP)) fatal_error("pfm_set_state(stop) error errno %d\n", errno); /* * destroy our context */ close(ctx_fd); if (event1_name) free(event1_name); return 0; } papi-5.6.0/src/components/emon/linux-emon.c000664 001750 001750 00000035056 13216244357 022722 0ustar00jshenry1963jshenry1963000000 000000 /****************************/ /* THIS IS OPEN SOURCE CODE */ /****************************/ /** * @file linux-emon.c * @author Heike Jagode * jagode@eecs.utk.edu * BGPM / emon component * * @brief * This file has the source code for a component that enables PAPI-C to * access hardware power data for BG/Q through the EMON interface. */ #include #include #include "papi.h" #include "papi_internal.h" #include "papi_vector.h" #include "papi_memory.h" #include "extras.h" #define EMON_DEFINE_GLOBALS #include #include // the emon library header file (no linking required) #define EMON_MAX_COUNTERS 8 #define EMON_TOTAL_EVENTS 8 #ifndef DEBUG #define EMONDBG( fmt, args...) do {} while(0) #else #define EMONDBG( fmt, args... 
) do { printf("%s:%d\t"fmt, __func__, __LINE__, ##args); } while(0) #endif /* Stores private information for each event */ typedef struct EMON_register { unsigned int selector; /* Signifies which counter slot is being used */ /* Indexed from 1 as 0 has a special meaning */ } EMON_register_t; /** This structure is used to build the table of events */ /* The contents of this structure will vary based on */ /* your component, however having name and description */ /* fields are probably useful. */ typedef struct EMON_native_event_entry { EMON_register_t resources; /**< Per counter resources */ char *name; /**< Name of the counter */ char *description; /**< Description of the counter */ int return_type; } EMON_native_event_entry_t; /* Used when doing register allocation */ typedef struct EMON_reg_alloc { EMON_register_t ra_bits; } EMON_reg_alloc_t; typedef struct EMON_overflow { int threshold; int EventIndex; } EMON_overflow_t; /* Holds control flags */ typedef struct EMON_control_state { int count; long long counters[EMON_MAX_COUNTERS]; int being_measured[EMON_MAX_COUNTERS]; long long last_update; } EMON_control_state_t; /* Holds per-thread information */ typedef struct EMON_context { EMON_control_state_t state; } EMON_context_t; /* Declare our vector in advance */ papi_vector_t _emon2_vector; static void _check_EMON_error( char* emon2func, int err ) { ( void ) emon2func; if ( err < 0 ) { printf( "Error: EMON API function '%s' returned %d.\n", emon2func, err ); } } /** This table contains the native events * So with the EMON interface, we get every domain at a time. 
*/ static EMON_native_event_entry_t EMON_native_table[] = { { .name = "DOMAIN1", .description = "Chip core", .resources.selector = 1, .return_type = PAPI_DATATYPE_FP64, }, { .name = "DOMAIN2", .description = "Chip Memory Interface and Dramm", .resources.selector = 2, .return_type = PAPI_DATATYPE_FP64, }, { .name = "DOMAIN3", .description = "Optics", .resources.selector = 3, .return_type = PAPI_DATATYPE_FP64, }, { .name = "DOMAIN4", .description = "Optics + PCIExpress", .resources.selector = 4, .return_type = PAPI_DATATYPE_FP64, }, { .name = "DOMAIN6", .description = "HSS Network and Link Chip", .resources.selector = 5, .return_type = PAPI_DATATYPE_FP64, }, { .name = "DOMAIN8", .description = "Link Chip Core", .resources.selector = 6, .return_type = PAPI_DATATYPE_FP64, }, { .name = "DOMAIN7", .description = "Chip SRAM", .resources.selector = 7, .return_type = PAPI_DATATYPE_FP64, }, { .name="EMON_DOMAIN_ALL", .description = "Measures power on all domains.", .resources.selector = 8, .return_type = PAPI_DATATYPE_FP64, }, }; /***************************************************************************** ******************* BEGIN PAPI's COMPONENT REQUIRED FUNCTIONS ************* *****************************************************************************/ /* * This is called whenever a thread is initialized */ int EMON_init_thread( hwd_context_t * ctx ) { EMONDBG( "EMON_init_thread\n" ); ( void ) ctx; return PAPI_OK; } /* Initialize hardware counters, setup the function vector table * and get hardware information, this routine is called when the * PAPI process is initialized (IE PAPI_library_init) */ int EMON_init_component( int cidx ) { int ret = 0; _emon2_vector.cmp_info.CmpIdx = cidx; EMONDBG( "EMON_init_component cidx = %d\n", cidx ); /* Setup connection with the fpga: * NOTE: any other threads attempting to call into the EMON API * will be turned away. 
*/ ret = EMON_SetupPowerMeasurement(); _check_EMON_error("EMON_SetupPowerMeasurement", ret ); _emon2_vector.cmp_info.num_native_events = EMON_TOTAL_EVENTS; _emon2_vector.cmp_info.num_cntrs = EMON_TOTAL_EVENTS; _emon2_vector.cmp_info.num_mpx_cntrs = EMON_TOTAL_EVENTS; return ( PAPI_OK ); } /* * Control of counters (Reading/Writing/Starting/Stopping/Setup) * functions */ int EMON_init_control_state( hwd_control_state_t * ptr ) { EMONDBG( "EMON_init_control_state\n" ); EMON_control_state_t * this_state = ( EMON_control_state_t * ) ptr; memset( this_state, 0, sizeof ( EMON_control_state_t ) ); return PAPI_OK; } static int _emon_accessor( EMON_control_state_t * this_state ) { union { long long ll; double fp; } return_value; return_value.fp = -1; double volts[14],amps[14]; double cpu = 0; double dram = 0; double link_chip = 0; double network = 0; double optics = 0; double pci = 0; double sram = 0; unsigned k_const; EMONDBG( "_emon_accessor, enter this_state = %x\n", this_state); return_value.fp = EMON_GetPower_impl( volts, amps ); EMONDBG("_emon_accessor, after EMON_GetPower %lf \n", return_value.fp); if ( -1 == return_value.fp ) { PAPIERROR("EMON_GetPower() failed!\n"); return ( PAPI_ESYS ); } this_state->counters[7] = return_value.ll; /* We just stuff everything in counters, there is no extra overhead here */ k_const = domain_info[0].k_const; /* Chip Core Voltage */ cpu += volts[0] * amps[0] * k_const; cpu += volts[1] * amps[1] * k_const; k_const = domain_info[1].k_const; /* Chip Core Voltage */ dram += volts[2] * amps[2] * k_const; dram += volts[3] * amps[3] * k_const; k_const = domain_info[2].k_const; /* Chip Core Voltage */ optics += volts[4] * amps[4] * k_const; optics += volts[5] * amps[5] * k_const; k_const = domain_info[3].k_const; /* Chip Core Voltage */ pci += volts[6] * amps[6] * k_const; pci += volts[7] * amps[7] * k_const; k_const = domain_info[4].k_const; /* Chip Core Voltage */ network += volts[8] * amps[8] * k_const; network += volts[9] * amps[9] * 
k_const; k_const = domain_info[5].k_const; /* Chip Core Voltage */ link_chip += volts[10] * amps[10] * k_const; link_chip += volts[11] * amps[11] * k_const; k_const = domain_info[6].k_const; /* Chip Core Voltage */ sram += volts[12] * amps[12] * k_const; sram += volts[13] * amps[13] * k_const; this_state->counters[0] = *(long long*)&cpu; this_state->counters[1] = *(long long*)&dram; this_state->counters[2] = *(long long*)&optics; this_state->counters[3] = *(long long*)&pci; this_state->counters[4] = *(long long*)&link_chip; this_state->counters[5] = *(long long*)&network; this_state->counters[6] = *(long long*)&sram; EMONDBG("CPU = %lf\n", *(double*)&this_state->counters[0]); EMONDBG("DRAM = %lf\n", *(double*)&this_state->counters[1]); EMONDBG("Optics = %lf\n", *(double*)&this_state->counters[2]); EMONDBG("PCI = %lf\n", *(double*)&this_state->counters[3]); EMONDBG("Link Chip = %lf\n", *(double*)&this_state->counters[4]); EMONDBG("Network = %lf\n", *(double*)&this_state->counters[5]); EMONDBG("SRAM = %lf\n", *(double*)&this_state->counters[6]); EMONDBG("TOTAL = %lf\n", *(double*)&this_state->counters[7] ); return ( PAPI_OK ); } /* * */ int EMON_start( hwd_context_t * ctx, hwd_control_state_t * ptr ) { EMONDBG( "EMON_start\n" ); ( void ) ctx; ( void ) ptr; /*EMON_control_state_t * this_state = ( EMON_control_state_t * ) ptr;*/ return ( PAPI_OK ); } /* * */ int EMON_stop( hwd_context_t * ctx, hwd_control_state_t * ptr ) { EMONDBG( "EMON_stop\n" ); ( void ) ctx; EMON_control_state_t * this_state = ( EMON_control_state_t * ) ptr; return _emon_accessor( this_state ); } /* * */ int EMON_read( hwd_context_t * ctx, hwd_control_state_t * ptr, long long ** events, int flags ) { EMONDBG( "EMON_read\n" ); ( void ) ctx; ( void ) flags; int ret; EMON_control_state_t * this_state = ( EMON_control_state_t * ) ptr; ret = _emon_accessor( this_state ); *events = this_state->counters; return ret; } /* * */ int EMON_shutdown_thread( hwd_context_t * ctx ) { EMONDBG( 
"EMON_shutdown_thread\n" ); ( void ) ctx; return ( PAPI_OK ); } int EMON_shutdown_component( void ) { EMONDBG( "EMON_shutdown_component\n" ); return ( PAPI_OK ); } /* This function sets various options in the component * The valid codes being passed in are PAPI_SET_DEFDOM, * PAPI_SET_DOMAIN, PAPI_SETDEFGRN, PAPI_SET_GRANUL * and PAPI_SET_INHERIT */ int EMON_ctl( hwd_context_t * ctx, int code, _papi_int_option_t * option ) { EMONDBG( "EMON_ctl\n" ); ( void ) ctx; ( void ) code; ( void ) option; return ( PAPI_OK ); } /* * PAPI Cleanup Eventset */ int EMON_cleanup_eventset( hwd_control_state_t * ctrl ) { EMONDBG( "EMON_cleanup_eventset\n" ); EMON_control_state_t * this_state = ( EMON_control_state_t * ) ctrl; ( void ) this_state; return ( PAPI_OK ); } /* * */ int EMON_update_control_state( hwd_control_state_t * ptr, NativeInfo_t * native, int count, hwd_context_t * ctx ) { EMONDBG( "EMON_update_control_state: count = %d\n", count ); ( void ) ctx; int index, i; EMON_control_state_t * this_state = ( EMON_control_state_t * ) ptr; ( void ) ptr; // otherwise, add the events to the eventset for ( i = 0; i < count; i++ ) { index = ( native[i].ni_event ) ; native[i].ni_position = i; EMONDBG("EMON_update_control_state: ADD event: i = %d, index = %d\n", i, index ); } // store how many events we added to an EventSet this_state->count = count; return ( PAPI_OK ); } /* * As a system wide count, PAPI_DOM_ALL is all we support */ int EMON_set_domain( hwd_control_state_t * cntrl, int domain ) { EMONDBG( "EMON_set_domain\n" ); ( void ) cntrl; if ( PAPI_DOM_ALL != domain ) return ( PAPI_EINVAL ); return ( PAPI_OK ); } /* * */ int EMON_reset( hwd_context_t * ctx, hwd_control_state_t * ptr ) { EMONDBG( "EMON_reset\n" ); ( void ) ctx; int retval; EMON_control_state_t * this_state = ( EMON_control_state_t * ) ptr; ( void ) this_state; ( void ) retval; memset( this_state->counters, 0x0, sizeof(long long) * EMON_MAX_COUNTERS); return ( PAPI_OK ); } /* * Native Event functions */ int 
EMON_ntv_enum_events( unsigned int *EventCode, int modifier ) { EMONDBG( "EMON_ntv_enum_events, EventCode = %#x\n", *EventCode ); switch ( modifier ) { case PAPI_ENUM_FIRST: *EventCode = 0; return ( PAPI_OK ); break; case PAPI_ENUM_EVENTS: { int index = ( *EventCode ); if ( index < EMON_TOTAL_EVENTS ) { *EventCode = *EventCode + 1; return ( PAPI_OK ); } else { return ( PAPI_ENOEVNT ); } break; } default: return ( PAPI_EINVAL ); } return ( PAPI_EINVAL ); } /* * */ int EMON_ntv_code_to_name( unsigned int EventCode, char *name, int len ) { EMONDBG( "EMON_ntv_code_to_name\n" ); int index; ( void ) name; ( void ) len; index = ( EventCode ); if ( index >= EMON_TOTAL_EVENTS || index < 0 ) { return PAPI_ENOEVNT; } strncpy( name, EMON_native_table[index].name, len ); return ( PAPI_OK ); } /* * */ int EMON_ntv_name_to_code( const char *name, unsigned int *code ) { int index; for ( index = 0; index < EMON_TOTAL_EVENTS; index++ ) { if ( 0 == strcmp( name, EMON_native_table[index].name ) ) { *code = index; } } return ( PAPI_OK ); } int EMON_ntv_code_to_descr( unsigned int EventCode, char *name, int len ) { EMONDBG( "EMON_ntv_code_to_descr\n" ); int index; ( void ) name; ( void ) len; index = ( EventCode ) ; if ( index >= EMON_TOTAL_EVENTS || index < 0 ) { return PAPI_ENOEVNT; } strncpy( name, EMON_native_table[index].description, len ); return ( PAPI_OK ); } /* * */ int EMON_ntv_code_to_bits( unsigned int EventCode, hwd_register_t * bits ) { EMONDBG( "EMON_ntv_code_to_bits\n" ); ( void ) EventCode; ( void ) bits; return ( PAPI_OK ); } int EMON_ntv_code_to_info(unsigned int EventCode, PAPI_event_info_t *info) { int index = EventCode; if ( ( index < 0) || (index >= EMON_TOTAL_EVENTS )) return PAPI_ENOEVNT; strncpy( info->symbol, EMON_native_table[index].name, sizeof(info->symbol)); strncpy( info->long_descr, EMON_native_table[index].description, sizeof(info->symbol)); //strncpy( info->units, rapl_native_events[index].units, //sizeof(info->units)); info->data_type = 
EMON_native_table[index].return_type; return PAPI_OK; } /* * */ papi_vector_t _emon_vector = { .cmp_info = { /* default component information (unspecified values are initialized to 0) */ .name = "EMON", .short_name = "EMON", .description = "Blue Gene/Q EMON component", .num_native_events = EMON_MAX_COUNTERS, .num_cntrs = EMON_MAX_COUNTERS, .num_mpx_cntrs = EMON_MAX_COUNTERS, .default_domain = PAPI_DOM_ALL, .available_domains = PAPI_DOM_ALL, .default_granularity = PAPI_GRN_SYS, .available_granularities = PAPI_GRN_SYS, .hardware_intr_sig = PAPI_INT_SIGNAL, .hardware_intr = 1, .kernel_multiplex = 0, /* component specific cmp_info initializations */ .fast_real_timer = 0, .fast_virtual_timer = 0, .attach = 0, .attach_must_ptrace = 0, } , /* sizes of framework-opaque component-private structures */ .size = { .context = sizeof ( EMON_context_t ), .control_state = sizeof ( EMON_control_state_t ), .reg_value = sizeof ( EMON_register_t ), .reg_alloc = sizeof ( EMON_reg_alloc_t ), } , /* function pointers in this component */ .init_thread = EMON_init_thread, .init_component = EMON_init_component, .init_control_state = EMON_init_control_state, .start = EMON_start, .stop = EMON_stop, .read = EMON_read, .shutdown_thread = EMON_shutdown_thread, .shutdown_component = EMON_shutdown_component, .cleanup_eventset = EMON_cleanup_eventset, .ctl = EMON_ctl, .update_control_state = EMON_update_control_state, .set_domain = EMON_set_domain, .reset = EMON_reset, .ntv_enum_events = EMON_ntv_enum_events, .ntv_code_to_name = EMON_ntv_code_to_name, .ntv_code_to_descr = EMON_ntv_code_to_descr, .ntv_code_to_bits = EMON_ntv_code_to_bits, .ntv_code_to_info = EMON_ntv_code_to_info, }; papi-5.6.0/src/libpfm4/lib/events/intel_bdx_unc_imc_events.h000664 001750 001750 00000063402 13216244364 026143 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2017 Google Inc. 
All rights reserved * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. 
* * PMU: bdx_unc_imc */ static intel_x86_umask_t bdx_unc_m_act_count[]={ { .uname = "BYP", .ucode = 0x800, .udesc = "DRAM Activate Count -- Activate due to Write", }, { .uname = "RD", .ucode = 0x100, .udesc = "DRAM Activate Count -- Activate due to Read", }, { .uname = "WR", .ucode = 0x200, .udesc = "DRAM Activate Count -- Activate due to Write", }, }; static intel_x86_umask_t bdx_unc_m_byp_cmds[]={ { .uname = "ACT", .ucode = 0x100, .udesc = "ACT command issued by 2 cycle bypass", }, { .uname = "CAS", .ucode = 0x200, .udesc = "CAS command issued by 2 cycle bypass", }, { .uname = "PRE", .ucode = 0x400, .udesc = "PRE command issued by 2 cycle bypass", }, }; static intel_x86_umask_t bdx_unc_m_cas_count[]={ { .uname = "ALL", .ucode = 0xf00, .udesc = "DRAM RD_CAS and WR_CAS Commands. All DRAM WR_CAS (w/ and w/out auto-pre)", .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "RD", .ucode = 0x300, .udesc = "DRAM RD_CAS and WR_CAS Commands. All DRAM Reads (RD_CAS + Underfills)", .uflags = INTEL_X86_NCOMBO, }, { .uname = "RD_REG", .ucode = 0x100, .udesc = "DRAM RD_CAS and WR_CAS Commands. All DRAM RD_CAS (w/ and w/out auto-pre)", }, { .uname = "RD_RMM", .ucode = 0x2000, .udesc = "DRAM RD_CAS and WR_CAS Commands. Read CAS issued in RMM", }, { .uname = "RD_UNDERFILL", .ucode = 0x200, .udesc = "DRAM RD_CAS and WR_CAS Commands. Underfill Read Issued", }, { .uname = "RD_WMM", .ucode = 0x1000, .udesc = "DRAM RD_CAS and WR_CAS Commands. Read CAS issued in WMM", }, { .uname = "WR", .ucode = 0xc00, .udesc = "DRAM RD_CAS and WR_CAS Commands. All DRAM WR_CAS (both Modes)", .uflags = INTEL_X86_NCOMBO, }, { .uname = "WR_RMM", .ucode = 0x800, .udesc = "DRAM RD_CAS and WR_CAS Commands. DRAM WR_CAS (w/ and w/out auto-pre) in Read Major Mode", }, { .uname = "WR_WMM", .ucode = 0x400, .udesc = "DRAM RD_CAS and WR_CAS Commands. 
DRAM WR_CAS (w/ and w/out auto-pre) in Write Major Mode", }, }; static intel_x86_umask_t bdx_unc_m_dram_refresh[]={ { .uname = "HIGH", .ucode = 0x400, .udesc = "Number of DRAM Refreshes Issued", }, { .uname = "PANIC", .ucode = 0x200, .udesc = "Number of DRAM Refreshes Issued", }, }; static intel_x86_umask_t bdx_unc_m_major_modes[]={ { .uname = "ISOCH", .ucode = 0x800, .udesc = "Cycles in a Major Mode -- Isoch Major Mode", }, { .uname = "PARTIAL", .ucode = 0x400, .udesc = "Cycles in a Major Mode -- Partial Major Mode", }, { .uname = "READ", .ucode = 0x100, .udesc = "Cycles in a Major Mode -- Read Major Mode", }, { .uname = "WRITE", .ucode = 0x200, .udesc = "Cycles in a Major Mode -- Write Major Mode", }, }; static intel_x86_umask_t bdx_unc_m_power_cke_cycles[]={ { .uname = "RANK0", .ucode = 0x100, .udesc = "CKE_ON_CYCLES by Rank -- DIMM ID", .uflags = INTEL_X86_NCOMBO, }, { .uname = "RANK1", .ucode = 0x200, .udesc = "CKE_ON_CYCLES by Rank -- DIMM ID", .uflags = INTEL_X86_NCOMBO, }, { .uname = "RANK2", .ucode = 0x400, .udesc = "CKE_ON_CYCLES by Rank -- DIMM ID", .uflags = INTEL_X86_NCOMBO, }, { .uname = "RANK3", .ucode = 0x800, .udesc = "CKE_ON_CYCLES by Rank -- DIMM ID", .uflags = INTEL_X86_NCOMBO, }, { .uname = "RANK4", .ucode = 0x1000, .udesc = "CKE_ON_CYCLES by Rank -- DIMM ID", .uflags = INTEL_X86_NCOMBO, }, { .uname = "RANK5", .ucode = 0x2000, .udesc = "CKE_ON_CYCLES by Rank -- DIMM ID", .uflags = INTEL_X86_NCOMBO, }, { .uname = "RANK6", .ucode = 0x4000, .udesc = "CKE_ON_CYCLES by Rank -- DIMM ID", .uflags = INTEL_X86_NCOMBO, }, { .uname = "RANK7", .ucode = 0x8000, .udesc = "CKE_ON_CYCLES by Rank -- DIMM ID", .uflags = INTEL_X86_NCOMBO, }, }; static intel_x86_umask_t bdx_unc_m_power_throttle_cycles[]={ { .uname = "RANK0", .ucode = 0x100, .udesc = "Throttle Cycles for Rank 0 -- DIMM ID", }, { .uname = "RANK1", .ucode = 0x200, .udesc = "Throttle Cycles for Rank 0 -- DIMM ID", }, { .uname = "RANK2", .ucode = 0x400, .udesc = "Throttle Cycles for Rank 0 -- DIMM ID", 
}, { .uname = "RANK3", .ucode = 0x800, .udesc = "Throttle Cycles for Rank 0 -- DIMM ID", }, { .uname = "RANK4", .ucode = 0x1000, .udesc = "Throttle Cycles for Rank 0 -- DIMM ID", }, { .uname = "RANK5", .ucode = 0x2000, .udesc = "Throttle Cycles for Rank 0 -- DIMM ID", }, { .uname = "RANK6", .ucode = 0x4000, .udesc = "Throttle Cycles for Rank 0 -- DIMM ID", }, { .uname = "RANK7", .ucode = 0x8000, .udesc = "Throttle Cycles for Rank 0 -- DIMM ID", }, }; static intel_x86_umask_t bdx_unc_m_preemption[]={ { .uname = "RD_PREEMPT_RD", .ucode = 0x100, .udesc = "Read Preemption Count -- Read over Read Preemption", }, { .uname = "RD_PREEMPT_WR", .ucode = 0x200, .udesc = "Read Preemption Count -- Read over Write Preemption", }, }; static intel_x86_umask_t bdx_unc_m_pre_count[]={ { .uname = "BYP", .ucode = 0x1000, .udesc = "DRAM Precharge commands. -- Precharge due to bypass", }, { .uname = "PAGE_CLOSE", .ucode = 0x200, .udesc = "DRAM Precharge commands. -- Precharge due to timer expiration", }, { .uname = "PAGE_MISS", .ucode = 0x100, .udesc = "DRAM Precharge commands. -- Precharges due to page miss", }, { .uname = "RD", .ucode = 0x400, .udesc = "DRAM Precharge commands. -- Precharge due to read", }, { .uname = "WR", .ucode = 0x800, .udesc = "DRAM Precharge commands. 
-- Precharge due to write", }, }; static intel_x86_umask_t bdx_unc_m_rd_cas_prio[]={ { .uname = "HIGH", .ucode = 0x400, .udesc = "Read CAS issued with HIGH priority", }, { .uname = "LOW", .ucode = 0x100, .udesc = "Read CAS issued with LOW priority", }, { .uname = "MED", .ucode = 0x200, .udesc = "Read CAS issued with MEDIUM priority", }, { .uname = "PANIC", .ucode = 0x800, .udesc = "Read CAS issued with PANIC NON ISOCH priority (starved)", }, }; static intel_x86_umask_t bdx_unc_m_rd_cas_rank0[]={ { .uname = "ALLBANKS", .ucode = 0x1000, .udesc = "Access to Rank 0 -- All Banks", .uflags = INTEL_X86_NCOMBO, }, { .uname = "BANK0", .ucode = 0x0, .udesc = "Access to Rank 0 -- Bank 0", .uflags = INTEL_X86_NCOMBO, }, { .uname = "BANK1", .ucode = 0x100, .udesc = "Access to Rank 0 -- Bank 1", .uflags = INTEL_X86_NCOMBO, }, { .uname = "BANK10", .ucode = 0xa00, .udesc = "Access to Rank 0 -- Bank 10", .uflags = INTEL_X86_NCOMBO, }, { .uname = "BANK11", .ucode = 0xb00, .udesc = "Access to Rank 0 -- Bank 11", .uflags = INTEL_X86_NCOMBO, }, { .uname = "BANK12", .ucode = 0xc00, .udesc = "Access to Rank 0 -- Bank 12", .uflags = INTEL_X86_NCOMBO, }, { .uname = "BANK13", .ucode = 0xd00, .udesc = "Access to Rank 0 -- Bank 13", .uflags = INTEL_X86_NCOMBO, }, { .uname = "BANK14", .ucode = 0xe00, .udesc = "Access to Rank 0 -- Bank 14", .uflags = INTEL_X86_NCOMBO, }, { .uname = "BANK15", .ucode = 0xf00, .udesc = "Access to Rank 0 -- Bank 15", .uflags = INTEL_X86_NCOMBO, }, { .uname = "BANK2", .ucode = 0x200, .udesc = "Access to Rank 0 -- Bank 2", .uflags = INTEL_X86_NCOMBO, }, { .uname = "BANK3", .ucode = 0x300, .udesc = "Access to Rank 0 -- Bank 3", .uflags = INTEL_X86_NCOMBO, }, { .uname = "BANK4", .ucode = 0x400, .udesc = "Access to Rank 0 -- Bank 4", .uflags = INTEL_X86_NCOMBO, }, { .uname = "BANK5", .ucode = 0x500, .udesc = "Access to Rank 0 -- Bank 5", .uflags = INTEL_X86_NCOMBO, }, { .uname = "BANK6", .ucode = 0x600, .udesc = "Access to Rank 0 -- Bank 6", .uflags = INTEL_X86_NCOMBO, 
}, { .uname = "BANK7", .ucode = 0x700, .udesc = "Access to Rank 0 -- Bank 7", .uflags = INTEL_X86_NCOMBO, }, { .uname = "BANK8", .ucode = 0x800, .udesc = "Access to Rank 0 -- Bank 8", .uflags = INTEL_X86_NCOMBO, }, { .uname = "BANK9", .ucode = 0x900, .udesc = "Access to Rank 0 -- Bank 9", .uflags = INTEL_X86_NCOMBO, }, { .uname = "BANKG0", .ucode = 0x1100, .udesc = "Access to Rank 0 -- Bank Group 0 (Banks 0-3)", .uflags = INTEL_X86_NCOMBO, }, { .uname = "BANKG1", .ucode = 0x1200, .udesc = "Access to Rank 0 -- Bank Group 1 (Banks 4-7)", .uflags = INTEL_X86_NCOMBO, }, { .uname = "BANKG2", .ucode = 0x1300, .udesc = "Access to Rank 0 -- Bank Group 2 (Banks 8-11)", .uflags = INTEL_X86_NCOMBO, }, { .uname = "BANKG3", .ucode = 0x1400, .udesc = "Access to Rank 0 -- Bank Group 3 (Banks 12-15)", .uflags = INTEL_X86_NCOMBO, }, }; static intel_x86_umask_t bdx_unc_m_rd_cas_rank2[]={ { .uname = "BANK0", .ucode = 0x0, .udesc = "RD_CAS Access to Rank 2 -- Bank 0", .uflags = INTEL_X86_DFL, }, }; static intel_x86_umask_t bdx_unc_m_vmse_wr_push[]={ { .uname = "RMM", .ucode = 0x200, .udesc = "VMSE WR PUSH issued -- VMSE write PUSH issued in RMM", }, { .uname = "WMM", .ucode = 0x100, .udesc = "VMSE WR PUSH issued -- VMSE write PUSH issued in WMM", }, }; static intel_x86_umask_t bdx_unc_m_wmm_to_rmm[]={ { .uname = "LOW_THRESH", .ucode = 0x100, .udesc = "Transition from WMM to RMM because of low threshold -- Transition from WMM to RMM because of starve counter", }, { .uname = "STARVE", .ucode = 0x200, .udesc = "Transition from WMM to RMM because of low threshold -- ", }, { .uname = "VMSE_RETRY", .ucode = 0x400, .udesc = "Transition from WMM to RMM because of low threshold -- ", }, }; static intel_x86_entry_t intel_bdx_unc_m_pe[]={ { .name = "UNC_M_CLOCKTICKS", .desc = "IMC Uncore clockticks (fixed counter)", .modmsk = 0x0, .cntmsk = 0x100000000ull, .code = 0xff, /* perf pseudo encoding for fixed counter */ .flags = INTEL_X86_FIXED, }, { .name = "UNC_M_ACT_COUNT", .code = 0x1, .desc = 
"Counts the number of DRAM Activate commands sent on this channel. Activate commands are issued to open up a page on the DRAM devices so that it can be read or written to with a CAS. One can calculate the number of Page Misses by subtracting the number of Page Miss precharges from the number of Activates.", .modmsk = BDX_UNC_IMC_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_m_act_count, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_m_act_count), }, { .name = "UNC_M_BYP_CMDS", .code = 0xa1, .desc = "TBD", .modmsk = BDX_UNC_IMC_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_m_byp_cmds, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_m_byp_cmds), }, { .name = "UNC_M_CAS_COUNT", .code = 0x4, .desc = "DRAM RD_CAS and WR_CAS Commands", .modmsk = BDX_UNC_IMC_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_m_cas_count, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_m_cas_count), }, { .name = "UNC_M_DCLOCKTICKS", .code = 0x0, .desc = "TBD", .modmsk = BDX_UNC_IMC_ATTRS, .cntmsk = 0xf, }, { .name = "UNC_M_DRAM_PRE_ALL", .code = 0x6, .desc = "Counts the number of times that the precharge all command was sent.", .modmsk = BDX_UNC_IMC_ATTRS, .cntmsk = 0xf, }, { .name = "UNC_M_DRAM_REFRESH", .code = 0x5, .desc = "Counts the number of refreshes issued.", .modmsk = BDX_UNC_IMC_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_m_dram_refresh, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_m_dram_refresh), }, { .name = "UNC_M_ECC_CORRECTABLE_ERRORS", .code = 0x9, .desc = "Counts the number of ECC errors detected and corrected by the iMC on this channel. This counter is only useful with ECC DRAM devices. This count will increment one time for each correction regardless of the number of bits corrected. The iMC can correct up to 4 bit errors in independent channel mode and 8 bit erros in lockstep mode.", .modmsk = BDX_UNC_IMC_ATTRS, .cntmsk = 0xf, }, { .name = "UNC_M_MAJOR_MODES", .code = 0x7, .desc = "Counts the total number of cycles spent in a major mode (selected by a filter) on the given channel. 
Major modea are channel-wide, and not a per-rank (or dimm or bank) mode.", .modmsk = BDX_UNC_IMC_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_m_major_modes, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_m_major_modes), }, { .name = "UNC_M_POWER_CHANNEL_DLLOFF", .code = 0x84, .desc = "Number of cycles when all the ranks in the channel are in CKE Slow (DLLOFF) mode.", .modmsk = BDX_UNC_IMC_ATTRS, .cntmsk = 0xf, }, { .name = "UNC_M_POWER_CHANNEL_PPD", .code = 0x85, .desc = "Number of cycles when all the ranks in the channel are in PPD mode. If IBT=off is enabled, then this can be used to count those cycles. If it is not enabled, then this can count the number of cycles when that could have been taken advantage of.", .modmsk = BDX_UNC_IMC_ATTRS, .cntmsk = 0xf, }, { .name = "UNC_M_POWER_CKE_CYCLES", .code = 0x83, .desc = "Number of cycles spent in CKE ON mode. The filter allows you to select a rank to monitor. If multiple ranks are in CKE ON mode at one time, the counter will ONLY increment by one rather than doing accumulation. Multiple counters will need to be used to track multiple ranks simultaneously. There is no distinction between the different CKE modes (APD, PPDS, PPDF). This can be determined based on the system programming. These events should commonly be used with Invert to get the number of cycles in power saving mode. Edge Detect is also useful here. Make sure that you do NOT use Invert with Edge Detect (this just confuses the system and is not necessary).", .modmsk = BDX_UNC_IMC_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_m_power_cke_cycles, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_m_power_cke_cycles), }, { .name = "UNC_M_POWER_CRITICAL_THROTTLE_CYCLES", .code = 0x86, .desc = "Counts the number of cycles when the iMC is in critical thermal throttling. When this happens, all traffic is blocked. This should be rare unless something bad is going on in the platform. 
There is no filtering by rank for this event.", .modmsk = BDX_UNC_IMC_ATTRS, .cntmsk = 0xf, }, { .name = "UNC_M_POWER_PCU_THROTTLING", .code = 0x42, .desc = "TBD", .modmsk = BDX_UNC_IMC_ATTRS, .cntmsk = 0xf, }, { .name = "UNC_M_POWER_SELF_REFRESH", .code = 0x43, .desc = "Counts the number of cycles when the iMC is in self-refresh and the iMC still has a clock. This happens in some package C-states. For example, the PCU may ask the iMC to enter self-refresh even though some of the cores are still processing. One use of this is for Monroe technology. Self-refresh is required during package C3 and C6, but there is no clock in the iMC at this time, so it is not possible to count these cases.", .modmsk = BDX_UNC_IMC_ATTRS, .cntmsk = 0xf, }, { .name = "UNC_M_POWER_THROTTLE_CYCLES", .code = 0x41, .desc = "Counts the number of cycles while the iMC is being throttled by either thermal constraints or by the PCU throttling. It is not possible to distinguish between the two. This can be filtered by rank. If multiple ranks are selected and are being throttled at the same time, the counter will only increment by 1.", .modmsk = BDX_UNC_IMC_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_m_power_throttle_cycles, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_m_power_throttle_cycles), }, { .name = "UNC_M_PREEMPTION", .code = 0x8, .desc = "Counts the number of times a read in the iMC preempts another read or write. Generally reads to an open page are issued ahead of requests to closed pages. This improves the page hit rate of the system. However, high priority requests can cause pages of active requests to be closed in order to get them out. 
This will reduce the latency of the high-priority request at the expense of lower bandwidth and increased overall average latency.", .modmsk = BDX_UNC_IMC_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_m_preemption, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_m_preemption), }, { .name = "UNC_M_PRE_COUNT", .code = 0x2, .desc = "Counts the number of DRAM Precharge commands sent on this channel.", .modmsk = BDX_UNC_IMC_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_m_pre_count, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_m_pre_count), }, { .name = "UNC_M_RD_CAS_PRIO", .code = 0xa0, .desc = "TBD", .modmsk = BDX_UNC_IMC_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_m_rd_cas_prio, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_m_rd_cas_prio), }, { .name = "UNC_M_RD_CAS_RANK0", .code = 0xb0, .desc = "TBD", .modmsk = BDX_UNC_IMC_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_m_rd_cas_rank0, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_m_rd_cas_rank0), }, { .name = "UNC_M_RD_CAS_RANK1", .code = 0xb1, .desc = "TBD", .modmsk = BDX_UNC_IMC_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_m_rd_cas_rank0, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_m_rd_cas_rank0), /* shared */ }, { .name = "UNC_M_RD_CAS_RANK2", .code = 0xb2, .desc = "TBD", .modmsk = BDX_UNC_IMC_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_m_rd_cas_rank2, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_m_rd_cas_rank2), }, { .name = "UNC_M_RD_CAS_RANK4", .code = 0xb4, .desc = "TBD", .modmsk = BDX_UNC_IMC_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_m_rd_cas_rank0, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_m_rd_cas_rank0), /* shared */ }, { .name = "UNC_M_RD_CAS_RANK5", .code = 0xb5, .desc = "TBD", .modmsk = BDX_UNC_IMC_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_m_rd_cas_rank0, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_m_rd_cas_rank0), /* shared */ }, { .name = "UNC_M_RD_CAS_RANK6", .code = 0xb6, .desc = "TBD", .modmsk = BDX_UNC_IMC_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_m_rd_cas_rank0, .numasks= 
LIBPFM_ARRAY_SIZE(bdx_unc_m_rd_cas_rank0), /* shared */ }, { .name = "UNC_M_RD_CAS_RANK7", .code = 0xb7, .desc = "TBD", .modmsk = BDX_UNC_IMC_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_m_rd_cas_rank0, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_m_rd_cas_rank0), /* shared */ }, { .name = "UNC_M_RPQ_CYCLES_NE", .code = 0x11, .desc = "Counts the number of cycles that the Read Pending Queue is not empty. This can then be used to calculate the average occupancy (in conjunction with the Read Pending Queue Occupancy count). The RPQ is used to schedule reads out to the memory controller and to track the requests. Requests allocate into the RPQ soon after they enter the memory controller, and need credits for an entry in this buffer before being sent from the HA to the iMC. They deallocate after the CAS command has been issued to memory. This filter is to be used in conjunction with the occupancy filter so that one can correctly track the average occupancies for schedulable entries and scheduled requests.", .modmsk = BDX_UNC_IMC_ATTRS, .cntmsk = 0xf, }, { .name = "UNC_M_RPQ_INSERTS", .code = 0x10, .desc = "Counts the number of allocations into the Read Pending Queue. This queue is used to schedule reads out to the memory controller and to track the requests. Requests allocate into the RPQ soon after they enter the memory controller, and need credits for an entry in this buffer before being sent from the HA to the iMC. They deallocate after the CAS command has been issued to memory. 
This includes both ISOCH and non-ISOCH requests.", .modmsk = BDX_UNC_IMC_ATTRS, .cntmsk = 0xf, }, { .name = "UNC_M_VMSE_MXB_WR_OCCUPANCY", .code = 0x91, .desc = "TBD", .modmsk = BDX_UNC_IMC_ATTRS, .cntmsk = 0xf, }, { .name = "UNC_M_VMSE_WR_PUSH", .code = 0x90, .desc = "TBD", .modmsk = BDX_UNC_IMC_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_m_vmse_wr_push, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_m_vmse_wr_push), }, { .name = "UNC_M_WMM_TO_RMM", .code = 0xc0, .desc = "TBD", .modmsk = BDX_UNC_IMC_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_m_wmm_to_rmm, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_m_wmm_to_rmm), }, { .name = "UNC_M_WPQ_CYCLES_FULL", .code = 0x22, .desc = "Counts the number of cycles when the Write Pending Queue is full. When the WPQ is full, the HA will not be able to issue any additional read requests into the iMC. This count should be similar count in the HA which tracks the number of cycles that the HA has no WPQ credits, just somewhat smaller to account for the credit return overhead.", .modmsk = BDX_UNC_IMC_ATTRS, .cntmsk = 0xf, }, { .name = "UNC_M_WPQ_CYCLES_NE", .code = 0x21, .desc = "Counts the number of cycles that the Write Pending Queue is not empty. This can then be used to calculate the average queue occupancy (in conjunction with the WPQ Occupancy Accumulation count). The WPQ is used to schedule write out to the memory controller and to track the writes. Requests allocate into the WPQ soon after they enter the memory controller, and need credits for an entry in this buffer before being sent from the HA to the iMC. They deallocate after being issued to DRAM. Write requests themselves are able to complete (from the perspective of the rest of the system) as soon they have posted to the iMC. This is not to be confused with actually performing the write to DRAM. 
Therefore, the average latency for this queue is actually not useful for deconstruction intermediate write latencieies.", .modmsk = BDX_UNC_IMC_ATTRS, .cntmsk = 0xf, }, { .name = "UNC_M_WPQ_READ_HIT", .code = 0x23, .desc = "Counts the number of times a request hits in the WPQ (write-pending queue). The iMC allows writes and reads to pass up other writes to different addresses. Before a read or a write is issued, it will first CAM the WPQ to see if there is a write pending to that address. When reads hit, they are able to directly pull their data from the WPQ instead of going to memory. Writes that hit will overwrite the existing data. Partial writes that hit will not need to do underfill reads and will simply update their relevant sections.", .modmsk = BDX_UNC_IMC_ATTRS, .cntmsk = 0xf, }, { .name = "UNC_M_WPQ_WRITE_HIT", .code = 0x24, .desc = "Counts the number of times a request hits in the WPQ (write-pending queue). The iMC allows writes and reads to pass up other writes to different addresses. Before a read or a write is issued, it will first CAM the WPQ to see if there is a write pending to that address. When reads hit, they are able to directly pull their data from the WPQ instead of going to memory. Writes that hit will overwrite the existing data. 
Partial writes that hit will not need to do underfill reads and will simply update their relevant sections.",
    .modmsk = BDX_UNC_IMC_ATTRS,
    .cntmsk = 0xf,
  },
  { .name = "UNC_M_WRONG_MM",
    .code = 0xc1,
    .desc = "TBD",
    .modmsk = BDX_UNC_IMC_ATTRS,
    .cntmsk = 0xf,
  },
  /*
   * The write-CAS rank events below reuse the read-CAS rank 0 umask table.
   * NOTE(review): only ranks 0, 1, 4, 5, 6 and 7 have WR_CAS entries here --
   * confirm whether ranks 2 and 3 were left out on purpose.
   */
  { .name = "UNC_M_WR_CAS_RANK0",
    .code = 0xb8,
    .desc = "TBD",
    .modmsk = BDX_UNC_IMC_ATTRS,
    .cntmsk = 0xf,
    .ngrp = 1,
    .umasks = bdx_unc_m_rd_cas_rank0,
    .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_m_rd_cas_rank0),
  },
  { .name = "UNC_M_WR_CAS_RANK1",
    .code = 0xb9,
    .desc = "TBD",
    .modmsk = BDX_UNC_IMC_ATTRS,
    .cntmsk = 0xf,
    .ngrp = 1,
    .umasks = bdx_unc_m_rd_cas_rank0,
    .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_m_rd_cas_rank0), /* shared */
  },
  { .name = "UNC_M_WR_CAS_RANK4",
    .code = 0xbc,
    .desc = "TBD",
    .modmsk = BDX_UNC_IMC_ATTRS,
    .cntmsk = 0xf,
    .ngrp = 1,
    .umasks = bdx_unc_m_rd_cas_rank0,
    .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_m_rd_cas_rank0), /* shared */
  },
  { .name = "UNC_M_WR_CAS_RANK5",
    .code = 0xbd,
    .desc = "TBD",
    .modmsk = BDX_UNC_IMC_ATTRS,
    .cntmsk = 0xf,
    .ngrp = 1,
    .umasks = bdx_unc_m_rd_cas_rank0,
    .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_m_rd_cas_rank0), /* shared */
  },
  { .name = "UNC_M_WR_CAS_RANK6",
    .code = 0xbe,
    .desc = "TBD",
    .modmsk = BDX_UNC_IMC_ATTRS,
    .cntmsk = 0xf,
    .ngrp = 1,
    .umasks = bdx_unc_m_rd_cas_rank0,
    .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_m_rd_cas_rank0), /* shared */
  },
  { .name = "UNC_M_WR_CAS_RANK7",
    .code = 0xbf,
    .desc = "TBD",
    .modmsk = BDX_UNC_IMC_ATTRS,
    .cntmsk = 0xf,
    .ngrp = 1,
    .umasks = bdx_unc_m_rd_cas_rank0,
    .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_m_rd_cas_rank0), /* shared */
  },
};
papi-5.6.0/src/components/perf_event/pe_libpfm4_events.h000664 001750 001750 00000002315 13216244357 025430 0ustar00jshenry1963jshenry1963000000 000000
/*
 * File: pe_libpfm4_events.h
 */

/* Prototypes for libpfm name library access */

/*
 * NOTE(review): this header uses struct native_event_table_t, papi_vector_t
 * and PAPI_event_info_t without declaring or including them; presumably every
 * includer pulls in the PAPI headers first -- confirm.
 */

int _pe_libpfm4_setup_presets( char *name, int type, int cidx );

int _pe_libpfm4_ntv_enum_events( unsigned int *EventCode, int modifier, int cidx, struct native_event_table_t *event_table);

/*
 * Resolve an event name to its libpfm4 event index, stored through EventCode.
 * The implementation in pe_libpfm4_events.c returns PAPI_OK on success and
 * PAPI_ENOEVNT when the name cannot be found or allocated.
 */
int
_pe_libpfm4_ntv_name_to_code( const char *ntv_name, unsigned int *EventCode, int cidx, struct native_event_table_t *event_table);

/*
 * Write the name of the event identified by EventCode into name, a buffer of
 * len bytes.  The implementation returns PAPI_OK, PAPI_ENOEVNT when no table
 * entry matches, or PAPI_EBUF when the name does not fit.
 */
int _pe_libpfm4_ntv_code_to_name( unsigned int EventCode, char *name, int len, struct native_event_table_t *event_table);

/* Same parameter list as the name variant above; here name receives a description. */
int _pe_libpfm4_ntv_code_to_descr( unsigned int EventCode, char *name, int len, struct native_event_table_t *event_table);

int _pe_libpfm4_shutdown(papi_vector_t *my_vector, struct native_event_table_t *event_table);

int _pe_libpfm4_ntv_code_to_info(unsigned int EventCode, PAPI_event_info_t *info, struct native_event_table_t *event_table);

/* The two init entry points take identical parameter lists. */
int _pe_libpfm4_init(papi_vector_t *my_vector, int cidx, struct native_event_table_t *event_table, int pmu_type);

int _peu_libpfm4_init(papi_vector_t *my_vector, int cidx, struct native_event_table_t *event_table, int pmu_type);
papi-5.6.0/src/perfctr-2.7.x/etc/costs/Athlon-500000664 001750 001750 00000001012 13216244367 023121 0ustar00jshenry1963jshenry1963000000 000000
[data from a 500MHz Athlon]
PERFCTR INIT: vendor 2, family 6, model 1
PERFCTR INIT: NITER == 64
PERFCTR INIT: rdpmc ticks == 930
PERFCTR INIT: rdmsr (counter) ticks == 3401
PERFCTR INIT: rdmsr (evntsel) ticks == 3454
PERFCTR INIT: wrmsr (counter) ticks == 5197
PERFCTR INIT: wrmsr (evntsel) ticks == 14915
PERFCTR INIT: loop overhead ticks == 98
PERFCTR INIT: Athlon test0 == 0 (ok)
PERFCTR INIT: Athlon test1 == 43 (ok)
PERFCTR INIT: Athlon test2 == 43 (EvntSel0 does not override)
PERFCTR INIT: Athlon test3 == 0 (ok)
papi-5.6.0/src/components/perf_event/pe_libpfm4_events.c000664 001750 001750 00000127420 13216244357 025430 0ustar00jshenry1963jshenry1963000000 000000
/*
 * File: pe_libpfm4_events.c
 * Author: Vince Weaver vincent.weaver@maine.edu
 * Mods: Gary Mohr
 * gary.mohr@bull.com
 * Modified the perf_event component to use PFM_OS_PERF_EVENT_EXT mode in libpfm4.
* This adds several new event masks, including cpu=, u=, and k= which give the user * the ability to set cpu number to use or control the domain (user, kernel, or both) * in which the counter should be incremented. These are event masks so it is now * possible to have multiple events in the same event set that count activity from * differennt cpu's or count activity in different domains. * * Handle the libpfm4 event interface for the perf_event component */ #include #include "papi.h" #include "papi_internal.h" #include "papi_vector.h" #include "papi_libpfm4_events.h" #include "pe_libpfm4_events.h" #include "perf_event_lib.h" #include "perfmon/pfmlib.h" #include "perfmon/pfmlib_perf_event.h" #define NATIVE_EVENT_CHUNK 1024 // used to step through the attributes when enumerating events static int attr_idx; /** @class find_existing_event * @brief looks up an event, returns it if it exists * * @param[in] name * -- name of the event * @param[in] event_table * -- native_event_table structure * * @returns returns offset in array * */ static int find_existing_event(const char *name, struct native_event_table_t *event_table) { SUBDBG("Entry: name: %s, event_table: %p, num_native_events: %d\n", name, event_table, event_table->num_native_events); int i,event=PAPI_ENOEVNT; _papi_hwi_lock( NAMELIB_LOCK ); for(i=0;inum_native_events;i++) { // Most names passed in will contain the pmu name, so first we compare to the allocated name (it has pmu name on front) if (!strcmp(name,event_table->native_events[i].allocated_name)) { SUBDBG("Found allocated_name: %s, libpfm4_idx: %#x, papi_event_code: %#x\n", event_table->native_events[i].allocated_name, event_table->native_events[i].libpfm4_idx, event_table->native_events[i].papi_event_code); event=i; break; } // some callers have an event name without the pmu name on the front, so we also compare to the base name (just the event name part) if (!strcmp(name,event_table->native_events[i].base_name)) { int nameLen = 
strlen(event_table->native_events[i].base_name); // the name we are looking for must be the same length as this event table entry name for them to match if (strlen(name) != nameLen + strlen(event_table->native_events[i].mask_string) + 1) { continue; } if(!strcmp(name+nameLen+1, event_table->native_events[i].mask_string)) { SUBDBG("Found base_name: %s, mask_string: %s, libpfm4_idx: %#x, papi_event_code: %#x\n", event_table->native_events[i].base_name, event_table->native_events[i].mask_string , event_table->native_events[i].libpfm4_idx, event_table->native_events[i].papi_event_code); event=i; break; } } } _papi_hwi_unlock( NAMELIB_LOCK ); SUBDBG("EXIT: returned: %#x\n", event); return event; } static int pmu_is_present_and_right_type(pfm_pmu_info_t *pinfo, int type) { SUBDBG("ENTER: pinfo: %s %p, pinfo->is_present: %d, " "pinfo->type: %#x, type: %#x\n", pinfo->name, pinfo, pinfo->is_present, pinfo->type, type); if (!pinfo->is_present) { // SUBDBG("EXIT: not present\n"); return 0; } if ((pinfo->type==PFM_PMU_TYPE_UNCORE) && (type&PMU_TYPE_UNCORE)) { // SUBDBG("EXIT: found PFM_PMU_TYPE_UNCORE\n"); return 1; } if ((pinfo->type==PFM_PMU_TYPE_CORE) && (type&PMU_TYPE_CORE)) { // SUBDBG("EXIT: found PFM_PMU_TYPE_CORE\n"); return 1; } if ((pinfo->type==PFM_PMU_TYPE_OS_GENERIC) && (type&PMU_TYPE_OS)) { // SUBDBG("EXIT: found PFM_PMU_TYPE_OS_GENERIC\n"); return 1; } // SUBDBG("EXIT: not right type\n"); return 0; } /** @class allocate_native_event * @brief Allocates a native event * * @param[in] name * -- name of the event * @param[in] libpfm4_idx * -- libpfm4 identifier for the event * @param[in] cidx * -- PAPI component index * @param[in] event_table * -- native event table struct * * @returns returns a native_event_t or NULL * */ static struct native_event_t *allocate_native_event( const char *name, int libpfm4_index, int cidx, struct native_event_table_t *event_table) { SUBDBG("ENTER: name: %s, libpfm4_index: %#x, event_table: %p, " "event_table->pmu_type: %d\n", name, 
libpfm4_index, event_table, event_table->pmu_type); int nevt_idx; int event_num; int encode_failed=0; pfm_err_t ret; char *event_string=NULL; char *pmu_name; char *event; char *masks; char fullname[BUFSIZ]; struct native_event_t *ntv_evt; pfm_perf_encode_arg_t perf_arg; pfm_event_info_t einfo; pfm_event_attr_info_t ainfo; pfm_pmu_info_t pinfo; // if no place to put native events, report that allocate failed if (event_table->native_events==NULL) { SUBDBG("EXIT: no place to put native events\n"); return NULL; } // find out if this event is already known event_num=find_existing_event(name, event_table); /* add the event to our event table */ _papi_hwi_lock( NAMELIB_LOCK ); // if we already know this event name, // it was created as part of setting up the preset tables // we need to use the event table which is already created if (event_num >= 0) { nevt_idx = event_num; ntv_evt = &(event_table->native_events[event_num]); } else { // set to use a new event table // (count of used events not bumped // until we are sure setting it up does not get an errror) nevt_idx = event_table->num_native_events; ntv_evt = &(event_table->native_events[nevt_idx]); } SUBDBG("event_num: %d, nevt_idx: %d, ntv_evt: %p\n", event_num, nevt_idx, ntv_evt); /* clear the argument and attribute structures */ memset(&perf_arg,0,sizeof(pfm_perf_encode_arg_t)); memset(&(ntv_evt->attr),0,sizeof(struct perf_event_attr)); // set argument structure fields so the encode // function can give us what we need perf_arg.attr=&ntv_evt->attr; perf_arg.fstr=&event_string; /* use user provided name of the event to get the */ /* perf_event encoding and a fully qualified event string */ ret = pfm_get_os_event_encoding(name, PFM_PLM0 | PFM_PLM3, PFM_OS_PERF_EVENT_EXT, &perf_arg); // If the encode function failed, skip processing of the event_string if ((ret != PFM_SUCCESS) || (event_string == NULL)) { SUBDBG("encode failed for event: %s, returned: %d\n", name, ret); // we need to remember that this event encoding 
failed // but still create the native event table // the event table is used by the list so we put what we // can get into it // but the failure doing the encode causes us to // return null to our caller encode_failed = 1; // Noting the encode_failed error in the attr.config allows // any later validate attempts to return an error value // ??? .config is 64-bits? --vmw ntv_evt->attr.config = 0xFFFFFF; // we also want to make it look like a cpu number // was not provided as an event mask perf_arg.cpu = -1; // Why don't we just return NULL here? --vmw //return NULL; } // get a copy of the event name and break it up into its parts event_string = strdup(name); SUBDBG("event_string: %s\n", event_string); // get the pmu name, event name and mask list pointers // from the event string event = strstr (event_string, "::"); if (event != NULL) { *event = 0; // null terminate pmu name event += 2; // event name follows '::' pmu_name = strdup(event_string); } else { // no pmu name in event string pmu_name = malloc(2); pmu_name[0] = 0; event = event_string; } masks = strstr (event, ":"); if (masks != NULL) { *masks = 0; // null terminate event name masks += 1; // masks follow : } else { masks = ""; } // build event name to find, put a pmu name on it if we have one if (strlen(pmu_name) == 0) { sprintf(fullname,"%s", event); } else { sprintf(fullname,"%s::%s", pmu_name, event); } SUBDBG("pmu_name: %s, event: %s, masks: %s, fullname: %s\n", pmu_name, event, masks, fullname); // if the libpfm4 index was not provided, // try to get one based on the event name passed in. 
/* This may return a value for a disabled PMU */ if (libpfm4_index == -1) { libpfm4_index = pfm_find_event(fullname); if (libpfm4_index < 0) { free(event_string); free(pmu_name); _papi_hwi_unlock( NAMELIB_LOCK ); SUBDBG("EXIT: error from libpfm4 find event\n"); return NULL; } SUBDBG("libpfm4_index: %#x\n", libpfm4_index); } // get this events information from libpfm4, // if unavailable return event not found (structure be zeroed) memset( &einfo, 0, sizeof( pfm_event_info_t )); einfo.size = sizeof(pfm_event_info_t); if ((ret = pfm_get_event_info(libpfm4_index, PFM_OS_PERF_EVENT_EXT, &einfo)) != PFM_SUCCESS) { free(event_string); free(pmu_name); _papi_hwi_unlock( NAMELIB_LOCK ); SUBDBG("EXIT: pfm_get_event_info failed with %d\n", ret); return NULL; } // if pmu type is not one supported by this component, // return event not found (structure be zeroed) memset(&pinfo,0,sizeof(pfm_pmu_info_t)); pinfo.size = sizeof(pfm_pmu_info_t); pfm_get_pmu_info(einfo.pmu, &pinfo); if (pmu_is_present_and_right_type(&pinfo, event_table->pmu_type) == 0) { free(event_string); free(pmu_name); _papi_hwi_unlock( NAMELIB_LOCK ); SUBDBG("EXIT: PMU not supported by this component: einfo.pmu: %d, PFM_PMU_TYPE_CORE: %d\n", einfo.pmu, PFM_PMU_TYPE_CORE); return NULL; } ntv_evt->allocated_name=strdup(name); ntv_evt->mask_string=strdup(masks); ntv_evt->component=cidx; ntv_evt->pmu=pmu_name; ntv_evt->base_name=strdup(event); ntv_evt->pmu_plus_name=strdup(fullname); ntv_evt->libpfm4_idx=libpfm4_index; ntv_evt->event_description=strdup(einfo.desc); ntv_evt->users=0; /* is this needed? 
*/ ntv_evt->cpu=perf_arg.cpu; SUBDBG("ntv_evt->mask_string: %p (%s)\n", ntv_evt->mask_string, ntv_evt->mask_string); char *msk_ptr = strdup(masks); // get a work copy of the mask string before we free the space it was in free(event_string); char mask_desc[PAPI_HUGE_STR_LEN] = ""; // if there is any mask data, collect their descriptions if ((msk_ptr != NULL) && (strlen(msk_ptr) > 0)) { // go get the descriptions for each of the // masks provided with this event char *ptr = msk_ptr; SUBDBG("ptr: %p (%s)\n", ptr, ptr); while (ptr != NULL) { char *ptrm = strstr(ptr, ":"); if (ptrm != NULL) { *ptrm = '\0'; ptrm++; } // get the length of the mask name char *wrk = strchr(ptr, '='); unsigned int msk_name_len; if (wrk != NULL) { msk_name_len = wrk - ptr; SUBDBG("Found =, length=%d\n",msk_name_len); } else { msk_name_len = strlen (ptr); SUBDBG("No =, length=%d\n",msk_name_len); } int i, mask_found=0; for (i=0 ; i 0) { strcat (mask_desc, ":"); mskleft--; } // if new description will not all fit in buffer, report truncation if (mskleft < (strlen(ainfo.desc) + 1)) { SUBDBG("EXIT: Attribute description truncated: %s\n", ainfo.desc); } // move as much of this description as will fit strncat (mask_desc, ainfo.desc, mskleft-1); mask_desc[mskleft-1] = '\0'; break; } } /* See if we had a mask that wasn't found */ if (!mask_found) { SUBDBG("Mask not found! %s\n",ptr); /* FIXME: do we need to unlock here? 
*/ return NULL; } // if we have filled the work buffer, we can quit now if ( (sizeof(mask_desc) - strlen(mask_desc)) <= 1) { break; } ptr = ptrm; } } ntv_evt->mask_description=strdup(mask_desc); SUBDBG("ntv_evt->mask_description: %p (%s)\n", ntv_evt->mask_description, ntv_evt->mask_description); // give back space if we got any if (msk_ptr != NULL) { free (msk_ptr); } // create a papi table for this native event, put the index into the event sets array of native events into the papi table int new_event_code = _papi_hwi_native_to_eventcode(cidx, libpfm4_index, nevt_idx, ntv_evt->allocated_name); _papi_hwi_set_papi_event_string((const char *)ntv_evt->allocated_name); _papi_hwi_set_papi_event_code(new_event_code, 1); ntv_evt->papi_event_code=new_event_code; SUBDBG("Using %#x as index for %s\n", ntv_evt->libpfm4_idx, fullname); SUBDBG("num_native_events: %d, allocated_native_events: %d\n", event_table->num_native_events, event_table->allocated_native_events); SUBDBG("Native Event: papi_event_code: %#x, libpfm4_idx: %#x, pmu: %s, base_name: %s, mask_string: %s, allocated_name: %s\n", ntv_evt->papi_event_code, ntv_evt->libpfm4_idx, ntv_evt->pmu, ntv_evt->base_name, ntv_evt->mask_string, ntv_evt->allocated_name); SUBDBG("event_table->native_events[%d]: %p, cpu: %d, attr.config: 0x%"PRIx64", attr.config1: 0x%"PRIx64", attr.config2: 0x%"PRIx64", attr.type: 0x%"PRIx32", attr.exclude_user: %d, attr.exclude_kernel: %d, attr.exclude_guest: %d\n", nevt_idx, &(event_table->native_events[nevt_idx]), ntv_evt->cpu, ntv_evt->attr.config, ntv_evt->attr.config1, ntv_evt->attr.config2, ntv_evt->attr.type, ntv_evt->attr.exclude_user, ntv_evt->attr.exclude_kernel, ntv_evt->attr.exclude_guest); /* If we've used all of the allocated native events, */ /* then allocate more room */ if (event_table->num_native_events >= event_table->allocated_native_events-1) { SUBDBG("Allocating more room for native events (%d %ld)\n", (event_table->allocated_native_events+NATIVE_EVENT_CHUNK), 
(long)sizeof(struct native_event_t) * (event_table->allocated_native_events+NATIVE_EVENT_CHUNK)); event_table->native_events=realloc(event_table->native_events, sizeof(struct native_event_t) * (event_table->allocated_native_events+NATIVE_EVENT_CHUNK)); event_table->allocated_native_events+=NATIVE_EVENT_CHUNK; // we got new space so we need to reset // the pointer to the correct native event in the new space ntv_evt = &(event_table->native_events[nevt_idx]); } // if getting more space for native events failed, // report that allocate failed if (event_table->native_events==NULL) { SUBDBG("EXIT: attempt to get more space for " "native events failed\n"); return NULL; } // if we created a new event, bump the number used if (event_num < 0) { event_table->num_native_events++; } _papi_hwi_unlock( NAMELIB_LOCK ); if (encode_failed != 0) { SUBDBG("EXIT: encoding event failed\n"); return NULL; } SUBDBG("EXIT: new_event: %p\n", ntv_evt); return ntv_evt; } /** @class get_first_event_next_pmu * @brief return the first available event that's on an active PMU * * @returns returns a libpfm event number * @retval PAPI_ENOEVENT Could not find an event * */ static int get_first_event_next_pmu(int pmu_idx, int pmu_type) { SUBDBG("ENTER: pmu_idx: %d, pmu_type: %d\n", pmu_idx, pmu_type); int pidx, ret; pfm_pmu_info_t pinfo; // start looking at the next pmu in the list pmu_idx++; /* We loop forever here and exit if pfm_get_pmu_info() fails. */ /* Before we only went up to PFM_PMU_MAX but this is set at */ /* compile time and might not reflect the number of PMUs if */ /* PAPI is dynamically linked against libpfm4. 
*/ while(1) { /* clear the PMU structure (required by libpfm4) */ memset(&pinfo,0,sizeof(pfm_pmu_info_t)); pinfo.size = sizeof(pfm_pmu_info_t); ret=pfm_get_pmu_info(pmu_idx, &pinfo); if (ret==PFM_ERR_INVAL) { break; } if ((ret==PFM_SUCCESS) && pmu_is_present_and_right_type(&pinfo,pmu_type)) { pidx=pinfo.first_event; SUBDBG("First event in pmu: %s is %#x\n", pinfo.name, pidx); if (pidx<0) { /* For some reason no events available */ /* despite the PMU being active. */ /* This can happen, for example with ix86arch */ /* inside of VMware */ } else { SUBDBG("EXIT: pidx: %#x\n", pidx); return pidx; } } pmu_idx++; } SUBDBG("EXIT: PAPI_ENOEVNT\n"); return PAPI_ENOEVNT; } /***********************************************************/ /* Exported functions */ /***********************************************************/ /** @class _pe_libpfm4_ntv_name_to_code * @brief Take an event name and convert it to an event code. * * @param[in] *name * -- name of event to convert * @param[out] *event_code * -- pointer to an integer to hold the event code * @param[in] *cidx * -- PAPI component index * @param[in] event_table * -- native event table struct * * @retval PAPI_OK event was found and an event assigned * @retval PAPI_ENOEVENT event was not found */ int _pe_libpfm4_ntv_name_to_code( const char *name, unsigned int *event_code, int cidx, struct native_event_table_t *event_table) { SUBDBG( "ENTER: name: %s, event_code: %p, *event_code: %#x, event_table: %p\n", name, event_code, *event_code, event_table); struct native_event_t *our_event; int event_num; // if we already know this event name, just return its native code event_num=find_existing_event(name, event_table); if (event_num >= 0) { *event_code=event_table->native_events[event_num].libpfm4_idx; // the following call needs to happen to prevent the internal layer from creating a new papi native event table _papi_hwi_set_papi_event_code(event_table->native_events[event_num].papi_event_code, 1); SUBDBG("EXIT: Found 
papi_event_code: %#x, libpfm4_idx: %#x\n", event_table->native_events[event_num].papi_event_code, event_table->native_events[event_num].libpfm4_idx); return PAPI_OK; } // Try to allocate this event to see if it is known by libpfm4, if allocate fails tell the caller it is not valid our_event=allocate_native_event(name, -1, cidx, event_table); if (our_event==NULL) { SUBDBG("EXIT: Allocating event: '%s' failed\n", name); return PAPI_ENOEVNT; } *event_code = our_event->libpfm4_idx; SUBDBG("EXIT: Found code: %#x\n",*event_code); return PAPI_OK; } /** @class _pe_libpfm4_ntv_code_to_name * @brief Take an event code and convert it to a name * * @param[in] EventCode * -- PAPI event code * @param[out] *ntv_name * -- pointer to a string to hold the name * @param[in] len * -- length of ntv_name string * @param[in] event_table * -- native event table struct * * @retval PAPI_OK The event was found and converted to a name * @retval PAPI_ENOEVENT The event does not exist * @retval PAPI_EBUF The event name was too big for ntv_name */ int _pe_libpfm4_ntv_code_to_name(unsigned int EventCode, char *ntv_name, int len, struct native_event_table_t *event_table) { SUBDBG("ENTER: EventCode: %#x, ntv_name: %p, len: %d, event_table: %p\n", EventCode, ntv_name, len, event_table); int eidx; int papi_event_code; // get the attribute index for this papi event papi_event_code = _papi_hwi_get_papi_event_code(); // a papi event code less than 0 is invalid, return error if (papi_event_code <= 0) { SUBDBG("EXIT: PAPI_ENOEVNT\n"); return PAPI_ENOEVNT; } // find our native event table for this papi event code (search list backwards because it improves chances of finding it quickly) for (eidx=event_table->num_native_events-1 ; eidx>=0 ; eidx--) { if ((papi_event_code == event_table->native_events[eidx].papi_event_code) && (EventCode == ((unsigned)event_table->native_events[eidx].libpfm4_idx))) { SUBDBG("Found native_event[%d]: papi_event_code: %#x, libpfm4_idx: %#x\n", eidx, 
event_table->native_events[eidx].papi_event_code, event_table->native_events[eidx].libpfm4_idx); break; } } // if we did not find a match, return an error if (eidx < 0) { // If we did not find a match in our native event table, then the code passed in has not been // allocated yet It should not be possible to get to this code. The user has to call the papi // code_to_name api with a papi event code for a native event. But the only way to get one of // those is to call either name_to_code or enum_cmp_events first. When one of these calls is // done we allocate the event so it should always be there. SUBDBG("EXIT: PAPI_ENOEVNT\n"); return PAPI_ENOEVNT; } // if this event is defined by the default pmu, then use only the event name // if it is not defined by the default pmu, then use both the pmu name and event name char *ename; if ((event_table->default_pmu.name) && (strcmp(event_table->default_pmu.name, event_table->native_events[eidx].pmu) == 0)) { ename = event_table->native_events[eidx].base_name; } else { ename = event_table->native_events[eidx].pmu_plus_name; } // if it will not fit, return error if (strlen (ename) >= (unsigned)len) { SUBDBG("EXIT: event name %s will not fit in buffer provided\n", ename); return PAPI_EBUF; } strcpy (ntv_name, ename); // if this event had masks, also add their names char *mname = event_table->native_events[eidx].mask_string; if ((mname != NULL) && (strlen(mname) > 0)) { if ((strlen(ename) + 8 + strlen(mname)) >= (unsigned)len) { SUBDBG("EXIT: Not enough room for event and mask descriptions: need: %u, have: %u", (unsigned)(strlen(ename) + 8 + strlen(mname)), (unsigned)len); return PAPI_EBUF; } strcat (ntv_name, ":"); strcat (ntv_name, mname); } SUBDBG("EXIT: event name: %s\n", ntv_name); return PAPI_OK; } /** @class _pe_libpfm4_ntv_code_to_descr * @brief Take an event code and convert it to a description * * @param[in] EventCode * -- PAPI event code * @param[out] *ntv_descr * -- pointer to a string to hold the description * 
@param[in] len * -- length of ntv_descr string * @param[in] event_table * -- native event table struct * * @retval PAPI_OK The event was found and converted to a description * @retval PAPI_ENOEVENT The event does not exist * @retval PAPI_EBUF The event name was too big for ntv_descr * * Return the event description. * If the event has umasks, then include ", masks" and the * umask descriptions follow, separated by commas. */ int _pe_libpfm4_ntv_code_to_descr( unsigned int EventCode, char *ntv_descr, int len, struct native_event_table_t *event_table) { SUBDBG("ENTER: EventCode: %#x, ntv_descr: %p, len: %d: event_table: %p\n", EventCode, ntv_descr, len, event_table); int eidx; int papi_event_code; char *mdesc; char *edesc; // get the attribute index for this papi event papi_event_code = _papi_hwi_get_papi_event_code(); // a papi event code less than 0 is invalid, return error if (papi_event_code <= 0) { SUBDBG("EXIT: PAPI_ENOEVNT\n"); return PAPI_ENOEVNT; } // find our native event table for this papi event code (search list backwards because it improves chances of finding it quickly) for (eidx=event_table->num_native_events-1 ; eidx>=0 ; eidx--) { SUBDBG("native_event[%d]: papi_event_code: %#x, libpfm4_idx: %#x\n", eidx, event_table->native_events[eidx].papi_event_code, event_table->native_events[eidx].libpfm4_idx); if ((papi_event_code == event_table->native_events[eidx].papi_event_code) && (EventCode == ((unsigned)event_table->native_events[eidx].libpfm4_idx))) { break; } } // if we did not find a match, return an error if (eidx < 0) { // If we did not find a match in our native event table, then the code passed in has not been // allocated yet It should not be possible to get to this code. The user has to call the papi // code_to_name api with a papi event code for a native event. But the only way to get one of // those is to call either name_to_code or enum_cmp_events first. When one of these calls is // done we allocate the event so it should always be there. 
SUBDBG("EXIT: PAPI_ENOEVNT\n"); return PAPI_ENOEVNT; } edesc = event_table->native_events[eidx].event_description; // if it will not fit, return error if (strlen (edesc) >= (unsigned)len) { SUBDBG("EXIT: event name %s will not fit in buffer provided\n", edesc); return PAPI_EBUF; } strcpy (ntv_descr, edesc); // if this event had masks, also add their descriptions mdesc = event_table->native_events[eidx].mask_description; if ((mdesc != NULL) && (strlen(mdesc) > 0)) { if ((strlen(edesc) + 8 + strlen(mdesc)) >= (unsigned)len) { SUBDBG("EXIT: Not enough room for event and mask descriptions: need: %u, have: %u", (unsigned)(strlen(edesc) + 8 + strlen(mdesc)), (unsigned)len); return PAPI_EBUF; } strcat (ntv_descr, ", masks:"); strcat (ntv_descr, mdesc); } SUBDBG("EXIT: event description: %s\n", ntv_descr); return PAPI_OK; } int _pe_libpfm4_ntv_code_to_info(unsigned int EventCode, PAPI_event_info_t *info, struct native_event_table_t *event_table) { SUBDBG("ENTER: EventCode: %#x, info: %p, event_table: %p\n", EventCode, info, event_table); int ret; // get the event name first if ((ret = _pe_libpfm4_ntv_code_to_name(EventCode, info->symbol, sizeof(info->symbol), event_table)) != PAPI_OK) { SUBDBG("EXIT: _pe_libpfm4_ntv_code_to_name returned: %d\n", ret); return PAPI_ENOEVNT; } if ((ret = _pe_libpfm4_ntv_code_to_descr(EventCode, info->long_descr, sizeof(info->long_descr), event_table)) != PAPI_OK) { SUBDBG("EXIT: _pe_libpfm4_ntv_code_to_descr returned: %d\n", ret); return PAPI_ENOEVNT; } SUBDBG("EXIT: EventCode: %#x, name: %s, desc: %s\n", EventCode, info->symbol, info->long_descr); return PAPI_OK; } /** @class _pe_libpfm4_ntv_enum_events * @brief Walk through all events in a pre-defined order * * @param[in,out] *PapiEventCode * -- PAPI event code to start with * @param[in] modifier * -- describe how to enumerate * @param[in] event_table * -- native event table struct * * @retval PAPI_OK The event was found and converted to a description * @retval PAPI_ENOEVENT The event does 
not exist * @retval PAPI_ENOIMPL The enumeration method requested in not implemented * */ int _pe_libpfm4_ntv_enum_events( unsigned int *PapiEventCode, int modifier, int cidx, struct native_event_table_t *event_table) { SUBDBG("ENTER: PapiEventCode: %p, *PapiEventCode: %#x, modifier: %d, event_table: %p\n", PapiEventCode, *PapiEventCode, modifier, event_table); int code,ret, pnum; int max_umasks; char event_string[BUFSIZ]; pfm_pmu_info_t pinfo; pfm_event_info_t einfo; struct native_event_t *our_event; /* return first event if so specified */ if ( modifier == PAPI_ENUM_FIRST ) { attr_idx = 0; // set so if they want attribute information, it will start with the first attribute code=get_first_event_next_pmu(-1, event_table->pmu_type); if (code < 0 ) { SUBDBG("EXIT: Invalid component first event code: %d\n", code); return code; } // get the event information from libpfm4 (must zero structure) memset( &einfo, 0, sizeof( pfm_event_info_t )); einfo.size = sizeof(pfm_event_info_t); if ((ret = pfm_get_event_info(code, PFM_OS_PERF_EVENT_EXT, &einfo)) != PFM_SUCCESS) { SUBDBG("EXIT: pfm_get_event_info returned: %d\n", ret); return PAPI_ENOIMPL; } // get the pmu information from libpfm4 (must zero structure) memset( &pinfo, 0, sizeof(pfm_pmu_info_t) ); pinfo.size = sizeof(pfm_pmu_info_t); ret=pfm_get_pmu_info(einfo.pmu, &pinfo); if (ret!=PFM_SUCCESS) { SUBDBG("EXIT: pfm_get_pmu_info returned: %d\n", ret); return ret; } // build full event name sprintf (event_string, "%s::%s", pinfo.name, einfo.name); SUBDBG("code: %#x, pmu: %s, event: %s, event_string: %s\n", code, pinfo.name, einfo.name, event_string); // go allocate this event, need to create tables used by the get event info call that will probably follow if ((our_event = allocate_native_event(event_string, code, cidx, event_table)) == NULL) { // allocate may have created the event table but returned NULL to tell the caller the event string was invalid (attempt to encode it failed). 
// if the caller wants to use this event to count something, it will report an error // but if the caller is just interested in listing the event, then we need an event table with an event name and libpfm4 index int evt_idx; if ((evt_idx = find_existing_event(event_string, event_table)) < 0) { SUBDBG("EXIT: Allocating event: '%s' failed\n", event_string); return PAPI_ENOEVNT; } // give back the new event code *PapiEventCode = event_table->native_events[evt_idx].libpfm4_idx; SUBDBG("EXIT: event code: %#x\n", *PapiEventCode); return PAPI_OK; } *PapiEventCode = our_event->libpfm4_idx; SUBDBG("EXIT: *PapiEventCode: %#x\n", *PapiEventCode); return PAPI_OK; } /* Handle looking for the next event */ if ( modifier == PAPI_ENUM_EVENTS ) { attr_idx = 0; // set so if they want attribute information, it will start with the first attribute // get the next event code from libpfm4, if there are no more in this pmu find first event in next pmu if ((code = pfm_get_event_next(*PapiEventCode)) < 0) { // get this events information from libpfm4, we need the pmu number of the last event we processed (table must be cleared) memset( &einfo, 0, sizeof( pfm_event_info_t )); einfo.size = sizeof(pfm_event_info_t); if ((ret = pfm_get_event_info(*PapiEventCode, PFM_OS_PERF_EVENT_EXT, &einfo)) != PFM_SUCCESS) { SUBDBG("EXIT: pfm_get_event_info returned: %d\n", ret); return PAPI_ENOIMPL; } SUBDBG("*PapiEventCode: %#x, event: %s\n", *PapiEventCode, einfo.name); // get the pmu number of the last event pnum = einfo.pmu; SUBDBG("pnum: %d\n", pnum); code=get_first_event_next_pmu(pnum, event_table->pmu_type); if (code < 0) { SUBDBG("EXIT: No more PMUs to list, returning: %d\n", code); return code; } } // get the event information from libpfm4 (must zero structure) memset( &einfo, 0, sizeof( pfm_event_info_t )); einfo.size = sizeof(pfm_event_info_t); if ((ret = pfm_get_event_info(code, PFM_OS_PERF_EVENT_EXT, &einfo)) != PFM_SUCCESS) { SUBDBG("EXIT: pfm_get_event_info returned: %d\n", ret); return 
PAPI_ENOIMPL; } // get the pmu information from libpfm4 (must zero structure) memset( &pinfo, 0, sizeof(pfm_pmu_info_t) ); pinfo.size = sizeof(pfm_pmu_info_t); ret=pfm_get_pmu_info(einfo.pmu, &pinfo); if (ret!=PFM_SUCCESS) { SUBDBG("EXIT: pfm_get_pmu_info returned: %d\n", ret); return ret; } // build full event name sprintf (event_string, "%s::%s", pinfo.name, einfo.name); SUBDBG("code: %#x, pmu: %s, event: %s, event_string: %s\n", code, pinfo.name, einfo.name, event_string); // go allocate this event, need to create tables used by the get event info call that will follow if ((our_event = allocate_native_event(event_string, code, cidx, event_table)) == NULL) { // allocate may have created the event table but returned NULL to tell the caller the event string was invalid (attempt to encode it failed). // if the caller wants to use this event to count something, it will report an error // but if the caller is just interested in listing the event, then we need an event table with an event name and libpfm4 index int evt_idx; if ((evt_idx = find_existing_event(event_string, event_table)) < 0) { SUBDBG("EXIT: Allocating event: '%s' failed\n", event_string); return PAPI_ENOEVNT; } // give back the new event code *PapiEventCode = event_table->native_events[evt_idx].libpfm4_idx; SUBDBG("EXIT: event code: %#x\n", *PapiEventCode); return PAPI_OK; } // give back the new event code *PapiEventCode = our_event->libpfm4_idx; SUBDBG("EXIT: *PapiEventCode: %#x\n", *PapiEventCode); return PAPI_OK; } /* We don't handle PAPI_NTV_ENUM_UMASK_COMBOS */ if ( modifier == PAPI_NTV_ENUM_UMASK_COMBOS ) { SUBDBG("EXIT: do not support umask combos yet\n"); return PAPI_ENOIMPL; } /* Enumerate PAPI_NTV_ENUM_UMASKS (umasks on an event) */ if ( modifier == PAPI_NTV_ENUM_UMASKS ) { // get this events information from libpfm4, we need the number of masks this event knows about (table must be cleared) memset( &einfo, 0, sizeof( pfm_event_info_t )); einfo.size = sizeof(pfm_event_info_t); if ((ret = 
pfm_get_event_info(*PapiEventCode, PFM_OS_PERF_EVENT_EXT, &einfo)) != PFM_SUCCESS) { SUBDBG("EXIT: pfm_get_event_info returned: %d\n", ret); return PAPI_ENOIMPL; } // SUBDBG("*PapiEventCode: %#x, einfo.name: %s, einfo.code: %#x, einfo.nattrs: %d\n", *PapiEventCode, einfo.name, einfo.code, einfo.nattrs); // set max number of masks max_umasks = einfo.nattrs; // if we reached last attribute, return error to show we are done with this events masks if (attr_idx == max_umasks) { SUBDBG("EXIT: already processed all umasks: attr_idx: %d\n", attr_idx); return PAPI_ENOEVNT; } // find the event table for this event, we need the pmu name and event name without any masks int ntv_idx = _papi_hwi_get_ntv_idx(_papi_hwi_get_papi_event_code()); if (ntv_idx < 0) { SUBDBG("EXIT: _papi_hwi_get_ntv_idx returned: %d\n", ntv_idx); return ntv_idx; } char *ename = event_table->native_events[ntv_idx].pmu_plus_name; if ((ename == NULL) || (strlen(ename) >= sizeof(event_string))) { SUBDBG("EXIT: Event name will not fit into buffer\n"); return PAPI_EBUF; } strcpy (event_string, ename); SUBDBG("event_string: %s\n", event_string); // go get the attribute information for this event // libpfm4 likes the table cleared pfm_event_attr_info_t ainfo; memset (&ainfo, 0, sizeof(pfm_event_attr_info_t)); ainfo.size = sizeof(pfm_event_attr_info_t); ret = pfm_get_event_attr_info(*PapiEventCode, attr_idx, PFM_OS_PERF_EVENT_EXT, &ainfo); if (ret != PFM_SUCCESS) { SUBDBG("EXIT: Attribute info not found, EventCode: %#x, attr_idx: %d, ret: %d\n", *PapiEventCode, attr_idx, _papi_libpfm4_error(ret)); return _papi_libpfm4_error(ret); } SUBDBG("*PapiEventCode: %#x, attr_idx: %d, type: %d, name: %s, description: %s\n", *PapiEventCode, attr_idx, ainfo.type, ainfo.name, ainfo.desc); if (strlen(event_string) + strlen(ainfo.name) + 35 > sizeof(event_string)) { SUBDBG("EXIT: Event name and mask will not fit into buffer\n"); return PAPI_EBUF; } strcat (event_string, ":"); strcat (event_string, ainfo.name); switch 
(ainfo.type) { case PFM_ATTR_UMASK: break; case PFM_ATTR_MOD_BOOL: case PFM_ATTR_MOD_INTEGER: // a few attributes require a non-zero value to encode correctly (most would accept zero here) strcat(event_string,"=0"); break; default: SUBDBG("EXIT: Unsupported attribute type: %d", ainfo.type); return PAPI_EATTR; } // go allocate this event, need to create tables used by the get event info call that will follow if ((our_event = allocate_native_event(event_string, *PapiEventCode, cidx, event_table)) == NULL) { // allocate may have created the event table but returned NULL to tell the caller the event string was invalid. // if the caller wants to use this event to count something, it must report the error // but if the caller is just interested in listing the event (like this code), then find the table that was created and return its libpfm4 index int evt_idx; if ((evt_idx = find_existing_event(event_string, event_table)) < 0) { SUBDBG("EXIT: Allocating event: '%s' failed\n", event_string); return PAPI_ENOEVNT; } // bump so next time we will use next attribute attr_idx++; // give back the new event code *PapiEventCode = event_table->native_events[evt_idx].libpfm4_idx; SUBDBG("EXIT: event code: %#x\n", *PapiEventCode); return PAPI_OK; } // bump so next time we will use next attribute attr_idx++; // give back the new event code *PapiEventCode = our_event->libpfm4_idx; SUBDBG("EXIT: event code: %#x\n", *PapiEventCode); return PAPI_OK; } /* Enumerate PAPI_NTV_ENUM_GROUPS (groups on an event) */ if ( modifier == PAPI_NTV_ENUM_GROUPS ) { SUBDBG("EXIT: do not support enumerating groups in this component\n"); return PAPI_ENOIMPL; } /* An unknown enumeration method was indicated */ SUBDBG("EXIT: Invalid modifier argument provided\n"); return PAPI_ENOIMPL; } /** @class _pe_libpfm4_shutdown * @brief Shutdown any initialization done by the libpfm4 code * * @param[in] event_table * -- native event table struct * * @retval PAPI_OK We always return PAPI_OK * */ int 
_pe_libpfm4_shutdown(papi_vector_t *my_vector, struct native_event_table_t *event_table) { SUBDBG("ENTER: event_table: %p\n", event_table); int i; for (i=0 ; icmp_info.pmu_names[i] != NULL) { free (my_vector->cmp_info.pmu_names[i]); } } /* clean out and free the native events structure */ _papi_hwi_lock( NAMELIB_LOCK ); /* free memory allocated with strdup or malloc */ for( i=0; inum_native_events; i++) { free(event_table->native_events[i].base_name); free(event_table->native_events[i].pmu_plus_name); free(event_table->native_events[i].pmu); free(event_table->native_events[i].allocated_name); free(event_table->native_events[i].mask_string); free(event_table->native_events[i].event_description); if (event_table->native_events[i].mask_description != NULL) { free(event_table->native_events[i].mask_description); } } free(event_table->native_events); _papi_hwi_unlock( NAMELIB_LOCK ); SUBDBG("EXIT: PAPI_OK\n"); return PAPI_OK; } /** @class _pe_libpfm4_init * @brief Initialize the libpfm4 code * * @param[in] component * -- pointer to component structure * @param[in] event_table * -- native event table structure * * @retval PAPI_OK We initialized correctly * @retval PAPI_ECMP There was an error initializing the component * */ int _pe_libpfm4_init(papi_vector_t *component, int cidx, struct native_event_table_t *event_table, int pmu_type) { int detected_pmus=0, found_default=0; int i; int j=0; unsigned int ncnt; pfm_err_t retval = PFM_SUCCESS; pfm_pmu_info_t pinfo; /* allocate the native event structure */ event_table->num_native_events=0; event_table->pmu_type=pmu_type; event_table->native_events=calloc(NATIVE_EVENT_CHUNK, sizeof(struct native_event_t)); if (event_table->native_events==NULL) { strncpy(component->cmp_info.disabled_reason, "calloc NATIVE_EVENT_CHUNK failed",PAPI_MAX_STR_LEN); return PAPI_ENOMEM; } event_table->allocated_native_events=NATIVE_EVENT_CHUNK; /* Count number of present PMUs */ detected_pmus=0; ncnt=0; /* init default pmu */ /* need to init pinfo or 
pfmlib might complain */ memset(&(event_table->default_pmu), 0, sizeof(pfm_pmu_info_t)); event_table->default_pmu.size = sizeof(pfm_pmu_info_t); retval=pfm_get_pmu_info(0, &(event_table->default_pmu)); SUBDBG("Detected pmus:\n"); i=0; while(1) { memset(&pinfo,0,sizeof(pfm_pmu_info_t)); pinfo.size = sizeof(pfm_pmu_info_t); retval=pfm_get_pmu_info(i, &pinfo); /* We're done if we hit an invalid PMU entry */ /* We can't check against PFM_PMU_MAX as that might not */ /* match if libpfm4 is dynamically linked */ if (retval==PFM_ERR_INVAL) { break; } if ((retval==PFM_SUCCESS) && (pinfo.name != NULL) && (pmu_is_present_and_right_type(&pinfo,pmu_type))) { SUBDBG("\t%d %s %s %d\n",i, pinfo.name,pinfo.desc,pinfo.type); detected_pmus++; ncnt+=pinfo.nevents; if (j < PAPI_PMU_MAX) { component->cmp_info.pmu_names[j++] = strdup(pinfo.name); } if (pmu_type & PMU_TYPE_CORE) { /* Hack to have "default core" PMU */ if ( (pinfo.type==PFM_PMU_TYPE_CORE) && strcmp(pinfo.name,"ix86arch")) { SUBDBG("\t %s is default\n",pinfo.name); memcpy(&(event_table->default_pmu), &pinfo,sizeof(pfm_pmu_info_t)); found_default++; } } if (pmu_type==PMU_TYPE_UNCORE) { /* To avoid confusion, no "default" CPU for uncore */ found_default=1; } } i++; } SUBDBG("%d native events detected on %d pmus\n",ncnt,detected_pmus); if (detected_pmus==0) { SUBDBG("Could not find any PMUs\n"); return PAPI_ENOSUPP; } if (!found_default) { strncpy(component->cmp_info.disabled_reason, "could not find default PMU",PAPI_MAX_STR_LEN); return PAPI_ECMP; } if (found_default>1) { strncpy(component->cmp_info.disabled_reason, "found more than one default PMU",PAPI_MAX_STR_LEN); return PAPI_ECOUNT; } component->cmp_info.num_native_events = ncnt; component->cmp_info.num_cntrs = event_table->default_pmu.num_cntrs+ event_table->default_pmu.num_fixed_cntrs; SUBDBG( "num_counters: %d\n", component->cmp_info.num_cntrs ); /* Setup presets, only if Component 0 and default core PMU */ if ((cidx==0) && (found_default)) { retval = 
_papi_load_preset_table( (char *)event_table->default_pmu.name, event_table->default_pmu.pmu, cidx ); if ( retval!=PAPI_OK ) { strncpy(component->cmp_info.disabled_reason,"_papi_load_preset_table failed",PAPI_MAX_STR_LEN); return PAPI_ENOEVNT; } } return PAPI_OK; } /** @class _peu_libpfm4_init * @brief Initialize the libpfm4 code * * @param[in] event_table * -- native event table struct * * @retval PAPI_OK We initialized correctly * @retval PAPI_ECMP There was an error initializing the component * */ int _peu_libpfm4_init(papi_vector_t *my_vector, int cidx, struct native_event_table_t *event_table, int pmu_type) { int detected_pmus=0; int i; int j=0; pfm_err_t retval = PFM_SUCCESS; unsigned int ncnt; pfm_pmu_info_t pinfo; (void)cidx; /* allocate the native event structure */ event_table->num_native_events=0; event_table->pmu_type=pmu_type; event_table->native_events=calloc(NATIVE_EVENT_CHUNK, sizeof(struct native_event_t)); if (event_table->native_events==NULL) { return PAPI_ENOMEM; } event_table->allocated_native_events=NATIVE_EVENT_CHUNK; /* Count number of present PMUs */ detected_pmus=0; ncnt=0; my_vector->cmp_info.num_cntrs=0; SUBDBG("Detected pmus:\n"); i=0; while(1) { memset(&pinfo,0,sizeof(pfm_pmu_info_t)); pinfo.size = sizeof(pfm_pmu_info_t); retval=pfm_get_pmu_info(i, &pinfo); /* We're done if we hit an invalid PMU entry */ /* We can't check against PFM_PMU_MAX */ /* as that might not match if libpfm4 is dynamically linked */ if (retval==PFM_ERR_INVAL) { break; } if ((retval==PFM_SUCCESS) && (pinfo.name != NULL) && (pmu_is_present_and_right_type(&pinfo,pmu_type))) { SUBDBG("\t%d %s %s %d\n",i,pinfo.name,pinfo.desc,pinfo.type); detected_pmus++; ncnt+=pinfo.nevents; if ((j < PAPI_PMU_MAX) && (pinfo.name != NULL)) { my_vector->cmp_info.pmu_names[j++] = strdup(pinfo.name); } my_vector->cmp_info.num_cntrs += pinfo.num_cntrs+ pinfo.num_fixed_cntrs; } i++; } SUBDBG("%d native events detected on %d pmus\n",ncnt,detected_pmus); 
my_vector->cmp_info.num_native_events = ncnt; SUBDBG( "num_counters: %d\n", my_vector->cmp_info.num_cntrs ); return PAPI_OK; } papi-5.6.0/src/libpfm4/lib/pfmlib_sicortex.c000664 001750 001750 00000050517 13216244365 022776 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_sicortex.c : support for the generic MIPS64 PMU family * * Contributed by Philip Mucci based on code from * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include #include #include #include #include #include #include #include #include /* public headers */ #include #include /* private headers */ #include "pfmlib_priv.h" /* library private */ #include "pfmlib_sicortex_priv.h" /* architecture private */ #include "sicortex/ice9a/ice9a_all_spec_pme.h" #include "sicortex/ice9b/ice9b_all_spec_pme.h" #include "sicortex/ice9/ice9_scb_spec_sw.h" /* let's define some handy shortcuts! 
*/ #define sel_event_mask perfsel.sel_event_mask #define sel_exl perfsel.sel_exl #define sel_os perfsel.sel_os #define sel_usr perfsel.sel_usr #define sel_sup perfsel.sel_sup #define sel_int perfsel.sel_int static pme_sicortex_entry_t *sicortex_pe = NULL; // CHANGE FOR ICET #define core_counters 2 #define MAX_ICE9_PMCS 2+4+256 #define MAX_ICE9_PMDS 2+4+256 static int compute_ice9_counters(int type) { int i; int bound = 0; pme_gen_mips64_entry_t *gen_mips64_pe = NULL; sicortex_support.pmd_count = 0; sicortex_support.pmc_count = 0; for (i=0;i 2) { /* Account for 4 sampling PMD registers */ sicortex_support.num_cnt = sicortex_support.pmd_count - 4; sicortex_support.pme_count = bound; } else { sicortex_support.pme_count = 0; /* Count up CPU only events */ for (i=0;i> (cntr*8)) & 0xff; pc[j].reg_addr = cntr*2; pc[j].reg_value = reg.val; pc[j].reg_num = cntr; __pfm_vbprintf("[CP0_25_%u(pmc%u)=0x%"PRIx64" event_mask=0x%x usr=%d os=%d sup=%d exl=%d int=1] %s\n", pc[j].reg_addr, pc[j].reg_num, pc[j].reg_value, reg.sel_event_mask, reg.sel_usr, reg.sel_os, reg.sel_sup, reg.sel_exl, sicortex_pe[e[j].event].pme_name); pd[j].reg_num = cntr; pd[j].reg_addr = cntr*2 + 1; __pfm_vbprintf("[CP0_25_%u(pmd%u)]\n", pc[j].reg_addr, pc[j].reg_num); } /* SCB event */ else { pmc_sicortex_scb_reg_t scbreg; int k; scbreg.val = 0; scbreg.sicortex_ScbPerfBucket_reg.event = sicortex_pe[e[j].event].pme_code >> 16; for (k=0;kflags & PFMLIB_SICORTEX_INPUT_SCB_INTERVAL)) { two.sicortex_ScbPerfCtl_reg.Interval = mod_in->pfp_sicortex_scb_global.Interval; } else { two.sicortex_ScbPerfCtl_reg.Interval = 6; /* 2048 cycles */ } if (mod_in && (mod_in->flags & PFMLIB_SICORTEX_INPUT_SCB_NOINC)) { two.sicortex_ScbPerfCtl_reg.NoInc = mod_in->pfp_sicortex_scb_global.NoInc; } else { two.sicortex_ScbPerfCtl_reg.NoInc = 0; } two.sicortex_ScbPerfCtl_reg.IntBit = 31; /* Interrupt on last bit */ two.sicortex_ScbPerfCtl_reg.MagicEvent = 0; two.sicortex_ScbPerfCtl_reg.AddrAssert = 1; 
__pfm_vbprintf("[Scb%s(pmc%u)=0x%"PRIx64" Interval=0x%x IntBit=0x%x NoInc=%d AddrAssert=%d MagicEvent=0x%x]\n","PerfCtl", pc[num].reg_num, two.val, two.sicortex_ScbPerfCtl_reg.Interval, two.sicortex_ScbPerfCtl_reg.IntBit, two.sicortex_ScbPerfCtl_reg.NoInc, two.sicortex_ScbPerfCtl_reg.AddrAssert, two.sicortex_ScbPerfCtl_reg.MagicEvent); pc[num].reg_value = two.val; /*ScbPerfHist */ pc[++num].reg_num = 3; pc[num].reg_addr = 3; three.val = 0; if (mod_in && (mod_in->flags & PFMLIB_SICORTEX_INPUT_SCB_HISTGTE)) three.sicortex_ScbPerfHist_reg.HistGte = mod_in->pfp_sicortex_scb_global.HistGte; else three.sicortex_ScbPerfHist_reg.HistGte = 1; __pfm_vbprintf("[Scb%s(pmc%u)=0x%"PRIx64" HistGte=0x%x]\n","PerfHist", pc[num].reg_num, three.val, three.sicortex_ScbPerfHist_reg.HistGte); pc[num].reg_value = three.val; /*ScbPerfBuckNum */ pc[++num].reg_num = 4; pc[num].reg_addr = 4; four.val = 0; if (mod_in && (mod_in->flags & PFMLIB_SICORTEX_INPUT_SCB_BUCKET)) four.sicortex_ScbPerfBuckNum_reg.Bucket = mod_in->pfp_sicortex_scb_global.Bucket; else four.sicortex_ScbPerfBuckNum_reg.Bucket = 0; __pfm_vbprintf("[Scb%s(pmc%u)=0x%"PRIx64" Bucket=0x%x]\n","PerfBuckNum", pc[num].reg_num, four.val, four.sicortex_ScbPerfBuckNum_reg.Bucket); pc[num].reg_value = four.val; /*ScbPerfEna */ pc[++num].reg_num = 5; pc[num].reg_addr = 5; five.val = 0; five.sicortex_ScbPerfEna_reg.ena = 1; __pfm_vbprintf("[Scb%s(pmc%u)=0x%"PRIx64" ena=%d]\n","PerfEna", pc[num].reg_num, five.val, five.sicortex_ScbPerfEna_reg.ena); pc[num].reg_value = five.val; ++num; return(num); } /* * Automatically dispatch events to corresponding counters following constraints. 
* Upon return the pfarg_regt structure is ready to be submitted to kernel */ static int pfm_sicortex_dispatch_counters(pfmlib_input_param_t *inp, pfmlib_sicortex_input_param_t *mod_in, pfmlib_output_param_t *outp) { /* pfmlib_sicortex_input_param_t *param = mod_in; */ pfmlib_event_t *e = inp->pfp_events; pfmlib_reg_t *pc, *pd; unsigned int i, j, cnt = inp->pfp_event_count; unsigned int used = 0; extern pfm_pmu_support_t sicortex_support; unsigned int cntr, avail; pc = outp->pfp_pmcs; pd = outp->pfp_pmds; /* Degree N rank based allocation */ if (cnt > sicortex_support.pmc_count) return PFMLIB_ERR_TOOMANY; if (PFMLIB_DEBUG()) { for (j=0; j < cnt; j++) { DPRINT("ev[%d]=%s, counters=0x%x\n", j, sicortex_pe[e[j].event].pme_name,sicortex_pe[e[j].event].pme_counters); } } /* Do rank based allocation, counters that live on 1 reg before counters that live on 2 regs etc. */ /* CPU counters first */ for (i=1;i<=core_counters;i++) { for (j=0; j < cnt;j++) { /* CPU counters first */ if ((sicortex_pe[e[j].event].pme_counters & ((1<pfp_dfl_plm,pc,pd,cntr,j,mod_in); used |= (1 << cntr); DPRINT("Rank %d: Used counters 0x%x\n",i, used); } } } /* SCB counters can live anywhere */ used = 0; for (j=0; j < cnt;j++) { unsigned int cntr; /* CPU counters first */ if (sicortex_pe[e[j].event].pme_counters & (1<pfp_dfl_plm,pc,pd,cntr,j,mod_in); used++; DPRINT("SCB(%d): Used counters %d\n",j,used); } } if (used) { outp->pfp_pmc_count = stuff_sicortex_scb_control_regs(pc,pd,cnt,mod_in); outp->pfp_pmd_count = cnt; return PFMLIB_SUCCESS; } /* number of evtsel registers programmed */ outp->pfp_pmc_count = cnt; outp->pfp_pmd_count = cnt; return PFMLIB_SUCCESS; } static int pfm_sicortex_dispatch_events(pfmlib_input_param_t *inp, void *model_in, pfmlib_output_param_t *outp, void *model_out) { pfmlib_sicortex_input_param_t *mod_sicortex_in = (pfmlib_sicortex_input_param_t *)model_in; return pfm_sicortex_dispatch_counters(inp, mod_sicortex_in, outp); } static int pfm_sicortex_get_event_code(unsigned 
int i, unsigned int cnt, int *code) { extern pfm_pmu_support_t sicortex_support; /* check validity of counter index */ if (cnt != PFMLIB_CNT_FIRST) { if (cnt < 0 || cnt >= sicortex_support.pmc_count) return PFMLIB_ERR_INVAL; } else { cnt = ffs(sicortex_pe[i].pme_counters)-1; if (cnt == -1) return(PFMLIB_ERR_INVAL); } /* if cnt == 1, shift right by 0, if cnt == 2, shift right by 8 */ /* Works on both 5k anf 20K */ unsigned int tmp = sicortex_pe[i].pme_counters; /* CPU event */ if (tmp & ((1<> (cnt*8)); else return PFMLIB_ERR_INVAL; } /* SCB event */ else { if ((cnt < 6) || (cnt >= sicortex_support.pmc_count)) return PFMLIB_ERR_INVAL; *code = 0xffff & (sicortex_pe[i].pme_code >> 16); } return PFMLIB_SUCCESS; } /* * This function is accessible directly to the user */ int pfm_sicortex_get_event_umask(unsigned int i, unsigned long *umask) { extern pfm_pmu_support_t sicortex_support; if (i >= sicortex_support.pme_count || umask == NULL) return PFMLIB_ERR_INVAL; *umask = 0; //evt_umask(i); return PFMLIB_SUCCESS; } static void pfm_sicortex_get_event_counters(unsigned int j, pfmlib_regmask_t *counters) { extern pfm_pmu_support_t sicortex_support; unsigned int tmp; memset(counters, 0, sizeof(*counters)); tmp = sicortex_pe[j].pme_counters; /* CPU counter */ if (tmp & ((1< core_counters) { /* counting pmds are not contiguous on ICE9*/ for(i=6; i < sicortex_support.pmd_count; i++) pfm_regmask_set(impl_counters, i); } } static void pfm_sicortex_get_hw_counter_width(unsigned int *width) { *width = PMU_GEN_MIPS64_COUNTER_WIDTH; } static char * pfm_sicortex_get_event_name(unsigned int i) { return sicortex_pe[i].pme_name; } static int pfm_sicortex_get_event_description(unsigned int ev, char **str) { char *s; s = sicortex_pe[ev].pme_desc; if (s) { *str = strdup(s); } else { *str = NULL; } return PFMLIB_SUCCESS; } static int pfm_sicortex_get_cycle_event(pfmlib_event_t *e) { return pfm_find_full_event("CPU_CYCLES",e); } static int pfm_sicortex_get_inst_retired(pfmlib_event_t *e) { 
return pfm_find_full_event("CPU_INSEXEC",e); } /* SiCortex specific functions */ /* CPU counter */ int pfm_sicortex_is_cpu(unsigned int i) { if (i < sicortex_support.pme_count) { unsigned int tmp = sicortex_pe[i].pme_counters; return !(tmp & (1< .sp .B PMU name: slm .B PMU desc: Intel Silvermont .sp .SH DESCRIPTION The library supports the Intel Silvermont core PMU. .SH MODIFIERS The following modifiers are supported on Intel Silvermont processors: .TP .B u Measure at user level which includes privilege levels 1, 2, 3. This corresponds to \fBPFM_PLM3\fR. This is a boolean modifier. .TP .B k Measure at kernel level which includes privilege level 0. This corresponds to \fBPFM_PLM0\fR. This is a boolean modifier. .TP .B i Invert the meaning of the event. The counter will now count cycles in which the event is \fBnot\fR occurring. This is a boolean modifier .TP .B e Enable edge detection, i.e., count only when there is a state transition from no occurrence of the event to at least one occurrence. This modifier must be combined with a counter mask modifier (m) with a value greater or equal to one. This is a boolean modifier. .TP .B c Set the counter mask value. The mask acts as a threshold. The counter will count the number of cycles in which the number of occurrences of the event is greater or equal to the threshold. This is an integer modifier with values in the range [0:255]. .SH OFFCORE_RESPONSE events Intel Silvermont provides two offcore_response events: \fBOFFCORE_RESPONSE_0\fR and \fBOFFCORE_RESPONSE_1\fR. Those events need special treatment in the performance monitoring infrastructure because each event uses an extra register to store some settings. Thus, in case multiple offcore_response events are monitored simultaneously, the kernel needs to manage the sharing of that extra register. The offcore_response events are exposed as a normal event by the library. The extra settings are exposed as regular umasks. 
The library takes care of encoding the events according to the underlying kernel interface. On Intel Silvermont, the umasks are divided into three categories: request, supplier and snoop. The user must provide at least one umask for each category. The categories are shown in the umask descriptions. The library provides a default umask per category if not provided by the user. There is also the special response umask called \fBANY_RESPONSE\fR. When this umask is used then it overrides any supplier and snoop umasks. In other words, users can specify either \fBANY_RESPONSE\fR \fBOR\fR any combinations of supplier + snoops. In case no supplier or snoop is specified, the library defaults to using \fBANY_RESPONSE\fR. For instance, the following are valid event selections: .TP .B OFFCORE_RESPONSE_0:DMND_DATA_RD:ANY_RESPONSE .TP .B OFFCORE_RESPONSE_0:ANY_REQUEST .TP .B OFFCORE_RESPONSE_0:ANY_RFO:LLC_HITM:SNOOP_ANY .P But the following are illegal: .TP .B OFFCORE_RESPONSE_0:ANY_RFO:NON_DRAM:ANY_RESPONSE .TP .B OFFCORE_RESPONSE_0:ANY_RFO:L2_HIT:SNOOP_ANY:ANY_RESPONSE .SH AUTHORS .nf Stephane Eranian .fi .PP papi-5.6.0/src/papi.pc.in000664 001750 001750 00000000416 13216244366 017212 0ustar00jshenry1963jshenry1963000000 000000 prefix=@prefix@ exec_prefix=@exec_prefix@ libdir=@libdir@ includedir=@includedir@ Name: @PACKAGE_NAME@ Description: Performance API to access performance metrics on system Version: @PACKAGE_VERSION@ Libs: -L${libdir} -lpapi Libs.private: @LIBS@ Cflags: -I${includedir} papi-5.6.0/src/libpfm4/lib/pfmlib_intel_bdx_unc_sbo.c000664 001750 001750 00000006207 13216244365 024613 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_intel_bdx_unc_sbo.c : Intel BroadwellX S-Box uncore PMU * * Copyright (c) 2017 Google Inc.
All rights reserved * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/
/* NOTE(review): the five system #include directives below carry no header name in this copy of the file - restore them before building. */
#include
#include
#include
#include
#include

/* private headers */
#include "pfmlib_priv.h"
#include "pfmlib_intel_x86_priv.h"
#include "pfmlib_intel_snbep_unc_priv.h"
#include "events/intel_bdx_unc_sbo_events.h"

/*
 * Verbose-output helper, installed as the .display_reg callback of every
 * S-Box PMU descriptor generated by DEFINE_S_BOX() below.
 *
 * this: opaque handle handed to this_pe() to obtain the event table
 * e:    event description; e->event indexes the event table for the name
 * val:  register image, read here as a pfm_snbep_unc_reg_t
 *
 * Prints the raw 64-bit value followed by the event, umask, en, inv, edge
 * and thres fields and the event name through __pfm_vbprintf().
 */
static void
display_sbo(void *this, pfmlib_event_desc_t *e, void *val)
{
	const intel_x86_entry_t *pe = this_pe(this);
	pfm_snbep_unc_reg_t *reg = val;

	__pfm_vbprintf("[UNC_SBO=0x%"PRIx64" event=0x%x umask=0x%x en=%d "
		       "inv=%d edge=%d thres=%d] %s\n",
		       reg->val,
		       reg->com.unc_event,
		       reg->com.unc_umask,
		       reg->com.unc_en,
		       reg->com.unc_inv,
		       reg->com.unc_edge,
		       reg->com.unc_thres,
		       pe[e->event].name);
}

/*
 * Defines the pfmlib_pmu_t descriptor intel_bdx_unc_sbo<n>_support for
 * S-Box number n. Every instance shares the intel_bdx_unc_s_pe event table
 * and the same callbacks; only the fields built from n (.desc, .name,
 * .perf_name, .pmu) differ between instances.
 */
#define DEFINE_S_BOX(n) \
pfmlib_pmu_t intel_bdx_unc_sbo##n##_support = {\
	.desc = "Intel BroadwellX S-BOX"#n" uncore",\
	.name = "bdx_unc_sbo"#n,\
	.perf_name = "uncore_sbox_"#n,\
	.pmu = PFM_PMU_INTEL_BDX_UNC_SB##n,\
	.pme_count = LIBPFM_ARRAY_SIZE(intel_bdx_unc_s_pe),\
	.type = PFM_PMU_TYPE_UNCORE,\
	.num_cntrs = 4,\
	.num_fixed_cntrs = 0,\
	.max_encoding = 3,\
	.pe = intel_bdx_unc_s_pe,\
	.atdesc = snbep_unc_mods,\
	.flags = PFMLIB_PMU_FL_RAW_UMASK,\
	.pmu_detect = pfm_intel_bdx_unc_detect,\
	.get_event_encoding[PFM_OS_NONE] = pfm_intel_snbep_unc_get_encoding,\
	PFMLIB_ENCODE_PERF(pfm_intel_snbep_unc_get_perf_encoding),\
	PFMLIB_OS_DETECT(pfm_intel_x86_perf_detect), \
	.get_event_first = pfm_intel_x86_get_event_first,\
	.get_event_next = pfm_intel_x86_get_event_next,\
	.event_is_valid = pfm_intel_x86_event_is_valid,\
	.validate_table = pfm_intel_x86_validate_table,\
	.get_event_info = pfm_intel_x86_get_event_info,\
	.get_event_attr_info = pfm_intel_x86_get_event_attr_info,\
	PFMLIB_VALID_PERF_PATTRS(pfm_intel_snbep_unc_perf_validate_pattrs),\
	.get_event_nattrs = pfm_intel_x86_get_event_nattrs,\
	.display_reg = display_sbo,\
}

/* One descriptor per S-Box instance, numbered 0 through 3. */
DEFINE_S_BOX(0);
DEFINE_S_BOX(1);
DEFINE_S_BOX(2);
DEFINE_S_BOX(3);
papi-5.6.0/src/components/perfctr_ppc/power5+_events_map.c000664 001750 001750 00000036417 13216244357 025717 0ustar00jshenry1963jshenry1963000000 000000 /****************************/ /* THIS
IS OPEN SOURCE CODE */ /****************************/ /* * File: power5+_events_map.c * Author: Eric Kjeldergaard * kjelderg@linux.ibm.com * Mods: * * * Copyright (c) International Business Machines, 2006. * Contributed by Eric Kjeldergaard * * This file MUST be kept synchronised with the events file. * */ #include "perfctr-ppc64.h" PPC64_native_map_t native_name_map[MAX_NATNAME_MAP_INDEX] = { {"PM_0INST_CLB_CYC", -1} , {"PM_1INST_CLB_CYC", -1} , {"PM_1PLUS_PPC_CMPL", -1} , {"PM_2INST_CLB_CYC", -1} , {"PM_3INST_CLB_CYC", -1} , {"PM_4INST_CLB_CYC", -1} , {"PM_5INST_CLB_CYC", -1} , {"PM_6INST_CLB_CYC", -1} , {"PM_BRQ_FULL_CYC", -1} , {"PM_BR_ISSUED", -1} , {"PM_BR_MPRED_CR", -1} , {"PM_BR_MPRED_TA", -1} , {"PM_BR_UNCOND", -1} , {"PM_CLB_EMPTY_CYC", -1} , {"PM_CLB_FULL_CYC", -1} , {"PM_CRQ_FULL_CYC", -1} , {"PM_CR_MAP_FULL_CYC", -1} , {"PM_CYC", -1} , {"PM_DATA_FROM_L2", -1} , {"PM_DATA_FROM_L25_SHR", -1} , {"PM_DATA_FROM_L275_MOD", -1} , {"PM_DATA_FROM_L3", -1} , {"PM_DATA_FROM_L35_SHR", -1} , {"PM_DATA_FROM_L375_MOD", -1} , {"PM_DATA_FROM_RMEM", -1} , {"PM_DATA_TABLEWALK_CYC", -1} , {"PM_DC_INV_L2", -1} , {"PM_DC_PREF_OUT_OF_STREAMS", -1} , {"PM_DC_PREF_DST", -1} , {"PM_DC_PREF_STREAM_ALLOC", -1} , {"PM_DSLB_MISS", -1} , {"PM_DTLB_MISS", -1} , {"PM_DTLB_MISS_4K", -1} , {"PM_DTLB_REF", -1} , {"PM_DTLB_REF_4K", -1} , {"PM_EE_OFF", -1} , {"PM_EE_OFF_EXT_INT", -1} , {"PM_FAB_CMD_ISSUED", -1} , {"PM_FAB_CMD_RETRIED", -1} , {"PM_FAB_DCLAIM_ISSUED", -1} , {"PM_FAB_DCLAIM_RETRIED", -1} , {"PM_FAB_HOLDtoNN_EMPTY", -1} , {"PM_FAB_HOLDtoVN_EMPTY", -1} , {"PM_FAB_M1toP1_SIDECAR_EMPTY", -1} , {"PM_FAB_M1toVNorNN_SIDECAR_EMPTY", -1} , {"PM_FAB_P1toM1_SIDECAR_EMPTY", -1} , {"PM_FAB_P1toVNorNN_SIDECAR_EMPTY", -1} , {"PM_FAB_PNtoNN_DIRECT", -1} , {"PM_FAB_PNtoNN_SIDECAR", -1} , {"PM_FAB_PNtoVN_DIRECT", -1} , {"PM_FAB_PNtoVN_SIDECAR", -1} , {"PM_FAB_VBYPASS_EMPTY", -1} , {"PM_FLUSH", -1} , {"PM_FLUSH_BR_MPRED", -1} , {"PM_FLUSH_IMBAL", -1} , {"PM_FLUSH_SB", -1} , {"PM_FLUSH_SYNC", 
-1} , {"PM_FPR_MAP_FULL_CYC", -1} , {"PM_FPU0_1FLOP", -1} , {"PM_FPU0_DENORM", -1} , {"PM_FPU0_FDIV", -1} , {"PM_FPU0_FEST", -1} , {"PM_FPU0_FIN", -1} , {"PM_FPU0_FMA", -1} , {"PM_FPU0_FMOV_FEST", -1} , {"PM_FPU0_FPSCR", -1} , {"PM_FPU0_FRSP_FCONV", -1} , {"PM_FPU0_FSQRT", -1} , {"PM_FPU0_FULL_CYC", -1} , {"PM_FPU0_SINGLE", -1} , {"PM_FPU0_STALL3", -1} , {"PM_FPU0_STF", -1} , {"PM_FPU1_1FLOP", -1} , {"PM_FPU1_DENORM", -1} , {"PM_FPU1_FDIV", -1} , {"PM_FPU1_FEST", -1} , {"PM_FPU1_FIN", -1} , {"PM_FPU1_FMA", -1} , {"PM_FPU1_FMOV_FEST", -1} , {"PM_FPU1_FRSP_FCONV", -1} , {"PM_FPU1_FSQRT", -1} , {"PM_FPU1_FULL_CYC", -1} , {"PM_FPU1_SINGLE", -1} , {"PM_FPU1_STALL3", -1} , {"PM_FPU1_STF", -1} , {"PM_FPU_1FLOP", -1} , {"PM_FPU_DENORM", -1} , {"PM_FPU_FDIV", -1} , {"PM_FPU_FEST", -1} , {"PM_FPU_FULL_CYC", -1} , {"PM_FPU_SINGLE", -1} , {"PM_FXLS0_FULL_CYC", -1} , {"PM_FXLS1_FULL_CYC", -1} , {"PM_FXLS_FULL_CYC", -1} , {"PM_FXU0_FIN", -1} , {"PM_FXU1_FIN", -1} , {"PM_FXU_IDLE", -1} , {"PM_GCT_FULL_CYC", -1} , {"PM_GCT_NOSLOT_CYC", -1} , {"PM_GCT_USAGE_00to59_CYC", -1} , {"PM_GPR_MAP_FULL_CYC", -1} , {"PM_GRP_BR_REDIR", -1} , {"PM_GRP_BR_REDIR_NONSPEC", -1} , {"PM_GRP_DISP_BLK_SB_CYC", -1} , {"PM_GRP_DISP_REJECT", -1} , {"PM_GRP_DISP_VALID", -1} , {"PM_GRP_IC_MISS", -1} , {"PM_GRP_IC_MISS_BR_REDIR_NONSPEC", -1} , {"PM_GRP_IC_MISS_NONSPEC", -1} , {"PM_GRP_MRK", -1} , {"PM_IC_DEMAND_L2_BHT_REDIRECT", -1} , {"PM_IC_DEMAND_L2_BR_REDIRECT", -1} , {"PM_IC_PREF_REQ", -1} , {"PM_IERAT_XLATE_WR", -1} , {"PM_IERAT_XLATE_WR_LP", -1} , {"PM_IOPS_CMPL", -1} , {"PM_INST_DISP_ATTEMPT", -1} , {"PM_INST_FETCH_CYC", -1} , {"PM_INST_FROM_L2", -1} , {"PM_INST_FROM_L25_SHR", -1} , {"PM_INST_FROM_L2MISS", -1} , {"PM_INST_FROM_L3", -1} , {"PM_INST_FROM_L35_SHR", -1} , {"PM_ISLB_MISS", -1} , {"PM_ITLB_MISS", -1} , {"PM_L1_DCACHE_RELOAD_VALID", -1} , {"PM_L1_PREF", -1} , {"PM_L1_WRITE_CYC", -1} , {"PM_L2SA_MOD_INV", -1} , {"PM_L2SA_MOD_TAG", -1} , {"PM_L2SA_RCLD_DISP", -1} , 
{"PM_L2SA_RCLD_DISP_FAIL_ADDR", -1} , {"PM_L2SA_RCLD_DISP_FAIL_OTHER", -1} , {"PM_L2SA_RCLD_DISP_FAIL_RC_FULL", -1} , {"PM_L2SA_RCST_DISP", -1} , {"PM_L2SA_RCST_DISP_FAIL_ADDR", -1} , {"PM_L2SA_RCST_DISP_FAIL_OTHER", -1} , {"PM_L2SA_RCST_DISP_FAIL_RC_FULL", -1} , {"PM_L2SA_RC_DISP_FAIL_CO_BUSY", -1} , {"PM_L2SA_RC_DISP_FAIL_CO_BUSY_ALL", -1} , {"PM_L2SA_SHR_INV", -1} , {"PM_L2SA_SHR_MOD", -1} , {"PM_L2SA_ST_HIT", -1} , {"PM_L2SA_ST_REQ", -1} , {"PM_L2SB_MOD_INV", -1} , {"PM_L2SB_MOD_TAG", -1} , {"PM_L2SB_RCLD_DISP", -1} , {"PM_L2SB_RCLD_DISP_FAIL_ADDR", -1} , {"PM_L2SB_RCLD_DISP_FAIL_OTHER", -1} , {"PM_L2SB_RCLD_DISP_FAIL_RC_FULL", -1} , {"PM_L2SB_RCST_DISP", -1} , {"PM_L2SB_RCST_DISP_FAIL_ADDR", -1} , {"PM_L2SB_RCST_DISP_FAIL_OTHER", -1} , {"PM_L2SB_RCST_DISP_FAIL_RC_FULL", -1} , {"PM_L2SB_RC_DISP_FAIL_CO_BUSY", -1} , {"PM_L2SB_RC_DISP_FAIL_CO_BUSY_ALL", -1} , {"PM_L2SB_SHR_INV", -1} , {"PM_L2SB_SHR_MOD", -1} , {"PM_L2SB_ST_HIT", -1} , {"PM_L2SB_ST_REQ", -1} , {"PM_L2SC_MOD_INV", -1} , {"PM_L2SC_MOD_TAG", -1} , {"PM_L2SC_RCLD_DISP", -1} , {"PM_L2SC_RCLD_DISP_FAIL_ADDR", -1} , {"PM_L2SC_RCLD_DISP_FAIL_OTHER", -1} , {"PM_L2SC_RCLD_DISP_FAIL_RC_FULL", -1} , {"PM_L2SC_RCST_DISP", -1} , {"PM_L2SC_RCST_DISP_FAIL_ADDR", -1} , {"PM_L2SC_RCST_DISP_FAIL_OTHER", -1} , {"PM_L2SC_RCST_DISP_FAIL_RC_FULL", -1} , {"PM_L2SC_RC_DISP_FAIL_CO_BUSY", -1} , {"PM_L2SC_RC_DISP_FAIL_CO_BUSY_ALL", -1} , {"PM_L2SC_SHR_INV", -1} , {"PM_L2SC_SHR_MOD", -1} , {"PM_L2SC_ST_HIT", -1} , {"PM_L2SC_ST_REQ", -1} , {"PM_L2_PREF", -1} , {"PM_L3SA_ALL_BUSY", -1} , {"PM_L3SA_HIT", -1} , {"PM_L3SA_MOD_INV", -1} , {"PM_L3SA_MOD_TAG", -1} , {"PM_L3SA_REF", -1} , {"PM_L3SA_SHR_INV", -1} , {"PM_L3SA_SNOOP_RETRY", -1} , {"PM_L3SB_ALL_BUSY", -1} , {"PM_L3SB_HIT", -1} , {"PM_L3SB_MOD_INV", -1} , {"PM_L3SB_MOD_TAG", -1} , {"PM_L3SB_REF", -1} , {"PM_L3SB_SHR_INV", -1} , {"PM_L3SB_SNOOP_RETRY", -1} , {"PM_L3SC_ALL_BUSY", -1} , {"PM_L3SC_HIT", -1} , {"PM_L3SC_MOD_INV", -1} , {"PM_L3SC_MOD_TAG", -1} , {"PM_L3SC_REF", 
-1} , {"PM_L3SC_SHR_INV", -1} , {"PM_L3SC_SNOOP_RETRY", -1} , {"PM_LARX_LSU0", -1} , {"PM_LD_MISS_L1_LSU0", -1} , {"PM_LD_MISS_L1_LSU1", -1} , {"PM_LD_REF_L1", -1} , {"PM_LD_REF_L1_LSU0", -1} , {"PM_BR_PRED_TA", -1} , {"PM_LR_CTR_MAP_FULL_CYC", -1} , {"PM_LSU0_BUSY_REJECT", -1} , {"PM_LSU0_DERAT_MISS", -1} , {"PM_LSU0_FLUSH_LRQ", -1} , {"PM_LSU0_FLUSH_SRQ", -1} , {"PM_LSU0_FLUSH_ULD", -1} , {"PM_LSU0_FLUSH_UST", -1} , {"PM_LSU0_LDF", -1} , {"PM_LSU0_NCLD", -1} , {"PM_LSU0_REJECT_ERAT_MISS", -1} , {"PM_LSU0_REJECT_LMQ_FULL", -1} , {"PM_LSU0_REJECT_RELOAD_CDF", -1} , {"PM_LSU0_REJECT_SRQ", -1} , {"PM_LSU0_SRQ_STFWD", -1} , {"PM_LSU1_BUSY_REJECT", -1} , {"PM_LSU1_DERAT_MISS", -1} , {"PM_LSU1_FLUSH_LRQ", -1} , {"PM_LSU1_FLUSH_SRQ", -1} , {"PM_LSU1_FLUSH_ULD", -1} , {"PM_LSU1_FLUSH_UST", -1} , {"PM_LSU1_LDF", -1} , {"PM_LSU1_NCLD", -1} , {"PM_LSU1_REJECT_ERAT_MISS", -1} , {"PM_LSU1_REJECT_LMQ_FULL", -1} , {"PM_LSU1_REJECT_RELOAD_CDF", -1} , {"PM_LSU1_REJECT_SRQ", -1} , {"PM_LSU1_SRQ_STFWD", -1} , {"PM_LSU_FLUSH", -1} , {"PM_LSU_FLUSH_LRQ_FULL", -1} , {"PM_LSU_FLUSH_SRQ", -1} , {"PM_LSU_FLUSH_SRQ_FULL", -1} , {"PM_LSU_FLUSH_ULD", -1} , {"PM_LSU_LDF", -1} , {"PM_LSU_LMQ_FULL_CYC", -1} , {"PM_LSU_LMQ_LHR_MERGE", -1} , {"PM_LSU_LMQ_S0_ALLOC", -1} , {"PM_LSU_LMQ_S0_VALID", -1} , {"PM_LSU_LRQ_FULL_CYC", -1} , {"PM_LSU_LRQ_S0_ALLOC", -1} , {"PM_LSU_LRQ_S0_VALID", -1} , {"PM_LSU_REJECT_ERAT_MISS", -1} , {"PM_LSU_REJECT_SRQ", -1} , {"PM_LSU_SRQ_FULL_CYC", -1} , {"PM_LSU_SRQ_S0_ALLOC", -1} , {"PM_LSU_SRQ_S0_VALID", -1} , {"PM_LSU_SRQ_SYNC_CYC", -1} , {"PM_LWSYNC_HELD", -1} , {"PM_MEM_FAST_PATH_RD_DISP", -1} , {"PM_IC_PREF_INSTALL", -1} , {"PM_MEM_HI_PRIO_WR_CMPL", -1} , {"PM_MEM_NONSPEC_RD_CANCEL", -1} , {"PM_MEM_LO_PRIO_WR_CMPL", -1} , {"PM_MEM_PWQ_DISP", -1} , {"PM_MEM_PWQ_DISP_Q2or3", -1} , {"PM_MEM_PW_CMPL", -1} , {"PM_MEM_PW_GATH", -1} , {"PM_MEM_RQ_DISP_Q0to3", -1} , {"PM_MEM_RQ_DISP", -1} , {"PM_MEM_RQ_DISP_Q4to7", -1} , {"PM_MEM_RQ_DISP_Q8to11", -1} , 
{"PM_MEM_SPEC_RD_CANCEL", -1} , {"PM_MEM_WQ_DISP_Q0to7", -1} , {"PM_MEM_WQ_DISP_Q8to15", -1} , {"PM_MEM_WQ_DISP_DCLAIM", -1} , {"PM_MEM_WQ_DISP_WRITE", -1} , {"PM_MRK_DATA_FROM_L2", -1} , {"PM_MRK_DATA_FROM_L25_SHR", -1} , {"PM_MRK_DATA_FROM_L275_MOD", -1} , {"PM_MRK_DATA_FROM_L3", -1} , {"PM_MRK_DATA_FROM_L35_SHR", -1} , {"PM_MRK_DATA_FROM_L375_MOD", -1} , {"PM_MRK_DATA_FROM_RMEM", -1} , {"PM_MRK_DSLB_MISS", -1} , {"PM_MRK_DTLB_MISS", -1} , {"PM_MRK_DTLB_MISS_4K", -1} , {"PM_MRK_DTLB_REF", -1} , {"PM_MRK_DTLB_REF_4K", -1} , {"PM_MRK_GRP_DISP", -1} , {"PM_MRK_GRP_ISSUED", -1} , {"PM_MRK_IMR_RELOAD", -1} , {"PM_MRK_L1_RELOAD_VALID", -1} , {"PM_MRK_LD_MISS_L1", -1} , {"PM_MRK_LD_MISS_L1_LSU0", -1} , {"PM_MRK_LD_MISS_L1_LSU1", -1} , {"PM_MRK_LSU0_FLUSH_LRQ", -1} , {"PM_MRK_LSU0_FLUSH_SRQ", -1} , {"PM_MRK_LSU0_FLUSH_ULD", -1} , {"PM_MRK_LSU0_FLUSH_UST", -1} , {"PM_MRK_LSU1_FLUSH_LRQ", -1} , {"PM_MRK_LSU1_FLUSH_SRQ", -1} , {"PM_MRK_LSU1_FLUSH_ULD", -1} , {"PM_MRK_LSU1_FLUSH_UST", -1} , {"PM_MRK_LSU_FLUSH_ULD", -1} , {"PM_MRK_LSU_SRQ_INST_VALID", -1} , {"PM_MRK_STCX_FAIL", -1} , {"PM_MRK_ST_CMPL", -1} , {"PM_MRK_ST_MISS_L1", -1} , {"PM_PMC4_OVERFLOW", -1} , {"PM_PMC5_OVERFLOW", -1} , {"PM_INST_CMPL", -1} , {"PM_PTEG_FROM_L2", -1} , {"PM_PTEG_FROM_L25_SHR", -1} , {"PM_PTEG_FROM_L275_MOD", -1} , {"PM_PTEG_FROM_L3", -1} , {"PM_PTEG_FROM_L35_SHR", -1} , {"PM_PTEG_FROM_L375_MOD", -1} , {"PM_PTEG_FROM_RMEM", -1} , {"PM_PTEG_RELOAD_VALID", -1} , {"PM_RUN_CYC", -1} , {"PM_SNOOP_DCLAIM_RETRY_QFULL", -1} , {"PM_SNOOP_PARTIAL_RTRY_QFULL", -1} , {"PM_SNOOP_PW_RETRY_RQ", -1} , {"PM_SNOOP_PW_RETRY_WQ_PWQ", -1} , {"PM_SNOOP_RD_RETRY_QFULL", -1} , {"PM_SNOOP_RD_RETRY_RQ", -1} , {"PM_SNOOP_RD_RETRY_WQ", -1} , {"PM_SNOOP_RETRY_1AHEAD", -1} , {"PM_SNOOP_TLBIE", -1} , {"PM_SNOOP_WR_RETRY_QFULL", -1} , {"PM_SNOOP_WR_RETRY_RQ", -1} , {"PM_SNOOP_WR_RETRY_WQ", -1} , {"PM_STCX_FAIL", -1} , {"PM_STCX_PASS", -1} , {"PM_ST_MISS_L1", -1} , {"PM_ST_REF_L1_LSU0", -1} , {"PM_ST_REF_L1_LSU1", -1} , 
{"PM_SUSPENDED", -1} , {"PM_TB_BIT_TRANS", -1} , {"PM_THRD_L2MISS_BOTH_CYC", -1} , {"PM_THRD_ONE_RUN_CYC", -1} , {"PM_THRD_PRIO_1_CYC", -1} , {"PM_THRD_PRIO_2_CYC", -1} , {"PM_THRD_PRIO_3_CYC", -1} , {"PM_THRD_PRIO_4_CYC", -1} , {"PM_THRD_PRIO_5_CYC", -1} , {"PM_THRD_PRIO_6_CYC", -1} , {"PM_THRD_PRIO_7_CYC", -1} , {"PM_THRD_PRIO_DIFF_0_CYC", -1} , {"PM_THRD_PRIO_DIFF_1or2_CYC", -1} , {"PM_THRD_PRIO_DIFF_3or4_CYC", -1} , {"PM_THRD_PRIO_DIFF_5or6_CYC", -1} , {"PM_THRD_PRIO_DIFF_minus1or2_CYC", -1} , {"PM_THRD_PRIO_DIFF_minus3or4_CYC", -1} , {"PM_THRD_PRIO_DIFF_minus5or6_CYC", -1} , {"PM_THRD_SEL_OVER_CLB_EMPTY", -1} , {"PM_THRD_SEL_OVER_GCT_IMBAL", -1} , {"PM_THRD_SEL_OVER_ISU_HOLD", -1} , {"PM_THRD_SEL_OVER_L2MISS", -1} , {"PM_THRD_SEL_T0", -1} , {"PM_THRD_SEL_T1", -1} , {"PM_THRD_SMT_HANG", -1} , {"PM_TLBIE_HELD", -1} , {"PM_TLB_MISS", -1} , {"PM_XER_MAP_FULL_CYC", -1} , {"PM_BR_PRED_CR", -1} , {"PM_MEM_RQ_DISP_Q12to15", -1} , {"PM_MEM_RQ_DISP_Q16to19", -1} , {"PM_SNOOP_RETRY_AB_COLLISION", -1} , {"PM_CMPLU_STALL_DCACHE_MISS", -1} , {"PM_CMPLU_STALL_FDIV", -1} , {"PM_CMPLU_STALL_FXU", -1} , {"PM_CMPLU_STALL_LSU", -1} , {"PM_DATA_FROM_L25_MOD", -1} , {"PM_DATA_FROM_L35_MOD", -1} , {"PM_DATA_FROM_LMEM", -1} , {"PM_DTLB_MISS_64K", -1} , {"PM_DTLB_REF_64K", -1} , {"PM_FPU_FMA", -1} , {"PM_FPU_FRSP_FCONV", -1} , {"PM_FPU_FSQRT", -1} , {"PM_FPU_STALL3", -1} , {"PM_FPU_STF", -1} , {"PM_FXU_BUSY", -1} , {"PM_MRK_FXU_FIN", -1} , {"PM_GCT_EMPTY_CYC", -1} , {"PM_GCT_NOSLOT_IC_MISS", -1} , {"PM_GCT_USAGE_60to79_CYC", -1} , {"PM_GRP_DISP", -1} , {"PM_HV_CYC", -1} , {"PM_INST_FROM_L1", -1} , {"PM_INST_FROM_L25_MOD", -1} , {"PM_INST_FROM_L35_MOD", -1} , {"PM_INST_FROM_LMEM", -1} , {"PM_LSU_BUSY_REJECT", -1} , {"PM_LSU_DERAT_MISS", -1} , {"PM_LSU_FLUSH_LRQ", -1} , {"PM_LSU_FLUSH_UST", -1} , {"PM_LSU_LMQ_SRQ_EMPTY_CYC", -1} , {"PM_LSU_REJECT_LMQ_FULL", -1} , {"PM_LSU_REJECT_RELOAD_CDF", -1} , {"PM_LSU_SRQ_STFWD", -1} , {"PM_MRK_BRU_FIN", -1} , {"PM_MRK_DATA_FROM_L25_MOD", -1} , 
{"PM_MRK_DATA_FROM_L25_SHR_CYC", -1} , {"PM_MRK_DATA_FROM_L275_SHR_CYC", -1} , {"PM_MRK_DATA_FROM_L2_CYC", -1} , {"PM_MRK_DATA_FROM_L35_MOD", -1} , {"PM_MRK_DATA_FROM_L35_SHR_CYC", -1} , {"PM_MRK_DATA_FROM_L375_SHR_CYC", -1} , {"PM_MRK_DATA_FROM_L3_CYC", -1} , {"PM_MRK_DATA_FROM_LMEM", -1} , {"PM_MRK_DTLB_MISS_64K", -1} , {"PM_MRK_DTLB_REF_64K", -1} , {"PM_MRK_GRP_BR_REDIR", -1} , {"PM_MRK_LSU_FLUSH_UST", -1} , {"PM_MRK_ST_GPS", -1} , {"PM_PMC1_OVERFLOW", -1} , {"PM_PTEG_FROM_L25_MOD", -1} , {"PM_PTEG_FROM_L35_MOD", -1} , {"PM_PTEG_FROM_LMEM", -1} , {"PM_SLB_MISS", -1} , {"PM_ST_REF_L1", -1} , {"PM_THRD_GRP_CMPL_BOTH_CYC", -1} , {"PM_DATA_FROM_L275_SHR", -1} , {"PM_DATA_FROM_L2MISS", -1} , {"PM_DATA_FROM_L375_SHR", -1} , {"PM_DTLB_MISS_16M", -1} , {"PM_DTLB_REF_16M", -1} , {"PM_FPU_FMOV_FEST", -1} , {"PM_FXU0_BUSY_FXU1_IDLE", -1} , {"PM_FXU_FIN", -1} , {"PM_GCT_NOSLOT_SRQ_FULL", -1} , {"PM_GCT_USAGE_80to99_CYC", -1} , {"PM_GRP_CMPL", -1} , {"PM_GRP_DISP_SUCCESS", -1} , {"PM_INST_DISP", -1} , {"PM_INST_FROM_L275_SHR", -1} , {"PM_INST_FROM_L375_SHR", -1} , {"PM_INST_FROM_PREF", -1} , {"PM_LD_MISS_L1", -1} , {"PM_MRK_DATA_FROM_L275_SHR", -1} , {"PM_MRK_DATA_FROM_L2MISS", -1} , {"PM_MRK_DATA_FROM_L375_SHR", -1} , {"PM_MRK_DTLB_MISS_16M", -1} , {"PM_MRK_DTLB_REF_16M", -1} , {"PM_MRK_FPU_FIN", -1} , {"PM_MRK_INST_FIN", -1} , {"PM_MRK_LSU_FLUSH_LRQ", -1} , {"PM_MRK_ST_CMPL_INT", -1} , {"PM_PMC2_OVERFLOW", -1} , {"PM_PMC6_OVERFLOW", -1} , {"PM_PTEG_FROM_L275_SHR", -1} , {"PM_PTEG_FROM_L2MISS", -1} , {"PM_PTEG_FROM_L375_SHR", -1} , {"PM_STOP_COMPLETION", -1} , {"PM_THRESH_TIMEO", -1} , {"PM_0INST_FETCH", -1} , {"PM_BR_PRED_CR_TA", -1} , {"PM_CMPLU_STALL_DIV", -1} , {"PM_CMPLU_STALL_ERAT_MISS", -1} , {"PM_CMPLU_STALL_FPU", -1} , {"PM_CMPLU_STALL_REJECT", -1} , {"PM_DTLB_MISS_16G", -1} , {"PM_DTLB_REF_16G", -1} , {"PM_EXT_INT", -1} , {"PM_FPU_FIN", -1} , {"PM_FXU1_BUSY_FXU0_IDLE", -1} , {"PM_GCT_NOSLOT_BR_MPRED", -1} , {"PM_INST_FROM_L275_MOD", -1} , {"PM_INST_FROM_L375_MOD", 
-1} , {"PM_INST_FROM_RMEM", -1} , {"PM_LSU_SRQ_EMPTY_CYC", -1} , {"PM_MRK_CRU_FIN", -1} , {"PM_MRK_DATA_FROM_L25_MOD_CYC", -1} , {"PM_MRK_DATA_FROM_L275_MOD_CYC", -1} , {"PM_MRK_DATA_FROM_L35_MOD_CYC", -1} , {"PM_MRK_DATA_FROM_L375_MOD_CYC", -1} , {"PM_MRK_DATA_FROM_LMEM_CYC", -1} , {"PM_MRK_DATA_FROM_RMEM_CYC", -1} , {"PM_MRK_DTLB_MISS_16G", -1} , {"PM_MRK_DTLB_REF_16G", -1} , {"PM_MRK_GRP_CMPL", -1} , {"PM_MRK_GRP_IC_MISS", -1} , {"PM_MRK_GRP_TIMEO", -1} , {"PM_MRK_LSU_FIN", -1} , {"PM_MRK_LSU_FLUSH_SRQ", -1} , {"PM_PMC3_OVERFLOW", -1} , {"PM_WORK_HELD", -1} , {"PM_RUN_INST_CMPL", -1} }; papi-5.6.0/src/perfctr-2.6.x/examples/global/global.c000775 001750 001750 00000015315 13216244366 024237 0ustar00jshenry1963jshenry1963000000 000000 /* $Id: global.c,v 1.37 2004/01/12 14:25:40 mikpe Exp $ * * usage: ./global [sampling_interval_usec [sleep_interval_sec]] * * This test program illustrates how a process may use the * Linux x86 Performance-Monitoring Counters interface to * do system-wide performance monitoring. 
* * Copyright (C) 2000-2004 Mikael Pettersson */ #include #include #include #include #include #include #include #include #include "libperfctr.h" #include "arch.h" static struct gperfctr *gperfctr; static struct perfctr_info info; static unsigned int nrcpus; static unsigned short *cpu_logical_map; struct gperfctr_state { /* no longer defined in or used by the kernel */ unsigned int nrcpus; struct gperfctr_cpu_state cpu_state[1]; /* actually 'nrcpus' */ }; static struct gperfctr_state *state; static struct gperfctr_state *prev_state; static unsigned int sample_num; int counting_mips; /* for CPUs that cannot FLOPS */ static unsigned long sampling_interval = 1000000; /* XXX: reduce for >4GHz CPUs */ static unsigned int sleep_interval = 5; static jmp_buf main_buf; static void onint(int sig) /* ^C handler */ { longjmp(main_buf, 1); } static void catch_sigint(void) { struct sigaction act; memset(&act, 0, sizeof act); act.sa_handler = onint; if( sigaction(SIGINT, &act, NULL) < 0 ) { perror("unable to catch SIGINT"); exit(1); } } static unsigned int hweight32(unsigned int w) { unsigned int res = (w & 0x55555555) + ((w >> 1) & 0x55555555); res = (res & 0x33333333) + ((res >> 2) & 0x33333333); res = (res & 0x0F0F0F0F) + ((res >> 4) & 0x0F0F0F0F); res = (res & 0x00FF00FF) + ((res >> 8) & 0x00FF00FF); return (res & 0x0000FFFF) + ((res >> 16) & 0x0000FFFF); } static void setup_cpu_logical_map_and_nrcpus(const struct perfctr_cpus_info *cpus_info) { const unsigned int *cpus, *cpus_forbidden; unsigned int nrwords, i, cpumask, bitmask; unsigned int logical_cpu_nr, kernel_cpu_nr; cpus = cpus_info->cpus->mask; cpus_forbidden = cpus_info->cpus_forbidden->mask; nrwords = cpus_info->cpus->nrwords; nrcpus = 0; for(i = 0; i < nrwords; ++i) nrcpus += hweight32(cpus[i] & ~cpus_forbidden[i]); cpu_logical_map = malloc(nrcpus*sizeof(cpu_logical_map[0])); if( !cpu_logical_map ) { perror("malloc"); exit(1); } logical_cpu_nr = 0; for(i = 0; i < nrwords; ++i) { cpumask = cpus[i] & 
~cpus_forbidden[i]; kernel_cpu_nr = i * 8 * sizeof(int); for(bitmask = 1; cpumask != 0; ++kernel_cpu_nr, bitmask <<= 1) { if( cpumask & bitmask ) { cpumask &= ~bitmask; cpu_logical_map[logical_cpu_nr] = kernel_cpu_nr; ++logical_cpu_nr; } } } if( logical_cpu_nr != nrcpus ) abort(); } static void do_init(void) { struct perfctr_cpus_info *cpus_info; size_t nbytes; unsigned int i; gperfctr = gperfctr_open(); if( !gperfctr ) { perror("gperfctr_open"); exit(1); } if( gperfctr_info(gperfctr, &info) < 0 ) { perror("gperfctr_info"); exit(1); } cpus_info = gperfctr_cpus_info(gperfctr); if( !cpus_info ) { perror("gperfctr_info"); exit(1); } printf("\nPerfCtr Info:\n"); perfctr_info_print(&info); perfctr_cpus_info_print(cpus_info); /* use all non-forbidden CPUs */ setup_cpu_logical_map_and_nrcpus(cpus_info); free(cpus_info); /* now alloc state memory based on nrcpus */ nbytes = offsetof(struct gperfctr_state, cpu_state[0]) + nrcpus * sizeof(state->cpu_state[0]); state = malloc(nbytes); prev_state = malloc(nbytes); if( !state || !prev_state ) { perror("malloc"); exit(1); } memset(state, 0, nbytes); memset(prev_state, 0, nbytes); /* format state to indicate which CPUs we want to sample */ for(i = 0; i < nrcpus; ++i) state->cpu_state[i].cpu = cpu_logical_map[i]; state->nrcpus = nrcpus; } static int do_read(unsigned int sleep_interval) { unsigned int i, cpu, ctr; for(i = 0; i < state->nrcpus; ++i) { if( gperfctr_read(gperfctr, &state->cpu_state[i]) < 0 ) { perror("gperfctr_read"); return -1; } } printf("\nSample #%u\n", ++sample_num); for(i = 0; i < state->nrcpus; ++i) { cpu = state->cpu_state[i].cpu; printf("\nCPU %d:\n", cpu); if( state->cpu_state[i].cpu_control.tsc_on ) printf("\ttsc\t%lld\n", state->cpu_state[i].sum.tsc); for(ctr = 0; ctr < state->cpu_state[i].cpu_control.nractrs; ++ctr) printf("\tpmc[%d]\t%lld\n", ctr, state->cpu_state[i].sum.pmc[ctr]); if( ctr >= 1 ) { /* compute and display MFLOP/s or MIP/s */ unsigned long long tsc = state->cpu_state[i].sum.tsc; unsigned 
long long prev_tsc = prev_state->cpu_state[i].sum.tsc; unsigned long long ticks = tsc - prev_tsc; unsigned long long pmc0 = state->cpu_state[i].sum.pmc[0]; unsigned long long prev_pmc0 = prev_state->cpu_state[i].sum.pmc[0]; unsigned long long ops = pmc0 - prev_pmc0; double seconds = state->cpu_state[i].cpu_control.tsc_on ? ((double)ticks * (double)(info.tsc_to_cpu_mult ? : 1) / (double)info.cpu_khz) / 1000.0 : (double)sleep_interval; /* don't div-by-0 on WinChip ... */ printf("\tSince previous sample:\n"); printf("\tSECONDS\t%.15g\n", seconds); printf("\t%s\t%llu\n", counting_mips ? "INSNS" : "FLOPS", ops); printf("\t%s/s\t%.15g\n", counting_mips ? "MIP" : "MFLOP", ((double)ops / seconds) / 1e6); prev_state->cpu_state[i].sum.tsc = tsc; prev_state->cpu_state[i].sum.pmc[0] = pmc0; } } return 0; } static void print_control(const struct perfctr_cpu_control *control) { printf("\nControl used:\n"); perfctr_cpu_control_print(control); } static void do_enable(unsigned long sampling_interval) { struct perfctr_cpu_control cpu_control; unsigned int i; setup_control(&info, &cpu_control); print_control(&cpu_control); for(i = 0; i < nrcpus; ++i) { struct gperfctr_cpu_control control; control.cpu = cpu_logical_map[i]; control.cpu_control = cpu_control; if( gperfctr_control(gperfctr, &control) < 0 ) { perror("gperfctr_control"); exit(1); } } if( gperfctr_start(gperfctr, sampling_interval) < 0 ) { perror("gperfctr_start"); exit(1); } } int main(int argc, const char **argv) { if( argc >= 2 ) { sampling_interval = strtoul(argv[1], NULL, 0); if( argc >= 3 ) sleep_interval = strtoul(argv[2], NULL, 0); } if( setjmp(main_buf) == 0 ) { catch_sigint(); do_init(); do_enable(sampling_interval); printf("\nSampling interval:\t%lu usec\n", sampling_interval); printf("Sleep interval:\t\t%u sec\n", sleep_interval); do { sleep(sleep_interval); } while( do_read(sleep_interval) == 0 ); } if( gperfctr ) { printf("shutting down..\n"); gperfctr_stop(gperfctr); } return 0; } 
papi-5.6.0/src/perfctr-2.7.x/etc/costs/PPC750-300000664 001750 001750 00000002324 13216244367 022557 0ustar00jshenry1963jshenry1963000000 000000 [data from a 300 MHz PowerPC 750] PERFCTR INIT: PVR 0x00080202, CPU clock 300753 kHz, TB clock 16708 kHz PERFCTR INIT: NITER == 256 PERFCTR INIT: loop overhead is 40 cycles PERFCTR INIT: mftbl cost is 1.9 cycles (541 total) PERFCTR INIT: mfspr (pmc1) cost is 1.8 cycles (519 total) PERFCTR INIT: mfspr (pmc2) cost is 1.8 cycles (516 total) PERFCTR INIT: mfspr (pmc3) cost is 1.8 cycles (520 total) PERFCTR INIT: mfspr (pmc4) cost is 1.8 cycles (516 total) PERFCTR INIT: mfspr (mmcr0) cost is 1.8 cycles (523 total) PERFCTR INIT: mfspr (mmcr1) cost is 1.8 cycles (516 total) PERFCTR INIT: mtspr (pmc2) cost is 1.8 cycles (525 total) PERFCTR INIT: mtspr (pmc3) cost is 1.8 cycles (516 total) PERFCTR INIT: mtspr (pmc4) cost is 1.8 cycles (519 total) PERFCTR INIT: mtspr (mmcr1) cost is 1.9 cycles (545 total) PERFCTR INIT: mtspr (mmcr0) cost is 2.0 cycles (559 total) PERFCTR INIT: check_fcece(0): MMCR0[FC] is 0, PMC1 is 0x80000076 PERFCTR INIT: check_fcece(1): MMCR0[FC] is 0, PMC1 is 0x80000040 PERFCTR INIT: check_trigger(0): MMCR0[TRIGGER] is 1, PMC1 is 0x80000076, PMC2 is 0x78 PERFCTR INIT: check_trigger(1): MMCR0[TRIGGER] is 1, PMC1 is 0x80000041, PMC2 is 0x41 perfctr: driver 2.6.4, cpu type PowerPC 60x/7xx/74xx at 300753 kHz papi-5.6.0/src/Matlab/PAPI_Matlab.c000775 001750 001750 00000017774 13216244356 020726 0ustar00jshenry1963jshenry1963000000 000000 /****************************/ /* THIS IS OPEN SOURCE CODE */ /****************************/ /** * @file: PAPI_Matlab.c * CVS: $Id$ * @author Joseph Thomas * * @brief PAPI Matlab integration. * See PAPI_Matlab.readme for more information. 
*/ #include "mex.h" #include "matrix.h" #include "papi.h" static long long accum_error = 0; static long long start_time = 0; void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) { float real_time, proc_time, rate; int i; int number_of_counters; unsigned int mrows, nchars; unsigned int *events; unsigned int flop_events[2]; long long ins = 0, *values, flop_values[2]; long long elapsed_time; int result; char *input, *temp; char one_output[] = "This function produces one output per running counter."; char no_input[] = "This function expects no input."; char error_reading[] = "Error reading the running counters."; /* Check for proper number of arguments. */ if(nrhs < 1) { mexErrMsgTxt("This function expects input."); } nchars = mxGetNumberOfElements(prhs[0]); input = (char *)mxCalloc(nchars, sizeof(char) + 1); input = mxArrayToString(prhs[0]); if(!strncmp(input, "num", 3)) { if(nrhs != 1) { mexErrMsgTxt(no_input); } else if(nlhs != 1) { mexErrMsgTxt("This function produces one and only one output: counters."); } result = PAPI_num_counters(); if(result < PAPI_OK) { mexPrintf("Error code: %d\n", result); mexErrMsgTxt("Error reading counters."); } plhs[0] = mxCreateDoubleScalar((double)result); } else if((!strncmp(input, "flip", 4)) || (!strncmp(input, "flop", 4))) { if(nrhs != 1) { mexErrMsgTxt(no_input); } else if(nlhs > 2) { if (input[2] == 'i') mexErrMsgTxt("This function produces 1 or 2 outputs: [ops, mflips]."); else mexErrMsgTxt("This function produces 1 or 2 outputs: [ops, mflops]."); } if (input[2] == 'i') { if(result = PAPI_flips( &real_time, &proc_time, &ins, &rate) 0) { plhs[0] = mxCreateDoubleScalar((double)(ins - accum_error)); /* this call adds 7 fp instructions to the total */ /* but apparently not on Pentium M with Matlab 7.0.4 */ /* accum_error += 7; */ if(nlhs == 2) { plhs[1] = mxCreateDoubleScalar((double)rate); /* the second call adds 4 fp instructions to the total */ /* but apparently not on Pentium M with Matlab 7.0.4 */ /* 
accum_error += 4; */ } } } else if(!strncmp(input, "start", 5)) { if(nlhs != 0) { mexErrMsgTxt("This function produces no output."); } if(nrhs > (PAPI_num_counters() + 1)) { mexErrMsgTxt(one_output); } mrows = mxGetM(prhs[1]); events = (unsigned int *)mxCalloc(nrhs - 1, sizeof(int) + 1); for(i = 1; i < nrhs; i++) { if(mxIsComplex(prhs[i]) || !(mrows == 1) ) { mexErrMsgTxt("Input must be a list of strings."); } if(mxIsChar(prhs[i])) { nchars = mxGetNumberOfElements(prhs[i]); temp = (char *)mxCalloc(nchars, sizeof(char) + 1); temp = mxArrayToString(prhs[i]); if(result = PAPI_event_name_to_code(temp, &(events[i - 1])) < PAPI_OK) { mxFree(temp); mexPrintf("Error code: %d\n", result); mexErrMsgTxt("Incorrect PAPI code given."); } mxFree(temp); } else { events[i - 1] = (unsigned int)mxGetScalar(prhs[i]); } } if((result = PAPI_start_counters(events, nrhs - 1)) < PAPI_OK) { mxFree(events); mexPrintf("Error code: %d\n", result); mexErrMsgTxt("Error initializing counters."); } mxFree(events); } else if(!strncmp(input, "stop", 4)) { if(nrhs != 1) { mexErrMsgTxt(no_input); } number_of_counters = PAPI_num_counters(); if(nlhs > number_of_counters ) { mexErrMsgTxt(one_output); } if (nlhs == 0) values = (long long*)mxCalloc(number_of_counters, sizeof(long long)); else values = (long long *)mxCalloc(nlhs, sizeof(long long) + 1); if (start_time == 0) { if (nlhs == 0) result = PAPI_stop_counters(values, number_of_counters); else result = PAPI_stop_counters(values, nlhs); } else { start_time = 0; result = PAPI_stop_counters(flop_values, 2); } if(result < PAPI_OK) { if(result != PAPI_ENOTRUN) { mexPrintf("Error code: %d\n", result); mexErrMsgTxt("Error stopping the running counters."); } } accum_error = 0; for(i = 0; i < nlhs; i++) { plhs[i] = mxCreateDoubleScalar((double)values[i]); } mxFree(values); } else if(!strncmp(input, "read", 4)) { if(nrhs != 1) { mexErrMsgTxt(no_input); } if(nlhs > PAPI_num_counters()) { mexErrMsgTxt(one_output); } values = (long long *)mxCalloc(nlhs, 
sizeof(long long) + 1); if((result = PAPI_read_counters(values, nlhs)) < PAPI_OK) { mexPrintf("%d\n", result); mexErrMsgTxt(error_reading); } for(i = 0; i < nlhs; i++) { plhs[i] = mxCreateDoubleScalar((double)values[i]); } mxFree(values); } else if(!strncmp(input, "accum", 5)) { if(nrhs > PAPI_num_counters() + 1) { mexErrMsgTxt(no_input); } if(nlhs > PAPI_num_counters()) { mexErrMsgTxt(one_output); } values = (long long *)mxCalloc(nlhs, sizeof(long long) + 1); for(i = 0; i < nrhs - 1; i++) { values[i] = (long long)(*(mxGetPr(prhs[i + 1]))); } if(result = PAPI_accum_counters(values, nlhs) < PAPI_OK) { mexPrintf("Error code: %d\n", result); mexErrMsgTxt(error_reading); } for(i = 0; i < nlhs; i++) { plhs[i] = mxCreateDoubleScalar((double)values[i]); } mxFree(values); } else if(!strncmp(input, "ipc", 3)) { if(nrhs != 1) { mexErrMsgTxt(no_input); } else if(nlhs > 2) { mexErrMsgTxt("This function produces 1 or 2 outputs: [ops, ipc]."); } if(PAPI_ipc(&real_time, &proc_time, &ins, &rate) 0) { plhs[0] = mxCreateDoubleScalar((double)ins); if(nlhs == 2) { plhs[1] = mxCreateDoubleScalar((double)rate); } } } else { mexPrintf("Cannot find the command you specified.\n"); mexErrMsgTxt("See the included readme file."); } } papi-5.6.0/src/components/host_micpower/utils/000775 001750 001750 00000000000 13216244357 023536 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/man/man3/PAPI_read_ts.3000664 001750 001750 00000002713 13216244356 020471 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPI_read_ts" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPI_read_ts \- .PP Read hardware counters with a timestamp\&. .SH SYNOPSIS .br .PP .SH "Detailed Description" .PP .PP \fBC Interface:\fP .RS 4 #include <\fBpapi\&.h\fP> .br int \fBPAPI_read_ts(int EventSet, long long *values, long long *cycles )\fP; .RE .PP \fBPAPI_read_ts()\fP copies the counters of the indicated event set into the provided array\&. 
It also places a real-time cycle timestamp into the cycles array\&. .PP The counters continue counting after the read\&. .PP \fBPAPI_read_ts()\fP assumes an initialized PAPI library and a properly added event set\&. .PP \fBParameters:\fP .RS 4 \fIEventSet\fP -- an integer handle for a PAPI Event Set as created by \fBPAPI_create_eventset()\fP .br \fI*values\fP -- an array to hold the counter values of the counting events .br \fI*cycles\fP -- an array to hold the timestamp values .RE .PP \fBReturn values:\fP .RS 4 \fIPAPI_EINVAL\fP One or more of the arguments is invalid\&. .br \fIPAPI_ESYS\fP A system or C library call failed inside PAPI, see the errno variable\&. .br \fIPAPI_ENOEVST\fP The event set specified does not exist\&. .RE .PP \fBExamples\fP .RS 4 .PP .nf * .fi .PP .RE .PP \fBSee Also:\fP .RS 4 \fBPAPI_read\fP .PP \fBPAPI_accum\fP .PP \fBPAPI_start\fP .PP \fBPAPI_stop\fP .PP \fBPAPI_reset\fP .RE .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. papi-5.6.0/man/man3/PAPIF_reset.3000664 001750 001750 00000000742 13216244355 020277 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPIF_reset" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPIF_reset \- .PP Reset the hardware event counts in an event set\&. .SH SYNOPSIS .br .PP .SH "Detailed Description" .PP .PP \fBFortran Prototype:\fP .RS 4 #include 'fpapi\&.h' .br \fBPAPIF_reset( C_INT EventSet, C_INT check )\fP .RE .PP \fBSee Also:\fP .RS 4 \fBPAPI_reset\fP .RE .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. 
papi-5.6.0/src/ctests/forkexec2.c000664 001750 001750 00000003406 13216244360 020665 0ustar00jshenry1963jshenry1963000000 000000 /* * File: forkexec2.c * Author: Philip Mucci * mucci@cs.utk.edu */ /* This file performs the following test: PAPI_library_init() PAPI_shutdown() fork() / \ parent child wait() PAPI_library_init() PAPI_shutdown() execlp() PAPI_library_init() */ #include #include #include #include #include #include "papi.h" #include "papi_test.h"

/*
 * main - check that PAPI can be initialized again across fork() and exec().
 *
 * Invoked without arguments, the process initializes and shuts down PAPI,
 * forks, and waits for the child.  The child initializes and shuts down
 * PAPI once more and then re-executes this same binary with the marker
 * argument "xxx".  Invoked with "xxx" as first argument, the process only
 * initializes PAPI and returns 0.
 *
 * Every failure is reported through test_fail(); success through
 * test_pass().
 */
int
main( int argc, char **argv )
{
	int retval;
	int status;
	int quiet;
	pid_t pid;

	/* Set TESTS_QUIET variable */
	quiet = tests_quiet( argc, argv );

	if ( ( argc > 1 ) && ( strcmp( argv[1], "xxx" ) == 0 ) ) {
		/* In the exec'ed child: initializing PAPI is the whole test */
		retval = PAPI_library_init( PAPI_VER_CURRENT );
		if ( retval != PAPI_VER_CURRENT ) {
			test_fail( __FILE__, __LINE__,
				   "execed PAPI_library_init", retval );
		}
		return 0;
	}

	if ( !quiet ) {
		printf("Testing fork/PAPI_init/PAPI_shudtdown/exec/PAPI_init\n");
	}

	/* Init PAPI */
	retval = PAPI_library_init( PAPI_VER_CURRENT );
	if ( retval != PAPI_VER_CURRENT ) {
		test_fail( __FILE__, __LINE__,
			   "main PAPI_library_init", retval );
	}

	PAPI_shutdown(  );

	pid = fork(  );
	if ( pid == -1 ) {
		/* No child was created; waiting for one would be meaningless */
		test_fail( __FILE__, __LINE__, "fork", PAPI_ESYS );
	} else if ( pid == 0 ) {
		/* Init PAPI in child before exec */
		retval = PAPI_library_init( PAPI_VER_CURRENT );
		if ( retval != PAPI_VER_CURRENT ) {
			test_fail( __FILE__, __LINE__,
				   "forked PAPI_library_init", retval );
		}

		PAPI_shutdown(  );

		if ( execlp( argv[0], argv[0], "xxx", NULL ) == -1 ) {
			test_fail( __FILE__, __LINE__, "execlp", PAPI_ESYS );
		}
	} else {
		/* In parent, wait for child to finish */
		if ( wait( &status ) == -1 ) {
			test_fail( __FILE__, __LINE__, "wait", PAPI_ESYS );
		}
		/*
		 * WEXITSTATUS() is only meaningful after a normal exit; a child
		 * killed by a signal must not be mistaken for exit code 0.
		 */
		if ( !WIFEXITED( status ) ) {
			test_fail( __FILE__, __LINE__,
				   "child terminated abnormally", PAPI_ESYS );
		}
		if ( WEXITSTATUS( status ) != 0 ) {
			test_fail( __FILE__, __LINE__, "fork",
				   WEXITSTATUS( status ) );
		}
	}

	test_pass( __FILE__ );

	return 0;
}
papi-5.6.0/man/man3/PAPI_num_cmp_hwctrs.3000664 001750 001750 00000004112 13216244356 022073 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPI_num_cmp_hwctrs" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPI_num_cmp_hwctrs \- .PP Return the number of hardware counters for the specified 
component\&. .SH SYNOPSIS .br .PP .SH "Detailed Description" .PP \fBPAPI_num_cmp_hwctrs()\fP returns the number of counters present in the specified component\&. By convention, component 0 is always the cpu\&. .PP On some components, especially for CPUs, the value returned is a theoretical maximum for estimation purposes only\&. It might not be possible to easily create an EventSet that contains the full number of events\&. This can be due to a variety of reasons: 1)\&. Some CPUs (especially Intel and POWER) have the notion of fixed counters that can only measure one thing, usually cycles\&. 2)\&. Some CPUs have very explicit rules about which event can run in which counter\&. In this case it might not be possible to add a wanted event even if counters are free\&. 3)\&. Some CPUs halve the number of counters available when running with SMT (multiple CPU threads) enabled\&. 4)\&. Some operating systems 'steal' a counter to use for things such as NMI Watchdog timers\&. The only sure way to see if events will fit is to attempt adding events to an EventSet, and doing something sensible if an error is generated\&. .PP \fBPAPI_library_init()\fP must be called in order for this function to return anything greater than 0\&. .PP \fBC Interface:\fP .RS 4 #include <\fBpapi\&.h\fP> .br int \fBPAPI_num_cmp_hwctrs(int cidx )\fP; .RE .PP \fBParameters:\fP .RS 4 \fIcidx\fP -- An integer identifier for a component\&. By convention, component 0 is always the cpu component\&. .RE .PP \fBExample\fP .RS 4 .PP .nf * // Query the cpu component for the number of counters\&. * printf(\"%d hardware counters found\&.\\n\", PAPI_num_cmp_hwctrs(0)); * .fi .PP .RE .PP \fBReturns:\fP .RS 4 On success, this function returns a value greater than zero\&. .br A zero result usually means the library has not been initialized\&. .RE .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. 
papi-5.6.0/src/libpfm4/perf_examples/syst.c000664 001750 001750 00000012646 13216244365 022674 0ustar00jshenry1963jshenry1963000000 000000 /* * syst.c - example of a simple system wide monitoring program * * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include #include #include #include #include #include #include #include #include #include #include "perf_util.h"

/* Command-line settings, filled in by main() and read everywhere else. */
typedef struct {
	const char *events;	/* event list handed to perf_setup_list_events() (-e) */
	int delay;		/* number of one-second reporting rounds (-d) */
	int excl;		/* non-zero: set hw.exclusive on the events (-x) */
	int cpu;		/* CPU to monitor (-c); -1 selects all online CPUs */
	int group;		/* non-zero: attach a CPU's events to one group (-g) */
} options_t;

static options_t options;
static perf_event_desc_t **all_fds;	/* event descriptors, indexed by CPU number */
static int *num_fds;			/* number of events, indexed by CPU number */

/*
 * setup_cpu - parse the event list and open one counter per event on @cpu.
 *
 * Fills all_fds[cpu] and num_fds[cpu].  Events are created disabled: in
 * group mode only the first event (the leader) is marked disabled, in
 * non-group mode every event is; measure() enables them afterwards.
 * Terminates the program on any error.
 */
void
setup_cpu(int cpu)
{
	perf_event_desc_t *fds;
	int i, ret;

	ret = perf_setup_list_events(options.events, &all_fds[cpu], &num_fds[cpu]);
	/*
	 * Check the per-CPU event count.  The array pointer itself is never
	 * NULL here, and with zero events fds[0] below would be out of bounds.
	 */
	if (ret || num_fds[cpu] == 0)
		errx(1, "cannot setup events\n");

	fds = all_fds[cpu];

	/* the first event has no group leader yet */
	fds[0].fd = -1;

	for (i = 0; i < num_fds[cpu]; i++) {
		/* with grouping, exclusivity is requested on the leader only */
		if (options.excl && (!options.group || i == 0))
			fds[i].hw.exclusive = 1;

		fds[i].hw.disabled = options.group ? !i : 1;

		/* request timing information necessary for scaling counts */
		fds[i].hw.read_format = PERF_FORMAT_SCALE;

		fds[i].fd = perf_event_open(&fds[i].hw, -1, cpu,
					    (options.group ? fds[0].fd : -1), 0);
		if (fds[i].fd == -1)
			err(1, "cannot attach event to CPU%d %s", cpu, fds[i].name);
	}
}

/*
 * measure - set up, enable and periodically print the counters.
 *
 * Monitors either the single CPU selected with -c or CPUs 0..N-1, where N
 * is the number of online processors.  Prints one line per event and CPU
 * every second for options.delay rounds, then closes and frees everything.
 */
void
measure(void)
{
	perf_event_desc_t *fds;
	long lret;
	int c, cmin, cmax, nslots;
	int i, ret, l;

	/*
	 * NOTE(review): the original format string had no conversion for
	 * options.delay; the text was presumably lost together with its
	 * angle brackets -- confirm the wording against upstream libpfm4.
	 */
	printf("<press CTRL-C to quit before %ds time limit>\n", options.delay);

	cmin = 0;

	lret = sysconf(_SC_NPROCESSORS_ONLN);
	if (lret < 0)
		err(1, "cannot get number of online processors");

	cmax = (int)lret;
	nslots = cmax;

	if (options.cpu != -1) {
		if (options.cpu < 0)
			errx(1, "invalid CPU number %d", options.cpu);
		cmin = options.cpu;
		cmax = cmin + 1;
		/*
		 * Both arrays are indexed by CPU number, so they must also
		 * cover a CPU number at or above the online processor count.
		 */
		if (cmax > nslots)
			nslots = cmax;
	}

	all_fds = calloc(nslots, sizeof(*all_fds));
	num_fds = calloc(nslots, sizeof(*num_fds));

	if (!all_fds || !num_fds)
		err(1, "cannot allocate memory for internal structures");

	for (c = cmin; c < cmax; c++)
		setup_cpu(c);

	/*
	 * FIX this for hotplug CPU
	 */
	for (c = cmin; c < cmax; c++) {
		fds = all_fds[c];
		if (options.group) {
			/* only the leader is enabled explicitly in group mode */
			ret = ioctl(fds[0].fd, PERF_EVENT_IOC_ENABLE, 0);
			if (ret)
				err(1, "cannot enable event %s\n", fds[0].name);
		} else {
			for (i = 0; i < num_fds[c]; i++) {
				ret = ioctl(fds[i].fd, PERF_EVENT_IOC_ENABLE, 0);
				if (ret)
					err(1, "cannot enable event %s\n", fds[i].name);
			}
		}
	}

	for (l = 0; l < options.delay; l++) {

		sleep(1);

		puts("------------------------");
		for (c = cmin; c < cmax; c++) {
			fds = all_fds[c];
			for (i = 0; i < num_fds[c]; i++) {
				uint64_t val, delta;
				double ratio;
				ssize_t nread;

				nread = read(fds[i].fd, fds[i].values, sizeof(fds[i].values));
				if (nread != (ssize_t)sizeof(fds[i].values)) {
					if (nread == -1)
						err(1, "cannot read event %d:%d", i, (int)nread);
					else
						warnx("could not read event%d", i);
				}

				/*
				 * scaling because we may be sharing the PMU and
				 * thus may be multiplexed
				 */
				val = perf_scale(fds[i].values);
				ratio = perf_scale_ratio(fds[i].values);
				delta = perf_scale_delta(fds[i].values, fds[i].prev_values);

				printf("CPU%d val=%-20"PRIu64" %-20"PRIu64" raw=%"PRIu64" ena=%"PRIu64" run=%"PRIu64" ratio=%.2f %s\n",
					c,
					val,
					delta,
					fds[i].values[0],
					fds[i].values[1],
					fds[i].values[2],
					ratio,
					fds[i].name);

				/* remember this sample so the next round can print a delta */
				fds[i].prev_values[0] = fds[i].values[0];
				fds[i].prev_values[1] = fds[i].values[1];
				fds[i].prev_values[2] = fds[i].values[2];
			}
		}
	}

	for (c = cmin; c < cmax; c++) {
		fds = all_fds[c];
		for (i = 0; i < num_fds[c]; i++)
			close(fds[i].fd);
		perf_free_fds(fds, num_fds[c]);
	}

	/* release the per-CPU index arrays as well */
	free(all_fds);
	free(num_fds);
	all_fds = NULL;
	num_fds = NULL;
}

/* usage - print the command-line synopsis. */
static void
usage(void)
{
	printf("usage: syst [-c cpu] [-x] [-h] [-d delay] [-g] [-e event1,event2,...]\n");
}

/*
 * main - parse options, initialize libpfm and run the measurement.
 *
 * Defaults: 20 rounds, events "cycles,instructions", all online CPUs.
 */
int
main(int argc, char **argv)
{
	int c, ret;

	options.cpu = -1;

	while ((c = getopt(argc, argv, "hc:e:d:gx")) != -1) {
		switch (c) {
		case 'x':
			options.excl = 1;
			break;
		case 'e':
			options.events = optarg;
			break;
		case 'c':
			options.cpu = atoi(optarg);
			break;
		case 'g':
			options.group = 1;
			break;
		case 'd':
			options.delay = atoi(optarg);
			break;
		case 'h':
			usage();
			exit(0);
		default:
			errx(1, "unknown error");
		}
	}
	if (!options.delay)
		options.delay = 20;

	if (!options.events)
		options.events = "cycles,instructions";

	ret = pfm_initialize();
	if (ret != PFM_SUCCESS)
		errx(1, "libpfm initialization failed: %s\n", pfm_strerror(ret));

	measure();

	/* free libpfm resources cleanly */
	pfm_terminate();

	return 0;
}
papi-5.6.0/src/libpfm4/lib/pfmlib_itanium_priv.h000664 001750 001750 00000007147 13216244365 023652 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2001-2006 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux/ia64. */
#ifndef __PFMLIB_ITANIUM_PRIV_H__
#define __PFMLIB_ITANIUM_PRIV_H__

/*
 * Itanium encoding structure
 * (code must be first 8 bits)
 *
 * The declared widths add up to 64 bits: 8 (code) + 4 single-bit flags +
 * 4 ignored + 16 (unit mask) + 32 ignored.
 */
typedef struct {
	unsigned long pme_code:8;	/* major event code */
	unsigned long pme_ear:1;	/* is EAR event */
	unsigned long pme_dear:1;	/* 1=Data 0=Instr */
	unsigned long pme_tlb:1;	/* 1=TLB 0=Cache */
	unsigned long pme_btb:1;	/* 1=BTB */
	unsigned long pme_ig1:4;	/* ignored */
	unsigned long pme_umask:16;	/* unit mask*/
	unsigned long pme_ig:32;	/* ignored */
} pme_ita_entry_code_t;

/* unit mask value for events that have no unit mask */
#define PME_UMASK_NONE	0x0

/* An event code, viewed either as one raw word or as its bit-fields. */
typedef union {
	unsigned long  pme_vcode;
	pme_ita_entry_code_t pme_ita_code;	/* must not be larger than vcode */
} pme_ita_code_t;

/* Per-event capability flags, viewed as one raw word or as bit-fields. */
typedef union {
	unsigned long qual;		/* generic qualifier */
	struct {
		unsigned long pme_iar:1;	/* instruction address range supported */
		unsigned long pme_opm:1;	/* opcode match supported */
		unsigned long pme_dar:1;	/* data address range supported */
		unsigned long pme_reserved:61;	/* not used */
	} pme_qual;
} pme_ita_qualifiers_t;

/* One entry of the event table. */
typedef struct {
	char			*pme_name;	/* event name */
	pme_ita_code_t		pme_entry_code;	/* encoded event, see pme_ita_entry_code_t */
	unsigned long		pme_counters;	/* supported counters */
	unsigned int		pme_maxincr;	/* presumably the maximum per-cycle increment -- TODO confirm */
	pme_ita_qualifiers_t	pme_qualifiers;	/* capability flags */
	char			*pme_desc;	/* event description */
} pme_ita_entry_t;

/*
 * We embed the umask value into the event code. Because it really is
 * like a subevent.
 * pme_entry_code, as declared in pme_ita_entry_code_t:
 * 	- first 16 bits: major event code (8 bits), the EAR/DEAR/TLB/BTB
 * 	  flags (4 bits) and 4 ignored bits
 * 	- next 16 bits: unit mask
 *
 * The macros below are member-access shorthands meant to be applied to a
 * pme_ita_entry_t, e.g. e->pme_code.
 */
#define pme_code	pme_entry_code.pme_ita_code.pme_code
#define pme_ear		pme_entry_code.pme_ita_code.pme_ear
#define pme_dear	pme_entry_code.pme_ita_code.pme_dear
#define pme_tlb		pme_entry_code.pme_ita_code.pme_tlb
#define pme_btb		pme_entry_code.pme_ita_code.pme_btb
#define pme_umask	pme_entry_code.pme_ita_code.pme_umask
/*
 * NOTE(review): pme_qualifiers_t above has no member named pme_qual_struct
 * and no pme_used field, so any use of this alias cannot compile. It looks
 * like a stale leftover -- confirm it is unused before removing it.
 */
#define pme_used	pme_qualifiers.pme_qual_struct.pme_used

/* Event-class predicates; e is a pointer to a pme_ita_entry_t. */
#define event_is_ear(e)		((e)->pme_ear == 1)
#define event_is_iear(e)	((e)->pme_ear == 1 && (e)->pme_dear==0)
#define event_is_dear(e)	((e)->pme_ear == 1 && (e)->pme_dear==1)
#define event_is_tlb_ear(e)	((e)->pme_ear == 1 && (e)->pme_tlb==1)
#define event_is_btb(e)		((e)->pme_btb)

/* Capability predicates: opcode match, instruction and data address range. */
#define event_opcm_ok(e)	((e)->pme_qualifiers.pme_qual.pme_opm==1)
#define event_iarr_ok(e)	((e)->pme_qualifiers.pme_qual.pme_iar==1)
#define event_darr_ok(e)	((e)->pme_qualifiers.pme_qual.pme_dar==1)

#endif /* __PFMLIB_ITANIUM_PRIV_H__ */
papi-5.6.0/man/man3/PAPI_disable_component.3000664 001750 001750 00000002512 13216244356 022532 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPI_disable_component" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPI_disable_component \- .PP disables the specified component .SH SYNOPSIS .br .PP .SH "Detailed Description" .PP .PP .nf @retval ENOCMP component does not exist @retval ENOINIT cannot disable as PAPI has already been initialized @param cidx component index of component to be disabled @par Examples: .fi .PP .PP .nf int cidx, result; cidx = PAPI_get_component_index("example"); if (cidx>=0) { result = PAPI_disable_component(cidx); if (result==PAPI_OK) printf("The example component is disabled\n"); } // \&.\&.\&. PAPI_library_init(); * .fi .PP \fBPAPI_disable_component()\fP allows the user to disable components before \fBPAPI_library_init()\fP time\&. 
This is useful if the user knows they do not wish to use events from that component and want to reduce the PAPI library overhead\&. .PP \fBPAPI_disable_component()\fP must be called before \fBPAPI_library_init()\fP\&. .PP \fBSee Also:\fP .RS 4 \fBPAPI_get_event_component\fP .PP \fBPAPI_library_init\fP .RE .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. papi-5.6.0/src/perfctr-2.6.x/etc/costs/Core2-2.4000664 001750 001750 00000001721 13216244366 022652 0ustar00jshenry1963jshenry1963000000 000000 [data from a 2.4 GHz Intel Core 2 Duo 6600] PERFCTR INIT: vendor 0, family 6, model 15, stepping 6, clock 2400116 kHz PERFCTR INIT: NITER == 64 PERFCTR INIT: loop overhead is 369 cycles PERFCTR INIT: rdtsc cost is 63.7 cycles (4446 total) PERFCTR INIT: rdpmc cost is 53.5 cycles (3798 total) PERFCTR INIT: rdmsr (counter) cost is 219.9 cycles (14445 total) PERFCTR INIT: rdmsr (evntsel) cost is 190.2 cycles (12546 total) PERFCTR INIT: wrmsr (counter) cost is 223.7 cycles (14688 total) PERFCTR INIT: wrmsr (evntsel) cost is 219.9 cycles (14445 total) PERFCTR INIT: read cr4 cost is 7.3 cycles (837 total) PERFCTR INIT: write cr4 cost is 113.0 cycles (7605 total) PERFCTR INIT: write LVTPC cost is 19.2 cycles (1602 total) PERFCTR INIT: sync_core cost is 215.2 cycles (14148 total) PERFCTR INIT: read fixed_ctr0 cost is 53.0 cycles (3762 total) PERFCTR INIT: wrmsr fixed_ctr_ctrl cost is 182.6 cycles (12060 total) perfctr: driver 2.6.29, cpu type Intel Core 2 at 2400116 kHz papi-5.6.0/src/libpfm4/docs/man3/libpfm_amd64_k8.3000664 001750 001750 00000002667 13216244363 023414 0ustar00jshenry1963jshenry1963000000 000000 .TH LIBPFM 3 "April, 2009" "" "Linux Programmer's Manual" .SH NAME libpfm_amd64_k8 - support for AMD64 K8 processors .SH SYNOPSIS .nf .B #include .sp .B PMU name: amd64_k8_revb, amd64_k8_revc, amd64_k8_revd, amd64_k8_reve, amd64_k8_revf, amd64_k8_revg .B PMU desc: AMD64 K8 RevB, AMD64 K8 RevC, AMD64 K8 RevD, AMD64 K8 RevE, AMD64 K8 
RevF, AMD64 K8 RevG .sp .SH DESCRIPTION The library supports AMD K8 processors in both 32 and 64-bit modes. They correspond to processor family 15. .SH MODIFIERS The following modifiers are supported on AMD64 K8 processors: .TP .B u Measure at user level which includes privilege levels 1, 2, 3. This corresponds to \fBPFM_PLM3\fR. This is a boolean modifier. .TP .B k Measure at kernel level which includes privilege level 0. This corresponds to \fBPFM_PLM0\fR. This is a boolean modifier. .TP .B i Invert the meaning of the event. The counter will now count cycles in which the event is \fBnot\fR occurring. This is a boolean modifier .TP .B e Enable edge detection, i.e., count only when there is a state transition. This is a boolean modifier. .TP .B c Set the counter mask value. The mask acts as a threshold. The counter will count the number of cycles in which the number of occurrences of the event is greater or equal to the threshold. This is an integer modifier with values in the range [0:255]. .SH AUTHORS .nf Stephane Eranian Robert Richter .fi .PP papi-5.6.0/src/libpfm-3.y/include/perfmon/perfmon_i386.h000664 001750 001750 00000000753 13216244362 024671 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2007 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * This file should never be included directly, use * instead. 
*/
#ifndef _PERFMON_I386_H_
#define _PERFMON_I386_H_

/*
 * Both i386 and x86-64 must have same limits to ensure ABI
 * compatibility
 */
/* upper bounds on the number of PMC and PMD registers: 256 + 64 = 320 each */
#define PFM_ARCH_MAX_PMCS	(256+64) /* presumably 256 hardware + 64 software registers -- TODO confirm */
#define PFM_ARCH_MAX_PMDS	(256+64) /* presumably 256 hardware + 64 software registers -- TODO confirm */

#endif /* _PERFMON_I386_H_ */
papi-5.6.0/src/libpfm4/libpfm.spec000664 001750 001750 00000006655 13216244365 021024 0ustar00jshenry1963jshenry1963000000 000000 %{!?with_python: %global with_python 1} %define python_sitearch %(python -c "from distutils.sysconfig import get_python_lib; print get_python_lib(1)") %define python_prefix %(python -c "import sys; print sys.prefix") Name: libpfm Version: 4.6.0 Release: 1%{?dist} Summary: Library to encode performance events for use by perf tool Group: System Environment/Libraries License: MIT URL: http://perfmon2.sourceforge.net/ Source0: http://sourceforge.net/projects/perfmon2/files/libpfm4/%{name}-%{version}.tar.gz %if %{with_python} BuildRequires: python-devel BuildRequires: python-setuptools BuildRequires: swig %endif BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) %description libpfm4 is a library to help encode events for use with operating system kernels performance monitoring interfaces. The current version provides support for the perf_events interface available in upstream Linux kernels since v2.6.31. %package devel Summary: Development library to encode performance events for perf_events based tools Group: Development/Libraries Requires: %{name} = %{version}-%{release} %description devel Development library and header files to create performance monitoring applications for the perf_events interface. %if %{with_python} %package python Summary: Python bindings for libpfm and perf_event_open system call Group: Development/Languages Requires: %{name} = %{version}-%{release} %description python Python bindings for libpfm4 and perf_event_open system call. 
%endif %prep %setup -q %build %if %{with_python} %global python_config CONFIG_PFMLIB_NOPYTHON=n %else %global python_config CONFIG_PFMLIB_NOPYTHON=y %endif make %{python_config} %{?_smp_mflags} %install rm -rf $RPM_BUILD_ROOT %if %{with_python} %global python_config CONFIG_PFMLIB_NOPYTHON=n %else %global python_config CONFIG_PFMLIB_NOPYTHON=y %endif make \ PREFIX=$RPM_BUILD_ROOT%{_prefix} \ LIBDIR=$RPM_BUILD_ROOT%{_libdir} \ PYTHON_PREFIX=$RPM_BUILD_ROOT/%{python_prefix} \ %{python_config} \ LDCONFIG=/bin/true \ install %clean rm -fr $RPM_BUILD_ROOT %post -p /sbin/ldconfig %postun -p /sbin/ldconfig %files %defattr(644,root,root,755) %doc README %attr(755,root,root) %{_libdir}/lib*.so* %files devel %defattr(644,root,root,755) %{_includedir}/* %{_mandir}/man3/* %{_libdir}/lib*.a %if %{with_python} %files python %defattr(644,root,root,755) %attr(755,root,root) %{python_sitearch}/* %endif %changelog * Tue Feb 9 2016 William Cohen 4.6.0-1 - Update spec file. * Wed Nov 13 2013 Lukas Berk 4.4.0-1 - Intel IVB-EP support - Intel IVB updates support - Intel SNB updates support - Intel SNB-EP uncore support - ldlat support (PEBS-LL) - New Intel Atom support - bug fixes * Tue Aug 28 2012 Stephane Eranian 4.3.0-1 - ARM Cortex A15 support - updated Intel Sandy Bridge core PMU events - Intel Sandy Bridge desktop (model 42) uncore PMU support - Intel Ivy Bridge support - full perf_events generic event support - updated perf_examples - enabled Intel Nehalem/Westmere uncore PMU support - AMD LLano processor support (Fam 12h) - AMD Turion processor support (Fam 11h) - Intel Atom Cedarview processor support - Win32 compilation support - perf_events excl attribute - perf_events generic hw event aliases support - many bug fixes * Wed Mar 14 2012 William Cohen 4.2.0-2 - Some spec file fixup. 
* Wed Jan 12 2011 Arun Sharma 4.2.0-0 Initial revision papi-5.6.0/src/components/lustre/tests/000775 001750 001750 00000000000 13216244357 022174 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/libpfm4/docs/man3/libpfm_intel_bdx_unc_ha.3000664 001750 001750 00000002763 13216244364 025362 0ustar00jshenry1963jshenry1963000000 000000 .TH LIBPFM 3 "June, 2017" "" "Linux Programmer's Manual" .SH NAME libpfm_intel_bdx_unc_ha - support for Intel Broadwell Server Home Agent (HA) uncore PMU .SH SYNOPSIS .nf .B #include .sp .B PMU name: bdx_unc_ha0, bdx_unc_ha1 .B PMU desc: Intel Broadwell Server HA uncore PMU .sp .SH DESCRIPTION The library supports the Intel Broadwell Server Home Agent (HA) uncore PMU. This PMU model only exists on various Broadwell models (79, 86). .SH MODIFIERS The following modifiers are supported on Intel Broadwell server HA uncore PMU: .TP .B e Enable edge detection, i.e., count only when there is a state transition from no occurrence of the event to at least one occurrence. This modifier must be combined with a threshold modifier (t) with a value greater or equal to one. This is a boolean modifier. .TP .B t Set the threshold value. When set to a non-zero value, the counter counts the number of HA cycles in which the number of occurrences of the event is greater or equal to the threshold. This is an integer modifier with values in the range [0:255]. .TP .B i Invert the meaning of the threshold or edge filter. If set, the event counts when strictly less than N occurrences occur per cycle if threshold is set to N. When invert is set, then threshold must be set to non-zero value. If set, the event counts when the event transitions from occurring to not occurring (falling edge) when edge detection is set. 
This is a boolean modifier .SH AUTHORS .nf Stephane Eranian .fi .PP papi-5.6.0/src/libpfm4/docs/man3/libpfm_intel_hswep_unc_ha.3000664 001750 001750 00000002711 13216244364 025724 0ustar00jshenry1963jshenry1963000000 000000 .TH LIBPFM 3 "May, 2015" "" "Linux Programmer's Manual" .SH NAME libpfm_intel_hswep_unc_ha - support for Intel Haswell-EP Home Agent (HA) uncore PMU .SH SYNOPSIS .nf .B #include .sp .B PMU name: hswep_unc_ha0, hswep_unc_ha1 .B PMU desc: Intel Haswell-EP HA uncore PMU .sp .SH DESCRIPTION The library supports the Intel Haswell Home Agent (HA) uncore PMU. This PMU model only exists on Haswell model 63. .SH MODIFIERS The following modifiers are supported on Intel Haswell HA uncore PMU: .TP .B e Enable edge detection, i.e., count only when there is a state transition from no occurrence of the event to at least one occurrence. This modifier must be combined with a threshold modifier (t) with a value greater or equal to one. This is a boolean modifier. .TP .B t Set the threshold value. When set to a non-zero value, the counter counts the number of HA cycles in which the number of occurrences of the event is greater or equal to the threshold. This is an integer modifier with values in the range [0:255]. .TP .B i Invert the meaning of the threshold or edge filter. If set, the event counts when strictly less than N occurrences occur per cycle if threshold is set to N. When invert is set, then threshold must be set to non-zero value. If set, the event counts when the event transitions from occurring to not occurring (falling edge) when edge detection is set. 
This is a boolean modifier .SH AUTHORS .nf Stephane Eranian .fi .PP papi-5.6.0/src/libpfm-3.y/python/000775 001750 001750 00000000000 13216244363 020525 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/libpfm4/lib/pfmlib_perf_event.c000664 001750 001750 00000026714 13216244365 023275 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_perf_events.c: encode events for perf_event API * * Copyright (c) 2009 Google, Inc * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include #include #include #include #include #include #include "pfmlib_priv.h" #include "pfmlib_perf_event_priv.h" #define PERF_PROC_FILE "/proc/sys/kernel/perf_event_paranoid" #ifdef min #undef min #endif #define min(a, b) ((a) < (b) ? 
(a) : (b)) /* * contains ONLY attributes related to PMU features */ static const pfmlib_attr_desc_t perf_event_mods[]={ PFM_ATTR_B("u", "monitor at user level"), /* monitor user level */ PFM_ATTR_B("k", "monitor at kernel level"), /* monitor kernel level */ PFM_ATTR_B("h", "monitor at hypervisor level"), /* monitor hypervisor level */ PFM_ATTR_SKIP, PFM_ATTR_SKIP, PFM_ATTR_SKIP, PFM_ATTR_SKIP, PFM_ATTR_B("mg", "monitor guest execution"), /* monitor guest level */ PFM_ATTR_B("mh", "monitor host execution"), /* monitor host level */ PFM_ATTR_NULL /* end-marker to avoid exporting number of entries */ }; /* * contains all attributes controlled by perf_events. That includes PMU attributes * and pure software attributes such as sampling periods */ static const pfmlib_attr_desc_t perf_event_ext_mods[]={ PFM_ATTR_B("u", "monitor at user level"), /* monitor user level */ PFM_ATTR_B("k", "monitor at kernel level"), /* monitor kernel level */ PFM_ATTR_B("h", "monitor at hypervisor level"), /* monitor hypervisor level */ PFM_ATTR_I("period", "sampling period"), /* sampling period */ PFM_ATTR_I("freq", "sampling frequency (Hz)"), /* sampling frequency */ PFM_ATTR_I("precise", "precise ip"), /* anti-skid mechanism */ PFM_ATTR_B("excl", "exclusive access"), /* exclusive PMU access */ PFM_ATTR_B("mg", "monitor guest execution"), /* monitor guest level */ PFM_ATTR_B("mh", "monitor host execution"), /* monitor host level */ PFM_ATTR_I("cpu", "CPU to program"), /* CPU to program */ PFM_ATTR_B("pinned", "pin event to counters"), /* pin event to PMU */ PFM_ATTR_NULL /* end-marker to avoid exporting number of entries */ }; static int pfmlib_perf_event_encode(void *this, const char *str, int dfl_plm, void *data) { pfm_perf_encode_arg_t arg; pfm_perf_encode_arg_t *uarg = data; pfmlib_os_t *os = this; struct perf_event_attr my_attr, *attr; pfmlib_pmu_t *pmu; pfmlib_event_desc_t e; pfmlib_event_attr_info_t *a; size_t orig_sz, asz, sz = sizeof(arg); uint64_t ival; int has_plm = 0, 
has_vmx_plm = 0; int i, plm = 0, ret, vmx_plm = 0; int cpu = -1, pinned = 0; sz = pfmlib_check_struct(uarg, uarg->size, PFM_PERF_ENCODE_ABI0, sz); if (!sz) return PFM_ERR_INVAL; /* copy input */ memcpy(&arg, uarg, sz); /* pointer to our internal attr struct */ memset(&my_attr, 0, sizeof(my_attr)); attr = &my_attr; /* * copy user attr to our internal version * size == 0 is interpreted minimal possible * size (ABI_VER0) */ /* size of attr struct passed by user */ orig_sz = uarg->attr->size; if (orig_sz == 0) asz = PERF_ATTR_SIZE_VER0; else asz = min(sizeof(*attr), orig_sz); /* * we copy the user struct to preserve whatever may * have been initialized but that we do not use */ memcpy(attr, uarg->attr, asz); /* restore internal size (just in case we need it) */ attr->size = sizeof(my_attr); /* useful for debugging */ if (asz != sizeof(*attr)) __pfm_vbprintf("warning: mismatch attr struct size " "user=%d libpfm=%zu\n", asz, sizeof(*attr)); memset(&e, 0, sizeof(e)); e.osid = os->id; e.os_data = attr; e.dfl_plm = dfl_plm; /* after this call, need to call pfmlib_release_event() */ ret = pfmlib_parse_event(str, &e); if (ret != PFM_SUCCESS) return ret; pmu = e.pmu; ret = PFM_ERR_NOTSUPP; if (!pmu->get_event_encoding[e.osid]) { DPRINT("PMU %s does not support PFM_OS_NONE\n", pmu->name); goto done; } ret = pmu->get_event_encoding[e.osid](pmu, &e); if (ret != PFM_SUCCESS) goto done; /* * process perf_event attributes */ for (i = 0; i < e.nattrs; i++) { a = attr(&e, i); if (a->ctrl != PFM_ATTR_CTRL_PERF_EVENT) continue; ival = e.attrs[i].ival; switch(a->idx) { case PERF_ATTR_U: if (ival) plm |= PFM_PLM3; has_plm = 1; break; case PERF_ATTR_K: if (ival) plm |= PFM_PLM0; has_plm = 1; break; case PERF_ATTR_H: if (ival) plm |= PFM_PLMH; has_plm = 1; break; case PERF_ATTR_PE: if (!ival || attr->freq) { ret = PFM_ERR_ATTR_VAL; goto done; } attr->sample_period = ival; break; case PERF_ATTR_FR: if (!ival || attr->sample_period) { ret = PFM_ERR_ATTR_VAL; goto done; } attr->sample_freq = 
ival; attr->freq = 1; break; case PERF_ATTR_PR: if (ival > 3) { ret = PFM_ERR_ATTR_VAL; goto done; } attr->precise_ip = ival; break; case PERF_ATTR_EX: if (ival && !attr->exclusive) attr->exclusive = 1; break; case PERF_ATTR_MG: vmx_plm |= PFM_PLM3; has_vmx_plm = 1; break; case PERF_ATTR_MH: vmx_plm |= PFM_PLM0; has_vmx_plm = 1; break; case PERF_ATTR_CPU: if (ival >= INT_MAX) { ret = PFM_ERR_ATTR_VAL; goto done; } cpu = (int)ival; break; case PERF_ATTR_PIN: pinned = (int)!!ival; break; } } /* * if no priv level mask was provided * with the event, then use dfl_plm */ if (!has_plm) plm = dfl_plm; /* exclude_guest by default */ if (!has_vmx_plm) vmx_plm = PFM_PLM0; /* * perf_event plm work by exclusion, so use logical or * goal here is to set to zero any exclude_* not supported * by underlying PMU */ plm |= (~pmu->supported_plm) & PFM_PLM_ALL; vmx_plm |= (~pmu->supported_plm) & PFM_PLM_ALL; attr->exclude_user = !(plm & PFM_PLM3); attr->exclude_kernel = !(plm & PFM_PLM0); attr->exclude_hv = !(plm & PFM_PLMH); attr->exclude_guest = !(vmx_plm & PFM_PLM3); attr->exclude_host = !(vmx_plm & PFM_PLM0); attr->pinned = pinned; __pfm_vbprintf("PERF[type=%x config=0x%"PRIx64" config1=0x%"PRIx64 " excl=%d e_u=%d e_k=%d e_hv=%d e_host=%d e_gu=%d period=%"PRIu64" freq=%d" " precise=%d pinned=%d] %s\n", attr->type, attr->config, attr->config1, attr->exclusive, attr->exclude_user, attr->exclude_kernel, attr->exclude_hv, attr->exclude_host, attr->exclude_guest, attr->sample_period, attr->freq, attr->precise_ip, attr->pinned, str); /* * propagate event index if necessary */ arg.idx = pfmlib_pidx2idx(e.pmu, e.event); /* propagate cpu */ arg.cpu = cpu; /* propagate our changes, that overwrites attr->size */ memcpy(uarg->attr, attr, asz); /* restore user size */ uarg->attr->size = orig_sz; /* * fstr not requested, stop here */ ret = PFM_SUCCESS; if (!arg.fstr) { memcpy(uarg, &arg, sz); goto done; } for (i=0; i < e.npattrs; i++) { int idx; if (e.pattrs[i].ctrl != PFM_ATTR_CTRL_PERF_EVENT) 
continue; idx = e.pattrs[i].idx; switch (idx) { case PERF_ATTR_K: evt_strcat(e.fstr, ":%s=%lu", perf_event_ext_mods[idx].name, !!(plm & PFM_PLM0)); break; case PERF_ATTR_U: evt_strcat(e.fstr, ":%s=%lu", perf_event_ext_mods[idx].name, !!(plm & PFM_PLM3)); break; case PERF_ATTR_H: evt_strcat(e.fstr, ":%s=%lu", perf_event_ext_mods[idx].name, !!(plm & PFM_PLMH)); break; case PERF_ATTR_PR: evt_strcat(e.fstr, ":%s=%d", perf_event_ext_mods[idx].name, attr->precise_ip); break; case PERF_ATTR_PE: case PERF_ATTR_FR: if (attr->freq && attr->sample_period) evt_strcat(e.fstr, ":%s=%"PRIu64, perf_event_ext_mods[idx].name, attr->sample_period); else if (attr->sample_period) evt_strcat(e.fstr, ":%s=%"PRIu64, perf_event_ext_mods[idx].name, attr->sample_period); break; case PERF_ATTR_MG: evt_strcat(e.fstr, ":%s=%lu", perf_event_ext_mods[idx].name, !attr->exclude_guest); break; case PERF_ATTR_MH: evt_strcat(e.fstr, ":%s=%lu", perf_event_ext_mods[idx].name, !attr->exclude_host); break; case PERF_ATTR_EX: evt_strcat(e.fstr, ":%s=%lu", perf_event_ext_mods[idx].name, attr->exclusive); break; } } ret = pfmlib_build_fstr(&e, arg.fstr); if (ret == PFM_SUCCESS) memcpy(uarg, &arg, sz); done: pfmlib_release_event(&e); return ret; } /* * get OS-specific event attributes */ static int perf_get_os_nattrs(void *this, pfmlib_event_desc_t *e) { pfmlib_os_t *os = this; int i, n = 0; for (i = 0; os->atdesc[i].name; i++) if (!is_empty_attr(os->atdesc+i)) n++; return n; } static int perf_get_os_attr_info(void *this, pfmlib_event_desc_t *e) { pfmlib_os_t *os = this; pfmlib_event_attr_info_t *info; int i, k, j = e->npattrs; for (i = k = 0; os->atdesc[i].name; i++) { /* skip padding entries */ if (is_empty_attr(os->atdesc+i)) continue; info = e->pattrs + j + k; info->name = os->atdesc[i].name; info->desc = os->atdesc[i].desc; info->equiv= NULL; info->code = i; info->idx = i; /* namespace-specific index */ info->type = os->atdesc[i].type; info->is_dfl = 0; info->ctrl = PFM_ATTR_CTRL_PERF_EVENT; k++; } 
e->npattrs += k; return PFM_SUCCESS; } /* * old interface, maintained for backward compatibility with earlier versions of the library */ int pfm_get_perf_event_encoding(const char *str, int dfl_plm, struct perf_event_attr *attr, char **fstr, int *idx) { pfm_perf_encode_arg_t arg; int ret; if (PFMLIB_INITIALIZED() == 0) return PFM_ERR_NOINIT; /* idx and fstr can be NULL */ if (!(attr && str)) return PFM_ERR_INVAL; if (dfl_plm & ~(PFM_PLM_ALL)) return PFM_ERR_INVAL; memset(&arg, 0, sizeof(arg)); /* do not clear attr, some fields may be initialized by caller already, e.g., size */ arg.attr = attr; arg.fstr = fstr; ret = pfm_get_os_event_encoding(str, dfl_plm, PFM_OS_PERF_EVENT_EXT, &arg); if (ret != PFM_SUCCESS) return ret; if (idx) *idx = arg.idx; return PFM_SUCCESS; } static int pfm_perf_event_os_detect(void *this) { int ret = access(PERF_PROC_FILE, F_OK); return ret ? PFM_ERR_NOTSUPP : PFM_SUCCESS; } pfmlib_os_t pfmlib_os_perf={ .name = "perf_event", .id = PFM_OS_PERF_EVENT, .atdesc = perf_event_mods, .detect = pfm_perf_event_os_detect, .get_os_attr_info = perf_get_os_attr_info, .get_os_nattrs = perf_get_os_nattrs, .encode = pfmlib_perf_event_encode, }; pfmlib_os_t pfmlib_os_perf_ext={ .name = "perf_event extended", .id = PFM_OS_PERF_EVENT_EXT, .atdesc = perf_event_ext_mods, .detect = pfm_perf_event_os_detect, .get_os_attr_info = perf_get_os_attr_info, .get_os_nattrs = perf_get_os_nattrs, .encode = pfmlib_perf_event_encode, }; papi-5.6.0/man/man3/PAPIF_add_events.3000664 001750 001750 00000001051 13216244355 021263 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPIF_add_events" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPIF_add_events \- .PP add multiple PAPI presets or native hardware events to an event set .SH SYNOPSIS .br .PP .SH "Detailed Description" .PP .PP \fBFortran Interface:\fP .RS 4 #include 'fpapi\&.h' .br \fBPAPIF_add_events\fP( C_INT EventSet, C_INT(*) EventCodes, C_INT number, C_INT check ) .RE .PP \fBSee 
Also:\fP .RS 4 \fBPAPI_add_events\fP .RE .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. papi-5.6.0/src/validation_tests/000775 001750 001750 00000000000 13216244370 020676 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/man/man3/PAPIF_get_real_cyc.3000664 001750 001750 00000000763 13216244355 021600 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPIF_get_real_cyc" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPIF_get_real_cyc \- .PP Get real time counter value in clock cycles\&. .SH SYNOPSIS .br .PP .SH "Detailed Description" .PP .PP \fBFortran Interface:\fP .RS 4 #include 'fpapi\&.h' .br \fBPAPIF_get_real_cyc( C_LONG_LONG real_cyc )\fP .RE .PP \fBSee Also:\fP .RS 4 \fBPAPI_get_real_cyc\fP .RE .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. papi-5.6.0/src/libpfm-3.y/include/perfmon/perfmon_pebs_p4_smpl.h000664 001750 001750 00000013237 13216244362 026570 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2005-2007 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA * 02111-1307 USA * * This file implements the sampling format to support Intel * Precise Event Based Sampling (PEBS) feature of Pentium 4 * and other Netburst-based processors. Not to be used for * Intel Core-based processors. 
* * What is PEBS? * ------------ * This is a hardware feature to enhance sampling by providing * better precision as to where a sample is taken. This avoids the * typical skew in the instruction one can observe with any * interrupt-based sampling technique. * * PEBS also lowers sampling overhead significantly by having the * processor store samples instead of the OS. PMU interrupts are only * generated after multiple samples are written. * * Another benefit of PEBS is that samples can be captured inside * critical sections where interrupts are masked. * * How does it work? * PEBS effectively implements a HW buffer. The OS must pass a region * of memory where samples are to be stored. The region can have any * size. The OS must also specify the sampling period to reload. The PMU * will interrupt when it reaches the end of the buffer or a specified * threshold location inside the memory region. * * The description of the buffer is stored in the Data Save Area (DS). * The samples are stored sequentially in the buffer. The format of the * buffer is fixed and specified in the PEBS documentation. The sample * format changes between 32-bit and 64-bit modes due to the extended register * file. * * PEBS does not work when HyperThreading is enabled due to certain MSRs * being shared between the two threads. * * What does the format do? * It provides access to the PEBS feature for both 32-bit and 64-bit * processors that support it. * * The same code is used for both 32-bit and 64-bit modes, but different * format names are used because the two modes are not compatible due to * data model and register file differences. Similarly the public data * structures describing the samples are different. * * It is important to realize that the format provides a zero-copy environment * for the samples, i.e., the OS never touches the samples. Whatever the * processor writes is directly accessible to the user. 
* * Parameters to the buffer can be passed via pfm_create_context() in * the pfm_pebs_smpl_arg structure. * * It is not possible to mix a 32-bit PEBS application on top of a 64-bit * host kernel. */ #ifndef __PERFMON_PEBS_P4_SMPL_H__ #define __PERFMON_PEBS_P4_SMPL_H__ 1 #ifdef __cplusplus extern "C" { #endif #include #ifdef __i386__ #define PFM_PEBS_P4_SMPL_NAME "pebs32_p4" #else #define PFM_PEBS_P4_SMPL_NAME "pebs64_p4" #endif /* * format specific parameters (passed at context creation) */ typedef struct { uint64_t cnt_reset; /* counter reset value */ size_t buf_size; /* size of the buffer in bytes */ size_t intr_thres; /* index of interrupt threshold entry */ uint64_t reserved[6]; /* for future use */ } pfm_pebs_p4_smpl_arg_t; /* * DS Save Area as described in section 15.10.5 */ typedef struct { unsigned long bts_buf_base; unsigned long bts_index; unsigned long bts_abs_max; unsigned long bts_intr_thres; unsigned long pebs_buf_base; unsigned long pebs_index; unsigned long pebs_abs_max; unsigned long pebs_intr_thres; uint64_t pebs_cnt_reset; } pfm_ds_area_p4_t; /* * This header is at the beginning of the sampling buffer returned to the user. * * Because of PEBS alignment constraints, the actual PEBS buffer area does * not necessarily begin right after the header. The start_offs field must be * used to compute the first byte of the buffer. The offset is defined as * the number of bytes between the end of the header and the beginning of * the buffer. 
As such the formula is: * actual_buffer = (unsigned long)(hdr+1)+hdr->start_offs */ typedef struct { uint64_t overflows; /* #overflows for buffer */ size_t buf_size; /* bytes in the buffer */ size_t start_offs; /* actual buffer start offset */ uint32_t version; /* smpl format version */ uint32_t reserved1; /* for future use */ uint64_t reserved2[5]; /* for future use */ pfm_ds_area_p4_t ds; /* DS management Area */ } pfm_pebs_p4_smpl_hdr_t; /* * PEBS record format for both 32-bit and 64-bit modes * (r8-r15 are present only when __x86_64__ is defined) */ typedef struct { unsigned long eflags; unsigned long ip; unsigned long eax; unsigned long ebx; unsigned long ecx; unsigned long edx; unsigned long esi; unsigned long edi; unsigned long ebp; unsigned long esp; #ifdef __x86_64__ unsigned long r8; unsigned long r9; unsigned long r10; unsigned long r11; unsigned long r12; unsigned long r13; unsigned long r14; unsigned long r15; #endif } pfm_pebs_p4_smpl_entry_t; /* * format version: major number in the upper 16 bits, minor number in the lower 16 bits */ #define PFM_PEBS_P4_SMPL_VERSION_MAJ 1U #define PFM_PEBS_P4_SMPL_VERSION_MIN 0U #define PFM_PEBS_P4_SMPL_VERSION (((PFM_PEBS_P4_SMPL_VERSION_MAJ&0xffff)<<16)|\ (PFM_PEBS_P4_SMPL_VERSION_MIN & 0xffff)) #ifdef __cplusplus }; #endif #endif /* __PERFMON_PEBS_P4_SMPL_H__ */ papi-5.6.0/ChangeLogP510.txt000664 001750 001750 00000045605 13216244355 017506 0ustar00jshenry1963jshenry1963000000 000000 2013-01-15 * 0917f567 src/threads.c: Cleaned up compiler warning (gcc version 4.4.6) * 06ca3faa src/components/bgpm/CNKunit/linux-CNKunit.c src/components/bgpm/IOunit/linux-IOunit.c src/components/bgpm/L2unit/linux-L2unit.c...: Cleaned up compiler warnings on BG/Q (gcc version 4.4.6 (BGQ-V1R1M2-120920)) 2013-01-14 * 56400627 .../build/lib.linux-x86_64-2.7/perfmon/__init__.py .../lib.linux-x86_64-2.7/perfmon/perfmon_int.py .../build/lib.linux-x86_64-2.7/perfmon/pmu.py...: libpfm4: remove extraneous build artifacts. 
Steve Kaufmann reported differences between the libpfm4 I imported into PAPI and the libpfm4 that can be attained with a git clone git://perfmon2.git.sourceforge.net/gitroot/perfmon2/libpfm4 Self: Do libpfm4 imports from a fresh clone of libpfm4. 2013-01-11 * 4ad994bc src/papi_events.csv: Clean up armv7 cortex a15 presets Clean up armv7 cortex a15 presets and add presets for L1 and L2 cache * d54dabf5 ChangeLogP510.txt RELEASENOTES.txt doc/Doxyfile-common...: Prepare the repo for a 5.1 release. * Bump the version number to 5.1 * Update the man pages * Create a changelog for 5.1 * Update RELEASENOTES * 8816a3b8 INSTALL.txt: Update INSTALL.txt Add information about installing PAPI on Intel MIC. Based upon information from Vince Weaver's PAPI MIC support page. http://www.eece.maine.edu/~vweaver/projects/mic/ * 8dc1ca23 TEST.TXT: Remove TEST.TXT This was a leftover from a switch over to git. * 292d6c9b src/papi_libpfm3_events.c: Fix build on ia64 When trying to build papi 5.0.1 for IA64, my collegue got compile errors due to perfmon.h not being included. We're not sure if this actually is a configure bug, but this patch fixed it. * 25424f41 src/extras.c: Fix kernel warning in _papi_hwi_stop_timer() In _papi_hwi_stop_timer() we were calling setitimer( timer, NULL, NULL ) to disable the itimer. Recent Linux kernels print warnings if you do this; NULL is not a valid second argument to setitimer() and possibly this wasn't really working before. According to the manpage the proper fix is to call setitimer() with a valid "new_value" field but with the values all 0. That is what this patch does. 2012-11-30 * a7d70127 src/components/micpower/README src/components/micpower/Rules.micpower src/components/micpower/linux-micpower.c...: MIC power component The Intel MIC (Xeon PHI) card reports power of several components of the card. These values are reported in a sysfs file, so this component is cloned from the coretemp component. 
2013-01-08 * 121cd0a6 src/Makefile.in src/Rules.pfm4_pe src/configure...: configure: Add shortcut for mic support. * Add a --with-mic flag to enable the several options to cross compile for mic. MIC builds are cross-compiled and Matt and I were unable to figure out how to trigger cross compilation with just our flag. This is short-hand for setting --with-arch=k1om --without-ffsll --with-walltimer=clock_realtime_hr \ --with-perf-events --with-tls=__thread --with-virtualtimer=cputime_id * Automatically cause make to pass CONFIG_PFMLIB_ARCH_X86=y to libpfm4's make. So to build for the mic card one has to do: {Set pathing to find the x86_64-k1om-linux-gcc cross-compiler} $ ./configure --host=x86_64-k1om-linux --with-mic $ make Thanks to Matt Johnson for the legwork on configure shortcuting. 2013-01-07 * f65c9d9e src/papi_events.csv: Add preset events for ARM Cortex A15 2012-12-14 * 61a9c7b1 man/man3/PAPI_get_eventset_component.3 src/papi.c: Doxygen: Add a new API entry Add the manpage for the new PAPI_get_eventset_component api entry. 2013-01-02 * 38d969ab doc/Doxyfile-man1 doc/Doxyfile-man3 doc/Makefile...: Doxygen: Cleanup generated man pages. Mark a few \page sections as \htmlonly so that man pages are not built for them. Modify the makefile to rm some data structures that are generated. Doxyfile-man3: * Take out papi_vector.h, this file only defines a few data structures from which we don't need manpages. papi.h: * PAPI_get_component_index's inline comment had the close /**> to delimit its description, but doxygen uses /**<. papi_fwrappers.c: * Mark the group PAPIF as internal so that a man page is not generated for it. utils/*: * Remove some useless htmlonly directrives, doxygen will generate pages for any data structure, htmlonly doesn't stop that. Doxyfile-man1: * Change a flag in Doxyfile-man1 so that we don't document internal data structures in the utilities. 
We don't do this in -man3 because of the \class workaround we use to create manpages for each of the PAPI_* api entry points. Because we call them classes, they would be caught in the no data structures flag. * 7b790c09 doc/Doxyfile-html src/papi.h src/papi_fwrappers.c...: Doxygen: Cleanup some of the markup We were not using htmlonly correctly... The idea was to use \htmlonly to not build manpages for a few things. To properly hide \page s you want things like: /** \htmlonly \page Foo I don't want this to generate a manpage. \endhtmlonly */ 2012-12-07 * 152bac19 src/papi.c: Doxygen: Cleanup papi.c Cleanup some \ref s, \ref PAPI_function() isn't happy, use \ref PAPI_function it'll put in the proper links. Remove _papi_overflow_handler doc block. We had the block but no code. 2012-12-20 * 7a40c769 src/components/rapl/tests/rapl_overflow.c: RAPL test code: Add flexibility to the test code. Per Will Cohen; ------------------ I was reviewing some test results for the papi test and found that the rapl_overflow.c tests makes an assumption that there are exactly two packages. As a result the test will fail on machines with a single package. The following is a patch to make it a bit more flexible allow 1-n packages in the test. -Will ----------------- 2012-12-19 * 96c9afb0 src/components/appio/README src/components/appio/appio.c src/components/appio/appio.h...: Added events for seek statistics and support for intercepting lseek() calls. 2012-12-14 * 003abf6d src/Rules.perfctr-pfm: Rules.perfctr-pfm: pass CC in all cases. Perfctr user library was not being passed CC when built. 2012-12-05 * e2c05b29 src/papi_internal.c: papi_internal.c: Refactor dublicated code in cleanup and free eventset. Currently the code to free runtime state is duplicated in cleanup and free. The perf_event_uncore test exposed an issue where free cleaned up cpu_attach state but cleanup did not, causing a leak. 
Have _papi_hwi_free_EventSet call _papi_hwi_cleanup_eventset to free most of the runtime state of the eventset and then allow free_eventset to free the Eventset Info struct. 2012-12-13 * 7d020224 src/configure src/configure.in: configure: Change fortran compiler search order. Bandaid fix to buildbot errors. By default, configure would find icc before gcc but gfortran would be used before ifort. The real fix is to test that object code from the c compiler can be linked to by the fortran compiler. 2012-12-12 * 87b6e913 src/papi_events.csv: ivy_bridge: remove PAPI_HW_INT event Apparently recent Intel Vol3B documentation removed this event, and the most recent libpfm4 merge followed suit. I asked at Intel about this and possibly they only removed it because they didn't think anyone was using it. Maybe they'll ad it back 2012-12-10 * 293b26b9 src/Makefile.inc: Makefile.inc: Fix library link ordering. Per Will Cohen ----------------------------------------------------------- I ran across a problem when trying to build papi with the bundled libpfm and an earlier incompatible version of libpfm was already installed on the machine. The make would use the /usr/lib{64}/libpfm.so before trying to use the locally built version and this would cause problems. The attached patch changes the order of the linking and uses the local built libpfm before it tries the installed version. -Will ----------------------------------------------------------- 2012-12-12 * 57e6aa0d src/Makefile.in: Makefile.in: export CC_COMMON_NAME In 17cfcb4a I started using CC_COMMON_NAME in Rules.pfm4 but failed to have configure put it in Makefile. 2012-12-11 * 17cfcb4a src/Rules.pfm4_pe src/configure src/configure.in: Cleanup icc build Start using -diag-disable to quiet down some of the remarks icc carps about in libpfm4. Also have configure export CC_COMMON_NAME and check against that in Rules.pfm4_pe. afec8fc9a reverted us to passing -Wno-unused-parameter to icc, polluting buildbot. 
2012-12-10 * afec8fc9 src/configure src/configure.in: configure: Attempt to better detect which C compiler we are using. This attempts to address trac bug 162. http://icl.cs.utk.edu/trac/papi/ticket/162 Specifying full paths for CC caused issues in our configure logic. We set several flags specific to gcc or icc and this was breaking down EG "/usr/bin/gcc" != "gcc" Now we attempt to execute whatever CC we are going to use and grep its version string. We set a CC_COMMON_NAME \in {"gcc", "icc", "xlc", "unknown"} based upon the above and later check CC_COMMON_NAME inplace of CC to set compiler specific flags. * 14432aa0 src/linux-timer.c src/papi.c: Minor Coverity fixes. Thanks, Will Cohen. 2012-12-07 * ba5e83d4 src/papi_user_events.c: papi_user_events.c: Fix memory leak. Reported by William Cohen as detected by the coverity tool. * 166498a8 src/components/nvml/linux-nvml.c: nvml component: fix detectDevices() The routine detectDevices() always returned with the error PAPI_ESYS when there was a device available. This resulted in that there were no nvml events available. Fixed. * 11ad5894 src/components/nvml/linux-nvml.c: nvml component: add missing variable declaration In the routine _papi_nvml_init_componen(), the variable papi_errorcode was not declared which prevented this component to build. Added declaration of papi_errorcode as int. 2012-12-06 * 9567dfef src/ftests/first.F src/ftests/second.F: Fix warning messages issued by gfortran 4.6.x regarding loss of precision when casting REAL to INT. Thanks to Heike for identifying the proper intrinsics. * 72588227 src/papi.c src/papi.h: Add PAPI_get_eventset_component() to get the component index from an event set. This is symmetric with PAPI_get_event_component which extracts the information from an event. In response to a request from John Mellor-Crummey. * 2e055d40 src/components/rapl/linux-rapl.c: Fix a compiler warning about a possibly uninitialized return value. 
2012-12-05 * 1aae2246 src/utils/command_line.c: Reformat the floating point output string to recognize that you can't cast the *value* of a long long to a double and expect to get the right answer; you need to cast the *pointer* to a double, then everything works. * 0e834fc2 src/utils/command_line.c: Incorporated use of the new PAPI_add_named_event API. Restructured output to support formatted printing of built-in DATATYPEs: UINT64 prints as unsigned followed by (u); INT64 prints as signed; FP64 prints as float (but I don't like the default format); BIT64 prints a hex, prefixed by '0x'. Also if info.units is not empty, units are appended to output values. These features can be demo'd with the RAPL component. * af6abec2 src/papi.h: Rearranged DATATYPE enums so INT64 is now default (0) value. Also added a BIT64 type for unspecified bitfields. 2012-12-04 * 862033e0 src/components/bgpm/IOunit/linux-IOunit.c src/components/bgpm/IOunit/linux-IOunit.h src/components/bgpm/L2unit/linux-L2unit.c...: Resolved multiple components conflict on BG/Q when overflow is enabled for multiple events from different components at the same time. * 44744002 src/utils/command_line.c: Add -x and -u options to papi_command_line to allow printing counter values in hexadecimal and unsigned formats. 2012-11-30 * 25a914c5 src/papi_user_events.c: Cleanup unused variable warnings in user_events code. 2012-11-28 * 9a75f872 src/Rules.pfm4_pe src/configure src/configure.in: Cleanup the build under icc. libpfm4's build system uses a gcc specific flag, -Wno-unused-parameter. It does this via a variable, DBG, in config.mk: DBG?=-g -Wall -Werror -Wextra -Wno-unused-parameter The Intel compiler doesn't understand -Wno-unused-parameter and complains about it. In Rules.pfm4_pe we set DBG for icc builds. 2012-11-27 * 4def827b src/configure src/configure.in: Fix the perfctr build that was breaking due to missing CPU Mark Gates was reporting PAPI 5 wasn't running properly on Keeneland. 
It looks like some CPU cleanups in the configure code broke things. Hopefully this helps the situation. 2012-11-21 * 4316f172 src/perf_events.c: perf_events: get rid of "PAPI Error: Didn't close all events" error This was more meant as a warning; it could trigger when closing an EventSet that had an event partially added but failed for some reason. * 671e10bd src/utils/command_line.c: papi_command_line: fix error output The error messages got a bit weird looking due to the PAPI error printing changes a while back. * 959afa49 src/papi_internal.c: Fix _papi_hwi_add_event to report errors back to user. Previously _papi_hwi_add_event would report all errors returned by add_native_events() as being PAPI_ECNFLCT even though add_native_events() returned a wider range of errors. * 8ecb70ba src/perf_events.c: Have perf_event return PAPI_EPERM rather than PAPI_ECNFLCT if the kernel itself returns EPERM * 9053ca1c src/perf_events.c: Work around kernel issue with PERF_EVENT_IOC_REFRESH It's unclear exactly the best way to restart sampling. Refreshing with 1 is the "official" way as espoused by the kernel developers, but it doesn't work on Power. 0 works for Power and most other machines, but the kernel developers say not to use it. This makes power user 0 until we can figure out exactly what is going on. * e85df04b src/components/appio/tests/appio_test_socket.c: - added support distinguishing between network and file I/O. - added events to measure statistics for sockets - updated README 2012-11-15 * 248694ef src/x86_cpuid_info.c: Update x86_cpuid_info code for KNC. On Knight's Corner the leaf2 code returns 0 for the count value. We were printing a warning on this; better would be to just skip the cache detection code if we get this result. 2012-11-08 * 82c93156 src/linux-bgp-memory.c src/linux-bgp.c src/linux-bgp.h: There was more cleaning up necessary in order to get PAPI compiled on BG/P. It should work now with the recommended configure steps described in INSTALL. 
2012-11-07 * 77da80b3 src/Makefile.inc src/configure src/configure.in...: Make BGP use papi_events.csv This was easier than trying to clean up the linux-bgp-preset-events.c file to have the proper file layout. * fc8a4168 src/linux-bgp.c: Fix some linux-bgp build issues. No one has tried compiling after all the PAPI 5.0 changes so many bugs slipped in. * c16ef312 src/ctests/perf_event_uncore.c: Fix type warnings in perf_event_uncore test. * 3947e9c8 src/ctests/perf_event_uncore.c: Put a bandaid on the perf_event_uncore test. Check for an Intel family 6 model 45 processor (sandybridge ep) before executing the test. 2012-09-27 * a23d95f8 src/papi.c src/papi.h src/papi_fwrappers.c...: Mark some comments @htmlonly. This cleans up what man pages are generated. 2012-11-07 * d239c350 src/Makefile.inc src/Rules.pfm4_pe: Factor out duplicate install code from Rules.pfm4_pe The Makefile.inc has a rule to installed shared libraries. However, Rules.pfm4_pe also has a slightly different set of rules to install code for shared libraries. This leads to the same shared library being installed under two different names. The duplicate code has been removed from Rules.pfm4_pe and a symbolic link has been added to ensure that any code that might have linked with libpapi.so.$(PAPIVER).$(PAPIREV).$(PAPIAGE) still runs. 2012-10-30 * fcc64ff9 src/papi_events.csv: Add PAPI_HW_INT event for IvyBridge 2012-10-26 * ef89fc56 src/papi_events.csv: MIC: update PAPI_FP_INS / PAPI_VEC_INS instruction We were using VPU_INSTRUCTIONS_EXECUTED for PAPI_FP_INS but really it's more appropriate for PAPI_VEC_INS This leaves PAPI_FP_INS undefined, which breaks a lot of the ctests. A long term goal should probably be modifying the tests to use another counter if PAPI_FP_INS isn't available (this affects Ivy Bridge too). 2012-10-25 * 975c03f1 src/perf_events.c: perf_event: fix granularity bug cut-and paste error in the last set of changes. 
Would have meant if you tried to explicitly set granularity to thread you'd get system instead. * 3cd3a62d src/configure src/configure.in src/ctests/Makefile...: Add perf_event_uncore ctest Also add a new type of ctest, perf_event specific In theory we should have configure only enable this if perf_event support is being used. * 5ee97430 src/perf_events.c: perf_event: add PAPI_DOM_SUPERVISOR to allowed perf_event domains perf_event supports this domain but since we didn't have it in the list PAPI wasn't letting us set/unset this. This is needed for uncore support, as for uncore domain must be set to allow monitoring everything. * c9325560 src/perf_events.c: perf_event enable granularity support Add support for PAPI_GRAN_SYS to perf_event. This is needed for uncore support. 2012-10-18 * 59d3d758 src/mb.h src/perf_events.c: Update the memory barriers It turns out PAPI fails on older 32-bit x86 machines because it tries to use an SSE rmb() memory barrier. (Yes, I'm trying to run PAPI on a Pentium II. Don't ask) It looks like our memory barriers were copied out of the kernel, which doesn't quite work because it expects some kernel infrastructure instead. This patch uses the definitions used by the "perf" tool instead. Also dropped the use of the mb() memory barrier on mmap tail write, as the perf tool itself did a while ago so I'm hoping it's safe to do so as well. It makes these definitions a lot simpler. 2012-10-08 * bcdce5bc src/perf_events.c: perf_event: clarify an error message The message was saying detecting rdpmc support broke, but the real error is that perf_events itself is totally broken on this machine and it's just rdpcm was the first code that tried to access it. 2012-10-02 * 3bb3558f src/mb.h: Update memory barries for Knights Corner Despite being x86_64 they don't support the SSE memory barrier instructions, so add a case in mb.h to handle this properly. 
2012-10-01 * 38a5d74c src/libpfm4/README src/libpfm4/docs/Makefile src/libpfm4/docs/man3/libpfm_intel_atom.3...: Merge libpfm4 with Knights Corner Support * bf959960 src/papi_events.csv: Change "phi" to "knc" to match libpfm4 for Xeon Phi / Knights Corner support 2012-09-20 * d9249635 ChangeLogP501.txt RELEASENOTES.txt: Update releasenotes and add a changelog for 5.0.1 * a1e30348 man/man1/papi_avail.1 man/man1/papi_clockres.1 man/man1/papi_command_line.1...: Rebuild the manpages for a 5.0.1 release. papi-5.6.0/src/libpfm4/lib/events/intel_snb_unc_events.h000664 001750 001750 00000013423 13216244364 025316 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2012 Google, Inc * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. 
* * PMU: snb_unc (Intel Sandy Bridge uncore PMU) */ static const intel_x86_umask_t snb_unc_cbo_xsnp_response[]={ { .uname = "MISS", .udesc = "Number of snoop misses", .ucode = 0x100, .grpid = 0, }, { .uname = "INVAL", .udesc = "Number of snoop invalidates of a non-modified line", .ucode = 0x200, .grpid = 0, }, { .uname = "HIT", .udesc = "Number of snoop hits of a non-modified line", .ucode = 0x400, .grpid = 0, }, { .uname = "HITM", .udesc = "Number of snoop hits of a modified line", .ucode = 0x800, .grpid = 0, }, { .uname = "INVAL_M", .udesc = "Number of snoop invalidates of a modified line", .ucode = 0x1000, .grpid = 0, }, { .uname = "ANY_SNP", .udesc = "Number of snoops", .ucode = 0x1f00, .grpid = 0, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "EXTERNAL_FILTER", .udesc = "Filter on cross-core snoops initiated by this Cbox due to external snoop request", .ucode = 0x2000, .grpid = 1, .uflags = INTEL_X86_NCOMBO, }, { .uname = "XCORE_FILTER", .udesc = "Filter on cross-core snoops initiated by this Cbox due to processor core memory request", .ucode = 0x4000, .grpid = 1, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "EVICTION_FILTER", .udesc = "Filter on cross-core snoops initiated by this Cbox due to LLC eviction", .ucode = 0x8000, .grpid = 1, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t snb_unc_cbo_cache_lookup[]={ { .uname = "STATE_M", .udesc = "Number of LLC lookup requests for a line in modified state", .ucode = 0x100, .grpid = 0, .uflags = INTEL_X86_NCOMBO, }, { .uname = "STATE_E", .udesc = "Number of LLC lookup requests for a line in exclusive state", .ucode = 0x200, .grpid = 0, .uflags = INTEL_X86_NCOMBO, }, { .uname = "STATE_S", .udesc = "Number of LLC lookup requests for a line in shared state", .ucode = 0x400, .grpid = 0, .uflags = INTEL_X86_NCOMBO, }, { .uname = "STATE_I", .udesc = "Number of LLC lookup requests for a line in invalid state", .ucode = 0x800, .grpid = 0, .uflags = INTEL_X86_NCOMBO, }, { .uname = 
"STATE_MESI", .udesc = "Number of LLC lookup requests for a line", .ucode = 0xf00, .grpid = 0, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "READ_FILTER", .udesc = "Filter on processor core initiated cacheable read requests", .ucode = 0x1000, .grpid = 1, .uflags = INTEL_X86_NCOMBO, }, { .uname = "WRITE_FILTER", .udesc = "Filter on processor core initiated cacheable write requests", .ucode = 0x2000, .grpid = 1, .uflags = INTEL_X86_NCOMBO, }, { .uname = "EXTSNP_FILTER", .udesc = "Filter on external snoop requests", .ucode = 0x4000, .grpid = 1, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ANY_FILTER", .udesc = "Filter on any IRQ or IPQ initiated requests including uncacheable, non-coherent requests", .ucode = 0x8000, .grpid = 1, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, }; static const intel_x86_entry_t intel_snb_unc_cbo0_pe[]={ { .name = "UNC_CLOCKTICKS", .desc = "uncore clock ticks", .cntmsk = 1ULL << 32, .code = 0xff, /* perf_event pseudo encoding */ .flags = INTEL_X86_FIXED, }, { .name = "UNC_CBO_XSNP_RESPONSE", .desc = "Snoop responses", .modmsk = INTEL_SNB_UNC_ATTRS, .cntmsk = 0xff, .code = 0x22, .numasks = LIBPFM_ARRAY_SIZE(snb_unc_cbo_xsnp_response), .ngrp = 2, .umasks = snb_unc_cbo_xsnp_response, }, { .name = "UNC_CBO_CACHE_LOOKUP", .desc = "LLC cache lookups", .modmsk = INTEL_SNB_UNC_ATTRS, .cntmsk = 0xff, .code = 0x34, .numasks = LIBPFM_ARRAY_SIZE(snb_unc_cbo_cache_lookup), .ngrp = 2, .umasks = snb_unc_cbo_cache_lookup, }, }; static const intel_x86_entry_t intel_snb_unc_cbo_pe[]={ { .name = "UNC_CBO_XSNP_RESPONSE", .desc = "Snoop responses (must provide a snoop type and filter)", .modmsk = INTEL_SNB_UNC_ATTRS, .cntmsk = 0xff, .code = 0x22, .numasks = LIBPFM_ARRAY_SIZE(snb_unc_cbo_xsnp_response), .ngrp = 2, .umasks = snb_unc_cbo_xsnp_response, }, { .name = "UNC_CBO_CACHE_LOOKUP", .desc = "LLC cache lookups", .modmsk = INTEL_SNB_UNC_ATTRS, .cntmsk = 0xff, .code = 0x34, .numasks = LIBPFM_ARRAY_SIZE(snb_unc_cbo_cache_lookup), .ngrp = 2, .umasks 
= snb_unc_cbo_cache_lookup, }, }; papi-5.6.0/src/perfctr-2.6.x/usr.lib/event_set_p5.c000775 001750 001750 00000032502 13216244367 023675 0ustar00jshenry1963jshenry1963000000 000000 /* $Id: event_set_p5.c,v 1.1 2003/02/16 21:08:54 mikpe Exp $ * Performance counter event descriptions for Intel P5 and P5 MMX * processors, and Cyrix 6x86/MII/III processors. * * Copyright (C) 2003 Mikael Pettersson * * References * ---------- * [IA32, Volume 3] "Intel Architecture Software Developer's Manual, * Volume 3: System Programming Guide". Intel document number 245472-009. * (at http://developer.intel.com/) * * [Cyrix 6x86MX] "Cyrix 6x86MX Processor". * [Cyrix MII] "Cyrix M II Data Book". * [Cyrix III] "Cyrix III Processor DataBook" Ver. 1.0, 1/25/00. * Note: This "Cyrix III" was code-named "Joshua", and it was apparently * cancelled by VIA due to disappointing performance. * (MII and III docs at http://www.viatech.com/) */ #include /* for NULL */ #include "libperfctr.h" #include "event_set.h" /* * Intel Pentium (P5) events. */ static const struct perfctr_event p5_events[] = { { 0x00, 0x3, NULL, "DATA_READ", "Number of memory data reads (internal data cache hit and " "miss combined)." }, { 0x01, 0x3, NULL, "DATA_WRITE", "Number of memory data writes (internal data cache hit and " "miss combined), I/O is not included." }, { 0x02, 0x3, NULL, "DATA_TLB_MISS", "Number of misses to the data cache translation look-aside " "buffer." }, { 0x03, 0x3, NULL, "DATA_READ_MISS", "Number of memory read accesses that miss the internal data " "cache whether or not the access is cacheable or noncacheable." }, { 0x04, 0x3, NULL, "DATA_WRITE_MISS", "Number of memory write accesses that miss the internal data " "cache whether or not the access is cacheable or noncacheable." }, { 0x05, 0x3, NULL, "WRITE_HIT_TO_M_OR_E_STATE_LINES", "Number of write hits to exclusive or modified lines in the " "data cache." 
}, { 0x06, 0x3, NULL, "DATA_CACHE_LINES_WRITTEN_BACK", "Number of dirty lines (all) that are written back, regardless " "of the cause." }, { 0x07, 0x3, NULL, "EXTERNAL_SNOOPS", "Number of accepted external snoops whether they hit in the code " "cache or data cache or neither." }, { 0x08, 0x3, NULL, "EXTERNAL_DATA_CACHE_SNOOP_HITS", "Number of external snoops to the data cache." }, { 0x09, 0x3, NULL, "MEMORY_ACCESSES_IN_BOTH_PIPES", "Number of data memory reads or writes that are paired in both " "pipes of the pipeline." }, { 0x0A, 0x3, NULL, "BANK_CONFLICTS", "Number of actual bank conflicts." }, { 0x0B, 0x3, NULL, "MISALIGNED_DATA_MEMORY_OR_IO_REFERENCES", "Number of memory or I/O reads or writes that are misaligned." }, { 0x0C, 0x3, NULL, "CODE_READ", "Number of instruction reads whether the read is cacheable or " "noncacheable." }, { 0x0D, 0x3, NULL, "CODE_TLB_MISS", "Number of instruction reads that miss the code TLB whether " "the read is cacheable or noncacheable." }, { 0x0E, 0x3, NULL, "CODE_CACHE_MISS", "Number of instruction reads that miss the internal code cache " "whether the read is cacheable or noncacheable." }, { 0x0F, 0x3, NULL, "ANY_SEGMENT_REGISTER_LOADED", "Number of writes into any segment register in real or protected " "mode including the LDTR, GDTR, IDTR, and TR." }, /* 0x10: reserved */ /* 0x11: reserved */ { 0x12, 0x3, NULL, "BRANCHES", "Number of taken and not taken branches, including conditional " "branches, jumps, calls, returns, software interrupts, and " "interrupt returns." }, { 0x13, 0x3, NULL, "BTB_HITS", "Number of BTB hits that occur." }, { 0x14, 0x3, NULL, "TAKEN_BRANCH_OR_BTB_HIT", "Number of taken branches or BTB hits that occur." }, { 0x15, 0x3, NULL, "PIPELINE_FLUSHES", "Number of pipeline flushes that occur." }, { 0x16, 0x3, NULL, "INSTRUCTIONS_EXECUTED", "Number of instructions executed (up to two per clock)." 
}, { 0x17, 0x3, NULL, "INSTRUCTIONS_EXECUTED_V_PIPE", /* XXX: was INSTRUCTIONS_EXECUTED_IN_V_PIPE */ "Number of instructions executed in the V_pipe. It indicates " "the number of instructions that were paired." }, { 0x18, 0x3, NULL, "BUS_CYCLE_DURATION", "Number of clocks while a bus cycle is in progress." }, { 0x19, 0x3, NULL, "WRITE_BUFFER_FULL_STALL_DURATION", "Number of clocks while the pipeline is stalled due to full " "write buffers." }, { 0x1A, 0x3, NULL, "WAITING_FOR_DATA_MEMORY_READ_STALL_DURATION", "Number of clocks while the pipeline is stalled while waiting " "for data memory reads." }, { 0x1B, 0x3, NULL, "STALL_ON_WRITE_TO_AN_E_OR_M_STATE_LINE", "Number of stalls on writes to E- or M-state lines." }, { 0x1C, 0x3, NULL, "LOCKED_BUS_CYCLE", "Number of locked bus cycles that occur as the result of " "LOCK prefix or LOCK instruction, page-table updates, and " "descriptor table updates." }, { 0x1D, 0x3, NULL, "IO_READ_OR_WRITE_CYCLE", "Number of bus cycles directed to I/O space." }, { 0x1E, 0x3, NULL, "NONCACHEABLE_MEMORY_READS", "Number of noncacheable instruction or data memory read bus cycles." }, { 0x1F, 0x3, NULL, "PIPELINE_AGI_STALLS", "Number of adress generation interlock (AGI) stalls." }, /* 0x20: reserved */ /* 0x21: reserved */ { 0x22, 0x3, NULL, "FLOPS", "Number of floating-point operations that occur." }, { 0x23, 0x3, NULL, "BREAKPOINT_MATCH_ON_DR0_REGISTER", "Number of matches on DR0 breakpoint." }, { 0x24, 0x3, NULL, "BREAKPOINT_MATCH_ON_DR1_REGISTER", "Number of matches on DR1 breakpoint." }, { 0x25, 0x3, NULL, "BREAKPOINT_MATCH_ON_DR2_REGISTER", "Number of matches on DR2 breakpoint." }, { 0x26, 0x3, NULL, "BREAKPOINT_MATCH_ON_DR3_REGISTER", "Number of matches on DR3 breakpoint." }, { 0x27, 0x3, NULL, "HARDWARE_INTERRUPTS", "Number of taken INTR and NMI interrupts." }, { 0x28, 0x3, NULL, "DATA_READ_OR_WRITE", "Number of memory data reads and/or writes (internal data cache " "hit and miss combined)." 
}, { 0x29, 0x3, NULL, "DATA_READ_MISS_OR_WRITE_MISS", "Number of memory read and/or write accesses that miss the " "internal data cache whether or not the acceess is cacheable " "or noncacheable." }, }; const struct perfctr_event_set perfctr_p5_event_set = { .cpu_type = PERFCTR_X86_INTEL_P5, .event_prefix = "P5_", .include = NULL, .nevents = ARRAY_SIZE(p5_events), .events = p5_events, }; /* * Intel Pentium MMX (P5MMX) events. */ static const struct perfctr_event p5mmx_and_mii_events[] = { { 0x2B, 0x1, NULL, "MMX_INSTRUCTIONS_EXECUTED_U_PIPE", "Number of MMX instructions executed in the U-pipe." }, { 0x2B, 0x2, NULL, "MMX_INSTRUCTIONS_EXECUTED_V_PIPE", "Number of MMX instructions executed in the V-pipe." }, { 0x2D, 0x1, NULL, "EMMS_INSTRUCTIONS_EXECUTED", "Number of EMMS instructions executed." }, { 0x2D, 0x2, NULL, "TRANSITIONS_BETWEEN_MMX_AND_FP_INSTRUCTIONS", "Number of transitions between MMX and floating-point instructions " "or vice versa." }, { 0x2F, 0x1, NULL, "SATURATING_MMX_INSTRUCTIONS_EXECUTED", "Number of saturating MMX instructions executed, independently of " "whether they actually saturated." }, { 0x2F, 0x2, NULL, "SATURATIONS_PERFORMED", "Number of MMX instructions that used saturating arithmetic and " "that at least one of its results actually saturated." }, { 0x31, 0x1, NULL, "MMX_INSTRUCTION_DATA_READS", "Number of MMX instruction data reads." }, { 0x32, 0x2, NULL, "TAKEN_BRANCHES", "Number of taken branches." }, { 0x37, 0x1, NULL, "MISPREDICTED_OR_UNPREDICTED_RETURNS", "Number of returns predicted incorrectly or not predicted at all." }, { 0x37, 0x2, NULL, "PREDICTED_RETURNS", "Number of predicted returns (whether they are predicted correctly " "and incorrectly)." }, { 0x38, 0x1, NULL, "MMX_MULTIPLY_UNIT_INTERLOCK", "Number of clocks the pipe is stalled since the destination of " "previous MMX instruction is not ready yet." 
}, { 0x38, 0x2, NULL, "MOVD_MOVQ_STORE_STALL_DUE_TO_PREVIOUS_MMX_OPERATION", "Number of clocks a MOVD/MOVQ instruction store is stalled in D2 " "stage due to a previous MMX operation with a destination to be " "used in the store instruction." }, { 0x39, 0x1, NULL, "RETURNS", "Number of returns executed." }, { 0x3A, 0x1, NULL, "BTB_FALSE_ENTRIES", "Number of false entries in the Branch Target Buffer." }, { 0x3A, 0x2, NULL, "BTB_MISS_PREDICTION_ON_NOT_TAKEN_BRANCH", "Number of times the BTB predicted a not-taken branch as taken." }, { 0x3B, 0x1, NULL, "FULL_WRITE_BUFFER_STALL_DURATION_WHILE_EXECUTING_MMX_INSTRUCTIONS", "Number of clocks while the pipeline is stalled due to full write " "buffers while executing MMX instructions." }, { 0x3B, 0x2, NULL, "STALL_ON_MMX_INSTRUCTION_WRITE_TO_E_OR_M_STATE_LINE", "Number of clocks during stalls on MMX instructions writing " "to E- or M-state lines." }, }; static const struct perfctr_event_set p5mmx_and_mii_event_set = { .cpu_type = PERFCTR_X86_INTEL_P5MMX, .event_prefix = "P5MMX_", .include = &perfctr_p5_event_set, .nevents = ARRAY_SIZE(p5mmx_and_mii_events), .events = p5mmx_and_mii_events, }; static const struct perfctr_event p5mmx_events[] = { { 0x2A, 0x1, NULL, "BUS_OWNERSHIP_LATENCY", "The time from LRM bus ownership request to bus ownership granted." }, { 0x2A, 0x2, NULL, "BUS_OWNERSHIP_TRANSFERS", "The number of bus ownership transfers." }, { 0x2C, 0x1, NULL, "CACHE_M_STATE_LINE_SHARING", "Number of times a processor identified a hit to a modified line " "due to a memory access in the other processor." }, { 0x2C, 0x2, NULL, "CACHE_LINE_SHARING", "Number of shared data lines in the L1 cache." }, { 0x2E, 0x1, NULL, "BUS_UTILIZATION_DUE_TO_PROCESSOR_ACTIVITY", "Number of clocks the bus is busy due to the processor's own activity." }, { 0x2E, 0x2, NULL, "WRITES_TO_NONCACHEABLE_MEMORY", "Number of write accesses to noncacheable memory." 
}, { 0x30, 0x1, NULL, "NUMBER_OF_CYCLES_NOT_IN_HALT_STATE", "Number of cycles the processor is not idle due to HLT instruction." }, { 0x30, 0x2, NULL, "DATA_CACHE_TLB_MISS_STALL_DURATION", "Number of clocks the pipeline is stalled due to a data cache " "translation look-aside buffer (TLB) miss." }, { 0x31, 0x2, NULL, "MMX_INSTRUCTION_DATA_READ_MISSES", "Number of MMX instruction data read misses." }, { 0x32, 0x1, NULL, "FLOATING_POINT_STALLS_DURATION", "Number of clocks while pipe is stalled due to a floating-point freeze." }, { 0x33, 0x1, NULL, "D1_STARVATION_AND_FIFO_IS_EMPTY", "Number of times D1 stage cannot issue ANY instructions since the " "FIFO buffer is empty." }, { 0x33, 0x2, NULL, "D1_STARVATION_AND_ONLY_ONE_INSTRUCTION_IN_FIFO", "Number of times the D1 stage issues just a single instruction since " "the FIFO buffer had just one instruction ready." }, { 0x34, 0x1, NULL, "MMX_INSTRUCTION_DATA_WRITES", "Number of data writes caused by MMX instructions." }, { 0x34, 0x2, NULL, "MMX_INSTRUCTION_DATA_WRITE_MISSES", "Number of data write misses caused by MMX instructions." }, { 0x35, 0x1, NULL, "PIPELINE_FLUSHES_DUE_TO_WRONG_BRANCH_PREDICTIONS", "Number of pipeline flushes due to wrong branch prediction resolved " "in either the E-stage or the WB-stage." }, { 0x35, 0x2, NULL, "PIPELINE_FLUSHES_DUE_TO_WRONG_BRANCH_PREDICTIONS_RESOLVED_IN_WB_STAGE", "Number of pipeline flushes due to wrong branch prediction resolved " "in the WB-stage." }, { 0x36, 0x1, NULL, "MISALIGNED_DATA_MEMORY_REFERENCE_ON_MMX_INSTRUCTIONS", "Number of misaligned data memory references when executing MMX " "instructions." }, { 0x36, 0x2, NULL, "PIPELINE_ISTALL_FOR_MMX_INSTRUCTION_DATA_MEMORY_READS", "Number of clocks during pipeline stalls caused by waits from MMX " "instructions data memory reads." 
}, /* 0x39, counter 1: reserved */ }; const struct perfctr_event_set perfctr_p5mmx_event_set = { .cpu_type = PERFCTR_X86_INTEL_P5MMX, .event_prefix = "P5MMX_", .include = &p5mmx_and_mii_event_set, .nevents = ARRAY_SIZE(p5mmx_events), .events = p5mmx_events, }; /* * Cyrix 6x86MX, MII, and III events. */ static const struct perfctr_event mii_events[] = { { 0x039, 0x2, NULL, "RSB_OVERFLOWS" }, /* NOTE: The manuals list the following events as having codes 40-48. However, the 7-bit event code is actually split in the CESR, using bits 0-5 and 10, and similarly for the high half of the CESR. Since the driver also parses the other fields (bits 6-9) in a user's evntsel, the events are listed here with their actual in-CESR values. */ { 0x400, 0x3, NULL, "L2_TLB_MISSES" }, { 0x401, 0x3, NULL, "L1_TLB_DATA_MISS" }, { 0x402, 0x3, NULL, "L1_TLB_CODE_MISS" }, { 0x403, 0x3, NULL, "L1_TLB_MISS" }, { 0x404, 0x3, NULL, "TLB_FLUSHES" }, { 0x405, 0x3, NULL, "TLB_PAGE_INVALIDATES" }, { 0x406, 0x3, NULL, "TLB_PAGE_INVALIDATES_THAT_HIT" }, { 0x408, 0x3, NULL, "INSTRUCTIONS_DECODED" }, }; const struct perfctr_event_set perfctr_mii_event_set = { .cpu_type = PERFCTR_X86_CYRIX_MII, .event_prefix = "MII_", .include = &p5mmx_and_mii_event_set, .nevents = ARRAY_SIZE(mii_events), .events = mii_events, }; papi-5.6.0/src/components/appio/tests/init_fini.c000664 001750 001750 00000003124 13216244356 024101 0ustar00jshenry1963jshenry1963000000 000000 #include #include #include #include #include "papi.h" #define NUM_EVENTS 6 static int Events[NUM_EVENTS]; static const char* names[NUM_EVENTS] = {"READ_CALLS", "READ_BYTES","READ_USEC","WRITE_CALLS","WRITE_BYTES","WRITE_USEC"}; static long long values[NUM_EVENTS]; __attribute__ ((constructor)) void my_init(void) { //fprintf(stderr, "appio: constructor started\n"); int version = PAPI_library_init (PAPI_VER_CURRENT); if (version != PAPI_VER_CURRENT) { fprintf(stderr, "PAPI_library_init version mismatch\n"); exit(1); } else { fprintf(stderr, "appio: PAPI 
library initialized\n"); } int retval; int e; for (e=0; e /dev/cpu/msr_whitelist OPTION 2: OR: Enable MSR access via the filesystem and elevated permissions. Or, enable access to the standard MSRs filesystem For Linux kernel version < 3.7, using only file system checks chmod 666 /dev/cpu/*/msr For Linux kernel version >= 3.7, using capabilities chmod 666 /dev/cpu/*/msr The final executable needs CAP_SYS_RAWIO to open MSR device files [1] setcap cap_sys_rawio=ep The final executable cannot be on a shared network partition. The dynamic linker on most operating systems will remove variables that control dynamic linking from the environment of executables with extended rights, such as setuid executables or executables with raised capabilities. One such variable is LD_LIBRARY_PATH. Therefore, executables that have the RAWIO capability can only load shared libraries from default system directories. One can work around this restriction by either installing the shared libraries in system directories, linking statically against those libraries, or using the -rpath linker option to specify the full path to the shared libraries during the linking step. -------------------------------------------------- COMPILE THE LIBMSR LIBRARY TO ACCESS THE MSRS https://github.com/scalability-llnl/libmsr Get the library and follow the instructions to build using CMake. This library contains a subdirectory, test, which will exercise the functionality. -------------------------------------------------- CONFIGURING THE PAPI LIBMSR COMPONENT Set libmsr library and header files by configuring within the component. % cd /src/components/libmsr % ./configure --with-libmsr-incdir= --with-libmsr-libdir= Then, at the higher src directory, configure with this component % cd /src % ./configure --with-components="libmsr" or if you want to specify the compilers and enable debug. 
% ./configure CC=gcc F77=gfortran --with-debug --with-components="libmsr" Finally, follow the standard PAPI build (make) instructions % make To use the module, make sure that the libraries are accessible. % export LD_LIBRARY_PATH=${PAPIDIR}/src:${PAPIDIR}/src/libpfm4/lib:${LIBMSRDIR}/lib:${LD_LIBRARY_PATH} To check the installation, the following should show some available counters % ./utils/papi_native_avail | grep libmsr To check the installation, the following should show some counter values % ./utils/papi_native_avail -e "libmsr:::PKG_WATTS:PACKAGE0" % ./utils/papi_command_line "libmsr:::PKG_WATTS:PACKAGE0" % ./utils/papi_command_line "libmsr:::PKG_DELTA_ENERGY:PACKAGE0" -------------------------------------------------- USE THE PAPI LIBMSR COMPONENT See the components/libmsr/utils/README file for instructions. This test demonstrates how to write power constraints, and gives an estimate of the overheads for reading and writing information to the RAPL MSRs. [1] http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=c903f0456bc69176912dee6dd25c6a66ee1aed00 */ papi-5.6.0/src/components/stealtime/tests/stealtime_basic.c000664 001750 001750 00000006233 13216244360 026137 0ustar00jshenry1963jshenry1963000000 000000 /** * @author Vince Weaver * * test case for stealtime component * * * @brief * Tests basic stealtime functionality */ #include #include #include #include "papi.h" #include "papi_test.h" #define NUM_EVENTS 1 int main (int argc, char **argv) { int retval,cid,numcmp; int EventSet = PAPI_NULL; long long values[NUM_EVENTS]; int code; char event_name[PAPI_MAX_STR_LEN]; int total_events=0; int r; const PAPI_component_info_t *cmpinfo = NULL; int quiet=0; /* Set TESTS_QUIET variable */ quiet=tests_quiet( argc, argv ); /* PAPI Initialization */ retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) { test_fail(__FILE__, __LINE__,"PAPI_library_init failed\n",retval); } if (!quiet) { printf("Trying all stealtime events\n"); } 
numcmp = PAPI_num_components(); for(cid=0; cidname,"stealtime")) { if (!quiet) printf("\tFound stealtime component %d - %s\n", cid, cmpinfo->name); } else { continue; } code = PAPI_NATIVE_MASK; r = PAPI_enum_cmp_event( &code, PAPI_ENUM_FIRST, cid ); while ( r == PAPI_OK ) { retval = PAPI_event_code_to_name( code, event_name ); if ( retval != PAPI_OK ) { printf("Error translating %#x\n",code); test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); } if (!quiet) printf(" %s ",event_name); EventSet = PAPI_NULL; retval = PAPI_create_eventset( &EventSet ); if (retval != PAPI_OK) { test_fail(__FILE__, __LINE__, "PAPI_create_eventset()",retval); } retval = PAPI_add_event( EventSet, code ); if (retval != PAPI_OK) { test_fail(__FILE__, __LINE__, "PAPI_add_event()",retval); } retval = PAPI_start( EventSet); if (retval != PAPI_OK) { test_fail(__FILE__, __LINE__, "PAPI_start()",retval); } retval = PAPI_stop( EventSet, values); if (retval != PAPI_OK) { test_fail(__FILE__, __LINE__, "PAPI_start()",retval); } if (!quiet) printf(" value: %lld\n",values[0]); retval = PAPI_cleanup_eventset( EventSet ); if (retval != PAPI_OK) { test_fail(__FILE__, __LINE__, "PAPI_cleanup_eventset()",retval); } retval = PAPI_destroy_eventset( &EventSet ); if (retval != PAPI_OK) { test_fail(__FILE__, __LINE__, "PAPI_destroy_eventset()",retval); } total_events++; r = PAPI_enum_cmp_event( &code, PAPI_ENUM_EVENTS, cid ); } } if (total_events==0) { test_skip(__FILE__,__LINE__,"No stealtime events found",0); } if (!quiet) { printf("Note: for this test the values are expected to all be 0\n\t unless run inside a VM on a busy system.\n"); } test_pass( __FILE__ ); return 0; } papi-5.6.0/src/libpfm-3.y/docs/man3/pfm_strerror.3000664 001750 001750 00000001452 13216244362 023603 0ustar00jshenry1963jshenry1963000000 000000 .TH LIBPFM 3 "November, 2003" "" "Linux Programmer's Manual" .SH NAME pfm_strerror \- return string describing error code .SH SYNOPSIS .nf .B #include .sp .BI "char *pfm_strerror(int 
"code); .sp .SH DESCRIPTION This function returns a string which describes the libpfm error value in \fBcode\fR. The string returned by the call must be considered as read only. The function must \fBonly\fR be used on libpfm calls. It is not designed to handle OS system call errors. .SH RETURN The function returns a pointer to the string describing the error code. If code is invalid then the default error message is returned. .SH ERRORS If the error code is invalid, then the function returns a pointer to a string which says "unknown error code". .SH AUTHOR Stephane Eranian .PP papi-5.6.0/src/libpfm-3.y/python/sys.py000775 001750 001750 00000004373 13216244363 021727 0ustar00jshenry1963jshenry1963000000 000000 #!/usr/bin/env python # # Copyright (c) 2008 Google, Inc. # Contributed by Arun Sharma # # Permission is hereby granted, free of charge, to any person obtaining a # copy of this software and associated documentation files (the "Software"), # to deal in the Software without restriction, including without limitation # the rights to use, copy, modify, merge, publish, distribute, sublicense, # and/or sell copies of the Software, and to permit persons to whom the # Software is furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included # in all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR # OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR # OTHER DEALINGS IN THE SOFTWARE. # # System wide monitoring example. 
Copied from syst.c # # Run as: ./sys.py -c cpulist -e eventlist import sys import os from optparse import OptionParser import time from perfmon import * if __name__ == '__main__': parser = OptionParser() parser.add_option("-e", "--events", help="Events to use", action="store", dest="events") parser.add_option("-c", "--cpulist", help="CPUs to monitor", action="store", dest="cpulist") parser.set_defaults(cpu=0) (options, args) = parser.parse_args() cpus = options.cpulist.split(',') cpus = [ int(c) for c in cpus ] try: s = SystemWideSession(cpus) if options.events: events = options.events.split(",") else: raise "You need to specify events to monitor" s.dispatch_events(events) s.load() # Measuring loop for i in range(1, 10): s.start() time.sleep(1) s.stop() # Print the counts for cpu in xrange(len(cpus)): for i in xrange(s.npmds): print "CPU%d.PMD%d\t%lu""" % (cpu, s.pmds[cpu][i].reg_num, s.pmds[cpu][i].reg_value) finally: s.cleanup() papi-5.6.0/src/libpfm-3.y/docs/man3/pfm_get_impl_pmcs.3000664 001750 001750 00000005755 13216244361 024554 0ustar00jshenry1963jshenry1963000000 000000 .TH LIBPFM 3 "July, 2003" "" "Linux Programmer's Manual" .SH NAME pfm_get_impl_pmcs, pfm_get_impl_pmds, pfm_get_impl_counters, pfm_get_num_counters, pfm_get_num_pmcs, pfm_get_num_pmds, pfm_get_hw_counter_width \- return bitmask of implemented PMU registers or number of PMU registers .SH SYNOPSIS .nf .B #include .sp .BI "int pfm_get_impl_pmcs(pfmlib_regmask_t *" impl_pmcs ");" .BI "int pfm_get_impl_pmds(pfmlib_regmask_t *" impl_pmds ");" .BI "int pfm_get_impl_counters(pfmlib_regmask_t *" impl_counters ");" .BI "int pfm_get_num_counters(unsigned int *"num ");" .BI "int pfm_get_num_pmcs(unsigned int *"num ");" .BI "int pfm_get_num_pmds(unsigned int *"num ");" .BI "int pfm_get_num_counters(unsigned int *"num ");" .BI "int pfm_get_hw_counter_width(unsigned int *"width ");" .sp .SH DESCRIPTION The \fBpfm_get_impl_*()\fR functions can be used to figure out which PMU registers are implemented on 
the host CPU. All implemented registers may not necessarily be available to applications. Programs need to query the operating system kernel monitoring interface to figure out the list of available registers. .sp The \fBpfm_get_impl_*()\fR functions all return a bitmask of registers corresponding to the query. The bitmask pointer passed as argument is reset to zero by each function. The returned bitmask must be accessed using the set of functions provided by the library to ensure portability. See related man pages below. .sp The \fBpfm_get_num_*()\fR functions return the number of implemented PMC or PMD registers. Those numbers may be different from the actual number of registers available to applications. .sp The \fBpfm_get_impl_pmcs()\fR function returns in \fBimpl_pmcs\fR the bitmask of implemented PMCS. The \fBpfm_get_impl_pmds()\fR function returns in \fBimpl_pmds\fR the bitmask of implemented PMDS. The \fBpfm_get_impl_counters()\fR function returns in \fBimpl_counters\fR a bitmask of the PMD registers used as counters. Depending on the PMU mode, not all PMD registers are necessarily used as counters. .sp The \fBpfm_get_num_counters()\fR function returns in \fBnum\fR the number of PMD used as counters. A counter is a PMD which is used to accumulate the number of occurrences of an event. The \fBpfm_get_num_pmcs()\fR function returns in \fBnum\fR the number of implemented PMCs by the host PMU. The \fBpfm_get_num_pmds()\fR function returns in \fBnum\fR the number of implemented PMDs by the host PMU. The \fBpfm_get_hw_counter_width()\fR function returns the width in bits of the counters in \fBwidth\fR. PMU implementations can have different number of bits implemented. For instance, Itanium has 32-bit counters, while Itanium 2 has 47-bits. .SH RETURN The function returns whether or not it was successful. A return value of \fBPFMLIB_SUCCESS\fR indicates success, otherwise the value is the error code. 
.SH ERRORS .B PFMLIB_ERR_NOINIT the library has not been initialized properly. .SH SEE ALSO pfm_regmask_set(3), pfm_regmask_isset(3) .SH AUTHOR Stephane Eranian .PP papi-5.6.0/src/testlib/do_loops.h000664 001750 001750 00000003317 13216244370 020763 0ustar00jshenry1963jshenry1963000000 000000 #define NUM_WORK_SECONDS 2 #define NUM_FLOPS 20000000 #define NUM_MISSES 2000000 #define NUM_READS 20000 #define SUCCESS 1 #define FAILURE 0 #define MAX_THREADS 256 #define NUM_THREADS 4 #define NUM_ITERS 1000000 #define THRESHOLD 1000000 #define L1_MISS_BUFFER_SIZE_INTS 128*1024 #define CACHE_FLUSH_BUFFER_SIZE_INTS 16*1024*1024 #define TOLERANCE .2 #define OVR_TOLERANCE .75 #define MPX_TOLERANCE .20 #define TIME_LIMIT_IN_US 60*1000000 /* Run for about 1 minute or 60000000 us */ void do_reads( int n ); void fdo_reads( int *n ); void fdo_reads_( int *n ); void fdo_reads__( int *n ); void FDO_READS( int *n ); void _FDO_READS( int *n ); void do_flops( int n ); /* export the next symbol as 'end' address of do_flops for profiling */ void fdo_flops( int *n ); void fdo_flops_( int *n ); void fdo_flops__( int *n ); void FDO_FLOPS( int *n ); void _FDO_FLOPS( int *n ); void do_misses( int n, int bytes ); void fdo_misses( int *n, int *size ); void fdo_misses_( int *n, int *size ); void fdo_misses__( int *n, int *size ); void FDO_MISSES( int *n, int *size ); void _FDO_MISSES( int *n, int *size ); void do_flush( void ); void fdo_flush( void ); void fdo_flush_( void ); void fdo_flush__( void ); void FDO_FLUSH( void ); void _FDO_FLUSH( void ); void do_l1misses( int n ); void fdo_l1misses( int *n ); void fdo_l1misses_( int *n ); void fdo_l1misses__( int *n ); void FDO_L1MISSES( int *n ); void _FDO_L1MISSES( int *n ); void do_stuff( void ); void do_stuff_( void ); void do_stuff__( void ); void DO_STUFF( void ); void _DO_STUFF( void ); void dummy( void *array ); void dummy_( void *array ); void dummy__( void *array ); void DUMMY( void *array ); void _DUMMY( void *array ); void touch_dummy( 
double *array, int size ); papi-5.6.0/man/man3/PAPIF_multiplex_init.3000664 001750 001750 00000000766 13216244355 022231 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPIF_multiplex_init" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPIF_multiplex_init \- .PP Initialize multiplex support in the PAPI library\&. .SH SYNOPSIS .br .PP .SH "Detailed Description" .PP .PP \fBFortran Interface:\fP .RS 4 #include 'fpapi\&.h' .br \fBPAPIF_multiplex_init( C_INT check )\fP .RE .PP \fBSee Also:\fP .RS 4 \fBPAPI_multiplex_init\fP .RE .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. papi-5.6.0/src/aix-memory.c000664 001750 001750 00000005701 13216244356 017564 0ustar00jshenry1963jshenry1963000000 000000 /* * File: aix-memory.c * Author: Kevin London * london@cs.utk.edu * * Mods: * */ #include "papi.h" #include "papi_internal.h" #include "aix.h" int _aix_get_memory_info( PAPI_hw_info_t * mem_info, int type ) { PAPI_mh_level_t *L = mem_info->mem_hierarchy.level; /* Not quite sure what bit 30 indicates. I'm assuming it flags a unified tlb */ if ( _system_configuration.tlb_attrib & ( 1 << 30 ) ) { L[0].tlb[0].type = PAPI_MH_TYPE_UNIFIED; L[0].tlb[0].num_entries = _system_configuration.itlb_size; L[0].tlb[0].type = PAPI_MH_TYPE_UNIFIED; } else { L[0].tlb[0].type = PAPI_MH_TYPE_INST; L[0].tlb[0].num_entries = _system_configuration.itlb_size; L[0].tlb[0].associativity = _system_configuration.itlb_asc; L[0].tlb[1].type = PAPI_MH_TYPE_DATA; L[0].tlb[1].num_entries = _system_configuration.dtlb_size; L[0].tlb[1].associativity = _system_configuration.dtlb_asc; } /* Not quite sure what bit 30 indicates. 
I'm assuming it flags a unified cache */ if ( _system_configuration.cache_attrib & ( 1 << 30 ) ) { L[0].cache[0].type = PAPI_MH_TYPE_UNIFIED; L[0].cache[0].size = _system_configuration.icache_size; L[0].cache[0].associativity = _system_configuration.icache_asc; L[0].cache[0].line_size = _system_configuration.icache_line; } else { L[0].cache[0].type = PAPI_MH_TYPE_INST; L[0].cache[0].size = _system_configuration.icache_size; L[0].cache[0].associativity = _system_configuration.icache_asc; L[0].cache[0].line_size = _system_configuration.icache_line; L[0].cache[1].type = PAPI_MH_TYPE_DATA; L[0].cache[1].size = _system_configuration.dcache_size; L[0].cache[1].associativity = _system_configuration.dcache_asc; L[0].cache[1].line_size = _system_configuration.dcache_line; } L[1].cache[0].type = PAPI_MH_TYPE_UNIFIED; L[1].cache[0].size = _system_configuration.L2_cache_size; L[1].cache[0].associativity = _system_configuration.L2_cache_asc; /* is there a line size for Level 2 cache? */ /* it looks like we've always got at least 2 levels of info */ /* what about level 3 cache? */ mem_info->mem_hierarchy.levels = 2; return PAPI_OK; } int _aix_get_dmem_info( PAPI_dmem_info_t * d ) { /* This function has been reimplemented to conform to current interface. It has not been tested. Nor has it been confirmed for completeness. 
dkt 05-10-06 */ struct procsinfo pi; pid_t mypid = getpid( ); pid_t pid; int found = 0; pid = 0; while ( 1 ) { if ( getprocs( &pi, sizeof ( pi ), 0, 0, &pid, 1 ) != 1 ) break; if ( mypid == pi.pi_pid ) { found = 1; break; } } if ( !found ) return ( PAPI_ESYS ); d->size = pi.pi_size; d->resident = pi.pi_drss + pi.pi_trss; d->high_water_mark = PAPI_EINVAL; d->shared = PAPI_EINVAL; d->text = pi.pi_trss; /* this is a guess */ d->library = PAPI_EINVAL; d->heap = PAPI_EINVAL; d->locked = PAPI_EINVAL; d->stack = PAPI_EINVAL; d->pagesize = getpagesize( ); return ( PAPI_OK ); } papi-5.6.0/src/testlib/do_loops.c000664 001750 001750 00000010777 13216244370 020766 0ustar00jshenry1963jshenry1963000000 000000 /* Compile me with -O0 or else you'll get none. */ #include #include #include #include #include #include #include "do_loops.h" volatile int buf[CACHE_FLUSH_BUFFER_SIZE_INTS]; volatile int buf_dummy = 0; volatile int *flush = NULL; volatile int flush_dummy = 0; volatile double a = 0.5, b = 2.2; void do_reads( int n ) { int i, retval; static int fd = -1; char buf; if ( fd == -1 ) { fd = open( "/dev/zero", O_RDONLY ); if ( fd == -1 ) { perror( "open(/dev/zero)" ); exit( 1 ); } } for ( i = 0; i < n; i++ ) { retval = ( int ) read( fd, &buf, sizeof ( buf ) ); if ( retval != sizeof ( buf ) ) { if ( retval < 0 ) perror( "/dev/zero cannot be read" ); else fprintf( stderr, "/dev/zero cannot be read: only got %d bytes.\n", retval ); exit( 1 ); } } } void fdo_reads( int *n ) { do_reads( *n ); } void fdo_reads_( int *n ) { do_reads( *n ); } void fdo_reads__( int *n ) { do_reads( *n ); } void FDO_READS( int *n ) { do_reads( *n ); } void _FDO_READS( int *n ) { do_reads( *n ); } void do_flops( int n ) { int i; double c = 0.11; for ( i = 0; i < n; i++ ) { c += a * b; } dummy( ( void * ) &c ); } void fdo_flops( int *n ) { do_flops( *n ); } void fdo_flops_( int *n ) { do_flops( *n ); } void fdo_flops__( int *n ) { do_flops( *n ); } void FDO_FLOPS( int *n ) { do_flops( *n ); } void _FDO_FLOPS( 
int *n ) { do_flops( *n ); } void do_misses( int n, int bytes ) { register int i, j, tmp = buf_dummy, len = bytes / ( int ) sizeof ( int ); dummy( ( void * ) buf ); dummy( ( void * ) &buf_dummy ); assert( len <= CACHE_FLUSH_BUFFER_SIZE_INTS ); for ( j = 0; j < n; j++ ) { for ( i = 0; i < len; i++ ) { /* We need to read, modify, write here to look out for the write allocate policies. */ buf[i] += tmp; /* Fake out some naive prefetchers */ buf[len - 1 - i] -= tmp; } tmp += len; } buf_dummy = tmp; dummy( ( void * ) buf ); dummy( ( void * ) &buf_dummy ); } void fdo_misses( int *n, int *size ) { do_misses( *n, *size ); } void fdo_misses_( int *n, int *size ) { do_misses( *n, *size ); } void fdo_misses__( int *n, int *size ) { do_misses( *n, *size ); } void FDO_MISSES( int *n, int *size ) { do_misses( *n, *size ); } void _FDO_MISSES( int *n, int *size ) { do_misses( *n, *size ); } void do_flush( void ) { register int i; if ( flush == NULL ) flush = ( int * ) malloc( ( 1024 * 1024 * 16 ) * sizeof ( int ) ); if ( !flush ) return; dummy( ( void * ) flush ); for ( i = 0; i < ( 1024 * 1024 * 16 ); i++ ) { flush[i] += flush_dummy; } flush_dummy++; dummy( ( void * ) flush ); dummy( ( void * ) &flush_dummy ); } void fdo_flush( void ) { do_flush( ); } void fdo_flush_( void ) { do_flush( ); } void fdo_flush__( void ) { do_flush( ); } void FDO_FLUSH( void ) { do_flush( ); } void _FDO_FLUSH( void ) { do_flush( ); } void do_l1misses( int n ) { do_misses( n, L1_MISS_BUFFER_SIZE_INTS ); } void fdo_l1misses( int *n ) { do_l1misses( *n ); } void fdo_l1misses_( int *n ) { do_l1misses( *n ); } void fdo_l1misses__( int *n ) { do_l1misses( *n ); } void FDO_L1MISSES( int *n ) { do_l1misses( *n ); } void _FDO_L1MISSES( int *n ) { do_l1misses( *n ); } void do_stuff( void ) { static int loops = 0; if ( loops == 0 ) { struct timeval now, then; gettimeofday( &then, NULL ); do { do_flops( NUM_FLOPS ); do_reads( NUM_READS ); do_misses( 1, 1024 * 1024 ); gettimeofday( &now, NULL ); loops++; } while ( 
now.tv_sec - then.tv_sec < NUM_WORK_SECONDS ); } else { int i = 0; do { do_flops( NUM_FLOPS ); do_reads( NUM_READS ); do_misses( 1, 1024 * 1024 ); i++; } while ( i < loops ); } } void do_stuff_( void ) { do_stuff( ); } void do_stuff__( void ) { do_stuff( ); } void DO_STUFF( void ) { do_stuff( ); } void _DO_STUFF( void ) { do_stuff( ); } void dummy( void *array ) { /* Confuse the compiler so as not to optimize away the flops in the calling routine */ /* Cast the array as a void to eliminate unused argument warning */ ( void ) array; } void dummy_( void *array ) { ( void ) array; } void dummy__( void *array ) { ( void ) array; } void DUMMY( void *array ) { ( void ) array; } void _DUMMY( void *array ) { ( void ) array; } /* We have to actually touch the memory to confuse some * systems, so they actually allocate the memory. * -KSL */ void touch_dummy( double *array, int size ) { int i; double *tmp = array; for ( i = 0; i < size; i++, tmp++ ) *tmp = ( double ) rand( ); } papi-5.6.0/src/perfctr-2.6.x/examples/signal/x86.c000775 001750 001750 00000010125 13216244366 023433 0ustar00jshenry1963jshenry1963000000 000000 /* $Id: x86.c,v 1.3.2.11 2010/11/07 19:46:06 mikpe Exp $ * x86-specific code. 
* * Copyright (C) 2001-2010 Mikael Pettersson */ #define __USE_GNU /* enable symbolic names for gregset_t[] indices */ #include #include #include #include #include "libperfctr.h" #include "arch.h" #ifdef __x86_64__ #ifndef REG_RIP #define REG_RIP 16 #endif #define REG_PC REG_RIP #else /* !__x86_64__ */ #ifndef REG_EIP #define REG_EIP 14 #endif #define REG_PC REG_EIP #endif static inline unsigned long mcontext_pc(const mcontext_t *mc) { return mc->gregs[REG_PC]; } unsigned long ucontext_pc(const struct ucontext *uc) { return mcontext_pc(&uc->uc_mcontext); } void do_setup(const struct perfctr_info *info, struct perfctr_cpu_control *cpu_control) { unsigned int nractrs = 0; unsigned int pmc_map0 = 0, pmc_map1 = 1; unsigned int evntsel0, evntsel1; memset(cpu_control, 0, sizeof *cpu_control); switch (info->cpu_type) { #if !defined(__x86_64__) case PERFCTR_X86_INTEL_P6: case PERFCTR_X86_INTEL_PII: case PERFCTR_X86_INTEL_PIII: case PERFCTR_X86_INTEL_PENTM: case PERFCTR_X86_INTEL_CORE: /* FLOPS, USR, ENable, INT */ evntsel0 = 0xC1 | (1 << 16) | (1 << 22) | (1 << 20); /* BR_TAKEN_RETIRED, USR, INT */ evntsel1 = 0xC9 | (1 << 16) | (1 << 20); break; #endif case PERFCTR_X86_INTEL_CORE2: /* X87_OPS_RETIRED_ANY, USR, Enable, INT */ evntsel0 = 0xC1 | (0xFE << 8) | (1 << 16) | (1 << 22) | (1 << 20); /* BR_INST_RETIRED_TAKEN, USR, Enable, INT */ evntsel1 = 0xC4 | (0x0C << 8) | (1 << 16) | (1 << 22) | (1 << 20); break; case PERFCTR_X86_INTEL_ATOM: /* Atom's architectural events don't include FLOPS */ /* INST_RETIRED_ANY, USR, Enable, INT */ evntsel0 = 0xC0 | (1 << 16) | (1 << 22) | (1 << 20); /* BR_INST_RETIRED_ANY, USR, Enable, INT */ evntsel1 = 0xC4 | (1 << 16) | (1 << 22) | (1 << 20); break; case PERFCTR_X86_INTEL_NHLM: case PERFCTR_X86_INTEL_WSTMR: /* FP_COMP_OPS_EXE.ANY, USR, Enable, INT */ evntsel0 = 0x10 | (0xFF << 8) | (1 << 16) | (1 << 22) | (1 << 20); /* BR_INST_RETIRED.ALL, USR, Enable, INT */ evntsel1 = 0xC4 | (1 << 16) | (1 << 22) | (1 << 20); break; #if 
!defined(__x86_64__) case PERFCTR_X86_AMD_K7: /* K7 can't count FLOPS. Count RETIRED_INSTRUCTIONS instead. */ evntsel0 = 0xC0 | (1 << 16) | (1 << 22) | (1 << 20); /* RETIRED_TAKEN_BRANCHES, USR, INT */ evntsel1 = 0xC4 | (1 << 16) | (1 << 22) | (1 << 20); break; case PERFCTR_X86_INTEL_P4: case PERFCTR_X86_INTEL_P4M2: #endif case PERFCTR_X86_INTEL_P4M3: nractrs = 1; /* PMC(0) produces tagged x87_FP_uop:s (FLAME_CCCR0, FIRM_ESCR0) */ cpu_control->pmc_map[0] = 0x8 | (1 << 31); cpu_control->evntsel[0] = (0x3 << 16) | (1 << 13) | (1 << 12); cpu_control->p4.escr[0] = (4 << 25) | (1 << 24) | (1 << 5) | (1 << 4) | (1 << 2); /* PMC(1) counts execution_event(X87_FP_retired) (IQ_CCCR0, CRU_ESCR2) */ pmc_map0 = 0xC | (1 << 31); evntsel0 = (1 << 26) | (0x3 << 16) | (5 << 13) | (1 << 12); cpu_control->p4.escr[1] = (0xC << 25) | (1 << 9) | (1 << 2); /* PMC(2) counts branch_retired(TP,TM) (IQ_CCCR2, CRU_ESCR3) */ pmc_map1 = 0xE | (1 << 31); evntsel1 = (1 << 26) | (0x3 << 16) | (5 << 13) | (1 << 12); cpu_control->p4.escr[2] = (6 << 25) | (((1 << 3)|(1 << 2)) << 9) | (1 << 2); break; case PERFCTR_X86_AMD_K8: case PERFCTR_X86_AMD_K8C: case PERFCTR_X86_AMD_FAM10H: /* RETIRED_FPU_INSTRS, Unit Mask "x87 instrs", any CPL, Enable, INT */ evntsel0 = 0xCB | (0x01 << 8) | (3 << 16) | (1 << 22) | (1 << 20); /* RETIRED_TAKEN_BRANCHES, USR, Enable, INT */ evntsel1 = 0xC4 | (1 << 16) | (1 << 22) | (1 << 20); break; default: printf("%s: unsupported cpu type %u\n", __FUNCTION__, info->cpu_type); exit(1); } cpu_control->tsc_on = 1; cpu_control->nractrs = nractrs; cpu_control->nrictrs = 2; cpu_control->pmc_map[nractrs+0] = pmc_map0; cpu_control->evntsel[nractrs+0] = evntsel0; cpu_control->ireset[nractrs+0] = -25; cpu_control->pmc_map[nractrs+1] = pmc_map1; cpu_control->evntsel[nractrs+1] = evntsel1; cpu_control->ireset[nractrs+1] = -25; } papi-5.6.0/src/components/perf_event/tests/event_name_lib.h000664 001750 001750 00000000232 13216244357 026130 0ustar00jshenry1963jshenry1963000000 000000 char 
*get_instructions_event(char *event, int size); char *get_offcore_event(char *event, int size); char *get_invalid_event_name(char *event, int size); papi-5.6.0/src/ftests/zeronamed.F000664 001750 001750 00000007753 13216244361 020741 0ustar00jshenry1963jshenry1963000000 000000 #include "fpapi_test.h" program zero integer*8 values(10) integer es, event integer*8 uso, usn, cyco, cycn integer domain, granularity character*(PAPI_MAX_STR_LEN) domainstr, grnstr character*(PAPI_MAX_STR_LEN) name integer retval Integer last_char External last_char integer tests_quiet, get_quiet external get_quiet tests_quiet = get_quiet() es = PAPI_NULL retval = PAPI_VER_CURRENT call PAPIf_library_init(retval) if ( retval.NE.PAPI_VER_CURRENT) then call ftest_fail(__FILE__, __LINE__, . 'PAPI_library_init', retval) end if call PAPIf_query_named_event('PAPI_TOT_CYC', retval) if (retval .NE. PAPI_OK) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_query_named_event: PAPI_TOT_CYC', retval) end if call PAPIf_query_named_event('PAPI_TOT_INS', retval) if (retval .NE. PAPI_OK) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_query_named_event: PAPI_TOT_INS', retval) end if call PAPIf_create_eventset(es, retval) if ( retval.NE.PAPI_OK) then call ftest_fail( __FILE__, __LINE__, . 'PAPIf_create_eventset', retval ) end if call PAPIf_add_named_event( es, 'PAPI_TOT_CYC', retval ) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_add_event: PAPI_TOT_CYC', retval) end if call PAPIf_add_named_event( es, 'PAPI_TOT_INS', retval ) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_add_event: PAPI_TOT_INS', retval) end if call PAPIf_get_real_usec(uso) call PAPIf_get_real_cyc(cyco) call PAPIf_start(es, retval) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_start', retval) end if call fdo_flops(NUM_FLOPS) call PAPIf_stop(es, values(1), retval) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 
'PAPIf_stop', retval) end if call PAPIf_get_real_usec(usn) call PAPIf_get_real_cyc(cycn) if (tests_quiet .EQ. 0) then print *, "PAPI_{query, add, remove}_named_event API test." print *, "-----------------------------------------------", * "--------------------------" end if call PAPIf_get_domain(es, domain, PAPI_DEFDOM, retval) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_get_domain', retval) end if call stringify_domain(domain, domainstr) if (tests_quiet .EQ. 0) then write (*,800) "Default domain is :", domain, domainstr end if call PAPIf_get_granularity(es, granularity, PAPI_DEFGRN, * retval) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_get_granularity', * retval) end if call stringify_granularity(granularity, grnstr) if (tests_quiet .EQ. 0) then call PAPIf_event_code_to_name (event, name, retval) write (*,800) "Default granularity is:", granularity, grnstr 800 format(a25, i3, " ", a70) write (*,810) "Using", NUM_FLOPS, $ " iterations of c = c + a * b" 810 format(a7, i9, a) print *, "-----------------------------------------------", * "--------------------------" write (*,100) "Test type", 1 write (*,100) "PAPI_TOT_CYC", values(1) write (*,100) "PAPI_TOT_INS", values(2) write (*,100) "Real usec", usn-uso write (*,100) "Real cycles", cycn-cyco 100 format(a13, ":", i12) print *, "-----------------------------------------------", * "--------------------------" print *, "Verification: PAPI_TOT_CYC should be roughly ", * "real_cycles" endif call ftests_pass(__FILE__) end papi-5.6.0/src/utils/papi_clockres.c000664 001750 001750 00000002613 13216244370 021446 0ustar00jshenry1963jshenry1963000000 000000 /** file clockres.c * * @page papi_clockres * @brief The papi_clockres utility. * @section Name * papi_clockres - measures and reports clock latency and resolution for PAPI timers. 
* * @section Synopsis * @section Description * papi_clockres is a PAPI utility program that measures and reports the * latency and resolution of the four PAPI timer functions: * PAPI_get_real_cyc(), PAPI_get_virt_cyc(), PAPI_get_real_usec() and PAPI_get_virt_usec(). * * @section Options * This utility has no command line options. * * @section Bugs * There are no known bugs in this utility. * If you find a bug, it should be reported to the PAPI Mailing List at . * */ #include #include #include "papi.h" #include "../testlib/clockcore.h" int main( int argc, char **argv ) { (void) argc; (void) argv; int retval; retval = PAPI_library_init( PAPI_VER_CURRENT ); if (retval != PAPI_VER_CURRENT ) { fprintf(stderr,"Error with PAPI init!\n"); return 1; } retval = PAPI_set_debug( PAPI_VERB_ECONT ); if (retval != PAPI_OK ) { fprintf(stderr,"Error with PAPI_set_debug!\n"); return 1; } printf( "Printing Clock latency and resolution.\n" ); printf( "-----------------------------------------------\n" ); retval=clockcore( 0 ); if (retval<0) { fprintf(stderr,"Error reading clock!\n"); return retval; } return 0; } papi-5.6.0/src/components/perf_event/tests/event_name_lib.c000664 001750 001750 00000006067 13216244357 026137 0ustar00jshenry1963jshenry1963000000 000000 #include #include #include #include "papi.h" char *get_offcore_event(char *event, int size) { const PAPI_hw_info_t *hwinfo; hwinfo = PAPI_get_hardware_info(); if ( hwinfo == NULL ) { return NULL; } if (hwinfo->vendor == PAPI_VENDOR_INTEL) { if ( hwinfo->cpuid_family == 6) { switch(hwinfo->cpuid_model) { case 26: case 30: case 31: /* Nehalem */ case 46: /* Nehalem EX */ strncpy(event, "OFFCORE_RESPONSE_0:DMND_DATA_RD:LOCAL_DRAM",size); return event; break; case 37: case 44: /* Westmere */ case 47: /* Westmere EX */ strncpy(event, "OFFCORE_RESPONSE_0:DMND_DATA_RD:LOCAL_DRAM",size); return event; break; case 45: /* SandyBridge EP */ case 42: /* SandyBridge */ strncpy(event, "OFFCORE_RESPONSE_0:DMND_DATA_RD:ANY_RESPONSE",size); 
return event; break; case 58: /* IvyBridge */ case 62: /* Ivy Trail */ strncpy(event, "OFFCORE_RESPONSE_0:DMND_DATA_RD:ANY_RESPONSE",size); return event; break; case 60: /* Haswell */ case 69: case 70: case 63: /* Haswell EP */ strncpy(event, "OFFCORE_RESPONSE_0:DMND_DATA_RD:ANY_RESPONSE",size); return event; break; case 61: /* Broadwell */ case 71: case 86: case 79: /* Broadwell EP */ strncpy(event, "OFFCORE_RESPONSE_0:DMND_DATA_RD:ANY_RESPONSE",size); return event; break; case 78: /* Skylake */ case 94: case 85: /* Skylake-X */ strncpy(event, "OFFCORE_RESPONSE_0:DMND_DATA_RD:ANY_RESPONSE",size); return event; break; case 142: /* Kabylake */ case 158: strncpy(event, "OFFCORE_RESPONSE_0:DMND_DATA_RD:ANY_RESPONSE",size); return event; break; case 87: /* Knights Landing */ strncpy(event, "OFFCORE_RESPONSE_0:DMND_DATA_RD:ANY_RESPONSE",size); return event; break; } } return NULL; } else if (hwinfo->vendor == PAPI_VENDOR_AMD) { return NULL; } return NULL; } char *get_instructions_event(char *event, int size) { const PAPI_hw_info_t *hwinfo; hwinfo = PAPI_get_hardware_info(); if ( hwinfo == NULL ) { return NULL; } if (hwinfo->vendor == PAPI_VENDOR_INTEL) { if ( hwinfo->cpuid_family == 6) { strncpy(event,"INSTRUCTIONS_RETIRED",size); return event; } if ( hwinfo->cpuid_family == 15) { strncpy(event,"INSTR_RETIRED:NBOGUSNTAG",size); return event; } return NULL; } else if (hwinfo->vendor == PAPI_VENDOR_AMD) { strncpy(event,"RETIRED_INSTRUCTIONS",size); return event; } return NULL; } char *get_invalid_event_name(char *event, int size) { const PAPI_hw_info_t *hwinfo; hwinfo = PAPI_get_hardware_info(); if ( hwinfo == NULL ) { return NULL; } if (hwinfo->vendor == PAPI_VENDOR_INTEL) { if ( hwinfo->cpuid_family == 6) { switch(hwinfo->cpuid_model) { case 78: /* Skylake */ case 94: case 85: /* Skylake-X */ strncpy(event, "DTLB_LOAD_MISSES:MISS_CAUSES_A_WALK,DTLB_STORE_MISSES:MISS_CAUSES_A_WALK:u=1",size); return event; break; } } return NULL; } else if (hwinfo->vendor == 
PAPI_VENDOR_AMD) { return NULL; } return NULL; } papi-5.6.0/src/perfctr-2.7.x/examples/signal/ppc.c000664 001750 001750 00000002221 13216244370 023557 0ustar00jshenry1963jshenry1963000000 000000 /* $Id: ppc.c,v 1.4 2005/03/14 01:48:42 mikpe Exp $ * PPC32-specific code. * * Copyright (C) 2004 Mikael Pettersson */ #include #include #include #include #include "libperfctr.h" #include "arch.h" unsigned long ucontext_pc(const struct ucontext *uc) { /* glibc-2.3.3 (YDL4) changed the type of uc->uc_mcontext, * breaking code which worked in glibc-2.3.1 (YDL3.0.1). * This formulation works with both, and is cleaner than * selecting glibc-2.3.3 specific code with "#ifdef NGREG". */ return uc->uc_mcontext.regs->nip; } void do_setup(const struct perfctr_info *info, struct perfctr_cpu_control *cpu_control) { memset(cpu_control, 0, sizeof *cpu_control); cpu_control->tsc_on = 1; cpu_control->nractrs = 0; cpu_control->nrictrs = 1; cpu_control->pmc_map[0] = 0; /* INSTRUCTIONS_COMPLETED */ cpu_control->evntsel[0] = 0x02; /* overflow after 100 events */ cpu_control->ireset[0] = 0x80000000-100; /* not kernel mode, enable interrupts, enable PMC1 interrupts */ cpu_control->ppc.mmcr0 = (1<<(31-1)) | (1<<(31-5)) | (1<<(31-16)); } papi-5.6.0/src/ctests/zero_flip.c000664 001750 001750 00000012033 13216244361 020763 0ustar00jshenry1963jshenry1963000000 000000 /* This file performs the following test: start, stop and timer functionality - It attempts to use the following two counters. It may use less depending on hardware counter resource limitations. These are counted in the default counting domain and default granularity, depending on the platform. Usually this is the user domain (PAPI_DOM_USER) and thread context (PAPI_GRN_THR). + PAPI_FP_INS + PAPI_TOT_CYC - Get us. - Start counters - Do flops - Stop and read counters - Get us. 
*/ #include #include #include "papi.h" #include "papi_test.h" #include "do_loops.h" int main( int argc, char **argv ) { int retval, eventcnt, events[2], i, tmp; int EventSet1 = PAPI_NULL, EventSet2 = PAPI_NULL; int PAPI_event; long long values1[2], values2[2]; long long elapsed_us, elapsed_cyc; char event_name[PAPI_MAX_STR_LEN], add_event_str[PAPI_MAX_STR_LEN]; int quiet; /* Set TESTS_QUIET variable */ quiet = tests_quiet( argc, argv ); retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); /* query and set up the right instruction to monitor */ if ( PAPI_query_event( PAPI_FP_OPS ) == PAPI_OK ) PAPI_event = PAPI_FP_OPS; else PAPI_event = PAPI_TOT_INS; retval = PAPI_event_code_to_name( PAPI_event, event_name ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); sprintf( add_event_str, "PAPI_add_event[%s]", event_name ); retval = PAPI_create_eventset( &EventSet1 ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); /* Add the events */ if (!quiet) printf( "Adding: %s\n", event_name ); retval = PAPI_add_event( EventSet1, PAPI_event ); if ( retval != PAPI_OK ) { if (!quiet) printf("Trouble adding event\n"); test_skip( __FILE__, __LINE__, "PAPI_add_event", retval ); } retval = PAPI_add_event( EventSet1, PAPI_TOT_CYC ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_add_event", retval ); /* Add them reversed to EventSet2 */ retval = PAPI_create_eventset( &EventSet2 ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); eventcnt = 2; retval = PAPI_list_events( EventSet1, events, &eventcnt ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_list_events", retval ); for ( i = eventcnt - 1; i >= 0; i-- ) { retval = PAPI_event_code_to_name( events[i], event_name ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, 
"PAPI_event_code_to_name", retval ); retval = PAPI_add_event( EventSet2, events[i] ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_add_event", retval ); } elapsed_us = PAPI_get_real_usec( ); elapsed_cyc = PAPI_get_real_cyc( ); retval = PAPI_start( EventSet1 ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_start", retval ); do_flops( NUM_FLOPS ); retval = PAPI_stop( EventSet1, values1 ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); retval = PAPI_start( EventSet2 ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_start", retval ); do_flops( NUM_FLOPS ); retval = PAPI_stop( EventSet2, values2 ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); elapsed_us = PAPI_get_real_usec( ) - elapsed_us; elapsed_cyc = PAPI_get_real_cyc( ) - elapsed_cyc; retval = PAPI_cleanup_eventset( EventSet1 ); /* JT */ if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset", retval ); retval = PAPI_destroy_eventset( &EventSet1 ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset", retval ); retval = PAPI_cleanup_eventset( EventSet2 ); /* JT */ if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset", retval ); retval = PAPI_destroy_eventset( &EventSet2 ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset", retval ); if ( !quiet ) { printf( "Test case 0: start, stop.\n" ); printf( "-----------------------------------------------\n" ); tmp = PAPI_get_opt( PAPI_DEFDOM, NULL ); printf( "Default domain is: %d (%s)\n", tmp, stringify_all_domains( tmp ) ); tmp = PAPI_get_opt( PAPI_DEFGRN, NULL ); printf( "Default granularity is: %d (%s)\n", tmp, stringify_granularity( tmp ) ); printf( "Using %d iterations of c += a*b\n", NUM_FLOPS ); printf ( "-------------------------------------------------------------------------\n" ); printf( "Test type : \t 1\t 2\n" ); sprintf( add_event_str, "%-12s 
: \t", event_name ); printf( TAB2, add_event_str, values1[0], values2[1] ); printf( TAB2, "PAPI_TOT_CYC : \t", values1[1], values2[0] ); printf( TAB1, "Real usec : \t", elapsed_us ); printf( TAB1, "Real cycles : \t", elapsed_cyc ); printf ( "-------------------------------------------------------------------------\n" ); printf( "Verification: none\n" ); } test_pass( __FILE__ ); return 0; } papi-5.6.0/src/perfctr-2.6.x/linux/drivers/perfctr/arm_setup.c000664 001750 001750 00000000700 13216244366 026167 0ustar00jshenry1963jshenry1963000000 000000 /* $Id: arm_setup.c,v 1.1.2.1 2007/02/11 20:13:45 mikpe Exp $ * Performance-monitoring counters driver. * ARM-specific kernel-resident code. * * Copyright (C) 2005-2007 Mikael Pettersson */ #include #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) #include #endif #include #include //#ifdef CONFIG_PERFCTR_MODULE //EXPORT_SYMBOL(__free_task_struct); //#endif /* MODULE */ papi-5.6.0/src/libpfm4/lib/pfmlib_intel_glm.c000664 001750 001750 00000005023 13216244365 023100 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_intel_glm.c : Intel Goldmont core PMU * * Copyright (c) 2016 Google * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* private headers */ #include "pfmlib_priv.h" #include "pfmlib_intel_x86_priv.h" #include "events/intel_glm_events.h" static const int glm_models[] = { 92, /* Goldmont */ 95, /* Goldmont Denverton */ 0 }; static int pfm_intel_glm_init(void *this) { pfm_intel_x86_cfg.arch_version = 3; return PFM_SUCCESS; } pfmlib_pmu_t intel_glm_support={ .desc = "Intel Goldmont", .name = "glm", .pmu = PFM_PMU_INTEL_GLM, .pme_count = LIBPFM_ARRAY_SIZE(intel_glm_pe), .type = PFM_PMU_TYPE_CORE, .num_cntrs = 4, .num_fixed_cntrs = 3, .max_encoding = 2, .pe = intel_glm_pe, .atdesc = intel_x86_mods, .flags = PFMLIB_PMU_FL_RAW_UMASK, .supported_plm = INTEL_X86_PLM, .cpu_family = 6, .cpu_models = glm_models, .pmu_detect = pfm_intel_x86_model_detect, .pmu_init = pfm_intel_glm_init, .get_event_encoding[PFM_OS_NONE] = pfm_intel_x86_get_encoding, PFMLIB_ENCODE_PERF(pfm_intel_x86_get_perf_encoding), .get_event_first = pfm_intel_x86_get_event_first, .get_event_next = pfm_intel_x86_get_event_next, .event_is_valid = pfm_intel_x86_event_is_valid, .validate_table = pfm_intel_x86_validate_table, .get_event_info = pfm_intel_x86_get_event_info, .get_event_attr_info = pfm_intel_x86_get_event_attr_info, PFMLIB_VALID_PERF_PATTRS(pfm_intel_x86_perf_validate_pattrs), .get_event_nattrs = pfm_intel_x86_get_event_nattrs, }; papi-5.6.0/src/perfctr-2.7.x/etc/costs/PentiumII-266a000664 001750 001750 00000001313 13216244367 023715 0ustar00jshenry1963jshenry1963000000 000000 [data from a 266Mhz Pentium II (Klamath)] PERFCTR INIT: vendor 0, family 6, model 3, stepping 4, clock 266621 kHz PERFCTR INIT: NITER == 64 PERFCTR INIT: loop overhead is 102 cycles PERFCTR INIT: rdtsc cost is 33.2 cycles (2230 total) PERFCTR INIT: rdpmc cost is 28.4 cycles (1924 
total) PERFCTR INIT: rdmsr (counter) cost is 81.2 cycles (5302 total) PERFCTR INIT: rdmsr (evntsel) cost is 69.4 cycles (4545 total) PERFCTR INIT: wrmsr (counter) cost is 82.3 cycles (5372 total) PERFCTR INIT: wrmsr (evntsel) cost is 74.8 cycles (4890 total) PERFCTR INIT: read cr4 cost is 1.7 cycles (217 total) PERFCTR INIT: write cr4 cost is 39.0 cycles (2604 total) perfctr: driver 2.3.3, cpu type Intel Pentium II at 266621 kHz papi-5.6.0/src/perfctr-2.7.x/etc/costs/PentiumII-266b000664 001750 001750 00000001314 13216244367 023717 0ustar00jshenry1963jshenry1963000000 000000 [data from a 266MHz Pentium II (Deschutes)] PERFCTR INIT: vendor 0, family 6, model 5, stepping 0, clock 267278 kHz PERFCTR INIT: NITER == 64 PERFCTR INIT: loop overhead is 90 cycles PERFCTR INIT: rdtsc cost is 33.3 cycles (2223 total) PERFCTR INIT: rdpmc cost is 29.8 cycles (2000 total) PERFCTR INIT: rdmsr (counter) cost is 81.4 cycles (5302 total) PERFCTR INIT: rdmsr (evntsel) cost is 69.4 cycles (4533 total) PERFCTR INIT: wrmsr (counter) cost is 97.4 cycles (6325 total) PERFCTR INIT: wrmsr (evntsel) cost is 87.9 cycles (5721 total) PERFCTR INIT: read cr4 cost is 1.9 cycles (217 total) PERFCTR INIT: write cr4 cost is 42.2 cycles (2795 total) perfctr: driver 2.3.3, cpu type Intel Pentium II at 267278 kHz papi-5.6.0/src/components/infiniband/tests/000775 001750 001750 00000000000 13216244357 022757 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/libpfm4/lib/pfmlib_power4.c000664 001750 001750 00000004360 13216244365 022351 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_power4.c : IBM Power4 support * * Copyright (C) IBM Corporation, 2009. All rights reserved. 
* Contributed by Corey Ashford (cjashfor@us.ibm.com) * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ /* private headers */ #include "pfmlib_priv.h" #include "pfmlib_power_priv.h" #include "events/power4_events.h" /* pfm_power4_detect: PMU detection callback for POWER4. Returns PFM_SUCCESS when __is_processor() matches either PV_POWER4 or PV_POWER4p, and PFM_ERR_NOTSUPP otherwise. The `this` argument is not read by this function. */ static int pfm_power4_detect(void* this) { if (__is_processor(PV_POWER4) || __is_processor(PV_POWER4p)) return PFM_SUCCESS; return PFM_ERR_NOTSUPP; } /* power4_support: PMU descriptor for POWER4. It points at the power4_pe event table (presumably declared in events/power4_events.h -- confirm), sizes it with LIBPFM_ARRAY_SIZE, declares 8 counters and a max_encoding of 1, uses pfm_power4_detect for detection, and routes every other callback to the shared pfm_gen_powerpc_* implementations. PFMLIB_ENCODE_PERF and PFMLIB_VALID_PERF_PATTRS are project macros defined outside this file; NOTE(review): they look like they install the perf_events-specific hooks -- confirm against pfmlib_priv.h. */ pfmlib_pmu_t power4_support={ .desc = "POWER4", .name = "power4", .pmu = PFM_PMU_POWER4, .pme_count = LIBPFM_ARRAY_SIZE(power4_pe), .type = PFM_PMU_TYPE_CORE, .num_cntrs = 8, .max_encoding = 1, .pe = power4_pe, .pmu_detect = pfm_power4_detect, .get_event_encoding[PFM_OS_NONE] = pfm_gen_powerpc_get_encoding, PFMLIB_ENCODE_PERF(pfm_gen_powerpc_get_perf_encoding), PFMLIB_VALID_PERF_PATTRS(pfm_gen_powerpc_perf_validate_pattrs), .get_event_first = pfm_gen_powerpc_get_event_first, .get_event_next = pfm_gen_powerpc_get_event_next, .event_is_valid = pfm_gen_powerpc_event_is_valid, .validate_table = pfm_gen_powerpc_validate_table, .get_event_info = pfm_gen_powerpc_get_event_info, .get_event_attr_info = pfm_gen_powerpc_get_event_attr_info, }; papi-5.6.0/src/libpfm4/lib/pfmlib_amd64_fam14h.c000664 001750 001750 00000004701 13216244365 023203 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_amd64_fam14h.c : AMD64 Family 14h * * Copyright (c) 2011 Google, Inc * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software.
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * */ /* private headers */ #include "pfmlib_priv.h" #include "pfmlib_amd64_priv.h" #include "events/amd64_events_fam14h.h" /* DEFINE_FAM14H_REV(d, n, r, pmuid): expands to the definition of a pfmlib_pmu_t object named amd64_fam14h_<n>_support. d is stringized and appended to the .desc text, n is token-pasted into the object name and stringized into .name, r is stored in .pmu_rev, and pmuid is stored in both .pmu and .cpu_family. Every expansion uses the same amd64_fam14h_pe event table, the amd64_mods attribute descriptions, 4 counters, and the pfm_amd64_* callbacks, with pfm_amd64_family_detect as the detection hook. NOTE(review): .supported_plm uses AMD64_FAM10H_PLM, i.e. the Fam10h constant is reused for Fam14h -- presumably intentional, confirm. */ #define DEFINE_FAM14H_REV(d, n, r, pmuid) \ pfmlib_pmu_t amd64_fam14h_##n##_support={ \ .desc = "AMD64 Fam14h "#d, \ .name = "amd64_fam14h_"#n, \ .pmu = pmuid, \ .pmu_rev = r, \ .pme_count = LIBPFM_ARRAY_SIZE(amd64_fam14h_pe),\ .type = PFM_PMU_TYPE_CORE, \ .supported_plm = AMD64_FAM10H_PLM, \ .num_cntrs = 4, \ .max_encoding = 1, \ .pe = amd64_fam14h_pe, \ .atdesc = amd64_mods, \ .flags = PFMLIB_PMU_FL_RAW_UMASK, \ \ .cpu_family = pmuid, \ .pmu_detect = pfm_amd64_family_detect, \ .get_event_encoding[PFM_OS_NONE] = pfm_amd64_get_encoding,\ PFMLIB_ENCODE_PERF(pfm_amd64_get_perf_encoding), \ .get_event_first = pfm_amd64_get_event_first, \ .get_event_next = pfm_amd64_get_event_next, \ .event_is_valid = pfm_amd64_event_is_valid, \ .validate_table = pfm_amd64_validate_table, \ .get_event_info = pfm_amd64_get_event_info, \ .get_event_attr_info = pfm_amd64_get_event_attr_info,\ PFMLIB_VALID_PERF_PATTRS(pfm_amd64_perf_validate_pattrs),\ .get_event_nattrs = pfm_amd64_get_event_nattrs, \ } /* The only instantiation in this file: defines amd64_fam14h_bobcat_support with revision AMD64_FAM14H_REV_B and PMU id PFM_PMU_AMD64_FAM14H_BOBCAT. */ DEFINE_FAM14H_REV(Bobcat, bobcat, AMD64_FAM14H_REV_B, PFM_PMU_AMD64_FAM14H_BOBCAT); papi-5.6.0/src/perfctr-2.7.x/CHANGES000664 001750 001750 00000166170 13216244367 020555 0ustar00jshenry1963jshenry1963000000 000000 $Id: CHANGES,v 1.177 2007/10/06 13:02:07 mikpe Exp $ CHANGES ======= [High-level changes in reverse chronological order. Detailed driver changes are in linux/drivers/perfctr/RELEASE-NOTES.]
Version 2.7.21.1, 2007-10-06 - ppc64: recognise the PowerPC 970MP. - Perfex: On ppc64, do not truncate the MMCR1 value to 32 bits. Bug fix from Philip Mucci. Version 2.7.21, 2007-10-03 - Updated to support kernels 2.6.18 to 2.6.22. The system call numbers used in these kernels differ from those used in older kernels. The user-space library has been updated and will support both these newer kernels as well as the older kernels supported by perfctr-2.7.19/2.7.20/2.7.20.2. - Corrected kernel driver Makefile to unbreak call-site backpatching in the x86 driver. - Updated x86 driver to match the one in perfctr-2.6.28 in terms of fixes, CPU support, and kernel version support. - Updated references to my email address and perfctr's web URL to match current reality. Version 2.7.20.2, 2006-08-27 - Changed library to adjust its system call numbers based on the running kernel's version. Provides compatibility with kernels using perfctr-2.7.19 or perfctr-2.7.20. Version 2.7.20.1, 2006-08-25 - Added patch file for a SuSE 2.6.16.21-SLES10 kernel. Contributed by Eric Kjeldergaard @ IBM. Version 2.7.20, 2006-08-20 - Updated to support kernels 2.6.16, 2.6.17, and 2.6.18-rc4. - Migrated ppc64 from include/asm-ppc64/ to include/asm-powerpc/. - Updated perfctr system call numbers on all platforms, due to other system call additions since kernel 2.6.14. Version 2.7.19, 2005-11-08 - The self and signal examples have been updated for Power5/PPC970. - The ppc64 driver has been updated to compile correctly in recent 2.6 kernels. - The x86 kernel driver has been updated to work correctly on dual-core P4 processors. Previous versions would fail during CPU detection (on HT DC P4s) or would erroneously restrict access for one of the cores (non-HT DC P4s). - The ppc32 driver will now compile in kernels that lack Open Firmware support, which is needed for some embedded systems.
Version 2.7.18, 2005-06-06 - Fixed a 32/64-bit bug in the vperfctr_read_ctrs() library function, which caused problems on ppc64 and unnecessary overheads on x86. The bug was introduced in perfctr-2.7.16. From David Gibson. Version 2.7.17, 2005-05-26 - On all architectures the low-level drivers now indicate changes to the mmap:ed counter state via a standard seqlock mechanism. This cleans up the code, enables user-space fast sampling in some previously impossible cases (x86 w/o TSC), and eliminates a highly unlikely but not impossible failure case on x86 SMP. - Fix counter wraparound issues in ppc64 driver. From David Gibson. - Preliminary code in the x86/x86-64 low-level driver to detect multicore AMD K8 processors, and to prevent resource conflicts and an erratum related to northbridge events. On multicore K8s, northbridge events are only allowed when using the global-mode counters API. Version 2.7.16, 2005-04-09 - In user-visible mmap()ed state, counter start values are now 64-bit fields (values are still 32 bits however). Moved the virtual-to-physical counter mapping out of the user-visible mmap()ed state; the library now maintains that mapping. - On ppc32/ppc64, the mmap()ed state now includes a software counter which the low-level driver increments at each sampling operation. Unfortunately the PPC timebase has too low frequency for it to be a reliable context-switch indicator for user-space. - Makefile tweaks for cross-compilation and 'mrproper'. - Removed references to the obsolete /dev/perfctr special file in INSTALL and perfctr.spec. Version 2.7.15, 2005-03-31 - ppc64 updates from David Gibson for the user-space components and . User-space now compiles and appears to work on ppc64. Version 2.7.14, 2005-03-29 - Slight change to the layout of the mmap()able counter state object. User-visible fields are now contiguous and have been moved to a sub-struct. Kernel-private fields in the state are no longer visible to user-space. 
Added a sysfs attribute containing the offset from the start of the mapping to the user-visible state. - The number field in CPU register descriptors has been extended to 64 bits on all platforms. This is needed to prevent 32-bit binaries from breaking on 64-bit kernels. Version 2.7.13, 2005-03-23 - x86: Fixed a problem with the finalising of backpatchable calls that caused it to miss some call sites when the kernel is compiled with gcc-4.0. - ppc32: Fixed a bug where failure in a specific control validation check could leave the state semi-runnable. This was caused by a driver change in perfctr-2.7.11. - The value field in CPU register descriptors has been extended to 64 bits on all platforms. This is needed now for ppc64 and ppc32, and may later be needed on x86. - Added David Gibson's ppc64 code for perfctr-2.7.10. The driver code has been updated to the current APIs. The user-space code has not been updated, so currently it does not compile on ppc64. This will be fixed soon. Version 2.7.12, 2005-03-18 - Header cleanups: Moved declarations for things now unused in the kernel from the kernel headers to the user-space library. Changed kernel declarations of kernel/user-space API structures to use explicitly-sized integer types. - Removed unused /sys/class/perfctr/cpu_type file. Version 2.7.11.1, 2005-03-14 - Corrected the patch for kernel 2.6.11. A 'diff' mistake caused the one in perfctr-2.7.11 to be incomplete. Version 2.7.11, 2005-03-14 - The kernel/user-space APIs have been converted to transmit control data in differently-shaped "packets" for different types of data. Data corresponding to CPU register images is transmitted as variable-length arrays. Potentially variable-sized arrays are no longer embedded in other API structures, but are transmitted separately. These changes should eliminate "binary struct" versioning problems. - Since the library/application API had to change, the API structures were made compatible with perfctr-2.6.x again. 
(Reverted misguided API change in perfctr-2.7.8.) Added code to translate control data between the application format and the syscall formats. - The system calls have been renumbered again on all platforms. Version 2.7.10, 2005-02-20 - The kernel/user-space API for uploading control data and performing state changes has been updated. Writing control data no longer implies restarting a context. There is a new unified state changing operation for suspending, resuming, unlinking, and clearing contexts. This is preparation for a follow-up API change which will make control data uploads more generic and less sensitive to 'struct' layouts. The user-space library hides these changes from applications. - The system calls have been renumbered on all supported platforms, due to the merging of several state-changing system calls. Version 2.7.9, 2005-01-16 - The kernel components now publish global information via sysfs, in /sys/class/perfctr/. Removed the perfctr_info() system call. The library has been updated accordingly. (One omission: parsing of files containing "cpumasks" is not yet implemented.) - The system calls have been renumbered on all supported platforms, due to the removal of sys_perfctr_info() and to other system calls being added recently. - Fixes forward-ported from perfctr-2.6.12: * On x86/x86-64, perfctr_event_codes.h now includes P4 events. * On x86-64 libraries will now be installed in PREFIX/lib64/, as per current standards, unless overridden by LIBDIR. * Perfex had a bug in which it interpreted all numbers as hex, even those without "0x" prefixes. Perfex now emits warnings for ambiguous numbers. To silence the warnings, (a) prefix hex numbers with "0x" (preferred), or (b) use the "-d" option to enable decimal numbers, which requires "0x" prefixes on hex numbers, or (c) use the "-x" option to force all numbers to be interpreted as hex (deprecated). 
The "-d" option should be the default, but unfortunately that would break user-level scripts that assumed that "0x"-less numbers are still hex. * Changes in examples/signal/ to handle glibc-2.3.3 on PPC32. Version 2.7.8, 2004-11-24 - Changed the sys_vperfctr_control() API to handle future processors with more counters, and to allow for reduced syscall argument copying when only a small number of counters are being used. Converted the per-counter control fields from a struct-of-arrays to an array-of-struct layout. Version 2.7.7, 2004-11-13 - Workarounds for a hardware quirk on x86 and x86-64, where interrupts can be delivered some time after the counters have been stopped. Due to scheduling, an interrupt could be taken in the context of an unrelated process, which would prematurely terminate interrupt reporting for the original process. - Fixed a bug in the x86 and x86-64 kernels where the context-switch path suspended the previous process' performance counters too late. This could allow an overflow interrupt to be taken in the context of an unrelated process, with effects similar to the hardware quirk described above. - PPC32 updates: Enable overflow interrupts on all G4 processors starting with the 7410 Rev 1.3, and all IBM G3 processors starting with the 750FX DD2.3. Add support for MPC7447A and MPC7448. - Kernel/user-space API change for sys_vperfctr_read(). - System call numbers changed. Version 2.7.6, 2004-10-19 - Added preliminary overflow interrupt support for PPC32. There are currently several limitations of this feature: * Only enabled on MPC7455 processors. Will later be enabled on all G4 processors except 7400 and early 7410. * Either none or all of the "PMCj" group counters (i.e., all but PMC1) must be marked as interrupt-mode counters. This is because PPC32 doesn't provide per-counter interrupt enable/disable control. User-space should ignore overflow signals from counters it isn't interested in. 
- Corrected the PPC32 driver's handling of MMCR0 changes due to use of the FCECE or TRIGGER control flags. - Fixed a synchronisation error in the interface between the per-process counters driver and the low-level drivers. The error triggered warnings in DEBUG_SPINLOCK_SLEEP-enabled kernels. Version 2.7.5.1, 2004-09-19 - i386 and x86_64 system call numbers have changed: updated library and 2.6-vanilla kernel patch accordingly. - Fixed a problem causing an incomplete "wrapper" file to be installed as /usr/include/asm/perfctr.h on x86_64 systems. - Eliminated a potential kernel crash on P4 model 3 Prescott processors, due to the driver initialising two control registers that have been removed from P4M3. The P4M3 Nocona does not appear to have been affected by this error. - Fixed install procedure to not fail to install the shared library's symbolic links when updating an older installation. Version 2.7.5, 2004-08-18 - Some x86 driver cleanups. - Minor fixes to the driver init testing code on x86-64. - Fixed update-kernel script to use 'head' in a POSIX compliant way. - Updated user-space library and examples to handle P4 Model 3 in 64-bit mode. - Fixed an unforeseen kernel synchronisation error with the newly added perfctr inheritance code. The error triggered warnings in CONFIG_DEBUG_SPINLOCK_SLEEP-enabled kernels. - Fixed another kernel locking rule violation in the new perfctr inheritance code. - Fixed an x86 driver initialisation bug which could cause it to disable the NMI watchdog prematurely. Version 2.7.4.1, 2004-07-18 - Add patches directory with patches for kernels 2.6.8-rc1-mm1 and 2.6.8-rc2. A CVS mistake caused these files to not be included in perfctr-2.7.4. Version 2.7.4, 2004-07-17 - Reimplemented inheritance of per-process perfctrs. Their settings are now inherited across fork(). At wait(), if parent and child still use the same control, the child's final counts (its and its children's) are merged into the parent's "children counts".
The perfex application will now show the sum of the self and children counts for each counter. - Bug fix for PowerPC 7400/7410: allow specifying MMCR2[THRESHMULT]. - Added documentation in linux/Documentation/perfctr/. - Reduce stack usage in sys_vperfctr_control() and sys_vperfctr_read(). - PPC32 now supports generic chips in timebase-only mode. Useful for high-resolution time measurements on generic chips. - Add support for Model 13 Pentium-M (Dothan). - Many code cleanups. Version 2.7.3, 2004-05-31 - Replaced the single system call by six system calls: one for getting CPU information, and 5 for per-process perfctrs. - Removed marshalling of system call parameters. Conventional copying is now done. - Temporarily removed global-mode perfctrs, while the API to the per-process perfctrs is being redesigned. - Changed x86-64 to use the x86 code. Simplifies maintenance, and, in theory, adds support for IA32e/EM64T. - Moved detailed CPU type detection on x86 from the driver to the library. This is both a cleanup and a bug fix. - Some changes to prepare the ppc32 data structures for potential G5/970 support, in both 32 and 64-bit kernels. - PowerPC 750GX support added. - Moved ppc32 #define:s to . - Lots of code cleanups. Converted NR_CPUS arrays to per_cpu(). Changed spacing in if/while/switch to be "normal". Version 2.7.2, 2004-05-14 - Changes for submission to 2.6.6-mm kernel. Replaced ioctl() interface by new syscall (by request, not choice). Eliminated module support and backwards compatibility. Many other cleanups. Version 2.7.1, 2004-05-10 - Updated the x86 and x86-64 drivers for the final version of the local APIC ownership API included in kernel 2.6.6. Perfctr and Oprofile can now coexist safely. Version 2.7.0, 2004-05-04 - Added a minimal performance counter resource management API to the kernel patch. This allows different drivers to claim the hardware as they need it, without risk of conflicts. 
Tested with the NMI watchdog, oprofile, and perfctr all loaded (but obviously not active) at the same time. Available for the 2.6.6-rc3 kernel. - Significant rewrites in the x86 and x86_64 low-level drivers for the new API. Instead of reserving the hardware as long as the driver is loaded, it is reserved and released dynamically based on whether the high-level driver needs it or not. Version 2.6.7, 2004-05-04 - Merged several x86_64-specific driver files with their x86 counterparts, reducing the amount of duplicated code. - Added textual descriptions to the library's P6 event sets. From Bryan O'Sullivan. - Changed examples/signal/signal to count retired instructions instead of retired micro-operations on AMD K7. Needed to avoid a loop with the same instruction overflowing indefinitely. - Updated kernel support: 2.6.6-rc3, 2.4.27-pre1, 2.4.22-1.2188.nptl (FC1), 2.4.21-9.0.1 (RHEL3), 2.4.20-31.9 (RH9). Version 2.6.6, 2004-02-21 - Pentium-M has an undocumented local APIC quirk which can stop perfctr interrupt delivery. Added workaround to prevent this. - Fixed a bug in x86-64's perfctr interrupt entry code in 2.4 kernels. Luckily, the bug turned out to be harmless (a bogus "rip" value was retrieved, but never used by the higher-level interrupt handler). - Added support for Pentium 4 Model 3 processors, which have slight event set changes from earlier models. - Updated kernel support: 2.6.3, 2.4.25, 2.4.22-1.2174.nptl (FC1), 2.4.20-30.9 (RH9), and 2.4.21-193 (SuSE). Removed support for some obsolete FC1 and RH update kernels. Version 2.6.5, 2004-01-26 - Relaxed and corrected control checks on Pentium 4: * Allow ESCR.CPL_T1 to be non-zero when using global-mode counters on HT processors. * Don't require ESCR.CPL_T0 to be non-zero. CPL_T0==0b00 is safe and potentially useful (global counters on HT). * Require CCCR.ACTIVE_THREAD==0b11 on non-HT processors, as documented in the IA32 Volume 3 manual. 
Old non-HT P4s seem to work Ok for all four values, but this is neither guaranteed nor useful. - Per-process counters driver updated for filp->f_mapping change in 2.6.2-rc kernels. - Support 2.4.21-9.EL (RHEL3) and 2.4.22-1.2149.nptl (FC1) kernels. - Library updates for PowerPC: * Added cpu_type constants for struct perfctr_info. * Decode PVR and define perfctr_info.cpu_type accordingly. * Added event set descriptions for 604/604e/750. Version 2.6.4, 2004-01-12 - Added support for PowerPC 604/7xx/74xx processors. * Overflow interrupts are not yet supported due to a hardware erratum affecting many 7xx and early 74xx processors. * The user-space components support PowerPC, but CPU detection and event set descriptions are not yet implemented. * Supported in 2.6.1 and 2.4.23 and newer 2.4 kernels. - Updated kernel support: 2.6.1, 2.4.25-pre4, 2.4.22-1.2140.nptl (FC1 update), 2.4.21-4.0.2.EL (RHEL update), and 2.4.20-28.x (RH 7.x/8.0/9 update). Version 2.6.3-pl1, 2004-01-01 - Updated kernel support: 2.6.1-rc1, 2.4.24-pre3, 2.4.22-1.2135.nptl (FC1 update), 2.4.21-6.EL (RHEL Taroon beta update), and 2.4.20-27.x (RH 7.x/8.0/9 update). - Moved the x86 performance counter interrupt handler code from the driver source to the kernel, via the patch kit. Needed to cope with changes in RedHat's 2.4.21-6.EL kernel. This change only affects 2.4.21 and later 2.4 kernels. Version 2.6.3, 2003-12-21 - Fixed a bug where a read of the global-mode counters could fail with EOVERFLOW due to an incorrect structure descriptor. The bug only existed in perfctr-2.6.2. (Thanks to Pavel Machek for reporting this problem.) - AMD64 IA32 emulation code cleaned up for kernel 2.4.23. - Updated kernel support: 2.6.0, 2.4.24-pre1, 2.4.23, 2.4.22-1.2129.nptl (FC1 update), 2.4.21-1.1931.2.393.ent (RHEL Taroon beta), and 2.4.20-24 (RH 7.x/8/9 update). - User-space package rpm spec file fixes: * Don't remove /dev/perfctr on package uninstall. * Don't add alias to /etc/modules.conf if it's already there. 
Version 2.6.2, 2003-11-23 - libperfctr.so is now installed with proper versioning. - ABI control and info structures padded to accommodate some extensions without breaking application/library binary compatibility. ABI version incremented to '5'. - Driver checks that only P4 models <= 2 use IQ_ESCR0/1. - Added support for Fedora Core 1's 2.4.22-1.2115.nptl kernel. - Driver compile fix for AMD64 in SMP 2.6 kernels. Version 2.6.1, 2003-10-05 - Opening a process' virtual perfctrs is now done via /dev/perfctr instead of /proc/pid/perfctr. This is needed due to the changed semantics for /proc/self and /proc/pid/ in kernel 2.6.0-test6. User-space is not affected since the perfctr-2.6 API and user-space library were prepared for this access method change. User-space code monitoring other processes should use gettid() to identify tasks in 2.6 kernels, since getpid() does the wrong thing for process threads. - Driver cleanups from obsoleting 2.4.15 and older kernels. - Made examples/global/global.c more robust. - Simplified usage with 2.6 kernels: it's no longer necessary to add an 'alias' declaration in /etc/modprobe.conf. - Added support for AMD K8 Revision C processors. Version 2.6.0, 2003-09-08 - The driver now kills a process' performance counters if the process migrates to a forbidden CPU. This ensures that unsafe changes to a process' CPU affinity mask don't break the driver, the hardware state, or other processes. (This is an issue on hyper-threaded P4s only.) - A bug fix in perfctr-2.6.0-pre3 broke compiling the driver non-modular in modular 2.4 kernels. Corrected that problem. Version 2.6.0-pre5, 2003-08-31 - Disabled driver debug code which could printk() in the kernel's context-switch path, as that is disallowed. - 2.4.16 is now the oldest supported kernel. - Compilation fixes for driver's ia32 emulation code on x86-64.
Version 2.6.0-pre4, 2003-08-19 - Kernel/user-space API switched to a new "sparse marshalling" mechanism, which supports x86 application code on x86-64, and API struct extensions w/o breaking binary compatibility. - Prepared the library for the future non-/proc/pid/perfctr API. - Fixed a bug in the per-process perfctr creation code. The remote-control interface was racy in preemptible kernels. - Fixed a bug in the process exit code for preemptible kernels. - Changes to handle 2.6 kernels with the cpumask_t patch (-mm, -osdl): * Driver converted to use cpumask_t API, with compatibility wrapper for cpumask_t-free kernels. * API change: removed the cpus and cpus_forbidden sets from the perfctr_info struct, added new data type and commands for retrieving these sets. (cpumask_t values cannot be exported as-is since their sizes depend on kernel configuration, and the type definition uses 'long' which breaks 32/64-bit binary compatibility.) * Updated library and example programs for the API change. - Fixed a dependency bug in the library Makefile. - Added support for VIA C3 Antaur/Nehemiah processors. Version 2.6.0-pre3, 2003-08-03 - Replaced 'long' by 'int' in the API structures to eliminate unnecessary ABI incompatibilities between x86 and x86-64. - Simplified global-mode perfctrs API: the write-control and read-state commands now operate on a single CPU instead of on a set of CPUs. Added a new start command to start the counters. - Added thin library wrappers for per-process perfctr kernel calls. Cleaned up examples/perfex and the library itself. - Removed the requirement that CCCR.ACTIVE_THREAD == 3 on P4. - Extended cascading should now work on Pentium 4 Model 2 processors. - Fixed a bug where the perfctr module's refcount could be zero with code still running in the module. This could race with rmmod in preemptive kernels, and in theory also in SMP kernels. 
Version 2.6.0-pre2, 2003-07-13 - Per-process perfctrs API fixes: control data is retrieved using new READ_CONTROL operation, mmap()ed state no longer exposes the control data, the SAMPLE operation is renamed to READ_SUM and now updates a given user-space buffer, non-write operations are permitted on dead perfctrs. Retrieving control explicitly makes the user-visible mmap()ed state binary compatible between x86 and x86-64. The other changes simplify the user-space library and allow perfex to replace raw mmap() accesses with higher-level operations. - Driver cleanups, including eliminating many #ifdefs and removing some unnecessary P4-specific driver procedures. - Fixes for macro redefinition warnings in the 2.4.22-pre3 kernel. - Perfctr library RPM spec file updates from Bryan O'Sullivan. Version 2.6.0-pre1, 2003-07-02 - Rearranged the data structure holding the counter state to reduce the number of cache lines needed to be touched at key operations. The new representation is also binary compatible between x86 and x86-64, which matters since user-space mmap()s it. - Added RPM spec file for the library. (From Bryan O'Sullivan). - Patch kit updated for kernels 2.4.22-pre2 and 2.5.73. Version 2.5.5, 2003-06-15 - Updates for driver model changes in kernel 2.5.71. - Minor updates to the library's event descriptions for Pentium 4. - Now supports SuSE's 2.4.19.SuSE-206 kernel for SLES 8 users. Autodetection of SuSE kernel versions is not yet implemented: pass "--patch=2.4.19.SuSE-206" to perfctr's update-kernel script to ensure that the correct patch is applied. - Patch kit updates for 2.4.21 final and 2.4.20-18 RH kernels. Version 2.5.4, 2003-06-01 - Corrected the driver's handling of OVF_PMI+FORCE_OVF counters on Pentium 4. This configuration didn't work at all, and led to various BUG messages from the driver. These restrictions apply to OVF_PMI+FORCE_OVF counters: * The ireset value must be -1.
* Once the counter has interrupted once, it will continue to interrupt when the faulting instruction is restarted, causing it to never complete. This problem also occurs for non-FORCE_OVF interrupt-mode counters if the ireset value is of too small magnitude, like -1. This appears to be a P4 hardware quirk. Don't restart FORCE_OVF interrupt-mode counters, and don't use ireset values too small to allow instructions to complete. - Updated library's K8 event descriptions to match current documentation. Corrected several omissions and errors. - Patch kit updated for kernels 2.5.70 and 2.4.21-rc6. Version 2.5.3.1, 2003-05-21 - Patch kit updated for recent RedHat 6.2/7.x/8.0/9 update kernels (2.2.24-{6.2.3,7.0.3} and 2.4.20-13.{7,8,9}). - Fixed a driver compile warning which occurred when the driver is built as a module in 2.4 SMP kernels using module versions. - x86-64 now uses 'long long' for 64-bit sums, like x86. This reduces x86 and x86-64 user-space source code incompatibility. Version 2.5.3, 2003-05-16 - Added support for the Pentium M processor. It is mostly like a Pentium III with some more events, except that six old Pentium III / Pentium Pro events have been redefined. - Added support for K8 in 64-bit mode (the x86_64 kernel arch). Updated driver, user-space library, and example programs. The shared library libperfctr.so is now compiled with -fPIC. - K8 bug fix in examples/signal/signal.c: a missing INT flag caused the driver to reject the control setup. - P4 event descriptions updated from recent documentation changes. Version 2.5.2, 2003-04-13 - Updated power management code for the local APIC and NMI watchdog driver model changes in kernel 2.5.67. - Timer-based sampling of per-process performance counters is now always enabled: previously it was only done on SMP. Needed to avoid counter inaccuracies on high core-clock CPUs. 
- Fixes to user-space library implementation of remote-control virtual performance counters: open() failed due to a missing return; avoid potential buffer overflow error; fix the "read counters" procedure for the case where the remote process is sampling the time-stamp counter but no performance counters. - Added support for RedHat 9's 2.4.20-8 and 2.4.20-9 kernels. Version 2.5.1, 2003-03-23 - Fixed initialisation on hyper-threading capable P4s in SMP kernels older than 2.4.15 to not signal an error if hyper-threading is disabled: in this case the absence of working set_cpus_allowed() support is not a problem. - Fixed two compilation errors in the set_cpus_allowed() emulation affecting old 2.4 kernels configured for SMP. - INSTALL file updates. Version 2.5.0, 2003-03-10 - Added a simple user-space library API for accessing other processes' virtual performance counters. This uses a new type and a new set of operations since remote access has different requirements than accessing one's own counters. Following Mike Marty's suggestion, I left out the process control calls needed around these operations (ptrace() and wait()), so applications must handle that themselves. - Added 'make install' support for the user-space components. - Driver API cleanups. The 'eventsel_aux[]' array in 'struct perfctr_cpu_control' has been renamed as 'escr[]' and has been moved into the 'p4' sub-structure. (The change highlights the fact that this field was and is P4-only.) The 'version[]' string in 'struct perfctr_info' has been renamed to 'driver_version[]', since perfctr_info now also contains an 'abi_version' field. Some changes in the driver ABI: while not strictly necessary, they clean things up and make room for future changes. The ABI changed anyway from perfctr-2.4, so this shouldn't be a problem. - Added a perfctr_cpu_control_print() procedure to the library, and updated the example programs to use it. 
- Updated the perfex example program's help text to describe the syntax and meaning of event specifiers. - Patch kit updates for 2.2.24/2.4.18-26(RedHat)/2.5.64 kernels. Version 2.5.0-pre2, 2003-03-03 - Added a way for user-space to query the driver's ABI version, and updated the library to check it. - Fixed to not include when perfctr hasn't been configured. This allows the patched kernel source to compile cleanly also in archs not supported by perfctr. - Major patch kit overhaul. Updated configuration help texts. Removed unnecessary features and patches. Some cleanups. Added aliasing support to the 'update-kernel' script, which allows a patch to serve several kernels (when applicable). - The perfctr configuration option was poorly placed. It is now at the end of the "Processor type and features" menu. - Removed "notsc" kernel option support from the 2.2 kernel patches. To use the driver with an IDT WinChip (Centaur C6/2/3) CPU now requires a newer kernel with native "notsc" support. - Driver fixes for changes in the 2.4.21-pre5 and 2.5.63 kernels. Version 2.5.0-pre1, 2003-02-19 - Fixed the driver's API to support global-mode perfctrs on 2.5 SMP kernels and asymmetric hyper-threaded P4 multiprocessors. Updated examples/global/global.c for the new API. - Minor library cleanups. Updated example programs accordingly. - API cleanup: Removed obsolete STOP command from the driver for virtual perfctrs. The library now uses CONTROL instead. - Proper detection and support for AMD K8 processors. They are similar to the K7s, but the event sets are not identical. - The library's event set descriptions have been redesigned and expanded to include unit mask descriptions and descriptions of Intel P4 and AMD K8 events. The etc/perfctr-events.tab text file has been removed since event_codes.h now is generated from the library's data structures. 
Version 2.4.5, 2003-02-09 - Corrected the unit mask definition for the K7 SYSTEM_REQUEST_TYPE event in etc/perfctr-events.tab: WC is 0x02 not 0x04. - Fixed two compile warnings which could be triggered in 2.5 kernels. - Patch kit updates for 2.4.21-pre4/2.4.18-24(RedHat)/2.5.59-osdl2 kernels. Version 2.4.4, 2003-01-18 - Fixed a context-switch bug where an interrupt-mode counter could increment unexpectedly, and also miss the overflow interrupt. - Fixed some ugly log messages the new HT P4 support code added in perfctr-2.4.3 could generate at driver initialisation time. - Added preliminary support for AMD K8 processors with the regular 32-bit x86 kernel. The K8 performance counters appear to be identical or very similar to the K7 performance counters. Version 2.4.3, 2002-12-11 - Support for hyper-threaded Pentium 4s added. In a HT P4, the two logical processors share the performance counter state. HT P4s are therefore _asymmetric_ multi-processors, and the driver enforces CPU affinity masks on users of per-process performance counters to avoid resource conflicts. (Users are restricted to logical processor #0 in each physical CPU.) Limitations: * The kernel mechanism for updating a process' CPU affinity mask uses no or very weak locking, which makes certain race conditions possible that can break the driver's CPU affinity mask restrictions. For now, users should NOT use the sched_setaffinity() system call on processes using per-process performance counters. * Global-mode performance counters don't work on HT P4s due to limitations in the API. This will be fixed in perfctr-2.5. * 2.2 kernels don't have CPU affinity masks, and therefore can't support HT P4s. Version 2.4.2, 2002-11-25 - Fixed a driver bug where it could fail to prevent simultaneous use of global-mode and per-process performance counters. - Made the driver safe for preemptible 2.5 kernels. - New patches for RedHat update kernels 2.2.22-6.2.2, 2.2.22-7.0.2, 2.4.18-18.7.x, and 2.4.18-18.8.0. 
Version 2.4.1, 2002-10-12 - Support RedHat 8.0's 2.4.18-14 kernel. Building perfctr as a module caused a namespace clash in this kernel. The fix required a change to the driver's kernel-resident glue code. Version 2.4.0, 2002-09-26 - Fixed an overly strict access control check which prevented opening another process' /proc/pid/perfctr when the driver was built as a module. - Updates for kernels 2.2.22, 2.4.18-10-redhat, 2.4.20-pre8, 2.5.36. Version 2.4.0-pre2, 2002-08-27 - vperfctr_control() now allows the user to specify that some PMC sums are not to be cleared when updating the control. There is a new bitmap field `preserve' in struct vperfctr_control: if bit i is set then PMC(i)'s sum is not cleared. `preserve' is a simple `unsigned long' for now, since this type fits all currently known CPU types. This change breaks binary compatibility, but user-space code which clears the entire control record before filling in relevant fields will continue to work as before after a recompile. This feature removes a limitation which some people felt was a problem for some usage scenarios. Version 2.4.0-pre1, 2002-08-12 - The kernel driver has an initial implementation of a new remote-control API for virtual per-process perfctrs. A monitor process may access a target process' perfctrs via open(), mmap(), and ioctl() on the target's /proc/pid/perfctr. For open() and ioctl(), the monitor must hold the target under ptrace ATTACH control. The user-space library and examples have not been updated for the new API. Version 2.3.12, 2002-08-12 - Updated patch kit for the 2.4.19 final kernel. - Spelling fix in INSTALL. - Minor driver code size reduction on uniprocessor kernels. Version 2.3.11, 2002-07-21 - Interrupt-mode performance counters now have accumulated sums. The library procedures vperfctr_read_pmc() and vperfctr_read_ctrs() can now retrieve the sums of interrupt-mode counters. - Corrected the name of K7 event 0x42 to DATA_CACHE_REFILLS_FROM_L2.
Version 2.3.10, 2002-07-19 - Added a script, `update-kernel', to simplify the process of patching the kernel source code. See INSTALL for details. - The counter and control registers are now cleared when the driver is idle. This should allow the counter hardware to power down when not used, especially on P4. - Some Pentium MMX and Pentium Pro processors have an erratum which causes System Management Mode to shut down if user-space has been granted access to the RDPMC instruction. The driver now avoids granting RDPMC access on the affected processors. The user-space library makes this change transparent. - New CPU type code for Model 2 Pentium 4s, due to a few but significant changes between Model 0 and 1 and Model 2 CPUs. - The driver now supports Replay Tagging on the Pentium 4. The perfex program has been updated to allow users to specify values to store in PEBS_ENABLE and PEBS_MATRIX_VERT. For example, the following command could be used to count the number of L1 cache read misses on a Pentium 4: perfex -e 0x0003B000/0x12000204@0x8000000C --p4pe=0x01000001 --p4pmv=0x1 some_program Explanation: IQ_CCCR0 is bound to CRU_ESCR2, CRU_ESCR2 is set up for replay_event with non-bogus uops and CPL>0, and PEBS_ENABLE and PEBS_MATRIX_VERT are set up for the 1stL_cache_load_miss_retired metric. Note that bit 25 is NOT set in PEBS_ENABLE. Version 2.3.9, 2002-06-27 - Pentium 4 bug fix: An error in older revisions of Intel's IA32 Volume 3 manual caused the driver to program the wrong control register in a few cases, affecting uses of the uop_type event. Revision -007 of Intel document #245472 corrects the error, and the driver has been updated accordingly. Version 2.3.8.1, 2002-06-27 - Regenerated the patch file for RedHat's 2.4.18-5 kernel. The patch file in 2.3.8 only contained an error message from 'diff'. Version 2.3.8, 2002-06-26 - Added counter overflow interrupt support for Intel P4.
- New kernel support: standard kernels 2.2.21 and 2.4.19-rc1, and RedHat kernels 2.2.19-7.0.16, 2.4.9-34, and 2.4.18-5. - API changes: Removed unused and obsolete fields from the vperfctr state and control objects. Added fields to perfctr_cpu_control to enable future support for P4 replay tagging events. Incremented the vperfctr mmap() binary layout magic number. - Changed the "make" rule in INSTALL to build "vmlinux" before "modules". This change is needed for RedHat kernels. - Added build of a shared (.so) version of the user-space library. - When changing a process' vperfctr control data, the TSC sum is now preserved if the next control state includes the TSC. It used to be preserved only if both the previous and next states included the TSC. The difference matters when a running TSC is stopped and then restarted by a STOP;CONTROL command sequence. - Driver cleanups. Merged P6 and K7 driver procedures. Version 2.3.7, 2002-04-14 - Added Pentium 4 support to examples/perfex/. The full syntax of an event specifier is now "evntsel/aux@pmc". All three components are 32-bit processor-specific numbers, written in decimal or hexadecimal notation. "evntsel" is the primary processor-specific event selection code to use for this event. This field is mandatory. "/aux" is used when additional event selection data is needed. For the Pentium 4, "evntsel" is put in the counter's CCCR register, and "aux" is put in the associated ESCR register. No other processor currently needs this field. "@pmc" describes which CPU counter number to assign this event to. When omitted, the events are assigned in the order listed, starting from 0. Either all or none of the event specifiers should use the "@pmc" notation. Explicit counter assignment via "@pmc" is required on Pentium 4 and VIA C3 processors. 
As an example, the following command could be used to count the number of retired instructions on a Pentium 4: perfex -e 0x00039000/0x04000204@0x8000000C some_program Explanation: Program IQ_CCCR0 with required flags, ESCR select 4 (== CRU_ESCR0), and Enable. Program CRU_ESCR0 with event 2 (instr_retired), NBOGUSNTAG, CPL>0. Map this event to IQ_COUNTER0 (0xC) with fast RDPMC enabled. - The driver now permits cascading counters on the Pentium 4. - Preliminary driver infrastructure to support ptrace(ATTACH) for a future remote-control interface to per-process counters. - Driver and patch kit updated for the APIC interrupt entries changes in kernel 2.5.8-pre3. Version 2.3.6, 2002-03-21 - Fixed a problem which caused "BUG! resuming non-suspended perfctr" warnings when running PAPI's test cases with a DEBUG-compiled perfctr driver. There was no actual error, only a mismatch between the debug code and the code for changing event selection data. - Fixed a time-stamp counter accounting error when user-space resumed interrupt-mode perfctrs with the VPERFCTR_IRESUME ioctl. Version 2.3.5, 2002-03-17 - Multiprocessor AMD K7 machines should work now. A bug in current 2.2/2.4/2.5 kernels prevented correct CPU identification on these machines, causing crashes. The driver now works around this bug. - Added support for the VIA C3 Ezra-T processor. - Added some support for interrupt-mode counters to the library. Cleaned up examples/signal/. - Added links in OTHER to John Reiser's tsprof and Troy Baer's lperfex tools. Version 2.3.4, 2002-01-23 - More detailed installation instructions in INSTALL. - Experimental support for at-retirement counting on Pentium 4. Updated examples/global/ to count FLOPS on Pentium 4. - Fixed uses of __FUNCTION__ to comply with changes in GCC 3.0.3. Version 2.3.3, 2001-12-31 - Added support for the 2.4.16 and 2.4.17 kernels.
- SMP bug fixed: if a process using interrupt-mode counters migrates from CPU1 to CPU2 and then back to CPU1, then it could incorrectly resume the stale state cached in CPU1. - P6 bug fixed: when a process resumed, it could inadvertently activate a suspended interrupt-mode counter belonging to the previous process using the performance counters. - Pentium 4 bug fixed: could fail to update the control registers on a context switch. - Removed the "pmc_map[] must be the identity function" restriction from P6 and K7. - Updated examples/global/global.c: added Pentium 4 support (preliminary, counting MIPS not FLOPS), corrected VIA C3 handling, and corrected 32-bit integer overflow problems affecting fast CPUs. - Removed perfctr_evntsel_num_insns() from the library: the interface could not support the Pentium 4. examples/self/self.c now does the setup all by itself, with Pentium 4 support. Version 2.3.2, 2001-11-19 - Corrected an error in the driver's mapping from counter number to control registers on the Pentium 4. Counter 17 didn't work, and attempts to use it could have disturbed other counters as well. - Fixed a minor omission in the Pentium 4 initialisation code. Version 2.3.1, 2001-11-06 - New patches for kernels 2.2.20, 2.4.9-13 (RedHat 7.2 update), 2.4.13-ac5, and 2.4.14. Minor cleanup in the P4 driver code. Version 2.3, 2001-10-24 - Added support for multiple interrupt-mode virtual perfctrs with automatic restart. Updated the signal delivery interface to pass a bitmask describing which counters overflowed; the siginfo si_code is now fixed as SI_PMC_OVF (fault-class). - Added EXPORT_NO_SYMBOLS to init.c, for compatibility with announced changes in modutils 2.5. - Patch set updated for recent kernels. Version 2.2, 2001-10-09 - Added preliminary Pentium 4 support to the driver, but only for the simple basic features. The example applications have not been updated, since I don't yet have a Pentium 4 for testing. 
Version 2.1.4, 2001-09-30 - Added -l/-L (--list/--long-list) options to examples/perfex to have it list the current CPU's available events. - Added 'set of events' descriptors for each supported CPU type to the library, and changed it to be a standard archive file. - Performance counter interrupts now work in standard kernels, starting with kernel 2.4.10. Updated README. Version 2.1.3, 2001-09-13 - Fixed a problem which prevented compiling the driver as a module in kernels older than 2.2.20pre10 if CONFIG_KMOD was disabled. - Cleaned up command-line option processing in perfex. It now uses the GNU getopt library and accepts long option names. - Fixed a typo in perfctr-events.tab (P6's INST_DECODED was misspelled as INST_DECODER), and updated/corrected several unit mask descriptions. - Replaced most occurrences of "VIA Cyrix III / C3" with "VIA C3". Version 2.1.2, 2001-09-05 - Added MODULE_LICENSE() tag, for compatibility with the tainted/ non-tainted kernel stuff being put into 2.4.9-ac and modutils. - The VIA C3 should be supported properly now, thanks to tests run by Dave Jones @ SuSE which clarified some aspects of the C3. - Minor bug fix in the perfctr interrupt assembly code. (Inherited from the 2.4 kernel. Fixed in 2.4.9-ac4.) Version 2.1.1, 2001-08-28 - Fixed a bug in the finalise backpatching code, which could cause a kernel hang in some configurations. - Updated for kernel 2.4.9-ac3, which required changes to avoid conflicts in the %cr4 access methods. - Preliminary code to detect Pentium 4 processors with Performance Monitoring features available. - Minor %cr4-related cleanups. - Minor documentation updates. - Added a link in OTHER to Curtis Janssen's vprof tool. Version 2.1, 2001-08-19 - Fixed a call backpatching bug, caused by an incompatibility between the 2.4 and 2.2 kernels' xchg() macros. - Fixed a bug where an attempt to use /proc/pid/perfctr on an unsupported processor would cause a (well-behaved) kernel oops.
- The WinChip configuration option has been removed, and WinChip users should instead pass "notsc" as a boot-time kernel parameter. This permitted a cleanup of the driver and the 2.4 kernel patches, at the expense of more code in the 2.2 kernel patches to implement "notsc" support. Version 2.0.1, 2001-08-14 - The "redirect call" backpatching code in the low-level driver has been changed again. The change in 2.0-pre6 was insufficient, due to a nasty SMP-related erratum in all Intel P6 processors. - Added support for 2.4.8/2.4.8-ac1 kernels. - Removed an obsolete check from the WinChip support code. Version 2.0, 2001-08-08 - Resurrected partial support for interrupt-mode virtual perfctrs. virtual.c permits a single i-mode perfctr, in addition to TSC and a number of a-mode perfctrs. BUG: The i-mode PMC must be last, which constrains CPUs like the P6 where we currently restrict the pmc_map[] to be the identity mapping. (Not a problem for K7 since it is symmetric, or P4 since it is expected to use a non-identity pmc_map[].) - Bug fix in perfctr_cpu_update_control(): start by clearing cstatus. Prevents a failed attempt to update the control from leaving the object in a state with old cstatus != 0 but new control. Version 2.0-pre7, 2001-08-07 - Updated user-space library: * Coding tweaks to attempt to make gcc (various versions) generate better code. (Not entirely successful. May have to resort to hand-written assembly code.) * New vperfctr_read_ctrs() sampling procedure. * New perfctr_print_info() helper procedure. - Updated example applications: * Use the library's perfctr_print_info() for consistent output. * Counts are now printed in decimal, not hex. * 'perfex' now checks for data layout mismatch when the child process' virtual perfctr is mmap:ed into user space. * 'self' uses the new vperfctr_read_ctrs() sampling procedure. * 'signal' compiles again. - Cleaned up the driver's debugging code. - Internal driver rearrangements.
The low-level driver (x86) now handles sampling/suspending/resuming counters. Merged counter state (sums and start values) and CPU control data to a single "CPU state" object. This simplifies the high-level drivers, and permits some optimisations in the low-level driver by avoiding the need to buffer tsc/pmc samples in memory before updating the accumulated sums (not yet implemented). - Removed WinChip "fake TSC" support. The user-space library can now sample with slightly less overhead on sane processors. Version 2.0-pre6, 2001-07-27 - Sampling bug fix for SMP. Normally processes are suspended and resumed many times per second, but on SMP machines it is possible for a process to run for a long time without being suspended. Since sampling is performed at the suspend and resume actions, a performance counter may wrap around more than once between sampling points. When this occurs, the accumulated counts will be highly variable and much lower than expected. A software timer is now used to ensure that sampling deadlines aren't missed on SMP machines. - Bug fix in the x86 "redirect call" backpatching routine. - Bug fix in the internal debugging code (CONFIG_PERFCTR_DEBUG). - Minor performance tweak for the P5/P5MMX read counters procedures. - To avoid undetected data layout mismatches, the user-space library now checks the data layout version field in a virtual perfctr when it is being mmap:ed into the user's address space. Version 2.0-pre5, 2001-06-11 - Structure layout changes to reduce sampling overheads. The ABI changed slightly, but I hope this is the last such change for some time. - Fixed two bugs related to the interaction of interrupt-mode perfctrs and the lazy EVNTSEL MSR update cache in the low-level driver. (Interrupt-mode support is still disabled in the high-level drivers, however.) - Fixed a bug in examples/perfex where it forgot to initialise the pmc_map[] control field. This caused the driver to refuse attempts to use more than one counter. 
The current fix is for P6/K7 only; a general "fixup" procedure will be added to the user-space library later. - Added a CONFIG_PERFCTR_DEBUG option to enable some internal consistency checking in the driver. This is a temporary measure intended to help debug two open problem reports. Version 2.0-pre4, 2001-04-30 - Some module usage accounting changes which should make automatic module loading and unloading more robust in 2.2 kernels. - Internal cleanups and a few minor bug fixes. - Some API naming changes, and O_CREAT can now be used to control whether opening /proc/self/perfctr should create and attach a vperfctr or not. - The user-space library has been updated for the new API. pmc_map[] is used to map from "virtual counter i" to an actual PMC index to be used by RDPMC -- the VIA Cyrix III / C3 is now able to sample in user-space even though it has no PMC(0). The layout of pmc_map[] is CPU-specific; see x86.c for details. Since TSC sampling is specified explicitly now, perfctr_cpu_nrctrs() has been changed to return the number of performance counters _excluding_ the TSC. - The example programs have been updated for the new API, with the exception of signal.c which is still non-functional. - The perfex.c example works better now that the API has a consistent one-evntsel-per-counter model even for Intel P5-like CPUs. - The global.c example has been fixed to not cause a division by zero on WinChip CPUs lacking a working TSC. Version 2.0-pre3, 2001-04-17 - Preliminary implementation of the new data structures and API is in place. The user-space components have not yet been updated. Interrupt-mode virtual perfctrs have been disabled pending completion of necessary CPU driver support. - Now uses "VIA_C3" as the family name for both the VIA C3 and the slightly older VIA Cyrix III processors. "VIA_CYRIX_III" was just too clumsy and confusing. (It's not a Cyrix at all.) 
- Fixed etc/perfctr-events.tab to make Cyrix' event codes agree with reality rather than with the Cyrix manuals. The manuals ignore the fact that the 7-bit event codes are stored in two distinct bit fields in the CESR. Version 2.0-pre2, 2001-04-07 - Removed automatic inheritance of per-process virtual perfctrs across fork(). Unless wait4() is modified, it's difficult to communicate the final values back to the parent: the now abandoned code did this in a way which made it impossible to distinguish one child's final counts from another's. Inheritance can be implemented in user-space anyway, so the loss is not great. The interface between the driver and the rest of the kernel is now smaller and simpler than before. - Dropped support for kernels older than 2.2.16. - Preliminary support for the VIA C3 processor. Version 2.0-pre1, 2001-03-25 - First round of API and coding changes/cleanups for version 2.0. The driver version in struct perfctr_info is now a string instead of the previous major/minor/micro version number mess. - Internal cleanups and minor fixes. - Fixed an include file problem which made some C compilers (not gcc) fail when compiling user-space applications using the driver. Version 1.9, 2001-02-13 - Fixed compilation problems for 2.2 and SMP kernels. - Corrected VIA Cyrix III support. The "VIA Cyrix III" product has apparently used two distinct CPUs. Initial CPUs were a Cyrix design (Joshua) while current CPUs apparently are a Centaur design (Samuel). Added support for "Samuel" CPUs. - Two corrections in the K7 perfctr event list. - Small tweaks to vperfctr interrupt handling. - Added preliminary interrupt-mode support for AMD K7. Version 1.8, 2001-01-23 - Added preliminary interrupt-mode support to virtual perfctrs. Currently for P6 only, and the local APIC must have been enabled. Tested on 2.4.0-ac10 with CONFIG_X86_UP_APIC=y. When an i-mode vperfctr interrupts on overflow, the counters are suspended and a user-specified signal is sent to the process. 
The user's signal handler can read the trap pc from the mmap:ed vperfctr, and should then issue an IRESUME ioctl to restart the counters. Version 1.7, 2001-01-01 - Updated patches for kernels 2.2.18 and 2.4.0-prerelease. - Removed the need to ./configure the library before building it. - /dev/perfctr is now only used for global-mode perfctrs. - Library API changes to reflect new /dev/perfctr semantics. - Backported /proc/self/perfctr to kernels 2.2.13-2.2.17. - /proc/self/perfctr is now mandatory for virtual perfctrs. - Fixed a VIA Cyrix III CPU detection bug. - Fixed a minor problem in the 2.4 patch to drivers/Makefile. - Changed examples/global/global.c to count MFLOP/s instead of branches and branch prediction hits/misses. Version 1.6, 2000-11-21 - Updated for kernels 2.4.0-test11 and 2.2.18pre22. - Preliminary implementation of /proc/self/perfctr as a more direct way of accessing one's virtual perfctrs. (If this works out, the /dev/perfctr interface to vperfctrs will be phased out.) The driver can still be built as an autoloadable module. (For now, only supported in 2.2.18pre22 and 2.4.0-test11.) - Some user-space library API changes to accommodate /proc/self/perfctr. - The per-process virtual TSC is no longer restarted from zero when the perfctrs are reprogrammed, which allows it to be used as a high-res per-process clock (i.e. gethrvtime()). - Rewrote the `command' example application to use perfctr inheritance instead of the recently removed "remote control" facility. - WinChip documentation updates and corrections. Version 1.5, 2000-09-03 - The virtual perfctr "remote control" facility has been removed, resulting in major simplifications in the driver. 
Since version 1.3 of the driver, the most common application of the remote control facility (to record events from unmodified applications) can be more easily implemented using the perfctr inheritance facility (perfctr control setup is inherited from parent to child processes, and a child's event counts are propagated back to its parent). Removing the remote control facility simplified resource management and eliminated a number of concurrency issues. - Code cleanups. Dropped support for intermediate 2.3 and early 2.4 kernels. The code now supports kernels 2.2.xx and 2.4.0-test7 or later only (via a 2.4-on-2.2 simulation layer). - A number of changes to the user-space library. The API is now thread- safe (the library has no internal state), and the naming scheme has been simplified due to the removal of the remote-control facility. The zero-syscall perfctr sampling code has been rewritten and should be faster and more robust. (It fixed a sampling problem one user had on a 4-way MP box.) Version 1.4, 2000-08-11 - Updates to comply with changes in 2.4.0-test kernels, in particular concerning module owner and use count tracking, and the Virtual File System (VFS) infrastructure. - A bug which prevented reclaiming VFS resources (dentries and inodes) allocated to virtual perfctrs has been fixed. This bug affected both 2.2.x and 2.4.0-test kernels. Version 1.3, 2000-06-29 - Implemented inheritance for per-process virtual perfctrs. This means that a child's performance-monitoring counts are attributed to its parent, similarly to how time is handled. The parent must have active perfctrs before forking off the child, and neither parent nor child must have reprogrammed its perfctrs when the child exits, otherwise no propagation occurs. Threads created implicitly by the kernel via request_module() are protected from perfctr inheritance. - Added an example program to illustrate inheritance. - Fixed two small buglets in the driver.
- Preliminary changes to make the user-space library thread-safe. - Updated driver for kernel 2.4.0-test2. - The driver now exports the CPU clock frequency to user-space, to enable mapping of accumulated TSC counts to actual time. - Clarified that this package is licensed under the GNU LGPL. Version 1.2, 2000-05-24 - Added support for kernels 2.2.16pre4 and 2.3.99-pre9-5. - Added support for generic x86 processors with a time-stamp counter but no performance-monitoring counters. By using the driver to virtualise the TSC, accurate cycle-count measurements are now possible on PMC-less processors like the AMD K6. - Fixed a bug in the WinChip driver. - Miscellaneous code cleanups. Version 1.1, 2000-05-13 - Support for Linux kernels 2.2.14, 2.2.15 and 2.3.99-pre8. - Changes to the driver and user-space library to reduce the number of getpid() calls. (Suggested by Ulrich Drepper.) - Added support for the VIA Cyrix III processor. - Performance improvements in the x86 driver interface. - Some code cleanups. Version 1.0, 2000-01-31 - Support for Linux kernels 2.3.41, 2.2.15pre5, and 2.2.14. - Code cleanups in order to handle drivers for non-x86 processors. - Changes to the x86 drivers to reduce cache footprint and sampling overhead. (Sample low 32 bits of counters, but maintain 64-bit sums.) Version 0.11, 2000-01-30 - Support for Linux kernels 2.3.41 and 2.2.14. - Minor code cleanups and fixes. - The CR4.PCE flag is now globally enabled on x86, except for those processors which do not support it. This is done in part to reduce the overhead of virtualising the performance counters, but it is also necessary due to changes in kernel 2.3.40. Version 0.10, 2000-01-23 - Support for Linux kernels 2.3.40 and 2.2.14. - Global-mode performance counters are now implemented. - Added hardware support for the WinChip 3 processor. - More source code reorganisation. Version 0.9, 2000-01-02 - Support for Linux kernels 2.3.35, 2.2.14pre18, and 2.2.13.
- The driver can now be built as a module. - The driver now installs itself as the /dev/perfctr device instead of adding a system call. - Significant source code reorganisation. Version 0.8, 1999-11-14 - Support for Linux kernels 2.3.28 and 2.2.13. - Major updates to reduce the overhead of maintaining virtual performance-monitoring counters: - The control registers are cached and updated lazily. - The counter registers are no longer written to. - Unused counters are no longer manipulated at all. (This matters especially for the AMD K7.) - Reduced the process scheduling overhead for processes not using performance-monitoring counters. - Minor code cleanups, bug fixes, and documentation updates. Version 0.7, 1999-10-25 - Support for Linux kernels 2.3.22 and 2.2.13. - Improved performance. (Uses RDPMC instead of RDMSR when possible.) - The AMD K7 Athlon should now work properly. - User-space now uses mmap() to read the kernel's accumulated counter state. - The driver is now invoked via a new sys_perfctr() system call, instead of abusing prctl(). - The kernel patch has been cleaned up. The "#ifdef CONFIG_PERFCTR" mess has been eliminated. Version 0.6, 1999-09-08 - Version 0.6 with support for Linux kernels 2.3.17 and 2.2.12. - Preliminary support for AMD Athlon added. Version 0.5, 1999-08-29 - Support for Linux kernel 2.3.15. - The user-space buffer is updated whenever state.status changes, even when a remote command triggers the change. - Reworked and simplified the high-level code. All accesses now require an attached file in order to implement proper accounting and synchronisation. The only exception is UNLINK: a process may always UNLINK its own PMCs. - Fixed counting bug in sys_perfctr_read(). - Improved support for the Intel Pentium III. - Another WinChip fix: fake TSC update at process resume. - The code should now be safe for 'gcc -fstrict-aliasing'. Version 0.4, 1999-07-31 - Support for Linux kernel 2.3.12.
- Implemented PERFCTR_ATTACH and PERFCTR_{READ,CONTROL,STOP,UNLINK} on attached perfctrs. An attached perfctr is represented as a file. - Fixed an error in the WinChip-specific code. - Perfctrs now survive exec(). Version 0.3, 1999-07-22 - Support for Linux kernel 2.3.11. - Interface now via sys_prctl() instead of /dev/perfctr. - Added NYI stubs for accessing other processes' perfctrs. - Moved to dynamic allocation of a task's perfctr state. - Minor code cleanups. Version 0.2, 1999-06-07 - Support for Linux kernel 2.3.5. - Added support for WinChip CPUs. - Restart counters from zero, not their previous values. This corrected a problem for Intel P6 (WRMSR writes 32 bits to a PERFCTR MSR and then sign-extends to 40 bits), and also simplified the code. - Added support for syncing the kernel's counter values to a user- provided buffer each time a process is resumed. This feature, and the fact that the driver enables RDPMC in processes using PMCs, allows user-level computation of a process' accumulated counter values without incurring the overhead of making a system call. Version 0.1, 1999-05-30 - First public release for Linux kernel 2.3.3. papi-5.6.0/src/libpfm-3.y/examples_ia64_v2.0/mont_opcode.c000664 001750 001750 00000016611 13216244362 025070 0ustar00jshenry1963jshenry1963000000 000000 /* * mont_opcode.c - example of how to use the opcode matcher with the Dual-Core Itanium 2 PMU * * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. 
* Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. */ #include #include #include #include #include #include #include #include #include #include #define OPCM_EVENT "IA64_TAGGED_INST_RETIRED_IBRP0_PMC32_33" #define NUM_PMCS PFMLIB_MAX_PMCS #define NUM_PMDS PFMLIB_MAX_PMDS #define MAX_EVT_NAME_LEN 128 #define MAX_PMU_NAME_LEN 32 #define NLOOP 200UL /* * we don't use static to make sure the compiler does not inline the function */ int do_test(unsigned long loop) { unsigned long sum = 0; while(loop--) sum += loop; return sum; } static void fatal_error(char *fmt,...) __attribute__((noreturn)); static void fatal_error(char *fmt, ...) 
{ va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); exit(1); } int main(void) { pfmlib_input_param_t inp; pfmlib_output_param_t outp; pfmlib_mont_input_param_t mont_inp; pfarg_reg_t pd[NUM_PMDS]; pfarg_reg_t pc[NUM_PMCS]; pfarg_context_t ctx; pfarg_load_t load_args; pfmlib_options_t pfmlib_options; int ret; int type = 0; int id; unsigned int i; char name[MAX_EVT_NAME_LEN]; /* * Initialize pfm library (required before we can use it) */ if (pfm_initialize() != PFMLIB_SUCCESS) fatal_error("Can't initialize library\n"); /* * Let's make sure we run this on the right CPU */ pfm_get_pmu_type(&type); if (type != PFMLIB_MONTECITO_PMU) { char model[MAX_PMU_NAME_LEN]; pfm_get_pmu_name(model, MAX_PMU_NAME_LEN); fatal_error("this program does not work with the %s PMU\n", model); } /* * pass options to library (optional) */ memset(&pfmlib_options, 0, sizeof(pfmlib_options)); pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ pfmlib_options.pfm_verbose = 1; /* set to 1 for verbose */ pfm_set_options(&pfmlib_options); memset(pd, 0, sizeof(pd)); memset(pc, 0, sizeof(pc)); memset(&ctx, 0, sizeof(ctx)); memset(&load_args, 0, sizeof(load_args)); memset(&inp,0, sizeof(inp)); memset(&outp,0, sizeof(outp)); memset(&mont_inp,0, sizeof(mont_inp)); /* * We indicate that we are using the first opcode matcher (PMC32/PMC33). */ mont_inp.pfp_mont_opcm1.opcm_used = 1; /* * We want to match all the br.cloop in our test function. * This branch is an IP-relative branch for which the major * opcode (bits [40-37]) is 4 and the btype field (bits[6-8]) is 5. * We ignore all the other fields in the opcode. * * On Montecito, the opcode matcher covers the full 41 bits of each * instruction but we'll ignore them in this example. Hence the * match value is: * * match = (4<<37)| (5<<6) = 0x8000000140 * * On Montecito, the match field covers the full 41 bits of each instruction. * But for this example, we only care about the major and btype field, * and we ignore all other bits. 
When a bit is set in the mask it means * that the corresponding match bit value is a "don't care". A bit * with value of zero indicates that the corresponding match bit * must match. Hence we build the following mask: * * mask = ~((0xf<<37) | (0x3<<6)) = 0x1fffffff3f; * * The 0xf comes from the fact that major opcode is 4-bit wide. * The 0x3 comes from the fact that btype is 3-bit wide. */ mont_inp.pfp_mont_opcm1.opcm_b = 1; mont_inp.pfp_mont_opcm1.opcm_match = 0x8000000140; mont_inp.pfp_mont_opcm1.opcm_mask = 0x1fffffff3f; /* * To count the number of occurence of this instruction, we must * program a counting monitor with the IA64_TAGGED_INST_RETIRED_PMC8 * event. */ if (pfm_find_full_event(OPCM_EVENT, &inp.pfp_events[0]) != PFMLIB_SUCCESS) fatal_error("cannot find event %s\n", OPCM_EVENT); /* * set the privilege mode: * PFM_PLM3 : user level only */ inp.pfp_dfl_plm = PFM_PLM3; /* * how many counters we use */ inp.pfp_event_count = 1; /* * let the library figure out the values for the PMCS */ if ((ret=pfm_dispatch_events(&inp, &mont_inp, &outp, NULL)) != PFMLIB_SUCCESS) fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); /* * now create the context for self monitoring/per-task */ if (perfmonctl(0, PFM_CREATE_CONTEXT, &ctx, 1) == -1 ) { if (errno == ENOSYS) { fatal_error("Your kernel does not have performance monitoring support!\n"); } fatal_error("Can't create PFM context %s\n", strerror(errno)); } /* * extract the unique identifier for our context, a regular file descriptor */ id = ctx.ctx_fd; /* * Now prepare the argument to initialize the PMDs and PMCS. 
*/ for (i=0; i < outp.pfp_pmc_count; i++) { pc[i].reg_num = outp.pfp_pmcs[i].reg_num; pc[i].reg_value = outp.pfp_pmcs[i].reg_value; } /* * figure out pmd mapping from output pmc */ for (i=0; i < outp.pfp_pmd_count; i++) pd[i].reg_num = outp.pfp_pmds[i].reg_num; /* * Now program the registers * * We don't use the save variable to indicate the number of elements passed to * the kernel because, as we said earlier, pc may contain more elements than * the number of events we specified, i.e., contains more thann coutning monitors. */ if (perfmonctl(id, PFM_WRITE_PMCS, pc, outp.pfp_pmc_count)) fatal_error("pfm_write_pmcs error errno %d\n",errno); if (perfmonctl(id, PFM_WRITE_PMDS, pd, outp.pfp_pmd_count)) fatal_error("pfm_write_pmds error errno %d\n",errno); /* * now we load (i.e., attach) the context to ourself */ load_args.load_pid = getpid(); if (perfmonctl(id, PFM_LOAD_CONTEXT, &load_args, 1)) fatal_error("pfm_load_context error errno %d\n",errno); /* * Let's roll now. */ pfm_self_start(id); do_test(NLOOP); pfm_self_stop(id); /* * now read the results */ if (perfmonctl(id, PFM_READ_PMDS, pd, inp.pfp_event_count)) fatal_error("pfm_read_pmds error errno %d\n",errno); /* * print the results */ pfm_get_full_event_name(&inp.pfp_events[0], name, MAX_EVT_NAME_LEN); printf("PMD%-3u %20lu %s (expected %lu)\n", pd[0].reg_num, pd[0].reg_value, name, NLOOP); if (pd[0].reg_value != 0) printf("compiler used br.cloop\n"); else printf("compiler did not use br.cloop\n"); /* * let's stop this now */ close(id); return 0; } papi-5.6.0/src/components/nvml/Rules.nvml000664 001750 001750 00000000465 13216244357 022465 0ustar00jshenry1963jshenry1963000000 000000 include components/nvml/Makefile.nvml COMPSRCS += components/nvml/linux-nvml.c COMPOBJS += linux-nvml.o CFLAGS += -I$(NVML_INCDIR) -I$(CUDA_DIR)/include LDFLAGS += $(LDL) linux-nvml.o: components/nvml/linux-nvml.c $(HEADERS) $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/nvml/linux-nvml.c -o linux-nvml.o papi-5.6.0/000775 001750 001750 
00000000000 13216762515 014541 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/libpfm4/lib/events/powerpc_nest_events.h000664 001750 001750 00000004421 13216244365 025203 0ustar00jshenry1963jshenry1963000000 000000 #ifndef __POWERPC_NEST_EVENTS_H__ #define __POWERPC_NEST_EVENTS_H__ #define POWERPC_PME_NEST_MCS_00 0 #define POWERPC_PME_NEST_MCS_01 1 #define POWERPC_PME_NEST_MCS_02 2 #define POWERPC_PME_NEST_MCS_03 3 static const pme_power_entry_t powerpc_nest_read_pe[] = { [ POWERPC_PME_NEST_MCS_00 ] = { .pme_name = "MCS_00", .pme_code = 0x118, .pme_short_desc = "Total Read Bandwidth seen on both MCS of MC0", .pme_long_desc = "Total Read Bandwidth seen on both MCS of MC0", }, [ POWERPC_PME_NEST_MCS_01 ] = { .pme_name = "MCS_01", .pme_code = 0x120, .pme_short_desc = "Total Read Bandwidth seen on both MCS of MC1", .pme_long_desc = "Total Read Bandwidth seen on both MCS of MC1", }, [ POWERPC_PME_NEST_MCS_02 ] = { .pme_name = "MCS_02", .pme_code = 0x128, .pme_short_desc = "Total Read Bandwidth seen on both MCS of MC2", .pme_long_desc = "Total Read Bandwidth seen on both MCS of MC2", }, [ POWERPC_PME_NEST_MCS_03 ] = { .pme_name = "MCS_03", .pme_code = 0x130, .pme_short_desc = "Total Read Bandwidth seen on both MCS of MC3", .pme_long_desc = "Total Read Bandwidth seen on both MCS of MC3", }, }; static const pme_power_entry_t powerpc_nest_write_pe[] = { [ POWERPC_PME_NEST_MCS_00 ] = { .pme_name = "MCS_00", .pme_code = 0x198, .pme_short_desc = "Total Write Bandwidth seen on both MCS of MC0", .pme_long_desc = "Total Write Bandwidth seen on both MCS of MC0", }, [ POWERPC_PME_NEST_MCS_01 ] = { .pme_name = "MCS_01", .pme_code = 0x1a0, .pme_short_desc = "Total Write Bandwidth seen on both MCS of MC1", .pme_long_desc = "Total Write Bandwidth seen on both MCS of MC1", }, [ POWERPC_PME_NEST_MCS_02 ] = { .pme_name = "MCS_02", .pme_code = 0x1a8, .pme_short_desc = "Total Write Bandwidth seen on both MCS of MC2", .pme_long_desc = "Total Write Bandwidth seen on both MCS of 
MC2", }, [ POWERPC_PME_NEST_MCS_03 ] = { .pme_name = "MCS_03", .pme_code = 0x1b0, .pme_short_desc = "Total Write Bandwidth seen on both MCS of MC3", .pme_long_desc = "Total Write Bandwidth seen on both MCS of MC3", }, }; #endif papi-5.6.0/src/ctests/sdsc4-mpx.c000664 001750 001750 00000026126 13216244361 020624 0ustar00jshenry1963jshenry1963000000 000000 /* * Test example for multiplex functionality, originally * provided by Timothy Kaiser, SDSC. It was modified to fit the * PAPI test suite by Nils Smeds, . * * This example verifies the adding and removal of multiplexed * events in an event set. */ #include #include #include #include #include #include "papi.h" #include "papi_test.h" #include "testcode.h" #define MAXEVENTS 9 #define REPEATS (MAXEVENTS * 4) #define SLEEPTIME 100 #define MINCOUNTS 100000 #define MPX_TOLERANCE 0.20 #define NUM_FLOPS 20000000 int main( int argc, char **argv ) { PAPI_event_info_t info; char name2[PAPI_MAX_STR_LEN]; int i, j, retval, idx, repeats; int iters = NUM_FLOPS; double x = 1.1, y, dtmp; long long t1, t2; long long values[MAXEVENTS], refvals[MAXEVENTS]; int nsamples[MAXEVENTS], truelist[MAXEVENTS], ntrue; #ifdef STARTSTOP long long dummies[MAXEVENTS]; #endif int sleep_time = SLEEPTIME; double valsample[MAXEVENTS][REPEATS]; double valsum[MAXEVENTS]; double avg[MAXEVENTS]; double spread[MAXEVENTS]; int nevents = MAXEVENTS, nev1; int eventset = PAPI_NULL; int events[MAXEVENTS]; int eventidx[MAXEVENTS]; int eventmap[MAXEVENTS]; int fails; int quiet; quiet = tests_quiet( argc, argv ); if ( argc > 1 ) { if ( !strcmp( argv[1], "quiet" ) ) { } else { sleep_time = atoi( argv[1] ); if ( sleep_time <= 0 ) sleep_time = SLEEPTIME; } } events[0] = PAPI_FP_INS; events[1] = PAPI_TOT_CYC; events[2] = PAPI_TOT_INS; events[3] = PAPI_TOT_IIS; events[4] = PAPI_INT_INS; events[5] = PAPI_STL_CCY; events[6] = PAPI_BR_INS; events[7] = PAPI_SR_INS; events[8] = PAPI_LD_INS; for ( i = 0; i < MAXEVENTS; i++ ) { values[i] = 0; valsum[i] = 0; nsamples[i] = 0; } 
/* Print test summary */ if ( !quiet ) { printf( "\nFunctional check of multiplexing routines.\n" ); printf( "Adding and removing events from an event set.\n\n" ); } /* Init the library */ retval = PAPI_library_init( PAPI_VER_CURRENT ); if (retval != PAPI_VER_CURRENT ) { test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); } /* Enable multiplexing */ #ifdef MPX retval = PAPI_multiplex_init( ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI multiplex init fail\n", retval ); } #endif /* Create an eventset */ if ( ( retval = PAPI_create_eventset( &eventset ) ) ) { test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); } /* Enable multiplexing on the eventset */ #ifdef MPX /* In Component PAPI, EventSets must be assigned a component index before you can fiddle with their internals. 0 is always the cpu component */ retval = PAPI_assign_eventset_component( eventset, 0 ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_assign_eventset_component", retval ); if ( ( retval = PAPI_set_multiplex( eventset ) ) ) { if ( retval == PAPI_ENOSUPP) { test_skip(__FILE__, __LINE__, "Multiplex not supported", 1); } test_fail( __FILE__, __LINE__, "PAPI_set_multiplex", retval ); } #endif /* See which events are available and remove the ones that aren't */ nevents = MAXEVENTS; for ( i = 0; i < nevents; i++ ) { if ( ( retval = PAPI_add_event( eventset, events[i] ) ) ) { for ( j = i; j < MAXEVENTS-1; j++ ) events[j] = events[j + 1]; nevents--; i--; } } /* We want at least three events? */ /* Seems arbitrary. 
Might be because intel machines used to */ /* Only have two event slots */ if ( nevents < 3 ) { test_skip( __FILE__, __LINE__, "Not enough events left...", 0 ); } /* Find a reasonable number of iterations (each * event active 20 times) during the measurement */ /* TODO: find Linux multiplex interval */ /* not sure if 10ms is close or not */ /* Target: 10000 usec/multiplex, 20 repeats */ t2 = 10000 * 20 * nevents; if ( t2 > 30e6 ) { test_skip( __FILE__, __LINE__, "This test takes too much time", retval ); } /* Measure one run */ t1 = PAPI_get_real_usec( ); y = do_flops3( x, iters, 1 ); t1 = PAPI_get_real_usec( ) - t1; /* Scale up execution time to match t2 */ if ( t2 > t1 ) { iters = iters * ( int ) ( t2 / t1 ); } /* Make sure execution time is < 30s per repeated test */ else if ( t1 > 30e6 ) { test_skip( __FILE__, __LINE__, "This test takes too much time", retval ); } /* Split the events up by odd and even? */ j = nevents; for ( i = 1; i < nevents; i = i + 2 ) eventidx[--j] = i; for ( i = 0; i < nevents; i = i + 2 ) eventidx[--j] = i; assert( j == 0 ); /* put event mapping in eventmap? 
*/ for ( i = 0; i < nevents; i++ ) eventmap[i] = i; x = 1.0; /* Make a reference run */ if ( !quiet ) { printf( "\nReference run:\n" ); } t1 = PAPI_get_real_usec( ); if ( ( retval = PAPI_start( eventset ) ) ) { test_fail( __FILE__, __LINE__, "PAPI_start", retval ); } y = do_flops3( x, iters, 1 ); PAPI_read( eventset, refvals ); t2 = PAPI_get_real_usec( ); /* Print results */ ntrue = nevents; PAPI_list_events( eventset, truelist, &ntrue ); if ( !quiet ) { printf( "\tOperations= %.1f Mflop", y * 1e-6 ); printf( "\t(%g Mflop/s)\n\n", ( y / ( double ) ( t2 - t1 ) ) ); printf( "%20s %16s %-15s %-15s\n", "PAPI measurement:", "Acquired count", "Expected event", "PAPI_list_events" ); for ( j = 0; j < nevents; j++ ) { PAPI_get_event_info( events[j], &info ); PAPI_event_code_to_name( truelist[j], name2 ); printf( "%20s = %16lld %-15s %-15s %s\n", info.short_descr, refvals[j], info.symbol, name2, strcmp( info.symbol,name2 ) ? "*** MISMATCH ***" : "" ); } printf( "\n" ); } /* Make repeated runs while removing/readding events */ nev1 = nevents; repeats = nevents * 4; /* Repeat four times for each event? */ for ( i = 0; i < repeats; i++ ) { /* What's going on here? as example, nevents=4, repeats=16*/ /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 == i*/ /* 0 1 2 3 0 1 2 3 0 1 2 3 0 1 2 3 == i%nevents */ /* 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 == (i%nevents)+1 */ /* 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 1 */ /* so we skip nevery NEVENTS time through the loop? 
*/ if ( ( i % nevents ) + 1 == nevents ) continue; if ( !quiet ) { printf( "\nTest %d (of %d):\n", i + 1 - (i / nevents), repeats - 4 ); } /* Stop the counter, it's been left running */ if ( ( retval = PAPI_stop( eventset, values ) ) ) { test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); } /* We run through a 4-way pattern */ /* 1st quarter, remove events */ /* 2nd quarter, add back events */ /* 3rd quarter, remove events again */ /* 4th wuarter, re-add events */ j = eventidx[i % nevents]; if ( ( i / nevents ) % 2 == 0 ) { /* Remove event */ PAPI_get_event_info( events[j], &info ); if ( !quiet ) { printf( "Removing event[%d]: %s\n", j, info.short_descr ); } retval = PAPI_remove_event( eventset, events[j] ); if (retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_remove_event", retval ); } /* Update the complex event mapping */ nev1--; for ( idx = 0; eventmap[idx] != j; idx++ ); for ( j = idx; j < nev1; j++ ) eventmap[j] = eventmap[j + 1]; } else { /* Add an event back in */ PAPI_get_event_info( events[j], &info ); if ( !quiet ) { printf( "Adding event[%d]: %s\n", j, info.short_descr ); } retval = PAPI_add_event( eventset, events[j] ); if (retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_add_event", retval ); } eventmap[nev1] = j; nev1++; } if ( ( retval = PAPI_start( eventset ) ) ) { test_fail( __FILE__, __LINE__, "PAPI_start", retval ); } x = 1.0; // This startstop is leftover from sdsc2? 
*/ #ifndef STARTSTOP if ( ( retval = PAPI_reset( eventset ) ) ) test_fail( __FILE__, __LINE__, "PAPI_reset", retval ); #else if ( ( retval = PAPI_stop( eventset, dummies ) ) ) test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); if ( ( retval = PAPI_start( eventset ) ) ) test_fail( __FILE__, __LINE__, "PAPI_start", retval ); #endif /* Run the actual workload */ t1 = PAPI_get_real_usec( ); y = do_flops3( x, iters, 1 ); PAPI_read( eventset, values ); t2 = PAPI_get_real_usec( ); /* Print approximate flops plus header */ if ( !quiet ) { printf( "\n(calculated independent of PAPI)\n" ); printf( "\tOperations= %.1f Mflop", y * 1e-6 ); printf( "\t(%g Mflop/s)\n\n", ( y / ( double ) ( t2 - t1 ) ) ); printf( "%20s %16s %-15s %-15s\n", "PAPI measurement:", "Acquired count", "Expected event", "PAPI_list_events" ); ntrue = nev1; PAPI_list_events( eventset, truelist, &ntrue ); for ( j = 0; j < nev1; j++ ) { idx = eventmap[j]; /* printf("Mapping: Counter %d -> slot %d.\n",j,idx); */ PAPI_get_event_info( events[idx], &info ); PAPI_event_code_to_name( truelist[j], name2 ); printf( "%20s = %16lld %-15s %-15s %s\n", info.short_descr, values[j], info.symbol, name2, strcmp( info.symbol, name2 ) ? "*** MISMATCH ***" : "" ); } printf( "\n" ); } /* Calculate results */ for ( j = 0; j < nev1; j++ ) { idx = eventmap[j]; dtmp = ( double ) values[j]; valsum[idx] += dtmp; valsample[idx][nsamples[idx]] = dtmp; nsamples[idx]++; } } /* Stop event for good */ if ( ( retval = PAPI_stop( eventset, values ) ) ) { test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); } if ( !quiet ) { printf( "\n\nEstimated variance relative " "to average counts:\n" ); for ( j = 0; j < nev1; j++ ) { printf( " Event %.2d", j ); } printf( "\n" ); } fails = nevents; /* Due to limited precision of floating point cannot really use typical standard deviation compuation for large numbers with very small variations. Instead compute the std devation problems with precision. 
*/ /* Update so that if our event count is small (<1000 or so) */ /* Then don't fail with high variation. Since we're multiplexing */ /* it's hard to capture such small counts, and it makes the test */ /* fail on machines such as Haswell and the PAPI_SR_INS event */ for ( j = 0; j < nev1; j++ ) { avg[j] = valsum[j] / nsamples[j]; spread[j] = 0; for ( i = 0; i < nsamples[j]; ++i ) { double diff = ( valsample[j][i] - avg[j] ); spread[j] += diff * diff; } spread[j] = sqrt( spread[j] / nsamples[j] ) / avg[j]; if ( !quiet ) { printf( "%9.2g ", spread[j] ); } } for ( j = 0; j < nev1; j++ ) { /* Make sure that NaN get counted as errors */ if ( spread[j] < MPX_TOLERANCE ) { if (!quiet) printf("Event %d tolerance good\n",j); fails--; } /* Neglect inprecise results with low counts */ else if ( avg[j] < MINCOUNTS ) { if (!quiet) printf("Event %d too small to fail\n",j); fails--; } else { if (!quiet) printf("Event %d failed!\n",j); } } if ( !quiet ) { printf( "\n\n" ); for ( j = 0; j < nev1; j++ ) { PAPI_get_event_info( events[j], &info ); printf( "Event %.2d: mean=%10.0f, " "sdev/mean=%7.2g nrpt=%2d -- %s\n", j, avg[j], spread[j], nsamples[j], info.short_descr ); } printf( "\n\n" ); } if ( fails ) { test_fail( __FILE__, __LINE__, "Values differ from reference", fails ); } test_pass( __FILE__ ); return 0; } papi-5.6.0/src/perfctr-2.6.x/linux/include/asm-i386/000775 001750 001750 00000000000 13216244367 023577 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/libpfm4/lib/events/intel_knl_unc_imc_events.h000664 001750 001750 00000004611 13216244364 026147 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2016 Intel Corp. 
All rights reserved * Contributed by Peinan Zhang * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. * * PMU: knl_unc_imc (Intel Knights Landing IMC uncore PMU) */ static const intel_x86_umask_t knl_unc_m_cas_count[]={ { .uname = "ALL", .udesc = "Counts total number of DRAM CAS commands issued on this channel", .ucode = 0x0300, }, { .uname = "RD", .udesc = "Counts all DRAM reads on this channel, incl. 
underfills", .ucode = 0x0100, }, { .uname = "WR", .udesc = "Counts number of DRAM write CAS commands on this channel", .ucode = 0x0200, }, }; static const intel_x86_entry_t intel_knl_unc_imc_pe[]={ { .name = "UNC_M_D_CLOCKTICKS", .desc = "IMC Uncore DCLK counts", .code = 0x00, /*encoding for generic counters */ .cntmsk = 0xf, }, { .name = "UNC_M_CAS_COUNT", .desc = "DRAM RD_CAS and WR_CAS Commands.", .code = 0x03, .cntmsk = 0xf, .ngrp = 1, .numasks = LIBPFM_ARRAY_SIZE(knl_unc_m_cas_count), .umasks = knl_unc_m_cas_count, }, }; static const intel_x86_entry_t intel_knl_unc_imc_uclk_pe[]={ { .name = "UNC_M_U_CLOCKTICKS", .desc = "IMC UCLK counts", .code = 0x00, /*encoding for generic counters */ .cntmsk = 0xf, }, }; papi-5.6.0/src/ctests/Makefile.recipies000664 001750 001750 00000052122 13216244360 022072 0ustar00jshenry1963jshenry1963000000 000000 OMP = zero_omp omptough SMP = zero_smp SHMEM = zero_shmem PTHREADS= pthrtough pthrtough2 thrspecific profile_pthreads overflow_pthreads \ zero_pthreads clockres_pthreads overflow3_pthreads locks_pthreads \ krentel_pthreads MPX = max_multiplex multiplex1 multiplex2 mendes-alt sdsc-mpx sdsc2-mpx \ sdsc2-mpx-noreset sdsc4-mpx reset_multiplex MPXPTHR = multiplex1_pthreads multiplex3_pthreads kufrin MPI = mpifirst SHARED = shlib SERIAL = all_events all_native_events branches calibrate case1 case2 \ cmpinfo code2name derived describe memory disable_component \ dmem_info eventname exeinfo failed_events first flops \ get_event_component inherit high-level high-level2 hl_rates \ hwinfo ipc johnmay2 low-level matrix-hl \ realtime remove_events reset second tenth version virttime \ zero zero_flip zero_named FORKEXEC = fork fork2 exec exec2 forkexec forkexec2 forkexec3 forkexec4 \ fork_overflow exec_overflow child_overflow system_child_overflow \ system_overflow burn zero_fork OVERFLOW = fork_overflow exec_overflow child_overflow system_child_overflow \ system_overflow burn overflow overflow_force_software \ overflow_single_event 
overflow_twoevents timer_overflow overflow2 \ overflow_index overflow_one_and_read overflow_allcounters PROFILE = profile profile_force_software sprofile profile_twoevents \ byte_profile ATTACH = multiattach multiattach2 zero_attach attach3 attach2 attach_target attach_cpu P4_TEST = p4_lst_ins EAR = earprofile RANGE = data_range BROKEN = pernode val_omp API = api ifneq ($(MPICC),) ALL = $(PTHREADS) $(SERIAL) $(FORKEXEC) $(OVERFLOW) $(PROFILE) $(MPI) $(MPX) $(MPXPTHR) $(OMP) $(SMP) $(SHMEM)\ $(SHARED) $(EAR) $(RANGE) $(P4_TEST) $(ATTACH) $(API) else ALL = $(PTHREADS) $(SERIAL) $(FORKEXEC) $(OVERFLOW) $(PROFILE) $(MPX) $(MPXPTHR) $(OMP) $(SMP) $(SHMEM)\ $(SHARED) $(EAR) $(RANGE) $(P4_TEST) $(ATTACH) $(API) endif DEFAULT = papi_api serial forkexec_tests overflow_tests profile_tests attach multiplex_and_pthreads shared all: $(ALL) default ctests ctest: $(DEFAULT) attach: $(ATTACH) p4: $(P4_TEST) ear: $(EAR) range: $(RANGE) mpi: $(MPI) shared: $(SHARED) multiplex_and_pthreads: $(MPXPTHR) $(MPX) $(PTHREADS) multiplex: $(MPX) omp: $(OMP) smp: $(SMP) pthreads: $(PTHREADS) shmem: $(SHMEM) serial: $(SERIAL) forkexec_tests: $(FORKEXEC) overflow_tests: $(OVERFLOW) profile_tests: $(PROFILE) papi_api: $(API) api: api.c $(TESTLIB) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) api.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o $@ sdsc2: sdsc2.c $(TESTLIB) $(PAPILIB) $(TESTFLOPS) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) sdsc.c $(TESTLIB) $(TESTFLOPS) $(PAPILIB) $(LDFLAGS) -lm -o $@ sdsc2-mpx: sdsc2.c $(TESTLIB) $(PAPILIB) $(TESTFLOPS) $(CC) $(INCLUDE) $(CFLAGS) -DMPX $(TOPTFLAGS) sdsc2.c $(TESTLIB) $(TESTFLOPS) $(PAPILIB) $(LDFLAGS) -lm -o $@ branches: branches.c $(TESTLIB) $(PAPILIB) $(TESTFLOPS) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) branches.c $(TESTLIB) $(PAPILIB) $(TESTFLOPS) $(LDFLAGS) -lm -o $@ sdsc2-mpx-noreset: sdsc2.c $(TESTLIB) $(PAPILIB) $(TESTFLOPS) $(CC) $(INCLUDE) $(CFLAGS) -DMPX -DSTARTSTOP $(TOPTFLAGS) sdsc2.c $(TESTLIB) $(TESTFLOPS) $(PAPILIB) -lm $(LDFLAGS) -o $@ 
sdsc-mpx: sdsc-mpx.c $(TESTLIB) $(PAPILIB) $(TESTFLOPS) $(CC) $(INCLUDE) $(CFLAGS) -DMPX $(TOPTFLAGS) sdsc-mpx.c $(TESTLIB) $(TESTFLOPS) $(PAPILIB) $(LDFLAGS) -o $@ sdsc4-mpx: sdsc4-mpx.c $(TESTLIB) $(PAPILIB) $(TESTFLOPS) $(CC) $(INCLUDE) $(CFLAGS) -DMPX $(TOPTFLAGS) sdsc4-mpx.c $(TESTLIB) $(TESTFLOPS) $(PAPILIB) $(LDFLAGS) -lm -o $@ calibrate: calibrate.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) calibrate.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o calibrate data_range: data_range.c $(TESTLIB) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) data_range.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o data_range p4_lst_ins: p4_lst_ins.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) p4_lst_ins.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o p4_lst_ins acpi: acpi.c dummy.o $(TESTLIB) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) acpi.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o acpi timer_overflow: timer_overflow.c $(TESTLIB) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) timer_overflow.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o $@ mendes-alt: mendes-alt.c $(TESTLIB) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) -DMULTIPLEX mendes-alt.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o $@ max_multiplex: max_multiplex.c $(TESTLIB) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) max_multiplex.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o $@ multiplex1: multiplex1.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) multiplex1.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o $@ multiplex2: multiplex2.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) multiplex2.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o $@ multiplex1_pthreads: multiplex1_pthreads.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(CC_R) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) multiplex1_pthreads.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o $@ -lpthread kufrin: kufrin.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(CC_R) $(INCLUDE) $(CFLAGS) 
$(TOPTFLAGS) kufrin.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o $@ -lpthread multiplex3_pthreads: multiplex3_pthreads.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(CC_R) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) multiplex3_pthreads.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o $@ -lpthread overflow3_pthreads: overflow3_pthreads.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(CC_R) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) overflow3_pthreads.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o $@ -lpthread thrspecific: thrspecific.c $(TESTLIB) $(PAPILIB) $(CC_R) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) thrspecific.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o thrspecific -lpthread pthrtough: pthrtough.c $(TESTLIB) $(PAPILIB) $(CC_R) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) pthrtough.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o pthrtough -lpthread pthrtough2: pthrtough2.c $(TESTLIB) $(PAPILIB) $(CC_R) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) pthrtough2.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o pthrtough2 -lpthread profile_pthreads: profile_pthreads.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(CC_R) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) profile_pthreads.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o profile_pthreads -lpthread locks_pthreads: locks_pthreads.c $(TESTLIB) $(PAPILIB) $(CC_R) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) locks_pthreads.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o locks_pthreads -lpthread -lm krentel_pthreads: krentel_pthreads.c $(TESTLIB) $(PAPILIB) $(CC_R) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) krentel_pthreads.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o krentel_pthreads -lpthread overflow_pthreads: overflow_pthreads.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(CC_R) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) overflow_pthreads.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o overflow_pthreads -lpthread version: version.c $(TESTLIB) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) version.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o version zero_pthreads: zero_pthreads.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(CC_R) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) zero_pthreads.c $(TESTLIB) 
$(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o zero_pthreads -lpthread zero_smp: zero_smp.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(CC_R) $(INCLUDE) $(SMPCFLGS) $(CFLAGS) $(TOPTFLAGS) zero_smp.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o zero_smp $(SMPLIBS) zero_shmem: zero_shmem.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(CC_R) $(INCLUDE) $(SMPCFLGS) $(CFLAGS) $(TOPTFLAGS) zero_shmem.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o zero_shmem $(SMPLIBS) zero_omp: zero_omp.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) -$(CC_R) $(INCLUDE) $(OMPCFLGS) $(CFLAGS) $(TOPTFLAGS) zero_omp.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o zero_omp $(OMPLIBS) omptough: omptough.c $(TESTLIB) $(PAPILIB) -$(CC_R) $(INCLUDE) $(OMPCFLGS) $(CFLAGS) $(TOPTFLAGS) omptough.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o omptough $(OMPLIBS) val_omp: val_omp.c $(TESTLIB) $(PAPILIB) -$(CC_R) $(INCLUDE) $(OMPCFLGS) $(CFLAGS) $(TOPTFLAGS) val_omp.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o val_omp $(OMPLIBS) clockres_pthreads: clockres_pthreads.c $(TESTLIB) $(CLOCKCORE) $(PAPILIB) $(CC_R) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) clockres_pthreads.c $(TESTLIB) $(CLOCKCORE) $(PAPILIB) $(LDFLAGS) -o clockres_pthreads -lpthread -lm inherit: inherit.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) inherit.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o inherit johnmay2: johnmay2.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) johnmay2.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o johnmay2 describe: describe.c $(TESTLIB) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) describe.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o describe derived: derived.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) derived.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o derived zero: zero.c $(TESTLIB) $(TESTINS) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) zero.c $(TESTLIB) $(TESTINS) $(PAPILIB) $(LDFLAGS) -o zero zero_named: zero_named.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(CC) 
$(INCLUDE) $(CFLAGS) $(TOPTFLAGS) zero_named.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o zero_named remove_events: remove_events.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) remove_events.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o remove_events zero_fork: zero_fork.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) zero_fork.c $(DOLOOPS) $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o zero_fork try: try.c $(TESTLIB) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) try.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o try zero_flip: zero_flip.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) zero_flip.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o zero_flip realtime: realtime.c $(TESTLIB) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) realtime.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o realtime virttime: virttime.c $(TESTLIB) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) virttime.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o virttime first: first.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) first.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o first mpifirst: mpifirst.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(MPICC) $(INCLUDE) $(MPFLAGS) $(CFLAGS) $(TOPTFLAGS) first.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o mpifirst first-twice: first-twice.c $(TESTLIB) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) first-twice.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o first-twice second: second.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) second.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o second flops: flops.c $(TESTLIB) $(TESTFLOPS) $(DISPLAYERROR) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) flops.c $(TESTLIB) $(TESTFLOPS) $(DISPLAYERROR) $(PAPILIB) $(LDFLAGS) -o flops ipc: ipc.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) ipc.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o ipc overflow: overflow.c $(TESTLIB) 
$(DOLOOPS) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) overflow.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o overflow overflow_allcounters: overflow_allcounters.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) overflow_allcounters.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o overflow_allcounters overflow_twoevents: overflow_twoevents.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) overflow_twoevents.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o overflow_twoevents overflow_one_and_read: overflow_one_and_read.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) overflow_one_and_read.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o overflow_one_and_read overflow_index: overflow_index.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) overflow_index.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o overflow_index overflow_values: overflow_values.c $(TESTLIB) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) overflow_values.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o overflow_values overflow2: overflow2.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) overflow2.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o overflow2 overflow_single_event: overflow_single_event.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) overflow_single_event.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o overflow_single_event overflow_force_software: overflow_force_software.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) overflow_force_software.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o overflow_force_software sprofile: sprofile.c $(TESTLIB) $(DOLOOPS) prof_utils.o $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) sprofile.c prof_utils.o $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o sprofile profile: profile.c $(TESTLIB) $(DOLOOPS) prof_utils.o $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) 
profile.c prof_utils.o $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o profile

profile_force_software: profile.c $(TESTLIB) $(DOLOOPS) prof_utils.o $(PAPILIB)
	$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) -DSWPROFILE profile.c prof_utils.o $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o profile_force_software

profile_twoevents: profile_twoevents.c $(TESTLIB) $(DOLOOPS) prof_utils.o $(PAPILIB)
	$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) profile_twoevents.c prof_utils.o $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o profile_twoevents

earprofile: earprofile.c $(TESTLIB) $(DOLOOPS) prof_utils.o $(PAPILIB)
	$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) earprofile.c $(TESTLIB) $(DOLOOPS) prof_utils.o $(PAPILIB) $(LDFLAGS) -o earprofile

byte_profile: byte_profile.c $(TESTLIB) $(DOLOOPS) prof_utils.o $(PAPILIB)
	$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) byte_profile.c prof_utils.o $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o byte_profile

# Link the test utilities and the PAPI library that this rule already
# declares as prerequisites, as every sibling rule does.
# NOTE(review): assumes pernode.c uses the PAPI/test helpers like its siblings -- confirm.
pernode: pernode.c $(TESTLIB) $(PAPILIB)
	$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) pernode.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o pernode

dmem_info: dmem_info.c $(TESTLIB) $(DOLOOPS) $(PAPILIB)
	$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) dmem_info.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o dmem_info

all_events: all_events.c $(TESTLIB) $(PAPILIB)
	$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) all_events.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o all_events

all_native_events: all_native_events.c $(TESTLIB) $(PAPILIB)
	$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) all_native_events.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o all_native_events

failed_events: failed_events.c $(TESTLIB) $(PAPILIB)
	$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) failed_events.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o failed_events

get_event_component: get_event_component.c $(TESTLIB) $(PAPILIB)
	$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) get_event_component.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o get_event_component

disable_component: disable_component.c $(TESTLIB) $(PAPILIB)
	$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) disable_component.c $(TESTLIB)
$(PAPILIB) $(LDFLAGS) -o disable_component memory: memory.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) memory.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o memory tenth: tenth.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) tenth.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o tenth eventname: eventname.c $(TESTLIB) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) eventname.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o eventname case1: case1.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) case1.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o case1 case2: case2.c $(TESTLIB) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) case2.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o case2 low-level: low-level.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) low-level.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o low-level matrix-hl: matrix-hl.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) matrix-hl.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o matrix-hl hl_rates: hl_rates.c $(TESTLIB) $(TESTFLOPS) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) hl_rates.c $(TESTLIB) $(TESTFLOPS) $(PAPILIB) $(LDFLAGS) -o hl_rates high-level: high-level.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) high-level.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o high-level high-level2: high-level2.c $(TESTLIB) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) high-level2.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o high-level2 shlib: shlib.c $(TESTLIB) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) shlib.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o shlib $(LDL) exeinfo: exeinfo.c $(TESTLIB) $(PAPILIB) -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) exeinfo.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o exeinfo cmpinfo: cmpinfo.c $(TESTLIB) $(PAPILIB) -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) cmpinfo.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o cmpinfo hwinfo: 
hwinfo.c $(TESTLIB) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) hwinfo.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o hwinfo code2name: code2name.c $(TESTLIB) $(PAPILIB) $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) code2name.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o code2name attach_target: attach_target.c $(DOLOOPS) -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) attach_target.c -o attach_target $(DOLOOPS) $(TESTLIB) zero_attach: zero_attach.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) zero_attach.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o zero_attach multiattach: multiattach.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) multiattach.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o multiattach multiattach2: multiattach2.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) multiattach2.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o multiattach2 attach3: attach3.c attach_target $(TESTLIB) $(PAPILIB) -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) attach3.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o attach3 attach2: attach2.c attach_target $(TESTLIB) $(PAPILIB) -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) attach2.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o attach2 attach_cpu: attach_cpu.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) attach_cpu.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o attach_cpu reset: reset.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) reset.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o reset reset_multiplex: reset_multiplex.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) reset_multiplex.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o reset_multiplex fork_overflow: fork_overflow.c $(TESTLIB) $(PAPILIB) -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) fork_overflow.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o fork_overflow exec_overflow: exec_overflow.c $(TESTLIB) $(PAPILIB) $(TESTCYCLES) -$(CC) $(INCLUDE) $(CFLAGS) 
$(TOPTFLAGS) -DPEXEC exec_overflow.c $(TESTLIB) $(PAPILIB) $(TESTCYCLES) $(LDFLAGS) -o exec_overflow child_overflow: child_overflow.c $(TESTLIB) $(PAPILIB) $(TESTCYCLES) -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) -DPCHILD child_overflow.c $(TESTLIB) $(PAPILIB) $(TESTCYCLES) $(LDFLAGS) -o child_overflow system_child_overflow: system_child_overflow.c $(TESTLIB) $(PAPILIB) -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) -DSYSTEM system_child_overflow.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o system_child_overflow system_overflow: system_overflow.c $(TESTLIB) $(PAPILIB) -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) -DSYSTEM2 system_overflow.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o system_overflow burn: burn.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) burn.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o burn fork: fork.c $(TESTLIB) $(PAPILIB) -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) fork.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o fork exec: exec.c $(TESTLIB) $(PAPILIB) -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) exec.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o exec exec2: exec2.c $(TESTLIB) $(PAPILIB) -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) exec2.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o exec2 fork2: fork2.c $(TESTLIB) $(PAPILIB) -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) fork2.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o fork2 forkexec: forkexec.c $(TESTLIB) $(PAPILIB) -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) forkexec.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o forkexec forkexec2: forkexec2.c $(TESTLIB) $(PAPILIB) -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) forkexec2.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o forkexec2 forkexec3: forkexec3.c $(TESTLIB) $(PAPILIB) -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) forkexec3.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o forkexec3 forkexec4: forkexec4.c $(TESTLIB) $(PAPILIB) -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) forkexec4.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o forkexec4 prof_utils.o: prof_utils.c $(testlibdir)/papi_test.h prof_utils.h $(CC) $(INCLUDE) $(CFLAGS) 
$(TOPTFLAGS) -c prof_utils.c .PHONY : all default ctests ctest clean clean: rm -f *.o *.stderr *.stdout core *~ $(ALL) unregister_pthreads distclean clobber: clean rm -f Makefile.target papi-5.6.0/src/ctests/hl_rates.c000664 001750 001750 00000022203 13216244360 020572 0ustar00jshenry1963jshenry1963000000 000000 /* file hl_rates.c * This test exercises the four PAPI High Level rate calls: * PAPI_flops, PAPI_flips, PAPI_ipc, and PAPI_epc * flops and flips report cumulative real and process time since the first call, * and either floating point operations or instructions since the first call. * Also reported is incremental flop or flip rate since the last call. * * PAPI_ipc reports the same cumulative information, substituting * total instructions for flops or flips, and also reports * instructions per (process) cycle as a measure of execution efficiency. * * PAPI_epc is new in PAPI 5.2. It reports the same information as PAPI_IPC, * but for an arbitrary event instead of total cycles. It also reports * incremental core and (where available) reference cycles to allow the * computation of effective clock rates in the presence of clock scaling * like speed step or turbo-boost. * * This test computes a 1000 x 1000 matrix multiply for orders of indexing for * each of the four rate calls. It also accepts a command line parameter * for the event to be measured for PAPI_epc. If not provided, PAPI_TOT_INS * is measured. 
*/ #include #include #include "papi.h" #include "papi_test.h" #include "testcode.h" int main( int argc, char **argv ) { int retval, event = 0; float rtime, ptime, mflips, mflops, ipc, epc; long long flpins, flpops, ins, ref, core, evt; double mflips_classic,mflips_swapped; double mflops_classic,mflops_swapped; double ipc_classic,ipc_swapped; double epc_classic,epc_swapped; int quiet; /* Set TESTS_QUIET variable */ quiet=tests_quiet( argc, argv ); /* Initialize the test matrix */ flops_float_init_matrix(); /************************/ /* FLIPS */ /************************/ if (!quiet) { printf( "\n----------------------------------\n" ); printf( "PAPI_flips\n"); } /* Run flips at start */ retval=PAPI_flips(&rtime, &ptime, &flpins, &mflips); if (retval!=PAPI_OK) { if (!quiet) PAPI_perror( "PAPI_flips" ); if (retval==PAPI_ENOEVNT) { test_skip(__FILE__,__LINE__,"Could not add event",0); } } if (!quiet) { printf( "\nStart\n"); printf( "real time: %f\n", rtime); printf( "process time: %f\n", ptime); printf( "FP Instructions: %lld\n", flpins); printf( "MFLIPS %f\n", mflips); } /* Be sure we are all zero at beginning */ if ((rtime!=0) || (ptime!=0) || (flpins!=0) || (mflips!=0)) { test_fail(__FILE__,__LINE__,"Not initialized to zero",0); } // Flips classic flops_float_matrix_matrix_multiply(); if ( PAPI_flips(&rtime, &ptime, &flpins, &mflips) != PAPI_OK ) PAPI_perror( "PAPI_flips" ); if (!quiet) { printf( "\nClassic\n"); printf( "real time: %f\n", rtime); printf( "process time: %f\n", ptime); printf( "FP Instructions: %lld\n", flpins); printf( "MFLIPS %f\n", mflips); } mflips_classic=mflips; // Flips swapped flops_float_swapped_matrix_matrix_multiply(); if ( PAPI_flips(&rtime, &ptime, &flpins, &mflips) != PAPI_OK ) PAPI_perror( "PAPI_flips" ); if (!quiet) { printf( "\nSwapped\n"); printf( "real time: %f\n", rtime); printf( "process time: %f\n", ptime); printf( "FP Instructions: %lld\n", flpins); printf( "MFLIPS %f\n", mflips); } mflips_swapped=mflips; // turn off flips if ( 
PAPI_stop_counters(NULL, 0) != PAPI_OK ) { PAPI_perror( "PAPI_stop_counters" ); } /************************/ /* FLOPS */ /************************/ if (!quiet) { printf( "\n----------------------------------\n" ); printf( "PAPI_flops\n"); } // Start flops if ( PAPI_flops(&rtime, &ptime, &flpops, &mflops) != PAPI_OK ) { PAPI_perror( "PAPI_flops" ); } if (!quiet) { printf( "\nStart\n"); printf( "real time: %f\n", rtime); printf( "process time: %f\n", ptime); printf( "FP Operations: %lld\n", flpops); printf( "MFLOPS %f\n", mflops); } /* Be sure we are all zero at beginning */ if ((rtime!=0) || (ptime!=0) || (flpops!=0) || (mflops!=0)) { test_fail(__FILE__,__LINE__,"Not initialized to zero",0); } // Classic flops flops_float_matrix_matrix_multiply(); if ( PAPI_flops(&rtime, &ptime, &flpops, &mflops) != PAPI_OK ) PAPI_perror( "PAPI_flops" ); if (!quiet) { printf( "\nClassic\n"); printf( "real time: %f\n", rtime); printf( "process time: %f\n", ptime); printf( "FP Operations: %lld\n", flpops); printf( "MFLOPS %f\n", mflops); } mflops_classic=mflops; // Swapped flops flops_float_swapped_matrix_matrix_multiply(); if ( PAPI_flops(&rtime, &ptime, &flpops, &mflops) != PAPI_OK ) PAPI_perror( "PAPI_flops" ); if (!quiet) { printf( "\nSwapped\n"); printf( "real time: %f\n", rtime); printf( "process time: %f\n", ptime); printf( "FP Operations: %lld\n", flpops); printf( "MFLOPS %f\n", mflops); } mflops_swapped=mflops; // turn off flops if ( PAPI_stop_counters(NULL, 0) != PAPI_OK ) { PAPI_perror( "PAPI_stop_counters" ); } /************************/ /* IPC */ /************************/ if (!quiet) { printf( "\n----------------------------------\n" ); printf( "PAPI_ipc\n"); } // Start ipc if ( PAPI_ipc(&rtime, &ptime, &ins, &ipc) != PAPI_OK ) PAPI_perror( "PAPI_ipc" ); if (!quiet) { printf( "\nStart\n"); printf( "real time: %f\n", rtime); printf( "process time: %f\n", ptime); printf( "Instructions: %lld\n", ins); printf( "IPC %f\n", ipc); } /* Be sure we are all zero at beginning */ if 
((rtime!=0) || (ptime!=0) || (ins!=0) || (ipc!=0)) { test_fail(__FILE__,__LINE__,"Not initialized to zero",0); } // Classic ipc flops_float_matrix_matrix_multiply(); if ( PAPI_ipc(&rtime, &ptime, &ins, &ipc) != PAPI_OK ) PAPI_perror( "PAPI_ipc" ); if (!quiet) { printf( "\nClassic\n"); printf( "real time: %f\n", rtime); printf( "process time: %f\n", ptime); printf( "Instructions: %lld\n", ins); printf( "IPC %f\n", ipc); } ipc_classic=ipc; // Swapped ipc flops_float_swapped_matrix_matrix_multiply(); if ( PAPI_ipc(&rtime, &ptime, &ins, &ipc) != PAPI_OK ) PAPI_perror( "PAPI_ipc" ); if (!quiet) { printf( "\nSwapped\n"); printf( "real time: %f\n", rtime); printf( "process time: %f\n", ptime); printf( "Instructions: %lld\n", ins); printf( "IPC %f\n", ipc); } ipc_swapped=ipc; // turn off ipc if ( PAPI_stop_counters(NULL, 0) != PAPI_OK ) { PAPI_perror( "PAPI_stop_counters" ); } /************************/ /* EPC */ /************************/ if (!quiet) { printf( "\n----------------------------------\n" ); printf( "PAPI_epc\n"); } /* This unfortunately conflicts a bit with the TESTS_QUIET */ /* command line paramater nonsense. 
*/ if ( argc >= 2) { retval = PAPI_event_name_to_code( argv[1], &event ); if (retval != PAPI_OK) { if (!quiet) printf("Can't find %s; Using PAPI_TOT_INS\n", argv[1]); event = PAPI_TOT_INS; } else { if (!quiet) printf("Using event %s\n", argv[1]); } } // Start epc if ( PAPI_epc(event, &rtime, &ptime, &ref, &core, &evt, &epc) != PAPI_OK ) PAPI_perror( "PAPI_epc" ); if (!quiet) { printf( "\nStart\n"); printf( "real time: %f\n", rtime); printf( "process time: %f\n", ptime); printf( "Ref Cycles: %lld\n", ref); printf( "Core Cycles: %lld\n", core); printf( "Events: %lld\n", evt); printf( "EPC: %f\n", epc); } /* Be sure we are all zero at beginning */ if ((rtime!=0) || (ptime!=0) || (ref!=0) || (core!=0) || (evt!=0) || (epc!=0)) { test_fail(__FILE__,__LINE__,"Not initialized to zero",0); } // Classic epc flops_float_matrix_matrix_multiply(); if ( PAPI_epc(event, &rtime, &ptime, &ref, &core, &evt, &epc) != PAPI_OK ) PAPI_perror( "PAPI_epc" ); if (!quiet) { printf( "\nClassic\n"); printf( "real time: %f\n", rtime); printf( "process time: %f\n", ptime); printf( "Ref Cycles: %lld\n", ref); printf( "Core Cycles: %lld\n", core); printf( "Events: %lld\n", evt); printf( "EPC: %f\n", epc); } epc_classic=epc; // Swapped epc flops_float_swapped_matrix_matrix_multiply(); if ( PAPI_epc(event, &rtime, &ptime, &ref, &core, &evt, &epc) != PAPI_OK ) { PAPI_perror( "PAPI_epc" ); } if (!quiet) { printf( "\nSwapped\n"); printf( "real time: %f\n", rtime); printf( "process time: %f\n", ptime); printf( "Ref Cycles: %lld\n", ref); printf( "Core Cycles: %lld\n", core); printf( "Events: %lld\n", evt); printf( "EPC: %f\n", epc); } epc_swapped=epc; // turn off epc if ( PAPI_stop_counters(NULL, 0) != PAPI_OK ) { PAPI_perror( "PAPI_stop_counters" ); } if (!quiet) { printf( "\n----------------------------------\n" ); } /* Validate */ if (mflips_swapped/tests/Makefile has to have a rule with the name '< component >_tests'; e.g. 
for tests added to the example component, the name of the rule would be 'example_tests'. See: TESTS = HelloWorld example_tests: $(TESTS) 4. Include components/Makefile_comp_tests in your component test Makefile (see components/example/tests/Makefile for more details) 5. You may also define 'clean' and/or 'install' targets (as shown in the example) which will be called during those parts of the build. If these targets are missing, the build will just print a message reporting the missing target and continue. NOTE: there is no need to modify any PAPI code other than adding your tests and a Makefile to your component and following steps 1 to 4 listed above. @section Component Specific Information Some components under source control have additional information specific to their build process or operation. That information can be found in a README file inside the component directory. If the README doesn't exist, no special information is necessary. */ papi-5.6.0/src/perfctr-2.7.x/usr.lib/ppc64.h000664 001750 001750 00000005505 13216244370 022231 0ustar00jshenry1963jshenry1963000000 000000 /* Maynard * PPC64-specific code for performance counters library. * */ #ifndef __LIB_PERFCTR_PPC64_H #define __LIB_PERFCTR_PPC64_H static __inline__ unsigned long get_tb(void) { unsigned long tb; asm volatile("mftb %0" : "=r" (tb)); return tb; } #define rdtscl(x) do { (x) = (unsigned int) get_tb(); } while(0) #define SPRN_UPMC1 0x303 #define SPRN_UPMC2 0x304 #define SPRN_UPMC3 0x305 #define SPRN_UPMC4 0x306 #define SPRN_UPMC5 0x307 #define SPRN_UPMC6 0x308 #define SPRN_UPMC7 0x309 #define SPRN_UPMC8 0x30a #define MMCRA_SIHV 0x10000000UL /* state of MSR HV when SIAR set */ #define MMCRA_SIPR 0x08000000UL /* state of MSR PR when SIAR set */ #define MMCRA_SAMPLE_ENABLE 0x00000001UL /* enable sampling */ #define MMCR0_FC 0x80000000UL /* freeze counters. 
set to 1 on a perfmon exception */ #define MMCR0_FCS 0x40000000UL /* freeze in supervisor state */ #define MMCR0_KERNEL_DISABLE MMCR0_FCS #define MMCR0_FCP 0x20000000UL /* freeze in problem state */ #define MMCR0_PROBLEM_DISABLE MMCR0_FCP #define MMCR0_FCM1 0x10000000UL /* freeze counters while MSR mark = 1 */ #define MMCR0_FCM0 0x08000000UL /* freeze counters while MSR mark = 0 */ #define MMCR0_PMXE 0x04000000UL /* performance monitor exception enable */ #define MMCR0_FCECE 0x02000000UL /* freeze counters on enabled condition or event */ /* time base exception enable */ #define MMCR0_TBEE 0x00400000UL /* time base exception enable */ #define MMCR0_PMC1CE 0x00008000UL /* PMC1 count enable*/ #define MMCR0_PMCjCE 0x00004000UL /* PMCj count enable*/ #define MMCR0_TRIGGER 0x00002000UL /* TRIGGER enable */ #define MMCR0_PMAO 0x00000080UL /* performance monitor alert has occurred, set to 0 after handling exception */ #define MMCR0_SHRFC 0x00000040UL /* SHRre freeze conditions between threads */ #define MMCR0_FCTI 0x00000008UL /* freeze counters in tags inactive mode */ #define MMCR0_FCTA 0x00000004UL /* freeze counters in tags active mode */ #define MMCR0_FCWAIT 0x00000002UL /* freeze counter in WAIT state */ #define MMCR0_FCHV 0x00000001UL /* freeze conditions in hypervisor mode */ #define mfspr(rn) ({unsigned int rval; \ asm volatile("mfspr %0,%1" : "=r"(rval) : "i"(rn)); \ rval; }) static __inline__ unsigned int read_pmc(unsigned int pmc) { switch (pmc) { default: /* impossible, but silences gcc warning */ case 0: return mfspr(SPRN_UPMC1); case 1: return mfspr(SPRN_UPMC2); case 2: return mfspr(SPRN_UPMC3); case 3: return mfspr(SPRN_UPMC4); case 4: return mfspr(SPRN_UPMC5); case 5: return mfspr(SPRN_UPMC6); case 6: return mfspr(SPRN_UPMC7); case 7: return mfspr(SPRN_UPMC8); } } #define rdpmcl(pmc,x) do { (x) = read_pmc((pmc)); } while(0) #define vperfctr_has_rdpmc(vperfctr) ((vperfctr)->have_rdpmc) extern void perfctr_info_cpu_init(struct perfctr_info*); #endif /* 
__LIB_PERFCTR_PPC64_H */ papi-5.6.0/src/perfctr-2.6.x/etc/costs/PentiumII-300000775 001750 001750 00000000662 13216244366 023550 0ustar00jshenry1963jshenry1963000000 000000 [data from a 300MHz Mobile Pentium II] PERFCTR INIT: vendor 0, family 6, model 5 PERFCTR INIT: NITER == 64 PERFCTR INIT: rdpmc ticks == 2035 PERFCTR INIT: rdmsr (counter) ticks == 5290 PERFCTR INIT: rdmsr (evntsel) ticks == 4547 PERFCTR INIT: wrmsr (counter) ticks == 5972 PERFCTR INIT: wrmsr (evntsel) ticks == 5384 PERFCTR INIT: read %cr4 ticks == 213 PERFCTR INIT: write %cr4 ticks == 2763 PERFCTR INIT: loop overhead ticks == 89 papi-5.6.0/src/libpfm4/lib/pfmlib_intel_nhm_unc.c000664 001750 001750 00000023751 13216244365 023760 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_intel_nhm_unc.c : Intel Nehalem/Westmere uncore PMU * * Copyright (c) 2008 Google, Inc * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/
/* NOTE(review): the five header names below are missing in this copy of the
 * file (bare #include directives) -- restore them from the upstream source. */
#include
#include
#include
#include
#include

/* private headers */
#include "pfmlib_priv.h"
#include "pfmlib_intel_x86_priv.h"

/* Indices of the event modifiers; they match the order of nhm_unc_mods[]. */
#define NHM_UNC_ATTR_E 0
#define NHM_UNC_ATTR_I 1
#define NHM_UNC_ATTR_C 2
#define NHM_UNC_ATTR_O 3

/* Bit-mask form of the modifier indices above. */
#define _NHM_UNC_ATTR_I (1 << NHM_UNC_ATTR_I)
#define _NHM_UNC_ATTR_E (1 << NHM_UNC_ATTR_E)
#define _NHM_UNC_ATTR_C (1 << NHM_UNC_ATTR_C)
#define _NHM_UNC_ATTR_O (1 << NHM_UNC_ATTR_O)

/* All modifiers combined. */
#define NHM_UNC_ATTRS \
	(_NHM_UNC_ATTR_I|_NHM_UNC_ATTR_E|_NHM_UNC_ATTR_C|_NHM_UNC_ATTR_O)

/* NOTE(review): presumably bit positions of the modifiers inside the event
 * select register; they are not referenced elsewhere in this part of the file. */
#define NHM_UNC_MOD_OCC_BIT 17
#define NHM_UNC_MOD_EDGE_BIT 18
#define NHM_UNC_MOD_INV_BIT 23
#define NHM_UNC_MOD_CMASK_BIT 24

#define NHM_UNC_MOD_OCC (1 << NHM_UNC_MOD_OCC_BIT)
#define NHM_UNC_MOD_EDGE (1 << NHM_UNC_MOD_EDGE_BIT)
#define NHM_UNC_MOD_INV (1 << NHM_UNC_MOD_INV_BIT)

/* Intel Nehalem/Westmere uncore event table */
#include "events/intel_nhm_unc_events.h"
#include "events/intel_wsm_unc_events.h"

/* Modifier descriptions, indexed by NHM_UNC_ATTR_*. */
static const pfmlib_attr_desc_t nhm_unc_mods[]={
	PFM_ATTR_B("e", "edge level"), /* edge */
	PFM_ATTR_B("i", "invert"), /* invert */
	PFM_ATTR_I("c", "counter-mask in range [0-255]"), /* counter-mask */
	PFM_ATTR_B("o", "queue occupancy"), /* queue occupancy */
	PFM_ATTR_NULL
};

/* Zero-terminated CPU model lists used by the two PMU descriptors below. */
static const int nhm_models[] = { 26, 30, 31, 0 };

static const int wsm_dp_models[] = {
	44, /* Westmere-EP, Gulftown */
	47, /* Westmere E7 */
	0,
};

/*
 * Build the uncore event-select value for event descriptor `e`.
 *
 * Starting from the table code of the event, the function ORs in the unit
 * masks selected by the caller (or a raw 8-bit umask), applies the e/i/c/o
 * modifiers, asks pfm_intel_x86_add_defaults() to fill unit mask groups the
 * caller left empty, and forces the enable and interrupt bits to 1.
 *
 * On success e->codes[0] holds the register value, e->count is 1 and
 * e->fstr holds the fully qualified event string.
 *
 * Returns PFM_SUCCESS, or:
 *   PFM_ERR_FEATCOMB  unit masks combined in a way the event forbids
 *   PFM_ERR_ATTR      raw umask wider than 8 bits, or modifier set does not
 *                     equal the required set
 *   PFM_ERR_INVAL     counter-mask above 255
 *   PFM_ERR_ATTR_SET  a modifier hardwired by a unit mask was also set
 *   any error returned by pfm_intel_x86_add_defaults()
 */
static int
pfm_nhm_unc_get_encoding(void *this, pfmlib_event_desc_t *e)
{
	pfm_intel_x86_reg_t reg;
	pfmlib_event_attr_info_t *a;
	const intel_x86_entry_t *pe = this_pe(this);
	unsigned int grpmsk, ugrpmsk = 0;
	int umodmsk = 0, modmsk_r = 0;
	uint64_t val;
	uint64_t umask;
	unsigned int modhw = 0;
	int k, ret, grpid, last_grpid = -1;
	int grpcounts[INTEL_X86_NUM_GRP];
	int ncombo[INTEL_X86_NUM_GRP];
	/* NOTE(review): umask_str is filled below but never read in this function */
	char umask_str[PFMLIB_EVT_MAX_NAME_LEN];

	memset(grpcounts, 0, sizeof(grpcounts));
	memset(ncombo, 0, sizeof(ncombo));

	/* NOTE(review): repeats the initialization done at the declaration */
	pe = this_pe(this);
	umask_str[0] = e->fstr[0] = '\0';

	reg.val = 0;
	val = pe[e->event].code;
	/* one bit per unit mask group defined for this event */
	grpmsk = (1 << pe[e->event].ngrp)-1;
	reg.val |= val; /* preset some filters from code */

	/* take into account hardcoded umask */
	umask = (val >> 8) & 0xff;

	modmsk_r = pe[e->event].modmsk_req;

	for(k=0; k < e->nattrs; k++) {
		a = attr(e, k);

		/* only attributes handled by the PMU itself are encoded here */
		if (a->ctrl != PFM_ATTR_CTRL_PMU)
			continue;

		if (a->type == PFM_ATTR_UMASK) {
			grpid = pe[e->event].umasks[a->idx].grpid;

			/*
			 * for certain events groups are meant to be
			 * exclusive, i.e., only unit masks of one group
			 * can be used
			 */
			if (last_grpid != -1 && grpid != last_grpid
			    && intel_x86_eflag(this, e->event, INTEL_X86_GRP_EXCL)) {
				DPRINT("exclusive unit mask group error\n");
				return PFM_ERR_FEATCOMB;
			}
			/*
			 * upper layer has removed duplicates
			 * so if we come here more than once, it is for two
			 * distinct umasks
			 *
			 * NCOMBO=no combination of unit masks within the same
			 * umask group
			 */
			++grpcounts[grpid];

			if (intel_x86_uflag(this, e->event, a->idx, INTEL_X86_NCOMBO))
				ncombo[grpid] = 1;

			if (grpcounts[grpid] > 1 && ncombo[grpid]) {
				DPRINT("event does not support unit mask combination within a group\n");
				return PFM_ERR_FEATCOMB;
			}

			evt_strcat(umask_str, ":%s", pe[e->event].umasks[a->idx].uname);

			last_grpid = grpid;
			/* accumulate modifiers, umask bits and groups contributed by this unit mask */
			modhw |= pe[e->event].umasks[a->idx].modhw;
			umask |= pe[e->event].umasks[a->idx].ucode >> 8;
			ugrpmsk |= 1 << pe[e->event].umasks[a->idx].grpid;

			reg.val |= umask << 8;

			modmsk_r |= pe[e->event].umasks[a->idx].umodmsk_req;

		} else if (a->type == PFM_ATTR_RAW_UMASK) {

			/* there can only be one RAW_UMASK per event */

			/* sanity check */
			if (a->idx & ~0xff) {
				DPRINT("raw umask is 8-bit wide\n");
				return PFM_ERR_ATTR;
			}
			/* override umask */
			umask = a->idx & 0xff;
			/* mark every group as covered so no defaults are added */
			ugrpmsk = grpmsk;
		} else {
			/* modifier: value supplied by the caller for this attribute */
			uint64_t ival = e->attrs[k].ival;
			switch(a->idx) {
			case NHM_UNC_ATTR_I: /* invert */
				reg.nhm_unc.usel_inv = !!ival;
				umodmsk |= _NHM_UNC_ATTR_I;
				break;
			case NHM_UNC_ATTR_E: /* edge */
				reg.nhm_unc.usel_edge = !!ival;
				umodmsk |= _NHM_UNC_ATTR_E;
				break;
			case NHM_UNC_ATTR_C: /* counter-mask */
				/* already forced, cannot overwrite */
				/* NOTE(review): the check below only enforces the 8-bit
				 * range; it does not test for a forced value */
				if (ival > 255)
					return PFM_ERR_INVAL;
				reg.nhm_unc.usel_cnt_mask = ival;
				umodmsk |= _NHM_UNC_ATTR_C;
				break;
			case NHM_UNC_ATTR_O: /* occupancy */
				reg.nhm_unc.usel_occ = !!ival;
				umodmsk |= _NHM_UNC_ATTR_O;
				break;
			}
		}
	}

	/* reject a non-zero modifier that a selected unit mask flags in modhw */
	if ((modhw & _NHM_UNC_ATTR_I) && reg.nhm_unc.usel_inv)
		return PFM_ERR_ATTR_SET;
	if ((modhw & _NHM_UNC_ATTR_E) && reg.nhm_unc.usel_edge)
		return PFM_ERR_ATTR_SET;
	if ((modhw & _NHM_UNC_ATTR_C) && reg.nhm_unc.usel_cnt_mask)
		return PFM_ERR_ATTR_SET;
	if ((modhw & _NHM_UNC_ATTR_O) && reg.nhm_unc.usel_occ)
		return PFM_ERR_ATTR_SET;

	/*
	 * check that there is at least one unit mask in each unit
	 * mask group
	 */
	if ((ugrpmsk != grpmsk && !intel_x86_eflag(this, e->event, INTEL_X86_GRP_EXCL)) || ugrpmsk == 0) {
		/* keep only the groups that still lack a unit mask */
		ugrpmsk ^= grpmsk;
		ret = pfm_intel_x86_add_defaults(this, e, ugrpmsk, &umask, -1, -1);
		if (ret != PFM_SUCCESS)
			return ret;
	}

	/* NOTE(review): XOR makes this fail on any difference between supplied
	 * and required modifiers, including extra modifiers -- confirm intended */
	if (modmsk_r && (umodmsk ^ modmsk_r)) {
		DPRINT("required modifiers missing: 0x%x\n", modmsk_r);
		return PFM_ERR_ATTR;
	}

	/* build the event string: name first, then the unit masks */
	evt_strcat(e->fstr, "%s", pe[e->event].name);
	pfmlib_sort_attr(e);
	for(k=0; k < e->nattrs; k++) {
		a = attr(e, k);
		if (a->ctrl != PFM_ATTR_CTRL_PMU)
			continue;
		if (a->type == PFM_ATTR_UMASK)
			evt_strcat(e->fstr, ":%s", pe[e->event].umasks[a->idx].uname);
		else if (a->type == PFM_ATTR_RAW_UMASK)
			evt_strcat(e->fstr, ":0x%x", a->idx);
	}

	/* final umask, including any defaults added above */
	reg.val |= umask << 8;

	reg.nhm_unc.usel_en = 1; /* force enable bit to 1 */
	reg.nhm_unc.usel_int = 1; /* force APIC int to 1 */

	e->codes[0] = reg.val;
	e->count = 1;

	/* append every non-umask PMU attribute with the value actually encoded */
	for (k = 0; k < e->npattrs; k++) {
		int idx;

		if (e->pattrs[k].ctrl != PFM_ATTR_CTRL_PMU)
			continue;

		if (e->pattrs[k].type == PFM_ATTR_UMASK)
			continue;

		idx = e->pattrs[k].idx;
		switch(idx) {
		case NHM_UNC_ATTR_E:
			evt_strcat(e->fstr, ":%s=%lu", nhm_unc_mods[idx].name, reg.nhm_unc.usel_edge);
			break;
		case NHM_UNC_ATTR_I:
			evt_strcat(e->fstr, ":%s=%lu", nhm_unc_mods[idx].name, reg.nhm_unc.usel_inv);
			break;
		case NHM_UNC_ATTR_C:
			evt_strcat(e->fstr, ":%s=%lu", nhm_unc_mods[idx].name, reg.nhm_unc.usel_cnt_mask);
			break;
		case NHM_UNC_ATTR_O:
			evt_strcat(e->fstr, ":%s=%lu", nhm_unc_mods[idx].name, reg.nhm_unc.usel_occ);
			break;
		}
	}

	/* verbose dump of the final encoding */
	__pfm_vbprintf("[UNC_PERFEVTSEL=0x%"PRIx64" event=0x%x umask=0x%x en=%d int=%d inv=%d edge=%d occ=%d cnt_msk=%d] %s\n",
		reg.val,
		reg.nhm_unc.usel_event,
		reg.nhm_unc.usel_umask,
		reg.nhm_unc.usel_en,
		reg.nhm_unc.usel_int,
		reg.nhm_unc.usel_inv,
		reg.nhm_unc.usel_edge,
		reg.nhm_unc.usel_occ,
		reg.nhm_unc.usel_cnt_mask,
		pe[e->event].name);

	return PFM_SUCCESS;
}

/* PMU descriptor for the Nehalem uncore (CPU models listed in nhm_models). */
pfmlib_pmu_t intel_nhm_unc_support={
	.desc = "Intel Nehalem uncore",
	.name = "nhm_unc",
	.perf_name = "uncore",
	.pmu = PFM_PMU_INTEL_NHM_UNC,
	.pme_count = LIBPFM_ARRAY_SIZE(intel_nhm_unc_pe),
	.type = PFM_PMU_TYPE_UNCORE,
	.num_cntrs = 8,
	.num_fixed_cntrs = 1,
	.max_encoding = 1,
	.pe = intel_nhm_unc_pe,
	.atdesc = nhm_unc_mods,
	.flags = PFMLIB_PMU_FL_RAW_UMASK,
	.cpu_family = 6,
	.cpu_models = nhm_models,
	.pmu_detect = pfm_intel_x86_model_detect,
	.get_event_encoding[PFM_OS_NONE] = pfm_nhm_unc_get_encoding,
	PFMLIB_ENCODE_PERF(pfm_intel_nhm_unc_get_perf_encoding),
	.get_event_first = pfm_intel_x86_get_event_first,
	.get_event_next = pfm_intel_x86_get_event_next,
	.event_is_valid = pfm_intel_x86_event_is_valid,
	.validate_table = pfm_intel_x86_validate_table,
	.get_event_info = pfm_intel_x86_get_event_info,
	.get_event_attr_info = pfm_intel_x86_get_event_attr_info,
	PFMLIB_VALID_PERF_PATTRS(pfm_intel_x86_perf_validate_pattrs),
	.get_event_nattrs = pfm_intel_x86_get_event_nattrs,
};

/* PMU descriptor for the Westmere uncore (CPU models listed in wsm_dp_models);
 * it shares the encoding routine and modifier table with Nehalem. */
pfmlib_pmu_t intel_wsm_unc_support={
	.desc = "Intel Westmere uncore",
	.name = "wsm_unc",
	.perf_name = "uncore",
	.pmu = PFM_PMU_INTEL_WSM_UNC,
	.pme_count = LIBPFM_ARRAY_SIZE(intel_wsm_unc_pe),
	.type = PFM_PMU_TYPE_UNCORE,
	.num_cntrs = 8,
	.num_fixed_cntrs = 1,
	.max_encoding = 1,
	.pe = intel_wsm_unc_pe,
	.atdesc = nhm_unc_mods,
	.flags = PFMLIB_PMU_FL_RAW_UMASK,
	.cpu_family = 6,
	.cpu_models = wsm_dp_models,
	.pmu_detect = pfm_intel_x86_model_detect,
	.get_event_encoding[PFM_OS_NONE] = pfm_nhm_unc_get_encoding,
	PFMLIB_ENCODE_PERF(pfm_intel_nhm_unc_get_perf_encoding),
	.get_event_first = pfm_intel_x86_get_event_first,
	.get_event_next = pfm_intel_x86_get_event_next,
.event_is_valid = pfm_intel_x86_event_is_valid, .validate_table = pfm_intel_x86_validate_table, .get_event_info = pfm_intel_x86_get_event_info, .get_event_attr_info = pfm_intel_x86_get_event_attr_info, PFMLIB_VALID_PERF_PATTRS(pfm_intel_x86_perf_validate_pattrs), .get_event_nattrs = pfm_intel_x86_get_event_nattrs, }; papi-5.6.0/src/components/host_micpower/linux-host_micpower.c000664 001750 001750 00000051777 13216244357 026602 0ustar00jshenry1963jshenry1963000000 000000 /** linux-host_micpower.c * @author James Ralph * ralph@icl.utk.edu * * @ingroup papi_components * * @brief * This component wraps the MicAccessAPI to provide hostside * power information for attached Intel Xeon Phi (MIC) cards. */ /* From intel examples, see $(mic_dir)/sysmgt/sdk/Examples/Usage */ #define MAX_DEVICES (32) #define EVENTS_PER_DEVICE 10 #include #include #include #include #include "MicAccessTypes.h" #include "MicBasicTypes.h" #include "MicAccessErrorTypes.h" #include "MicAccessApi.h" #include "MicPowerManagerAPI.h" #include "papi.h" #include "papi_internal.h" #include "papi_vector.h" #include "papi_memory.h" void (*_dl_non_dynamic_init)(void) __attribute__((weak)); /* This is a guess, refine this later */ #define UPDATEFREQ 500000 papi_vector_t _host_micpower_vector; typedef struct host_micpower_register { /** Corresponds to counter slot, indexed from 1, 0 has a special meaning */ unsigned int selector; } host_micpower_register_t; typedef struct host_micpower_reg_alloc { host_micpower_register_t ra_bits; } host_micpower_reg_alloc_t; /** Internal structure used to build the table of events */ typedef struct host_micpower_native_event_entry { host_micpower_register_t resources; char name[PAPI_MAX_STR_LEN]; char description[PAPI_MAX_STR_LEN]; char units[3]; } host_micpower_native_event_entry_t; /** Per-eventset structure used to hold control flags. 
*/ typedef struct host_micpower_control_state { int num_events; int resident[MAX_DEVICES*EVENTS_PER_DEVICE]; long long counts[MAX_DEVICES*EVENTS_PER_DEVICE]; long long lastupdate[MAX_DEVICES]; } host_micpower_control_state_t; /** Per-thread data */ typedef struct host_micpower_context { host_micpower_control_state_t state; } host_micpower_context_t; /* Global state info */ static MicDeviceOnSystem adapters[MAX_DEVICES]; static HANDLE handles[MAX_DEVICES]; static long long lastupdate[MAX_DEVICES]; static HANDLE accessHandle = NULL; static U32 nAdapters = MAX_DEVICES; static void* mic_access = NULL; static void* scif_access = NULL; #undef MICACCESS_API #define MICACCESS_API __attribute__((weak)) const char *MicGetErrorString(U32); U32 MICACCESS_API MicCloseAdapter(HANDLE); U32 MICACCESS_API MicInitAPI(HANDLE *, ETarget, MicDeviceOnSystem *, U32 *); U32 MICACCESS_API MicCloseAPI(HANDLE *); U32 MICACCESS_API MicInitAdapter(HANDLE *, MicDeviceOnSystem *); U32 MICACCESS_API MicGetPowerUsage(HANDLE, MicPwrUsage *); const char *(*MicGetErrorStringPtr)(U32); U32 (*MicCloseAdapterPtr)(HANDLE); U32 (*MicInitAPIPtr)(HANDLE *, ETarget, MicDeviceOnSystem *, U32 *); U32 (*MicCloseAPIPtr)(HANDLE *); U32 (*MicInitAdapterPtr)(HANDLE *, MicDeviceOnSystem *); U32 (*MicGetPowerUsagePtr)(HANDLE, MicPwrUsage *); static host_micpower_native_event_entry_t *native_events_table = NULL; struct powers { int total0; int total1; int inst; int imax; int pcie; int c2x3; int c2x4; int vccp; int vddg; int vddq; }; typedef union { struct powers power; int array[EVENTS_PER_DEVICE]; } power_t; static power_t cached_values[MAX_DEVICES]; static int loadFunctionPtrs() { /* Attempt to guess if we were statically linked to libc, if so bail */ if ( _dl_non_dynamic_init != NULL ) { strncpy(_host_micpower_vector.cmp_info.disabled_reason, "The host_micpower component does not support statically linking of libc.", PAPI_MAX_STR_LEN); return PAPI_ENOSUPP; } /* Need to link in the cuda libraries, if not found 
disable the component */ scif_access = dlopen("libscif.so", RTLD_NOW | RTLD_GLOBAL); if (NULL == scif_access) { snprintf(_host_micpower_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN, "Problem loading the SCIF library: %s\n", dlerror()); _host_micpower_vector.cmp_info.disabled = 1; return ( PAPI_ENOSUPP ); } mic_access = dlopen("libMicAccessSDK.so", RTLD_NOW | RTLD_GLOBAL); if (NULL == mic_access) { snprintf(_host_micpower_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN, "Problem loading libMicAccessSDK.so: %s\n", dlerror()); _host_micpower_vector.cmp_info.disabled = 1; return ( PAPI_ENOSUPP ); } MicGetErrorStringPtr = dlsym(mic_access, "MicGetErrorString"); if (dlerror() != NULL) { strncpy(_host_micpower_vector.cmp_info.disabled_reason, "MicAccessSDK function MicGetErrorString not found.",PAPI_MAX_STR_LEN); _host_micpower_vector.cmp_info.disabled = 1; return ( PAPI_ENOSUPP ); } MicCloseAdapterPtr = dlsym(mic_access, "MicCloseAdapter"); if (dlerror() != NULL) { strncpy(_host_micpower_vector.cmp_info.disabled_reason, "MicAccessSDK function MicCloseAdapter not found.",PAPI_MAX_STR_LEN); _host_micpower_vector.cmp_info.disabled = 1; return ( PAPI_ENOSUPP ); } MicInitAPIPtr = dlsym(mic_access, "MicInitAPI"); if (dlerror() != NULL) { strncpy(_host_micpower_vector.cmp_info.disabled_reason, "MicAccessSDK function MicInitAPI not found.",PAPI_MAX_STR_LEN); _host_micpower_vector.cmp_info.disabled = 1; return ( PAPI_ENOSUPP ); } MicCloseAPIPtr = dlsym(mic_access, "MicCloseAPI"); if (dlerror() != NULL) { strncpy(_host_micpower_vector.cmp_info.disabled_reason, "MicAccessSDK function MicCloseAPI not found.",PAPI_MAX_STR_LEN); _host_micpower_vector.cmp_info.disabled = 1; return ( PAPI_ENOSUPP ); } MicInitAdapterPtr = dlsym(mic_access, "MicInitAdapter"); if (dlerror() != NULL) { strncpy(_host_micpower_vector.cmp_info.disabled_reason, "MicAccessSDK function MicInitAdapter not found.",PAPI_MAX_STR_LEN); _host_micpower_vector.cmp_info.disabled = 1; return ( PAPI_ENOSUPP ); } 
MicGetPowerUsagePtr = dlsym(mic_access, "MicGetPowerUsage"); if (dlerror() != NULL) { strncpy(_host_micpower_vector.cmp_info.disabled_reason, "MicAccessSDK function MicGetPowerUsage not found.",PAPI_MAX_STR_LEN); _host_micpower_vector.cmp_info.disabled = 1; return ( PAPI_ENOSUPP ); } return 0; } /* ############################################### * Component Interface code * ############################################### */ int _host_micpower_init_component( int cidx ) { U32 ret = MIC_ACCESS_API_ERROR_UNKNOWN; U32 adapterNum = 0; U32 throwaway = 1; _host_micpower_vector.cmp_info.CmpIdx = cidx; if ( loadFunctionPtrs() ) { goto disable_me; } memset( lastupdate, 0x0, sizeof(lastupdate)); memset( cached_values, 0x0, sizeof(struct powers)*MAX_DEVICES ); ret = MicInitAPIPtr( &accessHandle, eTARGET_SCIF_DRIVER, adapters, &nAdapters ); if ( MIC_ACCESS_API_SUCCESS != ret ) { snprintf( _host_micpower_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN, "Failed to init: %s", MicGetErrorStringPtr(ret)); MicCloseAPIPtr(&accessHandle); goto disable_me; } /* Sanity check on array size */ if ( nAdapters >= MAX_DEVICES ) { snprintf(_host_micpower_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN, "Too many MIC cards [%d] found, bailing.", nAdapters); MicCloseAPIPtr(&accessHandle); goto disable_me; } /* XXX: This code initializes a token for each adapter, in testing this appeared to be required/ * One has to call MicInitAdapter() before calling into that adapter's entries */ for (adapterNum=0; adapterNum < nAdapters; adapterNum++) { ret = MicInitAPIPtr( &handles[adapterNum], eTARGET_SCIF_DRIVER, adapters, &throwaway ); throwaway = 1; if (MIC_ACCESS_API_SUCCESS != ret) { fprintf(stderr, "%d:MicInitAPI carps: %s\n", __LINE__, MicGetErrorStringPtr(ret)); nAdapters = adapterNum; for (adapterNum=0; adapterNum < nAdapters; adapterNum++) MicCloseAdapterPtr( handles[adapterNum] ); MicCloseAPIPtr( &accessHandle ); snprintf(_host_micpower_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN, 
"Failed to initialize card %d's interface.", nAdapters); goto disable_me; } ret = MicInitAdapterPtr(&handles[adapterNum], &adapters[adapterNum]); if (MIC_ACCESS_API_SUCCESS != ret) { fprintf(stderr, "%d:MicInitAdapter carps: %s\n", __LINE__, MicGetErrorStringPtr(ret)); nAdapters = adapterNum; for (adapterNum=0; adapterNum < nAdapters; adapterNum++) MicCloseAdapterPtr( handles[adapterNum] ); MicCloseAPIPtr( &accessHandle ); snprintf(_host_micpower_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN, "Failed to initialize card %d's interface.", nAdapters); goto disable_me; } } native_events_table = ( host_micpower_native_event_entry_t*)papi_malloc( nAdapters * EVENTS_PER_DEVICE * sizeof(host_micpower_native_event_entry_t)); if ( NULL == native_events_table ) { return PAPI_ENOMEM; } for (adapterNum=0; adapterNum < nAdapters; adapterNum++) { snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE].name, PAPI_MAX_STR_LEN, "mic%d:tot0", adapterNum); snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE].description, PAPI_MAX_STR_LEN, "Total power utilization, Averaged over Time Window 0 (uWatts)"); native_events_table[adapterNum*EVENTS_PER_DEVICE].resources.selector = adapterNum*EVENTS_PER_DEVICE + 1; snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE].units, PAPI_MIN_STR_LEN, "uW"); snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 1].name, PAPI_MAX_STR_LEN, "mic%d:tot1", adapterNum); snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 1].description, PAPI_MAX_STR_LEN, "Total power utilization, Averaged over Time Window 1 (uWatts)"); native_events_table[adapterNum*EVENTS_PER_DEVICE + 1].resources.selector = adapterNum*EVENTS_PER_DEVICE + 2; snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 1].units, PAPI_MIN_STR_LEN, "uW"); snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 2].name, PAPI_MAX_STR_LEN, "mic%d:pcie", adapterNum); snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 2].description, PAPI_MAX_STR_LEN, 
"PCI-E connector power (uWatts)"); native_events_table[adapterNum*EVENTS_PER_DEVICE + 2].resources.selector = adapterNum*EVENTS_PER_DEVICE + 3; snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 2].units, PAPI_MIN_STR_LEN, "uW"); snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 3].name, PAPI_MAX_STR_LEN, "mic%d:inst", adapterNum); snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 3].description, PAPI_MAX_STR_LEN, "Instantaneous power (uWatts)"); native_events_table[adapterNum*EVENTS_PER_DEVICE + 3].resources.selector = adapterNum*EVENTS_PER_DEVICE + 4; snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 3].units, PAPI_MIN_STR_LEN, "uW"); snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 4].name, PAPI_MAX_STR_LEN, "mic%d:imax", adapterNum); snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 4].description, PAPI_MAX_STR_LEN, "Max instantaneous power (uWatts)"); native_events_table[adapterNum*EVENTS_PER_DEVICE + 4].resources.selector = adapterNum*EVENTS_PER_DEVICE + 5; snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 4].units, PAPI_MIN_STR_LEN, "uW"); snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 5].name, PAPI_MAX_STR_LEN, "mic%d:c2x3", adapterNum); snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 5].description, PAPI_MAX_STR_LEN, "2x3 connector power (uWatts)"); native_events_table[adapterNum*EVENTS_PER_DEVICE + 5].resources.selector = adapterNum*EVENTS_PER_DEVICE + 6; snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 5].units, PAPI_MIN_STR_LEN, "uW"); snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 6].name, PAPI_MAX_STR_LEN, "mic%d:c2x4", adapterNum); snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 6].description, PAPI_MAX_STR_LEN, "2x4 connector power (uWatts)"); native_events_table[adapterNum*EVENTS_PER_DEVICE + 6].resources.selector = adapterNum*EVENTS_PER_DEVICE + 7; snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 6].units, 
PAPI_MIN_STR_LEN, "uW"); snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 7].name, PAPI_MAX_STR_LEN, "mic%d:vccp", adapterNum); snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 7].description, PAPI_MAX_STR_LEN, "Core rail (uVolts)"); native_events_table[adapterNum*EVENTS_PER_DEVICE + 7].resources.selector = adapterNum*EVENTS_PER_DEVICE + 8; snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 7].units, PAPI_MIN_STR_LEN, "uV"); snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 8].name, PAPI_MAX_STR_LEN, "mic%d:vddg", adapterNum); snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 8].description, PAPI_MAX_STR_LEN, "Uncore rail (uVolts)"); native_events_table[adapterNum*EVENTS_PER_DEVICE + 8].resources.selector = adapterNum*EVENTS_PER_DEVICE + 9; snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 8].units, PAPI_MIN_STR_LEN, "uV"); snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 9].name, PAPI_MAX_STR_LEN, "mic%d:vddq", adapterNum); snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 9].description, PAPI_MAX_STR_LEN, "Memory subsystem rail (uVolts)"); native_events_table[adapterNum*EVENTS_PER_DEVICE + 9].resources.selector = adapterNum*EVENTS_PER_DEVICE + 10; snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 9].units, PAPI_MIN_STR_LEN, "uV"); } _host_micpower_vector.cmp_info.num_cntrs = EVENTS_PER_DEVICE*nAdapters; _host_micpower_vector.cmp_info.num_mpx_cntrs = EVENTS_PER_DEVICE*nAdapters; _host_micpower_vector.cmp_info.num_native_events = EVENTS_PER_DEVICE*nAdapters; return PAPI_OK; disable_me: _host_micpower_vector.cmp_info.num_cntrs = 0; _host_micpower_vector.cmp_info.num_mpx_cntrs = 0; _host_micpower_vector.cmp_info.num_native_events = 0; _host_micpower_vector.cmp_info.disabled = 1; nAdapters = 0; return PAPI_ENOSUPP; } int _host_micpower_init_thread( hwd_context_t *ctx) { (void)ctx; return PAPI_OK; } int _host_micpower_shutdown_component( void ) { U32 i = 0; for( i=0; 
iresident[i] = 0; for (i=0; i < count; i++) { index = info[i].ni_event&PAPI_NATIVE_AND_MASK; info[i].ni_position=native_events_table[index].resources.selector-1; state->resident[index] = 1; } state->num_events = count; return PAPI_OK; } int _host_micpower_start( hwd_context_t *ctx, hwd_control_state_t *ctl ) { (void) ctx; (void) ctl; return PAPI_OK; } static int read_power( struct powers *pwr, int which_one ) { MicPwrUsage power; U32 ret = MIC_ACCESS_API_ERROR_UNKNOWN; if ( which_one < 0 || which_one > (int)nAdapters ) return PAPI_ENOEVNT; ret = MicGetPowerUsagePtr(handles[which_one], &power); if (MIC_ACCESS_API_SUCCESS != ret) { fprintf(stderr,"Oops MicGetPowerUsage failed: %s\n", MicGetErrorStringPtr(ret)); return PAPI_ECMP; } pwr->total0 = power.total0.prr; pwr->total1 = power.total1.prr; pwr->inst = power.inst.prr; pwr->imax = power.imax.prr; pwr->pcie = power.pcie.prr; pwr->c2x3 = power.c2x3.prr; pwr->c2x4 = power.c2x4.prr; pwr->vccp = power.vccp.pwr; pwr->vddg = power.vddg.pwr; pwr->vddq = power.vddq.pwr; return PAPI_OK; } int _host_micpower_read( hwd_context_t *ctx, hwd_control_state_t *ctl, long long **events, int flags) { (void)flags; (void)events; (void)ctx; unsigned int i,j; int needs_update = 0; host_micpower_control_state_t* control = (host_micpower_control_state_t*)ctl; long long now = PAPI_get_real_usec(); for( i=0; iresident[EVENTS_PER_DEVICE*i+j]) { needs_update = 1; break; } } if ( needs_update ) { /* Do the global update */ if ( now >= lastupdate[i] + UPDATEFREQ) { read_power( &cached_values[i].power, i ); lastupdate[i] = now; } /* update from cached values */ if ( control->lastupdate[i] < lastupdate[i]) { control->lastupdate[i] = lastupdate[i]; } for (j=0; jresident[EVENTS_PER_DEVICE*i+j] ) { control->counts[EVENTS_PER_DEVICE*i+j] = (long long)cached_values[i].array[j]; } } } } *events = control->counts; return PAPI_OK; } int _host_micpower_stop( hwd_context_t *ctx, hwd_control_state_t *ctl ) { (void)ctx; int needs_update = 0; unsigned int i,j; 
host_micpower_control_state_t* control = (host_micpower_control_state_t*)ctl; long long now = PAPI_get_real_usec(); for( i=0; iresident[EVENTS_PER_DEVICE*i+j]) { needs_update = 1; break; } } if ( needs_update ) { /* Do the global update */ if ( now >= lastupdate[i] + UPDATEFREQ) { read_power( &cached_values[i].power, i ); lastupdate[i] = now; } /* update from cached values */ if ( control->lastupdate[i] < lastupdate[i]) { control->lastupdate[i] = lastupdate[i]; } for (j=0; jresident[EVENTS_PER_DEVICE*i+j] ) { control->counts[EVENTS_PER_DEVICE*i+j] = (long long)cached_values[i].array[j]; } } } } return PAPI_OK; } int _host_micpower_ntv_enum_events( unsigned int *EventCode, int modifier ) { int index; switch (modifier) { case PAPI_ENUM_FIRST: if (0 == _host_micpower_vector.cmp_info.num_cntrs) return PAPI_ENOEVNT; *EventCode = 0; return PAPI_OK; case PAPI_ENUM_EVENTS: index = *EventCode; if ( index < _host_micpower_vector.cmp_info.num_cntrs - 1) { *EventCode = *EventCode + 1; return PAPI_OK; } else { return PAPI_ENOEVNT; } break; default: return PAPI_EINVAL; } return PAPI_EINVAL; } int _host_micpower_ntv_code_to_name( unsigned int EventCode, char *name, int len ) { unsigned int code = EventCode & PAPI_NATIVE_AND_MASK; if ( code < _host_micpower_vector.cmp_info.num_cntrs ) { strncpy( name, native_events_table[code].name, len); return PAPI_OK; } return PAPI_ENOEVNT; } int _host_micpower_ntv_code_to_descr( unsigned int EventCode, char *name, int len ) { unsigned int code = EventCode & PAPI_NATIVE_AND_MASK; if ( code < _host_micpower_vector.cmp_info.num_cntrs ) { strncpy( name, native_events_table[code].description, len ); return PAPI_OK; } return PAPI_ENOEVNT; } int _host_micpower_ntv_code_to_info( unsigned int EventCode, PAPI_event_info_t *info) { unsigned int code = EventCode & PAPI_NATIVE_AND_MASK; if ( code >= _host_micpower_vector.cmp_info.num_cntrs) return PAPI_ENOEVNT; strncpy( info->symbol, native_events_table[code].name, sizeof(info->symbol) ); strncpy( 
info->long_descr, native_events_table[code].description, sizeof(info->long_descr) ); strncpy( info->units, native_events_table[code].units, sizeof(info->units) ); return PAPI_OK; } int _host_micpower_ctl( hwd_context_t* ctx, int code, _papi_int_option_t *option) { (void)ctx; (void)code; (void)option; return PAPI_OK; } int _host_micpower_set_domain( hwd_control_state_t* ctl, int domain) { (void)ctl; if ( PAPI_DOM_ALL != domain ) return PAPI_EINVAL; return PAPI_OK; } papi_vector_t _host_micpower_vector = { .cmp_info = { .name = "host_micpower", .short_name = "host_micpower", .description = "A host-side component to read power usage on MIC guest cards.", .version = "0.1", .support_version = "n/a", .kernel_version = "n/a", .num_cntrs = 0, .num_mpx_cntrs = 0, .default_domain = PAPI_DOM_ALL, .available_domains = PAPI_DOM_ALL, .default_granularity = PAPI_GRN_SYS, .available_granularities = PAPI_GRN_SYS, .hardware_intr_sig = PAPI_INT_SIGNAL, }, .size = { .context = sizeof(host_micpower_context_t), .control_state = sizeof(host_micpower_control_state_t), .reg_value = sizeof(host_micpower_register_t), .reg_alloc = sizeof(host_micpower_reg_alloc_t), }, .start = _host_micpower_start, .stop = _host_micpower_start, .read = _host_micpower_read, .reset = NULL, .write = NULL, .init_component = _host_micpower_init_component, .init_thread = _host_micpower_init_thread, .init_control_state = _host_micpower_init_control_state, .update_control_state = _host_micpower_update_control_state, .ctl = _host_micpower_ctl, .shutdown_thread = _host_micpower_shutdown_thread, .shutdown_component = _host_micpower_shutdown_component, .set_domain = _host_micpower_set_domain, .ntv_enum_events = _host_micpower_ntv_enum_events, .ntv_code_to_name = _host_micpower_ntv_code_to_name, .ntv_code_to_descr = _host_micpower_ntv_code_to_descr, .ntv_code_to_info = _host_micpower_ntv_code_to_info, }; papi-5.6.0/src/perfctr-2.7.x/usr.lib/ppc64.c000664 001750 001750 00000025171 13216244370 022225 
0ustar00jshenry1963jshenry1963000000 000000 /* * PPC64-specific perfctr library procedures. * Copyright (C) 2004, 2007 Mikael Pettersson * Copyright (C) 2004 Maynard Johnson * */ #include #include #include #include #include #include #include "libperfctr.h" #include "ppc64.h" static unsigned int __NR_vperfctr_open; #define __NR_vperfctr_control (__NR_vperfctr_open+1) #define __NR_vperfctr_write (__NR_vperfctr_open+2) #define __NR_vperfctr_read (__NR_vperfctr_open+3) #include static void init_sys_vperfctr(void) { if (!__NR_vperfctr_open) { unsigned int nr; unsigned int kver = perfctr_linux_version_code(); if (kver >= PERFCTR_KERNEL_VERSION(2,6,18)) nr = 310; else if (kver >= PERFCTR_KERNEL_VERSION(2,6,16)) nr = 301; else nr = 280; __NR_vperfctr_open = nr; } } /* * The actual syscalls. */ int _sys_vperfctr_open(int fd_unused, int tid, int creat) { init_sys_vperfctr(); return syscall(__NR_vperfctr_open, tid, creat); } int _sys_vperfctr_control(int fd, unsigned int cmd) { init_sys_vperfctr(); return syscall(__NR_vperfctr_control, fd, cmd); } static int _sys_vperfctr_write(int fd, unsigned int domain, const void *arg, unsigned int argbytes) { init_sys_vperfctr(); return syscall(__NR_vperfctr_write, fd, domain, arg, argbytes); } static int _sys_vperfctr_read(int fd, unsigned int domain, void *arg, unsigned int argbytes) { init_sys_vperfctr(); return syscall(__NR_vperfctr_read, fd, domain, arg, argbytes); } /* * Simple syscall wrappers. 
*/ int _sys_vperfctr_read_sum(int fd, struct perfctr_sum_ctrs *arg) { return _sys_vperfctr_read(fd, VPERFCTR_DOMAIN_SUM, arg, sizeof(*arg)); } int _sys_vperfctr_read_children(int fd, struct perfctr_sum_ctrs *arg) { return _sys_vperfctr_read(fd, VPERFCTR_DOMAIN_CHILDREN, arg, sizeof(*arg)); } int _sys_vperfctr_unlink(int fd) { return _sys_vperfctr_control(fd, VPERFCTR_CONTROL_UNLINK); } int _sys_vperfctr_iresume(int fd) { return _sys_vperfctr_control(fd, VPERFCTR_CONTROL_RESUME); } #define SPRN_PVR 0x11F /* Processor Version Register */ #define SPRN_MMCRA 786 #define SPRN_MMCR0 795 #define SPRN_MMCR1 798 #define SPRN_PMC1 787 #define SPRN_PMC2 788 #define SPRN_PMC3 789 #define SPRN_PMC4 790 #define SPRN_PMC5 791 #define SPRN_PMC6 792 #define SPRN_PMC7 793 #define SPRN_PMC8 794 #if 0 static void show_regs(const struct perfctr_cpu_reg *regs, unsigned int n) { unsigned int i; fprintf(stderr, "CPU Register Values:\n"); for(i = 0; i < n; ++i) fprintf(stderr, "SPR %#x\t0x%08x\n", regs[i].nr, regs[i].value); } #else #define show_regs(regs, n) do{}while(0) #endif static int read_packet(int fd, unsigned int domain, void *arg, unsigned int argbytes) { int ret; ret = _sys_vperfctr_read(fd, domain, arg, argbytes); if (ret != argbytes && ret >= 0) { errno = EPROTO; return -1; } return ret; } static unsigned int pmc_to_spr(unsigned int pmc) { switch (pmc) { default: /* impossible, but silences gcc warning */ case (1-1): return SPRN_PMC1; case (2-1): return SPRN_PMC2; case (3-1): return SPRN_PMC3; case (4-1): return SPRN_PMC4; case (5-1): return SPRN_PMC5; case (6-1): return SPRN_PMC6; case (7-1): return SPRN_PMC7; case (8-1): return SPRN_PMC8; } } static int write_cpu_regs(int fd, const struct perfctr_cpu_control *control) { struct perfctr_cpu_reg regs[3+8]; unsigned int nrctrs, nractrs, pmc_mask, nr_regs, i, pmc; nractrs = control->nractrs; nrctrs = nractrs + control->nrictrs; if (nrctrs < nractrs || nrctrs > 8) { errno = EINVAL; return -1; } if (!nrctrs) return 0; nr_regs = 0; 
pmc_mask = 0; for (i = 0; i < nrctrs; ++i) { pmc = control->pmc_map[i]; if (pmc >= 8 || (pmc_mask & (1<= nractrs) { unsigned int j = 3 + (i - nractrs); regs[j].nr = pmc_to_spr(pmc); regs[j].value = control->ireset[i]; } } regs[0].nr = SPRN_MMCR0; regs[0].value = control->ppc64.mmcr0; regs[1].nr = SPRN_MMCR1; regs[1].value = control->ppc64.mmcr1; regs[2].nr = SPRN_MMCRA; regs[2].value = control->ppc64.mmcra; nr_regs = 3 + (nrctrs - nractrs); show_regs(regs, nr_regs); return _sys_vperfctr_write(fd, PERFCTR_DOMAIN_CPU_REGS, regs, nr_regs*sizeof(regs[0])); } static int read_cpu_regs(int fd, struct perfctr_cpu_control *control) { struct perfctr_cpu_reg regs[3+8]; unsigned int nrctrs, nractrs, pmc_mask, nr_regs, i, pmc; int ret; nractrs = control->nractrs; nrctrs = nractrs + control->nrictrs; if (nrctrs < nractrs || nrctrs > 8) { errno = EINVAL; return -1; } if (!nrctrs) return 0; nr_regs = 0; pmc_mask = 0; for(i = 0; i < nrctrs; ++i) { pmc = control->pmc_map[i]; if (pmc >= 8 || (pmc_mask & (1<= nractrs) { unsigned int j = 3 + (i - nractrs); regs[j].nr = pmc_to_spr(pmc); } } regs[0].nr = SPRN_MMCR0; regs[1].nr = SPRN_MMCR1; regs[2].nr = SPRN_MMCRA; nr_regs = 3 + (nrctrs - nractrs); ret = read_packet(fd, PERFCTR_DOMAIN_CPU_REGS, regs, nr_regs*sizeof(regs[0])); if (ret < 0) return ret; show_regs(regs, nr_regs); for(i = 0; i < nrctrs; ++i) { if (i >= nractrs) control->ireset[i] = regs[3 + (i - nractrs)].value; } control->ppc64.mmcr0 = regs[0].value; control->ppc64.mmcr1 = regs[1].value; control->ppc64.mmcra = regs[2].value; return 0; } int _sys_vperfctr_write_control(int fd, unsigned int cpu_type, const struct vperfctr_control *control) { union { struct vperfctr_control_kernel control; struct perfctr_cpu_control_header header; } u; unsigned int nrctrs; int ret; ret = _sys_vperfctr_control(fd, VPERFCTR_CONTROL_CLEAR); if (ret < 0) return ret; u.control.si_signo = control->si_signo; u.control.preserve = control->preserve; ret = _sys_vperfctr_write(fd, VPERFCTR_DOMAIN_CONTROL, 
&u.control, sizeof u.control); if (ret < 0) return ret; u.header.tsc_on = control->cpu_control.tsc_on; u.header.nractrs = control->cpu_control.nractrs; u.header.nrictrs = control->cpu_control.nrictrs; ret = _sys_vperfctr_write(fd, PERFCTR_DOMAIN_CPU_CONTROL, &u.header, sizeof u.header); if (ret < 0) return ret; nrctrs = control->cpu_control.nractrs + control->cpu_control.nrictrs; ret = _sys_vperfctr_write(fd, PERFCTR_DOMAIN_CPU_MAP, &control->cpu_control.pmc_map, nrctrs * sizeof control->cpu_control.pmc_map[0]); if (ret < 0) return ret; ret = write_cpu_regs(fd, &control->cpu_control); if (ret < 0) return ret; return _sys_vperfctr_control(fd, VPERFCTR_CONTROL_RESUME); } int _sys_vperfctr_read_control(int fd, unsigned int cpu_type, struct vperfctr_control *control) { union { struct vperfctr_control_kernel control; struct perfctr_cpu_control_header header; } u; unsigned int nrctrs; int ret; memset(control, 0, sizeof *control); ret = read_packet(fd, VPERFCTR_DOMAIN_CONTROL, &u.control, sizeof u.control); if (ret < 0) return ret; control->si_signo = u.control.si_signo; control->preserve = u.control.preserve; ret = read_packet(fd, PERFCTR_DOMAIN_CPU_CONTROL, &u.header, sizeof u.header); if (ret < 0) return ret; control->cpu_control.tsc_on = u.header.tsc_on; control->cpu_control.nractrs = u.header.nractrs; control->cpu_control.nrictrs = u.header.nrictrs; nrctrs = control->cpu_control.nractrs + control->cpu_control.nrictrs; ret = read_packet(fd, PERFCTR_DOMAIN_CPU_MAP, &control->cpu_control.pmc_map, nrctrs * sizeof control->cpu_control.pmc_map[0]); if (ret < 0) return ret; return read_cpu_regs(fd, &control->cpu_control); } #define PVR_VER(pvr) (((pvr) >> 16) & 0xFFFF) /* Version field */ #define PVR_REV(pvr) (((pvr) >> 0) & 0xFFFF) /* Revison field */ /* Processor Version Numbers */ #define PV_NORTHSTAR 0x0033 #define PV_PULSAR 0x0034 #define PV_POWER4 0x0035 #define PV_ICESTAR 0x0036 #define PV_SSTAR 0x0037 #define PV_POWER4p 0x0038 #define PV_970 0x0039 #define PV_POWER5 
0x003A #define PV_POWER5p 0x003B #define PV_970FX 0x003C #define PV_630 0x0040 #define PV_630p 0x0041 #define PV_970MP 0x0044 #define PV_970GX 0x0045 static unsigned int mfpvr(void) { unsigned long pvr; asm("mfspr %0,%1" : "=r"(pvr) : "i"(SPRN_PVR)); return pvr; } void perfctr_info_cpu_init(struct perfctr_info *info) { unsigned int pvr = mfpvr(); int cpu_type; switch (PVR_VER(pvr)) { case PV_POWER4: cpu_type = PERFCTR_PPC64_POWER4; break; case PV_POWER4p: cpu_type = PERFCTR_PPC64_POWER4p; break; case PV_970: case PV_970FX: cpu_type = PERFCTR_PPC64_970; break; case PV_970MP: cpu_type = PERFCTR_PPC64_970MP; break; case PV_POWER5: case PV_POWER5p: cpu_type = PERFCTR_PPC64_POWER5; break; default: cpu_type = PERFCTR_PPC64_GENERIC; break; } info->cpu_type = cpu_type; return; } unsigned int perfctr_info_nrctrs(const struct perfctr_info *info) { switch( info->cpu_type ) { case PERFCTR_PPC64_POWER4: case PERFCTR_PPC64_POWER4p: case PERFCTR_PPC64_970: case PERFCTR_PPC64_970MP: return 8; case PERFCTR_PPC64_POWER5: return 6; default: return 0; } } const char *perfctr_info_cpu_name(const struct perfctr_info *info) { switch( info->cpu_type ) { case PERFCTR_PPC64_GENERIC: return "Generic PowerPC64"; case PERFCTR_PPC64_POWER4: return "POWER4"; case PERFCTR_PPC64_POWER4p: return "POWER4+"; case PERFCTR_PPC64_970: return "PowerPC 970"; case PERFCTR_PPC64_970MP: return "PowerPC 970MP"; case PERFCTR_PPC64_POWER5: return "POWER5"; default: return "?"; } } void perfctr_cpu_control_print(const struct perfctr_cpu_control *control) { unsigned int i, nractrs, nrictrs, nrctrs; nractrs = control->nractrs; nrictrs = control->nrictrs; nrctrs = control->nractrs + nrictrs; printf("tsc_on\t\t\t%u\n", control->tsc_on); printf("nractrs\t\t\t%u\n", nractrs); if( nrictrs ) printf("nrictrs\t\t\t%u\n", nrictrs); for(i = 0; i < nrctrs; ++i) { printf("pmc[%u].map\t\t%u\n", i, control->pmc_map[i]); if( i >= nractrs ) printf("pmc[%u].ireset\t\t%d\n", i, control->ireset[i]); } if( control->ppc64.mmcr0 ) 
printf("mmcr0\t\t\t0x%08X\n", control->ppc64.mmcr0); if( control->ppc64.mmcr1 ) printf("mmcr1\t\t\t0x%016llX\n", (unsigned long long)control->ppc64.mmcr1); if( control->ppc64.mmcra ) printf("mmcra\t\t\t0x%08X\n", control->ppc64.mmcra); } papi-5.6.0/src/components/appio/tests/iozone/libasync.c000664 001750 001750 00000123062 13216244356 025244 0ustar00jshenry1963jshenry1963000000 000000 /* * Library for Posix async read operations with hints. * Author: Don Capps * Company: Iozone * Date: 4/24/1998 * * Two models are supported. First model is a replacement for read() where the async * operations are performed and the requested data is bcopy()-ed back into the users * buffer. The second model is a new version of read() where the caller does not * supply the address of the buffer but instead is returned an address to the * location of the data. The second model eliminates a bcopy from the path. * * To use model #1: * 1. Call async_init(&pointer_on_stack,fd,direct_flag); * The fd is the file descriptor for the async operations. * The direct_flag sets VX_DIRECT * * 2. Call async_read(gc, fd, ubuffer, offset, size, stride, max, depth) * Where: * gc ............ is the pointer on the stack * fd ............ is the file descriptor * ubuffer ....... is the address of the user buffer. * offset ........ is the offset in the file to begin reading * size .......... is the size of the transfer. * stride ........ is the distance, in size units, to space the async reads. * max ........... is the max size of the file to be read. * depth ......... is the number of async operations to perform. * * 3. Call end_async(gc) when finished. * Where: * gc ............ is the pointer on the stack. * * To use model #2: * 1. Call async_init(&pointer_on_stack,fd,direct_flag); * The fd is the file descriptor for the async operations. * The direct_flag sets VX_DIRECT * 2. Call async_read(gc, fd, &ubuffer, offset, size, stride, max, depth) * Where: * gc ............ 
is the pointer on the stack * fd ............ is the file descriptor * ubuffer ....... is the address of a pointer that will be filled in * by the async library. * offset ........ is the offset in the file to begin reading * size .......... is the size of the transfer. * stride ........ is the distance, in size units, to space the async reads. * max ........... is the max size of the file to be read. * depth ......... is the number of async operations to perform. * * 3. Call async_release(gc) when finished with the data that was returned. * This allows the async library to reuse the memory that was filled in * and returned to the user. * * 4. Call end_async(gc) when finished. * Where: * gc ............ is the pointer on the stack. * * To use model #1: (WRITES) * 1. Call async_init(&pointer_on_stack,fd,direct_flag); * The fd is the file descriptor for the async operations. * * 2. Call async_write(gc, fd, ubuffer, size, offset, depth) * Where: * gc ............ is the pointer on the stack * fd ............ is the file descriptor * ubuffer ....... is the address of the user buffer. * size .......... is the size of the transfer. * offset ........ is the offset in the file to begin writing * depth ......... is the number of async operations to perform. * * 3. Call end_async(gc) when finished. * Where: * gc ............ is the pointer on the stack. * * Notes: * The intended use is to replace calls to read() with calls to * async_read() and allow the user to make suggestions on * what kind of async read-ahead would be nice to have. * The first transfer requested is guaranteed to be complete * before returning to the caller. The async operations will * be started and will also be guaranteed to have completed * if the next call specifies its first request to be one * that was previously performed with an async operation.
* * The async_read_no_copy() function allows the async operations * to return the data to the user and not have to perform * a bcopy of the data back into the user specified buffer * location. This model is faster but assumes that the user * application has been modified to work with this model. * * The async_write() is intended to enhance the performance of * initial writes to a file. This is the slowest case in the write * path as it must perform meta-data allocations and wait. */ #include #include #if defined(solaris) || defined(linux) || defined(SCO_Unixware_gcc) #else #include #endif #include #include #ifndef bsd4_4 #include #endif #ifdef VXFS #include #endif #if defined(OSFV5) || defined(linux) #include #endif #if defined(linux) #include #include #include #endif #if (defined(solaris) && defined(__LP64__)) || defined(__s390x__) || defined(FreeBSD) /* If we are building for 64-bit Solaris, all functions that return pointers * must be declared before they are used; otherwise the compiler will assume * that they return ints and the top 32 bits of the pointer will be lost, * causing segmentation faults. The following includes take care of this. * It should be safe to add these for all other OSs too, but we're only * doing it for Solaris now in case another OS turns out to be a special case. */ #include #include #include /* For the BSD string functions */ #endif void mbcopy(char *source, char *dest, size_t len); #if !defined(solaris) && !defined(off64_t) && !defined(_OFF64_T) && !defined(__off64_t_defined) && !defined(SCO_Unixware_gcc) typedef long long off64_t; #endif #if defined(OSFV5) #include #endif extern long long page_size; extern int one; /* * Internal cache entrys. Each entry on the global * cache, pointed to by async_init(gc) will be of * this structure type. 
*/
/* Library version string. */
char version[] = "Libasync Version $Revision$";

/*
 * One outstanding (or completed but not yet consumed) asynchronous request.
 * Entries are chained through forward/back on the read cache list, on the
 * in-use list, or on the pending-write queue of a struct cache.
 */
struct cache_ent {
	struct aiocb myaiocb;			/* For use in small file mode */
#ifdef _LARGEFILE64_SOURCE
#if defined(__CrayX1__)
	aiocb64_t myaiocb64;			/* For use in large file mode */
#else
	struct aiocb64 myaiocb64;		/* For use in large file mode */
#endif
#endif
	long long fd;				/* File descriptor */
	long long size;				/* Size of the transfer */
						/* NOTE(review): no assignment to this field */
						/* is visible in this part of the file */
	struct cache_ent *forward;		/* link to next element on cache list */
	struct cache_ent *back;			/* link to previous element on the cache list */
	long long direct;			/* zero: takeoff_cache() and */
						/* async_wait_for_write() free the entry and */
						/* real_address; non-zero: they leave both */
						/* allocated */
	char *real_address;			/* Real address to free (the aio buffer is an */
						/* aligned address inside this allocation) */

	volatile void *oldbuf;			/* Copy of aio_buf taken before submission; */
						/* used as a firewall to detect in flight */
						/* accidents */
	int oldfd;				/* Copy of aio_fildes taken before submission */
	size_t oldsize;				/* Copy of aio_nbytes taken before submission */
};

/*
 * Head of the cache list
 */
struct cache {
	struct cache_ent *head;		/* Head of cache list */
	struct cache_ent *tail;		/* tail of cache list */
	struct cache_ent *inuse_head;	/* head of in-use list (see putoninuse()) */
	long long count;		/* How many elements on the cache list */
	struct cache_ent *w_head;	/* Head of the pending-write queue */
	struct cache_ent *w_tail;	/* Tail of the pending-write queue */
	long long w_count;		/* How many elements on the write list */
};

/* Upper bound applied to the read-ahead depth; set by async_init(). */
long long max_depth;
extern int errno;

/* Old-style (unprototyped) declarations of the routines defined below. */
struct cache_ent *alloc_cache();
struct cache_ent *incache();
void async_init();
void end_async();
int async_suspend();
int async_read();
void takeoff_cache();
void del_cache();
void async_release();
void putoninuse();
void takeoffinuse();
struct cache_ent *allocate_write_buffer();
size_t async_write();
void async_wait_for_write();
void async_put_on_write_queue();
void async_write_finish();

/* On Solaris _LP64 will be defined by a system header if we're compiling
 * as a 64-bit binary. Make sure that __LP64__ gets defined in this case,
 * too -- it should be defined on the compiler command line, but let's
 * not rely on this.
*/ #if defined(_LP64) #if !defined(__LP64__) #define __LP64__ #endif #endif /***********************************************/ /* Initialization routine to setup the library */ /***********************************************/ void async_init(gc,fd,flag) struct cache **gc; int fd; int flag; { #ifdef VXFS if(flag) ioctl(fd,VX_SETCACHE,VX_DIRECT); #endif if(*gc) { printf("Warning calling async_init two times ?\n"); return; } *gc=(struct cache *)malloc((size_t)sizeof(struct cache)); if(*gc == 0) { printf("Malloc failed\n"); exit(174); } bzero(*gc,sizeof(struct cache)); #if defined(__AIX__) || defined(SCO_Unixware_gcc) max_depth=500; #else max_depth=sysconf(_SC_AIO_MAX); #endif } /***********************************************/ /* Tear down routine to shutdown the library */ /***********************************************/ void end_async(gc) struct cache *gc; { del_cache(gc); async_write_finish(gc); free((void *)gc); } /***********************************************/ /* Wait for a request to finish */ /***********************************************/ int async_suspend(struct cache_ent *ce) { #ifdef _LARGEFILE64_SOURCE #ifdef __LP64__ const struct aiocb * const cblist[1] = {&ce->myaiocb}; #else const struct aiocb64 * const cblist[1] = {&ce->myaiocb64}; #endif #else const struct aiocb * const cblist[1] = {&ce->myaiocb}; #endif #ifdef _LARGEFILE64_SOURCE #ifdef __LP64__ return aio_suspend(cblist, 1, NULL); #else return aio_suspend64(cblist, 1, NULL); #endif #else return aio_suspend(cblist, 1, NULL); #endif } /************************************************************************* * This routine is a generic async reader assist funtion. It takes * the same calling parameters as read() but also extends the * interface to include: * stride ..... For the async reads, what is the distance, in size units, * to space the reads. Note: Stride of 0 indicates that * you do not want any read-ahead. * max ..... What is the maximum file offset for this operation. * depth ..... 
How much read-ahead do you want.
 *
 * The calls to this will guarantee to complete the read() operation
 * before returning to the caller. The completion may occur in two
 * ways. First the operation may be completed by calling aio_read()
 * and then waiting for it to complete. Second the operation may be
 * completed by copying the data from a cache of previously completed
 * async operations.
 * In the event the read to be satisfied is not in the cache then a
 * series of async operations will be scheduled and then the first
 * async read will be completed. In the event that the read() can be
 * satisfied from the cache then the data is copied back to the
 * user buffer and a series of async reads will be initiated. If a
 * read is issued and the cache contains data and the read can not
 * be satisfied from the cache, then the cache is discarded, and
 * a new cache is constructed.
 * Note: All operations are aio_read(). The series will be issued
 *       as asyncs in the order requested. After all are in flight
 *       then the code will wait for the mandatory first read.
*************************************************************************/ int async_read(gc, fd, ubuffer, offset, size, stride, max, depth) struct cache *gc; long long fd; char *ubuffer; off64_t offset; long long size; long long stride; off64_t max; long long depth; { off64_t a_offset,r_offset; long long a_size; struct cache_ent *ce,*first_ce=0; long long i; ssize_t retval=0; ssize_t ret; long long start = 0; long long del_read=0; a_offset=offset; a_size = size; /* * Check to see if it can be completed from the cache */ if((ce=(struct cache_ent *)incache(gc,fd,offset,size))) { #ifdef _LARGEFILE64_SOURCE #ifdef __LP64__ while((ret=aio_error(&ce->myaiocb))== EINPROGRESS) { async_suspend(ce); } #else while((ret=aio_error64(&ce->myaiocb64))== EINPROGRESS) { async_suspend(ce); } #endif #else while((ret=aio_error(&ce->myaiocb))== EINPROGRESS) { async_suspend(ce); } #endif if(ret) { printf("aio_error 1: ret %d %d\n",ret,errno); } #ifdef _LARGEFILE64_SOURCE #ifdef __LP64__ retval=aio_return(&ce->myaiocb); #else #if defined(__CrayX1__) retval=aio_return64((aiocb64_t *)&ce->myaiocb64); #else retval=aio_return64((struct aiocb64 *)&ce->myaiocb64); #endif #endif #else retval=aio_return(&ce->myaiocb); #endif if(retval > 0) { #ifdef _LARGEFILE64_SOURCE #ifdef __LP64__ mbcopy((char *)ce->myaiocb.aio_buf,(char *)ubuffer,(size_t)retval); #else mbcopy((char *)ce->myaiocb64.aio_buf,(char *)ubuffer,(size_t)retval); #endif #else mbcopy((char *)ce->myaiocb.aio_buf,(char *)ubuffer,(size_t)retval); #endif } #ifdef _LARGEFILE64_SOURCE #ifdef __LP64__ if(retval < ce->myaiocb.aio_nbytes) #else if(retval < ce->myaiocb64.aio_nbytes) #endif #else if(retval < ce->myaiocb.aio_nbytes) #endif { printf("aio_return error1: ret %d %d\n",retval,errno); #ifdef _LARGEFILE64_SOURCE #ifdef __LP64__ printf("aio_return error1: fd %d offset %ld buffer %lx size %d Opcode %d\n", ce->myaiocb.aio_fildes, ce->myaiocb.aio_offset, (long)(ce->myaiocb.aio_buf), ce->myaiocb.aio_nbytes, ce->myaiocb.aio_lio_opcode #else 
printf("aio_return error1: fd %d offset %lld buffer %lx size %d Opcode %d\n", ce->myaiocb64.aio_fildes, ce->myaiocb64.aio_offset, (long)(ce->myaiocb64.aio_buf), ce->myaiocb64.aio_nbytes, ce->myaiocb64.aio_lio_opcode #endif #else printf("aio_return error1: fd %d offset %d buffer %lx size %d Opcode %d\n", ce->myaiocb.aio_fildes, ce->myaiocb.aio_offset, (long)(ce->myaiocb.aio_buf), ce->myaiocb.aio_nbytes, ce->myaiocb.aio_lio_opcode #endif ); } ce->direct=0; takeoff_cache(gc,ce); }else { /* * Clear the cache and issue the first request async() */ del_cache(gc); del_read++; first_ce=alloc_cache(gc,fd,offset,size,(long long)LIO_READ); again: #ifdef _LARGEFILE64_SOURCE #ifdef __LP64__ ret=aio_read(&first_ce->myaiocb); #else ret=aio_read64(&first_ce->myaiocb64); #endif #else ret=aio_read(&first_ce->myaiocb); #endif if(ret!=0) { if(errno==EAGAIN) goto again; else printf("error returned from aio_read(). Ret %d errno %d\n",ret,errno); } } if(stride==0) /* User does not want read-ahead */ goto out; if(a_offset<0) /* Before beginning of file */ goto out; if(a_offset+size>max) /* After end of file */ goto out; if(depth >=(max_depth-1)) depth=max_depth-1; if(depth==0) goto out; if(gc->count > 1) start=depth-1; for(i=start;i max) continue; if((ce=incache(gc,fd,r_offset,a_size))) continue; ce=alloc_cache(gc,fd,r_offset,a_size,(long long)LIO_READ); #ifdef _LARGEFILE64_SOURCE #ifdef __LP64__ ret=aio_read(&ce->myaiocb); #else ret=aio_read64(&ce->myaiocb64); #endif #else ret=aio_read(&ce->myaiocb); #endif if(ret!=0) { takeoff_cache(gc,ce); break; } } out: if(del_read) /* Wait for the first read to complete */ { #ifdef _LARGEFILE64_SOURCE #ifdef __LP64__ while((ret=aio_error(&first_ce->myaiocb))== EINPROGRESS) { async_suspend(first_ce); } #else while((ret=aio_error64(&first_ce->myaiocb64))== EINPROGRESS) { async_suspend(first_ce); } #endif #else while((ret=aio_error(&first_ce->myaiocb))== EINPROGRESS) { async_suspend(first_ce); } #endif if(ret) printf("aio_error 2: ret %d 
%d\n",ret,errno); #ifdef _LARGEFILE64_SOURCE #ifdef __LP64__ retval=aio_return(&first_ce->myaiocb); #else retval=aio_return64(&first_ce->myaiocb64); #endif #else retval=aio_return(&first_ce->myaiocb); #endif #ifdef _LARGEFILE64_SOURCE #ifdef __LP64__ if(retval < first_ce->myaiocb.aio_nbytes) #else if(retval < first_ce->myaiocb64.aio_nbytes) #endif #else if(retval < first_ce->myaiocb.aio_nbytes) #endif { printf("aio_return error2: ret %d %d\n",retval,errno); #ifdef _LARGEFILE64_SOURCE #ifdef __LP64__ printf("aio_return error2: fd %d offset %lld buffer %lx size %d Opcode %d\n", first_ce->myaiocb.aio_fildes, first_ce->myaiocb.aio_offset, (long)(first_ce->myaiocb.aio_buf), first_ce->myaiocb.aio_nbytes, first_ce->myaiocb.aio_lio_opcode #else printf("aio_return error2: fd %d offset %lld buffer %lx size %d Opcode %d\n", first_ce->myaiocb64.aio_fildes, first_ce->myaiocb64.aio_offset, (long)(first_ce->myaiocb64.aio_buf), first_ce->myaiocb64.aio_nbytes, first_ce->myaiocb64.aio_lio_opcode #endif #else printf("aio_return error2: fd %d offset %d buffer %lx size %d Opcode %d\n", first_ce->myaiocb.aio_fildes, first_ce->myaiocb.aio_offset, (long)(first_ce->myaiocb.aio_buf), first_ce->myaiocb.aio_nbytes, first_ce->myaiocb.aio_lio_opcode #endif ); } if(retval > 0) { #ifdef _LARGEFILE64_SOURCE #ifdef __LP64__ mbcopy((char *)first_ce->myaiocb.aio_buf,(char *)ubuffer,(size_t)retval); #else mbcopy((char *)first_ce->myaiocb64.aio_buf,(char *)ubuffer,(size_t)retval); #endif #else mbcopy((char *)first_ce->myaiocb.aio_buf,(char *)ubuffer,(size_t)retval); #endif } first_ce->direct=0; takeoff_cache(gc,first_ce); } return((int)retval); } /************************************************************************ * This routine allocates a cache_entry. It contains the * aiocb block as well as linkage for use in the cache mechanism. * The space allocated here will be released after the cache entry * has been consumed. 
The routine takeoff_cache() will be called * after the data has been copied to user buffer or when the * cache is purged. The routine takeoff_cache() will also release * all memory associated with this cache entry. ************************************************************************/ struct cache_ent * alloc_cache(gc,fd,offset,size,op) struct cache *gc; long long fd,size,op; off64_t offset; { struct cache_ent *ce; long temp; ce=(struct cache_ent *)malloc((size_t)sizeof(struct cache_ent)); if(ce == (struct cache_ent *)0) { printf("Malloc failed\n"); exit(175); } bzero(ce,sizeof(struct cache_ent)); #ifdef _LARGEFILE64_SOURCE #ifdef __LP64__ ce->myaiocb.aio_fildes=(int)fd; ce->myaiocb.aio_offset=(off64_t)offset; ce->real_address = (char *)malloc((size_t)(size+page_size)); temp=(long)ce->real_address; temp = (temp+page_size) & ~(page_size-1); ce->myaiocb.aio_buf=(volatile void *)temp; if(ce->myaiocb.aio_buf == 0) #else ce->myaiocb64.aio_fildes=(int)fd; ce->myaiocb64.aio_offset=(off64_t)offset; ce->real_address = (char *)malloc((size_t)(size+page_size)); temp=(long)ce->real_address; temp = (temp+page_size) & ~(page_size-1); ce->myaiocb64.aio_buf=(volatile void *)temp; if(ce->myaiocb64.aio_buf == 0) #endif #else ce->myaiocb.aio_fildes=(int)fd; ce->myaiocb.aio_offset=(off_t)offset; ce->real_address = (char *)malloc((size_t)(size+page_size)); temp=(long)ce->real_address; temp = (temp+page_size) & ~(page_size-1); ce->myaiocb.aio_buf=(volatile void *)temp; if(ce->myaiocb.aio_buf == 0) #endif { printf("Malloc failed\n"); exit(176); } /*bzero(ce->myaiocb.aio_buf,(size_t)size);*/ #ifdef _LARGEFILE64_SOURCE #ifdef __LP64__ ce->myaiocb.aio_reqprio=0; ce->myaiocb.aio_nbytes=(size_t)size; ce->myaiocb.aio_sigevent.sigev_notify=SIGEV_NONE; ce->myaiocb.aio_lio_opcode=(int)op; #else ce->myaiocb64.aio_reqprio=0; ce->myaiocb64.aio_nbytes=(size_t)size; ce->myaiocb64.aio_sigevent.sigev_notify=SIGEV_NONE; ce->myaiocb64.aio_lio_opcode=(int)op; #endif #else ce->myaiocb.aio_reqprio=0; 
ce->myaiocb.aio_nbytes=(size_t)size; ce->myaiocb.aio_sigevent.sigev_notify=SIGEV_NONE; ce->myaiocb.aio_lio_opcode=(int)op; #endif ce->fd=(int)fd; ce->forward=0; ce->back=gc->tail; if(gc->tail) gc->tail->forward = ce; gc->tail= ce; if(!gc->head) gc->head=ce; gc->count++; return(ce); } /************************************************************************ * This routine checks to see if the requested data is in the * cache. *************************************************************************/ struct cache_ent * incache(gc,fd,offset,size) struct cache *gc; long long fd,size; off64_t offset; { struct cache_ent *move; if(gc->head==0) { return(0); } move=gc->head; #ifdef _LARGEFILE64_SOURCE #ifdef __LP64__ while(move) { if((move->fd == fd) && (move->myaiocb.aio_offset==(off64_t)offset) && ((size_t)size==move->myaiocb.aio_nbytes)) { return(move); } move=move->forward; } #else while(move) { if((move->fd == fd) && (move->myaiocb64.aio_offset==(off64_t)offset) && ((size_t)size==move->myaiocb64.aio_nbytes)) { return(move); } move=move->forward; } #endif #else while(move) { if((move->fd == fd) && (move->myaiocb.aio_offset==(off_t)offset) && ((size_t)size==move->myaiocb.aio_nbytes)) { return(move); } move=move->forward; } #endif return(0); } /************************************************************************ * This routine removes a specific cache entry from the cache, and * releases all memory associated witht the cache entry (if not direct). 
*************************************************************************/
/*
 * takeoff_cache: unlink `ce` from the cache list of `gc`.
 *
 * When ce->direct is zero the entry and its buffer (real_address) are freed;
 * otherwise both are left allocated for the caller.  If `ce` is not on the
 * list an error is printed and the list and its count are left untouched,
 * but the entry is still released as described above.
 */
void
takeoff_cache(gc,ce)
struct cache *gc;
struct cache_ent *ce;
{
	struct cache_ent *move;

	if(gc->head==ce) /* Head of list */
	{
		gc->head=ce->forward;
		if(gc->head)
			gc->head->back=0;
		else
			gc->tail = 0;
		gc->count--;
	}
	else
	{
		/* Walk the list to make sure the entry really is on it. */
		for(move=gc->head; move && move!=ce; move=move->forward)
			;
		if(move)
		{
			if(ce->forward)
				ce->forward->back=ce->back;
			else
				gc->tail=ce->back;	/* entry was the tail: keep */
							/* tail off the freed node */
			if(ce->back)
				ce->back->forward=ce->forward;
			gc->count--;
		}
		else
			printf("Internal Error in takeoff cache\n");
	}
	if(!ce->direct)
	{
		free((void *)(ce->real_address));
		free((void *)ce);
	}
}

/************************************************************************
 * This routine is used to purge the entire cache. This is called when
 * the cache contains data but the incoming read was not able to
 * be satisfied from the cache. This indicates that the previous
 * async read-ahead was not correct and a new pattern is emerging.
************************************************************************/ void del_cache(gc) struct cache *gc; { struct cache_ent *ce; ssize_t ret; ce=gc->head; while(1) { ce=gc->head; if(ce==0) return; #ifdef _LARGEFILE64_SOURCE #ifdef __LP64__ while((ret = aio_cancel(0,&ce->myaiocb))==AIO_NOTCANCELED) #else while((ret = aio_cancel64(0,&ce->myaiocb64))==AIO_NOTCANCELED) #endif #else while((ret = aio_cancel(0,&ce->myaiocb))==AIO_NOTCANCELED) #endif ; #ifdef _LARGEFILE64_SOURCE #ifdef __LP64__ ret = aio_return(&ce->myaiocb); #else ret = aio_return64(&ce->myaiocb64); #endif #else ret = aio_return(&ce->myaiocb); #endif ce->direct=0; takeoff_cache(gc,ce); /* remove from cache */ } } /************************************************************************ * Like its sister async_read() this function performs async I/O for * all buffers but it differs in that it expects the caller to * request a pointer to the data to be returned instead of handing * the function a location to put the data. This will allow the * async I/O to be performed and does not require any bcopy to be * done to put the data back into the location specified by the caller. 
************************************************************************/ int async_read_no_copy(gc, fd, ubuffer, offset, size, stride, max, depth) struct cache *gc; long long fd; char **ubuffer; off64_t offset; long long size; long long stride; off64_t max; long long depth; { off64_t a_offset,r_offset; long long a_size; struct cache_ent *ce,*first_ce=0; long long i; ssize_t retval=0; ssize_t ret; long long del_read=0; long long start=0; a_offset=offset; a_size = size; /* * Check to see if it can be completed from the cache */ if((ce=(struct cache_ent *)incache(gc,fd,offset,size))) { #ifdef _LARGEFILE64_SOURCE #ifdef __LP64__ while((ret=aio_error(&ce->myaiocb))== EINPROGRESS) { async_suspend(ce); } #else while((ret=aio_error64(&ce->myaiocb64))== EINPROGRESS) { async_suspend(ce); } #endif #else while((ret=aio_error(&ce->myaiocb))== EINPROGRESS) { async_suspend(ce); } #endif if(ret) printf("aio_error 3: ret %d %d\n",ret,errno); #ifdef _LARGEFILE64_SOURCE #ifdef __LP64__ if(ce->oldbuf != ce->myaiocb.aio_buf || ce->oldfd != ce->myaiocb.aio_fildes || ce->oldsize != ce->myaiocb.aio_nbytes) #else if(ce->oldbuf != ce->myaiocb64.aio_buf || ce->oldfd != ce->myaiocb64.aio_fildes || ce->oldsize != ce->myaiocb64.aio_nbytes) #endif #else if(ce->oldbuf != ce->myaiocb.aio_buf || ce->oldfd != ce->myaiocb.aio_fildes || ce->oldsize != ce->myaiocb.aio_nbytes) #endif printf("It changed in flight\n"); #ifdef _LARGEFILE64_SOURCE #ifdef __LP64__ retval=aio_return(&ce->myaiocb); #else retval=aio_return64(&ce->myaiocb64); #endif #else retval=aio_return(&ce->myaiocb); #endif if(retval > 0) { #ifdef _LARGEFILE64_SOURCE #ifdef __LP64__ *ubuffer=(char *)ce->myaiocb.aio_buf; #else *ubuffer=(char *)ce->myaiocb64.aio_buf; #endif #else *ubuffer=(char *)ce->myaiocb.aio_buf; #endif }else *ubuffer=0; #ifdef _LARGEFILE64_SOURCE #ifdef __LP64__ if(retval < ce->myaiocb.aio_nbytes) #else if(retval < ce->myaiocb64.aio_nbytes) #endif #else if(retval < ce->myaiocb.aio_nbytes) #endif { printf("aio_return 
error4: ret %d %d\n",retval,errno); #ifdef _LARGEFILE64_SOURCE #ifdef __LP64__ printf("aio_return error4: fd %d offset %lld buffer %lx size %d Opcode %d\n", ce->myaiocb.aio_fildes, ce->myaiocb.aio_offset, (long)(ce->myaiocb.aio_buf), ce->myaiocb.aio_nbytes, ce->myaiocb.aio_lio_opcode #else printf("aio_return error4: fd %d offset %lld buffer %lx size %d Opcode %d\n", ce->myaiocb64.aio_fildes, ce->myaiocb64.aio_offset, (long)(ce->myaiocb64.aio_buf), ce->myaiocb64.aio_nbytes, ce->myaiocb64.aio_lio_opcode #endif #else printf("aio_return error4: fd %d offset %d buffer %lx size %d Opcode %d\n", ce->myaiocb.aio_fildes, ce->myaiocb.aio_offset, (long)(ce->myaiocb.aio_buf), ce->myaiocb.aio_nbytes, ce->myaiocb.aio_lio_opcode #endif ); } ce->direct=1; takeoff_cache(gc,ce); /* do not delete buffer*/ putoninuse(gc,ce); }else { /* * Clear the cache and issue the first request async() */ del_cache(gc); del_read++; first_ce=alloc_cache(gc,fd,offset,size,(long long)LIO_READ); /* allocate buffer */ /*printf("allocated buffer/read %x offset %d\n",first_ce->myaiocb.aio_buf,offset);*/ again: #ifdef _LARGEFILE64_SOURCE #ifdef __LP64__ first_ce->oldbuf=first_ce->myaiocb.aio_buf; first_ce->oldfd=first_ce->myaiocb.aio_fildes; first_ce->oldsize=first_ce->myaiocb.aio_nbytes; ret=aio_read(&first_ce->myaiocb); #else first_ce->oldbuf=first_ce->myaiocb64.aio_buf; first_ce->oldfd=first_ce->myaiocb64.aio_fildes; first_ce->oldsize=first_ce->myaiocb64.aio_nbytes; ret=aio_read64(&first_ce->myaiocb64); #endif #else first_ce->oldbuf=first_ce->myaiocb.aio_buf; first_ce->oldfd=first_ce->myaiocb.aio_fildes; first_ce->oldsize=first_ce->myaiocb.aio_nbytes; ret=aio_read(&first_ce->myaiocb); #endif if(ret!=0) { if(errno==EAGAIN) goto again; else printf("error returned from aio_read(). 
Ret %d errno %d\n",ret,errno); } } if(stride==0) /* User does not want read-ahead */ goto out; if(a_offset<0) /* Before beginning of file */ goto out; if(a_offset+size>max) /* After end of file */ goto out; if(depth >=(max_depth-1)) depth=max_depth-1; if(depth==0) goto out; if(gc->count > 1) start=depth-1; for(i=start;i max) continue; if((ce=incache(gc,fd,r_offset,a_size))) continue; ce=alloc_cache(gc,fd,r_offset,a_size,(long long)LIO_READ); #ifdef _LARGEFILE64_SOURCE #ifdef __LP64__ ce->oldbuf=ce->myaiocb.aio_buf; ce->oldfd=ce->myaiocb.aio_fildes; ce->oldsize=ce->myaiocb.aio_nbytes; ret=aio_read(&ce->myaiocb); #else ce->oldbuf=ce->myaiocb64.aio_buf; ce->oldfd=ce->myaiocb64.aio_fildes; ce->oldsize=ce->myaiocb64.aio_nbytes; ret=aio_read64(&ce->myaiocb64); #endif #else ce->oldbuf=ce->myaiocb.aio_buf; ce->oldfd=ce->myaiocb.aio_fildes; ce->oldsize=ce->myaiocb.aio_nbytes; ret=aio_read(&ce->myaiocb); #endif if(ret!=0) { takeoff_cache(gc,ce); break; } } out: if(del_read) /* Wait for the first read to complete */ { #ifdef _LARGEFILE64_SOURCE #ifdef __LP64__ while((ret=aio_error(&first_ce->myaiocb))== EINPROGRESS) { async_suspend(first_ce); } #else while((ret=aio_error64(&first_ce->myaiocb64))== EINPROGRESS) { async_suspend(first_ce); } #endif #else while((ret=aio_error(&first_ce->myaiocb))== EINPROGRESS) { async_suspend(first_ce); } #endif if(ret) printf("aio_error 4: ret %d %d\n",ret,errno); #ifdef _LARGEFILE64_SOURCE #ifdef __LP64__ if(first_ce->oldbuf != first_ce->myaiocb.aio_buf || first_ce->oldfd != first_ce->myaiocb.aio_fildes || first_ce->oldsize != first_ce->myaiocb.aio_nbytes) printf("It changed in flight2\n"); retval=aio_return(&first_ce->myaiocb); #else if(first_ce->oldbuf != first_ce->myaiocb64.aio_buf || first_ce->oldfd != first_ce->myaiocb64.aio_fildes || first_ce->oldsize != first_ce->myaiocb64.aio_nbytes) printf("It changed in flight2\n"); retval=aio_return64(&first_ce->myaiocb64); #endif #else if(first_ce->oldbuf != first_ce->myaiocb.aio_buf || 
first_ce->oldfd != first_ce->myaiocb.aio_fildes || first_ce->oldsize != first_ce->myaiocb.aio_nbytes) printf("It changed in flight2\n"); retval=aio_return(&first_ce->myaiocb); #endif #ifdef _LARGEFILE64_SOURCE #ifdef __LP64__ if(retval < first_ce->myaiocb.aio_nbytes) #else if(retval < first_ce->myaiocb64.aio_nbytes) #endif #else if(retval < first_ce->myaiocb.aio_nbytes) #endif { printf("aio_return error5: ret %d %d\n",retval,errno); #ifdef _LARGEFILE64_SOURCE #ifdef __LP64__ printf("aio_return error5: fd %d offset %lld buffer %lx size %d Opcode %d\n", first_ce->myaiocb.aio_fildes, first_ce->myaiocb.aio_offset, (long)(first_ce->myaiocb.aio_buf), first_ce->myaiocb.aio_nbytes, first_ce->myaiocb.aio_lio_opcode #else printf("aio_return error5: fd %d offset %lld buffer %lx size %d Opcode %d\n", first_ce->myaiocb64.aio_fildes, first_ce->myaiocb64.aio_offset, (long)(first_ce->myaiocb64.aio_buf), first_ce->myaiocb64.aio_nbytes, first_ce->myaiocb64.aio_lio_opcode #endif #else printf("aio_return error5: fd %d offset %ld buffer %lx size %d Opcode %d\n", first_ce->myaiocb.aio_fildes, first_ce->myaiocb.aio_offset, (long)(first_ce->myaiocb.aio_buf), first_ce->myaiocb.aio_nbytes, first_ce->myaiocb.aio_lio_opcode #endif ); } if(retval > 0) { #ifdef _LARGEFILE64_SOURCE #ifdef __LP64__ *ubuffer=(char *)first_ce->myaiocb.aio_buf; #else *ubuffer=(char *)first_ce->myaiocb64.aio_buf; #endif #else *ubuffer=(char *)first_ce->myaiocb.aio_buf; #endif }else *ubuffer=(char *)0; first_ce->direct=1; /* do not delete the buffer */ takeoff_cache(gc,first_ce); putoninuse(gc,first_ce); } return((int)retval); } /************************************************************************ * The caller is now finished with the data that was provided so * the library is now free to return the memory to the pool for later * reuse. 
************************************************************************/ void async_release(gc) struct cache *gc; { takeoffinuse(gc); } /************************************************************************ * Put the buffer on the inuse list. When the user is finished with * the buffer it will call back into async_release and the items on the * inuse list will be deallocated. ************************************************************************/ void putoninuse(gc,entry) struct cache *gc; struct cache_ent *entry; { if(gc->inuse_head) entry->forward=gc->inuse_head; else entry->forward=0; gc->inuse_head=entry; } /************************************************************************ * This is called when the application is finished with the data that * was provided. The memory may now be returned to the pool. ************************************************************************/ void takeoffinuse(gc) struct cache *gc; { struct cache_ent *ce; if(gc->inuse_head==0) printf("Takeoffinuse error\n"); ce=gc->inuse_head; gc->inuse_head=gc->inuse_head->forward; if(gc->inuse_head !=0) printf("Error in take off inuse\n"); free((void*)(ce->real_address)); free(ce); } /************************************************************************* * This routine is a generic async writer assist funtion. It takes * the same calling parameters as write() but also extends the * interface to include: * * offset ..... offset in the file. * depth ..... How much read-ahead do you want. * *************************************************************************/ size_t async_write(gc,fd,buffer,size,offset,depth) struct cache *gc; long long fd,size; char *buffer; off64_t offset; long long depth; { struct cache_ent *ce; size_t ret; ce=allocate_write_buffer(gc,fd,offset,size,(long long)LIO_WRITE,depth,0LL,(char *)0,(char *)0); ce->direct=0; /* not direct. 
Lib supplies buffer and must free it */ #ifdef _LARGEFILE64_SOURCE #ifdef __LP64__ mbcopy(buffer,(char *)(ce->myaiocb.aio_buf),(size_t)size); #else mbcopy(buffer,(char *)(ce->myaiocb64.aio_buf),(size_t)size); #endif #else mbcopy(buffer,(char *)(ce->myaiocb.aio_buf),(size_t)size); #endif async_put_on_write_queue(gc,ce); /* printf("asw: fd %d offset %lld, size %d\n",ce->myaiocb64.aio_fildes, ce->myaiocb64.aio_offset, ce->myaiocb64.aio_nbytes); */ again: #ifdef _LARGEFILE64_SOURCE #ifdef __LP64__ ret=aio_write(&ce->myaiocb); #else ret=aio_write64(&ce->myaiocb64); #endif #else ret=aio_write(&ce->myaiocb); #endif if(ret==-1) { if(errno==EAGAIN) { async_wait_for_write(gc); goto again; } if(errno==0) { /* Compensate for bug in async library */ async_wait_for_write(gc); goto again; } else { printf("Error in aio_write: ret %d errno %d count %lld\n",ret,errno,gc->w_count); /* printf("aio_write_no_copy: fd %d buffer %x offset %lld size %d\n", ce->myaiocb64.aio_fildes, ce->myaiocb64.aio_buf, ce->myaiocb64.aio_offset, ce->myaiocb64.aio_nbytes); */ exit(177); } } return((ssize_t)size); } /************************************************************************* * Allocate a write aiocb and write buffer of the size specified. Also * put some extra buffer padding so that VX_DIRECT can do its job when * needed. 
*************************************************************************/ struct cache_ent * allocate_write_buffer(gc,fd,offset,size,op,w_depth,direct,buffer,free_addr) struct cache *gc; long long fd,size,op; off64_t offset; long long w_depth; long long direct; char *buffer,*free_addr; { struct cache_ent *ce; long temp; if(fd==0LL) { printf("Setting up write buffer insane\n"); exit(178); } if(gc->w_count > w_depth) async_wait_for_write(gc); ce=(struct cache_ent *)malloc((size_t)sizeof(struct cache_ent)); if(ce == (struct cache_ent *)0) { printf("Malloc failed 1\n"); exit(179); } bzero(ce,sizeof(struct cache_ent)); #ifdef _LARGEFILE64_SOURCE #ifdef __LP64__ ce->myaiocb.aio_fildes=(int)fd; ce->myaiocb.aio_offset=(off64_t)offset; if(!direct) { ce->real_address = (char *)malloc((size_t)(size+page_size)); temp=(long)ce->real_address; temp = (temp+page_size) & ~(page_size-1); ce->myaiocb.aio_buf=(volatile void *)temp; }else { ce->myaiocb.aio_buf=(volatile void *)buffer; ce->real_address=(char *)free_addr; } if(ce->myaiocb.aio_buf == 0) #else ce->myaiocb64.aio_fildes=(int)fd; ce->myaiocb64.aio_offset=(off64_t)offset; if(!direct) { ce->real_address = (char *)malloc((size_t)(size+page_size)); temp=(long)ce->real_address; temp = (temp+page_size) & ~(page_size-1); ce->myaiocb64.aio_buf=(volatile void *)temp; } else { ce->myaiocb64.aio_buf=(volatile void *)buffer; ce->real_address=(char *)free_addr; } if(ce->myaiocb64.aio_buf == 0) #endif #else ce->myaiocb.aio_fildes=(int)fd; ce->myaiocb.aio_offset=(off_t)offset; if(!direct) { ce->real_address = (char *)malloc((size_t)(size+page_size)); temp=(long)ce->real_address; temp = (temp+page_size) & ~(page_size-1); ce->myaiocb.aio_buf=(volatile void *)temp; } else { ce->myaiocb.aio_buf=(volatile void *)buffer; ce->real_address=(char *)free_addr; } if(ce->myaiocb.aio_buf == 0) #endif { printf("Malloc failed 2\n"); exit(180); } #ifdef _LARGEFILE64_SOURCE #ifdef __LP64__ ce->myaiocb.aio_reqprio=0; ce->myaiocb.aio_nbytes=(size_t)size; 
ce->myaiocb.aio_sigevent.sigev_notify=SIGEV_NONE; ce->myaiocb.aio_lio_opcode=(int)op; #else ce->myaiocb64.aio_reqprio=0; ce->myaiocb64.aio_nbytes=(size_t)size; ce->myaiocb64.aio_sigevent.sigev_notify=SIGEV_NONE; ce->myaiocb64.aio_lio_opcode=(int)op; #endif #else ce->myaiocb.aio_reqprio=0; ce->myaiocb.aio_nbytes=(size_t)size; ce->myaiocb.aio_sigevent.sigev_notify=SIGEV_NONE; ce->myaiocb.aio_lio_opcode=(int)op; #endif ce->fd=(int)fd; return(ce); } /************************************************************************* * Put it on the outbound queue. *************************************************************************/ void async_put_on_write_queue(gc,ce) struct cache *gc; struct cache_ent *ce; { ce->forward=0; ce->back=gc->w_tail; if(gc->w_tail) gc->w_tail->forward = ce; gc->w_tail= ce; if(!gc->w_head) gc->w_head=ce; gc->w_count++; return; } /************************************************************************* * Cleanup all outstanding writes *************************************************************************/ void async_write_finish(gc) struct cache *gc; { while(gc->w_head) { /*printf("async_write_finish: Waiting for buffer %x to finish\n",gc->w_head->myaiocb64.aio_buf);*/ async_wait_for_write(gc); } } /************************************************************************* * Wait for an I/O to finish *************************************************************************/ void async_wait_for_write(gc) struct cache *gc; { struct cache_ent *ce; size_t ret,retval; if(gc->w_head==0) return; ce=gc->w_head; gc->w_head=ce->forward; gc->w_count--; ce->forward=0; if(ce==gc->w_tail) gc->w_tail=0; /*printf("Wait for buffer %x offset %lld size %d to finish\n", ce->myaiocb64.aio_buf, ce->myaiocb64.aio_offset, ce->myaiocb64.aio_nbytes); printf("write count %lld \n",gc->w_count); */ #ifdef _LARGEFILE64_SOURCE #ifdef __LP64__ while((ret=aio_error(&ce->myaiocb))== EINPROGRESS) { async_suspend(ce); } #else while((ret=aio_error64(&ce->myaiocb64))== EINPROGRESS) 
{ async_suspend(ce); } #endif #else while((ret=aio_error(&ce->myaiocb))== EINPROGRESS) { async_suspend(ce); } #endif if(ret) { printf("aio_error 5: ret %d %d\n",ret,errno); #ifdef _LARGEFILE64_SOURCE #ifdef __LP64__ printf("fd %d offset %lld size %d\n", ce->myaiocb.aio_fildes, ce->myaiocb.aio_offset, ce->myaiocb.aio_nbytes); #else printf("fd %d offset %lld size %d\n", ce->myaiocb64.aio_fildes, ce->myaiocb64.aio_offset, ce->myaiocb64.aio_nbytes); #endif #else printf("fd %d offset %lld size %d\n", ce->myaiocb.aio_fildes, ce->myaiocb.aio_offset, ce->myaiocb.aio_nbytes); #endif exit(181); } #ifdef _LARGEFILE64_SOURCE #ifdef __LP64__ retval=aio_return(&ce->myaiocb); #else #if defined(__CrayX1__) retval=aio_return64((aiocb64_t *)&ce->myaiocb64); #else retval=aio_return64((struct aiocb64 *)&ce->myaiocb64); #endif #endif #else retval=aio_return(&ce->myaiocb); #endif if((int)retval < 0) { printf("aio_return error: %d\n",errno); } if(!ce->direct) { /* printf("Freeing buffer %x\n",ce->real_address);*/ free((void *)(ce->real_address)); free((void *)ce); } } /************************************************************************* * This routine is a generic async writer assist funtion. It takes * the same calling parameters as write() but also extends the * interface to include: * * offset ..... offset in the file. * depth ..... How much read-ahead do you want. * free_addr .. address of memory to free after write is completed. 
* *************************************************************************/ size_t async_write_no_copy(gc,fd,buffer,size,offset,depth,free_addr) struct cache *gc; long long fd,size; char *buffer; off64_t offset; long long depth; char *free_addr; { struct cache_ent *ce; size_t ret; long long direct = 1; ce=allocate_write_buffer(gc,fd,offset,size,(long long)LIO_WRITE,depth,direct,buffer,free_addr); ce->direct=0; /* have library de-allocate the buffer */ async_put_on_write_queue(gc,ce); /* printf("awnc: fd %d offset %lld, size %d\n",ce->myaiocb64.aio_fildes, ce->myaiocb64.aio_offset, ce->myaiocb64.aio_nbytes); */ again: #ifdef _LARGEFILE64_SOURCE #ifdef __LP64__ ret=aio_write(&ce->myaiocb); #else ret=aio_write64(&ce->myaiocb64); #endif #else ret=aio_write(&ce->myaiocb); #endif if(ret==-1) { if(errno==EAGAIN) { async_wait_for_write(gc); goto again; } if(errno==0) { /* Compensate for bug in async library */ async_wait_for_write(gc); goto again; } else { printf("Error in aio_write: ret %d errno %d\n",ret,errno); #ifdef _LARGEFILE64_SOURCE #ifdef __LP64__ printf("aio_write_no_copy: fd %d buffer %lx offset %lld size %d\n", ce->myaiocb.aio_fildes, (long)(ce->myaiocb.aio_buf), ce->myaiocb.aio_offset, ce->myaiocb.aio_nbytes); #else printf("aio_write_no_copy: fd %d buffer %lx offset %lld size %d\n", ce->myaiocb64.aio_fildes, (long)(ce->myaiocb64.aio_buf), ce->myaiocb64.aio_offset, ce->myaiocb64.aio_nbytes); #endif #else printf("aio_write_no_copy: fd %d buffer %lx offset %ld size %d\n", ce->myaiocb.aio_fildes, (long)(ce->myaiocb.aio_buf), ce->myaiocb.aio_offset, ce->myaiocb.aio_nbytes); #endif exit(182); } } else { return((ssize_t)size); } } void mbcopy(source, dest, len) char *source,*dest; size_t len; { int i; for(i=0;i @n int PAPI_detach( int EventSet, unsigned long tid ); PAPI_detach is a wrapper function that calls PAPI_set_opt to allow PAPI to monitor performance counts on a thread other than the one currently executing. 
This is sometimes referred to as third party monitoring. PAPI_attach connects the specified EventSet to the specified thread; PAPI_detach breaks that connection and restores the EventSet to the original executing thread. @param EventSet An integer handle for a PAPI EventSet as created by PAPI_create_eventset. @param tid A thread id as obtained from, for example, PAPI_list_threads or PAPI_thread_id. @retval PAPI_ECMP This feature is unsupported on this component. @retval PAPI_EINVAL One or more of the arguments is invalid. @retval PAPI_ENOEVST The event set specified does not exist. @retval PAPI_EISRUN The event set is currently counting events. @par Examples: .fi .PP .PP .nf * int EventSet = PAPI_NULL; * unsigned long pid; * pid = fork( ); * if ( pid <= 0 ) * exit( 1 ); * if ( PAPI_create_eventset( &EventSet ) != PAPI_OK ) * exit( 1 ); * // Add Total Instructions Executed to our EventSet * if ( PAPI_add_event( EventSet, PAPI_TOT_INS ) != PAPI_OK ) * exit( 1 ); * // Attach this EventSet to the forked process * if ( PAPI_attach( EventSet, pid ) != PAPI_OK ) * exit( 1 ); * .fi .PP .PP \fBSee Also:\fP .RS 4 \fBPAPI_set_opt\fP .br \fBPAPI_list_threads\fP .br \fBPAPI_thread_id\fP .br \fBPAPI_thread_init\fP .RE .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. papi-5.6.0/man/man3/PAPI_num_components.3000664 001750 001750 00000001041 13216244356 022105 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPI_num_components" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPI_num_components \- .PP Get the number of components available on the system\&. .SH SYNOPSIS .br .PP .SH "Detailed Description" .PP .PP .nf @return Number of components available on the system .fi .PP .PP .PP .nf // Query the library for a component count\&. printf("%d components installed\&.", PAPI_num_components() ); * .fi .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&.
papi-5.6.0/src/libpfm4/lib/pfmlib_intel_x86_arch.c000664 001750 001750 00000013665 13216244365 023756 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_intel_x86_arch.c : Intel architectural PMU v1, v2, v3 * * Copyright (c) 2005-2007 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * * This file implements supports for the IA-32 architectural PMU as specified * in the following document: * "IA-32 Intel Architecture Software Developer's Manual - Volume 3B: System * Programming Guide" */ #include #include #include #include #include /* private headers */ #include "pfmlib_priv.h" /* library private */ #include "pfmlib_intel_x86_priv.h" /* architecture private */ #include "events/intel_x86_arch_events.h" /* architected event table */ extern pfmlib_pmu_t intel_x86_arch_support; static intel_x86_entry_t *x86_arch_pe; /* * .byte 0x53 == push ebx. 
it's universal for 32 and 64 bit * .byte 0x5b == pop ebx. * Some gcc's (4.1.2 on Core2) object to pairing push/pop and ebx in 64 bit mode. * Using the opcode directly avoids this problem. */ static inline void cpuid(unsigned int op, unsigned int *a, unsigned int *b, unsigned int *c, unsigned int *d) { __asm__ __volatile__ (".byte 0x53\n\tcpuid\n\tmovl %%ebx, %%esi\n\t.byte 0x5b" : "=a" (*a), "=S" (*b), "=c" (*c), "=d" (*d) : "a" (op)); } /* * create architected event table */ static int create_arch_event_table(unsigned int mask, int version) { intel_x86_entry_t *pe; int i, num_events = 0; int m; DPRINT("version=%d evt_msk=0x%x\n", version, mask); /* * first pass: count the number of supported events */ m = mask; for(i=0; i < 7; i++, m>>=1) { if ((m & 0x1) == 0) num_events++; } intel_x86_arch_support.pme_count = num_events; pe = calloc(num_events, sizeof(intel_x86_entry_t)); if (pe == NULL) return PFM_ERR_NOTSUPP; x86_arch_pe = pe; intel_x86_arch_support.pe = pe; /* * second pass: populate the table */ m = mask; for(i=0; i < 7; i++, m>>=1) { if (!(m & 0x1)) { *pe = intel_x86_arch_pe[i]; switch(version) { case 3: pe->modmsk = INTEL_V3_ATTRS; break; default: pe->modmsk = INTEL_V2_ATTRS; break; } pe++; } } return PFM_SUCCESS; } static int check_arch_pmu(int family) { union { unsigned int val; intel_x86_pmu_eax_t eax; intel_x86_pmu_edx_t edx; } eax, ecx, edx, ebx; /* * check family number to reject for processors * older than Pentium (family=5). Those processors * did not have the CPUID instruction */ if (family < 5 || family == 15) return PFM_ERR_NOTSUPP; /* * check if CPU supports 0xa function of CPUID * 0xa started with Core Duo. Needed to detect if * architected PMU is present */ cpuid(0x0, &eax.val, &ebx.val, &ecx.val, &edx.val); if (eax.val < 0xa) return PFM_ERR_NOTSUPP; /* * extract architected PMU information */ cpuid(0xa, &eax.val, &ebx.val, &ecx.val, &edx.val); /* * version must be greater than zero */ return eax.eax.version < 1 ? 
PFM_ERR_NOTSUPP : PFM_SUCCESS; } static int pfm_intel_x86_arch_detect(void *this) { int ret; ret = pfm_intel_x86_detect(); if (ret != PFM_SUCCESS) return ret; return check_arch_pmu(pfm_intel_x86_cfg.family); } static int pfm_intel_x86_arch_init(void *this) { union { unsigned int val; intel_x86_pmu_eax_t eax; intel_x86_pmu_edx_t edx; } eax, ecx, edx, ebx; /* * extract architected PMU information */ if (!pfm_cfg.forced_pmu) { cpuid(0xa, &eax.val, &ebx.val, &ecx.val, &edx.val); intel_x86_arch_support.num_cntrs = eax.eax.num_cnt; intel_x86_arch_support.num_fixed_cntrs = edx.edx.num_cnt; } else { eax.eax.version = 3; ebx.val = 0; /* no restriction */ intel_x86_arch_support.num_cntrs = 0; intel_x86_arch_support.num_fixed_cntrs = 0; } /* * must be called after impl_cntrs has been initialized */ return create_arch_event_table(ebx.val, eax.eax.version); } void pfm_intel_x86_arch_terminate(void *this) { /* workaround const void for intel_x86_arch_support.pe */ if (x86_arch_pe) free(x86_arch_pe); } /* architected PMU */ pfmlib_pmu_t intel_x86_arch_support={ .desc = "Intel X86 architectural PMU", .name = "ix86arch", .pmu = PFM_PMU_INTEL_X86_ARCH, .pme_count = 0, .pe = NULL, .atdesc = intel_x86_mods, .flags = PFMLIB_PMU_FL_RAW_UMASK | PFMLIB_PMU_FL_ARCH_DFL, .type = PFM_PMU_TYPE_CORE, .max_encoding = 1, .pmu_detect = pfm_intel_x86_arch_detect, .pmu_init = pfm_intel_x86_arch_init, .pmu_terminate = pfm_intel_x86_arch_terminate, .get_event_encoding[PFM_OS_NONE] = pfm_intel_x86_get_encoding, PFMLIB_ENCODE_PERF(pfm_intel_x86_get_perf_encoding), .get_event_first = pfm_intel_x86_get_event_first, .get_event_next = pfm_intel_x86_get_event_next, .event_is_valid = pfm_intel_x86_event_is_valid, .get_event_info = pfm_intel_x86_get_event_info, .get_event_attr_info = pfm_intel_x86_get_event_attr_info, PFMLIB_VALID_PERF_PATTRS(pfm_intel_x86_perf_validate_pattrs), .get_event_nattrs = pfm_intel_x86_get_event_nattrs, }; papi-5.6.0/src/freebsd/map-core.h000664 001750 001750 00000007400 13216244361 
020611 0ustar00jshenry1963jshenry1963000000 000000 /****************************/ /* THIS IS OPEN SOURCE CODE */ /****************************/ /* * File: map-core.h * CVS: $Id$ * Author: Harald Servat * redcrash@gmail.com */ #ifndef FreeBSD_MAP_CORE #define FreeBSD_MAP_CORE /* Native event codes for the "Core" processor table. Values run consecutively from PAPI_NATIVE_MASK; PNE_CORE_NATNAME_GUARD is the final enumerator and is not an event itself. The order must stay in step with CoreProcessor_info[] in map-core.c. */ enum NativeEvent_Value_CoreProcessor { PNE_CORE_BACLEARS = PAPI_NATIVE_MASK, PNE_CORE_BTB_MISSES, PNE_CORE_BR_BAC_MISSP_EXEC, PNE_CORE_BR_BOGUS, PNE_CORE_BR_CALL_EXEC, PNE_CORE_BR_CALL_MISSP_EXEC, PNE_CORE_BR_CND_EXEC, PNE_CORE_BR_CND_MISSP_EXEC, PNE_CORE_BR_IND_CALL_EXEC, PNE_CORE_BR_IND_EXEC, PNE_CORE_BR_IND_MISSP_EXEC, PNE_CORE_BR_INST_EXEC, PNE_CORE_BR_INSTR_DECODED, PNE_CORE_BR_INSTR_RET, PNE_CORE_BR_MISPRED_RET, PNE_CORE_BR_MISPRED_TAKEN_RET, PNE_CORE_BR_MISSP_EXEC, PNE_CORE_BR_RET_BAC_MISSP_EXEC, PNE_CORE_BR_RET_EXEC, PNE_CORE_BR_RET_MISSP_EXEC, PNE_CORE_BR_TAKEN_RET, PNE_CORE_BUS_BNR_CLOCKS, PNE_CORE_BUS_DRDY_CLOCKS, PNE_CORE_BUS_DATA_RCV, PNE_CORE_BUS_LOCKS_CLOCKS, PNE_CORE_BUS_NOT_IN_USE, PNE_CORE_BUS_REQ_OUTSTANDING, PNE_CORE_BUS_SNOOP_STALL, PNE_CORE_BUS_SNOOPS, PNE_CORE_BUS_TRANS_ANY, PNE_CORE_BUS_TRANS_BRD, PNE_CORE_BUS_TRANS_BURST, PNE_CORE_BUS_TRANS_DEF, PNE_CORE_BUS_TRANS_IO, PNE_CORE_BUS_TRANS_IFETCH, PNE_CORE_BUS_TRANS_INVAL, PNE_CORE_BUS_TRANS_MEM, PNE_CORE_BUS_TRANS_P, PNE_CORE_BUS_TRANS_PWR, PNE_CORE_BUS_TRANS_RFO, PNE_CORE_BUS_TRANS_WB, PNE_CORE_CYCLES_DIV_BUSY, PNE_CORE_CYCLES_INT_MASKED, PNE_CORE_CYCLES_INT_PENDING_MASKED, PNE_CORE_DCU_SNOOP_TO_SHARE, PNE_CORE_DCACHE_CACHE_LOCK, PNE_CORE_DCACHE_CACHE_LD, PNE_CORE_DCACHE_CACHE_ST, PNE_CORE_DCACHE_M_EVICT, PNE_CORE_DCACHE_M_REPL, PNE_CORE_DCACHE_PEND_MISS, PNE_CORE_DCACHE_REPL, PNE_CORE_DATA_MEM_CACHE_REF, PNE_CORE_DATA_MEM_REF, PNE_CORE_DBUS_BUSY, PNE_CORE_DBUS_BUSY_RD, PNE_CORE_DIV, PNE_CORE_DTLB_MISS, PNE_CORE_ESP_UOPS, PNE_CORE_EST_TRANS, PNE_CORE_FP_ASSIST, PNE_CORE_FP_COMP_INSTR_RET, PNE_CORE_FP_COMPS_OP_EXE, PNE_CORE_FP_MMX_TRANS, PNE_CORE_FUSED_LD_UOPS_RET, PNE_CORE_FUSED_ST_UOPS_RET, PNE_CORE_FUSED_UOPS_RET, 
PNE_CORE_HW_INT_RX, PNE_CORE_ICACHE_MISSES, PNE_CORE_ICACHE_READS, PNE_CORE_IFU_MEM_STALL, PNE_CORE_ILD_STALL, PNE_CORE_ITLB_MISSES, PNE_CORE_INSTR_DECODED, PNE_CORE_INSTR_RET, PNE_CORE_L1_PREF_REQ, PNE_CORE_L2_ADS, PNE_CORE_L2_IFETCH, PNE_CORE_L2_LD, PNE_CORE_L2_LINES_IN, PNE_CORE_L2_LINES_OUT, PNE_CORE_L2_M_LINES_IN, PNE_CORE_L2_M_LINES_OUT, PNE_CORE_L2_NO_REQUEST_CYCLES, PNE_CORE_L2_REJECT_CYCLES, PNE_CORE_L2_RQSTS, PNE_CORE_L2_ST, PNE_CORE_LD_BLOCKS, PNE_CORE_LLC_MISSES, PNE_CORE_LLC_REFERENCE, PNE_CORE_MMX_ASSIST, PNE_CORE_MMX_FP_TRANS, PNE_CORE_MMX_INSTR_EXEC, PNE_CORE_MMX_INSTR_RET, PNE_CORE_MISALIGN_MEM_REF, PNE_CORE_MUL, PNE_CORE_NONHLT_REF_CYCLES, PNE_CORE_PREF_RQSTS_DN, PNE_CORE_PREF_RQSTS_UP, PNE_CORE_RESOURCE_STALL, PNE_CORE_SD_DRAINS, PNE_CORE_SIMD_FP_DP_P_RET, PNE_CORE_SIMD_FP_DP_P_COMP_RET, PNE_CORE_SIMD_FP_DP_S_RET, PNE_CORE_SIMD_FP_DP_S_COMP_RET, PNE_CORE_SIMD_FP_SP_P_COMP_RET, PNE_CORE_SIMD_FP_SP_RET, PNE_CORE_SIMD_FP_SP_S_RET, PNE_CORE_SIMD_FP_SP_S_COMP_RET, PNE_CORE_SIMD_INT_128_RET, PNE_CORE_SIMD_INT_PARI_EXEC, PNE_CORE_SIMD_INT_PCK_EXEC, PNE_CORE_SIMD_INT_PLOG_EXEC, PNE_CORE_SIMD_INT_PMUL_EXEC, PNE_CORE_SIMD_INT_PSFT_EXEC, PNE_CORE_SIMD_INT_SAT_EXEC, PNE_CORE_SIMD_INT_UPCK_EXEC, PNE_CORE_SMC_DETECTED, PNE_CORE_SSE_NTSTORES_MISS, PNE_CORE_SSE_NTSTORES_RET, PNE_CORE_SSE_PREFNTA_MISS, PNE_CORE_SSE_PREFNTA_RET, PNE_CORE_SSE_PREFT1_MISS, PNE_CORE_SSE_PREFT1_RET, PNE_CORE_SSE_PREFT2_MISS, PNE_CORE_SSE_PREFT2_RET, PNE_CORE_SEG_REG_LOADS, PNE_CORE_SERIAL_EXECUTION_CYCLES, PNE_CORE_THERMAL_TRIP, PNE_CORE_UNFUSION, PNE_CORE_UNHALTED_CORE_CYCLES, PNE_CORE_UOPS_RET, PNE_CORE_NATNAME_GUARD }; /* CoreProcessor_info holds one {label, description} pair per enumerator above and is defined in map-core.c. */ extern Native_Event_LabelDescription_t CoreProcessor_info[]; extern hwi_search_t CoreProcessor_map[]; #endif papi-5.6.0/src/libpfm4/lib/events/intel_snbep_unc_r2pcie_events.h000664 001750 001750 00000013012 13216244364 027101 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2012 Google, Inc * Contributed by Stephane Eranian * * Permission is hereby 
granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. * * This file has been automatically generated. 
* * PMU: snbep_unc_r2pcie (Intel SandyBridge-EP R2PCIe uncore) */ /* Unit masks selecting ring direction and polarity; ANY is the default. Shared by the UNC_R2_RING_AD_USED, _AK_USED and _BL_USED events below. */ static const intel_x86_umask_t snbep_unc_r2_ring_ad_used[]={ { .uname = "CCW_EVEN", .udesc = "Counter-clockwise and even ring polarity", .ucode = 0x400, }, { .uname = "CCW_ODD", .udesc = "Counter-clockwise and odd ring polarity", .ucode = 0x800, }, { .uname = "CW_EVEN", .udesc = "Clockwise and even ring polarity", .ucode = 0x100, }, { .uname = "CW_ODD", .udesc = "Clockwise and odd ring polarity", .ucode = 0x200, }, { .uname = "CW_ANY", .udesc = "Clockwise with any polarity", .ucode = 0x300, }, { .uname = "CCW_ANY", .udesc = "Counter-clockwise with any polarity", .ucode = 0xc00, }, { .uname = "ANY", .udesc = "any direction and any polarity", .ucode = 0xf00, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, }; /* Single default unit mask for UNC_R2_RING_IV_USED. */ static const intel_x86_umask_t snbep_unc_r2_ring_iv_used[]={ { .uname = "ANY", .udesc = "R2 IV Ring in Use", .ucode = 0xf00, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, }; /* Ingress queue selectors for UNC_R2_RXR_CYCLES_NE; each is flagged INTEL_X86_NCOMBO and none is marked default. */ static const intel_x86_umask_t snbep_unc_r2_rxr_cycles_ne[]={ { .uname = "DRS", .udesc = "DRS Ingress queue", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO, }, { .uname = "NCB", .udesc = "NCB Ingress queue", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "NCS", .udesc = "NCS Ingress queue", .ucode = 0x2000, .uflags = INTEL_X86_NCOMBO, }, }; /* Egress queue selectors; shared by UNC_R2_TXR_CYCLES_FULL and UNC_R2_TXR_CYCLES_NE below. */ static const intel_x86_umask_t snbep_unc_r2_txr_cycles_full[]={ { .uname = "AD", .udesc = "AD Egress queue", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "AK", .udesc = "AK Egress queue", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "BL", .udesc = "BL Egress queue", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO, }, }; /* Event table for the R2PCIe uncore PMU: one entry per event with its code, counter mask, attribute mask and, where present, its unit-mask table. */ static const intel_x86_entry_t intel_snbep_unc_r2_pe[]={ { .name = "UNC_R2_CLOCKTICKS", .desc = "Number of uclks in domain", .code = 0x1, .cntmsk = 0xf, .modmsk = SNBEP_UNC_R2PCIE_ATTRS, }, { .name = "UNC_R2_RING_AD_USED", .desc = "R2 AD Ring in Use", .code = 0x7, .cntmsk = 0xf, .ngrp = 1, .modmsk = SNBEP_UNC_R2PCIE_ATTRS, .numasks = 
LIBPFM_ARRAY_SIZE(snbep_unc_r2_ring_ad_used), .umasks = snbep_unc_r2_ring_ad_used }, { .name = "UNC_R2_RING_AK_USED", .desc = "R2 AK Ring in Use", .code = 0x8, .cntmsk = 0xf, .ngrp = 1, .modmsk = SNBEP_UNC_R2PCIE_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(snbep_unc_r2_ring_ad_used), .umasks = snbep_unc_r2_ring_ad_used /* shared */ }, { .name = "UNC_R2_RING_BL_USED", .desc = "R2 BL Ring in Use", .code = 0x9, .cntmsk = 0xf, .ngrp = 1, .modmsk = SNBEP_UNC_R2PCIE_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(snbep_unc_r2_ring_ad_used), .umasks = snbep_unc_r2_ring_ad_used /* shared */ }, { .name = "UNC_R2_RING_IV_USED", .desc = "R2 IV Ring in Use", .code = 0xa, .cntmsk = 0xf, .ngrp = 1, .modmsk = SNBEP_UNC_R2PCIE_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(snbep_unc_r2_ring_iv_used), .umasks = snbep_unc_r2_ring_iv_used }, { .name = "UNC_R2_RXR_AK_BOUNCES", .desc = "AK Ingress Bounced", .code = 0x12, .cntmsk = 0x1, .modmsk = SNBEP_UNC_R2PCIE_ATTRS, }, { .name = "UNC_R2_RXR_CYCLES_NE", .desc = "Ingress Cycles Not Empty", .code = 0x10, .cntmsk = 0x3, .ngrp = 1, .modmsk = SNBEP_UNC_R2PCIE_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(snbep_unc_r2_rxr_cycles_ne), .umasks = snbep_unc_r2_rxr_cycles_ne }, { .name = "UNC_R2_TXR_CYCLES_FULL", .desc = "Egress Cycles Full", .code = 0x25, .cntmsk = 0x1, .ngrp = 1, .modmsk = SNBEP_UNC_R2PCIE_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(snbep_unc_r2_txr_cycles_full), .umasks = snbep_unc_r2_txr_cycles_full }, { .name = "UNC_R2_TXR_CYCLES_NE", .desc = "Egress Cycles Not Empty", .code = 0x23, .cntmsk = 0x1, .ngrp = 1, .modmsk = SNBEP_UNC_R2PCIE_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(snbep_unc_r2_txr_cycles_full), .umasks = snbep_unc_r2_txr_cycles_full /* shared */ }, { .name = "UNC_R2_TXR_INSERTS", .desc = "Egress allocations", .code = 0x24, .cntmsk = 0x1, .modmsk = SNBEP_UNC_R2PCIE_ATTRS, }, }; papi-5.6.0/src/freebsd/map-core.c000664 001750 001750 00000027751 13216244361 020617 0ustar00jshenry1963jshenry1963000000 000000 /****************************/ /* THIS IS OPEN SOURCE CODE */ 
/****************************/

/*
 * File:    map-core.c
 * Author:  Harald Servat
 *          redcrash@gmail.com
 */

#include "freebsd.h"
#include "papiStdEventDefs.h"
#include "map.h"

/****************************************************************************
 CORE SUBSTRATE
 CORE SUBSTRATE
 CORE SUBSTRATE
 CORE SUBSTRATE
 CORE SUBSTRATE
****************************************************************************/

/*
	NativeEvent_Value_CoreProcessor must match CoreProcessor_info

	One {label, description} pair per native event, in enumerator order,
	terminated by a { NULL, NULL } entry.
*/

Native_Event_LabelDescription_t CoreProcessor_info[] =
{
	{"BAClears", "The number of BAClear conditions asserted."},
	{"BTB_Misses", "The number of branches for which the branch table buffer did not produce a prediction."},
	{"Br_BAC_Missp_Exec", "The number of branch instructions executed that were mispredicted at the front end."},
	{"Br_Bogus", "The number of bogus branches."},
	{"Br_Call_Exec", "The number of CALL instructions executed."},
	{"Br_Call_Missp_Exec", "The number of CALL instructions executed that were mispredicted."},
	{"Br_Cnd_Exec", "The number of conditional branch instructions executed."},
	{"Br_Cnd_Missp_Exec", "The number of conditional branch instructions executed that were mispredicted."},
	{"Br_Ind_Call_Exec", "The number of indirect CALL instructions executed."},
	{"Br_Ind_Exec", "The number of indirect branches executed."},
	{"Br_Ind_Missp_Exec", "The number of indirect branch instructions executed that were mispredicted."},
	{"Br_Inst_Exec", "The number of branch instructions executed including speculative branches."},
	{"Br_Instr_Decoded", "The number of branch instructions decoded."},
	{"Br_Instr_Ret", "The number of branch instructions retired. This is an architectural performance event."},
	{"Br_MisPred_Ret", "The number of mispredicted branch instructions retired. This is an architectural performance event."},
	{"Br_MisPred_Taken_Ret", "The number of taken and mispredicted branches retired."},
	{"Br_Missp_Exec", "The number of branch instructions executed and mispredicted at execution including branches that were not predicted."},
	{"Br_Ret_BAC_Missp_Exec", "The number of return branch instructions that were mispredicted at the front end."},
	{"Br_Ret_Exec", "The number of return branch instructions executed."},
	{"Br_Ret_Missp_Exec", "The number of return branch instructions executed that were mispredicted."},
	{"Br_Taken_Ret", "The number of taken branches retired."},
	/* Description was truncated to "was asserted."; restored to parallel Bus_DRDY_Clocks. */
	{"Bus_BNR_Clocks", "The number of external bus cycles while BNR (bus not ready) was asserted."},
	{"Bus_DRDY_Clocks", "The number of external bus cycles while DRDY was asserted."},
	{"Bus_Data_Rcv", "The number of cycles during which the processor is busy receiving data."},
	{"Bus_Locks_Clocks", "The number of external bus cycles while the bus lock signal was asserted."},
	{"Bus_Not_In_Use", "The number of cycles when there is no transaction from the core."},
	{"Bus_Req_Outstanding", "The weighted cycles of cacheable bus data read requests from the data cache unit or hardware prefetcher."},
	{"Bus_Snoop_Stall", "The number bus cycles while a bus snoop is stalled."},
	{"Bus_Snoops", "The number of snoop responses to bus transactions."},
	{"Bus_Trans_Any", "The number of completed bus transactions."},
	{"Bus_Trans_Brd", "The number of read bus transactions."},
	{"Bus_Trans_Burst", "The number of completed burst transactions. Retried transactions may be counted more than once."},
	{"Bus_Trans_Def", "The number of completed deferred transactions."},
	{"Bus_Trans_IO", "The number of completed I/O transactions counting both reads and writes."},
	{"Bus_Trans_Ifetch", "Completed instruction fetch transactions."},
	{"Bus_Trans_Inval", "The number completed invalidate transactions."},
	{"Bus_Trans_Mem", "The number of completed memory transactions."},
	{"Bus_Trans_P", "The number of completed partial transactions."},
	{"Bus_Trans_Pwr", "The number of completed partial write transactions."},
	{"Bus_Trans_RFO", "The number of completed read-for-ownership transactions."},
	{"Bus_Trans_WB", "The number of completed writeback transactions from the data cache unit, excluding L2 writebacks."},
	{"Cycles_Div_Busy", "The number of cycles the divider is busy. The event is only available on PMC0."},
	{"Cycles_Int_Masked", "The number of cycles while interrupts were disabled."},
	{"Cycles_Int_Pending_Masked", "The number of cycles while interrupts were disabled and interrupts were pending."},
	{"DCU_Snoop_To_Share", "The number of data cache unit snoops to L1 cache lines in the shared state."},
	{"DCache_Cache_Lock", "The number of cacheable locked read operations to invalid state."},
	{"DCache_Cache_LD", "The number of cacheable L1 data read operations."},
	{"DCache_Cache_ST", "The number cacheable L1 data write operations."},
	{"DCache_M_Evict", "The number of M state data cache lines that were evicted."},
	{"DCache_M_Repl", "The number of M state data cache lines that were allocated."},
	{"DCache_Pend_Miss", "The weighted cycles an L1 miss was outstanding."},
	{"DCache_Repl", "The number of data cache line replacements."},
	{"Data_Mem_Cache_Ref", "The number of cacheable read and write operations to L1 data cache."},
	{"Data_Mem_Ref", "The number of L1 data reads and writes, both cacheable and uncacheable."},
	{"Dbus_Busy", "The number of core cycles during which the data bus was busy."},
	{"Dbus_Busy_Rd", "The number of cycles during which the data bus was busy transferring data to a core."},
	{"Div", "The number of divide operations including speculative operations for integer and floating point divides. This event can only be counted on PMC1."},
	{"Dtlb_Miss", "The number of data references that missed the TLB."},
	{"ESP_Uops", "The number of ESP folding instructions decoded."},
	{"EST_Trans", "Count the number of Intel Enhanced SpeedStep transitions."},
	{"FP_Assist", "The number of floating point operations that required microcode assists. The event is only available on PMC1."},
	{"FP_Comp_Instr_Ret", "The number of X87 floating point compute instructions retired. The event is only available on PMC0."},
	{"FP_Comps_Op_Exe", "The number of floating point computational instructions executed."},
	{"FP_MMX_Trans", "The number of transitions from X87 to MMX."},
	{"Fused_Ld_Uops_Ret", "The number of fused load uops retired."},
	{"Fused_St_Uops_Ret", "The number of fused store uops retired."},
	{"Fused_Uops_Ret", "The number of fused uops retired."},
	{"HW_Int_Rx", "The number of hardware interrupts received."},
	{"ICache_Misses", "The number of instruction fetch misses in the instruction cache and streaming buffers."},
	{"ICache_Reads", "The number of instruction fetches from the instruction cache and streaming buffers counting both cacheable and uncacheable fetches."},
	{"IFU_Mem_Stall", "The number of cycles the instruction fetch unit was stalled while waiting for data from memory."},
	{"ILD_Stall", "The number of instruction length decoder stalls."},
	{"ITLB_Misses", "The number of instruction TLB misses."},
	{"Instr_Decoded", "The number of instructions decoded."},
	{"Instr_Ret", "The number of instructions retired. This is an architectural performance event."},
	{"L1_Pref_Req", "The number of L1 prefetch request due to data cache misses."},
	{"L2_ADS", "The number of L2 address strobes."},
	{"L2_IFetch", "The number of instruction fetches by the instruction fetch unit from L2 cache including speculative fetches."},
	{"L2_LD", "The number of L2 cache reads."},
	{"L2_Lines_In", "The number of L2 cache lines allocated."},
	{"L2_Lines_Out", "The number of L2 cache lines evicted."},
	{"L2_M_Lines_In", "The number of L2 M state cache lines allocated."},
	{"L2_M_Lines_Out", "The number of L2 M state cache lines evicted."},
	{"L2_No_Request_Cycles", "The number of cycles there was no request to access L2 cache."},
	{"L2_Reject_Cycles", "The number of cycles the L2 cache was busy and rejecting new requests."},
	{"L2_Rqsts", "The number of L2 cache requests."},
	{"L2_ST", "The number of L2 cache writes including speculative writes."},
	{"LD_Blocks", "The number of load operations delayed due to store buffer blocks."},
	{"LLC_Misses", "The number of cache misses for references to the last level cache, excluding misses due to hardware prefetches. This is an architectural performance event."},
	{"LLC_Reference", "The number of references to the last level cache, excluding those due to hardware prefetches. This is an architectural performance event."},
	{"MMX_Assist", "The number of EMMX instructions executed."},
	{"MMX_FP_Trans", "The number of transitions from MMX to X87."},
	{"MMX_Instr_Exec", "The number of MMX instructions executed excluding MOVQ and MOVD stores."},
	{"MMX_Instr_Ret", "The number of MMX instructions retired."},
	{"Misalign_Mem_Ref", "The number of misaligned data memory references, counting loads and stores."},
	{"Mul", "The number of multiply operations include speculative floating point and integer multiplies. This event is available on PMC1 only."},
	{"NonHlt_Ref_Cycles", "The number of non-halted bus cycles. This is an architectural performance event."},
	{"Pref_Rqsts_Dn", "The number of hardware prefetch requests issued in backward streams."},
	{"Pref_Rqsts_Up", "The number of hardware prefetch requests issued in forward streams."},
	{"Resource_Stall", "The number of cycles where there is a resource related stall."},
	{"SD_Drains", "The number of cycles while draining store buffers."},
	{"SIMD_FP_DP_P_Ret", "The number of SSE/SSE2 packed double precision instructions retired."},
	{"SIMD_FP_DP_P_Comp_Ret", "The number of SSE/SSE2 packed double precision compute instructions retired."},
	{"SIMD_FP_DP_S_Ret", "The number of SSE/SSE2 scalar double precision instructions retired."},
	{"SIMD_FP_DP_S_Comp_Ret", "The number of SSE/SSE2 scalar double precision compute instructions retired."},
	{"SIMD_FP_SP_P_Comp_Ret", "The number of SSE/SSE2 packed single precision compute instructions retired."},
	{"SIMD_FP_SP_Ret", "The number of SSE/SSE2 scalar single precision instructions retired, both packed and scalar."},
	{"SIMD_FP_SP_S_Ret", "The number of SSE/SSE2 scalar single precision instructions retired."},
	{"SIMD_FP_SP_S_Comp_Ret", "The number of SSE/SSE2 single precision compute instructions retired."},
	{"SIMD_Int_128_Ret", "The number of SSE2 128-bit integer instructions retired."},
	{"SIMD_Int_Pari_Exec", "The number of SIMD integer packed arithmetic instructions executed."},
	{"SIMD_Int_Pck_Exec", "The number of SIMD integer pack operations instructions executed."},
	{"SIMD_Int_Plog_Exec", "The number of SIMD integer packed logical instructions executed."},
	{"SIMD_Int_Pmul_Exec", "The number of SIMD integer packed multiply instructions executed."},
	{"SIMD_Int_Psft_Exec", "The number of SIMD integer packed shift instructions executed."},
	{"SIMD_Int_Sat_Exec", "The number of SIMD integer saturating instructions executed."},
	{"SIMD_Int_Upck_Exec", "The number of SIMD integer unpack instructions executed."},
	{"SMC_Detected", "The number of times self-modifying code was detected."},
	{"SSE_NTStores_Miss", "The number of times an SSE streaming store instruction missed all caches."},
	{"SSE_NTStores_Ret", "The number of SSE streaming store instructions executed."},
	{"SSE_PrefNta_Miss", "The number of times PREFETCHNTA missed all caches."},
	{"SSE_PrefNta_Ret", "The number of PREFETCHNTA instructions retired."},
	{"SSE_PrefT1_Miss", "The number of times PREFETCHT1 missed all caches."},
	{"SSE_PrefT1_Ret", "The number of PREFETCHT1 instructions retired."},
	{"SSE_PrefT2_Miss", "The number of times PREFETCHT2 missed all caches."},
	{"SSE_PrefT2_Ret", "The number of PREFETCHT2 instructions retired."},
	{"Seg_Reg_Loads", "The number of segment register loads."},
	{"Serial_Execution_Cycles", "The number of non-halted bus cycles of this code while the other core was halted."},
	{"Thermal_Trip", "The duration in a thermal trip based on the current core clock."},
	{"Unfusion", "The number of unfusion events."},
	{"Unhalted_Core_Cycles", "The number of core clock cycles when the clock signal on a specific core is not halted. This is an architectural performance event."},
	{"Uops_Ret", "The number of micro-ops retired."},
	{ NULL, NULL }
};
papi-5.6.0/src/libpfm4/lib/pfmlib_amd64_k7.c000664 001750 001750 00000004340 13216244365 022443 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_amd64_k7.c : AMD64 K7 * * Copyright (c) 2010 Google, Inc * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software.
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * */ /* private headers */ #include "pfmlib_priv.h" #include "pfmlib_amd64_priv.h" #include "events/amd64_events_k7.h" pfmlib_pmu_t amd64_k7_support={ .desc = "AMD64 K7", .name = "amd64_k7", .pmu = PFM_PMU_AMD64_K7, .pmu_rev = AMD64_K7, .pme_count = LIBPFM_ARRAY_SIZE(amd64_k7_pe), .type = PFM_PMU_TYPE_CORE, .supported_plm = AMD64_K7_PLM, .num_cntrs = 4, .max_encoding = 1, .pe = amd64_k7_pe, .atdesc = amd64_mods, .flags = PFMLIB_PMU_FL_RAW_UMASK, .cpu_family = PFM_PMU_AMD64_K7, .pmu_detect = pfm_amd64_family_detect, .get_event_encoding[PFM_OS_NONE] = pfm_amd64_get_encoding, PFMLIB_ENCODE_PERF(pfm_amd64_get_perf_encoding), .get_event_first = pfm_amd64_get_event_first, .get_event_next = pfm_amd64_get_event_next, .event_is_valid = pfm_amd64_event_is_valid, .validate_table = pfm_amd64_validate_table, .get_event_info = pfm_amd64_get_event_info, .get_event_attr_info = pfm_amd64_get_event_attr_info, PFMLIB_VALID_PERF_PATTRS(pfm_amd64_perf_validate_pattrs), .get_event_nattrs = pfm_amd64_get_event_nattrs, }; papi-5.6.0/src/perfctr-2.7.x/usr.lib/event_set_p6.c000664 001750 001750 00000044041 13216244370 023667 0ustar00jshenry1963jshenry1963000000 000000 /* $Id: event_set_p6.c,v 1.5 2004/05/02 21:45:47 mikpe Exp $ * Performance counter event descriptions for the Intel P6 family. * * Copyright (C) 2003-2004 Mikael Pettersson * * References * ---------- * [IA32, Volume 3] "Intel Architecture Software Developer's Manual, * Volume 3: System Programming Guide". Intel document number 25366813. 
* (at http://developer.intel.com/) */ #include /* for NULL */ #include "libperfctr.h" #include "event_set.h" /* * Intel Pentium Pro events. * Note that four L2 events were redefined in Pentium M. */ static const struct perfctr_unit_mask_4 p6_um_mesi = { { .type = perfctr_um_type_bitmask, .default_value = 0x0F, .nvalues = 4 }, { { 0x08, "M (modified cache state)" }, { 0x04, "E (exclusive cache state)" }, { 0x02, "S (shared cache state)" }, { 0x01, "I (invalid cache state)" } } }; static const struct perfctr_unit_mask_2 p6_um_ebl = { { .type = perfctr_um_type_exclusive, .default_value = 0x20, .nvalues = 2 }, { { 0x20, "transactions from any processor" }, { 0x00, "self-generated transactions" } } }; static const struct perfctr_event p6_events[] = { /* Data Cache Unit (DCU) */ { 0x43, 0x3, NULL, "DATA_MEM_REFS", "All memory references, cachable and non" }, { 0x45, 0x3, NULL, "DCU_LINES_IN", "Total lines allocated in the DCU" }, { 0x46, 0x3, NULL, "DCU_M_LINES_IN", "Number of M state lines allocated in DCU" }, { 0x47, 0x3, NULL, "DCU_M_LINES_OUT", "Number of M lines evicted from the DCU" }, { 0x48, 0x3, NULL, "DCU_MISS_OUTSTANDING", "Number of cycles while DCU miss outstanding" }, /* Instruction Fetch Unit (IFU) */ { 0x80, 0x3, NULL, "IFU_IFETCH", "Number of non/cachable instruction fetches" }, /* XXX: was IFU_FETCH */ { 0x81, 0x3, NULL, "IFU_IFETCH_MISS", "Number of instruction fetch misses" }, /* XXX: was IFU_FETCH_MISS */ { 0x85, 0x3, NULL, "ITLB_MISS", "Number of ITLB misses" }, { 0x86, 0x3, NULL, "IFU_MEM_STALL", "Cycles instruction fetch pipe is stalled" }, { 0x87, 0x3, NULL, "ILD_STALL", "Cycles instruction length decoder is stalled" }, /* L2 Cache */ { 0x28, 0x3, UM(p6_um_mesi), "L2_IFETCH", "Number of L2 instruction fetches" }, { 0x2A, 0x3, UM(p6_um_mesi), "L2_ST", "Number of L2 data stores" }, { 0x25, 0x3, NULL, "L2_M_LINES_INM", "Number of modified lines allocated in L2" }, { 0x2E, 0x3, UM(p6_um_mesi), "L2_RQSTS", "Number of L2 requests" }, { 0x21, 0x3, NULL, 
"L2_ADS", "Number of L2 address strobes" }, { 0x22, 0x3, NULL, "L2_DBUS_BUSY", "Number of cycles data bus was busy" }, { 0x23, 0x3, NULL, "L2_DBUS_BUSY_RD", "Cycles data bus was busy in xfer from L2 to CPU" }, /* External Bus Logic (EBL) */ { 0x62, 0x3, UM(p6_um_ebl), "BUS_DRDY_CLOCKS", "Number of clocks DRDY is asserted" }, { 0x63, 0x3, UM(p6_um_ebl), "BUS_LOCK_CLOCKS", "Number of clocks LOCK is asserted" }, { 0x60, 0x3, NULL, "BUS_REQ_OUTSTANDING", "Number of outstanding bus requests" }, { 0x65, 0x3, UM(p6_um_ebl), "BUS_TRAN_BRD", "Number of burst read transactions" }, { 0x66, 0x3, UM(p6_um_ebl), "BUS_TRAN_RFO", "Number of read for ownership transactions" }, { 0x67, 0x3, UM(p6_um_ebl), "BUS_TRANS_WB", "Number of write back transactions" }, { 0x68, 0x3, UM(p6_um_ebl), "BUS_TRAN_IFETCH", "Number of instruction fetch transactions" }, { 0x69, 0x3, UM(p6_um_ebl), "BUS_TRAN_INVAL", "Number of invalidate transactions" }, { 0x6A, 0x3, UM(p6_um_ebl), "BUS_TRAN_PWR", "Number of partial write transactions" }, { 0x6B, 0x3, UM(p6_um_ebl), "BUS_TRANS_P", "Number of partial transactions" }, { 0x6C, 0x3, UM(p6_um_ebl), "BUS_TRANS_IO", "Number of I/O transactions" }, { 0x6D, 0x3, UM(p6_um_ebl), "BUS_TRAN_DEF" }, { 0x6E, 0x3, UM(p6_um_ebl), "BUS_TRAN_BURST", "Number of burst transactions" }, { 0x70, 0x3, UM(p6_um_ebl), "BUS_TRAN_ANY", "Number of all transactions" }, { 0x6F, 0x3, UM(p6_um_ebl), "BUS_TRAN_MEM", "Number of memory transactions" }, { 0x64, 0x3, NULL, "BUS_DATA_RCV", "Bus cycles this processor is receiving data" }, { 0x61, 0x3, NULL, "BUS_BNR_DRV", "Bus cycles this processor is driving BNR pin" }, { 0x7A, 0x3, NULL, "BUS_HIT_DRV", "Bus cycles this processor is driving HIT pin" }, { 0x7B, 0x3, NULL, "BUS_HITM_DRV", "Bus cycles this processor is driving HITM pin" }, { 0x7E, 0x3, NULL, "BUS_SNOOP_STALL", "Cycles during bus snoop stall" }, /* Floating-Point Unit */ { 0xC1, 0x1, NULL, "FLOPS", "Number of computational FP operations executed" }, { 0x10, 0x1, NULL, 
"FP_COMP_OPS_EXE", "Number of computational FP operations executed" }, { 0x11, 0x2, NULL, "FP_ASSIST", "Number of FP exceptions handled by microcode" }, { 0x12, 0x2, NULL, "MUL", "Number of multiplies" }, { 0x13, 0x2, NULL, "DIV", "Number of divides" }, { 0x14, 0x1, NULL, "CYCLES_DIV_BUSY", "Cycles divider is busy" }, /* Memory Ordering */ { 0x03, 0x3, NULL, "LD_BLOCKS", "Number of store buffer blocks" }, { 0x04, 0x3, NULL, "SB_DRAINS", "Number of store buffer drain cycles" }, { 0x05, 0x3, NULL, "MISALIGN_MEM_REF", "Number of misaligned data memory references" }, /* Instruction Decoding and Retirement */ { 0xC0, 0x3, NULL, "INST_RETIRED", "Number of instructions retired" }, { 0xC2, 0x3, NULL, "UOPS_RETIRED", "Number of UOPs retired" }, { 0xD0, 0x3, NULL, "INST_DECODED", "Number of instructions decoded" }, /* Interrupts */ { 0xC8, 0x3, NULL, "HW_INT_RX", "Number of hardware interrupts received" }, { 0xC6, 0x3, NULL, "CYCLES_INT_MASKED", "Cycles interrupts are disabled" }, { 0xC7, 0x3, NULL, "CYCLES_INT_PENDING_AND_MASKED", "Cycles interrupts are disabled with pending interrupts" }, /* Branches */ { 0xC4, 0x3, NULL, "BR_INST_RETIRED", "Number of branch instructions retired" }, { 0xC5, 0x3, NULL, "BR_MISS_PRED_RETIRED", "Number of mispredicted branches retired" }, { 0xC9, 0x3, NULL, "BR_TAKEN_RETIRED", "Number of taken branches retired" }, { 0xCA, 0x3, NULL, "BR_MISS_PRED_TAKEN_RET", "Number of taken mispredictions branches retired" }, { 0xE0, 0x3, NULL, "BR_INST_DECODED", "Number of branch instructions decoded" }, { 0xE2, 0x3, NULL, "BTB_MISSES", "Number of branches that miss the BTB" }, { 0xE4, 0x3, NULL, "BR_BOGUS", "Number of bogus branches" }, { 0xE6, 0x3, NULL, "BACLEARS", "Number of times BACLEAR is asserted" }, /* Stalls */ { 0xA2, 0x3, NULL, "RESOURCE_STALLS", "Cycles during resource related stalls" }, { 0xD2, 0x3, NULL, "PARTIAL_RAT_STALLS", "Cycles or events for partial stalls" }, /* Segment Register Loads */ { 0x06, 0x3, NULL, "SEGMENT_REG_LOADS", "Number 
of segment register loads" }, /* Clocks */ { 0x79, 0x3, NULL, "CPU_CLK_UNHALTED", "Clocks processor is not halted" }, }; const struct perfctr_event_set p6_event_set = { .cpu_type = PERFCTR_X86_INTEL_P6, .event_prefix = "P6_", .include = NULL, .nevents = ARRAY_SIZE(p6_events), .events = p6_events, }; static const struct perfctr_event ppro_events[] = { /* L2 cache */ { 0x29, 0x3, UM(p6_um_mesi), "L2_LD", "Number of L2 data loads" }, /* redefined in Pentium M */ { 0x24, 0x3, NULL, "L2_LINES_IN", "Number of allocated lines in L2" }, /* redefined in Pentium M */ { 0x26, 0x3, NULL, "L2_LINES_OUT", "Number of recovered lines from L2" }, /* redefined in Pentium M */ { 0x27, 0x3, NULL, "L2_M_LINES_OUTM", "Number of modified lines removed from L2" }, /* redefined in Pentium M */ }; const struct perfctr_event_set perfctr_ppro_event_set = { .cpu_type = PERFCTR_X86_INTEL_P6, .event_prefix = "P6_", .include = &p6_event_set, .nevents = ARRAY_SIZE(ppro_events), .events = ppro_events, }; /* * Intel Pentium II events. * Note that two PII events (0xB0 and 0xCE) are unavailable in the PIII. 
*/ static const struct perfctr_unit_mask_0 p2_um_mmx_uops_exec = { { .type = perfctr_um_type_fixed, .default_value = 0x0F, .nvalues = 0 } }; static const struct perfctr_unit_mask_6 p2_um_mmx_instr_type_exec = { { .type = perfctr_um_type_bitmask, .default_value = 0x3F, .nvalues = 6 }, { { 0x01, "MMX packed multiplies" }, { 0x02, "MMX packed shifts" }, { 0x04, "MMX pack operations" }, { 0x08, "MMX unpack operations" }, { 0x10, "MMX packed logical instructions" }, { 0x20, "MMX packed arithmetic instructions" } } }; static const struct perfctr_unit_mask_2 p2_um_fp_mmx_trans = { { .type = perfctr_um_type_exclusive, .default_value = 0x00, .nvalues = 2 }, { { 0x00, "MMX to FP transitions" }, { 0x01, "FP to MMX transitions" } } }; static const struct perfctr_unit_mask_4 p2_um_seg_reg_rename = { { .type = perfctr_um_type_bitmask, .default_value = 0x0F, .nvalues = 4 }, { { 0x01, "segment register ES" }, { 0x02, "segment register DS" }, { 0x04, "segment register FS" }, { 0x08, "segment register GS" } } }; static const struct perfctr_event p2andp3_events[] = { /* MMX Unit */ { 0xB1, 0x3, NULL, "MMX_SAT_INSTR_EXEC", "Number of MMX saturating instructions executed" }, { 0xB2, 0x3, UM(p2_um_mmx_uops_exec), "MMX_UOPS_EXEC", "Number of MMX UOPS executed" }, { 0xB3, 0x3, UM(p2_um_mmx_instr_type_exec), "MMX_INSTR_TYPE_EXEC", "Number of MMX packing instructions" }, { 0xCC, 0x3, UM(p2_um_fp_mmx_trans), "FP_MMX_TRANS", "MMX-floating point transitions" }, { 0xCD, 0x3, NULL, "MMX_ASSIST", "Number of EMMS instructions executed" }, /* Segment Register Renaming */ { 0xD4, 0x3, UM(p2_um_seg_reg_rename), "SEG_RENAME_STALLS" }, { 0xD5, 0x3, UM(p2_um_seg_reg_rename), "SEG_REG_RENAMES" }, { 0xD6, 0x3, NULL, "RET_SEG_RENAMES" }, }; static const struct perfctr_event_set p2andp3_event_set = { .cpu_type = PERFCTR_X86_INTEL_PII, .event_prefix = "PII_", .include = &perfctr_ppro_event_set, .nevents = ARRAY_SIZE(p2andp3_events), .events = p2andp3_events, }; static const struct perfctr_event p2_events[] = 
{ /* not in PIII :-( */ /* MMX Unit */ { 0xB0, 0x3, NULL, "MMX_INSTR_EXEC" }, { 0xCE, 0x3, NULL, "MMX_INSTR_RET", "Number of MMX instructions retired" }, }; const struct perfctr_event_set perfctr_p2_event_set = { .cpu_type = PERFCTR_X86_INTEL_PII, .event_prefix = "PII_", .include = &p2andp3_event_set, .nevents = ARRAY_SIZE(p2_events), .events = p2_events, }; /* * Intel Pentium III events. * Note that the two KNI decoding events were redefined in Pentium M. */ static const struct perfctr_unit_mask_4 p3_um_kni_prefetch = { { .type = perfctr_um_type_exclusive, .default_value = 0x00, .nvalues = 4 }, { { 0x00, "prefetch NTA" }, { 0x01, "prefetch T1" }, { 0x02, "prefetch T2" }, { 0x03, "weakly ordered stores" } } }; static const struct perfctr_event p3_events_1[] = { /* Memory Ordering */ { 0x07, 0x3, UM(p3_um_kni_prefetch), "EMON_KNI_PREF_DISPATCHED", "Number of KNI pre-fetch/weakly ordered insns dispatched" }, { 0x4B, 0x3, UM(p3_um_kni_prefetch), "EMON_KNI_PREF_MISS", "Number of KNI pre-fetch/weakly ordered insns that miss all caches" }, }; static const struct perfctr_event_set p3_event_set_1 = { .cpu_type = PERFCTR_X86_INTEL_PIII, .event_prefix = "PIII_", .include = &p2andp3_event_set, .nevents = ARRAY_SIZE(p3_events_1), .events = p3_events_1, }; static const struct perfctr_unit_mask_2 p3_um_kni_inst_retired = { { .type = perfctr_um_type_exclusive, .default_value = 0x00, .nvalues = 2 }, { { 0x00, "packed and scalar" }, { 0x01, "scalar" } } }; static const struct perfctr_event p3_events_2[] = { /* Instruction Decoding and Retirement */ { 0xD8, 0x3, UM(p3_um_kni_inst_retired), "EMON_KNI_INST_RETIRED", "Number of KNI instructions retired" }, /* redefined in Pentium M */ { 0xD9, 0x3, UM(p3_um_kni_inst_retired), "EMON_KNI_COMP_INST_RET", "Number of KNI computation instructions retired" }, /* redefined in Pentium M */ }; const struct perfctr_event_set perfctr_p3_event_set = { .cpu_type = PERFCTR_X86_INTEL_PIII, .event_prefix = "PIII_", .include = &p3_event_set_1, .nevents = 
ARRAY_SIZE(p3_events_2), .events = p3_events_2, }; /* * Intel Pentium M events. * Note that six PPro/PIII events were redefined. To describe that * we have to break up the PPro and PIII event sets, and assemble * the Pentium M event set in several steps. */ static const struct perfctr_unit_mask_6 pentm_um_mesi_prefetch = { { .type = perfctr_um_type_bitmask, .default_value = 0x0F, .nvalues = 6 }, /* XXX: how should we describe that bits 5-4 are a single field? */ { { 0x01, "I (invalid cache state)" }, { 0x02, "S (shared cache state)" }, { 0x04, "E (exclusive cache state)" }, { 0x08, "M (modified cache state)" }, /* Bits 5-4: 00: all but HW-prefetched lines, 01: only HW-prefetched lines, 10/11: all lines */ { 0x10, "prefetch type bit 0" }, { 0x20, "prefetch type bit 1" } } }; static const struct perfctr_unit_mask_2 pentm_um_est_trans = { { .type = perfctr_um_type_exclusive, .default_value = 0x00, .nvalues = 2 }, { { 0x00, "All transitions" }, { 0x02, "Only Frequency transitions" } } }; static const struct perfctr_unit_mask_4 pentm_um_sse_inst_ret = { { .type = perfctr_um_type_exclusive, .default_value = 0x00, .nvalues = 4 }, { { 0x00, "SSE Packed Single and Scalar Single" }, { 0x01, "SSE Packed-Single" }, { 0x02, "SSE2 Packed-Double" }, { 0x03, "SSE2 Scalar-Double" } } }; static const struct perfctr_unit_mask_4 pentm_um_sse_comp_inst_ret = { { .type = perfctr_um_type_exclusive, .default_value = 0x00, .nvalues = 4 }, { { 0x00, "SSE Packed Single" }, { 0x01, "SSE Scalar-Single" }, { 0x02, "SSE2 Packed-Double" }, { 0x03, "SSE2 Scalar-Double" } } }; static const struct perfctr_unit_mask_3 pentm_um_fused_uops = { { .type = perfctr_um_type_exclusive, .default_value = 0x00, .nvalues = 3 }, { { 0x00, "All fused micro-ops" }, { 0x01, "Only load+Op micro-ops" }, { 0x02, "Only std+sta micro-ops" } } }; static const struct perfctr_event pentm_events[] = { /* L2 cache */ { 0x24, 0x3, UM(pentm_um_mesi_prefetch), "L2_LINES_IN", "Number of allocated lines in L2" }, /* redefined */ { 
0x26, 0x3, UM(pentm_um_mesi_prefetch), "L2_LINES_OUT", "Number of recovered lines from L2" }, /* redefined */ { 0x27, 0x3, UM(pentm_um_mesi_prefetch), "L2_M_LINES_OUT", "Number of modified lines in L2, except hardware-prefetched" }, /* redefined */ { 0x29, 0x3, UM(pentm_um_mesi_prefetch), "L2_LD", "Number of L2 data loads" }, /* redefined */ /* Power Management */ { 0x58, 0x3, UM(pentm_um_est_trans), "EMON_EST_TRANS", "Number of SpeedStep(R) thermal transitions" }, { 0x59, 0x3, NULL, "EMON_THERMAL_TRIP", /*XXX: set bit 22(!?) for edge */ "Number of thermal trips (CPU temp exceeded 100C)" }, /* BPU */ { 0x88, 0x3, NULL, "BR_INST_EXEC", "All executed branches (not necessarily retired)" }, { 0x89, 0x3, NULL, "BR_MISSP_EXEC", "Number of branch instruction mispredicted at execution" }, { 0x8A, 0x3, NULL, "BR_BAC_MISSP_EXEC", "Number of branch instructions mispredicted at decoding" }, { 0x8B, 0x3, NULL, "BR_CND_EXEC", "Number of conditional branch instructions executed" }, { 0x8C, 0x3, NULL, "BR_CND_MISSP_EXEC", "Number of mispredicted conditional branch instructions executed" }, { 0x8D, 0x3, NULL, "BR_IND_EXEC", "Number of indirect branch instructions executed" }, { 0x8E, 0x3, NULL, "BR_IND_MISSP_EXEC", "Number of mispredicted indirect branch instructions executed" }, { 0x8F, 0x3, NULL, "BR_RET_EXEC", "Number of return branch instructions executed" }, { 0x90, 0x3, NULL, "BR_RET_MISSP_EXEC", "Number of mispredicted return branch instructions executed"}, { 0x91, 0x3, NULL, "BR_RET_BAC_MISSP_EXEC", "Number of executed return branch instructions mispredicted at decoding" }, { 0x92, 0x3, NULL, "BR_CALL_EXEC", "Number of function calls executed using CALL instruction" }, { 0x93, 0x3, NULL, "BR_CALL_MISSP_EXEC", "Number of mispredicted CALL instructions executed" }, { 0x94, 0x3, NULL, "BR_IND_CALL_EXEC", "Number of mispredicted indirect CALL instructions executed" }, /* Decoder */ { 0xCE, 0x3, NULL, "EMON_SIMD_INSTR_RETIRED", "Number of SIMD instructions retired" }, { 0xD3, 
0x3, NULL, "EMON_SYNCH_UOPS" }, { 0xD7, 0x3, NULL, "EMON_ESP_UOPS" }, { 0xD8, 0x3, UM(pentm_um_sse_inst_ret), "EMON_SSE_SSE2_INST_RETIRED", "Number of SSE2 instructions retired" }, /* redefined */ { 0xD9, 0x3, UM(pentm_um_sse_comp_inst_ret), "EMON_SSE_SSE2_COMP_INST_RETIRED", "Number of scalar-double SSE2 instructions retired" }, /* redefined */ { 0xDA, 0x3, UM(pentm_um_fused_uops), "EMON_FUSED_UOPS_RET", "Number of fused UOPS retired" }, { 0xDB, 0x3, NULL, "EMON_UNFUSION" }, /* Prefetcher */ { 0xF0, 0x3, NULL, "EMON_PREF_RQSTS_UP" }, { 0xF8, 0x3, NULL, "EMON_PREF_RQSTS_DN" }, }; const struct perfctr_event_set pentm_event_set_1 = { .cpu_type = PERFCTR_X86_INTEL_PII, .event_prefix = "PII_", .include = &p6_event_set, .nevents = ARRAY_SIZE(p2andp3_events), .events = p2andp3_events, }; const struct perfctr_event_set pentm_event_set_2 = { .cpu_type = PERFCTR_X86_INTEL_PIII, .event_prefix = "PIII_", .include = &pentm_event_set_1, .nevents = ARRAY_SIZE(p3_events_1), .events = p3_events_1, }; const struct perfctr_event_set perfctr_pentm_event_set = { .cpu_type = PERFCTR_X86_INTEL_PENTM, .event_prefix = "PENTM_", .include = &pentm_event_set_2, .nevents = ARRAY_SIZE(pentm_events), .events = pentm_events, }; papi-5.6.0/src/perfctr-2.7.x/patches/patch-kernel-2.6.12-rc1-mm4000664 001750 001750 00000010016 13216244370 025370 0ustar00jshenry1963jshenry1963000000 000000 diff -rupN linux-2.6.12-rc1-mm4/arch/ppc64/Kconfig linux-2.6.12-rc1-mm4.perfctr27-ppc64-arch-hooks/arch/ppc64/Kconfig --- linux-2.6.12-rc1-mm4/arch/ppc64/Kconfig 2005-03-31 21:08:24.000000000 +0200 +++ linux-2.6.12-rc1-mm4.perfctr27-ppc64-arch-hooks/arch/ppc64/Kconfig 2005-03-31 23:28:07.000000000 +0200 @@ -297,6 +297,7 @@ config SECCOMP endmenu +source "drivers/perfctr/Kconfig" menu "General setup" diff -rupN linux-2.6.12-rc1-mm4/arch/ppc64/kernel/misc.S linux-2.6.12-rc1-mm4.perfctr27-ppc64-arch-hooks/arch/ppc64/kernel/misc.S --- linux-2.6.12-rc1-mm4/arch/ppc64/kernel/misc.S 2005-03-31 21:08:24.000000000 +0200 +++ 
linux-2.6.12-rc1-mm4.perfctr27-ppc64-arch-hooks/arch/ppc64/kernel/misc.S 2005-03-31 23:28:07.000000000 +0200 @@ -956,6 +956,12 @@ _GLOBAL(sys_call_table32) .llong .sys32_request_key .llong .compat_sys_keyctl .llong .compat_sys_waitid + .llong .sys_ni_syscall /* 273 reserved for sys_ioprio_set */ + .llong .sys_ni_syscall /* 274 reserved for sys_ioprio_get */ + .llong .sys_vperfctr_open /* 275 */ + .llong .sys_vperfctr_control + .llong .sys_vperfctr_write + .llong .sys_vperfctr_read .balign 8 _GLOBAL(sys_call_table) @@ -1232,3 +1238,9 @@ _GLOBAL(sys_call_table) .llong .sys_request_key /* 270 */ .llong .sys_keyctl .llong .sys_waitid + .llong .sys_ni_syscall /* 273 reserved for sys_ioprio_set */ + .llong .sys_ni_syscall /* 274 reserved for sys_ioprio_get */ + .llong .sys_vperfctr_open /* 275 */ + .llong .sys_vperfctr_control + .llong .sys_vperfctr_write + .llong .sys_vperfctr_read diff -rupN linux-2.6.12-rc1-mm4/arch/ppc64/kernel/process.c linux-2.6.12-rc1-mm4.perfctr27-ppc64-arch-hooks/arch/ppc64/kernel/process.c --- linux-2.6.12-rc1-mm4/arch/ppc64/kernel/process.c 2005-03-31 21:07:46.000000000 +0200 +++ linux-2.6.12-rc1-mm4.perfctr27-ppc64-arch-hooks/arch/ppc64/kernel/process.c 2005-03-31 23:28:07.000000000 +0200 @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -225,7 +226,9 @@ struct task_struct *__switch_to(struct t local_irq_save(flags); + perfctr_suspend_thread(&prev->thread); last = _switch(old_thread, new_thread); + perfctr_resume_thread(&current->thread); local_irq_restore(flags); @@ -323,6 +326,7 @@ void exit_thread(void) last_task_used_altivec = NULL; #endif /* CONFIG_ALTIVEC */ #endif /* CONFIG_SMP */ + perfctr_exit_thread(&current->thread); } void flush_thread(void) @@ -425,6 +429,8 @@ copy_thread(int nr, unsigned long clone_ */ kregs->nip = *((unsigned long *)ret_from_fork); + perfctr_copy_task(p, regs); + return 0; } diff -rupN linux-2.6.12-rc1-mm4/include/asm-ppc64/processor.h 
linux-2.6.12-rc1-mm4.perfctr27-ppc64-arch-hooks/include/asm-ppc64/processor.h --- linux-2.6.12-rc1-mm4/include/asm-ppc64/processor.h 2005-03-31 21:08:31.000000000 +0200 +++ linux-2.6.12-rc1-mm4.perfctr27-ppc64-arch-hooks/include/asm-ppc64/processor.h 2005-03-31 23:28:07.000000000 +0200 @@ -574,6 +574,8 @@ struct thread_struct { unsigned long vrsave; int used_vr; /* set if process has used altivec */ #endif /* CONFIG_ALTIVEC */ + /* performance counters */ + struct vperfctr *perfctr; }; #define ARCH_MIN_TASKALIGN 16 diff -rupN linux-2.6.12-rc1-mm4/include/asm-ppc64/unistd.h linux-2.6.12-rc1-mm4.perfctr27-ppc64-arch-hooks/include/asm-ppc64/unistd.h --- linux-2.6.12-rc1-mm4/include/asm-ppc64/unistd.h 2005-03-31 21:07:54.000000000 +0200 +++ linux-2.6.12-rc1-mm4.perfctr27-ppc64-arch-hooks/include/asm-ppc64/unistd.h 2005-03-31 23:28:07.000000000 +0200 @@ -283,8 +283,14 @@ #define __NR_request_key 270 #define __NR_keyctl 271 #define __NR_waitid 272 +/* 273 is reserved for ioprio_set */ +/* 274 is reserved for ioprio_get */ +#define __NR_vperfctr_open 275 +#define __NR_vperfctr_control (__NR_vperfctr_open+1) +#define __NR_vperfctr_write (__NR_vperfctr_open+2) +#define __NR_vperfctr_read (__NR_vperfctr_open+3) -#define __NR_syscalls 273 +#define __NR_syscalls 279 #ifdef __KERNEL__ #define NR_syscalls __NR_syscalls #endif papi-5.6.0/src/perfctr-2.7.x/patches/patch-kernel-2.6.12-rc1-mm3000664 001750 001750 00000007053 13216244370 025376 0ustar00jshenry1963jshenry1963000000 000000 --- linux-2.6.12-rc1-mm3.perfctr27-ppc64-arch-support/arch/ppc64/Kconfig.~1~ 2005-03-28 17:26:13.000000000 +0200 +++ linux-2.6.12-rc1-mm3.perfctr27-ppc64-arch-support/arch/ppc64/Kconfig 2005-03-28 23:56:30.000000000 +0200 @@ -280,6 +280,7 @@ config LPARCFG endmenu +source "drivers/perfctr/Kconfig" menu "General setup" --- linux-2.6.12-rc1-mm3.perfctr27-ppc64-arch-support/arch/ppc64/kernel/misc.S.~1~ 2005-03-28 17:26:13.000000000 +0200 +++ 
linux-2.6.12-rc1-mm3.perfctr27-ppc64-arch-support/arch/ppc64/kernel/misc.S 2005-03-28 23:56:30.000000000 +0200 @@ -956,6 +956,12 @@ _GLOBAL(sys_call_table32) .llong .sys32_request_key .llong .compat_sys_keyctl .llong .compat_sys_waitid + .llong .sys_ni_syscall /* 273 reserved for sys_ioprio_set */ + .llong .sys_ni_syscall /* 274 reserved for sys_ioprio_get */ + .llong .sys_vperfctr_open /* 275 */ + .llong .sys_vperfctr_control + .llong .sys_vperfctr_write + .llong .sys_vperfctr_read .balign 8 _GLOBAL(sys_call_table) @@ -1232,3 +1238,9 @@ _GLOBAL(sys_call_table) .llong .sys_request_key /* 270 */ .llong .sys_keyctl .llong .sys_waitid + .llong .sys_ni_syscall /* 273 reserved for sys_ioprio_set */ + .llong .sys_ni_syscall /* 274 reserved for sys_ioprio_get */ + .llong .sys_vperfctr_open /* 275 */ + .llong .sys_vperfctr_control + .llong .sys_vperfctr_write + .llong .sys_vperfctr_read --- linux-2.6.12-rc1-mm3.perfctr27-ppc64-arch-support/arch/ppc64/kernel/process.c.~1~ 2005-03-28 17:25:43.000000000 +0200 +++ linux-2.6.12-rc1-mm3.perfctr27-ppc64-arch-support/arch/ppc64/kernel/process.c 2005-03-28 23:56:30.000000000 +0200 @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -225,7 +226,9 @@ struct task_struct *__switch_to(struct t local_irq_save(flags); + perfctr_suspend_thread(&prev->thread); last = _switch(old_thread, new_thread); + perfctr_resume_thread(&current->thread); local_irq_restore(flags); @@ -323,6 +326,7 @@ void exit_thread(void) last_task_used_altivec = NULL; #endif /* CONFIG_ALTIVEC */ #endif /* CONFIG_SMP */ + perfctr_exit_thread(&current->thread); } void flush_thread(void) @@ -425,6 +429,8 @@ copy_thread(int nr, unsigned long clone_ */ kregs->nip = *((unsigned long *)ret_from_fork); + perfctr_copy_task(p, regs); + return 0; } --- linux-2.6.12-rc1-mm3.perfctr27-ppc64-arch-support/include/asm-ppc64/processor.h.~1~ 2005-03-28 17:26:21.000000000 +0200 +++ linux-2.6.12-rc1-mm3.perfctr27-ppc64-arch-support/include/asm-ppc64/processor.h 2005-03-28 
23:56:30.000000000 +0200 @@ -574,6 +574,8 @@ struct thread_struct { unsigned long vrsave; int used_vr; /* set if process has used altivec */ #endif /* CONFIG_ALTIVEC */ + /* performance counters */ + struct vperfctr *perfctr; }; #define ARCH_MIN_TASKALIGN 16 --- linux-2.6.12-rc1-mm3.perfctr27-ppc64-arch-support/include/asm-ppc64/unistd.h.~1~ 2005-03-28 17:25:46.000000000 +0200 +++ linux-2.6.12-rc1-mm3.perfctr27-ppc64-arch-support/include/asm-ppc64/unistd.h 2005-03-28 23:56:30.000000000 +0200 @@ -283,8 +283,14 @@ #define __NR_request_key 270 #define __NR_keyctl 271 #define __NR_waitid 272 +/* 273 is reserved for ioprio_set */ +/* 274 is reserved for ioprio_get */ +#define __NR_vperfctr_open 275 +#define __NR_vperfctr_control (__NR_vperfctr_open+1) +#define __NR_vperfctr_write (__NR_vperfctr_open+2) +#define __NR_vperfctr_read (__NR_vperfctr_open+3) -#define __NR_syscalls 273 +#define __NR_syscalls 279 #ifdef __KERNEL__ #define NR_syscalls __NR_syscalls #endif papi-5.6.0/man/man3/PAPI_get_real_usec.3000664 001750 001750 00000001457 13216244356 021655 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPI_get_real_usec" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPI_get_real_usec \- .PP get real time counter value in microseconds .SH SYNOPSIS .br .PP .SH "Detailed Description" .PP .PP .nf This function returns the total real time passed since some arbitrary starting point. The time is returned in microseconds. This call is equivalent to wall clock time. @par Examples: .fi .PP .PP .nf s = PAPI_get_real_usec(); your_slow_code(); e = PAPI_get_real_usec(); printf("Wallclock time: %lld us\en",e-s); * .fi .PP .PP \fBSee Also:\fP .RS 4 PAPIF .PP PAPI .PP \fBPAPI_get_virt_usec\fP .PP \fBPAPI_get_virt_cyc\fP .PP \fBPAPI_library_init\fP .RE .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. 
papi-5.6.0/src/perfctr-2.7.x/patches/patch-kernel-2.6.12-rc1-mm1000664 001750 001750 00000010042 13216244370 025364 0ustar00jshenry1963jshenry1963000000 000000 diff -rupN linux-2.6.12-rc1-mm1/arch/ppc64/Kconfig linux-2.6.12-rc1-mm1.perfctr27-ppc64-arch-support/arch/ppc64/Kconfig --- linux-2.6.12-rc1-mm1/arch/ppc64/Kconfig 2005-03-22 21:59:07.000000000 +0100 +++ linux-2.6.12-rc1-mm1.perfctr27-ppc64-arch-support/arch/ppc64/Kconfig 2005-03-23 02:22:35.000000000 +0100 @@ -280,6 +280,7 @@ config LPARCFG endmenu +source "drivers/perfctr/Kconfig" menu "General setup" diff -rupN linux-2.6.12-rc1-mm1/arch/ppc64/kernel/misc.S linux-2.6.12-rc1-mm1.perfctr27-ppc64-arch-support/arch/ppc64/kernel/misc.S --- linux-2.6.12-rc1-mm1/arch/ppc64/kernel/misc.S 2005-03-22 21:59:07.000000000 +0100 +++ linux-2.6.12-rc1-mm1.perfctr27-ppc64-arch-support/arch/ppc64/kernel/misc.S 2005-03-23 02:22:35.000000000 +0100 @@ -960,6 +960,12 @@ _GLOBAL(sys_call_table32) .llong .sys32_request_key .llong .compat_sys_keyctl .llong .compat_sys_waitid + .llong .sys_ni_syscall /* 273 reserved for sys_ioprio_set */ + .llong .sys_ni_syscall /* 274 reserved for sys_ioprio_get */ + .llong .sys_vperfctr_open /* 275 */ + .llong .sys_vperfctr_control + .llong .sys_vperfctr_write + .llong .sys_vperfctr_read .balign 8 _GLOBAL(sys_call_table) @@ -1236,3 +1242,9 @@ _GLOBAL(sys_call_table) .llong .sys_request_key /* 270 */ .llong .sys_keyctl .llong .sys_waitid + .llong .sys_ni_syscall /* 273 reserved for sys_ioprio_set */ + .llong .sys_ni_syscall /* 274 reserved for sys_ioprio_get */ + .llong .sys_vperfctr_open /* 275 */ + .llong .sys_vperfctr_control + .llong .sys_vperfctr_write + .llong .sys_vperfctr_read diff -rupN linux-2.6.12-rc1-mm1/arch/ppc64/kernel/process.c linux-2.6.12-rc1-mm1.perfctr27-ppc64-arch-support/arch/ppc64/kernel/process.c --- linux-2.6.12-rc1-mm1/arch/ppc64/kernel/process.c 2005-03-22 21:58:47.000000000 +0100 +++ linux-2.6.12-rc1-mm1.perfctr27-ppc64-arch-support/arch/ppc64/kernel/process.c 
2005-03-23 02:22:35.000000000 +0100 @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -225,7 +226,9 @@ struct task_struct *__switch_to(struct t local_irq_save(flags); + perfctr_suspend_thread(&prev->thread); last = _switch(old_thread, new_thread); + perfctr_resume_thread(&current->thread); local_irq_restore(flags); @@ -323,6 +326,7 @@ void exit_thread(void) last_task_used_altivec = NULL; #endif /* CONFIG_ALTIVEC */ #endif /* CONFIG_SMP */ + perfctr_exit_thread(&current->thread); } void flush_thread(void) @@ -425,6 +429,8 @@ copy_thread(int nr, unsigned long clone_ */ kregs->nip = *((unsigned long *)ret_from_fork); + perfctr_copy_task(p, regs); + return 0; } diff -rupN linux-2.6.12-rc1-mm1/include/asm-ppc64/processor.h linux-2.6.12-rc1-mm1.perfctr27-ppc64-arch-support/include/asm-ppc64/processor.h --- linux-2.6.12-rc1-mm1/include/asm-ppc64/processor.h 2005-03-22 21:58:51.000000000 +0100 +++ linux-2.6.12-rc1-mm1.perfctr27-ppc64-arch-support/include/asm-ppc64/processor.h 2005-03-23 02:22:35.000000000 +0100 @@ -578,6 +578,8 @@ struct thread_struct { unsigned long vrsave; int used_vr; /* set if process has used altivec */ #endif /* CONFIG_ALTIVEC */ + /* performance counters */ + struct vperfctr *perfctr; }; #define ARCH_MIN_TASKALIGN 16 diff -rupN linux-2.6.12-rc1-mm1/include/asm-ppc64/unistd.h linux-2.6.12-rc1-mm1.perfctr27-ppc64-arch-support/include/asm-ppc64/unistd.h --- linux-2.6.12-rc1-mm1/include/asm-ppc64/unistd.h 2005-03-22 21:58:51.000000000 +0100 +++ linux-2.6.12-rc1-mm1.perfctr27-ppc64-arch-support/include/asm-ppc64/unistd.h 2005-03-23 02:22:35.000000000 +0100 @@ -283,8 +283,14 @@ #define __NR_request_key 270 #define __NR_keyctl 271 #define __NR_waitid 272 +/* 273 is reserved for ioprio_set */ +/* 274 is reserved for ioprio_get */ +#define __NR_vperfctr_open 275 +#define __NR_vperfctr_control (__NR_vperfctr_open+1) +#define __NR_vperfctr_write (__NR_vperfctr_open+2) +#define __NR_vperfctr_read (__NR_vperfctr_open+3) -#define __NR_syscalls 273 
+#define __NR_syscalls 279 #ifdef __KERNEL__ #define NR_syscalls __NR_syscalls #endif papi-5.6.0/src/components/powercap/linux-powercap.c000664 001750 001750 00000040433 13216244360 024453 0ustar00jshenry1963jshenry1963000000 000000 /** * @file linux-powercap.c * @author Philip Vaccaro * @ingroup papi_components * @brief powercap component * * To work, the powercap kernel module must be loaded. */ #include #include #include #include #include #include #include #include #include #include #include /* Headers required by PAPI */ #include "papi.h" #include "papi_internal.h" #include "papi_vector.h" #include "papi_memory.h" typedef struct _powercap_register { unsigned int selector; } _powercap_register_t; typedef struct _powercap_native_event_entry { char name[PAPI_MAX_STR_LEN]; char units[PAPI_MIN_STR_LEN]; char description[PAPI_MAX_STR_LEN]; int socket_id; int component_id; int event_id; int type; int return_type; _powercap_register_t resources; } _powercap_native_event_entry_t; typedef struct _powercap_reg_alloc { _powercap_register_t ra_bits; } _powercap_reg_alloc_t; static char read_buff[PAPI_MAX_STR_LEN]; static char write_buff[PAPI_MAX_STR_LEN]; static int num_events=0; // package events #define PKG_ENERGY 0 #define PKG_MAX_ENERGY_RANGE 1 #define PKG_MAX_POWER_A 2 #define PKG_POWER_LIMIT_A 3 #define PKG_TIME_WINDOW_A 4 #define PKG_MAX_POWER_B 5 #define PKG_POWER_LIMIT_B 6 #define PKG_TIME_WINDOW_B 7 #define PKG_ENABLED 8 #define PKG_NAME 9 #define PKG_NUM_EVENTS 10 static int pkg_events[PKG_NUM_EVENTS] = {PKG_ENERGY, PKG_MAX_ENERGY_RANGE, PKG_MAX_POWER_A, PKG_POWER_LIMIT_A, PKG_TIME_WINDOW_A, PKG_MAX_POWER_B, PKG_POWER_LIMIT_B, PKG_TIME_WINDOW_B, PKG_ENABLED, PKG_NAME}; static char *pkg_event_names[PKG_NUM_EVENTS] = {"ENERGY_UJ", "MAX_ENERGY_RANGE_UJ", "MAX_POWER_A_UW", "POWER_LIMIT_A_UW", "TIME_WINDOW_A_US", "MAX_POWER_B_UW", "POWER_LIMIT_B_UW", "TIME_WINDOW_B", "ENABLED", "NAME"}; static char *pkg_sys_names[PKG_NUM_EVENTS] = {"energy_uj", "max_energy_range_uj", 
"constraint_0_max_power_uw", "constraint_0_power_limit_uw", "constraint_0_time_window_us", "constraint_1_max_power_uw", "constraint_1_power_limit_uw", "constraint_1_time_window_us", "enabled", "name"}; static mode_t pkg_sys_flags[PKG_NUM_EVENTS] = {O_RDONLY, O_RDONLY, O_RDONLY, O_RDWR, O_RDONLY, O_RDONLY, O_RDWR, O_RDONLY, O_RDONLY, O_RDONLY}; // non-package events #define COMPONENT_ENERGY 10 #define COMPONENT_MAX_ENERGY_RANGE 11 #define COMPONENT_MAX_POWER_A 12 #define COMPONENT_POWER_LIMIT_A 13 #define COMPONENT_TIME_WINDOW_A 14 #define COMPONENT_ENABLED 15 #define COMPONENT_NAME 16 #define COMPONENT_NUM_EVENTS 7 static int component_events[COMPONENT_NUM_EVENTS] = {COMPONENT_ENERGY, COMPONENT_MAX_ENERGY_RANGE, COMPONENT_MAX_POWER_A, COMPONENT_POWER_LIMIT_A, COMPONENT_TIME_WINDOW_A, COMPONENT_ENABLED, COMPONENT_NAME}; static char *component_event_names[COMPONENT_NUM_EVENTS] = {"ENERGY_UJ", "MAX_ENERGY_RANGE_UJ", "MAX_POWER_A_UW", "POWER_LIMIT_A_UW", "TIME_WINDOW_A_US", "ENABLED", "NAME"}; static char *component_sys_names[COMPONENT_NUM_EVENTS] = {"energy_uj", "max_energy_range_uj", "constraint_0_max_power_uw", "constraint_0_power_limit_uw", "constraint_0_time_window_us", "enabled", "name"}; static mode_t component_sys_flags[COMPONENT_NUM_EVENTS] = {O_RDONLY, O_RDONLY, O_RDONLY, O_RDWR, O_RDONLY, O_RDONLY, O_RDONLY}; #define POWERCAP_MAX_COUNTERS (2 * (PKG_NUM_EVENTS + (3 * COMPONENT_NUM_EVENTS))) static _powercap_native_event_entry_t powercap_ntv_events[(2 * (PKG_NUM_EVENTS + (3 * COMPONENT_NUM_EVENTS)))]; static int event_fds[POWERCAP_MAX_COUNTERS]; typedef struct _powercap_control_state { long long count[POWERCAP_MAX_COUNTERS]; long long which_counter[POWERCAP_MAX_COUNTERS]; long long lastupdate; } _powercap_control_state_t; typedef struct _powercap_context { long long start_value[POWERCAP_MAX_COUNTERS]; _powercap_control_state_t state; } _powercap_context_t; papi_vector_t _powercap_vector; 
/***************************************************************************/ /****** BEGIN FUNCTIONS USED INTERNALLY SPECIFIC TO THIS COMPONENT *******/ /***************************************************************************/ /* Null terminated version of strncpy */ static char * _local_strlcpy( char *dst, const char *src, size_t size ) { char *retval = strncpy( dst, src, size ); if ( size>0 ) dst[size-1] = '\0'; return( retval ); } static long long read_powercap_value( int index ) { int sz = pread(event_fds[index], read_buff, PAPI_MAX_STR_LEN, 0); read_buff[sz] = '\0'; return atoll(read_buff); } static int write_powercap_value( int index, long long value ) { snprintf(write_buff, sizeof(write_buff), "%lld", value); int sz = pwrite(event_fds[index], write_buff, PAPI_MAX_STR_LEN, 0); if(sz == -1) { perror("Error in pwrite(): "); } return 1; } /************************* PAPI Functions **********************************/ /* * This is called whenever a thread is initialized */ static int _powercap_init_thread( hwd_context_t *ctx ) { ( void ) ctx; return PAPI_OK; } /* * Called when PAPI process is initialized (i.e. 
PAPI_library_init) */ static int _powercap_init_component( int cidx ) { int num_sockets = -1; int s = -1, e = -1, c = -1; char events_dir[128]; char event_path[128]; DIR *events; // get hw info const PAPI_hw_info_t *hw_info; hw_info=&( _papi_hwi_system_info.hw_info ); // check if intel processor if ( hw_info->vendor!=PAPI_VENDOR_INTEL ) { strncpy(_powercap_vector.cmp_info.disabled_reason, "Not an Intel processor", PAPI_MAX_STR_LEN); return PAPI_ENOSUPP; } // store number of sockets for adding events num_sockets = hw_info->sockets; num_events = 0; for(s = 0; s < num_sockets; s++) { // compose string of a pkg directory path snprintf(events_dir, sizeof(events_dir), "/sys/class/powercap/intel-rapl:%d/", s); // open directory to make sure it exists events = opendir(events_dir); // not a valid pkg/component directory so continue if (events == NULL) { continue; } // loop through pkg events and create powercap event entries for (e = 0; e < PKG_NUM_EVENTS; e++) { // compose string to individual event snprintf(event_path, sizeof(event_path), "%s%s", events_dir, pkg_sys_names[e]); // not a valid pkg event path so continue if (access(event_path, F_OK) == -1) { continue; } snprintf(powercap_ntv_events[num_events].name, sizeof(powercap_ntv_events[num_events].name), "%s:ZONE%d", pkg_event_names[e], s); //snprintf(powercap_ntv_events[num_events].description, sizeof(powercap_ntv_events[num_events].name), "%s:ZONE%d", pkg_event_names[e], s); //snprintf(powercap_ntv_events[num_events].units, sizeof(powercap_ntv_events[num_events].name), "%s:ZONE%d", pkg_event_names[e], s); powercap_ntv_events[num_events].return_type = PAPI_DATATYPE_UINT64; powercap_ntv_events[num_events].type = pkg_events[e]; powercap_ntv_events[num_events].resources.selector = num_events + 1; event_fds[num_events] = open(event_path, O_SYNC|pkg_sys_flags[e]); if(powercap_ntv_events[num_events].type == PKG_NAME) { int sz = pread(event_fds[num_events], read_buff, PAPI_MAX_STR_LEN, 0); read_buff[sz] = '\0'; 
snprintf(powercap_ntv_events[num_events].description, sizeof(powercap_ntv_events[num_events].description), "%s", read_buff); } num_events++; } // reset component count for each socket c = 0; snprintf(events_dir, sizeof(events_dir), "/sys/class/powercap/intel-rapl:%d:%d/", s, c); while((events = opendir(events_dir)) != NULL) { // loop through pkg events and create powercap event entries for (e = 0; e < COMPONENT_NUM_EVENTS; e++) { // compose string to individual event snprintf(event_path, sizeof(event_path), "%s%s", events_dir, component_sys_names[e]); // not a valid pkg event path so continue if (access(event_path, F_OK) == -1) { continue; } snprintf(powercap_ntv_events[num_events].name, sizeof(powercap_ntv_events[num_events].name), "%s:ZONE%d_SUBZONE%d", component_event_names[e], s, c); //snprintf(powercap_ntv_events[num_events].description, sizeof(powercap_ntv_events[num_events].name), "%s:ZONE%d_SUBZONE%d", component_event_names[e], s, c); //snprintf(powercap_ntv_events[num_events].units, sizeof(powercap_ntv_events[num_events].name), "%s:ZONE%d_SUBZONE%d", component_event_names[e], s, c); powercap_ntv_events[num_events].return_type = PAPI_DATATYPE_UINT64; powercap_ntv_events[num_events].type = component_events[e]; powercap_ntv_events[num_events].resources.selector = num_events + 1; event_fds[num_events] = open(event_path, O_SYNC|component_sys_flags[e]); if(powercap_ntv_events[num_events].type == COMPONENT_NAME) { int sz = pread(event_fds[num_events], read_buff, PAPI_MAX_STR_LEN, 0); read_buff[sz] = '\0'; snprintf(powercap_ntv_events[num_events].description, sizeof(powercap_ntv_events[num_events].description), "%s", read_buff); } num_events++; } // test for next component c++; // compose string of an pkg directory path snprintf(events_dir, sizeof(events_dir), "/sys/class/powercap/intel-rapl:%d:%d/", s, c); } } /* Export the total number of events available */ _powercap_vector.cmp_info.num_native_events = num_events; _powercap_vector.cmp_info.num_cntrs = 
num_events; _powercap_vector.cmp_info.num_mpx_cntrs = num_events; /* Export the component id */ _powercap_vector.cmp_info.CmpIdx = cidx; return PAPI_OK; } /* * Control of counters (Reading/Writing/Starting/Stopping/Setup) * functions */ static int _powercap_init_control_state( hwd_control_state_t *ctl ) { _powercap_control_state_t* control = ( _powercap_control_state_t* ) ctl; memset( control, 0, sizeof ( _powercap_control_state_t ) ); return PAPI_OK; } static int _powercap_start( hwd_context_t *ctx, hwd_control_state_t *ctl ) { _powercap_context_t* context = ( _powercap_context_t* ) ctx; (void) ctl; int b; for( b = 0; b < num_events; b++ ) { context->start_value[b]=read_powercap_value(b); } return PAPI_OK; } static int _powercap_stop( hwd_context_t *ctx, hwd_control_state_t *ctl ) { (void) ctx; _powercap_control_state_t* control = ( _powercap_control_state_t* ) ctl; int c; for( c = 0; c < num_events; c++ ) { control->count[c]=read_powercap_value(c); } return PAPI_OK; } /* Shutdown a thread */ static int _powercap_shutdown_thread( hwd_context_t *ctx ) { ( void ) ctx; SUBDBG( "Enter\n" ); return PAPI_OK; } static int _powercap_read( hwd_context_t *ctx, hwd_control_state_t *ctl, long long **events, int flags ) { (void) ctx; (void) flags; _powercap_control_state_t* control = ( _powercap_control_state_t* ) ctl; int i; for(i=0;icount[i]=read_powercap_value(control->which_counter[i]); } *events = ( ( _powercap_control_state_t* ) ctl )->count; return PAPI_OK; } static int _powercap_write( hwd_context_t * ctx, hwd_control_state_t * ctl, long long *values ) { /* write values */ ( void ) ctx; _powercap_control_state_t *control = ( _powercap_control_state_t * ) ctl; int i; for(i=0;iwhich_counter[i]].type == PKG_POWER_LIMIT_A) || (powercap_ntv_events[control->which_counter[i]].type == PKG_POWER_LIMIT_B) ) { write_powercap_value(control->which_counter[i], values[i]); } } return PAPI_OK; } /* * Clean up what was setup in powercap_init_component(). 
*/ static int _powercap_shutdown_component( void ) { int i; /* Read counters into expected slot */ for(i=0;iwhich_counter[i]=index; native[i].ni_position = i; } return PAPI_OK; } static int _powercap_set_domain( hwd_control_state_t *ctl, int domain ) { ( void ) ctl; if ( PAPI_DOM_ALL != domain ) return PAPI_EINVAL; return PAPI_OK; } static int _powercap_reset( hwd_context_t *ctx, hwd_control_state_t *ctl ) { ( void ) ctx; ( void ) ctl; return PAPI_OK; } /* * Native Event functions */ static int _powercap_ntv_enum_events( unsigned int *EventCode, int modifier ) { int index; switch ( modifier ) { case PAPI_ENUM_FIRST: *EventCode = 0; return PAPI_OK; case PAPI_ENUM_EVENTS:index = *EventCode; if (index < num_events - 1) { *EventCode = *EventCode + 1; return PAPI_OK; } else { return PAPI_ENOEVNT; } default:return PAPI_EINVAL; } } /* * */ static int _powercap_ntv_code_to_name( unsigned int EventCode, char *name, int len ) { int index = EventCode & PAPI_NATIVE_AND_MASK; if ( index >= 0 && index < num_events ) { _local_strlcpy( name, powercap_ntv_events[index].name, len ); return PAPI_OK; } return PAPI_ENOEVNT; } /* * */ static int _powercap_ntv_code_to_descr( unsigned int EventCode, char *name, int len ) { int index = EventCode; if ( index < 0 && index >= num_events ) return PAPI_ENOEVNT; _local_strlcpy( name, powercap_ntv_events[index].description, len ); return PAPI_OK; } static int _powercap_ntv_code_to_info( unsigned int EventCode, PAPI_event_info_t *info ) { int index = EventCode; if ( index < 0 || index >= num_events ) return PAPI_ENOEVNT; _local_strlcpy( info->symbol, powercap_ntv_events[index].name, sizeof( info->symbol )); _local_strlcpy( info->long_descr, powercap_ntv_events[index].description, sizeof( info->long_descr ) ); _local_strlcpy( info->units, powercap_ntv_events[index].units, sizeof( info->units ) ); info->data_type = powercap_ntv_events[index].return_type; return PAPI_OK; } papi_vector_t _powercap_vector = { .cmp_info = { /* (unspecified values are 
initialized to 0) */ .name = "powercap", .short_name = "powercap", .description = "Linux powercap energy measurements", .version = "5.3.0", .default_domain = PAPI_DOM_ALL, .default_granularity = PAPI_GRN_SYS, .available_granularities = PAPI_GRN_SYS, .hardware_intr_sig = PAPI_INT_SIGNAL, .available_domains = PAPI_DOM_ALL, }, /* sizes of framework-opaque component-private structures */ .size = { .context = sizeof ( _powercap_context_t ), .control_state = sizeof ( _powercap_control_state_t ), .reg_value = sizeof ( _powercap_register_t ), .reg_alloc = sizeof ( _powercap_reg_alloc_t ), }, /* function pointers in this component */ .init_thread = _powercap_init_thread, .init_component = _powercap_init_component, .init_control_state = _powercap_init_control_state, .update_control_state = _powercap_update_control_state, .start = _powercap_start, .stop = _powercap_stop, .read = _powercap_read, .write = _powercap_write, .shutdown_thread = _powercap_shutdown_thread, .shutdown_component = _powercap_shutdown_component, .ctl = _powercap_ctl, .set_domain = _powercap_set_domain, .reset = _powercap_reset, .ntv_enum_events = _powercap_ntv_enum_events, .ntv_code_to_name = _powercap_ntv_code_to_name, .ntv_code_to_descr = _powercap_ntv_code_to_descr, .ntv_code_to_info = _powercap_ntv_code_to_info, }; papi-5.6.0/src/components/infiniband/tests/infiniband_values_by_code.c000664 001750 001750 00000010172 13216244357 030270 0ustar00jshenry1963jshenry1963000000 000000 /****************************/ /* THIS IS OPEN SOURCE CODE */ /****************************/ /** * @author Jose Pedro Oliveira * * test case for the linux-infiniband component * Adapted from its counterpart in the net component. 
* * @brief * Prints the value of every native event (by code) */ #include #include #include #include #include "papi.h" #include "papi_test.h" int main (int argc, char **argv) { int retval,cid,numcmp; int EventSet = PAPI_NULL; long long *values = 0; int *codes = 0; char *names = 0; int code, i; int total_events=0; int r; const PAPI_component_info_t *cmpinfo = NULL; /* Set TESTS_QUIET variable */ tests_quiet( argc, argv ); /* PAPI Initialization */ retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) { test_fail(__FILE__, __LINE__,"PAPI_library_init failed\n",retval); } if (!TESTS_QUIET) { printf("Trying all infiniband events\n"); } numcmp = PAPI_num_components(); for(cid=0; cidnum_native_events, cmpinfo->name); } if ( strstr(cmpinfo->name, "infiniband") == NULL) { continue; } if (cmpinfo->disabled) { test_skip(__FILE__,__LINE__,"Component infiniband is disabled", 0); continue; } values = (long long*) malloc(sizeof(long long) * cmpinfo->num_native_events); codes = (int*) malloc(sizeof(int) * cmpinfo->num_native_events); names = (char*) malloc(PAPI_MAX_STR_LEN * cmpinfo->num_native_events); EventSet = PAPI_NULL; retval = PAPI_create_eventset( &EventSet ); if (retval != PAPI_OK) { test_fail(__FILE__, __LINE__, "PAPI_create_eventset()", retval); } code = PAPI_NATIVE_MASK; r = PAPI_enum_cmp_event( &code, PAPI_ENUM_FIRST, cid ); i = 0; while ( r == PAPI_OK ) { retval = PAPI_event_code_to_name( code, &names[i*PAPI_MAX_STR_LEN] ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); } codes[i] = code; retval = PAPI_add_event( EventSet, code ); if (retval != PAPI_OK) { test_fail(__FILE__, __LINE__, "PAPI_add_event()", retval); } total_events++; r = PAPI_enum_cmp_event( &code, PAPI_ENUM_EVENTS, cid ); i += 1; } retval = PAPI_start( EventSet ); if (retval != PAPI_OK) { test_fail(__FILE__, __LINE__, "PAPI_start()", retval); } /* XXX figure out a general method to generate some traffic * for infiniband * the 
operation should take more than one second in order * to guarantee that the network counters are updated */ /* For now, just sleep for 10 seconds */ sleep(10); retval = PAPI_stop( EventSet, values); if (retval != PAPI_OK) { test_fail(__FILE__, __LINE__, "PAPI_stop()", retval); } if (!TESTS_QUIET) { for (i=0 ; inum_native_events ; ++i) printf("%#x %-24s = %lld\n", codes[i], names+i*PAPI_MAX_STR_LEN, values[i]); } retval = PAPI_cleanup_eventset( EventSet ); if (retval != PAPI_OK) { test_fail(__FILE__, __LINE__, "PAPI_cleanup_eventset()", retval); } retval = PAPI_destroy_eventset( &EventSet ); if (retval != PAPI_OK) { test_fail(__FILE__, __LINE__, "PAPI_destroy_eventset()", retval); } free(names); free(codes); free(values); } if (total_events==0) { test_skip(__FILE__,__LINE__,"No infiniband events found", 0); } test_pass( __FILE__ ); return 0; } // vim:set ai ts=4 sw=4 sts=4 et: papi-5.6.0/src/libpfm4/lib/pfmlib_intel_snbep_unc.c000664 001750 001750 00000047013 13216244365 024302 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_intel_snbep_unc.c : Intel SandyBridge-EP uncore PMU common code * * Copyright (c) 2012 Google, Inc * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include #include #include #include #include /* private headers */ #include "pfmlib_priv.h" #include "pfmlib_intel_x86_priv.h" #include "pfmlib_intel_snbep_unc_priv.h" const pfmlib_attr_desc_t snbep_unc_mods[]={ PFM_ATTR_B("e", "edge detect"), /* edge */ PFM_ATTR_B("i", "invert"), /* invert */ PFM_ATTR_I("t", "threshold in range [0-255]"), /* threshold */ PFM_ATTR_I("t", "threshold in range [0-31]"), /* threshold */ PFM_ATTR_I("tf", "thread id filter [0-1]"), /* thread id */ PFM_ATTR_I("cf", "core id filter, includes non-thread data in bit 4 [0-15]"), /* core id (ivbep) */ PFM_ATTR_I("nf", "node id bitmask filter [0-255]"),/* nodeid mask filter0 */ PFM_ATTR_I("ff", "frequency >= 100Mhz * [0-255]"),/* freq filter */ PFM_ATTR_I("addr", "physical address matcher [40 bits]"),/* address matcher */ PFM_ATTR_I("nf", "node id bitmask filter [0-255]"),/* nodeid mask filter1 */ PFM_ATTR_B("isoc", "match isochronous requests"), /* isochronous */ PFM_ATTR_B("nc", "match non-coherent requests"), /* non-coherent */ PFM_ATTR_I("cf", "core id filter, includes non-thread data in bit 5 [0-63]"), /* core id (hswep) */ PFM_ATTR_NULL }; int pfm_intel_snbep_unc_detect(void *this) { int ret; ret = pfm_intel_x86_detect(); if (ret != PFM_SUCCESS) if (pfm_intel_x86_cfg.family != 6) return PFM_ERR_NOTSUPP; switch(pfm_intel_x86_cfg.model) { case 45: /* SandyBridge-EP */ break; default: return PFM_ERR_NOTSUPP; } return PFM_SUCCESS; } int pfm_intel_ivbep_unc_detect(void *this) { int ret; ret = pfm_intel_x86_detect(); if (ret != PFM_SUCCESS) if (pfm_intel_x86_cfg.family != 6) return PFM_ERR_NOTSUPP; switch(pfm_intel_x86_cfg.model) { case 62: /* SandyBridge-EP */ break; default: return PFM_ERR_NOTSUPP; } return PFM_SUCCESS; } int 
pfm_intel_hswep_unc_detect(void *this) { int ret; ret = pfm_intel_x86_detect(); if (ret != PFM_SUCCESS) if (pfm_intel_x86_cfg.family != 6) return PFM_ERR_NOTSUPP; switch(pfm_intel_x86_cfg.model) { case 63: /* Haswell-EP */ break; default: return PFM_ERR_NOTSUPP; } return PFM_SUCCESS; } int pfm_intel_knl_unc_detect(void *this) { int ret; ret = pfm_intel_x86_detect(); if (ret != PFM_SUCCESS) if (pfm_intel_x86_cfg.family != 6) return PFM_ERR_NOTSUPP; switch(pfm_intel_x86_cfg.model) { case 87: /* Knights Landing */ break; default: return PFM_ERR_NOTSUPP; } return PFM_SUCCESS; } int pfm_intel_bdx_unc_detect(void *this) { int ret; ret = pfm_intel_x86_detect(); if (ret != PFM_SUCCESS) if (pfm_intel_x86_cfg.family != 6) return PFM_ERR_NOTSUPP; switch(pfm_intel_x86_cfg.model) { case 79: /* Broadwell X */ case 86: /* Broadwell X */ break; default: return PFM_ERR_NOTSUPP; } return PFM_SUCCESS; } static void display_com(void *this, pfmlib_event_desc_t *e, void *val) { const intel_x86_entry_t *pe = this_pe(this); pfm_snbep_unc_reg_t *reg = val; __pfm_vbprintf("[UNC=0x%"PRIx64" event=0x%x umask=0x%x en=%d " "inv=%d edge=%d thres=%d] %s\n", reg->val, reg->com.unc_event, reg->com.unc_umask, reg->com.unc_en, reg->com.unc_inv, reg->com.unc_edge, reg->com.unc_thres, pe[e->event].name); } static void display_reg(void *this, pfmlib_event_desc_t *e, pfm_snbep_unc_reg_t reg) { pfmlib_pmu_t *pmu = this; if (pmu->display_reg) pmu->display_reg(this, e, ®); else display_com(this, e, ®); } static inline int is_occ_event(void *this, int idx) { pfmlib_pmu_t *pmu = this; const intel_x86_entry_t *pe = this_pe(this); return (pmu->flags & INTEL_PMU_FL_UNC_OCC) && (pe[idx].code & 0x80); } static inline int get_pcu_filt_band(void *this, pfm_snbep_unc_reg_t reg) { #define PCU_FREQ_BAND0_CODE 0xb /* event code for UNC_P_FREQ_BAND0_CYCLES */ return reg.pcu.unc_event - PCU_FREQ_BAND0_CODE; } int snbep_unc_add_defaults(void *this, pfmlib_event_desc_t *e, unsigned int msk, uint64_t *umask, 
pfm_snbep_unc_reg_t *filters, unsigned short max_grpid) { const intel_x86_entry_t *pe = this_pe(this); const intel_x86_entry_t *ent; unsigned int i; int j, k, added, skip; int idx; k = e->nattrs; ent = pe+e->event; for(i=0; msk; msk >>=1, i++) { if (!(msk & 0x1)) continue; added = skip = 0; for (j = 0; j < e->npattrs; j++) { if (e->pattrs[j].ctrl != PFM_ATTR_CTRL_PMU) continue; if (e->pattrs[j].type != PFM_ATTR_UMASK) continue; idx = e->pattrs[j].idx; if (ent->umasks[idx].grpid != i) continue; if (max_grpid != INTEL_X86_MAX_GRPID && i > max_grpid) { skip = 1; continue; } if (intel_x86_uflag(this, e->event, idx, INTEL_X86_GRP_DFL_NONE)) { skip = 1; continue; } /* umask is default for group */ if (intel_x86_uflag(this, e->event, idx, INTEL_X86_DFL)) { DPRINT("added default %s for group %d j=%d idx=%d ucode=0x%"PRIx64"\n", ent->umasks[idx].uname, i, j, idx, ent->umasks[idx].ucode); /* * default could be an alias, but * ucode must reflect actual code */ *umask |= ent->umasks[idx].ucode >> 8; filters[0].val |= pe[e->event].umasks[idx].ufilters[0]; filters[1].val |= pe[e->event].umasks[idx].ufilters[1]; e->attrs[k].id = j; /* pattrs index */ e->attrs[k].ival = 0; k++; added++; if (intel_x86_eflag(this, e->event, INTEL_X86_GRP_EXCL)) goto done; if (intel_x86_uflag(this, e->event, idx, INTEL_X86_EXCL_GRP_GT)) { if (max_grpid != INTEL_X86_MAX_GRPID) { DPRINT("two max_grpid, old=%d new=%d\n", max_grpid, ent->umasks[idx].grpid); return PFM_ERR_UMASK; } max_grpid = ent->umasks[idx].grpid; } } } if (!added && !skip) { DPRINT("no default found for event %s unit mask group %d (max_grpid=%d, i=%d)\n", ent->name, i, max_grpid, i); return PFM_ERR_UMASK; } } DPRINT("max_grpid=%d nattrs=%d k=%d umask=0x%"PRIx64"\n", max_grpid, e->nattrs, k, *umask); done: e->nattrs = k; return PFM_SUCCESS; } /* * common encoding routine */ int pfm_intel_snbep_unc_get_encoding(void *this, pfmlib_event_desc_t *e) { const intel_x86_entry_t *pe = this_pe(this); unsigned int grpmsk, ugrpmsk = 0; unsigned 
short max_grpid = INTEL_X86_MAX_GRPID; unsigned short last_grpid = INTEL_X86_MAX_GRPID; int umodmsk = 0, modmsk_r = 0; int pcu_filt_band = -1; pfm_snbep_unc_reg_t reg; pfm_snbep_unc_reg_t filters[INTEL_X86_MAX_FILTERS]; pfm_snbep_unc_reg_t addr; pfmlib_event_attr_info_t *a; uint64_t val, umask1, umask2; int k, ret; int has_cbo_tid = 0; unsigned short grpid; int grpcounts[INTEL_X86_NUM_GRP]; int ncombo[INTEL_X86_NUM_GRP]; char umask_str[PFMLIB_EVT_MAX_NAME_LEN]; memset(grpcounts, 0, sizeof(grpcounts)); memset(ncombo, 0, sizeof(ncombo)); memset(filters, 0, sizeof(filters)); addr.val = 0; pe = this_pe(this); umask_str[0] = e->fstr[0] = '\0'; reg.val = val = pe[e->event].code; /* take into account hardcoded umask */ umask1 = (val >> 8) & 0xff; umask2 = umask1; grpmsk = (1 << pe[e->event].ngrp)-1; modmsk_r = pe[e->event].modmsk_req; for(k=0; k < e->nattrs; k++) { a = attr(e, k); if (a->ctrl != PFM_ATTR_CTRL_PMU) continue; if (a->type == PFM_ATTR_UMASK) { uint64_t um; grpid = pe[e->event].umasks[a->idx].grpid; /* * certain event groups are meant to be * exclusive, i.e., only unit masks of one group * can be used */ if (last_grpid != INTEL_X86_MAX_GRPID && grpid != last_grpid && intel_x86_eflag(this, e->event, INTEL_X86_GRP_EXCL)) { DPRINT("exclusive unit mask group error\n"); return PFM_ERR_FEATCOMB; } /* * selecting certain umasks in a group may exclude any umasks * from any groups with a higher index * * enforcement requires looking at the grpid of all the umasks */ if (intel_x86_uflag(this, e->event, a->idx, INTEL_X86_EXCL_GRP_GT)) max_grpid = grpid; /* * certain event groups are meant to be * exclusive, i.e., only unit masks of one group * can be used */ if (last_grpid != INTEL_X86_MAX_GRPID && grpid != last_grpid && intel_x86_eflag(this, e->event, INTEL_X86_GRP_EXCL)) { DPRINT("exclusive unit mask group error\n"); return PFM_ERR_FEATCOMB; } /* * upper layer has removed duplicates * so if we come here more than once, it is for two * disinct umasks * * NCOMBO=no 
combination of unit masks within the same * umask group */ ++grpcounts[grpid]; /* mark that we have a umask with NCOMBO in this group */ if (intel_x86_uflag(this, e->event, a->idx, INTEL_X86_NCOMBO)) ncombo[grpid] = 1; /* * if more than one umask in this group but one is marked * with ncombo, then fail. It is okay to combine umask within * a group as long as none is tagged with NCOMBO */ if (grpcounts[grpid] > 1 && ncombo[grpid]) { DPRINT("umask %s does not support unit mask combination within group %d\n", pe[e->event].umasks[a->idx].uname, grpid); return PFM_ERR_FEATCOMB; } last_grpid = grpid; um = pe[e->event].umasks[a->idx].ucode; filters[0].val |= pe[e->event].umasks[a->idx].ufilters[0]; filters[1].val |= pe[e->event].umasks[a->idx].ufilters[1]; um >>= 8; umask2 |= um; ugrpmsk |= 1 << pe[e->event].umasks[a->idx].grpid; /* PCU occ event */ if (is_occ_event(this, e->event)) { reg.pcu.unc_occ = umask2 >> 6; umask2 = 0; } else reg.val |= umask2 << 8; evt_strcat(umask_str, ":%s", pe[e->event].umasks[a->idx].uname); modmsk_r |= pe[e->event].umasks[a->idx].umodmsk_req; } else if (a->type == PFM_ATTR_RAW_UMASK) { /* there can only be one RAW_UMASK per event */ /* sanity check */ if (a->idx & ~0xff) { DPRINT("raw umask is 8-bit wide\n"); return PFM_ERR_ATTR; } /* override umask */ umask2 = a->idx & 0xff; ugrpmsk = grpmsk; } else { uint64_t ival = e->attrs[k].ival; switch(a->idx) { case SNBEP_UNC_ATTR_I: /* invert */ if (is_occ_event(this, e->event)) reg.pcu.unc_occ_inv = !!ival; else reg.com.unc_inv = !!ival; umodmsk |= _SNBEP_UNC_ATTR_I; break; case SNBEP_UNC_ATTR_E: /* edge */ if (is_occ_event(this, e->event)) reg.pcu.unc_occ_edge = !!ival; else reg.com.unc_edge = !!ival; umodmsk |= _SNBEP_UNC_ATTR_E; break; case SNBEP_UNC_ATTR_T8: /* counter-mask */ /* already forced, cannot overwrite */ if (ival > 255) return PFM_ERR_ATTR_VAL; reg.com.unc_thres = ival; umodmsk |= _SNBEP_UNC_ATTR_T8; break; case SNBEP_UNC_ATTR_T5: /* pcu counter-mask */ /* already forced, cannot 
overwrite */ if (ival > 31) return PFM_ERR_ATTR_VAL; reg.pcu.unc_thres = ival; umodmsk |= _SNBEP_UNC_ATTR_T5; break; case SNBEP_UNC_ATTR_TF: /* thread id */ if (ival > 1) { DPRINT("invalid thread id, must be < 1"); return PFM_ERR_ATTR_VAL; } reg.cbo.unc_tid = 1; has_cbo_tid = 1; filters[0].cbo_filt.tid = ival; umodmsk |= _SNBEP_UNC_ATTR_TF; break; case SNBEP_UNC_ATTR_CF: /* core id */ if (ival > 15) return PFM_ERR_ATTR_VAL; reg.cbo.unc_tid = 1; filters[0].cbo_filt.cid = ival; has_cbo_tid = 1; umodmsk |= _SNBEP_UNC_ATTR_CF; break; case SNBEP_UNC_ATTR_CF1: /* core id */ if (ival > 63) return PFM_ERR_ATTR_VAL; reg.cbo.unc_tid = 1; filters[0].hswep_cbo_filt0.cid = ival; /* includes non-thread data */ has_cbo_tid = 1; umodmsk |= _SNBEP_UNC_ATTR_CF1; break; case SNBEP_UNC_ATTR_NF: /* node id filter0 */ if (ival > 255 || ival == 0) { DPRINT("invalid nf, 0 < nf < 256\n"); return PFM_ERR_ATTR_VAL; } filters[0].cbo_filt.nid = ival; umodmsk |= _SNBEP_UNC_ATTR_NF; break; case SNBEP_UNC_ATTR_NF1: /* node id filter1 */ if (ival > 255 || ival == 0) { DPRINT("invalid nf, 0 < nf < 256\n"); return PFM_ERR_ATTR_VAL; } filters[1].ivbep_cbo_filt1.nid = ival; umodmsk |= _SNBEP_UNC_ATTR_NF1; break; case SNBEP_UNC_ATTR_FF: /* freq band filter */ if (ival > 255) return PFM_ERR_ATTR_VAL; pcu_filt_band = get_pcu_filt_band(this, reg); filters[0].val = ival << (pcu_filt_band * 8); umodmsk |= _SNBEP_UNC_ATTR_FF; break; case SNBEP_UNC_ATTR_A: /* addr filter */ if (ival & ~((1ULL << 40)-1)) { DPRINT("address filter 40bits max\n"); return PFM_ERR_ATTR_VAL; } addr.ha_addr.lo_addr = ival; /* LSB 26 bits */ addr.ha_addr.hi_addr = (ival >> 26) & ((1ULL << 14)-1); umodmsk |= _SNBEP_UNC_ATTR_A; break; case SNBEP_UNC_ATTR_ISOC: /* isoc filter */ filters[1].ivbep_cbo_filt1.isoc = !!ival; break; case SNBEP_UNC_ATTR_NC: /* nc filter */ filters[1].ivbep_cbo_filt1.nc = !!ival; break; } } } /* * check that there is at least of unit mask in each unit mask group */ if (pe[e->event].numasks && (ugrpmsk != grpmsk 
|| ugrpmsk == 0)) { uint64_t um = 0; ugrpmsk ^= grpmsk; ret = snbep_unc_add_defaults(this, e, ugrpmsk, &um, filters, max_grpid); if (ret != PFM_SUCCESS) return ret; umask2 |= um; } /* * nf= is only required on some events in CBO */ if (!(modmsk_r & _SNBEP_UNC_ATTR_NF) && (umodmsk & _SNBEP_UNC_ATTR_NF)) { DPRINT("using nf= on an umask which does not require it\n"); return PFM_ERR_ATTR; } if (!(modmsk_r & _SNBEP_UNC_ATTR_NF1) && (umodmsk & _SNBEP_UNC_ATTR_NF1)) { DPRINT("using nf= on an umask which does not require it\n"); return PFM_ERR_ATTR; } if (modmsk_r && !(umodmsk & modmsk_r)) { DPRINT("required modifiers missing: 0x%x\n", modmsk_r); return PFM_ERR_ATTR; } evt_strcat(e->fstr, "%s", pe[e->event].name); pfmlib_sort_attr(e); for(k = 0; k < e->nattrs; k++) { a = attr(e, k); if (a->ctrl != PFM_ATTR_CTRL_PMU) continue; if (a->type == PFM_ATTR_UMASK) evt_strcat(e->fstr, ":%s", pe[e->event].umasks[a->idx].uname); else if (a->type == PFM_ATTR_RAW_UMASK) evt_strcat(e->fstr, ":0x%x", a->idx); } DPRINT("umask2=0x%"PRIx64" umask1=0x%"PRIx64"\n", umask2, umask1); e->count = 0; reg.val |= (umask1 | umask2) << 8; e->codes[e->count++] = reg.val; /* * handles C-box filter */ if (filters[0].val || filters[1].val || has_cbo_tid) e->codes[e->count++] = filters[0].val; if (filters[1].val) e->codes[e->count++] = filters[1].val; /* HA address matcher */ if (addr.val) e->codes[e->count++] = addr.val; for (k = 0; k < e->npattrs; k++) { int idx; if (e->pattrs[k].ctrl != PFM_ATTR_CTRL_PMU) continue; if (e->pattrs[k].type == PFM_ATTR_UMASK) continue; idx = e->pattrs[k].idx; switch(idx) { case SNBEP_UNC_ATTR_E: if (is_occ_event(this, e->event)) evt_strcat(e->fstr, ":%s=%lu", snbep_unc_mods[idx].name, reg.pcu.unc_occ_edge); else evt_strcat(e->fstr, ":%s=%lu", snbep_unc_mods[idx].name, reg.com.unc_edge); break; case SNBEP_UNC_ATTR_I: if (is_occ_event(this, e->event)) evt_strcat(e->fstr, ":%s=%lu", snbep_unc_mods[idx].name, reg.pcu.unc_occ_inv); else evt_strcat(e->fstr, ":%s=%lu", 
snbep_unc_mods[idx].name, reg.com.unc_inv); break; case SNBEP_UNC_ATTR_T8: evt_strcat(e->fstr, ":%s=%lu", snbep_unc_mods[idx].name, reg.com.unc_thres); break; case SNBEP_UNC_ATTR_T5: evt_strcat(e->fstr, ":%s=%lu", snbep_unc_mods[idx].name, reg.pcu.unc_thres); break; case SNBEP_UNC_ATTR_TF: evt_strcat(e->fstr, ":%s=%lu", snbep_unc_mods[idx].name, reg.cbo.unc_tid); break; case SNBEP_UNC_ATTR_CF: evt_strcat(e->fstr, ":%s=%lu", snbep_unc_mods[idx].name, filters[0].cbo_filt.cid); break; case SNBEP_UNC_ATTR_CF1: evt_strcat(e->fstr, ":%s=%lu", snbep_unc_mods[idx].name, filters[0].hswep_cbo_filt0.cid); break; case SNBEP_UNC_ATTR_FF: evt_strcat(e->fstr, ":%s=%lu", snbep_unc_mods[idx].name, (filters[0].val >> (pcu_filt_band*8)) & 0xff); break; case SNBEP_UNC_ATTR_ISOC: evt_strcat(e->fstr, ":%s=%lu", snbep_unc_mods[idx].name, filters[1].ivbep_cbo_filt1.isoc); break; case SNBEP_UNC_ATTR_NC: evt_strcat(e->fstr, ":%s=%lu", snbep_unc_mods[idx].name, filters[1].ivbep_cbo_filt1.nc); break; case SNBEP_UNC_ATTR_NF: if (modmsk_r & _SNBEP_UNC_ATTR_NF) evt_strcat(e->fstr, ":%s=%lu", snbep_unc_mods[idx].name, filters[0].cbo_filt.nid); break; case SNBEP_UNC_ATTR_NF1: if (modmsk_r & _SNBEP_UNC_ATTR_NF1) evt_strcat(e->fstr, ":%s=%lu", snbep_unc_mods[idx].name, filters[1].ivbep_cbo_filt1.nid); break; case SNBEP_UNC_ATTR_A: evt_strcat(e->fstr, ":%s=0x%lx", snbep_unc_mods[idx].name, addr.ha_addr.hi_addr << 26 | addr.ha_addr.lo_addr); break; } } display_reg(this, e, reg); return PFM_SUCCESS; } int pfm_intel_snbep_unc_can_auto_encode(void *this, int pidx, int uidx) { if (intel_x86_eflag(this, pidx, INTEL_X86_NO_AUTOENCODE)) return 0; return !intel_x86_uflag(this, pidx, uidx, INTEL_X86_NO_AUTOENCODE); } int pfm_intel_snbep_unc_get_event_attr_info(void *this, int pidx, int attr_idx, pfmlib_event_attr_info_t *info) { const intel_x86_entry_t *pe = this_pe(this); const pfmlib_attr_desc_t *atdesc = this_atdesc(this); int numasks, idx; numasks = intel_x86_num_umasks(this, pidx); if (attr_idx < numasks) 
{ idx = intel_x86_attr2umask(this, pidx, attr_idx); info->name = pe[pidx].umasks[idx].uname; info->desc = pe[pidx].umasks[idx].udesc; info->equiv= pe[pidx].umasks[idx].uequiv; info->code = pe[pidx].umasks[idx].ucode; if (!intel_x86_uflag(this, pidx, idx, INTEL_X86_CODE_OVERRIDE)) info->code >>= 8; if (info->code == 0) info->code = pe[pidx].umasks[idx].ufilters[0]; info->type = PFM_ATTR_UMASK; info->is_dfl = intel_x86_uflag(this, pidx, idx, INTEL_X86_DFL); info->is_precise = intel_x86_uflag(this, pidx, idx, INTEL_X86_PEBS); } else { idx = intel_x86_attr2mod(this, pidx, attr_idx); info->name = atdesc[idx].name; info->desc = atdesc[idx].desc; info->type = atdesc[idx].type; info->equiv= NULL; info->code = idx; info->is_dfl = 0; info->is_precise = 0; } info->ctrl = PFM_ATTR_CTRL_PMU; info->idx = idx; /* namespace specific index */ info->dfl_val64 = 0; return PFM_SUCCESS; } papi-5.6.0/src/components/powercap/Rules.powercap000664 001750 001750 00000000345 13216244360 024164 0ustar00jshenry1963jshenry1963000000 000000 COMPSRCS += components/powercap/linux-powercap.c COMPOBJS += linux-powercap.o linux-powercap.o: components/powercap/linux-powercap.c $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/powercap/linux-powercap.c -o linux-powercap.o papi-5.6.0/src/perfctr-2.6.x/linux/include/linux/000775 001750 001750 00000000000 13216244367 023467 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/libpfm4/lib/pfmlib_intel_bdx_unc_imc.c000664 001750 001750 00000005500 13216244365 024573 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_intel_bdx_unc_imc.c : Intel BroadwellX Integrated Memory Controller (IMC) uncore PMU * * Copyright (c) 2017 Google Inc. 
All rights reserved * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include #include #include #include #include /* private headers */ #include "pfmlib_priv.h" #include "pfmlib_intel_x86_priv.h" #include "pfmlib_intel_snbep_unc_priv.h" #include "events/intel_bdx_unc_imc_events.h" #define DEFINE_IMC_BOX(n) \ pfmlib_pmu_t intel_bdx_unc_imc##n##_support = { \ .desc = "Intel BroadwellX IMC"#n" uncore", \ .name = "bdx_unc_imc"#n, \ .perf_name = "uncore_imc_"#n, \ .pmu = PFM_PMU_INTEL_BDX_UNC_IMC##n, \ .pme_count = LIBPFM_ARRAY_SIZE(intel_bdx_unc_m_pe), \ .type = PFM_PMU_TYPE_UNCORE, \ .num_cntrs = 4, \ .num_fixed_cntrs = 1, \ .max_encoding = 1, \ .pe = intel_bdx_unc_m_pe, \ .atdesc = snbep_unc_mods, \ .flags = PFMLIB_PMU_FL_RAW_UMASK, \ .pmu_detect = pfm_intel_bdx_unc_detect, \ .get_event_encoding[PFM_OS_NONE] = pfm_intel_snbep_unc_get_encoding, \ PFMLIB_ENCODE_PERF(pfm_intel_snbep_unc_get_perf_encoding), \ PFMLIB_OS_DETECT(pfm_intel_x86_perf_detect), \ .get_event_first = pfm_intel_x86_get_event_first, \ .get_event_next = pfm_intel_x86_get_event_next, \ .event_is_valid = pfm_intel_x86_event_is_valid, \ .validate_table = pfm_intel_x86_validate_table, \ .get_event_info = pfm_intel_x86_get_event_info, \ .get_event_attr_info = pfm_intel_x86_get_event_attr_info, \ PFMLIB_VALID_PERF_PATTRS(pfm_intel_snbep_unc_perf_validate_pattrs), \ .get_event_nattrs = pfm_intel_x86_get_event_nattrs, \ }; DEFINE_IMC_BOX(0); DEFINE_IMC_BOX(1); DEFINE_IMC_BOX(2); DEFINE_IMC_BOX(3); DEFINE_IMC_BOX(4); DEFINE_IMC_BOX(5); DEFINE_IMC_BOX(6); DEFINE_IMC_BOX(7); papi-5.6.0/src/libpfm4/docs/man3/libpfm_arm_ac8.3000664 001750 001750 00000000702 13216244363 023375 0ustar00jshenry1963jshenry1963000000 000000 .TH LIBPFM 3 "August, 2012" "" "Linux Programmer's Manual" .SH NAME libpfm_arm_ac8 - support for ARM Cortex A8 PMU .SH SYNOPSIS .nf .B #include .sp .B PMU name: arm_ac8 .B PMU desc: ARM Cortex A8 .sp .SH DESCRIPTION The library supports the ARM Cortex A8 core PMU. This PMU supports 2 counters and has no privilege levels filtering. 
No event modifiers are available. .SH AUTHORS .nf Stephane Eranian .if .PP papi-5.6.0/src/perfctr-2.6.x/linux/drivers/perfctr/virtual_stub.c000775 001750 001750 00000004764 13216244367 026735 0ustar00jshenry1963jshenry1963000000 000000 /* $Id: virtual_stub.c,v 1.26.2.9 2009/01/23 17:21:20 mikpe Exp $ * Kernel stub used to support virtual perfctrs when the * perfctr driver is built as a module. * * Copyright (C) 2000-2009 Mikael Pettersson */ #include #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) #include #endif #include #include #include #include #include "compat.h" static void bug_void_perfctr(struct vperfctr *perfctr) { current->thread.perfctr = NULL; BUG(); } #ifdef CONFIG_PERFCTR_CPUS_FORBIDDEN_MASK static void bug_set_cpus_allowed(struct task_struct *owner, struct vperfctr *perfctr, cpumask_t new_mask) { owner->thread.perfctr = NULL; BUG(); } #endif struct vperfctr_stub vperfctr_stub = { .exit = bug_void_perfctr, .flush = bug_void_perfctr, .suspend = bug_void_perfctr, .resume = bug_void_perfctr, .sample = bug_void_perfctr, #ifdef CONFIG_PERFCTR_CPUS_FORBIDDEN_MASK .set_cpus_allowed = bug_set_cpus_allowed, #endif }; /* * exit_thread() calls __vperfctr_exit() via vperfctr_stub.exit(). * If the process' reference was the last reference to this * vperfctr object, and this was the last live vperfctr object, * then the perfctr module's use count will drop to zero. * This is Ok, except for the fact that code is still running * in the module (pending returns back to exit_thread()). This * could race with rmmod in a preemptive UP kernel, leading to * code running in freed memory. The race also exists in SMP * kernels, but the time window is extremely small. * * Since exit() isn't performance-critical, we wrap the call to * vperfctr_stub.exit() with code to increment the module's use * count before the call, and decrement it again afterwards. Thus, * the final drop to zero occurs here and not in the module itself. 
* (All other code paths that drop the use count do so via a file * object, and VFS also refcounts the module.) */ void _vperfctr_exit(struct vperfctr *perfctr) { __module_get(vperfctr_stub.owner); vperfctr_stub.exit(perfctr); module_put(vperfctr_stub.owner); } /* __vperfctr_flush() is a conditional __vperfctr_exit(), * so it needs the same protection. */ void _vperfctr_flush(struct vperfctr *perfctr) { __module_get(vperfctr_stub.owner); vperfctr_stub.flush(perfctr); module_put(vperfctr_stub.owner); } EXPORT_SYMBOL(vperfctr_stub); EXPORT_SYMBOL___put_task_struct; #if !defined(CONFIG_UTRACE) #include #include EXPORT_SYMBOL(ptrace_check_attach); #endif papi-5.6.0/src/components/coretemp_freebsd/Rules.coretemp_freebsd000664 001750 001750 00000000500 13216244357 027343 0ustar00jshenry1963jshenry1963000000 000000 # $Id$ COMPSRCS += components/coretemp_freebsd/coretemp_freebsd.c COMPOBJS += coretemp_freebsd.o coretemp_freebsd.o: components/coretemp_freebsd/coretemp_freebsd.c components/coretemp_freebsd/coretemp_freebsd.h $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/coretemp_freebsd/coretemp_freebsd.c -o coretemp_freebsd.o papi-5.6.0/src/libpfm4/lib/events/intel_ppro_events.h000664 001750 001750 00000046571 13216244364 024661 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2011 Google, Inc * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. * * This file has been automatically generated. * * PMU: ppro (Intel Pentium Pro) */ static const intel_x86_umask_t ppro_l2_ifetch[]={ { .uname = "I", .udesc = "Invalid state", .ucode = 0x100, }, { .uname = "S", .udesc = "Shared state", .ucode = 0x200, }, { .uname = "E", .udesc = "Exclusive state", .ucode = 0x400, }, { .uname = "M", .udesc = "Modified state", .ucode = 0x800, }, }; static const intel_x86_umask_t ppro_bus_drdy_clocks[]={ { .uname = "SELF", .udesc = "Clocks when processor is driving bus", .ucode = 0x0, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "ANY", .udesc = "Clocks when any agent is driving bus", .ucode = 0x2000, .uflags= INTEL_X86_NCOMBO, }, }; static const intel_x86_entry_t intel_ppro_pe[]={ { .name = "CPU_CLK_UNHALTED", .desc = "Number cycles during which the processor is not halted", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x79, }, { .name = "INST_RETIRED", .desc = "Number of instructions retired", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xc0, }, { .name = "DATA_MEM_REFS", .desc = "All loads from any memory type. All stores to any memory typeEach part of a split is counted separately. The internal logic counts not only memory loads and stores but also internal retries. 80-bit floating point accesses are double counted, since they are decomposed into a 16-bit exponent load and a 64-bit mantissa load. 
Memory accesses are only counted when they are actually performed (such as a load that gets squashed because a previous cache miss is outstanding to the same address, and which finally gets performed, is only counted once). Does not include I/O accesses or other non-memory accesses", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x43, }, { .name = "DCU_LINES_IN", .desc = "Total lines allocated in the DCU", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x45, }, { .name = "DCU_M_LINES_IN", .desc = "Number of M state lines allocated in the DCU", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x46, }, { .name = "DCU_M_LINES_OUT", .desc = "Number of M state lines evicted from the DCU. This includes evictions via snoop HITM, intervention or replacement", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x47, }, { .name = "DCU_MISS_OUTSTANDING", .desc = "Weighted number of cycle while a DCU miss is outstanding, incremented by the number of cache misses at any particular time. Cacheable read requests only are considered. Uncacheable requests are excluded Read-for-ownerships are counted, as well as line fills, invalidates, and stores", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x48, }, { .name = "IFU_IFETCH", .desc = "Number of instruction fetches, both cacheable and noncacheable including UC fetches", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x80, }, { .name = "IFU_IFETCH_MISS", .desc = "Number of instruction fetch misses. All instructions fetches that do not hit the IFU (i.e., that produce memory requests). Includes UC accesses", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x81, }, { .name = "ITLB_MISS", .desc = "Number of ITLB misses", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x85, }, { .name = "IFU_MEM_STALL", .desc = "Number of cycles instruction fetch is stalled for any reason. 
Includes IFU cache misses, ITLB misses, ITLB faults, and other minor stalls", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x86, }, { .name = "ILD_STALL", .desc = "Number of cycles that the instruction length decoder is stalled", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x87, }, { .name = "L2_IFETCH", .desc = "Number of L2 instruction fetches. This event indicates that a normal instruction fetch was received by the L2. The count includes only L2 cacheable instruction fetches: it does not include UC instruction fetches It does not include ITLB miss accesses", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x28, .numasks = LIBPFM_ARRAY_SIZE(ppro_l2_ifetch), .ngrp = 1, .umasks = ppro_l2_ifetch, }, { .name = "L2_ST", .desc = "Number of L2 data stores. This event indicates that a normal, unlocked, store memory access was received by the L2. Specifically, it indicates that the DCU sent a read-for ownership request to the L2. It also includes Invalid to Modified requests sent by the DCU to the L2. It includes only L2 cacheable memory accesses; it does not include I/O accesses, other non-memory accesses, or memory accesses such as UC/WT memory accesses. 
It does include L2 cacheable TLB miss memory accesses", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x2a, .numasks = LIBPFM_ARRAY_SIZE(ppro_l2_ifetch), .ngrp = 1, .umasks = ppro_l2_ifetch, /* identical to actual umasks list for this event */ }, { .name = "L2_M_LINES_INM", .desc = "Number of modified lines allocated in the L2", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x25, }, { .name = "L2_RQSTS", .desc = "Total number of L2 requests", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x2e, .numasks = LIBPFM_ARRAY_SIZE(ppro_l2_ifetch), .ngrp = 1, .umasks = ppro_l2_ifetch, /* identical to actual umasks list for this event */ }, { .name = "L2_ADS", .desc = "Number of L2 address strobes", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x21, }, { .name = "L2_DBUS_BUSY", .desc = "Number of cycles during which the L2 cache data bus was busy", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x22, }, { .name = "L2_DBUS_BUSY_RD", .desc = "Number of cycles during which the data bus was busy transferring read data from L2 to the processor", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x23, }, { .name = "BUS_DRDY_CLOCKS", .desc = "Number of clocks during which DRDY# is asserted. Utilization of the external system data bus during data transfers", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x62, .numasks = LIBPFM_ARRAY_SIZE(ppro_bus_drdy_clocks), .ngrp = 1, .umasks = ppro_bus_drdy_clocks, }, { .name = "BUS_LOCK_CLOCKS", .desc = "Number of clocks during which LOCK# is asserted on the external system bus", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x63, .numasks = LIBPFM_ARRAY_SIZE(ppro_bus_drdy_clocks), .ngrp = 1, .umasks = ppro_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_REQ_OUTSTANDING", .desc = "Number of bus requests outstanding. 
This counter is incremented by the number of cacheable read bus requests outstanding in any given cycle", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x60, }, { .name = "BUS_TRANS_BRD", .desc = "Number of burst read transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x65, .numasks = LIBPFM_ARRAY_SIZE(ppro_bus_drdy_clocks), .ngrp = 1, .umasks = ppro_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRANS_RFO", .desc = "Number of completed read for ownership transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x66, .numasks = LIBPFM_ARRAY_SIZE(ppro_bus_drdy_clocks), .ngrp = 1, .umasks = ppro_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRANS_WB", .desc = "Number of completed write back transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x67, .numasks = LIBPFM_ARRAY_SIZE(ppro_bus_drdy_clocks), .ngrp = 1, .umasks = ppro_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRAN_IFETCH", .desc = "Number of completed instruction fetch transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x68, .numasks = LIBPFM_ARRAY_SIZE(ppro_bus_drdy_clocks), .ngrp = 1, .umasks = ppro_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRAN_INVAL", .desc = "Number of completed invalidate transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x69, .numasks = LIBPFM_ARRAY_SIZE(ppro_bus_drdy_clocks), .ngrp = 1, .umasks = ppro_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRAN_PWR", .desc = "Number of completed partial write transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x6a, .numasks = LIBPFM_ARRAY_SIZE(ppro_bus_drdy_clocks), .ngrp = 1, .umasks = ppro_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRANS_P", .desc = "Number of completed partial transactions", .modmsk = 
INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x6b, .numasks = LIBPFM_ARRAY_SIZE(ppro_bus_drdy_clocks), .ngrp = 1, .umasks = ppro_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRANS_IO", .desc = "Number of completed I/O transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x6c, .numasks = LIBPFM_ARRAY_SIZE(ppro_bus_drdy_clocks), .ngrp = 1, .umasks = ppro_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRAN_DEF", .desc = "Number of completed deferred transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x6d, .numasks = LIBPFM_ARRAY_SIZE(ppro_bus_drdy_clocks), .ngrp = 1, .umasks = ppro_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRAN_BURST", .desc = "Number of completed burst transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x6e, .numasks = LIBPFM_ARRAY_SIZE(ppro_bus_drdy_clocks), .ngrp = 1, .umasks = ppro_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRAN_ANY", .desc = "Number of all completed bus transactions. Address bus utilization can be calculated knowing the minimum address bus occupancy. 
Includes special cycles, etc.", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x70, .numasks = LIBPFM_ARRAY_SIZE(ppro_bus_drdy_clocks), .ngrp = 1, .umasks = ppro_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRAN_MEM", .desc = "Number of completed memory transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x6f, .numasks = LIBPFM_ARRAY_SIZE(ppro_bus_drdy_clocks), .ngrp = 1, .umasks = ppro_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_DATA_RECV", .desc = "Number of bus clock cycles during which this processor is receiving data", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x64, }, { .name = "BUS_BNR_DRV", .desc = "Number of bus clock cycles during which this processor is driving the BNR# pin", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x61, }, { .name = "BUS_HIT_DRV", .desc = "Number of bus clock cycles during which this processor is driving the HIT# pin", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x7a, }, { .name = "BUS_HITM_DRV", .desc = "Number of bus clock cycles during which this processor is driving the HITM# pin", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x7b, }, { .name = "BUS_SNOOP_STALL", .desc = "Number of clock cycles during which the bus is snoop stalled", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x7e, }, { .name = "FLOPS", .desc = "Number of computational floating-point operations retired. Excludes floating-point computational operations that cause traps or assists. Includes internal sub-operations for complex floating-point instructions like transcendentals. Excludes floating point loads and stores", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x1, .code = 0xc1, }, { .name = "FP_COMP_OPS_EXE", .desc = "Number of computational floating-point operations executed. The number of FADD, FSUB, FCOM, FMULs, integer MULs and IMULs, FDIVs, FPREMs, FSQRTS, integer DIVs, and IDIVs. 
This number does not include the number of cycles, but the number of operations. This event does not distinguish an FADD used in the middle of a transcendental flow from a separate FADD instruction", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x1, .code = 0x10, }, { .name = "FP_ASSIST", .desc = "Number of floating-point exception cases handled by microcode.", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x2, .code = 0x11, }, { .name = "MUL", .desc = "Number of multiplies.This count includes integer as well as FP multiplies and is speculative", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x2, .code = 0x12, }, { .name = "DIV", .desc = "Number of divides.This count includes integer as well as FP divides and is speculative", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x2, .code = 0x13, }, { .name = "CYCLES_DIV_BUSY", .desc = "Number of cycles during which the divider is busy, and cannot accept new divides. This includes integer and FP divides, FPREM, FPSQRT, etc. and is speculative", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x1, .code = 0x14, }, { .name = "LD_BLOCKS", .desc = "Number of load operations delayed due to store buffer blocks. Includes counts caused by preceding stores whose addresses are unknown, preceding stores whose addresses are known but whose data is unknown, and preceding stores that conflicts with the load but which incompletely overlap the load", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x3, }, { .name = "SB_DRAINS", .desc = "Number of store buffer drain cycles. Incremented every cycle the store buffer is draining. Draining is caused by serializing operations like CPUID, synchronizing operations like XCHG, interrupt acknowledgment, as well as other conditions (such as cache flushing).", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x4, }, { .name = "MISALIGN_MEM_REF", .desc = "Number of misaligned data memory references. 
Incremented by 1 every cycle during which, either the processor's load or store pipeline dispatches a misaligned micro-op Counting is performed if it is the first or second half or if it is blocked, squashed, or missed. In this context, misaligned means crossing a 64-bit boundary", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x5, }, { .name = "UOPS_RETIRED", .desc = "Number of micro-ops retired", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xc2, }, { .name = "INST_DECODED", .desc = "Number of instructions decoded", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xd0, }, { .name = "HW_INT_RX", .desc = "Number of hardware interrupts received", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xc8, }, { .name = "CYCLES_INT_MASKED", .desc = "Number of processor cycles for which interrupts are disabled", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xc6, }, { .name = "CYCLES_INT_PENDING_AND_MASKED", .desc = "Number of processor cycles for which interrupts are disabled and interrupts are pending.", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xc7, }, { .name = "BR_INST_RETIRED", .desc = "Number of branch instructions retired", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xc4, }, { .name = "BR_MISS_PRED_RETIRED", .desc = "Number of mispredicted branches retired", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xc5, }, { .name = "BR_TAKEN_RETIRED", .desc = "Number of taken branches retired", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xc9, }, { .name = "BR_MISS_PRED_TAKEN_RET", .desc = "Number of taken mispredicted branches retired", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xca, }, { .name = "BR_INST_DECODED", .desc = "Number of branch instructions decoded", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xe0, }, { .name = "BTB_MISSES", .desc = "Number of branches for which the BTB did not produce a prediction", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xe2, }, { .name = "BR_BOGUS", .desc = "Number of bogus 
branches", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xe4, }, { .name = "BACLEARS", .desc = "Number of times BACLEAR is asserted. This is the number of times that a static branch prediction was made, in which the branch decoder decided to make a branch prediction because the BTB did not", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xe6, }, { .name = "RESOURCE_STALLS", .desc = "Incremented by 1 during every cycle for which there is a resource related stall. Includes register renaming buffer entries, memory buffer entries. Does not include stalls due to bus queue full, too many cache misses, etc. In addition to resource related stalls, this event counts some other events. Includes stalls arising during branch misprediction recovery, such as if retirement of the mispredicted branch is delayed and stalls arising while store buffer is draining from synchronizing operations", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xa2, }, { .name = "PARTIAL_RAT_STALLS", .desc = "Number of cycles or events for partial stalls. This includes flag partial stalls", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xd2, }, { .name = "SEGMENT_REG_LOADS", .desc = "Number of segment register loads.", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x6, }, { .name = "L2_LD", .desc = "Number of L2 data loads. This event indicates that a normal, unlocked, load memory access was received by the L2. It includes only L2 cacheable memory accesses; it does not include I/O accesses, other non-memory accesses, or memory accesses such as UC/WT memory accesses. 
It does include L2 cacheable TLB miss memory accesses", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x29, .numasks = LIBPFM_ARRAY_SIZE(ppro_l2_ifetch), .ngrp = 1, .umasks = ppro_l2_ifetch, /* identical to actual umasks list for this event */ }, { .name = "L2_LINES_IN", .desc = "Number of lines allocated in the L2", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x24, }, { .name = "L2_LINES_OUT", .desc = "Number of lines removed from the L2 for any reason", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x26, }, { .name = "L2_M_LINES_OUTM", .desc = "Number of modified lines removed from the L2 for any reason", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x27, }, }; papi-5.6.0/src/libpfm4/docs/man3/libpfm_intel_skl.3000664 001750 001750 00000010365 13216244364 024056 0ustar00jshenry1963jshenry1963000000 000000 .TH LIBPFM 3 "August, 2015" "" "Linux Programmer's Manual" .SH NAME libpfm_intel_skl - support for Intel SkyLake core PMU .SH SYNOPSIS .nf .B #include .sp .B PMU name: skl .B PMU desc: Intel SkyLake .sp .SH DESCRIPTION The library supports the Intel SkyLake core PMU. It should be noted that this PMU model only covers each core's PMU and not the socket level PMU. On SkyLake, the number of generic counters depends on the Hyperthreading (HT) mode. When HT is enabled, then only 4 generic counters are available. This number goes up to 8 when HT is disabled. The \fBpfm_get_pmu_info()\fR function returns the maximum number of generic counters in \fBnum_cntrs\fR. .SH MODIFIERS The following modifiers are supported on Intel SkyLake processors: .TP .B u Measure at user level which includes privilege levels 1, 2, 3. This corresponds to \fBPFM_PLM3\fR. This is a boolean modifier. .TP .B k Measure at kernel level which includes privilege level 0. This corresponds to \fBPFM_PLM0\fR. This is a boolean modifier. .TP .B i Invert the meaning of the event. The counter will now count cycles in which the event is \fBnot\fR occurring.
This is a boolean modifier. .TP .B e Enable edge detection, i.e., count only when there is a state transition from no occurrence of the event to at least one occurrence. This modifier must be combined with a counter mask modifier (c) with a value greater than or equal to one. This is a boolean modifier. .TP .B c Set the counter mask value. The mask acts as a threshold. The counter will count the number of cycles in which the number of occurrences of the event is greater than or equal to the threshold. This is an integer modifier with values in the range [0:255]. .TP .B t Measure on both threads at the same time assuming hyper-threading is enabled. This is a boolean modifier. .TP .B ldlat Pass a latency threshold to the MEM_TRANS_RETIRED:LOAD_LATENCY event. This is an integer attribute that must be in the range [3:65535]. It is required for this event. Note that the event must be used with precise sampling (PEBS). .TP .B intx Monitor the event only when executing inside a transactional memory region (in tx). Event does not count otherwise. This is a boolean modifier. Default value is 0. .TP .B intxcp Do not count occurrences of the event when they are inside an aborted transactional memory region. This is a boolean modifier. Default value is 0. .TP .B fe_thres This modifier is for the FRONTEND_RETIRED event only. It defines the period in core cycles after which the IDQ_*_BUBBLES umask counts. It acts as a threshold, i.e., at least a period of N core cycles where the frontend did not deliver X uops. It can only be used with the IDQ_*_BUBBLES umasks. If not specified, the default threshold value is 1 cycle. The valid values are in the range [1:4095]. .SH OFFCORE_RESPONSE events Intel SkyLake provides two offcore_response events. They are called OFFCORE_RESPONSE_0 and OFFCORE_RESPONSE_1. Those events need special treatment in the performance monitoring infrastructure because each event uses an extra register to store some settings.
Thus, in case multiple offcore_response events are monitored simultaneously, the kernel needs to manage the sharing of that extra register. The offcore_response events are exposed as normal events by the library. The extra settings are exposed as regular umasks. The library takes care of encoding the events according to the underlying kernel interface. On Intel SkyLake, the umasks are divided into three categories: request, supplier and snoop. The user must provide at least one umask for each category. The categories are shown in the umask descriptions. There is also the special response umask called \fBANY_RESPONSE\fR. When this umask is used, it overrides any supplier and snoop umasks. In other words, users can specify either \fBANY_RESPONSE\fR \fBOR\fR any combination of supplier and snoop umasks. In case no supplier or snoop is specified, the library defaults to using \fBANY_RESPONSE\fR. For instance, the following are valid event selections: .TP .B OFFCORE_RESPONSE_0:DMND_DATA_RD:ANY_RESPONSE .TP .B OFFCORE_RESPONSE_0:ANY_REQUEST .TP .B OFFCORE_RESPONSE_0:ANY_RFO:LLC_HITM:SNOOP_ANY .P But the following are illegal: .TP .B OFFCORE_RESPONSE_0:ANY_RFO:LLC_HITM:ANY_RESPONSE .TP .B OFFCORE_RESPONSE_0:ANY_RFO:LLC_HITM:SNOOP_ANY:ANY_RESPONSE .SH AUTHORS .nf Stephane Eranian .fi .PP papi-5.6.0/src/components/nvml/tests/Makefile000664 001750 001750 00000001423 13216244357 023272 0ustar00jshenry1963jshenry1963000000 000000 NAME=nvml include ../../Makefile_comp_tests include ../Makefile.nvml NVCC = $(CUDA_DIR)/bin/nvcc NVCFLAGS = -L$(NVML_INCDIR) -ccbin=$(CC) CUDALIBS = -L$(NVML_LIBDIR) -L$(CUDA_DIR)/lib64 -lcuda -lcudart -lnvidia-ml PAPILIB := ../../../libpapi.a -ldl %.o:%.cu $(NVCC) $(NVCFLAGS) $(INCLUDE) -c -o $@ $< TESTS = HelloWorld nvml_power_limiting_test nvml_tests: $(TESTS) HelloWorld: HelloWorld.o $(UTILOBJS) $(NVCC) $(NVCFLAGS) $(INCLUDE) -o HelloWorld HelloWorld.o $(UTILOBJS) $(PAPILIB) $(CUDALIBS) nvml_power_limiting_test: nvml_power_limiting_test.o
$(UTILOBJS) $(NVCC) $(NVCFLAGS) $(INCLUDE) -o nvml_power_limiting_test nvml_power_limiting_test.o $(UTILOBJS) $(PAPILIB) $(CUDALIBS) cp nvml_power_limiting_test ~/nvml_power_limiting_test clean: rm -f $(TESTS) *.o FORCE: papi-5.6.0/src/components/mx/Rules.mx000664 001750 001750 00000000300 13216244357 021571 0ustar00jshenry1963jshenry1963000000 000000 # $Id$ COMPSRCS += components/mx/linux-mx.c COMPOBJS += linux-mx.o linux-mx.o: components/mx/linux-mx.c $(HEADERS) $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/mx/linux-mx.c -o linux-mx.o papi-5.6.0/src/ctests/low-level.c000664 001750 001750 00000013552 13216244360 020706 0ustar00jshenry1963jshenry1963000000 000000 /* This examples show the essentials in using the PAPI low-level interface. The program consists of 3 examples where the work done over some work-loops. The example tries to illustrate some simple mistakes that are easily made and how a correct code would accomplish the same thing. Example 1: The total count over two work loops (Loops 1 and 2) are supposed to be measured. Due to a mis-understanding of the semantics of the API the total count gets wrong. The example also illustrates that it is legal to read both running and stopped counters. Example 2: The total count over two work loops (Loops 1 and 3) is supposed to be measured while discarding the counts made in loop 2. Instead the counts in loop1 are counted twice and the counts in loop2 are added to the total number of counts. Example 3: One correct way of accomplishing the result aimed for in example 2. 
*/ #include #include #include "papi.h" #include "papi_test.h" #include "do_loops.h" #define NUM_EVENTS 2 int main( int argc, char **argv ) { int retval; long long values[NUM_EVENTS], dummyvalues[NUM_EVENTS]; int Events[NUM_EVENTS]; int EventSet = PAPI_NULL; int quiet; /* Set TESTS_QUIET variable */ quiet=tests_quiet( argc, argv ); retval = PAPI_library_init( PAPI_VER_CURRENT ); if (retval != PAPI_VER_CURRENT ) { test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); } /* query and set up the right events to monitor */ if ( PAPI_query_event( PAPI_FP_INS ) == PAPI_OK ) { Events[0] = PAPI_FP_INS; Events[1] = PAPI_TOT_CYC; } else { Events[0] = PAPI_TOT_INS; Events[1] = PAPI_TOT_CYC; } retval = PAPI_create_eventset( &EventSet ); if (retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); } retval = PAPI_add_events( EventSet, ( int * ) Events, NUM_EVENTS ); if (retval < PAPI_OK ) { if (!quiet) printf("Trouble adding events\n"); test_skip( __FILE__, __LINE__, "PAPI_add_events", retval ); } if ( !quiet ) { printf( "\n Incorrect usage of read and accum.\n" ); printf( " Some cycles are counted twice\n" ); } if ( ( retval = PAPI_start( EventSet ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_start", retval ); /* Loop 1 */ do_flops( NUM_FLOPS ); if ( ( retval = PAPI_read( EventSet, values ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_read", retval ); if ( !quiet ) printf( TWO12, values[0], values[1], "(Counters continuing...)\n" ); /* Loop 2 */ do_flops( NUM_FLOPS ); /* Using PAPI_accum here is incorrect. 
The result is that Loop 1 * * is being counted twice */ if ( ( retval = PAPI_accum( EventSet, values ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_accum", retval ); if ( !quiet ) printf( TWO12, values[0], values[1], "(Counters being accumulated)\n" ); /* Loop 3 */ do_flops( NUM_FLOPS ); if ( ( retval = PAPI_stop( EventSet, dummyvalues ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); if ( ( retval = PAPI_read( EventSet, dummyvalues ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_read", retval ); if ( !quiet ) { printf( TWO12, dummyvalues[0], dummyvalues[1], "(Reading stopped counters)\n" ); printf( TWO12, values[0], values[1], "" ); printf( "\n Incorrect usage of read and accum.\n" ); printf( " Another incorrect use\n" ); } if ( ( retval = PAPI_start( EventSet ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_start", retval ); /* Loop 1 */ do_flops( NUM_FLOPS ); if ( ( retval = PAPI_read( EventSet, values ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_read", retval ); if ( !quiet ) printf( TWO12, values[0], values[1], "(Counters continuing...)\n" ); /* Loop 2 */ /* Code that should not be counted */ do_flops( NUM_FLOPS ); if ( ( retval = PAPI_read( EventSet, dummyvalues ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_read", retval ); if ( !quiet ) printf( TWO12, dummyvalues[0], dummyvalues[1], "(Intermediate counts...)\n" ); /* Loop 3 */ do_flops( NUM_FLOPS ); /* Since PAPI_read does not reset the counters it's use above after * * loop 2 is incorrect. Instead Loop1 will in effect be counted twice. 
* * and the counts in loop 2 are included in the total counts */ if ( ( retval = PAPI_accum( EventSet, values ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_accum", retval ); if ( !quiet ) printf( TWO12, values[0], values[1], "" ); if ( ( retval = PAPI_stop( EventSet, dummyvalues ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); if ( !quiet ) { printf( "\n Correct usage of read and accum.\n" ); printf( " PAPI_reset and PAPI_accum used to skip counting\n" ); printf( " a section of the code.\n" ); } if ( ( retval = PAPI_start( EventSet ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_start", retval ); do_flops( NUM_FLOPS ); if ( ( retval = PAPI_read( EventSet, values ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_read", retval ); if ( !quiet ) printf( TWO12, values[0], values[1], "(Counters continuing)\n" ); /* Code that should not be counted */ do_flops( NUM_FLOPS ); if ( ( retval = PAPI_reset( EventSet ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_reset", retval ); if ( !quiet ) printf( "%12s %12s (Counters reset)\n", "", "" ); do_flops( NUM_FLOPS ); if ( ( retval = PAPI_accum( EventSet, values ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_accum", retval ); if ( !quiet ) printf( TWO12, values[0], values[1], "" ); if ( !quiet ) { printf( "----------------------------------\n" ); printf( "Verification: The last line in each experiment should be\n" ); printf( "approximately twice the value of the first line.\n" ); printf ( "The third case illustrates one possible way to accomplish this.\n" ); } test_pass( __FILE__ ); return 0; } papi-5.6.0/src/libpfm4/docs/man3/libpfm_intel_bdx_unc_ubo.3000664 001750 001750 00000002774 13216244364 025561 0ustar00jshenry1963jshenry1963000000 000000 .TH LIBPFM 3 "June, 2017" "" "Linux Programmer's Manual" .SH NAME libpfm_intel_bdx_unc_ubo - support for Intel Broadwell Server U-Box uncore PMU .SH SYNOPSIS .nf .B #include .sp .B PMU name: bdx_unc_ubo .B PMU desc: Intel Broadwell 
Server U-Box uncore PMU .sp .SH DESCRIPTION The library supports the Intel Broadwell server system configuration unit (U-Box) uncore PMU. This PMU model only exists on various Broadwell server models (79, 86). .SH MODIFIERS The following modifiers are supported on Intel Broadwell server U-Box uncore PMU: .TP .B e Enable edge detection, i.e., count only when there is a state transition from no occurrence of the event to at least one occurrence. This modifier must be combined with a threshold modifier (t) with a value greater than or equal to one. This is a boolean modifier. .TP .B t Set the threshold value. When set to a non-zero value, the counter counts the number of U-Box cycles in which the number of occurrences of the event is greater than or equal to the threshold. This is an integer modifier with values in the range [0:15]. .TP .B i Invert the meaning of the threshold or edge filter. If set, the event counts when strictly less than N occurrences occur per cycle if threshold is set to N. When invert is set, then threshold must be set to a non-zero value. If set, the event counts when the event transitions from occurring to not occurring (falling edge) when edge detection is set. This is a boolean modifier. .SH AUTHORS .nf Stephane Eranian .fi .PP papi-5.6.0/src/libpfm4/docs/man3/libpfm_intel_knc.3000664 001750 001750 00000002504 13216244364 024034 0ustar00jshenry1963jshenry1963000000 000000 .TH LIBPFM 3 "September, 2012" "" "Linux Programmer's Manual" .SH NAME libpfm_intel_knc - support for Intel Knights Corner .SH SYNOPSIS .nf .B #include <perfmon/pfmlib.h> .sp .B PMU name: knc .B PMU desc: Intel Knights Corner .sp .SH DESCRIPTION The library supports Intel Knights Corner processors. .SH MODIFIERS The following modifiers are supported on Intel Knights Corner processors: .TP .B u Measure at user level which includes privilege levels 1, 2, 3. This corresponds to \fBPFM_PLM3\fR. This is a boolean modifier. .TP .B k Measure at kernel level which includes privilege level 0. 
This corresponds to \fBPFM_PLM0\fR. This is a boolean modifier. .TP .B i Invert the meaning of the event. The counter will now count cycles in which the event is \fBnot\fR occurring. This is a boolean modifier. .TP .B e Enable edge detection, i.e., count only when there is a state transition. This is a boolean modifier. .TP .B c Set the counter mask value. The mask acts as a threshold. The counter will count the number of cycles in which the number of occurrences of the event is greater than or equal to the threshold. This is an integer modifier with values in the range [0:255]. .TP .B t Measure on all threads at the same time assuming hyper-threading is enabled. This is a boolean modifier. .SH AUTHORS .nf Stephane Eranian .fi .PP papi-5.6.0/src/libpfm4/lib/events/power5_events.h000664 001750 001750 00000530012 13216244365 023714 0ustar00jshenry1963jshenry1963000000 000000 /****************************/ /* THIS IS OPEN SOURCE CODE */ /****************************/ #ifndef __POWER5_EVENTS_H__ #define __POWER5_EVENTS_H__ /* * File: power5_events.h * CVS: * Author: Corey Ashford * cjashfor@us.ibm.com * Mods: * * * (C) Copyright IBM Corporation, 2009. All Rights Reserved. * Contributed by Corey Ashford * * Note: This code was automatically generated and should not be modified by * hand. 
* */ #define POWER5_PME_PM_LSU_REJECT_RELOAD_CDF 0 #define POWER5_PME_PM_FPU1_SINGLE 1 #define POWER5_PME_PM_L3SB_REF 2 #define POWER5_PME_PM_THRD_PRIO_DIFF_3or4_CYC 3 #define POWER5_PME_PM_INST_FROM_L275_SHR 4 #define POWER5_PME_PM_MRK_DATA_FROM_L375_MOD 5 #define POWER5_PME_PM_DTLB_MISS_4K 6 #define POWER5_PME_PM_CLB_FULL_CYC 7 #define POWER5_PME_PM_MRK_ST_CMPL 8 #define POWER5_PME_PM_LSU_FLUSH_LRQ_FULL 9 #define POWER5_PME_PM_MRK_DATA_FROM_L275_SHR 10 #define POWER5_PME_PM_1INST_CLB_CYC 11 #define POWER5_PME_PM_MEM_SPEC_RD_CANCEL 12 #define POWER5_PME_PM_MRK_DTLB_MISS_16M 13 #define POWER5_PME_PM_FPU_FDIV 14 #define POWER5_PME_PM_FPU_SINGLE 15 #define POWER5_PME_PM_FPU0_FMA 16 #define POWER5_PME_PM_SLB_MISS 17 #define POWER5_PME_PM_LSU1_FLUSH_LRQ 18 #define POWER5_PME_PM_L2SA_ST_HIT 19 #define POWER5_PME_PM_DTLB_MISS 20 #define POWER5_PME_PM_BR_PRED_TA 21 #define POWER5_PME_PM_MRK_DATA_FROM_L375_MOD_CYC 22 #define POWER5_PME_PM_CMPLU_STALL_FXU 23 #define POWER5_PME_PM_EXT_INT 24 #define POWER5_PME_PM_MRK_LSU1_FLUSH_LRQ 25 #define POWER5_PME_PM_LSU1_LDF 26 #define POWER5_PME_PM_MRK_ST_GPS 27 #define POWER5_PME_PM_FAB_CMD_ISSUED 28 #define POWER5_PME_PM_LSU0_SRQ_STFWD 29 #define POWER5_PME_PM_CR_MAP_FULL_CYC 30 #define POWER5_PME_PM_L2SA_RCST_DISP_FAIL_RC_FULL 31 #define POWER5_PME_PM_MRK_LSU0_FLUSH_ULD 32 #define POWER5_PME_PM_LSU_FLUSH_SRQ_FULL 33 #define POWER5_PME_PM_FLUSH_IMBAL 34 #define POWER5_PME_PM_MEM_RQ_DISP_Q16to19 35 #define POWER5_PME_PM_THRD_PRIO_DIFF_minus3or4_CYC 36 #define POWER5_PME_PM_DATA_FROM_L35_MOD 37 #define POWER5_PME_PM_MEM_HI_PRIO_WR_CMPL 38 #define POWER5_PME_PM_FPU1_FDIV 39 #define POWER5_PME_PM_FPU0_FRSP_FCONV 40 #define POWER5_PME_PM_MEM_RQ_DISP 41 #define POWER5_PME_PM_LWSYNC_HELD 42 #define POWER5_PME_PM_FXU_FIN 43 #define POWER5_PME_PM_DSLB_MISS 44 #define POWER5_PME_PM_FXLS1_FULL_CYC 45 #define POWER5_PME_PM_DATA_FROM_L275_SHR 46 #define POWER5_PME_PM_THRD_SEL_T0 47 #define POWER5_PME_PM_PTEG_RELOAD_VALID 48 #define 
POWER5_PME_PM_LSU_LMQ_LHR_MERGE 49 #define POWER5_PME_PM_MRK_STCX_FAIL 50 #define POWER5_PME_PM_2INST_CLB_CYC 51 #define POWER5_PME_PM_FAB_PNtoVN_DIRECT 52 #define POWER5_PME_PM_PTEG_FROM_L2MISS 53 #define POWER5_PME_PM_CMPLU_STALL_LSU 54 #define POWER5_PME_PM_MRK_DSLB_MISS 55 #define POWER5_PME_PM_LSU_FLUSH_ULD 56 #define POWER5_PME_PM_PTEG_FROM_LMEM 57 #define POWER5_PME_PM_MRK_BRU_FIN 58 #define POWER5_PME_PM_MEM_WQ_DISP_WRITE 59 #define POWER5_PME_PM_MRK_DATA_FROM_L275_MOD_CYC 60 #define POWER5_PME_PM_LSU1_NCLD 61 #define POWER5_PME_PM_L2SA_RCLD_DISP_FAIL_OTHER 62 #define POWER5_PME_PM_SNOOP_PW_RETRY_WQ_PWQ 63 #define POWER5_PME_PM_FPR_MAP_FULL_CYC 64 #define POWER5_PME_PM_FPU1_FULL_CYC 65 #define POWER5_PME_PM_L3SA_ALL_BUSY 66 #define POWER5_PME_PM_3INST_CLB_CYC 67 #define POWER5_PME_PM_MEM_PWQ_DISP_Q2or3 68 #define POWER5_PME_PM_L2SA_SHR_INV 69 #define POWER5_PME_PM_THRESH_TIMEO 70 #define POWER5_PME_PM_L2SA_RC_DISP_FAIL_CO_BUSY_ALL 71 #define POWER5_PME_PM_THRD_SEL_OVER_GCT_IMBAL 72 #define POWER5_PME_PM_FPU_FSQRT 73 #define POWER5_PME_PM_MRK_LSU0_FLUSH_LRQ 74 #define POWER5_PME_PM_PMC1_OVERFLOW 75 #define POWER5_PME_PM_L3SC_SNOOP_RETRY 76 #define POWER5_PME_PM_DATA_TABLEWALK_CYC 77 #define POWER5_PME_PM_THRD_PRIO_6_CYC 78 #define POWER5_PME_PM_FPU_FEST 79 #define POWER5_PME_PM_FAB_M1toP1_SIDECAR_EMPTY 80 #define POWER5_PME_PM_MRK_DATA_FROM_RMEM 81 #define POWER5_PME_PM_MRK_DATA_FROM_L35_MOD_CYC 82 #define POWER5_PME_PM_MEM_PWQ_DISP 83 #define POWER5_PME_PM_FAB_P1toM1_SIDECAR_EMPTY 84 #define POWER5_PME_PM_LD_MISS_L1_LSU0 85 #define POWER5_PME_PM_SNOOP_PARTIAL_RTRY_QFULL 86 #define POWER5_PME_PM_FPU1_STALL3 87 #define POWER5_PME_PM_GCT_USAGE_80to99_CYC 88 #define POWER5_PME_PM_WORK_HELD 89 #define POWER5_PME_PM_INST_CMPL 90 #define POWER5_PME_PM_LSU1_FLUSH_UST 91 #define POWER5_PME_PM_FXU_IDLE 92 #define POWER5_PME_PM_LSU0_FLUSH_ULD 93 #define POWER5_PME_PM_LSU1_REJECT_LMQ_FULL 94 #define POWER5_PME_PM_GRP_DISP_REJECT 95 #define POWER5_PME_PM_L2SA_MOD_INV 96 
#define POWER5_PME_PM_PTEG_FROM_L25_SHR 97 #define POWER5_PME_PM_FAB_CMD_RETRIED 98 #define POWER5_PME_PM_L3SA_SHR_INV 99 #define POWER5_PME_PM_L2SB_RC_DISP_FAIL_CO_BUSY_ALL 100 #define POWER5_PME_PM_L2SA_RCST_DISP_FAIL_ADDR 101 #define POWER5_PME_PM_L2SA_RCLD_DISP_FAIL_RC_FULL 102 #define POWER5_PME_PM_PTEG_FROM_L375_MOD 103 #define POWER5_PME_PM_MRK_LSU1_FLUSH_UST 104 #define POWER5_PME_PM_BR_ISSUED 105 #define POWER5_PME_PM_MRK_GRP_BR_REDIR 106 #define POWER5_PME_PM_EE_OFF 107 #define POWER5_PME_PM_MEM_RQ_DISP_Q4to7 108 #define POWER5_PME_PM_MEM_FAST_PATH_RD_DISP 109 #define POWER5_PME_PM_INST_FROM_L3 110 #define POWER5_PME_PM_ITLB_MISS 111 #define POWER5_PME_PM_FXU1_BUSY_FXU0_IDLE 112 #define POWER5_PME_PM_FXLS_FULL_CYC 113 #define POWER5_PME_PM_DTLB_REF_4K 114 #define POWER5_PME_PM_GRP_DISP_VALID 115 #define POWER5_PME_PM_LSU_FLUSH_UST 116 #define POWER5_PME_PM_FXU1_FIN 117 #define POWER5_PME_PM_THRD_PRIO_4_CYC 118 #define POWER5_PME_PM_MRK_DATA_FROM_L35_MOD 119 #define POWER5_PME_PM_4INST_CLB_CYC 120 #define POWER5_PME_PM_MRK_DTLB_REF_16M 121 #define POWER5_PME_PM_INST_FROM_L375_MOD 122 #define POWER5_PME_PM_L2SC_RCST_DISP_FAIL_ADDR 123 #define POWER5_PME_PM_GRP_CMPL 124 #define POWER5_PME_PM_FPU1_1FLOP 125 #define POWER5_PME_PM_FPU_FRSP_FCONV 126 #define POWER5_PME_PM_5INST_CLB_CYC 127 #define POWER5_PME_PM_L3SC_REF 128 #define POWER5_PME_PM_THRD_L2MISS_BOTH_CYC 129 #define POWER5_PME_PM_MEM_PW_GATH 130 #define POWER5_PME_PM_FAB_PNtoNN_SIDECAR 131 #define POWER5_PME_PM_FAB_DCLAIM_ISSUED 132 #define POWER5_PME_PM_GRP_IC_MISS 133 #define POWER5_PME_PM_INST_FROM_L35_SHR 134 #define POWER5_PME_PM_LSU_LMQ_FULL_CYC 135 #define POWER5_PME_PM_MRK_DATA_FROM_L2_CYC 136 #define POWER5_PME_PM_LSU_SRQ_SYNC_CYC 137 #define POWER5_PME_PM_LSU0_BUSY_REJECT 138 #define POWER5_PME_PM_LSU_REJECT_ERAT_MISS 139 #define POWER5_PME_PM_MRK_DATA_FROM_RMEM_CYC 140 #define POWER5_PME_PM_DATA_FROM_L375_SHR 141 #define POWER5_PME_PM_FPU0_FMOV_FEST 142 #define 
POWER5_PME_PM_PTEG_FROM_L25_MOD 143 #define POWER5_PME_PM_LD_REF_L1_LSU0 144 #define POWER5_PME_PM_THRD_PRIO_7_CYC 145 #define POWER5_PME_PM_LSU1_FLUSH_SRQ 146 #define POWER5_PME_PM_L2SC_RCST_DISP 147 #define POWER5_PME_PM_CMPLU_STALL_DIV 148 #define POWER5_PME_PM_MEM_RQ_DISP_Q12to15 149 #define POWER5_PME_PM_INST_FROM_L375_SHR 150 #define POWER5_PME_PM_ST_REF_L1 151 #define POWER5_PME_PM_L3SB_ALL_BUSY 152 #define POWER5_PME_PM_FAB_P1toVNorNN_SIDECAR_EMPTY 153 #define POWER5_PME_PM_MRK_DATA_FROM_L275_SHR_CYC 154 #define POWER5_PME_PM_FAB_HOLDtoNN_EMPTY 155 #define POWER5_PME_PM_DATA_FROM_LMEM 156 #define POWER5_PME_PM_RUN_CYC 157 #define POWER5_PME_PM_PTEG_FROM_RMEM 158 #define POWER5_PME_PM_L2SC_RCLD_DISP 159 #define POWER5_PME_PM_LSU0_LDF 160 #define POWER5_PME_PM_LSU_LRQ_S0_VALID 161 #define POWER5_PME_PM_PMC3_OVERFLOW 162 #define POWER5_PME_PM_MRK_IMR_RELOAD 163 #define POWER5_PME_PM_MRK_GRP_TIMEO 164 #define POWER5_PME_PM_ST_MISS_L1 165 #define POWER5_PME_PM_STOP_COMPLETION 166 #define POWER5_PME_PM_LSU_BUSY_REJECT 167 #define POWER5_PME_PM_ISLB_MISS 168 #define POWER5_PME_PM_CYC 169 #define POWER5_PME_PM_THRD_ONE_RUN_CYC 170 #define POWER5_PME_PM_GRP_BR_REDIR_NONSPEC 171 #define POWER5_PME_PM_LSU1_SRQ_STFWD 172 #define POWER5_PME_PM_L3SC_MOD_INV 173 #define POWER5_PME_PM_L2_PREF 174 #define POWER5_PME_PM_GCT_NOSLOT_BR_MPRED 175 #define POWER5_PME_PM_MRK_DATA_FROM_L25_MOD 176 #define POWER5_PME_PM_L2SB_MOD_INV 177 #define POWER5_PME_PM_L2SB_ST_REQ 178 #define POWER5_PME_PM_MRK_L1_RELOAD_VALID 179 #define POWER5_PME_PM_L3SB_HIT 180 #define POWER5_PME_PM_L2SB_SHR_MOD 181 #define POWER5_PME_PM_EE_OFF_EXT_INT 182 #define POWER5_PME_PM_1PLUS_PPC_CMPL 183 #define POWER5_PME_PM_L2SC_SHR_MOD 184 #define POWER5_PME_PM_PMC6_OVERFLOW 185 #define POWER5_PME_PM_LSU_LRQ_FULL_CYC 186 #define POWER5_PME_PM_IC_PREF_INSTALL 187 #define POWER5_PME_PM_TLB_MISS 188 #define POWER5_PME_PM_GCT_FULL_CYC 189 #define POWER5_PME_PM_FXU_BUSY 190 #define POWER5_PME_PM_MRK_DATA_FROM_L3_CYC 
191 #define POWER5_PME_PM_LSU_REJECT_LMQ_FULL 192 #define POWER5_PME_PM_LSU_SRQ_S0_ALLOC 193 #define POWER5_PME_PM_GRP_MRK 194 #define POWER5_PME_PM_INST_FROM_L25_SHR 195 #define POWER5_PME_PM_FPU1_FIN 196 #define POWER5_PME_PM_DC_PREF_STREAM_ALLOC 197 #define POWER5_PME_PM_BR_MPRED_TA 198 #define POWER5_PME_PM_CRQ_FULL_CYC 199 #define POWER5_PME_PM_L2SA_RCLD_DISP 200 #define POWER5_PME_PM_SNOOP_WR_RETRY_QFULL 201 #define POWER5_PME_PM_MRK_DTLB_REF_4K 202 #define POWER5_PME_PM_LSU_SRQ_S0_VALID 203 #define POWER5_PME_PM_LSU0_FLUSH_LRQ 204 #define POWER5_PME_PM_INST_FROM_L275_MOD 205 #define POWER5_PME_PM_GCT_EMPTY_CYC 206 #define POWER5_PME_PM_LARX_LSU0 207 #define POWER5_PME_PM_THRD_PRIO_DIFF_5or6_CYC 208 #define POWER5_PME_PM_SNOOP_RETRY_1AHEAD 209 #define POWER5_PME_PM_FPU1_FSQRT 210 #define POWER5_PME_PM_MRK_LD_MISS_L1_LSU1 211 #define POWER5_PME_PM_MRK_FPU_FIN 212 #define POWER5_PME_PM_THRD_PRIO_5_CYC 213 #define POWER5_PME_PM_MRK_DATA_FROM_LMEM 214 #define POWER5_PME_PM_FPU1_FRSP_FCONV 215 #define POWER5_PME_PM_SNOOP_TLBIE 216 #define POWER5_PME_PM_L3SB_SNOOP_RETRY 217 #define POWER5_PME_PM_FAB_VBYPASS_EMPTY 218 #define POWER5_PME_PM_MRK_DATA_FROM_L275_MOD 219 #define POWER5_PME_PM_6INST_CLB_CYC 220 #define POWER5_PME_PM_L2SB_RCST_DISP 221 #define POWER5_PME_PM_FLUSH 222 #define POWER5_PME_PM_L2SC_MOD_INV 223 #define POWER5_PME_PM_FPU_DENORM 224 #define POWER5_PME_PM_L3SC_HIT 225 #define POWER5_PME_PM_SNOOP_WR_RETRY_RQ 226 #define POWER5_PME_PM_LSU1_REJECT_SRQ 227 #define POWER5_PME_PM_IC_PREF_REQ 228 #define POWER5_PME_PM_L3SC_ALL_BUSY 229 #define POWER5_PME_PM_MRK_GRP_IC_MISS 230 #define POWER5_PME_PM_GCT_NOSLOT_IC_MISS 231 #define POWER5_PME_PM_MRK_DATA_FROM_L3 232 #define POWER5_PME_PM_GCT_NOSLOT_SRQ_FULL 233 #define POWER5_PME_PM_THRD_SEL_OVER_ISU_HOLD 234 #define POWER5_PME_PM_CMPLU_STALL_DCACHE_MISS 235 #define POWER5_PME_PM_L3SA_MOD_INV 236 #define POWER5_PME_PM_LSU_FLUSH_LRQ 237 #define POWER5_PME_PM_THRD_PRIO_2_CYC 238 #define 
POWER5_PME_PM_LSU_FLUSH_SRQ 239 #define POWER5_PME_PM_MRK_LSU_SRQ_INST_VALID 240 #define POWER5_PME_PM_L3SA_REF 241 #define POWER5_PME_PM_L2SC_RC_DISP_FAIL_CO_BUSY_ALL 242 #define POWER5_PME_PM_FPU0_STALL3 243 #define POWER5_PME_PM_GPR_MAP_FULL_CYC 244 #define POWER5_PME_PM_TB_BIT_TRANS 245 #define POWER5_PME_PM_MRK_LSU_FLUSH_LRQ 246 #define POWER5_PME_PM_FPU0_STF 247 #define POWER5_PME_PM_MRK_DTLB_MISS 248 #define POWER5_PME_PM_FPU1_FMA 249 #define POWER5_PME_PM_L2SA_MOD_TAG 250 #define POWER5_PME_PM_LSU1_FLUSH_ULD 251 #define POWER5_PME_PM_MRK_LSU0_FLUSH_UST 252 #define POWER5_PME_PM_MRK_INST_FIN 253 #define POWER5_PME_PM_FPU0_FULL_CYC 254 #define POWER5_PME_PM_LSU_LRQ_S0_ALLOC 255 #define POWER5_PME_PM_MRK_LSU1_FLUSH_ULD 256 #define POWER5_PME_PM_MRK_DTLB_REF 257 #define POWER5_PME_PM_BR_UNCOND 258 #define POWER5_PME_PM_THRD_SEL_OVER_L2MISS 259 #define POWER5_PME_PM_L2SB_SHR_INV 260 #define POWER5_PME_PM_MEM_LO_PRIO_WR_CMPL 261 #define POWER5_PME_PM_L3SC_MOD_TAG 262 #define POWER5_PME_PM_MRK_ST_MISS_L1 263 #define POWER5_PME_PM_GRP_DISP_SUCCESS 264 #define POWER5_PME_PM_THRD_PRIO_DIFF_1or2_CYC 265 #define POWER5_PME_PM_IC_DEMAND_L2_BHT_REDIRECT 266 #define POWER5_PME_PM_MEM_WQ_DISP_Q8to15 267 #define POWER5_PME_PM_FPU0_SINGLE 268 #define POWER5_PME_PM_LSU_DERAT_MISS 269 #define POWER5_PME_PM_THRD_PRIO_1_CYC 270 #define POWER5_PME_PM_L2SC_RCST_DISP_FAIL_OTHER 271 #define POWER5_PME_PM_FPU1_FEST 272 #define POWER5_PME_PM_FAB_HOLDtoVN_EMPTY 273 #define POWER5_PME_PM_SNOOP_RD_RETRY_RQ 274 #define POWER5_PME_PM_SNOOP_DCLAIM_RETRY_QFULL 275 #define POWER5_PME_PM_MRK_DATA_FROM_L25_SHR_CYC 276 #define POWER5_PME_PM_MRK_ST_CMPL_INT 277 #define POWER5_PME_PM_FLUSH_BR_MPRED 278 #define POWER5_PME_PM_L2SB_RCLD_DISP_FAIL_ADDR 279 #define POWER5_PME_PM_FPU_STF 280 #define POWER5_PME_PM_CMPLU_STALL_FPU 281 #define POWER5_PME_PM_THRD_PRIO_DIFF_minus1or2_CYC 282 #define POWER5_PME_PM_GCT_NOSLOT_CYC 283 #define POWER5_PME_PM_FXU0_BUSY_FXU1_IDLE 284 #define 
POWER5_PME_PM_PTEG_FROM_L35_SHR 285 #define POWER5_PME_PM_MRK_LSU_FLUSH_UST 286 #define POWER5_PME_PM_L3SA_HIT 287 #define POWER5_PME_PM_MRK_DATA_FROM_L25_SHR 288 #define POWER5_PME_PM_L2SB_RCST_DISP_FAIL_ADDR 289 #define POWER5_PME_PM_MRK_DATA_FROM_L35_SHR 290 #define POWER5_PME_PM_IERAT_XLATE_WR 291 #define POWER5_PME_PM_L2SA_ST_REQ 292 #define POWER5_PME_PM_THRD_SEL_T1 293 #define POWER5_PME_PM_IC_DEMAND_L2_BR_REDIRECT 294 #define POWER5_PME_PM_INST_FROM_LMEM 295 #define POWER5_PME_PM_FPU0_1FLOP 296 #define POWER5_PME_PM_MRK_DATA_FROM_L35_SHR_CYC 297 #define POWER5_PME_PM_PTEG_FROM_L2 298 #define POWER5_PME_PM_MEM_PW_CMPL 299 #define POWER5_PME_PM_THRD_PRIO_DIFF_minus5or6_CYC 300 #define POWER5_PME_PM_L2SB_RCLD_DISP_FAIL_OTHER 301 #define POWER5_PME_PM_FPU0_FIN 302 #define POWER5_PME_PM_MRK_DTLB_MISS_4K 303 #define POWER5_PME_PM_L3SC_SHR_INV 304 #define POWER5_PME_PM_GRP_BR_REDIR 305 #define POWER5_PME_PM_L2SC_RCLD_DISP_FAIL_RC_FULL 306 #define POWER5_PME_PM_MRK_LSU_FLUSH_SRQ 307 #define POWER5_PME_PM_PTEG_FROM_L275_SHR 308 #define POWER5_PME_PM_L2SB_RCLD_DISP_FAIL_RC_FULL 309 #define POWER5_PME_PM_SNOOP_RD_RETRY_WQ 310 #define POWER5_PME_PM_LSU0_NCLD 311 #define POWER5_PME_PM_FAB_DCLAIM_RETRIED 312 #define POWER5_PME_PM_LSU1_BUSY_REJECT 313 #define POWER5_PME_PM_FXLS0_FULL_CYC 314 #define POWER5_PME_PM_FPU0_FEST 315 #define POWER5_PME_PM_DTLB_REF_16M 316 #define POWER5_PME_PM_L2SC_RCLD_DISP_FAIL_ADDR 317 #define POWER5_PME_PM_LSU0_REJECT_ERAT_MISS 318 #define POWER5_PME_PM_DATA_FROM_L25_MOD 319 #define POWER5_PME_PM_GCT_USAGE_60to79_CYC 320 #define POWER5_PME_PM_DATA_FROM_L375_MOD 321 #define POWER5_PME_PM_LSU_LMQ_SRQ_EMPTY_CYC 322 #define POWER5_PME_PM_LSU0_REJECT_RELOAD_CDF 323 #define POWER5_PME_PM_0INST_FETCH 324 #define POWER5_PME_PM_LSU1_REJECT_RELOAD_CDF 325 #define POWER5_PME_PM_L1_PREF 326 #define POWER5_PME_PM_MEM_WQ_DISP_Q0to7 327 #define POWER5_PME_PM_MRK_DATA_FROM_LMEM_CYC 328 #define POWER5_PME_PM_BRQ_FULL_CYC 329 #define 
POWER5_PME_PM_GRP_IC_MISS_NONSPEC 330 #define POWER5_PME_PM_PTEG_FROM_L275_MOD 331 #define POWER5_PME_PM_MRK_LD_MISS_L1_LSU0 332 #define POWER5_PME_PM_MRK_DATA_FROM_L375_SHR_CYC 333 #define POWER5_PME_PM_LSU_FLUSH 334 #define POWER5_PME_PM_DATA_FROM_L3 335 #define POWER5_PME_PM_INST_FROM_L2 336 #define POWER5_PME_PM_PMC2_OVERFLOW 337 #define POWER5_PME_PM_FPU0_DENORM 338 #define POWER5_PME_PM_FPU1_FMOV_FEST 339 #define POWER5_PME_PM_INST_FETCH_CYC 340 #define POWER5_PME_PM_LSU_LDF 341 #define POWER5_PME_PM_INST_DISP 342 #define POWER5_PME_PM_DATA_FROM_L25_SHR 343 #define POWER5_PME_PM_L1_DCACHE_RELOAD_VALID 344 #define POWER5_PME_PM_MEM_WQ_DISP_DCLAIM 345 #define POWER5_PME_PM_FPU_FULL_CYC 346 #define POWER5_PME_PM_MRK_GRP_ISSUED 347 #define POWER5_PME_PM_THRD_PRIO_3_CYC 348 #define POWER5_PME_PM_FPU_FMA 349 #define POWER5_PME_PM_INST_FROM_L35_MOD 350 #define POWER5_PME_PM_MRK_CRU_FIN 351 #define POWER5_PME_PM_SNOOP_WR_RETRY_WQ 352 #define POWER5_PME_PM_CMPLU_STALL_REJECT 353 #define POWER5_PME_PM_LSU1_REJECT_ERAT_MISS 354 #define POWER5_PME_PM_MRK_FXU_FIN 355 #define POWER5_PME_PM_L2SB_RCST_DISP_FAIL_OTHER 356 #define POWER5_PME_PM_L2SC_RC_DISP_FAIL_CO_BUSY 357 #define POWER5_PME_PM_PMC4_OVERFLOW 358 #define POWER5_PME_PM_L3SA_SNOOP_RETRY 359 #define POWER5_PME_PM_PTEG_FROM_L35_MOD 360 #define POWER5_PME_PM_INST_FROM_L25_MOD 361 #define POWER5_PME_PM_THRD_SMT_HANG 362 #define POWER5_PME_PM_CMPLU_STALL_ERAT_MISS 363 #define POWER5_PME_PM_L3SA_MOD_TAG 364 #define POWER5_PME_PM_FLUSH_SYNC 365 #define POWER5_PME_PM_INST_FROM_L2MISS 366 #define POWER5_PME_PM_L2SC_ST_HIT 367 #define POWER5_PME_PM_MEM_RQ_DISP_Q8to11 368 #define POWER5_PME_PM_MRK_GRP_DISP 369 #define POWER5_PME_PM_L2SB_MOD_TAG 370 #define POWER5_PME_PM_CLB_EMPTY_CYC 371 #define POWER5_PME_PM_L2SB_ST_HIT 372 #define POWER5_PME_PM_MEM_NONSPEC_RD_CANCEL 373 #define POWER5_PME_PM_BR_PRED_CR_TA 374 #define POWER5_PME_PM_MRK_LSU0_FLUSH_SRQ 375 #define POWER5_PME_PM_MRK_LSU_FLUSH_ULD 376 #define 
POWER5_PME_PM_INST_DISP_ATTEMPT 377 #define POWER5_PME_PM_INST_FROM_RMEM 378 #define POWER5_PME_PM_ST_REF_L1_LSU0 379 #define POWER5_PME_PM_LSU0_DERAT_MISS 380 #define POWER5_PME_PM_L2SB_RCLD_DISP 381 #define POWER5_PME_PM_FPU_STALL3 382 #define POWER5_PME_PM_BR_PRED_CR 383 #define POWER5_PME_PM_MRK_DATA_FROM_L2 384 #define POWER5_PME_PM_LSU0_FLUSH_SRQ 385 #define POWER5_PME_PM_FAB_PNtoNN_DIRECT 386 #define POWER5_PME_PM_IOPS_CMPL 387 #define POWER5_PME_PM_L2SC_SHR_INV 388 #define POWER5_PME_PM_L2SA_RCST_DISP_FAIL_OTHER 389 #define POWER5_PME_PM_L2SA_RCST_DISP 390 #define POWER5_PME_PM_SNOOP_RETRY_AB_COLLISION 391 #define POWER5_PME_PM_FAB_PNtoVN_SIDECAR 392 #define POWER5_PME_PM_LSU_LMQ_S0_ALLOC 393 #define POWER5_PME_PM_LSU0_REJECT_LMQ_FULL 394 #define POWER5_PME_PM_SNOOP_PW_RETRY_RQ 395 #define POWER5_PME_PM_DTLB_REF 396 #define POWER5_PME_PM_PTEG_FROM_L3 397 #define POWER5_PME_PM_FAB_M1toVNorNN_SIDECAR_EMPTY 398 #define POWER5_PME_PM_LSU_SRQ_EMPTY_CYC 399 #define POWER5_PME_PM_FPU1_STF 400 #define POWER5_PME_PM_LSU_LMQ_S0_VALID 401 #define POWER5_PME_PM_GCT_USAGE_00to59_CYC 402 #define POWER5_PME_PM_DATA_FROM_L2MISS 403 #define POWER5_PME_PM_GRP_DISP_BLK_SB_CYC 404 #define POWER5_PME_PM_FPU_FMOV_FEST 405 #define POWER5_PME_PM_XER_MAP_FULL_CYC 406 #define POWER5_PME_PM_FLUSH_SB 407 #define POWER5_PME_PM_MRK_DATA_FROM_L375_SHR 408 #define POWER5_PME_PM_MRK_GRP_CMPL 409 #define POWER5_PME_PM_SUSPENDED 410 #define POWER5_PME_PM_GRP_IC_MISS_BR_REDIR_NONSPEC 411 #define POWER5_PME_PM_SNOOP_RD_RETRY_QFULL 412 #define POWER5_PME_PM_L3SB_MOD_INV 413 #define POWER5_PME_PM_DATA_FROM_L35_SHR 414 #define POWER5_PME_PM_LD_MISS_L1_LSU1 415 #define POWER5_PME_PM_STCX_FAIL 416 #define POWER5_PME_PM_DC_PREF_DST 417 #define POWER5_PME_PM_GRP_DISP 418 #define POWER5_PME_PM_L2SA_RCLD_DISP_FAIL_ADDR 419 #define POWER5_PME_PM_FPU0_FPSCR 420 #define POWER5_PME_PM_DATA_FROM_L2 421 #define POWER5_PME_PM_FPU1_DENORM 422 #define POWER5_PME_PM_FPU_1FLOP 423 #define 
POWER5_PME_PM_L2SC_RCLD_DISP_FAIL_OTHER 424 #define POWER5_PME_PM_L2SC_RCST_DISP_FAIL_RC_FULL 425 #define POWER5_PME_PM_FPU0_FSQRT 426 #define POWER5_PME_PM_LD_REF_L1 427 #define POWER5_PME_PM_INST_FROM_L1 428 #define POWER5_PME_PM_TLBIE_HELD 429 #define POWER5_PME_PM_DC_PREF_OUT_OF_STREAMS 430 #define POWER5_PME_PM_MRK_DATA_FROM_L25_MOD_CYC 431 #define POWER5_PME_PM_MRK_LSU1_FLUSH_SRQ 432 #define POWER5_PME_PM_MEM_RQ_DISP_Q0to3 433 #define POWER5_PME_PM_ST_REF_L1_LSU1 434 #define POWER5_PME_PM_MRK_LD_MISS_L1 435 #define POWER5_PME_PM_L1_WRITE_CYC 436 #define POWER5_PME_PM_L2SC_ST_REQ 437 #define POWER5_PME_PM_CMPLU_STALL_FDIV 438 #define POWER5_PME_PM_THRD_SEL_OVER_CLB_EMPTY 439 #define POWER5_PME_PM_BR_MPRED_CR 440 #define POWER5_PME_PM_L3SB_MOD_TAG 441 #define POWER5_PME_PM_MRK_DATA_FROM_L2MISS 442 #define POWER5_PME_PM_LSU_REJECT_SRQ 443 #define POWER5_PME_PM_LD_MISS_L1 444 #define POWER5_PME_PM_INST_FROM_PREF 445 #define POWER5_PME_PM_DC_INV_L2 446 #define POWER5_PME_PM_STCX_PASS 447 #define POWER5_PME_PM_LSU_SRQ_FULL_CYC 448 #define POWER5_PME_PM_FPU_FIN 449 #define POWER5_PME_PM_L2SA_SHR_MOD 450 #define POWER5_PME_PM_LSU_SRQ_STFWD 451 #define POWER5_PME_PM_0INST_CLB_CYC 452 #define POWER5_PME_PM_FXU0_FIN 453 #define POWER5_PME_PM_L2SB_RCST_DISP_FAIL_RC_FULL 454 #define POWER5_PME_PM_THRD_GRP_CMPL_BOTH_CYC 455 #define POWER5_PME_PM_PMC5_OVERFLOW 456 #define POWER5_PME_PM_FPU0_FDIV 457 #define POWER5_PME_PM_PTEG_FROM_L375_SHR 458 #define POWER5_PME_PM_LD_REF_L1_LSU1 459 #define POWER5_PME_PM_L2SA_RC_DISP_FAIL_CO_BUSY 460 #define POWER5_PME_PM_HV_CYC 461 #define POWER5_PME_PM_THRD_PRIO_DIFF_0_CYC 462 #define POWER5_PME_PM_LR_CTR_MAP_FULL_CYC 463 #define POWER5_PME_PM_L3SB_SHR_INV 464 #define POWER5_PME_PM_DATA_FROM_RMEM 465 #define POWER5_PME_PM_DATA_FROM_L275_MOD 466 #define POWER5_PME_PM_LSU0_REJECT_SRQ 467 #define POWER5_PME_PM_LSU1_DERAT_MISS 468 #define POWER5_PME_PM_MRK_LSU_FIN 469 #define POWER5_PME_PM_DTLB_MISS_16M 470 #define 
POWER5_PME_PM_LSU0_FLUSH_UST 471 #define POWER5_PME_PM_L2SC_MOD_TAG 472 #define POWER5_PME_PM_L2SB_RC_DISP_FAIL_CO_BUSY 473 static const pme_power_entry_t power5_pe[] = { [ POWER5_PME_PM_LSU_REJECT_RELOAD_CDF ] = { .pme_name = "PM_LSU_REJECT_RELOAD_CDF", .pme_code = 0x2c6090, .pme_short_desc = "LSU reject due to reload CDF or tag update collision", .pme_long_desc = "Total cycles the Load Store Unit is busy rejecting instructions because of Critical Data Forward. When critical data arrives from the storage system it is formatted and immediately forwarded, bypassing the data cache, to the destination register using the result bus. Any instruction the requires the result bus in the same cycle is rejected. Tag update rejects are caused when an instruction requires access to the Dcache directory or ERAT in the same system when they are being updated. Combined Unit 0 + 1.", }, [ POWER5_PME_PM_FPU1_SINGLE ] = { .pme_name = "PM_FPU1_SINGLE", .pme_code = 0x20e7, .pme_short_desc = "FPU1 executed single precision instruction", .pme_long_desc = "FPU1 has executed a single precision instruction.", }, [ POWER5_PME_PM_L3SB_REF ] = { .pme_name = "PM_L3SB_REF", .pme_code = 0x701c4, .pme_short_desc = "L3 slice B references", .pme_long_desc = "Number of attempts made by this chip cores to find data in the L3. Reported per L3 slice ", }, [ POWER5_PME_PM_THRD_PRIO_DIFF_3or4_CYC ] = { .pme_name = "PM_THRD_PRIO_DIFF_3or4_CYC", .pme_code = 0x430e5, .pme_short_desc = "Cycles thread priority difference is 3 or 4", .pme_long_desc = "Cycles when this thread's priority is higher than the other thread's priority by 3 or 4.", }, [ POWER5_PME_PM_INST_FROM_L275_SHR ] = { .pme_name = "PM_INST_FROM_L275_SHR", .pme_code = 0x322096, .pme_short_desc = "Instruction fetched from L2.75 shared", .pme_long_desc = "An instruction fetch group was fetched with shared (T) data from the L2 on a different module than this processor is located. 
Fetch groups can contain up to 8 instructions", }, [ POWER5_PME_PM_MRK_DATA_FROM_L375_MOD ] = { .pme_name = "PM_MRK_DATA_FROM_L375_MOD", .pme_code = 0x1c70a7, .pme_short_desc = "Marked data loaded from L3.75 modified", .pme_long_desc = "The processor's Data Cache was reloaded with modified (M) data from the L3 of a chip on a different module than this processor is located due to a marked load.", }, [ POWER5_PME_PM_DTLB_MISS_4K ] = { .pme_name = "PM_DTLB_MISS_4K", .pme_code = 0xc40c0, .pme_short_desc = "Data TLB miss for 4K page", .pme_long_desc = "Data TLB references to 4KB pages that missed the TLB. Page size is determined at TLB reload time.", }, [ POWER5_PME_PM_CLB_FULL_CYC ] = { .pme_name = "PM_CLB_FULL_CYC", .pme_code = 0x220e5, .pme_short_desc = "Cycles CLB full", .pme_long_desc = "Cycles when both thread's CLB is full.", }, [ POWER5_PME_PM_MRK_ST_CMPL ] = { .pme_name = "PM_MRK_ST_CMPL", .pme_code = 0x100003, .pme_short_desc = "Marked store instruction completed", .pme_long_desc = "A sampled store has completed (data home)", }, [ POWER5_PME_PM_LSU_FLUSH_LRQ_FULL ] = { .pme_name = "PM_LSU_FLUSH_LRQ_FULL", .pme_code = 0x320e7, .pme_short_desc = "Flush caused by LRQ full", .pme_long_desc = "This thread was flushed at dispatch because its Load Request Queue was full. This allows the other thread to have more machine resources for it to make progress while this thread is stalled.", }, [ POWER5_PME_PM_MRK_DATA_FROM_L275_SHR ] = { .pme_name = "PM_MRK_DATA_FROM_L275_SHR", .pme_code = 0x3c7097, .pme_short_desc = "Marked data loaded from L2.75 shared", .pme_long_desc = "The processor's Data Cache was reloaded with shared (T) data from the L2 on a different module than this processor is located due to a marked load.", }, [ POWER5_PME_PM_1INST_CLB_CYC ] = { .pme_name = "PM_1INST_CLB_CYC", .pme_code = 0x400c1, .pme_short_desc = "Cycles 1 instruction in CLB", .pme_long_desc = "The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. 
Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific.", }, [ POWER5_PME_PM_MEM_SPEC_RD_CANCEL ] = { .pme_name = "PM_MEM_SPEC_RD_CANCEL", .pme_code = 0x721e6, .pme_short_desc = "Speculative memory read cancelled", .pme_long_desc = "Speculative memory read cancelled (i.e. cresp = sourced by L2/L3)", }, [ POWER5_PME_PM_MRK_DTLB_MISS_16M ] = { .pme_name = "PM_MRK_DTLB_MISS_16M", .pme_code = 0xc40c5, .pme_short_desc = "Marked Data TLB misses for 16M page", .pme_long_desc = "Marked Data TLB misses for 16M page", }, [ POWER5_PME_PM_FPU_FDIV ] = { .pme_name = "PM_FPU_FDIV", .pme_code = 0x100088, .pme_short_desc = "FPU executed FDIV instruction", .pme_long_desc = "The floating point unit has executed a divide instruction. This could be fdiv, fdivs, fdiv., fdivs.. Combined Unit 0 + Unit 1.", }, [ POWER5_PME_PM_FPU_SINGLE ] = { .pme_name = "PM_FPU_SINGLE", .pme_code = 0x102090, .pme_short_desc = "FPU executed single precision instruction", .pme_long_desc = "FPU is executing single precision instruction. Combined Unit 0 + Unit 1.", }, [ POWER5_PME_PM_FPU0_FMA ] = { .pme_name = "PM_FPU0_FMA", .pme_code = 0xc1, .pme_short_desc = "FPU0 executed multiply-add instruction", .pme_long_desc = "The floating point unit has executed a multiply-add kind of instruction. 
This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs.", }, [ POWER5_PME_PM_SLB_MISS ] = { .pme_name = "PM_SLB_MISS", .pme_code = 0x280088, .pme_short_desc = "SLB misses", .pme_long_desc = "Total of all Segment Lookaside Buffer (SLB) misses, Instructions + Data.", }, [ POWER5_PME_PM_LSU1_FLUSH_LRQ ] = { .pme_name = "PM_LSU1_FLUSH_LRQ", .pme_code = 0xc00c6, .pme_short_desc = "LSU1 LRQ flushes", .pme_long_desc = "A load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte.", }, [ POWER5_PME_PM_L2SA_ST_HIT ] = { .pme_name = "PM_L2SA_ST_HIT", .pme_code = 0x733e0, .pme_short_desc = "L2 slice A store hits", .pme_long_desc = "A store request made from the core hit in the L2 directory. This event is provided on each of the three L2 slices A, B, and C.", }, [ POWER5_PME_PM_DTLB_MISS ] = { .pme_name = "PM_DTLB_MISS", .pme_code = 0x800c4, .pme_short_desc = "Data TLB misses", .pme_long_desc = "Data TLB misses, all page sizes.", }, [ POWER5_PME_PM_BR_PRED_TA ] = { .pme_name = "PM_BR_PRED_TA", .pme_code = 0x230e3, .pme_short_desc = "A conditional branch was predicted, target prediction", .pme_long_desc = "The target address of a branch instruction was predicted.", }, [ POWER5_PME_PM_MRK_DATA_FROM_L375_MOD_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_L375_MOD_CYC", .pme_code = 0x4c70a7, .pme_short_desc = "Marked load latency from L3.75 modified", .pme_long_desc = "Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. 
To calculate average latency divide this count by the number of marked misses to the same level.", }, [ POWER5_PME_PM_CMPLU_STALL_FXU ] = { .pme_name = "PM_CMPLU_STALL_FXU", .pme_code = 0x211099, .pme_short_desc = "Completion stall caused by FXU instruction", .pme_long_desc = "Following a completion stall (any period when no groups completed) the last instruction to finish before completion resumes was a fixed point instruction.", }, [ POWER5_PME_PM_EXT_INT ] = { .pme_name = "PM_EXT_INT", .pme_code = 0x400003, .pme_short_desc = "External interrupts", .pme_long_desc = "An interrupt due to an external exception occurred", }, [ POWER5_PME_PM_MRK_LSU1_FLUSH_LRQ ] = { .pme_name = "PM_MRK_LSU1_FLUSH_LRQ", .pme_code = 0x810c6, .pme_short_desc = "LSU1 marked LRQ flushes", .pme_long_desc = "A marked load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte.", }, [ POWER5_PME_PM_LSU1_LDF ] = { .pme_name = "PM_LSU1_LDF", .pme_code = 0xc50c4, .pme_short_desc = "LSU1 executed Floating Point load instruction", .pme_long_desc = "A floating point load was executed by LSU1", }, [ POWER5_PME_PM_MRK_ST_GPS ] = { .pme_name = "PM_MRK_ST_GPS", .pme_code = 0x200003, .pme_short_desc = "Marked store sent to GPS", .pme_long_desc = "A sampled store has been sent to the memory subsystem", }, [ POWER5_PME_PM_FAB_CMD_ISSUED ] = { .pme_name = "PM_FAB_CMD_ISSUED", .pme_code = 0x700c7, .pme_short_desc = "Fabric command issued", .pme_long_desc = "Incremented when a chip issues a command on its SnoopA address bus. Each of the two address busses (SnoopA and SnoopB) is capable of one transaction per fabric cycle (one fabric cycle = 2 cpu cycles in normal 2:1 mode), but each chip can only drive the SnoopA bus, and can only drive one transaction every two fabric cycles (i.e., every four cpu cycles). 
In MCM-based systems, two chips interleave their accesses to each of the two fabric busses (SnoopA, SnoopB) to reach a peak capability of one transaction per cpu clock cycle. The two chips that drive SnoopB are wired so that the chips refer to the bus as SnoopA but it is connected to the other two chips as SnoopB. Note that this event will only be recorded by the FBC on the chip that sourced the operation. The signal is delivered at FBC speed and the count must be scaled.", }, [ POWER5_PME_PM_LSU0_SRQ_STFWD ] = { .pme_name = "PM_LSU0_SRQ_STFWD", .pme_code = 0xc20e0, .pme_short_desc = "LSU0 SRQ store forwarded", .pme_long_desc = "Data from a store instruction was forwarded to a load on unit 0. A load that misses L1 but becomes a store forward is treated as a load miss and it causes the DL1 load miss event to be counted. It does not go into the LMQ. If a load that hits L1 but becomes a store forward, then it's not treated as a load miss.", }, [ POWER5_PME_PM_CR_MAP_FULL_CYC ] = { .pme_name = "PM_CR_MAP_FULL_CYC", .pme_code = 0x100c4, .pme_short_desc = "Cycles CR logical operation mapper full", .pme_long_desc = "The Conditional Register mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. 
This event only indicates that the mapper was full, not that dispatch was prevented.", }, [ POWER5_PME_PM_L2SA_RCST_DISP_FAIL_RC_FULL ] = { .pme_name = "PM_L2SA_RCST_DISP_FAIL_RC_FULL", .pme_code = 0x722e0, .pme_short_desc = "L2 slice A RC store dispatch attempt failed due to all RC full", .pme_long_desc = "A Read/Claim dispatch for a store failed because all RC machines are busy.", }, [ POWER5_PME_PM_MRK_LSU0_FLUSH_ULD ] = { .pme_name = "PM_MRK_LSU0_FLUSH_ULD", .pme_code = 0x810c0, .pme_short_desc = "LSU0 marked unaligned load flushes", .pme_long_desc = "A marked load was flushed from unit 0 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1)", }, [ POWER5_PME_PM_LSU_FLUSH_SRQ_FULL ] = { .pme_name = "PM_LSU_FLUSH_SRQ_FULL", .pme_code = 0x330e0, .pme_short_desc = "Flush caused by SRQ full", .pme_long_desc = "This thread was flushed at dispatch because its Store Request Queue was full. This allows the other thread to have more machine resources for it to make progress while this thread is stalled.", }, [ POWER5_PME_PM_FLUSH_IMBAL ] = { .pme_name = "PM_FLUSH_IMBAL", .pme_code = 0x330e3, .pme_short_desc = "Flush caused by thread GCT imbalance", .pme_long_desc = "This thread has been flushed at dispatch because it is stalled and a GCT imbalance exists. GCT thresholds are set in the TSCR register. This allows the other thread to have more machine resources for it to make progress while this thread is stalled.", }, [ POWER5_PME_PM_MEM_RQ_DISP_Q16to19 ] = { .pme_name = "PM_MEM_RQ_DISP_Q16to19", .pme_code = 0x727e6, .pme_short_desc = "Memory read queue dispatched to queues 16-19", .pme_long_desc = "A memory operation was dispatched to read queue 16,17,18 or 19. 
This event is sent from the Memory Controller clock domain and must be scaled accordingly.", }, [ POWER5_PME_PM_THRD_PRIO_DIFF_minus3or4_CYC ] = { .pme_name = "PM_THRD_PRIO_DIFF_minus3or4_CYC", .pme_code = 0x430e1, .pme_short_desc = "Cycles thread priority difference is -3 or -4", .pme_long_desc = "Cycles when this thread's priority is lower than the other thread's priority by 3 or 4.", }, [ POWER5_PME_PM_DATA_FROM_L35_MOD ] = { .pme_name = "PM_DATA_FROM_L35_MOD", .pme_code = 0x2c309e, .pme_short_desc = "Data loaded from L3.5 modified", .pme_long_desc = "The processor's Data Cache was reloaded with modified (M) data from the L3 of a chip on the same module as this processor is located due to a demand load.", }, [ POWER5_PME_PM_MEM_HI_PRIO_WR_CMPL ] = { .pme_name = "PM_MEM_HI_PRIO_WR_CMPL", .pme_code = 0x726e6, .pme_short_desc = "High priority write completed", .pme_long_desc = "A memory write, which was upgraded to high priority, completed. Writes can be upgraded to high priority to ensure that read traffic does not lock out writes. This event is sent from the Memory Controller clock domain and must be scaled accordingly.", }, [ POWER5_PME_PM_FPU1_FDIV ] = { .pme_name = "PM_FPU1_FDIV", .pme_code = 0xc4, .pme_short_desc = "FPU1 executed FDIV instruction", .pme_long_desc = "FPU1 has executed a divide instruction. This could be fdiv, fdivs, fdiv. fdivs.", }, [ POWER5_PME_PM_FPU0_FRSP_FCONV ] = { .pme_name = "PM_FPU0_FRSP_FCONV", .pme_code = 0x10c1, .pme_short_desc = "FPU0 executed FRSP or FCONV instructions", .pme_long_desc = "FPU0 has executed a frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs.", }, [ POWER5_PME_PM_MEM_RQ_DISP ] = { .pme_name = "PM_MEM_RQ_DISP", .pme_code = 0x701c6, .pme_short_desc = "Memory read queue dispatched", .pme_long_desc = "A memory read was dispatched. 
This event is sent from the Memory Controller clock domain and must be scaled accordingly.", }, [ POWER5_PME_PM_LWSYNC_HELD ] = { .pme_name = "PM_LWSYNC_HELD", .pme_code = 0x130e0, .pme_short_desc = "LWSYNC held at dispatch", .pme_long_desc = "Cycles a LWSYNC instruction was held at dispatch. LWSYNC instructions are held at dispatch until all previous loads are done and all previous stores have issued. LWSYNC enters the Store Request Queue and is sent to the storage subsystem but does not wait for a response.", }, [ POWER5_PME_PM_FXU_FIN ] = { .pme_name = "PM_FXU_FIN", .pme_code = 0x313088, .pme_short_desc = "FXU produced a result", .pme_long_desc = "The fixed point unit (Unit 0 + Unit 1) finished an instruction. Instructions that finish may not necessary complete.", }, [ POWER5_PME_PM_DSLB_MISS ] = { .pme_name = "PM_DSLB_MISS", .pme_code = 0x800c5, .pme_short_desc = "Data SLB misses", .pme_long_desc = "A SLB miss for a data request occurred. SLB misses trap to the operating system to resolve.", }, [ POWER5_PME_PM_FXLS1_FULL_CYC ] = { .pme_name = "PM_FXLS1_FULL_CYC", .pme_code = 0x110c4, .pme_short_desc = "Cycles FXU1/LS1 queue full", .pme_long_desc = "The issue queue that feeds the Fixed Point unit 1 / Load Store Unit 1 is full. This condition will prevent dispatch groups from being dispatched. This event only indicates that the queue was full, not that dispatch was prevented.", }, [ POWER5_PME_PM_DATA_FROM_L275_SHR ] = { .pme_name = "PM_DATA_FROM_L275_SHR", .pme_code = 0x3c3097, .pme_short_desc = "Data loaded from L2.75 shared", .pme_long_desc = "The processor's Data Cache was reloaded with shared (T) data from the L2 on a different module than this processor is located due to a demand load. 
", }, [ POWER5_PME_PM_THRD_SEL_T0 ] = { .pme_name = "PM_THRD_SEL_T0", .pme_code = 0x410c0, .pme_short_desc = "Decode selected thread 0", .pme_long_desc = "Thread selection picked thread 0 for decode.", }, [ POWER5_PME_PM_PTEG_RELOAD_VALID ] = { .pme_name = "PM_PTEG_RELOAD_VALID", .pme_code = 0x830e4, .pme_short_desc = "PTEG reload valid", .pme_long_desc = "A Page Table Entry was loaded into the TLB.", }, [ POWER5_PME_PM_LSU_LMQ_LHR_MERGE ] = { .pme_name = "PM_LSU_LMQ_LHR_MERGE", .pme_code = 0xc70e5, .pme_short_desc = "LMQ LHR merges", .pme_long_desc = "A data cache miss occurred for the same real cache line address as an earlier request already in the Load Miss Queue and was merged into the LMQ entry.", }, [ POWER5_PME_PM_MRK_STCX_FAIL ] = { .pme_name = "PM_MRK_STCX_FAIL", .pme_code = 0x820e6, .pme_short_desc = "Marked STCX failed", .pme_long_desc = "A marked stcx (stwcx or stdcx) failed", }, [ POWER5_PME_PM_2INST_CLB_CYC ] = { .pme_name = "PM_2INST_CLB_CYC", .pme_code = 0x400c2, .pme_short_desc = "Cycles 2 instructions in CLB", .pme_long_desc = "The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific.", }, [ POWER5_PME_PM_FAB_PNtoVN_DIRECT ] = { .pme_name = "PM_FAB_PNtoVN_DIRECT", .pme_code = 0x723e7, .pme_short_desc = "PN to VN beat went straight to its destination", .pme_long_desc = "Fabric Data beats that the base chip takes the inbound PN data and passes it through to the outbound VN bus without going into a sidecar. 
The signal is delivered at FBC speed and the count must be scaled accordingly.", }, [ POWER5_PME_PM_PTEG_FROM_L2MISS ] = { .pme_name = "PM_PTEG_FROM_L2MISS", .pme_code = 0x38309b, .pme_short_desc = "PTEG loaded from L2 miss", .pme_long_desc = "A Page Table Entry was loaded into the TLB but not from the local L2.", }, [ POWER5_PME_PM_CMPLU_STALL_LSU ] = { .pme_name = "PM_CMPLU_STALL_LSU", .pme_code = 0x211098, .pme_short_desc = "Completion stall caused by LSU instruction", .pme_long_desc = "Following a completion stall (any period when no groups completed) the last instruction to finish before completion resumes was a load/store instruction.", }, [ POWER5_PME_PM_MRK_DSLB_MISS ] = { .pme_name = "PM_MRK_DSLB_MISS", .pme_code = 0xc50c7, .pme_short_desc = "Marked Data SLB misses", .pme_long_desc = "A Data SLB miss was caused by a marked instruction.", }, [ POWER5_PME_PM_LSU_FLUSH_ULD ] = { .pme_name = "PM_LSU_FLUSH_ULD", .pme_code = 0x1c0088, .pme_short_desc = "LRQ unaligned load flushes", .pme_long_desc = "A load was flushed because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1). Combined Unit 0 + 1.", }, [ POWER5_PME_PM_PTEG_FROM_LMEM ] = { .pme_name = "PM_PTEG_FROM_LMEM", .pme_code = 0x283087, .pme_short_desc = "PTEG loaded from local memory", .pme_long_desc = "A Page Table Entry was loaded into the TLB from memory attached to the same module this proccessor is located on.", }, [ POWER5_PME_PM_MRK_BRU_FIN ] = { .pme_name = "PM_MRK_BRU_FIN", .pme_code = 0x200005, .pme_short_desc = "Marked instruction BRU processing finished", .pme_long_desc = "The branch unit finished a marked instruction. Instructions that finish may not necessary complete.", }, [ POWER5_PME_PM_MEM_WQ_DISP_WRITE ] = { .pme_name = "PM_MEM_WQ_DISP_WRITE", .pme_code = 0x703c6, .pme_short_desc = "Memory write queue dispatched due to write", .pme_long_desc = "A memory write was dispatched to a write queue. 
This event is sent from the Memory Controller clock domain and must be scaled accordingly.", }, [ POWER5_PME_PM_MRK_DATA_FROM_L275_MOD_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_L275_MOD_CYC", .pme_code = 0x4c70a3, .pme_short_desc = "Marked load latency from L2.75 modified", .pme_long_desc = "Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. To calculate average latency divide this count by the number of marked misses to the same level.", }, [ POWER5_PME_PM_LSU1_NCLD ] = { .pme_name = "PM_LSU1_NCLD", .pme_code = 0xc50c5, .pme_short_desc = "LSU1 non-cacheable loads", .pme_long_desc = "A non-cacheable load was executed by Unit 0.", }, [ POWER5_PME_PM_L2SA_RCLD_DISP_FAIL_OTHER ] = { .pme_name = "PM_L2SA_RCLD_DISP_FAIL_OTHER", .pme_code = 0x731e0, .pme_short_desc = "L2 slice A RC load dispatch attempt failed due to other reasons", .pme_long_desc = "A Read/Claim dispatch for a load failed for some reason other than Full or Collision conditions.", }, [ POWER5_PME_PM_SNOOP_PW_RETRY_WQ_PWQ ] = { .pme_name = "PM_SNOOP_PW_RETRY_WQ_PWQ", .pme_code = 0x717c6, .pme_short_desc = "Snoop partial-write retry due to collision with active write or partial-write queue", .pme_long_desc = "A snoop request for a partial write to memory was retried because it matched the cache line of an active write or partial write. When this happens the snoop request is retried and the active write is changed to high priority. This event is sent from the Memory Controller clock domain and must be scaled accordingly.", }, [ POWER5_PME_PM_FPR_MAP_FULL_CYC ] = { .pme_name = "PM_FPR_MAP_FULL_CYC", .pme_code = 0x100c1, .pme_short_desc = "Cycles FPR mapper full", .pme_long_desc = "The floating point unit has executed an add, mult, sub, compare, fsel, fneg, fabs, fnabs, fres, or frsqrte kind of instruction. These are single FLOP operations. 
", }, [ POWER5_PME_PM_FPU1_FULL_CYC ] = { .pme_name = "PM_FPU1_FULL_CYC", .pme_code = 0x100c7, .pme_short_desc = "Cycles FPU1 issue queue full", .pme_long_desc = "The issue queue for FPU1 cannot accept any more instructions. Dispatch to this issue queue is stopped", }, [ POWER5_PME_PM_L3SA_ALL_BUSY ] = { .pme_name = "PM_L3SA_ALL_BUSY", .pme_code = 0x721e3, .pme_short_desc = "L3 slice A active for every cycle all CI/CO machines busy", .pme_long_desc = "Cycles All Castin/Castout machines are busy.", }, [ POWER5_PME_PM_3INST_CLB_CYC ] = { .pme_name = "PM_3INST_CLB_CYC", .pme_code = 0x400c3, .pme_short_desc = "Cycles 3 instructions in CLB", .pme_long_desc = "The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific.", }, [ POWER5_PME_PM_MEM_PWQ_DISP_Q2or3 ] = { .pme_name = "PM_MEM_PWQ_DISP_Q2or3", .pme_code = 0x734e6, .pme_short_desc = "Memory partial-write queue dispatched to Write Queue 2 or 3", .pme_long_desc = "Memory partial-write queue dispatched to Write Queue 2 or 3. This event is sent from the Memory Controller clock domain and must be scaled accordingly.", }, [ POWER5_PME_PM_L2SA_SHR_INV ] = { .pme_name = "PM_L2SA_SHR_INV", .pme_code = 0x710c0, .pme_short_desc = "L2 slice A transition from shared to invalid", .pme_long_desc = "A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. This transition was caused by any external snoop request. The event is provided on each of the three slices A, B, and C. 
NOTE: For this event to be useful the tablewalk duration event should also be counted.", }, [ POWER5_PME_PM_THRESH_TIMEO ] = { .pme_name = "PM_THRESH_TIMEO", .pme_code = 0x30000b, .pme_short_desc = "Threshold timeout", .pme_long_desc = "The threshold timer expired", }, [ POWER5_PME_PM_L2SA_RC_DISP_FAIL_CO_BUSY_ALL ] = { .pme_name = "PM_L2SA_RC_DISP_FAIL_CO_BUSY_ALL", .pme_code = 0x713c0, .pme_short_desc = "L2 slice A RC dispatch attempt failed due to all CO busy", .pme_long_desc = "A Read/Claim dispatch was rejected because all Castout machines were busy.", }, [ POWER5_PME_PM_THRD_SEL_OVER_GCT_IMBAL ] = { .pme_name = "PM_THRD_SEL_OVER_GCT_IMBAL", .pme_code = 0x410c4, .pme_short_desc = "Thread selection overrides caused by GCT imbalance", .pme_long_desc = "Thread selection was overridden because of a GCT imbalance.", }, [ POWER5_PME_PM_FPU_FSQRT ] = { .pme_name = "PM_FPU_FSQRT", .pme_code = 0x200090, .pme_short_desc = "FPU executed FSQRT instruction", .pme_long_desc = "The floating point unit has executed a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. Combined Unit 0 + Unit 1.", }, [ POWER5_PME_PM_MRK_LSU0_FLUSH_LRQ ] = { .pme_name = "PM_MRK_LSU0_FLUSH_LRQ", .pme_code = 0x810c2, .pme_short_desc = "LSU0 marked LRQ flushes", .pme_long_desc = "A marked load was flushed by unit 0 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte.", }, [ POWER5_PME_PM_PMC1_OVERFLOW ] = { .pme_name = "PM_PMC1_OVERFLOW", .pme_code = 0x20000a, .pme_short_desc = "PMC1 Overflow", .pme_long_desc = "Overflows from PMC1 are counted. This effectively widens the PMC. 
The Overflow from the original PMC will not trigger an exception even if the PMU is configured to generate exceptions on overflow.", }, [ POWER5_PME_PM_L3SC_SNOOP_RETRY ] = { .pme_name = "PM_L3SC_SNOOP_RETRY", .pme_code = 0x731e5, .pme_short_desc = "L3 slice C snoop retries", .pme_long_desc = "Number of times an L3 retried a snoop because it got two in at the same time (one on snp_a, one on snp_b)", }, [ POWER5_PME_PM_DATA_TABLEWALK_CYC ] = { .pme_name = "PM_DATA_TABLEWALK_CYC", .pme_code = 0x800c7, .pme_short_desc = "Cycles doing data tablewalks", .pme_long_desc = "Cycles a translation tablewalk is active. While a tablewalk is active any request attempting to access the TLB will be rejected and retried.", }, [ POWER5_PME_PM_THRD_PRIO_6_CYC ] = { .pme_name = "PM_THRD_PRIO_6_CYC", .pme_code = 0x420e5, .pme_short_desc = "Cycles thread running at priority level 6", .pme_long_desc = "Cycles this thread was running at priority level 6.", }, [ POWER5_PME_PM_FPU_FEST ] = { .pme_name = "PM_FPU_FEST", .pme_code = 0x401090, .pme_short_desc = "FPU executed FEST instruction", .pme_long_desc = "The floating point unit has executed an estimate instructions. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. Combined Unit 0 + Unit 1.", }, [ POWER5_PME_PM_FAB_M1toP1_SIDECAR_EMPTY ] = { .pme_name = "PM_FAB_M1toP1_SIDECAR_EMPTY", .pme_code = 0x702c7, .pme_short_desc = "M1 to P1 sidecar empty", .pme_long_desc = "Fabric cycles when the Minus-1 hip/hop sidecars (sidecars for chip to chip data transfer) are empty. 
The signal is delivered at FBC speed and the count must be scaled accordingly.", }, [ POWER5_PME_PM_MRK_DATA_FROM_RMEM ] = { .pme_name = "PM_MRK_DATA_FROM_RMEM", .pme_code = 0x1c70a1, .pme_short_desc = "Marked data loaded from remote memory", .pme_long_desc = "The processor's Data Cache was reloaded due to a marked load from memory attached to a different module than this proccessor is located on.", }, [ POWER5_PME_PM_MRK_DATA_FROM_L35_MOD_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_L35_MOD_CYC", .pme_code = 0x4c70a6, .pme_short_desc = "Marked load latency from L3.5 modified", .pme_long_desc = "Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. To calculate average latency divide this count by the number of marked misses to the same level.", }, [ POWER5_PME_PM_MEM_PWQ_DISP ] = { .pme_name = "PM_MEM_PWQ_DISP", .pme_code = 0x704c6, .pme_short_desc = "Memory partial-write queue dispatched", .pme_long_desc = "Number of Partial Writes dispatched. The MC provides resources to gather partial cacheline writes (Partial line DMA writes & CI-stores) to up to four different cachelines at a time. This event is sent from the Memory Controller clock domain and must be scaled accordingly.", }, [ POWER5_PME_PM_FAB_P1toM1_SIDECAR_EMPTY ] = { .pme_name = "PM_FAB_P1toM1_SIDECAR_EMPTY", .pme_code = 0x701c7, .pme_short_desc = "P1 to M1 sidecar empty", .pme_long_desc = "Fabric cycles when the Plus-1 hip/hop sidecars (sidecars for chip to chip data transfer) are empty. 
The signal is delivered at FBC speed and the count must be scaled accordingly.", }, [ POWER5_PME_PM_LD_MISS_L1_LSU0 ] = { .pme_name = "PM_LD_MISS_L1_LSU0", .pme_code = 0xc10c2, .pme_short_desc = "LSU0 L1 D cache load misses", .pme_long_desc = "Load references that miss the Level 1 Data cache, by unit 0.", }, [ POWER5_PME_PM_SNOOP_PARTIAL_RTRY_QFULL ] = { .pme_name = "PM_SNOOP_PARTIAL_RTRY_QFULL", .pme_code = 0x730e6, .pme_short_desc = "Snoop partial write retry due to partial-write queues full", .pme_long_desc = "A snoop request for a partial write to memory was retried because the write queues that handle partial writes were full. When this happens the active writes are changed to high priority. This event is sent from the Memory Controller clock domain and must be scaled accordingly.", }, [ POWER5_PME_PM_FPU1_STALL3 ] = { .pme_name = "PM_FPU1_STALL3", .pme_code = 0x20e5, .pme_short_desc = "FPU1 stalled in pipe3", .pme_long_desc = "FPU1 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always).", }, [ POWER5_PME_PM_GCT_USAGE_80to99_CYC ] = { .pme_name = "PM_GCT_USAGE_80to99_CYC", .pme_code = 0x30001f, .pme_short_desc = "Cycles GCT 80-99% full", .pme_long_desc = "Cycles when the Global Completion Table has between 80% and 99% of its slots used. The GCT has 20 entries shared between threads", }, [ POWER5_PME_PM_WORK_HELD ] = { .pme_name = "PM_WORK_HELD", .pme_code = 0x40000c, .pme_short_desc = "Work held", .pme_long_desc = "RAS Unit has signaled completion to stop and there are groups waiting to complete", }, [ POWER5_PME_PM_INST_CMPL ] = { .pme_name = "PM_INST_CMPL", .pme_code = 0x100009, .pme_short_desc = "Instructions completed", .pme_long_desc = "Number of PowerPC instructions that completed. 
", }, [ POWER5_PME_PM_LSU1_FLUSH_UST ] = { .pme_name = "PM_LSU1_FLUSH_UST", .pme_code = 0xc00c5, .pme_short_desc = "LSU1 unaligned store flushes", .pme_long_desc = "A store was flushed from unit 1 because it was unaligned (crossed a 4K boundary)", }, [ POWER5_PME_PM_FXU_IDLE ] = { .pme_name = "PM_FXU_IDLE", .pme_code = 0x100012, .pme_short_desc = "FXU idle", .pme_long_desc = "FXU0 and FXU1 are both idle.", }, [ POWER5_PME_PM_LSU0_FLUSH_ULD ] = { .pme_name = "PM_LSU0_FLUSH_ULD", .pme_code = 0xc00c0, .pme_short_desc = "LSU0 unaligned load flushes", .pme_long_desc = "A load was flushed from unit 0 because it was unaligned (crossed a 64 byte boundary, or 32 byte if it missed the L1)", }, [ POWER5_PME_PM_LSU1_REJECT_LMQ_FULL ] = { .pme_name = "PM_LSU1_REJECT_LMQ_FULL", .pme_code = 0xc60e5, .pme_short_desc = "LSU1 reject due to LMQ full or missed data coming", .pme_long_desc = "Total cycles the Load Store Unit 1 is busy rejecting instructions because the Load Miss Queue was full. The LMQ has eight entries. If all eight entries are full, subsequent load instructions are rejected.", }, [ POWER5_PME_PM_GRP_DISP_REJECT ] = { .pme_name = "PM_GRP_DISP_REJECT", .pme_code = 0x120e4, .pme_short_desc = "Group dispatch rejected", .pme_long_desc = "A group that previously attempted dispatch was rejected.", }, [ POWER5_PME_PM_L2SA_MOD_INV ] = { .pme_name = "PM_L2SA_MOD_INV", .pme_code = 0x730e0, .pme_short_desc = "L2 slice A transition from modified to invalid", .pme_long_desc = "A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. 
The event is provided on each of the three slices A, B, and C.", }, [ POWER5_PME_PM_PTEG_FROM_L25_SHR ] = { .pme_name = "PM_PTEG_FROM_L25_SHR", .pme_code = 0x183097, .pme_short_desc = "PTEG loaded from L2.5 shared", .pme_long_desc = "A Page Table Entry was loaded into the TLB with shared (T or SL) data from the L2 of a chip on the same module as this processor is located due to a demand load.", }, [ POWER5_PME_PM_FAB_CMD_RETRIED ] = { .pme_name = "PM_FAB_CMD_RETRIED", .pme_code = 0x710c7, .pme_short_desc = "Fabric command retried", .pme_long_desc = "Incremented when a command issued by a chip on its SnoopA address bus is retried for any reason. The overwhelming majority of retries are due to running out of memory controller queues but retries can also be caused by trying to reference addresses that are in a transient cache state -- e.g. a line is transient after issuing a DCLAIM instruction to a shared line but before the associated store completes. Each chip reports its own counts. The signal is delivered at FBC speed and the count must be scaled accordingly.", }, [ POWER5_PME_PM_L3SA_SHR_INV ] = { .pme_name = "PM_L3SA_SHR_INV", .pme_code = 0x710c3, .pme_short_desc = "L3 slice A transition from shared to invalid", .pme_long_desc = "L3 snooper detects someone doing a store to a line that is Sx in this L3(i.e. invalidate hit SX and dispatched).", }, [ POWER5_PME_PM_L2SB_RC_DISP_FAIL_CO_BUSY_ALL ] = { .pme_name = "PM_L2SB_RC_DISP_FAIL_CO_BUSY_ALL", .pme_code = 0x713c1, .pme_short_desc = "L2 slice B RC dispatch attempt failed due to all CO busy", .pme_long_desc = "A Read/Claim dispatch was rejected because all Castout machines were busy.", }, [ POWER5_PME_PM_L2SA_RCST_DISP_FAIL_ADDR ] = { .pme_name = "PM_L2SA_RCST_DISP_FAIL_ADDR", .pme_code = 0x712c0, .pme_short_desc = "L2 slice A RC store dispatch attempt failed due to address collision with RC/CO/SN/SQ", .pme_long_desc = "A Read/Claim dispatch for a store failed because of an address conflict. 
Two RC machines will never both work on the same line or line in the same congruence class at the same time.", }, [ POWER5_PME_PM_L2SA_RCLD_DISP_FAIL_RC_FULL ] = { .pme_name = "PM_L2SA_RCLD_DISP_FAIL_RC_FULL", .pme_code = 0x721e0, .pme_short_desc = "L2 slice A RC load dispatch attempt failed due to all RC full", .pme_long_desc = "A Read/Claim dispatch for a load failed because all RC machines are busy.", }, [ POWER5_PME_PM_PTEG_FROM_L375_MOD ] = { .pme_name = "PM_PTEG_FROM_L375_MOD", .pme_code = 0x1830a7, .pme_short_desc = "PTEG loaded from L3.75 modified", .pme_long_desc = "A Page Table Entry was loaded into the TLB with modified (M) data from the L3 of a chip on a different module than this processor is located, due to a demand load.", }, [ POWER5_PME_PM_MRK_LSU1_FLUSH_UST ] = { .pme_name = "PM_MRK_LSU1_FLUSH_UST", .pme_code = 0x810c5, .pme_short_desc = "LSU1 marked unaligned store flushes", .pme_long_desc = "A marked store was flushed from unit 1 because it was unaligned (crossed a 4k boundary)", }, [ POWER5_PME_PM_BR_ISSUED ] = { .pme_name = "PM_BR_ISSUED", .pme_code = 0x230e4, .pme_short_desc = "Branches issued", .pme_long_desc = "A branch instruction was issued to the branch unit. 
A branch that was incorrectly predicted may issue and execute multiple times.", }, [ POWER5_PME_PM_MRK_GRP_BR_REDIR ] = { .pme_name = "PM_MRK_GRP_BR_REDIR", .pme_code = 0x212091, .pme_short_desc = "Group experienced marked branch redirect", .pme_long_desc = "A group containing a marked (sampled) instruction experienced a branch redirect.", }, [ POWER5_PME_PM_EE_OFF ] = { .pme_name = "PM_EE_OFF", .pme_code = 0x130e3, .pme_short_desc = "Cycles MSR(EE) bit off", .pme_long_desc = "Cycles MSR(EE) bit was off indicating that interrupts due to external exceptions were masked.", }, [ POWER5_PME_PM_MEM_RQ_DISP_Q4to7 ] = { .pme_name = "PM_MEM_RQ_DISP_Q4to7", .pme_code = 0x712c6, .pme_short_desc = "Memory read queue dispatched to queues 4-7", .pme_long_desc = "A memory operation was dispatched to read queue 4,5,6 or 7. This event is sent from the Memory Controller clock domain and must be scaled accordingly.", }, [ POWER5_PME_PM_MEM_FAST_PATH_RD_DISP ] = { .pme_name = "PM_MEM_FAST_PATH_RD_DISP", .pme_code = 0x713e6, .pme_short_desc = "Fast path memory read dispatched", .pme_long_desc = "Fast path memory read dispatched", }, [ POWER5_PME_PM_INST_FROM_L3 ] = { .pme_name = "PM_INST_FROM_L3", .pme_code = 0x12208d, .pme_short_desc = "Instruction fetched from L3", .pme_long_desc = "An instruction fetch group was fetched from the local L3. 
Fetch groups can contain up to 8 instructions", }, [ POWER5_PME_PM_ITLB_MISS ] = { .pme_name = "PM_ITLB_MISS", .pme_code = 0x800c0, .pme_short_desc = "Instruction TLB misses", .pme_long_desc = "A TLB miss for an Instruction Fetch has occurred", }, [ POWER5_PME_PM_FXU1_BUSY_FXU0_IDLE ] = { .pme_name = "PM_FXU1_BUSY_FXU0_IDLE", .pme_code = 0x400012, .pme_short_desc = "FXU1 busy FXU0 idle", .pme_long_desc = "FXU0 was idle while FXU1 was busy.", }, [ POWER5_PME_PM_FXLS_FULL_CYC ] = { .pme_name = "PM_FXLS_FULL_CYC", .pme_code = 0x411090, .pme_short_desc = "Cycles FXLS queue is full", .pme_long_desc = "Cycles when the issue queues for one or both FXU/LSU units is full. Use with caution since this is the sum of cycles when Unit 0 was full plus Unit 1 full. It does not indicate when both units were full.", }, [ POWER5_PME_PM_DTLB_REF_4K ] = { .pme_name = "PM_DTLB_REF_4K", .pme_code = 0xc40c2, .pme_short_desc = "Data TLB reference for 4K page", .pme_long_desc = "Data TLB references for 4KB pages. Includes hits + misses.", }, [ POWER5_PME_PM_GRP_DISP_VALID ] = { .pme_name = "PM_GRP_DISP_VALID", .pme_code = 0x120e3, .pme_short_desc = "Group dispatch valid", .pme_long_desc = "A group is available for dispatch. This does not mean it was successfully dispatched.", }, [ POWER5_PME_PM_LSU_FLUSH_UST ] = { .pme_name = "PM_LSU_FLUSH_UST", .pme_code = 0x2c0088, .pme_short_desc = "SRQ unaligned store flushes", .pme_long_desc = "A store was flushed because it was unaligned (crossed a 4K boundary). Combined Unit 0 + 1.", }, [ POWER5_PME_PM_FXU1_FIN ] = { .pme_name = "PM_FXU1_FIN", .pme_code = 0x130e6, .pme_short_desc = "FXU1 produced a result", .pme_long_desc = "The Fixed Point unit 1 finished an instruction and produced a result. 
Instructions that finish may not necessary complete.", }, [ POWER5_PME_PM_THRD_PRIO_4_CYC ] = { .pme_name = "PM_THRD_PRIO_4_CYC", .pme_code = 0x420e3, .pme_short_desc = "Cycles thread running at priority level 4", .pme_long_desc = "Cycles this thread was running at priority level 4.", }, [ POWER5_PME_PM_MRK_DATA_FROM_L35_MOD ] = { .pme_name = "PM_MRK_DATA_FROM_L35_MOD", .pme_code = 0x2c709e, .pme_short_desc = "Marked data loaded from L3.5 modified", .pme_long_desc = "The processor's Data Cache was reloaded with modified (M) data from the L3 of a chip on the same module as this processor is located due to a marked load.", }, [ POWER5_PME_PM_4INST_CLB_CYC ] = { .pme_name = "PM_4INST_CLB_CYC", .pme_code = 0x400c4, .pme_short_desc = "Cycles 4 instructions in CLB", .pme_long_desc = "The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific.", }, [ POWER5_PME_PM_MRK_DTLB_REF_16M ] = { .pme_name = "PM_MRK_DTLB_REF_16M", .pme_code = 0xc40c7, .pme_short_desc = "Marked Data TLB reference for 16M page", .pme_long_desc = "Data TLB references by a marked instruction for 16MB pages.", }, [ POWER5_PME_PM_INST_FROM_L375_MOD ] = { .pme_name = "PM_INST_FROM_L375_MOD", .pme_code = 0x42209d, .pme_short_desc = "Instruction fetched from L3.75 modified", .pme_long_desc = "An instruction fetch group was fetched with modified (M) data from the L3 of a chip on a different module than this processor is located. 
Fetch groups can contain up to 8 instructions", }, [ POWER5_PME_PM_L2SC_RCST_DISP_FAIL_ADDR ] = { .pme_name = "PM_L2SC_RCST_DISP_FAIL_ADDR", .pme_code = 0x712c2, .pme_short_desc = "L2 slice C RC store dispatch attempt failed due to address collision with RC/CO/SN/SQ", .pme_long_desc = "A Read/Claim dispatch for a store failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time.", }, [ POWER5_PME_PM_GRP_CMPL ] = { .pme_name = "PM_GRP_CMPL", .pme_code = 0x300013, .pme_short_desc = "Group completed", .pme_long_desc = "A group completed. Microcoded instructions that span multiple groups will generate this event once per group.", }, [ POWER5_PME_PM_FPU1_1FLOP ] = { .pme_name = "PM_FPU1_1FLOP", .pme_code = 0xc7, .pme_short_desc = "FPU1 executed add, mult, sub, cmp or sel instruction", .pme_long_desc = "The floating point unit has executed an add, mult, sub, compare, fsel, fneg, fabs, fnabs, fres, or frsqrte kind of instruction. These are single FLOP operations.", }, [ POWER5_PME_PM_FPU_FRSP_FCONV ] = { .pme_name = "PM_FPU_FRSP_FCONV", .pme_code = 0x301090, .pme_short_desc = "FPU executed FRSP or FCONV instructions", .pme_long_desc = "The floating point unit has executed a frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. Combined Unit 0 + Unit 1.", }, [ POWER5_PME_PM_5INST_CLB_CYC ] = { .pme_name = "PM_5INST_CLB_CYC", .pme_code = 0x400c5, .pme_short_desc = "Cycles 5 instructions in CLB", .pme_long_desc = "The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. 
Each thread has its own set of CLB; these events are thread specific.", }, [ POWER5_PME_PM_L3SC_REF ] = { .pme_name = "PM_L3SC_REF", .pme_code = 0x701c5, .pme_short_desc = "L3 slice C references", .pme_long_desc = "Number of attempts made by this chip cores to find data in the L3. Reported per L3 slice.", }, [ POWER5_PME_PM_THRD_L2MISS_BOTH_CYC ] = { .pme_name = "PM_THRD_L2MISS_BOTH_CYC", .pme_code = 0x410c7, .pme_short_desc = "Cycles both threads in L2 misses", .pme_long_desc = "Cycles that both threads have L2 miss pending. If only one thread has a L2 miss pending the other thread is given priority at decode. If both threads have L2 miss pending decode priority is determined by the number of GCT entries used.", }, [ POWER5_PME_PM_MEM_PW_GATH ] = { .pme_name = "PM_MEM_PW_GATH", .pme_code = 0x714c6, .pme_short_desc = "Memory partial-write gathered", .pme_long_desc = "Two or more partial-writes have been merged into a single memory write. This event is sent from the Memory Controller clock domain and must be scaled accordingly.", }, [ POWER5_PME_PM_FAB_PNtoNN_SIDECAR ] = { .pme_name = "PM_FAB_PNtoNN_SIDECAR", .pme_code = 0x713c7, .pme_short_desc = "PN to NN beat went to sidecar first", .pme_long_desc = "Fabric Data beats that the base chip takes the inbound PN data and forwards it on to the outbound NN data bus after going into a sidecar first. The signal is delivered at FBC speed and the count must be scaled.", }, [ POWER5_PME_PM_FAB_DCLAIM_ISSUED ] = { .pme_name = "PM_FAB_DCLAIM_ISSUED", .pme_code = 0x720e7, .pme_short_desc = "dclaim issued", .pme_long_desc = "A DCLAIM command was issued. Each chip reports its own counts. The signal is delivered at FBC speed and the count must be scaled accordingly. ", }, [ POWER5_PME_PM_GRP_IC_MISS ] = { .pme_name = "PM_GRP_IC_MISS", .pme_code = 0x120e7, .pme_short_desc = "Group experienced I cache miss", .pme_long_desc = "Number of groups, counted at dispatch, that have encountered an icache miss redirect. 
Every group constructed from a fetch group that missed the instruction cache will count.", }, [ POWER5_PME_PM_INST_FROM_L35_SHR ] = { .pme_name = "PM_INST_FROM_L35_SHR", .pme_code = 0x12209d, .pme_short_desc = "Instruction fetched from L3.5 shared", .pme_long_desc = "An instruction fetch group was fetched with shared (S) data from the L3 of a chip on the same module as this processor is located. Fetch groups can contain up to 8 instructions", }, [ POWER5_PME_PM_LSU_LMQ_FULL_CYC ] = { .pme_name = "PM_LSU_LMQ_FULL_CYC", .pme_code = 0xc30e7, .pme_short_desc = "Cycles LMQ full", .pme_long_desc = "The Load Miss Queue was full.", }, [ POWER5_PME_PM_MRK_DATA_FROM_L2_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_L2_CYC", .pme_code = 0x2c70a0, .pme_short_desc = "Marked load latency from L2", .pme_long_desc = "Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. To calculate average latency divide this count by the number of marked misses to the same level.", }, [ POWER5_PME_PM_LSU_SRQ_SYNC_CYC ] = { .pme_name = "PM_LSU_SRQ_SYNC_CYC", .pme_code = 0x830e5, .pme_short_desc = "SRQ sync duration", .pme_long_desc = "Cycles that a sync instruction is active in the Store Request Queue.", }, [ POWER5_PME_PM_LSU0_BUSY_REJECT ] = { .pme_name = "PM_LSU0_BUSY_REJECT", .pme_code = 0xc20e3, .pme_short_desc = "LSU0 busy due to reject", .pme_long_desc = "Total cycles the Load Store Unit 0 is busy rejecting instructions. ", }, [ POWER5_PME_PM_LSU_REJECT_ERAT_MISS ] = { .pme_name = "PM_LSU_REJECT_ERAT_MISS", .pme_code = 0x1c6090, .pme_short_desc = "LSU reject due to ERAT miss", .pme_long_desc = "Total cycles the Load Store Unit is busy rejecting instructions due to an ERAT miss. Combined unit 0 + 1. 
Requests that miss the Derat are rejected and retried until the request hits in the Erat.", }, [ POWER5_PME_PM_MRK_DATA_FROM_RMEM_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_RMEM_CYC", .pme_code = 0x4c70a1, .pme_short_desc = "Marked load latency from remote memory", .pme_long_desc = "Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. To calculate average latency divide this count by the number of marked misses to the same level.", }, [ POWER5_PME_PM_DATA_FROM_L375_SHR ] = { .pme_name = "PM_DATA_FROM_L375_SHR", .pme_code = 0x3c309e, .pme_short_desc = "Data loaded from L3.75 shared", .pme_long_desc = "The processor's Data Cache was reloaded with shared (S) data from the L3 of a chip on a different module than this processor is located due to a demand load.", }, [ POWER5_PME_PM_FPU0_FMOV_FEST ] = { .pme_name = "PM_FPU0_FMOV_FEST", .pme_code = 0x10c0, .pme_short_desc = "FPU0 executed FMOV or FEST instructions", .pme_long_desc = "FPU0 has executed a move kind of instruction or one of the estimate instructions. 
This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ.", }, [ POWER5_PME_PM_PTEG_FROM_L25_MOD ] = { .pme_name = "PM_PTEG_FROM_L25_MOD", .pme_code = 0x283097, .pme_short_desc = "PTEG loaded from L2.5 modified", .pme_long_desc = "A Page Table Entry was loaded into the TLB with modified (M) data from the L2 of a chip on the same module as this processor is located due to a demand load.", }, [ POWER5_PME_PM_LD_REF_L1_LSU0 ] = { .pme_name = "PM_LD_REF_L1_LSU0", .pme_code = 0xc10c0, .pme_short_desc = "LSU0 L1 D cache load references", .pme_long_desc = "Load references to Level 1 Data Cache, by unit 0.", }, [ POWER5_PME_PM_THRD_PRIO_7_CYC ] = { .pme_name = "PM_THRD_PRIO_7_CYC", .pme_code = 0x420e6, .pme_short_desc = "Cycles thread running at priority level 7", .pme_long_desc = "Cycles this thread was running at priority level 7.", }, [ POWER5_PME_PM_LSU1_FLUSH_SRQ ] = { .pme_name = "PM_LSU1_FLUSH_SRQ", .pme_code = 0xc00c7, .pme_short_desc = "LSU1 SRQ lhs flushes", .pme_long_desc = "A store was flushed because younger load hits and older store that is already in the SRQ or in the same group. ", }, [ POWER5_PME_PM_L2SC_RCST_DISP ] = { .pme_name = "PM_L2SC_RCST_DISP", .pme_code = 0x702c2, .pme_short_desc = "L2 slice C RC store dispatch attempt", .pme_long_desc = "A Read/Claim dispatch for a Store was attempted.", }, [ POWER5_PME_PM_CMPLU_STALL_DIV ] = { .pme_name = "PM_CMPLU_STALL_DIV", .pme_code = 0x411099, .pme_short_desc = "Completion stall caused by DIV instruction", .pme_long_desc = "Following a completion stall (any period when no groups completed) the last instruction to finish before completion resumes was a fixed point divide instruction. This is a subset of PM_CMPLU_STALL_FXU.", }, [ POWER5_PME_PM_MEM_RQ_DISP_Q12to15 ] = { .pme_name = "PM_MEM_RQ_DISP_Q12to15", .pme_code = 0x732e6, .pme_short_desc = "Memory read queue dispatched to queues 12-15", .pme_long_desc = "A memory operation was dispatched to read queue 12,13,14 or 15. 
This event is sent from the Memory Controller clock domain and must be scaled accordingly.", }, [ POWER5_PME_PM_INST_FROM_L375_SHR ] = { .pme_name = "PM_INST_FROM_L375_SHR", .pme_code = 0x32209d, .pme_short_desc = "Instruction fetched from L3.75 shared", .pme_long_desc = "An instruction fetch group was fetched with shared (S) data from the L3 of a chip on a different module than this processor is located. Fetch groups can contain up to 8 instructions", }, [ POWER5_PME_PM_ST_REF_L1 ] = { .pme_name = "PM_ST_REF_L1", .pme_code = 0x3c1090, .pme_short_desc = "L1 D cache store references", .pme_long_desc = "Store references to the Data Cache. Combined Unit 0 + 1.", }, [ POWER5_PME_PM_L3SB_ALL_BUSY ] = { .pme_name = "PM_L3SB_ALL_BUSY", .pme_code = 0x721e4, .pme_short_desc = "L3 slice B active for every cycle all CI/CO machines busy", .pme_long_desc = "Cycles All Castin/Castout machines are busy.", }, [ POWER5_PME_PM_FAB_P1toVNorNN_SIDECAR_EMPTY ] = { .pme_name = "PM_FAB_P1toVNorNN_SIDECAR_EMPTY", .pme_code = 0x711c7, .pme_short_desc = "P1 to VN/NN sidecar empty", .pme_long_desc = "Fabric cycles when the Plus-1 jump sidecar (sidecars for mcm to mcm data transfer) is empty. The signal is delivered at FBC speed and the count must be scaled accordingly.", }, [ POWER5_PME_PM_MRK_DATA_FROM_L275_SHR_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_L275_SHR_CYC", .pme_code = 0x2c70a3, .pme_short_desc = "Marked load latency from L2.75 shared", .pme_long_desc = "Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. To calculate average latency divide this count by the number of marked misses to the same level.", }, [ POWER5_PME_PM_FAB_HOLDtoNN_EMPTY ] = { .pme_name = "PM_FAB_HOLDtoNN_EMPTY", .pme_code = 0x722e7, .pme_short_desc = "Hold buffer to NN empty", .pme_long_desc = "Fabric cyles when the Next Node out hold-buffers are emtpy. 
The signal is delivered at FBC speed and the count must be scaled accordingly.", }, [ POWER5_PME_PM_DATA_FROM_LMEM ] = { .pme_name = "PM_DATA_FROM_LMEM", .pme_code = 0x2c3087, .pme_short_desc = "Data loaded from local memory", .pme_long_desc = "The processor's Data Cache was reloaded from memory attached to the same module this proccessor is located on.", }, [ POWER5_PME_PM_RUN_CYC ] = { .pme_name = "PM_RUN_CYC", .pme_code = 0x100005, .pme_short_desc = "Run cycles", .pme_long_desc = "Processor Cycles gated by the run latch. Operating systems use the run latch to indicate when they are doing useful work. The run latch is typically cleared in the OS idle loop. Gating by the run latch filters out the idle loop.", }, [ POWER5_PME_PM_PTEG_FROM_RMEM ] = { .pme_name = "PM_PTEG_FROM_RMEM", .pme_code = 0x1830a1, .pme_short_desc = "PTEG loaded from remote memory", .pme_long_desc = "A Page Table Entry was loaded into the TLB from memory attached to a different module than this proccessor is located on.", }, [ POWER5_PME_PM_L2SC_RCLD_DISP ] = { .pme_name = "PM_L2SC_RCLD_DISP", .pme_code = 0x701c2, .pme_short_desc = "L2 slice C RC load dispatch attempt", .pme_long_desc = "A Read/Claim dispatch for a Load was attempted", }, [ POWER5_PME_PM_LSU0_LDF ] = { .pme_name = "PM_LSU0_LDF", .pme_code = 0xc50c0, .pme_short_desc = "LSU0 executed Floating Point load instruction", .pme_long_desc = "A floating point load was executed by LSU0", }, [ POWER5_PME_PM_LSU_LRQ_S0_VALID ] = { .pme_name = "PM_LSU_LRQ_S0_VALID", .pme_code = 0xc20e2, .pme_short_desc = "LRQ slot 0 valid", .pme_long_desc = "This signal is asserted every cycle that the Load Request Queue slot zero is valid. The SRQ is 32 entries long and is allocated round-robin. In SMT mode the LRQ is split between the two threads (16 entries each).", }, [ POWER5_PME_PM_PMC3_OVERFLOW ] = { .pme_name = "PM_PMC3_OVERFLOW", .pme_code = 0x40000a, .pme_short_desc = "PMC3 Overflow", .pme_long_desc = "Overflows from PMC3 are counted. 
This effectively widens the PMC. The Overflow from the original PMC will not trigger an exception even if the PMU is configured to generate exceptions on overflow.", }, [ POWER5_PME_PM_MRK_IMR_RELOAD ] = { .pme_name = "PM_MRK_IMR_RELOAD", .pme_code = 0x820e2, .pme_short_desc = "Marked IMR reloaded", .pme_long_desc = "A DL1 reload occurred due to marked load", }, [ POWER5_PME_PM_MRK_GRP_TIMEO ] = { .pme_name = "PM_MRK_GRP_TIMEO", .pme_code = 0x40000b, .pme_short_desc = "Marked group completion timeout", .pme_long_desc = "The sampling timeout expired indicating that the previously sampled instruction is no longer in the processor", }, [ POWER5_PME_PM_ST_MISS_L1 ] = { .pme_name = "PM_ST_MISS_L1", .pme_code = 0xc10c3, .pme_short_desc = "L1 D cache store misses", .pme_long_desc = "A store missed the dcache. Combined Unit 0 + 1.", }, [ POWER5_PME_PM_STOP_COMPLETION ] = { .pme_name = "PM_STOP_COMPLETION", .pme_code = 0x300018, .pme_short_desc = "Completion stopped", .pme_long_desc = "RAS Unit has signaled completion to stop", }, [ POWER5_PME_PM_LSU_BUSY_REJECT ] = { .pme_name = "PM_LSU_BUSY_REJECT", .pme_code = 0x1c2090, .pme_short_desc = "LSU busy due to reject", .pme_long_desc = "Total cycles the Load Store Unit is busy rejecting instructions. Combined unit 0 + 1.", }, [ POWER5_PME_PM_ISLB_MISS ] = { .pme_name = "PM_ISLB_MISS", .pme_code = 0x800c1, .pme_short_desc = "Instruction SLB misses", .pme_long_desc = "A SLB miss for an instruction fetch as occurred", }, [ POWER5_PME_PM_CYC ] = { .pme_name = "PM_CYC", .pme_code = 0xf, .pme_short_desc = "Processor cycles", .pme_long_desc = "Processor cycles", }, [ POWER5_PME_PM_THRD_ONE_RUN_CYC ] = { .pme_name = "PM_THRD_ONE_RUN_CYC", .pme_code = 0x10000b, .pme_short_desc = "One of the threads in run cycles", .pme_long_desc = "At least one thread has set its run latch. Operating systems use the run latch to indicate when they are doing useful work. The run latch is typically cleared in the OS idle loop. 
This event does not respect FCWAIT.", }, [ POWER5_PME_PM_GRP_BR_REDIR_NONSPEC ] = { .pme_name = "PM_GRP_BR_REDIR_NONSPEC", .pme_code = 0x112091, .pme_short_desc = "Group experienced non-speculative branch redirect", .pme_long_desc = "Number of groups, counted at completion, that have encountered a branch redirect.", }, [ POWER5_PME_PM_LSU1_SRQ_STFWD ] = { .pme_name = "PM_LSU1_SRQ_STFWD", .pme_code = 0xc20e4, .pme_short_desc = "LSU1 SRQ store forwarded", .pme_long_desc = "Data from a store instruction was forwarded to a load on unit 1. A load that misses L1 but becomes a store forward is treated as a load miss and it causes the DL1 load miss event to be counted. It does not go into the LMQ. If a load that hits L1 but becomes a store forward, then it's not treated as a load miss.", }, [ POWER5_PME_PM_L3SC_MOD_INV ] = { .pme_name = "PM_L3SC_MOD_INV", .pme_code = 0x730e5, .pme_short_desc = "L3 slice C transition from modified to invalid", .pme_long_desc = "L3 snooper detects someone doing a store to a line that is truly M in this L3 (i.e. 
L3 going M=>I) Mu|Me are not included since they are formed due to a previous read op Tx is not included since it is considered shared at this point.", }, [ POWER5_PME_PM_L2_PREF ] = { .pme_name = "PM_L2_PREF", .pme_code = 0xc50c3, .pme_short_desc = "L2 cache prefetches", .pme_long_desc = "A request to prefetch data into L2 was made", }, [ POWER5_PME_PM_GCT_NOSLOT_BR_MPRED ] = { .pme_name = "PM_GCT_NOSLOT_BR_MPRED", .pme_code = 0x41009c, .pme_short_desc = "No slot in GCT caused by branch mispredict", .pme_long_desc = "Cycles when the Global Completion Table has no slots from this thread because of a branch misprediction.", }, [ POWER5_PME_PM_MRK_DATA_FROM_L25_MOD ] = { .pme_name = "PM_MRK_DATA_FROM_L25_MOD", .pme_code = 0x2c7097, .pme_short_desc = "Marked data loaded from L2.5 modified", .pme_long_desc = "The processor's Data Cache was reloaded with modified (M) data from the L2 of a chip on the same module as this processor is located due to a marked load.", }, [ POWER5_PME_PM_L2SB_MOD_INV ] = { .pme_name = "PM_L2SB_MOD_INV", .pme_code = 0x730e1, .pme_short_desc = "L2 slice B transition from modified to invalid", .pme_long_desc = "A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C.", }, [ POWER5_PME_PM_L2SB_ST_REQ ] = { .pme_name = "PM_L2SB_ST_REQ", .pme_code = 0x723e1, .pme_short_desc = "L2 slice B store requests", .pme_long_desc = "A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. 
The event is provided on each of the three slices A, B, and C.", }, [ POWER5_PME_PM_MRK_L1_RELOAD_VALID ] = { .pme_name = "PM_MRK_L1_RELOAD_VALID", .pme_code = 0xc70e4, .pme_short_desc = "Marked L1 reload data source valid", .pme_long_desc = "The source information is valid and is for a marked load", }, [ POWER5_PME_PM_L3SB_HIT ] = { .pme_name = "PM_L3SB_HIT", .pme_code = 0x711c4, .pme_short_desc = "L3 slice B hits", .pme_long_desc = "Number of attempts made by this chip cores that resulted in an L3 hit. Reported per L3 slice", }, [ POWER5_PME_PM_L2SB_SHR_MOD ] = { .pme_name = "PM_L2SB_SHR_MOD", .pme_code = 0x700c1, .pme_short_desc = "L2 slice B transition from shared to modified", .pme_long_desc = "A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A, B, and C. ", }, [ POWER5_PME_PM_EE_OFF_EXT_INT ] = { .pme_name = "PM_EE_OFF_EXT_INT", .pme_code = 0x130e7, .pme_short_desc = "Cycles MSR(EE) bit off and external interrupt pending", .pme_long_desc = "Cycles when an interrupt due to an external exception is pending but external exceptions were masked.", }, [ POWER5_PME_PM_1PLUS_PPC_CMPL ] = { .pme_name = "PM_1PLUS_PPC_CMPL", .pme_code = 0x100013, .pme_short_desc = "One or more PPC instruction completed", .pme_long_desc = "A group containing at least one PPC instruction completed. For microcoded instructions that span multiple groups, this will only occur once.", }, [ POWER5_PME_PM_L2SC_SHR_MOD ] = { .pme_name = "PM_L2SC_SHR_MOD", .pme_code = 0x700c2, .pme_short_desc = "L2 slice C transition from shared to modified", .pme_long_desc = "A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. 
This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A, B, and C. ", }, [ POWER5_PME_PM_PMC6_OVERFLOW ] = { .pme_name = "PM_PMC6_OVERFLOW", .pme_code = 0x30001a, .pme_short_desc = "PMC6 Overflow", .pme_long_desc = "Overflows from PMC6 are counted. This effectively widens the PMC. The Overflow from the original PMC will not trigger an exception even if the PMU is configured to generate exceptions on overflow.", }, [ POWER5_PME_PM_LSU_LRQ_FULL_CYC ] = { .pme_name = "PM_LSU_LRQ_FULL_CYC", .pme_code = 0x110c2, .pme_short_desc = "Cycles LRQ full", .pme_long_desc = "Cycles when the LRQ is full.", }, [ POWER5_PME_PM_IC_PREF_INSTALL ] = { .pme_name = "PM_IC_PREF_INSTALL", .pme_code = 0x210c7, .pme_short_desc = "Instruction prefetched installed in prefetch buffer", .pme_long_desc = "A prefetch buffer entry (line) is allocated but the request is not a demand fetch.", }, [ POWER5_PME_PM_TLB_MISS ] = { .pme_name = "PM_TLB_MISS", .pme_code = 0x180088, .pme_short_desc = "TLB misses", .pme_long_desc = "Total of Data TLB mises + Instruction TLB misses", }, [ POWER5_PME_PM_GCT_FULL_CYC ] = { .pme_name = "PM_GCT_FULL_CYC", .pme_code = 0x100c0, .pme_short_desc = "Cycles GCT full", .pme_long_desc = "The Global Completion Table is completely full.", }, [ POWER5_PME_PM_FXU_BUSY ] = { .pme_name = "PM_FXU_BUSY", .pme_code = 0x200012, .pme_short_desc = "FXU busy", .pme_long_desc = "Cycles when both FXU0 and FXU1 are busy.", }, [ POWER5_PME_PM_MRK_DATA_FROM_L3_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_L3_CYC", .pme_code = 0x2c70a4, .pme_short_desc = "Marked load latency from L3", .pme_long_desc = "Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. 
To calculate average latency divide this count by the number of marked misses to the same level.", }, [ POWER5_PME_PM_LSU_REJECT_LMQ_FULL ] = { .pme_name = "PM_LSU_REJECT_LMQ_FULL", .pme_code = 0x2c6088, .pme_short_desc = "LSU reject due to LMQ full or missed data coming", .pme_long_desc = "Total cycles the Load Store Unit is busy rejecting instructions because the Load Miss Queue was full. The LMQ has eight entries. If all the eight entries are full, subsequent load instructions are rejected. Combined unit 0 + 1.", }, [ POWER5_PME_PM_LSU_SRQ_S0_ALLOC ] = { .pme_name = "PM_LSU_SRQ_S0_ALLOC", .pme_code = 0xc20e5, .pme_short_desc = "SRQ slot 0 allocated", .pme_long_desc = "SRQ Slot zero was allocated", }, [ POWER5_PME_PM_GRP_MRK ] = { .pme_name = "PM_GRP_MRK", .pme_code = 0x100014, .pme_short_desc = "Group marked in IDU", .pme_long_desc = "A group was sampled (marked). The group is called a marked group. One instruction within the group is tagged for detailed monitoring. The sampled instruction is called a marked instructions. Events associated with the marked instruction are annotated with the marked term.", }, [ POWER5_PME_PM_INST_FROM_L25_SHR ] = { .pme_name = "PM_INST_FROM_L25_SHR", .pme_code = 0x122096, .pme_short_desc = "Instruction fetched from L2.5 shared", .pme_long_desc = "An instruction fetch group was fetched with shared (T or SL) data from the L2 of a chip on the same module as this processor is located. Fetch groups can contain up to 8 instructions.", }, [ POWER5_PME_PM_FPU1_FIN ] = { .pme_name = "PM_FPU1_FIN", .pme_code = 0x10c7, .pme_short_desc = "FPU1 produced a result", .pme_long_desc = "FPU1 finished, produced a result. This only indicates finish, not completion. 
Floating Point Stores are included in this count but not Floating Point Loads., , ", }, [ POWER5_PME_PM_DC_PREF_STREAM_ALLOC ] = { .pme_name = "PM_DC_PREF_STREAM_ALLOC", .pme_code = 0x830e7, .pme_short_desc = "D cache new prefetch stream allocated", .pme_long_desc = "A new Prefetch Stream was allocated.", }, [ POWER5_PME_PM_BR_MPRED_TA ] = { .pme_name = "PM_BR_MPRED_TA", .pme_code = 0x230e6, .pme_short_desc = "Branch mispredictions due to target address", .pme_long_desc = "A branch instruction target was incorrectly predicted. This will result in a branch mispredict flush unless a flush is detected from an older instruction.", }, [ POWER5_PME_PM_CRQ_FULL_CYC ] = { .pme_name = "PM_CRQ_FULL_CYC", .pme_code = 0x110c1, .pme_short_desc = "Cycles CR issue queue full", .pme_long_desc = "The issue queue that feeds the Conditional Register unit is full. This condition will prevent dispatch groups from being dispatched. This event only indicates that the queue was full, not that dispatch was prevented.", }, [ POWER5_PME_PM_L2SA_RCLD_DISP ] = { .pme_name = "PM_L2SA_RCLD_DISP", .pme_code = 0x701c0, .pme_short_desc = "L2 slice A RC load dispatch attempt", .pme_long_desc = "A Read/Claim dispatch for a Load was attempted", }, [ POWER5_PME_PM_SNOOP_WR_RETRY_QFULL ] = { .pme_name = "PM_SNOOP_WR_RETRY_QFULL", .pme_code = 0x710c6, .pme_short_desc = "Snoop read retry due to read queue full", .pme_long_desc = "A snoop request for a write to memory was retried because the write queues were full. When this happens the snoop request is retried and the writes in the write reorder queue are changed to high priority. 
This event is sent from the Memory Controller clock domain and must be scaled accordingly.", }, [ POWER5_PME_PM_MRK_DTLB_REF_4K ] = { .pme_name = "PM_MRK_DTLB_REF_4K", .pme_code = 0xc40c3, .pme_short_desc = "Marked Data TLB reference for 4K page", .pme_long_desc = "Data TLB references by a marked instruction for 4KB pages.", }, [ POWER5_PME_PM_LSU_SRQ_S0_VALID ] = { .pme_name = "PM_LSU_SRQ_S0_VALID", .pme_code = 0xc20e1, .pme_short_desc = "SRQ slot 0 valid", .pme_long_desc = "This signal is asserted every cycle that the Store Request Queue slot zero is valid. The SRQ is 32 entries long and is allocated round-robin. In SMT mode the SRQ is split between the two threads (16 entries each).", }, [ POWER5_PME_PM_LSU0_FLUSH_LRQ ] = { .pme_name = "PM_LSU0_FLUSH_LRQ", .pme_code = 0xc00c2, .pme_short_desc = "LSU0 LRQ flushes", .pme_long_desc = "A load was flushed by unit 0 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte.", }, [ POWER5_PME_PM_INST_FROM_L275_MOD ] = { .pme_name = "PM_INST_FROM_L275_MOD", .pme_code = 0x422096, .pme_short_desc = "Instruction fetched from L2.75 modified", .pme_long_desc = "An instruction fetch group was fetched with modified (M) data from the L2 on a different module than this processor is located. 
Fetch groups can contain up to 8 instructions ", }, [ POWER5_PME_PM_GCT_EMPTY_CYC ] = { .pme_name = "PM_GCT_EMPTY_CYC", .pme_code = 0x200004, .pme_short_desc = "Cycles GCT empty", .pme_long_desc = "The Global Completion Table is completely empty", }, [ POWER5_PME_PM_LARX_LSU0 ] = { .pme_name = "PM_LARX_LSU0", .pme_code = 0x820e7, .pme_short_desc = "Larx executed on LSU0", .pme_long_desc = "A larx (lwarx or ldarx) was executed on side 0 (there is no corresponding unit 1 event since larx instructions can only execute on unit 0)", }, [ POWER5_PME_PM_THRD_PRIO_DIFF_5or6_CYC ] = { .pme_name = "PM_THRD_PRIO_DIFF_5or6_CYC", .pme_code = 0x430e6, .pme_short_desc = "Cycles thread priority difference is 5 or 6", .pme_long_desc = "Cycles when this thread's priority is higher than the other thread's priority by 5 or 6.", }, [ POWER5_PME_PM_SNOOP_RETRY_1AHEAD ] = { .pme_name = "PM_SNOOP_RETRY_1AHEAD", .pme_code = 0x725e6, .pme_short_desc = "Snoop retry due to one ahead collision", .pme_long_desc = "Snoop retry due to one ahead collision", }, [ POWER5_PME_PM_FPU1_FSQRT ] = { .pme_name = "PM_FPU1_FSQRT", .pme_code = 0xc6, .pme_short_desc = "FPU1 executed FSQRT instruction", .pme_long_desc = "FPU1 has executed a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs.", }, [ POWER5_PME_PM_MRK_LD_MISS_L1_LSU1 ] = { .pme_name = "PM_MRK_LD_MISS_L1_LSU1", .pme_code = 0x820e4, .pme_short_desc = "LSU1 marked L1 D cache load misses", .pme_long_desc = "Load references that miss the Level 1 Data cache, by LSU1.", }, [ POWER5_PME_PM_MRK_FPU_FIN ] = { .pme_name = "PM_MRK_FPU_FIN", .pme_code = 0x300014, .pme_short_desc = "Marked instruction FPU processing finished", .pme_long_desc = "One of the Floating Point Units finished a marked instruction. 
Instructions that finish may not necessary complete", }, [ POWER5_PME_PM_THRD_PRIO_5_CYC ] = { .pme_name = "PM_THRD_PRIO_5_CYC", .pme_code = 0x420e4, .pme_short_desc = "Cycles thread running at priority level 5", .pme_long_desc = "Cycles this thread was running at priority level 5.", }, [ POWER5_PME_PM_MRK_DATA_FROM_LMEM ] = { .pme_name = "PM_MRK_DATA_FROM_LMEM", .pme_code = 0x2c7087, .pme_short_desc = "Marked data loaded from local memory", .pme_long_desc = "The processor's Data Cache was reloaded due to a marked load from memory attached to the same module this proccessor is located on.", }, [ POWER5_PME_PM_FPU1_FRSP_FCONV ] = { .pme_name = "PM_FPU1_FRSP_FCONV", .pme_code = 0x10c5, .pme_short_desc = "FPU1 executed FRSP or FCONV instructions", .pme_long_desc = "FPU1 has executed a frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs.", }, [ POWER5_PME_PM_SNOOP_TLBIE ] = { .pme_name = "PM_SNOOP_TLBIE", .pme_code = 0x800c3, .pme_short_desc = "Snoop TLBIE", .pme_long_desc = "A tlbie was snooped from another processor.", }, [ POWER5_PME_PM_L3SB_SNOOP_RETRY ] = { .pme_name = "PM_L3SB_SNOOP_RETRY", .pme_code = 0x731e4, .pme_short_desc = "L3 slice B snoop retries", .pme_long_desc = "Number of times an L3 retried a snoop because it got two in at the same time (one on snp_a, one on snp_b)", }, [ POWER5_PME_PM_FAB_VBYPASS_EMPTY ] = { .pme_name = "PM_FAB_VBYPASS_EMPTY", .pme_code = 0x731e7, .pme_short_desc = "Vertical bypass buffer empty", .pme_long_desc = "Fabric cycles when the Middle Bypass sidecar is empty. 
The signal is delivered at FBC speed and the count must be scaled accordingly.", }, [ POWER5_PME_PM_MRK_DATA_FROM_L275_MOD ] = { .pme_name = "PM_MRK_DATA_FROM_L275_MOD", .pme_code = 0x1c70a3, .pme_short_desc = "Marked data loaded from L2.75 modified", .pme_long_desc = "The processor's Data Cache was reloaded with modified (M) data from the L2 on a different module than this processor is located due to a marked load.", }, [ POWER5_PME_PM_6INST_CLB_CYC ] = { .pme_name = "PM_6INST_CLB_CYC", .pme_code = 0x400c6, .pme_short_desc = "Cycles 6 instructions in CLB", .pme_long_desc = "The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific.", }, [ POWER5_PME_PM_L2SB_RCST_DISP ] = { .pme_name = "PM_L2SB_RCST_DISP", .pme_code = 0x702c1, .pme_short_desc = "L2 slice B RC store dispatch attempt", .pme_long_desc = "A Read/Claim dispatch for a Store was attempted.", }, [ POWER5_PME_PM_FLUSH ] = { .pme_name = "PM_FLUSH", .pme_code = 0x110c7, .pme_short_desc = "Flushes", .pme_long_desc = "Flushes occurred including LSU and Branch flushes.", }, [ POWER5_PME_PM_L2SC_MOD_INV ] = { .pme_name = "PM_L2SC_MOD_INV", .pme_code = 0x730e2, .pme_short_desc = "L2 slice C transition from modified to invalid", .pme_long_desc = "A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. 
The event is provided on each of the three slices A, B, and C.", }, [ POWER5_PME_PM_FPU_DENORM ] = { .pme_name = "PM_FPU_DENORM", .pme_code = 0x102088, .pme_short_desc = "FPU received denormalized data", .pme_long_desc = "The floating point unit has encountered a denormalized operand. Combined Unit 0 + Unit 1.", }, [ POWER5_PME_PM_L3SC_HIT ] = { .pme_name = "PM_L3SC_HIT", .pme_code = 0x711c5, .pme_short_desc = "L3 slice C hits", .pme_long_desc = "Number of attempts made by this chip cores that resulted in an L3 hit. Reported per L3 Slice", }, [ POWER5_PME_PM_SNOOP_WR_RETRY_RQ ] = { .pme_name = "PM_SNOOP_WR_RETRY_RQ", .pme_code = 0x706c6, .pme_short_desc = "Snoop write/dclaim retry due to collision with active read queue", .pme_long_desc = "A snoop request for a write or dclaim to memory was retried because it matched the cacheline of an active read. This event is sent from the Memory Controller clock domain and must be scaled accordingly", }, [ POWER5_PME_PM_LSU1_REJECT_SRQ ] = { .pme_name = "PM_LSU1_REJECT_SRQ", .pme_code = 0xc60e4, .pme_short_desc = "LSU1 SRQ lhs rejects", .pme_long_desc = "Total cycles the Load Store Unit 1 is busy rejecting instructions because of Load Hit Store conditions. 
Loads are rejected when data is needed from a previous store instruction but store forwarding is not possible because the data is not fully contained in the Store Data Queue or is not yet available in the Store Data Queue.", }, [ POWER5_PME_PM_IC_PREF_REQ ] = { .pme_name = "PM_IC_PREF_REQ", .pme_code = 0x220e6, .pme_short_desc = "Instruction prefetch requests", .pme_long_desc = "An instruction prefetch request has been made.", }, [ POWER5_PME_PM_L3SC_ALL_BUSY ] = { .pme_name = "PM_L3SC_ALL_BUSY", .pme_code = 0x721e5, .pme_short_desc = "L3 slice C active for every cycle all CI/CO machines busy", .pme_long_desc = "Cycles All Castin/Castout machines are busy.", }, [ POWER5_PME_PM_MRK_GRP_IC_MISS ] = { .pme_name = "PM_MRK_GRP_IC_MISS", .pme_code = 0x412091, .pme_short_desc = "Group experienced marked I cache miss", .pme_long_desc = "A group containing a marked (sampled) instruction experienced an instruction cache miss.", }, [ POWER5_PME_PM_GCT_NOSLOT_IC_MISS ] = { .pme_name = "PM_GCT_NOSLOT_IC_MISS", .pme_code = 0x21009c, .pme_short_desc = "No slot in GCT caused by I cache miss", .pme_long_desc = "Cycles when the Global Completion Table has no slots from this thread because of an Instruction Cache miss.", }, [ POWER5_PME_PM_MRK_DATA_FROM_L3 ] = { .pme_name = "PM_MRK_DATA_FROM_L3", .pme_code = 0x1c708e, .pme_short_desc = "Marked data loaded from L3", .pme_long_desc = "The processor's Data Cache was reloaded from the local L3 due to a marked load.", }, [ POWER5_PME_PM_GCT_NOSLOT_SRQ_FULL ] = { .pme_name = "PM_GCT_NOSLOT_SRQ_FULL", .pme_code = 0x310084, .pme_short_desc = "No slot in GCT caused by SRQ full", .pme_long_desc = "Cycles when the Global Completion Table has no slots from this thread because the Store Request Queue (SRQ) is full. This happens when the storage subsystem can not process the stores in the SRQ. 
Groups can not be dispatched until a SRQ entry is available.", }, [ POWER5_PME_PM_THRD_SEL_OVER_ISU_HOLD ] = { .pme_name = "PM_THRD_SEL_OVER_ISU_HOLD", .pme_code = 0x410c5, .pme_short_desc = "Thread selection overrides caused by ISU holds", .pme_long_desc = "Thread selection was overridden because of an ISU hold.", }, [ POWER5_PME_PM_CMPLU_STALL_DCACHE_MISS ] = { .pme_name = "PM_CMPLU_STALL_DCACHE_MISS", .pme_code = 0x21109a, .pme_short_desc = "Completion stall caused by D cache miss", .pme_long_desc = "Following a completion stall (any period when no groups completed) the last instruction to finish before completion resumes suffered a Data Cache Miss. Data Cache Miss has higher priority than any other Load/Store delay, so if an instruction encounters multiple delays only the Data Cache Miss will be reported and the entire delay period will be charged to Data Cache Miss. This is a subset of PM_CMPLU_STALL_LSU.", }, [ POWER5_PME_PM_L3SA_MOD_INV ] = { .pme_name = "PM_L3SA_MOD_INV", .pme_code = 0x730e3, .pme_short_desc = "L3 slice A transition from modified to invalid", .pme_long_desc = "L3 snooper detects someone doing a store to a line that is truly M in this L3 (i.e. L3 going M=>I) Mu|Me are not included since they are formed due to a prev read op. Tx is not included since it is considered shared at this point.", }, [ POWER5_PME_PM_LSU_FLUSH_LRQ ] = { .pme_name = "PM_LSU_FLUSH_LRQ", .pme_code = 0x2c0090, .pme_short_desc = "LRQ flushes", .pme_long_desc = "A load was flushed because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. 
Combined Units 0 and 1.", }, [ POWER5_PME_PM_THRD_PRIO_2_CYC ] = { .pme_name = "PM_THRD_PRIO_2_CYC", .pme_code = 0x420e1, .pme_short_desc = "Cycles thread running at priority level 2", .pme_long_desc = "Cycles this thread was running at priority level 2.", }, [ POWER5_PME_PM_LSU_FLUSH_SRQ ] = { .pme_name = "PM_LSU_FLUSH_SRQ", .pme_code = 0x1c0090, .pme_short_desc = "SRQ flushes", .pme_long_desc = "A store was flushed because younger load hits and older store that is already in the SRQ or in the same group. Combined Unit 0 + 1.", }, [ POWER5_PME_PM_MRK_LSU_SRQ_INST_VALID ] = { .pme_name = "PM_MRK_LSU_SRQ_INST_VALID", .pme_code = 0xc70e6, .pme_short_desc = "Marked instruction valid in SRQ", .pme_long_desc = "This signal is asserted every cycle when a marked request is resident in the Store Request Queue", }, [ POWER5_PME_PM_L3SA_REF ] = { .pme_name = "PM_L3SA_REF", .pme_code = 0x701c3, .pme_short_desc = "L3 slice A references", .pme_long_desc = "Number of attempts made by this chip cores to find data in the L3. Reported per L3 slice ", }, [ POWER5_PME_PM_L2SC_RC_DISP_FAIL_CO_BUSY_ALL ] = { .pme_name = "PM_L2SC_RC_DISP_FAIL_CO_BUSY_ALL", .pme_code = 0x713c2, .pme_short_desc = "L2 slice C RC dispatch attempt failed due to all CO busy", .pme_long_desc = "A Read/Claim dispatch was rejected because all Castout machines were busy.", }, [ POWER5_PME_PM_FPU0_STALL3 ] = { .pme_name = "PM_FPU0_STALL3", .pme_code = 0x20e1, .pme_short_desc = "FPU0 stalled in pipe3", .pme_long_desc = "FPU0 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always).", }, [ POWER5_PME_PM_GPR_MAP_FULL_CYC ] = { .pme_name = "PM_GPR_MAP_FULL_CYC", .pme_code = 0x130e5, .pme_short_desc = "Cycles GPR mapper full", .pme_long_desc = "The General Purpose Register mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. 
This event only indicates that the mapper was full, not that dispatch was prevented.", }, [ POWER5_PME_PM_TB_BIT_TRANS ] = { .pme_name = "PM_TB_BIT_TRANS", .pme_code = 0x100018, .pme_short_desc = "Time Base bit transition", .pme_long_desc = "When the selected time base bit (as specified in MMCR0[TBSEL])transitions from 0 to 1 ", }, [ POWER5_PME_PM_MRK_LSU_FLUSH_LRQ ] = { .pme_name = "PM_MRK_LSU_FLUSH_LRQ", .pme_code = 0x381088, .pme_short_desc = "Marked LRQ flushes", .pme_long_desc = "A marked load was flushed because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte.", }, [ POWER5_PME_PM_FPU0_STF ] = { .pme_name = "PM_FPU0_STF", .pme_code = 0x20e2, .pme_short_desc = "FPU0 executed store instruction", .pme_long_desc = "FPU0 has executed a Floating Point Store instruction.", }, [ POWER5_PME_PM_MRK_DTLB_MISS ] = { .pme_name = "PM_MRK_DTLB_MISS", .pme_code = 0xc50c6, .pme_short_desc = "Marked Data TLB misses", .pme_long_desc = "Data TLB references by a marked instruction that missed the TLB (all page sizes).", }, [ POWER5_PME_PM_FPU1_FMA ] = { .pme_name = "PM_FPU1_FMA", .pme_code = 0xc5, .pme_short_desc = "FPU1 executed multiply-add instruction", .pme_long_desc = "The floating point unit has executed a multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs.", }, [ POWER5_PME_PM_L2SA_MOD_TAG ] = { .pme_name = "PM_L2SA_MOD_TAG", .pme_code = 0x720e0, .pme_short_desc = "L2 slice A transition from modified to tagged", .pme_long_desc = "A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. 
The event is provided on each of the three slices A, B, and C.", }, [ POWER5_PME_PM_LSU1_FLUSH_ULD ] = { .pme_name = "PM_LSU1_FLUSH_ULD", .pme_code = 0xc00c4, .pme_short_desc = "LSU1 unaligned load flushes", .pme_long_desc = "A load was flushed from unit 1 because it was unaligned (crossed a 64 byte boundary, or 32 byte if it missed the L1).", }, [ POWER5_PME_PM_MRK_LSU0_FLUSH_UST ] = { .pme_name = "PM_MRK_LSU0_FLUSH_UST", .pme_code = 0x810c1, .pme_short_desc = "LSU0 marked unaligned store flushes", .pme_long_desc = "A marked store was flushed from unit 0 because it was unaligned", }, [ POWER5_PME_PM_MRK_INST_FIN ] = { .pme_name = "PM_MRK_INST_FIN", .pme_code = 0x300005, .pme_short_desc = "Marked instruction finished", .pme_long_desc = "One of the execution units finished a marked instruction. Instructions that finish may not necessary complete", }, [ POWER5_PME_PM_FPU0_FULL_CYC ] = { .pme_name = "PM_FPU0_FULL_CYC", .pme_code = 0x100c3, .pme_short_desc = "Cycles FPU0 issue queue full", .pme_long_desc = "The issue queue for FPU0 cannot accept any more instruction. Dispatch to this issue queue is stopped.", }, [ POWER5_PME_PM_LSU_LRQ_S0_ALLOC ] = { .pme_name = "PM_LSU_LRQ_S0_ALLOC", .pme_code = 0xc20e6, .pme_short_desc = "LRQ slot 0 allocated", .pme_long_desc = "LRQ slot zero was allocated", }, [ POWER5_PME_PM_MRK_LSU1_FLUSH_ULD ] = { .pme_name = "PM_MRK_LSU1_FLUSH_ULD", .pme_code = 0x810c4, .pme_short_desc = "LSU1 marked unaligned load flushes", .pme_long_desc = "A marked load was flushed from unit 1 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1)", }, [ POWER5_PME_PM_MRK_DTLB_REF ] = { .pme_name = "PM_MRK_DTLB_REF", .pme_code = 0x1c4090, .pme_short_desc = "Marked Data TLB reference", .pme_long_desc = "Total number of Data TLB references by a marked instruction for all page sizes. 
Page size is determined at TLB reload time.", }, [ POWER5_PME_PM_BR_UNCOND ] = { .pme_name = "PM_BR_UNCOND", .pme_code = 0x123087, .pme_short_desc = "Unconditional branch", .pme_long_desc = "An unconditional branch was executed.", }, [ POWER5_PME_PM_THRD_SEL_OVER_L2MISS ] = { .pme_name = "PM_THRD_SEL_OVER_L2MISS", .pme_code = 0x410c3, .pme_short_desc = "Thread selection overrides caused by L2 misses", .pme_long_desc = "Thread selection was overridden because one thread was had a L2 miss pending.", }, [ POWER5_PME_PM_L2SB_SHR_INV ] = { .pme_name = "PM_L2SB_SHR_INV", .pme_code = 0x710c1, .pme_short_desc = "L2 slice B transition from shared to invalid", .pme_long_desc = "A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. This transition was caused by any external snoop request. The event is provided on each of the three slices A, B, and C. NOTE: For this event to be useful the tablewalk duration event should also be counted.", }, [ POWER5_PME_PM_MEM_LO_PRIO_WR_CMPL ] = { .pme_name = "PM_MEM_LO_PRIO_WR_CMPL", .pme_code = 0x736e6, .pme_short_desc = "Low priority write completed", .pme_long_desc = "A memory write, which was not upgraded to high priority, completed. This event is sent from the Memory Controller clock domain and must be scaled accordingly", }, [ POWER5_PME_PM_L3SC_MOD_TAG ] = { .pme_name = "PM_L3SC_MOD_TAG", .pme_code = 0x720e5, .pme_short_desc = "L3 slice C transition from modified to TAG", .pme_long_desc = "L3 snooper detects someone doing a read to a line that is truly M in this L3(i.e. L3 going M->T or M->I(go_Mu case); Mu|Me are not included since they are formed due to a prev read op). 
Tx is not included since it is considered shared at this point.", }, [ POWER5_PME_PM_MRK_ST_MISS_L1 ] = { .pme_name = "PM_MRK_ST_MISS_L1", .pme_code = 0x820e3, .pme_short_desc = "Marked L1 D cache store misses", .pme_long_desc = "A marked store missed the dcache", }, [ POWER5_PME_PM_GRP_DISP_SUCCESS ] = { .pme_name = "PM_GRP_DISP_SUCCESS", .pme_code = 0x300002, .pme_short_desc = "Group dispatch success", .pme_long_desc = "Number of groups sucessfully dispatched (not rejected)", }, [ POWER5_PME_PM_THRD_PRIO_DIFF_1or2_CYC ] = { .pme_name = "PM_THRD_PRIO_DIFF_1or2_CYC", .pme_code = 0x430e4, .pme_short_desc = "Cycles thread priority difference is 1 or 2", .pme_long_desc = "Cycles when this thread's priority is higher than the other thread's priority by 1 or 2.", }, [ POWER5_PME_PM_IC_DEMAND_L2_BHT_REDIRECT ] = { .pme_name = "PM_IC_DEMAND_L2_BHT_REDIRECT", .pme_code = 0x230e0, .pme_short_desc = "L2 I cache demand request due to BHT redirect", .pme_long_desc = "A demand (not prefetch) miss to the instruction cache was sent to the L2 as a result of a branch prediction redirect (CR mispredict).", }, [ POWER5_PME_PM_MEM_WQ_DISP_Q8to15 ] = { .pme_name = "PM_MEM_WQ_DISP_Q8to15", .pme_code = 0x733e6, .pme_short_desc = "Memory write queue dispatched to queues 8-15", .pme_long_desc = "A memory operation was dispatched to a write queue in the range between 8 and 15. This event is sent from the Memory Controller clock domain and must be scaled accordingly.", }, [ POWER5_PME_PM_FPU0_SINGLE ] = { .pme_name = "PM_FPU0_SINGLE", .pme_code = 0x20e3, .pme_short_desc = "FPU0 executed single precision instruction", .pme_long_desc = "FPU0 has executed a single precision instruction.", }, [ POWER5_PME_PM_LSU_DERAT_MISS ] = { .pme_name = "PM_LSU_DERAT_MISS", .pme_code = 0x280090, .pme_short_desc = "DERAT misses", .pme_long_desc = "Total D-ERAT Misses. Requests that miss the Derat are rejected and retried until the request hits in the Erat. 
This may result in multiple erat misses for the same instruction. Combined Unit 0 + 1.", }, [ POWER5_PME_PM_THRD_PRIO_1_CYC ] = { .pme_name = "PM_THRD_PRIO_1_CYC", .pme_code = 0x420e0, .pme_short_desc = "Cycles thread running at priority level 1", .pme_long_desc = "Cycles this thread was running at priority level 1. Priority level 1 is the lowest and indicates the thread is sleeping.", }, [ POWER5_PME_PM_L2SC_RCST_DISP_FAIL_OTHER ] = { .pme_name = "PM_L2SC_RCST_DISP_FAIL_OTHER", .pme_code = 0x732e2, .pme_short_desc = "L2 slice C RC store dispatch attempt failed due to other reasons", .pme_long_desc = "A Read/Claim dispatch for a store failed for some reason other than Full or Collision conditions. Rejected dispatches do not count because they have not yet been attempted.", }, [ POWER5_PME_PM_FPU1_FEST ] = { .pme_name = "PM_FPU1_FEST", .pme_code = 0x10c6, .pme_short_desc = "FPU1 executed FEST instruction", .pme_long_desc = "FPU1 has executed an estimate instructions. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ.", }, [ POWER5_PME_PM_FAB_HOLDtoVN_EMPTY ] = { .pme_name = "PM_FAB_HOLDtoVN_EMPTY", .pme_code = 0x721e7, .pme_short_desc = "Hold buffer to VN empty", .pme_long_desc = "Fabric cycles when the Vertical Node out hold-buffers are emtpy. The signal is delivered at FBC speed and the count must be scaled accordingly.", }, [ POWER5_PME_PM_SNOOP_RD_RETRY_RQ ] = { .pme_name = "PM_SNOOP_RD_RETRY_RQ", .pme_code = 0x705c6, .pme_short_desc = "Snoop read retry due to collision with active read queue", .pme_long_desc = "A snoop request for a read from memory was retried because it matched the cache line of an active read. The snoop request is retried because the L2 may be able to source data via intervention for the 2nd read faster than the MC. 
This event is sent from the Memory Controller clock domain and must be scaled accordingly.", }, [ POWER5_PME_PM_SNOOP_DCLAIM_RETRY_QFULL ] = { .pme_name = "PM_SNOOP_DCLAIM_RETRY_QFULL", .pme_code = 0x720e6, .pme_short_desc = "Snoop dclaim/flush retry due to write/dclaim queues full", .pme_long_desc = "The memory controller A memory write was dispatched to a write queue. This event is sent from the Memory Controller clock domain and must be scaled accordingly.", }, [ POWER5_PME_PM_MRK_DATA_FROM_L25_SHR_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_L25_SHR_CYC", .pme_code = 0x2c70a2, .pme_short_desc = "Marked load latency from L2.5 shared", .pme_long_desc = "Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. To calculate average latency divide this count by the number of marked misses to the same level.", }, [ POWER5_PME_PM_MRK_ST_CMPL_INT ] = { .pme_name = "PM_MRK_ST_CMPL_INT", .pme_code = 0x300003, .pme_short_desc = "Marked store completed with intervention", .pme_long_desc = "A marked store previously sent to the memory subsystem completed (data home) after requiring intervention", }, [ POWER5_PME_PM_FLUSH_BR_MPRED ] = { .pme_name = "PM_FLUSH_BR_MPRED", .pme_code = 0x110c6, .pme_short_desc = "Flush caused by branch mispredict", .pme_long_desc = "A flush was caused by a branch mispredict.", }, [ POWER5_PME_PM_L2SB_RCLD_DISP_FAIL_ADDR ] = { .pme_name = "PM_L2SB_RCLD_DISP_FAIL_ADDR", .pme_code = 0x711c1, .pme_short_desc = "L2 slice B RC load dispatch attempt failed due to address collision with RC/CO/SN/SQ", .pme_long_desc = "A Read/Claim dispatch for a load failed because of an address conflict. 
Two RC machines will never both work on the same line or line in the same congruence class at the same time.", }, [ POWER5_PME_PM_FPU_STF ] = { .pme_name = "PM_FPU_STF", .pme_code = 0x202090, .pme_short_desc = "FPU executed store instruction", .pme_long_desc = "FPU has executed a store instruction. Combined Unit 0 + Unit 1.", }, [ POWER5_PME_PM_CMPLU_STALL_FPU ] = { .pme_name = "PM_CMPLU_STALL_FPU", .pme_code = 0x411098, .pme_short_desc = "Completion stall caused by FPU instruction", .pme_long_desc = "Following a completion stall (any period when no groups completed) the last instruction to finish before completion resumes was a floating point instruction.", }, [ POWER5_PME_PM_THRD_PRIO_DIFF_minus1or2_CYC ] = { .pme_name = "PM_THRD_PRIO_DIFF_minus1or2_CYC", .pme_code = 0x430e2, .pme_short_desc = "Cycles thread priority difference is -1 or -2", .pme_long_desc = "Cycles when this thread's priority is lower than the other thread's priority by 1 or 2.", }, [ POWER5_PME_PM_GCT_NOSLOT_CYC ] = { .pme_name = "PM_GCT_NOSLOT_CYC", .pme_code = 0x100004, .pme_short_desc = "Cycles no GCT slot allocated", .pme_long_desc = "Cycles when the Global Completion Table has no slots from this thread.", }, [ POWER5_PME_PM_FXU0_BUSY_FXU1_IDLE ] = { .pme_name = "PM_FXU0_BUSY_FXU1_IDLE", .pme_code = 0x300012, .pme_short_desc = "FXU0 busy FXU1 idle", .pme_long_desc = "FXU0 is busy while FXU1 was idle", }, [ POWER5_PME_PM_PTEG_FROM_L35_SHR ] = { .pme_name = "PM_PTEG_FROM_L35_SHR", .pme_code = 0x18309e, .pme_short_desc = "PTEG loaded from L3.5 shared", .pme_long_desc = "A Page Table Entry was loaded into the TLB with shared (S) data from the L3 of a chip on the same module as this processor is located, due to a demand load.", }, [ POWER5_PME_PM_MRK_LSU_FLUSH_UST ] = { .pme_name = "PM_MRK_LSU_FLUSH_UST", .pme_code = 0x381090, .pme_short_desc = "Marked unaligned store flushes", .pme_long_desc = "A marked store was flushed because it was unaligned", }, [ POWER5_PME_PM_L3SA_HIT ] = { .pme_name = 
"PM_L3SA_HIT", .pme_code = 0x711c3, .pme_short_desc = "L3 slice A hits", .pme_long_desc = "Number of attempts made by this chip cores that resulted in an L3 hit. Reported per L3 slice", }, [ POWER5_PME_PM_MRK_DATA_FROM_L25_SHR ] = { .pme_name = "PM_MRK_DATA_FROM_L25_SHR", .pme_code = 0x1c7097, .pme_short_desc = "Marked data loaded from L2.5 shared", .pme_long_desc = "The processor's Data Cache was reloaded with shared (T or SL) data from the L2 of a chip on the same module as this processor is located due to a marked load.", }, [ POWER5_PME_PM_L2SB_RCST_DISP_FAIL_ADDR ] = { .pme_name = "PM_L2SB_RCST_DISP_FAIL_ADDR", .pme_code = 0x712c1, .pme_short_desc = "L2 slice B RC store dispatch attempt failed due to address collision with RC/CO/SN/SQ", .pme_long_desc = "A Read/Claim dispatch for a store failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time.", }, [ POWER5_PME_PM_MRK_DATA_FROM_L35_SHR ] = { .pme_name = "PM_MRK_DATA_FROM_L35_SHR", .pme_code = 0x1c709e, .pme_short_desc = "Marked data loaded from L3.5 shared", .pme_long_desc = "The processor's Data Cache was reloaded with shared (S) data from the L3 of a chip on the same module as this processor is located due to a marked load.", }, [ POWER5_PME_PM_IERAT_XLATE_WR ] = { .pme_name = "PM_IERAT_XLATE_WR", .pme_code = 0x220e7, .pme_short_desc = "Translation written to ierat", .pme_long_desc = "An entry was written into the IERAT as a result of an IERAT miss. This event can be used to count IERAT misses. An ERAT miss that are later ignored will not be counted unless the ERAT is written before the instruction stream is changed.", }, [ POWER5_PME_PM_L2SA_ST_REQ ] = { .pme_name = "PM_L2SA_ST_REQ", .pme_code = 0x723e0, .pme_short_desc = "L2 slice A store requests", .pme_long_desc = "A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. 
The event is provided on each of the three slices A, B, and C.", }, [ POWER5_PME_PM_THRD_SEL_T1 ] = { .pme_name = "PM_THRD_SEL_T1", .pme_code = 0x410c1, .pme_short_desc = "Decode selected thread 1", .pme_long_desc = "Thread selection picked thread 1 for decode.", }, [ POWER5_PME_PM_IC_DEMAND_L2_BR_REDIRECT ] = { .pme_name = "PM_IC_DEMAND_L2_BR_REDIRECT", .pme_code = 0x230e1, .pme_short_desc = "L2 I cache demand request due to branch redirect", .pme_long_desc = "A demand (not prefetch) miss to the instruction cache was sent to the L2 as a result of a branch prediction redirect (either ALL mispredicted or Target).", }, [ POWER5_PME_PM_INST_FROM_LMEM ] = { .pme_name = "PM_INST_FROM_LMEM", .pme_code = 0x222086, .pme_short_desc = "Instruction fetched from local memory", .pme_long_desc = "An instruction fetch group was fetched from memory attached to the same module this proccessor is located on. Fetch groups can contain up to 8 instructions", }, [ POWER5_PME_PM_FPU0_1FLOP ] = { .pme_name = "PM_FPU0_1FLOP", .pme_code = 0xc3, .pme_short_desc = "FPU0 executed add, mult, sub, cmp or sel instruction", .pme_long_desc = "The floating point unit has executed an add, mult, sub, compare, fsel, fneg, fabs, fnabs, fres, or frsqrte kind of instruction. These are single FLOP operations.", }, [ POWER5_PME_PM_MRK_DATA_FROM_L35_SHR_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_L35_SHR_CYC", .pme_code = 0x2c70a6, .pme_short_desc = "Marked load latency from L3.5 shared", .pme_long_desc = "Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. 
To calculate average latency divide this count by the number of marked misses to the same level.", }, [ POWER5_PME_PM_PTEG_FROM_L2 ] = { .pme_name = "PM_PTEG_FROM_L2", .pme_code = 0x183087, .pme_short_desc = "PTEG loaded from L2", .pme_long_desc = "A Page Table Entry was loaded into the TLB from the local L2 due to a demand load", }, [ POWER5_PME_PM_MEM_PW_CMPL ] = { .pme_name = "PM_MEM_PW_CMPL", .pme_code = 0x724e6, .pme_short_desc = "Memory partial-write completed", .pme_long_desc = "Number of Partial Writes completed. This event is sent from the Memory Controller clock domain and must be scaled accordingly.", }, [ POWER5_PME_PM_THRD_PRIO_DIFF_minus5or6_CYC ] = { .pme_name = "PM_THRD_PRIO_DIFF_minus5or6_CYC", .pme_code = 0x430e0, .pme_short_desc = "Cycles thread priority difference is -5 or -6", .pme_long_desc = "Cycles when this thread's priority is lower than the other thread's priority by 5 or 6.", }, [ POWER5_PME_PM_L2SB_RCLD_DISP_FAIL_OTHER ] = { .pme_name = "PM_L2SB_RCLD_DISP_FAIL_OTHER", .pme_code = 0x731e1, .pme_short_desc = "L2 slice B RC load dispatch attempt failed due to other reasons", .pme_long_desc = "A Read/Claim dispatch for a load failed for some reason other than Full or Collision conditions.", }, [ POWER5_PME_PM_FPU0_FIN ] = { .pme_name = "PM_FPU0_FIN", .pme_code = 0x10c3, .pme_short_desc = "FPU0 produced a result", .pme_long_desc = "FPU0 finished, produced a result. This only indicates finish, not completion. Floating Point Stores are included in this count but not Floating Point Loads.", }, [ POWER5_PME_PM_MRK_DTLB_MISS_4K ] = { .pme_name = "PM_MRK_DTLB_MISS_4K", .pme_code = 0xc40c1, .pme_short_desc = "Marked Data TLB misses for 4K page", .pme_long_desc = "Data TLB references to 4KB pages by a marked instruction that missed the TLB. 
Page size is determined at TLB reload time.", }, [ POWER5_PME_PM_L3SC_SHR_INV ] = { .pme_name = "PM_L3SC_SHR_INV", .pme_code = 0x710c5, .pme_short_desc = "L3 slice C transition from shared to invalid", .pme_long_desc = "L3 snooper detects someone doing a store to a line that is Sx in this L3(i.e. invalidate hit SX and dispatched).", }, [ POWER5_PME_PM_GRP_BR_REDIR ] = { .pme_name = "PM_GRP_BR_REDIR", .pme_code = 0x120e6, .pme_short_desc = "Group experienced branch redirect", .pme_long_desc = "Number of groups, counted at dispatch, that have encountered a branch redirect. Every group constructed from a fetch group that has been redirected will count.", }, [ POWER5_PME_PM_L2SC_RCLD_DISP_FAIL_RC_FULL ] = { .pme_name = "PM_L2SC_RCLD_DISP_FAIL_RC_FULL", .pme_code = 0x721e2, .pme_short_desc = "L2 slice C RC load dispatch attempt failed due to all RC full", .pme_long_desc = "A Read/Claim dispatch for a load failed because all RC machines are busy.", }, [ POWER5_PME_PM_MRK_LSU_FLUSH_SRQ ] = { .pme_name = "PM_MRK_LSU_FLUSH_SRQ", .pme_code = 0x481088, .pme_short_desc = "Marked SRQ lhs flushes", .pme_long_desc = "A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group.", }, [ POWER5_PME_PM_PTEG_FROM_L275_SHR ] = { .pme_name = "PM_PTEG_FROM_L275_SHR", .pme_code = 0x383097, .pme_short_desc = "PTEG loaded from L2.75 shared", .pme_long_desc = "A Page Table Entry was loaded into the TLB with shared (T) data from the L2 on a different module than this processor is located due to a demand load.", }, [ POWER5_PME_PM_L2SB_RCLD_DISP_FAIL_RC_FULL ] = { .pme_name = "PM_L2SB_RCLD_DISP_FAIL_RC_FULL", .pme_code = 0x721e1, .pme_short_desc = "L2 slice B RC load dispatch attempt failed due to all RC full", .pme_long_desc = "A Read/Claim dispatch for a load failed because all RC machines are busy.", }, [ POWER5_PME_PM_SNOOP_RD_RETRY_WQ ] = { .pme_name = "PM_SNOOP_RD_RETRY_WQ", .pme_code = 0x715c6, .pme_short_desc = "Snoop read retry 
due to collision with active write queue", .pme_long_desc = "A snoop request for a read from memory was retried because it matched the cache line of an active write. The snoop request is retried and the active write is changed to high priority. This event is sent from the Memory Controller clock domain and must be scaled accordingly.", }, [ POWER5_PME_PM_LSU0_NCLD ] = { .pme_name = "PM_LSU0_NCLD", .pme_code = 0xc50c1, .pme_short_desc = "LSU0 non-cacheable loads", .pme_long_desc = "A non-cacheable load was executed by unit 0.", }, [ POWER5_PME_PM_FAB_DCLAIM_RETRIED ] = { .pme_name = "PM_FAB_DCLAIM_RETRIED", .pme_code = 0x730e7, .pme_short_desc = "dclaim retried", .pme_long_desc = "A DCLAIM command was retried. Each chip reports its own counts. The signal is delivered at FBC speed and the count must be scaled accordingly.", }, [ POWER5_PME_PM_LSU1_BUSY_REJECT ] = { .pme_name = "PM_LSU1_BUSY_REJECT", .pme_code = 0xc20e7, .pme_short_desc = "LSU1 busy due to reject", .pme_long_desc = "Total cycles the Load Store Unit 1 is busy rejecting instructions.", }, [ POWER5_PME_PM_FXLS0_FULL_CYC ] = { .pme_name = "PM_FXLS0_FULL_CYC", .pme_code = 0x110c0, .pme_short_desc = "Cycles FXU0/LS0 queue full", .pme_long_desc = "The issue queue that feeds the Fixed Point unit 0 / Load Store Unit 0 is full. This condition will prevent dispatch groups from being dispatched. This event only indicates that the queue was full, not that dispatch was prevented.", }, [ POWER5_PME_PM_FPU0_FEST ] = { .pme_name = "PM_FPU0_FEST", .pme_code = 0x10c2, .pme_short_desc = "FPU0 executed FEST instruction", .pme_long_desc = "FPU0 has executed an estimate instructions. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. ", }, [ POWER5_PME_PM_DTLB_REF_16M ] = { .pme_name = "PM_DTLB_REF_16M", .pme_code = 0xc40c6, .pme_short_desc = "Data TLB reference for 16M page", .pme_long_desc = "Data TLB references for 16MB pages. 
Includes hits + misses.", }, [ POWER5_PME_PM_L2SC_RCLD_DISP_FAIL_ADDR ] = { .pme_name = "PM_L2SC_RCLD_DISP_FAIL_ADDR", .pme_code = 0x711c2, .pme_short_desc = "L2 slice C RC load dispatch attempt failed due to address collision with RC/CO/SN/SQ", .pme_long_desc = "A Read/Claim dispatch for a load failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time.", }, [ POWER5_PME_PM_LSU0_REJECT_ERAT_MISS ] = { .pme_name = "PM_LSU0_REJECT_ERAT_MISS", .pme_code = 0xc60e3, .pme_short_desc = "LSU0 reject due to ERAT miss", .pme_long_desc = "Total cycles the Load Store Unit 0 is busy rejecting instructions due to an ERAT miss. Requests that miss the Derat are rejected and retried until the request hits in the Erat.", }, [ POWER5_PME_PM_DATA_FROM_L25_MOD ] = { .pme_name = "PM_DATA_FROM_L25_MOD", .pme_code = 0x2c3097, .pme_short_desc = "Data loaded from L2.5 modified", .pme_long_desc = "The processor's Data Cache was reloaded with modified (M) data from the L2 of a chip on the same module as this processor is located due to a demand load.", }, [ POWER5_PME_PM_GCT_USAGE_60to79_CYC ] = { .pme_name = "PM_GCT_USAGE_60to79_CYC", .pme_code = 0x20001f, .pme_short_desc = "Cycles GCT 60-79% full", .pme_long_desc = "Cycles when the Global Completion Table has between 60% and 70% of its slots used. 
The GCT has 20 entries shared between threads.", }, [ POWER5_PME_PM_DATA_FROM_L375_MOD ] = { .pme_name = "PM_DATA_FROM_L375_MOD", .pme_code = 0x1c30a7, .pme_short_desc = "Data loaded from L3.75 modified", .pme_long_desc = "The processor's Data Cache was reloaded with modified (M) data from the L3 of a chip on the same module as this processor is located due to a demand load.", }, [ POWER5_PME_PM_LSU_LMQ_SRQ_EMPTY_CYC ] = { .pme_name = "PM_LSU_LMQ_SRQ_EMPTY_CYC", .pme_code = 0x200015, .pme_short_desc = "Cycles LMQ and SRQ empty", .pme_long_desc = "Cycles when both the LMQ and SRQ are empty (LSU is idle)", }, [ POWER5_PME_PM_LSU0_REJECT_RELOAD_CDF ] = { .pme_name = "PM_LSU0_REJECT_RELOAD_CDF", .pme_code = 0xc60e2, .pme_short_desc = "LSU0 reject due to reload CDF or tag update collision", .pme_long_desc = "Total cycles the Load Store Unit 0 is busy rejecting instructions because of Critical Data Forward. When critical data arrives from the storage system it is formatted and immediately forwarded, bypassing the data cache, to the destination register using the result bus. Any instruction the requires the result bus in the same cycle is rejected. Tag update rejects are caused when an instruction requires access to the Dcache directory or ERAT in the same system when they are being updated.", }, [ POWER5_PME_PM_0INST_FETCH ] = { .pme_name = "PM_0INST_FETCH", .pme_code = 0x42208d, .pme_short_desc = "No instructions fetched", .pme_long_desc = "No instructions were fetched this cycles (due to IFU hold, redirect, or icache miss)", }, [ POWER5_PME_PM_LSU1_REJECT_RELOAD_CDF ] = { .pme_name = "PM_LSU1_REJECT_RELOAD_CDF", .pme_code = 0xc60e6, .pme_short_desc = "LSU1 reject due to reload CDF or tag update collision", .pme_long_desc = "Total cycles the Load Store Unit 1 is busy rejecting instructions because of Critical Data Forward. 
When critical data arrives from the storage system it is formatted and immediately forwarded, bypassing the data cache, to the destination register using the result bus. Any instruction the requires the result bus in the same cycle is rejected. Tag update rejects are caused when an instruction requires access to the Dcache directory or ERAT in the same system when they are being updated.", }, [ POWER5_PME_PM_L1_PREF ] = { .pme_name = "PM_L1_PREF", .pme_code = 0xc70e7, .pme_short_desc = "L1 cache data prefetches", .pme_long_desc = "A request to prefetch data into the L1 was made", }, [ POWER5_PME_PM_MEM_WQ_DISP_Q0to7 ] = { .pme_name = "PM_MEM_WQ_DISP_Q0to7", .pme_code = 0x723e6, .pme_short_desc = "Memory write queue dispatched to queues 0-7", .pme_long_desc = "A memory operation was dispatched to a write queue in the range between 0 and 7. This event is sent from the Memory Controller clock domain and must be scaled accordingly.", }, [ POWER5_PME_PM_MRK_DATA_FROM_LMEM_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_LMEM_CYC", .pme_code = 0x4c70a0, .pme_short_desc = "Marked load latency from local memory", .pme_long_desc = "Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. To calculate average latency divide this count by the number of marked misses to the same level.", }, [ POWER5_PME_PM_BRQ_FULL_CYC ] = { .pme_name = "PM_BRQ_FULL_CYC", .pme_code = 0x100c5, .pme_short_desc = "Cycles branch queue full", .pme_long_desc = "Cycles when the issue queue that feeds the branch unit is full. This condition will prevent dispatch groups from being dispatched. 
This event only indicates that the queue was full, not that dispatch was prevented.", }, [ POWER5_PME_PM_GRP_IC_MISS_NONSPEC ] = { .pme_name = "PM_GRP_IC_MISS_NONSPEC", .pme_code = 0x112099, .pme_short_desc = "Group experienced non-speculative I cache miss", .pme_long_desc = "Number of groups, counted at completion, that have encountered an instruction cache miss.", }, [ POWER5_PME_PM_PTEG_FROM_L275_MOD ] = { .pme_name = "PM_PTEG_FROM_L275_MOD", .pme_code = 0x1830a3, .pme_short_desc = "PTEG loaded from L2.75 modified", .pme_long_desc = "A Page Table Entry was loaded into the TLB with modified (M) data from the L2 on a different module than this processor is located due to a demand load. ", }, [ POWER5_PME_PM_MRK_LD_MISS_L1_LSU0 ] = { .pme_name = "PM_MRK_LD_MISS_L1_LSU0", .pme_code = 0x820e0, .pme_short_desc = "LSU0 marked L1 D cache load misses", .pme_long_desc = "Load references that miss the Level 1 Data cache, by LSU0.", }, [ POWER5_PME_PM_MRK_DATA_FROM_L375_SHR_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_L375_SHR_CYC", .pme_code = 0x2c70a7, .pme_short_desc = "Marked load latency from L3.75 shared", .pme_long_desc = "Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. 
To calculate average latency divide this count by the number of marked misses to the same level.", }, [ POWER5_PME_PM_LSU_FLUSH ] = { .pme_name = "PM_LSU_FLUSH", .pme_code = 0x110c5, .pme_short_desc = "Flush initiated by LSU", .pme_long_desc = "A flush was initiated by the Load Store Unit", }, [ POWER5_PME_PM_DATA_FROM_L3 ] = { .pme_name = "PM_DATA_FROM_L3", .pme_code = 0x1c308e, .pme_short_desc = "Data loaded from L3", .pme_long_desc = "The processor's Data Cache was reloaded from the local L3 due to a demand load.", }, [ POWER5_PME_PM_INST_FROM_L2 ] = { .pme_name = "PM_INST_FROM_L2", .pme_code = 0x122086, .pme_short_desc = "Instruction fetched from L2", .pme_long_desc = "An instruction fetch group was fetched from L2. Fetch Groups can contain up to 8 instructions", }, [ POWER5_PME_PM_PMC2_OVERFLOW ] = { .pme_name = "PM_PMC2_OVERFLOW", .pme_code = 0x30000a, .pme_short_desc = "PMC2 Overflow", .pme_long_desc = "Overflows from PMC2 are counted. This effectively widens the PMC. The Overflow from the original PMC will not trigger an exception even if the PMU is configured to generate exceptions on overflow.", }, [ POWER5_PME_PM_FPU0_DENORM ] = { .pme_name = "PM_FPU0_DENORM", .pme_code = 0x20e0, .pme_short_desc = "FPU0 received denormalized data", .pme_long_desc = "FPU0 has encountered a denormalized operand. ", }, [ POWER5_PME_PM_FPU1_FMOV_FEST ] = { .pme_name = "PM_FPU1_FMOV_FEST", .pme_code = 0x10c4, .pme_short_desc = "FPU1 executed FMOV or FEST instructions", .pme_long_desc = "FPU1 has executed a move kind of instruction or one of the estimate instructions. 
This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ.", }, [ POWER5_PME_PM_INST_FETCH_CYC ] = { .pme_name = "PM_INST_FETCH_CYC", .pme_code = 0x220e4, .pme_short_desc = "Cycles at least 1 instruction fetched", .pme_long_desc = "Cycles when at least one instruction was sent from the fetch unit to the decode unit.", }, [ POWER5_PME_PM_LSU_LDF ] = { .pme_name = "PM_LSU_LDF", .pme_code = 0x4c5090, .pme_short_desc = "LSU executed Floating Point load instruction", .pme_long_desc = "LSU executed Floating Point load instruction. Combined Unit 0 + 1.", }, [ POWER5_PME_PM_INST_DISP ] = { .pme_name = "PM_INST_DISP", .pme_code = 0x300009, .pme_short_desc = "Instructions dispatched", .pme_long_desc = "Number of PowerPC instructions successfully dispatched.", }, [ POWER5_PME_PM_DATA_FROM_L25_SHR ] = { .pme_name = "PM_DATA_FROM_L25_SHR", .pme_code = 0x1c3097, .pme_short_desc = "Data loaded from L2.5 shared", .pme_long_desc = "The processor's Data Cache was reloaded with shared (T or SL) data from the L2 of a chip on the same module as this processor is located due to a demand load.", }, [ POWER5_PME_PM_L1_DCACHE_RELOAD_VALID ] = { .pme_name = "PM_L1_DCACHE_RELOAD_VALID", .pme_code = 0xc30e4, .pme_short_desc = "L1 reload data source valid", .pme_long_desc = "The data source information is valid,the data cache has been reloaded. Prior to POWER5+ this included data cache reloads due to prefetch activity. With POWER5+ this now only includes reloads due to demand loads.", }, [ POWER5_PME_PM_MEM_WQ_DISP_DCLAIM ] = { .pme_name = "PM_MEM_WQ_DISP_DCLAIM", .pme_code = 0x713c6, .pme_short_desc = "Memory write queue dispatched due to dclaim/flush", .pme_long_desc = "A memory dclaim or flush operation was dispatched to a write queue. 
This event is sent from the Memory Controller clock domain and must be scaled accordingly.", }, [ POWER5_PME_PM_FPU_FULL_CYC ] = { .pme_name = "PM_FPU_FULL_CYC", .pme_code = 0x110090, .pme_short_desc = "Cycles FPU issue queue full", .pme_long_desc = "Cycles when one or both FPU issue queues are full. Combined Unit 0 + 1. Use with caution since this is the sum of cycles when Unit 0 was full plus Unit 1 full. It does not indicate when both units were full.", }, [ POWER5_PME_PM_MRK_GRP_ISSUED ] = { .pme_name = "PM_MRK_GRP_ISSUED", .pme_code = 0x100015, .pme_short_desc = "Marked group issued", .pme_long_desc = "A sampled instruction was issued.", }, [ POWER5_PME_PM_THRD_PRIO_3_CYC ] = { .pme_name = "PM_THRD_PRIO_3_CYC", .pme_code = 0x420e2, .pme_short_desc = "Cycles thread running at priority level 3", .pme_long_desc = "Cycles this thread was running at priority level 3.", }, [ POWER5_PME_PM_FPU_FMA ] = { .pme_name = "PM_FPU_FMA", .pme_code = 0x200088, .pme_short_desc = "FPU executed multiply-add instruction", .pme_long_desc = "This signal is active for one cycle when FPU is executing multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. Combined Unit 0 + Unit 1.", }, [ POWER5_PME_PM_INST_FROM_L35_MOD ] = { .pme_name = "PM_INST_FROM_L35_MOD", .pme_code = 0x22209d, .pme_short_desc = "Instruction fetched from L3.5 modified", .pme_long_desc = "An instruction fetch group was fetched with modified (M) data from the L3 of a chip on the same module as this processor is located. Fetch groups can contain up to 8 instructions", }, [ POWER5_PME_PM_MRK_CRU_FIN ] = { .pme_name = "PM_MRK_CRU_FIN", .pme_code = 0x400005, .pme_short_desc = "Marked instruction CRU processing finished", .pme_long_desc = "The Condition Register Unit finished a marked instruction. 
Instructions that finish may not necessary complete.", }, [ POWER5_PME_PM_SNOOP_WR_RETRY_WQ ] = { .pme_name = "PM_SNOOP_WR_RETRY_WQ", .pme_code = 0x716c6, .pme_short_desc = "Snoop write/dclaim retry due to collision with active write queue", .pme_long_desc = "A snoop request for a write or dclaim to memory was retried because it matched the cache line of an active write. The snoop request is retried and the active write is changed to high priority. This event is sent from the Memory Controller clock domain and must be scaled accordingly.", }, [ POWER5_PME_PM_CMPLU_STALL_REJECT ] = { .pme_name = "PM_CMPLU_STALL_REJECT", .pme_code = 0x41109a, .pme_short_desc = "Completion stall caused by reject", .pme_long_desc = "Following a completion stall (any period when no groups completed) the last instruction to finish before completion resumes suffered a load/store reject. This is a subset of PM_CMPLU_STALL_LSU.", }, [ POWER5_PME_PM_LSU1_REJECT_ERAT_MISS ] = { .pme_name = "PM_LSU1_REJECT_ERAT_MISS", .pme_code = 0xc60e7, .pme_short_desc = "LSU1 reject due to ERAT miss", .pme_long_desc = "Total cycles the Load Store Unit 1 is busy rejecting instructions due to an ERAT miss. Requests that miss the Derat are rejected and retried until the request hits in the Erat.", }, [ POWER5_PME_PM_MRK_FXU_FIN ] = { .pme_name = "PM_MRK_FXU_FIN", .pme_code = 0x200014, .pme_short_desc = "Marked instruction FXU processing finished", .pme_long_desc = "One of the Fixed Point Units finished a marked instruction. Instructions that finish may not necessary complete.", }, [ POWER5_PME_PM_L2SB_RCST_DISP_FAIL_OTHER ] = { .pme_name = "PM_L2SB_RCST_DISP_FAIL_OTHER", .pme_code = 0x732e1, .pme_short_desc = "L2 slice B RC store dispatch attempt failed due to other reasons", .pme_long_desc = "A Read/Claim dispatch for a store failed for some reason other than Full or Collision conditions. 
Rejected dispatches do not count because they have not yet been attempted.", }, [ POWER5_PME_PM_L2SC_RC_DISP_FAIL_CO_BUSY ] = { .pme_name = "PM_L2SC_RC_DISP_FAIL_CO_BUSY", .pme_code = 0x703c2, .pme_short_desc = "L2 slice C RC dispatch attempt failed due to RC/CO pair chosen was miss and CO already busy", .pme_long_desc = "A Read/Claim Dispatch was rejected at dispatch because the Castout Machine was busy. In the case of an RC starting up on a miss and the victim is valid, the CO machine must be available for the RC to process the access. If the CO is still busy working on an old castout, then the RC must not-ack the access if it is a miss(re-issued by the CIU). If it is a miss and the CO is available to process the castout, the RC will accept the access. Once the RC has finished, it can restart and process new accesses that result in a hit (or miss that doesn't need a CO) even though the CO is still processing a castout from a previous access.", }, [ POWER5_PME_PM_PMC4_OVERFLOW ] = { .pme_name = "PM_PMC4_OVERFLOW", .pme_code = 0x10000a, .pme_short_desc = "PMC4 Overflow", .pme_long_desc = "Overflows from PMC4 are counted. This effectively widens the PMC. 
The Overflow from the original PMC will not trigger an exception even if the PMU is configured to generate exceptions on overflow.", }, [ POWER5_PME_PM_L3SA_SNOOP_RETRY ] = { .pme_name = "PM_L3SA_SNOOP_RETRY", .pme_code = 0x731e3, .pme_short_desc = "L3 slice A snoop retries", .pme_long_desc = "Number of times an L3 retried a snoop because it got two in at the same time (one on snp_a, one on snp_b)", }, [ POWER5_PME_PM_PTEG_FROM_L35_MOD ] = { .pme_name = "PM_PTEG_FROM_L35_MOD", .pme_code = 0x28309e, .pme_short_desc = "PTEG loaded from L3.5 modified", .pme_long_desc = "A Page Table Entry was loaded into the TLB with modified (M) data from the L3 of a chip on the same module as this processor is located, due to a demand load.", }, [ POWER5_PME_PM_INST_FROM_L25_MOD ] = { .pme_name = "PM_INST_FROM_L25_MOD", .pme_code = 0x222096, .pme_short_desc = "Instruction fetched from L2.5 modified", .pme_long_desc = "An instruction fetch group was fetched with modified (M) data from the L2 of a chip on the same module as this processor is located. Fetch groups can contain up to 8 instructions.", }, [ POWER5_PME_PM_THRD_SMT_HANG ] = { .pme_name = "PM_THRD_SMT_HANG", .pme_code = 0x330e7, .pme_short_desc = "SMT hang detected", .pme_long_desc = "A hung thread was detected", }, [ POWER5_PME_PM_CMPLU_STALL_ERAT_MISS ] = { .pme_name = "PM_CMPLU_STALL_ERAT_MISS", .pme_code = 0x41109b, .pme_short_desc = "Completion stall caused by ERAT miss", .pme_long_desc = "Following a completion stall (any period when no groups completed) the last instruction to finish before completion resumes suffered an ERAT miss. This is a subset of PM_CMPLU_STALL_REJECT.", }, [ POWER5_PME_PM_L3SA_MOD_TAG ] = { .pme_name = "PM_L3SA_MOD_TAG", .pme_code = 0x720e3, .pme_short_desc = "L3 slice A transition from modified to TAG", .pme_long_desc = "L3 snooper detects someone doing a read to a line that is truly M in this L3(i.e. 
L3 going M->T or M->I(go_Mu case) Mu|Me are not included since they are formed due to a prev read op). Tx is not included since it is considered shared at this point.", }, [ POWER5_PME_PM_FLUSH_SYNC ] = { .pme_name = "PM_FLUSH_SYNC", .pme_code = 0x330e1, .pme_short_desc = "Flush caused by sync", .pme_long_desc = "This thread has been flushed at dispatch due to a sync, lwsync, ptesync, or tlbsync instruction. This allows the other thread to have more machine resources for it to make progress until the sync finishes.", }, [ POWER5_PME_PM_INST_FROM_L2MISS ] = { .pme_name = "PM_INST_FROM_L2MISS", .pme_code = 0x12209b, .pme_short_desc = "Instruction fetched missed L2", .pme_long_desc = "An instruction fetch group was fetched from beyond the local L2.", }, [ POWER5_PME_PM_L2SC_ST_HIT ] = { .pme_name = "PM_L2SC_ST_HIT", .pme_code = 0x733e2, .pme_short_desc = "L2 slice C store hits", .pme_long_desc = "A store request made from the core hit in the L2 directory. The event is provided on each of the three slices A, B, and C.", }, [ POWER5_PME_PM_MEM_RQ_DISP_Q8to11 ] = { .pme_name = "PM_MEM_RQ_DISP_Q8to11", .pme_code = 0x722e6, .pme_short_desc = "Memory read queue dispatched to queues 8-11", .pme_long_desc = "A memory operation was dispatched to read queue 8,9,10 or 11. This event is sent from the Memory Controller clock domain and must be scaled accordingly.", }, [ POWER5_PME_PM_MRK_GRP_DISP ] = { .pme_name = "PM_MRK_GRP_DISP", .pme_code = 0x100002, .pme_short_desc = "Marked group dispatched", .pme_long_desc = "A group containing a sampled instruction was dispatched", }, [ POWER5_PME_PM_L2SB_MOD_TAG ] = { .pme_name = "PM_L2SB_MOD_TAG", .pme_code = 0x720e1, .pme_short_desc = "L2 slice B transition from modified to tagged", .pme_long_desc = "A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. 
The event is provided on each of the three slices A, B, and C.", }, [ POWER5_PME_PM_CLB_EMPTY_CYC ] = { .pme_name = "PM_CLB_EMPTY_CYC", .pme_code = 0x410c6, .pme_short_desc = "Cycles CLB empty", .pme_long_desc = "Cycles when both thread's CLB is completely empty.", }, [ POWER5_PME_PM_L2SB_ST_HIT ] = { .pme_name = "PM_L2SB_ST_HIT", .pme_code = 0x733e1, .pme_short_desc = "L2 slice B store hits", .pme_long_desc = "A store request made from the core hit in the L2 directory. This event is provided on each of the three L2 slices A, B and C.", }, [ POWER5_PME_PM_MEM_NONSPEC_RD_CANCEL ] = { .pme_name = "PM_MEM_NONSPEC_RD_CANCEL", .pme_code = 0x711c6, .pme_short_desc = "Non speculative memory read cancelled", .pme_long_desc = "A non-speculative read was cancelled because the combined response indicated it was sourced from aother L2 or L3. This event is sent from the Memory Controller clock domain and must be scaled accordingly", }, [ POWER5_PME_PM_BR_PRED_CR_TA ] = { .pme_name = "PM_BR_PRED_CR_TA", .pme_code = 0x423087, .pme_short_desc = "A conditional branch was predicted, CR and target prediction", .pme_long_desc = "Both the condition (taken or not taken) and the target address of a branch instruction was predicted.", }, [ POWER5_PME_PM_MRK_LSU0_FLUSH_SRQ ] = { .pme_name = "PM_MRK_LSU0_FLUSH_SRQ", .pme_code = 0x810c3, .pme_short_desc = "LSU0 marked SRQ lhs flushes", .pme_long_desc = "A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group.", }, [ POWER5_PME_PM_MRK_LSU_FLUSH_ULD ] = { .pme_name = "PM_MRK_LSU_FLUSH_ULD", .pme_code = 0x481090, .pme_short_desc = "Marked unaligned load flushes", .pme_long_desc = "A marked load was flushed because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1)", }, [ POWER5_PME_PM_INST_DISP_ATTEMPT ] = { .pme_name = "PM_INST_DISP_ATTEMPT", .pme_code = 0x120e1, .pme_short_desc = "Instructions dispatch attempted", .pme_long_desc = "Number of PowerPC 
Instructions dispatched (attempted, not filtered by success.", }, [ POWER5_PME_PM_INST_FROM_RMEM ] = { .pme_name = "PM_INST_FROM_RMEM", .pme_code = 0x422086, .pme_short_desc = "Instruction fetched from remote memory", .pme_long_desc = "An instruction fetch group was fetched from memory attached to a different module than this proccessor is located on. Fetch groups can contain up to 8 instructions", }, [ POWER5_PME_PM_ST_REF_L1_LSU0 ] = { .pme_name = "PM_ST_REF_L1_LSU0", .pme_code = 0xc10c1, .pme_short_desc = "LSU0 L1 D cache store references", .pme_long_desc = "Store references to the Data Cache by LSU0.", }, [ POWER5_PME_PM_LSU0_DERAT_MISS ] = { .pme_name = "PM_LSU0_DERAT_MISS", .pme_code = 0x800c2, .pme_short_desc = "LSU0 DERAT misses", .pme_long_desc = "Total D-ERAT Misses by LSU0. Requests that miss the Derat are rejected and retried until the request hits in the Erat. This may result in multiple erat misses for the same instruction.", }, [ POWER5_PME_PM_L2SB_RCLD_DISP ] = { .pme_name = "PM_L2SB_RCLD_DISP", .pme_code = 0x701c1, .pme_short_desc = "L2 slice B RC load dispatch attempt", .pme_long_desc = "A Read/Claim dispatch for a Load was attempted", }, [ POWER5_PME_PM_FPU_STALL3 ] = { .pme_name = "PM_FPU_STALL3", .pme_code = 0x202088, .pme_short_desc = "FPU stalled in pipe3", .pme_long_desc = "FPU has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). This signal is active during the entire duration of the stall. 
Combined Unit 0 + Unit 1.", }, [ POWER5_PME_PM_BR_PRED_CR ] = { .pme_name = "PM_BR_PRED_CR", .pme_code = 0x230e2, .pme_short_desc = "A conditional branch was predicted, CR prediction", .pme_long_desc = "A conditional branch instruction was predicted as taken or not taken.", }, [ POWER5_PME_PM_MRK_DATA_FROM_L2 ] = { .pme_name = "PM_MRK_DATA_FROM_L2", .pme_code = 0x1c7087, .pme_short_desc = "Marked data loaded from L2", .pme_long_desc = "The processor's Data Cache was reloaded from the local L2 due to a marked load.", }, [ POWER5_PME_PM_LSU0_FLUSH_SRQ ] = { .pme_name = "PM_LSU0_FLUSH_SRQ", .pme_code = 0xc00c3, .pme_short_desc = "LSU0 SRQ lhs flushes", .pme_long_desc = "A store was flushed by unit 0 because younger load hits and older store that is already in the SRQ or in the same group.", }, [ POWER5_PME_PM_FAB_PNtoNN_DIRECT ] = { .pme_name = "PM_FAB_PNtoNN_DIRECT", .pme_code = 0x703c7, .pme_short_desc = "PN to NN beat went straight to its destination", .pme_long_desc = "Fabric Data beats that the base chip takes the inbound PN data and passes it through to the outbound NN bus without going into a sidecar. The signal is delivered at FBC speed and the count must be scaled.", }, [ POWER5_PME_PM_IOPS_CMPL ] = { .pme_name = "PM_IOPS_CMPL", .pme_code = 0x1, .pme_short_desc = "Internal operations completed", .pme_long_desc = "Number of internal operations that completed.", }, [ POWER5_PME_PM_L2SC_SHR_INV ] = { .pme_name = "PM_L2SC_SHR_INV", .pme_code = 0x710c2, .pme_short_desc = "L2 slice C transition from shared to invalid", .pme_long_desc = "A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. This transition was caused by any external snoop request. The event is provided on each of the three slices A, B, and C. 
NOTE: For this event to be useful the tablewalk duration event should also be counted.", }, [ POWER5_PME_PM_L2SA_RCST_DISP_FAIL_OTHER ] = { .pme_name = "PM_L2SA_RCST_DISP_FAIL_OTHER", .pme_code = 0x732e0, .pme_short_desc = "L2 slice A RC store dispatch attempt failed due to other reasons", .pme_long_desc = "A Read/Claim dispatch for a store failed for some reason other than Full or Collision conditions. Rejected dispatches do not count because they have not yet been attempted.", }, [ POWER5_PME_PM_L2SA_RCST_DISP ] = { .pme_name = "PM_L2SA_RCST_DISP", .pme_code = 0x702c0, .pme_short_desc = "L2 slice A RC store dispatch attempt", .pme_long_desc = "A Read/Claim dispatch for a Store was attempted.", }, [ POWER5_PME_PM_SNOOP_RETRY_AB_COLLISION ] = { .pme_name = "PM_SNOOP_RETRY_AB_COLLISION", .pme_code = 0x735e6, .pme_short_desc = "Snoop retry due to a b collision", .pme_long_desc = "Snoop retry due to a b collision", }, [ POWER5_PME_PM_FAB_PNtoVN_SIDECAR ] = { .pme_name = "PM_FAB_PNtoVN_SIDECAR", .pme_code = 0x733e7, .pme_short_desc = "PN to VN beat went to sidecar first", .pme_long_desc = "Fabric data beats that the base chip takes the inbound PN data and forwards it on to the outbound VN data bus after going into a sidecar first. The signal is delivered at FBC speed and the count must be scaled accordingly.", }, [ POWER5_PME_PM_LSU_LMQ_S0_ALLOC ] = { .pme_name = "PM_LSU_LMQ_S0_ALLOC", .pme_code = 0xc30e6, .pme_short_desc = "LMQ slot 0 allocated", .pme_long_desc = "The first entry in the LMQ was allocated.", }, [ POWER5_PME_PM_LSU0_REJECT_LMQ_FULL ] = { .pme_name = "PM_LSU0_REJECT_LMQ_FULL", .pme_code = 0xc60e1, .pme_short_desc = "LSU0 reject due to LMQ full or missed data coming", .pme_long_desc = "Total cycles the Load Store Unit 0 is busy rejecting instructions because the Load Miss Queue was full. The LMQ has eight entries. 
If all eight entries are full, subsequent load instructions are rejected.", }, [ POWER5_PME_PM_SNOOP_PW_RETRY_RQ ] = { .pme_name = "PM_SNOOP_PW_RETRY_RQ", .pme_code = 0x707c6, .pme_short_desc = "Snoop partial-write retry due to collision with active read queue", .pme_long_desc = "A snoop request for a partial write to memory was retried because it matched the cache line of an active read. This event is sent from the Memory Controller clock domain and must be scaled accordingly.", }, [ POWER5_PME_PM_DTLB_REF ] = { .pme_name = "PM_DTLB_REF", .pme_code = 0x2c4090, .pme_short_desc = "Data TLB references", .pme_long_desc = "Total number of Data TLB references for all page sizes. Page size is determined at TLB reload time.", }, [ POWER5_PME_PM_PTEG_FROM_L3 ] = { .pme_name = "PM_PTEG_FROM_L3", .pme_code = 0x18308e, .pme_short_desc = "PTEG loaded from L3", .pme_long_desc = "A Page Table Entry was loaded into the TLB from the local L3 due to a demand load.", }, [ POWER5_PME_PM_FAB_M1toVNorNN_SIDECAR_EMPTY ] = { .pme_name = "PM_FAB_M1toVNorNN_SIDECAR_EMPTY", .pme_code = 0x712c7, .pme_short_desc = "M1 to VN/NN sidecar empty", .pme_long_desc = "Fabric cycles when the Minus-1 jump sidecar (sidecars for mcm to mcm data transfer) is empty. The signal is delivered at FBC speed and the count must be scaled accordingly.", }, [ POWER5_PME_PM_LSU_SRQ_EMPTY_CYC ] = { .pme_name = "PM_LSU_SRQ_EMPTY_CYC", .pme_code = 0x400015, .pme_short_desc = "Cycles SRQ empty", .pme_long_desc = "Cycles the Store Request Queue is empty", }, [ POWER5_PME_PM_FPU1_STF ] = { .pme_name = "PM_FPU1_STF", .pme_code = 0x20e6, .pme_short_desc = "FPU1 executed store instruction", .pme_long_desc = "FPU1 has executed a Floating Point Store instruction.", }, [ POWER5_PME_PM_LSU_LMQ_S0_VALID ] = { .pme_name = "PM_LSU_LMQ_S0_VALID", .pme_code = 0xc30e5, .pme_short_desc = "LMQ slot 0 valid", .pme_long_desc = "This signal is asserted every cycle when the first entry in the LMQ is valid. 
The LMQ had eight entries that are allocated FIFO", }, [ POWER5_PME_PM_GCT_USAGE_00to59_CYC ] = { .pme_name = "PM_GCT_USAGE_00to59_CYC", .pme_code = 0x10001f, .pme_short_desc = "Cycles GCT less than 60% full", .pme_long_desc = "Cycles when the Global Completion Table has fewer than 60% of its slots used. The GCT has 20 entries shared between threads.", }, [ POWER5_PME_PM_DATA_FROM_L2MISS ] = { .pme_name = "PM_DATA_FROM_L2MISS", .pme_code = 0x3c309b, .pme_short_desc = "Data loaded missed L2", .pme_long_desc = "The processor's Data Cache was reloaded but not from the local L2.", }, [ POWER5_PME_PM_GRP_DISP_BLK_SB_CYC ] = { .pme_name = "PM_GRP_DISP_BLK_SB_CYC", .pme_code = 0x130e1, .pme_short_desc = "Cycles group dispatch blocked by scoreboard", .pme_long_desc = "A scoreboard operation on a non-renamed resource has blocked dispatch.", }, [ POWER5_PME_PM_FPU_FMOV_FEST ] = { .pme_name = "PM_FPU_FMOV_FEST", .pme_code = 0x301088, .pme_short_desc = "FPU executed FMOV or FEST instructions", .pme_long_desc = "The floating point unit has executed a move kind of instruction or one of the estimate instructions. This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ.. Combined Unit 0 + Unit 1.", }, [ POWER5_PME_PM_XER_MAP_FULL_CYC ] = { .pme_name = "PM_XER_MAP_FULL_CYC", .pme_code = 0x100c2, .pme_short_desc = "Cycles XER mapper full", .pme_long_desc = "The XER mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. This event only indicates that the mapper was full, not that dispatch was prevented.", }, [ POWER5_PME_PM_FLUSH_SB ] = { .pme_name = "PM_FLUSH_SB", .pme_code = 0x330e2, .pme_short_desc = "Flush caused by scoreboard operation", .pme_long_desc = "This thread has been flushed at dispatch because its scoreboard bit is set indicating that a non-renamed resource is being updated. 
This allows the other thread to have more machine resources for it to make progress while this thread is stalled.", }, [ POWER5_PME_PM_MRK_DATA_FROM_L375_SHR ] = { .pme_name = "PM_MRK_DATA_FROM_L375_SHR", .pme_code = 0x3c709e, .pme_short_desc = "Marked data loaded from L3.75 shared", .pme_long_desc = "The processor's Data Cache was reloaded with shared (S) data from the L3 of a chip on a different module than this processor is located due to a marked load.", }, [ POWER5_PME_PM_MRK_GRP_CMPL ] = { .pme_name = "PM_MRK_GRP_CMPL", .pme_code = 0x400013, .pme_short_desc = "Marked group completed", .pme_long_desc = "A group containing a sampled instruction completed. Microcoded instructions that span multiple groups will generate this event once per group.", }, [ POWER5_PME_PM_SUSPENDED ] = { .pme_name = "PM_SUSPENDED", .pme_code = 0x0, .pme_short_desc = "Suspended", .pme_long_desc = "The counter is suspended (does not count).", }, [ POWER5_PME_PM_GRP_IC_MISS_BR_REDIR_NONSPEC ] = { .pme_name = "PM_GRP_IC_MISS_BR_REDIR_NONSPEC", .pme_code = 0x120e5, .pme_short_desc = "Group experienced non-speculative I cache miss or branch redirect", .pme_long_desc = "Group experienced non-speculative I cache miss or branch redirect", }, [ POWER5_PME_PM_SNOOP_RD_RETRY_QFULL ] = { .pme_name = "PM_SNOOP_RD_RETRY_QFULL", .pme_code = 0x700c6, .pme_short_desc = "Snoop read retry due to read queue full", .pme_long_desc = "A snoop request for a read from memory was retried because the read queues were full. This event is sent from the Memory Controller clock domain and must be scaled accordingly.", }, [ POWER5_PME_PM_L3SB_MOD_INV ] = { .pme_name = "PM_L3SB_MOD_INV", .pme_code = 0x730e4, .pme_short_desc = "L3 slice B transition from modified to invalid", .pme_long_desc = "L3 snooper detects someone doing a store to a line that is truly M in this L3 (i.e. L3 going M=>I). Mu|Me are not included since they are formed due to a prev read op. 
Tx is not included since it is considered shared at this point.", }, [ POWER5_PME_PM_DATA_FROM_L35_SHR ] = { .pme_name = "PM_DATA_FROM_L35_SHR", .pme_code = 0x1c309e, .pme_short_desc = "Data loaded from L3.5 shared", .pme_long_desc = "The processor's Data Cache was reloaded with shared (S) data from the L3 of a chip on the same module as this processor is located due to a demand load.", }, [ POWER5_PME_PM_LD_MISS_L1_LSU1 ] = { .pme_name = "PM_LD_MISS_L1_LSU1", .pme_code = 0xc10c6, .pme_short_desc = "LSU1 L1 D cache load misses", .pme_long_desc = "Load references that miss the Level 1 Data cache, by unit 1.", }, [ POWER5_PME_PM_STCX_FAIL ] = { .pme_name = "PM_STCX_FAIL", .pme_code = 0x820e1, .pme_short_desc = "STCX failed", .pme_long_desc = "A stcx (stwcx or stdcx) failed", }, [ POWER5_PME_PM_DC_PREF_DST ] = { .pme_name = "PM_DC_PREF_DST", .pme_code = 0x830e6, .pme_short_desc = "DST (Data Stream Touch) stream start", .pme_long_desc = "A prefetch stream was started using the DST instruction.", }, [ POWER5_PME_PM_GRP_DISP ] = { .pme_name = "PM_GRP_DISP", .pme_code = 0x200002, .pme_short_desc = "Group dispatches", .pme_long_desc = "A group was dispatched", }, [ POWER5_PME_PM_L2SA_RCLD_DISP_FAIL_ADDR ] = { .pme_name = "PM_L2SA_RCLD_DISP_FAIL_ADDR", .pme_code = 0x711c0, .pme_short_desc = "L2 slice A RC load dispatch attempt failed due to address collision with RC/CO/SN/SQ", .pme_long_desc = "A Read/Claim dispatch for a load failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time.", }, [ POWER5_PME_PM_FPU0_FPSCR ] = { .pme_name = "PM_FPU0_FPSCR", .pme_code = 0x30e0, .pme_short_desc = "FPU0 executed FPSCR instruction", .pme_long_desc = "FPU0 has executed FPSCR move related instruction. 
This could be mtfsfi*, mtfsb0*, mtfsb1*, mffs*, mtfsf*, mcrsf* where XYZ* means XYZ, XYZs, XYZ., XYZs.", }, [ POWER5_PME_PM_DATA_FROM_L2 ] = { .pme_name = "PM_DATA_FROM_L2", .pme_code = 0x1c3087, .pme_short_desc = "Data loaded from L2", .pme_long_desc = "The processor's Data Cache was reloaded from the local L2 due to a demand load.", }, [ POWER5_PME_PM_FPU1_DENORM ] = { .pme_name = "PM_FPU1_DENORM", .pme_code = 0x20e4, .pme_short_desc = "FPU1 received denormalized data", .pme_long_desc = "FPU1 has encountered a denormalized operand.", }, [ POWER5_PME_PM_FPU_1FLOP ] = { .pme_name = "PM_FPU_1FLOP", .pme_code = 0x100090, .pme_short_desc = "FPU executed one flop instruction", .pme_long_desc = "The floating point unit has executed an add, mult, sub, compare, fsel, fneg, fabs, fnabs, fres, or frsqrte kind of instruction. These are single FLOP operations.", }, [ POWER5_PME_PM_L2SC_RCLD_DISP_FAIL_OTHER ] = { .pme_name = "PM_L2SC_RCLD_DISP_FAIL_OTHER", .pme_code = 0x731e2, .pme_short_desc = "L2 slice C RC load dispatch attempt failed due to other reasons", .pme_long_desc = "A Read/Claim dispatch for a load failed for some reason other than Full or Collision conditions.", }, [ POWER5_PME_PM_L2SC_RCST_DISP_FAIL_RC_FULL ] = { .pme_name = "PM_L2SC_RCST_DISP_FAIL_RC_FULL", .pme_code = 0x722e2, .pme_short_desc = "L2 slice C RC store dispatch attempt failed due to all RC full", .pme_long_desc = "A Read/Claim dispatch for a store failed because all RC machines are busy.", }, [ POWER5_PME_PM_FPU0_FSQRT ] = { .pme_name = "PM_FPU0_FSQRT", .pme_code = 0xc2, .pme_short_desc = "FPU0 executed FSQRT instruction", .pme_long_desc = "FPU0 has executed a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs.", }, [ POWER5_PME_PM_LD_REF_L1 ] = { .pme_name = "PM_LD_REF_L1", .pme_code = 0x4c1090, .pme_short_desc = "L1 D cache load references", .pme_long_desc = "Load references to the Level 1 Data Cache. 
Combined unit 0 + 1.", }, [ POWER5_PME_PM_INST_FROM_L1 ] = { .pme_name = "PM_INST_FROM_L1", .pme_code = 0x22208d, .pme_short_desc = "Instruction fetched from L1", .pme_long_desc = "An instruction fetch group was fetched from L1. Fetch Groups can contain up to 8 instructions", }, [ POWER5_PME_PM_TLBIE_HELD ] = { .pme_name = "PM_TLBIE_HELD", .pme_code = 0x130e4, .pme_short_desc = "TLBIE held at dispatch", .pme_long_desc = "Cycles a TLBIE instruction was held at dispatch.", }, [ POWER5_PME_PM_DC_PREF_OUT_OF_STREAMS ] = { .pme_name = "PM_DC_PREF_OUT_OF_STREAMS", .pme_code = 0xc50c2, .pme_short_desc = "D cache out of prefetch streams", .pme_long_desc = "A new prefetch stream was detected but no more stream entries were available.", }, [ POWER5_PME_PM_MRK_DATA_FROM_L25_MOD_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_L25_MOD_CYC", .pme_code = 0x4c70a2, .pme_short_desc = "Marked load latency from L2.5 modified", .pme_long_desc = "Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. To calculate average latency divide this count by the number of marked misses to the same level.", }, [ POWER5_PME_PM_MRK_LSU1_FLUSH_SRQ ] = { .pme_name = "PM_MRK_LSU1_FLUSH_SRQ", .pme_code = 0x810c7, .pme_short_desc = "LSU1 marked SRQ lhs flushes", .pme_long_desc = "A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group.", }, [ POWER5_PME_PM_MEM_RQ_DISP_Q0to3 ] = { .pme_name = "PM_MEM_RQ_DISP_Q0to3", .pme_code = 0x702c6, .pme_short_desc = "Memory read queue dispatched to queues 0-3", .pme_long_desc = "A memory operation was dispatched to read queue 0,1,2, or 3. 
This event is sent from the Memory Controller clock domain and must be scaled accordingly.", }, [ POWER5_PME_PM_ST_REF_L1_LSU1 ] = { .pme_name = "PM_ST_REF_L1_LSU1", .pme_code = 0xc10c5, .pme_short_desc = "LSU1 L1 D cache store references", .pme_long_desc = "Store references to the Data Cache by LSU1.", }, [ POWER5_PME_PM_MRK_LD_MISS_L1 ] = { .pme_name = "PM_MRK_LD_MISS_L1", .pme_code = 0x182088, .pme_short_desc = "Marked L1 D cache load misses", .pme_long_desc = "Marked L1 D cache load misses", }, [ POWER5_PME_PM_L1_WRITE_CYC ] = { .pme_name = "PM_L1_WRITE_CYC", .pme_code = 0x230e7, .pme_short_desc = "Cycles writing to instruction L1", .pme_long_desc = "Cycles that a cache line was written to the instruction cache.", }, [ POWER5_PME_PM_L2SC_ST_REQ ] = { .pme_name = "PM_L2SC_ST_REQ", .pme_code = 0x723e2, .pme_short_desc = "L2 slice C store requests", .pme_long_desc = "A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. The event is provided on each of the three slices A, B, and C.", }, [ POWER5_PME_PM_CMPLU_STALL_FDIV ] = { .pme_name = "PM_CMPLU_STALL_FDIV", .pme_code = 0x21109b, .pme_short_desc = "Completion stall caused by FDIV or FQRT instruction", .pme_long_desc = "Following a completion stall (any period when no groups completed) the last instruction to finish before completion resumes was a floating point divide or square root instruction. 
This is a subset of PM_CMPLU_STALL_FPU.", }, [ POWER5_PME_PM_THRD_SEL_OVER_CLB_EMPTY ] = { .pme_name = "PM_THRD_SEL_OVER_CLB_EMPTY", .pme_code = 0x410c2, .pme_short_desc = "Thread selection overrides caused by CLB empty", .pme_long_desc = "Thread selection was overridden because one thread's CLB was empty.", }, [ POWER5_PME_PM_BR_MPRED_CR ] = { .pme_name = "PM_BR_MPRED_CR", .pme_code = 0x230e5, .pme_short_desc = "Branch mispredictions due to CR bit setting", .pme_long_desc = "A conditional branch instruction was incorrectly predicted as taken or not taken. The branch execution unit detects a branch mispredict because the CR value is opposite of the predicted value. This will result in a branch redirect flush if not overfidden by a flush of an older instruction.", }, [ POWER5_PME_PM_L3SB_MOD_TAG ] = { .pme_name = "PM_L3SB_MOD_TAG", .pme_code = 0x720e4, .pme_short_desc = "L3 slice B transition from modified to TAG", .pme_long_desc = "L3 snooper detects someone doing a read to a line that is truly M in this L3(i.e. L3 going M->T or M->I(go_Mu case); Mu|Me are not included since they are formed due to a prev read op). Tx is not included since it is considered shared at this point.", }, [ POWER5_PME_PM_MRK_DATA_FROM_L2MISS ] = { .pme_name = "PM_MRK_DATA_FROM_L2MISS", .pme_code = 0x3c709b, .pme_short_desc = "Marked data loaded missed L2", .pme_long_desc = "DL1 was reloaded from beyond L2 due to a marked demand load.", }, [ POWER5_PME_PM_LSU_REJECT_SRQ ] = { .pme_name = "PM_LSU_REJECT_SRQ", .pme_code = 0x1c6088, .pme_short_desc = "LSU SRQ lhs rejects", .pme_long_desc = "Total cycles the Load Store Unit is busy rejecting instructions because of Load Hit Store conditions. Loads are rejected when data is needed from a previous store instruction but store forwarding is not possible because the data is not fully contained in the Store Data Queue or is not yet available in the Store Data Queue. 
Combined Unit 0 + 1.", }, [ POWER5_PME_PM_LD_MISS_L1 ] = { .pme_name = "PM_LD_MISS_L1", .pme_code = 0x3c1088, .pme_short_desc = "L1 D cache load misses", .pme_long_desc = "Load references that miss the Level 1 Data cache. Combined unit 0 + 1.", }, [ POWER5_PME_PM_INST_FROM_PREF ] = { .pme_name = "PM_INST_FROM_PREF", .pme_code = 0x32208d, .pme_short_desc = "Instruction fetched from prefetch", .pme_long_desc = "An instruction fetch group was fetched from the prefetch buffer. Fetch groups can contain up to 8 instructions", }, [ POWER5_PME_PM_DC_INV_L2 ] = { .pme_name = "PM_DC_INV_L2", .pme_code = 0xc10c7, .pme_short_desc = "L1 D cache entries invalidated from L2", .pme_long_desc = "A dcache invalidated was received from the L2 because a line in L2 was castout.", }, [ POWER5_PME_PM_STCX_PASS ] = { .pme_name = "PM_STCX_PASS", .pme_code = 0x820e5, .pme_short_desc = "Stcx passes", .pme_long_desc = "A stcx (stwcx or stdcx) instruction was successful", }, [ POWER5_PME_PM_LSU_SRQ_FULL_CYC ] = { .pme_name = "PM_LSU_SRQ_FULL_CYC", .pme_code = 0x110c3, .pme_short_desc = "Cycles SRQ full", .pme_long_desc = "Cycles the Store Request Queue is full.", }, [ POWER5_PME_PM_FPU_FIN ] = { .pme_name = "PM_FPU_FIN", .pme_code = 0x401088, .pme_short_desc = "FPU produced a result", .pme_long_desc = "FPU finished, produced a result. This only indicates finish, not completion. Combined Unit 0 + Unit 1. Floating Point Stores are included in this count but not Floating Point Loads., , , XYZs", }, [ POWER5_PME_PM_L2SA_SHR_MOD ] = { .pme_name = "PM_L2SA_SHR_MOD", .pme_code = 0x700c0, .pme_short_desc = "L2 slice A transition from shared to modified", .pme_long_desc = "A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A, B, and C. 
", }, [ POWER5_PME_PM_LSU_SRQ_STFWD ] = { .pme_name = "PM_LSU_SRQ_STFWD", .pme_code = 0x1c2088, .pme_short_desc = "SRQ store forwarded", .pme_long_desc = "Data from a store instruction was forwarded to a load. A load that misses L1 but becomes a store forward is treated as a load miss and it causes the DL1 load miss event to be counted. It does not go into the LMQ. If a load that hits L1 but becomes a store forward, then it's not treated as a load miss. Combined Unit 0 + 1.", }, [ POWER5_PME_PM_0INST_CLB_CYC ] = { .pme_name = "PM_0INST_CLB_CYC", .pme_code = 0x400c0, .pme_short_desc = "Cycles no instructions in CLB", .pme_long_desc = "The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific.", }, [ POWER5_PME_PM_FXU0_FIN ] = { .pme_name = "PM_FXU0_FIN", .pme_code = 0x130e2, .pme_short_desc = "FXU0 produced a result", .pme_long_desc = "The Fixed Point unit 0 finished an instruction and produced a result. 
Instructions that finish may not necessary complete.", }, [ POWER5_PME_PM_L2SB_RCST_DISP_FAIL_RC_FULL ] = { .pme_name = "PM_L2SB_RCST_DISP_FAIL_RC_FULL", .pme_code = 0x722e1, .pme_short_desc = "L2 slice B RC store dispatch attempt failed due to all RC full", .pme_long_desc = "A Read/Claim dispatch for a store failed because all RC machines are busy.", }, [ POWER5_PME_PM_THRD_GRP_CMPL_BOTH_CYC ] = { .pme_name = "PM_THRD_GRP_CMPL_BOTH_CYC", .pme_code = 0x200013, .pme_short_desc = "Cycles group completed by both threads", .pme_long_desc = "Cycles that both threads completed.", }, [ POWER5_PME_PM_PMC5_OVERFLOW ] = { .pme_name = "PM_PMC5_OVERFLOW", .pme_code = 0x10001a, .pme_short_desc = "PMC5 Overflow", .pme_long_desc = "Overflows from PMC5 are counted. This effectively widens the PMC. The Overflow from the original PMC will not trigger an exception even if the PMU is configured to generate exceptions on overflow.", }, [ POWER5_PME_PM_FPU0_FDIV ] = { .pme_name = "PM_FPU0_FDIV", .pme_code = 0xc0, .pme_short_desc = "FPU0 executed FDIV instruction", .pme_long_desc = "FPU0 has executed a divide instruction. This could be fdiv, fdivs, fdiv. 
fdivs.", }, [ POWER5_PME_PM_PTEG_FROM_L375_SHR ] = { .pme_name = "PM_PTEG_FROM_L375_SHR", .pme_code = 0x38309e, .pme_short_desc = "PTEG loaded from L3.75 shared", .pme_long_desc = "A Page Table Entry was loaded into the TLB with shared (S) data from the L3 of a chip on a different module than this processor is located, due to a demand load.", }, [ POWER5_PME_PM_LD_REF_L1_LSU1 ] = { .pme_name = "PM_LD_REF_L1_LSU1", .pme_code = 0xc10c4, .pme_short_desc = "LSU1 L1 D cache load references", .pme_long_desc = "Load references to Level 1 Data Cache, by unit 1.", }, [ POWER5_PME_PM_L2SA_RC_DISP_FAIL_CO_BUSY ] = { .pme_name = "PM_L2SA_RC_DISP_FAIL_CO_BUSY", .pme_code = 0x703c0, .pme_short_desc = "L2 slice A RC dispatch attempt failed due to RC/CO pair chosen was miss and CO already busy", .pme_long_desc = "A Read/Claim Dispatch was rejected at dispatch because the Castout Machine was busy. In the case of an RC starting up on a miss and the victim is valid, the CO machine must be available for the RC to process the access. If the CO is still busy working on an old castout, then the RC must not-ack the access if it is a miss(re-issued by the CIU). If it is a miss and the CO is available to process the castout, the RC will accept the access. 
Once the RC has finished, it can restart and process new accesses that result in a hit (or miss that doesn't need a CO) even though the CO is still processing a castout from a previous access.", }, [ POWER5_PME_PM_HV_CYC ] = { .pme_name = "PM_HV_CYC", .pme_code = 0x20000b, .pme_short_desc = "Hypervisor Cycles", .pme_long_desc = "Cycles when the processor is executing in Hypervisor (MSR[HV] = 1 and MSR[PR]=0)", }, [ POWER5_PME_PM_THRD_PRIO_DIFF_0_CYC ] = { .pme_name = "PM_THRD_PRIO_DIFF_0_CYC", .pme_code = 0x430e3, .pme_short_desc = "Cycles no thread priority difference", .pme_long_desc = "Cycles when this thread's priority is equal to the other thread's priority.", }, [ POWER5_PME_PM_LR_CTR_MAP_FULL_CYC ] = { .pme_name = "PM_LR_CTR_MAP_FULL_CYC", .pme_code = 0x100c6, .pme_short_desc = "Cycles LR/CTR mapper full", .pme_long_desc = "The LR/CTR mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. This event only indicates that the mapper was full, not that dispatch was prevented.", }, [ POWER5_PME_PM_L3SB_SHR_INV ] = { .pme_name = "PM_L3SB_SHR_INV", .pme_code = 0x710c4, .pme_short_desc = "L3 slice B transition from shared to invalid", .pme_long_desc = "L3 snooper detects someone doing a store to a line that is Sx in this L3(i.e. invalidate hit SX and dispatched).", }, [ POWER5_PME_PM_DATA_FROM_RMEM ] = { .pme_name = "PM_DATA_FROM_RMEM", .pme_code = 0x1c30a1, .pme_short_desc = "Data loaded from remote memory", .pme_long_desc = "The processor's Data Cache was reloaded from memory attached to a different module than this proccessor is located on.", }, [ POWER5_PME_PM_DATA_FROM_L275_MOD ] = { .pme_name = "PM_DATA_FROM_L275_MOD", .pme_code = 0x1c30a3, .pme_short_desc = "Data loaded from L2.75 modified", .pme_long_desc = "The processor's Data Cache was reloaded with modified (M) data from the L2 on a different module than this processor is located due to a demand load. 
", }, [ POWER5_PME_PM_LSU0_REJECT_SRQ ] = { .pme_name = "PM_LSU0_REJECT_SRQ", .pme_code = 0xc60e0, .pme_short_desc = "LSU0 SRQ lhs rejects", .pme_long_desc = "Total cycles the Load Store Unit 0 is busy rejecting instructions because of Load Hit Store conditions. Loads are rejected when data is needed from a previous store instruction but store forwarding is not possible because the data is not fully contained in the Store Data Queue or is not yet available in the Store Data Queue.", }, [ POWER5_PME_PM_LSU1_DERAT_MISS ] = { .pme_name = "PM_LSU1_DERAT_MISS", .pme_code = 0x800c6, .pme_short_desc = "LSU1 DERAT misses", .pme_long_desc = "A data request (load or store) from LSU Unit 1 missed the ERAT and resulted in an ERAT reload. Multiple instructions may miss the ERAT entry for the same 4K page, but only one reload will occur.", }, [ POWER5_PME_PM_MRK_LSU_FIN ] = { .pme_name = "PM_MRK_LSU_FIN", .pme_code = 0x400014, .pme_short_desc = "Marked instruction LSU processing finished", .pme_long_desc = "One of the Load/Store Units finished a marked instruction. Instructions that finish may not necessary complete", }, [ POWER5_PME_PM_DTLB_MISS_16M ] = { .pme_name = "PM_DTLB_MISS_16M", .pme_code = 0xc40c4, .pme_short_desc = "Data TLB miss for 16M page", .pme_long_desc = "Data TLB references to 16MB pages that missed the TLB. Page size is determined at TLB reload time.", }, [ POWER5_PME_PM_LSU0_FLUSH_UST ] = { .pme_name = "PM_LSU0_FLUSH_UST", .pme_code = 0xc00c1, .pme_short_desc = "LSU0 unaligned store flushes", .pme_long_desc = "A store was flushed from unit 0 because it was unaligned (crossed a 4K boundary).", }, [ POWER5_PME_PM_L2SC_MOD_TAG ] = { .pme_name = "PM_L2SC_MOD_TAG", .pme_code = 0x720e2, .pme_short_desc = "L2 slice C transition from modified to tagged", .pme_long_desc = "A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. 
This transition was caused by a read snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C.", }, [ POWER5_PME_PM_L2SB_RC_DISP_FAIL_CO_BUSY ] = { .pme_name = "PM_L2SB_RC_DISP_FAIL_CO_BUSY", .pme_code = 0x703c1, .pme_short_desc = "L2 slice B RC dispatch attempt failed due to RC/CO pair chosen was miss and CO already busy", .pme_long_desc = "A Read/Claim Dispatch was rejected at dispatch because the Castout Machine was busy. In the case of an RC starting up on a miss and the victim is valid, the CO machine must be available for the RC to process the access. If the CO is still busy working on an old castout, then the RC must not-ack the access if it is a miss(re-issued by the CIU). If it is a miss and the CO is available to process the castout, the RC will accept the access. Once the RC has finished, it can restart and process new accesses that result in a hit (or miss that doesn't need a CO) even though the CO is still processing a castout from a previous access.", } }; #endif papi-5.6.0/src/libpfm4/perf_examples/notify_group.c000664 001750 001750 00000012577 13216244365 024421 0ustar00jshenry1963jshenry1963000000 000000 /* * notify_group.c - self-sampling multuiple events in one group * * Copyright (c) 2009 Google, Inc * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "perf_util.h" #define SMPL_PERIOD 2400000000ULL typedef struct { uint64_t ip; } sample_t; static volatile unsigned long notification_received; static perf_event_desc_t *fds; static int num_fds; static int buffer_pages = 1; /* size of buffer payload (must be power of 2) */ static void sigio_handler(int n, siginfo_t *info, struct sigcontext *sc) { struct perf_event_header ehdr; uint64_t ip; int id, ret; id = perf_fd2event(fds, num_fds, info->si_fd); if (id == -1) errx(1, "cannot find event for descriptor %d", info->si_fd); ret = perf_read_buffer(fds+id, &ehdr, sizeof(ehdr)); if (ret) errx(1, "cannot read event header"); if (ehdr.type != PERF_RECORD_SAMPLE) { warnx("unknown event type %d, skipping", ehdr.type); perf_skip_buffer(fds+id, ehdr.size - sizeof(ehdr)); goto skip; } ret = perf_read_buffer(fds+id, &ip, sizeof(ip)); if (ret) errx(1, "cannot read IP"); notification_received++; printf("Notification %lu: 0x%"PRIx64" fd=%d %s\n", notification_received, ip, info->si_fd, fds[id].name); skip: /* * rearm the counter for one more shot */ ret = ioctl(info->si_fd, PERF_EVENT_IOC_REFRESH, 1); if (ret == -1) err(1, "cannot refresh"); } /* * infinite loop waiting for notification to get out */ void busyloop(void) { /* * busy loop to burn CPU cycles */ for(;notification_received < 1024;) ; } int main(int argc, char **argv) { struct sigaction act; sigset_t new, old; size_t pgsz; int 
ret, i; ret = pfm_initialize(); if (ret != PFM_SUCCESS) errx(1, "Cannot initialize library: %s", pfm_strerror(ret)); pgsz = sysconf(_SC_PAGESIZE); /* * Install the signal handler (SIGIO) */ memset(&act, 0, sizeof(act)); act.sa_sigaction = (void *)sigio_handler; act.sa_flags = SA_SIGINFO; sigaction (SIGIO, &act, 0); sigemptyset(&old); sigemptyset(&new); sigaddset(&new, SIGIO); ret = sigprocmask(SIG_SETMASK, NULL, &old); if (ret) err(1, "sigprocmask failed"); if (sigismember(&old, SIGIO)) { warnx("program started with SIGIO masked, unmasking it now\n"); ret = sigprocmask(SIG_UNBLOCK, &new, NULL); if (ret) err(1, "sigprocmask failed"); } /* * allocates fd for us */ ret = perf_setup_list_events("cycles," "instructions," "cycles", &fds, &num_fds); if (ret || !num_fds) exit(1); fds[0].fd = -1; for(i=0; i < num_fds; i++) { /* want a notification for each sample added to the buffer */ fds[i].hw.disabled = !!i; printf("i=%d disabled=%d\n", i, fds[i].hw.disabled); fds[i].hw.wakeup_events = 1; fds[i].hw.sample_type = PERF_SAMPLE_IP; fds[i].hw.sample_period = SMPL_PERIOD; fds[i].fd = perf_event_open(&fds[i].hw, 0, -1, fds[0].fd, 0); if (fds[i].fd == -1) { warn("cannot attach event %s", fds[i].name); goto error; } fds[i].buf = mmap(NULL, (buffer_pages + 1)*pgsz, PROT_READ|PROT_WRITE, MAP_SHARED, fds[i].fd, 0); if (fds[i].buf == MAP_FAILED) err(1, "cannot mmap buffer"); /* * setup asynchronous notification on the file descriptor */ ret = fcntl(fds[i].fd, F_SETFL, fcntl(fds[i].fd, F_GETFL, 0) | O_ASYNC); if (ret == -1) err(1, "cannot set ASYNC"); /* * necessary if we want to get the file descriptor for * which the SIGIO is sent for in siginfo->si_fd. 
* SA_SIGINFO in itself is not enough */ ret = fcntl(fds[i].fd, F_SETSIG, SIGIO); if (ret == -1) err(1, "cannot setsig"); /* * get ownership of the descriptor */ ret = fcntl(fds[i].fd, F_SETOWN, getpid()); if (ret == -1) err(1, "cannot setown"); fds[i].pgmsk = (buffer_pages * pgsz) - 1; } for(i=0; i < num_fds; i++) { ret = ioctl(fds[i].fd, PERF_EVENT_IOC_REFRESH , 1); if (ret == -1) err(1, "cannot refresh"); } busyloop(); prctl(PR_TASK_PERF_EVENTS_DISABLE); error: /* * destroy our session */ for(i=0; i < num_fds; i++) if (fds[i].fd > -1) close(fds[i].fd); perf_free_fds(fds, num_fds); /* free libpfm resources cleanly */ pfm_terminate(); return 0; } papi-5.6.0/src/libpfm4/lib/events/intel_snbep_unc_cbo_events.h000664 001750 001750 00000057205 13216244364 026474 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2012 Google, Inc * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * This file is part of libpfm, a performance monitoring support library for * applications on Linux. * * PMU: snbep_unc_cbo (Intel SandyBridge-EP C-Box uncore PMU) */ #define CBO_FILT_MESIF(a, b, c, d) \ { .uname = "STATE_"#a,\ .udesc = #b" cacheline state",\ .ufilters[0] = 1ULL << (18 + (c)),\ .grpid = d, \ } #define CBO_FILT_MESIFS(d) \ CBO_FILT_MESIF(I, Invalid, 0, d), \ CBO_FILT_MESIF(S, Shared, 1, d), \ CBO_FILT_MESIF(E, Exclusive, 2, d), \ CBO_FILT_MESIF(M, Modified, 3, d), \ CBO_FILT_MESIF(F, Forward, 4, d), \ { .uname = "STATE_MESIF",\ .udesc = "Any cache line state",\ .ufilters[0] = 0x1fULL << 18,\ .grpid = d, \ .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, \ } #define CBO_FILT_OPC(d) \ { .uname = "OPC_RFO",\ .udesc = "Demand data RFO (combine with any OPCODE umask)",\ .ufilters[0] = 0x180ULL << 23, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_CRD",\ .udesc = "Demand code read (combine with any OPCODE umask)",\ .ufilters[0] = 0x181ULL << 23, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_DRD",\ .udesc = "Demand data read (combine with any OPCODE umask)",\ .ufilters[0] = 0x182ULL << 23, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_PRD",\ .udesc = "Partial reads (UC) (combine with any OPCODE umask)",\ .ufilters[0] = 0x187ULL << 23, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_WCILF",\ .udesc = "Full Stream store (combine with any OPCODE umask)", \ .ufilters[0] = 0x18cULL << 23, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_WCIL",\ .udesc = "Partial Stream store (combine with any OPCODE umask)", \ .ufilters[0] = 0x18dULL << 23, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_PF_RFO",\ .udesc = "Prefetch RFO into LLC but do not pass to L2 (includes hints) (combine with any OPCODE umask)", \ .ufilters[0] = 0x190ULL << 23, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_PF_CODE",\ .udesc = "Prefetch code into LLC but do 
not pass to L2 (includes hints) (combine with any OPCODE umask)", \ .ufilters[0] = 0x191ULL << 23, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_PF_DATA",\ .udesc = "Prefetch data into LLC but do not pass to L2 (includes hints) (combine with any OPCODE umask)", \ .ufilters[0] = 0x192ULL << 23, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_PCIWILF",\ .udesc = "PCIe write (non-allocating) (combine with any OPCODE umask)", \ .ufilters[0] = 0x194ULL << 23, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_PCIPRD",\ .udesc = "PCIe UC read (combine with any OPCODE umask)", \ .ufilters[0] = 0x195ULL << 23, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_PCIITOM",\ .udesc = "PCIe write (allocating) (combine with any OPCODE umask)", \ .ufilters[0] = 0x19cULL << 23, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_PCIRDCUR",\ .udesc = "PCIe read current (combine with any OPCODE umask)", \ .ufilters[0] = 0x19eULL << 23, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_WBMTOI",\ .udesc = "Request writeback modified invalidate line (combine with any OPCODE umask)", \ .ufilters[0] = 0x1c4ULL << 23, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_WBMTOE",\ .udesc = "Request writeback modified set to exclusive (combine with any OPCODE umask)", \ .ufilters[0] = 0x1c5ULL << 23, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_ITOM",\ .udesc = "Request invalidate line (combine with any OPCODE umask)", \ .ufilters[0] = 0x1c8ULL << 23, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_PCINSRD",\ .udesc = "PCIe non-snoop read (combine with any OPCODE umask)", \ .ufilters[0] = 0x1e4ULL << 23, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_PCINSWR",\ .udesc = "PCIe non-snoop write (partial) (combine with any OPCODE umask)", \ .ufilters[0] = 0x1e5ULL << 23, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, 
\ { .uname = "OPC_PCINSWRF",\ .udesc = "PCIe non-snoop write (full) (combine with any OPCODE umask)", \ .ufilters[0] = 0x1e6ULL << 23, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ } static const intel_x86_umask_t snbep_unc_c_llc_lookup[]={ { .uname = "ANY", .udesc = "Any request", .grpid = 0, .uflags = INTEL_X86_NCOMBO, .ucode = 0x1f00, }, { .uname = "DATA_READ", .udesc = "Data read requests", .grpid = 0, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, .ucode = 0x300, }, { .uname = "WRITE", .udesc = "Write requests. Includes all write transactions (cached, uncached)", .grpid = 0, .uflags = INTEL_X86_NCOMBO, .ucode = 0x500, }, { .uname = "REMOTE_SNOOP", .udesc = "External snoop request", .grpid = 0, .uflags = INTEL_X86_NCOMBO, .ucode = 0x900, }, { .uname = "NID", .udesc = "Match a given RTID destination NID (must provide nf=X modifier)", .uflags = INTEL_X86_NCOMBO | INTEL_X86_GRP_DFL_NONE, .umodmsk_req = _SNBEP_UNC_ATTR_NF, .grpid = 1, .ucode = 0x4100, }, CBO_FILT_MESIFS(2), }; static const intel_x86_umask_t snbep_unc_c_llc_victims[]={ { .uname = "M_STATE", .udesc = "Lines in M state", .ucode = 0x100, }, { .uname = "E_STATE", .udesc = "Lines in E state", .ucode = 0x200, }, { .uname = "S_STATE", .udesc = "Lines in S state", .ucode = 0x400, }, { .uname = "MISS", .udesc = "TBD", .ucode = 0x800, }, { .uname = "NID", .udesc = "Victimized Lines matching the NID filter (must provide nf=X modifier)", .uflags = INTEL_X86_NCOMBO, .umodmsk_req = _SNBEP_UNC_ATTR_NF, .ucode = 0x4000, }, }; static const intel_x86_umask_t snbep_unc_c_misc[]={ { .uname = "RSPI_WAS_FSE", .udesc = "Silent snoop eviction", .ucode = 0x100, }, { .uname = "WC_ALIASING", .udesc = "Write combining aliasing", .ucode = 0x200, }, { .uname = "STARTED", .udesc = "TBD", .ucode = 0x400, }, { .uname = "RFO_HIT_S", .udesc = "RFO hits in S state", .ucode = 0x800, }, }; static const intel_x86_umask_t snbep_unc_c_ring_ad_used[]={ { .uname = "UP_EVEN", .udesc = "Up and Even ring polarity filter", .ucode = 0x100, }, { 
.uname = "UP_ODD", .udesc = "Up and odd ring polarity filter", .ucode = 0x200, }, { .uname = "DOWN_EVEN", .udesc = "Down and even ring polarity filter", .ucode = 0x400, }, { .uname = "DOWN_ODD", .udesc = "Down and odd ring polarity filter", .ucode = 0x800, }, }; static const intel_x86_umask_t snbep_unc_c_ring_bounces[]={ { .uname = "AK_CORE", .udesc = "Acknowledgment to core", .ucode = 0x200, }, { .uname = "BL_CORE", .udesc = "Data response to core", .ucode = 0x400, }, { .uname = "IV_CORE", .udesc = "Snoops of processor cache", .ucode = 0x800, }, }; static const intel_x86_umask_t snbep_unc_c_ring_iv_used[]={ { .uname = "ANY", .udesc = "Any filter", .ucode = 0xf00, .uflags = INTEL_X86_DFL, }, }; static const intel_x86_umask_t snbep_unc_c_rxr_ext_starved[]={ { .uname = "IRQ", .udesc = "Irq externally starved, therefore blocking the IPQ", .ucode = 0x100, }, { .uname = "IPQ", .udesc = "IPQ externally starved, therefore blocking the IRQ", .ucode = 0x200, }, { .uname = "ISMQ", .udesc = "ISMQ externally starved, therefore blocking both IRQ and IPQ", .ucode = 0x400, }, { .uname = "ISMQ_BIDS", .udesc = "Number of time the ISMQ bids", .ucode = 0x800, }, }; static const intel_x86_umask_t snbep_unc_c_rxr_inserts[]={ { .uname = "IPQ", .udesc = "IPQ", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO, }, { .uname = "IRQ", .udesc = "IRQ", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "IRQ_REJECTED", .udesc = "IRQ rejected", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "VFIFO", .udesc = "Counts the number of allocated into the IRQ ordering FIFO", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t snbep_unc_c_rxr_ipq_retry[]={ { .uname = "ADDR_CONFLICT", .udesc = "Address conflict", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ANY", .udesc = "Any Reject", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "FULL", .udesc = "No Egress credits", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname 
= "QPI_CREDITS", .udesc = "No QPI credits", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t snbep_unc_c_rxr_irq_retry[]={ { .uname = "ADDR_CONFLICT", .udesc = "Address conflict", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ANY", .udesc = "Any reject", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "FULL", .udesc = "No Egress credits", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "QPI_CREDITS", .udesc = "No QPI credits", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "RTID", .udesc = "No RTIDs", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t snbep_unc_c_rxr_ismq_retry[]={ { .uname = "ANY", .udesc = "Any reject", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "FULL", .udesc = "No Egress credits", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "IIO_CREDITS", .udesc = "No IIO credits", .ucode = 0x2000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "QPI_CREDITS", .udesc = "NO QPI credits", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "RTID", .udesc = "No RTIDs", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t snbep_unc_c_tor_inserts[]={ { .uname = "EVICTION", .udesc = "Number of Evictions transactions inserted into TOR", .ucode = 0x400, .grpid = 0, .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, }, { .uname = "MISS_ALL", .udesc = "Number of miss requests inserted into the TOR", .ucode = 0xa00, .grpid = 0, .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, }, { .uname = "MISS_OPCODE", .udesc = "Number of miss transactions inserted into the TOR that match an opcode (must provide opc_* umask)", .ucode = 0x300, .grpid = 0, .uflags = INTEL_X86_NCOMBO, }, { .uname = "NID_ALL", .udesc = "Number of NID-matched transactions inserted into the TOR (must provide nf=X modifier)", .ucode = 0x4800, .grpid = 0, .umodmsk_req = _SNBEP_UNC_ATTR_NF, .uflags = 
INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, }, { .uname = "NID_EVICTION", .udesc = "Number of NID-matched eviction transactions inserted into the TOR (must provide nf=X modifier)", .ucode = 0x4400, .grpid = 0, .umodmsk_req = _SNBEP_UNC_ATTR_NF, .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, }, { .uname = "NID_MISS_ALL", .udesc = "Number of NID-matched miss transactions that were inserted into the TOR (must provide nf=X modifier)", .ucode = 0x4a00, .grpid = 0, .umodmsk_req = _SNBEP_UNC_ATTR_NF, .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, }, { .uname = "NID_MISS_OPCODE", .udesc = "Number of NID and opcode matched miss transactions inserted into the TOR (must provide opc_* umask and nf=X modifier)", .ucode = 0x4300, .grpid = 0, .umodmsk_req = _SNBEP_UNC_ATTR_NF, .uflags = INTEL_X86_NCOMBO, }, { .uname = "NID_OPCODE", .udesc = "Number of transactions inserted into the TOR that match a NID and opcode (must provide opc_* umask and nf=X modifier)", .ucode = 0x4100, .grpid = 0, .umodmsk_req = _SNBEP_UNC_ATTR_NF, .uflags = INTEL_X86_NCOMBO, }, { .uname = "NID_WB", .udesc = "Number of NID-matched write back transactions inserted into the TOR (must provide nf=X modifier)", .ucode = 0x5000, .grpid = 0, .umodmsk_req = _SNBEP_UNC_ATTR_NF, .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, }, { .uname = "OPCODE", .udesc = "Number of transactions inserted into the TOR that match an opcode (must provide opc_* umask)", .ucode = 0x100, .grpid = 0, .uflags = INTEL_X86_NCOMBO, }, { .uname = "WB", .udesc = "Number of write transactions inserted into the TOR", .ucode = 0x1000, .grpid = 0, .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, }, CBO_FILT_OPC(1) }; static const intel_x86_umask_t snbep_unc_c_tor_occupancy[]={ { .uname = "ALL", .udesc = "All valid TOR entries", .ucode = 0x800, .grpid = 0, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL | INTEL_X86_EXCL_GRP_GT, }, { .uname = "EVICTION", .udesc = "Number of outstanding eviction transactions in the TOR", .ucode = 0x400, 
.grpid = 0, .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, }, { .uname = "MISS_ALL", .udesc = "Number of outstanding miss requests in the TOR", .ucode = 0xa00, .grpid = 0, .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, }, { .uname = "MISS_OPCODE", .udesc = "Number of TOR entries that match a NID and an opcode (must provide opc_* umask)", .ucode = 0x300, .grpid = 0, .uflags = INTEL_X86_NCOMBO, }, { .uname = "NID_ALL", .udesc = "Number of NID-matched outstanding requests in the TOR (must provide nf=X modifier)", .ucode = 0x4800, .grpid = 0, .umodmsk_req = _SNBEP_UNC_ATTR_NF, .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, }, { .uname = "NID_EVICTION", .udesc = "Number of NID-matched outstanding requests in the TOR (must provide a nf=X modifier)", .ucode = 0x4400, .grpid = 0, .umodmsk_req = _SNBEP_UNC_ATTR_NF, .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, }, { .uname = "NID_MISS_ALL", .udesc = "Number of NID-matched outstanding miss requests in the TOR (must provide a nf=X modifier)", .ucode = 0x4a00, .grpid = 0, .umodmsk_req = _SNBEP_UNC_ATTR_NF, .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, }, { .uname = "NID_MISS_OPCODE", .udesc = "Number of NID-matched outstanding miss requests in the TOR that an opcode (must provide nf=X modifier and opc_* umask)", .ucode = 0x4300, .grpid = 0, .umodmsk_req = _SNBEP_UNC_ATTR_NF, .uflags = INTEL_X86_NCOMBO, }, { .uname = "NID_OPCODE", .udesc = "Number of NID-matched TOR entries that an opcode (must provide nf=X modifier and opc_* umask)", .ucode = 0x4100, .grpid = 0, .umodmsk_req = _SNBEP_UNC_ATTR_NF, .uflags = INTEL_X86_NCOMBO, }, { .uname = "OPCODE", .udesc = "Number of TOR entries that match an opcode (must provide opc_* umask)", .ucode = 0x100, .grpid = 0, .uflags = INTEL_X86_NCOMBO, }, CBO_FILT_OPC(1) }; static const intel_x86_umask_t snbep_unc_c_txr_inserts[]={ { .uname = "AD_CACHE", .udesc = "Counts the number of ring transactions from Cachebo to AD ring", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, 
}, { .uname = "AK_CACHE", .udesc = "Counts the number of ring transactions from Cachebo to AK ring", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "BL_CACHE", .udesc = "Counts the number of ring transactions from Cachebo to BL ring", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO, }, { .uname = "IV_CACHE", .udesc = "Counts the number of ring transactions from Cachebo to IV ring", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO, }, { .uname = "AD_CORE", .udesc = "Counts the number of ring transactions from Corebo to AD ring", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "AK_CORE", .udesc = "Counts the number of ring transactions from Corebo to AK ring", .ucode = 0x2000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "BL_CORE", .udesc = "Counts the number of ring transactions from Corebo to BL ring", .ucode = 0x4000, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_entry_t intel_snbep_unc_c_pe[]={ { .name = "UNC_C_CLOCKTICKS", .desc = "C-box Uncore clockticks", .modmsk = 0x0, .cntmsk = 0xf, .code = 0x00, .flags = INTEL_X86_FIXED, }, { .name = "UNC_C_COUNTER0_OCCUPANCY", .desc = "Counter 0 occupancy. Counts the occupancy related information by filtering CB0 occupancy count captured in counter 0.", .modmsk = SNBEP_UNC_CBO_ATTRS, .cntmsk = 0xe, .code = 0x1f, }, { .name = "UNC_C_ISMQ_DRD_MISS_OCC", .desc = "TBD", .modmsk = SNBEP_UNC_CBO_ATTRS, .cntmsk = 0x3, .code = 0x21, }, { .name = "UNC_C_LLC_LOOKUP", .desc = "Cache lookups. 
Counts number of times the LLC is accessed from L2 for code, data, prefetches (Must set filter mask bit 0 and select )", .modmsk = SNBEP_UNC_CBO_NID_ATTRS, .cntmsk = 0x3, .code = 0x34, .ngrp = 3, .flags = INTEL_X86_NO_AUTOENCODE, .numasks = LIBPFM_ARRAY_SIZE(snbep_unc_c_llc_lookup), .umasks = snbep_unc_c_llc_lookup, }, { .name = "UNC_C_LLC_VICTIMS", .desc = "Lines victimized", .modmsk = SNBEP_UNC_CBO_NID_ATTRS, .cntmsk = 0x3, .code = 0x37, .flags = INTEL_X86_NO_AUTOENCODE, .numasks = LIBPFM_ARRAY_SIZE(snbep_unc_c_llc_victims), .ngrp = 1, .umasks = snbep_unc_c_llc_victims, }, { .name = "UNC_C_MISC", .desc = "Miscellaneous C-Box events", .modmsk = SNBEP_UNC_CBO_ATTRS, .cntmsk = 0x3, .code = 0x39, .numasks = LIBPFM_ARRAY_SIZE(snbep_unc_c_misc), .ngrp = 1, .umasks = snbep_unc_c_misc, }, { .name = "UNC_C_RING_AD_USED", .desc = "Address ring in use. Counts number of cycles ring is being used at this ring stop", .modmsk = SNBEP_UNC_CBO_ATTRS, .cntmsk = 0xc, .code = 0x1b, .numasks = LIBPFM_ARRAY_SIZE(snbep_unc_c_ring_ad_used), .ngrp = 1, .umasks = snbep_unc_c_ring_ad_used, }, { .name = "UNC_C_RING_AK_USED", .desc = "Acknowledgment ring in use. Counts number of cycles ring is being used at this ring stop", .modmsk = SNBEP_UNC_CBO_ATTRS, .cntmsk = 0xc, .code = 0x1c, .numasks = LIBPFM_ARRAY_SIZE(snbep_unc_c_ring_ad_used), /* identical to RING_AD_USED */ .ngrp = 1, .umasks = snbep_unc_c_ring_ad_used, }, { .name = "UNC_C_RING_BL_USED", .desc = "Bus or Data ring in use. 
Counts number of cycles ring is being used at this ring stop", .modmsk = SNBEP_UNC_CBO_ATTRS, .cntmsk = 0xc, .code = 0x1d, .numasks = LIBPFM_ARRAY_SIZE(snbep_unc_c_ring_ad_used), /* identical to RING_AD_USED */ .ngrp = 1, .umasks = snbep_unc_c_ring_ad_used, }, { .name = "UNC_C_RING_BOUNCES", .desc = "Number of LLC responses that bounced in the ring", .modmsk = SNBEP_UNC_CBO_ATTRS, .cntmsk = 0x3, .code = 0x05, .numasks = LIBPFM_ARRAY_SIZE(snbep_unc_c_ring_bounces), .ngrp = 1, .umasks = snbep_unc_c_ring_bounces, }, { .name = "UNC_C_RING_IV_USED", .desc = "Invalidate ring in use. Counts number of cycles ring is being used at this ring stop", .modmsk = SNBEP_UNC_CBO_ATTRS, .cntmsk = 0xc, .code = 0x1e, .numasks = LIBPFM_ARRAY_SIZE(snbep_unc_c_ring_iv_used), .ngrp = 1, .umasks = snbep_unc_c_ring_iv_used, }, { .name = "UNC_C_RING_SRC_THRTL", .desc = "TDB", .modmsk = SNBEP_UNC_CBO_ATTRS, .cntmsk = 0x3, .code = 0x07, }, { .name = "UNC_C_RXR_EXT_STARVED", .desc = "Ingress arbiter blocking cycles", .modmsk = SNBEP_UNC_CBO_ATTRS, .cntmsk = 0x3, .code = 0x12, .numasks = LIBPFM_ARRAY_SIZE(snbep_unc_c_rxr_ext_starved), .ngrp = 1, .umasks = snbep_unc_c_rxr_ext_starved, }, { .name = "UNC_C_RXR_INSERTS", .desc = "Ingress Allocations", .code = 0x13, .cntmsk = 0x3, .ngrp = 1, .modmsk = SNBEP_UNC_CBO_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(snbep_unc_c_rxr_inserts), .umasks = snbep_unc_c_rxr_inserts }, { .name = "UNC_C_RXR_IPQ_RETRY", .desc = "Probe Queue Retries", .code = 0x31, .cntmsk = 0x3, .ngrp = 1, .modmsk = SNBEP_UNC_CBO_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(snbep_unc_c_rxr_ipq_retry), .umasks = snbep_unc_c_rxr_ipq_retry }, { .name = "UNC_C_RXR_IRQ_RETRY", .desc = "Ingress Request Queue Rejects", .code = 0x32, .cntmsk = 0x3, .ngrp = 1, .modmsk = SNBEP_UNC_CBO_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(snbep_unc_c_rxr_irq_retry), .umasks = snbep_unc_c_rxr_irq_retry }, { .name = "UNC_C_RXR_ISMQ_RETRY", .desc = "ISMQ Retries", .code = 0x33, .cntmsk = 0x3, .ngrp = 1, .modmsk = 
SNBEP_UNC_CBO_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(snbep_unc_c_rxr_ismq_retry), .umasks = snbep_unc_c_rxr_ismq_retry }, { .name = "UNC_C_RXR_OCCUPANCY", .desc = "Ingress Occupancy", .code = 0x11, .cntmsk = 0x1, .ngrp = 1, .modmsk = SNBEP_UNC_CBO_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(snbep_unc_c_rxr_inserts), .umasks = snbep_unc_c_rxr_inserts, /* identical to snbep_unc_c_rxr_inserts */ }, { .name = "UNC_C_TOR_INSERTS", .desc = "TOR Inserts", .code = 0x35, .cntmsk = 0x3, .ngrp = 2, .modmsk = SNBEP_UNC_CBO_NID_ATTRS, .flags = INTEL_X86_NO_AUTOENCODE, .numasks = LIBPFM_ARRAY_SIZE(snbep_unc_c_tor_inserts), .umasks = snbep_unc_c_tor_inserts }, { .name = "UNC_C_TOR_OCCUPANCY", .desc = "TOR Occupancy", .code = 0x36, .cntmsk = 0x1, .ngrp = 2, .modmsk = SNBEP_UNC_CBO_NID_ATTRS, .flags = INTEL_X86_NO_AUTOENCODE, .numasks = LIBPFM_ARRAY_SIZE(snbep_unc_c_tor_occupancy), .umasks = snbep_unc_c_tor_occupancy }, { .name = "UNC_C_TXR_ADS_USED", .desc = "Egress events", .code = 0x04, .cntmsk = 0x3, .modmsk = SNBEP_UNC_CBO_ATTRS, }, { .name = "UNC_C_TXR_INSERTS", .desc = "Egress allocations", .code = 0x02, .cntmsk = 0x3, .ngrp = 1, .modmsk = SNBEP_UNC_CBO_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(snbep_unc_c_txr_inserts), .umasks = snbep_unc_c_txr_inserts }, }; papi-5.6.0/src/perfctr-2.7.x/usr.lib/x86.h000664 001750 001750 00000001145 13216244370 021716 0ustar00jshenry1963jshenry1963000000 000000 /* $Id: x86.h,v 1.3 2005/04/09 10:25:47 mikpe Exp $ * x86-specific code for performance counters library. 
* * Copyright (C) 1999-2004 Mikael Pettersson */ #ifndef __LIB_PERFCTR_X86_H #define __LIB_PERFCTR_X86_H #define rdtscl(low) \ __asm__ __volatile__("rdtsc" : "=a"(low) : : "edx") #define rdpmcl(ctr,low) \ __asm__ __volatile__("rdpmc" : "=a"(low) : "c"(ctr) : "edx") #if defined(__x86_64__) #define vperfctr_has_rdpmc(vperfctr) (1) #else #define vperfctr_has_rdpmc(vperfctr) ((vperfctr)->have_rdpmc) #endif extern void perfctr_info_cpu_init(struct perfctr_info*); #endif /* __LIB_PERFCTR_X86_H */ papi-5.6.0/src/components/infiniband_umad/000775 001750 001750 00000000000 13216244357 022623 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/libpfm-3.y/include/000775 001750 001750 00000000000 13216244362 020626 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/components/vmware/VMwareComponentDocument.txt000664 001750 001750 00000027000 13216244360 026332 0ustar00jshenry1963jshenry1963000000 000000 PAPI-V VMware Component Document Matthew R. Johnson John Nelson 21 November 2011 Revised: 23 January 2012 This document is intended to detail the features of the PAPI-V VMware component, and more specifically the installation, usage, and pseudo performance counters available. In order to make this component possible, extensive research into the actual counters available, as well as the leveraging of the VMware Guest API1, was needed. As this is the first of the PAPI-V components, we seem to be stepping into a new realm of performance measurements that, previously, has been a new frontier, or unexplored all-together. Installation: To make PAPI with the VMware component you must go to the PAPI_ROOT/papi/src/components/vmware directory and configure with the flag: --with-vmware_incdir=, where is the path to the VMware Guest SDK for your machine. 
NOTE: The VMware Guest SDK is normally found in one of the following default vmware-tools paths: /usr/lib/vmware-tools/GuestSDK or: /opt/GuestSDK e.g.: ./configure --with-vmware_incdir=/usr/lib/vmware-tools/GuestSDK After running configure in the vmware directory, go to PAPI_CVS_ROOT/papi/src and configure again using the flag: --with-components=vmware e.g.: ./configure --with-components=vmware After running the main configure script you can then type make; the Makefiles have been automatically generated. If at any point you would like to uninstall PAPI and the VMware component, from the PAPI_ROOT/papi/src directory, just type: make clean clobber To make use of the VMware timekeeping pseudo-performance counters, the following configuration must be added through the vSphere client: monitor_control.pseudo_perfctr = TRUE You must also pass the --with-vmware_pseudo_perfctr flag during component configure in the vmware component directory. WARNING: If you do not enable monitor_control.pseudo_perfctr on the host side but still give configure the --with-vmware_pseudo_perfctr flag, you will get a segmentation fault upon readpmc trying to access protected memory without privileged access. This is expected behavior. Available Performance Counters: Below is the list of performance metrics available to PAPI through the VMware component. If at any time you would like to see a full list of counters available to PAPI, type ./papi_native_avail from within the utils directory. It is important to know that the counters VMWARE_HOST_TSC, VMWARE_ELAPSED_TIME, and VMWARE_ELAPSED_APPARENT are currently the only true-to-name register counters available from within a VMware virtual machine. Any Guest OS running on a VMware host must have the access enabled from within the VMware vSphere client managing the system for each virtual machine that wishes to use the VMware component; this exposes the counters to the virtual machine.
All other counters you will see in the following list are software counters that are being exposed through the use of the VMware API [1]. Event Code | Symbol | Long Description | -------------------------------------------------------------------------------- 0x44000000 | VMWARE_HOST_TSC | Physical host TSC | -------------------------------------------------------------------------------- 0x44000001 | VMWARE_ELAPSED_TIME | Elapsed real time in ns. | -------------------------------------------------------------------------------- 0x44000002 | VMWARE_ELAPSED_APPARENT | Elapsed apparent time in ns. | -------------------------------------------------------------------------------- 0x44000003 | VMWARE_CPU_LIMIT | Retrieves the upper limit of processor use in | | MHz available to the virtual machine. | -------------------------------------------------------------------------------- 0x44000004 | VMWARE_CPU_RESERVATION | Retrieves the minimum processing power | | in MHz reserved for the virtual machine. | -------------------------------------------------------------------------------- 0x44000005 | VMWARE_CPU_SHARES | Retrieves the number of CPU shares allocated | | to the virtual machine. | -------------------------------------------------------------------------------- 0x44000006 | VMWARE_CPU_STOLEN | Retrieves the number of milliseconds that th | | e virtual machine was in a ready state (able to transition to a r | | un state), but was not scheduled to run. | -------------------------------------------------------------------------------- 0x44000007 | VMWARE_CPU_USED | Retrieves the number of milliseconds during wh | | ich the virtual machine has used the CPU. This value includes the | | time used by the guest operating system and the time used by vir | | tualization code for tasks for this virtual machine. You can comb | | ine this value with the elapsed time (VMWARE_ELAPSED) to estimate | | the effective virtual machine CPU speed. This value is a subset | | of elapsedMs.
| -------------------------------------------------------------------------------- 0x44000008 | VMWARE_ELAPSED | Retrieves the number of milliseconds that have | | passed in the virtual machine since it last started running on th | | e server. The count of elapsed time restarts each time the virtua | | l machine is powered on, resumed, or migrated using VMotion. This | | value counts milliseconds, regardless of whether the virtual mac | | hine is using processing power during that time. You can combine | | this value with the CPU time used by the virtual machine (VMWARE_ | | CPU_USED) to estimate the effective virtual machine CPU speed. c | | puUsedMS is a subset of this value. | -------------------------------------------------------------------------------- 0x44000009 | VMWARE_MEM_ACTIVE | Retrieves the amount of memory the virtual m | | achine is actively using in MB—its estimated working set size. | -------------------------------------------------------------------------------- 0x4400000a | VMWARE_MEM_BALLOONED | Retrieves the amount of memory that has b | | een reclaimed from this virtual machine by the vSphere memory bal | | loon driver (also referred to as the “vmmemctl” driver) in MB. | -------------------------------------------------------------------------------- 0x4400000b | VMWARE_MEM_LIMIT | Retrieves the upper limit of memory that is a | | vailable to the virtual machine in MB. | -------------------------------------------------------------------------------- 0x4400000c | VMWARE_MEM_MAPPED | Retrieves the amount of memory that is alloc | | ated to the virtual machine in MB. Memory that is ballooned, swap | | ped, or has never been accessed is excluded. | -------------------------------------------------------------------------------- 0x4400000d | VMWARE_MEM_OVERHEAD | Retrieves the amount of “overhead” mem | | ory associated with this virtual machine that is currently consum | | ed on the host system in MB.
Overhead memory is additional memory | | that is reserved for data structures required by the virtualizat | | ion layer. | -------------------------------------------------------------------------------- 0x4400000e | VMWARE_MEM_RESERVATION | Retrieves the minimum amount of memory | | that is reserved for the virtual machine in MB. | -------------------------------------------------------------------------------- 0x4400000f | VMWARE_MEM_SHARED | Retrieves the amount of physical memory asso | | ciated with this virtual machine that is copy-on-write (COW) | | shared on the host in MB. | -------------------------------------------------------------------------------- 0x44000010 | VMWARE_MEM_SHARES | Retrieves the number of memory shares alloca | | ted to the virtual machine. | -------------------------------------------------------------------------------- 0x44000011 | VMWARE_MEM_SWAPPED | Retrieves the amount of memory that has bee | | n reclaimed from this virtual machine by transparently swapping g | | uest memory to disk in MB. | -------------------------------------------------------------------------------- 0x44000012 | VMWARE_MEM_TARGET_SIZE | Retrieves the size of the target memory | | allocation for this virtual machine in MB. | -------------------------------------------------------------------------------- 0x44000013 | VMWARE_MEM_USED | Retrieves the estimated amount of physical hos | | t memory currently consumed for this virtual machine’s physical | | memory. | -------------------------------------------------------------------------------- 0x44000014 | VMWARE_HOST_CPU | Retrieves the speed of the ESX system’s phys | | ical CPU in MHz. | Timekeeping Counters: The pseudo-performance counter feature uses a trap to catch a privileged machine instruction issued by software running in the virtual machine and therefore has more overhead than reading a performance counter or the TSC on physical hardware.
The feature will only trap correctly if the configuration setting is applied as described in Installation. The timekeeping counters behave as follows: VMWARE_HOST_TSC - Provides access to the Time Stamp Counter on the host machine which counts ticks since reset. VMWARE_ELAPSED_TIME - Provides access to the elapsed time in ns since reset as measured on the host machine. VMWARE_ELAPSED_APPARENT - Apparent time is the time visible to the Guest OS using virtualized timer devices. This timer may fall behind real time and catch up as needed. Usage: After installation of the VMware Component, you may use the papi_command_line interface, found in PAPI_ROOT/papi/src/utils to read out an instantaneous value from a certain counter from the above list. If you would like to read out a counter, type: ./papi_command_line COUNTER_SYMBOL_NAME. e.g.: To read out the value of the VMWARE_MEM_USED counter user@vm1:~/papi/src/utils$ ./papi_command_line VMWARE_MEM_USED Successfully added: VMWARE_MEM_USED VMWARE_MEM_USED : 13074 ---------------------------------- Verification: Checks for valid event name. This utility lets you add events from the command line interface to see if they work. command_line.c PASSED For further usage of PAPI and its API on how to incorporate these counters into a program of your own please see the PAPI Documentation [2]. ________________ References: [1] VMware: http://www.vmware.com/support/developer/guest-sdk. Last accessed November 28, 2011 [2] PAPI : http://icl.cs.utk.edu/projects/papi/wiki/Main_Page. Last accessed November 28, 2011 papi-5.6.0/src/perfctr-2.7.x/usr.lib/x86.c000664 001750 001750 00000053145 13216244370 021720 0ustar00jshenry1963jshenry1963000000 000000 /* $Id: x86.c,v 1.23 2007/10/06 13:02:07 mikpe Exp $ * x86-specific perfctr library procedures.
* * Copyright (C) 1999-2007 Mikael Pettersson */ #include #include #include #include /* memset() */ #include "libperfctr.h" #include "x86.h" #include "x86_cpuinfo.h" static unsigned int __NR_vperfctr_open; #define __NR_vperfctr_control (__NR_vperfctr_open+1) #define __NR_vperfctr_write (__NR_vperfctr_open+2) #define __NR_vperfctr_read (__NR_vperfctr_open+3) #include static void init_sys_vperfctr(void) { if (!__NR_vperfctr_open) { unsigned int nr; unsigned int kver = perfctr_linux_version_code(); #if defined(__x86_64__) if (kver >= PERFCTR_KERNEL_VERSION(2,6,18)) nr = 286; else if (kver >= PERFCTR_KERNEL_VERSION(2,6,16)) nr = 280; else nr = 257; #elif defined(__i386__) if (kver >= PERFCTR_KERNEL_VERSION(2,6,18)) nr = 325; else if (kver >= PERFCTR_KERNEL_VERSION(2,6,16)) nr = 318; else nr = 296; #endif __NR_vperfctr_open = nr; } } /* * The actual syscalls. */ int _sys_vperfctr_open(int fd_unused, int tid, int creat) { init_sys_vperfctr(); return syscall(__NR_vperfctr_open, tid, creat); } static int _sys_vperfctr_control(int fd, unsigned int cmd) { init_sys_vperfctr(); return syscall(__NR_vperfctr_control, fd, cmd); } static int _sys_vperfctr_write(int fd, unsigned int domain, const void *arg, unsigned int argbytes) { init_sys_vperfctr(); return syscall(__NR_vperfctr_write, fd, domain, arg, argbytes); } static int _sys_vperfctr_read(int fd, unsigned int domain, void *arg, unsigned int argbytes) { init_sys_vperfctr(); return syscall(__NR_vperfctr_read, fd, domain, arg, argbytes); } /* * Simple syscall wrappers. 
*/ int _sys_vperfctr_read_sum(int fd, struct perfctr_sum_ctrs *arg) { return _sys_vperfctr_read(fd, VPERFCTR_DOMAIN_SUM, arg, sizeof(*arg)); } int _sys_vperfctr_read_children(int fd, struct perfctr_sum_ctrs *arg) { return _sys_vperfctr_read(fd, VPERFCTR_DOMAIN_CHILDREN, arg, sizeof(*arg)); } int _sys_vperfctr_unlink(int fd) { return _sys_vperfctr_control(fd, VPERFCTR_CONTROL_UNLINK); } int _sys_vperfctr_iresume(int fd) { return _sys_vperfctr_control(fd, VPERFCTR_CONTROL_RESUME); } /* * Complex syscall wrappers, for transmitting control data * in CPU family specific formats. */ #define MSR_P5_CESR 0x11 #define MSR_P6_PERFCTR0 0xC1 /* .. 0xC2 */ #define MSR_P6_EVNTSEL0 0x186 /* .. 0x187 */ #define MSR_K7_EVNTSEL0 0xC0010000 /* .. 0xC0010003 */ #define MSR_K7_PERFCTR0 0xC0010004 /* .. 0xC0010007 */ #define MSR_P4_PERFCTR0 0x300 /* .. 0x311 */ #define MSR_P4_CCCR0 0x360 /* .. 0x371 */ #define MSR_P4_ESCR0 0x3A0 /* .. 0x3E1, with some gaps */ #define MSR_P4_PEBS_ENABLE 0x3F1 #define MSR_P4_PEBS_MATRIX_VERT 0x3F2 #define P4_CCCR_ESCR_SELECT(X) (((X) >> 13) & 0x7) #define P4_FAST_RDPMC 0x80000000 #if 0 static void show_regs(const struct perfctr_cpu_reg *regs, unsigned int n) { unsigned int i; fprintf(stderr, "CPU Register Values:\n"); for(i = 0; i < n; ++i) fprintf(stderr, "MSR %#x\t0x%08x\n", regs[i].nr, regs[i].value); } #else #define show_regs(regs, n) do{}while(0) #endif static int read_packet(int fd, unsigned int domain, void *arg, unsigned int argbytes) { int ret; ret = _sys_vperfctr_read(fd, domain, arg, argbytes); if (ret != argbytes && ret >= 0) { errno = EPROTO; return -1; } return ret; } #if !defined(__x86_64__) static int p5_write_regs(int fd, const struct perfctr_cpu_control *arg) { struct perfctr_cpu_reg reg; unsigned short cesr_half[2]; unsigned int i, pmc; if (!arg->nractrs) return 0; memset(cesr_half, 0, sizeof cesr_half); for(i = 0; i < arg->nractrs; ++i) { pmc = arg->pmc_map[i]; if (pmc > 1) { errno = EINVAL; return -1; } cesr_half[pmc] = 
arg->evntsel[i]; } reg.nr = MSR_P5_CESR; reg.value = (cesr_half[1] << 16) | cesr_half[0]; show_regs(®, 1); return _sys_vperfctr_write(fd, PERFCTR_DOMAIN_CPU_REGS, ®, sizeof reg); } static int p5_read_regs(int fd, struct perfctr_cpu_control *arg) { struct perfctr_cpu_reg reg; unsigned short cesr_half[2]; unsigned int i, pmc; int ret; if (!arg->nractrs) return 0; reg.nr = MSR_P5_CESR; ret = read_packet(fd, PERFCTR_DOMAIN_CPU_REGS, ®, sizeof reg); if (ret < 0) return ret; show_regs(®, 1); cesr_half[0] = reg.value & 0xffff; cesr_half[1] = reg.value >> 16; for(i = 0; i < arg->nractrs; ++i) { pmc = arg->pmc_map[i]; if (pmc > 1) { errno = EINVAL; return -1; } arg->evntsel[i] = cesr_half[pmc]; } return 0; } #endif static int p6_like_read_write_regs(int fd, struct perfctr_cpu_control *control, unsigned int msr_evntsel0, unsigned int msr_perfctr0, int do_write) { struct perfctr_cpu_reg regs[4+4]; unsigned int nrctrs, nractrs, pmc_mask, nr_regs, i, pmc; int ret; nractrs = control->nractrs; nrctrs = nractrs + control->nrictrs; if (nrctrs < nractrs || nrctrs > 4) { errno = EINVAL; return -1; } if (!nrctrs) return 0; nr_regs = 0; pmc_mask = 0; for(i = 0; i < nrctrs; ++i) { pmc = control->pmc_map[i]; if (pmc >= 4 || (pmc_mask & (1<evntsel[i]; ++nr_regs; if (i >= nractrs) { regs[nr_regs].nr = msr_perfctr0 + pmc; regs[nr_regs].value = control->ireset[i]; ++nr_regs; } } if (do_write) { show_regs(regs, nr_regs); return _sys_vperfctr_write(fd, PERFCTR_DOMAIN_CPU_REGS, regs, nr_regs*sizeof(regs[0])); } ret = read_packet(fd, PERFCTR_DOMAIN_CPU_REGS, regs, nr_regs*sizeof(regs[0])); if (ret < 0) return ret; show_regs(regs, nr_regs); nr_regs = 0; for(i = 0; i < nrctrs; ++i) { control->evntsel[i] = regs[nr_regs].value; ++nr_regs; if (i >= nractrs) { control->ireset[i] = regs[nr_regs].value; ++nr_regs; } } return 0; } /* * Table 15-4 in the IA32 Volume 3 manual contains a 18x8 entry mapping * from counter/CCCR number (0-17) and ESCR SELECT value (0-7) to the * actual ESCR MSR number. 
This mapping contains some repeated patterns, * so we can compact it to a 4x8 table of MSR offsets: * * 1. CCCRs 16 and 17 are mapped just like CCCRs 13 and 14, respectively. * Thus, we only consider the 16 CCCRs 0-15. * 2. The CCCRs are organised in pairs, and both CCCRs in a pair use the * same mapping. Thus, we only consider the 8 pairs 0-7. * 3. In each pair of pairs, the second odd-numbered pair has the same domain * as the first even-numbered pair, and the range is 1+ the range of the * first even-numbered pair. For example, CCCR(0) and (1) map ESCR * SELECT(7) to 0x3A0, and CCCR(2) and (3) map it to 0x3A1. * The only exception is that pair (7) [CCCRs 14 and 15] does not have * ESCR SELECT(3) in its domain, like pair (6) [CCCRs 12 and 13] has. * NOTE: Revisions of IA32 Volume 3 older than #245472-007 had an error * in this table: CCCRs 12, 13, and 16 had their mappings for ESCR SELECT * values 2 and 3 swapped. * 4. All MSR numbers are of the form 0x3??. Instead of storing these as * 16-bit numbers, the table only stores the 8-bit offsets from 0x300.
*/ static const unsigned char p4_cccr_escr_map[4][8] = { /* 0x00 and 0x01 as is, 0x02 and 0x03 are +1 */ [0x00/4] { [7] 0xA0, [6] 0xA2, [2] 0xAA, [4] 0xAC, [0] 0xB2, [1] 0xB4, [3] 0xB6, [5] 0xC8, }, /* 0x04 and 0x05 as is, 0x06 and 0x07 are +1 */ [0x04/4] { [0] 0xC0, [2] 0xC2, [1] 0xC4, }, /* 0x08 and 0x09 as is, 0x0A and 0x0B are +1 */ [0x08/4] { [1] 0xA4, [0] 0xA6, [5] 0xA8, [2] 0xAE, [3] 0xB0, }, /* 0x0C, 0x0D, and 0x10 as is, 0x0E, 0x0F, and 0x11 are +1 except [3] is not in the domain */ [0x0C/4] { [4] 0xB8, [5] 0xCC, [6] 0xE0, [0] 0xBA, [2] 0xBC, [3] 0xBE, [1] 0xCA, }, }; static unsigned int p4_escr_addr(unsigned int pmc, unsigned int cccr_val) { unsigned int escr_select, pair, escr_offset; escr_select = P4_CCCR_ESCR_SELECT(cccr_val); if (pmc > 0x11) return 0; /* pmc range error */ if (pmc > 0x0F) pmc -= 3; /* 0 <= pmc <= 0x0F */ pair = pmc / 2; /* 0 <= pair <= 7 */ escr_offset = p4_cccr_escr_map[pair / 2][escr_select]; if (!escr_offset || (pair == 7 && escr_select == 3)) return 0; /* ESCR SELECT range error */ return escr_offset + (pair & 1) + 0x300; }; static int p4_read_write_regs(int fd, struct perfctr_cpu_control *control, int do_write) { struct perfctr_cpu_reg regs[18*3+2]; unsigned int nrctrs, nractrs, pmc_mask, nr_regs, i, pmc, escr_addr; int ret; nractrs = control->nractrs; nrctrs = nractrs + control->nrictrs; if (nrctrs < nractrs || nrctrs > 18) { errno = EINVAL; return -1; } if (!nrctrs) return 0; nr_regs = 0; pmc_mask = 0; for(i = 0; i < nrctrs; ++i) { pmc = control->pmc_map[i] & ~P4_FAST_RDPMC; if (pmc >= 18 || (pmc_mask & (1<evntsel[i]; ++nr_regs; escr_addr = p4_escr_addr(pmc, control->evntsel[i]); if (!escr_addr) { errno = EINVAL; return -1; } regs[nr_regs].nr = escr_addr; regs[nr_regs].value = control->p4.escr[i]; ++nr_regs; if (i >= nractrs) { regs[nr_regs].nr = MSR_P4_PERFCTR0 + pmc; regs[nr_regs].value = control->ireset[i]; ++nr_regs; } } regs[nr_regs].nr = MSR_P4_PEBS_ENABLE; regs[nr_regs].value = control->p4.pebs_enable; ++nr_regs; 
regs[nr_regs].nr = MSR_P4_PEBS_MATRIX_VERT; regs[nr_regs].value = control->p4.pebs_matrix_vert; ++nr_regs; if (do_write) { show_regs(regs, nr_regs); return _sys_vperfctr_write(fd, PERFCTR_DOMAIN_CPU_REGS, regs, nr_regs*sizeof(regs[0])); } ret = read_packet(fd, PERFCTR_DOMAIN_CPU_REGS, regs, nr_regs*sizeof(regs[0])); if (ret < 0) return ret; show_regs(regs, nr_regs); nr_regs = 0; for(i = 0; i < nrctrs; ++i) { control->evntsel[i] = regs[nr_regs].value; ++nr_regs; control->p4.escr[i] = regs[nr_regs].value; ++nr_regs; if (i >= nractrs) { control->ireset[i] = regs[nr_regs].value; ++nr_regs; } } control->p4.pebs_enable = regs[nr_regs].value; ++nr_regs; control->p4.pebs_matrix_vert = regs[nr_regs].value; ++nr_regs; return 0; } static int write_cpu_regs(int fd, unsigned int cpu_type, struct perfctr_cpu_control *arg) { switch (cpu_type) { case PERFCTR_X86_GENERIC: return 0; #if !defined(__x86_64__) case PERFCTR_X86_INTEL_P5: case PERFCTR_X86_INTEL_P5MMX: case PERFCTR_X86_CYRIX_MII: case PERFCTR_X86_WINCHIP_C6: case PERFCTR_X86_WINCHIP_2: return p5_write_regs(fd, arg); case PERFCTR_X86_INTEL_P6: case PERFCTR_X86_INTEL_PII: case PERFCTR_X86_INTEL_PIII: case PERFCTR_X86_INTEL_PENTM: case PERFCTR_X86_VIA_C3: return p6_like_read_write_regs(fd, arg, MSR_P6_EVNTSEL0, MSR_P6_PERFCTR0, 1); case PERFCTR_X86_AMD_K7: #endif case PERFCTR_X86_AMD_K8: case PERFCTR_X86_AMD_K8C: return p6_like_read_write_regs(fd, arg, MSR_K7_EVNTSEL0, MSR_K7_PERFCTR0, 1); #if !defined(__x86_64__) case PERFCTR_X86_INTEL_P4: case PERFCTR_X86_INTEL_P4M2: #endif case PERFCTR_X86_INTEL_P4M3: return p4_read_write_regs(fd, arg, 1); break; default: fprintf(stderr, "unable to write control registers for cpu type %u\n", cpu_type); errno = EINVAL; return -1; } } int _sys_vperfctr_write_control(int fd, unsigned int cpu_type, const struct vperfctr_control *control) { union { struct vperfctr_control_kernel control; struct perfctr_cpu_control_header header; } u; unsigned int nrctrs; int ret; ret = 
_sys_vperfctr_control(fd, VPERFCTR_CONTROL_CLEAR); if (ret < 0) return ret; u.control.si_signo = control->si_signo; u.control.preserve = control->preserve; ret = _sys_vperfctr_write(fd, VPERFCTR_DOMAIN_CONTROL, &u.control, sizeof u.control); if (ret < 0) return ret; u.header.tsc_on = control->cpu_control.tsc_on; u.header.nractrs = control->cpu_control.nractrs; u.header.nrictrs = control->cpu_control.nrictrs; ret = _sys_vperfctr_write(fd, PERFCTR_DOMAIN_CPU_CONTROL, &u.header, sizeof u.header); if (ret < 0) return ret; nrctrs = control->cpu_control.nractrs + control->cpu_control.nrictrs; ret = _sys_vperfctr_write(fd, PERFCTR_DOMAIN_CPU_MAP, &control->cpu_control.pmc_map, nrctrs * sizeof control->cpu_control.pmc_map[0]); if (ret < 0) return ret; ret = write_cpu_regs(fd, cpu_type, (struct perfctr_cpu_control*)&control->cpu_control); if (ret < 0) return ret; return _sys_vperfctr_control(fd, VPERFCTR_CONTROL_RESUME); } static int read_cpu_regs(int fd, unsigned int cpu_type, struct perfctr_cpu_control *arg) { switch (cpu_type) { case PERFCTR_X86_GENERIC: return 0; #if !defined(__x86_64__) case PERFCTR_X86_INTEL_P5: case PERFCTR_X86_INTEL_P5MMX: case PERFCTR_X86_CYRIX_MII: case PERFCTR_X86_WINCHIP_C6: case PERFCTR_X86_WINCHIP_2: return p5_read_regs(fd, arg); case PERFCTR_X86_INTEL_P6: case PERFCTR_X86_INTEL_PII: case PERFCTR_X86_INTEL_PIII: case PERFCTR_X86_INTEL_PENTM: case PERFCTR_X86_VIA_C3: return p6_like_read_write_regs(fd, arg, MSR_P6_EVNTSEL0, MSR_P6_PERFCTR0, 0); case PERFCTR_X86_AMD_K7: #endif case PERFCTR_X86_AMD_K8: case PERFCTR_X86_AMD_K8C: return p6_like_read_write_regs(fd, arg, MSR_K7_EVNTSEL0, MSR_K7_PERFCTR0, 0); #if !defined(__x86_64__) case PERFCTR_X86_INTEL_P4: case PERFCTR_X86_INTEL_P4M2: #endif case PERFCTR_X86_INTEL_P4M3: return p4_read_write_regs(fd, arg, 0); break; default: fprintf(stderr, "unable to read control registers for cpu type %u\n", cpu_type); errno = EINVAL; return -1; } } int _sys_vperfctr_read_control(int fd, unsigned int cpu_type, 
struct vperfctr_control *control) { union { struct vperfctr_control_kernel control; struct perfctr_cpu_control_header header; } u; unsigned int nrctrs; int ret; memset(control, 0, sizeof *control); ret = read_packet(fd, VPERFCTR_DOMAIN_CONTROL, &u.control, sizeof u.control); if (ret < 0) return ret; control->si_signo = u.control.si_signo; control->preserve = u.control.preserve; ret = read_packet(fd, PERFCTR_DOMAIN_CPU_CONTROL, &u.header, sizeof u.header); if (ret < 0) return ret; control->cpu_control.tsc_on = u.header.tsc_on; control->cpu_control.nractrs = u.header.nractrs; control->cpu_control.nrictrs = u.header.nrictrs; nrctrs = control->cpu_control.nractrs + control->cpu_control.nrictrs; ret = read_packet(fd, PERFCTR_DOMAIN_CPU_MAP, &control->cpu_control.pmc_map, nrctrs * sizeof control->cpu_control.pmc_map[0]); if (ret < 0) return ret; return read_cpu_regs(fd, cpu_type, &control->cpu_control); } static int intel_init(const struct cpuinfo *cpuinfo, struct perfctr_info *info) { unsigned int family, model, stepping; if (!cpu_has(cpuinfo, X86_FEATURE_TSC)) return -1; family = cpu_family(cpuinfo); model = cpu_model(cpuinfo); stepping = cpu_stepping(cpuinfo); switch (family) { case 5: if (cpu_has(cpuinfo, X86_FEATURE_MMX)) { /* Avoid Pentium Erratum 74. */ if (model == 4 && (stepping == 4 || (stepping == 3 && cpu_type(cpuinfo) == 1))) info->cpu_features &= ~PERFCTR_FEATURE_RDPMC; return PERFCTR_X86_INTEL_P5MMX; } else { info->cpu_features &= ~PERFCTR_FEATURE_RDPMC; return PERFCTR_X86_INTEL_P5; } case 6: if (model == 9 || model == 13) return PERFCTR_X86_INTEL_PENTM; else if (model >= 7) return PERFCTR_X86_INTEL_PIII; else if (model >= 3) return PERFCTR_X86_INTEL_PII; else { /* Avoid Pentium Pro Erratum 26. 
*/ if (stepping < 9) info->cpu_features &= ~PERFCTR_FEATURE_RDPMC; return PERFCTR_X86_INTEL_P6; } case 15: if (model >= 3) return PERFCTR_X86_INTEL_P4M3; else if (model >= 2) return PERFCTR_X86_INTEL_P4M2; else return PERFCTR_X86_INTEL_P4; } return -1; } static int amd_init(const struct cpuinfo *cpuinfo, struct perfctr_info *info) { unsigned int family, model, stepping; if (!cpu_has(cpuinfo, X86_FEATURE_TSC)) return -1; family = cpu_family(cpuinfo); model = cpu_model(cpuinfo); stepping = cpu_stepping(cpuinfo); switch (family) { case 15: if (model > 5 || (model >= 4 && stepping >= 8)) return PERFCTR_X86_AMD_K8C; else return PERFCTR_X86_AMD_K8; case 6: return PERFCTR_X86_AMD_K7; } return -1; } static int cyrix_init(const struct cpuinfo *cpuinfo, struct perfctr_info *info) { if (!cpu_has(cpuinfo, X86_FEATURE_TSC)) return -1; switch (cpu_family(cpuinfo)) { case 6: /* 6x86MX, MII, or III */ return PERFCTR_X86_CYRIX_MII; } return -1; } static int centaur_init(const struct cpuinfo *cpuinfo, struct perfctr_info *info) { unsigned int family, model; family = cpu_family(cpuinfo); model = cpu_model(cpuinfo); switch (family) { case 5: if (cpu_has(cpuinfo, X86_FEATURE_TSC)) return -1; info->cpu_features &= ~PERFCTR_FEATURE_RDTSC; switch (model) { case 4: return PERFCTR_X86_WINCHIP_C6; case 8: /* WinChip 2, 2A, or 2B */ case 9: /* WinChip 3 */ return PERFCTR_X86_WINCHIP_2; default: return -1; } case 6: if (!cpu_has(cpuinfo, X86_FEATURE_TSC)) return -1; switch (model) { case 6: /* Cyrix III */ case 7: /* Samuel 2 */ case 8: /* Ezra-T */ case 9: /* Antaur/Nehemiah */ return PERFCTR_X86_VIA_C3; default: return -1; } } return -1; } static int generic_init(const struct cpuinfo *cpuinfo, struct perfctr_info *info) { if (!cpu_has(cpuinfo, X86_FEATURE_TSC)) return -1; info->cpu_features &= ~PERFCTR_FEATURE_RDPMC; return PERFCTR_X86_GENERIC; } void perfctr_info_cpu_init(struct perfctr_info *info) { struct cpuinfo cpuinfo; int cpu_type; identify_cpu(&cpuinfo); cpu_type = -1; /* binary 
/*
 * Map info->cpu_type to a fixed counter count (presumably the number of
 * performance counters the processor family provides -- confirm against
 * the driver).
 *
 * PERFCTR_X86_GENERIC and any unlisted cpu_type yield 0.  On x86-64
 * builds the 32-bit-only families are compiled out and therefore also
 * fall through to the default of 0.
 */
unsigned int perfctr_info_nrctrs(const struct perfctr_info *info)
{
	switch( info->cpu_type ) {
#if !defined(__x86_64__)
	case PERFCTR_X86_INTEL_P5:
	case PERFCTR_X86_INTEL_P5MMX:
	case PERFCTR_X86_INTEL_P6:
	case PERFCTR_X86_INTEL_PII:
	case PERFCTR_X86_INTEL_PIII:
	case PERFCTR_X86_CYRIX_MII:
	case PERFCTR_X86_WINCHIP_C6:
	case PERFCTR_X86_WINCHIP_2:
	case PERFCTR_X86_INTEL_PENTM:
		return 2;
	case PERFCTR_X86_AMD_K7:
		return 4;
	case PERFCTR_X86_VIA_C3:
		return 1;
	case PERFCTR_X86_INTEL_P4:
	case PERFCTR_X86_INTEL_P4M2:
		return 18;
#endif
	case PERFCTR_X86_INTEL_P4M3:
		return 18;
	case PERFCTR_X86_AMD_K8:
	case PERFCTR_X86_AMD_K8C:
		return 4;
	case PERFCTR_X86_GENERIC:
	default:
		return 0;
	}
}
return "Intel Pentium M"; #endif case PERFCTR_X86_INTEL_P4M3: return "Intel Pentium 4 Model 3"; case PERFCTR_X86_AMD_K8: return "AMD K8"; case PERFCTR_X86_AMD_K8C: return "AMD K8 Revision C"; default: return "?"; } } void perfctr_cpu_control_print(const struct perfctr_cpu_control *control) { unsigned int i, nractrs, nrictrs, nrctrs; nractrs = control->nractrs; nrictrs = control->nrictrs; nrctrs = control->nractrs + nrictrs; printf("tsc_on\t\t\t%u\n", control->tsc_on); printf("nractrs\t\t\t%u\n", nractrs); if( nrictrs ) printf("nrictrs\t\t\t%u\n", nrictrs); for(i = 0; i < nrctrs; ++i) { if( control->pmc_map[i] >= 18 ) /* for P4 'fast rdpmc' cases */ printf("pmc_map[%u]\t\t0x%08X\n", i, control->pmc_map[i]); else printf("pmc_map[%u]\t\t%u\n", i, control->pmc_map[i]); printf("evntsel[%u]\t\t0x%08X\n", i, control->evntsel[i]); if( control->p4.escr[i] ) printf("escr[%u]\t\t\t0x%08X\n", i, control->p4.escr[i]); if( i >= nractrs ) printf("ireset[%u]\t\t%d\n", i, control->ireset[i]); } if( control->p4.pebs_enable ) printf("pebs_enable\t\t0x%08X\n", control->p4.pebs_enable); if( control->p4.pebs_matrix_vert ) printf("pebs_matrix_vert\t0x%08X\n", control->p4.pebs_matrix_vert); } papi-5.6.0/src/perfctr-2.6.x/usr.lib/x86.h000775 001750 001750 00000001165 13216244367 021730 0ustar00jshenry1963jshenry1963000000 000000 /* $Id: x86.h,v 1.1 2004/01/26 13:21:41 mikpe Exp $ * x86-specific code for performance counters library. 
#ifndef __LIB_PERFCTR_X86_H
#define __LIB_PERFCTR_X86_H

/* Page size is hard-coded here rather than queried at run time. */
#define PAGE_SIZE	4096

/*
 * rdtscl(low): execute RDTSC and store EAX (the low 32 bits of the
 * time-stamp counter) in 'low'.  EDX is declared clobbered, so the high
 * half is discarded.
 */
#define rdtscl(low)	\
	__asm__ __volatile__("rdtsc" : "=a"(low) : : "edx")
/*
 * rdpmcl(ctr,low): execute RDPMC with 'ctr' in ECX and store EAX (the low
 * 32 bits of the selected counter) in 'low'.  EDX is declared clobbered.
 */
#define rdpmcl(ctr,low)	\
	__asm__ __volatile__("rdpmc" : "=a"(low) : "c"(ctr) : "edx")

/*
 * On x86-64 this is the constant 1; on 32-bit builds it reads the
 * have_rdpmc field of the vperfctr handle.
 */
#if defined(__x86_64__)
#define vperfctr_has_rdpmc(vperfctr)	(1)
#else
#define vperfctr_has_rdpmc(vperfctr)	((vperfctr)->have_rdpmc)
#endif

/* Expands to an empty statement: no CPU-specific info initialisation here. */
#define perfctr_info_cpu_init(info)	do{}while(0)

#endif /* __LIB_PERFCTR_X86_H */
2006-05-22 Stephane Eranian * corrected architected IA-32 PMU detection code, e.g., PIC assembly * fixed counter width of IA-32 architected PMU to 32 * fixed definition of perfevtsel to 64-bit wide for IA-32 architected PMU 2006-05-11 Stephane Eranian * added support for IA-32 architected PMU as specified in the latest IA-32 architecture manuals. There is enough to support minimal functionalities on Core Duo/Solo processors * updated system call number to match those used with 2.6.17-rc4 * enhanced i386_p6 model detection code 2006-04-25 Stephane Eranian * updated pfmlib_gen_mips64.c with latest code from Phil Mucci * introduced get_event_code_counter() internal method to handle the fact that on some PMUs (MIPS) an event may have a different value based on the counter it is assigned to. This is a superset of the previous get_event_code(). added PFMLIB_CNT_FIRST to ask for first value (or don't care) 2006-04-05 Stephane Eranian * added support for install_prefix in makefile * fixed broken ETB_EVENT (not reported as ETB event) * added BRANCH_EVENT as alias to ETB_EVENT for Montecito * added support for unavailable PMC registers to pfm_dispatch_events() * added detect_pmcs.c, detect_pmcs.h in examples * updated all generic examples to use detect_unavail_pmcs() helper function * updated pfm_dispatch_events() man pages * cleanup PFMLIB_REGMASK_*, change to pfm_regmask_* * created a separate set of man pages for all pfm_regmask_* functions 2006-04-04 Stephane Eranian * fixed makefile in include to install perfmon_i386.h for x86_64 install (Will Cohen from Redhat) * install pfmlib_montecito.h on IA64 2006-04-05 Stephane Eranian * updated system call numbers to 2.6.17-rc1 * incorporated a type change for reg_value in pfmlib.h (Kevin Corry from IBM) 2006-03-22 Stephane Eranian * changed HT detection for PEBS examples 2006-03-07 Stephane Eranian * updated to 2.6.16-rc5 new perfmon code base support * added preliminary Montecito support * incorporated AMD provided event list for
X86-64 (Ray Bryant) * renamed all GEN_X86_64 gen_x86_64 to amd_x86_64 * removed PFM_32BIT_ABI_64BIT_OS, ABI now supports ILP32,LP64 without special compilation 2006-01-16 Stephane Eranian * added PFM_32BIT_ABI_64BIT_OS to allow 32-bit compile (32-bit ABI) for a 64-bit OS * added C++ support to perfmon header files * added MIPS64 (5K,20K) support (provided by Phil Mucci) * restructured *_standalone.c examples * added pfm_get_event_code_counter() and man page * changed implementation of pfm_get_num_pm*() * remove non-sense example task_view.c * added support for MIPS in some examples 2006-01-09 Stephane Eranian * examples code cleanups * example support up to 2048 CPU (syst.c) * portable sampling examples support more than 64 PMDs 2005-12-15 Stephane Eranian * updated all examples to new pfm_create_context() prototype * fixed some type mismatch in pfmlib_itanium2.c * required for 2.6.15-rc5-git3 kernel patch 2005-10-18 Stephane Eranian * forced perfsel.en bit to 1 for X86-64 and i386/p6 * inverted reset mask to be more familiar in examples/showreginfo.c * updated P4 examples to force enable bit to 1 2005-09-28 Stephane Eranian * split p6/pentium M event tables. Pentium M adds a few more events and changes the semantic of some. * added smpl_standalone.c, notify_standalone.c and ia32/smpl_pebs.c * cleanup the examples some more * updated multiplex. to match structure of multiplex2.c * updated perfmon2 kernel headers to match 2.6.14-rc2-mm1 release * added man pages for libpfm_p6 and libpfm_x86_64 * fixed handling of edge field for P6 2005-08-01 Stephane Eranian * switch all examples in examples/dir to use the multi system call interface. * updated perfmon.h/perfmon_compat.h to latest kernel interface (multi syscall) 2004-06-24 Stephane Eranian * fixed Itanium2 events tables L2_FORCE_RECIRC_* and L2_L3ACCESS_* events can only be measured by PMC4 * fixed pfm_*_get_event_counters(). It would always return the counter mask for event index 0. 
2004-06-24 Stephane Eranian * fixed pfm_print_event_info_*() because it would not print the PMC/PMD mask correctly * updated pfm_dispatch_*ear() for Itanium2 * updated pfm_dispatch_irange() for Itanium2 * updated pfm_ita2_print_info() * updated pfm_ita2_num_pmcs() and pfm_ita2_num_pmds() 2004-02-12 Stephane Eranian * fixed a bug in pfmlib_itanium2.c which caused measurements using opcode matching with an event different from IA64_TAGGED_INST_RETIRED* to return wrong results, i.e., opcode filter was ignored. 2003-11-21 Stephane Eranian * changed interface to pfm_get_impl_*() to use a cleaner definition for bitmasks. pfmlib_regmask_t is now a struct and applications must use accessor macros PFMLIB_REGMASK_*() * added pfm_get_num_pmcs(), pfm_get_num_pmds(), pfm_get_num_counters() * updated man pages to reflect changes * cleanup all examples to reflect bitmask changes 2003-10-24 Stephane Eranian * added reserved fields to the key pfmlib structure for future extensions (recompilation from beta required). 2003-10-24 Stephane Eranian * released beta of version 3.0 * some of the changes not reported by older entries: * removed freesmpl.c example * added ita2_btb.c, ita2_dear.c, ita_dear.c, multiplex.c * added task_attach.c, task_attach_timeout.c, task_smpl.c * added missing itanium2 events, mostly subevent combinations for SYLL_NOT_DISPERSED, EXTERN_DP_PINS_0_TO_3, and EXTERN_DP_PINS_4_TO_5 * got rid of pfm_get_first_event(), pfm_get_next_event().
First valid index is always 0, use pfm_get_num_events() to find last event index * renamed pfm_stop() to pfm_self_stop(), pfm_start() to pfm_self_start() * updated all examples to perfmon2 interface * added notify_self2.c, notify_self3.c examples * updated perfmon.h/perfmon_default_smpl.h to reflect latest perfmon-2 changes (2.6.0-test8) 2003-08-25 Stephane Eranian * allowed multiple EAR/BTB events * really implemented the 4 different ways of programming EAR/BTB 2003-07-30 Stephane Eranian * updated all man pages to reflect changes for 3.0 * more cleanups in the examples to make the whole package compile without warning with ecc 2003-07-29 Stephane Eranian * fixed a limitation in the iod_table[] used in dispatch_drange(). Pure Opc mode is possible using the IBR/Opc mode. Reported by Geoff Kent at UIUC. * cleaned up all functions using a bitmask as arguments 2003-06-30 Stephane Eranian * added pfm_get_max_event_name_len() * unsigned vs. int cleanups * introduced pfm_*_pmc_reg_t and pfm_*_pmd_reg_t * cleaned up calls using bitmasks * renamed PMU_MAX_* to PFMLIB_MAX_* * got rid of PMU_FIRST_COUNTER * introduced pfmlib_counter_t * internal interface changes, renaming: pmu_name vs name * got rid of char **name and replaced with char *name, int maxlen * added pfm_start(), pfm_stop() as real functions * changed interface of pfm_dispatch_events to make input vs. output parameters more explicit * model-specific input/output to pfm_dispatch_event() now arguments instead of being linked from the generic argument. 2003-06-27 Stephane Eranian * added missing const to char arguments for pfm_find_event, pfm_find_event_byname, pfm_print_event_info. Suggestion by Hans * renamed pfp_pc to pfp_pmc * renamed pfp_pc_count to pfp_pmc_count 2003-06-11 Stephane Eranian * updated manuals to reflect library changes * updated all examples to match the new Linux/ia64 kernel interface (perfmon2).
2003-06-10 Stephane Eranian * fix pfmlib_itanium.c: dispatch_dear(), dispatch_iear() to setup EAR when there is an EAR event but no detailed setting in ita_param. * added pfm_ita_ear_mode_t to pfmlib_itanium.h * added pfm_ita_get_ear_mode() to pfmlib_itanium.h 2003-06-06 Stephane Eranian * add a generic call to return hardware counter width: pfm_get_hw_counter_width() * updated perfmon.h to perfmon2 * added flag to itanium/itanium2 specific parameter to tell the library to ignore per-event qualifier constraints. see PFMLIB_ITA_FL_CNT_NO_QUALCHECK and PFMLIB_ITA2_FL_CNT_NO_QUALCHECK. 2003-05-06 Stephane Eranian * got rid of all connections to perfmon.h. the library is now fully self-contained. pfarg_reg_t has been replaced by pfmlib_reg_t. 2002-03-20 Stephane Eranian * fix %x vs. %lx for pmc8/9 in pfmlib_itanium.c and pfmlib_itanium2.c 2002-12-20 Stephane Eranian * added PFM_FL_EXCL_IDLE to perfmon.h 2002-12-18 Stephane Eranian * clear ig_ad, inv fields in PMC8,9 when no code range restriction is used. 2002-12-17 Stephane Eranian * update pfm_initialize.3 to clarify when this function needs to be called. 2002-12-10 Stephane Eranian * changed _SYS_PERFMON.h to _PERFMON_PERFMON.h 2002-12-06 Stephane Eranian * integrated Peter Chubb's Debian script fixes * fixed the Debian script to include the examples 2002-12-05 Stephane Eranian * added man pages for pfm_start() and pfm_stop() * release 2.0 beta for review 2002-12-04 Stephane Eranian * the pfmlib_param_t structure now contains the pmc array (pfp_pc[]) as well as a counter representing the number of valid entries written to pfp_pc[]. cleaned up all modules and headers to reflect changes. * added pfm_ita2_is_fine_mode() to test whether or not fine mode was used for code ranges.
2002-12-03 Stephane Eranian * removed pfm_ita_ism from pfmlib_ita_param_t * removed pfm_ita2_ism from pfmlib_ita2_param_t * added libpfm.3, libpfm_itanium.3, libpfm_itanium2.3 * enabled per-range privilege level mask in pfmlib_itanium.c and pfmlib_itanium2.c 2002-11-21 Stephane Eranian * added pfmlib_generic.h to cleanup pfmlib.h * dropped retry argument to pfm_find_event() * got rid of the pfm_find_byvcode*() interface (internal only) * cleanup up interface code is int not unsigned long * added man pages in docs/man for the generic library interface * moved the PMU specific handy shortcuts for register struct to module specific file. Avoid possible conflicts in applications using different PMU models in one source file. 2002-11-20 Stephane Eranian * separated the library, headers, examples from the pfmon tool * changed license of library to MIT-style license * set version number to 2.0 * added support to generate a shared version of libpfm * fix pfm_dispatch_opcm() to check for effective use of IA64_TAGGED_INST_IBRPX_PMCY before setting the bits in PMC15 (spotted by UIUC Impact Team). * cleaned up error messages in the examples * fix bug in pfm_ita2_print_info() which caused extra umask bits to be displayed for EAR. 2002-11-19 Stephane Eranian * added pfm_get_impl_counters() to library interface and PMU models * added missing support for pfm_get_impl_pmds(), pfm_get_impl_pmcs() to pfmlib_generic.c * created pfmlib_compiler.h to encapsulate inline assembly differences between compilers. * created pfmlib_compiler_priv.h to encapsulate the inline assembly differences for library private code. 2002-11-13 Stephane Eranian * fixed definition of pmc10 in pfmlib_itanium2.h to account for a layout difference between cache and TLB mode (spotted by UIUC Impact Team). Was causing problems with some latency values in IEAR cache mode. * fixed initialization of pmc10 in pfmlib_itanium2.c to reflect above change. 
2002-10-14 Stephane Eranian * fixed impl_pmds[] in pfmlib_itanium.c and pfmlib_itanium2.c. PMD17 was missing. 2002-09-09 Stephane Eranian * updated include/perfmon/perfmon.h to include sampling period randomization. 2002-08-14 Stephane Eranian * fix bitfield length for pmc14_ita2_reg and pmd3_ita2_reg in pfmlib_itanium2.h (David Mosberger) papi-5.6.0/src/perfctr-2.6.x/usr.lib/x86.c000775 001750 001750 00000011615 13216244367 021724 0ustar00jshenry1963jshenry1963000000 000000 /* $Id: x86.c,v 1.2.2.11 2010/11/07 19:46:06 mikpe Exp $ * x86-specific perfctr library procedures. * * Copyright (C) 1999-2010 Mikael Pettersson */ #include #include "libperfctr.h" struct cpuid { /* The field order must not be changed. */ unsigned int eax; unsigned int ebx; /* When eax was 1, &ebx should be the start */ unsigned int edx; /* of the 12-byte vendor identification string. */ unsigned int ecx; }; static void get_cpuid(unsigned int op, struct cpuid *cpuid) { unsigned int save_ebx; unsigned int tmp_ebx; __asm__( "movl %%ebx, %0\n\t" "cpuid\n\t" "movl %%ebx, %1\n\t" "movl %0, %%ebx" : "=m"(save_ebx), "=m"(tmp_ebx), "=a"(cpuid->eax), "=d"(cpuid->edx), "=c"(cpuid->ecx) : "a"(op)); cpuid->ebx = tmp_ebx; } static unsigned int atom_nrctrs(void) { struct cpuid cpuid; get_cpuid(0, &cpuid); if (cpuid.eax < 0xA) { printf("%s: cpuid[0].eax == %u, unable to query 0xA leaf\n", __FUNCTION__, cpuid.eax); return 0; } get_cpuid(0xA, &cpuid); if ((cpuid.eax & 0xff) < 2) { printf("%s: cpuid[0xA].eax == 0x%08x appears bogus\n", __FUNCTION__, cpuid.eax); return 0; } return ((cpuid.eax >> 8) & 0xff) + (cpuid.edx & 0x1f); } unsigned int perfctr_info_nrctrs(const struct perfctr_info *info) { switch (info->cpu_type) { #if !defined(__x86_64__) case PERFCTR_X86_INTEL_P5: case PERFCTR_X86_INTEL_P5MMX: case PERFCTR_X86_INTEL_P6: case PERFCTR_X86_INTEL_PII: case PERFCTR_X86_INTEL_PIII: case PERFCTR_X86_CYRIX_MII: case PERFCTR_X86_WINCHIP_C6: case PERFCTR_X86_WINCHIP_2: case PERFCTR_X86_INTEL_PENTM: case 
/*
 * Return a human-readable name for info->cpu_type.
 *
 * The returned pointer refers to a string literal.  Types not listed
 * here yield "?"; on x86-64 builds that includes the 32-bit-only
 * families, which are compiled out.
 */
const char *perfctr_info_cpu_name(const struct perfctr_info *info)
{
	switch (info->cpu_type) {
	case PERFCTR_X86_GENERIC:
		return "Generic x86 with TSC";
#if !defined(__x86_64__)
	case PERFCTR_X86_INTEL_P5:
		return "Intel Pentium";
	case PERFCTR_X86_INTEL_P5MMX:
		return "Intel Pentium MMX";
	case PERFCTR_X86_INTEL_P6:
		return "Intel Pentium Pro";
	case PERFCTR_X86_INTEL_PII:
		return "Intel Pentium II";
	case PERFCTR_X86_INTEL_PIII:
		return "Intel Pentium III";
	case PERFCTR_X86_CYRIX_MII:
		return "Cyrix 6x86MX/MII/III";
	case PERFCTR_X86_WINCHIP_C6:
		return "WinChip C6";
	case PERFCTR_X86_WINCHIP_2:
		return "WinChip 2/3";
	case PERFCTR_X86_AMD_K7:
		return "AMD K7";
	case PERFCTR_X86_VIA_C3:
		return "VIA C3";
	case PERFCTR_X86_INTEL_P4:
		return "Intel Pentium 4";
	case PERFCTR_X86_INTEL_P4M2:
		return "Intel Pentium 4 Model 2";
	case PERFCTR_X86_INTEL_PENTM:
		return "Intel Pentium M";
	case PERFCTR_X86_INTEL_CORE:
		return "Intel Core";
#endif
	case PERFCTR_X86_INTEL_CORE2:
		return "Intel Core 2";
	case PERFCTR_X86_INTEL_P4M3:
		return "Intel Pentium 4 Model 3";
	case PERFCTR_X86_AMD_K8:
		return "AMD K8";
	case PERFCTR_X86_AMD_K8C:
		return "AMD K8 Revision C";
	case PERFCTR_X86_AMD_FAM10H:
		return "AMD Family 10h";
	case PERFCTR_X86_INTEL_ATOM:
		return "Intel Atom";
	case PERFCTR_X86_INTEL_NHLM:
		return "Intel Nehalem";
	case PERFCTR_X86_INTEL_WSTMR:
		return "Intel Westmere";
	default:
		return "?";
	}
}
nrictrs, nrctrs; nractrs = control->nractrs; nrictrs = control->nrictrs; nrctrs = control->nractrs + nrictrs; printf("tsc_on\t\t\t%u\n", control->tsc_on); printf("nractrs\t\t\t%u\n", nractrs); if (nrictrs) printf("nrictrs\t\t\t%u\n", nrictrs); for(i = 0; i < nrctrs; ++i) { if (control->pmc_map[i] >= 18) /* for Core2 fixed counters or P4 fast rdpmc */ printf("pmc_map[%u]\t\t0x%08X\n", i, control->pmc_map[i]); else printf("pmc_map[%u]\t\t%u\n", i, control->pmc_map[i]); printf("evntsel[%u]\t\t0x%08X\n", i, control->evntsel[i]); if (control->p4.escr[i]) printf("escr[%u]\t\t\t0x%08X\n", i, control->p4.escr[i]); if (i >= nractrs) printf("ireset[%u]\t\t%d\n", i, control->ireset[i]); } if (control->p4.pebs_enable) printf("pebs_enable\t\t0x%08X\n", control->p4.pebs_enable); if (control->p4.pebs_matrix_vert) printf("pebs_matrix_vert\t0x%08X\n", control->p4.pebs_matrix_vert); } papi-5.6.0/src/testlib/fpapi_test.h000664 001750 001750 00000000101 13216244370 021267 0ustar00jshenry1963jshenry1963000000 000000 #include "fpapi.h" #define SUCCESS 1 #define NUM_FLOPS 20000000 papi-5.6.0/src/examples/PAPI_state.c000664 001750 001750 00000005035 13216244361 021240 0ustar00jshenry1963jshenry1963000000 000000 /***************************************************************************** * We use PAPI_state to get the counting state of an EventSet.This function * * returns the state of the entire EventSet. 
* *****************************************************************************/ #include #include #include "papi.h" /* This needs to be included every time you use PAPI */ #define ERROR_RETURN(retval) { fprintf(stderr, "Error %d %s:line %d: \n", retval,__FILE__,__LINE__); exit(retval); } int main() { int retval; int status = 0; int EventSet = PAPI_NULL; /**************************************************************************** * This part initializes the library and compares the version number of the * * header file, to the version of the library, if these don't match then it * * is likely that PAPI won't work correctly.If there is an error, retval * * keeps track of the version number. * ****************************************************************************/ if((retval = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT ) { printf("Library initialization error! \n"); exit(-1); } /*Creating the Eventset */ if((retval = PAPI_create_eventset(&EventSet)) != PAPI_OK) ERROR_RETURN(retval); /* Add Total Instructions Executed to our EventSet */ if ((retval=PAPI_add_event(EventSet, PAPI_TOT_INS)) != PAPI_OK) ERROR_RETURN(retval); if ((retval=PAPI_state(EventSet, &status)) != PAPI_OK) ERROR_RETURN(retval); printstate(status); /* Start counting */ if ((retval=PAPI_start(EventSet)) != PAPI_OK) ERROR_RETURN(retval); if (PAPI_state(EventSet, &status) != PAPI_OK) ERROR_RETURN(retval); printstate(status); /* free the resources used by PAPI */ PAPI_shutdown(); exit(0); } int printstate(int status) { if(status & PAPI_STOPPED) printf("Eventset is currently stopped or inactive \n"); if(status & PAPI_RUNNING) printf("Eventset is currently running \n"); if(status & PAPI_PAUSED) printf("Eventset is currently Paused \n"); if(status & PAPI_NOT_INIT) printf(" Eventset defined but not initialized \n"); if(status & PAPI_OVERFLOWING) printf(" Eventset has overflowing enabled \n"); if(status & PAPI_PROFILING) printf(" Eventset has profiling enabled \n"); if(status & 
PAPI_MULTIPLEXING) printf(" Eventset has multiplexing enabled \n"); return 0; } papi-5.6.0/src/perfctr-2.6.x/usr.lib/Makefile000775 001750 001750 00000006150 13216244367 022571 0ustar00jshenry1963jshenry1963000000 000000 # $Id: Makefile,v 1.26.2.46 2010/11/07 19:48:14 mikpe Exp $ SHELL=/bin/sh ARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ -e s/arm.*/arm/ -e s/sa110/arm/) CC=gcc CFLAGS=-O2 -fomit-frame-pointer -Wall BUILD_INCLDIR=../linux/include CPPFLAGS=-I$(BUILD_INCLDIR) LD=ld AR=ar ARFLAGS=ruv RANLIB=ranlib i386_ESOBJS=event_set_x86.o event_set_amd.o event_set_centaur.o\ event_set_p4.o event_set_p5.o event_set_p6.o x86_64_ESOBJS=event_set_x86.o event_set_amd.o event_set_p4.o ppc_ESOBJS=event_set_ppc.o arm_ESOBJS=event_set_arm.o EVENT_SET_OBJS=$($(ARCH)_ESOBJS) i386_OBJS=x86.o x86_64_OBJS=x86.o ppc_OBJS=ppc.o arm_OBJS=arm.o ARCH_OBJS=$($(ARCH)_OBJS) i386_H=x86.h x86_64_H=x86.h ppc_H=ppc.h arm_H=arm.h ARCH_H=$($(ARCH)_H) AR_OBJS=global.o misc.o virtual.o marshal.o $(EVENT_SET_OBJS) $(ARCH_OBJS) SO_OBJS=$(AR_OBJS:.o=.os) # This is exceedingly ugly. i386_ABIVER=5 x86_64_ABIVER=6 ppc_ABIVER=5 arm_ABIVER=0 SO_ABIVER=$($(ARCH)_ABIVER) SO_LIBVER=2.6.42 SO_NAME=libperfctr.so.$(SO_ABIVER) SO_LIB=libperfctr.so.$(SO_ABIVER).$(SO_LIBVER) HDEP=libperfctr.h $(BUILD_INCLDIR)/linux/perfctr.h $(BUILD_INCLDIR)/asm/perfctr.h i386_ASM_DIR=x86 x86_64_ASM_DIR=x86 ppc_ASM_DIR=powerpc arm_ASM_DIR=arm ARCH_ASM_DIR=asm-$($(ARCH)_ASM_DIR) INSTALL_FILES=$(BUILD_INCLDIR)/$(ARCH_ASM_DIR)/perfctr.h $(BUILD_INCLDIR)/linux/perfctr.h\ libperfctr.h libperfctr.a libperfctr.so perfctr_event_codes.h CLEAN_FILES=$(AR_OBJS) $(SO_OBJS) libperfctr.a libperfctr.so gen-event-codes perfctr_event_codes.h marshal.c marshal.h libperfctr.o # Prevent 16-byte stack alignment crap in gcc-2.95. 
CFLAGS += $(shell if $(CC) -mpreferred-stack-boundary=2 -S -o /dev/null -x c /dev/null > /dev/null 2>&1; then echo "-mpreferred-stack-boundary=2"; fi) .SUFFIXES: .os %.os: %.c $(COMPILE.c) -fPIC $(OUTPUT_OPTION) $< # this code does not need -fno-strict-aliasing default: $(INSTALL_FILES) libperfctr.o libperfctr.a: $(AR_OBJS) $(AR) $(ARFLAGS) libperfctr.a $(AR_OBJS) $(RANLIB) libperfctr.a # not installed, only built as a workaround for PAPI's broken Makefiles libperfctr.o: $(AR_OBJS) $(LD) -r -o $@ $(AR_OBJS) libperfctr.so: $(SO_OBJS) $(CC) -shared -o $@ -Wl,-soname,$(SO_NAME) $(SO_OBJS) $(AR_OBJS): $(HDEP) $(SO_OBJS): $(HDEP) $(EVENT_SET_OBJS): event_set.h $(BUILD_INCLDIR)/asm/perfctr.h: cd ..; make configure misc.o virtual.o global.o: marshal.h misc.o virtual.o: arch.h $(ARCH_H) marshal.o: marshal.c marshal.h marshal.c: ln -s ../linux/drivers/perfctr/marshal.c . marshal.h: ln -s ../linux/drivers/perfctr/marshal.h . perfctr_event_codes.h: gen-event-codes ./gen-event-codes > $@ gen-event-codes: gen-event-codes.c $(EVENT_SET_OBJS) $(LINK.c) $^ -o $@ install: $(INSTALL_FILES) -mkdir -p $(INCLDIR) $(INCLDIR)/asm $(INCLDIR)/linux cp -f libperfctr.h $(INCLDIR)/ cp perfctr_event_codes.h $(INCLDIR)/ cp -f ../linux/include/$(ARCH_ASM_DIR)/perfctr.h $(INCLDIR)/asm/ cp -f ../linux/include/linux/perfctr.h $(INCLDIR)/linux/ -mkdir -p $(LIBDIR) cp libperfctr.a $(LIBDIR)/ cp libperfctr.so $(LIBDIR)/$(SO_LIB) ln -sf $(SO_LIB) $(LIBDIR)/$(SO_NAME) ln -sf $(SO_NAME) $(LIBDIR)/libperfctr.so distclean realclean: clean clean: rm -f $(CLEAN_FILES) papi-5.6.0/src/libpfm-3.y/include/perfmon/perfmon_dfl_smpl.h000664 001750 001750 00000006206 13216244362 025777 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * This file implements the new dfl sampling buffer format * for perfmon2 subsystem. * * This format is supported used by all platforms. 
For IA-64, older * applications using perfmon v2.0 MUST use the * perfmon_default_smpl.h */ #ifndef __PERFMON_DFL_SMPL_H__ #define __PERFMON_DFL_SMPL_H__ 1 #ifdef __cplusplus extern "C" { #endif #include #define PFM_DFL_SMPL_NAME "default" #ifdef PFMLIB_OLD_PFMV2 /* * UUID for compatibility with perfmon v2.2 (used by Cray) */ #define PFM_DFL_SMPL_UUID { \ 0xd1, 0x39, 0xb2, 0x9e, 0x62, 0xe8, 0x40, 0xe4,\ 0xb4, 0x02, 0x73, 0x07, 0x87, 0x92, 0xe9, 0x37 } #endif /* * format specific parameters (passed at context creation) */ typedef struct { uint64_t buf_size; /* size of the buffer in bytes */ uint32_t buf_flags; /* buffer specific flags */ uint32_t res1; /* for future use */ uint64_t reserved[6]; /* for future use */ } pfm_dfl_smpl_arg_t; /* * This header is at the beginning of the sampling buffer returned to the user. * It is directly followed by the first record. */ typedef struct { uint64_t hdr_count; /* how many valid entries */ uint64_t hdr_cur_offs; /* current offset from top of buffer */ uint64_t hdr_overflows; /* #overflows for buffer */ uint64_t hdr_buf_size; /* bytes in the buffer */ uint64_t hdr_min_buf_space; /* minimal buffer size (internal use) */ uint32_t hdr_version; /* smpl format version */ uint32_t hdr_buf_flags; /* copy of buf_flags */ uint64_t hdr_reserved[10]; /* for future use */ } pfm_dfl_smpl_hdr_t; /* * Entry header in the sampling buffer. The header is directly followed * with the values of the PMD registers of interest saved in increasing * index order: PMD4, PMD5, and so on. How many PMDs are present depends * on how the session was programmed. * * In the case where multiple counters overflow at the same time, multiple * entries are written consecutively. * * last_reset_value member indicates the initial value of the overflowed PMD. 
*/ typedef struct { uint32_t pid; /* thread id (for NPTL, this is gettid()) */ uint16_t ovfl_pmd; /* index of pmd that overflowed for this sample */ uint16_t reserved; /* for future use */ uint64_t last_reset_val; /* initial value of overflowed PMD */ uint64_t ip; /* where did the overflow interrupt happened */ uint64_t tstamp; /* overflow timetamp */ uint16_t cpu; /* cpu on which the overfow occured */ uint16_t set; /* event set active when overflow ocurred */ uint32_t tgid; /* thread group id (for NPTL, this is getpid()) */ } pfm_dfl_smpl_entry_t; #define PFM_DFL_SMPL_VERSION_MAJ 1U #define PFM_DFL_SMPL_VERSION_MIN 0U #define PFM_DFL_SMPL_VERSION (((PFM_DFL_SMPL_VERSION_MAJ&0xffff)<<16)|(PFM_DFL_SMPL_VERSION_MIN & 0xffff)) #ifdef __cplusplus }; #endif #endif /* __PERFMON_DFL_SMPL_H__ */ papi-5.6.0/src/components/lmsensors/configure.in000664 001750 001750 00000001224 13216244357 024051 0ustar00jshenry1963jshenry1963000000 000000 # Process this file with autoconf to produce a configure script. # File: components/lmsensors/configure.in # CVS: $Id$ AC_INIT AC_ARG_WITH(sensors_incdir, [--with-sensors_incdir= Specify path to sensors includes], [SENSORS_INCDIR=$withval CFLAGS="$CFLAGS -I$withval" AC_CHECK_HEADER([sensors.h], [], [AC_MSG_ERROR([sensors.h not found])], [#include ])], [AC_MSG_ERROR([Component requires path to sensors includes])]) AC_SUBST(SENSORS_INCDIR) AC_CONFIG_FILES([Makefile.lmsensors]) AC_OUTPUT papi-5.6.0/src/perfctr-2.7.x/linux/drivers/perfctr/init.c000664 001750 001750 00000004763 13216244370 025144 0ustar00jshenry1963jshenry1963000000 000000 /* $Id: init.c,v 1.83 2007/10/06 13:02:07 mikpe Exp $ * Performance-monitoring counters driver. * Top-level initialisation code. 
* * Copyright (C) 1999-2007 Mikael Pettersson */ #include #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) #include #endif #include #include #include #include #include #include "cpumask.h" #include "virtual.h" #include "version.h" struct perfctr_info perfctr_info; static ssize_t driver_version_show(struct class *class, char *buf) { return sprintf(buf, "%s\n", VERSION); } static ssize_t cpu_features_show(struct class *class, char *buf) { return sprintf(buf, "%#x\n", perfctr_info.cpu_features); } static ssize_t cpu_khz_show(struct class *class, char *buf) { return sprintf(buf, "%u\n", perfctr_info.cpu_khz); } static ssize_t tsc_to_cpu_mult_show(struct class *class, char *buf) { return sprintf(buf, "%u\n", perfctr_info.tsc_to_cpu_mult); } static ssize_t state_user_offset_show(struct class *class, char *buf) { return sprintf(buf, "%u\n", (unsigned int)offsetof(struct perfctr_cpu_state, user)); } static ssize_t cpus_online_show(struct class *class, char *buf) { int ret = cpumask_scnprintf(buf, PAGE_SIZE-1, cpu_online_map); buf[ret++] = '\n'; return ret; } static ssize_t cpus_forbidden_show(struct class *class, char *buf) { int ret = cpumask_scnprintf(buf, PAGE_SIZE-1, perfctr_cpus_forbidden_mask); buf[ret++] = '\n'; return ret; } static struct class_attribute perfctr_class_attrs[] = { __ATTR_RO(driver_version), __ATTR_RO(cpu_features), __ATTR_RO(cpu_khz), __ATTR_RO(tsc_to_cpu_mult), __ATTR_RO(state_user_offset), __ATTR_RO(cpus_online), __ATTR_RO(cpus_forbidden), __ATTR_NULL }; static struct class perfctr_class = { .name = "perfctr", .class_attrs = perfctr_class_attrs, }; char *perfctr_cpu_name __initdata; static int __init perfctr_init(void) { int err; err = perfctr_cpu_init(); if (err) { printk(KERN_INFO "perfctr: not supported by this processor\n"); return err; } err = vperfctr_init(); if (err) return err; err = class_register(&perfctr_class); if (err) { printk(KERN_ERR "perfctr: class initialisation failed\n"); return err; } printk(KERN_INFO "perfctr: driver %s, cpu 
type %s at %u kHz\n", VERSION, perfctr_cpu_name, perfctr_info.cpu_khz); return 0; } static void __exit perfctr_exit(void) { vperfctr_exit(); perfctr_cpu_exit(); } module_init(perfctr_init) module_exit(perfctr_exit) papi-5.6.0/src/components/cuda/sampling/test/sass_source_map.cubin000664 001750 001750 00000007150 13216244357 027434 0ustar00jshenry1963jshenry1963000000 000000 ELF3¾KÀ À 4@8@.shstrtab.strtab.symtab.symtab_shndx.nv.info.text._Z15MatrixMulKernelPfS_S_i.nv.info._Z15MatrixMulKernelPfS_S_i.nv.shared._Z15MatrixMulKernelPfS_S_i.nv.constant0._Z15MatrixMulKernelPfS_S_i.shstrtab.strtab.symtab.symtab_shndx.nv.info_Z15MatrixMulKernelPfS_S_i.text._Z15MatrixMulKernelPfS_S_i.nv.info._Z15MatrixMulKernelPfS_S_i.nv.shared._Z15MatrixMulKernelPfS_S_i.nv.constant0._Z15MatrixMulKernelPfS_S_i_paramM¸2€# @ ð ð! ð! ð!ÿHPöþX‡€˜Lg€˜LWÈðð âÀ€c6Èð÷€˜\1 æÄ?gÈð'Èð'€Oá þBÐ'N7€O7NóþôW0[g0[€e@âñ þØ÷€˜\ð×ÿÿÿg€˜LíþôÿG`à‚'T÷€˜\‡7@âñ"þ@Äg€Ng€OgNñbþˆ g€O  w0[ñ`þ@Ä—0[g€N 'H8ñ þŒ ç)8 'H8w0[ñ þ@Ä €L gN g€Oñ@þ„ 'ç)8 Lñ þÄ '€L 'H8 ‡0[ñ þ@„g€N gN g€Oñ þÄ 7Lç)8 €Lñ þ„ 'H8w0[7ñ þÄ ÷0[ç)8 Lñ þ„ '€L'H8g€Nñ"þÄ gNg€Oç)8ñ þ„7L€L'H8ñ þÈw 0[70[ç)8ñ þ„L'€L 'H8ñ þÄÇ€˜\—€˜\瀘\ñþÄ·€˜\ç)8 Ôîð .„7L  Ôî €LñBþÀ'H8 ‡€˜\€˜\’þD  Ôî ç)8 Ôîñ üÀL'€Lg€˜\²þÌ Ôî§€˜\ Ôîð È ‡€˜\ Ôî 7L± üô  ÔîGg€˜LñÀþÀÿ‡`à‚'T—€Y ç€YýÀþÀ'4ððg€Y§€Yý?ÀþÄÉÿ@âG€˜\ð÷ÿÿÿö ýôg€˜Lÿg`à‚'T@âñ"þ@Äg€Ng€OgNñbþˆ g€O w˜0[ñbþÈg€N ‡0[ 'H8ñ þˆw˜0[ ç)8 'H8ñ þ@Ä€L gN g€Oó þ„ ç)8 L '€Lñ"þÌ 'H8 §˜0[ç)8ñ þ„ 7L €L 'H8ôþÈ ç)8 L ÔîðÀ2À '€L  Ôî 7LrþD Ôî'  ÔîæGÀþØW€Y §€YG€˜\í? 
ÿÄg€cK@âg€Nñ¢þ@Äg€OgNw˜0[õ üÄg€O'H8W0[ô þÄç)8€L'H8ô üØ ç)8 L'€Lð@À 7L  Ôîv ýÀ Ôîg€cK W€YýÀþÄ€òÿ@âG€˜\gNöÂþÄg€O70['H8õÀüÈç)8G€LWLñàÿü Üîã‡ÿÿ@âàü€°P°P°P@Æ èð`)pP$SpthÜ\2@€ À ¨¨ÜÜÜpapi-5.6.0/man/man3/PAPI_mh_tlb_info_t.3000664 001750 001750 00000001061 13216244356 021646 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPI_mh_tlb_info_t" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPI_mh_tlb_info_t \- .SH SYNOPSIS .br .PP .SS "Data Fields" .in +1c .ti -1c .RI "int \fBtype\fP" .br .ti -1c .RI "int \fBnum_entries\fP" .br .ti -1c .RI "int \fBpage_size\fP" .br .ti -1c .RI "int \fBassociativity\fP" .br .in -1c .SH "Detailed Description" .PP .SH "Field Documentation" .PP .SS "int PAPI_mh_tlb_info_t::type" Empty, instr, data, vector, unified .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. papi-5.6.0/src/perfctr-2.6.x/examples/global/ppc.c000775 001750 001750 00000001077 13216244366 023561 0ustar00jshenry1963jshenry1963000000 000000 /* $Id: ppc.c,v 1.1.2.1 2004/06/21 22:41:44 mikpe Exp $ * PPC32-specific code. * * Copyright (C) 2004 Mikael Pettersson */ #include #include #include #include "libperfctr.h" #include "arch.h" void setup_control(const struct perfctr_info *info, struct perfctr_cpu_control *control) { memset(control, 0, sizeof *control); control->tsc_on = 1; if (info->cpu_type > PERFCTR_PPC_GENERIC) { control->nractrs = 1; control->pmc_map[0] = 0; control->evntsel[0] = 0x02; /* INSTRUCTIONS_COMPLETED */ counting_mips = 1; } } papi-5.6.0/src/libpfm-3.y/examples_v3.x/ia64/ita_irr.c000664 001750 001750 00000025252 13216244362 024245 0ustar00jshenry1963jshenry1963000000 000000 /* * ita_irr.c - example of how to use code range restriction with the Itanium PMU * * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. 
* Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux/ia64. */ #include #include #include #include #include #include #include #include #include #include #define VECTOR_SIZE 1000000UL #define NUM_PMCS PFMLIB_MAX_PMCS #define NUM_PMDS PFMLIB_MAX_PMDS #define MAX_EVT_NAME_LEN 128 #define MAX_PMU_NAME_LEN 32 typedef struct { char *event_name; unsigned long expected_value; } event_desc_t; static event_desc_t event_list[]={ { "fp_ops_retired_hi", 0UL} , { "fp_ops_retired_lo", VECTOR_SIZE<<1 }, { NULL, 0UL } }; static void fatal_error(char *fmt,...) __attribute__((noreturn)); static void fatal_error(char *fmt, ...) 
{ va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); exit(1); } void saxpy(double *a, double *b, double *c, unsigned long size) { unsigned long i; for(i=0; i < size; i++) { c[i] = 2*a[i] + b[i]; } } void saxpy2(double *a, double *b, double *c, unsigned long size) { unsigned long i; for(i=0; i < size; i++) { c[i] = 2*a[i] + b[i]; } } static int do_test(void) { unsigned long size; double *a, *b, *c; size = VECTOR_SIZE; a = malloc(size*sizeof(double)); b = malloc(size*sizeof(double)); c = malloc(size*sizeof(double)); if (a == NULL || b == NULL || c == NULL) fatal_error("Cannot allocate vectors\n"); memset(a, 0, size*sizeof(double)); memset(b, 0, size*sizeof(double)); memset(c, 0, size*sizeof(double)); saxpy(a,b,c, size); saxpy2(a,b,c, size); return 0; } int main(int argc, char **argv) { event_desc_t *p; pfmlib_input_param_t inp; pfmlib_output_param_t outp; pfmlib_ita_input_param_t ita_inp; pfmlib_ita_output_param_t ita_outp; pfarg_pmr_t pd[NUM_PMDS]; pfarg_pmr_t pc[NUM_PMCS]; pfarg_pmr_t ibrs[8]; unsigned long range_start, range_end; pfmlib_options_t pfmlib_options; struct fd { /* function descriptor */ unsigned long addr; unsigned long gp; } *fd; int ret, type = 0; unsigned int i; int id; char name[MAX_EVT_NAME_LEN]; /* * Initialize pfm library (required before we can use it) */ ret = pfm_initialize(); if (ret != PFMLIB_SUCCESS) fatal_error("Cannot initialize library: %s\n", pfm_strerror(ret)); /* * Let's make sure we run this on the right CPU family */ pfm_get_pmu_type(&type); if (type != PFMLIB_ITANIUM_PMU) { char model[MAX_PMU_NAME_LEN]; pfm_get_pmu_name(model, MAX_PMU_NAME_LEN); fatal_error("this program does not work with %s PMU\n", model); } /* * pass options to library (optional) */ memset(&pfmlib_options, 0, sizeof(pfmlib_options)); pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ pfmlib_options.pfm_verbose = 1; /* set to 1 for verbose */ pfm_set_options(&pfmlib_options); /* * Compute the range we are interested in * * On IA-64, the 
function pointer does not point directly * to the function but to a descriptor which contains two * unsigned long: the first one is the actual start address * of the function, the second is the gp (global pointer) * to load into r1 before jumping into the function. Unlesss * we're jumping into a shared library the gp is the same as * the current gp. * * In the artificial example, we also rely on the compiler/linker * NOT reordering code layout. We depend on saxpy2() being just * after saxpy(). * */ fd = (struct fd *)saxpy; range_start = fd->addr; fd = (struct fd *)saxpy2; range_end = fd->addr; /* * linker may reorder saxpy() and saxpy2() */ if (range_end < range_start) { unsigned long tmp; tmp = range_start; range_start = range_end; range_end = tmp; } memset(pc, 0, sizeof(pc)); memset(pd, 0, sizeof(pd)); memset(ibrs,0, sizeof(ibrs)); memset(&inp,0, sizeof(inp)); memset(&outp,0, sizeof(outp)); memset(&ita_inp,0, sizeof(ita_inp)); memset(&ita_outp,0, sizeof(ita_outp)); /* * find requested event */ p = event_list; for (i=0; p->event_name ; i++, p++) { if (pfm_find_event(p->event_name, &inp.pfp_events[i].event) != PFMLIB_SUCCESS) fatal_error("Cannot find %s event\n", p->event_name); } /* * set the privilege mode: * PFM_PLM3 : user level only */ inp.pfp_dfl_plm = PFM_PLM3; /* * how many counters we use */ inp.pfp_event_count = i; /* * We use the library to figure out how to program the debug registers * to cover the data range we are interested in. The rr_end parameter * must point to the byte after the last element of the range (C-style range). * * Because of the masking mechanism and therefore alignment constraints used to implement * this feature, it may not be possible to exactly cover a given range. It may be that * the coverage exceeds the desired range. So it is possible to capture noise if * the surrounding addresses are also heavily used. 
You can figure out by how much the * actual range is off compared to the requested range by checking the rr_soff and rr_eoff * fields of rr_infos on return from the library call. * * Upon return, the rr_dbr array is programmed and the number of debug registers (not pairs) * used to cover the range is in rr_nbr_used. * */ ita_inp.pfp_ita_irange.rr_used = 1; /* indicate we use code range restriction */ ita_inp.pfp_ita_irange.rr_limits[0].rr_start = range_start; ita_inp.pfp_ita_irange.rr_limits[0].rr_end = range_end; /* * let the library figure out the values for the PMCS */ if ((ret=pfm_dispatch_events(&inp, &ita_inp, &outp, &ita_outp)) != PFMLIB_SUCCESS) fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); /* * print offsets */ printf("code range : [0x%016lx-0x%016lx)\n" "start_offset:-0x%lx end_offset:+0x%lx\n" "%d pairs of debug registers used\n", range_start, range_end, ita_outp.pfp_ita_irange.rr_infos[0].rr_soff, ita_outp.pfp_ita_irange.rr_infos[0].rr_eoff, ita_outp.pfp_ita_irange.rr_nbr_used >> 1); /* * now create the session */ id = pfm_create(0, NULL); if (id == -1) { if (errno == ENOSYS) fatal_error("Your kernel does not have performance monitoring support!\n"); fatal_error("cannot create session %s\n", strerror(errno)); } /* * Now prepare the argument to initialize the PMDs and PMCS. * We must pfp_pmc_count to determine the number of PMC to intialize. * We must use pfp_event_count to determine the number of PMD to initialize. * Some events cause extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. * * This step is new compared to libpfm-2.x. It is necessary because the library no * longer knows about the kernel data structures. 
*/ for (i=0; i < outp.pfp_pmc_count; i++) { pc[i].reg_num = outp.pfp_pmcs[i].reg_num; pc[i].reg_value = outp.pfp_pmcs[i].reg_value; } for (i=0; i < outp.pfp_pmd_count; i++) { pd[i].reg_num = outp.pfp_pmds[i].reg_num; } /* * propagate the setup for the debug registers from the library to the arguments * to the syscall. The library does not know the type of the syscall * anymore. IBRs are mapped to PMC256-PMC263 */ for (i=0; i < ita_outp.pfp_ita_drange.rr_nbr_used; i++) { ibrs[i].reg_num = 256+ita_outp.pfp_ita_irange.rr_br[i].reg_num; ibrs[i].reg_value = ita_outp.pfp_ita_irange.rr_br[i].reg_value; } /* * Program the code debug registers. * * IMPORTANT: programming the debug register MUST always be done before the PMCs * otherwise the kernel will fail on PFM_WRITE_PMCS. This is for security reasons. */ if (pfm_write(id, 0, PFM_RW_PMC, ibrs, ita_outp.pfp_ita_irange.rr_nbr_used * sizeof(*ibrs)) == -1) fatal_error("pfm_write error errno %d\n",errno); /* * Now program the registers * * We don't use the save variable to indicate the number of elements passed to * the kernel because, as we said earlier, pc may contain more elements than * the number of events we specified, i.e., contains more than coutning monitors. */ if (pfm_write(id, 0, PFM_RW_PMC, pc, outp.pfp_pmc_count * sizeof(*pc)) == -1) fatal_error("pfm_write error errno %d\n",errno); if (pfm_write(id, 0, PFM_RW_PMD, pd, outp.pfp_pmd_count * sizeof(*pd)) == -1) fatal_error("pfm_write(PMD) error errno %d\n",errno); /* * now attach session */ if (pfm_attach(id, 0, getpid()) == -1) fatal_error("pfm_attach error errno %d\n",errno); /* * Let's roll now. * * We run two distinct copies of the same function but we restrict measurement * to the first one (saxpy). Therefore the expected count is half what you would * get if code range restriction was not used. The core loop in both case uses * two floating point operation per iteration. 
*/ if (pfm_set_state(id, 0, PFM_ST_START)) fatal_error("pfm_set_state error errno %d\n",errno); do_test(); if (pfm_set_state(id, 0, PFM_ST_STOP)) fatal_error("pfm_set_state error errno %d\n",errno); /* * now read the results */ if (pfm_read(id, 0, PFM_RW_PMD, pd, inp.pfp_event_count * sizeof(*pd)) == -1) fatal_error("pfm_read error errno %d\n",errno); /* * print the results * * It is important to realize, that the first event we specified may not * be in PMD4. Not all events can be measured by any monitor. That's why * we need to use the pc[] array to figure out where event i was allocated. */ for (i=0; i < inp.pfp_event_count; i++) { pfm_get_full_event_name(&inp.pfp_events[i], name, MAX_EVT_NAME_LEN); printf("PMD%-3u %20lu %s (expected %lu)\n", pd[i].reg_num, pd[i].reg_value, name, event_list[i].expected_value); } /* * let's stop this now */ close(id); return 0; } papi-5.6.0/src/perfctr-2.7.x/linux/include/linux/000775 001750 001750 00000000000 13216244370 023462 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/libpfm4/perf_examples/syst_smpl.c000775 001750 001750 00000023324 13216244365 023725 0ustar00jshenry1963jshenry1963000000 000000 /* * syst_smpl.c - example of a system-wide sampling * * Copyright (c) 2010 Google, Inc * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "perf_util.h" #define SMPL_PERIOD 240000000ULL #define MAX_PATH 1024 #ifndef STR # define _STR(x) #x # define STR(x) _STR(x) #endif typedef struct { int opt_no_show; int mmap_pages; int cpu; int pin; int delay; char *events; char *cgroup; } options_t; static jmp_buf jbuf; static uint64_t collected_samples, lost_samples; static perf_event_desc_t *fds; static int num_fds; static options_t options; static size_t pgsz; static size_t map_size; static struct option the_options[]={ { "help", 0, 0, 1}, { "no-show", 0, &options.opt_no_show, 1}, { 0, 0, 0, 0} }; static const char *gen_events = "cycles,instructions"; static void process_smpl_buf(perf_event_desc_t *hw) { struct perf_event_header ehdr; int ret; for(;;) { ret = perf_read_buffer(hw, &ehdr, sizeof(ehdr)); if (ret) return; /* nothing to read */ switch(ehdr.type) { case PERF_RECORD_SAMPLE: ret = perf_display_sample(fds, num_fds, hw - fds, &ehdr, stdout); if (ret) errx(1, "cannot parse sample"); collected_samples++; break; case PERF_RECORD_EXIT: display_exit(hw, stdout); break; case PERF_RECORD_LOST: lost_samples += display_lost(hw, fds, num_fds, stdout); break; case PERF_RECORD_THROTTLE: display_freq(1, hw, stdout); break; case PERF_RECORD_UNTHROTTLE: display_freq(0, hw, stdout); break; default: printf("unknown sample type %d\n", ehdr.type); perf_skip_buffer(hw, ehdr.size - 
sizeof(ehdr)); } } } int setup_cpu(int cpu, int fd) { int ret, flags; int i, pid; /* * does allocate fds */ ret = perf_setup_list_events(options.events, &fds, &num_fds); if (ret || !num_fds) errx(1, "cannot setup event list"); if (!fds[0].hw.sample_period) errx(1, "need to set sampling period or freq on first event, use :period= or :freq="); fds[0].fd = -1; for(i=0; i < num_fds; i++) { fds[i].hw.disabled = !i; /* start immediately */ if (options.cgroup) { flags = PERF_FLAG_PID_CGROUP; pid = fd; } else { flags = 0; pid = -1; } if (options.pin) fds[i].hw.pinned = 1; if (fds[i].hw.sample_period) { /* * set notification threshold to be halfway through the buffer */ if (fds[i].hw.sample_period) { fds[i].hw.wakeup_watermark = (options.mmap_pages*pgsz) / 2; fds[i].hw.watermark = 1; } fds[i].hw.sample_type = PERF_SAMPLE_IP|PERF_SAMPLE_TID|PERF_SAMPLE_READ|PERF_SAMPLE_TIME|PERF_SAMPLE_PERIOD|PERF_SAMPLE_STREAM_ID|PERF_SAMPLE_CPU; /* * if we have more than one event, then record event identifier to help with parsing */ if (num_fds > 1) fds[i].hw.sample_type |= PERF_SAMPLE_IDENTIFIER; printf("%s period=%"PRIu64" freq=%d\n", fds[i].name, fds[i].hw.sample_period, fds[i].hw.freq); fds[i].hw.read_format = PERF_FORMAT_SCALE; if (fds[i].hw.freq) fds[i].hw.sample_type |= PERF_SAMPLE_PERIOD; } fds[i].fd = perf_event_open(&fds[i].hw, pid, cpu, fds[0].fd, flags); if (fds[i].fd == -1) { if (fds[i].hw.precise_ip) err(1, "cannot attach event %s: precise mode may not be supported", fds[i].name); err(1, "cannot attach event %s", fds[i].name); } } /* * kernel adds the header page to the size of the mmapped region */ fds[0].buf = mmap(NULL, map_size, PROT_READ|PROT_WRITE, MAP_SHARED, fds[0].fd, 0); if (fds[0].buf == MAP_FAILED) err(1, "cannot mmap buffer"); /* does not include header page */ fds[0].pgmsk = (options.mmap_pages*pgsz)-1; /* * send samples for all events to first event's buffer */ for (i = 1; i < num_fds; i++) { if (!fds[i].hw.sample_period) continue; ret = ioctl(fds[i].fd, 
PERF_EVENT_IOC_SET_OUTPUT, fds[0].fd); if (ret) err(1, "cannot redirect sampling output"); } /* * collect event ids */ if (num_fds > 1 && fds[0].fd > -1) { for(i = 0; i < num_fds; i++) { /* * read the event identifier using ioctl * new method replaced the trick with PERF_FORMAT_GROUP + PERF_FORMAT_ID + read() */ ret = ioctl(fds[i].fd, PERF_EVENT_IOC_ID, &fds[i].id); if (ret == -1) err(1, "cannot read ID"); printf("ID %"PRIu64" %s\n", fds[i].id, fds[i].name); } } return 0; } static void start_cpu(void) { int ret; ret = ioctl(fds[0].fd, PERF_EVENT_IOC_ENABLE, 0); if (ret) err(1, "cannot start counter"); } static int cgroupfs_find_mountpoint(char *buf, size_t maxlen) { FILE *fp; char mountpoint[MAX_PATH+1], tokens[MAX_PATH+1], type[MAX_PATH+1]; char *token, *saved_ptr = NULL; int found = 0; fp = fopen("/proc/mounts", "r"); if (!fp) return -1; /* * in order to handle split hierarchy, we need to scan /proc/mounts * and inspect every cgroupfs mount point to find one that has * perf_event subsystem */ while (fscanf(fp, "%*s %"STR(MAX_PATH)"s %"STR(MAX_PATH)"s %" STR(MAX_PATH)"s %*d %*d\n", mountpoint, type, tokens) == 3) { if (!strcmp(type, "cgroup")) { token = strtok_r(tokens, ",", &saved_ptr); while (token != NULL) { if (!strcmp(token, "perf_event")) { found = 1; break; } token = strtok_r(NULL, ",", &saved_ptr); } } if (found) break; } fclose(fp); if (!found) return -1; if (strlen(mountpoint) < maxlen) { strcpy(buf, mountpoint); return 0; } return -1; } int open_cgroup(char *name) { char path[MAX_PATH+1]; char mnt[MAX_PATH+1]; int cfd; if (cgroupfs_find_mountpoint(mnt, MAX_PATH+1)) errx(1, "cannot find cgroup fs mount point"); snprintf(path, MAX_PATH, "%s/%s", mnt, name); cfd = open(path, O_RDONLY); if (cfd == -1) warn("no access to cgroup %s\n", name); return cfd; } static void handler(int n) { longjmp(jbuf, 1); } int mainloop(char **arg) { static uint64_t ovfl_count = 0; /* static to avoid setjmp issue */ struct pollfd pollfds[1]; int ret; int fd = -1; int i; if 
(pfm_initialize() != PFM_SUCCESS) errx(1, "libpfm initialization failed\n"); pgsz = sysconf(_SC_PAGESIZE); map_size = (options.mmap_pages+1)*pgsz; if (options.cgroup) { fd = open_cgroup(options.cgroup); if (fd == -1) err(1, "cannot open cgroup file %s\n", options.cgroup); } setup_cpu(options.cpu, fd); /* done with cgroup */ if (fd != -1) close(fd); signal(SIGALRM, handler); signal(SIGINT, handler); pollfds[0].fd = fds[0].fd; pollfds[0].events = POLLIN; printf("monitoring on CPU%d, session ending in %ds\n", options.cpu, options.delay); if (setjmp(jbuf) == 1) goto terminate_session; start_cpu(); alarm(options.delay); /* * core loop */ for(;;) { ret = poll(pollfds, 1, -1); if (ret < 0 && errno == EINTR) break; ovfl_count++; process_smpl_buf(&fds[0]); } terminate_session: for(i=0; i < num_fds; i++) close(fds[i].fd); /* check for partial event buffer */ process_smpl_buf(&fds[0]); munmap(fds[0].buf, map_size); perf_free_fds(fds, num_fds); printf("%"PRIu64" samples collected in %"PRIu64" poll events, %"PRIu64" lost samples\n", collected_samples, ovfl_count, lost_samples); return 0; } static void usage(void) { printf("usage: syst_smpl [-h] [-P] [--help] [-m mmap_pages] [-f] [-e event1,...,eventn] [-c cpu] [-d seconds]\n"); } int main(int argc, char **argv) { int c; setlocale(LC_ALL, ""); options.cpu = -1; options.delay = -1; while ((c=getopt_long(argc, argv,"hPe:m:c:d:G:", the_options, 0)) != -1) { switch(c) { case 0: continue; case 'e': if (options.events) errx(1, "events specified twice\n"); options.events = optarg; break; case 'm': if (options.mmap_pages) errx(1, "mmap pages already set\n"); options.mmap_pages = atoi(optarg); break; case 'P': options.pin = 1; break; case 'd': options.delay = atoi(optarg); break; case 'G': options.cgroup = optarg; break; case 'c': options.cpu = atoi(optarg); break; case 'h': usage(); exit(0); default: errx(1, "unknown option"); } } if (!options.events) options.events = strdup(gen_events); if (!options.mmap_pages) options.mmap_pages = 1; 
if (options.cpu == -1) options.cpu = random() % sysconf(_SC_NPROCESSORS_ONLN); if (options.delay == -1) options.delay = 10; if (options.mmap_pages > 1 && ((options.mmap_pages) & 0x1)) errx(1, "number of pages must be power of 2\n"); return mainloop(argv+optind); } papi-5.6.0/src/perfctr-2.6.x/linux/include/asm-x86/000775 001750 001750 00000000000 13216244367 023533 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/libpfm4/lib/pfmlib_mips.c000664 001750 001750 00000021435 13216244365 022103 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_mips.c : support for MIPS chips * * Copyright (c) 2011 Samara Technology Group, Inc * Contributed by Philip Mucci * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* */ #include #include #include #include #include /* private headers */ #include "pfmlib_priv.h" /* library private */ #include "pfmlib_mips_priv.h" pfm_mips_config_t pfm_mips_cfg; static const pfmlib_attr_desc_t mips_mods[]={ PFM_ATTR_B("k", "monitor at system level"), PFM_ATTR_B("u", "monitor at user level"), PFM_ATTR_B("s", "monitor at supervisor level"), PFM_ATTR_B("e", "monitor at exception level "), PFM_ATTR_NULL /* end-marker to avoid exporting number of entries */ }; #ifdef CONFIG_PFMLIB_OS_LINUX /* * helper function to retrieve one value from /proc/cpuinfo * for internal libpfm use only * attr: the attribute (line) to look for * ret_buf: a buffer to store the value of the attribute (as a string) * maxlen : number of bytes of capacity in ret_buf * * ret_buf is null terminated. * * Return: * 0 : attribute found, ret_buf populated * -1: attribute not found */ static int pfmlib_getcpuinfo_attr(const char *attr, char *ret_buf, size_t maxlen) { FILE *fp = NULL; int ret = -1; size_t attr_len, buf_len = 0; char *p, *value = NULL; char *buffer = NULL; if (attr == NULL || ret_buf == NULL || maxlen < 1) return -1; attr_len = strlen(attr); fp = fopen("/proc/cpuinfo", "r"); if (fp == NULL) return -1; while(pfmlib_getl(&buffer, &buf_len, fp) != -1){ /* skip blank lines */ if (*buffer == '\n') continue; p = strchr(buffer, ':'); if (p == NULL) goto error; /* * p+2: +1 = space, +2= firt character * strlen()-1 gets rid of \n */ *p = '\0'; value = p+2; value[strlen(value)-1] = '\0'; if (!strncmp(attr, buffer, attr_len)) break; } strncpy(ret_buf, value, maxlen-1); ret_buf[maxlen-1] = '\0'; ret = 0; error: free(buffer); fclose(fp); return ret; } #else static int pfmlib_getcpuinfo_attr(const char *attr, char *ret_buf, size_t maxlen) { DPRINT("/proc/cpuinfo ignored\n"); } #endif static void pfm_mips_display_reg(pfm_mips_sel_reg_t reg, uint64_t cntrs, char *fstr) { __pfm_vbprintf("[0x%"PRIx64" mask=0x%x usr=%d sys=%d sup=%d int=%d cntrs=0x%"PRIx64"] %s\n", reg.val, 
reg.perfsel64.sel_event_mask, reg.perfsel64.sel_usr, reg.perfsel64.sel_os, reg.perfsel64.sel_sup, reg.perfsel64.sel_exl, cntrs, fstr); } int pfm_mips_detect(void *this) { int ret; char buffer[1024]; DPRINT("mips_detect\n"); ret = pfmlib_getcpuinfo_attr("cpu model", buffer, sizeof(buffer)); if (ret == -1) return PFM_ERR_NOTSUPP; if (strstr(buffer,"MIPS") == NULL) return PFM_ERR_NOTSUPP; strncpy(pfm_mips_cfg.model,buffer,strlen(buffer)); /* ret = pfmlib_getcpuinfo_attr("CPU implementer", buffer, sizeof(buffer)); if (ret == -1) return PFM_ERR_NOTSUPP; pfm_mips_cfg.implementer = strtol(buffer, NULL, 16); ret = pfmlib_getcpuinfo_attr("CPU part", buffer, sizeof(buffer)); if (ret == -1) return PFM_ERR_NOTSUPP; pfm_mips_cfg.part = strtol(buffer, NULL, 16); ret = pfmlib_getcpuinfo_attr("CPU architecture", buffer, sizeof(buffer)); if (ret == -1) return PFM_ERR_NOTSUPP; pfm_mips_cfg.architecture = strtol(buffer, NULL, 16); */ return PFM_SUCCESS; } int pfm_mips_get_encoding(void *this, pfmlib_event_desc_t *e) { pfmlib_pmu_t *pmu = this; const mips_entry_t *pe = this_pe(this); pfmlib_event_attr_info_t *a; pfm_mips_sel_reg_t reg; uint64_t ival, cntmask = 0; int plmmsk = 0, code; int k, id; reg.val = 0; code = pe[e->event].code; /* truncates bit 7 (counter info) */ reg.perfsel64.sel_event_mask = code; for (k = 0; k < e->nattrs; k++) { a = attr(e, k); if (a->ctrl != PFM_ATTR_CTRL_PMU) continue; ival = e->attrs[k].ival; switch(a->idx) { case MIPS_ATTR_K: /* os */ reg.perfsel64.sel_os = !!ival; plmmsk |= _MIPS_ATTR_K; break; case MIPS_ATTR_U: /* user */ reg.perfsel64.sel_usr = !!ival; plmmsk |= _MIPS_ATTR_U; break; case MIPS_ATTR_S: /* supervisor */ reg.perfsel64.sel_sup = !!ival; plmmsk |= _MIPS_ATTR_S; break; case MIPS_ATTR_E: /* int */ reg.perfsel64.sel_exl = !!ival; plmmsk |= _MIPS_ATTR_E; } } /* * handle case where no priv level mask was passed. 
* then we use the dfl_plm */ if (!(plmmsk & MIPS_PLM_ALL)) { if (e->dfl_plm & PFM_PLM0) reg.perfsel64.sel_os = 1; if (e->dfl_plm & PFM_PLM1) reg.perfsel64.sel_sup = 1; if (e->dfl_plm & PFM_PLM2) reg.perfsel64.sel_exl = 1; if (e->dfl_plm & PFM_PLM3) reg.perfsel64.sel_usr = 1; } evt_strcat(e->fstr, "%s", pe[e->event].name); for (k = 0; k < e->npattrs; k++) { if (e->pattrs[k].ctrl != PFM_ATTR_CTRL_PMU) continue; id = e->pattrs[k].idx; switch(id) { case MIPS_ATTR_K: evt_strcat(e->fstr, ":%s=%lu", mips_mods[id].name, reg.perfsel64.sel_os); break; case MIPS_ATTR_U: evt_strcat(e->fstr, ":%s=%lu", mips_mods[id].name, reg.perfsel64.sel_usr); break; case MIPS_ATTR_S: evt_strcat(e->fstr, ":%s=%lu", mips_mods[id].name, reg.perfsel64.sel_sup); break; case MIPS_ATTR_E: evt_strcat(e->fstr, ":%s=%lu", mips_mods[id].name, reg.perfsel64.sel_exl); break; } } e->codes[0] = reg.val; /* cycles and instructions support all counters */ if (code == 0 || code == 1) { cntmask = (1ULL << pmu->num_cntrs) -1; } else { /* event work on odd counters only */ for (k = !!(code & 0x80) ; k < pmu->num_cntrs; k+=2) { cntmask |= 1ULL << k; } } e->codes[1] = cntmask; e->count = 2; pfm_mips_display_reg(reg, cntmask, e->fstr); return PFM_SUCCESS; } int pfm_mips_get_event_first(void *this) { return 0; } int pfm_mips_get_event_next(void *this, int idx) { pfmlib_pmu_t *p = this; if (idx >= (p->pme_count-1)) return -1; return idx+1; } int pfm_mips_event_is_valid(void *this, int pidx) { pfmlib_pmu_t *p = this; return pidx >= 0 && pidx < p->pme_count; } int pfm_mips_validate_table(void *this, FILE *fp) { pfmlib_pmu_t *pmu = this; const mips_entry_t *pe = this_pe(this); int i, j, error = 0; for(i=0; i < pmu->pme_count; i++) { if (!pe[i].name) { fprintf(fp, "pmu: %s event%d: :: no name (prev event was %s)\n", pmu->name, i, i > 1 ? 
pe[i-1].name : "??"); error++; } if (!pe[i].desc) { fprintf(fp, "pmu: %s event%d: %s :: no description\n", pmu->name, i, pe[i].name); error++; } for (j=i+1; j < pmu->pme_count; j++) { if (pe[i].code == pe[j].code) { fprintf(fp, "pmu: %s events %s and %s have the same code 0x%x\n", pmu->name, pe[i].name, pe[j].name, pe[i].code); error++; } } } if (!pmu->supported_plm) { fprintf(fp, "pmu: %s supported_plm=0, is that right?\n", pmu->name); error++; } return error ? PFM_ERR_INVAL : PFM_SUCCESS; } unsigned int pfm_mips_get_event_nattrs(void *this, int pidx) { /* assume all pmus have the same number of attributes */ return MIPS_NUM_ATTRS; } int pfm_mips_get_event_attr_info(void *this, int pidx, int attr_idx, pfmlib_event_attr_info_t *info) { /* no umasks, so all attrs are modifiers */ info->name = mips_mods[attr_idx].name; info->desc = mips_mods[attr_idx].desc; info->type = mips_mods[attr_idx].type; info->type = mips_mods[attr_idx].type; info->equiv= NULL; info->idx = attr_idx; /* private index */ info->code = attr_idx; info->is_dfl = 0; info->is_precise = 0; info->ctrl = PFM_ATTR_CTRL_PMU;; return PFM_SUCCESS; } int pfm_mips_get_event_info(void *this, int idx, pfm_event_info_t *info) { pfmlib_pmu_t *pmu = this; const mips_entry_t *pe = this_pe(this); info->name = pe[idx].name; info->desc = pe[idx].desc; info->code = pe[idx].code; info->equiv = NULL; info->idx = idx; /* private index */ info->pmu = pmu->pmu; info->is_precise = 0; /* no attributes defined for MIPS yet */ info->nattrs = pfm_mips_get_event_nattrs(this, idx); return PFM_SUCCESS; } papi-5.6.0/src/perfctr-2.6.x/etc/costs/Athlon-500000775 001750 001750 00000001012 13216244366 023122 0ustar00jshenry1963jshenry1963000000 000000 [data from a 500MHz Athlon] PERFCTR INIT: vendor 2, family 6, model 1 PERFCTR INIT: NITER == 64 PERFCTR INIT: rdpmc ticks == 930 PERFCTR INIT: rdmsr (counter) ticks == 3401 PERFCTR INIT: rdmsr (evntsel) ticks == 3454 PERFCTR INIT: wrmsr (counter) ticks == 5197 PERFCTR INIT: wrmsr (evntsel) 
ticks == 14915 PERFCTR INIT: loop overhead ticks == 98 PERFCTR INIT: Athlon test0 == 0 (ok) PERFCTR INIT: Athlon test1 == 43 (ok) PERFCTR INIT: Athlon test2 == 43 (EvntSel0 does not override) PERFCTR INIT: Athlon test3 == 0 (ok) papi-5.6.0/src/libpfm4/lib/pfmlib_intel_snbep_unc_pcu.c000664 001750 001750 00000006630 13216244365 025151 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_intel_snbep_unc_pcu.c : Intel SandyBridge-EP Power Control Unit (PCU) uncore PMU * * Copyright (c) 2012 Google, Inc * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include #include #include #include #include /* private headers */ #include "pfmlib_priv.h" #include "pfmlib_intel_x86_priv.h" #include "pfmlib_intel_snbep_unc_priv.h" #include "events/intel_snbep_unc_pcu_events.h" static void display_pcu(void *this, pfmlib_event_desc_t *e, void *val) { const intel_x86_entry_t *pe = this_pe(this); pfm_snbep_unc_reg_t *reg = val; pfm_snbep_unc_reg_t f; __pfm_vbprintf("[UNC_PCU=0x%"PRIx64" event=0x%x occ_sel=0x%x en=%d " "inv=%d edge=%d thres=%d occ_inv=%d occ_edge=%d] %s\n", reg->val, reg->pcu.unc_event, reg->pcu.unc_occ, reg->pcu.unc_en, reg->pcu.unc_inv, reg->pcu.unc_edge, reg->pcu.unc_thres, reg->pcu.unc_occ_inv, reg->pcu.unc_occ_edge, pe[e->event].name); if (e->count == 1) return; f.val = e->codes[1]; __pfm_vbprintf("[UNC_PCU_FILTER=0x%"PRIx64" band0=%u band1=%u band2=%u band3=%u]\n", f.val, f.pcu_filt.filt0, f.pcu_filt.filt1, f.pcu_filt.filt2, f.pcu_filt.filt3); } pfmlib_pmu_t intel_snbep_unc_pcu_support = { .desc = "Intel Sandy Bridge-EP PCU uncore", .name = "snbep_unc_pcu", .perf_name = "uncore_pcu", .pmu = PFM_PMU_INTEL_SNBEP_UNC_PCU, .pme_count = LIBPFM_ARRAY_SIZE(intel_snbep_unc_p_pe), .type = PFM_PMU_TYPE_UNCORE, .num_cntrs = 4, .num_fixed_cntrs = 0, .max_encoding = 2, .pe = intel_snbep_unc_p_pe, .atdesc = snbep_unc_mods, .flags = PFMLIB_PMU_FL_RAW_UMASK | INTEL_PMU_FL_UNC_OCC | PFMLIB_PMU_FL_NO_SMPL, .pmu_detect = pfm_intel_snbep_unc_detect, .get_event_encoding[PFM_OS_NONE] = pfm_intel_snbep_unc_get_encoding, PFMLIB_ENCODE_PERF(pfm_intel_snbep_unc_get_perf_encoding), .get_event_first = pfm_intel_x86_get_event_first, .get_event_next = pfm_intel_x86_get_event_next, .event_is_valid = pfm_intel_x86_event_is_valid, .validate_table = pfm_intel_x86_validate_table, .get_event_info = pfm_intel_x86_get_event_info, .get_event_attr_info = pfm_intel_x86_get_event_attr_info, PFMLIB_VALID_PERF_PATTRS(pfm_intel_snbep_unc_perf_validate_pattrs), .get_event_nattrs = pfm_intel_x86_get_event_nattrs, .can_auto_encode = 
pfm_intel_snbep_unc_can_auto_encode, .display_reg = display_pcu, }; papi-5.6.0/src/libpfm4/lib/events/amd64_events_k7.h000664 001750 001750 00000015557 13216244364 024022 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2011 Google, Inc * Contributed by Stephane Eranian * * Regenerated from previous version by: * * Copyright (c) 2006, 2007 Advanced Micro Devices, Inc. * Contributed by Ray Bryant * Contributed by Robert Richter * Modified for K7 by Vince Weaver * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. * * This file has been automatically generated. 
/*
 * Unit masks for the DATA_CACHE_REFILLS event (code 0x42 in amd64_k7_pe).
 *
 * Each of the first five entries selects a single bit (0x1 .. 0x10), one
 * per cache-line state named in its description. "ALL" is the OR of the
 * five bits (0x1f); it is the default unit mask (AMD64_FL_DFL) and is
 * flagged AMD64_FL_NCOMBO.
 *
 * NOTE(review): the "ALL" description lists Shared, Exclusive, Owned and
 * Modified only, yet 0x1f also includes the L2_INVALID bit (0x1). The
 * matching entry of amd64_k7_data_cache_refills_from_system does mention
 * Invalid -- confirm which wording is intended.
 */
static const amd64_umask_t amd64_k7_data_cache_refills[]={
   { .uname  = "L2_INVALID",
     .udesc  = "Invalid line from L2",
     .ucode = 0x1,
   },
   { .uname  = "L2_SHARED",
     .udesc  = "Shared-state line from L2",
     .ucode = 0x2,
   },
   { .uname  = "L2_EXCLUSIVE",
     .udesc  = "Exclusive-state line from L2",
     .ucode = 0x4,
   },
   { .uname  = "L2_OWNED",
     .udesc  = "Owned-state line from L2",
     .ucode = 0x8,
   },
   { .uname  = "L2_MODIFIED",
     .udesc  = "Modified-state line from L2",
     .ucode = 0x10,
   },
   /* all five state bits at once; default when no unit mask is given */
   { .uname  = "ALL",
     .udesc  = "Shared, Exclusive, Owned, Modified State Refills",
     .ucode = 0x1f,
     .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL,
   },
};
= "DATA_CACHE_LINES_EVICTED", .desc = "Data Cache Lines Evicted", .modmsk = AMD64_BASIC_ATTRS, .code = 0x44, .numasks = LIBPFM_ARRAY_SIZE(amd64_k7_data_cache_refills_from_system), .ngrp = 1, .umasks = amd64_k7_data_cache_refills_from_system, /* identical to actual umasks list for this event */ }, { .name = "L1_DTLB_MISS_AND_L2_DTLB_HIT", .desc = "L1 DTLB Miss and L2 DTLB Hit", .modmsk = AMD64_BASIC_ATTRS, .code = 0x45, }, { .name = "L1_DTLB_AND_L2_DTLB_MISS", .desc = "L1 DTLB and L2 DTLB Miss", .modmsk = AMD64_BASIC_ATTRS, .code = 0x46, }, { .name = "MISALIGNED_ACCESSES", .desc = "Misaligned Accesses", .modmsk = AMD64_BASIC_ATTRS, .code = 0x47, }, { .name = "CPU_CLK_UNHALTED", .desc = "CPU Clocks not Halted", .modmsk = AMD64_BASIC_ATTRS, .code = 0x76, }, { .name = "INSTRUCTION_CACHE_FETCHES", .desc = "Instruction Cache Fetches", .modmsk = AMD64_BASIC_ATTRS, .code = 0x80, }, { .name = "INSTRUCTION_CACHE_MISSES", .desc = "Instruction Cache Misses", .modmsk = AMD64_BASIC_ATTRS, .code = 0x81, }, { .name = "L1_ITLB_MISS_AND_L2_ITLB_HIT", .desc = "L1 ITLB Miss and L2 ITLB Hit", .modmsk = AMD64_BASIC_ATTRS, .code = 0x84, }, { .name = "L1_ITLB_MISS_AND_L2_ITLB_MISS", .desc = "L1 ITLB Miss and L2 ITLB Miss", .modmsk = AMD64_BASIC_ATTRS, .code = 0x85, }, { .name = "RETIRED_INSTRUCTIONS", .desc = "Retired Instructions (includes exceptions, interrupts, resyncs)", .modmsk = AMD64_BASIC_ATTRS, .code = 0xc0, }, { .name = "RETIRED_UOPS", .desc = "Retired uops", .modmsk = AMD64_BASIC_ATTRS, .code = 0xc1, }, { .name = "RETIRED_BRANCH_INSTRUCTIONS", .desc = "Retired Branch Instructions", .modmsk = AMD64_BASIC_ATTRS, .code = 0xc2, }, { .name = "RETIRED_MISPREDICTED_BRANCH_INSTRUCTIONS", .desc = "Retired Mispredicted Branch Instructions", .modmsk = AMD64_BASIC_ATTRS, .code = 0xc3, }, { .name = "RETIRED_TAKEN_BRANCH_INSTRUCTIONS", .desc = "Retired Taken Branch Instructions", .modmsk = AMD64_BASIC_ATTRS, .code = 0xc4, }, { .name = "RETIRED_TAKEN_BRANCH_INSTRUCTIONS_MISPREDICTED", .desc = 
"Retired Taken Branch Instructions Mispredicted", .modmsk = AMD64_BASIC_ATTRS, .code = 0xc5, }, { .name = "RETIRED_FAR_CONTROL_TRANSFERS", .desc = "Retired Far Control Transfers", .modmsk = AMD64_BASIC_ATTRS, .code = 0xc6, }, { .name = "RETIRED_BRANCH_RESYNCS", .desc = "Retired Branch Resyncs (only non-control transfer branches)", .modmsk = AMD64_BASIC_ATTRS, .code = 0xc7, }, { .name = "INTERRUPTS_MASKED_CYCLES", .desc = "Interrupts-Masked Cycles", .modmsk = AMD64_BASIC_ATTRS, .code = 0xcd, }, { .name = "INTERRUPTS_MASKED_CYCLES_WITH_INTERRUPT_PENDING", .desc = "Interrupts-Masked Cycles with Interrupt Pending", .modmsk = AMD64_BASIC_ATTRS, .code = 0xce, }, { .name = "INTERRUPTS_TAKEN", .desc = "Interrupts Taken", .modmsk = AMD64_BASIC_ATTRS, .code = 0xcf, }, }; papi-5.6.0/man/man3/PAPI_stop_counters.3000664 001750 001750 00000002714 13216244356 021760 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPI_stop_counters" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPI_stop_counters \- .PP Stop counting hardware events and reset values to zero\&. .SH SYNOPSIS .br .PP .SH "Detailed Description" .PP .PP .nf @par C Interface: \#include @n int PAPI_stop_counters( long long *values, int array_len ); .fi .PP .PP \fBParameters:\fP .RS 4 \fI*values\fP an array where to put the counter values .br \fIarray_len\fP the number of items in the *values array .RE .PP \fBPostcondition:\fP .RS 4 After this function is called, the values are reset to zero\&. .RE .PP \fBReturn values:\fP .RS 4 \fIPAPI_EINVAL\fP One or more of the arguments is invalid\&. .br \fIPAPI_ENOTRUN\fP The EventSet is not started yet\&. .br \fIPAPI_ENOEVST\fP The EventSet has not been added yet\&. .RE .PP The \fBPAPI_stop_counters()\fP function stops the counters and copies the counts into the *values array\&. The counters must have been started by a previous call to \fBPAPI_start_counters()\fP\&. 
.PP .PP .nf int Events[2] = { PAPI_TOT_CYC, PAPI_TOT_INS }; long long values[2]; if ( PAPI_start_counters( Events, 2 ) != PAPI_OK ) handle_error(1); your_slow_code(); if ( PAPI_stop_counters( values, 2 ) != PAPI_OK ) handle_error(1); * .fi .PP .PP \fBSee Also:\fP .RS 4 \fBPAPI_read_counters()\fP \fBPAPI_start_counters()\fP \fBPAPI_set_opt()\fP .RE .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. papi-5.6.0/src/libpfm4/docs/man3/libpfm_intel_hsw.3000664 001750 001750 00000007774 13216244364 024100 0ustar00jshenry1963jshenry1963000000 000000 .TH LIBPFM 3 "April, 2013" "" "Linux Programmer's Manual" .SH NAME libpfm_intel_hsw - support for Intel Haswell core PMU .SH SYNOPSIS .nf .B #include <perfmon/pfmlib.h> .sp .B PMU name: hsw .B PMU desc: Intel Haswell .B PMU name: hsw_ep .B PMU desc: Intel Haswell-EP .sp .SH DESCRIPTION The library supports the Intel Haswell and Haswell-EP core PMU. It should be noted that this PMU model only covers each core's PMU and not the socket level PMU. On Haswell, the number of generic counters depends on the Hyperthreading (HT) mode. When HT is on, then only 4 generic counters are available. When HT is off, then 8 generic counters are available. The \fBpfm_get_pmu_info()\fR function returns the maximum number of generic counters in \fBnum_cntrs\fR. .SH MODIFIERS The following modifiers are supported on Intel Haswell processors: .TP .B u Measure at user level which includes privilege levels 1, 2, 3. This corresponds to \fBPFM_PLM3\fR. This is a boolean modifier. .TP .B k Measure at kernel level which includes privilege level 0. This corresponds to \fBPFM_PLM0\fR. This is a boolean modifier. .TP .B i Invert the meaning of the event. The counter will now count cycles in which the event is \fBnot\fR occurring. This is a boolean modifier. .TP .B e Enable edge detection, i.e., count only when there is a state transition from no occurrence of the event to at least one occurrence.
This modifier must be combined with a counter mask modifier (c) with a value greater than or equal to one. This is a boolean modifier. .TP .B c Set the counter mask value. The mask acts as a threshold. The counter will count the number of cycles in which the number of occurrences of the event is greater than or equal to the threshold. This is an integer modifier with values in the range [0:255]. .TP .B t Measure on both threads at the same time assuming hyper-threading is enabled. This is a boolean modifier. .TP .B ldlat Pass a latency threshold to the MEM_TRANS_RETIRED:LOAD_LATENCY event. This is an integer attribute that must be in the range [3:65535]. It is required for this event. Note that the event must be used with precise sampling (PEBS). .TP .B intx Monitor the event only when executing inside a transactional memory region (in tx). Event does not count otherwise. This is a boolean modifier. Default value is 0. .TP .B intxcp Do not count occurrences of the event when they are inside an aborted transactional memory region. This is a boolean modifier. Default value is 0. .SH OFFCORE_RESPONSE events Intel Haswell provides two offcore_response events. They are called OFFCORE_RESPONSE_0 and OFFCORE_RESPONSE_1. Those events need special treatment in the performance monitoring infrastructure because each event uses an extra register to store some settings. Thus, in case multiple offcore_response events are monitored simultaneously, the kernel needs to manage the sharing of that extra register. The offcore_response events are exposed as normal events by the library. The extra settings are exposed as regular umasks. The library takes care of encoding the events according to the underlying kernel interface. On Intel Haswell, the umasks are divided into three categories: request, supplier and snoop. The user must provide at least one umask for each category. The categories are shown in the umask descriptions.
There is also the special response umask called \fBANY_RESPONSE\fR. When this umask is used then it overrides any supplier and snoop umasks. In other words, users can specify either \fBANY_RESPONSE\fR \fBOR\fR any combination of supplier + snoops. In case no supplier or snoop is specified, the library defaults to using \fBANY_RESPONSE\fR. For instance, the following are valid event selections: .TP .B OFFCORE_RESPONSE_0:DMND_DATA_RD:ANY_RESPONSE .TP .B OFFCORE_RESPONSE_0:ANY_REQUEST .TP .B OFFCORE_RESPONSE_0:ANY_RFO:LLC_HITM:SNOOP_ANY .P But the following are illegal: .TP .B OFFCORE_RESPONSE_0:ANY_RFO:LLC_HITM:ANY_RESPONSE .TP .B OFFCORE_RESPONSE_0:ANY_RFO:LLC_HITM:SNOOP_ANY:ANY_RESPONSE .SH AUTHORS .nf Stephane Eranian .fi .PP papi-5.6.0/src/perfctr-2.6.x/etc/costs/PPC750-300000775 001750 001750 00000002324 13216244366 022560 0ustar00jshenry1963jshenry1963000000 000000 [data from a 300 MHz PowerPC 750] PERFCTR INIT: PVR 0x00080202, CPU clock 300753 kHz, TB clock 16708 kHz PERFCTR INIT: NITER == 256 PERFCTR INIT: loop overhead is 40 cycles PERFCTR INIT: mftbl cost is 1.9 cycles (541 total) PERFCTR INIT: mfspr (pmc1) cost is 1.8 cycles (519 total) PERFCTR INIT: mfspr (pmc2) cost is 1.8 cycles (516 total) PERFCTR INIT: mfspr (pmc3) cost is 1.8 cycles (520 total) PERFCTR INIT: mfspr (pmc4) cost is 1.8 cycles (516 total) PERFCTR INIT: mfspr (mmcr0) cost is 1.8 cycles (523 total) PERFCTR INIT: mfspr (mmcr1) cost is 1.8 cycles (516 total) PERFCTR INIT: mtspr (pmc2) cost is 1.8 cycles (525 total) PERFCTR INIT: mtspr (pmc3) cost is 1.8 cycles (516 total) PERFCTR INIT: mtspr (pmc4) cost is 1.8 cycles (519 total) PERFCTR INIT: mtspr (mmcr1) cost is 1.9 cycles (545 total) PERFCTR INIT: mtspr (mmcr0) cost is 2.0 cycles (559 total) PERFCTR INIT: check_fcece(0): MMCR0[FC] is 0, PMC1 is 0x80000076 PERFCTR INIT: check_fcece(1): MMCR0[FC] is 0, PMC1 is 0x80000040 PERFCTR INIT: check_trigger(0): MMCR0[TRIGGER] is 1, PMC1 is 0x80000076, PMC2 is 0x78 PERFCTR INIT: check_trigger(1):
MMCR0[TRIGGER] is 1, PMC1 is 0x80000041, PMC2 is 0x41 perfctr: driver 2.6.4, cpu type PowerPC 60x/7xx/74xx at 300753 kHz papi-5.6.0/man/man3/PAPIF_get_real_usec.3000664 001750 001750 00000000763 13216244355 021761 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPIF_get_real_usec" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPIF_get_real_usec \- .PP Get real time counter value in microseconds\&. .SH SYNOPSIS .br .PP .SH "Detailed Description" .PP .PP \fBFortran Interface:\fP .RS 4 #include 'fpapi\&.h' .br \fBPAPIF_get_real_usec( C_LONG_LONG time )\fP .RE .PP \fBSee Also:\fP .RS 4 \fBPAPI_get_real_usec\fP .RE .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. papi-5.6.0/src/ctests/dmem_info.c000664 001750 001750 00000005005 13216244360 020727 0ustar00jshenry1963jshenry1963000000 000000 /* * This file perfoms the following test: dynamic memory info * The pages used should increase steadily. * * Author: Kevin London * london@cs.utk.edu */ #include #include #include "papi.h" #include "papi_test.h" #include "do_loops.h" #define ALLOCMEM 200000 static void dump_memory_info( FILE * output, PAPI_dmem_info_t * d ) { fprintf( output, "\n--------\n" ); fprintf( output, "Mem Size:\t\t%lld\n", d->size ); fprintf( output, "Mem Peak Size:\t\t%lld\n", d->peak ); fprintf( output, "Mem Resident:\t\t%lld\n", d->resident ); fprintf( output, "Mem High Water Mark:\t%lld\n", d->high_water_mark ); fprintf( output, "Mem Shared:\t\t%lld\n", d->shared ); fprintf( output, "Mem Text:\t\t%lld\n", d->text ); fprintf( output, "Mem Library:\t\t%lld\n", d->library ); fprintf( output, "Mem Heap:\t\t%lld\n", d->heap ); fprintf( output, "Mem Locked:\t\t%lld\n", d->locked ); fprintf( output, "Mem Stack:\t\t%lld\n", d->stack ); fprintf( output, "Mem Pagesize:\t\t%lld\n", d->pagesize ); fprintf( output, "Mem Page Table Entries:\t\t%lld\n", d->pte ); fprintf( output, "--------\n\n" ); } int main( int argc, char **argv ) { 
PAPI_dmem_info_t dmem; long long value[7]; int retval, i = 0, j = 0; double *m[7]; tests_quiet( argc, argv ); /* Set TESTS_QUIET variable */ retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); for ( i = 0; i < 7; i++ ) { retval = PAPI_get_dmem_info( &dmem ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_get_dmem_info", retval ); /* dump_memory_info(stdout,&dmem); */ value[i] = dmem.size; m[i] = ( double * ) malloc( ALLOCMEM * sizeof ( double ) ); touch_dummy( m[j], ALLOCMEM ); } if ( !TESTS_QUIET ) { printf( "Test case: Dynamic Memory Information.\n" ); dump_memory_info( stdout, &dmem ); printf ( "------------------------------------------------------------------------\n" ); for ( i = 0; i < 7; i++ ) printf( "Malloc additional: %d KB Memory Size in KB: %d\n", ( int ) ( ( sizeof ( double ) * ALLOCMEM ) / 1024 ), ( int ) value[i] ); printf ( "------------------------------------------------------------------------\n" ); } if ( value[6] >= value[5] && value[5] >= value[4] && value[4] >= value[3] && value[3] >= value[2] && value[2] >= value[1] && value[1] >= value[0] ) test_pass( __FILE__ ); else test_fail( __FILE__, __LINE__, "Calculating Resident Memory", ( int ) value[6] ); return 1; } papi-5.6.0/src/libpfm-3.y/examples_v3.x/x86/000775 001750 001750 00000000000 13216244362 022324 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/ctests/describe.c000664 001750 001750 00000007501 13216244360 020555 0ustar00jshenry1963jshenry1963000000 000000 /* From Paul Drongowski at HP. Thanks. */ /* I have not been able to call PAPI_describe_event without incurring a segv, including the sample code on the man page. I noticed that PAPI_describe_event is not exercised by the PAPI test programs, so I haven't been able to check the function call using known good code. (Or steal your code for that matter. 
:-) */ /* PAPI_describe_event has been deprecated in PAPI 3, since its functionality exists in other API calls. Below shows several ways that this call was used, with replacement code compatible with PAPI 3. */ #include #include #include #include "papi.h" #include "papi_test.h" int main( int argc, char **argv ) { int EventSet = PAPI_NULL; int retval; long long g1[2]; int eventcode = PAPI_TOT_INS; PAPI_event_info_t info, info1, info2; int quiet; /* Set TESTS_QUIET variable */ quiet=tests_quiet( argc, argv ); retval = PAPI_library_init( PAPI_VER_CURRENT ); if (retval != PAPI_VER_CURRENT ) { test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); } if ( ( retval = PAPI_create_eventset( &EventSet ) ) != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); } if ( ( retval = PAPI_query_event( eventcode ) ) != PAPI_OK ) { if (!quiet) printf("Trouble checking event\n"); test_skip( __FILE__, __LINE__, "PAPI_query_event(PAPI_TOT_INS)", retval ); } if ( ( retval = PAPI_add_event( EventSet, eventcode ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_add_event(PAPI_TOT_INS)", retval ); if ( ( retval = PAPI_start( EventSet ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_start", retval ); if ( ( retval = PAPI_stop( EventSet, g1 ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); /* Case 0, no info, should fail */ eventcode = 0; /* if ( ( retval = PAPI_describe_event(eventname,(int *)&eventcode,eventdesc) ) == PAPI_OK) test_fail(__FILE__,__LINE__,"PAPI_describe_event",retval); */ if (!quiet) { printf("This test expects a 'PAPI Error' to be returned from this PAPI call.\n"); } if ( ( retval = PAPI_get_event_info( eventcode, &info ) ) == PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_get_event_info", retval ); /* Case 1, fill in name field. 
*/ eventcode = PAPI_TOT_INS; /* if ( ( retval = PAPI_describe_event(eventname,(int *)&eventcode,eventdesc) ) != PAPI_OK) test_fail(__FILE__,__LINE__,"PAPI_describe_event",retval); */ if ( ( retval = PAPI_get_event_info( eventcode, &info1 ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_get_event_info", retval ); if ( strcmp( info1.symbol, "PAPI_TOT_INS" ) != 0 ) test_fail( __FILE__, __LINE__, "PAPI_get_event_info symbol value is bogus", retval ); if ( strlen( info1.long_descr ) == 0 ) test_fail( __FILE__, __LINE__, "PAPI_get_event_info long_descr value is bogus", retval ); eventcode = 0; /* Case 2, fill in code field. */ /* if ( ( retval = PAPI_describe_event(eventname,(int *)&eventcode,eventdesc) ) != PAPI_OK) test_fail(__FILE__,__LINE__,"PAPI_describe_event",retval); */ if ( ( retval = PAPI_event_name_to_code( info1.symbol, ( int * ) &eventcode ) ) != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_event_name_to_code", retval ); } if ( eventcode != PAPI_TOT_INS ) test_fail( __FILE__, __LINE__, "PAPI_event_name_to_code code value is bogus", retval ); if ( ( retval = PAPI_get_event_info( eventcode, &info2 ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_get_event_info", retval ); if ( strcmp( info2.symbol, "PAPI_TOT_INS" ) != 0 ) test_fail( __FILE__, __LINE__, "PAPI_get_event_info symbol value is bogus", retval ); if ( strlen( info2.long_descr ) == 0 ) test_fail( __FILE__, __LINE__, "PAPI_get_event_info long_descr value is bogus", retval ); test_pass( __FILE__ ); return 0; } papi-5.6.0/src/libpfm-3.y/include/perfmon/pfmlib_itanium2.h000664 001750 001750 00000045102 13216244362 025530 0ustar00jshenry1963jshenry1963000000 000000 /* * Itanium 2 PMU specific types and definitions * * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. 
* Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef __PFMLIB_ITANIUM2_H__ #define __PFMLIB_ITANIUM2_H__ #include #include #if BYTE_ORDER != LITTLE_ENDIAN #error "this file only supports little endian environments" #endif #ifdef __cplusplus extern "C" { #endif #define PMU_ITA2_FIRST_COUNTER 4 /* index of first PMC/PMD counter */ #define PMU_ITA2_NUM_COUNTERS 4 /* total numbers of PMC/PMD pairs used as counting monitors */ #define PMU_ITA2_NUM_PMCS 16 /* total number of PMCS defined */ #define PMU_ITA2_NUM_PMDS 18 /* total number of PMDS defined */ #define PMU_ITA2_NUM_BTB 8 /* total number of PMDS in BTB */ #define PMU_ITA2_COUNTER_WIDTH 47 /* hardware counter bit width */ /* * This structure provides a detailed way to setup a PMC register. * Once value is loaded, it must be copied (via pmu_reg) to the * perfmon_req_t and passed to the kernel via perfmonctl(). 
/*
 * Overlay of the Itanium 2 PMC (configuration) register layouts.
 *
 * pmc_val gives access to the complete register value; every other
 * member is a bit-field view of the same storage for one register or
 * group of registers, as named by the member (counters, pmc8/9, pmc10 in
 * its two forms, pmc11 .. pmc15). The bit-field widths of each view add
 * up to 64.
 */
typedef union {
	unsigned long pmc_val;			/* complete register value */

	/* This is the Itanium2-specific PMC layout for counter config */
	struct {
		unsigned long pmc_plm:4;	/* privilege level mask */
		unsigned long pmc_ev:1;		/* external visibility */
		unsigned long pmc_oi:1;		/* overflow interrupt */
		unsigned long pmc_pm:1;		/* privileged monitor */
		unsigned long pmc_ig1:1;	/* reserved */
		unsigned long pmc_es:8;		/* event select */
		unsigned long pmc_umask:4;	/* unit mask */
		unsigned long pmc_thres:3;	/* threshold */
		unsigned long pmc_enable:1;	/* pmc4 only: power enable bit */
		unsigned long pmc_ism:2;	/* instruction set mask */
		unsigned long pmc_ig2:38;	/* reserved */
	} pmc_ita2_counter_reg;

	/* opcode matchers */
	struct {
		unsigned long opcm_ig_ad:1;	/* ignore instruction address range checking */
		unsigned long opcm_inv:1;	/* invert range check */
		unsigned long opcm_bit2:1;	/* must be 1 */
		unsigned long opcm_mask:27;	/* mask encoding bits {41:27}{12:0} */
		unsigned long opcm_ig1:3;	/* reserved */
		unsigned long opcm_match:27;	/* match encoding bits {41:27}{12:0} */
		unsigned long opcm_b:1;		/* B-syllable */
		unsigned long opcm_f:1;		/* F-syllable */
		unsigned long opcm_i:1;		/* I-syllable */
		unsigned long opcm_m:1;		/* M-syllable */
	} pmc8_9_ita2_reg;

	/*
	 * instruction event address register configuration
	 *
	 * The register has two layout depending on the value of the ct field.
	 * In cache mode(ct=1x):
	 * 	- ct is 1 bit, umask is 8 bits
	 * In TLB mode (ct=00):
	 * 	- ct is 2 bits, umask is 7 bits
	 * ct=11 <=> cache mode and use a latency with eighth bit set
	 * ct=01 => nothing monitored
	 *
	 * The ct=01 value is the only reason why we cannot fix the layout
	 * to ct 1 bit and umask 8 bits. Even though in TLB mode, only 6 bits
	 * are effectively used for the umask, if the user inadvertently use
	 * a umask with the most significant bit set, it would be equivalent
	 * to no monitoring.
	 */
	/* cache-mode view of the register: 8-bit umask followed by a 1-bit ct */
	struct {
		unsigned long iear_plm:4;	/* privilege level mask */
		unsigned long iear_pm:1;	/* privileged monitor */
		unsigned long iear_umask:8;	/* event unit mask: 8 bits in this (cache mode) layout */
		unsigned long iear_ct:1;	/* cache tlb bit13: 0 for TLB mode, 1 for cache mode */
		unsigned long iear_ism:2;	/* instruction set */
		unsigned long iear_ig4:48;	/* reserved */
	} pmc10_ita2_cache_reg;

	/* TLB-mode view of the same register: 7-bit umask followed by a 2-bit ct */
	struct {
		unsigned long iear_plm:4;	/* privilege level mask */
		unsigned long iear_pm:1;	/* privileged monitor */
		unsigned long iear_umask:7;	/* event unit mask: 7 bits in this (TLB mode) layout */
		unsigned long iear_ct:2;	/* cache/tlb selector, 2 bits wide in this layout (see ct values above) */
		unsigned long iear_ism:2;	/* instruction set */
		unsigned long iear_ig4:48;	/* reserved */
	} pmc10_ita2_tlb_reg;

	/* data event address register configuration */
	struct {
		unsigned long dear_plm:4;	/* privilege level mask */
		unsigned long dear_ig1:2;	/* reserved */
		unsigned long dear_pm:1;	/* privileged monitor */
		unsigned long dear_mode:2;	/* mode */
		unsigned long dear_ig2:7;	/* reserved */
		unsigned long dear_umask:4;	/* unit mask */
		unsigned long dear_ig3:4;	/* reserved */
		unsigned long dear_ism:2;	/* instruction set */
		unsigned long dear_ig4:38;	/* reserved */
	} pmc11_ita2_reg;

	/* branch trace buffer configuration register */
	struct {
		unsigned long btbc_plm:4;	/* privilege level */
		unsigned long btbc_ig1:2;
		unsigned long btbc_pm:1;	/* privileged monitor */
		unsigned long btbc_ds:1;	/* data selector */
		unsigned long btbc_tm:2;	/* taken mask */
		unsigned long btbc_ptm:2;	/* predicted taken address mask */
		unsigned long btbc_ppm:2;	/* predicted predicate mask */
		unsigned long btbc_brt:2;	/* branch type mask */
		unsigned long btbc_ig2:48;
	} pmc12_ita2_reg;

	/* data address range configuration register */
	struct {
		unsigned long darc_ig1:3;
		unsigned long darc_cfg_dbrp0:2;	/* constraint on dbr0 */
		unsigned long darc_ig2:6;
		unsigned long darc_cfg_dbrp1:2;	/* constraint on dbr1 */
		unsigned long darc_ig3:6;
		unsigned long darc_cfg_dbrp2:2;	/* constraint on dbr2 */
		unsigned long darc_ig4:6;
		unsigned long darc_cfg_dbrp3:2;	/* constraint on dbr3 */
		unsigned long darc_ig5:16;
		unsigned long darc_ena_dbrp0:1;	/* enable constraint dbr0 */
		unsigned long darc_ena_dbrp1:1;	/* enable constraint dbr1 */
		unsigned long darc_ena_dbrp2:1;	/* enable constraint dbr2 */
		unsigned long darc_ena_dbrp3:1; 	/* enable constraint dbr3 */
		unsigned long darc_ig6:15;
	} pmc13_ita2_reg;

	/* instruction address range configuration register */
	struct {
		unsigned long iarc_ig1:1;
		unsigned long iarc_ibrp0:1;	/* constrained by ibr0 */
		unsigned long iarc_ig2:2;
		unsigned long iarc_ibrp1:1;	/* constrained by ibr1 */
		unsigned long iarc_ig3:2;
		unsigned long iarc_ibrp2:1;	/* constrained by ibr2 */
		unsigned long iarc_ig4:2;
		unsigned long iarc_ibrp3:1;	/* constrained by ibr3 */
		unsigned long iarc_ig5:2;
		unsigned long iarc_fine:1;	/* fine mode */
		unsigned long iarc_ig6:50;
	} pmc14_ita2_reg;

	/* opcode matcher configuration register */
	struct {
		unsigned long	opcmc_ibrp0_pmc8:1;
		unsigned long	opcmc_ibrp1_pmc9:1;
		unsigned long	opcmc_ibrp2_pmc8:1;
		unsigned long	opcmc_ibrp3_pmc9:1;
		unsigned long 	opcmc_ig1:60;
	} pmc15_ita2_reg;

} pfm_ita2_pmc_reg_t;
unsigned long dear_daddr; /* data address */ } pmd2_ita2_reg; /* data event address register: data address register */ struct { unsigned long dear_latency:13; /* latency */ unsigned long dear_overflow:1; /* overflow */ unsigned long dear_stat:2; /* status */ unsigned long dear_ig1:48; /* ignored */ } pmd3_ita2_reg; /* branch trace buffer data register when pmc12.ds == 0 */ struct { unsigned long btb_b:1; /* branch bit */ unsigned long btb_mp:1; /* mispredict bit */ unsigned long btb_slot:2; /* which slot, 3=not taken branch */ unsigned long btb_addr:60; /* bundle address(b=1), target address(b=0) */ } pmd8_15_ita2_reg; /* branch trace buffer data register when pmc12.ds == 1 */ struct { unsigned long btb_b:1; /* branch bit */ unsigned long btb_mp:1; /* mispredict bit */ unsigned long btb_slot:2; /* which slot, 3=not taken branch */ unsigned long btb_loaddr:37; /* b=1, bundle address, b=0 target address */ unsigned long btb_pred:20; /* low 20bits of L1IBR */ unsigned long btb_hiaddr:3; /* hi 3bits of bundle address(b=1) or target address (b=0)*/ } pmd8_15_ds_ita2_reg; /* branch trace buffer index register */ struct { unsigned long btbi_bbi:3; /* next entry index */ unsigned long btbi_full:1; /* full bit (sticky) */ unsigned long btbi_pmd8ext_b1:1; /* pmd8 ext */ unsigned long btbi_pmd8ext_bruflush:1; /* pmd8 ext */ unsigned long btbi_pmd8ext_ig:2; /* pmd8 ext */ unsigned long btbi_pmd9ext_b1:1; /* pmd9 ext */ unsigned long btbi_pmd9ext_bruflush:1; /* pmd9 ext */ unsigned long btbi_pmd9ext_ig:2; /* pmd9 ext */ unsigned long btbi_pmd10ext_b1:1; /* pmd10 ext */ unsigned long btbi_pmd10ext_bruflush:1; /* pmd10 ext */ unsigned long btbi_pmd10ext_ig:2; /* pmd10 ext */ unsigned long btbi_pmd11ext_b1:1; /* pmd11 ext */ unsigned long btbi_pmd11ext_bruflush:1; /* pmd11 ext */ unsigned long btbi_pmd11ext_ig:2; /* pmd11 ext */ unsigned long btbi_pmd12ext_b1:1; /* pmd12 ext */ unsigned long btbi_pmd12ext_bruflush:1; /* pmd12 ext */ unsigned long btbi_pmd12ext_ig:2; /* pmd12 ext 
*/ unsigned long btbi_pmd13ext_b1:1; /* pmd13 ext */ unsigned long btbi_pmd13ext_bruflush:1; /* pmd13 ext */ unsigned long btbi_pmd13ext_ig:2; /* pmd13 ext */ unsigned long btbi_pmd14ext_b1:1; /* pmd14 ext */ unsigned long btbi_pmd14ext_bruflush:1; /* pmd14 ext */ unsigned long btbi_pmd14ext_ig:2; /* pmd14 ext */ unsigned long btbi_pmd15ext_b1:1; /* pmd15 ext */ unsigned long btbi_pmd15ext_bruflush:1; /* pmd15 ext */ unsigned long btbi_pmd15ext_ig:2; /* pmd15 ext */ unsigned long btbi_ignored:28; } pmd16_ita2_reg; /* data event address register: data address register */ struct { unsigned long dear_slot:2; /* slot */ unsigned long dear_bn:1; /* bundle bit (if 1 add 16 to address) */ unsigned long dear_vl:1; /* valid */ unsigned long dear_iaddr:60; /* instruction address (2-bundle window)*/ } pmd17_ita2_reg; } pfm_ita2_pmd_reg_t; /* * type definition for Itanium 2 instruction set support */ typedef enum { PFMLIB_ITA2_ISM_BOTH=0, /* IA-32 and IA-64 (default) */ PFMLIB_ITA2_ISM_IA32=1, /* IA-32 only */ PFMLIB_ITA2_ISM_IA64=2 /* IA-64 only */ } pfmlib_ita2_ism_t; typedef struct { unsigned int flags; /* counter specific flags */ unsigned int thres; /* per event threshold */ pfmlib_ita2_ism_t ism; /* per event instruction set */ } pfmlib_ita2_counter_t; /* * counter specific flags */ #define PFMLIB_ITA2_FL_EVT_NO_QUALCHECK 0x1 /* don't check qualifier constraints */ typedef struct { unsigned char opcm_used; /* set to 1 if this opcode matcher is used */ unsigned long pmc_val; /* full opcode mask (41bits) */ } pfmlib_ita2_opcm_t; /* * * The BTB can be configured via 4 different methods: * * - BRANCH_EVENT is in the event list, pfp_ita2_btb.btb_used == 0: * The BTB will be configured (PMC12) to record all branches AND a counting * monitor will be setup to count BRANCH_EVENT. * * - BRANCH_EVENT is in the event list, pfp_ita2_btb.btb_used == 1: * The BTB will be configured (PMC12) according to information in pfp_ita2_btb AND * a counter will be setup to count BRANCH_EVENT. 
* * - BRANCH_EVENT is NOT in the event list, pfp_ita2_btb.btb_used == 0: * Nothing is programmed * * - BRANCH_EVENT is NOT in the event list, pfp_ita2_btb.btb_used == 1: * The BTB will be configured (PMC12) according to information in pfp_ita2_btb. * This is the free running BTB mode. */ typedef struct { unsigned char btb_used; /* set to 1 if the BTB is used */ unsigned char btb_ds; /* data selector */ unsigned char btb_tm; /* taken mask */ unsigned char btb_ptm; /* predicted target mask */ unsigned char btb_ppm; /* predicted predicate mask */ unsigned char btb_brt; /* branch type mask */ unsigned int btb_plm; /* BTB privilege level mask */ } pfmlib_ita2_btb_t; /* * There are four ways to configure EAR: * * - an EAR event is in the event list AND pfp_ita2_?ear.ear_used = 0: * The EAR will be programmed (PMC10 or PMC11) based on the information encoded in the * event (umask, cache, tlb,alat). A counting monitor will be programmed to * count DATA_EAR_EVENTS or L1I_EAR_EVENTS depending on the type of EAR. * * - an EAR event is in the event list AND pfp_ita2_?ear.ear_used = 1: * The EAR will be programmed (PMC10 or PMC11) according to the information in the * pfp_ita2_?ear structure because it contains more detailed information * (such as priv level and instruction set). A counting monitor will be programmed * to count DATA_EAR_EVENTS or L1I_EAR_EVENTS depending on the type of EAR. * * - no EAR event is in the event list AND pfp_ita2_?ear.ear_used = 0: * Nothing is programmed. * * - no EAR event is in the event list AND pfp_ita2_?ear.ear_used = 1: * The EAR will be programmed (PMC10 or PMC11) according to the information in the * pfp_ita2_?ear structure. 
This is the free running mode for EAR */ /* EAR operating modes; the ALAT mode is listed as D-EAR only */ typedef enum { PFMLIB_ITA2_EAR_CACHE_MODE= 0, /* Cache mode : I-EAR and D-EAR */ PFMLIB_ITA2_EAR_TLB_MODE = 1, /* TLB mode : I-EAR and D-EAR */ PFMLIB_ITA2_EAR_ALAT_MODE = 2 /* ALAT mode : D-EAR only */ } pfmlib_ita2_ear_mode_t; /* EAR settings; the same layout serves both the pfp_ita2_iear and the pfp_ita2_dear member of pfmlib_ita2_input_param_t */ typedef struct { unsigned char ear_used; /* when set will force definition of PMC10 or PMC11 */ pfmlib_ita2_ear_mode_t ear_mode; /* EAR mode */ pfmlib_ita2_ism_t ear_ism; /* instruction set */ unsigned int ear_plm; /* EAR privilege level mask */ unsigned long ear_umask; /* umask value for PMC10 or PMC11 */ } pfmlib_ita2_ear_t; /* * describes one range. rr_plm is ignored for data ranges * a range is interpreted as unused (not defined) when rr_start = rr_end = 0. * if rr_plm is not set it will use the default settings set in the generic * library param structure. */ typedef struct { unsigned int rr_plm; /* privilege level (ignored for data ranges) */ unsigned long rr_start; /* start address */ unsigned long rr_end; /* end address (not included) */ } pfmlib_ita2_input_rr_desc_t; /* per-range result reported back through rr_infos[] of pfmlib_ita2_output_rr_t */ typedef struct { unsigned long rr_soff; /* start offset from actual start */ unsigned long rr_eoff; /* end offset from actual end */ } pfmlib_ita2_output_rr_desc_t; /* * rr_used must be set to true for the library to configure the debug registers. * rr_inv only applies when the rr_limits table contains ONLY 1 range. 
* * If using less than 4 intervals, must mark the end with entry: rr_start = rr_end = 0 */ /* NOTE(review): there is no rr_inv field in this struct; the text above presumably means the PFMLIB_ITA2_RR_INV bit of rr_flags -- confirm */ typedef struct { unsigned int rr_flags; /* set of flags for all ranges */ pfmlib_ita2_input_rr_desc_t rr_limits[4]; /* at most 4 distinct intervals */ unsigned char rr_used; /* set if address range restriction is used */ } pfmlib_ita2_input_rr_t; /* range restriction results: per-range offsets plus the debug register settings to program */ typedef struct { unsigned int rr_nbr_used; /* how many registers were used */ pfmlib_ita2_output_rr_desc_t rr_infos[4]; /* at most 4 distinct intervals */ pfmlib_reg_t rr_br[8]; /* debug reg to configure */ } pfmlib_ita2_output_rr_t; /* range restriction flags (presumably the values accepted in rr_flags -- confirm) */ #define PFMLIB_ITA2_RR_INV 0x1 /* inverse instruction ranges (iranges only) */ #define PFMLIB_ITA2_RR_NO_FINE_MODE 0x2 /* force non fine mode for instruction ranges */ /* * Itanium 2 specific parameters for the library */ typedef struct { pfmlib_ita2_counter_t pfp_ita2_counters[PMU_ITA2_NUM_COUNTERS]; /* extended counter features */ unsigned long pfp_ita2_flags; /* Itanium2 specific flags */ pfmlib_ita2_opcm_t pfp_ita2_pmc8; /* PMC8 (opcode matcher) configuration */ pfmlib_ita2_opcm_t pfp_ita2_pmc9; /* PMC9 (opcode matcher) configuration */ pfmlib_ita2_ear_t pfp_ita2_iear; /* IEAR configuration */ pfmlib_ita2_ear_t pfp_ita2_dear; /* DEAR configuration */ pfmlib_ita2_btb_t pfp_ita2_btb; /* BTB configuration */ pfmlib_ita2_input_rr_t pfp_ita2_drange; /* data range restrictions */ pfmlib_ita2_input_rr_t pfp_ita2_irange; /* code range restrictions */ unsigned long reserved[1]; /* for future use */ } pfmlib_ita2_input_param_t; /* Itanium 2 specific output: the range restriction results for the data and code ranges */ typedef struct { pfmlib_ita2_output_rr_t pfp_ita2_drange; /* data range restrictions */ pfmlib_ita2_output_rr_t pfp_ita2_irange; /* code range restrictions */ unsigned long reserved[6]; /* for future use */ } pfmlib_ita2_output_param_t; extern int pfm_ita2_is_ear(unsigned int i); extern int pfm_ita2_is_dear(unsigned int i); extern int pfm_ita2_is_dear_tlb(unsigned int i); extern int pfm_ita2_is_dear_cache(unsigned int i); extern int pfm_ita2_is_dear_alat(unsigned int i); extern int 
pfm_ita2_is_iear(unsigned int i); extern int pfm_ita2_is_iear_tlb(unsigned int i); extern int pfm_ita2_is_iear_cache(unsigned int i); extern int pfm_ita2_is_btb(unsigned int i); extern int pfm_ita2_support_opcm(unsigned int i); extern int pfm_ita2_support_iarr(unsigned int i); extern int pfm_ita2_support_darr(unsigned int i); extern int pfm_ita2_get_ear_mode(unsigned int i, pfmlib_ita2_ear_mode_t *m); extern int pfm_ita2_irange_is_fine(pfmlib_output_param_t *outp, pfmlib_ita2_output_param_t *mod_out); extern int pfm_ita2_get_event_maxincr(unsigned int i, unsigned int *maxincr); extern int pfm_ita2_get_event_umask(unsigned int i, unsigned long *umask); extern int pfm_ita2_get_event_group(unsigned int i, int *grp); extern int pfm_ita2_get_event_set(unsigned int i, int *set); /* * values of group (grp) returned by pfm_ita2_get_event_group() */ #define PFMLIB_ITA2_EVT_NO_GRP 0 /* event does not belong to a group */ #define PFMLIB_ITA2_EVT_L1_CACHE_GRP 1 /* event belongs to L1 Cache group */ #define PFMLIB_ITA2_EVT_L2_CACHE_GRP 2 /* event belongs to L2 Cache group */ /* * possible values returned in set by pfm_ita2_get_event_set() */ #define PFMLIB_ITA2_EVT_NO_SET -1 /* event does not belong to a set */ #ifdef __cplusplus /* extern C */ } #endif #endif /* __PFMLIB_ITANIUM2_H__ */ papi-5.6.0/src/libpfm4/docs/man3/libpfm_intel_x86_arch.3000664 001750 001750 00000003366 13216244364 024712 0ustar00jshenry1963jshenry1963000000 000000 .TH LIBPFM 3 "September, 2009" "" "Linux Programmer's Manual" .SH NAME libpfm_intel_x86_arch - support for Intel X86 architectural PMU .SH SYNOPSIS .nf .B #include <perfmon/pfmlib.h> .sp .B PMU name: ix86arch .B PMU desc: Intel X86 architectural PMU .sp .SH DESCRIPTION The library supports \fBany\fR processor implementing the Intel architectural PMU. This is a minimal PMU with a variable number of counters but a predefined set of events. It is implemented in all recent processors starting with Intel Core Duo/Core Solo. 
It acts as the default PMU support in case the library is run on a very recent processor for which the specific support has not yet been implemented. .SH MODIFIERS The following modifiers are supported on the Intel architectural PMU: .TP .B u Measure at user level which includes privilege levels 1, 2, 3. This corresponds to \fBPFM_PLM3\fR. This is a boolean modifier. .TP .B k Measure at kernel level which includes privilege level 0. This corresponds to \fBPFM_PLM0\fR. This is a boolean modifier. .TP .B i Invert the meaning of the event. The counter will now count cycles in which the event is \fBnot\fR occurring. This is a boolean modifier. .TP .B e Enable edge detection, i.e., count only when there is a state transition. This is a boolean modifier. .TP .B c Set the counter mask value. The mask acts as a threshold. The counter will count the number of cycles in which the number of occurrences of the event is greater than or equal to the threshold. This is an integer modifier with values in the range [0:255]. .TP .B t Measure on both threads at the same time assuming hyper-threading is enabled. This modifier requires at least version 3 of the architectural PMU. This is a boolean modifier. 
.SH AUTHORS .nf Stephane Eranian .fi .PP papi-5.6.0/src/libpfm4/lib/events/intel_pm_events.h000664 001750 001750 00000073363 13216244364 024314 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2011 Google, Inc * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. * * This file has been automatically generated. 
* * PMU: pm (Intel Pentium M) */ /* Cache line state unit masks (Invalid/Shared/Exclusive/Modified). Referenced by the L2_IFETCH, L2_ST and L2_RQSTS entries of intel_pm_pe[]. */ static const intel_x86_umask_t pm_l2_ifetch[]={ { .uname = "I", .udesc = "Invalid state", .ucode = 0x100, }, { .uname = "S", .udesc = "Shared state", .ucode = 0x200, }, { .uname = "E", .udesc = "Exclusive state", .ucode = 0x400, }, { .uname = "M", .udesc = "Modified state", .ucode = 0x800, }, }; /* Bus agent selector: SELF (this processor) or ANY agent. Referenced by BUS_DRDY_CLOCKS, BUS_LOCK_CLOCKS and the BUS_TRAN* entries of intel_pm_pe[]. Both masks carry INTEL_X86_NCOMBO and SELF also carries INTEL_X86_DFL (presumably "not combinable" and "default" -- confirm against the flag definitions). */ static const intel_x86_umask_t pm_bus_drdy_clocks[]={ { .uname = "SELF", .udesc = "Clocks when processor is driving bus", .ucode = 0x0, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "ANY", .udesc = "Clocks when any agent is driving bus", .ucode = 0x2000, .uflags= INTEL_X86_NCOMBO, }, }; /* MMX instruction classes, one distinct bit per class. Referenced by the MMX_INSTR_TYPE_EXEC entry of intel_pm_pe[]. */ static const intel_x86_umask_t pm_mmx_instr_type_exec[]={ { .uname = "MUL", .udesc = "MMX packed multiply instructions executed", .ucode = 0x100, }, { .uname = "SHIFT", .udesc = "MMX packed shift instructions executed", .ucode = 0x200, }, { .uname = "PACK", .udesc = "MMX pack operation instructions executed", .ucode = 0x400, }, { .uname = "UNPACK", .udesc = "MMX unpack operation instructions executed", .ucode = 0x800, }, { .uname = "LOGICAL", .udesc = "MMX packed logical instructions executed", .ucode = 0x1000, }, { .uname = "ARITH", .udesc = "MMX packed arithmetic instructions executed", .ucode = 0x2000, }, }; /* Direction of the MMX <-> floating-point transition. Referenced by the FP_MMX_TRANS entry of intel_pm_pe[]. */ static const intel_x86_umask_t pm_fp_mmx_trans[]={ { .uname = "TO_FP", .udesc = "From MMX instructions to floating-point instructions", .ucode = 0x0, .uflags= INTEL_X86_NCOMBO, }, { .uname = "TO_MMX", .udesc = "From floating-point instructions to MMX instructions", .ucode = 0x100, .uflags= INTEL_X86_NCOMBO, }, }; /* Segment register selector (ES/DS/FS/GS), one distinct bit per register. Referenced by the SEG_RENAME_STALLS and SEG_REG_RENAMES entries of intel_pm_pe[]. */ static const intel_x86_umask_t pm_seg_rename_stalls[]={ { .uname = "ES", .udesc = "Segment register ES", .ucode = 0x100, }, { .uname = "DS", .udesc = "Segment register DS", .ucode = 0x200, }, { .uname = "FS", .udesc = "Segment register FS", .ucode = 0x400, }, { .uname = "GS", .udesc = "Segment register GS", .ucode = 0x800, }, }; /* Prefetch kind selector; the values are an enumeration (0x0, 0x100, 0x200, 0x300), not independent bits, and every entry carries INTEL_X86_NCOMBO. */ static const intel_x86_umask_t pm_emon_kni_pref_dispatched[]={ { .uname = "NTA", .udesc = "Prefetch NTA", 
.ucode = 0x0, .uflags= INTEL_X86_NCOMBO, }, { .uname = "T1", .udesc = "Prefetch T1", .ucode = 0x100, .uflags= INTEL_X86_NCOMBO, }, { .uname = "T2", .udesc = "Prefetch T2", .ucode = 0x200, .uflags= INTEL_X86_NCOMBO, }, { .uname = "WEAK", .udesc = "Weakly ordered stores", .ucode = 0x300, .uflags= INTEL_X86_NCOMBO, }, }; /* Transition filter: all transitions or frequency transitions only. NOTE(review): ALL has ucode 0x0 yet carries no INTEL_X86_NCOMBO flag, unlike the 0x0 entries of the tables above -- confirm this is intended. */ static const intel_x86_umask_t pm_emon_est_trans[]={ { .uname = "ALL", .udesc = "All transitions", .ucode = 0x0, }, { .uname = "FREQ", .udesc = "Only frequency transitions", .ucode = 0x200, }, }; /* Fused micro-op filter: all, load+op only, or std+sta only. NOTE(review): as with pm_emon_est_trans, the 0x0 entry has no INTEL_X86_NCOMBO flag -- confirm. */ static const intel_x86_umask_t pm_emon_fused_uops_ret[]={ { .uname = "ALL", .udesc = "All fused micro-ops", .ucode = 0x0, }, { .uname = "LD_OP", .udesc = "Only load+Op micro-ops", .ucode = 0x100, }, { .uname = "STD_STA", .udesc = "Only std+sta micro-ops", .ucode = 0x200, }, }; /* SSE/SSE2 instruction class selector; the values are an enumeration (0x0..0x300) and every entry carries INTEL_X86_NCOMBO. */ static const intel_x86_umask_t pm_emon_sse_sse2_inst_retired[]={ { .uname = "SSE_PACKED_SCALAR_SINGLE", .udesc = "SSE Packed Single and Scalar Single", .ucode = 0x0, .uflags= INTEL_X86_NCOMBO, }, { .uname = "SSE_SCALAR_SINGLE", .udesc = "SSE Scalar Single", .ucode = 0x100, .uflags= INTEL_X86_NCOMBO, }, { .uname = "SSE2_PACKED_DOUBLE", .udesc = "SSE2 Packed Double", .ucode = 0x200, .uflags= INTEL_X86_NCOMBO, }, { .uname = "SSE2_SCALAR_DOUBLE", .udesc = "SSE2 Scalar Double", .ucode = 0x300, .uflags= INTEL_X86_NCOMBO, }, }; /* Cache line state masks (same I/S/E/M codes as pm_l2_ifetch) plus hardware-prefetch qualifiers; EXCL_HW_PREFETCH is encoded as 0x0. */ static const intel_x86_umask_t pm_l2_ld[]={ { .uname = "I", .udesc = "Invalid state", .ucode = 0x100, }, { .uname = "S", .udesc = "Shared state", .ucode = 0x200, }, { .uname = "E", .udesc = "Exclusive state", .ucode = 0x400, }, { .uname = "M", .udesc = "Modified state", .ucode = 0x800, }, { .uname = "EXCL_HW_PREFETCH", .udesc = "Exclude hardware prefetched lines", .ucode = 0x0, }, { .uname = "ONLY_HW_PREFETCH", .udesc = "Only hardware prefetched lines", .ucode = 0x1000, }, { .uname = "NON_HW_PREFETCH", .udesc = "Non hardware prefetched lines", .ucode = 0x2000, }, }; static const intel_x86_entry_t intel_pm_pe[]={ { .name = "CPU_CLK_UNHALTED", .desc = "Number cycles 
during which the processor is not halted and not in a thermal trip", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x79, }, { .name = "INST_RETIRED", .desc = "Number of instructions retired", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xc0, }, { .name = "DATA_MEM_REFS", .desc = "All loads from any memory type. All stores to any memory typeEach part of a split is counted separately. The internal logic counts not only memory loads and stores but also internal retries. 80-bit floating point accesses are double counted, since they are decomposed into a 16-bit exponent load and a 64-bit mantissa load. Memory accesses are only counted when they are actually performed (such as a load that gets squashed because a previous cache miss is outstanding to the same address, and which finally gets performed, is only counted once). Does not include I/O accesses or other non-memory accesses", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x43, }, { .name = "DCU_LINES_IN", .desc = "Total lines allocated in the DCU", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x45, }, { .name = "DCU_M_LINES_IN", .desc = "Number of M state lines allocated in the DCU", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x46, }, { .name = "DCU_M_LINES_OUT", .desc = "Number of M state lines evicted from the DCU. This includes evictions via snoop HITM, intervention or replacement", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x47, }, { .name = "DCU_MISS_OUTSTANDING", .desc = "Weighted number of cycle while a DCU miss is outstanding, incremented by the number of cache misses at any particular time. Cacheable read requests only are considered. 
Uncacheable requests are excluded Read-for-ownerships are counted, as well as line fills, invalidates, and stores", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x48, }, { .name = "IFU_IFETCH", .desc = "Number of instruction fetches, both cacheable and noncacheable including UC fetches", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x80, }, { .name = "IFU_IFETCH_MISS", .desc = "Number of instruction fetch misses. All instructions fetches that do not hit the IFU (i.e., that produce memory requests). Includes UC accesses", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x81, }, { .name = "ITLB_MISS", .desc = "Number of ITLB misses", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x85, }, { .name = "IFU_MEM_STALL", .desc = "Number of cycles instruction fetch is stalled for any reason. Includes IFU cache misses, ITLB misses, ITLB faults, and other minor stalls", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x86, }, { .name = "ILD_STALL", .desc = "Number of cycles that the instruction length decoder is stalled", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x87, }, { .name = "L2_IFETCH", .desc = "Number of L2 instruction fetches. This event indicates that a normal instruction fetch was received by the L2. The count includes only L2 cacheable instruction fetches: it does not include UC instruction fetches It does not include ITLB miss accesses", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x28, .numasks = LIBPFM_ARRAY_SIZE(pm_l2_ifetch), .ngrp = 1, .umasks = pm_l2_ifetch, }, { .name = "L2_ST", .desc = "Number of L2 data stores. This event indicates that a normal, unlocked, store memory access was received by the L2. Specifically, it indicates that the DCU sent a read-for ownership request to the L2. It also includes Invalid to Modified requests sent by the DCU to the L2. It includes only L2 cacheable memory accesses; it does not include I/O accesses, other non-memory accesses, or memory accesses such as UC/WT memory accesses. 
It does include L2 cacheable TLB miss memory accesses", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x2a, .numasks = LIBPFM_ARRAY_SIZE(pm_l2_ifetch), .ngrp = 1, .umasks = pm_l2_ifetch, /* identical to actual umasks list for this event */ }, { .name = "L2_M_LINES_INM", .desc = "Number of modified lines allocated in the L2", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x25, }, { .name = "L2_RQSTS", .desc = "Total number of L2 requests", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x2e, .numasks = LIBPFM_ARRAY_SIZE(pm_l2_ifetch), .ngrp = 1, .umasks = pm_l2_ifetch, /* identical to actual umasks list for this event */ }, { .name = "L2_ADS", .desc = "Number of L2 address strobes", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x21, }, { .name = "L2_DBUS_BUSY", .desc = "Number of cycles during which the L2 cache data bus was busy", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x22, }, { .name = "L2_DBUS_BUSY_RD", .desc = "Number of cycles during which the data bus was busy transferring read data from L2 to the processor", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x23, }, { .name = "BUS_DRDY_CLOCKS", .desc = "Number of clocks during which DRDY# is asserted. Utilization of the external system data bus during data transfers", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x62, .numasks = LIBPFM_ARRAY_SIZE(pm_bus_drdy_clocks), .ngrp = 1, .umasks = pm_bus_drdy_clocks, }, { .name = "BUS_LOCK_CLOCKS", .desc = "Number of clocks during which LOCK# is asserted on the external system bus", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x63, .numasks = LIBPFM_ARRAY_SIZE(pm_bus_drdy_clocks), .ngrp = 1, .umasks = pm_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_REQ_OUTSTANDING", .desc = "Number of bus requests outstanding. 
This counter is incremented by the number of cacheable read bus requests outstanding in any given cycle", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x60, }, { .name = "BUS_TRANS_BRD", .desc = "Number of burst read transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x65, .numasks = LIBPFM_ARRAY_SIZE(pm_bus_drdy_clocks), .ngrp = 1, .umasks = pm_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRANS_RFO", .desc = "Number of completed read for ownership transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x66, .numasks = LIBPFM_ARRAY_SIZE(pm_bus_drdy_clocks), .ngrp = 1, .umasks = pm_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRANS_WB", .desc = "Number of completed write back transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x67, .numasks = LIBPFM_ARRAY_SIZE(pm_bus_drdy_clocks), .ngrp = 1, .umasks = pm_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRAN_IFETCH", .desc = "Number of completed instruction fetch transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x68, .numasks = LIBPFM_ARRAY_SIZE(pm_bus_drdy_clocks), .ngrp = 1, .umasks = pm_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRAN_INVAL", .desc = "Number of completed invalidate transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x69, .numasks = LIBPFM_ARRAY_SIZE(pm_bus_drdy_clocks), .ngrp = 1, .umasks = pm_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRAN_PWR", .desc = "Number of completed partial write transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x6a, .numasks = LIBPFM_ARRAY_SIZE(pm_bus_drdy_clocks), .ngrp = 1, .umasks = pm_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRANS_P", .desc = "Number of completed partial transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 
0x3, .code = 0x6b, .numasks = LIBPFM_ARRAY_SIZE(pm_bus_drdy_clocks), .ngrp = 1, .umasks = pm_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRANS_IO", .desc = "Number of completed I/O transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x6c, .numasks = LIBPFM_ARRAY_SIZE(pm_bus_drdy_clocks), .ngrp = 1, .umasks = pm_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRAN_DEF", .desc = "Number of completed deferred transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x6d, .numasks = LIBPFM_ARRAY_SIZE(pm_bus_drdy_clocks), .ngrp = 1, .umasks = pm_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRAN_BURST", .desc = "Number of completed burst transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x6e, .numasks = LIBPFM_ARRAY_SIZE(pm_bus_drdy_clocks), .ngrp = 1, .umasks = pm_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRAN_ANY", .desc = "Number of all completed bus transactions. Address bus utilization can be calculated knowing the minimum address bus occupancy. 
Includes special cycles, etc.", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x70, .numasks = LIBPFM_ARRAY_SIZE(pm_bus_drdy_clocks), .ngrp = 1, .umasks = pm_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRAN_MEM", .desc = "Number of completed memory transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x6f, .numasks = LIBPFM_ARRAY_SIZE(pm_bus_drdy_clocks), .ngrp = 1, .umasks = pm_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_DATA_RECV", .desc = "Number of bus clock cycles during which this processor is receiving data", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x64, }, { .name = "BUS_BNR_DRV", .desc = "Number of bus clock cycles during which this processor is driving the BNR# pin", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x61, }, { .name = "BUS_HIT_DRV", .desc = "Number of bus clock cycles during which this processor is driving the HIT# pin", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x7a, }, { .name = "BUS_HITM_DRV", .desc = "Number of bus clock cycles during which this processor is driving the HITM# pin", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x7b, }, { .name = "BUS_SNOOP_STALL", .desc = "Number of clock cycles during which the bus is snoop stalled", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x7e, }, { .name = "FLOPS", .desc = "Number of computational floating-point operations retired. Excludes floating-point computational operations that cause traps or assists. Includes internal sub-operations for complex floating-point instructions like transcendentals. Excludes floating point loads and stores", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x1, .code = 0xc1, }, { .name = "FP_COMP_OPS_EXE", .desc = "Number of computational floating-point operations executed. The number of FADD, FSUB, FCOM, FMULs, integer MULs and IMULs, FDIVs, FPREMs, FSQRTS, integer DIVs, and IDIVs. 
This number does not include the number of cycles, but the number of operations. This event does not distinguish an FADD used in the middle of a transcendental flow from a separate FADD instruction", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x1, .code = 0x10, }, { .name = "FP_ASSIST", .desc = "Number of floating-point exception cases handled by microcode.", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x2, .code = 0x11, }, { .name = "MUL", .desc = "Number of multiplies.This count includes integer as well as FP multiplies and is speculative", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x2, .code = 0x12, }, { .name = "DIV", .desc = "Number of divides.This count includes integer as well as FP divides and is speculative", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x2, .code = 0x13, }, { .name = "CYCLES_DIV_BUSY", .desc = "Number of cycles during which the divider is busy, and cannot accept new divides. This includes integer and FP divides, FPREM, FPSQRT, etc. and is speculative", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x1, .code = 0x14, }, { .name = "LD_BLOCKS", .desc = "Number of load operations delayed due to store buffer blocks. Includes counts caused by preceding stores whose addresses are unknown, preceding stores whose addresses are known but whose data is unknown, and preceding stores that conflicts with the load but which incompletely overlap the load", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x3, }, { .name = "SB_DRAINS", .desc = "Number of store buffer drain cycles. Incremented every cycle the store buffer is draining. Draining is caused by serializing operations like CPUID, synchronizing operations like XCHG, interrupt acknowledgment, as well as other conditions (such as cache flushing).", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x4, }, { .name = "MISALIGN_MEM_REF", .desc = "Number of misaligned data memory references. 
Incremented by 1 every cycle during which, either the processor's load or store pipeline dispatches a misaligned micro-op Counting is performed if it is the first or second half or if it is blocked, squashed, or missed. In this context, misaligned means crossing a 64-bit boundary", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x5, }, { .name = "UOPS_RETIRED", .desc = "Number of micro-ops retired", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xc2, }, { .name = "INST_DECODED", .desc = "Number of instructions decoded", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xd0, }, { .name = "HW_INT_RX", .desc = "Number of hardware interrupts received", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xc8, }, { .name = "CYCLES_INT_MASKED", .desc = "Number of processor cycles for which interrupts are disabled", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xc6, }, { .name = "CYCLES_INT_PENDING_AND_MASKED", .desc = "Number of processor cycles for which interrupts are disabled and interrupts are pending.", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xc7, }, { .name = "BR_INST_RETIRED", .desc = "Number of branch instructions retired", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xc4, }, { .name = "BR_MISS_PRED_RETIRED", .desc = "Number of mispredicted branches retired", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xc5, }, { .name = "BR_TAKEN_RETIRED", .desc = "Number of taken branches retired", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xc9, }, { .name = "BR_MISS_PRED_TAKEN_RET", .desc = "Number of taken mispredicted branches retired", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xca, }, { .name = "BR_INST_DECODED", .desc = "Number of branch instructions decoded", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xe0, }, { .name = "BTB_MISSES", .desc = "Number of branches for which the BTB did not produce a prediction", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xe2, }, { .name = "BR_BOGUS", .desc = "Number of bogus 
branches", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xe4, }, { .name = "BACLEARS", .desc = "Number of times BACLEAR is asserted. This is the number of times that a static branch prediction was made, in which the branch decoder decided to make a branch prediction because the BTB did not", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xe6, }, { .name = "RESOURCE_STALLS", .desc = "Incremented by 1 during every cycle for which there is a resource related stall. Includes register renaming buffer entries, memory buffer entries. Does not include stalls due to bus queue full, too many cache misses, etc. In addition to resource related stalls, this event counts some other events. Includes stalls arising during branch misprediction recovery, such as if retirement of the mispredicted branch is delayed and stalls arising while store buffer is draining from synchronizing operations", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xa2, }, { .name = "PARTIAL_RAT_STALLS", .desc = "Number of cycles or events for partial stalls. 
This includes flag partial stalls", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xd2, }, { .name = "SEGMENT_REG_LOADS", .desc = "Number of segment register loads.", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x6, }, { .name = "MMX_SAT_INSTR_EXEC", .desc = "Number of MMX saturating instructions executed", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xb1, }, { .name = "MMX_UOPS_EXEC", .desc = "Number of MMX micro-ops executed", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xb2, }, { .name = "MMX_INSTR_TYPE_EXEC", .desc = "Number of MMX instructions executed by type", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xb3, .numasks = LIBPFM_ARRAY_SIZE(pm_mmx_instr_type_exec), .ngrp = 1, .umasks = pm_mmx_instr_type_exec, }, { .name = "FP_MMX_TRANS", .desc = "Number of MMX transitions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xcc, .numasks = LIBPFM_ARRAY_SIZE(pm_fp_mmx_trans), .ngrp = 1, .umasks = pm_fp_mmx_trans, }, { .name = "MMX_ASSIST", .desc = "Number of MMX micro-ops executed", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xcd, }, { .name = "SEG_RENAME_STALLS", .desc = "Number of Segment Register Renaming Stalls", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xd4, .numasks = LIBPFM_ARRAY_SIZE(pm_seg_rename_stalls), .ngrp = 1, .umasks = pm_seg_rename_stalls, }, { .name = "SEG_REG_RENAMES", .desc = "Number of Segment Register Renames", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xd5, .numasks = LIBPFM_ARRAY_SIZE(pm_seg_rename_stalls), .ngrp = 1, .umasks = pm_seg_rename_stalls, /* identical to actual umasks list for this event */ }, { .name = "RET_SEG_RENAMES", .desc = "Number of segment register rename events retired", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xd6, }, { .name = "EMON_KNI_PREF_DISPATCHED", .desc = "Number of Streaming SIMD extensions prefetch/weakly-ordered instructions dispatched (speculative prefetches are included in counting). 
Pentium III and later", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x7, .numasks = LIBPFM_ARRAY_SIZE(pm_emon_kni_pref_dispatched), .ngrp = 1, .umasks = pm_emon_kni_pref_dispatched, }, { .name = "EMON_KNI_PREF_MISS", .desc = "Number of prefetch/weakly-ordered instructions that miss all caches. Pentium III and later", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x4b, .numasks = LIBPFM_ARRAY_SIZE(pm_emon_kni_pref_dispatched), .ngrp = 1, .umasks = pm_emon_kni_pref_dispatched, /* identical to actual umasks list for this event */ }, { .name = "EMON_EST_TRANS", .desc = "Number of Enhanced Intel SpeedStep technology transitions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x58, .numasks = LIBPFM_ARRAY_SIZE(pm_emon_est_trans), .ngrp = 1, .umasks = pm_emon_est_trans, }, { .name = "EMON_THERMAL_TRIP", .desc = "Duration/occurrences in thermal trip; to count the number of thermal trips; edge detect must be used", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x59, }, { .name = "BR_INST_EXEC", .desc = "Branch instructions executed (not necessarily retired)", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x88, }, { .name = "BR_MISSP_EXEC", .desc = "Branch instructions executed that were mispredicted at execution", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x89, }, { .name = "BR_BAC_MISSP_EXEC", .desc = "Branch instructions executed that were mispredicted at Front End (BAC)", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x8a, }, { .name = "BR_CND_EXEC", .desc = "Conditional branch instructions executed", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x8b, }, { .name = "BR_CND_MISSP_EXEC", .desc = "Conditional branch instructions executed that were mispredicted", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x8c, }, { .name = "BR_IND_EXEC", .desc = "Indirect branch instructions executed", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x8d, }, { .name = "BR_IND_MISSP_EXEC", .desc = "Indirect branch instructions executed that were 
mispredicted", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x8e, }, { .name = "BR_RET_EXEC", .desc = "Return branch instructions executed", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x8f, }, { .name = "BR_RET_MISSP_EXEC", .desc = "Return branch instructions executed that were mispredicted at Execution", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x90, }, { .name = "BR_RET_BAC_MISSP_EXEC", .desc = "Return branch instructions executed that were mispredicted at Front End (BAC)", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x91, }, { .name = "BR_CALL_EXEC", .desc = "CALL instructions executed", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x92, }, { .name = "BR_CALL_MISSP_EXEC", .desc = "CALL instructions executed that were mispredicted", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x93, }, { .name = "BR_IND_CALL_EXEC", .desc = "Indirect CALL instructions executed", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x94, }, { .name = "EMON_SIMD_INSTR_RETIRED", .desc = "Number of retired MMX instructions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xce, }, { .name = "EMON_SYNCH_UOPS", .desc = "Sync micro-ops", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xd3, }, { .name = "EMON_ESP_UOPS", .desc = "Total number of micro-ops", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xd7, }, { .name = "EMON_FUSED_UOPS_RET", .desc = "Total number of micro-ops", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xda, .numasks = LIBPFM_ARRAY_SIZE(pm_emon_fused_uops_ret), .ngrp = 1, .umasks = pm_emon_fused_uops_ret, }, { .name = "EMON_UNFUSION", .desc = "Number of unfusion events in the ROB, happened on a FP exception to a fused micro-op", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xdb, }, { .name = "EMON_PREF_RQSTS_UP", .desc = "Number of upward prefetches issued", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xf0, }, { .name = "EMON_PREF_RQSTS_DN", .desc = "Number of downward prefetches issued", .modmsk = 
INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xf8, }, { .name = "EMON_SSE_SSE2_INST_RETIRED", .desc = "Streaming SIMD extensions instructions retired", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xd8, .numasks = LIBPFM_ARRAY_SIZE(pm_emon_sse_sse2_inst_retired), .ngrp = 1, .umasks = pm_emon_sse_sse2_inst_retired, }, { .name = "EMON_SSE_SSE2_COMP_INST_RETIRED", .desc = "Computational SSE instructions retired", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xd9, .numasks = LIBPFM_ARRAY_SIZE(pm_emon_sse_sse2_inst_retired), .ngrp = 1, .umasks = pm_emon_sse_sse2_inst_retired, /* identical to actual umasks list for this event */ }, { .name = "L2_LD", .desc = "Number of L2 data loads", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x29, .numasks = LIBPFM_ARRAY_SIZE(pm_l2_ld), .ngrp = 1, .umasks = pm_l2_ld, }, { .name = "L2_LINES_IN", .desc = "Number of L2 lines allocated", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x24, .numasks = LIBPFM_ARRAY_SIZE(pm_l2_ld), .ngrp = 1, .umasks = pm_l2_ld, /* identical to actual umasks list for this event */ }, { .name = "L2_LINES_OUT", .desc = "Number of L2 lines evicted", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x26, .numasks = LIBPFM_ARRAY_SIZE(pm_l2_ld), .ngrp = 1, .umasks = pm_l2_ld, /* identical to actual umasks list for this event */ }, { .name = "L2_M_LINES_OUT", .desc = "Number of L2 M-state lines evicted", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x27, .numasks = LIBPFM_ARRAY_SIZE(pm_l2_ld), .ngrp = 1, .umasks = pm_l2_ld, /* identical to actual umasks list for this event */ }, }; papi-5.6.0/src/libpfm4/perf_examples/self.c000664 001750 001750 00000010447 13216244365 022620 0ustar00jshenry1963jshenry1963000000 000000 /* * self.c - example of a simple self monitoring task * * Copyright (c) 2009 Google, Inc * Contributed by Stephane Eranian * * Based on: * Copyright (c) 2002-2007 Hewlett-Packard Development Company, L.P. 
* Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "perf_util.h" static const char *gen_events[]={ "cycles", "instructions", NULL }; static volatile int quit; void sig_handler(int n) { quit = 1; } void noploop(void) { for(;quit == 0;); } static void print_counts(perf_event_desc_t *fds, int num_fds, const char *msg) { uint64_t val; uint64_t values[3]; double ratio; int i; ssize_t ret; /* * now read the results. We use pfp_event_count because * libpfm guarantees that counters for the events always * come first. 
*/ memset(values, 0, sizeof(values)); for (i = 0; i < num_fds; i++) { ret = read(fds[i].fd, values, sizeof(values)); if (ret < (ssize_t)sizeof(values)) { if (ret == -1) err(1, "cannot read results: %s", strerror(errno)); else warnx("could not read event%d", i); } /* * scaling is systematic because we may be sharing the PMU and * thus may be multiplexed */ val = perf_scale(values); ratio = perf_scale_ratio(values); printf("%s %'20"PRIu64" %s (%.2f%% scaling, raw=%'"PRIu64", ena=%'"PRIu64", run=%'"PRIu64")\n", msg, val, fds[i].name, (1.0-ratio)*100.0, values[0], values[1], values[2]); } } int main(int argc, char **argv) { perf_event_desc_t *fds = NULL; int i, ret, num_fds = 0; setlocale(LC_ALL, ""); /* * Initialize pfm library (required before we can use it) */ ret = pfm_initialize(); if (ret != PFM_SUCCESS) errx(1, "Cannot initialize library: %s", pfm_strerror(ret)); ret = perf_setup_argv_events(argc > 1 ? (const char **)argv+1 : gen_events, &fds, &num_fds); if (ret || !num_fds) errx(1, "cannot setup events"); fds[0].fd = -1; for(i=0; i < num_fds; i++) { /* request timing information necessary for scaling */ fds[i].hw.read_format = PERF_FORMAT_SCALE; fds[i].hw.disabled = 1; /* do not start now */ /* each event is in an independent group (multiplexing likely) */ fds[i].fd = perf_event_open(&fds[i].hw, 0, -1, -1, 0); if (fds[i].fd == -1) err(1, "cannot open event %d", i); } signal(SIGALRM, sig_handler); /* * enable all counters attached to this thread and created by it */ ret = prctl(PR_TASK_PERF_EVENTS_ENABLE); if (ret) err(1, "prctl(enable) failed"); print_counts(fds, num_fds, "INITIAL: "); alarm(10); noploop(); /* * disable all counters attached to this thread */ ret = prctl(PR_TASK_PERF_EVENTS_DISABLE); if (ret) err(1, "prctl(disable) failed"); printf("Final counts:\n"); print_counts(fds, num_fds, "FINAL: "); for (i = 0; i < num_fds; i++) close(fds[i].fd); perf_free_fds(fds, num_fds); /* free libpfm resources cleanly */ pfm_terminate(); return 0; } 
SHELL=/bin/sh

# Normalised machine name, passed to etc/install.sh.
ARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ -e s/arm.*/arm/ -e s/sa110/arm/)
# Same as ARCH, but with every ppc* variant folded to "powerpc"; selects
# the linux/include/asm-<arch> directory the asm symlink points to.
ASM_ARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ -e s/arm.*/arm/ -e s/sa110/arm/ -e s/ppc.*/powerpc/)
SUBDIRS=usr.lib examples

# None of these names is a file produced by its rule.
.PHONY: default all-subdirs config configure install install2 \
	distclean realclean mrproper linuxclean clean

default:	linux/include/asm/perfctr.h
	$(MAKE) MAKETARGET=default all-subdirs

# Run "$(MAKE) $(MAKEARGS) $(MAKETARGET)" in every sub-directory.
# "cd ... &&" is essential: with "cd $$dir; $(MAKE) ..." a failing cd would
# run $(MAKETARGET) in this directory again and recurse without end.
# Failures stay non-fatal (leading "-"), as before.
all-subdirs:
	-for dir in $(SUBDIRS); do \
		(cd $$dir && $(MAKE) $(MAKEARGS) $(MAKETARGET)); \
	done

# (Re)create the architecture-specific include symlink.
linux/include/asm/perfctr.h config configure:
	rm -f linux/include/asm
	ln -s asm-${ASM_ARCH} linux/include/asm

install:	default
	etc/install.sh "$(PREFIX)" "$(BINDIR)" "$(LIBDIR)" "$(INCLDIR)" "$(ARCH)"

install2:
	$(MAKE) MAKEARGS="BINDIR=$(BINDIR) LIBDIR=$(LIBDIR) INCLDIR=$(INCLDIR)" MAKETARGET=install all-subdirs

distclean realclean mrproper:	clean
	rm -f linux/include/asm

# Remove kernel-module build products left in the driver directory.
linuxclean:
	rm -f linux/drivers/perfctr/*.mod.c
	rm -f linux/drivers/perfctr/*.ko
	rm -f linux/drivers/perfctr/*.o
	rm -f linux/drivers/perfctr/.*.flags
	rm -f linux/drivers/perfctr/.depend
	rm -f linux/drivers/perfctr/.*.cmd

clean:	linuxclean
	$(MAKE) MAKETARGET=clean all-subdirs
#include "fpapi_test.h"

C     Test program "first": starts a two-event set, reads it at several
C     points around calls to fdo_flops, and checks that the readings
C     scale as expected relative to each other.
C
C     values(1), (3), (5), (7), (9) are reported for the first event
C     (PAPI_FP_INS, or PAPI_TOT_INS as fallback); values(2), (4), (6),
C     (8), (10) are reported for PAPI_TOT_CYC.
      program first
      IMPLICIT integer (p)
      integer event1
      INTEGER retval
      INTEGER*8 values(10)
      INTEGER*8 max, min
      INTEGER EventSet
      integer domain, granularity
      character*(PAPI_MAX_STR_LEN) domainstr, grnstr
      character*(PAPI_MAX_STR_LEN) name
      Integer last_char, n
      External last_char
      integer tests_quiet, get_quiet
      external get_quiet

      tests_quiet = get_quiet()
      EventSet = PAPI_NULL

C     Initialise the library; retval carries the expected version in
C     and must come back unchanged.
      retval = PAPI_VER_CURRENT
      call PAPIf_library_init(retval)
      if ( retval.NE.PAPI_VER_CURRENT) then
        call ftest_fail(__FILE__, __LINE__,
     . 'PAPI_library_init', retval)
      end if

C     Use PAPI_FP_INS when the query succeeds, else PAPI_TOT_INS.
      call PAPIf_query_event(PAPI_FP_INS, retval)
      if (retval .NE. PAPI_OK) then
        event1 = PAPI_TOT_INS
      else
        event1 = PAPI_FP_INS
      end if

      call PAPIf_create_eventset(EventSet, retval)
      if ( retval.NE.PAPI_OK) then
        call ftest_fail(__FILE__, __LINE__,
     . 'PAPIf_create_eventset',
     *retval)
      end if

      call PAPIf_add_event( EventSet, event1, retval )
      if ( retval .NE. PAPI_OK ) then
        call ftest_fail(__FILE__, __LINE__,
     . 'PAPIf_add_event',
     *retval)
      end if

      call PAPIf_add_event( EventSet, PAPI_TOT_CYC, retval )
      if ( retval .NE. PAPI_OK ) then
        call ftest_fail(__FILE__, __LINE__,
     . 'PAPIf_add_event',
     *retval)
      end if

      call PAPIf_start(EventSet, retval)
      if ( retval .NE. PAPI_OK ) then
        call ftest_fail(__FILE__, __LINE__,
     . 'PAPIf_start', retval)
      end if

C     Column 1: one work unit after start.
      call fdo_flops(NUM_FLOPS)

      call PAPIf_read(EventSet, values(1), retval)
      if ( retval .NE. PAPI_OK ) then
        call ftest_fail(__FILE__, __LINE__,
     . 'PAPIf_read', retval)
      end if

      call PAPIf_reset(EventSet, retval)
      if ( retval .NE. PAPI_OK ) then
        call ftest_fail(__FILE__, __LINE__,
     . 'PAPIf_reset', retval)
      end if

C     Column 2: one work unit after the reset.
      call fdo_flops(NUM_FLOPS)

      call PAPIf_read(EventSet, values(3), retval)
      if ( retval .NE. PAPI_OK ) then
        call ftest_fail(__FILE__, __LINE__,
     . 'PAPIf_read', retval)
      end if

C     Column 3: two work units since the reset.
      call fdo_flops(NUM_FLOPS)

      call PAPIf_read(EventSet, values(5), retval)
      if ( retval .NE. PAPI_OK ) then
        call ftest_fail(__FILE__, __LINE__,
     . 'PAPIf_read', retval)
      end if

C     Column 4: three work units since the reset, taken at stop.
      call fdo_flops(NUM_FLOPS)

      call PAPIf_stop(EventSet, values(7), retval)
      if ( retval .NE. PAPI_OK ) then
        call ftest_fail(__FILE__, __LINE__,
     . 'PAPIf_stop', retval)
      end if

C     Column 5: read after stop; checked below to equal column 4.
      call PAPIf_read(EventSet, values(9), retval)
      if ( retval .NE. PAPI_OK ) then
        call ftest_fail(__FILE__, __LINE__,
     . 'PAPIf_read', retval)
      end if

      if (tests_quiet .EQ. 0) then
      print *, 'TEST CASE 1: Non-overlapping start, stop, read.'
      print *, '--------------------------------------------------'//
     * '--------------------------------'
      end if

      call PAPIf_get_domain(EventSet, domain, PAPI_DEFDOM, retval)
      if ( retval .NE. PAPI_OK ) then
        call ftest_fail(__FILE__, __LINE__,
     . 'PAPIf_get_domain', retval)
      end if

      call stringify_domain(domain, domainstr)
      if (tests_quiet .EQ. 0) then
      write (*,900) 'Default domain is:', domain, domainstr
 900  format(a20, i3, ' ', a70)
      end if

      call PAPIf_get_granularity(eventset, granularity, PAPI_DEFGRN,
     *retval)
      if ( retval .NE. PAPI_OK ) then
        call ftest_fail(__FILE__, __LINE__,
     . 'PAPIf_get_granularity',
     *retval)
      end if

      call stringify_granularity(granularity, grnstr)
C     Everything up to the matching end if is report output only.
      if (tests_quiet .EQ. 0) then
      call PAPIf_event_code_to_name (event1, name, retval)
      if ( retval.NE.PAPI_OK) then
        call ftest_fail(__FILE__, __LINE__,
     * 'PAPIf_event_code_to_name', retval)
      end if
C     n bounds the part of name that is printed below.
      n=last_char(name)
      write (*,800) 'Default granularity is:', granularity, grnstr
 800  format(a25, i3, ' ', a20)

      print *, 'Using', NUM_FLOPS, ' iterations of c += b*c'
      print *, '-----------------------------------------------'//
     * '-----------------------------------'

      write (*,100) 'Test type', 1, 2, 3, 4, 5
      write (*,100) name(1:n), values(1), values(3),
     * values(5), values(7), values(9)
      write (*,100) 'PAPI_TOT_CYC', values(2), values(4),
     * values(6), values(8), values(10)
 100  format(a13, ': ', i11, i11, i11, i11, i11)
      print *, '-----------------------------------------------'//
     * '-----------------------------------'

      print *, 'Verification:'
      print *, 'Column 1 approximately equals column 2'
      print *, 'Column 3 approximately equals 2 * column 2'
      print *, 'Column 4 approximately equals 3 * column 2'
      print *, 'Column 4 exactly equals column 5'
      end if

C     First event: column 2 is the reference, tolerance +/- 20 percent.
C     NOTE(review): INT(REAL(...)) goes through default REAL and
C     default INTEGER kinds - confirm counts stay within their range.
      min = INT(REAL(values(3))*0.8)
      max = INT(REAL(values(3))*1.2)
      if ((values(1).gt.max) .OR. (values(1).lt.min) .OR.
     *(values(5).gt.(max*2)) .OR. (values(5).lt.(min*2)) .OR.
     *(values(7).gt.(max*3)) .OR. (values(7).lt.(min*3)) .OR.
     *(values(7).NE.values(9))) then
        call ftest_fail(__FILE__, __LINE__,
     . name, 1)
      end if

C     Cycle counts: same checks with a wider +/- 35 percent tolerance.
      min = INT(REAL(values(4))*0.65)
      max = INT(REAL(values(4))*1.35)
      if ((values(2).gt.max) .OR. (values(2).lt.min) .OR.
     *(values(6).gt.(max*2)) .OR. (values(6).lt.(min*2)) .OR.
     *(values(8).gt.(max*3)) .OR. (values(8).lt.(min*3)) .OR.
     *(values(8).NE.values(10))) then
        call ftest_fail(__FILE__, __LINE__,
     . 'PAPI_TOT_CYC', 1)
      end if

      call ftests_pass(__FILE__)
      end
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include "perf_util.h" static const char *gen_events[]={ "cycles", NULL }; static volatile int quit; void sig_handler(int n) { quit = 1; } #if defined(__x86_64__) || defined(__i386__) #ifdef __x86_64__ #define DECLARE_ARGS(val, low, high) unsigned low, high #define EAX_EDX_VAL(val, low, high) ((low) | ((uint64_t )(high) << 32)) #define EAX_EDX_ARGS(val, low, high) "a" (low), "d" (high) #define EAX_EDX_RET(val, low, high) "=a" (low), "=d" (high) #else #define DECLARE_ARGS(val, low, high) unsigned long long val #define EAX_EDX_VAL(val, low, high) (val) #define EAX_EDX_ARGS(val, low, high) "A" (val) #define EAX_EDX_RET(val, low, high) "=A" (val) #endif #define barrier() __asm__ __volatile__("": : :"memory") static inline int rdpmc(struct perf_event_mmap_page *hdr, uint64_t *value) { int counter = hdr->index - 1; DECLARE_ARGS(val, low, high); if (counter < 0) return -1; asm volatile("rdpmc" : EAX_EDX_RET(val, low, high) : "c" (counter)); *value = EAX_EDX_VAL(val, low, high); return 0; } #else /* * Default barrier macro. * Given this is architecture specific, it must be defined when * libpfm is ported to new architecture. The default macro below * simply does nothing. */ #define barrier() {} /* * Default function to read counter directly from user level mode. * Given this is architecture specific, it must be defined when * libpfm is ported to new architecture. The default routine below * simply fails and the caller falls backs to syscall. 
*/ static inline int rdpmc(struct perf_event_mmap_page *hdr, uint64_t *value) { int counter = hdr->index - 1; if (counter < 0) return -1; printf("your architecture does not have a way to read counters from user mode\n"); return -1; } #endif /* * our test code (function cannot be made static otherwise it is optimized away) */ unsigned long fib(unsigned long n) { if (n == 0) return 0; if (n == 1) return 2; return fib(n-1)+fib(n-2); } uint64_t read_count(perf_event_desc_t *fds) { struct perf_event_mmap_page *hdr; uint64_t values[3]; uint64_t count = 0; uint32_t width; unsigned int seq; ssize_t ret; int idx = -1; hdr = fds->buf; width = hdr->pmc_width; do { seq = hdr->lock; barrier(); /* try reading directly from user mode */ if (!rdpmc(hdr, &values[0])) { values[1] = hdr->time_enabled; values[2] = hdr->time_running; ret = 0; } else { idx = -1; ret = read(fds->fd, values, sizeof(values)); if (ret < (ssize_t)sizeof(values)) errx(1, "cannot read values"); printf("using read\n"); break; } barrier(); } while (hdr->lock != seq); printf("raw=0x%"PRIx64 " width=%d ena=%"PRIu64 " run=%"PRIu64" idx=%d\n", values[0], width, values[1], values[2], idx); count = values[0]; count <<= 64 - width; count >>= 64 - width; values[0] = count; return perf_scale(values); } int main(int argc, char **argv) { perf_event_desc_t *fds = NULL; long lret; size_t pgsz; uint64_t val, prev_val; int i, ret, num_fds = 0; lret = sysconf(_SC_PAGESIZE); if (lret < 0) err(1, "cannot get page size"); pgsz = (size_t)lret; /* * Initialize pfm library (required before we can use it) */ ret = pfm_initialize(); if (ret != PFM_SUCCESS) errx(1, "Cannot initialize library: %s", pfm_strerror(ret)); ret = perf_setup_argv_events(argc > 1 ? 
(const char **)argv+1 : gen_events, &fds, &num_fds); if (ret || !num_fds) errx(1, "cannot setup events"); fds[0].fd = -1; for(i=0; i < num_fds; i++) { /* request timing information necesaary for scaling */ fds[i].hw.read_format = PERF_FORMAT_SCALE; fds[i].hw.disabled = 0; //fds[i].fd = perf_event_open(&fds[i].hw, 0, -1, fds[0].fd, 0); fds[i].fd = perf_event_open(&fds[i].hw, 0, -1, -1, 0); if (fds[i].fd == -1) err(1, "cannot open event %d", i); fds[i].buf = mmap(NULL, pgsz, PROT_READ, MAP_SHARED, fds[i].fd, 0); if (fds[i].buf == MAP_FAILED) err(1, "cannot mmap page"); } signal(SIGALRM, sig_handler); /* * enable all counters attached to this thread */ ioctl(fds[0].fd, PERF_EVENT_IOC_ENABLE, 0); alarm(10); prev_val = 0; for(;quit == 0;) { for (i = 0; i < num_fds; i++) { val = read_count(&fds[i]); /* print evnet deltas */ printf("%20"PRIu64" %s\n", val - prev_val, fds[i].name); prev_val = val; } fib(35); } /* * disable all counters attached to this thread */ ioctl(fds[0].fd, PERF_EVENT_IOC_DISABLE, 0); for (i=0; i < num_fds; i++) { munmap(fds[i].buf, pgsz); close(fds[i].fd); } perf_free_fds(fds, num_fds); /* free libpfm resources cleanly */ pfm_terminate(); return 0; } papi-5.6.0/src/components/cuda/tests/simpleMultiGPU.h000664 001750 001750 00000002362 13216244357 024626 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright 1993-2013 NVIDIA Corporation. All rights reserved. * * Please refer to the NVIDIA end user license agreement (EULA) associated * with this source code for terms and conditions that govern your use of * this software. Any use, reproduction, disclosure, or distribution of * this software and related documentation outside the terms of the EULA * is strictly prohibited. * */ /* * This application demonstrates how to use the CUDA API to use multiple GPUs. * * Note that in order to detect multiple GPUs in your system you have to disable * SLI in the nvidia control panel. Otherwise only one GPU is visible to the * application. 
On the other side, you can still extend your desktop to screens * attached to both GPUs. */ #ifndef SIMPLEMULTIGPU_H #define SIMPLEMULTIGPU_H typedef struct { //Host-side input data int dataN; float *h_Data; //Partial sum for this GPU float *h_Sum; //Device buffers float *d_Data,*d_Sum; //Reduction copied back from GPU float *h_Sum_from_device; //Stream for asynchronous command execution cudaStream_t stream; } TGPUplan; extern "C" void launch_reduceKernel(float *d_Result, float *d_Input, int N, int BLOCK_N, int THREAD_N, cudaStream_t &s); #endif papi-5.6.0/src/components/appio/tests/iozone/Gnuplot.txt000664 001750 001750 00000001354 13216244356 025464 0ustar00jshenry1963jshenry1963000000 000000 The script Generate_Graphs will create the 3D surface plots and display them. It will also produce postscript outputs for each test and leave them in their respective sub-directory. It processes the output from an Iozone run. The output from Iozone that it is expecting is the text output from the iozone default behavior. (iozone -a, or iozone -az) How to produce graphs: Generate_Graphs iozone.out The gen_graphs script will: 1. Create the databases for each type of operation and then processes them with Gnuplot. 2. It will display each result on the X11 screen, and also save a copy in postscript in the test sub-directory. Thanks to Yves Rougy for providing the nifty scripts to help with the plots. papi-5.6.0/src/validation_tests/papi_l2_dcm.c000664 001750 001750 00000011136 13216244370 023215 0ustar00jshenry1963jshenry1963000000 000000 /* This code attempts to test the L2 Data Cache Missses */ /* performance counter PAPI_L2_DCM */ /* by Vince Weaver, vincent.weaver@maine.edu */ /* Due to prefetching it is hard to create a testcase short of */ /* just having random accesses. */ /* In addition, due to context switching the cache might be */ /* affected by other processes on a busy system. 
*/ /* Other tests to attempt */ /* Repeatedly reading same cache line should give very small error */ #include #include #include #include "papi.h" #include "papi_test.h" #include "cache_helper.h" #include "display_error.h" #include "testcode.h" /* How much should we allow? */ #define ALLOWED_ERROR 5.0 #define NUM_RUNS 100 #define ITERATIONS 1000000 int main(int argc, char **argv) { int i; int eventset=PAPI_NULL; int num_runs=NUM_RUNS; long long high,low,average,expected; long long count,total; int retval; int l1_size,l2_size,l1_linesize,l2_linesize,l2_entries; int arraysize; int quiet,errors=0; double error; double *array; double aSumm = 0.0; quiet=tests_quiet(argc,argv); if (!quiet) { printf("Testing the PAPI_L2_DCM event\n"); } /* Init the PAPI library */ retval = PAPI_library_init(PAPI_VER_CURRENT); if (retval != PAPI_VER_CURRENT) { test_fail(__FILE__,__LINE__,"PAPI_library_init",retval); } retval=PAPI_create_eventset(&eventset); if (retval!=PAPI_OK) { test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); } retval=PAPI_add_named_event(eventset,"PAPI_L2_DCM"); if (retval!=PAPI_OK) { test_skip( __FILE__, __LINE__, "adding PAPI_L2_DCM", retval ); } l1_size=get_cachesize(L1D_CACHE); l1_linesize=get_linesize(L1D_CACHE); l2_size=get_cachesize(L2_CACHE); l2_linesize=get_linesize(L2_CACHE); l2_entries=get_entries(L2_CACHE); if (!quiet) { printf("\tDetected %dk L1 DCache, %dB linesize\n", l1_size/1024,l1_linesize); printf("\tDetected %dk L2 DCache, %dB linesize, %d entries\n", l2_size/1024,l2_linesize,l2_entries); } arraysize=(l2_size/sizeof(double))*8; if (arraysize==0) { if (!quiet) printf("Could not detect cache size\n"); test_skip(__FILE__,__LINE__,"Could not detect cache size",0); } if (!quiet) { printf("\tAllocating %zu bytes of memory (%d doubles)\n", arraysize*sizeof(double),arraysize); } array=calloc(arraysize,sizeof(double)); if (array==NULL) { test_fail(__FILE__,__LINE__,"Can't allocate memory",0); } /******************/ /* Testing Writes */ 
/******************/ if (!quiet) { printf("\nWrite Test: Writing an array of %d doubles %d random times:\n", arraysize,ITERATIONS); printf("\tPrefetch and shared nature of L2s make this hard.\n"); printf("\tExpected 7/8 of accesses to be miss.\n"); } high=0; low=0; total=0; for(i=0;ihigh) high=count; if ((low==0) || (count ALLOWED_ERROR) || (error<-ALLOWED_ERROR)) { if (!quiet) { printf("Instruction count off by more " "than %.2lf%%\n",ALLOWED_ERROR); } errors++; } if (!quiet) printf("\n"); /******************/ /* Testing Reads */ /******************/ if (!quiet) { printf("\nRead Test: Summing %d random doubles from array " "of size %d:\n",ITERATIONS,arraysize); printf("\tExpected 7/8 of accesses to be miss.\n"); } high=0; low=0; total=0; for(i=0;ihigh) high=count; if ((low==0) || (count ALLOWED_ERROR) || (error<-ALLOWED_ERROR)) { if (!quiet) { printf("Instruction count off by more " "than %.2lf%%\n",ALLOWED_ERROR); } errors++; } if (!quiet) { printf("\n"); } /* FIXME: Warn, as we fail on broadwell and more recent chips */ if (errors) { test_warn( __FILE__, __LINE__, "Error too high", 1 ); } test_pass(__FILE__); return 0; } papi-5.6.0/man/man3/PAPI_get_component_index.3000664 001750 001750 00000001603 13216244356 023075 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPI_get_component_index" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPI_get_component_index \- .PP returns the component index for the named component .SH SYNOPSIS .br .PP .SH "Detailed Description" .PP .PP .nf @retval ENOCMP component does not exist @param name name of component to find index for @par Examples: .fi .PP .PP .nf int cidx; cidx = PAPI_get_component_index("cuda"); if (cidx==PAPI_OK) { printf("The CUDA component is cidx %d\n",cidx); } * .fi .PP \fBPAPI_get_component_index()\fP returns the component index of the named component\&. This is useful for finding out if a specified component exists\&. 
.PP \fBSee Also:\fP .RS 4 \fBPAPI_get_event_component\fP .RE .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. papi-5.6.0/src/libpfm4/docs/man3/libpfm_intel_ivbep_unc_ubo.3000664 001750 001750 00000002133 13216244364 026076 0ustar00jshenry1963jshenry1963000000 000000 .TH LIBPFM 3 "February, 2014" "" "Linux Programmer's Manual" .SH NAME libpfm_intel_ivbep_unc_ubo - support for Intel Ivy Bridge-EP U-Box uncore PMU .SH SYNOPSIS .nf .B #include <perfmon/pfmlib.h> .sp .B PMU name: ivbep_unc_ubo .B PMU desc: Intel Ivy Bridge-EP U-Box uncore PMU .sp .SH DESCRIPTION The library supports the Intel Ivy Bridge system configuration unit (U-Box) uncore PMU. This PMU model only exists on Ivy Bridge model 62. .SH MODIFIERS The following modifiers are supported on Intel Ivy Bridge U-Box uncore PMU: .TP .B e Enable edge detection, i.e., count only when there is a state transition from no occurrence of the event to at least one occurrence. This modifier must be combined with a threshold modifier (t) with a value greater than or equal to one. This is a boolean modifier. .TP .B t Set the threshold value. When set to a non-zero value, the counter counts the number of cycles in which the number of occurrences of the event is greater than or equal to the threshold. This is an integer modifier with values in the range [0:15]. 
.SH AUTHORS .nf Stephane Eranian .if .PP papi-5.6.0/src/ctests/clockres_pthreads.c000664 001750 001750 00000005017 13216244360 022474 0ustar00jshenry1963jshenry1963000000 000000 #include #include #include #include "papi.h" #include "papi_test.h" #include "clockcore.h" void * pthread_main( void *arg ) { ( void ) arg; int retval = PAPI_register_thread( ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_register_thread", retval ); } retval=clockcore( TESTS_QUIET ); if (retval != PAPI_OK ) { test_fail(__FILE__, __LINE__, "clockcore failure", retval ); } retval = PAPI_unregister_thread( ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_unregister_thread", retval ); } return NULL; } int main( int argc, char **argv ) { pthread_t t1, t2, t3, t4; pthread_attr_t attr; int retval; /* Set TESTS_QUIET variable */ tests_quiet( argc, argv ); if (( retval = PAPI_library_init( PAPI_VER_CURRENT)) != PAPI_VER_CURRENT) { test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); } retval = PAPI_thread_init( ( unsigned long ( * )(void) ) (pthread_self) ); if ( retval != PAPI_OK ) { if ( retval == PAPI_ECMP ) { test_skip( __FILE__, __LINE__, "PAPI_thread_init", retval ); } else { test_fail( __FILE__, __LINE__, "PAPI_thread_init", retval ); } } if ( !TESTS_QUIET ) { printf( "Test case: Clock latency and resolution.\n" ); printf( "Note: Virtual timers are proportional to # CPUs.\n" ); printf( "------------------------------------------------\n" ); } pthread_attr_init( &attr ); #ifdef PTHREAD_CREATE_UNDETACHED pthread_attr_setdetachstate( &attr, PTHREAD_CREATE_UNDETACHED ); #endif #ifdef PTHREAD_SCOPE_SYSTEM retval = pthread_attr_setscope( &attr, PTHREAD_SCOPE_SYSTEM ); if ( retval != 0 ) { test_skip( __FILE__, __LINE__, "pthread_attr_setscope", retval ); } #endif if (pthread_create( &t1, &attr, pthread_main, NULL )) { test_fail(__FILE__, __LINE__, "cannot create thread", retval); } if (pthread_create( &t2, &attr, pthread_main, NULL )) { 
test_fail(__FILE__, __LINE__, "cannot create thread", retval); } if (pthread_create( &t3, &attr, pthread_main, NULL )) { test_fail(__FILE__, __LINE__, "cannot create thread", retval); } if (pthread_create( &t4, &attr, pthread_main, NULL )) { test_fail(__FILE__, __LINE__, "cannot create thread", retval); } pthread_main( NULL ); pthread_join( t1, NULL ); pthread_join( t2, NULL ); pthread_join( t3, NULL ); pthread_join( t4, NULL ); test_pass( __FILE__ ); return 0; } papi-5.6.0/src/components/infiniband_umad/tests/Makefile000664 001750 001750 00000001266 13216244357 025432 0ustar00jshenry1963jshenry1963000000 000000 NAME=infiniband_umad include ../../Makefile_comp_tests.target include ../Makefile.infiniband_umad INFINIBANDLIBS = -L$(INFINIBAND_IBMAD_DIR)/lib64 -L$(INFINIBAND_IBUMAD_DIR)/lib64 -libumad -libmad TESTS = infiniband_umad_list_events infiniband_umad_values_by_code infiniband_umad_tests: $(TESTS) %.o:%.c $(CC) $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c -o $@ $< infiniband_umad_list_events: infiniband_umad_list_events.o $(UTILOBJS) $(PAPILIB) $(CC) $(CFLAGS) $(INCLUDE) -o $@ $^ $(LDFLAGS) $(INFINIBANDLIBS) infiniband_umad_values_by_code: infiniband_umad_values_by_code.o $(UTILOBJS) $(PAPILIB) $(CC) $(CFLAGS) $(INCLUDE) -o $@ $^ $(LDFLAGS) $(INFINIBANDLIBS) clean: rm -f $(TESTS) *.o papi-5.6.0/src/components/bgpm/000775 001750 001750 00000000000 13216244357 020441 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/perfctr-2.6.x/etc/costs/Athlon-1.66000775 001750 001750 00000001373 13216244366 023222 0ustar00jshenry1963jshenry1963000000 000000 [data from a 1.66 GHz Athlon XP 2000+] PERFCTR INIT: vendor 2, family 6, model 6, stepping 2, clock 1659642 kHz PERFCTR INIT: NITER == 64 PERFCTR INIT: loop overhead is 236 cycles PERFCTR INIT: rdtsc cost is 17.1 cycles (1336 total) PERFCTR INIT: rdpmc cost is 15.4 cycles (1225 total) PERFCTR INIT: rdmsr (counter) cost is 49.4 cycles (3402 total) PERFCTR INIT: rdmsr (evntsel) cost is 50.2 cycles (3455 total) PERFCTR INIT: 
wrmsr (counter) cost is 83.3 cycles (5573 total) PERFCTR INIT: wrmsr (evntsel) cost is 229.6 cycles (14936 total) PERFCTR INIT: read cr4 cost is 0.1 cycles (245 total) PERFCTR INIT: write cr4 cost is 60.7 cycles (4124 total) PERFCTR INIT: write LVTPC cost is 3.0 cycles (432 total) perfctr: driver 2.6.6, cpu type AMD K7 at 1659642 kHz papi-5.6.0/src/perfctr-2.7.x/etc/costs/Athlon-1.3000664 001750 001750 00000001470 13216244367 023126 0ustar00jshenry1963jshenry1963000000 000000 [data from a 1.3 GHz Athlon (XP 1500+)] PERFCTR INIT: vendor 2, family 6, model 8, stepping 1, clock 1297680 kHz PERFCTR INIT: NITER == 64 PERFCTR INIT: loop overhead is 159 cycles PERFCTR INIT: rdtsc cost is 16.0 cycles (1189 total) PERFCTR INIT: rdpmc cost is 16.4 cycles (1213 total) PERFCTR INIT: rdmsr (counter) cost is 51.7 cycles (3474 total) PERFCTR INIT: rdmsr (evntsel) cost is 53.3 cycles (3571 total) PERFCTR INIT: wrmsr (counter) cost is 83.8 cycles (5523 total) PERFCTR INIT: wrmsr (evntsel) cost is 231.7 cycles (14991 total) PERFCTR INIT: read cr4 cost is 2.1 cycles (294 total) PERFCTR INIT: write cr4 cost is 62.8 cycles (4180 total) PERFCTR INIT: write LVTPC cost is 9.3 cycles (755 total) PERFCTR INIT: sync_core cost is 73.8 cycles (4883 total) perfctr: driver 2.7.5, cpu type AMD K7/K8 at 1297680 kHz papi-5.6.0/src/components/appio/tests/iozone/Changes.txt000664 001750 001750 00000334765 13216244356 025423 0ustar00jshenry1963jshenry1963000000 000000 V1.0 (capps): Capps: Beginning of the code base. Isom: Added reread Added rewrite Added read backwards Added lseek+read Added lseek+reread Capps: Added more accurate time collection method. Added alignment in the on chip Cache code. Added change step when passing 16 Meg size file. Capps: Added auto+ to purge on chip cache. kcollins: replaced the lseek+read &reread test with random reads and writes Capps: Replaced reverse re-read with record rewrite. This gives both source and destination on chip cache hits. 
Capps: added auto+multi Support for multiple buffers in the iozone. Capps: Removed the recursion through main(). Cleaned up the printout when not in auto mode. Added support for stride-read. ( Manual mode only ) Capps: Cleanup so it will build for bsd4_2 ( C series machines ) Capps: Cleanup on frontend. Now uses getopt() and has a real parser. Cleanup on error handling. Added throughput tests. Eliminated page faults in the throughput tests. Capps: Made all perf tests table driven. Add back Bill's Copyright. ansify, prototypes, scope limitations. V2.1 (kcollins): Simplified auto mode logic. auto test now runs from MEGABYTES_START to MEGABYTES_END and from RECLEN_START to RECLEN_END with values multiplied by MULTIPLIER each iteration. Range set to (4K...16M) for RECLEN and (1M...512M) for MEGABYTES. (the physical I/O tests for RECLEN <4K take several hours on a 1200). Enlarged MAXBUFFERSIZE to 16MB for large record tests. Added error checking for mallocs (!-). Changed multibuffer code to use all of MAXBUFFERSIZE as a circular buffer, with the number of buffers varying as MAXBUFFERSIZE/reclen. This corrects problems where MAXBUFFERSIZE*MAXBUFFERS was very large. Also modified auto mode so that tests where reclen>filesize are skipped. Modified preadv code to vary the number of buffers as necessary such that they will fit in min(MAXBUFFERSIZE,filesize). This fixes problems where the number of buffers in the i/o vector exceeded the size of mainbuffer. Added bzero for buffer when it is first malloc'd. This ensures that it is initialized before use. Created a script (profile.fs) that runs a series of tests to generate a "box" around common application variables such as filesize, buffer size, buffer encachement, and number of concurrent processes. This is intended to serve as the "standard" filesystem profile. 
buffer reset to mainbuffer before each test loop V2.3 (kcollins): added -F option to write to specify pathnames for throughput tests (allowing throughput tests to multiple filesystems). V2.4 (capps): Changed preadv/pwritev to use a non-sequential access pattern. Changed the version number. Moved all user interface values to KB. This simplifies the user interface. (consistent scaling) and it also allows one to start with 512kb file. This is very important since the first indirect block causes a significant slowdown in the initial write cases. V2.5 (capps): Re-structure and cleanup. V2.6 (kcollins) Bug fix for the throughput tests. V2.7 (capps): Added -o flag. This makes all file opens for writes have the O_SYNC flag set. This makes all writes go to disk before completion. This is useful for seeing what the media can do without the buffer cache helping. V2.8 (capps): Added -V flag. This turns on pattern verification. If the user were to type: -V 165 Then bit pattern 0xa5 would be placed in every byte in the buffer and when read back from buffer cache, or disk, it will be verified to be correct. If it fails then the error handler will specify the byte location of the miscompare. V2.9 (capps): Added fread/re-fread, fwrite/re-fwrite to list of tests. Added -E to allow the user to run pread and friends as an option. V2.10 (capps): Added -R. This will generate Excel compatible files that can then be imported into Excel and graphed. Added support for 5 targets to the makefile. Added -M This prints out the uname -a stuff about a machine. Added -O This gives all results in operations/sec instead of KB/sec. More code cleanup. Update comments. V2.11 (kcollins) added -A. Auto mode with no crossover and read/write tests only changed default record size to 64KB (from 512 bytes) V2.12 (capps) Added shared memory barrier sync for throughput mode. This provides much finer control over the actual timing of the children. 
Added mmap() for BSD (Convex) machines that do not have System V shared memory. Added two ways of showing throughput results. The second method takes into consideration children that lag behind due to slow devices, and gives results that are more accurate. Cleanup of some tab problems in throughput results. Cleanup of floating point output taking too much space. Added -d to allow a variable delay coming out of the barrier in the throughput tests. V2.12 (kcollins) added declaration for create_list to make ansi c compiles work several fixes to some of the SPPUX 5.x make targets added date run to banner (hope this doesn't break your scripts $-) V2.13 (capps) Added "stone walling". During throughput tests, if one process finishes then all others are sent a signal to tell them to stop. (The parallel region has finished). This provides better numbers for throughput. Only bzero or fill min(reclen,CACHE_SIZE) this saves a bunch of paging on workstations with small memory systems. Fixed broken target in the makefile. Note: use of -d is not advised. It makes the children not run in parallel. V2.14 (capps) Bug fix to avoid anomaly in SPP-UX. In SPP-UX the filesystem code preallocates meta-data to improve initial file writes. The first indirect block allocation was causing a block of zeros to be written synchronously. In SPP-UX the filesystem code preallocates zero filled blocks when the first writer touches a filesystem after a sync. A pool of on disk zero'd blocks are created asynchronously and handed out to writers when they cross the boundary into the first level indirect and would have had to stop and wait for the zero filled block to be written. Iozone's testing methodology was not allowing the OS to have any time to complete the async pre-allocation and was not showing the speed up that real applications would see. V2.15 (capps) Improve throughput testing mode. V2.16 (capps) Added -U option. This allows the filesystem to be unmounted and remounted between tests. 
This guarantees that the buffer cache is cold. V2.17 (capps) Added -T option. This makes the throughput tests use threads instead of processes. Currently using pthread_create(), pthread_self(), and pthread_exit(). Cleaned up file cleanup mechanism. Control C will now cause all temp files to be deleted. Removed all signals used to control sub-processes. V2.18 (capps) Cleanup. Added read stride, read backwards to the throughput tests. Various bug fixes V2.19 (capps) Removed all calls to malloc() and all use of system V shared memory. mmap() is much easier to deal with. As for malloc() HP programs are very limited on the amount of malloc() space and not nearly so constrained on mmap() memory. It was necessary to move to mmap() since multiple threads all need buffers in the process's address space. Removed dependency on first thread being number 2. Iozone now probes to find out what the thread library will return for the first thread. This makes the switching thread libraries much easier. V2.20 (capps) Children now set stop_flag and shutdown all other children. There is no further need to tell the parent to distribute the stop_flag. verify, purge, and osync are now supported in the throughput tests. Fixed bug where pthreads stack size was causing segmentation violation when purgeit() was called for buffers that were greater than 256kb. V2.21 (capps) Enhanced throughput reporting. Now provides: Child throughput, Parent throughput, Minimum throughput for any child in the group, Maximum throughput for any child in the group, and Minimum transfer count. Due to stone walling not all children write the full requested size. This minimum transfer count provides the user with knowledge of how much work was performed by the slowest child. Added -C flag. This allows the user to see all of the transfer counts for each child. Had to add system 5 shared memory back. Linux does not support mmap(MAP_ANONYMOUS|MAP_SHARED). So it must use SYSV shared memory to get sharing working. 
V2.22 (capps) Made changes to make iozone work correctly on Linux on a PC. Changes are just scaling down the test to fit on a pc, and scaling down shared segments to < 16 Meg so it can run on an Intel 386 class machine. Added: -L # Set the processor cache line size in bytes. Added: -S # Set the processor cache size in kbytes. Removed spin wait in parent waiting for threads to finish each throughput test. Code now uses thread_join(). Fixed -O (operations/sec) mode to work in throughput tests. V2.23 (capps) Close small timing hole where thread/process has set stop flag and others are in a system call. The hole allowed threads/processes to continue to increment work done after one had finished and told the others to stop. The result was that the children would report slightly high numbers as they were not truly parallel at the finish line. Added random read throughput test. Fixes for VxFS small extents being created by prime_zb() functions. Provides more details about the throughput run. V2.24 (capps) Added support for -R (Excel chart generation) to the throughput tests. Also added support for the -O (ops/sec) to the throughput Excel chart. V2.25 (capps) Added support for selecting which test to run. -i # -i 0 -i 3 will run write and read-backwards tests only. For a list of the test numbers type iozone -h. V2.26 (capps) Added support for LARGE_FILES for the hpux-11.0 target. V2.27 (capps) All tests now verify one long word of data from each page written/read to/from the file. This is to level the playing field with systems that do not move data when "read" or "write" is called, but instead just map the file and perform the I/O when the address space is touched. Benchmarks that do not validate the data, or at least touch each page, do not measure the read/write times just the map times. Note: The -V option still verifies each byte of the buffer, the default is now to verify one long from each page. V2.28 (capps) Added support for benchmarking mmap() files. 
Added more command line options. -B -G -D B = Use mmap() files for the benchmark. G = Use msync(MS_SYNC) for mmap files. D = Use msync(MS_ASYNC) for mmap files. V2.29 (capps) Bug fixes for: Combination of running individual tests and mmap() files support. Stride read bug that caused only portions of the total file to be examined. V2.30 (capps) Fixups for build under SPP-UX V2.31 (capps) Fixups for build under Linux. Added -j ### to support user setting the stride size for the stride read benchmark. V2.32 (capps) Add support for IRIX and IRIX64. V2.33 (capps) Add support for POSIX async I/O benchmarking. Uses a library to interface to POSIX async I/O model. The library provides and extended async_read() interface. It takes the standard calling options of read() but also allows the application to perform read-ahead with a stride. (positive or negative) and allows the user to specify how much read ahead to perform. Tested on HP-UX 11.0, Linux, SGI Origin. V2.34 (capps) Added -k. This allows POSIX async I/O to utilize the buffer specified and not to perform any bcopys. Fixes to make multi-threadedness work on SGI Origin. V2.34 (capps) Added [-k #]. This allows POSIX async I/O to utilize the buffer specified and not to perform any bcopys. Fixes to make multi-threadedness work on SGI Origin. V2.36 (capps) Iozone is now a 64 bit application. It may be compiled for either 64 bit or 32 bit machines. The makefile supports 64 and 32 bit targets for machines that support 32 & 64 bit targets. All version numbers are now automatically generated by RCS. This is the last time we have to bump the version number by hand. ----------------------------------------------------------------------------------- Changed over to RCS source control here: Version Numbers are reset at this point back to Version 1.1. 
----------------------------------------------------------------------------------- RCS file: iozone.c,v; Working file: iozone.c head: 1.94 locks: ; strict access list: symbolic names: comment leader: " * " total revisions: 94; selected revisions: 94 description: Initial rcs version of Iozone ---------------------------- Revision 1.94 date: 99/01/18 13:02:57; author: capps; state: Exp; lines added/del: 7/2 Call msync if writer wants sync in timing and terminates early in multi thread test case. ---------------------------- Revision 1.93 date: 99/01/18 11:46:11; author: capps; state: Exp; lines added/del: 309/126 Cleanup for include_flush and include_close for single and multi threaded operations. ---------------------------- Revision 1.92 date: 99/01/15 10:53:58; author: capps; state: Exp; lines added/del: 40/11 Add include_close support for throughput testing ---------------------------- Revision 1.91 date: 98/12/07 09:26:22; author: capps; state: Exp; lines added/del: 43/24 For Windows: Use the high resolution timers instead of timeofday(); Fix a few casting problems. ---------------------------- Revision 1.90 date: 98/11/30 14:49:46; author: capps; state: Exp; lines added/del: 24/17 Update the copyright and names and places ---------------------------- Revision 1.89 date: 98/10/30 09:04:51; author: capps; state: Exp; lines added/del: 1/2 An extra close(fd) causes HP-UX to fail future unmounts... ---------------------------- Revision 1.88 date: 98/10/29 09:47:25; author: capps; state: Exp; lines added/del: 17/17 Cleanup the help screen ---------------------------- Revision 1.87 date: 98/10/28 23:31:11; author: capps; state: Exp; lines added/del: 7/6 Spelling error fix. ---------------------------- Revision 1.86 date: 98/10/14 11:21:50; author: capps; state: Exp; lines added/del: 23/68 Unified the time method to only have 2 ways to get time. 
---------------------------- Revision 1.85 date: 98/10/14 09:22:09; author: capps; state: Exp; lines added/del: 91/91 Added code to remove the latency of gettimeofday() from the file performance measurements. ---------------------------- Revision 1.84 date: 98/10/12 11:44:50; author: capps; state: Exp; lines added/del: 107/8 Add time resolution output, and fix the divide by zero when the time in a system call turns out to be Zero. This will introduce distortion for machines that have very fast system calls and very poor time resolution. Windows has a 50 Milli second resolution on gettimeofday(). So... to fix it all calls that take less than 50 Milli seconds will be rounded up to cost 50 milliseconds. ---------------------------- Revision 1.83 date: 98/10/06 09:58:16; author: capps; state: Exp; lines added/del: 46/2 Add support for Windows build ---------------------------- Revision 1.82 date: 98/09/23 09:48:02; author: capps; state: Exp; lines added/del: 2/2 Fix bug where -i # was leaving tmp files after throughput test. ---------------------------- Revision 1.81 date: 98/09/23 09:41:12; author: capps; state: Exp; lines added/del: 1/3 Remove debug printf ---------------------------- Revision 1.80 date: 98/09/23 09:29:01; author: capps; state: Exp; lines added/del: 23/1 Add my_nap(). This allows the threads to switch processors to their new bound processor before performing any work. ---------------------------- Revision 1.79 date: 98/09/22 11:57:20; author: capps; state: Exp; lines added/del: 8/8 Change xx back into an int so the modulo will work better. ---------------------------- Revision 1.78 date: 98/09/18 16:27:05; author: capps; state: Exp; lines added/del: 18/15 Remove create in rewrite path. ---------------------------- Revision 1.77 date: 98/08/17 16:44:06; author: capps; state: Exp; lines added/del: 23/1 Fixes for Solaris and the new processor bind feature. 
---------------------------- Revision 1.76 date: 98/08/17 16:17:45; author: capps; state: Exp; lines added/del: 1/2 Remove debug code. ---------------------------- Revision 1.75 date: 98/08/17 16:16:15; author: capps; state: Exp; lines added/del: 92/5 Add support for binding procs/threads to cpus. ---------------------------- Revision 1.74 date: 98/08/07 16:51:41; author: capps; state: Exp; lines added/del: 4/3 Add fsync to the fwrite test case when the user specifies -e ---------------------------- Revision 1.73 date: 98/08/07 16:47:38; author: capps; state: Exp; lines added/del: 178/208 Add -c and -e to allow closes and fsyncs to be inside the timing calculations. ---------------------------- Revision 1.72 date: 98/08/06 22:40:15; author: capps; state: Exp; lines added/del: 9/1 Add setvbuf to fwrite and fread tests so that the internal fwrite and fread buffer size is the same as the record size. This is what a well tuned application would do. ---------------------------- Revision 1.71 date: 98/08/06 09:03:06; author: capps; state: Exp; lines added/del: 2/3 Fix fsync filename problem in fwrite_perf_test ---------------------------- Revision 1.70 date: 98/08/05 18:06:41; author: capps; state: Exp; lines added/del: 6/2 Add fsync after fwrite test case so the fread will start with a clean buffer cache and no writes in progress. ---------------------------- Revision 1.69 date: 98/08/03 10:45:49; author: capps; state: Exp; lines added/del: 3/3 Bug fix for -V option not filling the entire buffer. 
---------------------------- Revision 1.68 date: 98/07/30 22:11:11; author: capps; state: Exp; lines added/del: 2/3 Fix for solaris ---------------------------- Revision 1.67 date: 98/07/30 22:08:19; author: capps; state: Exp; lines added/del: 2/2 Fix for solaris ---------------------------- Revision 1.66 date: 98/07/30 22:05:02; author: capps; state: Exp; lines added/del: 43/15 Add support for Solaris ---------------------------- Revision 1.65 date: 98/07/01 14:19:19; author: capps; state: Exp; lines added/del: 80/82 Move end_async inside the timing loops as in async I/O it counts. ---------------------------- Revision 1.64 date: 98/06/16 17:04:36; author: capps; state: Exp; lines added/del: 13/2 Correct problem where user specifies pread tests on hpux... which does not support these operations. The test now prints an error message and exits. ---------------------------- Revision 1.63 date: 98/06/16 16:54:22; author: capps; state: Exp; lines added/del: 1/2 Remove exit from auto_test. This allows the message "iozone test complete" to be printed when in auto test mode. ---------------------------- Revision 1.62 date: 98/06/10 10:54:28; author: capps; state: Exp; lines added/del: 175/173 All exit()s now have a unique exit value. ---------------------------- Revision 1.61 date: 98/05/18 13:34:03; author: capps; state: Exp; lines added/del: 17/18 Move .dat file descriptors to global data. Needed to prevent re-opens. ---------------------------- Revision 1.60 date: 98/05/18 13:24:22; author: capps; state: Exp; lines added/del: 6/3 Bug fix. Prevents re-opending .dat files when in auto mode. ---------------------------- Revision 1.59 date: 98/05/08 13:03:02; author: capps; state: Exp; lines added/del: 21/3 Enhance throughput tests to follow the -i test number to run options. ---------------------------- Revision 1.58 date: 98/05/07 14:15:49; author: capps; state: Exp; lines added/del: 109/39 Make VXFS a define in the make command. 
This makes moving to other targets easier. It removes the binding of HPUX and VXFS. Also, Added -Q to support offset/latency file generation for later use as inputs to plot program. ---------------------------- Revision 1.57 date: 98/05/06 15:09:43; author: capps; state: Exp; lines added/del: 100/27 Add -N to provide results in microseconds per operation. ---------------------------- Revision 1.56 date: 98/05/05 13:23:29; author: capps; state: Exp; lines added/del: 3/10 If the user specifies -i 0 then run both write and rewrite tests. ---------------------------- Revision 1.55 date: 98/04/30 15:19:02; author: capps; state: Exp; lines added/del: 1/1 No change ---------------------------- Revision 1.54 date: 98/04/30 15:09:58; author: capps; state: Exp; lines added/del: 2/2 Unlink the vxfstest when the test fails. ---------------------------- Revision 1.53 date: 98/04/30 13:07:21; author: capps; state: Exp; lines added/del: 7/5 Cleanup help output. ---------------------------- Revision 1.52 date: 98/04/30 12:58:29; author: capps; state: Exp; lines added/del: 21/4 Add async I/O with no bcopy to throughput tests. ---------------------------- Revision 1.51 date: 98/04/29 15:29:29; author: capps; state: Exp; lines added/del: 5/1 Fixes so it will compile on the SGI Origin. ---------------------------- Revision 1.50 date: 98/04/29 11:57:58; author: capps; state: Exp; lines added/del: 5/1 Do not need to limit async ops. Fix is in libasync.c ---------------------------- Revision 1.49 date: 98/04/29 10:45:19; author: capps; state: Exp; lines added/del: 61/3 Add async I/O to throughput testing for writes ---------------------------- Revision 1.48 date: 98/04/28 11:57:13; author: capps; state: Exp; lines added/del: 5/1 Limit max async operations to 60. Beyond this there be dragons. ---------------------------- Revision 1.47 date: 98/04/28 10:16:09; author: capps; state: Exp; lines added/del: 108/21 Completed support for no_bcopy POSIX async I/O in the async_write_no_copy path. 
This allows write tests to perform async I/O with buffers released when the write is completed. ---------------------------- Revision 1.46 date: 98/04/27 16:58:38; author: capps; state: Exp; lines added/del: 43/10 Add aio_write() to the write and re-write tests. This provides POSIX async I/O for the those tests. ---------------------------- Revision 1.45 date: 98/04/25 09:53:39; author: capps; state: Exp; lines added/del: 3/2 direct_flag is an int. Was a char in one place and an int in another. ---------------------------- Revision 1.44 date: 98/04/25 09:17:42; author: capps; state: Exp; lines added/del: 27/15 More support for vx_direct support in the write path ---------------------------- Revision 1.43 date: 98/04/24 16:33:44; author: capps; state: Exp; lines added/del: 115/77 Move VX_DIRECT to libasync. But keep the VX_DIRECT support also in iozone. So one can use VX_DIRECT with and without async I/O ---------------------------- Revision 1.42 date: 98/04/24 16:20:34; author: capps; state: Exp; lines added/del: 127/60 Move VX_DIRECT to the libasync module. ---------------------------- Revision 1.41 date: 98/04/24 15:50:54; author: capps; state: Exp; lines added/del: 190/7 Add support for VxFS VX_DIRECT Idea is to use VX_DIRECT and POSIX async I/O together ---------------------------- Revision 1.40 date: 98/04/22 16:38:25; author: capps; state: Exp; lines added/del: 5/5 Sppux wants ail_gettimeofday variables to be unsigned int. ---------------------------- Revision 1.39 date: 98/04/22 16:19:50; author: capps; state: Exp; lines added/del: 7/3 Fix -M option not printing cleanly Fix -R in 32 bit mode printing garbage. ---------------------------- Revision 1.38 date: 98/04/22 15:56:02; author: capps; state: Exp; lines added/del: 1/1 Change to only disply revision not full header. ---------------------------- Revision 1.37 date: 98/04/22 15:52:19; author: capps; state: Exp; lines added/del: 1/1 Add RCS Header to support versioning. 
---------------------------- Revision 1.36 date: 98/04/22 15:38:26; author: capps; state: Exp; lines added/del: 1/1 fix to bcopy() third arg needs to be size_t for 32 bit mode. ---------------------------- Revision 1.35 date: 98/04/22 09:09:24; author: capps; state: Exp; lines added/del: 17/17 Bug fixes for 64 bit mode on IRIX, and addition on the internal inuse queue to insure that the internal struct_cache_ent structures are not released too early when doing direct I/O (async_read_no_copy). ---------------------------- Revision 1.34 date: 98/04/21 09:31:02; author: capps; state: Exp; lines added/del: 4/0 Fix to eliminate hidden (dot) files that iozone was creating in throughput mode. All files are now visible with ls. ---------------------------- Revision 1.33 date: 98/04/21 08:30:35; author: capps; state: Exp; lines added/del: 7/1 Have Iozone print the compile model used. ---------------------------- Revision 1.32 date: 98/04/20 18:46:02; author: capps; state: Exp; lines added/del: 49/20 Fixes for 32 bit mode. ---------------------------- Revision 1.31 date: 98/04/20 16:57:29; author: capps; state: Exp; lines added/del: 8/8 make sure malloc is called with (size_t) parameter. ---------------------------- Revision 1.30 date: 98/04/20 16:05:08; author: capps; state: Exp; lines added/del: 933/757 Iozone now 64 bit application ---------------------------- Revision 1.29 date: 98/04/20 12:32:25; author: capps; state: Exp; lines added/del: 4/4 Move msync to before munmap so file gets written. ---------------------------- Revision 1.28 date: 98/04/20 10:21:30; author: capps; state: Exp; lines added/del: 2/2 Minor fix for -O flag and -B not working smoothly together. ---------------------------- Revision 1.27 date: 98/04/20 10:17:19; author: capps; state: Exp; lines added/del: 0/0 No change ---------------------------- Revision 1.26 date: 98/04/19 15:11:07; author: capps; state: Exp; lines added/del: 5/5 Remove prime_zbfill. It causes problems with mmap files. 
---------------------------- Revision 1.25 date: 98/04/16 15:24:50; author: capps; state: Exp; lines added/del: 228/70 -H is Nastran async I/O with bcopy -k is async I/O without any bcopys ---------------------------- Revision 1.24 date: 98/04/15 16:48:30; author: capps; state: Exp; lines added/del: 22/4 fix to make build on 9.05 and 10.1 ---------------------------- Revision 1.23 date: 98/04/15 15:36:55; author: capps; state: Exp; lines added/del: 9/9 Cleanup some compiler warnings about un-initialized variables. They are not really un-initialized and used but it does generate compiler warnings on some machines. ---------------------------- Revision 1.22 date: 98/04/15 15:32:56; author: capps; state: Exp; lines added/del: 7/7 Need to free the dummyname space a bit later. ---------------------------- Revision 1.21 date: 98/04/15 14:37:05; author: capps; state: Exp; lines added/del: 27/13 Fix to use smaller stack size in thread_ routines. It was causing the SGI to drop core in throughput tests. ---------------------------- Revision 1.20 date: 98/04/14 17:01:19; author: capps; state: Exp; lines added/del: 27/16 Fix a memory leak. In multi_throughput testing shmalloc was getting called for each iteration. This is not needed and causes much to much shm to be allocated. Not broken but definately a pig. ---------------------------- Revision 1.19 date: 98/04/14 15:19:15; author: capps; state: Exp; lines added/del: 2/0 When -k is specified alone this will turn on the POSIX async I/O and set depth to 0. ---------------------------- Revision 1.18 date: 98/04/14 15:00:18; author: capps; state: Exp; lines added/del: 21/20 Fixes to make multi-threaded version run on the SGI Origin. ---------------------------- Revision 1.17 date: 98/04/14 11:55:44; author: capps; state: Exp; lines added/del: 17/11 Add support for -k. When using POSIX async I/O use the buffer specified and do not perform any bcopys. 
---------------------------- Revision 1.16 date: 98/04/13 10:22:18; author: capps; state: Exp; lines added/del: 27/380 Add libasync library support ---------------------------- Revision 1.15 date: 98/04/11 12:09:25; author: capps; state: Exp; lines added/del: 1/0 Fix memory leak. Now calls del_cache whenever any calls to async_end happen. This will ensure that there are no outstanding I/Os on the cache that have not been canceled. ---------------------------- Revision 1.14 date: 98/04/11 11:57:10; author: capps; state: Exp; lines added/del: 632/47 Add support for POSIX async I/O testing ---------------------------- Revision 1.13 date: 98/03/31 14:30:15; author: capps; state: Exp; lines added/del: 44/6 Fix support for bsd4_2 and ConvexOS ---------------------------- Revision 1.12 date: 98/03/31 11:26:34; author: capps; state: Exp; lines added/del: 2/2 Bump version number to 2.32 ---------------------------- Revision 1.11 date: 98/03/31 11:20:51; author: capps; state: Exp; lines added/del: 70/6 Add support for SGI IRIX and SGI IRIX64 ---------------------------- Revision 1.10 date: 98/03/27 14:00:47; author: capps; state: Exp; lines added/del: 15/20 Put the bcopy back. It is more representative of what the real application will do. ---------------------------- Revision 1.9 date: 98/03/27 13:25:02; author: capps; state: Exp; lines added/del: 40/14 Improved mmap file support. Now only have 1 long word from each page touched. This eliminates the overhead of bcopy dominating the results. It also is performing the same work that the non-mmap version does with verify(). ---------------------------- Revision 1.8 date: 98/03/27 10:41:13; author: capps; state: Exp; lines added/del: 10/4 Bug fix. Frewrite was truncating the file. This fix ensures that the Frewrite test opens without trunc. ---------------------------- Revision 1.7 date: 98/03/27 10:16:41; author: capps; state: Exp; lines added/del: 3/3 Fix report to specify stride size as a function of reclen.
It did not make sense to output kbytes as the value changes when in auto mode to match the current record length. ---------------------------- Revision 1.6 date: 98/03/26 15:28:15; author: capps; state: Exp; lines added/del: 16/8 Add support for -j option. This allows the user to specify the stride size for the strided file access benchmark. ---------------------------- Revision 1.5 date: 98/03/25 15:27:01; author: capps; state: Exp; lines added/del: 1/1 Fixup help screen to reflect new options ---------------------------- Revision 1.4 date: 98/03/25 15:21:23; author: capps; state: Exp; lines added/del: 1/1 Change the revision number ---------------------------- Revision 1.3 date: 98/03/25 15:20:28; author: capps; state: Exp; lines added/del: 16/1 Fixup support for Linux ---------------------------- Revision 1.2 date: 98/03/25 13:58:05; author: capps; state: Exp; lines added/del: 16/3 Bug fixes for SPP-UX ---------------------------- Revision 1.1 date: 98/03/25 10:43:45; author: capps; state: Exp; Initial revision ============================================================================= RCS file: libasync.c,v; Working file: libasync.c head: 1.39 locks: ; strict access list: symbolic names: comment leader: " * " total revisions: 39; selected revisions: 39 description: Initial version of POSIX async I/O library interface. ---------------------------- Revision 1.39 date: 98/07/30 22:05:21; author: capps; state: Exp; lines added/del: 3/1 Add support for Solaris ---------------------------- Revision 1.38 date: 98/07/07 13:00:39; author: capps; state: Exp; lines added/del: 1/11 Remove extra bcopy in the async_write_no_bcopy path. ---------------------------- Revision 1.37 date: 98/06/11 09:47:58; author: capps; state: Exp; lines added/del: 3/3 Fix syntax error for IRIX ---------------------------- Revision 1.36 date: 98/06/10 10:56:55; author: capps; state: Exp; lines added/del: 10/10 All exit()s now have a unique exit value. 
---------------------------- Revision 1.35 date: 98/05/07 14:17:20; author: capps; state: Exp; lines added/del: 2/2 Make VXFS a define in the make command. This makes moving to other targets easier. It removes the binding of HPUX and VXFS. Also, Added -Q to support offset/latency file generation for later use as inputs to plot program. ---------------------------- Revision 1.34 date: 98/04/30 15:19:54; author: capps; state: Exp; lines added/del: 1/3 Remove debug code that breaks 64 bit mode compiled code. ---------------------------- Revision 1.33 date: 98/04/30 13:09:13; author: capps; state: Exp; lines added/del: 2/2 Make retval an int so it can be checked for less than zero. ---------------------------- Revision 1.32 date: 98/04/29 16:49:34; author: capps; state: Exp; lines added/del: 5/11 If overshooting on number of asyncs then terminate the loop and let the next time through pick up the I/O. ---------------------------- Revision 1.31 date: 98/04/29 16:37:49; author: capps; state: Exp; lines added/del: 3/3 Remove debug code ---------------------------- Revision 1.30 date: 98/04/29 15:29:48; author: capps; state: Exp; lines added/del: 3/1 Fixes so it will compile on the SGI Origin. ---------------------------- Revision 1.29 date: 98/04/29 11:56:27; author: capps; state: Exp; lines added/del: 36/10 Work around for bug in POSIX async I/O library ---------------------------- Revision 1.28 date: 98/04/29 11:04:26; author: capps; state: Exp; lines added/del: 1/2 Remove debug code ---------------------------- Revision 1.27 date: 98/04/29 11:02:54; author: capps; state: Exp; lines added/del: 54/27 Added resource shortage paths. 
---------------------------- Revision 1.26 date: 98/04/28 18:12:51; author: capps; state: Exp; lines added/del: 1/3 Add async I/O to the throughput tests ---------------------------- Revision 1.25 date: 98/04/28 17:12:40; author: capps; state: Exp; lines added/del: 3/1 fix wait_for_ routine to reset w_tail if item being removed is also the tail. ---------------------------- Revision 1.24 date: 98/04/28 16:14:06; author: capps; state: Exp; lines added/del: 1/3 bug fix. 2 calls to malloc for aligned memory. ---------------------------- Revision 1.23 date: 98/04/28 11:57:39; author: capps; state: Exp; lines added/del: 37/13 Limit max async operations to 60. Beyond this there be dragons. ---------------------------- Revision 1.22 date: 98/04/28 10:17:22; author: capps; state: Exp; lines added/del: 127/42 Completed support for no_bcopy POSIX async I/O in the async_write_no_copy path. This allows write tests to perform async I/O with buffers released when the write is completed. ---------------------------- Revision 1.21 date: 98/04/27 16:59:14; author: capps; state: Exp; lines added/del: 246/9 Add aio_write() to the write and re-write tests. This provides POSIX async I/O for the those tests. ---------------------------- Revision 1.20 date: 98/04/24 16:20:55; author: capps; state: Exp; lines added/del: 15/3 Move VX_DIRECT to the libasync module. ---------------------------- Revision 1.19 date: 98/04/24 15:50:13; author: capps; state: Exp; lines added/del: 42/11 Add support for VxFS VX_DIRECT Idea is to use VX_DIRECT and POSIX async I/O together ---------------------------- Revision 1.18 date: 98/04/24 12:36:42; author: capps; state: Exp; lines added/del: 13/5 Fix some error printfs to match the size of the off_t. ---------------------------- Revision 1.17 date: 98/04/24 12:18:11; author: capps; state: Exp; lines added/del: 7/7 Fixes for LP64 mode. 
off_t changed to off64_t ---------------------------- Revision 1.16 date: 98/04/24 09:33:32; author: capps; state: Exp; lines added/del: 275/35 Add comments and fix for LP64 model on hpux. ---------------------------- Revision 1.15 date: 98/04/23 16:58:06; author: capps; state: Exp; lines added/del: 167/13 Make libasync large file aware. ---------------------------- Revision 1.14 date: 98/04/22 15:58:45; author: capps; state: Exp; lines added/del: 1/1 Change version to only display rcs version id. ---------------------------- Revision 1.13 date: 98/04/22 15:52:54; author: capps; state: Exp; lines added/del: 1/2 Add RCS version support ---------------------------- Revision 1.12 date: 98/04/22 11:39:35; author: capps; state: Exp; lines added/del: 52/8 Add firewall to prevent in flight changes to the aiocb structure. ---------------------------- Revision 1.11 date: 98/04/22 09:10:36; author: capps; state: Exp; lines added/del: 57/19 Bug fixes for 64 bit mode on IRIX, and addition on the internal inuse queue to insure that the internal struct_cache_ent structures are not released too early when doing direct I/O (async_read_no_copy). ---------------------------- Revision 1.10 date: 98/04/21 09:34:14; author: capps; state: Exp; lines added/del: 18/10 Improve error messages. ---------------------------- Revision 1.9 date: 98/04/20 16:06:21; author: capps; state: Exp; lines added/del: 53/50 Iozone now 64 bit application ---------------------------- Revision 1.8 date: 98/04/20 10:17:59; author: capps; state: Exp; lines added/del: 0/0 no change ---------------------------- Revision 1.7 date: 98/04/17 08:49:16; author: capps; state: Exp; lines added/del: 15/2 Optimization on async operations. Just add one to the end of the list if the list already has more than one item. ---------------------------- Revision 1.6 date: 98/04/17 00:00:30; author: capps; state: Exp; lines added/del: 10/2 Make cancel keep trying until it succeeds. 
Otherwise transfers after the buffer is freed can occur. ---------------------------- Revision 1.5 date: 98/04/16 16:49:28; author: capps; state: Exp; lines added/del: 49/4 Improve error handling when running machine out of memory. ---------------------------- Revision 1.4 date: 98/04/16 15:26:41; author: capps; state: Exp; lines added/del: 118/28 added async_read_no_copy(). This allows the application to let the library specify the destination buffer and perform the async I/O without unwanted bcopys. ---------------------------- Revision 1.3 date: 98/04/14 11:56:23; author: capps; state: Exp; lines added/del: 36/10 Add support for -k. When using POSIX async I/O use the buffer specified and do not perform any bcopys. ---------------------------- Revision 1.2 date: 98/04/13 10:35:20; author: capps; state: Exp; lines added/del: 5/7 Fixup for error path to propagate any small transfers. ---------------------------- Revision 1.1 date: 98/04/13 10:21:23; author: capps; state: Exp; Initial revision ============================================================================= RCS file: makefile,v; Working file: makefile head: 1.20 locks: ; strict access list: symbolic names: comment leader: "# " total revisions: 20; selected revisions: 20 description: Initial version of makefile ---------------------------- Revision 1.20 date: 98/10/06 10:36:22; author: capps; state: Exp; lines added/del: 87/28 Add comments to describe each target's capabilities.
---------------------------- Revision 1.19 date: 98/10/06 09:59:18; author: capps; state: Exp; lines added/del: 3/3 Fix spelling error ---------------------------- Revision 1.18 date: 98/10/06 09:58:29; author: capps; state: Exp; lines added/del: 18/3 Add support for Windows build ---------------------------- Revision 1.17 date: 98/08/17 16:44:56; author: capps; state: Exp; lines added/del: 2/2 Fixes for Solaris ---------------------------- Revision 1.16 date: 98/07/30 22:05:33; author: capps; state: Exp; lines added/del: 20/1 Add support for Solaris ---------------------------- Revision 1.15 date: 98/05/07 14:17:26; author: capps; state: Exp; lines added/del: 13/13 Make VXFS a define in the make command. This makes moving to other targets easier. It removes the binding of HPUX and VXFS. Also, Added -Q to support offset/latency file generation for later use as inputs to plot program. ---------------------------- Revision 1.14 date: 98/04/22 16:02:42; author: capps; state: Exp; lines added/del: 2/0 Add RCS version ids. ---------------------------- Revision 1.13 date: 98/04/22 13:58:54; author: capps; state: Exp; lines added/del: 6/6 For now only build the SGI targets in 32 bit mode. There is some problem with POSIX async I/O and 64 bit apps. ---------------------------- Revision 1.12 date: 98/04/22 12:08:25; author: capps; state: Exp; lines added/del: 3/3 Let the IRIX64 target default to its default compile mode. ---------------------------- Revision 1.11 date: 98/04/22 09:10:54; author: capps; state: Exp; lines added/del: 3/3 Bug fixes for 64 bit mode on IRIX, and addition on the internal inuse queue to insure that the internal struct_cache_ent structures are not released too early when doing direct I/O (async_read_no_copy). 
---------------------------- Revision 1.10 date: 98/04/21 09:29:57; author: capps; state: Exp; lines added/del: 17/17 Improve dependencies ---------------------------- Revision 1.9 date: 98/04/20 16:05:48; author: capps; state: Exp; lines added/del: 58/29 Iozone now 64 bit application ---------------------------- Revision 1.8 date: 98/04/20 10:17:44; author: capps; state: Exp; lines added/del: 0/0 *** empty log message *** ---------------------------- Revision 1.7 date: 98/04/16 16:50:11; author: capps; state: Exp; lines added/del: 6/6 Have the SGI build 32 bit app too. ---------------------------- Revision 1.6 date: 98/04/15 16:48:09; author: capps; state: Exp; lines added/del: 5/5 Fix to make build on 9.05 and 10.1 ---------------------------- Revision 1.5 date: 98/04/13 10:22:34; author: capps; state: Exp; lines added/del: 14/6 Add support for libasync library. ---------------------------- Revision 1.4 date: 98/04/11 11:57:34; author: capps; state: Exp; lines added/del: 10/10 Add support for POSIX async I/O testing ---------------------------- Revision 1.3 date: 98/03/31 11:21:34; author: capps; state: Exp; lines added/del: 24/0 Add support for SGI IRIX and SGI IRIX64 ---------------------------- Revision 1.2 date: 98/03/25 13:59:18; author: capps; state: Exp; lines added/del: 21/9 Fixes for SPP-UX ---------------------------- Revision 1.1 date: 98/03/25 10:48:21; author: capps; state: Exp; Initial revision ============================================================================= Added support for BIFF file output. Iozone can now write Excel spreadsheet format. This allows one to directly access the Excel spreadsheet without needing to import with tab and space delimited method. Added support for large files and threads for Solaris. Add support for FreeBSD Change default stride value to avoid nodalization with various spindle counts.
============================================================================= Version 3.3: Changed name of processor_bind to ioz_processor_bind to avoid collision with SVR5.4.MP shared library. Removed leading tab on an #ifdef that caused some compilers to get sick. ============================================================================= Version 3.4: Add support for OpenBSD ============================================================================= Version 3.6: Lots of code cleanup. Added support for OSF1 on the DEC Alpha. ============================================================================= Version 3.7: Add support for OSF Version 4. Add timer resolution problem detection. ============================================================================= Add support for OSF Version 5. ============================================================================= Version 3.13: Add support for Linux to use pthreads. ============================================================================= Version 3.16: ============================================================================= Add support for Netbsd Add support for Largefiles and Async I/O to Linux target ============================================================================= Version 3.17: ============================================================================= Removed small model for Linux. In the past Iozone was forced to use a small model for testing Linux as the normal load caused Redhat to panic. Redhat users have told me that the system now works fine with the normal load. They have tested Redhat 6.1 and it no longer panics. ============================================================================= Version 3.18: ============================================================================= Add support for BSDI. Base, largefiles, pthread. 
No async I/O ============================================================================= Revision 3.19 ============================================================================= date: 2000/03/08 14:47:21; author: capps; state: Exp; lines added/del: 4/1 Add support for getpagesize. This is used when available. ============================================================================= Revision 3.20 ============================================================================= date: 00/04/01 11:04:59; author: capps; state: Exp; lines added/del: 2/2 Fix for multiple filenames and range of threads being used. ============================================================================= Revision 3.21 ============================================================================= date: 00/04/01 11:10:54; author: capps; state: Exp; lines added/del: 3/1 SPPUX does not have getpagesize... ============================================================================= Revision 3.22 ============================================================================= Add support for Linux-ia64 Add support for mmap & normal file I/O mixing. ============================================================================= Revision 3.23 Fixups for IBM AIX. ============================================================================= Revision 3.24 Fixups for BSD 2.7 (New release of BSD that supports O_SYNC) ============================================================================= Revision 3.27 Fixups for Cygnus compiler changes. (Windows targets). With this change Iozone will compile with at least 2 versions of the Cygnus compilers. ============================================================================= Revision 3.28 ============================================================================= Add support for reading and writing while holding lockf() on the file. This turns out to be important aspect of NFS benchmarking. 
============================================================================= Revision 3.29 ============================================================================= Change calls to lockf() to calls to fcntl(). This is more portable. ============================================================================= Revision 3.30 ============================================================================= Add support for variable compute cycle time before each I/O operation. This allows one to more accurately represent a specific application that is doing compute/read/compute/read style operations. ============================================================================= Revision 3.30 through 3.37 ============================================================================= Add support for read and write telemetry files. ============================================================================= Revision 3.40 ============================================================================= Code cleanup for popen() usage in -M path. ============================================================================= Revision 3.41 ============================================================================= Bug fix for ops/sec in rewrite throughput testing. Added average throughput to output in throughput mode. ============================================================================= Revision 3.42 ============================================================================= Bug fix for read and re-read. Usage of un-initialized variable that caused results to be wrong. ============================================================================= Revision 3.43 ============================================================================= Add support for latency plot data for throughput testing. Each child thread/process gets its own data file.
============================================================================= Revision 3.44 ============================================================================= Enhance compatibility of multi-thread/proc latency offsets with telemetry file support. ============================================================================= Revision 3.45 through 3.48 ============================================================================= Added latency/offset plot data files for all throughput tests. ============================================================================= Revision 3.49 ============================================================================= Fixed compile warning for Linux off64_t redefinition. Add Solaris2.6 target with simple build. ============================================================================= Revision 3.50 ============================================================================= Added support for openbsd-threads Cleanup for page size foo. ============================================================================= Revision 3.51, 3.52, 3.53 ============================================================================= Cleanup for new random write testing in throughput mode. Improve perror handling. ============================================================================= Revision 3.54 ============================================================================= Add -g maxfilesize so people will not have to edit the source to test files bigger than 512 Mbytes. ============================================================================= Revision 3.55 ============================================================================= Supports -n and -g to set the min and max file sizes to be used for an auto mode run. 
============================================================================= Revision 3.56 ============================================================================= Added support for SCO Unixware SVR5 with gcc compiler ============================================================================= Revision 3.57 ============================================================================= Fixed bug where file locking was not being used when _LARGE_FILE64_SOURCE was defined in read_perf_test. ============================================================================= Revision 3.58 ============================================================================= Added -z option. This is to be used with the -a option. It provides more complete testing for small record sizes when the file sizes are very large. Fixed -a so that the cross-over mechanism works correctly. ============================================================================= Revision 3.59 ============================================================================= Fix a bug where the user specified -R -s but did not specify -a or -r. This caused the Excel report to print a bunch of zeros. ============================================================================= Revision 3.60 ============================================================================= Fix headers in the Excel output when cross over kicks in. 
============================================================================= Revision 3.61 ============================================================================= Added -y and -q to set record size range Added command line to output ============================================================================= Revision 3.62 ============================================================================= Put auto cross over back to 16 Meg ============================================================================= Revision 3.63 ============================================================================= Minor code cleanups for error messages ============================================================================= Revision 3.64 ============================================================================= Re-organize the help listing. ============================================================================= Revision 3.65 ============================================================================= Add labels to the latency/offset output files. ============================================================================= Revision 3.66 ============================================================================= Added Randy Dunlap to the list of contributors. Thanks Randy !! ============================================================================= Revision 3.67 ============================================================================= Fix labels when using -R and -i options together. ============================================================================= Revision 3.68 ============================================================================= Code cleanup. No functionality changes. ============================================================================= Revision 3.69 ============================================================================= Prevent mixed modes. Auto and throughput. Added support for the Plus extended options. 
============================================================================= Revision 3.70 ============================================================================= Added support for -+u option. Cpu utilization. ============================================================================= Revision 3.71 ============================================================================= Added comment for the support for -+u option. Cpu utilization. ============================================================================= Revision 3.72 ============================================================================= Added network testing mode. -+m (Experimental) Tested: Linux, HP-UX ============================================================================= Revision 3.73 ============================================================================= Added -xflag support for distributed mode. Handle interrupts when in distributed mode. ============================================================================= Revision 3.74 ============================================================================= Add default for REMOTE_SHELL ============================================================================= Revision 3.75 ============================================================================= Code cleanup. ============================================================================= Revision 3.76 ============================================================================= Portability change for shmat(). Added an example of client_list file to the distribution. ============================================================================= Revision 3.77 ============================================================================= Disable CPU utilization in distributed mode. Bug fix for CPU utilization in normal mode.
============================================================================= Revision 3.78 ============================================================================= Fix compatibility with AIX for shmat() ============================================================================= Revision 3.79 ============================================================================= Fix throughput labels when user is selecting specific tests with -i option. ============================================================================= Revision 3.80 ============================================================================= Remove dependency on min() and max(). They are not portable. ============================================================================= Revision 3.81 ============================================================================= Changes for 64bit architectures. Brad Smith. OpenBSD. ============================================================================= Revision 3.83 ============================================================================= Add -+m cluster option to the help list and the list of options. ============================================================================= Revision 3.84 -> 3.88 ============================================================================= Fix file descriptor leak in cluster mode. ============================================================================= Revision 3.89 -> 3.91 ============================================================================= Support for heterogeneous clusters, bug fix for -C ============================================================================= Revision 3.92 ============================================================================= Add a small sleep in the client so the master's terminate message can arrive before the client exits and closes the channel. 
============================================================================= Revision 3.93 ============================================================================= Add support for UWIN (Unix for Windows) ============================================================================= Revision 3.94 ============================================================================= Bug fix for client's working dir in cluster mode. ============================================================================= Revision 3.95 ============================================================================= Enable more options in Cluster mode. ============================================================================= Revision 3.96 ============================================================================= Add support for Solaris 8 in 64-bit mode. ============================================================================= Revision 3.97 ============================================================================= Linux demands a function proto for functions that take floats as args. ============================================================================= Revision 3.98 ============================================================================= Changes for Solaris to make their silly compiler eat reasonable function prototypes. (yech !!) ============================================================================= Revision 3.99 ============================================================================= Add protocol version checking for distributed messages. Add support for AIX 5.2 ============================================================================= Revision 3.100 ============================================================================= Fixes for socket ports. Needed to be in network format. 
============================================================================= Revision 3.101 ============================================================================= Add support for RSH environment override. ============================================================================= Revision 3.102 ============================================================================= Improve O_DIRECT and VX_DIRECT so that testing is done on the correct file on the correct client. ============================================================================= Revision 3.103 ============================================================================= Code cleanup. ============================================================================= Revision 3.104 ============================================================================= Code cleanup. Bug fix for O_DIRECT in read_perf_test. ============================================================================= Revision 3.105 ============================================================================= Bug fix for TRU64 and OSF where reclen was not getting displayed. ============================================================================= Revision 3.106 ============================================================================= Add -+d file I/O diagnostic mode. ============================================================================= Revision 3.107 ============================================================================= Fixes for the awesome Diagnostics mode. ============================================================================= Revision 3.108 ============================================================================= turn off cdebug Switch child comm to SOCK_STREAM. Avoid UDP fragment problems. ============================================================================= Revision 3.109 ============================================================================= Fix for "disrupt" and Direct I/O. 
Needs to be page size and aligned. ============================================================================= Revision 3.110 ============================================================================= Cleanup for -Wall to all source files. ============================================================================= Revision 3.111 ============================================================================= Fixes for UWIN compile warnings. ============================================================================= Revision 3.112 ============================================================================= Fixes for Windows compile warnings. do_compute() proto. ============================================================================= Revision 3.113 ============================================================================= Add definition char *dumb for Solaris to alloc_mem() ============================================================================= Revision 3.114 ============================================================================= Code cleanup for AIX. No async support caused warnings. ============================================================================= Revision 3.115 ============================================================================= Fix for Solaris returning short reads() from socket to child_listen. ============================================================================= Revision 3.116 ============================================================================= Add support for Mac OS X ============================================================================= Revision 3.117 ============================================================================= Add code to set the socket buffer window size. Solaris needs this. 
============================================================================= Revision 3.118 ============================================================================= Add O_Direct for AIX ============================================================================= Revision 3.119-> 3.120 ============================================================================= Fix some compiler warnings and implement the -+x option for setting the multiplier used for file and record size incrementing. ============================================================================= Revision 3.121 ============================================================================= Add changes from Debian. Add powerpc and sparc. Add changes to fix warning on Irix and Irix64 ============================================================================= Revision 3.122 ============================================================================= Bug fix for cluster mode. Need to bzero buffers before sprintf or sscanf ============================================================================= Revision 3.123 ============================================================================= Bug fix for handling all chars that are transported over messaging. ============================================================================= Revision 3.124 ============================================================================= Simplify the child's debug output mechanism. ============================================================================= Revision 3.125 ============================================================================= Fix for stonewall in cluster mode. ============================================================================= Revision 3.126 ============================================================================= Shrink the client_neutral_command structure so it fits in a single UDP packet. 
============================================================================= Revision 3.127 ============================================================================= Improve debug code for cluster mode. ============================================================================= Revision 3.128 ============================================================================= Reduce the message traffic due to master's distribution of STOP. Only one STOP distribution is needed. More can lead to socket buffer overflows. ============================================================================= Revision 3.129 ============================================================================= Bzero structures on the stack before using. No problem seen but it is a possible hole. ============================================================================= Revision 3.130 ============================================================================= Add error checking for the client file contents. ============================================================================= Revision 3.131 ============================================================================= Use prealloc() for HP-UX to create file for use with mmap. ============================================================================= Revision 3.132 ============================================================================= Add random mix mode. ============================================================================= Revision 3.133 ============================================================================= Make a better 32 bit random offset from calling rand()<<16||rand() ============================================================================= Revision 3.134 ============================================================================= Add -+p percentage read option. 
============================================================================= Revision 3.135 ============================================================================= Improve the mixed mode distribution algorithm. ============================================================================= Revision 3.136 ============================================================================= Fix auto bug introduced by mixed mode testing. Introduce -+r for O_RSYNC. ============================================================================= Revision 3.137 ============================================================================= Code cleanup for some warnings on IA-64 systems. ============================================================================= Revision 3.138 ============================================================================= Fixes for FreeBSD ============================================================================= Revision 3.139 ============================================================================= Add support for multiple -r and -s options. ============================================================================= Revision 3.140 ============================================================================= Code cleanup for non-ansi builds Add target build to output. ============================================================================= Revision 3.141 ============================================================================= Add speed check code. ============================================================================= Revision 3.142 ============================================================================= Increase maximum threads/procs to 256 ============================================================================= Revision 3.143 ============================================================================= Add contribs and -+t to help splash screen. 
============================================================================= Revision 3.144 ============================================================================= Bug fix for Redhat. ============================================================================= Revision 3.145 ============================================================================= Bug fix for when user used -l but failed to use -u too. ============================================================================= Revision 3.146 ============================================================================= Add void to speed_main() for non-ansi compiles. ============================================================================= Revision 3.147 ============================================================================= Add "Test running" So users will know the test is running and not to hit control 'c' too soon. Bug fix in libbif.c do_float() ============================================================================= Revision 3.148 ============================================================================= Turn off some child debug code. ============================================================================= Revision 3.149 ============================================================================= Disable fread and fwrite testing if mmap or async is in use. 
============================================================================= Revision 3.150 ============================================================================= Add pread/pwrite to Linux ============================================================================= Revision 3.151 ============================================================================= Handle -EB ============================================================================= Revision 3.152 ============================================================================= Add pread/pwrite throughput testing ============================================================================= Revision 3.153 ============================================================================= Changed second parameter to mmap() to be size_t. AIX needs this. ============================================================================= Revision 3.154 ============================================================================= Add support for madvise(). ============================================================================= Revision 3.155 ============================================================================= Code cleanup. ============================================================================= Revision 3.156 ============================================================================= Fixes for -w -t -R from Veritas ============================================================================= Revision 3.157 ============================================================================= Make madvise() go away for windows. 
============================================================================= Revision 3.158 ============================================================================= Permit smaller values for -n and -g ============================================================================= Revision 3.159 ============================================================================= Make initial write in initfile() a page size request. ============================================================================= Revision 3.160 ============================================================================= Stop test if file can not be written. ============================================================================= Revision 3.161 ============================================================================= Special handling for mmap of a file that is opened (O_DIRECT) ============================================================================= Revision 3.162 ============================================================================= Fixup for systems that do not have O_DIRECT. ============================================================================= Revision 3.163 ============================================================================= Simplify the prototype for do_compute() ============================================================================= Revision 3.164 ============================================================================= Zero compute_val inside of loops. ============================================================================= Revision 3.165 ============================================================================= Add support for O_DIRECT for IRIX and IRIX64 ============================================================================= Revision 3.166 ============================================================================= Improve macros and add prototypes. 
============================================================================= Revision 3.167 ============================================================================= Improve resolution of get_resolution(). ============================================================================= Revision 3.168 ============================================================================= Changes to support RedHat 9.0. ============================================================================= Revision 3.169 ============================================================================= Special handling of NAME for broken frontend in Cygwin/Windows env. ============================================================================= Revision 3.170 ============================================================================= Add support for the CrayX1 ============================================================================= Revision 3.171 ============================================================================= Remove reference to PAGE_SIZE for linux. This causes problems with SuSe 8. ============================================================================= Revision 3.172 ============================================================================= Fixup for SCO build. ============================================================================= Revision 3.173 ============================================================================= Add -DHAVE_PREAD for Solaris8-64 target. ============================================================================= Revision 3.174 ============================================================================= Code cleanup for Linux ============================================================================= Revision 3.177 ============================================================================= Improve -+d so that each byte is more unique. Improve byte level validation. 
============================================================================= Revision 3.178 ============================================================================= Provide byte level error detection with Found char and Expecting Char in -+d mode. ============================================================================= Revision 3.179 ============================================================================= Improve speed of -+d without losing uniqueness of bytes. ============================================================================= Revision 3.180 ============================================================================= Fix so that Windows can use multiple processes. Needed mmap like SCO. ============================================================================= Revision 3.181 ============================================================================= Use malloc() instead of mmap() for threads memory. ============================================================================= Revision 3.182 ============================================================================= Make CPU utilization use doubles everywhere. ============================================================================= Revision 3.183 ============================================================================= Add support for CPU utilization while in distributed mode. ============================================================================= Revision 3.184 ============================================================================= Make all times relative so multi node can do CPU usage. ============================================================================= Revision 3.185 ============================================================================= Remove unused variables.
============================================================================= Revision 3.186 ============================================================================= Add -+n option to disable re-testing. ============================================================================= Revision 3.187 ============================================================================= Fixup -+n for throughput mode. ============================================================================= Revision 3.188 ============================================================================= Fix Excel output when -+n is used. ============================================================================= Revision 3.189 ============================================================================= Add support for the IBM S390 running Linux. ============================================================================= Revision 3.190 ============================================================================= Cleanup naming conventions for the S390 and fixup a #define. ============================================================================= Revision 3.191 ============================================================================= Add 64 bit compiles for s390x Move BIG_ENDIAN to ZBIG_ENDIAN to avoid header conflicts. ============================================================================= Revision 3.192 ============================================================================= Make random offsets always based on 48 bit random values. ============================================================================= Revision 3.193 ============================================================================= Addition for make random offsets always based on 48 bit random values. ============================================================================= Revision 3.194 ============================================================================= Make rands long longs. 
============================================================================= Revision 3.195 ============================================================================= Bug fix for 48 bit rands in bsd4_2 and Windows. ============================================================================= Revision 3.196 ============================================================================= Make big_rand a long long. ============================================================================= Revision 3.197 ============================================================================= Inject Erik's changes for Multi-client Windows. ============================================================================= Revision 3.198 ============================================================================= Change proto version due to changes in Windows -+m support. Add Eric to the contributors list. ============================================================================= Revision 3.199 ============================================================================= Add more Windows support. ============================================================================= Revision 3.200 ============================================================================= Spelling error. ============================================================================= Revision 3.201 ============================================================================= Bug fixes from Erik H. ============================================================================= Revision 3.202 ============================================================================= Reduce usage of shared memory. ============================================================================= Revision 3.203 ============================================================================= Eliminate STUPID warning from the silly compiler. 
============================================================================= Revision 3.204 ============================================================================= Changes to remove warnings on BSD. Thanks to Christian Weisgerber ============================================================================= Revision 3.205 ============================================================================= Support for the AMD64 ============================================================================= Revision 3.206 ============================================================================= Add -+k for constant aggregate data set size in throughput mode. ============================================================================= Revision 3.207 ============================================================================= Add pread support for the TRU64 target. Department of Defense in Canada. Add -+q for delay in seconds between tests. ============================================================================= Revision 3.208 ============================================================================= Move variable up, GCC on Solaris was getting a bogus parse error ============================================================================= Revision 3.209 ============================================================================= Add support for -+D (O_DSYNC) mode testing. ============================================================================= Revision 3.210 ============================================================================= Make O_DSYNC conditional. 
============================================================================= Revision 3.211 ============================================================================= Add telemetry support for pread/pwrite ============================================================================= Revision 3.212 ============================================================================= Add record locking Add single file, file sharing. ============================================================================= Revision 3.213 ============================================================================= Enhance fill/verify (diag mode) for shared file. ============================================================================= Revision 3.214 ============================================================================= Remove warnings. ============================================================================= Revision 3.215 ============================================================================= Add prototype for mylockr() ============================================================================= Revision 3.216 ============================================================================= Fix prototype for mylockr ============================================================================= Revision 3.217 ============================================================================= Enable options for Windows systems. ============================================================================= Revision 3.218 ============================================================================= Add label to Excel spreadsheet that describes the rows and columns. Add support for Solaris64 with VxFS. Add support for Linux-arm ============================================================================= Revision 3.219 ============================================================================= Add sleep to permit child to get connection up before master does connect. 
============================================================================= Revision 3.220 ============================================================================= Improve master connect to child, without delays. ============================================================================= Revision 3.221 ============================================================================= Add -+B Mixed sequential testing. BlueArc request. ============================================================================= Revision 3.222 ============================================================================= Workaround for bug in Cygwin's sscanf ============================================================================= Revision 3.223 ============================================================================= Add transfer size to the output from -Q ============================================================================= Revision 3.224 ============================================================================= Work around for TCP_WAIT in Windows. ============================================================================= Revision 3.225 ============================================================================= Fix for broken rsh on Windows. ============================================================================= Revision 3.226 ============================================================================= Workaround for gcc 3.4. From the folks at Gentoo.org. ============================================================================= Revision 3.227 ============================================================================= Enable -+m and telemetry files. ============================================================================= Revision 3.228 ============================================================================= Make more unique file names for mmap files. 
============================================================================= Revision 3.229 ============================================================================= Add -+T time stamps. ============================================================================= Revision 3.230 ============================================================================= Bug fix for -m and validation code. ============================================================================= Revision 3.231 ============================================================================= Add a space to the throughput output dump. ============================================================================= Revision 3.232 ============================================================================= Add another space to the throughput output dump. ============================================================================= Revision 3.233 ============================================================================= Enable shared file with no locking ============================================================================= Revision 3.234 ============================================================================= Add sanity check to validate that open(name, O_CREAT | O_WRONLY | O_TRUNC, 0) does work correctly. This is an NFS client test that detects if the NFS server's local filesystem is broken and fails to support the sequence above correctly. ============================================================================= Revision 3.235 ============================================================================= add a close(fd) to the sanity test. ============================================================================= Revision 3.237 ============================================================================= Transport the -o flag to remote clients. 
============================================================================= Revision 3.238 ============================================================================= Fix hang when using HP-UX master, Linux client, ssh buildup. ============================================================================= Revision 3.239 ============================================================================= Add -+h hostname. Permits one to manually set the hostname. For systems with multiple names/NICs. ============================================================================= Revision 3.241 ============================================================================= Add -+h, set hostname, and fix Solaris hang. ============================================================================= Revision 3.242 ============================================================================= Remove the side effect of no-rereads when using -w. Now use -+n for consistency. ============================================================================= Revision 3.243 ============================================================================= Bug fix for -+k option. ============================================================================= Revision 3.246 ============================================================================= Add the -+U for WIN32 API calls .. Unbuffered I/O. Sony studios. ============================================================================= Revision 3.247 ============================================================================= Add support for -+U with -K (WIN32API calls + Jitter) ============================================================================= Revision 3.248 ============================================================================= Bug fix. -J with -+m not passing compute delay correctly.
============================================================================= Revision 3.249 ============================================================================= Add support for -i 8 when used with -+B (sequential mix) ============================================================================= Revision 3.250 ============================================================================= Change the default pattern. Samba is trying to cheat by special casing IOZONE.tmp, and the pattern of 0xA5. ============================================================================= Revision 3.251 ============================================================================= Make the default pattern random, and based on Iozone version. This is to prevent the hack from Richard Sharpe (in Samba) from special casing Iozone, and lying to the user. ============================================================================= Revision 3.252 ============================================================================= bug fix in pattern gen. ============================================================================= Revision 3.253 ============================================================================= Add -+Z old data set mode. Add -+X constant data for short circuit testing only. ============================================================================= Revision 3.254 ============================================================================= Multi-node changes for new options. (-+Z and -+X) ============================================================================= Revision 3.255 ============================================================================= Add -+K flag for Sony. ============================================================================= Revision 3.256 ============================================================================= Move -+K outside of Windows only. 
============================================================================= Revision 3.257 ============================================================================= Simplify percentage calculation ============================================================================= Revision 3.258 ============================================================================= Add error checking for -f and -F in the wrong modes. ============================================================================= Revision 3.259 ============================================================================= Bug fix for pbuffer allocation on remote clients. ============================================================================= Revision 3.260 ============================================================================= Check for max_rec_size when using ranges. -r -r -r ============================================================================= Revision 3.261 ============================================================================= Fix for Debian user bug. -r 1m -n 1m -g 2m gave bogus error. ============================================================================= Revision 3.262 ============================================================================= Bug fix for -k used in conjunction with -t and content validation. ============================================================================= Revision 3.263 ============================================================================= Bug fix for -k used in conjunction with -t and content validation. ============================================================================= Revision 3.264 ============================================================================= Add DragonFly target. 
============================================================================= Revision 3.265 ============================================================================= Put PER_VECTOR_OFFSET in for HP-UX ============================================================================= Revision 3.266 ============================================================================= Fix compiler warning messages ============================================================================= Revision 3.267 ============================================================================= Enforce minimum file size of page_size ============================================================================= Revision 3.268 ============================================================================= Minor fixes. ============================================================================= Revision 3.269 ============================================================================= Check fsync and close for errors. ============================================================================= Revision 3.270 ============================================================================= Adding support for testing block devices. Will be done in phases. This is phase 1. (Single threaded mode only) ============================================================================= Revision 3.271 ============================================================================= Adding 4 token support to client_list. Each entry may now contain 4 tokens and the new one is the absolute path to the temp file for testing. ============================================================================= Revision 3.272 Editorial change. ============================================================================= Revision 3.273 Add support for external monitor start & stop for throughput tests. IMON_START and IMON_STOP environmental variables used.
============================================================================= Revision 3.274 ============================================================================= minor change. ============================================================================= Revision 3.275 Bug fix for systems without O_DIRECT. Fall through in switch statement. ============================================================================= Revision 3.276 Fix for -c -t over NFS and initial writer close() when told by another to stop ============================================================================= Revision 3.277 Add Benny Halevy to contributors list. ============================================================================= Revision 3.278 Fix for Cygwin environment. ============================================================================= Revision 3.279 Code cleanup, and add arg to external trigger. ============================================================================= Revision 3.280 Code fixes for macosx ============================================================================= ============================================================================= Revision 3.281 Add support for building with Sun's Studio 11 compiler ============================================================================= Revision 3.283 Bug fix for fread/fwrite with > 2Gig files. ============================================================================= Revision 3.287 Add O_DIRECT for Windows ============================================================================= Revision 3.288 Add -+w dedup testing mode. ============================================================================= Revision 3.289 Make remaining non-dedup data unique. ============================================================================= Revision 3.290 Make non-dedupable more unique. 
============================================================================= Revision 3.291 Bug fix for non-dedup. ============================================================================= Revision 3.292 Make random offsets unique, using Knuth shuffle. ============================================================================= Revision 3.292 free memory used for random offset uniqueness. ============================================================================= Revision 3.294 Make unique/random offsets 64bits. ============================================================================= Revision 3.295 Add fallback for random/unique. ============================================================================= Revision 3.296 Make non-dedup region more unique ============================================================================= Revision 3.297 Add -+y ## to set percentage of interior dedup. ============================================================================= Revision 3.298 Add -+y ## to set percentage of interior dedup. ============================================================================= Revision 3.299 Bug fixes for -+w and -+y ============================================================================= Revision 3.300 Minor fix for dedup ============================================================================= Revision 3.302 Adding -+C to set percent of dedupable within a file. ============================================================================= Revision 3.303 bug fix ============================================================================= Revision 3.304 Add solaris to read sync O_RSYNC ============================================================================= Revision 3.305 Add space to avoid field output touching each other. ============================================================================= Revision 3.306 Add check for config file exceeding MAXSTREAMS. 
============================================================================= Revision 3.307 Add new contributor's name. ============================================================================= Revision 3.308 Fix type-oh ============================================================================= Revision 3.309 Bug fix. rewrite_rec needed to fill entire buffer, or later stride read will fail. ============================================================================= Revision 3.310 ============================================================================= Add ability for remote clients to return errors to the master and have the master display on output. ============================================================================= Revision 3.311 ============================================================================= fix double reporting of client errors ============================================================================= Revision 3.312 ============================================================================= Eliminate extra file descriptor in fwrite test. ============================================================================= Revision 3.312 ============================================================================= bug fix for barray allocation in -T mode ============================================================================= Revision 3.313 Revision 3.314 Revision 3.315 ============================================================================= Changes from Debian: Retry umount, add fileop for linux-sparc, and fix column width in fileop for faster boxes. 
============================================================================= Revision 3.316 Add O_DIRECT support to FreeBSD ============================================================================= Revision 3.317 Fix for defines in FreeBSD ============================================================================= Revision 3.318 Add IMON_SYNC to enable monitor scripts to be run sync. ============================================================================= Revision 3.319 Add directio() for Solaris ============================================================================= Revision 3.320 Add fixes for unresolved references in directio() for Solaris ============================================================================= Revision 3.321 Fix typo. ============================================================================= Revision 3.322 Fix c++ style comment back to 'C' style comment. ============================================================================= Revision 3.323 Bug fix for check_filenames and large files ============================================================================= Revision 3.324 Replace triple rand() calls with 64 bit Mersenne Twister. ============================================================================= Revision 3.325 Add read-only, external file, with no-verify. -+E ============================================================================= Revision 3.325 Permit -+E on dedup files. ============================================================================= Revision 3.327 Permit -+E on random read only testing, on existing file.
============================================================================= Revision 3.328 Add passing master listener's port to remote children if it is not HOST_LIST_PORT ============================================================================= Revision 3.329 Adding Dave Boone's notruncate option -+N ============================================================================= Revision 3.330 Bug fix for Dave's code. ============================================================================= Revision 3.331 Add multi -t ops. Fabrice ============================================================================= Revision 3.332 Added Li Qin's multi dedup set support. -+S # ============================================================================= Revision 3.333 Bug fix for -+S dedup_mseed needs to be an integer ============================================================================= Revision 3.334 Make -+S generate more uniqueness ============================================================================= Revision 3.335 Make -+S generate more uniqueness ============================================================================= Revision 3.336 Make -+S generate more uniqueness ============================================================================= Revision 3.337 Bug fix for -+S ============================================================================= Revision 3.338 Make umount/remount more robust, in the face of server errors. ============================================================================= Revision 3.339 Improve the help string for the -+S option. ============================================================================= Revision 3.340 Add new contributor name. ============================================================================= Revision 3.342 Add support for the programmable interdimensional timer. 
============================================================================= Revision 3.343 Bug fix for PIT on remote clients. ============================================================================= Revision 3.344 Bug fix for PIT on remote clients. ============================================================================= Revision 3.345 Have children re-acquire get_resolution. ============================================================================= Revision 3.346 Bug fix for t_range addition. ============================================================================= Revision 3.347 Get rid of a warning. ( An invalid warning, but none the less ) ============================================================================= Revision 3.348 Add more words to the usage warnings and license ============================================================================= Revision 3.349 Remove Ascii dependency for IBM's Z/OS that speaks EBCDIC. ============================================================================= Revision 3.353 Add support for SUA ============================================================================= Revision 3.354 Remove Sanity check so that SMB on Windows, under SUA, works ============================================================================= Revision 3.355 Cache the getaddrinfo call. ============================================================================= Revision 3.356 delete optimization.. bad...Cache the getaddrinfo call. 
============================================================================= Revision 3.358 Change pit to use unsigned long longs ============================================================================= Revision 3.359 Add Linux processor affinity ============================================================================= Revision 3.360 Remove UDP usage ============================================================================= Revision 3.361 Increment protocol_version to catch incompat versions. ============================================================================= Revision 3.362 Fixup for new include needed by Solaris10 ============================================================================= Revision 3.363 Patch for Mac errno ============================================================================= Revision 3.364 Patch for Mac printf's ============================================================================= Revision 3.365 Fix Josh's introduction of new Linux warnings. ============================================================================= Revision 3.366 Take sleep(1) out put path, deal with it in error/retry path ============================================================================= Revision 3.367 Add -+z latency histogram logging. ============================================================================= Revision 3.368 Format change for the -+z latency histogram logging. ============================================================================= Revision 3.369 Added -+O Op_rate control. ============================================================================= Revision 3.370 Close race condition with master closing socket to child async reader ============================================================================= Revision 3.371 Add "mygen" generation to the remote children protocol. This prevents zombies from coming back to life and killing future masters. 
============================================================================= Revision 3.372 Set Listen(s,100) to Listen(s,MAXSTREAMS) ============================================================================= Revision 3.373 Move label "again" to outside of cdebug. ============================================================================= Revision 3.374 More fixes for busted crap in Solaris !!! ============================================================================= Revision 3.376 AIX update. They now have errno.h ============================================================================= Revision 3.377 Need errno.h for FreeBSD ============================================================================= Revision 3.379 Need to include errno.h for Cygwin ============================================================================= Revision 3.381 Add SO_LINGER for master_listen and child_listen, so that wind-blows will work like all other systems on the planet. ============================================================================= Revision 3.382 Fix for linger addition ============================================================================= Revision 3.383 Fix for linger addition ============================================================================= Revision 3.384 Fix for linger addition ============================================================================= Revision 3.385 Make linger for all ============================================================================= Revision 3.387 Change sleep() calls, that help connect() to nanosleep() calls.
============================================================================= Revision 3.388 Fixup remainder for nanosleep() ============================================================================= Revision 3.389 Fixup remainder for nanosleep() ============================================================================= Revision 3.390 Add code for pread/pwrite from Ben England (Redhat) ============================================================================= Revision 3.391 Add code for MDEBUG and CDEBUG from Bob England (Redhat) ============================================================================= Revision 3.392 Add code for building HPUX. Errno.h ============================================================================= Revision 3.393 Fixes for Windows (nanosleep doesn't always work ) ============================================================================= Revision 3.394 Fixes for preadv and pwritev from RedHat (Ben Englanc) ============================================================================= Revision 3.395 Add warnings for default switch cases, and exit with value for unknowns. ============================================================================= Revision 3.396 Fix warnings from RedHat patches ============================================================================= Revision 3.397 Bug fix for getopt default case, with bad parameter handed in. ============================================================================= Revision 3.398 Adding thread_read_test and thread_write_test. ============================================================================= papi-5.6.0/src/perfctr-2.6.x/usr.lib/event_set_amd.c000775 001750 001750 00000055053 13216244367 024120 0ustar00jshenry1963jshenry1963000000 000000 /* $Id: event_set_amd.c,v 1.8.2.1 2004/08/02 22:27:06 mikpe Exp $ * Performance counter event descriptions for AMD K7 and K8. 
 *
 * Copyright (C) 2003 Mikael Pettersson
 *
 * References
 * ----------
 * "AMD Athlon Processor x86 Code Optimization Guide",
 * Appendix D: "Performance Monitoring Counters".
 * AMD Publication #22007
 * Revision E (on AMD Processor Technical Documents CD, Med-12/99-0, 21860F)
 * Revision K (at http://www.amd.com/).
 *
 * "BIOS and Kernel Developer's Guide for AMD Athlon 64 and
 * AMD Opteron Processors", Chapter 10: "Performance Monitoring".
 * AMD Publication #26094, Revision 3.14 (at http://www.amd.com).
 * "Revision Guide for AMD Opteron Processors",
 * AMD Publication #25759, Revision 3.09
 */
#include <stddef.h> /* for NULL */
#include "libperfctr.h"
#include "event_set.h"

/*
 * AMD K7 events.
 *
 * Note: Different revisions of AMD #22007 list different sets of events.
 * We split the K7 event set into an "official" part based on recent
 * revisions of #22007, and an "unofficial" part which includes events
 * only documented in older revisions of #22007 (specifically Rev. E).
 *
 * All official K7 events are also present in K8, as are most of the
 * unofficial K7 events.
*/ static const struct perfctr_unit_mask_5 k7_um_moesi = { { .type = perfctr_um_type_bitmask, .default_value = 0x1F, .nvalues = 5 }, { { 0x10, "Modified (M)" }, { 0x08, "Owner (O)" }, { 0x04, "Exclusive (E)" }, { 0x02, "Shared (S)" }, { 0x01, "Invalid (I)" } } }; static const struct perfctr_event k7_official_events[] = { { 0x40, 0xF, NULL, "DATA_CACHE_ACCESSES", "Data cache accesses" }, { 0x41, 0xF, NULL, "DATA_CACHE_MISSES", "Data cache misses" }, { 0x42, 0xF, UM(k7_um_moesi), "DATA_CACHE_REFILLS_FROM_L2", "Data cache refills from L2" }, { 0x43, 0xF, UM(k7_um_moesi), "DATA_CACHE_REFILLS_FROM_SYSTEM", "Data cache refills from system" }, { 0x44, 0xF, UM(k7_um_moesi), "DATA_CACHE_WRITEBACKS", "Data cache writebacks" }, { 0x45, 0xF, NULL, "L1_DTLB_MISSES_AND_L2_DTLB_HITS", "L1 DTLB misses and L2 DTLB hits" }, { 0x46, 0xF, NULL, "L1_AND_L2_DTLB_MISSES", "L1 and L2 DTLB misses" }, { 0x47, 0xF, NULL, "MISALIGNED_DATA_REFERENCES", "Misaligned data references" }, { 0x80, 0xF, NULL, "INSTRUCTION_CACHE_FETCHES", "Instruction cache fetches" }, { 0x81, 0xF, NULL, "INSTRUCTION_CACHE_MISSES", "Instruction cache misses" }, { 0x84, 0xF, NULL, "L1_ITLB_MISSES_AND_L2_ITLB_HITS", /* XXX: was L1_ITLB_MISSES */ "L1 ITLB misses (and L2 ITLB hits)" }, { 0x85, 0xF, NULL, "L1_AND_L2_ITLB_MISSES", /* XXX: was L2_ITLB_MISSES */ "(L1 and) L2 ITLB misses" }, { 0xC0, 0xF, NULL, "RETIRED_INSTRUCTIONS", "Retired instructions (includes exceptions, interrupts, resyncs)" }, { 0xC1, 0xF, NULL, "RETIRED_OPS", "Retired Ops" }, { 0xC2, 0xF, NULL, "RETIRED_BRANCHES", "Retired branches (conditional, unconditional, exceptions, interrupts)" }, { 0xC3, 0xF, NULL, "RETIRED_BRANCHES_MISPREDICTED", "Retired branches mispredicted" }, { 0xC4, 0xF, NULL, "RETIRED_TAKEN_BRANCHES", "Retired taken branches" }, { 0xC5, 0xF, NULL, "RETIRED_TAKEN_BRANCHES_MISPREDICTED", "Retired taken branches mispredicted" }, { 0xC6, 0xF, NULL, "RETIRED_FAR_CONTROL_TRANSFERS", "Retired far control transfers" }, { 0xC7, 0xF, NULL, 
"RETIRED_RESYNC_BRANCHES", "Retired resync branches (only non-control transfer branches counted)" }, { 0xCD, 0xF, NULL, "INTERRUPTS_MASKED_CYCLES", "Interrupts masked cycles (IF=0)" }, { 0xCE, 0xF, NULL, "INTERRUPTS_MASKED_WHILE_PENDING_CYCLES", "Interrupts masked while pending cycles (INTR while IF=0)" }, { 0xCF, 0xF, NULL, "NUMBER_OF_TAKEN_HARDWARE_INTERRUPTS", "Number of taken hardware interrupts" }, }; static const struct perfctr_event_set k7_official_event_set = { .cpu_type = PERFCTR_X86_AMD_K7, .event_prefix = "K7_", .include = NULL, .nevents = ARRAY_SIZE(k7_official_events), .events = k7_official_events, }; /* also in K8 */ static const struct perfctr_unit_mask_7 k7_um_seg_reg = { { .type = perfctr_um_type_bitmask, .default_value = 0x3F, .nvalues = 7 }, { { 0x40, "HS" }, /* what's this? */ { 0x20, "GS" }, { 0x10, "FS" }, { 0x08, "DS" }, { 0x04, "SS" }, { 0x02, "CS" }, { 0x01, "ES" } } }; /* not in K8 */ static const struct perfctr_unit_mask_5 k7_um_system_request = { { .type = perfctr_um_type_bitmask, .default_value = 0x73, .nvalues = 5 }, { { 0x40, "WB" }, { 0x20, "WP" }, { 0x10, "WT" }, { 0x02, "WC" }, { 0x01, "UC" } } }; /* not in K8 */ static const struct perfctr_unit_mask_3 k7_um_snoop_hits = { { .type = perfctr_um_type_bitmask, .default_value = 0x07, .nvalues = 3 }, { { 0x04, "L2 (L2 hit and no DC hit)" }, { 0x02, "Data cache" }, { 0x01, "Instruction cache" } } }; /* not in K8 */ static const struct perfctr_unit_mask_2 k7_um_ecc = { { .type = perfctr_um_type_bitmask, .default_value = 0x03, .nvalues = 2 }, { { 0x02, "L2 single bit error" }, { 0x01, "System single bit error" } } }; /* not in K8 */ static const struct perfctr_unit_mask_4 k7_um_invalidates = { { .type = perfctr_um_type_bitmask, .default_value = 0x0F, .nvalues = 4 }, { { 0x08, "I invalidates D" }, { 0x04, "I invalidates I" }, { 0x02, "D invalidates D" }, { 0x01, "D invalidates I" } } }; /* not in K8 */ static const struct perfctr_unit_mask_8 k7_um_L2_requests = { { .type = 
perfctr_um_type_bitmask, .default_value = 0xFF, .nvalues = 8 }, { { 0x80, "Data block write from the L2 (TBL RMW)" }, { 0x40, "Data block write from the DC" }, { 0x20, "Data block write from the system" }, { 0x10, "Data block read data store" }, { 0x08, "Data block read data load" }, { 0x04, "Data block read instruction" }, { 0x02, "Tag write" }, { 0x01, "Tag read" } } }; static const struct perfctr_event k7_unofficial_events[] = { { 0x20, 0xF, UM(k7_um_seg_reg), "SEGMENT_REGISTER_LOADS", /* also in K8 */ "Segment register loads" }, { 0x21, 0xF, NULL, "STORES_TO_ACTIVE_INSTRUCTION_STREAM", /* also in K8 as SELF_MODIFY_RESYNC */ "Stores to active instruction stream" }, { 0x64, 0xF, NULL, "DRAM_SYSTEM_REQUESTS", /* not in K8 */ "DRAM system requests" }, { 0x65, 0xF, UM(k7_um_system_request), "SYSTEM_REQUESTS_WITH_THE_SELECTED_TYPE", /* not in K8 */ "System requests with the selected type" }, { 0x73, 0xF, UM(k7_um_snoop_hits), "SNOOP_HITS", /* not in K8 */ "Snoop hits" }, { 0x74, 0xF, UM(k7_um_ecc), "SINGLE_BIT_ECC_ERRORS_DETECTED_CORRECTED", /* not in K8 */ /* XXX: was SINGLE_BIT_ECC_ERRORS_DETECTED_OR_CORRECTED */ "Single-bit ECC errors detected/corrected" }, { 0x75, 0xF, UM(k7_um_invalidates), "INTERNAL_CACHE_LINE_INVALIDATES", /* not in K8 */ "Internal cache line invalidates" }, { 0x76, 0xF, NULL, "CYCLES_PROCESSOR_IS_RUNNING", /* also in K8 */ "Cycles processor is running (not in HLT or STPCLK)" }, { 0x79, 0xF, UM(k7_um_L2_requests), "L2_REQUESTS", /* not in K8 */ "L2 requests" }, { 0x7A, 0xF, NULL, "CYCLES_THAT_AT_LEAST_ONE_FILL_REQUEST_WAITED_TO_USE_THE_L2", /* not in K8 */ "Cycles that at least one fill request waited to use the L2" }, { 0x82, 0xF, NULL, "INSTRUCTION_CACHE_REFILLS_FROM_L2", /* also in K8 */ "Instruction cache refills from L2" }, { 0x83, 0xF, NULL, "INSTRUCTION_CACHE_REFILLS_FROM_SYSTEM", /* also in K8 */ "Instruction cache refills from system" }, { 0x86, 0xF, NULL, "SNOOP_RESYNCS", /* also in K8 */ "Snoop resyncs" }, { 0x87, 0xF, NULL, 
"INSTRUCTION_FETCH_STALL_CYCLES", /* also in K8 */ "Instruction fetch stall cycles" }, { 0x88, 0xF, NULL, "RETURN_STACK_HITS", /* also in K8 */ "Instruction cache hits" }, { 0x89, 0xF, NULL, "RETURN_STACK_OVERFLOW", /* also in K8 */ "Return stack overflow" }, { 0xC8, 0xF, NULL, "RETIRED_NEAR_RETURNS", /* also in K8 */ "Retired near returns" }, { 0xC9, 0xF, NULL, "RETIRED_NEAR_RETURNS_MISPREDICTED", /* also in K8 */ "Retired near returns mispredicted" }, { 0xCA, 0xF, NULL, "RETIRED_INDIRECT_BRANCHES_WITH_TARGET_MISPREDICTED", /* also in K8 */ "Retired indirect branches with target mispredicted" }, { 0xD0, 0xF, NULL, "INSTRUCTION_DECODER_EMPTY", /* also in K8 */ "Instruction decoder empty" }, { 0xD1, 0xF, NULL, "DISPATCH_STALLS", /* also in K8 */ "Dispatch stalls (event masks D2h through DAh below combined)" }, { 0xD2, 0xF, NULL, "BRANCH_ABORT_TO_RETIRE", /* also in K8 */ /* XXX: was BRANCH_ABORTS_TO_RETIRE */ "Branch abort to retire" }, { 0xD3, 0xF, NULL, "SERIALIZE", /* also in K8 */ "Serialize" }, { 0xD4, 0xF, NULL, "SEGMENT_LOAD_STALL", /* also in K8 */ "Segment load stall" }, { 0xD5, 0xF, NULL, "ICU_FULL", /* also in K8 */ "ICU full" }, { 0xD6, 0xF, NULL, "RESERVATION_STATIONS_FULL", /* also in K8 */ "Reservation stations full" }, { 0xD7, 0xF, NULL, "FPU_FULL", /* also in K8 */ "FPU full" }, { 0xD8, 0xF, NULL, "LS_FULL", /* also in K8 */ "LS full" }, { 0xD9, 0xF, NULL, "ALL_QUIET_STALL", /* also in K8 */ "All quiet stall" }, { 0xDA, 0xF, NULL, "FAR_TRANSFER_OR_RESYNC_BRANCH_PENDING", /* also in K8 */ "Fall transfer or resync branch pending" }, { 0xDC, 0xF, NULL, "BREAKPOINT_MATCHES_FOR_DR0", /* also in K8 */ "Breakpoint matches for DR0" }, { 0xDD, 0xF, NULL, "BREAKPOINT_MATCHES_FOR_DR1", /* also in K8 */ "Breakpoint matches for DR1" }, { 0xDE, 0xF, NULL, "BREAKPOINT_MATCHES_FOR_DR2", /* also in K8 */ "Breakpoint matches for DR2" }, { 0xDF, 0xF, NULL, "BREAKPOINT_MATCHES_FOR_DR3", /* also in K8 */ "Breakpoint matches for DR3" }, }; const struct perfctr_event_set 
perfctr_k7_event_set = { .cpu_type = PERFCTR_X86_AMD_K7, .event_prefix = "K7_", .include = &k7_official_event_set, .nevents = ARRAY_SIZE(k7_unofficial_events), .events = k7_unofficial_events, }; /* * AMD K8 events. * * Some events are described as being "Revision B and later", but * AMD does not document how to distinguish Revision B processors * from earlier ones. */ static const struct perfctr_unit_mask_6 k8_um_fpu_ops = { /* Revision B and later */ { .type = perfctr_um_type_bitmask, .default_value = 0x3F, .nvalues = 6 }, { { 0x01, "Add pipe ops excluding junk ops" }, { 0x02, "Multiply pipe ops excluding junk ops" }, { 0x04, "Store pipe ops excluding junk ops" }, { 0x08, "Add pipe junk ops" }, { 0x10, "Multiply pipe junk ops" }, { 0x20, "Store pipe junk ops" } } }; static const struct perfctr_unit_mask_2 k8_um_ecc = { { .type = perfctr_um_type_bitmask, .default_value = 0x03, .nvalues = 2 }, { { 0x01, "Scrubber error" }, { 0x02, "Piggyback scrubber errors" } } }; static const struct perfctr_unit_mask_3 k8_um_prefetch = { { .type = perfctr_um_type_bitmask, .default_value = 0x07, .nvalues = 3 }, { { 0x01, "Load" }, { 0x02, "Store" }, { 0x04, "NTA" } } }; static const struct perfctr_unit_mask_5 k8_um_int_L2_req = { { .type = perfctr_um_type_bitmask, .default_value = 0x1F, .nvalues = 5 }, { { 0x01, "IC fill" }, { 0x02, "DC fill" }, { 0x04, "TLB reload" }, { 0x08, "Tag snoop request" }, { 0x10, "Cancelled request" } } }; static const struct perfctr_unit_mask_3 k8_um_fill_req = { { .type = perfctr_um_type_bitmask, .default_value = 0x07, .nvalues = 3 }, { { 0x01, "IC fill" }, { 0x02, "DC fill" }, { 0x04, "TLB reload" } } }; static const struct perfctr_unit_mask_2 k8_um_fill_L2 = { { .type = perfctr_um_type_bitmask, .default_value = 0x03, .nvalues = 2 }, { { 0x01, "Dirty L2 victim" }, { 0x02, "Victim from L2" } } }; static const struct perfctr_unit_mask_4 k8_um_fpu_instr = { /* Revision B and later */ { .type = perfctr_um_type_bitmask, .default_value = 0x0F, .nvalues = 4 
/* (tail of a unit-mask initializer whose beginning lies above this point) */
}, { { 0x01, "x87 instructions" }, { 0x02, "Combined MMX & 3DNow! instructions" }, { 0x04, "Combined packed SSE and SSE2 instructions" }, { 0x08, "Combined scalar SSE and SSE2 instructions" } } };

/*
 * Unit-mask tables for the K8 events below.
 *
 * Each table is a header { .type, .default_value, .nvalues } followed by
 * .nvalues { bit value, description } pairs.  All tables here are bitmasks,
 * and in every one of them .default_value is the OR of all listed bits.
 */

/* Unit mask referenced by event 0xCC (RETIRED_FASTPATH_INSTRS). */
static const struct perfctr_unit_mask_3 k8_um_fpu_fastpath = { /* Revision B and later */
	{ .type = perfctr_um_type_bitmask, .default_value = 0x07, .nvalues = 3 },
	{ { 0x01, "With low op in position 0" },
	  { 0x02, "With low op in position 1" },
	  { 0x04, "With low op in position 2" } } };

/* Unit mask referenced by event 0xDB (FPU_EXCEPTIONS). */
static const struct perfctr_unit_mask_4 k8_um_fpu_exceptions = { /* Revision B and later */
	{ .type = perfctr_um_type_bitmask, .default_value = 0x0F, .nvalues = 4 },
	{ { 0x01, "x87 reclass microfaults" },
	  { 0x02, "SSE retype microfaults" },
	  { 0x04, "SSE reclass microfaults" },
	  { 0x08, "SSE and x87 microtraps" } } };

/* Unit mask referenced by event 0xE0 (MEM_PAGE_ACCESS). */
static const struct perfctr_unit_mask_3 k8_um_page_access = {
	{ .type = perfctr_um_type_bitmask, .default_value = 0x07, .nvalues = 3 },
	{ { 0x01, "Page hit" },
	  { 0x02, "Page miss" },
	  { 0x04, "Page conflict" } } };

/* Unit mask referenced by event 0xE3 (MEM_TURNAROUND). */
static const struct perfctr_unit_mask_3 k8_um_turnaround = {
	{ .type = perfctr_um_type_bitmask, .default_value = 0x07, .nvalues = 3 },
	{ { 0x01, "DIMM turnaround" },
	  { 0x02, "Read to write turnaround" },
	  { 0x04, "Write to read turnaround" } } };

/* Unit mask referenced by event 0xE4 (MEM_BYPASS_SAT). */
static const struct perfctr_unit_mask_4 k8_um_saturation = {
	{ .type = perfctr_um_type_bitmask, .default_value = 0x0F, .nvalues = 4 },
	{ { 0x01, "Memory controller high priority bypass" },
	  { 0x02, "Memory controller low priority bypass" },
	  { 0x04, "DRAM controller interface bypass" },
	  { 0x08, "DRAM controller queue bypass" } } };

/* Unit mask referenced by event 0xEB (SIZED_COMMANDS). */
static const struct perfctr_unit_mask_7 k8_um_sized_commands = {
	{ .type = perfctr_um_type_bitmask, .default_value = 0x7F, .nvalues = 7 },
	{ { 0x01, "NonPostWrSzByte" },
	  { 0x02, "NonPostWrSzDword" },
	  { 0x04, "PostWrSzByte" },
	  { 0x08, "PostWrSzDword" },
	  { 0x10, "RdSzByte" },
	  { 0x20, "RdSzDword" },
	  { 0x40, "RdModWr" } } };

/* Unit mask referenced by event 0xEC (PROBE_RESULT). */
static const struct perfctr_unit_mask_4 k8_um_probe = {
	{ .type = perfctr_um_type_bitmask, .default_value = 0x0F, .nvalues = 4 },
	{ { 0x01, "Probe miss" },
	  { 0x02, "Probe hit" },
	  { 0x04, "Probe hit dirty without memory cancel" },
	  { 0x08, "Probe hit dirty with memory cancel" } } };

/* Unit mask shared by the three HyperTransport events 0xF6-0xF8. */
static const struct perfctr_unit_mask_4 k8_um_ht = {
	{ .type = perfctr_um_type_bitmask, .default_value = 0x0F, .nvalues = 4 },
	{ { 0x01, "Command sent" },
	  { 0x02, "Data sent" },
	  { 0x04, "Buffer release sent" },
	  { 0x08, "Nop sent" } } };

/*
 * Events common to every K8 revision handled in this file.
 *
 * Each entry is { event code, 0xF, unit mask or NULL, name, description }.
 * NOTE(review): the constant second field (0xF) looks like a mask of the
 * counters that may host the event -- confirm against struct perfctr_event.
 * Event-code ranges marked "from K7 official event set" are not repeated
 * here; they are provided by the included k7_official_event_set (see
 * k8_common_event_set below).  Event 0x24 is deliberately absent because
 * its unit mask differs between revisions.
 */
static const struct perfctr_event k8_common_events[] = {
	{ 0x00, 0xF, UM(k8_um_fpu_ops), "DISPATCHED_FPU_OPS", /* Revision B and later */
	  "Dispatched FPU ops" },
	{ 0x01, 0xF, NULL, "NO_FPU_OPS", /* Revision B and later */
	  "Cycles with no FPU ops retired" },
	{ 0x02, 0xF, NULL, "FAST_FPU_OPS", /* Revision B and later */
	  "Dispatched FPU ops that use the fast flag interface" },
	{ 0x20, 0xF, UM(k7_um_seg_reg), "SEG_REG_LOAD",
	  "Segment register load" },
	{ 0x21, 0xF, NULL, "SELF_MODIFY_RESYNC",
	  "Microarchitectural resync caused by self modifying code" },
	{ 0x22, 0xF, NULL, "LS_RESYNC_BY_SNOOP",
	  /* similar to 0x86, but LS unit instead of IC unit */
	  "Microarchitectural resync caused by snoop" },
	{ 0x23, 0xF, NULL, "LS_BUFFER_FULL",
	  "LS Buffer 2 Full" },
	/* 0x24: changed in Revision C */
	{ 0x25, 0xF, NULL, "OP_LATE_CANCEL",
	  "Microarchitectural late cancel of an operation" },
	{ 0x26, 0xF, NULL, "CFLUSH_RETIRED",
	  "Retired CFLUSH instructions" },
	{ 0x27, 0xF, NULL, "CPUID_RETIRED",
	  "Retired CPUID instructions" },
	/* 0x40-0x47: from K7 official event set */
	{ 0x48, 0xF, NULL, "ACCESS_CANCEL_LATE",
	  "Microarchitectural late cancel of an access" },
	{ 0x49, 0xF, NULL, "ACCESS_CANCEL_EARLY",
	  "Microarchitectural early cancel of an access" },
	{ 0x4A, 0xF, UM(k8_um_ecc), "ECC_BIT_ERR",
	  "One bit ECC error recorded found by scrubber" },
	{ 0x4B, 0xF, UM(k8_um_prefetch), "DISPATCHED_PRE_INSTRS",
	  "Dispatched prefetch instructions" },
	/* 0x4C: added in Revision C */
	{ 0x76, 0xF, NULL, "CPU_CLK_UNHALTED", /* XXX: was CYCLES_PROCESSOR_IS_RUNNING */
	  "Cycles processor is running (not in HLT or STPCLK)" },
	{ 0x7D, 0xF, UM(k8_um_int_L2_req), "BU_INT_L2_REQ",
	  "Internal L2 request" },
	{ 0x7E, 0xF, UM(k8_um_fill_req), "BU_FILL_REQ",
	  "Fill request that missed in L2" },
	{ 0x7F, 0xF, UM(k8_um_fill_L2), "BU_FILL_L2",
	  "Fill into L2" },
	/* 0x80-0x81: from K7 official event set */
	{ 0x82, 0xF, NULL, "IC_REFILL_FROM_L2",
	  "Refill from L2" },
	{ 0x83, 0xF, NULL, "IC_REFILL_FROM_SYS",
	  "Refill from system" },
	/* 0x84-0x85: from K7 official event set */
	{ 0x86, 0xF, NULL, "IC_RESYNC_BY_SNOOP",
	  /* similar to 0x22, but IC unit instead of LS unit */
	  "Microarchitectural resync caused by snoop" },
	{ 0x87, 0xF, NULL, "IC_FETCH_STALL",
	  "Instruction fetch stall" },
	{ 0x88, 0xF, NULL, "IC_STACK_HIT",
	  "Return stack hit" },
	{ 0x89, 0xF, NULL, "IC_STACK_OVERFLOW",
	  "Return stack overflow" },
	/* 0xC0-0xC7: from K7 official event set */
	{ 0xC8, 0xF, NULL, "RETIRED_NEAR_RETURNS",
	  "Retired near returns" },
	{ 0xC9, 0xF, NULL, "RETIRED_RETURNS_MISPREDICT",
	  "Retired near returns mispredicted" },
	{ 0xCA, 0xF, NULL, "RETIRED_BRANCH_MISCOMPARE",
	  "Retired taken branches mispredicted due to address miscompare" },
	{ 0xCB, 0xF, UM(k8_um_fpu_instr), "RETIRED_FPU_INSTRS", /* Revision B and later */
	  "Retired FPU instructions" },
	{ 0xCC, 0xF, UM(k8_um_fpu_fastpath), "RETIRED_FASTPATH_INSTRS", /* Revision B and later */
	  "Retired fastpath double op instructions" },
	/* 0xCD-0xCF: from K7 official event set */
	{ 0xD0, 0xF, NULL, "DECODER_EMPTY",
	  "Nothing to dispatch (decoder empty)" },
	{ 0xD1, 0xF, NULL, "DISPATCH_STALLS",
	  "Dispatch stalls (events 0xD2-0xDA combined)" },
	{ 0xD2, 0xF, NULL, "DISPATCH_STALL_FROM_BRANCH_ABORT",
	  "Dispatch stall from branch abort to retire" },
	{ 0xD3, 0xF, NULL, "DISPATCH_STALL_SERIALIZATION",
	  "Dispatch stall for serialization" },
	{ 0xD4, 0xF, NULL, "DISPATCH_STALL_SEG_LOAD",
	  "Dispatch stall for segment load" },
	{ 0xD5, 0xF, NULL, "DISPATCH_STALL_REORDER_BUFFER",
	  "Dispatch stall when reorder buffer is full" },
	{ 0xD6, 0xF, NULL, "DISPATCH_STALL_RESERVE_STATIONS",
	  "Dispatch stall when reservation stations are full" },
	{ 0xD7, 0xF, NULL, "DISPATCH_STALL_FPU",
	  "Dispatch stall when FPU is full" },
	{ 0xD8, 0xF, NULL, "DISPATCH_STALL_LS",
	  "Dispatch stall when LS is full" },
	{ 0xD9, 0xF, NULL, "DISPATCH_STALL_QUIET_WAIT",
	  "Dispatch stall when waiting for all to be quiet" },
	{ 0xDA, 0xF, NULL, "DISPATCH_STALL_PENDING",
	  "Dispatch stall when far control transfer or resync branch is pending" },
	{ 0xDB, 0xF, UM(k8_um_fpu_exceptions), "FPU_EXCEPTIONS", /* Revision B and later */
	  "FPU exceptions" },
	{ 0xDC, 0xF, NULL, "DR0_BREAKPOINTS",
	  "Number of breakpoints for DR0" },
	{ 0xDD, 0xF, NULL, "DR1_BREAKPOINTS",
	  "Number of breakpoints for DR1" },
	{ 0xDE, 0xF, NULL, "DR2_BREAKPOINTS",
	  "Number of breakpoints for DR2" },
	{ 0xDF, 0xF, NULL, "DR3_BREAKPOINTS",
	  "Number of breakpoints for DR3" },
	{ 0xE0, 0xF, UM(k8_um_page_access), "MEM_PAGE_ACCESS",
	  "Memory controller page access" },
	{ 0xE1, 0xF, NULL, "MEM_PAGE_TBL_OVERFLOW",
	  "Memory controller page table overflow" },
	{ 0xE2, 0xF, NULL, "DRAM_SLOTS_MISSED",
	  "Memory controller DRAM command slots missed (in MemClks)" },
	{ 0xE3, 0xF, UM(k8_um_turnaround), "MEM_TURNAROUND",
	  "Memory controller turnaround" },
	{ 0xE4, 0xF, UM(k8_um_saturation), "MEM_BYPASS_SAT",
	  "Memory controller bypass counter saturation" },
	{ 0xEB, 0xF, UM(k8_um_sized_commands), "SIZED_COMMANDS",
	  "Sized commands" },
	{ 0xEC, 0xF, UM(k8_um_probe), "PROBE_RESULT",
	  "Probe result" },
	{ 0xF6, 0xF, UM(k8_um_ht), "HYPERTRANSPORT_BUS0_WIDTH",
	  "Hypertransport (tm) bus 0 bandwidth" },
	{ 0xF7, 0xF, UM(k8_um_ht), "HYPERTRANSPORT_BUS1_WIDTH",
	  "Hypertransport (tm) bus 1 bandwidth" },
	{ 0xF8, 0xF, UM(k8_um_ht), "HYPERTRANSPORT_BUS2_WIDTH",
	  "Hypertransport (tm) bus 2 bandwidth" },
};

/*
 * Event set shared by the K8 revisions: the table above, layered on top of
 * k7_official_event_set through .include.  It is static; the exported
 * per-revision sets that follow pull it in via their own .include.
 */
static const struct perfctr_event_set k8_common_event_set = {
	.cpu_type = PERFCTR_X86_AMD_K8,
	.event_prefix = "K8_",
	.include = &k7_official_event_set,
	.nevents = ARRAY_SIZE(k8_common_events),
	.events = k8_common_events,
};

static const
struct perfctr_event k8_events[] = { { 0x24, 0xF, NULL, "LOCKED_OP", /* unit mask changed in Rev. C */ "Locked operation" }, }; const struct perfctr_event_set perfctr_k8_event_set = { .cpu_type = PERFCTR_X86_AMD_K8, .event_prefix = "K8_", .include = &k8_common_event_set, .nevents = ARRAY_SIZE(k8_events), .events = k8_events, }; /* * K8 Revision C. Starts at CPUID 0xF58 for Opteron/Athlon64FX and * CPUID 0xF48 for Athlon64. (CPUID 0xF51 is Opteron Revision B3.) */ static const struct perfctr_unit_mask_3 k8c_um_locked_op = { { .type = perfctr_um_type_bitmask, .default_value = 0x01, .nvalues = 3 }, { { 0x01, "Number of lock instructions executed" }, { 0x02, "Number of cycles spent in the lock request/grant stage" }, { 0x04, "Number of cycles a lock takes to complete once it is " "non-speculative and is the oldest load/store operation " "(non-speculative cycles in Ls2 entry 0)" } } }; static const struct perfctr_unit_mask_2 k8c_um_lock_accesses = { { .type = perfctr_um_type_bitmask, .default_value = 0x03, .nvalues = 2 }, { { 0x01, "Number of dcache accesses by lock instructions" }, { 0x02, "Number of dcache misses by lock instructions" } } }; static const struct perfctr_event k8c_events[] = { { 0x24, 0xF, UM(k8c_um_locked_op), "LOCKED_OP", /* unit mask changed */ "Locked operation" }, { 0x4C, 0xF, UM(k8c_um_lock_accesses), "LOCK_ACCESSES", "DCACHE accesses by locks" }, }; const struct perfctr_event_set perfctr_k8c_event_set = { .cpu_type = PERFCTR_X86_AMD_K8C, .event_prefix = "K8C_", .include = &k8_common_event_set, .nevents = ARRAY_SIZE(k8c_events), .events = k8c_events, }; papi-5.6.0/man/man3/PAPI_get_thr_specific.3000664 001750 001750 00000003715 13216244356 022354 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPI_get_thr_specific" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPI_get_thr_specific \- .PP Retrieve a pointer to a thread specific data structure\&. 
.SH SYNOPSIS .br .PP .SH "Detailed Description" .PP .PP .nf @par Prototype: \#include <papi.h> @n int PAPI_get_thr_specific( int tag, void **ptr ); @param tag An identifier, the value of which is either PAPI_USR1_TLS or PAPI_USR2_TLS. This identifier indicates which of several data structures associated with this thread is to be accessed. @param ptr A pointer to the memory containing the data structure. @retval PAPI_OK @retval PAPI_EINVAL The @em tag argument is out of range. In C, PAPI_get_thr_specific will retrieve the pointer from the array with index @em tag. There are 2 user available locations and @em tag can be either PAPI_USR1_TLS or PAPI_USR2_TLS. The array mentioned above is managed by PAPI and allocated to each thread which has called PAPI_thread_init. There is no Fortran equivalent function. @par Example: .fi .PP .PP .nf int ret; HighLevelInfo *state = NULL; ret = PAPI_thread_init(pthread_self); if (ret != PAPI_OK) handle_error(ret); // Do we have the thread specific data setup yet? ret = PAPI_get_thr_specific(PAPI_USR1_TLS, (void **) &state); if (ret != PAPI_OK || state == NULL) { state = (HighLevelInfo *) malloc(sizeof(HighLevelInfo)); if (state == NULL) return (PAPI_ESYS); memset(state, 0, sizeof(HighLevelInfo)); state->EventSet = PAPI_NULL; ret = PAPI_create_eventset(&state->EventSet); if (ret != PAPI_OK) return (PAPI_ESYS); ret = PAPI_set_thr_specific(PAPI_USR1_TLS, state); if (ret != PAPI_OK) return (ret); } .fi .PP .PP \fBSee Also:\fP .RS 4 \fBPAPI_register_thread\fP \fBPAPI_thread_init\fP \fBPAPI_thread_id\fP \fBPAPI_set_thr_specific\fP .RE .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&.
papi-5.6.0/src/perfctr-2.7.x/patches/patch-kernel-2.6.14-rc5-mm1000664 001750 001750 00000105652 13216244370 025406 0ustar00jshenry1963jshenry1963000000 000000 diff -rupN linux-2.6.14-rc5-mm1/CREDITS linux-2.6.14-rc5-mm1.perfctr27/CREDITS --- linux-2.6.14-rc5-mm1/CREDITS 2005-11-06 22:50:58.000000000 +0100 +++ linux-2.6.14-rc5-mm1.perfctr27/CREDITS 2005-11-06 22:53:09.000000000 +0100 @@ -2630,6 +2630,7 @@ N: Mikael Pettersson E: mikpe@csd.uu.se W: http://www.csd.uu.se/~mikpe/ D: Miscellaneous fixes +D: Performance-monitoring counters driver N: Reed H. Petty E: rhp@draper.net diff -rupN linux-2.6.14-rc5-mm1/MAINTAINERS linux-2.6.14-rc5-mm1.perfctr27/MAINTAINERS --- linux-2.6.14-rc5-mm1/MAINTAINERS 2005-11-06 22:51:02.000000000 +0100 +++ linux-2.6.14-rc5-mm1.perfctr27/MAINTAINERS 2005-11-06 22:53:09.000000000 +0100 @@ -1958,6 +1958,12 @@ M: tsbogend@alpha.franken.de L: netdev@vger.kernel.org S: Maintained +PERFORMANCE-MONITORING COUNTERS DRIVER +P: Mikael Pettersson +M: mikpe@csd.uu.se +W: http://www.csd.uu.se/~mikpe/linux/perfctr/ +S: Maintained + PHRAM MTD DRIVER P: Jörn Engel M: joern@wh.fh-wedel.de diff -rupN linux-2.6.14-rc5-mm1/arch/i386/Kconfig linux-2.6.14-rc5-mm1.perfctr27/arch/i386/Kconfig --- linux-2.6.14-rc5-mm1/arch/i386/Kconfig 2005-11-06 22:50:57.000000000 +0100 +++ linux-2.6.14-rc5-mm1.perfctr27/arch/i386/Kconfig 2005-11-06 22:53:09.000000000 +0100 @@ -991,6 +991,9 @@ config CRASH_DUMP depends on HIGHMEM help Generate crash dump after being started by kexec. + +source "drivers/perfctr/Kconfig" + endmenu diff -rupN linux-2.6.14-rc5-mm1/arch/i386/kernel/entry.S linux-2.6.14-rc5-mm1.perfctr27/arch/i386/kernel/entry.S --- linux-2.6.14-rc5-mm1/arch/i386/kernel/entry.S 2005-11-06 22:50:57.000000000 +0100 +++ linux-2.6.14-rc5-mm1.perfctr27/arch/i386/kernel/entry.S 2005-11-06 22:53:09.000000000 +0100 @@ -448,6 +448,16 @@ ENTRY(name) \ /* The include is where all of the SMP etc. 
interrupts come from */ #include "entry_arch.h" +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PERFCTR) +ENTRY(perfctr_interrupt) + pushl $LOCAL_PERFCTR_VECTOR-256 + SAVE_ALL + pushl %esp + call smp_perfctr_interrupt + addl $4, %esp + jmp ret_from_intr +#endif + ENTRY(divide_error) pushl $0 # no error code pushl $do_divide_error diff -rupN linux-2.6.14-rc5-mm1/arch/i386/kernel/i8259.c linux-2.6.14-rc5-mm1.perfctr27/arch/i386/kernel/i8259.c --- linux-2.6.14-rc5-mm1/arch/i386/kernel/i8259.c 2005-11-06 22:50:57.000000000 +0100 +++ linux-2.6.14-rc5-mm1.perfctr27/arch/i386/kernel/i8259.c 2005-11-06 22:53:09.000000000 +0100 @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -421,6 +422,8 @@ void __init init_IRQ(void) */ intr_init_hook(); + perfctr_vector_init(); + /* * Set the clock to HZ Hz, we already have a valid * vector now: diff -rupN linux-2.6.14-rc5-mm1/arch/i386/kernel/process.c linux-2.6.14-rc5-mm1.perfctr27/arch/i386/kernel/process.c --- linux-2.6.14-rc5-mm1/arch/i386/kernel/process.c 2005-11-06 22:50:57.000000000 +0100 +++ linux-2.6.14-rc5-mm1.perfctr27/arch/i386/kernel/process.c 2005-11-06 22:53:09.000000000 +0100 @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -395,6 +396,7 @@ void exit_thread(void) tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET; put_cpu(); } + perfctr_exit_thread(&tsk->thread); } void flush_thread(void) @@ -474,6 +476,8 @@ int copy_thread(int nr, unsigned long cl savesegment(fs,p->thread.fs); savesegment(gs,p->thread.gs); + perfctr_copy_task(p, regs); + tsk = current; if (unlikely(NULL != tsk->thread.io_bitmap_ptr)) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); @@ -735,6 +739,7 @@ struct task_struct fastcall * __switch_t disable_tsc(prev_p, next_p); + perfctr_resume_thread(next); return prev_p; } diff -rupN linux-2.6.14-rc5-mm1/arch/i386/kernel/syscall_table.S linux-2.6.14-rc5-mm1.perfctr27/arch/i386/kernel/syscall_table.S --- 
linux-2.6.14-rc5-mm1/arch/i386/kernel/syscall_table.S 2005-08-29 14:34:27.000000000 +0200 +++ linux-2.6.14-rc5-mm1.perfctr27/arch/i386/kernel/syscall_table.S 2005-11-06 22:53:09.000000000 +0100 @@ -294,3 +294,9 @@ ENTRY(sys_call_table) .long sys_inotify_init .long sys_inotify_add_watch .long sys_inotify_rm_watch + .long sys_ni_syscall + .long sys_ni_syscall /* 295 */ + .long sys_vperfctr_open + .long sys_vperfctr_control + .long sys_vperfctr_write + .long sys_vperfctr_read diff -rupN linux-2.6.14-rc5-mm1/arch/ppc/Kconfig linux-2.6.14-rc5-mm1.perfctr27/arch/ppc/Kconfig --- linux-2.6.14-rc5-mm1/arch/ppc/Kconfig 2005-11-06 22:50:57.000000000 +0100 +++ linux-2.6.14-rc5-mm1.perfctr27/arch/ppc/Kconfig 2005-11-06 22:53:09.000000000 +0100 @@ -288,6 +288,8 @@ config NOT_COHERENT_CACHE depends on 4xx || 8xx || E200 default y +source "drivers/perfctr/Kconfig" + endmenu menu "Platform options" diff -rupN linux-2.6.14-rc5-mm1/arch/ppc/kernel/head.S linux-2.6.14-rc5-mm1.perfctr27/arch/ppc/kernel/head.S --- linux-2.6.14-rc5-mm1/arch/ppc/kernel/head.S 2005-11-06 22:50:23.000000000 +0100 +++ linux-2.6.14-rc5-mm1.perfctr27/arch/ppc/kernel/head.S 2005-11-06 22:53:09.000000000 +0100 @@ -502,7 +502,11 @@ SystemCall: Trap_0f: EXCEPTION_PROLOG addi r3,r1,STACK_FRAME_OVERHEAD +#ifdef CONFIG_PERFCTR_INTERRUPT_SUPPORT + EXC_XFER_EE(0xf00, do_perfctr_interrupt) +#else EXC_XFER_EE(0xf00, UnknownException) +#endif /* * Handle TLB miss for instruction on 603/603e. 
diff -rupN linux-2.6.14-rc5-mm1/arch/ppc/kernel/misc.S linux-2.6.14-rc5-mm1.perfctr27/arch/ppc/kernel/misc.S --- linux-2.6.14-rc5-mm1/arch/ppc/kernel/misc.S 2005-11-06 22:50:57.000000000 +0100 +++ linux-2.6.14-rc5-mm1.perfctr27/arch/ppc/kernel/misc.S 2005-11-06 22:53:09.000000000 +0100 @@ -1575,3 +1575,9 @@ _GLOBAL(sys_call_table) .long sys_inotify_init /* 275 */ .long sys_inotify_add_watch .long sys_inotify_rm_watch + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_vperfctr_open /* 280 */ + .long sys_vperfctr_control + .long sys_vperfctr_write + .long sys_vperfctr_read diff -rupN linux-2.6.14-rc5-mm1/arch/ppc/kernel/process.c linux-2.6.14-rc5-mm1.perfctr27/arch/ppc/kernel/process.c --- linux-2.6.14-rc5-mm1/arch/ppc/kernel/process.c 2005-11-06 22:50:57.000000000 +0100 +++ linux-2.6.14-rc5-mm1.perfctr27/arch/ppc/kernel/process.c 2005-11-06 22:53:09.000000000 +0100 @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -303,7 +304,9 @@ struct task_struct *__switch_to(struct t #endif /* CONFIG_SPE */ new_thread = &new->thread; old_thread = &current->thread; + perfctr_suspend_thread(&prev->thread); last = _switch(old_thread, new_thread); + perfctr_resume_thread(&current->thread); local_irq_restore(s); return last; } @@ -365,6 +368,7 @@ void exit_thread(void) if (last_task_used_spe == current) last_task_used_spe = NULL; #endif + perfctr_exit_thread(&current->thread); } void flush_thread(void) @@ -457,6 +461,8 @@ copy_thread(int nr, unsigned long clone_ p->thread.last_syscall = -1; + perfctr_copy_task(p, regs); + return 0; } diff -rupN linux-2.6.14-rc5-mm1/arch/ppc64/Kconfig linux-2.6.14-rc5-mm1.perfctr27/arch/ppc64/Kconfig --- linux-2.6.14-rc5-mm1/arch/ppc64/Kconfig 2005-11-06 22:50:57.000000000 +0100 +++ linux-2.6.14-rc5-mm1.perfctr27/arch/ppc64/Kconfig 2005-11-06 22:53:09.000000000 +0100 @@ -388,6 +388,8 @@ config CMDLINE some command-line options at build time by entering them here. In most cases you will need to specify the root device here.
+source "drivers/perfctr/Kconfig" + endmenu config ISA_DMA_API diff -rupN linux-2.6.14-rc5-mm1/arch/ppc64/kernel/misc.S linux-2.6.14-rc5-mm1.perfctr27/arch/ppc64/kernel/misc.S --- linux-2.6.14-rc5-mm1/arch/ppc64/kernel/misc.S 2005-11-06 22:50:57.000000000 +0100 +++ linux-2.6.14-rc5-mm1.perfctr27/arch/ppc64/kernel/misc.S 2005-11-06 22:53:09.000000000 +0100 @@ -1300,6 +1300,12 @@ _GLOBAL(sys_call_table32) .llong .sys_inotify_init /* 275 */ .llong .sys_inotify_add_watch .llong .sys_inotify_rm_watch + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_vperfctr_open /* 280 */ + .llong .sys_vperfctr_control + .llong .sys_vperfctr_write + .llong .sys_vperfctr_read .balign 8 _GLOBAL(sys_call_table) @@ -1581,3 +1587,9 @@ _GLOBAL(sys_call_table) .llong .sys_inotify_init /* 275 */ .llong .sys_inotify_add_watch .llong .sys_inotify_rm_watch + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_vperfctr_open /* 280 */ + .llong .sys_vperfctr_control + .llong .sys_vperfctr_write + .llong .sys_vperfctr_read diff -rupN linux-2.6.14-rc5-mm1/arch/ppc64/kernel/process.c linux-2.6.14-rc5-mm1.perfctr27/arch/ppc64/kernel/process.c --- linux-2.6.14-rc5-mm1/arch/ppc64/kernel/process.c 2005-11-06 22:50:57.000000000 +0100 +++ linux-2.6.14-rc5-mm1.perfctr27/arch/ppc64/kernel/process.c 2005-11-06 22:53:09.000000000 +0100 @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -245,7 +246,9 @@ struct task_struct *__switch_to(struct t } local_irq_save(flags); + perfctr_suspend_thread(&prev->thread); last = _switch(old_thread, new_thread); + perfctr_resume_thread(&current->thread); local_irq_restore(flags); @@ -345,6 +348,7 @@ void exit_thread(void) last_task_used_altivec = NULL; #endif /* CONFIG_ALTIVEC */ #endif /* CONFIG_SMP */ + perfctr_exit_thread(&current->thread); } void flush_thread(void) @@ -450,6 +454,8 @@ copy_thread(int nr, unsigned long clone_ */ kregs->nip = *((unsigned long *)ret_from_fork); + perfctr_copy_task(p, regs); + return 0; } diff -rupN
linux-2.6.14-rc5-mm1/arch/x86_64/Kconfig linux-2.6.14-rc5-mm1.perfctr27/arch/x86_64/Kconfig --- linux-2.6.14-rc5-mm1/arch/x86_64/Kconfig 2005-11-06 22:50:58.000000000 +0100 +++ linux-2.6.14-rc5-mm1.perfctr27/arch/x86_64/Kconfig 2005-11-06 22:53:09.000000000 +0100 @@ -530,6 +530,8 @@ config UID16 depends on IA32_EMULATION default y +source "drivers/perfctr/Kconfig" + endmenu source "net/Kconfig" diff -rupN linux-2.6.14-rc5-mm1/arch/x86_64/ia32/ia32entry.S linux-2.6.14-rc5-mm1.perfctr27/arch/x86_64/ia32/ia32entry.S --- linux-2.6.14-rc5-mm1/arch/x86_64/ia32/ia32entry.S 2005-11-06 22:50:25.000000000 +0100 +++ linux-2.6.14-rc5-mm1.perfctr27/arch/x86_64/ia32/ia32entry.S 2005-11-06 22:53:09.000000000 +0100 @@ -643,6 +643,12 @@ ia32_sys_call_table: .quad sys_inotify_init .quad sys_inotify_add_watch .quad sys_inotify_rm_watch + .quad quiet_ni_syscall /* pselect6 */ + .quad quiet_ni_syscall /* ppoll 295 */ + .quad sys_vperfctr_open + .quad sys_vperfctr_control + .quad sys_vperfctr_write + .quad sys_vperfctr_read ia32_syscall_end: .rept IA32_NR_syscalls-(ia32_syscall_end-ia32_sys_call_table)/8 .quad ni_syscall diff -rupN linux-2.6.14-rc5-mm1/arch/x86_64/kernel/entry.S linux-2.6.14-rc5-mm1.perfctr27/arch/x86_64/kernel/entry.S --- linux-2.6.14-rc5-mm1/arch/x86_64/kernel/entry.S 2005-11-06 22:50:58.000000000 +0100 +++ linux-2.6.14-rc5-mm1.perfctr27/arch/x86_64/kernel/entry.S 2005-11-06 22:53:09.000000000 +0100 @@ -648,6 +648,11 @@ ENTRY(spurious_interrupt) apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt #endif +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PERFCTR) +ENTRY(perfctr_interrupt) + apicinterrupt LOCAL_PERFCTR_VECTOR,smp_perfctr_interrupt +#endif + /* * Exception entry points. 
*/ diff -rupN linux-2.6.14-rc5-mm1/arch/x86_64/kernel/i8259.c linux-2.6.14-rc5-mm1.perfctr27/arch/x86_64/kernel/i8259.c --- linux-2.6.14-rc5-mm1/arch/x86_64/kernel/i8259.c 2005-11-06 22:50:58.000000000 +0100 +++ linux-2.6.14-rc5-mm1.perfctr27/arch/x86_64/kernel/i8259.c 2005-11-06 22:53:09.000000000 +0100 @@ -23,6 +23,7 @@ #include #include #include +#include /* * Common place to define all x86 IRQ vectors @@ -592,6 +593,8 @@ void __init init_IRQ(void) set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); #endif + perfctr_vector_init(); + /* * Set the clock to HZ Hz, we already have a valid * vector now: diff -rupN linux-2.6.14-rc5-mm1/arch/x86_64/kernel/process.c linux-2.6.14-rc5-mm1.perfctr27/arch/x86_64/kernel/process.c --- linux-2.6.14-rc5-mm1/arch/x86_64/kernel/process.c 2005-11-06 22:50:58.000000000 +0100 +++ linux-2.6.14-rc5-mm1.perfctr27/arch/x86_64/kernel/process.c 2005-11-06 22:53:09.000000000 +0100 @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -349,6 +350,7 @@ void exit_thread(void) t->io_bitmap_max = 0; put_cpu(); } + perfctr_exit_thread(&me->thread); } void flush_thread(void) @@ -457,6 +459,8 @@ int copy_thread(int nr, unsigned long cl asm("mov %%es,%0" : "=m" (p->thread.es)); asm("mov %%ds,%0" : "=m" (p->thread.ds)); + perfctr_copy_task(p, regs); + if (unlikely(me->thread.io_bitmap_ptr != NULL)) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) { @@ -609,6 +613,8 @@ __switch_to(struct task_struct *prev_p, } } + perfctr_resume_thread(next); + return prev_p; } diff -rupN linux-2.6.14-rc5-mm1/drivers/Makefile linux-2.6.14-rc5-mm1.perfctr27/drivers/Makefile --- linux-2.6.14-rc5-mm1/drivers/Makefile 2005-11-06 22:51:00.000000000 +0100 +++ linux-2.6.14-rc5-mm1.perfctr27/drivers/Makefile 2005-11-06 22:53:09.000000000 +0100 @@ -65,6 +65,7 @@ obj-$(CONFIG_MCA) += mca/ obj-$(CONFIG_EISA) += eisa/ obj-$(CONFIG_CPU_FREQ) += cpufreq/ obj-$(CONFIG_MMC) += mmc/ +obj-$(CONFIG_PERFCTR) += perfctr/ 
obj-$(CONFIG_INFINIBAND) += infiniband/ obj-$(CONFIG_SGI_IOC4) += sn/ obj-y += firmware/ diff -rupN linux-2.6.14-rc5-mm1/include/asm-i386/mach-default/irq_vectors.h linux-2.6.14-rc5-mm1.perfctr27/include/asm-i386/mach-default/irq_vectors.h --- linux-2.6.14-rc5-mm1/include/asm-i386/mach-default/irq_vectors.h 2004-05-10 11:14:37.000000000 +0200 +++ linux-2.6.14-rc5-mm1.perfctr27/include/asm-i386/mach-default/irq_vectors.h 2005-11-06 22:53:09.000000000 +0100 @@ -56,14 +56,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 diff -rupN linux-2.6.14-rc5-mm1/include/asm-i386/mach-visws/irq_vectors.h linux-2.6.14-rc5-mm1.perfctr27/include/asm-i386/mach-visws/irq_vectors.h --- linux-2.6.14-rc5-mm1/include/asm-i386/mach-visws/irq_vectors.h 2004-01-09 13:19:11.000000000 +0100 +++ linux-2.6.14-rc5-mm1.perfctr27/include/asm-i386/mach-visws/irq_vectors.h 2005-11-06 22:53:09.000000000 +0100 @@ -35,14 +35,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. 
(0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 diff -rupN linux-2.6.14-rc5-mm1/include/asm-i386/processor.h linux-2.6.14-rc5-mm1.perfctr27/include/asm-i386/processor.h --- linux-2.6.14-rc5-mm1/include/asm-i386/processor.h 2005-11-06 22:51:01.000000000 +0100 +++ linux-2.6.14-rc5-mm1.perfctr27/include/asm-i386/processor.h 2005-11-06 22:53:09.000000000 +0100 @@ -460,6 +460,8 @@ struct thread_struct { unsigned long iopl; /* max allowed port in the bitmap, in bytes: */ unsigned long io_bitmap_max; +/* performance counters */ + struct vperfctr *perfctr; }; #define INIT_THREAD { \ diff -rupN linux-2.6.14-rc5-mm1/include/asm-i386/system.h linux-2.6.14-rc5-mm1.perfctr27/include/asm-i386/system.h --- linux-2.6.14-rc5-mm1/include/asm-i386/system.h 2005-11-06 22:51:01.000000000 +0100 +++ linux-2.6.14-rc5-mm1.perfctr27/include/asm-i386/system.h 2005-11-06 22:53:09.000000000 +0100 @@ -14,6 +14,7 @@ extern struct task_struct * FASTCALL(__s #define switch_to(prev,next,last) do { \ unsigned long esi,edi; \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile("pushl %%ebp\n\t" \ "movl %%esp,%0\n\t" /* save ESP */ \ "movl %5,%%esp\n\t" /* restore ESP */ \ diff -rupN linux-2.6.14-rc5-mm1/include/asm-i386/unistd.h linux-2.6.14-rc5-mm1.perfctr27/include/asm-i386/unistd.h --- linux-2.6.14-rc5-mm1/include/asm-i386/unistd.h 2005-11-06 22:51:01.000000000 +0100 +++ linux-2.6.14-rc5-mm1.perfctr27/include/asm-i386/unistd.h 2005-11-06 22:53:09.000000000 +0100 @@ -299,8 +299,12 @@ #define __NR_inotify_init 291 #define __NR_inotify_add_watch 292 #define __NR_inotify_rm_watch 293 +#define __NR_vperfctr_open 296 +#define __NR_vperfctr_control (__NR_vperfctr_open+1) +#define __NR_vperfctr_write (__NR_vperfctr_open+2) +#define __NR_vperfctr_read (__NR_vperfctr_open+3) -#define NR_syscalls 294 +#define NR_syscalls 300 /* * user-visible error numbers are in the range -1 - -128: see diff -rupN 
linux-2.6.14-rc5-mm1/include/asm-ppc/processor.h linux-2.6.14-rc5-mm1.perfctr27/include/asm-ppc/processor.h --- linux-2.6.14-rc5-mm1/include/asm-ppc/processor.h 2005-03-02 19:24:19.000000000 +0100 +++ linux-2.6.14-rc5-mm1.perfctr27/include/asm-ppc/processor.h 2005-11-06 22:53:09.000000000 +0100 @@ -122,6 +122,9 @@ struct thread_struct { unsigned long spefscr; /* SPE & eFP status */ int used_spe; /* set if process has used spe */ #endif /* CONFIG_SPE */ +#ifdef CONFIG_PERFCTR_VIRTUAL + struct vperfctr *perfctr; /* performance counters */ +#endif }; #define ARCH_MIN_TASKALIGN 16 diff -rupN linux-2.6.14-rc5-mm1/include/asm-ppc/reg.h linux-2.6.14-rc5-mm1.perfctr27/include/asm-ppc/reg.h --- linux-2.6.14-rc5-mm1/include/asm-ppc/reg.h 2005-11-06 22:50:31.000000000 +0100 +++ linux-2.6.14-rc5-mm1.perfctr27/include/asm-ppc/reg.h 2005-11-06 22:53:09.000000000 +0100 @@ -275,22 +275,14 @@ #define SPRN_LDSTCR 0x3f8 /* Load/Store control register */ #define SPRN_LDSTDB 0x3f4 /* */ #define SPRN_LR 0x008 /* Link Register */ -#define SPRN_MMCR0 0x3B8 /* Monitor Mode Control Register 0 */ -#define SPRN_MMCR1 0x3BC /* Monitor Mode Control Register 1 */ #ifndef SPRN_PIR #define SPRN_PIR 0x3FF /* Processor Identification Register */ #endif -#define SPRN_PMC1 0x3B9 /* Performance Counter Register 1 */ -#define SPRN_PMC2 0x3BA /* Performance Counter Register 2 */ -#define SPRN_PMC3 0x3BD /* Performance Counter Register 3 */ -#define SPRN_PMC4 0x3BE /* Performance Counter Register 4 */ #define SPRN_PTEHI 0x3D5 /* 981 7450 PTE HI word (S/W TLB load) */ #define SPRN_PTELO 0x3D6 /* 982 7450 PTE LO word (S/W TLB load) */ #define SPRN_PVR 0x11F /* Processor Version Register */ #define SPRN_RPA 0x3D6 /* Required Physical Address Register */ -#define SPRN_SDA 0x3BF /* Sampled Data Address Register */ #define SPRN_SDR1 0x019 /* MMU Hash Base Register */ -#define SPRN_SIA 0x3BB /* Sampled Instruction Address Register */ #define SPRN_SPRG0 0x110 /* Special Purpose Register General 0 */ #define 
SPRN_SPRG1 0x111 /* Special Purpose Register General 1 */ #define SPRN_SPRG2 0x112 /* Special Purpose Register General 2 */ @@ -317,16 +309,79 @@ #define SPRN_THRM3 0x3FE /* Thermal Management Register 3 */ #define THRM3_E (1<<0) #define SPRN_TLBMISS 0x3D4 /* 980 7450 TLB Miss Register */ -#define SPRN_UMMCR0 0x3A8 /* User Monitor Mode Control Register 0 */ -#define SPRN_UMMCR1 0x3AC /* User Monitor Mode Control Register 0 */ -#define SPRN_UPMC1 0x3A9 /* User Performance Counter Register 1 */ -#define SPRN_UPMC2 0x3AA /* User Performance Counter Register 2 */ -#define SPRN_UPMC3 0x3AD /* User Performance Counter Register 3 */ -#define SPRN_UPMC4 0x3AE /* User Performance Counter Register 4 */ -#define SPRN_USIA 0x3AB /* User Sampled Instruction Address Register */ #define SPRN_VRSAVE 0x100 /* Vector Register Save Register */ #define SPRN_XER 0x001 /* Fixed Point Exception Register */ +/* Performance-monitoring control and counter registers */ +#define SPRN_MMCR0 0x3B8 /* Monitor Mode Control Register 0 (604 and up) */ +#define SPRN_MMCR1 0x3BC /* Monitor Mode Control Register 1 (604e and up) */ +#define SPRN_MMCR2 0x3B0 /* Monitor Mode Control Register 2 (7400 and up) */ +#define SPRN_PMC1 0x3B9 /* Performance Counter Register 1 (604 and up) */ +#define SPRN_PMC2 0x3BA /* Performance Counter Register 2 (604 and up) */ +#define SPRN_PMC3 0x3BD /* Performance Counter Register 3 (604e and up) */ +#define SPRN_PMC4 0x3BE /* Performance Counter Register 4 (604e and up) */ +#define SPRN_PMC5 0x3B1 /* Performance Counter Register 5 (7450 and up) */ +#define SPRN_PMC6 0x3B2 /* Performance Counter Register 6 (7450 and up) */ +#define SPRN_SIA 0x3BB /* Sampled Instruction Address Register (604 and up) */ +#define SPRN_SDA 0x3BF /* Sampled Data Address Register (604/604e only) */ +#define SPRN_BAMR 0x3B7 /* Breakpoint Address Mask Register (7400 and up) */ + +#define SPRN_UMMCR0 0x3A8 /* User Monitor Mode Control Register 0 (750 and up) */ +#define SPRN_UMMCR1 0x3AC /* User 
Monitor Mode Control Register 1 (750 and up) */ +#define SPRN_UMMCR2 0x3A0 /* User Monitor Mode Control Register 2 (7400 and up) */ +#define SPRN_UPMC1 0x3A9 /* User Performance Counter Register 1 (750 and up) */ +#define SPRN_UPMC2 0x3AA /* User Performance Counter Register 2 (750 and up) */ +#define SPRN_UPMC3 0x3AD /* User Performance Counter Register 3 (750 and up) */ +#define SPRN_UPMC4 0x3AE /* User Performance Counter Register 4 (750 and up) */ +#define SPRN_UPMC5 0x3A1 /* User Performance Counter Register 5 (7450 and up) */ +#define SPRN_UPMC6 0x3A2 /* User Performance Counter Register 6 (7450 and up) */ +#define SPRN_USIA 0x3AB /* User Sampled Instruction Address Register (750 and up) */ +#define SPRN_UBAMR 0x3A7 /* User Breakpoint Address Mask Register (7400 and up) */ + +/* MMCR0 layout (74xx terminology) */ +#define MMCR0_FC 0x80000000 /* Freeze counters unconditionally. */ +#define MMCR0_FCS 0x40000000 /* Freeze counters while MSR[PR]=0 (supervisor mode). */ +#define MMCR0_FCP 0x20000000 /* Freeze counters while MSR[PR]=1 (user mode). */ +#define MMCR0_FCM1 0x10000000 /* Freeze counters while MSR[PM]=1. */ +#define MMCR0_FCM0 0x08000000 /* Freeze counters while MSR[PM]=0. */ +#define MMCR0_PMXE 0x04000000 /* Enable performance monitor exceptions. + * Cleared by hardware when a PM exception occurs. + * 604: PMXE is not cleared by hardware. + */ +#define MMCR0_FCECE 0x02000000 /* Freeze counters on enabled condition or event. + * FCECE is treated as 0 if TRIGGER is 1. + * 74xx: FC is set when the event occurs. + * 604/750: ineffective when PMXE=0. + */ +#define MMCR0_TBSEL 0x01800000 /* Time base lower (TBL) bit selector. + * 00: bit 31, 01: bit 23, 10: bit 19, 11: bit 15. + */ +#define MMCR0_TBEE 0x00400000 /* Enable event on TBL bit transition from 0 to 1. */ +#define MMCR0_THRESHOLD 0x003F0000 /* Threshold value for certain events. */ +#define MMCR0_PMC1CE 0x00008000 /* Enable event on PMC1 overflow.
*/ +#define MMCR0_PMCjCE 0x00004000 /* Enable event on PMC2-PMC6 overflow. + * 604/750: Overrides FCECE (DISCOUNT). + */ +#define MMCR0_TRIGGER 0x00002000 /* Disable PMC2-PMC6 until PMC1 overflow or other event. + * 74xx: cleared by hardware when the event occurs. + */ +#define MMCR0_PMC1SEL 0x00001FC0 /* PMC1 event selector, 7 bits (contiguous, directly above PMC2SEL). */ +#define MMCR0_PMC2SEL 0x0000003F /* PMC2 event selector, 6 bits. */ + +/* MMCR1 layout (604e-7457) */ +#define MMCR1_PMC3SEL 0xF8000000 /* PMC3 event selector, 5 bits. */ +#define MMCR1_PMC4SEL 0x07C00000 /* PMC4 event selector, 5 bits (contiguous, between PMC3SEL and PMC5SEL). */ +#define MMCR1_PMC5SEL 0x003E0000 /* PMC5 event selector, 5 bits. (745x only) */ +#define MMCR1_PMC6SEL 0x0001F800 /* PMC6 event selector, 6 bits. (745x only) */ +#define MMCR1__RESERVED 0x000007FF /* should be zero */ + +/* MMCR2 layout (7400-7457) */ +#define MMCR2_THRESHMULT 0x80000000 /* MMCR0[THRESHOLD] multiplier. */ +#define MMCR2_SMCNTEN 0x40000000 /* 7400/7410 only, should be zero. */ +#define MMCR2_SMINTEN 0x20000000 /* 7400/7410 only, should be zero. */ +#define MMCR2__RESERVED 0x1FFFFFFF /* should be zero */ +#define MMCR2_RESERVED (MMCR2_SMCNTEN | MMCR2_SMINTEN | MMCR2__RESERVED) + /* Bit definitions for MMCR0 and PMC1 / PMC2.
*/ #define MMCR0_PMC1_CYCLES (1 << 7) #define MMCR0_PMC1_ICACHEMISS (5 << 7) @@ -335,7 +390,6 @@ #define MMCR0_PMC2_CYCLES 0x1 #define MMCR0_PMC2_ITLB 0x7 #define MMCR0_PMC2_LOADMISSTIME 0x5 -#define MMCR0_PMXE (1 << 26) /* Processor Version Register */ diff -rupN linux-2.6.14-rc5-mm1/include/asm-ppc/unistd.h linux-2.6.14-rc5-mm1.perfctr27/include/asm-ppc/unistd.h --- linux-2.6.14-rc5-mm1/include/asm-ppc/unistd.h 2005-11-06 22:51:02.000000000 +0100 +++ linux-2.6.14-rc5-mm1.perfctr27/include/asm-ppc/unistd.h 2005-11-06 22:53:09.000000000 +0100 @@ -282,8 +282,12 @@ #define __NR_inotify_init 275 #define __NR_inotify_add_watch 276 #define __NR_inotify_rm_watch 277 +#define __NR_vperfctr_open 280 +#define __NR_vperfctr_control (__NR_vperfctr_open+1) +#define __NR_vperfctr_write (__NR_vperfctr_open+2) +#define __NR_vperfctr_read (__NR_vperfctr_open+3) -#define __NR_syscalls 278 +#define __NR_syscalls 284 #define __NR(n) #n diff -rupN linux-2.6.14-rc5-mm1/include/asm-ppc64/processor.h linux-2.6.14-rc5-mm1.perfctr27/include/asm-ppc64/processor.h --- linux-2.6.14-rc5-mm1/include/asm-ppc64/processor.h 2005-11-06 22:51:02.000000000 +0100 +++ linux-2.6.14-rc5-mm1.perfctr27/include/asm-ppc64/processor.h 2005-11-06 22:53:09.000000000 +0100 @@ -445,6 +445,8 @@ struct thread_struct { unsigned long vrsave; int used_vr; /* set if process has used altivec */ #endif /* CONFIG_ALTIVEC */ + /* performance counters */ + struct vperfctr *perfctr; }; #define ARCH_MIN_TASKALIGN 16 diff -rupN linux-2.6.14-rc5-mm1/include/asm-ppc64/unistd.h linux-2.6.14-rc5-mm1.perfctr27/include/asm-ppc64/unistd.h --- linux-2.6.14-rc5-mm1/include/asm-ppc64/unistd.h 2005-11-06 22:51:02.000000000 +0100 +++ linux-2.6.14-rc5-mm1.perfctr27/include/asm-ppc64/unistd.h 2005-11-06 22:53:09.000000000 +0100 @@ -288,8 +288,12 @@ #define __NR_inotify_init 275 #define __NR_inotify_add_watch 276 #define __NR_inotify_rm_watch 277 +#define __NR_vperfctr_open 280 +#define __NR_vperfctr_control (__NR_vperfctr_open+1) +#define 
__NR_vperfctr_write (__NR_vperfctr_open+2) +#define __NR_vperfctr_read (__NR_vperfctr_open+3) -#define __NR_syscalls 278 +#define __NR_syscalls 284 #ifdef __KERNEL__ #define NR_syscalls __NR_syscalls #endif diff -rupN linux-2.6.14-rc5-mm1/include/asm-x86_64/hw_irq.h linux-2.6.14-rc5-mm1.perfctr27/include/asm-x86_64/hw_irq.h --- linux-2.6.14-rc5-mm1/include/asm-x86_64/hw_irq.h 2005-11-06 22:51:02.000000000 +0100 +++ linux-2.6.14-rc5-mm1.perfctr27/include/asm-x86_64/hw_irq.h 2005-11-06 22:53:09.000000000 +0100 @@ -67,14 +67,15 @@ struct hw_interrupt_type; * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in irq.h */ #ifndef __ASSEMBLY__ diff -rupN linux-2.6.14-rc5-mm1/include/asm-x86_64/ia32_unistd.h linux-2.6.14-rc5-mm1.perfctr27/include/asm-x86_64/ia32_unistd.h --- linux-2.6.14-rc5-mm1/include/asm-x86_64/ia32_unistd.h 2005-08-29 14:34:33.000000000 +0200 +++ linux-2.6.14-rc5-mm1.perfctr27/include/asm-x86_64/ia32_unistd.h 2005-11-06 22:53:09.000000000 +0100 @@ -299,7 +299,11 @@ #define __NR_ia32_inotify_init 291 #define __NR_ia32_inotify_add_watch 292 #define __NR_ia32_inotify_rm_watch 293 +#define __NR_ia32_vperfctr_open 296 +#define __NR_ia32_vperfctr_control (__NR_ia32_vperfctr_open+1) +#define __NR_ia32_vperfctr_write (__NR_ia32_vperfctr_open+2) +#define __NR_ia32_vperfctr_read (__NR_ia32_vperfctr_open+3) -#define IA32_NR_syscalls 294 /* must be > than biggest syscall! */ +#define IA32_NR_syscalls 300 /* must be > than biggest syscall! 
*/ #endif /* _ASM_X86_64_IA32_UNISTD_H_ */ diff -rupN linux-2.6.14-rc5-mm1/include/asm-x86_64/irq.h linux-2.6.14-rc5-mm1.perfctr27/include/asm-x86_64/irq.h --- linux-2.6.14-rc5-mm1/include/asm-x86_64/irq.h 2005-11-06 22:50:31.000000000 +0100 +++ linux-2.6.14-rc5-mm1.perfctr27/include/asm-x86_64/irq.h 2005-11-06 22:53:09.000000000 +0100 @@ -29,7 +29,7 @@ */ #define NR_VECTORS 256 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in hw_irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in hw_irq.h */ #ifdef CONFIG_PCI_MSI #define NR_IRQS FIRST_SYSTEM_VECTOR diff -rupN linux-2.6.14-rc5-mm1/include/asm-x86_64/processor.h linux-2.6.14-rc5-mm1.perfctr27/include/asm-x86_64/processor.h --- linux-2.6.14-rc5-mm1/include/asm-x86_64/processor.h 2005-11-06 22:51:02.000000000 +0100 +++ linux-2.6.14-rc5-mm1.perfctr27/include/asm-x86_64/processor.h 2005-11-06 22:53:09.000000000 +0100 @@ -254,6 +254,8 @@ struct thread_struct { unsigned io_bitmap_max; /* cached TLS descriptors. */ u64 tls_array[GDT_ENTRY_TLS_ENTRIES]; +/* performance counters */ + struct vperfctr *perfctr; } __attribute__((aligned(16))); #define INIT_THREAD { \ diff -rupN linux-2.6.14-rc5-mm1/include/asm-x86_64/system.h linux-2.6.14-rc5-mm1.perfctr27/include/asm-x86_64/system.h --- linux-2.6.14-rc5-mm1/include/asm-x86_64/system.h 2005-11-06 22:50:31.000000000 +0100 +++ linux-2.6.14-rc5-mm1.perfctr27/include/asm-x86_64/system.h 2005-11-06 22:53:09.000000000 +0100 @@ -26,7 +26,8 @@ #define __EXTRA_CLOBBER \ ,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15" -#define switch_to(prev,next,last) \ +#define switch_to(prev,next,last) do { \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile(SAVE_CONTEXT \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ @@ -46,7 +47,8 @@ [tif_fork] "i" (TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, thread_info)), \ [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ - : 
"memory", "cc" __EXTRA_CLOBBER) + : "memory", "cc" __EXTRA_CLOBBER); \ +} while (0) extern void load_gs_index(unsigned); diff -rupN linux-2.6.14-rc5-mm1/include/asm-x86_64/unistd.h linux-2.6.14-rc5-mm1.perfctr27/include/asm-x86_64/unistd.h --- linux-2.6.14-rc5-mm1/include/asm-x86_64/unistd.h 2005-11-06 22:51:02.000000000 +0100 +++ linux-2.6.14-rc5-mm1.perfctr27/include/asm-x86_64/unistd.h 2005-11-06 22:53:09.000000000 +0100 @@ -571,8 +571,16 @@ __SYSCALL(__NR_inotify_init, sys_inotify __SYSCALL(__NR_inotify_add_watch, sys_inotify_add_watch) #define __NR_inotify_rm_watch 255 __SYSCALL(__NR_inotify_rm_watch, sys_inotify_rm_watch) +#define __NR_vperfctr_open 256 +__SYSCALL(__NR_vperfctr_open, sys_vperfctr_open) +#define __NR_vperfctr_control (__NR_vperfctr_open+1) +__SYSCALL(__NR_vperfctr_control, sys_vperfctr_control) +#define __NR_vperfctr_write (__NR_vperfctr_open+2) +__SYSCALL(__NR_vperfctr_write, sys_vperfctr_write) +#define __NR_vperfctr_read (__NR_vperfctr_open+3) +__SYSCALL(__NR_vperfctr_read, sys_vperfctr_read) -#define __NR_syscall_max __NR_inotify_rm_watch +#define __NR_syscall_max __NR_vperfctr_read #ifndef __NO_STUBS /* user-visible error numbers are in the range -1 - -4095 */ diff -rupN linux-2.6.14-rc5-mm1/include/linux/sched.h linux-2.6.14-rc5-mm1.perfctr27/include/linux/sched.h --- linux-2.6.14-rc5-mm1/include/linux/sched.h 2005-11-06 22:51:02.000000000 +0100 +++ linux-2.6.14-rc5-mm1.perfctr27/include/linux/sched.h 2005-11-06 22:53:09.000000000 +0100 @@ -1223,6 +1223,9 @@ extern void unhash_process(struct task_s * subscriptions and synchronises with wait4(). Also used in procfs. Also * pins the final release of task.io_context. Also protects ->cpuset. * + * Synchronises set_cpus_allowed(), unlink, and creat of ->thread.perfctr. + * [if CONFIG_PERFCTR_VIRTUAL] + * * Nests both inside and outside of read_lock(&tasklist_lock). * It must not be nested with write_lock_irq(&tasklist_lock), * neither inside nor outside. 
diff -rupN linux-2.6.14-rc5-mm1/kernel/exit.c linux-2.6.14-rc5-mm1.perfctr27/kernel/exit.c --- linux-2.6.14-rc5-mm1/kernel/exit.c 2005-11-06 22:51:02.000000000 +0100 +++ linux-2.6.14-rc5-mm1.perfctr27/kernel/exit.c 2005-11-06 22:53:09.000000000 +0100 @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -100,6 +101,7 @@ repeat: zap_leader = (leader->exit_signal == -1); } + perfctr_release_task(p); sched_exit(p); write_unlock_irq(&tasklist_lock); spin_unlock(&p->proc_lock); diff -rupN linux-2.6.14-rc5-mm1/kernel/sched.c linux-2.6.14-rc5-mm1.perfctr27/kernel/sched.c --- linux-2.6.14-rc5-mm1/kernel/sched.c 2005-11-06 22:51:02.000000000 +0100 +++ linux-2.6.14-rc5-mm1.perfctr27/kernel/sched.c 2005-11-06 22:53:09.000000000 +0100 @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -4460,6 +4461,8 @@ int set_cpus_allowed(task_t *p, cpumask_ migration_req_t req; runqueue_t *rq; + perfctr_set_cpus_allowed(p, new_mask); + rq = task_rq_lock(p, &flags); if (!cpus_intersects(new_mask, cpu_online_map)) { ret = -EINVAL; diff -rupN linux-2.6.14-rc5-mm1/kernel/sys_ni.c linux-2.6.14-rc5-mm1.perfctr27/kernel/sys_ni.c --- linux-2.6.14-rc5-mm1/kernel/sys_ni.c 2005-08-29 14:34:34.000000000 +0200 +++ linux-2.6.14-rc5-mm1.perfctr27/kernel/sys_ni.c 2005-11-06 22:53:09.000000000 +0100 @@ -68,6 +68,10 @@ cond_syscall(compat_sys_mq_timedsend); cond_syscall(compat_sys_mq_timedreceive); cond_syscall(compat_sys_mq_notify); cond_syscall(compat_sys_mq_getsetattr); +cond_syscall(sys_vperfctr_open); +cond_syscall(sys_vperfctr_control); +cond_syscall(sys_vperfctr_write); +cond_syscall(sys_vperfctr_read); cond_syscall(sys_mbind); cond_syscall(sys_get_mempolicy); cond_syscall(sys_set_mempolicy); diff -rupN linux-2.6.14-rc5-mm1/kernel/timer.c linux-2.6.14-rc5-mm1.perfctr27/kernel/timer.c --- linux-2.6.14-rc5-mm1/kernel/timer.c 2005-11-06 22:51:02.000000000 +0100 +++ linux-2.6.14-rc5-mm1.perfctr27/kernel/timer.c 2005-11-06 22:53:09.000000000 +0100 @@ 
-32,6 +32,7 @@ #include #include #include +#include #include #include @@ -791,6 +792,7 @@ void update_process_times(int user_tick) account_user_time(p, jiffies_to_cputime(1)); else account_system_time(p, HARDIRQ_OFFSET, jiffies_to_cputime(1)); + perfctr_sample_thread(&p->thread); run_local_timers(); if (rcu_pending(cpu)) rcu_check_callbacks(cpu, user_tick); papi-5.6.0/src/libpfm4/lib/events/intel_slm_events.h000664 001750 001750 00000073401 13216244364 024464 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2013 Google, Inc * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. 
* * PMU: slm (Intel Silvermont) */ static const intel_x86_umask_t slm_icache[]={ { .uname = "ACCESSES", .udesc = "Instruction fetches, including uncacheacble fetches", .ucode = 0x300, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "MISSES", .udesc = "Count all instructions fetches that miss the icache or produce memory requests. This includes uncacheache fetches. Any instruction fetch miss is counted only once and not once for every cycle it is outstanding", .ucode = 0x200, .uflags= INTEL_X86_NCOMBO, }, { .uname = "HIT", .udesc = "Count all instructions fetches from the instruction cache", .ucode = 0x100, .uflags= INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t slm_uops_retired[]={ { .uname = "ANY", .udesc = "Micro-ops retired", .ucode = 0x1000, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "MS", .udesc = "Micro-ops retired that were supplied fro MSROM", .ucode = 0x0100, .uflags= INTEL_X86_NCOMBO, }, { .uname = "STALLED_CYCLES", .udesc = "Cycles no micro-ops retired", .ucode = 0x1000 | INTEL_X86_MOD_INV | (0x1 << INTEL_X86_CMASK_BIT), .uflags= INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_I | _INTEL_X86_ATTR_C, }, { .uname = "STALLS", .udesc = "Periods no micro-ops retired", .ucode = 0x1000 | INTEL_X86_MOD_EDGE | INTEL_X86_MOD_INV | (0x1 << INTEL_X86_CMASK_BIT), .uflags= INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_I | _INTEL_X86_ATTR_C | _INTEL_X86_ATTR_E, }, }; static const intel_x86_umask_t slm_inst_retired[]={ { .uname = "ANY_P", .udesc = "Instructions retired using generic counter (precise event)", .ucode = 0x0, .uflags= INTEL_X86_PEBS | INTEL_X86_DFL, }, { .uname = "ANY", .udesc = "Instructions retired using generic counter (precise event)", .uequiv = "ANY_P", .ucode = 0x0, .uflags= INTEL_X86_PEBS, }, }; static const intel_x86_umask_t slm_l2_reject_xq[]={ { .uname = "ALL", .udesc = "Number of demand and prefetch transactions that the L2 XQ rejects due to a full or near full condition which likely indicates back pressure from the IDI 
link. The XQ may reject transactions from the L2Q (non-cacheable requests), BBS (L2 misses) and WOB (L2 write-back victims)", .ucode = 0x000, .uflags= INTEL_X86_DFL, }, }; static const intel_x86_umask_t slm_machine_clears[]={ { .uname = "SMC", .udesc = "Self-Modifying Code detected", .ucode = 0x100, .uflags= INTEL_X86_DFL, }, { .uname = "MEMORY_ORDERING", .udesc = "Number of stalled cycles due to memory ordering", .ucode = 0x200, }, { .uname = "FP_ASSIST", .udesc = "Number of stalled cycle due to FPU assist", .ucode = 0x400, }, { .uname = "ALL", .udesc = "Count any the machine clears", .ucode = 0x800, }, { .uname = "ANY", .udesc = "Count any the machine clears", .uequiv = "ALL", .ucode = 0x800, }, }; static const intel_x86_umask_t slm_br_inst_retired[]={ { .uname = "ANY", .udesc = "Any retired branch instruction (Precise Event)", .ucode = 0x0, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL | INTEL_X86_PEBS, }, { .uname = "ALL_BRANCHES", .udesc = "Any Retired branch instruction (Precise Event)", .uequiv = "ANY", .ucode = 0x0, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "ALL_TAKEN_BRANCHES", .udesc = "Retired branch instructions (Precise Event)", .ucode = 0x8000, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, .grpid = 0, .ucntmsk = 0xfull, }, { .uname = "JCC", .udesc = "JCC instructions retired (Precise Event)", .ucode = 0x7e00, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "TAKEN_JCC", .udesc = "Taken JCC instructions retired (Precise Event)", .ucode = 0xfe00, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "CALL", .udesc = "Near call instructions retired (Precise Event)", .ucode = 0xf900, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "REL_CALL", .udesc = "Near relative call instructions retired (Precise Event)", .ucode = 0xfd00, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "IND_CALL", .udesc = "Near indirect call instructions retired (Precise Event)", .ucode = 0xfb00, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, 
}, { .uname = "RETURN", .udesc = "Near ret instructions retired (Precise Event)", .ucode = 0xf700, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "NON_RETURN_IND", .udesc = "Number of near indirect jmp and near indirect call instructions retired (Precise Event)", .ucode = 0xeb00, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "FAR_BRANCH", .udesc = "Far branch instructions retired (Precise Event)", .uequiv = "FAR", .ucode = 0xbf00, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "FAR", .udesc = "Far branch instructions retired (Precise Event)", .ucode = 0xbf00, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, }; static const intel_x86_umask_t slm_baclears[]={ { .uname = "ANY", .udesc = "BACLEARS asserted", .uequiv = "ALL", .ucode = 0x100, .uflags= INTEL_X86_NCOMBO, }, { .uname = "ALL", .udesc = "BACLEARS asserted", .ucode = 0x100, .uflags= INTEL_X86_DFL | INTEL_X86_NCOMBO, }, { .uname = "RETURN", .udesc = "Number of baclears for return branches", .ucode = 0x800, .uflags= INTEL_X86_NCOMBO, }, { .uname = "COND", .udesc = "Number of baclears for conditional branches", .ucode = 0x1000, .uflags= INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t slm_cpu_clk_unhalted[]={ { .uname = "CORE_P", .udesc = "Core cycles when core is not halted", .ucode = 0x0, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "BUS", .udesc = "Bus cycles when core is not halted. This event can give a measurement of the elapsed time. This events has a constant ratio with CPU_CLK_UNHALTED:REF event, which is the maximum bus to processor frequency ratio", .uequiv = "REF", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "REF", .udesc = "Number of reference cycles that the core is not in a halted state. The core enters the halted state when it is running the HLT instruction. In mobile systems, the core frequency may change from time to time. 
This event is not affected by core frequency changes but counts as if the core is running a the same maximum frequency all the time", .ucode = 0x100, .uflags= INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t slm_mem_uop_retired[]={ { .uname = "LD_DCU_MISS", .udesc = "Number of load uops retired that miss in L1 data cache. Note that prefetch misses will not be counted", .ucode = 0x100, }, { .uname = "LD_L2_HIT", .udesc = "Number of load uops retired that hit L2 (Precise Event)", .ucode = 0x200, .uflags= INTEL_X86_PEBS, }, { .uname = "LD_L2_MISS", .udesc = "Number of load uops retired that missed L2 (Precise Event)", .ucode = 0x400, .uflags= INTEL_X86_PEBS, }, { .uname = "LD_DTLB_MISS", .udesc = "Number of load uops retired that had a DTLB miss (Precise Event)", .ucode = 0x800, .uflags= INTEL_X86_PEBS, }, { .uname = "LD_UTLB_MISS", .udesc = "Number of load uops retired that had a UTLB miss", .ucode = 0x1000, }, { .uname = "HITM", .udesc = "Number of load uops retired that got data from the other core or from the other module and the line was modified (Precise Event)", .ucode = 0x2000, .uflags= INTEL_X86_PEBS, }, { .uname = "ANY_LD", .udesc = "Number of load uops retired", .ucode = 0x4000, }, { .uname = "ANY_ST", .udesc = "Number of store uops retired", .ucode = 0x8000, }, }; static const intel_x86_umask_t slm_llc_rqsts[]={ { .uname = "MISS", .udesc = "Number of L2 cache misses", .ucode = 0x4100, .uflags= INTEL_X86_NCOMBO, }, { .uname = "ANY", .udesc = "Number of L2 cache references", .ucode = 0x4f00, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, }; static const intel_x86_umask_t slm_rehabq[]={ { .uname = "LD_BLOCK_ST_FORWARD", .udesc = "Number of retired loads that were prohibited from receiving forwarded data from the store because of address mismatch (Precise Event)", .ucode = 0x0100, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "LD_BLOCK_STD_NOTREADY", .udesc = "Number of times forward was technically possible but did not occur because the store 
data was not available at the right time", .ucode = 0x0200, .uflags= INTEL_X86_NCOMBO, }, { .uname = "ST_SPLITS", .udesc = "Number of retired stores that experienced cache line boundary splits", .ucode = 0x0400, .uflags= INTEL_X86_NCOMBO, }, { .uname = "LD_SPLITS", .udesc = "Number of retired loads that experienced cache line boundary splits (Precise Event)", .ucode = 0x0800, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "LOCK", .udesc = "Number of retired memory operations with lock semantics. These are either implicit locked instructions such as XCHG or instructions with an explicit LOCK prefix", .ucode = 0x1000, .uflags= INTEL_X86_NCOMBO, }, { .uname = "STA_FULL", .udesc = "Number of retired stores that are delayed because there is not a store address buffer available", .ucode = 0x2000, .uflags= INTEL_X86_NCOMBO, }, { .uname = "ANY_LD", .udesc = "Number of load uops reissued from RehabQ", .ucode = 0x4000, .uflags= INTEL_X86_NCOMBO, }, { .uname = "ANY_ST", .udesc = "Number of store uops reissued from RehabQ", .ucode = 0x8000, .uflags= INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t slm_offcore_response[]={ { .uname = "DMND_DATA_RD", .udesc = "Request: number of demand and DCU prefetch data reads of full and partial cachelines as well as demand data page table entry cacheline reads. Does not count L2 data read prefetches or instruction fetches", .ucode = 1ULL << (0 + 8), .grpid = 0, }, { .uname = "DMND_RFO", .udesc = "Request: number of demand and DCU prefetch reads for ownership (RFO) requests generated by a write to data cacheline. Does not count L2 RFO prefetches", .ucode = 1ULL << (1 + 8), .grpid = 0, }, { .uname = "DMND_IFETCH", .udesc = "Request: number of demand and DCU prefetch instruction cacheline reads. 
Does not count L2 code read prefetches", .ucode = 1ULL << (2 + 8), .grpid = 0, }, { .uname = "WB", .udesc = "Request: number of writebacks (modified to exclusive) transactions", .ucode = 1ULL << (3 + 8), .grpid = 0, }, { .uname = "PF_L2_DATA_RD", .udesc = "Request: number of data cacheline reads generated by L2 prefetchers", .ucode = 1ULL << (4 + 8), .grpid = 0, }, { .uname = "PF_RFO", .udesc = "Request: number of RFO requests generated by L2 prefetchers", .ucode = 1ULL << (5 + 8), .grpid = 0, }, { .uname = "PF_IFETCH", .udesc = "Request: number of code reads generated by L2 prefetchers", .ucode = 1ULL << (6 + 8), .grpid = 0, }, { .uname = "PARTIAL_READ", .udesc = "Request: number of demand reads of partial cachelines (including UC, WC)", .ucode = 1ULL << (7 + 8), .grpid = 0, }, { .uname = "PARTIAL_WRITE", .udesc = "Request: number of demand RFO requests to write to partial cache lines (includes UC, WT, WP)", .ucode = 1ULL << (8 + 8), .grpid = 0, }, { .uname = "UC_IFETCH", .udesc = "Request: number of UC instruction fetches", .ucode = 1ULL << (9 + 8), .grpid = 0, }, { .uname = "BUS_LOCKS", .udesc = "Request: number bus lock and split lock requests", .ucode = 1ULL << (10 + 8), .grpid = 0, }, { .uname = "STRM_ST", .udesc = "Request: number of streaming store requests", .ucode = 1ULL << (11 + 8), .grpid = 0, }, { .uname = "SW_PREFETCH", .udesc = "Request: number of software prefetch requests", .ucode = 1ULL << (12 + 8), .grpid = 0, }, { .uname = "PF_L1_DATA_RD", .udesc = "Request: number of data cacheline reads generated by L1 prefetchers", .ucode = 1ULL << (13 + 8), .grpid = 0, }, { .uname = "PARTIAL_STRM_ST", .udesc = "Request: number of partial streaming store requests", .ucode = 1ULL << (14 + 8), .grpid = 0, }, { .uname = "OTHER", .udesc = "Request: counts one any other request that crosses IDI, including I/O", .ucode = 1ULL << (15+8), .grpid = 0, }, { .uname = "ANY_IFETCH", .udesc = "Request: combination of PF_IFETCH | DMND_IFETCH | UC_IFETCH", .uequiv = 
"PF_IFETCH:DMND_IFETCH:UC_IFETCH", .ucode = (1ULL << 6 | 1ULL << 2 | 1ULL << 9) << 8, .grpid = 0, }, { .uname = "ANY_REQUEST", .udesc = "Request: combination of all request umasks", .uequiv = "DMND_DATA_RD:DMND_RFO:DMND_IFETCH:WB:PF_L2_DATA_RD:PF_RFO:PF_IFETCH:PARTIAL_READ:PARTIAL_WRITE:UC_IFETCH:BUS_LOCKS:STRM_ST:SW_PREFETCH:PF_L1_DATA_RD:PARTIAL_STRM_ST:OTHER", .ucode = 0xffff00, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, .grpid = 0, }, { .uname = "ANY_DATA", .udesc = "Request: combination of DMND_DATA | PF_L1_DATA_RD | PF_L2_DATA_RD", .uequiv = "DMND_DATA_RD:PF_L1_DATA_RD:PF_L2_DATA_RD", .ucode = (1ULL << 0 | 1ULL << 4 | 1ULL << 13) << 8, .grpid = 0, }, { .uname = "ANY_RFO", .udesc = "Request: combination of DMND_RFO | PF_RFO", .uequiv = "DMND_RFO:PF_RFO", .ucode = (1ULL << 1 | 1ULL << 5) << 8, .grpid = 0, }, { .uname = "ANY_RESPONSE", .udesc = "Response: count any response type", .ucode = 1ULL << (16+8), .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL | INTEL_X86_EXCL_GRP_GT, .grpid = 1, }, { .uname = "L2_HIT", .udesc = "Supplier: counts L2 hits in M/E/S states", .ucode = 1ULL << (18+8), .grpid = 1, }, { .uname = "SNP_NONE", .udesc = "Snoop: counts number of times no snoop-related information is available", .ucode = 1ULL << (31+8), .grpid = 2, }, { .uname = "SNP_MISS", .udesc = "Snoop: counts number of times a snoop was needed and it missed all snooped caches", .ucode = 1ULL << (33+8), .grpid = 2, }, { .uname = "SNP_HIT", .udesc = "Snoop: counts number of times a snoop hits in the other module where no modified copies were found in the L1 cache of the other core", .ucode = 1ULL << (34+8), .grpid = 2, }, { .uname = "SNP_HITM", .udesc = "Snoop: counts number of times a snoop hits in the other module where modified copies were found in the L1 cache of the other core", .ucode = 1ULL << (36+8), .grpid = 2, }, { .uname = "NON_DRAM", .udesc = "Snoop: counts number of times target was a non-DRAM system address. 
This includes MMIO transactions", .ucode = 1ULL << (37+8), .grpid = 2, }, { .uname = "SNP_ANY", .udesc = "Snoop: any snoop reason", .ucode = 0x7dULL << (31+8), .uequiv = "SNP_NONE:SNP_MISS:SNP_HIT:SNP_HITM:NON_DRAM", .uflags= INTEL_X86_DFL, .grpid = 2, }, }; static const intel_x86_umask_t slm_br_misp_retired[]={ { .uname = "ALL_BRANCHES", .udesc = "All mispredicted branches (Precise Event)", .uequiv = "ANY", .ucode = 0x0000, /* architectural encoding */ .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "ANY", .udesc = "All mispredicted branches (Precise Event)", .ucode = 0x0000, /* architectural encoding */ .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS | INTEL_X86_DFL, }, { .uname = "JCC", .udesc = "Number of mispredicted conditional branch instructions retired (Precise Event)", .ucode = 0x7e00, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "NON_RETURN_IND", .udesc = "Number of mispredicted non-return branch instructions retired (Precise Event)", .ucode = 0xeb00, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "RETURN", .udesc = "Number of mispredicted return branch instructions retired (Precise Event)", .ucode = 0xf700, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "IND_CALL", .udesc = "Number of mispredicted indirect call branch instructions retired (Precise Event)", .ucode = 0xfb00, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "TAKEN_JCC", .udesc = "Number of mispredicted taken conditional branch instructions retired (Precise Event)", .ucode = 0xfe00, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, }; static const intel_x86_umask_t slm_no_alloc_cycles[]={ { .uname = "ANY", .udesc = "Number of cycles when the front-end does not provide any instructions to be allocated for any reason", .ucode = 0x3f00, .uequiv = "ALL", .uflags = INTEL_X86_NCOMBO, }, { .uname = "ALL", .udesc = "Number of cycles when the front-end does not provide any instructions to be allocated for any reason", .ucode = 0x3f00, .uflags= 
INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "NOT_DELIVERED", .udesc = "Number of cycles when the front-end does not provide any instructions to be allocated but the back-end is not stalled", .ucode = 0x5000, .uflags= INTEL_X86_NCOMBO, }, { .uname = "MISPREDICTS", .udesc = "Number of cycles when no uops are allocated and the alloc pipe is stalled waiting for a mispredicted jump to retire", .ucode = 0x400, .uflags= INTEL_X86_NCOMBO, }, { .uname = "RAT_STALL", .udesc = "Number of cycles when no uops are allocated and a RAT stall is asserted", .ucode = 0x2000, .uflags= INTEL_X86_NCOMBO, }, { .uname = "ROB_FULL", .udesc = "Number of cycles when no uops are allocated and the ROB is full (less than 2 entries available)", .ucode = 0x0100, .uflags= INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t slm_rs_full_stall[]={ { .uname = "MEC", .udesc = "Number of cycles when the allocation pipeline is stalled due to the RS for the MEC cluster is full", .ucode = 0x0100, }, { .uname = "ALL", .udesc = "Number of cycles when the allocation pipeline is stalled due any one of the RS being full", .ucode = 0x1f00, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "ANY", .udesc = "Number of cycles when the allocation pipeline is stalled due any one of the RS being full", .ucode = 0x1f00, .uequiv = "ALL", .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t slm_cycles_div_busy[]={ { .uname = "ANY", .udesc = "Number of cycles the divider is busy", .ucode = 0x0100, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, }; static const intel_x86_umask_t slm_ms_decoded[]={ { .uname = "ENTRY", .udesc = "Number of times the MSROM starts a flow of uops", .ucode = 0x0100, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, }; static const intel_x86_umask_t slm_decode_restriction[]={ { .uname = "PREDECODE_WRONG", .udesc = "Number of times the prediction (from the predecode cache) for instruction length is incorrect", .ucode = 0x0100, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, }; 
static const intel_x86_umask_t slm_fetch_stall[]={ { .uname = "ICACHE_FILL_PENDING_CYCLES", .udesc = "Number of cycles the NIP stalls because of an icache miss. This is a cumulative count of cycles the NIP stalled for all icache misses", .ucode = 0x0400, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, }; static const intel_x86_umask_t slm_core_reject_l2q[]={ { .uname = "ALL", .udesc = "Number of requests that were not accepted into the L2Q because the L2Q was FULL", .ucode = 0x0000, .uflags = INTEL_X86_DFL, }, }; static const intel_x86_umask_t slm_page_walks[]={ { .uname = "CYCLES", .udesc = "Total cycles for all the page walks. (I-side and D-side)", .ucode = 0x0300, .uflags= INTEL_X86_NCOMBO, }, { .uname = "WALKS", .udesc = "Total number of page walks. (I-side and D-side)", .ucode = 0x0300 | INTEL_X86_MOD_EDGE, .uequiv = "D_SIDE_WALKS:I_SIDE_WALKS", .uflags = INTEL_X86_NCOMBO, }, { .uname = "D_SIDE_CYCLES", .udesc = "Number of cycles when a D-side page walk is in progress", .ucode = 0x0100, }, { .uname = "D_SIDE_WALKS", .udesc = "Number of D-side page walks", .ucode = 0x0100 | INTEL_X86_MOD_EDGE, .uequiv = "D_SIDE_CYCLES:e", }, { .uname = "I_SIDE_CYCLES", .udesc = "Number of cycles when a I-side page walk is in progress", .ucode = 0x0200, }, { .uname = "I_SIDE_WALKS", .udesc = "Number of I-side page walks", .ucode = 0x0200 | INTEL_X86_MOD_EDGE, .uequiv = "I_SIDE_CYCLES:e", }, }; static const intel_x86_entry_t intel_slm_pe[]={ { .name = "UNHALTED_CORE_CYCLES", .desc = "Unhalted core cycles", .modmsk = INTEL_V2_ATTRS, .cntmsk = 0x200000003ull, .code = 0x3c, }, { .name = "UNHALTED_REFERENCE_CYCLES", .desc = "Unhalted reference cycle", .modmsk = INTEL_FIXED3_ATTRS, .cntmsk = 0x400000000ull, .code = 0x0300, /* pseudo encoding */ .flags = INTEL_X86_FIXED, }, { .name = "INSTRUCTION_RETIRED", .desc = "Instructions retired", .modmsk = INTEL_V2_ATTRS, .cntmsk = 0x100000003ull, .code = 0xc0, }, { .name = "INSTRUCTIONS_RETIRED", .desc = "This is an alias for 
INSTRUCTION_RETIRED", .modmsk = INTEL_V2_ATTRS, .equiv = "INSTRUCTION_RETIRED", .cntmsk = 0x10003, .code = 0xc0, }, { .name = "LLC_REFERENCES", .desc = "Last level of cache references", .modmsk = INTEL_V2_ATTRS, .cntmsk = 0x3, .code = 0x4f2e, }, { .name = "LAST_LEVEL_CACHE_REFERENCES", .desc = "This is an alias for LLC_REFERENCES", .modmsk = INTEL_V2_ATTRS, .equiv = "LLC_REFERENCES", .cntmsk = 0x3, .code = 0x4f2e, }, { .name = "LLC_MISSES", .desc = "Last level of cache misses", .modmsk = INTEL_V2_ATTRS, .cntmsk = 0x3, .code = 0x412e, }, { .name = "LAST_LEVEL_CACHE_MISSES", .desc = "This is an alias for LLC_MISSES", .modmsk = INTEL_V2_ATTRS, .equiv = "LLC_MISSES", .cntmsk = 0x3, .code = 0x412e, }, { .name = "BRANCH_INSTRUCTIONS_RETIRED", .desc = "Branch instructions retired", .modmsk = INTEL_V2_ATTRS, .equiv = "BR_INST_RETIRED:ANY", .cntmsk = 0x3, .code = 0xc4, }, { .name = "MISPREDICTED_BRANCH_RETIRED", .desc = "Mispredicted branch instruction retired", .equiv = "BR_MISP_RETIRED", .modmsk = INTEL_V2_ATTRS, .cntmsk = 0x3, .code = 0xc5, .flags= INTEL_X86_PEBS, }, /* begin model specific events */ { .name = "DECODE_RESTRICTION", .desc = "Instruction length prediction delay", .modmsk = INTEL_V2_ATTRS, .cntmsk = 0x3, .code = 0xe9, .ngrp = 1, .numasks = LIBPFM_ARRAY_SIZE(slm_decode_restriction), .umasks = slm_decode_restriction, }, { .name = "L2_REJECT_XQ", .desc = "Rejected L2 requests to XQ", .modmsk = INTEL_V2_ATTRS, .cntmsk = 0x3, .code = 0x30, .numasks = LIBPFM_ARRAY_SIZE(slm_l2_reject_xq), .ngrp = 1, .umasks = slm_l2_reject_xq, }, { .name = "ICACHE", .desc = "Instruction fetches", .modmsk = INTEL_V2_ATTRS, .cntmsk = 0x3, .code = 0x80, .numasks = LIBPFM_ARRAY_SIZE(slm_icache), .ngrp = 1, .umasks = slm_icache, }, { .name = "UOPS_RETIRED", .desc = "Micro-ops retired", .modmsk = INTEL_V2_ATTRS, .cntmsk = 0x3, .code = 0xc2, .numasks = LIBPFM_ARRAY_SIZE(slm_uops_retired), .ngrp = 1, .umasks = slm_uops_retired, }, { .name = "INST_RETIRED", .desc = "Instructions retired", 
.modmsk = INTEL_V2_ATTRS, .cntmsk = 0x3, .code = 0xc0, .flags= INTEL_X86_PEBS, .numasks = LIBPFM_ARRAY_SIZE(slm_inst_retired), .ngrp = 1, .umasks = slm_inst_retired, }, { .name = "CYCLES_DIV_BUSY", .desc = "Cycles the divider is busy", .modmsk = INTEL_V2_ATTRS, .cntmsk = 0x3, .code = 0xcd, .numasks = LIBPFM_ARRAY_SIZE(slm_cycles_div_busy), .ngrp = 1, .umasks = slm_cycles_div_busy, }, { .name = "RS_FULL_STALL", .desc = "RS full", .modmsk = INTEL_V2_ATTRS, .cntmsk = 0x3, .code = 0xcb, .numasks = LIBPFM_ARRAY_SIZE(slm_rs_full_stall), .ngrp = 1, .umasks = slm_rs_full_stall, }, { .name = "LLC_RQSTS", .desc = "L2 cache requests", .modmsk = INTEL_V2_ATTRS, .cntmsk = 0x3, .code = 0x2e, .numasks = LIBPFM_ARRAY_SIZE(slm_llc_rqsts), .ngrp = 1, .umasks = slm_llc_rqsts, }, { .name = "MACHINE_CLEARS", .desc = "Self-Modifying Code detected", .modmsk = INTEL_V2_ATTRS, .cntmsk = 0x3, .code = 0xc3, .numasks = LIBPFM_ARRAY_SIZE(slm_machine_clears), .ngrp = 1, .umasks = slm_machine_clears, }, { .name = "BR_INST_RETIRED", .desc = "Retired branch instructions", .modmsk = INTEL_V2_ATTRS, .cntmsk = 0x3, .code = 0xc4, .numasks = LIBPFM_ARRAY_SIZE(slm_br_inst_retired), .flags= INTEL_X86_PEBS, .ngrp = 1, .umasks = slm_br_inst_retired, }, { .name = "BR_MISP_RETIRED", .desc = "Mispredicted retired branch instructions (Precise Event)", .modmsk = INTEL_V2_ATTRS, .cntmsk = 0x3, .code = 0xc5, .flags= INTEL_X86_PEBS, .numasks = LIBPFM_ARRAY_SIZE(slm_br_misp_retired), .ngrp = 1, .umasks = slm_br_misp_retired, }, { .name = "BR_MISP_INST_RETIRED", /* for backward compatibility with older version */ .desc = "Mispredicted retired branch instructions (Precise Event)", .modmsk = INTEL_V2_ATTRS, .equiv = "BR_MISP_RETIRED", .cntmsk = 0x3, .code = 0xc5, .flags= INTEL_X86_PEBS, .numasks = LIBPFM_ARRAY_SIZE(slm_br_misp_retired), .ngrp = 1, .umasks = slm_br_misp_retired, }, { .name = "MS_DECODED", .desc = "MS decoder", .modmsk = INTEL_V2_ATTRS, .cntmsk = 0x3, .code = 0xe7, .numasks = 
LIBPFM_ARRAY_SIZE(slm_ms_decoded), .ngrp = 1, .umasks = slm_ms_decoded, }, { .name = "BACLEARS", .desc = "Branch address calculator", .modmsk = INTEL_V2_ATTRS, .cntmsk = 0x3, .code = 0xe6, .numasks = LIBPFM_ARRAY_SIZE(slm_baclears), .ngrp = 1, .umasks = slm_baclears, }, { .name = "NO_ALLOC_CYCLES", .desc = "Front-end allocation", .modmsk = INTEL_V2_ATTRS, .cntmsk = 0x3, .code = 0xca, .numasks = LIBPFM_ARRAY_SIZE(slm_no_alloc_cycles), .ngrp = 1, .umasks = slm_no_alloc_cycles, }, { .name = "CPU_CLK_UNHALTED", .desc = "Core cycles when core is not halted", .modmsk = INTEL_V2_ATTRS, .cntmsk = 0x3, .code = 0x3c, .numasks = LIBPFM_ARRAY_SIZE(slm_cpu_clk_unhalted), .ngrp = 1, .umasks = slm_cpu_clk_unhalted, }, { .name = "MEM_UOP_RETIRED", .desc = "Retired loads micro-ops", .modmsk = INTEL_V2_ATTRS, .cntmsk = 0x3, .code = 0x4, .flags= INTEL_X86_PEBS, .numasks = LIBPFM_ARRAY_SIZE(slm_mem_uop_retired), .ngrp = 1, .umasks = slm_mem_uop_retired, }, { .name = "CORE_REJECT_L2Q", .desc = "Demand and L1 prefetcher requests rejected by L2", .modmsk = INTEL_V2_ATTRS, .cntmsk = 0x3, .code = 0x31, .numasks = LIBPFM_ARRAY_SIZE(slm_core_reject_l2q), .ngrp = 1, .umasks = slm_core_reject_l2q, }, { .name = "REHABQ", .desc = "Memory reference queue", .modmsk = INTEL_V2_ATTRS, .cntmsk = 0x3, .code = 0x03, .flags= INTEL_X86_PEBS, .numasks = LIBPFM_ARRAY_SIZE(slm_rehabq), .ngrp = 1, .umasks = slm_rehabq, }, { .name = "FETCH_STALL", .desc = "Fetch stalls", .modmsk = INTEL_V2_ATTRS, .cntmsk = 0x3, .code = 0x86, .numasks = LIBPFM_ARRAY_SIZE(slm_fetch_stall), .ngrp = 1, .umasks = slm_fetch_stall, }, { .name = "PAGE_WALKS", .desc = "Page walker", .modmsk = INTEL_V2_ATTRS, .cntmsk = 0x3, .code = 0x5, .numasks = LIBPFM_ARRAY_SIZE(slm_page_walks), .ngrp = 1, .umasks = slm_page_walks, }, { .name = "OFFCORE_RESPONSE_0", .desc = "Offcore response event (must provide at least one request type and either any_response or any combination of supplier + snoop)", .modmsk = INTEL_V2_ATTRS, .cntmsk = 0xf, .code = 
0x01b7, .flags= INTEL_X86_NHM_OFFCORE, .numasks = LIBPFM_ARRAY_SIZE(slm_offcore_response), .ngrp = 3, .umasks = slm_offcore_response, }, { .name = "OFFCORE_RESPONSE_1", .desc = "Offcore response event (must provide at least one request type and either any_response or any combination of supplier + snoop)", .modmsk = INTEL_V2_ATTRS, .cntmsk = 0xf, .code = 0x02b7, .flags= INTEL_X86_NHM_OFFCORE, .numasks = LIBPFM_ARRAY_SIZE(slm_offcore_response), .ngrp = 3, .umasks = slm_offcore_response, /* identical to actual umasks list for this event */ }, }; papi-5.6.0/src/perfctr-2.7.x/etc/costs/Athlon-1.8000664 001750 001750 00000001365 13216244367 023136 0ustar00jshenry1963jshenry1963000000 000000 [data from a 1.8 GHz Athlon] PERFCTR INIT: vendor 2, family 6, model 10, stepping 0, clock 1837590 kHz PERFCTR INIT: NITER == 64 PERFCTR INIT: loop overhead is 109 cycles PERFCTR INIT: rdtsc cost is 15.9 cycles (1130 total) PERFCTR INIT: rdpmc cost is 14.2 cycles (1023 total) PERFCTR INIT: rdmsr (counter) cost is 51.4 cycles (3399 total) PERFCTR INIT: rdmsr (evntsel) cost is 52.5 cycles (3474 total) PERFCTR INIT: wrmsr (counter) cost is 80.7 cycles (5280 total) PERFCTR INIT: wrmsr (evntsel) cost is 231.2 cycles (14908 total) PERFCTR INIT: read cr4 cost is 1.7 cycles (220 total) PERFCTR INIT: write cr4 cost is 62.4 cycles (4105 total) PERFCTR INIT: write LVTPC cost is 4.4 cycles (395 total) perfctr: driver 2.7.3, cpu type AMD K7/K8 at 1837590 kHz papi-5.6.0/src/perfctr-2.7.x/etc/costs/PentiumII-350000664 001750 001750 00000001315 13216244367 023550 0ustar00jshenry1963jshenry1963000000 000000 [data from a 350 MHz Pentium II (Deschutes)] PERFCTR INIT: vendor 0, family 6, model 5, stepping 1, clock 349410 kHz PERFCTR INIT: NITER == 64 PERFCTR INIT: loop overhead is 88 cycles PERFCTR INIT: rdtsc cost is 33.7 cycles (2250 total) PERFCTR INIT: rdpmc cost is 29.6 cycles (1986 total) PERFCTR INIT: rdmsr (counter) cost is 81.2 cycles (5289 total) PERFCTR INIT: rdmsr (evntsel) cost is 69.4 
cycles (4534 total) PERFCTR INIT: wrmsr (counter) cost is 87.4 cycles (5684 total) PERFCTR INIT: wrmsr (evntsel) cost is 79.1 cycles (5153 total) PERFCTR INIT: read cr4 cost is 1.9 cycles (211 total) PERFCTR INIT: write cr4 cost is 42.2 cycles (2792 total) perfctr: driver 2.3.4, cpu type Intel Pentium II at 349410 kHz papi-5.6.0/src/perfctr-2.7.x/etc/costs/Athlon-1.2000664 001750 001750 00000001267 13216244367 023131 0ustar00jshenry1963jshenry1963000000 000000 [data from a 1.2 GHz Athlon] PERFCTR INIT: vendor 2, family 6, model 4, stepping 2, clock 1200062 kHz PERFCTR INIT: NITER == 64 PERFCTR INIT: loop overhead is 85 cycles PERFCTR INIT: rdtsc cost is 10.7 cycles (773 total) PERFCTR INIT: rdpmc cost is 18.6 cycles (1280 total) PERFCTR INIT: rdmsr (counter) cost is 59.9 cycles (3919 total) PERFCTR INIT: rdmsr (evntsel) cost is 52.4 cycles (3441 total) PERFCTR INIT: wrmsr (counter) cost is 79.8 cycles (5198 total) PERFCTR INIT: wrmsr (evntsel) cost is 231.8 cycles (14925 total) PERFCTR INIT: read cr4 cost is 9.8 cycles (715 total) PERFCTR INIT: write cr4 cost is 63.1 cycles (4129 total) perfctr: driver 2.3.10, cpu type AMD K7 at 1200062 kHz papi-5.6.0/src/libpfm4/lib/pfmlib_sicortex_priv.h000664 001750 001750 00000011113 13216244365 024030 0ustar00jshenry1963jshenry1963000000 000000 /* * Contributed by Philip Mucci based on code from * Copyright (c) 2004-2006 Hewlett-Packard Development Company, L.P. 
* Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux/ia64. */ #ifndef __PFMLIB_SICORTEX_PRIV_H__ #define __PFMLIB_SICORTEX_PRIV_H__ #include "pfmlib_gen_mips64_priv.h" #define PFMLIB_SICORTEX_MAX_UMASK 5 typedef struct { char *pme_uname; /* unit mask name */ char *pme_udesc; /* event/umask description */ unsigned int pme_ucode; /* unit mask code */ } pme_sicortex_umask_t; typedef struct { char *pme_name; char *pme_desc; /* text description of the event */ unsigned int pme_code; /* event mask, holds room for four events, low 8 bits cntr0, ... 
high 8 bits cntr3 */ unsigned int pme_counters; /* Which counter event lives on */ unsigned int pme_numasks; /* number of umasks */ pme_sicortex_umask_t pme_umasks[PFMLIB_SICORTEX_MAX_UMASK]; /* umask desc */ } pme_sicortex_entry_t; /* * SiCortex specific */ typedef union { uint64_t val; /* complete register value */ struct { unsigned long sel_exl:1; /* int level */ unsigned long sel_os:1; /* system level */ unsigned long sel_sup:1; /* supervisor level */ unsigned long sel_usr:1; /* user level */ unsigned long sel_int:1; /* enable intr */ unsigned long sel_event_mask:6; /* event mask */ unsigned long sel_res1:23; /* reserved */ unsigned long sel_res2:32; /* reserved */ } perfsel; } pfm_sicortex_sel_reg_t; #define PMU_SICORTEX_SCB_NUM_COUNTERS 256 typedef union { uint64_t val; struct { unsigned long Interval:4; unsigned long IntBit:5; unsigned long NoInc:1; unsigned long AddrAssert:1; unsigned long MagicEvent:2; unsigned long Reserved:19; } sicortex_ScbPerfCtl_reg; struct { unsigned long HistGte:20; unsigned long Reserved:12; } sicortex_ScbPerfHist_reg; struct { unsigned long Bucket:8; unsigned long Reserved:24; } sicortex_ScbPerfBuckNum_reg; struct { unsigned long ena:1; unsigned long Reserved:31; } sicortex_ScbPerfEna_reg; struct { unsigned long event:15; unsigned long hist:1; unsigned long ifOther:2; unsigned long Reserved:15; } sicortex_ScbPerfBucket_reg; } pmc_sicortex_scb_reg_t; typedef union { uint64_t val; struct { unsigned long Reserved:2; uint64_t VPCL:38; unsigned long VPCH:2; } sicortex_CpuPerfVPC_reg; struct { unsigned long Reserved:5; unsigned long PEA:31; unsigned long Reserved2:12; unsigned long ASID:8; unsigned long L2STOP:4; unsigned long L2STATE:3; unsigned long L2HIT:1; } sicortex_CpuPerfPEA_reg; } pmd_sicortex_cpu_reg_t; #define PFMLIB_SICORTEX_INPUT_SCB_NONE (unsigned long)0x0 #define PFMLIB_SICORTEX_INPUT_SCB_INTERVAL (unsigned long)0x1 #define PFMLIB_SICORTEX_INPUT_SCB_NOINC (unsigned long)0x2 #define PFMLIB_SICORTEX_INPUT_SCB_HISTGTE 
(unsigned long)0x4 #define PFMLIB_SICORTEX_INPUT_SCB_BUCKET (unsigned long)0x8 static pme_sicortex_umask_t sicortex_scb_umasks[PFMLIB_SICORTEX_MAX_UMASK] = { { "IFOTHER_NONE","Both buckets count independently",0x00 }, { "IFOTHER_AND","Increment where this event counts and the opposite bucket counts",0x02 }, { "IFOTHER_ANDNOT","Increment where this event counts and the opposite bucket does not",0x04 }, { "HIST_NONE","Count cycles where the event is asserted",0x0 }, { "HIST_EDGE","Histogram on edges of the specified event",0x1 } }; #endif /* __PFMLIB_SICORTEX_PRIV_H__ */ papi-5.6.0/man/man3/PAPIF_read.3000664 001750 001750 00000000755 13216244355 020074 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPIF_read" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPIF_read \- .PP Read hardware counters from an event set\&. .SH SYNOPSIS .br .PP .SH "Detailed Description" .PP .PP \fBFortran Interface:\fP .RS 4 #include 'fpapi\&.h' .br \fBPAPIF_read\fP(C_INT EventSet, C_LONG_LONG(*) values, C_INT check ) .RE .PP \fBSee Also:\fP .RS 4 \fBPAPI_read\fP .RE .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. 
papi-5.6.0/src/libpfm4/000775 001750 001750 00000000000 13216244473 016663 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/perfctr-2.7.x/etc/costs/Pentium4M-1.8000664 001750 001750 00000001572 13216244367 023533 0ustar00jshenry1963jshenry1963000000 000000 [data from a 1.8GHz Mobile P4-M] PERFCTR INIT: vendor 0, family 15, model 2, stepping 7, clock 1789893 kHz PERFCTR INIT: NITER == 64 PERFCTR INIT: loop overhead is 292 cycles PERFCTR INIT: rdtsc cost is 81.8 cycles (5528 total) PERFCTR INIT: rdpmc cost is 145.8 cycles (9628 total) PERFCTR INIT: rdmsr (counter) cost is 254.5 cycles (16580 total) PERFCTR INIT: rdmsr (escr) cost is 165.8 cycles (10904 total) PERFCTR INIT: wrmsr (counter) cost is 792.8 cycles (51032 total) PERFCTR INIT: wrmsr (escr) cost is 872.6 cycles (56144 total) PERFCTR INIT: read cr4 cost is 4.6 cycles (588 total) PERFCTR INIT: write cr4 cost is 253.4 cycles (16512 total) PERFCTR INIT: rdpmc (fast) cost is 60.3 cycles (4152 total) PERFCTR INIT: rdmsr (cccr) cost is 166.5 cycles (10952 total) PERFCTR INIT: wrmsr (cccr) cost is 832.8 cycles (53592 total) perfctr: driver 2.7.3, cpu type Intel P4 at 1789893 kHz papi-5.6.0/man/man3/PAPIF_stop_counters.3000664 001750 001750 00000001041 13216244356 022056 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPIF_stop_counters" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPIF_stop_counters \- .PP Stop counting hardware events and reset values to zero\&. .SH SYNOPSIS .br .PP .SH "Detailed Description" .PP .PP \fBFortran Interface:\fP .RS 4 #include 'fpapi\&.h' .br \fBPAPIF_stop_counters\fP( C_LONG_LONG(*) values, C_INT array_len, C_INT check ) .RE .PP \fBSee Also:\fP .RS 4 \fBPAPI_stop_counters\fP .RE .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. 
papi-5.6.0/src/libpfm-3.y/lib/pfmlib_gen_ia32.c000664 001750 001750 00000056626 13216244363 023055 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_gen_ia32.c : Intel architectural PMU v1, v2, v3 * * The file provides support for the Intel architectural PMU v1 and v2. * * Copyright (c) 2005-2007 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * * This file implements supports for the IA-32 architectural PMU as specified * in the following document: * "IA-32 Intel Architecture Software Developer's Manual - Volume 3B: System * Programming Guide" */ #include #include #include #include #include /* public headers */ #include /* private headers */ #include "pfmlib_priv.h" /* library private */ #include "pfmlib_gen_ia32_priv.h" /* architecture private */ #include "gen_ia32_events.h" /* architected event table */ /* let's define some handy shortcuts! 
*/ #define sel_event_select perfevtsel.sel_event_select #define sel_unit_mask perfevtsel.sel_unit_mask #define sel_usr perfevtsel.sel_usr #define sel_os perfevtsel.sel_os #define sel_edge perfevtsel.sel_edge #define sel_pc perfevtsel.sel_pc #define sel_int perfevtsel.sel_int #define sel_any perfevtsel.sel_any #define sel_en perfevtsel.sel_en #define sel_inv perfevtsel.sel_inv #define sel_cnt_mask perfevtsel.sel_cnt_mask pfm_pmu_support_t *gen_support; /* * Description of the PMC/PMD register mappings use by * this module (as reported in pfmlib_reg_t.reg_num) * * For V1 (up to 16 generic counters 0-15): * * 0 -> PMC0 -> PERFEVTSEL0 -> MSR @ 0x186 * 1 -> PMC1 -> PERFEVTSEL1 -> MSR @ 0x187 * ... * n -> PMCn -> PERFEVTSELn -> MSR @ 0x186+n * * 0 -> PMD0 -> IA32_PMC0 -> MSR @ 0xc1 * 1 -> PMD1 -> IA32_PMC1 -> MSR @ 0xc2 * ... * n -> PMDn -> IA32_PMCn -> MSR @ 0xc1+n * * For V2 (up to 16 generic and 16 fixed counters): * * 0 -> PMC0 -> PERFEVTSEL0 -> MSR @ 0x186 * 1 -> PMC1 -> PERFEVTSEL1 -> MSR @ 0x187 * ... * 15 -> PMC15 -> PERFEVTSEL15 -> MSR @ 0x186+15 * * 16 -> PMC16 -> IA32_FIXED_CTR_CTRL -> MSR @ 0x38d * * 0 -> PMD0 -> IA32_PMC0 -> MSR @ 0xc1 * 1 -> PMD1 -> IA32_PMC1 -> MSR @ 0xc2 * ... * 15 -> PMD15 -> IA32_PMC15 -> MSR @ 0xc1+15 * * 16 -> PMD16 -> IA32_FIXED_CTR0 -> MSR @ 0x309 * 17 -> PMD17 -> IA32_FIXED_CTR1 -> MSR @ 0x30a * ... 
* n -> PMDn -> IA32_FIXED_CTRn -> MSR @ 0x309+n */ #define GEN_IA32_SEL_BASE 0x186 #define GEN_IA32_CTR_BASE 0xc1 #define GEN_IA32_FIXED_CTR_BASE 0x309 #define FIXED_PMD_BASE 16 #define PFMLIB_GEN_IA32_ALL_FLAGS \ (PFM_GEN_IA32_SEL_INV|PFM_GEN_IA32_SEL_EDGE|PFM_GEN_IA32_SEL_ANYTHR) static char * pfm_gen_ia32_get_event_name(unsigned int i); static pme_gen_ia32_entry_t *gen_ia32_pe; static int gen_ia32_cycle_event, gen_ia32_inst_retired_event; static unsigned int num_fixed_cnt, num_gen_cnt, pmu_version; #ifdef __i386__ static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { /* * because ebx is used in Pic mode, we need to save/restore because * cpuid clobbers it. I could not figure out a way to get ebx out in * one cpuid instruction. To extract ebx, we need to move it to another * register (here eax) */ __asm__("pushl %%ebx;cpuid; popl %%ebx" :"=a" (*eax) : "a" (op) : "ecx", "edx"); __asm__("pushl %%ebx;cpuid; movl %%ebx, %%eax;popl %%ebx" :"=a" (*ebx) : "a" (op) : "ecx", "edx"); } #else static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { __asm__("cpuid" : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "0" (op), "c"(0)); } #endif static pfmlib_regmask_t gen_ia32_impl_pmcs, gen_ia32_impl_pmds; /* * create architected event table */ static int create_arch_event_table(unsigned int mask) { pme_gen_ia32_entry_t *pe; unsigned int i, num_events = 0; unsigned int m; /* * first pass: count the number of supported events */ m = mask; for(i=0; i < 7; i++, m>>=1) { if ((m & 0x1) == 0) num_events++; } gen_ia32_support.pme_count = num_events; gen_ia32_pe = calloc(num_events, sizeof(pme_gen_ia32_entry_t)); if (gen_ia32_pe == NULL) return PFMLIB_ERR_NOTSUPP; /* * second pass: populate the table */ gen_ia32_cycle_event = gen_ia32_inst_retired_event = -1; m = mask; for(i=0, pe = gen_ia32_pe; i < 7; i++, m>>=1) { if ((m & 0x1) == 0) { *pe = 
gen_ia32_all_pe[i];
			/*
			 * setup default event: cycles and inst_retired
			 */
			if (i == PME_GEN_IA32_UNHALTED_CORE_CYCLES)
				gen_ia32_cycle_event = pe - gen_ia32_pe;
			if (i == PME_GEN_IA32_INSTRUCTIONS_RETIRED)
				gen_ia32_inst_retired_event = pe - gen_ia32_pe;
			pe++;
		}
	}
	return PFMLIB_SUCCESS;
}

/*
 * Check whether the processor implements the architected PMU.
 *
 * Returns PFMLIB_ERR_NOTSUPP when the family is below 5, when CPUID
 * does not implement function 0xa, or when the PMU version reported by
 * function 0xa is zero. Returns PFMLIB_SUCCESS otherwise.
 */
static int
check_arch_pmu(int family)
{
	union {
		unsigned int val;
		pmu_eax_t eax;
		pmu_edx_t edx;
	} eax, ecx, edx, ebx;

	/*
	 * check family number to reject for processors
	 * older than Pentium (family=5). Those processors
	 * did not have the CPUID instruction
	 */
	if (family < 5)
		return PFMLIB_ERR_NOTSUPP;

	/*
	 * check if CPU supports 0xa function of CPUID
	 * 0xa started with Core Duo. Needed to detect if
	 * architected PMU is present
	 */
	cpuid(0x0, &eax.val, &ebx.val, &ecx.val, &edx.val);
	if (eax.val < 0xa)
		return PFMLIB_ERR_NOTSUPP;

	/*
	 * extract architected PMU information
	 */
	cpuid(0xa, &eax.val, &ebx.val, &ecx.val, &edx.val);

	/*
	 * version must be greater than zero
	 */
	return eax.eax.version < 1 ? PFMLIB_ERR_NOTSUPP : PFMLIB_SUCCESS;
}

/*
 * Detect an Intel processor with an architected PMU.
 *
 * The "vendor_id" and "cpu family" attributes are obtained through
 * __pfm_getcpuinfo_attr(); the vendor must be "GenuineIntel". The final
 * verdict comes from check_arch_pmu().
 */
static int
pfm_gen_ia32_detect(void)
{
	int ret, family;
	char buffer[128];

	ret = __pfm_getcpuinfo_attr("vendor_id", buffer, sizeof(buffer));
	if (ret == -1)
		return PFMLIB_ERR_NOTSUPP;

	/* strcmp() is non-zero when the vendor string differs */
	if (strcmp(buffer, "GenuineIntel"))
		return PFMLIB_ERR_NOTSUPP;

	ret = __pfm_getcpuinfo_attr("cpu family", buffer, sizeof(buffer));
	if (ret == -1)
		return PFMLIB_ERR_NOTSUPP;

	family = atoi(buffer);

	return check_arch_pmu(family);
}

/*
 * Initialize the module from the architected PMU description: either
 * the values reported by CPUID function 0xa or, when a PMU model is
 * forced, a simulated configuration.
 */
static int
pfm_gen_ia32_init(void)
{
	union {
		unsigned int val;
		pmu_eax_t eax;
		pmu_edx_t edx;
	} eax, ecx, edx, ebx;
	unsigned int num_cnt, i;
	int ret;

	/*
	 * extract architected PMU information
	 */
	if (forced_pmu == PFMLIB_NO_PMU) {
		cpuid(0xa, &eax.val, &ebx.val, &ecx.val, &edx.val);
	} else {
		/*
		 * when forced, simulate v3
		 * with 2 generic and 3 fixed counters
		 */
		eax.eax.version = 3;
		eax.eax.num_cnt = 2;
		eax.eax.cnt_width = 40;
		eax.eax.ebx_length = 0; /* unused */
		ebx.val = 0;	/* no bit set: every architected event is kept */
		edx.edx.num_cnt = 3;
		edx.edx.cnt_width = 40;
	}

	num_cnt = eax.eax.num_cnt;
	pmu_version =
eax.eax.version; /* * populate impl_pm* bitmasks for generic counters */ for(i=0; i < num_cnt; i++) { pfm_regmask_set(&gen_ia32_impl_pmcs, i); pfm_regmask_set(&gen_ia32_impl_pmds, i); } /* check for fixed counters */ if (pmu_version >= 2) { /* * As described in IA-32 Developer's manual vol 3b * in section 18.12.2.1, early processors supporting * V2 may report invalid information concerning the fixed * counters. So we compensate for this here by forcing * num_cnt to 3. */ if (edx.edx.num_cnt == 0) edx.edx.num_cnt = 3; for(i=0; i < edx.edx.num_cnt; i++) pfm_regmask_set(&gen_ia32_impl_pmds, FIXED_PMD_BASE+i); if (i) pfm_regmask_set(&gen_ia32_impl_pmcs, 16); } num_gen_cnt = eax.eax.num_cnt; num_fixed_cnt = edx.edx.num_cnt; gen_ia32_support.pmc_count = num_gen_cnt + (num_fixed_cnt > 0); gen_ia32_support.pmd_count = num_gen_cnt + num_fixed_cnt; gen_ia32_support.num_cnt = num_gen_cnt + num_fixed_cnt; __pfm_vbprintf("Intel architected PMU: version=%d num_gen=%u num_fixed=%u pmc=%u pmd=%d\n", pmu_version, num_gen_cnt,num_fixed_cnt, gen_ia32_support.pmc_count, gen_ia32_support.pmd_count); ret = create_arch_event_table(ebx.val); if (ret != PFMLIB_SUCCESS) return ret; gen_support = &gen_ia32_support; return PFMLIB_SUCCESS; } static int pfm_gen_ia32_dispatch_counters_v1(pfmlib_input_param_t *inp, pfmlib_gen_ia32_input_param_t *mod_in, pfmlib_output_param_t *outp) { pfmlib_gen_ia32_input_param_t *param = mod_in; pfmlib_gen_ia32_counter_t *cntrs; pfm_gen_ia32_sel_reg_t reg; pfmlib_event_t *e; pfmlib_reg_t *pc, *pd; pfmlib_regmask_t *r_pmcs; unsigned long plm; unsigned int i, j, cnt, k, ucode, val; unsigned int assign[PMU_GEN_IA32_MAX_COUNTERS]; e = inp->pfp_events; pc = outp->pfp_pmcs; pd = outp->pfp_pmds; cnt = inp->pfp_event_count; r_pmcs = &inp->pfp_unavail_pmcs; cntrs = param ? 
param->pfp_gen_ia32_counters : NULL; if (PFMLIB_DEBUG()) { for (j=0; j < cnt; j++) { DPRINT("ev[%d]=%s\n", j, gen_ia32_pe[e[j].event].pme_name); } } if (cnt > gen_support->pmd_count) return PFMLIB_ERR_TOOMANY; for(i=0, j=0; j < cnt; j++) { if (e[j].plm & (PFM_PLM1|PFM_PLM2)) { DPRINT("event=%d invalid plm=%d\n", e[j].event, e[j].plm); return PFMLIB_ERR_INVAL; } if (e[j].flags & ~PFMLIB_GEN_IA32_ALL_FLAGS) { DPRINT("event=%d invalid flags=0x%lx\n", e[j].event, e[j].flags); return PFMLIB_ERR_INVAL; } if (cntrs && pmu_version != 3 && (cntrs[j].flags & PFM_GEN_IA32_SEL_ANYTHR)) { DPRINT("event=%d anythread requires architectural perfmon v3", e[j].event); return PFMLIB_ERR_INVAL; } /* * exclude restricted registers from assignment */ while(i < gen_support->pmc_count && pfm_regmask_isset(r_pmcs, i)) i++; if (i == gen_support->pmc_count) return PFMLIB_ERR_TOOMANY; /* * events can be assigned to any counter */ assign[j] = i++; } for (j=0; j < cnt ; j++ ) { reg.val = 0; /* assume reserved bits are zerooed */ /* if plm is 0, then assume not specified per-event and use default */ plm = e[j].plm ? e[j].plm : inp->pfp_dfl_plm; val = gen_ia32_pe[e[j].event].pme_code; reg.sel_event_select = val & 0xff; ucode = (val >> 8) & 0xff; for(k=0; k < e[j].num_masks; k++) ucode |= gen_ia32_pe[e[j].event].pme_umasks[e[j].unit_masks[k]].pme_ucode; val |= ucode << 8; reg.sel_unit_mask = ucode; /* use 8 least significant bits */ reg.sel_usr = plm & PFM_PLM3 ? 1 : 0; reg.sel_os = plm & PFM_PLM0 ? 1 : 0; reg.sel_en = 1; /* force enable bit to 1 */ reg.sel_int = 1; /* force APIC int to 1 */ reg.sel_cnt_mask = val >>24; reg.sel_inv = val >> 23; reg.sel_any = val >> 21;; reg.sel_edge = val >> 18; if (cntrs) { if (!reg.sel_cnt_mask) { /* * counter mask is 8-bit wide, do not silently * wrap-around */ if (cntrs[i].cnt_mask > 255) return PFMLIB_ERR_INVAL; reg.sel_cnt_mask = cntrs[j].cnt_mask; } if (!reg.sel_edge) reg.sel_edge = cntrs[j].flags & PFM_GEN_IA32_SEL_EDGE ? 
1 : 0;
			if (!reg.sel_inv)
				reg.sel_inv = cntrs[j].flags & PFM_GEN_IA32_SEL_INV ? 1 : 0;
		}

		pc[j].reg_num = assign[j];
		pc[j].reg_addr = GEN_IA32_SEL_BASE+assign[j];
		pc[j].reg_value = reg.val;

		pd[j].reg_num = assign[j];
		pd[j].reg_addr = GEN_IA32_CTR_BASE+assign[j];

		__pfm_vbprintf("[PERFEVTSEL%u(pmc%u)=0x%llx event_sel=0x%x umask=0x%x os=%d usr=%d en=%d int=%d inv=%d edge=%d cnt_mask=%d] %s\n",
			assign[j],
			assign[j],
			reg.val,
			reg.sel_event_select,
			reg.sel_unit_mask,
			reg.sel_os,
			reg.sel_usr,
			reg.sel_en,
			reg.sel_int,
			reg.sel_inv,
			reg.sel_edge,
			reg.sel_cnt_mask,
			gen_ia32_pe[e[j].event].pme_name);

		__pfm_vbprintf("[PMC%u(pmd%u)]\n", pd[j].reg_num, pd[j].reg_num);
	}
	/* number of evtsel registers programmed */
	outp->pfp_pmc_count = cnt;
	outp->pfp_pmd_count = cnt;

	return PFMLIB_SUCCESS;
}

/*
 * Names printed in the verbose FIXED_CTRL dump of
 * pfm_gen_ia32_dispatch_counters_v23(), indexed by fixed counter number.
 *
 * NOTE(review): the last two entries end with a space and are printed with
 * "%s ", so the verbose output gets two spaces after them -- confirm whether
 * that is intentional before touching the literals.
 */
static const char *fixed_event_names[]={ "INSTRUCTIONS_RETIRED", "UNHALTED_CORE_CYCLES ", "UNHALTED_REFERENCE_CYCLES " };
/* number of entries in fixed_event_names[] (elements are char pointers) */
#define MAX_EVENT_NAMES (sizeof(fixed_event_names)/sizeof(char *))

/*
 * Build the PMC/PMD programming for the events in inp->pfp_events when the
 * PMU version is not 1 (pfm_gen_ia32_dispatch_events() routes every other
 * version here).
 *
 * inp   : events (pfp_events, pfp_event_count), default privilege mask
 *         (pfp_dfl_plm) and unavailable PMCs (pfp_unavail_pmcs)
 * param : optional per-event options (flags, cnt_mask); may be NULL
 * outp  : receives pfp_pmcs/pfp_pmds and their counts
 *
 * Returns visible in this copy:
 *   PFMLIB_ERR_TOOMANY  more events than gen_support->pmd_count
 *   PFMLIB_ERR_INVAL    PLM1/PLM2 requested, flags outside
 *                       PFMLIB_GEN_IA32_ALL_FLAGS, ANYTHR requested when
 *                       pmu_version != 3, or cnt_mask > 255
 *   PFMLIB_SUCCESS      otherwise
 *
 * NOTE(review): part of the body is missing from this copy (see the note in
 * the fixed-counter loop below), so other return paths may exist.
 */
static int
pfm_gen_ia32_dispatch_counters_v23(pfmlib_input_param_t *inp, pfmlib_gen_ia32_input_param_t *param, pfmlib_output_param_t *outp)
{
/*
 * True when event i carries user options (any flag or a counter mask).
 * NOTE(review): the macro argument x is ignored; the expansion always reads
 * cntrs[i], so it is only correct when invoked with the loop variable i.
 * Neither macro is #undef'd in the visible code.
 */
#define HAS_OPTIONS(x)	(cntrs && (cntrs[i].flags || cntrs[i].cnt_mask))
/*
 * Register numbers above 15 denote fixed counters (presumably this matches
 * FIXED_PMD_BASE, defined elsewhere -- TODO confirm). The argument is not
 * parenthesized in the expansion, so pass simple expressions only.
 */
#define is_fixed_pmc(a) (a > 15)

	pfmlib_gen_ia32_counter_t *cntrs;
	pfm_gen_ia32_sel_reg_t reg;
	pfmlib_event_t *e;
	pfmlib_reg_t *pc, *pd;
	pfmlib_regmask_t *r_pmcs;
	uint64_t val;
	unsigned long plm;
	unsigned int fixed_ctr_mask;
	unsigned int npc = 0;	/* number of pc[] entries filled so far */
	unsigned int i, j, n, k, ucode;
	unsigned int assign[PMU_GEN_IA32_MAX_COUNTERS];	/* register number chosen for each event */
	unsigned int next_gen, last_gen;

	e      = inp->pfp_events;
	pc     = outp->pfp_pmcs;
	pd     = outp->pfp_pmds;
	n      = inp->pfp_event_count;
	r_pmcs = &inp->pfp_unavail_pmcs;
	cntrs  = param ? param->pfp_gen_ia32_counters : NULL;

	if (n > gen_support->pmd_count)
		return PFMLIB_ERR_TOOMANY;

	/*
	 * initialize to empty
	 * (assign[] is unsigned, so -1 is stored as UINT_MAX)
	 */
	for(i=0; i < n; i++)
		assign[i] = -1;

	/*
	 * error checking
	 */
	for(j=0; j < n; j++) {
		/*
		 * only supports two priv levels for perf counters
		 */
		if (e[j].plm & (PFM_PLM1|PFM_PLM2))
			return PFMLIB_ERR_INVAL;

		/*
		 * check for valid flags
		 */
		if (cntrs && cntrs[j].flags & ~PFMLIB_GEN_IA32_ALL_FLAGS)
			return PFMLIB_ERR_INVAL;

		if (cntrs && pmu_version != 3 && (cntrs[j].flags & PFM_GEN_IA32_SEL_ANYTHR)) {
			DPRINT("event=%d anythread requires architectural perfmon v3", e[j].event);
			return PFMLIB_ERR_INVAL;
		}
	}

	/*
	 * NOTE(review): next_gen, last_gen and r_pmcs are not referenced in the
	 * text that survives below; presumably they are used by the missing
	 * generic-counter assignment code.
	 */
	next_gen = 0; /* first generic counter */
	last_gen = num_gen_cnt - 1; /* last generic counter */

	/* one bit per fixed counter */
	fixed_ctr_mask = (1 << num_fixed_cnt) - 1;
	/*
	 * first constraint: fixed counters (try using them first)
	 */
	if (fixed_ctr_mask) {
		for(i=0; i < n; i++) {
			/* fixed counters do not support event options (filters) */
			if (HAS_OPTIONS(i)) {
				if (pmu_version != 3)
					continue;
				if (cntrs[i].flags != PFM_GEN_IA32_SEL_ANYTHR)
					continue;
				/* ok for ANYTHR */
			}
			/*
			 * NOTE(review): this copy of the file is truncated inside the
			 * condition below: everything between "(1<" and "pfp_dfl_plm;"
			 * is missing. That apparently covers the rest of the
			 * fixed-counter assignment, the generic-counter assignment and
			 * the start of the fixed-counter control setup. Restore the
			 * missing text from a pristine copy before compiling; the
			 * statements that follow belong to a later loop over the
			 * events.
			 */
			for(j=0; j < num_fixed_cnt; j++) {
				if ((fixed_ctr_mask & (1<pfp_dfl_plm;
		/* bit 0 of the per-counter field is set for PLM0, bit 1 for PLM3 */
		if (plm & PFM_PLM0)
			val |= 1ULL;
		if (plm & PFM_PLM3)
			val |= 2ULL;

		/* only possible for v3 */
		if (cntrs && cntrs[i].flags & PFM_GEN_IA32_SEL_ANYTHR)
			val |= 4ULL;

		val |= 1ULL << 3;	 /* force APIC int (kernel may force it anyway) */

		/* each fixed counter owns a 4-bit field, selected by its index relative to FIXED_PMD_BASE */
		reg.val |= val << ((assign[i]-FIXED_PMD_BASE)<<2);

		/* setup pd array */
		pd[i].reg_num = assign[i];
		pd[i].reg_addr = GEN_IA32_FIXED_CTR_BASE+assign[i]-FIXED_PMD_BASE;
	}

	/*
	 * A single control register (reported as pmc16, address 0x38D) carries
	 * the fields of all fixed counters; it is emitted only when at least
	 * one field is non-zero.
	 */
	if (reg.val) {
		pc[npc].reg_num   = 16;
		pc[npc].reg_value = reg.val;
		pc[npc].reg_addr  = 0x38D;

		__pfm_vbprintf("[FIXED_CTRL(pmc%u)=0x%"PRIx64,
				pc[npc].reg_num,
				reg.val);

		/* dump the 4-bit field of every fixed counter; the "any" bit is only shown for v3 */
		for(i=0; i < num_fixed_cnt; i++) {
			if (pmu_version != 3)
				__pfm_vbprintf(" pmi%d=1 en%d=0x%"PRIx64,
					i, i,
					(reg.val >> (i*4)) & 0x3ULL);
			else
				__pfm_vbprintf(" pmi%d=1 en%d=0x%"PRIx64 " any%d=%"PRId64,
					i, i,
					(reg.val >> (i*4)) & 0x3ULL,
					i,
					!!((reg.val >> (i*4)) & 0x4ULL));
		}

		__pfm_vbprintf("] ");
		/*
		 * Print the name of every fixed counter whose bit is clear in
		 * fixed_ctr_mask (presumably cleared when the counter was assigned,
		 * in the missing code above -- TODO confirm).
		 */
		for(i=0; i < num_fixed_cnt; i++) {
			if ((fixed_ctr_mask & (0x1 << i)) == 0) {
				if (i < MAX_EVENT_NAMES)
					__pfm_vbprintf("%s ", fixed_event_names[i]);
				else
					__pfm_vbprintf("??? ");
			}
		}
		__pfm_vbprintf("\n");

		npc++;

		for (i=0; i < n ; i++ ) {
			if (!is_fixed_pmc(assign[i]))
				continue;
			__pfm_vbprintf("[FIXED_CTR%u(pmd%u)]\n", pd[i].reg_num, pd[i].reg_num);
		}
	}

	/*
	 * Program one event-select register per event placed on a generic
	 * counter. PMC entries are packed at index npc, PMD entries stay at the
	 * event's own index i.
	 */
	for (i=0; i < n ; i++ ) {
		/* skip fixed counters */
		if (is_fixed_pmc(assign[i]))
			continue;

		reg.val = 0; /* assume reserved bits are zeroed */

		/* if plm is 0, then assume not specified per-event and use default */
		plm = e[i].plm ? e[i].plm : inp->pfp_dfl_plm;

		val = gen_ia32_pe[e[i].event].pme_code;

		/* low byte of the event code is the event select, next byte the base unit mask */
		reg.sel_event_select = val & 0xff;

		ucode = (val >> 8) & 0xff;

		/* OR in every unit mask requested for this event */
		for(k=0; k < e[i].num_masks; k++)
			ucode |= gen_ia32_pe[e[i].event].pme_umasks[e[i].unit_masks[k]].pme_ucode;

		val |= ucode << 8;

		reg.sel_unit_mask = ucode;
		reg.sel_usr = plm & PFM_PLM3 ? 1 : 0;
		reg.sel_os = plm & PFM_PLM0 ? 1 : 0;
		reg.sel_en = 1; /* force enable bit to 1 */
		reg.sel_int = 1; /* force APIC int to 1 */

		/*
		 * Remaining fields come from the upper bits of the event code.
		 * NOTE(review): the shifted values are stored unmasked, so this
		 * relies on the field widths of pfm_gen_ia32_sel_reg_t (declared
		 * elsewhere) to keep only the intended bits -- TODO confirm.
		 * The doubled ';' after the sel_any assignment is an empty statement.
		 */
		reg.sel_cnt_mask = val >>24;
		reg.sel_inv = val >> 23;
		reg.sel_any = val >> 21;;
		reg.sel_edge = val >> 18;

		/* user options only apply to fields the event code left at zero */
		if (cntrs) {
			if (!reg.sel_cnt_mask) {
				/*
				 * counter mask is 8-bit wide, do not silently
				 * wrap-around
				 */
				if (cntrs[i].cnt_mask > 255)
					return PFMLIB_ERR_INVAL;

				reg.sel_cnt_mask = cntrs[i].cnt_mask;
			}
			if (!reg.sel_edge)
				reg.sel_edge = cntrs[i].flags & PFM_GEN_IA32_SEL_EDGE ? 1 : 0;
			if (!reg.sel_inv)
				reg.sel_inv = cntrs[i].flags & PFM_GEN_IA32_SEL_INV ? 1 : 0;
			if (!reg.sel_any)
				reg.sel_any = cntrs[i].flags & PFM_GEN_IA32_SEL_ANYTHR? 1 : 0;
		}

		pc[npc].reg_num = assign[i];
		pc[npc].reg_value = reg.val;
		pc[npc].reg_addr = GEN_IA32_SEL_BASE+assign[i];
		pd[i].reg_num = assign[i];
		pd[i].reg_addr = GEN_IA32_CTR_BASE+assign[i];

		/* the anythr field is only reported for version 3 and above */
		if (pmu_version < 3)
			__pfm_vbprintf("[PERFEVTSEL%u(pmc%u)=0x%"PRIx64" event_sel=0x%x umask=0x%x os=%d usr=%d en=%d int=%d inv=%d edge=%d cnt_mask=%d] %s\n",
					pc[npc].reg_num,
					pc[npc].reg_num,
					reg.val,
					reg.sel_event_select,
					reg.sel_unit_mask,
					reg.sel_os,
					reg.sel_usr,
					reg.sel_en,
					reg.sel_int,
					reg.sel_inv,
					reg.sel_edge,
					reg.sel_cnt_mask,
					gen_ia32_pe[e[i].event].pme_name);
		else
			__pfm_vbprintf("[PERFEVTSEL%u(pmc%u)=0x%"PRIx64" event_sel=0x%x umask=0x%x os=%d usr=%d en=%d int=%d inv=%d edge=%d cnt_mask=%d anythr=%d] %s\n",
					pc[npc].reg_num,
					pc[npc].reg_num,
					reg.val,
					reg.sel_event_select,
					reg.sel_unit_mask,
					reg.sel_os,
					reg.sel_usr,
					reg.sel_en,
					reg.sel_int,
					reg.sel_inv,
					reg.sel_edge,
					reg.sel_cnt_mask,
					reg.sel_any,
					gen_ia32_pe[e[i].event].pme_name);

		__pfm_vbprintf("[PMC%u(pmd%u)]\n",
				pd[i].reg_num,
				pd[i].reg_num);

		npc++;
	}
	/* number of evtsel/ctr registers programmed */
	outp->pfp_pmc_count = npc;
	outp->pfp_pmd_count = n;

	return PFMLIB_SUCCESS;
}

/*
 * Dispatch callback (installed as .dispatch_events in gen_ia32_support).
 *
 * Rejects a default privilege mask containing PLM1 or PLM2 with
 * PFMLIB_ERR_INVAL, then hands off to the v1 routine when pmu_version is 1
 * and to the v2/v3 routine otherwise. model_in is passed on as the
 * model-specific input parameters; model_out is not used.
 */
static int
pfm_gen_ia32_dispatch_events(pfmlib_input_param_t *inp, void *model_in, pfmlib_output_param_t *outp, void *model_out)
{
	pfmlib_gen_ia32_input_param_t *mod_in  = model_in;

	if (inp->pfp_dfl_plm & (PFM_PLM1|PFM_PLM2)) {
		DPRINT("invalid plm=%x\n", inp->pfp_dfl_plm);
		return PFMLIB_ERR_INVAL;
	}

	/* simplified v1 (no fixed counters) */
	if (pmu_version == 1)
		return pfm_gen_ia32_dispatch_counters_v1(inp, mod_in, outp);
	/* v2 or above */
	return pfm_gen_ia32_dispatch_counters_v23(inp, mod_in, outp);
}

/*
 * Store the encoding (pme_code) of event i in *code.
 *
 * cnt is only range-checked; the returned code does not depend on it.
 * Returns PFMLIB_ERR_INVAL when cnt is not PFMLIB_CNT_FIRST and exceeds
 * gen_support->pmc_count, PFMLIB_SUCCESS otherwise.
 *
 * NOTE(review): the bound uses '>', so cnt == pmc_count is accepted --
 * confirm whether '>=' was intended.
 */
static int
pfm_gen_ia32_get_event_code(unsigned int i, unsigned int cnt, int *code)
{
	if (cnt != PFMLIB_CNT_FIRST && cnt > gen_support->pmc_count)
		return PFMLIB_ERR_INVAL;

	*code = gen_ia32_pe[i].pme_code;

	return PFMLIB_SUCCESS;
}

/*
 * Fill *counters with the registers able to host event j: the mask is
 * cleared, every generic counter is set, and a fixed counter is added when
 * the event's pme_fixed field designates it.
 */
static void
pfm_gen_ia32_get_event_counters(unsigned int j, pfmlib_regmask_t *counters)
{
	unsigned int i;

	memset(counters, 0,
sizeof(*counters));

	/* any generic counter can host the event */
	for(i=0; i < num_gen_cnt; i++)
		pfm_regmask_set(counters, i);

	/* add the fixed counter designated by the event's pme_fixed field, if any */
	for(i=0; i < num_fixed_cnt; i++) {
		if (gen_ia32_pe[j].pme_fixed == (FIXED_PMD_BASE+i))
			pfm_regmask_set(counters, FIXED_PMD_BASE+i);
	}
}

/* Copy the mask of implemented PMC registers into *impl_pmcs. */
static void
pfm_gen_ia32_get_impl_pmcs(pfmlib_regmask_t *impl_pmcs)
{
	*impl_pmcs = gen_ia32_impl_pmcs;
}

/* Copy the mask of implemented PMD registers into *impl_pmds. */
static void
pfm_gen_ia32_get_impl_pmds(pfmlib_regmask_t *impl_pmds)
{
	*impl_pmds = gen_ia32_impl_pmds;
}

/* Copy the mask of implemented counters into *impl_counters (same mask as the PMDs). */
static void
pfm_gen_ia32_get_impl_counters(pfmlib_regmask_t *impl_counters)
{
	/* all pmds are counters */
	*impl_counters = gen_ia32_impl_pmds;
}

/* Report the usable counter width, PMU_GEN_IA32_COUNTER_WIDTH, in *width. */
static void
pfm_gen_ia32_get_hw_counter_width(unsigned int *width)
{
	/*
	 * Even though CPUID 0xa returns the actual counter width in eax,
	 * the architecture specifies that writes are limited to the
	 * lower 32 bits. As such, only the lower 31 bits have full
	 * degree of freedom. That is the "usable" counter width.
	 */
	*width = PMU_GEN_IA32_COUNTER_WIDTH;
}

/* Return the name of event i; the pointer refers to the event table entry, not a copy. */
static char *
pfm_gen_ia32_get_event_name(unsigned int i)
{
	return gen_ia32_pe[i].pme_name;
}

/*
 * Store a strdup()'ed copy of the description of event ev in *str, or NULL
 * when the event has no description. The copy is heap-allocated, so the
 * caller is responsible for freeing it.
 *
 * NOTE(review): the strdup() result is not checked; on allocation failure
 * *str is NULL and PFMLIB_SUCCESS is still returned.
 */
static int
pfm_gen_ia32_get_event_description(unsigned int ev, char **str)
{
	char *s;
	s = gen_ia32_pe[ev].pme_desc;
	if (s) {
		*str = strdup(s);
	} else {
		*str = NULL;
	}
	return PFMLIB_SUCCESS;
}

/* Return the name of unit mask midx of event ev (pointer into the event table). */
static char *
pfm_gen_ia32_get_event_mask_name(unsigned int ev, unsigned int midx)
{
	return gen_ia32_pe[ev].pme_umasks[midx].pme_uname;
}

/*
 * Store a strdup()'ed copy of the description of unit mask midx of event ev
 * in *str, or NULL when there is none. The caller frees the copy.
 *
 * NOTE(review): as with the event description, a failed strdup() is
 * reported as PFMLIB_SUCCESS with *str == NULL.
 */
static int
pfm_gen_ia32_get_event_mask_desc(unsigned int ev, unsigned int midx, char **str)
{
	char *s;

	s = gen_ia32_pe[ev].pme_umasks[midx].pme_udesc;
	if (s) {
		*str = strdup(s);
	} else {
		*str = NULL;
	}
	return PFMLIB_SUCCESS;
}

/* Return the number of unit masks defined for event ev. */
static unsigned int
pfm_gen_ia32_get_num_event_masks(unsigned int ev)
{
	return gen_ia32_pe[ev].pme_numasks;
}

/* Store the code of unit mask midx of event ev in *code; always returns PFMLIB_SUCCESS. */
static int
pfm_gen_ia32_get_event_mask_code(unsigned int ev, unsigned int midx, unsigned int *code)
{
	*code =gen_ia32_pe[ev].pme_umasks[midx].pme_ucode;
	return PFMLIB_SUCCESS;
}

/*
 * Store the index of the cycle-counting event in e->event.
 * Returns PFMLIB_ERR_NOTSUPP when gen_ia32_cycle_event is -1.
 */
static int
pfm_gen_ia32_get_cycle_event(pfmlib_event_t *e)
{
	if (gen_ia32_cycle_event == -1)
		return PFMLIB_ERR_NOTSUPP;

	e->event =
gen_ia32_cycle_event;
	return PFMLIB_SUCCESS;
}

/*
 * Store the index of the instructions-retired event in e->event.
 * Returns PFMLIB_ERR_NOTSUPP when gen_ia32_inst_retired_event is -1.
 */
static int
pfm_gen_ia32_get_inst_retired(pfmlib_event_t *e)
{
	if (gen_ia32_inst_retired_event == -1)
		return PFMLIB_ERR_NOTSUPP;

	e->event = gen_ia32_inst_retired_event;
	return PFMLIB_SUCCESS;
}

/*
 * architected PMU
 *
 * Callback table describing this PMU model. All counts are 0 in this
 * initializer; presumably they are filled in by pfm_gen_ia32_detect() or
 * pfm_gen_ia32_init(), which are defined outside this part of the file --
 * TODO confirm.
 */
pfm_pmu_support_t gen_ia32_support={
	.pmu_name		= "Intel architectural PMU",
	.pmu_type		= PFMLIB_GEN_IA32_PMU,
	.pme_count		= 0,
	.pmc_count		= 0,
	.pmd_count		= 0,
	.num_cnt		= 0,
	.get_event_code		= pfm_gen_ia32_get_event_code,
	.get_event_name		= pfm_gen_ia32_get_event_name,
	.get_event_counters	= pfm_gen_ia32_get_event_counters,
	.dispatch_events	= pfm_gen_ia32_dispatch_events,
	.pmu_detect		= pfm_gen_ia32_detect,
	.pmu_init		= pfm_gen_ia32_init,
	.get_impl_pmcs		= pfm_gen_ia32_get_impl_pmcs,
	.get_impl_pmds		= pfm_gen_ia32_get_impl_pmds,
	.get_impl_counters	= pfm_gen_ia32_get_impl_counters,
	.get_hw_counter_width	= pfm_gen_ia32_get_hw_counter_width,
	.get_event_desc         = pfm_gen_ia32_get_event_description,
	.get_cycle_event	= pfm_gen_ia32_get_cycle_event,
	.get_inst_retired_event = pfm_gen_ia32_get_inst_retired,
	.get_num_event_masks	= pfm_gen_ia32_get_num_event_masks,
	.get_event_mask_name	= pfm_gen_ia32_get_event_mask_name,
	.get_event_mask_code	= pfm_gen_ia32_get_event_mask_code,
	.get_event_mask_desc	= pfm_gen_ia32_get_event_mask_desc
};
papi-5.6.0/src/event_data/power5/groups000664 001750 001750 00000056022 13216244361 022124 0ustar00jshenry1963jshenry1963000000 000000 { **************************** { THIS IS OPEN SOURCE CODE { **************************** { (C) COPYRIGHT International Business Machines Corp. 2005 { This file is licensed under the University of Tennessee license. { See LICENSE.txt.
{ { File: events/power5/groups { Author: Maynard Johnson { maynardj@us.ibm.com { Mods: { { Number of groups 145 { Group descriptions #0,190,71,56,12,0,0,pm_utilization,CPI and utilization data ##00005,00001,00009,0000F,00009,00005 00000000,00000000,0A02121E,00000000 CPI and utilization data #1,2,195,49,12,0,0,pm_completion,Completion and cycle counts ##00013,00004,00013,0000F,00009,00005 00000000,00000000,2608261E,00000000 Completion and cycle counts #2,66,65,50,60,0,0,pm_group_dispatch,Group dispatch events ##120E3,120E4,130E1,00009,00009,00005 00000000,4000000E,C6C8C212,00000000 Group dispatch events #3,0,2,169,138,0,0,pm_clb1,CLB fullness ##400C0,400C2,410C6,C70A6,00009,00005 00000000,015B0001,80848C4C,00000001 CLB fullness #4,6,6,149,59,0,0,pm_clb2,CLB fullness ##400C5,400C6,C70E6,00001,00009,00005 00000000,01430002,8A8CCC02,00000001 CLB fullness #5,60,59,46,51,0,0,pm_gct_empty,GCT empty reasons ##00004,1009C,10084,1009C,00009,00005 00000000,40000000,08380838,00000000 GCT empty reasons #6,62,61,47,52,0,0,pm_gct_usage,GCT Usage ##0001F,0001F,0001F,0001F,00009,00005 00000000,00000000,3E3E3E3E,00000000 GCT Usage #7,143,143,113,119,0,0,pm_lsu1,LSU LRQ and LMQ events ##C20E6,C20E2,C30E6,C30E5,00009,00005 00000000,000F000F,CCC4CCCA,00000000 LSU LRQ and LMQ events #8,147,147,119,123,0,0,pm_lsu2,LSU SRQ events ##C20E5,C20E1,830E5,110C3,00009,00005 00000000,400E000E,CAC2CA86,00000000 LSU SRQ events #9,149,141,112,122,0,0,pm_lsu3,LSU SRQ and LMQ events ##C2088,00015,C70E5,00015,00009,00005 00000000,010F000A,102ACA2A,00000000 LSU SRQ and LMQ events #10,212,73,117,18,0,0,pm_prefetch1,Prefetch stream allocation ##2209B,220E4,C50C2,830E7,00009,00005 00000000,8432000D,36C884CE,00000000 Prefetch stream allocation #11,73,9,61,58,0,0,pm_prefetch2,Prefetch events ##00001,220E5,C70E7,210C7,00009,00005 00000000,81030006,02CACE8E,00000001 Prefetch events #12,139,1,87,59,0,0,pm_prefetch3,L2 prefetch and misc events ##C2090,400C1,C50C3,00001,00009,00005 
00000000,047C0008,20828602,00000001 L2 prefetch and misc events #13,126,135,13,91,0,0,pm_prefetch4,Misc prefetch and reject events ##C60E0,C60E4,830E6,C50C3,00009,00005 00000000,063E000E,C0C8CC86,00000000 Misc prefetch and reject events #14,145,144,25,159,0,0,pm_lsu_reject1,LSU reject events ##C6090,C6088,330E3,81088,00009,00005 00000000,C22C000E,2010C610,00000001 LSU reject events #15,125,134,55,66,0,0,pm_lsu_reject2,LSU rejects due to reload CDF or tag update collision ##C60E2,C60E6,00001,230E7,00009,00005 00000000,820C000D,C4CC02CE,00000001 LSU rejects due to reload CDF or tag update collision #16,123,132,120,191,0,0,pm_lsu_reject3,LSU rejects due to ERAT, held instuctions ##C60E3,C60E7,130E0,130E4,00009,00005 00000000,420C000F,C6CEC0C8,00000000 LSU rejects due to ERAT, held instuctions #17,124,133,55,1,0,0,pm_lsu_reject4,LSU0/1 reject LMQ full ##C60E1,C60E5,00001,230E4,00009,00005 00000000,820C000D,C2CA02C8,00000001 LSU0/1 reject LMQ full #18,146,145,109,31,0,0,pm_lsu_reject5,LSU misc reject and flush events ##C6088,C6090,110C5,110C7,00009,00005 00000000,420C000C,10208A8E,00000000 LSU misc reject and flush events #19,73,140,25,16,0,0,pm_flush1,Misc flush events ##00001,C0088,330E3,C10C7,00009,00005 00000000,C0F00002,0210C68E,00000001 Misc flush events #20,81,71,27,33,0,0,pm_flush2,Flushes due to scoreboard and sync ##800C0,00001,330E2,330E1,00009,00005 00000000,C0800003,8002C4C2,00000001 Flushes due to scoreboard and sync #21,141,138,55,113,0,0,pm_lsu_flush_srq_lrq,LSU flush by SRQ and LRQ events ##C0090,C0090,00001,110C5,00009,00005 00000000,40C00000,2020028A,00000001 LSU flush by SRQ and LRQ events #22,119,128,109,59,0,0,pm_lsu_flush_lrq,LSU0/1 flush due to LRQ ##C00C2,C00C6,110C5,00001,00009,00005 00000000,40C00000,848C8A02,00000001 LSU0/1 flush due to LRQ #23,120,129,55,113,0,0,pm_lsu_flush_srq,LSU0/1 flush due to SRQ ##C00C3,C00C7,00001,110C5,00009,00005 00000000,40C00000,868E028A,00000001 LSU0/1 flush due to SRQ 
#24,142,140,0,59,0,0,pm_lsu_flush_unaligned,LSU flush due to unaligned data ##C0088,C0088,230E4,00001,00009,00005 00000000,80C00002,1010C802,00000001 LSU flush due to unaligned data #25,121,130,109,59,0,0,pm_lsu_flush_uld,LSU0/1 flush due to unaligned load ##C00C0,C00C4,110C5,00001,00009,00005 00000000,40C00000,80888A02,00000001 LSU0/1 flush due to unaligned load #26,122,131,55,113,0,0,pm_lsu_flush_ust,LSU0/1 flush due to unaligned store ##C00C1,C00C5,00001,110C5,00009,00005 00000000,40C00000,828A028A,00000001 LSU0/1 flush due to unaligned store #27,140,71,147,114,0,0,pm_lsu_flush_full,LSU flush due to LRQ/SRQ full ##320E7,00001,81088,330E0,00009,00005 00000000,C0200009,CE0210C0,00000001 LSU flush due to LRQ/SRQ full #28,70,13,55,10,0,0,pm_lsu_stall1,LSU Stalls ##00014,11098,00001,1109A,00009,00005 00000000,40000000,28300234,00000001 LSU Stalls #29,73,10,6,8,0,0,pm_lsu_stall2,LSU Stalls ##00001,1109A,0000F,1109B,00009,00005 00000000,40000000,02341E36,00000001 LSU Stalls #30,68,12,55,7,0,0,pm_fxu_stall,FXU Stalls ##12091,11099,00001,11099,00009,00005 00000000,40000008,22320232,00000001 FXU Stalls #31,57,11,55,9,0,0,pm_fpu_stall,FPU Stalls ##10090,1109B,00001,11098,00009,00005 00000000,40000000,20360230,00000001 FPU Stalls #32,115,7,116,116,0,0,pm_queue_full,BRQ LRQ LMQ queue full ##820E7,100C5,110C2,C30E7,00009,00005 00000000,400B0009,CE8A84CE,00000000 BRQ LRQ LMQ queue full #33,41,49,40,46,0,0,pm_issueq_full,FPU FX full ##100C3,100C7,110C0,110C4,00009,00005 00000000,40000000,868E8088,00000000 FPU FX full #34,11,114,48,11,0,0,pm_mapper_full1,CR CTR GPR mapper full ##100C4,100C6,130E5,110C1,00009,00005 00000000,40000002,888CCA82,00000000 CR CTR GPR mapper full #35,35,204,188,59,0,0,pm_mapper_full2,FPR XER mapper full ##100C1,100C2,C709B,00001,00009,00005 00000000,41030002,82843602,00000001 FPR XER mapper full #36,198,193,106,112,0,0,pm_misc_load,Non-cachable loads and stcx events ##820E1,820E5,C50C1,C50C5,00009,00005 00000000,0438000C,C2CA828A,00000001 Non-cachable 
loads and stcx events #37,117,126,52,57,0,0,pm_ic_demand,ICache demand from BR redirect ##C20E3,C20E7,230E0,230E1,00009,00005 00000000,800C000F,C6CEC0C2,00000000 ICache demand from BR redirect #38,72,69,54,0,0,0,pm_ic_pref,ICache prefetch ##220E7,220E6,210C7,2208D,00009,00005 00000000,8000000C,CECC8E1A,00000000 ICache prefetch #39,69,67,60,59,0,0,pm_ic_miss,ICache misses ##12099,120E7,C30E4,00001,00009,00005 00000000,4003000E,32CEC802,00000001 ICache misses #40,210,184,1,3,0,0,pm_branch_miss,Branch mispredict, TLB and SLB misses ##80088,80088,230E5,230E6,00009,00005 00000000,80800003,1010CACC,00000000 Branch mispredict, TLB and SLB misses #41,9,8,3,5,0,0,pm_branch1,Branch operations ##23087,23087,23087,23087,00009,00005 00000000,80000003,0E0E0E0E,00000000 Branch operations #42,64,62,24,59,0,0,pm_branch2,Branch operations ##120E5,120E6,110C6,00001,00009,00005 00000000,4000000C,CACC8C02,00000001 Branch operations #43,20,21,100,106,0,0,pm_L1_tlbmiss,L1 load and TLB misses ##800C7,800C4,C1088,C1090,00009,00005 00000000,00B00000,8E881020,00000000 L1 load and TLB misses #44,13,137,165,171,0,0,pm_L1_DERAT_miss,L1 store and DERAT misses ##C3087,80090,C1090,C10C3,00009,00005 00000000,00B30000,0E202086,00000000 L1 store and DERAT misses #45,21,78,101,105,0,0,pm_L1_slbmiss,L1 load and SLB misses ##800C5,800C1,C10C2,C10C6,00009,00005 00000000,00B00000,8A82848C,00000000 L1 load and SLB misses #46,26,23,103,108,0,0,pm_L1_dtlbmiss_4K,L1 load references and 4K Data TLB references and misses ##C40C2,C40C0,C10C0,C10C4,00009,00005 00000000,08F00000,84808088,00000000 L1 load references and 4K Data TLB references and misses #47,25,22,166,173,0,0,pm_L1_dtlbmiss_16M,L1 store references and 16M Data TLB references and misses ##C40C6,C40C4,C10C1,C10C5,00009,00005 00000000,08F00000,8C88828A,00000000 L1 store references and 16M Data TLB references and misses #48,16,18,26,59,0,0,pm_dsource1,L3 cache and memory data access ##C308E,C3087,110C7,00001,00009,00005 
00000000,40030000,1C0E8E02,00000001 L3 cache and memory data access #49,16,18,187,15,0,0,pm_dsource2,L3 cache and memory data access ##C308E,C3087,C309B,C3087,00009,00005 00000000,00030003,1C0E360E,00000000 L3 cache and memory data access #50,14,16,8,13,0,0,pm_dsource_L2,L2 cache data access ##C3097,C3097,C3097,C3097,00009,00005 00000000,00030003,2E2E2E2E,00000000 L2 cache data access #51,17,17,10,14,0,0,pm_dsource_L3,L3 cache data access ##C309E,C309E,C309E,C309E,00009,00005 00000000,00030003,3C3C3C3C,00000000 L3 cache data access #52,78,74,59,63,0,0,pm_isource1,Instruction source information ##2208D,2208D,2208D,22086,00009,00005 00000000,8000000C,1A1A1A0C,00000000 Instruction source information #53,76,77,55,0,0,0,pm_isource2,Instruction source information ##22086,22086,00001,2208D,00009,00005 00000000,8000000C,0C0C021A,00000001 Instruction source information #54,77,75,57,61,0,0,pm_isource_L2,L2 instruction source information ##22096,22096,22096,22096,00009,00005 00000000,8000000C,2C2C2C2C,00000000 L2 instruction source information #55,79,76,58,62,0,0,pm_isource_L3,L3 instruction source information ##2209D,2209D,2209D,2209D,00009,00005 00000000,8000000C,3A3A3A3A,00000000 L3 instruction source information #56,184,181,154,163,0,0,pm_pteg_source1,PTEG source information ##83097,83097,83097,83097,00009,00005 00000000,00020003,2E2E2E2E,00000000 PTEG source information #57,187,182,156,164,0,0,pm_pteg_source2,PTEG source information ##8309E,8309E,8309E,8309E,00009,00005 00000000,00020003,3C3C3C3C,00000000 PTEG source information #58,183,183,189,165,0,0,pm_pteg_source3,PTEG source information ##83087,83087,8309B,83087,00009,00005 00000000,00020003,0E0E360E,00000000 PTEG source information #59,186,64,51,16,0,0,pm_pteg_source4,L3 PTEG and group disptach events ##8308E,00002,00002,C10C7,00009,00005 00000000,00320000,1C04048E,00000000 L3 PTEG and group disptach events #60,83,82,64,69,0,0,pm_L2SA_ld,L2 slice A load events ##701C0,721E0,711C0,731E0,00009,00005 
00000000,30554005,80C080C0,00000000 L2 slice A load events #61,85,84,66,71,0,0,pm_L2SA_st,L2 slice A store events ##702C0,722E0,712C0,732E0,00009,00005 00000000,30558005,80C080C0,00000000 L2 slice A store events #62,87,87,68,74,0,0,pm_L2SA_st2,L2 slice A store events ##703C0,723E0,713C0,733E0,00009,00005 00000000,3055C005,80C080C0,00000000 L2 slice A store events #63,91,90,72,77,0,0,pm_L2SB_ld,L2 slice B load events ##701C1,721E1,711C1,731E1,00009,00005 00000000,30554005,82C282C2,00000000 L2 slice B load events #64,93,92,74,79,0,0,pm_L2SB_st,L2 slice B store events ##702C1,722E1,712C1,732E1,00009,00005 00000000,30558005,82C282C2,00000000 L2 slice B store events #65,95,95,76,82,0,0,pm_L2SB_st2,L2 slice B store events ##703C1,723E1,713C1,733E1,00009,00005 00000000,3055C005,82C282C2,00000000 L2 slice B store events #66,99,98,80,85,0,0,pm_L2SB_ld,L2 slice C load events ##701C2,721E2,711C2,731E2,00009,00005 00000000,30554005,84C484C4,00000000 L2 slice C load events #67,101,100,82,87,0,0,pm_L2SB_st,L2 slice C store events ##702C2,722E2,712C2,732E2,00009,00005 00000000,30558005,84C484C4,00000000 L2 slice C store events #68,103,103,84,90,0,0,pm_L2SB_st2,L2 slice C store events ##703C2,723E2,713C2,733E2,00009,00005 00000000,3055C005,84C484C4,00000000 L2 slice C store events #69,107,71,89,94,0,0,pm_L3SA_trans,L3 slice A state transistions ##720E3,00001,730E3,710C3,00009,00005 00000000,3015000A,C602C686,00000001 L3 slice A state transistions #70,73,108,93,98,0,0,pm_L3SB_trans,L3 slice B state transistions ##00001,720E4,730E4,710C4,00009,00005 00000000,30150006,02C8C888,00000001 L3 slice B state transistions #71,73,111,97,102,0,0,pm_L3SC_trans,L3 slice C state transistions ##00001,720E5,730E5,710C5,00009,00005 00000000,30150006,02CACA8A,00000001 L3 slice C state transistions #72,82,86,63,73,0,0,pm_L2SA_trans,L2 slice A state transistions ##720E0,700C0,730E0,710C0,00009,00005 00000000,3055000A,C080C080,00000000 L2 slice A state transistions #73,90,94,71,81,0,0,pm_L2SB_trans,L2 
slice B state transistions ##720E1,700C1,730E1,710C1,00009,00005 00000000,3055000A,C282C282,00000000 L2 slice B state transistions #74,98,102,79,89,0,0,pm_L2SC_trans,L2 slice C state transistions ##720E2,700C2,730E2,710C2,00009,00005 00000000,3055000A,C484C484,00000000 L2 slice C state transistions #75,106,107,91,99,0,0,pm_L3SAB_retry,L3 slice A/B snoop retry and all CI/CO busy ##721E3,721E4,731E3,731E4,00009,00005 00000000,3005100F,C6C8C6C8,00000000 L3 slice A/B snoop retry and all CI/CO busy #76,108,109,88,96,0,0,pm_L3SAB_hit,L3 slice A/B hit and reference ##701C3,701C4,711C3,711C4,00009,00005 00000000,30501000,86888688,00000000 L3 slice A/B hit and reference #77,112,112,99,100,0,0,pm_L3SC_retry_hit,L3 slice C hit & snoop retry ##721E5,701C5,731E5,711C5,00009,00005 00000000,3055100A,CA8ACA8A,00000000 L3 slice C hit & snoop retry #78,55,54,38,43,0,0,pm_fpu1,Floating Point events ##00088,00088,01088,01090,00009,00005 00000000,00000000,10101020,00000000 Floating Point events #79,56,53,39,44,0,0,pm_fpu2,Floating Point events ##00090,00090,01090,01088,00009,00005 00000000,00000000,20202010,00000000 Floating Point events #80,54,55,30,40,0,0,pm_fpu3,Floating point events ##02088,02088,010C3,010C7,00009,00005 00000000,0000000C,1010868E,00000000 Floating point events #81,58,56,55,115,0,0,pm_fpu4,Floating point events ##02090,02090,00001,C5090,00009,00005 00000000,0430000C,20200220,00000001 Floating point events #82,40,48,29,39,0,0,pm_fpu5,Floating point events by unit ##000C2,000C6,010C2,010C6,00009,00005 00000000,00000000,848C848C,00000000 Floating point events by unit #83,37,45,31,41,0,0,pm_fpu6,Floating point events by unit ##020E0,020E4,010C0,010C4,00009,00005 00000000,0000000C,C0C88088,00000000 Floating point events by unit #84,38,46,33,42,0,0,pm_fpu7,Floating point events by unit ##000C0,000C4,010C1,010C5,00009,00005 00000000,00000000,8088828A,00000000 Floating point events by unit #85,43,51,55,37,0,0,pm_fpu8,Floating point events by unit 
##020E1,020E5,00001,030E0,00009,00005 00000000,0000000D,C2CA02C0,00000001 Floating point events by unit #86,42,50,105,111,0,0,pm_fpu9,Floating point events by unit ##020E3,020E7,C50C0,C50C4,00009,00005 00000000,0430000C,C6CE8088,00000000 Floating point events by unit #87,39,47,55,42,0,0,pm_fpu10,Floating point events by unit ##000C1,000C5,00001,010C5,00009,00005 00000000,00000000,828A028A,00000001 Floating point events by unit #88,36,44,30,59,0,0,pm_fpu11,Floating point events by unit ##000C3,000C7,010C3,00001,00009,00005 00000000,00000000,868E8602,00000001 Floating point events by unit #89,44,52,105,59,0,0,pm_fpu12,Floating point events by unit ##020E2,020E6,C50C0,00001,00009,00005 00000000,0430000C,C4CC8002,00000001 Floating point events by unit #90,59,57,42,49,0,0,pm_fxu1,Fixed Point events ##00012,00012,00012,00012,00009,00005 00000000,00000000,24242424,00000000 Fixed Point events #91,171,172,45,47,0,0,pm_fxu2,Fixed Point events ##00002,12091,13088,11090,00009,00005 00000000,40000006,04221020,00000001 Fixed Point events #92,4,4,43,50,0,0,pm_fxu3,Fixed Point events ##400C3,400C4,130E2,130E6,00009,00005 00000000,40400003,8688C4CC,00000000 Fixed Point events #93,206,203,171,178,0,0,pm_smt_priorities1,Thread priority events ##420E3,420E6,430E3,430E4,00009,00005 00000000,0005000F,C6CCC6C8,00000000 Thread priority events #94,205,202,173,180,0,0,pm_smt_priorities2,Thread priority events ##420E2,420E5,430E5,430E6,00009,00005 00000000,0005000F,C4CACACC,00000000 Thread priority events #95,204,201,175,182,0,0,pm_smt_priorities3,Thread priority events ##420E1,420E4,430E2,430E1,00009,00005 00000000,0005000F,C2C8C4C2,00000000 Thread priority events #96,203,68,177,59,0,0,pm_smt_priorities4,Thread priority events ##420E0,0000B,430E0,00001,00009,00005 00000000,0005000A,C016C002,00000001 Thread priority events #97,202,196,55,176,0,0,pm_smt_both,Thread common events ##0000B,00013,00001,41084,00009,00005 00000000,00100000,16260208,00000001 Thread common events 
#98,196,71,182,189,0,0,pm_smt_selection,Thread selection ##800C3,00001,410C0,410C1,00009,00005 00000000,00900000,86028082,00000001 Thread selection #99,73,0,178,185,0,0,pm_smt_selectover1,Thread selection overide ##00001,400C0,410C2,410C4,00009,00005 00000000,00500000,02808488,00000001 Thread selection overide #100,73,15,180,187,0,0,pm_smt_selectover2,Thread selection overide ##00001,0000F,410C5,410C3,00009,00005 00000000,00100000,021E8A86,00000001 Thread selection overide #101,27,27,17,23,0,0,pm_fabric1,Fabric events ##700C7,720E7,710C7,730E7,00009,00005 00000000,30550005,8ECE8ECE,00000000 Fabric events #102,32,29,20,28,0,0,pm_fabric2,Fabric data movement ##701C7,721E7,711C7,731E7,00009,00005 00000000,30550085,8ECE8ECE,00000000 Fabric data movement #103,33,33,21,27,0,0,pm_fabric3,Fabric data movement ##703C7,723E7,713C7,733E7,00009,00005 00000000,30550185,8ECE8ECE,00000000 Fabric data movement #104,31,28,15,24,0,0,pm_fabric4,Fabric data movement ##702C7,722E7,130E3,712C7,00009,00005 00000000,70540106,8ECEC68E,00000000 Fabric data movement #105,193,185,161,166,0,0,pm_snoop1,Snoop retry ##700C6,720E6,710C6,730E6,00009,00005 00000000,30550005,8CCC8CCC,00000000 Snoop retry #106,194,189,160,59,0,0,pm_snoop2,Snoop read retry ##705C6,725E6,715C6,00001,00009,00005 00000000,30540A04,8CCC8C02,00000001 Snoop read retry #107,197,150,162,127,0,0,pm_snoop3,Snoop write retry ##706C6,726E6,716C6,736E6,00009,00005 00000000,30550C05,8CCC8CCC,00000000 Snoop write retry #108,192,149,159,126,0,0,pm_snoop4,Snoop partial write retry ##707C6,727E6,717C6,737E6,00009,00005 00000000,30550E05,8CCC8CCC,00000000 Snoop partial write retry #109,156,155,125,20,0,0,pm_mem_rq,Memory read queue dispatch ##701C6,721E6,711C6,130E7,00009,00005 00000000,70540205,8CCC8CCE,00000000 Memory read queue dispatch #110,155,148,126,21,0,0,pm_mem_read,Memory read complete and cancel ##702C6,722E6,712C6,00003,00009,00005 00000000,30540404,8CCC8C06,00000000 Memory read complete and cancel 
#111,159,156,128,132,0,0,pm_mem_wq,Memory write queue dispatch ##703C6,723E6,713C6,733E6,00009,00005 00000000,30550605,8CCC8CCC,00000000 Memory write queue dispatch #112,153,152,124,128,0,0,pm_mem_pwq,Memory partial write queue ##704C6,724E6,714C6,734E6,00009,00005 00000000,30550805,8CCC8CCC,00000000 Memory partial write queue #113,171,173,185,158,0,0,pm_threshold,Thresholding ##00002,820E2,0000B,00014,00009,00005 00000000,00080004,04C41628,00000001 Thresholding #114,171,179,137,146,0,0,pm_mrk_grp1,Marked group events ##00002,820E3,00005,00013,00009,00005 00000000,00080004,04C60A26,00000001 Marked group events #115,172,158,138,147,0,0,pm_mrk_grp2,Marked group events ##00015,00005,C70E4,12091,00009,00005 00000000,41030002,2A0AC822,00000001 Marked group events #116,160,162,129,135,0,0,pm_mrk_dsource1,Marked data from ##C7087,C70A0,C70A2,C70A2,00009,00005 00000000,010B0003,0E404444,00000001 Marked data from #117,161,160,55,44,0,0,pm_mrk_dsource2,Marked data from ##C7097,C70A2,00001,01088,00009,00005 00000000,010B0000,2E440210,00000001 Marked data from #118,163,166,131,138,0,0,pm_mrk_dsource3,Marked data from ##C708E,C70A4,C70A6,C70A6,00009,00005 00000000,010B0003,1C484C4C,00000001 Marked data from #119,166,161,130,143,0,0,pm_mrk_dsource4,Marked data from ##C70A1,C70A3,C7097,C70A1,00009,00005 00000000,010B0003,42462E42,00000001 Marked data from #120,164,164,133,141,0,0,pm_mrk_dsource5,Marked data from ##C709E,C70A6,C70A0,C70A0,00009,00005 00000000,010B0003,3C4C4040,00000001 Marked data from #121,162,161,55,137,0,0,pm_mrk_dsource6,Marked data from ##C70A3,C70A3,00001,C70A3,00009,00005 00000000,010B0001,46460246,00000001 Marked data from #122,165,165,132,140,0,0,pm_mrk_dsource7,Marked data from ##C70A7,C70A7,C709E,C70A7,00009,00005 00000000,010B0003,4E4E3C4E,00000001 Marked data from #123,168,168,135,144,0,0,pm_mrk_lbmiss,Marked TLB and SLB misses ##C40C1,C40C5,C50C6,C50C7,00009,00005 00000000,0CF00000,828A8C8E,00000001 Marked TLB and SLB misses 
#124,170,170,55,144,0,0,pm_mrk_lbref,Marked TLB and SLB references ##C40C3,C40C7,00001,C50C7,00009,00005 00000000,0CF00000,868E028E,00000001 Marked TLB and SLB references #125,175,71,150,134,0,0,pm_mrk_lsmiss,Marked load and store miss ##82088,00001,00003,00005,00009,00005 00000000,00080008,1002060A,00000001 Marked load and store miss #126,179,179,148,160,0,0,pm_mrk_ulsflush,Mark unaligned load and store flushes ##00003,820E3,81090,81090,00009,00005 00000000,00280004,06C62020,00000001 Mark unaligned load and store flushes #127,178,178,136,148,0,0,pm_mrk_misc,Misc marked instructions ##820E6,00003,00014,0000B,00009,00005 00000000,00080008,CC062816,00000001 Misc marked instructions #128,13,74,165,106,0,0,pm_lsref_L1,Load/Store operations and L1 activity ##C3087,2208D,C1090,C1090,00009,00005 00000000,80330004,0E1A2020,00000000 Load/Store operations and L1 activity #129,16,18,165,106,0,0,pm_lsref_L2L3,Load/Store operations and L2,L3 activity ##C308E,C3087,C1090,C1090,00009,00005 00000000,00330000,1C0E2020,00000000 Load/Store operations and L2,L3 activity #130,81,21,165,106,0,0,pm_lsref_tlbmiss,Load/Store operations and TLB misses ##800C0,800C4,C1090,C1090,00009,00005 00000000,00B00000,80882020,00000000 Load/Store operations and TLB misses #131,16,18,100,171,0,0,pm_Dmiss,Data cache misses ##C308E,C3087,C1088,C10C3,00009,00005 00000000,00330000,1C0E1086,00000000 Data cache misses #132,12,69,61,91,0,0,pm_prefetchX,Prefetch events ##0000F,220E6,C70E7,C50C3,00009,00005 00000000,85330006,1ECCCE86,00000000 Prefetch events #133,9,8,3,1,0,0,pm_branchX,Branch operations ##23087,23087,23087,230E4,00009,00005 00000000,80000003,0E0E0EC8,00000000 Branch operations #134,43,51,30,37,0,0,pm_fpuX1,Floating point events by unit ##020E1,020E5,010C3,030E0,00009,00005 00000000,0000000D,C2CA86C0,00000000 Floating point events by unit #135,39,47,33,42,0,0,pm_fpuX2,Floating point events by unit ##000C1,000C5,010C1,010C5,00009,00005 00000000,00000000,828A828A,00000000 Floating point events by 
unit #136,36,44,30,40,0,0,pm_fpuX3,Floating point events by unit ##000C3,000C7,010C3,010C7,00009,00005 00000000,00000000,868E868E,00000000 Floating point events by unit #137,56,54,165,106,0,0,pm_fpuX4,Floating point and L1 events ##00090,00088,C1090,C1090,00009,00005 00000000,00300000,20102020,00000000 Floating point and L1 events #138,58,56,30,40,0,0,pm_fpuX5,Floating point events ##02090,02090,010C3,010C7,00009,00005 00000000,0000000C,2020868E,00000000 Floating point events #139,55,53,39,44,0,0,pm_fpuX6,Floating point events ##00088,00090,01090,01088,00009,00005 00000000,00000000,10202010,00000000 Floating point events #140,12,58,6,44,0,0,pm_hpmcount1,HPM group for set 1 ##0000F,00014,0000F,01088,00009,00005 00000000,00000000,1E281E10,00000000 HPM group for set 1 #141,12,56,56,115,0,0,pm_hpmcount2,HPM group for set 2 ##0000F,02090,00009,C5090,00009,00005 00000000,04300004,1E201220,00000000 HPM group for set 2 #142,12,72,100,171,0,0,pm_hpmcount3,HPM group for set 3 ##0000F,120E1,C1088,C10C3,00009,00005 00000000,40300004,1EC21086,00000000 HPM group for set 3 #143,210,15,165,106,0,0,pm_hpmcount4,HPM group for set 7 ##80088,0000F,C1090,C1090,00009,00005 00000000,00B00000,101E2020,00000000 HPM group for set 7 #144,56,54,6,59,0,0,pm_1flop_with_fma,One flop instructions plus FMA ##00090,00088,0000F,00001,00009,00005 00000000,00000000,20101E02,00000000 One flop instructions plus FMA papi-5.6.0/src/perfctr-2.7.x/etc/costs/Athlon-1.1000664 001750 001750 00000001455 13216244367 023127 0ustar00jshenry1963jshenry1963000000 000000 [data from a 1.1 GHz Athlon] PERFCTR INIT: vendor 2, family 6, model 7, stepping 0, clock 1107644 kHz PERFCTR INIT: NITER == 64 PERFCTR INIT: loop overhead is 157 cycles PERFCTR INIT: rdtsc cost is 13.4 cycles (1015 total) PERFCTR INIT: rdpmc cost is 15.0 cycles (1121 total) PERFCTR INIT: rdmsr (counter) cost is 51.7 cycles (3472 total) PERFCTR INIT: rdmsr (evntsel) cost is 52.8 cycles (3537 total) PERFCTR INIT: wrmsr (counter) cost is 82.6 cycles 
(5447 total) PERFCTR INIT: wrmsr (evntsel) cost is 231.6 cycles (14981 total) PERFCTR INIT: read cr4 cost is 5.0 cycles (478 total) PERFCTR INIT: write cr4 cost is 62.8 cycles (4180 total) PERFCTR INIT: write LVTPC cost is 9.3 cycles (755 total) PERFCTR INIT: sync_core cost is 76.9 cycles (5082 total) perfctr: driver 2.7.4, cpu type AMD K7/K8 at 1107644 kHz papi-5.6.0/man/man3/PAPI_set_granularity.3000664 001750 001750 00000003637 13216244356 022272 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPI_set_granularity" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPI_set_granularity \- .PP Set the default counting granularity for eventsets bound to the cpu component\&. .SH SYNOPSIS .br .PP .SH "Detailed Description" .PP .PP .nf @par C Prototype: \#include @n int PAPI_set_granularity( int granularity ); @param -- granularity one of the following constants as defined in the papi.h header file @arg PAPI_GRN_THR -- Count each individual thread @arg PAPI_GRN_PROC -- Count each individual process @arg PAPI_GRN_PROCG -- Count each individual process group @arg PAPI_GRN_SYS -- Count the current CPU @arg PAPI_GRN_SYS_CPU -- Count all CPUs individually @arg PAPI_GRN_MIN -- The finest available granularity @arg PAPI_GRN_MAX -- The coarsest available granularity .fi .PP .PP \fBReturn values:\fP .RS 4 \fIPAPI_OK\fP .br \fIPAPI_EINVAL\fP One or more of the arguments is invalid\&. .RE .PP \fBPAPI_set_granularity\fP sets the default counting granularity for all new event sets created by \fBPAPI_create_eventset\fP\&. This call implicitly sets the granularity for the cpu component (component 0) and is included to preserve backward compatibility\&. 
.PP \fBExample:\fP .RS 4 .PP .nf int ret; // Initialize the library ret = PAPI_library_init(PAPI_VER_CURRENT); if (ret > 0 && ret != PAPI_VER_CURRENT) { fprintf(stderr,"PAPI library version mismatch!\n"); exit(1); } if (ret < 0) handle_error(ret); // Set the default granularity for the cpu component ret = PAPI_set_granularity(PAPI_GRN_PROC); if (ret != PAPI_OK) handle_error(ret); ret = PAPI_create_eventset(&EventSet); if (ret != PAPI_OK) handle_error(ret); * .fi .PP .RE .PP \fBSee Also:\fP .RS 4 \fBPAPI_set_cmp_granularity\fP \fBPAPI_set_domain\fP \fBPAPI_set_opt\fP \fBPAPI_get_opt\fP .RE .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. papi-5.6.0/man/man3/PAPI_unlock.3000664 001750 001750 00000001127 13216244356 020341 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPI_unlock" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPI_unlock \- .PP Unlock one of the mutex variables defined in \fBpapi\&.h\fP\&. .SH SYNOPSIS .br .PP .SH "Detailed Description" .PP .PP \fBParameters:\fP .RS 4 \fIlck\fP an integer value specifying one of the two user locks: PAPI_USR1_LOCK or PAPI_USR2_LOCK .RE .PP \fBPAPI_unlock()\fP unlocks the mutex acquired by a call to \fBPAPI_lock\fP \&. .PP \fBSee Also:\fP .RS 4 \fBPAPI_thread_init\fP .RE .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. 
papi-5.6.0/src/perfctr-2.7.x/usr.lib/x86_cpuid.S000664 001750 001750 00000001141 13216244370 023051 0ustar00jshenry1963jshenry1963000000 000000 .file "x86_cpuid.S" /// struct cpuid { unsigned int eax, ebx, edx, ecx; }; /// void get_cpuid(unsigned int eax, struct cpuid*); /// move eax to %eax, then execute CPUID /// copy resulting %eax, %ebx, %edx, %ecx into the cpuid buf .text .align 4 .globl get_cpuid get_cpuid: /* the two pushes below plus the return address sit in front of the arguments, hence the 12(%esp) and 16(%esp) offsets; results are stored in the eax, ebx, edx, ecx order of struct cpuid */ pushl %ebp pushl %ebx movl 12(%esp),%eax /* argument for CPUID */ .byte 0x0F,0xA2 /* opcode for CPUID */ movl 16(%esp),%ebp /* ptr to struct cpuid buf */ movl %eax,(%ebp) movl %ebx,4(%ebp) movl %edx,8(%ebp) movl %ecx,12(%ebp) popl %ebx popl %ebp ret .align 4 .type get_cpuid,@function .size get_cpuid,.-get_cpuid papi-5.6.0/src/freebsd/map-p6-m.h000664 001750 001750 00000006242 13216244361 020443 0ustar00jshenry1963jshenry1963000000 000000 /****************************/ /* THIS IS OPEN SOURCE CODE */ /****************************/ /* * File: map-p6-M.h * CVS: $Id$ * Author: Harald Servat * redcrash@gmail.com */ #ifndef FreeBSD_MAP_P6_M #define FreeBSD_MAP_P6_M /* Native event indices for the P6 "Pentium M" event table. The first enumerator is PAPI_NATIVE_MASK and each following one is the previous value plus one, so the order of this list is significant: it has to stay in step, entry for entry, with P6_M_Processor_info[] (declared at the end of this header). PNE_P6_M_NATNAME_GUARD is the last enumerator; presumably it marks the end of the list - confirm against the users. NOTE(review): PNE_P6_M_FTP_COMPS_OPS_EXE and PNE_P6_M_EMON_SIMD_INSTR_RETIRD look misspelled (FP_COMPS / RETIRED); they are part of the public names, so check all users before renaming. */ enum NativeEvent_Value_P6_M_Processor { /* P6 common events */ PNE_P6_M_BACLEARS = PAPI_NATIVE_MASK, PNE_P6_M_BR_BOGUS, PNE_P6_M_BR_INST_DECODED, PNE_P6_M_BR_INST_RETIRED, PNE_P6_M_BR_MISS_PRED_RETIRED, PNE_P6_M_BR_MISS_PRED_TAKEN_RET, PNE_P6_M_BR_TAKEN_RETIRED, PNE_P6_M_BTB_MISSES, PNE_P6_M_BUS_BNR_DRV, PNE_P6_M_BUS_DATA_RCV, PNE_P6_M_BUS_DRDY_CLOCKS, PNE_P6_M_BUS_HIT_DRV, PNE_P6_M_BUS_HITM_DRV, PNE_P6_M_BUS_LOCK_CLOCKS, PNE_P6_M_BUS_REQ_OUTSTANDING, PNE_P6_M_BUS_SNOOP_STALL, PNE_P6_M_BUS_TRAN_ANY, PNE_P6_M_BUS_TRAN_BRD, PNE_P6_M_BUS_TRAN_BURST, PNE_P6_M_BUS_TRAN_DEF, PNE_P6_M_BUS_TRAN_IFETCH, PNE_P6_M_BUS_TRAN_INVAL, PNE_P6_M_BUS_TRAN_MEM, PNE_P6_M_BUS_TRAN_POWER, PNE_P6_M_BUS_TRAN_RFO, PNE_P6_M_BUS_TRANS_IO, PNE_P6_M_BUS_TRANS_P, PNE_P6_M_BUS_TRANS_WB, PNE_P6_M_CPU_CLK_UNHALTED, PNE_P6_M_CYCLES_DIV_BUSY, PNE_P6_M_CYCLES_IN_PENDING_AND_MASKED, PNE_P6_M_CYCLES_INT_MASKED, PNE_P6_M_DATA_MEM_REFS,
PNE_P6_M_DCU_LINES_IN, PNE_P6_M_DCU_M_LINES_IN, PNE_P6_M_DCU_M_LINES_OUT, PNE_P6_M_DCU_MISS_OUTSTANDING, PNE_P6_M_DIV, PNE_P6_M_FLOPS, PNE_P6_M_FP_ASSIST, PNE_P6_M_FTP_COMPS_OPS_EXE, PNE_P6_M_HW_INT_RX, PNE_P6_M_IFU_FETCH, PNE_P6_M_IFU_FETCH_MISS, PNE_P6_M_IFU_MEM_STALL, PNE_P6_M_ILD_STALL, PNE_P6_M_INST_DECODED, PNE_P6_M_INST_RETIRED, PNE_P6_M_ITLB_MISS, PNE_P6_M_L2_ADS, PNE_P6_M_L2_DBUS_BUSY, PNE_P6_M_L2_DBUS_BUSY_RD, PNE_P6_M_L2_IFETCH, PNE_P6_M_L2_LD, PNE_P6_M_L2_LINES_IN, PNE_P6_M_L2_LINES_OUT, PNE_P6_M_L2M_LINES_INM, PNE_P6_M_L2M_LINES_OUTM, PNE_P6_M_L2_RQSTS, PNE_P6_M_L2_ST, PNE_P6_M_LD_BLOCKS, PNE_P6_M_MISALIGN_MEM_REF, PNE_P6_M_MUL, PNE_P6_M_PARTIAL_RAT_STALLS, PNE_P6_M_RESOURCE_STALL, PNE_P6_M_SB_DRAINS, PNE_P6_M_SEGMENT_REG_LOADS, PNE_P6_M_UOPS_RETIRED, /* Pentium 3 specific events */ PNE_P6_M_FP_MMX_TRANS, PNE_P6_M_MMX_ASSIST, PNE_P6_M_MMX_INSTR_EXEC, PNE_P6_M_MMX_INSTR_RET, PNE_P6_M_MMX_SAT_INSTR_EXEC, PNE_P6_M_MMX_UOPS_EXEC, PNE_P6_M_RET_SEG_RENAMES, PNE_P6_M_SEG_RENAME_STALLS, PNE_P6_M_EMON_KNI_COMP_INST_RET, PNE_P6_M_EMON_KNI_INST_RETIRED, PNE_P6_M_EMON_KNI_PREF_DISPATCHED, PNE_P6_M_EMON_KNI_PREF_MISS, /* Pentium M specific events */ PNE_P6_M_BR_BAC_MISSP_EXEC, PNE_P6_M_BR_CALL_EXEC, PNE_P6_M_BR_CALL_MISSP_EXEC, PNE_P6_M_BR_CND_EXEC, PNE_P6_M_BR_CND_MISSP_EXEC, PNE_P6_M_BR_IND_CALL_EXEC, PNE_P6_M_BR_IND_EXEC, PNE_P6_M_BR_IND_MISSP_EXEC, PNE_P6_M_BR_INST_EXEC, PNE_P6_M_BR_MISSP_EXEC, PNE_P6_M_BR_RET_BAC_MISSP_EXEC, PNE_P6_M_BR_RET_EXEC, PNE_P6_M_BR_RET_MISSP_EXEC, PNE_P6_M_EMON_ESP_UOPS, PNE_P6_M_EMON_EST_TRANS, PNE_P6_M_EMON_FUSED_UOPS_RET, PNE_P6_M_EMON_PREF_RQSTS_DN, PNE_P6_M_EMON_PREF_RQSTS_UP, PNE_P6_M_EMON_SIMD_INSTR_RETIRD, PNE_P6_M_EMON_SSE_SSE2_COMP_INST_RETIRED, PNE_P6_M_EMON_SSE_SSE2_INST_RETIRED, PNE_P6_M_EMON_SYNCH_UOPS, PNE_P6_M_EMON_THERMAL_TRIP, PNE_P6_M_EMON_UNFUSION, PNE_P6_M_NATNAME_GUARD }; extern Native_Event_LabelDescription_t P6_M_Processor_info[]; extern hwi_search_t P6_M_Processor_map[]; #endif
papi-5.6.0/src/libpfm4/lib/pfmlib_intel_knl_unc_edc.c000664 001750 001750 00000010530 13216244365 024564 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_intel_knl_unc_edc.c : Intel KnightsLanding Integrated EDRAM uncore PMU * * Copyright (c) 2016 Intel Corp. All rights reserved * Contributed by Peinan Zhang * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include #include #include #include #include /* private headers */ #include "pfmlib_priv.h" #include "pfmlib_intel_x86_priv.h" #include "pfmlib_intel_snbep_unc_priv.h" #include "events/intel_knl_unc_edc_events.h" /* DEFINE_EDC_UCLK_BOX(n): expands to the definition of the pfmlib_pmu_t descriptor intel_knl_unc_edc_uclk<n>_support. Only .desc, .name, .perf_name and .pmu depend on n; every instance points at the same intel_knl_unc_edc_uclk_pe event table, uses the snbep_unc encoders and the generic intel_x86 event callbacks. The expansion deliberately has no trailing ';' - the invocation supplies it, which avoids an empty declaration at file scope. */ #define DEFINE_EDC_UCLK_BOX(n) \ pfmlib_pmu_t intel_knl_unc_edc_uclk##n##_support = { \ .desc = "Intel KnightLanding EDC_UCLK_"#n" uncore", \ .name = "knl_unc_edc_uclk"#n, \ .perf_name = "uncore_edc_uclk_"#n, \ .pmu = PFM_PMU_INTEL_KNL_UNC_EDC_UCLK##n, \ .pme_count = LIBPFM_ARRAY_SIZE(intel_knl_unc_edc_uclk_pe), \ .type = PFM_PMU_TYPE_UNCORE, \ .num_cntrs = 4, \ .num_fixed_cntrs = 0, \ .max_encoding = 1, \ .pe = intel_knl_unc_edc_uclk_pe, \ .atdesc = snbep_unc_mods, \ .flags = PFMLIB_PMU_FL_RAW_UMASK, \ .pmu_detect = pfm_intel_knl_unc_detect, \ .get_event_encoding[PFM_OS_NONE] = pfm_intel_snbep_unc_get_encoding, \ PFMLIB_ENCODE_PERF(pfm_intel_snbep_unc_get_perf_encoding), \ PFMLIB_OS_DETECT(pfm_intel_x86_perf_detect), \ .get_event_first = pfm_intel_x86_get_event_first, \ .get_event_next = pfm_intel_x86_get_event_next, \ .event_is_valid = pfm_intel_x86_event_is_valid, \ .validate_table = pfm_intel_x86_validate_table, \ .get_event_info = pfm_intel_x86_get_event_info, \ .get_event_attr_info = pfm_intel_x86_get_event_attr_info, \ PFMLIB_VALID_PERF_PATTRS(pfm_intel_snbep_unc_perf_validate_pattrs), \ .get_event_nattrs = pfm_intel_x86_get_event_nattrs, \ } /* one descriptor per EDC UCLK box, n = 0..7 */ DEFINE_EDC_UCLK_BOX(0); DEFINE_EDC_UCLK_BOX(1); DEFINE_EDC_UCLK_BOX(2); DEFINE_EDC_UCLK_BOX(3); DEFINE_EDC_UCLK_BOX(4); DEFINE_EDC_UCLK_BOX(5); DEFINE_EDC_UCLK_BOX(6); DEFINE_EDC_UCLK_BOX(7); /* DEFINE_EDC_ECLK_BOX(n): same layout as the UCLK variant, but defines intel_knl_unc_edc_eclk<n>_support and uses the intel_knl_unc_edc_eclk_pe event table. As above, the invocation supplies the terminating ';'. */ #define DEFINE_EDC_ECLK_BOX(n) \ pfmlib_pmu_t intel_knl_unc_edc_eclk##n##_support = { \ .desc = "Intel KnightLanding EDC_ECLK_"#n" uncore", \ .name = "knl_unc_edc_eclk"#n, \ .perf_name = "uncore_edc_eclk_"#n, \ .pmu = PFM_PMU_INTEL_KNL_UNC_EDC_ECLK##n, \ .pme_count = LIBPFM_ARRAY_SIZE(intel_knl_unc_edc_eclk_pe), \ .type = PFM_PMU_TYPE_UNCORE, \ .num_cntrs = 4, \ .num_fixed_cntrs = 0, \ .max_encoding = 1, \ .pe =
intel_knl_unc_edc_eclk_pe, \ .atdesc = snbep_unc_mods, \ .flags = PFMLIB_PMU_FL_RAW_UMASK, \ .pmu_detect = pfm_intel_knl_unc_detect, \ .get_event_encoding[PFM_OS_NONE] = pfm_intel_snbep_unc_get_encoding, \ PFMLIB_ENCODE_PERF(pfm_intel_snbep_unc_get_perf_encoding), \ PFMLIB_OS_DETECT(pfm_intel_x86_perf_detect), \ .get_event_first = pfm_intel_x86_get_event_first, \ .get_event_next = pfm_intel_x86_get_event_next, \ .event_is_valid = pfm_intel_x86_event_is_valid, \ .validate_table = pfm_intel_x86_validate_table, \ .get_event_info = pfm_intel_x86_get_event_info, \ .get_event_attr_info = pfm_intel_x86_get_event_attr_info, \ PFMLIB_VALID_PERF_PATTRS(pfm_intel_snbep_unc_perf_validate_pattrs), \ .get_event_nattrs = pfm_intel_x86_get_event_nattrs, \ } /* one descriptor per EDC ECLK box, n = 0..7 */ DEFINE_EDC_ECLK_BOX(0); DEFINE_EDC_ECLK_BOX(1); DEFINE_EDC_ECLK_BOX(2); DEFINE_EDC_ECLK_BOX(3); DEFINE_EDC_ECLK_BOX(4); DEFINE_EDC_ECLK_BOX(5); DEFINE_EDC_ECLK_BOX(6); DEFINE_EDC_ECLK_BOX(7); papi-5.6.0/src/libpfm4/lib/pfmlib_intel_snbep_unc_ha.c000664 001750 001750 00000006464 13216244365 024757 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_intel_snb_unc_ha.c : Intel SandyBridge-EP Home Agent (HA) uncore PMU * * Copyright (c) 2012 Google, Inc * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software.
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include #include #include #include #include /* private headers */ #include "pfmlib_priv.h" #include "pfmlib_intel_x86_priv.h" #include "pfmlib_intel_snbep_unc_priv.h" #include "events/intel_snbep_unc_ha_events.h" /* display_ha - print the encoded HA registers of one event through __pfm_vbprintf. this: PMU handle, resolved to its event table with this_pe(). e: event descriptor; e->event indexes the table for the name, e->count and e->codes[] describe the encoding. val: read as the primary pfm_snbep_unc_reg_t. The main control register (event, umask, en, inv, edge, thres) is always printed. Unless e->count is exactly 1, e->codes[1] is then printed as the address-match register and e->codes[2] as the opcode-match register. NOTE(review): the only guard is e->count == 1, so codes[2] is also read when e->count is 2 - presumably the encoder emits either 1 or 3 codes; confirm. */ static void display_ha(void *this, pfmlib_event_desc_t *e, void *val) { const intel_x86_entry_t *pe = this_pe(this); pfm_snbep_unc_reg_t *reg = val; pfm_snbep_unc_reg_t f; __pfm_vbprintf("[UNC_HA=0x%"PRIx64" event=0x%x umask=0x%x en=%d " "inv=%d edge=%d thres=%d] %s\n", reg->val, reg->com.unc_event, reg->com.unc_umask, reg->com.unc_en, reg->com.unc_inv, reg->com.unc_edge, reg->com.unc_thres, pe[e->event].name); /* single-code encoding: no match registers to show */ if (e->count == 1) return; f.val = e->codes[1]; __pfm_vbprintf("[UNC_HA_ADDR=0x%"PRIx64" lo_addr=0x%x hi_addr=0x%x]\n", f.val, f.ha_addr.lo_addr, f.ha_addr.hi_addr); f.val = e->codes[2]; __pfm_vbprintf("[UNC_HA_OPC=0x%"PRIx64" opc=0x%x]\n", f.val, f.ha_opc.opc); } /* PMU descriptor for the Sandy Bridge-EP Home Agent uncore: 4 generic counters, no fixed counters, up to 3 encoding words per event, events taken from intel_snbep_unc_h_pe, and display_ha installed as the register dump hook. */ pfmlib_pmu_t intel_snbep_unc_ha_support = { .desc = "Intel Sandy Bridge-EP HA uncore", .name = "snbep_unc_ha", .perf_name = "uncore_ha", .pmu = PFM_PMU_INTEL_SNBEP_UNC_HA, .pme_count = LIBPFM_ARRAY_SIZE(intel_snbep_unc_h_pe), .type = PFM_PMU_TYPE_UNCORE, .num_cntrs = 4, .num_fixed_cntrs = 0, .max_encoding = 3, /* address matchers */ .pe = intel_snbep_unc_h_pe, .atdesc = snbep_unc_mods, .flags = PFMLIB_PMU_FL_RAW_UMASK | PFMLIB_PMU_FL_NO_SMPL, .pmu_detect = pfm_intel_snbep_unc_detect, .get_event_encoding[PFM_OS_NONE] = pfm_intel_snbep_unc_get_encoding, PFMLIB_ENCODE_PERF(pfm_intel_snbep_unc_get_perf_encoding),
.get_event_first = pfm_intel_x86_get_event_first, .get_event_next = pfm_intel_x86_get_event_next, .event_is_valid = pfm_intel_x86_event_is_valid, .validate_table = pfm_intel_x86_validate_table, .get_event_info = pfm_intel_x86_get_event_info, .get_event_attr_info = pfm_intel_x86_get_event_attr_info, PFMLIB_VALID_PERF_PATTRS(pfm_intel_snbep_unc_perf_validate_pattrs), .get_event_nattrs = pfm_intel_x86_get_event_nattrs, .display_reg = display_ha, }; papi-5.6.0/src/perfctr-2.6.x/linux/include/asm-ppc/perfctr.h000775 001750 001750 00000000041 13216244367 025504 0ustar00jshenry1963jshenry1963000000 000000 #include papi-5.6.0/src/libpfm4/docs/man3/libpfm_intel_hswep_unc_qpi.3000664 001750 001750 00000002700 13216244364 026123 0ustar00jshenry1963jshenry1963000000 000000 .TH LIBPFM 3 "May , 2015" "" "Linux Programmer's Manual" .SH NAME libpfm_intel_hswep_unc_qpi - support for Intel Haswell-EP QPI uncore PMU .SH SYNOPSIS .nf .B #include <perfmon/pfmlib.h> .sp .B PMU name: hswep_unc_qpi0, hswep_unc_qpi1 .B PMU desc: Intel Haswell-EP QPI uncore PMU .sp .SH DESCRIPTION The library supports the Intel Haswell-EP QPI uncore PMU. This PMU model only exists on Haswell model 63. .SH MODIFIERS The following modifiers are supported on the Haswell-EP QPI uncore PMU: .TP .B e Enable edge detection, i.e., count only when there is a state transition from no occurrence of the event to at least one occurrence. This modifier must be combined with a threshold modifier (t) with a value greater than or equal to one. This is a boolean modifier. .TP .B t Set the threshold value. When set to a non-zero value, the counter counts the number of QPI cycles in which the number of occurrences of the event is greater than or equal to the threshold. This is an integer modifier with values in the range [0:255]. .TP .B i Invert the meaning of the threshold or edge filter. If set, the event counts when strictly less than N occurrences occur per cycle if threshold is set to N.
When invert is set, then threshold must be set to a non-zero value. If set, the event counts when the event transitions from occurring to not occurring (falling edge) when edge detection is set. This is a boolean modifier. .SH AUTHORS .nf Stephane Eranian .fi .PP papi-5.6.0/src/freebsd/map-p6-m.c000664 001750 001750 00000024304 13216244361 020435 0ustar00jshenry1963jshenry1963000000 000000 /****************************/ /* THIS IS OPEN SOURCE CODE */ /****************************/ /* * File: map-p6-M.c * Author: Harald Servat * redcrash@gmail.com */ #include "freebsd.h" #include "papiStdEventDefs.h" #include "map.h" /**************************************************************************** P6_M SUBSTRATE P6_M SUBSTRATE P6_M SUBSTRATE (aka Pentium M) P6_M SUBSTRATE P6_M SUBSTRATE ****************************************************************************/ /* NativeEvent_Value_P6_M_Processor must match P6_M_Processor_info */ Native_Event_LabelDescription_t P6_M_Processor_info[] = { /* Common P6 counters */ { "p6-baclears", "Count the number of times a static branch prediction was made by the branch decoder because the BTB did not have a prediction." }, { "p6-br-bogus", "Count the number of bogus branches." }, { "p6-br-inst-decoded", "Count the number of branch instructions decoded." }, { "p6-br-inst-retired", "Count the number of branch instructions retired." }, { "p6-br-miss-pred-retired", "Count the number of mispredicted branch instructions retired." }, { "p6-br-miss-pred-taken-ret", "Count the number of taken mispredicted branches retired." }, { "p6-br-taken-retired", "Count the number of taken branches retired." }, { "p6-btb-misses", "Count the number of branches for which the BTB did not produce a prediction. "}, { "p6-bus-bnr-drv", "Count the number of bus clock cycles during which this processor is driving the BNR# pin." }, { "p6-bus-data-rcv", "Count the number of bus clock cycles during which this processor is receiving data."
}, { "p6-bus-drdy-clocks", "Count the number of clocks during which DRDY# is asserted." }, { "p6-bus-hit-drv", "Count the number of bus clock cycles during which this processor is driving the HIT# pin." }, { "p6-bus-hitm-drv", "Count the number of bus clock cycles during which this processor is driving the HITM# pin." }, { "p6-bus-lock-clocks", "Count the number of clocks during which LOCK# is asserted on the external system bus." }, { "p6-bus-req-outstanding", "Count the number of bus requests outstanding in any given cycle." }, { "p6-bus-snoop-stall", "Count the number of clock cycles during which the bus is snoop stalled." }, { "p6-bus-tran-any", "Count the number of completed bus transactions of any kind." }, { "p6-bus-tran-brd", "Count the number of burst read transactions." }, { "p6-bus-tran-burst", "Count the number of completed burst transactions." }, { "p6-bus-tran-def", "Count the number of completed deferred transactions." }, { "p6-bus-tran-ifetch", "Count the number of completed instruction fetch transactions." }, { "p6-bus-tran-inval", "Count the number of completed invalidate transactions." }, { "p6-bus-tran-mem", "Count the number of completed memory transactions." }, { "p6-bus-tran-pwr", "Count the number of completed partial write transactions." }, { "p6-bus-tran-rfo", "Count the number of completed read-for-ownership transactions." }, { "p6-bus-trans-io", "Count the number of completed I/O transactions." }, { "p6-bus-trans-p", "Count the number of completed partial transactions." }, { "p6-bus-trans-wb", "Count the number of completed write-back transactions." }, /* { "p6-cpu-clk-unhalted", "Count the number of cycles during with the processor was not halted." }, THIS IS DIFFERENT IN PM */ { "p6-cpu-clk-unhalted", "Count the number of cycles during which the processor was not halted and not in a thermal trip." }, { "p6-cycles-div-busy", "Count the number of cycles during which the divider is busy and cannot accept new divides."
}, { "p6-cycles-in-pending-and-masked", "Count the number of processor cycles for which interrupts were disabled and interrupts were pending." }, { "p6-cycles-int-masked", "Count the number of processor cycles for which interrupts were disabled." }, { "p6-data-mem-refs", "Count all loads and all stores using any memory type, including internal retries." }, { "p6-dcu-lines-in", "Count the total lines allocated in the data cache unit." }, { "p6-dcu-m-lines-in", "Count the number of M state lines allocated in the data cache unit." }, { "p6-dcu-m-lines-out", "Count the number of M state lines evicted from the data cache unit." }, { "p6-dcu-miss-outstanding", "Count the weighted number of cycles while a data cache unit miss is outstanding, incremented by the number of outstanding cache misses at any time."}, { "p6-div", "Count the number of integer and floating-point divides including speculative divides." }, { "p6-flops", "Count the number of computational floating point operations retired." }, { "p6-fp-assist", "Count the number of floating point exceptions handled by microcode." }, { "p6-fp-comps-ops-exe", "Count the number of computation floating point operations executed." }, { "p6-hw-int-rx", "Count the number of hardware interrupts received." }, { "p6-ifu-fetch", "Count the number of instruction fetches, both cacheable and non-cacheable." }, { "p6-ifu-fetch-miss", "Count the number of instruction fetch misses" }, { "p6-ifu-mem-stall", "Count the number of cycles instruction fetch is stalled for any reason." }, { "p6-ild-stall", "Count the number of cycles the instruction length decoder is stalled." }, { "p6-inst-decoded", "Count the number of instructions decoded." }, { "p6-inst-retired", "Count the number of instructions retired." }, { "p6-itlb-miss", "Count the number of instruction TLB misses." }, { "p6-l2-ads", "Count the number of L2 address strobes." }, { "p6-l2-dbus-busy", "Count the number of cycles during which the L2 cache data bus was busy."
}, { "p6-l2-dbus-busy-rd", "Count the number of cycles during which the L2 cache data bus was busy transferring read data from L2 to the processor." }, { "p6-l2-ifetch", "Count the number of L2 instruction fetches." }, { "p6-l2-ld", "Count the number of L2 data loads." }, { "p6-l2-lines-in", "Count the number of L2 lines allocated." }, { "p6-l2-lines-out", "Count the number of L2 lines evicted." }, { "p6-l2-m-lines-inm", "Count the number of modified lines allocated in L2 cache." }, { "p6-l2-m-lines-outm", "Count the number of L2 M-state lines evicted." }, { "p6-l2-rqsts", "Count the total number of L2 requests." }, { "p6-l2-st", "Count the number of L2 data stores." }, { "p6-ld-blocks", "Count the number of load operations delayed due to store buffer blocks." }, { "p6-misalign-mem-ref", "Count the number of misaligned data memory references (crossing a 64 bit boundary)." }, { "p6-mul", "Count the number of floating point multiplies, including speculative multiplies." }, { "p6-partial-rat-stalls", "Count the number of cycles or events for partial stalls." }, { "p6-resource-stalls", "Count the number of cycles there was a resource related stall of any kind." }, { "p6-sb-drains", "Count the number of cycles the store buffer is draining." }, { "p6-segment-reg-loads", "Count the number of segment register loads." }, { "p6-uops-retired", "Count the number of micro-ops retired."}, /* Specific Pentium 3 counters */ { "p6-fp-mmx-trans", "Count the number of transitions between MMX and floating-point instructions." }, { "p6-mmx-assist", "Count the number of MMX assists executed" }, { "p6-mmx-instr-exec", "Count the number of MMX instructions executed" }, { "p6-mmx-instr-ret", "Count the number of MMX instructions retired." }, { "p6-mmx-sat-instr-exec", "Count the number of MMX saturating instructions executed" }, { "p6-mmx-uops-exec", "Count the number of MMX micro-ops executed" }, { "p6-ret-seg-renames", "Count the number of segment register rename events retired."
}, { "p6-seg-rename-stalls", "Count the number of segment register renaming stalls" }, { "p6-emon-kni-comp-inst-ret", "Count the number of SSE computational instructions retired" }, { "p6-emon-kni-inst-retired", "Count the number of SSE instructions retired." }, { "p6-emon-kni-pref-dispatched", "Count the number of SSE prefetch or weakly ordered instructions dispatched." }, { "p6-emon-kni-pref-miss", "Count the number of prefetch or weakly ordered instructions that miss all caches." }, /* Specific Pentium M counters; as with the rows above, the order mirrors the PNE_P6_M_* enumerators of NativeEvent_Value_P6_M_Processor, so only the description text may be edited freely */ { "p6-br-bac-missp-exec", "Count the number of branch instructions executed that were mispredicted at the Front End (BAC)." }, { "p6-br-call-exec", "Count the number of call instructions executed." }, { "p6-br-call-missp-exec", "Count the number of call instructions executed that were mispredicted." }, { "p6-br-cnd-exec", "Count the number of conditional branch instructions executed" }, { "p6-br-cnd-missp-exec", "Count the number of conditional branch instructions executed that were mispredicted." }, { "p6-br-ind-call-exec", "Count the number of indirect call instructions executed" }, { "p6-br-ind-exec", "Count the number of indirect branch instructions executed" }, { "p6-br-ind-missp-exec", "Count the number of indirect branch instructions executed that were mispredicted." }, { "p6-br-inst-exec", "Count the number of branch instructions executed but not necessarily retired." }, { "p6-br-missp-exec", "Count the number of branch instructions executed that were mispredicted at execution." }, { "p6-br-ret-bac-missp-exec", "Count the number of return instructions executed that were mispredicted at the Front End (BAC)." }, { "p6-br-ret-exec", "Count the number of return instructions executed." }, { "p6-br-ret-missp-exec", "Count the number of return instructions executed that were mispredicted at execution." }, { "p6-emon-esp-uops", "Count the total number of micro-ops."
}, { "p6-emon-est-trans", "Count the number of Enhanced Intel SpeedStep transitions" }, { "p6-emon-fused-uops-ret", "Count the number of retired fused micro-ops." }, { "p6-emon-pref-rqsts-dn", "Count the number of downward prefetches issued." }, { "p6-emon-pref-rqsts-up", "Count the number of upward prefetches issued." }, { "p6-emon-simd-instr-retired", "Count the number of retired MMX instructions." }, { "p6-emon-sse-sse2-comp-inst-retired", "Count the number of computational SSE instructions retired." }, { "p6-emon-sse-sse2-inst-retired", "Count the number of SSE instructions retired." }, { "p6-emon-synch-uops", "Count the number of sync micro-ops." }, { "p6-emon-thermal-trip", "Count the duration or occurrences of thermal trips." }, { "p6-emon-unfusion", "Count the number of unfusion events in the reorder buffer." }, { NULL, NULL } }; papi-5.6.0/src/freebsd_events.csv000664 001750 001750 00000032174 13216244361 021044 0ustar00jshenry1963jshenry1963000000 000000 # # FreeBSD presets # these are needed as event names are different than those in libpfm4 # CPU,UNKNOWN PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES PRESET,PAPI_TOT_INS,NOT_DERIVED,INSTRUCTIONS PRESET,PAPI_BR_INS,NOT_DERIVED,BRANCHES PRESET,PAPI_HW_INT,NOT_DERIVED,INTERRUPTS PRESET,PAPI_BR_MSP,NOT_DERIVED,BRANCH_MISPREDICTS PRESET,PAPI_L2_DCM,NOT_DERIVED,DC_MISSES PRESET,PAPI_L2_ICM,NOT_DERIVED,IC_MISSES PRESET,PAPI_L2_TCM,DERIVED_ADD, IC_MISSES,DC_MISSES CPU,INTEL_P6 CPU,INTEL_PII CPU,INTEL_PIII CPU,INTEL_CL CPU,INTEL_PM PRESET,PAPI_L1_DCM,NOT_DERIVED,DCU_LINES_IN # L2_IFETCH defaults to MESI PRESET,PAPI_L1_ICM,NOT_DERIVED,L2_IFETCH # BUS_TRAN_IFETCH defaults to SELF PRESET,PAPI_L2_DCM,DERIVED_SUB,L2_LINES_IN,BUS_TRAN_IFETCH # BUS_TRAN_IFETCH defaults to SELF PRESET,PAPI_L2_ICM,NOT_DERIVED,BUS_TRAN_IFETCH PRESET,PAPI_L1_TCM,NOT_DERIVED,L2_RQSTS PRESET,PAPI_L2_TCM,NOT_DERIVED,L2_LINES_IN PRESET,PAPI_CA_CLN,NOT_DERIVED,BUS_TRAN_RFO PRESET,PAPI_CA_ITV,NOT_DERIVED,BUS_TRAN_INVAL
PRESET,PAPI_TLB_IM,NOT_DERIVED,ITLB.MISS PRESET,PAPI_L1_LDM,NOT_DERIVED,L2_LD PRESET,PAPI_L1_STM,NOT_DERIVED,L2_ST PRESET,PAPI_L2_LDM,DERIVED_SUB,L2_LINES_IN,L2M_LINES_INM PRESET,PAPI_L2_STM,NOT_DERIVED,L2M_LINES_INM PRESET,PAPI_BTAC_M,NOT_DERIVED,BTB_MISSES PRESET,PAPI_HW_INT,NOT_DERIVED,HW_INT_RX PRESET,PAPI_BR_CN,NOT_DERIVED,BR_INST_RETIRED PRESET,PAPI_BR_TKN,NOT_DERIVED,BR_TAKEN_RETIRED PRESET,PAPI_BR_NTK,DERIVED_SUB,BR_INST_RETIRED,BR_TAKEN_RETIRED PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MISS_PRED_RETIRED PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_INST_RETIRED,BR_MISS_PRED_RETIRED PRESET,PAPI_TOT_IIS,NOT_DERIVED,INST_DECODED PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED PRESET,PAPI_FP_INS,NOT_DERIVED,FLOPS PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED PRESET,PAPI_RES_STL,NOT_DERIVED,RESOURCE_STALL PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_UNHALTED PRESET,PAPI_LST_INS,DERIVED_ADD,L2_LD,L2_ST PRESET,PAPI_L1_DCH,DERIVED_SUB,DATA_MEM_REFS, DCU_LINES_IN PRESET,PAPI_L1_DCA,NOT_DERIVED,DATA_MEM_REFS PRESET,PAPI_L2_DCA,DERIVED_ADD,L2_LD, L2_ST PRESET,PAPI_L2_DCR,NOT_DERIVED,L2_LD PRESET,PAPI_L2_DCW,NOT_DERIVED,L2_ST PRESET,PAPI_L1_ICH,DERIVED_SUB,IFU_FETCH, L2_IFETCH PRESET,PAPI_L2_ICH,DERIVED_SUB,L2_IFETCH, BUS_TRAN_IFETCH PRESET,PAPI_L1_ICA,NOT_DERIVED,IFU_FETCH PRESET,PAPI_L2_ICA,NOT_DERIVED,L2_IFETCH PRESET,PAPI_L1_ICR,NOT_DERIVED,IFU_FETCH PRESET,PAPI_L2_ICR,NOT_DERIVED,L2_IFETCH PRESET,PAPI_L2_TCH,DERIVED_SUB,L2_RQSTS, L2_LINES_IN PRESET,PAPI_L1_TCA,DERIVED_ADD,DATA_MEM_REFS, IFU_FETCH PRESET,PAPI_L2_TCA,NOT_DERIVED,L2_RQSTS PRESET,PAPI_L2_TCR,DERIVED_ADD,L2_LD, L2_IFETCH PRESET,PAPI_L2_TCW,NOT_DERIVED,L2_ST PRESET,PAPI_FML_INS,NOT_DERIVED,MUL PRESET,PAPI_FDV_INS,NOT_DERIVED,DIV PRESET,PAPI_FP_OPS,NOT_DERIVED,FLOPS CPU,INTEL_PM PRESET,PAPI_VEC_INS,DERIVED_ADD,MMX_INSTR_RET, EMON_SSE_SSE2_INST_RETIRED CPU,INTEL_PIII PRESET,PAPI_VEC_INS,DERIVED_ADD,MMX_INSTR_RET, EMON_KNI_INST_RETIRED CPU,INTEL_CL PRESET,PAPI_VEC_INS,NOT_DERIVED,MMX_INSTR_EXEC CPU,AMD_K7 
PRESET,PAPI_L1_DCM,DERIVED_ADD,DC_REFILLS_FROM_SYSTEM, DC_REFILLS_FROM_L2 PRESET,PAPI_L1_ICM,NOT_DERIVED,IC_MISSES PRESET,PAPI_L2_DCM,NOT_DERIVED,DC_REFILLS_FROM_SYSTEM PRESET,PAPI_L1_TCM,DERIVED_ADD,DC_REFILLS_FROM_SYSTEM, DC_REFILLS_FROM_L2, IC_MISSES PRESET,PAPI_TLB_DM,NOT_DERIVED,L1_AND_L2_DTLB_MISSES PRESET,PAPI_TLB_IM,NOT_DERIVED,L1_AND_L2_ITLB_MISSES PRESET,PAPI_TLB_TL,DERIVED_ADD,L1_AND_L2_DTLB_MISSES, L1_AND_L2_ITLB_MISSES PRESET,PAPI_L1_LDM,NOT_DERIVED,DC_REFILLS_FROM_L2_OES PRESET,PAPI_L1_STM,NOT_DERIVED,DC_REFILLS_FROM_L2_M PRESET,PAPI_L2_LDM,NOT_DERIVED,DC_REFILLS_FROM_SYSTEM_OES PRESET,PAPI_L2_STM,NOT_DERIVED,DC_REFILLS_FROM_SYSTEM_M PRESET,PAPI_HW_INT,NOT_DERIVED,HARDWARE_INTERRUPTS PRESET,PAPI_BR_UCN,NOT_DERIVED,RETIRED_FAR_CONTROL_TRANSFERS PRESET,PAPI_BR_CN,NOT_DERIVED,RETIRED_BRANCHES PRESET,PAPI_BR_TKN,NOT_DERIVED,RETIRED_TAKEN_BRANCHES PRESET,PAPI_BR_NTK,DERIVED_SUB,RETIRED_BRANCHES, RETIRED_TAKEN_BRANCHES PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_BRANCHES_MISPREDICTED PRESET,PAPI_BR_PRC,DERIVED_SUB,RETIRED_BRANCHES, RETIRED_BRANCHES_MISPREDICTED PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS PRESET,PAPI_BR_INS,NOT_DERIVED,RETIRED_TAKEN_BRANCHES PRESET,PAPI_L1_DCA,NOT_DERIVED,DC_ACCESSES PRESET,PAPI_L2_DCA,DERIVED_ADD,DC_REFILLS_FROM_SYSTEM, DC_REFILLS_FROM_L2 PRESET,PAPI_L1_ICA,NOT_DERIVED,IC_FETCHES PRESET,PAPI_L2_ICA,NOT_DERIVED,IC_MISSES PRESET,PAPI_L1_ICR,NOT_DERIVED,IC_FETCHES PRESET,PAPI_L1_TCA,DERIVED_ADD,DC_ACCESSES, IC_FETCHES CPU,AMD_K8 PRESET,PAPI_BR_INS,NOT_DERIVED,FR_RETIRED_BRANCHES PRESET,PAPI_RES_STL,NOT_DERIVED,FR_DISPATCH_STALLS PRESET,PAPI_TOT_CYC,NOT_DERIVED,BU_CPU_CLK_UNHALTED PRESET,PAPI_TOT_INS,NOT_DERIVED,FR_RETIRED_X86_INSTRUCTIONS PRESET,PAPI_STL_ICY,FR_DECODER_EMPTY PRESET,PAPI_HW_INT,NOT_DERIVED,FR_RETIRED_TAKEN_HARDWARE_INTERRUPTS PRESET,PAPI_BR_TKN,NOT_DERIVED,FR_RETIRED_TAKEN_BRANCHES PRESET,PAPI_BR_MSP,NOT_DERIVED,FR_RETIRED_TAKEN_BRANCHES_MISPREDICTED 
PRESET,PAPI_TLB_DM,NOT_DERIVED,DC_L1_DTLB_MISS_AND_L2_DTLB_MISS PRESET,PAPI_TLB_IM,NOT_DERIVED,IC_L1_ITLB_MISS_AND_L2_ITLB_MISS PRESET,PAPI_TLB_TL,DERIVED_ADD,DC_L1_DTLB_MISS_AND_L2_DTLB_MISS,IC_L1_ITLB_MISS_AND_L2_ITLB_MISS PRESET,PAPI_L1_DCA,NOT_DERIVED,DC_ACCESS PRESET,PAPI_L1_ICA,NOT_DERIVED,IC_FETCH PRESET,PAPI_L1_TCA,DERIVED_ADD,DC_ACCESS, IC_FETCH PRESET,PAPI_L1_ICR,NOT_DERIVED,IC_FETCH PRESET,PAPI_L2_ICH,NOT_DERIVED,IC_REFILL_FROM_L2 PRESET,PAPI_L2_DCH,NOT_DERIVED,DC_REFILL_FROM_L2 PRESET,PAPI_L2_DCM,NOT_DERIVED,DC_REFILL_FROM_SYSTEM_MOES PRESET,PAPI_L2_DCA,DERIVED_ADD,DC_REFILL_FROM_SYSTEM_MOES, DC_REFILL_FROM_L2_MOES PRESET,PAPI_L2_ICM,NOT_DERIVED,IC_REFILL_FROM_SYSTEM PRESET,PAPI_L2_DCR,NOT_DERIVED,DC_REFILL_FROM_L2_OES PRESET,PAPI_L2_DCW,NOT_DERIVED,DC_REFILL_FROM_L2_M PRESET,PAPI_L2_DCH,NOT_DERIVED,DC_REFILL_FROM_L2_MOES PRESET,PAPI_L1_LDM,NOT_DERIVED,DC_REFILL_FROM_L2_OES PRESET,PAPI_L1_STM,NOT_DERIVED,DC_REFILL_FROM_L2_M PRESET,PAPI_L2_LDM,NOT_DERIVED,DC_REFILL_FROM_SYSTEM_OES PRESET,PAPI_L2_STM,NOT_DERIVED,DC_REFILL_FROM_SYSTEM_M PRESET,PAPI_L1_DCM,DERIVED_ADD,DC_REFILL_FROM_SYSTEM_MOES, DC_REFILL_FROM_L2_MOES PRESET,PAPI_L1_ICM,DERIVED_ADD,IC_REFILL_FROM_L2, IC_REFILL_FROM_SYSTEM PRESET,PAPI_L1_TCM,DERIVED_ADD,DC_REFILL_FROM_SYSTEM_MOES,DC_REFILL_FROM_L2_MOES,IC_REFILL_FROM_SYSTEM,IC_REFILL_FROM_L2 PRESET,PAPI_L2_TCM,DERIVED_ADD,DC_REFILL_FROM_SYSTEM_MOES,IC_REFILL_FROM_SYSTEM PRESET,PAPI_L2_ICA,DERIVED_ADD,IC_REFILL_FROM_SYSTEM,IC_REFILL_FROM_L2 PRESET,PAPI_L2_TCH,DERIVED_ADD,IC_REFILL_FROM_L2,DC_REFILL_FROM_L2_MOES PRESET,PAPI_L2_TCA,DERIVED_ADD,IC_REFILL_FROM_L2,IC_REFILL_FROM_SYSTEM,DC_REFILL_FROM_L2_MOES,DC_REFILL_FROM_SYSTEM_MOES PRESET,PAPI_FML_INS,NOT_DERIVED,FP_DISPATCHED_FPU_MULS PRESET,PAPI_FAD_INS,NOT_DERIVED,FP_DISPATCHED_FPU_ADDS PRESET,PAPI_FP_OPS,NOT_DERIVED,FP_DISPATCHED_FPU_ADDS_AND_MULS PRESET,PAPI_FP_INS,NOT_DERIVED,FR_RETIRED_FPU_INSTRUCTIONS PRESET,PAPI_FPU_IDL,NOT_DERIVED,FP_CYCLES_WITH_NO_FPU_OPS_RETIRED CPU,INTEL_PIV 
PRESET,PAPI_RES_STL,NOT_DERIVED,RESOURCE_STALL PRESET,PAPI_TOT_CYC,NOT_DERIVED,GLOBAL_POWER_EVENTS PRESET,PAPI_L1_ICM,NOT_DERIVED,BPU_FETCH_REQUEST PRESET,PAPI_L1_ICA,NOT_DERIVED,UOP_QUEUE_WRITES_TC_BUILD_DELIVER PRESET,PAPI_TLB_DM,NOT_DERIVED,PAGE_WALK_TYPE_D PRESET,PAPI_TLB_IM,NOT_DERIVED,PAGE_WALK_TYPE_I PRESET,PAPI_TLB_TL,NOT_DERIVED,PAGE_WALK_TYPE PRESET,PAPI_TOT_INS,NOT_DERIVED,INSTR_RETIRED_NON_BOGUS PRESET,PAPI_BR_INS,NOT_DERIVED,RETIRED_BRANCH_TYPE PRESET,PAPI_BR_TKN,NOT_DERIVED,BRANCH_RETIRED_TAKEN PRESET,PAPI_BR_NTK,NOT_DERIVED,BRANCH_RETIRED_NOT_TAKEN PRESET,PAPI_BR_MSP,NOT_DERIVED,BRANCH_RETIRED_MISPREDICTED PRESET,PAPI_BR_PRC,NOT_DERIVED,BRANCH_RETIRED_PREDICTED PRESET,PAPI_L2_TCH,NOT_DERIVED,BSQ_CACHE_REFERENCE_2L_HITS PRESET,PAPI_L2_TCM,NOT_DERIVED,BSQ_CACHE_REFERENCE_2L_MISSES PRESET,PAPI_L2_TCA,NOT_DERIVED,BSQ_CACHE_REFERENCE_2L_ACCESSES PRESET,PAPI_L3_TCH,NOT_DERIVED,BSQ_CACHE_REFERENCE_3L_HITS PRESET,PAPI_L3_TCM,NOT_DERIVED,BSQ_CACHE_REFERENCE_3L_MISSES PRESET,PAPI_L3_TCA,NOT_DERIVED,BSQ_CACHE_REFERENCE_3L_ACCESSES PRESET,PAPI_FP_INS,NOT_DERIVED,X87_FP_UOP CPU,INTEL_ATOM PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_MISSES PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_ALL_REF PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_READS PRESET,PAPI_L1_TCA,DERIVED_ADD,L1D_ALL_REF, L1I_READS PRESET,PAPI_L2_DCA,NOT_DERIVED,L2_RQSTS PRESET,PAPI_TLB_IM,NOT_DERIVED,ITLB.MISSES PRESET,PAPI_TLB_DM,NOT_DERIVED,DATA_TLB_MISSES.DTLB_MISS PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_EXEC PRESET,PAPI_BR_TKN,NOT_DERIVED,BR_INST_RETIRED.TAKEN PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MISSP_EXEC PRESET,PAPI_RES_STL,RESOURCE_STALLS.ANY PRESET,PAPI_TOT_CYC,CPU_CLK_UNHALTED.BUS PRESET,PAPI_TOT_INS,INST_RETIRED.ANY_P PRESET,PAPI_HW_INT,NOT_DERIVED,HW_INT_RCV PRESET,PAPI_L2_DCH,NOT_DERIVED,L2_LD PRESET,PAPI_FP_INS,NOT_DERIVED,X87_OPS_RETIRED.ANY PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_MISSES PRESET,PAPI_L2_DCM,NOT_DERIVED,MEM_LOAD_RETIRED_L2_MISS PRESET,PAPI_TLB_TL,DERIVED_ADD,DTLB_MISSES.ANY, ITLB.MISSES CPU,INTEL_CORE 
PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INSTR_RET PRESET,PAPI_RES_STL,NOT_DERIVED,RESOURCE_STALL PRESET,PAPI_TOT_CYC,NOT_DERIVED,UNHALTED_CORE_CYCLES PRESET,PAPI_TOT_INS,NOT_DERIVED,INSTR_RET PRESET,PAPI_HW_INT,NOT_DERIVED,HW_INT_RX PRESET,PAPI_BR_TKN,NOT_DERIVED,BR_INSTR_RET PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MISSP_EXEC PRESET,PAPI_TLB_DM,NOT_DERIVED,DTLB_MISS PRESET,PAPI_TLB_IM,NOT_DERIVED,ITLB.MISSES PRESET,PAPI_TLB_TL,DERIVED_ADD,DTLB_MISS, ITLB.MISSES PRESET,PAPI_L2_DCH,NOT_DERIVED,L2_LD CPU,INTEL_CORE2 PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED.ANY PRESET,PAPI_RES_STL,NOT_DERIVED,RESOURCE_STALLS.ANY PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_UNHALTED.BUS PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED.ANY_P PRESET,PAPI_HW_INT,NOT_DERIVED,HW_INT_RCV PRESET,PAPI_BR_TKN,NOT_DERIVED,BR_INST_RETIRED.TAKEN PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MISSP_EXEC PRESET,PAPI_TLB_DM,NOT_DERIVED,DTLB_MISSES.ANY PRESET,PAPI_TLB_IM,NOT_DERIVED,ITLB.MISSES PRESET,PAPI_TLB_TL,DERIVED_ADD,DTLB_MISSES.ANY, ITLB.MISSES PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_ALL_REF PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_READS PRESET,PAPI_L1_TCA,DERIVED_ADD,L1D_ALL_REF, L1I_READS # PAPI_L2_ICH seems not to work PRESET,PAPI_L2_ICH,NOT_DERIVED,L2_IFETCH PRESET,PAPI_L2_DCH,NOT_DERIVED,L2_LD PRESET,PAPI_FP_INS,NOT_DERIVED,X87_OPS_RETIRED.ANY PRESET,PAPI_L1_DCM,NOT_DERIVED,MEM_LOAD_RETIRED_L1D_MISS PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_MISSES PRESET,PAPI_L1_TCM,DERIVED_ADD,MEM_LOAD_RETIRED_L1D_MISS, L1I_MISSES PRESET,PAPI_L2_DCM,NOT_DERIVED,MEM_LOAD_RETIRED_L2_MISS CPU,INTEL_CORE2EXTREME PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED.ANY PRESET,PAPI_RES_STL,NOT_DERIVED,RESOURCE_STALLS.ANY PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_UNHALTED.BUS PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED.ANY_P PRESET,PAPI_HW_INT,NOT_DERIVED,HW_INT_RCV PRESET,PAPI_BR_TKN,NOT_DERIVED,BR_INST_RETIRED.TAKEN PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MISSP_EXEC PRESET,PAPI_TLB_DM,NOT_DERIVED,DTLB_MISSES.ANY PRESET,PAPI_TLB_IM,NOT_DERIVED,ITLB.MISSES 
PRESET,PAPI_TLB_TL,DERIVED_ADD,DTLB_MISSES.ANY, ITLB.MISSES PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_ALL_REF PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_READS PRESET,PAPI_L1_TCA, DERIVED_ADD, L1D_ALL_REF, L1I_READS # PAPI_L2_ICH seems not to work PRESET,PAPI_L2_ICH,NOT_DERIVED,L2_IFETCH PRESET,PAPI_L2_DCH,NOT_DERIVED,L2_LD PRESET,PAPI_FP_INS,NOT_DERIVED,X87_OPS_RETIRED.ANY PRESET,PAPI_L1_DCM,NOT_DERIVED,MEM_LOAD_RETIRED.L1D_MISS PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_MISSES PRESET,PAPI_L1_TCM,DERIVED_ADD,MEM_LOAD_RETIRED.L1D_MISS, L1I_MISSES PRESET,PAPI_L2_DCM,NOT_DERIVED,MEM_LOAD_RETIRED.L2_MISS CPU,INTELCOREI7 PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED.ALL_BRANCHES PRESET,PAPI_RES_STL,NOT_DERIVED,RESOURCE_STALLS.ANY PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_UNHALTED.CORE PRESET,PAPI_TOT_INS,NOT_DERIVED,INSTR.RETIRED_ANY PRESET,PAPI_HW_INT,NOT_DERIVED,HW_INT_RCV PRESET,PAPI_BR_TKN,NOT_DERIVED,BR_MISP_EXEC_TAKEN PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MISP_EXEC_ANY PRESET,PAPI_TLB_DM,NOT_DERIVED,DTLB_MISSES.ANY PRESET,PAPI_TLB_IM,NOT_DERIVED,ITLB_MISSES_ANY PRESET,PAPI_TLB_TL,DERIVED_ADD,DTLB_MISSES.ANY, ITLB_MISSES_ANY PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_ALL_REF_ANY PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_READS PRESET,PAPI_L1_TCA, DERIVED_ADD, L1D_ALL_REF_ANY, L1I_READS # PAPI_L2_ICH seems not to work PRESET,PAPI_L2_ICH,NOT_DERIVED,L2_IFETCH PRESET,PAPI_L2_DCH,NOT_DERIVED,MEM_LOAD_RETIRED.L2_HIT PRESET,PAPI_FP_INS,NOT_DERIVED,INST_RETIRED.X87 PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_PREFETCH_MISS PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_MISSES PRESET,PAPI_L1_TCM,DERIVED_ADD,L1D_PREFETCH_MISS, L1I_MISSES PRESET,PAPI_L2_DCM,NOT_DERIVED,L2_RQSTS_MISS CPU,INTEL_WESTMERE PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED.ALL_BRANCHES PRESET,PAPI_RES_STL,NOT_DERIVED,RESOURCE_STALLS.ANY PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_UNHALTED.CORE PRESET,PAPI_TOT_INS,NOT_DERIVED,INSTR.RETIRED_ANY PRESET,PAPI_BR_TKN,NOT_DERIVED,BR_MISP_EXEC_TAKEN PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MISP_EXEC_ANY 
PRESET,PAPI_TLB_DM,NOT_DERIVED,DTLB_MISSES.ANY PRESET,PAPI_TLB_IM,NOT_DERIVED,ITLB_MISSES_ANY PRESET,PAPI_TLB_TL,DERIVED_ADD,DTLB_MISSES.ANY, ITLB_MISSES_ANY PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_READS # PAPI_L2_ICH seems not to work PRESET,PAPI_L2_ICH,NOT_DERIVED,L2_IFETCH PRESET,PAPI_L2_DCH,NOT_DERIVED,MEM_LOAD_RETIRED.L2_HIT PRESET,PAPI_FP_INS,NOT_DERIVED,INST_RETIRED.X87 PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_PREFETCH_MISS PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_MISSES PRESET,PAPI_L1_TCM, DERIVED_ADD, L1D_PREFETCH_MISS, L1I_MISSES PRESET,PAPI_L2_DCM,NOT_DERIVED,L2_RQSTS_MISS papi-5.6.0/src/examples/PAPI_add_remove_event.c000664 001750 001750 00000007206 13216244361 023430 0ustar00jshenry1963jshenry1963000000 000000 /***************************************************************************** * This example shows how to use PAPI_add_event, PAPI_start, PAPI_read, * * PAPI_stop and PAPI_remove_event. * ******************************************************************************/ #include #include #include "papi.h" /* This needs to be included every time you use PAPI */ #define NUM_EVENTS 2 #define ERROR_RETURN(retval) { fprintf(stderr, "Error %d %s:line %d: \n", retval,__FILE__,__LINE__); exit(retval); } int main() { int EventSet = PAPI_NULL; int tmp, i; /*must be initialized to PAPI_NULL before calling PAPI_create_event*/ long long values[NUM_EVENTS]; /*This is where we store the values we read from the eventset */ /* We use number to keep track of the number of events in the EventSet */ int retval, number; char errstring[PAPI_MAX_STR_LEN]; /*************************************************************************** * This part initializes the library and compares the version number of the* * header file, to the version of the library, if these don't match then it * * is likely that PAPI won't work correctly.If there is an error, retval * * keeps track of the version number. 
* ***************************************************************************/ if((retval = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT ) ERROR_RETURN(retval); /* Creating the eventset */ if ( (retval = PAPI_create_eventset(&EventSet)) != PAPI_OK) ERROR_RETURN(retval); /* Add Total Instructions Executed to the EventSet */ if ( (retval = PAPI_add_event(EventSet, PAPI_TOT_INS)) != PAPI_OK) ERROR_RETURN(retval); /* Add Total Cycles event to the EventSet */ if ( (retval = PAPI_add_event(EventSet, PAPI_TOT_CYC)) != PAPI_OK) ERROR_RETURN(retval); /* get the number of events in the event set */ number = 0; if ( (retval = PAPI_list_events(EventSet, NULL, &number)) != PAPI_OK) ERROR_RETURN(retval); printf("There are %d events in the event set\n", number); /* Start counting */ if ( (retval = PAPI_start(EventSet)) != PAPI_OK) ERROR_RETURN(retval); /* you can replace your code here */ tmp=0; for (i = 0; i < 2000000; i++) { tmp = i + tmp; } /* read the counter values and store them in the values array */ if ( (retval=PAPI_read(EventSet, values)) != PAPI_OK) ERROR_RETURN(retval); printf("The total instructions executed for the first loop are %lld \n", values[0] ); printf("The total cycles executed for the first loop are %lld \n",values[1]); /* our slow code again */ tmp=0; for (i = 0; i < 2000000; i++) { tmp = i + tmp; } /* Stop counting and store the values into the array */ if ( (retval = PAPI_stop(EventSet, values)) != PAPI_OK) ERROR_RETURN(retval); printf("Total instructions executed are %lld \n", values[0] ); printf("Total cycles executed are %lld \n",values[1]); /* Remove event: We are going to take the PAPI_TOT_INS from the eventset */ if( (retval = PAPI_remove_event(EventSet, PAPI_TOT_INS)) != PAPI_OK) ERROR_RETURN(retval); printf("Removing PAPI_TOT_INS from the eventset\n"); /* Now we list how many events are left on the event set */ number = 0; if ((retval=PAPI_list_events(EventSet, NULL, &number))!= PAPI_OK) ERROR_RETURN(retval); printf("There is only %d 
event left in the eventset now\n", number); /* free the resources used by PAPI */ PAPI_shutdown(); exit(0); } papi-5.6.0/src/libpfm-3.y/examples_v2.x/notify_self3.c000664 001750 001750 00000017772 13216244362 024464 0ustar00jshenry1963jshenry1963000000 000000 /* * notify_self3.c - example of how you can use overflow notifications with no messages * * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include "detect_pmcs.h" #define SMPL_PERIOD 1000000000ULL static volatile unsigned long notification_received; #define NUM_PMCS PFMLIB_MAX_PMCS #define NUM_PMDS PFMLIB_MAX_PMDS static pfarg_pmd_t pd[NUM_PMDS]; static int ctx_fd; static char *event1_name; static void fatal_error(char *fmt,...) __attribute__((noreturn)); static void fatal_error(char *fmt, ...) 
{ va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); exit(1); } static void sigio_handler(int n) { if (pfm_read_pmds(ctx_fd, pd+1, 1) == -1) { fatal_error("pfm_read_pmds: %s", strerror(errno)); } /* * we do not need to extract the overflow message, we know * where it is coming from. */ /* * increment our notification counter */ notification_received++; /* * XXX: risky to do printf() in signal handler! */ if (event1_name) printf("Notification %02lu: %"PRIu64" %s\n", notification_received, pd[1].reg_value, event1_name); else printf("Notification %02lu:\n", notification_received); /* * And resume monitoring */ if (pfm_restart(ctx_fd)) fatal_error("error pfm_restart: %d\n", errno); } /* * infinite loop waiting for notification to get out */ void busyloop(void) { /* * busy loop to burn CPU cycles */ for(;notification_received < 40;) ; } int main(int argc, char **argv) { int ret; pfarg_ctx_t ctx; pfmlib_input_param_t inp; pfmlib_output_param_t outp; pfarg_pmc_t pc[NUM_PMCS]; pfarg_load_t load_args; pfmlib_options_t pfmlib_options; struct sigaction act; size_t len; unsigned int i, num_counters; /* * pass options to library (optional) */ memset(&pfmlib_options, 0, sizeof(pfmlib_options)); pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ pfmlib_options.pfm_verbose = 1; /* set to 1 for verbose */ pfm_set_options(&pfmlib_options); /* * Initialize pfm library (required before we can use it) */ ret = pfm_initialize(); if (ret != PFMLIB_SUCCESS) fatal_error("Cannot initialize library: %s\n", pfm_strerror(ret)); /* * Install the signal handler (SIGIO) */ memset(&act, 0, sizeof(act)); act.sa_handler = sigio_handler; sigaction (SIGIO, &act, 0); memset(pc, 0, sizeof(pc)); memset(&ctx, 0, sizeof(ctx)); memset(&load_args, 0, sizeof(load_args)); memset(&inp,0, sizeof(inp)); memset(&outp,0, sizeof(outp)); pfm_get_num_counters(&num_counters); if (pfm_get_cycle_event(&inp.pfp_events[0]) != PFMLIB_SUCCESS) fatal_error("cannot find cycle event\n"); if 
(pfm_get_inst_retired_event(&inp.pfp_events[1]) != PFMLIB_SUCCESS) fatal_error("cannot find inst retired event\n"); i = 2; /* * set the default privilege mode for all counters: * PFM_PLM3 : user level only */ inp.pfp_dfl_plm = PFM_PLM3; if (i > num_counters) { i = num_counters; printf("too many events provided (max=%d events), using first %d event(s)\n", num_counters, i); } /* * how many counters we use */ inp.pfp_event_count = i; if (i > 1) { pfm_get_max_event_name_len(&len); event1_name = malloc(len+1); if (event1_name == NULL) fatal_error("cannot allocate event name\n"); pfm_get_full_event_name(&inp.pfp_events[1], event1_name, len+1); } /* * when we know we are self-monitoring and we have only one context, then * when we get an overflow we know where it is coming from. Therefore we can * save the call to the kernel to extract the notification message. By default, * a message is generated. The queue of messages has a limited size, therefore * it is important to clear the queue by reading the message on overflow. Failure * to do so may result in a queue full and you will lose notification messages. * * With the PFM_FL_OVFL_NO_MSG, no message will be queue, but you will still get * the signal. Similarly, the PFM_MSG_END will be generated. */ ctx.ctx_flags = PFM_FL_OVFL_NO_MSG; /* * now create the context for self monitoring/per-task */ ctx_fd = pfm_create_context(&ctx, NULL, NULL, 0); if (ctx_fd == -1) { if (errno == ENOSYS) { fatal_error("Your kernel does not have performance monitoring support!\n"); } fatal_error("Can't create PFM context %s\n", strerror(errno)); } /* * build the pfp_unavail_pmcs bitmask by looking * at what perfmon has available. It is not always * the case that all PMU registers are actually available * to applications. For instance, on IA-32 platforms, some * registers may be reserved for the NMI watchdog timer. * * With this bitmap, the library knows which registers NOT to * use. 
Of source, it is possible that no valid assignement may * be possible if certina PMU registers are not available. */ detect_unavail_pmcs(ctx_fd, &inp.pfp_unavail_pmcs); /* * let the library figure out the values for the PMCS */ if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) fatal_error("Cannot configure events: %s\n", pfm_strerror(ret)); /* * Now prepare the argument to initialize the PMDs and PMCS. */ for (i=0; i < outp.pfp_pmc_count; i++) { pc[i].reg_num = outp.pfp_pmcs[i].reg_num; pc[i].reg_value = outp.pfp_pmcs[i].reg_value; } for (i=0; i < outp.pfp_pmd_count; i++) pd[i].reg_num = outp.pfp_pmds[i].reg_num; /* * We want to get notified when the counter used for our first * event overflows */ pd[0].reg_flags |= PFM_REGFL_OVFL_NOTIFY; if (inp.pfp_event_count > 1) pd[0].reg_reset_pmds[0] |= 1UL << pd[1].reg_num; /* * we arm the first counter, such that it will overflow * after SMPL_PERIOD events have been observed */ pd[0].reg_value = - SMPL_PERIOD; pd[0].reg_long_reset = - SMPL_PERIOD; pd[0].reg_short_reset = - SMPL_PERIOD; /* * Now program the registers */ if (pfm_write_pmcs(ctx_fd, pc, outp.pfp_pmc_count)) fatal_error("pfm_write_pmcs error errno %d\n",errno); if (pfm_write_pmds(ctx_fd, pd, outp.pfp_pmd_count)) fatal_error("pfm_write_pmds error errno %d\n",errno); /* * we want to monitor ourself */ load_args.load_pid = getpid(); if (pfm_load_context(ctx_fd, &load_args)) fatal_error("pfm_load_context error errno %d\n",errno); /* * setup asynchronous notification on the file descriptor */ ret = fcntl(ctx_fd, F_SETFL, fcntl(ctx_fd, F_GETFL, 0) | O_ASYNC); if (ret == -1) fatal_error("cannot set ASYNC: %s\n", strerror(errno)); /* * get ownership of the descriptor */ ret = fcntl(ctx_fd, F_SETOWN, getpid()); if (ret == -1) fatal_error("cannot setown: %s\n", strerror(errno)); /* * Let's roll now */ pfm_self_start(ctx_fd); busyloop(); pfm_self_stop(ctx_fd); /* * free our context */ close(ctx_fd); if (event1_name) free(event1_name); return 0; } 
papi-5.6.0/src/perfctr-2.7.x/examples/signal/ppc64.c000664 001750 001750 00000003350 13216244370 023735 0ustar00jshenry1963jshenry1963000000 000000 /* Maynard Johnson * PPC64-specific code. * */ #include #include #include #include #include "libperfctr.h" #include "arch.h" #include "ppc64.h" unsigned long ucontext_pc(const struct ucontext *uc) { /* glibc-2.3.3 (YDL4) changed the type of uc->uc_mcontext, * breaking code which worked in glibc-2.3.1 (YDL3.0.1). * This formulation works with both, and is cleaner than * selecting glibc-2.3.3 specific code with "#ifdef NGREG". */ return uc->uc_mcontext.regs->nip; } void do_setup(const struct perfctr_info *info, struct perfctr_cpu_control *cpu_control) { memset(cpu_control, 0, sizeof *cpu_control); cpu_control->tsc_on = 1; cpu_control->nractrs = 0; cpu_control->nrictrs = 1; cpu_control->pmc_map[0] = 3; /* FLOPS COMPLETED */ if ((info->cpu_type == PERFCTR_PPC64_POWER4) || (info->cpu_type == PERFCTR_PPC64_POWER4p)) { cpu_control->ppc64.mmcr0 = 0x00000810ULL; cpu_control->ppc64.mmcr1 = 0x00000000420E84A0ULL; cpu_control->ppc64.mmcra = 0x00002000ULL; } else if (info->cpu_type == PERFCTR_PPC64_POWER5) { cpu_control->ppc64.mmcr0 = 0x00000000ULL; cpu_control->ppc64.mmcr1 = 0x0000000020202010ULL; cpu_control->ppc64.mmcra = 0x00000000ULL; } else if (info->cpu_type == PERFCTR_PPC64_970 || info->cpu_type == PERFCTR_PPC64_970MP) { cpu_control->ppc64.mmcr0 = 0x00000000ULL; cpu_control->ppc64.mmcr1 = 0x00000000001E0480ULL; cpu_control->ppc64.mmcra = 0x00002000ULL; } /* not kernel mode, enable PMCj interrupts */ cpu_control->ppc64.mmcr0 |= MMCR0_FCS | MMCR0_PMCjCE; /* overflow after 100 events */ cpu_control->ireset[0] = 0x80000000 - 100; } papi-5.6.0/src/examples/PAPI_add_remove_events.c000664 001750 001750 00000005732 13216244361 023615 0ustar00jshenry1963jshenry1963000000 000000 /****************************************************************************** * This is a simple low level function demonstration on using PAPI_add_events * 
* to add an array of events to a created eventset, we are going to use these * * events to monitor a set of instructions, start the counters, read the * * counters and then cleanup the eventset when done. In this example we use * * the presets PAPI_TOT_INS and PAPI_TOT_CYC. PAPI_add_events,PAPI_start, * * PAPI_stop, PAPI_clean_eventset, PAPI_destroy_eventset and * * PAPI_create_eventset all return PAPI_OK(which is 0) when succesful. * ******************************************************************************/ #include #include #include "papi.h" /* This needs to be included every time you use PAPI */ #define NUM_EVENT 2 #define THRESHOLD 100000 #define ERROR_RETURN(retval) { fprintf(stderr, "Error %d %s:line %d: \n", retval,__FILE__,__LINE__); exit(retval); } int main(){ int i,retval,tmp; int EventSet = PAPI_NULL; /*must be initialized to PAPI_NULL before calling PAPI_create_event*/ int event_codes[NUM_EVENT]={PAPI_TOT_INS,PAPI_TOT_CYC}; char errstring[PAPI_MAX_STR_LEN]; long long values[NUM_EVENT]; /*************************************************************************** * This part initializes the library and compares the version number of the * * header file, to the version of the library, if these don't match then it * * is likely that PAPI won't work correctly.If there is an error, retval * * keeps track of the version number. 
* ****************************************************************************/ if((retval = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT ) { fprintf(stderr, "Error: %s\n", errstring); exit(1); } /* Creating event set */ if ((retval=PAPI_create_eventset(&EventSet)) != PAPI_OK) ERROR_RETURN(retval); /* Add the array of events PAPI_TOT_INS and PAPI_TOT_CYC to the eventset*/ if ((retval=PAPI_add_events(EventSet, event_codes, NUM_EVENT)) != PAPI_OK) ERROR_RETURN(retval); /* Start counting */ if ( (retval=PAPI_start(EventSet)) != PAPI_OK) ERROR_RETURN(retval); /*** this is where your computation goes *********/ for(i=0;i<1000;i++) { tmp = tmp+i; } /* Stop counting, this reads from the counter as well as stop it. */ if ( (retval=PAPI_stop(EventSet,values)) != PAPI_OK) ERROR_RETURN(retval); printf("\nThe total instructions executed are %lld, total cycles %lld\n", values[0],values[1]); if ( (retval=PAPI_remove_events(EventSet,event_codes, NUM_EVENT))!=PAPI_OK) ERROR_RETURN(retval); /* Free all memory and data structures, EventSet must be empty. */ if ( (retval=PAPI_destroy_eventset(&EventSet)) != PAPI_OK) ERROR_RETURN(retval); /* free the resources used by PAPI */ PAPI_shutdown(); exit(0); } papi-5.6.0/man/man1/PAPI_derived_event_files.1000664 001750 001750 00000020077 13216244355 023053 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPI_derived_event_files" 1 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPI_derived_event_files \- Describes derived event definition file syntax\&. .SH "Derived Events" .PP PAPI provides the ability to define events whose value will be derived from multiple native events\&. The list of native events to be used in a derived event and a formula which describes how to use them is provided in an event definition file\&. The PAPI team provides an event definition file which describes all of the supported PAPI preset events\&. 
PAPI also allows a user to provide an event definition file that describes a set of user defined events which can extend the events PAPI normally supports\&. .PP This page documents the syntax of the commands which can appear in an event definition file\&. .PP .br .SS "General Rules:" .PD 0 .IP "\(bu" 2 Blank lines are ignored\&. .IP "\(bu" 2 Lines that begin with '#' are comments (they are also ignored)\&. .IP "\(bu" 2 Names shown inside < > below represent values that must be provided by the user\&. .IP "\(bu" 2 If a user provided value contains white space, it must be protected with quotes\&. .PP .PP .br .SS "Commands:" \fBCPU,<pmuName>\fP .RS 4 Specifies a PMU name which controls if the PRESET and EVENT commands that follow this line should be processed\&. Multiple CPU commands can be entered without PRESET or EVENT commands between them to provide a list of PMU names to which the derived events that follow will apply\&. When a PMU name provided in the list matches a PMU name known to the running system, the events which follow will be created\&. If none of the PMU names provided in the list match a PMU name on the running system, the events which follow will be ignored\&. When a new CPU command follows either a PRESET or EVENT command, the PMU list is rebuilt\&. .br .br .RE .PP \fBPRESET,<eventName>,<derivedType>,<eventAttr>,LDESC,"<longDesc>",SDESC,"<shortDesc>",NOTE,"<note>"\fP .RS 4 Declare a PAPI preset derived event\&. .br .br .RE .PP \fBEVENT,<eventName>,<derivedType>,<eventAttr>,LDESC,"<longDesc>",SDESC,"<shortDesc>",NOTE,"<note>"\fP .RS 4 Declare a user defined derived event\&. .br .br .RE .PP \fBWhere:\fP .RS 4 .RE .PP \fBpmuName:\fP .RS 4 The PMU which the following events should apply to\&. A list of PMU names supported by your system can be obtained by running papi_component_avail on your system\&. .br .RE .PP \fBeventName:\fP .RS 4 Specifies the name used to identify this derived event\&. This name should be unique within the events on your system\&. .br .RE .PP \fBderivedType:\fP .RS 4 Specifies the kind of derived event being defined (see 'Derived Types' below)\&.
.br .RE .PP \fBeventAttr:\fP .RS 4 Specifies a formula and a list of base events that are used to compute the derived event's value\&. The syntax of this field depends on the 'derivedType' specified above (see 'Derived Types' below)\&. .br .RE .PP \fBlongDesc:\fP .RS 4 Provides the long description of the event\&. .br .RE .PP \fBshortDesc:\fP .RS 4 Provides the short description of the event\&. .br .RE .PP \fBnote:\fP .RS 4 Provides an event note\&. .br .RE .PP \fBbaseEvent (used below):\fP .RS 4 Identifies an event on which this derived event is based\&. This may be a native event (possibly with event masks), an already known preset event, or an already known user event\&. .br .RE .PP .br .SS "Notes:" The PRESET command has traditionally been used in the PAPI provided preset definition file\&. The EVENT command is intended to be used in user defined event definition files\&. The code treats them the same so they are interchangeable and they can both be used in either event definition file\&. .br .PP .br .SS "Derived Types:" This describes values allowed in the 'derivedType' field of the PRESET and EVENT commands\&. It also shows the syntax of the 'eventAttr' field for each derived type supported by these commands\&. All of the derived events provide a list of one or more events which the derived event is based on (baseEvent)\&. Some derived events provide a formula that specifies how to compute the derived event's value using the baseEvents in the list\&. The following derived types are supported, the syntax of the 'eventAttr' parameter for each derived event type is shown in parentheses\&. .br .br .PP \fBNOT_DERIVED (<baseEvent>):\fP .RS 4 This derived type defines an alias for the existing event 'baseEvent'\&. .br .RE .PP \fBDERIVED_ADD (<baseEvent1>,<baseEvent2>):\fP .RS 4 This derived type defines a new event that will be the sum of two other events\&. It has a value of 'baseEvent1' plus 'baseEvent2'\&.
.br .RE .PP \fBDERIVED_PS (PAPI_TOT_CYC,<baseEvent1>):\fP .RS 4 This derived type defines a new event that will report the number of 'baseEvent1' events which occurred per second\&. It has a value of ((('baseEvent1' * cpu_max_mhz) * 1000000 ) / PAPI_TOT_CYC)\&. The user must provide PAPI_TOT_CYC as the first event of two events in the event list for this to work correctly\&. .br .RE .PP \fBDERIVED_ADD_PS (PAPI_TOT_CYC,<baseEvent1>,<baseEvent2>):\fP .RS 4 This derived type defines a new event that will add together two event counters and then report the number which occurred per second\&. It has a value of (((('baseEvent1' + 'baseEvent2') * cpu_max_mhz) * 1000000 ) / PAPI_TOT_CYC)\&. The user must provide PAPI_TOT_CYC as the first event of three events in the event list for this to work correctly\&. .br .RE .PP \fBDERIVED_CMPD (<baseEvent1>,<baseEvent2>):\fP .RS 4 This derived type works much like the NOT_DERIVED type\&. No calculation is done to compute this event's value\&. .br .RE .PP \fBDERIVED_SUB (<baseEvent1>,<baseEvent2>):\fP .RS 4 This derived type defines a new event that will be the difference between two other events\&. It has a value of 'baseEvent1' minus 'baseEvent2'\&. .br .RE .PP \fBDERIVED_POSTFIX (<pfFormula>,<baseEvent0>,<baseEvent1>, \&.\&.\&. ,<baseEventn>):\fP .RS 4 This derived type defines a new event whose value is computed from several native events using a postfix (reverse polish notation) formula\&. Its value is the result of processing the postfix formula\&. The 'pfFormula' is of the form 'N0|N1|N2|5|*|+|-|' where the '|' acts as a token separator and the tokens N0, N1, and N2 are place holders that represent baseEvent0, baseEvent1, and baseEvent2 respectively\&. .br .RE .PP \fBDERIVED_INFIX (<ifFormula>,<baseEvent0>,<baseEvent1>, \&.\&.\&. ,<baseEventn>):\fP .RS 4 This derived type defines a new event whose value is computed from several native events using an infix (algebraic notation) formula\&. Its value is the result of processing the infix formula\&. The 'ifFormula' is of the form 'N0-(N1+(N2*5))' where the tokens N0, N1, and N2 are place holders that represent baseEvent0, baseEvent1, and baseEvent2 respectively\&.
.br .RE .PP .br .SS "Example:" In the following example, the events PAPI_SP_OPS, USER_SP_OPS, and ALIAS_SP_OPS will all measure the same events and return the same value\&. They just demonstrate different ways to use the PRESET and EVENT event definition commands\&. .br .br .PP .PD 0 .IP "\(bu" 2 # The following lines define pmu names that all share the following events .IP "\(bu" 2 CPU nhm .IP "\(bu" 2 CPU nhm-ex .IP "\(bu" 2 # Events which should be defined for either of the above pmu types .IP "\(bu" 2 PRESET,PAPI_TOT_CYC,NOT_DERIVED,UNHALTED_CORE_CYCLES .IP "\(bu" 2 PRESET,PAPI_REF_CYC,NOT_DERIVED,UNHALTED_REFERENCE_CYCLES .IP "\(bu" 2 PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|3|*|+|,FP_COMP_OPS_EXE:SSE_SINGLE_PRECISION,FP_COMP_OPS_EXE:SSE_FP_PACKED,NOTE,'Using a postfix formula' .IP "\(bu" 2 EVENT,USER_SP_OPS,DERIVED_INFIX,N0+(N1*3),FP_COMP_OPS_EXE:SSE_SINGLE_PRECISION,FP_COMP_OPS_EXE:SSE_FP_PACKED,NOTE,'Using the same formula in infix format' .IP "\(bu" 2 EVENT,ALIAS_SP_OPS,NOT_DERIVED,PAPI_SP_OPS,LDESC,'Alias for preset event PAPI_SP_OPS' .IP "\(bu" 2 # End of event definitions for above pmu names and start of a section for a new pmu name\&. .IP "\(bu" 2 CPU snb .PP papi-5.6.0/src/libpfm-3.y/docs/man3/pfm_get_num_counters.3000664 001750 001750 00000000035 13216244361 025274 0ustar00jshenry1963jshenry1963000000 000000 .so man3/pfm_get_impl_pmcs.3 papi-5.6.0/src/libpfm4/lib/pfmlib_intel_knl_unc_imc.c000664 001750 001750 00000010135 13216244365 024602 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_intel_knl_unc_imc.c : Intel KnightsLanding Integrated Memory Controller (IMC) uncore PMU * * Copyright (c) 2016 Intel Corp. 
All rights reserved * Contributed by Peinan Zhang * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include #include #include #include #include /* private headers */ #include "pfmlib_priv.h" #include "pfmlib_intel_x86_priv.h" #include "pfmlib_intel_snbep_unc_priv.h" #include "events/intel_knl_unc_imc_events.h" #define DEFINE_IMC_BOX(n) \ pfmlib_pmu_t intel_knl_unc_imc##n##_support = { \ .desc = "Intel KnightLanding IMC "#n" uncore", \ .name = "knl_unc_imc"#n, \ .perf_name = "uncore_imc_"#n, \ .pmu = PFM_PMU_INTEL_KNL_UNC_IMC##n, \ .pme_count = LIBPFM_ARRAY_SIZE(intel_knl_unc_imc_pe), \ .type = PFM_PMU_TYPE_UNCORE, \ .num_cntrs = 4, \ .num_fixed_cntrs = 1, \ .max_encoding = 1, \ .pe = intel_knl_unc_imc_pe, \ .atdesc = snbep_unc_mods, \ .flags = PFMLIB_PMU_FL_RAW_UMASK, \ .pmu_detect = pfm_intel_knl_unc_detect, \ .get_event_encoding[PFM_OS_NONE] = pfm_intel_snbep_unc_get_encoding, \ PFMLIB_ENCODE_PERF(pfm_intel_snbep_unc_get_perf_encoding), \ PFMLIB_OS_DETECT(pfm_intel_x86_perf_detect), \ .get_event_first = pfm_intel_x86_get_event_first, \ .get_event_next = pfm_intel_x86_get_event_next, \ .event_is_valid = pfm_intel_x86_event_is_valid, \ .validate_table = pfm_intel_x86_validate_table, \ .get_event_info = pfm_intel_x86_get_event_info, \ .get_event_attr_info = pfm_intel_x86_get_event_attr_info, \ PFMLIB_VALID_PERF_PATTRS(pfm_intel_snbep_unc_perf_validate_pattrs), \ .get_event_nattrs = pfm_intel_x86_get_event_nattrs, \ }; DEFINE_IMC_BOX(0); DEFINE_IMC_BOX(1); DEFINE_IMC_BOX(2); DEFINE_IMC_BOX(3); DEFINE_IMC_BOX(4); DEFINE_IMC_BOX(5); #define DEFINE_IMC_UCLK_BOX(n) \ pfmlib_pmu_t intel_knl_unc_imc_uclk##n##_support = { \ .desc = "Intel KnightLanding IMC UCLK "#n" uncore", \ .name = "knl_unc_imc_uclk"#n, \ .perf_name = "uncore_mc_uclk_"#n, \ .pmu = PFM_PMU_INTEL_KNL_UNC_IMC_UCLK##n, \ .pme_count = LIBPFM_ARRAY_SIZE(intel_knl_unc_imc_uclk_pe), \ .type = PFM_PMU_TYPE_UNCORE, \ .num_cntrs = 4, \ .num_fixed_cntrs = 1, \ .max_encoding = 1, \ .pe = intel_knl_unc_imc_uclk_pe, \ .atdesc = snbep_unc_mods, \ .flags = PFMLIB_PMU_FL_RAW_UMASK, \ .pmu_detect = 
pfm_intel_knl_unc_detect, \ .get_event_encoding[PFM_OS_NONE] = pfm_intel_snbep_unc_get_encoding, \ PFMLIB_ENCODE_PERF(pfm_intel_snbep_unc_get_perf_encoding), \ PFMLIB_OS_DETECT(pfm_intel_x86_perf_detect), \ .get_event_first = pfm_intel_x86_get_event_first, \ .get_event_next = pfm_intel_x86_get_event_next, \ .event_is_valid = pfm_intel_x86_event_is_valid, \ .validate_table = pfm_intel_x86_validate_table, \ .get_event_info = pfm_intel_x86_get_event_info, \ .get_event_attr_info = pfm_intel_x86_get_event_attr_info, \ PFMLIB_VALID_PERF_PATTRS(pfm_intel_snbep_unc_perf_validate_pattrs), \ .get_event_nattrs = pfm_intel_x86_get_event_nattrs, \ }; DEFINE_IMC_UCLK_BOX(0); DEFINE_IMC_UCLK_BOX(1); papi-5.6.0/src/libpfm4/tests/000775 001750 001750 00000000000 13216244366 020026 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/libpfm4/lib/pfmlib_montecito_priv.h000664 001750 001750 00000012740 13216244365 024200 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. 
#ifndef __PFMLIB_MONTECITO_PRIV_H__
#define __PFMLIB_MONTECITO_PRIV_H__

/*
 * Event type definitions
 *
 * The virtual events are not really defined in the specs but are an artifact used
 * to quickly and easily setup EAR and/or ETB. The event type encodes the exact feature
 * which must be configured in combination with a counting monitor.
 * For instance, DATA_EAR_CACHE_LAT4 is a virtual D-EAR cache event. If the user
 * requests this event, this will configure a counting monitor to count DATA_EAR_EVENTS
 * and PMC11 will be configured for cache mode. The latency is encoded in the umask, here
 * it would correspond to 4 cycles.
 *
 * The values below are stored in the 3-bit pme_type field of
 * pme_mont_entry_code_t. The range tests in event_is_ear() and
 * event_is_dear() rely on the numeric ordering of these constants
 * (IEAR_TLB < IEAR_CACHE < DEAR_TLB < DEAR_CACHE < DEAR_ALAT), so new
 * values must not be inserted in the middle of the EAR range.
 */
#define PFMLIB_MONT_EVENT_NORMAL	0x0 /* standard counter */
#define PFMLIB_MONT_EVENT_ETB		0x1 /* virtual event used with ETB configuration */
#define PFMLIB_MONT_EVENT_IEAR_TLB	0x2 /* virtual event used for I-EAR TLB configuration */
#define PFMLIB_MONT_EVENT_IEAR_CACHE	0x3 /* virtual event used for I-EAR cache configuration */
#define PFMLIB_MONT_EVENT_DEAR_TLB	0x4 /* virtual event used for D-EAR TLB configuration */
#define PFMLIB_MONT_EVENT_DEAR_CACHE	0x5 /* virtual event used for D-EAR cache configuration */
#define PFMLIB_MONT_EVENT_DEAR_ALAT	0x6 /* virtual event used for D-EAR ALAT configuration */

/*
 * Event classification predicates. Each takes a pointer to an entry that
 * has a pme_type member reachable through the accessor macros defined at
 * the end of this header (i.e. a pme_mont_entry_t *).
 */
/* any EAR event: type in [IEAR_TLB, DEAR_ALAT] */
#define event_is_ear(e)	((e)->pme_type >= PFMLIB_MONT_EVENT_IEAR_TLB &&(e)->pme_type <= PFMLIB_MONT_EVENT_DEAR_ALAT)
/* instruction EAR event (TLB or cache) */
#define event_is_iear(e) ((e)->pme_type == PFMLIB_MONT_EVENT_IEAR_TLB || (e)->pme_type == PFMLIB_MONT_EVENT_IEAR_CACHE)
/* data EAR event: type in [DEAR_TLB, DEAR_ALAT] */
#define event_is_dear(e) ((e)->pme_type >= PFMLIB_MONT_EVENT_DEAR_TLB && (e)->pme_type <= PFMLIB_MONT_EVENT_DEAR_ALAT)
/* EAR event in cache mode (instruction or data) */
#define event_is_ear_cache(e) ((e)->pme_type == PFMLIB_MONT_EVENT_DEAR_CACHE || (e)->pme_type == PFMLIB_MONT_EVENT_IEAR_CACHE)
/* EAR event in TLB mode (instruction or data) */
#define event_is_ear_tlb(e) ((e)->pme_type == PFMLIB_MONT_EVENT_IEAR_TLB || (e)->pme_type == PFMLIB_MONT_EVENT_DEAR_TLB)
/* EAR event in ALAT mode (data only) */
#define event_is_ear_alat(e) ((e)->pme_type == PFMLIB_MONT_EVENT_DEAR_ALAT)
/* ETB event */
#define event_is_etb(e)	((e)->pme_type == PFMLIB_MONT_EVENT_ETB)

/*
 * Itanium encoding structure
 * (code must be first 8 bits)
 *
 * The bit-field widths add up to 64 (8+3+2+3+16+32), so the structure is
 * meant to overlay a single 64-bit unsigned long (see pme_mont_code_t).
 */
typedef struct {
	unsigned long pme_code:8;	/* major event code */
	unsigned long pme_type:3;	/* see definitions above */
	unsigned long pme_caf:2;	/* Active, Floating, Causal, Self-Floating */
	unsigned long pme_ig1:3;	/* ignored */
	unsigned long pme_umask:16;	/* unit mask*/
	unsigned long pme_ig:32;	/* ignored */
} pme_mont_entry_code_t;

/*
 * Event code viewed either as one raw value (pme_vcode) or as its
 * individual bit-fields (pme_mont_code).
 */
typedef union {
	unsigned long  		 pme_vcode;
	pme_mont_entry_code_t	 pme_mont_code;	/* must not be larger than vcode */
} pme_mont_code_t;

/*
 * Event qualifiers viewed either as one raw value (qual) or as individual
 * capability flags (pme_qual). The bit-field widths add up to 64
 * (1+1+1+1+1+11+3+4+41).
 */
typedef union {
	unsigned long qual;		/* generic qualifier */
	struct {
		unsigned long pme_iar:1;	/* instruction address range supported */
		unsigned long pme_opm:1;	/* opcode match supported */
		unsigned long pme_dar:1;	/* data address range supported */
		unsigned long pme_all:1;	/* supports all_thrd=1 */
		unsigned long pme_mesi:1;	/* event supports MESI */
		unsigned long pme_res1:11;	/* reserved */
		unsigned long pme_group:3;	/* event group */
		unsigned long pme_set:4;	/* event set*/
		unsigned long pme_res2:41;	/* reserved */
	} pme_qual;
} pme_mont_qualifiers_t;

/*
 * One entry of the Montecito event table.
 */
typedef struct {
	char			*pme_name;	/* event name */
	pme_mont_code_t		pme_entry_code;	/* event code, type, caf and umask */
	unsigned long	 	pme_counters;		/* supported counters */
	unsigned int		pme_maxincr;	/* presumably the maximum increment per cycle -- TODO confirm */
	pme_mont_qualifiers_t	pme_qualifiers;	/* capability flags, group and set */
	char			*pme_desc;	/* text description of the event */
} pme_mont_entry_t;

/*
 * We embed the umask value into the event code. Because it really is
 * like a subevent.
 *
 * Per the bit-field declaration in pme_mont_entry_code_t, the packed value
 * holds, in declaration order: an 8-bit major event code, a 3-bit event
 * type, a 2-bit caf field, 3 ignored bits, a 16-bit unit mask and 32
 * ignored bits.
 *
 * The macros below are shorthand member paths so that code can write
 * e->pme_code, e->pme_umask, ... directly on a pme_mont_entry_t pointer.
 * Because they are object-like macros, the identifiers pme_code, pme_umask,
 * pme_used, pme_type and pme_caf are rewritten everywhere after this point
 * in any file that includes this header.
 */
#define pme_code	pme_entry_code.pme_mont_code.pme_code
#define pme_umask	pme_entry_code.pme_mont_code.pme_umask
/*
 * NOTE(review): pme_used expands to pme_qualifiers.pme_qual_struct.pme_used,
 * but pme_mont_qualifiers_t declared above has neither a pme_qual_struct
 * member nor a pme_used field, so any use of this macro on a
 * pme_mont_entry_t would not compile. Looks like a leftover -- confirm it
 * is unused before removing.
 */
#define pme_used	pme_qualifiers.pme_qual_struct.pme_used
#define pme_type	pme_entry_code.pme_mont_code.pme_type
#define pme_caf		pme_entry_code.pme_mont_code.pme_caf

/*
 * Capability tests on the qualifier flags of an event entry.
 */
#define event_opcm_ok(e) ((e)->pme_qualifiers.pme_qual.pme_opm==1)	/* opcode match supported */
#define event_iarr_ok(e) ((e)->pme_qualifiers.pme_qual.pme_iar==1)	/* instruction address range supported */
#define event_darr_ok(e) ((e)->pme_qualifiers.pme_qual.pme_dar==1)	/* data address range supported */
#define event_all_ok(e)  ((e)->pme_qualifiers.pme_qual.pme_all==1)	/* all_thrd=1 supported */
#define event_mesi_ok(e) ((e)->pme_qualifiers.pme_qual.pme_mesi==1)	/* MESI supported */

#endif /* __PFMLIB_MONTECITO_PRIV_H__ */
.RE .PP The first call to \fBPAPI_flops()\fP will initialize the PAPI High Level interface, set up the counters to monitor the PAPI_FP_OPS event and start the counters\&. .PP Subsequent calls will read the counters and return total real time, total process time, total floating point operations since the start of the measurement and the Mflop/s rate since latest call to \fBPAPI_flops()\fP\&. A call to \fBPAPI_stop_counters()\fP will stop the counters from running and then calls such as \fBPAPI_start_counters()\fP or other rate calls can safely be used\&. .PP \fBPAPI_flops\fP returns information related to theoretical floating point operations rather than simple instructions\&. It uses the PAPI_FP_OPS event which attempts to 'correctly' account for, e\&.g\&., FMA undercounts and FP Store overcounts, etc\&. .PP \fBSee Also:\fP .RS 4 \fBPAPI_flips()\fP .PP \fBPAPI_ipc()\fP .PP \fBPAPI_epc()\fP .PP \fBPAPI_stop_counters()\fP .RE .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. papi-5.6.0/src/libpfm4/debian/pyversions000664 001750 001750 00000000005 13216244363 022242 0ustar00jshenry1963jshenry1963000000 000000 2.4- papi-5.6.0/src/libpfm4/perf_examples/task_smpl.c000664 001750 001750 00000023404 13216244365 023661 0ustar00jshenry1963jshenry1963000000 000000 /* * task_smpl.c - example of a task sampling another one using a randomized sampling period * * Copyright (c) 2009 Google, Inc * Contributed by Stephane Eranian * * Based on: * Copyright (c) 2003-2006 Hewlett-Packard Development Company, L.P. 
* Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "perf_util.h" #define SMPL_PERIOD 240000000ULL typedef struct { int opt_no_show; int opt_inherit; int mem_mode; int branch_mode; int cpu; int mmap_pages; char *events; FILE *output_file; } options_t; static jmp_buf jbuf; static uint64_t collected_samples, lost_samples; static perf_event_desc_t *fds; static int num_fds; static options_t options; static struct option the_options[]={ { "help", 0, 0, 1}, { "no-show", 0, &options.opt_no_show, 1}, { 0, 0, 0, 0} }; static char *gen_events = "cycles,instructions"; static void cld_handler(int n) { longjmp(jbuf, 1); } int child(char **arg) { execvp(arg[0], arg); /* not reached */ return -1; } struct timeval last_read, this_read; static void process_smpl_buf(perf_event_desc_t *hw) { struct perf_event_header ehdr; int ret; for(;;) { ret = perf_read_buffer(hw, &ehdr, sizeof(ehdr)); if (ret) return; /* nothing to read */ if (options.opt_no_show) { perf_skip_buffer(hw, ehdr.size - sizeof(ehdr)); continue; } switch(ehdr.type) { case PERF_RECORD_SAMPLE: collected_samples++; ret = perf_display_sample(fds, num_fds, hw - fds, &ehdr, options.output_file); if (ret) errx(1, "cannot parse sample"); break; case PERF_RECORD_EXIT: display_exit(hw, options.output_file); break; case PERF_RECORD_LOST: lost_samples += display_lost(hw, fds, num_fds, options.output_file); break; case PERF_RECORD_THROTTLE: display_freq(1, hw, options.output_file); break; case PERF_RECORD_UNTHROTTLE: display_freq(0, hw, options.output_file); break; default: printf("unknown sample type %d\n", ehdr.type); perf_skip_buffer(hw, ehdr.size - sizeof(ehdr)); } } } int mainloop(char **arg) { static uint64_t ovfl_count; /* static to avoid setjmp issue */ struct pollfd pollfds[1]; sigset_t bmask; int go[2], ready[2]; size_t pgsz; size_t map_size = 0; pid_t pid; int status, ret; int i; char buf; if (pfm_initialize() 
!= PFM_SUCCESS) errx(1, "libpfm initialization failed\n"); pgsz = sysconf(_SC_PAGESIZE); map_size = (options.mmap_pages+1)*pgsz; /* * does allocate fds */ ret = perf_setup_list_events(options.events, &fds, &num_fds); if (ret || !num_fds) errx(1, "cannot setup event list"); memset(pollfds, 0, sizeof(pollfds)); ret = pipe(ready); if (ret) err(1, "cannot create pipe ready"); ret = pipe(go); if (ret) err(1, "cannot create pipe go"); /* * Create the child task */ if ((pid=fork()) == -1) err(1, "cannot fork process\n"); if (pid == 0) { close(ready[0]); close(go[1]); /* * let the parent know we exist */ close(ready[1]); if (read(go[0], &buf, 1) == -1) err(1, "unable to read go_pipe"); exit(child(arg)); } close(ready[1]); close(go[0]); if (read(ready[0], &buf, 1) == -1) err(1, "unable to read child_ready_pipe"); close(ready[0]); fds[0].fd = -1; if (!fds[0].hw.sample_period) errx(1, "need to set sampling period or freq on first event, use :period= or :freq="); for(i=0; i < num_fds; i++) { if (i == 0) { fds[i].hw.disabled = 1; fds[i].hw.enable_on_exec = 1; /* start immediately */ } else fds[i].hw.disabled = 0; if (options.opt_inherit) fds[i].hw.inherit = 1; if (fds[i].hw.sample_period) { /* * set notification threshold to be halfway through the buffer */ fds[i].hw.wakeup_watermark = (options.mmap_pages*pgsz) / 2; fds[i].hw.watermark = 1; fds[i].hw.sample_type = PERF_SAMPLE_IP|PERF_SAMPLE_TID|PERF_SAMPLE_READ|PERF_SAMPLE_TIME|PERF_SAMPLE_PERIOD; /* * if we have more than one event, then record event identifier to help with parsing */ if (num_fds > 1) fds[i].hw.sample_type |= PERF_SAMPLE_IDENTIFIER; fprintf(options.output_file,"%s period=%"PRIu64" freq=%d\n", fds[i].name, fds[i].hw.sample_period, fds[i].hw.freq); fds[i].hw.read_format = PERF_FORMAT_SCALE; if (fds[i].hw.freq) fds[i].hw.sample_type |= PERF_SAMPLE_PERIOD; if (options.mem_mode) fds[i].hw.sample_type |= PERF_SAMPLE_WEIGHT | PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_ADDR; if (options.branch_mode) { fds[i].hw.sample_type |= 
PERF_SAMPLE_BRANCH_STACK; fds[i].hw.branch_sample_type = PERF_SAMPLE_BRANCH_ANY; } } /* * we are grouping the events, so there may be a limit */ fds[i].fd = perf_event_open(&fds[i].hw, pid, options.cpu, fds[0].fd, 0); if (fds[i].fd == -1) { if (fds[i].hw.precise_ip) err(1, "cannot attach event %s: precise mode may not be supported", fds[i].name); err(1, "cannot attach event %s", fds[i].name); } } /* * kernel adds the header page to the size of the mmapped region */ fds[0].buf = mmap(NULL, map_size, PROT_READ|PROT_WRITE, MAP_SHARED, fds[0].fd, 0); if (fds[0].buf == MAP_FAILED) err(1, "cannot mmap buffer"); /* does not include header page */ fds[0].pgmsk = (options.mmap_pages*pgsz)-1; /* * send samples for all events to first event's buffer */ for (i = 1; i < num_fds; i++) { if (!fds[i].hw.sample_period) continue; ret = ioctl(fds[i].fd, PERF_EVENT_IOC_SET_OUTPUT, fds[0].fd); if (ret) err(1, "cannot redirect sampling output"); } if (num_fds > 1 && fds[0].fd > -1) { for(i = 0; i < num_fds; i++) { /* * read the event identifier using ioctl * new method replaced the trick with PERF_FORMAT_GROUP + PERF_FORMAT_ID + read() */ ret = ioctl(fds[i].fd, PERF_EVENT_IOC_ID, &fds[i].id); if (ret == -1) err(1, "cannot read ID"); fprintf(options.output_file,"ID %"PRIu64" %s\n", fds[i].id, fds[i].name); } } pollfds[0].fd = fds[0].fd; pollfds[0].events = POLLIN; for(i=0; i < num_fds; i++) { ret = ioctl(fds[i].fd, PERF_EVENT_IOC_ENABLE, 0); if (ret) err(1, "cannot enable event %s\n", fds[i].name); } signal(SIGCHLD, cld_handler); close(go[1]); if (setjmp(jbuf) == 1) goto terminate_session; sigemptyset(&bmask); sigaddset(&bmask, SIGCHLD); /* * core loop */ for(;;) { ret = poll(pollfds, 1, -1); if (ret < 0 && errno == EINTR) break; ovfl_count++; ret = sigprocmask(SIG_SETMASK, &bmask, NULL); if (ret) err(1, "setmask"); process_smpl_buf(&fds[0]); ret = sigprocmask(SIG_UNBLOCK, &bmask, NULL); if (ret) err(1, "unblock"); } terminate_session: /* * cleanup child */ wait4(pid, &status, 0, NULL); 
for(i=0; i < num_fds; i++) close(fds[i].fd); /* check for partial event buffer */ process_smpl_buf(&fds[0]); munmap(fds[0].buf, map_size); perf_free_fds(fds, num_fds); fprintf(options.output_file, "%"PRIu64" samples collected in %"PRIu64" poll events, %"PRIu64" lost samples\n", collected_samples, ovfl_count, lost_samples); /* free libpfm resources cleanly */ pfm_terminate(); fclose(options.output_file); return 0; } static void usage(void) { printf("usage: task_smpl [-h] [--help] [-i] [-c cpu] [-m mmap_pages] [-M] [-b] [-o output_file] [-e event1,...,eventn] cmd\n"); } int main(int argc, char **argv) { int c; setlocale(LC_ALL, ""); options.cpu = -1; options.output_file=stdout; while ((c=getopt_long(argc, argv,"+he:m:ic:o:Mb", the_options, 0)) != -1) { switch(c) { case 0: continue; case 'e': if (options.events) errx(1, "events specified twice\n"); options.events = optarg; break; case 'i': options.opt_inherit = 1; break; case 'm': if (options.mmap_pages) errx(1, "mmap pages already set\n"); options.mmap_pages = atoi(optarg); break; case 'M': options.mem_mode = 1; break; case 'b': options.branch_mode = 1; break; case 'c': options.cpu = atoi(optarg); break; case 'o': options.output_file=fopen(optarg,"w"); if (options.output_file==NULL) { printf("Invalid filename %s\n", optarg); exit(0); } break; case 'h': usage(); exit(0); default: errx(1, "unknown option"); } } if (argv[optind] == NULL) errx(1, "you must specify a command to execute\n"); if (!options.events) options.events = strdup(gen_events); if (!options.mmap_pages) options.mmap_pages = 1; if (options.mmap_pages > 1 && ((options.mmap_pages) & 0x1)) errx(1, "number of pages must be power of 2\n"); return mainloop(argv+optind); } papi-5.6.0/src/perfctr-2.7.x/etc/costs/PentiumM-1.4000664 001750 001750 00000001365 13216244367 023443 0ustar00jshenry1963jshenry1963000000 000000 [data from a 1.4GHz Pentium-M] PERFCTR INIT: vendor 0, family 6, model 9, stepping 5, clock 1396587 kHz PERFCTR INIT: NITER == 64 PERFCTR INIT: 
loop overhead is 140 cycles PERFCTR INIT: rdtsc cost is 51.0 cycles (3404 total) PERFCTR INIT: rdpmc cost is 43.4 cycles (2920 total) PERFCTR INIT: rdmsr (counter) cost is 95.0 cycles (6221 total) PERFCTR INIT: rdmsr (evntsel) cost is 80.8 cycles (5314 total) PERFCTR INIT: wrmsr (counter) cost is 143.3 cycles (9313 total) PERFCTR INIT: wrmsr (evntsel) cost is 132.0 cycles (8593 total) PERFCTR INIT: read cr4 cost is 2.5 cycles (302 total) PERFCTR INIT: write cr4 cost is 49.5 cycles (3311 total) PERFCTR INIT: write LVTPC cost is 8.3 cycles (674 total) perfctr: driver 2.7.3, cpu type Intel P6 at 1396587 kHz papi-5.6.0/man/man3/PAPI_option_t.3000664 001750 001750 00000002514 13216244356 020702 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPI_option_t" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPI_option_t \- .PP A pointer to the following is passed to PAPI_set/get_opt() .SH SYNOPSIS .br .PP .SS "Data Fields" .in +1c .ti -1c .RI "\fBPAPI_preload_info_t\fP \fBpreload\fP" .br .ti -1c .RI "\fBPAPI_debug_option_t\fP \fBdebug\fP" .br .ti -1c .RI "\fBPAPI_inherit_option_t\fP \fBinherit\fP" .br .ti -1c .RI "\fBPAPI_granularity_option_t\fP \fBgranularity\fP" .br .ti -1c .RI "\fBPAPI_granularity_option_t\fP \fBdefgranularity\fP" .br .ti -1c .RI "\fBPAPI_domain_option_t\fP \fBdomain\fP" .br .ti -1c .RI "\fBPAPI_domain_option_t\fP \fBdefdomain\fP" .br .ti -1c .RI "\fBPAPI_attach_option_t\fP \fBattach\fP" .br .ti -1c .RI "\fBPAPI_cpu_option_t\fP \fBcpu\fP" .br .ti -1c .RI "\fBPAPI_multiplex_option_t\fP \fBmultiplex\fP" .br .ti -1c .RI "\fBPAPI_itimer_option_t\fP \fBitimer\fP" .br .ti -1c .RI "\fBPAPI_hw_info_t\fP * \fBhw_info\fP" .br .ti -1c .RI "\fBPAPI_shlib_info_t\fP * \fBshlib_info\fP" .br .ti -1c .RI "\fBPAPI_exe_info_t\fP * \fBexe_info\fP" .br .ti -1c .RI "\fBPAPI_component_info_t\fP * \fBcmp_info\fP" .br .ti -1c .RI "\fBPAPI_addr_range_option_t\fP \fBaddr\fP" .br .ti -1c .RI "PAPI_user_defined_events_file_t \fBevents_file\fP" .br 
.in -1c .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. papi-5.6.0/src/libpfm-3.y/examples_v2.x/ia64/mont_irr.c000664 001750 001750 00000024775 13216244362 024455 0ustar00jshenry1963jshenry1963000000 000000 /* * mont_irr.c - example of how to use code range restriction with the Dual-Core Itanium 2 PMU * * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. 
*/ #include #include #include #include #include #include #include #include #include #include #define NUM_PMCS PFMLIB_MAX_PMCS #define NUM_PMDS PFMLIB_MAX_PMDS #define MAX_EVT_NAME_LEN 128 #define MAX_PMU_NAME_LEN 32 #define VECTOR_SIZE 1000000UL typedef struct { char *event_name; unsigned long expected_value; } event_desc_t; static event_desc_t event_list[]={ { "fp_ops_retired", VECTOR_SIZE<<1 }, { NULL, 0UL } }; static void fatal_error(char *fmt,...) __attribute__((noreturn)); static void fatal_error(char *fmt, ...) { va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); exit(1); } void saxpy(double *a, double *b, double *c, unsigned long size) { unsigned long i; for(i=0; i < size; i++) { c[i] = 2*a[i] + b[i]; } printf("saxpy done\n"); } void saxpy2(double *a, double *b, double *c, unsigned long size) { unsigned long i; for(i=0; i < size; i++) { c[i] = 2*a[i] + b[i]; } printf("saxpy2 done\n"); } static int do_test(void) { unsigned long size; double *a, *b, *c; size = VECTOR_SIZE; a = malloc(size*sizeof(double)); b = malloc(size*sizeof(double)); c = malloc(size*sizeof(double)); if (a == NULL || b == NULL || c == NULL) fatal_error("Cannot allocate vectors\n"); memset(a, 0, size*sizeof(double)); memset(b, 0, size*sizeof(double)); memset(c, 0, size*sizeof(double)); saxpy(a,b,c, size); saxpy2(a,b,c, size); return 0; } int main(int argc, char **argv) { event_desc_t *p; unsigned long range_start, range_end; int ret, type = 0; pfmlib_input_param_t inp; pfmlib_output_param_t outp; pfmlib_mont_input_param_t mont_inp; pfmlib_mont_output_param_t mont_outp; pfarg_pmd_t pd[NUM_PMDS]; pfarg_pmc_t pc[NUM_PMCS]; pfarg_pmc_t ibrs[8]; pfarg_ctx_t ctx; pfarg_load_t load_args; pfmlib_options_t pfmlib_options; struct fd { /* function descriptor */ unsigned long addr; unsigned long gp; } *fd; unsigned int i; int id; char name[MAX_EVT_NAME_LEN]; /* * Initialize pfm library (required before we can use it) */ if (pfm_initialize() != PFMLIB_SUCCESS) fatal_error("Can't 
initialize library\n"); /* * Let's make sure we run this on the right CPU family */ pfm_get_pmu_type(&type); if (type != PFMLIB_MONTECITO_PMU) { char model[MAX_PMU_NAME_LEN]; pfm_get_pmu_name(model, MAX_PMU_NAME_LEN); fatal_error("this program does not work with %s PMU\n", model); } /* * pass options to library (optional) */ memset(&pfmlib_options, 0, sizeof(pfmlib_options)); pfmlib_options.pfm_debug = 1; /* set to 1 for debug */ pfmlib_options.pfm_verbose = 1; /* set to 1 for verbose */ pfm_set_options(&pfmlib_options); /* * Compute the range we are interested in * * On IA-64, the function pointer does not point directly * to the function but to a descriptor which contains two * unsigned long: the first one is the actual start address * of the function, the second is the gp (global pointer) * to load into r1 before jumping into the function. Unlesss * we're jumping into a shared library the gp is the same as * the current gp. * * In the artificial example, we also rely on the compiler/linker * NOT reordering code layout. We depend on saxpy2() being just * after saxpy(). 
* */ fd = (struct fd *)saxpy; range_start = fd->addr; fd = (struct fd *)saxpy2; range_end = fd->addr; /* * linker may reorder saxpy() and saxpy2() */ if (range_end < range_start) { unsigned long tmp; tmp = range_start; range_start = range_end; range_end = tmp; } memset(pc, 0, sizeof(pc)); memset(pd, 0, sizeof(pd)); memset(&ctx, 0, sizeof(ctx)); memset(ibrs,0, sizeof(ibrs)); memset(&load_args,0, sizeof(load_args)); memset(&inp,0, sizeof(inp)); memset(&outp,0, sizeof(outp)); memset(&mont_inp,0, sizeof(mont_inp)); memset(&mont_outp,0, sizeof(mont_outp)); /* * find requested event */ p = event_list; for (i=0; p->event_name ; i++, p++) { if (pfm_find_event(p->event_name, &inp.pfp_events[i].event) != PFMLIB_SUCCESS) { fatal_error("cannot find %s event\n", p->event_name); } } /* * set the privilege mode: * PFM_PLM3 : user level only */ inp.pfp_dfl_plm = PFM_PLM3; /* * how many counters we use */ inp.pfp_event_count = i; /* * We use the library to figure out how to program the debug registers * to cover the data range we are interested in. The rr_end parameter * must point to the byte after the last element of the range (C-style range). * * Because of the masking mechanism and therefore alignment constraints used to implement * this feature, it may not be possible to exactly cover a given range. It may be that * the coverage exceeds the desired range. So it is possible to capture noise if * the surrounding addresses are also heavily used. You can figure out by how much the * actual range is off compared to the requested range by checking the rr_soff and rr_eoff * fields on return from the library call. * * Upon return, the rr_dbr array is programmed and the number of debug registers (not pairs) * used to cover the range is in rr_nbr_used. * * In the case of code range restriction on Itanium 2, the library will try to use the fine * mode first and then it will default to using multiple pairs to cover the range. 
*/ mont_inp.pfp_mont_irange.rr_used = 1; /* indicate we use code range restriction */ mont_inp.pfp_mont_irange.rr_limits[0].rr_start = range_start; mont_inp.pfp_mont_irange.rr_limits[0].rr_end = range_end; /* * let the library figure out the values for the PMCS */ if ((ret=pfm_dispatch_events(&inp, &mont_inp, &outp, &mont_outp)) != PFMLIB_SUCCESS) fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); /* * print offsets */ printf("code range : [0x%016lx-0x%016lx)\n" "start_offset:-0x%lx end_offset:+0x%lx\n" "%d pairs of debug registers used\n", range_start, range_end, mont_outp.pfp_mont_irange.rr_infos[0].rr_soff, mont_outp.pfp_mont_irange.rr_infos[0].rr_eoff, mont_outp.pfp_mont_irange.rr_nbr_used >> 1); /* * now create the context for self monitoring/per-task */ id = pfm_create_context(&ctx, NULL, NULL, 0); if (id == -1) { if (errno == ENOSYS) { fatal_error("Your kernel does not have performance monitoring support!\n"); } fatal_error("Can't create PFM context %s\n", strerror(errno)); } /* * Now prepare the argument to initialize the PMDs and PMCS. * We must pfp_pmc_count to determine the number of PMC to intialize. * We must use pfp_event_count to determine the number of PMD to initialize. * Some events cause extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. */ for (i=0; i < outp.pfp_pmc_count; i++) { pc[i].reg_num = outp.pfp_pmcs[i].reg_num; pc[i].reg_value = outp.pfp_pmcs[i].reg_value; } /* * figure out pmd mapping from output pmc */ for (i=0; i < outp.pfp_pmd_count; i++) pd[i].reg_num = outp.pfp_pmds[i].reg_num; /* * propagate IBR settings. 
IBRS are mapped to PMC256-PMC263 */ for (i=0; i < mont_outp.pfp_mont_irange.rr_nbr_used; i++) { ibrs[i].reg_num = 256+mont_outp.pfp_mont_irange.rr_br[i].reg_num; ibrs[i].reg_value = mont_outp.pfp_mont_irange.rr_br[i].reg_value; } /* * Now program the registers * * We don't use the save variable to indicate the number of elements passed to * the kernel because, as we said earlier, pc may contain more elements than * the number of events we specified, i.e., contains more than coutning monitors. */ if (pfm_write_pmcs(id, pc, outp.pfp_pmc_count)) fatal_error("child: pfm_write_pmcs error errno %d\n",errno); /* * Program the code debug registers. */ if (pfm_write_pmcs (id, ibrs, mont_outp.pfp_mont_irange.rr_nbr_used)) fatal_error("child: pfm_write_pmcs error for IBRS errno %d\n",errno); if (pfm_write_pmds(id, pd, outp.pfp_pmd_count) == -1) fatal_error("child: pfm_write_pmds error errno %d\n",errno); /* * now we load (i.e., attach) the context to ourself */ load_args.load_pid = getpid(); if (pfm_load_context(id, &load_args)) fatal_error("pfm_load_context error errno %d\n",errno); /* * Let's roll now. * * We run two distinct copies of the same function but we restrict measurement * to the first one (saxpy). Therefore the expected count is half what you would * get if code range restriction was not used. The core loop in both case uses * two floating point operation per iteration. */ pfm_self_start(id); do_test(); pfm_self_stop(id); /* * now read the results */ if (pfm_read_pmds(id, pd, inp.pfp_event_count) == -1) { fatal_error( "pfm_read_pmds error errno %d\n",errno); } /* * print the results * * It is important to realize, that the first event we specified may not * be in PMD4. Not all events can be measured by any monitor. That's why * we need to use the pc[] array to figure out where event i was allocated. 
*/
	for (i=0; i < inp.pfp_event_count; i++) {
		pfm_get_full_event_name(&inp.pfp_events[i], name, MAX_EVT_NAME_LEN);
		printf("PMD%-3u %20lu %s (expected %lu)\n",
			pd[i].reg_num,
			pd[i].reg_value,
			name, event_list[i].expected_value);
	}
	/*
	 * let's stop this now
	 */
	close(id);
	return 0;
}
papi-5.6.0/src/ctests/overflow2.c000664 001750 001750 00000012425 13216244360 020723 0ustar00jshenry1963jshenry1963000000 000000
/*
* File:    overflow2.c
* Author:  Nils Smeds  [Based on tests/overflow.c by Philip Mucci]
*          smeds@pdc.kth.se
*/

/* This file performs the following test: overflow dispatch

     The Eventset contains:
     + the event returned by find_nonderived_event() (PAPI_TOT_INS on
       POWER3/sparc) -- this first event is the overflow monitor
     + PAPI_TOT_CYC

   - Start eventset 1
   - Do flops
   - Stop and measure eventset 1
   - Set up overflow on eventset 1
   - Start eventset 1
   - Do flops
   - Stop eventset 1
*/

/* NOTE(review): the two system header names below were lost when this text
 * was extracted (anything between angle brackets was stripped). */
#include
#include

#include "papi.h"
#include "papi_test.h"

#include "do_loops.h"

/* format used by handler() for each overflow notification */
#define OVER_FMT	"handler(%d ) Overflow at %p! bit=%#llx \n"
/* format of one result row: label, value of run 1, value of run 2 */
#define OUT_FMT		"%-12s : %16lld%16lld\n"

int total = 0;						   /* total overflows */

/*
 * Overflow callback registered through PAPI_overflow() in main().
 * Prints the notification to stderr unless TESTS_QUIET is set and counts
 * the invocation in the global 'total'. The context argument is unused.
 */
void
handler( int EventSet, void *address, long long overflow_vector, void *context )
{
	( void ) context;

	if ( !TESTS_QUIET ) {
		fprintf( stderr, OVER_FMT, EventSet, address, overflow_vector );
	}
	total++;
}

/*
 * Runs do_flops( NUM_FLOPS ) twice on a two-event event set: once without
 * overflow (counts stored in values[0]) and once with overflow armed on the
 * first event (counts stored in values[1]). The test passes when the number
 * of handler invocations is within OVR_TOLERANCE of
 * values[0][0] / mythreshold.
 */
int
main( int argc, char **argv )
{
	int EventSet = PAPI_NULL;
	long long ( values[2] )[2];	/* [run][event]: run 0 = no overflow, run 1 = overflow */
	long long min, max;			/* accepted range for the overflow count */
	int num_flops, retval;
	int PAPI_event, mythreshold;
	char event_name[PAPI_MAX_STR_LEN];
	const PAPI_hw_info_t *hw_info = NULL;
	int quiet;

	/* Set TESTS_QUIET variable */
	quiet = tests_quiet( argc, argv );

	retval = PAPI_library_init( PAPI_VER_CURRENT );
	if ( retval != PAPI_VER_CURRENT ) {
		test_fail( __FILE__, __LINE__, "PAPI_library_init", retval );
	}

	hw_info = PAPI_get_hardware_info(  );
	if ( hw_info == NULL ) {
		test_fail( __FILE__, __LINE__, "PAPI_get_hardware_info", 2 );
	}

#if defined(POWER3) || defined(__sparc__)
	PAPI_event = PAPI_TOT_INS;
#else
	/* query and set up the right instruction to monitor */
	PAPI_event = find_nonderived_event( );
#endif

	/* a zero event code is treated as "no usable event": skip the test */
	if (PAPI_event==0) {
		if (!quiet) printf("Trouble creating events\n");
		test_skip(__FILE__,__LINE__,"Creating event",1);
	}

	/*
	 * Pick the overflow threshold: THRESHOLD for floating point events,
	 * otherwise a value derived from the CPU max MHz on linux, or
	 * THRESHOLD * 2 elsewhere. Note the preprocessor conditional selects
	 * the statement that forms the 'else' branch.
	 */
	if (( PAPI_event == PAPI_FP_OPS ) || ( PAPI_event == PAPI_FP_INS ))
		mythreshold = THRESHOLD;
	else
#if defined(linux)
		mythreshold = ( int ) hw_info->cpu_max_mhz * 10000 * 2;
#else
		mythreshold = THRESHOLD * 2;
#endif

	retval = PAPI_create_eventset( &EventSet );
	if ( retval != PAPI_OK )
		test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval );

	/* first event: the one that will be monitored for overflow */
	retval = PAPI_add_event( EventSet, PAPI_event );
	if ( retval != PAPI_OK )
		test_fail( __FILE__, __LINE__, "PAPI_add_event", retval );

	/* second event: total cycles */
	retval = PAPI_add_event( EventSet, PAPI_TOT_CYC );
	if ( retval != PAPI_OK )
		test_fail( __FILE__, __LINE__, "PAPI_add_event", retval );

	/* run 1: plain counting, results go to values[0] */
	retval = PAPI_start( EventSet );
	if ( retval != PAPI_OK )
		test_fail( __FILE__, __LINE__, "PAPI_start", retval );

	do_flops( NUM_FLOPS );

	retval = PAPI_stop( EventSet, values[0] );
	if ( retval != PAPI_OK )
		test_fail( __FILE__, __LINE__, "PAPI_stop", retval );

	/* arm overflow on the first event with handler() as the callback */
	retval = PAPI_overflow( EventSet, PAPI_event, mythreshold, 0, handler );
	if ( retval != PAPI_OK )
		test_fail( __FILE__, __LINE__, "PAPI_overflow", retval );

	/* run 2: same work with overflow armed, results go to values[1] */
	retval = PAPI_start( EventSet );
	if ( retval != PAPI_OK )
		test_fail( __FILE__, __LINE__, "PAPI_start", retval );

	do_flops( NUM_FLOPS );

	retval = PAPI_stop( EventSet, values[1] );
	if ( retval != PAPI_OK )
		test_fail( __FILE__, __LINE__, "PAPI_stop", retval );

	/* threshold 0: presumably turns overflow back off -- confirm against the PAPI_overflow documentation */
	retval = PAPI_overflow( EventSet, PAPI_event, 0, 0, handler );
	if ( retval != PAPI_OK )
		test_fail( __FILE__, __LINE__, "PAPI_overflow", retval );

	/* num_flops is only referenced by the commented-out verification text below */
	num_flops = NUM_FLOPS;
#if defined(linux) || defined(__ia64__) || defined(_POWER4)
	num_flops *= 2;
#endif

	if ( !quiet ) {
		if ( ( retval =
			   PAPI_event_code_to_name( PAPI_event, event_name ) ) != PAPI_OK )
			test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval );

		printf
			( "Test case: Overflow dispatch of 1st event in set with 2 events.\n" );
		printf
			( "---------------------------------------------------------------\n" );
		printf( "Threshold for overflow is: %d\n", mythreshold );
		printf( "Using %d iterations of c += a*b\n", NUM_FLOPS );
		printf( "-----------------------------------------------\n" );

		printf( "Test type : %16d%16d\n", 1, 2 );
		printf( OUT_FMT, event_name, ( values[0] )[0], ( values[1] )[0] );
		printf( OUT_FMT, "PAPI_TOT_CYC", ( values[0] )[1], ( values[1] )[1] );
		printf( "Overflows : %16s%16d\n", "", total );
		printf( "-----------------------------------------------\n" );

		printf( "Verification:\n" );
/*
	if (PAPI_event == PAPI_FP_INS)
		printf("Row 1 approximately equals %d %d\n", num_flops, num_flops);
*/
		/* Note that the second run prints output on stdout. On some systems
		 * this is costly. PAPI_TOT_INS or PAPI_TOT_CYC are likely to be _very_
		 * different between the two runs.
		 * printf("Column 1 approximately equals column 2\n");
		 */
		printf( "Row 3 approximately equals %u +- %u %%\n",
				( unsigned ) ( ( values[0] )[0] / ( long long ) mythreshold ),
				( unsigned ) ( OVR_TOLERANCE * 100.0 ) );
	}

/*
  min = (long long)((values[0])[0]*(1.0-TOLERANCE));
  max = (long long)((values[0])[0]*(1.0+TOLERANCE));
  if ( (values[1])[0] > max || (values[1])[0] < min )
  	test_fail(__FILE__, __LINE__, event_name, 1);
*/

	/*
	 * Expected overflow count is (count of run 1) / threshold; accept
	 * anything within +- OVR_TOLERANCE of that.
	 */
	min =
		( long long ) ( ( ( double ) values[0][0] * ( 1.0 - OVR_TOLERANCE ) ) /
						( double ) mythreshold );
	max =
		( long long ) ( ( ( double ) values[0][0] * ( 1.0 + OVR_TOLERANCE ) ) /
						( double ) mythreshold );
	if ( total > max || total < min )
		test_fail( __FILE__, __LINE__, "Overflows", 1 );

	test_pass( __FILE__ );

	return 0;

}
papi-5.6.0/src/libpfm4/perf_examples/x86/000775 001750 001750 00000000000 13216244365 022142 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/perfctr-2.7.x/etc/costs/PentiumM-1.7000664 001750 001750 00000001366 13216244367 023447 0ustar00jshenry1963jshenry1963000000 000000 [data from a 1.7 GHz Pentium-M]
PERFCTR INIT: vendor 0, family 6, model 9, stepping 5, clock 1694780 kHz
PERFCTR INIT: NITER == 64
PERFCTR INIT: loop overhead is 115 cycles
PERFCTR INIT: rdtsc cost is 48.6 cycles (3229
total) PERFCTR INIT: rdpmc cost is 45.6 cycles (3038 total) PERFCTR INIT: rdmsr (counter) cost is 95.5 cycles (6228 total) PERFCTR INIT: rdmsr (evntsel) cost is 81.1 cycles (5311 total) PERFCTR INIT: wrmsr (counter) cost is 143.8 cycles (9321 total) PERFCTR INIT: wrmsr (evntsel) cost is 132.4 cycles (8592 total) PERFCTR INIT: read cr4 cost is 2.8 cycles (298 total) PERFCTR INIT: write cr4 cost is 49.7 cycles (3297 total) PERFCTR INIT: write LVTPC cost is 8.6 cycles (667 total) perfctr: driver 2.7.3, cpu type Intel P6 at 1694780 kHz papi-5.6.0/src/perfctr-2.7.x/etc/costs/PentiumM-1.6000664 001750 001750 00000001460 13216244367 023441 0ustar00jshenry1963jshenry1963000000 000000 [data from a 1.6 GHz Pentium-M] PERFCTR INIT: vendor 0, family 6, model 9, stepping 5, clock 1599024 kHz PERFCTR INIT: NITER == 64 PERFCTR INIT: loop overhead is 278 cycles PERFCTR INIT: rdtsc cost is 56.8 cycles (3917 total) PERFCTR INIT: rdpmc cost is 43.9 cycles (3092 total) PERFCTR INIT: rdmsr (counter) cost is 95.0 cycles (6359 total) PERFCTR INIT: rdmsr (evntsel) cost is 80.7 cycles (5448 total) PERFCTR INIT: wrmsr (counter) cost is 143.3 cycles (9455 total) PERFCTR INIT: wrmsr (evntsel) cost is 132.0 cycles (8731 total) PERFCTR INIT: read cr4 cost is 2.8 cycles (460 total) PERFCTR INIT: write cr4 cost is 49.5 cycles (3451 total) PERFCTR INIT: write LVTPC cost is 8.1 cycles (798 total) PERFCTR INIT: sync_core cost is 140.7 cycles (9287 total) perfctr: driver 2.7.5, cpu type Intel P6 at 1599024 kHz papi-5.6.0/src/perfctr-2.7.x/etc/costs/PentiumM-1.5000664 001750 001750 00000003103 13216244367 023434 0ustar00jshenry1963jshenry1963000000 000000 [data from a 1.5 GHz Pentium M] PERFCTR INIT: vendor 0, family 6, model 13, stepping 6, clock 1496565 kHz PERFCTR INIT: NITER == 64 PERFCTR INIT: loop overhead is 263 cycles PERFCTR INIT: rdtsc cost is 45.7 cycles (3191 total) PERFCTR INIT: rdpmc cost is 44.1 cycles (3089 total) PERFCTR INIT: rdmsr (counter) cost is 104.4 cycles (6946 total) PERFCTR 
INIT: rdmsr (evntsel) cost is 87.9 cycles (5893 total) PERFCTR INIT: wrmsr (counter) cost is 154.7 cycles (10165 total) PERFCTR INIT: wrmsr (evntsel) cost is 143.2 cycles (9429 total) PERFCTR INIT: read cr4 cost is 3.0 cycles (459 total) PERFCTR INIT: write cr4 cost is 52.8 cycles (3648 total) PERFCTR INIT: write LVTPC cost is 8.8 cycles (827 total) PERFCTR INIT: sync_core cost is 148.5 cycles (9768 total) perfctr: driver 2.7.5, cpu type Intel P6 at 1496565 kHz PERFCTR INIT: vendor 0, family 6, model 9, stepping 5, clock 1495323 kHz PERFCTR INIT: NITER == 64 PERFCTR INIT: loop overhead is 282 cycles PERFCTR INIT: rdtsc cost is 48.2 cycles (3368 total) PERFCTR INIT: rdpmc cost is 44.8 cycles (3155 total) PERFCTR INIT: rdmsr (counter) cost is 95.0 cycles (6366 total) PERFCTR INIT: rdmsr (evntsel) cost is 80.7 cycles (5448 total) PERFCTR INIT: wrmsr (counter) cost is 143.4 cycles (9462 total) PERFCTR INIT: wrmsr (evntsel) cost is 132.0 cycles (8733 total) PERFCTR INIT: read cr4 cost is 2.5 cycles (444 total) PERFCTR INIT: write cr4 cost is 49.5 cycles (3453 total) PERFCTR INIT: write LVTPC cost is 8.7 cycles (842 total) PERFCTR INIT: sync_core cost is 140.7 cycles (9291 total) perfctr: driver 2.7.4, cpu type Intel P6 at 1495323 kHz papi-5.6.0/src/perfctr-2.7.x/etc/costs/PentiumM-1.3000664 001750 001750 00000004202 13216244367 023433 0ustar00jshenry1963jshenry1963000000 000000 [data from a 1.3GHz Pentium-M] PERFCTR INIT: vendor 0, family 6, model 9, stepping 5, clock 1300408 kHz PERFCTR INIT: NITER == 64 PERFCTR INIT: loop overhead is 282 cycles PERFCTR INIT: rdtsc cost is 47.6 cycles (3330 total) PERFCTR INIT: rdpmc cost is 43.3 cycles (3054 total) PERFCTR INIT: rdmsr (counter) cost is 95.0 cycles (6368 total) PERFCTR INIT: rdmsr (evntsel) cost is 80.8 cycles (5455 total) PERFCTR INIT: wrmsr (counter) cost is 143.4 cycles (9462 total) PERFCTR INIT: wrmsr (evntsel) cost is 132.0 cycles (8735 total) PERFCTR INIT: read cr4 cost is 2.8 cycles (465 total) PERFCTR INIT: 
write cr4 cost is 49.7 cycles (3465 total) PERFCTR INIT: sync_core cost is 140.7 cycles (9291 total) on ac-power PERFCTR INIT: vendor 0, family 6, model 9, stepping 5, clock 1300434 kHz PERFCTR INIT: NITER == 64 PERFCTR INIT: loop overhead is 113 cycles PERFCTR INIT: rdtsc cost is 50.0 cycles (3319 total) PERFCTR INIT: rdpmc cost is 41.2 cycles (2750 total) PERFCTR INIT: rdmsr (counter) cost is 95.3 cycles (6215 total) PERFCTR INIT: rdmsr (evntsel) cost is 81.1 cycles (5304 total) PERFCTR INIT: wrmsr (counter) cost is 143.7 cycles (9313 total) PERFCTR INIT: wrmsr (evntsel) cost is 132.3 cycles (8584 total) PERFCTR INIT: read cr4 cost is 2.8 cycles (294 total) PERFCTR INIT: write cr4 cost is 49.9 cycles (3311 total) PERFCTR INIT: write LVTPC cost is 9.4 cycles (718 total) perfctr: driver 2.7.3, cpu type Intel P6 at 1300434 kHz on battery: PERFCTR INIT: vendor 0, family 6, model 9, stepping 5, clock 600199 kHz PERFCTR INIT: NITER == 64 PERFCTR INIT: loop overhead is 113 cycles PERFCTR INIT: rdtsc cost is 47.3 cycles (3146 total) PERFCTR INIT: rdpmc cost is 41.2 cycles (2750 total) PERFCTR INIT: rdmsr (counter) cost is 95.3 cycles (6215 total) PERFCTR INIT: rdmsr (evntsel) cost is 81.1 cycles (5304 total) PERFCTR INIT: wrmsr (counter) cost is 143.7 cycles (9313 total) PERFCTR INIT: wrmsr (evntsel) cost is 132.3 cycles (8584 total) PERFCTR INIT: read cr4 cost is 2.8 cycles (294 total) PERFCTR INIT: write cr4 cost is 49.9 cycles (3311 total) PERFCTR INIT: write LVTPC cost is 8.4 cycles (652 total) perfctr: driver 2.7.3, cpu type Intel P6 at 600199 kHz papi-5.6.0/src/libpfm4/lib/pfmlib_arm.c000664 001750 001750 00000017055 13216244365 021715 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_arm.c : support for ARM chips * * Copyright (c) 2010 University of Tennessee * Contributed by Vince Weaver * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the 
Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * */ #include #include #include #include #include /* private headers */ #include "pfmlib_priv.h" /* library private */ #include "pfmlib_arm_priv.h" const pfmlib_attr_desc_t arm_mods[]={ PFM_ATTR_B("k", "monitor at kernel level"), PFM_ATTR_B("u", "monitor at user level"), PFM_ATTR_B("hv", "monitor in hypervisor"), PFM_ATTR_NULL /* end-marker to avoid exporting number of entries */ }; pfm_arm_config_t pfm_arm_cfg; #ifdef CONFIG_PFMLIB_OS_LINUX /* * helper function to retrieve one value from /proc/cpuinfo * for internal libpfm use only * attr: the attribute (line) to look for * ret_buf: a buffer to store the value of the attribute (as a string) * maxlen : number of bytes of capacity in ret_buf * * ret_buf is null terminated. 
* * Return: * 0 : attribute found, ret_buf populated * -1: attribute not found */ static int pfmlib_getcpuinfo_attr(const char *attr, char *ret_buf, size_t maxlen) { FILE *fp = NULL; int ret = -1; size_t attr_len, buf_len = 0; char *p, *value = NULL; char *buffer = NULL; if (attr == NULL || ret_buf == NULL || maxlen < 1) return -1; attr_len = strlen(attr); fp = fopen("/proc/cpuinfo", "r"); if (fp == NULL) return -1; while(pfmlib_getl(&buffer, &buf_len, fp) != -1){ /* skip blank lines */ if (*buffer == '\n') continue; p = strchr(buffer, ':'); if (p == NULL) goto error; /* * p+2: +1 = space, +2= firt character * strlen()-1 gets rid of \n */ *p = '\0'; value = p+2; value[strlen(value)-1] = '\0'; if (!strncmp(attr, buffer, attr_len)) break; } strncpy(ret_buf, value, maxlen-1); ret_buf[maxlen-1] = '\0'; ret = 0; error: free(buffer); fclose(fp); return ret; } #else static int pfmlib_getcpuinfo_attr(const char *attr, char *ret_buf, size_t maxlen) { return -1; } #endif static int arm_num_mods(void *this, int idx) { const arm_entry_t *pe = this_pe(this); unsigned int mask; mask = pe[idx].modmsk; return pfmlib_popcnt(mask); } static inline int arm_attr2mod(void *this, int pidx, int attr_idx) { const arm_entry_t *pe = this_pe(this); size_t x; int n; n = attr_idx; pfmlib_for_each_bit(x, pe[pidx].modmsk) { if (n == 0) break; n--; } return x; } static void pfm_arm_display_reg(void *this, pfmlib_event_desc_t *e, pfm_arm_reg_t reg) { __pfm_vbprintf("[0x%x] %s\n", reg.val, e->fstr); } int pfm_arm_detect(void *this) { int ret; char buffer[128]; ret = pfmlib_getcpuinfo_attr("CPU implementer", buffer, sizeof(buffer)); if (ret == -1) return PFM_ERR_NOTSUPP; pfm_arm_cfg.implementer = strtol(buffer, NULL, 16); ret = pfmlib_getcpuinfo_attr("CPU part", buffer, sizeof(buffer)); if (ret == -1) return PFM_ERR_NOTSUPP; pfm_arm_cfg.part = strtol(buffer, NULL, 16); ret = pfmlib_getcpuinfo_attr("CPU architecture", buffer, sizeof(buffer)); if (ret == -1) return PFM_ERR_NOTSUPP; 
pfm_arm_cfg.architecture = strtol(buffer, NULL, 16); return PFM_SUCCESS; } int pfm_arm_get_encoding(void *this, pfmlib_event_desc_t *e) { const arm_entry_t *pe = this_pe(this); pfmlib_event_attr_info_t *a; pfm_arm_reg_t reg; unsigned int plm = 0; int i, idx, has_plm = 0; reg.val = pe[e->event].code; for (i = 0; i < e->nattrs; i++) { a = attr(e, i); if (a->ctrl != PFM_ATTR_CTRL_PMU) continue; if (a->type > PFM_ATTR_UMASK) { uint64_t ival = e->attrs[i].ival; switch(a->idx) { case ARM_ATTR_U: /* USR */ if (ival) plm |= PFM_PLM3; has_plm = 1; break; case ARM_ATTR_K: /* OS */ if (ival) plm |= PFM_PLM0; has_plm = 1; break; case ARM_ATTR_HV: /* HYPERVISOR */ if (ival) plm |= PFM_PLMH; has_plm = 1; break; default: return PFM_ERR_ATTR; } } } if (arm_has_plm(this, e)) { if (!has_plm) plm = e->dfl_plm; reg.evtsel.excl_pl1 = !(plm & PFM_PLM0); reg.evtsel.excl_usr = !(plm & PFM_PLM3); reg.evtsel.excl_hyp = !(plm & PFM_PLMH); } evt_strcat(e->fstr, "%s", pe[e->event].name); e->codes[0] = reg.val; e->count = 1; for (i = 0; i < e->npattrs; i++) { if (e->pattrs[i].ctrl != PFM_ATTR_CTRL_PMU) continue; if (e->pattrs[i].type == PFM_ATTR_UMASK) continue; idx = e->pattrs[i].idx; switch(idx) { case ARM_ATTR_K: evt_strcat(e->fstr, ":%s=%lu", arm_mods[idx].name, !reg.evtsel.excl_pl1); break; case ARM_ATTR_U: evt_strcat(e->fstr, ":%s=%lu", arm_mods[idx].name, !reg.evtsel.excl_usr); break; case ARM_ATTR_HV: evt_strcat(e->fstr, ":%s=%lu", arm_mods[idx].name, !reg.evtsel.excl_hyp); break; } } pfm_arm_display_reg(this, e, reg); return PFM_SUCCESS; } int pfm_arm_get_event_first(void *this) { return 0; } int pfm_arm_get_event_next(void *this, int idx) { pfmlib_pmu_t *p = this; if (idx >= (p->pme_count-1)) return -1; return idx+1; } int pfm_arm_event_is_valid(void *this, int pidx) { pfmlib_pmu_t *p = this; return pidx >= 0 && pidx < p->pme_count; } int pfm_arm_validate_table(void *this, FILE *fp) { pfmlib_pmu_t *pmu = this; const arm_entry_t *pe = this_pe(this); int i, error = 0; for(i=0; i < 
pmu->pme_count; i++) { if (!pe[i].name) { fprintf(fp, "pmu: %s event%d: :: no name (prev event was %s)\n", pmu->name, i, i > 1 ? pe[i-1].name : "??"); error++; } if (!pe[i].desc) { fprintf(fp, "pmu: %s event%d: %s :: no description\n", pmu->name, i, pe[i].name); error++; } } return error ? PFM_ERR_INVAL : PFM_SUCCESS; } int pfm_arm_get_event_attr_info(void *this, int pidx, int attr_idx, pfmlib_event_attr_info_t *info) { int idx; idx = arm_attr2mod(this, pidx, attr_idx); info->name = arm_mods[idx].name; info->desc = arm_mods[idx].desc; info->type = arm_mods[idx].type; info->code = idx; info->is_dfl = 0; info->equiv = NULL; info->ctrl = PFM_ATTR_CTRL_PMU; info->idx = idx; /* namespace specific index */ info->dfl_val64 = 0; info->is_precise = 0; return PFM_SUCCESS; } unsigned int pfm_arm_get_event_nattrs(void *this, int pidx) { return arm_num_mods(this, pidx); } int pfm_arm_get_event_info(void *this, int idx, pfm_event_info_t *info) { pfmlib_pmu_t *pmu = this; const arm_entry_t *pe = this_pe(this); info->name = pe[idx].name; info->desc = pe[idx].desc; info->code = pe[idx].code; info->equiv = NULL; info->idx = idx; /* private index */ info->pmu = pmu->pmu; info->is_precise = 0; /* no attributes defined for ARM yet */ info->nattrs = 0; return PFM_SUCCESS; } papi-5.6.0/src/perfctr-2.6.x/etc/costs/Athlon-1.46000775 001750 001750 00000001310 13216244366 023207 0ustar00jshenry1963jshenry1963000000 000000 [data from a 1.46 (1.7 marketing) GHz Athlon] PERFCTR INIT: vendor 2, family 6, model 6, stepping 2, clock 1466764 kHz PERFCTR INIT: NITER == 64 PERFCTR INIT: loop overhead is 90 cycles PERFCTR INIT: rdtsc cost is 18.6 cycles (1283 total) PERFCTR INIT: rdpmc cost is 19.8 cycles (1359 total) PERFCTR INIT: rdmsr (counter) cost is 51.5 cycles (3391 total) PERFCTR INIT: rdmsr (evntsel) cost is 52.4 cycles (3446 total) PERFCTR INIT: wrmsr (counter) cost is 86.6 cycles (5636 total) PERFCTR INIT: wrmsr (evntsel) cost is 231.7 cycles (14920 total) PERFCTR INIT: read cr4 cost is 5.9 
cycles (473 total) PERFCTR INIT: write cr4 cost is 64.9 cycles (4246 total) perfctr: driver 2.3.9, cpu type AMD K7 at 1466764 kHz papi-5.6.0/src/libpfm4/perf_examples/perf_util.h000664 001750 001750 00000011152 13216244365 023657 0ustar00jshenry1963jshenry1963000000 000000 /* * perf_util.h - helper functions for perf_events * * Copyright (c) 2009 Google, Inc * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #ifndef __PERF_UTIL_H__ #define __PERF_UTIL_H__ #include #include #include #include typedef struct { struct perf_event_attr hw; uint64_t values[3]; uint64_t prev_values[3]; char *name; uint64_t id; /* event id kernel */ void *buf; size_t pgmsk; int group_leader; int fd; int max_fds; int idx; /* opaque libpfm event identifier */ int cpu; /* cpu to program */ char *fstr; /* fstr from library, must be freed */ } perf_event_desc_t; /* handy shortcut */ #define PERF_FORMAT_SCALE (PERF_FORMAT_TOTAL_TIME_ENABLED|PERF_FORMAT_TOTAL_TIME_RUNNING) extern int perf_setup_argv_events(const char **argv, perf_event_desc_t **fd, int *num_fds); extern int perf_setup_list_events(const char *events, perf_event_desc_t **fd, int *num_fds); extern int perf_read_buffer(perf_event_desc_t *hw, void *buf, size_t sz); extern void perf_free_fds(perf_event_desc_t *fds, int num_fds); extern void perf_skip_buffer(perf_event_desc_t *hw, size_t sz); static inline int perf_read_buffer_32(perf_event_desc_t *hw, void *buf) { return perf_read_buffer(hw, buf, sizeof(uint32_t)); } static inline int perf_read_buffer_64(perf_event_desc_t *hw, void *buf) { return perf_read_buffer(hw, buf, sizeof(uint64_t)); } /* * values[0] = raw count * values[1] = TIME_ENABLED * values[2] = TIME_RUNNING */ static inline uint64_t perf_scale(uint64_t *values) { uint64_t res = 0; if (!values[2] && !values[1] && values[0]) warnx("WARNING: time_running = 0 = time_enabled, raw count not zero\n"); if (values[2] > values[1]) warnx("WARNING: time_running > time_enabled\n"); if (values[2]) res = (uint64_t)((double)values[0] * values[1]/values[2]); return res; } static inline uint64_t perf_scale_delta(uint64_t *values, uint64_t *prev_values) { double pval[3], val[3]; uint64_t res = 0; if (!values[2] && !values[1] && values[0]) warnx("WARNING: time_running = 0 = time_enabled, raw count not zero\n"); if (values[2] > values[1]) warnx("WARNING: time_running > time_enabled\n"); if (values[2] - prev_values[2]) { /* covnert everything 
to double to avoid overflows! */ pval[0] = prev_values[0]; pval[1] = prev_values[1]; pval[2] = prev_values[2]; val[0] = values[0]; val[1] = values[1]; val[2] = values[2]; res = (uint64_t)(((val[0] - pval[0]) * (val[1] - pval[1])/ (val[2] - pval[2]))); } return res; } /* * TIME_RUNNING/TIME_ENABLED */ static inline double perf_scale_ratio(uint64_t *values) { if (!values[1]) return 0.0; return values[2]*1.0/values[1]; } static inline int perf_fd2event(perf_event_desc_t *fds, int num_events, int fd) { int i; for(i=0; i < num_events; i++) if (fds[i].fd == fd) return i; return -1; } /* * id = PERF_FORMAT_ID */ static inline int perf_id2event(perf_event_desc_t *fds, int num_events, uint64_t id) { int j; for(j=0; j < num_events; j++) if (fds[j].id == id) return j; return -1; } static inline int perf_is_group_leader(perf_event_desc_t *fds, int idx) { return fds[idx].group_leader == idx; } extern int perf_get_group_nevents(perf_event_desc_t *fds, int num, int leader); extern int perf_display_sample(perf_event_desc_t *fds, int num_fds, int idx, struct perf_event_header *ehdr, FILE *fp); extern uint64_t display_lost(perf_event_desc_t *hw, perf_event_desc_t *fds, int num_fds, FILE *fp); extern void display_exit(perf_event_desc_t *hw, FILE *fp); extern void display_freq(int mode, perf_event_desc_t *hw, FILE *fp); #endif papi-5.6.0/src/components/cuda/tests/cuda_ld_preload_example.README000664 001750 001750 00000006642 13216244357 027275 0ustar00jshenry1963jshenry1963000000 000000 Example of using LD_PRELOAD with the CUDA component. Asim YarKhan (2015) A short example of using LD_PRELOAD on a Linux system to intercept function calls and PAPI-enable an un-instrumented CUDA binary. Several CUDA events (e.g. SM PM counters) require a CUcontext handle to be a provided since they are context switched. This means that we cannot use a PAPI_attach from an external process to measure those events in a preexisting executable. 
These events can only be measured from within the CUcontext, that is, within the CUDA enabled code we are trying to measure. If the user is unable to change the source code, they may be able to use LD_PRELOAD's ability to trap functions and measure the events from within the executable.

This example is designed to work with the simpleMultiGPU_no_counters binary in the PAPI CUDA component tests directory. We use ltrace to figure out where to attach the PAPI_start, PAPI eventset management and PAPI_stop. Please note that this is a rough example; return codes are not checked and other changes may be required to make sure that the calls are intercepted at the right moment.

First, the library calls in the simpleMultiGPU_no_counters binary were traced using ltrace. Note in the ltrace output that the CUDA C APIs are different from the CUDA calls visible to nvcc. Then figure out the appropriate place to attach the PAPI calls. The initialization is attached to the first entry to cudaSetDevice. Each cudaSetDevice is also used to set up the PAPI events for that device. It was harder to figure out where to attach the PAPI_start. After running some tests, I attached it to the 18th invocation of gettimeofday (kind of arbitrary! Sorry! May need tweaking). The PAPI_stop was attached to the first invocation of cudaFreeHost.

[Note: There are other events that do not require a CUcontext. The PM counters for TEX, L2, and FB are not context switched so it would be possible to sample these values from any context as long as the context is on the same CUDA device. These events could be measured using a PAPI_attach from another process using the same CUDA device.]

--------------------------------------------------

How to use this example... please read carefully to make sense of the following.

Build:
make cuda_ld_preload_example.so

Trace the executable using ltrace to figure out where to intercept the calls:
# Do the tracing with a small example!
# ( export PAPI_DIR=`pwd`/../../..
&& export LIBPFM_LIBDIR=`pwd`/../../../libpfm4/lib && export LD_LIBRARY_PATH=./:${PAPI_DIR}:${LIBPFM_LIBDIR}:${LD_LIBRARY_PATH} && ltrace --output ltrace.out --library /usr/lib64/libcuda.so.1 ./simpleMultiGPU_no_counters ) # ( export PAPI_DIR=`pwd`/../../.. && export LIBPFM_LIBDIR=`pwd`/../../../libpfm4/lib && export LD_LIBRARY_PATH=./:${PAPI_DIR}:${LIBPFM_LIBDIR}:${LD_LIBRARY_PATH} && LD_PRELOAD=./cuda_ld_preload_example.so ltrace ./simpleMultiGPU_no_counters ) Run using dynamic linking to find the correct libraries: ( export PAPI_DIR=`pwd`/../../.. && export LIBPFM_LIBDIR=`pwd`/../../../libpfm4/lib && export LD_LIBRARY_PATH=./:${PAPI_DIR}:${LIBPFM_LIBDIR}:${LD_LIBRARY_PATH} && LD_PRELOAD=./cuda_ld_preload_example.so ./simpleMultiGPU_no_counters ) make cuda_ld_preload_example.so && ( export PAPI_DIR=`pwd`/../../.. && export LIBPFM_LIBDIR=`pwd`/../../../libpfm4/lib && export LD_LIBRARY_PATH=./:${PAPI_DIR}:${LIBPFM_LIBDIR}:${LD_LIBRARY_PATH} && LD_PRELOAD=./cuda_ld_preload_example.so ./simpleMultiGPU_no_counters ) papi-5.6.0/src/examples/PAPI_ipc.c000664 001750 001750 00000003240 13216244361 020667 0ustar00jshenry1963jshenry1963000000 000000 /***************************************************************************** * This example demonstrates the usage of the high level function PAPI_ipc * * which measures the number of instructions executed per cpu cycle * *****************************************************************************/ /***************************************************************************** * The first call to PAPI_ipc initializes the PAPI library, set up the * * counters to monitor PAPI_TOT_INS and PAPI_TOT_CYC events, and start the * * counters. Subsequent calls will read the counters and return total real * * time, total process time, total instructions, and the instructions per * * cycle rate since the last call to PAPI_ipc. 
* *****************************************************************************/ #include #include #include "papi.h" main() { float real_time, proc_time,ipc; long long ins; float real_time_i, proc_time_i, ipc_i; long long ins_i; int retval; if((retval=PAPI_ipc(&real_time_i,&proc_time_i,&ins_i,&ipc_i)) < PAPI_OK) { printf("Could not initialise PAPI_ipc \n"); printf("retval: %d\n", retval); exit(1); } your_slow_code(); if((retval=PAPI_ipc( &real_time, &proc_time, &ins, &ipc)) * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include #include #include #include #include #include #include #include #include "perf_util.h" /* the **fd parameter must point to a null pointer on the first call * max_fds and num_fds must both point to a zero value on the first call * The return value is success (0) vs. 
failure (non-zero) */
/*
 * perf_setup_argv_events - encode a NULL-terminated vector of event strings
 *
 * @argv:    NULL-terminated array of event strings, each one is handed to
 *           pfm_get_os_event_encoding()
 * @fds:     in/out, address of the descriptor array; *fds is NULL on the
 *           first call, the array is grown (doubled) with realloc() on demand
 * @num_fds: in/out, number of descriptors already in use in *fds
 *
 * All the events appended by one call record the index of the first event
 * appended by that call as their group_leader.
 *
 * Returns 0 on success and updates *fds / *num_fds.
 * Returns -1 without touching anything when an argument is NULL or when the
 * existing array advertises less than 2 slots.
 * Returns -1 when growing the array, encoding an event or duplicating its
 * name fails; in that case the whole array, including the entries set up by
 * earlier calls, is released and *fds / *num_fds are reset to NULL / 0 so
 * the caller is not left with a dangling pointer.
 */
int
perf_setup_argv_events(const char **argv, perf_event_desc_t **fds, int *num_fds)
{
	perf_event_desc_t *fd, *tmp;
	pfm_perf_encode_arg_t arg;
	int new_max, ret, num, max_fds;
	int group_leader;

	if (!(argv && fds && num_fds))
		return -1;

	fd = *fds;
	if (fd) {
		max_fds = fd[0].max_fds;
		if (max_fds < 2)
			return -1;
		num = *num_fds;
	} else {
		max_fds = num = 0; /* bootstrap */
	}
	group_leader = num;

	while (*argv) {
		if (num == max_fds) {
			if (max_fds == 0)
				new_max = 2;
			else
				new_max = max_fds << 1;

			if (new_max < max_fds) {
				warn("too many entries");
				goto error;
			}
			/*
			 * keep fd untouched until realloc() is known to have
			 * succeeded: on failure the old array is still valid
			 * and must be released by the error path
			 */
			tmp = realloc(fd, new_max * sizeof(*fd));
			if (!tmp) {
				warn("cannot allocate memory");
				goto error;
			}
			fd = tmp;

			/* reset newly allocated chunk */
			memset(fd + max_fds, 0, (new_max - max_fds) * sizeof(*fd));
			max_fds = new_max;

			/* update max size */
			fd[0].max_fds = max_fds;
		}
		/* ABI compatibility, set before calling libpfm */
		fd[num].hw.size = sizeof(fd[num].hw);

		memset(&arg, 0, sizeof(arg));
		arg.attr = &fd[num].hw;
		arg.fstr = &fd[num].fstr; /* fd[].fstr is NULL */

		ret = pfm_get_os_event_encoding(*argv, PFM_PLM0|PFM_PLM3, PFM_OS_PERF_EVENT_EXT, &arg);
		if (ret != PFM_SUCCESS) {
			warnx("event %s: %s", *argv, pfm_strerror(ret));
			goto error;
		}

		fd[num].name = strdup(*argv);
		if (!fd[num].name) {
			warn("cannot allocate memory");
			/* entry num is not covered by the cleanup loop below */
			free(fd[num].fstr);
			fd[num].fstr = NULL;
			goto error;
		}
		fd[num].group_leader = group_leader;
		fd[num].idx = arg.idx;
		fd[num].cpu = arg.cpu;

		num++;
		argv++;
	}
	*num_fds = num;
	*fds = fd;
	return 0;
error:
	perf_free_fds(fd, num);
	/* the array is gone, do not leave the caller with a stale pointer */
	*fds = NULL;
	*num_fds = 0;
	return -1;
}

/*
 * perf_setup_list_events - same as perf_setup_argv_events() for a single
 * comma-separated string: a private copy of ev is cut in place at each ','
 * and the pieces are passed as a NULL-terminated vector.
 */
int
perf_setup_list_events(const char *ev, perf_event_desc_t **fd, int *num_fds)
{
	const char **argv;
	char *p, *q, *events;
	int i, ret, num = 0;

	if (!(ev && fd && num_fds))
		return -1;

	events = strdup(ev);
	if (!events)
		return -1;

	q = events;
	while((p = strchr(q, ','))) {
		num++;
		q = p + 1;
	}
	num++;
	num++; /* terminator */

	argv = malloc(num * sizeof(char *));
	if (!argv) {
		free(events);
		return -1;
	}

	i = 0; q = events;
	while((p = strchr(q, ','))) {
		*p = '\0';
		argv[i++] = q;
		q = p + 1;
	}
	argv[i++] = q;
	argv[i] = NULL;
	ret = perf_setup_argv_events(argv, fd, num_fds);
	free(argv);
free(events); /* strdup in perf_setup_argv_events() */
	return ret;
}

/*
 * perf_free_fds - release a descriptor array
 *
 * @fds:     descriptor array, may be NULL (then nothing is done)
 * @num_fds: number of entries whose name and fstr strings are freed
 *
 * Frees the name and fstr of the first num_fds entries, then the array.
 */
void
perf_free_fds(perf_event_desc_t *fds, int num_fds)
{
	int i;

	/* a NULL array has no entries to walk, whatever num_fds says */
	if (!fds)
		return;

	for (i = 0 ; i < num_fds; i++) {
		free(fds[i].name);
		free(fds[i].fstr);
	}
	free(fds);
}

/*
 * perf_get_group_nevents - number of events in the group of event idx
 *
 * @fds: descriptor array
 * @num: number of valid entries in fds
 * @idx: index of any event of the group
 *
 * Counts the leader of fds[idx] plus the consecutive entries after it that
 * carry the same group_leader. Returns 0 when fds is NULL or idx is out of
 * range.
 */
int
perf_get_group_nevents(perf_event_desc_t *fds, int num, int idx)
{
	int leader;
	int i;

	if (!fds || idx < 0 || idx >= num)
		return 0;

	leader = fds[idx].group_leader;

	/*
	 * stop at the first entry with another group leader (the previous
	 * event was the final event of this group) or at the end of the array
	 */
	for (i = leader + 1; i < num && fds[i].group_leader == leader; i++)
		;

	return i - leader;
}

/*
 * perf_read_buffer - copy sz bytes out of the sampling buffer of hw into buf
 *
 * Returns -1 when fewer than sz bytes are available between data_tail and
 * data_head.
 */
int
perf_read_buffer(perf_event_desc_t *hw, void *buf, size_t sz)
{
	struct perf_event_mmap_page *hdr = hw->buf;
	size_t pgmsk = hw->pgmsk;
	void *data;
	unsigned long tail;
	size_t avail_sz, m, c;

	/*
	 * data points to beginning of buffer payload
	 */
	data = ((void *)hdr)+sysconf(_SC_PAGESIZE);

	/*
	 * position of tail within the buffer payload
	 */
	tail = hdr->data_tail & pgmsk;

	/*
	 * size of what is available
	 *
	 * data_head, data_tail never wrap around
	 */
	avail_sz = hdr->data_head - hdr->data_tail;
	if (sz > avail_sz)
		return -1;

	/*
	 * sz <= avail_sz, we can satisfy the request
	 */

	/*
	 * c = size till end of buffer
	 *
	 * buffer payload size is necessarily
	 * a power of two, so we can do:
	 */
	c = pgmsk + 1 -  tail;

	/*
	 * min with requested size
	 */
	m = c < sz ?
c : sz; /* copy beginning */ memcpy(buf, data+tail, m); /* * copy wrapped around leftover */ if (sz > m) memcpy(buf+m, data, sz - m); //printf("\nhead=%lx tail=%lx new_tail=%lx sz=%zu\n", hdr->data_head, hdr->data_tail, hdr->data_tail+sz, sz); hdr->data_tail += sz; return 0; } void perf_skip_buffer(perf_event_desc_t *hw, size_t sz) { struct perf_event_mmap_page *hdr = hw->buf; if ((hdr->data_tail + sz) > hdr->data_head) sz = hdr->data_head - hdr->data_tail; hdr->data_tail += sz; } static size_t __perf_handle_raw(perf_event_desc_t *hw) { size_t sz = 0; uint32_t raw_sz, i; char *buf; int ret; ret = perf_read_buffer_32(hw, &raw_sz); if (ret) { warnx("cannot read raw size"); return -1; } sz += sizeof(raw_sz); printf("\n\tRAWSZ:%u\n", raw_sz); buf = malloc(raw_sz); if (!buf) { warn("cannot allocate raw buffer"); return -1; } ret = perf_read_buffer(hw, buf, raw_sz); if (ret) { warnx("cannot read raw data"); free(buf); return -1; } if (raw_sz) putchar('\t'); for(i=0; i < raw_sz; i++) { printf("0x%02x ", buf[i] & 0xff ); if (((i+1) % 16) == 0) printf("\n\t"); } if (raw_sz) putchar('\n'); free(buf); return sz + raw_sz; } static int perf_display_branch_stack(perf_event_desc_t *desc, FILE *fp) { struct perf_branch_entry b; uint64_t nr, n; int ret; ret = perf_read_buffer(desc, &n, sizeof(n)); if (ret) errx(1, "cannot read branch stack nr"); fprintf(fp, "\n\tBRANCH_STACK:%"PRIu64"\n", n); nr = n; /* * from most recent to least recent take branch */ while (nr--) { ret = perf_read_buffer(desc, &b, sizeof(b)); if (ret) errx(1, "cannot read branch stack entry"); fprintf(fp, "\tFROM:0x%016"PRIx64" TO:0x%016"PRIx64" MISPRED:%c PRED:%c IN_TX:%c ABORT:%c CYCLES:%d type:%d\n", b.from, b.to, !(b.mispred || b.predicted) ? '-': (b.mispred ? 'Y' :'N'), !(b.mispred || b.predicted) ? '-': (b.predicted? 'Y' :'N'), (b.in_tx? 'Y' :'N'), (b.abort? 
'Y' :'N'), b.type, b.cycles); } return (int)(n * sizeof(b) + sizeof(n)); } static int perf_display_regs_user(perf_event_desc_t *hw, FILE *fp) { errx(1, "display regs_user not implemented yet\n"); return 0; } static int perf_display_regs_intr(perf_event_desc_t *hw, FILE *fp) { errx(1, "display regs_intr not implemented yet\n"); return 0; } static int perf_display_stack_user(perf_event_desc_t *hw, FILE *fp) { uint64_t nr; char buf[512]; size_t sz; int ret; ret = perf_read_buffer(hw, &nr, sizeof(nr)); if (ret) errx(1, "cannot user stack size"); fprintf(fp, "USER_STACK: SZ:%"PRIu64"\n", nr); /* consume content */ while (nr) { sz = nr; if (sz > sizeof(buf)) sz = sizeof(buf); ret = perf_read_buffer(hw, buf, sz); if (ret) errx(1, "cannot user stack content"); nr -= sz; } return 0; } int perf_display_sample(perf_event_desc_t *fds, int num_fds, int idx, struct perf_event_header *ehdr, FILE *fp) { perf_event_desc_t *hw; struct { uint32_t pid, tid; } pid; struct { uint64_t value, id; } grp; uint64_t time_enabled, time_running; size_t sz; uint64_t type, fmt; uint64_t val64; const char *str; int ret, e; if (!fds || !fp || !ehdr || num_fds < 0 || idx < 0 || idx >= num_fds) return -1; sz = ehdr->size - sizeof(*ehdr); hw = fds+idx; type = hw->hw.sample_type; fmt = hw->hw.read_format; if (type & PERF_SAMPLE_IDENTIFIER) { ret = perf_read_buffer_64(hw, &val64); if (ret) { warnx("cannot read IP"); return -1; } fprintf(fp, "ID:%"PRIu64" ", val64); sz -= sizeof(val64); } /* * the sample_type information is laid down * based on the PERF_RECORD_SAMPLE format specified * in the perf_event.h header file. 
* That order is different from the enum perf_event_sample_format */ if (type & PERF_SAMPLE_IP) { const char *xtra = " "; ret = perf_read_buffer_64(hw, &val64); if (ret) { warnx("cannot read IP"); return -1; } /* * MISC_EXACT_IP indicates that kernel is returning * th IIP of an instruction which caused the event, i.e., * no skid */ if (hw->hw.precise_ip && (ehdr->misc & PERF_RECORD_MISC_EXACT_IP)) xtra = " (exact) "; fprintf(fp, "IIP:%#016"PRIx64"%s", val64, xtra); sz -= sizeof(val64); } if (type & PERF_SAMPLE_TID) { ret = perf_read_buffer(hw, &pid, sizeof(pid)); if (ret) { warnx( "cannot read PID"); return -1; } fprintf(fp, "PID:%d TID:%d ", pid.pid, pid.tid); sz -= sizeof(pid); } if (type & PERF_SAMPLE_TIME) { ret = perf_read_buffer_64(hw, &val64); if (ret) { warnx( "cannot read time"); return -1; } fprintf(fp, "TIME:%'"PRIu64" ", val64); sz -= sizeof(val64); } if (type & PERF_SAMPLE_ADDR) { ret = perf_read_buffer_64(hw, &val64); if (ret) { warnx( "cannot read addr"); return -1; } fprintf(fp, "ADDR:%#016"PRIx64" ", val64); sz -= sizeof(val64); } if (type & PERF_SAMPLE_ID) { ret = perf_read_buffer_64(hw, &val64); if (ret) { warnx( "cannot read id"); return -1; } fprintf(fp, "ID:%"PRIu64" ", val64); sz -= sizeof(val64); } if (type & PERF_SAMPLE_STREAM_ID) { ret = perf_read_buffer_64(hw, &val64); if (ret) { warnx( "cannot read stream_id"); return -1; } fprintf(fp, "STREAM_ID:%"PRIu64" ", val64); sz -= sizeof(val64); } if (type & PERF_SAMPLE_CPU) { struct { uint32_t cpu, reserved; } cpu; ret = perf_read_buffer(hw, &cpu, sizeof(cpu)); if (ret) { warnx( "cannot read cpu"); return -1; } fprintf(fp, "CPU:%u ", cpu.cpu); sz -= sizeof(cpu); } if (type & PERF_SAMPLE_PERIOD) { ret = perf_read_buffer_64(hw, &val64); if (ret) { warnx( "cannot read period"); return -1; } fprintf(fp, "PERIOD:%'"PRIu64" ", val64); sz -= sizeof(val64); } /* struct read_format { * { u64 value; * { u64 time_enabled; } && PERF_FORMAT_ENABLED * { u64 time_running; } && PERF_FORMAT_RUNNING * { u64 id; } 
&& PERF_FORMAT_ID * } && !PERF_FORMAT_GROUP * * { u64 nr; * { u64 time_enabled; } && PERF_FORMAT_ENABLED * { u64 time_running; } && PERF_FORMAT_RUNNING * { u64 value; * { u64 id; } && PERF_FORMAT_ID * } cntr[nr]; * } && PERF_FORMAT_GROUP * }; */ if (type & PERF_SAMPLE_READ) { uint64_t values[3]; uint64_t nr; if (fmt & PERF_FORMAT_GROUP) { ret = perf_read_buffer_64(hw, &nr); if (ret) { warnx( "cannot read nr"); return -1; } sz -= sizeof(nr); time_enabled = time_running = 1; if (fmt & PERF_FORMAT_TOTAL_TIME_ENABLED) { ret = perf_read_buffer_64(hw, &time_enabled); if (ret) { warnx( "cannot read timing info"); return -1; } sz -= sizeof(time_enabled); } if (fmt & PERF_FORMAT_TOTAL_TIME_RUNNING) { ret = perf_read_buffer_64(hw, &time_running); if (ret) { warnx( "cannot read timing info"); return -1; } sz -= sizeof(time_running); } fprintf(fp, "ENA=%'"PRIu64" RUN=%'"PRIu64" NR=%"PRIu64"\n", time_enabled, time_running, nr); values[1] = time_enabled; values[2] = time_running; while(nr--) { grp.id = -1; ret = perf_read_buffer_64(hw, &grp.value); if (ret) { warnx( "cannot read group value"); return -1; } sz -= sizeof(grp.value); if (fmt & PERF_FORMAT_ID) { ret = perf_read_buffer_64(hw, &grp.id); if (ret) { warnx( "cannot read leader id"); return -1; } sz -= sizeof(grp.id); } e = perf_id2event(fds, num_fds, grp.id); if (e == -1) str = "unknown sample event"; else str = fds[e].name; values[0] = grp.value; grp.value = perf_scale(values); fprintf(fp, "\t%'"PRIu64" %s (%"PRIu64"%s)\n", grp.value, str, grp.id, time_running != time_enabled ? 
", scaled":""); } } else { time_enabled = time_running = 0; /* * this program does not use FORMAT_GROUP when there is only one event */ ret = perf_read_buffer_64(hw, &val64); if (ret) { warnx( "cannot read value"); return -1; } sz -= sizeof(val64); if (fmt & PERF_FORMAT_TOTAL_TIME_ENABLED) { ret = perf_read_buffer_64(hw, &time_enabled); if (ret) { warnx( "cannot read timing info"); return -1; } sz -= sizeof(time_enabled); } if (fmt & PERF_FORMAT_TOTAL_TIME_RUNNING) { ret = perf_read_buffer_64(hw, &time_running); if (ret) { warnx( "cannot read timing info"); return -1; } sz -= sizeof(time_running); } if (fmt & PERF_FORMAT_ID) { ret = perf_read_buffer_64(hw, &val64); if (ret) { warnx( "cannot read leader id"); return -1; } sz -= sizeof(val64); } fprintf(fp, "ENA=%'"PRIu64" RUN=%'"PRIu64"\n", time_enabled, time_running); values[0] = val64; values[1] = time_enabled; values[2] = time_running; val64 = perf_scale(values); fprintf(fp, "\t%'"PRIu64" %s %s\n", val64, fds[0].name, time_running != time_enabled ? 
", scaled":""); } } if (type & PERF_SAMPLE_CALLCHAIN) { uint64_t nr, ip; ret = perf_read_buffer_64(hw, &nr); if (ret) { warnx( "cannot read callchain nr"); return -1; } sz -= sizeof(nr); while(nr--) { ret = perf_read_buffer_64(hw, &ip); if (ret) { warnx( "cannot read ip"); return -1; } sz -= sizeof(ip); fprintf(fp, "\t0x%"PRIx64"\n", ip); } } if (type & PERF_SAMPLE_RAW) { ret = __perf_handle_raw(hw); if (ret == -1) return -1; sz -= ret; } if (type & PERF_SAMPLE_BRANCH_STACK) { ret = perf_display_branch_stack(hw, fp); sz -= ret; } if (type & PERF_SAMPLE_REGS_USER) { ret = perf_display_regs_user(hw, fp); sz -= ret; } if (type & PERF_SAMPLE_STACK_USER) { ret = perf_display_stack_user(hw, fp); sz -= ret; } if (type & PERF_SAMPLE_WEIGHT) { ret = perf_read_buffer_64(hw, &val64); if (ret) { warnx( "cannot read weight"); return -1; } fprintf(fp, "WEIGHT:%'"PRIu64" ", val64); sz -= sizeof(val64); } if (type & PERF_SAMPLE_DATA_SRC) { ret = perf_read_buffer_64(hw, &val64); if (ret) { warnx( "cannot read data src"); return -1; } fprintf(fp, "DATA_SRC:%'"PRIu64" ", val64); sz -= sizeof(val64); } if (type & PERF_SAMPLE_TRANSACTION) { ret = perf_read_buffer_64(hw, &val64); if (ret) { warnx( "cannot read txn"); return -1; } fprintf(fp, "TXN:%'"PRIu64" ", val64); sz -= sizeof(val64); } if (type & PERF_SAMPLE_REGS_INTR) { ret = perf_display_regs_intr(hw, fp); sz -= ret; } /* * if we have some data left, it is because there is more * than what we know about. In fact, it is more complicated * because we may have the right size but wrong layout. But * that's the best we can do. 
*/ if (sz) { warnx("did not correctly parse sample leftover=%zu", sz); perf_skip_buffer(hw, sz); } fputc('\n',fp); return 0; } uint64_t display_lost(perf_event_desc_t *hw, perf_event_desc_t *fds, int num_fds, FILE *fp) { struct { uint64_t id, lost; } lost; const char *str; int e, ret; ret = perf_read_buffer(hw, &lost, sizeof(lost)); if (ret) { warnx("cannot read lost info"); return 0; } e = perf_id2event(fds, num_fds, lost.id); if (e == -1) str = "unknown lost event"; else str = fds[e].name; fprintf(fp, "<<>>\n", lost.lost, str); return lost.lost; } void display_exit(perf_event_desc_t *hw, FILE *fp) { struct { pid_t pid, ppid, tid, ptid; } grp; int ret; ret = perf_read_buffer(hw, &grp, sizeof(grp)); if (ret) { warnx("cannot read exit info"); return; } fprintf(fp,"[%d] exited\n", grp.pid); } void display_freq(int mode, perf_event_desc_t *hw, FILE *fp) { struct { uint64_t time, id, stream_id; } thr; int ret; ret = perf_read_buffer(hw, &thr, sizeof(thr)); if (ret) { warnx("cannot read throttling info"); return; } fprintf(fp, "%s value=%"PRIu64" event ID=%"PRIu64"\n", mode ? "Throttled" : "Unthrottled", thr.id, thr.stream_id); } papi-5.6.0/src/libpfm4/lib/events/intel_core_events.h000664 001750 001750 00000147440 13216244364 024626 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2011 Google, Inc * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. * * This file has been automatically generated. * * PMU: core (Intel Core) */ static const intel_x86_umask_t core_rs_uops_dispatched_cycles[]={ { .uname = "PORT_0", .udesc = "On port 0", .ucode = 0x100, }, { .uname = "PORT_1", .udesc = "On port 1", .ucode = 0x200, }, { .uname = "PORT_2", .udesc = "On port 2", .ucode = 0x400, }, { .uname = "PORT_3", .udesc = "On port 3", .ucode = 0x800, }, { .uname = "PORT_4", .udesc = "On port 4", .ucode = 0x1000, }, { .uname = "PORT_5", .udesc = "On port 5", .ucode = 0x2000, }, { .uname = "ANY", .udesc = "On any port", .uequiv = "PORT_0:PORT_1:PORT_2:PORT_3:PORT_4:PORT_5", .ucode = 0x3f00, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, }; static const intel_x86_umask_t core_load_block[]={ { .uname = "STA", .udesc = "Loads blocked by a preceding store with unknown address", .ucode = 0x200, }, { .uname = "STD", .udesc = "Loads blocked by a preceding store with unknown data", .ucode = 0x400, }, { .uname = "OVERLAP_STORE", .udesc = "Loads that partially overlap an earlier store, or 4K equived with a previous store", .ucode = 0x800, }, { .uname = "UNTIL_RETIRE", .udesc = "Loads blocked until retirement", .ucode = 0x1000, }, { .uname = "L1D", .udesc = "Loads blocked by the L1 data cache", .ucode = 0x2000, }, }; static const intel_x86_umask_t core_store_block[]={ { .uname = "ORDER", .udesc = "Cycles while store is waiting for a preceding store to be globally observed", 
.ucode = 0x200, }, { .uname = "SNOOP", .udesc = "A store is blocked due to a conflict with an external or internal snoop", .ucode = 0x800, }, }; static const intel_x86_umask_t core_sse_pre_exec[]={ { .uname = "NTA", .udesc = "Streaming SIMD Extensions (SSE) Prefetch NTA instructions executed", .ucode = 0x0, .uflags= INTEL_X86_NCOMBO, }, { .uname = "L1", .udesc = "Streaming SIMD Extensions (SSE) PrefetchT0 instructions executed", .ucode = 0x100, .uflags= INTEL_X86_NCOMBO, }, { .uname = "L2", .udesc = "Streaming SIMD Extensions (SSE) PrefetchT1 and PrefetchT2 instructions executed", .ucode = 0x200, .uflags= INTEL_X86_NCOMBO, }, { .uname = "STORES", .udesc = "Streaming SIMD Extensions (SSE) Weakly-ordered store instructions executed", .ucode = 0x300, .uflags= INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t core_dtlb_misses[]={ { .uname = "ANY", .udesc = "Any memory access that missed the DTLB", .ucode = 0x100, .uflags= INTEL_X86_DFL, }, { .uname = "MISS_LD", .udesc = "DTLB misses due to load operations", .ucode = 0x200, }, { .uname = "L0_MISS_LD", .udesc = "L0 DTLB misses due to load operations", .ucode = 0x400, }, { .uname = "MISS_ST", .udesc = "DTLB misses due to store operations", .ucode = 0x800, }, }; static const intel_x86_umask_t core_memory_disambiguation[]={ { .uname = "RESET", .udesc = "Memory disambiguation reset cycles", .ucode = 0x100, }, { .uname = "SUCCESS", .udesc = "Number of loads that were successfully disambiguated", .ucode = 0x200, }, }; static const intel_x86_umask_t core_page_walks[]={ { .uname = "COUNT", .udesc = "Number of page-walks executed", .ucode = 0x100, }, { .uname = "CYCLES", .udesc = "Duration of page-walks in core cycles", .ucode = 0x200, }, }; static const intel_x86_umask_t core_delayed_bypass[]={ { .uname = "FP", .udesc = "Delayed bypass to FP operation", .ucode = 0x0, }, { .uname = "SIMD", .udesc = "Delayed bypass to SIMD operation", .ucode = 0x100, }, { .uname = "LOAD", .udesc = "Delayed bypass to load operation", .ucode = 
0x200, }, }; static const intel_x86_umask_t core_l2_ads[]={ { .uname = "SELF", .udesc = "This core", .ucode = 0x4000, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "BOTH_CORES", .udesc = "Both cores", .ucode = 0xc000, .uflags= INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t core_l2_lines_in[]={ { .uname = "SELF", .udesc = "This core", .ucode = 0x4000, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, .grpid = 0, }, { .uname = "BOTH_CORES", .udesc = "Both cores", .ucode = 0xc000, .uflags= INTEL_X86_NCOMBO, .grpid = 0, }, { .uname = "ANY", .udesc = "All inclusive", .ucode = 0x3000, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, .grpid = 1, }, { .uname = "PREFETCH", .udesc = "Hardware prefetch only", .ucode = 0x1000, .uflags= INTEL_X86_NCOMBO, .grpid = 1, }, { .uname = "EXCL_PREFETCH", .udesc = "Exclude hardware prefetch", .ucode = 0x0, .uflags= INTEL_X86_NCOMBO, .grpid = 1, }, }; static const intel_x86_umask_t core_l2_ifetch[]={ { .uname = "SELF", .udesc = "This core", .ucode = 0x4000, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, .grpid = 0, }, { .uname = "BOTH_CORES", .udesc = "Both cores", .ucode = 0xc000, .uflags= INTEL_X86_NCOMBO, .grpid = 0, }, { .uname = "MESI", .udesc = "Any cacheline access", .uequiv = "M_STATE:E_STATE:S_STATE:I_STATE", .ucode = 0xf00, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, .grpid = 1, }, { .uname = "I_STATE", .udesc = "Invalid cacheline", .ucode = 0x100, .grpid = 1, }, { .uname = "S_STATE", .udesc = "Shared cacheline", .ucode = 0x200, .grpid = 1, }, { .uname = "E_STATE", .udesc = "Exclusive cacheline", .ucode = 0x400, .grpid = 1, }, { .uname = "M_STATE", .udesc = "Modified cacheline", .ucode = 0x800, .grpid = 1, }, }; static const intel_x86_umask_t core_l2_ld[]={ { .uname = "SELF", .udesc = "This core", .ucode = 0x4000, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, .grpid = 0, }, { .uname = "BOTH_CORES", .udesc = "Both cores", .ucode = 0xc000, .uflags= INTEL_X86_NCOMBO, .grpid = 0, }, { .uname = "ANY", .udesc = "All inclusive", .ucode 
= 0x3000, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, .grpid = 1, }, { .uname = "PREFETCH", .udesc = "Hardware prefetch only", .ucode = 0x1000, .uflags= INTEL_X86_NCOMBO, .grpid = 1, }, { .uname = "EXCL_PREFETCH", .udesc = "Exclude hardware prefetch", .ucode = 0x0, .uflags= INTEL_X86_NCOMBO, .grpid = 1, }, { .uname = "MESI", .udesc = "Any cacheline access", .uequiv = "M_STATE:E_STATE:S_STATE:I_STATE", .ucode = 0xf00, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, .grpid = 2, }, { .uname = "I_STATE", .udesc = "Invalid cacheline", .ucode = 0x100, .grpid = 2, }, { .uname = "S_STATE", .udesc = "Shared cacheline", .ucode = 0x200, .grpid = 2, }, { .uname = "E_STATE", .udesc = "Exclusive cacheline", .ucode = 0x400, .grpid = 2, }, { .uname = "M_STATE", .udesc = "Modified cacheline", .ucode = 0x800, .grpid = 2, }, }; static const intel_x86_umask_t core_cpu_clk_unhalted[]={ { .uname = "CORE_P", .udesc = "Core cycles when core is not halted", .ucode = 0x0, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "BUS", .udesc = "Bus cycles when core is not halted. This event can give a measurement of the elapsed time. 
This events has a constant ratio with CPU_CLK_UNHALTED:REF event, which is the maximum bus to processor frequency ratio", .ucode = 0x100, .uflags= INTEL_X86_NCOMBO, }, { .uname = "NO_OTHER", .udesc = "Bus cycles when core is active and the other is halted", .ucode = 0x200, .uflags= INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t core_l1d_cache_ld[]={ { .uname = "MESI", .udesc = "Any cacheline access", .uequiv = "M_STATE:E_STATE:S_STATE:I_STATE", .ucode = 0xf00, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "I_STATE", .udesc = "Invalid cacheline", .ucode = 0x100, }, { .uname = "S_STATE", .udesc = "Shared cacheline", .ucode = 0x200, }, { .uname = "E_STATE", .udesc = "Exclusive cacheline", .ucode = 0x400, }, { .uname = "M_STATE", .udesc = "Modified cacheline", .ucode = 0x800, }, }; static const intel_x86_umask_t core_l1d_split[]={ { .uname = "LOADS", .udesc = "Cache line split loads from the L1 data cache", .ucode = 0x100, }, { .uname = "STORES", .udesc = "Cache line split stores to the L1 data cache", .ucode = 0x200, }, }; static const intel_x86_umask_t core_sse_pre_miss[]={ { .uname = "NTA", .udesc = "Streaming SIMD Extensions (SSE) Prefetch NTA instructions missing all cache levels", .ucode = 0x0, }, { .uname = "L1", .udesc = "Streaming SIMD Extensions (SSE) PrefetchT0 instructions missing all cache levels", .ucode = 0x100, }, { .uname = "L2", .udesc = "Streaming SIMD Extensions (SSE) PrefetchT1 and PrefetchT2 instructions missing all cache levels", .ucode = 0x200, }, }; static const intel_x86_umask_t core_l1d_prefetch[]={ { .uname = "REQUESTS", .udesc = "L1 data cache prefetch requests", .ucode = 0x1000, .uflags= INTEL_X86_DFL, }, }; static const intel_x86_umask_t core_bus_request_outstanding[]={ { .uname = "SELF", .udesc = "This core", .ucode = 0x4000, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, .grpid = 0, }, { .uname = "BOTH_CORES", .udesc = "Both cores", .ucode = 0xc000, .uflags= INTEL_X86_NCOMBO, .grpid = 0, }, { .uname = "THIS_AGENT", .udesc 
= "This agent", .ucode = 0x0, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, .grpid = 1, }, { .uname = "ALL_AGENTS", .udesc = "Any agent on the bus", .ucode = 0x2000, .uflags= INTEL_X86_NCOMBO, .grpid = 1, }, }; static const intel_x86_umask_t core_bus_bnr_drv[]={ { .uname = "THIS_AGENT", .udesc = "This agent", .ucode = 0x0, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "ALL_AGENTS", .udesc = "Any agent on the bus", .ucode = 0x2000, .uflags= INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t core_ext_snoop[]={ { .uname = "ANY", .udesc = "Any external snoop response", .ucode = 0xb00, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, .grpid = 0, }, { .uname = "CLEAN", .udesc = "External snoop CLEAN response", .ucode = 0x100, .grpid = 0, }, { .uname = "HIT", .udesc = "External snoop HIT response", .ucode = 0x200, .grpid = 0, }, { .uname = "HITM", .udesc = "External snoop HITM response", .ucode = 0x800, .grpid = 0, }, { .uname = "THIS_AGENT", .udesc = "This agent", .ucode = 0x0, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, .grpid = 1, }, { .uname = "ALL_AGENTS", .udesc = "Any agent on the bus", .ucode = 0x2000, .uflags= INTEL_X86_NCOMBO, .grpid = 1, }, }; static const intel_x86_umask_t core_cmp_snoop[]={ { .uname = "ANY", .udesc = "L1 data cache is snooped by other core", .ucode = 0x300, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, .grpid = 0, }, { .uname = "SHARE", .udesc = "L1 data cache is snooped for sharing by other core", .ucode = 0x100, .grpid = 0, }, { .uname = "INVALIDATE", .udesc = "L1 data cache is snooped for Invalidation by other core", .ucode = 0x200, .grpid = 0, }, { .uname = "SELF", .udesc = "This core", .ucode = 0x4000, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, .grpid = 1, }, { .uname = "BOTH_CORES", .udesc = "Both cores", .ucode = 0xc000, .uflags= INTEL_X86_NCOMBO, .grpid = 1, }, }; static const intel_x86_umask_t core_itlb[]={ { .uname = "SMALL_MISS", .udesc = "ITLB small page misses", .ucode = 0x200, }, { .uname = "LARGE_MISS", .udesc = "ITLB large 
page misses", .ucode = 0x1000, }, { .uname = "FLUSH", .udesc = "ITLB flushes", .ucode = 0x4000, }, { .uname = "MISSES", .udesc = "ITLB misses", .ucode = 0x1200, .uflags= INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t core_inst_queue[]={ { .uname = "FULL", .udesc = "Cycles during which the instruction queue is full", .ucode = 0x200, .uflags= INTEL_X86_DFL, }, }; static const intel_x86_umask_t core_macro_insts[]={ { .uname = "DECODED", .udesc = "Instructions decoded", .ucode = 0x100, }, { .uname = "CISC_DECODED", .udesc = "CISC instructions decoded", .ucode = 0x800, }, }; static const intel_x86_umask_t core_esp[]={ { .uname = "SYNCH", .udesc = "ESP register content synchronization", .ucode = 0x100, }, { .uname = "ADDITIONS", .udesc = "ESP register automatic additions", .ucode = 0x200, }, }; static const intel_x86_umask_t core_simd_uop_type_exec[]={ { .uname = "MUL", .udesc = "SIMD packed multiply micro-ops executed", .ucode = 0x100, }, { .uname = "SHIFT", .udesc = "SIMD packed shift micro-ops executed", .ucode = 0x200, }, { .uname = "PACK", .udesc = "SIMD pack micro-ops executed", .ucode = 0x400, }, { .uname = "UNPACK", .udesc = "SIMD unpack micro-ops executed", .ucode = 0x800, }, { .uname = "LOGICAL", .udesc = "SIMD packed logical micro-ops executed", .ucode = 0x1000, }, { .uname = "ARITHMETIC", .udesc = "SIMD packed arithmetic micro-ops executed", .ucode = 0x2000, }, }; static const intel_x86_umask_t core_inst_retired[]={ { .uname = "ANY_P", .udesc = "Instructions retired (Precise Event)", .ucode = 0x0, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS | INTEL_X86_DFL, }, { .uname = "LOADS", .udesc = "Instructions retired, which contain a load", .ucode = 0x100, }, { .uname = "STORES", .udesc = "Instructions retired, which contain a store", .ucode = 0x200, }, { .uname = "OTHER", .udesc = "Instructions retired, with no load or store operation", .ucode = 0x400, }, }; static const intel_x86_umask_t core_x87_ops_retired[]={ { .uname = "FXCH", .udesc = "FXCH 
instructions retired", .ucode = 0x100, }, { .uname = "ANY", .udesc = "Retired floating-point computational operations (Precise Event)", .ucode = 0xfe00, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS | INTEL_X86_DFL, }, }; static const intel_x86_umask_t core_uops_retired[]={ { .uname = "LD_IND_BR", .udesc = "Fused load+op or load+indirect branch retired", .ucode = 0x100, }, { .uname = "STD_STA", .udesc = "Fused store address + data retired", .ucode = 0x200, }, { .uname = "MACRO_FUSION", .udesc = "Retired instruction pairs fused into one micro-op", .ucode = 0x400, }, { .uname = "NON_FUSED", .udesc = "Non-fused micro-ops retired", .ucode = 0x800, }, { .uname = "FUSED", .udesc = "Fused micro-ops retired", .ucode = 0x700, .uflags= INTEL_X86_NCOMBO, }, { .uname = "ANY", .udesc = "Micro-ops retired", .ucode = 0xf00, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, }; static const intel_x86_umask_t core_machine_nukes[]={ { .uname = "SMC", .udesc = "Self-Modifying Code detected", .ucode = 0x100, }, { .uname = "MEM_ORDER", .udesc = "Execution pipeline restart due to memory ordering conflict or memory disambiguation misprediction", .ucode = 0x400, }, }; static const intel_x86_umask_t core_br_inst_retired[]={ { .uname = "ANY", .udesc = "Retired branch instructions", .ucode = 0x0, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "PRED_NOT_TAKEN", .udesc = "Retired branch instructions that were predicted not-taken", .ucode = 0x100, }, { .uname = "MISPRED_NOT_TAKEN", .udesc = "Retired branch instructions that were mispredicted not-taken", .ucode = 0x200, }, { .uname = "PRED_TAKEN", .udesc = "Retired branch instructions that were predicted taken", .ucode = 0x400, }, { .uname = "MISPRED_TAKEN", .udesc = "Retired branch instructions that were mispredicted taken", .ucode = 0x800, }, { .uname = "TAKEN", .udesc = "Retired taken branch instructions", .ucode = 0xc00, .uflags= INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t core_simd_inst_retired[]={ { .uname = "PACKED_SINGLE", 
.udesc = "Retired Streaming SIMD Extensions (SSE) packed-single instructions", .ucode = 0x100, }, { .uname = "SCALAR_SINGLE", .udesc = "Retired Streaming SIMD Extensions (SSE) scalar-single instructions", .ucode = 0x200, }, { .uname = "PACKED_DOUBLE", .udesc = "Retired Streaming SIMD Extensions 2 (SSE2) packed-double instructions", .ucode = 0x400, }, { .uname = "SCALAR_DOUBLE", .udesc = "Retired Streaming SIMD Extensions 2 (SSE2) scalar-double instructions", .ucode = 0x800, }, { .uname = "VECTOR", .udesc = "Retired Streaming SIMD Extensions 2 (SSE2) vector integer instructions", .ucode = 0x1000, }, { .uname = "ANY", .udesc = "Retired Streaming SIMD instructions (Precise Event)", .ucode = 0x1f00, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS | INTEL_X86_DFL, }, }; static const intel_x86_umask_t core_simd_comp_inst_retired[]={ { .uname = "PACKED_SINGLE", .udesc = "Retired computational Streaming SIMD Extensions (SSE) packed-single instructions", .ucode = 0x100, }, { .uname = "SCALAR_SINGLE", .udesc = "Retired computational Streaming SIMD Extensions (SSE) scalar-single instructions", .ucode = 0x200, }, { .uname = "PACKED_DOUBLE", .udesc = "Retired computational Streaming SIMD Extensions 2 (SSE2) packed-double instructions", .ucode = 0x400, }, { .uname = "SCALAR_DOUBLE", .udesc = "Retired computational Streaming SIMD Extensions 2 (SSE2) scalar-double instructions", .ucode = 0x800, }, }; static const intel_x86_umask_t core_mem_load_retired[]={ { .uname = "L1D_MISS", .udesc = "Retired loads that miss the L1 data cache (Precise Event)", .ucode = 0x100, .uflags= INTEL_X86_PEBS, }, { .uname = "L1D_LINE_MISS", .udesc = "L1 data cache line missed by retired loads (Precise Event)", .ucode = 0x200, .uflags= INTEL_X86_PEBS, }, { .uname = "L2_MISS", .udesc = "Retired loads that miss the L2 cache (Precise Event)", .ucode = 0x400, .uflags= INTEL_X86_PEBS, }, { .uname = "L2_LINE_MISS", .udesc = "L2 cache line missed by retired loads (Precise Event)", .ucode = 0x800, .uflags= 
INTEL_X86_PEBS, }, { .uname = "DTLB_MISS", .udesc = "Retired loads that miss the DTLB (Precise Event)", .ucode = 0x1000, .uflags= INTEL_X86_PEBS, }, }; static const intel_x86_umask_t core_fp_mmx_trans[]={ { .uname = "TO_FP", .udesc = "Transitions from MMX (TM) Instructions to Floating Point Instructions", .ucode = 0x200, }, { .uname = "TO_MMX", .udesc = "Transitions from Floating Point to MMX (TM) Instructions", .ucode = 0x100, }, }; static const intel_x86_umask_t core_rat_stalls[]={ { .uname = "ROB_READ_PORT", .udesc = "ROB read port stalls cycles", .ucode = 0x100, }, { .uname = "PARTIAL_CYCLES", .udesc = "Partial register stall cycles", .ucode = 0x200, }, { .uname = "FLAGS", .udesc = "Flag stall cycles", .ucode = 0x400, }, { .uname = "FPSW", .udesc = "FPU status word stall", .ucode = 0x800, }, { .uname = "ANY", .udesc = "All RAT stall cycles", .ucode = 0xf00, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, }; static const intel_x86_umask_t core_seg_rename_stalls[]={ { .uname = "ES", .udesc = "Segment rename stalls - ES ", .ucode = 0x100, }, { .uname = "DS", .udesc = "Segment rename stalls - DS", .ucode = 0x200, }, { .uname = "FS", .udesc = "Segment rename stalls - FS", .ucode = 0x400, }, { .uname = "GS", .udesc = "Segment rename stalls - GS", .ucode = 0x800, }, { .uname = "ANY", .udesc = "Any (ES/DS/FS/GS) segment rename stall", .ucode = 0xf00, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, }; static const intel_x86_umask_t core_seg_reg_renames[]={ { .uname = "ES", .udesc = "Segment renames - ES", .ucode = 0x100, }, { .uname = "DS", .udesc = "Segment renames - DS", .ucode = 0x200, }, { .uname = "FS", .udesc = "Segment renames - FS", .ucode = 0x400, }, { .uname = "GS", .udesc = "Segment renames - GS", .ucode = 0x800, }, { .uname = "ANY", .udesc = "Any (ES/DS/FS/GS) segment rename", .ucode = 0xf00, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, }; static const intel_x86_umask_t core_resource_stalls[]={ { .uname = "ROB_FULL", .udesc = "Cycles during which the ROB is 
full", .ucode = 0x100, }, { .uname = "RS_FULL", .udesc = "Cycles during which the RS is full", .ucode = 0x200, }, { .uname = "LD_ST", .udesc = "Cycles during which the pipeline has exceeded load or store limit or waiting to commit all stores", .ucode = 0x400, }, { .uname = "FPCW", .udesc = "Cycles stalled due to FPU control word write", .ucode = 0x800, }, { .uname = "BR_MISS_CLEAR", .udesc = "Cycles stalled due to branch misprediction", .ucode = 0x1000, }, { .uname = "ANY", .udesc = "Resource related stalls", .ucode = 0x1f00, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, }; /* Event table (intel_core_pe): one initializer per event name. Entries that set .equiv are aliases of the event named there; entries that set .umasks/.numasks refer to one of the unit-mask tables defined above. */ static const intel_x86_entry_t intel_core_pe[]={ { .name = "UNHALTED_CORE_CYCLES", .desc = "Count core clock cycles whenever the clock signal on the specific core is running (not halted)", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x200000003ull, .code = 0x3c, }, { .name = "INSTRUCTION_RETIRED", .desc = "Count the number of instructions at retirement", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x100000003ull, .code = 0xc0, }, { .name = "INSTRUCTIONS_RETIRED", .desc = "This is an alias for INSTRUCTION_RETIRED", .modmsk = INTEL_X86_ATTRS, .equiv = "INSTRUCTION_RETIRED", .cntmsk = 0x100000003ull, .code = 0xc0, }, { .name = "UNHALTED_REFERENCE_CYCLES", .desc = "Unhalted reference cycles", .modmsk = INTEL_FIXED2_ATTRS, .cntmsk = 0x400000000ull, .code = 0x0300, /* pseudo encoding */ .flags = INTEL_X86_FIXED, }, { .name = "LLC_REFERENCES", .desc = "Count each request originating from the core to reference a cache line in the last level cache. The count may include speculation, but excludes cache line fills due to hardware prefetch. Alias to L2_RQSTS:SELF_DEMAND_MESI", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x4f2e, }, { .name = "LAST_LEVEL_CACHE_REFERENCES", .desc = "This is an alias for LLC_REFERENCES", .modmsk = INTEL_X86_ATTRS, .equiv = "LLC_REFERENCES", .cntmsk = 0x3, .code = 0x4f2e, }, { .name = "LLC_MISSES", .desc = "Count each cache miss condition for references to the last level cache.
The event count may include speculation, but excludes cache line fills due to hardware prefetch. Alias to event L2_RQSTS:SELF_DEMAND_I_STATE", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x412e, }, { .name = "LAST_LEVEL_CACHE_MISSES", .desc = "This is an alias for LLC_MISSES", .modmsk = INTEL_X86_ATTRS, .equiv = "LLC_MISSES", .cntmsk = 0x3, .code = 0x412e, }, { .name = "BRANCH_INSTRUCTIONS_RETIRED", .desc = "Count branch instructions at retirement. Specifically, this event counts the retirement of the last micro-op of a branch instruction.", .modmsk = INTEL_X86_ATTRS, .equiv = "BR_INST_RETIRED:ANY", .cntmsk = 0x3, .code = 0xc4, }, { .name = "MISPREDICTED_BRANCH_RETIRED", .desc = "Count mispredicted branch instructions at retirement. Specifically, this event counts at retirement of the last micro-op of a branch instruction in the architectural path of the execution and experienced misprediction in the branch prediction hardware.", .modmsk = INTEL_X86_ATTRS, .equiv = "BR_INST_RETIRED_MISPRED", .cntmsk = 0x3, .code = 0xc5, .flags= INTEL_X86_PEBS, }, { .name = "RS_UOPS_DISPATCHED_CYCLES", .desc = "Cycles micro-ops dispatched for execution", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x1, .code = 0xa1, .numasks = LIBPFM_ARRAY_SIZE(core_rs_uops_dispatched_cycles), .ngrp = 1, .umasks = core_rs_uops_dispatched_cycles, }, { .name = "RS_UOPS_DISPATCHED", .desc = "Number of micro-ops dispatched for execution", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xa0, }, /* derived event: same base code (0xa0) as RS_UOPS_DISPATCHED with the invert modifier and a counter mask of 1 folded directly into .code, as spelled out in .equiv; .modmsk is 0 for this entry */ { .name = "RS_UOPS_DISPATCHED_NONE", .desc = "Number of cycles in which no micro-ops are dispatched for execution", .modmsk =0x0, .equiv = "RS_UOPS_DISPATCHED:i=1:c=1", .cntmsk = 0x3, .code = 0xa0 | INTEL_X86_MOD_INV | (0x1 << INTEL_X86_CMASK_BIT), }, { .name = "LOAD_BLOCK", .desc = "Loads blocked", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x3, .numasks = LIBPFM_ARRAY_SIZE(core_load_block), .ngrp = 1, .umasks = core_load_block, }, { .name = "SB_DRAIN_CYCLES", .desc = "Cycles while stores are blocked
due to store buffer drain", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x104, }, { .name = "STORE_BLOCK", .desc = "Cycles while store is waiting", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x4, .numasks = LIBPFM_ARRAY_SIZE(core_store_block), .ngrp = 1, .umasks = core_store_block, }, { .name = "SEGMENT_REG_LOADS", .desc = "Number of segment register loads", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x6, }, { .name = "SSE_PRE_EXEC", .desc = "Streaming SIMD Extensions (SSE) Prefetch instructions executed", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x7, .numasks = LIBPFM_ARRAY_SIZE(core_sse_pre_exec), .ngrp = 1, .umasks = core_sse_pre_exec, }, { .name = "DTLB_MISSES", .desc = "Memory accesses that missed the DTLB", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x8, .numasks = LIBPFM_ARRAY_SIZE(core_dtlb_misses), .ngrp = 1, .umasks = core_dtlb_misses, }, { .name = "MEMORY_DISAMBIGUATION", .desc = "Memory disambiguation", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x9, .numasks = LIBPFM_ARRAY_SIZE(core_memory_disambiguation), .ngrp = 1, .umasks = core_memory_disambiguation, }, { .name = "PAGE_WALKS", .desc = "Number of page-walks executed", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xc, .numasks = LIBPFM_ARRAY_SIZE(core_page_walks), .ngrp = 1, .umasks = core_page_walks, }, { .name = "FP_COMP_OPS_EXE", .desc = "Floating point computational micro-ops executed", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x1, .code = 0x10, }, { .name = "FP_ASSIST", .desc = "Floating point assists", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x2, .code = 0x11, }, { .name = "MUL", .desc = "Multiply operations executed", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x2, .code = 0x12, }, { .name = "DIV", .desc = "Divide operations executed", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x2, .code = 0x13, }, { .name = "CYCLES_DIV_BUSY", .desc = "Cycles the divider is busy", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x1, .code = 0x14, }, { .name = "IDLE_DURING_DIV", .desc = 
"Cycles the divider is busy and all other execution units are idle", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x1, .code = 0x18, }, { .name = "DELAYED_BYPASS", .desc = "Delayed bypass", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x2, .code = 0x19, .numasks = LIBPFM_ARRAY_SIZE(core_delayed_bypass), .ngrp = 1, .umasks = core_delayed_bypass, }, { .name = "L2_ADS", .desc = "Cycles L2 address bus is in use", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x21, .numasks = LIBPFM_ARRAY_SIZE(core_l2_ads), .ngrp = 1, .umasks = core_l2_ads, }, { .name = "L2_DBUS_BUSY_RD", .desc = "Cycles the L2 transfers data to the core", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x23, .numasks = LIBPFM_ARRAY_SIZE(core_l2_ads), .ngrp = 1, .umasks = core_l2_ads, /* identical to actual umasks list for this event */ }, { .name = "L2_LINES_IN", .desc = "L2 cache misses", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x24, .numasks = LIBPFM_ARRAY_SIZE(core_l2_lines_in), .ngrp = 2, .umasks = core_l2_lines_in, }, { .name = "L2_M_LINES_IN", .desc = "L2 cache line modifications", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x25, .numasks = LIBPFM_ARRAY_SIZE(core_l2_ads), .ngrp = 1, .umasks = core_l2_ads, /* identical to actual umasks list for this event */ }, { .name = "L2_LINES_OUT", .desc = "L2 cache lines evicted", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x26, .numasks = LIBPFM_ARRAY_SIZE(core_l2_lines_in), .ngrp = 2, .umasks = core_l2_lines_in, /* identical to actual umasks list for this event */ }, { .name = "L2_M_LINES_OUT", .desc = "Modified lines evicted from the L2 cache", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x27, .numasks = LIBPFM_ARRAY_SIZE(core_l2_lines_in), .ngrp = 2, .umasks = core_l2_lines_in, /* identical to actual umasks list for this event */ }, { .name = "L2_IFETCH", .desc = "L2 cacheable instruction fetch requests", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x28, .numasks = LIBPFM_ARRAY_SIZE(core_l2_ifetch), .ngrp = 2, .umasks = 
core_l2_ifetch, }, { .name = "L2_LD", .desc = "L2 cache reads", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x29, .numasks = LIBPFM_ARRAY_SIZE(core_l2_ld), .ngrp = 3, .umasks = core_l2_ld, }, { .name = "L2_ST", .desc = "L2 store requests", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x2a, .numasks = LIBPFM_ARRAY_SIZE(core_l2_ifetch), .ngrp = 2, .umasks = core_l2_ifetch, /* identical to actual umasks list for this event */ }, { .name = "L2_LOCK", .desc = "L2 locked accesses", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x2b, .numasks = LIBPFM_ARRAY_SIZE(core_l2_ifetch), .ngrp = 2, .umasks = core_l2_ifetch, /* identical to actual umasks list for this event */ }, { .name = "L2_RQSTS", .desc = "L2 cache requests", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x2e, .numasks = LIBPFM_ARRAY_SIZE(core_l2_ld), .ngrp = 3, .umasks = core_l2_ld, /* identical to actual umasks list for this event */ }, { .name = "L2_REJECT_BUSQ", .desc = "Rejected L2 cache requests", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x30, .numasks = LIBPFM_ARRAY_SIZE(core_l2_ld), .ngrp = 3, .umasks = core_l2_ld, /* identical to actual umasks list for this event */ }, { .name = "L2_NO_REQ", .desc = "Cycles no L2 cache requests are pending", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x32, .numasks = LIBPFM_ARRAY_SIZE(core_l2_ads), .ngrp = 1, .umasks = core_l2_ads, /* identical to actual umasks list for this event */ }, { .name = "EIST_TRANS", .desc = "Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x3a, }, { .name = "THERMAL_TRIP", .desc = "Number of thermal trips", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xc03b, }, { .name = "CPU_CLK_UNHALTED", .desc = "Core cycles when core is not halted", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x3c, .numasks = LIBPFM_ARRAY_SIZE(core_cpu_clk_unhalted), .ngrp = 1, .umasks = core_cpu_clk_unhalted, }, { .name = "L1D_CACHE_LD", .desc = "L1 
cacheable data reads", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x40, .numasks = LIBPFM_ARRAY_SIZE(core_l1d_cache_ld), .ngrp = 1, .umasks = core_l1d_cache_ld, }, { .name = "L1D_CACHE_ST", .desc = "L1 cacheable data writes", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x41, .numasks = LIBPFM_ARRAY_SIZE(core_l1d_cache_ld), .ngrp = 1, .umasks = core_l1d_cache_ld, /* identical to actual umasks list for this event */ }, { .name = "L1D_CACHE_LOCK", .desc = "L1 data cacheable locked reads", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x42, .numasks = LIBPFM_ARRAY_SIZE(core_l1d_cache_ld), .ngrp = 1, .umasks = core_l1d_cache_ld, /* identical to actual umasks list for this event */ }, { .name = "L1D_ALL_REF", .desc = "All references to the L1 data cache", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x143, }, { .name = "L1D_ALL_CACHE_REF", .desc = "L1 Data cacheable reads and writes", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x243, }, { .name = "L1D_REPL", .desc = "Cache lines allocated in the L1 data cache", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xf45, }, { .name = "L1D_M_REPL", .desc = "Modified cache lines allocated in the L1 data cache", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x46, }, { .name = "L1D_M_EVICT", .desc = "Modified cache lines evicted from the L1 data cache", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x47, }, { .name = "L1D_PEND_MISS", .desc = "Total number of outstanding L1 data cache misses at any cycle", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x48, }, { .name = "L1D_SPLIT", .desc = "Cache line split from L1 data cache", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x49, .numasks = LIBPFM_ARRAY_SIZE(core_l1d_split), .ngrp = 1, .umasks = core_l1d_split, }, { .name = "SSE_PRE_MISS", .desc = "Streaming SIMD Extensions (SSE) instructions missing all cache levels", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x4b, .numasks = LIBPFM_ARRAY_SIZE(core_sse_pre_miss), .ngrp = 1, 
.umasks = core_sse_pre_miss, }, { .name = "LOAD_HIT_PRE", .desc = "Load operations conflicting with a software prefetch to the same address", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x4c, }, { .name = "L1D_PREFETCH", .desc = "L1 data cache prefetch", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x4e, .numasks = LIBPFM_ARRAY_SIZE(core_l1d_prefetch), .ngrp = 1, .umasks = core_l1d_prefetch, }, { .name = "BUS_REQUEST_OUTSTANDING", .desc = "Number of pending full cache line read transactions on the bus occurring in each cycle", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x60, .numasks = LIBPFM_ARRAY_SIZE(core_bus_request_outstanding), .ngrp = 2, .umasks = core_bus_request_outstanding, }, { .name = "BUS_BNR_DRV", .desc = "Number of Bus Not Ready signals asserted", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x61, .numasks = LIBPFM_ARRAY_SIZE(core_bus_bnr_drv), .ngrp = 1, .umasks = core_bus_bnr_drv, }, { .name = "BUS_DRDY_CLOCKS", .desc = "Bus cycles when data is sent on the bus", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x62, .numasks = LIBPFM_ARRAY_SIZE(core_bus_bnr_drv), .ngrp = 1, .umasks = core_bus_bnr_drv, /* identical to actual umasks list for this event */ }, { .name = "BUS_LOCK_CLOCKS", .desc = "Bus cycles when a LOCK signal is asserted", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x63, .numasks = LIBPFM_ARRAY_SIZE(core_bus_request_outstanding), .ngrp = 2, .umasks = core_bus_request_outstanding, /* identical to actual umasks list for this event */ }, { .name = "BUS_DATA_RCV", .desc = "Bus cycles while processor receives data", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x64, .numasks = LIBPFM_ARRAY_SIZE(core_l2_ads), .ngrp = 1, .umasks = core_l2_ads, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRANS_BRD", .desc = "Burst read bus transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x65, .numasks = LIBPFM_ARRAY_SIZE(core_bus_request_outstanding), .ngrp = 2, .umasks = 
core_bus_request_outstanding, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRANS_RFO", .desc = "RFO bus transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x66, .numasks = LIBPFM_ARRAY_SIZE(core_bus_request_outstanding), .ngrp = 2, .umasks = core_bus_request_outstanding, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRANS_WB", .desc = "Explicit writeback bus transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x67, .numasks = LIBPFM_ARRAY_SIZE(core_bus_request_outstanding), .ngrp = 2, .umasks = core_bus_request_outstanding, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRANS_IFETCH", .desc = "Instruction-fetch bus transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x68, .numasks = LIBPFM_ARRAY_SIZE(core_bus_request_outstanding), .ngrp = 2, .umasks = core_bus_request_outstanding, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRANS_INVAL", .desc = "Invalidate bus transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x69, .numasks = LIBPFM_ARRAY_SIZE(core_bus_request_outstanding), .ngrp = 2, .umasks = core_bus_request_outstanding, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRANS_PWR", .desc = "Partial write bus transaction", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x6a, .numasks = LIBPFM_ARRAY_SIZE(core_bus_request_outstanding), .ngrp = 2, .umasks = core_bus_request_outstanding, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRANS_P", .desc = "Partial bus transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x6b, .numasks = LIBPFM_ARRAY_SIZE(core_bus_request_outstanding), .ngrp = 2, .umasks = core_bus_request_outstanding, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRANS_IO", .desc = "IO bus transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x6c, .numasks = 
LIBPFM_ARRAY_SIZE(core_bus_request_outstanding), .ngrp = 2, .umasks = core_bus_request_outstanding, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRANS_DEF", .desc = "Deferred bus transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x6d, .numasks = LIBPFM_ARRAY_SIZE(core_bus_request_outstanding), .ngrp = 2, .umasks = core_bus_request_outstanding, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRANS_BURST", .desc = "Burst (full cache-line) bus transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x6e, .numasks = LIBPFM_ARRAY_SIZE(core_bus_request_outstanding), .ngrp = 2, .umasks = core_bus_request_outstanding, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRANS_MEM", .desc = "Memory bus transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x6f, .numasks = LIBPFM_ARRAY_SIZE(core_bus_request_outstanding), .ngrp = 2, .umasks = core_bus_request_outstanding, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRANS_ANY", .desc = "All bus transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x70, .numasks = LIBPFM_ARRAY_SIZE(core_bus_request_outstanding), .ngrp = 2, .umasks = core_bus_request_outstanding, /* identical to actual umasks list for this event */ }, { .name = "EXT_SNOOP", .desc = "External snoops responses", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x77, .numasks = LIBPFM_ARRAY_SIZE(core_ext_snoop), .ngrp = 2, .umasks = core_ext_snoop, }, { .name = "CMP_SNOOP", .desc = "L1 data cache is snooped by other core", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x78, .numasks = LIBPFM_ARRAY_SIZE(core_cmp_snoop), .ngrp = 2, .umasks = core_cmp_snoop, }, { .name = "BUS_HIT_DRV", .desc = "HIT signal asserted", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x7a, .numasks = LIBPFM_ARRAY_SIZE(core_bus_bnr_drv), .ngrp = 1, .umasks = core_bus_bnr_drv, /* identical to actual umasks list for this event */ }, { .name = 
"BUS_HITM_DRV", .desc = "HITM signal asserted", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x7b, .numasks = LIBPFM_ARRAY_SIZE(core_bus_bnr_drv), .ngrp = 1, .umasks = core_bus_bnr_drv, /* identical to actual umasks list for this event */ }, { .name = "BUSQ_EMPTY", .desc = "Bus queue is empty", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x7d, .numasks = LIBPFM_ARRAY_SIZE(core_bus_bnr_drv), .ngrp = 1, .umasks = core_bus_bnr_drv, /* identical to actual umasks list for this event */ }, { .name = "SNOOP_STALL_DRV", .desc = "Bus stalled for snoops", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x7e, .numasks = LIBPFM_ARRAY_SIZE(core_bus_request_outstanding), .ngrp = 2, .umasks = core_bus_request_outstanding, /* identical to actual umasks list for this event */ }, { .name = "BUS_IO_WAIT", .desc = "IO requests waiting in the bus queue", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x7f, .numasks = LIBPFM_ARRAY_SIZE(core_l2_ads), .ngrp = 1, .umasks = core_l2_ads, /* identical to actual umasks list for this event */ }, { .name = "L1I_READS", .desc = "Instruction fetches", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x80, }, { .name = "L1I_MISSES", .desc = "Instruction Fetch Unit misses", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x81, }, { .name = "ITLB", .desc = "ITLB small page misses", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x82, .numasks = LIBPFM_ARRAY_SIZE(core_itlb), .ngrp = 1, .umasks = core_itlb, }, { .name = "INST_QUEUE", .desc = "Cycles during which the instruction queue is full", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x83, .numasks = LIBPFM_ARRAY_SIZE(core_inst_queue), .ngrp = 1, .umasks = core_inst_queue, }, { .name = "CYCLES_L1I_MEM_STALLED", .desc = "Cycles during which instruction fetches are stalled", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x86, }, { .name = "ILD_STALL", .desc = "Instruction Length Decoder stall cycles due to a length changing prefix", .modmsk = INTEL_X86_ATTRS, .cntmsk = 
0x3, .code = 0x87, }, { .name = "BR_INST_EXEC", .desc = "Branch instructions executed", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x88, }, { .name = "BR_MISSP_EXEC", .desc = "Mispredicted branch instructions executed", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x89, }, { .name = "BR_BAC_MISSP_EXEC", .desc = "Branch instructions mispredicted at decoding", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x8a, }, { .name = "BR_CND_EXEC", .desc = "Conditional branch instructions executed", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x8b, }, { .name = "BR_CND_MISSP_EXEC", .desc = "Mispredicted conditional branch instructions executed", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x8c, }, { .name = "BR_IND_EXEC", .desc = "Indirect branch instructions executed", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x8d, }, { .name = "BR_IND_MISSP_EXEC", .desc = "Mispredicted indirect branch instructions executed", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x8e, }, { .name = "BR_RET_EXEC", .desc = "RET instructions executed", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x8f, }, { .name = "BR_RET_MISSP_EXEC", .desc = "Mispredicted RET instructions executed", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x90, }, { .name = "BR_RET_BAC_MISSP_EXEC", .desc = "RET instructions executed mispredicted at decoding", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x91, }, { .name = "BR_CALL_EXEC", .desc = "CALL instructions executed", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x92, }, { .name = "BR_CALL_MISSP_EXEC", .desc = "Mispredicted CALL instructions executed", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x93, }, { .name = "BR_IND_CALL_EXEC", .desc = "Indirect CALL instructions executed", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x94, }, { .name = "BR_TKN_BUBBLE_1", .desc = "Branch predicted taken with bubble I", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x97, }, { .name = "BR_TKN_BUBBLE_2", .desc = 
"Branch predicted taken with bubble II", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x98, }, { .name = "MACRO_INSTS", .desc = "Instructions decoded", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xaa, .numasks = LIBPFM_ARRAY_SIZE(core_macro_insts), .ngrp = 1, .umasks = core_macro_insts, }, { .name = "ESP", .desc = "ESP register content synchronization", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xab, .numasks = LIBPFM_ARRAY_SIZE(core_esp), .ngrp = 1, .umasks = core_esp, }, { .name = "SIMD_UOPS_EXEC", .desc = "SIMD micro-ops executed (excluding stores)", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xb0, }, { .name = "SIMD_SAT_UOP_EXEC", .desc = "SIMD saturated arithmetic micro-ops executed", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xb1, }, { .name = "SIMD_UOP_TYPE_EXEC", .desc = "SIMD packed multiply micro-ops executed", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xb3, .numasks = LIBPFM_ARRAY_SIZE(core_simd_uop_type_exec), .ngrp = 1, .umasks = core_simd_uop_type_exec, }, { .name = "INST_RETIRED", .desc = "Instructions retired", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xc0, .flags= INTEL_X86_PEBS, .numasks = LIBPFM_ARRAY_SIZE(core_inst_retired), .ngrp = 1, .umasks = core_inst_retired, }, { .name = "X87_OPS_RETIRED", .desc = "FXCH instructions retired", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xc1, .flags= INTEL_X86_PEBS, .numasks = LIBPFM_ARRAY_SIZE(core_x87_ops_retired), .ngrp = 1, .umasks = core_x87_ops_retired, }, { .name = "UOPS_RETIRED", .desc = "Fused load+op or load+indirect branch retired", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xc2, .numasks = LIBPFM_ARRAY_SIZE(core_uops_retired), .ngrp = 1, .umasks = core_uops_retired, }, { .name = "MACHINE_NUKES", .desc = "Self-Modifying Code detected", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xc3, .numasks = LIBPFM_ARRAY_SIZE(core_machine_nukes), .ngrp = 1, .umasks = core_machine_nukes, }, { .name = "BR_INST_RETIRED", .desc = "Retired 
branch instructions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xc4, .numasks = LIBPFM_ARRAY_SIZE(core_br_inst_retired), .ngrp = 1, .umasks = core_br_inst_retired, }, { .name = "BR_INST_RETIRED_MISPRED", .desc = "Retired mispredicted branch instructions (Precise_Event)", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xc5, .flags= INTEL_X86_PEBS, }, { .name = "CYCLES_INT_MASKED", .desc = "Cycles during which interrupts are disabled", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x1c6, }, { .name = "CYCLES_INT_PENDING_AND_MASKED", .desc = "Cycles during which interrupts are pending and disabled", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x2c6, }, { .name = "SIMD_INST_RETIRED", .desc = "Retired Streaming SIMD Extensions (SSE) packed-single instructions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xc7, .flags= INTEL_X86_PEBS, .numasks = LIBPFM_ARRAY_SIZE(core_simd_inst_retired), .ngrp = 1, .umasks = core_simd_inst_retired, }, { .name = "HW_INT_RCV", .desc = "Hardware interrupts received", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xc8, }, { .name = "ITLB_MISS_RETIRED", .desc = "Retired instructions that missed the ITLB", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xc9, }, { .name = "SIMD_COMP_INST_RETIRED", .desc = "Retired computational Streaming SIMD Extensions (SSE) packed-single instructions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xca, .numasks = LIBPFM_ARRAY_SIZE(core_simd_comp_inst_retired), .ngrp = 1, .umasks = core_simd_comp_inst_retired, }, { .name = "MEM_LOAD_RETIRED", .desc = "Retired loads that miss the L1 data cache", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x1, .code = 0xcb, .flags= INTEL_X86_PEBS, .numasks = LIBPFM_ARRAY_SIZE(core_mem_load_retired), .ngrp = 1, .umasks = core_mem_load_retired, }, { .name = "FP_MMX_TRANS", .desc = "Transitions from MMX (TM) Instructions to Floating Point Instructions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xcc, .numasks = 
LIBPFM_ARRAY_SIZE(core_fp_mmx_trans), .ngrp = 1, .umasks = core_fp_mmx_trans, }, { .name = "SIMD_ASSIST", .desc = "SIMD assists invoked", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xcd, }, { .name = "SIMD_INSTR_RETIRED", .desc = "SIMD Instructions retired", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xce, }, { .name = "SIMD_SAT_INSTR_RETIRED", .desc = "Saturated arithmetic instructions retired", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xcf, }, { .name = "RAT_STALLS", .desc = "ROB read port stalls cycles", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xd2, .numasks = LIBPFM_ARRAY_SIZE(core_rat_stalls), .ngrp = 1, .umasks = core_rat_stalls, }, { .name = "SEG_RENAME_STALLS", .desc = "Segment rename stalls - ES ", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xd4, .numasks = LIBPFM_ARRAY_SIZE(core_seg_rename_stalls), .ngrp = 1, .umasks = core_seg_rename_stalls, }, { .name = "SEG_REG_RENAMES", .desc = "Segment renames - ES", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xd5, .numasks = LIBPFM_ARRAY_SIZE(core_seg_reg_renames), .ngrp = 1, .umasks = core_seg_reg_renames, }, { .name = "RESOURCE_STALLS", .desc = "Cycles during which the ROB is full", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xdc, .numasks = LIBPFM_ARRAY_SIZE(core_resource_stalls), .ngrp = 1, .umasks = core_resource_stalls, }, { .name = "BR_INST_DECODED", .desc = "Branch instructions decoded", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xe0, }, { .name = "BOGUS_BR", .desc = "Bogus branches", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xe4, }, { .name = "BACLEARS", .desc = "BACLEARS asserted", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xe6, }, { .name = "PREF_RQSTS_UP", .desc = "Upward prefetches issued from the DPL", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xf0, }, { .name = "PREF_RQSTS_DN", .desc = "Downward prefetches issued from the DPL", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xf8, }, }; 
papi-5.6.0/src/libpfm4/debian/docs000664 001750 001750 00000000007 13216244363 020753 0ustar00jshenry1963jshenry1963000000 000000 README papi-5.6.0/src/components/lustre/fake_proc/fs/lustre/llite/hpcdata-ffff81022a732800/read_ahead_stats000664 001750 001750 00000000720 13216244357 034752 0ustar00jshenry1963jshenry1963000000 000000 snapshot_time: 1251851453.382275 (secs.usecs) pending issued pages: 0 hits 7301235 misses 10546 readpage not consecutive 14369 miss inside window 1 failed grab_cache_page 6285314 failed lock match 0 read but discarded 98955 zero length file 0 zero size window 3495 read-ahead to EOF 172 hit max r-a issue 783042 wrong page from grab_cache_page 0 papi-5.6.0/man/man3/PAPIF_num_cmp_hwctrs.3000664 001750 001750 00000001055 13216244355 022203 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPIF_num_cmp_hwctrs" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPIF_num_cmp_hwctrs \- .PP Return the number of hardware counters on the specified component\&. .SH SYNOPSIS .br .PP .SH "Detailed Description" .PP .PP \fBFortran Interface:\fP .RS 4 #include 'fpapi\&.h' .br \fBPAPIF_num_cmp_hwctrs( C_INT cidx, C_INT num )\fP .RE .PP \fBSee Also:\fP .RS 4 \fBPAPI_num_hwctrs\fP .PP \fBPAPI_num_cmp_hwctrs\fP .RE .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. papi-5.6.0/src/libpfm-3.y/examples_v3.x/ia64/ita2_dear.c000664 001750 001750 00000024447 13216244362 024453 0ustar00jshenry1963jshenry1963000000 000000 /* * ita2_dear.c - example of how use the D-EAR with the Itanium 2 PMU * * Copyright (c) 2003-2006 Hewlett-Packard Development Company, L.P. 
* Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux/ia64. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #define NUM_PMCS PFMLIB_MAX_PMCS #define NUM_PMDS PFMLIB_MAX_PMDS #define MAX_EVT_NAME_LEN 128 #define MAX_PMU_NAME_LEN 32 #define SMPL_PERIOD (40) #define EVENT_NAME "data_ear_cache_lat4" typedef pfm_dfl_smpl_hdr_t dear_hdr_t; typedef pfm_dfl_smpl_entry_t dear_entry_t; typedef pfm_dfl_smpl_arg_t smpl_arg_t; static void *smpl_vaddr; static unsigned long entry_size; static int id; #define BPL (sizeof(uint64_t)<<3) #define LBPL 6 /* * Set bit rnum in the bit vector bv: the word index is rnum >> LBPL and * the bit within that word is rnum modulo BPL (BPL is the number of * bits in a uint64_t, LBPL its base-2 logarithm). */ static inline void pfm_bv_set(uint64_t *bv, uint16_t rnum) { bv[rnum>>LBPL] |= 1UL << (rnum&(BPL-1)); } long do_test(unsigned long size) { unsigned long i, sum = 0; int *array; printf("buffer size %.1fMB\n", (size*sizeof(int))/1024.0); array = (int *)malloc(size * sizeof(int)); if (array == NULL ) { printf("line = %d No memory available!\n", __LINE__); exit(1); } for(i=0; ihdr_overflows <= last_ovfl && last_ovfl != ~0UL) { printf("skipping identical set of samples %lu <= %lu\n", hdr->hdr_overflows, last_ovfl); return; } pos = (unsigned long)(hdr+1); /* * walk through all the entries recorded in the buffer */ for(i=0; i < hdr->hdr_count; i++) { ret = 0; ent = (dear_entry_t *)pos; /* * print entry header */ safe_printf("Entry %ld PID:%d TID:%d CPU:%d STAMP:0x%lx IIP:0x%016lx\n", smpl_entry++, ent->tgid, ent->pid, ent->cpu, ent->tstamp, ent->ip); /* * point to first recorded register (always contiguous with entry header) */ reg = (pfm_ita2_pmd_reg_t*)(ent+1); safe_printf("PMD2 : 0x%016lx\n", reg->pmd_val); reg++; safe_printf("PMD3 : 0x%016lx, latency %u\n", reg->pmd_val, reg->pmd3_ita2_reg.dear_latency); reg++; safe_printf("PMD17: 0x%016lx, valid %c, address 0x%016lx\n", reg->pmd_val, reg->pmd17_ita2_reg.dear_vl ?
'Y': 'N', (reg->pmd17_ita2_reg.dear_iaddr << 4) | (unsigned long)reg->pmd17_ita2_reg.dear_slot); /* * move to next entry */ pos += entry_size; } } static void overflow_handler(int n, struct siginfo *info, struct sigcontext *sc) { /* dangerous */ printf("Notification received\n"); process_smpl_buffer(); /* * And resume monitoring */ if (pfm_restart(id) == -1) { perror("pfm_restart"); exit(1); } } int main(void) { pfarg_pmr_t pd[NUM_PMDS]; pfarg_pmr_t pc[NUM_PMCS]; pfarg_pmd_attr_t pa[NUM_PMDS]; pfmlib_input_param_t inp; pfmlib_output_param_t outp; smpl_arg_t buf_arg; pfmlib_options_t pfmlib_options; struct sigaction act; unsigned int i; int ret, type = 0; /* * Initialize pfm library (required before we can use it) */ ret = pfm_initialize(); if (ret != PFMLIB_SUCCESS) fatal_error("Cannot initialize library: %s\n", pfm_strerror(ret)); /* * Let's make sure we run this on the right CPU */ pfm_get_pmu_type(&type); if (type != PFMLIB_ITANIUM2_PMU) { char model[MAX_PMU_NAME_LEN]; pfm_get_pmu_name(model, MAX_PMU_NAME_LEN); fatal_error("this program does not work with %s PMU\n", model); } /* * Install the overflow handler (SIGIO) */ memset(&act, 0, sizeof(act)); act.sa_handler = (sig_t)overflow_handler; sigaction (SIGIO, &act, 0); /* * pass options to library (optional) */ memset(&pfmlib_options, 0, sizeof(pfmlib_options)); pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ pfmlib_options.pfm_verbose = 1; /* set to 1 for debug */ pfm_set_options(&pfmlib_options); memset(pd, 0, sizeof(pd)); memset(pc, 0, sizeof(pc)); memset(pa, 0, sizeof(pa)); memset(&buf_arg, 0, sizeof(buf_arg)); /* * prepare parameters to library. we don't use any Itanium * specific features here. so the pfp_model is NULL. */ memset(&inp,0, sizeof(inp)); memset(&outp,0, sizeof(outp)); /* * To count the number of occurence of this instruction, we must * program a counting monitor with the IA64_TAGGED_INST_RETIRED_PMC8 * event. 
*/ if (pfm_find_full_event(EVENT_NAME, &inp.pfp_events[0]) != PFMLIB_SUCCESS) fatal_error("cannot find event %s\n", EVENT_NAME); /* * set the (global) privilege mode: * PFM_PLM0 : kernel level only */ inp.pfp_dfl_plm = PFM_PLM3|PFM_PLM0; /* * how many counters we use */ inp.pfp_event_count = 1; /* * let the library figure out the values for the PMCS * * We use all global settings for this EAR. */ if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); /* * the size of the buffer is indicated in bytes (not entries). * * The kernel will record into the buffer up to a certain point. * No partial samples are ever recorded. */ buf_arg.buf_size = getpagesize(); /* * now create the session */ id = pfm_create(PFM_FL_SMPL_FMT, NULL, "default", &buf_arg, sizeof(buf_arg)); if (id == -1) { if (errno == ENOSYS) { fatal_error("Your kernel does not have performance monitoring support!\n"); } fatal_error("cannot create session %s\n", strerror(errno)); } /* * retrieve the virtual address at which the sampling * buffer has been mapped */ smpl_vaddr = mmap(NULL, (size_t)buf_arg.buf_size, PROT_READ, MAP_PRIVATE, id, 0); if (smpl_vaddr == MAP_FAILED) fatal_error("cannot mmap sampling buffer errno %d\n", errno); printf("Sampling buffer mapped at %p\n", smpl_vaddr); /* * Now prepare the argument to initialize the PMDs and PMCS. * We must pfp_pmc_count to determine the number of PMC to intialize. * We must use pfp_event_count to determine the number of PMD to initialize. * Some events causes extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. * * This step is new compared to libpfm-2.x. It is necessary because the library no * longer knows about the kernel data structures. 
*/ for (i=0; i < outp.pfp_pmc_count; i++) { pc[i].reg_num = outp.pfp_pmcs[i].reg_num; pc[i].reg_value = outp.pfp_pmcs[i].reg_value; } /* * figure out pmd mapping from output pmc */ for (i=0; i < outp.pfp_pmd_count; i++) pd[i].reg_num = outp.pfp_pmds[i].reg_num; /* * indicate we want notification when buffer is full */ pd[0].reg_flags |= PFM_REGFL_OVFL_NOTIFY; pfm_bv_set(pa[0].reg_smpl_pmds, 2); pfm_bv_set(pa[0].reg_smpl_pmds, 3); pfm_bv_set(pa[0].reg_smpl_pmds, 17); entry_size = sizeof(dear_entry_t) + 3 * 8; /* * initialize the PMD and the sampling period */ pd[0].reg_value = - SMPL_PERIOD; pa[0].reg_long_reset = - SMPL_PERIOD; pa[0].reg_short_reset = - SMPL_PERIOD; /* * Now program the registers * * We don't use the save variable to indicate the number of elements passed to * the kernel because, as we said earlier, pc may contain more elements than * the number of events we specified, i.e., contains more thann coutning monitors. */ if (pfm_write(id, 0, PFM_RW_PMC, pc, outp.pfp_pmc_count * sizeof(*pc)) == -1) fatal_error("pfm_write error errno %d\n",errno); if (pfm_write(id, 0, PFM_RW_PMD, pd, outp.pfp_pmd_count * sizeof(*pc)) == -1) fatal_error("pfm_write(PMD) error errno %d\n",errno); /* * attach session to stopped task */ if (pfm_attach(id, 0, getpid()) == -1) fatal_error("pfm_attach error errno %d\n",errno); /* * setup asynchronous notification on the file descriptor */ ret = fcntl(id, F_SETFL, fcntl(id, F_GETFL, 0) | O_ASYNC); if (ret == -1) fatal_error("cannot set ASYNC: %s\n", strerror(errno)); /* * get ownership of the descriptor */ ret = fcntl(id, F_SETOWN, getpid()); if (ret == -1) fatal_error("cannot setown: %s\n", strerror(errno)); /* * Let's roll now. 
*/ if (pfm_set_state(id, 0, PFM_ST_START)) fatal_error("pfm_set_state error errno %d\n",errno); do_test(100000); if (pfm_set_state(id, 0, PFM_ST_STOP)) fatal_error("pfm_set_state error errno %d\n",errno); /* * We must call the processing routine to cover the last entries recorded * in the sampling buffer, i.e. which may not be full */ process_smpl_buffer(); /* * let's stop this now */ munmap(smpl_vaddr, (size_t)buf_arg.buf_size); close(id); return 0; } papi-5.6.0/src/libpfm-3.y/python/Makefile000664 001750 001750 00000002423 13216244363 022166 0ustar00jshenry1963jshenry1963000000 000000 # # Copyright (c) 2008 Google, Inc. # Contributed by Arun Sharma # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies # of the Software, and to permit persons to whom the Software is furnished to do so, # subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF # CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE # OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
# all: ./setup.py build install: ./setup.py install clean: $(RM) src/perfmon_int_wrap.c src/perfmon_int.py src/*.pyc $(RM) -r build papi-5.6.0/man/man3/000775 001750 001750 00000000000 13216244356 016150 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/doc/Doxyfile-html000664 001750 001750 00000045703 13216244355 017764 0ustar00jshenry1963jshenry1963000000 000000 # Doxyfile 1.6.2 # This file describes the settings to be used by the documentation system # doxygen (www.doxygen.org) for a project # # All text after a hash (#) is considered a comment and will be ignored # The format is: # TAG = value [value, ...] # For lists items can also be appended using: # TAG += value [value, ...] # Values that contain spaces should be placed between quotes (" ") @INCLUDE = Doxyfile-common #--------------------------------------------------------------------------- # Configuration options related to the preprocessor #--------------------------------------------------------------------------- # If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will # evaluate all C-preprocessor directives found in the sources and include # files. ENABLE_PREPROCESSING = YES # If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro # names in the source code. If set to NO (the default) only conditional # compilation will be performed. Macro expansion can be done in a controlled # way by setting EXPAND_ONLY_PREDEF to YES. MACRO_EXPANSION = YES # If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES # then the macro expansion is limited to the macros specified with the # PREDEFINED and EXPAND_AS_DEFINED tags. EXPAND_ONLY_PREDEF = YES # The PREDEFINED tag can be used to specify one or more macro names that # are defined before the preprocessor is started (similar to the -D option of # gcc). The argument of the tag is a list of macros of the form: name # or name=definition (no spaces). If the definition and the = are # omitted =1 is assumed. 
To prevent a macro definition from being # undefined via #undef or recursively expanded use the := operator # instead of the = operator. PREDEFINED = DEBUG # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then # this tag can be used to specify a list of macro names that should be expanded. # The macro definition that is found in the sources will be used. # Use the PREDEFINED tag if you want to use a different macro definition that # overrules the definition found in the source code. EXPAND_AS_DEFINED = PAPIERROR LEAKDBG MEMDBG MPXDBG OVFDBG PAPIDEBUG SUBDBG PRFDBG INTDBG THRDBG APIDBG #--------------------------------------------------------------------------- # Build related configuration options #--------------------------------------------------------------------------- # If the CREATE_SUBDIRS tag is set to YES, then doxygen will create # 4096 sub-directories (in 2 levels) under the output directory of each output # format and will distribute the generated files over these directories. # Enabling this option can be useful when feeding doxygen a huge amount of # source files, where putting all generated files in the same directory would # otherwise cause performance problems for the file system. CREATE_SUBDIRS = YES # If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in # documentation are documented, even if no documentation was available. # Private class members and static file members will be hidden unless # the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES EXTRACT_ALL = YES # If the EXTRACT_STATIC tag is set to YES all static members of a file # will be included in the documentation. EXTRACT_STATIC = YES # The INTERNAL_DOCS tag determines if documentation # that is typed after a \internal command is included. If the tag is set # to NO (the default) then the documentation will be excluded. # Set it to YES to include the internal documentation. 
INTERNAL_DOCS = YES # If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate # file names in lower-case letters. If set to YES upper-case letters are also # allowed. This is useful if you have classes or files whose names only differ # in case and if your file system supports case sensitive file names. Windows # and Mac users are advised to set this option to NO. CASE_SENSE_NAMES = YES # The GENERATE_TODOLIST tag can be used to enable (YES) or # disable (NO) the todo list. This list is created by putting \todo # commands in the documentation. GENERATE_TODOLIST = YES # The GENERATE_TESTLIST tag can be used to enable (YES) or # disable (NO) the test list. This list is created by putting \test # commands in the documentation. GENERATE_TESTLIST = YES # The GENERATE_BUGLIST tag can be used to enable (YES) or # disable (NO) the bug list. This list is created by putting \bug # commands in the documentation. GENERATE_BUGLIST = YES # The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or # disable (NO) the deprecated list. This list is created by putting # \deprecated commands in the documentation. GENERATE_DEPRECATEDLIST= YES # Set the SHOW_USED_FILES tag to NO to disable the list of files generated # at the bottom of the documentation of classes and structs. If set to YES the # list will mention the files that were used to generate the documentation. SHOW_USED_FILES = YES # If the sources in your project are distributed over multiple directories # then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy # in the documentation. The default is NO. SHOW_DIRECTORIES = NO # Set the SHOW_FILES tag to NO to disable the generation of the Files page. # This will remove the Files entry from the Quick Index and from the # Folder Tree View (if specified). The default is YES. SHOW_FILES = YES # Set the SHOW_NAMESPACES tag to NO to disable the generation of the # Namespaces page. 
# This will remove the Namespaces entry from the Quick Index # and from the Folder Tree View (if specified). The default is YES. SHOW_NAMESPACES = YES #--------------------------------------------------------------------------- # configuration options related to the input files #--------------------------------------------------------------------------- # The INPUT tag can be used to specify the files and/or directories that contain # documented source files. You may enter file names like "myfile.cpp" or # directories like "/usr/src/myproject". Separate the files or directories # with spaces. INPUT = ../src ../src/components/README # The RECURSIVE tag can be used to specify whether or not subdirectories # should be searched for input files as well. Possible values are YES and NO. # If left blank NO is used. RECURSIVE = YES #--------------------------------------------------------------------------- # configuration options related to source browsing #--------------------------------------------------------------------------- # If the SOURCE_BROWSER tag is set to YES then a list of source files will # be generated. Documented entities will be cross-referenced with these sources. # Note: To get rid of all source code in the generated output, make sure also # VERBATIM_HEADERS is set to NO. SOURCE_BROWSER = YES # Setting the INLINE_SOURCES tag to YES will include the body # of functions and classes directly in the documentation. INLINE_SOURCES = YES # Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct # doxygen to hide any special comment blocks from generated source code # fragments. Normal C and C++ comments will always remain visible. STRIP_CODE_COMMENTS = YES # If the REFERENCED_BY_RELATION tag is set to YES # then for each documented function all documented # functions referencing it will be listed.
REFERENCED_BY_RELATION = NO # If the REFERENCES_RELATION tag is set to YES # then for each documented function all documented entities # called/used by that function will be listed. REFERENCES_RELATION = NO # If the REFERENCES_LINK_SOURCE tag is set to YES (the default) # and SOURCE_BROWSER tag is set to YES, then the hyperlinks from # functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will # link to the source code. # Otherwise they will link to the documentation. REFERENCES_LINK_SOURCE = YES #--------------------------------------------------------------------------- # configuration options related to the HTML output #--------------------------------------------------------------------------- # If the GENERATE_HTML tag is set to YES (the default) Doxygen will # generate HTML output. GENERATE_HTML = YES # The HTML_OUTPUT tag is used to specify where the HTML docs will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `html' will be used as the default path. HTML_OUTPUT = html # The HTML_FILE_EXTENSION tag can be used to specify the file extension for # each generated HTML page (for example: .htm,.php,.asp). If it is left blank # doxygen will generate files with .html extension. HTML_FILE_EXTENSION = .html # The HTML_HEADER tag can be used to specify a personal HTML header for # each generated HTML page. If it is left blank doxygen will generate a # standard header. HTML_HEADER = # The HTML_FOOTER tag can be used to specify a personal HTML footer for # each generated HTML page. If it is left blank doxygen will generate a # standard footer. HTML_FOOTER = # The HTML_STYLESHEET tag can be used to specify a user-defined cascading # style sheet that is used by each HTML page. It can be used to # fine-tune the look of the HTML output. If the tag is left blank doxygen # will generate a default style sheet. 
Note that doxygen will try to copy # the style sheet file to the HTML output directory, so don't put your own # stylesheet in the HTML output directory as well, or it will be erased! HTML_STYLESHEET = # If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML # page will contain the date and time when the page was generated. Setting # this to NO can help when comparing the output of multiple runs. HTML_TIMESTAMP = YES # If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, # files or namespaces will be aligned in HTML using tables. If set to # NO a bullet list will be used. HTML_ALIGN_MEMBERS = YES # If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML # documentation will contain sections that can be hidden and shown after the # page has loaded. For this to work a browser that supports # JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox # Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari). HTML_DYNAMIC_SECTIONS = NO # This tag can be used to set the number of enum values (range [1..20]) # that doxygen will group on one line in the generated HTML documentation. ENUM_VALUES_PER_LINE = 4 # The GENERATE_TREEVIEW tag is used to specify whether a tree-like index # structure should be generated to display hierarchical information. # If the tag value is set to YES, a side panel will be generated # containing a tree-like index structure (just like the one that # is generated for HTML Help). For this to work a browser that supports # JavaScript, DHTML, CSS and frames is required (i.e. any modern browser). # Windows users are probably better off using the HTML help feature. GENERATE_TREEVIEW = YES # By enabling USE_INLINE_TREES, doxygen will generate the Groups, Directories, # and Class Hierarchy pages using a tree view instead of an ordered list. 
USE_INLINE_TREES = NO # If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be # used to set the initial width (in pixels) of the frame in which the tree # is shown. TREEVIEW_WIDTH = 250 # Use this tag to change the font size of Latex formulas included # as images in the HTML documentation. The default is 10. Note that # when you change the font size after a successful doxygen run you need # to manually remove any form_*.png images from the HTML output directory # to force them to be regenerated. FORMULA_FONTSIZE = 10 # When the SEARCHENGINE tag is enabled doxygen will generate a search box for the HTML output. The underlying search engine uses javascript # and DHTML and should work on any modern browser. Note that when using HTML help (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET) there is already a search function so this one should # typically be disabled. For large projects the javascript based search engine # can be slow, then enabling SERVER_BASED_SEARCH may provide a better solution. SEARCHENGINE = YES # When the SERVER_BASED_SEARCH tag is enabled the search engine will be implemented using a PHP enabled web server instead of at the web client using Javascript. Doxygen will generate the search PHP script and index # file to put on the web server. The advantage of the server based approach is that it scales better to large projects and allows full text search. The disadvantage is that it is more difficult to set up # and does not have live searching capabilities. SERVER_BASED_SEARCH = NO #--------------------------------------------------------------------------- # Configuration options related to the dot tool #--------------------------------------------------------------------------- # If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will # generate an inheritance diagram (in HTML, RTF and LaTeX) for classes with base # or super classes. Setting the tag to NO turns the diagrams off.
Note that # this option is superseded by the HAVE_DOT option below. This is only a # fallback. It is recommended to install and use dot, since it yields more # powerful graphs. CLASS_DIAGRAMS = YES # If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen # will generate a graph for each documented class showing the direct and # indirect inheritance relations. Setting this tag to YES will force the # CLASS_DIAGRAMS tag to NO. CLASS_GRAPH = YES # If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen # will generate a graph for each documented class showing the direct and # indirect implementation dependencies (inheritance, containment, and # class references variables) of the class with other documented classes. COLLABORATION_GRAPH = YES # If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen # will generate a graph for groups, showing the direct groups dependencies GROUP_GRAPHS = YES # If the UML_LOOK tag is set to YES doxygen will generate inheritance and # collaboration diagrams in a style similar to the OMG's Unified Modeling # Language. UML_LOOK = NO # If set to YES, the inheritance and collaboration graphs will show the # relations between templates and their instances. TEMPLATE_RELATIONS = NO # If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT # tags are set to YES then doxygen will generate a graph for each documented # file showing the direct and indirect include dependencies of the file with # other documented files. INCLUDE_GRAPH = YES # If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and # HAVE_DOT tags are set to YES then doxygen will generate a graph for each # documented header file showing the documented files that directly or # indirectly include this file. INCLUDED_BY_GRAPH = YES # If the CALL_GRAPH and HAVE_DOT options are set to YES then # doxygen will generate a call dependency graph for every global function # or class method.
Note that enabling this option will significantly increase # the time of a run. So in most cases it will be better to enable call graphs # for selected functions only using the \callgraph command. CALL_GRAPH = YES # If the CALLER_GRAPH and HAVE_DOT tags are set to YES then # doxygen will generate a caller dependency graph for every global function # or class method. Note that enabling this option will significantly increase # the time of a run. So in most cases it will be better to enable caller # graphs for selected functions only using the \callergraph command. CALLER_GRAPH = YES # If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen # will generate a graphical hierarchy of all classes instead of a textual one. GRAPHICAL_HIERARCHY = YES # If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES # then doxygen will show the dependencies a directory has on other directories # in a graphical way. The dependency relations are determined by the #include # relations between the files in the directories. DIRECTORY_GRAPH = NO # The DOT_IMAGE_FORMAT tag can be used to set the image format of the images # generated by dot. Possible values are png, jpg, or gif # If left blank png will be used. DOT_IMAGE_FORMAT = png # The tag DOT_PATH can be used to specify the path where the dot tool can be # found. If left blank, it is assumed the dot tool can be found in the path. DOT_PATH = # The DOTFILE_DIRS tag can be used to specify one or more directories that # contain dot files that are included in the documentation (see the # \dotfile command). DOTFILE_DIRS = # The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of # nodes that will be shown in the graph. If the number of nodes in a graph # becomes larger than this value, doxygen will truncate the graph, which is # visualized by representing a node as a red box.
Note that if the # number of direct children of the root node in a graph is already larger than # DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note # that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH. DOT_GRAPH_MAX_NODES = 50 # The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the # graphs generated by dot. A depth value of 3 means that only nodes reachable # from the root by following a path via at most 3 edges will be shown. Nodes # that lay further from the root node will be omitted. Note that setting this # option to 1 or 2 may greatly reduce the computation time needed for large # code bases. Also note that the size of a graph can be further restricted by # DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction. MAX_DOT_GRAPH_DEPTH = 0 # Set the DOT_TRANSPARENT tag to YES to generate images with a transparent # background. This is disabled by default, because dot on Windows does not # seem to support this out of the box. Warning: Depending on the platform used, # enabling this option may lead to badly anti-aliased labels on the edges of # a graph (i.e. they become hard to read). DOT_TRANSPARENT = NO # Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output # files in one run (i.e. multiple -o and -T options on the command line). This # makes dot run faster, but since only newer versions of dot (>1.8.10) # support this, this feature is disabled by default. DOT_MULTI_TARGETS = NO # If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will # generate a legend page explaining the meaning of the various boxes and # arrows in the dot generated graphs. GENERATE_LEGEND = YES # If the DOT_CLEANUP tag is set to YES (the default) Doxygen will # remove the intermediate dot files that are used to generate # the various graphs.
DOT_CLEANUP = YES papi-5.6.0/man/man1/000775 001750 001750 00000000000 13216244355 016145 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/components/infiniband_umad/README000664 001750 001750 00000001136 13216244357 023504 0ustar00jshenry1963jshenry1963000000 000000 /** * @file: README * CVS: $Id$ * @author: Dan Terpstra * terpstra@icl.utk.edu * @defgroup papi_components Components * @brief Component Specific Readme file: Infiniband */ /** @page component_readme Component Readme @section Component Specific Information infiniband_umad/ These files have the source code for a component that enables PAPI-C to access hardware monitoring counters for InfiniBand devices through the OFED library. Since a new interface was introduced with OFED version 1.4 (released Dec 2008), the current InfiniBand component does not support OFED versions < 1.4. */ papi-5.6.0/src/INSTALL000664 001750 001750 00000000231 13216244356 016353 0ustar00jshenry1963jshenry1963000000 000000 /* * File: papi/src/README * CVS: $Id$ * Author: Philip Mucci * mucci@cs.utk.edu */ Please see the INSTALL.txt in the root directory. 
papi-5.6.0/src/libpfm-3.y/docs/man3/pfm_regmask_set.3000664 001750 001750 00000005070 13216244361 024224 0ustar00jshenry1963jshenry1963000000 000000 .TH LIBPFM 3 "Apr, 2006" "" "Linux Programmer's Manual" .SH NAME pfm_regmask_set, pfm_regmask_isset, pfm_regmask_clr, pfm_regmask_weight, pfm_regmask_eq, pfm_regmask_and, pfm_regmask_or, pfm_regmask_copy \- operations on pfmlib_regmask_t bitmasks .SH SYNOPSIS .nf .B #include <perfmon/pfmlib.h> .sp .BI "int pfm_regmask_isset(pfmlib_regmask_t *"mask ", unsigned int "b ");" .BI "int pfm_regmask_set(pfmlib_regmask_t *"mask ", unsigned int "b ");" .BI "int pfm_regmask_clr(pfmlib_regmask_t *"mask ", unsigned int "b ");" .BI "int pfm_regmask_weight(pfmlib_regmask_t *"mask ", unsigned int *"w ");" .BI "int pfm_regmask_eq(pfmlib_regmask_t *"mask1 ", pfmlib_regmask_t *"mask2 ");" .BI "int pfm_regmask_and(pfmlib_regmask_t *"dest ", pfmlib_regmask_t *"m1 ", pfmlib_regmask_t *"m2 ");" .BI "int pfm_regmask_or(pfmlib_regmask_t *"dest ", pfmlib_regmask_t *"m1 ", pfmlib_regmask_t *"m2 ");" .BI "int pfm_regmask_copy(pfmlib_regmask_t *"dest ", pfmlib_regmask_t *"src ");" .sp .SH DESCRIPTION This set of functions is used to operate on the \fBpfmlib_regmask_t\fR bitmasks that are returned by certain functions or passed to the \fBpfm_dispatch_events()\fR function. To ensure portability, it is important that applications use \fBonly\fR the functions specified here to access the bitmasks. It is strongly discouraged to access the internal fields of the \fBpfmlib_regmask_t\fR structure. The \fBpfm_regmask_set()\fR function is used to set bit \fBb\fR in the bitmask \fBmask\fR. The \fBpfm_regmask_clr()\fR function is used to clear bit \fBb\fR in the bitmask \fBmask\fR. The \fBpfm_regmask_isset()\fR function returns a non-zero value if \fBb\fR is set in the bitmask \fBmask\fR. The \fBpfm_regmask_weight()\fR function returns in \fBw\fR the number of bits set in the bitmask \fBmask\fR.
The \fBpfm_regmask_eq()\fR function returns a non-zero value if the bitmasks \fBmask1\fR and \fBmask2\fR are identical. The \fBpfm_regmask_and()\fR function returns in bitmask \fBdest\fR the result of the logical AND operation between bitmask \fBm1\fR and bitmask \fBm2\fR. The \fBpfm_regmask_or()\fR function returns in bitmask \fBdest\fR the result of the logical OR operation between bitmask \fBm1\fR and bitmask \fBm2\fR. The \fBpfm_regmask_copy()\fR function copies bitmask \fBsrc\fR into bitmask \fBdest\fR. .SH RETURN The function returns whether or not it was successful. A return value of \fBPFMLIB_SUCCESS\fR indicates success, otherwise the value is the error code. .SH ERRORS .B PFMLIB_ERR_INVAL the bit \fBb\fR exceeds the limit supported by the library .SH AUTHOR Stephane Eranian .PP papi-5.6.0/src/libpfm4/lib/pfmlib_intel_slm.c000664 001750 001750 00000005153 13216244365 023120 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_intel_slm.c : Intel Silvermont core PMU * * Copyright (c) 2013 Google, Inc * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * Based on Intel Software Optimization Guide June 2013 */ /* private headers */ #include "pfmlib_priv.h" #include "pfmlib_intel_x86_priv.h" #include "events/intel_slm_events.h" /* * CPU model numbers listed for this PMU (referenced below through .cpu_models); * the list is terminated by a 0 entry */ static const int slm_models[] = { 55, /* Silvermont */ 77, /* Silvermont Avoton */ 76, /* Airmont */ 0 }; /* * PMU init callback (installed below as .pmu_init): sets * pfm_intel_x86_cfg.arch_version to 2 and always returns PFM_SUCCESS; * the argument is not used */ static int pfm_intel_slm_init(void *this) { pfm_intel_x86_cfg.arch_version = 2; return PFM_SUCCESS; } /* * PMU description for Intel Silvermont: event table intel_slm_pe, * 4 counters plus 3 fixed counters, CPU family 6 with the models in slm_models. * Apart from .pmu_init, the callbacks are the generic pfm_intel_x86_* routines. */ pfmlib_pmu_t intel_slm_support={ .desc = "Intel Silvermont", .name = "slm", .pmu = PFM_PMU_INTEL_SLM, .pme_count = LIBPFM_ARRAY_SIZE(intel_slm_pe), .type = PFM_PMU_TYPE_CORE, .num_cntrs = 4, .num_fixed_cntrs = 3, .max_encoding = 2, .pe = intel_slm_pe, .atdesc = intel_x86_mods, .flags = PFMLIB_PMU_FL_RAW_UMASK, .supported_plm = INTEL_X86_PLM, .cpu_family = 6, .cpu_models = slm_models, .pmu_detect = pfm_intel_x86_model_detect, .pmu_init = pfm_intel_slm_init, .get_event_encoding[PFM_OS_NONE] = pfm_intel_x86_get_encoding, PFMLIB_ENCODE_PERF(pfm_intel_x86_get_perf_encoding), .get_event_first = pfm_intel_x86_get_event_first, .get_event_next = pfm_intel_x86_get_event_next, .event_is_valid = pfm_intel_x86_event_is_valid, .validate_table = pfm_intel_x86_validate_table, .get_event_info = pfm_intel_x86_get_event_info, .get_event_attr_info = pfm_intel_x86_get_event_attr_info, PFMLIB_VALID_PERF_PATTRS(pfm_intel_x86_perf_validate_pattrs), .get_event_nattrs = pfm_intel_x86_get_event_nattrs, }; papi-5.6.0/src/perfctr-2.7.x/patches/patch-kernel-2.6.22000664 001750 001750 00000073614 13216244370 024230 0ustar00jshenry1963jshenry1963000000 000000 diff -rupN linux-2.6.22/CREDITS linux-2.6.22.perfctr27/CREDITS --- linux-2.6.22/CREDITS 2007-07-09 22:01:30.000000000 +0200 +++ linux-2.6.22.perfctr27/CREDITS 2007-09-28 12:46:31.000000000 +0200 @@
-2685,7 +2685,7 @@ S: Canada K2P 0X8 N: Mikael Pettersson E: mikpe@it.uu.se -W: http://www.csd.uu.se/~mikpe/ +W: http://user.it.uu.se/~mikpe/ D: Miscellaneous fixes N: Reed H. Petty diff -rupN linux-2.6.22/MAINTAINERS linux-2.6.22.perfctr27/MAINTAINERS --- linux-2.6.22/MAINTAINERS 2007-07-09 22:01:30.000000000 +0200 +++ linux-2.6.22.perfctr27/MAINTAINERS 2007-09-28 12:46:31.000000000 +0200 @@ -2844,6 +2844,12 @@ M: nagar@watson.ibm.com L: linux-kernel@vger.kernel.org S: Maintained +PERFORMANCE-MONITORING COUNTERS DRIVER +P: Mikael Pettersson +M: mikpe@it.uu.se +W: http://user.it.uu.se/~mikpe/linux/perfctr/ +S: Maintained + PERSONALITY HANDLING P: Christoph Hellwig M: hch@infradead.org diff -rupN linux-2.6.22/arch/i386/Kconfig linux-2.6.22.perfctr27/arch/i386/Kconfig --- linux-2.6.22/arch/i386/Kconfig 2007-07-09 22:01:31.000000000 +0200 +++ linux-2.6.22.perfctr27/arch/i386/Kconfig 2007-09-28 12:46:31.000000000 +0200 @@ -781,6 +781,8 @@ config SECCOMP If unsure, say Y. Only embedded should say N here. +source "drivers/perfctr/Kconfig" + source kernel/Kconfig.hz config KEXEC diff -rupN linux-2.6.22/arch/i386/kernel/entry.S linux-2.6.22.perfctr27/arch/i386/kernel/entry.S --- linux-2.6.22/arch/i386/kernel/entry.S 2007-07-09 22:01:31.000000000 +0200 +++ linux-2.6.22.perfctr27/arch/i386/kernel/entry.S 2007-09-28 12:46:31.000000000 +0200 @@ -637,6 +637,22 @@ ENDPROC(name) /* The include is where all of the SMP etc. 
interrupts come from */ #include "entry_arch.h" +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PERFCTR) +ENTRY(perfctr_interrupt) + RING0_INT_FRAME + pushl $~(LOCAL_PERFCTR_VECTOR) + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + TRACE_IRQS_OFF + pushl %esp + CFI_ADJUST_CFA_OFFSET 4 + call smp_perfctr_interrupt + addl $4, %esp + CFI_ADJUST_CFA_OFFSET -4 + jmp ret_from_intr + CFI_ENDPROC +#endif + KPROBE_ENTRY(page_fault) RING0_EC_FRAME pushl $do_page_fault diff -rupN linux-2.6.22/arch/i386/kernel/i8259.c linux-2.6.22.perfctr27/arch/i386/kernel/i8259.c --- linux-2.6.22/arch/i386/kernel/i8259.c 2007-07-09 22:01:31.000000000 +0200 +++ linux-2.6.22.perfctr27/arch/i386/kernel/i8259.c 2007-09-28 12:46:31.000000000 +0200 @@ -21,6 +21,7 @@ #include #include #include +#include #include @@ -409,6 +410,8 @@ void __init native_init_IRQ(void) */ intr_init_hook(); + perfctr_vector_init(); + /* * External FPU? Set up irq13 if so, for * original braindamaged IBM FERR coupling. diff -rupN linux-2.6.22/arch/i386/kernel/process.c linux-2.6.22.perfctr27/arch/i386/kernel/process.c --- linux-2.6.22/arch/i386/kernel/process.c 2007-07-09 22:01:31.000000000 +0200 +++ linux-2.6.22.perfctr27/arch/i386/kernel/process.c 2007-09-28 12:46:31.000000000 +0200 @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -384,6 +385,7 @@ void exit_thread(void) tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; put_cpu(); } + perfctr_exit_thread(&current->thread); } void flush_thread(void) @@ -435,6 +437,8 @@ int copy_thread(int nr, unsigned long cl savesegment(gs,p->thread.gs); + perfctr_copy_task(p, regs); + tsk = current; if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, @@ -719,6 +723,8 @@ struct task_struct fastcall * __switch_t x86_write_percpu(current_task, next_p); + perfctr_resume_thread(next); + return prev_p; } diff -rupN linux-2.6.22/arch/i386/kernel/syscall_table.S
linux-2.6.22.perfctr27/arch/i386/kernel/syscall_table.S --- linux-2.6.22/arch/i386/kernel/syscall_table.S 2007-07-09 22:01:31.000000000 +0200 +++ linux-2.6.22.perfctr27/arch/i386/kernel/syscall_table.S 2007-09-28 12:53:08.000000000 +0200 @@ -323,3 +323,8 @@ ENTRY(sys_call_table) .long sys_signalfd .long sys_timerfd .long sys_eventfd + .long sys_ni_syscall + .long sys_vperfctr_open /* 325 */ + .long sys_vperfctr_control + .long sys_vperfctr_write + .long sys_vperfctr_read diff -rupN linux-2.6.22/arch/powerpc/Kconfig linux-2.6.22.perfctr27/arch/powerpc/Kconfig --- linux-2.6.22/arch/powerpc/Kconfig 2007-07-09 22:01:31.000000000 +0200 +++ linux-2.6.22.perfctr27/arch/powerpc/Kconfig 2007-09-28 12:55:07.000000000 +0200 @@ -393,6 +393,9 @@ config NOT_COHERENT_CACHE config CONFIG_CHECK_CACHE_COHERENCY bool + +source "drivers/perfctr/Kconfig" + endmenu source "init/Kconfig" diff -rupN linux-2.6.22/arch/powerpc/kernel/process.c linux-2.6.22.perfctr27/arch/powerpc/kernel/process.c --- linux-2.6.22/arch/powerpc/kernel/process.c 2007-07-09 22:01:31.000000000 +0200 +++ linux-2.6.22.perfctr27/arch/powerpc/kernel/process.c 2007-09-28 12:46:31.000000000 +0200 @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -329,7 +330,9 @@ struct task_struct *__switch_to(struct t account_process_vtime(current); calculate_steal_time(); + perfctr_suspend_thread(&prev->thread); last = _switch(old_thread, new_thread); + perfctr_resume_thread(&current->thread); local_irq_restore(flags); @@ -455,6 +458,7 @@ void show_regs(struct pt_regs * regs) void exit_thread(void) { discard_lazy_cpu_state(); + perfctr_exit_thread(&current->thread); } void flush_thread(void) @@ -571,6 +575,8 @@ int copy_thread(int nr, unsigned long cl kregs->nip = (unsigned long)ret_from_fork; #endif + perfctr_copy_task(p, regs); + return 0; } diff -rupN linux-2.6.22/arch/x86_64/Kconfig linux-2.6.22.perfctr27/arch/x86_64/Kconfig --- linux-2.6.22/arch/x86_64/Kconfig 2007-07-09 22:01:31.000000000 +0200 +++
linux-2.6.22.perfctr27/arch/x86_64/Kconfig 2007-09-28 12:46:31.000000000 +0200 @@ -661,6 +661,8 @@ config CC_STACKPROTECTOR_ALL functions that use large-ish on-stack buffers. By enabling this option, GCC will be asked to do this for ALL functions. +source "drivers/perfctr/Kconfig" + source kernel/Kconfig.hz config K8_NB diff -rupN linux-2.6.22/arch/x86_64/ia32/ia32entry.S linux-2.6.22.perfctr27/arch/x86_64/ia32/ia32entry.S --- linux-2.6.22/arch/x86_64/ia32/ia32entry.S 2007-07-09 22:01:31.000000000 +0200 +++ linux-2.6.22.perfctr27/arch/x86_64/ia32/ia32entry.S 2007-09-28 12:54:03.000000000 +0200 @@ -719,4 +719,9 @@ ia32_sys_call_table: .quad compat_sys_signalfd .quad compat_sys_timerfd .quad sys_eventfd + .quad quiet_ni_syscall + .quad sys_vperfctr_open /* 325 */ + .quad sys_vperfctr_control + .quad sys_vperfctr_write + .quad sys_vperfctr_read ia32_syscall_end: diff -rupN linux-2.6.22/arch/x86_64/kernel/entry.S linux-2.6.22.perfctr27/arch/x86_64/kernel/entry.S --- linux-2.6.22/arch/x86_64/kernel/entry.S 2007-07-09 22:01:31.000000000 +0200 +++ linux-2.6.22.perfctr27/arch/x86_64/kernel/entry.S 2007-09-28 12:46:31.000000000 +0200 @@ -692,6 +692,12 @@ ENTRY(spurious_interrupt) apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt END(spurious_interrupt) +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PERFCTR) +ENTRY(perfctr_interrupt) + apicinterrupt LOCAL_PERFCTR_VECTOR,smp_perfctr_interrupt +END(perfctr_interrupt) +#endif + /* * Exception entry points. 
*/ diff -rupN linux-2.6.22/arch/x86_64/kernel/i8259.c linux-2.6.22.perfctr27/arch/x86_64/kernel/i8259.c --- linux-2.6.22/arch/x86_64/kernel/i8259.c 2007-07-09 22:01:31.000000000 +0200 +++ linux-2.6.22.perfctr27/arch/x86_64/kernel/i8259.c 2007-09-28 12:46:31.000000000 +0200 @@ -21,6 +21,7 @@ #include #include #include +#include /* * Common place to define all x86 IRQ vectors @@ -551,6 +552,8 @@ void __init init_IRQ(void) set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); + perfctr_vector_init(); + /* * Set the clock to HZ Hz, we already have a valid * vector now: diff -rupN linux-2.6.22/arch/x86_64/kernel/process.c linux-2.6.22.perfctr27/arch/x86_64/kernel/process.c --- linux-2.6.22/arch/x86_64/kernel/process.c 2007-07-09 22:01:31.000000000 +0200 +++ linux-2.6.22.perfctr27/arch/x86_64/kernel/process.c 2007-09-28 12:46:31.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -379,6 +380,7 @@ void exit_thread(void) t->io_bitmap_max = 0; put_cpu(); } + perfctr_exit_thread(t); } void flush_thread(void) @@ -487,6 +489,8 @@ int copy_thread(int nr, unsigned long cl asm("mov %%es,%0" : "=m" (p->thread.es)); asm("mov %%ds,%0" : "=m" (p->thread.ds)); + perfctr_copy_task(p, regs); + if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) { @@ -670,6 +674,9 @@ __switch_to(struct task_struct *prev_p, */ if (next_p->fpu_counter>5) math_state_restore(); + + perfctr_resume_thread(next); + return prev_p; } diff -rupN linux-2.6.22/drivers/Makefile linux-2.6.22.perfctr27/drivers/Makefile --- linux-2.6.22/drivers/Makefile 2007-07-09 22:01:31.000000000 +0200 +++ linux-2.6.22.perfctr27/drivers/Makefile 2007-09-28 12:46:31.000000000 +0200 @@ -74,6 +74,7 @@ obj-$(CONFIG_MMC) += mmc/ obj-$(CONFIG_NEW_LEDS) += leds/ obj-$(CONFIG_INFINIBAND) += infiniband/ obj-$(CONFIG_SGI_SN) += sn/ 
+obj-$(CONFIG_PERFCTR) += perfctr/ obj-y += firmware/ obj-$(CONFIG_CRYPTO) += crypto/ obj-$(CONFIG_SUPERH) += sh/ diff -rupN linux-2.6.22/include/asm-i386/mach-default/irq_vectors.h linux-2.6.22.perfctr27/include/asm-i386/mach-default/irq_vectors.h --- linux-2.6.22/include/asm-i386/mach-default/irq_vectors.h 2007-02-04 19:44:54.000000000 +0100 +++ linux-2.6.22.perfctr27/include/asm-i386/mach-default/irq_vectors.h 2007-09-28 12:46:31.000000000 +0200 @@ -56,14 +56,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 diff -rupN linux-2.6.22/include/asm-i386/mach-visws/irq_vectors.h linux-2.6.22.perfctr27/include/asm-i386/mach-visws/irq_vectors.h --- linux-2.6.22/include/asm-i386/mach-visws/irq_vectors.h 2007-02-04 19:44:54.000000000 +0100 +++ linux-2.6.22.perfctr27/include/asm-i386/mach-visws/irq_vectors.h 2007-09-28 12:46:31.000000000 +0200 @@ -35,14 +35,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. 
(0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 diff -rupN linux-2.6.22/include/asm-i386/processor.h linux-2.6.22.perfctr27/include/asm-i386/processor.h --- linux-2.6.22/include/asm-i386/processor.h 2007-07-09 22:01:37.000000000 +0200 +++ linux-2.6.22.perfctr27/include/asm-i386/processor.h 2007-09-28 12:46:31.000000000 +0200 @@ -370,6 +370,8 @@ struct thread_struct { unsigned long iopl; /* max allowed port in the bitmap, in bytes: */ unsigned long io_bitmap_max; +/* performance counters */ + struct vperfctr *perfctr; }; #define INIT_THREAD { \ diff -rupN linux-2.6.22/include/asm-i386/system.h linux-2.6.22.perfctr27/include/asm-i386/system.h --- linux-2.6.22/include/asm-i386/system.h 2007-07-09 22:01:37.000000000 +0200 +++ linux-2.6.22.perfctr27/include/asm-i386/system.h 2007-09-28 12:46:31.000000000 +0200 @@ -17,6 +17,7 @@ extern struct task_struct * FASTCALL(__s */ #define switch_to(prev,next,last) do { \ unsigned long esi,edi; \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile("pushfl\n\t" /* Save flags */ \ "pushl %%ebp\n\t" \ "movl %%esp,%0\n\t" /* save ESP */ \ diff -rupN linux-2.6.22/include/asm-i386/unistd.h linux-2.6.22.perfctr27/include/asm-i386/unistd.h --- linux-2.6.22/include/asm-i386/unistd.h 2007-07-09 22:01:37.000000000 +0200 +++ linux-2.6.22.perfctr27/include/asm-i386/unistd.h 2007-09-28 12:52:10.000000000 +0200 @@ -329,10 +329,14 @@ #define __NR_signalfd 321 #define __NR_timerfd 322 #define __NR_eventfd 323 +#define __NR_vperfctr_open 325 +#define __NR_vperfctr_control (__NR_vperfctr_open+1) +#define __NR_vperfctr_write (__NR_vperfctr_open+2) +#define __NR_vperfctr_read (__NR_vperfctr_open+3) #ifdef __KERNEL__ -#define NR_syscalls 324 +#define NR_syscalls 329 #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR diff -rupN linux-2.6.22/include/asm-powerpc/processor.h linux-2.6.22.perfctr27/include/asm-powerpc/processor.h 
--- linux-2.6.22/include/asm-powerpc/processor.h 2007-07-09 22:01:37.000000000 +0200 +++ linux-2.6.22.perfctr27/include/asm-powerpc/processor.h 2007-09-28 12:46:31.000000000 +0200 @@ -165,6 +165,9 @@ struct thread_struct { unsigned long spefscr; /* SPE & eFP status */ int used_spe; /* set if process has used spe */ #endif /* CONFIG_SPE */ +#ifdef CONFIG_PERFCTR_VIRTUAL + struct vperfctr *perfctr; /* performance counters */ +#endif }; #define ARCH_MIN_TASKALIGN 16 diff -rupN linux-2.6.22/include/asm-powerpc/reg.h linux-2.6.22.perfctr27/include/asm-powerpc/reg.h --- linux-2.6.22/include/asm-powerpc/reg.h 2007-07-09 22:01:37.000000000 +0200 +++ linux-2.6.22.perfctr27/include/asm-powerpc/reg.h 2007-09-28 12:46:31.000000000 +0200 @@ -368,10 +368,8 @@ #define SPRN_PURR 0x135 /* Processor Utilization of Resources Reg */ #define SPRN_PVR 0x11F /* Processor Version Register */ #define SPRN_RPA 0x3D6 /* Required Physical Address Register */ -#define SPRN_SDA 0x3BF /* Sampled Data Address Register */ #define SPRN_SDR1 0x019 /* MMU Hash Base Register */ #define SPRN_ASR 0x118 /* Address Space Register */ -#define SPRN_SIA 0x3BB /* Sampled Instruction Address Register */ #define SPRN_SPRG0 0x110 /* Special Purpose Register General 0 */ #define SPRN_SPRG1 0x111 /* Special Purpose Register General 1 */ #define SPRN_SPRG2 0x112 /* Special Purpose Register General 2 */ @@ -414,13 +412,6 @@ #define SPRN_THRM3 0x3FE /* Thermal Management Register 3 */ #define THRM3_E (1<<0) #define SPRN_TLBMISS 0x3D4 /* 980 7450 TLB Miss Register */ -#define SPRN_UMMCR0 0x3A8 /* User Monitor Mode Control Register 0 */ -#define SPRN_UMMCR1 0x3AC /* User Monitor Mode Control Register 0 */ -#define SPRN_UPMC1 0x3A9 /* User Performance Counter Register 1 */ -#define SPRN_UPMC2 0x3AA /* User Performance Counter Register 2 */ -#define SPRN_UPMC3 0x3AD /* User Performance Counter Register 3 */ -#define SPRN_UPMC4 0x3AE /* User Performance Counter Register 4 */ -#define SPRN_USIA 0x3AB /* User Sampled 
Instruction Address Register */ #define SPRN_VRSAVE 0x100 /* Vector Register Save Register */ #define SPRN_XER 0x001 /* Fixed Point Exception Register */ @@ -533,33 +524,7 @@ #define SPRN_PA6T_PMC5 792 #else /* 32-bit */ -#define SPRN_MMCR0 952 /* Monitor Mode Control Register 0 */ -#define MMCR0_FC 0x80000000UL /* freeze counters */ -#define MMCR0_FCS 0x40000000UL /* freeze in supervisor state */ -#define MMCR0_FCP 0x20000000UL /* freeze in problem state */ -#define MMCR0_FCM1 0x10000000UL /* freeze counters while MSR mark = 1 */ -#define MMCR0_FCM0 0x08000000UL /* freeze counters while MSR mark = 0 */ -#define MMCR0_PMXE 0x04000000UL /* performance monitor exception enable */ -#define MMCR0_FCECE 0x02000000UL /* freeze ctrs on enabled cond or event */ -#define MMCR0_TBEE 0x00400000UL /* time base exception enable */ -#define MMCR0_PMC1CE 0x00008000UL /* PMC1 count enable*/ #define MMCR0_PMCnCE 0x00004000UL /* count enable for all but PMC 1*/ -#define MMCR0_TRIGGER 0x00002000UL /* TRIGGER enable */ -#define MMCR0_PMC1SEL 0x00001fc0UL /* PMC 1 Event */ -#define MMCR0_PMC2SEL 0x0000003fUL /* PMC 2 Event */ - -#define SPRN_MMCR1 956 -#define MMCR1_PMC3SEL 0xf8000000UL /* PMC 3 Event */ -#define MMCR1_PMC4SEL 0x07c00000UL /* PMC 4 Event */ -#define MMCR1_PMC5SEL 0x003e0000UL /* PMC 5 Event */ -#define MMCR1_PMC6SEL 0x0001f800UL /* PMC 6 Event */ -#define SPRN_MMCR2 944 -#define SPRN_PMC1 953 /* Performance Counter Register 1 */ -#define SPRN_PMC2 954 /* Performance Counter Register 2 */ -#define SPRN_PMC3 957 /* Performance Counter Register 3 */ -#define SPRN_PMC4 958 /* Performance Counter Register 4 */ -#define SPRN_PMC5 945 /* Performance Counter Register 5 */ -#define SPRN_PMC6 946 /* Performance Counter Register 6 */ #define SPRN_SIAR 955 /* Sampled Instruction Address Register */ @@ -571,6 +536,77 @@ #define MMCR0_PMC2_CYCLES 0x1 #define MMCR0_PMC2_ITLB 0x7 #define MMCR0_PMC2_LOADMISSTIME 0x5 + +/* Performance-monitoring control and counter registers */ +#define 
SPRN_MMCR0 0x3B8 /* Monitor Mode Control Register 0 (604 and up) */ +#define SPRN_MMCR1 0x3BC /* Monitor Mode Control Register 1 (604e and up) */ +#define SPRN_MMCR2 0x3B0 /* Monitor Mode Control Register 2 (7400 and up) */ +#define SPRN_PMC1 0x3B9 /* Performance Counter Register 1 (604 and up) */ +#define SPRN_PMC2 0x3BA /* Performance Counter Register 2 (604 and up) */ +#define SPRN_PMC3 0x3BD /* Performance Counter Register 3 (604e and up) */ +#define SPRN_PMC4 0x3BE /* Performance Counter Register 4 (604e and up) */ +#define SPRN_PMC5 0x3B1 /* Performance Counter Register 5 (7450 and up) */ +#define SPRN_PMC6 0x3B2 /* Performance Counter Register 6 (7450 and up) */ +#define SPRN_SIA 0x3BB /* Sampled Instruction Address Register (604 and up) */ +#define SPRN_SDA 0x3BF /* Sampled Data Address Register (604/604e only) */ +#define SPRN_BAMR 0x3B7 /* Breakpoint Address Mask Register (7400 and up) */ + +#define SPRN_UMMCR0 0x3A8 /* User Monitor Mode Control Register 0 (750 and up) */ +#define SPRN_UMMCR1 0x3AC /* User Monitor Mode Control Register 1 (750 and up) */ +#define SPRN_UMMCR2 0x3A0 /* User Monitor Mode Control Register 2 (7400 and up) */ +#define SPRN_UPMC1 0x3A9 /* User Performance Counter Register 1 (750 and up) */ +#define SPRN_UPMC2 0x3AA /* User Performance Counter Register 2 (750 and up) */ +#define SPRN_UPMC3 0x3AD /* User Performance Counter Register 3 (750 and up) */ +#define SPRN_UPMC4 0x3AE /* User Performance Counter Register 4 (750 and up) */ +#define SPRN_UPMC5 0x3A1 /* User Performance Counter Register 5 (7450 and up) */ +#define SPRN_UPMC6 0x3A2 /* User Performance Counter Register 6 (7450 and up) */ +#define SPRN_USIA 0x3AB /* User Sampled Instruction Address Register (750 and up) */ +#define SPRN_UBAMR 0x3A7 /* User Breakpoint Address Mask Register (7400 and up) */ + +/* MMCR0 layout (74xx terminology) */ +#define MMCR0_FC 0x80000000 /* Freeze counters unconditionally.
*/ +#define MMCR0_FCS 0x40000000 /* Freeze counters while MSR[PR]=0 (supervisor mode). */ +#define MMCR0_FCP 0x20000000 /* Freeze counters while MSR[PR]=1 (user mode). */ +#define MMCR0_FCM1 0x10000000 /* Freeze counters while MSR[PM]=1. */ +#define MMCR0_FCM0 0x08000000 /* Freeze counters while MSR[PM]=0. */ +#define MMCR0_PMXE 0x04000000 /* Enable performance monitor exceptions. + * Cleared by hardware when a PM exception occurs. + * 604: PMXE is not cleared by hardware. + */ +#define MMCR0_FCECE 0x02000000 /* Freeze counters on enabled condition or event. + * FCECE is treated as 0 if TRIGGER is 1. + * 74xx: FC is set when the event occurs. + * 604/750: ineffective when PMXE=0. + */ +#define MMCR0_TBSEL 0x01800000 /* Time base lower (TBL) bit selector. + * 00: bit 31, 01: bit 23, 10: bit 19, 11: bit 15. + */ +#define MMCR0_TBEE 0x00400000 /* Enable event on TBL bit transition from 0 to 1. */ +#define MMCR0_THRESHOLD 0x003F0000 /* Threshold value for certain events. */ +#define MMCR0_PMC1CE 0x00008000 /* Enable event on PMC1 overflow. */ +#define MMCR0_PMCjCE 0x00004000 /* Enable event on PMC2-PMC6 overflow. + * 604/750: Overrides FCECE (DISCOUNT). + */ +#define MMCR0_TRIGGER 0x00002000 /* Disable PMC2-PMC6 until PMC1 overflow or other event. + * 74xx: cleared by hardware when the event occurs. + */ +#define MMCR0_PMC1SEL 0x00001FC0 /* PMC1 event selector, 7 bits. */ +#define MMCR0_PMC2SEL 0x0000003F /* PMC2 event selector, 6 bits. */ + +/* MMCR1 layout (604e-7457) */ +#define MMCR1_PMC3SEL 0xF8000000 /* PMC3 event selector, 5 bits. */ +#define MMCR1_PMC4SEL 0x07C00000 /* PMC4 event selector, 5 bits. */ +#define MMCR1_PMC5SEL 0x003E0000 /* PMC5 event selector, 5 bits. (745x only) */ +#define MMCR1_PMC6SEL 0x0001F800 /* PMC6 event selector, 6 bits. (745x only) */ +#define MMCR1__RESERVED 0x000007FF /* should be zero */ + +/* MMCR2 layout (7400-7457) */ +#define MMCR2_THRESHMULT 0x80000000 /* MMCR0[THRESHOLD] multiplier. 
*/ +#define MMCR2_SMCNTEN 0x40000000 /* 7400/7410 only, should be zero. */ +#define MMCR2_SMINTEN 0x20000000 /* 7400/7410 only, should be zero. */ +#define MMCR2__RESERVED 0x1FFFFFFF /* should be zero */ +#define MMCR2_RESERVED (MMCR2_SMCNTEN | MMCR2_SMINTEN | MMCR2__RESERVED) + #endif /* diff -rupN linux-2.6.22/include/asm-powerpc/systbl.h linux-2.6.22.perfctr27/include/asm-powerpc/systbl.h --- linux-2.6.22/include/asm-powerpc/systbl.h 2007-07-09 22:01:37.000000000 +0200 +++ linux-2.6.22.perfctr27/include/asm-powerpc/systbl.h 2007-09-28 12:49:16.000000000 +0200 @@ -311,4 +311,9 @@ COMPAT_SYS_SPU(utimensat) COMPAT_SYS_SPU(signalfd) COMPAT_SYS_SPU(timerfd) SYSCALL_SPU(eventfd) -COMPAT_SYS_SPU(sync_file_range2) +COMPAT_SYS_SPU(sync_file_range2) /* 308 */ +SYSCALL(ni_syscall) /* 309 */ +SYSCALL(vperfctr_open) /* 310 */ +SYSCALL(vperfctr_control) +SYSCALL(vperfctr_write) +SYSCALL(vperfctr_read) diff -rupN linux-2.6.22/include/asm-powerpc/unistd.h linux-2.6.22.perfctr27/include/asm-powerpc/unistd.h --- linux-2.6.22/include/asm-powerpc/unistd.h 2007-07-09 22:01:37.000000000 +0200 +++ linux-2.6.22.perfctr27/include/asm-powerpc/unistd.h 2007-09-28 12:48:25.000000000 +0200 @@ -331,10 +331,14 @@ #define __NR_timerfd 306 #define __NR_eventfd 307 #define __NR_sync_file_range2 308 +#define __NR_vperfctr_open 310 +#define __NR_vperfctr_control (__NR_vperfctr_open+1) +#define __NR_vperfctr_write (__NR_vperfctr_open+2) +#define __NR_vperfctr_read (__NR_vperfctr_open+3) #ifdef __KERNEL__ -#define __NR_syscalls 309 +#define __NR_syscalls 314 #define __NR__exit __NR_exit #define NR_syscalls __NR_syscalls diff -rupN linux-2.6.22/include/asm-x86_64/hw_irq.h linux-2.6.22.perfctr27/include/asm-x86_64/hw_irq.h --- linux-2.6.22/include/asm-x86_64/hw_irq.h 2007-04-26 14:59:37.000000000 +0200 +++ linux-2.6.22.perfctr27/include/asm-x86_64/hw_irq.h 2007-09-28 12:46:31.000000000 +0200 @@ -84,14 +84,15 @@ * sources per level' errata. 
*/ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x41 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR (IRQ15_VECTOR + 2) -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in irq.h */ #ifndef __ASSEMBLY__ diff -rupN linux-2.6.22/include/asm-x86_64/irq.h linux-2.6.22.perfctr27/include/asm-x86_64/irq.h --- linux-2.6.22/include/asm-x86_64/irq.h 2007-02-04 19:44:54.000000000 +0100 +++ linux-2.6.22.perfctr27/include/asm-x86_64/irq.h 2007-09-28 12:46:31.000000000 +0200 @@ -29,7 +29,7 @@ */ #define NR_VECTORS 256 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in hw_irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in hw_irq.h */ #define NR_IRQS (NR_VECTORS + (32 *NR_CPUS)) #define NR_IRQ_VECTORS NR_IRQS diff -rupN linux-2.6.22/include/asm-x86_64/processor.h linux-2.6.22.perfctr27/include/asm-x86_64/processor.h --- linux-2.6.22/include/asm-x86_64/processor.h 2007-07-09 22:01:37.000000000 +0200 +++ linux-2.6.22.perfctr27/include/asm-x86_64/processor.h 2007-09-28 12:46:31.000000000 +0200 @@ -239,6 +239,8 @@ struct thread_struct { unsigned io_bitmap_max; /* cached TLS descriptors. 
*/ u64 tls_array[GDT_ENTRY_TLS_ENTRIES]; +/* performance counters */ + struct vperfctr *perfctr; } __attribute__((aligned(16))); #define INIT_THREAD { \ diff -rupN linux-2.6.22/include/asm-x86_64/system.h linux-2.6.22.perfctr27/include/asm-x86_64/system.h --- linux-2.6.22/include/asm-x86_64/system.h 2007-07-09 22:01:37.000000000 +0200 +++ linux-2.6.22.perfctr27/include/asm-x86_64/system.h 2007-09-28 12:46:31.000000000 +0200 @@ -21,7 +21,8 @@ ,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15" /* Save restore flags to clear handle leaking NT */ -#define switch_to(prev,next,last) \ +#define switch_to(prev,next,last) do { \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile(SAVE_CONTEXT \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ @@ -41,7 +42,8 @@ [tif_fork] "i" (TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, stack)), \ [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ - : "memory", "cc" __EXTRA_CLOBBER) + : "memory", "cc" __EXTRA_CLOBBER); \ +} while (0) extern void load_gs_index(unsigned); diff -rupN linux-2.6.22/include/asm-x86_64/unistd.h linux-2.6.22.perfctr27/include/asm-x86_64/unistd.h --- linux-2.6.22/include/asm-x86_64/unistd.h 2007-07-09 22:01:37.000000000 +0200 +++ linux-2.6.22.perfctr27/include/asm-x86_64/unistd.h 2007-09-28 12:50:48.000000000 +0200 @@ -630,6 +630,15 @@ __SYSCALL(__NR_signalfd, sys_signalfd) __SYSCALL(__NR_timerfd, sys_timerfd) #define __NR_eventfd 284 __SYSCALL(__NR_eventfd, sys_eventfd) +__SYSCALL(285, sys_ni_syscall) /* fallocate */ +#define __NR_vperfctr_open 286 +__SYSCALL(__NR_vperfctr_open, sys_vperfctr_open) +#define __NR_vperfctr_control (__NR_vperfctr_open+1) +__SYSCALL(__NR_vperfctr_control, sys_vperfctr_control) +#define __NR_vperfctr_write (__NR_vperfctr_open+2) +__SYSCALL(__NR_vperfctr_write, sys_vperfctr_write) +#define __NR_vperfctr_read (__NR_vperfctr_open+3) +__SYSCALL(__NR_vperfctr_read, 
sys_vperfctr_read) #ifndef __NO_STUBS #define __ARCH_WANT_OLD_READDIR diff -rupN linux-2.6.22/include/linux/sched.h linux-2.6.22.perfctr27/include/linux/sched.h --- linux-2.6.22/include/linux/sched.h 2007-07-09 22:01:37.000000000 +0200 +++ linux-2.6.22.perfctr27/include/linux/sched.h 2007-09-28 12:46:31.000000000 +0200 @@ -1496,6 +1496,9 @@ static inline int thread_group_empty(str * subscriptions and synchronises with wait4(). Also used in procfs. Also * pins the final release of task.io_context. Also protects ->cpuset. * + * Synchronises set_cpus_allowed(), unlink, and creat of ->thread.perfctr. + * [if CONFIG_PERFCTR_VIRTUAL] + * * Nests both inside and outside of read_lock(&tasklist_lock). * It must not be nested with write_lock_irq(&tasklist_lock), * neither inside nor outside. diff -rupN linux-2.6.22/kernel/exit.c linux-2.6.22.perfctr27/kernel/exit.c --- linux-2.6.22/kernel/exit.c 2007-07-09 22:01:37.000000000 +0200 +++ linux-2.6.22.perfctr27/kernel/exit.c 2007-09-28 12:46:31.000000000 +0200 @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -182,6 +183,7 @@ repeat: zap_leader = (leader->exit_signal == -1); } + perfctr_release_task(p); sched_exit(p); write_unlock_irq(&tasklist_lock); proc_flush_task(p); diff -rupN linux-2.6.22/kernel/sched.c linux-2.6.22.perfctr27/kernel/sched.c --- linux-2.6.22/kernel/sched.c 2007-07-09 22:01:37.000000000 +0200 +++ linux-2.6.22.perfctr27/kernel/sched.c 2007-09-28 12:46:31.000000000 +0200 @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -5121,6 +5122,8 @@ int set_cpus_allowed(struct task_struct struct rq *rq; int ret = 0; + perfctr_set_cpus_allowed(p, new_mask); + rq = task_rq_lock(p, &flags); if (!cpus_intersects(new_mask, cpu_online_map)) { ret = -EINVAL; diff -rupN linux-2.6.22/kernel/sys_ni.c linux-2.6.22.perfctr27/kernel/sys_ni.c --- linux-2.6.22/kernel/sys_ni.c 2007-07-09 22:01:37.000000000 +0200 +++ linux-2.6.22.perfctr27/kernel/sys_ni.c 2007-09-28 
12:46:31.000000000 +0200 @@ -73,6 +73,10 @@ cond_syscall(compat_sys_mq_timedsend); cond_syscall(compat_sys_mq_timedreceive); cond_syscall(compat_sys_mq_notify); cond_syscall(compat_sys_mq_getsetattr); +cond_syscall(sys_vperfctr_open); +cond_syscall(sys_vperfctr_control); +cond_syscall(sys_vperfctr_write); +cond_syscall(sys_vperfctr_read); cond_syscall(sys_mbind); cond_syscall(sys_get_mempolicy); cond_syscall(sys_set_mempolicy); diff -rupN linux-2.6.22/kernel/timer.c linux-2.6.22.perfctr27/kernel/timer.c --- linux-2.6.22/kernel/timer.c 2007-07-09 22:01:37.000000000 +0200 +++ linux-2.6.22.perfctr27/kernel/timer.c 2007-09-28 12:46:31.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -816,6 +817,7 @@ void update_process_times(int user_tick) account_user_time(p, jiffies_to_cputime(1)); else account_system_time(p, HARDIRQ_OFFSET, jiffies_to_cputime(1)); + perfctr_sample_thread(&p->thread); run_local_timers(); if (rcu_pending(cpu)) rcu_check_callbacks(cpu, user_tick); papi-5.6.0/src/perfctr-2.7.x/patches/patch-kernel-2.6.21000664 001750 001750 00000075074 13216244370 024231 0ustar00jshenry1963jshenry1963000000 000000 diff -rupN linux-2.6.21/CREDITS linux-2.6.21.perfctr27/CREDITS --- linux-2.6.21/CREDITS 2007-09-28 10:44:19.000000000 +0200 +++ linux-2.6.21.perfctr27/CREDITS 2007-09-28 11:02:19.000000000 +0200 @@ -2679,7 +2679,7 @@ S: Canada K2P 0X8 N: Mikael Pettersson E: mikpe@it.uu.se -W: http://www.csd.uu.se/~mikpe/ +W: http://user.it.uu.se/~mikpe/ D: Miscellaneous fixes N: Reed H. 
Petty diff -rupN linux-2.6.21/MAINTAINERS linux-2.6.21.perfctr27/MAINTAINERS --- linux-2.6.21/MAINTAINERS 2007-09-28 10:44:19.000000000 +0200 +++ linux-2.6.21.perfctr27/MAINTAINERS 2007-09-28 11:01:00.000000000 +0200 @@ -2641,6 +2641,12 @@ M: nagar@watson.ibm.com L: linux-kernel@vger.kernel.org S: Maintained +PERFORMANCE-MONITORING COUNTERS DRIVER +P: Mikael Pettersson +M: mikpe@it.uu.se +W: http://user.it.uu.se/~mikpe/linux/perfctr/ +S: Maintained + PERSONALITY HANDLING P: Christoph Hellwig M: hch@infradead.org diff -rupN linux-2.6.21/arch/i386/Kconfig linux-2.6.21.perfctr27/arch/i386/Kconfig --- linux-2.6.21/arch/i386/Kconfig 2007-09-28 10:44:19.000000000 +0200 +++ linux-2.6.21.perfctr27/arch/i386/Kconfig 2007-09-28 11:01:00.000000000 +0200 @@ -773,6 +773,8 @@ config SECCOMP If unsure, say Y. Only embedded should say N here. +source "drivers/perfctr/Kconfig" + source kernel/Kconfig.hz config KEXEC diff -rupN linux-2.6.21/arch/i386/kernel/entry.S linux-2.6.21.perfctr27/arch/i386/kernel/entry.S --- linux-2.6.21/arch/i386/kernel/entry.S 2007-09-28 10:44:19.000000000 +0200 +++ linux-2.6.21.perfctr27/arch/i386/kernel/entry.S 2007-09-28 11:01:00.000000000 +0200 @@ -648,6 +648,22 @@ ENDPROC(name) BUILD_INTERRUPT(apic_vmi_timer_interrupt,LOCAL_TIMER_VECTOR) #endif +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PERFCTR) +ENTRY(perfctr_interrupt) + RING0_INT_FRAME + pushl $~(LOCAL_PERFCTR_VECTOR) + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + TRACE_IRQS_OFF + pushl %esp + CFI_ADJUST_CFA_OFFSET 4 + call smp_perfctr_interrupt + addl $4, %esp + CFI_ADJUST_CFA_OFFSET -4 + jmp ret_from_intr + CFI_ENDPROC +#endif + KPROBE_ENTRY(page_fault) RING0_EC_FRAME pushl $do_page_fault diff -rupN linux-2.6.21/arch/i386/kernel/i8259.c linux-2.6.21.perfctr27/arch/i386/kernel/i8259.c --- linux-2.6.21/arch/i386/kernel/i8259.c 2007-09-28 10:44:19.000000000 +0200 +++ linux-2.6.21.perfctr27/arch/i386/kernel/i8259.c 2007-09-28 11:01:00.000000000 +0200 @@ -22,6 +22,7 @@ #include #include #include 
+#include #include @@ -410,6 +411,8 @@ void __init native_init_IRQ(void) */ intr_init_hook(); + perfctr_vector_init(); + /* * External FPU? Set up irq13 if so, for * original braindamaged IBM FERR coupling. diff -rupN linux-2.6.21/arch/i386/kernel/process.c linux-2.6.21.perfctr27/arch/i386/kernel/process.c --- linux-2.6.21/arch/i386/kernel/process.c 2007-09-28 10:44:19.000000000 +0200 +++ linux-2.6.21.perfctr27/arch/i386/kernel/process.c 2007-09-28 11:01:00.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -379,6 +380,7 @@ void exit_thread(void) tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET; put_cpu(); } + perfctr_exit_thread(&current->thread); } void flush_thread(void) @@ -430,6 +432,8 @@ int copy_thread(int nr, unsigned long cl savesegment(gs,p->thread.gs); + perfctr_copy_task(p, regs); + tsk = current; if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, @@ -714,6 +718,8 @@ struct task_struct fastcall * __switch_t write_pda(pcurrent, next_p); + perfctr_resume_thread(next); + return prev_p; } diff -rupN linux-2.6.21/arch/i386/kernel/syscall_table.S linux-2.6.21.perfctr27/arch/i386/kernel/syscall_table.S --- linux-2.6.21/arch/i386/kernel/syscall_table.S 2007-09-28 09:07:25.000000000 +0200 +++ linux-2.6.21.perfctr27/arch/i386/kernel/syscall_table.S 2007-09-28 11:01:00.000000000 +0200 @@ -319,3 +319,12 @@ ENTRY(sys_call_table) .long sys_move_pages .long sys_getcpu .long sys_epoll_pwait + .long sys_ni_syscall /* 320 */ + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_vperfctr_open /* 325 */ + .long sys_vperfctr_control + .long sys_vperfctr_write + .long sys_vperfctr_read diff -rupN linux-2.6.21/arch/powerpc/Kconfig linux-2.6.21.perfctr27/arch/powerpc/Kconfig --- linux-2.6.21/arch/powerpc/Kconfig 2007-09-28 10:44:19.000000000 +0200 +++ linux-2.6.21.perfctr27/arch/powerpc/Kconfig 2007-09-28
11:01:00.000000000 +0200 @@ -363,6 +363,9 @@ config NOT_COHERENT_CACHE bool depends on 4xx || 8xx || E200 default y + +source "drivers/perfctr/Kconfig" + endmenu source "init/Kconfig" diff -rupN linux-2.6.21/arch/powerpc/kernel/process.c linux-2.6.21.perfctr27/arch/powerpc/kernel/process.c --- linux-2.6.21/arch/powerpc/kernel/process.c 2007-09-28 10:44:19.000000000 +0200 +++ linux-2.6.21.perfctr27/arch/powerpc/kernel/process.c 2007-09-28 11:01:00.000000000 +0200 @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -332,7 +333,9 @@ struct task_struct *__switch_to(struct t account_process_vtime(current); calculate_steal_time(); + perfctr_suspend_thread(&prev->thread); last = _switch(old_thread, new_thread); + perfctr_resume_thread(&current->thread); local_irq_restore(flags); @@ -458,6 +461,7 @@ void show_regs(struct pt_regs * regs) void exit_thread(void) { discard_lazy_cpu_state(); + perfctr_exit_thread(&current->thread); } void flush_thread(void) @@ -575,6 +579,8 @@ int copy_thread(int nr, unsigned long cl p->thread.last_syscall = -1; #endif + perfctr_copy_task(p, regs); + return 0; } diff -rupN linux-2.6.21/arch/x86_64/Kconfig linux-2.6.21.perfctr27/arch/x86_64/Kconfig --- linux-2.6.21/arch/x86_64/Kconfig 2007-09-28 10:44:20.000000000 +0200 +++ linux-2.6.21.perfctr27/arch/x86_64/Kconfig 2007-09-28 11:01:00.000000000 +0200 @@ -625,6 +625,8 @@ config CC_STACKPROTECTOR_ALL functions that use large-ish on-stack buffers. By enabling this option, GCC will be asked to do this for ALL functions. 
+source "drivers/perfctr/Kconfig" + source kernel/Kconfig.hz config REORDER diff -rupN linux-2.6.21/arch/x86_64/ia32/ia32entry.S linux-2.6.21.perfctr27/arch/x86_64/ia32/ia32entry.S --- linux-2.6.21/arch/x86_64/ia32/ia32entry.S 2007-09-28 10:44:20.000000000 +0200 +++ linux-2.6.21.perfctr27/arch/x86_64/ia32/ia32entry.S 2007-09-28 11:07:11.000000000 +0200 @@ -719,4 +719,13 @@ ia32_sys_call_table: .quad compat_sys_move_pages .quad sys_getcpu .quad sys_epoll_pwait + .quad quiet_ni_syscall /* 320 */ + .quad quiet_ni_syscall + .quad quiet_ni_syscall + .quad quiet_ni_syscall + .quad quiet_ni_syscall + .quad sys_vperfctr_open /* 325 */ + .quad sys_vperfctr_control + .quad sys_vperfctr_write + .quad sys_vperfctr_read ia32_syscall_end: diff -rupN linux-2.6.21/arch/x86_64/kernel/entry.S linux-2.6.21.perfctr27/arch/x86_64/kernel/entry.S --- linux-2.6.21/arch/x86_64/kernel/entry.S 2007-09-28 10:44:20.000000000 +0200 +++ linux-2.6.21.perfctr27/arch/x86_64/kernel/entry.S 2007-09-28 11:01:00.000000000 +0200 @@ -692,6 +692,12 @@ ENTRY(spurious_interrupt) apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt END(spurious_interrupt) +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PERFCTR) +ENTRY(perfctr_interrupt) + apicinterrupt LOCAL_PERFCTR_VECTOR,smp_perfctr_interrupt +END(perfctr_interrupt) +#endif + /* * Exception entry points. 
*/ diff -rupN linux-2.6.21/arch/x86_64/kernel/i8259.c linux-2.6.21.perfctr27/arch/x86_64/kernel/i8259.c --- linux-2.6.21/arch/x86_64/kernel/i8259.c 2007-09-28 10:44:20.000000000 +0200 +++ linux-2.6.21.perfctr27/arch/x86_64/kernel/i8259.c 2007-09-28 11:01:00.000000000 +0200 @@ -22,6 +22,7 @@ #include #include #include +#include /* * Common place to define all x86 IRQ vectors @@ -552,6 +553,8 @@ void __init init_IRQ(void) set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); + perfctr_vector_init(); + /* * Set the clock to HZ Hz, we already have a valid * vector now: diff -rupN linux-2.6.21/arch/x86_64/kernel/process.c linux-2.6.21.perfctr27/arch/x86_64/kernel/process.c --- linux-2.6.21/arch/x86_64/kernel/process.c 2007-09-28 10:44:20.000000000 +0200 +++ linux-2.6.21.perfctr27/arch/x86_64/kernel/process.c 2007-09-28 11:01:00.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -377,6 +378,7 @@ void exit_thread(void) t->io_bitmap_max = 0; put_cpu(); } + perfctr_exit_thread(t); } void flush_thread(void) @@ -485,6 +487,8 @@ int copy_thread(int nr, unsigned long cl asm("mov %%es,%0" : "=m" (p->thread.es)); asm("mov %%ds,%0" : "=m" (p->thread.ds)); + perfctr_copy_task(p, regs); + if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) { @@ -668,6 +672,9 @@ __switch_to(struct task_struct *prev_p, */ if (next_p->fpu_counter>5) math_state_restore(); + + perfctr_resume_thread(next); + return prev_p; } diff -rupN linux-2.6.21/drivers/Makefile linux-2.6.21.perfctr27/drivers/Makefile --- linux-2.6.21/drivers/Makefile 2007-09-28 10:44:20.000000000 +0200 +++ linux-2.6.21.perfctr27/drivers/Makefile 2007-09-28 11:01:00.000000000 +0200 @@ -74,6 +74,7 @@ obj-$(CONFIG_NEW_LEDS) += leds/ obj-$(CONFIG_INFINIBAND) += infiniband/ obj-$(CONFIG_IPATH_CORE) += infiniband/ obj-$(CONFIG_SGI_SN) += sn/ 
+obj-$(CONFIG_PERFCTR) += perfctr/ obj-y += firmware/ obj-$(CONFIG_CRYPTO) += crypto/ obj-$(CONFIG_SUPERH) += sh/ diff -rupN linux-2.6.21/include/asm-i386/mach-default/irq_vectors.h linux-2.6.21.perfctr27/include/asm-i386/mach-default/irq_vectors.h --- linux-2.6.21/include/asm-i386/mach-default/irq_vectors.h 2007-02-04 19:44:54.000000000 +0100 +++ linux-2.6.21.perfctr27/include/asm-i386/mach-default/irq_vectors.h 2007-09-28 11:01:00.000000000 +0200 @@ -56,14 +56,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 diff -rupN linux-2.6.21/include/asm-i386/mach-visws/irq_vectors.h linux-2.6.21.perfctr27/include/asm-i386/mach-visws/irq_vectors.h --- linux-2.6.21/include/asm-i386/mach-visws/irq_vectors.h 2007-02-04 19:44:54.000000000 +0100 +++ linux-2.6.21.perfctr27/include/asm-i386/mach-visws/irq_vectors.h 2007-09-28 11:01:00.000000000 +0200 @@ -35,14 +35,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. 
(0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 diff -rupN linux-2.6.21/include/asm-i386/processor.h linux-2.6.21.perfctr27/include/asm-i386/processor.h --- linux-2.6.21/include/asm-i386/processor.h 2007-09-28 10:44:28.000000000 +0200 +++ linux-2.6.21.perfctr27/include/asm-i386/processor.h 2007-09-28 11:01:00.000000000 +0200 @@ -418,6 +418,8 @@ struct thread_struct { unsigned long iopl; /* max allowed port in the bitmap, in bytes: */ unsigned long io_bitmap_max; +/* performance counters */ + struct vperfctr *perfctr; }; #define INIT_THREAD { \ diff -rupN linux-2.6.21/include/asm-i386/system.h linux-2.6.21.perfctr27/include/asm-i386/system.h --- linux-2.6.21/include/asm-i386/system.h 2007-09-28 09:30:24.000000000 +0200 +++ linux-2.6.21.perfctr27/include/asm-i386/system.h 2007-09-28 11:01:00.000000000 +0200 @@ -17,6 +17,7 @@ extern struct task_struct * FASTCALL(__s */ #define switch_to(prev,next,last) do { \ unsigned long esi,edi; \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile("pushfl\n\t" /* Save flags */ \ "pushl %%ebp\n\t" \ "movl %%esp,%0\n\t" /* save ESP */ \ diff -rupN linux-2.6.21/include/asm-i386/unistd.h linux-2.6.21.perfctr27/include/asm-i386/unistd.h --- linux-2.6.21/include/asm-i386/unistd.h 2007-09-28 09:30:24.000000000 +0200 +++ linux-2.6.21.perfctr27/include/asm-i386/unistd.h 2007-09-28 11:01:00.000000000 +0200 @@ -325,10 +325,14 @@ #define __NR_move_pages 317 #define __NR_getcpu 318 #define __NR_epoll_pwait 319 +#define __NR_vperfctr_open 325 +#define __NR_vperfctr_control (__NR_vperfctr_open+1) +#define __NR_vperfctr_write (__NR_vperfctr_open+2) +#define __NR_vperfctr_read (__NR_vperfctr_open+3) #ifdef __KERNEL__ -#define NR_syscalls 320 +#define NR_syscalls 329 #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR diff -rupN linux-2.6.21/include/asm-powerpc/processor.h 
linux-2.6.21.perfctr27/include/asm-powerpc/processor.h --- linux-2.6.21/include/asm-powerpc/processor.h 2007-09-28 09:30:24.000000000 +0200 +++ linux-2.6.21.perfctr27/include/asm-powerpc/processor.h 2007-09-28 11:01:00.000000000 +0200 @@ -166,6 +166,9 @@ struct thread_struct { unsigned long spefscr; /* SPE & eFP status */ int used_spe; /* set if process has used spe */ #endif /* CONFIG_SPE */ +#ifdef CONFIG_PERFCTR_VIRTUAL + struct vperfctr *perfctr; /* performance counters */ +#endif }; #define ARCH_MIN_TASKALIGN 16 diff -rupN linux-2.6.21/include/asm-powerpc/reg.h linux-2.6.21.perfctr27/include/asm-powerpc/reg.h --- linux-2.6.21/include/asm-powerpc/reg.h 2007-09-28 10:44:28.000000000 +0200 +++ linux-2.6.21.perfctr27/include/asm-powerpc/reg.h 2007-09-28 11:01:00.000000000 +0200 @@ -368,10 +368,8 @@ #define SPRN_PURR 0x135 /* Processor Utilization of Resources Reg */ #define SPRN_PVR 0x11F /* Processor Version Register */ #define SPRN_RPA 0x3D6 /* Required Physical Address Register */ -#define SPRN_SDA 0x3BF /* Sampled Data Address Register */ #define SPRN_SDR1 0x019 /* MMU Hash Base Register */ #define SPRN_ASR 0x118 /* Address Space Register */ -#define SPRN_SIA 0x3BB /* Sampled Instruction Address Register */ #define SPRN_SPRG0 0x110 /* Special Purpose Register General 0 */ #define SPRN_SPRG1 0x111 /* Special Purpose Register General 1 */ #define SPRN_SPRG2 0x112 /* Special Purpose Register General 2 */ @@ -414,13 +412,6 @@ #define SPRN_THRM3 0x3FE /* Thermal Management Register 3 */ #define THRM3_E (1<<0) #define SPRN_TLBMISS 0x3D4 /* 980 7450 TLB Miss Register */ -#define SPRN_UMMCR0 0x3A8 /* User Monitor Mode Control Register 0 */ -#define SPRN_UMMCR1 0x3AC /* User Monitor Mode Control Register 0 */ -#define SPRN_UPMC1 0x3A9 /* User Performance Counter Register 1 */ -#define SPRN_UPMC2 0x3AA /* User Performance Counter Register 2 */ -#define SPRN_UPMC3 0x3AD /* User Performance Counter Register 3 */ -#define SPRN_UPMC4 0x3AE /* User Performance Counter 
Register 4 */ -#define SPRN_USIA 0x3AB /* User Sampled Instruction Address Register */ #define SPRN_VRSAVE 0x100 /* Vector Register Save Register */ #define SPRN_XER 0x001 /* Fixed Point Exception Register */ @@ -477,33 +468,7 @@ #define PA6T_SPRN_PMC5 792 #else /* 32-bit */ -#define SPRN_MMCR0 952 /* Monitor Mode Control Register 0 */ -#define MMCR0_FC 0x80000000UL /* freeze counters */ -#define MMCR0_FCS 0x40000000UL /* freeze in supervisor state */ -#define MMCR0_FCP 0x20000000UL /* freeze in problem state */ -#define MMCR0_FCM1 0x10000000UL /* freeze counters while MSR mark = 1 */ -#define MMCR0_FCM0 0x08000000UL /* freeze counters while MSR mark = 0 */ -#define MMCR0_PMXE 0x04000000UL /* performance monitor exception enable */ -#define MMCR0_FCECE 0x02000000UL /* freeze ctrs on enabled cond or event */ -#define MMCR0_TBEE 0x00400000UL /* time base exception enable */ -#define MMCR0_PMC1CE 0x00008000UL /* PMC1 count enable*/ #define MMCR0_PMCnCE 0x00004000UL /* count enable for all but PMC 1*/ -#define MMCR0_TRIGGER 0x00002000UL /* TRIGGER enable */ -#define MMCR0_PMC1SEL 0x00001fc0UL /* PMC 1 Event */ -#define MMCR0_PMC2SEL 0x0000003fUL /* PMC 2 Event */ - -#define SPRN_MMCR1 956 -#define MMCR1_PMC3SEL 0xf8000000UL /* PMC 3 Event */ -#define MMCR1_PMC4SEL 0x07c00000UL /* PMC 4 Event */ -#define MMCR1_PMC5SEL 0x003e0000UL /* PMC 5 Event */ -#define MMCR1_PMC6SEL 0x0001f800UL /* PMC 6 Event */ -#define SPRN_MMCR2 944 -#define SPRN_PMC1 953 /* Performance Counter Register 1 */ -#define SPRN_PMC2 954 /* Performance Counter Register 2 */ -#define SPRN_PMC3 957 /* Performance Counter Register 3 */ -#define SPRN_PMC4 958 /* Performance Counter Register 4 */ -#define SPRN_PMC5 945 /* Performance Counter Register 5 */ -#define SPRN_PMC6 946 /* Performance Counter Register 6 */ #define SPRN_SIAR 955 /* Sampled Instruction Address Register */ @@ -515,6 +480,77 @@ #define MMCR0_PMC2_CYCLES 0x1 #define MMCR0_PMC2_ITLB 0x7 #define MMCR0_PMC2_LOADMISSTIME 0x5 + +/* 
Performance-monitoring control and counter registers */ +#define SPRN_MMCR0 0x3B8 /* Monitor Mode Control Register 0 (604 and up) */ +#define SPRN_MMCR1 0x3BC /* Monitor Mode Control Register 1 (604e and up) */ +#define SPRN_MMCR2 0x3B0 /* Monitor Mode Control Register 2 (7400 and up) */ +#define SPRN_PMC1 0x3B9 /* Performance Counter Register 1 (604 and up) */ +#define SPRN_PMC2 0x3BA /* Performance Counter Register 2 (604 and up) */ +#define SPRN_PMC3 0x3BD /* Performance Counter Register 3 (604e and up) */ +#define SPRN_PMC4 0x3BE /* Performance Counter Register 4 (604e and up) */ +#define SPRN_PMC5 0x3B1 /* Performance Counter Register 5 (7450 and up) */ +#define SPRN_PMC6 0x3B2 /* Performance Counter Register 6 (7450 and up) */ +#define SPRN_SIA 0x3BB /* Sampled Instruction Address Register (604 and up) */ +#define SPRN_SDA 0x3BF /* Sampled Data Address Register (604/604e only) */ +#define SPRN_BAMR 0x3B7 /* Breakpoint Address Mask Register (7400 and up) */ + +#define SPRN_UMMCR0 0x3A8 /* User Monitor Mode Control Register 0 (750 and up) */ +#define SPRN_UMMCR1 0x3AC /* User Monitor Mode Control Register 1 (750 and up) */ +#define SPRN_UMMCR2 0x3A0 /* User Monitor Mode Control Register 2 (7400 and up) */ +#define SPRN_UPMC1 0x3A9 /* User Performance Counter Register 1 (750 and up) */ +#define SPRN_UPMC2 0x3AA /* User Performance Counter Register 2 (750 and up) */ +#define SPRN_UPMC3 0x3AD /* User Performance Counter Register 3 (750 and up) */ +#define SPRN_UPMC4 0x3AE /* User Performance Counter Register 4 (750 and up) */ +#define SPRN_UPMC5 0x3A1 /* User Performance Counter Register 5 (7450 and up) */ +#define SPRN_UPMC6 0x3A2 /* User Performance Counter Register 6 (7450 and up) */ +#define SPRN_USIA 0x3AB /* User Sampled Instruction Address Register (750 and up) */ +#define SPRN_UBAMR 0x3A7 /* User Breakpoint Address Mask Register (7400 and up) */ + +/* MMCR0 layout (74xx terminology) */ +#define MMCR0_FC 0x80000000 /* Freeze counters unconditionally. 
*/ +#define MMCR0_FCS 0x40000000 /* Freeze counters while MSR[PR]=0 (supervisor mode). */ +#define MMCR0_FCP 0x20000000 /* Freeze counters while MSR[PR]=1 (user mode). */ +#define MMCR0_FCM1 0x10000000 /* Freeze counters while MSR[PM]=1. */ +#define MMCR0_FCM0 0x08000000 /* Freeze counters while MSR[PM]=0. */ +#define MMCR0_PMXE 0x04000000 /* Enable performance monitor exceptions. + * Cleared by hardware when a PM exception occurs. + * 604: PMXE is not cleared by hardware. + */ +#define MMCR0_FCECE 0x02000000 /* Freeze counters on enabled condition or event. + * FCECE is treated as 0 if TRIGGER is 1. + * 74xx: FC is set when the event occurs. + * 604/750: ineffective when PMXE=0. + */ +#define MMCR0_TBSEL 0x01800000 /* Time base lower (TBL) bit selector. + * 00: bit 31, 01: bit 23, 10: bit 19, 11: bit 15. + */ +#define MMCR0_TBEE 0x00400000 /* Enable event on TBL bit transition from 0 to 1. */ +#define MMCR0_THRESHOLD 0x003F0000 /* Threshold value for certain events. */ +#define MMCR0_PMC1CE 0x00008000 /* Enable event on PMC1 overflow. */ +#define MMCR0_PMCjCE 0x00004000 /* Enable event on PMC2-PMC6 overflow. + * 604/750: Overrides FCECE (DISCOUNT). + */ +#define MMCR0_TRIGGER 0x00002000 /* Disable PMC2-PMC6 until PMC1 overflow or other event. + * 74xx: cleared by hardware when the event occurs. + */ +#define MMCR0_PMC1SEL 0x00001FC0 /* PMC1 event selector, 7 bits. */ +#define MMCR0_PMC2SEL 0x0000003F /* PMC2 event selector, 6 bits. */ + +/* MMCR1 layout (604e-7457) */ +#define MMCR1_PMC3SEL 0xF8000000 /* PMC3 event selector, 5 bits. */ +#define MMCR1_PMC4SEL 0x07C00000 /* PMC4 event selector, 5 bits. */ +#define MMCR1_PMC5SEL 0x003E0000 /* PMC5 event selector, 5 bits. (745x only) */ +#define MMCR1_PMC6SEL 0x0001F800 /* PMC6 event selector, 6 bits. (745x only) */ +#define MMCR1__RESERVED 0x000007FF /* should be zero */ + +/* MMCR2 layout (7400-7457) */ +#define MMCR2_THRESHMULT 0x80000000 /* MMCR0[THRESHOLD] multiplier. 
*/ +#define MMCR2_SMCNTEN 0x40000000 /* 7400/7410 only, should be zero. */ +#define MMCR2_SMINTEN 0x20000000 /* 7400/7410 only, should be zero. */ +#define MMCR2__RESERVED 0x1FFFFFFF /* should be zero */ +#define MMCR2_RESERVED (MMCR2_SMCNTEN | MMCR2_SMINTEN | MMCR2__RESERVED) + #endif /* diff -rupN linux-2.6.21/include/asm-powerpc/systbl.h linux-2.6.21.perfctr27/include/asm-powerpc/systbl.h --- linux-2.6.21/include/asm-powerpc/systbl.h 2007-09-28 10:44:28.000000000 +0200 +++ linux-2.6.21.perfctr27/include/asm-powerpc/systbl.h 2007-09-28 11:04:21.000000000 +0200 @@ -306,4 +306,14 @@ COMPAT_SYS_SPU(get_robust_list) COMPAT_SYS_SPU(set_robust_list) COMPAT_SYS_SPU(move_pages) SYSCALL_SPU(getcpu) -COMPAT_SYS(epoll_pwait) +COMPAT_SYS(epoll_pwait) /* 303 */ +SYSCALL(ni_syscall) /* 304 */ +SYSCALL(ni_syscall) /* 305 */ +SYSCALL(ni_syscall) /* 306 */ +SYSCALL(ni_syscall) /* 307 */ +SYSCALL(ni_syscall) /* 308 */ +SYSCALL(ni_syscall) /* 309 */ +SYSCALL(vperfctr_open) /* 310 */ +SYSCALL(vperfctr_control) +SYSCALL(vperfctr_write) +SYSCALL(vperfctr_read) diff -rupN linux-2.6.21/include/asm-powerpc/unistd.h linux-2.6.21.perfctr27/include/asm-powerpc/unistd.h --- linux-2.6.21/include/asm-powerpc/unistd.h 2007-09-28 10:44:28.000000000 +0200 +++ linux-2.6.21.perfctr27/include/asm-powerpc/unistd.h 2007-09-28 11:03:12.000000000 +0200 @@ -326,10 +326,14 @@ #define __NR_move_pages 301 #define __NR_getcpu 302 #define __NR_epoll_pwait 303 +#define __NR_vperfctr_open 310 +#define __NR_vperfctr_control (__NR_vperfctr_open+1) +#define __NR_vperfctr_write (__NR_vperfctr_open+2) +#define __NR_vperfctr_read (__NR_vperfctr_open+3) #ifdef __KERNEL__ -#define __NR_syscalls 304 +#define __NR_syscalls 314 #define __NR__exit __NR_exit #define NR_syscalls __NR_syscalls diff -rupN linux-2.6.21/include/asm-x86_64/hw_irq.h linux-2.6.21.perfctr27/include/asm-x86_64/hw_irq.h --- linux-2.6.21/include/asm-x86_64/hw_irq.h 2007-09-28 10:44:29.000000000 +0200 +++ 
linux-2.6.21.perfctr27/include/asm-x86_64/hw_irq.h 2007-09-28 11:05:31.000000000 +0200 @@ -84,14 +84,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x41 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR (IRQ15_VECTOR + 2) -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in irq.h */ #ifndef __ASSEMBLY__ diff -rupN linux-2.6.21/include/asm-x86_64/irq.h linux-2.6.21.perfctr27/include/asm-x86_64/irq.h --- linux-2.6.21/include/asm-x86_64/irq.h 2007-02-04 19:44:54.000000000 +0100 +++ linux-2.6.21.perfctr27/include/asm-x86_64/irq.h 2007-09-28 11:01:00.000000000 +0200 @@ -29,7 +29,7 @@ */ #define NR_VECTORS 256 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in hw_irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in hw_irq.h */ #define NR_IRQS (NR_VECTORS + (32 *NR_CPUS)) #define NR_IRQ_VECTORS NR_IRQS diff -rupN linux-2.6.21/include/asm-x86_64/processor.h linux-2.6.21.perfctr27/include/asm-x86_64/processor.h --- linux-2.6.21/include/asm-x86_64/processor.h 2007-09-28 09:30:24.000000000 +0200 +++ linux-2.6.21.perfctr27/include/asm-x86_64/processor.h 2007-09-28 11:01:00.000000000 +0200 @@ -274,6 +274,8 @@ struct thread_struct { unsigned io_bitmap_max; /* cached TLS descriptors. 
*/ u64 tls_array[GDT_ENTRY_TLS_ENTRIES]; +/* performance counters */ + struct vperfctr *perfctr; } __attribute__((aligned(16))); #define INIT_THREAD { \ diff -rupN linux-2.6.21/include/asm-x86_64/system.h linux-2.6.21.perfctr27/include/asm-x86_64/system.h --- linux-2.6.21/include/asm-x86_64/system.h 2007-09-28 09:07:41.000000000 +0200 +++ linux-2.6.21.perfctr27/include/asm-x86_64/system.h 2007-09-28 11:01:00.000000000 +0200 @@ -21,7 +21,8 @@ ,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15" /* Save restore flags to clear handle leaking NT */ -#define switch_to(prev,next,last) \ +#define switch_to(prev,next,last) do { \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile(SAVE_CONTEXT \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ @@ -41,7 +42,8 @@ [tif_fork] "i" (TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, thread_info)), \ [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ - : "memory", "cc" __EXTRA_CLOBBER) + : "memory", "cc" __EXTRA_CLOBBER); \ +} while (0) extern void load_gs_index(unsigned); diff -rupN linux-2.6.21/include/asm-x86_64/unistd.h linux-2.6.21.perfctr27/include/asm-x86_64/unistd.h --- linux-2.6.21/include/asm-x86_64/unistd.h 2007-09-28 09:30:24.000000000 +0200 +++ linux-2.6.21.perfctr27/include/asm-x86_64/unistd.h 2007-09-28 11:01:00.000000000 +0200 @@ -619,8 +619,22 @@ __SYSCALL(__NR_sync_file_range, sys_sync __SYSCALL(__NR_vmsplice, sys_vmsplice) #define __NR_move_pages 279 __SYSCALL(__NR_move_pages, sys_move_pages) +__SYSCALL(280, sys_ni_syscall) /* utimensat */ +__SYSCALL(281, sys_ni_syscall) /* epoll_wait */ +__SYSCALL(282, sys_ni_syscall) /* signalfd */ +__SYSCALL(283, sys_ni_syscall) /* timerfd */ +__SYSCALL(284, sys_ni_syscall) /* eventfd */ +__SYSCALL(285, sys_ni_syscall) /* fallocate */ +#define __NR_vperfctr_open 286 +__SYSCALL(__NR_vperfctr_open, sys_vperfctr_open) +#define __NR_vperfctr_control (__NR_vperfctr_open+1) 
+__SYSCALL(__NR_vperfctr_control, sys_vperfctr_control) +#define __NR_vperfctr_write (__NR_vperfctr_open+2) +__SYSCALL(__NR_vperfctr_write, sys_vperfctr_write) +#define __NR_vperfctr_read (__NR_vperfctr_open+3) +__SYSCALL(__NR_vperfctr_read, sys_vperfctr_read) -#define __NR_syscall_max __NR_move_pages +#define __NR_syscall_max __NR_vperfctr_read #ifndef __NO_STUBS #define __ARCH_WANT_OLD_READDIR diff -rupN linux-2.6.21/include/linux/sched.h linux-2.6.21.perfctr27/include/linux/sched.h --- linux-2.6.21/include/linux/sched.h 2007-09-28 10:44:29.000000000 +0200 +++ linux-2.6.21.perfctr27/include/linux/sched.h 2007-09-28 11:01:00.000000000 +0200 @@ -1469,6 +1469,9 @@ static inline int thread_group_empty(str * subscriptions and synchronises with wait4(). Also used in procfs. Also * pins the final release of task.io_context. Also protects ->cpuset. * + * Synchronises set_cpus_allowed(), unlink, and creat of ->thread.perfctr. + * [if CONFIG_PERFCTR_VIRTUAL] + * * Nests both inside and outside of read_lock(&tasklist_lock). * It must not be nested with write_lock_irq(&tasklist_lock), * neither inside nor outside. 
diff -rupN linux-2.6.21/kernel/exit.c linux-2.6.21.perfctr27/kernel/exit.c --- linux-2.6.21/kernel/exit.c 2007-09-28 10:44:29.000000000 +0200 +++ linux-2.6.21.perfctr27/kernel/exit.c 2007-09-28 11:01:00.000000000 +0200 @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -170,6 +171,7 @@ repeat: zap_leader = (leader->exit_signal == -1); } + perfctr_release_task(p); sched_exit(p); write_unlock_irq(&tasklist_lock); proc_flush_task(p); diff -rupN linux-2.6.21/kernel/sched.c linux-2.6.21.perfctr27/kernel/sched.c --- linux-2.6.21/kernel/sched.c 2007-09-28 10:44:29.000000000 +0200 +++ linux-2.6.21.perfctr27/kernel/sched.c 2007-09-28 11:01:00.000000000 +0200 @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -4836,6 +4837,8 @@ int set_cpus_allowed(struct task_struct struct rq *rq; int ret = 0; + perfctr_set_cpus_allowed(p, new_mask); + rq = task_rq_lock(p, &flags); if (!cpus_intersects(new_mask, cpu_online_map)) { ret = -EINVAL; diff -rupN linux-2.6.21/kernel/sys_ni.c linux-2.6.21.perfctr27/kernel/sys_ni.c --- linux-2.6.21/kernel/sys_ni.c 2007-09-28 09:07:41.000000000 +0200 +++ linux-2.6.21.perfctr27/kernel/sys_ni.c 2007-09-28 11:01:00.000000000 +0200 @@ -73,6 +73,10 @@ cond_syscall(compat_sys_mq_timedsend); cond_syscall(compat_sys_mq_timedreceive); cond_syscall(compat_sys_mq_notify); cond_syscall(compat_sys_mq_getsetattr); +cond_syscall(sys_vperfctr_open); +cond_syscall(sys_vperfctr_control); +cond_syscall(sys_vperfctr_write); +cond_syscall(sys_vperfctr_read); cond_syscall(sys_mbind); cond_syscall(sys_get_mempolicy); cond_syscall(sys_set_mempolicy); diff -rupN linux-2.6.21/kernel/timer.c linux-2.6.21.perfctr27/kernel/timer.c --- linux-2.6.21/kernel/timer.c 2007-09-28 10:44:29.000000000 +0200 +++ linux-2.6.21.perfctr27/kernel/timer.c 2007-09-28 11:01:00.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -1213,6 +1214,7 @@ void update_process_times(int 
user_tick) account_user_time(p, jiffies_to_cputime(1)); else account_system_time(p, HARDIRQ_OFFSET, jiffies_to_cputime(1)); + perfctr_sample_thread(&p->thread); run_local_timers(); if (rcu_pending(cpu)) rcu_check_callbacks(cpu, user_tick); papi-5.6.0/src/perfctr-2.7.x/patches/patch-kernel-2.6.20000664 001750 001750 00000075374 13216244370 024233 0ustar00jshenry1963jshenry1963000000 000000 diff -rupN linux-2.6.20/CREDITS linux-2.6.20.perfctr27/CREDITS --- linux-2.6.20/CREDITS 2007-09-28 09:30:15.000000000 +0200 +++ linux-2.6.20.perfctr27/CREDITS 2007-09-28 09:33:35.000000000 +0200 @@ -2668,9 +2668,10 @@ S: Ottawa, Ontario S: Canada K2P 0X8 N: Mikael Pettersson -E: mikpe@csd.uu.se -W: http://www.csd.uu.se/~mikpe/ +E: mikpe@it.uu.se +W: http://user.it.uu.se/~mikpe/ D: Miscellaneous fixes +D: Performance-monitoring counters driver N: Reed H. Petty E: rhp@draper.net diff -rupN linux-2.6.20/MAINTAINERS linux-2.6.20.perfctr27/MAINTAINERS --- linux-2.6.20/MAINTAINERS 2007-09-28 09:30:15.000000000 +0200 +++ linux-2.6.20.perfctr27/MAINTAINERS 2007-09-28 09:33:35.000000000 +0200 @@ -2577,6 +2577,12 @@ M: nagar@watson.ibm.com L: linux-kernel@vger.kernel.org S: Maintained +PERFORMANCE-MONITORING COUNTERS DRIVER +P: Mikael Pettersson +M: mikpe@it.uu.se +W: http://user.it.uu.se/~mikpe/linux/perfctr/ +S: Maintained + PERSONALITY HANDLING P: Christoph Hellwig M: hch@infradead.org diff -rupN linux-2.6.20/arch/i386/Kconfig linux-2.6.20.perfctr27/arch/i386/Kconfig --- linux-2.6.20/arch/i386/Kconfig 2007-09-28 09:30:15.000000000 +0200 +++ linux-2.6.20.perfctr27/arch/i386/Kconfig 2007-09-28 09:33:35.000000000 +0200 @@ -745,6 +745,8 @@ config SECCOMP If unsure, say Y. Only embedded should say N here. 
+source "drivers/perfctr/Kconfig" + source kernel/Kconfig.hz config KEXEC diff -rupN linux-2.6.20/arch/i386/kernel/entry.S linux-2.6.20.perfctr27/arch/i386/kernel/entry.S --- linux-2.6.20/arch/i386/kernel/entry.S 2007-09-28 09:30:15.000000000 +0200 +++ linux-2.6.20.perfctr27/arch/i386/kernel/entry.S 2007-09-28 09:33:35.000000000 +0200 @@ -626,6 +626,22 @@ ENTRY(name) \ /* The include is where all of the SMP etc. interrupts come from */ #include "entry_arch.h" +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PERFCTR) +ENTRY(perfctr_interrupt) + RING0_INT_FRAME + pushl $~(LOCAL_PERFCTR_VECTOR) + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + TRACE_IRQS_OFF + pushl %esp + CFI_ADJUST_CFA_OFFSET 4 + call smp_perfctr_interrupt + addl $4, %esp + CFI_ADJUST_CFA_OFFSET -4 + jmp ret_from_intr + CFI_ENDPROC +#endif + KPROBE_ENTRY(page_fault) RING0_EC_FRAME pushl $do_page_fault diff -rupN linux-2.6.20/arch/i386/kernel/i8259.c linux-2.6.20.perfctr27/arch/i386/kernel/i8259.c --- linux-2.6.20/arch/i386/kernel/i8259.c 2007-09-28 09:30:15.000000000 +0200 +++ linux-2.6.20.perfctr27/arch/i386/kernel/i8259.c 2007-09-28 09:33:35.000000000 +0200 @@ -22,6 +22,7 @@ #include #include #include +#include #include @@ -409,6 +410,8 @@ void __init native_init_IRQ(void) */ intr_init_hook(); + perfctr_vector_init(); + /* * Set the clock to HZ Hz, we already have a valid * vector now: diff -rupN linux-2.6.20/arch/i386/kernel/process.c linux-2.6.20.perfctr27/arch/i386/kernel/process.c --- linux-2.6.20/arch/i386/kernel/process.c 2007-09-28 09:30:15.000000000 +0200 +++ linux-2.6.20.perfctr27/arch/i386/kernel/process.c 2007-09-28 09:33:35.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -376,6 +377,7 @@ void exit_thread(void) tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET; put_cpu(); } + perfctr_exit_thread(&current->thread); } void flush_thread(void) @@ -427,6 +429,8 @@ int copy_thread(int nr, unsigned long cl savesegment(fs,p->thread.fs); + 
perfctr_copy_task(p, regs); + tsk = current; if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, @@ -695,6 +699,8 @@ struct task_struct fastcall * __switch_t if (next_p->fpu_counter > 5) math_state_restore(); + perfctr_resume_thread(next); + return prev_p; } diff -rupN linux-2.6.20/arch/i386/kernel/syscall_table.S linux-2.6.20.perfctr27/arch/i386/kernel/syscall_table.S --- linux-2.6.20/arch/i386/kernel/syscall_table.S 2007-09-28 09:07:25.000000000 +0200 +++ linux-2.6.20.perfctr27/arch/i386/kernel/syscall_table.S 2007-09-28 09:33:35.000000000 +0200 @@ -319,3 +319,12 @@ ENTRY(sys_call_table) .long sys_move_pages .long sys_getcpu .long sys_epoll_pwait + .long sys_ni_syscall /* 320 */ + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_vperfctr_open /* 325 */ + .long sys_vperfctr_control + .long sys_vperfctr_write + .long sys_vperfctr_read diff -rupN linux-2.6.20/arch/powerpc/Kconfig linux-2.6.20.perfctr27/arch/powerpc/Kconfig --- linux-2.6.20/arch/powerpc/Kconfig 2007-09-28 09:30:15.000000000 +0200 +++ linux-2.6.20.perfctr27/arch/powerpc/Kconfig 2007-09-28 09:33:35.000000000 +0200 @@ -356,6 +356,9 @@ config NOT_COHERENT_CACHE bool depends on 4xx || 8xx || E200 default y + +source "drivers/perfctr/Kconfig" + endmenu source "init/Kconfig" diff -rupN linux-2.6.20/arch/powerpc/kernel/process.c linux-2.6.20.perfctr27/arch/powerpc/kernel/process.c --- linux-2.6.20/arch/powerpc/kernel/process.c 2007-09-28 09:08:22.000000000 +0200 +++ linux-2.6.20.perfctr27/arch/powerpc/kernel/process.c 2007-09-28 09:33:35.000000000 +0200 @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -332,7 +333,9 @@ struct task_struct *__switch_to(struct t account_process_vtime(current); calculate_steal_time(); + perfctr_suspend_thread(&prev->thread); last = _switch(old_thread, new_thread); + perfctr_resume_thread(&current->thread); local_irq_restore(flags); 
@@ -458,6 +461,7 @@ void show_regs(struct pt_regs * regs) void exit_thread(void) { discard_lazy_cpu_state(); + perfctr_exit_thread(&current->thread); } void flush_thread(void) @@ -570,6 +574,8 @@ int copy_thread(int nr, unsigned long cl p->thread.last_syscall = -1; #endif + perfctr_copy_task(p, regs); + return 0; } diff -rupN linux-2.6.20/arch/x86_64/Kconfig linux-2.6.20.perfctr27/arch/x86_64/Kconfig --- linux-2.6.20/arch/x86_64/Kconfig 2007-09-28 09:30:15.000000000 +0200 +++ linux-2.6.20.perfctr27/arch/x86_64/Kconfig 2007-09-28 09:33:35.000000000 +0200 @@ -607,6 +607,8 @@ config CC_STACKPROTECTOR_ALL functions that use large-ish on-stack buffers. By enabling this option, GCC will be asked to do this for ALL functions. +source "drivers/perfctr/Kconfig" + source kernel/Kconfig.hz config REORDER diff -rupN linux-2.6.20/arch/x86_64/ia32/ia32entry.S linux-2.6.20.perfctr27/arch/x86_64/ia32/ia32entry.S --- linux-2.6.20/arch/x86_64/ia32/ia32entry.S 2007-09-28 09:08:23.000000000 +0200 +++ linux-2.6.20.perfctr27/arch/x86_64/ia32/ia32entry.S 2007-09-28 09:33:35.000000000 +0200 @@ -718,4 +718,14 @@ ia32_sys_call_table: .quad compat_sys_vmsplice .quad compat_sys_move_pages .quad sys_getcpu + .quad quiet_ni_syscall + .quad quiet_ni_syscall /* 320 */ + .quad quiet_ni_syscall + .quad quiet_ni_syscall + .quad quiet_ni_syscall + .quad quiet_ni_syscall + .quad sys_vperfctr_open /* 325 */ + .quad sys_vperfctr_control + .quad sys_vperfctr_write + .quad sys_vperfctr_read ia32_syscall_end: diff -rupN linux-2.6.20/arch/x86_64/kernel/entry.S linux-2.6.20.perfctr27/arch/x86_64/kernel/entry.S --- linux-2.6.20/arch/x86_64/kernel/entry.S 2007-09-28 09:30:15.000000000 +0200 +++ linux-2.6.20.perfctr27/arch/x86_64/kernel/entry.S 2007-09-28 09:33:35.000000000 +0200 @@ -689,6 +689,12 @@ ENTRY(spurious_interrupt) apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt END(spurious_interrupt) +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PERFCTR) +ENTRY(perfctr_interrupt) + apicinterrupt 
LOCAL_PERFCTR_VECTOR,smp_perfctr_interrupt +END(perfctr_interrupt) +#endif + /* * Exception entry points. */ diff -rupN linux-2.6.20/arch/x86_64/kernel/i8259.c linux-2.6.20.perfctr27/arch/x86_64/kernel/i8259.c --- linux-2.6.20/arch/x86_64/kernel/i8259.c 2007-09-28 09:30:15.000000000 +0200 +++ linux-2.6.20.perfctr27/arch/x86_64/kernel/i8259.c 2007-09-28 09:33:35.000000000 +0200 @@ -22,6 +22,7 @@ #include #include #include +#include /* * Common place to define all x86 IRQ vectors @@ -553,6 +554,8 @@ void __init init_IRQ(void) set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); + perfctr_vector_init(); + /* * Set the clock to HZ Hz, we already have a valid * vector now: diff -rupN linux-2.6.20/arch/x86_64/kernel/process.c linux-2.6.20.perfctr27/arch/x86_64/kernel/process.c --- linux-2.6.20/arch/x86_64/kernel/process.c 2007-09-28 09:30:15.000000000 +0200 +++ linux-2.6.20.perfctr27/arch/x86_64/kernel/process.c 2007-09-28 09:33:35.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -377,6 +378,7 @@ void exit_thread(void) t->io_bitmap_max = 0; put_cpu(); } + perfctr_exit_thread(t); } void flush_thread(void) @@ -482,6 +484,8 @@ int copy_thread(int nr, unsigned long cl asm("mov %%es,%0" : "=m" (p->thread.es)); asm("mov %%ds,%0" : "=m" (p->thread.ds)); + perfctr_copy_task(p, regs); + if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) { @@ -665,6 +669,9 @@ __switch_to(struct task_struct *prev_p, */ if (next_p->fpu_counter>5) math_state_restore(); + + perfctr_resume_thread(next); + return prev_p; } diff -rupN linux-2.6.20/drivers/Makefile linux-2.6.20.perfctr27/drivers/Makefile --- linux-2.6.20/drivers/Makefile 2007-09-28 09:30:15.000000000 +0200 +++ linux-2.6.20.perfctr27/drivers/Makefile 2007-09-28 09:33:35.000000000 +0200 @@ -73,6 +73,7 @@ obj-$(CONFIG_NEW_LEDS) += leds/ 
obj-$(CONFIG_INFINIBAND) += infiniband/ obj-$(CONFIG_IPATH_CORE) += infiniband/ obj-$(CONFIG_SGI_SN) += sn/ +obj-$(CONFIG_PERFCTR) += perfctr/ obj-y += firmware/ obj-$(CONFIG_CRYPTO) += crypto/ obj-$(CONFIG_SUPERH) += sh/ diff -rupN linux-2.6.20/include/asm-i386/mach-default/irq_vectors.h linux-2.6.20.perfctr27/include/asm-i386/mach-default/irq_vectors.h --- linux-2.6.20/include/asm-i386/mach-default/irq_vectors.h 2007-02-04 19:44:54.000000000 +0100 +++ linux-2.6.20.perfctr27/include/asm-i386/mach-default/irq_vectors.h 2007-09-28 09:33:35.000000000 +0200 @@ -56,14 +56,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 diff -rupN linux-2.6.20/include/asm-i386/mach-visws/irq_vectors.h linux-2.6.20.perfctr27/include/asm-i386/mach-visws/irq_vectors.h --- linux-2.6.20/include/asm-i386/mach-visws/irq_vectors.h 2007-02-04 19:44:54.000000000 +0100 +++ linux-2.6.20.perfctr27/include/asm-i386/mach-visws/irq_vectors.h 2007-09-28 09:33:35.000000000 +0200 @@ -35,14 +35,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. 
(0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 diff -rupN linux-2.6.20/include/asm-i386/processor.h linux-2.6.20.perfctr27/include/asm-i386/processor.h --- linux-2.6.20/include/asm-i386/processor.h 2007-09-28 09:30:24.000000000 +0200 +++ linux-2.6.20.perfctr27/include/asm-i386/processor.h 2007-09-28 09:33:35.000000000 +0200 @@ -418,6 +418,8 @@ struct thread_struct { unsigned long iopl; /* max allowed port in the bitmap, in bytes: */ unsigned long io_bitmap_max; +/* performance counters */ + struct vperfctr *perfctr; }; #define INIT_THREAD { \ diff -rupN linux-2.6.20/include/asm-i386/system.h linux-2.6.20.perfctr27/include/asm-i386/system.h --- linux-2.6.20/include/asm-i386/system.h 2007-09-28 09:30:24.000000000 +0200 +++ linux-2.6.20.perfctr27/include/asm-i386/system.h 2007-09-28 09:33:35.000000000 +0200 @@ -17,6 +17,7 @@ extern struct task_struct * FASTCALL(__s */ #define switch_to(prev,next,last) do { \ unsigned long esi,edi; \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile("pushfl\n\t" /* Save flags */ \ "pushl %%ebp\n\t" \ "movl %%esp,%0\n\t" /* save ESP */ \ diff -rupN linux-2.6.20/include/asm-i386/unistd.h linux-2.6.20.perfctr27/include/asm-i386/unistd.h --- linux-2.6.20/include/asm-i386/unistd.h 2007-09-28 09:30:24.000000000 +0200 +++ linux-2.6.20.perfctr27/include/asm-i386/unistd.h 2007-09-28 09:36:03.000000000 +0200 @@ -325,10 +325,14 @@ #define __NR_move_pages 317 #define __NR_getcpu 318 #define __NR_epoll_pwait 319 +#define __NR_vperfctr_open 325 +#define __NR_vperfctr_control (__NR_vperfctr_open+1) +#define __NR_vperfctr_write (__NR_vperfctr_open+2) +#define __NR_vperfctr_read (__NR_vperfctr_open+3) #ifdef __KERNEL__ -#define NR_syscalls 320 +#define NR_syscalls 329 #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR diff -rupN linux-2.6.20/include/asm-powerpc/processor.h 
linux-2.6.20.perfctr27/include/asm-powerpc/processor.h --- linux-2.6.20/include/asm-powerpc/processor.h 2007-09-28 09:30:24.000000000 +0200 +++ linux-2.6.20.perfctr27/include/asm-powerpc/processor.h 2007-09-28 09:33:35.000000000 +0200 @@ -166,6 +166,9 @@ struct thread_struct { unsigned long spefscr; /* SPE & eFP status */ int used_spe; /* set if process has used spe */ #endif /* CONFIG_SPE */ +#ifdef CONFIG_PERFCTR_VIRTUAL + struct vperfctr *perfctr; /* performance counters */ +#endif }; #define ARCH_MIN_TASKALIGN 16 diff -rupN linux-2.6.20/include/asm-powerpc/reg.h linux-2.6.20.perfctr27/include/asm-powerpc/reg.h --- linux-2.6.20/include/asm-powerpc/reg.h 2007-09-28 09:30:24.000000000 +0200 +++ linux-2.6.20.perfctr27/include/asm-powerpc/reg.h 2007-09-28 09:33:35.000000000 +0200 @@ -367,10 +367,8 @@ #define SPRN_PURR 0x135 /* Processor Utilization of Resources Reg */ #define SPRN_PVR 0x11F /* Processor Version Register */ #define SPRN_RPA 0x3D6 /* Required Physical Address Register */ -#define SPRN_SDA 0x3BF /* Sampled Data Address Register */ #define SPRN_SDR1 0x019 /* MMU Hash Base Register */ #define SPRN_ASR 0x118 /* Address Space Register */ -#define SPRN_SIA 0x3BB /* Sampled Instruction Address Register */ #define SPRN_SPRG0 0x110 /* Special Purpose Register General 0 */ #define SPRN_SPRG1 0x111 /* Special Purpose Register General 1 */ #define SPRN_SPRG2 0x112 /* Special Purpose Register General 2 */ @@ -407,13 +405,6 @@ #define SPRN_THRM3 0x3FE /* Thermal Management Register 3 */ #define THRM3_E (1<<0) #define SPRN_TLBMISS 0x3D4 /* 980 7450 TLB Miss Register */ -#define SPRN_UMMCR0 0x3A8 /* User Monitor Mode Control Register 0 */ -#define SPRN_UMMCR1 0x3AC /* User Monitor Mode Control Register 0 */ -#define SPRN_UPMC1 0x3A9 /* User Performance Counter Register 1 */ -#define SPRN_UPMC2 0x3AA /* User Performance Counter Register 2 */ -#define SPRN_UPMC3 0x3AD /* User Performance Counter Register 3 */ -#define SPRN_UPMC4 0x3AE /* User Performance Counter 
Register 4 */ -#define SPRN_USIA 0x3AB /* User Sampled Instruction Address Register */ #define SPRN_VRSAVE 0x100 /* Vector Register Save Register */ #define SPRN_XER 0x001 /* Fixed Point Exception Register */ @@ -463,33 +454,7 @@ #define SPRN_SDAR 781 #else /* 32-bit */ -#define SPRN_MMCR0 952 /* Monitor Mode Control Register 0 */ -#define MMCR0_FC 0x80000000UL /* freeze counters */ -#define MMCR0_FCS 0x40000000UL /* freeze in supervisor state */ -#define MMCR0_FCP 0x20000000UL /* freeze in problem state */ -#define MMCR0_FCM1 0x10000000UL /* freeze counters while MSR mark = 1 */ -#define MMCR0_FCM0 0x08000000UL /* freeze counters while MSR mark = 0 */ -#define MMCR0_PMXE 0x04000000UL /* performance monitor exception enable */ -#define MMCR0_FCECE 0x02000000UL /* freeze ctrs on enabled cond or event */ -#define MMCR0_TBEE 0x00400000UL /* time base exception enable */ -#define MMCR0_PMC1CE 0x00008000UL /* PMC1 count enable*/ #define MMCR0_PMCnCE 0x00004000UL /* count enable for all but PMC 1*/ -#define MMCR0_TRIGGER 0x00002000UL /* TRIGGER enable */ -#define MMCR0_PMC1SEL 0x00001fc0UL /* PMC 1 Event */ -#define MMCR0_PMC2SEL 0x0000003fUL /* PMC 2 Event */ - -#define SPRN_MMCR1 956 -#define MMCR1_PMC3SEL 0xf8000000UL /* PMC 3 Event */ -#define MMCR1_PMC4SEL 0x07c00000UL /* PMC 4 Event */ -#define MMCR1_PMC5SEL 0x003e0000UL /* PMC 5 Event */ -#define MMCR1_PMC6SEL 0x0001f800UL /* PMC 6 Event */ -#define SPRN_MMCR2 944 -#define SPRN_PMC1 953 /* Performance Counter Register 1 */ -#define SPRN_PMC2 954 /* Performance Counter Register 2 */ -#define SPRN_PMC3 957 /* Performance Counter Register 3 */ -#define SPRN_PMC4 958 /* Performance Counter Register 4 */ -#define SPRN_PMC5 945 /* Performance Counter Register 5 */ -#define SPRN_PMC6 946 /* Performance Counter Register 6 */ #define SPRN_SIAR 955 /* Sampled Instruction Address Register */ @@ -501,6 +466,77 @@ #define MMCR0_PMC2_CYCLES 0x1 #define MMCR0_PMC2_ITLB 0x7 #define MMCR0_PMC2_LOADMISSTIME 0x5 + +/* 
Performance-monitoring control and counter registers */ +#define SPRN_MMCR0 0x3B8 /* Monitor Mode Control Register 0 (604 and up) */ +#define SPRN_MMCR1 0x3BC /* Monitor Mode Control Register 1 (604e and up) */ +#define SPRN_MMCR2 0x3B0 /* Monitor Mode Control Register 2 (7400 and up) */ +#define SPRN_PMC1 0x3B9 /* Performance Counter Register 1 (604 and up) */ +#define SPRN_PMC2 0x3BA /* Performance Counter Register 2 (604 and up) */ +#define SPRN_PMC3 0x3BD /* Performance Counter Register 3 (604e and up) */ +#define SPRN_PMC4 0x3BE /* Performance Counter Register 4 (604e and up) */ +#define SPRN_PMC5 0x3B1 /* Performance Counter Register 5 (7450 and up) */ +#define SPRN_PMC6 0x3B2 /* Performance Counter Register 6 (7450 and up) */ +#define SPRN_SIA 0x3BB /* Sampled Instruction Address Register (604 and up) */ +#define SPRN_SDA 0x3BF /* Sampled Data Address Register (604/604e only) */ +#define SPRN_BAMR 0x3B7 /* Breakpoint Address Mask Register (7400 and up) */ + +#define SPRN_UMMCR0 0x3A8 /* User Monitor Mode Control Register 0 (750 and up) */ +#define SPRN_UMMCR1 0x3AC /* User Monitor Mode Control Register 1 (750 and up) */ +#define SPRN_UMMCR2 0x3A0 /* User Monitor Mode Control Register 2 (7400 and up) */ +#define SPRN_UPMC1 0x3A9 /* User Performance Counter Register 1 (750 and up) */ +#define SPRN_UPMC2 0x3AA /* User Performance Counter Register 2 (750 and up) */ +#define SPRN_UPMC3 0x3AD /* User Performance Counter Register 3 (750 and up) */ +#define SPRN_UPMC4 0x3AE /* User Performance Counter Register 4 (750 and up) */ +#define SPRN_UPMC5 0x3A1 /* User Performance Counter Register 5 (7450 and up) */ +#define SPRN_UPMC6 0x3A2 /* User Performance Counter Register 6 (7450 and up) */ +#define SPRN_USIA 0x3AB /* User Sampled Instruction Address Register (750 and up) */ +#define SPRN_UBAMR 0x3A7 /* User Breakpoint Address Mask Register (7400 and up) */ + +/* MMCR0 layout (74xx terminology) */ +#define MMCR0_FC 0x80000000 /* Freeze counters unconditionally. 
*/ +#define MMCR0_FCS 0x40000000 /* Freeze counters while MSR[PR]=0 (supervisor mode). */ +#define MMCR0_FCP 0x20000000 /* Freeze counters while MSR[PR]=1 (user mode). */ +#define MMCR0_FCM1 0x10000000 /* Freeze counters while MSR[PM]=1. */ +#define MMCR0_FCM0 0x08000000 /* Freeze counters while MSR[PM]=0. */ +#define MMCR0_PMXE 0x04000000 /* Enable performance monitor exceptions. + * Cleared by hardware when a PM exception occurs. + * 604: PMXE is not cleared by hardware. + */ +#define MMCR0_FCECE 0x02000000 /* Freeze counters on enabled condition or event. + * FCECE is treated as 0 if TRIGGER is 1. + * 74xx: FC is set when the event occurs. + * 604/750: ineffective when PMXE=0. + */ +#define MMCR0_TBSEL 0x01800000 /* Time base lower (TBL) bit selector. + * 00: bit 31, 01: bit 23, 10: bit 19, 11: bit 15. + */ +#define MMCR0_TBEE 0x00400000 /* Enable event on TBL bit transition from 0 to 1. */ +#define MMCR0_THRESHOLD 0x003F0000 /* Threshold value for certain events. */ +#define MMCR0_PMC1CE 0x00008000 /* Enable event on PMC1 overflow. */ +#define MMCR0_PMCjCE 0x00004000 /* Enable event on PMC2-PMC6 overflow. + * 604/750: Overrides FCECE (DISCOUNT). + */ +#define MMCR0_TRIGGER 0x00002000 /* Disable PMC2-PMC6 until PMC1 overflow or other event. + * 74xx: cleared by hardware when the event occurs. + */ +#define MMCR0_PMC1SEL 0x00001FC0 /* PMC1 event selector, 7 bits. */ +#define MMCR0_PMC2SEL 0x0000003F /* PMC2 event selector, 6 bits. */ + +/* MMCR1 layout (604e-7457) */ +#define MMCR1_PMC3SEL 0xF8000000 /* PMC3 event selector, 5 bits. */ +#define MMCR1_PMC4SEL 0x07C00000 /* PMC4 event selector, 5 bits. */ +#define MMCR1_PMC5SEL 0x003E0000 /* PMC5 event selector, 5 bits. (745x only) */ +#define MMCR1_PMC6SEL 0x0001F800 /* PMC6 event selector, 6 bits. (745x only) */ +#define MMCR1__RESERVED 0x000007FF /* should be zero */ + +/* MMCR2 layout (7400-7457) */ +#define MMCR2_THRESHMULT 0x80000000 /* MMCR0[THRESHOLD] multiplier. 
*/ +#define MMCR2_SMCNTEN 0x40000000 /* 7400/7410 only, should be zero. */ +#define MMCR2_SMINTEN 0x20000000 /* 7400/7410 only, should be zero. */ +#define MMCR2__RESERVED 0x1FFFFFFF /* should be zero */ +#define MMCR2_RESERVED (MMCR2_SMCNTEN | MMCR2_SMINTEN | MMCR2__RESERVED) + #endif /* diff -rupN linux-2.6.20/include/asm-powerpc/systbl.h linux-2.6.20.perfctr27/include/asm-powerpc/systbl.h --- linux-2.6.20/include/asm-powerpc/systbl.h 2007-09-28 09:08:36.000000000 +0200 +++ linux-2.6.20.perfctr27/include/asm-powerpc/systbl.h 2007-09-28 09:33:35.000000000 +0200 @@ -304,4 +304,16 @@ SYSCALL_SPU(fchmodat) SYSCALL_SPU(faccessat) COMPAT_SYS_SPU(get_robust_list) COMPAT_SYS_SPU(set_robust_list) -COMPAT_SYS(move_pages) +COMPAT_SYS(move_pages) /* 301 */ +SYSCALL(ni_syscall) /* 302 */ +SYSCALL(ni_syscall) /* 303 */ +SYSCALL(ni_syscall) /* 304 */ +SYSCALL(ni_syscall) /* 305 */ +SYSCALL(ni_syscall) /* 306 */ +SYSCALL(ni_syscall) /* 307 */ +SYSCALL(ni_syscall) /* 308 */ +SYSCALL(ni_syscall) /* 309 */ +SYSCALL(vperfctr_open) /* 310 */ +SYSCALL(vperfctr_control) +SYSCALL(vperfctr_write) +SYSCALL(vperfctr_read) diff -rupN linux-2.6.20/include/asm-powerpc/unistd.h linux-2.6.20.perfctr27/include/asm-powerpc/unistd.h --- linux-2.6.20/include/asm-powerpc/unistd.h 2007-09-28 09:30:24.000000000 +0200 +++ linux-2.6.20.perfctr27/include/asm-powerpc/unistd.h 2007-09-28 09:33:35.000000000 +0200 @@ -324,10 +324,14 @@ #define __NR_get_robust_list 299 #define __NR_set_robust_list 300 #define __NR_move_pages 301 +#define __NR_vperfctr_open 310 +#define __NR_vperfctr_control (__NR_vperfctr_open+1) +#define __NR_vperfctr_write (__NR_vperfctr_open+2) +#define __NR_vperfctr_read (__NR_vperfctr_open+3) #ifdef __KERNEL__ -#define __NR_syscalls 302 +#define __NR_syscalls 314 #define __NR__exit __NR_exit #define NR_syscalls __NR_syscalls diff -rupN linux-2.6.20/include/asm-x86_64/hw_irq.h linux-2.6.20.perfctr27/include/asm-x86_64/hw_irq.h --- linux-2.6.20/include/asm-x86_64/hw_irq.h 2007-09-28 
09:08:36.000000000 +0200 +++ linux-2.6.20.perfctr27/include/asm-x86_64/hw_irq.h 2007-09-28 09:33:35.000000000 +0200 @@ -63,14 +63,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in irq.h */ #ifndef __ASSEMBLY__ diff -rupN linux-2.6.20/include/asm-x86_64/irq.h linux-2.6.20.perfctr27/include/asm-x86_64/irq.h --- linux-2.6.20/include/asm-x86_64/irq.h 2007-02-04 19:44:54.000000000 +0100 +++ linux-2.6.20.perfctr27/include/asm-x86_64/irq.h 2007-09-28 09:33:35.000000000 +0200 @@ -29,7 +29,7 @@ */ #define NR_VECTORS 256 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in hw_irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in hw_irq.h */ #define NR_IRQS (NR_VECTORS + (32 *NR_CPUS)) #define NR_IRQ_VECTORS NR_IRQS diff -rupN linux-2.6.20/include/asm-x86_64/processor.h linux-2.6.20.perfctr27/include/asm-x86_64/processor.h --- linux-2.6.20/include/asm-x86_64/processor.h 2007-09-28 09:30:24.000000000 +0200 +++ linux-2.6.20.perfctr27/include/asm-x86_64/processor.h 2007-09-28 09:33:35.000000000 +0200 @@ -274,6 +274,8 @@ struct thread_struct { unsigned io_bitmap_max; /* cached TLS descriptors. 
*/ u64 tls_array[GDT_ENTRY_TLS_ENTRIES]; +/* performance counters */ + struct vperfctr *perfctr; } __attribute__((aligned(16))); #define INIT_THREAD { \ diff -rupN linux-2.6.20/include/asm-x86_64/system.h linux-2.6.20.perfctr27/include/asm-x86_64/system.h --- linux-2.6.20/include/asm-x86_64/system.h 2007-09-28 09:07:41.000000000 +0200 +++ linux-2.6.20.perfctr27/include/asm-x86_64/system.h 2007-09-28 09:33:35.000000000 +0200 @@ -21,7 +21,8 @@ ,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15" /* Save restore flags to clear handle leaking NT */ -#define switch_to(prev,next,last) \ +#define switch_to(prev,next,last) do { \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile(SAVE_CONTEXT \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ @@ -41,7 +42,8 @@ [tif_fork] "i" (TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, thread_info)), \ [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ - : "memory", "cc" __EXTRA_CLOBBER) + : "memory", "cc" __EXTRA_CLOBBER); \ +} while (0) extern void load_gs_index(unsigned); diff -rupN linux-2.6.20/include/asm-x86_64/unistd.h linux-2.6.20.perfctr27/include/asm-x86_64/unistd.h --- linux-2.6.20/include/asm-x86_64/unistd.h 2007-09-28 09:30:24.000000000 +0200 +++ linux-2.6.20.perfctr27/include/asm-x86_64/unistd.h 2007-09-28 09:33:35.000000000 +0200 @@ -619,8 +619,22 @@ __SYSCALL(__NR_sync_file_range, sys_sync __SYSCALL(__NR_vmsplice, sys_vmsplice) #define __NR_move_pages 279 __SYSCALL(__NR_move_pages, sys_move_pages) +__SYSCALL(280, sys_ni_syscall) /* utimensat */ +__SYSCALL(281, sys_ni_syscall) /* epoll_wait */ +__SYSCALL(282, sys_ni_syscall) /* signalfd */ +__SYSCALL(283, sys_ni_syscall) /* timerfd */ +__SYSCALL(284, sys_ni_syscall) /* eventfd */ +__SYSCALL(285, sys_ni_syscall) /* fallocate */ +#define __NR_vperfctr_open 286 +__SYSCALL(__NR_vperfctr_open, sys_vperfctr_open) +#define __NR_vperfctr_control (__NR_vperfctr_open+1) 
+__SYSCALL(__NR_vperfctr_control, sys_vperfctr_control) +#define __NR_vperfctr_write (__NR_vperfctr_open+2) +__SYSCALL(__NR_vperfctr_write, sys_vperfctr_write) +#define __NR_vperfctr_read (__NR_vperfctr_open+3) +__SYSCALL(__NR_vperfctr_read, sys_vperfctr_read) -#define __NR_syscall_max __NR_move_pages +#define __NR_syscall_max __NR_vperfctr_read #ifndef __NO_STUBS #define __ARCH_WANT_OLD_READDIR diff -rupN linux-2.6.20/include/linux/sched.h linux-2.6.20.perfctr27/include/linux/sched.h --- linux-2.6.20/include/linux/sched.h 2007-09-28 09:30:24.000000000 +0200 +++ linux-2.6.20.perfctr27/include/linux/sched.h 2007-09-28 09:33:35.000000000 +0200 @@ -1470,6 +1470,9 @@ static inline int thread_group_empty(str * subscriptions and synchronises with wait4(). Also used in procfs. Also * pins the final release of task.io_context. Also protects ->cpuset. * + * Synchronises set_cpus_allowed(), unlink, and creat of ->thread.perfctr. + * [if CONFIG_PERFCTR_VIRTUAL] + * * Nests both inside and outside of read_lock(&tasklist_lock). * It must not be nested with write_lock_irq(&tasklist_lock), * neither inside nor outside. 
diff -rupN linux-2.6.20/kernel/exit.c linux-2.6.20.perfctr27/kernel/exit.c --- linux-2.6.20/kernel/exit.c 2007-09-28 09:30:24.000000000 +0200 +++ linux-2.6.20.perfctr27/kernel/exit.c 2007-09-28 09:33:35.000000000 +0200 @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -170,6 +171,7 @@ repeat: zap_leader = (leader->exit_signal == -1); } + perfctr_release_task(p); sched_exit(p); write_unlock_irq(&tasklist_lock); proc_flush_task(p); diff -rupN linux-2.6.20/kernel/sched.c linux-2.6.20.perfctr27/kernel/sched.c --- linux-2.6.20/kernel/sched.c 2007-09-28 09:30:24.000000000 +0200 +++ linux-2.6.20.perfctr27/kernel/sched.c 2007-09-28 09:33:35.000000000 +0200 @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -5005,6 +5006,8 @@ int set_cpus_allowed(struct task_struct struct rq *rq; int ret = 0; + perfctr_set_cpus_allowed(p, new_mask); + rq = task_rq_lock(p, &flags); if (!cpus_intersects(new_mask, cpu_online_map)) { ret = -EINVAL; diff -rupN linux-2.6.20/kernel/sys_ni.c linux-2.6.20.perfctr27/kernel/sys_ni.c --- linux-2.6.20/kernel/sys_ni.c 2007-09-28 09:07:41.000000000 +0200 +++ linux-2.6.20.perfctr27/kernel/sys_ni.c 2007-09-28 09:33:35.000000000 +0200 @@ -73,6 +73,10 @@ cond_syscall(compat_sys_mq_timedsend); cond_syscall(compat_sys_mq_timedreceive); cond_syscall(compat_sys_mq_notify); cond_syscall(compat_sys_mq_getsetattr); +cond_syscall(sys_vperfctr_open); +cond_syscall(sys_vperfctr_control); +cond_syscall(sys_vperfctr_write); +cond_syscall(sys_vperfctr_read); cond_syscall(sys_mbind); cond_syscall(sys_get_mempolicy); cond_syscall(sys_set_mempolicy); diff -rupN linux-2.6.20/kernel/timer.c linux-2.6.20.perfctr27/kernel/timer.c --- linux-2.6.20/kernel/timer.c 2007-09-28 09:30:24.000000000 +0200 +++ linux-2.6.20.perfctr27/kernel/timer.c 2007-09-28 09:33:35.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -1110,6 +1111,7 @@ void update_process_times(int user_tick) 
account_user_time(p, jiffies_to_cputime(1)); else account_system_time(p, HARDIRQ_OFFSET, jiffies_to_cputime(1)); + perfctr_sample_thread(&p->thread); run_local_timers(); if (rcu_pending(cpu)) rcu_check_callbacks(cpu, user_tick); papi-5.6.0/src/ctests/zero_fork.c000664 001750 001750 00000007452 13216244361 021003 0ustar00jshenry1963jshenry1963000000 000000 /* * File: zero_fork.c * Author: Philip Mucci * mucci@cs.utk.edu * Mods: * */ /* This file performs the following test: PAPI_library_init() Add two events PAPI_start() fork() / \ parent child | PAPI_library_init() | Add two events | PAPI_start() | PAPI_stop() | fork()-----\ | child parent PAPI_library_init() | Add two events | PAPI_start() | PAPI_stop() | wait() wait() | PAPI_stop() No validation is done */ #include #include #include #include #include "papi.h" #include "papi_test.h" #include "do_loops.h" int EventSet1 = PAPI_NULL; int PAPI_event, mask1; int num_events1 = 2; long long elapsed_us, elapsed_cyc; long long **values; char event_name[PAPI_MAX_STR_LEN]; int retval, num_tests = 1; void process_init( void ) { if (!TESTS_QUIET) printf( "Process %d \n", ( int ) getpid( ) ); /* Initialize PAPI library */ retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) { test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); } /* add PAPI_TOT_CYC and one of the events in PAPI_FP_INS, PAPI_FP_OPS or PAPI_TOT_INS, depends on the availability of the event on the platform */ EventSet1 = add_two_events( &num_events1, &PAPI_event, &mask1 ); values = allocate_test_space( num_tests, num_events1 ); retval = PAPI_event_code_to_name( PAPI_event, event_name ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); } elapsed_us = PAPI_get_real_usec( ); elapsed_cyc = PAPI_get_real_cyc( ); retval = PAPI_start( EventSet1 ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_start", retval ); } } void process_fini( void ) { retval = PAPI_stop( 
EventSet1, values[0] ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); } elapsed_us = PAPI_get_real_usec( ) - elapsed_us; elapsed_cyc = PAPI_get_real_cyc( ) - elapsed_cyc; remove_test_events( &EventSet1, mask1 ); if (!TESTS_QUIET) { printf( "Process %d %-12s : \t%lld\n", ( int ) getpid( ), event_name, values[0][1] ); printf( "Process %d PAPI_TOT_CYC : \t%lld\n", ( int ) getpid( ), values[0][0] ); printf( "Process %d Real usec : \t%lld\n", ( int ) getpid( ), elapsed_us ); printf( "Process %d Real cycles : \t%lld\n", ( int ) getpid( ), elapsed_cyc ); } free_test_space( values, num_tests ); } int main( int argc, char **argv ) { int flops1; int retval; tests_quiet( argc, argv ); /* Set TESTS_QUIET variable */ # if (defined(__ALPHA) && defined(__osf__)) test_skip( __FILE__, __LINE__, "main: fork not supported.", 0 ); #endif if (!TESTS_QUIET) { printf( "This tests if PAPI_library_init(),2*fork(),PAPI_library_init() works.\n" ); } /* Initialize PAPI for this process */ process_init( ); flops1 = 1000000; if ( fork( ) == 0 ) { /* Initialize PAPI for the child process */ process_init( ); /* Let the child process do work */ do_flops( flops1 ); /* Measure the child process */ process_fini( ); exit( 0 ); } flops1 = 2000000; if ( fork( ) == 0 ) { /* Initialize PAPI for the child process */ process_init( ); /* Let the child process do work */ do_flops( flops1 ); /* Measure the child process */ process_fini( ); exit( 0 ); } /* Let this process do work */ flops1 = 4000000; do_flops( flops1 ); /* Wait for child to finish */ wait( &retval ); /* Wait for child to finish */ wait( &retval ); /* Measure this process */ process_fini( ); test_pass( __FILE__ ); return 0; } papi-5.6.0/src/components/perfmon2/Rules.perfmon2000664 001750 001750 00000000330 13216244360 023776 0ustar00jshenry1963jshenry1963000000 000000 COMPSRCS += components/perfmon2/perfmon.c COMPOBJS += perfmon.o perfmon.o: components/perfmon2/perfmon.c components/perfmon2/perfmon.h $(CC) 
$(LIBCFLAGS) $(OPTFLAGS) -c components/perfmon2/perfmon.c -o perfmon.o papi-5.6.0/src/perfctr-2.6.x/etc/costs/PentiumII-266b000775 001750 001750 00000001314 13216244366 023720 0ustar00jshenry1963jshenry1963000000 000000 [data from a 266MHz Pentium II (Deschutes)] PERFCTR INIT: vendor 0, family 6, model 5, stepping 0, clock 267278 kHz PERFCTR INIT: NITER == 64 PERFCTR INIT: loop overhead is 90 cycles PERFCTR INIT: rdtsc cost is 33.3 cycles (2223 total) PERFCTR INIT: rdpmc cost is 29.8 cycles (2000 total) PERFCTR INIT: rdmsr (counter) cost is 81.4 cycles (5302 total) PERFCTR INIT: rdmsr (evntsel) cost is 69.4 cycles (4533 total) PERFCTR INIT: wrmsr (counter) cost is 97.4 cycles (6325 total) PERFCTR INIT: wrmsr (evntsel) cost is 87.9 cycles (5721 total) PERFCTR INIT: read cr4 cost is 1.9 cycles (217 total) PERFCTR INIT: write cr4 cost is 42.2 cycles (2795 total) perfctr: driver 2.3.3, cpu type Intel Pentium II at 267278 kHz papi-5.6.0/src/perfctr-2.6.x/etc/costs/PentiumII-266a000775 001750 001750 00000001313 13216244366 023716 0ustar00jshenry1963jshenry1963000000 000000 [data from a 266Mhz Pentium II (Klamath)] PERFCTR INIT: vendor 0, family 6, model 3, stepping 4, clock 266621 kHz PERFCTR INIT: NITER == 64 PERFCTR INIT: loop overhead is 102 cycles PERFCTR INIT: rdtsc cost is 33.2 cycles (2230 total) PERFCTR INIT: rdpmc cost is 28.4 cycles (1924 total) PERFCTR INIT: rdmsr (counter) cost is 81.2 cycles (5302 total) PERFCTR INIT: rdmsr (evntsel) cost is 69.4 cycles (4545 total) PERFCTR INIT: wrmsr (counter) cost is 82.3 cycles (5372 total) PERFCTR INIT: wrmsr (evntsel) cost is 74.8 cycles (4890 total) PERFCTR INIT: read cr4 cost is 1.7 cycles (217 total) PERFCTR INIT: write cr4 cost is 39.0 cycles (2604 total) perfctr: driver 2.3.3, cpu type Intel Pentium II at 266621 kHz papi-5.6.0/src/libpfm-3.y/lib/amd64_events_k7.h000664 001750 001750 00000015132 13216244362 023024 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2006, 2007 Advanced Micro Devices, Inc. 
* Contributed by Ray Bryant * Contributed by Robert Richter * Modified for K7 by Vince Weaver * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. 
*/ /* * Definitions taken from "AMD Athlon Processor x86 Code Optimization Guide" * Table 11 February 2002 */ static pme_amd64_entry_t amd64_k7_pe[]={ /* 0 */{.pme_name = "DATA_CACHE_ACCESSES", .pme_code = 0x40, .pme_desc = "Data Cache Accesses", }, /* 1 */{.pme_name = "DATA_CACHE_MISSES", .pme_code = 0x41, .pme_desc = "Data Cache Misses", }, /* 2 */{.pme_name = "DATA_CACHE_REFILLS", .pme_code = 0x42, .pme_desc = "Data Cache Refills from L2", .pme_flags = PFMLIB_AMD64_UMASK_COMBO, .pme_numasks = 6, .pme_umasks = { { .pme_uname = "L2_INVALID", .pme_udesc = "Invalid line from L2", .pme_ucode = 0x01, }, { .pme_uname = "L2_SHARED", .pme_udesc = "Shared-state line from L2", .pme_ucode = 0x02, }, { .pme_uname = "L2_EXCLUSIVE", .pme_udesc = "Exclusive-state line from L2", .pme_ucode = 0x04, }, { .pme_uname = "L2_OWNED", .pme_udesc = "Owned-state line from L2", .pme_ucode = 0x08, }, { .pme_uname = "L2_MODIFIED", .pme_udesc = "Modified-state line from L2", .pme_ucode = 0x10, }, { .pme_uname = "ALL", .pme_udesc = "Shared, Exclusive, Owned, Modified State Refills", .pme_ucode = 0x1F, }, }, }, /* 3 */{.pme_name = "DATA_CACHE_REFILLS_FROM_SYSTEM", .pme_code = 0x43, .pme_desc = "Data Cache Refills from System", .pme_flags = PFMLIB_AMD64_UMASK_COMBO, .pme_numasks = 6, .pme_umasks = { { .pme_uname = "INVALID", .pme_udesc = "Invalid", .pme_ucode = 0x01, }, { .pme_uname = "SHARED", .pme_udesc = "Shared", .pme_ucode = 0x02, }, { .pme_uname = "EXCLUSIVE", .pme_udesc = "Exclusive", .pme_ucode = 0x04, }, { .pme_uname = "OWNED", .pme_udesc = "Owned", .pme_ucode = 0x08, }, { .pme_uname = "MODIFIED", .pme_udesc = "Modified", .pme_ucode = 0x10, }, { .pme_uname = "ALL", .pme_udesc = "Invalid, Shared, Exclusive, Owned, Modified", .pme_ucode = 0x1F, }, }, }, /* 4 */{.pme_name = "DATA_CACHE_LINES_EVICTED", .pme_code = 0x44, .pme_desc = "Data Cache Lines Evicted", .pme_flags = PFMLIB_AMD64_UMASK_COMBO, .pme_numasks = 6, .pme_umasks = { { .pme_uname = "INVALID", .pme_udesc = "Invalid", .pme_ucode 
= 0x01, }, { .pme_uname = "SHARED", .pme_udesc = "Shared", .pme_ucode = 0x02, }, { .pme_uname = "EXCLUSIVE", .pme_udesc = "Exclusive", .pme_ucode = 0x04, }, { .pme_uname = "OWNED", .pme_udesc = "Owned", .pme_ucode = 0x08, }, { .pme_uname = "MODIFIED", .pme_udesc = "Modified", .pme_ucode = 0x10, }, { .pme_uname = "ALL", .pme_udesc = "Invalid, Shared, Exclusive, Owned, Modified", .pme_ucode = 0x1F, }, }, }, /* 5 */{.pme_name = "L1_DTLB_MISS_AND_L2_DTLB_HIT", .pme_code = 0x45, .pme_desc = "L1 DTLB Miss and L2 DTLB Hit", }, /* 6 */{.pme_name = "L1_DTLB_AND_L2_DTLB_MISS", .pme_code = 0x46, .pme_desc = "L1 DTLB and L2 DTLB Miss", }, /* 7 */{.pme_name = "MISALIGNED_ACCESSES", .pme_code = 0x47, .pme_desc = "Misaligned Accesses", }, /* CPU_CLK_UNHALTED is undocumented in the Athlon Guide? */ /* 8 */{.pme_name = "CPU_CLK_UNHALTED", .pme_code = 0x76, .pme_desc = "CPU Clocks not Halted", }, /* 9 */{.pme_name = "INSTRUCTION_CACHE_FETCHES", .pme_code = 0x80, .pme_desc = "Instruction Cache Fetches", }, /* 10 */{.pme_name = "INSTRUCTION_CACHE_MISSES", .pme_code = 0x81, .pme_desc = "Instruction Cache Misses", }, /* 11 */{.pme_name = "L1_ITLB_MISS_AND_L2_ITLB_HIT", .pme_code = 0x84, .pme_desc = "L1 ITLB Miss and L2 ITLB Hit", }, /* 12 */{.pme_name = "L1_ITLB_MISS_AND_L2_ITLB_MISS", .pme_code = 0x85, .pme_desc = "L1 ITLB Miss and L2 ITLB Miss", }, /* 13 */{.pme_name = "RETIRED_INSTRUCTIONS", .pme_code = 0xC0, .pme_desc = "Retired Instructions (includes exceptions, interrupts, resyncs)", }, /* 14 */{.pme_name = "RETIRED_UOPS", .pme_code = 0xC1, .pme_desc = "Retired uops", }, /* 15 */{.pme_name = "RETIRED_BRANCH_INSTRUCTIONS", .pme_code = 0xC2, .pme_desc = "Retired Branch Instructions", }, /* 16 */{.pme_name = "RETIRED_MISPREDICTED_BRANCH_INSTRUCTIONS", .pme_code = 0xC3, .pme_desc = "Retired Mispredicted Branch Instructions", }, /* 17 */{.pme_name = "RETIRED_TAKEN_BRANCH_INSTRUCTIONS", .pme_code = 0xC4, .pme_desc = "Retired Taken Branch Instructions", }, /* 18 */{.pme_name = 
"RETIRED_TAKEN_BRANCH_INSTRUCTIONS_MISPREDICTED", .pme_code = 0xC5, .pme_desc = "Retired Taken Branch Instructions Mispredicted", }, /* 19 */{.pme_name = "RETIRED_FAR_CONTROL_TRANSFERS", .pme_code = 0xC6, .pme_desc = "Retired Far Control Transfers", }, /* 20 */{.pme_name = "RETIRED_BRANCH_RESYNCS", .pme_code = 0xC7, .pme_desc = "Retired Branch Resyncs (only non-control transfer branches)", }, /* 21 */{.pme_name = "INTERRUPTS_MASKED_CYCLES", .pme_code = 0xCD, .pme_desc = "Interrupts-Masked Cycles", }, /* 22 */{.pme_name = "INTERRUPTS_MASKED_CYCLES_WITH_INTERRUPT_PENDING", .pme_code = 0xCE, .pme_desc = "Interrupts-Masked Cycles with Interrupt Pending", }, /* 23 */{.pme_name = "INTERRUPTS_TAKEN", .pme_code = 0xCF, .pme_desc = "Interrupts Taken", }, }; #define PME_AMD64_K7_EVENT_COUNT (sizeof(amd64_k7_pe)/sizeof(pme_amd64_entry_t)) #define PME_AMD64_K7_CPU_CLK_UNHALTED 8 #define PME_AMD64_K7_RETIRED_INSTRUCTIONS 13 papi-5.6.0/ChangeLogP551.txt000664 001750 001750 00000003657 13216244355 017514 0ustar00jshenry1963jshenry1963000000 000000 2016-11-17 * 4b7c2c8b src/components/coretemp/linux-coretemp.c src/components/cuda/configure src/components/cuda/configure.in...: Handing some of the problems exposed by Coverity Mostly adding strncpy termination to some components (coretemp, lmsensors, micpower). Removed some unused component writing functions (lustre, mx). Fixed CUDA component configure.in to get the correct version of nvcc. Fixed division so it works in double precision rather than integer in the rapl component. Fixed a minor complaint about a stack counter variable in papi_preset. Thanks to William Cohen for sending the Coverity results report. 2016-11-15 * 7384d4d1 src/components/rapl/linux-rapl.c: Enable RAPL for Broadwell-EP 2016-11-04 * 0e90ecd4 src/Makefile.inc: Minor change: Removed unneeded characters in src/Makefile.inc. 
(Thanks to Steve Kaufmann) 2016-10-24 * b72df977 src/components/perf_event/perf_event_lib.h: Increase PERF_EVENT_MAX_MPX_COUNTERS to 384 to support KNL uncore events * Update libpfm4 to enable Intel Knights Landing untile PMU support. 2016-09-18 * b92abb7c src/components/powercap/utils/Makefile src/components/powercap/utils/powercap_plot.c src/components/powercap/utils/powercap_write_test.c: changed the tool in /powercap/utils to behave as the similar tool in /rapl/utils does. removed the old code residing in /powercap/utils. 2016-09-16 * 51d76878 src/threads.c: threads: silence compiler warning our_tid is only being used in debug statements * 33aacc65 src/papi_preset.c: papi_preset: quiet a compiler warning we were setting the papi_preset variable but only using it in debug statements. tell the compiler to not warn in this case. * 7ff9a01c src/ctests/zero_omp.c: tests/zero_omp: fix warning in zero_omp we weren't using the maxthr variable * 33deefbd src/components/rapl/tests/rapl_basic.c: components/rapl: fix compiler warning in rapl_basic test papi-5.6.0/src/perfctr-2.7.x/patches/patch-kernel-2.6.18000664 001750 001750 00000075144 13216244370 024235 0ustar00jshenry1963jshenry1963000000 000000 diff -rupN linux-2.6.18/CREDITS linux-2.6.18.perfctr27/CREDITS --- linux-2.6.18/CREDITS 2007-09-27 20:28:30.000000000 +0200 +++ linux-2.6.18.perfctr27/CREDITS 2007-09-27 21:04:27.000000000 +0200 @@ -2629,9 +2629,10 @@ S: Ottawa, Ontario S: Canada K2P 0X8 N: Mikael Pettersson -E: mikpe@csd.uu.se -W: http://www.csd.uu.se/~mikpe/ +E: mikpe@it.uu.se +W: http://user.it.uu.se/~mikpe/ D: Miscellaneous fixes +D: Performance-monitoring counters driver N: Reed H. 
Petty E: rhp@draper.net diff -rupN linux-2.6.18/MAINTAINERS linux-2.6.18.perfctr27/MAINTAINERS --- linux-2.6.18/MAINTAINERS 2007-09-27 20:28:31.000000000 +0200 +++ linux-2.6.18.perfctr27/MAINTAINERS 2007-09-27 20:35:38.000000000 +0200 @@ -2272,6 +2272,12 @@ M: nagar@watson.ibm.com L: linux-kernel@vger.kernel.org S: Maintained +PERFORMANCE-MONITORING COUNTERS DRIVER +P: Mikael Pettersson +M: mikpe@it.uu.se +W: http://user.it.uu.se/~mikpe/linux/perfctr/ +S: Maintained + PERSONALITY HANDLING P: Christoph Hellwig M: hch@infradead.org diff -rupN linux-2.6.18/arch/i386/Kconfig linux-2.6.18.perfctr27/arch/i386/Kconfig --- linux-2.6.18/arch/i386/Kconfig 2007-09-27 20:28:32.000000000 +0200 +++ linux-2.6.18.perfctr27/arch/i386/Kconfig 2007-09-27 20:34:06.000000000 +0200 @@ -737,6 +737,8 @@ config SECCOMP If unsure, say Y. Only embedded should say N here. +source "drivers/perfctr/Kconfig" + source kernel/Kconfig.hz config KEXEC diff -rupN linux-2.6.18/arch/i386/kernel/entry.S linux-2.6.18.perfctr27/arch/i386/kernel/entry.S --- linux-2.6.18/arch/i386/kernel/entry.S 2007-09-27 20:28:32.000000000 +0200 +++ linux-2.6.18.perfctr27/arch/i386/kernel/entry.S 2007-09-27 20:34:06.000000000 +0200 @@ -591,6 +591,22 @@ ENTRY(name) \ /* The include is where all of the SMP etc. 
interrupts come from */ #include "entry_arch.h" +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PERFCTR) +ENTRY(perfctr_interrupt) + RING0_INT_FRAME + pushl $~(LOCAL_PERFCTR_VECTOR) + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + TRACE_IRQS_OFF + pushl %esp + CFI_ADJUST_CFA_OFFSET 4 + call smp_perfctr_interrupt + addl $4, %esp + CFI_ADJUST_CFA_OFFSET -4 + jmp ret_from_intr + CFI_ENDPROC +#endif + ENTRY(divide_error) RING0_INT_FRAME pushl $0 # no error code diff -rupN linux-2.6.18/arch/i386/kernel/i8259.c linux-2.6.18.perfctr27/arch/i386/kernel/i8259.c --- linux-2.6.18/arch/i386/kernel/i8259.c 2007-09-27 20:28:32.000000000 +0200 +++ linux-2.6.18.perfctr27/arch/i386/kernel/i8259.c 2007-09-27 20:34:06.000000000 +0200 @@ -22,6 +22,7 @@ #include #include #include +#include #include @@ -420,6 +421,8 @@ void __init init_IRQ(void) */ intr_init_hook(); + perfctr_vector_init(); + /* * Set the clock to HZ Hz, we already have a valid * vector now: diff -rupN linux-2.6.18/arch/i386/kernel/process.c linux-2.6.18.perfctr27/arch/i386/kernel/process.c --- linux-2.6.18/arch/i386/kernel/process.c 2007-09-27 20:28:32.000000000 +0200 +++ linux-2.6.18.perfctr27/arch/i386/kernel/process.c 2007-09-27 20:34:06.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -379,6 +380,7 @@ void exit_thread(void) tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET; put_cpu(); } + perfctr_exit_thread(&current->thread); } void flush_thread(void) @@ -431,6 +433,8 @@ int copy_thread(int nr, unsigned long cl savesegment(fs,p->thread.fs); savesegment(gs,p->thread.gs); + perfctr_copy_task(p, regs); + tsk = current; if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); @@ -696,6 +700,8 @@ struct task_struct fastcall * __switch_t disable_tsc(prev_p, next_p); + perfctr_resume_thread(next); + return prev_p; } diff -rupN linux-2.6.18/arch/i386/kernel/syscall_table.S 
linux-2.6.18.perfctr27/arch/i386/kernel/syscall_table.S --- linux-2.6.18/arch/i386/kernel/syscall_table.S 2007-09-27 20:28:32.000000000 +0200 +++ linux-2.6.18.perfctr27/arch/i386/kernel/syscall_table.S 2007-09-27 20:49:23.000000000 +0200 @@ -317,3 +317,14 @@ ENTRY(sys_call_table) .long sys_tee /* 315 */ .long sys_vmsplice .long sys_move_pages + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_ni_syscall /* 320 */ + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_vperfctr_open /* 325 */ + .long sys_vperfctr_control + .long sys_vperfctr_write + .long sys_vperfctr_read diff -rupN linux-2.6.18/arch/powerpc/Kconfig linux-2.6.18.perfctr27/arch/powerpc/Kconfig --- linux-2.6.18/arch/powerpc/Kconfig 2007-09-27 20:28:33.000000000 +0200 +++ linux-2.6.18.perfctr27/arch/powerpc/Kconfig 2007-09-27 20:34:06.000000000 +0200 @@ -320,6 +320,9 @@ config NOT_COHERENT_CACHE bool depends on 4xx || 8xx || E200 default y + +source "drivers/perfctr/Kconfig" + endmenu source "init/Kconfig" diff -rupN linux-2.6.18/arch/powerpc/kernel/process.c linux-2.6.18.perfctr27/arch/powerpc/kernel/process.c --- linux-2.6.18/arch/powerpc/kernel/process.c 2007-09-27 20:28:33.000000000 +0200 +++ linux-2.6.18.perfctr27/arch/powerpc/kernel/process.c 2007-09-27 20:34:06.000000000 +0200 @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -332,7 +333,9 @@ struct task_struct *__switch_to(struct t account_process_vtime(current); calculate_steal_time(); + perfctr_suspend_thread(&prev->thread); last = _switch(old_thread, new_thread); + perfctr_resume_thread(&current->thread); local_irq_restore(flags); @@ -464,6 +467,7 @@ void show_regs(struct pt_regs * regs) void exit_thread(void) { discard_lazy_cpu_state(); + perfctr_exit_thread(&current->thread); } void flush_thread(void) @@ -576,6 +580,8 @@ int copy_thread(int nr, unsigned long cl p->thread.last_syscall = -1; #endif + perfctr_copy_task(p, regs); + return 0; } diff -rupN 
linux-2.6.18/arch/x86_64/Kconfig linux-2.6.18.perfctr27/arch/x86_64/Kconfig --- linux-2.6.18/arch/x86_64/Kconfig 2007-09-27 20:28:35.000000000 +0200 +++ linux-2.6.18.perfctr27/arch/x86_64/Kconfig 2007-09-27 20:34:06.000000000 +0200 @@ -526,6 +526,8 @@ config SECCOMP If unsure, say Y. Only embedded should say N here. +source "drivers/perfctr/Kconfig" + source kernel/Kconfig.hz config REORDER diff -rupN linux-2.6.18/arch/x86_64/ia32/ia32entry.S linux-2.6.18.perfctr27/arch/x86_64/ia32/ia32entry.S --- linux-2.6.18/arch/x86_64/ia32/ia32entry.S 2007-09-27 20:28:35.000000000 +0200 +++ linux-2.6.18.perfctr27/arch/x86_64/ia32/ia32entry.S 2007-09-27 20:50:45.000000000 +0200 @@ -713,4 +713,15 @@ ia32_sys_call_table: .quad sys_tee .quad compat_sys_vmsplice .quad compat_sys_move_pages + .quad quiet_ni_syscall + .quad quiet_ni_syscall + .quad quiet_ni_syscall /* 320 */ + .quad quiet_ni_syscall + .quad quiet_ni_syscall + .quad quiet_ni_syscall + .quad quiet_ni_syscall + .quad sys_vperfctr_open /* 325 */ + .quad sys_vperfctr_control + .quad sys_vperfctr_write + .quad sys_vperfctr_read ia32_syscall_end: diff -rupN linux-2.6.18/arch/x86_64/kernel/entry.S linux-2.6.18.perfctr27/arch/x86_64/kernel/entry.S --- linux-2.6.18/arch/x86_64/kernel/entry.S 2007-09-27 20:28:35.000000000 +0200 +++ linux-2.6.18.perfctr27/arch/x86_64/kernel/entry.S 2007-09-27 20:34:06.000000000 +0200 @@ -693,6 +693,12 @@ ENTRY(spurious_interrupt) END(spurious_interrupt) #endif +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PERFCTR) +ENTRY(perfctr_interrupt) + apicinterrupt LOCAL_PERFCTR_VECTOR,smp_perfctr_interrupt +END(perfctr_interrupt) +#endif + /* * Exception entry points. 
*/ diff -rupN linux-2.6.18/arch/x86_64/kernel/i8259.c linux-2.6.18.perfctr27/arch/x86_64/kernel/i8259.c --- linux-2.6.18/arch/x86_64/kernel/i8259.c 2007-09-27 20:28:35.000000000 +0200 +++ linux-2.6.18.perfctr27/arch/x86_64/kernel/i8259.c 2007-09-27 20:34:06.000000000 +0200 @@ -22,6 +22,7 @@ #include #include #include +#include /* * Common place to define all x86 IRQ vectors @@ -590,6 +591,8 @@ void __init init_IRQ(void) set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); #endif + perfctr_vector_init(); + /* * Set the clock to HZ Hz, we already have a valid * vector now: diff -rupN linux-2.6.18/arch/x86_64/kernel/process.c linux-2.6.18.perfctr27/arch/x86_64/kernel/process.c --- linux-2.6.18/arch/x86_64/kernel/process.c 2007-09-27 20:28:35.000000000 +0200 +++ linux-2.6.18.perfctr27/arch/x86_64/kernel/process.c 2007-09-27 20:34:06.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -357,6 +358,7 @@ void exit_thread(void) t->io_bitmap_max = 0; put_cpu(); } + perfctr_exit_thread(&me->thread); } void flush_thread(void) @@ -461,6 +463,8 @@ int copy_thread(int nr, unsigned long cl asm("mov %%es,%0" : "=m" (p->thread.es)); asm("mov %%ds,%0" : "=m" (p->thread.ds)); + perfctr_copy_task(p, regs); + if (unlikely(me->thread.io_bitmap_ptr != NULL)) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) { @@ -618,6 +622,8 @@ __switch_to(struct task_struct *prev_p, } } + perfctr_resume_thread(next); + return prev_p; } diff -rupN linux-2.6.18/drivers/Makefile linux-2.6.18.perfctr27/drivers/Makefile --- linux-2.6.18/drivers/Makefile 2007-09-27 20:28:35.000000000 +0200 +++ linux-2.6.18.perfctr27/drivers/Makefile 2007-09-27 20:34:06.000000000 +0200 @@ -71,6 +71,7 @@ obj-$(CONFIG_NEW_LEDS) += leds/ obj-$(CONFIG_INFINIBAND) += infiniband/ obj-$(CONFIG_IPATH_CORE) += infiniband/ obj-$(CONFIG_SGI_SN) += sn/ +obj-$(CONFIG_PERFCTR) += perfctr/ obj-y += firmware/ obj-$(CONFIG_CRYPTO) += crypto/ 
obj-$(CONFIG_SUPERH) += sh/ diff -rupN linux-2.6.18/include/asm-i386/mach-default/irq_vectors.h linux-2.6.18.perfctr27/include/asm-i386/mach-default/irq_vectors.h --- linux-2.6.18/include/asm-i386/mach-default/irq_vectors.h 2007-02-04 19:44:54.000000000 +0100 +++ linux-2.6.18.perfctr27/include/asm-i386/mach-default/irq_vectors.h 2007-09-27 20:34:06.000000000 +0200 @@ -56,14 +56,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 diff -rupN linux-2.6.18/include/asm-i386/mach-visws/irq_vectors.h linux-2.6.18.perfctr27/include/asm-i386/mach-visws/irq_vectors.h --- linux-2.6.18/include/asm-i386/mach-visws/irq_vectors.h 2007-02-04 19:44:54.000000000 +0100 +++ linux-2.6.18.perfctr27/include/asm-i386/mach-visws/irq_vectors.h 2007-09-27 20:34:06.000000000 +0200 @@ -35,14 +35,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. 
(0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 diff -rupN linux-2.6.18/include/asm-i386/processor.h linux-2.6.18.perfctr27/include/asm-i386/processor.h --- linux-2.6.18/include/asm-i386/processor.h 2007-09-27 20:28:47.000000000 +0200 +++ linux-2.6.18.perfctr27/include/asm-i386/processor.h 2007-09-27 20:34:06.000000000 +0200 @@ -471,6 +471,8 @@ struct thread_struct { unsigned long iopl; /* max allowed port in the bitmap, in bytes: */ unsigned long io_bitmap_max; +/* performance counters */ + struct vperfctr *perfctr; }; #define INIT_THREAD { \ diff -rupN linux-2.6.18/include/asm-i386/system.h linux-2.6.18.perfctr27/include/asm-i386/system.h --- linux-2.6.18/include/asm-i386/system.h 2007-09-27 20:28:47.000000000 +0200 +++ linux-2.6.18.perfctr27/include/asm-i386/system.h 2007-09-27 20:36:58.000000000 +0200 @@ -17,6 +17,7 @@ extern struct task_struct * FASTCALL(__s */ #define switch_to(prev,next,last) do { \ unsigned long esi,edi; \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile("pushfl\n\t" /* Save flags */ \ "pushl %%ebp\n\t" \ "movl %%esp,%0\n\t" /* save ESP */ \ diff -rupN linux-2.6.18/include/asm-i386/unistd.h linux-2.6.18.perfctr27/include/asm-i386/unistd.h --- linux-2.6.18/include/asm-i386/unistd.h 2007-09-27 20:28:47.000000000 +0200 +++ linux-2.6.18.perfctr27/include/asm-i386/unistd.h 2007-09-27 20:47:19.000000000 +0200 @@ -323,10 +323,14 @@ #define __NR_tee 315 #define __NR_vmsplice 316 #define __NR_move_pages 317 +#define __NR_vperfctr_open 325 +#define __NR_vperfctr_control (__NR_vperfctr_open+1) +#define __NR_vperfctr_write (__NR_vperfctr_open+2) +#define __NR_vperfctr_read (__NR_vperfctr_open+3) #ifdef __KERNEL__ -#define NR_syscalls 318 +#define NR_syscalls 329 /* * user-visible error numbers are in the range -1 - -128: see diff -rupN linux-2.6.18/include/asm-powerpc/processor.h linux-2.6.18.perfctr27/include/asm-powerpc/processor.h --- 
linux-2.6.18/include/asm-powerpc/processor.h 2007-09-27 20:28:48.000000000 +0200 +++ linux-2.6.18.perfctr27/include/asm-powerpc/processor.h 2007-09-27 20:34:06.000000000 +0200 @@ -169,6 +169,9 @@ struct thread_struct { unsigned long spefscr; /* SPE & eFP status */ int used_spe; /* set if process has used spe */ #endif /* CONFIG_SPE */ +#ifdef CONFIG_PERFCTR_VIRTUAL + struct vperfctr *perfctr; /* performance counters */ +#endif }; #define ARCH_MIN_TASKALIGN 16 diff -rupN linux-2.6.18/include/asm-powerpc/reg.h linux-2.6.18.perfctr27/include/asm-powerpc/reg.h --- linux-2.6.18/include/asm-powerpc/reg.h 2007-09-27 20:28:48.000000000 +0200 +++ linux-2.6.18.perfctr27/include/asm-powerpc/reg.h 2007-09-27 20:34:06.000000000 +0200 @@ -365,10 +365,8 @@ #define SPRN_PURR 0x135 /* Processor Utilization of Resources Reg */ #define SPRN_PVR 0x11F /* Processor Version Register */ #define SPRN_RPA 0x3D6 /* Required Physical Address Register */ -#define SPRN_SDA 0x3BF /* Sampled Data Address Register */ #define SPRN_SDR1 0x019 /* MMU Hash Base Register */ #define SPRN_ASR 0x118 /* Address Space Register */ -#define SPRN_SIA 0x3BB /* Sampled Instruction Address Register */ #define SPRN_SPRG0 0x110 /* Special Purpose Register General 0 */ #define SPRN_SPRG1 0x111 /* Special Purpose Register General 1 */ #define SPRN_SPRG2 0x112 /* Special Purpose Register General 2 */ @@ -405,13 +403,6 @@ #define SPRN_THRM3 0x3FE /* Thermal Management Register 3 */ #define THRM3_E (1<<0) #define SPRN_TLBMISS 0x3D4 /* 980 7450 TLB Miss Register */ -#define SPRN_UMMCR0 0x3A8 /* User Monitor Mode Control Register 0 */ -#define SPRN_UMMCR1 0x3AC /* User Monitor Mode Control Register 0 */ -#define SPRN_UPMC1 0x3A9 /* User Performance Counter Register 1 */ -#define SPRN_UPMC2 0x3AA /* User Performance Counter Register 2 */ -#define SPRN_UPMC3 0x3AD /* User Performance Counter Register 3 */ -#define SPRN_UPMC4 0x3AE /* User Performance Counter Register 4 */ -#define SPRN_USIA 0x3AB /* User Sampled 
Instruction Address Register */ #define SPRN_VRSAVE 0x100 /* Vector Register Save Register */ #define SPRN_XER 0x001 /* Fixed Point Exception Register */ @@ -461,33 +452,7 @@ #define SPRN_SDAR 781 #else /* 32-bit */ -#define SPRN_MMCR0 952 /* Monitor Mode Control Register 0 */ -#define MMCR0_FC 0x80000000UL /* freeze counters */ -#define MMCR0_FCS 0x40000000UL /* freeze in supervisor state */ -#define MMCR0_FCP 0x20000000UL /* freeze in problem state */ -#define MMCR0_FCM1 0x10000000UL /* freeze counters while MSR mark = 1 */ -#define MMCR0_FCM0 0x08000000UL /* freeze counters while MSR mark = 0 */ -#define MMCR0_PMXE 0x04000000UL /* performance monitor exception enable */ -#define MMCR0_FCECE 0x02000000UL /* freeze ctrs on enabled cond or event */ -#define MMCR0_TBEE 0x00400000UL /* time base exception enable */ -#define MMCR0_PMC1CE 0x00008000UL /* PMC1 count enable*/ #define MMCR0_PMCnCE 0x00004000UL /* count enable for all but PMC 1*/ -#define MMCR0_TRIGGER 0x00002000UL /* TRIGGER enable */ -#define MMCR0_PMC1SEL 0x00001fc0UL /* PMC 1 Event */ -#define MMCR0_PMC2SEL 0x0000003fUL /* PMC 2 Event */ - -#define SPRN_MMCR1 956 -#define MMCR1_PMC3SEL 0xf8000000UL /* PMC 3 Event */ -#define MMCR1_PMC4SEL 0x07c00000UL /* PMC 4 Event */ -#define MMCR1_PMC5SEL 0x003e0000UL /* PMC 5 Event */ -#define MMCR1_PMC6SEL 0x0001f800UL /* PMC 6 Event */ -#define SPRN_MMCR2 944 -#define SPRN_PMC1 953 /* Performance Counter Register 1 */ -#define SPRN_PMC2 954 /* Performance Counter Register 2 */ -#define SPRN_PMC3 957 /* Performance Counter Register 3 */ -#define SPRN_PMC4 958 /* Performance Counter Register 4 */ -#define SPRN_PMC5 945 /* Performance Counter Register 5 */ -#define SPRN_PMC6 946 /* Performance Counter Register 6 */ #define SPRN_SIAR 955 /* Sampled Instruction Address Register */ @@ -499,6 +464,77 @@ #define MMCR0_PMC2_CYCLES 0x1 #define MMCR0_PMC2_ITLB 0x7 #define MMCR0_PMC2_LOADMISSTIME 0x5 + +/* Performance-monitoring control and counter registers */ +#define 
SPRN_MMCR0 0x3B8 /* Monitor Mode Control Register 0 (604 and up) */ +#define SPRN_MMCR1 0x3BC /* Monitor Mode Control Register 1 (604e and up) */ +#define SPRN_MMCR2 0x3B0 /* Monitor Mode Control Register 2 (7400 and up) */ +#define SPRN_PMC1 0x3B9 /* Performance Counter Register 1 (604 and up) */ +#define SPRN_PMC2 0x3BA /* Performance Counter Register 2 (604 and up) */ +#define SPRN_PMC3 0x3BD /* Performance Counter Register 3 (604e and up) */ +#define SPRN_PMC4 0x3BE /* Performance Counter Register 4 (604e and up) */ +#define SPRN_PMC5 0x3B1 /* Performance Counter Register 5 (7450 and up) */ +#define SPRN_PMC6 0x3B2 /* Performance Counter Register 6 (7450 and up) */ +#define SPRN_SIA 0x3BB /* Sampled Instruction Address Register (604 and up) */ +#define SPRN_SDA 0x3BF /* Sampled Data Address Register (604/604e only) */ +#define SPRN_BAMR 0x3B7 /* Breakpoint Address Mask Register (7400 and up) */ + +#define SPRN_UMMCR0 0x3A8 /* User Monitor Mode Control Register 0 (750 and up) */ +#define SPRN_UMMCR1 0x3AC /* User Monitor Mode Control Register 0 (750 and up) */ +#define SPRN_UMMCR2 0x3A0 /* User Monitor Mode Control Register 0 (7400 and up) */ +#define SPRN_UPMC1 0x3A9 /* User Performance Counter Register 1 (750 and up) */ +#define SPRN_UPMC2 0x3AA /* User Performance Counter Register 2 (750 and up) */ +#define SPRN_UPMC3 0x3AD /* User Performance Counter Register 3 (750 and up) */ +#define SPRN_UPMC4 0x3AE /* User Performance Counter Register 4 (750 and up) */ +#define SPRN_UPMC5 0x3A1 /* User Performance Counter Register 5 (7450 and up) */ +#define SPRN_UPMC6 0x3A2 /* User Performance Counter Register 5 (7450 and up) */ +#define SPRN_USIA 0x3AB /* User Sampled Instruction Address Register (750 and up) */ +#define SPRN_UBAMR 0x3A7 /* User Breakpoint Address Mask Register (7400 and up) */ + +/* MMCR0 layout (74xx terminology) */ +#define MMCR0_FC 0x80000000 /* Freeze counters unconditionally. 
*/ +#define MMCR0_FCS 0x40000000 /* Freeze counters while MSR[PR]=0 (supervisor mode). */ +#define MMCR0_FCP 0x20000000 /* Freeze counters while MSR[PR]=1 (user mode). */ +#define MMCR0_FCM1 0x10000000 /* Freeze counters while MSR[PM]=1. */ +#define MMCR0_FCM0 0x08000000 /* Freeze counters while MSR[PM]=0. */ +#define MMCR0_PMXE 0x04000000 /* Enable performance monitor exceptions. + * Cleared by hardware when a PM exception occurs. + * 604: PMXE is not cleared by hardware. + */ +#define MMCR0_FCECE 0x02000000 /* Freeze counters on enabled condition or event. + * FCECE is treated as 0 if TRIGGER is 1. + * 74xx: FC is set when the event occurs. + * 604/750: ineffective when PMXE=0. + */ +#define MMCR0_TBSEL 0x01800000 /* Time base lower (TBL) bit selector. + * 00: bit 31, 01: bit 23, 10: bit 19, 11: bit 15. + */ +#define MMCR0_TBEE 0x00400000 /* Enable event on TBL bit transition from 0 to 1. */ +#define MMCR0_THRESHOLD 0x003F0000 /* Threshold value for certain events. */ +#define MMCR0_PMC1CE 0x00008000 /* Enable event on PMC1 overflow. */ +#define MMCR0_PMCjCE 0x00004000 /* Enable event on PMC2-PMC6 overflow. + * 604/750: Overrides FCECE (DISCOUNT). + */ +#define MMCR0_TRIGGER 0x00002000 /* Disable PMC2-PMC6 until PMC1 overflow or other event. + * 74xx: cleared by hardware when the event occurs. + */ +#define MMCR0_PMC1SEL 0x00001FC0 /* PMC1 event selector, 7 bits. */ +#define MMCR0_PMC2SEL 0x0000003F /* PMC2 event selector, 6 bits. */ + +/* MMCR1 layout (604e-7457) */ +#define MMCR1_PMC3SEL 0xF8000000 /* PMC3 event selector, 5 bits. */ +#define MMCR1_PMC4SEL 0x07C00000 /* PMC4 event selector, 5 bits. */ +#define MMCR1_PMC5SEL 0x003E0000 /* PMC5 event selector, 5 bits. (745x only) */ +#define MMCR1_PMC6SEL 0x0001F800 /* PMC6 event selector, 6 bits. (745x only) */ +#define MMCR1__RESERVED 0x000007FF /* should be zero */ + +/* MMCR2 layout (7400-7457) */ +#define MMCR2_THRESHMULT 0x80000000 /* MMCR0[THRESHOLD] multiplier. 
*/ +#define MMCR2_SMCNTEN 0x40000000 /* 7400/7410 only, should be zero. */ +#define MMCR2_SMINTEN 0x20000000 /* 7400/7410 only, should be zero. */ +#define MMCR2__RESERVED 0x1FFFFFFF /* should be zero */ +#define MMCR2_RESERVED (MMCR2_SMCNTEN | MMCR2_SMINTEN | MMCR2__RESERVED) + #endif /* diff -rupN linux-2.6.18/include/asm-powerpc/systbl.h linux-2.6.18.perfctr27/include/asm-powerpc/systbl.h --- linux-2.6.18/include/asm-powerpc/systbl.h 2007-09-27 20:28:48.000000000 +0200 +++ linux-2.6.18.perfctr27/include/asm-powerpc/systbl.h 2007-09-27 20:45:09.000000000 +0200 @@ -303,4 +303,17 @@ SYSCALL_SPU(readlinkat) SYSCALL_SPU(fchmodat) SYSCALL_SPU(faccessat) COMPAT_SYS_SPU(get_robust_list) -COMPAT_SYS_SPU(set_robust_list) +COMPAT_SYS_SPU(set_robust_list) /* 300 */ +SYSCALL(ni_syscall) /* 301 */ +SYSCALL(ni_syscall) /* 302 */ +SYSCALL(ni_syscall) /* 303 */ +SYSCALL(ni_syscall) /* 304 */ +SYSCALL(ni_syscall) /* 305 */ +SYSCALL(ni_syscall) /* 306 */ +SYSCALL(ni_syscall) /* 307 */ +SYSCALL(ni_syscall) /* 308 */ +SYSCALL(ni_syscall) /* 309 */ +SYSCALL(vperfctr_open) /* 310 */ +SYSCALL(vperfctr_control) +SYSCALL(vperfctr_write) +SYSCALL(vperfctr_read) diff -rupN linux-2.6.18/include/asm-powerpc/unistd.h linux-2.6.18.perfctr27/include/asm-powerpc/unistd.h --- linux-2.6.18/include/asm-powerpc/unistd.h 2007-09-27 20:28:48.000000000 +0200 +++ linux-2.6.18.perfctr27/include/asm-powerpc/unistd.h 2007-09-27 20:42:27.000000000 +0200 @@ -323,10 +323,14 @@ #define __NR_faccessat 298 #define __NR_get_robust_list 299 #define __NR_set_robust_list 300 +#define __NR_vperfctr_open 310 +#define __NR_vperfctr_control (__NR_vperfctr_open+1) +#define __NR_vperfctr_write (__NR_vperfctr_open+2) +#define __NR_vperfctr_read (__NR_vperfctr_open+3) #ifdef __KERNEL__ -#define __NR_syscalls 301 +#define __NR_syscalls 314 #define __NR__exit __NR_exit #define NR_syscalls __NR_syscalls diff -rupN linux-2.6.18/include/asm-x86_64/hw_irq.h linux-2.6.18.perfctr27/include/asm-x86_64/hw_irq.h --- 
linux-2.6.18/include/asm-x86_64/hw_irq.h 2007-09-27 20:28:48.000000000 +0200 +++ linux-2.6.18.perfctr27/include/asm-x86_64/hw_irq.h 2007-09-27 20:34:06.000000000 +0200 @@ -64,14 +64,15 @@ struct hw_interrupt_type; * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in irq.h */ #ifndef __ASSEMBLY__ diff -rupN linux-2.6.18/include/asm-x86_64/irq.h linux-2.6.18.perfctr27/include/asm-x86_64/irq.h --- linux-2.6.18/include/asm-x86_64/irq.h 2007-09-27 18:15:36.000000000 +0200 +++ linux-2.6.18.perfctr27/include/asm-x86_64/irq.h 2007-09-27 20:34:06.000000000 +0200 @@ -29,7 +29,7 @@ */ #define NR_VECTORS 256 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in hw_irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in hw_irq.h */ #ifdef CONFIG_PCI_MSI #define NR_IRQS FIRST_SYSTEM_VECTOR diff -rupN linux-2.6.18/include/asm-x86_64/processor.h linux-2.6.18.perfctr27/include/asm-x86_64/processor.h --- linux-2.6.18/include/asm-x86_64/processor.h 2007-09-27 20:28:48.000000000 +0200 +++ linux-2.6.18.perfctr27/include/asm-x86_64/processor.h 2007-09-27 20:34:06.000000000 +0200 @@ -274,6 +274,8 @@ struct thread_struct { unsigned io_bitmap_max; /* cached TLS descriptors. 
*/ u64 tls_array[GDT_ENTRY_TLS_ENTRIES]; +/* performance counters */ + struct vperfctr *perfctr; } __attribute__((aligned(16))); #define INIT_THREAD { \ diff -rupN linux-2.6.18/include/asm-x86_64/system.h linux-2.6.18.perfctr27/include/asm-x86_64/system.h --- linux-2.6.18/include/asm-x86_64/system.h 2007-09-27 20:28:48.000000000 +0200 +++ linux-2.6.18.perfctr27/include/asm-x86_64/system.h 2007-09-27 20:34:06.000000000 +0200 @@ -20,7 +20,8 @@ #define __EXTRA_CLOBBER \ ,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15" -#define switch_to(prev,next,last) \ +#define switch_to(prev,next,last) do { \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile(SAVE_CONTEXT \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ @@ -40,7 +41,8 @@ [tif_fork] "i" (TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, thread_info)), \ [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ - : "memory", "cc" __EXTRA_CLOBBER) + : "memory", "cc" __EXTRA_CLOBBER); \ +} while (0) extern void load_gs_index(unsigned); diff -rupN linux-2.6.18/include/asm-x86_64/unistd.h linux-2.6.18.perfctr27/include/asm-x86_64/unistd.h --- linux-2.6.18/include/asm-x86_64/unistd.h 2007-09-27 20:28:48.000000000 +0200 +++ linux-2.6.18.perfctr27/include/asm-x86_64/unistd.h 2007-09-27 20:56:09.000000000 +0200 @@ -619,10 +619,24 @@ __SYSCALL(__NR_sync_file_range, sys_sync __SYSCALL(__NR_vmsplice, sys_vmsplice) #define __NR_move_pages 279 __SYSCALL(__NR_move_pages, sys_move_pages) +__SYSCALL(280, sys_ni_syscall) /* utimensat */ +__SYSCALL(281, sys_ni_syscall) /* epoll_wait */ +__SYSCALL(282, sys_ni_syscall) /* signalfd */ +__SYSCALL(283, sys_ni_syscall) /* timerfd */ +__SYSCALL(284, sys_ni_syscall) /* eventfd */ +__SYSCALL(285, sys_ni_syscall) /* fallocate */ +#define __NR_vperfctr_open 286 +__SYSCALL(__NR_vperfctr_open, sys_vperfctr_open) +#define __NR_vperfctr_control (__NR_vperfctr_open+1) 
+__SYSCALL(__NR_vperfctr_control, sys_vperfctr_control) +#define __NR_vperfctr_write (__NR_vperfctr_open+2) +__SYSCALL(__NR_vperfctr_write, sys_vperfctr_write) +#define __NR_vperfctr_read (__NR_vperfctr_open+3) +__SYSCALL(__NR_vperfctr_read, sys_vperfctr_read) #ifdef __KERNEL__ -#define __NR_syscall_max __NR_move_pages +#define __NR_syscall_max __NR_vperfctr_read #ifndef __NO_STUBS diff -rupN linux-2.6.18/include/linux/sched.h linux-2.6.18.perfctr27/include/linux/sched.h --- linux-2.6.18/include/linux/sched.h 2007-09-27 20:28:49.000000000 +0200 +++ linux-2.6.18.perfctr27/include/linux/sched.h 2007-09-27 20:34:06.000000000 +0200 @@ -1351,6 +1351,9 @@ static inline int thread_group_empty(str * subscriptions and synchronises with wait4(). Also used in procfs. Also * pins the final release of task.io_context. Also protects ->cpuset. * + * Synchronises set_cpus_allowed(), unlink, and creat of ->thread.perfctr. + * [if CONFIG_PERFCTR_VIRTUAL] + * * Nests both inside and outside of read_lock(&tasklist_lock). * It must not be nested with write_lock_irq(&tasklist_lock), * neither inside nor outside. 
diff -rupN linux-2.6.18/kernel/exit.c linux-2.6.18.perfctr27/kernel/exit.c --- linux-2.6.18/kernel/exit.c 2007-09-27 20:28:49.000000000 +0200 +++ linux-2.6.18.perfctr27/kernel/exit.c 2007-09-27 20:34:06.000000000 +0200 @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -166,6 +167,7 @@ repeat: zap_leader = (leader->exit_signal == -1); } + perfctr_release_task(p); sched_exit(p); write_unlock_irq(&tasklist_lock); proc_flush_task(p); diff -rupN linux-2.6.18/kernel/sched.c linux-2.6.18.perfctr27/kernel/sched.c --- linux-2.6.18/kernel/sched.c 2007-09-27 20:28:49.000000000 +0200 +++ linux-2.6.18.perfctr27/kernel/sched.c 2007-09-27 20:34:06.000000000 +0200 @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -4845,6 +4846,8 @@ int set_cpus_allowed(struct task_struct struct rq *rq; int ret = 0; + perfctr_set_cpus_allowed(p, new_mask); + rq = task_rq_lock(p, &flags); if (!cpus_intersects(new_mask, cpu_online_map)) { ret = -EINVAL; diff -rupN linux-2.6.18/kernel/sys_ni.c linux-2.6.18.perfctr27/kernel/sys_ni.c --- linux-2.6.18/kernel/sys_ni.c 2007-09-27 20:28:49.000000000 +0200 +++ linux-2.6.18.perfctr27/kernel/sys_ni.c 2007-09-27 20:34:06.000000000 +0200 @@ -72,6 +72,10 @@ cond_syscall(compat_sys_mq_timedsend); cond_syscall(compat_sys_mq_timedreceive); cond_syscall(compat_sys_mq_notify); cond_syscall(compat_sys_mq_getsetattr); +cond_syscall(sys_vperfctr_open); +cond_syscall(sys_vperfctr_control); +cond_syscall(sys_vperfctr_write); +cond_syscall(sys_vperfctr_read); cond_syscall(sys_mbind); cond_syscall(sys_get_mempolicy); cond_syscall(sys_set_mempolicy); diff -rupN linux-2.6.18/kernel/timer.c linux-2.6.18.perfctr27/kernel/timer.c --- linux-2.6.18/kernel/timer.c 2007-09-27 20:28:49.000000000 +0200 +++ linux-2.6.18.perfctr27/kernel/timer.c 2007-09-27 20:34:06.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -1181,6 +1182,7 @@ void update_process_times(int user_tick) 
account_user_time(p, jiffies_to_cputime(1)); else account_system_time(p, HARDIRQ_OFFSET, jiffies_to_cputime(1)); + perfctr_sample_thread(&p->thread); run_local_timers(); if (rcu_pending(cpu)) rcu_check_callbacks(cpu, user_tick); papi-5.6.0/src/components/powercap/tests/powercap_limit.c000664 001750 001750 00000012247 13216244360 025660 0ustar00jshenry1963jshenry1963000000 000000 /** * @author Philip Vaccaro * Test case for powercap component * @brief * Tests basic functionality of powercap component */ #include #include #include #include #include "papi.h" #include "papi_test.h" #define MAX_powercap_EVENTS 64 int main ( int argc, char **argv ) { (void) argv; (void) argc; int retval,cid,powercap_cid=-1,numcmp; int EventSet = PAPI_NULL; long long values[MAX_powercap_EVENTS]; int limit_map[MAX_powercap_EVENTS]; int num_events=0, num_limits=0; int code; char event_names[MAX_powercap_EVENTS][PAPI_MAX_STR_LEN]; int r,i; const PAPI_component_info_t *cmpinfo = NULL; /* PAPI Initialization */ retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) test_fail( __FILE__, __LINE__,"PAPI_library_init\n",retval ); if ( !TESTS_QUIET ) printf( "Trying all powercap events\n" ); numcmp = PAPI_num_components(); for( cid=0; cidname,"powercap" ) ) { powercap_cid=cid; if ( !TESTS_QUIET ) printf( "Found powercap component at cid %d\n",powercap_cid ); if ( cmpinfo->disabled ) { if ( !TESTS_QUIET ) { printf( "powercap component disabled: %s\n", cmpinfo->disabled_reason ); } test_skip( __FILE__,__LINE__,"powercap component disabled",0 ); } break; } } /* Component not found */ if ( cid==numcmp ) test_skip( __FILE__,__LINE__,"No powercap component found\n",0 ); /* Skip if component has no counters */ if ( cmpinfo->num_cntrs==0 ) test_skip( __FILE__,__LINE__,"No counters in the powercap component\n",0 ); /* Create EventSet */ retval = PAPI_create_eventset( &EventSet ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_create_eventset()",retval ); /* Add all 
package limit events */ code = PAPI_NATIVE_MASK; r = PAPI_enum_cmp_event( &code, PAPI_ENUM_FIRST, powercap_cid ); /* find all package power events */ while ( r == PAPI_OK ) { retval = PAPI_event_code_to_name( code, event_names[num_events] ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__,"PAPI_event_code_to_name", retval ); retval = PAPI_add_event(EventSet, code); if (retval != PAPI_OK) break; /* We've hit an event limit */ if (!(strstr(event_names[num_events],"SUBZONE")) && (strstr(event_names[num_events],"POWER_LIMIT"))) { limit_map[num_limits] = num_events; num_limits++; } num_events++; r = PAPI_enum_cmp_event( &code, PAPI_ENUM_EVENTS, powercap_cid ); } /* start collecting power data */ retval = PAPI_start( EventSet ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_start()",retval ); /* initial read of package limits */ retval = PAPI_read( EventSet, values ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_start()",retval ); printf("\nCURRENT LIMITS\n"); for( i=0; i * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include #include #include #include "pfmlib_priv.h" #include "pfmlib_mips_priv.h" #include "pfmlib_perf_event_priv.h" int pfm_mips_get_perf_encoding(void *this, pfmlib_event_desc_t *e) { struct perf_event_attr *attr = e->os_data; int ret; ret = pfm_mips_get_encoding(this, e); if (ret != PFM_SUCCESS) return ret; if (e->count != 2) { DPRINT("unexpected encoding count=%d\n", e->count); return PFM_ERR_INVAL; } attr->type = PERF_TYPE_RAW; /* * priv levels are ignored because they are managed * directly through perf excl_*. */ attr->config = e->codes[0] >> 5; /* * codes[1] contains counter mask supported by the event. * Events support either odd or even indexed counters * except for cycles (code = 0) and instructions (code =1) * which work on all counters. * * The kernel expects bit 7 of config to indicate whether * the event works only on odd-indexed counters */ if ((e->codes[1] & 0x2) && attr->config > 1) attr->config |= 1ULL << 7; return PFM_SUCCESS; } void pfm_mips_perf_validate_pattrs(void *this, pfmlib_event_desc_t *e) { int i, compact; for (i = 0; i < e->npattrs; i++) { compact = 0; /* umasks never conflict */ if (e->pattrs[i].type == PFM_ATTR_UMASK) continue; /* * remove PMU-provided attributes which are either * not accessible under perf_events or fully controlled * by perf_events, e.g., priv levels filters */ if (e->pattrs[i].ctrl == PFM_ATTR_CTRL_PMU) { /* * with perf_event, priv levels under full * control of perf_event. 
*/ if ( e->pattrs[i].idx == MIPS_ATTR_K ||e->pattrs[i].idx == MIPS_ATTR_U ||e->pattrs[i].idx == MIPS_ATTR_S ||e->pattrs[i].idx == MIPS_ATTR_E) compact = 1; } /* * remove perf_event generic attributes not supported * by MIPS */ if (e->pattrs[i].ctrl == PFM_ATTR_CTRL_PERF_EVENT) { /* no precise sampling on MIPS */ if (e->pattrs[i].idx == PERF_ATTR_PR) compact = 1; } if (compact) { pfmlib_compact_pattrs(e, i); i--; } } } papi-5.6.0/src/libpfm4/lib/pfmlib_perf_event_raw.c000664 001750 001750 00000010707 13216244365 024141 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_perf_events_raw.c: support for raw event syntax * * Copyright (c) 2014 Google, Inc. All rights reserved * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include #include #include #include #include #include #include "pfmlib_priv.h" #include "pfmlib_perf_event_priv.h" static int pfm_perf_raw_detect(void *this) { #ifdef __linux__ /* ought to find a better way of detecting PERF */ #define PERF_OLD_PROC_FILE "/proc/sys/kernel/perf_counter_paranoid" #define PERF_PROC_FILE "/proc/sys/kernel/perf_event_paranoid" return !(access(PERF_PROC_FILE, F_OK) && access(PERF_OLD_PROC_FILE, F_OK)) ? PFM_SUCCESS: PFM_ERR_NOTSUPP; #else return PFM_SUCCESS; #endif } static int pfm_perf_raw_get_event_first(void *this) { return 0; } static int pfm_perf_raw_get_event_next(void *this, int idx) { /* only one pseudo event */ return -1; } static int pfm_perf_raw_get_encoding(void *this, pfmlib_event_desc_t *e) { /* * actual enoding done in pfm_perf_raw_match_event() */ e->fstr[0] = '\0'; evt_strcat(e->fstr, "r%"PRIx64, e->codes[0]); return PFM_SUCCESS; } static int pfm_perf_raw_get_perf_encoding(void *this, pfmlib_event_desc_t *e) { struct perf_event_attr *attr; attr = e->os_data; attr->type = PERF_TYPE_RAW; attr->config = e->codes[0]; attr->config1 = e->codes[1]; attr->config2 = e->codes[2]; return PFM_SUCCESS; } static int pfm_perf_raw_event_is_valid(void *this, int idx) { return idx == 0; } static int pfm_perf_raw_get_event_attr_info(void *this, int idx, int attr_idx, pfmlib_event_attr_info_t *info) { return PFM_ERR_ATTR; } static int pfm_perf_raw_get_event_info(void *this, int idx, pfm_event_info_t *info) { pfmlib_pmu_t *pmu = this; info->name = "r0000"; info->desc = "perf_events raw event syntax: r[0-9a-fA-F]+", info->code = 0; info->equiv = NULL; info->idx = 0; info->pmu = pmu->pmu; info->is_precise = 0; /* unit masks + modifiers */ info->nattrs = 0; return PFM_SUCCESS; } static unsigned int pfm_perf_raw_get_event_nattrs(void *this, int idx) { return 0; } /* * remove attrs which are in conflicts (or duplicated) with os layer */ static void pfm_perf_raw_perf_validate_pattrs(void *this, pfmlib_event_desc_t *e) { } /* * returns 0 if 
match (like strcmp()) */ static int pfm_perf_raw_match_event(void *this, pfmlib_event_desc_t *d, const char *e, const char *s) { uint64_t code; int ret; if (*s != 'r' || !isxdigit(*(s+1))) return 1; ret = sscanf(s+1, "%"PRIx64, &code); if (ret != 1) return 1; /* * stash code in final position */ d->codes[0] = code; d->count = 1; return 0; } pfmlib_pmu_t perf_event_raw_support={ .desc = "perf_events raw PMU", .name = "perf_raw", .pmu = PFM_PMU_PERF_EVENT_RAW, .pme_count = 1, .type = PFM_PMU_TYPE_OS_GENERIC, .max_encoding = 1, .supported_plm = PERF_PLM_ALL, .pmu_detect = pfm_perf_raw_detect, .get_event_encoding[PFM_OS_NONE] = pfm_perf_raw_get_encoding, PFMLIB_ENCODE_PERF(pfm_perf_raw_get_perf_encoding), .get_event_first = pfm_perf_raw_get_event_first, .get_event_next = pfm_perf_raw_get_event_next, .event_is_valid = pfm_perf_raw_event_is_valid, .get_event_info = pfm_perf_raw_get_event_info, .get_event_attr_info = pfm_perf_raw_get_event_attr_info, .get_event_nattrs = pfm_perf_raw_get_event_nattrs, .match_event = pfm_perf_raw_match_event, PFMLIB_VALID_PERF_PATTRS(pfm_perf_raw_perf_validate_pattrs), }; papi-5.6.0/man/man1/papi_mem_info.1000664 001750 001750 00000001421 13216244355 021027 0ustar00jshenry1963jshenry1963000000 000000 .TH "papi_mem_info" 1 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME papi_mem_info \- papi_mem_info utility\&. .PP file papi_mem_info\&.c .SH "NAME" .PP papi_mem_info - provides information on the memory architecture of the current processor\&. .SH "Synopsis" .PP .SH "Description" .PP papi_mem_info is a PAPI utility program that reports information about the cache memory architecture of the current processor, including number, types, sizes and associativities of instruction and data caches and Translation Lookaside Buffers\&. .SH "Options" .PP This utility has no command line options\&. .SH "Bugs" .PP There are no known bugs in this utility\&. 
If you find a bug, it should be reported to the PAPI Mailing List at ptools-perfapi@icl.utk.edu\&. papi-5.6.0/src/libpfm-3.y/Makefile000664 001750 001750 00000004321 13216244361 020642 0ustar00jshenry1963jshenry1963000000 000000 # # Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. # Contributed by Stephane Eranian # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies # of the Software, and to permit persons to whom the Software is furnished to do so, # subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF # CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE # OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
# # # Look in config.mk for options # TOPDIR := $(shell if [ "$$PWD" != "" ]; then echo $$PWD; else pwd; fi) include config.mk DIRS=lib include docs EXAMPLES_DIRS = examples_v2.x ifneq ($(CONFIG_PFMLIB_OLD_PFMV2),y) EXAMPLES_DIRS += examples_v3.x endif ifeq ($(ARCH),ia64) DIRS +=examples_ia64_v2.0 endif ifeq ($(SYS),Linux) DIRS +=libpfms endif DIRS += $(EXAMPLES_DIRS) all: @echo Compiling for \'$(ARCH)\' target @set -e ; for d in $(DIRS) ; do $(MAKE) -C $$d $@ ; done lib: $(MAKE) -C lib clean: @set -e ; for d in $(DIRS) ; do $(MAKE) -C $$d $@ ; done distclean: clean depend: @set -e ; for d in $(DIRS) ; do $(MAKE) -C $$d $@ ; done tar: clean a=`basename $$PWD`; cd ..; tar zcf $$a.tar.gz $$a; echo generated ../$$a.tar.gz; tarcvs: clean a=`basename $$PWD`; cd ..; tar --exclude=CVS -zcf $$a.tar.gz $$a; echo generated ../$$a.tar.gz; install: @set -e ; for d in $(DIRS) ; do $(MAKE) -C $$d $@ ; done install_examples: @set -e ; for d in $(EXAMPLES_DIRS) ; do $(MAKE) -C $$d $@ ; done .PHONY: tar tarcvs lib # DO NOT DELETE papi-5.6.0/src/libpfm4/lib/events/amd64_events_fam11h.h000664 001750 001750 00000115200 13216244364 024540 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2012 University of Tennessee * Contributed by Vince Weaver * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. * * PMU: amd64_fam11h (AMD64 Fam11h) */ static const amd64_umask_t amd64_fam11h_dispatched_fpu[]={ { .uname = "OPS_ADD", .udesc = "Add pipe ops excluding load ops and SSE move ops", .ucode = 0x1, }, { .uname = "OPS_MULTIPLY", .udesc = "Multiply pipe ops excluding load ops and SSE move ops", .ucode = 0x2, }, { .uname = "OPS_STORE", .udesc = "Store pipe ops excluding load ops and SSE move ops", .ucode = 0x4, }, { .uname = "OPS_ADD_PIPE_LOAD_OPS", .udesc = "Add pipe load ops and SSE move ops", .ucode = 0x8, }, { .uname = "OPS_MULTIPLY_PIPE_LOAD_OPS", .udesc = "Multiply pipe load ops and SSE move ops", .ucode = 0x10, }, { .uname = "OPS_STORE_PIPE_LOAD_OPS", .udesc = "Store pipe load ops and SSE move ops", .ucode = 0x20, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x3f, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam11h_segment_register_loads[]={ { .uname = "ES", .udesc = "ES", .ucode = 0x1, }, { .uname = "CS", .udesc = "CS", .ucode = 0x2, }, { .uname = "SS", .udesc = "SS", .ucode = 0x4, }, { .uname = "DS", .udesc = "DS", .ucode = 0x8, }, { .uname = "FS", .udesc = "FS", .ucode = 0x10, }, { .uname = "GS", .udesc = "GS", .ucode = 0x20, }, { .uname = "HS", .udesc = "HS", .ucode = 0x40, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x7f, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t 
amd64_fam11h_locked_ops[]={ { .uname = "EXECUTED", .udesc = "The number of locked instructions executed", .ucode = 0x1, }, { .uname = "CYCLES_SPECULATIVE_PHASE", .udesc = "The number of cycles spent in speculative phase", .ucode = 0x2, }, { .uname = "CYCLES_NON_SPECULATIVE_PHASE", .udesc = "The number of cycles spent in non-speculative phase (including cache miss penalty)", .ucode = 0x4, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x7, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam11h_data_cache_refills[]={ { .uname = "SYSTEM", .udesc = "Refill from the Northbridge", .ucode = 0x1, }, { .uname = "L2_SHARED", .udesc = "Shared-state line from L2", .ucode = 0x2, }, { .uname = "L2_EXCLUSIVE", .udesc = "Exclusive-state line from L2", .ucode = 0x4, }, { .uname = "L2_OWNED", .udesc = "Owned-state line from L2", .ucode = 0x8, }, { .uname = "L2_MODIFIED", .udesc = "Modified-state line from L2", .ucode = 0x10, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x1f, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam11h_data_cache_refills_from_system[]={ { .uname = "INVALID", .udesc = "Invalid", .ucode = 0x1, }, { .uname = "SHARED", .udesc = "Shared", .ucode = 0x2, }, { .uname = "EXCLUSIVE", .udesc = "Exclusive", .ucode = 0x4, }, { .uname = "OWNED", .udesc = "Owned", .ucode = 0x8, }, { .uname = "MODIFIED", .udesc = "Modified", .ucode = 0x10, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x1f, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam11h_data_cache_lines_evicted[]={ { .uname = "INVALID", .udesc = "Invalid", .ucode = 0x1, }, { .uname = "SHARED", .udesc = "Shared", .ucode = 0x2, }, { .uname = "EXCLUSIVE", .udesc = "Exclusive", .ucode = 0x4, }, { .uname = "OWNED", .udesc = "Owned", .ucode = 0x8, }, { .uname = "MODIFIED", .udesc = "Modified", .ucode = 0x10, }, { .uname = "ALL", .udesc = "All sub-events selected", 
.ucode = 0x1f, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam11h_scrubber_single_bit_ecc_errors[]={ { .uname = "SCRUBBER_ERROR", .udesc = "Scrubber error", .ucode = 0x1, }, { .uname = "PIGGYBACK_ERROR", .udesc = "Piggyback scrubber errors", .ucode = 0x2, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x3, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam11h_prefetch_instructions_dispatched[]={ { .uname = "LOAD", .udesc = "Load (Prefetch, PrefetchT0/T1/T2)", .ucode = 0x1, }, { .uname = "STORE", .udesc = "Store (PrefetchW)", .ucode = 0x2, }, { .uname = "NTA", .udesc = "NTA (PrefetchNTA)", .ucode = 0x4, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x7, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam11h_dcache_misses_by_locked_instructions[]={ { .uname = "DATA_CACHE_MISSES_BY_LOCKED_INSTRUCTIONS", .udesc = "Data cache misses by locked instructions", .ucode = 0x2, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x2, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam11h_memory_requests[]={ { .uname = "NON_CACHEABLE", .udesc = "Requests to non-cacheable (UC) memory", .ucode = 0x1, }, { .uname = "WRITE_COMBINING", .udesc = "Requests to write-combining (WC) memory or WC buffer flushes to WB memory", .ucode = 0x2, }, { .uname = "STREAMING_STORE", .udesc = "Streaming store (SS) requests", .ucode = 0x80, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x83, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam11h_data_prefetches[]={ { .uname = "CANCELLED", .udesc = "Cancelled prefetches", .ucode = 0x1, }, { .uname = "ATTEMPTED", .udesc = "Prefetch attempts", .ucode = 0x2, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x3, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t 
amd64_fam11h_system_read_responses[]={ { .uname = "EXCLUSIVE", .udesc = "Exclusive", .ucode = 0x1, }, { .uname = "MODIFIED", .udesc = "Modified", .ucode = 0x2, }, { .uname = "SHARED", .udesc = "Shared", .ucode = 0x4, }, { .uname = "DATA_ERROR", .udesc = "Data Error", .ucode = 0x10, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x17, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam11h_quadwords_written_to_system[]={ { .uname = "QUADWORD_WRITE_TRANSFER", .udesc = "Quadword write transfer", .ucode = 0x1, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x1, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam11h_requests_to_l2[]={ { .uname = "INSTRUCTIONS", .udesc = "IC fill", .ucode = 0x1, }, { .uname = "DATA", .udesc = "DC fill", .ucode = 0x2, }, { .uname = "TLB_WALK", .udesc = "TLB fill (page table walks)", .ucode = 0x4, }, { .uname = "SNOOP", .udesc = "Tag snoop request", .ucode = 0x8, }, { .uname = "CANCELLED", .udesc = "Cancelled request", .ucode = 0x10, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x1f, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam11h_l2_cache_miss[]={ { .uname = "INSTRUCTIONS", .udesc = "IC fill", .ucode = 0x1, }, { .uname = "DATA", .udesc = "DC fill (includes possible replays, whereas EventSelect 041h does not)", .ucode = 0x2, }, { .uname = "TLB_WALK", .udesc = "TLB page table walk", .ucode = 0x4, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x7, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam11h_l2_fill_writeback[]={ { .uname = "L2_FILLS", .udesc = "L2 fills (victims from L1 caches, TLB page table walks and data prefetches)", .ucode = 0x1, }, { .uname = "L2_WRITEBACKS", .udesc = "L2 Writebacks to system.", .ucode = 0x2, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x3, .uflags= AMD64_FL_NCOMBO | 
AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam11h_retired_mmx_and_fp_instructions[]={ { .uname = "X87", .udesc = "X87 instructions", .ucode = 0x1, }, { .uname = "MMX_AND_3DNOW", .udesc = "MMX and 3DNow! instructions", .ucode = 0x2, }, { .uname = "PACKED_SSE_AND_SSE2", .udesc = "Packed SSE and SSE2 instructions", .ucode = 0x4, }, { .uname = "SCALAR_SSE_AND_SSE2", .udesc = "Scalar SSE and SSE2 instructions", .ucode = 0x8, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0xf, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam11h_retired_fastpath_double_op_instructions[]={ { .uname = "POSITION_0", .udesc = "With low op in position 0", .ucode = 0x1, }, { .uname = "POSITION_1", .udesc = "With low op in position 1", .ucode = 0x2, }, { .uname = "POSITION_2", .udesc = "With low op in position 2", .ucode = 0x4, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x7, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam11h_interrupt_events[]={ { .uname = "FIXED_AND_LPA", .udesc = "Fixed and LPA", .ucode = 0x1, }, { .uname = "LPA", .udesc = "LPA", .ucode = 0x2, }, { .uname = "SMI", .udesc = "SMI", .ucode = 0x4, }, { .uname = "NMI", .udesc = "NMI", .ucode = 0x8, }, { .uname = "INIT", .udesc = "INIT", .ucode = 0x10, }, { .uname = "STARTUP", .udesc = "STARTUP", .ucode = 0x20, }, { .uname = "INT", .udesc = "INT", .ucode = 0x40, }, { .uname = "EOI", .udesc = "EOI", .ucode = 0x80, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0xff, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam11h_sideband_signals[]={ { .uname = "HALT", .udesc = "HALT", .ucode = 0x1, }, { .uname = "STOPGRANT", .udesc = "STOPGRANT", .ucode = 0x2, }, { .uname = "SHUTDOWN", .udesc = "SHUTDOWN", .ucode = 0x4, }, { .uname = "WBINVD", .udesc = "WBINVD", .ucode = 0x8, }, { .uname = "INVD", .udesc = "INVD", .ucode = 0x10, }, { .uname = "ALL", .udesc = "All 
sub-events selected", .ucode = 0x1f, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam11h_fpu_exceptions[]={ { .uname = "X87_RECLASS_MICROFAULTS", .udesc = "X87 reclass microfaults", .ucode = 0x1, }, { .uname = "SSE_RETYPE_MICROFAULTS", .udesc = "SSE retype microfaults", .ucode = 0x2, }, { .uname = "SSE_RECLASS_MICROFAULTS", .udesc = "SSE reclass microfaults", .ucode = 0x4, }, { .uname = "SSE_AND_X87_MICROTRAPS", .udesc = "SSE and x87 microtraps", .ucode = 0x8, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0xf, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam11h_dram_accesses[]={ { .uname = "DCT0_PAGE_HIT", .udesc = "DCT0 Page hit", .ucode = 0x1, }, { .uname = "DCT0_PAGE_MISS", .udesc = "DCT0 Page Miss", .ucode = 0x2, }, { .uname = "DCT0_PAGE_CONFLICT", .udesc = "DCT0 Page Conflict", .ucode = 0x4, }, { .uname = "DCT1_PAGE_HIT", .udesc = "DCT1 Page hit", .ucode = 0x8, }, { .uname = "DCT1_PAGE_MISS", .udesc = "DCT1 Page Miss", .ucode = 0x10, }, { .uname = "DCT1_PAGE_CONFLICT", .udesc = "DCT1 Page Conflict", .ucode = 0x20, }, { .uname = "WRITE_REQUEST", .udesc = "Write request.", .ucode = 0x40, }, { .uname = "READ_REQUEST", .udesc = "Read request.", .ucode = 0x80, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0xff, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam11h_dram_controller_page_table_events[]={ { .uname = "DCT_PAGE_TABLE_OVERFLOW", .udesc = "DCT Page Table Overflow", .ucode = 0x1, }, { .uname = "STALE_TABLE_ENTRY_HITS", .udesc = "Number of stale table entry hits. 
(hit on a page closed too soon).", .ucode = 0x2, }, { .uname = "PAGE_TABLE_IDLE_CYCLE_LIMIT_INCREMENTED", .udesc = "Page table idle cycle limit incremented.", .ucode = 0x4, }, { .uname = "PAGE_TABLE_IDLE_CYCLE_LIMIT_DECREMENTED", .udesc = "Page table idle cycle limit decremented.", .ucode = 0x8, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0xf, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam11h_memory_controller_turnarounds[]={ { .uname = "DCT0_READ_TO_WRITE", .udesc = "DCT0 read-to-write turnaround.", .ucode = 0x1, }, { .uname = "DCT0_WRITE_TO_READ", .udesc = "DCT0 write-to-read turnaround", .ucode = 0x2, }, { .uname = "DCT0_DIMM", .udesc = "DCT0 DIMM (chip select) turnaround", .ucode = 0x4, }, { .uname = "DCT1_READ_TO_WRITE", .udesc = "DCT1 read-to-write turnaround.", .ucode = 0x8, }, { .uname = "DCT1_WRITE_TO_READ", .udesc = "DCT1 write-to-read turnaround", .ucode = 0x10, }, { .uname = "DCT1_DIMM", .udesc = "DCT1 DIMM (chip select) turnaround", .ucode = 0x20, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x3f, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam11h_memory_rbd_queue[]={ { .uname = "COUNTER_REACHED", .udesc = "F2x[1,0]94[DcqBypassMax] counter reached.", .ucode = 0x4, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x4, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam11h_thermal_status[]={ { .uname = "MEMHOT_L_ASSERTIONS", .udesc = "Number of clocks MEMHOT_L is asserted.", .ucode = 0x1, }, { .uname = "HTC_TRANSITIONS", .udesc = "Number of times the HTC transitions from inactive to active.", .ucode = 0x4, }, { .uname = "CLOCKS_HTC_P_STATE_INACTIVE", .udesc = "Number of clocks HTC P-state is inactive.", .ucode = 0x20, }, { .uname = "CLOCKS_HTC_P_STATE_ACTIVE", .udesc = "Number of clocks HTC P-state is active", .ucode = 0x40, }, { .uname = "PROCHOT_L_ASSERTIONS", .udesc = "PROCHOT_L asserted 
by an external source and the assertion causes a P-state change.", .ucode = 0x80, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0xe5, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam11h_cpu_io_requests_to_memory_io[]={ { .uname = "I_O_TO_I_O", .udesc = "IO to IO", .ucode = 0xa1, .uflags= AMD64_FL_NCOMBO, }, { .uname = "I_O_TO_MEM", .udesc = "IO to Mem", .ucode = 0xa2, .uflags= AMD64_FL_NCOMBO, }, { .uname = "CPU_TO_I_O", .udesc = "CPU to IO", .ucode = 0xa4, .uflags= AMD64_FL_NCOMBO, }, { .uname = "CPU_TO_MEM", .udesc = "CPU to Mem", .ucode = 0xa8, .uflags= AMD64_FL_NCOMBO, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0xaf, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam11h_cache_block[]={ { .uname = "VICTIM_WRITEBACK", .udesc = "Victim Block (Writeback)", .ucode = 0x1, }, { .uname = "DCACHE_LOAD_MISS", .udesc = "Read Block (Dcache load miss refill)", .ucode = 0x4, }, { .uname = "SHARED_ICACHE_REFILL", .udesc = "Read Block Shared (Icache refill)", .ucode = 0x8, }, { .uname = "READ_BLOCK_MODIFIED", .udesc = "Read Block Modified (Dcache store miss refill)", .ucode = 0x10, }, { .uname = "READ_TO_DIRTY", .udesc = "Change-to-Dirty (first store to clean block already in cache)", .ucode = 0x20, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x3d, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam11h_sized_commands[]={ { .uname = "NON_POSTED_WRITE_BYTE", .udesc = "Non-Posted SzWr Byte (1-32 bytes) Legacy or mapped IO, typically 1-4 bytes", .ucode = 0x1, }, { .uname = "NON_POSTED_WRITE_DWORD", .udesc = "Non-Posted SzWr DW (1-16 dwords) Legacy or mapped IO, typically 1 DWORD", .ucode = 0x2, }, { .uname = "POSTED_WRITE_BYTE", .udesc = "Posted SzWr Byte (1-32 bytes) Subcache-line DMA writes, size varies; also flushes of partially-filled Write Combining buffer", .ucode = 0x4, }, { .uname = "POSTED_WRITE_DWORD", .udesc 
= "Posted SzWr DW (1-16 dwords) Block-oriented DMA writes, often cache-line sized; also processor Write Combining buffer flushes", .ucode = 0x8, }, { .uname = "READ_BYTE_4_BYTES", .udesc = "SzRd Byte (4 bytes) Legacy or mapped IO", .ucode = 0x10, }, { .uname = "READ_DWORD_1_16_DWORDS", .udesc = "SzRd DW (1-16 dwords) Block-oriented DMA reads, typically cache-line size", .ucode = 0x20, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x3f, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam11h_probe[]={ { .uname = "MISS", .udesc = "Probe miss", .ucode = 0x1, }, { .uname = "HIT_CLEAN", .udesc = "Probe hit clean", .ucode = 0x2, }, { .uname = "HIT_DIRTY_NO_MEMORY_CANCEL", .udesc = "Probe hit dirty without memory cancel (probed by Sized Write or Change2Dirty)", .ucode = 0x4, }, { .uname = "HIT_DIRTY_WITH_MEMORY_CANCEL", .udesc = "Probe hit dirty with memory cancel (probed by DMA read or cache refill request)", .ucode = 0x8, }, { .uname = "UPSTREAM_DISPLAY_REFRESH_READS", .udesc = "Upstream display refresh/ISOC reads.", .ucode = 0x10, }, { .uname = "UPSTREAM_NON_DISPLAY_REFRESH_READS", .udesc = "Upstream non-display refresh reads.", .ucode = 0x20, }, { .uname = "UPSTREAM_ISOC_WRITES", .udesc = "Upstream ISOC writes.", .ucode = 0x40, }, { .uname = "UPSTREAM_NON_ISOC_WRITES", .udesc = "Upstream non-ISOC writes.", .ucode = 0x80, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0xff, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam11h_dev[]={ { .uname = "DEV_HIT", .udesc = "DEV hit", .ucode = 0x10, }, { .uname = "DEV_MISS", .udesc = "DEV miss", .ucode = 0x20, }, { .uname = "DEV_ERROR", .udesc = "DEV error", .ucode = 0x40, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x70, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam11h_memory_controller_requests[]={ { .uname = "32_BYTES_WRITES", .udesc = "32 Bytes Sized Writes", 
.ucode = 0x8, }, { .uname = "64_BYTES_WRITES", .udesc = "64 Bytes Sized Writes", .ucode = 0x10, }, { .uname = "32_BYTES_READS", .udesc = "32 Bytes Sized Reads", .ucode = 0x20, }, { .uname = "64_BYTES_READS", .udesc = "64 Byte Sized Reads", .ucode = 0x40, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x78, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam11h_hypertransport_link0[]={ { .uname = "COMMAND_DWORD_SENT", .udesc = "Command DWORD sent", .ucode = 0x1, .grpid = 0, }, { .uname = "ADDRESS_DWORD_SENT", .udesc = "Address DWORD sent", .ucode = 0x2, .grpid = 0, }, { .uname = "DATA_DWORD_SENT", .udesc = "Data DWORD sent", .ucode = 0x4, .grpid = 0, }, { .uname = "BUFFER_RELEASE_DWORD_SENT", .udesc = "Buffer release DWORD sent", .ucode = 0x8, .grpid = 0, }, { .uname = "NOP_DWORD_SENT", .udesc = "Nop DW sent (idle)", .ucode = 0x10, .grpid = 0, }, { .uname = "PER_PACKET_CRC_SENT", .udesc = "Per packet CRC sent", .ucode = 0x20, .grpid = 0, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x3f, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, .grpid = 0, }, }; static const amd64_entry_t amd64_fam11h_pe[]={ { .name = "DISPATCHED_FPU", .desc = "Dispatched FPU Operations", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x0, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam11h_dispatched_fpu), .ngrp = 1, .umasks = amd64_fam11h_dispatched_fpu, }, { .name = "CYCLES_NO_FPU_OPS_RETIRED", .desc = "Cycles in which the FPU is Empty", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x1, }, { .name = "DISPATCHED_FPU_OPS_FAST_FLAG", .desc = "Dispatched Fast Flag FPU Operations", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x2, }, { .name = "SEGMENT_REGISTER_LOADS", .desc = "Segment Register Loads", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x20, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam11h_segment_register_loads), .ngrp = 1, .umasks = amd64_fam11h_segment_register_loads, }, { .name = "PIPELINE_RESTART_DUE_TO_SELF_MODIFYING_CODE", .desc = "Pipeline Restart Due to 
Self-Modifying Code", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x21, }, { .name = "PIPELINE_RESTART_DUE_TO_PROBE_HIT", .desc = "Pipeline Restart Due to Probe Hit", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x22, }, { .name = "LS_BUFFER_2_FULL_CYCLES", .desc = "LS Buffer 2 Full", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x23, }, { .name = "LOCKED_OPS", .desc = "Locked Operations", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x24, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam11h_locked_ops), .ngrp = 1, .umasks = amd64_fam11h_locked_ops, }, { .name = "RETIRED_CLFLUSH_INSTRUCTIONS", .desc = "Retired CLFLUSH Instructions", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x26, }, { .name = "RETIRED_CPUID_INSTRUCTIONS", .desc = "Retired CPUID Instructions", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x27, }, { .name = "DATA_CACHE_ACCESSES", .desc = "Data Cache Accesses", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x40, }, { .name = "DATA_CACHE_MISSES", .desc = "Data Cache Misses", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x41, }, { .name = "DATA_CACHE_REFILLS", .desc = "Data Cache Refills from L2 or System", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x42, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam11h_data_cache_refills), .ngrp = 1, .umasks = amd64_fam11h_data_cache_refills, }, { .name = "DATA_CACHE_REFILLS_FROM_SYSTEM", .desc = "Data Cache Refills from the System", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x43, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam11h_data_cache_refills_from_system), .ngrp = 1, .umasks = amd64_fam11h_data_cache_refills_from_system, }, { .name = "DATA_CACHE_LINES_EVICTED", .desc = "Data Cache Lines Evicted", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x44, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam11h_data_cache_lines_evicted), .ngrp = 1, .umasks = amd64_fam11h_data_cache_lines_evicted, }, { .name = "L1_DTLB_MISS_AND_L2_DTLB_HIT", .desc = "Number of data cache accesses that miss in L1 DTLB and hit in L2 DTLB", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x45, }, { .name = "L1_DTLB_AND_L2_DTLB_MISS", .desc = "Number of data 
cache accesses that miss both the L1 and L2 DTLBs", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x46, }, { .name = "MISALIGNED_ACCESSES", .desc = "Misaligned Accesses", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x47, }, { .name = "MICROARCHITECTURAL_LATE_CANCEL_OF_AN_ACCESS", .desc = "Microarchitectural Late Cancel of an Access", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x48, }, { .name = "MICROARCHITECTURAL_EARLY_CANCEL_OF_AN_ACCESS", .desc = "Microarchitectural Early Cancel of an Access", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x49, }, { .name = "SCRUBBER_SINGLE_BIT_ECC_ERRORS", .desc = "Single-bit ECC Errors Recorded by Scrubber", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x4a, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam11h_scrubber_single_bit_ecc_errors), .ngrp = 1, .umasks = amd64_fam11h_scrubber_single_bit_ecc_errors, }, { .name = "PREFETCH_INSTRUCTIONS_DISPATCHED", .desc = "Prefetch Instructions Dispatched", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x4b, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam11h_prefetch_instructions_dispatched), .ngrp = 1, .umasks = amd64_fam11h_prefetch_instructions_dispatched, }, { .name = "DCACHE_MISSES_BY_LOCKED_INSTRUCTIONS", .desc = "DCACHE Misses by Locked Instructions", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x4c, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam11h_dcache_misses_by_locked_instructions), .ngrp = 1, .umasks = amd64_fam11h_dcache_misses_by_locked_instructions, }, { .name = "MEMORY_REQUESTS", .desc = "Memory Requests by Type", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x65, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam11h_memory_requests), .ngrp = 1, .umasks = amd64_fam11h_memory_requests, }, { .name = "DATA_PREFETCHES", .desc = "Data Prefetcher", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x67, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam11h_data_prefetches), .ngrp = 1, .umasks = amd64_fam11h_data_prefetches, }, { .name = "SYSTEM_READ_RESPONSES", .desc = "System Read Responses by Coherency State", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x6c, .numasks = 
LIBPFM_ARRAY_SIZE(amd64_fam11h_system_read_responses), .ngrp = 1, .umasks = amd64_fam11h_system_read_responses, }, { .name = "QUADWORDS_WRITTEN_TO_SYSTEM", .desc = "Quadwords Written to System", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x6d, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam11h_quadwords_written_to_system), .ngrp = 1, .umasks = amd64_fam11h_quadwords_written_to_system, }, { .name = "CPU_CLK_UNHALTED", .desc = "CPU Clocks not Halted", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x76, }, { .name = "REQUESTS_TO_L2", .desc = "Requests to L2 Cache", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x7d, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam11h_requests_to_l2), .ngrp = 1, .umasks = amd64_fam11h_requests_to_l2, }, { .name = "L2_CACHE_MISS", .desc = "L2 Cache Misses", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x7e, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam11h_l2_cache_miss), .ngrp = 1, .umasks = amd64_fam11h_l2_cache_miss, }, { .name = "L2_FILL_WRITEBACK", .desc = "L2 Fill/Writeback", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x7f, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam11h_l2_fill_writeback), .ngrp = 1, .umasks = amd64_fam11h_l2_fill_writeback, }, { .name = "INSTRUCTION_CACHE_FETCHES", .desc = "Instruction Cache Fetches", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x80, }, { .name = "INSTRUCTION_CACHE_MISSES", .desc = "Instruction Cache Misses", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x81, }, { .name = "INSTRUCTION_CACHE_REFILLS_FROM_L2", .desc = "Instruction Cache Refills from L2", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x82, }, { .name = "INSTRUCTION_CACHE_REFILLS_FROM_SYSTEM", .desc = "Instruction Cache Refills from System", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x83, }, { .name = "L1_ITLB_MISS_AND_L2_ITLB_HIT", .desc = "L1 ITLB Miss and L2 ITLB Hit", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x84, }, { .name = "L1_ITLB_MISS_AND_L2_ITLB_MISS", .desc = "L1 ITLB Miss and L2 ITLB Miss", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x85, }, { .name = "PIPELINE_RESTART_DUE_TO_INSTRUCTION_STREAM_PROBE", .desc = 
"Pipeline Restart Due to Instruction Stream Probe", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x86, }, { .name = "INSTRUCTION_FETCH_STALL", .desc = "Instruction Fetch Stall", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x87, }, { .name = "RETURN_STACK_HITS", .desc = "Return Stack Hits", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x88, }, { .name = "RETURN_STACK_OVERFLOWS", .desc = "Return Stack Overflows", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x89, }, { .name = "RETIRED_INSTRUCTIONS", .desc = "Retired Instructions", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xc0, }, { .name = "RETIRED_UOPS", .desc = "Retired uops", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xc1, }, { .name = "RETIRED_BRANCH_INSTRUCTIONS", .desc = "Retired Branch Instructions", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xc2, }, { .name = "RETIRED_MISPREDICTED_BRANCH_INSTRUCTIONS", .desc = "Retired Mispredicted Branch Instructions", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xc3, }, { .name = "RETIRED_TAKEN_BRANCH_INSTRUCTIONS", .desc = "Retired Taken Branch Instructions", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xc4, }, { .name = "RETIRED_TAKEN_BRANCH_INSTRUCTIONS_MISPREDICTED", .desc = "Retired Taken Branch Instructions Mispredicted", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xc5, }, { .name = "RETIRED_FAR_CONTROL_TRANSFERS", .desc = "Retired Far Control Transfers", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xc6, }, { .name = "RETIRED_BRANCH_RESYNCS", .desc = "Retired Branch Resyncs", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xc7, }, { .name = "RETIRED_NEAR_RETURNS", .desc = "Retired Near Returns", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xc8, }, { .name = "RETIRED_NEAR_RETURNS_MISPREDICTED", .desc = "Retired Near Returns Mispredicted", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xc9, }, { .name = "RETIRED_INDIRECT_BRANCHES_MISPREDICTED", .desc = "Retired Indirect Branches Mispredicted", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xca, }, { .name = "RETIRED_MMX_AND_FP_INSTRUCTIONS", .desc = "Retired MMX/FP Instructions", .modmsk = AMD64_FAM10H_ATTRS, 
.code = 0xcb, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam11h_retired_mmx_and_fp_instructions), .ngrp = 1, .umasks = amd64_fam11h_retired_mmx_and_fp_instructions, }, { .name = "RETIRED_FASTPATH_DOUBLE_OP_INSTRUCTIONS", .desc = "Retired Fastpath Double Op Instructions", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xcc, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam11h_retired_fastpath_double_op_instructions), .ngrp = 1, .umasks = amd64_fam11h_retired_fastpath_double_op_instructions, }, { .name = "INTERRUPTS_MASKED_CYCLES", .desc = "Interrupts-Masked Cycles", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xcd, }, { .name = "INTERRUPTS_MASKED_CYCLES_WITH_INTERRUPT_PENDING", .desc = "Interrupts-Masked Cycles with Interrupt Pending", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xce, }, { .name = "INTERRUPTS_TAKEN", .desc = "Interrupts Taken", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xcf, }, { .name = "DECODER_EMPTY", .desc = "Decoder Empty", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xd0, }, { .name = "DISPATCH_STALLS", .desc = "Dispatch Stalls", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xd1, }, { .name = "DISPATCH_STALL_FOR_BRANCH_ABORT", .desc = "Dispatch Stall for Branch Abort to Retire", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xd2, }, { .name = "DISPATCH_STALL_FOR_SERIALIZATION", .desc = "Dispatch Stall for Serialization", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xd3, }, { .name = "DISPATCH_STALL_FOR_SEGMENT_LOAD", .desc = "Dispatch Stall for Segment Load", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xd4, }, { .name = "DISPATCH_STALL_FOR_REORDER_BUFFER_FULL", .desc = "Dispatch Stall for Reorder Buffer Full", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xd5, }, { .name = "DISPATCH_STALL_FOR_RESERVATION_STATION_FULL", .desc = "Dispatch Stall for Reservation Station Full", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xd6, }, { .name = "DISPATCH_STALL_FOR_FPU_FULL", .desc = "Dispatch Stall for FPU Full", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xd7, }, { .name = "DISPATCH_STALL_FOR_LS_FULL", .desc = "Dispatch Stall for LS Full", 
.modmsk = AMD64_FAM10H_ATTRS, .code = 0xd8, }, { .name = "DISPATCH_STALL_WAITING_FOR_ALL_QUIET", .desc = "Dispatch Stall Waiting for All Quiet", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xd9, }, { .name = "DISPATCH_STALL_FOR_FAR_TRANSFER_OR_RSYNC", .desc = "Dispatch Stall for Far Transfer or Resync to Retire", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xda, }, { .name = "FPU_EXCEPTIONS", .desc = "FPU Exceptions", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xdb, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam11h_fpu_exceptions), .ngrp = 1, .umasks = amd64_fam11h_fpu_exceptions, }, { .name = "DR0_BREAKPOINT_MATCHES", .desc = "DR0 Breakpoint Matches", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xdc, }, { .name = "DR1_BREAKPOINT_MATCHES", .desc = "DR1 Breakpoint Matches", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xdd, }, { .name = "DR2_BREAKPOINT_MATCHES", .desc = "DR2 Breakpoint Matches", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xde, }, { .name = "DR3_BREAKPOINT_MATCHES", .desc = "DR3 Breakpoint Matches", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xdf, }, { .name = "DRAM_ACCESSES", .desc = "DRAM Accesses", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xe0, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam11h_dram_accesses), .ngrp = 1, .umasks = amd64_fam11h_dram_accesses, }, { .name = "DRAM_CONTROLLER_PAGE_TABLE_EVENTS", .desc = "DRAM Controller Page Table Events", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xe1, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam11h_dram_controller_page_table_events), .ngrp = 1, .umasks = amd64_fam11h_dram_controller_page_table_events, }, { .name = "MEMORY_CONTROLLER_TURNAROUNDS", .desc = "Memory Controller Turnarounds", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xe3, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam11h_memory_controller_turnarounds), .ngrp = 1, .umasks = amd64_fam11h_memory_controller_turnarounds, }, { .name = "MEMORY_CONTROLLER_RBD_QUEUE", .desc = "Memory Controller RBD Queue Events", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xe4, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam11h_memory_rbd_queue), .ngrp = 1, 
.umasks = amd64_fam11h_memory_rbd_queue, }, { .name = "THERMAL_STATUS", .desc = "Thermal Status", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xe8, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam11h_thermal_status), .ngrp = 1, .umasks = amd64_fam11h_thermal_status, }, { .name = "CPU_IO_REQUESTS_TO_MEMORY_IO", .desc = "CPU/IO Requests to Memory/IO", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xe9, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam11h_cpu_io_requests_to_memory_io), .ngrp = 1, .umasks = amd64_fam11h_cpu_io_requests_to_memory_io, }, { .name = "CACHE_BLOCK", .desc = "Cache Block Commands", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xea, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam11h_cache_block), .ngrp = 1, .umasks = amd64_fam11h_cache_block, }, { .name = "SIZED_COMMANDS", .desc = "Sized Commands", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xeb, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam11h_sized_commands), .ngrp = 1, .umasks = amd64_fam11h_sized_commands, }, { .name = "PROBE", .desc = "Probe Responses and Upstream Requests", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xec, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam11h_probe), .ngrp = 1, .umasks = amd64_fam11h_probe, }, { .name = "DEV", .desc = "DEV Events", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xee, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam11h_dev), .ngrp = 1, .umasks = amd64_fam11h_dev, }, { .name = "HYPERTRANSPORT_LINK0", .desc = "HyperTransport Link 0 Transmit Bandwidth", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xf6, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam11h_hypertransport_link0), .ngrp = 1, .umasks = amd64_fam11h_hypertransport_link0, }, { .name = "MEMORY_CONTROLLER_REQUESTS", .desc = "Memory Controller Requests", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x1f0, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam11h_memory_controller_requests), .ngrp = 1, .umasks = amd64_fam11h_memory_controller_requests, }, { .name = "SIDEBAND_SIGNALS", .desc = "Sideband Signals and Special Cycles", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x1e9, .numasks = 
LIBPFM_ARRAY_SIZE(amd64_fam11h_sideband_signals), .ngrp = 1, .umasks = amd64_fam11h_sideband_signals, }, { .name = "INTERRUPT_EVENTS", .desc = "Interrupt Events", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x1ea, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam11h_interrupt_events), .ngrp = 1, .umasks = amd64_fam11h_interrupt_events, }, }; papi-5.6.0/src/components/appio/tests/iozone/000775 001750 001750 00000000000 13216244356 023270 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/libpfm4/lib/pfmlib_intel_snbep_unc_cbo.c000664 001750 001750 00000007074 13216244365 025130 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_intel_snb_unc_cbo.c : Intel SandyBridge-EP C-Box uncore PMU * * Copyright (c) 2012 Google, Inc * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include #include #include #include #include /* private headers */ #include "pfmlib_priv.h" #include "pfmlib_intel_x86_priv.h" #include "pfmlib_intel_snbep_unc_priv.h" #include "events/intel_snbep_unc_cbo_events.h"

/*
 * display_cbo - verbose dump of an encoded C-Box event.
 *
 * this: PMU descriptor; passed to this_pe() to obtain the event table
 * e:    event descriptor; e->event indexes the event table, e->count and
 *       e->codes[1] decide whether/what is printed as the filter
 * val:  register value, interpreted as a pfm_snbep_unc_reg_t and printed
 *       under the UNC_CBO label
 *
 * The counter control fields and the event name are always printed.
 * The UNC_CBOX_FILTER line is printed unless e->count is exactly 1.
 */
static void
display_cbo(void *this, pfmlib_event_desc_t *e, void *val)
{
	const intel_x86_entry_t *events = this_pe(this);
	pfm_snbep_unc_reg_t *ctl = val;

	__pfm_vbprintf("[UNC_CBO=0x%"PRIx64" event=0x%x umask=0x%x en=%d "
		       "inv=%d edge=%d thres=%d tid_en=%d] %s\n",
		       ctl->val,
		       ctl->cbo.unc_event,
		       ctl->cbo.unc_umask,
		       ctl->cbo.unc_en,
		       ctl->cbo.unc_inv,
		       ctl->cbo.unc_edge,
		       ctl->cbo.unc_thres,
		       ctl->cbo.unc_tid,
		       events[e->event].name);

	/* a single encoded value means there is no filter word to show */
	if (e->count != 1) {
		pfm_snbep_unc_reg_t filt;

		/* only .val is assigned; the bit-fields below are views of it */
		filt.val = e->codes[1];

		__pfm_vbprintf("[UNC_CBOX_FILTER=0x%"PRIx64" tid=%d core=0x%x nid=0x%x"
			       " state=0x%x opc=0x%x]\n",
			       filt.val,
			       filt.cbo_filt.tid,
			       filt.cbo_filt.cid,
			       filt.cbo_filt.nid,
			       filt.cbo_filt.state,
			       filt.cbo_filt.opc);
	}
}

#define DEFINE_C_BOX(n) \ pfmlib_pmu_t intel_snbep_unc_cb##n##_support = {\ .desc = "Intel Sandy Bridge-EP C-Box "#n" uncore",\ .name = "snbep_unc_cbo"#n,\ .perf_name = "uncore_cbox_"#n,\ .pmu = PFM_PMU_INTEL_SNBEP_UNC_CB##n,\ .pme_count = LIBPFM_ARRAY_SIZE(intel_snbep_unc_c_pe),\ .type = PFM_PMU_TYPE_UNCORE,\ .num_cntrs = 4,\ .num_fixed_cntrs = 0,\ .max_encoding = 2,\ .pe = intel_snbep_unc_c_pe,\ .atdesc = snbep_unc_mods,\ .flags = PFMLIB_PMU_FL_RAW_UMASK\ | PFMLIB_PMU_FL_NO_SMPL,\ .pmu_detect = pfm_intel_snbep_unc_detect,\ .get_event_encoding[PFM_OS_NONE] = pfm_intel_snbep_unc_get_encoding,\ PFMLIB_ENCODE_PERF(pfm_intel_snbep_unc_get_perf_encoding),\ .get_event_first = pfm_intel_x86_get_event_first,\ .get_event_next = pfm_intel_x86_get_event_next,\ .event_is_valid = pfm_intel_x86_event_is_valid,\ .validate_table = pfm_intel_x86_validate_table,\ .get_event_info = pfm_intel_x86_get_event_info,\ .get_event_attr_info = pfm_intel_snbep_unc_get_event_attr_info,\ PFMLIB_VALID_PERF_PATTRS(pfm_intel_snbep_unc_perf_validate_pattrs),\ .get_event_nattrs = pfm_intel_x86_get_event_nattrs,\
.can_auto_encode = pfm_intel_x86_can_auto_encode, \ .display_reg = display_cbo,\ } DEFINE_C_BOX(0); DEFINE_C_BOX(1); DEFINE_C_BOX(2); DEFINE_C_BOX(3); DEFINE_C_BOX(4); DEFINE_C_BOX(5); DEFINE_C_BOX(6); DEFINE_C_BOX(7); papi-5.6.0/src/perfctr-2.7.x/patches/patch-kernel-2.6.17000664 001750 001750 00000074471 13216244370 024236 0ustar00jshenry1963jshenry1963000000 000000 diff -rupN linux-2.6.17/CREDITS linux-2.6.17.perfctr27/CREDITS --- linux-2.6.17/CREDITS 2006-06-18 12:13:01.000000000 +0200 +++ linux-2.6.17.perfctr27/CREDITS 2006-08-11 02:40:43.000000000 +0200 @@ -2631,6 +2631,7 @@ N: Mikael Pettersson E: mikpe@csd.uu.se W: http://www.csd.uu.se/~mikpe/ D: Miscellaneous fixes +D: Performance-monitoring counters driver N: Reed H. Petty E: rhp@draper.net diff -rupN linux-2.6.17/MAINTAINERS linux-2.6.17.perfctr27/MAINTAINERS --- linux-2.6.17/MAINTAINERS 2006-06-18 12:13:01.000000000 +0200 +++ linux-2.6.17.perfctr27/MAINTAINERS 2006-08-11 02:40:43.000000000 +0200 @@ -2185,6 +2185,12 @@ M: tsbogend@alpha.franken.de L: netdev@vger.kernel.org S: Maintained +PERFORMANCE-MONITORING COUNTERS DRIVER +P: Mikael Pettersson +M: mikpe@it.uu.se +W: http://user.it.uu.se/~mikpe/linux/perfctr/ +S: Maintained + PERSONALITY HANDLING P: Christoph Hellwig M: hch@infradead.org diff -rupN linux-2.6.17/arch/i386/Kconfig linux-2.6.17.perfctr27/arch/i386/Kconfig --- linux-2.6.17/arch/i386/Kconfig 2006-06-18 12:13:01.000000000 +0200 +++ linux-2.6.17.perfctr27/arch/i386/Kconfig 2006-08-11 02:40:43.000000000 +0200 @@ -708,6 +708,8 @@ config SECCOMP If unsure, say Y. Only embedded should say N here. 
+source "drivers/perfctr/Kconfig" + source kernel/Kconfig.hz config KEXEC diff -rupN linux-2.6.17/arch/i386/kernel/entry.S linux-2.6.17.perfctr27/arch/i386/kernel/entry.S --- linux-2.6.17/arch/i386/kernel/entry.S 2006-06-18 12:13:01.000000000 +0200 +++ linux-2.6.17.perfctr27/arch/i386/kernel/entry.S 2006-08-11 02:40:43.000000000 +0200 @@ -436,6 +436,16 @@ ENTRY(name) \ /* The include is where all of the SMP etc. interrupts come from */ #include "entry_arch.h" +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PERFCTR) +ENTRY(perfctr_interrupt) + pushl $LOCAL_PERFCTR_VECTOR-256 + SAVE_ALL + pushl %esp + call smp_perfctr_interrupt + addl $4, %esp + jmp ret_from_intr +#endif + ENTRY(divide_error) pushl $0 # no error code pushl $do_divide_error diff -rupN linux-2.6.17/arch/i386/kernel/i8259.c linux-2.6.17.perfctr27/arch/i386/kernel/i8259.c --- linux-2.6.17/arch/i386/kernel/i8259.c 2005-10-28 11:24:06.000000000 +0200 +++ linux-2.6.17.perfctr27/arch/i386/kernel/i8259.c 2006-08-11 02:40:43.000000000 +0200 @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -421,6 +422,8 @@ void __init init_IRQ(void) */ intr_init_hook(); + perfctr_vector_init(); + /* * Set the clock to HZ Hz, we already have a valid * vector now: diff -rupN linux-2.6.17/arch/i386/kernel/process.c linux-2.6.17.perfctr27/arch/i386/kernel/process.c --- linux-2.6.17/arch/i386/kernel/process.c 2006-06-18 12:13:01.000000000 +0200 +++ linux-2.6.17.perfctr27/arch/i386/kernel/process.c 2006-08-11 02:40:43.000000000 +0200 @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -380,6 +381,7 @@ void exit_thread(void) tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET; put_cpu(); } + perfctr_exit_thread(&tsk->thread); } void flush_thread(void) @@ -431,6 +433,8 @@ int copy_thread(int nr, unsigned long cl savesegment(fs,p->thread.fs); savesegment(gs,p->thread.gs); + perfctr_copy_task(p, regs); + tsk = current; if (unlikely(NULL != tsk->thread.io_bitmap_ptr)) { 
p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); @@ -689,6 +693,8 @@ struct task_struct fastcall * __switch_t disable_tsc(prev_p, next_p); + perfctr_resume_thread(next); + return prev_p; } diff -rupN linux-2.6.17/arch/i386/kernel/syscall_table.S linux-2.6.17.perfctr27/arch/i386/kernel/syscall_table.S --- linux-2.6.17/arch/i386/kernel/syscall_table.S 2006-06-18 12:13:01.000000000 +0200 +++ linux-2.6.17.perfctr27/arch/i386/kernel/syscall_table.S 2006-08-11 02:40:46.000000000 +0200 @@ -316,3 +316,8 @@ ENTRY(sys_call_table) .long sys_sync_file_range .long sys_tee /* 315 */ .long sys_vmsplice + .long sys_ni_syscall + .long sys_vperfctr_open + .long sys_vperfctr_control + .long sys_vperfctr_write /* 320 */ + .long sys_vperfctr_read diff -rupN linux-2.6.17/arch/powerpc/Kconfig linux-2.6.17.perfctr27/arch/powerpc/Kconfig --- linux-2.6.17/arch/powerpc/Kconfig 2006-06-18 12:13:01.000000000 +0200 +++ linux-2.6.17.perfctr27/arch/powerpc/Kconfig 2006-08-11 02:40:43.000000000 +0200 @@ -299,6 +299,9 @@ config NOT_COHERENT_CACHE bool depends on 4xx || 8xx || E200 default y + +source "drivers/perfctr/Kconfig" + endmenu source "init/Kconfig" diff -rupN linux-2.6.17/arch/powerpc/kernel/process.c linux-2.6.17.perfctr27/arch/powerpc/kernel/process.c --- linux-2.6.17/arch/powerpc/kernel/process.c 2006-06-18 12:13:01.000000000 +0200 +++ linux-2.6.17.perfctr27/arch/powerpc/kernel/process.c 2006-08-11 02:40:43.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -333,7 +334,9 @@ struct task_struct *__switch_to(struct t account_process_vtime(current); calculate_steal_time(); + perfctr_suspend_thread(&prev->thread); last = _switch(old_thread, new_thread); + perfctr_resume_thread(&current->thread); local_irq_restore(flags); @@ -465,6 +468,7 @@ void show_regs(struct pt_regs * regs) void exit_thread(void) { discard_lazy_cpu_state(); + perfctr_exit_thread(&current->thread); } void flush_thread(void) @@ -577,6 +581,8 @@ int copy_thread(int nr, 
unsigned long cl p->thread.last_syscall = -1; #endif + perfctr_copy_task(p, regs); + return 0; } diff -rupN linux-2.6.17/arch/powerpc/kernel/systbl.S linux-2.6.17.perfctr27/arch/powerpc/kernel/systbl.S --- linux-2.6.17/arch/powerpc/kernel/systbl.S 2006-06-18 12:13:01.000000000 +0200 +++ linux-2.6.17.perfctr27/arch/powerpc/kernel/systbl.S 2006-08-11 02:40:43.000000000 +0200 @@ -340,6 +340,10 @@ SYSCALL(fchmodat) SYSCALL(faccessat) COMPAT_SYS(get_robust_list) COMPAT_SYS(set_robust_list) +SYSCALL(vperfctr_open) +SYSCALL(vperfctr_control) +SYSCALL(vperfctr_write) +SYSCALL(vperfctr_read) /* * please add new calls to arch/powerpc/platforms/cell/spu_callbacks.c diff -rupN linux-2.6.17/arch/x86_64/Kconfig linux-2.6.17.perfctr27/arch/x86_64/Kconfig --- linux-2.6.17/arch/x86_64/Kconfig 2006-06-18 12:13:02.000000000 +0200 +++ linux-2.6.17.perfctr27/arch/x86_64/Kconfig 2006-08-11 02:40:43.000000000 +0200 @@ -491,6 +491,8 @@ config SECCOMP If unsure, say Y. Only embedded should say N here. +source "drivers/perfctr/Kconfig" + source kernel/Kconfig.hz config REORDER diff -rupN linux-2.6.17/arch/x86_64/ia32/ia32entry.S linux-2.6.17.perfctr27/arch/x86_64/ia32/ia32entry.S --- linux-2.6.17/arch/x86_64/ia32/ia32entry.S 2006-06-18 12:13:02.000000000 +0200 +++ linux-2.6.17.perfctr27/arch/x86_64/ia32/ia32entry.S 2006-08-11 02:40:46.000000000 +0200 @@ -694,6 +694,11 @@ ia32_sys_call_table: .quad compat_sys_get_robust_list .quad sys_splice .quad sys_sync_file_range - .quad sys_tee + .quad sys_tee /* 315 */ .quad compat_sys_vmsplice + .quad quiet_ni_syscall + .quad sys_vperfctr_open + .quad sys_vperfctr_control + .quad sys_vperfctr_write /* 320 */ + .quad sys_vperfctr_read ia32_syscall_end: diff -rupN linux-2.6.17/arch/x86_64/kernel/entry.S linux-2.6.17.perfctr27/arch/x86_64/kernel/entry.S --- linux-2.6.17/arch/x86_64/kernel/entry.S 2006-06-18 12:13:02.000000000 +0200 +++ linux-2.6.17.perfctr27/arch/x86_64/kernel/entry.S 2006-08-11 02:40:43.000000000 +0200 @@ -642,6 +642,11 @@ 
ENTRY(spurious_interrupt) apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt #endif +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PERFCTR) +ENTRY(perfctr_interrupt) + apicinterrupt LOCAL_PERFCTR_VECTOR,smp_perfctr_interrupt +#endif + /* * Exception entry points. */ diff -rupN linux-2.6.17/arch/x86_64/kernel/i8259.c linux-2.6.17.perfctr27/arch/x86_64/kernel/i8259.c --- linux-2.6.17/arch/x86_64/kernel/i8259.c 2006-03-20 10:40:03.000000000 +0100 +++ linux-2.6.17.perfctr27/arch/x86_64/kernel/i8259.c 2006-08-11 02:40:43.000000000 +0200 @@ -23,6 +23,7 @@ #include #include #include +#include /* * Common place to define all x86 IRQ vectors @@ -591,6 +592,8 @@ void __init init_IRQ(void) set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); #endif + perfctr_vector_init(); + /* * Set the clock to HZ Hz, we already have a valid * vector now: diff -rupN linux-2.6.17/arch/x86_64/kernel/process.c linux-2.6.17.perfctr27/arch/x86_64/kernel/process.c --- linux-2.6.17/arch/x86_64/kernel/process.c 2006-06-18 12:13:02.000000000 +0200 +++ linux-2.6.17.perfctr27/arch/x86_64/kernel/process.c 2006-08-11 02:40:43.000000000 +0200 @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -358,6 +359,7 @@ void exit_thread(void) t->io_bitmap_max = 0; put_cpu(); } + perfctr_exit_thread(&me->thread); } void flush_thread(void) @@ -459,6 +461,8 @@ int copy_thread(int nr, unsigned long cl asm("mov %%es,%0" : "=m" (p->thread.es)); asm("mov %%ds,%0" : "=m" (p->thread.ds)); + perfctr_copy_task(p, regs); + if (unlikely(me->thread.io_bitmap_ptr != NULL)) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) { @@ -616,6 +620,8 @@ __switch_to(struct task_struct *prev_p, } } + perfctr_resume_thread(next); + return prev_p; } diff -rupN linux-2.6.17/drivers/Makefile linux-2.6.17.perfctr27/drivers/Makefile --- linux-2.6.17/drivers/Makefile 2006-06-18 12:13:02.000000000 +0200 +++ linux-2.6.17.perfctr27/drivers/Makefile 2006-08-11 
02:40:43.000000000 +0200 @@ -71,6 +71,7 @@ obj-$(CONFIG_NEW_LEDS) += leds/ obj-$(CONFIG_INFINIBAND) += infiniband/ obj-$(CONFIG_IPATH_CORE) += infiniband/ obj-$(CONFIG_SGI_SN) += sn/ +obj-$(CONFIG_PERFCTR) += perfctr/ obj-y += firmware/ obj-$(CONFIG_CRYPTO) += crypto/ obj-$(CONFIG_SUPERH) += sh/ diff -rupN linux-2.6.17/include/asm-i386/mach-default/irq_vectors.h linux-2.6.17.perfctr27/include/asm-i386/mach-default/irq_vectors.h --- linux-2.6.17/include/asm-i386/mach-default/irq_vectors.h 2004-05-10 11:14:37.000000000 +0200 +++ linux-2.6.17.perfctr27/include/asm-i386/mach-default/irq_vectors.h 2006-08-11 02:40:43.000000000 +0200 @@ -56,14 +56,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 diff -rupN linux-2.6.17/include/asm-i386/mach-visws/irq_vectors.h linux-2.6.17.perfctr27/include/asm-i386/mach-visws/irq_vectors.h --- linux-2.6.17/include/asm-i386/mach-visws/irq_vectors.h 2004-01-09 13:19:11.000000000 +0100 +++ linux-2.6.17.perfctr27/include/asm-i386/mach-visws/irq_vectors.h 2006-08-11 02:40:43.000000000 +0200 @@ -35,14 +35,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. 
(0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 diff -rupN linux-2.6.17/include/asm-i386/processor.h linux-2.6.17.perfctr27/include/asm-i386/processor.h --- linux-2.6.17/include/asm-i386/processor.h 2006-06-18 12:13:11.000000000 +0200 +++ linux-2.6.17.perfctr27/include/asm-i386/processor.h 2006-08-11 02:40:43.000000000 +0200 @@ -469,6 +469,8 @@ struct thread_struct { unsigned long iopl; /* max allowed port in the bitmap, in bytes: */ unsigned long io_bitmap_max; +/* performance counters */ + struct vperfctr *perfctr; }; #define INIT_THREAD { \ diff -rupN linux-2.6.17/include/asm-i386/system.h linux-2.6.17.perfctr27/include/asm-i386/system.h --- linux-2.6.17/include/asm-i386/system.h 2006-06-18 12:13:11.000000000 +0200 +++ linux-2.6.17.perfctr27/include/asm-i386/system.h 2006-08-11 02:40:43.000000000 +0200 @@ -14,6 +14,7 @@ extern struct task_struct * FASTCALL(__s #define switch_to(prev,next,last) do { \ unsigned long esi,edi; \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile("pushl %%ebp\n\t" \ "movl %%esp,%0\n\t" /* save ESP */ \ "movl %5,%%esp\n\t" /* restore ESP */ \ diff -rupN linux-2.6.17/include/asm-i386/unistd.h linux-2.6.17.perfctr27/include/asm-i386/unistd.h --- linux-2.6.17/include/asm-i386/unistd.h 2006-06-18 12:13:11.000000000 +0200 +++ linux-2.6.17.perfctr27/include/asm-i386/unistd.h 2006-08-11 02:40:46.000000000 +0200 @@ -322,8 +322,12 @@ #define __NR_sync_file_range 314 #define __NR_tee 315 #define __NR_vmsplice 316 +#define __NR_vperfctr_open 318 +#define __NR_vperfctr_control (__NR_vperfctr_open+1) +#define __NR_vperfctr_write (__NR_vperfctr_open+2) +#define __NR_vperfctr_read (__NR_vperfctr_open+3) -#define NR_syscalls 317 +#define NR_syscalls 322 /* * user-visible error numbers are in the range -1 - -128: see diff -rupN linux-2.6.17/include/asm-powerpc/processor.h linux-2.6.17.perfctr27/include/asm-powerpc/processor.h --- 
linux-2.6.17/include/asm-powerpc/processor.h 2006-06-18 12:13:11.000000000 +0200 +++ linux-2.6.17.perfctr27/include/asm-powerpc/processor.h 2006-08-11 02:40:43.000000000 +0200 @@ -169,6 +169,9 @@ struct thread_struct { unsigned long spefscr; /* SPE & eFP status */ int used_spe; /* set if process has used spe */ #endif /* CONFIG_SPE */ +#ifdef CONFIG_PERFCTR_VIRTUAL + struct vperfctr *perfctr; /* performance counters */ +#endif }; #define ARCH_MIN_TASKALIGN 16 diff -rupN linux-2.6.17/include/asm-powerpc/reg.h linux-2.6.17.perfctr27/include/asm-powerpc/reg.h --- linux-2.6.17/include/asm-powerpc/reg.h 2006-06-18 12:13:11.000000000 +0200 +++ linux-2.6.17.perfctr27/include/asm-powerpc/reg.h 2006-08-11 02:40:43.000000000 +0200 @@ -365,10 +365,8 @@ #define SPRN_PURR 0x135 /* Processor Utilization of Resources Reg */ #define SPRN_PVR 0x11F /* Processor Version Register */ #define SPRN_RPA 0x3D6 /* Required Physical Address Register */ -#define SPRN_SDA 0x3BF /* Sampled Data Address Register */ #define SPRN_SDR1 0x019 /* MMU Hash Base Register */ #define SPRN_ASR 0x118 /* Address Space Register */ -#define SPRN_SIA 0x3BB /* Sampled Instruction Address Register */ #define SPRN_SPRG0 0x110 /* Special Purpose Register General 0 */ #define SPRN_SPRG1 0x111 /* Special Purpose Register General 1 */ #define SPRN_SPRG2 0x112 /* Special Purpose Register General 2 */ @@ -403,13 +401,6 @@ #define SPRN_THRM3 0x3FE /* Thermal Management Register 3 */ #define THRM3_E (1<<0) #define SPRN_TLBMISS 0x3D4 /* 980 7450 TLB Miss Register */ -#define SPRN_UMMCR0 0x3A8 /* User Monitor Mode Control Register 0 */ -#define SPRN_UMMCR1 0x3AC /* User Monitor Mode Control Register 0 */ -#define SPRN_UPMC1 0x3A9 /* User Performance Counter Register 1 */ -#define SPRN_UPMC2 0x3AA /* User Performance Counter Register 2 */ -#define SPRN_UPMC3 0x3AD /* User Performance Counter Register 3 */ -#define SPRN_UPMC4 0x3AE /* User Performance Counter Register 4 */ -#define SPRN_USIA 0x3AB /* User Sampled 
Instruction Address Register */ #define SPRN_VRSAVE 0x100 /* Vector Register Save Register */ #define SPRN_XER 0x001 /* Fixed Point Exception Register */ @@ -455,33 +446,7 @@ #define SPRN_SDAR 781 #else /* 32-bit */ -#define SPRN_MMCR0 952 /* Monitor Mode Control Register 0 */ -#define MMCR0_FC 0x80000000UL /* freeze counters */ -#define MMCR0_FCS 0x40000000UL /* freeze in supervisor state */ -#define MMCR0_FCP 0x20000000UL /* freeze in problem state */ -#define MMCR0_FCM1 0x10000000UL /* freeze counters while MSR mark = 1 */ -#define MMCR0_FCM0 0x08000000UL /* freeze counters while MSR mark = 0 */ -#define MMCR0_PMXE 0x04000000UL /* performance monitor exception enable */ -#define MMCR0_FCECE 0x02000000UL /* freeze ctrs on enabled cond or event */ -#define MMCR0_TBEE 0x00400000UL /* time base exception enable */ -#define MMCR0_PMC1CE 0x00008000UL /* PMC1 count enable*/ #define MMCR0_PMCnCE 0x00004000UL /* count enable for all but PMC 1*/ -#define MMCR0_TRIGGER 0x00002000UL /* TRIGGER enable */ -#define MMCR0_PMC1SEL 0x00001fc0UL /* PMC 1 Event */ -#define MMCR0_PMC2SEL 0x0000003fUL /* PMC 2 Event */ - -#define SPRN_MMCR1 956 -#define MMCR1_PMC3SEL 0xf8000000UL /* PMC 3 Event */ -#define MMCR1_PMC4SEL 0x07c00000UL /* PMC 4 Event */ -#define MMCR1_PMC5SEL 0x003e0000UL /* PMC 5 Event */ -#define MMCR1_PMC6SEL 0x0001f800UL /* PMC 6 Event */ -#define SPRN_MMCR2 944 -#define SPRN_PMC1 953 /* Performance Counter Register 1 */ -#define SPRN_PMC2 954 /* Performance Counter Register 2 */ -#define SPRN_PMC3 957 /* Performance Counter Register 3 */ -#define SPRN_PMC4 958 /* Performance Counter Register 4 */ -#define SPRN_PMC5 945 /* Performance Counter Register 5 */ -#define SPRN_PMC6 946 /* Performance Counter Register 6 */ #define SPRN_SIAR 955 /* Sampled Instruction Address Register */ @@ -493,6 +458,77 @@ #define MMCR0_PMC2_CYCLES 0x1 #define MMCR0_PMC2_ITLB 0x7 #define MMCR0_PMC2_LOADMISSTIME 0x5 + +/* Performance-monitoring control and counter registers */ +#define 
SPRN_MMCR0 0x3B8 /* Monitor Mode Control Register 0 (604 and up) */ +#define SPRN_MMCR1 0x3BC /* Monitor Mode Control Register 1 (604e and up) */ +#define SPRN_MMCR2 0x3B0 /* Monitor Mode Control Register 2 (7400 and up) */ +#define SPRN_PMC1 0x3B9 /* Performance Counter Register 1 (604 and up) */ +#define SPRN_PMC2 0x3BA /* Performance Counter Register 2 (604 and up) */ +#define SPRN_PMC3 0x3BD /* Performance Counter Register 3 (604e and up) */ +#define SPRN_PMC4 0x3BE /* Performance Counter Register 4 (604e and up) */ +#define SPRN_PMC5 0x3B1 /* Performance Counter Register 5 (7450 and up) */ +#define SPRN_PMC6 0x3B2 /* Performance Counter Register 6 (7450 and up) */ +#define SPRN_SIA 0x3BB /* Sampled Instruction Address Register (604 and up) */ +#define SPRN_SDA 0x3BF /* Sampled Data Address Register (604/604e only) */ +#define SPRN_BAMR 0x3B7 /* Breakpoint Address Mask Register (7400 and up) */ + +#define SPRN_UMMCR0 0x3A8 /* User Monitor Mode Control Register 0 (750 and up) */ +#define SPRN_UMMCR1 0x3AC /* User Monitor Mode Control Register 0 (750 and up) */ +#define SPRN_UMMCR2 0x3A0 /* User Monitor Mode Control Register 0 (7400 and up) */ +#define SPRN_UPMC1 0x3A9 /* User Performance Counter Register 1 (750 and up) */ +#define SPRN_UPMC2 0x3AA /* User Performance Counter Register 2 (750 and up) */ +#define SPRN_UPMC3 0x3AD /* User Performance Counter Register 3 (750 and up) */ +#define SPRN_UPMC4 0x3AE /* User Performance Counter Register 4 (750 and up) */ +#define SPRN_UPMC5 0x3A1 /* User Performance Counter Register 5 (7450 and up) */ +#define SPRN_UPMC6 0x3A2 /* User Performance Counter Register 5 (7450 and up) */ +#define SPRN_USIA 0x3AB /* User Sampled Instruction Address Register (750 and up) */ +#define SPRN_UBAMR 0x3A7 /* User Breakpoint Address Mask Register (7400 and up) */ + +/* MMCR0 layout (74xx terminology) */ +#define MMCR0_FC 0x80000000 /* Freeze counters unconditionally. 
*/ +#define MMCR0_FCS 0x40000000 /* Freeze counters while MSR[PR]=0 (supervisor mode). */ +#define MMCR0_FCP 0x20000000 /* Freeze counters while MSR[PR]=1 (user mode). */ +#define MMCR0_FCM1 0x10000000 /* Freeze counters while MSR[PM]=1. */ +#define MMCR0_FCM0 0x08000000 /* Freeze counters while MSR[PM]=0. */ +#define MMCR0_PMXE 0x04000000 /* Enable performance monitor exceptions. + * Cleared by hardware when a PM exception occurs. + * 604: PMXE is not cleared by hardware. + */ +#define MMCR0_FCECE 0x02000000 /* Freeze counters on enabled condition or event. + * FCECE is treated as 0 if TRIGGER is 1. + * 74xx: FC is set when the event occurs. + * 604/750: ineffective when PMXE=0. + */ +#define MMCR0_TBSEL 0x01800000 /* Time base lower (TBL) bit selector. + * 00: bit 31, 01: bit 23, 10: bit 19, 11: bit 15. + */ +#define MMCR0_TBEE 0x00400000 /* Enable event on TBL bit transition from 0 to 1. */ +#define MMCR0_THRESHOLD 0x003F0000 /* Threshold value for certain events. */ +#define MMCR0_PMC1CE 0x00008000 /* Enable event on PMC1 overflow. */ +#define MMCR0_PMCjCE 0x00004000 /* Enable event on PMC2-PMC6 overflow. + * 604/750: Overrides FCECE (DISCOUNT). + */ +#define MMCR0_TRIGGER 0x00002000 /* Disable PMC2-PMC6 until PMC1 overflow or other event. + * 74xx: cleared by hardware when the event occurs. + */ +#define MMCR0_PMC1SEL 0x00001FC0 /* PMC1 event selector, 7 bits. */ +#define MMCR0_PMC2SEL 0x0000003F /* PMC2 event selector, 6 bits. */ + +/* MMCR1 layout (604e-7457) */ +#define MMCR1_PMC3SEL 0xF8000000 /* PMC3 event selector, 5 bits. */ +#define MMCR1_PMC4SEL 0x07C00000 /* PMC4 event selector, 5 bits. */ +#define MMCR1_PMC5SEL 0x003E0000 /* PMC5 event selector, 5 bits. (745x only) */ +#define MMCR1_PMC6SEL 0x0001F800 /* PMC6 event selector, 6 bits. (745x only) */ +#define MMCR1__RESERVED 0x000007FF /* should be zero */ + +/* MMCR2 layout (7400-7457) */ +#define MMCR2_THRESHMULT 0x80000000 /* MMCR0[THRESHOLD] multiplier. 
*/ +#define MMCR2_SMCNTEN 0x40000000 /* 7400/7410 only, should be zero. */ +#define MMCR2_SMINTEN 0x20000000 /* 7400/7410 only, should be zero. */ +#define MMCR2__RESERVED 0x1FFFFFFF /* should be zero */ +#define MMCR2_RESERVED (MMCR2_SMCNTEN | MMCR2_SMINTEN | MMCR2__RESERVED) + #endif /* Processor Version Register (PVR) field extraction */ diff -rupN linux-2.6.17/include/asm-powerpc/unistd.h linux-2.6.17.perfctr27/include/asm-powerpc/unistd.h --- linux-2.6.17/include/asm-powerpc/unistd.h 2006-06-18 12:13:11.000000000 +0200 +++ linux-2.6.17.perfctr27/include/asm-powerpc/unistd.h 2006-08-11 02:40:43.000000000 +0200 @@ -323,8 +323,12 @@ #define __NR_faccessat 298 #define __NR_get_robust_list 299 #define __NR_set_robust_list 300 +#define __NR_vperfctr_open 301 +#define __NR_vperfctr_control (__NR_vperfctr_open+1) +#define __NR_vperfctr_write (__NR_vperfctr_open+2) +#define __NR_vperfctr_read (__NR_vperfctr_open+3) -#define __NR_syscalls 301 +#define __NR_syscalls 305 #ifdef __KERNEL__ #define __NR__exit __NR_exit diff -rupN linux-2.6.17/include/asm-x86_64/hw_irq.h linux-2.6.17.perfctr27/include/asm-x86_64/hw_irq.h --- linux-2.6.17/include/asm-x86_64/hw_irq.h 2006-03-20 10:40:11.000000000 +0100 +++ linux-2.6.17.perfctr27/include/asm-x86_64/hw_irq.h 2006-08-11 02:40:43.000000000 +0200 @@ -67,14 +67,15 @@ struct hw_interrupt_type; * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. 
(0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in irq.h */ #ifndef __ASSEMBLY__ diff -rupN linux-2.6.17/include/asm-x86_64/ia32_unistd.h linux-2.6.17.perfctr27/include/asm-x86_64/ia32_unistd.h --- linux-2.6.17/include/asm-x86_64/ia32_unistd.h 2006-06-18 12:13:11.000000000 +0200 +++ linux-2.6.17.perfctr27/include/asm-x86_64/ia32_unistd.h 2006-08-11 02:40:46.000000000 +0200 @@ -316,5 +316,9 @@ #define __NR_ia32_pselect6 308 #define __NR_ia32_ppoll 309 #define __NR_ia32_unshare 310 +#define __NR_ia32_vperfctr_open 318 +#define __NR_ia32_vperfctr_control (__NR_ia32_vperfctr_open+1) +#define __NR_ia32_vperfctr_write (__NR_ia32_vperfctr_open+2) +#define __NR_ia32_vperfctr_read (__NR_ia32_vperfctr_open+3) #endif /* _ASM_X86_64_IA32_UNISTD_H_ */ diff -rupN linux-2.6.17/include/asm-x86_64/irq.h linux-2.6.17.perfctr27/include/asm-x86_64/irq.h --- linux-2.6.17/include/asm-x86_64/irq.h 2006-03-20 10:40:11.000000000 +0100 +++ linux-2.6.17.perfctr27/include/asm-x86_64/irq.h 2006-08-11 02:40:43.000000000 +0200 @@ -29,7 +29,7 @@ */ #define NR_VECTORS 256 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in hw_irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in hw_irq.h */ #ifdef CONFIG_PCI_MSI #define NR_IRQS FIRST_SYSTEM_VECTOR diff -rupN linux-2.6.17/include/asm-x86_64/processor.h linux-2.6.17.perfctr27/include/asm-x86_64/processor.h --- linux-2.6.17/include/asm-x86_64/processor.h 2006-06-18 12:13:11.000000000 +0200 +++ linux-2.6.17.perfctr27/include/asm-x86_64/processor.h 2006-08-11 02:40:43.000000000 +0200 @@ -264,6 +264,8 @@ struct thread_struct { unsigned io_bitmap_max; /* cached TLS descriptors. 
*/ u64 tls_array[GDT_ENTRY_TLS_ENTRIES]; +/* performance counters */ + struct vperfctr *perfctr; } __attribute__((aligned(16))); #define INIT_THREAD { \ diff -rupN linux-2.6.17/include/asm-x86_64/system.h linux-2.6.17.perfctr27/include/asm-x86_64/system.h --- linux-2.6.17/include/asm-x86_64/system.h 2006-06-18 12:13:11.000000000 +0200 +++ linux-2.6.17.perfctr27/include/asm-x86_64/system.h 2006-08-11 02:40:43.000000000 +0200 @@ -26,7 +26,8 @@ #define __EXTRA_CLOBBER \ ,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15" -#define switch_to(prev,next,last) \ +#define switch_to(prev,next,last) do { \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile(SAVE_CONTEXT \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ @@ -46,7 +47,8 @@ [tif_fork] "i" (TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, thread_info)), \ [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ - : "memory", "cc" __EXTRA_CLOBBER) + : "memory", "cc" __EXTRA_CLOBBER); \ +} while (0) extern void load_gs_index(unsigned); diff -rupN linux-2.6.17/include/asm-x86_64/unistd.h linux-2.6.17.perfctr27/include/asm-x86_64/unistd.h --- linux-2.6.17/include/asm-x86_64/unistd.h 2006-06-18 12:13:11.000000000 +0200 +++ linux-2.6.17.perfctr27/include/asm-x86_64/unistd.h 2006-08-11 02:40:46.000000000 +0200 @@ -617,8 +617,18 @@ __SYSCALL(__NR_tee, sys_tee) __SYSCALL(__NR_sync_file_range, sys_sync_file_range) #define __NR_vmsplice 278 __SYSCALL(__NR_vmsplice, sys_vmsplice) +#define __NR_move_pages 279 +__SYSCALL(__NR_move_pages, sys_ni_syscall) +#define __NR_vperfctr_open 280 +__SYSCALL(__NR_vperfctr_open, sys_vperfctr_open) +#define __NR_vperfctr_control (__NR_vperfctr_open+1) +__SYSCALL(__NR_vperfctr_control, sys_vperfctr_control) +#define __NR_vperfctr_write (__NR_vperfctr_open+2) +__SYSCALL(__NR_vperfctr_write, sys_vperfctr_write) +#define __NR_vperfctr_read (__NR_vperfctr_open+3) +__SYSCALL(__NR_vperfctr_read, 
sys_vperfctr_read) -#define __NR_syscall_max __NR_vmsplice +#define __NR_syscall_max __NR_vperfctr_read #ifndef __NO_STUBS diff -rupN linux-2.6.17/include/linux/sched.h linux-2.6.17.perfctr27/include/linux/sched.h --- linux-2.6.17/include/linux/sched.h 2006-06-18 12:13:11.000000000 +0200 +++ linux-2.6.17.perfctr27/include/linux/sched.h 2006-08-11 02:40:43.000000000 +0200 @@ -1229,6 +1229,9 @@ static inline int thread_group_empty(tas * subscriptions and synchronises with wait4(). Also used in procfs. Also * pins the final release of task.io_context. Also protects ->cpuset. * + * Synchronises set_cpus_allowed(), unlink, and creat of ->thread.perfctr. + * [if CONFIG_PERFCTR_VIRTUAL] + * * Nests both inside and outside of read_lock(&tasklist_lock). * It must not be nested with write_lock_irq(&tasklist_lock), * neither inside nor outside. diff -rupN linux-2.6.17/kernel/exit.c linux-2.6.17.perfctr27/kernel/exit.c --- linux-2.6.17/kernel/exit.c 2006-06-18 12:13:11.000000000 +0200 +++ linux-2.6.17.perfctr27/kernel/exit.c 2006-08-11 02:40:43.000000000 +0200 @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -170,6 +171,7 @@ repeat: zap_leader = (leader->exit_signal == -1); } + perfctr_release_task(p); sched_exit(p); write_unlock_irq(&tasklist_lock); spin_unlock(&p->proc_lock); diff -rupN linux-2.6.17/kernel/sched.c linux-2.6.17.perfctr27/kernel/sched.c --- linux-2.6.17/kernel/sched.c 2006-06-18 12:13:11.000000000 +0200 +++ linux-2.6.17.perfctr27/kernel/sched.c 2006-08-11 02:40:43.000000000 +0200 @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include @@ -4425,6 +4426,8 @@ int set_cpus_allowed(task_t *p, cpumask_ migration_req_t req; runqueue_t *rq; + perfctr_set_cpus_allowed(p, new_mask); + rq = task_rq_lock(p, &flags); if (!cpus_intersects(new_mask, cpu_online_map)) { ret = -EINVAL; diff -rupN linux-2.6.17/kernel/sys_ni.c linux-2.6.17.perfctr27/kernel/sys_ni.c --- linux-2.6.17/kernel/sys_ni.c 2006-06-18 
12:13:11.000000000 +0200 +++ linux-2.6.17.perfctr27/kernel/sys_ni.c 2006-08-11 02:40:43.000000000 +0200 @@ -72,6 +72,10 @@ cond_syscall(compat_sys_mq_timedsend); cond_syscall(compat_sys_mq_timedreceive); cond_syscall(compat_sys_mq_notify); cond_syscall(compat_sys_mq_getsetattr); +cond_syscall(sys_vperfctr_open); +cond_syscall(sys_vperfctr_control); +cond_syscall(sys_vperfctr_write); +cond_syscall(sys_vperfctr_read); cond_syscall(sys_mbind); cond_syscall(sys_get_mempolicy); cond_syscall(sys_set_mempolicy); diff -rupN linux-2.6.17/kernel/timer.c linux-2.6.17.perfctr27/kernel/timer.c --- linux-2.6.17/kernel/timer.c 2006-06-18 12:13:11.000000000 +0200 +++ linux-2.6.17.perfctr27/kernel/timer.c 2006-08-11 02:40:43.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -830,6 +831,7 @@ void update_process_times(int user_tick) account_user_time(p, jiffies_to_cputime(1)); else account_system_time(p, HARDIRQ_OFFSET, jiffies_to_cputime(1)); + perfctr_sample_thread(&p->thread); run_local_timers(); if (rcu_pending(cpu)) rcu_check_callbacks(cpu, user_tick); papi-5.6.0/src/perfctr-2.7.x/patches/patch-kernel-2.6.19000664 001750 001750 00000075304 13216244370 024234 0ustar00jshenry1963jshenry1963000000 000000 diff -rupN linux-2.6.19/CREDITS linux-2.6.19.perfctr27/CREDITS --- linux-2.6.19/CREDITS 2007-09-27 22:58:27.000000000 +0200 +++ linux-2.6.19.perfctr27/CREDITS 2007-09-27 23:05:04.000000000 +0200 @@ -2656,9 +2656,10 @@ S: Ottawa, Ontario S: Canada K2P 0X8 N: Mikael Pettersson -E: mikpe@csd.uu.se -W: http://www.csd.uu.se/~mikpe/ +E: mikpe@it.uu.se +W: http://user.it.uu.se/~mikpe/ D: Miscellaneous fixes +D: Performance-monitoring counters driver N: Reed H. 
Petty E: rhp@draper.net diff -rupN linux-2.6.19/MAINTAINERS linux-2.6.19.perfctr27/MAINTAINERS --- linux-2.6.19/MAINTAINERS 2007-09-27 22:58:28.000000000 +0200 +++ linux-2.6.19.perfctr27/MAINTAINERS 2007-09-27 23:05:04.000000000 +0200 @@ -2355,6 +2355,12 @@ M: nagar@watson.ibm.com L: linux-kernel@vger.kernel.org S: Maintained +PERFORMANCE-MONITORING COUNTERS DRIVER +P: Mikael Pettersson +M: mikpe@it.uu.se +W: http://user.it.uu.se/~mikpe/linux/perfctr/ +S: Maintained + PERSONALITY HANDLING P: Christoph Hellwig M: hch@infradead.org diff -rupN linux-2.6.19/arch/i386/Kconfig linux-2.6.19.perfctr27/arch/i386/Kconfig --- linux-2.6.19/arch/i386/Kconfig 2007-09-27 22:58:28.000000000 +0200 +++ linux-2.6.19.perfctr27/arch/i386/Kconfig 2007-09-27 23:05:04.000000000 +0200 @@ -741,6 +741,8 @@ config SECCOMP If unsure, say Y. Only embedded should say N here. +source "drivers/perfctr/Kconfig" + source kernel/Kconfig.hz config KEXEC diff -rupN linux-2.6.19/arch/i386/kernel/entry.S linux-2.6.19.perfctr27/arch/i386/kernel/entry.S --- linux-2.6.19/arch/i386/kernel/entry.S 2007-09-27 22:58:28.000000000 +0200 +++ linux-2.6.19.perfctr27/arch/i386/kernel/entry.S 2007-09-27 23:19:45.000000000 +0200 @@ -602,6 +602,22 @@ ENTRY(name) \ /* The include is where all of the SMP etc. 
interrupts come from */ #include "entry_arch.h" +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PERFCTR) +ENTRY(perfctr_interrupt) + RING0_INT_FRAME + pushl $~(LOCAL_PERFCTR_VECTOR) + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + TRACE_IRQS_OFF + pushl %esp + CFI_ADJUST_CFA_OFFSET 4 + call smp_perfctr_interrupt + addl $4, %esp + CFI_ADJUST_CFA_OFFSET -4 + jmp ret_from_intr + CFI_ENDPROC +#endif + KPROBE_ENTRY(page_fault) RING0_EC_FRAME pushl $do_page_fault diff -rupN linux-2.6.19/arch/i386/kernel/i8259.c linux-2.6.19.perfctr27/arch/i386/kernel/i8259.c --- linux-2.6.19/arch/i386/kernel/i8259.c 2007-09-27 22:58:28.000000000 +0200 +++ linux-2.6.19.perfctr27/arch/i386/kernel/i8259.c 2007-09-27 23:05:04.000000000 +0200 @@ -22,6 +22,7 @@ #include #include #include +#include #include @@ -406,6 +407,8 @@ void __init init_IRQ(void) */ intr_init_hook(); + perfctr_vector_init(); + /* * Set the clock to HZ Hz, we already have a valid * vector now: diff -rupN linux-2.6.19/arch/i386/kernel/process.c linux-2.6.19.perfctr27/arch/i386/kernel/process.c --- linux-2.6.19/arch/i386/kernel/process.c 2007-09-27 22:58:28.000000000 +0200 +++ linux-2.6.19.perfctr27/arch/i386/kernel/process.c 2007-09-27 23:05:04.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -381,6 +382,7 @@ void exit_thread(void) tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET; put_cpu(); } + perfctr_exit_thread(¤t->thread); } void flush_thread(void) @@ -433,6 +435,8 @@ int copy_thread(int nr, unsigned long cl savesegment(fs,p->thread.fs); savesegment(gs,p->thread.gs); + perfctr_copy_task(p, regs); + tsk = current; if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, @@ -697,6 +701,8 @@ struct task_struct fastcall * __switch_t disable_tsc(prev_p, next_p); + perfctr_resume_thread(next); + return prev_p; } diff -rupN linux-2.6.19/arch/i386/kernel/syscall_table.S 
linux-2.6.19.perfctr27/arch/i386/kernel/syscall_table.S --- linux-2.6.19/arch/i386/kernel/syscall_table.S 2007-09-27 22:58:28.000000000 +0200 +++ linux-2.6.19.perfctr27/arch/i386/kernel/syscall_table.S 2007-09-27 23:06:52.000000000 +0200 @@ -319,3 +319,12 @@ ENTRY(sys_call_table) .long sys_move_pages .long sys_getcpu .long sys_epoll_pwait + .long sys_ni_syscall /* 320 */ + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_vperfctr_open /* 325 */ + .long sys_vperfctr_control + .long sys_vperfctr_write + .long sys_vperfctr_read diff -rupN linux-2.6.19/arch/powerpc/Kconfig linux-2.6.19.perfctr27/arch/powerpc/Kconfig --- linux-2.6.19/arch/powerpc/Kconfig 2007-09-27 22:58:29.000000000 +0200 +++ linux-2.6.19.perfctr27/arch/powerpc/Kconfig 2007-09-27 23:05:04.000000000 +0200 @@ -320,6 +320,9 @@ config NOT_COHERENT_CACHE bool depends on 4xx || 8xx || E200 default y + +source "drivers/perfctr/Kconfig" + endmenu source "init/Kconfig" diff -rupN linux-2.6.19/arch/powerpc/kernel/process.c linux-2.6.19.perfctr27/arch/powerpc/kernel/process.c --- linux-2.6.19/arch/powerpc/kernel/process.c 2007-09-27 22:58:29.000000000 +0200 +++ linux-2.6.19.perfctr27/arch/powerpc/kernel/process.c 2007-09-27 23:05:04.000000000 +0200 @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -332,7 +333,9 @@ struct task_struct *__switch_to(struct t account_process_vtime(current); calculate_steal_time(); + perfctr_suspend_thread(&prev->thread); last = _switch(old_thread, new_thread); + perfctr_resume_thread(¤t->thread); local_irq_restore(flags); @@ -458,6 +461,7 @@ void show_regs(struct pt_regs * regs) void exit_thread(void) { discard_lazy_cpu_state(); + perfctr_exit_thread(¤t->thread); } void flush_thread(void) @@ -570,6 +574,8 @@ int copy_thread(int nr, unsigned long cl p->thread.last_syscall = -1; #endif + perfctr_copy_task(p, regs); + return 0; } diff -rupN linux-2.6.19/arch/x86_64/Kconfig 
linux-2.6.19.perfctr27/arch/x86_64/Kconfig --- linux-2.6.19/arch/x86_64/Kconfig 2007-09-27 22:58:31.000000000 +0200 +++ linux-2.6.19.perfctr27/arch/x86_64/Kconfig 2007-09-27 23:05:04.000000000 +0200 @@ -573,6 +573,8 @@ config CC_STACKPROTECTOR_ALL functions that use large-ish on-stack buffers. By enabling this option, GCC will be asked to do this for ALL functions. +source "drivers/perfctr/Kconfig" + source kernel/Kconfig.hz config REORDER diff -rupN linux-2.6.19/arch/x86_64/ia32/ia32entry.S linux-2.6.19.perfctr27/arch/x86_64/ia32/ia32entry.S --- linux-2.6.19/arch/x86_64/ia32/ia32entry.S 2007-09-27 22:58:31.000000000 +0200 +++ linux-2.6.19.perfctr27/arch/x86_64/ia32/ia32entry.S 2007-09-27 23:08:24.000000000 +0200 @@ -718,4 +718,14 @@ ia32_sys_call_table: .quad compat_sys_vmsplice .quad compat_sys_move_pages .quad sys_getcpu + .quad quiet_ni_syscall + .quad quiet_ni_syscall /* 320 */ + .quad quiet_ni_syscall + .quad quiet_ni_syscall + .quad quiet_ni_syscall + .quad quiet_ni_syscall + .quad sys_vperfctr_open /* 325 */ + .quad sys_vperfctr_control + .quad sys_vperfctr_write + .quad sys_vperfctr_read ia32_syscall_end: diff -rupN linux-2.6.19/arch/x86_64/kernel/entry.S linux-2.6.19.perfctr27/arch/x86_64/kernel/entry.S --- linux-2.6.19/arch/x86_64/kernel/entry.S 2007-09-27 22:58:31.000000000 +0200 +++ linux-2.6.19.perfctr27/arch/x86_64/kernel/entry.S 2007-09-27 23:05:04.000000000 +0200 @@ -713,6 +713,12 @@ ENTRY(spurious_interrupt) apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt END(spurious_interrupt) +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PERFCTR) +ENTRY(perfctr_interrupt) + apicinterrupt LOCAL_PERFCTR_VECTOR,smp_perfctr_interrupt +END(perfctr_interrupt) +#endif + /* * Exception entry points. 
*/ diff -rupN linux-2.6.19/arch/x86_64/kernel/i8259.c linux-2.6.19.perfctr27/arch/x86_64/kernel/i8259.c --- linux-2.6.19/arch/x86_64/kernel/i8259.c 2007-09-27 22:58:31.000000000 +0200 +++ linux-2.6.19.perfctr27/arch/x86_64/kernel/i8259.c 2007-09-27 23:05:04.000000000 +0200 @@ -22,6 +22,7 @@ #include #include #include +#include /* * Common place to define all x86 IRQ vectors @@ -552,6 +553,8 @@ void __init init_IRQ(void) set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); + perfctr_vector_init(); + /* * Set the clock to HZ Hz, we already have a valid * vector now: diff -rupN linux-2.6.19/arch/x86_64/kernel/process.c linux-2.6.19.perfctr27/arch/x86_64/kernel/process.c --- linux-2.6.19/arch/x86_64/kernel/process.c 2007-09-27 22:58:31.000000000 +0200 +++ linux-2.6.19.perfctr27/arch/x86_64/kernel/process.c 2007-09-27 23:18:14.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -370,6 +371,7 @@ void exit_thread(void) t->io_bitmap_max = 0; put_cpu(); } + perfctr_exit_thread(t); } void flush_thread(void) @@ -475,6 +477,8 @@ int copy_thread(int nr, unsigned long cl asm("mov %%es,%0" : "=m" (p->thread.es)); asm("mov %%ds,%0" : "=m" (p->thread.ds)); + perfctr_copy_task(p, regs); + if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) { @@ -658,6 +662,9 @@ __switch_to(struct task_struct *prev_p, */ if (next_p->fpu_counter>5) math_state_restore(); + + perfctr_resume_thread(next); + return prev_p; } diff -rupN linux-2.6.19/drivers/Makefile linux-2.6.19.perfctr27/drivers/Makefile --- linux-2.6.19/drivers/Makefile 2007-09-27 22:58:31.000000000 +0200 +++ linux-2.6.19.perfctr27/drivers/Makefile 2007-09-27 23:05:04.000000000 +0200 @@ -72,6 +72,7 @@ obj-$(CONFIG_NEW_LEDS) += leds/ obj-$(CONFIG_INFINIBAND) += infiniband/ obj-$(CONFIG_IPATH_CORE) += infiniband/ obj-$(CONFIG_SGI_SN) += sn/ 
+obj-$(CONFIG_PERFCTR) += perfctr/ obj-y += firmware/ obj-$(CONFIG_CRYPTO) += crypto/ obj-$(CONFIG_SUPERH) += sh/ diff -rupN linux-2.6.19/include/asm-i386/mach-default/irq_vectors.h linux-2.6.19.perfctr27/include/asm-i386/mach-default/irq_vectors.h --- linux-2.6.19/include/asm-i386/mach-default/irq_vectors.h 2007-02-04 19:44:54.000000000 +0100 +++ linux-2.6.19.perfctr27/include/asm-i386/mach-default/irq_vectors.h 2007-09-27 23:05:04.000000000 +0200 @@ -56,14 +56,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 diff -rupN linux-2.6.19/include/asm-i386/mach-visws/irq_vectors.h linux-2.6.19.perfctr27/include/asm-i386/mach-visws/irq_vectors.h --- linux-2.6.19/include/asm-i386/mach-visws/irq_vectors.h 2007-02-04 19:44:54.000000000 +0100 +++ linux-2.6.19.perfctr27/include/asm-i386/mach-visws/irq_vectors.h 2007-09-27 23:05:04.000000000 +0200 @@ -35,14 +35,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. 
(0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 diff -rupN linux-2.6.19/include/asm-i386/processor.h linux-2.6.19.perfctr27/include/asm-i386/processor.h --- linux-2.6.19/include/asm-i386/processor.h 2007-09-27 22:58:42.000000000 +0200 +++ linux-2.6.19.perfctr27/include/asm-i386/processor.h 2007-09-27 23:05:04.000000000 +0200 @@ -467,6 +467,8 @@ struct thread_struct { unsigned long iopl; /* max allowed port in the bitmap, in bytes: */ unsigned long io_bitmap_max; +/* performance counters */ + struct vperfctr *perfctr; }; #define INIT_THREAD { \ diff -rupN linux-2.6.19/include/asm-i386/system.h linux-2.6.19.perfctr27/include/asm-i386/system.h --- linux-2.6.19/include/asm-i386/system.h 2007-09-27 22:58:42.000000000 +0200 +++ linux-2.6.19.perfctr27/include/asm-i386/system.h 2007-09-27 23:05:04.000000000 +0200 @@ -17,6 +17,7 @@ extern struct task_struct * FASTCALL(__s */ #define switch_to(prev,next,last) do { \ unsigned long esi,edi; \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile("pushfl\n\t" /* Save flags */ \ "pushl %%ebp\n\t" \ "movl %%esp,%0\n\t" /* save ESP */ \ diff -rupN linux-2.6.19/include/asm-i386/unistd.h linux-2.6.19.perfctr27/include/asm-i386/unistd.h --- linux-2.6.19/include/asm-i386/unistd.h 2007-09-27 22:58:42.000000000 +0200 +++ linux-2.6.19.perfctr27/include/asm-i386/unistd.h 2007-09-27 23:06:05.000000000 +0200 @@ -325,10 +325,14 @@ #define __NR_move_pages 317 #define __NR_getcpu 318 #define __NR_epoll_pwait 319 +#define __NR_vperfctr_open 325 +#define __NR_vperfctr_control (__NR_vperfctr_open+1) +#define __NR_vperfctr_write (__NR_vperfctr_open+2) +#define __NR_vperfctr_read (__NR_vperfctr_open+3) #ifdef __KERNEL__ -#define NR_syscalls 320 +#define NR_syscalls 329 #include /* diff -rupN linux-2.6.19/include/asm-powerpc/processor.h linux-2.6.19.perfctr27/include/asm-powerpc/processor.h --- linux-2.6.19/include/asm-powerpc/processor.h 
2007-09-27 22:58:42.000000000 +0200 +++ linux-2.6.19.perfctr27/include/asm-powerpc/processor.h 2007-09-27 23:05:04.000000000 +0200 @@ -170,6 +170,9 @@ struct thread_struct { unsigned long spefscr; /* SPE & eFP status */ int used_spe; /* set if process has used spe */ #endif /* CONFIG_SPE */ +#ifdef CONFIG_PERFCTR_VIRTUAL + struct vperfctr *perfctr; /* performance counters */ +#endif }; #define ARCH_MIN_TASKALIGN 16 diff -rupN linux-2.6.19/include/asm-powerpc/reg.h linux-2.6.19.perfctr27/include/asm-powerpc/reg.h --- linux-2.6.19/include/asm-powerpc/reg.h 2007-09-27 22:58:42.000000000 +0200 +++ linux-2.6.19.perfctr27/include/asm-powerpc/reg.h 2007-09-27 23:05:04.000000000 +0200 @@ -365,10 +365,8 @@ #define SPRN_PURR 0x135 /* Processor Utilization of Resources Reg */ #define SPRN_PVR 0x11F /* Processor Version Register */ #define SPRN_RPA 0x3D6 /* Required Physical Address Register */ -#define SPRN_SDA 0x3BF /* Sampled Data Address Register */ #define SPRN_SDR1 0x019 /* MMU Hash Base Register */ #define SPRN_ASR 0x118 /* Address Space Register */ -#define SPRN_SIA 0x3BB /* Sampled Instruction Address Register */ #define SPRN_SPRG0 0x110 /* Special Purpose Register General 0 */ #define SPRN_SPRG1 0x111 /* Special Purpose Register General 1 */ #define SPRN_SPRG2 0x112 /* Special Purpose Register General 2 */ @@ -405,13 +403,6 @@ #define SPRN_THRM3 0x3FE /* Thermal Management Register 3 */ #define THRM3_E (1<<0) #define SPRN_TLBMISS 0x3D4 /* 980 7450 TLB Miss Register */ -#define SPRN_UMMCR0 0x3A8 /* User Monitor Mode Control Register 0 */ -#define SPRN_UMMCR1 0x3AC /* User Monitor Mode Control Register 0 */ -#define SPRN_UPMC1 0x3A9 /* User Performance Counter Register 1 */ -#define SPRN_UPMC2 0x3AA /* User Performance Counter Register 2 */ -#define SPRN_UPMC3 0x3AD /* User Performance Counter Register 3 */ -#define SPRN_UPMC4 0x3AE /* User Performance Counter Register 4 */ -#define SPRN_USIA 0x3AB /* User Sampled Instruction Address Register */ #define SPRN_VRSAVE 
0x100 /* Vector Register Save Register */ #define SPRN_XER 0x001 /* Fixed Point Exception Register */ @@ -461,33 +452,7 @@ #define SPRN_SDAR 781 #else /* 32-bit */ -#define SPRN_MMCR0 952 /* Monitor Mode Control Register 0 */ -#define MMCR0_FC 0x80000000UL /* freeze counters */ -#define MMCR0_FCS 0x40000000UL /* freeze in supervisor state */ -#define MMCR0_FCP 0x20000000UL /* freeze in problem state */ -#define MMCR0_FCM1 0x10000000UL /* freeze counters while MSR mark = 1 */ -#define MMCR0_FCM0 0x08000000UL /* freeze counters while MSR mark = 0 */ -#define MMCR0_PMXE 0x04000000UL /* performance monitor exception enable */ -#define MMCR0_FCECE 0x02000000UL /* freeze ctrs on enabled cond or event */ -#define MMCR0_TBEE 0x00400000UL /* time base exception enable */ -#define MMCR0_PMC1CE 0x00008000UL /* PMC1 count enable*/ #define MMCR0_PMCnCE 0x00004000UL /* count enable for all but PMC 1*/ -#define MMCR0_TRIGGER 0x00002000UL /* TRIGGER enable */ -#define MMCR0_PMC1SEL 0x00001fc0UL /* PMC 1 Event */ -#define MMCR0_PMC2SEL 0x0000003fUL /* PMC 2 Event */ - -#define SPRN_MMCR1 956 -#define MMCR1_PMC3SEL 0xf8000000UL /* PMC 3 Event */ -#define MMCR1_PMC4SEL 0x07c00000UL /* PMC 4 Event */ -#define MMCR1_PMC5SEL 0x003e0000UL /* PMC 5 Event */ -#define MMCR1_PMC6SEL 0x0001f800UL /* PMC 6 Event */ -#define SPRN_MMCR2 944 -#define SPRN_PMC1 953 /* Performance Counter Register 1 */ -#define SPRN_PMC2 954 /* Performance Counter Register 2 */ -#define SPRN_PMC3 957 /* Performance Counter Register 3 */ -#define SPRN_PMC4 958 /* Performance Counter Register 4 */ -#define SPRN_PMC5 945 /* Performance Counter Register 5 */ -#define SPRN_PMC6 946 /* Performance Counter Register 6 */ #define SPRN_SIAR 955 /* Sampled Instruction Address Register */ @@ -499,6 +464,77 @@ #define MMCR0_PMC2_CYCLES 0x1 #define MMCR0_PMC2_ITLB 0x7 #define MMCR0_PMC2_LOADMISSTIME 0x5 + +/* Performance-monitoring control and counter registers */ +#define SPRN_MMCR0 0x3B8 /* Monitor Mode Control Register 0 (604 
and up) */ +#define SPRN_MMCR1 0x3BC /* Monitor Mode Control Register 1 (604e and up) */ +#define SPRN_MMCR2 0x3B0 /* Monitor Mode Control Register 2 (7400 and up) */ +#define SPRN_PMC1 0x3B9 /* Performance Counter Register 1 (604 and up) */ +#define SPRN_PMC2 0x3BA /* Performance Counter Register 2 (604 and up) */ +#define SPRN_PMC3 0x3BD /* Performance Counter Register 3 (604e and up) */ +#define SPRN_PMC4 0x3BE /* Performance Counter Register 4 (604e and up) */ +#define SPRN_PMC5 0x3B1 /* Performance Counter Register 5 (7450 and up) */ +#define SPRN_PMC6 0x3B2 /* Performance Counter Register 6 (7450 and up) */ +#define SPRN_SIA 0x3BB /* Sampled Instruction Address Register (604 and up) */ +#define SPRN_SDA 0x3BF /* Sampled Data Address Register (604/604e only) */ +#define SPRN_BAMR 0x3B7 /* Breakpoint Address Mask Register (7400 and up) */ + +#define SPRN_UMMCR0 0x3A8 /* User Monitor Mode Control Register 0 (750 and up) */ +#define SPRN_UMMCR1 0x3AC /* User Monitor Mode Control Register 1 (750 and up) */ +#define SPRN_UMMCR2 0x3A0 /* User Monitor Mode Control Register 2 (7400 and up) */ +#define SPRN_UPMC1 0x3A9 /* User Performance Counter Register 1 (750 and up) */ +#define SPRN_UPMC2 0x3AA /* User Performance Counter Register 2 (750 and up) */ +#define SPRN_UPMC3 0x3AD /* User Performance Counter Register 3 (750 and up) */ +#define SPRN_UPMC4 0x3AE /* User Performance Counter Register 4 (750 and up) */ +#define SPRN_UPMC5 0x3A1 /* User Performance Counter Register 5 (7450 and up) */ +#define SPRN_UPMC6 0x3A2 /* User Performance Counter Register 6 (7450 and up) */ +#define SPRN_USIA 0x3AB /* User Sampled Instruction Address Register (750 and up) */ +#define SPRN_UBAMR 0x3A7 /* User Breakpoint Address Mask Register (7400 and up) */ + +/* MMCR0 layout (74xx terminology) */ +#define MMCR0_FC 0x80000000 /* Freeze counters unconditionally. */ +#define MMCR0_FCS 0x40000000 /* Freeze counters while MSR[PR]=0 (supervisor mode).
*/ +#define MMCR0_FCP 0x20000000 /* Freeze counters while MSR[PR]=1 (user mode). */ +#define MMCR0_FCM1 0x10000000 /* Freeze counters while MSR[PM]=1. */ +#define MMCR0_FCM0 0x08000000 /* Freeze counters while MSR[PM]=0. */ +#define MMCR0_PMXE 0x04000000 /* Enable performance monitor exceptions. + * Cleared by hardware when a PM exception occurs. + * 604: PMXE is not cleared by hardware. + */ +#define MMCR0_FCECE 0x02000000 /* Freeze counters on enabled condition or event. + * FCECE is treated as 0 if TRIGGER is 1. + * 74xx: FC is set when the event occurs. + * 604/750: ineffective when PMXE=0. + */ +#define MMCR0_TBSEL 0x01800000 /* Time base lower (TBL) bit selector. + * 00: bit 31, 01: bit 23, 10: bit 19, 11: bit 15. + */ +#define MMCR0_TBEE 0x00400000 /* Enable event on TBL bit transition from 0 to 1. */ +#define MMCR0_THRESHOLD 0x003F0000 /* Threshold value for certain events. */ +#define MMCR0_PMC1CE 0x00008000 /* Enable event on PMC1 overflow. */ +#define MMCR0_PMCjCE 0x00004000 /* Enable event on PMC2-PMC6 overflow. + * 604/750: Overrides FCECE (DISCOUNT). + */ +#define MMCR0_TRIGGER 0x00002000 /* Disable PMC2-PMC6 until PMC1 overflow or other event. + * 74xx: cleared by hardware when the event occurs. + */ +#define MMCR0_PMC1SEL 0x00001FC0 /* PMC1 event selector, 7 bits. */ +#define MMCR0_PMC2SEL 0x0000003F /* PMC2 event selector, 6 bits. */ + +/* MMCR1 layout (604e-7457) */ +#define MMCR1_PMC3SEL 0xF8000000 /* PMC3 event selector, 5 bits. */ +#define MMCR1_PMC4SEL 0x07C00000 /* PMC4 event selector, 5 bits. */ +#define MMCR1_PMC5SEL 0x003E0000 /* PMC5 event selector, 5 bits. (745x only) */ +#define MMCR1_PMC6SEL 0x0001F800 /* PMC6 event selector, 6 bits. (745x only) */ +#define MMCR1__RESERVED 0x000007FF /* should be zero */ + +/* MMCR2 layout (7400-7457) */ +#define MMCR2_THRESHMULT 0x80000000 /* MMCR0[THRESHOLD] multiplier. */ +#define MMCR2_SMCNTEN 0x40000000 /* 7400/7410 only, should be zero. 
*/ +#define MMCR2_SMINTEN 0x20000000 /* 7400/7410 only, should be zero. */ +#define MMCR2__RESERVED 0x1FFFFFFF /* should be zero */ +#define MMCR2_RESERVED (MMCR2_SMCNTEN | MMCR2_SMINTEN | MMCR2__RESERVED) + #endif /* diff -rupN linux-2.6.19/include/asm-powerpc/systbl.h linux-2.6.19.perfctr27/include/asm-powerpc/systbl.h --- linux-2.6.19/include/asm-powerpc/systbl.h 2007-09-27 22:58:42.000000000 +0200 +++ linux-2.6.19.perfctr27/include/asm-powerpc/systbl.h 2007-09-27 23:14:37.000000000 +0200 @@ -304,4 +304,16 @@ SYSCALL_SPU(fchmodat) SYSCALL_SPU(faccessat) COMPAT_SYS_SPU(get_robust_list) COMPAT_SYS_SPU(set_robust_list) -COMPAT_SYS(move_pages) +COMPAT_SYS(move_pages) /* 301 */ +SYSCALL(ni_syscall) /* 302 */ +SYSCALL(ni_syscall) /* 303 */ +SYSCALL(ni_syscall) /* 304 */ +SYSCALL(ni_syscall) /* 305 */ +SYSCALL(ni_syscall) /* 306 */ +SYSCALL(ni_syscall) /* 307 */ +SYSCALL(ni_syscall) /* 308 */ +SYSCALL(ni_syscall) /* 309 */ +SYSCALL(vperfctr_open) /* 310 */ +SYSCALL(vperfctr_control) +SYSCALL(vperfctr_write) +SYSCALL(vperfctr_read) diff -rupN linux-2.6.19/include/asm-powerpc/unistd.h linux-2.6.19.perfctr27/include/asm-powerpc/unistd.h --- linux-2.6.19/include/asm-powerpc/unistd.h 2007-09-27 22:58:42.000000000 +0200 +++ linux-2.6.19.perfctr27/include/asm-powerpc/unistd.h 2007-09-27 23:13:11.000000000 +0200 @@ -324,10 +324,14 @@ #define __NR_get_robust_list 299 #define __NR_set_robust_list 300 #define __NR_move_pages 301 +#define __NR_vperfctr_open 310 +#define __NR_vperfctr_control (__NR_vperfctr_open+1) +#define __NR_vperfctr_write (__NR_vperfctr_open+2) +#define __NR_vperfctr_read (__NR_vperfctr_open+3) #ifdef __KERNEL__ -#define __NR_syscalls 302 +#define __NR_syscalls 314 #define __NR__exit __NR_exit #define NR_syscalls __NR_syscalls diff -rupN linux-2.6.19/include/asm-x86_64/hw_irq.h linux-2.6.19.perfctr27/include/asm-x86_64/hw_irq.h --- linux-2.6.19/include/asm-x86_64/hw_irq.h 2007-09-27 22:58:42.000000000 +0200 +++ 
linux-2.6.19.perfctr27/include/asm-x86_64/hw_irq.h 2007-09-27 23:05:04.000000000 +0200 @@ -63,14 +63,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in irq.h */ #ifndef __ASSEMBLY__ diff -rupN linux-2.6.19/include/asm-x86_64/irq.h linux-2.6.19.perfctr27/include/asm-x86_64/irq.h --- linux-2.6.19/include/asm-x86_64/irq.h 2007-09-27 22:58:42.000000000 +0200 +++ linux-2.6.19.perfctr27/include/asm-x86_64/irq.h 2007-09-27 23:05:04.000000000 +0200 @@ -29,7 +29,7 @@ */ #define NR_VECTORS 256 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in hw_irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in hw_irq.h */ #define NR_IRQS (NR_VECTORS + (32 *NR_CPUS)) #define NR_IRQ_VECTORS NR_IRQS diff -rupN linux-2.6.19/include/asm-x86_64/processor.h linux-2.6.19.perfctr27/include/asm-x86_64/processor.h --- linux-2.6.19/include/asm-x86_64/processor.h 2007-09-27 22:58:42.000000000 +0200 +++ linux-2.6.19.perfctr27/include/asm-x86_64/processor.h 2007-09-27 23:05:04.000000000 +0200 @@ -274,6 +274,8 @@ struct thread_struct { unsigned io_bitmap_max; /* cached TLS descriptors. 
*/ u64 tls_array[GDT_ENTRY_TLS_ENTRIES]; +/* performance counters */ + struct vperfctr *perfctr; } __attribute__((aligned(16))); #define INIT_THREAD { \ diff -rupN linux-2.6.19/include/asm-x86_64/system.h linux-2.6.19.perfctr27/include/asm-x86_64/system.h --- linux-2.6.19/include/asm-x86_64/system.h 2007-09-27 22:58:42.000000000 +0200 +++ linux-2.6.19.perfctr27/include/asm-x86_64/system.h 2007-09-27 23:10:47.000000000 +0200 @@ -21,7 +21,8 @@ ,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15" /* Save restore flags to clear handle leaking NT */ -#define switch_to(prev,next,last) \ +#define switch_to(prev,next,last) do { \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile(SAVE_CONTEXT \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ @@ -41,7 +42,8 @@ [tif_fork] "i" (TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, thread_info)), \ [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ - : "memory", "cc" __EXTRA_CLOBBER) + : "memory", "cc" __EXTRA_CLOBBER); \ +} while (0) extern void load_gs_index(unsigned); diff -rupN linux-2.6.19/include/asm-x86_64/unistd.h linux-2.6.19.perfctr27/include/asm-x86_64/unistd.h --- linux-2.6.19/include/asm-x86_64/unistd.h 2007-09-27 22:58:42.000000000 +0200 +++ linux-2.6.19.perfctr27/include/asm-x86_64/unistd.h 2007-09-27 23:09:58.000000000 +0200 @@ -619,8 +619,22 @@ __SYSCALL(__NR_sync_file_range, sys_sync __SYSCALL(__NR_vmsplice, sys_vmsplice) #define __NR_move_pages 279 __SYSCALL(__NR_move_pages, sys_move_pages) +__SYSCALL(280, sys_ni_syscall) /* utimensat */ +__SYSCALL(281, sys_ni_syscall) /* epoll_pwait */ +__SYSCALL(282, sys_ni_syscall) /* signalfd */ +__SYSCALL(283, sys_ni_syscall) /* timerfd */ +__SYSCALL(284, sys_ni_syscall) /* eventfd */ +__SYSCALL(285, sys_ni_syscall) /* fallocate */ +#define __NR_vperfctr_open 286 +__SYSCALL(__NR_vperfctr_open, sys_vperfctr_open) +#define __NR_vperfctr_control (__NR_vperfctr_open+1)
+__SYSCALL(__NR_vperfctr_control, sys_vperfctr_control) +#define __NR_vperfctr_write (__NR_vperfctr_open+2) +__SYSCALL(__NR_vperfctr_write, sys_vperfctr_write) +#define __NR_vperfctr_read (__NR_vperfctr_open+3) +__SYSCALL(__NR_vperfctr_read, sys_vperfctr_read) -#define __NR_syscall_max __NR_move_pages +#define __NR_syscall_max __NR_vperfctr_read #ifdef __KERNEL__ #include diff -rupN linux-2.6.19/include/linux/sched.h linux-2.6.19.perfctr27/include/linux/sched.h --- linux-2.6.19/include/linux/sched.h 2007-09-27 22:58:43.000000000 +0200 +++ linux-2.6.19.perfctr27/include/linux/sched.h 2007-09-27 23:05:04.000000000 +0200 @@ -1431,6 +1431,9 @@ static inline int thread_group_empty(str * subscriptions and synchronises with wait4(). Also used in procfs. Also * pins the final release of task.io_context. Also protects ->cpuset. * + * Synchronises set_cpus_allowed(), unlink, and creat of ->thread.perfctr. + * [if CONFIG_PERFCTR_VIRTUAL] + * * Nests both inside and outside of read_lock(&tasklist_lock). * It must not be nested with write_lock_irq(&tasklist_lock), * neither inside nor outside. 
diff -rupN linux-2.6.19/kernel/exit.c linux-2.6.19.perfctr27/kernel/exit.c --- linux-2.6.19/kernel/exit.c 2007-09-27 22:58:43.000000000 +0200 +++ linux-2.6.19.perfctr27/kernel/exit.c 2007-09-27 23:05:04.000000000 +0200 @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -170,6 +171,7 @@ repeat: zap_leader = (leader->exit_signal == -1); } + perfctr_release_task(p); sched_exit(p); write_unlock_irq(&tasklist_lock); proc_flush_task(p); diff -rupN linux-2.6.19/kernel/sched.c linux-2.6.19.perfctr27/kernel/sched.c --- linux-2.6.19/kernel/sched.c 2007-09-27 22:58:43.000000000 +0200 +++ linux-2.6.19.perfctr27/kernel/sched.c 2007-09-27 23:05:04.000000000 +0200 @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -4909,6 +4910,8 @@ int set_cpus_allowed(struct task_struct struct rq *rq; int ret = 0; + perfctr_set_cpus_allowed(p, new_mask); + rq = task_rq_lock(p, &flags); if (!cpus_intersects(new_mask, cpu_online_map)) { ret = -EINVAL; diff -rupN linux-2.6.19/kernel/sys_ni.c linux-2.6.19.perfctr27/kernel/sys_ni.c --- linux-2.6.19/kernel/sys_ni.c 2007-09-27 22:58:43.000000000 +0200 +++ linux-2.6.19.perfctr27/kernel/sys_ni.c 2007-09-27 23:05:04.000000000 +0200 @@ -73,6 +73,10 @@ cond_syscall(compat_sys_mq_timedsend); cond_syscall(compat_sys_mq_timedreceive); cond_syscall(compat_sys_mq_notify); cond_syscall(compat_sys_mq_getsetattr); +cond_syscall(sys_vperfctr_open); +cond_syscall(sys_vperfctr_control); +cond_syscall(sys_vperfctr_write); +cond_syscall(sys_vperfctr_read); cond_syscall(sys_mbind); cond_syscall(sys_get_mempolicy); cond_syscall(sys_set_mempolicy); diff -rupN linux-2.6.19/kernel/timer.c linux-2.6.19.perfctr27/kernel/timer.c --- linux-2.6.19/kernel/timer.c 2007-09-27 22:58:43.000000000 +0200 +++ linux-2.6.19.perfctr27/kernel/timer.c 2007-09-27 23:05:04.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -972,6 +973,7 @@ void update_process_times(int user_tick) 
account_user_time(p, jiffies_to_cputime(1)); else account_system_time(p, HARDIRQ_OFFSET, jiffies_to_cputime(1)); + perfctr_sample_thread(&p->thread); run_local_timers(); if (rcu_pending(cpu)) rcu_check_callbacks(cpu, user_tick); papi-5.6.0/src/libpfm-3.y/libpfms/lib/Makefile000664 001750 001750 00000005156 13216244363 023055 0ustar00jshenry1963jshenry1963000000 000000 # # Copyright (c) 2006 Hewlett-Packard Development Company, L.P. # Contributed by Stephane Eranian # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies # of the Software, and to permit persons to whom the Software is furnished to do so, # subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF # CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE # OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. # TOPDIR := $(shell if [ "$$PWD" != "" ]; then echo $$PWD; else pwd; fi)/../.. 
include $(TOPDIR)/config.mk include $(TOPDIR)/rules.mk CFLAGS+= -pthread -D_GNU_SOURCE LDFLAGS+=-static PFMSINCDIR=../include # # Library version # VERSION=0 REVISION=1 AGE=0 SRCS=libpfms.c HEADERS=../include/libpfms.h ALIBPFM=libpfms.a TARGETS=$(ALIBPFM) ifneq ($(CONFIG_PFMLIB_ARCH_CRAYX2),y) SLIBPFM=libpfms.so.$(VERSION).$(REVISION).$(AGE) VLIBPFM=libpfms.so.$(VERSION) endif OBJS=$(SRCS:.c=.o) SOBJS=$(OBJS:.o=.lo) # # assume that if libpfm built static, libpfms should # also be static, i.e., likely platform does not support # shared libraries. # ifeq ($(CONFIG_PFMLIB_SHARED),y) TARGETS += $(SLIBPFM) endif ifeq ($(SYS),Linux) SLDFLAGS=-shared -Wl,-soname -Wl,libpfms.so.$(VERSION) endif CFLAGS+=-I$(PFMSINCDIR) all: $(TARGETS) $(OBJS) $(SOBJS): $(HEADERS) $(TOPDIR)/config.mk $(TOPDIR)/rules.mk Makefile libpfms.a: $(OBJS) $(RM) $@ $(AR) cru $@ $(OBJS) $(SLIBPFM): $(SOBJS) $(CC) $(CFLAGS) $(SLDFLAGS) -o $@ $(SOBJS) $(LN) -sf $@ libpfms.so.$(VERSION) clean: $(RM) -f *.o *.lo *.a *.so* *~ distclean: clean install: $(TARGETS) install: -mkdir -p $(DESTDIR)$(LIBDIR) $(INSTALL) -m 644 $(ALIBPFM) $(DESTDIR)$(LIBDIR) $(INSTALL) $(SLIBPFM) $(DESTDIR)$(LIBDIR) cd $(DESTDIR)$(LIBDIR); $(LN) $(SLIBPFM) $(VLIBPFM) cd $(DESTDIR)$(LIBDIR); $(LN) $(SLIBPFM) libpfms.so -mkdir -p $(DESTDIR)$(INCDIR)/perfmon $(INSTALL) -m 644 $(HEADERS) $(DESTDIR)$(INCDIR)/perfmon papi-5.6.0/src/freebsd/map.h000664 001750 001750 00000002446 13216244361 017670 0ustar00jshenry1963jshenry1963000000 000000 /****************************/ /* THIS IS OPEN SOURCE CODE */ /****************************/ /* * File: map.h * Author: Harald Servat * redcrash@gmail.com */ #ifndef _FreeBSD_MAP_H_ #define _FreeBSD_MAP_H_ #include "../papi.h" #include "../papi_internal.h" #include "../papi_vector.h" enum { CPU_UNKNOWN = 0, CPU_P6, CPU_P6_C, CPU_P6_2, CPU_P6_3, CPU_P6_M, CPU_P4, CPU_K7, CPU_K8, CPU_ATOM, CPU_CORE, CPU_CORE2, CPU_CORE2EXTREME, CPU_COREI7, CPU_COREWESTMERE, CPU_LAST }; typedef struct
Native_Event_LabelDescription { char *name; char *description; } Native_Event_LabelDescription_t; typedef struct Native_Event_Info { /* Name and description for all native events */ Native_Event_LabelDescription_t *info; } Native_Event_Info_t; extern Native_Event_Info_t _papi_hwd_native_info[CPU_LAST+1]; extern void init_freebsd_libpmc_mappings (void); extern int freebsd_number_of_events (int processortype); #include "map-unknown.h" #include "map-p6.h" #include "map-p6-c.h" #include "map-p6-2.h" #include "map-p6-3.h" #include "map-p6-m.h" #include "map-p4.h" #include "map-k7.h" #include "map-k8.h" #include "map-atom.h" #include "map-core.h" #include "map-core2.h" #include "map-core2-extreme.h" #include "map-i7.h" #include "map-westmere.h" #endif /* _FreeBSD_MAP_H_ */ papi-5.6.0/src/freebsd/map-p6.h000664 001750 001750 00000003653 13216244361 020214 0ustar00jshenry1963jshenry1963000000 000000 /****************************/ /* THIS IS OPEN SOURCE CODE */ /****************************/ /* * File: map-p6.h * CVS: $Id$ * Author: Harald Servat * redcrash@gmail.com */ #ifndef FreeBSD_MAP_P6 #define FreeBSD_MAP_P6 enum NativeEvent_Value_P6Processor { PNE_P6_BACLEARS = PAPI_NATIVE_MASK, PNE_P6_BR_BOGUS, PNE_P6_BR_INST_DECODED, PNE_P6_BR_INST_RETIRED, PNE_P6_BR_MISS_PRED_RETIRED, PNE_P6_BR_MISS_PRED_TAKEN_RET, PNE_P6_BR_TAKEN_RETIRED, PNE_P6_BTB_MISSES, PNE_P6_BUS_BNR_DRV, PNE_P6_BUS_DATA_RCV, PNE_P6_BUS_DRDY_CLOCKS, PNE_P6_BUS_HIT_DRV, PNE_P6_BUS_HITM_DRV, PNE_P6_BUS_LOCK_CLOCKS, PNE_P6_BUS_REQ_OUTSTANDING, PNE_P6_BUS_SNOOP_STALL, PNE_P6_BUS_TRAN_ANY, PNE_P6_BUS_TRAN_BRD, PNE_P6_BUS_TRAN_BURST, PNE_P6_BUS_TRAN_DEF, PNE_P6_BUS_TRAN_IFETCH, PNE_P6_BUS_TRAN_INVAL, PNE_P6_BUS_TRAN_MEM, PNE_P6_BUS_TRAN_POWER, PNE_P6_BUS_TRAN_RFO, PNE_P6_BUS_TRANS_IO, PNE_P6_BUS_TRANS_P, PNE_P6_BUS_TRANS_WB, PNE_P6_CPU_CLK_UNHALTED, PNE_P6_CYCLES_DIV_BUSY, PNE_P6_CYCLES_IN_PENDING_AND_MASKED, PNE_P6_CYCLES_INT_MASKED, PNE_P6_DATA_MEM_REFS, PNE_P6_DCU_LINES_IN, PNE_P6_DCU_M_LINES_IN, 
PNE_P6_DCU_M_LINES_OUT, PNE_P6_DCU_MISS_OUTSTANDING, PNE_P6_DIV, PNE_P6_FLOPS, PNE_P6_FP_ASSIST, PNE_P6_FTP_COMPS_OPS_EXE, PNE_P6_HW_INT_RX, PNE_P6_IFU_FETCH, PNE_P6_IFU_FETCH_MISS, PNE_P6_IFU_MEM_STALL, PNE_P6_ILD_STALL, PNE_P6_INST_DECODED, PNE_P6_INST_RETIRED, PNE_P6_ITLB_MISS, PNE_P6_L2_ADS, PNE_P6_L2_DBUS_BUSY, PNE_P6_L2_DBUS_BUSY_RD, PNE_P6_L2_IFETCH, PNE_P6_L2_LD, PNE_P6_L2_LINES_IN, PNE_P6_L2_LINES_OUT, PNE_P6_L2M_LINES_INM, PNE_P6_L2M_LINES_OUTM, PNE_P6_L2_RQSTS, PNE_P6_L2_ST, PNE_P6_LD_BLOCKS, PNE_P6_MISALIGN_MEM_REF, PNE_P6_MUL, PNE_P6_PARTIAL_RAT_STALLS, PNE_P6_RESOURCE_STALL, PNE_P6_SB_DRAINS, PNE_P6_SEGMENT_REG_LOADS, PNE_P6_UOPS_RETIRED, PNE_P6_NATNAME_GUARD }; extern Native_Event_LabelDescription_t P6Processor_info[]; extern hwi_search_t P6Processor_map[]; #endif papi-5.6.0/src/libpfm4/lib/events/intel_hswep_unc_irp_events.h000664 001750 001750 00000022332 13216244364 026533 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2014 Google Inc. All rights reserved * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. * * PMU: hswep_unc_irp (Intel Haswell-EP IRP uncore) */ static const intel_x86_umask_t hswep_unc_i_cache_ack_pending_occupancy[]={ { .uname = "ANY", .udesc = "Any source", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "SOURCE", .udesc = "Track all requests from any source port", .ucode = 0x200, }, }; static const intel_x86_umask_t hswep_unc_i_coherent_ops[]={ { .uname = "PCIRDCUR", .udesc = "PCI read current", .ucode = 0x100, }, { .uname = "CRD", .udesc = "CRD", .ucode = 0x200, }, { .uname = "DRD", .udesc = "DRD", .ucode = 0x400, }, { .uname = "RFO", .udesc = "RFO", .ucode = 0x800, }, { .uname = "PCITOM", .udesc = "DRITOM", .ucode = 0x1000, }, { .uname = "PCIDCAHINT", .udesc = "PCIDCAHINT", .ucode = 0x2000, }, { .uname = "WBMTOI", .udesc = "WBMTOI", .ucode = 0x4000, }, { .uname = "CFLUSH", .udesc = "CFLUSH", .ucode = 0x8000, }, }; static const intel_x86_umask_t hswep_unc_i_misc0[]={ { .uname = "FAST_REQ", .udesc = "Fastpath requests", .ucode = 0x100, }, { .uname = "FAST_REJ", .udesc = "Fastpath rejects", .ucode = 0x200, }, { .uname = "2ND_RD_INSERT", .udesc = "Cache insert of read transaction as secondary", .ucode = 0x400, }, { .uname = "2ND_WR_INSERT", .udesc = "Cache insert of write transaction as secondary", .ucode = 0x800, }, { .uname = "2ND_ATOMIC_INSERT", .udesc = "Cache insert of atomic transaction as secondary", .ucode = 0x1000, }, { .uname = "FAST_XFER", .udesc = "Fastpath trasnfers from primary to secondary", .ucode = 0x2000, }, { .uname = "PF_ACK_HINT", .udesc = "Prefetch ack hints from primary to secondary", .ucode = 0x4000, }, { .uname = "PF_TIMEOUT", .udesc = 
"Prefetch timeout", .ucode = 0x8000, }, }; static const intel_x86_umask_t hswep_unc_i_misc1[]={ { .uname = "SLOW_I", .udesc = "Slow transfer of I-state cacheline", .ucode = 0x100, }, { .uname = "SLOW_S", .udesc = "Slow transfer of S-state cacheline", .ucode = 0x200, }, { .uname = "SLOW_E", .udesc = "Slow transfer of e-state cacheline", .ucode = 0x400, }, { .uname = "SLOW_M", .udesc = "Slow transfer of M-state cacheline", .ucode = 0x800, }, { .uname = "LOST_FWD", .udesc = "LOST forwards", .ucode = 0x1000, }, { .uname = "SEC_RCVD_INVLD", .udesc = "Received Invalid", .ucode = 0x2000, }, { .uname = "SEC_RCVD_VLD", .udesc = "Received Valid", .ucode = 0x4000, }, { .uname = "DATA_THROTTLE", .udesc = "Data throttled", .ucode = 0x8000, }, }; static const intel_x86_umask_t hswep_unc_i_snoop_resp[]={ { .uname = "MISS", .udesc = "Miss", .ucode = 0x100, }, { .uname = "HIT_I", .udesc = "Hit in Invalid state", .ucode = 0x200, }, { .uname = "HIT_ES", .udesc = "Hit in Exclusive or Shared state", .ucode = 0x400, }, { .uname = "HIT_M", .udesc = "Hit in Modified state", .ucode = 0x800, }, { .uname = "SNPCODE", .udesc = "Snoop Code", .ucode = 0x1000, }, { .uname = "SNPDATA", .udesc = "Snoop Data", .ucode = 0x2000, }, { .uname = "SNPINV", .udesc = "Snoop Invalid", .ucode = 0x4000, }, }; static const intel_x86_umask_t hswep_unc_i_transactions[]={ { .uname = "READS", .udesc = "Reads (not including prefetches)", .ucode = 0x100, }, { .uname = "WRITES", .udesc = "Writes", .ucode = 0x200, }, { .uname = "RD_PREF", .udesc = "Read prefetches", .ucode = 0x400, }, { .uname = "WR_PREF", .udesc = "Write prefetches", .ucode = 0x800, }, { .uname = "ATOMIC", .udesc = "Atomic transactions", .ucode = 0x1000, }, { .uname = "OTHER", .udesc = "Other kinds of transactions", .ucode = 0x2000, }, { .uname = "ORDERINGQ", .udesc = "Track request coming from port designated in IRP OrderingQ filter", .ucode = 0x4000, }, }; static const intel_x86_entry_t intel_hswep_unc_i_pe[]={ { .name = "UNC_I_CLOCKTICKS", .desc = 
"Number of uclks in domain", .code = 0x0, .cntmsk = 0x3, .modmsk = HSWEP_UNC_IRP_ATTRS, }, { .name = "UNC_I_SNOOP_RESP", .desc = "Snoop responses", .code = 0x17, .cntmsk = 0x3, .ngrp = 1, .modmsk = HSWEP_UNC_IRP_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_i_snoop_resp), .umasks = hswep_unc_i_snoop_resp, }, { .name = "UNC_I_MISC0", .desc = "Miscellaneous events", .code = 0x14, .cntmsk = 0x3, .ngrp = 1, .modmsk = HSWEP_UNC_IRP_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_i_misc0), .umasks = hswep_unc_i_misc0, }, { .name = "UNC_I_COHERENT_OPS", .desc = "Coherent operations", .code = 0x13, .cntmsk = 0x3, .ngrp = 1, .modmsk = HSWEP_UNC_IRP_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_i_coherent_ops), .umasks = hswep_unc_i_coherent_ops, }, { .name = "UNC_I_CACHE_TOTAL_OCCUPANCY", .desc = "Total write cache occupancy", .code = 0x12, .cntmsk = 0x3, .ngrp = 1, .modmsk = HSWEP_UNC_IRP_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_i_cache_ack_pending_occupancy), .umasks = hswep_unc_i_cache_ack_pending_occupancy /* shared */ }, { .name = "UNC_I_RXR_AK_INSERTS", .desc = "Egress cycles full", .code = 0xa, .cntmsk = 0x3, .modmsk = HSWEP_UNC_IRP_ATTRS, }, { .name = "UNC_I_RXR_BL_DRS_CYCLES_FULL", .desc = "TBD", .code = 0x4, .cntmsk = 0x3, .modmsk = HSWEP_UNC_IRP_ATTRS, }, { .name = "UNC_I_RXR_BL_DRS_INSERTS", .desc = "BL Ingress occupancy DRS", .code = 0x1, .cntmsk = 0x3, .modmsk = HSWEP_UNC_IRP_ATTRS, }, { .name = "UNC_I_RXR_BL_DRS_OCCUPANCY", .desc = "TBD", .code = 0x7, .cntmsk = 0x3, .modmsk = HSWEP_UNC_IRP_ATTRS, }, { .name = "UNC_I_RXR_BL_NCB_CYCLES_FULL", .desc = "TBD", .code = 0x5, .cntmsk = 0x3, .modmsk = HSWEP_UNC_IRP_ATTRS, }, { .name = "UNC_I_RXR_BL_NCB_INSERTS", .desc = "BL Ingress occupancy NCB", .code = 0x2, .cntmsk = 0x3, .modmsk = HSWEP_UNC_IRP_ATTRS, }, { .name = "UNC_I_RXR_BL_NCB_OCCUPANCY", .desc = "TBD", .code = 0x8, .cntmsk = 0x3, .modmsk = HSWEP_UNC_IRP_ATTRS, }, { .name = "UNC_I_RXR_BL_NCS_CYCLES_FULL", .desc = "TBD", .code = 0x6, .cntmsk = 0x3, .modmsk 
= HSWEP_UNC_IRP_ATTRS, }, { .name = "UNC_I_RXR_BL_NCS_INSERTS", .desc = "BL Ingress Occupancy NCS", .code = 0x3, .cntmsk = 0x3, .modmsk = HSWEP_UNC_IRP_ATTRS, }, { .name = "UNC_I_RXR_BL_NCS_OCCUPANCY", .desc = "TBD", .code = 0x9, .cntmsk = 0x3, .modmsk = HSWEP_UNC_IRP_ATTRS, }, { .name = "UNC_I_TRANSACTIONS", .desc = "Inbound transactions", .code = 0x16, .cntmsk = 0x3, .ngrp = 1, .modmsk = HSWEP_UNC_IRP_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_i_transactions), .umasks = hswep_unc_i_transactions, }, { .name = "UNC_I_MISC1", .desc = "Misc events", .code = 0x15, .cntmsk = 0x3, .ngrp = 1, .modmsk = HSWEP_UNC_IRP_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_i_misc1), .umasks = hswep_unc_i_misc1, }, { .name = "UNC_I_TXR_AD_STALL_CREDIT_CYCLES", .desc = "No AD Egress credit stalls", .code = 0x18, .cntmsk = 0x3, .modmsk = HSWEP_UNC_IRP_ATTRS, }, { .name = "UNC_I_TXR_BL_STALL_CREDIT_CYCLES", .desc = "No BL Egress credit stalls", .code = 0x19, .cntmsk = 0x3, .modmsk = HSWEP_UNC_IRP_ATTRS, }, { .name = "UNC_I_TXR_DATA_INSERTS_NCB", .desc = "Outbound read requests", .code = 0xe, .cntmsk = 0x3, .modmsk = HSWEP_UNC_IRP_ATTRS, }, { .name = "UNC_I_TXR_DATA_INSERTS_NCS", .desc = "Outbound read requests", .code = 0xf, .cntmsk = 0x3, .modmsk = HSWEP_UNC_IRP_ATTRS, }, { .name = "UNC_I_TXR_REQUEST_OCCUPANCY", .desc = "Outbound request queue occupancy", .code = 0xd, .cntmsk = 0x3, .modmsk = HSWEP_UNC_IRP_ATTRS, }, }; papi-5.6.0/src/perfctr-2.7.x/patches/patch-kernel-2.6.14000664 001750 001750 00000103251 13216244370 024220 0ustar00jshenry1963jshenry1963000000 000000 diff -rupN linux-2.6.14/CREDITS linux-2.6.14.perfctr27/CREDITS --- linux-2.6.14/CREDITS 2005-10-28 11:24:05.000000000 +0200 +++ linux-2.6.14.perfctr27/CREDITS 2005-11-08 01:40:51.000000000 +0100 @@ -2629,6 +2629,7 @@ N: Mikael Pettersson E: mikpe@csd.uu.se W: http://www.csd.uu.se/~mikpe/ D: Miscellaneous fixes +D: Performance-monitoring counters driver N: Reed H. 
Petty E: rhp@draper.net diff -rupN linux-2.6.14/MAINTAINERS linux-2.6.14.perfctr27/MAINTAINERS --- linux-2.6.14/MAINTAINERS 2005-10-28 11:24:05.000000000 +0200 +++ linux-2.6.14.perfctr27/MAINTAINERS 2005-11-08 01:40:51.000000000 +0100 @@ -1923,6 +1923,12 @@ M: tsbogend@alpha.franken.de L: netdev@vger.kernel.org S: Maintained +PERFORMANCE-MONITORING COUNTERS DRIVER +P: Mikael Pettersson +M: mikpe@csd.uu.se +W: http://www.csd.uu.se/~mikpe/linux/perfctr/ +S: Maintained + PHRAM MTD DRIVER P: Jörn Engel M: joern@wh.fh-wedel.de diff -rupN linux-2.6.14/arch/i386/Kconfig linux-2.6.14.perfctr27/arch/i386/Kconfig --- linux-2.6.14/arch/i386/Kconfig 2005-10-28 11:24:06.000000000 +0200 +++ linux-2.6.14.perfctr27/arch/i386/Kconfig 2005-11-08 01:40:51.000000000 +0100 @@ -982,6 +982,9 @@ config CRASH_DUMP depends on HIGHMEM help Generate crash dump after being started by kexec. + +source "drivers/perfctr/Kconfig" + endmenu diff -rupN linux-2.6.14/arch/i386/kernel/entry.S linux-2.6.14.perfctr27/arch/i386/kernel/entry.S --- linux-2.6.14/arch/i386/kernel/entry.S 2005-10-28 11:24:06.000000000 +0200 +++ linux-2.6.14.perfctr27/arch/i386/kernel/entry.S 2005-11-08 01:40:51.000000000 +0100 @@ -430,6 +430,16 @@ ENTRY(name) \ /* The include is where all of the SMP etc. 
interrupts come from */ #include "entry_arch.h" +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PERFCTR) +ENTRY(perfctr_interrupt) + pushl $LOCAL_PERFCTR_VECTOR-256 + SAVE_ALL + pushl %esp + call smp_perfctr_interrupt + addl $4, %esp + jmp ret_from_intr +#endif + ENTRY(divide_error) pushl $0 # no error code pushl $do_divide_error diff -rupN linux-2.6.14/arch/i386/kernel/i8259.c linux-2.6.14.perfctr27/arch/i386/kernel/i8259.c --- linux-2.6.14/arch/i386/kernel/i8259.c 2005-10-28 11:24:06.000000000 +0200 +++ linux-2.6.14.perfctr27/arch/i386/kernel/i8259.c 2005-11-08 01:40:51.000000000 +0100 @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -421,6 +422,8 @@ void __init init_IRQ(void) */ intr_init_hook(); + perfctr_vector_init(); + /* * Set the clock to HZ Hz, we already have a valid * vector now: diff -rupN linux-2.6.14/arch/i386/kernel/process.c linux-2.6.14.perfctr27/arch/i386/kernel/process.c --- linux-2.6.14/arch/i386/kernel/process.c 2005-10-28 11:24:06.000000000 +0200 +++ linux-2.6.14.perfctr27/arch/i386/kernel/process.c 2005-11-08 01:40:51.000000000 +0100 @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -393,6 +394,7 @@ void exit_thread(void) tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET; put_cpu(); } + perfctr_exit_thread(&tsk->thread); } void flush_thread(void) @@ -472,6 +474,8 @@ int copy_thread(int nr, unsigned long cl savesegment(fs,p->thread.fs); savesegment(gs,p->thread.gs); + perfctr_copy_task(p, regs); + tsk = current; if (unlikely(NULL != tsk->thread.io_bitmap_ptr)) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); @@ -733,6 +737,7 @@ struct task_struct fastcall * __switch_t disable_tsc(prev_p, next_p); + perfctr_resume_thread(next); return prev_p; } diff -rupN linux-2.6.14/arch/i386/kernel/syscall_table.S linux-2.6.14.perfctr27/arch/i386/kernel/syscall_table.S --- linux-2.6.14/arch/i386/kernel/syscall_table.S 2005-08-29 14:34:27.000000000 +0200 +++ 
linux-2.6.14.perfctr27/arch/i386/kernel/syscall_table.S 2005-11-08 01:40:51.000000000 +0100 @@ -294,3 +294,9 @@ ENTRY(sys_call_table) .long sys_inotify_init .long sys_inotify_add_watch .long sys_inotify_rm_watch + .long sys_ni_syscall + .long sys_ni_syscall /* 295 */ + .long sys_vperfctr_open + .long sys_vperfctr_control + .long sys_vperfctr_write + .long sys_vperfctr_read diff -rupN linux-2.6.14/arch/ppc/Kconfig linux-2.6.14.perfctr27/arch/ppc/Kconfig --- linux-2.6.14/arch/ppc/Kconfig 2005-10-28 11:24:06.000000000 +0200 +++ linux-2.6.14.perfctr27/arch/ppc/Kconfig 2005-11-08 01:40:51.000000000 +0100 @@ -288,6 +288,8 @@ config NOT_COHERENT_CACHE depends on 4xx || 8xx || E200 default y +source "drivers/perfctr/Kconfig" + endmenu menu "Platform options" diff -rupN linux-2.6.14/arch/ppc/kernel/head.S linux-2.6.14.perfctr27/arch/ppc/kernel/head.S --- linux-2.6.14/arch/ppc/kernel/head.S 2005-10-28 11:24:06.000000000 +0200 +++ linux-2.6.14.perfctr27/arch/ppc/kernel/head.S 2005-11-08 01:40:51.000000000 +0100 @@ -502,7 +502,11 @@ SystemCall: Trap_0f: EXCEPTION_PROLOG addi r3,r1,STACK_FRAME_OVERHEAD +#ifdef CONFIG_PERFCTR_INTERRUPT_SUPPORT + EXC_XFER_EE(0xf00, do_perfctr_interrupt) +#else EXC_XFER_EE(0xf00, UnknownException) +#endif /* * Handle TLB miss for instruction on 603/603e. 
diff -rupN linux-2.6.14/arch/ppc/kernel/misc.S linux-2.6.14.perfctr27/arch/ppc/kernel/misc.S --- linux-2.6.14/arch/ppc/kernel/misc.S 2005-10-28 11:24:06.000000000 +0200 +++ linux-2.6.14.perfctr27/arch/ppc/kernel/misc.S 2005-11-08 01:40:51.000000000 +0100 @@ -1455,3 +1455,9 @@ _GLOBAL(sys_call_table) .long sys_inotify_init /* 275 */ .long sys_inotify_add_watch .long sys_inotify_rm_watch + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_vperfctr_open /* 280 */ + .long sys_vperfctr_control + .long sys_vperfctr_write + .long sys_vperfctr_read diff -rupN linux-2.6.14/arch/ppc/kernel/process.c linux-2.6.14.perfctr27/arch/ppc/kernel/process.c --- linux-2.6.14/arch/ppc/kernel/process.c 2005-03-02 19:24:14.000000000 +0100 +++ linux-2.6.14.perfctr27/arch/ppc/kernel/process.c 2005-11-08 01:40:51.000000000 +0100 @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -301,7 +302,9 @@ struct task_struct *__switch_to(struct t #endif /* CONFIG_SPE */ new_thread = &new->thread; old_thread = &current->thread; + perfctr_suspend_thread(&prev->thread); last = _switch(old_thread, new_thread); + perfctr_resume_thread(&current->thread); local_irq_restore(s); return last; } @@ -363,6 +366,7 @@ void exit_thread(void) if (last_task_used_spe == current) last_task_used_spe = NULL; #endif + perfctr_exit_thread(&current->thread); } void flush_thread(void) @@ -455,6 +459,8 @@ copy_thread(int nr, unsigned long clone_ p->thread.last_syscall = -1; + perfctr_copy_task(p, regs); + return 0; } diff -rupN linux-2.6.14/arch/ppc64/Kconfig linux-2.6.14.perfctr27/arch/ppc64/Kconfig --- linux-2.6.14/arch/ppc64/Kconfig 2005-10-28 11:24:06.000000000 +0200 +++ linux-2.6.14.perfctr27/arch/ppc64/Kconfig 2005-11-08 01:40:51.000000000 +0100 @@ -377,6 +377,8 @@ config CMDLINE some command-line options at build time by entering them here. In most cases you will need to specify the root device here.
+source "drivers/perfctr/Kconfig" + endmenu config ISA_DMA_API diff -rupN linux-2.6.14/arch/ppc64/kernel/misc.S linux-2.6.14.perfctr27/arch/ppc64/kernel/misc.S --- linux-2.6.14/arch/ppc64/kernel/misc.S 2005-10-28 11:24:06.000000000 +0200 +++ linux-2.6.14.perfctr27/arch/ppc64/kernel/misc.S 2005-11-08 01:40:51.000000000 +0100 @@ -1230,6 +1230,12 @@ _GLOBAL(sys_call_table32) .llong .sys_inotify_init /* 275 */ .llong .sys_inotify_add_watch .llong .sys_inotify_rm_watch + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_vperfctr_open /* 280 */ + .llong .sys_vperfctr_control + .llong .sys_vperfctr_write + .llong .sys_vperfctr_read .balign 8 _GLOBAL(sys_call_table) @@ -1511,3 +1517,9 @@ _GLOBAL(sys_call_table) .llong .sys_inotify_init /* 275 */ .llong .sys_inotify_add_watch .llong .sys_inotify_rm_watch + .llong .sys_ni_syscall + .llong .sys_ni_syscall + .llong .sys_vperfctr_open /* 280 */ + .llong .sys_vperfctr_control + .llong .sys_vperfctr_write + .llong .sys_vperfctr_read diff -rupN linux-2.6.14/arch/ppc64/kernel/process.c linux-2.6.14.perfctr27/arch/ppc64/kernel/process.c --- linux-2.6.14/arch/ppc64/kernel/process.c 2005-10-28 11:24:06.000000000 +0200 +++ linux-2.6.14.perfctr27/arch/ppc64/kernel/process.c 2005-11-08 01:40:51.000000000 +0100 @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -245,7 +246,9 @@ struct task_struct *__switch_to(struct t } local_irq_save(flags); + perfctr_suspend_thread(&prev->thread); last = _switch(old_thread, new_thread); + perfctr_resume_thread(&current->thread); local_irq_restore(flags); @@ -345,6 +348,7 @@ void exit_thread(void) last_task_used_altivec = NULL; #endif /* CONFIG_ALTIVEC */ #endif /* CONFIG_SMP */ + perfctr_exit_thread(&current->thread); } void flush_thread(void) @@ -450,6 +454,8 @@ copy_thread(int nr, unsigned long clone_ */ kregs->nip = *((unsigned long *)ret_from_fork); + perfctr_copy_task(p, regs); + return 0; } diff -rupN linux-2.6.14/arch/x86_64/Kconfig
linux-2.6.14.perfctr27/arch/x86_64/Kconfig --- linux-2.6.14/arch/x86_64/Kconfig 2005-10-28 11:24:06.000000000 +0200 +++ linux-2.6.14.perfctr27/arch/x86_64/Kconfig 2005-11-08 01:40:55.000000000 +0100 @@ -421,6 +421,8 @@ config SECCOMP source kernel/Kconfig.hz +source "drivers/perfctr/Kconfig" + endmenu # diff -rupN linux-2.6.14/arch/x86_64/ia32/ia32entry.S linux-2.6.14.perfctr27/arch/x86_64/ia32/ia32entry.S --- linux-2.6.14/arch/x86_64/ia32/ia32entry.S 2005-10-28 11:24:06.000000000 +0200 +++ linux-2.6.14.perfctr27/arch/x86_64/ia32/ia32entry.S 2005-11-08 01:40:51.000000000 +0100 @@ -643,6 +643,12 @@ ia32_sys_call_table: .quad sys_inotify_init .quad sys_inotify_add_watch .quad sys_inotify_rm_watch + .quad quiet_ni_syscall /* pselect6 */ + .quad quiet_ni_syscall /* ppoll 295 */ + .quad sys_vperfctr_open + .quad sys_vperfctr_control + .quad sys_vperfctr_write + .quad sys_vperfctr_read ia32_syscall_end: .rept IA32_NR_syscalls-(ia32_syscall_end-ia32_sys_call_table)/8 .quad ni_syscall diff -rupN linux-2.6.14/arch/x86_64/kernel/entry.S linux-2.6.14.perfctr27/arch/x86_64/kernel/entry.S --- linux-2.6.14/arch/x86_64/kernel/entry.S 2005-10-28 11:24:06.000000000 +0200 +++ linux-2.6.14.perfctr27/arch/x86_64/kernel/entry.S 2005-11-08 01:40:51.000000000 +0100 @@ -645,6 +645,11 @@ ENTRY(spurious_interrupt) apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt #endif +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PERFCTR) +ENTRY(perfctr_interrupt) + apicinterrupt LOCAL_PERFCTR_VECTOR,smp_perfctr_interrupt +#endif + /* * Exception entry points. 
*/ diff -rupN linux-2.6.14/arch/x86_64/kernel/i8259.c linux-2.6.14.perfctr27/arch/x86_64/kernel/i8259.c --- linux-2.6.14/arch/x86_64/kernel/i8259.c 2005-10-28 11:24:06.000000000 +0200 +++ linux-2.6.14.perfctr27/arch/x86_64/kernel/i8259.c 2005-11-08 01:40:51.000000000 +0100 @@ -23,6 +23,7 @@ #include #include #include +#include /* * Common place to define all x86 IRQ vectors @@ -590,6 +591,8 @@ void __init init_IRQ(void) set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); #endif + perfctr_vector_init(); + /* * Set the clock to HZ Hz, we already have a valid * vector now: diff -rupN linux-2.6.14/arch/x86_64/kernel/process.c linux-2.6.14.perfctr27/arch/x86_64/kernel/process.c --- linux-2.6.14/arch/x86_64/kernel/process.c 2005-10-28 11:24:06.000000000 +0200 +++ linux-2.6.14.perfctr27/arch/x86_64/kernel/process.c 2005-11-08 01:40:51.000000000 +0100 @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -345,6 +346,7 @@ void exit_thread(void) t->io_bitmap_max = 0; put_cpu(); } + perfctr_exit_thread(&me->thread); } void flush_thread(void) @@ -454,6 +456,8 @@ int copy_thread(int nr, unsigned long cl asm("mov %%es,%0" : "=m" (p->thread.es)); asm("mov %%ds,%0" : "=m" (p->thread.ds)); + perfctr_copy_task(p, regs); + if (unlikely(me->thread.io_bitmap_ptr != NULL)) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) { @@ -603,6 +607,8 @@ struct task_struct *__switch_to(struct t } } + perfctr_resume_thread(next); + return prev_p; } diff -rupN linux-2.6.14/drivers/Makefile linux-2.6.14.perfctr27/drivers/Makefile --- linux-2.6.14/drivers/Makefile 2005-10-28 11:24:07.000000000 +0200 +++ linux-2.6.14.perfctr27/drivers/Makefile 2005-11-08 01:40:51.000000000 +0100 @@ -63,6 +63,7 @@ obj-$(CONFIG_MCA) += mca/ obj-$(CONFIG_EISA) += eisa/ obj-$(CONFIG_CPU_FREQ) += cpufreq/ obj-$(CONFIG_MMC) += mmc/ +obj-$(CONFIG_PERFCTR) += perfctr/ obj-$(CONFIG_INFINIBAND) += infiniband/ obj-$(CONFIG_SGI_IOC4) += sn/ obj-y += firmware/ diff 
-rupN linux-2.6.14/include/asm-i386/mach-default/irq_vectors.h linux-2.6.14.perfctr27/include/asm-i386/mach-default/irq_vectors.h --- linux-2.6.14/include/asm-i386/mach-default/irq_vectors.h 2004-05-10 11:14:37.000000000 +0200 +++ linux-2.6.14.perfctr27/include/asm-i386/mach-default/irq_vectors.h 2005-11-08 01:40:51.000000000 +0100 @@ -56,14 +56,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 diff -rupN linux-2.6.14/include/asm-i386/mach-visws/irq_vectors.h linux-2.6.14.perfctr27/include/asm-i386/mach-visws/irq_vectors.h --- linux-2.6.14/include/asm-i386/mach-visws/irq_vectors.h 2004-01-09 13:19:11.000000000 +0100 +++ linux-2.6.14.perfctr27/include/asm-i386/mach-visws/irq_vectors.h 2005-11-08 01:40:51.000000000 +0100 @@ -35,14 +35,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. 
(0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 diff -rupN linux-2.6.14/include/asm-i386/processor.h linux-2.6.14.perfctr27/include/asm-i386/processor.h --- linux-2.6.14/include/asm-i386/processor.h 2005-10-28 11:24:11.000000000 +0200 +++ linux-2.6.14.perfctr27/include/asm-i386/processor.h 2005-11-08 01:40:51.000000000 +0100 @@ -458,6 +458,8 @@ struct thread_struct { unsigned long iopl; /* max allowed port in the bitmap, in bytes: */ unsigned long io_bitmap_max; +/* performance counters */ + struct vperfctr *perfctr; }; #define INIT_THREAD { \ diff -rupN linux-2.6.14/include/asm-i386/system.h linux-2.6.14.perfctr27/include/asm-i386/system.h --- linux-2.6.14/include/asm-i386/system.h 2005-10-28 11:24:11.000000000 +0200 +++ linux-2.6.14.perfctr27/include/asm-i386/system.h 2005-11-08 01:40:51.000000000 +0100 @@ -14,6 +14,7 @@ extern struct task_struct * FASTCALL(__s #define switch_to(prev,next,last) do { \ unsigned long esi,edi; \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile("pushl %%ebp\n\t" \ "movl %%esp,%0\n\t" /* save ESP */ \ "movl %5,%%esp\n\t" /* restore ESP */ \ diff -rupN linux-2.6.14/include/asm-i386/unistd.h linux-2.6.14.perfctr27/include/asm-i386/unistd.h --- linux-2.6.14/include/asm-i386/unistd.h 2005-10-28 11:24:11.000000000 +0200 +++ linux-2.6.14.perfctr27/include/asm-i386/unistd.h 2005-11-08 01:40:51.000000000 +0100 @@ -299,8 +299,12 @@ #define __NR_inotify_init 291 #define __NR_inotify_add_watch 292 #define __NR_inotify_rm_watch 293 +#define __NR_vperfctr_open 296 +#define __NR_vperfctr_control (__NR_vperfctr_open+1) +#define __NR_vperfctr_write (__NR_vperfctr_open+2) +#define __NR_vperfctr_read (__NR_vperfctr_open+3) -#define NR_syscalls 294 +#define NR_syscalls 300 /* * user-visible error numbers are in the range -1 - -128: see diff -rupN linux-2.6.14/include/asm-ppc/processor.h linux-2.6.14.perfctr27/include/asm-ppc/processor.h --- 
linux-2.6.14/include/asm-ppc/processor.h 2005-03-02 19:24:19.000000000 +0100 +++ linux-2.6.14.perfctr27/include/asm-ppc/processor.h 2005-11-08 01:40:51.000000000 +0100 @@ -122,6 +122,9 @@ struct thread_struct { unsigned long spefscr; /* SPE & eFP status */ int used_spe; /* set if process has used spe */ #endif /* CONFIG_SPE */ +#ifdef CONFIG_PERFCTR_VIRTUAL + struct vperfctr *perfctr; /* performance counters */ +#endif }; #define ARCH_MIN_TASKALIGN 16 diff -rupN linux-2.6.14/include/asm-ppc/reg.h linux-2.6.14.perfctr27/include/asm-ppc/reg.h --- linux-2.6.14/include/asm-ppc/reg.h 2005-10-28 11:24:12.000000000 +0200 +++ linux-2.6.14.perfctr27/include/asm-ppc/reg.h 2005-11-08 01:40:51.000000000 +0100 @@ -275,22 +275,14 @@ #define SPRN_LDSTCR 0x3f8 /* Load/Store control register */ #define SPRN_LDSTDB 0x3f4 /* */ #define SPRN_LR 0x008 /* Link Register */ -#define SPRN_MMCR0 0x3B8 /* Monitor Mode Control Register 0 */ -#define SPRN_MMCR1 0x3BC /* Monitor Mode Control Register 1 */ #ifndef SPRN_PIR #define SPRN_PIR 0x3FF /* Processor Identification Register */ #endif -#define SPRN_PMC1 0x3B9 /* Performance Counter Register 1 */ -#define SPRN_PMC2 0x3BA /* Performance Counter Register 2 */ -#define SPRN_PMC3 0x3BD /* Performance Counter Register 3 */ -#define SPRN_PMC4 0x3BE /* Performance Counter Register 4 */ #define SPRN_PTEHI 0x3D5 /* 981 7450 PTE HI word (S/W TLB load) */ #define SPRN_PTELO 0x3D6 /* 982 7450 PTE LO word (S/W TLB load) */ #define SPRN_PVR 0x11F /* Processor Version Register */ #define SPRN_RPA 0x3D6 /* Required Physical Address Register */ -#define SPRN_SDA 0x3BF /* Sampled Data Address Register */ #define SPRN_SDR1 0x019 /* MMU Hash Base Register */ -#define SPRN_SIA 0x3BB /* Sampled Instruction Address Register */ #define SPRN_SPRG0 0x110 /* Special Purpose Register General 0 */ #define SPRN_SPRG1 0x111 /* Special Purpose Register General 1 */ #define SPRN_SPRG2 0x112 /* Special Purpose Register General 2 */ @@ -317,16 +309,79 @@ #define SPRN_THRM3 
0x3FE /* Thermal Management Register 3 */ #define THRM3_E (1<<0) #define SPRN_TLBMISS 0x3D4 /* 980 7450 TLB Miss Register */ -#define SPRN_UMMCR0 0x3A8 /* User Monitor Mode Control Register 0 */ -#define SPRN_UMMCR1 0x3AC /* User Monitor Mode Control Register 0 */ -#define SPRN_UPMC1 0x3A9 /* User Performance Counter Register 1 */ -#define SPRN_UPMC2 0x3AA /* User Performance Counter Register 2 */ -#define SPRN_UPMC3 0x3AD /* User Performance Counter Register 3 */ -#define SPRN_UPMC4 0x3AE /* User Performance Counter Register 4 */ -#define SPRN_USIA 0x3AB /* User Sampled Instruction Address Register */ #define SPRN_VRSAVE 0x100 /* Vector Register Save Register */ #define SPRN_XER 0x001 /* Fixed Point Exception Register */ +/* Performance-monitoring control and counter registers */ +#define SPRN_MMCR0 0x3B8 /* Monitor Mode Control Register 0 (604 and up) */ +#define SPRN_MMCR1 0x3BC /* Monitor Mode Control Register 1 (604e and up) */ +#define SPRN_MMCR2 0x3B0 /* Monitor Mode Control Register 2 (7400 and up) */ +#define SPRN_PMC1 0x3B9 /* Performance Counter Register 1 (604 and up) */ +#define SPRN_PMC2 0x3BA /* Performance Counter Register 2 (604 and up) */ +#define SPRN_PMC3 0x3BD /* Performance Counter Register 3 (604e and up) */ +#define SPRN_PMC4 0x3BE /* Performance Counter Register 4 (604e and up) */ +#define SPRN_PMC5 0x3B1 /* Performance Counter Register 5 (7450 and up) */ +#define SPRN_PMC6 0x3B2 /* Performance Counter Register 6 (7450 and up) */ +#define SPRN_SIA 0x3BB /* Sampled Instruction Address Register (604 and up) */ +#define SPRN_SDA 0x3BF /* Sampled Data Address Register (604/604e only) */ +#define SPRN_BAMR 0x3B7 /* Breakpoint Address Mask Register (7400 and up) */ + +#define SPRN_UMMCR0 0x3A8 /* User Monitor Mode Control Register 0 (750 and up) */ +#define SPRN_UMMCR1 0x3AC /* User Monitor Mode Control Register 1 (750 and up) */ +#define SPRN_UMMCR2 0x3A0 /* User Monitor Mode Control Register 2 (7400 and up) */ +#define SPRN_UPMC1 0x3A9 /* User
Performance Counter Register 1 (750 and up) */ +#define SPRN_UPMC2 0x3AA /* User Performance Counter Register 2 (750 and up) */ +#define SPRN_UPMC3 0x3AD /* User Performance Counter Register 3 (750 and up) */ +#define SPRN_UPMC4 0x3AE /* User Performance Counter Register 4 (750 and up) */ +#define SPRN_UPMC5 0x3A1 /* User Performance Counter Register 5 (7450 and up) */ +#define SPRN_UPMC6 0x3A2 /* User Performance Counter Register 6 (7450 and up) */ +#define SPRN_USIA 0x3AB /* User Sampled Instruction Address Register (750 and up) */ +#define SPRN_UBAMR 0x3A7 /* User Breakpoint Address Mask Register (7400 and up) */ + +/* MMCR0 layout (74xx terminology) */ +#define MMCR0_FC 0x80000000 /* Freeze counters unconditionally. */ +#define MMCR0_FCS 0x40000000 /* Freeze counters while MSR[PR]=0 (supervisor mode). */ +#define MMCR0_FCP 0x20000000 /* Freeze counters while MSR[PR]=1 (user mode). */ +#define MMCR0_FCM1 0x10000000 /* Freeze counters while MSR[PM]=1. */ +#define MMCR0_FCM0 0x08000000 /* Freeze counters while MSR[PM]=0. */ +#define MMCR0_PMXE 0x04000000 /* Enable performance monitor exceptions. + * Cleared by hardware when a PM exception occurs. + * 604: PMXE is not cleared by hardware. + */ +#define MMCR0_FCECE 0x02000000 /* Freeze counters on enabled condition or event. + * FCECE is treated as 0 if TRIGGER is 1. + * 74xx: FC is set when the event occurs. + * 604/750: ineffective when PMXE=0. + */ +#define MMCR0_TBSEL 0x01800000 /* Time base lower (TBL) bit selector. + * 00: bit 31, 01: bit 23, 10: bit 19, 11: bit 15. + */ +#define MMCR0_TBEE 0x00400000 /* Enable event on TBL bit transition from 0 to 1. */ +#define MMCR0_THRESHOLD 0x003F0000 /* Threshold value for certain events. */ +#define MMCR0_PMC1CE 0x00008000 /* Enable event on PMC1 overflow. */ +#define MMCR0_PMCjCE 0x00004000 /* Enable event on PMC2-PMC6 overflow. + * 604/750: Overrides FCECE (DISCOUNT). + */ +#define MMCR0_TRIGGER 0x00002000 /* Disable PMC2-PMC6 until PMC1 overflow or other event.
+ * 74xx: cleared by hardware when the event occurs. + */ +#define MMCR0_PMC1SEL 0x00001FB0 /* PMC1 event selector, 7 bits. */ +#define MMCR0_PMC2SEL 0x0000003F /* PMC2 event selector, 6 bits. */ + +/* MMCR1 layout (604e-7457) */ +#define MMCR1_PMC3SEL 0xF8000000 /* PMC3 event selector, 5 bits. */ +#define MMCR1_PMC4SEL 0x07B00000 /* PMC4 event selector, 5 bits. */ +#define MMCR1_PMC5SEL 0x003E0000 /* PMC5 event selector, 5 bits. (745x only) */ +#define MMCR1_PMC6SEL 0x0001F800 /* PMC6 event selector, 6 bits. (745x only) */ +#define MMCR1__RESERVED 0x000007FF /* should be zero */ + +/* MMCR2 layout (7400-7457) */ +#define MMCR2_THRESHMULT 0x80000000 /* MMCR0[THRESHOLD] multiplier. */ +#define MMCR2_SMCNTEN 0x40000000 /* 7400/7410 only, should be zero. */ +#define MMCR2_SMINTEN 0x20000000 /* 7400/7410 only, should be zero. */ +#define MMCR2__RESERVED 0x1FFFFFFF /* should be zero */ +#define MMCR2_RESERVED (MMCR2_SMCNTEN | MMCR2_SMINTEN | MMCR2__RESERVED) + /* Bit definitions for MMCR0 and PMC1 / PMC2. 
*/ #define MMCR0_PMC1_CYCLES (1 << 7) #define MMCR0_PMC1_ICACHEMISS (5 << 7) @@ -335,7 +390,6 @@ #define MMCR0_PMC2_CYCLES 0x1 #define MMCR0_PMC2_ITLB 0x7 #define MMCR0_PMC2_LOADMISSTIME 0x5 -#define MMCR0_PMXE (1 << 26) /* Processor Version Register */ diff -rupN linux-2.6.14/include/asm-ppc/unistd.h linux-2.6.14.perfctr27/include/asm-ppc/unistd.h --- linux-2.6.14/include/asm-ppc/unistd.h 2005-08-29 14:34:33.000000000 +0200 +++ linux-2.6.14.perfctr27/include/asm-ppc/unistd.h 2005-11-08 01:40:51.000000000 +0100 @@ -282,8 +282,12 @@ #define __NR_inotify_init 275 #define __NR_inotify_add_watch 276 #define __NR_inotify_rm_watch 277 +#define __NR_vperfctr_open 280 +#define __NR_vperfctr_control (__NR_vperfctr_open+1) +#define __NR_vperfctr_write (__NR_vperfctr_open+2) +#define __NR_vperfctr_read (__NR_vperfctr_open+3) -#define __NR_syscalls 278 +#define __NR_syscalls 284 #define __NR(n) #n diff -rupN linux-2.6.14/include/asm-ppc64/processor.h linux-2.6.14.perfctr27/include/asm-ppc64/processor.h --- linux-2.6.14/include/asm-ppc64/processor.h 2005-10-28 11:24:12.000000000 +0200 +++ linux-2.6.14.perfctr27/include/asm-ppc64/processor.h 2005-11-08 01:40:51.000000000 +0100 @@ -442,6 +442,8 @@ struct thread_struct { unsigned long vrsave; int used_vr; /* set if process has used altivec */ #endif /* CONFIG_ALTIVEC */ + /* performance counters */ + struct vperfctr *perfctr; }; #define ARCH_MIN_TASKALIGN 16 diff -rupN linux-2.6.14/include/asm-ppc64/unistd.h linux-2.6.14.perfctr27/include/asm-ppc64/unistd.h --- linux-2.6.14/include/asm-ppc64/unistd.h 2005-08-29 14:34:33.000000000 +0200 +++ linux-2.6.14.perfctr27/include/asm-ppc64/unistd.h 2005-11-08 01:40:51.000000000 +0100 @@ -288,8 +288,12 @@ #define __NR_inotify_init 275 #define __NR_inotify_add_watch 276 #define __NR_inotify_rm_watch 277 +#define __NR_vperfctr_open 280 +#define __NR_vperfctr_control (__NR_vperfctr_open+1) +#define __NR_vperfctr_write (__NR_vperfctr_open+2) +#define __NR_vperfctr_read (__NR_vperfctr_open+3) 
-#define __NR_syscalls 278 +#define __NR_syscalls 284 #ifdef __KERNEL__ #define NR_syscalls __NR_syscalls #endif diff -rupN linux-2.6.14/include/asm-x86_64/hw_irq.h linux-2.6.14.perfctr27/include/asm-x86_64/hw_irq.h --- linux-2.6.14/include/asm-x86_64/hw_irq.h 2005-10-28 11:24:12.000000000 +0200 +++ linux-2.6.14.perfctr27/include/asm-x86_64/hw_irq.h 2005-11-08 01:40:51.000000000 +0100 @@ -67,14 +67,15 @@ struct hw_interrupt_type; * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in irq.h */ #ifndef __ASSEMBLY__ diff -rupN linux-2.6.14/include/asm-x86_64/ia32_unistd.h linux-2.6.14.perfctr27/include/asm-x86_64/ia32_unistd.h --- linux-2.6.14/include/asm-x86_64/ia32_unistd.h 2005-08-29 14:34:33.000000000 +0200 +++ linux-2.6.14.perfctr27/include/asm-x86_64/ia32_unistd.h 2005-11-08 01:40:51.000000000 +0100 @@ -299,7 +299,11 @@ #define __NR_ia32_inotify_init 291 #define __NR_ia32_inotify_add_watch 292 #define __NR_ia32_inotify_rm_watch 293 +#define __NR_ia32_vperfctr_open 296 +#define __NR_ia32_vperfctr_control (__NR_ia32_vperfctr_open+1) +#define __NR_ia32_vperfctr_write (__NR_ia32_vperfctr_open+2) +#define __NR_ia32_vperfctr_read (__NR_ia32_vperfctr_open+3) -#define IA32_NR_syscalls 294 /* must be > than biggest syscall! */ +#define IA32_NR_syscalls 300 /* must be > than biggest syscall! 
*/ #endif /* _ASM_X86_64_IA32_UNISTD_H_ */ diff -rupN linux-2.6.14/include/asm-x86_64/irq.h linux-2.6.14.perfctr27/include/asm-x86_64/irq.h --- linux-2.6.14/include/asm-x86_64/irq.h 2005-10-28 11:24:12.000000000 +0200 +++ linux-2.6.14.perfctr27/include/asm-x86_64/irq.h 2005-11-08 01:40:51.000000000 +0100 @@ -29,7 +29,7 @@ */ #define NR_VECTORS 256 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in hw_irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in hw_irq.h */ #ifdef CONFIG_PCI_MSI #define NR_IRQS FIRST_SYSTEM_VECTOR diff -rupN linux-2.6.14/include/asm-x86_64/processor.h linux-2.6.14.perfctr27/include/asm-x86_64/processor.h --- linux-2.6.14/include/asm-x86_64/processor.h 2005-10-28 11:24:12.000000000 +0200 +++ linux-2.6.14.perfctr27/include/asm-x86_64/processor.h 2005-11-08 01:40:51.000000000 +0100 @@ -252,6 +252,8 @@ struct thread_struct { unsigned io_bitmap_max; /* cached TLS descriptors. */ u64 tls_array[GDT_ENTRY_TLS_ENTRIES]; +/* performance counters */ + struct vperfctr *perfctr; } __attribute__((aligned(16))); #define INIT_THREAD { \ diff -rupN linux-2.6.14/include/asm-x86_64/system.h linux-2.6.14.perfctr27/include/asm-x86_64/system.h --- linux-2.6.14/include/asm-x86_64/system.h 2005-10-28 11:24:12.000000000 +0200 +++ linux-2.6.14.perfctr27/include/asm-x86_64/system.h 2005-11-08 01:40:51.000000000 +0100 @@ -26,7 +26,8 @@ #define __EXTRA_CLOBBER \ ,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15" -#define switch_to(prev,next,last) \ +#define switch_to(prev,next,last) do { \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile(SAVE_CONTEXT \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ @@ -46,7 +47,8 @@ [tif_fork] "i" (TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, thread_info)), \ [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ - : "memory", "cc" __EXTRA_CLOBBER) + : "memory", "cc" __EXTRA_CLOBBER); \ +} while (0) extern void 
load_gs_index(unsigned); diff -rupN linux-2.6.14/include/asm-x86_64/unistd.h linux-2.6.14.perfctr27/include/asm-x86_64/unistd.h --- linux-2.6.14/include/asm-x86_64/unistd.h 2005-08-29 14:34:33.000000000 +0200 +++ linux-2.6.14.perfctr27/include/asm-x86_64/unistd.h 2005-11-08 01:40:55.000000000 +0100 @@ -571,8 +571,18 @@ __SYSCALL(__NR_inotify_init, sys_inotify __SYSCALL(__NR_inotify_add_watch, sys_inotify_add_watch) #define __NR_inotify_rm_watch 255 __SYSCALL(__NR_inotify_rm_watch, sys_inotify_rm_watch) +#define __NR_migrate_pages 256 +__SYSCALL(__NR_migrate_pages, sys_ni_syscall) +#define __NR_vperfctr_open 257 +__SYSCALL(__NR_vperfctr_open, sys_vperfctr_open) +#define __NR_vperfctr_control (__NR_vperfctr_open+1) +__SYSCALL(__NR_vperfctr_control, sys_vperfctr_control) +#define __NR_vperfctr_write (__NR_vperfctr_open+2) +__SYSCALL(__NR_vperfctr_write, sys_vperfctr_write) +#define __NR_vperfctr_read (__NR_vperfctr_open+3) +__SYSCALL(__NR_vperfctr_read, sys_vperfctr_read) -#define __NR_syscall_max __NR_inotify_rm_watch +#define __NR_syscall_max __NR_vperfctr_read #ifndef __NO_STUBS /* user-visible error numbers are in the range -1 - -4095 */ diff -rupN linux-2.6.14/include/linux/sched.h linux-2.6.14.perfctr27/include/linux/sched.h --- linux-2.6.14/include/linux/sched.h 2005-10-28 11:24:12.000000000 +0200 +++ linux-2.6.14.perfctr27/include/linux/sched.h 2005-11-08 01:40:51.000000000 +0100 @@ -1168,6 +1168,9 @@ extern void unhash_process(struct task_s * subscriptions and synchronises with wait4(). Also used in procfs. Also * pins the final release of task.io_context. * + * Synchronises set_cpus_allowed(), unlink, and creat of ->thread.perfctr. + * [if CONFIG_PERFCTR_VIRTUAL] + * * Nests both inside and outside of read_lock(&tasklist_lock). * It must not be nested with write_lock_irq(&tasklist_lock), * neither inside nor outside. 
diff -rupN linux-2.6.14/kernel/exit.c linux-2.6.14.perfctr27/kernel/exit.c --- linux-2.6.14/kernel/exit.c 2005-10-28 11:24:12.000000000 +0200 +++ linux-2.6.14.perfctr27/kernel/exit.c 2005-11-08 01:40:51.000000000 +0100 @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -100,6 +101,7 @@ repeat: zap_leader = (leader->exit_signal == -1); } + perfctr_release_task(p); sched_exit(p); write_unlock_irq(&tasklist_lock); spin_unlock(&p->proc_lock); diff -rupN linux-2.6.14/kernel/sched.c linux-2.6.14.perfctr27/kernel/sched.c --- linux-2.6.14/kernel/sched.c 2005-10-28 11:24:12.000000000 +0200 +++ linux-2.6.14.perfctr27/kernel/sched.c 2005-11-08 01:40:51.000000000 +0100 @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -4361,6 +4362,8 @@ int set_cpus_allowed(task_t *p, cpumask_ migration_req_t req; runqueue_t *rq; + perfctr_set_cpus_allowed(p, new_mask); + rq = task_rq_lock(p, &flags); if (!cpus_intersects(new_mask, cpu_online_map)) { ret = -EINVAL; diff -rupN linux-2.6.14/kernel/sys_ni.c linux-2.6.14.perfctr27/kernel/sys_ni.c --- linux-2.6.14/kernel/sys_ni.c 2005-08-29 14:34:34.000000000 +0200 +++ linux-2.6.14.perfctr27/kernel/sys_ni.c 2005-11-08 01:40:51.000000000 +0100 @@ -68,6 +68,10 @@ cond_syscall(compat_sys_mq_timedsend); cond_syscall(compat_sys_mq_timedreceive); cond_syscall(compat_sys_mq_notify); cond_syscall(compat_sys_mq_getsetattr); +cond_syscall(sys_vperfctr_open); +cond_syscall(sys_vperfctr_control); +cond_syscall(sys_vperfctr_write); +cond_syscall(sys_vperfctr_read); cond_syscall(sys_mbind); cond_syscall(sys_get_mempolicy); cond_syscall(sys_set_mempolicy); diff -rupN linux-2.6.14/kernel/timer.c linux-2.6.14.perfctr27/kernel/timer.c --- linux-2.6.14/kernel/timer.c 2005-10-28 11:24:12.000000000 +0200 +++ linux-2.6.14.perfctr27/kernel/timer.c 2005-11-08 01:40:51.000000000 +0100 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -846,6 +847,7 @@ void update_process_times(int 
user_tick) account_user_time(p, jiffies_to_cputime(1)); else account_system_time(p, HARDIRQ_OFFSET, jiffies_to_cputime(1)); + perfctr_sample_thread(&p->thread); run_local_timers(); if (rcu_pending(cpu)) rcu_check_callbacks(cpu, user_tick); papi-5.6.0/src/perfctr-2.7.x/patches/patch-kernel-2.6.16000664 001750 001750 00000076651 13216244370 024237 0ustar00jshenry1963jshenry1963000000 000000 diff -rupN linux-2.6.16/CREDITS linux-2.6.16.perfctr27/CREDITS --- linux-2.6.16/CREDITS 2006-08-10 21:57:06.000000000 +0200 +++ linux-2.6.16.perfctr27/CREDITS 2006-08-11 03:48:19.000000000 +0200 @@ -2634,6 +2634,7 @@ N: Mikael Pettersson E: mikpe@csd.uu.se W: http://www.csd.uu.se/~mikpe/ D: Miscellaneous fixes +D: Performance-monitoring counters driver N: Reed H. Petty E: rhp@draper.net diff -rupN linux-2.6.16/MAINTAINERS linux-2.6.16.perfctr27/MAINTAINERS --- linux-2.6.16/MAINTAINERS 2006-08-10 21:57:06.000000000 +0200 +++ linux-2.6.16.perfctr27/MAINTAINERS 2006-08-11 03:48:19.000000000 +0200 @@ -2071,6 +2071,12 @@ M: tsbogend@alpha.franken.de L: netdev@vger.kernel.org S: Maintained +PERFORMANCE-MONITORING COUNTERS DRIVER +P: Mikael Pettersson +M: mikpe@it.uu.se +W: http://user.it.uu.se/~mikpe/linux/perfctr/ +S: Maintained + PHRAM MTD DRIVER P: Jörn Engel M: joern@wh.fh-wedel.de diff -rupN linux-2.6.16/arch/i386/Kconfig linux-2.6.16.perfctr27/arch/i386/Kconfig --- linux-2.6.16/arch/i386/Kconfig 2006-08-10 21:57:06.000000000 +0200 +++ linux-2.6.16.perfctr27/arch/i386/Kconfig 2006-08-11 03:48:19.000000000 +0200 @@ -685,6 +685,8 @@ config SECCOMP If unsure, say Y. Only embedded should say N here. 
+source "drivers/perfctr/Kconfig" + source kernel/Kconfig.hz config KEXEC diff -rupN linux-2.6.16/arch/i386/kernel/entry.S linux-2.6.16.perfctr27/arch/i386/kernel/entry.S --- linux-2.6.16/arch/i386/kernel/entry.S 2006-08-10 21:57:06.000000000 +0200 +++ linux-2.6.16.perfctr27/arch/i386/kernel/entry.S 2006-08-11 03:48:19.000000000 +0200 @@ -432,6 +432,16 @@ ENTRY(name) \ /* The include is where all of the SMP etc. interrupts come from */ #include "entry_arch.h" +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PERFCTR) +ENTRY(perfctr_interrupt) + pushl $LOCAL_PERFCTR_VECTOR-256 + SAVE_ALL + pushl %esp + call smp_perfctr_interrupt + addl $4, %esp + jmp ret_from_intr +#endif + ENTRY(divide_error) pushl $0 # no error code pushl $do_divide_error diff -rupN linux-2.6.16/arch/i386/kernel/i8259.c linux-2.6.16.perfctr27/arch/i386/kernel/i8259.c --- linux-2.6.16/arch/i386/kernel/i8259.c 2005-10-28 11:24:06.000000000 +0200 +++ linux-2.6.16.perfctr27/arch/i386/kernel/i8259.c 2006-08-11 03:48:19.000000000 +0200 @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -421,6 +422,8 @@ void __init init_IRQ(void) */ intr_init_hook(); + perfctr_vector_init(); + /* * Set the clock to HZ Hz, we already have a valid * vector now: diff -rupN linux-2.6.16/arch/i386/kernel/process.c linux-2.6.16.perfctr27/arch/i386/kernel/process.c --- linux-2.6.16/arch/i386/kernel/process.c 2006-08-10 21:57:06.000000000 +0200 +++ linux-2.6.16.perfctr27/arch/i386/kernel/process.c 2006-08-11 03:48:19.000000000 +0200 @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -388,6 +389,7 @@ void exit_thread(void) tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET; put_cpu(); } + perfctr_exit_thread(&tsk->thread); } void flush_thread(void) @@ -439,6 +441,8 @@ int copy_thread(int nr, unsigned long cl savesegment(fs,p->thread.fs); savesegment(gs,p->thread.gs); + perfctr_copy_task(p, regs); + tsk = current; if (unlikely(NULL != tsk->thread.io_bitmap_ptr)) { 
p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); @@ -697,6 +701,8 @@ struct task_struct fastcall * __switch_t disable_tsc(prev_p, next_p); + perfctr_resume_thread(next); + return prev_p; } diff -rupN linux-2.6.16/arch/i386/kernel/syscall_table.S linux-2.6.16.perfctr27/arch/i386/kernel/syscall_table.S --- linux-2.6.16/arch/i386/kernel/syscall_table.S 2006-08-10 21:57:06.000000000 +0200 +++ linux-2.6.16.perfctr27/arch/i386/kernel/syscall_table.S 2006-08-11 03:48:23.000000000 +0200 @@ -310,3 +310,14 @@ ENTRY(sys_call_table) .long sys_pselect6 .long sys_ppoll .long sys_unshare /* 310 */ + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_ni_syscall /* 315 */ + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_vperfctr_open + .long sys_vperfctr_control + .long sys_vperfctr_write /* 320 */ + .long sys_vperfctr_read diff -rupN linux-2.6.16/arch/powerpc/Kconfig linux-2.6.16.perfctr27/arch/powerpc/Kconfig --- linux-2.6.16/arch/powerpc/Kconfig 2006-08-10 21:57:06.000000000 +0200 +++ linux-2.6.16.perfctr27/arch/powerpc/Kconfig 2006-08-11 03:48:19.000000000 +0200 @@ -268,6 +268,9 @@ config NOT_COHERENT_CACHE bool depends on 4xx || 8xx || E200 default y + +source "drivers/perfctr/Kconfig" + endmenu source "init/Kconfig" diff -rupN linux-2.6.16/arch/powerpc/kernel/process.c linux-2.6.16.perfctr27/arch/powerpc/kernel/process.c --- linux-2.6.16/arch/powerpc/kernel/process.c 2006-08-10 21:57:06.000000000 +0200 +++ linux-2.6.16.perfctr27/arch/powerpc/kernel/process.c 2006-08-11 03:48:19.000000000 +0200 @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -330,7 +331,9 @@ struct task_struct *__switch_to(struct t #endif local_irq_save(flags); + perfctr_suspend_thread(&prev->thread); last = _switch(old_thread, new_thread); + perfctr_resume_thread(¤t->thread); local_irq_restore(flags); @@ -459,6 +462,7 @@ void exit_thread(void) { kprobe_flush_task(current); discard_lazy_cpu_state(); 
+ perfctr_exit_thread(¤t->thread); } void flush_thread(void) @@ -571,6 +575,8 @@ int copy_thread(int nr, unsigned long cl p->thread.last_syscall = -1; #endif + perfctr_copy_task(p, regs); + return 0; } diff -rupN linux-2.6.16/arch/powerpc/kernel/systbl.S linux-2.6.16.perfctr27/arch/powerpc/kernel/systbl.S --- linux-2.6.16/arch/powerpc/kernel/systbl.S 2006-08-10 21:57:06.000000000 +0200 +++ linux-2.6.16.perfctr27/arch/powerpc/kernel/systbl.S 2006-08-11 03:48:23.000000000 +0200 @@ -322,3 +322,25 @@ SYSCALL(spu_create) COMPAT_SYS(pselect6) COMPAT_SYS(ppoll) SYSCALL(unshare) +SYSCALL(ni_syscall) +SYSCALL(ni_syscall) +SYSCALL(ni_syscall) +SYSCALL(ni_syscall) +SYSCALL(ni_syscall) +SYSCALL(ni_syscall) +SYSCALL(ni_syscall) +SYSCALL(ni_syscall) +SYSCALL(ni_syscall) +SYSCALL(ni_syscall) +SYSCALL(ni_syscall) +SYSCALL(ni_syscall) +SYSCALL(ni_syscall) +SYSCALL(ni_syscall) +SYSCALL(ni_syscall) +SYSCALL(ni_syscall) +SYSCALL(ni_syscall) +SYSCALL(ni_syscall) +SYSCALL(vperfctr_open) +SYSCALL(vperfctr_control) +SYSCALL(vperfctr_write) +SYSCALL(vperfctr_read) diff -rupN linux-2.6.16/arch/x86_64/Kconfig linux-2.6.16.perfctr27/arch/x86_64/Kconfig --- linux-2.6.16/arch/x86_64/Kconfig 2006-08-10 21:57:06.000000000 +0200 +++ linux-2.6.16.perfctr27/arch/x86_64/Kconfig 2006-08-11 03:48:19.000000000 +0200 @@ -462,6 +462,8 @@ config SECCOMP If unsure, say Y. Only embedded should say N here. 
+source "drivers/perfctr/Kconfig" + source kernel/Kconfig.hz endmenu diff -rupN linux-2.6.16/arch/x86_64/ia32/ia32entry.S linux-2.6.16.perfctr27/arch/x86_64/ia32/ia32entry.S --- linux-2.6.16/arch/x86_64/ia32/ia32entry.S 2006-08-10 21:57:06.000000000 +0200 +++ linux-2.6.16.perfctr27/arch/x86_64/ia32/ia32entry.S 2006-08-11 03:48:23.000000000 +0200 @@ -688,6 +688,17 @@ ia32_sys_call_table: .quad sys_ni_syscall /* pselect6 for now */ .quad sys_ni_syscall /* ppoll for now */ .quad sys_unshare /* 310 */ + .quad sys_ni_syscall + .quad sys_ni_syscall + .quad sys_ni_syscall + .quad sys_ni_syscall + .quad sys_ni_syscall /* 315 */ + .quad sys_ni_syscall + .quad sys_ni_syscall + .quad sys_vperfctr_open + .quad sys_vperfctr_control + .quad sys_vperfctr_write /* 320 */ + .quad sys_vperfctr_read ia32_syscall_end: .rept IA32_NR_syscalls-(ia32_syscall_end-ia32_sys_call_table)/8 .quad ni_syscall diff -rupN linux-2.6.16/arch/x86_64/kernel/entry.S linux-2.6.16.perfctr27/arch/x86_64/kernel/entry.S --- linux-2.6.16/arch/x86_64/kernel/entry.S 2006-08-10 21:57:06.000000000 +0200 +++ linux-2.6.16.perfctr27/arch/x86_64/kernel/entry.S 2006-08-11 03:48:19.000000000 +0200 @@ -655,6 +655,11 @@ ENTRY(spurious_interrupt) apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt #endif +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PERFCTR) +ENTRY(perfctr_interrupt) + apicinterrupt LOCAL_PERFCTR_VECTOR,smp_perfctr_interrupt +#endif + /* * Exception entry points. 
*/ diff -rupN linux-2.6.16/arch/x86_64/kernel/i8259.c linux-2.6.16.perfctr27/arch/x86_64/kernel/i8259.c --- linux-2.6.16/arch/x86_64/kernel/i8259.c 2006-08-10 21:57:06.000000000 +0200 +++ linux-2.6.16.perfctr27/arch/x86_64/kernel/i8259.c 2006-08-11 03:48:19.000000000 +0200 @@ -23,6 +23,7 @@ #include #include #include +#include /* * Common place to define all x86 IRQ vectors @@ -591,6 +592,8 @@ void __init init_IRQ(void) set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); #endif + perfctr_vector_init(); + /* * Set the clock to HZ Hz, we already have a valid * vector now: diff -rupN linux-2.6.16/arch/x86_64/kernel/process.c linux-2.6.16.perfctr27/arch/x86_64/kernel/process.c --- linux-2.6.16/arch/x86_64/kernel/process.c 2006-08-10 21:57:06.000000000 +0200 +++ linux-2.6.16.perfctr27/arch/x86_64/kernel/process.c 2006-08-11 03:48:19.000000000 +0200 @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -372,6 +373,7 @@ void exit_thread(void) t->io_bitmap_max = 0; put_cpu(); } + perfctr_exit_thread(&me->thread); } void flush_thread(void) @@ -473,6 +475,8 @@ int copy_thread(int nr, unsigned long cl asm("mov %%es,%0" : "=m" (p->thread.es)); asm("mov %%ds,%0" : "=m" (p->thread.ds)); + perfctr_copy_task(p, regs); + if (unlikely(me->thread.io_bitmap_ptr != NULL)) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) { @@ -627,6 +631,8 @@ __switch_to(struct task_struct *prev_p, } } + perfctr_resume_thread(next); + return prev_p; } diff -rupN linux-2.6.16/drivers/Makefile linux-2.6.16.perfctr27/drivers/Makefile --- linux-2.6.16/drivers/Makefile 2006-08-10 21:57:06.000000000 +0200 +++ linux-2.6.16.perfctr27/drivers/Makefile 2006-08-11 03:48:19.000000000 +0200 @@ -70,6 +70,7 @@ obj-$(CONFIG_CPU_FREQ) += cpufreq/ obj-$(CONFIG_MMC) += mmc/ obj-$(CONFIG_INFINIBAND) += infiniband/ obj-$(CONFIG_SGI_SN) += sn/ +obj-$(CONFIG_PERFCTR) += perfctr/ obj-y += firmware/ obj-$(CONFIG_CRYPTO) += crypto/ obj-$(CONFIG_SUPERH) 
+= sh/ diff -rupN linux-2.6.16/include/asm-i386/mach-default/irq_vectors.h linux-2.6.16.perfctr27/include/asm-i386/mach-default/irq_vectors.h --- linux-2.6.16/include/asm-i386/mach-default/irq_vectors.h 2004-05-10 11:14:37.000000000 +0200 +++ linux-2.6.16.perfctr27/include/asm-i386/mach-default/irq_vectors.h 2006-08-11 03:48:19.000000000 +0200 @@ -56,14 +56,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 diff -rupN linux-2.6.16/include/asm-i386/mach-visws/irq_vectors.h linux-2.6.16.perfctr27/include/asm-i386/mach-visws/irq_vectors.h --- linux-2.6.16/include/asm-i386/mach-visws/irq_vectors.h 2004-01-09 13:19:11.000000000 +0100 +++ linux-2.6.16.perfctr27/include/asm-i386/mach-visws/irq_vectors.h 2006-08-11 03:48:19.000000000 +0200 @@ -35,14 +35,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. 
(0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 diff -rupN linux-2.6.16/include/asm-i386/processor.h linux-2.6.16.perfctr27/include/asm-i386/processor.h --- linux-2.6.16/include/asm-i386/processor.h 2006-08-10 21:57:10.000000000 +0200 +++ linux-2.6.16.perfctr27/include/asm-i386/processor.h 2006-08-11 03:48:19.000000000 +0200 @@ -464,6 +464,8 @@ struct thread_struct { unsigned long iopl; /* max allowed port in the bitmap, in bytes: */ unsigned long io_bitmap_max; +/* performance counters */ + struct vperfctr *perfctr; }; #define INIT_THREAD { \ diff -rupN linux-2.6.16/include/asm-i386/system.h linux-2.6.16.perfctr27/include/asm-i386/system.h --- linux-2.6.16/include/asm-i386/system.h 2006-08-10 21:57:10.000000000 +0200 +++ linux-2.6.16.perfctr27/include/asm-i386/system.h 2006-08-11 03:48:19.000000000 +0200 @@ -14,6 +14,7 @@ extern struct task_struct * FASTCALL(__s #define switch_to(prev,next,last) do { \ unsigned long esi,edi; \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile("pushl %%ebp\n\t" \ "movl %%esp,%0\n\t" /* save ESP */ \ "movl %5,%%esp\n\t" /* restore ESP */ \ diff -rupN linux-2.6.16/include/asm-i386/unistd.h linux-2.6.16.perfctr27/include/asm-i386/unistd.h --- linux-2.6.16/include/asm-i386/unistd.h 2006-08-10 21:57:10.000000000 +0200 +++ linux-2.6.16.perfctr27/include/asm-i386/unistd.h 2006-08-11 03:48:23.000000000 +0200 @@ -316,8 +316,12 @@ #define __NR_pselect6 308 #define __NR_ppoll 309 #define __NR_unshare 310 +#define __NR_vperfctr_open 318 +#define __NR_vperfctr_control (__NR_vperfctr_open+1) +#define __NR_vperfctr_write (__NR_vperfctr_open+2) +#define __NR_vperfctr_read (__NR_vperfctr_open+3) -#define NR_syscalls 311 +#define NR_syscalls 322 /* * user-visible error numbers are in the range -1 - -128: see diff -rupN linux-2.6.16/include/asm-powerpc/processor.h linux-2.6.16.perfctr27/include/asm-powerpc/processor.h --- 
linux-2.6.16/include/asm-powerpc/processor.h 2006-08-10 21:57:10.000000000 +0200 +++ linux-2.6.16.perfctr27/include/asm-powerpc/processor.h 2006-08-11 03:48:19.000000000 +0200 @@ -193,6 +193,9 @@ struct thread_struct { unsigned long spefscr; /* SPE & eFP status */ int used_spe; /* set if process has used spe */ #endif /* CONFIG_SPE */ +#ifdef CONFIG_PERFCTR_VIRTUAL + struct vperfctr *perfctr; /* performance counters */ +#endif }; #define ARCH_MIN_TASKALIGN 16 diff -rupN linux-2.6.16/include/asm-powerpc/reg.h linux-2.6.16.perfctr27/include/asm-powerpc/reg.h --- linux-2.6.16/include/asm-powerpc/reg.h 2006-08-10 21:57:10.000000000 +0200 +++ linux-2.6.16.perfctr27/include/asm-powerpc/reg.h 2006-08-11 03:48:19.000000000 +0200 @@ -365,10 +365,8 @@ #define SPRN_PURR 0x135 /* Processor Utilization of Resources Reg */ #define SPRN_PVR 0x11F /* Processor Version Register */ #define SPRN_RPA 0x3D6 /* Required Physical Address Register */ -#define SPRN_SDA 0x3BF /* Sampled Data Address Register */ #define SPRN_SDR1 0x019 /* MMU Hash Base Register */ #define SPRN_ASR 0x118 /* Address Space Register */ -#define SPRN_SIA 0x3BB /* Sampled Instruction Address Register */ #define SPRN_SPRG0 0x110 /* Special Purpose Register General 0 */ #define SPRN_SPRG1 0x111 /* Special Purpose Register General 1 */ #define SPRN_SPRG2 0x112 /* Special Purpose Register General 2 */ @@ -403,13 +401,6 @@ #define SPRN_THRM3 0x3FE /* Thermal Management Register 3 */ #define THRM3_E (1<<0) #define SPRN_TLBMISS 0x3D4 /* 980 7450 TLB Miss Register */ -#define SPRN_UMMCR0 0x3A8 /* User Monitor Mode Control Register 0 */ -#define SPRN_UMMCR1 0x3AC /* User Monitor Mode Control Register 0 */ -#define SPRN_UPMC1 0x3A9 /* User Performance Counter Register 1 */ -#define SPRN_UPMC2 0x3AA /* User Performance Counter Register 2 */ -#define SPRN_UPMC3 0x3AD /* User Performance Counter Register 3 */ -#define SPRN_UPMC4 0x3AE /* User Performance Counter Register 4 */ -#define SPRN_USIA 0x3AB /* User Sampled 
Instruction Address Register */ #define SPRN_VRSAVE 0x100 /* Vector Register Save Register */ #define SPRN_XER 0x001 /* Fixed Point Exception Register */ @@ -455,33 +446,7 @@ #define SPRN_SDAR 781 #else /* 32-bit */ -#define SPRN_MMCR0 952 /* Monitor Mode Control Register 0 */ -#define MMCR0_FC 0x80000000UL /* freeze counters */ -#define MMCR0_FCS 0x40000000UL /* freeze in supervisor state */ -#define MMCR0_FCP 0x20000000UL /* freeze in problem state */ -#define MMCR0_FCM1 0x10000000UL /* freeze counters while MSR mark = 1 */ -#define MMCR0_FCM0 0x08000000UL /* freeze counters while MSR mark = 0 */ -#define MMCR0_PMXE 0x04000000UL /* performance monitor exception enable */ -#define MMCR0_FCECE 0x02000000UL /* freeze ctrs on enabled cond or event */ -#define MMCR0_TBEE 0x00400000UL /* time base exception enable */ -#define MMCR0_PMC1CE 0x00008000UL /* PMC1 count enable*/ #define MMCR0_PMCnCE 0x00004000UL /* count enable for all but PMC 1*/ -#define MMCR0_TRIGGER 0x00002000UL /* TRIGGER enable */ -#define MMCR0_PMC1SEL 0x00001fc0UL /* PMC 1 Event */ -#define MMCR0_PMC2SEL 0x0000003fUL /* PMC 2 Event */ - -#define SPRN_MMCR1 956 -#define MMCR1_PMC3SEL 0xf8000000UL /* PMC 3 Event */ -#define MMCR1_PMC4SEL 0x07c00000UL /* PMC 4 Event */ -#define MMCR1_PMC5SEL 0x003e0000UL /* PMC 5 Event */ -#define MMCR1_PMC6SEL 0x0001f800UL /* PMC 6 Event */ -#define SPRN_MMCR2 944 -#define SPRN_PMC1 953 /* Performance Counter Register 1 */ -#define SPRN_PMC2 954 /* Performance Counter Register 2 */ -#define SPRN_PMC3 957 /* Performance Counter Register 3 */ -#define SPRN_PMC4 958 /* Performance Counter Register 4 */ -#define SPRN_PMC5 945 /* Performance Counter Register 5 */ -#define SPRN_PMC6 946 /* Performance Counter Register 6 */ #define SPRN_SIAR 955 /* Sampled Instruction Address Register */ @@ -493,6 +458,77 @@ #define MMCR0_PMC2_CYCLES 0x1 #define MMCR0_PMC2_ITLB 0x7 #define MMCR0_PMC2_LOADMISSTIME 0x5 + +/* Performance-monitoring control and counter registers */ +#define 
SPRN_MMCR0 0x3B8 /* Monitor Mode Control Register 0 (604 and up) */ +#define SPRN_MMCR1 0x3BC /* Monitor Mode Control Register 1 (604e and up) */ +#define SPRN_MMCR2 0x3B0 /* Monitor Mode Control Register 2 (7400 and up) */ +#define SPRN_PMC1 0x3B9 /* Performance Counter Register 1 (604 and up) */ +#define SPRN_PMC2 0x3BA /* Performance Counter Register 2 (604 and up) */ +#define SPRN_PMC3 0x3BD /* Performance Counter Register 3 (604e and up) */ +#define SPRN_PMC4 0x3BE /* Performance Counter Register 4 (604e and up) */ +#define SPRN_PMC5 0x3B1 /* Performance Counter Register 5 (7450 and up) */ +#define SPRN_PMC6 0x3B2 /* Performance Counter Register 6 (7450 and up) */ +#define SPRN_SIA 0x3BB /* Sampled Instruction Address Register (604 and up) */ +#define SPRN_SDA 0x3BF /* Sampled Data Address Register (604/604e only) */ +#define SPRN_BAMR 0x3B7 /* Breakpoint Address Mask Register (7400 and up) */ + +#define SPRN_UMMCR0 0x3A8 /* User Monitor Mode Control Register 0 (750 and up) */ +#define SPRN_UMMCR1 0x3AC /* User Monitor Mode Control Register 0 (750 and up) */ +#define SPRN_UMMCR2 0x3A0 /* User Monitor Mode Control Register 0 (7400 and up) */ +#define SPRN_UPMC1 0x3A9 /* User Performance Counter Register 1 (750 and up) */ +#define SPRN_UPMC2 0x3AA /* User Performance Counter Register 2 (750 and up) */ +#define SPRN_UPMC3 0x3AD /* User Performance Counter Register 3 (750 and up) */ +#define SPRN_UPMC4 0x3AE /* User Performance Counter Register 4 (750 and up) */ +#define SPRN_UPMC5 0x3A1 /* User Performance Counter Register 5 (7450 and up) */ +#define SPRN_UPMC6 0x3A2 /* User Performance Counter Register 5 (7450 and up) */ +#define SPRN_USIA 0x3AB /* User Sampled Instruction Address Register (750 and up) */ +#define SPRN_UBAMR 0x3A7 /* User Breakpoint Address Mask Register (7400 and up) */ + +/* MMCR0 layout (74xx terminology) */ +#define MMCR0_FC 0x80000000 /* Freeze counters unconditionally. 
*/ +#define MMCR0_FCS 0x40000000 /* Freeze counters while MSR[PR]=0 (supervisor mode). */ +#define MMCR0_FCP 0x20000000 /* Freeze counters while MSR[PR]=1 (user mode). */ +#define MMCR0_FCM1 0x10000000 /* Freeze counters while MSR[PM]=1. */ +#define MMCR0_FCM0 0x08000000 /* Freeze counters while MSR[PM]=0. */ +#define MMCR0_PMXE 0x04000000 /* Enable performance monitor exceptions. + * Cleared by hardware when a PM exception occurs. + * 604: PMXE is not cleared by hardware. + */ +#define MMCR0_FCECE 0x02000000 /* Freeze counters on enabled condition or event. + * FCECE is treated as 0 if TRIGGER is 1. + * 74xx: FC is set when the event occurs. + * 604/750: ineffective when PMXE=0. + */ +#define MMCR0_TBSEL 0x01800000 /* Time base lower (TBL) bit selector. + * 00: bit 31, 01: bit 23, 10: bit 19, 11: bit 15. + */ +#define MMCR0_TBEE 0x00400000 /* Enable event on TBL bit transition from 0 to 1. */ +#define MMCR0_THRESHOLD 0x003F0000 /* Threshold value for certain events. */ +#define MMCR0_PMC1CE 0x00008000 /* Enable event on PMC1 overflow. */ +#define MMCR0_PMCjCE 0x00004000 /* Enable event on PMC2-PMC6 overflow. + * 604/750: Overrides FCECE (DISCOUNT). + */ +#define MMCR0_TRIGGER 0x00002000 /* Disable PMC2-PMC6 until PMC1 overflow or other event. + * 74xx: cleared by hardware when the event occurs. + */ +#define MMCR0_PMC1SEL 0x00001FC0 /* PMC1 event selector, 7 bits. */ +#define MMCR0_PMC2SEL 0x0000003F /* PMC2 event selector, 6 bits. */ + +/* MMCR1 layout (604e-7457) */ +#define MMCR1_PMC3SEL 0xF8000000 /* PMC3 event selector, 5 bits. */ +#define MMCR1_PMC4SEL 0x07C00000 /* PMC4 event selector, 5 bits. */ +#define MMCR1_PMC5SEL 0x003E0000 /* PMC5 event selector, 5 bits. (745x only) */ +#define MMCR1_PMC6SEL 0x0001F800 /* PMC6 event selector, 6 bits. (745x only) */ +#define MMCR1__RESERVED 0x000007FF /* should be zero */ + +/* MMCR2 layout (7400-7457) */ +#define MMCR2_THRESHMULT 0x80000000 /* MMCR0[THRESHOLD] multiplier. 
*/ +#define MMCR2_SMCNTEN 0x40000000 /* 7400/7410 only, should be zero. */ +#define MMCR2_SMINTEN 0x20000000 /* 7400/7410 only, should be zero. */ +#define MMCR2__RESERVED 0x1FFFFFFF /* should be zero */ +#define MMCR2_RESERVED (MMCR2_SMCNTEN | MMCR2_SMINTEN | MMCR2__RESERVED) + #endif /* Processor Version Register (PVR) field extraction */ diff -rupN linux-2.6.16/include/asm-powerpc/unistd.h linux-2.6.16.perfctr27/include/asm-powerpc/unistd.h --- linux-2.6.16/include/asm-powerpc/unistd.h 2006-08-10 21:57:10.000000000 +0200 +++ linux-2.6.16.perfctr27/include/asm-powerpc/unistd.h 2006-08-11 03:48:23.000000000 +0200 @@ -301,8 +301,12 @@ #define __NR_pselect6 280 #define __NR_ppoll 281 #define __NR_unshare 282 +#define __NR_vperfctr_open 301 +#define __NR_vperfctr_control (__NR_vperfctr_open+1) +#define __NR_vperfctr_write (__NR_vperfctr_open+2) +#define __NR_vperfctr_read (__NR_vperfctr_open+3) -#define __NR_syscalls 283 +#define __NR_syscalls 305 #ifdef __KERNEL__ #define __NR__exit __NR_exit diff -rupN linux-2.6.16/include/asm-x86_64/hw_irq.h linux-2.6.16.perfctr27/include/asm-x86_64/hw_irq.h --- linux-2.6.16/include/asm-x86_64/hw_irq.h 2006-08-10 21:57:10.000000000 +0200 +++ linux-2.6.16.perfctr27/include/asm-x86_64/hw_irq.h 2006-08-11 03:48:19.000000000 +0200 @@ -67,14 +67,15 @@ struct hw_interrupt_type; * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. 
(0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in irq.h */ #ifndef __ASSEMBLY__ diff -rupN linux-2.6.16/include/asm-x86_64/ia32_unistd.h linux-2.6.16.perfctr27/include/asm-x86_64/ia32_unistd.h --- linux-2.6.16/include/asm-x86_64/ia32_unistd.h 2006-08-10 21:57:10.000000000 +0200 +++ linux-2.6.16.perfctr27/include/asm-x86_64/ia32_unistd.h 2006-08-11 03:48:23.000000000 +0200 @@ -316,7 +316,11 @@ #define __NR_ia32_pselect6 308 #define __NR_ia32_ppoll 309 #define __NR_ia32_unshare 310 +#define __NR_ia32_vperfctr_open 318 +#define __NR_ia32_vperfctr_control (__NR_ia32_vperfctr_open+1) +#define __NR_ia32_vperfctr_write (__NR_ia32_vperfctr_open+2) +#define __NR_ia32_vperfctr_read (__NR_ia32_vperfctr_open+3) -#define IA32_NR_syscalls 315 /* must be > than biggest syscall! */ +#define IA32_NR_syscalls 322 /* must be > than biggest syscall! */ #endif /* _ASM_X86_64_IA32_UNISTD_H_ */ diff -rupN linux-2.6.16/include/asm-x86_64/irq.h linux-2.6.16.perfctr27/include/asm-x86_64/irq.h --- linux-2.6.16/include/asm-x86_64/irq.h 2006-08-10 21:57:10.000000000 +0200 +++ linux-2.6.16.perfctr27/include/asm-x86_64/irq.h 2006-08-11 03:48:19.000000000 +0200 @@ -29,7 +29,7 @@ */ #define NR_VECTORS 256 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in hw_irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in hw_irq.h */ #ifdef CONFIG_PCI_MSI #define NR_IRQS FIRST_SYSTEM_VECTOR diff -rupN linux-2.6.16/include/asm-x86_64/processor.h linux-2.6.16.perfctr27/include/asm-x86_64/processor.h --- linux-2.6.16/include/asm-x86_64/processor.h 2006-08-10 21:57:10.000000000 +0200 +++ linux-2.6.16.perfctr27/include/asm-x86_64/processor.h 2006-08-11 03:48:19.000000000 +0200 @@ -260,6 +260,8 @@ struct thread_struct { unsigned io_bitmap_max; /* cached TLS descriptors. 
*/ u64 tls_array[GDT_ENTRY_TLS_ENTRIES]; +/* performance counters */ + struct vperfctr *perfctr; } __attribute__((aligned(16))); #define INIT_THREAD { \ diff -rupN linux-2.6.16/include/asm-x86_64/system.h linux-2.6.16.perfctr27/include/asm-x86_64/system.h --- linux-2.6.16/include/asm-x86_64/system.h 2006-08-10 21:57:10.000000000 +0200 +++ linux-2.6.16.perfctr27/include/asm-x86_64/system.h 2006-08-11 03:48:19.000000000 +0200 @@ -26,7 +26,8 @@ #define __EXTRA_CLOBBER \ ,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15" -#define switch_to(prev,next,last) \ +#define switch_to(prev,next,last) do { \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile(SAVE_CONTEXT \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ @@ -46,7 +47,8 @@ [tif_fork] "i" (TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, thread_info)), \ [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ - : "memory", "cc" __EXTRA_CLOBBER) + : "memory", "cc" __EXTRA_CLOBBER); \ +} while (0) extern void load_gs_index(unsigned); diff -rupN linux-2.6.16/include/asm-x86_64/unistd.h linux-2.6.16.perfctr27/include/asm-x86_64/unistd.h --- linux-2.6.16/include/asm-x86_64/unistd.h 2006-08-10 21:57:10.000000000 +0200 +++ linux-2.6.16.perfctr27/include/asm-x86_64/unistd.h 2006-08-11 03:48:23.000000000 +0200 @@ -605,8 +605,30 @@ __SYSCALL(__NR_pselect6, sys_ni_syscall) __SYSCALL(__NR_ppoll, sys_ni_syscall) /* for now */ #define __NR_unshare 272 __SYSCALL(__NR_unshare, sys_unshare) +#define __NR_set_robust_list 273 +__SYSCALL(__NR_set_robust_list, sys_ni_syscall) +#define __NR_get_robust_list 274 +__SYSCALL(__NR_get_robust_list, sys_ni_syscall) +#define __NR_splice 275 +__SYSCALL(__NR_splice, sys_ni_syscall) +#define __NR_tee 276 +__SYSCALL(__NR_tee, sys_ni_syscall) +#define __NR_sync_file_range 277 +__SYSCALL(__NR_sync_file_range, sys_ni_syscall) +#define __NR_vmsplice 278 +__SYSCALL(__NR_vmsplice, sys_ni_syscall) 
+#define __NR_move_pages 279 +__SYSCALL(__NR_move_pages, sys_ni_syscall) +#define __NR_vperfctr_open 280 +__SYSCALL(__NR_vperfctr_open, sys_vperfctr_open) +#define __NR_vperfctr_control (__NR_vperfctr_open+1) +__SYSCALL(__NR_vperfctr_control, sys_vperfctr_control) +#define __NR_vperfctr_write (__NR_vperfctr_open+2) +__SYSCALL(__NR_vperfctr_write, sys_vperfctr_write) +#define __NR_vperfctr_read (__NR_vperfctr_open+3) +__SYSCALL(__NR_vperfctr_read, sys_vperfctr_read) -#define __NR_syscall_max __NR_unshare +#define __NR_syscall_max __NR_vperfctr_read #ifndef __NO_STUBS diff -rupN linux-2.6.16/include/linux/sched.h linux-2.6.16.perfctr27/include/linux/sched.h --- linux-2.6.16/include/linux/sched.h 2006-08-10 21:57:10.000000000 +0200 +++ linux-2.6.16.perfctr27/include/linux/sched.h 2006-08-11 03:48:19.000000000 +0200 @@ -1222,6 +1222,9 @@ extern void unhash_process(struct task_s * subscriptions and synchronises with wait4(). Also used in procfs. Also * pins the final release of task.io_context. Also protects ->cpuset. * + * Synchronises set_cpus_allowed(), unlink, and creat of ->thread.perfctr. + * [if CONFIG_PERFCTR_VIRTUAL] + * * Nests both inside and outside of read_lock(&tasklist_lock). * It must not be nested with write_lock_irq(&tasklist_lock), * neither inside nor outside. 
diff -rupN linux-2.6.16/kernel/exit.c linux-2.6.16.perfctr27/kernel/exit.c --- linux-2.6.16/kernel/exit.c 2006-08-10 21:57:10.000000000 +0200 +++ linux-2.6.16.perfctr27/kernel/exit.c 2006-08-11 03:48:19.000000000 +0200 @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -102,6 +103,7 @@ repeat: zap_leader = (leader->exit_signal == -1); } + perfctr_release_task(p); sched_exit(p); write_unlock_irq(&tasklist_lock); spin_unlock(&p->proc_lock); diff -rupN linux-2.6.16/kernel/sched.c linux-2.6.16.perfctr27/kernel/sched.c --- linux-2.6.16/kernel/sched.c 2006-08-10 21:57:10.000000000 +0200 +++ linux-2.6.16.perfctr27/kernel/sched.c 2006-08-11 03:48:19.000000000 +0200 @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include @@ -4393,6 +4394,8 @@ int set_cpus_allowed(task_t *p, cpumask_ migration_req_t req; runqueue_t *rq; + perfctr_set_cpus_allowed(p, new_mask); + rq = task_rq_lock(p, &flags); if (!cpus_intersects(new_mask, cpu_online_map)) { ret = -EINVAL; diff -rupN linux-2.6.16/kernel/sys_ni.c linux-2.6.16.perfctr27/kernel/sys_ni.c --- linux-2.6.16/kernel/sys_ni.c 2006-08-10 21:57:10.000000000 +0200 +++ linux-2.6.16.perfctr27/kernel/sys_ni.c 2006-08-11 03:48:19.000000000 +0200 @@ -68,6 +68,10 @@ cond_syscall(compat_sys_mq_timedsend); cond_syscall(compat_sys_mq_timedreceive); cond_syscall(compat_sys_mq_notify); cond_syscall(compat_sys_mq_getsetattr); +cond_syscall(sys_vperfctr_open); +cond_syscall(sys_vperfctr_control); +cond_syscall(sys_vperfctr_write); +cond_syscall(sys_vperfctr_read); cond_syscall(sys_mbind); cond_syscall(sys_get_mempolicy); cond_syscall(sys_set_mempolicy); diff -rupN linux-2.6.16/kernel/timer.c linux-2.6.16.perfctr27/kernel/timer.c --- linux-2.6.16/kernel/timer.c 2006-08-10 21:57:10.000000000 +0200 +++ linux-2.6.16.perfctr27/kernel/timer.c 2006-08-11 03:48:19.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -837,6 +838,7 @@ void update_process_times(int 
user_tick) account_user_time(p, jiffies_to_cputime(1)); else account_system_time(p, HARDIRQ_OFFSET, jiffies_to_cputime(1)); + perfctr_sample_thread(&p->thread); run_local_timers(); if (rcu_pending(cpu)) rcu_check_callbacks(cpu, user_tick); papi-5.6.0/src/freebsd/map.c000664 001750 001750 00000003014 13216244361 017653 0ustar00jshenry1963jshenry1963000000 000000 /****************************/ /* THIS IS OPEN SOURCE CODE */ /****************************/ /* * File: freebsd-map.c * Author: Harald Servat * redcrash@gmail.com */ #include "freebsd.h" #include "papiStdEventDefs.h" #include "map.h" /** See other freebsd-map*.* for more details! **/ Native_Event_Info_t _papi_hwd_native_info[CPU_LAST+1]; void init_freebsd_libpmc_mappings (void) { _papi_hwd_native_info[CPU_UNKNOWN].info = UnkProcessor_info; _papi_hwd_native_info[CPU_P6].info = P6Processor_info; _papi_hwd_native_info[CPU_P6_C].info = P6_C_Processor_info; _papi_hwd_native_info[CPU_P6_2].info = P6_2_Processor_info; _papi_hwd_native_info[CPU_P6_3].info = P6_3_Processor_info; _papi_hwd_native_info[CPU_P6_M].info = P6_M_Processor_info; _papi_hwd_native_info[CPU_P4].info = P4Processor_info; _papi_hwd_native_info[CPU_K7].info = K7Processor_info; _papi_hwd_native_info[CPU_K8].info = K8Processor_info; _papi_hwd_native_info[CPU_ATOM].info = AtomProcessor_info; _papi_hwd_native_info[CPU_CORE].info = CoreProcessor_info; _papi_hwd_native_info[CPU_CORE2].info = Core2Processor_info; _papi_hwd_native_info[CPU_CORE2EXTREME].info = Core2ExtremeProcessor_info; _papi_hwd_native_info[CPU_COREI7].info = i7Processor_info; _papi_hwd_native_info[CPU_COREWESTMERE].info = WestmereProcessor_info; _papi_hwd_native_info[CPU_LAST].info = NULL; } int freebsd_number_of_events (int processortype) { int counter = 0; while (_papi_hwd_native_info[processortype].info[counter].name != NULL) counter++; return counter; } papi-5.6.0/src/freebsd/map-p6.c000664 001750 001750 00000015317 13216244361 020207 0ustar00jshenry1963jshenry1963000000 000000 
/****************************/
/* THIS IS OPEN SOURCE CODE */
/****************************/

/*
 * File:    map-p6.c
 * Author:  Harald Servat
 *          redcrash@gmail.com
 */

#include "freebsd.h"
#include "papiStdEventDefs.h"
#include "map.h"

/****************************************************************************
 P6 SUBSTRATE
 P6 SUBSTRATE
 P6 SUBSTRATE (aka Pentium Pro)
 P6 SUBSTRATE
 P6 SUBSTRATE
****************************************************************************/

/*
 * Event name / description pairs for the P6 processor type.
 * The list ends with a { NULL, NULL } record, which is what consumers scan for.
 *
 * NativeEvent_Value_P6Processor must match P6Processor_info: keep the entry
 * order unchanged when editing descriptions.
 */
Native_Event_LabelDescription_t P6Processor_info[] =
{
	{ "p6-baclears", "Count the number of times a static branch prediction was made by the branch decoder because the BTB did not have a prediction." },
	{ "p6-br-bogus", "Count the number of bogus branches." },
	{ "p6-br-inst-decoded", "Count the number of branch instructions decoded." },
	{ "p6-br-inst-retired", "Count the number of branch instructions retired." },
	{ "p6-br-miss-pred-retired", "Count the number of mispredicted branch instructions retired." },
	{ "p6-br-miss-pred-taken-ret", "Count the number of taken mispredicted branches retired." },
	{ "p6-br-taken-retired", "Count the number of taken branches retired." },
	{ "p6-btb-misses", "Count the number of branches for which the BTB did not produce a prediction. "},
	{ "p6-bus-bnr-drv", "Count the number of bus clock cycles during which this processor is driving the BNR# pin." },
	{ "p6-bus-data-rcv", "Count the number of bus clock cycles during which this processor is receiving data." },
	{ "p6-bus-drdy-clocks", "Count the number of clocks during which DRDY# is asserted." },
	{ "p6-bus-hit-drv", "Count the number of bus clock cycles during which this processor is driving the HIT# pin." },
	{ "p6-bus-hitm-drv", "Count the number of bus clock cycles during which this processor is driving the HITM# pin." },
	/* Description wording corrected: "during with" -> "during which". */
	{ "p6-bus-lock-clocks", "Count the number of clocks during which LOCK# is asserted on the external system bus." },
	{ "p6-bus-req-outstanding", "Count the number of bus requests outstanding in any given cycle." },
	{ "p6-bus-snoop-stall", "Count the number of clock cycles during which the bus is snoop stalled." },
	{ "p6-bus-tran-any", "Count the number of completed bus transactions of any kind." },
	{ "p6-bus-tran-brd", "Count the number of burst read transactions." },
	{ "p6-bus-tran-burst", "Count the number of completed burst transactions." },
	{ "p6-bus-tran-def", "Count the number of completed deferred transactions." },
	{ "p6-bus-tran-ifetch", "Count the number of completed instruction fetch transactions." },
	{ "p6-bus-tran-inval", "Count the number of completed invalidate transactions." },
	{ "p6-bus-tran-mem", "Count the number of completed memory transactions." },
	{ "p6-bus-tran-pwr", "Count the number of completed partial write transactions." },
	{ "p6-bus-tran-rfo", "Count the number of completed read-for-ownership transactions." },
	{ "p6-bus-trans-io", "Count the number of completed I/O transactions." },
	{ "p6-bus-trans-p", "Count the number of completed partial transactions." },
	{ "p6-bus-trans-wb", "Count the number of completed write-back transactions." },
	/* Description wording corrected: "during with" -> "during which". */
	{ "p6-cpu-clk-unhalted", "Count the number of cycles during which the processor was not halted." },
	{ "p6-cycles-div-busy", "Count the number of cycles during which the divider is busy and cannot accept new divides." },
	{ "p6-cycles-in-pending-and-masked", "Count the number of processor cycles for which interrupts were disabled and interrupts were pending." },
	{ "p6-cycles-int-masked", "Count the number of processor cycles for which interrupts were disabled." },
	{ "p6-data-mem-refs", "Count all loads and all stores using any memory type, including internal retries." },
	{ "p6-dcu-lines-in", "Count the total lines allocated in the data cache unit." },
	{ "p6-dcu-m-lines-in", "Count the number of M state lines allocated in the data cache unit." },
	{ "p6-dcu-m-lines-out", "Count the number of M state lines evicted from the data cache unit." },
	{ "p6-dcu-miss-outstanding", "Count the weighted number of cycles while a data cache unit miss is outstanding, incremented by the number of outstanding cache misses at any time."},
	{ "p6-div", "Count the number of integer and floating-point divides including speculative divides." },
	{ "p6-flops", "Count the number of computational floating point operations retired." },
	{ "p6-fp-assist", "Count the number of floating point exceptions handled by microcode." },
	{ "p6-fp-comps-ops-exe", "Count the number of computation floating point operations executed." },
	{ "p6-hw-int-rx", "Count the number of hardware interrupts received." },
	{ "p6-ifu-fetch", "Count the number of instruction fetches, both cacheable and non-cacheable." },
	{ "p6-ifu-fetch-miss", "Count the number of instruction fetch misses" },
	{ "p6-ifu-mem-stall", "Count the number of cycles instruction fetch is stalled for any reason." },
	{ "p6-ild-stall", "Count the number of cycles the instruction length decoder is stalled." },
	{ "p6-inst-decoded", "Count the number of instructions decoded." },
	{ "p6-inst-retired", "Count the number of instructions retired." },
	{ "p6-itlb-miss", "Count the number of instruction TLB misses." },
	{ "p6-l2-ads", "Count the number of L2 address strobes." },
	{ "p6-l2-dbus-busy", "Count the number of cycles during which the L2 cache data bus was busy." },
	{ "p6-l2-dbus-busy-rd", "Count the number of cycles during which the L2 cache data bus was busy transferring read data from L2 to the processor." },
	{ "p6-l2-ifetch", "Count the number of L2 instruction fetches." },
	{ "p6-l2-ld", "Count the number of L2 data loads." },
	{ "p6-l2-lines-in", "Count the number of L2 lines allocated." },
	{ "p6-l2-lines-out", "Count the number of L2 lines evicted." },
	{ "p6-l2-m-lines-inm", "Count the number of modified lines allocated in L2 cache." },
	{ "p6-l2-m-lines-outm", "Count the number of L2 M-state lines evicted." },
	{ "p6-l2-rqsts", "Count the total number of L2 requests." },
	{ "p6-l2-st", "Count the number of L2 data stores." },
	{ "p6-ld-blocks", "Count the number of load operations delayed due to store buffer blocks." },
	{ "p6-misalign-mem-ref", "Count the number of misaligned data memory references (crossing a 64 bit boundary)." },
	{ "p6-mul", "Count the number of floating point multiplies, including speculative multiplies." },
	{ "p6-partial-rat-stalls", "Count the number of cycles or events for partial stalls." },
	{ "p6-resource-stalls", "Count the number of cycles there was a resource related stall of any kind." },
	{ "p6-sb-drains", "Count the number of cycles the store buffer is draining." },
	{ "p6-segment-reg-loads", "Count the number of segment register loads." },
	{ "p6-uops-retired", "Count the number of micro-ops retired." },
	{ NULL, NULL }
};
papi-5.6.0/src/validation_tests/papi_tot_cyc.c000664 001750 001750 00000007503 13216244370 023524 0ustar00jshenry1963jshenry1963000000 000000 /* This file attempts to test the PAPI_TOT_CYC */ /* performance counter. 
*/ /* by Vince Weaver, */ #include #include #include #include #include #include #include "papi.h" #include "papi_test.h" #include "display_error.h" #include "matrix_multiply.h" #define SLEEP_RUNS 3 static long long convert_to_ns(struct timespec *before, struct timespec *after) { long long seconds; long long ns; seconds=after->tv_sec - before->tv_sec; ns = after->tv_nsec - before->tv_nsec; ns = (seconds*1000000000ULL)+ns; return ns; } int main(int argc, char **argv) { int quiet; double mmm_ghz; double error; int i; long long count,high=0,low=0,total=0,average=0; long long nsecs; long long mmm_count; long long expected; int retval; int eventset=PAPI_NULL; struct timespec before,after; quiet=tests_quiet(argc,argv); /* Init the PAPI library */ retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) { test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); } if (!quiet) { printf("\nTesting PAPI_TOT_CYC\n\n"); } if (!quiet) { printf("Testing a sleep of 1 second (%d times):\n",SLEEP_RUNS); } retval=PAPI_create_eventset(&eventset); if (retval!=PAPI_OK) { test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); } retval=PAPI_add_named_event(eventset,"PAPI_TOT_CYC"); if (retval!=PAPI_OK) { if (!quiet) printf("Could not add PAPI_TOT_CYC\n"); test_skip( __FILE__, __LINE__, "adding PAPI_TOT_CYC", retval ); } for(i=0;ihigh) high=count; if ((low==0) || (count100000) { if (!quiet) printf("Average cycle count too high!\n"); test_fail( __FILE__, __LINE__, "idle average", retval ); } /*****************************/ /* testing Matrix Matrix GHz */ /*****************************/ if (!quiet) { printf("\nEstimating GHz with matrix matrix multiply\n"); } clock_gettime(CLOCK_REALTIME,&before); PAPI_reset(eventset); PAPI_start(eventset); naive_matrix_multiply(quiet); retval=PAPI_stop(eventset,&count); if (retval!=PAPI_OK) { test_fail( __FILE__, __LINE__, "Problem stopping!", retval ); } clock_gettime(CLOCK_REALTIME,&after); 
nsecs=convert_to_ns(&before,&after); mmm_ghz=(double)count/(double)nsecs; if (!quiet) { printf("\tActual measured cycles = %lld\n",count); printf("\tEstimated actual GHz = %.2lfGHz\n",mmm_ghz); } mmm_count=count; /************************************/ /* Check for Linear Speedup */ /************************************/ if (!quiet) printf("\nTesting for a linear cycle increase\n"); #define REPITITIONS 2 clock_gettime(CLOCK_REALTIME,&before); PAPI_reset(eventset); PAPI_start(eventset); for(i=0;i10.0) || (error<-10.0)) { if (!quiet) printf("Error too high!\n"); test_fail( __FILE__, __LINE__, "Error too high", retval ); } if (!quiet) printf("\n"); test_pass( __FILE__ ); return 0; } papi-5.6.0/src/perfctr-2.7.x/patches/patch-kernel-2.6.11000664 001750 001750 00000066236 13216244370 024230 0ustar00jshenry1963jshenry1963000000 000000 --- linux-2.6.11.perfctr27/CREDITS.~1~ 2005-03-02 19:24:14.000000000 +0100 +++ linux-2.6.11.perfctr27/CREDITS 2005-03-14 11:10:47.000000000 +0100 @@ -2620,6 +2620,7 @@ N: Mikael Pettersson E: mikpe@csd.uu.se W: http://www.csd.uu.se/~mikpe/ D: Miscellaneous fixes +D: Performance-monitoring counters driver N: Reed H. Petty E: rhp@draper.net --- linux-2.6.11.perfctr27/MAINTAINERS.~1~ 2005-03-02 19:24:14.000000000 +0100 +++ linux-2.6.11.perfctr27/MAINTAINERS 2005-03-14 11:10:47.000000000 +0100 @@ -1769,6 +1769,12 @@ M: tsbogend@alpha.franken.de L: linux-net@vger.kernel.org S: Maintained +PERFORMANCE-MONITORING COUNTERS DRIVER +P: Mikael Pettersson +M: mikpe@csd.uu.se +W: http://www.csd.uu.se/~mikpe/linux/perfctr/ +S: Maintained + PHRAM MTD DRIVER P: Jörn Engel M: joern@wh.fh-wedel.de --- linux-2.6.11.perfctr27/arch/i386/Kconfig.~1~ 2005-03-02 19:24:14.000000000 +0100 +++ linux-2.6.11.perfctr27/arch/i386/Kconfig 2005-03-14 11:10:47.000000000 +0100 @@ -888,6 +888,8 @@ config REGPARM generate incorrect output with certain kernel constructs when -mregparm=3 is used. 
+source "drivers/perfctr/Kconfig" + endmenu --- linux-2.6.11.perfctr27/arch/i386/kernel/entry.S.~1~ 2005-03-02 19:24:14.000000000 +0100 +++ linux-2.6.11.perfctr27/arch/i386/kernel/entry.S 2005-03-14 11:18:16.000000000 +0100 @@ -372,6 +372,16 @@ ENTRY(name) \ /* The include is where all of the SMP etc. interrupts come from */ #include "entry_arch.h" +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PERFCTR) +ENTRY(perfctr_interrupt) + pushl $LOCAL_PERFCTR_VECTOR-256 + SAVE_ALL + pushl %esp + call smp_perfctr_interrupt + addl $4, %esp + jmp ret_from_intr +#endif + ENTRY(divide_error) pushl $0 # no error code pushl $do_divide_error @@ -862,5 +872,11 @@ ENTRY(sys_call_table) .long sys_add_key .long sys_request_key .long sys_keyctl + .long sys_ni_syscall /* reserved for sys_ioprio_set */ + .long sys_ni_syscall /* reserved for sys_ioprio_get */ /* 290 */ + .long sys_vperfctr_open + .long sys_vperfctr_control + .long sys_vperfctr_write + .long sys_vperfctr_read syscall_table_size=(.-sys_call_table) --- linux-2.6.11.perfctr27/arch/i386/kernel/i8259.c.~1~ 2005-03-02 19:24:14.000000000 +0100 +++ linux-2.6.11.perfctr27/arch/i386/kernel/i8259.c 2005-03-14 11:10:47.000000000 +0100 @@ -24,6 +24,7 @@ #include #include #include +#include #include @@ -413,6 +414,8 @@ void __init init_IRQ(void) */ intr_init_hook(); + perfctr_vector_init(); + /* * Set the clock to HZ Hz, we already have a valid * vector now: --- linux-2.6.11.perfctr27/arch/i386/kernel/process.c.~1~ 2005-03-02 19:24:14.000000000 +0100 +++ linux-2.6.11.perfctr27/arch/i386/kernel/process.c 2005-03-14 11:10:47.000000000 +0100 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -339,6 +340,7 @@ void exit_thread(void) tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET; put_cpu(); } + perfctr_exit_thread(&tsk->thread); } void flush_thread(void) @@ -400,6 +402,8 @@ int copy_thread(int nr, unsigned long cl savesegment(fs,p->thread.fs); savesegment(gs,p->thread.gs); + perfctr_copy_task(p, 
regs); + tsk = current; if (unlikely(NULL != tsk->thread.io_bitmap_ptr)) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); @@ -623,6 +627,8 @@ struct task_struct fastcall * __switch_t if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) handle_io_bitmap(next, tss); + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.11.perfctr27/arch/ppc/Kconfig.~1~ 2005-03-02 19:24:14.000000000 +0100 +++ linux-2.6.11.perfctr27/arch/ppc/Kconfig 2005-03-14 11:10:47.000000000 +0100 @@ -245,6 +245,8 @@ config NOT_COHERENT_CACHE depends on 4xx || 8xx default y +source "drivers/perfctr/Kconfig" + endmenu menu "Platform options" --- linux-2.6.11.perfctr27/arch/ppc/kernel/head.S.~1~ 2004-10-19 13:01:17.000000000 +0200 +++ linux-2.6.11.perfctr27/arch/ppc/kernel/head.S 2005-03-14 11:10:47.000000000 +0100 @@ -502,7 +502,11 @@ SystemCall: Trap_0f: EXCEPTION_PROLOG addi r3,r1,STACK_FRAME_OVERHEAD +#ifdef CONFIG_PERFCTR_INTERRUPT_SUPPORT + EXC_XFER_EE(0xf00, do_perfctr_interrupt) +#else EXC_XFER_EE(0xf00, UnknownException) +#endif /* * Handle TLB miss for instruction on 603/603e. 
--- linux-2.6.11.perfctr27/arch/ppc/kernel/misc.S.~1~ 2005-03-02 19:24:14.000000000 +0100 +++ linux-2.6.11.perfctr27/arch/ppc/kernel/misc.S 2005-03-14 11:14:05.000000000 +0100 @@ -1450,3 +1450,10 @@ _GLOBAL(sys_call_table) .long sys_add_key .long sys_request_key /* 270 */ .long sys_keyctl + .long sys_ni_syscall /* 272 reserved for sys_waitid */ + .long sys_ni_syscall /* 273 reserved for sys_ioprio_set */ + .long sys_ni_syscall /* 274 reserved for sys_ioprio_get */ + .long sys_vperfctr_open /* 275 */ + .long sys_vperfctr_control + .long sys_vperfctr_write + .long sys_vperfctr_read --- linux-2.6.11.perfctr27/arch/ppc/kernel/process.c.~1~ 2005-03-02 19:24:14.000000000 +0100 +++ linux-2.6.11.perfctr27/arch/ppc/kernel/process.c 2005-03-14 11:10:47.000000000 +0100 @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -301,7 +302,9 @@ struct task_struct *__switch_to(struct t #endif /* CONFIG_SPE */ new_thread = &new->thread; old_thread = &current->thread; + perfctr_suspend_thread(&prev->thread); last = _switch(old_thread, new_thread); + perfctr_resume_thread(&current->thread); local_irq_restore(s); return last; } @@ -363,6 +366,7 @@ void exit_thread(void) if (last_task_used_spe == current) last_task_used_spe = NULL; #endif + perfctr_exit_thread(&current->thread); } void flush_thread(void) @@ -455,6 +459,8 @@ copy_thread(int nr, unsigned long clone_ p->thread.last_syscall = -1; + perfctr_copy_task(p, regs); + return 0; } --- linux-2.6.11.perfctr27/arch/x86_64/Kconfig.~1~ 2005-03-02 19:24:15.000000000 +0100 +++ linux-2.6.11.perfctr27/arch/x86_64/Kconfig 2005-03-14 11:10:47.000000000 +0100 @@ -350,6 +350,9 @@ config X86_MCE_INTEL help Additional support for intel specific MCE features such as the thermal monitor. 
+ +source "drivers/perfctr/Kconfig" + endmenu # --- linux-2.6.11.perfctr27/arch/x86_64/ia32/ia32entry.S.~1~ 2005-03-02 19:24:15.000000000 +0100 +++ linux-2.6.11.perfctr27/arch/x86_64/ia32/ia32entry.S 2005-03-14 11:19:23.000000000 +0100 @@ -595,6 +595,12 @@ ia32_sys_call_table: .quad sys_add_key .quad sys_request_key .quad sys_keyctl + .quad quiet_ni_syscall /* reserved for sys_ioprio_set */ + .quad quiet_ni_syscall /* reserved for sys_ioprio_get */ /* 290 */ + .quad sys_vperfctr_open + .quad sys_vperfctr_control + .quad sys_vperfctr_write + .quad sys_vperfctr_read /* don't forget to change IA32_NR_syscalls */ ia32_syscall_end: .rept IA32_NR_syscalls-(ia32_syscall_end-ia32_sys_call_table)/8 --- linux-2.6.11.perfctr27/arch/x86_64/kernel/entry.S.~1~ 2005-03-02 19:24:15.000000000 +0100 +++ linux-2.6.11.perfctr27/arch/x86_64/kernel/entry.S 2005-03-14 11:10:47.000000000 +0100 @@ -562,6 +562,11 @@ ENTRY(spurious_interrupt) apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt #endif +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PERFCTR) +ENTRY(perfctr_interrupt) + apicinterrupt LOCAL_PERFCTR_VECTOR,smp_perfctr_interrupt +#endif + /* * Exception entry points. 
*/ --- linux-2.6.11.perfctr27/arch/x86_64/kernel/i8259.c.~1~ 2005-03-02 19:24:15.000000000 +0100 +++ linux-2.6.11.perfctr27/arch/x86_64/kernel/i8259.c 2005-03-14 11:10:47.000000000 +0100 @@ -24,6 +24,7 @@ #include #include #include +#include #include @@ -562,6 +563,8 @@ void __init init_IRQ(void) set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); #endif + perfctr_vector_init(); + /* * Set the clock to HZ Hz, we already have a valid * vector now: --- linux-2.6.11.perfctr27/arch/x86_64/kernel/process.c.~1~ 2005-03-02 19:24:15.000000000 +0100 +++ linux-2.6.11.perfctr27/arch/x86_64/kernel/process.c 2005-03-14 11:10:47.000000000 +0100 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -293,6 +294,7 @@ void exit_thread(void) t->io_bitmap_max = 0; put_cpu(); } + perfctr_exit_thread(&me->thread); } void flush_thread(void) @@ -395,6 +397,8 @@ int copy_thread(int nr, unsigned long cl asm("movl %%es,%0" : "=m" (p->thread.es)); asm("movl %%ds,%0" : "=m" (p->thread.ds)); + perfctr_copy_task(p, regs); + if (unlikely(me->thread.io_bitmap_ptr != NULL)) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) { @@ -544,6 +548,8 @@ struct task_struct *__switch_to(struct t } } + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.11.perfctr27/drivers/Makefile.~1~ 2005-03-02 19:24:15.000000000 +0100 +++ linux-2.6.11.perfctr27/drivers/Makefile 2005-03-14 11:10:47.000000000 +0100 @@ -61,5 +61,6 @@ obj-$(CONFIG_EISA) += eisa/ obj-$(CONFIG_CPU_FREQ) += cpufreq/ obj-$(CONFIG_MMC) += mmc/ obj-$(CONFIG_INFINIBAND) += infiniband/ +obj-$(CONFIG_PERFCTR) += perfctr/ obj-y += firmware/ obj-$(CONFIG_CRYPTO) += crypto/ --- linux-2.6.11.perfctr27/include/asm-i386/mach-default/irq_vectors.h.~1~ 2004-05-10 11:14:37.000000000 +0200 +++ linux-2.6.11.perfctr27/include/asm-i386/mach-default/irq_vectors.h 2005-03-14 11:10:47.000000000 +0100 @@ -56,14 +56,15 @@ * sources per level' errata. 
*/ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 --- linux-2.6.11.perfctr27/include/asm-i386/mach-visws/irq_vectors.h.~1~ 2004-01-09 13:19:11.000000000 +0100 +++ linux-2.6.11.perfctr27/include/asm-i386/mach-visws/irq_vectors.h 2005-03-14 11:10:47.000000000 +0100 @@ -35,14 +35,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 --- linux-2.6.11.perfctr27/include/asm-i386/processor.h.~1~ 2005-03-02 19:24:19.000000000 +0100 +++ linux-2.6.11.perfctr27/include/asm-i386/processor.h 2005-03-14 11:10:47.000000000 +0100 @@ -444,6 +444,8 @@ struct thread_struct { unsigned long *io_bitmap_ptr; /* max allowed port in the bitmap, in bytes: */ unsigned long io_bitmap_max; +/* performance counters */ + struct vperfctr *perfctr; }; #define INIT_THREAD { \ --- linux-2.6.11.perfctr27/include/asm-i386/system.h.~1~ 2005-03-02 19:24:19.000000000 +0100 +++ linux-2.6.11.perfctr27/include/asm-i386/system.h 2005-03-14 11:10:47.000000000 +0100 @@ -14,6 +14,7 @@ extern struct task_struct * FASTCALL(__s #define switch_to(prev,next,last) do { \ unsigned long esi,edi; \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile("pushfl\n\t" \ "pushl %%ebp\n\t" \ "movl %%esp,%0\n\t" /* save ESP */ \ --- 
linux-2.6.11.perfctr27/include/asm-i386/unistd.h.~1~ 2004-12-25 12:16:22.000000000 +0100 +++ linux-2.6.11.perfctr27/include/asm-i386/unistd.h 2005-03-14 11:16:46.000000000 +0100 @@ -294,8 +294,12 @@ #define __NR_add_key 286 #define __NR_request_key 287 #define __NR_keyctl 288 +#define __NR_vperfctr_open 291 +#define __NR_vperfctr_control (__NR_vperfctr_open+1) +#define __NR_vperfctr_write (__NR_vperfctr_open+2) +#define __NR_vperfctr_read (__NR_vperfctr_open+3) -#define NR_syscalls 289 +#define NR_syscalls 295 /* * user-visible error numbers are in the range -1 - -128: see --- linux-2.6.11.perfctr27/include/asm-ppc/processor.h.~1~ 2005-03-02 19:24:19.000000000 +0100 +++ linux-2.6.11.perfctr27/include/asm-ppc/processor.h 2005-03-14 11:10:47.000000000 +0100 @@ -122,6 +122,9 @@ struct thread_struct { unsigned long spefscr; /* SPE & eFP status */ int used_spe; /* set if process has used spe */ #endif /* CONFIG_SPE */ +#ifdef CONFIG_PERFCTR_VIRTUAL + struct vperfctr *perfctr; /* performance counters */ +#endif }; #define ARCH_MIN_TASKALIGN 16 --- linux-2.6.11.perfctr27/include/asm-ppc/reg.h.~1~ 2005-03-02 19:24:19.000000000 +0100 +++ linux-2.6.11.perfctr27/include/asm-ppc/reg.h 2005-03-14 11:10:47.000000000 +0100 @@ -273,22 +273,14 @@ #define SPRN_LDSTCR 0x3f8 /* Load/Store control register */ #define SPRN_LDSTDB 0x3f4 /* */ #define SPRN_LR 0x008 /* Link Register */ -#define SPRN_MMCR0 0x3B8 /* Monitor Mode Control Register 0 */ -#define SPRN_MMCR1 0x3BC /* Monitor Mode Control Register 1 */ #ifndef SPRN_PIR #define SPRN_PIR 0x3FF /* Processor Identification Register */ #endif -#define SPRN_PMC1 0x3B9 /* Performance Counter Register 1 */ -#define SPRN_PMC2 0x3BA /* Performance Counter Register 2 */ -#define SPRN_PMC3 0x3BD /* Performance Counter Register 3 */ -#define SPRN_PMC4 0x3BE /* Performance Counter Register 4 */ #define SPRN_PTEHI 0x3D5 /* 981 7450 PTE HI word (S/W TLB load) */ #define SPRN_PTELO 0x3D6 /* 982 7450 PTE LO word (S/W TLB load) */ #define SPRN_PVR 
0x11F /* Processor Version Register */ #define SPRN_RPA 0x3D6 /* Required Physical Address Register */ -#define SPRN_SDA 0x3BF /* Sampled Data Address Register */ #define SPRN_SDR1 0x019 /* MMU Hash Base Register */ -#define SPRN_SIA 0x3BB /* Sampled Instruction Address Register */ #define SPRN_SPRG0 0x110 /* Special Purpose Register General 0 */ #define SPRN_SPRG1 0x111 /* Special Purpose Register General 1 */ #define SPRN_SPRG2 0x112 /* Special Purpose Register General 2 */ @@ -315,16 +307,79 @@ #define SPRN_THRM3 0x3FE /* Thermal Management Register 3 */ #define THRM3_E (1<<0) #define SPRN_TLBMISS 0x3D4 /* 980 7450 TLB Miss Register */ -#define SPRN_UMMCR0 0x3A8 /* User Monitor Mode Control Register 0 */ -#define SPRN_UMMCR1 0x3AC /* User Monitor Mode Control Register 0 */ -#define SPRN_UPMC1 0x3A9 /* User Performance Counter Register 1 */ -#define SPRN_UPMC2 0x3AA /* User Performance Counter Register 2 */ -#define SPRN_UPMC3 0x3AD /* User Performance Counter Register 3 */ -#define SPRN_UPMC4 0x3AE /* User Performance Counter Register 4 */ -#define SPRN_USIA 0x3AB /* User Sampled Instruction Address Register */ #define SPRN_VRSAVE 0x100 /* Vector Register Save Register */ #define SPRN_XER 0x001 /* Fixed Point Exception Register */ +/* Performance-monitoring control and counter registers */ +#define SPRN_MMCR0 0x3B8 /* Monitor Mode Control Register 0 (604 and up) */ +#define SPRN_MMCR1 0x3BC /* Monitor Mode Control Register 1 (604e and up) */ +#define SPRN_MMCR2 0x3B0 /* Monitor Mode Control Register 2 (7400 and up) */ +#define SPRN_PMC1 0x3B9 /* Performance Counter Register 1 (604 and up) */ +#define SPRN_PMC2 0x3BA /* Performance Counter Register 2 (604 and up) */ +#define SPRN_PMC3 0x3BD /* Performance Counter Register 3 (604e and up) */ +#define SPRN_PMC4 0x3BE /* Performance Counter Register 4 (604e and up) */ +#define SPRN_PMC5 0x3B1 /* Performance Counter Register 5 (7450 and up) */ +#define SPRN_PMC6 0x3B2 /* Performance Counter Register 6 (7450 and up) 
*/ +#define SPRN_SIA 0x3BB /* Sampled Instruction Address Register (604 and up) */ +#define SPRN_SDA 0x3BF /* Sampled Data Address Register (604/604e only) */ +#define SPRN_BAMR 0x3B7 /* Breakpoint Address Mask Register (7400 and up) */ + +#define SPRN_UMMCR0 0x3A8 /* User Monitor Mode Control Register 0 (750 and up) */ +#define SPRN_UMMCR1 0x3AC /* User Monitor Mode Control Register 1 (750 and up) */ +#define SPRN_UMMCR2 0x3A0 /* User Monitor Mode Control Register 2 (7400 and up) */ +#define SPRN_UPMC1 0x3A9 /* User Performance Counter Register 1 (750 and up) */ +#define SPRN_UPMC2 0x3AA /* User Performance Counter Register 2 (750 and up) */ +#define SPRN_UPMC3 0x3AD /* User Performance Counter Register 3 (750 and up) */ +#define SPRN_UPMC4 0x3AE /* User Performance Counter Register 4 (750 and up) */ +#define SPRN_UPMC5 0x3A1 /* User Performance Counter Register 5 (7450 and up) */ +#define SPRN_UPMC6 0x3A2 /* User Performance Counter Register 6 (7450 and up) */ +#define SPRN_USIA 0x3AB /* User Sampled Instruction Address Register (750 and up) */ +#define SPRN_UBAMR 0x3A7 /* User Breakpoint Address Mask Register (7400 and up) */ + +/* MMCR0 layout (74xx terminology) */ +#define MMCR0_FC 0x80000000 /* Freeze counters unconditionally. */ +#define MMCR0_FCS 0x40000000 /* Freeze counters while MSR[PR]=0 (supervisor mode). */ +#define MMCR0_FCP 0x20000000 /* Freeze counters while MSR[PR]=1 (user mode). */ +#define MMCR0_FCM1 0x10000000 /* Freeze counters while MSR[PM]=1. */ +#define MMCR0_FCM0 0x08000000 /* Freeze counters while MSR[PM]=0. */ +#define MMCR0_PMXE 0x04000000 /* Enable performance monitor exceptions. + * Cleared by hardware when a PM exception occurs. + * 604: PMXE is not cleared by hardware. + */ +#define MMCR0_FCECE 0x02000000 /* Freeze counters on enabled condition or event. + * FCECE is treated as 0 if TRIGGER is 1. + * 74xx: FC is set when the event occurs. + * 604/750: ineffective when PMXE=0. 
+ */ +#define MMCR0_TBSEL 0x01800000 /* Time base lower (TBL) bit selector. + * 00: bit 31, 01: bit 23, 10: bit 19, 11: bit 15. + */ +#define MMCR0_TBEE 0x00400000 /* Enable event on TBL bit transition from 0 to 1. */ +#define MMCR0_THRESHOLD 0x003F0000 /* Threshold value for certain events. */ +#define MMCR0_PMC1CE 0x00008000 /* Enable event on PMC1 overflow. */ +#define MMCR0_PMCjCE 0x00004000 /* Enable event on PMC2-PMC6 overflow. + * 604/750: Overrides FCECE (DISCOUNT). + */ +#define MMCR0_TRIGGER 0x00002000 /* Disable PMC2-PMC6 until PMC1 overflow or other event. + * 74xx: cleared by hardware when the event occurs. + */ +#define MMCR0_PMC1SEL 0x00001FC0 /* PMC1 event selector, 7 bits. */ +#define MMCR0_PMC2SEL 0x0000003F /* PMC2 event selector, 6 bits. */ + +/* MMCR1 layout (604e-7457) */ +#define MMCR1_PMC3SEL 0xF8000000 /* PMC3 event selector, 5 bits. */ +#define MMCR1_PMC4SEL 0x07C00000 /* PMC4 event selector, 5 bits. */ +#define MMCR1_PMC5SEL 0x003E0000 /* PMC5 event selector, 5 bits. (745x only) */ +#define MMCR1_PMC6SEL 0x0001F800 /* PMC6 event selector, 6 bits. (745x only) */ +#define MMCR1__RESERVED 0x000007FF /* should be zero */ + +/* MMCR2 layout (7400-7457) */ +#define MMCR2_THRESHMULT 0x80000000 /* MMCR0[THRESHOLD] multiplier. */ +#define MMCR2_SMCNTEN 0x40000000 /* 7400/7410 only, should be zero. */ +#define MMCR2_SMINTEN 0x20000000 /* 7400/7410 only, should be zero. */ +#define MMCR2__RESERVED 0x1FFFFFFF /* should be zero */ +#define MMCR2_RESERVED (MMCR2_SMCNTEN | MMCR2_SMINTEN | MMCR2__RESERVED) + /* Bit definitions for MMCR0 and PMC1 / PMC2. 
*/ #define MMCR0_PMC1_CYCLES (1 << 7) #define MMCR0_PMC1_ICACHEMISS (5 << 7) @@ -333,7 +388,6 @@ #define MMCR0_PMC2_CYCLES 0x1 #define MMCR0_PMC2_ITLB 0x7 #define MMCR0_PMC2_LOADMISSTIME 0x5 -#define MMCR0_PMXE (1 << 26) /* Short-hand versions for a number of the above SPRNs */ #define CTR SPRN_CTR /* Counter Register */ --- linux-2.6.11.perfctr27/include/asm-ppc/unistd.h.~1~ 2005-03-02 19:24:19.000000000 +0100 +++ linux-2.6.11.perfctr27/include/asm-ppc/unistd.h 2005-03-14 11:12:17.000000000 +0100 @@ -276,8 +276,12 @@ #define __NR_add_key 269 #define __NR_request_key 270 #define __NR_keyctl 271 +#define __NR_vperfctr_open 275 +#define __NR_vperfctr_control (__NR_vperfctr_open+1) +#define __NR_vperfctr_write (__NR_vperfctr_open+2) +#define __NR_vperfctr_read (__NR_vperfctr_open+3) -#define __NR_syscalls 272 +#define __NR_syscalls 279 #define __NR(n) #n --- linux-2.6.11.perfctr27/include/asm-x86_64/hw_irq.h.~1~ 2005-03-02 19:24:19.000000000 +0100 +++ linux-2.6.11.perfctr27/include/asm-x86_64/hw_irq.h 2005-03-14 11:10:47.000000000 +0100 @@ -65,14 +65,15 @@ struct hw_interrupt_type; * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. 
(0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in irq.h */ #ifndef __ASSEMBLY__ --- linux-2.6.11.perfctr27/include/asm-x86_64/ia32_unistd.h.~1~ 2005-03-02 19:24:19.000000000 +0100 +++ linux-2.6.11.perfctr27/include/asm-x86_64/ia32_unistd.h 2005-03-14 11:20:08.000000000 +0100 @@ -294,7 +294,11 @@ #define __NR_ia32_add_key 286 #define __NR_ia32_request_key 287 #define __NR_ia32_keyctl 288 +#define __NR_ia32_vperfctr_open 291 +#define __NR_ia32_vperfctr_control (__NR_ia32_vperfctr_open+1) +#define __NR_ia32_vperfctr_write (__NR_ia32_vperfctr_open+2) +#define __NR_ia32_vperfctr_read (__NR_ia32_vperfctr_open+3) -#define IA32_NR_syscalls 290 /* must be > than biggest syscall! */ +#define IA32_NR_syscalls 295 /* must be > than biggest syscall! */ #endif /* _ASM_X86_64_IA32_UNISTD_H_ */ --- linux-2.6.11.perfctr27/include/asm-x86_64/irq.h.~1~ 2004-12-25 12:16:24.000000000 +0100 +++ linux-2.6.11.perfctr27/include/asm-x86_64/irq.h 2005-03-14 11:10:47.000000000 +0100 @@ -29,7 +29,7 @@ */ #define NR_VECTORS 256 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in hw_irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in hw_irq.h */ #ifdef CONFIG_PCI_MSI #define NR_IRQS FIRST_SYSTEM_VECTOR --- linux-2.6.11.perfctr27/include/asm-x86_64/processor.h.~1~ 2005-03-02 19:24:19.000000000 +0100 +++ linux-2.6.11.perfctr27/include/asm-x86_64/processor.h 2005-03-14 11:10:47.000000000 +0100 @@ -253,6 +253,8 @@ struct thread_struct { unsigned io_bitmap_max; /* cached TLS descriptors. 
*/ u64 tls_array[GDT_ENTRY_TLS_ENTRIES]; +/* performance counters */ + struct vperfctr *perfctr; } __attribute__((aligned(16))); #define INIT_THREAD {} --- linux-2.6.11.perfctr27/include/asm-x86_64/system.h.~1~ 2005-03-02 19:24:19.000000000 +0100 +++ linux-2.6.11.perfctr27/include/asm-x86_64/system.h 2005-03-14 11:10:47.000000000 +0100 @@ -26,7 +26,8 @@ #define __EXTRA_CLOBBER \ ,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15" -#define switch_to(prev,next,last) \ +#define switch_to(prev,next,last) do { \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile(SAVE_CONTEXT \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ @@ -46,7 +47,8 @@ [tif_fork] "i" (TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, thread_info)), \ [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ - : "memory", "cc" __EXTRA_CLOBBER) + : "memory", "cc" __EXTRA_CLOBBER); \ +} while (0) extern void load_gs_index(unsigned); --- linux-2.6.11.perfctr27/include/asm-x86_64/unistd.h.~1~ 2005-03-02 19:24:19.000000000 +0100 +++ linux-2.6.11.perfctr27/include/asm-x86_64/unistd.h 2005-03-14 11:16:05.000000000 +0100 @@ -563,8 +563,20 @@ __SYSCALL(__NR_add_key, sys_add_key) __SYSCALL(__NR_request_key, sys_request_key) #define __NR_keyctl 250 __SYSCALL(__NR_keyctl, sys_keyctl) +#define __NR_ioprio_set 251 +__SYSCALL(__NR_ioprio_set, sys_ni_syscall) +#define __NR_ioprio_get 252 +__SYSCALL(__NR_ioprio_get, sys_ni_syscall) +#define __NR_vperfctr_open 253 +__SYSCALL(__NR_vperfctr_open, sys_vperfctr_open) +#define __NR_vperfctr_control (__NR_vperfctr_open+1) +__SYSCALL(__NR_vperfctr_control, sys_vperfctr_control) +#define __NR_vperfctr_write (__NR_vperfctr_open+2) +__SYSCALL(__NR_vperfctr_write, sys_vperfctr_write) +#define __NR_vperfctr_read (__NR_vperfctr_open+3) +__SYSCALL(__NR_vperfctr_read, sys_vperfctr_read) -#define __NR_syscall_max __NR_keyctl +#define __NR_syscall_max __NR_vperfctr_read #ifndef 
__NO_STUBS /* user-visible error numbers are in the range -1 - -4095 */ --- linux-2.6.11.perfctr27/include/linux/sched.h.~1~ 2005-03-02 19:24:19.000000000 +0100 +++ linux-2.6.11.perfctr27/include/linux/sched.h 2005-03-14 11:10:47.000000000 +0100 @@ -1035,6 +1035,9 @@ extern void unhash_process(struct task_s * Protects ->fs, ->files, ->mm, ->ptrace, ->group_info, ->comm, keyring * subscriptions and synchronises with wait4(). Also used in procfs. * + * Synchronises set_cpus_allowed(), unlink, and creat of ->thread.perfctr. + * [if CONFIG_PERFCTR_VIRTUAL] + * * Nests both inside and outside of read_lock(&tasklist_lock). * It must not be nested with write_lock_irq(&tasklist_lock), * neither inside nor outside. --- linux-2.6.11.perfctr27/kernel/exit.c.~1~ 2005-03-02 19:24:20.000000000 +0100 +++ linux-2.6.11.perfctr27/kernel/exit.c 2005-03-14 11:10:47.000000000 +0100 @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -91,6 +92,7 @@ repeat: zap_leader = (leader->exit_signal == -1); } + perfctr_release_task(p); sched_exit(p); write_unlock_irq(&tasklist_lock); spin_unlock(&p->proc_lock); --- linux-2.6.11.perfctr27/kernel/sched.c.~1~ 2005-03-02 19:24:20.000000000 +0100 +++ linux-2.6.11.perfctr27/kernel/sched.c 2005-03-14 11:10:47.000000000 +0100 @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -4115,6 +4116,8 @@ int set_cpus_allowed(task_t *p, cpumask_ migration_req_t req; runqueue_t *rq; + perfctr_set_cpus_allowed(p, new_mask); + rq = task_rq_lock(p, &flags); if (!cpus_intersects(new_mask, cpu_online_map)) { ret = -EINVAL; --- linux-2.6.11.perfctr27/kernel/sys_ni.c.~1~ 2005-03-02 19:24:20.000000000 +0100 +++ linux-2.6.11.perfctr27/kernel/sys_ni.c 2005-03-14 11:10:47.000000000 +0100 @@ -83,3 +83,8 @@ cond_syscall(sys_pciconfig_write) cond_syscall(sys_pciconfig_iobase) cond_syscall(sys32_ipc) cond_syscall(sys32_sysctl) + +cond_syscall(sys_vperfctr_open) +cond_syscall(sys_vperfctr_control) 
+cond_syscall(sys_vperfctr_write) +cond_syscall(sys_vperfctr_read) --- linux-2.6.11.perfctr27/kernel/timer.c.~1~ 2005-03-02 19:24:20.000000000 +0100 +++ linux-2.6.11.perfctr27/kernel/timer.c 2005-03-14 11:10:47.000000000 +0100 @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -820,6 +821,7 @@ void update_process_times(int user_tick) account_user_time(p, jiffies_to_cputime(1)); else account_system_time(p, HARDIRQ_OFFSET, jiffies_to_cputime(1)); + perfctr_sample_thread(&p->thread); run_local_timers(); if (rcu_pending(cpu)) rcu_check_callbacks(cpu, user_tick); papi-5.6.0/src/perfctr-2.7.x/examples/signal/000775 001750 001750 00000000000 13216244370 022634 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/libpfm4/lib/events/s390x_cpumf_events.h000664 001750 001750 00000116047 13216244365 024563 0ustar00jshenry1963jshenry1963000000 000000 #ifndef __S390X_CPUMF_EVENTS_H__ #define __S390X_CPUMF_EVENTS_H__ #define __stringify(x) #x #define STRINGIFY(x) __stringify(x) /* CPUMF counter sets */ #define CPUMF_CTRSET_NONE 0 #define CPUMF_CTRSET_BASIC 2 #define CPUMF_CTRSET_PROBLEM_STATE 4 #define CPUMF_CTRSET_CRYPTO 8 #define CPUMF_CTRSET_EXTENDED 1 #define CPUMF_CTRSET_MT_DIAG 32 static const pme_cpumf_ctr_t cpumcf_generic_counters[] = { { .ctrnum = 0, .ctrset = CPUMF_CTRSET_BASIC, .name = "CPU_CYCLES", .desc = "Cycle Count", }, { .ctrnum = 1, .ctrset = CPUMF_CTRSET_BASIC, .name = "INSTRUCTIONS", .desc = "Instruction Count", }, { .ctrnum = 2, .ctrset = CPUMF_CTRSET_BASIC, .name = "L1I_DIR_WRITES", .desc = "Level-1 I-Cache Directory Write Count", }, { .ctrnum = 3, .ctrset = CPUMF_CTRSET_BASIC, .name = "L1I_PENALTY_CYCLES", .desc = "Level-1 I-Cache Penalty Cycle Count", }, { .ctrnum = 4, .ctrset = CPUMF_CTRSET_BASIC, .name = "L1D_DIR_WRITES", .desc = "Level-1 D-Cache Directory Write Count", }, { .ctrnum = 5, .ctrset = CPUMF_CTRSET_BASIC, .name = "L1D_PENALTY_CYCLES", .desc = "Level-1 D-Cache Penalty Cycle Count", }, { .ctrnum = 32, .ctrset = 
CPUMF_CTRSET_PROBLEM_STATE, .name = "PROBLEM_STATE_CPU_CYCLES", .desc = "Problem-State Cycle Count", }, { .ctrnum = 33, .ctrset = CPUMF_CTRSET_PROBLEM_STATE, .name = "PROBLEM_STATE_INSTRUCTIONS", .desc = "Problem-State Instruction Count", }, { .ctrnum = 34, .ctrset = CPUMF_CTRSET_PROBLEM_STATE, .name = "PROBLEM_STATE_L1I_DIR_WRITES", .desc = "Problem-State Level-1 I-Cache Directory Write Count", }, { .ctrnum = 35, .ctrset = CPUMF_CTRSET_PROBLEM_STATE, .name = "PROBLEM_STATE_L1I_PENALTY_CYCLES", .desc = "Problem-State Level-1 I-Cache Penalty Cycle Count", }, { .ctrnum = 36, .ctrset = CPUMF_CTRSET_PROBLEM_STATE, .name = "PROBLEM_STATE_L1D_DIR_WRITES", .desc = "Problem-State Level-1 D-Cache Directory Write Count", }, { .ctrnum = 37, .ctrset = CPUMF_CTRSET_PROBLEM_STATE, .name = "PROBLEM_STATE_L1D_PENALTY_CYCLES", .desc = "Problem-State Level-1 D-Cache Penalty Cycle Count", }, { .ctrnum = 64, .ctrset = CPUMF_CTRSET_CRYPTO, .name = "PRNG_FUNCTIONS", .desc = "Total number of the PRNG functions issued by the" " CPU", }, { .ctrnum = 65, .ctrset = CPUMF_CTRSET_CRYPTO, .name = "PRNG_CYCLES", .desc = "Total number of CPU cycles when the DEA/AES" " coprocessor is busy performing PRNG functions" " issued by the CPU", }, { .ctrnum = 66, .ctrset = CPUMF_CTRSET_CRYPTO, .name = "PRNG_BLOCKED_FUNCTIONS", .desc = "Total number of the PRNG functions that are issued" " by the CPU and are blocked because the DEA/AES" " coprocessor is busy performing a function issued by" " another CPU", }, { .ctrnum = 67, .ctrset = CPUMF_CTRSET_CRYPTO, .name = "PRNG_BLOCKED_CYCLES", .desc = "Total number of CPU cycles blocked for the PRNG" " functions issued by the CPU because the DEA/AES" " coprocessor is busy performing a function issued by" " another CPU", }, { .ctrnum = 68, .ctrset = CPUMF_CTRSET_CRYPTO, .name = "SHA_FUNCTIONS", .desc = "Total number of SHA functions issued by the CPU", }, { .ctrnum = 69, .ctrset = CPUMF_CTRSET_CRYPTO, .name = "SHA_CYCLES", .desc = "Total number of CPU cycles when 
the SHA coprocessor" " is busy performing the SHA functions issued by the" " CPU", }, { .ctrnum = 70, .ctrset = CPUMF_CTRSET_CRYPTO, .name = "SHA_BLOCKED_FUNCTIONS", .desc = "Total number of the SHA functions that are issued" " by the CPU and are blocked because the SHA" " coprocessor is busy performing a function issued by" " another CPU", }, { .ctrnum = 71, .ctrset = CPUMF_CTRSET_CRYPTO, .name = "SHA_BLOCKED_CYCLES", .desc = "Total number of CPU cycles blocked for the SHA" " functions issued by the CPU because the SHA" " coprocessor is busy performing a function issued by" " another CPU", }, { .ctrnum = 72, .ctrset = CPUMF_CTRSET_CRYPTO, .name = "DEA_FUNCTIONS", .desc = "Total number of the DEA functions issued by the CPU", }, { .ctrnum = 73, .ctrset = CPUMF_CTRSET_CRYPTO, .name = "DEA_CYCLES", .desc = "Total number of CPU cycles when the DEA/AES" " coprocessor is busy performing the DEA functions" " issued by the CPU", }, { .ctrnum = 74, .ctrset = CPUMF_CTRSET_CRYPTO, .name = "DEA_BLOCKED_FUNCTIONS", .desc = "Total number of the DEA functions that are issued" " by the CPU and are blocked because the DEA/AES" " coprocessor is busy performing a function issued by" " another CPU", }, { .ctrnum = 75, .ctrset = CPUMF_CTRSET_CRYPTO, .name = "DEA_BLOCKED_CYCLES", .desc = "Total number of CPU cycles blocked for the DEA" " functions issued by the CPU because the DEA/AES" " coprocessor is busy performing a function issued by" " another CPU", }, { .ctrnum = 76, .ctrset = CPUMF_CTRSET_CRYPTO, .name = "AES_FUNCTIONS", .desc = "Total number of AES functions issued by the CPU", }, { .ctrnum = 77, .ctrset = CPUMF_CTRSET_CRYPTO, .name = "AES_CYCLES", .desc = "Total number of CPU cycles when the DEA/AES" " coprocessor is busy performing the AES functions" " issued by the CPU", }, { .ctrnum = 78, .ctrset = CPUMF_CTRSET_CRYPTO, .name = "AES_BLOCKED_FUNCTIONS", .desc = "Total number of AES functions that are issued by" " the CPU and are blocked because the DEA/AES" " coprocessor is 
busy performing a function issued by" " another CPU", }, { .ctrnum = 79, .ctrset = CPUMF_CTRSET_CRYPTO, .name = "AES_BLOCKED_CYCLES", .desc = "Total number of CPU cycles blocked for the AES" " functions issued by the CPU because the DEA/AES" " coprocessor is busy performing a function issued by" " another CPU", }, }; /* z10 table: every entry belongs to CPUMF_CTRSET_EXTENDED; counter numbers 128-142 and 145-147. */ static const pme_cpumf_ctr_t cpumcf_z10_counters[] = { { .ctrnum = 128, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1I_L2_SOURCED_WRITES", .desc = "A directory write to the Level-1 I-Cache directory" " where the returned cache line was sourced from the" " Level-2 (L1.5) cache", }, { .ctrnum = 129, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1D_L2_SOURCED_WRITES", .desc = "A directory write to the Level-1 D-Cache directory" " where the installed cache line was sourced from the" " Level-2 (L1.5) cache", }, { .ctrnum = 130, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1I_L3_LOCAL_WRITES", .desc = "A directory write to the Level-1 I-Cache directory" " where the installed cache line was sourced from the" " Level-3 cache that is on the same book as the" " Instruction cache (Local L2 cache)", }, { .ctrnum = 131, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1D_L3_LOCAL_WRITES", .desc = "A directory write to the Level-1 D-Cache directory" " where the installed cache line was sourced from" " the Level-3 cache that is on the same book as the" " Data cache (Local L2 cache)", }, { .ctrnum = 132, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1I_L3_REMOTE_WRITES", .desc = "A directory write to the Level-1 I-Cache directory" " where the installed cache line was sourced from a" " Level-3 cache that is not on the same book as the" " Instruction cache (Remote L2 cache)", }, { .ctrnum = 133, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1D_L3_REMOTE_WRITES", .desc = "A directory write to the Level-1 D-Cache directory" " where the installed cache line was sourced from a" " Level-3 cache that is not on the same book as the" " Data cache (Remote L2 cache)", }, { .ctrnum = 134, .ctrset
= CPUMF_CTRSET_EXTENDED, .name = "L1D_LMEM_SOURCED_WRITES", .desc = "A directory write to the Level-1 D-Cache directory" " where the installed cache line was sourced from" " memory that is attached to the same book as the" " Data cache (Local Memory)", }, { .ctrnum = 135, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1I_LMEM_SOURCED_WRITES", .desc = "A directory write to the Level-1 I-Cache where the" " installed cache line was sourced from memory that" " is attached to the same book as the Instruction" " cache (local Memory)", }, { .ctrnum = 136, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1D_RO_EXCL_WRITES", .desc = "A directory write to the Level-1 D-Cache where the" " line was originally in a Read-Only state in the" " cache but has been updated to be in the Exclusive" " state that allows stores to the cache line", }, { .ctrnum = 137, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1I_CACHELINE_INVALIDATES", .desc = "A cache line in the Level-1 I-Cache has been" " invalidated by a store on the same CPU as the" " Level-1 I-Cache", }, { .ctrnum = 138, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "ITLB1_WRITES", .desc = "A translation entry has been written into the" " Level-1 Instruction Translation Lookaside Buffer", }, { .ctrnum = 139, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "DTLB1_WRITES", .desc = "A translation entry has been written to the Level-1" " Data Translation Lookaside Buffer", }, { .ctrnum = 140, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "TLB2_PTE_WRITES", .desc = "A translation entry has been written to the Level-2" " TLB Page Table Entry arrays", }, { .ctrnum = 141, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "TLB2_CRSTE_WRITES", .desc = "A translation entry has been written to the Level-2" " TLB Common Region Segment Table Entry arrays", }, { .ctrnum = 142, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "TLB2_CRSTE_HPAGE_WRITES", .desc = "A translation entry has been written to the Level-2" " TLB Common Region Segment Table Entry arrays for a" " one-megabyte large page
translation", }, { .ctrnum = 145, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "ITLB1_MISSES", .desc = "Level-1 Instruction TLB miss in progress." " Incremented by one for every cycle an ITLB1 miss is" " in progress", }, { .ctrnum = 146, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "DTLB1_MISSES", .desc = "Level-1 Data TLB miss in progress. Incremented by" " one for every cycle a DTLB1 miss is in progress", }, { .ctrnum = 147, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L2C_STORES_SENT", .desc = "Incremented by one for every store sent to Level-2" " (L1.5) cache", }, }; static const pme_cpumf_ctr_t cpumcf_z196_counters[] = { { .ctrnum = 128, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1D_L2_SOURCED_WRITES", .desc = "A directory write to the Level-1 D-Cache directory" " where the returned cache line was sourced from the" " Level-2 cache", }, { .ctrnum = 129, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1I_L2_SOURCED_WRITES", .desc = "A directory write to the Level-1 I-Cache directory" " where the returned cache line was sourced from the" " Level-2 cache", }, { .ctrnum = 130, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "DTLB1_MISSES", .desc = "Level-1 Data TLB miss in progress. Incremented by" " one for every cycle a DTLB1 miss is in progress.", }, { .ctrnum = 131, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "ITLB1_MISSES", .desc = "Level-1 Instruction TLB miss in progress."
" Incremented by one for every cycle a ITLB1 miss is" " in progress.", }, { .ctrnum = 133, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L2C_STORES_SENT", .desc = "Incremented by one for every store sent to Level-2" " cache", }, { .ctrnum = 134, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1D_OFFBOOK_L3_SOURCED_WRITES", .desc = "A directory write to the Level-1 D-Cache directory" " where the returned cache line was sourced from an" " Off Book Level-3 cache", }, { .ctrnum = 135, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1D_ONBOOK_L4_SOURCED_WRITES", .desc = "A directory write to the Level-1 D-Cache directory" " where the returned cache line was sourced from an" " On Book Level-4 cache", }, { .ctrnum = 136, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1I_ONBOOK_L4_SOURCED_WRITES", .desc = "A directory write to the Level-1 I-Cache directory" " where the returned cache line was sourced from an" " On Book Level-4 cache", }, { .ctrnum = 137, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1D_RO_EXCL_WRITES", .desc = "A directory write to the Level-1 D-Cache where the" " line was originally in a Read-Only state in the" " cache but has been updated to be in the Exclusive" " state that allows stores to the cache line", }, { .ctrnum = 138, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1D_OFFBOOK_L4_SOURCED_WRITES", .desc = "A directory write to the Level-1 D-Cache directory" " where the returned cache line was sourced from an" " Off Book Level-4 cache", }, { .ctrnum = 139, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1I_OFFBOOK_L4_SOURCED_WRITES", .desc = "A directory write to the Level-1 I-Cache directory" " where the returned cache line was sourced from an" " Off Book Level-4 cache", }, { .ctrnum = 140, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "DTLB1_HPAGE_WRITES", .desc = "A translation entry has been written to the Level-1" " Data Translation Lookaside Buffer for a one-" " megabyte page", }, { .ctrnum = 141, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1D_LMEM_SOURCED_WRITES", .desc = "A 
directory write to the Level-1 D-Cache where the" " installed cache line was sourced from memory that" " is attached to the same book as the Data cache" " (Local Memory)", }, { .ctrnum = 142, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1I_LMEM_SOURCED_WRITES", .desc = "A directory write to the Level-1 I-Cache where the" " installed cache line was sourced from memory that" " is attached to the same book as the Instruction" " cache (Local Memory)", }, { .ctrnum = 143, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1I_OFFBOOK_L3_SOURCED_WRITES", .desc = "A directory write to the Level-1 I-Cache directory" " where the returned cache line was sourced from an" " Off Book Level-3 cache", }, { .ctrnum = 144, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "DTLB1_WRITES", .desc = "A translation entry has been written to the Level-1" " Data Translation Lookaside Buffer", }, { .ctrnum = 145, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "ITLB1_WRITES", .desc = "A translation entry has been written to the Level-1" " Instruction Translation Lookaside Buffer", }, { .ctrnum = 146, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "TLB2_PTE_WRITES", .desc = "A translation entry has been written to the Level-2" " TLB Page Table Entry arrays", }, { .ctrnum = 147, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "TLB2_CRSTE_HPAGE_WRITES", .desc = "A translation entry has been written to the Level-2" " TLB Common Region Segment Table Entry arrays for a" " one-megabyte large page translation", }, { .ctrnum = 148, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "TLB2_CRSTE_WRITES", .desc = "A translation entry has been written to the Level-2" " TLB Common Region Segment Table Entry arrays", }, { .ctrnum = 150, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1D_ONCHIP_L3_SOURCED_WRITES", .desc = "A directory write to the Level-1 D-Cache directory" " where the returned cache line was sourced from an" " On Chip Level-3 cache", }, { .ctrnum = 152, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1D_OFFCHIP_L3_SOURCED_WRITES", .desc = "A 
directory write to the Level-1 D-Cache directory" " where the returned cache line was sourced from an" " Off Chip/On Book Level-3 cache", }, { .ctrnum = 153, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1I_ONCHIP_L3_SOURCED_WRITES", .desc = "A directory write to the Level-1 I-Cache directory" " where the returned cache line was sourced from an" " On Chip Level-3 cache", }, { .ctrnum = 155, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1I_OFFCHIP_L3_SOURCED_WRITES", .desc = "A directory write to the Level-1 I-Cache directory" " where the returned cache line was sourced from an" " Off Chip/On Book Level-3 cache", }, }; static const pme_cpumf_ctr_t cpumcf_zec12_counters[] = { { .ctrnum = 128, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "DTLB1_MISSES", .desc = "Level-1 Data TLB miss in progress. Incremented by" " one for every cycle a DTLB1 miss is in progress.", }, { .ctrnum = 129, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "ITLB1_MISSES", .desc = "Level-1 Instruction TLB miss in progress." " Incremented by one for every cycle a ITLB1 miss is" " in progress.", }, { .ctrnum = 130, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1D_L2I_SOURCED_WRITES", .desc = "A directory write to the Level-1 Data cache" " directory where the returned cache line was sourced" " from the Level-2 Instruction cache", }, { .ctrnum = 131, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1I_L2I_SOURCED_WRITES", .desc = "A directory write to the Level-1 Instruction cache" " directory where the returned cache line was sourced" " from the Level-2 Instruction cache", }, { .ctrnum = 132, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1D_L2D_SOURCED_WRITES", .desc = "A directory write to the Level-1 Data cache" " directory where the returned cache line was sourced" " from the Level-2 Data cache", }, { .ctrnum = 133, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "DTLB1_WRITES", .desc = "A translation entry has been written to the Level-1" " Data Translation Lookaside Buffer", }, { .ctrnum = 135, .ctrset = CPUMF_CTRSET_EXTENDED, 
.name = "L1D_LMEM_SOURCED_WRITES", .desc = "A directory write to the Level-1 Data cache where" " the installed cache line was sourced from memory" " that is attached to the same book as the Data cache" " (Local Memory)", }, { .ctrnum = 137, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1I_LMEM_SOURCED_WRITES", .desc = "A directory write to the Level-1 Instruction cache" " where the installed cache line was sourced from" " memory that is attached to the same book as the" " Instruction cache (Local Memory)", }, { .ctrnum = 138, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1D_RO_EXCL_WRITES", .desc = "A directory write to the Level-1 D-Cache where the" " line was originally in a Read-Only state in the" " cache but has been updated to be in the Exclusive" " state that allows stores to the cache line", }, { .ctrnum = 139, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "DTLB1_HPAGE_WRITES", .desc = "A translation entry has been written to the Level-1" " Data Translation Lookaside Buffer for a one-" " megabyte page", }, { .ctrnum = 140, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "ITLB1_WRITES", .desc = "A translation entry has been written to the Level-1" " Instruction Translation Lookaside Buffer", }, { .ctrnum = 141, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "TLB2_PTE_WRITES", .desc = "A translation entry has been written to the Level-2" " TLB Page Table Entry arrays", }, { .ctrnum = 142, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "TLB2_CRSTE_HPAGE_WRITES", .desc = "A translation entry has been written to the Level-2" " TLB Common Region Segment Table Entry arrays for a" " one-megabyte large page translation", }, { .ctrnum = 143, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "TLB2_CRSTE_WRITES", .desc = "A translation entry has been written to the Level-2" " TLB Common Region Segment Table Entry arrays", }, { .ctrnum = 144, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1D_ONCHIP_L3_SOURCED_WRITES", .desc = "A directory write to the Level-1 Data cache" " directory where the returned cache line 
was sourced" " from an On Chip Level-3 cache without intervention", }, { .ctrnum = 145, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1D_OFFCHIP_L3_SOURCED_WRITES", .desc = "A directory write to the Level-1 Data cache" " directory where the returned cache line was sourced" " from an Off Chip/On Book Level-3 cache without" " intervention", }, { .ctrnum = 146, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1D_OFFBOOK_L3_SOURCED_WRITES", .desc = "A directory write to the Level-1 Data cache" " directory where the returned cache line was sourced" " from an Off Book Level-3 cache without intervention", }, { .ctrnum = 147, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1D_ONBOOK_L4_SOURCED_WRITES", .desc = "A directory write to the Level-1 Data cache" " directory where the returned cache line was sourced" " from an On Book Level-4 cache", }, { .ctrnum = 148, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1D_OFFBOOK_L4_SOURCED_WRITES", .desc = "A directory write to the Level-1 Data cache" " directory where the returned cache line was sourced" " from an Off Book Level-4 cache", }, { .ctrnum = 149, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "TX_NC_TEND", .desc = "A TEND instruction has completed in a" " nonconstrained transactional-execution mode", }, /* 150-152 are the "_IV" (with intervention) variants of the Level-3 sourced-write counters 144-146. */ { .ctrnum = 150, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1D_ONCHIP_L3_SOURCED_WRITES_IV", .desc = "A directory write to the Level-1 Data cache" " directory where the returned cache line was sourced" " from an On Chip Level-3 cache with intervention", }, { .ctrnum = 151, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1D_OFFCHIP_L3_SOURCED_WRITES_IV", .desc = "A directory write to the Level-1 Data cache" " directory where the returned cache line was sourced" " from an Off Chip/On Book Level-3 cache with" " intervention", }, { .ctrnum = 152, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1D_OFFBOOK_L3_SOURCED_WRITES_IV", .desc = "A directory write to the Level-1 Data cache" " directory where the returned cache line was sourced" " from an Off Book
Level-3 cache with intervention", }, { .ctrnum = 153, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1I_ONCHIP_L3_SOURCED_WRITES", .desc = "A directory write to the Level-1 Instruction cache" " directory where the returned cache line was sourced" " from an On Chip Level-3 cache without intervention", }, { .ctrnum = 154, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1I_OFFCHIP_L3_SOURCED_WRITES", .desc = "A directory write to the Level-1 Instruction cache" " directory where the returned cache line was sourced" " from an Off Chip/On Book Level-3 cache without" " intervention", }, { .ctrnum = 155, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1I_OFFBOOK_L3_SOURCED_WRITES", .desc = "A directory write to the Level-1 Instruction cache" " directory where the returned cache line was sourced" " from an Off Book Level-3 cache without intervention", }, { .ctrnum = 156, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1I_ONBOOK_L4_SOURCED_WRITES", .desc = "A directory write to the Level-1 Instruction cache" " directory where the returned cache line was sourced" " from an On Book Level-4 cache", }, { .ctrnum = 157, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1I_OFFBOOK_L4_SOURCED_WRITES", .desc = "A directory write to the Level-1 Instruction cache" " directory where the returned cache line was sourced" " from an Off Book Level-4 cache", }, { .ctrnum = 158, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "TX_C_TEND", .desc = "A TEND instruction has completed in a constrained" " transactional-execution mode", }, { .ctrnum = 159, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1I_ONCHIP_L3_SOURCED_WRITES_IV", .desc = "A directory write to the Level-1 Instruction cache" " directory where the returned cache line was sourced" " from an On Chip Level-3 cache with intervention", }, { .ctrnum = 160, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1I_OFFCHIP_L3_SOURCED_WRITES_IV", .desc = "A directory write to the Level-1 Instruction cache" " directory where the returned cache line was sourced" " from an Off Chip/On Book 
Level-3 cache with" " intervention", }, { .ctrnum = 161, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1I_OFFBOOK_L3_SOURCED_WRITES_IV", .desc = "A directory write to the Level-1 Instruction cache" " directory where the returned cache line was sourced" " from an Off Book Level-3 cache with intervention", }, { .ctrnum = 177, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "TX_NC_TABORT", .desc = "A transaction abort has occurred in a" " nonconstrained transactional-execution mode", }, { .ctrnum = 178, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "TX_C_TABORT_NO_SPECIAL", .desc = "A transaction abort has occurred in a constrained" " transactional-execution mode and the CPU is not" " using any special logic to allow the transaction to" " complete", }, { .ctrnum = 179, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "TX_C_TABORT_SPECIAL", .desc = "A transaction abort has occurred in a constrained" " transactional-execution mode and the CPU is using" " special logic to allow the transaction to complete", }, }; static const pme_cpumf_ctr_t cpumcf_z13_counters[] = { { .ctrnum = 128, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1D_WRITES_RO_EXCL", .desc = "Counter:128 Name:L1D_WRITES_RO_EXCL A directory" " write to the Level-1 Data cache where the line was" " originally in a Read-Only state in the cache but" " has been updated to be in the Exclusive state that" " allows stores to the cache line.", }, { .ctrnum = 129, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "DTLB1_WRITES", .desc = "A translation entry has been written to the Level-1" " Data Translation Lookaside Buffer", }, { .ctrnum = 130, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "DTLB1_MISSES", .desc = "Level-1 Data TLB miss in progress. 
Incremented by" " one for every cycle a DTLB1 miss is in progress.", }, { .ctrnum = 131, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "DTLB1_HPAGE_WRITES", .desc = "A translation entry has been written to the Level-1" " Data Translation Lookaside Buffer for a one-" " megabyte page", }, { .ctrnum = 132, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "DTLB1_GPAGE_WRITES", .desc = "Counter:132 Name:DTLB1_GPAGE_WRITES A translation" " entry has been written to the Level-1 Data" " Translation Lookaside Buffer for a two-gigabyte" " page.", }, { .ctrnum = 133, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1D_L2D_SOURCED_WRITES", .desc = "A directory write to the Level-1 Data cache" " directory where the returned cache line was sourced" " from the Level-2 Data cache", }, { .ctrnum = 134, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "ITLB1_WRITES", .desc = "A translation entry has been written to the Level-1" " Instruction Translation Lookaside Buffer", }, { .ctrnum = 135, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "ITLB1_MISSES", .desc = "Level-1 Instruction TLB miss in progress." 
" Incremented by one for every cycle an ITLB1 miss is" " in progress", }, { .ctrnum = 136, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1I_L2I_SOURCED_WRITES", .desc = "A directory write to the Level-1 Instruction cache" " directory where the returned cache line was sourced" " from the Level-2 Instruction cache", }, { .ctrnum = 137, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "TLB2_PTE_WRITES", .desc = "A translation entry has been written to the Level-2" " TLB Page Table Entry arrays", }, { .ctrnum = 138, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "TLB2_CRSTE_HPAGE_WRITES", .desc = "A translation entry has been written to the Level-2" " TLB Combined Region Segment Table Entry arrays for" " a one-megabyte large page translation", }, { .ctrnum = 139, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "TLB2_CRSTE_WRITES", .desc = "A translation entry has been written to the Level-2" " TLB Combined Region Segment Table Entry arrays", }, { .ctrnum = 140, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "TX_C_TEND", .desc = "A TEND instruction has completed in a constrained" " transactional-execution mode", }, { .ctrnum = 141, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "TX_NC_TEND", .desc = "A TEND instruction has completed in a non-" " constrained transactional-execution mode", }, { .ctrnum = 143, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1C_TLB1_MISSES", .desc = "Increments by one for any cycle where a Level-1" " cache or Level-1 TLB miss is in progress.", }, { .ctrnum = 144, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1D_ONCHIP_L3_SOURCED_WRITES", .desc = "A directory write to the Level-1 Data cache" " directory where the returned cache line was sourced" " from an On-Chip Level-3 cache without intervention", }, { .ctrnum = 145, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1D_ONCHIP_L3_SOURCED_WRITES_IV", .desc = "A directory write to the Level-1 Data cache" " directory where the returned cache line was sourced" " from an On-Chip Level-3 cache with intervention", }, { .ctrnum = 146, .ctrset = 
CPUMF_CTRSET_EXTENDED, .name = "L1D_ONNODE_L4_SOURCED_WRITES", .desc = "A directory write to the Level-1 Data cache" " directory where the returned cache line was sourced" " from an On-Node Level-4 cache", }, { .ctrnum = 147, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1D_ONNODE_L3_SOURCED_WRITES_IV", .desc = "A directory write to the Level-1 Data cache" " directory where the returned cache line was sourced" " from an On-Node Level-3 cache with intervention", }, { .ctrnum = 148, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1D_ONNODE_L3_SOURCED_WRITES", .desc = "A directory write to the Level-1 Data cache" " directory where the returned cache line was sourced" " from an On-Node Level-3 cache without intervention", }, { .ctrnum = 149, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1D_ONDRAWER_L4_SOURCED_WRITES", .desc = "A directory write to the Level-1 Data cache" " directory where the returned cache line was sourced" " from an On-Drawer Level-4 cache", }, { .ctrnum = 150, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1D_ONDRAWER_L3_SOURCED_WRITES_IV", .desc = "A directory write to the Level-1 Data cache" " directory where the returned cache line was sourced" " from an On-Drawer Level-3 cache with intervention", }, { .ctrnum = 151, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1D_ONDRAWER_L3_SOURCED_WRITES", .desc = "A directory write to the Level-1 Data cache" " directory where the returned cache line was sourced" " from an On-Drawer Level-3 cache without" " intervention", }, { .ctrnum = 152, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1D_OFFDRAWER_SCOL_L4_SOURCED_WRITES", .desc = "A directory write to the Level-1 Data cache" " directory where the returned cache line was sourced" " from an Off-Drawer Same-Column Level-4 cache", }, { .ctrnum = 153, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV", .desc = "A directory write to the Level-1 Data cache" " directory where the returned cache line was sourced" " from an Off-Drawer Same-Column Level-3 
cache with" " intervention", }, { .ctrnum = 154, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES", .desc = "A directory write to the Level-1 Data cache" " directory where the returned cache line was sourced" " from an Off-Drawer Same-Column Level-3 cache" " without intervention", }, { .ctrnum = 155, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1D_OFFDRAWER_FCOL_L4_SOURCED_WRITES", .desc = "A directory write to the Level-1 Data cache" " directory where the returned cache line was sourced" " from an Off-Drawer Far-Column Level-4 cache", }, { .ctrnum = 156, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV", .desc = "A directory write to the Level-1 Data cache" " directory where the returned cache line was sourced" " from an Off-Drawer Far-Column Level-3 cache with" " intervention", }, { .ctrnum = 157, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES", .desc = "A directory write to the Level-1 Data cache" " directory where the returned cache line was sourced" " from an Off-Drawer Far-Column Level-3 cache without" " intervention", }, { .ctrnum = 158, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1D_ONNODE_MEM_SOURCED_WRITES", .desc = "A directory write to the Level-1 Data cache" " directory where the returned cache line was sourced" " from On-Node memory", }, { .ctrnum = 159, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1D_ONDRAWER_MEM_SOURCED_WRITES", .desc = "A directory write to the Level-1 Data cache" " directory where the returned cache line was sourced" " from On-Drawer memory", }, { .ctrnum = 160, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1D_OFFDRAWER_MEM_SOURCED_WRITES", .desc = "A directory write to the Level-1 Data cache" " directory where the returned cache line was sourced" " from On-Drawer memory", }, { .ctrnum = 161, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1D_ONCHIP_MEM_SOURCED_WRITES", .desc = "A directory write to the Level-1 Data cache" " directory where the 
returned cache line was sourced" " from On-Chip memory", }, { .ctrnum = 162, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1I_ONCHIP_L3_SOURCED_WRITES", .desc = "A directory write to the Level-1 Instruction cache" " directory where the returned cache line was sourced" " from an On-Chip Level-3 cache without intervention", }, { .ctrnum = 163, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1I_ONCHIP_L3_SOURCED_WRITES_IV", .desc = "A directory write to the Level-1 Instruction cache" " directory where the returned cache line was sourced" " from an On Chip Level-3 cache with intervention", }, { .ctrnum = 164, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1I_ONNODE_L4_SOURCED_WRITES", .desc = "A directory write to the Level-1 Instruction cache" " directory where the returned cache line was sourced" " from an On-Node Level-4 cache", }, { .ctrnum = 165, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1I_ONNODE_L3_SOURCED_WRITES_IV", .desc = "A directory write to the Level-1 Instruction cache" " directory where the returned cache line was sourced" " from an On-Node Level-3 cache with intervention", }, { .ctrnum = 166, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1I_ONNODE_L3_SOURCED_WRITES", .desc = "A directory write to the Level-1 Instruction cache" " directory where the returned cache line was sourced" " from an On-Node Level-3 cache without intervention", }, { .ctrnum = 167, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1I_ONDRAWER_L4_SOURCED_WRITES", .desc = "A directory write to the Level-1 Instruction cache" " directory where the returned cache line was sourced" " from an On-Drawer Level-4 cache", }, { .ctrnum = 168, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1I_ONDRAWER_L3_SOURCED_WRITES_IV", .desc = "A directory write to the Level-1 Instruction cache" " directory where the returned cache line was sourced" " from an On-Drawer Level-3 cache with intervention", }, { .ctrnum = 169, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1I_ONDRAWER_L3_SOURCED_WRITES", .desc = "A directory write to the 
Level-1 Instruction cache" " directory where the returned cache line was sourced" " from an On-Drawer Level-3 cache without" " intervention", }, { .ctrnum = 170, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1I_OFFDRAWER_SCOL_L4_SOURCED_WRITES", .desc = "A directory write to the Level-1 Instruction cache" " directory where the returned cache line was sourced" " from an Off-Drawer Same-Column Level-4 cache", }, { .ctrnum = 171, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV", .desc = "A directory write to the Level-1 Instruction cache" " directory where the returned cache line was sourced" " from an Off-Drawer Same-Column Level-3 cache with" " intervention", }, { .ctrnum = 172, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES", .desc = "A directory write to the Level-1 Instruction cache" " directory where the returned cache line was sourced" " from an Off-Drawer Same-Column Level-3 cache" " without intervention", }, { .ctrnum = 173, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1I_OFFDRAWER_FCOL_L4_SOURCED_WRITES", .desc = "A directory write to the Level-1 Instruction cache" " directory where the returned cache line was sourced" " from an Off-Drawer Far-Column Level-4 cache", }, { .ctrnum = 174, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV", .desc = "A directory write to the Level-1 Instruction cache" " directory where the returned cache line was sourced" " from an Off-Drawer Far-Column Level-3 cache with" " intervention", }, { .ctrnum = 175, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES", .desc = "A directory write to the Level-1 Instruction cache" " directory where the returned cache line was sourced" " from an Off-Drawer Far-Column Level-3 cache without" " intervention", }, { .ctrnum = 176, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1I_ONNODE_MEM_SOURCED_WRITES", .desc = "A directory write to the Level-1 Instruction cache" " directory 
where the returned cache line was sourced" " from On-Node memory", }, { .ctrnum = 177, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1I_ONDRAWER_MEM_SOURCED_WRITES", .desc = "A directory write to the Level-1 Instruction cache" " directory where the returned cache line was sourced" " from On-Drawer memory", }, { .ctrnum = 178, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1I_OFFDRAWER_MEM_SOURCED_WRITES", .desc = "A directory write to the Level-1 Instruction cache" " directory where the returned cache line was sourced" " from On-Drawer memory", }, { .ctrnum = 179, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "L1I_ONCHIP_MEM_SOURCED_WRITES", .desc = "A directory write to the Level-1 Instruction cache" " directory where the returned cache line was sourced" " from On-Chip memory", }, { .ctrnum = 218, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "TX_NC_TABORT", .desc = "A transaction abort has occurred in a non-" " constrained transactional-execution mode", }, { .ctrnum = 219, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "TX_C_TABORT_NO_SPECIAL", .desc = "A transaction abort has occurred in a constrained" " transactional-execution mode and the CPU is not" " using any special logic to allow the transaction to" " complete", }, { .ctrnum = 220, .ctrset = CPUMF_CTRSET_EXTENDED, .name = "TX_C_TABORT_SPECIAL", .desc = "A transaction abort has occurred in a constrained" " transactional-execution mode and the CPU is using" " special logic to allow the transaction to complete", }, { .ctrnum = 448, .ctrset = CPUMF_CTRSET_MT_DIAG, .name = "MT_DIAG_CYCLES_ONE_THR_ACTIVE", .desc = "Cycle count with one thread active", }, { .ctrnum = 449, .ctrset = CPUMF_CTRSET_MT_DIAG, .name = "MT_DIAG_CYCLES_TWO_THR_ACTIVE", .desc = "Cycle count with two threads active", }, }; static const pme_cpumf_ctr_t cpumsf_counters[] = { { .ctrnum = 720896, .ctrset = CPUMF_CTRSET_NONE, .name = "SF_CYCLES_BASIC", .desc = "Sample CPU cycles using basic-sampling mode", }, { .ctrnum = 774144, .ctrset = CPUMF_CTRSET_NONE, .name = 
"SF_CYCLES_BASIC_DIAG", .desc = "Sample CPU cycle using diagnostic-sampling mode" " (not for ordinary use)", }, }; #endif /* __S390X_CPUMF_EVENTS_H__ */ papi-5.6.0/src/components/powercap/utils/README000664 001750 001750 00000001754 13216244360 023355 0ustar00jshenry1963jshenry1963000000 000000 powercap_write_test This test demonstrates the ability of PAPI to use the Linux powercap interface to read and write power constraints to gather energy information and apply power constraints. See the instructions in the components/powercap directory to build the component. It works by using PAPI to read information from Linux powercap interface and write it to a file. The code repeatedly runs an OpenMP multithreaded routine (primes) to stress the CPU. The routine is called 100 times, and at each call the power measurements are read using PAPI_read(EventSet, values) and printed to a file. At every 10th call the power caps for the power packages are adjusted using PAPI_write(EventSet, values ). Build and run the test as follows cd components/powercap/utils/ make clean make ./powercap_write_test In the output file, you can see the power values that are SET and the power values that are READ. By comparing them you can confirm that the component is performing as expected. papi-5.6.0/src/libpfm4/lib/pfmlib_intel_knl_unc_m2pcie.c000664 001750 001750 00000006045 13216244365 025216 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_intel_knl_m2pcie.c : Intel Knights Landing M2PCIe uncore PMU * * Copyright (c) 2016 Intel Corp. 
All rights reserved * Contributed by Peinan Zhang * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include #include #include #include #include /* private headers */ #include "pfmlib_priv.h" #include "pfmlib_intel_x86_priv.h" #include "pfmlib_intel_snbep_unc_priv.h" #include "events/intel_knl_unc_m2pcie_events.h" /* display_m2p: verbose-mode dump of one encoded M2PCIe counter register. 'this' is the PMU descriptor (passed to this_pe() to obtain the event table), 'e' supplies the event index, and 'val' is read as a pfm_snbep_unc_reg_t. Prints the raw register value, its event/umask/en/inv/edge/thres fields and the event name through __pfm_vbprintf(). The label is UNC_M2PCIE so the output names this PMU rather than the R2PCIe box the code was derived from. */ static void display_m2p(void *this, pfmlib_event_desc_t *e, void *val) { const intel_x86_entry_t *pe = this_pe(this); pfm_snbep_unc_reg_t *reg = val; __pfm_vbprintf("[UNC_M2PCIE=0x%"PRIx64" event=0x%x umask=0x%x en=%d " "inv=%d edge=%d thres=%d] %s\n", reg->val, reg->com.unc_event, reg->com.unc_umask, reg->com.unc_en, reg->com.unc_inv, reg->com.unc_edge, reg->com.unc_thres, pe[e->event].name); } /* PMU descriptor for the Knights Landing M2PCIe uncore: 4 counters, no fixed counters, event table intel_knl_unc_m2pcie_pe. Encoding and perf attribute validation use the snbep uncore helpers; event enumeration and lookup use the generic intel_x86 helpers; display_reg points at display_m2p above. */ pfmlib_pmu_t intel_knl_unc_m2pcie_support = { .desc = "Intel Knights Landing M2PCIe uncore", .name = "knl_unc_m2pcie", .perf_name = "uncore_m2pcie", .pmu = PFM_PMU_INTEL_KNL_UNC_M2PCIE, .pme_count = LIBPFM_ARRAY_SIZE(intel_knl_unc_m2pcie_pe), .type = PFM_PMU_TYPE_UNCORE, .num_cntrs = 4, .num_fixed_cntrs = 0, .max_encoding = 1, .pe = intel_knl_unc_m2pcie_pe, .atdesc = snbep_unc_mods, .flags = PFMLIB_PMU_FL_RAW_UMASK, .pmu_detect = pfm_intel_knl_unc_detect, .get_event_encoding[PFM_OS_NONE] = pfm_intel_snbep_unc_get_encoding, PFMLIB_ENCODE_PERF(pfm_intel_snbep_unc_get_perf_encoding), PFMLIB_OS_DETECT(pfm_intel_x86_perf_detect), .get_event_first = pfm_intel_x86_get_event_first, .get_event_next = pfm_intel_x86_get_event_next, .event_is_valid = pfm_intel_x86_event_is_valid, .validate_table = pfm_intel_x86_validate_table, .get_event_info = pfm_intel_x86_get_event_info, .get_event_attr_info = pfm_intel_x86_get_event_attr_info, PFMLIB_VALID_PERF_PATTRS(pfm_intel_snbep_unc_perf_validate_pattrs), .get_event_nattrs = pfm_intel_x86_get_event_nattrs, .display_reg = display_m2p, }; papi-5.6.0/src/libpfm4/lib/pfmlib_intel_snbep_unc_r3qpi.c000664 001750 001750 00000005235 13216244365 025420 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_intel_snbep_r3qpi.c : Intel SandyBridge-EP R3QPI uncore PMU * * Copyright (c) 2012 Google, Inc * Contributed by Stephane Eranian * * 
Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include #include #include #include #include /* private headers */ #include "pfmlib_priv.h" #include "pfmlib_intel_x86_priv.h" #include "pfmlib_intel_snbep_unc_priv.h" #include "events/intel_snbep_unc_r3qpi_events.h" /* DEFINE_R3QPI_BOX(n): expands to the definition of the pfmlib_pmu_t descriptor intel_snbep_unc_r3qpi<n>_support. The argument n is token-pasted into the symbol name and the PFM_PMU_INTEL_SNBEP_UNC_R3QPI<n> id, and stringified into the desc, name and perf_name strings. Every box shares the event table intel_snbep_unc_r3_pe, declares 3 counters and no fixed counters, and sets PFMLIB_PMU_FL_RAW_UMASK | PFMLIB_PMU_FL_NO_SMPL. The macro is instantiated right after its definition for boxes 0 and 1. */ #define DEFINE_R3QPI_BOX(n) \ pfmlib_pmu_t intel_snbep_unc_r3qpi##n##_support = {\ .desc = "Intel Sandy Bridge-EP R3QPI"#n" uncore", \ .name = "snbep_unc_r3qpi"#n,\ .perf_name = "uncore_r3qpi_"#n, \ .pmu = PFM_PMU_INTEL_SNBEP_UNC_R3QPI##n, \ .pme_count = LIBPFM_ARRAY_SIZE(intel_snbep_unc_r3_pe),\ .type = PFM_PMU_TYPE_UNCORE,\ .num_cntrs = 3,\ .num_fixed_cntrs = 0,\ .max_encoding = 1,\ .pe = intel_snbep_unc_r3_pe,\ .atdesc = snbep_unc_mods,\ .flags = PFMLIB_PMU_FL_RAW_UMASK\ | PFMLIB_PMU_FL_NO_SMPL,\ .pmu_detect = pfm_intel_snbep_unc_detect,\ .get_event_encoding[PFM_OS_NONE] = pfm_intel_snbep_unc_get_encoding,\ PFMLIB_ENCODE_PERF(pfm_intel_snbep_unc_get_perf_encoding),\ .get_event_first = pfm_intel_x86_get_event_first,\ .get_event_next = pfm_intel_x86_get_event_next,\ .event_is_valid = pfm_intel_x86_event_is_valid,\ .validate_table = pfm_intel_x86_validate_table,\ .get_event_info = pfm_intel_x86_get_event_info,\ .get_event_attr_info = pfm_intel_x86_get_event_attr_info,\ PFMLIB_VALID_PERF_PATTRS(pfm_intel_snbep_unc_perf_validate_pattrs),\ .get_event_nattrs = pfm_intel_x86_get_event_nattrs,\ } DEFINE_R3QPI_BOX(0); DEFINE_R3QPI_BOX(1); papi-5.6.0/src/run_tests_exclude.txt000664 001750 001750 00000003013 13216244370 021617 0ustar00jshenry1963jshenry1963000000 000000 # this file enumerates test cases that will NOT be run # when the run_tests.sh macro is executed # enter each test name on a separate line # lines beginning with # will be ignored # this file must have UNIX line endings # For starters we do not want to try and execute Makefiles ftests/Makefile.recipies ftests/Makefile ftests/Makefile.target.in ctests/Makefile.recipies ctests/Makefile ctests/Makefile.target.in ctests/Make-export components/infiniband/tests/Makefile 
components/cuda/tests/Makefile components/Makefile_comp_tests components/net/tests/Makefile components/lustre/tests/Makefile components/perf_event/tests/Makefile components/nvml/tests/Makefile components/perf_event_uncore/tests/Makefile components/rapl/tests/Makefile components/bcs/tests/Makefile testlib/Makefile testlib/Makefile.target.in # Template PBS Job Script for Parallel Job on Myrinet Nodes ctests/cpi.pbs # Time wasting support program, not a standalone test ctests/burn # Support program for the attach tests ctests/attach_target # long running tests (if you are not in a hurry comment these lines) ctests/pthrtough2 ctests/timer_overflow # Some architectures require OMP_NUM_THREADS otherwise the test hangs ctests/omptough # these tests haven't been implemented # Helper scripts for iozone components/appio/tests/iozone/Gnuplot.txt components/appio/tests/iozone/Generate_Graphs components/appio/tests/iozone/report.pl components/appio/tests/iozone/iozone_visualizer.pl components/appio/tests/iozone/gengnuplot.sh components/appio/tests/iozone/gnu3d.dem papi-5.6.0/src/libpfm4/lib/events/intel_bdx_unc_ubo_events.h000664 001750 001750 00000004716 13216244364 026163 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2017 Google Inc. All rights reserved * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. * * PMU: bdx_unc_ubo */ /* Unit masks for UNC_U_EVENT_MSG: a single entry, flagged INTEL_X86_DFL. */ static intel_x86_umask_t bdx_unc_u_event_msg[]={ { .uname = "DOORBELL_RCVD", .ucode = 0x800, .udesc = "VLW Received", .uflags = INTEL_X86_DFL, }, }; /* Unit masks for UNC_U_PHOLD_CYCLES: a single entry, flagged INTEL_X86_DFL. */ static intel_x86_umask_t bdx_unc_u_phold_cycles[]={ { .uname = "ASSERT_TO_ACK", .ucode = 0x100, .udesc = "Cycles PHOLD Assert to Ack. Assert to ACK", .uflags = INTEL_X86_DFL, }, }; /* Event table for the bdx_unc_ubo PMU: three events, all with cntmsk 0x3 and modmsk BDX_UNC_UBO_ATTRS. The first two each reference one unit-mask group (ngrp = 1) from the tables above; UNC_U_RACU_REQUESTS declares no unit masks. */ static intel_x86_entry_t intel_bdx_unc_u_pe[]={ { .name = "UNC_U_EVENT_MSG", .code = 0x42, .desc = "Virtual Logical Wire (legacy) message were received from uncore", .modmsk = BDX_UNC_UBO_ATTRS, .cntmsk = 0x3, .ngrp = 1, .umasks = bdx_unc_u_event_msg, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_u_event_msg), }, { .name = "UNC_U_PHOLD_CYCLES", .code = 0x45, .desc = "PHOLD cycles. Filter from source CoreID.", .modmsk = BDX_UNC_UBO_ATTRS, .cntmsk = 0x3, .ngrp = 1, .umasks = bdx_unc_u_phold_cycles, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_u_phold_cycles), }, { .name = "UNC_U_RACU_REQUESTS", .code = 0x46, .desc = "Number outstanding register requests within message channel tracker", .modmsk = BDX_UNC_UBO_ATTRS, .cntmsk = 0x3, }, }; papi-5.6.0/src/event_data/power4/groups000664 001750 001750 00000026123 13216244361 022122 0ustar00jshenry1963jshenry1963000000 000000 { **************************** { THIS IS OPEN SOURCE CODE { **************************** { (C) COPYRIGHT International Business Machines Corp. 
2005 { This file is licensed under the University of Tennessee license. { See LICENSE.txt. { { File: events/power4/groups { Author: Maynard Johnson { maynardj@us.ibm.com { Mods: { { Number of groups 63 { Group descriptions #0,94,81,83,77,81,81,77,80,pm_slice0,Time Slice 0 ##8005,800F,8001,8001,8003,800F,8003,8003 00000D0E,00000000,4A5675AC,00022000 Time Slice 0 #1,81,81,79,13,32,86,84,82,pm_eprof,Group for use with eprof ##800F,800F,8C10,0C17,0621,8001,8C10,8C10 0000070E,10034000,45F29420,00002001 Group for use with eprof #2,86,81,79,13,32,86,84,82,pm_basic,Basic performance indicators ##8001,800F,8C10,0C17,0621,8001,8C10,8C10 0000090E,10034000,45F29420,00002000 Basic performance indicators #3,86,0,8,9,33,81,10,36,pm_ifu,IFU events ##8001,0224,0230,0231,0223,800F,0232,0233 00000938,80000000,C6767D6C,00022000 IFU events #4,7,1,33,77,86,26,73,79,pm_isu,ISU Queue full events ##0601,0605,0635,8001,8600,0600,800F,8110 0000112A,50041000,EA5103A0,00002000 ISU Queue full events #5,82,82,74,74,83,82,74,75,pm_lsource,Information on data source ##8C66,8C66,8C66,8C66,8C66,8C66,8C66,8C66 00000E1C,0010C000,739CE738,00002000 Information on data source #6,87,86,78,78,91,87,79,73,pm_isource,Instruction Source information ##8227,8227,8227,8227,8227,8227,8227,8227 00000F1E,80000000,7BDEF7BC,00022000 Instruction Source information #7,88,87,73,77,92,89,84,82,pm_lsu,Information on the Load Store Unit ##8C00,8C00,800F,8001,8C00,8C00,8C10,8C10 00000810,000F0000,3A508420,00002000 Information on the Load Store Unit #8,35,6,12,53,31,88,78,74,pm_xlate1,Translation Events ##0900,0904,0936,0931,0227,8900,8001,800F 00001028,81082000,F67E849C,00022000 Translation Events #9,34,5,56,52,31,88,78,74,pm_xlate2,Translation Events ##0901,0905,0932,0935,0227,8900,8001,800F 0000112A,81082000,D77E849C,00022000 Translation Events #10,50,49,17,18,52,51,78,74,pm_gps1,L3 Events ##4000,4001,4016,4017,4002,4003,8001,800F 00001022,00000C00,B5E5349C,00022000 L3 Events #11,38,39,38,37,40,37,78,74,pm_l2a,L2 SliceA 
events ##4006,4005,4010,4011,4004,4007,8001,800F 0000162A,00000C00,8469749C,00022000 L2 SliceA events #12,42,43,40,39,44,41,78,74,pm_l2b,L2 SliceB events ##4022,4021,4012,4013,4020,4023,8001,800F 00001A32,00000600,94F1B49C,00022000 L2 SliceB events #13,46,47,42,41,48,45,78,74,pm_l2c,L2 SliceC events ##4026,4025,4014,4015,4024,4027,8001,800F 00001E3A,00000600,A579F49C,00022000 L2 SliceC events #14,84,83,75,75,82,83,78,77,pm_fpu1,Floating Point events ##8000,8000,8010,8010,800F,8000,8001,8010 00000810,00000000,420E84A0,00002000 Floating Point events #15,83,84,73,77,84,84,75,78,pm_fpu2,Floating Point events ##8020,8020,800F,8001,8000,8020,8010,8930 00000810,010020E8,3A508420,00002000 Floating Point events #16,86,81,0,1,81,81,2,3,pm_idu1,Instruction Decode Unit events ##8001,800F,0450,0451,8003,800F,0452,0453 0000090E,04010000,8456794C,00022000 Instruction Decode Unit events #17,86,81,4,5,89,81,6,7,pm_idu2,Instruction Decode Unit events ##8001,800F,0454,0455,8001,800F,0456,0457 0000090E,04010000,A5527B5C,00022000 Instruction Decode Unit events #18,80,2,11,34,53,32,78,74,pm_isu_rename,ISU Rename Pool Events ##0602,0604,0611,0631,0606,0621,8001,800F 00001228,10055000,8E6D949C,00022000 ISU Rename Pool Events #19,13,22,30,30,82,86,54,55,pm_isu_queues1,ISU Queue Full Events ##0603,0607,0610,0614,800F,8001,0612,0613 0000132E,10050000,850E994C,00022000 ISU Queue Full Events #20,32,81,31,32,28,27,78,74,pm_isu_flow,ISU Instruction Flow Events ##0621,800F,0632,0636,0623,0624,8001,800F 0000190E,10005000,D7B7C49C,00022000 ISU Instruction Flow Events #21,85,92,83,16,82,86,15,76,pm_isu_work,ISU Indicators of Work Blockage ##8004,8001,8001,0637,800F,8001,0633,8002 00000C12,10001000,4FCE9DA8,00002000 ISU Indicators of Work Blockage #22,77,78,69,73,81,86,44,45,pm_serialize,LSU Serializing Events ##0903,0921,0C75,800F,8003,8001,0C73,0C77 00001332,0118B000,E9D69DFC,00022000 LSU Serializing Events #23,71,70,50,51,69,68,78,74,pm_lsubusy,LSU Busy Events 
##0C21,0C25,0C33,0C37,0C22,0C26,8001,800F 0000193A,0000F000,DFF5E49C,00022000 LSU Busy Events #24,86,36,73,74,83,82,74,75,pm_lsource2,Information on data source ##8001,0C64,800F,8C66,8C66,8C66,8C66,8C66 00000938,0010C000,3B9CE738,00002000 Information on data source #25,82,82,74,74,36,81,74,81,pm_lsource3,Information on data source ##8C66,8C66,8C66,8C66,0C64,800F,8C66,8001 00000E1C,0010C000,73B87724,00022000 Information on data source #26,86,81,78,78,91,87,79,73,pm_isource2,Instruction Source information ##8001,800F,8227,8227,8227,8227,8227,8227 0000090E,80000000,7BDEF7BC,00022000 Instruction Source information #27,87,86,78,78,91,87,73,81,pm_isource3,Instruction Source information ##8227,8227,8227,8227,8227,8227,800F,8001 00000F1E,80000000,7BDEF3A4,00022000 Instruction Source information #28,10,19,25,29,11,20,78,74,pm_fpu3,Floating Point events by unit ##0000,0004,0011,0015,0001,0005,8001,800F 00001028,00000000,8D63549C,00022000 Floating Point events by unit #29,12,21,22,27,8,17,78,74,pm_fpu4,Floating Point events by unit ##0002,0006,0013,0017,0003,0007,8001,800F 0000122C,00000000,9DE7749C,00022000 Floating Point events by unit #30,9,18,23,28,82,86,21,26,pm_fpu5,Floating Point events by unit ##0020,0024,0010,0014,800F,8001,0012,0016 00001838,00000000,850E9958,00002000 Floating Point events by unit #31,14,23,19,20,16,25,73,81,pm_fpu6,Floating Point events by unit ##0023,0027,0930,0934,0022,0026,800F,8001 00001B3E,01002000,C735E3A4,00022000 Floating Point events by unit #32,15,24,22,27,82,86,73,24,pm_fpu7,Floating Point events by unit ##0021,0025,0013,0017,800F,8001,800F,0030 0000193A,00000000,9DCE93E0,00002000 Floating Point events by unit #33,86,81,76,76,88,85,76,79,pm_fxu,Fix Point Unit events ##8001,800F,8130,8002,8002,8002,8002,8110 0000090E,40000002,4294A520,00002000 Fix Point Unit events #34,67,66,52,53,82,86,56,12,pm_lsu_lmq,LSU Load Miss Queue Events ##0926,0927,0935,0931,800F,8001,0932,0936 00001E3E,0100A000,EE4E9D78,00002000 LSU Load Miss Queue Events 
#35,55,61,73,73,56,62,78,74,pm_lsu_flush,LSU Flush Events ##0C02,0C06,800F,800F,0C03,0C07,8001,800F 0000122C,000C0000,39E7749C,00022000 LSU Flush Events #36,57,63,48,49,82,86,46,47,pm_lsu_load1,LSU Load Events ##0C00,0C04,0C10,0C14,800F,8001,0C12,0C16 00001028,000F0000,850E9958,00002000 LSU Load Events #37,58,64,71,72,82,86,70,13,pm_lsu_store1,LSU Store Events ##0C01,0C05,0C11,0C15,800F,8001,0C13,0C17 0000112A,000F0000,8D4E99DC,00022000 LSU Store Events #38,59,65,71,72,79,81,78,74,pm_lsu_store2,LSU Store Events ##0C20,0C24,0C11,0C15,0C23,800F,8001,800F 00001838,0003C000,8D76749C,00022000 LSU Store Events #39,54,60,73,73,36,81,78,74,pm_lsu7,Information on the Load Store Unit ##0902,0906,800F,800F,0C64,800F,8001,800F 0000122C,0118C000,39F8749C,00022000 Information on the Load Store Unit #40,4,3,43,35,82,86,73,14,pm_dpfetch,Data Prefetch Events ##0907,0C27,0C34,0C35,800F,8001,800F,0C36 0000173E,0108F000,E74E93F8,00002000 Data Prefetch Events #41,85,88,84,73,81,86,77,86,pm_misc,Misc Events for testing ##8004,8002,8004,800F,8003,8001,8003,8005 00000C14,00000000,61D695B4,00022000 Misc Events for testing #42,92,91,73,84,90,92,82,81,pm_mark1,Information on marked instructions ##8920,8003,800F,8004,8004,8005,8005,8001 00000816,01008080,3B18D6A4,00722001 Information on marked instructions #43,91,89,73,82,90,91,81,84,pm_mark2,Marked Instructions Processing Flow ##8002,8005,800F,8005,8004,8004,8004,8004 00000A1A,00000000,3B58C630,00002001 Marked Instructions Processing Flow #44,93,81,82,84,94,93,68,81,pm_mark3,Marked Stores Processing Flow ##8003,800F,8003,8004,8005,8003,0933,8001 00000B0E,01002000,5B1ABDA4,00022001 Marked Stores Processing Flow #45,92,81,81,85,94,92,78,85,pm_mark4,Marked Loads Processing FLow ##8920,800F,8910,8910,8005,8005,8001,8910 0000080E,01028080,421AD4A0,00002001 Marked Loads Processing FLow #46,90,90,80,83,93,90,80,83,pm_mark_lsource,Information on marked data source ##8C76,8C76,8C76,8C76,8C76,8C76,8C76,8C76 00000E1C,00103000,739CE738,00002001 
Information on marked data source #47,86,81,57,83,93,90,80,83,pm_mark_lsource2,Information on marked data source ##8001,800F,0C74,8C76,8C76,8C76,8C76,8C76 0000090E,00103000,E39CE738,00002001 Information on marked data source #48,90,90,80,83,82,86,80,57,pm_mark_lsource3,Information on marked data source ##8C76,8C76,8C76,8C76,800F,8001,8C76,0C74 00000E1C,00103000,738E9770,00002001 Information on marked data source #49,76,72,60,65,82,86,61,66,pm_lsu_mark1,Load Store Unit Marked Events ##0923,0922,0910,0914,800F,8001,0911,0915 00001B34,01028000,850E98D4,00022001 Load Store Unit Marked Events #50,73,74,58,63,82,86,59,64,pm_lsu_mark2,Load Store Unit Marked Events ##0920,0924,0912,0916,800F,8001,0913,0917 00001838,01028000,958E99DC,00022001 Load Store Unit Marked Events #51,75,81,62,67,82,92,82,81,pm_lsu_mark3,Load Store Unit Marked Events ##0925,800F,0C31,0C32,800F,8005,8005,8001 00001D0E,0100B000,CE8ED6A4,00022001 Load Store Unit Marked Events #52,67,91,53,77,82,92,77,52,pm_threshold,Group for pipeline threshold studies ##0926,8003,0931,8001,800F,8005,8003,0935 00001E16,0100A000,CA4ED5F4,00722001 Group for pipeline threshold studies #53,84,83,77,75,82,83,78,77,pm_pe_bench1,PE Benchmarker group for FP analysis ##8000,8000,8630,8010,800F,8000,8001,8010 00000810,10001002,420E84A0,00002000 PE Benchmarker group for FP analysis #54,81,84,22,77,86,84,27,78,pm_pe_bench2,PE Benchmarker group for FP stalls analysis ##800F,8020,0013,8001,8600,8020,0017,8930 00000710,11042068,9A508BA0,00002000 PE Benchmarker group for FP stalls analysis #55,86,0,8,9,1,81,10,36,pm_pe_bench3,PE Benchmarker group for branch analysis ##8001,0224,0230,0231,0605,800F,0232,0233 00000938,90040000,C66A7D6C,00022000 PE Benchmarker group for branch analysis #56,6,35,79,70,82,86,84,82,pm_pe_bench4,PE Benchmarker group for L1 and TLB analysis ##0904,0900,8C10,0C13,800F,8001,8C10,8C10 00001420,010B0000,44CE9420,00002000 PE Benchmarker group for L1 and TLB analysis #57,86,81,74,74,83,82,74,75,pm_pe_bench5,PE 
Benchmarker group for L2 analysis ##8001,800F,8C66,8C66,8C66,8C66,8C66,8C66 0000090E,0010C000,739CE738,00002000 PE Benchmarker group for L2 analysis #58,82,82,74,74,83,81,78,75,pm_pe_bench6,PE Benchmarker group for L3 analysis ##8C66,8C66,8C66,8C66,8C66,800F,8001,8C66 00000E1C,0010C000,739C74B8,00002000 PE Benchmarker group for L3 analysis #59,6,88,79,70,82,86,84,82,pm_hpmcount1,Hpmcount group for L1 and TLB behavior analysis ##0904,8002,8C10,0C13,800F,8001,8C10,8C10 00001414,010B0000,44CE9420,00002000 Hpmcount group for L1 and TLB behavior analysis #60,84,83,22,27,82,84,78,78,pm_hpmcount2,Hpmcount group for computation intensity analysis ##8000,8000,0013,0017,800F,8020,8001,8930 00000810,01002028,9DCE84A0,00002000 Hpmcount group for computation intensity analysis #61,86,81,79,8,79,81,9,10,pm_l1andbr,L1 misses and branch misspredict analysis ##8001,800F,8C10,0230,0C23,800F,0231,0232 0000090E,8003C000,46367CE8,00002000 L1 misses and branch misspredict analysis #62,86,81,79,8,82,79,84,82,pm_imix,Instruction mix: loads, stores and branches ##8001,800F,8C10,0230,800F,0C23,8C10,8C10 0000090E,8003C000,460FB420,00002000 Instruction mix: loads, stores and branches papi-5.6.0/src/Rules.perfctr000664 001750 001750 00000005650 13216244356 020015 0ustar00jshenry1963jshenry1963000000 000000 DESCR = "Linux with PerfCtr $(VERSION) patch and library" ifneq (/usr,$(PERFCTR_PREFIX)) PWD = $(shell pwd) ifeq (,$(PERFCTR_LIB_PATH)) ifeq (,$(PERFCTR_ROOT)) PERFCTR_ROOT := $(PWD)/perfctr-$(VERSION) endif PERFCTR_LIB_PATH := $(PERFCTR_ROOT)/usr.lib CC_SHR += -Wl,-rpath-link -Wl,$(PERFCTR_LIB_PATH) endif ifeq (,$(PERFCTR_INC_PATH)) ifeq (,$(PERFCTR_ROOT)) PERFCTR_ROOT := $(PWD)/perfctr-$(VERSION) endif PERFCTR_INC_PATH := $(PERFCTR_ROOT)/usr.lib PERFCTR_KINC_PATH := $(PERFCTR_ROOT)/linux/include else PERFCTR_KINC_PATH := $(PERFCTR_INC_PATH) endif ifneq (/usr/include,$(PERFCTR_INC_PATH)) CFLAGS += -I$(PERFCTR_INC_PATH) endif ifneq (/usr/include/linux/include,$(PERFCTR_KINC_PATH)) CFLAGS += 
-I$(PERFCTR_KINC_PATH) endif endif CFLAGS-2.6.x := -DPERFCTR26 CFLAGS-2.7.x := $(CFLAGS-2.6.x) CFLAGS += $(CFLAGS-$(VERSION)) MISCHDRS += linux-lock.h mb.h MISCSRCS += $(ARCH_SPEC_EVTS) SHLIBDEPS = -Bdynamic -L$(PERFCTR_LIB_PATH) -lperfctr PERFCTR_OBJS = $(shell $(AR) t $(PERFCTR_LIB_PATH)/libperfctr.a 2> /dev/null) MISCOBJS = $(PERFCTR_OBJS) $(MISCSRCS:.c=.o) ifeq (,$(PERFCTR_OBJS)) $(PERFCTR_LIB_PATH)/libperfctr.a: ifneq (,${PERFCTR_ROOT}) ifeq (ppc64,${ARCH}) $(MAKE) CC='$(CC) $(BITFLAGS)' -C $(PERFCTR_ROOT) endif ifeq (x86_64,${ARCH}) ifeq (-m32,${BITFLAGS}) $(MAKE) CC='$(CC) $(BITFLAGS)' LD_FLAGS="-melf_i386" ARCH="i386" -C $(PERFCTR_ROOT) else $(MAKE) -C $(PERFCTR_ROOT) endif else $(MAKE) -C $(PERFCTR_ROOT) endif else @echo '$@ not installed!'; exit 1 endif $(MAKE) endif #Dynamic linking loader needs correct version of libperfctr ifeq ($(VERSION),2.7.x) SO_ABIVER=6 else ifeq ($(VERSION),2.6.x) i386_ABIVER=5 x86_64_ABIVER=6 ppc_ABIVER=5 SO_ABIVER=$($(ARCH)_ABIVER) endif endif POST_BUILD = cp $(PERFCTR_LIB_PATH)/libperfctr.so .;ln -fs libperfctr.so libperfctr.so.$(SO_ABIVER) include Makefile.inc config.h: @echo 'Please clobber your build and run ./configure. Direct Makefile access has been deprecated.' 
$(PERFCTR_OBJS): $(PERFCTR_LIB_PATH)/libperfctr.a $(AR) xv $< $(CPU)_events.o: $(CPU)_events.c $(HEADERS) $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c $(CPU)_events.c -o $@ #perfctr.o: perfctr.c $(HEADERS) # $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c perfctr.c -o $@ native_clean: -rm -f $(MISCOBJS) libperfctr.so* ifneq (,${PERFCTR_ROOT}) $(MAKE) -C $(PERFCTR_ROOT) clean endif native_install: ifeq ($(ARCH),ppc64) -mkdir -p $(DESTDIR)$(DATADIR) -chmod go+rx $(DESTDIR)$(DATADIR) cp -Rf ./event_data $(DESTDIR)$(DATADIR) endif ifneq (,${PERFCTR_ROOT}) ifeq (-m32,${BITFLAGS}) $(MAKE) -C $(PERFCTR_ROOT) PREFIX=$(DESTDIR)$(PREFIX) DESTDIR=$(DESTDIR) BINDIR=$(DESTDIR)$(BINDIR) LIBDIR=$(DESTDIR)$(LIBDIR) INCLDIR=$(DESTDIR)$(INCDIR) ARCH="i386" install else $(MAKE) -C $(PERFCTR_ROOT) PREFIX=$(DESTDIR)$(PREFIX) DESTDIR=$(DESTDIR) BINDIR=$(DESTDIR)$(BINDIR) LIBDIR=$(DESTDIR)$(LIBDIR) INCLDIR=$(DESTDIR)$(INCDIR) install endif endif native_clobber: ifneq (,${PERFCTR_ROOT}) $(MAKE) -C $(PERFCTR_ROOT) distclean endif papi-5.6.0/src/cpus.h000664 001750 001750 00000001174 13216244360 016447 0ustar00jshenry1963jshenry1963000000 000000 /** @file cpus.h * Author: Gary Mohr * gary.mohr@bull.com * - based on threads.h by unknown author - */ #ifndef PAPI_CPUS_H #define PAPI_CPUS_H typedef struct _CpuInfo { unsigned int cpu_num; struct _CpuInfo *next; hwd_context_t **context; EventSetInfo_t **running_eventset; EventSetInfo_t *from_esi; /* ESI used for last update this control state */ int num_users; } CpuInfo_t; int _papi_hwi_initialize_cpu( CpuInfo_t **dest, unsigned int cpu_num ); int _papi_hwi_shutdown_cpu( CpuInfo_t *cpu ); int _papi_hwi_lookup_or_create_cpu( CpuInfo_t ** here, unsigned int cpu_num ); #endif papi-5.6.0/src/libpfm4/docs/man3/libpfm_intel_snbep_unc_ha.3000664 001750 001750 00000002437 13216244364 025712 0ustar00jshenry1963jshenry1963000000 000000 .TH LIBPFM 3 "August, 2012" "" "Linux Programmer's Manual" .SH NAME libpfm_intel_snbep_unc_ha - support for Intel Sandy Bridge-EP Home Agent (HA) 
uncore PMU .SH SYNOPSIS .nf .B #include <perfmon/pfmlib.h> .sp .B PMU name: snbep_unc_ha .B PMU desc: Intel Sandy Bridge-EP HA uncore PMU .sp .SH DESCRIPTION The library supports the Intel Sandy Bridge Home Agent (HA) uncore PMU. This PMU model only exists on Sandy Bridge model 45. There is only one Home Agent per processor socket. .SH MODIFIERS The following modifiers are supported on Intel Sandy Bridge Home Agent (HA) uncore PMU: .TP .B i Invert the meaning of the event. The counter will now count HA cycles in which the event is \fBnot\fR occurring. This is a boolean modifier. .TP .B e Enable edge detection, i.e., count only when there is a state transition from no occurrence of the event to at least one occurrence. This modifier must be combined with a threshold modifier (t) with a value greater than or equal to one. This is a boolean modifier. .TP .B t Set the threshold value. When set to a non-zero value, the counter counts the number of HA cycles in which the number of occurrences of the event is greater than or equal to the threshold. This is an integer modifier with values in the range [0:255]. .SH AUTHORS .nf Stephane Eranian .fi .PP papi-5.6.0/src/validation_tests/papi_sr_ins.c000664 001750 001750 00000007457 13216244370 023365 0ustar00jshenry1963jshenry1963000000 000000 /* This file attempts to test the PAPI_SR_INS */ /* performance counter (retired stores). */ /* This just does a generic matrix-matrix test */ /* Should have a comprehensive assembly language test */ /* (see my deterministic benchmark suite) but that would be */ /* a lot more complicated. 
*/ /* by Vince Weaver, */ #include #include #include #include #include #include #include "papi.h" #include "papi_test.h" #include "display_error.h" #include "matrix_multiply.h" #define SLEEP_RUNS 3 int main(int argc, char **argv) { int quiet; double error; int i; long long count,high=0,low=0,total=0,average=0; long long mmm_count; long long expected; int retval; int eventset=PAPI_NULL; quiet=tests_quiet(argc,argv); /* Init the PAPI library */ retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) { test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); } if (!quiet) { printf("\nTesting PAPI_SR_INS\n\n"); } retval=PAPI_create_eventset(&eventset); if (retval!=PAPI_OK) { test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); } retval=PAPI_add_named_event(eventset,"PAPI_SR_INS"); if (retval!=PAPI_OK) { if (!quiet) printf("Could not add PAPI_SR_INS\n"); test_skip( __FILE__, __LINE__, "adding PAPI_LD_INS", retval ); } /**************/ /* Sleep test */ /**************/ if (!quiet) { printf("Testing a sleep of 1 second (%d times):\n",SLEEP_RUNS); } for(i=0;ihigh) high=count; if ((low==0) || (count100000) { if (!quiet) printf("Average cycle count too high!\n"); test_fail( __FILE__, __LINE__, "idle average", retval ); } /*****************************/ /* testing Matrix Matrix GHz */ /*****************************/ if (!quiet) { printf("\nTesting with matrix matrix multiply\n"); } PAPI_reset(eventset); PAPI_start(eventset); naive_matrix_multiply(quiet); retval=PAPI_stop(eventset,&count); if (retval!=PAPI_OK) { test_fail( __FILE__, __LINE__, "Problem stopping!", retval ); } expected=naive_matrix_multiply_estimated_stores(quiet); if (!quiet) { printf("\tActual measured stores = %lld\n",count); } error= 100.0 * (double)(count-expected) / (double)expected; if (!quiet) { printf("\tExpected %lld, got %lld\n",expected,count); printf("\tError=%.2f%%\n",error); } if ((error>10.0) || (error<-10.0)) { if (!quiet) printf("Error too high!\n"); 
test_fail( __FILE__, __LINE__, "Error too high", retval ); } mmm_count=count; /************************************/ /* Check for Linear Speedup */ /************************************/ if (!quiet) printf("\nTesting for a linear cycle increase\n"); #define REPITITIONS 2 PAPI_reset(eventset); PAPI_start(eventset); for(i=0;i10.0) || (error<-10.0)) { if (!quiet) printf("Error too high!\n"); test_fail( __FILE__, __LINE__, "Error too high", retval ); } if (!quiet) printf("\n"); test_pass( __FILE__ ); return 0; } papi-5.6.0/src/cpus.c000664 001750 001750 00000016255 13216244360 016450 0ustar00jshenry1963jshenry1963000000 000000 /****************************/ /* THIS IS OPEN SOURCE CODE */ /****************************/ /* * File: cpus.c * Author: Gary Mohr * gary.mohr@bull.com * - based on threads.c by Philip Mucci - */ /* This file contains cpu allocation and bookkeeping functions */ #include "papi.h" #include "papi_internal.h" #include "papi_vector.h" #include "papi_memory.h" #include "cpus.h" #include #include /* The list of cpus; this gets built as user apps set the cpu papi */ /* option on an event set */ static CpuInfo_t *_papi_hwi_cpu_head; static CpuInfo_t * _papi_hwi_lookup_cpu( unsigned int cpu_num ) { APIDBG("Entry:\n"); CpuInfo_t *tmp; tmp = ( CpuInfo_t * ) _papi_hwi_cpu_head; while ( tmp != NULL ) { THRDBG( "Examining cpu %#x at %p\n", tmp->cpu_num, tmp ); if ( tmp->cpu_num == cpu_num ) { break; } tmp = tmp->next; if ( tmp == _papi_hwi_cpu_head ) { tmp = NULL; break; } } if ( tmp ) { _papi_hwi_cpu_head = tmp; THRDBG( "Found cpu %#x at %p\n", cpu_num, tmp ); } else { THRDBG( "Did not find cpu %#x\n", cpu_num ); } return tmp; } int _papi_hwi_lookup_or_create_cpu( CpuInfo_t **here, unsigned int cpu_num ) { APIDBG("Entry: here: %p\n", here); CpuInfo_t *tmp = NULL; int retval = PAPI_OK; _papi_hwi_lock( CPUS_LOCK ); tmp = _papi_hwi_lookup_cpu(cpu_num); if ( tmp == NULL ) { retval = _papi_hwi_initialize_cpu( &tmp, cpu_num ); } /* Increment use count */ 
tmp->num_users++; if ( retval == PAPI_OK ) { *here = tmp; } _papi_hwi_unlock( CPUS_LOCK ); return retval; } static CpuInfo_t * allocate_cpu( unsigned int cpu_num ) { APIDBG("Entry: cpu_num: %d\n", cpu_num); CpuInfo_t *cpu; int i; /* Allocate new CpuInfo structure */ cpu = ( CpuInfo_t * ) papi_calloc( 1, sizeof ( CpuInfo_t ) ); if ( cpu == NULL ) { goto allocate_error; } /* identify the cpu this info structure represents */ cpu->cpu_num = cpu_num; cpu->context = ( hwd_context_t ** ) papi_calloc( ( size_t ) papi_num_components , sizeof ( hwd_context_t * ) ); if ( !cpu->context ) { goto error_free_cpu; } /* Allocate an eventset per component per cpu? Why? */ cpu->running_eventset = ( EventSetInfo_t ** ) papi_calloc(( size_t ) papi_num_components, sizeof ( EventSetInfo_t * ) ); if ( !cpu->running_eventset ) { goto error_free_context; } for ( i = 0; i < papi_num_components; i++ ) { cpu->context[i] = ( void * ) papi_calloc( 1, ( size_t ) _papi_hwd[i]->size.context ); cpu->running_eventset[i] = NULL; if ( cpu->context[i] == NULL ) { goto error_free_contexts; } } cpu->num_users=0; THRDBG( "Allocated CpuInfo: %p\n", cpu ); return cpu; error_free_contexts: for ( i--; i >= 0; i-- ) papi_free( cpu->context[i] ); error_free_context: papi_free( cpu->context ); error_free_cpu: papi_free( cpu ); allocate_error: return NULL; } /* Must be called with CPUS_LOCK held! 
*/
/*
 * Unlink entry from the circular cpu list.
 *
 * Returns PAPI_OK when the entry was found and unlinked, PAPI_EBUG when the
 * list is empty or does not contain entry.  The list is walked at most once
 * around, so an entry that is not on the list can no longer spin forever.
 */
static int
remove_cpu( CpuInfo_t * entry )
{
	APIDBG("Entry: entry: %p\n", entry);

	CpuInfo_t *tmp = NULL, *prev = NULL;

	/* An empty list cannot contain entry */
	if ( _papi_hwi_cpu_head == NULL ) {
		THRDBG( "Cpu %d at %p was not found in the cpu list!\n",
			entry->cpu_num, entry );
		return PAPI_EBUG;
	}

	THRDBG( "_papi_hwi_cpu_head was cpu %d at %p\n",
		_papi_hwi_cpu_head->cpu_num, _papi_hwi_cpu_head );

	/* Find the preceding element and the matched element.  Start one past
	   the head so that the head is examined last; arriving back at the
	   head without a match means entry is not on the list. */
	prev = ( CpuInfo_t * ) _papi_hwi_cpu_head;
	tmp = prev->next;
	while ( ( tmp != entry ) && ( tmp != _papi_hwi_cpu_head ) ) {
		prev = tmp;
		tmp = tmp->next;
	}

	if ( tmp != entry ) {
		THRDBG( "Cpu %d at %p was not found in the cpu list!\n",
			entry->cpu_num, entry );
		return PAPI_EBUG;
	}

	/* Only 1 element in list */
	if ( prev == tmp ) {
		_papi_hwi_cpu_head = NULL;
		tmp->next = NULL;
		THRDBG( "_papi_hwi_cpu_head now NULL\n" );
	} else {
		prev->next = tmp->next;
		/* If we're removing the head, better advance it! */
		if ( _papi_hwi_cpu_head == tmp ) {
			_papi_hwi_cpu_head = tmp->next;
			THRDBG( "_papi_hwi_cpu_head now cpu %d at %p\n",
				_papi_hwi_cpu_head->cpu_num, _papi_hwi_cpu_head );
		}
		THRDBG( "Removed cpu %p from list\n", tmp );
	}

	return PAPI_OK;
}
static void free_cpu( CpuInfo_t **cpu ) { APIDBG( "Entry: *cpu: %p, cpu_num: %d, cpu_users: %d\n", *cpu, ( *cpu )->cpu_num, (*cpu)->num_users); int i,users,retval; _papi_hwi_lock( CPUS_LOCK ); (*cpu)->num_users--; users=(*cpu)->num_users; /* Remove from linked list if no users */ if (!users) remove_cpu( *cpu ); _papi_hwi_unlock( CPUS_LOCK ); /* Exit early if still users of this CPU */ if (users!=0) return; THRDBG( "Shutting down cpu %d at %p\n", (*cpu)->cpu_num, cpu ); for ( i = 0; i < papi_num_components; i++ ) { if (_papi_hwd[i]->cmp_info.disabled) continue; retval = _papi_hwd[i]->shutdown_thread( (*cpu)->context[i] ); if ( retval != PAPI_OK ) { // failure = retval; } } for ( i = 0; i < papi_num_components; i++ ) { if ( ( *cpu )->context[i] ) { papi_free( ( *cpu )->context[i] ); } } if ( ( *cpu )->context ) { papi_free( ( *cpu )->context ); } if ( ( *cpu )->running_eventset ) { papi_free( ( *cpu )->running_eventset ); } /* why do we clear this? 
*/ memset( *cpu, 0x00, sizeof ( CpuInfo_t ) ); papi_free( *cpu ); *cpu = NULL; } /* Must be called with CPUS_LOCK held! */ static void insert_cpu( CpuInfo_t * entry ) { APIDBG("Entry: entry: %p\n", entry); if ( _papi_hwi_cpu_head == NULL ) { /* 0 elements */ THRDBG( "_papi_hwi_cpu_head is NULL\n" ); entry->next = entry; } else if ( _papi_hwi_cpu_head->next == _papi_hwi_cpu_head ) { /* 1 element */ THRDBG( "_papi_hwi_cpu_head was cpu %d at %p\n", _papi_hwi_cpu_head->cpu_num, _papi_hwi_cpu_head ); _papi_hwi_cpu_head->next = entry; entry->next = ( CpuInfo_t * ) _papi_hwi_cpu_head; } else { /* 2+ elements */ THRDBG( "_papi_hwi_cpu_head was cpu %d at %p\n", _papi_hwi_cpu_head->cpu_num, _papi_hwi_cpu_head ); entry->next = _papi_hwi_cpu_head->next; _papi_hwi_cpu_head->next = entry; } _papi_hwi_cpu_head = entry; THRDBG( "_papi_hwi_cpu_head now cpu %d at %p\n", _papi_hwi_cpu_head->cpu_num, _papi_hwi_cpu_head ); } /* Must be called with CPUS_LOCK held! */ int _papi_hwi_initialize_cpu( CpuInfo_t **dest, unsigned int cpu_num ) { APIDBG("Entry: dest: %p, *dest: %p, cpu_num: %d\n", dest, *dest, cpu_num); int retval; CpuInfo_t *cpu; int i; if ( ( cpu = allocate_cpu(cpu_num) ) == NULL ) { *dest = NULL; return PAPI_ENOMEM; } /* Call the component to fill in anything special. */ for ( i = 0; i < papi_num_components; i++ ) { if (_papi_hwd[i]->cmp_info.disabled) continue; retval = _papi_hwd[i]->init_thread( cpu->context[i] ); if ( retval ) { free_cpu( &cpu ); *dest = NULL; return retval; } } insert_cpu( cpu ); *dest = cpu; return PAPI_OK; } int _papi_hwi_shutdown_cpu( CpuInfo_t *cpu ) { APIDBG("Entry: cpu: %p, cpu_num: %d\n", cpu, cpu->cpu_num); free_cpu( &cpu ); return PAPI_OK; } papi-5.6.0/src/libpfm4/lib/events/intel_knl_unc_m2pcie_events.h000664 001750 001750 00000010176 13216244364 026561 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2016 Intel Corp. 
All rights reserved * Contributed by Peinan Zhang * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. 
* * PMU: knl_unc_m2pcie (Intel Knights Landing M2PCIe uncore) */ static const intel_x86_umask_t knl_unc_m2p_ingress_cycles_ne[]={ { .uname = "CBO_IDI", .udesc = "CBO_IDI", .ucode = 0x0100, }, { .uname = "CBO_NCB", .udesc = "CBO_NCB", .ucode = 0x0200, }, { .uname = "CBO_NCS", .udesc = "CBO_NCS", .ucode = 0x0400, }, { .uname = "ALL", .udesc = "All", .ucode = 0x0800, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, }; static const intel_x86_umask_t knl_unc_m2p_egress_cycles[]={ { .uname = "AD_0", .udesc = "AD_0", .ucode = 0x0100, }, { .uname = "AK_0", .udesc = "AK_0", .ucode = 0x0200, }, { .uname = "BL_0", .udesc = "BL_0", .ucode = 0x0400, }, { .uname = "AD_1", .udesc = "AD_1", .ucode = 0x0800, }, { .uname = "AK_1", .udesc = "AK_1", .ucode = 0x1000, }, { .uname = "BL_1", .udesc = "BL_1", .ucode = 0x2000, }, }; static const intel_x86_umask_t knl_unc_m2p_egress_inserts[]={ { .uname = "AD_0", .udesc = "AD_0", .ucode = 0x0100, }, { .uname = "AK_0", .udesc = "AK_0", .ucode = 0x0200, }, { .uname = "BL_0", .udesc = "BL_0", .ucode = 0x0400, }, { .uname = "AK_CRD_0", .udesc = "AK_CRD_0", .ucode = 0x0800, }, { .uname = "AD_1", .udesc = "AD_1", .ucode = 0x1000, }, { .uname = "AK_1", .udesc = "AK_1", .ucode = 0x2000, }, { .uname = "BL_1", .udesc = "BL_1", .ucode = 0x4000, }, { .uname = "AK_CRD_1", .udesc = "AK_CRD_1", .ucode = 0x8000, }, }; static const intel_x86_entry_t intel_knl_unc_m2pcie_pe[]={ { .name = "UNC_M2P_INGRESS_CYCLES_NE", .desc = "Ingress Queue Cycles Not Empty. Counts the number of cycles when the M2PCIe Ingress is not empty", .code = 0x10, .cntmsk = 0xf, .ngrp = 1, .numasks = LIBPFM_ARRAY_SIZE(knl_unc_m2p_ingress_cycles_ne), .umasks = knl_unc_m2p_ingress_cycles_ne }, { .name = "UNC_M2P_EGRESS_CYCLES_NE", .desc = "Egress (to CMS) Cycles Not Empty. 
Counts the number of cycles when the M2PCIe Egress is not empty", .code = 0x23, .cntmsk = 0x3, .ngrp = 1, .numasks = LIBPFM_ARRAY_SIZE(knl_unc_m2p_egress_cycles), .umasks = knl_unc_m2p_egress_cycles }, { .name = "UNC_M2P_EGRESS_INSERTS", .desc = "Egress (to CMS) Ingress. Counts the number of messages inserted into the M2PCIe Egress queue", .code = 0x24, .cntmsk = 0xf, .ngrp = 1, .numasks = LIBPFM_ARRAY_SIZE(knl_unc_m2p_egress_inserts), .umasks = knl_unc_m2p_egress_inserts }, { .name = "UNC_M2P_EGRESS_CYCLES_FULL", .desc = "Egress (to CMS) Cycles Full. Counts the number of cycles when the M2PCIe Egress is full", .code = 0x25, .cntmsk = 0xf, .ngrp = 1, .numasks = LIBPFM_ARRAY_SIZE(knl_unc_m2p_egress_cycles), .umasks = knl_unc_m2p_egress_cycles }, }; papi-5.6.0/src/libpfm-3.y/include/perfmon/pfmlib_gen_ia64.h000664 001750 001750 00000004716 13216244362 025402 0ustar00jshenry1963jshenry1963000000 000000 /* * Generic IA-64 PMU specific types and definitions * * Copyright (c) 2001-2006 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef __PFMLIB_GEN_IA64_H__ #define __PFMLIB_GEN_IA64_H__ #include #include #if BYTE_ORDER != LITTLE_ENDIAN #error "this file only supports little endian environments" #endif #ifdef __cplusplus extern "C" { #endif #define PMU_GEN_IA64_FIRST_COUNTER 4 /* index of first PMC/PMD counter */ #define PMU_GEN_IA64_NUM_COUNTERS 4 /* total numbers of PMC/PMD pairs used as counting monitors */ #define PMU_GEN_IA64_NUM_PMCS 8 /* total number of PMCS defined */ #define PMU_GEN_IA64_NUM_PMDS 4 /* total number of PMDS defined */ /* * architected PMC register structure */ typedef union { unsigned long pmc_val; /* generic PMC register */ struct { unsigned long pmc_plm:4; /* privilege level mask */ unsigned long pmc_ev:1; /* external visibility */ unsigned long pmc_oi:1; /* overflow interrupt */ unsigned long pmc_pm:1; /* privileged monitor */ unsigned long pmc_ig1:1; /* reserved */ unsigned long pmc_es:8; /* event select */ unsigned long pmc_ig2:48; /* reserved */ } pmc_gen_count_reg; } pfm_gen_ia64_pmc_reg_t; typedef struct { unsigned long pmd_val; /* generic counter value */ } pfm_gen_ia64_pmd_reg_t; #ifdef __cplusplus /* extern C */ } #endif #endif /* __PFMLIB_GEN_IA64_H__ */ papi-5.6.0/src/perfctr-2.6.x/examples/global/000775 001750 001750 00000000000 13216244366 022623 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/ctests/shlib.c000664 001750 001750 00000010657 13216244361 020105 0ustar00jshenry1963jshenry1963000000 000000 /* * File: profile.c * Author: Philip Mucci * mucci@cs.utk.edu */ #include #include #include #include #if (!defined(NO_DLFCN) && !defined(_BGL) && !defined(_BGP)) #include #endif #include "papi.h" #include "papi_test.h" void print_shlib_info_map(const PAPI_shlib_info_t *shinfo, 
int quiet) {
	PAPI_address_map_t *map = shinfo->map;
	int i;

	if (NULL == map) {
		test_fail(__FILE__, __LINE__, "PAPI_get_shared_lib_info", 1);
	}

	/* Walk all shinfo->count entries.  quiet suppresses only the printing;
	   the sanity checks run either way (they used to be skipped entirely
	   in quiet mode because the whole loop sat behind "if (!quiet)"). */
	for ( i = 0; i < shinfo->count; i++ ) {
		if (!quiet) {
			printf( "Library: %s\n", map->name );
			printf( "Text start: %p, Text end: %p\n",
				map->text_start, map->text_end );
			printf( "Data start: %p, Data end: %p\n",
				map->data_start, map->data_end );
			printf( "Bss start: %p, Bss end: %p\n",
				map->bss_start, map->bss_end );
		}

		/* Every mapping needs a name ... */
		if ( strlen( &(map->name[0]) ) == 0 )
			test_fail( __FILE__, __LINE__, "PAPI_get_shared_lib_info", 1 );

		/* ... and a non-empty text range with non-zero bounds */
		if ( ( map->text_start == 0x0 ) || ( map->text_end == 0x0 ) ||
		     ( map->text_start >= map->text_end ) )
			test_fail( __FILE__, __LINE__, "PAPI_get_shared_lib_info", 1 );

		/* if ((map->data_start == 0x0) || (map->data_end == 0x0) || (map->data_start >= map->data_end)) test_fail(__FILE__, __LINE__, "PAPI_get_shared_lib_info",1); if (((map->bss_start) && (!map->bss_end)) || ((!map->bss_start) && (map->bss_end)) || (map->bss_start > map->bss_end)) test_fail(__FILE__, __LINE__, "PAPI_get_shared_lib_info",1); */

		map++;
	}
}
void display( char *msg ) { int i; for (i=0; i<64; i++) { printf( "%1d", (msg[i] ? 
1 : 0) ); } printf("\n"); } int main( int argc, char **argv ) { int retval,quiet; const PAPI_shlib_info_t *shinfo; /* Set TESTS_QUIET variable */ quiet = tests_quiet( argc, argv ); retval = PAPI_library_init( PAPI_VER_CURRENT ); if (retval != PAPI_VER_CURRENT ) { test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); } if ( ( shinfo = PAPI_get_shared_lib_info( ) ) == NULL ) { test_skip( __FILE__, __LINE__, "PAPI_get_shared_lib_info", 1 ); } if ( ( shinfo->count == 0 ) && ( shinfo->map ) ) { test_fail( __FILE__, __LINE__, "PAPI_get_shared_lib_info", 1 ); } print_shlib_info_map(shinfo, quiet); /* Needed for debugging, so you can ^Z and stop the process, */ /* inspect /proc to see if it's right */ sleep( 1 ); #ifndef NO_DLFCN { const char *_libname = "libcrypt.so"; void *handle; void ( *setkey) (const char *key); void ( *encrypt) (char block[64], int edflag); char key[64]={ 1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, 1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, 1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, 1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, }; /* bit pattern for key */ char orig[64]; /* bit pattern for messages */ char txt[64]; /* bit pattern for messages */ int oldcount; handle = dlopen( _libname, RTLD_NOW ); if ( !handle ) { printf( "dlopen: %s\n", dlerror( ) ); if (!quiet) printf( "Did you forget to set the environmental " "variable LIBPATH (in AIX) or " "LD_LIBRARY_PATH (in linux) ?\n" ); test_fail( __FILE__, __LINE__, "dlopen", 1 ); } setkey = dlsym( handle, "setkey" ); encrypt = dlsym( handle, "encrypt" ); if ( setkey == NULL || encrypt == NULL) { if (!quiet) printf( "dlsym: %s\n", dlerror( ) ); test_fail( __FILE__, __LINE__, "dlsym", 1 ); } memset(orig,0,64); memcpy(txt,orig,64); setkey(key); if (!quiet) { printf("original "); display(txt); } encrypt(txt, 0); /* encode */ if (!quiet) { printf("encrypted "); display(txt); } if (!memcmp(txt,orig,64)) { test_fail( __FILE__, __LINE__, "encode", 1 ); } encrypt(txt, 1); /* decode */ if (!quiet) { printf("decrypted "); display(txt); } if 
(memcmp(txt,orig,64)) { test_fail( __FILE__, __LINE__, "decode", 1 ); } oldcount = shinfo->count; if ( ( shinfo = PAPI_get_shared_lib_info( ) ) == NULL ) { test_fail( __FILE__, __LINE__, "PAPI_get_shared_lib_info", 1 ); } /* Needed for debugging, so you can ^Z and stop the process, */ /* inspect /proc to see if it's right */ sleep( 1 ); if ( ( shinfo->count == 0 ) && ( shinfo->map ) ) { test_fail( __FILE__, __LINE__, "PAPI_get_shared_lib_info", 1 ); } if ( shinfo->count <= oldcount ) { test_fail( __FILE__, __LINE__, "PAPI_get_shared_lib_info", 1 ); } print_shlib_info_map(shinfo, quiet); /* Needed for debugging, so you can ^Z and stop the process, */ /* inspect /proc to see if it's right */ sleep( 1 ); dlclose( handle ); } #endif test_pass( __FILE__ ); return 0; } papi-5.6.0/man/man3/PAPIF_assign_eventset_component.3000664 001750 001750 00000001111 13216244355 024427 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPIF_assign_eventset_component" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPIF_assign_eventset_component \- .PP assign a component index to an existing but empty EventSet .SH SYNOPSIS .br .PP .SH "Detailed Description" .PP .PP \fBFortran Interface:\fP .RS 4 #include 'fpapi\&.h' .br \fBPAPIF_assign_eventset_component( C_INT EventSet, C_INT cidx, C_INT check )\fP .RE .PP \fBSee Also:\fP .RS 4 \fBPAPI_assign_eventset_component\fP .RE .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. 
papi-5.6.0/src/papi_internal.h000664 001750 001750 00000041340 13216244366 020327 0ustar00jshenry1963jshenry1963000000 000000 /****************************/ /* THIS IS OPEN SOURCE CODE */ /****************************/ /** * @file papi_internal.h * @author Philip Mucci * mucci@cs.utk.edu * @author Dan Terpstra * terpstra.utk.edu * @author Kevin London * london@cs.utk.edu * @author Haihang You * you@cs.utk.edu */ #ifndef _PAPI_INTERNAL_H #define _PAPI_INTERNAL_H /* AIX's C compiler does not recognize the inline keyword */ #ifdef _AIX #define inline #endif #include "papi_debug.h" #define DEADBEEF 0xdedbeef extern int papi_num_components; extern int _papi_num_compiled_components; extern int init_level; extern int _papi_hwi_errno; extern int _papi_hwi_num_errors; extern char **_papi_errlist; /********************************************************/ /* This block provides general strings used in PAPI */ /* If a new string is needed for PAPI prompts */ /* it should be placed in this file and referenced by */ /* label. */ /********************************************************/ #define PAPI_ERROR_CODE_str "Error Code" #define PAPI_SHUTDOWN_str "PAPI_shutdown: PAPI is not initialized" #define PAPI_SHUTDOWN_SYNC_str "PAPI_shutdown: other threads still have running EventSets" /* some members of structs and/or function parameters may or may not be necessary, but at this point, we have included anything that might possibly be useful later, and will remove them as we progress */ /* Signal used for overflow delivery */ #define PAPI_INT_MPX_SIGNAL SIGPROF #define PAPI_INT_SIGNAL SIGPROF #define PAPI_INT_ITIMER ITIMER_PROF #define PAPI_INT_ITIMER_MS 1 #if defined(linux) #define PAPI_NSIG _NSIG #else #define PAPI_NSIG 128 #endif /* Multiplex definitions */ #define PAPI_INT_MPX_DEF_US 10000 /*Default resolution in us. 
of mpx handler */ /* Commands used to compute derived events */ #define NOT_DERIVED 0x0 /**< Do nothing */ #define DERIVED_ADD 0x1 /**< Add counters */ #define DERIVED_PS 0x2 /**< Divide by the cycle counter and convert to seconds */ #define DERIVED_ADD_PS 0x4 /**< Add 2 counters then divide by the cycle counter and xl8 to secs. */ #define DERIVED_CMPD 0x8 /**< Event lives in operand index but takes 2 or more codes */ #define DERIVED_SUB 0x10 /**< Sub all counters from counter with operand_index */ #define DERIVED_POSTFIX 0x20 /**< Process counters based on specified postfix string */ #define DERIVED_INFIX 0x40 /**< Process counters based on specified infix string */ /* Thread related: thread local storage */ #define LOWLEVEL_TLS PAPI_NUM_TLS+0 #define NUM_INNER_TLS 1 #define PAPI_MAX_TLS (NUM_INNER_TLS+PAPI_NUM_TLS) /* Thread related: locks */ #define INTERNAL_LOCK PAPI_NUM_LOCK+0 /* papi_internal.c */ #define MULTIPLEX_LOCK PAPI_NUM_LOCK+1 /* multiplex.c */ #define THREADS_LOCK PAPI_NUM_LOCK+2 /* threads.c */ #define HIGHLEVEL_LOCK PAPI_NUM_LOCK+3 /* papi_hl.c */ #define MEMORY_LOCK PAPI_NUM_LOCK+4 /* papi_memory.c */ #define COMPONENT_LOCK PAPI_NUM_LOCK+5 /* per-component */ #define GLOBAL_LOCK PAPI_NUM_LOCK+6 /* papi.c for global variable (static and non) initialization/shutdown */ #define CPUS_LOCK PAPI_NUM_LOCK+7 /* cpus.c */ #define NAMELIB_LOCK PAPI_NUM_LOCK+8 /* papi_pfm4_events.c */ /* extras related */ #define NEED_CONTEXT 1 #define DONT_NEED_CONTEXT 0 #define PAPI_EVENTS_IN_DERIVED_EVENT 8 /* these vestigial pointers are to structures defined in the components they are opaque to the framework and defined as void at this level they are remapped to real data in the component routines that use them */ #define hwd_context_t void #define hwd_control_state_t void #define hwd_reg_alloc_t void #define hwd_register_t void #define hwd_siginfo_t void #define hwd_ucontext_t void /* DEFINES END HERE */ #ifndef NO_CONFI #include "config.h" #endif #include OSCONTEXT 
#include "papi_preset.h" #ifndef inline_static #define inline_static inline static #endif typedef struct _EventSetDomainInfo { int domain; } EventSetDomainInfo_t; typedef struct _EventSetGranularityInfo { int granularity; } EventSetGranularityInfo_t; typedef struct _EventSetOverflowInfo { int flags; int event_counter; PAPI_overflow_handler_t handler; long long *deadline; int *threshold; int *EventIndex; int *EventCode; } EventSetOverflowInfo_t; typedef struct _EventSetAttachInfo { unsigned long tid; } EventSetAttachInfo_t; typedef struct _EventSetCpuInfo { unsigned int cpu_num; } EventSetCpuInfo_t; typedef struct _EventSetInheritInfo { int inherit; } EventSetInheritInfo_t; /** @internal */ typedef struct _EventSetProfileInfo { PAPI_sprofil_t **prof; int *count; /**< Number of buffers */ int *threshold; int *EventIndex; int *EventCode; int flags; int event_counter; } EventSetProfileInfo_t; /** This contains info about an individual event added to the EventSet. The event can be either PRESET or NATIVE, and either simple or derived. If derived, it can consist of up to PAPI_EVENTS_IN_DERIVED_EVENT native events. An EventSet contains a pointer to an array of these structures to define each added event. @internal */ typedef struct _EventInfo { unsigned int event_code; /**< Preset or native code for this event as passed to PAPI_add_event() */ int pos[PAPI_EVENTS_IN_DERIVED_EVENT]; /**< position in the counter array for this events components */ char *ops; /**< operation string of preset (points into preset event struct) */ int derived; /**< Counter derivation command used for derived events */ } EventInfo_t; /** This contains info about each native event added to the EventSet. An EventSet contains an array of MAX_COUNTERS of these structures to define each native event in the set. 
    @internal
 */
typedef struct _NativeInfo {
	int ni_event;             /**< native (libpfm4) event code; always non-zero unless empty */
	int ni_papi_code;         /**< papi event code value returned to papi applications */
	int ni_position;          /**< counter array position where this native event lives */
	int ni_owners;            /**< specifies how many owners share this native event */
	hwd_register_t *ni_bits;  /**< Component defined resources used by this native event */
} NativeInfo_t;

/* Multiplex definitions */

/** This contains only the information about an event that
 *  would cause two events to be counted separately.  Options
 *  that don't affect an event aren't included here.
 *  @internal
 */
typedef struct _papi_info {
	long long event_type;
	int domain;
	int granularity;
} PapiInfo;

/** One event tracked by the software multiplexer.
    Entries are chained through `next`; `mythr` points back at the
    Threadlist entry the event is associated with.
    NOTE(review): the counters below (count, cycles, handler_count,
    prev_total_c, count_estimate, rate_estimate) are presumably the running
    state used to extrapolate multiplexed counts -- confirm in sw_multiplex.c. */
typedef struct _masterevent {
	int uses;
	int active;
	int is_a_rate;
	int papi_event;
	PapiInfo pi;               /* identity of the event (type/domain/granularity) */
	long long count;
	long long cycles;
	long long handler_count;
	long long prev_total_c;
	long long count_estimate;
	double rate_estimate;
	struct _threadlist *mythr;
	struct _masterevent *next;
} MasterEvent;

/** Per-thread multiplexing state; entries are chained through `next`.
    The thread is identified by a pthread_t when PTHREADS is defined and by
    a numeric tid otherwise.
    @internal */
typedef struct _threadlist {
#ifdef PTHREADS
	pthread_t thr;
#else
	unsigned long int tid;
#endif
	/** Total cycles for this thread */
	long long total_c;
	/** Pointer to event in use */
	MasterEvent *cur_event;
	/** List of multiplexing events for this thread */
	MasterEvent *head;
	/** Pointer to next thread */
	struct _threadlist *next;
} Threadlist;

/* Ugh, should move this out and into all callers of papi_internal.h */
#include "sw_multiplex.h"

/** Opaque struct, not defined yet...due to threads.h <-> papi_internal.h
    @internal */
struct _ThreadInfo;
struct _CpuInfo;

/** The complete state of one EventSet.
    Fields below are ordered by access in PAPI_read for performance
    @internal */
typedef struct _EventSetInfo {
	struct _ThreadInfo *master;  /**< Pointer to thread that owns this EventSet*/
	struct _CpuInfo    *CpuInfo; /**< Pointer to cpu that owns this EventSet */

	int state;                   /**< The state of this entire EventSet; can be
	                                  PAPI_RUNNING or PAPI_STOPPED plus flags */

	EventInfo_t *EventInfoArray; /**< This array contains the mapping from
	                                  events added into the API into hardware
	                                  specific encoding as returned by the
	                                  kernel or the code that directly
	                                  accesses the counters. */

	hwd_control_state_t *ctl_state; /**< This contains the encoding necessary for the
	                                     hardware to set the counters to the appropriate
	                                     conditions */

	unsigned long int tid;       /**< Thread ID, only used if PAPI_thread_init() is called */

	int EventSetIndex;           /**< Index of the EventSet in the array */

	int CmpIdx;                  /**< Which Component this EventSet Belongs to */

	int NumberOfEvents;          /**< Number of events added to EventSet */

	long long *hw_start;         /**< Array of length num_mpx_cntrs to hold unprocessed,
	                                  out of order, long long counter registers */

	long long *sw_stop;          /**< Array of length num_mpx_cntrs that contains
	                                  processed, in order, PAPI counter values when
	                                  used or stopped */

	int NativeCount;             /**< Number of native events in NativeInfoArray */

	NativeInfo_t *NativeInfoArray; /**< Info about each native event in the set */
	hwd_register_t *NativeBits;    /**< Component-specific bits corresponding to the native events */

	/* Per-EventSet option blocks, one per option family. */
	EventSetDomainInfo_t domain;
	EventSetGranularityInfo_t granularity;
	EventSetOverflowInfo_t overflow;
	EventSetMultiplexInfo_t multiplex;
	EventSetAttachInfo_t attach;
	EventSetCpuInfo_t cpu;
	EventSetProfileInfo_t profile;
	EventSetInheritInfo_t inherit;
} EventSetInfo_t;

/** Growable table of EventSet pointers with slot bookkeeping.
    @internal */
typedef struct _dynamic_array {
	EventSetInfo_t **dataSlotArray; /**< array of ptrs to EventSets */
	int totalSlots;                 /**< number of slots in dataSlotArrays */
	int availSlots;                 /**< number of open slots in dataSlotArrays */
	int fullSlots;                  /**< number of full slots in dataSlotArray */
	int lowestEmptySlot;            /**< index of lowest empty dataSlotArray */
} DynamicArray_t;

/* Component option types for _papi_hwd_ctl.
*/ typedef struct _papi_int_attach { unsigned long tid; EventSetInfo_t *ESI; } _papi_int_attach_t; typedef struct _papi_int_cpu { unsigned int cpu_num; EventSetInfo_t *ESI; } _papi_int_cpu_t; typedef struct _papi_int_multiplex { int flags; unsigned long ns; EventSetInfo_t *ESI; } _papi_int_multiplex_t; typedef struct _papi_int_defdomain { int defdomain; } _papi_int_defdomain_t; typedef struct _papi_int_domain { int domain; int eventset; EventSetInfo_t *ESI; } _papi_int_domain_t; typedef struct _papi_int_granularity { int granularity; int eventset; EventSetInfo_t *ESI; } _papi_int_granularity_t; typedef struct _papi_int_overflow { EventSetInfo_t *ESI; EventSetOverflowInfo_t overflow; } _papi_int_overflow_t; typedef struct _papi_int_profile { EventSetInfo_t *ESI; EventSetProfileInfo_t profile; } _papi_int_profile_t; typedef PAPI_itimer_option_t _papi_int_itimer_t; /* These shortcuts are only for use code */ #undef multiplex_itimer_sig #undef multiplex_itimer_num #undef multiplex_itimer_us typedef struct _papi_int_inherit { EventSetInfo_t *ESI; int inherit; } _papi_int_inherit_t; /** @internal */ typedef struct _papi_int_addr_range { /* if both are zero, range is disabled */ EventSetInfo_t *ESI; int domain; caddr_t start; /**< start address of an address range */ caddr_t end; /**< end address of an address range */ int start_off; /**< offset from start address as programmed in hardware */ int end_off; /**< offset from end address as programmed in hardware */ /**< if offsets are undefined, they are both set to -1 */ } _papi_int_addr_range_t; typedef union _papi_int_option_t { _papi_int_overflow_t overflow; _papi_int_profile_t profile; _papi_int_domain_t domain; _papi_int_attach_t attach; _papi_int_cpu_t cpu; _papi_int_multiplex_t multiplex; _papi_int_itimer_t itimer; _papi_int_inherit_t inherit; _papi_int_granularity_t granularity; _papi_int_addr_range_t address_range; } _papi_int_option_t; /** Hardware independent context * @internal */ typedef struct { hwd_siginfo_t 
*si; hwd_ucontext_t *ucontext; } _papi_hwi_context_t; /** @internal */ typedef struct _papi_mdi { DynamicArray_t global_eventset_map; /**< Global structure to maintain int<->EventSet mapping */ pid_t pid; /**< Process identifier */ PAPI_hw_info_t hw_info; /**< See definition in papi.h */ PAPI_exe_info_t exe_info; /**< See definition in papi.h */ PAPI_shlib_info_t shlib_info; /**< See definition in papi.h */ PAPI_preload_info_t preload_info; /**< See definition in papi.h */ } papi_mdi_t; extern papi_mdi_t _papi_hwi_system_info; extern int _papi_hwi_error_level; /* extern const hwi_describe_t _papi_hwi_err[PAPI_NUM_ERRORS]; */ /*extern volatile int _papi_hwi_using_signal;*/ extern int _papi_hwi_using_signal[PAPI_NSIG]; /** @ingroup papi_data_structures */ typedef struct _papi_os_option { char name[PAPI_MAX_STR_LEN]; /**< Name of the operating system */ char version[PAPI_MAX_STR_LEN]; /**< descriptive OS Version */ int os_version; /**< numerical, for workarounds */ int itimer_sig; /**< Signal used by the multiplex timer, 0 if not */ int itimer_num; /**< Number of the itimer used by mpx and overflow/profile emulation */ int itimer_ns; /**< ns between mpx switching and overflow/profile emulation */ int itimer_res_ns; /**< ns of resolution of itimer */ int clock_ticks; /**< clock ticks per second */ unsigned long reserved[8]; /* For future expansion */ } PAPI_os_info_t; extern PAPI_os_info_t _papi_os_info; /* For internal PAPI use only */ #include "papi_lock.h" #include "threads.h" EventSetInfo_t *_papi_hwi_lookup_EventSet( int eventset ); void _papi_hwi_set_papi_event_string (const char *event_string); char *_papi_hwi_get_papi_event_string (void); void _papi_hwi_free_papi_event_string(); void _papi_hwi_set_papi_event_code (unsigned int event_code, int update_flag); unsigned int _papi_hwi_get_papi_event_code (void); int _papi_hwi_get_ntv_idx (unsigned int papi_evt_code); int _papi_hwi_is_sw_multiplex( EventSetInfo_t * ESI ); hwd_context_t *_papi_hwi_get_context( 
EventSetInfo_t * ESI, int *is_dirty ); extern int _papi_hwi_error_level; extern PAPI_debug_handler_t _papi_hwi_debug_handler; void PAPIERROR( char *format, ... ); void PAPIWARN( char *format, ... ); int _papi_hwi_assign_eventset( EventSetInfo_t * ESI, int cidx ); void _papi_hwi_free_EventSet( EventSetInfo_t * ESI ); int _papi_hwi_create_eventset( int *EventSet, ThreadInfo_t * handle ); int _papi_hwi_lookup_EventCodeIndex( const EventSetInfo_t * ESI, unsigned int EventCode ); int _papi_hwi_remove_EventSet( EventSetInfo_t * ESI ); void _papi_hwi_map_events_to_native( EventSetInfo_t *ESI); int _papi_hwi_add_event( EventSetInfo_t * ESI, int EventCode ); int _papi_hwi_remove_event( EventSetInfo_t * ESI, int EventCode ); int _papi_hwi_read( hwd_context_t * context, EventSetInfo_t * ESI, long long *values ); int _papi_hwi_cleanup_eventset( EventSetInfo_t * ESI ); int _papi_hwi_convert_eventset_to_multiplex( _papi_int_multiplex_t * mpx ); int _papi_hwi_init_global( void ); int _papi_hwi_init_global_internal( void ); int _papi_hwi_init_os(void); void _papi_hwi_init_errors(void); PAPI_os_info_t *_papi_hwi_get_os_info(void); void _papi_hwi_shutdown_global_internal( void ); void _papi_hwi_dummy_handler( int EventSet, void *address, long long overflow_vector, void *context ); int _papi_hwi_get_preset_event_info( int EventCode, PAPI_event_info_t * info ); int _papi_hwi_get_user_event_info( int EventCode, PAPI_event_info_t * info ); int _papi_hwi_derived_type( char *tmp, int *code ); int _papi_hwi_query_native_event( unsigned int EventCode ); int _papi_hwi_get_native_event_info( unsigned int EventCode, PAPI_event_info_t * info ); int _papi_hwi_native_name_to_code( const char *in, int *out ); int _papi_hwi_native_code_to_name( unsigned int EventCode, char *hwi_name, int len ); int _papi_hwi_invalid_cmp( int cidx ); int _papi_hwi_component_index( int event_code ); int _papi_hwi_native_to_eventcode(int cidx, int event_code, int ntv_idx, const char *event_name); int 
_papi_hwi_eventcode_to_native(int event_code); #endif /* PAPI_INTERNAL_H */ papi-5.6.0/src/components/perf_event_uncore/tests/Makefile000664 001750 001750 00000003273 13216244357 026033 0ustar00jshenry1963jshenry1963000000 000000 NAME=perf_event_uncore include ../../Makefile_comp_tests.target %.o:%.c $(CC) $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c -o $@ $< TESTS = perf_event_uncore perf_event_uncore_attach perf_event_uncore_multiple \ perf_event_amd_northbridge perf_event_uncore_cbox DOLOOPS= $(testlibdir)/do_loops.o perf_event_uncore_tests: $(TESTS) perf_event_uncore_lib.o: perf_event_uncore_lib.c perf_event_uncore_lib.h $(CC) $(CFLAGS) $(INCLUDE) -c perf_event_uncore_lib.c perf_event_amd_northbridge: perf_event_amd_northbridge.o $(DOLOOPS) $(UTILOBJS) $(PAPILIB) $(DOLOOPS) $(CC) $(LFLAGS) -o perf_event_amd_northbridge perf_event_amd_northbridge.o $(UTILOBJS) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) perf_event_uncore: perf_event_uncore.o $(UTILOBJS) $(DOLOOPS) $(PAPILIB) perf_event_uncore_lib.o $(CC) $(LFLAGS) -o perf_event_uncore perf_event_uncore.o perf_event_uncore_lib.o $(UTILOBJS) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) perf_event_uncore_attach: perf_event_uncore_attach.o $(UTILOBJS) $(DOLOOPS) $(PAPILIB) perf_event_uncore_lib.o $(CC) $(LFLAGS) -o perf_event_uncore_attach perf_event_uncore_attach.o perf_event_uncore_lib.o $(UTILOBJS) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) perf_event_uncore_multiple: perf_event_uncore_multiple.o perf_event_uncore_lib.o $(UTILOBJS) $(DOLOOPS) $(PAPILIB) $(CC) $(LFLAGS) $(INCLUDE) -o perf_event_uncore_multiple perf_event_uncore_multiple.o perf_event_uncore_lib.o $(UTILOBJS) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) perf_event_uncore_cbox: perf_event_uncore_cbox.o perf_event_uncore_lib.o $(UTILOBJS) $(DOLOOPS) $(PAPILIB) $(CC) $(LFLAGS) $(INCLUDE) -o perf_event_uncore_cbox perf_event_uncore_cbox.o perf_event_uncore_lib.o $(UTILOBJS) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) clean: rm -f $(TESTS) *.o *~ papi-5.6.0/src/perfctr-2.6.x/OTHER000775 001750 001750 00000006540 
13216244366 020361 0ustar00jshenry1963jshenry1963000000 000000 RELATED PACKAGES ================ [Note: this list is incomplete and not actively maintained.] tsprof (by John Reiser, jreiser@BitWagon.com) http://www.BitWagon.com/tsprof/tsprof.html A profiling application which uses the facilities provided by the perfctr driver. lperfex (by Troy Baer, troy@osc.edu) http://www.osc.edu/~troy/lperfex/ A hardware performance monitoring tool for Linux/IA32 systems, using the interface provided by the perfctr driver and library. vprof (by Curtis Janssen, cljanss@ca.sandia.gov) http://aros.ca.sandia.gov/~cljanss/perf/vprof/ An application that annotates source code lines with event counts, using perfctr's overflow signal feature. msr (by Stephan.Meyer@pobox.com) http://pobox.com/~smeyer/msr.html [page seems dead now] Supports kernels up to 2.2.6. No longer actively developed. Adds /dev/msr as a raw interface to the CPU's MSRs. No virtual PMCs. proc_sr (by bosch@lri.fr) Patch for kernel 2.0.27. Adds a bunch of files under /proc/Intel/, which allow global access to MSRs and control registers. No virtual PMCs. vtimer (by dadkins@mit.edu and bsong@supertech.lcs.mit.edu) Patch for kernel 2.3.6. Adds per-process virtual TSC support. msr_patch-2.1.126 (by Harald Hoyer, HarryH@Royal.Net) Patch for kernel 2.1.126. Adds /proc//msr text file with read/write access to per-process PMC MSRs. Only supports the Intel P5MMX. Does not handle counter overflow. hardcntr-2 (by David.Mentre@irisa.fr) http://www.irisa.fr/prive/mentre/linux-counters/hardctr-2.patch Patch for kernel 2.1.65. Adds some text files under /proc//hardcntr/, which allow read/write access for per-process PMC MSRs. Only supports the Intel P6. Does not handle counter overflow. Deprecated by author. perf-0.7 (by Erik Hendriks, hendriks@cesdis.gsfc.nasa.gov) Patches for kernels 2.0.36, 2.2.2, and 2.2.9. Does support both per-process and global PMCs. Does handle counter overflow. No virtual TSC. Only supports Intel P6.
msr-patch-2.3.1 (by Richard Gooch, http://www.atnf.csiro.au/~rgooch/linux/) Adds /dev//perfmon with access to per-CPU PMCs. Does not implement virtual-mode PMCs. Does not handle counter overflow. Part of bigger package (MSRs, requires devfs). pperf/libpperf (by M. Patrick Goda, http://qso.lanl.gov/~mpg/pperf.html) Based on Stephan Meyer's msr patch, and shares its limitations. Emphasis on user-space tools. pcl (by Rudolf Berrendorf and Heinz Ziegler, http://www.fz-juelich.de/zam/PCL/) User-space tools, based on libpperf. PAPI (http://icl.cs.utk.edu/projects/papi/) "Portable Interface to Hardware Performance Counters" Common interface to the perfctr facilities on several operating systems and processor architectures. On Linux, uses Hendriks' "perf" patch for Intel P6 processor. User-space tools. Implements both overflow signals and event multiplexing. pmc-1.02 (by Don Heller, dheller@scl.ameslab.gov) http://www.scl.ameslab.gov/Projects/Rabbit/index.html Derived from pperf by M. Patrick Goda and Michael S. Warren. Global-mode counters access via /dev/pmc. Supports Intel P5 and P6 via compiler option. No virtual counters. Emphasis on user-space tools. Using hardware performance counters with Linux (by David Mentré) http://www.irisa.fr/prive/mentre/linux-counters/ A page with info and links. 
Compaq Continuous Profiling Infrastructure Project http://www.research.digital.com/SRC/dcpi/ papi-5.6.0/src/papi_internal.c000664 001750 001750 00000242267 13216244366 020335 0ustar00jshenry1963jshenry1963000000 000000 /****************************/ /* THIS IS OPEN SOURCE CODE */ /****************************/ /* * File: papi_internal.c * * Author: Philip Mucci * mucci@cs.utk.edu * Mods: dan terpstra * terpstra@cs.utk.edu * Mods: Min Zhou * min@cs.utk.edu * Mods: Kevin London * london@cs.utk.edu * Mods: Per Ekman * pek@pdc.kth.se * Mods: Haihang You * you@cs.utk.edu * Mods: Maynard Johnson * maynardj@us.ibm.com * Mods: Brian Sheely * bsheely@eecs.utk.edu * Mods: * * Mods: * */ #include #include #include #include #include #include #include "papi.h" #include "papi_internal.h" #include "papi_vector.h" #include "papi_memory.h" #include "sw_multiplex.h" #include "extras.h" #include "papi_preset.h" #include "cpus.h" #include "papi_common_strings.h" /* Advanced definitons */ static int default_debug_handler( int errorCode ); static long long handle_derived( EventInfo_t * evi, long long *from ); /* Global definitions used by other files */ int init_level = PAPI_NOT_INITED; int _papi_hwi_error_level = PAPI_QUIET; PAPI_debug_handler_t _papi_hwi_debug_handler = default_debug_handler; papi_mdi_t _papi_hwi_system_info; int _papi_hwi_errno = PAPI_OK; int _papi_hwi_num_errors = 0; hwi_presets_t user_defined_events[PAPI_MAX_USER_EVENTS]; int user_defined_events_count = 0; /*****************************/ /* Native Event Mapping Code */ /*****************************/ #define NATIVE_EVENT_CHUNKSIZE 1024 struct native_event_info { int cidx; int component_event; int ntv_idx; char *evt_name; }; // The following array is indexed by the papi event code (after the native bit has been removed) static struct native_event_info *_papi_native_events=NULL; static int num_native_events=0; static int num_native_chunks=0; char **_papi_errlist= NULL; static int num_error_chunks = 0; // pointer to 
event:mask string associated with last enum call to a components // will be NULL for non libpfm4 components // this is needed because libpfm4 event codes and papi event codes do not contain mask information char *papi_event_string = NULL; void _papi_hwi_set_papi_event_string (const char *event_string) { INTDBG("event_string: %s\n", event_string); if (papi_event_string != NULL) { free (papi_event_string); papi_event_string = NULL; } if (event_string != NULL) { papi_event_string = strdup(event_string); } return; } char * _papi_hwi_get_papi_event_string () { INTDBG("papi_event_string: %s\n", papi_event_string); return papi_event_string; } void _papi_hwi_free_papi_event_string() { if (papi_event_string != NULL) { free(papi_event_string); papi_event_string = NULL; } return; } // A place to keep the current papi event code so some component functions can fetch its value // The current event code can be stored here prior to component calls and cleared after the component returns static unsigned int papi_event_code = -1; static int papi_event_code_changed = -1; void _papi_hwi_set_papi_event_code (unsigned int event_code, int update_flag) { INTDBG("new event_code: %#x, update_flag: %d, previous event_code: %#x\n", event_code, update_flag, papi_event_code); // if call is just to reset and start over, set both flags to show nothing saved yet if (update_flag < 0) { papi_event_code_changed = -1; papi_event_code = -1; return; } // if 0, it is being set prior to calling a component, if >0 it is being changed by the component papi_event_code_changed = update_flag; // save the event code passed in papi_event_code = event_code; return; } unsigned int _papi_hwi_get_papi_event_code () { INTDBG("papi_event_code: %#x\n", papi_event_code); return papi_event_code; } /* Get the index into the ESI->NativeInfoArray for the current PAPI event code */ int _papi_hwi_get_ntv_idx (unsigned int papi_evt_code) { INTDBG("ENTER: papi_evt_code: %#x\n", papi_evt_code); int result; int event_index; if 
(papi_evt_code == 0) { INTDBG("EXIT: PAPI_ENOEVNT, invalid papi event code\n"); return PAPI_ENOEVNT; } event_index=papi_evt_code&PAPI_NATIVE_AND_MASK; if ((event_index<0) || (event_index>=num_native_events)) { INTDBG("EXIT: PAPI_ENOEVNT, invalid index into native event array\n"); return PAPI_ENOEVNT; } result=_papi_native_events[event_index].ntv_idx; INTDBG("EXIT: result: %d\n", result); return result; } // // Check for the presence of a component name or pmu name in the event string. // If found check if it matches this component or one of the pmu's supported by this component. // // returns true if the event could be for this component and false if it is not for this component. // if there is no component or pmu name then it could be for this component and returns true. // static int is_supported_by_component(int cidx, char *event_name) { INTDBG("ENTER: cidx: %d, event_name: %s\n", cidx, event_name); int i; int component_name = 0; int pmu_name = 0; char *wptr = NULL; // if event does not have a component name or pmu name, return to show it could be supported by this component // when component and pmu names are not provided, we just have to call the components to see if they recognize the event // // look for component names first if ((wptr = strstr(event_name, ":::")) != NULL) { component_name = 1; } else if ((wptr = strstr(event_name, "::")) != NULL) { pmu_name = 1; } else { INTDBG("EXIT: No Component or PMU name in event string, try this component\n"); // need to force all components to be called to find owner of this event // ???? can we assume the default pmu when no component or pmu name is provided ???? 
return 1; } // get a temporary copy of the component or pmu name int name_len = wptr - event_name; wptr = strdup(event_name); wptr[name_len] = '\0'; // if a component name was found, compare it to the component name in the component info structure if (component_name) { // INTDBG("component_name: %s\n", _papi_hwd[cidx]->cmp_info.name); if (strcmp (wptr, _papi_hwd[cidx]->cmp_info.name) == 0) { free (wptr); INTDBG("EXIT: Component %s supports this event\n", _papi_hwd[cidx]->cmp_info.name); return 1; } } // if a pmu name was found, compare it to the pmu name list if the component info structure (if there is one) if (pmu_name) { for ( i=0 ; icmp_info.pmu_names[i] == NULL) { continue; } // INTDBG("pmu_name[%d]: %p (%s)\n", i, _papi_hwd[cidx]->cmp_info.pmu_names[i], _papi_hwd[cidx]->cmp_info.pmu_names[i]); if (strcmp (wptr, _papi_hwd[cidx]->cmp_info.pmu_names[i]) == 0) { INTDBG("EXIT: Component %s supports PMU %s and this event\n", _papi_hwd[cidx]->cmp_info.name, wptr); free (wptr); return 1; } } } free (wptr); INTDBG("EXIT: Component does not support this event\n"); return 0; } /** @internal * @class _papi_hwi_prefix_component_name * @brief Prefixes a component's name to each of its events. * @param *component_name * @param *event_name * @param *out * @param *out_len * * Given sane component_name and event_name it returns component_name:::event_name. * It is safe in the case that event_name == out and it checks against the * traditional PAPI 'cpu' components, opting to not prepend those. */ int _papi_hwi_prefix_component_name( char *component_name, char *event_name, char *out, int out_len) { int size1, size2; char temp[out_len]; size1 = strlen(event_name); size2 = strlen(component_name); /* sanity checks */ if ( size1 == 0 ) { return (PAPI_EBUG); /* hopefully event_name always has length?! 
*/ } if ( size1 >= out_len ) return (PAPI_ENOMEM); /* Guard against event_name == out */ memcpy( temp, event_name, out_len ); /* no component name to prefix */ if ( size2 == 0 ) { sprintf(out, "%s%c", temp, '\0' ); return (PAPI_OK); } /* Don't prefix 'cpu' component names for now */ if ( strstr(component_name, "pe") || strstr(component_name, "bgq") || strstr(component_name, "bgp") ) { sprintf( out, "%s%c", temp, '\0'); return (PAPI_OK); } /* strlen(component_name) + ::: + strlen(event_name) + NULL */ if ( size1+size2+3+1 > out_len ) return (PAPI_ENOMEM); sprintf( out, "%s:::%s%c" , component_name, temp, '\0'); return (PAPI_OK); } /** @internal * @class _papi_hwi_strip_component_prefix * @brief Strip off cmp_name::: from an event name. * * @param *event_name * @return Start of the component consumable portion of the name. * * This function checks specifically for ':::' and will return the start of * event_name if it doesn't find the ::: . */ const char *_papi_hwi_strip_component_prefix(const char *event_name) { const char *start = NULL; /* We assume ::: is the seperator * eg: * papi_component:::event_name */ start = strstr( event_name, ":::" ); if ( start != NULL ) start+= 3; /* return the actual start of event_name */ else start = event_name; return (start); } /* find the papi event code (4000xxx) associated with the specified component, native event, and event name */ static int _papi_hwi_find_native_event(int cidx, int event, const char *event_name) { INTDBG("ENTER: cidx: %x, event: %#x, event_name: %s\n", cidx, event, event_name); int i; // if no event name passed in, it can not be found if (event_name == NULL) { INTDBG("EXIT: PAPI_ENOEVNT\n"); return PAPI_ENOEVNT; } for(i=0;i=num_native_chunks*NATIVE_EVENT_CHUNKSIZE) { num_native_chunks++; _papi_native_events=(struct native_event_info *) realloc(_papi_native_events, num_native_chunks*NATIVE_EVENT_CHUNKSIZE* sizeof(struct native_event_info)); if (_papi_native_events==NULL) { new_native_event=PAPI_ENOMEM; goto 
native_alloc_early_out; } } _papi_native_events[num_native_events].cidx=cidx; _papi_native_events[num_native_events].component_event=ntv_event; _papi_native_events[num_native_events].ntv_idx=ntv_idx; if (event_name != NULL) { _papi_native_events[num_native_events].evt_name=strdup(event_name); } else { _papi_native_events[num_native_events].evt_name=NULL; } new_native_event=num_native_events|PAPI_NATIVE_MASK; num_native_events++; native_alloc_early_out: _papi_hwi_unlock( INTERNAL_LOCK ); INTDBG("EXIT: new_native_event: %#x, num_native_events: %d\n", new_native_event, num_native_events); return new_native_event; } /** @internal * @class _papi_hwi_add_error * * Adds a new error string to PAPI's internal store. * MAKE SURE you are not holding INTERNAL_LOCK when you call me! */ static int _papi_hwi_add_error( char *error ) { INTDBG("Adding a new Error message |%s|\n", error); _papi_hwi_lock(INTERNAL_LOCK); if (_papi_hwi_num_errors >= num_error_chunks*NATIVE_EVENT_CHUNKSIZE) { num_error_chunks++; _papi_errlist= (char **) realloc(_papi_errlist, num_error_chunks*NATIVE_EVENT_CHUNKSIZE*sizeof(char *)); if (_papi_errlist==NULL) { _papi_hwi_num_errors = -2; goto bail; } } _papi_errlist[_papi_hwi_num_errors] = strdup( error ); if ( _papi_errlist[_papi_hwi_num_errors] == NULL ) _papi_hwi_num_errors = -2; bail: _papi_hwi_unlock(INTERNAL_LOCK); return _papi_hwi_num_errors++; } static void _papi_hwi_cleanup_errors() { int i; if ( _papi_errlist == NULL || _papi_hwi_num_errors == 0 ) return; _papi_hwi_lock( INTERNAL_LOCK ); for (i=0; i < _papi_hwi_num_errors; i++ ) { free( _papi_errlist[i]); _papi_errlist[i] = NULL; } free( _papi_errlist ); _papi_errlist = NULL; _papi_hwi_num_errors = 0; num_error_chunks=0; _papi_hwi_unlock( INTERNAL_LOCK ); } static int _papi_hwi_lookup_error( char *error ) { int i; for (i=0; i<_papi_hwi_num_errors; i++) { if ( !strncasecmp( _papi_errlist[i], error, strlen( error ) ) ) return i; } return (-1); } /** @internal * @class _papi_hwi_publish_error * * 
@return * <= 0 : Code for the error. * < 0 : We couldn't get memory to allocate for your error. * * An internal interface for adding an error code to the library. * The returned code is suitable for returning to users. * */ int _papi_hwi_publish_error( char *error ) { int error_code = -1; if ( (error_code = _papi_hwi_lookup_error( error )) < 0 ) error_code = _papi_hwi_add_error(error); return (-error_code); /* internally error_code is an index, externally, it should be <= 0 */ } void _papi_hwi_init_errors(void) { /* we use add error to avoid the cost of lookups, we know the errors are not there yet */ _papi_hwi_add_error("No error"); _papi_hwi_add_error("Invalid argument"); _papi_hwi_add_error("Insufficient memory"); _papi_hwi_add_error("A System/C library call failed"); _papi_hwi_add_error("Not supported by component"); _papi_hwi_add_error("Access to the counters was lost or interrupted"); _papi_hwi_add_error("Internal error, please send mail to the developers"); _papi_hwi_add_error("Event does not exist"); _papi_hwi_add_error("Event exists, but cannot be counted due to hardware resource limits"); _papi_hwi_add_error("EventSet is currently not running"); _papi_hwi_add_error("EventSet is currently counting"); _papi_hwi_add_error("No such EventSet available"); _papi_hwi_add_error("Event in argument is not a valid preset"); _papi_hwi_add_error("Hardware does not support performance counters"); _papi_hwi_add_error("Unknown error code"); _papi_hwi_add_error("Permission level does not permit operation"); _papi_hwi_add_error("PAPI hasn't been initialized yet"); _papi_hwi_add_error("Component Index isn't set"); _papi_hwi_add_error("Not supported"); _papi_hwi_add_error("Not implemented"); _papi_hwi_add_error("Buffer size exceeded"); _papi_hwi_add_error("EventSet domain is not supported for the operation"); _papi_hwi_add_error("Invalid or missing event attributes"); _papi_hwi_add_error("Too many events or attributes"); _papi_hwi_add_error("Bad combination of features"); } 
int _papi_hwi_invalid_cmp( int cidx ) { return ( cidx < 0 || cidx >= papi_num_components ); } int _papi_hwi_component_index( int event_code ) { INTDBG("ENTER: event_code: %#x\n", event_code); int cidx; int event_index; /* currently assume presets are for component 0 only */ if (IS_PRESET(event_code)) { INTDBG("EXIT: Event %#x is a PRESET, assigning component %d\n", event_code,0); return 0; } /* user defined events are treated like preset events (component 0 only) */ if (IS_USER_DEFINED(event_code)) { INTDBG("EXIT: Event %#x is USER DEFINED, assigning component %d\n", event_code,0); return 0; } event_index=event_code&PAPI_NATIVE_AND_MASK; if ( (event_index < 0) || (event_index>=num_native_events)) { INTDBG("EXIT: Event index %#x is out of range, num_native_events: %d\n", event_index, num_native_events); return PAPI_ENOEVNT; } cidx=_papi_native_events[event_index].cidx; if ((cidx<0) || (cidx >= papi_num_components)) { INTDBG("EXIT: Component index %#x is out of range, papi_num_components: %d\n", cidx, papi_num_components); return PAPI_ENOCMP; } INTDBG("EXIT: Found cidx: %d event_index: %d, event_code: %#x\n", cidx, event_index, event_code); return cidx; } /* Convert an internal component event to a papi event code */ int _papi_hwi_native_to_eventcode(int cidx, int event_code, int ntv_idx, const char *event_name) { INTDBG("Entry: cidx: %d, event: %#x, ntv_idx: %d, event_name: %s\n", cidx, event_code, ntv_idx, event_name); int result; if (papi_event_code_changed > 0) { result = _papi_hwi_get_papi_event_code(); INTDBG("EXIT: papi_event_code: %#x set by the component\n", result); return result; } result=_papi_hwi_find_native_event(cidx, event_code, event_name); if (result==PAPI_ENOEVNT) { // Need to create one result=_papi_hwi_add_native_event(cidx, event_code, ntv_idx, event_name); } INTDBG("EXIT: result: %#x\n", result); return result; } /* Convert a native_event code to an internal event code */ int _papi_hwi_eventcode_to_native(int event_code) { INTDBG("ENTER: 
event_code: %#x\n", event_code); int result; int event_index; event_index=event_code&PAPI_NATIVE_AND_MASK; if ((event_index < 0) || (event_index>=num_native_events)) { INTDBG("EXIT: PAPI_ENOEVNT\n"); return PAPI_ENOEVNT; } result=_papi_native_events[event_index].component_event; INTDBG("EXIT: result: %#x\n", result); return result; } /*********************/ /* Utility functions */ /*********************/ void PAPIERROR( char *format, ... ) { va_list args; if ( ( _papi_hwi_error_level != PAPI_QUIET ) || ( getenv( "PAPI_VERBOSE" ) ) ) { va_start( args, format ); fprintf( stderr, "PAPI Error: " ); vfprintf( stderr, format, args ); fprintf( stderr, "\n" ); va_end( args ); } } void PAPIWARN( char *format, ... ) { va_list args; if ( ( _papi_hwi_error_level != PAPI_QUIET ) || ( getenv( "PAPI_VERBOSE" ) ) ) { va_start( args, format ); fprintf( stderr, "PAPI Warning: " ); vfprintf( stderr, format, args ); fprintf( stderr, "\n" ); va_end( args ); } } static int default_debug_handler( int errorCode ) { char str[PAPI_HUGE_STR_LEN]; if ( errorCode == PAPI_OK ) return ( errorCode ); if ( ( errorCode > 0 ) || ( -errorCode > _papi_hwi_num_errors ) ) { PAPIERROR( "%s %d,%s,Bug! 
Unknown error code", PAPI_ERROR_CODE_str, errorCode, "" ); return ( PAPI_EBUG ); } switch ( _papi_hwi_error_level ) { case PAPI_VERB_ECONT: case PAPI_VERB_ESTOP: /* gcc 2.96 bug fix, do not change */ /* fprintf(stderr,"%s %d: %s: %s\n",PAPI_ERROR_CODE_str,errorCode,_papi_hwi_err[-errorCode].name,_papi_hwi_err[-errorCode].descr); */ sprintf( str, "%s %d,%s", PAPI_ERROR_CODE_str, errorCode, _papi_errlist[-errorCode] ); if ( errorCode == PAPI_ESYS ) sprintf( str + strlen( str ), ": %s", strerror( errno ) ); PAPIERROR( str ); if ( _papi_hwi_error_level == PAPI_VERB_ESTOP ) abort( ); /* patch provided by will cohen of redhat */ else return errorCode; break; case PAPI_QUIET: default: return errorCode; } return ( PAPI_EBUG ); /* Never get here */ } static int allocate_eventset_map( DynamicArray_t * map ) { /* Allocate and clear the Dynamic Array structure */ if ( map->dataSlotArray != NULL ) papi_free( map->dataSlotArray ); memset( map, 0x00, sizeof ( DynamicArray_t ) ); /* Allocate space for the EventSetInfo_t pointers */ map->dataSlotArray = ( EventSetInfo_t ** ) papi_malloc( PAPI_INIT_SLOTS * sizeof ( EventSetInfo_t * ) ); if ( map->dataSlotArray == NULL ) { return ( PAPI_ENOMEM ); } memset( map->dataSlotArray, 0x00, PAPI_INIT_SLOTS * sizeof ( EventSetInfo_t * ) ); map->totalSlots = PAPI_INIT_SLOTS; map->availSlots = PAPI_INIT_SLOTS; map->fullSlots = 0; return ( PAPI_OK ); } static int expand_dynamic_array( DynamicArray_t * DA ) { int number; EventSetInfo_t **n; /*realloc existing PAPI_EVENTSET_MAP.dataSlotArray */ number = DA->totalSlots * 2; n = ( EventSetInfo_t ** ) papi_realloc( DA->dataSlotArray, ( size_t ) number * sizeof ( EventSetInfo_t * ) ); if ( n == NULL ) return ( PAPI_ENOMEM ); /* Need to assign this value, what if realloc moved it? 
*/ DA->dataSlotArray = n; memset( DA->dataSlotArray + DA->totalSlots, 0x00, ( size_t ) DA->totalSlots * sizeof ( EventSetInfo_t * ) ); DA->totalSlots = number; DA->availSlots = number - DA->fullSlots; return ( PAPI_OK ); } static int EventInfoArrayLength( const EventSetInfo_t * ESI ) { return ( _papi_hwd[ESI->CmpIdx]->cmp_info.num_mpx_cntrs ); } /*========================================================================*/ /* This function allocates space for one EventSetInfo_t structure and for */ /* all of the pointers in this structure. If any malloc in this function */ /* fails, all memory malloced to the point of failure is freed, and NULL */ /* is returned. Upon success, a pointer to the EventSetInfo_t data */ /* structure is returned. */ /*========================================================================*/ static int create_EventSet( EventSetInfo_t ** here ) { EventSetInfo_t *ESI; ESI = ( EventSetInfo_t * ) papi_calloc( 1, sizeof ( EventSetInfo_t ) ); if ( ESI == NULL ) { return PAPI_ENOMEM; } *here = ESI; return PAPI_OK; } int _papi_hwi_assign_eventset( EventSetInfo_t *ESI, int cidx ) { INTDBG("ENTER: ESI: %p (%d), cidx: %d\n", ESI, ESI->EventSetIndex, cidx); int retval; size_t max_counters; char *ptr; unsigned int i, j; /* If component doesn't exist... */ if (_papi_hwi_invalid_cmp(cidx)) return PAPI_ECMP; /* Assigned at create time */ ESI->domain.domain = _papi_hwd[cidx]->cmp_info.default_domain; ESI->granularity.granularity = _papi_hwd[cidx]->cmp_info.default_granularity; ESI->CmpIdx = cidx; /* ??? 
*/ max_counters = ( size_t ) _papi_hwd[cidx]->cmp_info.num_mpx_cntrs; ESI->ctl_state = (hwd_control_state_t *) papi_calloc( 1, (size_t) _papi_hwd[cidx]->size.control_state ); ESI->sw_stop = (long long *) papi_calloc( ( size_t ) max_counters, sizeof ( long long ) ); ESI->hw_start = ( long long * ) papi_calloc( ( size_t ) max_counters, sizeof ( long long ) ); ESI->EventInfoArray = ( EventInfo_t * ) papi_calloc( (size_t) max_counters, sizeof ( EventInfo_t ) ); /* allocate room for the native events and for the component-private */ /* register structures */ /* ugh is there a cleaner way to allocate this? vmw */ ESI->NativeInfoArray = ( NativeInfo_t * ) papi_calloc( ( size_t ) max_counters, sizeof ( NativeInfo_t )); ESI->NativeBits = papi_calloc(( size_t ) max_counters, ( size_t ) _papi_hwd[cidx]->size.reg_value ); /* NOTE: the next two malloc allocate blocks of memory that are later */ /* parcelled into overflow and profile arrays */ ESI->overflow.deadline = ( long long * ) papi_malloc( ( sizeof ( long long ) + sizeof ( int ) * 3 ) * ( size_t ) max_counters ); ESI->profile.prof = ( PAPI_sprofil_t ** ) papi_malloc( ( sizeof ( PAPI_sprofil_t * ) * ( size_t ) max_counters + ( size_t ) max_counters * sizeof ( int ) * 4 ) ); /* If any of these allocations failed, free things up and fail */ if ( ( ESI->ctl_state == NULL ) || ( ESI->sw_stop == NULL ) || ( ESI->hw_start == NULL ) || ( ESI->NativeInfoArray == NULL ) || ( ESI->NativeBits == NULL ) || ( ESI->EventInfoArray == NULL ) || ( ESI->profile.prof == NULL ) || ( ESI->overflow.deadline == NULL ) ) { if ( ESI->sw_stop ) papi_free( ESI->sw_stop ); if ( ESI->hw_start ) papi_free( ESI->hw_start ); if ( ESI->EventInfoArray ) papi_free( ESI->EventInfoArray ); if ( ESI->NativeInfoArray ) papi_free( ESI->NativeInfoArray ); if ( ESI->NativeBits ) papi_free( ESI->NativeBits ); if ( ESI->ctl_state ) papi_free( ESI->ctl_state ); if ( ESI->overflow.deadline ) papi_free( ESI->overflow.deadline ); if ( ESI->profile.prof ) papi_free( 
ESI->profile.prof ); papi_free( ESI ); return PAPI_ENOMEM; } /* Carve up the overflow block into separate arrays */ ptr = ( char * ) ESI->overflow.deadline; ptr += sizeof ( long long ) * max_counters; ESI->overflow.threshold = ( int * ) ptr; ptr += sizeof ( int ) * max_counters; ESI->overflow.EventIndex = ( int * ) ptr; ptr += sizeof ( int ) * max_counters; ESI->overflow.EventCode = ( int * ) ptr; /* Carve up the profile block into separate arrays */ ptr = ( char * ) ESI->profile.prof + ( sizeof ( PAPI_sprofil_t * ) * max_counters ); ESI->profile.count = ( int * ) ptr; ptr += sizeof ( int ) * max_counters; ESI->profile.threshold = ( int * ) ptr; ptr += sizeof ( int ) * max_counters; ESI->profile.EventIndex = ( int * ) ptr; ptr += sizeof ( int ) * max_counters; ESI->profile.EventCode = ( int * ) ptr; /* initialize_EventInfoArray */ for ( i = 0; i < max_counters; i++ ) { ESI->EventInfoArray[i].event_code=( unsigned int ) PAPI_NULL; ESI->EventInfoArray[i].ops = NULL; ESI->EventInfoArray[i].derived=NOT_DERIVED; for ( j = 0; j < PAPI_EVENTS_IN_DERIVED_EVENT; j++ ) { ESI->EventInfoArray[i].pos[j] = PAPI_NULL; } } /* initialize_NativeInfoArray */ for( i = 0; i < max_counters; i++ ) { ESI->NativeInfoArray[i].ni_event = -1; ESI->NativeInfoArray[i].ni_position = -1; ESI->NativeInfoArray[i].ni_papi_code = -1; ESI->NativeInfoArray[i].ni_owners = 0; ESI->NativeInfoArray[i].ni_bits = ((unsigned char*)ESI->NativeBits) + (i*_papi_hwd[cidx]->size.reg_value); } ESI->NativeCount = 0; ESI->state = PAPI_STOPPED; /* these used to be init_config */ retval = _papi_hwd[cidx]->init_control_state( ESI->ctl_state ); retval |= _papi_hwd[cidx]->set_domain( ESI->ctl_state, ESI->domain.domain); return retval; } /*========================================================================*/ /* This function should free memory for one EventSetInfo_t structure. */ /* The argument list consists of a pointer to the EventSetInfo_t */ /* structure, *ESI. 
*/ /* The calling function should check for ESI==NULL. */ /*========================================================================*/ void _papi_hwi_free_EventSet( EventSetInfo_t * ESI ) { _papi_hwi_cleanup_eventset( ESI ); #ifdef DEBUG memset( ESI, 0x00, sizeof ( EventSetInfo_t ) ); #endif papi_free( ESI ); } static int add_EventSet( EventSetInfo_t * ESI, ThreadInfo_t * master ) { DynamicArray_t *map = &_papi_hwi_system_info.global_eventset_map; int i, errorCode; _papi_hwi_lock( INTERNAL_LOCK ); if ( map->availSlots == 0 ) { errorCode = expand_dynamic_array( map ); if ( errorCode < PAPI_OK ) { _papi_hwi_unlock( INTERNAL_LOCK ); return ( errorCode ); } } i = 0; for ( i = 0; i < map->totalSlots; i++ ) { if ( map->dataSlotArray[i] == NULL ) { ESI->master = master; ESI->EventSetIndex = i; map->fullSlots++; map->availSlots--; map->dataSlotArray[i] = ESI; _papi_hwi_unlock( INTERNAL_LOCK ); return ( PAPI_OK ); } } _papi_hwi_unlock( INTERNAL_LOCK ); return ( PAPI_EBUG ); } int _papi_hwi_create_eventset( int *EventSet, ThreadInfo_t * handle ) { EventSetInfo_t *ESI; int retval; /* Is the EventSet already in existence? */ if ( ( EventSet == NULL ) || ( handle == NULL ) ) return PAPI_EINVAL; if ( *EventSet != PAPI_NULL ) return PAPI_EINVAL; /* Well, then allocate a new one. Use n to keep track of a NEW EventSet */ retval = create_EventSet( &ESI ); if ( retval != PAPI_OK ) return retval; ESI->CmpIdx = -1; /* when eventset is created, it is not decided yet which component it belongs to, until first event is added */ ESI->state = PAPI_STOPPED; /* Add it to the global table */ retval = add_EventSet( ESI, handle ); if ( retval < PAPI_OK ) { _papi_hwi_free_EventSet( ESI ); return retval ; } *EventSet = ESI->EventSetIndex; INTDBG( "(%p,%p): new EventSet in slot %d\n", ( void * ) EventSet, handle, *EventSet ); return retval; } /* This function returns the index of the the next free slot in the EventInfoArray. If EventCode is already in the list, it returns PAPI_ECNFLCT. 
*/ static int get_free_EventCodeIndex( const EventSetInfo_t * ESI, unsigned int EventCode ) { int k; int lowslot = PAPI_ECNFLCT; int limit = EventInfoArrayLength( ESI ); /* Check for duplicate events and get the lowest empty slot */ for ( k = 0; k < limit; k++ ) { if ( ESI->EventInfoArray[k].event_code == EventCode ) return ( PAPI_ECNFLCT ); /*if ((ESI->EventInfoArray[k].event_code == PAPI_NULL) && (lowslot == PAPI_ECNFLCT)) */ if ( ESI->EventInfoArray[k].event_code == ( unsigned int ) PAPI_NULL ) { lowslot = k; break; } } return ( lowslot ); } /* This function returns the index of the EventCode or error */ /* Index to what? The index to everything stored EventCode in the */ /* EventSet. */ int _papi_hwi_lookup_EventCodeIndex( const EventSetInfo_t * ESI, unsigned int EventCode ) { int i; int limit = EventInfoArrayLength( ESI ); for ( i = 0; i < limit; i++ ) { if ( ESI->EventInfoArray[i].event_code == EventCode ) { return i; } } return PAPI_EINVAL; } /* This function only removes empty EventSets */ int _papi_hwi_remove_EventSet( EventSetInfo_t * ESI ) { DynamicArray_t *map = &_papi_hwi_system_info.global_eventset_map; int i; i = ESI->EventSetIndex; _papi_hwi_lock( INTERNAL_LOCK ); _papi_hwi_free_EventSet( ESI ); /* do bookkeeping for PAPI_EVENTSET_MAP */ map->dataSlotArray[i] = NULL; map->availSlots++; map->fullSlots--; _papi_hwi_unlock( INTERNAL_LOCK ); return PAPI_OK; } /* this function checks if an event is already in an EventSet Success, return ESI->NativeInfoArray[] index Fail, return PAPI_ENOEVNT; */ static int event_already_in_eventset( EventSetInfo_t * ESI, int papi_event ) { INTDBG( "ENTER: ESI: %p, papi_event: %#x\n", ESI, papi_event); int i; int nevt = _papi_hwi_eventcode_to_native(papi_event); /* to find the native event from the native events list */ for( i = 0; i < ESI->NativeCount; i++ ) { if ( nevt == ESI->NativeInfoArray[i].ni_event ) { // Also need to check papi event code if set because the same event with different masks // will generate the same 
libpfm4 event code (what was checked above). But there will be // different papi events created for it and they need to be handled separately. if (papi_event == ESI->NativeInfoArray[i].ni_papi_code) { INTDBG( "EXIT: event: %#x already mapped at index: %d\n", papi_event, i); return i; } } } INTDBG( "EXIT: PAPI_ENOEVNT\n"); return PAPI_ENOEVNT; } /* This function goes through the events in an EventSet's EventInfoArray */ /* And maps each event (whether native or part of a preset) to */ /* an event in the EventSets NativeInfoArray. */ /* We need to do this every time a native event is added to or removed */ /* from an eventset. */ /* It is also called after a update controlstate as the components are */ /* allowed to re-arrange the native events to fit hardware constraints. */ void _papi_hwi_map_events_to_native( EventSetInfo_t *ESI) { INTDBG("ENTER: ESI: %p, ESI->EventInfoArray: %p, ESI->NativeInfoArray: %p, ESI->NumberOfEvents: %d, ESI->NativeCount: %d\n", ESI, ESI->EventInfoArray, ESI->NativeInfoArray, ESI->NumberOfEvents, ESI->NativeCount); int i, event, k, n, preset_index = 0, nevt; int total_events = ESI->NumberOfEvents; event = 0; for( i = 0; i < total_events; i++ ) { /* find the first event that isn't PAPI_NULL */ /* Is this really necessary? 
--vmw */ while ( ESI->EventInfoArray[event].event_code == ( unsigned int ) PAPI_NULL ) { event++; } /* If it's a preset */ if ( IS_PRESET(ESI->EventInfoArray[event].event_code) ) { preset_index = ( int ) ESI->EventInfoArray[event].event_code & PAPI_PRESET_AND_MASK; /* walk all sub-events in the preset */ for( k = 0; k < PAPI_EVENTS_IN_DERIVED_EVENT; k++ ) { nevt = _papi_hwi_presets[preset_index].code[k]; if ( nevt == PAPI_NULL ) { break; } INTDBG("Looking for subevent %#x\n",nevt); /* Match each sub-event to something in the Native List */ for( n = 0; n < ESI->NativeCount; n++ ) { if ( nevt == ESI->NativeInfoArray[n].ni_papi_code ) { INTDBG("Found papi event: %#x, &ESI->NativeInfoArray[%d]: %p, ni_event: %#x, ni_position %d\n", nevt, n, &(ESI->NativeInfoArray[n]), ESI->NativeInfoArray[n].ni_event, ESI->NativeInfoArray[n].ni_position); ESI->EventInfoArray[event].pos[k] = ESI->NativeInfoArray[n].ni_position; break; } } } } /* If it's a native event */ else if( IS_NATIVE(ESI->EventInfoArray[event].event_code) ) { nevt = ( int ) ESI->EventInfoArray[event].event_code; // get index into native info array for this event int nidx = event_already_in_eventset( ESI, nevt ); // if not found, then we need to return an error if (nidx == PAPI_ENOEVNT) { INTDBG("EXIT: needed event not found\n"); return; } ESI->EventInfoArray[event].pos[0] = ESI->NativeInfoArray[nidx].ni_position; INTDBG("nidx: %d, ni_position: %d\n", nidx, ESI->NativeInfoArray[nidx].ni_position); } /* If it's a user-defined event */ else if ( IS_USER_DEFINED(ESI->EventInfoArray[event].event_code) ) { preset_index = ( int ) ESI->EventInfoArray[event].event_code & PAPI_UE_AND_MASK; for ( k = 0; k < PAPI_EVENTS_IN_DERIVED_EVENT; k++ ) { nevt = user_defined_events[preset_index].code[k]; INTDBG("nevt: %#x, user_defined_events[%d].code[%d]: %#x, code[%d]: %#x\n", nevt, preset_index, k, user_defined_events[preset_index].code[k], k+1, user_defined_events[preset_index].code[k+1]); if ( nevt == PAPI_NULL ) break; /* Match 
each sub-event to something in the Native List */ for ( n = 0; n < ESI->NativeCount; n++ ) { // if this is the event we are looking for, set its position and exit inner loop to look for next sub-event if ( _papi_hwi_eventcode_to_native(nevt) == ESI->NativeInfoArray[n].ni_event ) { ESI->EventInfoArray[event].pos[k] = ESI->NativeInfoArray[n].ni_position; break; } } } } event++; } INTDBG("EXIT: \n"); return; } static int add_native_fail_clean( EventSetInfo_t *ESI, int nevt ) { INTDBG("ENTER: ESI: %p, nevt: %#x\n", ESI, nevt); int i, max_counters; int cidx; cidx = _papi_hwi_component_index( nevt ); if (cidx<0) return PAPI_ENOCMP; max_counters = _papi_hwd[cidx]->cmp_info.num_mpx_cntrs; /* to find the native event from the native events list */ for( i = 0; i < max_counters; i++ ) { // INTDBG("ESI->NativeInfoArray[%d]: %p, ni_event: %#x, ni_papi_event_code: %#x, ni_position: %d, ni_owners: %d\n", // i, &(ESI->NativeInfoArray[i]), ESI->NativeInfoArray[i].ni_event, ESI->NativeInfoArray[i].ni_papi_code, ESI->NativeInfoArray[i].ni_position, ESI->NativeInfoArray[i].ni_owners); if ( nevt == ESI->NativeInfoArray[i].ni_papi_code ) { ESI->NativeInfoArray[i].ni_owners--; /* to clean the entry in the nativeInfo array */ if ( ESI->NativeInfoArray[i].ni_owners == 0 ) { ESI->NativeInfoArray[i].ni_event = -1; ESI->NativeInfoArray[i].ni_position = -1; ESI->NativeInfoArray[i].ni_papi_code = -1; ESI->NativeCount--; } INTDBG( "EXIT: nevt: %#x, returned: %d\n", nevt, i); return i; } } INTDBG( "EXIT: returned: -1\n"); return -1; } /* since update_control_state trashes overflow settings, this puts things back into balance. 
*/ static int update_overflow( EventSetInfo_t * ESI ) { int i, retval = PAPI_OK; if ( ESI->overflow.flags & PAPI_OVERFLOW_HARDWARE ) { for( i = 0; i < ESI->overflow.event_counter; i++ ) { retval = _papi_hwd[ESI->CmpIdx]->set_overflow( ESI, ESI->overflow.EventIndex[i], ESI->overflow.threshold[i] ); if ( retval != PAPI_OK ) { break; } } } return retval; } /* this function is called by _papi_hwi_add_event when adding native events ESI: event set to add the events to nevnt: pointer to array of native event table indexes to add size: number of native events to add out: ??? return: < 0 = error 0 = no new events added 1 = new events added */ static int add_native_events( EventSetInfo_t *ESI, unsigned int *nevt, int size, EventInfo_t *out ) { INTDBG ("ENTER: ESI: %p, nevt: %p, size: %d, out: %p\n", ESI, nevt, size, out); int nidx, i, j, added_events = 0; int retval, retval2; int max_counters; hwd_context_t *context; max_counters = _papi_hwd[ESI->CmpIdx]->cmp_info.num_mpx_cntrs; /* Walk through the list of native events, adding them */ for( i = 0; i < size; i++ ) { /* Check to see if event is already in EventSet */ nidx = event_already_in_eventset( ESI, nevt[i] ); if ( nidx >= 0 ) { /* Event is already there. Set position */ out->pos[i] = ESI->NativeInfoArray[nidx].ni_position; ESI->NativeInfoArray[nidx].ni_owners++; continue; } /* Event wasn't already there */ if ( ESI->NativeCount == max_counters ) { /* No more room in counters! 
*/ for( j = 0; j < i; j++ ) { if ( ( nidx = add_native_fail_clean( ESI, nevt[j] ) ) >= 0 ) { out->pos[j] = -1; continue; } INTDBG( "should not happen!\n" ); } INTDBG( "EXIT: counters are full!\n" ); return PAPI_ECOUNT; } /* there is an empty slot for the native event; */ /* initialize the native index for the new added event */ INTDBG( "Adding nevt[%d]: %#x, ESI->NativeInfoArray[%d]: %p, Component: %d\n", i, nevt[i], ESI->NativeCount, &ESI->NativeInfoArray[ESI->NativeCount], ESI->CmpIdx ); ESI->NativeInfoArray[ESI->NativeCount].ni_event = _papi_hwi_eventcode_to_native(nevt[i]); ESI->NativeInfoArray[ESI->NativeCount].ni_papi_code = nevt[i]; ESI->NativeInfoArray[ESI->NativeCount].ni_owners = 1; ESI->NativeCount++; added_events++; } INTDBG("added_events: %d\n", added_events); /* if we added events we need to tell the component so it */ /* can add them too. */ if ( added_events ) { /* get the context we should use for this event set */ context = _papi_hwi_get_context( ESI, NULL ); if ( _papi_hwd[ESI->CmpIdx]->allocate_registers( ESI ) == PAPI_OK ) { retval = _papi_hwd[ESI->CmpIdx]->update_control_state( ESI->ctl_state, ESI->NativeInfoArray, ESI->NativeCount, context); if ( retval != PAPI_OK ) { clean: for( i = 0; i < size; i++ ) { if ( ( nidx = add_native_fail_clean( ESI, nevt[i] ) ) >= 0 ) { out->pos[i] = -1; continue; } INTDBG( "should not happen!\n" ); } /* re-establish the control state after the previous error */ retval2 = _papi_hwd[ESI->CmpIdx]->update_control_state( ESI->ctl_state, ESI->NativeInfoArray, ESI->NativeCount, context); if ( retval2 != PAPI_OK ) { PAPIERROR("update_control_state failed to re-establish working events!" 
); INTDBG( "EXIT: update_control_state returned: %d\n", retval2); return retval2; } INTDBG( "EXIT: update_control_state returned: %d\n", retval); return retval; } INTDBG( "EXIT: update_control_state returned: %d, we return: 1 (need remap)\n", retval); return 1; /* need remap */ } else { retval = PAPI_EMISC; goto clean; } } INTDBG( "EXIT: PAPI_OK\n"); return PAPI_OK; } int _papi_hwi_add_event( EventSetInfo_t * ESI, int EventCode ) { INTDBG("ENTER: ESI: %p (%d), EventCode: %#x\n", ESI, ESI->EventSetIndex, EventCode); int i, j, thisindex, remap, retval = PAPI_OK; int cidx; cidx=_papi_hwi_component_index( EventCode ); if (cidx<0) return PAPI_ENOCMP; /* Sanity check that the new EventCode is from the same component */ /* as previous events. */ if ( ESI->CmpIdx < 0 ) { if ( ( retval = _papi_hwi_assign_eventset( ESI, cidx)) != PAPI_OK ) { INTDBG("EXIT: Error assigning eventset to component index %d\n", cidx); return retval; } } else { if ( ESI->CmpIdx != cidx ) { INTDBG("EXIT: Event is not valid for component index %d\n", cidx); return PAPI_EINVAL; } } /* Make sure the event is not present and get the next free slot. 
*/ thisindex = get_free_EventCodeIndex( ESI, ( unsigned int ) EventCode ); if ( thisindex < PAPI_OK ) { return thisindex; } INTDBG("Adding event to slot %d of EventSet %d\n",thisindex,ESI->EventSetIndex); /* If it is a software MPX EventSet, add it to the multiplex data structure */ /* and this thread's multiplex list */ if ( !_papi_hwi_is_sw_multiplex( ESI ) ) { /* Handle preset case */ if ( IS_PRESET(EventCode) ) { int count; int preset_index = EventCode & ( int ) PAPI_PRESET_AND_MASK; /* Check if it's within the valid range */ if ( ( preset_index < 0 ) || ( preset_index >= PAPI_MAX_PRESET_EVENTS ) ) { return PAPI_EINVAL; } /* count the number of native events in this preset */ count = ( int ) _papi_hwi_presets[preset_index].count; /* Check if event exists */ if ( !count ) { return PAPI_ENOEVNT; } /* check if the native events have been used as overflow events */ /* this is not allowed */ if ( ESI->state & PAPI_OVERFLOWING ) { for( i = 0; i < count; i++ ) { for( j = 0; j < ESI->overflow.event_counter; j++ ) { if ( ESI->overflow.EventCode[j] ==(int) ( _papi_hwi_presets[preset_index].code[i] ) ) { return PAPI_ECNFLCT; } } } } /* Try to add the preset. 
*/ remap = add_native_events( ESI, _papi_hwi_presets[preset_index].code, count, &ESI->EventInfoArray[thisindex] ); if ( remap < 0 ) { return remap; } else { /* Fill in the EventCode (machine independent) information */ ESI->EventInfoArray[thisindex].event_code = ( unsigned int ) EventCode; ESI->EventInfoArray[thisindex].derived = _papi_hwi_presets[preset_index].derived_int; ESI->EventInfoArray[thisindex].ops = _papi_hwi_presets[preset_index].postfix; ESI->NumberOfEvents++; _papi_hwi_map_events_to_native( ESI ); } } /* Handle adding Native events */ else if ( IS_NATIVE(EventCode) ) { /* Check if native event exists */ if ( _papi_hwi_query_native_event( ( unsigned int ) EventCode ) != PAPI_OK ) { return PAPI_ENOEVNT; } /* check if the native events have been used as overflow events */ /* This is not allowed */ if ( ESI->state & PAPI_OVERFLOWING ) { for( j = 0; j < ESI->overflow.event_counter; j++ ) { if ( EventCode == ESI->overflow.EventCode[j] ) { return PAPI_ECNFLCT; } } } /* Try to add the native event. 
*/ remap = add_native_events( ESI, (unsigned int *)&EventCode, 1, &ESI->EventInfoArray[thisindex] ); if ( remap < 0 ) { return remap; } else { /* Fill in the EventCode (machine independent) information */ ESI->EventInfoArray[thisindex].event_code = ( unsigned int ) EventCode; ESI->NumberOfEvents++; _papi_hwi_map_events_to_native( ESI ); } } else if ( IS_USER_DEFINED( EventCode ) ) { int count; int index = EventCode & PAPI_UE_AND_MASK; if ( index < 0 || index >= user_defined_events_count ) return ( PAPI_EINVAL ); count = ( int ) user_defined_events[index].count; for ( i = 0; i < count; i++ ) { for ( j = 0; j < ESI->overflow.event_counter; j++ ) { if ( ESI->overflow.EventCode[j] == (int)(user_defined_events[index].code[i]) ) { return ( PAPI_EBUG ); } } } remap = add_native_events( ESI, user_defined_events[index].code, count, &ESI->EventInfoArray[thisindex] ); if ( remap < 0 ) { return remap; } else { ESI->EventInfoArray[thisindex].event_code = (unsigned int) EventCode; ESI->EventInfoArray[thisindex].derived = user_defined_events[index].derived_int; ESI->EventInfoArray[thisindex].ops = user_defined_events[index].postfix; ESI->NumberOfEvents++; _papi_hwi_map_events_to_native( ESI ); } } else { /* not Native, Preset, or User events */ return PAPI_EBUG; } } else { /* Multiplexing is special. See multiplex.c */ retval = mpx_add_event( &ESI->multiplex.mpx_evset, EventCode, ESI->domain.domain, ESI->granularity.granularity ); if ( retval < PAPI_OK ) { return retval; } /* Relevant (???) */ ESI->EventInfoArray[thisindex].event_code = ( unsigned int ) EventCode; ESI->EventInfoArray[thisindex].derived = NOT_DERIVED; ESI->NumberOfEvents++; /* event is in the EventInfoArray but not mapped to the NativeEvents */ /* this causes issues if you try to set overflow on the event. */ /* in theory this wouldn't matter anyway. 
*/ } /* reinstate the overflows if any */ retval=update_overflow( ESI ); return retval; } static int remove_native_events( EventSetInfo_t *ESI, int *nevt, int size ) { INTDBG( "Entry: ESI: %p, nevt: %p, size: %d\n", ESI, nevt, size); NativeInfo_t *native = ESI->NativeInfoArray; hwd_context_t *context; int i, j, zero = 0, retval; /* Remove the references to this event from the native events: for all the metrics in this event, compare to each native event in this event set, and decrement owners if they match */ for( i = 0; i < size; i++ ) { int cevt = _papi_hwi_eventcode_to_native(nevt[i]); // INTDBG( "nevt[%d]: %#x, cevt: %#x\n", i, nevt[i], cevt); for( j = 0; j < ESI->NativeCount; j++ ) { if ((native[j].ni_event == cevt) && (native[j].ni_papi_code == nevt[i]) ) { // INTDBG( "native[%d]: %p, ni_papi_code: %#x, ni_event: %#x, ni_position: %d, ni_owners: %d\n", // j, &(native[j]), native[j].ni_papi_code, native[j].ni_event, native[j].ni_position, native[j].ni_owners); native[j].ni_owners--; if ( native[j].ni_owners == 0 ) { zero++; } break; } } } /* Remove any native events from the array if owners dropped to zero. 
The NativeInfoArray must be dense, with no empty slots, so if we remove an element, we must compact the list */ for( i = 0; i < ESI->NativeCount; i++ ) { if ( native[i].ni_event == -1 ) continue; if ( native[i].ni_owners == 0 ) { int copy = 0; int sz = _papi_hwd[ESI->CmpIdx]->size.reg_value; for( j = ESI->NativeCount - 1; j > i; j-- ) { if ( native[j].ni_event == -1 || native[j].ni_owners == 0 ) continue; else { /* copy j into i */ native[i].ni_event = native[j].ni_event; native[i].ni_position = native[j].ni_position; native[i].ni_owners = native[j].ni_owners; /* copy opaque [j].ni_bits to [i].ni_bits */ memcpy( native[i].ni_bits, native[j].ni_bits, ( size_t ) sz ); /* reset j to initialized state */ native[j].ni_event = -1; native[j].ni_position = -1; native[j].ni_owners = 0; copy++; break; } } if ( copy == 0 ) { /* set this structure back to empty state */ /* ni_owners is already 0 and contents of ni_bits doesn't matter */ native[i].ni_event = -1; native[i].ni_position = -1; } } } INTDBG( "ESI->NativeCount: %d, zero: %d\n", ESI->NativeCount, zero); /* to reset hwd_control_state values */ ESI->NativeCount -= zero; /* If we removed any elements, clear the now empty slots, reinitialize the index, and update the count. Then send the info down to the component to update the hwd control structure. 
*/ retval = PAPI_OK; if ( zero ) { /* get the context we should use for this event set */ context = _papi_hwi_get_context( ESI, NULL ); retval = _papi_hwd[ESI->CmpIdx]->update_control_state( ESI->ctl_state, native, ESI->NativeCount, context); if ( retval == PAPI_OK ) retval = update_overflow( ESI ); } return ( retval ); } int _papi_hwi_remove_event( EventSetInfo_t * ESI, int EventCode ) { int j = 0, retval, thisindex; EventInfo_t *array; thisindex = _papi_hwi_lookup_EventCodeIndex( ESI, ( unsigned int ) EventCode ); if ( thisindex < PAPI_OK ) return ( thisindex ); /* If it is a MPX EventSet, remove it from the multiplex data structure and this threads multiplex list */ if ( _papi_hwi_is_sw_multiplex( ESI ) ) { retval = mpx_remove_event( &ESI->multiplex.mpx_evset, EventCode ); if ( retval < PAPI_OK ) return ( retval ); } else /* Remove the events hardware dependent stuff from the EventSet */ { if ( IS_PRESET(EventCode) ) { int preset_index = EventCode & PAPI_PRESET_AND_MASK; /* Check if it's within the valid range */ if ( ( preset_index < 0 ) || ( preset_index >= PAPI_MAX_PRESET_EVENTS ) ) return PAPI_EINVAL; /* Check if event exists */ if ( !_papi_hwi_presets[preset_index].count ) return PAPI_ENOEVNT; /* Remove the preset event. */ for ( j = 0; _papi_hwi_presets[preset_index].code[j] != (unsigned int)PAPI_NULL; j++ ); retval = remove_native_events( ESI, ( int * )_papi_hwi_presets[preset_index].code, j ); if ( retval != PAPI_OK ) return ( retval ); } else if ( IS_NATIVE(EventCode) ) { /* Check if native event exists */ if ( _papi_hwi_query_native_event( ( unsigned int ) EventCode ) != PAPI_OK ) return PAPI_ENOEVNT; /* Remove the native event. 
*/ retval = remove_native_events( ESI, &EventCode, 1 ); if ( retval != PAPI_OK ) return ( retval ); } else if ( IS_USER_DEFINED( EventCode ) ) { int index = EventCode & PAPI_UE_AND_MASK; if ( (index < 0) || (index >= user_defined_events_count) ) return ( PAPI_EINVAL ); for( j = 0; j < PAPI_EVENTS_IN_DERIVED_EVENT && user_defined_events[index].code[j] != 0; j++ ) { retval = remove_native_events( ESI, ( int * )user_defined_events[index].code, j); if ( retval != PAPI_OK ) return ( retval ); } } else return ( PAPI_ENOEVNT ); } array = ESI->EventInfoArray; /* Compact the Event Info Array list if it's not the last event */ /* clear the newly empty slot in the array */ for ( ; thisindex < ESI->NumberOfEvents - 1; thisindex++ ) array[thisindex] = array[thisindex + 1]; array[thisindex].event_code = ( unsigned int ) PAPI_NULL; for ( j = 0; j < PAPI_EVENTS_IN_DERIVED_EVENT; j++ ) array[thisindex].pos[j] = PAPI_NULL; array[thisindex].ops = NULL; array[thisindex].derived = NOT_DERIVED; ESI->NumberOfEvents--; return ( PAPI_OK ); } int _papi_hwi_read( hwd_context_t * context, EventSetInfo_t * ESI, long long *values ) { INTDBG("ENTER: context: %p, ESI: %p, values: %p\n", context, ESI, values); int retval; long long *dp = NULL; int i, index; retval = _papi_hwd[ESI->CmpIdx]->read( context, ESI->ctl_state, &dp, ESI->state ); if ( retval != PAPI_OK ) { INTDBG("EXIT: retval: %d\n", retval); return retval; } /* This routine distributes hardware counters to software counters in the order that they were added. Note that the higher level EventInfoArray[i] entries may not be contiguous because the user has the right to remove an event. But if we do compaction after remove event, this function can be changed. 
*/ for ( i = 0; i != ESI->NumberOfEvents; i++ ) { index = ESI->EventInfoArray[i].pos[0]; if ( index == -1 ) continue; INTDBG( "ESI->EventInfoArray: %p, pos[%d]: %d, dp[%d]: %lld, derived[%d]: %#x\n", ESI->EventInfoArray, i, index, index, dp[index], i, ESI->EventInfoArray[i].derived ); /* If this is not a derived event */ if ( ESI->EventInfoArray[i].derived == NOT_DERIVED ) { values[i] = dp[index]; INTDBG( "value: %#llx\n", values[i] ); } else { /* If this is a derived event */ values[i] = handle_derived( &ESI->EventInfoArray[i], dp ); #ifdef DEBUG if ( values[i] < ( long long ) 0 ) { INTDBG( "Derived Event is negative!!: %lld\n", values[i] ); } INTDBG( "derived value: %#llx \n", values[i] ); #endif } } INTDBG("EXIT: PAPI_OK\n"); return PAPI_OK; } int _papi_hwi_cleanup_eventset( EventSetInfo_t * ESI ) { int i, j, num_cntrs, retval; hwd_context_t *context; int EventCode; NativeInfo_t *native; if ( !_papi_hwi_invalid_cmp( ESI->CmpIdx ) ) { num_cntrs = _papi_hwd[ESI->CmpIdx]->cmp_info.num_mpx_cntrs; for(i=0;iEventInfoArray[i].event_code; /* skip if event not there */ if ( EventCode == PAPI_NULL ) continue; /* If it is a MPX EventSet, remove it from the multiplex */ /* data structure and this thread's multiplex list */ if ( _papi_hwi_is_sw_multiplex( ESI ) ) { retval = mpx_remove_event( &ESI->multiplex.mpx_evset, EventCode ); if ( retval < PAPI_OK ) return retval; } else { native = ESI->NativeInfoArray; /* clear out ESI->NativeInfoArray */ /* do we really need to do this, seeing as we free() it later? */ for( j = 0; j < ESI->NativeCount; j++ ) { native[j].ni_event = -1; native[j].ni_position = -1; native[j].ni_owners = 0; /* native[j].ni_bits?? */ } } /* do we really need to do this, seeing as we free() it later? 
*/ ESI->EventInfoArray[i].event_code= ( unsigned int ) PAPI_NULL; for( j = 0; j < PAPI_EVENTS_IN_DERIVED_EVENT; j++ ) { ESI->EventInfoArray[i].pos[j] = PAPI_NULL; } ESI->EventInfoArray[i].ops = NULL; ESI->EventInfoArray[i].derived = NOT_DERIVED; } context = _papi_hwi_get_context( ESI, NULL ); /* calling with count of 0 equals a close? */ retval = _papi_hwd[ESI->CmpIdx]->update_control_state( ESI->ctl_state, NULL, 0, context); if (retval!=PAPI_OK) { return retval; } } ESI->CmpIdx = -1; ESI->NumberOfEvents = 0; ESI->NativeCount = 0; if ( ( ESI->state & PAPI_MULTIPLEXING ) && ESI->multiplex.mpx_evset ) papi_free( ESI->multiplex.mpx_evset ); if ( ( ESI->state & PAPI_CPU_ATTACH ) && ESI->CpuInfo ) _papi_hwi_shutdown_cpu( ESI->CpuInfo ); if ( ESI->ctl_state ) papi_free( ESI->ctl_state ); if ( ESI->sw_stop ) papi_free( ESI->sw_stop ); if ( ESI->hw_start ) papi_free( ESI->hw_start ); if ( ESI->EventInfoArray ) papi_free( ESI->EventInfoArray ); if ( ESI->NativeInfoArray ) papi_free( ESI->NativeInfoArray ); if ( ESI->NativeBits ) papi_free( ESI->NativeBits ); if ( ESI->overflow.deadline ) papi_free( ESI->overflow.deadline ); if ( ESI->profile.prof ) papi_free( ESI->profile.prof ); ESI->ctl_state = NULL; ESI->sw_stop = NULL; ESI->hw_start = NULL; ESI->EventInfoArray = NULL; ESI->NativeInfoArray = NULL; ESI->NativeBits = NULL; memset( &ESI->domain, 0x0, sizeof(EventSetDomainInfo_t) ); memset( &ESI->granularity, 0x0, sizeof(EventSetGranularityInfo_t) ); memset( &ESI->overflow, 0x0, sizeof(EventSetOverflowInfo_t) ); memset( &ESI->multiplex, 0x0, sizeof(EventSetMultiplexInfo_t) ); memset( &ESI->attach, 0x0, sizeof(EventSetAttachInfo_t) ); memset( &ESI->cpu, 0x0, sizeof(EventSetCpuInfo_t) ); memset( &ESI->profile, 0x0, sizeof(EventSetProfileInfo_t) ); memset( &ESI->inherit, 0x0, sizeof(EventSetInheritInfo_t) ); ESI->CpuInfo = NULL; return PAPI_OK; } int _papi_hwi_convert_eventset_to_multiplex( _papi_int_multiplex_t * mpx ) { int retval, i, j = 0, *mpxlist = NULL; EventSetInfo_t 
*ESI = mpx->ESI; int flags = mpx->flags; /* If there are any events in the EventSet, convert them to multiplex events */ if ( ESI->NumberOfEvents ) { mpxlist = ( int * ) papi_malloc( sizeof ( int ) * ( size_t ) ESI->NumberOfEvents ); if ( mpxlist == NULL ) return ( PAPI_ENOMEM ); /* Build the args to MPX_add_events(). */ /* Remember the EventInfoArray can be sparse and the data can be non-contiguous */ for ( i = 0; i < EventInfoArrayLength( ESI ); i++ ) if ( ESI->EventInfoArray[i].event_code != ( unsigned int ) PAPI_NULL ) mpxlist[j++] = ( int ) ESI->EventInfoArray[i].event_code; /* Resize the EventInfo_t array */ if ( ( _papi_hwd[ESI->CmpIdx]->cmp_info.kernel_multiplex == 0 ) || ( ( _papi_hwd[ESI->CmpIdx]->cmp_info.kernel_multiplex ) && ( flags & PAPI_MULTIPLEX_FORCE_SW ) ) ) { retval = MPX_add_events( &ESI->multiplex.mpx_evset, mpxlist, j, ESI->domain.domain, ESI->granularity.granularity ); if ( retval != PAPI_OK ) { papi_free( mpxlist ); return ( retval ); } } papi_free( mpxlist ); } /* Update the state before initialization! */ ESI->state |= PAPI_MULTIPLEXING; if ( _papi_hwd[ESI->CmpIdx]->cmp_info.kernel_multiplex && ( flags & PAPI_MULTIPLEX_FORCE_SW ) ) ESI->multiplex.flags = PAPI_MULTIPLEX_FORCE_SW; ESI->multiplex.ns = ( int ) mpx->ns; return ( PAPI_OK ); } #include "components_config.h" int papi_num_components = ( sizeof ( _papi_hwd ) / sizeof ( *_papi_hwd ) ) - 1; /* * Routine that initializes all available components. * A component is available if a pointer to its info vector * appears in the NULL terminated_papi_hwd table. 
*/ int _papi_hwi_init_global( void ) { int retval, i = 0; retval = _papi_hwi_innoculate_os_vector( &_papi_os_vector ); if ( retval != PAPI_OK ) { return retval; } while ( _papi_hwd[i] ) { retval = _papi_hwi_innoculate_vector( _papi_hwd[i] ); if ( retval != PAPI_OK ) { return retval; } /* We can be disabled by user before init */ if (!_papi_hwd[i]->cmp_info.disabled) { retval = _papi_hwd[i]->init_component( i ); _papi_hwd[i]->cmp_info.disabled=retval; /* Do some sanity checking */ if (retval==PAPI_OK) { if (_papi_hwd[i]->cmp_info.num_cntrs > _papi_hwd[i]->cmp_info.num_mpx_cntrs) { fprintf(stderr,"Warning! num_cntrs %d is more than num_mpx_cntrs %d for component %s\n", _papi_hwd[i]->cmp_info.num_cntrs, _papi_hwd[i]->cmp_info.num_mpx_cntrs, _papi_hwd[i]->cmp_info.name); } } } i++; } return PAPI_OK; } /* Machine info struct initialization using defaults */ /* See _papi_mdi definition in papi_internal.h */ int _papi_hwi_init_global_internal( void ) { int retval; memset(&_papi_hwi_system_info,0x0,sizeof( _papi_hwi_system_info )); memset( _papi_hwi_using_signal,0x0,sizeof( _papi_hwi_using_signal )); /* Global struct to maintain EventSet mapping */ retval = allocate_eventset_map( &_papi_hwi_system_info.global_eventset_map ); if ( retval != PAPI_OK ) { return retval; } _papi_hwi_system_info.pid = 0; /* Process identifier */ /* PAPI_hw_info_t struct */ memset(&(_papi_hwi_system_info.hw_info),0x0,sizeof(PAPI_hw_info_t)); return PAPI_OK; } void _papi_hwi_shutdown_global_internal( void ) { _papi_hwi_cleanup_all_presets( ); _papi_hwi_cleanup_errors( ); _papi_hwi_lock( INTERNAL_LOCK ); papi_free( _papi_hwi_system_info.global_eventset_map.dataSlotArray ); memset( &_papi_hwi_system_info.global_eventset_map, 0x00, sizeof ( DynamicArray_t ) ); _papi_hwi_unlock( INTERNAL_LOCK ); if ( _papi_hwi_system_info.shlib_info.map ) { papi_free( _papi_hwi_system_info.shlib_info.map ); } memset( &_papi_hwi_system_info, 0x0, sizeof ( _papi_hwi_system_info ) ); } void _papi_hwi_dummy_handler( int 
EventSet, void *address, long long overflow_vector, void *context ) { /* This function is not used and shouldn't be called. */ ( void ) EventSet; /*unused */ ( void ) address; /*unused */ ( void ) overflow_vector; /*unused */ ( void ) context; /*unused */ return; } static long long handle_derived_add( int *position, long long *from ) { int pos, i; long long retval = 0; i = 0; while ( i < PAPI_EVENTS_IN_DERIVED_EVENT ) { pos = position[i++]; if ( pos == PAPI_NULL ) break; INTDBG( "Compound event, adding %lld to %lld\n", from[pos], retval ); retval += from[pos]; } return ( retval ); } static long long handle_derived_subtract( int *position, long long *from ) { int pos, i; long long retval = from[position[0]]; i = 1; while ( i < PAPI_EVENTS_IN_DERIVED_EVENT ) { pos = position[i++]; if ( pos == PAPI_NULL ) break; INTDBG( "Compound event, subtracting pos=%d %lld from %lld\n", pos, from[pos], retval ); retval -= from[pos]; } return ( retval ); } static long long units_per_second( long long units, long long cycles ) { return ( ( units * (long long) _papi_hwi_system_info.hw_info.cpu_max_mhz * (long long) 1000000 ) / cycles ); } static long long handle_derived_ps( int *position, long long *from ) { return ( units_per_second( from[position[1]], from[position[0]] ) ); } static long long handle_derived_add_ps( int *position, long long *from ) { long long tmp = handle_derived_add( position + 1, from ); return ( units_per_second( tmp, from[position[0]] ) ); } /* this function implement postfix calculation, it reads in a string where I use: | as delimiter N2 indicate No. 
2 native event in the derived preset +, -, *, / as operator # as MHZ(million hz) got from _papi_hwi_system_info.hw_info.cpu_max_mhz*1000000.0 Haihang (you@cs.utk.edu) */ static long long _papi_hwi_postfix_calc( EventInfo_t * evi, long long *hw_counter ) { char *point = evi->ops, operand[16]; double stack[PAPI_EVENTS_IN_DERIVED_EVENT]; int i, val, top = 0; INTDBG("ENTER: evi: %p, evi->ops: %p (%s), evi->pos[0]: %d, evi->pos[1]: %d, hw_counter: %p (%lld %lld)\n", evi, evi->ops, evi->ops, evi->pos[0], evi->pos[1], hw_counter, hw_counter[0], hw_counter[1]); memset(&stack,0,PAPI_EVENTS_IN_DERIVED_EVENT*sizeof(double)); while ( *point != '\0' ) { if ( *point == '|' ) { /* consume '|' characters */ point++; } else if ( *point == 'N' ) { /* to get count for each native event */ point++; i = 0; while ( isdigit(*point) ) { assert(i<16); operand[i] = *point; point++; i++; } assert(0pos[val]]; top++; } else if ( *point == '#' ) { /* to get mhz */ point++; assert( top < PAPI_EVENTS_IN_DERIVED_EVENT ); stack[top] = _papi_hwi_system_info.hw_info.cpu_max_mhz * 1000000.0; top++; } else if ( isdigit( *point ) ) { i = 0; while ( isdigit(*point) ) { assert(i<16); operand[i] = *point; point++; i++; } assert(0= 2); stack[top - 2] += stack[top - 1]; top--; } else if ( *point == '-' ) { /* - calculation */ point++; assert(top >= 2); stack[top - 2] -= stack[top - 1]; top--; } else if ( *point == '*' ) { /* * calculation */ point++; assert(top >= 2); stack[top - 2] *= stack[top - 1]; top--; } else if ( *point == '/' ) { /* / calculation */ point++; assert(top >= 2); /* FIXME should handle runtime divide by zero */ stack[top - 2] /= stack[top - 1]; top--; } else { /* flag an error parsing the preset */ PAPIERROR( "BUG! 
Unable to parse \"%s\"", evi->ops ); return ( long long ) stack[0]; } } assert(top == 1); INTDBG("EXIT: stack[0]: %lld\n", (long long)stack[0]); return ( long long ) stack[0]; } static long long handle_derived( EventInfo_t * evi, long long *from ) { INTDBG("ENTER: evi: %p, evi->derived: %d, from: %p\n", evi, evi->derived, from); switch ( evi->derived ) { case DERIVED_ADD: return ( handle_derived_add( evi->pos, from ) ); case DERIVED_ADD_PS: return ( handle_derived_add_ps( evi->pos, from ) ); case DERIVED_SUB: return ( handle_derived_subtract( evi->pos, from ) ); case DERIVED_PS: return ( handle_derived_ps( evi->pos, from ) ); case DERIVED_POSTFIX: return ( _papi_hwi_postfix_calc( evi, from ) ); case DERIVED_CMPD: /* This type has existed for a long time, but was never implemented. Probably because its a no-op. However, if it's in a header, it should be supported. As I found out when I implemented it in Pentium 4 for testing...dkt */ return ( from[evi->pos[0]] ); default: PAPIERROR( "BUG! Unknown derived command %d, returning 0", evi->derived ); INTDBG("EXIT: Unknown derived command %d\n", evi->derived); return ( ( long long ) 0 ); } } /* table matching derived types to derived strings. 
used by get_info, encode_event, xml translator */ static const hwi_describe_t _papi_hwi_derived[] = { {NOT_DERIVED, "NOT_DERIVED", "Do nothing"}, {DERIVED_ADD, "DERIVED_ADD", "Add counters"}, {DERIVED_PS, "DERIVED_PS", "Divide by the cycle counter and convert to seconds"}, {DERIVED_ADD_PS, "DERIVED_ADD_PS", "Add 2 counters then divide by the cycle counter and xl8 to secs."}, {DERIVED_CMPD, "DERIVED_CMPD", "Event lives in first counter but takes 2 or more codes"}, {DERIVED_SUB, "DERIVED_SUB", "Sub all counters from first counter"}, {DERIVED_POSTFIX, "DERIVED_POSTFIX", "Process counters based on specified postfix string"}, {DERIVED_INFIX, "DERIVED_INFIX", "Process counters based on specified infix string"}, {-1, NULL, NULL} }; /* _papi_hwi_derived_type: Helper routine to extract a derived type from a derived string returns type value if found, otherwise returns -1 */ int _papi_hwi_derived_type( char *tmp, int *code ) { int i = 0; while ( _papi_hwi_derived[i].name != NULL ) { if ( strcasecmp( tmp, _papi_hwi_derived[i].name ) == 0 ) { *code = _papi_hwi_derived[i].value; return PAPI_OK; } i++; } INTDBG( "Invalid derived string %s\n", tmp ); return PAPI_EINVAL; } /* _papi_hwi_derived_string: Helper routine to extract a derived string from a derived type copies derived type string into derived if found, otherwise returns PAPI_EINVAL */ static int _papi_hwi_derived_string( int type, char *derived, int len ) { int j; for ( j = 0; _papi_hwi_derived[j].value != -1; j++ ) { if ( _papi_hwi_derived[j].value == type ) { strncpy( derived, _papi_hwi_derived[j].name, ( size_t )\ len ); return PAPI_OK; } } INTDBG( "Invalid derived type %d\n", type ); return PAPI_EINVAL; } /* _papi_hwi_get_preset_event_info: Assumes EventCode contains a valid preset code. But defensive programming says check for NULL pointers. Returns a filled in PAPI_event_info_t structure containing descriptive strings and values for the specified preset event. 
*/ int _papi_hwi_get_preset_event_info( int EventCode, PAPI_event_info_t * info ) { INTDBG("ENTER: EventCode: %#x, info: %p\n", EventCode, info); int i = EventCode & PAPI_PRESET_AND_MASK; unsigned int j; if ( _papi_hwi_presets[i].symbol ) { /* if the event is in the preset table */ // since we are setting the whole structure to zero the strncpy calls below will // be leaving NULL terminates strings as long as they copy 1 less byte than the // buffer size of the field. memset( info, 0, sizeof ( PAPI_event_info_t ) ); info->event_code = ( unsigned int ) EventCode; strncpy( info->symbol, _papi_hwi_presets[i].symbol, sizeof(info->symbol)-1); if ( _papi_hwi_presets[i].short_descr != NULL ) strncpy( info->short_descr, _papi_hwi_presets[i].short_descr, sizeof ( info->short_descr )-1 ); if ( _papi_hwi_presets[i].long_descr != NULL ) strncpy( info->long_descr, _papi_hwi_presets[i].long_descr, sizeof ( info->long_descr )-1 ); info->event_type = _papi_hwi_presets[i].event_type; info->count = _papi_hwi_presets[i].count; _papi_hwi_derived_string( _papi_hwi_presets[i].derived_int, info->derived, sizeof ( info->derived ) ); if ( _papi_hwi_presets[i].postfix != NULL ) strncpy( info->postfix, _papi_hwi_presets[i].postfix, sizeof ( info->postfix )-1 ); for(j=0;j < info->count; j++) { info->code[j]=_papi_hwi_presets[i].code[j]; strncpy(info->name[j], _papi_hwi_presets[i].name[j], sizeof(info->name[j])-1); } if ( _papi_hwi_presets[i].note != NULL ) { strncpy( info->note, _papi_hwi_presets[i].note, sizeof ( info->note )-1 ); } return PAPI_OK; } else { return PAPI_ENOEVNT; } } /* _papi_hwi_get_user_event_info: Assumes EventCode contains a valid user event code. But defensive programming says check for NULL pointers. Returns a filled in PAPI_event_info_t structure containing descriptive strings and values for the specified preset event. 
*/ int _papi_hwi_get_user_event_info( int EventCode, PAPI_event_info_t * info ) { INTDBG("ENTER: EventCode: %#x, info: %p\n", EventCode, info); unsigned int i = EventCode & PAPI_UE_AND_MASK; unsigned int j; // if event code not in valid range, return error if (i >= PAPI_MAX_USER_EVENTS) { INTDBG("EXIT: Invalid event index: %d, max value is: %d\n", i, PAPI_MAX_USER_EVENTS - 1); return( PAPI_ENOEVNT ); } if ( user_defined_events[i].symbol == NULL) { /* if the event is in the preset table */ INTDBG("EXIT: Event symbol for this event is NULL\n"); return PAPI_ENOEVNT; } /* set whole structure to 0 */ memset( info, 0, sizeof ( PAPI_event_info_t ) ); info->event_code = ( unsigned int ) EventCode; strncpy( info->symbol, user_defined_events[i].symbol, sizeof(info->symbol)-1); if ( user_defined_events[i].short_descr != NULL ) strncpy( info->short_descr, user_defined_events[i].short_descr, sizeof(info->short_descr)-1); if ( user_defined_events[i].long_descr != NULL ) strncpy( info->long_descr, user_defined_events[i].long_descr, sizeof(info->long_descr)-1); // info->event_type = user_defined_events[i].event_type; info->count = user_defined_events[i].count; _papi_hwi_derived_string( user_defined_events[i].derived_int, info->derived, sizeof(info->derived)-1); if ( user_defined_events[i].postfix != NULL ) strncpy( info->postfix, user_defined_events[i].postfix, sizeof(info->postfix)-1); for(j=0;j < info->count; j++) { info->code[j]=user_defined_events[i].code[j]; INTDBG("info->code[%d]: %#x\n", j, info->code[j]); strncpy(info->name[j], user_defined_events[i].name[j], sizeof(info->name[j])-1); } if ( user_defined_events[i].note != NULL ) { strncpy( info->note, user_defined_events[i].note, sizeof(info->note)-1); } INTDBG("EXIT: PAPI_OK: event_code: %#x, symbol: %s, short_desc: %s, long_desc: %s\n", info->event_code, info->symbol, info->short_descr, info->long_descr); return PAPI_OK; } /* Returns PAPI_OK if native EventCode found, or PAPI_ENOEVNT if not; Used to enumerate the entire 
array, e.g. for native_avail.c */ int _papi_hwi_query_native_event( unsigned int EventCode ) { INTDBG("ENTER: EventCode: %#x\n", EventCode); char name[PAPI_HUGE_STR_LEN]; /* probably overkill, */ /* but should always be big enough */ int cidx; int nevt_code; cidx = _papi_hwi_component_index( EventCode ); if (cidx<0) { INTDBG("EXIT: PAPI_ENOCMP\n"); return PAPI_ENOCMP; } // save event code so components can get it with call to: _papi_hwi_get_papi_event_code() _papi_hwi_set_papi_event_code(EventCode, 0); if ((nevt_code = _papi_hwi_eventcode_to_native(EventCode)) < 0) { INTDBG("EXIT: nevt_code: %d\n", nevt_code); return nevt_code; } int ret = _papi_hwd[cidx]->ntv_code_to_name( (unsigned int)nevt_code, name, sizeof(name)); INTDBG("EXIT: ret: %d\n", ret); return (ret); } /* Converts an ASCII name into a native event code usable by other routines Returns code = 0 and PAPI_OK if name not found. This allows for sparse native event arrays */ int _papi_hwi_native_name_to_code( const char *in, int *out ) { INTDBG("ENTER: in: %s, out: %p\n", in, out); int retval = PAPI_ENOEVNT; char name[PAPI_HUGE_STR_LEN]; /* make sure it's big enough */ unsigned int i; int cidx; char *full_event_name; if (in == NULL) { INTDBG("EXIT: PAPI_EINVAL\n"); return PAPI_EINVAL; } full_event_name = strdup(in); in = _papi_hwi_strip_component_prefix(in); // look in each component for(cidx=0; cidx < papi_num_components; cidx++) { if (_papi_hwd[cidx]->cmp_info.disabled) continue; // if this component does not support the pmu which defines this event, no need to call it if (is_supported_by_component(cidx, full_event_name) == 0) continue; INTDBG("cidx: %d, name: %s, event: %s\n", cidx, _papi_hwd[cidx]->cmp_info.name, in); // show that we do not have an event code yet (the component may create one and update this info) // this also clears any values left over from a previous call _papi_hwi_set_papi_event_code(-1, -1); // if component has a ntv_name_to_code function, use it to get event code if 
(_papi_hwd[cidx]->ntv_name_to_code != NULL) { // try and get this events event code retval = _papi_hwd[cidx]->ntv_name_to_code( in, ( unsigned * ) out ); if (retval==PAPI_OK) { *out = _papi_hwi_native_to_eventcode(cidx, *out, -1, in); free (full_event_name); INTDBG("EXIT: PAPI_OK event: %s code: %#x\n", in, *out); return PAPI_OK; } } else { // force the code through the work around retval = PAPI_ECMP; } /* If not implemented, work around */ if ( retval==PAPI_ECMP) { i = 0; retval = _papi_hwd[cidx]->ntv_enum_events( &i, PAPI_ENUM_FIRST ); if (retval != PAPI_OK) { free (full_event_name); INTDBG("EXIT: retval: %d\n", retval); return retval; } // _papi_hwi_lock( INTERNAL_LOCK ); do { // save event code so components can get it with call to: _papi_hwi_get_papi_event_code() _papi_hwi_set_papi_event_code(i, 0); retval = _papi_hwd[cidx]->ntv_code_to_name(i, name, sizeof(name)); /* printf("%#x\nname =|%s|\ninput=|%s|\n", i, name, in); */ if ( retval == PAPI_OK && in != NULL) { if ( strcasecmp( name, in ) == 0 ) { *out = _papi_hwi_native_to_eventcode(cidx, i, -1, name); free (full_event_name); INTDBG("EXIT: PAPI_OK, event: %s, code: %#x\n", in, *out); return PAPI_OK; } retval = PAPI_ENOEVNT; } else { *out = 0; retval = PAPI_ENOEVNT; break; } } while ( ( _papi_hwd[cidx]->ntv_enum_events( &i, PAPI_ENUM_EVENTS ) == PAPI_OK ) ); // _papi_hwi_unlock( INTERNAL_LOCK ); } } free (full_event_name); INTDBG("EXIT: retval: %d\n", retval); return retval; } /* Returns event name based on native event code. 
Returns NULL if name not found */ int _papi_hwi_native_code_to_name( unsigned int EventCode, char *hwi_name, int len ) { INTDBG("ENTER: EventCode: %#x, hwi_name: %p, len: %d\n", EventCode, hwi_name, len); int cidx; int retval; int nevt_code; cidx = _papi_hwi_component_index( EventCode ); if (cidx<0) return PAPI_ENOEVNT; if ( EventCode & PAPI_NATIVE_MASK ) { // save event code so components can get it with call to: _papi_hwi_get_papi_event_code() _papi_hwi_set_papi_event_code(EventCode, 0); if ((nevt_code = _papi_hwi_eventcode_to_native(EventCode)) < 0) { INTDBG("EXIT: nevt_code: %d\n", nevt_code); return nevt_code; } if ( (retval = _papi_hwd[cidx]->ntv_code_to_name( (unsigned int)nevt_code, hwi_name, len) ) == PAPI_OK ) { retval = _papi_hwi_prefix_component_name( _papi_hwd[cidx]->cmp_info.short_name, hwi_name, hwi_name, len); INTDBG("EXIT: retval: %d\n", retval); return retval; } INTDBG("EXIT: retval: %d\n", retval); return (retval); } INTDBG("EXIT: PAPI_ENOEVNT\n"); return PAPI_ENOEVNT; } /* The native event equivalent of PAPI_get_event_info */ int _papi_hwi_get_native_event_info( unsigned int EventCode, PAPI_event_info_t *info ) { INTDBG("ENTER: EventCode: %#x, info: %p\n", EventCode, info); int retval; int cidx; int nevt_code; cidx = _papi_hwi_component_index( EventCode ); if (cidx<0) return PAPI_ENOCMP; if (_papi_hwd[cidx]->cmp_info.disabled) return PAPI_ENOCMP; if ( EventCode & PAPI_NATIVE_MASK ) { // save event code so components can get it with call to: _papi_hwi_get_papi_event_code() _papi_hwi_set_papi_event_code(EventCode, 0); /* clear the event info */ memset( info, 0, sizeof ( PAPI_event_info_t ) ); info->event_code = ( unsigned int ) EventCode; info->component_index = (unsigned int) cidx; retval = _papi_hwd[cidx]->ntv_code_to_info( _papi_hwi_eventcode_to_native(EventCode), info); /* If component error, it's missing the ntv_code_to_info vector */ /* so we'll have to fake it. 
*/ if ( retval == PAPI_ECMP ) { INTDBG("missing NTV_CODE_TO_INFO, faking\n"); /* Fill in the info structure */ if ((nevt_code = _papi_hwi_eventcode_to_native(EventCode)) < 0) { INTDBG("EXIT: nevt_code: %d\n", nevt_code); return nevt_code; } if ( (retval = _papi_hwd[cidx]->ntv_code_to_name( (unsigned int)nevt_code, info->symbol, sizeof(info->symbol)) ) == PAPI_OK ) { } else { INTDBG("EXIT: retval: %d\n", retval); return retval; } if ((nevt_code = _papi_hwi_eventcode_to_native(EventCode)) <0) { INTDBG("EXIT: nevt_code: %d\n", nevt_code); return nevt_code; } retval = _papi_hwd[cidx]->ntv_code_to_descr( (unsigned int)nevt_code, info->long_descr, sizeof ( info->long_descr)); if (retval!=PAPI_OK) { INTDBG("Failed ntv_code_to_descr()\n"); } } retval = _papi_hwi_prefix_component_name( _papi_hwd[cidx]->cmp_info.short_name, info->symbol, info->symbol, sizeof(info->symbol) ); INTDBG("EXIT: retval: %d\n", retval); return retval; } INTDBG("EXIT: PAPI_ENOEVNT\n"); return PAPI_ENOEVNT; } EventSetInfo_t * _papi_hwi_lookup_EventSet( int eventset ) { const DynamicArray_t *map = &_papi_hwi_system_info.global_eventset_map; EventSetInfo_t *set; if ( ( eventset < 0 ) || ( eventset > map->totalSlots ) ) return ( NULL ); set = map->dataSlotArray[eventset]; #ifdef DEBUG if ( ( ISLEVEL( DEBUG_THREADS ) ) && ( _papi_hwi_thread_id_fn ) && ( set->master->tid != _papi_hwi_thread_id_fn( ) ) ) return ( NULL ); #endif return ( set ); } int _papi_hwi_is_sw_multiplex(EventSetInfo_t *ESI) { /* Are we multiplexing at all */ if ( ( ESI->state & PAPI_MULTIPLEXING ) == 0 ) { return 0; } /* Does the component support kernel multiplexing */ if ( _papi_hwd[ESI->CmpIdx]->cmp_info.kernel_multiplex ) { /* Have we forced software multiplexing */ if ( ESI->multiplex.flags == PAPI_MULTIPLEX_FORCE_SW ) { return 1; } /* Nope, using hardware multiplexing */ return 0; } /* We are multiplexing but the component does not support hardware */ return 1; } hwd_context_t * _papi_hwi_get_context( EventSetInfo_t * ESI, int 
*is_dirty ) { INTDBG("Entry: ESI: %p, is_dirty: %p\n", ESI, is_dirty); int dirty_ctx; hwd_context_t *ctx=NULL; /* assume for now the control state is clean (last updated by this ESI) */ dirty_ctx = 0; /* get a context pointer based on if we are counting for a thread or for a cpu */ if (ESI->state & PAPI_CPU_ATTACHED) { /* use cpu context */ ctx = ESI->CpuInfo->context[ESI->CmpIdx]; /* if the user wants to know if the control state was last set by the same event set, tell him */ if (is_dirty != NULL) { if (ESI->CpuInfo->from_esi != ESI) { dirty_ctx = 1; } *is_dirty = dirty_ctx; } ESI->CpuInfo->from_esi = ESI; } else { /* use thread context */ ctx = ESI->master->context[ESI->CmpIdx]; /* if the user wants to know if the control state was last set by the same event set, tell him */ if (is_dirty != NULL) { if (ESI->master->from_esi != ESI) { dirty_ctx = 1; } *is_dirty = dirty_ctx; } ESI->master->from_esi = ESI; } return( ctx ); } papi-5.6.0/src/ctests/bgp/000775 001750 001750 00000000000 13216244360 017376 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/ctests/earprofile.c000664 001750 001750 00000013062 13216244360 021124 0ustar00jshenry1963jshenry1963000000 000000 /* * File: profile.c * Author: Philip Mucci * mucci@cs.utk.edu * Mods: Dan Terpstra * terpstra@cs.utk.edu */ /* This file performs the following test: profiling and program info option call - This tests the SVR4 profiling interface of PAPI. These are counted in the default counting domain and default granularity, depending on the platform. Usually this is the user domain (PAPI_DOM_USER) and thread context (PAPI_GRN_THR). 
The Eventset contains: + PAPI_FP_INS (to profile) + PAPI_TOT_CYC - Set up profile - Start eventset 1 - Do both (flops and reads) - Stop eventset 1 */ #include #include #include #include "papi.h" #include "papi_test.h" #include "prof_utils.h" #include "do_loops.h" #undef THRESHOLD #define THRESHOLD 1000 static void ear_no_profile( void ) { int retval; if ( ( retval = PAPI_start( EventSet ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_start", retval ); do_l1misses( 10000 ); if ( ( retval = PAPI_stop( EventSet, values[0] ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); printf( "Test type : \tNo profiling\n" ); printf( TAB1, event_name, ( values[0] )[0] ); printf( TAB1, "PAPI_TOT_CYC:", ( values[0] )[1] ); } static int do_profile( caddr_t start, unsigned long plength, unsigned scale, int thresh, int bucket ) { int i, retval; unsigned long blength; int num_buckets; const char *profstr[2] = { "PAPI_PROFIL_POSIX", "PAPI_PROFIL_INST_EAR" }; int profflags[2] = { PAPI_PROFIL_POSIX, PAPI_PROFIL_POSIX | PAPI_PROFIL_INST_EAR }; int num_profs; do_stuff( ); num_profs = sizeof ( profflags ) / sizeof ( int ); ear_no_profile( ); blength = prof_size( plength, scale, bucket, &num_buckets ); prof_alloc( num_profs, blength ); for ( i = 0; i < num_profs; i++ ) { if ( !TESTS_QUIET ) printf( "Test type : \t%s\n", profstr[i] ); if ( ( retval = PAPI_profil( profbuf[i], blength, start, scale, EventSet, PAPI_event, thresh, profflags[i] | bucket ) ) != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_profil", retval ); } if ( ( retval = PAPI_start( EventSet ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_start", retval ); do_stuff( ); if ( ( retval = PAPI_stop( EventSet, values[1] ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); if ( !TESTS_QUIET ) { printf( TAB1, event_name, ( values[1] )[0] ); printf( TAB1, "PAPI_TOT_CYC:", ( values[1] )[1] ); } if ( ( retval = PAPI_profil( profbuf[i], blength, start, scale, EventSet, PAPI_event, 0, 
profflags[i] ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_profil", retval ); } prof_head( blength, bucket, num_buckets, "address\t\t\tPOSIX\tINST_DEAR\n" ); prof_out( start, num_profs, bucket, num_buckets, scale ); retval = prof_check( num_profs, bucket, num_buckets ); for ( i = 0; i < num_profs; i++ ) { free( profbuf[i] ); } return retval; } int main( int argc, char **argv ) { int num_events, num_tests = 6; long length; int retval, retval2; const PAPI_hw_info_t *hw_info; const PAPI_exe_info_t *prginfo; caddr_t start, end; int quiet; /* Set TESTS_QUIET variable */ quiet = tests_quiet( argc, argv ); retval = PAPI_library_init( PAPI_VER_CURRENT ); if (retval != PAPI_VER_CURRENT ) { test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); } if ( ( prginfo = PAPI_get_executable_info( ) ) == NULL ) { test_fail( __FILE__, __LINE__, "PAPI_get_executable_info", 1 ); } if ( ( hw_info = PAPI_get_hardware_info( ) ) == NULL ) { test_fail( __FILE__, __LINE__, "PAPI_get_hardware_info", 0 ); } if ( ( strncasecmp( hw_info->model_string, "Itanium", strlen( "Itanium" ) ) != 0 ) && ( strncasecmp( hw_info->model_string, "32", strlen( "32" ) ) != 0 ) ) { if (!quiet) printf("Itanium only for now.\n"); test_skip( __FILE__, __LINE__, "Test unsupported", PAPI_ENOIMPL ); } // if ( quiet ) { // test_skip( __FILE__, __LINE__, // "Test deprecated in quiet mode for PAPI 3.6", 0 ); // } sprintf( event_name, "DATA_EAR_CACHE_LAT4" ); if ( ( retval = PAPI_event_name_to_code( event_name, &PAPI_event ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_event_name_to_code", retval ); if ( ( retval = PAPI_create_eventset( &EventSet ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); if ( ( retval = PAPI_add_event( EventSet, PAPI_event ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_add_event", retval ); if ( ( retval = PAPI_add_event( EventSet, PAPI_TOT_CYC ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_add_event", retval ); num_events = 2; 
values = allocate_test_space( num_tests, num_events ); /* use these lines to profile entire code address space */ start = prginfo->address_info.text_start; end = prginfo->address_info.text_end; length = end - start; if ( length < 0 ) test_fail( __FILE__, __LINE__, "Profile length < 0!", length ); prof_print_address ( "Test earprofile: POSIX compatible event address register profiling.\n", prginfo ); prof_print_prof_info( start, end, THRESHOLD, event_name ); retval = do_profile( start, length, FULL_SCALE, THRESHOLD, PAPI_PROFIL_BUCKET_16 ); retval2 = PAPI_remove_event( EventSet, PAPI_event ); if ( retval2 == PAPI_OK ) retval2 = PAPI_remove_event( EventSet, PAPI_TOT_CYC ); if ( retval2 != PAPI_OK ) test_fail( __FILE__, __LINE__, "Can't remove events", retval2 ); if ( retval ) test_pass( __FILE__ ); else test_fail( __FILE__, __LINE__, "No information in buffers", 1 ); return 1; } papi-5.6.0/src/components/mx/tests/Makefile000664 001750 001750 00000000720 13216244357 022741 0ustar00jshenry1963jshenry1963000000 000000 NAME=mx include ../../Makefile_comp_tests.target %.o:%.c $(CC) $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c -o $@ $< TESTS = mx_basic mx_elapsed mx_tests: $(TESTS) mx_basic: mx_basic.o $(UTILOBJS) $(PAPILIB) $(CC) $(CFLAGS) $(INCLUDE) -o mx_basic mx_basic.o $(UTILOBJS) $(PAPILIB) $(LDFLAGS) mx_elapsed: mx_elapsed.o $(UTILOBJS) $(PAPILIB) $(CC) $(CFLAGS) $(INCLUDE) -o mx_elapsed mx_elapsed.o $(UTILOBJS) $(PAPILIB) $(LDFLAGS) clean: rm -f $(TESTS) *.o papi-5.6.0/src/components/lmsensors/README000664 001750 001750 00000000547 13216244357 022427 0ustar00jshenry1963jshenry1963000000 000000 /** * @file: README * CVS: $Id$ * @author: Dan Terpstra * terpstra@icl.utk.edu * @defgroup papi_components Components * @brief Component Specific Readme file: lmsensors */ /** @page component_readme Component Readme @section Component Specific Information lmsensors/ The PAPI lmsensors component requires lmsensors version >= 3.0.0. 
*/ papi-5.6.0/src/components/rapl/README000664 001750 001750 00000004137 13216244360 021331 0ustar00jshenry1963jshenry1963000000 000000 /** * @file: README * CVS: $Id$ * @author: Dan Terpstra * terpstra@icl.utk.edu * @defgroup papi_components Components * @brief Component Specific Readme file: RAPL */ /** @page component_readme Component Readme @section Component Specific Information RAPL/ RAPL uses the MSR kernel module to read model specific registers (MSRs) from user space. To enable the msr module interface the admin needs to 'chmod 666 /dev/cpu/*/msr'. For kernels older than 3.7, this is all that is required to use the PAPI RAPL component. Historically, the Linux MSR driver only relied upon file system checks. This means that anything as root with any capability set could read and write to MSRs. Changes in the mainline Linux kernel since around 3.7 now require an executable to have capability CAP_SYS_RAWIO to open the MSR device file [1]. This change impacts user programs that use PAPI APIs that rely on the MSR device driver. Besides loading the MSR kernel module and setting the appropriate file permissions on the msr device file, one must grant the CAP_SYS_RAWIO capability to any user executable that needs access to the MSR driver, using the command below: setcap cap_sys_rawio=ep <user_executable> Note that one needs superuser privileges to grant the RAWIO capability to an executable, and that the executable cannot be located on a shared network file system partition. The dynamic linker on most operating systems will remove variables that control dynamic linking from the environment of executables with extended rights, such as setuid executables or executables with raised capabilities. One such variable is LD_LIBRARY_PATH. Therefore, executables that have the RAWIO capability can only load shared libraries from default system directories. 
One can work around this restriction by either installing the shared libraries in system directories, linking statically against those libraries, or using the -rpath linker option to specify the full path to the shared libraries during the linking step. [1] http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=c903f0456bc69176912dee6dd25c6a66ee1aed00 */ papi-5.6.0/src/libpfm4/lib/pfmlib_intel_hswep_unc_irp.c000664 001750 001750 00000005752 13216244365 025177 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_intel_hswep_irp.c : Intel Haswell-EP IRP uncore PMU * * Copyright (c) 2014 Google Inc. All rights reserved * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include #include #include #include #include /* private headers */ #include "pfmlib_priv.h" #include "pfmlib_intel_x86_priv.h" #include "pfmlib_intel_snbep_unc_priv.h" #include "events/intel_hswep_unc_irp_events.h" static void display_irp(void *this, pfmlib_event_desc_t *e, void *val) { const intel_x86_entry_t *pe = this_pe(this); pfm_snbep_unc_reg_t *reg = val; __pfm_vbprintf("[UNC_IRP=0x%"PRIx64" event=0x%x umask=0x%x en=%d " "edge=%d thres=%d] %s\n", reg->val, reg->irp.unc_event, reg->irp.unc_umask, reg->irp.unc_en, reg->irp.unc_edge, reg->irp.unc_thres, pe[e->event].name); } pfmlib_pmu_t intel_hswep_unc_irp_support = { .desc = "Intel Haswell-EP IRP uncore", .name = "hswep_unc_irp", .perf_name = "uncore_irp", .pmu = PFM_PMU_INTEL_HSWEP_UNC_IRP, .pme_count = LIBPFM_ARRAY_SIZE(intel_hswep_unc_i_pe), .type = PFM_PMU_TYPE_UNCORE, .num_cntrs = 4, .num_fixed_cntrs = 0, .max_encoding = 3, .pe = intel_hswep_unc_i_pe, .atdesc = snbep_unc_mods, .flags = PFMLIB_PMU_FL_RAW_UMASK, .pmu_detect = pfm_intel_hswep_unc_detect, .get_event_encoding[PFM_OS_NONE] = pfm_intel_snbep_unc_get_encoding, PFMLIB_ENCODE_PERF(pfm_intel_snbep_unc_get_perf_encoding), PFMLIB_OS_DETECT(pfm_intel_x86_perf_detect), .get_event_first = pfm_intel_x86_get_event_first, .get_event_next = pfm_intel_x86_get_event_next, .event_is_valid = pfm_intel_x86_event_is_valid, .validate_table = pfm_intel_x86_validate_table, .get_event_info = pfm_intel_x86_get_event_info, .get_event_attr_info = pfm_intel_x86_get_event_attr_info, PFMLIB_VALID_PERF_PATTRS(pfm_intel_snbep_unc_perf_validate_pattrs), .get_event_nattrs = pfm_intel_x86_get_event_nattrs, .display_reg = display_irp, }; papi-5.6.0/src/perfctr-2.7.x/etc/costs/PentiumII-300000664 001750 001750 00000000662 13216244367 023547 0ustar00jshenry1963jshenry1963000000 000000 [data from a 300MHz Mobile Pentium II] PERFCTR INIT: vendor 0, family 6, model 5 PERFCTR INIT: NITER == 64 PERFCTR INIT: rdpmc ticks == 2035 PERFCTR INIT: rdmsr (counter) ticks == 5290 PERFCTR 
/*
 * Minimal workload generator: runs do_stuff() once and exits with
 * status 0.  The command line is accepted but never inspected.
 */
int
main( int argc, char **argv )
{
	/* silence unused-parameter warnings */
	( void ) argv;
	( void ) argc;

	do_stuff(  );

	return 0;
}
18:45:01 +0200 libpfm4 (4.0) unstable; urgency=low * Intel IVB-EP support * Intel IVB updates support * Intel SNB updates support * Intel SNB-EP uncore support * ldlat support (PEBS-LL) * New Intel Atom support * bug fixes -- Stephane Eranian Fri, 08 Jun 2013 18:45:01 +0200 libpfm4 (3.0) unstable; urgency=low * ARM Cortex A15 support * updated Intel Sandy Bridge core PMU events * Intel Sandy Bridge desktop (model 42) uncore PMU support * Intel Ivy Bridge support * full perf_events generic event support * updated perf_examples * enabled Intel Nehalem/Westmere uncore PMU support * AMD Llano processor support (Fam 12h) * AMD Turion processor support (Fam 11h) * Intel Atom Cedarview processor support * Win32 compilation support * perf_events excl attribute * perf_events generic hw event aliases support * many bug fixes -- Stephane Eranian Mon, 27 Aug 2012 17:45:22 +0200 libpfm4 (2.0) unstable; urgency=low * updated event tables for Intel X86 processors * new AMD Fam15h support * new MIPS 74k support * updated ARM Cortex A8/A9 support * 30% size reduction for Intel/AMD X86 event tables * bug fixes and other improvements -- Stephane Eranian Fri, 7 Oct 2011 15:55:22 +0200 libpfm4 (1.0) unstable; urgency=low * Initial Release. -- Arun Sharma Mon, 21 Jun 2010 15:17:22 -0700 papi-5.6.0/src/aix-context.h000664 001750 001750 00000000704 13216244356 017743 0ustar00jshenry1963jshenry1963000000 000000 #ifndef _PAPI_AIX_CONTEXT_H #define _PAPI_AIX_CONTEXT_H /* overflow */ /* Override void* definitions from PAPI framework layer */ /* with typedefs to conform to PAPI component layer code. 
/*
 * GET_OVERFLOW_ADDRESS(ctx)
 *
 * Yields, as a void *, the `iar` member found under
 * sc_jmpbuf.jmp_context in the hwd_ucontext_t that ctx->ucontext points to.
 *
 * ctx: pointer expression whose target has a `ucontext` member.
 *
 * Both the argument and the whole expansion are parenthesized so the
 * macro expands correctly for arbitrary pointer expressions (for example
 * `p + 1` or `&array[i]`) and inside larger expressions.  The argument is
 * evaluated exactly once.
 */
#define GET_OVERFLOW_ADDRESS(ctx) \
	((void *)(((hwd_ucontext_t *)((ctx)->ucontext))->sc_jmpbuf.jmp_context.iar))
/*
 * One entry of a Cell event table: the symbolic name and description of
 * an event together with the numeric values used to program it.
 */
typedef struct {
	char *pme_name;			/* event name */
	char *pme_desc;			/* event description */
	unsigned long long pme_code;	/* event code */
	unsigned int pme_type;		/* count type; presumably one of the COUNT_TYPE_* values -- TODO confirm */
	unsigned int pme_freq;		/* debug_bus_control's frequency value; presumably a PFM_CELL_PME_FREQ_* value -- TODO confirm */
	unsigned int pme_enable_word;	/* NOTE(review): looks like one of the WORD_* selectors -- confirm against the event tables */
} pme_cell_entry_t;
#!/usr/bin/perl
#
# report.pl -- turn one or more iozone report files into gnuplot graphs.
#
# arguments: one or more report files
#
# Christian Mautner, 2005-10-31
#
# This script is based loosely on the Generate_Graph set
# of scripts that come with iozone, but is a complete re-write
#
# The main reason to write this was the need to compare the behaviour of
# two or more different setups, for tuning filesystems or
# comparing different pieces of hardware.
#
# This script is in the public domain, too short and too trivial
# to deserve a copyright.
#
# Simply run iozone like, for example, ./iozone -a -g 4G > config1.out (if your machine has 4GB)
# and then run perl report.pl config1.out
# or get another report from another box into config2.out and run
# perl report.pl config1.out config2.out
# then look in the report_* directory for .png
#
# If you don't like png or the graphic size, search for "set terminal" in this file and put whatever gnuplot
# terminal you want. Note I've also noticed that gnuplot switched the set terminal png syntax
# a while back, you might need "set terminal png small size 900,700"
#

@Reports=@ARGV;

# Report names are later interpolated into shell commands and used as
# file names inside the output directory, so options and paths are refused.
die "usage: $0 <iozone.out> [<iozone2.out>...]\n" if not @Reports or grep (m|^-|, @Reports);
die "report files must be in current directory" if grep (m|/|, @Reports);

# Metric name => 1-based column of that metric in an iozone data row
# (columns 1 and 2 are file size and record size).
%columns=(
	'write'     =>3,
	'read'      =>5,
	'rewrite'   =>4,
	'reread'    =>6,
	'randread'  =>7,
	'randwrite' =>8,
	'bkwdread'  =>9,
	'recrewrite'=>10,
	'strideread'=>11,
	'fwrite'    =>12,
	'frewrite'  =>13,
	'fread'     =>14,
	'freread'   =>15,
);

#
# create output directory. the name is the concatenation
# of all report file names (minus the file extension, plus
# prefix report_)
#
$outdir="report_".join("_",map{/([^\.]+)(\..*)?/ && $1}(@Reports));

print STDERR "Output directory: $outdir ";

if ( -d $outdir )
{
	print STDERR "(removing old directory) ";
	# NOTE(review): $outdir is built from user-supplied names and is handed
	# to the shell unquoted here and in the gnuplot call below.
	system "rm -rf $outdir";
}

mkdir $outdir or die "cannot make directory $outdir";

print STDERR "done.\nPreparing data files...";

foreach $report (@Reports)
{
	open(I, $report) or die "cannot open $report for reading";
	$report=~/^([^\.]+)/;
	$datafile="$1.dat";
	push @datafiles, $datafile;
	open(O, ">$outdir/$datafile") or die "cannot open $outdir/$datafile for writing";
	open(O2, ">$outdir/2d-$datafile") or die "cannot open $outdir/2d-$datafile for writing";

	# Copy only the purely numeric rows (the result table) of the report.
	while(<I>)
	{
		next unless ( /^[\s\d]+$/ );
		@split = split();
		next unless ( @split >= 8 );
		print O;
		# The 2d data set keeps the rows with record size 16384 and the
		# rows where file size equals record size.
		print O2 if $split[1] == 16384 or $split[0] == $split[1];
	}

	# close() takes a single handle: "close I, O, O2" would close only I
	# and could leave the data files unflushed when gnuplot reads them.
	close(I);
	close(O);
	close(O2);
}

print STDERR "done.\nGenerating graphs:";

foreach $column (keys %columns)
{
	print STDERR " $column";

	# 3d surface plot: file size x record size x throughput
	open(G, ">$outdir/$column.do") or die "cannot open $outdir/$column.do for writing";
	print G qq{
set title "Iozone performance: $column"
set grid lt 2 lw 1
set surface
set parametric
set xtics
set ytics
set logscale x 2
set logscale y 2
set autoscale z
#set xrange [2.**5:2.**24]
set xlabel "File size in KBytes"
set ylabel "Record size in Kbytes"
set zlabel "Kbytes/sec"
set style data lines
set dgrid3d 80,80,3
#set terminal png small picsize 900 700
set terminal png small size 900 700
set output "$column.png"
};

	print G "splot ". join(", ", map{qq{"$_" using 1:2:$columns{$column} title "$_"}}(@datafiles));

	print G "\n";

	close G;

	# 2d plot: file size x throughput
	open(G, ">$outdir/2d-$column.do") or die "cannot open $outdir/2d-$column.do for writing";
	print G qq{
set title "Iozone performance: $column"
#set terminal png small picsize 450 350
set terminal png small size 450 350
set logscale x
set xlabel "File size in KBytes"
set ylabel "Kbytes/sec"
set output "2d-$column.png"
};

	print G "plot ". join(", ", map{qq{"2d-$_" using 1:$columns{$column} title "$_" with lines}}(@datafiles));

	print G "\n";

	close G;

	if ( system("cd $outdir && gnuplot $column.do && gnuplot 2d-$column.do") )
	{
		print STDERR "(failed) ";
	}
	else
	{
		print STDERR "(ok) ";
	}
}

print STDERR "done.\n";
cycles (56624 total) PERFCTR INIT: read cr4 cost is 5.2 cycles (832 total) PERFCTR INIT: write cr4 cost is 246.5 cycles (16276 total) PERFCTR INIT: rdpmc (fast) cost is 59.3 cycles (4296 total) PERFCTR INIT: rdmsr (cccr) cost is 163.5 cycles (10964 total) PERFCTR INIT: wrmsr (cccr) cost is 839.6 cycles (54232 total) PERFCTR INIT: write LVTPC cost is 43.5 cycles (3284 total) PERFCTR INIT: sync_core cost is 254.7 cycles (16800 total) papi-5.6.0/src/components/appio/tests/iozone/iozone_visualizer.pl000775 001750 001750 00000015561 13216244356 027420 0ustar00jshenry1963jshenry1963000000 000000 #!/usr/bin/perl use warnings; use strict; # arguments: one of more report files # # Christian Mautner , 2005-10-31 # Marc Schoechlin , 2007-12-02 # # This script is just a hack :-) # # This script is based loosely on the Generate_Graph set # of scripts that come with iozone, but is a complete re-write # # The main reason to write this was the need to compare the behaviour of # two or more different setups, for tuning filesystems or # comparing different pieces of hardware. # # This script is in the public domain, too short and too trivial # to deserve a copyright. # # Simply run iozone like, for example, ./iozone -a -g 4G > config1.out (if your machine has 4GB) # # and then run perl report.pl config1.out # or get another report from another box into config2.out and run # perl report.pl config1.out config2.out # the look in the report_* directory for .png # # If you don't like png or the graphic size, search for "set terminal" in this file and put whatever gnuplot # terminal you want. 
Note I've also noticed that gnuplot switched the set terminal png syntax # a while back, you might need "set terminal png small size 900,700" # use Getopt::Long; my $column; my %columns; my $datafile; my @datafiles; my $outdir; my $report; my $nooffset=0; my @Reports; my @split; my $size3d; my $size2d; # evaluate options GetOptions( '3d=s' => \$size3d, '2d=s' => \$size2d, 'nooffset' => \$nooffset ); $size3d = "900,700" unless defined $size3d; $size2d = "800,500" unless defined $size2d; my $xoffset = "offset -7"; my $yoffset = "offset -3"; if ($nooffset == 1){ $xoffset = ""; $yoffset = ""; } print "\niozone_visualizer.pl : this script is distributed as public domain\n"; print "Christian Mautner , 2005-10-31\n"; print "Marc Schoechlin , 2007-12-02\n"; @Reports=@ARGV; die "usage: $0 --3d=x,y -2d=x,y [...]\n" if not @Reports or grep (m|^-|, @Reports); die "report files must be in current directory" if grep (m|/|, @Reports); print "Configured xtics-offset '$xoffset', configured ytics-offfset '$yoffset' (disable with --nooffset)\n"; print "Size 3d graphs : ".$size3d." (modify with '--3d=x,y')\n"; print "Size 2d graphs : ".$size2d." (modify with '--2d=x,y')\n"; #KB reclen write rewrite read reread read write read rewrite read fwrite frewrite fread freread %columns=( 'KB' =>1, 'reclen' =>2, 'write' =>3, 'rewrite' =>4, 'read' =>5, 'reread' =>6, 'randread' =>7, 'randwrite' =>8, 'bkwdread' =>9, 'recrewrite'=>10, 'strideread'=>11, 'fwrite' =>12, 'frewrite' =>13, 'fread' =>14, 'freread' =>15, ); # # create output directory. 
the name is the concatenation # of all report file names (minus the file extension, plus # prefix report_) # $outdir="report_".join("_",map{/([^\.]+)(\..*)?/ && $1}(@Reports)); print STDERR "Output directory: $outdir "; if ( -d $outdir ) { print STDERR "(removing old directory) "; system "rm -rf $outdir"; } mkdir $outdir or die "cannot make directory $outdir"; print STDERR "done.\nPreparing data files..."; foreach $report (@Reports) { open(I, $report) or die "cannot open $report for reading"; $report=~/^([^\.]+)/; $datafile="$1.dat"; push @datafiles, $datafile; open(O, ">$outdir/$datafile") or die "cannot open $outdir/$datafile for writing"; open(O2, ">$outdir/2d-$datafile") or die "cannot open $outdir/$datafile for writing"; my @sorted = sort { $columns{$a} <=> $columns{$b} } keys %columns; print O "# ".join(" ",@sorted)."\n"; print O2 "# ".join(" ",@sorted)."\n"; while() { next unless ( /^[\s\d]+$/ ); @split = split(); next unless ( @split == 15 ); print O; print O2 if $split[1] == 16384 or $split[0] == $split[1]; } close(I); close(O); close(O2); } print STDERR "done.\nGenerating graphs:"; open(HTML, ">$outdir/index.html") or die "cannot open $outdir/index.html for writing"; print HTML qq{ IOZone Statistics

IOZone Statistics

\n"; # Genereate 3d plots foreach $column (keys %columns) { print STDERR " $column"; open(G, ">$outdir/$column.do") or die "cannot open $outdir/$column.do for writing"; print G qq{ set title "Iozone performance: $column" set grid lt 2 lw 1 set surface set parametric set xtics $xoffset set ytics $yoffset set logscale x 2 set logscale y 2 set autoscale z #set xrange [2.**5:2.**24] set xlabel "File size in KBytes" -2 set ylabel "Record size in Kbytes" 2 set zlabel "Kbytes/sec" 4,8 set style data lines set dgrid3d 80,80,3 #set terminal png small picsize 900 700 set terminal png small size $size3d nocrop set output "$column.png" }; print HTML qq{ }; print G "splot ". join(", ", map{qq{"$_" using 1:2:$columns{$column} title "$_"}}(@datafiles)); print G "\n"; close G; open(G, ">$outdir/2d-$column.do") or die "cannot open $outdir/$column.do for writing"; print G qq{ set title "Iozone performance: $column" #set terminal png small picsize 450 350 set terminal png medium size $size2d nocrop set logscale x set xlabel "File size in KBytes" set ylabel "Kbytes/sec" set output "2d-$column.png" }; print HTML qq{ }; print G "plot ". join(", ", map{qq{"2d-$_" using 1:$columns{$column} title "$_" with lines}}(@datafiles)); print G "\n"; close G; if ( system("cd $outdir && gnuplot $column.do && gnuplot 2d-$column.do") ) { print STDERR "(failed) "; } else { print STDERR "(ok) "; } } print HTML qq{
}; # Generate Menu print HTML "## Overview\n
    \n"; foreach $column (keys %columns){ print HTML '
  • '.uc($column).' : '. '3d\n". '2d
  • \n"; } print HTML "

3d-$column

[top]
3d-$column

2d-$column

[top]
2d-$column
}; print STDERR "done.\n"; papi-5.6.0/src/libpfm4/docs/man3/pfm_get_os_event_encoding.3000664 001750 001750 00000016600 13216244364 025730 0ustar00jshenry1963jshenry1963000000 000000 .TH LIBPFM 3 "January, 2011" "" "Linux Programmer's Manual" .SH NAME pfm_get_os_event_encoding \- get event encoding for a specific operating system .SH SYNOPSIS .nf .B #include .sp .BI "int pfm_get_os_event_encoding(const char *" str ", int " dfl_plm ", pfm_os_t " os ", void *" arg ");" .sp .SH DESCRIPTION This is the key function to retrieve the encoding of an event for a specific operating system interface. The event string passed in \fBstr\fR is parsed and encoded for the operating system specified by \fBos\fR. Only one event per call can be encoded. As such, \fBstr\fR can contain only one symbolic event name. The event is encoded to monitor at the privilege levels specified by the \fBdfl_plm\fR mask, if supported, otherwise this parameter is ignored. The operating system specific input and output arguments are passed in \fBarg\fR. The event string, \fBstr\fR, may contains sub-event masks (umask) and any other supported modifiers. Only one event is parsed from the string. For convenience, it is possible to pass a comma-separated list of events in \fBstr\fR but only the first event is encoded. The following values are supported for \fBos\fR: .TP .B PFM_OS_NONE This value causes the event to be encoded purely as specified by the PMU hardware. The \fBarg\fR argument must be a pointer to a \fBpfm_raw_pmu_encode_arg_t\fR structure which is defined as follows: .nf typedef struct { uint64_t *codes; char **fstr; size_t size; int count; int idx; } pfm_pmu_encode_arg_t; .fi The fields are defined as follows: .RS .TP .B codes A pointer to an array of 64-bit values. On input, if \fBcodes\fR is NULL, then the library allocates whatever is necessary to store the encoding of the event. If \fBcodes\fR is not NULL on input, then \fBcount\fR must reflect its actual number of elements. 
If \fBcount\fR is big enough, the library stores the encoding at the address provided. Otherwise, an error is returned. .TP .B count On input, the field contains the maximum number of elements in the array \fBcodes\fR. Upon return, it contains the number of actual entries in \fBcodes\fR. If \fBcodes\fR is NULL, then count must be zero. .TP .B fstr If the caller is interested in retrieving the fully qualified event string where all used unit masks and all modifiers are spelled out, this field must be set to a non-null address of a pointer to a string (char **). Upon return, if \fBfstr\fR was not NULL, then the string pointer passed on entry points to the event string. The string is dynamically allocated and \fBmust\fR eventually be freed by the caller. If \fBfstr\fR was NULL on entry, then nothing is returned in this field. The typical calling sequence looks as follows: .nf char *fstr = NULL; pfm_pmu_encode_arg_t arg; memset(&arg, 0, sizeof(arg)); arg.fstr = &fstr; ret = pfm_get_os_event_encoding("event", PFM_PLM0|PFM_PLM3, PFM_OS_NONE, &arg); if (ret == PFM_SUCCESS) { printf("fstr=%s\\n", fstr); free(fstr); } .fi .TP .B size This field contains the size of the struct passed. This field is used to provide for extensibility of the struct without compromising backward compatibility. The value should be set to \fBsizeof(pfm_pmu_encode_arg_t)\fR. If instead, a value of \fB0\fR is specified, the library assumes the struct passed is identical to the first ABI version whose size is \fBPFM_RAW_ENCODE_ABI0\fR. Thus, if fields were added after the first ABI, they will not be set by the library. The library does check that bytes beyond what is implemented are zeroes. .TP .B idx Upon return, this field contains the opaque unique identifier for the event described in \fBstr\fR. This index can be used to retrieve information about the event using \fBpfm_get_event_info()\fR, for instance. 
.RE .TP .B PFM_OS_PERF_EVENT, PFM_OS_PERF_EVENT_EXT This value causes the event to be encoded for the perf_event Linux kernel interface (available since 2.6.31). The \fBarg\fR argument must be a pointer to a \fBpfm_perf_encode_arg_t\fR structure. The PFM_OS_PERF_EVENT layer provides the modifiers exported by the underlying PMU hardware, some of which may actually be overridden by the perf_event interface, such as the monitoring privilege levels. The \fBPFM_OS_PERF_EVENT_EXT\fR extends \fBPFM_OS_PERF_EVENT\fR to add modifiers controlled only by the perf_event interface, such as sampling period (\fBperiod\fR), frequency (\fBfreq\fR) and exclusive resource access (\fBexcl\fR). .nf typedef struct { struct perf_event_attr *attr; char **fstr; size_t size; int idx; int cpu; int flags; } pfm_perf_encode_arg_t; .fi The fields are defined as follows: .RS .TP .B attr A pointer to a struct perf_event_attr as defined in perf_event.h. This field cannot be NULL on entry. The struct is not completely overwritten by the call. The library only modifies the fields it knows about, thereby allowing perf_event ABI mismatch between caller and library. .TP .B fstr Same behavior as described for PFM_OS_NONE above. .TP .B size This field contains the size of the struct passed. This field is used to provide for extensibility of the struct without compromising backward compatibility. The value should be set to \fBsizeof(pfm_perf_encode_arg_t)\fR. If instead, a value of \fB0\fR is specified, the library assumes the struct passed is identical to the first ABI version whose size is \fBPFM_PERF_ENCODE_ABI0\fR. Thus, if fields were added after the first ABI, they will not be set by the library. The library does check that bytes beyond what is implemented are zeroes. .TP .B idx Upon return, this field contains the opaque unique identifier for the event described in \fBstr\fR. This index can be used to retrieve information about the event using \fBpfm_get_event_info()\fR, for instance. .TP .B cpu Not used yet. 
.TP .B flags Not used yet. .RE .PP Here is an example of how this function could be used with PFM_OS_NONE: .nf #include <inttypes.h> #include <err.h> #include <perfmon/pfmlib.h> int main(int argc, char **argv) { pfm_pmu_encode_arg_t raw; int ret, i; ret = pfm_initialize(); if (ret != PFM_SUCCESS) errx(1, "cannot initialize library %s", pfm_strerror(ret)); memset(&raw, 0, sizeof(raw)); ret = pfm_get_os_event_encoding("RETIRED_INSTRUCTIONS", PFM_PLM3, PFM_OS_NONE, &raw); if (ret != PFM_SUCCESS) errx(1, "cannot get encoding %s", pfm_strerror(ret)); for(i=0; i < raw.count; i++) printf("count[%d]=0x%"PRIx64"\\n", i, raw.codes[i]); free(raw.codes); return 0; } .fi .SH RETURN The function returns in \fBarg\fR the encoding of the event for the os passed in \fBos\fR. The content of \fBarg\fR depends on the \fBos\fR argument. Upon success, \fBPFM_SUCCESS\fR is returned, otherwise a specific error code is returned. .SH ERRORS .TP .B PFM_ERR_TOOSMALL The \fBcodes\fR argument is too small for the encoding. .TP .B PFM_ERR_INVAL The \fBcodes\fR or \fBcount\fR argument is \fBNULL\fR or the \fBstr\fR contains more than one symbolic event. .TP .B PFM_ERR_NOMEM Not enough memory. .TP .B PFM_ERR_NOTFOUND Event not found. .TP .B PFM_ERR_ATTR Invalid event attribute (unit mask or modifier) .TP .B PFM_ERR_ATTR_VAL Invalid modifier value. .TP .B PFM_ERR_ATTR_SET Attribute already set, cannot be changed. .TP .B PFM_ERR_ATTR_UMASK Missing unit mask. .TP .B PFM_ERR_ATTR_FEATCOMB Unit masks or features cannot be combined into a single event. 
.SH AUTHOR Stephane Eranian .PP papi-5.6.0/src/libpfm4/docs/man3/pfm_get_event_info.3000664 001750 001750 00000010554 13216244364 024376 0ustar00jshenry1963jshenry1963000000 000000 .TH LIBPFM 3 "December, 2009" "" "Linux Programmer's Manual" .SH NAME pfm_get_event_info \- get event information .SH SYNOPSIS .nf .B #include <perfmon/pfmlib.h> .sp .BI "int pfm_get_event_info(int " idx ", pfm_os_t " os ", pfm_event_info_t *" info ");" .sp .SH DESCRIPTION This function returns in \fBinfo\fR information about a specific event designated by its opaque unique identifier in \fBidx\fR for the operating system specified in \fBos\fR. The \fBpfm_event_info_t\fR structure is defined as follows: .nf typedef struct { const char *name; const char *desc; const char *equiv; size_t size; uint64_t code; pfm_pmu_t pmu; pfm_dtype_t dtype; int idx; int nattrs; struct { unsigned int is_precise:1; unsigned int reserved_bits:31; }; } pfm_event_info_t; .fi The fields of this structure are defined as follows: .TP .B name This is the name of the event. This is a read-only string. .TP .B desc This is the description of the event. This is a read-only string. It may contain multiple sentences. .TP .B equiv Certain events may be just variations of actual events. They may be provided as handy shortcuts to avoid supplying a long list of attributes. For those events, this field is not NULL and contains the complete equivalent event string. .TP .B code This is the raw event code. It should not be confused with the encoding of the event. This field represents only the event selection code, it does not include any unit mask or attribute settings. .TP .B pmu This is the identification of the PMU model this event belongs to. It is of type \fBpfm_pmu_t\fR. Using this value and the \fBpfm_get_pmu_info\fR function, it is possible to get PMU information. .TP .B dtype This field returns the representation of the event data. By default, it is \fBPFM_DATA_UINT64\fR. .TP .B idx This is the event unique opaque identifier.
It is identical to the idx passed to the call and is provided for completeness. .TP .B nattrs This is the number of attributes supported by this event. Attributes may be unit masks or modifiers. If the event has no attributes, then the value of this field is simply 0. .TP .B size This field contains the size of the struct passed. This field is used to provide for extensibility of the struct without compromising backward compatibility. The value should be set to \fBsizeof(pfm_event_info_t)\fR. If instead, a value of \fB0\fR is specified, the library assumes the struct passed is identical to the first ABI version, whose size is \fBPFM_EVENT_INFO_ABI0\fR. Thus, if fields were added after the first ABI, they will not be set by the library. The library does check that bytes beyond what is implemented are zeroes. .TP .B is_precise This bitfield indicates whether or not the event supports precise sampling. Precise sampling is a hardware mechanism that avoids instruction address skid when using interrupt-based sampling. When the event has umasks, this field means that at least one umask supports precise sampling. On Intel X86 processors, this indicates whether the event supports Precise Event-Based Sampling (PEBS). .PP The \fBpfm_os_t\fR enumeration provides the following choices: .TP .B PFM_OS_NONE The returned information pertains only to what the PMU hardware exports. No operating system attributes are taken into account. .TP .B PFM_OS_PERF_EVENT The returned information includes the actual PMU hardware and the additional attributes exported by the perf_events kernel interface. The perf_event attributes pertain only to the PMU hardware. In case perf_events is not detected, an error is returned. .TP .B PFM_OS_PERF_EVENT_EXT The returned information includes all of what is already provided by \fBPFM_OS_PERF_EVENT\fR plus all the software attributes controlled by perf_events, such as sampling period, precise sampling.
.PP .SH RETURN If successful, the function returns \fBPFM_SUCCESS\fR and event information in \fBinfo\fR, otherwise it returns an error code. .SH ERRORS .TP .B PFM_ERR_NOINIT Library has not been initialized properly. .TP .B PFM_ERR_INVAL The \fBidx\fR argument is invalid or \fBinfo\fR is \fBNULL\fR or \fBsize\fR is not zero. .TP .B PFM_ERR_NOTSUPP The requested \fBos\fR is not detected or supported. .SH AUTHOR Stephane Eranian .PP papi-5.6.0/src/perfctr-2.7.x/linux/drivers/perfctr/virtual.h000664 001750 001750 00000000547 13216244370 025670 0ustar00jshenry1963jshenry1963000000 000000 /* $Id: virtual.h,v 1.13 2004/05/31 18:18:55 mikpe Exp $ * Virtual per-process performance counters. * * Copyright (C) 1999-2004 Mikael Pettersson */ #ifdef CONFIG_PERFCTR_VIRTUAL extern int vperfctr_init(void); extern void vperfctr_exit(void); #else static inline int vperfctr_init(void) { return 0; } static inline void vperfctr_exit(void) { } #endif papi-5.6.0/src/perfctr-2.7.x/linux/drivers/perfctr/virtual.c000664 001750 001750 00000103675 13216244370 025671 0ustar00jshenry1963jshenry1963000000 000000 /* $Id: virtual.c,v 1.117 2007/10/06 13:02:07 mikpe Exp $ * Virtual per-process performance counters. * * Copyright (C) 1999-2007 Mikael Pettersson */ #include #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) #include #endif #include #include /* for unlikely() in 2.4.18 and older */ #include #include #include #include #include #include #include #include #include "cpumask.h" #include "virtual.h" /**************************************************************** * * * Data types and macros.
*
 *								*
 ****************************************************************/

/* State of one virtual (per-task) set of performance counters.
   vperfctr_alloc() backs each object with a whole zeroed page so that
   vperfctr_mmap() can map that page read-only into user space. */
struct vperfctr {
/* User-visible fields: (must be first for mmap()) */
	struct perfctr_cpu_state cpu_state;
/* Kernel-private fields: */
	/* signal number placed in the siginfo sent by vperfctr_handle_overflow() */
	int si_signo;
	/* reference count; put_vperfctr() frees the object when it drops to zero */
	atomic_t count;
	/* taken around accesses to 'owner' in vperfctr_unlink() and vperfctr_get_tsk() */
	spinlock_t owner_lock;
	/* owning task; set to NULL by vperfctr_unlink() */
	struct task_struct *owner;
	/* sampling_timer and bad_cpus_allowed are frequently
	   accessed, so they get to share a cache line */
	unsigned int sampling_timer ____cacheline_aligned;
#ifdef CONFIG_PERFCTR_CPUS_FORBIDDEN_MASK
	atomic_t bad_cpus_allowed;
#endif
	/* user-supplied value, read/written through VPERFCTR_DOMAIN_CONTROL */
	unsigned int preserve;
	/* cstatus saved when the counters are suspended; consumed by do_vperfctr_resume() */
	unsigned int resume_cstatus;
#ifdef CONFIG_PERFCTR_INTERRUPT_SUPPORT
	unsigned int ireload_needed; /* only valid if resume_cstatus != 0 */
#endif
	/* children_lock protects inheritance_id and children,
	   when parent is not the one doing release_task() */
	spinlock_t children_lock;
	unsigned long long inheritance_id;
	struct perfctr_sum_ctrs children;
	/* schedule_work() data for when an operation cannot be
	   done in the current context due to locking rules */
	struct work_struct work;
	struct task_struct *parent_tsk;
};

/* True when the counters' current control status is "enabled". */
#define IS_RUNNING(perfctr)	perfctr_cstatus_enabled((perfctr)->cpu_state.user.cstatus)

#ifdef CONFIG_PERFCTR_INTERRUPT_SUPPORT

static void vperfctr_ihandler(unsigned long pc);
static void vperfctr_handle_overflow(struct task_struct*, struct vperfctr*);

/* Register vperfctr_ihandler() with the low-level driver. */
static inline void vperfctr_set_ihandler(void)
{
	perfctr_cpu_set_ihandler(vperfctr_ihandler);
}

#else
/* No interrupt support configured: nothing to register. */
static inline void vperfctr_set_ihandler(void) { }
#endif

#ifdef CONFIG_PERFCTR_CPUS_FORBIDDEN_MASK

/* Start with the "bad cpus allowed" flag cleared. */
static inline void vperfctr_init_bad_cpus_allowed(struct vperfctr *perfctr)
{
	atomic_set(&perfctr->bad_cpus_allowed, 0);
}

#else	/* !CONFIG_PERFCTR_CPUS_FORBIDDEN_MASK */
/* The flag does not exist in this configuration. */
static inline void vperfctr_init_bad_cpus_allowed(struct vperfctr *perfctr) { }
#endif	/* !CONFIG_PERFCTR_CPUS_FORBIDDEN_MASK */

/****************************************************************
 *								*
 * Resource management.
*
 *								*
 ****************************************************************/

/* XXX: perhaps relax this to number of _live_ perfctrs */
static DEFINE_MUTEX(nrctrs_mutex);
static int nrctrs;
static const char this_service[] = __FILE__;

/* Count one more vperfctr user.  The first user reserves the hardware via
   perfctr_cpu_reserve(); if that returns another service's name the count
   is rolled back and -EBUSY is returned.  Returns 0 on success. */
static int inc_nrctrs(void)
{
	const char *other;

	other = NULL;
	mutex_lock(&nrctrs_mutex);
	if (++nrctrs == 1) {
		other = perfctr_cpu_reserve(this_service);
		if (other)
			nrctrs = 0;
	}
	mutex_unlock(&nrctrs_mutex);
	if (other) {
		printk(KERN_ERR __FILE__
		       ": cannot operate, perfctr hardware taken by '%s'\n",
		       other);
		return -EBUSY;
	}
	vperfctr_set_ihandler();
	return 0;
}

/* Drop one vperfctr user; the last one releases the hardware. */
static void dec_nrctrs(void)
{
	mutex_lock(&nrctrs_mutex);
	if (--nrctrs == 0)
		perfctr_cpu_release(this_service);
	mutex_unlock(&nrctrs_mutex);
}

/* Allocate a `struct vperfctr'. Claim and reserve
   an entire page so that it can be mmap():ed.
   Returns ERR_PTR(-EBUSY) or ERR_PTR(-ENOMEM) on failure. */
static struct vperfctr *vperfctr_alloc(void)
{
	unsigned long page;

	if (inc_nrctrs() != 0)
		return ERR_PTR(-EBUSY);
	page = get_zeroed_page(GFP_KERNEL);
	if (!page) {
		dec_nrctrs();
		return ERR_PTR(-ENOMEM);
	}
	SetPageReserved(virt_to_page(page));
	return (struct vperfctr*) page;
}

/* Undo vperfctr_alloc(): unreserve and free the page, drop the user count. */
static void vperfctr_free(struct vperfctr *perfctr)
{
	ClearPageReserved(virt_to_page(perfctr));
	free_page((unsigned long)perfctr);
	dec_nrctrs();
}

/* Allocate a vperfctr and initialise its reference count (to 1) and locks.
   Returns an ERR_PTR() value if the allocation failed. */
static struct vperfctr *get_empty_vperfctr(void)
{
	struct vperfctr *perfctr = vperfctr_alloc();
	if (!IS_ERR(perfctr)) {
		atomic_set(&perfctr->count, 1);
		vperfctr_init_bad_cpus_allowed(perfctr);
		spin_lock_init(&perfctr->owner_lock);
		spin_lock_init(&perfctr->children_lock);
	}
	return perfctr;
}

/* Drop one reference; free the object when the last one goes away. */
static void put_vperfctr(struct vperfctr *perfctr)
{
	if (atomic_dec_and_test(&perfctr->count))
		vperfctr_free(perfctr);
}

/* Work-queue callback used by schedule_put_vperfctr(); its prototype
   depends on the kernel version's work_struct API. */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
static void scheduled_vperfctr_free(struct work_struct *work)
{
	struct vperfctr *perfctr = container_of(work, struct vperfctr, work);
	vperfctr_free(perfctr);
}
#else
static void scheduled_vperfctr_free(void *data)
{
	vperfctr_free((struct vperfctr*)data);
}
#endif

/* Like put_vperfctr(), but the final free is deferred to a work queue
   via schedule_work() instead of being done in the caller's context. */
static void schedule_put_vperfctr(struct vperfctr *perfctr)
{
	if (!atomic_dec_and_test(&perfctr->count))
		return;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
	INIT_WORK(&perfctr->work, scheduled_vperfctr_free);
#else
	INIT_WORK(&perfctr->work, scheduled_vperfctr_free, perfctr);
#endif
	schedule_work(&perfctr->work);
}

/* Return a fresh non-zero id from a lock-protected static counter. */
static unsigned long long new_inheritance_id(void)
{
	static spinlock_t lock = SPIN_LOCK_UNLOCKED;
	static unsigned long long counter;
	unsigned long long id;

	spin_lock(&lock);
	id = ++counter;
	spin_unlock(&lock);
	return id;
}

/****************************************************************
 *								*
 * Basic counter operations.					*
 * These must all be called by the owner process only.		*
 * These must all be called with preemption disabled.		*
 *								*
 ****************************************************************/

/* PRE: IS_RUNNING(perfctr)
 * Suspend the counters. */
static inline void vperfctr_suspend(struct vperfctr *perfctr)
{
	perfctr_cpu_suspend(&perfctr->cpu_state);
}

/* Re-arm the countdown that __vperfctr_sample() decrements. */
static inline void vperfctr_reset_sampling_timer(struct vperfctr *perfctr)
{
	/* XXX: base the value on perfctr_info.cpu_khz instead! */
	perfctr->sampling_timer = HZ/2;
}

/* PRE: perfctr == current->thread.perfctr && IS_RUNNING(perfctr)
 * Restart the counters. */
static inline void vperfctr_resume(struct vperfctr *perfctr)
{
	perfctr_cpu_resume(&perfctr->cpu_state);
	vperfctr_reset_sampling_timer(perfctr);
}

/* Resume the counters, unless an overflow interrupt is pending, in which
   case the overflow is handled instead and the plain resume is skipped. */
static inline void vperfctr_resume_with_overflow_check(struct vperfctr *perfctr)
{
#ifdef CONFIG_PERFCTR_INTERRUPT_SUPPORT
	if (perfctr_cpu_has_pending_interrupt(&perfctr->cpu_state)) {
		vperfctr_handle_overflow(current, perfctr);
		return;
	}
#endif
	vperfctr_resume(perfctr);
}

/* Sample the counters but do not suspend them.
*/
static void vperfctr_sample(struct vperfctr *perfctr)
{
	if (IS_RUNNING(perfctr)) {
		perfctr_cpu_sample(&perfctr->cpu_state);
		vperfctr_reset_sampling_timer(perfctr);
	}
}

#ifdef CONFIG_PERFCTR_INTERRUPT_SUPPORT
/* vperfctr interrupt handler (XXX: add buffering support) */
/* PREEMPT note: called in IRQ context with preemption disabled. */
/* Sanity-checks that the current task has a vperfctr with interrupt-mode
   counters, then suspends it and hands over to vperfctr_handle_overflow(). */
static void vperfctr_ihandler(unsigned long pc)
{
	struct task_struct *tsk = current;
	struct vperfctr *perfctr;

	perfctr = tsk->thread.perfctr;
	if (!perfctr) {
		printk(KERN_ERR "%s: BUG! pid %d has no vperfctr\n",
		       __FUNCTION__, tsk->pid);
		return;
	}
	if (!perfctr_cstatus_has_ictrs(perfctr->cpu_state.user.cstatus)) {
		printk(KERN_ERR "%s: BUG! vperfctr has cstatus %#x (pid %d, comm %s)\n",
		       __FUNCTION__, perfctr->cpu_state.user.cstatus, tsk->pid, tsk->comm);
		return;
	}
	vperfctr_suspend(perfctr);
	vperfctr_handle_overflow(tsk, perfctr);
}

/* Identify which counters overflowed, park the counters (only the TSC, if
   enabled, keeps running), remember the previous cstatus in resume_cstatus,
   and queue perfctr->si_signo with the overflow mask for 'tsk'. */
static void vperfctr_handle_overflow(struct task_struct *tsk,
				     struct vperfctr *perfctr)
{
	unsigned int pmc_mask;
	siginfo_t si;
	sigset_t old_blocked;

	pmc_mask = perfctr_cpu_identify_overflow(&perfctr->cpu_state);
	if (!pmc_mask) {
#ifdef CONFIG_PPC64
		/* On some hardware (ppc64, in particular) it's
		 * impossible to control interrupts finely enough to
		 * eliminate overflows on counters we don't care
		 * about.  So in this case just restart the counters
		 * and keep going. */
		vperfctr_resume(perfctr);
#else
		printk(KERN_ERR "%s: BUG! pid %d has unidentifiable overflow source\n",
		       __FUNCTION__, tsk->pid);
#endif
		return;
	}
	perfctr->ireload_needed = 1;
	/* suspend a-mode and i-mode PMCs, leaving only TSC on */
	/* XXX: some people also want to suspend the TSC */
	perfctr->resume_cstatus = perfctr->cpu_state.user.cstatus;
	if (perfctr_cstatus_has_tsc(perfctr->resume_cstatus)) {
		perfctr->cpu_state.user.cstatus = perfctr_mk_cstatus(1, 0, 0);
		vperfctr_resume(perfctr);
	} else
		perfctr->cpu_state.user.cstatus = 0;
	si.si_signo = perfctr->si_signo;
	si.si_errno = 0;
	si.si_code = SI_PMC_OVF;
	si.si_pmc_ovf_mask = pmc_mask;

	/* deliver signal without waking up the receiver */
	/* (the signal is temporarily added to the blocked set while it is
	   queued, and the old mask is restored afterwards) */
	spin_lock_irq(&tsk->sighand->siglock);
	old_blocked = tsk->blocked;
	sigaddset(&tsk->blocked, si.si_signo);
	spin_unlock_irq(&tsk->sighand->siglock);

	if (!send_sig_info(si.si_signo, &si, tsk))
		send_sig(si.si_signo, tsk, 1);

	spin_lock_irq(&tsk->sighand->siglock);
	tsk->blocked = old_blocked;
	recalc_sigpending();
	spin_unlock_irq(&tsk->sighand->siglock);
}
#endif

/****************************************************************
 *								*
 * Process management operations.				*
 * These must all, with the exception of vperfctr_unlink()	*
 * and __vperfctr_set_cpus_allowed(), be called by the owner	*
 * process only.						*
 *								*
 ****************************************************************/

/* do_fork() -> copy_process() -> copy_thread() -> __vperfctr_copy().
 * Inherit the parent's perfctr settings to the child.
 * PREEMPT note: do_fork() etc do not run with preemption disabled.
 * On every exit path child_tsk->thread.perfctr is set, either to the
 * new object or to NULL.
 */
void __vperfctr_copy(struct task_struct *child_tsk, struct pt_regs *regs)
{
	struct vperfctr *parent_perfctr;
	struct vperfctr *child_perfctr;

	/* Do not inherit perfctr settings to kernel-generated
	   threads, like those created by kmod. */
	child_perfctr = NULL;
	if (!user_mode(regs))
		goto out;

	/* Allocation may sleep. Do it before the critical region.
*/
	child_perfctr = get_empty_vperfctr();
	if (IS_ERR(child_perfctr)) {
		/* allocation failure is not fatal: the child simply gets no perfctr */
		child_perfctr = NULL;
		goto out;
	}

	/* Although we're executing in the parent, if it is scheduled
	   then a remote monitor may attach and change the perfctr
	   pointer or the object it points to. This may already have
	   occurred when we get here, so the old copy of the pointer
	   in the child cannot be trusted. */
	preempt_disable();
	parent_perfctr = current->thread.perfctr;
	if (parent_perfctr) {
		child_perfctr->cpu_state.control = parent_perfctr->cpu_state.control;
		child_perfctr->si_signo = parent_perfctr->si_signo;
		child_perfctr->inheritance_id = parent_perfctr->inheritance_id;
	}
	preempt_enable();
	if (!parent_perfctr) {
		/* parent has no perfctr after all: discard the fresh object */
		put_vperfctr(child_perfctr);
		child_perfctr = NULL;
		goto out;
	}
	(void)perfctr_cpu_update_control(&child_perfctr->cpu_state, 0);
	child_perfctr->owner = child_tsk;
 out:
	child_tsk->thread.perfctr = child_perfctr;
}

/* Called from exit_thread() or do_vperfctr_unlink().
 * If the counters are running, stop them and sample their final values.
 * Mark the vperfctr object as dead.
 * Optionally detach the vperfctr object from its owner task.
 * PREEMPT note: exit_thread() does not run with preemption disabled.
 *
 * When do_unlink is non-zero the owner's thread.perfctr pointer is
 * cleared and one reference on the object is dropped.
 */
static void vperfctr_unlink(struct task_struct *owner, struct vperfctr *perfctr, int do_unlink)
{
	/* this synchronises with sys_vperfctr() */
	spin_lock(&perfctr->owner_lock);
	perfctr->owner = NULL;
	spin_unlock(&perfctr->owner_lock);

	/* perfctr suspend+detach must be atomic wrt process suspend */
	/* this also synchronises with perfctr_set_cpus_allowed() */
	task_lock(owner);
	if (IS_RUNNING(perfctr) && owner == current)
		vperfctr_suspend(perfctr);
	if (do_unlink)
		owner->thread.perfctr = NULL;
	task_unlock(owner);

	perfctr->cpu_state.user.cstatus = 0;
	perfctr->resume_cstatus = 0;
	if (do_unlink)
		put_vperfctr(perfctr);
}

/* Stop the current task's counters without detaching the object. */
void __vperfctr_exit(struct vperfctr *perfctr)
{
	vperfctr_unlink(current, perfctr, 0);
}

/* release_task() -> perfctr_release_task() -> __vperfctr_release().
 * A task is being released.
If it inherited its perfctr settings
 * from its parent, then merge its final counts back into the parent.
 * Then unlink the child's perfctr.
 * PRE: caller has write_lock_irq(&tasklist_lock).
 * PREEMPT note: preemption is disabled due to tasklist_lock.
 *
 * When current == parent_tsk, the child's counts can be merged
 * into the parent's immediately. This is the common case.
 *
 * When current != parent_tsk, the parent must be task_lock()ed
 * before its perfctr state can be accessed. task_lock() is illegal
 * here due to the write_lock_irq(&tasklist_lock) in release_task(),
 * so the operation is done via schedule_work().
 */
static void do_vperfctr_release(struct vperfctr *child_perfctr, struct task_struct *parent_tsk)
{
	struct vperfctr *parent_perfctr;
	unsigned int cstatus, nrctrs, i;

	parent_perfctr = parent_tsk->thread.perfctr;
	if (parent_perfctr && child_perfctr) {
		spin_lock(&parent_perfctr->children_lock);
		/* only merge if the child still belongs to the parent's
		   current inheritance generation */
		if (parent_perfctr->inheritance_id == child_perfctr->inheritance_id) {
			cstatus = parent_perfctr->cpu_state.user.cstatus;
			if (perfctr_cstatus_has_tsc(cstatus))
				parent_perfctr->children.tsc +=
					child_perfctr->cpu_state.user.tsc_sum +
					child_perfctr->children.tsc;
			nrctrs = perfctr_cstatus_nrctrs(cstatus);
			for(i = 0; i < nrctrs; ++i)
				parent_perfctr->children.pmc[i] +=
					child_perfctr->cpu_state.user.pmc[i].sum +
					child_perfctr->children.pmc[i];
		}
		spin_unlock(&parent_perfctr->children_lock);
	}
	/* drop the child's reference; any final free happens from a work queue */
	schedule_put_vperfctr(child_perfctr);
}

/* Deferred variant: take task_lock() on the recorded parent, merge, then
   drop the task reference taken in __vperfctr_release(). */
static void do_scheduled_release(struct vperfctr *child_perfctr)
{
	struct task_struct *parent_tsk = child_perfctr->parent_tsk;

	task_lock(parent_tsk);
	do_vperfctr_release(child_perfctr, parent_tsk);
	task_unlock(parent_tsk);
	put_task_struct(parent_tsk);
}

/* Work-queue entry point; prototype depends on the work_struct API version. */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
static void scheduled_release(struct work_struct *work)
{
	struct vperfctr *perfctr = container_of(work, struct vperfctr, work);
	do_scheduled_release(perfctr);
}
#else
static void scheduled_release(void *data)
{
	do_scheduled_release((struct vperfctr*)data);
}
#endif

/* Detach the child's perfctr and merge it into the parent, immediately if
   the parent is the current task, otherwise via schedule_work(). */
void __vperfctr_release(struct task_struct *child_tsk)
{
	struct task_struct *parent_tsk = child_tsk->parent;
	struct vperfctr *child_perfctr = child_tsk->thread.perfctr;

	child_tsk->thread.perfctr = NULL;
	if (parent_tsk == current)
		do_vperfctr_release(child_perfctr, parent_tsk);
	else {
		/* keep the parent alive until do_scheduled_release() runs */
		get_task_struct(parent_tsk);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
		INIT_WORK(&child_perfctr->work, scheduled_release);
#else
		INIT_WORK(&child_perfctr->work, scheduled_release, child_perfctr);
#endif
		child_perfctr->parent_tsk = parent_tsk;
		schedule_work(&child_perfctr->work);
	}
}

/* schedule() --> switch_to() --> .. --> __vperfctr_suspend().
 * If the counters are running, suspend them.
 * PREEMPT note: switch_to() runs with preemption disabled.
 */
void __vperfctr_suspend(struct vperfctr *perfctr)
{
	if (IS_RUNNING(perfctr))
		vperfctr_suspend(perfctr);
}

/* schedule() --> switch_to() --> .. --> __vperfctr_resume().
 * PRE: perfctr == current->thread.perfctr
 * If the counters are runnable, resume them.
 * PREEMPT note: switch_to() runs with preemption disabled.
 */
void __vperfctr_resume(struct vperfctr *perfctr)
{
	if (IS_RUNNING(perfctr)) {
#ifdef CONFIG_PERFCTR_CPUS_FORBIDDEN_MASK
		/* bad_cpus_allowed was set and real counters are in use:
		   disable the counters and send SIGILL instead of resuming */
		if (unlikely(atomic_read(&perfctr->bad_cpus_allowed)) &&
		    perfctr_cstatus_nrctrs(perfctr->cpu_state.user.cstatus)) {
			perfctr->cpu_state.user.cstatus = 0;
			perfctr->resume_cstatus = 0;
			BUG_ON(current->state != TASK_RUNNING);
			send_sig(SIGILL, current, 1);
			return;
		}
#endif
		vperfctr_resume_with_overflow_check(perfctr);
	}
}

/* Called from update_one_process() [triggered by timer interrupt].
 * PRE: perfctr == current->thread.perfctr.
 * Sample the counters but do not suspend them.
 * Needed to avoid precision loss due to multiple counter
 * wraparounds between resume/suspend for CPU-bound processes.
 * PREEMPT note: called in IRQ context with preemption disabled.
*/
void __vperfctr_sample(struct vperfctr *perfctr)
{
	/* only sample when the countdown set by vperfctr_reset_sampling_timer() expires */
	if (--perfctr->sampling_timer == 0)
		vperfctr_sample(perfctr);
}

#ifdef CONFIG_PERFCTR_CPUS_FORBIDDEN_MASK
/* Called from set_cpus_allowed().
 * PRE: current holds task_lock(owner)
 * PRE: owner->thread.perfctr == perfctr
 *
 * Records in bad_cpus_allowed whether new_mask intersects
 * perfctr_cpus_forbidden_mask; the flag is acted upon in
 * __vperfctr_resume().
 */
void __vperfctr_set_cpus_allowed(struct task_struct *owner,
				 struct vperfctr *perfctr,
				 cpumask_t new_mask)
{
	if (cpus_intersects(new_mask, perfctr_cpus_forbidden_mask)) {
		atomic_set(&perfctr->bad_cpus_allowed, 1);
		if (printk_ratelimit())
			printk(KERN_WARNING "perfctr: process %d (comm %s) issued unsafe"
			       " set_cpus_allowed() on process %d (comm %s)\n",
			       current->pid, current->comm, owner->pid, owner->comm);
	} else
		atomic_set(&perfctr->bad_cpus_allowed, 0);
}
#endif

/****************************************************************
 *								*
 * Virtual perfctr system calls implementation.			*
 * These can be called by the owner process (tsk == current),	*
 * a monitor process which has the owner under ptrace ATTACH	*
 * control (tsk && tsk != current), or anyone with a handle to	*
 * an unlinked perfctr (!tsk).					*
 *								*
 ****************************************************************/

/* Copy 'srcbytes' bytes of control data for 'domain' from user space into
   the perfctr.  Fails with -ESRCH for an unlinked perfctr, -EINVAL for an
   oversized request, -ENOMEM or -EFAULT if the data cannot be staged. */
static int do_vperfctr_write(struct vperfctr *perfctr,
			     unsigned int domain,
			     const void __user *srcp,
			     unsigned int srcbytes,
			     struct task_struct *tsk)
{
	void *tmp;
	int err;

	if (!tsk)
		return -ESRCH;	/* attempt to update unlinked perfctr */

	if (srcbytes > PAGE_SIZE) /* primitive sanity check */
		return -EINVAL;
	tmp = kmalloc(srcbytes, GFP_USER);
	if (!tmp)
		return -ENOMEM;
	err = -EFAULT;
	if (copy_from_user(tmp, srcp, srcbytes))
		goto out_kfree;

	/* PREEMPT note: preemption is disabled over the entire
	   region since we're updating an active perfctr.
*/ preempt_disable(); if (IS_RUNNING(perfctr)) { if (tsk == current) vperfctr_suspend(perfctr); perfctr->cpu_state.user.cstatus = 0; perfctr->resume_cstatus = 0; } switch (domain) { case VPERFCTR_DOMAIN_CONTROL: { struct vperfctr_control control; err = -EINVAL; if (srcbytes > sizeof(control)) break; control.si_signo = perfctr->si_signo; control.preserve = perfctr->preserve; memcpy(&control, tmp, srcbytes); /* XXX: validate si_signo? */ perfctr->si_signo = control.si_signo; perfctr->preserve = control.preserve; err = 0; break; } case PERFCTR_DOMAIN_CPU_CONTROL: err = -EINVAL; if (srcbytes > sizeof(perfctr->cpu_state.control.header)) break; memcpy(&perfctr->cpu_state.control.header, tmp, srcbytes); err = 0; break; case PERFCTR_DOMAIN_CPU_MAP: err = -EINVAL; if (srcbytes > sizeof(perfctr->cpu_state.control.pmc_map)) break; memcpy(perfctr->cpu_state.control.pmc_map, tmp, srcbytes); err = 0; break; default: err = perfctr_cpu_control_write(&perfctr->cpu_state.control, domain, tmp, srcbytes); } preempt_enable(); out_kfree: kfree(tmp); return err; } static int vperfctr_enable_control(struct vperfctr *perfctr, struct task_struct *tsk) { int err; unsigned int next_cstatus; unsigned int nrctrs, i; if (perfctr->cpu_state.control.header.nractrs || perfctr->cpu_state.control.header.nrictrs) { cpumask_t old_mask, new_mask; old_mask = tsk->cpus_allowed; cpus_andnot(new_mask, old_mask, perfctr_cpus_forbidden_mask); if (cpus_empty(new_mask)) return -EINVAL; if (!cpus_equal(new_mask, old_mask)) set_cpus_allowed(tsk, new_mask); } perfctr->cpu_state.user.cstatus = 0; perfctr->resume_cstatus = 0; /* remote access note: perfctr_cpu_update_control() is ok */ err = perfctr_cpu_update_control(&perfctr->cpu_state, 0); if (err < 0) return err; next_cstatus = perfctr->cpu_state.user.cstatus; if (!perfctr_cstatus_enabled(next_cstatus)) return 0; if (!perfctr_cstatus_has_tsc(next_cstatus)) perfctr->cpu_state.user.tsc_sum = 0; nrctrs = perfctr_cstatus_nrctrs(next_cstatus); for(i = 0; i < nrctrs; 
++i) if (!(perfctr->preserve & (1<cpu_state.user.pmc[i].sum = 0; spin_lock(&perfctr->children_lock); perfctr->inheritance_id = new_inheritance_id(); memset(&perfctr->children, 0, sizeof perfctr->children); spin_unlock(&perfctr->children_lock); return 0; } static inline void vperfctr_ireload(struct vperfctr *perfctr) { #ifdef CONFIG_PERFCTR_INTERRUPT_SUPPORT if (perfctr->ireload_needed) { perfctr->ireload_needed = 0; /* remote access note: perfctr_cpu_ireload() is ok */ perfctr_cpu_ireload(&perfctr->cpu_state); } #endif } static int do_vperfctr_resume(struct vperfctr *perfctr, struct task_struct *tsk) { unsigned int resume_cstatus; int ret; if (!tsk) return -ESRCH; /* attempt to update unlinked perfctr */ /* PREEMPT note: preemption is disabled over the entire region because we're updating an active perfctr. */ preempt_disable(); if (IS_RUNNING(perfctr) && tsk == current) vperfctr_suspend(perfctr); resume_cstatus = perfctr->resume_cstatus; if (perfctr_cstatus_enabled(resume_cstatus)) { perfctr->cpu_state.user.cstatus = resume_cstatus; perfctr->resume_cstatus = 0; vperfctr_ireload(perfctr); ret = 0; } else { ret = vperfctr_enable_control(perfctr, tsk); resume_cstatus = perfctr->cpu_state.user.cstatus; } if (ret >= 0 && perfctr_cstatus_enabled(resume_cstatus) && tsk == current) vperfctr_resume(perfctr); preempt_enable(); return ret; } static int do_vperfctr_suspend(struct vperfctr *perfctr, struct task_struct *tsk) { if (!tsk) return -ESRCH; /* attempt to update unlinked perfctr */ /* PREEMPT note: preemption is disabled over the entire region since we're updating an active perfctr. 
*/
	preempt_disable();

	if (IS_RUNNING(perfctr)) {
		if (tsk == current)
			vperfctr_suspend(perfctr);
		/* remember the active cstatus so do_vperfctr_resume() can restore it */
		perfctr->resume_cstatus = perfctr->cpu_state.user.cstatus;
		perfctr->cpu_state.user.cstatus = 0;
	}

	preempt_enable();

	return 0;
}

/* Detach the perfctr from its owner, if it still has one.  Always returns 0. */
static int do_vperfctr_unlink(struct vperfctr *perfctr, struct task_struct *tsk)
{
	if (tsk)
		vperfctr_unlink(tsk, perfctr, 1);
	return 0;
}

/* Stop the counters and zero the whole cpu_state, the saved cstatus, the
   inheritance id and the children sums.  -ESRCH for an unlinked perfctr. */
static int do_vperfctr_clear(struct vperfctr *perfctr, struct task_struct *tsk)
{
	if (!tsk)
		return -ESRCH;	/* attempt to update unlinked perfctr */

	/* PREEMPT note: preemption is disabled over the entire
	   region because we're updating an active perfctr. */
	preempt_disable();

	if (IS_RUNNING(perfctr) && tsk == current)
		vperfctr_suspend(perfctr);

	memset(&perfctr->cpu_state, 0, sizeof perfctr->cpu_state);
	perfctr->resume_cstatus = 0;

	spin_lock(&perfctr->children_lock);
	perfctr->inheritance_id = 0;
	memset(&perfctr->children, 0, sizeof perfctr->children);
	spin_unlock(&perfctr->children_lock);

	preempt_enable();

	return 0;
}

/* Dispatch a VPERFCTR_CONTROL_* command; unknown commands yield -EINVAL. */
static int do_vperfctr_control(struct vperfctr *perfctr,
			       unsigned int cmd,
			       struct task_struct *tsk)
{
	switch (cmd) {
	case VPERFCTR_CONTROL_UNLINK:
		return do_vperfctr_unlink(perfctr, tsk);
	case VPERFCTR_CONTROL_SUSPEND:
		return do_vperfctr_suspend(perfctr, tsk);
	case VPERFCTR_CONTROL_RESUME:
		return do_vperfctr_resume(perfctr, tsk);
	case VPERFCTR_CONTROL_CLEAR:
		return do_vperfctr_clear(perfctr, tsk);
	default:
		return -EINVAL;
	}
}

/* Copy the data of 'domain' out to the user buffer 'dstp' (at most
   'dstbytes' bytes).  Returns the number of bytes copied or a negative
   error code. */
static int do_vperfctr_read(struct vperfctr *perfctr,
			    unsigned int domain,
			    void __user *dstp,
			    unsigned int dstbytes,
			    struct task_struct *tsk)
{
	union {
		struct perfctr_sum_ctrs sum;
		struct vperfctr_control control;
		struct perfctr_sum_ctrs children;
	} *tmp;
	unsigned int tmpbytes;
	int ret;

	tmpbytes = dstbytes;
	if (tmpbytes > PAGE_SIZE) /* primitive sanity check */
		return -EINVAL;
	/* the staging buffer must be able to hold any union member in full */
	if (tmpbytes < sizeof(*tmp))
		tmpbytes = sizeof(*tmp);
	tmp = kmalloc(tmpbytes, GFP_USER);
	if (!tmp)
		return -ENOMEM;

	/* PREEMPT note: While we're reading our own control, another
	   process may ptrace ATTACH
to us and update our control.
	   Disable preemption to ensure we get a consistent copy.
	   Not needed for other cases since the perfctr is either
	   unlinked or its owner is ptrace ATTACH suspended by us. */
	if (tsk == current)
		preempt_disable();

	switch (domain) {
	case VPERFCTR_DOMAIN_SUM: {
		int j;

		/* refresh the sums from the hardware before copying them */
		vperfctr_sample(perfctr);
		tmp->sum.tsc = perfctr->cpu_state.user.tsc_sum;
		for(j = 0; j < ARRAY_SIZE(tmp->sum.pmc); ++j)
			tmp->sum.pmc[j] = perfctr->cpu_state.user.pmc[j].sum;
		ret = sizeof(tmp->sum);
		break;
	}
	case VPERFCTR_DOMAIN_CONTROL:
		tmp->control.si_signo = perfctr->si_signo;
		tmp->control.preserve = perfctr->preserve;
		ret = sizeof(tmp->control);
		break;
	case VPERFCTR_DOMAIN_CHILDREN:
		/* children_lock is only taken while the perfctr still has an owner */
		if (tsk)
			spin_lock(&perfctr->children_lock);
		tmp->children = perfctr->children;
		if (tsk)
			spin_unlock(&perfctr->children_lock);
		ret = sizeof(tmp->children);
		break;
	case PERFCTR_DOMAIN_CPU_CONTROL:
		if (tmpbytes > sizeof(perfctr->cpu_state.control.header))
			tmpbytes = sizeof(perfctr->cpu_state.control.header);
		memcpy(tmp, &perfctr->cpu_state.control.header, tmpbytes);
		ret = tmpbytes;
		break;
	case PERFCTR_DOMAIN_CPU_MAP:
		if (tmpbytes > sizeof(perfctr->cpu_state.control.pmc_map))
			tmpbytes = sizeof(perfctr->cpu_state.control.pmc_map);
		memcpy(tmp, perfctr->cpu_state.control.pmc_map, tmpbytes);
		ret = tmpbytes;
		break;
	default:
		/* CPU-specific domains: the user's buffer is copied in first,
		   then handed to the low-level driver to fill in */
		ret = -EFAULT;
		if (copy_from_user(tmp, dstp, dstbytes) == 0)
			ret = perfctr_cpu_control_read(&perfctr->cpu_state.control,
						       domain, tmp, dstbytes);
	}

	if (tsk == current)
		preempt_enable();

	if (ret > 0) {
		/* never copy out more than the caller asked for */
		if (ret > dstbytes)
			ret = dstbytes;
		if (ret > 0 && copy_to_user(dstp, tmp, ret))
			ret = -EFAULT;
	}
	kfree(tmp);
	return ret;
}

/****************************************************************
 *								*
 * Virtual perfctr file operations.				*
 *								*
 ****************************************************************/

/* mmap() handler: map the page holding the vperfctr into the caller's
   address space.  Returns -EPERM for anything but a read-only, single-page
   mapping at offset 0, or if the file has no perfctr attached. */
static int vperfctr_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct vperfctr *perfctr;

	/* Only allow read-only mapping of first page.
*/
	if ((vma->vm_end - vma->vm_start) != PAGE_SIZE ||
	    vma->vm_pgoff != 0 ||
	    (pgprot_val(vma->vm_page_prot) & _PAGE_RW) ||
	    (vma->vm_flags & (VM_WRITE | VM_MAYWRITE)))
		return -EPERM;
	perfctr = filp->private_data;
	if (!perfctr)
		return -EPERM;
	return remap_pfn_range(vma, vma->vm_start,
			       virt_to_phys(perfctr) >> PAGE_SHIFT,
			       PAGE_SIZE, vma->vm_page_prot);
}

/* release() handler: drop the reference the file held on its perfctr,
   if any.  Always returns 0. */
static int vperfctr_release(struct inode *inode, struct file *filp)
{
	struct vperfctr *perfctr = filp->private_data;
	filp->private_data = NULL;
	if (perfctr)
		put_vperfctr(perfctr);
	return 0;
}

/* Only mmap and release are provided for vperfctr files. */
static struct file_operations vperfctr_file_ops = {
	.mmap = vperfctr_mmap,
	.release = vperfctr_release,
};

/****************************************************************
 *								*
 * File system for virtual perfctrs. Based on pipefs.		*
 *								*
 ****************************************************************/

/* Superblock magic: the ASCII bytes 'V','P','M','C' packed into 32 bits. */
#define VPERFCTRFS_MAGIC (('V'<<24)|('P'<<16)|('M'<<8)|('C'))

/* The code to set up a `struct file_system_type' for a pseudo fs
   is unfortunately not the same in 2.4 and 2.6.
*/ #include /* needed for 2.6, included by fs.h in 2.4 */ #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,18) static int vperfctrfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt) { return get_sb_pseudo(fs_type, "vperfctr:", NULL, VPERFCTRFS_MAGIC, mnt); } #else static struct super_block * vperfctrfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { return get_sb_pseudo(fs_type, "vperfctr:", NULL, VPERFCTRFS_MAGIC); } #endif static struct file_system_type vperfctrfs_type = { .name = "vperfctrfs", .get_sb = vperfctrfs_get_sb, .kill_sb = kill_anon_super, }; /* XXX: check if s/vperfctr_mnt/vperfctrfs_type.kern_mnt/ would work */ static struct vfsmount *vperfctr_mnt; #define vperfctr_fs_init_done() (vperfctr_mnt != NULL) static int __init vperfctrfs_init(void) { int err = register_filesystem(&vperfctrfs_type); if (!err) { vperfctr_mnt = kern_mount(&vperfctrfs_type); if (!IS_ERR(vperfctr_mnt)) return 0; err = PTR_ERR(vperfctr_mnt); unregister_filesystem(&vperfctrfs_type); vperfctr_mnt = NULL; } return err; } static void __exit vperfctrfs_exit(void) { unregister_filesystem(&vperfctrfs_type); mntput(vperfctr_mnt); } static struct inode *vperfctr_get_inode(void) { struct inode *inode; inode = new_inode(vperfctr_mnt->mnt_sb); if (!inode) return NULL; inode->i_fop = &vperfctr_file_ops; inode->i_state = I_DIRTY; inode->i_mode = S_IFCHR | S_IRUSR | S_IWUSR; inode->i_uid = current->fsuid; inode->i_gid = current->fsgid; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) && !DONT_HAVE_i_blksize inode->i_blksize = 0; #endif return inode; } static int vperfctrfs_delete_dentry(struct dentry *dentry) { return 1; } static struct dentry_operations vperfctrfs_dentry_operations = { .d_delete = vperfctrfs_delete_dentry, }; static struct dentry *vperfctr_d_alloc_root(struct inode *inode) { struct qstr this; char name[32]; struct dentry 
*dentry;

	/* the dentry name is the inode number in brackets, e.g. "[42]" */
	sprintf(name, "[%lu]", inode->i_ino);
	this.name = name;
	this.len = strlen(name);
	this.hash = inode->i_ino; /* will go */
	dentry = d_alloc(vperfctr_mnt->mnt_sb->s_root, &this);
	if (dentry) {
		dentry->d_op = &vperfctrfs_dentry_operations;
		d_add(dentry, inode);
	}
	return dentry;
}

/*
 * Build a struct file backed by a fresh vperfctrfs inode and dentry.
 *
 * The file references the internal mount (mntget), uses
 * vperfctr_file_ops and is opened with FMODE_READ.  Returns the file,
 * or NULL if the file, inode or dentry cannot be allocated; objects
 * already allocated at that point are released in reverse order.
 */
static struct file *vperfctr_get_filp(void)
{
	struct file *filp;
	struct inode *inode;
	struct dentry *dentry;

	filp = get_empty_filp();
	if (!filp)
		goto out;
	inode = vperfctr_get_inode();
	if (!inode)
		goto out_filp;
	dentry = vperfctr_d_alloc_root(inode);
	if (!dentry)
		goto out_inode;

	filp->f_vfsmnt = mntget(vperfctr_mnt);
	filp->f_dentry = dentry;
	filp->f_mapping = dentry->d_inode->i_mapping;

	filp->f_pos = 0;
	filp->f_flags = 0;
	filp->f_op = &vperfctr_file_ops; /* fops_get() if MODULE */
	filp->f_mode = FMODE_READ;
	filp->f_version = 0;

	return filp;

 out_inode:
	iput(inode);
 out_filp:
	put_filp(filp);	/* doesn't run ->release() like fput() does */
 out:
	return NULL;
}

/****************************************************************
 *								*
 * Virtual perfctr actual system calls.				*
 *								*
 ****************************************************************/

/* tid is the actual task/thread id (née pid, stored as ->pid),
   pid/tgid is that 2.6 thread group id crap (stored as ->tgid) */
/*
 * Open the vperfctr of task @tid and return a new file descriptor for it.
 *
 * @tid:   0 or the caller's own pid selects the calling task; any other
 *         value selects a remote task, which must pass ptrace_check_attach().
 * @creat: non-zero allocates a new vperfctr and installs it in the task;
 *         zero opens whatever the task currently has (possibly NULL).
 *
 * Returns the descriptor, or a negative error: -ENODEV when vperfctrfs
 * is not initialised, -ENOMEM when no file can be built, the error from
 * get_unused_fd(), get_empty_vperfctr() or ptrace_check_attach(), -ESRCH
 * when @tid does not exist, -EEXIST when @creat is set and the task
 * already has a vperfctr.
 */
asmlinkage long sys_vperfctr_open(int tid, int creat)
{
	struct file *filp;
	struct task_struct *tsk;
	struct vperfctr *perfctr;
	int err;
	int fd;

	if (!vperfctr_fs_init_done())
		return -ENODEV;
	filp = vperfctr_get_filp();
	if (!filp)
		return -ENOMEM;
	err = fd = get_unused_fd();
	if (err < 0)
		goto err_filp;
	perfctr = NULL;
	if (creat) {
		perfctr = get_empty_vperfctr(); /* may sleep */
		if (IS_ERR(perfctr)) {
			err = PTR_ERR(perfctr);
			goto err_fd;
		}
	}
	tsk = current;
	if (tid != 0 && tid != tsk->pid) { /* remote?
*/
		/* look the task up and pin it while tasklist_lock is held */
		read_lock(&tasklist_lock);
		tsk = find_task_by_pid(tid);
		if (tsk)
			get_task_struct(tsk);
		read_unlock(&tasklist_lock);
		err = -ESRCH;
		if (!tsk)
			goto err_perfctr;
		err = ptrace_check_attach(tsk, 0);
		if (err < 0)
			goto err_tsk;
	}
	if (creat) {
		/* check+install must be atomic to prevent remote-control races */
		task_lock(tsk);
		if (!tsk->thread.perfctr) {
			perfctr->owner = tsk;
			tsk->thread.perfctr = perfctr;
			err = 0;
		} else
			err = -EEXIST;
		task_unlock(tsk);
		if (err)
			goto err_tsk;
	} else {
		perfctr = tsk->thread.perfctr;
		/* XXX: Old API needed to allow NULL perfctr here.
		   Do we want to keep or change that rule? */
	}
	/* the file holds its own reference on the perfctr (if there is one) */
	filp->private_data = perfctr;
	if (perfctr)
		atomic_inc(&perfctr->count);
	if (tsk != current)
		put_task_struct(tsk);
	fd_install(fd, filp);
	return fd;
	/* error unwinding: each label releases what was acquired before it */
 err_tsk:
	if (tsk != current)
		put_task_struct(tsk);
 err_perfctr:
	if (perfctr)	/* can only occur if creat != 0 */
		put_vperfctr(perfctr);
 err_fd:
	put_unused_fd(fd);
 err_filp:
	fput(filp);
	return err;
}

/*
 * Translate a file descriptor into its vperfctr.
 *
 * The descriptor must refer to a file whose f_op is vperfctr_file_ops
 * and whose private_data is non-NULL.  On success the vperfctr is
 * returned with its count incremented; the syscalls in this file drop
 * that reference with put_vperfctr().  Returns ERR_PTR(-EBADF) when @fd
 * is not an open file and ERR_PTR(-EINVAL) when it is not a vperfctr
 * file or has no perfctr attached.
 */
static struct vperfctr *fd_get_vperfctr(int fd)
{
	struct vperfctr *perfctr;
	struct file *filp;
	int err;

	err = -EBADF;
	filp = fget(fd);
	if (!filp)
		goto out;
	err = -EINVAL;
	if (filp->f_op != &vperfctr_file_ops)
		goto out_filp;
	perfctr = filp->private_data;
	if (!perfctr)
		goto out_filp;
	/* take the perfctr reference before letting go of the file */
	atomic_inc(&perfctr->count);
	fput(filp);
	return perfctr;
 out_filp:
	fput(filp);
 out:
	return ERR_PTR(err);
}

/*
 * Find the task that owns @perfctr.
 *
 * Returns current when @perfctr is the caller's own.  Otherwise the
 * owner is read under perfctr->owner_lock and pinned with
 * get_task_struct(), then checked with ptrace_check_attach().  The
 * result is NULL when the perfctr has no owner, or ERR_PTR(ret) when the
 * attach check fails (the task reference is dropped first).  Release the
 * result with vperfctr_put_tsk().
 */
static struct task_struct *vperfctr_get_tsk(struct vperfctr *perfctr)
{
	struct task_struct *tsk;

	tsk = current;
	if (perfctr != current->thread.perfctr) {
		/* this synchronises with vperfctr_unlink() and itself */
		spin_lock(&perfctr->owner_lock);
		tsk = perfctr->owner;
		if (tsk)
			get_task_struct(tsk);
		spin_unlock(&perfctr->owner_lock);
		if (tsk) {
			int ret = ptrace_check_attach(tsk, 0);
			if (ret < 0) {
				put_task_struct(tsk);
				return ERR_PTR(ret);
			}
		}
	}
	return tsk;
}

/*
 * Drop the reference taken by vperfctr_get_tsk().  Nothing is done for
 * NULL or for current, which vperfctr_get_tsk() returns without pinning.
 */
static void vperfctr_put_tsk(struct task_struct *tsk)
{
	if (tsk && tsk != current)
		put_task_struct(tsk);
}

/*
 * Write syscall: resolve @fd to its vperfctr and owning task, then
 * forward @domain, @argp and @argbytes to do_vperfctr_write().  Returns
 * that function's result, or the error from fd_get_vperfctr() or
 * vperfctr_get_tsk().
 */
asmlinkage long sys_vperfctr_write(int fd, unsigned int domain,
				   const void __user
*argp, unsigned int argbytes)
{
	struct vperfctr *perfctr;
	struct task_struct *tsk;
	int ret;

	perfctr = fd_get_vperfctr(fd);
	if (IS_ERR(perfctr))
		return PTR_ERR(perfctr);
	tsk = vperfctr_get_tsk(perfctr);
	if (IS_ERR(tsk)) {
		ret = PTR_ERR(tsk);
		goto out;
	}
	ret = do_vperfctr_write(perfctr, domain, argp, argbytes, tsk);
	vperfctr_put_tsk(tsk);
 out:
	/* drop the reference taken by fd_get_vperfctr() */
	put_vperfctr(perfctr);
	return ret;
}

/*
 * Control syscall: resolve @fd to its vperfctr and owning task, then
 * forward @cmd to do_vperfctr_control().  Returns that function's
 * result, or the error from fd_get_vperfctr() or vperfctr_get_tsk().
 */
asmlinkage long sys_vperfctr_control(int fd, unsigned int cmd)
{
	struct vperfctr *perfctr;
	struct task_struct *tsk;
	int ret;

	perfctr = fd_get_vperfctr(fd);
	if (IS_ERR(perfctr))
		return PTR_ERR(perfctr);
	tsk = vperfctr_get_tsk(perfctr);
	if (IS_ERR(tsk)) {
		ret = PTR_ERR(tsk);
		goto out;
	}
	ret = do_vperfctr_control(perfctr, cmd, tsk);
	vperfctr_put_tsk(tsk);
 out:
	/* drop the reference taken by fd_get_vperfctr() */
	put_vperfctr(perfctr);
	return ret;
}

/*
 * Read syscall: resolve @fd to its vperfctr and owning task, then
 * forward @domain, @argp and @argbytes to do_vperfctr_read().  Returns
 * that function's result, or the error from fd_get_vperfctr() or
 * vperfctr_get_tsk().
 */
asmlinkage long sys_vperfctr_read(int fd, unsigned int domain,
				  void __user *argp, unsigned int argbytes)
{
	struct vperfctr *perfctr;
	struct task_struct *tsk;
	int ret;

	perfctr = fd_get_vperfctr(fd);
	if (IS_ERR(perfctr))
		return PTR_ERR(perfctr);
	tsk = vperfctr_get_tsk(perfctr);
	if (IS_ERR(tsk)) {
		ret = PTR_ERR(tsk);
		goto out;
	}
	ret = do_vperfctr_read(perfctr, domain, argp, argbytes, tsk);
	vperfctr_put_tsk(tsk);
 out:
	/* drop the reference taken by fd_get_vperfctr() */
	put_vperfctr(perfctr);
	return ret;
}

/****************************************************************
 *								*
 * module_init/exit						*
 *								*
 ****************************************************************/

/* Entry point for initialisation: sets up vperfctrfs and returns its status. */
int __init vperfctr_init(void)
{
	return vperfctrfs_init();
}

/* Entry point for teardown: removes vperfctrfs. */
void __exit vperfctr_exit(void)
{
	vperfctrfs_exit();
}
papi-5.6.0/src/libpfm4/perf_examples/task_attach_timeout.c000664 001750 001750 00000011650 13216244365 025720 0ustar00jshenry1963jshenry1963000000 000000
/*
 * task_attach_timeout.c - attach to another task for monitoring for a short while
 *
 * Copyright (c) 2009 Google, Inc
 * Contributed by Stephane Eranian
 *
 * Based on:
 * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P.
* Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include "perf_util.h" typedef struct { char *events; int delay; int print; int group; int pinned; } options_t; static options_t options; static void print_counts(perf_event_desc_t *fds, int num, int do_delta) { ssize_t ret; int i; /* * now simply read the results. */ for(i=0; i < num; i++) { uint64_t val; double ratio; ret = read(fds[i].fd, fds[i].values, sizeof(fds[i].values)); if (ret < (ssize_t)sizeof(fds[i].values)) { if (ret == -1) err(1, "cannot read values event %s", fds[i].name); else warnx("could not read event%d", i); } val = perf_scale(fds[i].values); ratio = perf_scale_ratio(fds[i].values); val = do_delta ? 
perf_scale_delta(fds[i].values, fds[i].prev_values) : val; fds[i].prev_values[0] = fds[i].values[0]; fds[i].prev_values[1] = fds[i].values[1]; fds[i].prev_values[2] = fds[i].values[2]; if (ratio == 1.0) printf("%20"PRIu64" %s\n", val, fds[i].name); else if (ratio == 0.0) printf("%20"PRIu64" %s (did not run: incompatible events, too many events in a group, competing session)\n", val, fds[i].name); else printf("%20"PRIu64" %s (scaled from %.2f%% of time)\n", val, fds[i].name, ratio*100.0); } } int measure(pid_t pid) { perf_event_desc_t *fds = NULL; int i, ret, num_fds = 0; char fn[32]; if (pfm_initialize() != PFM_SUCCESS) errx(1, "libpfm initialization failed\n"); ret = perf_setup_list_events(options.events, &fds, &num_fds); if (ret || (num_fds == 0)) exit(1); fds[0].fd = -1; for(i=0; i < num_fds; i++) { fds[i].hw.disabled = 0; /* start immediately */ /* request timing information necessary for scaling counts */ fds[i].hw.read_format = PERF_FORMAT_SCALE; fds[i].hw.pinned = !i && options.pinned; fds[i].fd = perf_event_open(&fds[i].hw, pid, -1, (options.group? fds[0].fd : -1), 0); if (fds[i].fd == -1) errx(1, "cannot attach event %s", fds[i].name); } /* * no notification is generated by perf_counters * when the monitored thread exits. Thus we need * to poll /proc/ to detect it has disappeared, * otherwise we have to wait until the end of the * timeout */ sprintf(fn, "/proc/%d/status", pid); while(access(fn, F_OK) == 0 && options.delay) { sleep(1); options.delay--; if (options.print) print_counts(fds, num_fds, 1); } if (options.delay) warn("thread %d terminated before timeout", pid); if (!options.print) print_counts(fds, num_fds, 0); for(i=0; i < num_fds; i++) close(fds[i].fd); perf_free_fds(fds, num_fds); /* free libpfm resources cleanly */ pfm_terminate(); return 0; } static void usage(void) { printf("usage: task_attach_timeout [-h] [-p] [-P] [-g] [-d delay] [-e event1,event2,...] 
pid\n"); } int main(int argc, char **argv) { int c; while ((c=getopt(argc, argv,"he:vd:pgP")) != -1) { switch(c) { case 'e': options.events = optarg; break; case 'p': options.print = 1; break; case 'P': options.pinned = 1; break; case 'g': options.group = 1; break; case 'd': options.delay = atoi(optarg); break; case 'h': usage(); exit(0); default: errx(1, "unknown error"); } } if (!options.events) options.events = strdup("cycles,instructions"); if (options.delay < 1) options.delay = 10; if (!argv[optind]) errx(1, "you must specify pid to attach to\n"); return measure(atoi(argv[optind])); } papi-5.6.0/src/components/bgpm/README000664 001750 001750 00000002110 13216244357 021313 0ustar00jshenry1963jshenry1963000000 000000 /** * @file: README * CVS: $Id$ * @author: Dan Terpstra * terpstra@icl.utk.edu * @defgroup papi_components Components * @brief Component Specific Readme file: BGPM */ /** @page component_readme Component Readme @section Component Specific Information bgpm/ Five new components have been added to PAPI to support hardware performance monitoring for the BG/Q platform; in particular the BG/Q network, the I/O system, the Compute Node Kernel in addition to the processing core. There are no specific component configure scripts for L2unit, IOunit, NWunit, CNKunit. 
In order to configure PAPI for BG/Q, use the following configure options at the papi/src level: % ./configure --prefix=< your_choice > \ --with-OS=bgq \ --with-bgpm_installdir=/bgsys/drivers/ppcfloor \ CC=/bgsys/drivers/ppcfloor/gnu-linux/bin/powerpc64-bgq-linux-gcc \ F77=/bgsys/drivers/ppcfloor/gnu-linux/bin/powerpc64-bgq-linux-gfortran \ --with-components="bgpm/L2unit bgpm/CNKunit bgpm/IOunit bgpm/NWunit" */ papi-5.6.0/man/man3/PAPI_num_events.3000664 001750 001750 00000002530 13216244356 021230 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPI_num_events" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPI_num_events \- .PP Return the number of events in an event set\&. .SH SYNOPSIS .br .PP .SH "Detailed Description" .PP \fBPAPI_num_events()\fP returns the number of preset and/or native events contained in an event set\&. The event set should be created by \fBPAPI_create_eventset\fP \&. .PP \fBC Interface:\fP .RS 4 #include <\fBpapi\&.h\fP> .br int \fBPAPI_num_events(int EventSet )\fP; .RE .PP \fBParameters:\fP .RS 4 \fIEventSet\fP -- an integer handle for a PAPI event set created by \fBPAPI_create_eventset\fP\&. .br \fI*count\fP -- (Fortran only) On output the variable contains the number of events in the event set .RE .PP \fBReturn values:\fP .RS 4 \fIOn\fP success, this function returns the positive number of events in the event set\&. .br \fIPAPI_EINVAL\fP The event count is zero; only if code is compiled with debug enabled\&. .br \fIPAPI_ENOEVST\fP The EventSet specified does not exist\&. .RE .PP \fBExample\fP .RS 4 .PP .nf * // Count the events in our EventSet * printf(\"%d events found in EventSet\&.\\n\", PAPI_num_events(EventSet)); * .fi .PP .RE .PP \fBSee Also:\fP .RS 4 \fBPAPI_add_event\fP .PP \fBPAPI_create_eventset\fP .RE .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. 
papi-5.6.0/src/components/net/Rules.net000664 001750 001750 00000000333 13216244357 022103 0ustar00jshenry1963jshenry1963000000 000000 # $Id$ COMPSRCS += components/net/linux-net.c COMPOBJS += linux-net.o linux-net.o: components/net/linux-net.c components/net/linux-net.h $(HEADERS) $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/net/linux-net.c -o $@ papi-5.6.0/src/ctests/code2name.c000664 001750 001750 00000010001 13216244360 020617 0ustar00jshenry1963jshenry1963000000 000000 /* This file performs the following test: event_code_to_name */ #include #include #include "papi.h" #include "papi_test.h" static void test_continue( const char *call, int retval ) { if (!TESTS_QUIET) { printf( "Expected error in %s: %s\n", call, PAPI_strerror(retval) ); } } int main( int argc, char **argv ) { int retval; int code = PAPI_TOT_CYC, last; char event_name[PAPI_MAX_STR_LEN]; const PAPI_component_info_t *cmp_info; int quiet; /* Set TESTS_QUIET variable */ quiet=tests_quiet( argc, argv ); retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); if (!quiet) { printf( "Test case code2name.c: " "Check limits and indexing of event tables.\n"); printf( "Looking for PAPI_TOT_CYC...\n" ); } retval = PAPI_event_code_to_name( code, event_name ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); } if (!quiet) printf( "Found |%s|\n", event_name ); code = PAPI_FP_OPS; if (!quiet) { printf( "Looking for highest defined preset event " "(PAPI_FP_OPS): %#x...\n",code ); } retval = PAPI_event_code_to_name( code, event_name ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); if (!quiet) printf( "Found |%s|\n", event_name ); code = PAPI_PRESET_MASK | ( PAPI_MAX_PRESET_EVENTS - 1 ); if (!quiet) { printf( "Looking for highest allocated preset event:" " %#x...\n", code ); } retval = PAPI_event_code_to_name( code, event_name ); if ( 
retval != PAPI_OK ) { test_continue( "PAPI_event_code_to_name", retval ); } else { if (!quiet) printf( "Found |%s|\n", event_name ); } code = PAPI_PRESET_MASK | ( unsigned int ) PAPI_NATIVE_AND_MASK; if (!quiet) { printf( "Looking for highest possible preset event:" " %#x...\n", code ); } retval = PAPI_event_code_to_name( code, event_name ); if ( retval != PAPI_OK ) { test_continue( "PAPI_event_code_to_name", retval ); } else { if (!quiet) printf( "Found |%s|\n", event_name ); } /* Find the first defined native event in component 0 */ /* For platform independence, always ASK FOR the first event */ /* Don't just assume it'll be the first numeric value */ code = PAPI_NATIVE_MASK; PAPI_enum_event( &code, PAPI_ENUM_FIRST ); if (!quiet) { printf( "Looking for first native event: %#x...\n", code ); } retval = PAPI_event_code_to_name( code, event_name ); if ( retval != PAPI_OK ) { if (!quiet) printf("Could not find first native event\n"); test_skip( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); } else { if (!quiet) printf( "Found |%s|\n", event_name ); } /* Find the last defined native event */ /* FIXME: hardcoded cmp 0 */ cmp_info = PAPI_get_component_info( 0 ); if ( cmp_info == NULL ) { test_fail( __FILE__, __LINE__, "PAPI_get_component_info", PAPI_ECMP ); } code = PAPI_NATIVE_MASK; last = code; PAPI_enum_event( &code, PAPI_ENUM_FIRST ); while ( PAPI_enum_event( &code, PAPI_ENUM_EVENTS ) == PAPI_OK ) { last=code; } code = last; if (!quiet) printf( "Looking for last native event: %#x...\n", code ); retval = PAPI_event_code_to_name( code, event_name ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); } else { if (!quiet) printf( "Found |%s|\n", event_name ); } /* Highly doubtful we have this many natives */ /* Turn on all bits *except* PRESET bit and COMPONENT bits */ code = PAPI_PRESET_AND_MASK; if (!quiet) printf( "Looking for highest definable native event: %#x...\n", code ); retval = PAPI_event_code_to_name( 
code, event_name ); if ( retval != PAPI_OK ) { test_continue( "PAPI_event_code_to_name", retval ); } else { if (!quiet) printf( "Found |%s|\n", event_name ); } if ( ( retval == PAPI_ENOCMP) || ( retval == PAPI_ENOEVNT ) || ( retval == PAPI_OK ) ) { test_pass( __FILE__ ); } test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", PAPI_EBUG ); return 1; } papi-5.6.0/man/man3/PAPI_read.3000664 001750 001750 00000003434 13216244356 017764 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPI_read" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPI_read \- .PP Read hardware counters from an event set\&. .SH SYNOPSIS .br .PP .SH "Detailed Description" .PP .PP \fBC Interface:\fP .RS 4 #include <\fBpapi\&.h\fP> .br int \fBPAPI_read(int EventSet, long_long * values )\fP; .RE .PP \fBPAPI_read()\fP copies the counters of the indicated event set into the provided array\&. .PP The counters continue counting after the read\&. .PP Note the differences between \fBPAPI_read()\fP and \fBPAPI_accum()\fP, specifically that \fBPAPI_accum()\fP resets the values array to zero\&. .PP \fBPAPI_read()\fP assumes an initialized PAPI library and a properly added event set\&. .PP \fBParameters:\fP .RS 4 \fIEventSet\fP -- an integer handle for a PAPI Event Set as created by \fBPAPI_create_eventset()\fP .br \fI*values\fP -- an array to hold the counter values of the counting events .RE .PP \fBReturn values:\fP .RS 4 \fIPAPI_EINVAL\fP One or more of the arguments is invalid\&. .br \fIPAPI_ESYS\fP A system or C library call failed inside PAPI, see the errno variable\&. .br \fIPAPI_ENOEVST\fP The event set specified does not exist\&. 
.RE .PP \fBExamples\fP .RS 4 .PP .nf * do_100events(); * if (PAPI_read(EventSet, values) != PAPI_OK) * handle_error(1); * // values[0] now equals 100 * do_100events(); * if (PAPI_accum(EventSet, values) != PAPI_OK) * handle_error(1); * // values[0] now equals 200 * values[0] = -100; * do_100events(); * if (PAPI_accum(EventSet, values) != PAPI_OK) * handle_error(1); * // values[0] now equals 0 * .fi .PP .RE .PP \fBSee Also:\fP .RS 4 \fBPAPI_accum\fP .PP \fBPAPI_start\fP .PP \fBPAPI_stop\fP .PP \fBPAPI_reset\fP .RE .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. papi-5.6.0/src/libpfm4/lib/events/cell_events.h000664 001750 001750 00000336441 13216244364 023423 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2007 TOSHIBA CORPORATION based on code from * Copyright (c) 2001-2006 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ static pme_cell_entry_t cell_pe[] = { {.pme_name = "CYCLES", .pme_desc = "CPU cycles", .pme_code = 0x0, /* 0 */ .pme_enable_word = WORD_NONE, .pme_freq = PFM_CELL_PME_FREQ_PPU_MFC, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "BRANCH_COMMIT_TH0", .pme_desc = "Branch instruction committed.", .pme_code = 0x834, /* 2100 */ .pme_enable_word = WORD_0_AND_1, .pme_freq = PFM_CELL_PME_FREQ_PPU_MFC, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "BRANCH_FLUSH_TH0", .pme_desc = "Branch instruction that caused a misprediction flush is committed. Branch misprediction includes", .pme_code = 0x835, /* 2101 */ .pme_enable_word = WORD_0_AND_1, .pme_freq = PFM_CELL_PME_FREQ_PPU_MFC, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "INST_BUFF_EMPTY_TH0", .pme_desc = "Instruction buffer empty.", .pme_code = 0x836, /* 2102 */ .pme_enable_word = WORD_0_AND_1, .pme_freq = PFM_CELL_PME_FREQ_PPU_MFC, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "INST_ERAT_MISS_TH0", .pme_desc = "Instruction effective-address-to-real-address translation (I-ERAT) miss.", .pme_code = 0x837, /* 2103 */ .pme_enable_word = WORD_0_AND_1, .pme_freq = PFM_CELL_PME_FREQ_PPU_MFC, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "L1_ICACHE_MISS_CYCLES_TH0", .pme_desc = "L1 Instruction cache miss cycles. 
Counts the cycles from the miss event until the returned instruction is dispatched or cancelled due to branch misprediction, completion restart, or exceptions.", .pme_code = 0x838, /* 2104 */ .pme_enable_word = WORD_0_AND_1, .pme_freq = PFM_CELL_PME_FREQ_PPU_MFC, .pme_type = COUNT_TYPE_BOTH_TYPE, }, {.pme_name = "DISPATCH_BLOCKED_TH0", .pme_desc = "Valid instruction available for dispatch, but dispatch is blocked.", .pme_code = 0x83a, /* 2106 */ .pme_enable_word = WORD_0_AND_1, .pme_freq = PFM_CELL_PME_FREQ_PPU_MFC, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "INST_FLUSH_TH0", .pme_desc = "Instruction in pipeline stage EX7 causes a flush.", .pme_code = 0x83d, /* 2109 */ .pme_enable_word = WORD_0_AND_1, .pme_freq = PFM_CELL_PME_FREQ_PPU_MFC, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "PPC_INST_COMMIT_TH0", .pme_desc = "Two PowerPC instructions committed. For microcode sequences, only the last microcode operation is counted. Committed instructions are counted two at a time. If only one instruction has committed for a given cycle, this event will not be raised until another instruction has been committed in a future cycle.", .pme_code = 0x83f, /* 2111 */ .pme_enable_word = WORD_0_AND_1, .pme_freq = PFM_CELL_PME_FREQ_PPU_MFC, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "BRANCH_COMMIT_TH1", .pme_desc = "Branch instruction committed.", .pme_code = 0x847, /* 2119 */ .pme_enable_word = WORD_0_AND_1, .pme_freq = PFM_CELL_PME_FREQ_PPU_MFC, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "BRANCH_FLUSH_TH1", .pme_desc = "Branch instruction that caused a misprediction flush is committed. 
Branch misprediction includes", .pme_code = 0x848, /* 2120 */ .pme_enable_word = WORD_0_AND_1, .pme_freq = PFM_CELL_PME_FREQ_PPU_MFC, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "INST_BUFF_EMPTY_TH1", .pme_desc = "Instruction buffer empty.", .pme_code = 0x849, /* 2121 */ .pme_enable_word = WORD_0_AND_1, .pme_freq = PFM_CELL_PME_FREQ_PPU_MFC, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "INST_ERAT_MISS_TH1", .pme_desc = "Instruction effective-address-to-real-address translation (I-ERAT) miss.", .pme_code = 0x84a, /* 2122 */ .pme_enable_word = WORD_0_AND_1, .pme_freq = PFM_CELL_PME_FREQ_PPU_MFC, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "L1_ICACHE_MISS_CYCLES_TH1", .pme_desc = "L1 Instruction cache miss cycles. Counts the cycles from the miss event until the returned instruction is dispatched or cancelled due to branch misprediction, completion restart, or exceptions.", .pme_code = 0x84b, /* 2123 */ .pme_enable_word = WORD_0_AND_1, .pme_freq = PFM_CELL_PME_FREQ_PPU_MFC, .pme_type = COUNT_TYPE_BOTH_TYPE, }, {.pme_name = "DISPATCH_BLOCKED_TH1", .pme_desc = "Valid instruction available for dispatch, but dispatch is blocked.", .pme_code = 0x84d, /* 2125 */ .pme_enable_word = WORD_0_AND_1, .pme_freq = PFM_CELL_PME_FREQ_PPU_MFC, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "INST_FLUSH_TH1", .pme_desc = "Instruction in pipeline stage EX7 causes a flush.", .pme_code = 0x850, /* 2128 */ .pme_enable_word = WORD_0_AND_1, .pme_freq = PFM_CELL_PME_FREQ_PPU_MFC, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "PPC_INST_COMMIT_TH1", .pme_desc = "Two PowerPC instructions committed. For microcode sequences, only the last microcode operation is counted. Committed instructions are counted two at a time. 
If only one instruction has committed for a given cycle, this event will not be raised until another instruction has been committed in a future cycle.", .pme_code = 0x852, /* 2130 */ .pme_enable_word = WORD_0_AND_1, .pme_freq = PFM_CELL_PME_FREQ_PPU_MFC, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "DATA_ERAT_MISS_TH0", .pme_desc = "Data effective-address-to-real-address translation (D-ERAT) miss. Not speculative.", .pme_code = 0x89a, /* 2202 */ .pme_enable_word = WORD_0_AND_1, .pme_freq = PFM_CELL_PME_FREQ_PPU_MFC, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "ST_REQ_TH0", .pme_desc = "Store request counted at the L2 interface. Counts microcoded PPE sequences more than once. (Thread 0 and 1)", .pme_code = 0x89b, /* 2203 */ .pme_enable_word = WORD_0_AND_1, .pme_freq = PFM_CELL_PME_FREQ_PPU_MFC, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "LD_VALID_TH0", .pme_desc = "Load valid at a particular pipe stage. Speculative, since flushed operations are counted as well. Counts microcoded PPE sequences more than once. Misaligned flushes might be counted the first time as well. Load operations include all loads that read data from the cache, dcbt and dcbtst. Does not include load Vector/SIMD multimedia extension pattern instructions.", .pme_code = 0x89c, /* 2204 */ .pme_enable_word = WORD_0_AND_1, .pme_freq = PFM_CELL_PME_FREQ_PPU_MFC, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "L1_DCACHE_MISS_TH0", .pme_desc = "L1 D-cache load miss. Pulsed when there is a miss request that has a tag miss but not an ERAT miss. Speculative, since flushed operations are counted as well.", .pme_code = 0x89d, /* 2205 */ .pme_enable_word = WORD_0_AND_1, .pme_freq = PFM_CELL_PME_FREQ_PPU_MFC, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "DATA_ERAT_MISS_TH1", .pme_desc = "Data effective-address-to-real-address translation (D-ERAT) miss. 
Not speculative.", .pme_code = 0x8aa, /* 2218 */ .pme_enable_word = WORD_0_AND_1, .pme_freq = PFM_CELL_PME_FREQ_PPU_MFC, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "LD_VALID_TH1", .pme_desc = "Load valid at a particular pipe stage. Speculative, since flushed operations are counted as well. Counts microcoded PPE sequences more than once. Misaligned flushes might be counted the first time as well. Load operations include all loads that read data from the cache, dcbt and dcbtst. Does not include load Vector/SIMD multimedia extension pattern instructions.", .pme_code = 0x8ac, /* 2220 */ .pme_enable_word = WORD_0_AND_1, .pme_freq = PFM_CELL_PME_FREQ_PPU_MFC, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "DATA_ERAT_MISS_TH1", .pme_desc = "L1 D-cache load miss. Pulsed when there is a miss request that has a tag miss but not an ERAT miss. Speculative, since flushed operations are counted as well.", .pme_code = 0x8ad, /* 2221 */ .pme_enable_word = WORD_0_AND_1, .pme_freq = PFM_CELL_PME_FREQ_PPU_MFC, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "LD_MFC_MMIO", .pme_desc = "Load from MFC memory-mapped I/O (MMIO) space.", .pme_code = 0xc1c, /* 3100 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "ST_MFC_MMIO", .pme_desc = "Stores to MFC MMIO space.", .pme_code = 0xc1d, /* 3101 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "REQ_TOKEN_TYPE", .pme_desc = "Request token for even memory bank numbers 0-14.", .pme_code = 0xc22, /* 3106 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "RCV_8BEAT_DATA", .pme_desc = "Receive 8-beat data from the Element Interconnect Bus (EIB).", .pme_code = 0xc2b, /* 3115 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = 
"SEND_8BEAT_DATA", .pme_desc = "Send 8-beat data to the EIB.", .pme_code = 0xc2c, /* 3116 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "SEND_CMD", .pme_desc = "Send a command to the EIB; includes retried commands.", .pme_code = 0xc2d, /* 3117 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "DATA_GRANT_CYCLES", .pme_desc = "Cycles between data request and data grant.", .pme_code = 0xc2e, /* 3118 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "NCU_ST_Q_NOT_EMPTY_CYCLES", .pme_desc = "The five-entry Non-Cacheable Unit (NCU) Store Command queue not empty.", .pme_code = 0xc33, /* 3123 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "L2_CACHE_HIT", .pme_desc = "Cache hit for core interface unit (CIU) loads and stores.", .pme_code = 0xc80, /* 3200 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "L2_CACHE_MISS", .pme_desc = "Cache miss for CIU loads and stores.", .pme_code = 0xc81, /* 3201 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "L2_LD_MISS", .pme_desc = "CIU load miss.", .pme_code = 0xc84, /* 3204 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "L2_ST_MISS", .pme_desc = "CIU store to Invalid state (miss).", .pme_code = 0xc85, /* 3205 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "L2_LWARX_LDARX_MISS_TH0", .pme_desc = "Load word and reserve indexed (lwarx/ldarx) for Thread 0 hits Invalid cache state", .pme_code = 0xc87, /* 3207 */ .pme_enable_word = WORD_0_AND_2, 
.pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "L2_STWCX_STDCX_MISS_TH0", .pme_desc = "Store word conditional indexed (stwcx/stdcx) for Thread 0 hits Invalid cache state when reservation is set.", .pme_code = 0xc8e, /* 3214 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "L2_ALL_SNOOP_SM_BUSY", .pme_desc = "All four snoop state machines busy.", .pme_code = 0xc99, /* 3225 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "L2_DCLAIM_GOOD", .pme_desc = "Data line claim (dclaim) that received good combined response; includes store/stcx/dcbz to Shared (S), Shared Last (SL),or Tagged (T) cache state; does not include dcbz to Invalid (I) cache state.", .pme_code = 0xce8, /* 3304 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "L2_DCLAIM_TO_RWITM", .pme_desc = "Dclaim converted into rwitm; may still not get to the bus if stcx is aborted .", .pme_code = 0xcef, /* 3311 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "L2_ST_TO_M_MU_E", .pme_desc = "Store to modified (M), modified unsolicited (MU), or exclusive (E) cache state.", .pme_code = 0xcf0, /* 3312 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "L2_ST_Q_FULL", .pme_desc = "8-entry store queue (STQ) full.", .pme_code = 0xcf1, /* 3313 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "L2_ST_TO_RC_ACKED", .pme_desc = "Store dispatched to RC machine is acknowledged.", .pme_code = 0xcf2, /* 3314 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "L2_GATHERABLE_ST", 
.pme_desc = "Gatherable store (type = 00000) received from CIU.", .pme_code = 0xcf3, /* 3315 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "L2_SNOOP_PUSH", .pme_desc = "Snoop push.", .pme_code = 0xcf6, /* 3318 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "L2_INTERVENTION_FROM_SL_E_SAME_MODE", .pme_desc = "Send intervention from (SL | E) cache state to a destination within the same CBE chip.", .pme_code = 0xcf7, /* 3319 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "L2_INTERVENTION_FROM_M_MU_SAME_MODE", .pme_desc = "Send intervention from (M | MU) cache state to a destination within the same CBE chip.", .pme_code = 0xcf8, /* 3320 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "L2_SNOOP_RETRY_CONFLICTS", .pme_desc = "Respond with Retry to a snooped request due to one of the following conflicts", .pme_code = 0xcfd, /* 3325 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "L2_SNOOP_RETRY_BUSY", .pme_desc = "Respond with Retry to a snooped request because all snoop machines are busy.", .pme_code = 0xcfe, /* 3326 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "L2_SNOOP_RESP_MMU_TO_EST", .pme_desc = "Snooped response causes a cache state transition from (M | MU) to (E | S | T).", .pme_code = 0xcff, /* 3327 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "L2_SNOOP_RESP_E_TO_S", .pme_desc = "Snooped response causes a cache state transition from E to S.", .pme_code = 0xd00, /* 3328 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, 
.pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "L2_SNOOP_RESP_ESLST_TO_I", .pme_desc = "Snooped response causes a cache state transition from (E | SL | S | T) to Invalid (I).", .pme_code = 0xd01, /* 3329 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "L2_SNOOP_RESP_MMU_TO_I", .pme_desc = "Snooped response causes a cache state transition from (M | MU) to I.", .pme_code = 0xd02, /* 3330 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "L2_LWARX_LDARX_MISS_TH1", .pme_desc = "Load and reserve indexed (lwarx/ldarx) for Thread 1 hits Invalid cache state.", .pme_code = 0xd54, /* 3412 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "L2_STWCX_STDCX_MISS_TH1", .pme_desc = "Store conditional indexed (stwcx/stdcx) for Thread 1 hits Invalid cache state.", .pme_code = 0xd5b, /* 3419 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "NCU_NON_CACHEABLE_ST_ALL", .pme_desc = "Non-cacheable store request received from CIU; includes all synchronization operations such as sync and eieio.", .pme_code = 0xdac, /* 3500 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "NCU_SYNC_REQ", .pme_desc = "sync received from CIU.", .pme_code = 0xdad, /* 3501 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "NCU_NON_CACHEABLE_ST", .pme_desc = "Non-cacheable store request received from CIU; includes only stores.", .pme_code = 0xdb0, /* 3504 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "NCU_EIEIO_REQ", .pme_desc = "eieio received from CIU.", .pme_code = 0xdb2, /* 3506 */ .pme_enable_word 
= WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "NCU_TLBIE_REQ", .pme_desc = "tlbie received from CIU.", .pme_code = 0xdb3, /* 3507 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "NCU_SYNC_WAIT", .pme_desc = "sync at the bottom of the store queue, while waiting on st_done signal from the Bus Interface Unit (BIU) and sync_done signal from L2.", .pme_code = 0xdb4, /* 3508 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "NCU_LWSYNC_WAIT", .pme_desc = "lwsync at the bottom of the store queue, while waiting for a sync_done signal from the L2.", .pme_code = 0xdb5, /* 3509 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "NCU_EIEIO_WAIT", .pme_desc = "eieio at the bottom of the store queue, while waiting for a st_done signal from the BIU and a sync_done signal from the L2.", .pme_code = 0xdb6, /* 3510 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "NCU_TLBIE_WAIT", .pme_desc = "tlbie at the bottom of the store queue, while waiting for a st_done signal from the BIU.", .pme_code = 0xdb7, /* 3511 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "NCU_COMBINED_NON_CACHEABLE_ST", .pme_desc = "Non-cacheable store combined with the previous non-cacheable store with a contiguous address.", .pme_code = 0xdb8, /* 3512 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "NCU_ALL_ST_GATHER_BUFFS_FULL", .pme_desc = "All four store-gather buffers full.", .pme_code = 0xdbb, /* 3515 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = 
COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "NCU_LD_REQ", .pme_desc = "Non-cacheable load request received from CIU; includes instruction and data fetches.", .pme_code = 0xdbc, /* 3516 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "NCU_ST_Q_NOT_EMPTY", .pme_desc = "The four-deep store queue not empty.", .pme_code = 0xdbd, /* 3517 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "NCU_ST_Q_FULL", .pme_desc = "The four-deep store queue full.", .pme_code = 0xdbe, /* 3518 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "NCU_AT_LEAST_ONE_ST_GATHER_BUFF_NOT_EMPTY", .pme_desc = "At least one store gather buffer not empty.", .pme_code = 0xdbf, /* 3519 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "SPU_DUAL_INST_COMMITTED", .pme_desc = "A dual instruction is committed.", .pme_code = 0x1004, /* 4100 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_SPU, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "SPU_SINGLE_INST_COMMITTED", .pme_desc = "A single instruction is committed.", .pme_code = 0x1005, /* 4101 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_SPU, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "SPU_PIPE0_INST_COMMITTED", .pme_desc = "A pipeline 0 instruction is committed.", .pme_code = 0x1006, /* 4102 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_SPU, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "SPU_PIPE1_INST_COMMITTED", .pme_desc = "A pipeline 1 instruction is committed.", .pme_code = 0x1007, /* 4103 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_SPU, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "SPU_LS_BUSY", .pme_desc = "Local storage is busy.", 
.pme_code = 0x1009, /* 4105 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_SPU, .pme_type = COUNT_TYPE_BOTH_TYPE, }, {.pme_name = "SPU_DMA_CONFLICT_LD_ST", .pme_desc = "A direct memory access (DMA) might conflict with a load or store.", .pme_code = 0x100a, /* 4106 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_SPU, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "SPU_LS_ST", .pme_desc = "A store instruction to local storage is issued.", .pme_code = 0x100b, /* 4107 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_SPU, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "SPU_LS_LD", .pme_desc = "A load instruction from local storage is issued.", .pme_code = 0x100c, /* 4108 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_SPU, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "SPU_FP_EXCEPTION", .pme_desc = "A floating-point unit exception occurred.", .pme_code = 0x100d, /* 4109 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_SPU, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "SPU_BRANCH_COMMIT", .pme_desc = "A branch instruction is committed.", .pme_code = 0x100e, /* 4110 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_SPU, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "SPU_NON_SEQ_PC", .pme_desc = "A nonsequential change of the SPU program counter has occurred. This can be caused by branch, asynchronous interrupt, stalled wait on channel, error-correction code (ECC) error, and so forth.", .pme_code = 0x100f, /* 4111 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_SPU, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "SPU_BRANCH_NOT_TAKEN", .pme_desc = "A branch was not taken.", .pme_code = 0x1010, /* 4112 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_SPU, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "SPU_BRANCH_MISS_PREDICTION", .pme_desc = "Branch miss prediction. 
This count is not exact. Certain other code sequences can cause additional pulses on this signal.", .pme_code = 0x1011, /* 4113 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_SPU, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "SPU_BRANCH_HINT_MISS_PREDICTION", .pme_desc = "Branch hint miss prediction. This count is not exact. Certain other code sequences can cause additional pulses on this signal.", .pme_code = 0x1012, /* 4114 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_SPU, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "SPU_INST_SEQ_ERROR", .pme_desc = "Instruction sequence error", .pme_code = 0x1013, /* 4115 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_SPU, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "SPU_STALL_CH_WRITE", .pme_desc = "Stalled waiting on any blocking channel write.", .pme_code = 0x1015, /* 4117 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_SPU, .pme_type = COUNT_TYPE_BOTH_TYPE, }, {.pme_name = "SPU_STALL_EXTERNAL_EVENT_CH0", .pme_desc = "Stalled waiting on external event status (Channel 0).", .pme_code = 0x1016, /* 4118 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_SPU, .pme_type = COUNT_TYPE_BOTH_TYPE, }, {.pme_name = "SPU_STALL_SIGNAL_1_CH3", .pme_desc = "Stalled waiting on SPU Signal Notification 1 (Channel 3).", .pme_code = 0x1017, /* 4119 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_SPU, .pme_type = COUNT_TYPE_BOTH_TYPE, }, {.pme_name = "SPU_STALL_SIGNAL_2_CH4", .pme_desc = "Stalled waiting on SPU Signal Notification 2 (Channel 4).", .pme_code = 0x1018, /* 4120 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_SPU, .pme_type = COUNT_TYPE_BOTH_TYPE, }, {.pme_name = "SPU_STALL_DMA_CH21", .pme_desc = "Stalled waiting on DMA Command Opcode or ClassID Register (Channel 21).", .pme_code = 0x1019, /* 4121 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_SPU, .pme_type 
= COUNT_TYPE_BOTH_TYPE, }, {.pme_name = "SPU_STALL_MFC_READ_CH24", .pme_desc = "Stalled waiting on memory flow control (MFC) Read Tag-Group Status (Channel 24).", .pme_code = 0x101a, /* 4122 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_SPU, .pme_type = COUNT_TYPE_BOTH_TYPE, }, {.pme_name = "SPU_STALL_MFC_READ_CH25", .pme_desc = "Stalled waiting on MFC Read List Stall-and-Notify Tag Status (Channel 25).", .pme_code = 0x101b, /* 4123 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_SPU, .pme_type = COUNT_TYPE_BOTH_TYPE, }, {.pme_name = "SPU_STALL_OUTBOUND_MAILBOX_WRITE_CH28", .pme_desc = "Stalled waiting on SPU Write Outbound Mailbox (Channel 28).", .pme_code = 0x101c, /* 4124 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_SPU, .pme_type = COUNT_TYPE_BOTH_TYPE, }, {.pme_name = "SPU_STALL_MAILBOX_CH29", .pme_desc = "Stalled waiting on SPU Mailbox (Channel 29).", .pme_code = 0x1022, /* 4130 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_SPU, .pme_type = COUNT_TYPE_BOTH_TYPE, }, {.pme_name = "SPU_TR_STALL_CH", .pme_desc = "Stalled waiting on a channel operation.", .pme_code = 0x10a1, /* 4257 */ .pme_enable_word = WORD_NONE, .pme_freq = PFM_CELL_PME_FREQ_SPU, .pme_type = COUNT_TYPE_BOTH_TYPE, }, {.pme_name = "SPU_EV_INST_FETCH_STALL", .pme_desc = "Instruction fetch stall", .pme_code = 0x1107, /* 4359 */ .pme_enable_word = WORD_NONE, .pme_freq = PFM_CELL_PME_FREQ_SPU, .pme_type = COUNT_TYPE_BOTH_TYPE, }, {.pme_name = "SPU_EV_ADDR_TRACE", .pme_desc = "Serialized SPU address (program counter) trace.", .pme_code = 0x110b, /* 4363 */ .pme_enable_word = 0xF, .pme_freq = PFM_CELL_PME_FREQ_SPU, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "MFC_ATOMIC_LD", .pme_desc = "An atomic load was received from direct memory access controller (DMAC).", .pme_code = 0x13ed, /* 5101 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = 
"MFC_ATOMIC_DCLAIM", .pme_desc = "An atomic dclaim was sent to synergistic bus interface (SBI); includes retried requests.", .pme_code = 0x13ee, /* 5102 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "MFC_ATOMIC_RWITM", .pme_desc = "An atomic rwitm performed was sent to SBI; includes retried requests.", .pme_code = 0x13ef, /* 5103 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "MFC_ATOMIC_LD_CACHE_MISS_MU", .pme_desc = "An atomic load miss caused MU cache state.", .pme_code = 0x13f0, /* 5104 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "MFC_ATOMIC_LD_CACHE_MISS_E", .pme_desc = "An atomic load miss caused E cache state.", .pme_code = 0x13f1, /* 5105 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "MFC_ATOMIC_LD_CACHE_MISS_SL", .pme_desc = "An atomic load miss caused SL cache state.", .pme_code = 0x13f2, /* 5106 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "MFC_ATOMIC_LD_CACHE_HIT", .pme_desc = "An atomic load hits cache.", .pme_code = 0x13f3, /* 5107 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "MFC_ATOMIC_LD_CACHE_MISS_INTERVENTION", .pme_desc = "Atomic load misses cache with data intervention; sum of signals 4 and 6 in this group.", .pme_code = 0x13f4, /* 5108 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "MFC_ATOMIC_PUTLLXC_CACHE_MISS_WO_INTERVENTION", .pme_desc = "putllc or putlluc misses cache without data intervention; for putllc, counts only when reservation is set for the address.", .pme_code = 0x13fa, /* 5114 */ .pme_enable_word 
= WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "MFC_SNOOP_MACHINE_BUSY", .pme_desc = "Snoop machine busy.", .pme_code = 0x13fd, /* 5117 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "MFC_SNOOP_MMU_TO_I", .pme_desc = "A snoop caused cache transition from [M | MU] to I.", .pme_code = 0x13ff, /* 5119 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "MFC_SNOOP_ESSL_TO_I", .pme_desc = "A snoop caused cache transition from [E | S | SL] to I.", .pme_code = 0x1401, /* 5121 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "MFC_SNOOP_MU_TO_T", .pme_desc = "A snoop caused cache transition from MU to T cache state.", .pme_code = 0x1403, /* 5123 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "MFC_SENT_INTERVENTION_LOCAL", .pme_desc = "Sent modified data intervention to a destination within the same CBE chip.", .pme_code = 0x1407, /* 5127 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "MFC_ANY_DMA_GET", .pme_desc = "Any flavor of DMA get[] command issued to Synergistic Bus Interface (SBI); sum of signals 17-25 in this group.", .pme_code = 0x1450, /* 5200 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "MFC_ANY_DMA_PUT", .pme_desc = "Any flavor of DMA put[] command issued to SBI; sum of signals 2-16 in this group.", .pme_code = 0x1451, /* 5201 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "MFC_DMA_PUT", .pme_desc = "DMA put (put) is issued to SBI.", .pme_code = 0x1452, /* 5202 */ .pme_enable_word = 
WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "MFC_DMA_GET", .pme_desc = "DMA get data from effective address to local storage (get) issued to SBI.", .pme_code = 0x1461, /* 5217 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "MFC_LD_REQ", .pme_desc = "Load request sent to element interconnect bus (EIB); includes read, read atomic, rwitm, rwitm atomic, and retried commands.", .pme_code = 0x14b8, /* 5304 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "MFC_ST_REQ", .pme_desc = "Store request sent to EIB; includes wwf, wwc, wwk, dclaim, dclaim atomic, and retried commands.", .pme_code = 0x14b9, /* 5305 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "MFC_RECV_DATA", .pme_desc = "Received data from EIB, including partial cache line data.", .pme_code = 0x14ba, /* 5306 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "MFC_SENT_DATA", .pme_desc = "Sent data to EIB, both as a master and a snooper.", .pme_code = 0x14bb, /* 5307 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "MFC_SBI_Q_NOT_EMPTY", .pme_desc = "16-deep synergistic bus interface (SBI) queue with outgoing requests not empty; does not include atomic requests.", .pme_code = 0x14bc, /* 5308 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "MFC_SBI_Q_FULL", .pme_desc = "16-deep SBI queue with outgoing requests full; does not include atomic requests.", .pme_code = 0x14bd, /* 5309 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = 
"MFC_SENT_REQ", .pme_desc = "Sent request to EIB.", .pme_code = 0x14be, /* 5310 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "MFC_RECV_DATA_BUS_GRANT", .pme_desc = "Received data bus grant; includes data sent for MMIO operations.", .pme_code = 0x14c0, /* 5312 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "MFC_WAIT_DATA_BUS_GRANT", .pme_desc = "Cycles between data bus request and data bus grant.", .pme_code = 0x14c1, /* 5313 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "MFC_CMD_O_MEM", .pme_desc = "Command (read or write) for an odd-numbered memory bank; valid only when resource allocation is turned on.", .pme_code = 0x14c2, /* 5314 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "MFC_CMD_E_MEM", .pme_desc = "Command (read or write) for an even-numbered memory bank; valid only when resource allocation is turned on.", .pme_code = 0x14c3, /* 5315 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "MFC_RECV_RETRY_RESP", .pme_desc = "Request gets the Retry response; includes local and global requests.", .pme_code = 0x14c6, /* 5318 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "MFC_SENT_DATA_BUS_REQ", .pme_desc = "Sent data bus request to EIB.", .pme_code = 0x14c7, /* 5319 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "MFC_TLB_MISS", .pme_desc = "Translation Lookaside Buffer (TLB) miss without parity or protection errors.", .pme_code = 0x1518, /* 5400 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = 
COUNT_TYPE_OCCURRENCE, }, {.pme_name = "MFC_TLB_CYCLES", .pme_desc = "TLB miss (cycles).", .pme_code = 0x1519, /* 5401 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "MFC_TLB_HIT", .pme_desc = "TLB hit.", .pme_code = 0x151a, /* 5402 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_READ_RWITM_1", .pme_desc = "Number of read and rwitm commands (including atomic) AC1 to AC0. (Group 1)", .pme_code = 0x17d4, /* 6100 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_DCLAIM_1", .pme_desc = "Number of dclaim commands (including atomic) AC1 to AC0. (Group 1)", .pme_code = 0x17d5, /* 6101 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_WWK_WWC_WWF_1", .pme_desc = "Number of wwk, wwc, and wwf commands from AC1 to AC0. (Group 1)", .pme_code = 0x17d6, /* 6102 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_SYNC_TLBSYNC_EIEIO_1", .pme_desc = "Number of sync, tlbsync, and eieio commands from AC1 to AC0. (Group 1)", .pme_code = 0x17d7, /* 6103 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_TLBIE_1", .pme_desc = "Number of tlbie commands from AC1 to AC0. (Group 1)", .pme_code = 0x17d8, /* 6104 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_PAAM_CAM_HIT_1", .pme_desc = "Previous adjacent address match (PAAM) Content Addressable Memory (CAM) hit. 
(Group 1)", .pme_code = 0x17df, /* 6111 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_PAAM_CAM_MISS_1", .pme_desc = "PAAM CAM miss. (Group 1)", .pme_code = 0x17e0, /* 6112 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_CMD_REFLECTED_1", .pme_desc = "Command reflected. (Group 1)", .pme_code = 0x17e2, /* 6114 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_READ_RWITM_2", .pme_desc = "Number of read and rwitm commands (including atomic) AC1 to AC0. (Group 2)", .pme_code = 0x17e4, /* 6116 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_DCLAIM_2", .pme_desc = "Number of dclaim commands (including atomic) AC1 to AC0. (Group 2)", .pme_code = 0x17e5, /* 6117 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_WWK_WWC_WWF_2", .pme_desc = "Number of wwk, wwc, and wwf commands from AC1 to AC0. (Group 2)", .pme_code = 0x17e6, /* 6118 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_SYNC_TLBSYNC_EIEIO_2", .pme_desc = "Number of sync, tlbsync, and eieio commands from AC1 to AC0. (Group 2)", .pme_code = 0x17e7, /* 6119 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_TLBIE_2", .pme_desc = "Number of tlbie commands from AC1 to AC0. (Group 2)", .pme_code = 0x17e8, /* 6120 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_PAAM_CAM_HIT_2", .pme_desc = "PAAM CAM hit. 
(Group 2)", .pme_code = 0x17ef, /* 6127 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_PAAM_CAM_MISS_2", .pme_desc = "PAAM CAM miss. (Group 2)", .pme_code = 0x17f0, /* 6128 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_CMD_REFLECTED_2", .pme_desc = "Command reflected. (Group 2)", .pme_code = 0x17f2, /* 6130 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_LOCAL_CMD_FROM_SPE6", .pme_desc = "Local command from SPE 6.", .pme_code = 0x1839, /* 6201 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_LOCAL_CMD_FROM_SPE4", .pme_desc = "Local command from SPE 4.", .pme_code = 0x183a, /* 6202 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_LOCAL_CME_FROM_SPE2", .pme_desc = "Local command from SPE 2.", .pme_code = 0x183b, /* 6203 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_LOCAL_CMD_FROM_PPE", .pme_desc = "Local command from PPE.", .pme_code = 0x183d, /* 6205 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_LOCAL_CMD_FROM_SPE1", .pme_desc = "Local command from SPE 1.", .pme_code = 0x183e, /* 6206 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_LOCAL_CMD_FROM_SPE3", .pme_desc = "Local command from SPE 3.", .pme_code = 0x183f, /* 6207 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_LOCAL_CMD_FROM_SPE5", .pme_desc = "Local command from SPE 5.", .pme_code = 0x1840, /* 
6208 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_LOCAL_CMD_FROM_SPE7", .pme_desc = "Local command from SPE 7.", .pme_code = 0x1841, /* 6209 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_GLOBAL_CMD_FROM_SPE6", .pme_desc = "AC1-to-AC0 global command from SPE 6.", .pme_code = 0x1844, /* 6212 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_GLOBAL_CMD_FROM_SPE4", .pme_desc = "AC1-to-AC0 global command from SPE 4.", .pme_code = 0x1845, /* 6213 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_GLOBAL_CMD_FROM_SPE2", .pme_desc = "AC1-to-AC0 global command from SPE 2.", .pme_code = 0x1846, /* 6214 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_GLOBAL_CMD_FROM_SPE0", .pme_desc = "AC1-to-AC0 global command from SPE 0.", .pme_code = 0x1847, /* 6215 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_GLOBAL_CMD_FROM_PPE", .pme_desc = "AC1-to-AC0 global command from PPE.", .pme_code = 0x1848, /* 6216 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_GLOBAL_CMD_FROM_SPE1", .pme_desc = "AC1-to-AC0 global command from SPE 1.", .pme_code = 0x1849, /* 6217 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_GLOBAL_CMD_FROM_SPE3", .pme_desc = "AC1-to-AC0 global command from SPE 3.", .pme_code = 0x184a, /* 6218 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_GLOBAL_CMD_FROM_SPE5", 
.pme_desc = "AC1-to-AC0 global command from SPE 5.", .pme_code = 0x184b, /* 6219 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_GLOBAL_CMD_FROM_SPE7", .pme_desc = "AC1-to-AC0 global command from SPE 7", .pme_code = 0x184c, /* 6220 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_AC1_REFLECTING_LOCAL_CMD", .pme_desc = "AC1 is reflecting any local command.", .pme_code = 0x184e, /* 6222 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_AC1_SEND_GLOBAL_CMD", .pme_desc = "AC1 sends a global command to AC0.", .pme_code = 0x184f, /* 6223 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_AC0_REFLECT_GLOBAL_CMD", .pme_desc = "AC0 reflects a global command back to AC1.", .pme_code = 0x1850, /* 6224 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_AC1_REFLECT_CMD_TO_BM", .pme_desc = "AC1 reflects a command back to the bus masters.", .pme_code = 0x1851, /* 6225 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_GRANT_DATA_RING0_1", .pme_desc = "Grant on data ring 0.", .pme_code = 0x189c, /* 6300 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_GRANT_DATA_RING1_1", .pme_desc = "Grant on data ring 1.", .pme_code = 0x189d, /* 6301 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_GRANT_DATA_RING2_1", .pme_desc = "Grant on data ring 2.", .pme_code = 0x189e, /* 6302 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = 
COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_GRANT_DATA_RING3_1", .pme_desc = "Grant on data ring 3.", .pme_code = 0x189f, /* 6303 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_DATA_RING0_INUSE_1", .pme_desc = "Data ring 0 is in use.", .pme_code = 0x18a0, /* 6304 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_DATA_RING1_INUSE_1", .pme_desc = "Data ring 1 is in use.", .pme_code = 0x18a1, /* 6305 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_DATA_RING2_INUSE_1", .pme_desc = "Data ring 2 is in use.", .pme_code = 0x18a2, /* 6306 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_DATA_RING3_INUSE_1", .pme_desc = "Data ring 3 is in use.", .pme_code = 0x18a3, /* 6307 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_ALL_DATA_RINGS_IDLE_1", .pme_desc = "All data rings are idle.", .pme_code = 0x18a4, /* 6308 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_ONE_DATA_RING_BUSY_1", .pme_desc = "One data ring is busy.", .pme_code = 0x18a5, /* 6309 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_TWO_OR_THREE_DATA_RINGS_BUSY_1", .pme_desc = "Two or three data rings are busy.", .pme_code = 0x18a6, /* 6310 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_ALL_DATA_RINGS_BUSY_1", .pme_desc = "All data rings are busy.", .pme_code = 0x18a7, /* 6311 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = 
COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_IOIF0_DATA_REQ_PENDING_1", .pme_desc = "BIC(IOIF0) data request pending.", .pme_code = 0x18a8, /* 6312 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_BOTH_TYPE, }, {.pme_name = "EIB_SPE6_DATA_REQ_PENDING_1", .pme_desc = "SPE 6 data request pending.", .pme_code = 0x18a9, /* 6313 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_BOTH_TYPE, }, {.pme_name = "EIB_SPE4_DATA_REQ_PENDING_1", .pme_desc = "SPE 4 data request pending.", .pme_code = 0x18aa, /* 6314 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_BOTH_TYPE, }, {.pme_name = "EIB_SPE2_DATA_REQ_PENDING_1", .pme_desc = "SPE 2 data request pending.", .pme_code = 0x18ab, /* 6315 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_BOTH_TYPE, }, {.pme_name = "EIB_SPE0_DATA_REQ_PENDING_1", .pme_desc = "SPE 0 data request pending.", .pme_code = 0x18ac, /* 6316 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_BOTH_TYPE, }, {.pme_name = "EIB_MIC_DATA_REQ_PENDING_1", .pme_desc = "MIC data request pending.", .pme_code = 0x18ad, /* 6317 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_BOTH_TYPE, }, {.pme_name = "EIB_PPE_DATA_REQ_PENDING_1", .pme_desc = "PPE data request pending.", .pme_code = 0x18ae, /* 6318 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_BOTH_TYPE, }, {.pme_name = "EIB_SPE1_DATA_REQ_PENDING_1", .pme_desc = "SPE 1 data request pending.", .pme_code = 0x18af, /* 6319 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_BOTH_TYPE, }, {.pme_name = "EIB_SPE3_DATA_REQ_PENDING_1", .pme_desc = "SPE 3 data request pending.", .pme_code = 0x18b0, /* 6320 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = 
PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_BOTH_TYPE, }, {.pme_name = "EIB_SPE5_DATA_REQ_PENDING_1", .pme_desc = "SPE 5 data request pending.", .pme_code = 0x18b1, /* 6321 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_BOTH_TYPE, }, {.pme_name = "EIB_SPE7_DATA_REQ_PENDING_1", .pme_desc = "SPE 7 data request pending.", .pme_code = 0x18b2, /* 6322 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_BOTH_TYPE, }, {.pme_name = "EIB_IOIF0_DATA_DEST_1", .pme_desc = "IOIF0 is data destination.", .pme_code = 0x18b4, /* 6324 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_SPE6_DATA_DEST_1", .pme_desc = "SPE 6 is data destination.", .pme_code = 0x18b5, /* 6325 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_SPE4_DATA_DEST_1", .pme_desc = "SPE 4 is data destination.", .pme_code = 0x18b6, /* 6326 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_SPE2_DATA_DEST_1", .pme_desc = "SPE 2 is data destination.", .pme_code = 0x18b7, /* 6327 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_SPE0_DATA_DEST_1", .pme_desc = "SPE 0 is data destination.", .pme_code = 0x18b8, /* 6328 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_MIC_DATA_DEST_1", .pme_desc = "MIC is data destination.", .pme_code = 0x18b9, /* 6329 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_PPE_DATA_DEST_1", .pme_desc = "PPE is data destination.", .pme_code = 0x18ba, /* 6330 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = 
COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_SPE1_DATA_DEST_1", .pme_desc = "SPE 1 is data destination.", .pme_code = 0x18bb, /* 6331 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_IOIF0_DATA_REQ_PENDING_2", .pme_desc = "BIC(IOIF0) data request pending.", .pme_code = 0x1900, /* 6400 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_BOTH_TYPE, }, {.pme_name = "EIB_SPE6_DATA_REQ_PENDING_2", .pme_desc = "SPE 6 data request pending.", .pme_code = 0x1901, /* 6401 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_BOTH_TYPE, }, {.pme_name = "EIB_SPE4_DATA_REQ_PENDING_2", .pme_desc = "SPE 4 data request pending.", .pme_code = 0x1902, /* 6402 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_BOTH_TYPE, }, {.pme_name = "EIB_SPE2_DATA_REQ_PENDING_2", .pme_desc = "SPE 2 data request pending.", .pme_code = 0x1903, /* 6403 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_BOTH_TYPE, }, {.pme_name = "EIB_SPE0_DATA_REQ_PENDING_2", .pme_desc = "SPE 0 data request pending.", .pme_code = 0x1904, /* 6404 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_BOTH_TYPE, }, {.pme_name = "EIB_MIC_DATA_REQ_PENDING_2", .pme_desc = "MIC data request pending.", .pme_code = 0x1905, /* 6405 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_BOTH_TYPE, }, {.pme_name = "EIB_PPE_DATA_REQ_PENDING_2", .pme_desc = "PPE data request pending.", .pme_code = 0x1906, /* 6406 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_BOTH_TYPE, }, {.pme_name = "EIB_SPE1_DATA_REQ_PENDING_2", .pme_desc = "SPE 1 data request pending.", .pme_code = 0x1907, /* 6407 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, 
.pme_type = COUNT_TYPE_BOTH_TYPE, }, {.pme_name = "EIB_SPE3_DATA_REQ_PENDING_2", .pme_desc = "SPE 3 data request pending.", .pme_code = 0x1908, /* 6408 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_BOTH_TYPE, }, {.pme_name = "EIB_SPE5_DATA_REQ_PENDING_2", .pme_desc = "SPE 5 data request pending.", .pme_code = 0x1909, /* 6409 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_BOTH_TYPE, }, {.pme_name = "EIB_SPE7_DATA_REQ_PENDING_2", .pme_desc = "SPE 7 data request pending.", .pme_code = 0x190a, /* 6410 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_BOTH_TYPE, }, {.pme_name = "EIB_IOIF1_DATA_REQ_PENDING_2", .pme_desc = "IOIF1 data request pending.", .pme_code = 0x190b, /* 6411 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_BOTH_TYPE, }, {.pme_name = "EIB_IOIF0_DATA_DEST_2", .pme_desc = "IOIF0 is data destination.", .pme_code = 0x190c, /* 6412 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_SPE6_DATA_DEST_2", .pme_desc = "SPE 6 is data destination.", .pme_code = 0x190d, /* 6413 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_SPE4_DATA_DEST_2", .pme_desc = "SPE 4 is data destination.", .pme_code = 0x190e, /* 6414 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_SPE2_DATA_DEST_2", .pme_desc = "SPE 2 is data destination.", .pme_code = 0x190f, /* 6415 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_SPE0_DATA_DEST_2", .pme_desc = "SPE 0 is data destination.", .pme_code = 0x1910, /* 6416 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = 
COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_MIC_DATA_DEST_2", .pme_desc = "MIC is data destination.", .pme_code = 0x1911, /* 6417 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_PPE_DATA_DEST_2", .pme_desc = "PPE is data destination.", .pme_code = 0x1912, /* 6418 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_SPE1_DATA_DEST_2", .pme_desc = "SPE 1 is data destination.", .pme_code = 0x1913, /* 6419 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_SPE3_DATA_DEST_2", .pme_desc = "SPE 3 is data destination.", .pme_code = 0x1914, /* 6420 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_SPE5_DATA_DEST_2", .pme_desc = "SPE 5 is data destination.", .pme_code = 0x1915, /* 6421 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_SPE7_DATA_DEST_2", .pme_desc = "SPE 7 is data destination.", .pme_code = 0x1916, /* 6422 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_IOIF1_DATA_DEST_2", .pme_desc = "IOIF1 is data destination.", .pme_code = 0x1917, /* 6423 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_GRANT_DATA_RING0_2", .pme_desc = "Grant on data ring 0.", .pme_code = 0x1918, /* 6424 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_GRANT_DATA_RING1_2", .pme_desc = "Grant on data ring 1.", .pme_code = 0x1919, /* 6425 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = 
"EIB_GRANT_DATA_RING2_2", .pme_desc = "Grant on data ring 2.", .pme_code = 0x191a, /* 6426 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_GRANT_DATA_RING3_2", .pme_desc = "Grant on data ring 3.", .pme_code = 0x191b, /* 6427 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "EIB_ALL_DATA_RINGS_IDLE_2", .pme_desc = "All data rings are idle.", .pme_code = 0x191c, /* 6428 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_ONE_DATA_RING_BUSY_2", .pme_desc = "One data ring is busy.", .pme_code = 0x191d, /* 6429 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_TWO_OR_THREE_DATA_RINGS_BUSY_2", .pme_desc = "Two or three data rings are busy.", .pme_code = 0x191e, /* 6430 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_ALL_DATA_RINGS_BUSY_2", .pme_desc = "All four data rings are busy.", .pme_code = 0x191f, /* 6431 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG0_E_XIO_UNUSED", .pme_desc = "Even XIO token unused by RAG 0.", .pme_code = 0xfe4c, /* 65100 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG0_O_XIO_UNUSED", .pme_desc = "Odd XIO token unused by RAG 0.", .pme_code = 0xfe4d, /* 65101 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG0_E_BANK_UNUSED", .pme_desc = "Even bank token unused by RAG 0.", .pme_code = 0xfe4e, /* 65102 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = 
COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG0_O_BANK_UNUSED", .pme_desc = "Odd bank token unused by RAG 0.", .pme_code = 0xfe4f, /* 65103 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_TOKEN_GRANTED_SPE0", .pme_desc = "Token granted for SPE 0.", .pme_code = 0xfe54, /* 65108 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_TOKEN_GRANTED_SPE1", .pme_desc = "Token granted for SPE 1.", .pme_code = 0xfe55, /* 65109 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_TOKEN_GRANTED_SPE2", .pme_desc = "Token granted for SPE 2.", .pme_code = 0xfe56, /* 65110 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_TOKEN_GRANTED_SPE3", .pme_desc = "Token granted for SPE 3.", .pme_code = 0xfe57, /* 65111 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_TOKEN_GRANTED_SPE4", .pme_desc = "Token granted for SPE 4.", .pme_code = 0xfe58, /* 65112 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_TOKEN_GRANTED_SPE5", .pme_desc = "Token granted for SPE 5.", .pme_code = 0xfe59, /* 65113 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_TOKEN_GRANTED_SPE6", .pme_desc = "Token granted for SPE 6.", .pme_code = 0xfe5a, /* 65114 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_TOKEN_GRANTED_SPE7", .pme_desc = "Token granted for SPE 7.", .pme_code = 0xfe5b, /* 65115 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = 
COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG0_E_XIO_WASTED", .pme_desc = "Even XIO token wasted by RAG 0; valid only when Unused Enable (UE) = 1 in TKM_CR register.", .pme_code = 0xfeb0, /* 65200 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG0_O_XIO_WASTED", .pme_desc = "Odd XIO token wasted by RAG 0; valid only when Unused Enable (UE) = 1 in TKM_CR register.", .pme_code = 0xfeb1, /* 65201 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG0_E_BANK_WASTED", .pme_desc = "Even bank token wasted by RAG 0; valid only when Unused Enable (UE) = 1 in TKM_CR register.", .pme_code = 0xfeb2, /* 65202 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG0_O_BANK_WASTED", .pme_desc = "Odd bank token wasted by RAG 0; valid only when Unused Enable (UE) = 1 in TKM_CR register.", .pme_code = 0xfeb3, /* 65203 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAGU_E_XIO_WASTED", .pme_desc = "Even XIO token wasted by RAG U.", .pme_code = 0xfebc, /* 65212 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAGU_O_XIO_WASTED", .pme_desc = "Odd XIO token wasted by RAG U.", .pme_code = 0xfebd, /* 65213 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAGU_E_BANK_WASTED", .pme_desc = "Even bank token wasted by RAG U.", .pme_code = 0xfebe, /* 65214 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAGU_O_BANK_WASTED", .pme_desc = "Odd bank token wasted by RAG U.", .pme_code = 0xfebf, /* 65215 */ .pme_enable_word = 
WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG0_E_XIO_RAG1", .pme_desc = "Even XIO token from RAG 0 shared with RAG 1", .pme_code = 0xff14, /* 65300 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG0_E_XIO_RAG2", .pme_desc = "Even XIO token from RAG 0 shared with RAG 2", .pme_code = 0xff15, /* 65301 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG0_E_XIO_RAG3", .pme_desc = "Even XIO token from RAG 0 shared with RAG 3", .pme_code = 0xff16, /* 65302 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG0_O_XIO_RAG1", .pme_desc = "Odd XIO token from RAG 0 shared with RAG 1", .pme_code = 0xff17, /* 65303 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG0_O_XIO_RAG2", .pme_desc = "Odd XIO token from RAG 0 shared with RAG 2", .pme_code = 0xff18, /* 65304 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG0_O_XIO_RAG3", .pme_desc = "Odd XIO token from RAG 0 shared with RAG 3", .pme_code = 0xff19, /* 65305 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG0_E_BANK_RAG1", .pme_desc = "Even bank token from RAG 0 shared with RAG 1", .pme_code = 0xff1a, /* 65306 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG0_E_BANK_RAG2", .pme_desc = "Even bank token from RAG 0 shared with RAG 2", .pme_code = 0xff1b, /* 65307 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name 
= "EIB_RAG0_E_BANK_RAG3", .pme_desc = "Even bank token from RAG 0 shared with RAG 3", .pme_code = 0xff1c, /* 65308 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG0_O_BANK_RAG1", .pme_desc = "Odd bank token from RAG 0 shared with RAG 1", .pme_code = 0xff1d, /* 65309 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG0_O_BANK_RAG2", .pme_desc = "Odd bank token from RAG 0 shared with RAG 2", .pme_code = 0xff1e, /* 65310 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG0_O_BANK_RAG3", .pme_desc = "Odd bank token from RAG 0 shared with RAG 3", .pme_code = 0xff1f, /* 65311 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG1_E_XIO_UNUSED", .pme_desc = "Even XIO token was unused by RAG 1.", .pme_code = 0xff88, /* 65416 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG1_O_XIO_UNUSED", .pme_desc = "Odd XIO token was unused by RAG 1.", .pme_code = 0xff89, /* 65417 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG1_E_BANK_UNUSED", .pme_desc = "Even bank token was unused by RAG 1.", .pme_code = 0xff8a, /* 65418 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG1_O_BANK_UNUSED", .pme_desc = "Odd bank token was unused by RAG 1.", .pme_code = 0xff8b, /* 65419 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_TOKEN_GRANTED_IOC0", .pme_desc = "Token was granted for IOC0.", .pme_code = 0xff91, /* 65425 */ .pme_enable_word 
= WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_TOKEN_GRANTED_IOC1", .pme_desc = "Token was granted for IOC1.", .pme_code = 0xff92, /* 65426 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG1_E_XIO_WASTED", .pme_desc = "Even XIO token was wasted by RAG 1. This is valid only when UE = 1 in TKM_CR.", .pme_code = 0xffec, /* 65516 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG1_O_XIO_WASTED", .pme_desc = "Odd XIO token was wasted by RAG 1. This is valid only when UE = 1 in TKM_CR.", .pme_code = 0xffed, /* 65517 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG1_E_BANK_WASTED", .pme_desc = "Even bank token was wasted by RAG 1. This is valid only when UE = 1 in TKM_CR.", .pme_code = 0xffee, /* 65518 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG1_O_BANK_WASTED", .pme_desc = "Odd bank token was wasted by RAG 1. 
This is valid only when UE = 1 in TKM_CR.", .pme_code = 0xffef, /* 65519 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG1_E_XIO_RAG0", .pme_desc = "Even XIO token from RAG 1 shared with RAG 0", .pme_code = 0x10050, /* 65616 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG1_E_XIO_RAG2", .pme_desc = "Even XIO token from RAG 1 shared with RAG 2", .pme_code = 0x10051, /* 65617 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG1_E_XIO_RAG3", .pme_desc = "Even XIO token from RAG 1 shared with RAG 3", .pme_code = 0x10052, /* 65618 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG1_O_XIO_RAG0", .pme_desc = "Odd XIO token from RAG 1 shared with RAG 0", .pme_code = 0x10053, /* 65619 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG1_O_XIO_RAG2", .pme_desc = "Odd XIO token from RAG 1 shared with RAG 2", .pme_code = 0x10054, /* 65620 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG1_O_XIO_RAG3", .pme_desc = "Odd XIO token from RAG 1 shared with RAG 3", .pme_code = 0x10055, /* 65621 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG1_E_BANK_RAG0", .pme_desc = "Even bank token from RAG 1 shared with RAG 0", .pme_code = 0x10056, /* 65622 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG1_E_BANK_RAG2", .pme_desc = "Even bank token from RAG 1 shared with RAG 2", .pme_code = 0x10057, /* 65623 */ .pme_enable_word = 
WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG1_E_BANK_RAG3", .pme_desc = "Even bank token from RAG 1 shared with RAG 3", .pme_code = 0x10058, /* 65624 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG1_O_BANK_RAG0", .pme_desc = "Odd bank token from RAG 1 shared with RAG 0", .pme_code = 0x10059, /* 65625 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG1_O_BANK_RAG2", .pme_desc = "Odd bank token from RAG 1 shared with RAG 2", .pme_code = 0x1005a, /* 65626 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG1_O_BANK_RAG3", .pme_desc = "Odd bank token from RAG 1 shared with RAG 3", .pme_code = 0x1005b, /* 65627 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAGU_E_XIO_RAG1", .pme_desc = "Even XIO token from RAG U shared with RAG 1", .pme_code = 0x1005c, /* 65628 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAGU_O_XIO_RAG1", .pme_desc = "Odd XIO token from RAG U shared with RAG 1", .pme_code = 0x1005d, /* 65629 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAGU_E_BANK_RAG1", .pme_desc = "Even bank token from RAG U shared with RAG 1", .pme_code = 0x1005e, /* 65630 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAGU_O_BANK_RAG1", .pme_desc = "Odd bank token from RAG U shared with RAG 1", .pme_code = 0x1005f, /* 65631 */ .pme_enable_word = WORD_0_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, 
}, {.pme_name = "EIB_RAG2_E_XIO_UNUSED", .pme_desc = "Even XIO token unused by RAG 2", .pme_code = 0x100e4, /* 65764 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG2_O_XIO_UNUSED", .pme_desc = "Odd XIO token unused by RAG 2", .pme_code = 0x100e5, /* 65765 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG2_E_BANK_UNUSED", .pme_desc = "Even bank token unused by RAG 2", .pme_code = 0x100e6, /* 65766 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG2_O_BANK_UNUSED", .pme_desc = "Odd bank token unused by RAG 2", .pme_code = 0x100e7, /* 65767 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG0_IOIF0_IN_TOKEN_UNUSED", .pme_desc = "IOIF0 In token unused by RAG 0", .pme_code = 0x100e8, /* 65768 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG0_IOIF0_OUT_TOKEN_UNUSED", .pme_desc = "IOIF0 Out token unused by RAG 0", .pme_code = 0x100e9, /* 65769 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG0_IOIF1_IN_TOKEN_UNUSED", .pme_desc = "IOIF1 In token unused by RAG 0", .pme_code = 0x100ea, /* 65770 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG0_IOIF1_OUT_TOKEN_UNUSED", .pme_desc = "IOIF1 Out token unused by RAG 0", .pme_code = 0x100eb, /* 65771 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG2_E_XIO_WASTED", .pme_desc = "Even XIO token wasted by RAG 2", .pme_code = 0x10148, /* 65864 */ .pme_enable_word = 
WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG2_O_XIO_WASTED", .pme_desc = "Odd XIO token wasted by RAG 2", .pme_code = 0x10149, /* 65865 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG2_E_BANK_WASTED", .pme_desc = "Even bank token wasted by RAG 2", .pme_code = 0x1014a, /* 65866 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG2_O_BANK_WASTED", .pme_desc = "Odd bank token wasted by RAG 2", .pme_code = 0x1014b, /* 65867 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG2_E_XIO_RAG0", .pme_desc = "Even XIO token from RAG 2 shared with RAG 0", .pme_code = 0x101ac, /* 65964 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG2_E_XIO_RAG1", .pme_desc = "Even XIO token from RAG 2 shared with RAG 1", .pme_code = 0x101ad, /* 65965 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG2_E_XIO_RAG3", .pme_desc = "Even XIO token from RAG 2 shared with RAG 3", .pme_code = 0x101ae, /* 65966 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG2_O_XIO_RAG0", .pme_desc = "Odd XIO token from RAG 2 shared with RAG 0", .pme_code = 0x101af, /* 65967 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG2_O_XIO_RAG1", .pme_desc = "Odd XIO token from RAG 2 shared with RAG 1", .pme_code = 0x101b0, /* 65968 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG2_O_XIO_RAG3", 
.pme_desc = "Odd XIO token from RAG 2 shared with RAG 3", .pme_code = 0x101b1, /* 65969 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG2_E_BANK_RAG0", .pme_desc = "Even bank token from RAG 2 shared with RAG 0", .pme_code = 0x101b2, /* 65970 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG2_E_BANK_RAG1", .pme_desc = "Even bank token from RAG 2 shared with RAG 1", .pme_code = 0x101b3, /* 65971 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG2_E_BANK_RAG3", .pme_desc = "Even bank token from RAG 2 shared with RAG 3", .pme_code = 0x101b4, /* 65972 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG2_O_BANK_RAG0", .pme_desc = "Odd bank token from RAG 2 shared with RAG 0", .pme_code = 0x101b5, /* 65973 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG2_O_BANK_RAG1", .pme_desc = "Odd bank token from RAG 2 shared with RAG 1", .pme_code = 0x101b6, /* 65974 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG2_O_BANK_RAG3", .pme_desc = "Odd bank token from RAG 2 shared with RAG 3", .pme_code = 0x101b7, /* 65975 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG0_IOIF0_IN_TOKEN_WASTED", .pme_desc = "IOIF0 In token wasted by RAG 0", .pme_code = 0x9ef38, /* 651064 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG0_IOIF0_OUT_TOKEN_WASTED", .pme_desc = "IOIF0 Out token wasted by RAG 0", .pme_code = 0x9ef39, /* 
651065 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG0_IOIF1_IN_TOKEN_WASTED", .pme_desc = "IOIF1 In token wasted by RAG 0", .pme_code = 0x9ef3a, /* 651066 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG0_IOIF1_OUT_TOKEN_WASTED", .pme_desc = "IOIF1 Out token wasted by RAG 0", .pme_code = 0x9ef3b, /* 651067 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG3_E_XIO_UNUSED", .pme_desc = "Even XIO token was unused by RAG 3.", .pme_code = 0x9efac, /* 651180 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG3_O_XIO_UNUSED", .pme_desc = "Odd XIO token was unused by RAG 3.", .pme_code = 0x9efad, /* 651181 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG3_E_BANK_UNUSED", .pme_desc = "Even bank token was unused by RAG 3.", .pme_code = 0x9efae, /* 651182 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG3_O_BANK_UNUSED", .pme_desc = "Odd bank token was unused by RAG 3.", .pme_code = 0x9efaf, /* 651183 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG3_E_XIO_WASTED", .pme_desc = "Even XIO token wasted by RAG 3", .pme_code = 0x9f010, /* 651280 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG3_O_XIO_WASTED", .pme_desc = "Odd XIO token wasted by RAG 3", .pme_code = 0x9f011, /* 651281 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = 
"EIB_RAG3_E_BANK_WASTED", .pme_desc = "Even bank token wasted by RAG 3", .pme_code = 0x9f012, /* 651282 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG3_O_BANK_WASTED", .pme_desc = "Odd bank token wasted by RAG 3", .pme_code = 0x9f013, /* 651283 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG3_E_XIO_RAG0", .pme_desc = "Even XIO token from RAG 3 shared with RAG 0", .pme_code = 0x9f074, /* 651380 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG3_E_XIO_RAG1", .pme_desc = "Even XIO token from RAG 3 shared with RAG 1", .pme_code = 0x9f075, /* 651381 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG3_E_XIO_RAG2", .pme_desc = "Even XIO token from RAG 3 shared with RAG 2", .pme_code = 0x9f076, /* 651382 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG3_O_XIO_RAG0", .pme_desc = "Odd XIO token from RAG 3 shared with RAG 0", .pme_code = 0x9f077, /* 651383 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG3_O_XIO_RAG1", .pme_desc = "Odd XIO token from RAG 3 shared with RAG 1", .pme_code = 0x9f078, /* 651384 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG3_O_XIO_RAG2", .pme_desc = "Odd XIO token from RAG 3 shared with RAG 2", .pme_code = 0x9f079, /* 651385 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG3_E_BANK_RAG0", .pme_desc = "Even bank token from RAG 3 shared with RAG 0", .pme_code = 0x9f07a, /* 
651386 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG3_E_BANK_RAG1", .pme_desc = "Even bank token from RAG 3 shared with RAG 1", .pme_code = 0x9f07b, /* 651387 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG3_E_BANK_RAG2", .pme_desc = "Even bank token from RAG 3 shared with RAG 2", .pme_code = 0x9f07c, /* 651388 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG3_O_BANK_RAG0", .pme_desc = "Odd bank token from RAG 3 shared with RAG 0", .pme_code = 0x9f07d, /* 651389 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG3_O_BANK_RAG1", .pme_desc = "Odd bank token from RAG 3 shared with RAG 1", .pme_code = 0x9f07e, /* 651390 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "EIB_RAG3_O_BANK_RAG2", .pme_desc = "Odd bank token from RAG 3 shared with RAG 2", .pme_code = 0x9f07f, /* 651391 */ .pme_enable_word = WORD_2_ONLY, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "MIC_XIO1_READ_CMD_Q_EMPTY", .pme_desc = "XIO1 - Read command queue is empty.", .pme_code = 0x1bc5, /* 7109 */ .pme_enable_word = 0xF, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "MIC_XIO1_WRITE_CMD_Q_EMPTY", .pme_desc = "XIO1 - Write command queue is empty.", .pme_code = 0x1bc6, /* 7110 */ .pme_enable_word = 0xF, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "MIC_XIO1_READ_CMD_Q_FULL", .pme_desc = "XIO1 - Read command queue is full.", .pme_code = 0x1bc8, /* 7112 */ .pme_enable_word = 0xF, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, 
{.pme_name = "MIC_XIO1_RESPONDS_READ_RETRY", .pme_desc = "XIO1 - MIC responds with a Retry for a read command because the read command queue is full.", .pme_code = 0x1bc9, /* 7113 */ .pme_enable_word = 0xF, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "MIC_XIO1_WRITE_CMD_Q_FULL", .pme_desc = "XIO1 - Write command queue is full.", .pme_code = 0x1bca, /* 7114 */ .pme_enable_word = 0xF, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "MIC_XIO1_RESPONDS_WRITE_RETRY", .pme_desc = "XIO1 - MIC responds with a Retry for a write command because the write command queue is full.", .pme_code = 0x1bcb, /* 7115 */ .pme_enable_word = 0xF, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "MIC_XIO1_READ_CMD_DISPATCHED", .pme_desc = "XIO1 - Read command dispatched; includes high-priority and fast-path reads.", .pme_code = 0x1bde, /* 7134 */ .pme_enable_word = 0xF, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "MIC_XIO1_WRITE_CMD_DISPATCHED", .pme_desc = "XIO1 - Write command dispatched.", .pme_code = 0x1bdf, /* 7135 */ .pme_enable_word = 0xF, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "MIC_XIO1_READ_MOD_WRITE_CMD_DISPATCHED", .pme_desc = "XIO1 - Read-Modify-Write command (data size < 16 bytes) dispatched.", .pme_code = 0x1be0, /* 7136 */ .pme_enable_word = 0xF, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "MIC_XIO1_REFRESH_DISPATCHED", .pme_desc = "XIO1 - Refresh dispatched.", .pme_code = 0x1be1, /* 7137 */ .pme_enable_word = 0xF, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "MIC_XIO1_BYTE_MSK_WRITE_CMD_DISPATCHED", .pme_desc = "XIO1 - Byte-masking write command (data size >= 16 bytes) dispatched.", .pme_code = 0x1be3, /* 7139 */ .pme_enable_word = 0xF, .pme_freq = 
PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "MIC_XIO1_WRITE_CMD_DISPATCHED_AFTER_READ", .pme_desc = "XIO1 - Write command dispatched after a read command was previously dispatched.", .pme_code = 0x1be5, /* 7141 */ .pme_enable_word = 0xF, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "MIC_XIO1_READ_CMD_DISPATCHED_AFTER_WRITE", .pme_desc = "XIO1 - Read command dispatched after a write command was previously dispatched.", .pme_code = 0x1be6, /* 7142 */ .pme_enable_word = 0xF, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "MIC_XIO0_READ_CMD_Q_EMPTY", .pme_desc = "XIO0 - Read command queue is empty.", .pme_code = 0x1c29, /* 7209 */ .pme_enable_word = 0xF, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "MIC_XIO0_WRITE_CMD_Q_EMPTY", .pme_desc = "XIO0 - Write command queue is empty.", .pme_code = 0x1c2a, /* 7210 */ .pme_enable_word = 0xF, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "MIC_XIO0_READ_CMD_Q_FULL", .pme_desc = "XIO0 - Read command queue is full.", .pme_code = 0x1c2c, /* 7212 */ .pme_enable_word = 0xF, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "MIC_XIO0_RESPONDS_READ_RETRY", .pme_desc = "XIO0 - MIC responds with a Retry for a read command because the read command queue is full.", .pme_code = 0x1c2d, /* 7213 */ .pme_enable_word = 0xF, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "MIC_XIO0_WRITE_CMD_Q_FULL", .pme_desc = "XIO0 - Write command queue is full.", .pme_code = 0x1c2e, /* 7214 */ .pme_enable_word = 0xF, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "MIC_XIO0_RESPONDS_WRITE_RETRY", .pme_desc = "XIO0 - MIC responds with a Retry for a write command because the write command queue is full.", .pme_code = 0x1c2f, 
/* 7215 */ .pme_enable_word = 0xF, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "MIC_XIO0_READ_CMD_DISPATCHED", .pme_desc = "XIO0 - Read command dispatched; includes high-priority and fast-path reads.", .pme_code = 0x1c42, /* 7234 */ .pme_enable_word = 0xF, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "MIC_XIO0_WRITE_CMD_DISPATCHED", .pme_desc = "XIO0 - Write command dispatched.", .pme_code = 0x1c43, /* 7235 */ .pme_enable_word = 0xF, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "MIC_XIO0_READ_MOD_WRITE_CMD_DISPATCHED", .pme_desc = "XIO0 - Read-Modify-Write command (data size < 16 bytes) dispatched.", .pme_code = 0x1c44, /* 7236 */ .pme_enable_word = 0xF, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "MIC_XIO0_REFRESH_DISPATCHED", .pme_desc = "XIO0 - Refresh dispatched.", .pme_code = 0x1c45, /* 7237 */ .pme_enable_word = 0xF, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "MIC_XIO0_WRITE_CMD_DISPATCHED_AFTER_READ", .pme_desc = "XIO0 - Write command dispatched after a read command was previously dispatched.", .pme_code = 0x1c49, /* 7241 */ .pme_enable_word = 0xF, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "MIC_XIO0_READ_CMD_DISPATCHED_AFTER_WRITE", .pme_desc = "XIO0 - Read command dispatched after a write command was previously dispatched.", .pme_code = 0x1c4a, /* 7242 */ .pme_enable_word = 0xF, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "MIC_XIO0_WRITE_CMD_DISPATCHED_2", .pme_desc = "XIO0 - Write command dispatched.", .pme_code = 0x1ca7, /* 7335 */ .pme_enable_word = 0xF, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "MIC_XIO0_READ_MOD_WRITE_CMD_DISPATCHED_2", .pme_desc = "XIO0 - Read-Modify-Write 
command (data size < 16 bytes) dispatched.", .pme_code = 0x1ca8, /* 7336 */ .pme_enable_word = 0xF, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "MIC_XIO0_REFRESH_DISPATCHED_2", .pme_desc = "XIO0 - Refresh dispatched.", .pme_code = 0x1ca9, /* 7337 */ .pme_enable_word = 0xF, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "MIC_XIO0_BYTE_MSK_WRITE_CMD_DISPATCHED", .pme_desc = "XIO0 - Byte-masking write command (data size >= 16 bytes) dispatched.", .pme_code = 0x1cab, /* 7339 */ .pme_enable_word = 0xF, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "BIF_IOIF0_TYPEA_DATA_PLG", .pme_desc = "Type A data physical layer group (PLG). Does not include header-only or credit-only data PLGs. In IOIF mode, counts I/O device read data; in BIF mode, counts all outbound data.", .pme_code = 0x1fb0, /* 8112 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "BIF_IOIF0_TYPEB_DATA_PLG", .pme_desc = "Type B data PLG. In IOIF mode, counts I/O device read data; in BIF mode, counts all outbound data.", .pme_code = 0x1fb1, /* 8113 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "BIF_IOIF0_IOIF_TYPEA_DATA_PLG", .pme_desc = "Type A data PLG. Does not include header-only or credit-only PLGs. In IOIF mode, counts CBE store data to I/O device. Does not apply in BIF mode.", .pme_code = 0x1fb2, /* 8114 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "BIF_IOIF0_IOIF_TYPEB_DATA_PLG", .pme_desc = "Type B data PLG. In IOIF mode, counts CBE store data to an I/O device. 
Does not apply in BIF mode.", .pme_code = 0x1fb3, /* 8115 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "BIF_IOIF0_DATA_PLG", .pme_desc = "Data PLG. Does not include header-only or credit-only PLGs.", .pme_code = 0x1fb4, /* 8116 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "BIF_IOIF0_CMD_PLG", .pme_desc = "Command PLG (no credit-only PLG). In IOIF mode, counts I/O command or reply PLGs. In BIF mode, counts command/ reflected command or snoop/combined responses.", .pme_code = 0x1fb5, /* 8117 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "BIF_IOIF0_TYPEA_TRANSFER", .pme_desc = "Type A data transfer regardless of length. Can also be used to count Type A data header PLGs (but not credit-only PLGs).", .pme_code = 0x1fb6, /* 8118 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "BIF_IOIF0_TYPEB_TRANSFER", .pme_desc = "Type B data transfer.", .pme_code = 0x1fb7, /* 8119 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "BIF_IOIF0_CMD_GREDIT_ONLY_PLG", .pme_desc = "Command-credit-only command PLG in either IOIF or BIF mode.", .pme_code = 0x1fb8, /* 8120 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "BIF_IOIF0_DATA_CREDIT_ONLY_PLG", .pme_desc = "Data-credit-only data PLG sent in either IOIF or BIF mode.", .pme_code = 0x1fb9, /* 8121 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "BIF_IOIF0_NON_NULL_ENVLP_SENT", .pme_desc = "Non-null envelope sent (does not include long envelopes).", .pme_code = 0x1fba, /* 8122 */ 
.pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "BIF_IOIF0_NULL_ENVLP_SENT", .pme_desc = "Null envelope sent.", .pme_code = 0x1fbc, /* 8124 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "BIF_IOIF0_NO_VALID_DATA_SENT", .pme_desc = "No valid data sent this cycle.", .pme_code = 0x1fbd, /* 8125 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "BIF_IOIF0_NORMAL_ENVLP_SENT", .pme_desc = "Normal envelope sent.", .pme_code = 0x1fbe, /* 8126 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "BIF_IOIF0_LONG_ENVLP_SENT", .pme_desc = "Long envelope sent.", .pme_code = 0x1fbf, /* 8127 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "BIF_IOIF0_NULL_PLG_INSERTED", .pme_desc = "A Null PLG inserted in an outgoing envelope.", .pme_code = 0x1fc0, /* 8128 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "BIF_IOIF0_OUTBOUND_ENV_ARRAY_FULL", .pme_desc = "Outbound envelope array is full.", .pme_code = 0x1fc1, /* 8129 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "BIF_IOIF1_TYPEB_TRANSFER", .pme_desc = "Type B data transfer.", .pme_code = 0x201b, /* 8219 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "BIF_IOIF0_NULL_ENVLP_RECV", .pme_desc = "Null envelope received.", .pme_code = 0x206d, /* 8301 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "BIF_IOIF0_CMD_PLG_2", .pme_desc = "Command PLG, but not credit-only PLG. 
In IOIF mode, counts I/O command or reply PLGs. In BIF mode, counts command/reflected command or snoop/combined responses.", .pme_code = 0x207a, /* 8314 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "BIF_IOIF0_CMD_GREDIT_ONLY_PLG_2", .pme_desc = "Command-credit-only command PLG.", .pme_code = 0x207b, /* 8315 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "BIF_IOIF0_NORMAL_ENVLP_RECV", .pme_desc = "Normal envelope received is good.", .pme_code = 0x2080, /* 8320 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "BIF_IOIF0_LONG_ENVLP_RECV", .pme_desc = "Long envelope received is good.", .pme_code = 0x2081, /* 8321 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "BIF_IOIF0_DATA_GREDIT_ONLY_PLG_2", .pme_desc = "Data-credit-only data PLG in either IOIF or BIF mode; will count a maximum of one per envelope.", .pme_code = 0x2082, /* 8322 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "BIF_IOIF0_NON_NULL_ENVLP", .pme_desc = "Non-null envelope; does not include long envelopes; includes retried envelopes.", .pme_code = 0x2083, /* 8323 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "BIF_IOIF0_DATA_GRANT_RECV", .pme_desc = "Data grant received.", .pme_code = 0x2084, /* 8324 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "BIF_IOIF0_DATA_PLG_2", .pme_desc = "Data PLG. 
Does not include header-only or credit-only PLGs.", .pme_code = 0x2088, /* 8328 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "BIF_IOIF0_TYPEA_TRANSFER_2", .pme_desc = "Type A data transfer regardless of length. Can also be used to count Type A data header PLGs, but not credit-only PLGs.", .pme_code = 0x2089, /* 8329 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "BIF_IOIF0_TYPEB_TRANSFER_2", .pme_desc = "Type B data transfer.", .pme_code = 0x208a, /* 8330 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "BIF_IOIF1_NULL_ENVLP_RECV", .pme_desc = "Null envelope received.", .pme_code = 0x20d1, /* 8401 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "BIF_IOIF1_CMD_PLG_2", .pme_desc = "Command PLG (no credit-only PLG). 
Counts I/O command or reply PLGs.", .pme_code = 0x20de, /* 8414 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "BIF_IOIF1_CMD_GREDIT_ONLY_PLG_2", .pme_desc = "Command-credit-only command PLG.", .pme_code = 0x20df, /* 8415 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "BIF_IOIF1_NORMAL_ENVLP_RECV", .pme_desc = "Normal envelope received is good.", .pme_code = 0x20e4, /* 8420 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "BIF_IOIF1_LONG_ENVLP_RECV", .pme_desc = "Long envelope received is good.", .pme_code = 0x20e5, /* 8421 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "BIF_IOIF1_DATA_GREDIT_ONLY_PLG_2", .pme_desc = "Data-credit-only data PLG received; will count a maximum of one per envelope.", .pme_code = 0x20e6, /* 8422 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "BIF_IOIF1_NON_NULL_ENVLP", .pme_desc = "Non-Null envelope received; does not include long envelopes; includes retried envelopes.", .pme_code = 0x20e7, /* 8423 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "BIF_IOIF1_DATA_GRANT_RECV", .pme_desc = "Data grant received.", .pme_code = 0x20e8, /* 8424 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "BIF_IOIF1_DATA_PLG_2", .pme_desc = "Data PLG received. 
Does not include header-only or credit-only PLGs.", .pme_code = 0x20ec, /* 8428 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "BIF_IOIF1_TYPEA_TRANSFER_2", .pme_desc = "Type I A data transfer regardless of length. Can also be used to count Type A data header PLGs (but not credit-only PLGs).", .pme_code = 0x20ed, /* 8429 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "BIF_IOIF1_TYPEB_TRANSFER_2", .pme_desc = "Type B data transfer received.", .pme_code = 0x20ee, /* 8430 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "IOC_MMIO_READ_IOIF1", .pme_desc = "Received MMIO read targeted to IOIF1.", .pme_code = 0x213c, /* 8508 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "IOC_MMIO_WRITE_IOIF1", .pme_desc = "Received MMIO write targeted to IOIF1.", .pme_code = 0x213d, /* 8509 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "IOC_MMIO_READ_IOIF0", .pme_desc = "Received MMIO read targeted to IOIF0.", .pme_code = 0x213e, /* 8510 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "IOC_MMIO_WRITE_IOIF0", .pme_desc = "Received MMIO write targeted to IOIF0.", .pme_code = 0x213f, /* 8511 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "IOC_CMD_TO_IOIF0", .pme_desc = "Sent command to IOIF0.", .pme_code = 0x2140, /* 8512 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "IOC_CMD_TO_IOIF1", .pme_desc = "Sent command to IOIF1.", .pme_code = 0x2141, /* 8513 */ .pme_enable_word = 
WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "IOC_IOIF0_MATRIX3_OCCUPIED", .pme_desc = "IOIF0 Dependency Matrix 3 is occupied by a dependent command.", .pme_code = 0x219d, /* 8605 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_BOTH_TYPE, }, {.pme_name = "IOC_IOIF0_MATRIX4_OCCUPIED", .pme_desc = "IOIF0 Dependency Matrix 4 is occupied by a dependent command.", .pme_code = 0x219e, /* 8606 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_BOTH_TYPE, }, {.pme_name = "IOC_IOIF0_MATRIX5_OCCUPIED", .pme_desc = "IOIF0 Dependency Matrix 5 is occupied by a dependent command.", .pme_code = 0x219f, /* 8607 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_BOTH_TYPE, }, {.pme_name = "IOC_DMA_READ_IOIF0", .pme_desc = "Received read request from IOIF0.", .pme_code = 0x21a2, /* 8610 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "IOC_DMA_WRITE_IOIF0", .pme_desc = "Received write request from IOIF0.", .pme_code = 0x21a3, /* 8611 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "IOC_INTERRUPT_IOIF0", .pme_desc = "Received interrupt from the IOIF0.", .pme_code = 0x21a6, /* 8614 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "IOC_IOIF0_REQ_TOKEN_E_MEM", .pme_desc = "IOIF0 request for token for even memory banks 0-14.", .pme_code = 0x220c, /* 8716 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "IOC_IOIF0_REQ_TOKEN_O_MEM", .pme_desc = "IOIF0 request for token for odd memory banks 1-15.", .pme_code = 0x220d, /* 8717 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, 
.pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "IOC_IOIF0_REQ_TOKEN_1357", .pme_desc = "IOIF0 request for token type 1, 3, 5, or 7.", .pme_code = 0x220e, /* 8718 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "IOC_IOIF0_REQ_TOKEN_9111315", .pme_desc = "IOIF0 request for token type 9, 11, 13, or 15.", .pme_code = 0x220f, /* 8719 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "IOC_IOIF0_REQ_TOKEN_16", .pme_desc = "IOIF0 request for token type 16.", .pme_code = 0x2214, /* 8724 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "IOC_IOIF0_REQ_TOKEN_17", .pme_desc = "IOIF0 request for token type 17.", .pme_code = 0x2215, /* 8725 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "IOC_IOIF0_REQ_TOKEN_18", .pme_desc = "IOIF0 request for token type 18.", .pme_code = 0x2216, /* 8726 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "IOC_IOIF0_REQ_TOKEN_19", .pme_desc = "IOIF0 request for token type 19.", .pme_code = 0x2217, /* 8727 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_CUMULATIVE_LEN, }, {.pme_name = "IOC_IOPT_CACHE_HIT", .pme_desc = "I/O page table cache hit for commands from IOIF.", .pme_code = 0x2260, /* 8800 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "IOC_IOPT_CACHE_MISS", .pme_desc = "I/O page table cache miss for commands from IOIF.", .pme_code = 0x2261, /* 8801 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "IOC_IOST_CACHE_HIT", .pme_desc = "I/O segment table cache 
hit.", .pme_code = 0x2263, /* 8803 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "IOC_IOST_CACHE_MISS", .pme_desc = "I/O segment table cache miss.", .pme_code = 0x2264, /* 8804 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "IOC_INTERRUPT_FROM_SPU", .pme_desc = "Interrupt received from any SPU (reflected cmd when IIC has sent ACK response).", .pme_code = 0x2278, /* 8824 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "IOC_IIC_INTERRUPT_TO_PPU_TH0", .pme_desc = "Internal interrupt controller (IIC) generated interrupt to PPU thread 0.", .pme_code = 0x2279, /* 8825 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "IOC_IIC_INTERRUPT_TO_PPU_TH1", .pme_desc = "IIC generated interrupt to PPU thread 1.", .pme_code = 0x227a, /* 8826 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "IOC_RECV_EXTERNAL_INTERRUPT_TO_TH0", .pme_desc = "Received external interrupt (using MMIO) from PPU to PPU thread 0.", .pme_code = 0x227b, /* 8827 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, {.pme_name = "IOC_RECV_EXTERNAL_INTERRUPT_TO_TH1", .pme_desc = "Received external interrupt (using MMIO) from PPU to PPU thread 1.", .pme_code = 0x227c, /* 8828 */ .pme_enable_word = WORD_0_AND_2, .pme_freq = PFM_CELL_PME_FREQ_HALF, .pme_type = COUNT_TYPE_OCCURRENCE, }, }; /*--- The number of events : 435 ---*/
/* Entry count of cell_pe, derived from its size at compile time (assumes cell_pe is an array of pme_cell_entry_t -- TODO confirm against its declaration). */
#define PME_CELL_EVENT_COUNT (sizeof(cell_pe)/sizeof(pme_cell_entry_t))
papi-5.6.0/src/utils/Makefile000664 001750 001750 00000005412 13216244370 020124 0ustar00jshenry1963jshenry1963000000 000000
# File: utils/Makefile
#
# Builds the PAPI command-line utilities listed in $(ALL) and installs
# them into $(BINDIR).
#
# CC, CFLAGS, OPTFLAGS, LDFLAGS, PAPILIB, BINDIR and UTIL_TARGETS are not
# assigned in this file; presumably they are supplied by Makefile.target
# (included below) or the invoking environment -- confirm before relying
# on any of them.
include Makefile.target

INCLUDE = -I../testlib -I.. -I.
testlibdir=../testlib
CLOCKCORE= $(testlibdir)/clockcore.o
DOLOOPS = $(testlibdir)/do_loops.o

# Utilities produced by the default target.
# papi_hybrid_native_avail has a link rule further down but is deliberately
# left out of this list, so it is only built when requested by name.
ALL = papi_avail papi_mem_info papi_cost papi_clockres papi_native_avail \
	papi_command_line papi_event_chooser papi_decode papi_xml_event_info \
	papi_version papi_multiplex_cost papi_component_avail papi_error_codes

# None of these targets names a file that a recipe creates. Declaring them
# phony keeps a stray file called e.g. "clean" or "install" from making the
# target look up to date. The pattern rule below already requires GNU make,
# so .PHONY adds no new portability constraint.
.PHONY: default all utils clean distclean clobber install

# Generic compile rule for every utility source file.
%.o:%.c
	$(CC) $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c $<

default all utils: $(ALL)

papi_avail: papi_avail.o $(PAPILIB) print_header.o
	$(CC) $(LDFLAGS) -o papi_avail papi_avail.o print_header.o $(PAPILIB)

papi_clockres: papi_clockres.o $(PAPILIB) $(CLOCKCORE)
	$(CC) $(LDFLAGS) -o papi_clockres papi_clockres.o $(PAPILIB) $(CLOCKCORE) -lm

papi_command_line: papi_command_line.o $(PAPILIB) $(DOLOOPS)
	$(CC) $(LDFLAGS) -o papi_command_line papi_command_line.o $(PAPILIB) $(DOLOOPS)

papi_component_avail: papi_component_avail.o $(PAPILIB) print_header.o
	$(CC) $(LDFLAGS) -o papi_component_avail papi_component_avail.o $(PAPILIB) print_header.o

papi_cost: papi_cost.o $(PAPILIB) cost_utils.o
	$(CC) $(LDFLAGS) -o papi_cost papi_cost.o cost_utils.o $(PAPILIB) -lm

papi_decode: papi_decode.o $(PAPILIB)
	$(CC) $(LDFLAGS) -o papi_decode papi_decode.o $(PAPILIB)

papi_error_codes: papi_error_codes.o $(PAPILIB)
	$(CC) $(LDFLAGS) -o papi_error_codes papi_error_codes.o $(PAPILIB)

papi_event_chooser: papi_event_chooser.o $(PAPILIB) print_header.o
	$(CC) $(LDFLAGS) -o papi_event_chooser papi_event_chooser.o print_header.o $(PAPILIB)

papi_hybrid_native_avail: papi_hybrid_native_avail.o $(PAPILIB)
	$(CC) $(LDFLAGS) -o papi_hybrid_native_avail papi_hybrid_native_avail.o $(PAPILIB)

papi_mem_info: papi_mem_info.o $(PAPILIB)
	$(CC) $(LDFLAGS) -o papi_mem_info papi_mem_info.o $(PAPILIB)

papi_multiplex_cost: papi_multiplex_cost.o $(PAPILIB) cost_utils.o
	$(CC) $(LDFLAGS) -o papi_multiplex_cost papi_multiplex_cost.o cost_utils.o $(PAPILIB) -lm

papi_native_avail: papi_native_avail.o $(PAPILIB) print_header.o
	$(CC) $(LDFLAGS) -o papi_native_avail papi_native_avail.o $(PAPILIB) print_header.o

papi_version: papi_version.o $(PAPILIB)
	$(CC) $(LDFLAGS) -o papi_version papi_version.o $(PAPILIB)

papi_xml_event_info: papi_xml_event_info.o $(PAPILIB)
	$(CC) $(LDFLAGS) -o papi_xml_event_info papi_xml_event_info.o $(PAPILIB)

# Explicit rules for the two shared helper objects so that a header change
# triggers a rebuild.
cost_utils.o: ../testlib/papi_test.h cost_utils.c
	$(CC) $(INCLUDE) $(CFLAGS) $(OPTFLAGS) -c cost_utils.c

print_header.o: print_header.h print_header.c
	$(CC) $(INCLUDE) $(CFLAGS) $(OPTFLAGS) -c print_header.c

# papi_hybrid_native_avail is named explicitly because it is not part of
# $(ALL); without it a manually built binary would survive "make clean".
clean:
	rm -f *.o *.stderr *.stdout core *~ $(ALL) papi_hybrid_native_avail

distclean clobber: clean
	rm -f Makefile.target

# Copies every regular file under the current directory whose owner-execute
# bit is set into $(BINDIR). The leading '-' on each command tells make to
# carry on if that step fails.
install: $(UTIL_TARGETS)
	@echo "Utilities (BINDIR) being installed in: \"$(BINDIR)\"";
	-mkdir -p $(BINDIR)
	-chmod go+rx $(BINDIR)
	-find . -perm -100 -type f -exec cp {} $(BINDIR) \;
papi-5.6.0/src/papi_common_strings.h000664 001750 001750 00000051240 13216244366 021554 0ustar00jshenry1963jshenry1963000000 000000 /* These are used both by PAPI and by the genpapifdef utility */ /* They are in their own include to allow genpapifdef to be built */ /* without having to link against libpapi.a */ hwi_presets_t _papi_hwi_presets[PAPI_MAX_PRESET_EVENTS] = { /* 0 */ {"PAPI_L1_DCM", "L1D cache misses", "Level 1 data cache misses", 0, 0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L1, NULL, {0},{NULL}, NULL}, /* 1 */ {"PAPI_L1_ICM", "L1I cache misses", "Level 1 instruction cache misses", 0, 0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L1 + PAPI_PRESET_BIT_INS, NULL, {0},{NULL}, NULL}, /* 2 */ {"PAPI_L2_DCM", "L2D cache misses", "Level 2 data cache misses", 0, 0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L2, NULL, {0},{NULL}, NULL}, /* 3 */ {"PAPI_L2_ICM", "L2I cache misses", "Level 2 instruction cache misses", 0, 0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L2 + PAPI_PRESET_BIT_INS, NULL, {0},{NULL}, NULL}, /* 4 */ {"PAPI_L3_DCM", "L3D cache misses", "Level 3 data cache misses", 0, 0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L3, NULL, {0},{NULL}, NULL}, /* 5 */ {"PAPI_L3_ICM", "L3I cache misses", "Level 3 instruction cache misses", 0, 0, PAPI_PRESET_BIT_CACH +
PAPI_PRESET_BIT_L3 + PAPI_PRESET_BIT_INS, NULL, {0},{NULL}, NULL}, /* 6 */ {"PAPI_L1_TCM", "L1 cache misses", "Level 1 cache misses", 0, 0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L1, NULL, {0},{NULL}, NULL}, /* 7 */ {"PAPI_L2_TCM", "L2 cache misses", "Level 2 cache misses", 0, 0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L2, NULL, {0},{NULL}, NULL}, /* 8 */ {"PAPI_L3_TCM", "L3 cache misses", "Level 3 cache misses", 0, 0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L3, NULL, {0},{NULL}, NULL}, /* 9 */ {"PAPI_CA_SNP", "Snoop Requests", "Requests for a snoop", 0, 0, PAPI_PRESET_BIT_CACH, NULL, {0},{NULL}, NULL}, /* 10 */ {"PAPI_CA_SHR", "Ex Acces shared CL", "Requests for exclusive access to shared cache line", 0, 0, PAPI_PRESET_BIT_CACH, NULL, {0},{NULL}, NULL}, /* 11 */ {"PAPI_CA_CLN", "Ex Access clean CL", "Requests for exclusive access to clean cache line", 0, 0, PAPI_PRESET_BIT_CACH, NULL, {0},{NULL}, NULL}, /* 12 */ {"PAPI_CA_INV", "Cache ln invalid", "Requests for cache line invalidation", 0, 0, PAPI_PRESET_BIT_CACH, NULL, {0},{NULL}, NULL}, /* 13 */ {"PAPI_CA_ITV", "Cache ln intervene", "Requests for cache line intervention", 0, 0, PAPI_PRESET_BIT_CACH, NULL, {0},{NULL}, NULL}, /* 14 */ {"PAPI_L3_LDM", "L3 load misses", "Level 3 load misses", 0, 0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L3, NULL, {0},{NULL}, NULL}, /* 15 */ {"PAPI_L3_STM", "L3 store misses", "Level 3 store misses", 0, 0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L3, NULL, {0},{NULL}, NULL}, /* 16 */ {"PAPI_BRU_IDL", "Branch idle cycles", "Cycles branch units are idle", 0, 0, PAPI_PRESET_BIT_IDL + PAPI_PRESET_BIT_BR, NULL, {0},{NULL}, NULL}, /* 17 */ {"PAPI_FXU_IDL", "IU idle cycles", "Cycles integer units are idle", 0, 0, PAPI_PRESET_BIT_IDL, NULL, {0},{NULL}, NULL}, /* 18 */ {"PAPI_FPU_IDL", "FPU idle cycles", "Cycles floating point units are idle", 0, 0, PAPI_PRESET_BIT_IDL + PAPI_PRESET_BIT_FP, NULL, {0},{NULL}, NULL}, /* 19 */ {"PAPI_LSU_IDL", "L/SU idle cycles", "Cycles load/store units are idle", 
0, 0, PAPI_PRESET_BIT_IDL + PAPI_PRESET_BIT_MEM, NULL, {0},{NULL}, NULL}, /* 20 */ {"PAPI_TLB_DM", "Data TLB misses", "Data translation lookaside buffer misses", 0, 0, PAPI_PRESET_BIT_TLB, NULL, {0},{NULL}, NULL}, /* 21 */ {"PAPI_TLB_IM", "Instr TLB misses", "Instruction translation lookaside buffer misses", 0, 0, PAPI_PRESET_BIT_TLB + PAPI_PRESET_BIT_INS, NULL, {0},{NULL}, NULL}, /* 22 */ {"PAPI_TLB_TL", "Total TLB misses", "Total translation lookaside buffer misses", 0, 0, PAPI_PRESET_BIT_TLB, NULL, {0},{NULL}, NULL}, /* 23 */ {"PAPI_L1_LDM", "L1 load misses", "Level 1 load misses", 0, 0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L1, NULL, {0},{NULL}, NULL}, /* 24 */ {"PAPI_L1_STM", "L1 store misses", "Level 1 store misses", 0, 0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L1, NULL, {0},{NULL}, NULL}, /* 25 */ {"PAPI_L2_LDM", "L2 load misses", "Level 2 load misses", 0, 0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L2, NULL, {0},{NULL}, NULL}, /* 26 */ {"PAPI_L2_STM", "L2 store misses", "Level 2 store misses", 0, 0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L2, NULL, {0},{NULL}, NULL}, /* 27 */ {"PAPI_BTAC_M", "Br targt addr miss", "Branch target address cache misses", 0, 0, PAPI_PRESET_BIT_BR, NULL, {0},{NULL}, NULL}, /* 28 */ {"PAPI_PRF_DM", "Data prefetch miss", "Data prefetch cache misses", 0, 0, PAPI_PRESET_BIT_CACH, NULL, {0},{NULL}, NULL}, /* 29 */ {"PAPI_L3_DCH", "L3D cache hits", "Level 3 data cache hits", 0, 0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L2, NULL, {0},{NULL}, NULL}, /* 30 */ {"PAPI_TLB_SD", "TLB shootdowns", "Translation lookaside buffer shootdowns", 0, 0, PAPI_PRESET_BIT_TLB, NULL, {0},{NULL}, NULL}, /* 31 */ {"PAPI_CSR_FAL", "Failed store cond", "Failed store conditional instructions", 0, 0, PAPI_PRESET_BIT_CND + PAPI_PRESET_BIT_MEM, NULL, {0},{NULL}, NULL}, /* 32 */ {"PAPI_CSR_SUC", "Good store cond", "Successful store conditional instructions", 0, 0, PAPI_PRESET_BIT_CND + PAPI_PRESET_BIT_MEM, NULL, {0},{NULL}, NULL}, /* 33 */ {"PAPI_CSR_TOT", "Total 
store cond", "Total store conditional instructions", 0, 0, PAPI_PRESET_BIT_CND + PAPI_PRESET_BIT_MEM, NULL, {0},{NULL}, NULL}, /* 34 */ {"PAPI_MEM_SCY", "Stalled mem cycles", "Cycles Stalled Waiting for memory accesses", 0, 0, PAPI_PRESET_BIT_MEM, NULL, {0},{NULL}, NULL}, /* 35 */ {"PAPI_MEM_RCY", "Stalled rd cycles", "Cycles Stalled Waiting for memory Reads", 0, 0, PAPI_PRESET_BIT_MEM, NULL, {0},{NULL}, NULL}, /* 36 */ {"PAPI_MEM_WCY", "Stalled wr cycles", "Cycles Stalled Waiting for memory writes", 0, 0, PAPI_PRESET_BIT_MEM, NULL, {0},{NULL}, NULL}, /* 37 */ {"PAPI_STL_ICY", "No instr issue", "Cycles with no instruction issue", 0, 0, PAPI_PRESET_BIT_INS, NULL, {0},{NULL}, NULL}, /* 38 */ {"PAPI_FUL_ICY", "Max instr issue", "Cycles with maximum instruction issue", 0, 0, PAPI_PRESET_BIT_INS, NULL, {0},{NULL}, NULL}, /* 39 */ {"PAPI_STL_CCY", "No instr done", "Cycles with no instructions completed", 0, 0, PAPI_PRESET_BIT_INS, NULL, {0},{NULL}, NULL}, /* 40 */ {"PAPI_FUL_CCY", "Max instr done", "Cycles with maximum instructions completed", 0, 0, PAPI_PRESET_BIT_INS, NULL, {0},{NULL}, NULL}, /* 41 */ {"PAPI_HW_INT", "Hdw interrupts", "Hardware interrupts", 0, 0, PAPI_PRESET_BIT_MSC, NULL, {0},{NULL}, NULL}, /* 42 */ {"PAPI_BR_UCN", "Uncond branch", "Unconditional branch instructions", 0, 0, PAPI_PRESET_BIT_BR + PAPI_PRESET_BIT_CND, NULL, {0},{NULL}, NULL}, /* 43 */ {"PAPI_BR_CN", "Cond branch", "Conditional branch instructions", 0, 0, PAPI_PRESET_BIT_BR + PAPI_PRESET_BIT_CND, NULL, {0},{NULL}, NULL}, /* 44 */ {"PAPI_BR_TKN", "Cond branch taken", "Conditional branch instructions taken", 0, 0, PAPI_PRESET_BIT_BR + PAPI_PRESET_BIT_CND, NULL, {0},{NULL}, NULL}, /* 45 */ {"PAPI_BR_NTK", "Cond br not taken", "Conditional branch instructions not taken", 0, 0, PAPI_PRESET_BIT_BR + PAPI_PRESET_BIT_CND, NULL, {0},{NULL}, NULL}, /* 46 */ {"PAPI_BR_MSP", "Cond br mspredictd", "Conditional branch instructions mispredicted", 0, 0, PAPI_PRESET_BIT_BR + PAPI_PRESET_BIT_CND, NULL, 
{0},{NULL}, NULL}, /* 47 */ {"PAPI_BR_PRC", "Cond br predicted", "Conditional branch instructions correctly predicted", 0, 0, PAPI_PRESET_BIT_BR + PAPI_PRESET_BIT_CND, NULL, {0},{NULL}, NULL}, /* 48 */ {"PAPI_FMA_INS", "FMAs completed", "FMA instructions completed", 0, 0, PAPI_PRESET_BIT_INS + PAPI_PRESET_BIT_FP, NULL, {0},{NULL}, NULL}, /* 49 */ {"PAPI_TOT_IIS", "Instr issued", "Instructions issued", 0, 0, PAPI_PRESET_BIT_INS, NULL, {0},{NULL}, NULL}, /* 50 */ {"PAPI_TOT_INS", "Instr completed", "Instructions completed", 0, 0, PAPI_PRESET_BIT_INS, NULL, {0},{NULL}, NULL}, /* 51 */ {"PAPI_INT_INS", "Int instructions", "Integer instructions", 0, 0, PAPI_PRESET_BIT_INS, NULL, {0},{NULL}, NULL}, /* 52 */ {"PAPI_FP_INS", "FP instructions", "Floating point instructions", 0, 0, PAPI_PRESET_BIT_INS + PAPI_PRESET_BIT_FP, NULL, {0},{NULL}, NULL}, /* 53 */ {"PAPI_LD_INS", "Loads", "Load instructions", 0, 0, PAPI_PRESET_BIT_MEM, NULL, {0},{NULL}, NULL}, /* 54 */ {"PAPI_SR_INS", "Stores", "Store instructions", 0, 0, PAPI_PRESET_BIT_MEM, NULL, {0},{NULL}, NULL}, /* 55 */ {"PAPI_BR_INS", "Branches", "Branch instructions", 0, 0, PAPI_PRESET_BIT_BR, NULL, {0},{NULL}, NULL}, /* 56 */ {"PAPI_VEC_INS", "Vector/SIMD instr", "Vector/SIMD instructions (could include integer)", 0, 0, PAPI_PRESET_BIT_MSC, NULL, {0},{NULL}, NULL}, /* 57 */ {"PAPI_RES_STL", "Stalled res cycles", "Cycles stalled on any resource", 0, 0, PAPI_PRESET_BIT_IDL + PAPI_PRESET_BIT_MSC, NULL, {0},{NULL}, NULL}, /* 58 */ {"PAPI_FP_STAL", "Stalled FPU cycles", "Cycles the FP unit(s) are stalled", 0, 0, PAPI_PRESET_BIT_IDL + PAPI_PRESET_BIT_FP, NULL, {0},{NULL}, NULL}, /* 59 */ {"PAPI_TOT_CYC", "Total cycles", "Total cycles", 0, 0, PAPI_PRESET_BIT_MSC, NULL, {0},{NULL}, NULL}, /* 60 */ {"PAPI_LST_INS", "L/S completed", "Load/store instructions completed", 0, 0, PAPI_PRESET_BIT_INS + PAPI_PRESET_BIT_MEM, NULL, {0},{NULL}, NULL}, /* 61 */ {"PAPI_SYC_INS", "Syncs completed", "Synchronization instructions completed", 0, 0, 
PAPI_PRESET_BIT_INS + PAPI_PRESET_BIT_MSC, NULL, {0},{NULL}, NULL}, /* 62 */ {"PAPI_L1_DCH", "L1D cache hits", "Level 1 data cache hits", 0, 0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L1, NULL, {0},{NULL}, NULL}, /* 63 */ {"PAPI_L2_DCH", "L2D cache hits", "Level 2 data cache hits", 0, 0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L2, NULL, {0},{NULL}, NULL}, /* 64 */ {"PAPI_L1_DCA", "L1D cache accesses", "Level 1 data cache accesses", 0, 0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L1, NULL, {0},{NULL}, NULL}, /* 65 */ {"PAPI_L2_DCA", "L2D cache accesses", "Level 2 data cache accesses", 0, 0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L2, NULL, {0},{NULL}, NULL}, /* 66 */ {"PAPI_L3_DCA", "L3D cache accesses", "Level 3 data cache accesses", 0, 0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L3, NULL, {0},{NULL}, NULL}, /* 67 */ {"PAPI_L1_DCR", "L1D cache reads", "Level 1 data cache reads", 0, 0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L1, NULL, {0},{NULL}, NULL}, /* 68 */ {"PAPI_L2_DCR", "L2D cache reads", "Level 2 data cache reads", 0, 0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L2, NULL, {0},{NULL}, NULL}, /* 69 */ {"PAPI_L3_DCR", "L3D cache reads", "Level 3 data cache reads", 0, 0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L3, NULL, {0},{NULL}, NULL}, /* 70 */ {"PAPI_L1_DCW", "L1D cache writes", "Level 1 data cache writes", 0, 0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L1, NULL, {0},{NULL}, NULL}, /* 71 */ {"PAPI_L2_DCW", "L2D cache writes", "Level 2 data cache writes", 0, 0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L2, NULL, {0},{NULL}, NULL}, /* 72 */ {"PAPI_L3_DCW", "L3D cache writes", "Level 3 data cache writes", 0, 0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L3, NULL, {0},{NULL}, NULL}, /* 73 */ {"PAPI_L1_ICH", "L1I cache hits", "Level 1 instruction cache hits", 0, 0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L1 + PAPI_PRESET_BIT_INS, NULL, {0},{NULL}, NULL}, /* 74 */ {"PAPI_L2_ICH", "L2I cache hits", "Level 2 instruction cache hits", 0, 0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L2 + 
PAPI_PRESET_BIT_INS, NULL, {0},{NULL}, NULL}, /* 75 */ {"PAPI_L3_ICH", "L3I cache hits", "Level 3 instruction cache hits", 0, 0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L3 + PAPI_PRESET_BIT_INS, NULL, {0},{NULL}, NULL}, /* 76 */ {"PAPI_L1_ICA", "L1I cache accesses", "Level 1 instruction cache accesses", 0, 0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L1 + PAPI_PRESET_BIT_INS, NULL, {0},{NULL}, NULL}, /* 77 */ {"PAPI_L2_ICA", "L2I cache accesses", "Level 2 instruction cache accesses", 0, 0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L2 + PAPI_PRESET_BIT_INS, NULL, {0},{NULL}, NULL}, /* 78 */ {"PAPI_L3_ICA", "L3I cache accesses", "Level 3 instruction cache accesses", 0, 0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L3 + PAPI_PRESET_BIT_INS, NULL, {0},{NULL}, NULL}, /* 79 */ {"PAPI_L1_ICR", "L1I cache reads", "Level 1 instruction cache reads", 0, 0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L1 + PAPI_PRESET_BIT_INS, NULL, {0},{NULL}, NULL}, /* 80 */ {"PAPI_L2_ICR", "L2I cache reads", "Level 2 instruction cache reads", 0, 0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L2 + PAPI_PRESET_BIT_INS, NULL, {0},{NULL}, NULL}, /* 81 */ {"PAPI_L3_ICR", "L3I cache reads", "Level 3 instruction cache reads", 0, 0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L3 + PAPI_PRESET_BIT_INS, NULL, {0},{NULL}, NULL}, /* 82 */ {"PAPI_L1_ICW", "L1I cache writes", "Level 1 instruction cache writes", 0, 0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L1 + PAPI_PRESET_BIT_INS, NULL, {0},{NULL}, NULL}, /* 83 */ {"PAPI_L2_ICW", "L2I cache writes", "Level 2 instruction cache writes", 0, 0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L2 + PAPI_PRESET_BIT_INS, NULL, {0},{NULL}, NULL}, /* 84 */ {"PAPI_L3_ICW", "L3I cache writes", "Level 3 instruction cache writes", 0, 0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L3 + PAPI_PRESET_BIT_INS, NULL, {0},{NULL}, NULL}, /* 85 */ {"PAPI_L1_TCH", "L1 cache hits", "Level 1 total cache hits", 0, 0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L1, NULL, {0},{NULL}, NULL}, /* 86 */ {"PAPI_L2_TCH", "L2 cache 
hits", "Level 2 total cache hits", 0, 0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L2, NULL, {0},{NULL}, NULL}, /* 87 */ {"PAPI_L3_TCH", "L3 cache hits", "Level 3 total cache hits", 0, 0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L3, NULL, {0},{NULL}, NULL}, /* 88 */ {"PAPI_L1_TCA", "L1 cache accesses", "Level 1 total cache accesses", 0, 0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L1, NULL, {0},{NULL}, NULL}, /* 89 */ {"PAPI_L2_TCA", "L2 cache accesses", "Level 2 total cache accesses", 0, 0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L2, NULL, {0},{NULL}, NULL}, /* 90 */ {"PAPI_L3_TCA", "L3 cache accesses", "Level 3 total cache accesses", 0, 0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L3, NULL, {0},{NULL}, NULL}, /* 91 */ {"PAPI_L1_TCR", "L1 cache reads", "Level 1 total cache reads", 0, 0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L1, NULL, {0},{NULL}, NULL}, /* 92 */ {"PAPI_L2_TCR", "L2 cache reads", "Level 2 total cache reads", 0, 0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L2, NULL, {0},{NULL}, NULL}, /* 93 */ {"PAPI_L3_TCR", "L3 cache reads", "Level 3 total cache reads", 0, 0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L3, NULL, {0},{NULL}, NULL}, /* 94 */ {"PAPI_L1_TCW", "L1 cache writes", "Level 1 total cache writes", 0, 0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L1, NULL, {0},{NULL}, NULL}, /* 95 */ {"PAPI_L2_TCW", "L2 cache writes", "Level 2 total cache writes", 0, 0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L2, NULL, {0},{NULL}, NULL}, /* 96 */ {"PAPI_L3_TCW", "L3 cache writes", "Level 3 total cache writes", 0, 0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L3, NULL, {0},{NULL}, NULL}, /* 97 */ {"PAPI_FML_INS", "FPU multiply", "Floating point multiply instructions", 0, 0, PAPI_PRESET_BIT_INS + PAPI_PRESET_BIT_FP, NULL, {0},{NULL}, NULL}, /* 98 */ {"PAPI_FAD_INS", "FPU add", "Floating point add instructions", 0, 0, PAPI_PRESET_BIT_INS + PAPI_PRESET_BIT_FP, NULL, {0},{NULL}, NULL}, /* 99 */ {"PAPI_FDV_INS", "FPU divide", "Floating point divide instructions", 0, 0, PAPI_PRESET_BIT_INS + 
PAPI_PRESET_BIT_FP, NULL, {0},{NULL}, NULL}, /*100 */ {"PAPI_FSQ_INS", "FPU square root", "Floating point square root instructions", 0, 0, PAPI_PRESET_BIT_INS + PAPI_PRESET_BIT_FP, NULL, {0},{NULL}, NULL}, /*101 */ {"PAPI_FNV_INS", "FPU inverse", "Floating point inverse instructions", 0, 0, PAPI_PRESET_BIT_INS + PAPI_PRESET_BIT_FP, NULL, {0},{NULL}, NULL}, /*102 */ {"PAPI_FP_OPS", "FP operations", "Floating point operations", 0, 0, PAPI_PRESET_BIT_INS + PAPI_PRESET_BIT_FP, NULL, {0},{NULL}, NULL}, /*103 */ {"PAPI_SP_OPS", "SP operations", "Floating point operations; optimized to count scaled single precision vector operations", 0, 0, PAPI_PRESET_BIT_INS + PAPI_PRESET_BIT_FP, NULL, {0},{NULL}, NULL}, /*104 */ {"PAPI_DP_OPS", "DP operations", "Floating point operations; optimized to count scaled double precision vector operations", 0, 0, PAPI_PRESET_BIT_INS + PAPI_PRESET_BIT_FP, NULL, {0},{NULL}, NULL}, /*105 */ {"PAPI_VEC_SP", "SP Vector/SIMD instr", "Single precision vector/SIMD instructions", 0, 0, PAPI_PRESET_BIT_INS + PAPI_PRESET_BIT_FP, NULL, {0},{NULL}, NULL}, /*106 */ {"PAPI_VEC_DP", "DP Vector/SIMD instr", "Double precision vector/SIMD instructions", 0, 0, PAPI_PRESET_BIT_INS + PAPI_PRESET_BIT_FP, NULL, {0},{NULL}, NULL}, /* 107 */ {"PAPI_REF_CYC", "Reference cycles", "Reference clock cycles", 0, 0, PAPI_PRESET_BIT_MSC, NULL, {0},{NULL}, NULL}, /*108 */ {NULL, NULL, NULL,0,0,0,NULL,{0},{NULL}, NULL}, /*109 */ {NULL, NULL, NULL,0,0,0,NULL,{0},{NULL}, NULL}, /*110 */ {NULL, NULL, NULL,0,0,0,NULL,{0},{NULL}, NULL}, /*111 */ {NULL, NULL, NULL,0,0,0,NULL,{0},{NULL}, NULL}, /*112 */ {NULL, NULL, NULL,0,0,0,NULL,{0},{NULL}, NULL}, /*113 */ {NULL, NULL, NULL,0,0,0,NULL,{0},{NULL}, NULL}, /*114 */ {NULL, NULL, NULL,0,0,0,NULL,{0},{NULL}, NULL}, /*115 */ {NULL, NULL, NULL,0,0,0,NULL,{0},{NULL}, NULL}, /*116 */ {NULL, NULL, NULL,0,0,0,NULL,{0},{NULL}, NULL}, /*117 */ {NULL, NULL, NULL,0,0,0,NULL,{0},{NULL}, NULL}, /*118 */ {NULL, NULL, NULL,0,0,0,NULL,{0},{NULL}, 
NULL}, /*119 */ {NULL, NULL, NULL,0,0,0,NULL,{0},{NULL}, NULL}, /*120 */ {NULL, NULL, NULL,0,0,0,NULL,{0},{NULL}, NULL}, /*121 */ {NULL, NULL, NULL,0,0,0,NULL,{0},{NULL}, NULL}, /*122 */ {NULL, NULL, NULL,0,0,0,NULL,{0},{NULL}, NULL}, /*123 */ {NULL, NULL, NULL,0,0,0,NULL,{0},{NULL}, NULL}, /*124 */ {NULL, NULL, NULL,0,0,0,NULL,{0},{NULL}, NULL}, /*125 */ {NULL, NULL, NULL,0,0,0,NULL,{0},{NULL}, NULL}, /*126 */ {NULL, NULL, NULL,0,0,0,NULL,{0},{NULL}, NULL}, /*127 */ {NULL, NULL, NULL,0,0,0,NULL,{0},{NULL}, NULL}, }; #if 0 const hwi_describe_t _papi_hwi_err[PAPI_NUM_ERRORS] = { /* 0 */ {PAPI_OK, "PAPI_OK", "No error"}, /* 1 */ {PAPI_EINVAL, "PAPI_EINVAL", "Invalid argument"}, /* 2 */ {PAPI_ENOMEM, "PAPI_ENOMEM", "Insufficient memory"}, /* 3 */ {PAPI_ESYS, "PAPI_ESYS", "A System/C library call failed"}, /* 4 */ {PAPI_ECMP, "PAPI_ECMP", "Not supported by component"}, /* 5 */ {PAPI_ECLOST, "PAPI_ECLOST", "Access to the counters was lost or interrupted"}, /* 6 */ {PAPI_EBUG, "PAPI_EBUG", "Internal error, please send mail to the developers"}, /* 7 */ {PAPI_ENOEVNT, "PAPI_ENOEVNT", "Event does not exist"}, /* 8 */ {PAPI_ECNFLCT, "PAPI_ECNFLCT", "Event exists, but cannot be counted due to hardware resource limits"}, /* 9 */ {PAPI_ENOTRUN, "PAPI_ENOTRUN", "EventSet is currently not running"}, /*10 */ {PAPI_EISRUN, "PAPI_EISRUN", "EventSet is currently counting"}, /*11 */ {PAPI_ENOEVST, "PAPI_ENOEVST", "No such EventSet available"}, /*12 */ {PAPI_ENOTPRESET, "PAPI_ENOTPRESET", "Event in argument is not a valid preset"}, /*13 */ {PAPI_ENOCNTR, "PAPI_ENOCNTR", "Hardware does not support performance counters"}, /*14 */ {PAPI_EMISC, "PAPI_EMISC", "Unknown error code"}, /*15 */ {PAPI_EPERM, "PAPI_EPERM", "Permission level does not permit operation"}, /*16 */ {PAPI_ENOINIT, "PAPI_ENOINIT", "PAPI hasn't been initialized yet"}, /*17 */ {PAPI_ENOCMP, "PAPI_ENOCMP", "Component Index isn't set"}, /*18 */ {PAPI_ENOSUPP, "PAPI_ENOSUPP", "Not supported"}, /*19 */ {PAPI_ENOIMPL, 
"PAPI_ENOIMPL", "Not implemented"}, /*20 */ {PAPI_EBUF, "PAPI_EBUF", "Buffer size exceeded"}, /*21 */ {PAPI_EINVAL_DOM, "PAPI_EINVAL_DOM", "EventSet domain is not supported for the operation"}, /*22 */ {PAPI_EATTR, "PAPI_EATTR", "Invalid or missing event attributes"}, /*23 */ {PAPI_ECOUNT, "PAPI_ECOUNT", "Too many events or attributes"}, /*24 */ {PAPI_ECOMBO, "PAPI_ECOMBO", "Bad combination of features"} }; #endif papi-5.6.0/src/ctests/profile_twoevents.c000664 001750 001750 00000007010 13216244360 022546 0ustar00jshenry1963jshenry1963000000 000000 /* * File: profile_twoevents.c * Author: Philip Mucci * mucci@cs.utk.edu */ /* This file performs the following test: profiling two events */ #include #include #include "papi.h" #include "papi_test.h" #include "prof_utils.h" #include "do_loops.h" int main( int argc, char **argv ) { int i, num_tests = 6; unsigned long length, blength; int num_buckets, mask; char title[80]; int retval; const PAPI_exe_info_t *prginfo; caddr_t start, end; int quiet; /* Set TESTS_QUIET variable */ quiet=tests_quiet( argc, argv ); retval = PAPI_library_init( PAPI_VER_CURRENT ); if (retval != PAPI_VER_CURRENT ) { test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); } if ( ( prginfo = PAPI_get_executable_info( ) ) == NULL ) { test_fail( __FILE__, __LINE__, "PAPI_get_executable_info", 1 ); } mask = prof_events( num_tests ); start = prginfo->address_info.text_start; end = prginfo->address_info.text_end; /* Must have at least FP instr or Tot ins */ if ( ( ( mask & MASK_FP_INS ) == 0 ) && ( ( mask & MASK_TOT_INS ) == 0 ) ) { if (!quiet) printf("No events could be added\n"); test_skip( __FILE__, __LINE__, "No FP or Total Ins. 
event", 1 ); } if ( start > end ) test_fail( __FILE__, __LINE__, "Profile length < 0!", 0 ); length = ( unsigned long ) ( end - start ); if (!quiet) { prof_print_address( "Test case profile: POSIX compatible profiling with two events.\n", prginfo ); prof_print_prof_info( start, end, THRESHOLD, event_name ); } prof_alloc( 2, length ); blength = prof_size( length, FULL_SCALE, PAPI_PROFIL_BUCKET_16, &num_buckets ); do_no_profile( quiet ); if ( !quiet ) { printf( "Test type : \tPAPI_PROFIL_POSIX\n" ); } if ( ( retval = PAPI_profil( profbuf[0], ( unsigned int ) blength, start, FULL_SCALE, EventSet, PAPI_event, THRESHOLD, PAPI_PROFIL_POSIX ) ) != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_profil", retval ); } if ( ( retval = PAPI_profil( profbuf[1], ( unsigned int ) blength, start, FULL_SCALE, EventSet, PAPI_TOT_CYC, THRESHOLD, PAPI_PROFIL_POSIX ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_profil", retval ); do_stuff( ); if ( ( retval = PAPI_start( EventSet ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_start", retval ); do_stuff( ); if ( ( retval = PAPI_stop( EventSet, values[1] ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); if ( !quiet ) { printf( TAB1, event_name, ( values[1] )[0] ); printf( TAB1, "PAPI_TOT_CYC:", ( values[1] )[1] ); } if ( ( retval = PAPI_profil( profbuf[0], ( unsigned int ) blength, start, FULL_SCALE, EventSet, PAPI_event, 0, PAPI_PROFIL_POSIX ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_profil", retval ); if ( ( retval = PAPI_profil( profbuf[1], ( unsigned int ) blength, start, FULL_SCALE, EventSet, PAPI_TOT_CYC, 0, PAPI_PROFIL_POSIX ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_profil", retval ); sprintf( title, " \t\t %s\tPAPI_TOT_CYC\naddress\t\t\tcounts\tcounts\n", event_name ); if (!quiet) { prof_head( blength, PAPI_PROFIL_BUCKET_16, num_buckets, title ); prof_out( start, 2, PAPI_PROFIL_BUCKET_16, num_buckets, FULL_SCALE ); } remove_test_events( &EventSet, mask ); retval = 
prof_check( 2, PAPI_PROFIL_BUCKET_16, num_buckets ); for ( i = 0; i < 2; i++ ) { free( profbuf[i] ); } if ( retval == 0 ) { test_fail( __FILE__, __LINE__, "No information in buffers", 1 ); } test_pass( __FILE__ ); return 0; } papi-5.6.0/man/man1/papi_decode.1000664 001750 001750 00000003204 13216244355 020462 0ustar00jshenry1963jshenry1963000000 000000 .TH "papi_decode" 1 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME papi_decode \- papi_decode utility\&. .PP file papi_decode\&.c .SH "NAME" .PP papi_decode - provides availability and detail information for PAPI preset events\&. .SH "Synopsis" .PP papi_decode [-ah] .SH "Description" .PP papi_decode is a PAPI utility program that converts the PAPI presets for the existing library into a comma separated value format that can then be viewed or modified in spreadsheet applications or text editors, and can be supplied to PAPI_encode_events (3) as a way of adding or modifying event definitions for specialized applications\&. The format for the csv output consists of a line of field names, followed by a blank line, followed by one line of comma separated values for each event contained in the preset table\&. A portion of this output (for Pentium 4) is shown below: .PP .nf * name,derived,postfix,short_descr,long_descr,note,[native,\&.\&.\&.] * PAPI_L1_ICM,NOT_DERIVED,,"L1I cache misses","Level 1 instruction cache misses",,BPU_fetch_request_TCMISS * PAPI_L2_TCM,NOT_DERIVED,,"L2 cache misses","Level 2 cache misses",,BSQ_cache_reference_RD_2ndL_MISS_WR_2ndL_MISS * PAPI_TLB_DM,NOT_DERIVED,,"Data TLB misses","Data translation lookaside buffer misses",,page_walk_type_DTMISS * .fi .PP .SH "Options" .PP .PD 0 .IP "\(bu" 2 -a Convert only the available PAPI preset events\&. .IP "\(bu" 2 -h Display help information about this utility\&. .PP .SH "Bugs" .PP There are no known bugs in this utility\&. If you find a bug, it should be reported to the PAPI Mailing List at ptools-perfapi@icl.utk.edu\&. 
papi-5.6.0/man/man3/PAPI_get_overflow_event_index.3000664 001750 001750 00000003442 13216244356 024142 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPI_get_overflow_event_index" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPI_get_overflow_event_index \- .PP converts an overflow vector into an array of indexes to overflowing events .SH SYNOPSIS .br .PP .SH "Detailed Description" .PP .PP .nf @param EventSet an integer handle to a PAPI event set as created by PAPI_create_eventset @param overflow_vector a vector with bits set for each counter that overflowed. This vector is passed by the system to the overflow handler routine. @param *array an array of indexes for events in EventSet. No more than *number indexes will be stored into the array. @param *number On input the variable determines the size of the array. On output the variable contains the number of indexes in the array. @retval PAPI_EINVAL One or more of the arguments is invalid. This could occur if the overflow_vector is empty (zero), if the array or number pointers are NULL, if the value of number is less than one, or if the EventSet is empty. @retval PAPI_ENOEVST The EventSet specified does not exist. @par Examples .fi .PP .PP .nf void handler(int EventSet, void *address, long_long overflow_vector, void *context){ int Events[4], number, i; int total = 0, retval; printf("Overflow #%d\n Handler(%d) Overflow at %p! vector=%#llx\n", total, EventSet, address, overflow_vector); total++; number = 4; retval = PAPI_get_overflow_event_index(EventSet, overflow_vector, Events, &number); if(retval == PAPI_OK) for(i=0; i #include #include #include #include #include #include #include #include "libperfctr.h" #include "marshal.h" #include "arch.h" #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) /* * Code to open (with or without creation) per-process perfctrs, * using the ioctl(dev_perfctr_fd, VPERFCTR_{CREAT,OPEN}, pid) API. 
*/ static int _vperfctr_open_pid(int pid, int try_creat) { int dev_perfctr_fd, fd; dev_perfctr_fd = open("/dev/perfctr", O_RDONLY); if (dev_perfctr_fd < 0) return -1; if (try_creat) fd = ioctl(dev_perfctr_fd, VPERFCTR_CREAT, pid); else fd = ioctl(dev_perfctr_fd, VPERFCTR_OPEN, pid); close(dev_perfctr_fd); if (fcntl(fd, F_SETFD, FD_CLOEXEC) < 0) perror("fcntl"); return fd; } /* * Operations using raw kernel handles, basically just open()/ioctl() wrappers. */ int _vperfctr_open(int creat) { return _vperfctr_open_pid(0, creat); } int _vperfctr_control(int fd, const struct vperfctr_control *control) { return perfctr_ioctl_w(fd, VPERFCTR_CONTROL, control, &vperfctr_control_sdesc); } int _vperfctr_read_control(int fd, struct vperfctr_control *control) { return perfctr_ioctl_r(fd, VPERFCTR_READ_CONTROL, control, &vperfctr_control_sdesc); } int _vperfctr_read_sum(int fd, struct perfctr_sum_ctrs *sum) { return perfctr_ioctl_r(fd, VPERFCTR_READ_SUM, sum, &perfctr_sum_ctrs_sdesc); } /* * Operations using library objects. */ struct vperfctr { /* XXX: point to &vperfctr_state.cpu_state instead? 
*/ volatile const struct vperfctr_state *kstate; int fd; unsigned char have_rdpmc; }; static int vperfctr_open_pid(int pid, struct vperfctr *perfctr, unsigned int mode) { int fd, creat; struct perfctr_info info; if (mode == 0) creat = 0; else if (mode == VPERFCTR_OPEN_CREAT_EXCL) creat = 1; else { errno = EINVAL; return -1; } fd = _vperfctr_open_pid(pid, creat); if (fd < 0) goto out_perfctr; perfctr->fd = fd; if (perfctr_abi_check_fd(perfctr->fd) < 0) goto out_fd; if (perfctr_info(perfctr->fd, &info) < 0) goto out_fd; perfctr->have_rdpmc = (info.cpu_features & PERFCTR_FEATURE_RDPMC) != 0; perfctr->kstate = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED, perfctr->fd, 0); if (perfctr->kstate != MAP_FAILED) return 0; munmap((void*)perfctr->kstate, PAGE_SIZE); out_fd: if (creat) vperfctr_unlink(perfctr); close(perfctr->fd); out_perfctr: return -1; } struct vperfctr *vperfctr_open_mode(unsigned int mode) { struct vperfctr *perfctr; perfctr = malloc(sizeof(*perfctr)); if (perfctr) { if (vperfctr_open_pid(0, perfctr, mode) == 0) return perfctr; free(perfctr); } return NULL; } struct vperfctr *vperfctr_open(void) { return vperfctr_open_mode(VPERFCTR_OPEN_CREAT_EXCL); } int vperfctr_info(const struct vperfctr *vperfctr, struct perfctr_info *info) { return perfctr_info(vperfctr->fd, info); } struct perfctr_cpus_info *vperfctr_cpus_info(const struct vperfctr *vperfctr) { return perfctr_cpus_info(vperfctr->fd); } #if (__GNUC__ < 2) || (__GNUC__ == 2 && __GNUC_MINOR__ < 96) #define __builtin_expect(x, expected_value) (x) #endif #define likely(x) __builtin_expect((x),1) #define unlikely(x) __builtin_expect((x),0) unsigned long long vperfctr_read_tsc(const struct vperfctr *self) { #if defined(rdtscl) unsigned long long sum; unsigned int tsc0, tsc1, now; volatile const struct vperfctr_state *kstate; kstate = self->kstate; if (likely(kstate->cpu_state.cstatus != 0)) { tsc0 = kstate->cpu_state.tsc_start; retry: rdtscl(now); sum = kstate->cpu_state.tsc_sum; tsc1 = 
kstate->cpu_state.tsc_start; if (likely(tsc1 == tsc0)) return sum += (now - tsc0); tsc0 = tsc1; goto retry; /* better gcc code than with a do{}while() loop */ } return kstate->cpu_state.tsc_sum; #else struct perfctr_sum_ctrs sum_ctrs; if (_vperfctr_read_sum(self->fd, &sum_ctrs) < 0) perror(__FUNCTION__); return sum_ctrs.tsc; #endif } unsigned long long vperfctr_read_pmc(const struct vperfctr *self, unsigned i) { struct perfctr_sum_ctrs sum_ctrs; #if defined(rdpmcl) unsigned long long sum; unsigned int start, now; unsigned int tsc0, tsc1; volatile const struct vperfctr_state *kstate; unsigned int cstatus; kstate = self->kstate; cstatus = kstate->cpu_state.cstatus; /* gcc 3.0 generates crap code for likely(E1 && E2) :-( */ if (perfctr_cstatus_has_tsc(cstatus) && vperfctr_has_rdpmc(self)) { tsc0 = kstate->cpu_state.tsc_start; retry: rdpmcl(kstate->cpu_state.pmc[i].map, now); start = kstate->cpu_state.pmc[i].start; sum = kstate->cpu_state.pmc[i].sum; tsc1 = kstate->cpu_state.tsc_start; if (likely(tsc1 == tsc0)) { return sum += (now - start); } tsc0 = tsc1; goto retry; } #endif if (_vperfctr_read_sum(self->fd, &sum_ctrs) < 0) perror(__FUNCTION__); return sum_ctrs.pmc[i]; } static int vperfctr_read_ctrs_slow(const struct vperfctr *vperfctr, struct perfctr_sum_ctrs *sum) { return _vperfctr_read_sum(vperfctr->fd, sum); } int vperfctr_read_ctrs(const struct vperfctr *self, struct perfctr_sum_ctrs *sum) { #if defined(rdtscl) && defined(rdpmcl) unsigned int tsc0, now; unsigned int cstatus, nrctrs; volatile const struct vperfctr_state *kstate; int i; /* Fast path is impossible if the TSC isn't being sampled (bad idea, but on WinChip you don't have a choice), or at least one PMC is enabled but the CPU doesn't have RDPMC. 
*/ kstate = self->kstate; cstatus = kstate->cpu_state.cstatus; nrctrs = perfctr_cstatus_nrctrs(cstatus); if (perfctr_cstatus_has_tsc(cstatus) && (!nrctrs || vperfctr_has_rdpmc(self))) { retry: tsc0 = kstate->cpu_state.tsc_start; rdtscl(now); sum->tsc = kstate->cpu_state.tsc_sum + (now - tsc0); for(i = nrctrs; --i >= 0;) { rdpmcl(kstate->cpu_state.pmc[i].map, now); sum->pmc[i] = kstate->cpu_state.pmc[i].sum + (now - kstate->cpu_state.pmc[i].start); } if (likely(tsc0 == kstate->cpu_state.tsc_start)) return 0; goto retry; } #endif return vperfctr_read_ctrs_slow(self, sum); } int vperfctr_read_state(const struct vperfctr *self, struct perfctr_sum_ctrs *sum, struct vperfctr_control *control) { if (_vperfctr_read_sum(self->fd, sum) < 0) return -1; /* For historical reasons, control may be NULL. */ if (control && _vperfctr_read_control(self->fd, control) < 0) return -1; return 0; } int vperfctr_control(const struct vperfctr *perfctr, struct vperfctr_control *control) { return _vperfctr_control(perfctr->fd, control); } int vperfctr_stop(const struct vperfctr *perfctr) { struct vperfctr_control control; memset(&control, 0, sizeof control); return _vperfctr_control(perfctr->fd, &control); } int vperfctr_is_running(const struct vperfctr *perfctr) { return perfctr->kstate->cpu_state.cstatus != 0; } int vperfctr_iresume(const struct vperfctr *perfctr) { return ioctl(perfctr->fd, VPERFCTR_IRESUME, NULL); } int vperfctr_unlink(const struct vperfctr *perfctr) { return ioctl(perfctr->fd, VPERFCTR_UNLINK, NULL); } void vperfctr_close(struct vperfctr *perfctr) { munmap((void*)perfctr->kstate, PAGE_SIZE); close(perfctr->fd); free(perfctr); } /* * Operations on other processes' virtual-mode perfctrs. 
*/ /* A vperfctr opened on another process, together with that process's pid. */ struct rvperfctr { struct vperfctr vperfctr; /* must be first for the close() operation */ int pid; }; /* Allocate an rvperfctr and open it on process pid with vperfctr_open_pid(..., VPERFCTR_OPEN_CREAT_EXCL). Returns the new handle, or NULL if the allocation or the open fails (the allocation is freed when the open fails). */ struct rvperfctr *rvperfctr_open(int pid) { struct rvperfctr *rvperfctr; rvperfctr = malloc(sizeof(*rvperfctr)); if (rvperfctr) { if (vperfctr_open_pid(pid, &rvperfctr->vperfctr, VPERFCTR_OPEN_CREAT_EXCL) == 0) { rvperfctr->pid = pid; return rvperfctr; } free(rvperfctr); } return NULL; } /* Return the pid stored in the handle. */ int rvperfctr_pid(const struct rvperfctr *rvperfctr) { return rvperfctr->pid; } /* The remaining rvperfctr_* functions forward to a vperfctr_* operation on the embedded vperfctr and return its result. */ int rvperfctr_info(const struct rvperfctr *rvperfctr, struct perfctr_info *info) { return vperfctr_info(&rvperfctr->vperfctr, info); } /* Always reads through vperfctr_read_ctrs_slow(), never vperfctr_read_ctrs(). */ int rvperfctr_read_ctrs(const struct rvperfctr *rvperfctr, struct perfctr_sum_ctrs *sum) { return vperfctr_read_ctrs_slow(&rvperfctr->vperfctr, sum); } int rvperfctr_read_state(const struct rvperfctr *rvperfctr, struct perfctr_sum_ctrs *sum, struct vperfctr_control *control) { return vperfctr_read_state(&rvperfctr->vperfctr, sum, control); } int rvperfctr_control(const struct rvperfctr *rvperfctr, struct vperfctr_control *control) { return vperfctr_control(&rvperfctr->vperfctr, control); } int rvperfctr_stop(const struct rvperfctr *rvperfctr) { return vperfctr_stop(&rvperfctr->vperfctr); } int rvperfctr_iresume(const struct rvperfctr *rvperfctr) { return vperfctr_iresume(&rvperfctr->vperfctr); } int rvperfctr_unlink(const struct rvperfctr *rvperfctr) { return vperfctr_unlink(&rvperfctr->vperfctr); } /* Releases the handle through vperfctr_close() on the embedded vperfctr. */ void rvperfctr_close(struct rvperfctr *rvperfctr) { /* this relies on offsetof(struct rvperfctr, vperfctr) == 0 */ vperfctr_close(&rvperfctr->vperfctr); } papi-5.6.0/src/components/powercap/tests/000775 001750 001750 00000000000 13216244360 022470 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/ftests/fmatrixpapi2.F000664 001750 001750 00000013326 13216244361 021354 0ustar00jshenry1963jshenry1963000000 000000 C **************************************************************************** C C fmatrixpapi2.f C An example of matrix-matrix multiplication and using 
PAPI high level to C look at the performance. The example illustrates how PAPIF_read_counters C and PAPIF_accum_counters can be used to selectively measure parts of a C code without having to use the low-level interface. C C Derived from an example written by Kevin London March 2000 C **************************************************************************** #include "fpapi_test.h" program fmatrixpapi IMPLICIT integer (p) INTEGER ncols1,nrows1,ncols2,nrows2 PARAMETER(nrows1=175,ncols1=225,nrows2=ncols1,ncols2=150) INTEGER i,j,num_events,retval C PAPI standardized event to be monitored INTEGER event(2) C PAPI values of the counters INTEGER*8 values(2), dummies(2) REAL*8 p(nrows1,ncols1),q(nrows2,ncols2), & r(nrows1,ncols2) integer tests_quiet, get_quiet external get_quiet tests_quiet = get_quiet() C Setup default values num_events=0 C Open matrix file number 1 for reading C OPEN(UNIT=1,FILE='fmt1',STATUS='OLD') C Open matrix file number 2 for reading C OPEN(UNIT=2,FILE='fmt2',STATUS='OLD') retval = PAPI_VER_CURRENT call PAPIf_library_init(retval) if ( retval.NE.PAPI_VER_CURRENT) then call ftest_fail(__FILE__, __LINE__, . 'PAPI_library_init', retval) end if C Total floating point operations call PAPIf_query_event(PAPI_FP_INS, retval) if (retval .NE. PAPI_OK) then event(1) = PAPI_TOT_INS else C Total floating point operations event(1) = PAPI_FP_INS end if C Time used event(2) = PAPI_TOT_CYC C See how many hardware events at one time are supported call PAPIf_num_counters( num_events ) if ( num_events .LT. 2 ) then print *,'This example program requires the architecture ', . 'to support 2 simultaneous hardware events...shutting down.' stop end if if (tests_quiet .EQ. 
0) then print *, 'Number of hardware counters supported: ', num_events end if C matrix 1: read in the matrix values do i=1, nrows1 do j=1,ncols1 p(i,j) = i*j*1.0 end do end do C matrix 2: read in the matrix values do i=1, nrows2 do j=1,ncols2 q(i,j) = i*j*1.0 end do end do C Initialize the result matrix do i=1,nrows1 do j=1, ncols2 r(i,j) = i*j*1.0 end do end do C Set up the counters num_events = 2 call PAPIf_start_counters( event, num_events, retval) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, *'PAPIf_start_counters', retval) end if C We wish to count the events for this call call Adding_MatMult(p,q,r,nrows1,ncols1,ncols2) C Read and clear the counter values call PAPIf_read_counters(values, num_events,retval) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, *'PAPIf_read_counters', retval) end if if (tests_quiet .EQ. 0) then print * if (event(1) .EQ. PAPI_TOT_INS) then print *, 'TOT Instructions: ',values(1) else print *, 'FP Instructions: ',values(1) end if print *, 'Cycles: ',values(2) if (event(1) .EQ. PAPI_FP_INS) then write(*,'(a,f9.6)') ' Efficiency (flops/cycles):', & real(values(1))/real(values(2)) end if end if C We don't wish to count the events for this call call Adding_MatMult(p,q,r,nrows1,ncols1,ncols2) C Clear the counter values call PAPIf_read_counters(dummies, num_events,retval) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, *'PAPIf_read_counters', retval) end if C We wish to count the events for this call call Adding_MatMult(p,q,r,nrows1,ncols1,ncols2) C Read the counter values call PAPIf_accum_counters(values, num_events,retval) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, *'PAPIf_accum_counters', retval) end if C Stop the counters and put the results in the array values call PAPIf_stop_counters(dummies,num_events,retval) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, *'PAPIf_stop_counters', retval) end if if (tests_quiet .EQ. 
0) then print * if (event(1) .EQ. PAPI_TOT_INS) then print *, 'TOT Instructions: ',values(1) else print *, 'FP Instructions: ',values(1) end if print *, 'Cycles: ',values(2) if (event(1) .EQ. PAPI_FP_INS) then write(*,'(a,f9.6)') ' Efficiency (flops/cycles):', & real(values(1))/real(values(2)) end if print * print *,'----------------------------------------------------' print *,'The second instruction and cycle counts should be' print *,'approximately twice the first ones. The efficiency' print *,'metric should be fairly equal between the cases.' end if call ftests_pass(__FILE__) end subroutine Adding_MatMult(p,q,r,ni,nk,nj) implicit integer (p) integer ni,nk,nj real*8 p(ni,*),q(nk,*),r(ni,nj) integer i,j,k C Compute the matrix-matrix multiplication do i=1,ni do j=1,nj do k=1,nk r(i,j)=r(i,j) + p(i,k)*q(k,j) end do end do end do C Make sure the compiler does not optimize away the multiplication call dummy(r) end papi-5.6.0/src/components/lmsensors/Makefile.lmsensors.in000664 001750 001750 00000000042 13216244357 025626 0ustar00jshenry1963jshenry1963000000 000000 SENSORS_INCDIR = @SENSORS_INCDIR@ papi-5.6.0/src/libpfm4/lib/events/perf_events.h000664 001750 001750 00000025424 13216244364 023434 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2009 Google, Inc * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. */ #define CACHE_ST_ACCESS(n, d, e) \ {\ .name = #n"-STORES",\ .desc = d" store accesses",\ .id = PERF_COUNT_HW_CACHE_##e,\ .type = PERF_TYPE_HW_CACHE,\ .modmsk = PERF_ATTR_HW,\ .umask_ovfl_idx = -1,\ .equiv = "PERF_COUNT_HW_CACHE_"#e":WRITE:ACCESS"\ },\ {\ .name = #n"-STORE-MISSES",\ .desc = d" store misses",\ .id = PERF_COUNT_HW_CACHE_##e,\ .type = PERF_TYPE_HW_CACHE,\ .modmsk = PERF_ATTR_HW,\ .umask_ovfl_idx = -1,\ .equiv = "PERF_COUNT_HW_CACHE_"#e":WRITE:MISS"\ } #define CACHE_PF_ACCESS(n, d, e) \ {\ .name = #n"-PREFETCHES",\ .desc = d" prefetch accesses",\ .id = PERF_COUNT_HW_CACHE_##e,\ .type = PERF_TYPE_HW_CACHE,\ .modmsk = PERF_ATTR_HW,\ .umask_ovfl_idx = -1,\ .equiv = "PERF_COUNT_HW_CACHE_"#e":PREFETCH:ACCESS"\ },\ {\ .name = #n"-PREFETCH-MISSES",\ .desc = d" prefetch misses",\ .id = PERF_COUNT_HW_CACHE_##e,\ .type = PERF_TYPE_HW_CACHE,\ .modmsk = PERF_ATTR_HW,\ .umask_ovfl_idx = -1,\ .equiv = "PERF_COUNT_HW_CACHE_"#e":PREFETCH:MISS"\ } #define CACHE_LD_ACCESS(n, d, e) \ {\ .name = #n"-LOADS",\ .desc = d" load accesses",\ .id = PERF_COUNT_HW_CACHE_##e,\ .type = PERF_TYPE_HW_CACHE,\ .modmsk = PERF_ATTR_HW,\ .umask_ovfl_idx = -1,\ .equiv = "PERF_COUNT_HW_CACHE_"#e":READ:ACCESS"\ },\ {\ .name = #n"-LOAD-MISSES",\ .desc = d" load misses",\ .id = PERF_COUNT_HW_CACHE_##e,\ .type = PERF_TYPE_HW_CACHE,\ .modmsk = PERF_ATTR_HW,\ .umask_ovfl_idx = -1,\ .equiv = "PERF_COUNT_HW_CACHE_"#e":READ:MISS"\ 
} #define CACHE_ACCESS(n, d, e) \ CACHE_LD_ACCESS(n, d, e), \ CACHE_ST_ACCESS(n, d, e), \ CACHE_PF_ACCESS(n, d, e) #define ICACHE_ACCESS(n, d, e) \ CACHE_LD_ACCESS(n, d, e), \ CACHE_PF_ACCESS(n, d, e) static perf_event_t perf_static_events[]={ PCL_EVT_HW(CPU_CYCLES), PCL_EVT_AHW(CYCLES, CPU_CYCLES), PCL_EVT_AHW(CPU-CYCLES, CPU_CYCLES), PCL_EVT_HW(INSTRUCTIONS), PCL_EVT_AHW(INSTRUCTIONS, INSTRUCTIONS), PCL_EVT_HW(CACHE_REFERENCES), PCL_EVT_AHW(CACHE-REFERENCES, CACHE_REFERENCES), PCL_EVT_HW(CACHE_MISSES), PCL_EVT_AHW(CACHE-MISSES,CACHE_MISSES), PCL_EVT_HW(BRANCH_INSTRUCTIONS), PCL_EVT_AHW(BRANCH-INSTRUCTIONS, BRANCH_INSTRUCTIONS), PCL_EVT_AHW(BRANCHES, BRANCH_INSTRUCTIONS), PCL_EVT_HW(BRANCH_MISSES), PCL_EVT_AHW(BRANCH-MISSES, BRANCH_MISSES), PCL_EVT_HW(BUS_CYCLES), PCL_EVT_AHW(BUS-CYCLES, BUS_CYCLES), PCL_EVT_HW(STALLED_CYCLES_FRONTEND), PCL_EVT_AHW(STALLED-CYCLES-FRONTEND, STALLED_CYCLES_FRONTEND), PCL_EVT_AHW(IDLE-CYCLES-FRONTEND, STALLED_CYCLES_FRONTEND), PCL_EVT_HW(STALLED_CYCLES_BACKEND), PCL_EVT_AHW(STALLED-CYCLES-BACKEND, STALLED_CYCLES_BACKEND), PCL_EVT_AHW(IDLE-CYCLES-BACKEND, STALLED_CYCLES_BACKEND), PCL_EVT_HW(REF_CPU_CYCLES), PCL_EVT_AHW(REF-CYCLES,REF_CPU_CYCLES), PCL_EVT_SW(CPU_CLOCK), PCL_EVT_ASW(CPU-CLOCK, CPU_CLOCK), PCL_EVT_SW(TASK_CLOCK), PCL_EVT_ASW(TASK-CLOCK, TASK_CLOCK), PCL_EVT_SW(PAGE_FAULTS), PCL_EVT_ASW(PAGE-FAULTS, PAGE_FAULTS), PCL_EVT_ASW(FAULTS, PAGE_FAULTS), PCL_EVT_SW(CONTEXT_SWITCHES), PCL_EVT_ASW(CONTEXT-SWITCHES, CONTEXT_SWITCHES), PCL_EVT_ASW(CS, CONTEXT_SWITCHES), PCL_EVT_SW(CPU_MIGRATIONS), PCL_EVT_ASW(CPU-MIGRATIONS, CPU_MIGRATIONS), PCL_EVT_ASW(MIGRATIONS, CPU_MIGRATIONS), PCL_EVT_SW(PAGE_FAULTS_MIN), PCL_EVT_ASW(MINOR-FAULTS, PAGE_FAULTS_MIN), PCL_EVT_SW(PAGE_FAULTS_MAJ), PCL_EVT_ASW(MAJOR-FAULTS, PAGE_FAULTS_MAJ), { .name = "PERF_COUNT_HW_CACHE_L1D", .desc = "L1 data cache", .id = PERF_COUNT_HW_CACHE_L1D, .type = PERF_TYPE_HW_CACHE, .numasks = 5, .modmsk = PERF_ATTR_HW, .umask_ovfl_idx = -1, .ngrp = 2, .umasks = { { .uname 
= "READ", .udesc = "read access", .uid = PERF_COUNT_HW_CACHE_OP_READ << 8, .uflags= PERF_FL_DEFAULT, .grpid = 0, }, { .uname = "WRITE", .udesc = "write access", .uid = PERF_COUNT_HW_CACHE_OP_WRITE << 8, .grpid = 0, }, { .uname = "PREFETCH", .udesc = "prefetch access", .uid = PERF_COUNT_HW_CACHE_OP_PREFETCH << 8, .grpid = 0, }, { .uname = "ACCESS", .udesc = "hit access", .uid = PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16, .grpid = 1, }, { .uname = "MISS", .udesc = "miss access", .uid = PERF_COUNT_HW_CACHE_RESULT_MISS << 16, .uflags= PERF_FL_DEFAULT, .grpid = 1, } } }, CACHE_ACCESS(L1-DCACHE, "L1 cache", L1D), { .name = "PERF_COUNT_HW_CACHE_L1I", .desc = "L1 instruction cache", .id = PERF_COUNT_HW_CACHE_L1I, .type = PERF_TYPE_HW_CACHE, .numasks = 4, .modmsk = PERF_ATTR_HW, .umask_ovfl_idx = -1, .ngrp = 2, .umasks = { { .uname = "READ", .udesc = "read access", .uid = PERF_COUNT_HW_CACHE_OP_READ << 8, .uflags= PERF_FL_DEFAULT, .grpid = 0, }, { .uname = "PREFETCH", .udesc = "prefetch access", .uid = PERF_COUNT_HW_CACHE_OP_PREFETCH << 8, .grpid = 0, }, { .uname = "ACCESS", .udesc = "hit access", .uid = PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16, .grpid = 1, }, { .uname = "MISS", .udesc = "miss access", .uid = PERF_COUNT_HW_CACHE_RESULT_MISS << 16, .uflags= PERF_FL_DEFAULT, .grpid = 1, } } }, ICACHE_ACCESS(L1-ICACHE, "L1I cache", L1I), { .name = "PERF_COUNT_HW_CACHE_LL", .desc = "Last level cache", .id = PERF_COUNT_HW_CACHE_LL, .type = PERF_TYPE_HW_CACHE, .numasks = 5, .modmsk = PERF_ATTR_HW, .umask_ovfl_idx = -1, .ngrp = 2, .umasks = { { .uname = "READ", .udesc = "read access", .uid = PERF_COUNT_HW_CACHE_OP_READ << 8, .uflags= PERF_FL_DEFAULT, .grpid = 0, }, { .uname = "WRITE", .udesc = "write access", .uid = PERF_COUNT_HW_CACHE_OP_WRITE << 8, .grpid = 0, }, { .uname = "PREFETCH", .udesc = "prefetch access", .uid = PERF_COUNT_HW_CACHE_OP_PREFETCH << 8, .grpid = 0, }, { .uname = "ACCESS", .udesc = "hit access", .uid = PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16, .grpid = 1, }, { 
.uname = "MISS", .udesc = "miss access", .uid = PERF_COUNT_HW_CACHE_RESULT_MISS << 16, .uflags= PERF_FL_DEFAULT, .grpid = 1, } } }, CACHE_ACCESS(LLC, "Last level cache", LL), { .name = "PERF_COUNT_HW_CACHE_DTLB", .desc = "Data Translation Lookaside Buffer", .id = PERF_COUNT_HW_CACHE_DTLB, .type = PERF_TYPE_HW_CACHE, .numasks = 5, .modmsk = PERF_ATTR_HW, .umask_ovfl_idx = -1, .ngrp = 2, .umasks = { { .uname = "READ", .udesc = "read access", .uid = PERF_COUNT_HW_CACHE_OP_READ << 8, .uflags= PERF_FL_DEFAULT, .grpid = 0, }, { .uname = "WRITE", .udesc = "write access", .uid = PERF_COUNT_HW_CACHE_OP_WRITE << 8, .grpid = 0, }, { .uname = "PREFETCH", .udesc = "prefetch access", .uid = PERF_COUNT_HW_CACHE_OP_PREFETCH << 8, .grpid = 0, }, { .uname = "ACCESS", .udesc = "hit access", .uid = PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16, .grpid = 1, }, { .uname = "MISS", .udesc = "miss access", .uid = PERF_COUNT_HW_CACHE_RESULT_MISS << 16, .uflags= PERF_FL_DEFAULT, .grpid = 1, } } }, CACHE_ACCESS(DTLB, "Data TLB", DTLB), { .name = "PERF_COUNT_HW_CACHE_ITLB", .desc = "Instruction Translation Lookaside Buffer", .id = PERF_COUNT_HW_CACHE_ITLB, .type = PERF_TYPE_HW_CACHE, .numasks = 3, .modmsk = PERF_ATTR_HW, .umask_ovfl_idx = -1, .ngrp = 2, .umasks = { { .uname = "READ", .udesc = "read access", .uid = PERF_COUNT_HW_CACHE_OP_READ << 8, .uflags= PERF_FL_DEFAULT, .grpid = 0, }, { .uname = "ACCESS", .udesc = "hit access", .uid = PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16, .grpid = 1, }, { .uname = "MISS", .udesc = "miss access", .uid = PERF_COUNT_HW_CACHE_RESULT_MISS << 16, .uflags= PERF_FL_DEFAULT, .grpid = 1, } } }, CACHE_LD_ACCESS(ITLB, "Instruction TLB", ITLB), { .name = "PERF_COUNT_HW_CACHE_BPU", .desc = "Branch Prediction Unit", .id = PERF_COUNT_HW_CACHE_BPU, .type = PERF_TYPE_HW_CACHE, .numasks = 3, .modmsk = PERF_ATTR_HW, .umask_ovfl_idx = -1, .ngrp = 2, .umasks = { { .uname = "READ", .udesc = "read access", .uid = PERF_COUNT_HW_CACHE_OP_READ << 8, .uflags= PERF_FL_DEFAULT, .grpid = 0, 
}, { .uname = "ACCESS", .udesc = "hit access", .uid = PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16, .grpid = 1, }, { .uname = "MISS", .udesc = "miss access", .uid = PERF_COUNT_HW_CACHE_RESULT_MISS << 16, .uflags= PERF_FL_DEFAULT, .grpid = 1, } } }, CACHE_LD_ACCESS(BRANCH, "Branch ", BPU), { .name = "PERF_COUNT_HW_CACHE_NODE", .desc = "Node memory access", .id = PERF_COUNT_HW_CACHE_NODE, .type = PERF_TYPE_HW_CACHE, .numasks = 5, .modmsk = PERF_ATTR_HW, .umask_ovfl_idx = -1, .ngrp = 2, .umasks = { { .uname = "READ", .udesc = "read access", .uid = PERF_COUNT_HW_CACHE_OP_READ << 8, .uflags= PERF_FL_DEFAULT, .grpid = 0, }, { .uname = "WRITE", .udesc = "write access", .uid = PERF_COUNT_HW_CACHE_OP_WRITE << 8, .grpid = 0, }, { .uname = "PREFETCH", .udesc = "prefetch access", .uid = PERF_COUNT_HW_CACHE_OP_PREFETCH << 8, .grpid = 0, }, { .uname = "ACCESS", .udesc = "hit access", .uid = PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16, .grpid = 1, }, { .uname = "MISS", .udesc = "miss access", .uid = PERF_COUNT_HW_CACHE_RESULT_MISS << 16, .uflags= PERF_FL_DEFAULT, .grpid = 1, } }, }, CACHE_ACCESS(NODE, "Node ", NODE) }; #define PME_PERF_EVENT_COUNT (sizeof(perf_static_events)/sizeof(perf_event_t)) papi-5.6.0/src/libpfm-3.y/docs/man3/libpfm_p6.3000664 001750 001750 00000004575 13216244361 022745 0ustar00jshenry1963jshenry1963000000 000000 .TH LIBPFM 3 "September, 2005" "" "Linux Programmer's Manual" .SH NAME libpfm_i386_p6 - support for Intel P6 processor family .SH SYNOPSIS .nf .B #include .B #include .sp .SH DESCRIPTION The libpfm library provides full support for the P6 processor family, including the Pentium M processor. The interface is defined in \fBpfmlib_i386_p6.h\fR. It consists of a set of functions and structures which describe and allow access to the P6 processors specific PMU features. .sp When P6 processor specific features are needed to support a measurement, their descriptions must be passed as model-specific input arguments to the \fBpfm_dispatch_events()\fR function. 
The P6 processor-specific input arguments are described in the \fBpfmlib_i386_p6_input_param_t\fR structure and the output parameters in \fBpfmlib_i386_p6_output_param_t\fR. They are defined as follows: .sp .nf typedef struct { unsigned int cnt_mask; unsigned int flags; } pfmlib_i386_p6_counter_t; typedef struct { pfmlib_i386_p6_counter_t pfp_i386_p6_counters[PMU_I386_P6_NUM_COUNTERS]; uint64_t reserved[4]; } pfmlib_i386_p6_input_param_t; typedef struct { uint64_t reserved[8]; } pfmlib_i386_p6_output_param_t; .fi .sp .sp The P6 processor provides a few additional per-event features for counters: thresholding, inversion, and edge detection. They can be set using the \fBpfp_i386_p6_counters\fR data structure for each event. The \fBflags\fR field can be initialized as follows: .TP .B PFMLIB_I386_P6_SEL_INV Inverts the result of the \fBcnt_mask\fR comparison when set. .TP .B PFMLIB_I386_P6_SEL_EDGE Enables edge detection of events. .LP The \fBcnt_mask\fR field is used to set the event threshold. The value of the counter is incremented each time the number of occurrences per cycle of the event is greater than or equal to the value of the field. When zero, all occurrences are counted. .sp .SH Handling of Pentium M The library provides full support for the Pentium M PMU. A Pentium M implements more events than a generic P6 processor. The library autodetects the host processor and can distinguish a generic P6 processor from a Pentium M. Thus no special call is needed. .sp .SH ERRORS Refer to the description of the \fBpfm_dispatch_events()\fR function for errors.
.SH SEE ALSO pfm_dispatch_events(3) and set of examples shipped with the library .SH AUTHOR Stephane Eranian .PP papi-5.6.0/src/perfctr-2.6.x/etc/costs/Athlon-1.2000775 001750 001750 00000001267 13216244366 023132 0ustar00jshenry1963jshenry1963000000 000000 [data from a 1.2 GHz Athlon] PERFCTR INIT: vendor 2, family 6, model 4, stepping 2, clock 1200062 kHz PERFCTR INIT: NITER == 64 PERFCTR INIT: loop overhead is 85 cycles PERFCTR INIT: rdtsc cost is 10.7 cycles (773 total) PERFCTR INIT: rdpmc cost is 18.6 cycles (1280 total) PERFCTR INIT: rdmsr (counter) cost is 59.9 cycles (3919 total) PERFCTR INIT: rdmsr (evntsel) cost is 52.4 cycles (3441 total) PERFCTR INIT: wrmsr (counter) cost is 79.8 cycles (5198 total) PERFCTR INIT: wrmsr (evntsel) cost is 231.8 cycles (14925 total) PERFCTR INIT: read cr4 cost is 9.8 cycles (715 total) PERFCTR INIT: write cr4 cost is 63.1 cycles (4129 total) perfctr: driver 2.3.10, cpu type AMD K7 at 1200062 kHz papi-5.6.0/src/libpfm4/lib/events/intel_bdx_unc_sbo_events.h000664 001750 001750 00000031733 13216244364 026160 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2017 Google Inc. All rights reserved * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. * * PMU: bdx_unc_sbo */ /* Unit masks shared by the UNC_S_RING_AD_USED, UNC_S_RING_AK_USED and UNC_S_RING_BL_USED entries of intel_bdx_unc_s_pe. */ static intel_x86_umask_t bdx_unc_s_ring_ad_used[]={ { .uname = "DOWN_EVEN", .ucode = 0x400, .udesc = "Down and Even", }, { .uname = "DOWN_ODD", .ucode = 0x800, .udesc = "Down and Odd", }, { .uname = "UP_EVEN", .ucode = 0x100, .udesc = "Up and Even", }, { .uname = "UP_ODD", .ucode = 0x200, .udesc = "Up and Odd", }, { .uname = "UP", .ucode = 0x300, .udesc = "Up", .uflags= INTEL_X86_NCOMBO, }, /* NOTE(review): UP is UP_EVEN|UP_ODD (0x300), but DOWN is 0xcc00 rather than DOWN_EVEN|DOWN_ODD (0xc00); confirm the DOWN encoding against the Intel uncore manual. */ { .uname = "DOWN", .ucode = 0xcc00, .udesc = "Down", .uflags= INTEL_X86_NCOMBO, }, }; /* Unit masks for UNC_S_RING_BOUNCES. */ static intel_x86_umask_t bdx_unc_s_ring_bounces[]={ { .uname = "AD_CACHE", .ucode = 0x100, .udesc = "Number of LLC responses that bounced on the Ring. -- ", }, { .uname = "AK_CORE", .ucode = 0x200, .udesc = "Number of LLC responses that bounced on the Ring. -- Acknowledgements to core", }, { .uname = "BL_CORE", .ucode = 0x400, .udesc = "Number of LLC responses that bounced on the Ring. -- Data Responses to core", }, { .uname = "IV_CORE", .ucode = 0x800, .udesc = "Number of LLC responses that bounced on the Ring.
-- Snoops of processors cachee.", }, }; static intel_x86_umask_t bdx_unc_s_ring_iv_used[]={ { .uname = "DN", .ucode = 0xc00, .udesc = "BL Ring in Use -- Any", .uflags= INTEL_X86_NCOMBO, }, { .uname = "UP", .ucode = 0x300, .udesc = "BL Ring in Use -- Any", .uflags= INTEL_X86_NCOMBO, }, }; static intel_x86_umask_t bdx_unc_s_rxr_bypass[]={ { .uname = "AD_BNC", .ucode = 0x200, .udesc = "Bypass -- AD - Bounces", .uflags= INTEL_X86_NCOMBO, }, { .uname = "AD_CRD", .ucode = 0x100, .udesc = "Bypass -- AD - Credits", .uflags= INTEL_X86_NCOMBO, }, { .uname = "AK", .ucode = 0x1000, .udesc = "Bypass -- AK", .uflags= INTEL_X86_NCOMBO, }, { .uname = "BL_BNC", .ucode = 0x800, .udesc = "Bypass -- BL - Bounces", .uflags= INTEL_X86_NCOMBO, }, { .uname = "BL_CRD", .ucode = 0x400, .udesc = "Bypass -- BL - Credits", .uflags= INTEL_X86_NCOMBO, }, { .uname = "IV", .ucode = 0x2000, .udesc = "Bypass -- IV", .uflags= INTEL_X86_NCOMBO, }, }; static intel_x86_umask_t bdx_unc_s_rxr_inserts[]={ { .uname = "AD_BNC", .ucode = 0x200, .udesc = "Ingress Allocations -- AD - Bounces", }, { .uname = "AD_CRD", .ucode = 0x100, .udesc = "Ingress Allocations -- AD - Credits", }, { .uname = "AK", .ucode = 0x1000, .udesc = "Ingress Allocations -- AK", }, { .uname = "BL_BNC", .ucode = 0x800, .udesc = "Ingress Allocations -- BL - Bounces", }, { .uname = "BL_CRD", .ucode = 0x400, .udesc = "Ingress Allocations -- BL - Credits", }, { .uname = "IV", .ucode = 0x2000, .udesc = "Ingress Allocations -- IV", }, }; static intel_x86_umask_t bdx_unc_s_rxr_occupancy[]={ { .uname = "AD_BNC", .ucode = 0x200, .udesc = "Ingress Occupancy -- AD - Bounces", .uflags= INTEL_X86_NCOMBO, }, { .uname = "AD_CRD", .ucode = 0x100, .udesc = "Ingress Occupancy -- AD - Credits", .uflags= INTEL_X86_NCOMBO, }, { .uname = "AK", .ucode = 0x1000, .udesc = "Ingress Occupancy -- AK", .uflags= INTEL_X86_NCOMBO, }, { .uname = "BL_BNC", .ucode = 0x800, .udesc = "Ingress Occupancy -- BL - Bounces", .uflags= INTEL_X86_NCOMBO, }, { .uname = "BL_CRD", 
.ucode = 0x400, .udesc = "Ingress Occupancy -- BL - Credits", .uflags= INTEL_X86_NCOMBO, }, { .uname = "IV", .ucode = 0x2000, .udesc = "Ingress Occupancy -- IV", .uflags= INTEL_X86_NCOMBO, }, }; static intel_x86_umask_t bdx_unc_s_txr_ads_used[]={ { .uname = "AD", .ucode = 0x100, .udesc = "TBD", }, { .uname = "AK", .ucode = 0x200, .udesc = "TBD", }, { .uname = "BL", .ucode = 0x400, .udesc = "TBD", }, }; static intel_x86_umask_t bdx_unc_s_txr_inserts[]={ { .uname = "AD_BNC", .ucode = 0x200, .udesc = "Egress Allocations -- AD - Bounces", }, { .uname = "AD_CRD", .ucode = 0x100, .udesc = "Egress Allocations -- AD - Credits", }, { .uname = "AK", .ucode = 0x1000, .udesc = "Egress Allocations -- AK", }, { .uname = "BL_BNC", .ucode = 0x800, .udesc = "Egress Allocations -- BL - Bounces", }, { .uname = "BL_CRD", .ucode = 0x400, .udesc = "Egress Allocations -- BL - Credits", }, { .uname = "IV", .ucode = 0x2000, .udesc = "Egress Allocations -- IV", }, }; static intel_x86_umask_t bdx_unc_s_txr_occupancy[]={ { .uname = "AD_BNC", .ucode = 0x200, .udesc = "Egress Occupancy -- AD - Bounces", }, { .uname = "AD_CRD", .ucode = 0x100, .udesc = "Egress Occupancy -- AD - Credits", }, { .uname = "AK", .ucode = 0x1000, .udesc = "Egress Occupancy -- AK", }, { .uname = "BL_BNC", .ucode = 0x800, .udesc = "Egress Occupancy -- BL - Bounces", }, { .uname = "BL_CRD", .ucode = 0x400, .udesc = "Egress Occupancy -- BL - Credits", }, { .uname = "IV", .ucode = 0x2000, .udesc = "Egress Occupancy -- IV", }, }; static intel_x86_umask_t bdx_unc_s_txr_ordering[]={ { .uname = "IVSNOOPGO_UP", .ucode = 0x100, .udesc = "TBD", }, { .uname = "IVSNOOP_DN", .ucode = 0x200, .udesc = "TBD", }, { .uname = "AK_U2C_UP_EVEN", .ucode = 0x400, .udesc = "TBD", }, { .uname = "AK_U2C_UP_ODD", .ucode = 0x800, .udesc = "TBD", }, { .uname = "AK_U2C_DN_EVEN", .ucode = 0x1000, .udesc = "TBD", }, { .uname = "AK_U2C_DN_ODD", .ucode = 0x2000, .udesc = "TBD", }, }; static intel_x86_entry_t intel_bdx_unc_s_pe[]={ { .name = 
"UNC_S_BOUNCE_CONTROL", .code = 0xa, .desc = "TBD", .modmsk = BDX_UNC_SBO_ATTRS, .cntmsk = 0xf, }, { .name = "UNC_S_CLOCKTICKS", .code = 0x0, .desc = "TBD", .modmsk = BDX_UNC_SBO_ATTRS, .cntmsk = 0xf, }, { .name = "UNC_S_FAST_ASSERTED", .code = 0x9, .desc = "Counts the number of cycles either the local or incoming distress signals are asserted. Incoming distress includes up, dn and across.", .modmsk = BDX_UNC_SBO_ATTRS, .cntmsk = 0xf, }, { .name = "UNC_S_RING_AD_USED", .code = 0x1b, .desc = "Counts the number of cycles that the AD ring is being used at this ring stop. This includes when packets are passing by and when packets are being sent, but does not include when packets are being sunk into the ring stop. We really have two rings in BDX -- a clockwise ring and a counter-clockwise ring. On the left side of the ring, the UP direction is on the clockwise ring and DN is on the counter-clockwise ring. On the right side of the ring, this is reversed. The first half of the CBos are on the left side of the ring, and the 2nd half are on the right side of the ring. In other words (for example), in a 4c part, Cbo 0 UP AD is NOT the same ring as CBo 2 UP AD because they are on opposite sides of the rhe ring.", .modmsk = BDX_UNC_SBO_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_s_ring_ad_used, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_s_ring_ad_used), }, { .name = "UNC_S_RING_AK_USED", .code = 0x1c, .desc = "Counts the number of cycles that the AK ring is being used at this ring stop. This includes when packets are passing by and when packets are being sent, but does not include when packets are being sunk into the ring stop. We really have two rings in BDX -- a clockwise ring and a counter-clockwise ring. On the left side of the ring, the UP direction is on the clockwise ring and DN is on the counter-clockwise ring. On the right side of the ring, this is reversed. 
The first half of the CBos are on the left side of the ring, and the 2nd half are on the right side of the ring. In other words (for example), in a 4c part, Cbo 0 UP AD is NOT the same ring as CBo 2 UP AD because they are on opposite sides of the ring.", .modmsk = BDX_UNC_SBO_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_s_ring_ad_used, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_s_ring_ad_used), }, /* BL ring: uses the same unit-mask table as the AD ring entry. */ { .name = "UNC_S_RING_BL_USED", .code = 0x1d, .desc = "Counts the number of cycles that the BL ring is being used at this ring stop. This includes when packets are passing by and when packets are being sent, but does not include when packets are being sunk into the ring stop. We really have two rings in BDX -- a clockwise ring and a counter-clockwise ring. On the left side of the ring, the UP direction is on the clockwise ring and DN is on the counter-clockwise ring. On the right side of the ring, this is reversed. The first half of the CBos are on the left side of the ring, and the 2nd half are on the right side of the ring. In other words (for example), in a 4c part, Cbo 0 UP AD is NOT the same ring as CBo 2 UP AD because they are on opposite sides of the ring.", .modmsk = BDX_UNC_SBO_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_s_ring_ad_used, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_s_ring_ad_used), }, { .name = "UNC_S_RING_BOUNCES", .code = 0x5, .desc = "TBD", .modmsk = BDX_UNC_SBO_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_s_ring_bounces, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_s_ring_bounces), }, /* IV ring: has its own unit-mask table (bdx_unc_s_ring_iv_used). */ { .name = "UNC_S_RING_IV_USED", .code = 0x1e, .desc = "Counts the number of cycles that the IV ring is being used at this ring stop. This includes when packets are passing by and when packets are being sent, but does not include when packets are being sunk into the ring stop. There is only 1 IV ring in BDX. Therefore, if one wants to monitor the Even ring, they should select both UP_EVEN and DN_EVEN.
To monitor the Odd ring, they should select both UP_ODD and DN_ODD.", .modmsk = BDX_UNC_SBO_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_s_ring_iv_used, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_s_ring_iv_used), }, /* Ingress (RXR) events. */ { .name = "UNC_S_RXR_BYPASS", .code = 0x12, .desc = "Bypass the Sbo Ingress.", .modmsk = BDX_UNC_SBO_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_s_rxr_bypass, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_s_rxr_bypass), }, { .name = "UNC_S_RXR_INSERTS", .code = 0x13, .desc = "Number of allocations into the Sbo Ingress. The Ingress is used to queue up requests received from the ring.", .modmsk = BDX_UNC_SBO_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_s_rxr_inserts, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_s_rxr_inserts), }, { .name = "UNC_S_RXR_OCCUPANCY", .code = 0x11, .desc = "Occupancy event for the Ingress buffers in the Sbo. The Ingress is used to queue up requests received from the ring.", .modmsk = BDX_UNC_SBO_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_s_rxr_occupancy, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_s_rxr_occupancy), }, /* Egress (TXR) events. NOTE(review): UNC_S_TXR_ADS_USED has an empty description string. */ { .name = "UNC_S_TXR_ADS_USED", .code = 0x4, .desc = "", .modmsk = BDX_UNC_SBO_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_s_txr_ads_used, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_s_txr_ads_used), }, { .name = "UNC_S_TXR_INSERTS", .code = 0x2, .desc = "Number of allocations into the Sbo Egress. The Egress is used to queue up requests destined for the ring.", .modmsk = BDX_UNC_SBO_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_s_txr_inserts, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_s_txr_inserts), }, { .name = "UNC_S_TXR_OCCUPANCY", .code = 0x1, .desc = "Occupancy event for the Egress buffers in the Sbo.
The egress is used to queue up requests destined for the ring.", .modmsk = BDX_UNC_SBO_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_s_txr_occupancy, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_s_txr_occupancy), }, { .name = "UNC_S_TXR_ORDERING", .code = 0x7, .desc = "TB", .modmsk = BDX_UNC_SBO_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_s_txr_ordering, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_s_txr_ordering), }, }; papi-5.6.0/src/perfctr-2.6.x/examples/000775 001750 001750 00000000000 13216244366 021363 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/perfctr-2.7.x/examples/global/arch.h000664 001750 001750 00000000463 13216244367 023716 0ustar00jshenry1963jshenry1963000000 000000 /* $Id: arch.h,v 1.1 2004/01/11 22:07:12 mikpe Exp $ * Architecture-specific support code. * * Copyright (C) 2004 Mikael Pettersson */ extern int counting_mips; /* for CPUs that cannot FLOPS */ extern void setup_control(const struct perfctr_info *info, struct perfctr_cpu_control *cpu_control); papi-5.6.0/src/libpfm-3.y/examples_v3.x/task_attach_timeout.c000664 001750 001750 00000023156 13216244362 026106 0ustar00jshenry1963jshenry1963000000 000000 /* * task_attach_timeout.c - attach to another task for monitoring for a short while * * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "detect_pmcs.h" #define NUM_PMCS PFMLIB_MAX_PMCS #define NUM_PMDS PFMLIB_MAX_PMDS #define MAX_EVT_NAME_LEN 128 static void fatal_error(char *fmt,...) __attribute__((noreturn)); static void fatal_error(char *fmt, ...) { va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); exit(1); } int parent(pid_t pid, unsigned long delay) { pfmlib_input_param_t inp; pfmlib_output_param_t outp; pfarg_pmr_t pc[NUM_PMCS]; pfarg_pmr_t pd[NUM_PMDS]; pfarg_sinfo_t sif; struct pollfd pollfd; pfarg_msg_t msg; unsigned int i, num_counters; int status, ret; int ctx_fd; char name[MAX_EVT_NAME_LEN]; memset(pc, 0, sizeof(pc)); memset(pd, 0, sizeof(pd)); memset(&inp,0, sizeof(inp)); memset(&outp,0, sizeof(outp)); memset(&sif,0, sizeof(sif)); pfm_get_num_counters(&num_counters); if (pfm_get_cycle_event(&inp.pfp_events[0]) != PFMLIB_SUCCESS) fatal_error("cannot find cycle event\n"); if (pfm_get_inst_retired_event(&inp.pfp_events[1]) != PFMLIB_SUCCESS) fatal_error("cannot find inst retired event\n"); i = 2; /* * set the privilege mode: * PFM_PLM3 : user level * PFM_PLM0 : kernel level */ inp.pfp_dfl_plm = PFM_PLM3; if (i > num_counters) { i = num_counters; printf("too many events provided (max=%d events), using first %d event(s)\n", num_counters, i); } /* * how many counters we use */ inp.pfp_event_count = i; /* * now create a session. 
we will later attach it to the task we are creating. */ ctx_fd = pfm_create(0, &sif); if (ctx_fd == -1) { if (errno == ENOSYS) { fatal_error("Your kernel does not have performance monitoring support!\n"); } fatal_error("cannot create session %s\n", strerror(errno)); } /* * build the pfp_unavail_pmcs bitmask by looking * at what perfmon has available. It is not always * the case that all PMU registers are actually available * to applications. For instance, on IA-32 platforms, some * registers may be reserved for the NMI watchdog timer. * * With this bitmap, the library knows which registers NOT to * use. Of source, it is possible that no valid assignement may * be possible if certina PMU registers are not available. */ detect_unavail_pmu_regs(&sif, &inp.pfp_unavail_pmcs, NULL); /* * let the library figure out the values for the PMCS */ if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) { fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); } /* * use our file descriptor for the poll. * we are interested in read events only. */ pollfd.fd = ctx_fd; pollfd.events = POLLIN; /* * Now prepare the argument to initialize the PMDs and PMCS. * We must pfp_pmc_count to determine the number of PMC to intialize. * We must use pfp_event_count to determine the number of PMD to initialize. * Some events causes extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. * * This step is new compared to libpfm-2.x. It is necessary because the library no * longer knows about the kernel data structures. 
*/ for (i=0; i < outp.pfp_pmc_count; i++) { pc[i].reg_num = outp.pfp_pmcs[i].reg_num; pc[i].reg_value = outp.pfp_pmcs[i].reg_value; } for (i=0; i < outp.pfp_pmd_count; i++) pd[i].reg_num = outp.pfp_pmds[i].reg_num; /* * Now program the registers * * We don't use the save variable to indicate the number of elements passed to * the kernel because, as we said earlier, pc may contain more elements than * the number of events we specified, i.e., contains more thann counting monitors. */ if (pfm_write(ctx_fd, 0, PFM_RW_PMC, pc, outp.pfp_pmc_count * sizeof(*pc)) == -1) fatal_error("pfm_write error errno %d\n",errno); /* * To be read, each PMD must be either written or declared * as being part of a sample (reg_smpl_pmds) */ if (pfm_write(ctx_fd, 0, PFM_RW_PMD, pd, outp.pfp_pmd_count * sizeof(*pd)) == -1) fatal_error("pfm_write(PMD) error errno %d\n",errno); ret = ptrace(PTRACE_ATTACH, pid, NULL, 0); if (ret == -1) fatal_error("cannot attach to %d: %s\n", pid, strerror(errno)); /* * wait for the child to be actually stopped */ waitpid(pid, &status, WUNTRACED); /* * check if process exited early */ if (WIFEXITED(status)) fatal_error("command process %d exited too early with status %d\n", pid, WEXITSTATUS(status)); /* * the task is stopped at this point */ /* * now we attach (i.e., attach) the session to ourself */ if (pfm_attach(ctx_fd, 0, pid) == -1) fatal_error("pfm_attach error errno %d\n",errno); /* * activate monitoring. The task is still STOPPED at this point. Monitoring * will not take effect until the execution of the task is resumed. */ if (pfm_set_state(ctx_fd, 0, PFM_ST_START) == -1) fatal_error("pfm_set_state(start) error errno %d\n",errno); /* * now resume execution of the task, effectively activating * monitoring. 
*/ ptrace(PTRACE_DETACH, pid, NULL, 0); printf("attached to [%d], timeout set to %lu seconds\n", pid, delay); /* * now the task is running */ /* * We cannot simply do a waitpid() because we may be attaching to a process * totally unrelated to our program. Instead we use a perfmon facility that * notifies us when the monitoring task is exiting. * * When a task with a monitoring session attached to it exits, a PFM_MSG_END * is generated. It can be retrieve with a simple read() on the session's descriptor. * * Another reason why you might return from the read is if there was a counter * overflow, unlikely in this example. * * To measure only for short period of time, use select or poll with a timeout, * see task_attach_timeout.c * */ ret = poll(&pollfd, 1, delay*1000); switch( ret ) { case -1: fatal_error("cannot read from descriptor: %s\n", strerror(errno)); /* no return */ case 1: /* * there is a message, i.e., the program exited before our timeout */ if (ret == 1) { /* * extract message */ ret = read(ctx_fd, &msg, sizeof(msg)); if (msg.type != PFM_MSG_END) fatal_error("unexpected msg type : %d\n", msg.type); } break; case 0: /* * we timed out, we need to stop the task to unload */ ret = ptrace(PTRACE_ATTACH, pid, NULL, 0); if (ret == -1) fatal_error("cannot attach to %d: %s\n", pid, strerror(errno)); /* * wait for task to be actually stopped */ waitpid(pid, &status, WUNTRACED); /* * check if process exited, then no need to unload */ if (WIFEXITED(status)) goto read_results; if (pfm_attach(ctx_fd, 0, PFM_NO_TARGET) == -1) fatal_error("pfm_detach error errno %d\n",errno); /* * let it run free again */ ptrace(PTRACE_DETACH, pid, NULL, 0); break; default: fatal_error("unexpected return from poll: %d\n", ret); } read_results: /* * now simply read the results. 
*/ if (pfm_read(ctx_fd, 0, PFM_RW_PMD, pd, inp.pfp_event_count * sizeof(*pd)) == -1) { fatal_error("pfm_read(PMD) error errno %d\n",errno); return -1; } /* * print the results * * It is important to realize, that the first event we specified may not * be in PMD4. Not all events can be measured by any monitor. That's why * we need to use the pc[] array to figure out where event i was allocated. * */ for (i=0; i < inp.pfp_event_count; i++) { pfm_get_full_event_name(&inp.pfp_events[i], name, MAX_EVT_NAME_LEN); printf("PMD%-3u %20"PRIu64" %s\n", pd[i].reg_num, pd[i].reg_value, name); } /* * free the session */ close(ctx_fd); return 0; } int main(int argc, char **argv) { pfmlib_options_t pfmlib_options; unsigned long delay; pid_t pid; int ret; if (argc < 2) fatal_error("usage: %s pid [timeout]\n", argv[0]); pid = atoi(argv[1]); delay = argc > 2 ? strtoul(argv[2], NULL, 10) : 10; /* * pass options to library (optional) */ memset(&pfmlib_options, 0, sizeof(pfmlib_options)); pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ pfm_set_options(&pfmlib_options); /* * Initialize pfm library (required before we can use it) */ ret = pfm_initialize(); if (ret != PFMLIB_SUCCESS) fatal_error("Cannot initialize library: %s\n", pfm_strerror(ret)); return parent(pid, delay); } papi-5.6.0/src/README000664 001750 001750 00000000224 13216244356 016204 0ustar00jshenry1963jshenry1963000000 000000 /* * File: papi/src/README * CVS: $Id$ * Author: Philip Mucci * mucci@cs.utk.edu */ Please see the README in the root directory. papi-5.6.0/src/libpfm4/lib/events/intel_hswep_unc_qpi_events.h000664 001750 001750 00000052154 13216244364 026537 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2014 Google Inc. 
All rights reserved * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. 
* * PMU: hswep_unc_qpi (Intel Haswell-EP QPI uncore) */ static const intel_x86_umask_t hswep_unc_q_direct2core[]={ { .uname = "FAILURE_CREDITS", .udesc = "Number of spawn failures due to lack of Egress credits", .ucode = 0x200, }, { .uname = "FAILURE_CREDITS_RBT", .udesc = "Number of spawn failures due to lack of Egress credit and route-back table (RBT) bit was not set", .ucode = 0x800, }, { .uname = "FAILURE_RBT_HIT", .udesc = "Number of spawn failures because route-back table (RBT) specified that the transaction should not trigger a direct2core transaction", .ucode = 0x400, }, { .uname = "SUCCESS_RBT_HIT", .udesc = "Number of spawn successes", .ucode = 0x100, }, { .uname = "FAILURE_MISS", .udesc = "Number of spawn failures due to RBT tag not matching although the valid bit was set and there was enough Egress credits", .ucode = 0x1000, }, { .uname = "FAILURE_CREDITS_MISS", .udesc = "Number of spawn failures due to RBT tag not matching and they were not enough Egress credits. The valid bit was set", .ucode = 0x2000, }, { .uname = "FAILURE_RBT_MISS", .udesc = "Number of spawn failures due to RBT tag not matching, the valid bit was not set but there were enough Egress credits", .ucode = 0x4000, }, { .uname = "FAILURE_CREDITS_RBT_MISS", .udesc = "Number of spawn failures due to RBT tag not matching, the valid bit was not set and there were not enough Egress credits", .ucode = 0x8000, }, }; static const intel_x86_umask_t hswep_unc_q_rxl_credits_consumed_vn0[]={ { .uname = "DRS", .udesc = "Number of times VN0 consumed for DRS message class", .ucode = 0x100, }, { .uname = "HOM", .udesc = "Number of times VN0 consumed for HOM message class", .ucode = 0x800, }, { .uname = "NCB", .udesc = "Number of times VN0 consumed for NCB message class", .ucode = 0x200, }, { .uname = "NCS", .udesc = "Number of times VN0 consumed for NCS message class", .ucode = 0x400, }, { .uname = "NDR", .udesc = "Number of times VN0 consumed for NDR message class", .ucode = 0x2000, }, { .uname = 
"SNP", .udesc = "Number of times VN0 consumed for SNP message class", .ucode = 0x1000, }, }; static const intel_x86_umask_t hswep_unc_q_rxl_credits_consumed_vn1[]={ { .uname = "DRS", .udesc = "Number of times VN1 consumed for DRS message class", .ucode = 0x100, }, { .uname = "HOM", .udesc = "Number of times VN1 consumed for HOM message class", .ucode = 0x800, }, { .uname = "NCB", .udesc = "Number of times VN1 consumed for NCB message class", .ucode = 0x200, }, { .uname = "NCS", .udesc = "Number of times VN1 consumed for NCS message class", .ucode = 0x400, }, { .uname = "NDR", .udesc = "Number of times VN1 consumed for NDR message class", .ucode = 0x2000, }, { .uname = "SNP", .udesc = "Number of times VN1 consumed for SNP message class", .ucode = 0x1000, }, }; static const intel_x86_umask_t hswep_unc_q_txl_flits_g0[]={ { .uname = "DATA", .udesc = "Number of data flits over QPI", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "NON_DATA", .udesc = "Number of non-NULL non-data flits over QPI", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t hswep_unc_q_rxl_flits_g1[]={ { .uname = "DRS", .udesc = "Number of flits over QPI on the Data Response (DRS) channel", .ucode = 0x1800, .uflags = INTEL_X86_NCOMBO, }, { .uname = "DRS_DATA", .udesc = "Number of data flits over QPI on the Data Response (DRS) channel", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO, }, { .uname = "DRS_NONDATA", .udesc = "Number of protocol flits over QPI on the Data Response (DRS) channel", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "HOM", .udesc = "Number of flits over QPI on the home channel", .ucode = 0x600, .uflags = INTEL_X86_NCOMBO, }, { .uname = "HOM_NONREQ", .udesc = "Number of non-request flits over QPI on the home channel", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO, }, { .uname = "HOM_REQ", .udesc = "Number of data requests over QPI on the home channel", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "SNP", .udesc = "Number 
of snoop requests flits over QPI", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t hswep_unc_q_rxl_flits_g2[]={ { .uname = "NCB", .udesc = "Number of non-coherent bypass flits", .ucode = 0xc00, .uflags = INTEL_X86_NCOMBO, }, { .uname = "NCB_DATA", .udesc = "Number of non-coherent data flits", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO, }, { .uname = "NCB_NONDATA", .udesc = "Number of bypass non-data flits", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO, }, { .uname = "NCS", .udesc = "Number of non-coherent standard (NCS) flits", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "NDR_AD", .udesc = "Number of flits received over Non-data response (NDR) channel", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "NDR_AK", .udesc = "Number of flits received on the Non-data response (NDR) channel)", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t hswep_unc_q_txr_ad_hom_credit_acquired[]={ { .uname = "VN0", .udesc = "for VN0", .ucode = 0x100, }, { .uname = "VN1", .udesc = "for VN1", .ucode = 0x200, }, }; static const intel_x86_umask_t hswep_unc_q_txr_bl_drs_credit_acquired[]={ { .uname = "VN0", .udesc = "for VN0", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "VN1", .udesc = "for VN1", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "VN_SHR", .udesc = "for shared VN", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_entry_t intel_hswep_unc_q_pe[]={ { .name = "UNC_Q_CLOCKTICKS", .desc = "Number of qfclks", .code = 0x14, .cntmsk = 0xf, .modmsk = HSWEP_UNC_QPI_ATTRS, }, { .name = "UNC_Q_CTO_COUNT", .desc = "Count of CTO Events", .code = 0x38 | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .modmsk = HSWEP_UNC_QPI_ATTRS, }, { .name = "UNC_Q_DIRECT2CORE", .desc = "Direct 2 Core Spawning", .code = 0x13, .cntmsk = 0xf, .ngrp = 1, .modmsk = HSWEP_UNC_QPI_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_q_direct2core), .umasks = hswep_unc_q_direct2core }, { .name 
= "UNC_Q_L1_POWER_CYCLES", .desc = "Cycles in L1", .code = 0x12, .cntmsk = 0xf, .modmsk = HSWEP_UNC_QPI_ATTRS, }, { .name = "UNC_Q_RXL0P_POWER_CYCLES", .desc = "Cycles in L0p", .code = 0x10, .cntmsk = 0xf, .modmsk = HSWEP_UNC_QPI_ATTRS, }, { .name = "UNC_Q_RXL0_POWER_CYCLES", .desc = "Cycles in L0", .code = 0xf, .cntmsk = 0xf, .modmsk = HSWEP_UNC_QPI_ATTRS, }, { .name = "UNC_Q_RXL_BYPASSED", .desc = "Rx Flit Buffer Bypassed", .code = 0x9, .cntmsk = 0xf, .modmsk = HSWEP_UNC_QPI_ATTRS, }, { .name = "UNC_Q_RXL_CREDITS_CONSUMED_VN0", .desc = "VN0 Credit Consumed", .code = 0x1e | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .ngrp = 1, .modmsk = HSWEP_UNC_QPI_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_q_rxl_credits_consumed_vn0), .umasks = hswep_unc_q_rxl_credits_consumed_vn0 }, { .name = "UNC_Q_RXL_CREDITS_CONSUMED_VN1", .desc = "VN1 Credit Consumed", .code = 0x39 | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .ngrp = 1, .modmsk = HSWEP_UNC_QPI_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_q_rxl_credits_consumed_vn1), .umasks = hswep_unc_q_rxl_credits_consumed_vn1 }, { .name = "UNC_Q_RXL_CREDITS_CONSUMED_VNA", .desc = "VNA Credit Consumed", .code = 0x1d | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .modmsk = HSWEP_UNC_QPI_ATTRS, }, { .name = "UNC_Q_RXL_CYCLES_NE", .desc = "RxQ Cycles Not Empty", .code = 0xa, .cntmsk = 0xf, .modmsk = HSWEP_UNC_QPI_ATTRS, }, { .name = "UNC_Q_RXL_FLITS_G1", .desc = "Flits Received - Group 1", .code = 0x2 | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .ngrp = 1, .modmsk = HSWEP_UNC_QPI_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_q_rxl_flits_g1), .umasks = hswep_unc_q_rxl_flits_g1 }, { .name = "UNC_Q_RXL_FLITS_G2", .desc = "Flits Received - Group 2", .code = 0x3 | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .ngrp = 1, .modmsk = HSWEP_UNC_QPI_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_q_rxl_flits_g2), .umasks = hswep_unc_q_rxl_flits_g2 }, { .name = "UNC_Q_RXL_INSERTS", .desc = "Rx Flit Buffer Allocations", .code = 0x8, .cntmsk = 0xf, .modmsk 
= HSWEP_UNC_QPI_ATTRS, }, { .name = "UNC_Q_RXL_INSERTS_DRS", .desc = "Rx Flit Buffer Allocations - DRS", .code = 0x9 | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .modmsk = HSWEP_UNC_QPI_ATTRS, .ngrp = 1, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_q_txr_ad_hom_credit_acquired), .umasks = hswep_unc_q_txr_ad_hom_credit_acquired, }, { .name = "UNC_Q_RXL_INSERTS_HOM", .desc = "Rx Flit Buffer Allocations - HOM", .code = 0xc | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .modmsk = HSWEP_UNC_QPI_ATTRS, .ngrp = 1, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_q_txr_ad_hom_credit_acquired), .umasks = hswep_unc_q_txr_ad_hom_credit_acquired, }, { .name = "UNC_Q_RXL_INSERTS_NCB", .desc = "Rx Flit Buffer Allocations - NCB", .code = 0xa | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .modmsk = HSWEP_UNC_QPI_ATTRS, .ngrp = 1, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_q_txr_ad_hom_credit_acquired), .umasks = hswep_unc_q_txr_ad_hom_credit_acquired, }, { .name = "UNC_Q_RXL_INSERTS_NCS", .desc = "Rx Flit Buffer Allocations - NCS", .code = 0xb | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .modmsk = HSWEP_UNC_QPI_ATTRS, .ngrp = 1, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_q_txr_ad_hom_credit_acquired), .umasks = hswep_unc_q_txr_ad_hom_credit_acquired, }, { .name = "UNC_Q_RXL_INSERTS_NDR", .desc = "Rx Flit Buffer Allocations - NDR", .code = 0xe | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .modmsk = HSWEP_UNC_QPI_ATTRS, .ngrp = 1, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_q_txr_ad_hom_credit_acquired), .umasks = hswep_unc_q_txr_ad_hom_credit_acquired, }, { .name = "UNC_Q_RXL_INSERTS_SNP", .desc = "Rx Flit Buffer Allocations - SNP", .code = 0xd | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .modmsk = HSWEP_UNC_QPI_ATTRS, .ngrp = 1, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_q_txr_ad_hom_credit_acquired), .umasks = hswep_unc_q_txr_ad_hom_credit_acquired, }, { .name = "UNC_Q_RXL_OCCUPANCY", .desc = "RxQ Occupancy - All Packets", .code = 0xb, .cntmsk = 0xf, .modmsk = HSWEP_UNC_QPI_ATTRS, }, { .name = 
"UNC_Q_RXL_OCCUPANCY_DRS", .desc = "RxQ Occupancy - DRS", .code = 0x15 | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .modmsk = HSWEP_UNC_QPI_ATTRS, .ngrp = 1, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_q_txr_ad_hom_credit_acquired), .umasks = hswep_unc_q_txr_ad_hom_credit_acquired, }, { .name = "UNC_Q_RXL_OCCUPANCY_HOM", .desc = "RxQ Occupancy - HOM", .code = 0x18 | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .modmsk = HSWEP_UNC_QPI_ATTRS, .ngrp = 1, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_q_txr_ad_hom_credit_acquired), .umasks = hswep_unc_q_txr_ad_hom_credit_acquired, }, { .name = "UNC_Q_RXL_OCCUPANCY_NCB", .desc = "RxQ Occupancy - NCB", .code = 0x16 | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .modmsk = HSWEP_UNC_QPI_ATTRS, .ngrp = 1, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_q_txr_ad_hom_credit_acquired), .umasks = hswep_unc_q_txr_ad_hom_credit_acquired, }, { .name = "UNC_Q_RXL_OCCUPANCY_NCS", .desc = "RxQ Occupancy - NCS", .code = 0x17 | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .modmsk = HSWEP_UNC_QPI_ATTRS, .ngrp = 1, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_q_txr_ad_hom_credit_acquired), .umasks = hswep_unc_q_txr_ad_hom_credit_acquired, }, { .name = "UNC_Q_RXL_OCCUPANCY_NDR", .desc = "RxQ Occupancy - NDR", .code = 0x1a | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .modmsk = HSWEP_UNC_QPI_ATTRS, .ngrp = 1, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_q_txr_ad_hom_credit_acquired), .umasks = hswep_unc_q_txr_ad_hom_credit_acquired, }, { .name = "UNC_Q_RXL_OCCUPANCY_SNP", .desc = "RxQ Occupancy - SNP", .code = 0x19 | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .modmsk = HSWEP_UNC_QPI_ATTRS, .ngrp = 1, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_q_txr_ad_hom_credit_acquired), .umasks = hswep_unc_q_txr_ad_hom_credit_acquired, }, { .name = "UNC_Q_TXL0P_POWER_CYCLES", .desc = "Cycles in L0p", .code = 0xd, .cntmsk = 0xf, .modmsk = HSWEP_UNC_QPI_ATTRS, }, { .name = "UNC_Q_TXL0_POWER_CYCLES", .desc = "Cycles in L0", .code = 0xc, .cntmsk = 0xf, .modmsk = HSWEP_UNC_QPI_ATTRS, }, { .name = 
"UNC_Q_TXL_BYPASSED", .desc = "Tx Flit Buffer Bypassed", .code = 0x5, .cntmsk = 0xf, .modmsk = HSWEP_UNC_QPI_ATTRS, }, { .name = "UNC_Q_TXL_CYCLES_NE", .desc = "Tx Flit Buffer Cycles not Empty", .code = 0x6, .cntmsk = 0xf, .modmsk = HSWEP_UNC_QPI_ATTRS, }, { .name = "UNC_Q_TXL_FLITS_G0", .desc = "Flits Transferred - Group 0", .code = 0x0, .cntmsk = 0xf, .ngrp = 1, .modmsk = HSWEP_UNC_QPI_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_q_txl_flits_g0), .umasks = hswep_unc_q_txl_flits_g0 }, { .name = "UNC_Q_TXL_FLITS_G1", .desc = "Flits Transferred - Group 1", .code = 0x0 | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .ngrp = 1, .modmsk = HSWEP_UNC_QPI_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_q_rxl_flits_g1), .umasks = hswep_unc_q_rxl_flits_g1 /* shared with rxl_flits_g1 */ }, { .name = "UNC_Q_TXL_FLITS_G2", .desc = "Flits Transferred - Group 2", .code = 0x1 | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .ngrp = 1, .modmsk = HSWEP_UNC_QPI_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_q_rxl_flits_g2), .umasks = hswep_unc_q_rxl_flits_g2 /* shared with rxl_flits_g2 */ }, { .name = "UNC_Q_TXL_INSERTS", .desc = "Tx Flit Buffer Allocations", .code = 0x4, .cntmsk = 0xf, .modmsk = HSWEP_UNC_QPI_ATTRS, }, { .name = "UNC_Q_TXL_OCCUPANCY", .desc = "Tx Flit Buffer Occupancy", .code = 0x7, .cntmsk = 0xf, .modmsk = HSWEP_UNC_QPI_ATTRS, }, { .name = "UNC_Q_VNA_CREDIT_RETURNS", .desc = "VNA Credits Returned", .code = 0x1c | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .modmsk = HSWEP_UNC_QPI_ATTRS, }, { .name = "UNC_Q_VNA_CREDIT_RETURN_OCCUPANCY", .desc = "VNA Credits Pending Return - Occupancy", .code = 0x1b | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .modmsk = HSWEP_UNC_QPI_ATTRS, }, { .name = "UNC_Q_TXR_AD_HOM_CREDIT_ACQUIRED", .desc = "R3QPI Egress credit occupancy AD HOM", .code = 0x26 | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .ngrp = 1, .modmsk = HSWEP_UNC_QPI_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_q_txr_ad_hom_credit_acquired), .umasks = 
hswep_unc_q_txr_ad_hom_credit_acquired, }, { .name = "UNC_Q_TXR_AD_HOM_CREDIT_OCCUPANCY", .desc = "R3QPI Egress credit occupancy AD HOM", .code = 0x22 | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .ngrp = 1, .modmsk = HSWEP_UNC_QPI_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_q_txr_ad_hom_credit_acquired), /* shared */ .umasks = hswep_unc_q_txr_ad_hom_credit_acquired, }, { .name = "UNC_Q_TXR_AD_NDR_CREDIT_ACQUIRED", .desc = "R3QPI Egress credit occupancy AD NDR", .code = 0x28 | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .ngrp = 1, .modmsk = HSWEP_UNC_QPI_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_q_txr_ad_hom_credit_acquired), .umasks = hswep_unc_q_txr_ad_hom_credit_acquired, }, { .name = "UNC_Q_TXR_AD_NDR_CREDIT_OCCUPANCY", .desc = "R3QPI Egress credit occupancy AD NDR", .code = 0x24 | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .ngrp = 1, .modmsk = HSWEP_UNC_QPI_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_q_txr_ad_hom_credit_acquired), /* shared */ .umasks = hswep_unc_q_txr_ad_hom_credit_acquired, }, { .name = "UNC_Q_TXR_AD_SNP_CREDIT_ACQUIRED", .desc = "R3QPI Egress credit occupancy AD SNP", .code = 0x27 | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .ngrp = 1, .modmsk = HSWEP_UNC_QPI_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_q_txr_ad_hom_credit_acquired), .umasks = hswep_unc_q_txr_ad_hom_credit_acquired, }, { .name = "UNC_Q_TXR_AD_SNP_CREDIT_OCCUPANCY", .desc = "R3QPI Egress credit occupancy AD SNP", .code = 0x23 | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .ngrp = 1, .modmsk = HSWEP_UNC_QPI_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_q_txr_ad_hom_credit_acquired), /* shared */ .umasks = hswep_unc_q_txr_ad_hom_credit_acquired, }, { .name = "UNC_Q_TXR_AK_NDR_CREDIT_ACQUIRED", .desc = "R3QPI Egress credit occupancy AK NDR", .code = 0x29 | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .ngrp = 1, .modmsk = HSWEP_UNC_QPI_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_q_txr_ad_hom_credit_acquired), .umasks = hswep_unc_q_txr_ad_hom_credit_acquired, }, { .name = 
"UNC_Q_TXR_AK_NDR_CREDIT_OCCUPANCY", .desc = "R3QPI Egress credit occupancy AD NDR", .code = 0x25 | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .ngrp = 1, .modmsk = HSWEP_UNC_QPI_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_q_txr_ad_hom_credit_acquired), /* shared */ .umasks = hswep_unc_q_txr_ad_hom_credit_acquired, }, { .name = "UNC_Q_TXR_BL_DRS_CREDIT_ACQUIRED", .desc = "R3QPI Egress credit occupancy BL DRS", .code = 0x2a | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .ngrp = 1, .modmsk = HSWEP_UNC_QPI_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_q_txr_bl_drs_credit_acquired), .umasks = hswep_unc_q_txr_bl_drs_credit_acquired, }, { .name = "UNC_Q_TXR_BL_DRS_CREDIT_OCCUPANCY", .desc = "R3QPI Egress credit occupancy BL DRS", .code = 0x1f | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .ngrp = 1, .modmsk = HSWEP_UNC_QPI_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_q_txr_bl_drs_credit_acquired), /* shared */ .umasks = hswep_unc_q_txr_bl_drs_credit_acquired, }, { .name = "UNC_Q_TXR_BL_NCB_CREDIT_ACQUIRED", .desc = "R3QPI Egress credit occupancy BL NCB", .code = 0x2b | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .ngrp = 1, .modmsk = HSWEP_UNC_QPI_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_q_txr_ad_hom_credit_acquired), .umasks = hswep_unc_q_txr_ad_hom_credit_acquired, }, { .name = "UNC_Q_TXR_BL_NCB_CREDIT_OCCUPANCY", .desc = "R3QPI Egress credit occupancy BL NCB", .code = 0x20 | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .ngrp = 1, .modmsk = HSWEP_UNC_QPI_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_q_txr_ad_hom_credit_acquired), /* shared */ .umasks = hswep_unc_q_txr_ad_hom_credit_acquired, }, { .name = "UNC_Q_TXR_BL_NCS_CREDIT_ACQUIRED", .desc = "R3QPI Egress credit occupancy BL NCS", .code = 0x2c | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .ngrp = 1, .modmsk = HSWEP_UNC_QPI_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_q_txr_ad_hom_credit_acquired), .umasks = hswep_unc_q_txr_ad_hom_credit_acquired, }, { .name = "UNC_Q_TXR_BL_NCS_CREDIT_OCCUPANCY", .desc = "R3QPI 
Egress credit occupancy BL NCS", .code = 0x21 | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .ngrp = 1, .modmsk = HSWEP_UNC_QPI_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_q_txr_ad_hom_credit_acquired), /* shared */ .umasks = hswep_unc_q_txr_ad_hom_credit_acquired, }, }; papi-5.6.0/src/libpfm4/lib/pfmlib_s390x_priv.h000664 001750 001750 00000000712 13216244365 023061 0ustar00jshenry1963jshenry1963000000 000000 #ifndef __PFMLIB_S390X_PRIV_H__ #define __PFMLIB_S390X_PRIV_H__ #define CPUMF_COUNTER_MAX 256 typedef struct { uint64_t ctrnum; /* counter number */ unsigned int ctrset; /* counter set */ char *name; /* counter ID */ char *desc; /* short description */ } pme_cpumf_ctr_t; #define min(a, b) ((a) < (b) ? (a) : (b)) extern int pfm_s390x_get_perf_encoding(void *this, pfmlib_event_desc_t *e); #endif /* __PFMLIB_S390X_PRIV_H__ */ papi-5.6.0/man/README000664 001750 001750 00000001026 13216244355 016170 0ustar00jshenry1963jshenry1963000000 000000 /* * File: README * CVS: $Id$ * Author: Philip Mucci * mucci@cs.utk.edu * Mods: * */ This directory contains: Makefile Installs man pages. man1/ Man pages for the PAPI utility applications. man3/ Man pages for the PAPI API functions. Makefile Usage: make make install DESTDIR= Beginning with PAPI 4.2.0, man pages are generated from the PAPI sources using doxygen scripts found in the papi/doc directory. 
They are updated prior to each release.papi-5.6.0/src/libpfm4/lib/pfmlib_intel_skl.c000664 001750 001750 00000007655 13216244365 023127 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_intel_skl.c : Intel Skylake core PMU * * Copyright (c) 2015 Google, Inc * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ /* private headers */ #include "pfmlib_priv.h" #include "pfmlib_intel_x86_priv.h" #include "events/intel_skl_events.h" static const int skl_models[] = { 78, /* Skylake mobile */ 94, /* Skylake desktop */ 142,/* KabyLake mobile */ 158,/* KabyLake desktop */ 0 }; static const int skx_models[] = { 85, /* Skylake X */ 0 }; static int pfm_skl_init(void *this) { pfm_intel_x86_cfg.arch_version = 4; return PFM_SUCCESS; } pfmlib_pmu_t intel_skl_support={ .desc = "Intel Skylake", .name = "skl", .pmu = PFM_PMU_INTEL_SKL, .pme_count = LIBPFM_ARRAY_SIZE(intel_skl_pe), .type = PFM_PMU_TYPE_CORE, .supported_plm = INTEL_X86_PLM, .num_cntrs = 8, /* consider with HT off by default */ .num_fixed_cntrs = 3, .max_encoding = 2, /* offcore_response */ .pe = intel_skl_pe, .atdesc = intel_x86_mods, .flags = PFMLIB_PMU_FL_RAW_UMASK | INTEL_X86_PMU_FL_ECMASK, .cpu_family = 6, .cpu_models = skl_models, .pmu_detect = pfm_intel_x86_model_detect, .pmu_init = pfm_skl_init, .get_event_encoding[PFM_OS_NONE] = pfm_intel_x86_get_encoding, PFMLIB_ENCODE_PERF(pfm_intel_x86_get_perf_encoding), .get_event_first = pfm_intel_x86_get_event_first, .get_event_next = pfm_intel_x86_get_event_next, .event_is_valid = pfm_intel_x86_event_is_valid, .validate_table = pfm_intel_x86_validate_table, .get_event_info = pfm_intel_x86_get_event_info, .get_event_attr_info = pfm_intel_x86_get_event_attr_info, PFMLIB_VALID_PERF_PATTRS(pfm_intel_x86_perf_validate_pattrs), .get_event_nattrs = pfm_intel_x86_get_event_nattrs, .can_auto_encode = pfm_intel_x86_can_auto_encode, }; pfmlib_pmu_t intel_skx_support={ .desc = "Intel Skylake X", .name = "skx", .pmu = PFM_PMU_INTEL_SKX, .pme_count = LIBPFM_ARRAY_SIZE(intel_skl_pe), .type = PFM_PMU_TYPE_CORE, .supported_plm = INTEL_X86_PLM, .num_cntrs = 8, /* consider with HT off by default */ .num_fixed_cntrs = 3, .max_encoding = 2, /* offcore_response */ .pe = intel_skl_pe, .atdesc = intel_x86_mods, .flags = PFMLIB_PMU_FL_RAW_UMASK | INTEL_X86_PMU_FL_ECMASK, .cpu_family = 6, 
.cpu_models = skx_models, .pmu_detect = pfm_intel_x86_model_detect, .pmu_init = pfm_skl_init, .get_event_encoding[PFM_OS_NONE] = pfm_intel_x86_get_encoding, PFMLIB_ENCODE_PERF(pfm_intel_x86_get_perf_encoding), .get_event_first = pfm_intel_x86_get_event_first, .get_event_next = pfm_intel_x86_get_event_next, .event_is_valid = pfm_intel_x86_event_is_valid, .validate_table = pfm_intel_x86_validate_table, .get_event_info = pfm_intel_x86_get_event_info, .get_event_attr_info = pfm_intel_x86_get_event_attr_info, PFMLIB_VALID_PERF_PATTRS(pfm_intel_x86_perf_validate_pattrs), .get_event_nattrs = pfm_intel_x86_get_event_nattrs, .can_auto_encode = pfm_intel_x86_can_auto_encode, }; papi-5.6.0/src/components/cuda/sampling/test/matmul.cu000664 001750 001750 00000011553 13216244357 025056 0ustar00jshenry1963jshenry1963000000 000000 //This is a matrix multiplication program in CUDA without any optimizations //like tiling, using shared memory etc #include #include #include #include __global__ void MatrixMulKernel(float* Md, float* Nd, float* Pd, int width) { //2D thread ID int bx=blockIdx.x; int by=blockIdx.y; int tdx=threadIdx.x; int tdy=threadIdx.y; int tx=bx*blockDim.x+tdx; int ty=by*blockDim.y+tdy; //Pvalue stores the Pd element that is computed by the thread float Pvalue=0; for(int k=0;k>>(Md,Nd,Pd,width); // error=cudaDeviceSynchronize(); error =cudaEventRecord(stop,NULL); if(error!=cudaSuccess){ printf("cuda event stop record failed with error=%s\n",cudaGetErrorString(error)); exit(-1); } error = cudaEventSynchronize(stop); if(error!=cudaSuccess){ printf("cuda event sync failed :%s\n",cudaGetErrorString(error)); exit(-1); } float msecTotal=0.0f; error = cudaEventElapsedTime(&msecTotal,start,stop); if(error!=cudaSuccess){ printf("cuda elapsed time calculation failed \n"); exit(-1); } float msecPerMatrixMul = msecTotal; double flopsPerMatrixMul = 2*width*width*width; double gigaFlops=(flopsPerMatrixMul*1.0e-9f)/(msecPerMatrixMul/1000.0f); printf("Performance= %.2f GFlop/s, Time= 
%.3f msec, Size= %.0f Ops, WorkgroupSize= %u threads/block\n", gigaFlops, msecPerMatrixMul, flopsPerMatrixMul, width * width); error=cudaMemcpy(P,Pd,size,cudaMemcpyDeviceToHost); if(error!=cudaSuccess){ printf("Device memoory copy back for Pd failed \n"); exit(-1); } printf("Very slow Host Matrix Mult \n"); float temp; // initialization of host data for (int i = 0; i < width; ++i) { for ( int j = 0; j < width; ++j) { temp=0; for(int k=0; k * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include #include #include #include #include /* private headers */ #include "pfmlib_priv.h" #include "pfmlib_intel_x86_priv.h" #include "pfmlib_intel_snbep_unc_priv.h" #include "events/intel_ivbep_unc_ubo_events.h" pfmlib_pmu_t intel_ivbep_unc_ubo_support = { .desc = "Intel Ivy Bridge-EP U-Box uncore", .name = "ivbep_unc_ubo", .perf_name = "uncore_ubox", .pmu = PFM_PMU_INTEL_IVBEP_UNC_UBOX, .pme_count = LIBPFM_ARRAY_SIZE(intel_ivbep_unc_u_pe), .type = PFM_PMU_TYPE_UNCORE, .num_cntrs = 2, .num_fixed_cntrs = 1, .max_encoding = 1, .pe = intel_ivbep_unc_u_pe, .atdesc = snbep_unc_mods, .flags = PFMLIB_PMU_FL_RAW_UMASK, .pmu_detect = pfm_intel_ivbep_unc_detect, .get_event_encoding[PFM_OS_NONE] = pfm_intel_snbep_unc_get_encoding, PFMLIB_ENCODE_PERF(pfm_intel_snbep_unc_get_perf_encoding), PFMLIB_OS_DETECT(pfm_intel_x86_perf_detect), .get_event_first = pfm_intel_x86_get_event_first, .get_event_next = pfm_intel_x86_get_event_next, .event_is_valid = pfm_intel_x86_event_is_valid, .validate_table = pfm_intel_x86_validate_table, .get_event_info = pfm_intel_x86_get_event_info, .get_event_attr_info = pfm_intel_x86_get_event_attr_info, PFMLIB_VALID_PERF_PATTRS(pfm_intel_snbep_unc_perf_validate_pattrs), .get_event_nattrs = pfm_intel_x86_get_event_nattrs, }; papi-5.6.0/src/libpfm4/lib/pfmlib_amd64_fam10h.c000664 001750 001750 00000005256 13216244365 023205 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_amd64_fam10h.c : AMD64 Family 10h * * Copyright (c) 2010 Google, Inc * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice 
and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * */ /* private headers */ #include "pfmlib_priv.h" #include "pfmlib_amd64_priv.h" #include "events/amd64_events_fam10h.h" #define DEFINE_FAM10H_REV(d, n, r, pmuid) \ pfmlib_pmu_t amd64_fam10h_##n##_support={ \ .desc = "AMD64 Fam10h "#d, \ .name = "amd64_fam10h_"#n, \ .pmu = pmuid, \ .pmu_rev = r, \ .pme_count = LIBPFM_ARRAY_SIZE(amd64_fam10h_pe),\ .type = PFM_PMU_TYPE_CORE, \ .supported_plm = AMD64_FAM10H_PLM, \ .num_cntrs = 4, \ .max_encoding = 1, \ .pe = amd64_fam10h_pe, \ .atdesc = amd64_mods, \ .flags = PFMLIB_PMU_FL_RAW_UMASK, \ \ .cpu_family = pmuid, \ .pmu_detect = pfm_amd64_family_detect, \ .get_event_encoding[PFM_OS_NONE] = pfm_amd64_get_encoding,\ PFMLIB_ENCODE_PERF(pfm_amd64_get_perf_encoding), \ .get_event_first = pfm_amd64_get_event_first, \ .get_event_next = pfm_amd64_get_event_next, \ .event_is_valid = pfm_amd64_event_is_valid, \ .validate_table = pfm_amd64_validate_table, \ .get_event_info = pfm_amd64_get_event_info, \ .get_event_attr_info = pfm_amd64_get_event_attr_info,\ PFMLIB_VALID_PERF_PATTRS(pfm_amd64_perf_validate_pattrs),\ .get_event_nattrs = pfm_amd64_get_event_nattrs, \ .get_num_events = pfm_amd64_get_num_events, \ } DEFINE_FAM10H_REV(Barcelona, barcelona, AMD64_FAM10H_REV_B, PFM_PMU_AMD64_FAM10H_BARCELONA); DEFINE_FAM10H_REV(Shanghai, shanghai, AMD64_FAM10H_REV_C, PFM_PMU_AMD64_FAM10H_SHANGHAI); DEFINE_FAM10H_REV(Istanbul, istanbul, AMD64_FAM10H_REV_D, 
PFM_PMU_AMD64_FAM10H_ISTANBUL); papi-5.6.0/src/components/emon/000775 001750 001750 00000000000 13216244357 020452 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/libpfm-3.y/examples_ia64_v2.0/ita2_btb.c000664 001750 001750 00000033475 13216244362 024257 0ustar00jshenry1963jshenry1963000000 000000 /* * ita2_btb.c - example of how use the BTB with the Itanium 2 PMU * * Copyright (c) 2003-2006 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux/ia64. 
 */ #include #include #include #include #include #include #include #include #include #include #include #include typedef pfm_default_smpl_hdr_t btb_hdr_t; typedef pfm_default_smpl_entry_t btb_entry_t; typedef pfm_default_smpl_ctx_arg_t btb_ctx_arg_t; #define BTB_FMT_UUID PFM_DEFAULT_SMPL_UUID static pfm_uuid_t buf_fmt_id = BTB_FMT_UUID; #define NUM_PMCS PFMLIB_MAX_PMCS #define NUM_PMDS PFMLIB_MAX_PMDS #define MAX_EVT_NAME_LEN 128 #define MAX_PMU_NAME_LEN 32 /* * The BRANCH_EVENT is incremented by 1 for each branch event. Such an event is composed of * two entries in the BTB: a source and a target entry. The BTB is full after 4 branch * events. */ #define SMPL_PERIOD (4UL*256) /* * We use a small buffer size to exercise the overflow handler */ #define SMPL_BUF_NENTRIES 64 /* * M_PMD(x) is a bitmask with only bit x set; BTB_REGS_MASK selects PMD8 through PMD16. */ #define M_PMD(x) (1UL<<(x)) #define BTB_REGS_MASK (M_PMD(8)|M_PMD(9)|M_PMD(10)|M_PMD(11)|M_PMD(12)|M_PMD(13)|M_PMD(14)|M_PMD(15)|M_PMD(16)) /* * sampling buffer address, size in bytes of one sample and perfmon context descriptor; * all three are assigned in main() */ static void *smpl_vaddr; static unsigned int entry_size; static int id; /* * hweight64(x): number of bits set in x, taken from the compiler intrinsic or the popcnt instruction */ #if defined(__ECC) && defined(__INTEL_COMPILER) /* if you do not have this file, your compiler is too old */ #include #define hweight64(x) _m64_popcnt(x) #elif defined(__GNUC__) static __inline__ int hweight64 (unsigned long x) { unsigned long result; __asm__ ("popcnt %0=%1" : "=r" (result) : "r" (x)); return (int)result; } #else #error "you need to provide inline assembly from your compiler" #endif /* * we don't use static to make sure the compiler does not inline the function */ long func1(void) { return 0;} /* * test workload: iterates loop times; after each decrement it adds func1() to the sum when the * counter is odd and the counter itself otherwise, and returns the sum */ long do_test(unsigned long loop) { long sum = 0; while(loop--) { if (loop & 0x1) sum += func1(); else sum += loop; } return sum; } /* * print a printf-style message on stderr and exit with status 1 */ static void fatal_error(char *fmt,...) __attribute__((noreturn)); static void fatal_error(char *fmt, ...) 
{ va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); exit(1); } /* * print content of sampling buffer * * XXX: using stdio to print from a signal handler is not safe with multi-threaded * applications */ #define safe_printf printf /* * decode and print one BTB register (PMD j; show_btb() calls this with j = i+8). * The b1 and bruflush flags of that register are bits 4+4*(j-8) and 5+4*(j-8) of pmd16. * A register with both btb_b and btb_mp clear is reported as invalid and not decoded further. */ static void show_btb_reg(int j, pfm_ita2_pmd_reg_t reg, pfm_ita2_pmd_reg_t pmd16) { unsigned long bruflush, b1; int is_valid = reg.pmd8_15_ita2_reg.btb_b == 0 && reg.pmd8_15_ita2_reg.btb_mp == 0 ? 0 :1; b1 = (pmd16.pmd_val >> (4 + 4*(j-8))) & 0x1; bruflush = (pmd16.pmd_val >> (5 + 4*(j-8))) & 0x1; safe_printf("\tPMD%-2d: 0x%016lx b=%d mp=%d bru=%ld b1=%ld valid=%c\n", j, reg.pmd_val, reg.pmd8_15_ita2_reg.btb_b, reg.pmd8_15_ita2_reg.btb_mp, bruflush, b1, is_valid ? 'Y' : 'N'); if (!is_valid) return; if (reg.pmd8_15_ita2_reg.btb_b) { unsigned long addr; addr = (reg.pmd8_15_ita2_reg.btb_addr+b1)<<4; addr |= reg.pmd8_15_ita2_reg.btb_slot < 3 ? reg.pmd8_15_ita2_reg.btb_slot : 0; safe_printf("\t Source Address: 0x%016lx\n" "\t Taken=%c Prediction: %s\n\n", addr, reg.pmd8_15_ita2_reg.btb_slot < 3 ? 'Y' : 'N', reg.pmd8_15_ita2_reg.btb_mp ? "FE Failure" : bruflush ? "BE Failure" : "Success"); } else { safe_printf("\t Target Address: 0x%016lx\n\n", ((unsigned long)reg.pmd8_15_ita2_reg.btb_addr<<4)); } } /* * print the BTB registers of one sample: start at index btbi_bbi when btbi_full is set, * at 0 otherwise, and walk modulo 8 until index btbi_bbi is reached again */ static void show_btb(pfm_ita2_pmd_reg_t *btb, pfm_ita2_pmd_reg_t *pmd16) { int i, last; i = (pmd16->pmd16_ita2_reg.btbi_full) ? pmd16->pmd16_ita2_reg.btbi_bbi : 0; last = pmd16->pmd16_ita2_reg.btbi_bbi; safe_printf("btb_trace: i=%d last=%d bbi=%d full=%d\n", i, last,pmd16->pmd16_ita2_reg.btbi_bbi, pmd16->pmd16_ita2_reg.btbi_full); do { show_btb_reg(i+8, btb[i], *pmd16); i = (i+1) % 8; } while (i != last); } /* * walk the sampling buffer mapped at smpl_vaddr and print the header and the BTB registers * of each of its hdr_count entries */ void process_smpl_buffer(void) { btb_hdr_t *hdr; btb_entry_t *ent; unsigned long pos; unsigned long smpl_entry = 0; pfm_ita2_pmd_reg_t *reg, *pmd16; unsigned long i; int ret; static unsigned long last_ovfl = ~0UL; hdr = (btb_hdr_t *)smpl_vaddr; /* * check that we are not displaying the previous set of samples again. 
 * Required to take care of the last batch of samples. * * NOTE(review): last_ovfl is never assigned in this function, so it keeps its * initial value ~0UL and the test below never triggers. */ if (hdr->hdr_overflows <= last_ovfl && last_ovfl != ~0UL) { printf("skipping identical set of samples %lu <= %lu\n", hdr->hdr_overflows, last_ovfl); return; } pos = (unsigned long)(hdr+1); /* * walk through all the entries recorded in the buffer */ for(i=0; i < hdr->hdr_count; i++) { ret = 0; ent = (btb_entry_t *)pos; /* * print entry header */ safe_printf("Entry %ld PID:%d CPU:%d STAMP:0x%lx IIP:0x%016lx\n", smpl_entry++, ent->pid, ent->cpu, ent->tstamp, ent->ip); /* * point to first recorded register (always contiguous with entry header) */ reg = (pfm_ita2_pmd_reg_t*)(ent+1); /* * in this particular example, we have pmd8-pmd15 has the BTB. We have also * included pmd16 (BTB index) has part of the registers to record. This trick * allows us to get the index to decode the sequential order of the BTB. * * Recorded registers are always recorded in increasing order. So we know * that pmd16 is at a fixed offset (+8*sizeof(unsigned long)) from pmd8. 
*/ pmd16 = reg+8; show_btb(reg, pmd16); /* * move to next entry */ pos += entry_size; } } static void overflow_handler(int n, struct siginfo *info, struct sigcontext *sc) { /* dangerous */ printf("Notification received\n"); process_smpl_buffer(); /* * And resume monitoring */ if (perfmonctl(id, PFM_RESTART,NULL, 0) == -1) { perror("PFM_RESTART"); exit(1); } } int main(void) { int ret; int type = 0; pfarg_reg_t pd[NUM_PMDS]; pfarg_reg_t pc[NUM_PMCS]; pfmlib_input_param_t inp; pfmlib_output_param_t outp; pfmlib_ita2_input_param_t ita2_inp; btb_ctx_arg_t ctx[1]; pfarg_load_t load_args; pfmlib_options_t pfmlib_options; struct sigaction act; unsigned int i; /* * Initialize pfm library (required before we can use it) */ if (pfm_initialize() != PFMLIB_SUCCESS) { fatal_error("Can't initialize library\n"); } /* * Let's make sure we run this on the right CPU */ pfm_get_pmu_type(&type); if (type != PFMLIB_ITANIUM2_PMU) { char model[MAX_PMU_NAME_LEN]; pfm_get_pmu_name(model, MAX_PMU_NAME_LEN); fatal_error("this program does not work with %s PMU\n", model); } /* * Install the overflow handler (SIGIO) */ memset(&act, 0, sizeof(act)); act.sa_handler = (sig_t)overflow_handler; sigaction (SIGIO, &act, 0); /* * pass options to library (optional) */ memset(&pfmlib_options, 0, sizeof(pfmlib_options)); pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ pfmlib_options.pfm_verbose = 0; /* set to 1 for debug */ pfm_set_options(&pfmlib_options); memset(pd, 0, sizeof(pd)); memset(ctx, 0, sizeof(ctx)); /* * prepare parameters to library. we don't use any Itanium * specific features here. so the pfp_model is NULL. */ memset(&inp,0, sizeof(inp)); memset(&outp,0, sizeof(outp)); memset(&ita2_inp,0, sizeof(ita2_inp)); /* * Before calling pfm_find_dispatch(), we must specify what kind * of branches we want to capture. 
We are interesteed in all the mispredicted branches, * therefore we program we set the various fields of the BTB config to: */ ita2_inp.pfp_ita2_btb.btb_used = 1; ita2_inp.pfp_ita2_btb.btb_ds = 0; ita2_inp.pfp_ita2_btb.btb_tm = 0x3; ita2_inp.pfp_ita2_btb.btb_ptm = 0x3; ita2_inp.pfp_ita2_btb.btb_ppm = 0x3; ita2_inp.pfp_ita2_btb.btb_brt = 0x0; ita2_inp.pfp_ita2_btb.btb_plm = PFM_PLM3; /* * To count the number of occurence of this instruction, we must * program a counting monitor with the IA64_TAGGED_INST_RETIRED_PMC8 * event. */ if (pfm_find_full_event("BRANCH_EVENT", &inp.pfp_events[0]) != PFMLIB_SUCCESS) { fatal_error("cannot find event BRANCH_EVENT\n"); } /* * set the (global) privilege mode: * PFM_PLM3 : user level only */ inp.pfp_dfl_plm = PFM_PLM3; /* * how many counters we use */ inp.pfp_event_count = 1; /* * let the library figure out the values for the PMCS */ if ((ret=pfm_dispatch_events(&inp, &ita2_inp, &outp, NULL)) != PFMLIB_SUCCESS) { fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); } /* * We initialize the format specific information. * The format is identified by its UUID which must be copied * into the ctx_buf_fmt_id field. */ memcpy(ctx[0].ctx_arg.ctx_smpl_buf_id, buf_fmt_id, sizeof(pfm_uuid_t)); /* * the size of the buffer is indicated in bytes (not entries). * * The kernel will record into the buffer up to a certain point. * No partial samples are ever recorded. */ ctx[0].buf_arg.buf_size = 8192; /* * now create the context for self monitoring/per-task */ if (perfmonctl(0, PFM_CREATE_CONTEXT, ctx, 1) == -1 ) { if (errno == ENOSYS) { fatal_error("Your kernel does not have performance monitoring support!\n"); } fatal_error("Can't create PFM context %s\n", strerror(errno)); } printf("Sampling buffer mapped at %p\n", ctx[0].ctx_arg.ctx_smpl_vaddr); smpl_vaddr = ctx[0].ctx_arg.ctx_smpl_vaddr; /* * extract our file descriptor */ id = ctx[0].ctx_arg.ctx_fd; /* * Now prepare the argument to initialize the PMDs and PMCS. 
* We must pfp_pmc_count to determine the number of PMC to intialize. * We must use pfp_event_count to determine the number of PMD to initialize. * Some events cause extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. * * This step is new compared to libpfm-2.x. It is necessary because the library no * longer knows about the kernel data structures. */ for (i=0; i < outp.pfp_pmc_count; i++) { pc[i].reg_num = outp.pfp_pmcs[i].reg_num; pc[i].reg_value = outp.pfp_pmcs[i].reg_value; } /* * the PMC controlling the event ALWAYS come first, that's why this loop * is safe even when extra PMC are needed to support a particular event. */ for (i=0; i < inp.pfp_event_count; i++) { pd[i].reg_num = pc[i].reg_num; } /* * indicate we want notification when buffer is full */ pc[0].reg_flags |= PFM_REGFL_OVFL_NOTIFY; /* * Now prepare the argument to initialize the PMD and the sampling period * We know we use only one PMD in this case, therefore pmd[0] corresponds * to our first event which is our sampling period. */ pd[0].reg_value = (~0UL) - SMPL_PERIOD +1; pd[0].reg_long_reset = (~0UL) - SMPL_PERIOD +1; pd[0].reg_short_reset = (~0UL) - SMPL_PERIOD +1; /* * indicate PMD to collect in each sample */ pc[0].reg_smpl_pmds[0] = BTB_REGS_MASK; /* * compute size of each sample: fixed-size header + all our BTB regs */ entry_size = sizeof(btb_entry_t)+(hweight64(BTB_REGS_MASK)<<3); /* * When our counter overflows, we want to BTB index to be reset, so that we keep * in sync. This is required to make it possible to interpret pmd16 on overflow * to avoid repeating the same branch several times. */ pc[0].reg_reset_pmds[0] = M_PMD(16); /* * reset pmd16 (BTB index), short and long reset value are set to zero as well * * We use slot 1 of our pd[] array for this. 
*/ pd[1].reg_num = 16; pd[1].reg_value = 0UL; /* * Now program the registers * * We don't use the save variable to indicate the number of elements passed to * the kernel because, as we said earlier, pc may contain more elements than * the number of events we specified, i.e., contains more thann coutning monitors. */ if (perfmonctl(id, PFM_WRITE_PMCS, pc, outp.pfp_pmc_count) == -1) { fatal_error("perfmonctl error PFM_WRITE_PMCS errno %d\n",errno); } /* * we use 2 = 1 for the branch_event + 1 for the reset of PMD16. */ if (perfmonctl(id, PFM_WRITE_PMDS, pd, 2) == -1) { fatal_error("perfmonctl error PFM_WRITE_PMDS errno %d\n",errno); } /* * now we load (i.e., attach) the context to ourself */ load_args.load_pid = getpid(); if (perfmonctl(id, PFM_LOAD_CONTEXT, &load_args, 1) == -1) { fatal_error("perfmonctl error PFM_LOAD_CONTEXT errno %d\n",errno); } /* * setup asynchronous notification on the file descriptor */ ret = fcntl(id, F_SETFL, fcntl(id, F_GETFL, 0) | O_ASYNC); if (ret == -1) { fatal_error("cannot set ASYNC: %s\n", strerror(errno)); } /* * get ownership of the descriptor */ ret = fcntl(id, F_SETOWN, getpid()); if (ret == -1) { fatal_error("cannot setown: %s\n", strerror(errno)); } /* * Let's roll now. */ pfm_self_start(id); do_test(100000); pfm_self_stop(id); /* * We must call the processing routine to cover the last entries recorded * in the sampling buffer. Note that the buffer may not be full at this point. 
* */ process_smpl_buffer(); /* * let's stop this now */ close(id); return 0; } papi-5.6.0/man/man3/PAPI_add_events.3000664 001750 001750 00000005476 13216244356 021175 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPI_add_events" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPI_add_events \- .PP add multiple PAPI presets or native hardware events to an event set .SH SYNOPSIS .br .PP .SH "Detailed Description" .PP .PP .nf @par C Interface: \#include @n int PAPI_add_events( int EventSet, int * EventCodes, int number ); PAPI_add_event adds one event to a PAPI Event Set. PAPI_add_events does the same, but for an array of events. @n A hardware event can be either a PAPI preset or a native hardware event code. For a list of PAPI preset events, see PAPI_presets or run the avail test case in the PAPI distribution. PAPI presets can be passed to PAPI_query_event to see if they exist on the underlying architecture. For a list of native events available on current platform, run native_avail test case in the PAPI distribution. For the encoding of native events, see PAPI_event_name_to_code to learn how to generate native code for the supported native event on the underlying architecture. @param EventSet An integer handle for a PAPI Event Set as created by PAPI_create_eventset. @param *EventCode An array of defined events. @param number An integer indicating the number of events in the array *EventCode. It should be noted that PAPI_add_events can partially succeed, exactly like PAPI_remove_events. @retval Positive-Integer The number of consecutive elements that succeeded before the error. @retval PAPI_EINVAL One or more of the arguments is invalid. @retval PAPI_ENOMEM Insufficient memory to complete the operation. @retval PAPI_ENOEVST The event set specified does not exist. @retval PAPI_EISRUN The event set is currently counting events. 
@retval PAPI_ECNFLCT The underlying counter hardware can not count this event and other events in the event set simultaneously. @retval PAPI_ENOEVNT The PAPI preset is not available on the underlying hardware. @retval PAPI_EBUG Internal error, please send mail to the developers. @par Examples: .fi .PP .PP .nf * int EventSet = PAPI_NULL; * unsigned int native = 0x0; * if ( PAPI_create_eventset( &EventSet ) != PAPI_OK ) * handle_error( 1 ); * // Add Total Instructions Executed to our EventSet * if ( PAPI_add_event( EventSet, PAPI_TOT_INS ) != PAPI_OK ) * handle_error( 1 ); * // Add native event PM_CYC to EventSet * if ( PAPI_event_name_to_code( "PM_CYC", &native ) != PAPI_OK ) * handle_error( 1 ); * if ( PAPI_add_event( EventSet, native ) != PAPI_OK ) * handle_error( 1 ); * .fi .PP .PP .PP .nf @see PAPI_cleanup_eventset @n PAPI_destroy_eventset @n PAPI_event_code_to_name @n PAPI_remove_events @n PAPI_query_event @n PAPI_presets @n PAPI_native @n PAPI_remove_event.fi .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. papi-5.6.0/src/libpfm4/lib/pfmlib_intel_knc.c000664 001750 001750 00000004573 13216244365 023105 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_intel_knc.c : Intel Knights Corner (Xeon Phi) * * Copyright (c) 2012, Google, Inc * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* private headers */ #include "pfmlib_priv.h" /* library private */ #include "pfmlib_intel_x86_priv.h" /* architecture private */ #include "events/intel_knc_events.h" static const int knc_models[] = { 1, /* Knights Corner */ 0 }; pfmlib_pmu_t intel_knc_support={ .desc = "Intel Knights Corner", .name = "knc", .pmu = PFM_PMU_INTEL_KNC, .pme_count = LIBPFM_ARRAY_SIZE(intel_knc_pe), .type = PFM_PMU_TYPE_CORE, .num_cntrs = 2, .max_encoding = 1, .pe = intel_knc_pe, .atdesc = intel_x86_mods, .supported_plm = INTEL_X86_PLM, .cpu_family = 11, .cpu_models = knc_models, .pmu_detect = pfm_intel_x86_model_detect, .get_event_encoding[PFM_OS_NONE] = pfm_intel_x86_get_encoding, PFMLIB_ENCODE_PERF(pfm_intel_x86_get_perf_encoding), .get_event_first = pfm_intel_x86_get_event_first, .get_event_next = pfm_intel_x86_get_event_next, .event_is_valid = pfm_intel_x86_event_is_valid, .validate_table = pfm_intel_x86_validate_table, .get_event_info = pfm_intel_x86_get_event_info, .get_event_attr_info = pfm_intel_x86_get_event_attr_info, PFMLIB_VALID_PERF_PATTRS(pfm_intel_x86_perf_validate_pattrs), .get_event_nattrs = pfm_intel_x86_get_event_nattrs, }; papi-5.6.0/src/libpfm-3.y/include/perfmon/perfmon_powerpc.h000664 001750 001750 00000000635 13216244362 025656 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2007 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * This file should never be included directly, use * instead. 
*/ #ifndef _PERFMON_POWERPC_H_ #define _PERFMON_POWERPC_H_ #define PFM_ARCH_MAX_PMCS (256+64) /* 256 HW 64 SW */ #define PFM_ARCH_MAX_PMDS (256+64) /* 256 HW 64 SW */ #endif /* _PERFMON_POWERPC_H_ */ papi-5.6.0/src/ftests/accum.F000664 001750 001750 00000010043 13216244361 020027 0ustar00jshenry1963jshenry1963000000 000000 #include "fpapi_test.h" program accum implicit integer (p) integer es1, number, i integer*8 values(10) integer events(2) character*PAPI_MAX_STR_LEN name integer retval integer tests_quiet, get_quiet external get_quiet integer last_char, n external last_char tests_quiet = get_quiet() es1 = PAPI_NULL retval = PAPI_VER_CURRENT call PAPIf_library_init(retval) if ( retval.NE.PAPI_VER_CURRENT) then call ftest_fail(__FILE__, __LINE__, . 'PAPI_library_init', retval) end if call PAPIf_create_eventset(es1, retval) if ( retval.NE.PAPI_OK) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_create_eventset', *retval) end if number=2 call PAPIf_query_event(PAPI_FP_INS, retval) if (retval .NE. PAPI_OK) then events(1) = PAPI_TOT_INS else events(1) = PAPI_FP_INS end if events(2) = PAPI_TOT_CYC call PAPIf_add_events( es1, events, number, retval ) if ( retval.LT.PAPI_OK) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_add_events', retval) end if do i=1,10 values(i)=0 end do call PAPIf_start(es1, retval) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_start', retval) end if call fdo_flops(NUM_FLOPS) call PAPIf_accum(es1, values(7), retval) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_accum', retval) end if values(1)=values(7) values(2)=values(8) call PAPIf_stop(es1, values(3), retval) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_stop', retval) end if call PAPIf_start(es1, retval) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_start', retval) end if call fdo_flops(NUM_FLOPS) call PAPIf_accum(es1, values(7), retval) if ( retval .NE. 
PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_accum', retval) end if values(5)=values(7) values(6)=values(8) call fdo_flops(NUM_FLOPS) call PAPIf_accum(es1, values(7), retval) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_accum', retval) end if call fdo_flops(NUM_FLOPS) call PAPIf_stop(es1, values(9), retval) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_stop', retval) end if call PAPIf_remove_events( es1, events, number, retval ) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_remove_events', retval) end if if (tests_quiet .EQ. 0) then call PAPIf_event_code_to_name (events(1), name, retval) if ( retval.NE.PAPI_OK) then call ftest_fail(__FILE__, __LINE__, * 'PAPIf_event_code_to_name', retval) end if n=last_char(name) print *, "Test case accum: Test of PAPI_add_events, ", * "PAPI_remove_events, PAPI_accum" print *, "------------------------------------------", * "------------------------" write (*,100) "Test type", 1, 2, 3, 4, 5 write (*,100) name(1:n), values(1), values(3), * values(5), values(7), values(9) write (*,100) "PAPI_TOT_CYC", values(2), values(4), * values(6), values(8), values(10) print *, "------------------------------------------", * "------------------------" 100 format(a15, ":", i10, i10, i10, i10, i10) print * print *, "Verification:" print *, "Column 2 approximately equals to 0;" print *, "Column 3 approximately equals 2 * Column 1;" print *, "Column 4 approximately equals 3 * Column 1;" print *, "Column 5 approximately equals Column 1." end if call ftests_pass(__FILE__) end papi-5.6.0/src/perfctr-2.7.x/usr.lib/virtual.c000664 001750 001750 00000024460 13216244370 022757 0ustar00jshenry1963jshenry1963000000 000000 /* $Id: virtual.c,v 1.35 2005/06/06 21:07:58 mikpe Exp $ * Library interface to virtual per-process performance counters. 
* * Copyright (C) 1999-2005 Mikael Pettersson */ #include #include #include #include #include #include #include #include "libperfctr.h" #include "arch.h" #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #define STRUCT_ARRAY_SIZE(TYPE, MEMBER) ARRAY_SIZE(((TYPE*)0)->MEMBER) /* * Code to open (with or without creation) per-process perfctrs. */ static int _vperfctr_open_pid(int pid, int try_creat, int try_rdonly, int *isnew) { int fd; *isnew = 1; fd = -1; if( try_creat ) fd = _sys_vperfctr_open(-1, pid, 1); if( fd < 0 && (try_creat ? errno == EEXIST : 1) && try_rdonly ) { *isnew = 0; fd = _sys_vperfctr_open(-1, pid, 0); } return fd; } /* * Operations using raw kernel handles, basically just _sys_perfctr() wrappers. */ int _vperfctr_open(int creat) { int dummy; return _vperfctr_open_pid(0, creat, !creat, &dummy); } int __vperfctr_control(int fd, unsigned int cpu_type, const struct vperfctr_control *control) { return _sys_vperfctr_write_control(fd, cpu_type, control); } int _vperfctr_control(int fd, const struct vperfctr_control *control) { struct perfctr_info info; memset(&info, 0, sizeof info); perfctr_info_cpu_init(&info); return __vperfctr_control(fd, info.cpu_type, control); } int __vperfctr_read_control(int fd, unsigned int cpu_type, struct vperfctr_control *control) { return _sys_vperfctr_read_control(fd, cpu_type, control); } int _vperfctr_read_control(int fd, struct vperfctr_control *control) { struct perfctr_info info; memset(&info, 0, sizeof info); perfctr_info_cpu_init(&info); return __vperfctr_read_control(fd, info.cpu_type, control); } int _vperfctr_read_sum(int fd, struct perfctr_sum_ctrs *sum) { return _sys_vperfctr_read_sum(fd, sum); } int _vperfctr_read_children(int fd, struct perfctr_sum_ctrs *children) { return _sys_vperfctr_read_children(fd, children); } /* * Operations using library objects. 
*/ /* user's view of mmap:ed virtual perfctr */ struct vperfctr_state { struct perfctr_cpu_state_user cpu_state; }; struct vperfctr { /* XXX: point to &vperfctr_state.cpu_state instead? */ volatile const struct vperfctr_state *kstate; volatile const void *mapping; int mapping_size; int fd; unsigned int cpu_type; unsigned char have_rdpmc; /* Subset of the user's control data */ unsigned int pmc_map[STRUCT_ARRAY_SIZE(struct perfctr_cpu_control, pmc_map)]; }; static int vperfctr_open_pid(int pid, struct vperfctr *perfctr) { int fd, isnew; struct perfctr_info info; int offset; offset = _perfctr_get_state_user_offset(); if (offset < 0) return -1; fd = _vperfctr_open_pid(pid, 1, 1, &isnew); if( fd < 0 ) { goto out_perfctr; } perfctr->fd = fd; if( perfctr_abi_check_fd(perfctr->fd) < 0 ) goto out_fd; if( perfctr_info(perfctr->fd, &info) < 0 ) goto out_fd; perfctr->cpu_type = info.cpu_type; perfctr->have_rdpmc = (info.cpu_features & PERFCTR_FEATURE_RDPMC) != 0; perfctr->mapping_size = getpagesize(); perfctr->mapping = mmap(NULL, perfctr->mapping_size, PROT_READ, MAP_SHARED, perfctr->fd, 0); if (perfctr->mapping != MAP_FAILED) { perfctr->kstate = (void*)((char*)perfctr->mapping + offset); return 0; } out_fd: if( isnew ) vperfctr_unlink(perfctr); close(perfctr->fd); out_perfctr: return -1; } struct vperfctr *vperfctr_open(void) { struct vperfctr *perfctr; perfctr = malloc(sizeof(*perfctr)); if( perfctr ) { if( vperfctr_open_pid(0, perfctr) == 0 ) return perfctr; free(perfctr); } return NULL; } int vperfctr_info(const struct vperfctr *vperfctr, struct perfctr_info *info) { return perfctr_info(vperfctr->fd, info); } struct perfctr_cpus_info *vperfctr_cpus_info(const struct vperfctr *vperfctr) { return perfctr_cpus_info(vperfctr->fd); } #if (__GNUC__ < 2) || (__GNUC__ == 2 && __GNUC_MINOR__ < 96) #define __builtin_expect(x, expected_value) (x) #endif #define likely(x) __builtin_expect((x),1) #define unlikely(x) __builtin_expect((x),0) #ifndef seq_read_barrier /* mmap() based 
sampling is supported only for self-monitoring tasks, * so for most CPUs we should only need a compiler barrier. This * ensures that the two reads of the sequence number will truly wrap * all the operations to make this sample. */ #define seq_read_barrier() __asm__ __volatile__ ("" : : : "memory"); #endif /* These are adaptations of read_seqcount_begin() and * read_seqcount_retry() from include/linux/seqlock.h. They use an * explicit u32 instead of an opaque seqcount_t, since the type of * the lock is part of the kernel/user ABI. */ static inline __u32 read_perfseq_begin(const volatile __u32 *seq) { __u32 ret = *seq; seq_read_barrier(); return ret; } static inline int read_perfseq_retry(const volatile __u32 *seq, __u32 iv) { seq_read_barrier(); return (iv & 1) | ((*seq) ^ iv); } unsigned long long vperfctr_read_tsc(const struct vperfctr *self) { unsigned long long sum; unsigned int start, now; volatile const struct vperfctr_state *kstate; __u32 seq; kstate = self->kstate; if (unlikely(kstate->cpu_state.cstatus == 0)) return kstate->cpu_state.tsc_sum; do { seq = read_perfseq_begin(&kstate->cpu_state.sequence); rdtscl(now); sum = kstate->cpu_state.tsc_sum; start = kstate->cpu_state.tsc_start; } while (unlikely(read_perfseq_retry(&kstate->cpu_state.sequence, seq))); return sum + (now - start); } unsigned long long vperfctr_read_pmc(const struct vperfctr *self, unsigned i) { unsigned long long sum; unsigned int start, now; volatile const struct vperfctr_state *kstate; unsigned int cstatus; __u32 seq; kstate = self->kstate; cstatus = kstate->cpu_state.cstatus; if (unlikely(!vperfctr_has_rdpmc(self))) { struct perfctr_sum_ctrs sum_ctrs; if (_vperfctr_read_sum(self->fd, &sum_ctrs) < 0) perror(__FUNCTION__); return sum_ctrs.pmc[i]; } do { seq = read_perfseq_begin(&kstate->cpu_state.sequence); rdpmcl(self->pmc_map[i], now); start = kstate->cpu_state.pmc[i].start; sum = kstate->cpu_state.pmc[i].sum; } while (unlikely(read_perfseq_retry(&kstate->cpu_state.sequence, seq))); 
return sum + (now - start); } static int vperfctr_read_ctrs_slow(const struct vperfctr *vperfctr, struct perfctr_sum_ctrs *sum) { return _vperfctr_read_sum(vperfctr->fd, sum); } int vperfctr_read_ctrs(const struct vperfctr *self, struct perfctr_sum_ctrs *sum) { unsigned int now; unsigned int cstatus, nrctrs; volatile const struct vperfctr_state *kstate; __u32 seq; int i; /* Fast path is impossible if at least east one PMC is enabled but the CPU doesn't have RDPMC. */ kstate = self->kstate; cstatus = kstate->cpu_state.cstatus; nrctrs = perfctr_cstatus_nrctrs(cstatus); if (nrctrs && !vperfctr_has_rdpmc(self)) return vperfctr_read_ctrs_slow(self, sum); do { seq = read_perfseq_begin(&kstate->cpu_state.sequence); for (i = nrctrs; --i >= 0;) { rdpmcl(self->pmc_map[i], now); sum->pmc[i] = kstate->cpu_state.pmc[i].sum + (now - (unsigned int)kstate->cpu_state.pmc[i].start); } rdtscl(now); sum->tsc = kstate->cpu_state.tsc_sum + (now - (unsigned int)kstate->cpu_state.tsc_start); } while (unlikely(read_perfseq_retry(&kstate->cpu_state.sequence, seq))); return 0; } int vperfctr_read_state(const struct vperfctr *self, struct perfctr_sum_ctrs *sum, struct vperfctr_control *control) { if( _vperfctr_read_sum(self->fd, sum) < 0 ) return -1; /* For historical reasons, control may be NULL. */ if( control && __vperfctr_read_control(self->fd, self->cpu_type, control) < 0 ) return -1; return 0; } int vperfctr_control(struct vperfctr *perfctr, struct vperfctr_control *control) { memcpy(perfctr->pmc_map, control->cpu_control.pmc_map, sizeof perfctr->pmc_map); return __vperfctr_control(perfctr->fd, perfctr->cpu_type, control); } int vperfctr_stop(struct vperfctr *perfctr) { struct vperfctr_control control; memset(&control, 0, sizeof control); /* XXX: issue a SUSPEND command instead? 
*/ return vperfctr_control(perfctr, &control); } int vperfctr_is_running(const struct vperfctr *perfctr) { return perfctr->kstate->cpu_state.cstatus != 0; } int vperfctr_iresume(const struct vperfctr *perfctr) { return _sys_vperfctr_iresume(perfctr->fd); } int vperfctr_unlink(const struct vperfctr *perfctr) { return _sys_vperfctr_unlink(perfctr->fd); } void vperfctr_close(struct vperfctr *perfctr) { munmap((void*)perfctr->mapping, perfctr->mapping_size); close(perfctr->fd); free(perfctr); } /* * Operations on other processes' virtual-mode perfctrs. */ struct rvperfctr { struct vperfctr vperfctr; /* must be first for the close() operation */ int pid; }; struct rvperfctr *rvperfctr_open(int pid) { struct rvperfctr *rvperfctr; rvperfctr = malloc(sizeof(*rvperfctr)); if( rvperfctr ) { if( vperfctr_open_pid(pid, &rvperfctr->vperfctr) == 0 ) { rvperfctr->pid = pid; return rvperfctr; } free(rvperfctr); } return NULL; } int rvperfctr_pid(const struct rvperfctr *rvperfctr) { return rvperfctr->pid; } int rvperfctr_info(const struct rvperfctr *rvperfctr, struct perfctr_info *info) { return vperfctr_info(&rvperfctr->vperfctr, info); } int rvperfctr_read_ctrs(const struct rvperfctr *rvperfctr, struct perfctr_sum_ctrs *sum) { return vperfctr_read_ctrs_slow(&rvperfctr->vperfctr, sum); } int rvperfctr_read_state(const struct rvperfctr *rvperfctr, struct perfctr_sum_ctrs *sum, struct vperfctr_control *control) { return vperfctr_read_state(&rvperfctr->vperfctr, sum, control); } int rvperfctr_control(struct rvperfctr *rvperfctr, struct vperfctr_control *control) { return vperfctr_control(&rvperfctr->vperfctr, control); } int rvperfctr_stop(struct rvperfctr *rvperfctr) { return vperfctr_stop(&rvperfctr->vperfctr); } int rvperfctr_unlink(const struct rvperfctr *rvperfctr) { return vperfctr_unlink(&rvperfctr->vperfctr); } void rvperfctr_close(struct rvperfctr *rvperfctr) { /* this relies on offsetof(struct rvperfctr, vperfctr) == 0 */ vperfctr_close(&rvperfctr->vperfctr); } 
papi-5.6.0/src/perfctr-2.7.x/usr.lib/000775 001750 001750 00000000000 13216244370 021117 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/perfctr-2.7.x/linux/include/asm-ppc64/perfctr.h000664 001750 001750 00000012032 13216244370 025651 0ustar00jshenry1963jshenry1963000000 000000
/*
 * PPC64 Performance-Monitoring Counters driver
 *
 * Copyright (C) 2004  David Gibson, IBM Corporation.
 * Copyright (C) 2004  Mikael Pettersson
 */
#ifndef _ASM_PPC64_PERFCTR_H
#define _ASM_PPC64_PERFCTR_H

/* NOTE(review): the header name is missing from this directive in the
 * text as received; presumably it supplied the __u32/__u64 types used
 * below -- restore it from the pristine source. */
#include

/* Accumulated counter values: one TSC sum plus one sum per counter. */
struct perfctr_sum_ctrs {
	__u64 tsc;
	__u64 pmc[8];	/* the size is not part of the user ABI */
};

/* Counts that describe which counters a control block enables. */
struct perfctr_cpu_control_header {
	__u32 tsc_on;
	__u32 nractrs;	/* number of accumulation-mode counters */
	__u32 nrictrs;	/* number of interrupt-mode counters */
};

/* Per-task counter state: a packed status word, a sequence counter,
 * and a start/sum pair for the TSC and for each counter. */
struct perfctr_cpu_state_user {
	__u32 cstatus;	/* packed by perfctr_mk_cstatus() below */
	/* This is a sequence counter to ensure atomic reads by
	 * userspace.  The mechanism is identical to that used for
	 * seqcount_t in include/linux/seqlock.h. */
	__u32 sequence;
	__u64 tsc_start;
	__u64 tsc_sum;
	struct {
		__u64 start;
		__u64 sum;
	} pmc[8];	/* the size is not part of the user ABI */
};

/* cstatus is a re-encoding of control.tsc_on/nractrs/nrictrs
   which should have less overhead in most cases */
/* XXX: ppc driver internally also uses cstatus&(1<<30) */

/*
 * Pack the three header fields into one word:
 *   bit 31        tsc_on
 *   from bit 16   nrictrs
 *   from bit 8    nractrs + nrictrs (total number of counters)
 *   from bit 0    nractrs
 * The fields are OR-ed together without masking, so the caller must
 * pass values small enough not to overlap (the readers below extract
 * 7 bits per field).
 */
static inline
unsigned int perfctr_mk_cstatus(unsigned int tsc_on, unsigned int nractrs,
				unsigned int nrictrs)
{
	return (tsc_on<<31) | (nrictrs<<16) | ((nractrs+nrictrs)<<8) | nractrs;
}

/* Returns cstatus unchanged, i.e. non-zero whenever any field is set. */
static inline unsigned int perfctr_cstatus_enabled(unsigned int cstatus)
{
	return cstatus;
}

/* Non-zero when bit 31 (tsc_on) is set; relies on a 32-bit int so that
 * bit 31 is the sign bit. */
static inline int perfctr_cstatus_has_tsc(unsigned int cstatus)
{
	return (int)cstatus < 0;	/* test and jump on sign */
}

/* Extract nractrs: the low 7 bits. */
static inline unsigned int perfctr_cstatus_nractrs(unsigned int cstatus)
{
	return cstatus & 0x7F;		/* and with imm8 */
}

/* Extract the total counter count: 7 bits starting at bit 8. */
static inline unsigned int perfctr_cstatus_nrctrs(unsigned int cstatus)
{
	return (cstatus >> 8) & 0x7F;
}

/* Non-zero when the nrictrs field (7 bits starting at bit 16) is
 * non-zero.  The result is the masked field in place, not a count. */
static inline unsigned int perfctr_cstatus_has_ictrs(unsigned int cstatus)
{
	return cstatus & (0x7F << 16);
}

/*
 * 'struct siginfo' support for perfctr overflow signals.
 * In unbuffered mode, si_code is set to SI_PMC_OVF and a bitmask
 * describing which perfctrs overflowed is put in si_pmc_ovf_mask.
 * A bitmask is used since more than one perfctr can have overflowed
 * by the time the interrupt handler runs.
 */
#define SI_PMC_OVF -8
#define si_pmc_ovf_mask _sifields._pad[0] /* XXX: use an unsigned field later */

#ifdef __KERNEL__

#if defined(CONFIG_PERFCTR)

/* Kernel-side control block: the header counts plus the three MMCR
 * values, per-counter reset values and the index map. */
struct perfctr_cpu_control {
	struct perfctr_cpu_control_header header;
	u64 mmcr0;
	u64 mmcr1;
	u64 mmcra;
	unsigned int ireset[8];		/* [0,0x7fffffff], for i-mode counters, physical indices */
	unsigned int pmc_map[8];	/* virtual to physical index map */
};

struct perfctr_cpu_state {
	/* Don't change field order here without first considering the number
	   of cache lines touched during sampling and context switching. */
	unsigned int id;
	int isuspend_cpu;
	struct perfctr_cpu_state_user user;
	unsigned int unused_pmcs;
	struct perfctr_cpu_control control;
};

/* Driver init/exit. */
extern int perfctr_cpu_init(void);
extern void perfctr_cpu_exit(void);

/* CPU type name. */
extern char *perfctr_cpu_name;

/* Hardware reservation. */
extern const char *perfctr_cpu_reserve(const char *service);
extern void perfctr_cpu_release(const char *service);

/* PRE: state has no running interrupt-mode counters.
   Check that the new control data is valid.
   Update the driver's private control data.
   Returns a negative error code if the control data is invalid. */
extern int perfctr_cpu_update_control(struct perfctr_cpu_state *state, int is_global);

/* Parse and update control for the given domain. */
extern int perfctr_cpu_control_write(struct perfctr_cpu_control *control,
				     unsigned int domain,
				     const void *srcp, unsigned int srcbytes);

/* Retrieve and format control for the given domain.
   Returns number of bytes written. */
extern int perfctr_cpu_control_read(const struct perfctr_cpu_control *control,
				    unsigned int domain,
				    void *dstp, unsigned int dstbytes);

/* Read a-mode counters. Subtract from start and accumulate into sums.
   Must be called with preemption disabled. */
extern void perfctr_cpu_suspend(struct perfctr_cpu_state *state);

/* Write control registers. Read a-mode counters into start.
   Must be called with preemption disabled. */
extern void perfctr_cpu_resume(struct perfctr_cpu_state *state);

/* Perform an efficient combined suspend/resume operation.
   Must be called with preemption disabled. */
extern void perfctr_cpu_sample(struct perfctr_cpu_state *state);

/* The type of a perfctr overflow interrupt handler.
   It will be called in IRQ context, with preemption disabled. */
typedef void (*perfctr_ihandler_t)(unsigned long pc);

/* Operations related to overflow interrupt handling. */
#ifdef CONFIG_PERFCTR_INTERRUPT_SUPPORT
extern void perfctr_cpu_set_ihandler(perfctr_ihandler_t);
extern void perfctr_cpu_ireload(struct perfctr_cpu_state*);
extern unsigned int perfctr_cpu_identify_overflow(struct perfctr_cpu_state*);
#else
/* Without interrupt support, installing a handler is a no-op. */
static inline void perfctr_cpu_set_ihandler(perfctr_ihandler_t x) { }
#endif
/* In this header the function always reports no pending interrupt;
   'state' is not examined. */
static inline int perfctr_cpu_has_pending_interrupt(const struct perfctr_cpu_state *state)
{
	return 0;
}

#endif	/* CONFIG_PERFCTR */

#endif	/* __KERNEL__ */

#endif	/* _ASM_PPC64_PERFCTR_H */
papi-5.6.0/src/perfctr-2.7.x/etc/costs/Pentium4-3.4000664 001750 001750 00000001773 13216244367 023417 0ustar00jshenry1963jshenry1963000000 000000 [data from a 3.4 GHz Pentium 4 Model 4] PERFCTR INIT: vendor 0, family 15, model 4, stepping 1, clock 3393806 kHz PERFCTR INIT: NITER == 64 PERFCTR INIT: loop overhead is 825 cycles PERFCTR INIT: rdtsc cost is 96.8 cycles (7021 total) PERFCTR INIT: rdpmc cost is 236.0 cycles (15929 total) PERFCTR INIT: rdmsr (counter) cost is 348.8 cycles (23154 total) PERFCTR INIT: rdmsr (escr) cost is 367.6 cycles (24353 total) PERFCTR INIT: wrmsr (counter) cost is
990.3 cycles (64209 total) PERFCTR INIT: wrmsr (escr) cost is 1023.8 cycles (66351 total) PERFCTR INIT: read cr4 cost is 19.2 cycles (2057 total) PERFCTR INIT: write cr4 cost is 391.6 cycles (25891 total) PERFCTR INIT: rdpmc (fast) cost is 94.8 cycles (6893 total) PERFCTR INIT: rdmsr (cccr) cost is 377.0 cycles (24956 total) PERFCTR INIT: wrmsr (cccr) cost is 977.8 cycles (63410 total) PERFCTR INIT: write LVTPC cost is 28.5 cycles (2652 total) PERFCTR INIT: sync_core cost is 421.1 cycles (27778 total) perfctr: driver 2.7.6, cpu type Intel P4 at 3393806 kHz papi-5.6.0/src/perfctr-2.6.x/etc/costs/MPC7455-1.0000775 001750 001750 00000002337 13216244366 022646 0ustar00jshenry1963jshenry1963000000 000000 [data from a 1.0 GHz MPC7455] PERFCTR INIT: PVR 0x80010201, CPU clock 1002480 kHz, TB clock 16708 kHz PERFCTR INIT: NITER == 256 PERFCTR INIT: loop overhead is 70 cycles PERFCTR INIT: mftbl cost is 6.9 cycles (1837 total) PERFCTR INIT: mfspr (pmc1) cost is 3.9 cycles (1080 total) PERFCTR INIT: mfspr (pmc2) cost is 3.9 cycles (1085 total) PERFCTR INIT: mfspr (pmc3) cost is 3.9 cycles (1071 total) PERFCTR INIT: mfspr (pmc4) cost is 3.8 cycles (1064 total) PERFCTR INIT: mfspr (mmcr0) cost is 3.9 cycles (1069 total) PERFCTR INIT: mfspr (mmcr1) cost is 3.8 cycles (1064 total) PERFCTR INIT: mtspr (pmc2) cost is 4.0 cycles (1111 total) PERFCTR INIT: mtspr (pmc3) cost is 3.8 cycles (1064 total) PERFCTR INIT: mtspr (pmc4) cost is 3.9 cycles (1069 total) PERFCTR INIT: mtspr (mmcr1) cost is 3.8 cycles (1064 total) PERFCTR INIT: mtspr (mmcr0) cost is 3.9 cycles (1069 total) PERFCTR INIT: check_fcece(0): MMCR0[FC] is 0, PMC1 is 0x8000008a PERFCTR INIT: check_fcece(1): MMCR0[FC] is 1, PMC1 is 0x80000000 PERFCTR INIT: check_trigger(0): MMCR0[TRIGGER] is 0, PMC1 is 0x80000088, PMC2 is 0x8c PERFCTR INIT: check_trigger(1): MMCR0[TRIGGER] is 0, PMC1 is 0x8000007d, PMC2 is 0x81 perfctr: driver 2.6.12, cpu type PowerPC 60x/7xx/74xx at 1002480 kHz 
papi-5.6.0/src/perfctr-2.7.x/etc/costs/Pentium4-3.0000664 001750 001750 00000005772 13216244367 023416 0ustar00jshenry1963jshenry1963000000 000000 [data from a 3.0 GHz Pentium 4 Model 3 with hyper-threading enabled] perfctr/x86.c: hyper-threaded P4s detected: restricting access for CPUs 1 PERFCTR INIT: vendor 0, family 15, model 3, stepping 3, clock 2994145 kHz PERFCTR INIT: NITER == 64 PERFCTR INIT: loop overhead is 840 cycles PERFCTR INIT: rdtsc cost is 143.0 cycles (9998 total) PERFCTR INIT: rdpmc cost is 293.0 cycles (19597 total) PERFCTR INIT: rdmsr (counter) cost is 416.0 cycles (27465 total) PERFCTR INIT: rdmsr (escr) cost is 429.0 cycles (28297 total) PERFCTR INIT: wrmsr (counter) cost is 1247.2 cycles (80663 total) PERFCTR INIT: wrmsr (escr) cost is 1203.6 cycles (77873 total) PERFCTR INIT: read cr4 cost is 24.0 cycles (2378 total) PERFCTR INIT: write cr4 cost is 421.0 cycles (27787 total) PERFCTR INIT: rdpmc (fast) cost is 109.1 cycles (7823 total) PERFCTR INIT: rdmsr (cccr) cost is 514.4 cycles (33765 total) PERFCTR INIT: wrmsr (cccr) cost is 1400.8 cycles (90495 total) PERFCTR INIT: write LVTPC cost is 70.3 cycles (5340 total) PERFCTR INIT: sync_core cost is 543.7 cycles (35640 total) perfctr: driver 2.7.10, cpu type Intel P4 at 2994145 kHz [data from a 3.0 GHz Pentium 4 Model 3] PERFCTR INIT: vendor 0, family 15, model 3, stepping 4, clock 2994546 kHz PERFCTR INIT: NITER == 64 PERFCTR INIT: loop overhead is 772 cycles PERFCTR INIT: rdtsc cost is 100.9 cycles (7230 total) PERFCTR INIT: rdpmc cost is 227.0 cycles (15300 total) PERFCTR INIT: rdmsr (counter) cost is 353.4 cycles (23392 total) PERFCTR INIT: rdmsr (escr) cost is 365.2 cycles (24150 total) PERFCTR INIT: wrmsr (counter) cost is 990.9 cycles (64192 total) PERFCTR INIT: wrmsr (escr) cost is 980.3 cycles (63517 total) PERFCTR INIT: read cr4 cost is 22.5 cycles (2212 total) PERFCTR INIT: write cr4 cost is 392.7 cycles (25905 total) PERFCTR INIT: rdpmc (fast) cost is 90.9 cycles (6593 total) 
PERFCTR INIT: rdmsr (cccr) cost is 374.8 cycles (24765 total) PERFCTR INIT: wrmsr (cccr) cost is 938.4 cycles (60832 total) PERFCTR INIT: write LVTPC cost is 25.6 cycles (2415 total) PERFCTR INIT: sync_core cost is 421.6 cycles (27758 total) perfctr: driver 2.7.6, cpu type Intel P4 at 2994546 kHz [data from a 3.0 GHz Pentium 4 Model 2] PERFCTR INIT: vendor 0, family 15, model 2, stepping 7, clock 3057662 kHz PERFCTR INIT: NITER == 64 PERFCTR INIT: loop overhead is 264 cycles PERFCTR INIT: rdtsc cost is 77.5 cycles (5228 total) PERFCTR INIT: rdpmc cost is 146.2 cycles (9624 total) PERFCTR INIT: rdmsr (counter) cost is 255.0 cycles (16584 total) PERFCTR INIT: rdmsr (escr) cost is 166.2 cycles (10904 total) PERFCTR INIT: wrmsr (counter) cost is 794.4 cycles (51108 total) PERFCTR INIT: wrmsr (escr) cost is 874.0 cycles (56204 total) PERFCTR INIT: read cr4 cost is 5.1 cycles (592 total) PERFCTR INIT: write cr4 cost is 258.8 cycles (16828 total) PERFCTR INIT: rdpmc (fast) cost is 60.5 cycles (4136 total) PERFCTR INIT: rdmsr (cccr) cost is 167.0 cycles (10952 total) PERFCTR INIT: wrmsr (cccr) cost is 833.9 cycles (53636 total) perfctr: driver 2.6.0 DEBUG, cpu type Intel Pentium 4 Model 2 at 3057662 kHz papi-5.6.0/src/libpfm4/include/perfmon/pfmlib.h000664 001750 001750 00000073420 13216244364 023403 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2009 Google, Inc * Contributed by Stephane Eranian * * Based on: * Copyright (c) 2001-2007 Hewlett-Packard Development Company, L.P. 
* Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef __PFMLIB_H__ #define __PFMLIB_H__ #pragma GCC visibility push(default) #ifdef __cplusplus extern "C" { #endif #include #include #include #include #define LIBPFM_VERSION (4 << 16 | 0) #define PFM_MAJ_VERSION(v) ((v)>>16) #define PFM_MIN_VERSION(v) ((v) & 0xffff) /* * ABI revision level */ #define LIBPFM_ABI_VERSION 0 /* * priv level mask (for dfl_plm) */ #define PFM_PLM0 0x01 /* kernel */ #define PFM_PLM1 0x02 /* not yet used */ #define PFM_PLM2 0x04 /* not yet used */ #define PFM_PLM3 0x08 /* priv level 3, 2, 1 (x86) */ #define PFM_PLMH 0x10 /* hypervisor */ /* * Performance Event Source * * The source is what is providing events. 
* It can be: * - Hardware Performance Monitoring Unit (PMU) * - a particular kernel subsystem * * Identifiers are guaranteed constant across libpfm revisions * * New sources must be added at the end before PFM_PMU_MAX */ typedef enum { PFM_PMU_NONE= 0, /* no PMU */ PFM_PMU_GEN_IA64, /* Intel IA-64 architected PMU */ PFM_PMU_ITANIUM, /* Intel Itanium */ PFM_PMU_ITANIUM2, /* Intel Itanium 2 */ PFM_PMU_MONTECITO, /* Intel Dual-Core Itanium 2 9000 */ PFM_PMU_AMD64, /* AMD AMD64 (obsolete) */ PFM_PMU_I386_P6, /* Intel PIII (P6 core) */ PFM_PMU_INTEL_NETBURST, /* Intel Netburst (Pentium 4) */ PFM_PMU_INTEL_NETBURST_P, /* Intel Netburst Prescott (Pentium 4) */ PFM_PMU_COREDUO, /* Intel Core Duo/Core Solo */ PFM_PMU_I386_PM, /* Intel Pentium M */ PFM_PMU_INTEL_CORE, /* Intel Core */ PFM_PMU_INTEL_PPRO, /* Intel Pentium Pro */ PFM_PMU_INTEL_PII, /* Intel Pentium II */ PFM_PMU_INTEL_ATOM, /* Intel Atom */ PFM_PMU_INTEL_NHM, /* Intel Nehalem core PMU */ PFM_PMU_INTEL_NHM_EX, /* Intel Nehalem-EX core PMU */ PFM_PMU_INTEL_NHM_UNC, /* Intel Nehalem uncore PMU */ PFM_PMU_INTEL_X86_ARCH, /* Intel X86 architectural PMU */ PFM_PMU_MIPS_20KC, /* MIPS 20KC */ PFM_PMU_MIPS_24K, /* MIPS 24K */ PFM_PMU_MIPS_25KF, /* MIPS 25KF */ PFM_PMU_MIPS_34K, /* MIPS 34K */ PFM_PMU_MIPS_5KC, /* MIPS 5KC */ PFM_PMU_MIPS_74K, /* MIPS 74K */ PFM_PMU_MIPS_R10000, /* MIPS R10000 */ PFM_PMU_MIPS_R12000, /* MIPS R12000 */ PFM_PMU_MIPS_RM7000, /* MIPS RM7000 */ PFM_PMU_MIPS_RM9000, /* MIPS RM9000 */ PFM_PMU_MIPS_SB1, /* MIPS SB1/SB1A */ PFM_PMU_MIPS_VR5432, /* MIPS VR5432 */ PFM_PMU_MIPS_VR5500, /* MIPS VR5500 */ PFM_PMU_MIPS_ICE9A, /* SiCortex ICE9A */ PFM_PMU_MIPS_ICE9B, /* SiCortex ICE9B */ PFM_PMU_POWERPC, /* POWERPC */ PFM_PMU_CELL, /* IBM CELL */ PFM_PMU_SPARC_ULTRA12, /* UltraSPARC I, II, IIi, and IIe */ PFM_PMU_SPARC_ULTRA3, /* UltraSPARC III */ PFM_PMU_SPARC_ULTRA3I, /* UltraSPARC IIIi and IIIi+ */ PFM_PMU_SPARC_ULTRA3PLUS, /* UltraSPARC III+ and IV */ PFM_PMU_SPARC_ULTRA4PLUS, /* UltraSPARC IV+ */ 
PFM_PMU_SPARC_NIAGARA1, /* Niagara-1 */ PFM_PMU_SPARC_NIAGARA2, /* Niagara-2 */ PFM_PMU_PPC970, /* IBM PowerPC 970(FX,GX) */ PFM_PMU_PPC970MP, /* IBM PowerPC 970MP */ PFM_PMU_POWER3, /* IBM POWER3 */ PFM_PMU_POWER4, /* IBM POWER4 */ PFM_PMU_POWER5, /* IBM POWER5 */ PFM_PMU_POWER5p, /* IBM POWER5+ */ PFM_PMU_POWER6, /* IBM POWER6 */ PFM_PMU_POWER7, /* IBM POWER7 */ PFM_PMU_PERF_EVENT, /* perf_event PMU */ PFM_PMU_INTEL_WSM, /* Intel Westmere single-socket (Clarkdale) */ PFM_PMU_INTEL_WSM_DP, /* Intel Westmere dual-socket (Westmere-EP, Gulftwon) */ PFM_PMU_INTEL_WSM_UNC, /* Intel Westmere uncore PMU */ PFM_PMU_AMD64_K7, /* AMD AMD64 K7 */ PFM_PMU_AMD64_K8_REVB, /* AMD AMD64 K8 RevB */ PFM_PMU_AMD64_K8_REVC, /* AMD AMD64 K8 RevC */ PFM_PMU_AMD64_K8_REVD, /* AMD AMD64 K8 RevD */ PFM_PMU_AMD64_K8_REVE, /* AMD AMD64 K8 RevE */ PFM_PMU_AMD64_K8_REVF, /* AMD AMD64 K8 RevF */ PFM_PMU_AMD64_K8_REVG, /* AMD AMD64 K8 RevG */ PFM_PMU_AMD64_FAM10H_BARCELONA, /* AMD AMD64 Fam10h Barcelona RevB */ PFM_PMU_AMD64_FAM10H_SHANGHAI, /* AMD AMD64 Fam10h Shanghai RevC */ PFM_PMU_AMD64_FAM10H_ISTANBUL, /* AMD AMD64 Fam10h Istanbul RevD */ PFM_PMU_ARM_CORTEX_A8, /* ARM Cortex A8 */ PFM_PMU_ARM_CORTEX_A9, /* ARM Cortex A9 */ PFM_PMU_TORRENT, /* IBM Torrent hub chip */ PFM_PMU_INTEL_SNB, /* Intel Sandy Bridge (single socket) */ PFM_PMU_AMD64_FAM14H_BOBCAT, /* AMD AMD64 Fam14h Bobcat */ PFM_PMU_AMD64_FAM15H_INTERLAGOS,/* AMD AMD64 Fam15h Interlagos */ PFM_PMU_INTEL_SNB_EP, /* Intel SandyBridge EP */ PFM_PMU_AMD64_FAM12H_LLANO, /* AMD AMD64 Fam12h Llano */ PFM_PMU_AMD64_FAM11H_TURION, /* AMD AMD64 Fam11h Turion */ PFM_PMU_INTEL_IVB, /* Intel IvyBridge */ PFM_PMU_ARM_CORTEX_A15, /* ARM Cortex A15 */ PFM_PMU_INTEL_SNB_UNC_CB0, /* Intel SandyBridge C-box 0 uncore PMU */ PFM_PMU_INTEL_SNB_UNC_CB1, /* Intel SandyBridge C-box 1 uncore PMU */ PFM_PMU_INTEL_SNB_UNC_CB2, /* Intel SandyBridge C-box 2 uncore PMU */ PFM_PMU_INTEL_SNB_UNC_CB3, /* Intel SandyBridge C-box 3 uncore PMU */ 
PFM_PMU_INTEL_SNBEP_UNC_CB0, /* Intel SandyBridge-EP C-Box core 0 uncore */ PFM_PMU_INTEL_SNBEP_UNC_CB1, /* Intel SandyBridge-EP C-Box core 1 uncore */ PFM_PMU_INTEL_SNBEP_UNC_CB2, /* Intel SandyBridge-EP C-Box core 2 uncore */ PFM_PMU_INTEL_SNBEP_UNC_CB3, /* Intel SandyBridge-EP C-Box core 3 uncore */ PFM_PMU_INTEL_SNBEP_UNC_CB4, /* Intel SandyBridge-EP C-Box core 4 uncore */ PFM_PMU_INTEL_SNBEP_UNC_CB5, /* Intel SandyBridge-EP C-Box core 5 uncore */ PFM_PMU_INTEL_SNBEP_UNC_CB6, /* Intel SandyBridge-EP C-Box core 6 uncore */ PFM_PMU_INTEL_SNBEP_UNC_CB7, /* Intel SandyBridge-EP C-Box core 7 uncore */ PFM_PMU_INTEL_SNBEP_UNC_HA, /* Intel SandyBridge-EP HA uncore */ PFM_PMU_INTEL_SNBEP_UNC_IMC0, /* Intel SandyBridge-EP IMC socket 0 uncore */ PFM_PMU_INTEL_SNBEP_UNC_IMC1, /* Intel SandyBridge-EP IMC socket 1 uncore */ PFM_PMU_INTEL_SNBEP_UNC_IMC2, /* Intel SandyBridge-EP IMC socket 2 uncore */ PFM_PMU_INTEL_SNBEP_UNC_IMC3, /* Intel SandyBridge-EP IMC socket 3 uncore */ PFM_PMU_INTEL_SNBEP_UNC_PCU, /* Intel SandyBridge-EP PCU uncore */ PFM_PMU_INTEL_SNBEP_UNC_QPI0, /* Intel SandyBridge-EP QPI link 0 uncore */ PFM_PMU_INTEL_SNBEP_UNC_QPI1, /* Intel SandyBridge-EP QPI link 1 uncore */ PFM_PMU_INTEL_SNBEP_UNC_UBOX, /* Intel SandyBridge-EP U-Box uncore */ PFM_PMU_INTEL_SNBEP_UNC_R2PCIE, /* Intel SandyBridge-EP R2PCIe uncore */ PFM_PMU_INTEL_SNBEP_UNC_R3QPI0, /* Intel SandyBridge-EP R3QPI 0 uncore */ PFM_PMU_INTEL_SNBEP_UNC_R3QPI1, /* Intel SandyBridge-EP R3QPI 1 uncore */ PFM_PMU_INTEL_KNC, /* Intel Knights Corner (Xeon Phi) */ PFM_PMU_S390X_CPUM_CF, /* s390x: CPU-M counter facility */ PFM_PMU_ARM_1176, /* ARM 1176 */ PFM_PMU_INTEL_IVB_EP, /* Intel IvyBridge EP */ PFM_PMU_INTEL_HSW, /* Intel Haswell */ PFM_PMU_INTEL_IVB_UNC_CB0, /* Intel IvyBridge C-box 0 uncore PMU */ PFM_PMU_INTEL_IVB_UNC_CB1, /* Intel IvyBridge C-box 1 uncore PMU */ PFM_PMU_INTEL_IVB_UNC_CB2, /* Intel IvyBridge C-box 2 uncore PMU */ PFM_PMU_INTEL_IVB_UNC_CB3, /* Intel IvyBridge C-box 3 uncore PMU */ 
PFM_PMU_POWER8, /* IBM POWER8 */ PFM_PMU_INTEL_RAPL, /* Intel RAPL */ PFM_PMU_INTEL_SLM, /* Intel Silvermont */ PFM_PMU_AMD64_FAM15H_NB, /* AMD AMD64 Fam15h NorthBridge */ PFM_PMU_ARM_QCOM_KRAIT, /* Qualcomm Krait */ PFM_PMU_PERF_EVENT_RAW, /* perf_events RAW event syntax */ PFM_PMU_INTEL_IVBEP_UNC_CB0, /* Intel IvyBridge-EP C-Box core 0 uncore */ PFM_PMU_INTEL_IVBEP_UNC_CB1, /* Intel IvyBridge-EP C-Box core 1 uncore */ PFM_PMU_INTEL_IVBEP_UNC_CB2, /* Intel IvyBridge-EP C-Box core 2 uncore */ PFM_PMU_INTEL_IVBEP_UNC_CB3, /* Intel IvyBridge-EP C-Box core 3 uncore */ PFM_PMU_INTEL_IVBEP_UNC_CB4, /* Intel IvyBridge-EP C-Box core 4 uncore */ PFM_PMU_INTEL_IVBEP_UNC_CB5, /* Intel IvyBridge-EP C-Box core 5 uncore */ PFM_PMU_INTEL_IVBEP_UNC_CB6, /* Intel IvyBridge-EP C-Box core 6 uncore */ PFM_PMU_INTEL_IVBEP_UNC_CB7, /* Intel IvyBridge-EP C-Box core 7 uncore */ PFM_PMU_INTEL_IVBEP_UNC_CB8, /* Intel IvyBridge-EP C-Box core 8 uncore */ PFM_PMU_INTEL_IVBEP_UNC_CB9, /* Intel IvyBridge-EP C-Box core 9 uncore */ PFM_PMU_INTEL_IVBEP_UNC_CB10, /* Intel IvyBridge-EP C-Box core 10 uncore */ PFM_PMU_INTEL_IVBEP_UNC_CB11, /* Intel IvyBridge-EP C-Box core 11 uncore */ PFM_PMU_INTEL_IVBEP_UNC_CB12, /* Intel IvyBridge-EP C-Box core 12 uncore */ PFM_PMU_INTEL_IVBEP_UNC_CB13, /* Intel IvyBridge-EP C-Box core 13 uncore */ PFM_PMU_INTEL_IVBEP_UNC_CB14, /* Intel IvyBridge-EP C-Box core 14 uncore */ PFM_PMU_INTEL_IVBEP_UNC_HA0, /* Intel IvyBridge-EP HA 0 uncore */ PFM_PMU_INTEL_IVBEP_UNC_HA1, /* Intel IvyBridge-EP HA 1 uncore */ PFM_PMU_INTEL_IVBEP_UNC_IMC0, /* Intel IvyBridge-EP IMC socket 0 uncore */ PFM_PMU_INTEL_IVBEP_UNC_IMC1, /* Intel IvyBridge-EP IMC socket 1 uncore */ PFM_PMU_INTEL_IVBEP_UNC_IMC2, /* Intel IvyBridge-EP IMC socket 2 uncore */ PFM_PMU_INTEL_IVBEP_UNC_IMC3, /* Intel IvyBridge-EP IMC socket 3 uncore */ PFM_PMU_INTEL_IVBEP_UNC_IMC4, /* Intel IvyBridge-EP IMC socket 4 uncore */ PFM_PMU_INTEL_IVBEP_UNC_IMC5, /* Intel IvyBridge-EP IMC socket 5 uncore */ 
PFM_PMU_INTEL_IVBEP_UNC_IMC6, /* Intel IvyBridge-EP IMC socket 6 uncore */ PFM_PMU_INTEL_IVBEP_UNC_IMC7, /* Intel IvyBridge-EP IMC socket 7 uncore */ PFM_PMU_INTEL_IVBEP_UNC_PCU, /* Intel IvyBridge-EP PCU uncore */ PFM_PMU_INTEL_IVBEP_UNC_QPI0, /* Intel IvyBridge-EP QPI link 0 uncore */ PFM_PMU_INTEL_IVBEP_UNC_QPI1, /* Intel IvyBridge-EP QPI link 1 uncore */ PFM_PMU_INTEL_IVBEP_UNC_QPI2, /* Intel IvyBridge-EP QPI link 2 uncore */ PFM_PMU_INTEL_IVBEP_UNC_UBOX, /* Intel IvyBridge-EP U-Box uncore */ PFM_PMU_INTEL_IVBEP_UNC_R2PCIE, /* Intel IvyBridge-EP R2PCIe uncore */ PFM_PMU_INTEL_IVBEP_UNC_R3QPI0, /* Intel IvyBridge-EP R3QPI 0 uncore */ PFM_PMU_INTEL_IVBEP_UNC_R3QPI1, /* Intel IvyBridge-EP R3QPI 1 uncore */ PFM_PMU_INTEL_IVBEP_UNC_R3QPI2, /* Intel IvyBridge-EP R3QPI 2 uncore */ PFM_PMU_INTEL_IVBEP_UNC_IRP, /* Intel IvyBridge-EP IRP uncore */ PFM_PMU_S390X_CPUM_SF, /* s390x: CPU-M sampling facility */ PFM_PMU_ARM_CORTEX_A57, /* ARM Cortex A57 (ARMv8) */ PFM_PMU_ARM_CORTEX_A53, /* ARM Cortex A53 (ARMv8) */ PFM_PMU_ARM_CORTEX_A7, /* ARM Cortex A7 */ PFM_PMU_INTEL_HSW_EP, /* Intel Haswell EP */ PFM_PMU_INTEL_BDW, /* Intel Broadwell */ PFM_PMU_ARM_XGENE, /* Applied Micro X-Gene (ARMv8) */ PFM_PMU_INTEL_HSWEP_UNC_CB0, /* Intel Haswell-EP C-Box core 0 uncore */ PFM_PMU_INTEL_HSWEP_UNC_CB1, /* Intel Haswell-EP C-Box core 1 uncore */ PFM_PMU_INTEL_HSWEP_UNC_CB2, /* Intel Haswell-EP C-Box core 2 uncore */ PFM_PMU_INTEL_HSWEP_UNC_CB3, /* Intel Haswell-EP C-Box core 3 uncore */ PFM_PMU_INTEL_HSWEP_UNC_CB4, /* Intel Haswell-EP C-Box core 4 uncore */ PFM_PMU_INTEL_HSWEP_UNC_CB5, /* Intel Haswell-EP C-Box core 5 uncore */ PFM_PMU_INTEL_HSWEP_UNC_CB6, /* Intel Haswell-EP C-Box core 6 uncore */ PFM_PMU_INTEL_HSWEP_UNC_CB7, /* Intel Haswell-EP C-Box core 7 uncore */ PFM_PMU_INTEL_HSWEP_UNC_CB8, /* Intel Haswell-EP C-Box core 8 uncore */ PFM_PMU_INTEL_HSWEP_UNC_CB9, /* Intel Haswell-EP C-Box core 9 uncore */ PFM_PMU_INTEL_HSWEP_UNC_CB10, /* Intel Haswell-EP C-Box core 10 uncore */ 
PFM_PMU_INTEL_HSWEP_UNC_CB11, /* Intel Haswell-EP C-Box core 11 uncore */ PFM_PMU_INTEL_HSWEP_UNC_CB12, /* Intel Haswell-EP C-Box core 12 uncore */ PFM_PMU_INTEL_HSWEP_UNC_CB13, /* Intel Haswell-EP C-Box core 13 uncore */ PFM_PMU_INTEL_HSWEP_UNC_CB14, /* Intel Haswell-EP C-Box core 14 uncore */ PFM_PMU_INTEL_HSWEP_UNC_CB15, /* Intel Haswell-EP C-Box core 15 uncore */ PFM_PMU_INTEL_HSWEP_UNC_CB16, /* Intel Haswell-EP C-Box core 16 uncore */ PFM_PMU_INTEL_HSWEP_UNC_CB17, /* Intel Haswell-EP C-Box core 17 uncore */ PFM_PMU_INTEL_HSWEP_UNC_HA0, /* Intel Haswell-EP HA 0 uncore */ PFM_PMU_INTEL_HSWEP_UNC_HA1, /* Intel Haswell-EP HA 1 uncore */ PFM_PMU_INTEL_HSWEP_UNC_IMC0, /* Intel Haswell-EP IMC socket 0 uncore */ PFM_PMU_INTEL_HSWEP_UNC_IMC1, /* Intel Haswell-EP IMC socket 1 uncore */ PFM_PMU_INTEL_HSWEP_UNC_IMC2, /* Intel Haswell-EP IMC socket 2 uncore */ PFM_PMU_INTEL_HSWEP_UNC_IMC3, /* Intel Haswell-EP IMC socket 3 uncore */ PFM_PMU_INTEL_HSWEP_UNC_IMC4, /* Intel Haswell-EP IMC socket 4 uncore */ PFM_PMU_INTEL_HSWEP_UNC_IMC5, /* Intel Haswell-EP IMC socket 5 uncore */ PFM_PMU_INTEL_HSWEP_UNC_IMC6, /* Intel Haswell-EP IMC socket 6 uncore */ PFM_PMU_INTEL_HSWEP_UNC_IMC7, /* Intel Haswell-EP IMC socket 7 uncore */ PFM_PMU_INTEL_HSWEP_UNC_PCU, /* Intel Haswell-EP PCU uncore */ PFM_PMU_INTEL_HSWEP_UNC_QPI0, /* Intel Haswell-EP QPI link 0 uncore */ PFM_PMU_INTEL_HSWEP_UNC_QPI1, /* Intel Haswell-EP QPI link 1 uncore */ PFM_PMU_INTEL_HSWEP_UNC_UBOX, /* Intel Haswell-EP U-Box uncore */ PFM_PMU_INTEL_HSWEP_UNC_R2PCIE, /* Intel Haswell-EP R2PCIe uncore */ PFM_PMU_INTEL_HSWEP_UNC_R3QPI0, /* Intel Haswell-EP R3QPI 0 uncore */ PFM_PMU_INTEL_HSWEP_UNC_R3QPI1, /* Intel Haswell-EP R3QPI 1 uncore */ PFM_PMU_INTEL_HSWEP_UNC_R3QPI2, /* Intel Haswell-EP R3QPI 2 uncore */ PFM_PMU_INTEL_HSWEP_UNC_IRP, /* Intel Haswell-EP IRP uncore */ PFM_PMU_INTEL_HSWEP_UNC_SB0, /* Intel Haswell-EP S-Box 0 uncore */ PFM_PMU_INTEL_HSWEP_UNC_SB1, /* Intel Haswell-EP S-Box 1 uncore */ 
PFM_PMU_INTEL_HSWEP_UNC_SB2, /* Intel Haswell-EP S-Box 2 uncore */ PFM_PMU_INTEL_HSWEP_UNC_SB3, /* Intel Haswell-EP S-Box 3 uncore */ PFM_PMU_POWERPC_NEST_MCS_READ_BW, /* POWERPC Nest Memory Read bandwidth */ PFM_PMU_POWERPC_NEST_MCS_WRITE_BW, /* POWERPC Nest Memory Write bandwidth */ PFM_PMU_INTEL_SKL, /* Intel Skylake */ PFM_PMU_INTEL_BDW_EP, /* Intel Broadwell EP */ PFM_PMU_INTEL_GLM, /* Intel Goldmont */ PFM_PMU_INTEL_KNL, /* Intel Knights Landing */ PFM_PMU_INTEL_KNL_UNC_IMC0, /* Intel KnightLanding IMC channel 0 uncore */ PFM_PMU_INTEL_KNL_UNC_IMC1, /* Intel KnightLanding IMC channel 1 uncore */ PFM_PMU_INTEL_KNL_UNC_IMC2, /* Intel KnightLanding IMC channel 2 uncore */ PFM_PMU_INTEL_KNL_UNC_IMC3, /* Intel KnightLanding IMC channel 3 uncore */ PFM_PMU_INTEL_KNL_UNC_IMC4, /* Intel KnightLanding IMC channel 4 uncore */ PFM_PMU_INTEL_KNL_UNC_IMC5, /* Intel KnightLanding IMC channel 5 uncore */ PFM_PMU_INTEL_KNL_UNC_IMC_UCLK0,/* Intel KnightLanding IMC UCLK unit 0 uncore */ PFM_PMU_INTEL_KNL_UNC_IMC_UCLK1,/* Intel KnightLanding IMC UCLK unit 1 uncore */ PFM_PMU_INTEL_KNL_UNC_EDC_ECLK0,/* Intel KnightLanding EDC ECLK unit 0 uncore */ PFM_PMU_INTEL_KNL_UNC_EDC_ECLK1,/* Intel KnightLanding EDC ECLK unit 1 uncore */ PFM_PMU_INTEL_KNL_UNC_EDC_ECLK2,/* Intel KnightLanding EDC ECLK unit 2 uncore */ PFM_PMU_INTEL_KNL_UNC_EDC_ECLK3,/* Intel KnightLanding EDC ECLK unit 3 uncore */ PFM_PMU_INTEL_KNL_UNC_EDC_ECLK4,/* Intel KnightLanding EDC ECLK unit 4 uncore */ PFM_PMU_INTEL_KNL_UNC_EDC_ECLK5,/* Intel KnightLanding EDC ECLK unit 5 uncore */ PFM_PMU_INTEL_KNL_UNC_EDC_ECLK6,/* Intel KnightLanding EDC ECLK unit 6 uncore */ PFM_PMU_INTEL_KNL_UNC_EDC_ECLK7,/* Intel KnightLanding EDC ECLK unit 7 uncore */ PFM_PMU_INTEL_KNL_UNC_EDC_UCLK0,/* Intel KnightLanding EDC UCLK unit 0 uncore */ PFM_PMU_INTEL_KNL_UNC_EDC_UCLK1,/* Intel KnightLanding EDC UCLK unit 1 uncore */ PFM_PMU_INTEL_KNL_UNC_EDC_UCLK2,/* Intel KnightLanding EDC UCLK unit 2 uncore */ PFM_PMU_INTEL_KNL_UNC_EDC_UCLK3,/* 
Intel KnightLanding EDC UCLK unit 3 uncore */ PFM_PMU_INTEL_KNL_UNC_EDC_UCLK4,/* Intel KnightLanding EDC UCLK unit 4 uncore */ PFM_PMU_INTEL_KNL_UNC_EDC_UCLK5,/* Intel KnightLanding EDC UCLK unit 5 uncore */ PFM_PMU_INTEL_KNL_UNC_EDC_UCLK6,/* Intel KnightLanding EDC UCLK unit 6 uncore */ PFM_PMU_INTEL_KNL_UNC_EDC_UCLK7,/* Intel KnightLanding EDC UCLK unit 7 uncore */ PFM_PMU_INTEL_KNL_UNC_CHA0, /* Intel KnightLanding CHA unit 0 uncore */ PFM_PMU_INTEL_KNL_UNC_CHA1, /* Intel KnightLanding CHA unit 1 uncore */ PFM_PMU_INTEL_KNL_UNC_CHA2, /* Intel KnightLanding CHA unit 2 uncore */ PFM_PMU_INTEL_KNL_UNC_CHA3, /* Intel KnightLanding CHA unit 3 uncore */ PFM_PMU_INTEL_KNL_UNC_CHA4, /* Intel KnightLanding CHA unit 4 uncore */ PFM_PMU_INTEL_KNL_UNC_CHA5, /* Intel KnightLanding CHA unit 5 uncore */ PFM_PMU_INTEL_KNL_UNC_CHA6, /* Intel KnightLanding CHA unit 6 uncore */ PFM_PMU_INTEL_KNL_UNC_CHA7, /* Intel KnightLanding CHA unit 7 uncore */ PFM_PMU_INTEL_KNL_UNC_CHA8, /* Intel KnightLanding CHA unit 8 uncore */ PFM_PMU_INTEL_KNL_UNC_CHA9, /* Intel KnightLanding CHA unit 9 uncore */ PFM_PMU_INTEL_KNL_UNC_CHA10, /* Intel KnightLanding CHA unit 10 uncore */ PFM_PMU_INTEL_KNL_UNC_CHA11, /* Intel KnightLanding CHA unit 11 uncore */ PFM_PMU_INTEL_KNL_UNC_CHA12, /* Intel KnightLanding CHA unit 12 uncore */ PFM_PMU_INTEL_KNL_UNC_CHA13, /* Intel KnightLanding CHA unit 13 uncore */ PFM_PMU_INTEL_KNL_UNC_CHA14, /* Intel KnightLanding CHA unit 14 uncore */ PFM_PMU_INTEL_KNL_UNC_CHA15, /* Intel KnightLanding CHA unit 15 uncore */ PFM_PMU_INTEL_KNL_UNC_CHA16, /* Intel KnightLanding CHA unit 16 uncore */ PFM_PMU_INTEL_KNL_UNC_CHA17, /* Intel KnightLanding CHA unit 17 uncore */ PFM_PMU_INTEL_KNL_UNC_CHA18, /* Intel KnightLanding CHA unit 18 uncore */ PFM_PMU_INTEL_KNL_UNC_CHA19, /* Intel KnightLanding CHA unit 19 uncore */ PFM_PMU_INTEL_KNL_UNC_CHA20, /* Intel KnightLanding CHA unit 20 uncore */ PFM_PMU_INTEL_KNL_UNC_CHA21, /* Intel KnightLanding CHA unit 21 uncore */ 
PFM_PMU_INTEL_KNL_UNC_CHA22, /* Intel KnightLanding CHA unit 22 uncore */ PFM_PMU_INTEL_KNL_UNC_CHA23, /* Intel KnightLanding CHA unit 23 uncore */ PFM_PMU_INTEL_KNL_UNC_CHA24, /* Intel KnightLanding CHA unit 24 uncore */ PFM_PMU_INTEL_KNL_UNC_CHA25, /* Intel KnightLanding CHA unit 25 uncore */ PFM_PMU_INTEL_KNL_UNC_CHA26, /* Intel KnightLanding CHA unit 26 uncore */ PFM_PMU_INTEL_KNL_UNC_CHA27, /* Intel KnightLanding CHA unit 27 uncore */ PFM_PMU_INTEL_KNL_UNC_CHA28, /* Intel KnightLanding CHA unit 28 uncore */ PFM_PMU_INTEL_KNL_UNC_CHA29, /* Intel KnightLanding CHA unit 29 uncore */ PFM_PMU_INTEL_KNL_UNC_CHA30, /* Intel KnightLanding CHA unit 30 uncore */ PFM_PMU_INTEL_KNL_UNC_CHA31, /* Intel KnightLanding CHA unit 31 uncore */ PFM_PMU_INTEL_KNL_UNC_CHA32, /* Intel KnightLanding CHA unit 32 uncore */ PFM_PMU_INTEL_KNL_UNC_CHA33, /* Intel KnightLanding CHA unit 33 uncore */ PFM_PMU_INTEL_KNL_UNC_CHA34, /* Intel KnightLanding CHA unit 34 uncore */ PFM_PMU_INTEL_KNL_UNC_CHA35, /* Intel KnightLanding CHA unit 35 uncore */ PFM_PMU_INTEL_KNL_UNC_CHA36, /* Intel KnightLanding CHA unit 36 uncore */ PFM_PMU_INTEL_KNL_UNC_CHA37, /* Intel KnightLanding CHA unit 37 uncore */ PFM_PMU_INTEL_KNL_UNC_UBOX, /* Intel KnightLanding Ubox uncore */ PFM_PMU_INTEL_KNL_UNC_M2PCIE, /* Intel KnightLanding M2PCIe uncore */ PFM_PMU_POWER9, /* IBM POWER9 */ PFM_PMU_INTEL_BDX_UNC_CB0, /* Intel Broadwell-X C-Box core 0 uncore */ PFM_PMU_INTEL_BDX_UNC_CB1, /* Intel Broadwell-X C-Box core 1 uncore */ PFM_PMU_INTEL_BDX_UNC_CB2, /* Intel Broadwell-X C-Box core 2 uncore */ PFM_PMU_INTEL_BDX_UNC_CB3, /* Intel Broadwell-X C-Box core 3 uncore */ PFM_PMU_INTEL_BDX_UNC_CB4, /* Intel Broadwell-X C-Box core 4 uncore */ PFM_PMU_INTEL_BDX_UNC_CB5, /* Intel Broadwell-X C-Box core 5 uncore */ PFM_PMU_INTEL_BDX_UNC_CB6, /* Intel Broadwell-X C-Box core 6 uncore */ PFM_PMU_INTEL_BDX_UNC_CB7, /* Intel Broadwell-X C-Box core 7 uncore */ PFM_PMU_INTEL_BDX_UNC_CB8, /* Intel Broadwell-X C-Box core 8 uncore */ 
PFM_PMU_INTEL_BDX_UNC_CB9, /* Intel Broadwell-X C-Box core 9 uncore */ PFM_PMU_INTEL_BDX_UNC_CB10, /* Intel Broadwell-X C-Box core 10 uncore */ PFM_PMU_INTEL_BDX_UNC_CB11, /* Intel Broadwell-X C-Box core 11 uncore */ PFM_PMU_INTEL_BDX_UNC_CB12, /* Intel Broadwell-X C-Box core 12 uncore */ PFM_PMU_INTEL_BDX_UNC_CB13, /* Intel Broadwell-X C-Box core 13 uncore */ PFM_PMU_INTEL_BDX_UNC_CB14, /* Intel Broadwell-X C-Box core 14 uncore */ PFM_PMU_INTEL_BDX_UNC_CB15, /* Intel Broadwell-X C-Box core 15 uncore */ PFM_PMU_INTEL_BDX_UNC_CB16, /* Intel Broadwell-X C-Box core 16 uncore */ PFM_PMU_INTEL_BDX_UNC_CB17, /* Intel Broadwell-X C-Box core 17 uncore */ PFM_PMU_INTEL_BDX_UNC_CB18, /* Intel Broadwell-X C-Box core 18 uncore */ PFM_PMU_INTEL_BDX_UNC_CB19, /* Intel Broadwell-X C-Box core 19 uncore */ PFM_PMU_INTEL_BDX_UNC_CB20, /* Intel Broadwell-X C-Box core 20 uncore */ PFM_PMU_INTEL_BDX_UNC_CB21, /* Intel Broadwell-X C-Box core 21 uncore */ PFM_PMU_INTEL_BDX_UNC_CB22, /* Intel Broadwell-X C-Box core 22 uncore */ PFM_PMU_INTEL_BDX_UNC_CB23, /* Intel Broadwell-X C-Box core 23 uncore */ PFM_PMU_INTEL_BDX_UNC_HA0, /* Intel Broadwell-X HA 0 uncore */ PFM_PMU_INTEL_BDX_UNC_HA1, /* Intel Broadwell-X HA 1 uncore */ PFM_PMU_INTEL_BDX_UNC_IMC0, /* Intel Broadwell-X IMC socket 0 uncore */ PFM_PMU_INTEL_BDX_UNC_IMC1, /* Intel Broadwell-X IMC socket 1 uncore */ PFM_PMU_INTEL_BDX_UNC_IMC2, /* Intel Broadwell-X IMC socket 2 uncore */ PFM_PMU_INTEL_BDX_UNC_IMC3, /* Intel Broadwell-X IMC socket 3 uncore */ PFM_PMU_INTEL_BDX_UNC_IMC4, /* Intel Broadwell-X IMC socket 4 uncore */ PFM_PMU_INTEL_BDX_UNC_IMC5, /* Intel Broadwell-X IMC socket 5 uncore */ PFM_PMU_INTEL_BDX_UNC_IMC6, /* Intel Broadwell-X IMC socket 6 uncore */ PFM_PMU_INTEL_BDX_UNC_IMC7, /* Intel Broadwell-X IMC socket 7 uncore */ PFM_PMU_INTEL_BDX_UNC_PCU, /* Intel Broadwell-X PCU uncore */ PFM_PMU_INTEL_BDX_UNC_QPI0, /* Intel Broadwell-X QPI link 0 uncore */ PFM_PMU_INTEL_BDX_UNC_QPI1, /* Intel Broadwell-X QPI link 1 uncore */ 
PFM_PMU_INTEL_BDX_UNC_QPI2, /* Intel Broadwell-X QPI link 2 uncore */ PFM_PMU_INTEL_BDX_UNC_UBOX, /* Intel Broadwell-X U-Box uncore */ PFM_PMU_INTEL_BDX_UNC_R2PCIE, /* Intel Broadwell-X R2PCIe uncore */ PFM_PMU_INTEL_BDX_UNC_R3QPI0, /* Intel Broadwell-X R3QPI 0 uncore */ PFM_PMU_INTEL_BDX_UNC_R3QPI1, /* Intel Broadwell-X R3QPI 1 uncore */ PFM_PMU_INTEL_BDX_UNC_R3QPI2, /* Intel Broadwell-X R3QPI 2 uncore */ PFM_PMU_INTEL_BDX_UNC_IRP, /* Intel Broadwell-X IRP uncore */ PFM_PMU_INTEL_BDX_UNC_SB0, /* Intel Broadwell-X S-Box 0 uncore */ PFM_PMU_INTEL_BDX_UNC_SB1, /* Intel Broadwell-X S-Box 1 uncore */ PFM_PMU_INTEL_BDX_UNC_SB2, /* Intel Broadwell-X S-Box 2 uncore */ PFM_PMU_INTEL_BDX_UNC_SB3, /* Intel Broadwell-X S-Box 3 uncore */ PFM_PMU_AMD64_FAM17H, /* AMD AMD64 Fam17h Zen */ PFM_PMU_AMD64_FAM16H, /* AMD AMD64 Fam16h Jaguar */ PFM_PMU_INTEL_SKX, /* Intel Skylake-X */ /* MUST ADD NEW PMU MODELS HERE */ PFM_PMU_MAX /* end marker */ } pfm_pmu_t; typedef enum { PFM_PMU_TYPE_UNKNOWN=0, /* unknown PMU type */ PFM_PMU_TYPE_CORE, /* processor core PMU */ PFM_PMU_TYPE_UNCORE, /* processor socket-level PMU */ PFM_PMU_TYPE_OS_GENERIC,/* generic OS-provided PMU */ PFM_PMU_TYPE_MAX } pfm_pmu_type_t; typedef enum { PFM_ATTR_NONE=0, /* no attribute */ PFM_ATTR_UMASK, /* unit mask */ PFM_ATTR_MOD_BOOL, /* register modifier */ PFM_ATTR_MOD_INTEGER, /* register modifier */ PFM_ATTR_RAW_UMASK, /* raw umask (not user visible) */ PFM_ATTR_MAX /* end-marker */ } pfm_attr_t; /* * define additional event data types beyond historic uint64 * what else can fit in 64 bits? 
 */
typedef enum {
	PFM_DTYPE_UNKNOWN=0,	/* unknown */
	PFM_DTYPE_UINT64,	/* uint64 */
	PFM_DTYPE_INT64,	/* int64 */
	PFM_DTYPE_DOUBLE,	/* IEEE double precision float */
	PFM_DTYPE_FIXED,	/* 32.32 fixed point */
	PFM_DTYPE_RATIO,	/* 32/32 integer ratio */
	PFM_DTYPE_CHAR8,	/* 8 char unterminated string */
	PFM_DTYPE_MAX		/* end-marker */
} pfm_dtype_t;

/*
 * event attribute control: which layer is controlling
 * the attribute could be PMU, OS APIs
 */
typedef enum {
	PFM_ATTR_CTRL_UNKNOWN = 0,	/* unknown */
	PFM_ATTR_CTRL_PMU,		/* PMU hardware */
	PFM_ATTR_CTRL_PERF_EVENT,	/* perf_events kernel interface */
	PFM_ATTR_CTRL_MAX		/* end-marker */
} pfm_attr_ctrl_t;

/*
 * OS layer
 * Used when querying event or attribute information
 */
typedef enum {
	PFM_OS_NONE = 0,	/* only PMU */
	PFM_OS_PERF_EVENT,	/* perf_events PMU attribute subset + PMU */
	PFM_OS_PERF_EVENT_EXT,	/* perf_events all attributes + PMU */
	PFM_OS_MAX,		/* end-marker */
} pfm_os_t;

/* SWIG doesn't deal well with anonymous nested structures */
/*
 * SWIG_NAME(x) expands to x when SWIG is defined and to nothing otherwise,
 * i.e. the nested structs/unions tagged with it are only given a member
 * name for SWIG builds and stay anonymous for regular C builds.
 */
#ifdef SWIG
#define SWIG_NAME(x) x
#else
#define SWIG_NAME(x)
#endif /* SWIG */

/*
 * special data type for libpfm error value used to help
 * with Python support and in particular for SWIG. By using
 * a specific type we can detect library calls and trap errors
 * in one SWIG statement as opposed to having to keep track of
 * each call individually. Programs can use 'int' safely for
 * the return value.
 */
typedef int pfm_err_t;		/* error if !PFM_SUCCESS */
typedef int os_err_t;		/* error if a syscall fails */

/*
 * PMU description, output argument of pfm_get_pmu_info().
 *
 * NOTE(review): the name/desc comments below say "event" although this
 * structure describes a PMU -- looks like a copy/paste from
 * pfm_event_info_t; confirm against the pfm_get_pmu_info() man page.
 */
typedef struct {
	const char		*name;		/* event name */
	const char		*desc;		/* event description */
	size_t			size;		/* struct sizeof */
	pfm_pmu_t		pmu;		/* PMU identification */
	pfm_pmu_type_t		type;		/* PMU type */
	int			nevents;	/* how many events for this PMU */
	int			first_event;	/* opaque index of first event */
	int			max_encoding;	/* max number of uint64_t to encode an event */
	int			num_cntrs;	/* number of generic counters */
	int			num_fixed_cntrs;/* number of fixed counters */
	struct {
		unsigned int	is_present:1;	/* present on host system */
		unsigned int	is_dfl:1;	/* is architecture default PMU */
		unsigned int	reserved_bits:30;
	} SWIG_NAME(flags);
} pfm_pmu_info_t;

/*
 * Event description, output argument of pfm_get_event_info().
 */
typedef struct {
	const char		*name;	/* event name */
	const char		*desc;	/* event description */
	const char		*equiv;	/* event is equivalent to */
	size_t			size;	/* struct sizeof */
	uint64_t		code;	/* event raw code (not encoding) */
	pfm_pmu_t		pmu;	/* which PMU */
	pfm_dtype_t		dtype;	/* data type of event value */
	int			idx;	/* unique event identifier */
	int			nattrs;	/* number of attributes */
	int			reserved; /* for future use */
	struct {
		unsigned int	is_precise:1;	/* precise sampling (Intel X86=PEBS) */
		unsigned int	reserved_bits:31;
	} SWIG_NAME(flags);
} pfm_event_info_t;

/*
 * Event attribute description, output argument of
 * pfm_get_event_attr_info(). The trailing union carries the default
 * value; which member applies presumably depends on 'type' -- TODO confirm.
 */
typedef struct {
	const char		*name;	/* attribute symbolic name */
	const char		*desc;	/* attribute description */
	const char		*equiv;	/* attribute is equivalent to */
	size_t			size;	/* struct sizeof */
	uint64_t		code;	/* attribute code */
	pfm_attr_t		type;	/* attribute type */
	int			idx;	/* attribute opaque index */
	pfm_attr_ctrl_t		ctrl;	/* what is providing attr */
	struct {
		unsigned int	is_dfl:1;	/* is default umask */
		unsigned int	is_precise:1;	/* Intel X86: supports PEBS */
		unsigned int	reserved_bits:30;
	} SWIG_NAME(flags);
	union {
		uint64_t	dfl_val64;	/* default 64-bit value */
		const char	*dfl_str;	/* default string value */
		int		dfl_bool;	/* default boolean value */
		int		dfl_int;	/* default integer value */
	} SWIG_NAME(defaults);
} pfm_event_attr_info_t;

/*
 * use with PFM_OS_NONE for pfm_get_os_event_encoding()
 */
typedef struct {
	uint64_t	*codes;		/* out/in: event codes array */
	char		**fstr;		/* out/in: fully qualified event string */
	size_t		size;		/* sizeof struct */
	int		count;		/* out/in: # of elements in array */
	int		idx;		/* out: unique event identifier */
} pfm_pmu_encode_arg_t;

/*
 * Structure sizes in bytes, one set per word size. The values equal the
 * sizes of the four structures exactly as declared above (e.g. 56 bytes
 * for pfm_pmu_info_t with 8-byte pointers/size_t, 44 with 4-byte ones).
 * Presumably compared against the caller-provided 'size' field for ABI
 * versioning -- TODO confirm in the library sources.
 */
#if __WORDSIZE == 64
#define PFM_PMU_INFO_ABI0	56
#define PFM_EVENT_INFO_ABI0	64
#define PFM_ATTR_INFO_ABI0	64

#define PFM_RAW_ENCODE_ABI0	32
#else
#define PFM_PMU_INFO_ABI0	44
#define PFM_EVENT_INFO_ABI0	48
#define PFM_ATTR_INFO_ABI0	48

#define PFM_RAW_ENCODE_ABI0	20
#endif

/*
 * initialization, configuration, errors
 */
extern pfm_err_t pfm_initialize(void);
extern void pfm_terminate(void);
extern const char *pfm_strerror(int code);
extern int pfm_get_version(void);

/*
 * PMU API
 */
extern pfm_err_t pfm_get_pmu_info(pfm_pmu_t pmu, pfm_pmu_info_t *output);

/*
 * event API
 */
extern int pfm_get_event_next(int idx);
extern int pfm_find_event(const char *str);
extern pfm_err_t pfm_get_event_info(int idx, pfm_os_t os, pfm_event_info_t *output);

/*
 * event encoding API
 *
 * content of args depends on value of os (refer to man page)
 */
extern pfm_err_t pfm_get_os_event_encoding(const char *str, int dfl_plm, pfm_os_t os, void *args);

/*
 * attribute API
 */
extern pfm_err_t pfm_get_event_attr_info(int eidx, int aidx, pfm_os_t os, pfm_event_attr_info_t *output);

/*
 * library validation API
 */
extern pfm_err_t pfm_pmu_validate(pfm_pmu_t pmu_id, FILE *fp);

/*
 * older encoding API
 */
extern pfm_err_t pfm_get_event_encoding(const char *str, int dfl_plm, char **fstr, int *idx, uint64_t **codes, int *count);

/*
 * error codes
 */
#define PFM_SUCCESS		0	/* success */
#define PFM_ERR_NOTSUPP		-1	/* function not supported */
#define PFM_ERR_INVAL		-2	/* invalid parameters */
#define PFM_ERR_NOINIT		-3	/* library was not initialized */
#define PFM_ERR_NOTFOUND	-4	/* event not found
*/ #define PFM_ERR_FEATCOMB -5 /* invalid combination of features */ #define PFM_ERR_UMASK -6 /* invalid or missing unit mask */ #define PFM_ERR_NOMEM -7 /* out of memory */ #define PFM_ERR_ATTR -8 /* invalid event attribute */ #define PFM_ERR_ATTR_VAL -9 /* invalid event attribute value */ #define PFM_ERR_ATTR_SET -10 /* attribute value already set */ #define PFM_ERR_TOOMANY -11 /* too many parameters */ #define PFM_ERR_TOOSMALL -12 /* parameter is too small */ /* * event, attribute iterators * must be used because no guarante indexes are contiguous * * for pmu, simply iterate over pfm_pmu_t enum and use * pfm_get_pmu_info() and the is_present field */ #define pfm_for_each_event_attr(x, z) \ for((x)=0; (x) < (z)->nattrs; (x) = (x)+1) #define pfm_for_all_pmus(x) \ for((x)= 0 ; (x) < PFM_PMU_MAX; (x)++) #ifdef __cplusplus /* extern C */ } #endif #pragma GCC visibility pop #endif /* __PFMLIB_H__ */ papi-5.6.0/src/components/coretemp/linux-coretemp.h000664 001750 001750 00000004601 13216244357 024457 0ustar00jshenry1963jshenry1963000000 000000 /****************************/ /* THIS IS OPEN SOURCE CODE */ /****************************/ /** * @file linux-coretemp.h * CVS: $Id$ * @author James Ralph * ralph@eecs.utk.edu * * @ingroup papi_components * * @brief coretemp component * This file has the source code for a component that enables PAPI-C to access * hardware monitoring sensors through the coretemp sysfs interface. This code * will dynamically create a native events table for all the sensors that can * be found under /sys/class/hwmon/hwmon[0-9]+. * * Notes: * - Based heavily upon the lm-sensors component by Heike Jagode. 
*/ #ifndef _PAPI_CORETEMP_H #define _PAPI_CORETEMP_H #include #include /************************* DEFINES SECTION *********************************** *******************************************************************************/ /* this number assumes that there will never be more events than indicated */ #define CORETEMP_MAX_COUNTERS 512 /** Structure that stores private information of each event */ typedef struct CORETEMP_register { /* This is used by the framework.It likes it to be !=0 to do somehting */ unsigned int selector; /* These are the only information needed to locate a libsensors event */ int subfeat_nr; } CORETEMP_register_t; /* * The following structures mimic the ones used by other components. It is more * convenient to use them like that as programming with PAPI makes specific * assumptions for them. */ /** This structure is used to build the table of events */ typedef struct CORETEMP_native_event_entry { char name[PAPI_MAX_STR_LEN]; char units[PAPI_MIN_STR_LEN]; char description[PAPI_MAX_STR_LEN]; char path[PATH_MAX]; int stone; /* some counters are set in stone, a max temperature is just that... 
*/ long value; CORETEMP_register_t resources; } CORETEMP_native_event_entry_t; typedef struct CORETEMP_reg_alloc { CORETEMP_register_t ra_bits; } CORETEMP_reg_alloc_t; typedef struct CORETEMP_control_state { long long counts[CORETEMP_MAX_COUNTERS]; // used for caching long long lastupdate; } CORETEMP_control_state_t; typedef struct CORETEMP_context { CORETEMP_control_state_t state; } CORETEMP_context_t; /************************* GLOBALS SECTION *********************************** *******************************************************************************/ #endif /* _PAPI_CORETEMP_H */ papi-5.6.0/src/libpfm4/perf_examples/000775 001750 001750 00000000000 13216244365 021515 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/components/perfctr/perfctr-x86.h000664 001750 001750 00000012040 13216244357 023417 0ustar00jshenry1963jshenry1963000000 000000 #ifndef _PERFCTR_X86_H #define _PERFCTR_X86_H #include "perfmon/pfmlib.h" #include "libperfctr.h" #include "papi_lock.h" #define MAX_COUNTERS 18 #define MAX_COUNTER_TERMS 8 #define HW_OVERFLOW 1 #define hwd_pmc_control vperfctr_control #include "linux-context.h" /* bit fields unique to P4 */ #define ESCR_T0_OS (1 << 3) #define ESCR_T0_USR (1 << 2) #define CCCR_OVF_PMI_T0 (1 << 26) #define FAST_RDPMC (1 << 31) #ifndef CONFIG_SMP /* Assert that CONFIG_SMP is set before including asm/atomic.h to * get bus-locking atomic_* operations when building on UP kernels */ #define CONFIG_SMP #endif /* Used in resources.selector to determine on which counters an event can live. 
 */
/* One bit per counter; OR them together to express "any of these counters". */
#define CNTR1   0x1
#define CNTR2   0x2
#define CNTR3   0x4
#define CNTR4   0x8
#define CNTR5   0x10
#define CNTRS12 (CNTR1|CNTR2)
#define ALLCNTRS (CNTR1|CNTR2|CNTR3|CNTR4|CNTR5)

/*
 * NOTE(review): the HAS_* flags below reuse the numeric values of
 * MOESI_I/MOESI_S/MOESI_E (0x0100, 0x0200, 0x0400); presumably they are
 * stored in a different field than the MOESI bits -- confirm at the
 * places where they are tested.
 */
#define HAS_MESI      0x0100 // indicates this event supports MESI modifiers
#define HAS_MOESI     0x0200 // indicates this event supports MOESI modifiers
#define HAS_UMASK     0x0400 // indicates this event has defined unit mask bits
#define MOESI_M       0x1000 // modified bit
#define MOESI_O       0x0800 // owner bit
#define MOESI_E       0x0400 // exclusive bit
#define MOESI_S       0x0200 // shared bit
#define MOESI_I       0x0100 // invalid bit
#define MOESI_M_INTEL MOESI_O // modified bit on Intel processors
#define MOESI_ALL     0x1F00 // mask for MOESI bits in event code or counter_cmd
#define UNIT_MASK_ALL 0xFF00 // mask for unit mask bits in event code or counter_cmd

/* Masks to craft an eventcode to perfctr's liking */
#define PERF_CTR_MASK          0xFF000000
#define PERF_INV_CTR_MASK      0x00800000
#define PERF_ENABLE            0x00400000
#define PERF_INT_ENABLE        0x00100000
#define PERF_PIN_CONTROL       0x00080000
#define PERF_EDGE_DETECT       0x00040000
#define PERF_OS                0x00020000
#define PERF_USR               0x00010000
#define PERF_UNIT_MASK         0x0000FF00
#define PERF_EVNT_MASK         0x000000FF

/* Diagnostic strings; FOPEN_ERROR is a printf-style format taking one %s. */
#define AI_ERROR        "No support for a-mode counters after adding an i-mode counter"
#define VOPEN_ERROR     "vperfctr_open() returned NULL, please run perfex -i to verify your perfctr installation"
#define GOPEN_ERROR     "gperfctr_open() returned NULL"
#define VINFO_ERROR     "vperfctr_info() returned < 0"
#define VCNTRL_ERROR    "vperfctr_control() returned < 0"
#define RCNTRL_ERROR    "rvperfctr_control() returned < 0"
#define GCNTRL_ERROR    "gperfctr_control() returned < 0"
#define FOPEN_ERROR     "fopen(%s) returned NULL"
#define STATE_MAL_ERROR "Error allocating perfctr structures"
#define MODEL_ERROR     "This is not a supported cpu."
/* Per-event register description: generic fields first, Pentium 4 extras after. */
typedef struct X86_register {
	unsigned int selector;        // mask for which counters in use
	int counter_cmd;              // event code
	/******************  P4 elements   *******************/
	unsigned counter[2];          // bitmap of valid counters for each escr
	unsigned escr[2];             // bit offset for each of 2 valid escrs
	unsigned cccr;                // value to be loaded into cccr register
	unsigned event;               // value defining event to be loaded into escr register
	unsigned pebs_enable;         // flag for PEBS counting
	unsigned pebs_matrix_vert;    // flag for PEBS_MATRIX_VERT
	unsigned ireset;
} X86_register_t;

/* Allocation record pairing a native event's registers with the counters able to host it. */
typedef struct X86_reg_alloc {
	X86_register_t ra_bits;       // info about this native event mapping
	unsigned ra_selector;         // bit mask showing which counters can carry this metric
	unsigned ra_rank;             // how many counters can carry this metric
	/***************  P4 specific element ****************/
	unsigned ra_escr[2];          // bit field array showing which esc registers can carry this metric
} X86_reg_alloc_t;

/* Bookkeeping for one native event placed in an event set. */
typedef struct hwd_native {
	int index;                    // index in the native table, required
	unsigned int selector;        // which counters
	unsigned char rank;           // rank determines how many counters carry each metric
	int position;                 // which counter this native event stays
	int mod;
	int link;
} hwd_native_t;

/*
 * Per-eventset control state for the perfctr substrate.
 *
 * NOTE(review): master_selector is an unsigned char (8 bits) while
 * native[] holds MAX_COUNTERS entries; if it is meant to be a counter
 * bitmask like X86_register_t.selector it cannot cover all of them --
 * confirm how it is used.
 */
typedef struct X86_perfctr_control {
	hwd_native_t native[MAX_COUNTERS];
	int native_idx;
	unsigned char master_selector;
	X86_register_t allocated_registers;
	struct vperfctr_control control;
	struct perfctr_sum_ctrs state;
	struct rvperfctr *rvperfctr;  // Allow attach to be per-eventset
} X86_perfctr_control_t;

/* Per-thread context: the perfctr handle plus a file descriptor (stat_fd). */
typedef struct X86_perfctr_context {
	struct vperfctr *perfctr;
	int stat_fd;
} X86_perfctr_context_t;

/* Override void* definitions from PAPI framework layer
   with typedefs to conform to PAPI component layer code.
*/ #undef hwd_reg_alloc_t typedef X86_reg_alloc_t hwd_reg_alloc_t; #undef hwd_register_t typedef X86_register_t hwd_register_t; #undef hwd_control_state_t typedef X86_perfctr_control_t hwd_control_state_t; #undef hwd_context_t typedef X86_perfctr_context_t hwd_context_t; typedef struct native_event_entry { char name[PAPI_MAX_STR_LEN]; // name of this event char *description; // description of this event X86_register_t resources; // resources required by this native event } native_event_entry_t; typedef pfmlib_event_t pfm_register_t; #endif papi-5.6.0/src/components/coretemp/linux-coretemp.c000664 001750 001750 00000042305 13216244357 024455 0ustar00jshenry1963jshenry1963000000 000000 #include /* Headers required by PAPI */ #include "papi.h" #include "papi_internal.h" #include "papi_vector.h" #include "papi_memory.h" #include "linux-coretemp.h" /* this is what I found on my core2 machine * but I have not explored this widely yet*/ #define REFRESH_LAT 4000 #define INVALID_RESULT -1000000L papi_vector_t _coretemp_vector; /* temporary event */ struct temp_event { char name[PAPI_MAX_STR_LEN]; char units[PAPI_MIN_STR_LEN]; char description[PAPI_MAX_STR_LEN]; char location[PAPI_MAX_STR_LEN]; char path[PATH_MAX]; int stone; long count; struct temp_event *next; }; static CORETEMP_native_event_entry_t * _coretemp_native_events; static int num_events = 0; static int is_initialized = 0; /***************************************************************************/ /****** BEGIN FUNCTIONS USED INTERNALLY SPECIFIC TO THIS COMPONENT *******/ /***************************************************************************/ static struct temp_event* root = NULL; static struct temp_event *last = NULL; static int insert_in_list(char *name, char *units, char *description, char *filename) { struct temp_event *temp; /* new_event path, events->d_name */ temp = (struct temp_event *) papi_calloc(1, sizeof(struct temp_event)); if (temp==NULL) { PAPIERROR("out of memory!"); /* We should also free 
any previously allocated data */ return PAPI_ENOMEM; } temp->next = NULL; if (root == NULL) { root = temp; } else if (last) { last->next = temp; } else { /* Because this is a function, it is possible */ /* we are called with root!=NULL but no last */ /* so add this to keep coverity happy */ free(temp); PAPIERROR("This shouldn't be possible\n"); return PAPI_ECMP; } last = temp; snprintf(temp->name, PAPI_MAX_STR_LEN, "%s", name); snprintf(temp->units, PAPI_MIN_STR_LEN, "%s", units); snprintf(temp->description, PAPI_MAX_STR_LEN, "%s", description); snprintf(temp->path, PATH_MAX, "%s", filename); return PAPI_OK; } /* * find all coretemp information reported by the kernel */ static int generateEventList(char *base_dir) { char path[PATH_MAX],filename[PATH_MAX]; char modulename[PAPI_MIN_STR_LEN], location[PAPI_MIN_STR_LEN], units[PAPI_MIN_STR_LEN], description[PAPI_MAX_STR_LEN], name[PAPI_MAX_STR_LEN]; DIR *dir,*d; FILE *fff; int count = 0; struct dirent *hwmonx; int i,pathnum; #define NUM_PATHS 2 char paths[NUM_PATHS][PATH_MAX]={ "device","." 
}; /* Open "/sys/class/hwmon" */ dir = opendir(base_dir); if ( dir == NULL ) { SUBDBG("Can't find %s, are you sure the coretemp module is loaded?\n", base_dir); return 0; } /* Iterate each /sys/class/hwmonX/device directory */ while( (hwmonx = readdir(dir) ) ) { if ( !strncmp("hwmon", hwmonx->d_name, 5) ) { /* Found a hwmon directory */ /* Sometimes the files are in ./, sometimes in device/ */ for(pathnum=0;pathnumd_name,paths[pathnum]); SUBDBG("Trying to open %s\n",path); d = opendir(path); if (d==NULL) { continue; } /* Get the name of the module */ snprintf(filename, PAPI_MAX_STR_LEN, "%s/name",path); fff=fopen(filename,"r"); if (fff==NULL) { snprintf(modulename, PAPI_MIN_STR_LEN, "Unknown"); } else { if (fgets(modulename,PAPI_MIN_STR_LEN,fff)!=NULL) { modulename[strlen(modulename)-1]='\0'; } fclose(fff); } SUBDBG("Found module %s\n",modulename); /******************************************************/ /* Try handling all events starting with in (voltage) */ /******************************************************/ /* arbitrary maximum */ /* the problem is the numbering can be sparse */ /* should probably go back to dirent listing */ for(i=0;i<32;i++) { /* Try looking for a location label */ snprintf(filename, PAPI_MAX_STR_LEN, "%s/in%d_label", path,i); fff=fopen(filename,"r"); if (fff==NULL) { strncpy(location,"?",PAPI_MIN_STR_LEN); } else { if (fgets(location,PAPI_MIN_STR_LEN,fff)!=NULL) { location[strlen(location)-1]='\0'; } fclose(fff); } /* Look for input temperature */ snprintf(filename, PAPI_MAX_STR_LEN, "%s/in%d_input", path,i); fff=fopen(filename,"r"); if (fff==NULL) continue; fclose(fff); snprintf(name, PAPI_MAX_STR_LEN, "%s:in%i_input", hwmonx->d_name, i); snprintf(units, PAPI_MIN_STR_LEN, "V"); snprintf(description, PAPI_MAX_STR_LEN, "%s, %s module, label %s", units,modulename, location); if (insert_in_list(name,units,description,filename)!=PAPI_OK) { goto done_error; } count++; } /************************************************************/ /* Try 
handling all events starting with temp (temperature) */ /************************************************************/ for(i=0;i<32;i++) { /* Try looking for a location label */ snprintf(filename, PAPI_MAX_STR_LEN, "%s/temp%d_label", path,i); fff=fopen(filename,"r"); if (fff==NULL) { strncpy(location,"?",PAPI_MIN_STR_LEN); } else { if (fgets(location,PAPI_MIN_STR_LEN,fff)!=NULL) { location[strlen(location)-1]='\0'; } fclose(fff); } /* Look for input temperature */ snprintf(filename, PAPI_MAX_STR_LEN, "%s/temp%d_input", path,i); fff=fopen(filename,"r"); if (fff==NULL) continue; fclose(fff); snprintf(name, PAPI_MAX_STR_LEN, "%s:temp%i_input", hwmonx->d_name, i); snprintf(units, PAPI_MIN_STR_LEN, "degrees C"); snprintf(description, PAPI_MAX_STR_LEN, "%s, %s module, label %s", units,modulename, location); if (insert_in_list(name,units,description,filename)!=PAPI_OK) { goto done_error; } count++; } /************************************************************/ /* Try handling all events starting with fan (fan) */ /************************************************************/ for(i=0;i<32;i++) { /* Try looking for a location label */ snprintf(filename, PAPI_MAX_STR_LEN, "%s/fan%d_label", path,i); fff=fopen(filename,"r"); if (fff==NULL) { strncpy(location,"?",PAPI_MIN_STR_LEN); } else { if (fgets(location,PAPI_MIN_STR_LEN,fff)!=NULL) { location[strlen(location)-1]='\0'; } fclose(fff); } /* Look for input fan */ snprintf(filename, PAPI_MAX_STR_LEN, "%s/fan%d_input", path,i); fff=fopen(filename,"r"); if (fff==NULL) continue; fclose(fff); snprintf(name, PAPI_MAX_STR_LEN, "%s:fan%i_input", hwmonx->d_name, i); snprintf(units, PAPI_MIN_STR_LEN, "RPM"); snprintf(description, PAPI_MAX_STR_LEN, "%s, %s module, label %s", units,modulename, location); if (insert_in_list(name,units,description,filename)!=PAPI_OK) { goto done_error; } count++; } closedir(d); } } } closedir(dir); return count; done_error: closedir(d); closedir(dir); return PAPI_ECMP; } static long long getEventValue( 
int index ) { char buf[PAPI_MAX_STR_LEN]; FILE* fp; long result; if (_coretemp_native_events[index].stone) { return _coretemp_native_events[index].value; } fp = fopen(_coretemp_native_events[index].path, "r"); if (fp==NULL) { return INVALID_RESULT; } if (fgets(buf, PAPI_MAX_STR_LEN, fp)==NULL) { result=INVALID_RESULT; } else { result=strtoll(buf, NULL, 10); } fclose(fp); return result; } /***************************************************************************** ******************* BEGIN PAPI's COMPONENT REQUIRED FUNCTIONS ************* *****************************************************************************/ /* * This is called whenever a thread is initialized */ static int _coretemp_init_thread( hwd_context_t *ctx ) { ( void ) ctx; return PAPI_OK; } /* Initialize hardware counters, setup the function vector table * and get hardware information, this routine is called when the * PAPI process is initialized (IE PAPI_library_init) */ static int _coretemp_init_component( int cidx ) { int i = 0; struct temp_event *t,*last; if ( is_initialized ) return (PAPI_OK ); is_initialized = 1; /* This is the prefered method, all coretemp sensors are symlinked here * see $(kernel_src)/Documentation/hwmon/sysfs-interface */ num_events = generateEventList("/sys/class/hwmon"); if ( num_events < 0 ) { strncpy(_coretemp_vector.cmp_info.disabled_reason, "Cannot open /sys/class/hwmon",PAPI_MAX_STR_LEN); return PAPI_ENOCMP; } if ( num_events == 0 ) { strncpy(_coretemp_vector.cmp_info.disabled_reason, "No coretemp events found",PAPI_MAX_STR_LEN); return PAPI_ENOCMP; } t = root; _coretemp_native_events = (CORETEMP_native_event_entry_t*) papi_calloc(num_events, sizeof(CORETEMP_native_event_entry_t)); do { strncpy(_coretemp_native_events[i].name,t->name,PAPI_MAX_STR_LEN); _coretemp_native_events[i].name[PAPI_MAX_STR_LEN-1] = '\0'; strncpy(_coretemp_native_events[i].path,t->path,PATH_MAX); _coretemp_native_events[i].path[PATH_MAX-1] = '\0'; 
strncpy(_coretemp_native_events[i].units,t->units,PAPI_MIN_STR_LEN); _coretemp_native_events[i].units[PAPI_MIN_STR_LEN-1] = '\0'; strncpy(_coretemp_native_events[i].description,t->description,PAPI_MAX_STR_LEN); _coretemp_native_events[i].description[PAPI_MAX_STR_LEN-1] = '\0'; _coretemp_native_events[i].stone = 0; _coretemp_native_events[i].resources.selector = i + 1; last = t; t = t->next; papi_free(last); i++; } while (t != NULL); root = NULL; /* Export the total number of events available */ _coretemp_vector.cmp_info.num_native_events = num_events; /* Export the component id */ _coretemp_vector.cmp_info.CmpIdx = cidx; return PAPI_OK; } /* * Control of counters (Reading/Writing/Starting/Stopping/Setup) * functions */ static int _coretemp_init_control_state( hwd_control_state_t * ctl) { int i; CORETEMP_control_state_t *coretemp_ctl = (CORETEMP_control_state_t *) ctl; for ( i=0; i < num_events; i++ ) { coretemp_ctl->counts[i] = getEventValue(i); } /* Set last access time for caching results */ coretemp_ctl->lastupdate = PAPI_get_real_usec(); return PAPI_OK; } static int _coretemp_start( hwd_context_t *ctx, hwd_control_state_t *ctl) { ( void ) ctx; ( void ) ctl; return PAPI_OK; } static int _coretemp_read( hwd_context_t *ctx, hwd_control_state_t *ctl, long long ** events, int flags) { (void) flags; (void) ctx; CORETEMP_control_state_t* control = (CORETEMP_control_state_t*) ctl; long long now = PAPI_get_real_usec(); int i; /* Only read the values from the kernel if enough time has passed */ /* since the last read. Otherwise return cached values. 
*/ if ( now - control->lastupdate > REFRESH_LAT ) { for ( i = 0; i < num_events; i++ ) { control->counts[i] = getEventValue( i ); } control->lastupdate = now; } /* Pass back a pointer to our results */ *events = control->counts; return PAPI_OK; } static int _coretemp_stop( hwd_context_t *ctx, hwd_control_state_t *ctl ) { (void) ctx; /* read values */ CORETEMP_control_state_t* control = (CORETEMP_control_state_t*) ctl; int i; for ( i = 0; i < num_events; i++ ) { control->counts[i] = getEventValue( i ); } return PAPI_OK; } /* Shutdown a thread */ static int _coretemp_shutdown_thread( hwd_context_t * ctx ) { ( void ) ctx; return PAPI_OK; } /* * Clean up what was setup in coretemp_init_component(). */ static int _coretemp_shutdown_component( ) { if ( is_initialized ) { is_initialized = 0; papi_free(_coretemp_native_events); _coretemp_native_events = NULL; } return PAPI_OK; } /* This function sets various options in the component * The valid codes being passed in are PAPI_SET_DEFDOM, * PAPI_SET_DOMAIN, PAPI_SETDEFGRN, PAPI_SET_GRANUL * and PAPI_SET_INHERIT */ static int _coretemp_ctl( hwd_context_t *ctx, int code, _papi_int_option_t *option ) { ( void ) ctx; ( void ) code; ( void ) option; return PAPI_OK; } static int _coretemp_update_control_state( hwd_control_state_t *ptr, NativeInfo_t * native, int count, hwd_context_t * ctx ) { int i, index; ( void ) ctx; ( void ) ptr; for ( i = 0; i < count; i++ ) { index = native[i].ni_event; native[i].ni_position = _coretemp_native_events[index].resources.selector - 1; } return PAPI_OK; } /* * This function has to set the bits needed to count different domains * In particular: PAPI_DOM_USER, PAPI_DOM_KERNEL PAPI_DOM_OTHER * By default return PAPI_EINVAL if none of those are specified * and PAPI_OK with success * PAPI_DOM_USER is only user context is counted * PAPI_DOM_KERNEL is only the Kernel/OS context is counted * PAPI_DOM_OTHER is Exception/transient mode (like user TLB misses) * PAPI_DOM_ALL is all of the domains */ static 
int _coretemp_set_domain( hwd_control_state_t * cntl, int domain ) { (void) cntl; if ( PAPI_DOM_ALL != domain ) return PAPI_EINVAL; return PAPI_OK; } static int _coretemp_reset( hwd_context_t *ctx, hwd_control_state_t *ctl ) { ( void ) ctx; ( void ) ctl; return PAPI_OK; } /* * Native Event functions */ static int _coretemp_ntv_enum_events( unsigned int *EventCode, int modifier ) { int index; switch ( modifier ) { case PAPI_ENUM_FIRST: if (num_events==0) { return PAPI_ENOEVNT; } *EventCode = 0; return PAPI_OK; case PAPI_ENUM_EVENTS: index = *EventCode; if ( index < num_events - 1 ) { *EventCode = *EventCode + 1; return PAPI_OK; } else { return PAPI_ENOEVNT; } break; default: return PAPI_EINVAL; } return PAPI_EINVAL; } /* * */ static int _coretemp_ntv_code_to_name( unsigned int EventCode, char *name, int len ) { int index = EventCode; if ( index >= 0 && index < num_events ) { strncpy( name, _coretemp_native_events[index].name, len ); return PAPI_OK; } return PAPI_ENOEVNT; } /* * */ static int _coretemp_ntv_code_to_descr( unsigned int EventCode, char *name, int len ) { int index = EventCode; if ( index >= 0 && index < num_events ) { strncpy( name, _coretemp_native_events[index].description, len ); return PAPI_OK; } return PAPI_ENOEVNT; } static int _coretemp_ntv_code_to_info(unsigned int EventCode, PAPI_event_info_t *info) { int index = EventCode; if ( ( index < 0) || (index >= num_events )) return PAPI_ENOEVNT; strncpy( info->symbol, _coretemp_native_events[index].name, sizeof(info->symbol)); strncpy( info->long_descr, _coretemp_native_events[index].description, sizeof(info->long_descr)); strncpy( info->units, _coretemp_native_events[index].units, sizeof(info->units)); info->units[sizeof(info->units)-1] = '\0'; return PAPI_OK; } /* * */ papi_vector_t _coretemp_vector = { .cmp_info = { /* default component information (unspecified values are initialized to 0) */ .name = "coretemp", .short_name = "coretemp", .description = "Linux hwmon temperature and other info", 
.version = "4.2.1", .num_mpx_cntrs = CORETEMP_MAX_COUNTERS, .num_cntrs = CORETEMP_MAX_COUNTERS, .default_domain = PAPI_DOM_ALL, .available_domains = PAPI_DOM_ALL, .default_granularity = PAPI_GRN_SYS, .available_granularities = PAPI_GRN_SYS, .hardware_intr_sig = PAPI_INT_SIGNAL, /* component specific cmp_info initializations */ .fast_real_timer = 0, .fast_virtual_timer = 0, .attach = 0, .attach_must_ptrace = 0, } , /* sizes of framework-opaque component-private structures */ .size = { .context = sizeof ( CORETEMP_context_t ), .control_state = sizeof ( CORETEMP_control_state_t ), .reg_value = sizeof ( CORETEMP_register_t ), .reg_alloc = sizeof ( CORETEMP_reg_alloc_t ), } , /* function pointers in this component */ .init_thread = _coretemp_init_thread, .init_component = _coretemp_init_component, .init_control_state = _coretemp_init_control_state, .start = _coretemp_start, .stop = _coretemp_stop, .read = _coretemp_read, .shutdown_thread = _coretemp_shutdown_thread, .shutdown_component = _coretemp_shutdown_component, .ctl = _coretemp_ctl, .update_control_state = _coretemp_update_control_state, .set_domain = _coretemp_set_domain, .reset = _coretemp_reset, .ntv_enum_events = _coretemp_ntv_enum_events, .ntv_code_to_name = _coretemp_ntv_code_to_name, .ntv_code_to_descr = _coretemp_ntv_code_to_descr, .ntv_code_to_info = _coretemp_ntv_code_to_info, }; papi-5.6.0/man/man3/PAPI_domain_option_t.3000664 001750 001750 00000001054 13216244356 022227 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPI_domain_option_t" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPI_domain_option_t \- .SH SYNOPSIS .br .PP .SS "Data Fields" .in +1c .ti -1c .RI "int \fBdef_cidx\fP" .br .ti -1c .RI "int \fBeventset\fP" .br .ti -1c .RI "int \fBdomain\fP" .br .in -1c .SH "Detailed Description" .PP .SH "Field Documentation" .PP .SS "int PAPI_domain_option_t::def_cidx" this structure requires a component index to set default domains .SH "Author" .PP Generated 
automatically by Doxygen for PAPI from the source code\&. papi-5.6.0/src/components/perfctr/perfctr-x86.c000664 001750 001750 00000107641 13216244357 023426 0ustar00jshenry1963jshenry1963000000 000000 /* * File: perfctr-x86.c * Author: Brian Sheely * bsheely@eecs.utk.edu * Mods: * */ #include #include #include "papi.h" #include "papi_memory.h" #include "papi_internal.h" #include "perfctr-x86.h" #include "perfmon/pfmlib.h" #include "extras.h" #include "papi_vector.h" #include "papi_libpfm_events.h" #include "papi_preset.h" #include "linux-memory.h" /* Contains source for the Modified Bipartite Allocation scheme */ #include "papi_bipartite.h" /* Prototypes for entry points found in perfctr.c */ extern int _perfctr_init_component( int ); extern int _perfctr_ctl( hwd_context_t * ctx, int code, _papi_int_option_t * option ); extern void _perfctr_dispatch_timer( int signal, hwd_siginfo_t * si, void *context ); extern int _perfctr_init_thread( hwd_context_t * ctx ); extern int _perfctr_shutdown_thread( hwd_context_t * ctx ); #include "linux-common.h" #include "linux-timer.h" extern papi_mdi_t _papi_hwi_system_info; extern papi_vector_t _perfctr_vector; #if defined(PERFCTR26) #define evntsel_aux p4.escr #endif #if defined(PAPI_PENTIUM4_VEC_MMX) #define P4_VEC "MMX" #else #define P4_VEC "SSE" #endif #if defined(PAPI_PENTIUM4_FP_X87) #define P4_FPU " X87" #elif defined(PAPI_PENTIUM4_FP_X87_SSE_SP) #define P4_FPU " X87 SSE_SP" #elif defined(PAPI_PENTIUM4_FP_SSE_SP_DP) #define P4_FPU " SSE_SP SSE_DP" #else #define P4_FPU " X87 SSE_DP" #endif /* CODE TO SUPPORT CUSTOMIZABLE FP COUNTS ON OPTERON */ #if defined(PAPI_OPTERON_FP_RETIRED) #define AMD_FPU "RETIRED" #elif defined(PAPI_OPTERON_FP_SSE_SP) #define AMD_FPU "SSE_SP" #elif defined(PAPI_OPTERON_FP_SSE_DP) #define AMD_FPU "SSE_DP" #else #define AMD_FPU "SPECULATIVE" #endif static inline int is_pentium4(void) { if ( ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_INTEL ) && ( _papi_hwi_system_info.hw_info.cpuid_family == 
15 )) {
		return 1;
	}

	return 0;
}

#ifdef DEBUG
/* Debug helper: dump the selector, rank and both escr fields of one
   register-allocation record through SUBDBG. */
static void
print_alloc( X86_reg_alloc_t * a )
{
	SUBDBG( "X86_reg_alloc:\n" );
	SUBDBG( " selector: %#x\n", a->ra_selector );
	SUBDBG( " rank: %#x\n", a->ra_rank );
	SUBDBG( " escr: %#x %#x\n", a->ra_escr[0], a->ra_escr[1] );
}

/* Debug helper: dump tsc_on and the two counter counts, then for each of
   the nractrs + nrictrs entries its pmc_map, evntsel and (when non-zero)
   ireset value. */
void
print_control( const struct perfctr_cpu_control *control )
{
	unsigned int i;
	SUBDBG( "Control used:\n" );
	SUBDBG( "tsc_on\t\t\t%u\n", control->tsc_on );
	SUBDBG( "nractrs\t\t\t%u\n", control->nractrs );
	SUBDBG( "nrictrs\t\t\t%u\n", control->nrictrs );

	for ( i = 0; i < ( control->nractrs + control->nrictrs ); ++i ) {
		/* pmc_map values of 18 and above are printed in hex, smaller
		   ones in decimal */
		if ( control->pmc_map[i] >= 18 ) {
			SUBDBG( "pmc_map[%u]\t\t0x%08X\n", i, control->pmc_map[i] );
		} else {
			SUBDBG( "pmc_map[%u]\t\t%u\n", i, control->pmc_map[i] );
		}
		SUBDBG( "evntsel[%u]\t\t0x%08X\n", i, control->evntsel[i] );
		if ( control->ireset[i] ) {
			SUBDBG( "ireset[%u]\t%d\n", i, control->ireset[i] );
		}
	}
}
#endif

/* Seed a control state from the component's default domain: translate
   PAPI_DOM_USER / PAPI_DOM_KERNEL into the ESCR (Pentium 4) or PERF
   (other models) user/OS bits, OR them into the per-counter registers
   and turn the TSC on. */
static int
_x86_init_control_state( hwd_control_state_t *ptr )
{
	int i, def_mode = 0;

	if ( is_pentium4() ) {
		if ( _perfctr_vector.cmp_info.default_domain & PAPI_DOM_USER )
			def_mode |= ESCR_T0_USR;
		if ( _perfctr_vector.cmp_info.default_domain & PAPI_DOM_KERNEL )
			def_mode |= ESCR_T0_OS;

		for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) {
			ptr->control.cpu_control.evntsel_aux[i] |= def_mode;
		}
		ptr->control.cpu_control.tsc_on = 1;
		ptr->control.cpu_control.nractrs = 0;
		ptr->control.cpu_control.nrictrs = 0;

#ifdef VPERFCTR_CONTROL_CLOEXEC
		ptr->control.flags = VPERFCTR_CONTROL_CLOEXEC;
		SUBDBG( "close on exec\t\t\t%u\n", ptr->control.flags );
#endif
	} else {

		if ( _perfctr_vector.cmp_info.default_domain & PAPI_DOM_USER )
			def_mode |= PERF_USR;
		if ( _perfctr_vector.cmp_info.default_domain & PAPI_DOM_KERNEL )
			def_mode |= PERF_OS;

		ptr->allocated_registers.selector = 0;
		/* how PERF_ENABLE is applied depends on the CPU model */
		switch ( _papi_hwi_system_info.hw_info.model ) {
		case PERFCTR_X86_GENERIC:
		case PERFCTR_X86_WINCHIP_C6:
		case PERFCTR_X86_WINCHIP_2:
		case PERFCTR_X86_VIA_C3:
		case PERFCTR_X86_INTEL_P5:
		case
PERFCTR_X86_INTEL_P5MMX:
		case PERFCTR_X86_INTEL_PII:
		case PERFCTR_X86_INTEL_P6:
		case PERFCTR_X86_INTEL_PIII:
#ifdef PERFCTR_X86_INTEL_CORE
		case PERFCTR_X86_INTEL_CORE:
#endif
#ifdef PERFCTR_X86_INTEL_PENTM
		case PERFCTR_X86_INTEL_PENTM:
#endif
			/* this group: PERF_ENABLE goes on evntsel[0] only */
			ptr->control.cpu_control.evntsel[0] |= PERF_ENABLE;
			for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) {
				ptr->control.cpu_control.evntsel[i] |= def_mode;
				ptr->control.cpu_control.pmc_map[i] = ( unsigned int ) i;
			}
			break;
#ifdef PERFCTR_X86_INTEL_CORE2
		case PERFCTR_X86_INTEL_CORE2:
#endif
#ifdef PERFCTR_X86_INTEL_ATOM
		case PERFCTR_X86_INTEL_ATOM:
#endif
#ifdef PERFCTR_X86_INTEL_NHLM
		case PERFCTR_X86_INTEL_NHLM:
#endif
#ifdef PERFCTR_X86_INTEL_WSTMR
		case PERFCTR_X86_INTEL_WSTMR:
#endif
#ifdef PERFCTR_X86_AMD_K8
		case PERFCTR_X86_AMD_K8:
#endif
#ifdef PERFCTR_X86_AMD_K8C
		case PERFCTR_X86_AMD_K8C:
#endif
#ifdef PERFCTR_X86_AMD_FAM10H	/* this is defined in perfctr 2.6.29 */
		case PERFCTR_X86_AMD_FAM10H:
#endif
		case PERFCTR_X86_AMD_K7:
			/* this group: PERF_ENABLE goes on every evntsel */
			for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) {
				ptr->control.cpu_control.evntsel[i] |= PERF_ENABLE | def_mode;
				ptr->control.cpu_control.pmc_map[i] = ( unsigned int ) i;
			}
			break;
		}
#ifdef VPERFCTR_CONTROL_CLOEXEC
		ptr->control.flags = VPERFCTR_CONTROL_CLOEXEC;
		SUBDBG( "close on exec\t\t\t%u\n", ptr->control.flags );
#endif

		/* Make sure the TSC is always on */
		ptr->control.cpu_control.tsc_on = 1;
	}
	return ( PAPI_OK );
}

/* Replace the user/kernel counting bits of every counter in the control
   state according to `domain` (PAPI_DOM_USER and/or PAPI_DOM_KERNEL).
   `did` records whether at least one of those two domains was asked
   for. */
int
_x86_set_domain( hwd_control_state_t * cntrl, int domain )
{
	int i, did = 0;
	int num_cntrs = _perfctr_vector.cmp_info.num_cntrs;

	/* Clear the current domain set for this event set */
	/* We don't touch the Enable bit in this code */
	if ( is_pentium4() ) {
		for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) {
			cntrl->control.cpu_control.evntsel_aux[i] &=
				~( ESCR_T0_OS | ESCR_T0_USR );
		}

		if ( domain & PAPI_DOM_USER ) {
			did = 1;
			for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) {
				cntrl->control.cpu_control.evntsel_aux[i] |= ESCR_T0_USR;
			}
		}

		if ( domain &
PAPI_DOM_KERNEL ) { did = 1; for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) { cntrl->control.cpu_control.evntsel_aux[i] |= ESCR_T0_OS; } } } else { for ( i = 0; i < num_cntrs; i++ ) { cntrl->control.cpu_control.evntsel[i] &= ~( PERF_OS | PERF_USR ); } if ( domain & PAPI_DOM_USER ) { did = 1; for ( i = 0; i < num_cntrs; i++ ) { cntrl->control.cpu_control.evntsel[i] |= PERF_USR; } } if ( domain & PAPI_DOM_KERNEL ) { did = 1; for ( i = 0; i < num_cntrs; i++ ) { cntrl->control.cpu_control.evntsel[i] |= PERF_OS; } } } if ( !did ) return ( PAPI_EINVAL ); else return ( PAPI_OK ); } /* This function examines the event to determine if it can be mapped to counter ctr. Returns true if it can, false if it can't. */ static int _bpt_map_avail( hwd_reg_alloc_t * dst, int ctr ) { return ( int ) ( dst->ra_selector & ( 1 << ctr ) ); } /* This function forces the event to be mapped to only counter ctr. Returns nothing. */ static void _bpt_map_set( hwd_reg_alloc_t * dst, int ctr ) { dst->ra_selector = ( unsigned int ) ( 1 << ctr ); dst->ra_rank = 1; if ( is_pentium4() ) { /* Pentium 4 requires that both an escr and a counter are selected. Find which counter mask contains this counter. Set the opposite escr to empty (-1) */ if ( dst->ra_bits.counter[0] & dst->ra_selector ) dst->ra_escr[1] = -1; else dst->ra_escr[0] = -1; } } /* This function examines the event to determine if it has a single exclusive mapping. Returns true if exlusive, false if non-exclusive. */ static int _bpt_map_exclusive( hwd_reg_alloc_t * dst ) { return ( dst->ra_rank == 1 ); } /* This function compares the dst and src events to determine if any resources are shared. Typically the src event is exclusive, so this detects a conflict if true. Returns true if conflict, false if no conflict. 
*/
static int
_bpt_map_shared( hwd_reg_alloc_t * dst, hwd_reg_alloc_t * src )
{
	int resource_clash, pebs_clash;

	/* Everything but Pentium 4: only the counter selectors can overlap */
	if ( !is_pentium4() )
		return ( int ) ( dst->ra_selector & src->ra_selector );

	/* Pentium 4, part one: counters and escr registers.
	   The events clash when their selectors share a bit, or when the
	   same escr slot names the same register and is not empty (-1). */
	resource_clash = ( dst->ra_selector & src->ra_selector ) != 0;
	if ( ( dst->ra_escr[0] == src->ra_escr[0] ) &&
		 ( ( int ) dst->ra_escr[0] != -1 ) )
		resource_clash = 1;
	if ( ( dst->ra_escr[1] == src->ra_escr[1] ) &&
		 ( ( int ) dst->ra_escr[1] != -1 ) )
		resource_clash = 1;

	/* Pentium 4, part two: the pebs registers.
	   A clash needs both events to use the register (non-zero) with
	   different values; the same test applies to pebs_matrix_vert. */
	pebs_clash = 0;
	if ( dst->ra_bits.pebs_enable && src->ra_bits.pebs_enable &&
		 ( dst->ra_bits.pebs_enable != src->ra_bits.pebs_enable ) )
		pebs_clash = 1;
	if ( dst->ra_bits.pebs_matrix_vert && src->ra_bits.pebs_matrix_vert &&
		 ( dst->ra_bits.pebs_matrix_vert != src->ra_bits.pebs_matrix_vert ) )
		pebs_clash = 1;

	if ( pebs_clash ) {
		SUBDBG( "pebs conflict!\n" );
	}

	return ( resource_clash | pebs_clash );
}

/* This function removes shared resources available to the src event
   from the resources available to the dst event,
   and reduces the rank of the dst event accordingly. Typically,
   the src event will be exclusive, but the code shouldn't assume it.
   Returns nothing.
*/ static void _bpt_map_preempt( hwd_reg_alloc_t * dst, hwd_reg_alloc_t * src ) { int i; unsigned shared; if ( is_pentium4() ) { #ifdef DEBUG SUBDBG( "src, dst\n" ); print_alloc( src ); print_alloc( dst ); #endif /* check for a pebs conflict */ /* pebs enables must both be non-zero */ i = ( ( ( dst->ra_bits.pebs_enable && src->ra_bits.pebs_enable ) && /* and not equal to each other */ ( dst->ra_bits.pebs_enable != src->ra_bits.pebs_enable ) ) || /* same for pebs_matrix_vert */ ( ( dst->ra_bits.pebs_matrix_vert && src->ra_bits.pebs_matrix_vert ) && ( dst->ra_bits.pebs_matrix_vert != src->ra_bits.pebs_matrix_vert ) ) ); if ( i ) { SUBDBG( "pebs conflict! clearing selector\n" ); dst->ra_selector = 0; return; } else { /* remove counters referenced by any shared escrs */ if ( ( dst->ra_escr[0] == src->ra_escr[0] ) && ( ( int ) dst->ra_escr[0] != -1 ) ) { dst->ra_selector &= ~dst->ra_bits.counter[0]; dst->ra_escr[0] = -1; } if ( ( dst->ra_escr[1] == src->ra_escr[1] ) && ( ( int ) dst->ra_escr[1] != -1 ) ) { dst->ra_selector &= ~dst->ra_bits.counter[1]; dst->ra_escr[1] = -1; } /* remove any remaining shared counters */ shared = ( dst->ra_selector & src->ra_selector ); if ( shared ) dst->ra_selector ^= shared; } /* recompute rank */ for ( i = 0, dst->ra_rank = 0; i < MAX_COUNTERS; i++ ) if ( dst->ra_selector & ( 1 << i ) ) dst->ra_rank++; #ifdef DEBUG SUBDBG( "new dst\n" ); print_alloc( dst ); #endif } else { shared = dst->ra_selector & src->ra_selector; if ( shared ) dst->ra_selector ^= shared; for ( i = 0, dst->ra_rank = 0; i < MAX_COUNTERS; i++ ) if ( dst->ra_selector & ( 1 << i ) ) dst->ra_rank++; } } static void _bpt_map_update( hwd_reg_alloc_t * dst, hwd_reg_alloc_t * src ) { dst->ra_selector = src->ra_selector; if ( is_pentium4() ) { dst->ra_escr[0] = src->ra_escr[0]; dst->ra_escr[1] = src->ra_escr[1]; } } /* Register allocation */ static int _x86_allocate_registers( EventSetInfo_t * ESI ) { int i, j, natNum; hwd_reg_alloc_t event_list[MAX_COUNTERS]; hwd_register_t 
*ptr;

	/* Initialize the local structure needed
	   for counter allocation and optimization. */
	natNum = ESI->NativeCount;
	/* NOTE(review): event_list has MAX_COUNTERS entries and natNum is not
	   checked against that here -- presumably the caller guarantees it;
	   confirm. */

	if ( is_pentium4() ) {
		SUBDBG( "native event count: %d\n", natNum );
	}

	for ( i = 0; i < natNum; i++ ) {
		/* retrieve the mapping information about this native event */
		_papi_libpfm_ntv_code_to_bits_perfctr( ( unsigned int ) ESI->NativeInfoArray[i].
							   ni_event, &event_list[i].ra_bits );

		if ( is_pentium4() ) {
			/* combine counter bit masks for both esc registers into selector */
			event_list[i].ra_selector =
				event_list[i].ra_bits.counter[0] | event_list[i].ra_bits.
				counter[1];
		} else {
			/* make sure register allocator only looks at legal registers */
			event_list[i].ra_selector =
				event_list[i].ra_bits.selector & ALLCNTRS;
#ifdef PERFCTR_X86_INTEL_CORE2
			/* on Core2 the selector bits from bit 16 upward are folded
			   in starting at selector bit 2 */
			if ( _papi_hwi_system_info.hw_info.model ==
				 PERFCTR_X86_INTEL_CORE2 )
				event_list[i].ra_selector |=
					( ( event_list[i].ra_bits.
						selector >> 16 ) << 2 ) & ALLCNTRS;
#endif
		}
		/* calculate native event rank, which is no. of counters it can live on */
		event_list[i].ra_rank = 0;
		for ( j = 0; j < MAX_COUNTERS; j++ ) {
			if ( event_list[i].ra_selector & ( 1 << j ) ) {
				event_list[i].ra_rank++;
			}
		}

		if ( is_pentium4() ) {
			event_list[i].ra_escr[0] = event_list[i].ra_bits.escr[0];
			event_list[i].ra_escr[1] = event_list[i].ra_bits.escr[1];
#ifdef DEBUG
			SUBDBG( "i: %d\n", i );
			print_alloc( &event_list[i] );
#endif
		}
	}

	/* a non-zero result from the bipartite allocator means every event
	   got a counter */
	if ( _papi_bipartite_alloc( event_list, natNum, ESI->CmpIdx ) ) {	/* successfully mapped */
		for ( i = 0; i < natNum; i++ ) {
#ifdef PERFCTR_X86_INTEL_CORE2
			if ( _papi_hwi_system_info.hw_info.model ==
				 PERFCTR_X86_INTEL_CORE2 )
				event_list[i].ra_bits.selector = event_list[i].ra_selector;
#endif
#ifdef DEBUG
			if ( is_pentium4() ) {
				SUBDBG( "i: %d\n", i );
				print_alloc( &event_list[i] );
			}
#endif
			/* Copy all info about this native event to the NativeInfo struct */
			ptr = ESI->NativeInfoArray[i].ni_bits;
			*ptr = event_list[i].ra_bits;

			if ( is_pentium4() ) {
				/* The selector contains the counter bit position.
Turn it into a number and store it in the first counter value, zeroing the second. */ ptr->counter[0] = ffs( event_list[i].ra_selector ) - 1; ptr->counter[1] = 0; } /* Array order on perfctr is event ADD order, not counter #... */ ESI->NativeInfoArray[i].ni_position = i; } return PAPI_OK; } else return PAPI_ECNFLCT; } static void clear_cs_events( hwd_control_state_t * this_state ) { unsigned int i, j; /* total counters is sum of accumulating (nractrs) and interrupting (nrictrs) */ j = this_state->control.cpu_control.nractrs + this_state->control.cpu_control.nrictrs; /* Remove all counter control command values from eventset. */ for ( i = 0; i < j; i++ ) { SUBDBG( "Clearing pmc event entry %d\n", i ); if ( is_pentium4() ) { this_state->control.cpu_control.pmc_map[i] = 0; this_state->control.cpu_control.evntsel[i] = 0; this_state->control.cpu_control.evntsel_aux[i] = this_state->control.cpu_control. evntsel_aux[i] & ( ESCR_T0_OS | ESCR_T0_USR ); } else { this_state->control.cpu_control.pmc_map[i] = i; this_state->control.cpu_control.evntsel[i] = this_state->control.cpu_control. evntsel[i] & ( PERF_ENABLE | PERF_OS | PERF_USR ); } this_state->control.cpu_control.ireset[i] = 0; } if ( is_pentium4() ) { /* Clear pebs stuff */ this_state->control.cpu_control.p4.pebs_enable = 0; this_state->control.cpu_control.p4.pebs_matrix_vert = 0; } /* clear both a and i counter counts */ this_state->control.cpu_control.nractrs = 0; this_state->control.cpu_control.nrictrs = 0; #ifdef DEBUG if ( is_pentium4() ) print_control( &this_state->control.cpu_control ); #endif } /* This function clears the current contents of the control structure and updates it with whatever resources are allocated for all the native events in the native info structure array. 
*/
static int
_x86_update_control_state( hwd_control_state_t * this_state,
						   NativeInfo_t * native, int count,
						   hwd_context_t * ctx )
{
	( void ) ctx;			 /*unused */
	unsigned int i, k;
	/* PAPI error codes are negative, so the status lives in a signed int
	   matching the function's return type. */
	int retval = PAPI_OK;
	hwd_register_t *bits,*bits2;
	struct perfctr_cpu_control *cpu_control = &this_state->control.cpu_control;

	/* clear out the events from the control state */
	clear_cs_events( this_state );

	if ( is_pentium4() ) {
		/* fill the counters we're using */
		for ( i = 0; i < ( unsigned int ) count; i++ ) {
			/* dereference the mapping information about this native event */
			bits = native[i].ni_bits;

			/* Add counter control command values to eventset */
			cpu_control->pmc_map[i] = bits->counter[0];
			cpu_control->evntsel[i] = bits->cccr;
			cpu_control->ireset[i] = bits->ireset;
			cpu_control->pmc_map[i] |= FAST_RDPMC;
			cpu_control->evntsel_aux[i] |= bits->event;

			/* pebs_enable and pebs_matrix_vert are shared registers used for replay_events.
			   Replay_events count L1 and L2 cache events. There is only one of each for
			   the entire eventset. Therefore, there can be only one unique replay_event
			   per eventset. This means L1 and L2 can't be counted together. Which stinks.
			   This conflict should be trapped in the allocation scheme, but we'll test for it
			   here too, just in case. */
			if ( bits->pebs_enable ) {
				/* if pebs_enable isn't set, just copy */
				if ( cpu_control->p4.pebs_enable == 0 ) {
					cpu_control->p4.pebs_enable = bits->pebs_enable;
					/* if pebs_enable conflicts, flag an error */
				} else if ( cpu_control->p4.pebs_enable != bits->pebs_enable ) {
					SUBDBG
						( "WARNING: P4_update_control_state -- pebs_enable conflict!"
 );
					retval = PAPI_ECNFLCT;
				}
				/* if pebs_enable == bits->pebs_enable, do nothing */
			}
			if ( bits->pebs_matrix_vert ) {
				/* if pebs_matrix_vert isn't set, just copy */
				if ( cpu_control->p4.pebs_matrix_vert == 0 ) {
					cpu_control->p4.pebs_matrix_vert = bits->pebs_matrix_vert;
					/* if pebs_matrix_vert conflicts, flag an error */
				} else if ( cpu_control->p4.pebs_matrix_vert !=
							bits->pebs_matrix_vert ) {
					SUBDBG
						( "WARNING: P4_update_control_state -- pebs_matrix_vert conflict!" );
					retval = PAPI_ECNFLCT;
				}
				/* if pebs_matrix_vert == bits->pebs_matrix_vert, do nothing */
			}
		}
		/* every event starts out as an accumulating counter */
		this_state->control.cpu_control.nractrs = count;

		/* Make sure the TSC is always on */
		this_state->control.cpu_control.tsc_on = 1;

#ifdef DEBUG
		print_control( &this_state->control.cpu_control );
#endif
	} else {
		switch ( _papi_hwi_system_info.hw_info.model ) {
#ifdef PERFCTR_X86_INTEL_CORE2
		case PERFCTR_X86_INTEL_CORE2:
			/* fill the counters we're using */
			for ( i = 0; i < ( unsigned int ) count; i++ ) {
				bits2 = native[i].ni_bits;
				/* k = index of the lowest bit set in the selector
				   (MAX_COUNTERS if none is set) */
				for ( k = 0; k < MAX_COUNTERS; k++ )
					if ( bits2->selector & ( 1 << k ) ) {
						break;
					}
				/* selector bits 2 and up are written to pmc_map as
				   ( k - 2 ) with the 0x40000000 flag set; bits 0 and 1
				   map directly */
				if ( k > 1 )
					this_state->control.cpu_control.pmc_map[i] =
						( k - 2 ) | 0x40000000;
				else
					this_state->control.cpu_control.pmc_map[i] = k;

				/* Add counter control command values to eventset */
				this_state->control.cpu_control.evntsel[i] |=
					bits2->counter_cmd;
			}
			break;
#endif
		default:
			/* fill the counters we're using */
			for ( i = 0; i < ( unsigned int ) count; i++ ) {
				/* Add counter control command values to eventset */
				bits2 = native[i].ni_bits;
				this_state->control.cpu_control.evntsel[i] |=
					bits2->counter_cmd;
			}
		}
		this_state->control.cpu_control.nractrs = ( unsigned int ) count;
	}
	return retval;
}

/* Push the control state to perfctr: through rvperfctr_control() when
   the state carries an rvperfctr handle, otherwise through
   vperfctr_control() on the context's own handle. A negative result
   from either call is reported and turned into PAPI_ESYS. */
static int
_x86_start( hwd_context_t * ctx, hwd_control_state_t * state )
{
	int error;
#ifdef DEBUG
	print_control( &state->control.cpu_control );
#endif

	if ( state->rvperfctr != NULL ) {
		if ( ( error =
			   rvperfctr_control( state->rvperfctr, &state->control ) ) < 0 ) {
			SUBDBG( "rvperfctr_control returns: %d\n", error );
PAPIERROR( RCNTRL_ERROR ); return ( PAPI_ESYS ); } return ( PAPI_OK ); } if ( ( error = vperfctr_control( ctx->perfctr, &state->control ) ) < 0 ) { SUBDBG( "vperfctr_control returns: %d\n", error ); PAPIERROR( VCNTRL_ERROR ); return ( PAPI_ESYS ); } return ( PAPI_OK ); } static int _x86_stop( hwd_context_t * ctx, hwd_control_state_t * state ) { int error; if ( state->rvperfctr != NULL ) { if ( rvperfctr_stop( ( struct rvperfctr * ) ctx->perfctr ) < 0 ) { PAPIERROR( RCNTRL_ERROR ); return ( PAPI_ESYS ); } return ( PAPI_OK ); } error = vperfctr_stop( ctx->perfctr ); if ( error < 0 ) { SUBDBG( "vperfctr_stop returns: %d\n", error ); PAPIERROR( VCNTRL_ERROR ); return ( PAPI_ESYS ); } return ( PAPI_OK ); } static int _x86_read( hwd_context_t * ctx, hwd_control_state_t * spc, long long **dp, int flags ) { if ( flags & PAPI_PAUSED ) { vperfctr_read_state( ctx->perfctr, &spc->state, NULL ); if ( !is_pentium4() ) { unsigned int i = 0; for ( i = 0; i < spc->control.cpu_control.nractrs + spc->control.cpu_control.nrictrs; i++ ) { SUBDBG( "vperfctr_read_state: counter %d = %lld\n", i, spc->state.pmc[i] ); } } } else { SUBDBG( "vperfctr_read_ctrs\n" ); if ( spc->rvperfctr != NULL ) { rvperfctr_read_ctrs( spc->rvperfctr, &spc->state ); } else { vperfctr_read_ctrs( ctx->perfctr, &spc->state ); } } *dp = ( long long * ) spc->state.pmc; #ifdef DEBUG { if ( ISLEVEL( DEBUG_SUBSTRATE ) ) { unsigned int i; if ( is_pentium4() ) { for ( i = 0; i < spc->control.cpu_control.nractrs; i++ ) { SUBDBG( "raw val hardware index %d is %lld\n", i, ( long long ) spc->state.pmc[i] ); } } else { for ( i = 0; i < spc->control.cpu_control.nractrs + spc->control.cpu_control.nrictrs; i++ ) { SUBDBG( "raw val hardware index %d is %lld\n", i, ( long long ) spc->state.pmc[i] ); } } } } #endif return ( PAPI_OK ); } static int _x86_reset( hwd_context_t * ctx, hwd_control_state_t * cntrl ) { return ( _x86_start( ctx, cntrl ) ); } /* Perfctr requires that interrupting counters appear at the end of the pmc list 
   In the case a user wants to interrupt on a counter in an evntset that is not
   among the last events, we need to move the perfctr virtual events around to
   make it last. This function swaps two perfctr events, and then adjust the
   position entries in both the NativeInfoArray and the EventInfoArray to keep
   everything consistent. */
static void
swap_events( EventSetInfo_t * ESI, struct hwd_pmc_control *contr, int cntr1,
			 int cntr2 )
{
	unsigned int ui;
	int si, i, j;

	/* exchange the two positions wherever a native event refers to them */
	for ( i = 0; i < ESI->NativeCount; i++ ) {
		if ( ESI->NativeInfoArray[i].ni_position == cntr1 )
			ESI->NativeInfoArray[i].ni_position = cntr2;
		else if ( ESI->NativeInfoArray[i].ni_position == cntr2 )
			ESI->NativeInfoArray[i].ni_position = cntr1;
	}

	/* same exchange in every event's pos[] list; the inner loop stops at
	   the first negative entry */
	for ( i = 0; i < ESI->NumberOfEvents; i++ ) {
		for ( j = 0; ESI->EventInfoArray[i].pos[j] >= 0; j++ ) {
			if ( ESI->EventInfoArray[i].pos[j] == cntr1 )
				ESI->EventInfoArray[i].pos[j] = cntr2;
			else if ( ESI->EventInfoArray[i].pos[j] == cntr2 )
				ESI->EventInfoArray[i].pos[j] = cntr1;
		}
	}

	/* now swap the per-counter control entries themselves */
	ui = contr->cpu_control.pmc_map[cntr1];
	contr->cpu_control.pmc_map[cntr1] = contr->cpu_control.pmc_map[cntr2];
	contr->cpu_control.pmc_map[cntr2] = ui;

	ui = contr->cpu_control.evntsel[cntr1];
	contr->cpu_control.evntsel[cntr1] = contr->cpu_control.evntsel[cntr2];
	contr->cpu_control.evntsel[cntr2] = ui;

	if ( is_pentium4() ) {
		ui = contr->cpu_control.evntsel_aux[cntr1];
		contr->cpu_control.evntsel_aux[cntr1] =
			contr->cpu_control.evntsel_aux[cntr2];
		contr->cpu_control.evntsel_aux[cntr2] = ui;
	}

	si = contr->cpu_control.ireset[cntr1];
	contr->cpu_control.ireset[cntr1] = contr->cpu_control.ireset[cntr2];
	contr->cpu_control.ireset[cntr2] = si;
}

/* Enable (threshold != 0) or disable (threshold == 0) overflow
   interrupts for the event at EventIndex in the event set, moving the
   event between the accumulating and interrupting parts of the counter
   list as needed. */
static int
_x86_set_overflow( EventSetInfo_t *ESI, int EventIndex, int threshold )
{
	hwd_control_state_t *ctl = ( hwd_control_state_t * ) ( ESI->ctl_state );
	struct hwd_pmc_control *contr = &(ctl->control);
	int i, ncntrs, nricntrs = 0, nracntrs = 0, retval = 0;

	OVFDBG( "EventIndex=%d\n", EventIndex );

#ifdef DEBUG
	if ( is_pentium4() )
		print_control(
&(contr->cpu_control) );
#endif

	/* The correct event to overflow is EventIndex */
	ncntrs = _perfctr_vector.cmp_info.num_cntrs;
	i = ESI->EventInfoArray[EventIndex].pos[0];

	if ( i >= ncntrs ) {
		PAPIERROR( "Selector id %d is larger than ncntrs %d", i, ncntrs );
		return PAPI_EINVAL;
	}

	if ( threshold != 0 ) {	 /* Set an overflow threshold */
		retval = _papi_hwi_start_signal( _perfctr_vector.cmp_info.hardware_intr_sig,
										 NEED_CONTEXT,
										 _perfctr_vector.cmp_info.CmpIdx );
		if ( retval != PAPI_OK )
			return ( retval );

		/* overflow interrupt occurs on the NEXT event after overflow occurs
		   thus we subtract 1 from the threshold. */
		contr->cpu_control.ireset[i] = ( -threshold + 1 );

		if ( is_pentium4() )
			contr->cpu_control.evntsel[i] |= CCCR_OVF_PMI_T0;
		else
			contr->cpu_control.evntsel[i] |= PERF_INT_ENABLE;

		/* one more interrupting counter, one fewer accumulating counter */
		contr->cpu_control.nrictrs++;
		contr->cpu_control.nractrs--;
		nricntrs = ( int ) contr->cpu_control.nrictrs;
		nracntrs = ( int ) contr->cpu_control.nractrs;
		contr->si_signo = _perfctr_vector.cmp_info.hardware_intr_sig;

		/* move this event to the bottom part of the list if needed */
		if ( i < nracntrs )
			swap_events( ESI, contr, i, nracntrs );
		OVFDBG( "Modified event set\n" );
	} else {
		/* only undo the bookkeeping if the interrupt bit is actually set */
		if ( is_pentium4() && contr->cpu_control.evntsel[i] & CCCR_OVF_PMI_T0 ) {
			contr->cpu_control.ireset[i] = 0;
			contr->cpu_control.evntsel[i] &= ( ~CCCR_OVF_PMI_T0 );
			contr->cpu_control.nrictrs--;
			contr->cpu_control.nractrs++;
		} else if ( !is_pentium4() &&
					contr->cpu_control.evntsel[i] & PERF_INT_ENABLE ) {
			contr->cpu_control.ireset[i] = 0;
			contr->cpu_control.evntsel[i] &= ( ~PERF_INT_ENABLE );
			contr->cpu_control.nrictrs--;
			contr->cpu_control.nractrs++;
		}

		nricntrs = ( int ) contr->cpu_control.nrictrs;
		nracntrs = ( int ) contr->cpu_control.nractrs;

		/* move this event to the top part of the list if needed */
		if ( i >= nracntrs )
			swap_events( ESI, contr, i, nracntrs - 1 );

		/* no interrupting counters left: clear the signal number */
		if ( !nricntrs )
			contr->si_signo = 0;

		OVFDBG( "Modified event set\n" );

		retval = _papi_hwi_stop_signal( _perfctr_vector.cmp_info.hardware_intr_sig
);
	}
#ifdef DEBUG
	if ( is_pentium4() )
		print_control( &(contr->cpu_control) );
#endif
	OVFDBG( "End of call. Exit code: %d\n", retval );
	return ( retval );
}

/* Component hook for stopping profiling.
 * It is a no-op here: both arguments are ignored and PAPI_OK is always
 * returned. */
static int
_x86_stop_profiling( ThreadInfo_t * master, EventSetInfo_t * ESI )
{
	( void ) master;		 /*unused */
	( void ) ESI;			 /*unused */
	return ( PAPI_OK );
}

/* these define cccr and escr register bits, and the p4 event structure */
#include "perfmon/pfmlib_pentium4.h"
#include "../lib/pfmlib_pentium4_priv.h"

/* Applied to the event mask of "replay_event" before it is stored in the
 * ESCR value (see _papi_libpfm_ntv_code_to_bits_perfctr). */
#define P4_REPLAY_REAL_MASK 0x00000003

/* Pentium 4 tables referenced below; they are defined outside this file. */
extern pentium4_escr_reg_t pentium4_escrs[];
extern pentium4_cccr_reg_t pentium4_cccrs[];
extern pentium4_event_t pentium4_events[];

/* PEBS_ENABLE (.enb) and PEBS_MATRIX_VERT (.mat_vert) values for the
 * "replay_event" unit masks.  The table is indexed by unit-mask number;
 * entries 0 and 1 are placeholders and are never selected by the lookup
 * code, which only accepts indices 2..10. */
static pentium4_replay_regs_t p4_replay_regs[] = {
	/* 0 */ {.enb = 0,
			 /* dummy */
			 .mat_vert = 0,
			 },
	/* 1 */ {.enb = 0,
			 /* dummy */
			 .mat_vert = 0,
			 },
	/* 2 */ {.enb = 0x01000001,
			 /* 1stL_cache_load_miss_retired */
			 .mat_vert = 0x00000001,
			 },
	/* 3 */ {.enb = 0x01000002,
			 /* 2ndL_cache_load_miss_retired */
			 .mat_vert = 0x00000001,
			 },
	/* 4 */ {.enb = 0x01000004,
			 /* DTLB_load_miss_retired */
			 .mat_vert = 0x00000001,
			 },
	/* 5 */ {.enb = 0x01000004,
			 /* DTLB_store_miss_retired */
			 .mat_vert = 0x00000002,
			 },
	/* 6 */ {.enb = 0x01000004,
			 /* DTLB_all_miss_retired */
			 .mat_vert = 0x00000003,
			 },
	/* 7 */ {.enb = 0x01018001,
			 /* Tagged_mispred_branch */
			 .mat_vert = 0x00000010,
			 },
	/* 8 */ {.enb = 0x01000200,
			 /* MOB_load_replay_retired */
			 .mat_vert = 0x00000001,
			 },
	/* 9 */ {.enb = 0x01000400,
			 /* split_load_retired */
			 .mat_vert = 0x00000001,
			 },
	/* 10 */ {.enb = 0x01000400,
			  /* split_store_retired */
			  .mat_vert = 0x00000002,
			  },
};

/* this maps the arbitrary pmd index in libpfm/pentium4_events.h to the intel documentation */
/* (18 entries; the value at index pmd is used as a bit position when the
 * per-ESCR counter mask is built.) */
static int pfm2intel[] =
	{ 0, 1, 4, 5, 8, 9, 12, 13, 16, 2, 3, 6, 7, 10, 11, 14, 15, 17 };

/* This call is broken. Selector can be much bigger than 32 bits. It should be a pfmlib_regmask_t - pjm */
/* Also, libpfm assumes events can live on different counters with different codes. This call only returns the first occurrence found.
*/ /* Right now its only called by ntv_code_to_bits in perfctr-p3, so we're ok. But for it to be generally useful it should be fixed. - dkt */ static int _pfm_get_counter_info( unsigned int event, unsigned int *selector, int *code ) { pfmlib_regmask_t cnt, impl; unsigned int num; unsigned int i, first = 1; int ret; if ( ( ret = pfm_get_event_counters( event, &cnt ) ) != PFMLIB_SUCCESS ) { PAPIERROR( "pfm_get_event_counters(%d,%p): %s", event, &cnt, pfm_strerror( ret ) ); return PAPI_ESYS; } if ( ( ret = pfm_get_num_counters( &num ) ) != PFMLIB_SUCCESS ) { PAPIERROR( "pfm_get_num_counters(%p): %s", num, pfm_strerror( ret ) ); return PAPI_ESYS; } if ( ( ret = pfm_get_impl_counters( &impl ) ) != PFMLIB_SUCCESS ) { PAPIERROR( "pfm_get_impl_counters(%p): %s", &impl, pfm_strerror( ret ) ); return PAPI_ESYS; } *selector = 0; for ( i = 0; num; i++ ) { if ( pfm_regmask_isset( &impl, i ) ) num--; if ( pfm_regmask_isset( &cnt, i ) ) { if ( first ) { if ( ( ret = pfm_get_event_code_counter( event, i, code ) ) != PFMLIB_SUCCESS ) { PAPIERROR( "pfm_get_event_code_counter(%d, %d, %p): %s", event, i, code, pfm_strerror( ret ) ); return PAPI_ESYS; } first = 0; } *selector |= 1 << i; } } return PAPI_OK; } int _papi_libpfm_ntv_code_to_bits_perfctr( unsigned int EventCode, hwd_register_t *newbits ) { unsigned int event, umask; X86_register_t *bits = (X86_register_t *)newbits; if ( is_pentium4() ) { pentium4_escr_value_t escr_value; pentium4_cccr_value_t cccr_value; unsigned int num_masks, replay_mask, unit_masks[12]; unsigned int event_mask; unsigned int tag_value, tag_enable; unsigned int i; int j, escr, cccr, pmd; if ( _pfm_decode_native_event( EventCode, &event, &umask ) != PAPI_OK ) return PAPI_ENOEVNT; /* for each allowed escr (1 or 2) find the allowed cccrs. 
for each allowed cccr find the pmd index convert to an intel counter number; or it into bits->counter */ for ( i = 0; i < MAX_ESCRS_PER_EVENT; i++ ) { bits->counter[i] = 0; escr = pentium4_events[event].allowed_escrs[i]; if ( escr < 0 ) { continue; } bits->escr[i] = escr; for ( j = 0; j < MAX_CCCRS_PER_ESCR; j++ ) { cccr = pentium4_escrs[escr].allowed_cccrs[j]; if ( cccr < 0 ) { continue; } pmd = pentium4_cccrs[cccr].pmd; bits->counter[i] |= ( 1 << pfm2intel[pmd] ); } } /* if there's only one valid escr, copy the values */ if ( escr < 0 ) { bits->escr[1] = bits->escr[0]; bits->counter[1] = bits->counter[0]; } /* Calculate the event-mask value. Invalid masks * specified by the caller are ignored. */ tag_value = 0; tag_enable = 0; event_mask = _pfm_convert_umask( event, umask ); if ( event_mask & 0xF0000 ) { tag_enable = 1; tag_value = ( ( event_mask & 0xF0000 ) >> EVENT_MASK_BITS ); } event_mask &= 0x0FFFF; /* mask off possible tag bits */ /* Set up the ESCR and CCCR register values. */ escr_value.val = 0; escr_value.bits.t1_usr = 0; /* controlled by kernel */ escr_value.bits.t1_os = 0; /* controlled by kernel */ // escr_value.bits.t0_usr = (plm & PFM_PLM3) ? 1 : 0; // escr_value.bits.t0_os = (plm & PFM_PLM0) ? 1 : 0; escr_value.bits.tag_enable = tag_enable; escr_value.bits.tag_value = tag_value; escr_value.bits.event_mask = event_mask; escr_value.bits.event_select = pentium4_events[event].event_select; escr_value.bits.reserved = 0; /* initialize the proper bits in the cccr register */ cccr_value.val = 0; cccr_value.bits.reserved1 = 0; cccr_value.bits.enable = 1; cccr_value.bits.escr_select = pentium4_events[event].escr_select; cccr_value.bits.active_thread = 3; /* FIXME: This is set to count when either logical * CPU is active. Need a way to distinguish * between logical CPUs when HT is enabled. * the docs say these bits should always * be set. */ cccr_value.bits.compare = 0; /* FIXME: What do we do with "threshold" settings? 
*/ cccr_value.bits.complement = 0; /* FIXME: What do we do with "threshold" settings? */ cccr_value.bits.threshold = 0; /* FIXME: What do we do with "threshold" settings? */ cccr_value.bits.force_ovf = 0; /* FIXME: Do we want to allow "forcing" overflow * interrupts on all counter increments? */ cccr_value.bits.ovf_pmi_t0 = 0; cccr_value.bits.ovf_pmi_t1 = 0; /* PMI taken care of by kernel typically */ cccr_value.bits.reserved2 = 0; cccr_value.bits.cascade = 0; /* FIXME: How do we handle "cascading" counters? */ cccr_value.bits.overflow = 0; /* these flags are always zero, from what I can tell... */ bits->pebs_enable = 0; /* flag for PEBS counting */ bits->pebs_matrix_vert = 0; /* flag for PEBS_MATRIX_VERT, whatever that is */ /* ...unless the event is replay_event */ if ( !strcmp( pentium4_events[event].name, "replay_event" ) ) { escr_value.bits.event_mask = event_mask & P4_REPLAY_REAL_MASK; num_masks = prepare_umask( umask, unit_masks ); for ( i = 0; i < num_masks; i++ ) { replay_mask = unit_masks[i]; if ( replay_mask > 1 && replay_mask < 11 ) { /* process each valid mask we find */ bits->pebs_enable |= p4_replay_regs[replay_mask].enb; bits->pebs_matrix_vert |= p4_replay_regs[replay_mask].mat_vert; } } } /* store the escr and cccr values */ bits->event = escr_value.val; bits->cccr = cccr_value.val; bits->ireset = 0; /* I don't really know what this does */ SUBDBG( "escr: 0x%lx; cccr: 0x%lx\n", escr_value.val, cccr_value.val ); } else { int ret, code; if ( _pfm_decode_native_event( EventCode, &event, &umask ) != PAPI_OK ) return PAPI_ENOEVNT; if ( ( ret = _pfm_get_counter_info( event, &bits->selector, &code ) ) != PAPI_OK ) return ret; bits->counter_cmd=(int) (code | ((_pfm_convert_umask(event,umask))<< 8) ); SUBDBG( "selector: %#x\n", bits->selector ); SUBDBG( "event: %#x; umask: %#x; code: %#x; cmd: %#x\n", event, umask, code, ( ( hwd_register_t * ) bits )->counter_cmd ); } return PAPI_OK; } papi_vector_t _perfctr_vector = { .cmp_info = { /* default component 
information (unspecified values are initialized to 0) */ .name = "perfctr", .description = "Linux perfctr CPU counters", .default_domain = PAPI_DOM_USER, .available_domains = PAPI_DOM_USER | PAPI_DOM_KERNEL, .default_granularity = PAPI_GRN_THR, .available_granularities = PAPI_GRN_THR, .hardware_intr_sig = PAPI_INT_SIGNAL, /* component specific cmp_info initializations */ .fast_real_timer = 1, .fast_virtual_timer = 1, .attach = 1, .attach_must_ptrace = 1, .cntr_umasks = 1, } , /* sizes of framework-opaque component-private structures */ .size = { .context = sizeof ( X86_perfctr_context_t ), .control_state = sizeof ( X86_perfctr_control_t ), .reg_value = sizeof ( X86_register_t ), .reg_alloc = sizeof ( X86_reg_alloc_t ), } , /* function pointers in this component */ .init_control_state = _x86_init_control_state, .start = _x86_start, .stop = _x86_stop, .read = _x86_read, .allocate_registers = _x86_allocate_registers, .update_control_state = _x86_update_control_state, .set_domain = _x86_set_domain, .reset = _x86_reset, .set_overflow = _x86_set_overflow, .stop_profiling = _x86_stop_profiling, .init_component = _perfctr_init_component, .ctl = _perfctr_ctl, .dispatch_timer = _perfctr_dispatch_timer, .init_thread = _perfctr_init_thread, .shutdown_thread = _perfctr_shutdown_thread, /* from libpfm */ .ntv_enum_events = _papi_libpfm_ntv_enum_events, .ntv_name_to_code = _papi_libpfm_ntv_name_to_code, .ntv_code_to_name = _papi_libpfm_ntv_code_to_name, .ntv_code_to_descr = _papi_libpfm_ntv_code_to_descr, .ntv_code_to_bits = _papi_libpfm_ntv_code_to_bits_perfctr, }; papi-5.6.0/ChangeLogP543.txt000664 001750 001750 00000033147 13216244355 017512 0ustar00jshenry1963jshenry1963000000 000000 2016-01-25 * d779d1172a6e4c73b5ece9939c4d067c2b3d7b8d Update libpfm4 current with Jan 25 08:33:02 2016 version. 
2016-01-07 * 0d9776b8 src/components/stealtime/linux-stealtime.c: Free allocated memory in the stealtime component when component is shut down Thanks to William Cohen for contributing this patch and the following explanation: Running examples with "valgrind --leak-check=full ..." showed a number of items allocated by the stealtime component were not freed when PAPI_shutdown() was called. This patch frees those unused memory allocations. 2016-01-06 * de40668c src/papi_preset.c: Fixed memory leak in papi_preset.c by updating the infix_to_postfix function. Thanks to William Cohen for discovering the leak. The infix_to_postfix function was re-written and tested using user defined events. 2015-12-30 * db37e115 src/utils/avail.c src/utils/native_avail.c: Added "-check" flag to papi_avail and papi_native_avail to test counter availability/validity This patch updates the papi_avail and papi_native_avail utilities to use the "-check" flag to test the actual availability of counters. There were previously two different flags for this capability, papi_native_avail used "-validate" and papi_avail used "-avail_test". Based on a mailing list discussion these flags have been consolidated as "-check". 2015-12-29 * 72e0ffe8 src/components/lmsensors/linux-lmsensors.c: Fixed minor error with multiple initializers for lmsensors_vector .default_granularity. Thanks to William Cohen for the bug report. 2015-12-07 * ec3582d8 src/utils/avail.c: papi_avail to test actual availability of counters using "papi_avail --avail-test" This problem and the associated patch were detected and contributed by Harald Servat. Thanks. On an Intel(R) Xeon(R) CPU E5-2660 v2 @ 2.20GHz system with PAPI 5.4.1 installed, the papi_avail command indicates that both PAPI_LD_INS and PAPI_SR_INS are available, however the papi_event_chooser does not accept them (see below) and returns -1. This problem can occur in kernels from version 3.1 till 4.1.
The kernel devs blocked all uses of the MEM_OPS events (including load and store). The patch modifies papi_avail to test the counters to see if they can be added. papi_avail # gets all PAPI counters papi_avail -a # gets all available PAPI counters papi_avail -at # shows all available PAPI counters that can be added [Ptools-perfapi: Oct 14 2015] 2015-11-30 * 1fab922e src/components/libmsr/README src/components/libmsr/configure src/components/libmsr/configure.in...: The libmsr component is updated to match major changes in LLNL libmsr library and the LLNL msr-safe kernel module 2015-11-18 * 242b16d3 src/Makefile.inc src/components/cuda/configure src/components/cuda/configure.in...: Added papi_cuda_sampling utility in /src/components/cuda/sampling changed src/Makefile.inc , src/components/cuda/configure.in to build the utility during PAPI installation Added the -ldl switch in /src/components/cuda/tests/Makefile because 3.10.0-229.14.1.el7.x86_64 had issues using libpapi.a during compilation of cuda component test programs 2015-10-21 * a10e8331 src/papi_events.csv: papi_events: add Intel Skylake presets This just shares all of the broadwell events with skylake. Some quick tests show that this probably works. Someone with skylake hardware should validate this at some point. 2015-10-08 * 91736851 src/papi_internal.c: Thanks to David Eberius of ICL for reporting a bug in PAPI_get_event_info() in papi_internal.c, (info->component_index = (unsigned int) cidx) was missing at line 2554, of papi_internal.c 2015-08-27 * 502df070 src/Makefile.inc: Thanks to Steve Kaufmann for reporting about the redundant () parameter in the OBJECTS expression of src/Makefile.inc file. Updated Makefile.inc by removing the redundant parameter 2015-08-24 * 69fdc2e0 src/papi.c: Thanks to Harald Servat for reporting the PAPI_overflow issue for multiple eventsets. The problem was in the PAPI_start() function in the branch at line-2166:papi.c , if(is_dirty).
After update_control_state(), it is required to re-initialize the overflow settings using set_overflow() 2015-07-29 * be81dc43 src/components/perf_event/perf_event.c: perf_event: update the ARM domain workaround older ARM processors could not separate out KERNEL vs USER events. ARMv7 starting with the Cortex A15 can, as can all ARMv8 (ARM64). This updates the code with a whitelist to properly allow setting the domains. * 43be2588 src/linux-common.c: linux-common: clean up ARM cpu detection Parsing cpuinfo is always a pain. Extra work because of Raspberry Pi (ARM1176) lying and saying it's ARMv7 rather than ARMv6. * 5a101a50 src/linux-common.c: linux-common: split up x86, power and arm cpuinfo parsing * 0d7772d9 src/linux-common.c: linux-common: clean up and comment the cpuinfo parsing code 2015-07-16 * 59489b1f src/components/libmsr/Makefile.libmsr.in src/components/libmsr/README src/components/libmsr/Rules.libmsr...: Create libmsr component for reading power information and writing power constraints using MSRs on some Intel processors The PAPI libmsr component supports measuring and capping power usage on recent Intel architectures using the RAPL interface exposed through MSRs (model-specific registers). Lawrence Livermore National Laboratory has released a library (libmsr) designed to provide a simple, safe, consistent interface to several of the model-specific registers (MSRs) in Intel processors. The problem is that permitting open access to the MSRs on a machine can be a safety hazard, so access to MSRs is usually limited. In order to encourage system administrators to give wider access to the MSRs on a machine, LLNL has released a Linux kernel module (msr_safe) which provides safer, white-listed access to the MSRs. PAPI has created a libmsr component that can provide read and write access to the information and controls exposed via the libmsr library.
This PAPI component introduces a new ability for PAPI; it is the first case where PAPI is writing information to a counter as well as reading the data from the counter. 2015-07-13 * d326ecc9 src/components/perf_event/perf_event_lib.h src/papi_internal.c: Thanks to Steve Kaufmann for providing a patch that increases the PERF_EVENT_MAX_MPX_COUNTERS to 192 from 128 and enhances the corresponding warning message in papi_internal.c 2015-06-29 * e829baa5 src/components/cuda/tests/Makefile src/components/cuda/tests/cuda_ld_preload_example.README src/components/cuda/tests/cuda_ld_preload_example.c: Example of using LD_PRELOAD with the CUDA component. A short example of using LD_PRELOAD on a Linux system to intercept function calls and PAPI-enable an un-instrumented CUDA binary. Several CUDA events (e.g. SM PM counters) require a CUcontext handle to be provided since they are context switched. This means that we cannot use a PAPI_attach from an external process to measure those events in a preexisting executable. These events can only be measured from within the CUcontext, that is, within the CUDA enabled code we are trying to measure. If the user is unable to change the source code, they may be able to use LD_PRELOAD's ability to trap functions and measure the events from within the executable. See src/components/cuda/tests/cuda_ld_preload_example.README for details. 2015-06-26 * 0829a4f5 src/papi_events.csv: Add future broadwell-ep support. libpfm4 doesn't support it yet, but add it for when it appears. 2015-06-25 * 36c5b5b6 src/papi_events.csv: add broadwell predefined events For now they are the same as Haswell, as that's what the Linux kernel does. * f42eda64 src/papi_events.csv: Added definitions to Power8 for PAPI_SP_OPS, PAPI_DP_OPS.
2015-06-18 * f87542f7 src/components/perf_event/tests/event_name_lib.c: Added [case 63: /*Haswell EP*/] line to the src/components/perf_event/tests/event_name_lib.c file to support offcore for haswell EP * fbfc641f src/components/perf_event_uncore/tests/perf_event_uncore_cbox.c src/components/perf_event_uncore/tests/perf_event_uncore_lib.c: Added support for Haswell-EP processor with model-63 in src/components/perf_event_uncore/tests/perf_event_uncore_lib.c and src/components/perf_event_uncore/tests/perf_event_uncore_cbox.c files. As a result perf_event_uncore, perf_event_uncore_multiple and perf_event_uncore_cbox tests pass. Tested and verified on Intel(R) Xeon(R) CPU E5-2650 v3 @ 2.30GHz with linux kernel 4.0.4-1.el6.elrepo.x86_64 2015-06-17 * 56698211 src/components/lustre/linux-lustre.c: Thanks to Gary Mohr for the patch that removes the error message (PAPI Error: Error Code -7,Event does not exist) on executing papi_native_avail in PAPI built with lustre component 2015-06-16 * 1b9fd867 src/components/rapl/linux-rapl.c: rapl: allow DRAM to have separate scaling factor from CPU. On Haswell-EP the DRAM scaling value is different and cannot be detected. See https://lkml.org/lkml/2015/3/20/582 * 1aa74f85 src/components/rapl/linux-rapl.c: rapl: add support for Broadwell 2015-06-11 * a5ecda79 src/components/rapl/linux-rapl.c: Thanks to William Cohen for the patch which does the following: Checking the cpu family and model number is not sufficient to determine whether RAPL can be used. If the papi is running inside a guest VM, the MSR used by the PAPI RAPL component may not be available. There should be a simple read test to verify the RAPL MSR registers are available. This allows the component to more clearly report that RAPL is unsupported rather than just exiting program when the RAPL 2015-05-19 * 54c45107 src/components/rapl/utils/rapl_plot.c: Updated rapl_plot utility so that the correct values/units are reported (e.g.
scaled and fixed value counts should not be converted) 2015-05-04 * a34fbc62 src/papi_events.csv: papi_events.csv: typo in the ARM Cortex A53 definitions 2015-04-30 * caa3af72 src/papi_events.csv: papi_events.csv: add preset events for ARM Cortex A53 This is based purely on the names in the libpfm4 output, these were not validated in any way. 2015-04-20 * 66553715 INSTALL.txt: added compile incantation for compiling programs that offload code to MIC 2015-04-16 * 8914dcfc src/papi_events.csv: Bug reported by William Cohen in papi_events.csv for the event PAPI_L1_TCM 2015-03-31 * 023af5ec src/components/nvml/configure: Updated the NVML configure script which requires an autoconf and an updated configure script * 2385c1b2 src/components/nvml/Makefile.nvml.in src/components/nvml/Rules.nvml src/components/nvml/configure.in: Updated the NVML configure script to allow separate include and library paths 2015-03-30 * 3d509095 src/components/infiniband_umad/linux-infiniband_umad.c: Bugfix linux-infiniband_umad.c to include linux-infiniband_umad.h rather than linux-infiniband.h. Thanks to Aurelien Bouteiller for pointing out this bug. * b865f227 src/components/vmware/vmware.c: Corrected function name in _vmware_vector from _vmware_init to _vmware_init_thread. 2015-03-24 * 2f58a4d8 src/configure: Regenerated configure to match the PAPI_GRN_SYS patch * 12e6ef31 src/components/perf_event/tests/perf_event_system_wide.c: Support PAPI_GRN_SYS granularity for perf component, updating the system wide test (patch 2 of 2). Thanks to William Cohen for this patch and the documentation Make sure that a sane cpu number is selected with PAPI_GRN_SYS Corrections to output and comments of perf_event_system_wide.c test * 42879693 src/components/perf_event/perf_event.c src/components/perf_event/tests/perf_event_system_wide.c src/config.h.in...: Support PAPI_GRN_SYS granularity for perf component, picking a sane CPU number (patch 1 of 2). 
Thanks to William Cohen for this patch and the documentation The checks in perf_event_open syscall cause a failure when both pid=-1 and cpu=-1. The perf_event component was passing in pid=-1 and cpu=-1 when PAPI_GRN_SYS was selected. If possible, the code should pick the current processor that the command is running so that the permission check works properly when PAPI_GRN_SYS is used. The patch also adds a test fail if PAPI_GRN_SYS unable to add PAPI_TOT_CYC. * 0ab9b0c8 src/ctests/krentel_pthreads.c: Added call to unregister the overflow handler.. plus small code cleanup 2015-03-05 * d886c49c src/papi.c src/papi_libpfm4_events.c src/utils/avail.c: Clean output from papi_avail tools when there are no user defined events Thanks to Gary Mohr for this patch. The changes in this patch improve the output from the papi_avail tool. It was printing the user defined events header and a PAPI Error message when no user defined events existed. These changes add code in the enum call to return an error when trying to fetch the first user defined event if no user events are defined. This allows the application to detect that no user events are known and skip printing the user defined event heading. It also prevents the application from calling PAPI_get_event_info with a user defined event code that does not exist which avoids the PAPI Error message. Also a one line change to modify a debug message type to make the debug output produced by papi_libpfm4_events.c consistent. 
2015-03-03 * ee0c58d7 src/components/cuda/linux-cuda.c: Do not generate an error if the CUDA libraries cannot be loaded, just write a debug message * 08bb9bf0 src/configure: Updating the number to 5.4.1 2015-03-02 * 01f742c1 release_procedure.txt: Minor change to specify locations of some files papi-5.6.0/src/libpfm-3.y/lib/pfmlib_itanium2.c000664 001750 001750 00000173532 13216244363 023212 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_itanium2.c : support for the Itanium2 PMU family * * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include #include #include #include /* public headers */ #include /* private headers */ #include "pfmlib_priv.h" /* library private */ #include "pfmlib_priv_ia64.h" /* architecture private */ #include "pfmlib_itanium2_priv.h" /* PMU private */ #include "itanium2_events.h" /* PMU private */ #define is_ear(i) event_is_ear(itanium2_pe+(i)) #define is_ear_tlb(i) event_is_ear_tlb(itanium2_pe+(i)) #define is_ear_alat(i) event_is_ear_alat(itanium2_pe+(i)) #define is_ear_cache(i) event_is_ear_cache(itanium2_pe+(i)) #define is_iear(i) event_is_iear(itanium2_pe+(i)) #define is_dear(i) event_is_dear(itanium2_pe+(i)) #define is_btb(i) event_is_btb(itanium2_pe+(i)) #define has_opcm(i) event_opcm_ok(itanium2_pe+(i)) #define has_iarr(i) event_iarr_ok(itanium2_pe+(i)) #define has_darr(i) event_darr_ok(itanium2_pe+(i)) #define evt_use_opcm(e) ((e)->pfp_ita2_pmc8.opcm_used != 0 || (e)->pfp_ita2_pmc9.opcm_used !=0) #define evt_use_irange(e) ((e)->pfp_ita2_irange.rr_used) #define evt_use_drange(e) ((e)->pfp_ita2_drange.rr_used) #define evt_grp(e) (int)itanium2_pe[e].pme_qualifiers.pme_qual.pme_group #define evt_set(e) (int)itanium2_pe[e].pme_qualifiers.pme_qual.pme_set #define evt_umask(e) itanium2_pe[e].pme_umask #define FINE_MODE_BOUNDARY_BITS 12 #define FINE_MODE_MASK ~((1U<<12)-1) /* let's define some handy shortcuts! */ #define pmc_plm pmc_ita2_counter_reg.pmc_plm #define pmc_ev pmc_ita2_counter_reg.pmc_ev #define pmc_oi pmc_ita2_counter_reg.pmc_oi #define pmc_pm pmc_ita2_counter_reg.pmc_pm #define pmc_es pmc_ita2_counter_reg.pmc_es #define pmc_umask pmc_ita2_counter_reg.pmc_umask #define pmc_thres pmc_ita2_counter_reg.pmc_thres #define pmc_ism pmc_ita2_counter_reg.pmc_ism static char * pfm_ita2_get_event_name(unsigned int i); /* * Description of the PMC register mappings use by * this module (as reported in pfmlib_reg_t.reg_num): * * 0 -> PMC0 * 1 -> PMC1 * n -> PMCn * * The following are in the model specific rr_br[]: * IBR0 -> 0 * IBR1 -> 1 * ... 
* IBR7 -> 7 * DBR0 -> 0 * DBR1 -> 1 * ... * DBR7 -> 7 * * We do not use a mapping table, instead we make up the * values on the fly given the base. */ /* * The Itanium2 PMU has a bug in the fine mode implementation. * It only sees ranges with a granularity of two bundles. * So we prepare for the day they fix it. */ static int has_fine_mode_bug; static int pfm_ita2_detect(void) { int tmp; int ret = PFMLIB_ERR_NOTSUPP; tmp = pfm_ia64_get_cpu_family(); if (tmp == 0x1f) { has_fine_mode_bug = 1; ret = PFMLIB_SUCCESS; } return ret; } /* * Check the event for incompatibilities. This is useful * for L1 and L2 related events. Due to wire limitations, * some caches events are separated into sets. There * are 5 sets for the L1D cache group and 6 sets for L2 group. * It is NOT possible to simultaneously measure events from * differents sets within a group. For instance, you cannot * measure events from set0 and set1 in L1D cache group. However * it is possible to measure set0 in L1D and set1 in L2 at the same * time. * * This function verifies that the set constraint are respected. */ static int check_cross_groups_and_umasks(pfmlib_input_param_t *inp) { unsigned long ref_umask, umask; int g, s; unsigned int cnt = inp->pfp_event_count; pfmlib_event_t *e = inp->pfp_events; unsigned int i, j; /* * XXX: could possibly be optimized */ for (i=0; i < cnt; i++) { g = evt_grp(e[i].event); s = evt_set(e[i].event); if (g == PFMLIB_ITA2_EVT_NO_GRP) continue; ref_umask = evt_umask(e[i].event); for (j=i+1; j < cnt; j++) { if (evt_grp(e[j].event) != g) continue; if (evt_set(e[j].event) != s) return PFMLIB_ERR_EVTSET; /* only care about L2 cache group */ if (g != PFMLIB_ITA2_EVT_L2_CACHE_GRP || (s == 1 || s == 2)) continue; umask = evt_umask(e[j].event); /* * there is no assignement possible if the event in PMC4 * has a umask (ref_umask) and an event (from the same * set) also has a umask AND it is different. 
For some * sets, the umasks are shared, therefore the value * programmed into PMC4 determines the umask for all * the other events (with umask) from the set. */ if (umask && ref_umask != umask) return PFMLIB_ERR_NOASSIGN; } } return PFMLIB_SUCCESS; } /* * Certain prefetch events must be treated specially when instruction range restriction * is in use because they can only be constrained by IBRP1 in fine-mode. Other events * will use IBRP0 if tagged as a demand fetch OR IBPR1 if tagged as a prefetch match. * From the library's point of view there is no way of distinguishing this, so we leave * it up to the user to interpret the results. * * Events which can be qualified by the two pairs depending on their tag: * - IBP_BUNPAIRS_IN * - L1I_FETCH_RAB_HIT * - L1I_FETCH_ISB_HIT * - L1I_FILLS * * This function returns the number of qualifying prefetch events found * * XXX: not clear which events do qualify as prefetch events. */ static int prefetch_events[]={ PME_ITA2_L1I_PREFETCHES, PME_ITA2_L1I_STRM_PREFETCHES, PME_ITA2_L2_INST_PREFETCHES }; #define NPREFETCH_EVENTS sizeof(prefetch_events)/sizeof(int) static int check_prefetch_events(pfmlib_input_param_t *inp) { int code; int prefetch_codes[NPREFETCH_EVENTS]; unsigned int i, j, count; int c; int found = 0; for(i=0; i < NPREFETCH_EVENTS; i++) { pfm_get_event_code(prefetch_events[i], &code); prefetch_codes[i] = code; } count = inp->pfp_event_count; for(i=0; i < count; i++) { pfm_get_event_code(inp->pfp_events[i].event, &c); for(j=0; j < NPREFETCH_EVENTS; j++) { if (c == prefetch_codes[j]) found++; } } return found; } /* * IA64_INST_RETIRED (and subevents) is the only event which can be measured on all * 4 IBR when non-fine mode is not possible. 
* * This function returns: * - the number of events matching the IA64_INST_RETIRED code * - in retired_mask the bottom 4 bits indicates which of the 4 INST_RETIRED event * is present */ static unsigned int check_inst_retired_events(pfmlib_input_param_t *inp, unsigned long *retired_mask) { int code; int c, ret; unsigned int i, count, found = 0; unsigned long umask, mask; pfm_get_event_code(PME_ITA2_IA64_INST_RETIRED_THIS, &code); count = inp->pfp_event_count; mask = 0; for(i=0; i < count; i++) { ret = pfm_get_event_code(inp->pfp_events[i].event, &c); if (c == code) { ret = pfm_ita2_get_event_umask(inp->pfp_events[i].event, &umask); if (ret != PFMLIB_SUCCESS) break; switch(umask) { case 0: mask |= 1; break; case 1: mask |= 2; break; case 2: mask |= 4; break; case 3: mask |= 8; break; } found++; } } if (retired_mask) *retired_mask = mask; return found; } static int check_fine_mode_possible(pfmlib_ita2_input_rr_t *rr, int n) { pfmlib_ita2_input_rr_desc_t *lim = rr->rr_limits; int i; for(i=0; i < n; i++) { if ((lim[i].rr_start & FINE_MODE_MASK) != (lim[i].rr_end & FINE_MODE_MASK)) return 0; } return 1; } /* * mode = 0 -> check code (enforce bundle alignment) * mode = 1 -> check data */ static int check_intervals(pfmlib_ita2_input_rr_t *irr, int mode, unsigned int *n_intervals) { unsigned int i; pfmlib_ita2_input_rr_desc_t *lim = irr->rr_limits; for(i=0; i < 4; i++) { /* end marker */ if (lim[i].rr_start == 0 && lim[i].rr_end == 0) break; /* invalid entry */ if (lim[i].rr_start >= lim[i].rr_end) return PFMLIB_ERR_IRRINVAL; if (mode == 0 && (lim[i].rr_start & 0xf || lim[i].rr_end & 0xf)) return PFMLIB_ERR_IRRALIGN; } *n_intervals = i; return PFMLIB_SUCCESS; } static int valid_assign(pfmlib_event_t *e, unsigned int *as, pfmlib_regmask_t *r_pmcs, unsigned int cnt) { unsigned long pmc4_umask = 0, umask; char *name; int l1_grp_present = 0, l2_grp_present = 0; unsigned int i; int c, failure; int need_pmc5, need_pmc4; int pmc5_evt = -1, pmc4_evt = -1; if (PFMLIB_DEBUG()) { 
		unsigned int j;
		/*
		 * NOTE(review): the statement below is damaged. Text appears to have been
		 * lost between "for(j=0;j" and "pfp_event_count" (presumably the rest of
		 * valid_assign() and the beginning of check_cancel_events()); it must be
		 * restored from a pristine copy of the file - TODO confirm.
		 */
		for(j=0;jpfp_event_count;
	for(i=0; i < count; i++) {
		for (j=0; j < NCANCEL_EVENTS; j++) {
			pfm_get_event_code(inp->pfp_events[i].event, &code);
			if (code == cancel_codes[j]) {
				/* a second matching event is rejected */
				if (idx != -1) {
					return PFMLIB_ERR_INVAL;
				}
				idx = inp->pfp_events[i].event;
			}
		}
	}
	return PFMLIB_SUCCESS;
}

/*
 * Automatically dispatch events to corresponding counters following constraints.
 * Upon return the pfarg_regt structure is ready to be submitted to kernel
 *
 * Candidate assignments are enumerated with up to four nested loops (one per
 * event), each event being tried on a counter not already taken by an outer
 * loop, until valid_assign() accepts one. For each event one PMC and one PMD
 * entry is then written to outp.
 *
 * Returns PFMLIB_ERR_TOOMANY when more than PMU_ITA2_NUM_COUNTERS events are
 * requested, PFMLIB_ERR_NOASSIGN when no assignment is accepted, or the error
 * reported by check_cross_groups_and_umasks()/check_cancel_events().
 */
static int
pfm_ita2_dispatch_counters(pfmlib_input_param_t *inp, pfmlib_ita2_input_param_t *mod_in, pfmlib_output_param_t *outp)
{
/* evaluates to counter index b when event e can be counted on it, 0 otherwise */
#define	has_counter(e,b)	(itanium2_pe[e].pme_counters & (1 << (b)) ? (b) : 0)
	pfmlib_ita2_input_param_t *param = mod_in;
	pfm_ita2_pmc_reg_t reg;
	pfmlib_event_t *e;
	pfmlib_reg_t *pc, *pd;
	pfmlib_regmask_t *r_pmcs;
	unsigned int i,j,k,l;
	int ret;
	unsigned int max_l0, max_l1, max_l2, max_l3;
	unsigned int assign[PMU_ITA2_NUM_COUNTERS];
	unsigned int m, cnt;

	e      = inp->pfp_events;
	pc     = outp->pfp_pmcs;
	pd     = outp->pfp_pmds;
	cnt    = inp->pfp_event_count;
	r_pmcs = &inp->pfp_unavail_pmcs;

	if (PFMLIB_DEBUG())
		for (m=0; m < cnt; m++) {
			DPRINT("ev[%d]=%s counters=0x%lx\n", m, itanium2_pe[e[m].event].pme_name,
				itanium2_pe[e[m].event].pme_counters);
		}

	if (cnt > PMU_ITA2_NUM_COUNTERS) return PFMLIB_ERR_TOOMANY;

	ret = check_cross_groups_and_umasks(inp);
	if (ret != PFMLIB_SUCCESS) return ret;

	ret = check_cancel_events(inp);
	if (ret != PFMLIB_SUCCESS) return ret;

	/*
	 * upper bound of each loop level: a level beyond the number of events
	 * gets an upper bound equal to its lower bound, i.e. it does not iterate
	 */
	max_l0 = PMU_ITA2_FIRST_COUNTER + PMU_ITA2_NUM_COUNTERS;
	max_l1 = PMU_ITA2_FIRST_COUNTER + PMU_ITA2_NUM_COUNTERS*(cnt>1);
	max_l2 = PMU_ITA2_FIRST_COUNTER + PMU_ITA2_NUM_COUNTERS*(cnt>2);
	max_l3 = PMU_ITA2_FIRST_COUNTER + PMU_ITA2_NUM_COUNTERS*(cnt>3);

	DPRINT("max_l0=%u max_l1=%u max_l2=%u max_l3=%u\n", max_l0, max_l1, max_l2, max_l3);
	/*
	 *  For now, worst case in the loop nest: 4! (factorial)
	 */
	for (i=PMU_ITA2_FIRST_COUNTER; i < max_l0; i++) {

		assign[0] = has_counter(e[0].event,i);

		/* only one event: validate as soon as level 0 is chosen */
		if (max_l1 == PMU_ITA2_FIRST_COUNTER && valid_assign(e, assign, r_pmcs, cnt) == PFMLIB_SUCCESS) goto done;

		for (j=PMU_ITA2_FIRST_COUNTER; j < max_l1; j++) {

			if (j == i) continue;

			assign[1] = has_counter(e[1].event,j);

			if (max_l2 == PMU_ITA2_FIRST_COUNTER && valid_assign(e, assign, r_pmcs, cnt) == PFMLIB_SUCCESS) goto done;

			for (k=PMU_ITA2_FIRST_COUNTER; k < max_l2; k++) {

				if(k == i || k == j) continue;

				assign[2] = has_counter(e[2].event,k);

				if (max_l3 == PMU_ITA2_FIRST_COUNTER && valid_assign(e, assign, r_pmcs, cnt) == PFMLIB_SUCCESS) goto done;
				for (l=PMU_ITA2_FIRST_COUNTER; l < max_l3; l++) {

					if(l == i || l == j || l == k) continue;

					assign[3] = has_counter(e[3].event,l);

					if (valid_assign(e, assign, r_pmcs, cnt) == PFMLIB_SUCCESS) goto done;
				}
			}
		}
	}
	/* we cannot satisfy the constraints */
	return PFMLIB_ERR_NOASSIGN;
done:
	/* build the PMC value of each event and record the PMC/PMD pair it uses */
	for (j=0; j < cnt ; j++ ) {
		reg.pmc_val    = 0; /* clear all, bits 26-27 must be zero for proper operations */
		/* if plm is 0, then assume not specified per-event and use default */
		reg.pmc_plm    = inp->pfp_events[j].plm ? inp->pfp_events[j].plm : inp->pfp_dfl_plm;
		reg.pmc_oi     = 1; /* overflow interrupt */
		reg.pmc_pm     = inp->pfp_flags & PFMLIB_PFP_SYSTEMWIDE ? 1 : 0;
		reg.pmc_thres  = param ? param->pfp_ita2_counters[j].thres: 0;
		reg.pmc_ism    = param ? param->pfp_ita2_counters[j].ism : PFMLIB_ITA2_ISM_BOTH;
		reg.pmc_umask  = is_ear(e[j].event) ? 0x0 : itanium2_pe[e[j].event].pme_umask;
		reg.pmc_es     = itanium2_pe[e[j].event].pme_code;

		/*
		 * Note that we don't force PMC4.pmc_ena = 1 because the kernel takes care of this for us.
* This way we don't have to program something in PMC4 even when we don't use it */ pc[j].reg_num = assign[j]; pc[j].reg_value = reg.pmc_val; pc[j].reg_addr = pc[j].reg_alt_addr = assign[j]; pd[j].reg_num = assign[j]; pd[j].reg_addr = pd[j].reg_alt_addr = assign[j]; __pfm_vbprintf("[PMC%u(pmc%u)=0x%06lx thres=%d es=0x%02x plm=%d umask=0x%x pm=%d ism=0x%x oi=%d] %s\n", assign[j], assign[j], reg.pmc_val, reg.pmc_thres, reg.pmc_es,reg.pmc_plm, reg.pmc_umask, reg.pmc_pm, reg.pmc_ism, reg.pmc_oi, itanium2_pe[e[j].event].pme_name); __pfm_vbprintf("[PMD%u(pmd%u)]\n", pd[j].reg_num, pd[j].reg_num); } /* number of PMC registers programmed */ outp->pfp_pmc_count = cnt; outp->pfp_pmd_count = cnt; return PFMLIB_SUCCESS; } static int pfm_dispatch_iear(pfmlib_input_param_t *inp, pfmlib_ita2_input_param_t *mod_in, pfmlib_output_param_t *outp) { pfm_ita2_pmc_reg_t reg; pfmlib_ita2_input_param_t *param = mod_in; pfmlib_reg_t *pc, *pd; pfmlib_ita2_input_param_t fake_param; unsigned int pos1, pos2; unsigned int i, count; pc = outp->pfp_pmcs; pd = outp->pfp_pmds; pos1 = outp->pfp_pmc_count; pos2 = outp->pfp_pmd_count; count = inp->pfp_event_count; for (i=0; i < count; i++) { if (is_iear(inp->pfp_events[i].event)) break; } if (param == NULL || param->pfp_ita2_iear.ear_used == 0) { /* * case 3: no I-EAR event, no (or nothing) in param->pfp_ita2_iear.ear_used */ if (i == count) return PFMLIB_SUCCESS; memset(&fake_param, 0, sizeof(fake_param)); param = &fake_param; /* * case 1: extract all information for event (name) */ pfm_ita2_get_ear_mode(inp->pfp_events[i].event, ¶m->pfp_ita2_iear.ear_mode); param->pfp_ita2_iear.ear_umask = evt_umask(inp->pfp_events[i].event); param->pfp_ita2_iear.ear_ism = PFMLIB_ITA2_ISM_BOTH; /* force both instruction sets */ DPRINT("I-EAR event with no info\n"); } /* * case 2: ear_used=1, event is defined, we use the param info as it is more precise * case 4: ear_used=1, no event (free running I-EAR), use param info */ reg.pmc_val = 0; if 
(param->pfp_ita2_iear.ear_mode == PFMLIB_ITA2_EAR_TLB_MODE) { /* if plm is 0, then assume not specified per-event and use default */ reg.pmc10_ita2_tlb_reg.iear_plm = param->pfp_ita2_iear.ear_plm ? param->pfp_ita2_iear.ear_plm : inp->pfp_dfl_plm; reg.pmc10_ita2_tlb_reg.iear_pm = inp->pfp_flags & PFMLIB_PFP_SYSTEMWIDE ? 1 : 0; reg.pmc10_ita2_tlb_reg.iear_ct = 0x0; reg.pmc10_ita2_tlb_reg.iear_umask = param->pfp_ita2_iear.ear_umask; reg.pmc10_ita2_tlb_reg.iear_ism = param->pfp_ita2_iear.ear_ism; } else if (param->pfp_ita2_iear.ear_mode == PFMLIB_ITA2_EAR_CACHE_MODE) { /* if plm is 0, then assume not specified per-event and use default */ reg.pmc10_ita2_cache_reg.iear_plm = param->pfp_ita2_iear.ear_plm ? param->pfp_ita2_iear.ear_plm : inp->pfp_dfl_plm; reg.pmc10_ita2_cache_reg.iear_pm = inp->pfp_flags & PFMLIB_PFP_SYSTEMWIDE ? 1 : 0; reg.pmc10_ita2_cache_reg.iear_ct = 0x1; reg.pmc10_ita2_cache_reg.iear_umask = param->pfp_ita2_iear.ear_umask; reg.pmc10_ita2_cache_reg.iear_ism = param->pfp_ita2_iear.ear_ism; } else { DPRINT("ALAT mode not supported in I-EAR mode\n"); return PFMLIB_ERR_INVAL; } if (pfm_regmask_isset(&inp->pfp_unavail_pmcs, 10)) return PFMLIB_ERR_NOASSIGN; pc[pos1].reg_num = 10; /* PMC10 is I-EAR config register */ pc[pos1].reg_value = reg.pmc_val; pc[pos1].reg_addr = pc[pos1].reg_alt_addr = 10; pos1++; pd[pos2].reg_num = 0; pd[pos2].reg_addr = pd[pos2].reg_alt_addr= 0; pos2++; pd[pos2].reg_num = 1; pd[pos2].reg_addr = pd[pos2].reg_alt_addr = 1; pos2++; if (param->pfp_ita2_iear.ear_mode == PFMLIB_ITA2_EAR_TLB_MODE) { __pfm_vbprintf("[PMC10(pmc10)=0x%lx ctb=tlb plm=%d pm=%d ism=0x%x umask=0x%x]\n", reg.pmc_val, reg.pmc10_ita2_tlb_reg.iear_plm, reg.pmc10_ita2_tlb_reg.iear_pm, reg.pmc10_ita2_tlb_reg.iear_ism, reg.pmc10_ita2_tlb_reg.iear_umask); } else { __pfm_vbprintf("[PMC10(pmc10)=0x%lx ctb=cache plm=%d pm=%d ism=0x%x umask=0x%x]\n", reg.pmc_val, reg.pmc10_ita2_cache_reg.iear_plm, reg.pmc10_ita2_cache_reg.iear_pm, reg.pmc10_ita2_cache_reg.iear_ism, 
reg.pmc10_ita2_cache_reg.iear_umask); } __pfm_vbprintf("[PMD0(pmd0)]\n[PMD1(pmd1)\n"); /* update final number of entries used */ outp->pfp_pmc_count = pos1; outp->pfp_pmd_count = pos2; return PFMLIB_SUCCESS; } static int pfm_dispatch_dear(pfmlib_input_param_t *inp, pfmlib_ita2_input_param_t *mod_in, pfmlib_output_param_t *outp) { pfm_ita2_pmc_reg_t reg; pfmlib_ita2_input_param_t *param = mod_in; pfmlib_reg_t *pc, *pd; pfmlib_ita2_input_param_t fake_param; unsigned int pos1, pos2; unsigned int i, count; pc = outp->pfp_pmcs; pd = outp->pfp_pmds; pos1 = outp->pfp_pmc_count; pos2 = outp->pfp_pmd_count; count = inp->pfp_event_count; for (i=0; i < count; i++) { if (is_dear(inp->pfp_events[i].event)) break; } if (param == NULL || param->pfp_ita2_dear.ear_used == 0) { /* * case 3: no D-EAR event, no (or nothing) in param->pfp_ita2_dear.ear_used */ if (i == count) return PFMLIB_SUCCESS; memset(&fake_param, 0, sizeof(fake_param)); param = &fake_param; /* * case 1: extract all information for event (name) */ pfm_ita2_get_ear_mode(inp->pfp_events[i].event, ¶m->pfp_ita2_dear.ear_mode); param->pfp_ita2_dear.ear_umask = evt_umask(inp->pfp_events[i].event); param->pfp_ita2_dear.ear_ism = PFMLIB_ITA2_ISM_BOTH; /* force both instruction sets */ DPRINT("D-EAR event with no info\n"); } /* sanity check on the mode */ if ( param->pfp_ita2_dear.ear_mode != PFMLIB_ITA2_EAR_CACHE_MODE && param->pfp_ita2_dear.ear_mode != PFMLIB_ITA2_EAR_TLB_MODE && param->pfp_ita2_dear.ear_mode != PFMLIB_ITA2_EAR_ALAT_MODE) return PFMLIB_ERR_INVAL; /* * case 2: ear_used=1, event is defined, we use the param info as it is more precise * case 4: ear_used=1, no event (free running D-EAR), use param info */ reg.pmc_val = 0; /* if plm is 0, then assume not specified per-event and use default */ reg.pmc11_ita2_reg.dear_plm = param->pfp_ita2_dear.ear_plm ? param->pfp_ita2_dear.ear_plm : inp->pfp_dfl_plm; reg.pmc11_ita2_reg.dear_pm = inp->pfp_flags & PFMLIB_PFP_SYSTEMWIDE ? 
1 : 0; reg.pmc11_ita2_reg.dear_mode = param->pfp_ita2_dear.ear_mode; reg.pmc11_ita2_reg.dear_umask = param->pfp_ita2_dear.ear_umask; reg.pmc11_ita2_reg.dear_ism = param->pfp_ita2_dear.ear_ism; if (pfm_regmask_isset(&inp->pfp_unavail_pmcs, 11)) return PFMLIB_ERR_NOASSIGN; pc[pos1].reg_num = 11; /* PMC11 is D-EAR config register */ pc[pos1].reg_value = reg.pmc_val; pc[pos1].reg_addr = pc[pos1].reg_alt_addr = 11; pos1++; pd[pos2].reg_num = 2; pd[pos2].reg_addr = pd[pos2].reg_alt_addr = 2; pos2++; pd[pos2].reg_num = 3; pd[pos2].reg_addr = pd[pos2].reg_alt_addr = 3; pos2++; pd[pos2].reg_num = 17; pd[pos2].reg_addr = pd[pos2].reg_alt_addr = 17; pos2++; __pfm_vbprintf("[PMC11(pmc11)=0x%lx mode=%s plm=%d pm=%d ism=0x%x umask=0x%x]\n", reg.pmc_val, reg.pmc11_ita2_reg.dear_mode == 0 ? "L1D" : (reg.pmc11_ita2_reg.dear_mode == 1 ? "L1DTLB" : "ALAT"), reg.pmc11_ita2_reg.dear_plm, reg.pmc11_ita2_reg.dear_pm, reg.pmc11_ita2_reg.dear_ism, reg.pmc11_ita2_reg.dear_umask); __pfm_vbprintf("[PMD2(pmd2)]\n[PMD3(pmd3)\nPMD17(pmd17)\n"); /* update final number of entries used */ outp->pfp_pmc_count = pos1; outp->pfp_pmd_count = pos2; return PFMLIB_SUCCESS; } static int pfm_dispatch_opcm(pfmlib_input_param_t *inp, pfmlib_ita2_input_param_t *mod_in, pfmlib_output_param_t *outp, pfmlib_ita2_output_param_t *mod_out) { pfmlib_ita2_input_param_t *param = mod_in; pfmlib_reg_t *pc = outp->pfp_pmcs; pfm_ita2_pmc_reg_t reg, pmc15; unsigned int i, has_1st_pair, has_2nd_pair, count; unsigned int pos = outp->pfp_pmc_count; if (param == NULL) return PFMLIB_SUCCESS; /* not constrained by PMC8 nor PMC9 */ pmc15.pmc_val = 0xffffffff; /* XXX: use PAL instead. PAL value is 0xfffffff0 */ if (param->pfp_ita2_irange.rr_used && mod_out == NULL) return PFMLIB_ERR_INVAL; if (param->pfp_ita2_pmc8.opcm_used || (param->pfp_ita2_irange.rr_used && mod_out->pfp_ita2_irange.rr_nbr_used!=0) ) { reg.pmc_val = param->pfp_ita2_pmc8.opcm_used ? 
			param->pfp_ita2_pmc8.pmc_val : 0xffffffff3fffffff;

		if (param->pfp_ita2_irange.rr_used) {
			/* address range in use: stop ignoring it, honor the inversion flag */
			reg.pmc8_9_ita2_reg.opcm_ig_ad = 0;
			reg.pmc8_9_ita2_reg.opcm_inv   = param->pfp_ita2_irange.rr_flags & PFMLIB_ITA2_RR_INV ? 1 : 0;
		} else {
			/* clear range restriction fields when none is used */
			reg.pmc8_9_ita2_reg.opcm_ig_ad = 1;
			reg.pmc8_9_ita2_reg.opcm_inv   = 0;
		}

		/* force bit 2 to 1 */
		reg.pmc8_9_ita2_reg.opcm_bit2 = 1;

		if (pfm_regmask_isset(&inp->pfp_unavail_pmcs, 8))
			return PFMLIB_ERR_NOASSIGN;

		pc[pos].reg_num     = 8;
		pc[pos].reg_value   = reg.pmc_val;
		pc[pos].reg_addr    = pc[pos].reg_alt_addr = 8;
		pos++;

		/*
		 * will be constrained by PMC8
		 */
		if (param->pfp_ita2_pmc8.opcm_used) {
			has_1st_pair = has_2nd_pair = 0;
			count        = inp->pfp_event_count;
			for(i=0; i < count; i++) {
				if (inp->pfp_events[i].event == PME_ITA2_IA64_TAGGED_INST_RETIRED_IBRP0_PMC8) has_1st_pair=1;
				if (inp->pfp_events[i].event == PME_ITA2_IA64_TAGGED_INST_RETIRED_IBRP2_PMC8) has_2nd_pair=1;
			}
			/*
			 * clear the pmc15 bit of each pair whose tagged event is present;
			 * when neither event is present, both bits are cleared
			 */
			if (has_1st_pair || has_2nd_pair == 0) pmc15.pmc15_ita2_reg.opcmc_ibrp0_pmc8 = 0;
			if (has_2nd_pair || has_1st_pair == 0) pmc15.pmc15_ita2_reg.opcmc_ibrp2_pmc8 = 0;
		}

		__pfm_vbprintf("[PMC8(pmc8)=0x%lx m=%d i=%d f=%d b=%d match=0x%x mask=0x%x inv=%d ig_ad=%d]\n",
				reg.pmc_val,
				reg.pmc8_9_ita2_reg.opcm_m,
				reg.pmc8_9_ita2_reg.opcm_i,
				reg.pmc8_9_ita2_reg.opcm_f,
				reg.pmc8_9_ita2_reg.opcm_b,
				reg.pmc8_9_ita2_reg.opcm_match,
				reg.pmc8_9_ita2_reg.opcm_mask,
				reg.pmc8_9_ita2_reg.opcm_inv,
				reg.pmc8_9_ita2_reg.opcm_ig_ad);
	}

	if (param->pfp_ita2_pmc9.opcm_used) {
		/*
		 * PMC9 can only be used to qualify IA64_INST_RETIRED_* events
		 */
		if (check_inst_retired_events(inp, NULL) != inp->pfp_event_count) return PFMLIB_ERR_FEATCOMB;

		reg.pmc_val = param->pfp_ita2_pmc9.pmc_val;

		/* ig_ad, inv are ignored for PMC9, to avoid confusion we force default values */
		reg.pmc8_9_ita2_reg.opcm_ig_ad = 1;
		reg.pmc8_9_ita2_reg.opcm_inv   = 0;

		/* force bit 2 to 1 */
		reg.pmc8_9_ita2_reg.opcm_bit2 = 1;

		if (pfm_regmask_isset(&inp->pfp_unavail_pmcs, 9))
			return PFMLIB_ERR_NOASSIGN;

		pc[pos].reg_num     = 9;
		pc[pos].reg_value   = reg.pmc_val;
		pc[pos].reg_addr    = pc[pos].reg_alt_addr = 9;
		pos++;

		/*
		 * will be constrained by PMC9
		 */
		has_1st_pair = has_2nd_pair = 0;
		count        = inp->pfp_event_count;
		for(i=0; i < count; i++) {
			if (inp->pfp_events[i].event == PME_ITA2_IA64_TAGGED_INST_RETIRED_IBRP1_PMC9) has_1st_pair=1;
			if (inp->pfp_events[i].event == PME_ITA2_IA64_TAGGED_INST_RETIRED_IBRP3_PMC9) has_2nd_pair=1;
		}
		/* same rule as for PMC8, applied to the ibrp1/ibrp3 bits */
		if (has_1st_pair || has_2nd_pair == 0) pmc15.pmc15_ita2_reg.opcmc_ibrp1_pmc9 = 0;
		if (has_2nd_pair || has_1st_pair == 0) pmc15.pmc15_ita2_reg.opcmc_ibrp3_pmc9 = 0;

		__pfm_vbprintf("[PMC9(pmc9)=0x%lx m=%d i=%d f=%d b=%d match=0x%x mask=0x%x]\n",
				reg.pmc_val,
				reg.pmc8_9_ita2_reg.opcm_m,
				reg.pmc8_9_ita2_reg.opcm_i,
				reg.pmc8_9_ita2_reg.opcm_f,
				reg.pmc8_9_ita2_reg.opcm_b,
				reg.pmc8_9_ita2_reg.opcm_match,
				reg.pmc8_9_ita2_reg.opcm_mask);

	}

	if (pfm_regmask_isset(&inp->pfp_unavail_pmcs, 15))
		return PFMLIB_ERR_NOASSIGN;

	pc[pos].reg_num     = 15;
	pc[pos].reg_value   = pmc15.pmc_val;
	pc[pos].reg_addr    = pc[pos].reg_alt_addr = 15;
	pos++;

	__pfm_vbprintf("[PMC15(pmc15)=0x%lx ibrp0_pmc8=%d ibrp1_pmc9=%d ibrp2_pmc8=%d ibrp3_pmc9=%d]\n",
			pmc15.pmc_val,
			pmc15.pmc15_ita2_reg.opcmc_ibrp0_pmc8,
			pmc15.pmc15_ita2_reg.opcmc_ibrp1_pmc9,
			pmc15.pmc15_ita2_reg.opcmc_ibrp2_pmc8,
			pmc15.pmc15_ita2_reg.opcmc_ibrp3_pmc9);

	outp->pfp_pmc_count = pos;

	return PFMLIB_SUCCESS;
}

/*
 * Program the BTB configuration register (PMC12) and, when the BTB is
 * actually used, append the BTB PMDs to the list of used PMDs.
 *
 * PMC12 is also written (with value zero) when a D-EAR TLB/ALAT measurement
 * is requested without any BTB use.
 */
static int
pfm_dispatch_btb(pfmlib_input_param_t *inp, pfmlib_ita2_input_param_t *mod_in, pfmlib_output_param_t *outp)
{
	pfmlib_event_t *e= inp->pfp_events;
	pfm_ita2_pmc_reg_t reg;
	pfmlib_ita2_input_param_t *param = mod_in;
	pfmlib_reg_t *pc, *pd;
	pfmlib_ita2_input_param_t fake_param;
	int found_btb = 0, found_bad_dear = 0;
	int has_btb_param;
	unsigned int i, pos1, pos2;
	unsigned int count;

	pc   = outp->pfp_pmcs;
	pd   = outp->pfp_pmds;
	pos1 = outp->pfp_pmc_count;
	pos2 = outp->pfp_pmd_count;
	/*
	 * explicit BTB settings
	 */
	has_btb_param = param && param->pfp_ita2_btb.btb_used;

	reg.pmc_val = 0UL;

	/*
	 * we need to scan all events looking
for DEAR ALAT/TLB due to incompatibility
	 */
	count = inp->pfp_event_count;
	for (i=0; i < count; i++) {

		/* remember whether any BTB event is present */
		if (is_btb(e[i].event)) found_btb = 1;

		/* look for D-EAR events in TLB or ALAT mode */
		if (is_dear(e[i].event) && (is_ear_tlb(e[i].event) || is_ear_alat(e[i].event))) {
			found_bad_dear = 1;
		}
	}

	DPRINT("found_btb=%d found_bar_dear=%d\n", found_btb, found_bad_dear);

	/*
	 * did not find D-EAR TLB/ALAT event, need to check param structure
	 */
	if (found_bad_dear == 0 && param && param->pfp_ita2_dear.ear_used == 1) {
		if (   param->pfp_ita2_dear.ear_mode == PFMLIB_ITA2_EAR_TLB_MODE
		    || param->pfp_ita2_dear.ear_mode == PFMLIB_ITA2_EAR_ALAT_MODE)
			found_bad_dear = 1;
	}

	/*
	 * no explicit BTB event and no special case to deal with (cover part of case 3)
	 */
	if (found_btb == 0 && has_btb_param == 0 && found_bad_dear == 0) return PFMLIB_SUCCESS;

	if (has_btb_param == 0) {

		/*
		 * case 3: no BTB event, btb_used=0 but found_bad_dear=1, need to cleanup PMC12
		 */
		if (found_btb == 0) goto assign_zero;

		/*
		 * case 1: we have a BTB event but no param, default setting is to capture
		 *         all branches.
		 */
		memset(&fake_param, 0, sizeof(fake_param));
		param = &fake_param;

		param->pfp_ita2_btb.btb_ds  = 0;   /* capture branch targets */
		param->pfp_ita2_btb.btb_tm  = 0x3; /* all branches */
		param->pfp_ita2_btb.btb_ptm = 0x3; /* all branches */
		param->pfp_ita2_btb.btb_ppm = 0x3; /* all branches */
		param->pfp_ita2_btb.btb_brt = 0x0; /* all branches */

		DPRINT("BTB event with no info\n");
	}

	/*
	 * case 2: BTB event in the list, param provided
	 * case 4: no BTB event, param provided (free running mode)
	 */
	/* if plm is 0, then assume not specified and use default */
	reg.pmc12_ita2_reg.btbc_plm = param->pfp_ita2_btb.btb_plm ? param->pfp_ita2_btb.btb_plm : inp->pfp_dfl_plm;
	reg.pmc12_ita2_reg.btbc_pm  = inp->pfp_flags & PFMLIB_PFP_SYSTEMWIDE ? 1 : 0;
	/* each user-supplied setting is masked before being stored in its field */
	reg.pmc12_ita2_reg.btbc_ds  = param->pfp_ita2_btb.btb_ds & 0x1;
	reg.pmc12_ita2_reg.btbc_tm  = param->pfp_ita2_btb.btb_tm & 0x3;
	reg.pmc12_ita2_reg.btbc_ptm = param->pfp_ita2_btb.btb_ptm & 0x3;
	reg.pmc12_ita2_reg.btbc_ppm = param->pfp_ita2_btb.btb_ppm & 0x3;
	reg.pmc12_ita2_reg.btbc_brt = param->pfp_ita2_btb.btb_brt & 0x3;

	/*
	 * if DEAR-ALAT or DEAR-TLB is set then PMC12 must be set to zero (see documentation p. 87)
	 *
	 * D-EAR ALAT/TLB and BTB cannot be used at the same time.
	 * From documentation: PMC12 must be zero in this mode; else the wrong IP for misses
	 * coming right after a mispredicted branch.
	 *
	 * D-EAR cache is fine.
	 */
assign_zero:
	/* reached via the goto with reg.pmc_val still 0, so only a real BTB setup can fail here */
	if (found_bad_dear && reg.pmc_val != 0UL) return PFMLIB_ERR_EVTINCOMP;

	if (pfm_regmask_isset(&inp->pfp_unavail_pmcs, 12))
		return PFMLIB_ERR_NOASSIGN;

	memset(pc+pos1, 0, sizeof(pfmlib_reg_t));

	pc[pos1].reg_num     = 12;
	pc[pos1].reg_value   = reg.pmc_val;
	pc[pos1].reg_addr    = pc[pos1].reg_alt_addr = 12;
	pos1++;

	__pfm_vbprintf("[PMC12(pmc12)=0x%lx plm=%d pm=%d ds=%d tm=%d ptm=%d ppm=%d brt=%d]\n",
				reg.pmc_val,
				reg.pmc12_ita2_reg.btbc_plm,
				reg.pmc12_ita2_reg.btbc_pm,
				reg.pmc12_ita2_reg.btbc_ds,
				reg.pmc12_ita2_reg.btbc_tm,
				reg.pmc12_ita2_reg.btbc_ptm,
				reg.pmc12_ita2_reg.btbc_ppm,
				reg.pmc12_ita2_reg.btbc_brt);

	/*
	 * only add BTB PMD when actually using BTB.
* Not needed when dealing with D-EAR TLB and DEAR-ALAT * PMC12 restriction */ if (found_btb || has_btb_param) { /* * PMD16 is included in list of used PMD */ for(i=8; i < 17; i++, pos2++) { pd[pos2].reg_num = i; pd[pos2].reg_addr = pd[pos2].reg_alt_addr = i; __pfm_vbprintf("[PMD%u(pmd%u)]\n", pd[pos2].reg_num, pd[pos2].reg_num); } } /* update final number of entries used */ outp->pfp_pmc_count = pos1; outp->pfp_pmd_count = pos2; return PFMLIB_SUCCESS; } static void do_normal_rr(unsigned long start, unsigned long end, pfmlib_reg_t *br, int nbr, int dir, int *idx, int *reg_idx, int plm) { unsigned long size, l_addr, c; unsigned long l_offs = 0, r_offs = 0; unsigned long l_size, r_size; dbreg_t db; int p2; if (nbr < 1 || end <= start) return; size = end - start; DPRINT("start=0x%016lx end=0x%016lx size=0x%lx bytes (%lu bundles) nbr=%d dir=%d\n", start, end, size, size >> 4, nbr, dir); p2 = pfm_ia64_fls(size); c = ALIGN_DOWN(end, p2); DPRINT("largest power of two possible: 2^%d=0x%lx, crossing=0x%016lx\n", p2, 1UL << p2, c); if ((c - (1UL<= start) { l_addr = c - (1UL << p2); } else { p2--; if ((c + (1UL<>l_offs: 0x%lx\n", l_offs); } } else if (dir == 1 && r_size != 0 && nbr == 1) { p2++; l_addr = start; if (PFMLIB_DEBUG()) { r_offs = l_addr+(1UL<>r_offs: 0x%lx\n", r_offs); } } l_size = l_addr - start; r_size = end - l_addr-(1UL<>largest chunk: 2^%d @0x%016lx-0x%016lx\n", p2, l_addr, l_addr+(1UL<>before: 0x%016lx-0x%016lx\n", start, l_addr); if (r_size && !r_offs) printf(">>after : 0x%016lx-0x%016lx\n", l_addr+(1UL<>1; if (nbr & 0x1) { /* * our simple heuristic is: * we assign the largest number of registers to the largest * of the two chunks */ if (l_size > r_size) { l_nbr++; } else { r_nbr++; } } do_normal_rr(start, l_addr, br, l_nbr, 0, idx, reg_idx, plm); do_normal_rr(l_addr+(1UL<rr_start, in_rr->rr_end, n_pairs, fine_mode ? ", fine_mode" : "", rr_flags & PFMLIB_ITA2_RR_INV ? 
", inversed" : ""); __pfm_vbprintf("start offset: -0x%lx end_offset: +0x%lx\n", out_rr->rr_soff, out_rr->rr_eoff); for (j=0; j < n_pairs; j++, base_idx+=2) { d.val = dbr[base_idx+1].reg_value; r_end = dbr[base_idx].reg_value+((~(d.db.db_mask)) & ~(0xffUL << 56)); if (fine_mode) __pfm_vbprintf("brp%u: db%u: 0x%016lx db%u: plm=0x%x mask=0x%016lx\n", dbr[base_idx].reg_num>>1, dbr[base_idx].reg_num, dbr[base_idx].reg_value, dbr[base_idx+1].reg_num, d.db.db_plm, d.db.db_mask); else __pfm_vbprintf("brp%u: db%u: 0x%016lx db%u: plm=0x%x mask=0x%016lx end=0x%016lx\n", dbr[base_idx].reg_num>>1, dbr[base_idx].reg_num, dbr[base_idx].reg_value, dbr[base_idx+1].reg_num, d.db.db_plm, d.db.db_mask, r_end); } } /* * base_idx = base register index to use (for IBRP1, base_idx = 2) */ static int compute_fine_rr(pfmlib_ita2_input_rr_t *irr, int dfl_plm, int n, int *base_idx, pfmlib_ita2_output_rr_t *orr) { int i; pfmlib_reg_t *br; pfmlib_ita2_input_rr_desc_t *in_rr; pfmlib_ita2_output_rr_desc_t *out_rr; unsigned long addr; int reg_idx; dbreg_t db; in_rr = irr->rr_limits; out_rr = orr->rr_infos; br = orr->rr_br+orr->rr_nbr_used; reg_idx = *base_idx; db.val = 0; db.db.db_mask = FINE_MODE_MASK; if (n > 2) return PFMLIB_ERR_IRRTOOMANY; for (i=0; i < n; i++, reg_idx += 2, in_rr++, br+= 4) { /* * setup lower limit pair * * because of the PMU bug, we must align down to the closest bundle-pair * aligned address. 5 => 32-byte aligned address */ addr = has_fine_mode_bug ? ALIGN_DOWN(in_rr->rr_start, 5) : in_rr->rr_start; out_rr->rr_soff = in_rr->rr_start - addr; /* * adjust plm for each range */ db.db.db_plm = in_rr->rr_plm ? 
in_rr->rr_plm : (unsigned long)dfl_plm; br[0].reg_num = reg_idx; br[0].reg_value = addr; br[0].reg_addr = br[0].reg_alt_addr = reg_idx; br[1].reg_num = reg_idx+1; br[1].reg_value = db.val; br[1].reg_addr = br[1].reg_alt_addr = reg_idx+1; /* * setup upper limit pair * * * In fine mode, the bundle address stored in the upper limit debug * registers is included in the count, so we substract 0x10 to exclude it. * * because of the PMU bug, we align the (corrected) end to the nearest * 32-byte aligned address + 0x10. With this correction and depending * on the correction, we may count one * * */ addr = in_rr->rr_end - 0x10; if (has_fine_mode_bug && (addr & 0x1f) == 0) addr += 0x10; out_rr->rr_eoff = addr - in_rr->rr_end + 0x10; br[2].reg_num = reg_idx+4; br[2].reg_value = addr; br[2].reg_addr = br[2].reg_alt_addr = reg_idx+4; br[3].reg_num = reg_idx+5; br[3].reg_value = db.val; br[3].reg_addr = br[3].reg_alt_addr = reg_idx+5; if (PFMLIB_VERBOSE()) print_one_range(in_rr, out_rr, br, 0, 2, 1, irr->rr_flags); } orr->rr_nbr_used += i<<2; /* update base_idx, for subsequent calls */ *base_idx = reg_idx; return PFMLIB_SUCCESS; } /* * base_idx = base register index to use (for IBRP1, base_idx = 2) */ static int compute_single_rr(pfmlib_ita2_input_rr_t *irr, int dfl_plm, int *base_idx, pfmlib_ita2_output_rr_t *orr) { unsigned long size, end, start; unsigned long p_start, p_end; pfmlib_ita2_input_rr_desc_t *in_rr; pfmlib_ita2_output_rr_desc_t *out_rr; pfmlib_reg_t *br; dbreg_t db; int reg_idx; int l, m; in_rr = irr->rr_limits; out_rr = orr->rr_infos; br = orr->rr_br+orr->rr_nbr_used; start = in_rr->rr_start; end = in_rr->rr_end; size = end - start; reg_idx = *base_idx; l = pfm_ia64_fls(size); m = l; if (size & ((1UL << l)-1)) { if (l>62) { printf("range: [0x%lx-0x%lx] too big\n", start, end); return PFMLIB_ERR_IRRTOOBIG; } m++; } DPRINT("size=%ld, l=%d m=%d, internal: 0x%lx full: 0x%lx\n", size, l, m, 1UL << l, 1UL << m); for (; m < 64; m++) { p_start = ALIGN_DOWN(start, m); p_end 
= p_start+(1UL<= end) goto found; } return PFMLIB_ERR_IRRINVAL; found: DPRINT("m=%d p_start=0x%lx p_end=0x%lx\n", m, p_start,p_end); /* when the event is not IA64_INST_RETIRED, then we MUST use ibrp0 */ br[0].reg_num = reg_idx; br[0].reg_value = p_start; br[0].reg_addr = br[0].reg_alt_addr = reg_idx; db.val = 0; db.db.db_mask = ~((1UL << m)-1); db.db.db_plm = in_rr->rr_plm ? in_rr->rr_plm : (unsigned long)dfl_plm; br[1].reg_num = reg_idx + 1; br[1].reg_value = db.val; br[1].reg_addr = br[1].reg_alt_addr = reg_idx + 1; out_rr->rr_soff = start - p_start; out_rr->rr_eoff = p_end - end; if (PFMLIB_VERBOSE()) print_one_range(in_rr, out_rr, br, 0, 1, 0, irr->rr_flags); orr->rr_nbr_used += 2; /* update base_idx, for subsequent calls */ *base_idx = reg_idx; return PFMLIB_SUCCESS; } static int compute_normal_rr(pfmlib_ita2_input_rr_t *irr, int dfl_plm, int n, int *base_idx, pfmlib_ita2_output_rr_t *orr) { pfmlib_ita2_input_rr_desc_t *in_rr; pfmlib_ita2_output_rr_desc_t *out_rr; unsigned long r_end; pfmlib_reg_t *br; dbreg_t d; int i, j; int br_index, reg_idx, prev_index; in_rr = irr->rr_limits; out_rr = orr->rr_infos; br = orr->rr_br+orr->rr_nbr_used; reg_idx = *base_idx; br_index = 0; for (i=0; i < n; i++, in_rr++, out_rr++) { /* * running out of registers */ if (br_index == 8) break; prev_index = br_index; do_normal_rr( in_rr->rr_start, in_rr->rr_end, br, 4 - (reg_idx>>1), /* how many pairs available */ 0, &br_index, ®_idx, in_rr->rr_plm ? 
in_rr->rr_plm : dfl_plm); DPRINT("br_index=%d reg_idx=%d\n", br_index, reg_idx); /* * compute offsets */ out_rr->rr_soff = out_rr->rr_eoff = 0; for(j=prev_index; j < br_index; j+=2) { d.val = br[j+1].reg_value; r_end = br[j].reg_value+((~(d.db.db_mask)+1) & ~(0xffUL << 56)); if (br[j].reg_value <= in_rr->rr_start) out_rr->rr_soff = in_rr->rr_start - br[j].reg_value; if (r_end >= in_rr->rr_end) out_rr->rr_eoff = r_end - in_rr->rr_end; } if (PFMLIB_VERBOSE()) print_one_range(in_rr, out_rr, br, prev_index, (br_index-prev_index)>>1, 0, irr->rr_flags); } /* do not have enough registers to cover all the ranges */ if (br_index == 8 && i < n) return PFMLIB_ERR_TOOMANY; orr->rr_nbr_used += br_index; /* update base_idx, for subsequent calls */ *base_idx = reg_idx; return PFMLIB_SUCCESS; } static int pfm_dispatch_irange(pfmlib_input_param_t *inp, pfmlib_ita2_input_param_t *mod_in, pfmlib_output_param_t *outp, pfmlib_ita2_output_param_t *mod_out) { pfm_ita2_pmc_reg_t reg; pfmlib_ita2_input_param_t *param = mod_in; pfmlib_ita2_input_rr_t *irr; pfmlib_ita2_output_rr_t *orr; pfmlib_reg_t *pc = outp->pfp_pmcs; unsigned int i, pos = outp->pfp_pmc_count, count; int ret; unsigned int retired_only, retired_count, fine_mode, prefetch_count; unsigned int n_intervals; int base_idx = 0; unsigned long retired_mask; if (param == NULL) return PFMLIB_SUCCESS; if (param->pfp_ita2_irange.rr_used == 0) return PFMLIB_SUCCESS; if (mod_out == NULL) return PFMLIB_ERR_INVAL; irr = ¶m->pfp_ita2_irange; orr = &mod_out->pfp_ita2_irange; ret = check_intervals(irr, 0, &n_intervals); if (ret != PFMLIB_SUCCESS) return ret; if (n_intervals < 1) return PFMLIB_ERR_IRRINVAL; retired_count = check_inst_retired_events(inp, &retired_mask); retired_only = retired_count == inp->pfp_event_count; prefetch_count = check_prefetch_events(inp); fine_mode = irr->rr_flags & PFMLIB_ITA2_RR_NO_FINE_MODE ? 
			 0 : check_fine_mode_possible(irr, n_intervals);

	DPRINT("n_intervals=%d retired_only=%d retired_count=%d prefetch_count=%d fine_mode=%d\n",
		n_intervals, retired_only, retired_count, prefetch_count, fine_mode);

	/*
	 * On Itanium2, there are more constraints on what can be measured with irange.
	 *
	 * - The fine mode is the best because you directly set the lower and upper limits of
	 *   the range. This uses 2 ibr pairs for range (ibrp0/ibrp2 and ibp1/ibrp3). Therefore
	 *   at most 2 fine mode ranges can be defined. There is a limit on the size and alignment
	 *   of the range to allow fine mode: the range must be less than 4KB in size AND the lower
	 *   and upper limits must NOT cross a 4KB page boundary. The fine mode works with all events.
	 *
	 * - if the fine mode fails, then for all events, except IA64_TAGGED_INST_RETIRED_*, only
	 *   the first pair of ibr is available: ibrp0. This imposes some severe restrictions on the
	 *   size and alignment of the range. It can be bigger than 4KB and must be properly aligned
	 *   on its size. The library relaxes these constraints by allowing the covered areas to be
	 *   larger than the expected range. It may start before and end after. You can determine how
	 *   far off the range is in either direction for each range by looking at the rr_soff (start
	 *   offset) and rr_eoff (end offset).
	 *
	 * - if the events include certain prefetch events then only IBRP1 can be used in fine mode
	 *   See 10.3.5.1 Exception 1.
	 *
	 * - Finally, when the events are ONLY IA64_TAGGED_INST_RETIRED_* then all IBR pairs can be used
	 *   to cover the range giving us more flexibility to approximate the range when it is not
	 *   properly aligned on its size (see 10.3.5.2 Exception 2).
	 */

	if (fine_mode == 0 && retired_only == 0 && n_intervals > 1) return PFMLIB_ERR_IRRTOOMANY;

	/* we do not default to non-fine mode to support more ranges */
	if (n_intervals > 2 && fine_mode == 1) return PFMLIB_ERR_IRRTOOMANY;

	if (fine_mode == 0) {
		if (retired_only) {
			ret = compute_normal_rr(irr, inp->pfp_dfl_plm, n_intervals, &base_idx, orr);
		} else {
			/* unless we have only prefetch and instruction retired events,
			 * we cannot satisfy the request because the other events cannot
			 * be measured on anything but IBRP0.
			 */
			if (prefetch_count && (prefetch_count+retired_count) != inp->pfp_event_count)
				return PFMLIB_ERR_FEATCOMB;

			/* with prefetch events start at register index 2, i.e. IBRP1 */
			base_idx = prefetch_count ? 2 : 0;

			ret = compute_single_rr(irr, inp->pfp_dfl_plm, &base_idx, orr);
		}
	} else {
		if (prefetch_count && n_intervals != 1) return PFMLIB_ERR_IRRTOOMANY;

		/* with prefetch events start at register index 2, i.e. IBRP1 */
		base_idx = prefetch_count ? 2 : 0;

		ret = compute_fine_rr(irr, inp->pfp_dfl_plm, n_intervals, &base_idx, orr);
	}

	if (ret != PFMLIB_SUCCESS) {
		return ret == PFMLIB_ERR_TOOMANY ? PFMLIB_ERR_IRRTOOMANY : ret;
	}

	reg.pmc_val = 0xdb6; /* default value */

	/* clear the pmc14 field of every ibr pair that was programmed above */
	count = orr->rr_nbr_used;
	for (i=0; i < count; i++) {
		switch(orr->rr_br[i].reg_num) {
			case 0:
				reg.pmc14_ita2_reg.iarc_ibrp0 = 0;
				break;
			case 2:
				reg.pmc14_ita2_reg.iarc_ibrp1 = 0;
				break;
			case 4:
				reg.pmc14_ita2_reg.iarc_ibrp2 = 0;
				break;
			case 6:
				reg.pmc14_ita2_reg.iarc_ibrp3 = 0;
				break;
		}
	}

	if (retired_only && (param->pfp_ita2_pmc8.opcm_used ||param->pfp_ita2_pmc9.opcm_used)) {
		/*
		 * PMC8 + IA64_INST_RETIRED only works if irange on IBRP0 and/or IBRP2
		 * PMC9 + IA64_INST_RETIRED only works if irange on IBRP1 and/or IBRP3
		 */
		count = orr->rr_nbr_used;
		for (i=0; i < count; i++) {
			if (orr->rr_br[i].reg_num == 0 && param->pfp_ita2_pmc9.opcm_used) return PFMLIB_ERR_FEATCOMB;
			if (orr->rr_br[i].reg_num == 2 && param->pfp_ita2_pmc8.opcm_used) return PFMLIB_ERR_FEATCOMB;
			if (orr->rr_br[i].reg_num == 4 && param->pfp_ita2_pmc9.opcm_used) return PFMLIB_ERR_FEATCOMB;
			if (orr->rr_br[i].reg_num == 6 && param->pfp_ita2_pmc8.opcm_used) return PFMLIB_ERR_FEATCOMB;
		}
	}

	if (fine_mode) {
		reg.pmc14_ita2_reg.iarc_fine = 1;
	} else if (retired_only) {
		/*
		 * we need to check that the user provided all the events needed to cover
		 * all the ibr pairs used to cover the range
		 */
		if ((retired_mask & 0x1) == 0 &&  reg.pmc14_ita2_reg.iarc_ibrp0 == 0) return PFMLIB_ERR_IRRINVAL;
		if ((retired_mask & 0x2) == 0 &&  reg.pmc14_ita2_reg.iarc_ibrp1 == 0) return PFMLIB_ERR_IRRINVAL;
		if ((retired_mask & 0x4) == 0 &&  reg.pmc14_ita2_reg.iarc_ibrp2 == 0) return PFMLIB_ERR_IRRINVAL;
		if ((retired_mask & 0x8) == 0 &&  reg.pmc14_ita2_reg.iarc_ibrp3 == 0) return PFMLIB_ERR_IRRINVAL;
	}

	/* initialize pmc request slot */
	memset(pc+pos, 0, sizeof(pfmlib_reg_t));

	if (pfm_regmask_isset(&inp->pfp_unavail_pmcs, 14))
		return PFMLIB_ERR_NOASSIGN;

	pc[pos].reg_num     = 14;
	pc[pos].reg_value   = reg.pmc_val;
	pc[pos].reg_addr    = pc[pos].reg_alt_addr = 14;
	pos++;

	__pfm_vbprintf("[PMC14(pmc14)=0x%lx ibrp0=%d ibrp1=%d ibrp2=%d ibrp3=%d fine=%d]\n",
			reg.pmc_val,
			reg.pmc14_ita2_reg.iarc_ibrp0,
			reg.pmc14_ita2_reg.iarc_ibrp1,
			reg.pmc14_ita2_reg.iarc_ibrp2,
			reg.pmc14_ita2_reg.iarc_ibrp3,
			reg.pmc14_ita2_reg.iarc_fine);

	outp->pfp_pmc_count = pos;

	return PFMLIB_SUCCESS;
}

/*
 * Lookup table indexed by a 3-bit code built from the IR_USED (I), OP_USED (O)
 * and DR_USED (D) flags; the entry is the value stored in the corresponding
 * pmc13 cfg_dbrpX field.
 */
static const unsigned long iod_tab[8]={
	/* --- */	3,
	/* --D */	2,
	/* -O- */	3, /* should not be used */
	/* -OD */	0, /* =IOD safe because default IBR is harmless */
	/* I-- */	1, /* =IO safe because by default OPC is turned off */
	/* I-D */	0, /* =IOD safe because by default opc is turned off */
	/* IO- */	1,
	/* IOD */	0
};

/*
 * IMPORTANT: MUST BE CALLED *AFTER* pfm_dispatch_irange() to make sure we see
 * the irange programming to adjust pmc13.
*/ static int pfm_dispatch_drange(pfmlib_input_param_t *inp, pfmlib_ita2_input_param_t *mod_in, pfmlib_output_param_t *outp, pfmlib_ita2_output_param_t *mod_out) { pfmlib_ita2_input_param_t *param = mod_in; pfmlib_reg_t *pc = outp->pfp_pmcs; pfmlib_ita2_input_rr_t *irr; pfmlib_ita2_output_rr_t *orr, *orr2; pfm_ita2_pmc_reg_t pmc13; pfm_ita2_pmc_reg_t pmc14; unsigned int i, pos = outp->pfp_pmc_count; int iod_codes[4], dfl_val_pmc8, dfl_val_pmc9; unsigned int n_intervals; int ret; int base_idx = 0; int fine_mode = 0; #define DR_USED 0x1 /* data range is used */ #define OP_USED 0x2 /* opcode matching is used */ #define IR_USED 0x4 /* code range is used */ if (param == NULL) return PFMLIB_SUCCESS; /* * if only pmc8/pmc9 opcode matching is used, we do not need to change * the default value of pmc13 regardless of the events being measured. */ if ( param->pfp_ita2_drange.rr_used == 0 && param->pfp_ita2_irange.rr_used == 0) return PFMLIB_SUCCESS; /* * it seems like the ignored bits need to have special values * otherwise this does not work. */ pmc13.pmc_val = 0x2078fefefefe; /* * initialize iod codes */ iod_codes[0] = iod_codes[1] = iod_codes[2] = iod_codes[3] = 0; /* * setup default iod value, we need to separate because * if drange is used we do not know in advance which DBR will be used * therefore we need to apply dfl_val later */ dfl_val_pmc8 = param->pfp_ita2_pmc8.opcm_used ? OP_USED : 0; dfl_val_pmc9 = param->pfp_ita2_pmc9.opcm_used ? OP_USED : 0; if (param->pfp_ita2_drange.rr_used == 1) { if (mod_out == NULL) return PFMLIB_ERR_INVAL; irr = ¶m->pfp_ita2_drange; orr = &mod_out->pfp_ita2_drange; ret = check_intervals(irr, 1, &n_intervals); if (ret != PFMLIB_SUCCESS) return ret; if (n_intervals < 1) return PFMLIB_ERR_DRRINVAL; ret = compute_normal_rr(irr, inp->pfp_dfl_plm, n_intervals, &base_idx, orr); if (ret != PFMLIB_SUCCESS) { return ret == PFMLIB_ERR_TOOMANY ? PFMLIB_ERR_DRRTOOMANY : ret; } /* * Update iod_codes to reflect the use of the DBR constraint. 
*/ for (i=0; i < orr->rr_nbr_used; i++) { if (orr->rr_br[i].reg_num == 0) iod_codes[0] |= DR_USED | dfl_val_pmc8; if (orr->rr_br[i].reg_num == 2) iod_codes[1] |= DR_USED | dfl_val_pmc9; if (orr->rr_br[i].reg_num == 4) iod_codes[2] |= DR_USED | dfl_val_pmc8; if (orr->rr_br[i].reg_num == 6) iod_codes[3] |= DR_USED | dfl_val_pmc9; } } /* * XXX: assume dispatch_irange executed before calling this function */ if (param->pfp_ita2_irange.rr_used == 1) { orr2 = &mod_out->pfp_ita2_irange; if (mod_out == NULL) return PFMLIB_ERR_INVAL; /* * we need to find out whether or not the irange is using * fine mode. If this is the case, then we only need to * program pmc13 for the ibr pairs which designate the lower * bounds of a range. For instance, if IBRP0/IBRP2 are used, * then we only need to program pmc13.cfg_dbrp0 and pmc13.ena_dbrp0, * the PMU will automatically use IBRP2, even though pmc13.ena_dbrp2=0. */ for(i=0; i <= pos; i++) { if (pc[i].reg_num == 14) { pmc14.pmc_val = pc[i].reg_value; if (pmc14.pmc14_ita2_reg.iarc_fine == 1) fine_mode = 1; break; } } /* * Update to reflect the use of the IBR constraint */ for (i=0; i < orr2->rr_nbr_used; i++) { if (orr2->rr_br[i].reg_num == 0) iod_codes[0] |= IR_USED | dfl_val_pmc8; if (orr2->rr_br[i].reg_num == 2) iod_codes[1] |= IR_USED | dfl_val_pmc9; if (fine_mode == 0 && orr2->rr_br[i].reg_num == 4) iod_codes[2] |= IR_USED | dfl_val_pmc8; if (fine_mode == 0 && orr2->rr_br[i].reg_num == 6) iod_codes[3] |= IR_USED | dfl_val_pmc9; } } if (param->pfp_ita2_irange.rr_used == 0 && param->pfp_ita2_drange.rr_used ==0) { iod_codes[0] = iod_codes[2] = dfl_val_pmc8; iod_codes[1] = iod_codes[3] = dfl_val_pmc9; } /* * update the cfg dbrpX field. If we put a constraint on a cfg dbrp, then * we must enable it in the corresponding ena_dbrpX */ pmc13.pmc13_ita2_reg.darc_ena_dbrp0 = iod_codes[0] ? 1 : 0; pmc13.pmc13_ita2_reg.darc_cfg_dbrp0 = iod_tab[iod_codes[0]]; pmc13.pmc13_ita2_reg.darc_ena_dbrp1 = iod_codes[1] ? 
1 : 0; pmc13.pmc13_ita2_reg.darc_cfg_dbrp1 = iod_tab[iod_codes[1]]; pmc13.pmc13_ita2_reg.darc_ena_dbrp2 = iod_codes[2] ? 1 : 0; pmc13.pmc13_ita2_reg.darc_cfg_dbrp2 = iod_tab[iod_codes[2]]; pmc13.pmc13_ita2_reg.darc_ena_dbrp3 = iod_codes[3] ? 1 : 0; pmc13.pmc13_ita2_reg.darc_cfg_dbrp3 = iod_tab[iod_codes[3]]; if (pfm_regmask_isset(&inp->pfp_unavail_pmcs, 13)) return PFMLIB_ERR_NOASSIGN; pc[pos].reg_num = 13; pc[pos].reg_value = pmc13.pmc_val; pc[pos].reg_addr = pc[pos].reg_alt_addr = 13; pos++; __pfm_vbprintf("[PMC13(pmc13)=0x%lx cfg_dbrp0=%d cfg_dbrp1=%d cfg_dbrp2=%d cfg_dbrp3=%d ena_dbrp0=%d ena_dbrp1=%d ena_dbrp2=%d ena_dbrp3=%d]\n", pmc13.pmc_val, pmc13.pmc13_ita2_reg.darc_cfg_dbrp0, pmc13.pmc13_ita2_reg.darc_cfg_dbrp1, pmc13.pmc13_ita2_reg.darc_cfg_dbrp2, pmc13.pmc13_ita2_reg.darc_cfg_dbrp3, pmc13.pmc13_ita2_reg.darc_ena_dbrp0, pmc13.pmc13_ita2_reg.darc_ena_dbrp1, pmc13.pmc13_ita2_reg.darc_ena_dbrp2, pmc13.pmc13_ita2_reg.darc_ena_dbrp3); outp->pfp_pmc_count = pos; return PFMLIB_SUCCESS; } static int check_qualifier_constraints(pfmlib_input_param_t *inp, pfmlib_ita2_input_param_t *mod_in) { pfmlib_ita2_input_param_t *param = mod_in; pfmlib_event_t *e = inp->pfp_events; unsigned int i, count; count = inp->pfp_event_count; for(i=0; i < count; i++) { /* * skip check for counter which requested it. Use at your own risk. * No all counters have necessarily been validated for use with * qualifiers. Typically the event is counted as if no constraint * existed. 
*/ if (param->pfp_ita2_counters[i].flags & PFMLIB_ITA2_FL_EVT_NO_QUALCHECK) continue; if (evt_use_irange(param) && has_iarr(e[i].event) == 0) return PFMLIB_ERR_FEATCOMB; if (evt_use_drange(param) && has_darr(e[i].event) == 0) return PFMLIB_ERR_FEATCOMB; if (evt_use_opcm(param) && has_opcm(e[i].event) == 0) return PFMLIB_ERR_FEATCOMB; } return PFMLIB_SUCCESS; } static int check_range_plm(pfmlib_input_param_t *inp, pfmlib_ita2_input_param_t *mod_in) { pfmlib_ita2_input_param_t *param = mod_in; unsigned int i, count; if (param->pfp_ita2_drange.rr_used == 0 && param->pfp_ita2_irange.rr_used == 0) return PFMLIB_SUCCESS; /* * range restriction applies to all events, therefore we must have a consistent * set of plm and they must match the pfp_dfl_plm which is used to setup the debug * registers */ count = inp->pfp_event_count; for(i=0; i < count; i++) { if (inp->pfp_events[i].plm && inp->pfp_events[i].plm != inp->pfp_dfl_plm) return PFMLIB_ERR_FEATCOMB; } return PFMLIB_SUCCESS; } static int pfm_ita2_dispatch_events(pfmlib_input_param_t *inp, void *model_in, pfmlib_output_param_t *outp, void *model_out) { int ret; pfmlib_ita2_input_param_t *mod_in = (pfmlib_ita2_input_param_t *)model_in; pfmlib_ita2_output_param_t *mod_out = (pfmlib_ita2_output_param_t *)model_out; /* * nothing will come out of this combination */ if (mod_out && mod_in == NULL) return PFMLIB_ERR_INVAL; /* check opcode match, range restriction qualifiers */ if (mod_in && check_qualifier_constraints(inp, mod_in) != PFMLIB_SUCCESS) return PFMLIB_ERR_FEATCOMB; /* check for problems with raneg restriction and per-event plm */ if (mod_in && check_range_plm(inp, mod_in) != PFMLIB_SUCCESS) return PFMLIB_ERR_FEATCOMB; ret = pfm_ita2_dispatch_counters(inp, mod_in, outp); if (ret != PFMLIB_SUCCESS) return ret; /* now check for I-EAR */ ret = pfm_dispatch_iear(inp, mod_in, outp); if (ret != PFMLIB_SUCCESS) return ret; /* now check for D-EAR */ ret = pfm_dispatch_dear(inp, mod_in, outp); if (ret != PFMLIB_SUCCESS) 
return ret; /* XXX: must be done before dispatch_opcm() and dispatch_drange() */ ret = pfm_dispatch_irange(inp, mod_in, outp, mod_out);; if (ret != PFMLIB_SUCCESS) return ret; ret = pfm_dispatch_drange(inp, mod_in, outp, mod_out);; if (ret != PFMLIB_SUCCESS) return ret; /* now check for Opcode matchers */ ret = pfm_dispatch_opcm(inp, mod_in, outp, mod_out); if (ret != PFMLIB_SUCCESS) return ret; ret = pfm_dispatch_btb(inp, mod_in, outp); return ret; } /* XXX: return value is also error code */ int pfm_ita2_get_event_maxincr(unsigned int i, unsigned int *maxincr) { if (i >= PME_ITA2_EVENT_COUNT || maxincr == NULL) return PFMLIB_ERR_INVAL; *maxincr = itanium2_pe[i].pme_maxincr; return PFMLIB_SUCCESS; } int pfm_ita2_is_ear(unsigned int i) { return i < PME_ITA2_EVENT_COUNT && is_ear(i); } int pfm_ita2_is_dear(unsigned int i) { return i < PME_ITA2_EVENT_COUNT && is_dear(i); } int pfm_ita2_is_dear_tlb(unsigned int i) { return i < PME_ITA2_EVENT_COUNT && is_dear(i) && is_ear_tlb(i); } int pfm_ita2_is_dear_cache(unsigned int i) { return i < PME_ITA2_EVENT_COUNT && is_dear(i) && is_ear_cache(i); } int pfm_ita2_is_dear_alat(unsigned int i) { return i < PME_ITA2_EVENT_COUNT && is_ear_alat(i); } int pfm_ita2_is_iear(unsigned int i) { return i < PME_ITA2_EVENT_COUNT && is_iear(i); } int pfm_ita2_is_iear_tlb(unsigned int i) { return i < PME_ITA2_EVENT_COUNT && is_iear(i) && is_ear_tlb(i); } int pfm_ita2_is_iear_cache(unsigned int i) { return i < PME_ITA2_EVENT_COUNT && is_iear(i) && is_ear_cache(i); } int pfm_ita2_is_btb(unsigned int i) { return i < PME_ITA2_EVENT_COUNT && is_btb(i); } int pfm_ita2_support_iarr(unsigned int i) { return i < PME_ITA2_EVENT_COUNT && has_iarr(i); } int pfm_ita2_support_darr(unsigned int i) { return i < PME_ITA2_EVENT_COUNT && has_darr(i); } int pfm_ita2_support_opcm(unsigned int i) { return i < PME_ITA2_EVENT_COUNT && has_opcm(i); } int pfm_ita2_get_ear_mode(unsigned int i, pfmlib_ita2_ear_mode_t *m) { pfmlib_ita2_ear_mode_t r; if (!is_ear(i) || m 
== NULL) return PFMLIB_ERR_INVAL; r = PFMLIB_ITA2_EAR_TLB_MODE; if (is_ear_tlb(i)) goto done; r = PFMLIB_ITA2_EAR_CACHE_MODE; if (is_ear_cache(i)) goto done; r = PFMLIB_ITA2_EAR_ALAT_MODE; if (is_ear_alat(i)) goto done; return PFMLIB_ERR_INVAL; done: *m = r; return PFMLIB_SUCCESS; } static int pfm_ita2_get_event_code(unsigned int i, unsigned int cnt, int *code) { if (cnt != PFMLIB_CNT_FIRST && (cnt < 4 || cnt > 7)) return PFMLIB_ERR_INVAL; *code = (int)itanium2_pe[i].pme_code; return PFMLIB_SUCCESS; } /* * This function is accessible directly to the user */ int pfm_ita2_get_event_umask(unsigned int i, unsigned long *umask) { if (i >= PME_ITA2_EVENT_COUNT || umask == NULL) return PFMLIB_ERR_INVAL; *umask = evt_umask(i); return PFMLIB_SUCCESS; } int pfm_ita2_get_event_group(unsigned int i, int *grp) { if (i >= PME_ITA2_EVENT_COUNT || grp == NULL) return PFMLIB_ERR_INVAL; *grp = evt_grp(i); return PFMLIB_SUCCESS; } int pfm_ita2_get_event_set(unsigned int i, int *set) { if (i >= PME_ITA2_EVENT_COUNT || set == NULL) return PFMLIB_ERR_INVAL; *set = evt_set(i) == 0xf ? PFMLIB_ITA2_EVT_NO_SET : evt_set(i); return PFMLIB_SUCCESS; } /* external interface */ int pfm_ita2_irange_is_fine(pfmlib_output_param_t *outp, pfmlib_ita2_output_param_t *mod_out) { pfmlib_ita2_output_param_t *param = mod_out; pfm_ita2_pmc_reg_t reg; unsigned int i, count; /* some sanity checks */ if (outp == NULL || param == NULL) return 0; if (outp->pfp_pmc_count >= PFMLIB_MAX_PMCS) return 0; if (param->pfp_ita2_irange.rr_nbr_used == 0) return 0; /* * we look for pmc14 as it contains the bit indicating if fine mode is used */ count = outp->pfp_pmc_count; for(i=0; i < count; i++) { if (outp->pfp_pmcs[i].reg_num == 14) goto found; } return 0; found: reg.pmc_val = outp->pfp_pmcs[i].reg_value; return reg.pmc14_ita2_reg.iarc_fine ? 
1 : 0; } static char * pfm_ita2_get_event_name(unsigned int i) { return itanium2_pe[i].pme_name; } static void pfm_ita2_get_event_counters(unsigned int j, pfmlib_regmask_t *counters) { unsigned int i; unsigned long m; memset(counters, 0, sizeof(*counters)); m =itanium2_pe[j].pme_counters; for(i=0; m ; i++, m>>=1) { if (m & 0x1) pfm_regmask_set(counters, i); } } static void pfm_ita2_get_impl_pmcs(pfmlib_regmask_t *impl_pmcs) { unsigned int i = 0; /* all pmcs are contiguous */ for(i=0; i < PMU_ITA2_NUM_PMCS; i++) pfm_regmask_set(impl_pmcs, i); } static void pfm_ita2_get_impl_pmds(pfmlib_regmask_t *impl_pmds) { unsigned int i = 0; /* all pmds are contiguous */ for(i=0; i < PMU_ITA2_NUM_PMDS; i++) pfm_regmask_set(impl_pmds, i); } static void pfm_ita2_get_impl_counters(pfmlib_regmask_t *impl_counters) { unsigned int i = 0; /* counting pmds are contiguous */ for(i=4; i < 8; i++) pfm_regmask_set(impl_counters, i); } static void pfm_ita2_get_hw_counter_width(unsigned int *width) { *width = PMU_ITA2_COUNTER_WIDTH; } static int pfm_ita2_get_event_description(unsigned int ev, char **str) { char *s; s = itanium2_pe[ev].pme_desc; if (s) { *str = strdup(s); } else { *str = NULL; } return PFMLIB_SUCCESS; } static int pfm_ita2_get_cycle_event(pfmlib_event_t *e) { e->event = PME_ITA2_CPU_CYCLES; return PFMLIB_SUCCESS; } static int pfm_ita2_get_inst_retired(pfmlib_event_t *e) { e->event = PME_ITA2_IA64_INST_RETIRED; return PFMLIB_SUCCESS; } pfm_pmu_support_t itanium2_support={ .pmu_name = "itanium2", .pmu_type = PFMLIB_ITANIUM2_PMU, .pme_count = PME_ITA2_EVENT_COUNT, .pmc_count = PMU_ITA2_NUM_PMCS, .pmd_count = PMU_ITA2_NUM_PMDS, .num_cnt = PMU_ITA2_NUM_COUNTERS, .get_event_code = pfm_ita2_get_event_code, .get_event_name = pfm_ita2_get_event_name, .get_event_counters = pfm_ita2_get_event_counters, .dispatch_events = pfm_ita2_dispatch_events, .pmu_detect = pfm_ita2_detect, .get_impl_pmcs = pfm_ita2_get_impl_pmcs, .get_impl_pmds = pfm_ita2_get_impl_pmds, .get_impl_counters = 
pfm_ita2_get_impl_counters, .get_hw_counter_width = pfm_ita2_get_hw_counter_width, .get_event_desc = pfm_ita2_get_event_description, .get_cycle_event = pfm_ita2_get_cycle_event, .get_inst_retired_event = pfm_ita2_get_inst_retired }; papi-5.6.0/src/libpfm-3.y/examples_v2.x/ia64/ita_dear.c000664 001750 001750 00000025133 13216244362 024361 0ustar00jshenry1963jshenry1963000000 000000 /* * ita_dear.c - example of how use the D-EAR with the Itanium PMU * * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux/ia64. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #define NUM_PMCS PFMLIB_MAX_PMCS #define NUM_PMDS PFMLIB_MAX_PMDS #define MAX_EVT_NAME_LEN 128 #define MAX_PMU_NAME_LEN 32 #define EVENT_NAME "DATA_EAR_CACHE_LAT4" #define SMPL_PERIOD (40) #define M_PMD(x) (1UL<<(x)) #define DEAR_REGS_MASK (M_PMD(2)|M_PMD(3)|M_PMD(17)) typedef pfm_dfl_smpl_hdr_t dear_hdr_t; typedef pfm_dfl_smpl_entry_t dear_entry_t; typedef pfm_dfl_smpl_arg_t smpl_arg_t; static void *smpl_vaddr; static unsigned long entry_size; static int id; #define BPL (sizeof(uint64_t)<<3) #define LBPL 6 static inline void pfm_bv_set(uint64_t *bv, uint16_t rnum) { bv[rnum>>LBPL] |= 1UL << (rnum&(BPL-1)); } long do_test(unsigned long size) { unsigned long i, sum = 0; int *array; printf("buffer size %.1fMB\n", (size*sizeof(int))/1024.0); array = (int *)malloc(size * sizeof(int)); if (array == NULL ) { printf("buffer size %.1fMB\n", (size*sizeof(int))/1024.0); exit(1); } for(i=0; ihdr_overflows <= last_ovfl && last_ovfl != ~0UL) { printf("skipping identical set of samples %lu <= %lu\n", hdr->hdr_overflows, last_ovfl); return; } pos = (unsigned long)(hdr+1); /* * walk through all the entries recored in the buffer */ for(i=0; i < hdr->hdr_count; i++) { ret = 0; ent = (dear_entry_t *)pos; /* * print entry header */ safe_printf("Entry %ld PID:%d TID:%d CPU:%d STAMP:0x%lx IIP:0x%016lx\n", smpl_entry++, ent->tgid, ent->pid, ent->cpu, ent->tstamp, ent->ip); /* * point to first recorded register (always contiguous with entry header) */ reg = (pfm_ita_pmd_reg_t*)(ent+1); safe_printf("PMD2 : 0x%016lx\n", reg->pmd_val); reg++; safe_printf("PMD3 : 0x%016lx, latency %u\n", reg->pmd_val, reg->pmd3_ita_reg.dear_latency); reg++; safe_printf("PMD17: 0x%016lx, valid %c, address 0x%016lx\n", reg->pmd_val, reg->pmd17_ita_reg.dear_vl ? 
'Y': 'N', (reg->pmd17_ita_reg.dear_iaddr << 4) | (unsigned long)reg->pmd17_ita_reg.dear_slot); /* * move to next entry */ pos += entry_size; } } static void overflow_handler(int n, struct siginfo *info, struct sigcontext *sc) { /* dangerous */ printf("Notification received\n"); process_smpl_buffer(); /* * And resume monitoring */ if (pfm_restart(id) == -1) { perror("pfm_restart"); exit(1); } } int main(void) { pfarg_pmd_t pd[NUM_PMDS]; pfarg_pmc_t pc[NUM_PMCS]; pfmlib_input_param_t inp; pfmlib_output_param_t outp; pfarg_ctx_t ctx; smpl_arg_t buf_arg; pfarg_load_t load_args; pfmlib_options_t pfmlib_options; struct sigaction act; unsigned int i; int ret, type = 0; /* * Initialize pfm library (required before we can use it) */ ret = pfm_initialize(); if (ret != PFMLIB_SUCCESS) fatal_error("Cannot initialize library: %s\n", pfm_strerror(ret)); /* * Let's make sure we run this on the right CPU */ pfm_get_pmu_type(&type); if (type != PFMLIB_ITANIUM_PMU) { char model[MAX_PMU_NAME_LEN]; pfm_get_pmu_name(model, MAX_PMU_NAME_LEN); fatal_error("this program does not work with %s PMU\n", model); } /* * Install the overflow handler (SIGIO) */ memset(&act, 0, sizeof(act)); act.sa_handler = (sig_t)overflow_handler; sigaction (SIGIO, &act, 0); /* * pass options to library (optional) */ memset(&pfmlib_options, 0, sizeof(pfmlib_options)); pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ pfmlib_options.pfm_verbose = 1; /* set to 1 for debug */ pfm_set_options(&pfmlib_options); memset(pd, 0, sizeof(pd)); memset(pc, 0, sizeof(pc)); memset(&ctx, 0, sizeof(ctx)); memset(&buf_arg, 0, sizeof(buf_arg)); memset(&load_args, 0, sizeof(load_args)); /* * prepare parameters to library. we don't use any Itanium * specific features here. so the pfp_model is NULL. */ memset(&inp,0, sizeof(inp)); memset(&outp,0, sizeof(outp)); /* * To count the number of occurence of this instruction, we must * program a counting monitor with the IA64_TAGGED_INST_RETIRED_PMC8 * event. 
*/ if (pfm_find_full_event(EVENT_NAME, &inp.pfp_events[0]) != PFMLIB_SUCCESS) { fatal_error("cannot find event %s\n", EVENT_NAME); } /* * set the (global) privilege mode: * PFM_PLM0 : kernel level only */ inp.pfp_dfl_plm = PFM_PLM0|PFM_PLM3; /* * how many counters we use */ inp.pfp_event_count = 1; /* * let the library figure out the values for the PMCS * * We use all global settings for this EAR. */ if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) { fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); } /* * prepare context structure. * * format specific parameters MUST be concatenated to the regular * pfarg_ctx_t structure. For convenience, the default sampling * format provides a data structure that already combines the pfarg_ctx_t * with what is needed fot this format. */ /* * the size of the buffer is indicated in bytes (not entries). * * The kernel will record into the buffer up to a certain point. * No partial samples are ever recorded. */ buf_arg.buf_size = getpagesize(); /* * now create the context for self monitoring/per-task */ id = pfm_create_context(&ctx, "default", &buf_arg, sizeof(buf_arg)); if (id == -1) { if (errno == ENOSYS) { fatal_error("Your kernel does not have performance monitoring support!\n"); } fatal_error("Can't create PFM context %s\n", strerror(errno)); } /* * retrieve the virtual address at which the sampling * buffer has been mapped */ smpl_vaddr = mmap(NULL, (size_t)buf_arg.buf_size, PROT_READ, MAP_PRIVATE, id, 0); if (smpl_vaddr == MAP_FAILED) fatal_error("cannot mmap sampling buffer errno %d\n", errno); printf("Sampling buffer mapped at %p\n", smpl_vaddr); /* * Now prepare the argument to initialize the PMDs and PMCS. * We must pfp_pmc_count to determine the number of PMC to intialize. * We must use pfp_event_count to determine the number of PMD to initialize. * Some events causes extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. * * This step is new compared to libpfm-2.x. 
It is necessary because the library no * longer knows about the kernel data structures. */ for (i=0; i < outp.pfp_pmc_count; i++) { pc[i].reg_num = outp.pfp_pmcs[i].reg_num; pc[i].reg_value = outp.pfp_pmcs[i].reg_value; } /* * figure out pmd mapping from output pmc */ for (i=0; i < outp.pfp_pmd_count; i++) pd[i].reg_num = outp.pfp_pmds[i].reg_num; /* * indicate we want notification when buffer is full */ pd[0].reg_flags |= PFM_REGFL_OVFL_NOTIFY; pfm_bv_set(pd[0].reg_smpl_pmds, 2); pfm_bv_set(pd[0].reg_smpl_pmds, 3); pfm_bv_set(pd[0].reg_smpl_pmds, 17); entry_size = sizeof(dear_entry_t) + 3 * 8; /* * initialize the PMD and the sampling period */ pd[0].reg_value = - SMPL_PERIOD; pd[0].reg_long_reset = - SMPL_PERIOD; pd[0].reg_short_reset = - SMPL_PERIOD; /* * Now program the registers * * We don't use the save variable to indicate the number of elements passed to * the kernel because, as we said earlier, pc may contain more elements than * the number of events we specified, i.e., contains more thann coutning monitors. */ if (pfm_write_pmcs(id, pc, outp.pfp_pmc_count) == -1) fatal_error("pfm_write_pmcs error errno %d\n",errno); if (pfm_write_pmds(id, pd, outp.pfp_pmd_count) == -1) fatal_error("pfm_write_pmds error errno %d\n",errno); /* * attach context to stopped task */ load_args.load_pid = getpid(); if (pfm_load_context(id, &load_args) == -1) fatal_error("pfm_load_context error errno %d\n",errno); /* * setup asynchronous notification on the file descriptor */ ret = fcntl(id, F_SETFL, fcntl(id, F_GETFL, 0) | O_ASYNC); if (ret == -1) fatal_error("cannot set ASYNC: %s\n", strerror(errno)); /* * get ownership of the descriptor */ ret = fcntl(id, F_SETOWN, getpid()); if (ret == -1) fatal_error("cannot setown: %s\n", strerror(errno)); /* * Let's roll now. */ pfm_self_start(id); do_test(10000); pfm_self_stop(id); /* * We must call the processing routine to cover the last entries recorded * in the sampling buffer, i.e. 
which may not be full */ process_smpl_buffer(); /* * let's stop this now */ munmap(smpl_vaddr, (size_t)buf_arg.buf_size); close(id); return 0; } papi-5.6.0/src/libpfm4/lib/pfmlib_intel_hsw.c000664 001750 001750 00000007610 13216244365 023126 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_intel_hsw.c : Intel Haswell core PMU * * Copyright (c) 2012 Google, Inc * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ /* private headers */ #include "pfmlib_priv.h" #include "pfmlib_intel_x86_priv.h" #include "events/intel_hsw_events.h" static const int hsw_models[] = { 60, /* Haswell */ 69, /* Haswell */ 70, /* Haswell */ 0 }; static const int hsw_ep_models[] = { 63, /* Haswell */ 0 }; static int pfm_hsw_init(void *this) { pfm_intel_x86_cfg.arch_version = 4; return PFM_SUCCESS; } pfmlib_pmu_t intel_hsw_support={ .desc = "Intel Haswell", .name = "hsw", .pmu = PFM_PMU_INTEL_HSW, .pme_count = LIBPFM_ARRAY_SIZE(intel_hsw_pe), .type = PFM_PMU_TYPE_CORE, .supported_plm = INTEL_X86_PLM, .num_cntrs = 8, /* consider with HT off by default */ .num_fixed_cntrs = 3, .max_encoding = 2, /* offcore_response */ .pe = intel_hsw_pe, .atdesc = intel_x86_mods, .flags = PFMLIB_PMU_FL_RAW_UMASK | INTEL_X86_PMU_FL_ECMASK, .cpu_family = 6, .cpu_models = hsw_models, .pmu_detect = pfm_intel_x86_model_detect, .pmu_init = pfm_hsw_init, .get_event_encoding[PFM_OS_NONE] = pfm_intel_x86_get_encoding, PFMLIB_ENCODE_PERF(pfm_intel_x86_get_perf_encoding), .get_event_first = pfm_intel_x86_get_event_first, .get_event_next = pfm_intel_x86_get_event_next, .event_is_valid = pfm_intel_x86_event_is_valid, .validate_table = pfm_intel_x86_validate_table, .get_event_info = pfm_intel_x86_get_event_info, .get_event_attr_info = pfm_intel_x86_get_event_attr_info, PFMLIB_VALID_PERF_PATTRS(pfm_intel_x86_perf_validate_pattrs), .get_event_nattrs = pfm_intel_x86_get_event_nattrs, .can_auto_encode = pfm_intel_x86_can_auto_encode, }; pfmlib_pmu_t intel_hsw_ep_support={ .desc = "Intel Haswell EP", .name = "hsw_ep", .pmu = PFM_PMU_INTEL_HSW_EP, .pme_count = LIBPFM_ARRAY_SIZE(intel_hsw_pe), .type = PFM_PMU_TYPE_CORE, .supported_plm = INTEL_X86_PLM, .num_cntrs = 8, /* consider with HT off by default */ .num_fixed_cntrs = 3, .max_encoding = 2, /* offcore_response */ .pe = intel_hsw_pe, .atdesc = intel_x86_mods, .flags = PFMLIB_PMU_FL_RAW_UMASK | INTEL_X86_PMU_FL_ECMASK, .cpu_family = 6, .cpu_models = hsw_ep_models, .pmu_detect = 
pfm_intel_x86_model_detect, .pmu_init = pfm_hsw_init, .get_event_encoding[PFM_OS_NONE] = pfm_intel_x86_get_encoding, PFMLIB_ENCODE_PERF(pfm_intel_x86_get_perf_encoding), .get_event_first = pfm_intel_x86_get_event_first, .get_event_next = pfm_intel_x86_get_event_next, .event_is_valid = pfm_intel_x86_event_is_valid, .validate_table = pfm_intel_x86_validate_table, .get_event_info = pfm_intel_x86_get_event_info, .get_event_attr_info = pfm_intel_x86_get_event_attr_info, PFMLIB_VALID_PERF_PATTRS(pfm_intel_x86_perf_validate_pattrs), .get_event_nattrs = pfm_intel_x86_get_event_nattrs, .can_auto_encode = pfm_intel_x86_can_auto_encode, }; papi-5.6.0/src/libpfm-3.y/lib/pfmlib_os_linux_v3.c000664 001750 001750 00000007064 13216244363 023726 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_os_linux_v3.c: Perfmon3 API syscalls * * Copyright (c) 2008 Google, Inc * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #ifndef _GNU_SOURCE #define _GNU_SOURCE /* for getline */ #endif #include #include #include #include #include #include #include #include #include #include "pfmlib_priv.h" /* * v3.x interface */ #define PFM_pfm_create (_pfmlib_get_sys_base()+0) #define PFM_pfm_write (_pfmlib_get_sys_base()+1) #define PFM_pfm_read (_pfmlib_get_sys_base()+2) #define PFM_pfm_attach (_pfmlib_get_sys_base()+3) #define PFM_pfm_set_state (_pfmlib_get_sys_base()+4) #define PFM_pfm_create_sets (_pfmlib_get_sys_base()+5) #define PFM_pfm_getinfo_sets (_pfmlib_get_sys_base()+6) /* * perfmon v3 interface */ int //pfm_create(int flags, pfarg_sinfo_t *sif, char *name, void *smpl_arg, size_t smpl_size) pfm_create(int flags, pfarg_sinfo_t *sif, ...) { va_list ap; char *name = NULL; void *smpl_arg = NULL; size_t smpl_size = 0; int ret; if (_pfmlib_major_version < 3) { errno = ENOSYS; return -1; } if (flags & PFM_FL_SMPL_FMT) va_start(ap, sif); if (flags & PFM_FL_SMPL_FMT) { name = va_arg(ap, char *); smpl_arg = va_arg(ap, void *); smpl_size = va_arg(ap, size_t); } ret = (int)syscall(PFM_pfm_create, flags, sif, name, smpl_arg, smpl_size); if (flags & PFM_FL_SMPL_FMT) va_end(ap); return ret; } int pfm_write(int fd, int flags, int type, void *pms, size_t sz) { if (_pfmlib_major_version < 3) return -ENOSYS; return (int)syscall(PFM_pfm_write, fd, flags, type, pms, sz); } int pfm_read(int fd, int flags, int type, void *pms, size_t sz) { if (_pfmlib_major_version < 3) return -ENOSYS; return (int)syscall(PFM_pfm_read, fd, flags, type, pms, sz); } int pfm_create_sets(int fd, int flags, pfarg_set_desc_t *setd, size_t sz) { if (_pfmlib_major_version < 3) return -ENOSYS; return (int)syscall(PFM_pfm_create_sets, fd, flags, setd, sz); } int pfm_getinfo_sets(int fd, int flags, pfarg_set_info_t *info, size_t sz) { if (_pfmlib_major_version < 3) return -ENOSYS; return (int)syscall(PFM_pfm_getinfo_sets, fd, flags, info, sz); } int pfm_attach(int fd, int flags, int target) { if (_pfmlib_major_version < 3) return 
-ENOSYS; return (int)syscall(PFM_pfm_attach, fd, flags, target); } int pfm_set_state(int fd, int flags, int state) { if (_pfmlib_major_version < 3) return -ENOSYS; return (int)syscall(PFM_pfm_set_state, fd, flags, state); } papi-5.6.0/man/man3/PAPIF_thread_id.3000664 001750 001750 00000000737 13216244356 021105 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPIF_thread_id" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPIF_thread_id \- .PP Get the thread identifier of the current thread\&. .SH SYNOPSIS .br .PP .SH "Detailed Description" .PP .PP \fBFortran Interface:\fP .RS 4 #include 'fpapi\&.h' .br \fBPAPIF_thread_id( C_INT id )\fP .RE .PP \fBSee Also:\fP .RS 4 \fBPAPI_thread_id\fP .RE .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. papi-5.6.0/src/validation_tests/papi_br_msp.c000664 001750 001750 00000011234 13216244370 023336 0ustar00jshenry1963jshenry1963000000 000000 /* This file attempts to test the mispredicted branches */ /* performance event as counted by PAPI_BR_MSP */ /* by Vince Weaver, */ #include #include #include #include #include "papi.h" #include "papi_test.h" #include "display_error.h" #include "testcode.h" int main(int argc, char **argv) { int num_runs=100,i; int num_random_branches=500000; long long high=0,low=0,average=0,expected=1500000; long long count,total=0; int quiet=0,retval,ins_result; int total_eventset=PAPI_NULL,miss_eventset=PAPI_NULL; quiet=tests_quiet(argc,argv); if (!quiet) { printf("\nTesting the PAPI_BR_MSP event.\n"); } /* Init the PAPI library */ retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) { test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); } /* Create total eventset */ retval=PAPI_create_eventset(&total_eventset); if (retval!=PAPI_OK) { test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); } retval=PAPI_add_named_event(total_eventset,"PAPI_BR_INS"); if (retval!=PAPI_OK) { if (!quiet) 
printf("Could not add PAPI_BR_INS\n"); test_skip( __FILE__, __LINE__, "adding PAPI_BR_INS", retval ); } /* Create miss eventset */ retval=PAPI_create_eventset(&miss_eventset); if (retval!=PAPI_OK) { test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); } retval=PAPI_add_named_event(miss_eventset,"PAPI_BR_MSP"); if (retval!=PAPI_OK) { test_fail( __FILE__, __LINE__, "adding PAPI_BR_MSP", retval ); } if (!quiet) { printf("\nPart 1: Testing that easy to predict loop has few misses\n"); printf("Testing a loop with %lld branches (%d times):\n", expected,num_runs); printf("\tOn a simple loop like this, " "miss rate should be very small.\n"); } for(i=0;ihigh) high=count; if ((low==0) || (count1000) { if (!quiet) printf("Branch miss rate too high\n"); test_fail( __FILE__, __LINE__, "Error too high", 1 ); } /*******************/ if (!quiet) { printf("\nPart 2\n"); } high=0; low=0; total=0; for(i=0;ihigh) high=count; if ((low==0) || (counthigh) high=count; if ((low==0) || (count (num_random_branches/4)*3) { if (!quiet) printf("Mispredicts too high\n"); test_fail( __FILE__, __LINE__, "Error too high", 1 ); } if (!quiet) printf("\n"); test_pass( __FILE__ ); PAPI_shutdown(); return 0; } papi-5.6.0/src/components/host_micpower/000775 001750 001750 00000000000 13216244357 022376 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/papi.spec000664 001750 001750 00000005360 13216244356 016350 0ustar00jshenry1963jshenry1963000000 000000 Summary: Performance Application Programming Interface Name: papi Version: 5.6.0.0 Release: 1%{?dist} License: BSD Group: Development/System URL: http://icl.utk.edu/papi/ Source0: http://icl.utk.edu/projects/papi/downloads/%{name}-%{version}.tar.gz BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root BuildRequires: ncurses-devel BuildRequires: gcc-gfortran BuildRequires: kernel-headers >= 2.6.32 BuildRequires: chrpath #Right now libpfm does not know anything about s390 and will fail ExcludeArch: s390 s390x %description PAPI provides 
a programmer interface to monitor the performance of running programs. %package devel Summary: Header files for compiling programs with PAPI Group: Development/System Requires: papi = %{version}-%{release} %description devel PAPI-devel includes the C header files that specify the PAPI userspace libraries and interfaces. This is required for rebuilding any program that uses PAPI. %prep %setup -q %build cd src %configure --with-static-lib=no --with-shared-lib=yes --with-shlib #DBG workaround to make sure libpfm just uses the normal CFLAGS DBG="" make #%check #cd src #make fulltest %install rm -rf $RPM_BUILD_ROOT cd src make DESTDIR=$RPM_BUILD_ROOT install chrpath --delete $RPM_BUILD_ROOT%{_libdir}/*.so* # Remove the static libraries. Static libraries are undesirable: # https://fedoraproject.org/wiki/Packaging/Guidelines#Packaging_Static_Libraries rm -rf $RPM_BUILD_ROOT%{_libdir}/*.a %post -p /sbin/ldconfig %postun -p /sbin/ldconfig %clean rm -rf $RPM_BUILD_ROOT %files %defattr(-,root,root,-) %{_bindir}/* %{_libdir}/*.so.* /usr/share/papi %doc INSTALL.txt README LICENSE.txt RELEASENOTES.txt %files devel %defattr(-,root,root,-) %{_includedir}/*.h %{_includedir}/perfmon %{_libdir}/*.so %doc %{_mandir}/man3/* %doc %{_mandir}/man1/* %changelog * Tue Jan 31 2012 Dan Terpstra - 4.2.1 - Rebase to papi-4.2.1 * Wed Dec 8 2010 Dan Terpstra - 4.1.2-1 - Rebase to papi-4.1.2 * Mon Jun 8 2010 William Cohen - 4.1.0-1 - Rebase to papi-4.1.0 * Mon May 17 2010 William Cohen - 4.0.0-5 - Test run with upstream cvs version. * Wed Feb 10 2010 William Cohen - 4.0.0-4 - Resolves: rhbz562935 Rebase to papi-4.0.0 (correct ExcludeArch). * Wed Feb 10 2010 William Cohen - 4.0.0-3 - Resolves: rhbz562935 Rebase to papi-4.0.0 (bump nvr).
* Wed Feb 10 2010 William Cohen - 4.0.0-2 - correct the ctests/shlib test - have PAPI_set_multiplex() return proper value - properly handle event unit masks - correct PAPI_name_to_code() to match events - Resolves: rhbz562935 Rebase to papi-4.0.0 * Wed Jan 13 2010 William Cohen - 4.0.0-1 - Generate papi.spec file for papi-4.0.0. papi-5.6.0/src/libpfm-3.y/examples_ia64_v2.0/ita2_rr.c000664 001750 001750 00000027266 13216244362 024134 0ustar00jshenry1963jshenry1963000000 000000 /* * ita2_rr.c - example of how to use data range restriction with the Itanium2 PMU * * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux/ia64. 
*/ #include #include #include #include #include #include #include #include #include #include #if defined(__ECC) && defined(__INTEL_COMPILER) /* if you do not have this file, your compiler is too old */ #include #define clear_psr_ac() __rum(1UL<<3) #elif defined(__GNUC__) static inline void clear_psr_ac(void) { __asm__ __volatile__("rum psr.ac;;" ::: "memory" ); } #else #error "You need to define clear_psr_ac() for your compiler" #endif #define TEST_DATA_COUNT 16 #define N_LOOP 100000000UL #define NUM_PMCS PFMLIB_MAX_PMCS #define NUM_PMDS PFMLIB_MAX_PMDS #define MAX_EVT_NAME_LEN 128 #define MAX_PMU_NAME_LEN 32 /* * here we capture only misaligned_loads because it cannot * be measured with misaligned_stores_retired at the same time */ static char *event_list[]={ "misaligned_loads_retired", NULL }; typedef union { unsigned long l_tab[2]; unsigned int i_tab[4]; unsigned short s_tab[8]; unsigned char c_tab[16]; } test_data_t; static int do_test(test_data_t *data) { unsigned int *l, v; l = (unsigned int *)(data->c_tab+1); if (((unsigned long)l & 0x1) == 0) { printf("Data is not unaligned, can't run test\n"); return -1; } v = *l; v++; *l = v; return 0; } static void fatal_error(char *fmt,...) __attribute__((noreturn)); static void fatal_error(char *fmt, ...) 
{ va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); exit(1); } /* * Demonstrates Itanium2 data range restriction: the debug registers are * programmed to cover test_data only, both test_data and test_data_fake are * then accessed misaligned N_LOOP times, and the misaligned load count read * back from the PMD is compared against N_LOOP. Setup failures terminate the * program through fatal_error(); otherwise main returns 0. */ int main(int argc, char **argv) { char **p; test_data_t *test_data, *test_data_fake; unsigned long range_start, range_end; int ret, type = 0; pfmlib_input_param_t inp; pfmlib_output_param_t outp; pfmlib_ita2_input_param_t ita2_inp; pfmlib_ita2_output_param_t ita2_outp; pfarg_reg_t pd[NUM_PMDS]; pfarg_reg_t pc[NUM_PMCS]; pfarg_dbreg_t dbrs[8]; pfarg_context_t ctx[1]; pfarg_load_t load_args; pfmlib_options_t pfmlib_options; unsigned int i; int id; char name[MAX_EVT_NAME_LEN]; /* * Initialize pfm library (required before we can use it) */ if (pfm_initialize() != PFMLIB_SUCCESS) { fatal_error("Can't initialize library\n"); } /* * Let's make sure we run this on the right CPU family */ pfm_get_pmu_type(&type); if (type != PFMLIB_ITANIUM2_PMU) { char model[MAX_PMU_NAME_LEN]; pfm_get_pmu_name(model, MAX_PMU_NAME_LEN); fatal_error("this program does not work with %s PMU\n", model); } /* * pass options to library (optional) */ memset(&pfmlib_options, 0, sizeof(pfmlib_options)); pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ pfmlib_options.pfm_verbose = 0; /* set to 1 for debug */ pfm_set_options(&pfmlib_options); /* * now let's allocate the data structure we will be monitoring */ test_data = (test_data_t *)malloc(sizeof(test_data_t)*TEST_DATA_COUNT); if (test_data == NULL) { fatal_error("cannot allocate test data structure"); } test_data_fake = (test_data_t *)malloc(sizeof(test_data_t)*TEST_DATA_COUNT); if (test_data_fake == NULL) { fatal_error("cannot allocate test data structure"); } /* * Compute the range we are interested in */ range_start = (unsigned long)test_data; range_end = range_start + sizeof(test_data_t)*TEST_DATA_COUNT; memset(pd, 0, sizeof(pd)); memset(pc, 0, sizeof(pc)); memset(ctx, 0, sizeof(ctx)); memset(dbrs, 0, sizeof(dbrs)); memset(&load_args, 0, sizeof(load_args)); /* * prepare parameters to library. All structures start zeroed; the * Itanium2 specific data range restriction is filled in further down. */
memset(&inp,0, sizeof(inp)); memset(&outp,0, sizeof(outp)); memset(&ita2_inp,0, sizeof(ita2_inp)); memset(&ita2_outp,0, sizeof(ita2_outp)); /* * find requested event */ p = event_list; for (i=0; *p ; i++, p++) { if (pfm_find_event(*p, &inp.pfp_events[i].event) != PFMLIB_SUCCESS) { fatal_error("Cannot find %s event\n", *p); } } /* * set the privilege mode: * PFM_PLM3 : user level only */ inp.pfp_dfl_plm = PFM_PLM3; /* * how many counters we use */ inp.pfp_event_count = i; /* * We use the library to figure out how to program the debug registers * to cover the data range we are interested in. The rr_end parameter * must point to the byte after the last element of the range (C-style range). * * Because of the masking mechanism and therefore alignment constraints used to implement * this feature, it may not be possible to exactly cover a given range. It may be that * the coverage exceeds the desired range. So it is possible to capture noise if * the surrounding addresses are also heavily used. You can figure out by how much the * actual range is off compared to the requested range by checking the rr_soff and rr_eoff * fields in rr_infos on return from the library call. * * Upon return, the rr_br array is programmed and the number of debug registers (not pairs) * used to cover the range is in rr_nbr_used.
*/ ita2_inp.pfp_ita2_drange.rr_used = 1; ita2_inp.pfp_ita2_drange.rr_limits[0].rr_start = range_start; ita2_inp.pfp_ita2_drange.rr_limits[0].rr_end = range_end; /* * let the library figure out the values for the PMCS */ if ((ret=pfm_dispatch_events(&inp, &ita2_inp, &outp, &ita2_outp)) != PFMLIB_SUCCESS) { fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); } printf("data range : [0x%016lx-0x%016lx): %d pair of debug registers used\n" "start_offset:-0x%lx end_offset:+0x%lx\n", range_start, range_end, ita2_outp.pfp_ita2_drange.rr_nbr_used >> 1, ita2_outp.pfp_ita2_drange.rr_infos[0].rr_soff, ita2_outp.pfp_ita2_drange.rr_infos[0].rr_eoff); printf("fake data range: [0x%016lx-0x%016lx)\n", (unsigned long)test_data_fake, (unsigned long)test_data_fake+sizeof(test_data_t)*TEST_DATA_COUNT); /* * now create the context for self monitoring/per-task */ if (perfmonctl(0, PFM_CREATE_CONTEXT, ctx, 1) == -1) { if (errno == ENOSYS) { fatal_error("Your kernel does not have performance monitoring support!\n"); } fatal_error("Can't create PFM context %s\n", strerror(errno)); } /* * extract the unique identifier for our context, a regular file descriptor */ id = ctx[0].ctx_fd; /* * Now prepare the argument to initialize the PMDs and PMCS. * We must use pfp_pmc_count to determine the number of PMCs to initialize. * We must use pfp_event_count to determine the number of PMDs to initialize. * Some events cause extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. * * This step is new compared to libpfm-2.x. It is necessary because the library no * longer knows about the kernel data structures. */ for (i=0; i < outp.pfp_pmc_count; i++) { pc[i].reg_num = outp.pfp_pmcs[i].reg_num; pc[i].reg_value = outp.pfp_pmcs[i].reg_value; } /* * the PMC controlling the event ALWAYS comes first, that's why this loop * is safe even when extra PMCs are needed to support a particular event.
*/ for (i=0; i < inp.pfp_event_count; i++) { pd[i].reg_num = pc[i].reg_num; } /* * propagate the setup for the debug registers from the library to the arguments * to the perfmonctl() syscall. The library does not know the type of the syscall * anymore. */ for (i=0; i < ita2_outp.pfp_ita2_drange.rr_nbr_used; i++) { dbrs[i].dbreg_num = ita2_outp.pfp_ita2_drange.rr_br[i].reg_num; dbrs[i].dbreg_value = ita2_outp.pfp_ita2_drange.rr_br[i].reg_value; } /* * Program the data debug registers. * * IMPORTANT: programming the debug register MUST always be done before the PMCs * otherwise the kernel will fail on PFM_WRITE_PMCS. This is for security reasons. */ if (perfmonctl(id, PFM_WRITE_DBRS, dbrs, ita2_outp.pfp_ita2_drange.rr_nbr_used) == -1) { fatal_error( "child: perfmonctl error PFM_WRITE_DBRS errno %d\n",errno); } /* * Now program the registers * * We don't use the same variable to indicate the number of elements passed to * the kernel because, as we said earlier, pc may contain more elements than * the number of events we specified, i.e., contains more than counting monitors. */ if (perfmonctl(id, PFM_WRITE_PMCS, pc, outp.pfp_pmc_count) == -1) { fatal_error("child: perfmonctl error PFM_WRITE_PMCS errno %d\n",errno); } if (perfmonctl(id, PFM_WRITE_PMDS, pd, inp.pfp_event_count) == -1) { fatal_error( "child: perfmonctl error PFM_WRITE_PMDS errno %d\n",errno); } /* * now we load (i.e., attach) the context to ourself */ load_args.load_pid = getpid(); if (perfmonctl(id, PFM_LOAD_CONTEXT, &load_args, 1) == -1) { fatal_error("perfmonctl error PFM_LOAD_CONTEXT errno %d\n",errno); } /* * Let's make sure that the hardware does the unaligned accesses (do not use the * kernel software handler otherwise the PMU won't see the unaligned fault). */ clear_psr_ac(); /* * Let's roll now. * * The idea behind this test is to have two dynamically allocated data structures * which are accessed in an unaligned fashion. But we want to capture only the unaligned * accesses on one of the two.
So the debug registers are programmed to cover the * first one ONLY. Then we activate monitoring and access the two data structures. * This is an artificial example just to demonstrate how to use data address range * restrictions. */ pfm_self_start(id); for(i=0; i < N_LOOP; i++) { do_test(test_data); do_test(test_data_fake); } pfm_self_stop(id); /* * now read the results */ if (perfmonctl(id, PFM_READ_PMDS, pd, inp.pfp_event_count) == -1) { fatal_error( "perfmonctl error READ_PMDS errno %d\n",errno); } /* * print the results * * It is important to realize, that the first event we specified may not * be in PMD4. Not all events can be measured by any monitor. That's why * we need to use the pc[] array to figure out where event i was allocated. * * For this example, we expect to see a value of N_LOOP for misaligned loads * (one per loop iteration on test_data). But it can be twice that when the * test_data and test_data_fake are allocated very close to each other and the * range created with the debug registers is larger than test_data.
* */ for (i=0; i < inp.pfp_event_count; i++) { pfm_get_full_event_name(&inp.pfp_events[i], name, MAX_EVT_NAME_LEN); printf("PMD%u %20lu %s (expected %lu)\n", pd[i].reg_num, pd[i].reg_value, name, N_LOOP); if (pd[i].reg_value != N_LOOP) { /* report the value actually compared against, not a hard-coded 1 */ printf("error: Result should be %lu for %s\n", N_LOOP, name); break; } } /* * let's stop this now */ close(id); free(test_data); free(test_data_fake); return 0; } papi-5.6.0/src/perfctr-2.6.x/etc/costs/PentiumMMX-166000775 001750 001750 00000001304 13216244366 023714 0ustar00jshenry1963jshenry1963000000 000000 [data from a 166MHz Pentium MMX] PERFCTR INIT: vendor 0, family 5, model 4, stepping 3, clock 166590 kHz PERFCTR INIT: NITER == 64 PERFCTR INIT: loop overhead is 56 cycles PERFCTR INIT: rdtsc cost is 9.1 cycles (640 total) PERFCTR INIT: rdpmc cost is 13.8 cycles (944 total) PERFCTR INIT: rdmsr (counter) cost is 28.1 cycles (1860 total) PERFCTR INIT: rdmsr (evntsel) cost is 23.8 cycles (1583 total) PERFCTR INIT: wrmsr (counter) cost is 36.0 cycles (2362 total) PERFCTR INIT: wrmsr (evntsel) cost is 36.3 cycles (2385 total) PERFCTR INIT: read %cr4 cost is 4.0 cycles (312 total) PERFCTR INIT: write %cr4 cost is 17.0 cycles (1149 total) perfctr: driver 2.0-pre5, cpu type Intel Pentium MMX at 166590 kHz papi-5.6.0/man/man3/PAPI_remove_events.3000664 001750 001750 00000005046 13216244356 021733 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPI_remove_events" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPI_remove_events \- .PP Remove an array of hardware event codes from a PAPI event set\&. .SH SYNOPSIS .br .PP .SH "Detailed Description" .PP A hardware event can be either a PAPI Preset or a native hardware event code\&. For a list of PAPI preset events, see PAPI_presets or run the papi_avail utility in the PAPI distribution\&. PAPI Presets can be passed to \fBPAPI_query_event\fP to see if they exist on the underlying architecture\&.
For a list of native events available on the current platform, run papi_native_avail in the PAPI distribution\&. It should be noted that \fBPAPI_remove_events\fP can partially succeed, exactly like \fBPAPI_add_events\fP\&. .PP \fBC Prototype:\fP .RS 4 #include <\fBpapi\&.h\fP> .br int \fBPAPI_remove_events( int EventSet, int * EventCode, int number )\fP; .RE .PP \fBParameters:\fP .RS 4 \fIEventSet\fP an integer handle for a PAPI event set as created by \fBPAPI_create_eventset\fP .br \fI*EventCode\fP an array of defined events .br \fInumber\fP an integer indicating the number of events in the array *EventCode .RE .PP \fBReturn values:\fP .RS 4 \fIPositive integer\fP The number of consecutive elements that succeeded before the error\&. .br \fIPAPI_EINVAL\fP One or more of the arguments is invalid\&. .br \fIPAPI_ENOEVST\fP The EventSet specified does not exist\&. .br \fIPAPI_EISRUN\fP The EventSet is currently counting events\&. .br \fIPAPI_ECNFLCT\fP The underlying counter hardware can not count this event and other events in the EventSet simultaneously\&. .br \fIPAPI_ENOEVNT\fP The PAPI preset is not available on the underlying hardware\&.
.RE .PP \fBExample:\fP .RS 4 .PP .nf int EventSet = PAPI_NULL; int Events[] = {PAPI_TOT_INS, PAPI_FP_OPS}; int ret; // Create an empty EventSet ret = PAPI_create_eventset(&EventSet); if (ret != PAPI_OK) handle_error(ret); // Add two events to our EventSet ret = PAPI_add_events(EventSet, Events, 2); if (ret != PAPI_OK) handle_error(ret); // Start counting ret = PAPI_start(EventSet); if (ret != PAPI_OK) handle_error(ret); // Stop counting, ignore values ret = PAPI_stop(EventSet, NULL); if (ret != PAPI_OK) handle_error(ret); // Remove events ret = PAPI_remove_events(EventSet, Events, 2); if (ret != PAPI_OK) handle_error(ret); .fi .PP .RE .PP \fBSee Also:\fP .RS 4 \fBPAPI_cleanup_eventset\fP \fBPAPI_destroy_eventset\fP \fBPAPI_event_name_to_code\fP \fBPAPI_presets\fP \fBPAPI_add_event\fP \fBPAPI_add_events\fP .RE .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. papi-5.6.0/src/components/net/000775 001750 001750 00000000000 13216244357 020302 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/perfctr-2.7.x/usr.lib/event_set_amd.c000664 001750 001750 00000055047 13216244370 024113 0ustar00jshenry1963jshenry1963000000 000000 /* $Id: event_set_amd.c,v 1.9 2004/07/17 00:41:57 mikpe Exp $ * Performance counter event descriptions for AMD K7 and K8. * * Copyright (C) 2003 Mikael Pettersson * * References * ---------- * "AMD Athlon Processor x86 Code Optimization Guide", * Appendix D: "Performance Monitoring Counters". * AMD Publication #22007 * Revision E (on AMD Processor Technical Documents CD, Med-12/99-0, 21860F) * Revision K (at http://www.amd.com/). * * "BIOS and Kernel Developer's Guide for AMD Athlon 64 and * AMD Opteron Processors", Chapter 10: "Performance Monitoring". * AMD Publication #26094, Revision 3.14 (at http://www.amd.com). * "Revision Guide for AMD Opteron Processors", * AMD Publication #25759, Revision 3.09 */ #include /* for NULL */ #include "libperfctr.h" #include "event_set.h" /* * AMD K7 events.
* * Note: Different revisions of AMD #22007 list different sets of events. * We split the K7 event set into an "official" part based on recent * revisions of #22007, and an "unofficial" part which includes events * only documented in older revisions of #22007 (specifically Rev. E). * * All official K7 events are also present in K8, as are most of the * unofficial K7 events. */ static const struct perfctr_unit_mask_5 k7_um_moesi = { { .type = perfctr_um_type_bitmask, .default_value = 0x1F, .nvalues = 5 }, { { 0x10, "Modified (M)" }, { 0x08, "Owner (O)" }, { 0x04, "Exclusive (E)" }, { 0x02, "Shared (S)" }, { 0x01, "Invalid (I)" } } }; static const struct perfctr_event k7_official_events[] = { { 0x40, 0xF, NULL, "DATA_CACHE_ACCESSES", "Data cache accesses" }, { 0x41, 0xF, NULL, "DATA_CACHE_MISSES", "Data cache misses" }, { 0x42, 0xF, UM(k7_um_moesi), "DATA_CACHE_REFILLS_FROM_L2", "Data cache refills from L2" }, { 0x43, 0xF, UM(k7_um_moesi), "DATA_CACHE_REFILLS_FROM_SYSTEM", "Data cache refills from system" }, { 0x44, 0xF, UM(k7_um_moesi), "DATA_CACHE_WRITEBACKS", "Data cache writebacks" }, { 0x45, 0xF, NULL, "L1_DTLB_MISSES_AND_L2_DTLB_HITS", "L1 DTLB misses and L2 DTLB hits" }, { 0x46, 0xF, NULL, "L1_AND_L2_DTLB_MISSES", "L1 and L2 DTLB misses" }, { 0x47, 0xF, NULL, "MISALIGNED_DATA_REFERENCES", "Misaligned data references" }, { 0x80, 0xF, NULL, "INSTRUCTION_CACHE_FETCHES", "Instruction cache fetches" }, { 0x81, 0xF, NULL, "INSTRUCTION_CACHE_MISSES", "Instruction cache misses" }, { 0x84, 0xF, NULL, "L1_ITLB_MISSES_AND_L2_ITLB_HITS", /* XXX: was L1_ITLB_MISSES */ "L1 ITLB misses (and L2 ITLB hits)" }, { 0x85, 0xF, NULL, "L1_AND_L2_ITLB_MISSES", /* XXX: was L2_ITLB_MISSES */ "(L1 and) L2 ITLB misses" }, { 0xC0, 0xF, NULL, "RETIRED_INSTRUCTIONS", "Retired instructions (includes exceptions, interrupts, resyncs)" }, { 0xC1, 0xF, NULL, "RETIRED_OPS", "Retired Ops" }, { 0xC2, 0xF, NULL, "RETIRED_BRANCHES", "Retired branches (conditional, unconditional, exceptions, 
interrupts)" }, { 0xC3, 0xF, NULL, "RETIRED_BRANCHES_MISPREDICTED", "Retired branches mispredicted" }, { 0xC4, 0xF, NULL, "RETIRED_TAKEN_BRANCHES", "Retired taken branches" }, { 0xC5, 0xF, NULL, "RETIRED_TAKEN_BRANCHES_MISPREDICTED", "Retired taken branches mispredicted" }, { 0xC6, 0xF, NULL, "RETIRED_FAR_CONTROL_TRANSFERS", "Retired far control transfers" }, { 0xC7, 0xF, NULL, "RETIRED_RESYNC_BRANCHES", "Retired resync branches (only non-control transfer branches counted)" }, { 0xCD, 0xF, NULL, "INTERRUPTS_MASKED_CYCLES", "Interrupts masked cycles (IF=0)" }, { 0xCE, 0xF, NULL, "INTERRUPTS_MASKED_WHILE_PENDING_CYCLES", "Interrupts masked while pending cycles (INTR while IF=0)" }, { 0xCF, 0xF, NULL, "NUMBER_OF_TAKEN_HARDWARE_INTERRUPTS", "Number of taken hardware interrupts" }, }; static const struct perfctr_event_set k7_official_event_set = { .cpu_type = PERFCTR_X86_AMD_K7, .event_prefix = "K7_", .include = NULL, .nevents = ARRAY_SIZE(k7_official_events), .events = k7_official_events, }; /* also in K8 */ static const struct perfctr_unit_mask_7 k7_um_seg_reg = { { .type = perfctr_um_type_bitmask, .default_value = 0x3F, .nvalues = 7 }, { { 0x40, "HS" }, /* what's this? 
*/ { 0x20, "GS" }, { 0x10, "FS" }, { 0x08, "DS" }, { 0x04, "SS" }, { 0x02, "CS" }, { 0x01, "ES" } } }; /* not in K8 */ static const struct perfctr_unit_mask_5 k7_um_system_request = { { .type = perfctr_um_type_bitmask, .default_value = 0x73, .nvalues = 5 }, { { 0x40, "WB" }, { 0x20, "WP" }, { 0x10, "WT" }, { 0x02, "WC" }, { 0x01, "UC" } } }; /* not in K8 */ static const struct perfctr_unit_mask_3 k7_um_snoop_hits = { { .type = perfctr_um_type_bitmask, .default_value = 0x07, .nvalues = 3 }, { { 0x04, "L2 (L2 hit and no DC hit)" }, { 0x02, "Data cache" }, { 0x01, "Instruction cache" } } }; /* not in K8 */ static const struct perfctr_unit_mask_2 k7_um_ecc = { { .type = perfctr_um_type_bitmask, .default_value = 0x03, .nvalues = 2 }, { { 0x02, "L2 single bit error" }, { 0x01, "System single bit error" } } }; /* not in K8 */ static const struct perfctr_unit_mask_4 k7_um_invalidates = { { .type = perfctr_um_type_bitmask, .default_value = 0x0F, .nvalues = 4 }, { { 0x08, "I invalidates D" }, { 0x04, "I invalidates I" }, { 0x02, "D invalidates D" }, { 0x01, "D invalidates I" } } }; /* not in K8 */ static const struct perfctr_unit_mask_8 k7_um_L2_requests = { { .type = perfctr_um_type_bitmask, .default_value = 0xFF, .nvalues = 8 }, { { 0x80, "Data block write from the L2 (TBL RMW)" }, { 0x40, "Data block write from the DC" }, { 0x20, "Data block write from the system" }, { 0x10, "Data block read data store" }, { 0x08, "Data block read data load" }, { 0x04, "Data block read instruction" }, { 0x02, "Tag write" }, { 0x01, "Tag read" } } }; static const struct perfctr_event k7_unofficial_events[] = { { 0x20, 0xF, UM(k7_um_seg_reg), "SEGMENT_REGISTER_LOADS", /* also in K8 */ "Segment register loads" }, { 0x21, 0xF, NULL, "STORES_TO_ACTIVE_INSTRUCTION_STREAM", /* also in K8 as SELF_MODIFY_RESYNC */ "Stores to active instruction stream" }, { 0x64, 0xF, NULL, "DRAM_SYSTEM_REQUESTS", /* not in K8 */ "DRAM system requests" }, { 0x65, 0xF, UM(k7_um_system_request), 
"SYSTEM_REQUESTS_WITH_THE_SELECTED_TYPE", /* not in K8 */ "System requests with the selected type" }, { 0x73, 0xF, UM(k7_um_snoop_hits), "SNOOP_HITS", /* not in K8 */ "Snoop hits" }, { 0x74, 0xF, UM(k7_um_ecc), "SINGLE_BIT_ECC_ERRORS_DETECTED_CORRECTED", /* not in K8 */ /* XXX: was SINGLE_BIT_ECC_ERRORS_DETECTED_OR_CORRECTED */ "Single-bit ECC errors detected/corrected" }, { 0x75, 0xF, UM(k7_um_invalidates), "INTERNAL_CACHE_LINE_INVALIDATES", /* not in K8 */ "Internal cache line invalidates" }, { 0x76, 0xF, NULL, "CYCLES_PROCESSOR_IS_RUNNING", /* also in K8 */ "Cycles processor is running (not in HLT or STPCLK)" }, { 0x79, 0xF, UM(k7_um_L2_requests), "L2_REQUESTS", /* not in K8 */ "L2 requests" }, { 0x7A, 0xF, NULL, "CYCLES_THAT_AT_LEAST_ONE_FILL_REQUEST_WAITED_TO_USE_THE_L2", /* not in K8 */ "Cycles that at least one fill request waited to use the L2" }, { 0x82, 0xF, NULL, "INSTRUCTION_CACHE_REFILLS_FROM_L2", /* also in K8 */ "Instruction cache refills from L2" }, { 0x83, 0xF, NULL, "INSTRUCTION_CACHE_REFILLS_FROM_SYSTEM", /* also in K8 */ "Instruction cache refills from system" }, { 0x86, 0xF, NULL, "SNOOP_RESYNCS", /* also in K8 */ "Snoop resyncs" }, { 0x87, 0xF, NULL, "INSTRUCTION_FETCH_STALL_CYCLES", /* also in K8 */ "Instruction fetch stall cycles" }, { 0x88, 0xF, NULL, "RETURN_STACK_HITS", /* also in K8 */ "Instruction cache hits" }, { 0x89, 0xF, NULL, "RETURN_STACK_OVERFLOW", /* also in K8 */ "Return stack overflow" }, { 0xC8, 0xF, NULL, "RETIRED_NEAR_RETURNS", /* also in K8 */ "Retired near returns" }, { 0xC9, 0xF, NULL, "RETIRED_NEAR_RETURNS_MISPREDICTED", /* also in K8 */ "Retired near returns mispredicted" }, { 0xCA, 0xF, NULL, "RETIRED_INDIRECT_BRANCHES_WITH_TARGET_MISPREDICTED", /* also in K8 */ "Retired indirect branches with target mispredicted" }, { 0xD0, 0xF, NULL, "INSTRUCTION_DECODER_EMPTY", /* also in K8 */ "Instruction decoder empty" }, { 0xD1, 0xF, NULL, "DISPATCH_STALLS", /* also in K8 */ "Dispatch stalls (event masks D2h through DAh below 
combined)" }, { 0xD2, 0xF, NULL, "BRANCH_ABORT_TO_RETIRE", /* also in K8 */ /* XXX: was BRANCH_ABORTS_TO_RETIRE */ "Branch abort to retire" }, { 0xD3, 0xF, NULL, "SERIALIZE", /* also in K8 */ "Serialize" }, { 0xD4, 0xF, NULL, "SEGMENT_LOAD_STALL", /* also in K8 */ "Segment load stall" }, { 0xD5, 0xF, NULL, "ICU_FULL", /* also in K8 */ "ICU full" }, { 0xD6, 0xF, NULL, "RESERVATION_STATIONS_FULL", /* also in K8 */ "Reservation stations full" }, { 0xD7, 0xF, NULL, "FPU_FULL", /* also in K8 */ "FPU full" }, { 0xD8, 0xF, NULL, "LS_FULL", /* also in K8 */ "LS full" }, { 0xD9, 0xF, NULL, "ALL_QUIET_STALL", /* also in K8 */ "All quiet stall" }, { 0xDA, 0xF, NULL, "FAR_TRANSFER_OR_RESYNC_BRANCH_PENDING", /* also in K8 */ "Fall transfer or resync branch pending" }, { 0xDC, 0xF, NULL, "BREAKPOINT_MATCHES_FOR_DR0", /* also in K8 */ "Breakpoint matches for DR0" }, { 0xDD, 0xF, NULL, "BREAKPOINT_MATCHES_FOR_DR1", /* also in K8 */ "Breakpoint matches for DR1" }, { 0xDE, 0xF, NULL, "BREAKPOINT_MATCHES_FOR_DR2", /* also in K8 */ "Breakpoint matches for DR2" }, { 0xDF, 0xF, NULL, "BREAKPOINT_MATCHES_FOR_DR3", /* also in K8 */ "Breakpoint matches for DR3" }, }; const struct perfctr_event_set perfctr_k7_event_set = { .cpu_type = PERFCTR_X86_AMD_K7, .event_prefix = "K7_", .include = &k7_official_event_set, .nevents = ARRAY_SIZE(k7_unofficial_events), .events = k7_unofficial_events, }; /* * AMD K8 events. * * Some events are described as being "Revision B and later", but * AMD does not document how to distinguish Revision B processors * from earlier ones. 
*/
/*
 * K8 unit-mask tables.
 *
 * Each table is a header { type, default_value, nvalues } followed by
 * nvalues rows of { bit value, description }.  Every table in this first
 * group is a bitmask whose default_value is the OR of all listed bits.
 * The tables are attached to events through UM(...) in k8_common_events[].
 */

/* Unit mask of DISPATCHED_FPU_OPS (event 0x00). */
static const struct perfctr_unit_mask_6 k8_um_fpu_ops = { /* Revision B and later */
	{ .type = perfctr_um_type_bitmask, .default_value = 0x3F, .nvalues = 6 },
	{ { 0x01, "Add pipe ops excluding junk ops" },
	  { 0x02, "Multiply pipe ops excluding junk ops" },
	  { 0x04, "Store pipe ops excluding junk ops" },
	  { 0x08, "Add pipe junk ops" },
	  { 0x10, "Multiply pipe junk ops" },
	  { 0x20, "Store pipe junk ops" } }
};

/* Unit mask of ECC_BIT_ERR (event 0x4A). */
static const struct perfctr_unit_mask_2 k8_um_ecc = {
	{ .type = perfctr_um_type_bitmask, .default_value = 0x03, .nvalues = 2 },
	{ { 0x01, "Scrubber error" },
	  { 0x02, "Piggyback scrubber errors" } }
};

/* Unit mask of DISPATCHED_PRE_INSTRS (event 0x4B). */
static const struct perfctr_unit_mask_3 k8_um_prefetch = {
	{ .type = perfctr_um_type_bitmask, .default_value = 0x07, .nvalues = 3 },
	{ { 0x01, "Load" },
	  { 0x02, "Store" },
	  { 0x04, "NTA" } }
};

/* Unit mask of BU_INT_L2_REQ (event 0x7D). */
static const struct perfctr_unit_mask_5 k8_um_int_L2_req = {
	{ .type = perfctr_um_type_bitmask, .default_value = 0x1F, .nvalues = 5 },
	{ { 0x01, "IC fill" },
	  { 0x02, "DC fill" },
	  { 0x04, "TLB reload" },
	  { 0x08, "Tag snoop request" },
	  { 0x10, "Cancelled request" } }
};

/* Unit mask of BU_FILL_REQ (event 0x7E). */
static const struct perfctr_unit_mask_3 k8_um_fill_req = {
	{ .type = perfctr_um_type_bitmask, .default_value = 0x07, .nvalues = 3 },
	{ { 0x01, "IC fill" },
	  { 0x02, "DC fill" },
	  { 0x04, "TLB reload" } }
};

/* Unit mask of BU_FILL_L2 (event 0x7F). */
static const struct perfctr_unit_mask_2 k8_um_fill_L2 = {
	{ .type = perfctr_um_type_bitmask, .default_value = 0x03, .nvalues = 2 },
	{ { 0x01, "Dirty L2 victim" },
	  { 0x02, "Victim from L2" } }
};

/* Unit mask of RETIRED_FPU_INSTRS (event 0xCB). */
static const struct perfctr_unit_mask_4 k8_um_fpu_instr = { /* Revision B and later */
	{ .type = perfctr_um_type_bitmask, .default_value = 0x0F, .nvalues = 4 },
	{ { 0x01, "x87 instructions" },
	  { 0x02, "Combined MMX & 3DNow! 
instructions" },
	  { 0x04, "Combined packed SSE and SSE2 instructions" },
	  { 0x08, "Combined scalar SSE and SSE2 instructions" } }
};

/* Unit mask of RETIRED_FASTPATH_INSTRS (event 0xCC). */
static const struct perfctr_unit_mask_3 k8_um_fpu_fastpath = { /* Revision B and later */
	{ .type = perfctr_um_type_bitmask, .default_value = 0x07, .nvalues = 3 },
	{ { 0x01, "With low op in position 0" },
	  { 0x02, "With low op in position 1" },
	  { 0x04, "With low op in position 2" } }
};

/* Unit mask of FPU_EXCEPTIONS (event 0xDB). */
static const struct perfctr_unit_mask_4 k8_um_fpu_exceptions = { /* Revision B and later */
	{ .type = perfctr_um_type_bitmask, .default_value = 0x0F, .nvalues = 4 },
	{ { 0x01, "x87 reclass microfaults" },
	  { 0x02, "SSE retype microfaults" },
	  { 0x04, "SSE reclass microfaults" },
	  { 0x08, "SSE and x87 microtraps" } }
};

/* Unit mask of MEM_PAGE_ACCESS (event 0xE0). */
static const struct perfctr_unit_mask_3 k8_um_page_access = {
	{ .type = perfctr_um_type_bitmask, .default_value = 0x07, .nvalues = 3 },
	{ { 0x01, "Page hit" },
	  { 0x02, "Page miss" },
	  { 0x04, "Page conflict" } }
};

/* Unit mask of MEM_TURNAROUND (event 0xE3). */
static const struct perfctr_unit_mask_3 k8_um_turnaround = {
	{ .type = perfctr_um_type_bitmask, .default_value = 0x07, .nvalues = 3 },
	{ { 0x01, "DIMM turnaround" },
	  { 0x02, "Read to write turnaround" },
	  { 0x04, "Write to read turnaround" } }
};

/* Unit mask of MEM_BYPASS_SAT (event 0xE4). */
static const struct perfctr_unit_mask_4 k8_um_saturation = {
	{ .type = perfctr_um_type_bitmask, .default_value = 0x0F, .nvalues = 4 },
	{ { 0x01, "Memory controller high priority bypass" },
	  { 0x02, "Memory controller low priority bypass" },
	  { 0x04, "DRAM controller interface bypass" },
	  { 0x08, "DRAM controller queue bypass" } }
};

/* Unit mask of SIZED_COMMANDS (event 0xEB). */
static const struct perfctr_unit_mask_7 k8_um_sized_commands = {
	{ .type = perfctr_um_type_bitmask, .default_value = 0x7F, .nvalues = 7 },
	{ { 0x01, "NonPostWrSzByte" },
	  { 0x02, "NonPostWrSzDword" },
	  { 0x04, "PostWrSzByte" },
	  { 0x08, "PostWrSzDword" },
	  { 0x10, "RdSzByte" },
	  { 0x20, "RdSzDword" },
	  { 0x40, "RdModWr" } }
};

/* Unit mask of PROBE_RESULT (event 0xEC). */
static const struct perfctr_unit_mask_4 k8_um_probe = {
	{ .type = perfctr_um_type_bitmask, .default_value = 0x0F, .nvalues = 4 },
	{ { 0x01, "Probe 
miss" },
	  { 0x02, "Probe hit" },
	  { 0x04, "Probe hit dirty without memory cancel" },
	  { 0x08, "Probe hit dirty with memory cancel" } }
};

/* Unit mask shared by the three HYPERTRANSPORT_BUSn_WIDTH events (0xF6-0xF8). */
static const struct perfctr_unit_mask_4 k8_um_ht = {
	{ .type = perfctr_um_type_bitmask, .default_value = 0x0F, .nvalues = 4 },
	{ { 0x01, "Command sent" },
	  { 0x02, "Data sent" },
	  { 0x04, "Buffer release sent" },
	  { 0x08, "Nop sent" } }
};

/*
 * Events common to every K8 revision handled here.
 *
 * Row layout: { event code, 0xF, unit mask or NULL, name, description }.
 * NOTE(review): the constant second field (0xF) is presumably the mask of
 * counters that may host the event -- confirm against struct perfctr_event.
 * Event 0x24 is deliberately absent: its unit mask differs between
 * revisions, so it is supplied by k8_events[] / k8c_events[] instead.
 */
static const struct perfctr_event k8_common_events[] = {
	{ 0x00, 0xF, UM(k8_um_fpu_ops), "DISPATCHED_FPU_OPS", /* Revision B and later */
	  "Dispatched FPU ops" },
	{ 0x01, 0xF, NULL, "NO_FPU_OPS", /* Revision B and later */
	  "Cycles with no FPU ops retired" },
	{ 0x02, 0xF, NULL, "FAST_FPU_OPS", /* Revision B and later */
	  "Dispatched FPU ops that use the fast flag interface" },
	{ 0x20, 0xF, UM(k7_um_seg_reg), "SEG_REG_LOAD",
	  "Segment register load" },
	{ 0x21, 0xF, NULL, "SELF_MODIFY_RESYNC",
	  "Microarchitectural resync caused by self modifying code" },
	{ 0x22, 0xF, NULL, "LS_RESYNC_BY_SNOOP",
	  /* similar to 0x86, but LS unit instead of IC unit */
	  "Microarchitectural resync caused by snoop" },
	{ 0x23, 0xF, NULL, "LS_BUFFER_FULL",
	  "LS Buffer 2 Full" },
	/* 0x24: changed in Revision C */
	{ 0x25, 0xF, NULL, "OP_LATE_CANCEL",
	  "Microarchitectural late cancel of an operation" },
	{ 0x26, 0xF, NULL, "CFLUSH_RETIRED",
	  "Retired CFLUSH instructions" },
	{ 0x27, 0xF, NULL, "CPUID_RETIRED",
	  "Retired CPUID instructions" },
	/* 0x40-0x47: from K7 official event set */
	{ 0x48, 0xF, NULL, "ACCESS_CANCEL_LATE",
	  "Microarchitectural late cancel of an access" },
	{ 0x49, 0xF, NULL, "ACCESS_CANCEL_EARLY",
	  "Microarchitectural early cancel of an access" },
	{ 0x4A, 0xF, UM(k8_um_ecc), "ECC_BIT_ERR",
	  "One bit ECC error recorded found by scrubber" },
	{ 0x4B, 0xF, UM(k8_um_prefetch), "DISPATCHED_PRE_INSTRS",
	  "Dispatched prefetch instructions" },
	/* 0x4C: added in Revision C */
	{ 0x76, 0xF, NULL, "CPU_CLK_UNHALTED", /* XXX: was CYCLES_PROCESSOR_IS_RUNNING */
	  "Cycles processor is running (not in HLT or STPCLK)" },
	{ 0x7D, 0xF, 
UM(k8_um_int_L2_req), "BU_INT_L2_REQ",
	  "Internal L2 request" },
	{ 0x7E, 0xF, UM(k8_um_fill_req), "BU_FILL_REQ",
	  "Fill request that missed in L2" },
	{ 0x7F, 0xF, UM(k8_um_fill_L2), "BU_FILL_L2",
	  "Fill into L2" },
	/* 0x80-0x81: from K7 official event set */
	{ 0x82, 0xF, NULL, "IC_REFILL_FROM_L2",
	  "Refill from L2" },
	{ 0x83, 0xF, NULL, "IC_REFILL_FROM_SYS",
	  "Refill from system" },
	/* 0x84-0x85: from K7 official event set */
	{ 0x86, 0xF, NULL, "IC_RESYNC_BY_SNOOP",
	  /* similar to 0x22, but IC unit instead of LS unit */
	  "Microarchitectural resync caused by snoop" },
	{ 0x87, 0xF, NULL, "IC_FETCH_STALL",
	  "Instruction fetch stall" },
	{ 0x88, 0xF, NULL, "IC_STACK_HIT",
	  "Return stack hit" },
	{ 0x89, 0xF, NULL, "IC_STACK_OVERFLOW",
	  "Return stack overflow" },
	/* 0xC0-0xC7: from K7 official event set */
	{ 0xC8, 0xF, NULL, "RETIRED_NEAR_RETURNS",
	  "Retired near returns" },
	{ 0xC9, 0xF, NULL, "RETIRED_RETURNS_MISPREDICT",
	  "Retired near returns mispredicted" },
	{ 0xCA, 0xF, NULL, "RETIRED_BRANCH_MISCOMPARE",
	  "Retired taken branches mispredicted due to address miscompare" },
	{ 0xCB, 0xF, UM(k8_um_fpu_instr), "RETIRED_FPU_INSTRS", /* Revision B and later */
	  "Retired FPU instructions" },
	{ 0xCC, 0xF, UM(k8_um_fpu_fastpath), "RETIRED_FASTPATH_INSTRS", /* Revision B and later */
	  "Retired fastpath double op instructions" },
	/* 0xCD-0xCF: from K7 official event set */
	{ 0xD0, 0xF, NULL, "DECODER_EMPTY",
	  "Nothing to dispatch (decoder empty)" },
	{ 0xD1, 0xF, NULL, "DISPATCH_STALLS",
	  "Dispatch stalls (events 0xD2-0xDA combined)" },
	{ 0xD2, 0xF, NULL, "DISPATCH_STALL_FROM_BRANCH_ABORT",
	  "Dispatch stall from branch abort to retire" },
	{ 0xD3, 0xF, NULL, "DISPATCH_STALL_SERIALIZATION",
	  "Dispatch stall for serialization" },
	{ 0xD4, 0xF, NULL, "DISPATCH_STALL_SEG_LOAD",
	  "Dispatch stall for segment load" },
	{ 0xD5, 0xF, NULL, "DISPATCH_STALL_REORDER_BUFFER",
	  "Dispatch stall when reorder buffer is full" },
	{ 0xD6, 0xF, NULL, "DISPATCH_STALL_RESERVE_STATIONS",
	  "Dispatch stall when reservation 
stations are full" },
	{ 0xD7, 0xF, NULL, "DISPATCH_STALL_FPU",
	  "Dispatch stall when FPU is full" },
	{ 0xD8, 0xF, NULL, "DISPATCH_STALL_LS",
	  "Dispatch stall when LS is full" },
	{ 0xD9, 0xF, NULL, "DISPATCH_STALL_QUIET_WAIT",
	  "Dispatch stall when waiting for all to be quiet" },
	{ 0xDA, 0xF, NULL, "DISPATCH_STALL_PENDING",
	  "Dispatch stall when far control transfer or resync branch is pending" },
	{ 0xDB, 0xF, UM(k8_um_fpu_exceptions), "FPU_EXCEPTIONS", /* Revision B and later */
	  "FPU exceptions" },
	{ 0xDC, 0xF, NULL, "DR0_BREAKPOINTS",
	  "Number of breakpoints for DR0" },
	{ 0xDD, 0xF, NULL, "DR1_BREAKPOINTS",
	  "Number of breakpoints for DR1" },
	{ 0xDE, 0xF, NULL, "DR2_BREAKPOINTS",
	  "Number of breakpoints for DR2" },
	{ 0xDF, 0xF, NULL, "DR3_BREAKPOINTS",
	  "Number of breakpoints for DR3" },
	{ 0xE0, 0xF, UM(k8_um_page_access), "MEM_PAGE_ACCESS",
	  "Memory controller page access" },
	{ 0xE1, 0xF, NULL, "MEM_PAGE_TBL_OVERFLOW",
	  "Memory controller page table overflow" },
	{ 0xE2, 0xF, NULL, "DRAM_SLOTS_MISSED",
	  "Memory controller DRAM command slots missed (in MemClks)" },
	{ 0xE3, 0xF, UM(k8_um_turnaround), "MEM_TURNAROUND",
	  "Memory controller turnaround" },
	{ 0xE4, 0xF, UM(k8_um_saturation), "MEM_BYPASS_SAT",
	  "Memory controller bypass counter saturation" },
	{ 0xEB, 0xF, UM(k8_um_sized_commands), "SIZED_COMMANDS",
	  "Sized commands" },
	{ 0xEC, 0xF, UM(k8_um_probe), "PROBE_RESULT",
	  "Probe result" },
	{ 0xF6, 0xF, UM(k8_um_ht), "HYPERTRANSPORT_BUS0_WIDTH",
	  "Hypertransport (tm) bus 0 bandwidth" },
	{ 0xF7, 0xF, UM(k8_um_ht), "HYPERTRANSPORT_BUS1_WIDTH",
	  "Hypertransport (tm) bus 1 bandwidth" },
	{ 0xF8, 0xF, UM(k8_um_ht), "HYPERTRANSPORT_BUS2_WIDTH",
	  "Hypertransport (tm) bus 2 bandwidth" },
};

/*
 * Shared K8 event set: the table above, with .include pointing at the K7
 * official event set (the source of the code ranges skipped above).
 */
static const struct perfctr_event_set k8_common_event_set = {
	.cpu_type = PERFCTR_X86_AMD_K8,
	.event_prefix = "K8_",
	.include = &k7_official_event_set,
	.nevents = ARRAY_SIZE(k8_common_events),
	.events = k8_common_events,
};

/* Events specific to K8 before Revision C: LOCKED_OP has no unit mask here. */
static const struct perfctr_event k8_events[] = {
	{ 0x24, 0xF, NULL, 
"LOCKED_OP", /* unit mask changed in Rev. C */
	  "Locked operation" },
};

/* Exported K8 event set: k8_events[] layered on top of the common set. */
const struct perfctr_event_set perfctr_k8_event_set = {
	.cpu_type = PERFCTR_X86_AMD_K8,
	.event_prefix = "K8_",
	.include = &k8_common_event_set,
	.nevents = ARRAY_SIZE(k8_events),
	.events = k8_events,
};

/*
 * K8 Revision C. Starts at CPUID 0xF58 for Opteron/Athlon64FX and
 * CPUID 0xF48 for Athlon64. (CPUID 0xF51 is Opteron Revision B3.)
 */

/*
 * Revision C unit mask of LOCKED_OP (event 0x24).  Unlike the tables above,
 * the default selects only bit 0x01 rather than all three bits.
 */
static const struct perfctr_unit_mask_3 k8c_um_locked_op = {
	{ .type = perfctr_um_type_bitmask, .default_value = 0x01, .nvalues = 3 },
	{ { 0x01, "Number of lock instructions executed" },
	  { 0x02, "Number of cycles spent in the lock request/grant stage" },
	  { 0x04, "Number of cycles a lock takes to complete once it is "
	    "non-speculative and is the oldest load/store operation "
	    "(non-speculative cycles in Ls2 entry 0)" } }
};

/* Revision C unit mask of LOCK_ACCESSES (event 0x4C). */
static const struct perfctr_unit_mask_2 k8c_um_lock_accesses = {
	{ .type = perfctr_um_type_bitmask, .default_value = 0x03, .nvalues = 2 },
	{ { 0x01, "Number of dcache accesses by lock instructions" },
	  { 0x02, "Number of dcache misses by lock instructions" } }
};

/* Events specific to K8 Revision C: LOCKED_OP gains a unit mask, 0x4C is new. */
static const struct perfctr_event k8c_events[] = {
	{ 0x24, 0xF, UM(k8c_um_locked_op), "LOCKED_OP", /* unit mask changed */
	  "Locked operation" },
	{ 0x4C, 0xF, UM(k8c_um_lock_accesses), "LOCK_ACCESSES",
	  "DCACHE accesses by locks" },
};

/* Exported K8 Revision C event set: k8c_events[] layered on the common set. */
const struct perfctr_event_set perfctr_k8c_event_set = {
	.cpu_type = PERFCTR_X86_AMD_K8C,
	.event_prefix = "K8C_",
	.include = &k8_common_event_set,
	.nevents = ARRAY_SIZE(k8c_events),
	.events = k8c_events,
};
papi-5.6.0/src/components/stealtime/Rules.stealtime000664 001750 001750 00000000370 13216244360 024500 0ustar00jshenry1963jshenry1963000000 000000 COMPSRCS += components/stealtime/linux-stealtime.c COMPOBJS += linux-stealtime.o linux-stealtime.o: components/stealtime/linux-stealtime.c $(HEADERS) $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/stealtime/linux-stealtime.c -o linux-stealtime.o papi-5.6.0/src/validation_tests/papi_l1_dca.c000664 001750 001750 
00000007042 13216244370 023201 0ustar00jshenry1963jshenry1963000000 000000 /* This code attempts to test the L1 Data Cache Accesses */
/* performance counter PAPI_L1_DCA */
/* by Vince Weaver, */
/* Note on AMD fam15h we get 3x expected on writes? */
#include #include #include
#include "papi.h"
#include "papi_test.h"
#include "testcode.h"
#include "display_error.h"
#define NUM_RUNS 100
#define ARRAYSIZE 65536
static double array[ARRAYSIZE];

/*
 * Validation test for the PAPI_L1_DCA preset.
 *
 * Initializes PAPI, creates an event set and adds "PAPI_L1_DCA" by name
 * (the test is skipped if that fails), then prints a write-test and a
 * read-test banner.  Each phase increments `errors` when `error` lies
 * outside [-1.0, 1.0]; a non-zero `errors` fails the test with
 * "Error too high".
 *
 * NOTE(review): in this copy the text between '<' and '>' inside both
 * measurement loops is missing ("for(i=0;ihigh)", "(count 1.0)"), so the
 * loop bodies and the code that computes `error` are not visible here.
 * Restore them from the upstream papi_l1_dca.c before building.
 */
int main(int argc, char **argv) {
	int i;
	int quiet;
	int eventset=PAPI_NULL;
	int errors=0;
	int retval;
	int num_runs=NUM_RUNS;
	long long high,low,average,expected=ARRAYSIZE;
	long long count,total;
	double aSumm = 0.0;
	double error;

	quiet=tests_quiet(argc,argv);
	if (!quiet) {
		printf("Testing the PAPI_L1_DCA event\n");
	}

	/* Init the PAPI library */
	retval = PAPI_library_init(PAPI_VER_CURRENT);
	if (retval != PAPI_VER_CURRENT) {
		test_fail(__FILE__,__LINE__,"PAPI_library_init",retval);
	}

	retval=PAPI_create_eventset(&eventset);
	if (retval!=PAPI_OK) {
		test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval );
	}

	/* A platform without this preset skips the test instead of failing. */
	retval=PAPI_add_named_event(eventset,"PAPI_L1_DCA");
	if (retval!=PAPI_OK) {
		test_skip( __FILE__, __LINE__, "adding PAPI_L1_DCA", retval );
	}

	/*******************************************************************/
	/* Test if the C compiler uses a sane number of data cache accesses */
	/* This tests writes to memory. */
	/*******************************************************************/
	if (!quiet) {
		printf("Write Test: Initializing an array of %d doubles:\n", ARRAYSIZE);
	}
	high=0; low=0; total=0;
	for(i=0;ihigh) high=count;
	if ((low==0) || (count 1.0) || (error<-1.0)) {
		if (!quiet) printf("Instruction count off by more than 1%%\n");
		errors++;
	}
	if (!quiet) printf("\n");

	/*******************************************************************/
	/* Test if the C compiler uses a sane number of data cache accesses */
	/* This tests reads from memory. 
*/
	/*******************************************************************/
	if (!quiet) {
		printf("Read Test: Summing an array of %d doubles:\n", ARRAYSIZE);
	}
	high=0; low=0; total=0;
	for(i=0;ihigh) high=count;
	if ((low==0) || (count 1.0) || (error<-1.0)) {
		if (!quiet) printf("Instruction count off by more than 1%%\n");
		errors++;
	}
	if (!quiet) {
		printf("\n");
	}

	if (errors) {
		test_fail( __FILE__, __LINE__, "Error too high", 1 );
	}
	test_pass(__FILE__);
	return 0;
}
papi-5.6.0/src/perfctr-2.7.x/etc/costs/Athlon-850000664 001750 001750 00000001265 13216244367 023143 0ustar00jshenry1963jshenry1963000000 000000 [data from an 850 Mhz Athlon] PERFCTR INIT: vendor 2, family 6, model 4, stepping 2, clock 850063 kHz PERFCTR INIT: NITER == 64 PERFCTR INIT: loop overhead is 90 cycles PERFCTR INIT: rdtsc cost is 15.2 cycles (1064 total) PERFCTR INIT: rdpmc cost is 13.3 cycles (947 total) PERFCTR INIT: rdmsr (counter) cost is 51.5 cycles (3391 total) PERFCTR INIT: rdmsr (evntsel) cost is 52.6 cycles (3458 total) PERFCTR INIT: wrmsr (counter) cost is 79.7 cycles (5191 total) PERFCTR INIT: wrmsr (evntsel) cost is 231.8 cycles (14928 total) PERFCTR INIT: read cr4 cost is 7.0 cycles (540 total) PERFCTR INIT: write cr4 cost is 62.9 cycles (4121 total) perfctr: driver 2.3.4, cpu type AMD K7 at 850063 kHz papi-5.6.0/src/libpfm-3.y/include/perfmon/perfmon_compat.h000664 001750 001750 00000012745 13216244362 025467 0ustar00jshenry1963jshenry1963000000 000000 /*
 * Copyright (c) 2001-2006 Hewlett-Packard Development Company, L.P.
 * Contributed by Stephane Eranian
 *
 * This header file contains obsolete user-level perfmon interface
 * definitions for the Itanium Processor Family architecture.
 *
 * please use replacements as indicated below whenever possible.
 */
#ifndef _PERFMON_COMPAT_H_
#define _PERFMON_COMPAT_H_
#ifndef __ia64__
#error "you should not include this file on non Itanium platforms"
#endif
/*
 * old perfmon2 interface for backward compatibility.
 * Do not use in portable applications. 
*/
extern int perfmonctl(int fd, int cmd, void *arg, int narg);

typedef unsigned char pfm_uuid_t[16]; /* custom sampling buffer identifier type */

/*
 * obsolete perfmon commands supported on all CPU models
 * (values for the cmd argument of perfmonctl())
 */
#define PFM_WRITE_PMCS 0x01
#define PFM_WRITE_PMDS 0x02
#define PFM_READ_PMDS 0x03
#define PFM_STOP 0x04
#define PFM_START 0x05
#define PFM_ENABLE 0x06 /* obsolete */
#define PFM_DISABLE 0x07 /* obsolete */
#define PFM_CREATE_CONTEXT 0x08
#define PFM_DESTROY_CONTEXT 0x09 /* obsolete use close() */
#define PFM_RESTART 0x0a
#define PFM_PROTECT_CONTEXT 0x0b /* obsolete */
#define PFM_GET_FEATURES 0x0c /* obsolete: use /proc/sys/kernel/perfmon */
#define PFM_DEBUG 0x0d /* obsolete: use /proc/sys/kernel/perfmon/debug */
#define PFM_UNPROTECT_CONTEXT 0x0e /* obsolete */
#define PFM_GET_PMC_RESET_VAL 0x0f /* obsolete: use /proc/perfmon_mappings */
#define PFM_LOAD_CONTEXT 0x10
#define PFM_UNLOAD_CONTEXT 0x11

/*
 * PMU model specific commands (may not be supported on all PMU models)
 */
#define PFM_WRITE_IBRS 0x20 /* obsolete: use PFM_WRITE_PMCS[256-263] */
#define PFM_WRITE_DBRS 0x21 /* obsolete: use PFM_WRITE_PMCS[264-271] */

/*
 * argument to PFM_CREATE_CONTEXT
 */
typedef struct {
	pfm_uuid_t ctx_smpl_buf_id; /* which buffer format to use (if needed) */
	unsigned long ctx_flags; /* noblock/block */
	unsigned int ctx_reserved1; /* for future use */
	int ctx_fd; /* return arg: unique identification for context */
	void *ctx_smpl_vaddr; /* return arg: virtual address of sampling buffer, if used */
	unsigned long ctx_reserved3[11];/* for future use */
} pfarg_context_t;

/*
 * argument structure for PFM_WRITE_PMCS/PFM_WRITE_PMDS/PFM_READ_PMDS
 * (one entry per register; fields marked "return" are filled in on output)
 */
typedef struct {
	unsigned int reg_num; /* which register */
	unsigned short reg_set; /* event set for this register */
	unsigned short reg_reserved1; /* for future use */
	unsigned long reg_value; /* initial pmc/pmd value */
	unsigned long reg_flags; /* input: pmc/pmd flags, return: reg error */
	unsigned long reg_long_reset; /* reset after 
buffer overflow notification */
	unsigned long reg_short_reset; /* reset after counter overflow */
	unsigned long reg_reset_pmds[4]; /* which other counters to reset on overflow */
	unsigned long reg_random_seed; /* seed value when randomization is used */
	unsigned long reg_random_mask; /* bitmask used to limit random value */
	unsigned long reg_last_reset_val;/* return: PMD last reset value */
	unsigned long reg_smpl_pmds[4]; /* which pmds are accessed when PMC overflows */
	unsigned long reg_smpl_eventid; /* opaque sampling event identifier */
	unsigned long reg_ovfl_switch_cnt; /* how many overflows before switching to the next set */
	unsigned long reg_reserved2[2]; /* for future use */
} pfarg_reg_t;

/*
 * argument to PFM_WRITE_IBRS/PFM_WRITE_DBRS
 */
typedef struct {
	unsigned int dbreg_num; /* which debug register */
	unsigned short dbreg_set; /* event set for this register */
	unsigned short dbreg_reserved1; /* for future use */
	unsigned long dbreg_value; /* value for debug register */
	unsigned long dbreg_flags; /* return: dbreg error */
	unsigned long dbreg_reserved2[1]; /* for future use */
} pfarg_dbreg_t;

/*
 * argument to PFM_GET_FEATURES
 */
typedef struct {
	unsigned int ft_version; /* perfmon: major [16-31], minor [0-15] */
	unsigned int ft_reserved; /* reserved for future use */
	unsigned long reserved[4]; /* for future use */
} pfarg_features_t;

/*
 * Messages.  All three message layouts below start with the same two
 * members (msg_type, msg_ctx_fd).
 */

/* overflow message: adds the overflowed PMDs and the active set */
typedef struct {
	int msg_type; /* generic message header */
	int msg_ctx_fd; /* generic message header */
	unsigned long msg_ovfl_pmds[4]; /* which PMDs overflowed */
	unsigned short msg_active_set; /* active set at the time of overflow */
	unsigned short msg_reserved1; /* for future use */
	unsigned int msg_reserved2; /* for future use */
	unsigned long msg_tstamp; /* for perf tuning/debug */
} pfm_ovfl_msg_t;

/* end message: header plus timestamp only */
typedef struct {
	int msg_type; /* generic message header */
	int msg_ctx_fd; /* generic message header */
	unsigned long msg_tstamp; /* for perf tuning */
} pfm_end_msg_t;

/* generic message: header plus timestamp */
typedef struct {
	int msg_type; /* type of the message */
	int 
msg_ctx_fd; /* unique identifier for the context */ unsigned long msg_tstamp; /* for perf tuning */ } pfm_gen_msg_t; typedef union { int type; pfm_ovfl_msg_t pfm_ovfl_msg; pfm_end_msg_t pfm_end_msg; pfm_gen_msg_t pfm_gen_msg; } pfm_msg_t; /* * PMD/PMC return flags in case of error (ignored on input) * * Those flags are used on output and must be checked in case EINVAL is returned * by a command accepting a vector of values and each has a flag field, such as * pfarg_pmc_t or pfarg_pmd_t. */ #define PFM_REG_RETFL_NOTAVAIL (1<<31) /* set if register is implemented but not available */ #define PFM_REG_RETFL_EINVAL (1<<30) /* set if register entry is invalid */ #define PFM_REG_RETFL_MASK (PFM_REG_RETFL_NOTAVAIL|PFM_REG_RETFL_EINVAL) #define PFM_REG_HAS_ERROR(flag) (((flag) & PFM_REG_RETFL_MASK) != 0) #endif /* _PERFMON_COMPAT_H_ */ papi-5.6.0/src/ctests/flops.c000664 001750 001750 00000005352 13216244360 020122 0ustar00jshenry1963jshenry1963000000 000000 /* * A simple example for the use of PAPI, the number of flops you should * get is about INDEX^3 on machines that consider add and multiply one flop * such as SGI, and 2*(INDEX^3) that don't consider it 1 flop such as INTEL * -Kevin London */ #include #include #include "papi.h" #include "papi_test.h" #include "testcode.h" #include "display_error.h" int main( int argc, char **argv ) { float real_time, proc_time, mflops; long long flpins; int retval; int fip = 0; int quiet=0; long long expected; double double_result,error; /* Set TESTS_QUIET variable */ quiet=tests_quiet( argc, argv ); /* Initialize PAPI library */ retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) { test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); } /* Try to use one of the FP events */ if ( PAPI_query_event( PAPI_FP_INS ) == PAPI_OK ) { fip = 1; } else if ( PAPI_query_event( PAPI_FP_OPS ) == PAPI_OK ) { fip = 2; } else { if ( !quiet ) printf( "PAPI_FP_INS and PAPI_FP_OPS are not defined for this platform.\n" 
); test_skip(__FILE__,__LINE__,"No FP events available",1); } /* Shutdown? */ /* I guess because it would interfere with the high-level interface? */ PAPI_shutdown( ); /* Initialize the Matrix arrays */ expected=flops_float_init_matrix(); /* Setup PAPI library and begin collecting data from the counters */ if ( fip == 1 ) { retval = PAPI_flips( &real_time, &proc_time, &flpins, &mflops ); if (retval!=PAPI_OK) { test_fail( __FILE__, __LINE__, "PAPI_flips", retval ); } } else { retval = PAPI_flops( &real_time, &proc_time, &flpins, &mflops ); if (retval!=PAPI_OK) { test_fail( __FILE__, __LINE__, "PAPI_flops", retval ); } } /* Matrix-Matrix multiply */ double_result=flops_float_matrix_matrix_multiply(); /* Collect the data into the variables passed in */ if ( fip == 1 ) { retval = PAPI_flips( &real_time, &proc_time, &flpins, &mflops ); if (retval!=PAPI_OK) { test_fail( __FILE__, __LINE__, "PAPI_flips", retval ); } } else { retval = PAPI_flops( &real_time, &proc_time, &flpins, &mflops ); if (retval!=PAPI_OK) { test_fail( __FILE__, __LINE__, "PAPI_flops", retval ); } } if (!quiet) printf("result=%lf\n",double_result); if ( !quiet ) { printf( "Real_time: %f Proc_time: %f MFLOPS: %f\n", real_time, proc_time, mflops ); if ( fip == 1 ) { printf( "Total flpins: "); } else { printf( "Total flpops: "); } printf( "%lld\n\n", flpins ); } error=display_error(flpins,flpins,flpins,expected,quiet); if ((error > 1.0) || (error<-1.0)) { if (!quiet) printf("Instruction count off by more than 1%%\n"); test_fail( __FILE__, __LINE__, "Validation failed", 1 ); } test_pass( __FILE__ ); return 0; } papi-5.6.0/src/libpfm4/lib/pfmlib_intel_bdx_unc_qpi.c000664 001750 001750 00000006233 13216244365 024620 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_intel_bdx_qpi.c : Intel BroadwellX QPI uncore PMU * * Copyright (c) 2017 Google Inc. 
All rights reserved * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include #include #include #include #include /* private headers */ #include "pfmlib_priv.h" #include "pfmlib_intel_x86_priv.h" #include "pfmlib_intel_snbep_unc_priv.h" #include "events/intel_bdx_unc_qpi_events.h" static void display_qpi(void *this, pfmlib_event_desc_t *e, void *val) { const intel_x86_entry_t *pe = this_pe(this); pfm_snbep_unc_reg_t *reg = val; __pfm_vbprintf("[UNC_QPI=0x%"PRIx64" event=0x%x sel_ext=%d umask=0x%x en=%d " "inv=%d edge=%d thres=%d] %s\n", reg->val, reg->qpi.unc_event, reg->qpi.unc_event_ext, reg->qpi.unc_umask, reg->qpi.unc_en, reg->qpi.unc_inv, reg->qpi.unc_edge, reg->qpi.unc_thres, pe[e->event].name); } #define DEFINE_QPI_BOX(n) \ pfmlib_pmu_t intel_bdx_unc_qpi##n##_support = {\ .desc = "Intel BroadwellX QPI"#n" uncore",\ .name = "bdx_unc_qpi"#n,\ .perf_name = "uncore_qpi_"#n,\ .pmu = PFM_PMU_INTEL_BDX_UNC_QPI##n,\ .pme_count = LIBPFM_ARRAY_SIZE(intel_bdx_unc_q_pe),\ .type = PFM_PMU_TYPE_UNCORE,\ .num_cntrs = 4,\ .num_fixed_cntrs = 0,\ .max_encoding = 3,\ .pe = intel_bdx_unc_q_pe,\ .atdesc = snbep_unc_mods,\ .flags = PFMLIB_PMU_FL_RAW_UMASK,\ .pmu_detect = pfm_intel_bdx_unc_detect,\ .get_event_encoding[PFM_OS_NONE] = pfm_intel_snbep_unc_get_encoding,\ PFMLIB_ENCODE_PERF(pfm_intel_snbep_unc_get_perf_encoding),\ PFMLIB_OS_DETECT(pfm_intel_x86_perf_detect), \ .get_event_first = pfm_intel_x86_get_event_first,\ .get_event_next = pfm_intel_x86_get_event_next,\ .event_is_valid = pfm_intel_x86_event_is_valid,\ .validate_table = pfm_intel_x86_validate_table,\ .get_event_info = pfm_intel_x86_get_event_info,\ .get_event_attr_info = pfm_intel_x86_get_event_attr_info,\ PFMLIB_VALID_PERF_PATTRS(pfm_intel_snbep_unc_perf_validate_pattrs),\ .get_event_nattrs = pfm_intel_x86_get_event_nattrs,\ .display_reg = display_qpi,\ } DEFINE_QPI_BOX(0); DEFINE_QPI_BOX(1); DEFINE_QPI_BOX(2); papi-5.6.0/src/ctests/case2.c000664 001750 001750 00000004306 13216244360 017772 0ustar00jshenry1963jshenry1963000000 000000 /* From Dave McNamara at PSRV. Thanks! 
*/ /* If an event is countable but you've exhausted the counter resources and you try to add an event, it seems subsequent PAPI_start and/or PAPI_stop will causes a Seg. Violation. I got around this by calling PAPI to get the # of countable events, then making sure that I didn't try to add more than these number of events. I still have a problem if someone adds Level 2 cache misses and then adds FLOPS 'cause I didn't count FLOPS as actually requiring 2 counters. */ #include #include #include "papi.h" #include "papi_test.h" int main( int argc, char **argv ) { double c, a = 0.999, b = 1.001; int n = 1000; int EventSet = PAPI_NULL; int retval; int j = 0, i; long long g1[3]; tests_quiet( argc, argv ); /* Set TESTS_QUIET variable */ if ( ( retval = PAPI_library_init( PAPI_VER_CURRENT ) ) != PAPI_VER_CURRENT ) test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); if ( ( retval = PAPI_create_eventset( &EventSet ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); if ( PAPI_query_event( PAPI_BR_CN ) == PAPI_OK ) j++; if ( j == 1 && ( retval = PAPI_add_event( EventSet, PAPI_BR_CN ) ) != PAPI_OK ) { if ( retval != PAPI_ECNFLCT ) test_fail( __FILE__, __LINE__, "PAPI_add_event", retval ); } i = j; if ( PAPI_query_event( PAPI_TOT_CYC ) == PAPI_OK ) j++; if ( j == ( i + 1 ) && ( retval = PAPI_add_event( EventSet, PAPI_TOT_CYC ) ) != PAPI_OK ) { if ( retval != PAPI_ECNFLCT ) test_fail( __FILE__, __LINE__, "PAPI_add_event", retval ); } i = j; if ( PAPI_query_event( PAPI_TOT_INS ) == PAPI_OK ) j++; if ( j == ( i + 1 ) && ( retval = PAPI_add_event( EventSet, PAPI_TOT_INS ) ) != PAPI_OK ) { if ( retval != PAPI_ECNFLCT ) test_fail( __FILE__, __LINE__, "PAPI_add_event", retval ); } if ( j ) { if ( ( retval = PAPI_start( EventSet ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_start", retval ); for ( i = 0; i < n; i++ ) { c = a * b; } if (!TESTS_QUIET) fprintf(stdout,"c=%lf\n",c); if ( ( retval = PAPI_stop( EventSet, g1 ) ) != PAPI_OK ) 
test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); } test_pass( __FILE__ ); return 0; } papi-5.6.0/src/libpfm4/lib/events/sparc_ultra4plus_events.h000664 001750 001750 00000043640 13216244365 026010 0ustar00jshenry1963jshenry1963000000 000000 static const sparc_entry_t ultra4plus_pe[] = { /* These two must always be first. */ { .name = "Cycle_cnt", .desc = "Accumulated cycles", .ctrl = PME_CTRL_S0 | PME_CTRL_S1, .code = 0x0, }, { .name = "Instr_cnt", .desc = "Number of instructions completed", .ctrl = PME_CTRL_S0 | PME_CTRL_S1, .code = 0x1, }, /* PIC0 UltraSPARC-IV+ events */ { .name = "Dispatch0_IC_miss", .desc = "I-buffer is empty from I-Cache miss", .ctrl = PME_CTRL_S0, .code = 0x2, }, { .name = "IU_stat_jmp_correct_pred", .desc = "Retired non-annulled register indirect jumps predicted correctly", .ctrl = PME_CTRL_S0, .code = 0x3, }, { .name = "Dispatch0_2nd_br", .desc = "Stall cycles due to having two branch instructions line-up in one 4-instruction group causing the second branch in the group to be re-fetched, delaying it's entrance into the I-buffer", .ctrl = PME_CTRL_S0, .code = 0x4, }, { .name = "Rstall_storeQ", .desc = "R-stage stall for a store instruction which is the next instruction to be executed, but it stalled due to the store queue being full", .ctrl = PME_CTRL_S0, .code = 0x5, }, { .name = "Rstall_IU_use", .desc = "R-stage stall for an event that the next instruction to be executed depends on the result of a preceding integer instruction in the pipeline that is not yet available", .ctrl = PME_CTRL_S0, .code = 0x6, }, { .name = "IU_stat_ret_correct_pred", .desc = "Retired non-annulled returns predicted correctly", .ctrl = PME_CTRL_S0, .code = 0x7, }, { .name = "IC_ref", .desc = "I-cache references", .ctrl = PME_CTRL_S0, .code = 0x8, }, { .name = "DC_rd", .desc = "D-cache read references (including accesses that subsequently trap)", .ctrl = PME_CTRL_S0, .code = 0x9, }, { .name = "Rstall_FP_use", .desc = "R-stage stall for an event that the next 
instruction to be executed depends on the result of a preceding floating-point instruction in the pipeline that is not yet available", .ctrl = PME_CTRL_S0, .code = 0xa, }, { .name = "SW_pf_instr", .desc = "Retired SW prefetch instructions", .ctrl = PME_CTRL_S0, .code = 0xb, }, { .name = "L2_ref", .desc = "L2-cache references", .ctrl = PME_CTRL_S0, .code = 0xc, }, { .name = "L2_write_hit_RTO", .desc = "L2-cache exclusive requests that hit L2-cache in S, O, or Os state and thus, do a read-to-own bus transaction", .ctrl = PME_CTRL_S0, .code = 0xd, }, { .name = "L2_snoop_inv_sh", .desc = "L2 cache lines that were written back to the L3 cache due to requests from both cores", .ctrl = PME_CTRL_S0, .code = 0xe, }, { .name = "L2_rd_miss", .desc = "L2-cache miss events (including atomics) from D-cache events", .ctrl = PME_CTRL_S0, .code = 0xf, }, { .name = "PC_rd", .desc = "P-cache cacheable loads", .ctrl = PME_CTRL_S0, .code = 0x10, }, { .name = "SI_snoop_sh", .desc = "Counts snoops from remote processor(s) including RTS, RTSR, RTO, RTOR, RS, RSR, RTSM, and WS", .ctrl = PME_CTRL_S0, .code = 0x11, }, { .name = "SI_ciq_flow_sh", .desc = "Counts system clock cycles when the flow control (PauseOut) signal is asserted", .ctrl = PME_CTRL_S0, .code = 0x12, }, { .name = "Re_DC_miss", .desc = "Stall due to loads that miss D-cache and get recirculated", .ctrl = PME_CTRL_S0, .code = 0x13, }, { .name = "SW_count_NOP0", .desc = "Retired, non-annulled special software NOP instructions (which is equivalent to 'sethi %hi(0xfc000), %g0' instruction)", .ctrl = PME_CTRL_S0, .code = 0x14, }, { .name = "IU_Stat_Br_miss_taken", .desc = "Retired branches that were predicted to be taken, but in fact were not taken", .ctrl = PME_CTRL_S0, .code = 0x15, }, { .name = "IU_Stat_Br_Count_taken", .desc = "Retired taken branches", .ctrl = PME_CTRL_S0, .code = 0x16, }, { .name = "HW_pf_exec", .desc = "Hardware prefetches enqueued in the prefetch queue", .ctrl = PME_CTRL_S0, .code = 0x17, }, { .name = 
"FA_pipe_completion", .desc = "Instructions that complete execution on the FPG ALU pipelines", .ctrl = PME_CTRL_S0, .code = 0x18, }, { .name = "SSM_L3_wb_remote", .desc = "L3 cache line victimizations from this core which generate R_WB transactions to non-LPA (remote physical address) regions", .ctrl = PME_CTRL_S0, .code = 0x19, }, { .name = "SSM_L3_miss_local", .desc = "L3 cache misses to LPA (local physical address) from this core which generate an RTS, RTO, or RS transaction", .ctrl = PME_CTRL_S0, .code = 0x1a, }, { .name = "SSM_L3_miss_mtag_remote", .desc = "L3 cache misses to LPA (local physical address) from this core which generate retry (R_*) transactions including R_RTS, R_RTO, and R_RS", .ctrl = PME_CTRL_S0, .code = 0x1b, }, { .name = "SW_pf_str_trapped", .desc = "Strong software prefetch instructions trapping due to TLB miss", .ctrl = PME_CTRL_S0, .code = 0x1c, }, { .name = "SW_pf_PC_installed", .desc = "Software prefetch instructions that installed lines in the P-cache", .ctrl = PME_CTRL_S0, .code = 0x1d, }, { .name = "IPB_to_IC_fill", .desc = "I-cache fills from the instruction prefetch buffer", .ctrl = PME_CTRL_S0, .code = 0x1e, }, { .name = "L2_write_miss", .desc = "L2-cache misses from this core by cacheable store requests", .ctrl = PME_CTRL_S0, .code = 0x1f, }, { .name = "MC_reads_0_sh", .desc = "Read requests completed to memory bank 0", .ctrl = PME_CTRL_S0, .code = 0x20, }, { .name = "MC_reads_1_sh", .desc = "Read requests completed to memory bank 1", .ctrl = PME_CTRL_S0, .code = 0x21, }, { .name = "MC_reads_2_sh", .desc = "Read requests completed to memory bank 2", .ctrl = PME_CTRL_S0, .code = 0x22, }, { .name = "MC_reads_3_sh", .desc = "Read requests completed to memory bank 3", .ctrl = PME_CTRL_S0, .code = 0x23, }, { .name = "MC_stalls_0_sh", .desc = "Clock cycles that requests were stalled in the MCU queues because bank 0 was busy with a previous request", .ctrl = PME_CTRL_S0, .code = 0x24, }, { .name = "MC_stalls_2_sh", .desc = "Clock cycles 
that requests were stalled in the MCU queues because bank 2 was busy with a previous request", .ctrl = PME_CTRL_S0, .code = 0x25, }, { .name = "L2_hit_other_half", .desc = "L2 cache hits from this core to the ways filled by the other core when the cache is in the pseudo-split mode", .ctrl = PME_CTRL_S0, .code = 0x26, }, { .name = "L3_rd_miss", .desc = "L3 cache misses sent out to SIU from this code by cacheable I-cache, D-cache, PO-cache, and W-cache (excluding block store) requests", .ctrl = PME_CTRL_S0, .code = 0x28, }, { .name = "Re_L2_miss", .desc = "Stall cycles due to recirculation of cacheable loads that miss both D-cache and L2 cache", .ctrl = PME_CTRL_S0, .code = 0x29, }, { .name = "IC_miss_cancelled", .desc = "I-cache miss requests cancelled due to new fetch stream", .ctrl = PME_CTRL_S0, .code = 0x2a, }, { .name = "DC_wr_miss", .desc = "D-cache store accesses that miss D-cache", .ctrl = PME_CTRL_S0, .code = 0x2b, }, { .name = "L3_hit_I_state_sh", .desc = "Tag hits in L3 cache when the line is in I state", .ctrl = PME_CTRL_S0, .code = 0x2c, }, { .name = "SI_RTS_src_data", .desc = "Local RTS transactions due to I-cache, D-cache, or P-cache requests from this core where data is from the cache of another processor on the system, not from memory", .ctrl = PME_CTRL_S0, .code = 0x2d, }, { .name = "L2_IC_miss", .desc = "L2 cache misses from this code by cacheable I-cache requests", .ctrl = PME_CTRL_S0, .code = 0x2e, }, { .name = "SSM_new_transaction_sh", .desc = "New SSM transactions (RTSU, RTOU, UGM) observed by this processor on the Fireplane Interconnect", .ctrl = PME_CTRL_S0, .code = 0x2f, }, { .name = "L2_SW_pf_miss", .desc = "L2 cache misses by software prefetch requests from this core", .ctrl = PME_CTRL_S0, .code = 0x30, }, { .name = "L2_wb", .desc = "L2 cache lines that were written back to the L3 cache because of requests from this core", .ctrl = PME_CTRL_S0, .code = 0x31, }, { .name = "L2_wb_sh", .desc = "L2 cache lines that were written back to the L3 
cache because of requests from both cores", .ctrl = PME_CTRL_S0, .code = 0x32, }, { .name = "L2_snoop_cb_sh", .desc = "L2 cache lines that were copied back due to other processors", .ctrl = PME_CTRL_S0, .code = 0x33, }, /* PIC1 UltraSPARC-IV+ events */ { .name = "Dispatch0_other", .desc = "Stall cycles due to the event that no instructions are dispatched because the I-queue is empty due to various other events, including branch target address fetch and various events which cause an instruction to be refetched", .ctrl = PME_CTRL_S1, .code = 0x2, }, { .name = "DC_wr", .desc = "D-cache write references by cacheable stores (excluding block stores)", .ctrl = PME_CTRL_S1, .code = 0x3, }, { .name = "Re_DC_missovhd", .desc = "Stall cycles due to D-cache load miss", .ctrl = PME_CTRL_S1, .code = 0x4, }, { .name = "Re_FPU_bypass", .desc = "Stall due to recirculation when an FPU bypass condition that does not have a direct bypass path occurs", .ctrl = PME_CTRL_S1, .code = 0x5, }, { .name = "L3_write_hit_RTO", .desc = "L3 cache hits in O, Os, or S state by cacheable store requests from this core that do a read-to-own (RTO) bus transaction", .ctrl = PME_CTRL_S1, .code = 0x6, }, { .name = "L2L3_snoop_inv_sh", .desc = "L2 and L3 cache lines that were invalidated due to other processors doing RTO, RTOR, RTOU, or WS transactions", .ctrl = PME_CTRL_S1, .code = 0x7, }, { .name = "IC_L2_req", .desc = "I-cache requests sent to L2 cache", .ctrl = PME_CTRL_S1, .code = 0x8, }, { .name = "DC_rd_miss", .desc = "Cacheable loads (excluding atomics and block loads) that miss D-cache as well as P-cache (for FP loads)", .ctrl = PME_CTRL_S1, .code = 0x9, }, { .name = "L2_hit_I_state_sh", .desc = "Tag hits in L2 cache when the line is in I state", .ctrl = PME_CTRL_S1, .code = 0xa, }, { .name = "L3_write_miss_RTO", .desc = "L3 cache misses from this core by cacheable store requests that do a read-to-own (RTO) bus transaction. 
This count does not include RTO requests for prefetch (fcn=2,3/22,23) instructions", .ctrl = PME_CTRL_S1, .code = 0xb, }, { .name = "L2_miss", .desc = "L2 cache misses from this core by cacheable I-cache, D-cache, P-cache, and W-cache (excluding block stores) requests", .ctrl = PME_CTRL_S1, .code = 0xc, }, { .name = "SI_owned_sh", .desc = "Number of times owned_in is asserted on bus requests from the local processor", .ctrl = PME_CTRL_S1, .code = 0xd, }, { .name = "SI_RTO_src_data", .desc = "Number of local RTO transactions due to W-cache or P-cache requests from this core where data is from the cache of another processor on the system, not from memory", .ctrl = PME_CTRL_S1, .code = 0xe, }, { .name = "SW_pf_duplicate", .desc = "Number of software prefetch instructions that were dropped because the prefetch request matched an outstanding requests in the prefetch queue or the request hit the P-cache", .ctrl = PME_CTRL_S1, .code = 0xf, }, { .name = "IU_stat_jmp_mispred", .desc = "Number of retired non-annulled register indirect jumps mispredicted", .ctrl = PME_CTRL_S1, .code = 0x10, }, { .name = "ITLB_miss", .desc = "I-TLB misses", .ctrl = PME_CTRL_S1, .code = 0x11, }, { .name = "DTLB_miss", .desc = "D-TLB misses", .ctrl = PME_CTRL_S1, .code = 0x12, }, { .name = "WC_miss", .desc = "W-cache misses", .ctrl = PME_CTRL_S1, .code = 0x13, }, { .name = "IC_fill", .desc = "Number of I-cache fills excluding fills from the instruction prefetch buffer. 
This is the best approximation of the number of I-cache misses for instructions that were actually executed", .ctrl = PME_CTRL_S1, .code = 0x14, }, { .name = "IU_stat_ret_mispred", .desc = "Number of retired non-annulled returns mispredicted", .ctrl = PME_CTRL_S1, .code = 0x15, }, { .name = "Re_L3_miss", .desc = "Stall cycles due to recirculation of cacheable loads that miss D-cache, L2, and L3 cache", .ctrl = PME_CTRL_S1, .code = 0x16, }, { .name = "Re_PFQ_full", .desc = "Stall cycles due to recirculation of prefetch instructions because the prefetch queue (PFQ) was full", .ctrl = PME_CTRL_S1, .code = 0x17, }, { .name = "PC_soft_hit", .desc = "Number of cacheable FP loads that hit a P-cache line that was prefetched by a software prefetch instruction", .ctrl = PME_CTRL_S1, .code = 0x18, }, { .name = "PC_inv", .desc = "Number of P-cache lines that were invalidated due to external snoops, internal stores, and L2 evictions", .ctrl = PME_CTRL_S1, .code = 0x19, }, { .name = "PC_hard_hit", .desc = "Number of FP loads that hit a P-cache line that was fetched by a FP load or a hardware prefetch, irrespective of whether the loads hit or miss the D-cache", .ctrl = PME_CTRL_S1, .code = 0x1a, }, { .name = "IC_pf", .desc = "Number of I-cache prefetch requests sent to L2 cache", .ctrl = PME_CTRL_S1, .code = 0x1b, }, { .name = "SW_count_NOP1", .desc = "Retired, non-annulled special software NOP instructions (which is equivalent to 'sethi %hi(0xfc000), %g0' instruction)", .ctrl = PME_CTRL_S1, .code = 0x1c, }, { .name = "IU_stat_br_miss_untaken", .desc = "Number of retired non-annulled conditional branches that were predicted to be not taken, but in fact were taken", .ctrl = PME_CTRL_S1, .code = 0x1d, }, { .name = "IU_stat_br_count_taken", .desc = "Number of retired non-annulled conditional branches that were taken", .ctrl = PME_CTRL_S1, .code = 0x1e, }, { .name = "PC_miss", .desc = "Number of cacheable FP loads that miss P-cache, irrespective of whether the loads hit or miss the 
D-cache", .ctrl = PME_CTRL_S1, .code = 0x1f, }, { .name = "MC_writes_0_sh", .desc = "Number of write requests complete to memory bank 0", .ctrl = PME_CTRL_S1, .code = 0x20, }, { .name = "MC_writes_1_sh", .desc = "Number of write requests complete to memory bank 1", .ctrl = PME_CTRL_S1, .code = 0x21, }, { .name = "MC_writes_2_sh", .desc = "Number of write requests complete to memory bank 2", .ctrl = PME_CTRL_S1, .code = 0x22, }, { .name = "MC_writes_3_sh", .desc = "Number of write requests complete to memory bank 3", .ctrl = PME_CTRL_S1, .code = 0x23, }, { .name = "MC_stalls_1_sh", .desc = "Number of processor cycles that requests were stalled in the MCU queues because bank 0 was busy with a previous requests", .ctrl = PME_CTRL_S1, .code = 0x24, }, { .name = "MC_stalls_3_sh", .desc = "Number of processor cycles that requests were stalled in the MCU queues because bank 3 was busy with a previous requests", .ctrl = PME_CTRL_S1, .code = 0x25, }, { .name = "Re_RAW_miss", .desc = "Stall cycles due to recirculation when there is a load instruction in the E-stage of the pipeline which has a non-bypassable read-after-write (RAW) hazard with an earlier store instruction", .ctrl = PME_CTRL_S1, .code = 0x26, }, { .name = "FM_pipe_completion", .desc = "Number of retired instructions that complete execution on the FLoat-Point/Graphics Multiply pipeline", .ctrl = PME_CTRL_S1, .code = 0x27, }, { .name = "SSM_L3_miss_mtag_remote", .desc = "Number of L3 cache misses to LPA (local physical address) from this core which generate retry (R_*) transactions including R_RTS, R_RTO, and R_RS", .ctrl = PME_CTRL_S1, .code = 0x28, }, { .name = "SSM_L3_miss_remote", .desc = "Number of L3 cache misses from this core which generate retry (R_*) transactions to non-LPA (non-local physical address) address space, or R_WS transactions due to block store (BST) / block store commit (BSTC) to any address space (LPA or non-LPA), or R_RTO due to atomic request on Os state to LPA space.", .ctrl = 
PME_CTRL_S1, .code = 0x29, }, { .name = "SW_pf_exec", .desc = "Number of retired, non-trapping software prefetch instructions that completed, i.e. number of retired prefetch instructions that were not dropped due to the prefecth queue being full", .ctrl = PME_CTRL_S1, .code = 0x2a, }, { .name = "SW_pf_str_exec", .desc = "Number of retired, non-trapping strong prefetch instructions that completed", .ctrl = PME_CTRL_S1, .code = 0x2b, }, { .name = "SW_pf_dropped", .desc = "Number of software prefetch instructions dropped due to TLB miss or due to the prefetch queue being full", .ctrl = PME_CTRL_S1, .code = 0x2c, }, { .name = "SW_pf_L2_installed", .desc = "Number of software prefetch instructions that installed lines in the L2 cache", .ctrl = PME_CTRL_S1, .code = 0x2d, }, { .name = "L2_HW_pf_miss", .desc = "Number of L2 cache misses by hardware prefetch requests from this core", .ctrl = PME_CTRL_S1, .code = 0x2f, }, { .name = "L3_miss", .desc = "Number of L3 cache misses sent out to SIU from this core by cacheable I-cache, D-cache, P-cache, and W-cache (excluding block stores) requests", .ctrl = PME_CTRL_S1, .code = 0x31, }, { .name = "L3_IC_miss", .desc = "Number of L3 cache misses by cacheable I-cache requests from this core", .ctrl = PME_CTRL_S1, .code = 0x32, }, { .name = "L3_SW_pf_miss", .desc = "Number of L3 cache misses by software prefetch requests from this core", .ctrl = PME_CTRL_S1, .code = 0x33, }, { .name = "L3_hit_other_half", .desc = "Number of L3 cache hits from this core to the ways filled by the other core when the cache is in pseudo-split mode", .ctrl = PME_CTRL_S1, .code = 0x34, }, { .name = "L3_wb", .desc = "Number of L3 cache lines that were written back because of requests from this core", .ctrl = PME_CTRL_S1, .code = 0x35, }, { .name = "L3_wb_sh", .desc = "Number of L3 cache lines that were written back because of requests from both cores", .ctrl = PME_CTRL_S1, .code = 0x36, }, { .name = "L2L3_snoop_cb_sh", .desc = "Total number of L2 and L3 
cache lines that were copied back due to other processors", .ctrl = PME_CTRL_S1, .code = 0x37, }, }; #define PME_SPARC_ULTRA4PLUS_EVENT_COUNT (sizeof(ultra4plus_pe)/sizeof(sparc_entry_t)) papi-5.6.0/src/libpfm4/include/perfmon/000775 001750 001750 00000000000 13216244364 021753 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/libpfm-3.y/examples_v3.x/x86/Makefile000664 001750 001750 00000003531 13216244362 023766 0ustar00jshenry1963jshenry1963000000 000000 # # Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. # Contributed by Stephane Eranian # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies # of the Software, and to permit persons to whom the Software is furnished to do so, # subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF # CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE # OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. # TOPDIR := $(shell if [ "$$PWD" != "" ]; then echo $$PWD; else pwd; fi)/../.. 
include $(TOPDIR)/config.mk include $(TOPDIR)/rules.mk LIBS += -lm ifeq ($(SYS),Linux) TARGETS=smpl_p4_pebs smpl_core_pebs smpl_amd64_ibs smpl_core_pebs_sys endif all: $(TARGETS) $(TARGETS): %:%.o $(PFMLIB) ../detect_pmcs.o $(CC) -o $@ $(CFLAGS) $(LDFLAGS) $^ $(LIBS) clean: $(RM) -f *.o $(TARGETS) *~ distclean: clean install_examples: $(TARGETS) install_examples: @echo installing: $(TARGETS) -mkdir -p $(DESTDIR)$(EXAMPLESDIR)/x86 $(INSTALL) -m 755 $(TARGETS) $(DESTDIR)$(EXAMPLESDIR)/x86 # # examples are installed as part of the RPM install, typically in /usr/share/doc/libpfm-X.Y/ # papi-5.6.0/src/components/stealtime/tests/Makefile000664 001750 001750 00000000537 13216244360 024304 0ustar00jshenry1963jshenry1963000000 000000 NAME=stealtime include ../../Makefile_comp_tests.target %.o:%.c $(CC) $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c -o $@ $< TESTS = stealtime_basic stealtime_tests: $(TESTS) stealtime_basic: stealtime_basic.o $(UTILOBJS) $(PAPILIB) $(CC) $(INCLUDE) -o stealtime_basic stealtime_basic.o $(UTILOBJS) $(PAPILIB) $(LDFLAGS) clean: rm -f $(TESTS) *.o papi-5.6.0/src/validation_tests/papi_br_prc.c000664 001750 001750 00000012641 13216244370 023326 0ustar00jshenry1963jshenry1963000000 000000 /* This file attempts to test the predicted correctly branches */ /* performance event as counted by PAPI_BR_PRC */ /* Ideally this event should measure */ /* predicted correctly *conditional* branches */ /* If that's not available, then use total branches. 
*/ /* by Vince Weaver, */ #include #include #include #include #include "papi.h" #include "papi_test.h" #include "display_error.h" #include "testcode.h" int main(int argc, char **argv) { int num_runs=100,i; int num_random_branches=500000; long long high=0,low=0,average=0,expected=1500000; long long expected_high,expected_low; long long count,total=0; int quiet=0,retval,ins_result; int total_eventset=PAPI_NULL,miss_eventset=PAPI_NULL; quiet=tests_quiet(argc,argv); if (!quiet) { printf("\nTesting the PAPI_BR_PRC event.\n\n"); printf("This should measure predicted correctly conditional branches\n"); printf("If such a counter is not available, it may report predicted correctly\n"); printf("total branches instead.\n"); } /* Init the PAPI library */ retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) { test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); } /* Create total eventset */ retval=PAPI_create_eventset(&total_eventset); if (retval!=PAPI_OK) { test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); } retval=PAPI_add_named_event(total_eventset,"PAPI_BR_CN"); if (retval!=PAPI_OK) { if (!quiet) printf("Could not add PAPI_BR_CN\n"); //test_skip( __FILE__, __LINE__, "adding PAPI_BR_CN", retval ); retval=PAPI_add_named_event(total_eventset,"PAPI_BR_INS"); if (retval!=PAPI_OK) { if (!quiet) printf("Could not add PAPI_BR_INS\n"); test_skip( __FILE__, __LINE__, "adding PAPI_BR_INS", retval ); } } /* Create correct eventset */ retval=PAPI_create_eventset(&miss_eventset); if (retval!=PAPI_OK) { test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); } retval=PAPI_add_named_event(miss_eventset,"PAPI_BR_PRC"); if (retval!=PAPI_OK) { if (!quiet) printf("Could not add PAPI_BR_PRC\n"); test_skip( __FILE__, __LINE__, "adding PAPI_BR_PRC", retval ); } if (!quiet) { printf("\nPart 1: Testing that easy to predict loop has few misses\n"); printf("Testing a loop with %lld branches (%d times):\n", expected,num_runs); printf("\tOn 
a simple loop like this, " "hit rate should be very high.\n"); } for(i=0;ihigh) high=count; if ((low==0) || (counthigh) high=count; if ((low==0) || (counthigh) high=count; if ((low==0) || (count expected_high) { if (!quiet) printf("Branch hits too high\n"); test_fail( __FILE__, __LINE__, "Error too high", 1 ); } if (!quiet) printf("\n"); test_pass( __FILE__ ); PAPI_shutdown(); return 0; } papi-5.6.0/src/libpfm-3.y/libpfms/000775 001750 001750 00000000000 13216244363 020640 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/libpfm4/lib/pfmlib_sparc_ultra4.c000664 001750 001750 00000004247 13216244365 023540 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_sparc_ultra4.c : SPARC Ultra 4+ * * Copyright (c) 2011 Google, Inc * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ /* private headers */ #include "pfmlib_priv.h" #include "pfmlib_sparc_priv.h" #include "events/sparc_ultra4plus_events.h" /* * sparc_ultra4plus_support: libpfm PMU descriptor registered under the name ultra4p. * The event list is the ultra4plus_pe table from events/sparc_ultra4plus_events.h and * its length is computed with LIBPFM_ARRAY_SIZE(); the descriptor declares two * counters (.num_cntrs = 2). Detection, encoding, event iteration, table validation * and event/attribute info are all delegated to the shared pfm_sparc_* callbacks; * the perf_events hooks are supplied through the PFMLIB_ENCODE_PERF and * PFMLIB_VALID_PERF_PATTRS macros. */ pfmlib_pmu_t sparc_ultra4plus_support={ .desc = "Ultra Sparc 4+", .name = "ultra4p", .pmu = PFM_PMU_SPARC_ULTRA4PLUS, .pme_count = LIBPFM_ARRAY_SIZE(ultra4plus_pe), .type = PFM_PMU_TYPE_CORE, .supported_plm = SPARC_PLM, .max_encoding = 2, .num_cntrs = 2, .pe = ultra4plus_pe, .atdesc = NULL, .flags = 0, .pmu_detect = pfm_sparc_detect, .get_event_encoding[PFM_OS_NONE] = pfm_sparc_get_encoding, PFMLIB_ENCODE_PERF(pfm_sparc_get_perf_encoding), .get_event_first = pfm_sparc_get_event_first, .get_event_next = pfm_sparc_get_event_next, .event_is_valid = pfm_sparc_event_is_valid, .validate_table = pfm_sparc_validate_table, .get_event_info = pfm_sparc_get_event_info, .get_event_attr_info = pfm_sparc_get_event_attr_info, PFMLIB_VALID_PERF_PATTRS(pfm_sparc_perf_validate_pattrs), .get_event_nattrs = pfm_sparc_get_event_nattrs, }; papi-5.6.0/src/components/appio/tests/iozone/write_telemetry000664 001750 001750 00000001123 13216244356 026434 0ustar00jshenry1963jshenry1963000000 000000 # # # The format is: # # All fields are space delimited. # A # symbol in column 1 indicates a comment. # First field: Byte offset within the file. # Second field: Size in bytes of the I/O operation. # Third field: Number of milliseconds to delay before I/O operation. # # This is an example of sequential 64k writer with 2 milliseconds # before each write.
# 0 65536 2 65536 65536 2 131072 65536 2 196608 65536 2 262144 65536 2 327680 65536 2 393216 65536 2 458752 65536 2 524288 65536 2 589824 65536 2 655360 65536 2 720896 65536 2 786432 65536 2 851968 65536 2 917504 65536 2 983040 65536 2 papi-5.6.0/man/man3/PAPI_set_cmp_domain.3000664 001750 001750 00000005044 13216244356 022031 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPI_set_cmp_domain" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPI_set_cmp_domain \- .PP Set the default counting domain for new event sets bound to the specified component\&. .SH SYNOPSIS .br .PP .SH "Detailed Description" .PP .PP .nf @par C Prototype: \#include <papi.h> @n int PAPI_set_cmp_domain( int domain, int cidx ); @param domain one of the following constants as defined in the papi.h header file @arg PAPI_DOM_USER User context counted @arg PAPI_DOM_KERNEL Kernel/OS context counted @arg PAPI_DOM_OTHER Exception/transient mode counted @arg PAPI_DOM_SUPERVISOR Supervisor/hypervisor context counted @arg PAPI_DOM_ALL All above contexts counted @arg PAPI_DOM_MIN The smallest available context @arg PAPI_DOM_MAX The largest available context @arg PAPI_DOM_HWSPEC Something other than CPU like stuff. Individual components can decode low order bits for more meaning @param cidx An integer identifier for a component. By convention, component 0 is always the cpu component. .fi .PP .PP \fBReturn values:\fP .RS 4 \fIPAPI_OK\fP .br \fIPAPI_EINVAL\fP One or more of the arguments is invalid\&. .br \fIPAPI_ENOCMP\fP The argument cidx is not a valid component\&. .RE .PP \fBPAPI_set_cmp_domain\fP sets the default counting domain for all new event sets in all threads, and requires an explicit component argument\&. Event sets that are already in existence are not affected\&. To change the domain of an existing event set, please see \fBPAPI_set_opt\fP\&. The reader should note that the domain of an event set affects only the mode in which the counter continues to run\&.
Counts are still aggregated for the current process, and not for any other processes in the system\&. Thus when requesting PAPI_DOM_KERNEL , the user is asking for events that occur on behalf of the process, inside the kernel\&. .PP \fBExample:\fP .RS 4 .PP .nf int ret; // Initialize the library ret = PAPI_library_init(PAPI_VER_CURRENT); if (ret > 0 && ret != PAPI_VER_CURRENT) { fprintf(stderr,"PAPI library version mismatch!\n"); exit(1); } if (ret < 0) handle_error(ret); // Set the default domain for the cpu component ret = PAPI_set_cmp_domain(PAPI_DOM_KERNEL,0); if (ret != PAPI_OK) handle_error(ret); ret = PAPI_create_eventset(&EventSet); if (ret != PAPI_OK) handle_error(ret); * .fi .PP .RE .PP \fBSee Also:\fP .RS 4 \fBPAPI_set_domain\fP \fBPAPI_set_granularity\fP \fBPAPI_set_opt\fP \fBPAPI_get_opt\fP .RE .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. papi-5.6.0/src/components/nvml/tests/000775 001750 001750 00000000000 13216244357 021632 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/libpfm-3.y/examples_ia64_v2.0/ita_opcode.c000664 001750 001750 00000020325 13216244362 024665 0ustar00jshenry1963jshenry1963000000 000000 /* * ita_opcode.c - example of how to use the opcode matcher with the Itanium PMU * * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux/ia64. */ #include #include #include #include #include #include #include #include #include #include #define NUM_PMCS PFMLIB_MAX_PMCS #define NUM_PMDS PFMLIB_MAX_PMDS #define MAX_EVT_NAME_LEN 128 #define MAX_PMU_NAME_LEN 32 /* * do_test: the measured workload. The while loop adds loop-1, loop-2, ..., 0 * into an unsigned long and the total is returned converted to int. * main programs the opcode matcher to count br.cloop, which is the branch * this loop is hoped to compile to. * The function is deliberately not static, to make sure the compiler does * not inline it. */ int do_test(unsigned long loop) { unsigned long sum = 0; while(loop--) sum += loop; return sum; } /* * fatal_error: print a printf-style message to stderr, then exit(1). * The prototype carries the noreturn attribute. */ static void fatal_error(char *fmt,...) __attribute__((noreturn)); static void fatal_error(char *fmt, ...)
{ va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); exit(1); } /* * main: count the br.cloop instructions retired in do_test() with the PMC8 * opcode matcher. Steps, in order: initialize libpfm and check that the PMU * type is PFMLIB_ITANIUM_PMU; set the PMC8 match value and look up the * IA64_TAGGED_INST_RETIRED_PMC8 event; let pfm_dispatch_events() compute the * PMC values; create a perfmon context, write the PMCs and PMDs and load the * context on this process; run do_test(100UL) between pfm_self_start() and * pfm_self_stop(); read the PMDs back and print the first one. * Every failure goes through fatal_error(); the function returns 0 otherwise. */ int main(void) { int ret; int type = 0; pfmlib_input_param_t inp; pfmlib_output_param_t outp; pfmlib_ita_input_param_t ita_inp; pfarg_reg_t pd[NUM_PMDS]; pfarg_reg_t pc[NUM_PMCS]; pfarg_context_t ctx[1]; pfarg_load_t load_args; pfmlib_options_t pfmlib_options; unsigned int i; int id; char name[MAX_EVT_NAME_LEN]; /* * Initialize pfm library (required before we can use it) */ if (pfm_initialize() != PFMLIB_SUCCESS) { fatal_error("Can't initialize library\n"); } /* * Let's make sure we run this on the right CPU */ pfm_get_pmu_type(&type); if (type != PFMLIB_ITANIUM_PMU) { char model[MAX_PMU_NAME_LEN]; pfm_get_pmu_name(model, MAX_PMU_NAME_LEN); fatal_error("this program does not work with the %s PMU\n", model); } /* * pass options to library (optional) */ memset(&pfmlib_options, 0, sizeof(pfmlib_options)); pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ pfmlib_options.pfm_verbose = 0; /* set to 1 for verbose */ pfm_set_options(&pfmlib_options); memset(pd, 0, sizeof(pd)); memset(ctx, 0, sizeof(ctx)); memset(&inp,0, sizeof(inp)); memset(&outp,0, sizeof(outp)); memset(&ita_inp,0, sizeof(ita_inp)); memset(&load_args,0, sizeof(load_args)); /* NOTE(review): pc[] is not cleared here, unlike pd[]; only reg_num and reg_value are assigned further down - confirm the remaining pfarg_reg_t fields may be left unset when passed to PFM_WRITE_PMCS */ /* * We indicate that we are using the PMC8 opcode matcher. This is required * otherwise the library add PMC8 to the list of PMC to program during * pfm_dispatch_events(). */ ita_inp.pfp_ita_pmc8.opcm_used = 1; /* * We want to match all the br.cloop in our test function. * This branch is an IP-relative branch for which the major * opcode (bits [40-37]=4) and the btype field is 5 (which represents * bits[6-8]) so it is included in the match/mask fields of PMC8. * It is necessarily in a B slot. * * We don't care which operands are used with br.cloop therefore * the mask field of pmc8 is set such that only the 4 bits of the * opcode and 3 bits of btype must match exactly.
This is accomplished by * clearing the top 4 bits and bits [6-8] of the mask field and setting the * remaining bits. Similarly, the match field only has the opcode value and btype * set according to the encoding of br.cloop, the * remaining bits are zero. Bit 60 of PMC8 is set to indicate * that we look only in B slots (this is the only possibility for * this instruction anyway). * * So the binary representation of the value for PMC8 is as follows: * * 6666555555555544444444443333333333222222222211111111110000000000 * 3210987654321098765432109876543210987654321098765432109876543210 * ---------------------------------------------------------------- * 0001010000000000000000101000000000000011111111111111000111111000 * * which yields a value of 0x1400028003fff1f8. * * Depending on the level of optimization to compile this code, it may * be that the count reported could be zero, if the compiler uses a br.cond * instead of br.cloop. */ ita_inp.pfp_ita_pmc8.pmc_val = 0x1400028003fff1f8; /* * To count the number of occurrences of this instruction, we must * program a counting monitor with the IA64_TAGGED_INST_RETIRED_PMC8 * event.
*/ if (pfm_find_full_event("IA64_TAGGED_INST_RETIRED_PMC8", &inp.pfp_events[0]) != PFMLIB_SUCCESS) { fatal_error("Cannot find event IA64_TAGGED_INST_RETIRED_PMC8\n"); } /* * set the privilege mode: * PFM_PLM3 : user level only */ inp.pfp_dfl_plm = PFM_PLM3; /* * how many counters we use */ inp.pfp_event_count = 1; /* * let the library figure out the values for the PMCS */ if ((ret=pfm_dispatch_events(&inp, &ita_inp, &outp, NULL)) != PFMLIB_SUCCESS) { fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); } /* * now create the context for self monitoring/per-task */ if (perfmonctl(0, PFM_CREATE_CONTEXT, ctx, 1) == -1 ) { if (errno == ENOSYS) { fatal_error("Your kernel does not have performance monitoring support!\n"); } fatal_error("Can't create PFM context %s\n", strerror(errno)); } /* * extract our file descriptor */ id = ctx[0].ctx_fd; /* * Now prepare the argument to initialize the PMDs and PMCS. * We must use pfp_pmc_count to determine the number of PMCs to initialize. * We must use pfp_event_count to determine the number of PMDs to initialize. * Some events cause extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. * * This step is new compared to libpfm-2.x. It is necessary because the library no * longer knows about the kernel data structures. */ for (i=0; i < outp.pfp_pmc_count; i++) { pc[i].reg_num = outp.pfp_pmcs[i].reg_num; pc[i].reg_value = outp.pfp_pmcs[i].reg_value; } /* * the PMC controlling the event ALWAYS come first, that's why this loop * is safe even when extra PMC are needed to support a particular event. */ for (i=0; i < inp.pfp_event_count; i++) { pd[i].reg_num = pc[i].reg_num; } /* * Now program the registers * * We don't use the same variable to indicate the number of elements passed to * the kernel because, as we said earlier, pc may contain more elements than * the number of events we specified, i.e., contains more than counting monitors.
*/ if (perfmonctl(id, PFM_WRITE_PMCS, pc, outp.pfp_pmc_count) == -1) { fatal_error("perfmonctl error PFM_WRITE_PMCS errno %d\n",errno); } if (perfmonctl(id, PFM_WRITE_PMDS, pd, inp.pfp_event_count) == -1) { fatal_error("perfmonctl error PFM_WRITE_PMDS errno %d\n",errno); } /* * now we load (i.e., attach) the context to ourself */ load_args.load_pid = getpid(); if (perfmonctl(id, PFM_LOAD_CONTEXT, &load_args, 1) == -1) { fatal_error("perfmonctl error PFM_LOAD_CONTEXT errno %d\n",errno); } /* * Let's roll now. */ pfm_self_start(id); do_test(100UL); pfm_self_stop(id); /* * now read the results */ if (perfmonctl(id, PFM_READ_PMDS, pd, inp.pfp_event_count) == -1) { fatal_error( "perfmonctl error READ_PMDS errno %d\n",errno); } /* * print the results */ pfm_get_full_event_name(&inp.pfp_events[0], name, MAX_EVT_NAME_LEN); printf("PMD%u %20lu %s\n", pd[0].reg_num, pd[0].reg_value, name); if (pd[0].reg_value != 0) printf("compiler used br.cloop\n"); else printf("compiler did not use br.cloop\n"); /* * let's stop this now */ close(id); return 0; } papi-5.6.0/src/libpfm-3.y/lib/power5_events.h000664 001750 001750 00001323300 13216244363 022732 0ustar00jshenry1963jshenry1963000000 000000 /****************************/ /* THIS IS OPEN SOURCE CODE */ /****************************/ #ifndef __POWER5_EVENTS_H__ #define __POWER5_EVENTS_H__ /* * File: power5_events.h * CVS: * Author: Corey Ashford * cjashfor@us.ibm.com * Mods: * * * (C) Copyright IBM Corporation, 2007. All Rights Reserved. * Contributed by Corey Ashford * * Note: This code was automatically generated and should not be modified by * hand.
* */ #define POWER5_PME_PM_LSU_REJECT_RELOAD_CDF 0 #define POWER5_PME_PM_FPU1_SINGLE 1 #define POWER5_PME_PM_L3SB_REF 2 #define POWER5_PME_PM_THRD_PRIO_DIFF_3or4_CYC 3 #define POWER5_PME_PM_INST_FROM_L275_SHR 4 #define POWER5_PME_PM_MRK_DATA_FROM_L375_MOD 5 #define POWER5_PME_PM_DTLB_MISS_4K 6 #define POWER5_PME_PM_CLB_FULL_CYC 7 #define POWER5_PME_PM_MRK_ST_CMPL 8 #define POWER5_PME_PM_LSU_FLUSH_LRQ_FULL 9 #define POWER5_PME_PM_MRK_DATA_FROM_L275_SHR 10 #define POWER5_PME_PM_1INST_CLB_CYC 11 #define POWER5_PME_PM_MEM_SPEC_RD_CANCEL 12 #define POWER5_PME_PM_MRK_DTLB_MISS_16M 13 #define POWER5_PME_PM_FPU_FDIV 14 #define POWER5_PME_PM_FPU_SINGLE 15 #define POWER5_PME_PM_FPU0_FMA 16 #define POWER5_PME_PM_SLB_MISS 17 #define POWER5_PME_PM_LSU1_FLUSH_LRQ 18 #define POWER5_PME_PM_L2SA_ST_HIT 19 #define POWER5_PME_PM_DTLB_MISS 20 #define POWER5_PME_PM_BR_PRED_TA 21 #define POWER5_PME_PM_MRK_DATA_FROM_L375_MOD_CYC 22 #define POWER5_PME_PM_CMPLU_STALL_FXU 23 #define POWER5_PME_PM_EXT_INT 24 #define POWER5_PME_PM_MRK_LSU1_FLUSH_LRQ 25 #define POWER5_PME_PM_LSU1_LDF 26 #define POWER5_PME_PM_MRK_ST_GPS 27 #define POWER5_PME_PM_FAB_CMD_ISSUED 28 #define POWER5_PME_PM_LSU0_SRQ_STFWD 29 #define POWER5_PME_PM_CR_MAP_FULL_CYC 30 #define POWER5_PME_PM_L2SA_RCST_DISP_FAIL_RC_FULL 31 #define POWER5_PME_PM_MRK_LSU0_FLUSH_ULD 32 #define POWER5_PME_PM_LSU_FLUSH_SRQ_FULL 33 #define POWER5_PME_PM_FLUSH_IMBAL 34 #define POWER5_PME_PM_MEM_RQ_DISP_Q16to19 35 #define POWER5_PME_PM_THRD_PRIO_DIFF_minus3or4_CYC 36 #define POWER5_PME_PM_DATA_FROM_L35_MOD 37 #define POWER5_PME_PM_MEM_HI_PRIO_WR_CMPL 38 #define POWER5_PME_PM_FPU1_FDIV 39 #define POWER5_PME_PM_FPU0_FRSP_FCONV 40 #define POWER5_PME_PM_MEM_RQ_DISP 41 #define POWER5_PME_PM_LWSYNC_HELD 42 #define POWER5_PME_PM_FXU_FIN 43 #define POWER5_PME_PM_DSLB_MISS 44 #define POWER5_PME_PM_FXLS1_FULL_CYC 45 #define POWER5_PME_PM_DATA_FROM_L275_SHR 46 #define POWER5_PME_PM_THRD_SEL_T0 47 #define POWER5_PME_PM_PTEG_RELOAD_VALID 48 #define 
POWER5_PME_PM_LSU_LMQ_LHR_MERGE 49 #define POWER5_PME_PM_MRK_STCX_FAIL 50 #define POWER5_PME_PM_2INST_CLB_CYC 51 #define POWER5_PME_PM_FAB_PNtoVN_DIRECT 52 #define POWER5_PME_PM_PTEG_FROM_L2MISS 53 #define POWER5_PME_PM_CMPLU_STALL_LSU 54 #define POWER5_PME_PM_MRK_DSLB_MISS 55 #define POWER5_PME_PM_LSU_FLUSH_ULD 56 #define POWER5_PME_PM_PTEG_FROM_LMEM 57 #define POWER5_PME_PM_MRK_BRU_FIN 58 #define POWER5_PME_PM_MEM_WQ_DISP_WRITE 59 #define POWER5_PME_PM_MRK_DATA_FROM_L275_MOD_CYC 60 #define POWER5_PME_PM_LSU1_NCLD 61 #define POWER5_PME_PM_L2SA_RCLD_DISP_FAIL_OTHER 62 #define POWER5_PME_PM_SNOOP_PW_RETRY_WQ_PWQ 63 #define POWER5_PME_PM_FPR_MAP_FULL_CYC 64 #define POWER5_PME_PM_FPU1_FULL_CYC 65 #define POWER5_PME_PM_L3SA_ALL_BUSY 66 #define POWER5_PME_PM_3INST_CLB_CYC 67 #define POWER5_PME_PM_MEM_PWQ_DISP_Q2or3 68 #define POWER5_PME_PM_L2SA_SHR_INV 69 #define POWER5_PME_PM_THRESH_TIMEO 70 #define POWER5_PME_PM_L2SA_RC_DISP_FAIL_CO_BUSY_ALL 71 #define POWER5_PME_PM_THRD_SEL_OVER_GCT_IMBAL 72 #define POWER5_PME_PM_FPU_FSQRT 73 #define POWER5_PME_PM_MRK_LSU0_FLUSH_LRQ 74 #define POWER5_PME_PM_PMC1_OVERFLOW 75 #define POWER5_PME_PM_L3SC_SNOOP_RETRY 76 #define POWER5_PME_PM_DATA_TABLEWALK_CYC 77 #define POWER5_PME_PM_THRD_PRIO_6_CYC 78 #define POWER5_PME_PM_FPU_FEST 79 #define POWER5_PME_PM_FAB_M1toP1_SIDECAR_EMPTY 80 #define POWER5_PME_PM_MRK_DATA_FROM_RMEM 81 #define POWER5_PME_PM_MRK_DATA_FROM_L35_MOD_CYC 82 #define POWER5_PME_PM_MEM_PWQ_DISP 83 #define POWER5_PME_PM_FAB_P1toM1_SIDECAR_EMPTY 84 #define POWER5_PME_PM_LD_MISS_L1_LSU0 85 #define POWER5_PME_PM_SNOOP_PARTIAL_RTRY_QFULL 86 #define POWER5_PME_PM_FPU1_STALL3 87 #define POWER5_PME_PM_GCT_USAGE_80to99_CYC 88 #define POWER5_PME_PM_WORK_HELD 89 #define POWER5_PME_PM_INST_CMPL 90 #define POWER5_PME_PM_LSU1_FLUSH_UST 91 #define POWER5_PME_PM_FXU_IDLE 92 #define POWER5_PME_PM_LSU0_FLUSH_ULD 93 #define POWER5_PME_PM_LSU1_REJECT_LMQ_FULL 94 #define POWER5_PME_PM_GRP_DISP_REJECT 95 #define POWER5_PME_PM_L2SA_MOD_INV 96 
#define POWER5_PME_PM_PTEG_FROM_L25_SHR 97 #define POWER5_PME_PM_FAB_CMD_RETRIED 98 #define POWER5_PME_PM_L3SA_SHR_INV 99 #define POWER5_PME_PM_L2SB_RC_DISP_FAIL_CO_BUSY_ALL 100 #define POWER5_PME_PM_L2SA_RCST_DISP_FAIL_ADDR 101 #define POWER5_PME_PM_L2SA_RCLD_DISP_FAIL_RC_FULL 102 #define POWER5_PME_PM_PTEG_FROM_L375_MOD 103 #define POWER5_PME_PM_MRK_LSU1_FLUSH_UST 104 #define POWER5_PME_PM_BR_ISSUED 105 #define POWER5_PME_PM_MRK_GRP_BR_REDIR 106 #define POWER5_PME_PM_EE_OFF 107 #define POWER5_PME_PM_MEM_RQ_DISP_Q4to7 108 #define POWER5_PME_PM_MEM_FAST_PATH_RD_DISP 109 #define POWER5_PME_PM_INST_FROM_L3 110 #define POWER5_PME_PM_ITLB_MISS 111 #define POWER5_PME_PM_FXU1_BUSY_FXU0_IDLE 112 #define POWER5_PME_PM_FXLS_FULL_CYC 113 #define POWER5_PME_PM_DTLB_REF_4K 114 #define POWER5_PME_PM_GRP_DISP_VALID 115 #define POWER5_PME_PM_LSU_FLUSH_UST 116 #define POWER5_PME_PM_FXU1_FIN 117 #define POWER5_PME_PM_THRD_PRIO_4_CYC 118 #define POWER5_PME_PM_MRK_DATA_FROM_L35_MOD 119 #define POWER5_PME_PM_4INST_CLB_CYC 120 #define POWER5_PME_PM_MRK_DTLB_REF_16M 121 #define POWER5_PME_PM_INST_FROM_L375_MOD 122 #define POWER5_PME_PM_L2SC_RCST_DISP_FAIL_ADDR 123 #define POWER5_PME_PM_GRP_CMPL 124 #define POWER5_PME_PM_FPU1_1FLOP 125 #define POWER5_PME_PM_FPU_FRSP_FCONV 126 #define POWER5_PME_PM_5INST_CLB_CYC 127 #define POWER5_PME_PM_L3SC_REF 128 #define POWER5_PME_PM_THRD_L2MISS_BOTH_CYC 129 #define POWER5_PME_PM_MEM_PW_GATH 130 #define POWER5_PME_PM_FAB_PNtoNN_SIDECAR 131 #define POWER5_PME_PM_FAB_DCLAIM_ISSUED 132 #define POWER5_PME_PM_GRP_IC_MISS 133 #define POWER5_PME_PM_INST_FROM_L35_SHR 134 #define POWER5_PME_PM_LSU_LMQ_FULL_CYC 135 #define POWER5_PME_PM_MRK_DATA_FROM_L2_CYC 136 #define POWER5_PME_PM_LSU_SRQ_SYNC_CYC 137 #define POWER5_PME_PM_LSU0_BUSY_REJECT 138 #define POWER5_PME_PM_LSU_REJECT_ERAT_MISS 139 #define POWER5_PME_PM_MRK_DATA_FROM_RMEM_CYC 140 #define POWER5_PME_PM_DATA_FROM_L375_SHR 141 #define POWER5_PME_PM_FPU0_FMOV_FEST 142 #define 
POWER5_PME_PM_PTEG_FROM_L25_MOD 143 #define POWER5_PME_PM_LD_REF_L1_LSU0 144 #define POWER5_PME_PM_THRD_PRIO_7_CYC 145 #define POWER5_PME_PM_LSU1_FLUSH_SRQ 146 #define POWER5_PME_PM_L2SC_RCST_DISP 147 #define POWER5_PME_PM_CMPLU_STALL_DIV 148 #define POWER5_PME_PM_MEM_RQ_DISP_Q12to15 149 #define POWER5_PME_PM_INST_FROM_L375_SHR 150 #define POWER5_PME_PM_ST_REF_L1 151 #define POWER5_PME_PM_L3SB_ALL_BUSY 152 #define POWER5_PME_PM_FAB_P1toVNorNN_SIDECAR_EMPTY 153 #define POWER5_PME_PM_MRK_DATA_FROM_L275_SHR_CYC 154 #define POWER5_PME_PM_FAB_HOLDtoNN_EMPTY 155 #define POWER5_PME_PM_DATA_FROM_LMEM 156 #define POWER5_PME_PM_RUN_CYC 157 #define POWER5_PME_PM_PTEG_FROM_RMEM 158 #define POWER5_PME_PM_L2SC_RCLD_DISP 159 #define POWER5_PME_PM_LSU0_LDF 160 #define POWER5_PME_PM_LSU_LRQ_S0_VALID 161 #define POWER5_PME_PM_PMC3_OVERFLOW 162 #define POWER5_PME_PM_MRK_IMR_RELOAD 163 #define POWER5_PME_PM_MRK_GRP_TIMEO 164 #define POWER5_PME_PM_ST_MISS_L1 165 #define POWER5_PME_PM_STOP_COMPLETION 166 #define POWER5_PME_PM_LSU_BUSY_REJECT 167 #define POWER5_PME_PM_ISLB_MISS 168 #define POWER5_PME_PM_CYC 169 #define POWER5_PME_PM_THRD_ONE_RUN_CYC 170 #define POWER5_PME_PM_GRP_BR_REDIR_NONSPEC 171 #define POWER5_PME_PM_LSU1_SRQ_STFWD 172 #define POWER5_PME_PM_L3SC_MOD_INV 173 #define POWER5_PME_PM_L2_PREF 174 #define POWER5_PME_PM_GCT_NOSLOT_BR_MPRED 175 #define POWER5_PME_PM_MRK_DATA_FROM_L25_MOD 176 #define POWER5_PME_PM_L2SB_MOD_INV 177 #define POWER5_PME_PM_L2SB_ST_REQ 178 #define POWER5_PME_PM_MRK_L1_RELOAD_VALID 179 #define POWER5_PME_PM_L3SB_HIT 180 #define POWER5_PME_PM_L2SB_SHR_MOD 181 #define POWER5_PME_PM_EE_OFF_EXT_INT 182 #define POWER5_PME_PM_1PLUS_PPC_CMPL 183 #define POWER5_PME_PM_L2SC_SHR_MOD 184 #define POWER5_PME_PM_PMC6_OVERFLOW 185 #define POWER5_PME_PM_LSU_LRQ_FULL_CYC 186 #define POWER5_PME_PM_IC_PREF_INSTALL 187 #define POWER5_PME_PM_TLB_MISS 188 #define POWER5_PME_PM_GCT_FULL_CYC 189 #define POWER5_PME_PM_FXU_BUSY 190 #define POWER5_PME_PM_MRK_DATA_FROM_L3_CYC 
191 #define POWER5_PME_PM_LSU_REJECT_LMQ_FULL 192 #define POWER5_PME_PM_LSU_SRQ_S0_ALLOC 193 #define POWER5_PME_PM_GRP_MRK 194 #define POWER5_PME_PM_INST_FROM_L25_SHR 195 #define POWER5_PME_PM_FPU1_FIN 196 #define POWER5_PME_PM_DC_PREF_STREAM_ALLOC 197 #define POWER5_PME_PM_BR_MPRED_TA 198 #define POWER5_PME_PM_CRQ_FULL_CYC 199 #define POWER5_PME_PM_L2SA_RCLD_DISP 200 #define POWER5_PME_PM_SNOOP_WR_RETRY_QFULL 201 #define POWER5_PME_PM_MRK_DTLB_REF_4K 202 #define POWER5_PME_PM_LSU_SRQ_S0_VALID 203 #define POWER5_PME_PM_LSU0_FLUSH_LRQ 204 #define POWER5_PME_PM_INST_FROM_L275_MOD 205 #define POWER5_PME_PM_GCT_EMPTY_CYC 206 #define POWER5_PME_PM_LARX_LSU0 207 #define POWER5_PME_PM_THRD_PRIO_DIFF_5or6_CYC 208 #define POWER5_PME_PM_SNOOP_RETRY_1AHEAD 209 #define POWER5_PME_PM_FPU1_FSQRT 210 #define POWER5_PME_PM_MRK_LD_MISS_L1_LSU1 211 #define POWER5_PME_PM_MRK_FPU_FIN 212 #define POWER5_PME_PM_THRD_PRIO_5_CYC 213 #define POWER5_PME_PM_MRK_DATA_FROM_LMEM 214 #define POWER5_PME_PM_FPU1_FRSP_FCONV 215 #define POWER5_PME_PM_SNOOP_TLBIE 216 #define POWER5_PME_PM_L3SB_SNOOP_RETRY 217 #define POWER5_PME_PM_FAB_VBYPASS_EMPTY 218 #define POWER5_PME_PM_MRK_DATA_FROM_L275_MOD 219 #define POWER5_PME_PM_6INST_CLB_CYC 220 #define POWER5_PME_PM_L2SB_RCST_DISP 221 #define POWER5_PME_PM_FLUSH 222 #define POWER5_PME_PM_L2SC_MOD_INV 223 #define POWER5_PME_PM_FPU_DENORM 224 #define POWER5_PME_PM_L3SC_HIT 225 #define POWER5_PME_PM_SNOOP_WR_RETRY_RQ 226 #define POWER5_PME_PM_LSU1_REJECT_SRQ 227 #define POWER5_PME_PM_IC_PREF_REQ 228 #define POWER5_PME_PM_L3SC_ALL_BUSY 229 #define POWER5_PME_PM_MRK_GRP_IC_MISS 230 #define POWER5_PME_PM_GCT_NOSLOT_IC_MISS 231 #define POWER5_PME_PM_MRK_DATA_FROM_L3 232 #define POWER5_PME_PM_GCT_NOSLOT_SRQ_FULL 233 #define POWER5_PME_PM_THRD_SEL_OVER_ISU_HOLD 234 #define POWER5_PME_PM_CMPLU_STALL_DCACHE_MISS 235 #define POWER5_PME_PM_L3SA_MOD_INV 236 #define POWER5_PME_PM_LSU_FLUSH_LRQ 237 #define POWER5_PME_PM_THRD_PRIO_2_CYC 238 #define 
POWER5_PME_PM_LSU_FLUSH_SRQ 239 #define POWER5_PME_PM_MRK_LSU_SRQ_INST_VALID 240 #define POWER5_PME_PM_L3SA_REF 241 #define POWER5_PME_PM_L2SC_RC_DISP_FAIL_CO_BUSY_ALL 242 #define POWER5_PME_PM_FPU0_STALL3 243 #define POWER5_PME_PM_GPR_MAP_FULL_CYC 244 #define POWER5_PME_PM_TB_BIT_TRANS 245 #define POWER5_PME_PM_MRK_LSU_FLUSH_LRQ 246 #define POWER5_PME_PM_FPU0_STF 247 #define POWER5_PME_PM_MRK_DTLB_MISS 248 #define POWER5_PME_PM_FPU1_FMA 249 #define POWER5_PME_PM_L2SA_MOD_TAG 250 #define POWER5_PME_PM_LSU1_FLUSH_ULD 251 #define POWER5_PME_PM_MRK_LSU0_FLUSH_UST 252 #define POWER5_PME_PM_MRK_INST_FIN 253 #define POWER5_PME_PM_FPU0_FULL_CYC 254 #define POWER5_PME_PM_LSU_LRQ_S0_ALLOC 255 #define POWER5_PME_PM_MRK_LSU1_FLUSH_ULD 256 #define POWER5_PME_PM_MRK_DTLB_REF 257 #define POWER5_PME_PM_BR_UNCOND 258 #define POWER5_PME_PM_THRD_SEL_OVER_L2MISS 259 #define POWER5_PME_PM_L2SB_SHR_INV 260 #define POWER5_PME_PM_MEM_LO_PRIO_WR_CMPL 261 #define POWER5_PME_PM_L3SC_MOD_TAG 262 #define POWER5_PME_PM_MRK_ST_MISS_L1 263 #define POWER5_PME_PM_GRP_DISP_SUCCESS 264 #define POWER5_PME_PM_THRD_PRIO_DIFF_1or2_CYC 265 #define POWER5_PME_PM_IC_DEMAND_L2_BHT_REDIRECT 266 #define POWER5_PME_PM_MEM_WQ_DISP_Q8to15 267 #define POWER5_PME_PM_FPU0_SINGLE 268 #define POWER5_PME_PM_LSU_DERAT_MISS 269 #define POWER5_PME_PM_THRD_PRIO_1_CYC 270 #define POWER5_PME_PM_L2SC_RCST_DISP_FAIL_OTHER 271 #define POWER5_PME_PM_FPU1_FEST 272 #define POWER5_PME_PM_FAB_HOLDtoVN_EMPTY 273 #define POWER5_PME_PM_SNOOP_RD_RETRY_RQ 274 #define POWER5_PME_PM_SNOOP_DCLAIM_RETRY_QFULL 275 #define POWER5_PME_PM_MRK_DATA_FROM_L25_SHR_CYC 276 #define POWER5_PME_PM_MRK_ST_CMPL_INT 277 #define POWER5_PME_PM_FLUSH_BR_MPRED 278 #define POWER5_PME_PM_L2SB_RCLD_DISP_FAIL_ADDR 279 #define POWER5_PME_PM_FPU_STF 280 #define POWER5_PME_PM_CMPLU_STALL_FPU 281 #define POWER5_PME_PM_THRD_PRIO_DIFF_minus1or2_CYC 282 #define POWER5_PME_PM_GCT_NOSLOT_CYC 283 #define POWER5_PME_PM_FXU0_BUSY_FXU1_IDLE 284 #define 
POWER5_PME_PM_PTEG_FROM_L35_SHR 285 #define POWER5_PME_PM_MRK_LSU_FLUSH_UST 286 #define POWER5_PME_PM_L3SA_HIT 287 #define POWER5_PME_PM_MRK_DATA_FROM_L25_SHR 288 #define POWER5_PME_PM_L2SB_RCST_DISP_FAIL_ADDR 289 #define POWER5_PME_PM_MRK_DATA_FROM_L35_SHR 290 #define POWER5_PME_PM_IERAT_XLATE_WR 291 #define POWER5_PME_PM_L2SA_ST_REQ 292 #define POWER5_PME_PM_THRD_SEL_T1 293 #define POWER5_PME_PM_IC_DEMAND_L2_BR_REDIRECT 294 #define POWER5_PME_PM_INST_FROM_LMEM 295 #define POWER5_PME_PM_FPU0_1FLOP 296 #define POWER5_PME_PM_MRK_DATA_FROM_L35_SHR_CYC 297 #define POWER5_PME_PM_PTEG_FROM_L2 298 #define POWER5_PME_PM_MEM_PW_CMPL 299 #define POWER5_PME_PM_THRD_PRIO_DIFF_minus5or6_CYC 300 #define POWER5_PME_PM_L2SB_RCLD_DISP_FAIL_OTHER 301 #define POWER5_PME_PM_FPU0_FIN 302 #define POWER5_PME_PM_MRK_DTLB_MISS_4K 303 #define POWER5_PME_PM_L3SC_SHR_INV 304 #define POWER5_PME_PM_GRP_BR_REDIR 305 #define POWER5_PME_PM_L2SC_RCLD_DISP_FAIL_RC_FULL 306 #define POWER5_PME_PM_MRK_LSU_FLUSH_SRQ 307 #define POWER5_PME_PM_PTEG_FROM_L275_SHR 308 #define POWER5_PME_PM_L2SB_RCLD_DISP_FAIL_RC_FULL 309 #define POWER5_PME_PM_SNOOP_RD_RETRY_WQ 310 #define POWER5_PME_PM_LSU0_NCLD 311 #define POWER5_PME_PM_FAB_DCLAIM_RETRIED 312 #define POWER5_PME_PM_LSU1_BUSY_REJECT 313 #define POWER5_PME_PM_FXLS0_FULL_CYC 314 #define POWER5_PME_PM_FPU0_FEST 315 #define POWER5_PME_PM_DTLB_REF_16M 316 #define POWER5_PME_PM_L2SC_RCLD_DISP_FAIL_ADDR 317 #define POWER5_PME_PM_LSU0_REJECT_ERAT_MISS 318 #define POWER5_PME_PM_DATA_FROM_L25_MOD 319 #define POWER5_PME_PM_GCT_USAGE_60to79_CYC 320 #define POWER5_PME_PM_DATA_FROM_L375_MOD 321 #define POWER5_PME_PM_LSU_LMQ_SRQ_EMPTY_CYC 322 #define POWER5_PME_PM_LSU0_REJECT_RELOAD_CDF 323 #define POWER5_PME_PM_0INST_FETCH 324 #define POWER5_PME_PM_LSU1_REJECT_RELOAD_CDF 325 #define POWER5_PME_PM_L1_PREF 326 #define POWER5_PME_PM_MEM_WQ_DISP_Q0to7 327 #define POWER5_PME_PM_MRK_DATA_FROM_LMEM_CYC 328 #define POWER5_PME_PM_BRQ_FULL_CYC 329 #define 
POWER5_PME_PM_GRP_IC_MISS_NONSPEC 330 #define POWER5_PME_PM_PTEG_FROM_L275_MOD 331 #define POWER5_PME_PM_MRK_LD_MISS_L1_LSU0 332 #define POWER5_PME_PM_MRK_DATA_FROM_L375_SHR_CYC 333 #define POWER5_PME_PM_LSU_FLUSH 334 #define POWER5_PME_PM_DATA_FROM_L3 335 #define POWER5_PME_PM_INST_FROM_L2 336 #define POWER5_PME_PM_PMC2_OVERFLOW 337 #define POWER5_PME_PM_FPU0_DENORM 338 #define POWER5_PME_PM_FPU1_FMOV_FEST 339 #define POWER5_PME_PM_INST_FETCH_CYC 340 #define POWER5_PME_PM_LSU_LDF 341 #define POWER5_PME_PM_INST_DISP 342 #define POWER5_PME_PM_DATA_FROM_L25_SHR 343 #define POWER5_PME_PM_L1_DCACHE_RELOAD_VALID 344 #define POWER5_PME_PM_MEM_WQ_DISP_DCLAIM 345 #define POWER5_PME_PM_FPU_FULL_CYC 346 #define POWER5_PME_PM_MRK_GRP_ISSUED 347 #define POWER5_PME_PM_THRD_PRIO_3_CYC 348 #define POWER5_PME_PM_FPU_FMA 349 #define POWER5_PME_PM_INST_FROM_L35_MOD 350 #define POWER5_PME_PM_MRK_CRU_FIN 351 #define POWER5_PME_PM_SNOOP_WR_RETRY_WQ 352 #define POWER5_PME_PM_CMPLU_STALL_REJECT 353 #define POWER5_PME_PM_LSU1_REJECT_ERAT_MISS 354 #define POWER5_PME_PM_MRK_FXU_FIN 355 #define POWER5_PME_PM_L2SB_RCST_DISP_FAIL_OTHER 356 #define POWER5_PME_PM_L2SC_RC_DISP_FAIL_CO_BUSY 357 #define POWER5_PME_PM_PMC4_OVERFLOW 358 #define POWER5_PME_PM_L3SA_SNOOP_RETRY 359 #define POWER5_PME_PM_PTEG_FROM_L35_MOD 360 #define POWER5_PME_PM_INST_FROM_L25_MOD 361 #define POWER5_PME_PM_THRD_SMT_HANG 362 #define POWER5_PME_PM_CMPLU_STALL_ERAT_MISS 363 #define POWER5_PME_PM_L3SA_MOD_TAG 364 #define POWER5_PME_PM_FLUSH_SYNC 365 #define POWER5_PME_PM_INST_FROM_L2MISS 366 #define POWER5_PME_PM_L2SC_ST_HIT 367 #define POWER5_PME_PM_MEM_RQ_DISP_Q8to11 368 #define POWER5_PME_PM_MRK_GRP_DISP 369 #define POWER5_PME_PM_L2SB_MOD_TAG 370 #define POWER5_PME_PM_CLB_EMPTY_CYC 371 #define POWER5_PME_PM_L2SB_ST_HIT 372 #define POWER5_PME_PM_MEM_NONSPEC_RD_CANCEL 373 #define POWER5_PME_PM_BR_PRED_CR_TA 374 #define POWER5_PME_PM_MRK_LSU0_FLUSH_SRQ 375 #define POWER5_PME_PM_MRK_LSU_FLUSH_ULD 376 #define 
POWER5_PME_PM_INST_DISP_ATTEMPT 377 #define POWER5_PME_PM_INST_FROM_RMEM 378 #define POWER5_PME_PM_ST_REF_L1_LSU0 379 #define POWER5_PME_PM_LSU0_DERAT_MISS 380 #define POWER5_PME_PM_L2SB_RCLD_DISP 381 #define POWER5_PME_PM_FPU_STALL3 382 #define POWER5_PME_PM_BR_PRED_CR 383 #define POWER5_PME_PM_MRK_DATA_FROM_L2 384 #define POWER5_PME_PM_LSU0_FLUSH_SRQ 385 #define POWER5_PME_PM_FAB_PNtoNN_DIRECT 386 #define POWER5_PME_PM_IOPS_CMPL 387 #define POWER5_PME_PM_L2SC_SHR_INV 388 #define POWER5_PME_PM_L2SA_RCST_DISP_FAIL_OTHER 389 #define POWER5_PME_PM_L2SA_RCST_DISP 390 #define POWER5_PME_PM_SNOOP_RETRY_AB_COLLISION 391 #define POWER5_PME_PM_FAB_PNtoVN_SIDECAR 392 #define POWER5_PME_PM_LSU_LMQ_S0_ALLOC 393 #define POWER5_PME_PM_LSU0_REJECT_LMQ_FULL 394 #define POWER5_PME_PM_SNOOP_PW_RETRY_RQ 395 #define POWER5_PME_PM_DTLB_REF 396 #define POWER5_PME_PM_PTEG_FROM_L3 397 #define POWER5_PME_PM_FAB_M1toVNorNN_SIDECAR_EMPTY 398 #define POWER5_PME_PM_LSU_SRQ_EMPTY_CYC 399 #define POWER5_PME_PM_FPU1_STF 400 #define POWER5_PME_PM_LSU_LMQ_S0_VALID 401 #define POWER5_PME_PM_GCT_USAGE_00to59_CYC 402 #define POWER5_PME_PM_DATA_FROM_L2MISS 403 #define POWER5_PME_PM_GRP_DISP_BLK_SB_CYC 404 #define POWER5_PME_PM_FPU_FMOV_FEST 405 #define POWER5_PME_PM_XER_MAP_FULL_CYC 406 #define POWER5_PME_PM_FLUSH_SB 407 #define POWER5_PME_PM_MRK_DATA_FROM_L375_SHR 408 #define POWER5_PME_PM_MRK_GRP_CMPL 409 #define POWER5_PME_PM_SUSPENDED 410 #define POWER5_PME_PM_GRP_IC_MISS_BR_REDIR_NONSPEC 411 #define POWER5_PME_PM_SNOOP_RD_RETRY_QFULL 412 #define POWER5_PME_PM_L3SB_MOD_INV 413 #define POWER5_PME_PM_DATA_FROM_L35_SHR 414 #define POWER5_PME_PM_LD_MISS_L1_LSU1 415 #define POWER5_PME_PM_STCX_FAIL 416 #define POWER5_PME_PM_DC_PREF_DST 417 #define POWER5_PME_PM_GRP_DISP 418 #define POWER5_PME_PM_L2SA_RCLD_DISP_FAIL_ADDR 419 #define POWER5_PME_PM_FPU0_FPSCR 420 #define POWER5_PME_PM_DATA_FROM_L2 421 #define POWER5_PME_PM_FPU1_DENORM 422 #define POWER5_PME_PM_FPU_1FLOP 423 #define 
POWER5_PME_PM_L2SC_RCLD_DISP_FAIL_OTHER 424 #define POWER5_PME_PM_L2SC_RCST_DISP_FAIL_RC_FULL 425 #define POWER5_PME_PM_FPU0_FSQRT 426 #define POWER5_PME_PM_LD_REF_L1 427 #define POWER5_PME_PM_INST_FROM_L1 428 #define POWER5_PME_PM_TLBIE_HELD 429 #define POWER5_PME_PM_DC_PREF_OUT_OF_STREAMS 430 #define POWER5_PME_PM_MRK_DATA_FROM_L25_MOD_CYC 431 #define POWER5_PME_PM_MRK_LSU1_FLUSH_SRQ 432 #define POWER5_PME_PM_MEM_RQ_DISP_Q0to3 433 #define POWER5_PME_PM_ST_REF_L1_LSU1 434 #define POWER5_PME_PM_MRK_LD_MISS_L1 435 #define POWER5_PME_PM_L1_WRITE_CYC 436 #define POWER5_PME_PM_L2SC_ST_REQ 437 #define POWER5_PME_PM_CMPLU_STALL_FDIV 438 #define POWER5_PME_PM_THRD_SEL_OVER_CLB_EMPTY 439 #define POWER5_PME_PM_BR_MPRED_CR 440 #define POWER5_PME_PM_L3SB_MOD_TAG 441 #define POWER5_PME_PM_MRK_DATA_FROM_L2MISS 442 #define POWER5_PME_PM_LSU_REJECT_SRQ 443 #define POWER5_PME_PM_LD_MISS_L1 444 #define POWER5_PME_PM_INST_FROM_PREF 445 #define POWER5_PME_PM_DC_INV_L2 446 #define POWER5_PME_PM_STCX_PASS 447 #define POWER5_PME_PM_LSU_SRQ_FULL_CYC 448 #define POWER5_PME_PM_FPU_FIN 449 #define POWER5_PME_PM_L2SA_SHR_MOD 450 #define POWER5_PME_PM_LSU_SRQ_STFWD 451 #define POWER5_PME_PM_0INST_CLB_CYC 452 #define POWER5_PME_PM_FXU0_FIN 453 #define POWER5_PME_PM_L2SB_RCST_DISP_FAIL_RC_FULL 454 #define POWER5_PME_PM_THRD_GRP_CMPL_BOTH_CYC 455 #define POWER5_PME_PM_PMC5_OVERFLOW 456 #define POWER5_PME_PM_FPU0_FDIV 457 #define POWER5_PME_PM_PTEG_FROM_L375_SHR 458 #define POWER5_PME_PM_LD_REF_L1_LSU1 459 #define POWER5_PME_PM_L2SA_RC_DISP_FAIL_CO_BUSY 460 #define POWER5_PME_PM_HV_CYC 461 #define POWER5_PME_PM_THRD_PRIO_DIFF_0_CYC 462 #define POWER5_PME_PM_LR_CTR_MAP_FULL_CYC 463 #define POWER5_PME_PM_L3SB_SHR_INV 464 #define POWER5_PME_PM_DATA_FROM_RMEM 465 #define POWER5_PME_PM_DATA_FROM_L275_MOD 466 #define POWER5_PME_PM_LSU0_REJECT_SRQ 467 #define POWER5_PME_PM_LSU1_DERAT_MISS 468 #define POWER5_PME_PM_MRK_LSU_FIN 469 #define POWER5_PME_PM_DTLB_MISS_16M 470 #define 
POWER5_PME_PM_LSU0_FLUSH_UST 471 #define POWER5_PME_PM_L2SC_MOD_TAG 472 #define POWER5_PME_PM_L2SB_RC_DISP_FAIL_CO_BUSY 473 static const int power5_event_ids[][POWER5_NUM_EVENT_COUNTERS] = { [ POWER5_PME_PM_LSU_REJECT_RELOAD_CDF ] = { -1, 145, -1, -1, -1, -1 }, [ POWER5_PME_PM_FPU1_SINGLE ] = { 51, 50, -1, -1, -1, -1 }, [ POWER5_PME_PM_L3SB_REF ] = { 111, 109, -1, -1, -1, -1 }, [ POWER5_PME_PM_THRD_PRIO_DIFF_3or4_CYC ] = { -1, -1, 173, 179, -1, -1 }, [ POWER5_PME_PM_INST_FROM_L275_SHR ] = { -1, -1, 57, -1, -1, -1 }, [ POWER5_PME_PM_MRK_DATA_FROM_L375_MOD ] = { 165, -1, -1, 139, -1, -1 }, [ POWER5_PME_PM_DTLB_MISS_4K ] = { 24, 23, -1, -1, -1, -1 }, [ POWER5_PME_PM_CLB_FULL_CYC ] = { 10, 9, -1, -1, -1, -1 }, [ POWER5_PME_PM_MRK_ST_CMPL ] = { 179, -1, -1, -1, -1, -1 }, [ POWER5_PME_PM_LSU_FLUSH_LRQ_FULL ] = { 140, 139, -1, -1, -1, -1 }, [ POWER5_PME_PM_MRK_DATA_FROM_L275_SHR ] = { -1, -1, 130, -1, -1, -1 }, [ POWER5_PME_PM_1INST_CLB_CYC ] = { 1, 1, -1, -1, -1, -1 }, [ POWER5_PME_PM_MEM_SPEC_RD_CANCEL ] = { 157, 155, -1, -1, -1, -1 }, [ POWER5_PME_PM_MRK_DTLB_MISS_16M ] = { 167, 168, -1, -1, -1, -1 }, [ POWER5_PME_PM_FPU_FDIV ] = { 55, -1, -1, -1, -1, -1 }, [ POWER5_PME_PM_FPU_SINGLE ] = { 58, -1, -1, -1, -1, -1 }, [ POWER5_PME_PM_FPU0_FMA ] = { 39, 38, -1, -1, -1, -1 }, [ POWER5_PME_PM_SLB_MISS ] = { -1, 184, -1, -1, -1, -1 }, [ POWER5_PME_PM_LSU1_FLUSH_LRQ ] = { 130, 128, -1, -1, -1, -1 }, [ POWER5_PME_PM_L2SA_ST_HIT ] = { -1, -1, 70, 74, -1, -1 }, [ POWER5_PME_PM_DTLB_MISS ] = { 22, 21, -1, -1, -1, -1 }, [ POWER5_PME_PM_BR_PRED_TA ] = { -1, 8, 4, 6, -1, -1 }, [ POWER5_PME_PM_MRK_DATA_FROM_L375_MOD_CYC ] = { -1, -1, -1, 140, -1, -1 }, [ POWER5_PME_PM_CMPLU_STALL_FXU ] = { -1, 12, -1, -1, -1, -1 }, [ POWER5_PME_PM_EXT_INT ] = { -1, -1, -1, 21, -1, -1 }, [ POWER5_PME_PM_MRK_LSU1_FLUSH_LRQ ] = { -1, -1, 143, 154, -1, -1 }, [ POWER5_PME_PM_LSU1_LDF ] = { -1, -1, 107, 111, -1, -1 }, [ POWER5_PME_PM_MRK_ST_GPS ] = { -1, 178, -1, -1, -1, -1 }, [ POWER5_PME_PM_FAB_CMD_ISSUED 
] = { 27, 26, -1, -1, -1, -1 }, [ POWER5_PME_PM_LSU0_SRQ_STFWD ] = { 127, 125, -1, -1, -1, -1 }, [ POWER5_PME_PM_CR_MAP_FULL_CYC ] = { 11, 14, -1, -1, -1, -1 }, [ POWER5_PME_PM_L2SA_RCST_DISP_FAIL_RC_FULL ] = { 86, 84, -1, -1, -1, -1 }, [ POWER5_PME_PM_MRK_LSU0_FLUSH_ULD ] = { -1, -1, 142, 153, -1, -1 }, [ POWER5_PME_PM_LSU_FLUSH_SRQ_FULL ] = { -1, -1, 110, 114, -1, -1 }, [ POWER5_PME_PM_FLUSH_IMBAL ] = { -1, -1, 25, 30, -1, -1 }, [ POWER5_PME_PM_MEM_RQ_DISP_Q16to19 ] = { 151, 149, -1, -1, -1, -1 }, [ POWER5_PME_PM_THRD_PRIO_DIFF_minus3or4_CYC ] = { -1, -1, 176, 182, -1, -1 }, [ POWER5_PME_PM_DATA_FROM_L35_MOD ] = { -1, 17, 9, -1, -1, -1 }, [ POWER5_PME_PM_MEM_HI_PRIO_WR_CMPL ] = { 152, 150, -1, -1, -1, -1 }, [ POWER5_PME_PM_FPU1_FDIV ] = { 47, 46, -1, -1, -1, -1 }, [ POWER5_PME_PM_FPU0_FRSP_FCONV ] = { -1, -1, 33, 38, -1, -1 }, [ POWER5_PME_PM_MEM_RQ_DISP ] = { 156, 154, -1, -1, -1, -1 }, [ POWER5_PME_PM_LWSYNC_HELD ] = { -1, -1, 120, 125, -1, -1 }, [ POWER5_PME_PM_FXU_FIN ] = { -1, -1, 45, -1, -1, -1 }, [ POWER5_PME_PM_DSLB_MISS ] = { 21, 20, -1, -1, -1, -1 }, [ POWER5_PME_PM_FXLS1_FULL_CYC ] = { -1, -1, 41, 46, -1, -1 }, [ POWER5_PME_PM_DATA_FROM_L275_SHR ] = { -1, -1, 8, -1, -1, -1 }, [ POWER5_PME_PM_THRD_SEL_T0 ] = { -1, -1, 182, 188, -1, -1 }, [ POWER5_PME_PM_PTEG_RELOAD_VALID ] = { -1, -1, 191, 195, -1, -1 }, [ POWER5_PME_PM_LSU_LMQ_LHR_MERGE ] = { -1, -1, 112, 117, -1, -1 }, [ POWER5_PME_PM_MRK_STCX_FAIL ] = { 178, 177, -1, -1, -1, -1 }, [ POWER5_PME_PM_2INST_CLB_CYC ] = { 3, 2, -1, -1, -1, -1 }, [ POWER5_PME_PM_FAB_PNtoVN_DIRECT ] = { 34, 33, -1, -1, -1, -1 }, [ POWER5_PME_PM_PTEG_FROM_L2MISS ] = { -1, -1, 189, -1, -1, -1 }, [ POWER5_PME_PM_CMPLU_STALL_LSU ] = { -1, 13, -1, -1, -1, -1 }, [ POWER5_PME_PM_MRK_DSLB_MISS ] = { -1, -1, 134, 144, -1, -1 }, [ POWER5_PME_PM_LSU_FLUSH_ULD ] = { 142, -1, -1, -1, -1, -1 }, [ POWER5_PME_PM_PTEG_FROM_LMEM ] = { -1, 183, 157, -1, -1, -1 }, [ POWER5_PME_PM_MRK_BRU_FIN ] = { -1, 158, -1, -1, -1, -1 }, [ 
POWER5_PME_PM_MEM_WQ_DISP_WRITE ] = { 159, 157, -1, -1, -1, -1 }, [ POWER5_PME_PM_MRK_DATA_FROM_L275_MOD_CYC ] = { -1, -1, -1, 137, -1, -1 }, [ POWER5_PME_PM_LSU1_NCLD ] = { -1, -1, 108, 112, -1, -1 }, [ POWER5_PME_PM_L2SA_RCLD_DISP_FAIL_OTHER ] = { -1, -1, 65, 69, -1, -1 }, [ POWER5_PME_PM_SNOOP_PW_RETRY_WQ_PWQ ] = { -1, -1, 159, 167, -1, -1 }, [ POWER5_PME_PM_FPR_MAP_FULL_CYC ] = { 35, 34, -1, -1, -1, -1 }, [ POWER5_PME_PM_FPU1_FULL_CYC ] = { 50, 49, -1, -1, -1, -1 }, [ POWER5_PME_PM_L3SA_ALL_BUSY ] = { 106, 104, -1, -1, -1, -1 }, [ POWER5_PME_PM_3INST_CLB_CYC ] = { 4, 3, -1, -1, -1, -1 }, [ POWER5_PME_PM_MEM_PWQ_DISP_Q2or3 ] = { -1, -1, 123, 128, -1, -1 }, [ POWER5_PME_PM_L2SA_SHR_INV ] = { -1, -1, 69, 73, -1, -1 }, [ POWER5_PME_PM_THRESH_TIMEO ] = { -1, -1, 185, -1, -1, -1 }, [ POWER5_PME_PM_L2SA_RC_DISP_FAIL_CO_BUSY_ALL ] = { -1, -1, 68, 72, -1, -1 }, [ POWER5_PME_PM_THRD_SEL_OVER_GCT_IMBAL ] = { -1, -1, 179, 185, -1, -1 }, [ POWER5_PME_PM_FPU_FSQRT ] = { -1, 53, -1, -1, -1, -1 }, [ POWER5_PME_PM_MRK_LSU0_FLUSH_LRQ ] = { -1, -1, 139, 150, -1, -1 }, [ POWER5_PME_PM_PMC1_OVERFLOW ] = { -1, 180, -1, -1, -1, -1 }, [ POWER5_PME_PM_L3SC_SNOOP_RETRY ] = { -1, -1, 99, 103, -1, -1 }, [ POWER5_PME_PM_DATA_TABLEWALK_CYC ] = { 20, 19, -1, -1, -1, -1 }, [ POWER5_PME_PM_THRD_PRIO_6_CYC ] = { 208, 202, -1, -1, -1, -1 }, [ POWER5_PME_PM_FPU_FEST ] = { -1, -1, -1, 43, -1, -1 }, [ POWER5_PME_PM_FAB_M1toP1_SIDECAR_EMPTY ] = { 31, 30, -1, -1, -1, -1 }, [ POWER5_PME_PM_MRK_DATA_FROM_RMEM ] = { 166, -1, -1, 142, -1, -1 }, [ POWER5_PME_PM_MRK_DATA_FROM_L35_MOD_CYC ] = { -1, -1, -1, 138, -1, -1 }, [ POWER5_PME_PM_MEM_PWQ_DISP ] = { 153, 151, -1, -1, -1, -1 }, [ POWER5_PME_PM_FAB_P1toM1_SIDECAR_EMPTY ] = { 32, 31, -1, -1, -1, -1 }, [ POWER5_PME_PM_LD_MISS_L1_LSU0 ] = { -1, -1, 101, 104, -1, -1 }, [ POWER5_PME_PM_SNOOP_PARTIAL_RTRY_QFULL ] = { -1, -1, 158, 166, -1, -1 }, [ POWER5_PME_PM_FPU1_STALL3 ] = { 52, 51, -1, -1, -1, -1 }, [ POWER5_PME_PM_GCT_USAGE_80to99_CYC ] = { -1, -1, 47, 
-1, -1, -1 }, [ POWER5_PME_PM_WORK_HELD ] = { -1, -1, -1, 192, -1, -1 }, [ POWER5_PME_PM_INST_CMPL ] = { 174, 174, -1, -1, 0, -1 }, [ POWER5_PME_PM_LSU1_FLUSH_UST ] = { 133, 131, -1, -1, -1, -1 }, [ POWER5_PME_PM_FXU_IDLE ] = { 59, -1, -1, -1, -1, -1 }, [ POWER5_PME_PM_LSU0_FLUSH_ULD ] = { 121, 119, -1, -1, -1, -1 }, [ POWER5_PME_PM_LSU1_REJECT_LMQ_FULL ] = { 135, 133, -1, -1, -1, -1 }, [ POWER5_PME_PM_GRP_DISP_REJECT ] = { 65, 65, -1, 55, -1, -1 }, [ POWER5_PME_PM_L2SA_MOD_INV ] = { -1, -1, 63, 67, -1, -1 }, [ POWER5_PME_PM_PTEG_FROM_L25_SHR ] = { 184, -1, -1, -1, -1, -1 }, [ POWER5_PME_PM_FAB_CMD_RETRIED ] = { -1, -1, 17, 22, -1, -1 }, [ POWER5_PME_PM_L3SA_SHR_INV ] = { -1, -1, 90, 94, -1, -1 }, [ POWER5_PME_PM_L2SB_RC_DISP_FAIL_CO_BUSY_ALL ] = { -1, -1, 76, 80, -1, -1 }, [ POWER5_PME_PM_L2SA_RCST_DISP_FAIL_ADDR ] = { -1, -1, 66, 70, -1, -1 }, [ POWER5_PME_PM_L2SA_RCLD_DISP_FAIL_RC_FULL ] = { 84, 82, -1, -1, -1, -1 }, [ POWER5_PME_PM_PTEG_FROM_L375_MOD ] = { 188, -1, -1, 164, -1, -1 }, [ POWER5_PME_PM_MRK_LSU1_FLUSH_UST ] = { -1, -1, 146, 157, -1, -1 }, [ POWER5_PME_PM_BR_ISSUED ] = { -1, -1, 0, 1, -1, -1 }, [ POWER5_PME_PM_MRK_GRP_BR_REDIR ] = { -1, 172, -1, -1, -1, -1 }, [ POWER5_PME_PM_EE_OFF ] = { -1, -1, 15, 19, -1, -1 }, [ POWER5_PME_PM_MEM_RQ_DISP_Q4to7 ] = { -1, -1, 126, 131, -1, -1 }, [ POWER5_PME_PM_MEM_FAST_PATH_RD_DISP ] = { -1, -1, 190, 193, -1, -1 }, [ POWER5_PME_PM_INST_FROM_L3 ] = { 78, -1, -1, -1, -1, -1 }, [ POWER5_PME_PM_ITLB_MISS ] = { 81, 79, -1, -1, -1, -1 }, [ POWER5_PME_PM_FXU1_BUSY_FXU0_IDLE ] = { -1, -1, -1, 49, -1, -1 }, [ POWER5_PME_PM_FXLS_FULL_CYC ] = { -1, -1, -1, 47, -1, -1 }, [ POWER5_PME_PM_DTLB_REF_4K ] = { 26, 25, -1, -1, -1, -1 }, [ POWER5_PME_PM_GRP_DISP_VALID ] = { 66, 66, -1, -1, -1, -1 }, [ POWER5_PME_PM_LSU_FLUSH_UST ] = { -1, 140, -1, -1, -1, -1 }, [ POWER5_PME_PM_FXU1_FIN ] = { -1, -1, 44, 50, -1, -1 }, [ POWER5_PME_PM_THRD_PRIO_4_CYC ] = { 206, 200, -1, -1, -1, -1 }, [ POWER5_PME_PM_MRK_DATA_FROM_L35_MOD ] = { -1, 163, 
131, -1, -1, -1 }, [ POWER5_PME_PM_4INST_CLB_CYC ] = { 5, 4, -1, -1, -1, -1 }, [ POWER5_PME_PM_MRK_DTLB_REF_16M ] = { 169, 170, -1, -1, -1, -1 }, [ POWER5_PME_PM_INST_FROM_L375_MOD ] = { -1, -1, -1, 62, -1, -1 }, [ POWER5_PME_PM_L2SC_RCST_DISP_FAIL_ADDR ] = { -1, -1, 82, 86, -1, -1 }, [ POWER5_PME_PM_GRP_CMPL ] = { -1, -1, 49, -1, -1, -1 }, [ POWER5_PME_PM_FPU1_1FLOP ] = { 45, 44, -1, -1, -1, -1 }, [ POWER5_PME_PM_FPU_FRSP_FCONV ] = { -1, -1, 39, -1, -1, -1 }, [ POWER5_PME_PM_5INST_CLB_CYC ] = { 6, 5, -1, -1, -1, -1 }, [ POWER5_PME_PM_L3SC_REF ] = { 114, 112, -1, -1, -1, -1 }, [ POWER5_PME_PM_THRD_L2MISS_BOTH_CYC ] = { -1, -1, 170, 176, -1, -1 }, [ POWER5_PME_PM_MEM_PW_GATH ] = { -1, -1, 124, 129, -1, -1 }, [ POWER5_PME_PM_FAB_PNtoNN_SIDECAR ] = { -1, -1, 21, 26, -1, -1 }, [ POWER5_PME_PM_FAB_DCLAIM_ISSUED ] = { 28, 27, -1, -1, -1, -1 }, [ POWER5_PME_PM_GRP_IC_MISS ] = { 67, 67, -1, -1, -1, -1 }, [ POWER5_PME_PM_INST_FROM_L35_SHR ] = { 79, -1, -1, -1, -1, -1 }, [ POWER5_PME_PM_LSU_LMQ_FULL_CYC ] = { -1, -1, 111, 116, -1, -1 }, [ POWER5_PME_PM_MRK_DATA_FROM_L2_CYC ] = { -1, 162, -1, -1, -1, -1 }, [ POWER5_PME_PM_LSU_SRQ_SYNC_CYC ] = { -1, -1, 119, 124, -1, -1 }, [ POWER5_PME_PM_LSU0_BUSY_REJECT ] = { 117, 115, -1, -1, -1, -1 }, [ POWER5_PME_PM_LSU_REJECT_ERAT_MISS ] = { 145, -1, -1, -1, -1, -1 }, [ POWER5_PME_PM_MRK_DATA_FROM_RMEM_CYC ] = { -1, -1, -1, 143, -1, -1 }, [ POWER5_PME_PM_DATA_FROM_L375_SHR ] = { -1, -1, 10, -1, -1, -1 }, [ POWER5_PME_PM_FPU0_FMOV_FEST ] = { -1, -1, 31, 36, -1, -1 }, [ POWER5_PME_PM_PTEG_FROM_L25_MOD ] = { -1, 181, 153, -1, -1, -1 }, [ POWER5_PME_PM_LD_REF_L1_LSU0 ] = { -1, -1, 103, 107, -1, -1 }, [ POWER5_PME_PM_THRD_PRIO_7_CYC ] = { 209, 203, -1, -1, -1, -1 }, [ POWER5_PME_PM_LSU1_FLUSH_SRQ ] = { 131, 129, -1, -1, -1, -1 }, [ POWER5_PME_PM_L2SC_RCST_DISP ] = { 101, 99, -1, -1, -1, -1 }, [ POWER5_PME_PM_CMPLU_STALL_DIV ] = { -1, -1, -1, 7, -1, -1 }, [ POWER5_PME_PM_MEM_RQ_DISP_Q12to15 ] = { -1, -1, 121, 126, -1, -1 }, [ 
POWER5_PME_PM_INST_FROM_L375_SHR ] = { -1, -1, 58, -1, -1, -1 }, [ POWER5_PME_PM_ST_REF_L1 ] = { -1, -1, 165, -1, -1, -1 }, [ POWER5_PME_PM_L3SB_ALL_BUSY ] = { 109, 107, -1, -1, -1, -1 }, [ POWER5_PME_PM_FAB_P1toVNorNN_SIDECAR_EMPTY ] = { -1, -1, 20, 25, -1, -1 }, [ POWER5_PME_PM_MRK_DATA_FROM_L275_SHR_CYC ] = { -1, 161, -1, -1, -1, -1 }, [ POWER5_PME_PM_FAB_HOLDtoNN_EMPTY ] = { 29, 28, -1, -1, -1, -1 }, [ POWER5_PME_PM_DATA_FROM_LMEM ] = { -1, 18, 11, -1, -1, -1 }, [ POWER5_PME_PM_RUN_CYC ] = { 190, -1, -1, -1, -1, 0 }, [ POWER5_PME_PM_PTEG_FROM_RMEM ] = { 189, -1, -1, 165, -1, -1 }, [ POWER5_PME_PM_L2SC_RCLD_DISP ] = { 99, 97, -1, -1, -1, -1 }, [ POWER5_PME_PM_LSU0_LDF ] = { -1, -1, 105, 109, -1, -1 }, [ POWER5_PME_PM_LSU_LRQ_S0_VALID ] = { 144, 143, -1, -1, -1, -1 }, [ POWER5_PME_PM_PMC3_OVERFLOW ] = { -1, -1, -1, 162, -1, -1 }, [ POWER5_PME_PM_MRK_IMR_RELOAD ] = { 173, 173, -1, -1, -1, -1 }, [ POWER5_PME_PM_MRK_GRP_TIMEO ] = { -1, -1, -1, 148, -1, -1 }, [ POWER5_PME_PM_ST_MISS_L1 ] = { -1, -1, 164, 171, -1, -1 }, [ POWER5_PME_PM_STOP_COMPLETION ] = { -1, -1, 163, -1, -1, -1 }, [ POWER5_PME_PM_LSU_BUSY_REJECT ] = { 139, -1, -1, -1, -1, -1 }, [ POWER5_PME_PM_ISLB_MISS ] = { 80, 78, -1, -1, -1, -1 }, [ POWER5_PME_PM_CYC ] = { 12, 15, 6, 12, -1, -1 }, [ POWER5_PME_PM_THRD_ONE_RUN_CYC ] = { 202, -1, -1, -1, -1, -1 }, [ POWER5_PME_PM_GRP_BR_REDIR_NONSPEC ] = { 64, -1, -1, -1, -1, -1 }, [ POWER5_PME_PM_LSU1_SRQ_STFWD ] = { 138, 136, -1, -1, -1, -1 }, [ POWER5_PME_PM_L3SC_MOD_INV ] = { -1, -1, 97, 101, -1, -1 }, [ POWER5_PME_PM_L2_PREF ] = { -1, -1, 87, 91, -1, -1 }, [ POWER5_PME_PM_GCT_NOSLOT_BR_MPRED ] = { -1, -1, -1, 51, -1, -1 }, [ POWER5_PME_PM_MRK_DATA_FROM_L25_MOD ] = { -1, 159, 129, -1, -1, -1 }, [ POWER5_PME_PM_L2SB_MOD_INV ] = { -1, -1, 71, 75, -1, -1 }, [ POWER5_PME_PM_L2SB_ST_REQ ] = { 97, 95, -1, -1, -1, -1 }, [ POWER5_PME_PM_MRK_L1_RELOAD_VALID ] = { -1, -1, 138, 149, -1, -1 }, [ POWER5_PME_PM_L3SB_HIT ] = { -1, -1, 92, 96, -1, -1 }, [ 
POWER5_PME_PM_L2SB_SHR_MOD ] = { 96, 94, -1, -1, -1, -1 }, [ POWER5_PME_PM_EE_OFF_EXT_INT ] = { -1, -1, 16, 20, -1, -1 }, [ POWER5_PME_PM_1PLUS_PPC_CMPL ] = { 2, -1, -1, -1, -1, -1 }, [ POWER5_PME_PM_L2SC_SHR_MOD ] = { 104, 102, -1, -1, -1, -1 }, [ POWER5_PME_PM_PMC6_OVERFLOW ] = { -1, -1, 152, -1, -1, -1 }, [ POWER5_PME_PM_LSU_LRQ_FULL_CYC ] = { -1, -1, 116, 120, -1, -1 }, [ POWER5_PME_PM_IC_PREF_INSTALL ] = { -1, -1, 54, 58, -1, -1 }, [ POWER5_PME_PM_TLB_MISS ] = { 210, -1, -1, -1, -1, -1 }, [ POWER5_PME_PM_GCT_FULL_CYC ] = { 61, 60, -1, 52, -1, -1 }, [ POWER5_PME_PM_FXU_BUSY ] = { -1, 57, -1, -1, -1, -1 }, [ POWER5_PME_PM_MRK_DATA_FROM_L3_CYC ] = { -1, 166, -1, -1, -1, -1 }, [ POWER5_PME_PM_LSU_REJECT_LMQ_FULL ] = { -1, 144, -1, -1, -1, -1 }, [ POWER5_PME_PM_LSU_SRQ_S0_ALLOC ] = { 147, 146, -1, -1, -1, -1 }, [ POWER5_PME_PM_GRP_MRK ] = { 70, -1, -1, -1, -1, -1 }, [ POWER5_PME_PM_INST_FROM_L25_SHR ] = { 77, -1, -1, -1, -1, -1 }, [ POWER5_PME_PM_FPU1_FIN ] = { -1, -1, 35, 40, -1, -1 }, [ POWER5_PME_PM_DC_PREF_STREAM_ALLOC ] = { -1, -1, 14, 18, -1, -1 }, [ POWER5_PME_PM_BR_MPRED_TA ] = { -1, -1, 2, 3, -1, -1 }, [ POWER5_PME_PM_CRQ_FULL_CYC ] = { -1, -1, 5, 11, -1, -1 }, [ POWER5_PME_PM_L2SA_RCLD_DISP ] = { 83, 81, -1, -1, -1, -1 }, [ POWER5_PME_PM_SNOOP_WR_RETRY_QFULL ] = { -1, -1, 161, 169, -1, -1 }, [ POWER5_PME_PM_MRK_DTLB_REF_4K ] = { 170, 171, -1, -1, -1, -1 }, [ POWER5_PME_PM_LSU_SRQ_S0_VALID ] = { 148, 147, -1, -1, -1, -1 }, [ POWER5_PME_PM_LSU0_FLUSH_LRQ ] = { 119, 117, -1, -1, -1, -1 }, [ POWER5_PME_PM_INST_FROM_L275_MOD ] = { -1, -1, -1, 61, -1, -1 }, [ POWER5_PME_PM_GCT_EMPTY_CYC ] = { -1, 195, -1, -1, -1, -1 }, [ POWER5_PME_PM_LARX_LSU0 ] = { 115, 113, -1, -1, -1, -1 }, [ POWER5_PME_PM_THRD_PRIO_DIFF_5or6_CYC ] = { -1, -1, 174, 180, -1, -1 }, [ POWER5_PME_PM_SNOOP_RETRY_1AHEAD ] = { 195, 189, -1, -1, -1, -1 }, [ POWER5_PME_PM_FPU1_FSQRT ] = { 49, 48, -1, -1, -1, -1 }, [ POWER5_PME_PM_MRK_LD_MISS_L1_LSU1 ] = { 177, 176, -1, -1, -1, -1 }, [ 
POWER5_PME_PM_MRK_FPU_FIN ] = { -1, -1, 136, -1, -1, -1 }, [ POWER5_PME_PM_THRD_PRIO_5_CYC ] = { 207, 201, -1, -1, -1, -1 }, [ POWER5_PME_PM_MRK_DATA_FROM_LMEM ] = { -1, 167, 133, -1, -1, -1 }, [ POWER5_PME_PM_FPU1_FRSP_FCONV ] = { -1, -1, 37, 42, -1, -1 }, [ POWER5_PME_PM_SNOOP_TLBIE ] = { 196, 190, -1, -1, -1, -1 }, [ POWER5_PME_PM_L3SB_SNOOP_RETRY ] = { -1, -1, 95, 99, -1, -1 }, [ POWER5_PME_PM_FAB_VBYPASS_EMPTY ] = { -1, -1, 23, 28, -1, -1 }, [ POWER5_PME_PM_MRK_DATA_FROM_L275_MOD ] = { 162, -1, -1, 136, -1, -1 }, [ POWER5_PME_PM_6INST_CLB_CYC ] = { 7, 6, -1, -1, -1, -1 }, [ POWER5_PME_PM_L2SB_RCST_DISP ] = { 93, 91, -1, -1, -1, -1 }, [ POWER5_PME_PM_FLUSH ] = { -1, -1, 26, 31, -1, -1 }, [ POWER5_PME_PM_L2SC_MOD_INV ] = { -1, -1, 79, 83, -1, -1 }, [ POWER5_PME_PM_FPU_DENORM ] = { 54, -1, -1, -1, -1, -1 }, [ POWER5_PME_PM_L3SC_HIT ] = { -1, -1, 96, 100, -1, -1 }, [ POWER5_PME_PM_SNOOP_WR_RETRY_RQ ] = { 197, 191, -1, -1, -1, -1 }, [ POWER5_PME_PM_LSU1_REJECT_SRQ ] = { 137, 135, -1, -1, -1, -1 }, [ POWER5_PME_PM_IC_PREF_REQ ] = { 71, 69, -1, -1, -1, -1 }, [ POWER5_PME_PM_L3SC_ALL_BUSY ] = { 112, 110, -1, -1, -1, -1 }, [ POWER5_PME_PM_MRK_GRP_IC_MISS ] = { -1, -1, -1, 147, -1, -1 }, [ POWER5_PME_PM_GCT_NOSLOT_IC_MISS ] = { -1, 59, -1, -1, -1, -1 }, [ POWER5_PME_PM_MRK_DATA_FROM_L3 ] = { 163, -1, -1, -1, -1, -1 }, [ POWER5_PME_PM_GCT_NOSLOT_SRQ_FULL ] = { -1, -1, 46, -1, -1, -1 }, [ POWER5_PME_PM_THRD_SEL_OVER_ISU_HOLD ] = { -1, -1, 180, 186, -1, -1 }, [ POWER5_PME_PM_CMPLU_STALL_DCACHE_MISS ] = { -1, 10, -1, -1, -1, -1 }, [ POWER5_PME_PM_L3SA_MOD_INV ] = { -1, -1, 89, 93, -1, -1 }, [ POWER5_PME_PM_LSU_FLUSH_LRQ ] = { -1, 138, -1, -1, -1, -1 }, [ POWER5_PME_PM_THRD_PRIO_2_CYC ] = { 204, 198, -1, -1, -1, -1 }, [ POWER5_PME_PM_LSU_FLUSH_SRQ ] = { 141, -1, -1, -1, -1, -1 }, [ POWER5_PME_PM_MRK_LSU_SRQ_INST_VALID ] = { -1, -1, 149, 161, -1, -1 }, [ POWER5_PME_PM_L3SA_REF ] = { 108, 106, -1, -1, -1, -1 }, [ POWER5_PME_PM_L2SC_RC_DISP_FAIL_CO_BUSY_ALL ] = { -1, -1, 84, 
88, -1, -1 }, [ POWER5_PME_PM_FPU0_STALL3 ] = { 43, 42, -1, -1, -1, -1 }, [ POWER5_PME_PM_GPR_MAP_FULL_CYC ] = { -1, -1, 48, 53, -1, -1 }, [ POWER5_PME_PM_TB_BIT_TRANS ] = { 201, -1, -1, -1, -1, -1 }, [ POWER5_PME_PM_MRK_LSU_FLUSH_LRQ ] = { -1, -1, 147, -1, -1, -1 }, [ POWER5_PME_PM_FPU0_STF ] = { 44, 43, -1, -1, -1, -1 }, [ POWER5_PME_PM_MRK_DTLB_MISS ] = { -1, -1, 135, 145, -1, -1 }, [ POWER5_PME_PM_FPU1_FMA ] = { 48, 47, -1, -1, -1, -1 }, [ POWER5_PME_PM_L2SA_MOD_TAG ] = { 82, 80, -1, -1, -1, -1 }, [ POWER5_PME_PM_LSU1_FLUSH_ULD ] = { 132, 130, -1, -1, -1, -1 }, [ POWER5_PME_PM_MRK_LSU0_FLUSH_UST ] = { -1, -1, 141, 152, -1, -1 }, [ POWER5_PME_PM_MRK_INST_FIN ] = { -1, -1, 137, -1, -1, -1 }, [ POWER5_PME_PM_FPU0_FULL_CYC ] = { 41, 40, -1, -1, -1, -1 }, [ POWER5_PME_PM_LSU_LRQ_S0_ALLOC ] = { 143, 142, -1, -1, -1, -1 }, [ POWER5_PME_PM_MRK_LSU1_FLUSH_ULD ] = { -1, -1, 145, 156, -1, -1 }, [ POWER5_PME_PM_MRK_DTLB_REF ] = { 213, -1, -1, -1, -1, -1 }, [ POWER5_PME_PM_BR_UNCOND ] = { 9, -1, -1, -1, -1, -1 }, [ POWER5_PME_PM_THRD_SEL_OVER_L2MISS ] = { -1, -1, 181, 187, -1, -1 }, [ POWER5_PME_PM_L2SB_SHR_INV ] = { -1, -1, 77, 81, -1, -1 }, [ POWER5_PME_PM_MEM_LO_PRIO_WR_CMPL ] = { -1, -1, 122, 127, -1, -1 }, [ POWER5_PME_PM_L3SC_MOD_TAG ] = { 113, 111, -1, -1, -1, -1 }, [ POWER5_PME_PM_MRK_ST_MISS_L1 ] = { 180, 179, -1, -1, -1, -1 }, [ POWER5_PME_PM_GRP_DISP_SUCCESS ] = { -1, -1, 51, -1, -1, -1 }, [ POWER5_PME_PM_THRD_PRIO_DIFF_1or2_CYC ] = { -1, -1, 172, 178, -1, -1 }, [ POWER5_PME_PM_IC_DEMAND_L2_BHT_REDIRECT ] = { -1, -1, 52, 56, -1, -1 }, [ POWER5_PME_PM_MEM_WQ_DISP_Q8to15 ] = { -1, -1, 127, 132, -1, -1 }, [ POWER5_PME_PM_FPU0_SINGLE ] = { 42, 41, -1, -1, -1, -1 }, [ POWER5_PME_PM_LSU_DERAT_MISS ] = { -1, 137, -1, -1, -1, -1 }, [ POWER5_PME_PM_THRD_PRIO_1_CYC ] = { 203, 197, -1, -1, -1, -1 }, [ POWER5_PME_PM_L2SC_RCST_DISP_FAIL_OTHER ] = { -1, -1, 83, 87, -1, -1 }, [ POWER5_PME_PM_FPU1_FEST ] = { -1, -1, 34, 39, -1, -1 }, [ POWER5_PME_PM_FAB_HOLDtoVN_EMPTY ] = { 30, 
29, -1, -1, -1, -1 }, [ POWER5_PME_PM_SNOOP_RD_RETRY_RQ ] = { 194, 188, -1, -1, -1, -1 }, [ POWER5_PME_PM_SNOOP_DCLAIM_RETRY_QFULL ] = { 191, 185, -1, -1, -1, -1 }, [ POWER5_PME_PM_MRK_DATA_FROM_L25_SHR_CYC ] = { -1, 160, -1, -1, -1, -1 }, [ POWER5_PME_PM_MRK_ST_CMPL_INT ] = { -1, -1, 150, -1, -1, -1 }, [ POWER5_PME_PM_FLUSH_BR_MPRED ] = { -1, -1, 24, 29, -1, -1 }, [ POWER5_PME_PM_L2SB_RCLD_DISP_FAIL_ADDR ] = { -1, -1, 72, 76, -1, -1 }, [ POWER5_PME_PM_FPU_STF ] = { -1, 56, -1, -1, -1, -1 }, [ POWER5_PME_PM_CMPLU_STALL_FPU ] = { -1, -1, -1, 9, -1, -1 }, [ POWER5_PME_PM_THRD_PRIO_DIFF_minus1or2_CYC ] = { -1, -1, 175, 181, -1, -1 }, [ POWER5_PME_PM_GCT_NOSLOT_CYC ] = { 60, -1, -1, -1, -1, -1 }, [ POWER5_PME_PM_FXU0_BUSY_FXU1_IDLE ] = { -1, -1, 42, -1, -1, -1 }, [ POWER5_PME_PM_PTEG_FROM_L35_SHR ] = { 187, -1, -1, -1, -1, -1 }, [ POWER5_PME_PM_MRK_LSU_FLUSH_UST ] = { -1, -1, 148, -1, -1, -1 }, [ POWER5_PME_PM_L3SA_HIT ] = { -1, -1, 88, 92, -1, -1 }, [ POWER5_PME_PM_MRK_DATA_FROM_L25_SHR ] = { 161, -1, -1, -1, -1, -1 }, [ POWER5_PME_PM_L2SB_RCST_DISP_FAIL_ADDR ] = { -1, -1, 74, 78, -1, -1 }, [ POWER5_PME_PM_MRK_DATA_FROM_L35_SHR ] = { 164, -1, -1, -1, -1, -1 }, [ POWER5_PME_PM_IERAT_XLATE_WR ] = { 72, 70, -1, -1, -1, -1 }, [ POWER5_PME_PM_L2SA_ST_REQ ] = { 89, 87, -1, -1, -1, -1 }, [ POWER5_PME_PM_THRD_SEL_T1 ] = { -1, -1, 183, 189, -1, -1 }, [ POWER5_PME_PM_IC_DEMAND_L2_BR_REDIRECT ] = { -1, -1, 53, 57, -1, -1 }, [ POWER5_PME_PM_INST_FROM_LMEM ] = { -1, 77, -1, -1, -1, -1 }, [ POWER5_PME_PM_FPU0_1FLOP ] = { 36, 35, -1, -1, -1, -1 }, [ POWER5_PME_PM_MRK_DATA_FROM_L35_SHR_CYC ] = { -1, 164, -1, -1, -1, -1 }, [ POWER5_PME_PM_PTEG_FROM_L2 ] = { 183, -1, -1, -1, -1, -1 }, [ POWER5_PME_PM_MEM_PW_CMPL ] = { 154, 152, -1, -1, -1, -1 }, [ POWER5_PME_PM_THRD_PRIO_DIFF_minus5or6_CYC ] = { -1, -1, 177, 183, -1, -1 }, [ POWER5_PME_PM_L2SB_RCLD_DISP_FAIL_OTHER ] = { -1, -1, 73, 77, -1, -1 }, [ POWER5_PME_PM_FPU0_FIN ] = { -1, -1, 30, 35, -1, -1 }, [ POWER5_PME_PM_MRK_DTLB_MISS_4K ] 
= { 168, 169, -1, -1, -1, -1 }, [ POWER5_PME_PM_L3SC_SHR_INV ] = { -1, -1, 98, 102, -1, -1 }, [ POWER5_PME_PM_GRP_BR_REDIR ] = { 63, 62, -1, -1, -1, -1 }, [ POWER5_PME_PM_L2SC_RCLD_DISP_FAIL_RC_FULL ] = { 100, 98, -1, -1, -1, -1 }, [ POWER5_PME_PM_MRK_LSU_FLUSH_SRQ ] = { -1, -1, -1, 159, -1, -1 }, [ POWER5_PME_PM_PTEG_FROM_L275_SHR ] = { -1, -1, 154, -1, -1, -1 }, [ POWER5_PME_PM_L2SB_RCLD_DISP_FAIL_RC_FULL ] = { 92, 90, -1, -1, -1, -1 }, [ POWER5_PME_PM_SNOOP_RD_RETRY_WQ ] = { -1, -1, 160, 168, -1, -1 }, [ POWER5_PME_PM_LSU0_NCLD ] = { -1, -1, 106, 110, -1, -1 }, [ POWER5_PME_PM_FAB_DCLAIM_RETRIED ] = { -1, -1, 18, 23, -1, -1 }, [ POWER5_PME_PM_LSU1_BUSY_REJECT ] = { 128, 126, -1, -1, -1, -1 }, [ POWER5_PME_PM_FXLS0_FULL_CYC ] = { -1, -1, 40, 45, -1, -1 }, [ POWER5_PME_PM_FPU0_FEST ] = { -1, -1, 29, 34, -1, -1 }, [ POWER5_PME_PM_DTLB_REF_16M ] = { 25, 24, -1, -1, -1, -1 }, [ POWER5_PME_PM_L2SC_RCLD_DISP_FAIL_ADDR ] = { -1, -1, 80, 84, -1, -1 }, [ POWER5_PME_PM_LSU0_REJECT_ERAT_MISS ] = { 123, 121, -1, -1, -1, -1 }, [ POWER5_PME_PM_DATA_FROM_L25_MOD ] = { -1, 16, 7, -1, -1, -1 }, [ POWER5_PME_PM_GCT_USAGE_60to79_CYC ] = { -1, 61, -1, -1, -1, -1 }, [ POWER5_PME_PM_DATA_FROM_L375_MOD ] = { 18, -1, -1, 14, -1, -1 }, [ POWER5_PME_PM_LSU_LMQ_SRQ_EMPTY_CYC ] = { -1, 141, 115, -1, -1, -1 }, [ POWER5_PME_PM_LSU0_REJECT_RELOAD_CDF ] = { 125, 123, -1, -1, -1, -1 }, [ POWER5_PME_PM_0INST_FETCH ] = { -1, -1, -1, 0, -1, -1 }, [ POWER5_PME_PM_LSU1_REJECT_RELOAD_CDF ] = { 136, 134, -1, -1, -1, -1 }, [ POWER5_PME_PM_L1_PREF ] = { -1, -1, 61, 65, -1, -1 }, [ POWER5_PME_PM_MEM_WQ_DISP_Q0to7 ] = { 158, 156, -1, -1, -1, -1 }, [ POWER5_PME_PM_MRK_DATA_FROM_LMEM_CYC ] = { -1, -1, -1, 141, -1, -1 }, [ POWER5_PME_PM_BRQ_FULL_CYC ] = { 8, 7, -1, -1, -1, -1 }, [ POWER5_PME_PM_GRP_IC_MISS_NONSPEC ] = { 69, -1, -1, -1, -1, -1 }, [ POWER5_PME_PM_PTEG_FROM_L275_MOD ] = { 185, -1, -1, 163, -1, -1 }, [ POWER5_PME_PM_MRK_LD_MISS_L1_LSU0 ] = { 176, 175, -1, -1, -1, -1 }, [ 
POWER5_PME_PM_MRK_DATA_FROM_L375_SHR_CYC ] = { -1, 165, -1, -1, -1, -1 }, [ POWER5_PME_PM_LSU_FLUSH ] = { -1, -1, 109, 113, -1, -1 }, [ POWER5_PME_PM_DATA_FROM_L3 ] = { 16, -1, 192, -1, -1, -1 }, [ POWER5_PME_PM_INST_FROM_L2 ] = { 76, -1, -1, -1, -1, -1 }, [ POWER5_PME_PM_PMC2_OVERFLOW ] = { -1, -1, 151, -1, -1, -1 }, [ POWER5_PME_PM_FPU0_DENORM ] = { 37, 36, -1, -1, -1, -1 }, [ POWER5_PME_PM_FPU1_FMOV_FEST ] = { -1, -1, 36, 41, -1, -1 }, [ POWER5_PME_PM_INST_FETCH_CYC ] = { 75, 73, -1, -1, -1, -1 }, [ POWER5_PME_PM_LSU_LDF ] = { -1, -1, -1, 115, -1, -1 }, [ POWER5_PME_PM_INST_DISP ] = { -1, -1, 56, 60, -1, -1 }, [ POWER5_PME_PM_DATA_FROM_L25_SHR ] = { 14, -1, -1, -1, -1, -1 }, [ POWER5_PME_PM_L1_DCACHE_RELOAD_VALID ] = { -1, -1, 60, 64, -1, -1 }, [ POWER5_PME_PM_MEM_WQ_DISP_DCLAIM ] = { -1, -1, 128, 133, -1, -1 }, [ POWER5_PME_PM_FPU_FULL_CYC ] = { 57, -1, -1, -1, -1, -1 }, [ POWER5_PME_PM_MRK_GRP_ISSUED ] = { 172, -1, -1, -1, -1, -1 }, [ POWER5_PME_PM_THRD_PRIO_3_CYC ] = { 205, 199, -1, -1, -1, -1 }, [ POWER5_PME_PM_FPU_FMA ] = { -1, 54, -1, -1, -1, -1 }, [ POWER5_PME_PM_INST_FROM_L35_MOD ] = { -1, 76, -1, -1, -1, -1 }, [ POWER5_PME_PM_MRK_CRU_FIN ] = { -1, -1, -1, 134, -1, -1 }, [ POWER5_PME_PM_SNOOP_WR_RETRY_WQ ] = { -1, -1, 162, 170, -1, -1 }, [ POWER5_PME_PM_CMPLU_STALL_REJECT ] = { -1, -1, -1, 10, -1, -1 }, [ POWER5_PME_PM_LSU1_REJECT_ERAT_MISS ] = { 134, 132, -1, -1, -1, -1 }, [ POWER5_PME_PM_MRK_FXU_FIN ] = { -1, 58, -1, -1, -1, -1 }, [ POWER5_PME_PM_L2SB_RCST_DISP_FAIL_OTHER ] = { -1, -1, 75, 79, -1, -1 }, [ POWER5_PME_PM_L2SC_RC_DISP_FAIL_CO_BUSY ] = { 103, 101, -1, -1, -1, -1 }, [ POWER5_PME_PM_PMC4_OVERFLOW ] = { 181, -1, -1, -1, -1, -1 }, [ POWER5_PME_PM_L3SA_SNOOP_RETRY ] = { -1, -1, 91, 95, -1, -1 }, [ POWER5_PME_PM_PTEG_FROM_L35_MOD ] = { -1, 182, 155, -1, -1, -1 }, [ POWER5_PME_PM_INST_FROM_L25_MOD ] = { -1, 75, -1, -1, -1, -1 }, [ POWER5_PME_PM_THRD_SMT_HANG ] = { -1, -1, 184, 190, -1, -1 }, [ POWER5_PME_PM_CMPLU_STALL_ERAT_MISS ] = { -1, -1, -1, 
8, -1, -1 }, [ POWER5_PME_PM_L3SA_MOD_TAG ] = { 107, 105, -1, -1, -1, -1 }, [ POWER5_PME_PM_FLUSH_SYNC ] = { -1, -1, 28, 33, -1, -1 }, [ POWER5_PME_PM_INST_FROM_L2MISS ] = { 212, -1, -1, -1, -1, -1 }, [ POWER5_PME_PM_L2SC_ST_HIT ] = { -1, -1, 86, 90, -1, -1 }, [ POWER5_PME_PM_MEM_RQ_DISP_Q8to11 ] = { 150, 148, -1, -1, -1, -1 }, [ POWER5_PME_PM_MRK_GRP_DISP ] = { 171, -1, -1, -1, -1, -1 }, [ POWER5_PME_PM_L2SB_MOD_TAG ] = { 90, 88, -1, -1, -1, -1 }, [ POWER5_PME_PM_CLB_EMPTY_CYC ] = { -1, -1, 169, 175, -1, -1 }, [ POWER5_PME_PM_L2SB_ST_HIT ] = { -1, -1, 78, 82, -1, -1 }, [ POWER5_PME_PM_MEM_NONSPEC_RD_CANCEL ] = { -1, -1, 125, 130, -1, -1 }, [ POWER5_PME_PM_BR_PRED_CR_TA ] = { -1, -1, -1, 5, -1, -1 }, [ POWER5_PME_PM_MRK_LSU0_FLUSH_SRQ ] = { -1, -1, 140, 151, -1, -1 }, [ POWER5_PME_PM_MRK_LSU_FLUSH_ULD ] = { -1, -1, -1, 160, -1, -1 }, [ POWER5_PME_PM_INST_DISP_ATTEMPT ] = { 74, 72, -1, -1, -1, -1 }, [ POWER5_PME_PM_INST_FROM_RMEM ] = { -1, -1, -1, 63, -1, -1 }, [ POWER5_PME_PM_ST_REF_L1_LSU0 ] = { -1, -1, 166, 172, -1, -1 }, [ POWER5_PME_PM_LSU0_DERAT_MISS ] = { 118, 116, -1, -1, -1, -1 }, [ POWER5_PME_PM_L2SB_RCLD_DISP ] = { 91, 89, -1, -1, -1, -1 }, [ POWER5_PME_PM_FPU_STALL3 ] = { -1, 55, -1, -1, -1, -1 }, [ POWER5_PME_PM_BR_PRED_CR ] = { -1, -1, 3, 4, -1, -1 }, [ POWER5_PME_PM_MRK_DATA_FROM_L2 ] = { 160, -1, -1, -1, -1, -1 }, [ POWER5_PME_PM_LSU0_FLUSH_SRQ ] = { 120, 118, -1, -1, -1, -1 }, [ POWER5_PME_PM_FAB_PNtoNN_DIRECT ] = { 33, 32, -1, -1, -1, -1 }, [ POWER5_PME_PM_IOPS_CMPL ] = { 73, 71, 55, 59, -1, -1 }, [ POWER5_PME_PM_L2SC_SHR_INV ] = { -1, -1, 85, 89, -1, -1 }, [ POWER5_PME_PM_L2SA_RCST_DISP_FAIL_OTHER ] = { -1, -1, 67, 71, -1, -1 }, [ POWER5_PME_PM_L2SA_RCST_DISP ] = { 85, 83, -1, -1, -1, -1 }, [ POWER5_PME_PM_SNOOP_RETRY_AB_COLLISION ] = { -1, -1, -1, 194, -1, -1 }, [ POWER5_PME_PM_FAB_PNtoVN_SIDECAR ] = { -1, -1, 22, 27, -1, -1 }, [ POWER5_PME_PM_LSU_LMQ_S0_ALLOC ] = { -1, -1, 113, 118, -1, -1 }, [ POWER5_PME_PM_LSU0_REJECT_LMQ_FULL ] = { 124, 122, 
-1, -1, -1, -1 }, [ POWER5_PME_PM_SNOOP_PW_RETRY_RQ ] = { 192, 186, -1, 196, -1, -1 }, [ POWER5_PME_PM_DTLB_REF ] = { -1, 63, -1, -1, -1, -1 }, [ POWER5_PME_PM_PTEG_FROM_L3 ] = { 186, -1, -1, -1, -1, -1 }, [ POWER5_PME_PM_FAB_M1toVNorNN_SIDECAR_EMPTY ] = { -1, -1, 19, 24, -1, -1 }, [ POWER5_PME_PM_LSU_SRQ_EMPTY_CYC ] = { -1, -1, -1, 122, -1, -1 }, [ POWER5_PME_PM_FPU1_STF ] = { 53, 52, -1, -1, -1, -1 }, [ POWER5_PME_PM_LSU_LMQ_S0_VALID ] = { -1, -1, 114, 119, -1, -1 }, [ POWER5_PME_PM_GCT_USAGE_00to59_CYC ] = { 62, -1, -1, -1, -1, -1 }, [ POWER5_PME_PM_DATA_FROM_L2MISS ] = { -1, -1, 187, -1, -1, -1 }, [ POWER5_PME_PM_GRP_DISP_BLK_SB_CYC ] = { -1, -1, 50, 54, -1, -1 }, [ POWER5_PME_PM_FPU_FMOV_FEST ] = { -1, -1, 38, -1, -1, -1 }, [ POWER5_PME_PM_XER_MAP_FULL_CYC ] = { 211, 204, -1, -1, -1, -1 }, [ POWER5_PME_PM_FLUSH_SB ] = { -1, -1, 27, 32, -1, -1 }, [ POWER5_PME_PM_MRK_DATA_FROM_L375_SHR ] = { -1, -1, 132, -1, -1, -1 }, [ POWER5_PME_PM_MRK_GRP_CMPL ] = { -1, -1, -1, 146, -1, -1 }, [ POWER5_PME_PM_SUSPENDED ] = { 200, 194, 168, 174, -1, -1 }, [ POWER5_PME_PM_GRP_IC_MISS_BR_REDIR_NONSPEC ] = { 68, 205, -1, -1, -1, -1 }, [ POWER5_PME_PM_SNOOP_RD_RETRY_QFULL ] = { 193, 187, -1, -1, -1, -1 }, [ POWER5_PME_PM_L3SB_MOD_INV ] = { -1, -1, 93, 97, -1, -1 }, [ POWER5_PME_PM_DATA_FROM_L35_SHR ] = { 17, -1, -1, -1, -1, -1 }, [ POWER5_PME_PM_LD_MISS_L1_LSU1 ] = { -1, -1, 102, 105, -1, -1 }, [ POWER5_PME_PM_STCX_FAIL ] = { 198, 192, -1, -1, -1, -1 }, [ POWER5_PME_PM_DC_PREF_DST ] = { -1, -1, 13, 17, -1, -1 }, [ POWER5_PME_PM_GRP_DISP ] = { -1, 64, -1, -1, -1, -1 }, [ POWER5_PME_PM_L2SA_RCLD_DISP_FAIL_ADDR ] = { -1, -1, 64, 68, -1, -1 }, [ POWER5_PME_PM_FPU0_FPSCR ] = { -1, -1, 32, 37, -1, -1 }, [ POWER5_PME_PM_DATA_FROM_L2 ] = { 13, -1, -1, -1, -1, -1 }, [ POWER5_PME_PM_FPU1_DENORM ] = { 46, 45, -1, -1, -1, -1 }, [ POWER5_PME_PM_FPU_1FLOP ] = { 56, -1, -1, -1, -1, -1 }, [ POWER5_PME_PM_L2SC_RCLD_DISP_FAIL_OTHER ] = { -1, -1, 81, 85, -1, -1 }, [ 
POWER5_PME_PM_L2SC_RCST_DISP_FAIL_RC_FULL ] = { 102, 100, -1, -1, -1, -1 }, [ POWER5_PME_PM_FPU0_FSQRT ] = { 40, 39, -1, -1, -1, -1 }, [ POWER5_PME_PM_LD_REF_L1 ] = { -1, -1, -1, 106, -1, -1 }, [ POWER5_PME_PM_INST_FROM_L1 ] = { -1, 74, -1, -1, -1, -1 }, [ POWER5_PME_PM_TLBIE_HELD ] = { -1, -1, 186, 191, -1, -1 }, [ POWER5_PME_PM_DC_PREF_OUT_OF_STREAMS ] = { -1, -1, 117, 121, -1, -1 }, [ POWER5_PME_PM_MRK_DATA_FROM_L25_MOD_CYC ] = { -1, -1, -1, 135, -1, -1 }, [ POWER5_PME_PM_MRK_LSU1_FLUSH_SRQ ] = { -1, -1, 144, 155, -1, -1 }, [ POWER5_PME_PM_MEM_RQ_DISP_Q0to3 ] = { 155, 153, -1, -1, -1, -1 }, [ POWER5_PME_PM_ST_REF_L1_LSU1 ] = { -1, -1, 167, 173, -1, -1 }, [ POWER5_PME_PM_MRK_LD_MISS_L1 ] = { 175, -1, -1, -1, -1, -1 }, [ POWER5_PME_PM_L1_WRITE_CYC ] = { -1, -1, 62, 66, -1, -1 }, [ POWER5_PME_PM_L2SC_ST_REQ ] = { 105, 103, -1, -1, -1, -1 }, [ POWER5_PME_PM_CMPLU_STALL_FDIV ] = { -1, 11, -1, -1, -1, -1 }, [ POWER5_PME_PM_THRD_SEL_OVER_CLB_EMPTY ] = { -1, -1, 178, 184, -1, -1 }, [ POWER5_PME_PM_BR_MPRED_CR ] = { -1, -1, 1, 2, -1, -1 }, [ POWER5_PME_PM_L3SB_MOD_TAG ] = { 110, 108, -1, -1, -1, -1 }, [ POWER5_PME_PM_MRK_DATA_FROM_L2MISS ] = { -1, -1, 188, -1, -1, -1 }, [ POWER5_PME_PM_LSU_REJECT_SRQ ] = { 146, -1, -1, -1, -1, -1 }, [ POWER5_PME_PM_LD_MISS_L1 ] = { -1, -1, 100, -1, -1, -1 }, [ POWER5_PME_PM_INST_FROM_PREF ] = { -1, -1, 59, -1, -1, -1 }, [ POWER5_PME_PM_DC_INV_L2 ] = { -1, -1, 12, 16, -1, -1 }, [ POWER5_PME_PM_STCX_PASS ] = { 199, 193, -1, -1, -1, -1 }, [ POWER5_PME_PM_LSU_SRQ_FULL_CYC ] = { -1, -1, 118, 123, -1, -1 }, [ POWER5_PME_PM_FPU_FIN ] = { -1, -1, -1, 44, -1, -1 }, [ POWER5_PME_PM_L2SA_SHR_MOD ] = { 88, 86, -1, -1, -1, -1 }, [ POWER5_PME_PM_LSU_SRQ_STFWD ] = { 149, -1, -1, -1, -1, -1 }, [ POWER5_PME_PM_0INST_CLB_CYC ] = { 0, 0, -1, -1, -1, -1 }, [ POWER5_PME_PM_FXU0_FIN ] = { -1, -1, 43, 48, -1, -1 }, [ POWER5_PME_PM_L2SB_RCST_DISP_FAIL_RC_FULL ] = { 94, 92, -1, -1, -1, -1 }, [ POWER5_PME_PM_THRD_GRP_CMPL_BOTH_CYC ] = { -1, 196, -1, -1, -1, -1 }, 
[ POWER5_PME_PM_PMC5_OVERFLOW ] = { 182, -1, -1, -1, -1, -1 }, [ POWER5_PME_PM_FPU0_FDIV ] = { 38, 37, -1, -1, -1, -1 }, [ POWER5_PME_PM_PTEG_FROM_L375_SHR ] = { -1, -1, 156, -1, -1, -1 }, [ POWER5_PME_PM_LD_REF_L1_LSU1 ] = { -1, -1, 104, 108, -1, -1 }, [ POWER5_PME_PM_L2SA_RC_DISP_FAIL_CO_BUSY ] = { 87, 85, -1, -1, -1, -1 }, [ POWER5_PME_PM_HV_CYC ] = { -1, 68, -1, -1, -1, -1 }, [ POWER5_PME_PM_THRD_PRIO_DIFF_0_CYC ] = { -1, -1, 171, 177, -1, -1 }, [ POWER5_PME_PM_LR_CTR_MAP_FULL_CYC ] = { 116, 114, -1, -1, -1, -1 }, [ POWER5_PME_PM_L3SB_SHR_INV ] = { -1, -1, 94, 98, -1, -1 }, [ POWER5_PME_PM_DATA_FROM_RMEM ] = { 19, -1, -1, 15, -1, -1 }, [ POWER5_PME_PM_DATA_FROM_L275_MOD ] = { 15, -1, -1, 13, -1, -1 }, [ POWER5_PME_PM_LSU0_REJECT_SRQ ] = { 126, 124, -1, -1, -1, -1 }, [ POWER5_PME_PM_LSU1_DERAT_MISS ] = { 129, 127, -1, -1, -1, -1 }, [ POWER5_PME_PM_MRK_LSU_FIN ] = { -1, -1, -1, 158, -1, -1 }, [ POWER5_PME_PM_DTLB_MISS_16M ] = { 23, 22, -1, -1, -1, -1 }, [ POWER5_PME_PM_LSU0_FLUSH_UST ] = { 122, 120, -1, -1, -1, -1 }, [ POWER5_PME_PM_L2SC_MOD_TAG ] = { 98, 96, -1, -1, -1, -1 }, [ POWER5_PME_PM_L2SB_RC_DISP_FAIL_CO_BUSY ] = { 95, 93, -1, -1, -1, -1 } }; static const unsigned long long power5_group_vecs[][POWER5_NUM_GROUP_VEC] = { [ POWER5_PME_PM_LSU_REJECT_RELOAD_CDF ] = { 0x0000000000040000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FPU1_SINGLE ] = { 0x0000000000000000ULL, 0x0000000000400000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L3SB_REF ] = { 0x0000000000000000ULL, 0x0000000000001000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_THRD_PRIO_DIFF_3or4_CYC ] = { 0x0000000000000000ULL, 0x0000000040000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_INST_FROM_L275_SHR ] = { 0x0040000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_DATA_FROM_L375_MOD ] = { 0x0000000000000000ULL, 0x0400000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_DTLB_MISS_4K ] = { 0x0000400000000000ULL, 0x0000000000000000ULL, 
0x0000000000000000ULL }, [ POWER5_PME_PM_CLB_FULL_CYC ] = { 0x0000000000000800ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_ST_CMPL ] = { 0x0000000000000000ULL, 0x4000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LSU_FLUSH_LRQ_FULL ] = { 0x0000000008000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_DATA_FROM_L275_SHR ] = { 0x0000000000000000ULL, 0x0080000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_1INST_CLB_CYC ] = { 0x0000000000001000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MEM_SPEC_RD_CANCEL ] = { 0x0000000000000000ULL, 0x0000200000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_DTLB_MISS_16M ] = { 0x0000000000000000ULL, 0x0800000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FPU_FDIV ] = { 0x0000000000000000ULL, 0x0000000000004000ULL, 0x0000000000000800ULL }, [ POWER5_PME_PM_FPU_SINGLE ] = { 0x0000000000000000ULL, 0x0000000000020000ULL, 0x0000000000000400ULL }, [ POWER5_PME_PM_FPU0_FMA ] = { 0x0000000000000000ULL, 0x0000000000800000ULL, 0x0000000000000080ULL }, [ POWER5_PME_PM_SLB_MISS ] = { 0x0000010000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LSU1_FLUSH_LRQ ] = { 0x0000000000400000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L2SA_ST_HIT ] = { 0x4000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_DTLB_MISS ] = { 0x0000080000000000ULL, 0x0000000000000000ULL, 0x0000000000000004ULL }, [ POWER5_PME_PM_BR_PRED_TA ] = { 0x0000020000000000ULL, 0x0000000000000000ULL, 0x0000000000000020ULL }, [ POWER5_PME_PM_MRK_DATA_FROM_L375_MOD_CYC ] = { 0x0000000000000000ULL, 0x0400000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_CMPLU_STALL_FXU ] = { 0x0000000040000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_EXT_INT ] = { 0x0000000000000000ULL, 0x0000400000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_LSU1_FLUSH_LRQ ] = { 
0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LSU1_LDF ] = { 0x0000000000000000ULL, 0x0000000000400000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_ST_GPS ] = { 0x0000000000000000ULL, 0x8000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FAB_CMD_ISSUED ] = { 0x0000000000000000ULL, 0x0000002000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LSU0_SRQ_STFWD ] = { 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_CR_MAP_FULL_CYC ] = { 0x0000000400000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L2SA_RCST_DISP_FAIL_RC_FULL ] = { 0x2000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_LSU0_FLUSH_ULD ] = { 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LSU_FLUSH_SRQ_FULL ] = { 0x0000000008000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FLUSH_IMBAL ] = { 0x0000000000084000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MEM_RQ_DISP_Q16to19 ] = { 0x0000000000000000ULL, 0x0000100000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_THRD_PRIO_DIFF_minus3or4_CYC ] = { 0x0000000000000000ULL, 0x0000000080000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_DATA_FROM_L35_MOD ] = { 0x0008000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MEM_HI_PRIO_WR_CMPL ] = { 0x0000000000000000ULL, 0x0000080000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FPU1_FDIV ] = { 0x0000000000000000ULL, 0x0000000000100000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FPU0_FRSP_FCONV ] = { 0x0000000000000000ULL, 0x0000000000100000ULL, 0x0000000000000080ULL }, [ POWER5_PME_PM_MEM_RQ_DISP ] = { 0x0000000000000000ULL, 0x0000200000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LWSYNC_HELD ] = { 0x0000000000010000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FXU_FIN ] = { 0x0000000000000000ULL, 
0x0000000008000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_DSLB_MISS ] = { 0x0000200000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FXLS1_FULL_CYC ] = { 0x0000000200000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_DATA_FROM_L275_SHR ] = { 0x0004000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_THRD_SEL_T0 ] = { 0x0000000000000000ULL, 0x0000000400000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_PTEG_RELOAD_VALID ] = { 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LSU_LMQ_LHR_MERGE ] = { 0x0000000000000200ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_STCX_FAIL ] = { 0x0000000000000000ULL, 0x8000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_2INST_CLB_CYC ] = { 0x0000000000000008ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FAB_PNtoVN_DIRECT ] = { 0x0000000000000000ULL, 0x0000008000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_PTEG_FROM_L2MISS ] = { 0x0400000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_CMPLU_STALL_LSU ] = { 0x0000000010000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_DSLB_MISS ] = { 0x0000000000000000ULL, 0x1800000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LSU_FLUSH_ULD ] = { 0x0000000001000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_PTEG_FROM_LMEM ] = { 0x0400000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_BRU_FIN ] = { 0x0000000000000000ULL, 0x0008000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MEM_WQ_DISP_WRITE ] = { 0x0000000000000000ULL, 0x0000800000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_DATA_FROM_L275_MOD_CYC ] = { 0x0000000000000000ULL, 0x0200000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LSU1_NCLD ] = { 0x0000001000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL 
}, [ POWER5_PME_PM_L2SA_RCLD_DISP_FAIL_OTHER ] = { 0x1000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_SNOOP_PW_RETRY_WQ_PWQ ] = { 0x0000000000000000ULL, 0x0000100000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FPR_MAP_FULL_CYC ] = { 0x0000000800000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FPU1_FULL_CYC ] = { 0x0000000200000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L3SA_ALL_BUSY ] = { 0x0000000000000000ULL, 0x0000000000000800ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_3INST_CLB_CYC ] = { 0x0000000000000000ULL, 0x0000000010000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MEM_PWQ_DISP_Q2or3 ] = { 0x0000000000000000ULL, 0x0001000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L2SA_SHR_INV ] = { 0x0000000000000000ULL, 0x0000000000000100ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_THRESH_TIMEO ] = { 0x0000000000000000ULL, 0x0002000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L2SA_RC_DISP_FAIL_CO_BUSY_ALL ] = { 0x4000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_THRD_SEL_OVER_GCT_IMBAL ] = { 0x0000000000000000ULL, 0x0000000800000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FPU_FSQRT ] = { 0x0000000000000000ULL, 0x0000000000008000ULL, 0x0000000000000800ULL }, [ POWER5_PME_PM_MRK_LSU0_FLUSH_LRQ ] = { 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_PMC1_OVERFLOW ] = { 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L3SC_SNOOP_RETRY ] = { 0x0000000000000000ULL, 0x0000000000002000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_DATA_TABLEWALK_CYC ] = { 0x0000080000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_THRD_PRIO_6_CYC ] = { 0x0000000000000000ULL, 0x0000000040000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FPU_FEST ] = { 0x0000000000000000ULL, 0x0000000000004000ULL, 0x0000000000000000ULL }, [ 
POWER5_PME_PM_FAB_M1toP1_SIDECAR_EMPTY ] = { 0x0000000000000000ULL, 0x0000010000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_DATA_FROM_RMEM ] = { 0x0000000000000000ULL, 0x0080000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_DATA_FROM_L35_MOD_CYC ] = { 0x0000000000000008ULL, 0x0040000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MEM_PWQ_DISP ] = { 0x0000000000000000ULL, 0x0001000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FAB_P1toM1_SIDECAR_EMPTY ] = { 0x0000000000000000ULL, 0x0000004000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LD_MISS_L1_LSU0 ] = { 0x0000200000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_SNOOP_PARTIAL_RTRY_QFULL ] = { 0x0000000000000000ULL, 0x0000020000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FPU1_STALL3 ] = { 0x0000000000000000ULL, 0x0000000000200000ULL, 0x0000000000000040ULL }, [ POWER5_PME_PM_GCT_USAGE_80to99_CYC ] = { 0x0000000000000040ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_WORK_HELD ] = { 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_INST_CMPL ] = { 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0x000000000001ffffULL }, [ POWER5_PME_PM_LSU1_FLUSH_UST ] = { 0x0000000004000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FXU_IDLE ] = { 0x0000000000000000ULL, 0x0000000004000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LSU0_FLUSH_ULD ] = { 0x0000000002000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LSU1_REJECT_LMQ_FULL ] = { 0x0000000000020000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_GRP_DISP_REJECT ] = { 0x0000000000000004ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L2SA_MOD_INV ] = { 0x0000000000000000ULL, 0x0000000000000100ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_PTEG_FROM_L25_SHR ] = { 0x0100000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ 
POWER5_PME_PM_FAB_CMD_RETRIED ] = { 0x0000000000000000ULL, 0x0000002000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L3SA_SHR_INV ] = { 0x0000000000000000ULL, 0x0000000000000020ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L2SB_RC_DISP_FAIL_CO_BUSY_ALL ] = { 0x0000000000000000ULL, 0x0000000000000002ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L2SA_RCST_DISP_FAIL_ADDR ] = { 0x2000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L2SA_RCLD_DISP_FAIL_RC_FULL ] = { 0x1000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_PTEG_FROM_L375_MOD ] = { 0x0200000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_LSU1_FLUSH_UST ] = { 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_BR_ISSUED ] = { 0x0000000001020000ULL, 0x0000000000000000ULL, 0x0000000000000020ULL }, [ POWER5_PME_PM_MRK_GRP_BR_REDIR ] = { 0x0000000000000000ULL, 0x0000000008000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_EE_OFF ] = { 0x0000000000000000ULL, 0x0000010000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MEM_RQ_DISP_Q4to7 ] = { 0x0000000000000000ULL, 0x0000400000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MEM_FAST_PATH_RD_DISP ] = { 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_INST_FROM_L3 ] = { 0x0010000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_ITLB_MISS ] = { 0x0000000000100000ULL, 0x0000000000000000ULL, 0x0000000000000004ULL }, [ POWER5_PME_PM_FXU1_BUSY_FXU0_IDLE ] = { 0x0000000000000000ULL, 0x0000000004000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FXLS_FULL_CYC ] = { 0x0000000000000000ULL, 0x0000000008000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_DTLB_REF_4K ] = { 0x0000400000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_GRP_DISP_VALID ] = { 0x0000000000000004ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ 
POWER5_PME_PM_LSU_FLUSH_UST ] = { 0x0000000001080000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FXU1_FIN ] = { 0x0000000000000000ULL, 0x0000000010000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_THRD_PRIO_4_CYC ] = { 0x0000000000000000ULL, 0x0000000020000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_DATA_FROM_L35_MOD ] = { 0x0000000000000000ULL, 0x0040000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_4INST_CLB_CYC ] = { 0x0000000000000000ULL, 0x0000000010000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_DTLB_REF_16M ] = { 0x0000000000000000ULL, 0x1000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_INST_FROM_L375_MOD ] = { 0x0080000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L2SC_RCST_DISP_FAIL_ADDR ] = { 0x0000000000000000ULL, 0x0000000000000008ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_GRP_CMPL ] = { 0x0000000000000002ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FPU1_1FLOP ] = { 0x0000000000000000ULL, 0x0000000001000000ULL, 0x0000000000000100ULL }, [ POWER5_PME_PM_FPU_FRSP_FCONV ] = { 0x0000000000000000ULL, 0x0000000000008000ULL, 0x0000000000000800ULL }, [ POWER5_PME_PM_5INST_CLB_CYC ] = { 0x0000000000000010ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L3SC_REF ] = { 0x0000000000000000ULL, 0x0000000000002000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_THRD_L2MISS_BOTH_CYC ] = { 0x0000000000000000ULL, 0x0000000200000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MEM_PW_GATH ] = { 0x0000000000000000ULL, 0x0001000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FAB_PNtoNN_SIDECAR ] = { 0x0000000000000000ULL, 0x0000008000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FAB_DCLAIM_ISSUED ] = { 0x0000000000000000ULL, 0x0000002000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_GRP_IC_MISS ] = { 0x0000008000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_INST_FROM_L35_SHR ] = { 
0x0080000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LSU_LMQ_FULL_CYC ] = { 0x0000000100000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_DATA_FROM_L2_CYC ] = { 0x0000000000000000ULL, 0x0010000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LSU_SRQ_SYNC_CYC ] = { 0x0000000000000100ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LSU0_BUSY_REJECT ] = { 0x0000002000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LSU_REJECT_ERAT_MISS ] = { 0x0000000000004000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_DATA_FROM_RMEM_CYC ] = { 0x0000000000000000ULL, 0x0080000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_DATA_FROM_L375_SHR ] = { 0x0008000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FPU0_FMOV_FEST ] = { 0x0000000000000000ULL, 0x0000000000080000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_PTEG_FROM_L25_MOD ] = { 0x0100000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LD_REF_L1_LSU0 ] = { 0x0000400000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_THRD_PRIO_7_CYC ] = { 0x0000000000000000ULL, 0x0000000020000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LSU1_FLUSH_SRQ ] = { 0x0000000000800000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L2SC_RCST_DISP ] = { 0x0000000000000000ULL, 0x0000000000000008ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_CMPLU_STALL_DIV ] = { 0x0000000040000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MEM_RQ_DISP_Q12to15 ] = { 0x0000000000000000ULL, 0x0000100000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_INST_FROM_L375_SHR ] = { 0x0080000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_ST_REF_L1 ] = { 0x0000100000000000ULL, 0x0000000000000000ULL, 0x0000000000008207ULL }, [ POWER5_PME_PM_L3SB_ALL_BUSY ] = { 
0x0000000000000000ULL, 0x0000000000000800ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FAB_P1toVNorNN_SIDECAR_EMPTY ] = { 0x0000000000000000ULL, 0x0000004000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_DATA_FROM_L275_SHR_CYC ] = { 0x0000000000000000ULL, 0x0280000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FAB_HOLDtoNN_EMPTY ] = { 0x0000000000000000ULL, 0x0000010000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_DATA_FROM_LMEM ] = { 0x0003000000000000ULL, 0x0000000000000000ULL, 0x000000000000000aULL }, [ POWER5_PME_PM_RUN_CYC ] = { 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0x000000000001ffffULL }, [ POWER5_PME_PM_PTEG_FROM_RMEM ] = { 0x0400000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L2SC_RCLD_DISP ] = { 0x0000000000000000ULL, 0x0000000000000004ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LSU0_LDF ] = { 0x0000000000000000ULL, 0x0000000002400000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LSU_LRQ_S0_VALID ] = { 0x0000000000000080ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_PMC3_OVERFLOW ] = { 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_IMR_RELOAD ] = { 0x0000000000000000ULL, 0x0002000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_GRP_TIMEO ] = { 0x0000000000000000ULL, 0x8000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_ST_MISS_L1 ] = { 0x0000100000000000ULL, 0x0000000000000000ULL, 0x0000000000004008ULL }, [ POWER5_PME_PM_STOP_COMPLETION ] = { 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LSU_BUSY_REJECT ] = { 0x0000000000001000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_ISLB_MISS ] = { 0x0000200000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_CYC ] = { 0x0000000020000003ULL, 0x0000001000000000ULL, 0x000000000001f010ULL }, [ POWER5_PME_PM_THRD_ONE_RUN_CYC ] = { 0x0000000000000000ULL, 0x0000000200000000ULL, 
0x0000000000000000ULL }, [ POWER5_PME_PM_GRP_BR_REDIR_NONSPEC ] = { 0x0000040000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LSU1_SRQ_STFWD ] = { 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L3SC_MOD_INV ] = { 0x0000000000000000ULL, 0x0000000000000080ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L2_PREF ] = { 0x0000000000003000ULL, 0x0000000000000000ULL, 0x0000000000000010ULL }, [ POWER5_PME_PM_GCT_NOSLOT_BR_MPRED ] = { 0x0000000000000020ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_DATA_FROM_L25_MOD ] = { 0x0000000000000000ULL, 0x0010000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L2SB_MOD_INV ] = { 0x0000000000000000ULL, 0x0000000000000200ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L2SB_ST_REQ ] = { 0x0000000000000000ULL, 0x0000000000000002ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_L1_RELOAD_VALID ] = { 0x0000000000000000ULL, 0x0008000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L3SB_HIT ] = { 0x0000000000000000ULL, 0x0000000000001000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L2SB_SHR_MOD ] = { 0x0000000000000000ULL, 0x0000000000000200ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_EE_OFF_EXT_INT ] = { 0x0000000000000000ULL, 0x0000200000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_1PLUS_PPC_CMPL ] = { 0x0000000000000002ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L2SC_SHR_MOD ] = { 0x0000000000000000ULL, 0x0000000000000400ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_PMC6_OVERFLOW ] = { 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LSU_LRQ_FULL_CYC ] = { 0x0000000100000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_IC_PREF_INSTALL ] = { 0x0000004000000800ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_TLB_MISS ] = { 0x0000010000000000ULL, 0x0000000000000000ULL, 0x0000000000008000ULL }, [ POWER5_PME_PM_GCT_FULL_CYC ] = { 
0x0000000000000040ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FXU_BUSY ] = { 0x0000000000000000ULL, 0x0000000004000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_DATA_FROM_L3_CYC ] = { 0x0000000000000000ULL, 0x0040000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LSU_REJECT_LMQ_FULL ] = { 0x0000000000004000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LSU_SRQ_S0_ALLOC ] = { 0x0000000000000100ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_GRP_MRK ] = { 0x0000000010000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_INST_FROM_L25_SHR ] = { 0x0040000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FPU1_FIN ] = { 0x0000000000000000ULL, 0x0000000000010000ULL, 0x0000000000000500ULL }, [ POWER5_PME_PM_DC_PREF_STREAM_ALLOC ] = { 0x0000000000000400ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_BR_MPRED_TA ] = { 0x0000010000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_CRQ_FULL_CYC ] = { 0x0000000400000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L2SA_RCLD_DISP ] = { 0x1000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_SNOOP_WR_RETRY_QFULL ] = { 0x0000000000000000ULL, 0x0000020000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_DTLB_REF_4K ] = { 0x0000000000000000ULL, 0x1000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LSU_SRQ_S0_VALID ] = { 0x0000000000000100ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LSU0_FLUSH_LRQ ] = { 0x0000000000400000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_INST_FROM_L275_MOD ] = { 0x0040000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_GCT_EMPTY_CYC ] = { 0x0000000000000002ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LARX_LSU0 ] = { 0x0000000100000000ULL, 0x0000000000000000ULL, 
0x0000000000000000ULL }, [ POWER5_PME_PM_THRD_PRIO_DIFF_5or6_CYC ] = { 0x0000000000000000ULL, 0x0000000040000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_SNOOP_RETRY_1AHEAD ] = { 0x0000000000000000ULL, 0x0000040000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FPU1_FSQRT ] = { 0x0000000000000000ULL, 0x0000000000040000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_LD_MISS_L1_LSU1 ] = { 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_FPU_FIN ] = { 0x0000000000000000ULL, 0x8000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_THRD_PRIO_5_CYC ] = { 0x0000000000000000ULL, 0x0000000080000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_DATA_FROM_LMEM ] = { 0x0000000000000000ULL, 0x0100000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FPU1_FRSP_FCONV ] = { 0x0000000000000000ULL, 0x0000000000900000ULL, 0x0000000000000080ULL }, [ POWER5_PME_PM_SNOOP_TLBIE ] = { 0x0000000000000000ULL, 0x0000000400000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L3SB_SNOOP_RETRY ] = { 0x0000000000000000ULL, 0x0000000000000800ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FAB_VBYPASS_EMPTY ] = { 0x0000000000000000ULL, 0x0000004000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_DATA_FROM_L275_MOD ] = { 0x0000000000000000ULL, 0x0200000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_6INST_CLB_CYC ] = { 0x0000000000000010ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L2SB_RCST_DISP ] = { 0x0000000000000000ULL, 0x0000000000000001ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FLUSH ] = { 0x0001000000040000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L2SC_MOD_INV ] = { 0x0000000000000000ULL, 0x0000000000000400ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FPU_DENORM ] = { 0x0000000000000000ULL, 0x0000000000010000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L3SC_HIT ] = { 0x0000000000000000ULL, 0x0000000000002000ULL, 0x0000000000000000ULL }, [ 
POWER5_PME_PM_SNOOP_WR_RETRY_RQ ] = { 0x0000000000000000ULL, 0x0000080000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LSU1_REJECT_SRQ ] = { 0x0000000000002000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_IC_PREF_REQ ] = { 0x0000004000000000ULL, 0x0000000000000000ULL, 0x0000000000000010ULL }, [ POWER5_PME_PM_L3SC_ALL_BUSY ] = { 0x0000000000000000ULL, 0x0000000000002000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_GRP_IC_MISS ] = { 0x0000000000000000ULL, 0x0008000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_GCT_NOSLOT_IC_MISS ] = { 0x0000000000000020ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_DATA_FROM_L3 ] = { 0x0000000000000000ULL, 0x0040000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_GCT_NOSLOT_SRQ_FULL ] = { 0x0000000000000020ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_THRD_SEL_OVER_ISU_HOLD ] = { 0x0000000000000000ULL, 0x0000001000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_CMPLU_STALL_DCACHE_MISS ] = { 0x0000000020000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L3SA_MOD_INV ] = { 0x0000000000000000ULL, 0x0000000000000020ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LSU_FLUSH_LRQ ] = { 0x0000000000200000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_THRD_PRIO_2_CYC ] = { 0x0000000000000000ULL, 0x0000000080000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LSU_FLUSH_SRQ ] = { 0x0000000000200000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_LSU_SRQ_INST_VALID ] = { 0x0000000000000010ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L3SA_REF ] = { 0x0000000000000000ULL, 0x0000000000001000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L2SC_RC_DISP_FAIL_CO_BUSY_ALL ] = { 0x0000000000000000ULL, 0x0000000000000010ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FPU0_STALL3 ] = { 0x0000000000000000ULL, 0x0000000000200000ULL, 0x0000000000000040ULL }, [ 
POWER5_PME_PM_GPR_MAP_FULL_CYC ] = { 0x0000000400000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_TB_BIT_TRANS ] = { 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_LSU_FLUSH_LRQ ] = { 0x0000000008000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FPU0_STF ] = { 0x0000000000000000ULL, 0x0000000002000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_DTLB_MISS ] = { 0x0000000000000000ULL, 0x0800000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FPU1_FMA ] = { 0x0000000000000000ULL, 0x0000000000800000ULL, 0x0000000000000080ULL }, [ POWER5_PME_PM_L2SA_MOD_TAG ] = { 0x0000000000000000ULL, 0x0000000000000100ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LSU1_FLUSH_ULD ] = { 0x0000000002000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_LSU0_FLUSH_UST ] = { 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_INST_FIN ] = { 0x0000000000000000ULL, 0x0004000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FPU0_FULL_CYC ] = { 0x0000000200000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LSU_LRQ_S0_ALLOC ] = { 0x0000000000000080ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_LSU1_FLUSH_ULD ] = { 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_DTLB_REF ] = { 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_BR_UNCOND ] = { 0x0000020000000000ULL, 0x0000000000000000ULL, 0x0000000000000020ULL }, [ POWER5_PME_PM_THRD_SEL_OVER_L2MISS ] = { 0x0000000000000000ULL, 0x0000001000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L2SB_SHR_INV ] = { 0x0000000000000000ULL, 0x0000000000000200ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MEM_LO_PRIO_WR_CMPL ] = { 0x0000000000000000ULL, 0x0000080000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L3SC_MOD_TAG ] = { 0x0000000000000000ULL, 
0x0000000000000080ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_ST_MISS_L1 ] = { 0x0000000000000000ULL, 0x4004000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_GRP_DISP_SUCCESS ] = { 0x0800000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_THRD_PRIO_DIFF_1or2_CYC ] = { 0x0000000000000000ULL, 0x0000000020000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_IC_DEMAND_L2_BHT_REDIRECT ] = { 0x0000002000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MEM_WQ_DISP_Q8to15 ] = { 0x0000000000000000ULL, 0x0000800000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FPU0_SINGLE ] = { 0x0000000000000000ULL, 0x0000000000400000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LSU_DERAT_MISS ] = { 0x0000100000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_THRD_PRIO_1_CYC ] = { 0x0000000000000000ULL, 0x0000000100000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L2SC_RCST_DISP_FAIL_OTHER ] = { 0x0000000000000000ULL, 0x0000000000000008ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FPU1_FEST ] = { 0x0000000000000000ULL, 0x0000000000040000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FAB_HOLDtoVN_EMPTY ] = { 0x0000000000000000ULL, 0x0000004000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_SNOOP_RD_RETRY_RQ ] = { 0x0000000000000000ULL, 0x0000040000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_SNOOP_DCLAIM_RETRY_QFULL ] = { 0x0000000000000000ULL, 0x0000020000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_DATA_FROM_L25_SHR_CYC ] = { 0x0000000000000000ULL, 0x0020000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_ST_CMPL_INT ] = { 0x0000000000000000ULL, 0x2000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FLUSH_BR_MPRED ] = { 0x0000040000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L2SB_RCLD_DISP_FAIL_ADDR ] = { 0x8000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FPU_STF ] = { 
0x0000000000000000ULL, 0x0000000000020000ULL, 0x0000000000002400ULL }, [ POWER5_PME_PM_CMPLU_STALL_FPU ] = { 0x0000000080000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_THRD_PRIO_DIFF_minus1or2_CYC ] = { 0x0000000000000000ULL, 0x0000000080000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_GCT_NOSLOT_CYC ] = { 0x0000000000000020ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FXU0_BUSY_FXU1_IDLE ] = { 0x0000000000000000ULL, 0x0000000004000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_PTEG_FROM_L35_SHR ] = { 0x0200000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_LSU_FLUSH_UST ] = { 0x0000000000000000ULL, 0x4000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L3SA_HIT ] = { 0x0000000000000000ULL, 0x0000000000001000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_DATA_FROM_L25_SHR ] = { 0x0000000000000000ULL, 0x0020000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L2SB_RCST_DISP_FAIL_ADDR ] = { 0x0000000000000000ULL, 0x0000000000000001ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_DATA_FROM_L35_SHR ] = { 0x0000000000000000ULL, 0x0100000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_IERAT_XLATE_WR ] = { 0x0000004000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L2SA_ST_REQ ] = { 0x4000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_THRD_SEL_T1 ] = { 0x0000000000000000ULL, 0x0000000400000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_IC_DEMAND_L2_BR_REDIRECT ] = { 0x0000002000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_INST_FROM_LMEM ] = { 0x0020000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FPU0_1FLOP ] = { 0x0000000000000000ULL, 0x0000000001000000ULL, 0x0000000000000100ULL }, [ POWER5_PME_PM_MRK_DATA_FROM_L35_SHR_CYC ] = { 0x0000000000000000ULL, 0x0100000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_PTEG_FROM_L2 ] = { 
0x0400000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MEM_PW_CMPL ] = { 0x0000000000000000ULL, 0x0001000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_THRD_PRIO_DIFF_minus5or6_CYC ] = { 0x0000000000000000ULL, 0x0000000100000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L2SB_RCLD_DISP_FAIL_OTHER ] = { 0x8000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FPU0_FIN ] = { 0x0000000000000000ULL, 0x0000000001010000ULL, 0x0000000000000540ULL }, [ POWER5_PME_PM_MRK_DTLB_MISS_4K ] = { 0x0000000000000000ULL, 0x0800000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L3SC_SHR_INV ] = { 0x0000000000000000ULL, 0x0000000000000080ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_GRP_BR_REDIR ] = { 0x0000040000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L2SC_RCLD_DISP_FAIL_RC_FULL ] = { 0x0000000000000000ULL, 0x0000000000000004ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_LSU_FLUSH_SRQ ] = { 0x0000000000004000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_PTEG_FROM_L275_SHR ] = { 0x0100000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L2SB_RCLD_DISP_FAIL_RC_FULL ] = { 0x8000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_SNOOP_RD_RETRY_WQ ] = { 0x0000000000000000ULL, 0x0000040000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LSU0_NCLD ] = { 0x0000001000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FAB_DCLAIM_RETRIED ] = { 0x0000000000000000ULL, 0x0000002000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LSU1_BUSY_REJECT ] = { 0x0000002000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FXLS0_FULL_CYC ] = { 0x0000000200000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FPU0_FEST ] = { 0x0000000000000000ULL, 0x0000000000040000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_DTLB_REF_16M ] = { 
0x0000800000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L2SC_RCLD_DISP_FAIL_ADDR ] = { 0x0000000000000000ULL, 0x0000000000000004ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LSU0_REJECT_ERAT_MISS ] = { 0x0000000000010000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_DATA_FROM_L25_MOD ] = { 0x0004000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_GCT_USAGE_60to79_CYC ] = { 0x0000000000000040ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_DATA_FROM_L375_MOD ] = { 0x0008000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LSU_LMQ_SRQ_EMPTY_CYC ] = { 0x0000000000000200ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LSU0_REJECT_RELOAD_CDF ] = { 0x0000000000008000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_0INST_FETCH ] = { 0x0020004000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LSU1_REJECT_RELOAD_CDF ] = { 0x0000000000008000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L1_PREF ] = { 0x0000000000000800ULL, 0x0000000000000000ULL, 0x0000000000000010ULL }, [ POWER5_PME_PM_MEM_WQ_DISP_Q0to7 ] = { 0x0000000000000000ULL, 0x0000800000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_DATA_FROM_LMEM_CYC ] = { 0x0000000000000000ULL, 0x0100000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_BRQ_FULL_CYC ] = { 0x0000000100000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_GRP_IC_MISS_NONSPEC ] = { 0x0000008000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_PTEG_FROM_L275_MOD ] = { 0x0100000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_LD_MISS_L1_LSU0 ] = { 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_DATA_FROM_L375_SHR_CYC ] = { 0x0000000000000000ULL, 0x0400000000000000ULL, 0x0000000000000000ULL }, [ 
POWER5_PME_PM_LSU_FLUSH ] = { 0x0000000006e40000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_DATA_FROM_L3 ] = { 0x0003000000000000ULL, 0x0000000000000000ULL, 0x000000000000000aULL }, [ POWER5_PME_PM_INST_FROM_L2 ] = { 0x0020000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_PMC2_OVERFLOW ] = { 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FPU0_DENORM ] = { 0x0000000000000000ULL, 0x0000000000080000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FPU1_FMOV_FEST ] = { 0x0000000000000000ULL, 0x0000000000080000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_INST_FETCH_CYC ] = { 0x0000000000000400ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LSU_LDF ] = { 0x0000000000000000ULL, 0x0000000000020000ULL, 0x0000000000002000ULL }, [ POWER5_PME_PM_INST_DISP ] = { 0x0000000000000005ULL, 0x0000000000000000ULL, 0x0000000000002000ULL }, [ POWER5_PME_PM_DATA_FROM_L25_SHR ] = { 0x0004000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L1_DCACHE_RELOAD_VALID ] = { 0x0000008000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MEM_WQ_DISP_DCLAIM ] = { 0x0000000000000000ULL, 0x0000800000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FPU_FULL_CYC ] = { 0x0000000080000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_GRP_ISSUED ] = { 0x0000000000000000ULL, 0x0008000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_THRD_PRIO_3_CYC ] = { 0x0000000000000000ULL, 0x0000000040000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FPU_FMA ] = { 0x0000000000000000ULL, 0x0000000000004000ULL, 0x0000000000010200ULL }, [ POWER5_PME_PM_INST_FROM_L35_MOD ] = { 0x0080000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_CRU_FIN ] = { 0x0000000000000000ULL, 0x2000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_SNOOP_WR_RETRY_WQ ] = { 0x0000000000000000ULL, 
0x0000080000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_CMPLU_STALL_REJECT ] = { 0x0000000010000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LSU1_REJECT_ERAT_MISS ] = { 0x0000000000010000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_FXU_FIN ] = { 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000001000ULL }, [ POWER5_PME_PM_L2SB_RCST_DISP_FAIL_OTHER ] = { 0x0000000000000000ULL, 0x0000000000000001ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L2SC_RC_DISP_FAIL_CO_BUSY ] = { 0x0000000000000000ULL, 0x0000000000000010ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_PMC4_OVERFLOW ] = { 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L3SA_SNOOP_RETRY ] = { 0x0000000000000000ULL, 0x0000000000000800ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_PTEG_FROM_L35_MOD ] = { 0x0200000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_INST_FROM_L25_MOD ] = { 0x0040000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_THRD_SMT_HANG ] = { 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_CMPLU_STALL_ERAT_MISS ] = { 0x0000000020000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L3SA_MOD_TAG ] = { 0x0000000000000000ULL, 0x0000000000000020ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FLUSH_SYNC ] = { 0x0000000000100000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_INST_FROM_L2MISS ] = { 0x0000000000000400ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L2SC_ST_HIT ] = { 0x0000000000000000ULL, 0x0000000000000010ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MEM_RQ_DISP_Q8to11 ] = { 0x0000000000000000ULL, 0x0000400000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_GRP_DISP ] = { 0x0000000000000000ULL, 0x0006000008000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L2SB_MOD_TAG ] = { 0x0000000000000000ULL, 
0x0000000000000200ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_CLB_EMPTY_CYC ] = { 0x0000000000000008ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L2SB_ST_HIT ] = { 0x0000000000000000ULL, 0x0000000000000002ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MEM_NONSPEC_RD_CANCEL ] = { 0x0000000000000000ULL, 0x0000200000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_BR_PRED_CR_TA ] = { 0x0000020000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_LSU0_FLUSH_SRQ ] = { 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_LSU_FLUSH_ULD ] = { 0x0000000000000000ULL, 0x4000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_INST_DISP_ATTEMPT ] = { 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000004000ULL }, [ POWER5_PME_PM_INST_FROM_RMEM ] = { 0x0010000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_ST_REF_L1_LSU0 ] = { 0x0000800000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LSU0_DERAT_MISS ] = { 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L2SB_RCLD_DISP ] = { 0x8000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FPU_STALL3 ] = { 0x0000000000000000ULL, 0x0000000000010000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_BR_PRED_CR ] = { 0x0000020000000000ULL, 0x0000000000000000ULL, 0x0000000000000020ULL }, [ POWER5_PME_PM_MRK_DATA_FROM_L2 ] = { 0x0000000000000000ULL, 0x0010000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LSU0_FLUSH_SRQ ] = { 0x0000000000800000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FAB_PNtoNN_DIRECT ] = { 0x0000000000000000ULL, 0x0000008000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_IOPS_CMPL ] = { 0x00210488fffa9811ULL, 0x3220041f03a200e0ULL, 0x0000000000010000ULL }, [ POWER5_PME_PM_L2SC_SHR_INV ] = { 0x0000000000000000ULL, 0x0000000000000400ULL, 0x0000000000000000ULL }, [ 
POWER5_PME_PM_L2SA_RCST_DISP_FAIL_OTHER ] = { 0x2000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L2SA_RCST_DISP ] = { 0x2000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_SNOOP_RETRY_AB_COLLISION ] = { 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FAB_PNtoVN_SIDECAR ] = { 0x0000000000000000ULL, 0x0000008000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LSU_LMQ_S0_ALLOC ] = { 0x0000000000000080ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LSU0_REJECT_LMQ_FULL ] = { 0x0000000000020000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_SNOOP_PW_RETRY_RQ ] = { 0x0000000000000000ULL, 0x0000100000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_DTLB_REF ] = { 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_PTEG_FROM_L3 ] = { 0x0800000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FAB_M1toVNorNN_SIDECAR_EMPTY ] = { 0x0000000000000000ULL, 0x0000010000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LSU_SRQ_EMPTY_CYC ] = { 0x0000000000000200ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FPU1_STF ] = { 0x0000000000000000ULL, 0x0000000002000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LSU_LMQ_S0_VALID ] = { 0x0000000000000080ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_GCT_USAGE_00to59_CYC ] = { 0x0000000000000040ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_DATA_FROM_L2MISS ] = { 0x0002000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_GRP_DISP_BLK_SB_CYC ] = { 0x0000000000000004ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FPU_FMOV_FEST ] = { 0x0000000000000000ULL, 0x0000000000004000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_XER_MAP_FULL_CYC ] = { 0x0000000800000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ 
POWER5_PME_PM_FLUSH_SB ] = { 0x0000000000100000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_DATA_FROM_L375_SHR ] = { 0x0000000000000000ULL, 0x0400000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_GRP_CMPL ] = { 0x0000000000000000ULL, 0x0004000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_SUSPENDED ] = { 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_GRP_IC_MISS_BR_REDIR_NONSPEC ] = { 0x0000000040000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_SNOOP_RD_RETRY_QFULL ] = { 0x0000000000000000ULL, 0x0000020000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L3SB_MOD_INV ] = { 0x0000000000000000ULL, 0x0000000000000040ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_DATA_FROM_L35_SHR ] = { 0x0008000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LD_MISS_L1_LSU1 ] = { 0x0000200000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_STCX_FAIL ] = { 0x0000001000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_DC_PREF_DST ] = { 0x0000000000002000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_GRP_DISP ] = { 0x0800000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L2SA_RCLD_DISP_FAIL_ADDR ] = { 0x1000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FPU0_FPSCR ] = { 0x0000000000000000ULL, 0x0000000000200000ULL, 0x0000000000000040ULL }, [ POWER5_PME_PM_DATA_FROM_L2 ] = { 0x0000100000000000ULL, 0x0000000000000000ULL, 0x0000000000000001ULL }, [ POWER5_PME_PM_FPU1_DENORM ] = { 0x0000000000000000ULL, 0x0000000000080000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FPU_1FLOP ] = { 0x0000000000000000ULL, 0x0000000000008000ULL, 0x0000000000010200ULL }, [ POWER5_PME_PM_L2SC_RCLD_DISP_FAIL_OTHER ] = { 0x0000000000000000ULL, 0x0000000000000004ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L2SC_RCST_DISP_FAIL_RC_FULL ] = 
{ 0x0000000000000000ULL, 0x0000000000000008ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FPU0_FSQRT ] = { 0x0000000000000000ULL, 0x0000000000040000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LD_REF_L1 ] = { 0x0000080000000000ULL, 0x0000000000000000ULL, 0x0000000000008207ULL }, [ POWER5_PME_PM_INST_FROM_L1 ] = { 0x0010000000000000ULL, 0x0000000000000000ULL, 0x0000000000000001ULL }, [ POWER5_PME_PM_TLBIE_HELD ] = { 0x0000000000010000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_DC_PREF_OUT_OF_STREAMS ] = { 0x0000000000000400ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_DATA_FROM_L25_MOD_CYC ] = { 0x0000000000000000ULL, 0x0010000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_LSU1_FLUSH_SRQ ] = { 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MEM_RQ_DISP_Q0to3 ] = { 0x0000000000000000ULL, 0x0000400000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_ST_REF_L1_LSU1 ] = { 0x0000800000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_LD_MISS_L1 ] = { 0x0000000000000000ULL, 0x2000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L1_WRITE_CYC ] = { 0x0000000000008000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L2SC_ST_REQ ] = { 0x0000000000000000ULL, 0x0000000000000010ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_CMPLU_STALL_FDIV ] = { 0x0000000080000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_THRD_SEL_OVER_CLB_EMPTY ] = { 0x0000000000000000ULL, 0x0000000800000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_BR_MPRED_CR ] = { 0x0000010000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L3SB_MOD_TAG ] = { 0x0000000000000000ULL, 0x0000000000000040ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_DATA_FROM_L2MISS ] = { 0x0000000800000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LSU_REJECT_SRQ ] = { 0x0000000000040000ULL, 
0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LD_MISS_L1 ] = { 0x0000080000000000ULL, 0x0000000000000000ULL, 0x0000000000004008ULL }, [ POWER5_PME_PM_INST_FROM_PREF ] = { 0x0010000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_DC_INV_L2 ] = { 0x0800000000080000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_STCX_PASS ] = { 0x0000001000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LSU_SRQ_FULL_CYC ] = { 0x0000000000000100ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FPU_FIN ] = { 0x0000000000000000ULL, 0x0020000000008000ULL, 0x0000000000001800ULL }, [ POWER5_PME_PM_L2SA_SHR_MOD ] = { 0x0000000000000000ULL, 0x0000000000000100ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LSU_SRQ_STFWD ] = { 0x0000000000000200ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_0INST_CLB_CYC ] = { 0x0000000000000008ULL, 0x0000000800000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FXU0_FIN ] = { 0x0000000000000000ULL, 0x0000000010000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L2SB_RCST_DISP_FAIL_RC_FULL ] = { 0x0000000000000000ULL, 0x0000000000000001ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_THRD_GRP_CMPL_BOTH_CYC ] = { 0x0000000000000000ULL, 0x0000000200000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_PMC5_OVERFLOW ] = { 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_FPU0_FDIV ] = { 0x0000000000000000ULL, 0x0000000000100000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_PTEG_FROM_L375_SHR ] = { 0x0200000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LD_REF_L1_LSU1 ] = { 0x0000400000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L2SA_RC_DISP_FAIL_CO_BUSY ] = { 0x4000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_HV_CYC ] = { 0x0000000000000000ULL, 0x0000000100000000ULL, 0x0000000000000000ULL }, [ 
POWER5_PME_PM_THRD_PRIO_DIFF_0_CYC ] = { 0x0000000000000000ULL, 0x0000000020000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LR_CTR_MAP_FULL_CYC ] = { 0x0000000400000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L3SB_SHR_INV ] = { 0x0000000000000000ULL, 0x0000000000000040ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_DATA_FROM_RMEM ] = { 0x0002000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_DATA_FROM_L275_MOD ] = { 0x0004000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LSU0_REJECT_SRQ ] = { 0x0000000000002000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LSU1_DERAT_MISS ] = { 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_MRK_LSU_FIN ] = { 0x0000000000000000ULL, 0x0002000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_DTLB_MISS_16M ] = { 0x0000800000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_LSU0_FLUSH_UST ] = { 0x0000000004000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L2SC_MOD_TAG ] = { 0x0000000000000000ULL, 0x0000000000000400ULL, 0x0000000000000000ULL }, [ POWER5_PME_PM_L2SB_RC_DISP_FAIL_CO_BUSY ] = { 0x0000000000000000ULL, 0x0000000000000002ULL, 0x0000000000000000ULL } }; static const pme_power_entry_t power5_pe[] = { [ POWER5_PME_PM_LSU_REJECT_RELOAD_CDF ] = { .pme_name = "PM_LSU_REJECT_RELOAD_CDF", .pme_code = 0x2c6090, .pme_short_desc = "LSU reject due to reload CDF or tag update collision", .pme_long_desc = "Total cycles the Load Store Unit is busy rejecting instructions because of Critical Data Forward. When critical data arrives from the storage system it is formatted and immediately forwarded, bypassing the data cache, to the destination register using the result bus. Any instruction the requires the result bus in the same cycle is rejected. 
Tag update rejects are caused when an instruction requires access to the Dcache directory or ERAT in the same system when they are being updated. Combined Unit 0 + 1.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU_REJECT_RELOAD_CDF], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU_REJECT_RELOAD_CDF] }, [ POWER5_PME_PM_FPU1_SINGLE ] = { .pme_name = "PM_FPU1_SINGLE", .pme_code = 0x20e7, .pme_short_desc = "FPU1 executed single precision instruction", .pme_long_desc = "FPU1 has executed a single precision instruction.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FPU1_SINGLE], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FPU1_SINGLE] }, [ POWER5_PME_PM_L3SB_REF ] = { .pme_name = "PM_L3SB_REF", .pme_code = 0x701c4, .pme_short_desc = "L3 slice B references", .pme_long_desc = "Number of attempts made by this chip cores to find data in the L3. Reported per L3 slice ", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L3SB_REF], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L3SB_REF] }, [ POWER5_PME_PM_THRD_PRIO_DIFF_3or4_CYC ] = { .pme_name = "PM_THRD_PRIO_DIFF_3or4_CYC", .pme_code = 0x430e5, .pme_short_desc = "Cycles thread priority difference is 3 or 4", .pme_long_desc = "Cycles when this thread's priority is higher than the other thread's priority by 3 or 4.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_THRD_PRIO_DIFF_3or4_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_THRD_PRIO_DIFF_3or4_CYC] }, [ POWER5_PME_PM_INST_FROM_L275_SHR ] = { .pme_name = "PM_INST_FROM_L275_SHR", .pme_code = 0x322096, .pme_short_desc = "Instruction fetched from L2.75 shared", .pme_long_desc = "An instruction fetch group was fetched with shared (T) data from the L2 on a different module than this processor is located. 
Fetch groups can contain up to 8 instructions", .pme_event_ids = power5_event_ids[POWER5_PME_PM_INST_FROM_L275_SHR], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_INST_FROM_L275_SHR] }, [ POWER5_PME_PM_MRK_DATA_FROM_L375_MOD ] = { .pme_name = "PM_MRK_DATA_FROM_L375_MOD", .pme_code = 0x1c70a7, .pme_short_desc = "Marked data loaded from L3.75 modified", .pme_long_desc = "The processor's Data Cache was reloaded with modified (M) data from the L3 of a chip on a different module than this processor is located due to a marked load.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_DATA_FROM_L375_MOD], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_DATA_FROM_L375_MOD] }, [ POWER5_PME_PM_DTLB_MISS_4K ] = { .pme_name = "PM_DTLB_MISS_4K", .pme_code = 0xc40c0, .pme_short_desc = "Data TLB miss for 4K page", .pme_long_desc = "Data TLB references to 4KB pages that missed the TLB. Page size is determined at TLB reload time.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_DTLB_MISS_4K], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_DTLB_MISS_4K] }, [ POWER5_PME_PM_CLB_FULL_CYC ] = { .pme_name = "PM_CLB_FULL_CYC", .pme_code = 0x220e5, .pme_short_desc = "Cycles CLB full", .pme_long_desc = "Cycles when both thread's CLB is full.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_CLB_FULL_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_CLB_FULL_CYC] }, [ POWER5_PME_PM_MRK_ST_CMPL ] = { .pme_name = "PM_MRK_ST_CMPL", .pme_code = 0x100003, .pme_short_desc = "Marked store instruction completed", .pme_long_desc = "A sampled store has completed (data home)", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_ST_CMPL], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_ST_CMPL] }, [ POWER5_PME_PM_LSU_FLUSH_LRQ_FULL ] = { .pme_name = "PM_LSU_FLUSH_LRQ_FULL", .pme_code = 0x320e7, .pme_short_desc = "Flush caused by LRQ full", .pme_long_desc = "This thread was flushed at dispatch because its Load Request Queue was full. 
This allows the other thread to have more machine resources for it to make progress while this thread is stalled.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU_FLUSH_LRQ_FULL], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU_FLUSH_LRQ_FULL] }, [ POWER5_PME_PM_MRK_DATA_FROM_L275_SHR ] = { .pme_name = "PM_MRK_DATA_FROM_L275_SHR", .pme_code = 0x3c7097, .pme_short_desc = "Marked data loaded from L2.75 shared", .pme_long_desc = "The processor's Data Cache was reloaded with shared (T) data from the L2 on a different module than this processor is located due to a marked load.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_DATA_FROM_L275_SHR], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_DATA_FROM_L275_SHR] }, [ POWER5_PME_PM_1INST_CLB_CYC ] = { .pme_name = "PM_1INST_CLB_CYC", .pme_code = 0x400c1, .pme_short_desc = "Cycles 1 instruction in CLB", .pme_long_desc = "The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_1INST_CLB_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_1INST_CLB_CYC] }, [ POWER5_PME_PM_MEM_SPEC_RD_CANCEL ] = { .pme_name = "PM_MEM_SPEC_RD_CANCEL", .pme_code = 0x721e6, .pme_short_desc = "Speculative memory read cancelled", .pme_long_desc = "Speculative memory read cancelled (i.e. 
cresp = sourced by L2/L3)", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MEM_SPEC_RD_CANCEL], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MEM_SPEC_RD_CANCEL] }, [ POWER5_PME_PM_MRK_DTLB_MISS_16M ] = { .pme_name = "PM_MRK_DTLB_MISS_16M", .pme_code = 0xc40c5, .pme_short_desc = "Marked Data TLB misses for 16M page", .pme_long_desc = "Marked Data TLB misses for 16M page", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_DTLB_MISS_16M], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_DTLB_MISS_16M] }, [ POWER5_PME_PM_FPU_FDIV ] = { .pme_name = "PM_FPU_FDIV", .pme_code = 0x100088, .pme_short_desc = "FPU executed FDIV instruction", .pme_long_desc = "The floating point unit has executed a divide instruction. This could be fdiv, fdivs, fdiv., fdivs.. Combined Unit 0 + Unit 1.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FPU_FDIV], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FPU_FDIV] }, [ POWER5_PME_PM_FPU_SINGLE ] = { .pme_name = "PM_FPU_SINGLE", .pme_code = 0x102090, .pme_short_desc = "FPU executed single precision instruction", .pme_long_desc = "FPU is executing single precision instruction. Combined Unit 0 + Unit 1.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FPU_SINGLE], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FPU_SINGLE] }, [ POWER5_PME_PM_FPU0_FMA ] = { .pme_name = "PM_FPU0_FMA", .pme_code = 0xc1, .pme_short_desc = "FPU0 executed multiply-add instruction", .pme_long_desc = "The floating point unit has executed a multiply-add kind of instruction. 
This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FPU0_FMA], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FPU0_FMA] }, [ POWER5_PME_PM_SLB_MISS ] = { .pme_name = "PM_SLB_MISS", .pme_code = 0x280088, .pme_short_desc = "SLB misses", .pme_long_desc = "Total of all Segment Lookaside Buffer (SLB) misses, Instructions + Data.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_SLB_MISS], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_SLB_MISS] }, [ POWER5_PME_PM_LSU1_FLUSH_LRQ ] = { .pme_name = "PM_LSU1_FLUSH_LRQ", .pme_code = 0xc00c6, .pme_short_desc = "LSU1 LRQ flushes", .pme_long_desc = "A load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU1_FLUSH_LRQ], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU1_FLUSH_LRQ] }, [ POWER5_PME_PM_L2SA_ST_HIT ] = { .pme_name = "PM_L2SA_ST_HIT", .pme_code = 0x733e0, .pme_short_desc = "L2 slice A store hits", .pme_long_desc = "A store request made from the core hit in the L2 directory. 
This event is provided on each of the three L2 slices A, B, and C.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L2SA_ST_HIT], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L2SA_ST_HIT] }, [ POWER5_PME_PM_DTLB_MISS ] = { .pme_name = "PM_DTLB_MISS", .pme_code = 0x800c4, .pme_short_desc = "Data TLB misses", .pme_long_desc = "Data TLB misses, all page sizes.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_DTLB_MISS], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_DTLB_MISS] }, [ POWER5_PME_PM_BR_PRED_TA ] = { .pme_name = "PM_BR_PRED_TA", .pme_code = 0x230e3, .pme_short_desc = "A conditional branch was predicted", .pme_long_desc = " target prediction", .pme_event_ids = power5_event_ids[POWER5_PME_PM_BR_PRED_TA], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_BR_PRED_TA] }, [ POWER5_PME_PM_MRK_DATA_FROM_L375_MOD_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_L375_MOD_CYC", .pme_code = 0x4c70a7, .pme_short_desc = "Marked load latency from L3.75 modified", .pme_long_desc = "Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. 
To calculate average latency divide this count by the number of marked misses to the same level.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_DATA_FROM_L375_MOD_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_DATA_FROM_L375_MOD_CYC] }, [ POWER5_PME_PM_CMPLU_STALL_FXU ] = { .pme_name = "PM_CMPLU_STALL_FXU", .pme_code = 0x211099, .pme_short_desc = "Completion stall caused by FXU instruction", .pme_long_desc = "Following a completion stall (any period when no groups completed) the last instruction to finish before completion resumes was a fixed point instruction.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_CMPLU_STALL_FXU], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_CMPLU_STALL_FXU] }, [ POWER5_PME_PM_EXT_INT ] = { .pme_name = "PM_EXT_INT", .pme_code = 0x400003, .pme_short_desc = "External interrupts", .pme_long_desc = "An interrupt due to an external exception occurred", .pme_event_ids = power5_event_ids[POWER5_PME_PM_EXT_INT], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_EXT_INT] }, [ POWER5_PME_PM_MRK_LSU1_FLUSH_LRQ ] = { .pme_name = "PM_MRK_LSU1_FLUSH_LRQ", .pme_code = 0x810c6, .pme_short_desc = "LSU1 marked LRQ flushes", .pme_long_desc = "A marked load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_LSU1_FLUSH_LRQ], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_LSU1_FLUSH_LRQ] }, [ POWER5_PME_PM_LSU1_LDF ] = { .pme_name = "PM_LSU1_LDF", .pme_code = 0xc50c4, .pme_short_desc = "LSU1 executed Floating Point load instruction", .pme_long_desc = "A floating point load was executed by LSU1", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU1_LDF], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU1_LDF] }, [ POWER5_PME_PM_MRK_ST_GPS ] = { .pme_name = "PM_MRK_ST_GPS", 
.pme_code = 0x200003, .pme_short_desc = "Marked store sent to GPS", .pme_long_desc = "A sampled store has been sent to the memory subsystem", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_ST_GPS], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_ST_GPS] }, [ POWER5_PME_PM_FAB_CMD_ISSUED ] = { .pme_name = "PM_FAB_CMD_ISSUED", .pme_code = 0x700c7, .pme_short_desc = "Fabric command issued", .pme_long_desc = "Incremented when a chip issues a command on its SnoopA address bus. Each of the two address busses (SnoopA and SnoopB) is capable of one transaction per fabric cycle (one fabric cycle = 2 cpu cycles in normal 2:1 mode), but each chip can only drive the SnoopA bus, and can only drive one transaction every two fabric cycles (i.e., every four cpu cycles). In MCM-based systems, two chips interleave their accesses to each of the two fabric busses (SnoopA, SnoopB) to reach a peak capability of one transaction per cpu clock cycle. The two chips that drive SnoopB are wired so that the chips refer to the bus as SnoopA but it is connected to the other two chips as SnoopB. Note that this event will only be recorded by the FBC on the chip that sourced the operation. The signal is delivered at FBC speed and the count must be scaled.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FAB_CMD_ISSUED], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FAB_CMD_ISSUED] }, [ POWER5_PME_PM_LSU0_SRQ_STFWD ] = { .pme_name = "PM_LSU0_SRQ_STFWD", .pme_code = 0xc20e0, .pme_short_desc = "LSU0 SRQ store forwarded", .pme_long_desc = "Data from a store instruction was forwarded to a load on unit 0. A load that misses L1 but becomes a store forward is treated as a load miss and it causes the DL1 load miss event to be counted. It does not go into the LMQ. 
If a load that hits L1 but becomes a store forward, then it's not treated as a load miss.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU0_SRQ_STFWD], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU0_SRQ_STFWD] }, [ POWER5_PME_PM_CR_MAP_FULL_CYC ] = { .pme_name = "PM_CR_MAP_FULL_CYC", .pme_code = 0x100c4, .pme_short_desc = "Cycles CR logical operation mapper full", .pme_long_desc = "The Conditional Register mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. This event only indicates that the mapper was full, not that dispatch was prevented.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_CR_MAP_FULL_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_CR_MAP_FULL_CYC] }, [ POWER5_PME_PM_L2SA_RCST_DISP_FAIL_RC_FULL ] = { .pme_name = "PM_L2SA_RCST_DISP_FAIL_RC_FULL", .pme_code = 0x722e0, .pme_short_desc = "L2 slice A RC store dispatch attempt failed due to all RC full", .pme_long_desc = "A Read/Claim dispatch for a store failed because all RC machines are busy.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L2SA_RCST_DISP_FAIL_RC_FULL], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L2SA_RCST_DISP_FAIL_RC_FULL] }, [ POWER5_PME_PM_MRK_LSU0_FLUSH_ULD ] = { .pme_name = "PM_MRK_LSU0_FLUSH_ULD", .pme_code = 0x810c0, .pme_short_desc = "LSU0 marked unaligned load flushes", .pme_long_desc = "A marked load was flushed from unit 0 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1)", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_LSU0_FLUSH_ULD], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_LSU0_FLUSH_ULD] }, [ POWER5_PME_PM_LSU_FLUSH_SRQ_FULL ] = { .pme_name = "PM_LSU_FLUSH_SRQ_FULL", .pme_code = 0x330e0, .pme_short_desc = "Flush caused by SRQ full", .pme_long_desc = "This thread was flushed at dispatch because its Store Request Queue was full. 
This allows the other thread to have more machine resources for it to make progress while this thread is stalled.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU_FLUSH_SRQ_FULL], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU_FLUSH_SRQ_FULL] }, [ POWER5_PME_PM_FLUSH_IMBAL ] = { .pme_name = "PM_FLUSH_IMBAL", .pme_code = 0x330e3, .pme_short_desc = "Flush caused by thread GCT imbalance", .pme_long_desc = "This thread has been flushed at dispatch because it is stalled and a GCT imbalance exists. GCT thresholds are set in the TSCR register. This allows the other thread to have more machine resources for it to make progress while this thread is stalled.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FLUSH_IMBAL], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FLUSH_IMBAL] }, [ POWER5_PME_PM_MEM_RQ_DISP_Q16to19 ] = { .pme_name = "PM_MEM_RQ_DISP_Q16to19", .pme_code = 0x727e6, .pme_short_desc = "Memory read queue dispatched to queues 16-19", .pme_long_desc = "A memory operation was dispatched to read queue 16,17,18 or 19. 
This event is sent from the Memory Controller clock domain and must be scaled accordingly.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MEM_RQ_DISP_Q16to19], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MEM_RQ_DISP_Q16to19] }, [ POWER5_PME_PM_THRD_PRIO_DIFF_minus3or4_CYC ] = { .pme_name = "PM_THRD_PRIO_DIFF_minus3or4_CYC", .pme_code = 0x430e1, .pme_short_desc = "Cycles thread priority difference is -3 or -4", .pme_long_desc = "Cycles when this thread's priority is lower than the other thread's priority by 3 or 4.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_THRD_PRIO_DIFF_minus3or4_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_THRD_PRIO_DIFF_minus3or4_CYC] }, [ POWER5_PME_PM_DATA_FROM_L35_MOD ] = { .pme_name = "PM_DATA_FROM_L35_MOD", .pme_code = 0x2c309e, .pme_short_desc = "Data loaded from L3.5 modified", .pme_long_desc = "The processor's Data Cache was reloaded with modified (M) data from the L3 of a chip on the same module as this processor is located due to a demand load.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_DATA_FROM_L35_MOD], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_DATA_FROM_L35_MOD] }, [ POWER5_PME_PM_MEM_HI_PRIO_WR_CMPL ] = { .pme_name = "PM_MEM_HI_PRIO_WR_CMPL", .pme_code = 0x726e6, .pme_short_desc = "High priority write completed", .pme_long_desc = "A memory write, which was upgraded to high priority, completed. Writes can be upgraded to high priority to ensure that read traffic does not lock out writes. This event is sent from the Memory Controller clock domain and must be scaled accordingly.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MEM_HI_PRIO_WR_CMPL], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MEM_HI_PRIO_WR_CMPL] }, [ POWER5_PME_PM_FPU1_FDIV ] = { .pme_name = "PM_FPU1_FDIV", .pme_code = 0xc4, .pme_short_desc = "FPU1 executed FDIV instruction", .pme_long_desc = "FPU1 has executed a divide instruction. This could be fdiv, fdivs, fdiv. 
fdivs.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FPU1_FDIV], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FPU1_FDIV] }, [ POWER5_PME_PM_FPU0_FRSP_FCONV ] = { .pme_name = "PM_FPU0_FRSP_FCONV", .pme_code = 0x10c1, .pme_short_desc = "FPU0 executed FRSP or FCONV instructions", .pme_long_desc = "FPU0 has executed a frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FPU0_FRSP_FCONV], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FPU0_FRSP_FCONV] }, [ POWER5_PME_PM_MEM_RQ_DISP ] = { .pme_name = "PM_MEM_RQ_DISP", .pme_code = 0x701c6, .pme_short_desc = "Memory read queue dispatched", .pme_long_desc = "A memory read was dispatched. This event is sent from the Memory Controller clock domain and must be scaled accordingly.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MEM_RQ_DISP], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MEM_RQ_DISP] }, [ POWER5_PME_PM_LWSYNC_HELD ] = { .pme_name = "PM_LWSYNC_HELD", .pme_code = 0x130e0, .pme_short_desc = "LWSYNC held at dispatch", .pme_long_desc = "Cycles a LWSYNC instruction was held at dispatch. LWSYNC instructions are held at dispatch until all previous loads are done and all previous stores have issued. LWSYNC enters the Store Request Queue and is sent to the storage subsystem but does not wait for a response.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LWSYNC_HELD], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LWSYNC_HELD] }, [ POWER5_PME_PM_FXU_FIN ] = { .pme_name = "PM_FXU_FIN", .pme_code = 0x313088, .pme_short_desc = "FXU produced a result", .pme_long_desc = "The fixed point unit (Unit 0 + Unit 1) finished an instruction. 
Instructions that finish may not necessary complete.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FXU_FIN], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FXU_FIN] }, [ POWER5_PME_PM_DSLB_MISS ] = { .pme_name = "PM_DSLB_MISS", .pme_code = 0x800c5, .pme_short_desc = "Data SLB misses", .pme_long_desc = "A SLB miss for a data request occurred. SLB misses trap to the operating system to resolve.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_DSLB_MISS], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_DSLB_MISS] }, [ POWER5_PME_PM_FXLS1_FULL_CYC ] = { .pme_name = "PM_FXLS1_FULL_CYC", .pme_code = 0x110c4, .pme_short_desc = "Cycles FXU1/LS1 queue full", .pme_long_desc = "The issue queue that feeds the Fixed Point unit 1 / Load Store Unit 1 is full. This condition will prevent dispatch groups from being dispatched. This event only indicates that the queue was full, not that dispatch was prevented.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FXLS1_FULL_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FXLS1_FULL_CYC] }, [ POWER5_PME_PM_DATA_FROM_L275_SHR ] = { .pme_name = "PM_DATA_FROM_L275_SHR", .pme_code = 0x3c3097, .pme_short_desc = "Data loaded from L2.75 shared", .pme_long_desc = "The processor's Data Cache was reloaded with shared (T) data from the L2 on a different module than this processor is located due to a demand load. 
", .pme_event_ids = power5_event_ids[POWER5_PME_PM_DATA_FROM_L275_SHR], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_DATA_FROM_L275_SHR] }, [ POWER5_PME_PM_THRD_SEL_T0 ] = { .pme_name = "PM_THRD_SEL_T0", .pme_code = 0x410c0, .pme_short_desc = "Decode selected thread 0", .pme_long_desc = "Thread selection picked thread 0 for decode.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_THRD_SEL_T0], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_THRD_SEL_T0] }, [ POWER5_PME_PM_PTEG_RELOAD_VALID ] = { .pme_name = "PM_PTEG_RELOAD_VALID", .pme_code = 0x830e4, .pme_short_desc = "PTEG reload valid", .pme_long_desc = "A Page Table Entry was loaded into the TLB.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_PTEG_RELOAD_VALID], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_PTEG_RELOAD_VALID] }, [ POWER5_PME_PM_LSU_LMQ_LHR_MERGE ] = { .pme_name = "PM_LSU_LMQ_LHR_MERGE", .pme_code = 0xc70e5, .pme_short_desc = "LMQ LHR merges", .pme_long_desc = "A data cache miss occurred for the same real cache line address as an earlier request already in the Load Miss Queue and was merged into the LMQ entry.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU_LMQ_LHR_MERGE], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU_LMQ_LHR_MERGE] }, [ POWER5_PME_PM_MRK_STCX_FAIL ] = { .pme_name = "PM_MRK_STCX_FAIL", .pme_code = 0x820e6, .pme_short_desc = "Marked STCX failed", .pme_long_desc = "A marked stcx (stwcx or stdcx) failed", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_STCX_FAIL], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_STCX_FAIL] }, [ POWER5_PME_PM_2INST_CLB_CYC ] = { .pme_name = "PM_2INST_CLB_CYC", .pme_code = 0x400c2, .pme_short_desc = "Cycles 2 instructions in CLB", .pme_long_desc = "The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. 
These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_2INST_CLB_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_2INST_CLB_CYC] }, [ POWER5_PME_PM_FAB_PNtoVN_DIRECT ] = { .pme_name = "PM_FAB_PNtoVN_DIRECT", .pme_code = 0x723e7, .pme_short_desc = "PN to VN beat went straight to its destination", .pme_long_desc = "Fabric Data beats that the base chip takes the inbound PN data and passes it through to the outbound VN bus without going into a sidecar. The signal is delivered at FBC speed and the count must be scaled accordingly.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FAB_PNtoVN_DIRECT], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FAB_PNtoVN_DIRECT] }, [ POWER5_PME_PM_PTEG_FROM_L2MISS ] = { .pme_name = "PM_PTEG_FROM_L2MISS", .pme_code = 0x38309b, .pme_short_desc = "PTEG loaded from L2 miss", .pme_long_desc = "A Page Table Entry was loaded into the TLB but not from the local L2.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_PTEG_FROM_L2MISS], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_PTEG_FROM_L2MISS] }, [ POWER5_PME_PM_CMPLU_STALL_LSU ] = { .pme_name = "PM_CMPLU_STALL_LSU", .pme_code = 0x211098, .pme_short_desc = "Completion stall caused by LSU instruction", .pme_long_desc = "Following a completion stall (any period when no groups completed) the last instruction to finish before completion resumes was a load/store instruction.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_CMPLU_STALL_LSU], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_CMPLU_STALL_LSU] }, [ POWER5_PME_PM_MRK_DSLB_MISS ] = { .pme_name = "PM_MRK_DSLB_MISS", .pme_code = 0xc50c7, .pme_short_desc = "Marked Data SLB misses", .pme_long_desc = "A Data SLB miss was caused by a marked instruction.", .pme_event_ids = 
power5_event_ids[POWER5_PME_PM_MRK_DSLB_MISS], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_DSLB_MISS] }, [ POWER5_PME_PM_LSU_FLUSH_ULD ] = { .pme_name = "PM_LSU_FLUSH_ULD", .pme_code = 0x1c0088, .pme_short_desc = "LRQ unaligned load flushes", .pme_long_desc = "A load was flushed because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1). Combined Unit 0 + 1.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU_FLUSH_ULD], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU_FLUSH_ULD] }, [ POWER5_PME_PM_PTEG_FROM_LMEM ] = { .pme_name = "PM_PTEG_FROM_LMEM", .pme_code = 0x283087, .pme_short_desc = "PTEG loaded from local memory", .pme_long_desc = "A Page Table Entry was loaded into the TLB from memory attached to the same module this proccessor is located on.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_PTEG_FROM_LMEM], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_PTEG_FROM_LMEM] }, [ POWER5_PME_PM_MRK_BRU_FIN ] = { .pme_name = "PM_MRK_BRU_FIN", .pme_code = 0x200005, .pme_short_desc = "Marked instruction BRU processing finished", .pme_long_desc = "The branch unit finished a marked instruction. Instructions that finish may not necessary complete.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_BRU_FIN], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_BRU_FIN] }, [ POWER5_PME_PM_MEM_WQ_DISP_WRITE ] = { .pme_name = "PM_MEM_WQ_DISP_WRITE", .pme_code = 0x703c6, .pme_short_desc = "Memory write queue dispatched due to write", .pme_long_desc = "A memory write was dispatched to a write queue. 
This event is sent from the Memory Controller clock domain and must be scaled accordingly.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MEM_WQ_DISP_WRITE], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MEM_WQ_DISP_WRITE] }, [ POWER5_PME_PM_MRK_DATA_FROM_L275_MOD_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_L275_MOD_CYC", .pme_code = 0x4c70a3, .pme_short_desc = "Marked load latency from L2.75 modified", .pme_long_desc = "Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. To calculate average latency divide this count by the number of marked misses to the same level.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_DATA_FROM_L275_MOD_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_DATA_FROM_L275_MOD_CYC] }, [ POWER5_PME_PM_LSU1_NCLD ] = { .pme_name = "PM_LSU1_NCLD", .pme_code = 0xc50c5, .pme_short_desc = "LSU1 non-cacheable loads", .pme_long_desc = "A non-cacheable load was executed by Unit 0.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU1_NCLD], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU1_NCLD] }, [ POWER5_PME_PM_L2SA_RCLD_DISP_FAIL_OTHER ] = { .pme_name = "PM_L2SA_RCLD_DISP_FAIL_OTHER", .pme_code = 0x731e0, .pme_short_desc = "L2 slice A RC load dispatch attempt failed due to other reasons", .pme_long_desc = "A Read/Claim dispatch for a load failed for some reason other than Full or Collision conditions.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L2SA_RCLD_DISP_FAIL_OTHER], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L2SA_RCLD_DISP_FAIL_OTHER] }, [ POWER5_PME_PM_SNOOP_PW_RETRY_WQ_PWQ ] = { .pme_name = "PM_SNOOP_PW_RETRY_WQ_PWQ", .pme_code = 0x717c6, .pme_short_desc = "Snoop partial-write retry due to collision with active write or partial-write queue", .pme_long_desc = "A snoop request for a partial write to memory was retried because it matched the cache line of an 
active write or partial write. When this happens the snoop request is retried and the active write is changed to high priority. This event is sent from the Memory Controller clock domain and must be scaled accordingly.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_SNOOP_PW_RETRY_WQ_PWQ], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_SNOOP_PW_RETRY_WQ_PWQ] }, [ POWER5_PME_PM_FPR_MAP_FULL_CYC ] = { .pme_name = "PM_FPR_MAP_FULL_CYC", .pme_code = 0x100c1, .pme_short_desc = "Cycles FPR mapper full", .pme_long_desc = "The floating point unit has executed an add, mult, sub, compare, fsel, fneg, fabs, fnabs, fres, or frsqrte kind of instruction. These are single FLOP operations. ", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FPR_MAP_FULL_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FPR_MAP_FULL_CYC] }, [ POWER5_PME_PM_FPU1_FULL_CYC ] = { .pme_name = "PM_FPU1_FULL_CYC", .pme_code = 0x100c7, .pme_short_desc = "Cycles FPU1 issue queue full", .pme_long_desc = "The issue queue for FPU1 cannot accept any more instructions. Dispatch to this issue queue is stopped", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FPU1_FULL_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FPU1_FULL_CYC] }, [ POWER5_PME_PM_L3SA_ALL_BUSY ] = { .pme_name = "PM_L3SA_ALL_BUSY", .pme_code = 0x721e3, .pme_short_desc = "L3 slice A active for every cycle all CI/CO machines busy", .pme_long_desc = "Cycles All Castin/Castout machines are busy.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L3SA_ALL_BUSY], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L3SA_ALL_BUSY] }, [ POWER5_PME_PM_3INST_CLB_CYC ] = { .pme_name = "PM_3INST_CLB_CYC", .pme_code = 0x400c3, .pme_short_desc = "Cycles 3 instructions in CLB", .pme_long_desc = "The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. 
These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_3INST_CLB_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_3INST_CLB_CYC] }, [ POWER5_PME_PM_MEM_PWQ_DISP_Q2or3 ] = { .pme_name = "PM_MEM_PWQ_DISP_Q2or3", .pme_code = 0x734e6, .pme_short_desc = "Memory partial-write queue dispatched to Write Queue 2 or 3", .pme_long_desc = "Memory partial-write queue dispatched to Write Queue 2 or 3. This event is sent from the Memory Controller clock domain and must be scaled accordingly.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MEM_PWQ_DISP_Q2or3], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MEM_PWQ_DISP_Q2or3] }, [ POWER5_PME_PM_L2SA_SHR_INV ] = { .pme_name = "PM_L2SA_SHR_INV", .pme_code = 0x710c0, .pme_short_desc = "L2 slice A transition from shared to invalid", .pme_long_desc = "A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. This transition was caused by any external snoop request. The event is provided on each of the three slices A, B, and C. 
NOTE: For this event to be useful the tablewalk duration event should also be counted.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L2SA_SHR_INV], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L2SA_SHR_INV] }, [ POWER5_PME_PM_THRESH_TIMEO ] = { .pme_name = "PM_THRESH_TIMEO", .pme_code = 0x30000b, .pme_short_desc = "Threshold timeout", .pme_long_desc = "The threshold timer expired", .pme_event_ids = power5_event_ids[POWER5_PME_PM_THRESH_TIMEO], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_THRESH_TIMEO] }, [ POWER5_PME_PM_L2SA_RC_DISP_FAIL_CO_BUSY_ALL ] = { .pme_name = "PM_L2SA_RC_DISP_FAIL_CO_BUSY_ALL", .pme_code = 0x713c0, .pme_short_desc = "L2 slice A RC dispatch attempt failed due to all CO busy", .pme_long_desc = "A Read/Claim dispatch was rejected because all Castout machines were busy.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L2SA_RC_DISP_FAIL_CO_BUSY_ALL], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L2SA_RC_DISP_FAIL_CO_BUSY_ALL] }, [ POWER5_PME_PM_THRD_SEL_OVER_GCT_IMBAL ] = { .pme_name = "PM_THRD_SEL_OVER_GCT_IMBAL", .pme_code = 0x410c4, .pme_short_desc = "Thread selection overrides caused by GCT imbalance", .pme_long_desc = "Thread selection was overridden because of a GCT imbalance.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_THRD_SEL_OVER_GCT_IMBAL], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_THRD_SEL_OVER_GCT_IMBAL] }, [ POWER5_PME_PM_FPU_FSQRT ] = { .pme_name = "PM_FPU_FSQRT", .pme_code = 0x200090, .pme_short_desc = "FPU executed FSQRT instruction", .pme_long_desc = "The floating point unit has executed a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. 
Combined Unit 0 + Unit 1.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FPU_FSQRT], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FPU_FSQRT] }, [ POWER5_PME_PM_MRK_LSU0_FLUSH_LRQ ] = { .pme_name = "PM_MRK_LSU0_FLUSH_LRQ", .pme_code = 0x810c2, .pme_short_desc = "LSU0 marked LRQ flushes", .pme_long_desc = "A marked load was flushed by unit 0 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_LSU0_FLUSH_LRQ], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_LSU0_FLUSH_LRQ] }, [ POWER5_PME_PM_PMC1_OVERFLOW ] = { .pme_name = "PM_PMC1_OVERFLOW", .pme_code = 0x20000a, .pme_short_desc = "PMC1 Overflow", .pme_long_desc = "Overflows from PMC1 are counted. This effectively widens the PMC. The Overflow from the original PMC will not trigger an exception even if the PMU is configured to generate exceptions on overflow.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_PMC1_OVERFLOW], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_PMC1_OVERFLOW] }, [ POWER5_PME_PM_L3SC_SNOOP_RETRY ] = { .pme_name = "PM_L3SC_SNOOP_RETRY", .pme_code = 0x731e5, .pme_short_desc = "L3 slice C snoop retries", .pme_long_desc = "Number of times an L3 retried a snoop because it got two in at the same time (one on snp_a, one on snp_b)", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L3SC_SNOOP_RETRY], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L3SC_SNOOP_RETRY] }, [ POWER5_PME_PM_DATA_TABLEWALK_CYC ] = { .pme_name = "PM_DATA_TABLEWALK_CYC", .pme_code = 0x800c7, .pme_short_desc = "Cycles doing data tablewalks", .pme_long_desc = "Cycles a translation tablewalk is active. 
While a tablewalk is active any request attempting to access the TLB will be rejected and retried.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_DATA_TABLEWALK_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_DATA_TABLEWALK_CYC] }, [ POWER5_PME_PM_THRD_PRIO_6_CYC ] = { .pme_name = "PM_THRD_PRIO_6_CYC", .pme_code = 0x420e5, .pme_short_desc = "Cycles thread running at priority level 6", .pme_long_desc = "Cycles this thread was running at priority level 6.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_THRD_PRIO_6_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_THRD_PRIO_6_CYC] }, [ POWER5_PME_PM_FPU_FEST ] = { .pme_name = "PM_FPU_FEST", .pme_code = 0x401090, .pme_short_desc = "FPU executed FEST instruction", .pme_long_desc = "The floating point unit has executed an estimate instructions. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. Combined Unit 0 + Unit 1.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FPU_FEST], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FPU_FEST] }, [ POWER5_PME_PM_FAB_M1toP1_SIDECAR_EMPTY ] = { .pme_name = "PM_FAB_M1toP1_SIDECAR_EMPTY", .pme_code = 0x702c7, .pme_short_desc = "M1 to P1 sidecar empty", .pme_long_desc = "Fabric cycles when the Minus-1 hip/hop sidecars (sidecars for chip to chip data transfer) are empty. 
The signal is delivered at FBC speed and the count must be scaled accordingly.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FAB_M1toP1_SIDECAR_EMPTY], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FAB_M1toP1_SIDECAR_EMPTY] }, [ POWER5_PME_PM_MRK_DATA_FROM_RMEM ] = { .pme_name = "PM_MRK_DATA_FROM_RMEM", .pme_code = 0x1c70a1, .pme_short_desc = "Marked data loaded from remote memory", .pme_long_desc = "The processor's Data Cache was reloaded due to a marked load from memory attached to a different module than this proccessor is located on.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_DATA_FROM_RMEM], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_DATA_FROM_RMEM] }, [ POWER5_PME_PM_MRK_DATA_FROM_L35_MOD_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_L35_MOD_CYC", .pme_code = 0x4c70a6, .pme_short_desc = "Marked load latency from L3.5 modified", .pme_long_desc = "Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. To calculate average latency divide this count by the number of marked misses to the same level.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_DATA_FROM_L35_MOD_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_DATA_FROM_L35_MOD_CYC] }, [ POWER5_PME_PM_MEM_PWQ_DISP ] = { .pme_name = "PM_MEM_PWQ_DISP", .pme_code = 0x704c6, .pme_short_desc = "Memory partial-write queue dispatched", .pme_long_desc = "Number of Partial Writes dispatched. The MC provides resources to gather partial cacheline writes (Partial line DMA writes & CI-stores) to up to four different cachelines at a time. 
This event is sent from the Memory Controller clock domain and must be scaled accordingly.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MEM_PWQ_DISP], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MEM_PWQ_DISP] }, [ POWER5_PME_PM_FAB_P1toM1_SIDECAR_EMPTY ] = { .pme_name = "PM_FAB_P1toM1_SIDECAR_EMPTY", .pme_code = 0x701c7, .pme_short_desc = "P1 to M1 sidecar empty", .pme_long_desc = "Fabric cycles when the Plus-1 hip/hop sidecars (sidecars for chip to chip data transfer) are empty. The signal is delivered at FBC speed and the count must be scaled accordingly.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FAB_P1toM1_SIDECAR_EMPTY], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FAB_P1toM1_SIDECAR_EMPTY] }, [ POWER5_PME_PM_LD_MISS_L1_LSU0 ] = { .pme_name = "PM_LD_MISS_L1_LSU0", .pme_code = 0xc10c2, .pme_short_desc = "LSU0 L1 D cache load misses", .pme_long_desc = "Load references that miss the Level 1 Data cache, by unit 0.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LD_MISS_L1_LSU0], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LD_MISS_L1_LSU0] }, [ POWER5_PME_PM_SNOOP_PARTIAL_RTRY_QFULL ] = { .pme_name = "PM_SNOOP_PARTIAL_RTRY_QFULL", .pme_code = 0x730e6, .pme_short_desc = "Snoop partial write retry due to partial-write queues full", .pme_long_desc = "A snoop request for a partial write to memory was retried because the write queues that handle partial writes were full. When this happens the active writes are changed to high priority. 
This event is sent from the Memory Controller clock domain and must be scaled accordingly.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_SNOOP_PARTIAL_RTRY_QFULL], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_SNOOP_PARTIAL_RTRY_QFULL] }, [ POWER5_PME_PM_FPU1_STALL3 ] = { .pme_name = "PM_FPU1_STALL3", .pme_code = 0x20e5, .pme_short_desc = "FPU1 stalled in pipe3", .pme_long_desc = "FPU1 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always).", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FPU1_STALL3], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FPU1_STALL3] }, [ POWER5_PME_PM_GCT_USAGE_80to99_CYC ] = { .pme_name = "PM_GCT_USAGE_80to99_CYC", .pme_code = 0x30001f, .pme_short_desc = "Cycles GCT 80-99% full", .pme_long_desc = "Cycles when the Global Completion Table has between 80% and 99% of its slots used. The GCT has 20 entries shared between threads", .pme_event_ids = power5_event_ids[POWER5_PME_PM_GCT_USAGE_80to99_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_GCT_USAGE_80to99_CYC] }, [ POWER5_PME_PM_WORK_HELD ] = { .pme_name = "PM_WORK_HELD", .pme_code = 0x40000c, .pme_short_desc = "Work held", .pme_long_desc = "RAS Unit has signaled completion to stop and there are groups waiting to complete", .pme_event_ids = power5_event_ids[POWER5_PME_PM_WORK_HELD], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_WORK_HELD] }, [ POWER5_PME_PM_INST_CMPL ] = { .pme_name = "PM_INST_CMPL", .pme_code = 0x100009, .pme_short_desc = "Instructions completed", .pme_long_desc = "Number of PowerPC instructions that completed. 
", .pme_event_ids = power5_event_ids[POWER5_PME_PM_INST_CMPL], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_INST_CMPL] }, [ POWER5_PME_PM_LSU1_FLUSH_UST ] = { .pme_name = "PM_LSU1_FLUSH_UST", .pme_code = 0xc00c5, .pme_short_desc = "LSU1 unaligned store flushes", .pme_long_desc = "A store was flushed from unit 1 because it was unaligned (crossed a 4K boundary)", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU1_FLUSH_UST], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU1_FLUSH_UST] }, [ POWER5_PME_PM_FXU_IDLE ] = { .pme_name = "PM_FXU_IDLE", .pme_code = 0x100012, .pme_short_desc = "FXU idle", .pme_long_desc = "FXU0 and FXU1 are both idle.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FXU_IDLE], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FXU_IDLE] }, [ POWER5_PME_PM_LSU0_FLUSH_ULD ] = { .pme_name = "PM_LSU0_FLUSH_ULD", .pme_code = 0xc00c0, .pme_short_desc = "LSU0 unaligned load flushes", .pme_long_desc = "A load was flushed from unit 0 because it was unaligned (crossed a 64 byte boundary, or 32 byte if it missed the L1)", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU0_FLUSH_ULD], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU0_FLUSH_ULD] }, [ POWER5_PME_PM_LSU1_REJECT_LMQ_FULL ] = { .pme_name = "PM_LSU1_REJECT_LMQ_FULL", .pme_code = 0xc60e5, .pme_short_desc = "LSU1 reject due to LMQ full or missed data coming", .pme_long_desc = "Total cycles the Load Store Unit 1 is busy rejecting instructions because the Load Miss Queue was full. The LMQ has eight entries. 
If all eight entries are full, subsequent load instructions are rejected.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU1_REJECT_LMQ_FULL], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU1_REJECT_LMQ_FULL] }, [ POWER5_PME_PM_GRP_DISP_REJECT ] = { .pme_name = "PM_GRP_DISP_REJECT", .pme_code = 0x120e4, .pme_short_desc = "Group dispatch rejected", .pme_long_desc = "A group that previously attempted dispatch was rejected.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_GRP_DISP_REJECT], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_GRP_DISP_REJECT] }, [ POWER5_PME_PM_L2SA_MOD_INV ] = { .pme_name = "PM_L2SA_MOD_INV", .pme_code = 0x730e0, .pme_short_desc = "L2 slice A transition from modified to invalid", .pme_long_desc = "A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L2SA_MOD_INV], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L2SA_MOD_INV] }, [ POWER5_PME_PM_PTEG_FROM_L25_SHR ] = { .pme_name = "PM_PTEG_FROM_L25_SHR", .pme_code = 0x183097, .pme_short_desc = "PTEG loaded from L2.5 shared", .pme_long_desc = "A Page Table Entry was loaded into the TLB with shared (T or SL) data from the L2 of a chip on the same module as this processor is located due to a demand load.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_PTEG_FROM_L25_SHR], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_PTEG_FROM_L25_SHR] }, [ POWER5_PME_PM_FAB_CMD_RETRIED ] = { .pme_name = "PM_FAB_CMD_RETRIED", .pme_code = 0x710c7, .pme_short_desc = "Fabric command retried", .pme_long_desc = "Incremented when a command issued by a chip on its SnoopA address bus is retried for any reason. 
The overwhelming majority of retries are due to running out of memory controller queues but retries can also be caused by trying to reference addresses that are in a transient cache state -- e.g. a line is transient after issuing a DCLAIM instruction to a shared line but before the associated store completes. Each chip reports its own counts. The signal is delivered at FBC speed and the count must be scaled accordingly.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FAB_CMD_RETRIED], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FAB_CMD_RETRIED] }, [ POWER5_PME_PM_L3SA_SHR_INV ] = { .pme_name = "PM_L3SA_SHR_INV", .pme_code = 0x710c3, .pme_short_desc = "L3 slice A transition from shared to invalid", .pme_long_desc = "L3 snooper detects someone doing a store to a line that is Sx in this L3(i.e. invalidate hit SX and dispatched).", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L3SA_SHR_INV], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L3SA_SHR_INV] }, [ POWER5_PME_PM_L2SB_RC_DISP_FAIL_CO_BUSY_ALL ] = { .pme_name = "PM_L2SB_RC_DISP_FAIL_CO_BUSY_ALL", .pme_code = 0x713c1, .pme_short_desc = "L2 slice B RC dispatch attempt failed due to all CO busy", .pme_long_desc = "A Read/Claim dispatch was rejected because all Castout machines were busy.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L2SB_RC_DISP_FAIL_CO_BUSY_ALL], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L2SB_RC_DISP_FAIL_CO_BUSY_ALL] }, [ POWER5_PME_PM_L2SA_RCST_DISP_FAIL_ADDR ] = { .pme_name = "PM_L2SA_RCST_DISP_FAIL_ADDR", .pme_code = 0x712c0, .pme_short_desc = "L2 slice A RC store dispatch attempt failed due to address collision with RC/CO/SN/SQ", .pme_long_desc = "A Read/Claim dispatch for a store failed because of an address conflict. 
Two RC machines will never both work on the same line or line in the same congruence class at the same time.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L2SA_RCST_DISP_FAIL_ADDR], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L2SA_RCST_DISP_FAIL_ADDR] }, [ POWER5_PME_PM_L2SA_RCLD_DISP_FAIL_RC_FULL ] = { .pme_name = "PM_L2SA_RCLD_DISP_FAIL_RC_FULL", .pme_code = 0x721e0, .pme_short_desc = "L2 slice A RC load dispatch attempt failed due to all RC full", .pme_long_desc = "A Read/Claim dispatch for a load failed because all RC machines are busy.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L2SA_RCLD_DISP_FAIL_RC_FULL], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L2SA_RCLD_DISP_FAIL_RC_FULL] }, [ POWER5_PME_PM_PTEG_FROM_L375_MOD ] = { .pme_name = "PM_PTEG_FROM_L375_MOD", .pme_code = 0x1830a7, .pme_short_desc = "PTEG loaded from L3.75 modified", .pme_long_desc = "A Page Table Entry was loaded into the TLB with modified (M) data from the L3 of a chip on a different module than this processor is located, due to a demand load.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_PTEG_FROM_L375_MOD], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_PTEG_FROM_L375_MOD] }, [ POWER5_PME_PM_MRK_LSU1_FLUSH_UST ] = { .pme_name = "PM_MRK_LSU1_FLUSH_UST", .pme_code = 0x810c5, .pme_short_desc = "LSU1 marked unaligned store flushes", .pme_long_desc = "A marked store was flushed from unit 1 because it was unaligned (crossed a 4k boundary)", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_LSU1_FLUSH_UST], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_LSU1_FLUSH_UST] }, [ POWER5_PME_PM_BR_ISSUED ] = { .pme_name = "PM_BR_ISSUED", .pme_code = 0x230e4, .pme_short_desc = "Branches issued", .pme_long_desc = "A branch instruction was issued to the branch unit. 
A branch that was incorrectly predicted may issue and execute multiple times.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_BR_ISSUED], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_BR_ISSUED] }, [ POWER5_PME_PM_MRK_GRP_BR_REDIR ] = { .pme_name = "PM_MRK_GRP_BR_REDIR", .pme_code = 0x212091, .pme_short_desc = "Group experienced marked branch redirect", .pme_long_desc = "A group containing a marked (sampled) instruction experienced a branch redirect.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_GRP_BR_REDIR], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_GRP_BR_REDIR] }, [ POWER5_PME_PM_EE_OFF ] = { .pme_name = "PM_EE_OFF", .pme_code = 0x130e3, .pme_short_desc = "Cycles MSR(EE) bit off", .pme_long_desc = "Cycles MSR(EE) bit was off indicating that interrupts due to external exceptions were masked.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_EE_OFF], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_EE_OFF] }, [ POWER5_PME_PM_MEM_RQ_DISP_Q4to7 ] = { .pme_name = "PM_MEM_RQ_DISP_Q4to7", .pme_code = 0x712c6, .pme_short_desc = "Memory read queue dispatched to queues 4-7", .pme_long_desc = "A memory operation was dispatched to read queue 4,5,6 or 7. 
This event is sent from the Memory Controller clock domain and must be scaled accordingly.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MEM_RQ_DISP_Q4to7], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MEM_RQ_DISP_Q4to7] }, [ POWER5_PME_PM_MEM_FAST_PATH_RD_DISP ] = { .pme_name = "PM_MEM_FAST_PATH_RD_DISP", .pme_code = 0x713e6, .pme_short_desc = "Fast path memory read dispatched", .pme_long_desc = "Fast path memory read dispatched", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MEM_FAST_PATH_RD_DISP], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MEM_FAST_PATH_RD_DISP] }, [ POWER5_PME_PM_INST_FROM_L3 ] = { .pme_name = "PM_INST_FROM_L3", .pme_code = 0x12208d, .pme_short_desc = "Instruction fetched from L3", .pme_long_desc = "An instruction fetch group was fetched from the local L3. Fetch groups can contain up to 8 instructions", .pme_event_ids = power5_event_ids[POWER5_PME_PM_INST_FROM_L3], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_INST_FROM_L3] }, [ POWER5_PME_PM_ITLB_MISS ] = { .pme_name = "PM_ITLB_MISS", .pme_code = 0x800c0, .pme_short_desc = "Instruction TLB misses", .pme_long_desc = "A TLB miss for an Instruction Fetch has occurred", .pme_event_ids = power5_event_ids[POWER5_PME_PM_ITLB_MISS], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_ITLB_MISS] }, [ POWER5_PME_PM_FXU1_BUSY_FXU0_IDLE ] = { .pme_name = "PM_FXU1_BUSY_FXU0_IDLE", .pme_code = 0x400012, .pme_short_desc = "FXU1 busy FXU0 idle", .pme_long_desc = "FXU0 was idle while FXU1 was busy.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FXU1_BUSY_FXU0_IDLE], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FXU1_BUSY_FXU0_IDLE] }, [ POWER5_PME_PM_FXLS_FULL_CYC ] = { .pme_name = "PM_FXLS_FULL_CYC", .pme_code = 0x411090, .pme_short_desc = "Cycles FXLS queue is full", .pme_long_desc = "Cycles when the issue queues for one or both FXU/LSU units is full. Use with caution since this is the sum of cycles when Unit 0 was full plus Unit 1 full. 
It does not indicate when both units were full.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FXLS_FULL_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FXLS_FULL_CYC] }, [ POWER5_PME_PM_DTLB_REF_4K ] = { .pme_name = "PM_DTLB_REF_4K", .pme_code = 0xc40c2, .pme_short_desc = "Data TLB reference for 4K page", .pme_long_desc = "Data TLB references for 4KB pages. Includes hits + misses.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_DTLB_REF_4K], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_DTLB_REF_4K] }, [ POWER5_PME_PM_GRP_DISP_VALID ] = { .pme_name = "PM_GRP_DISP_VALID", .pme_code = 0x120e3, .pme_short_desc = "Group dispatch valid", .pme_long_desc = "A group is available for dispatch. This does not mean it was successfully dispatched.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_GRP_DISP_VALID], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_GRP_DISP_VALID] }, [ POWER5_PME_PM_LSU_FLUSH_UST ] = { .pme_name = "PM_LSU_FLUSH_UST", .pme_code = 0x2c0088, .pme_short_desc = "SRQ unaligned store flushes", .pme_long_desc = "A store was flushed because it was unaligned (crossed a 4K boundary). Combined Unit 0 + 1.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU_FLUSH_UST], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU_FLUSH_UST] }, [ POWER5_PME_PM_FXU1_FIN ] = { .pme_name = "PM_FXU1_FIN", .pme_code = 0x130e6, .pme_short_desc = "FXU1 produced a result", .pme_long_desc = "The Fixed Point unit 1 finished an instruction and produced a result. 
Instructions that finish may not necessary complete.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FXU1_FIN], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FXU1_FIN] }, [ POWER5_PME_PM_THRD_PRIO_4_CYC ] = { .pme_name = "PM_THRD_PRIO_4_CYC", .pme_code = 0x420e3, .pme_short_desc = "Cycles thread running at priority level 4", .pme_long_desc = "Cycles this thread was running at priority level 4.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_THRD_PRIO_4_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_THRD_PRIO_4_CYC] }, [ POWER5_PME_PM_MRK_DATA_FROM_L35_MOD ] = { .pme_name = "PM_MRK_DATA_FROM_L35_MOD", .pme_code = 0x2c709e, .pme_short_desc = "Marked data loaded from L3.5 modified", .pme_long_desc = "The processor's Data Cache was reloaded with modified (M) data from the L3 of a chip on the same module as this processor is located due to a marked load.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_DATA_FROM_L35_MOD], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_DATA_FROM_L35_MOD] }, [ POWER5_PME_PM_4INST_CLB_CYC ] = { .pme_name = "PM_4INST_CLB_CYC", .pme_code = 0x400c4, .pme_short_desc = "Cycles 4 instructions in CLB", .pme_long_desc = "The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. 
Each thread has its own set of CLB; these events are thread specific.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_4INST_CLB_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_4INST_CLB_CYC] }, [ POWER5_PME_PM_MRK_DTLB_REF_16M ] = { .pme_name = "PM_MRK_DTLB_REF_16M", .pme_code = 0xc40c7, .pme_short_desc = "Marked Data TLB reference for 16M page", .pme_long_desc = "Data TLB references by a marked instruction for 16MB pages.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_DTLB_REF_16M], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_DTLB_REF_16M] }, [ POWER5_PME_PM_INST_FROM_L375_MOD ] = { .pme_name = "PM_INST_FROM_L375_MOD", .pme_code = 0x42209d, .pme_short_desc = "Instruction fetched from L3.75 modified", .pme_long_desc = "An instruction fetch group was fetched with modified (M) data from the L3 of a chip on a different module than this processor is located. Fetch groups can contain up to 8 instructions", .pme_event_ids = power5_event_ids[POWER5_PME_PM_INST_FROM_L375_MOD], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_INST_FROM_L375_MOD] }, [ POWER5_PME_PM_L2SC_RCST_DISP_FAIL_ADDR ] = { .pme_name = "PM_L2SC_RCST_DISP_FAIL_ADDR", .pme_code = 0x712c2, .pme_short_desc = "L2 slice C RC store dispatch attempt failed due to address collision with RC/CO/SN/SQ", .pme_long_desc = "A Read/Claim dispatch for a store failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L2SC_RCST_DISP_FAIL_ADDR], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L2SC_RCST_DISP_FAIL_ADDR] }, [ POWER5_PME_PM_GRP_CMPL ] = { .pme_name = "PM_GRP_CMPL", .pme_code = 0x300013, .pme_short_desc = "Group completed", .pme_long_desc = "A group completed. 
Microcoded instructions that span multiple groups will generate this event once per group.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_GRP_CMPL], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_GRP_CMPL] }, [ POWER5_PME_PM_FPU1_1FLOP ] = { .pme_name = "PM_FPU1_1FLOP", .pme_code = 0xc7, .pme_short_desc = "FPU1 executed add", .pme_long_desc = " mult", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FPU1_1FLOP], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FPU1_1FLOP] }, [ POWER5_PME_PM_FPU_FRSP_FCONV ] = { .pme_name = "PM_FPU_FRSP_FCONV", .pme_code = 0x301090, .pme_short_desc = "FPU executed FRSP or FCONV instructions", .pme_long_desc = "The floating point unit has executed a frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. Combined Unit 0 + Unit 1.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FPU_FRSP_FCONV], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FPU_FRSP_FCONV] }, [ POWER5_PME_PM_5INST_CLB_CYC ] = { .pme_name = "PM_5INST_CLB_CYC", .pme_code = 0x400c5, .pme_short_desc = "Cycles 5 instructions in CLB", .pme_long_desc = "The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_5INST_CLB_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_5INST_CLB_CYC] }, [ POWER5_PME_PM_L3SC_REF ] = { .pme_name = "PM_L3SC_REF", .pme_code = 0x701c5, .pme_short_desc = "L3 slice C references", .pme_long_desc = "Number of attempts made by this chip cores to find data in the L3. 
Reported per L3 slice.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L3SC_REF], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L3SC_REF] }, [ POWER5_PME_PM_THRD_L2MISS_BOTH_CYC ] = { .pme_name = "PM_THRD_L2MISS_BOTH_CYC", .pme_code = 0x410c7, .pme_short_desc = "Cycles both threads in L2 misses", .pme_long_desc = "Cycles that both threads have L2 miss pending. If only one thread has a L2 miss pending the other thread is given priority at decode. If both threads have L2 miss pending decode priority is determined by the number of GCT entries used.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_THRD_L2MISS_BOTH_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_THRD_L2MISS_BOTH_CYC] }, [ POWER5_PME_PM_MEM_PW_GATH ] = { .pme_name = "PM_MEM_PW_GATH", .pme_code = 0x714c6, .pme_short_desc = "Memory partial-write gathered", .pme_long_desc = "Two or more partial-writes have been merged into a single memory write. This event is sent from the Memory Controller clock domain and must be scaled accordingly.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MEM_PW_GATH], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MEM_PW_GATH] }, [ POWER5_PME_PM_FAB_PNtoNN_SIDECAR ] = { .pme_name = "PM_FAB_PNtoNN_SIDECAR", .pme_code = 0x713c7, .pme_short_desc = "PN to NN beat went to sidecar first", .pme_long_desc = "Fabric Data beats that the base chip takes the inbound PN data and forwards it on to the outbound NN data bus after going into a sidecar first. The signal is delivered at FBC speed and the count must be scaled.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FAB_PNtoNN_SIDECAR], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FAB_PNtoNN_SIDECAR] }, [ POWER5_PME_PM_FAB_DCLAIM_ISSUED ] = { .pme_name = "PM_FAB_DCLAIM_ISSUED", .pme_code = 0x720e7, .pme_short_desc = "dclaim issued", .pme_long_desc = "A DCLAIM command was issued. Each chip reports its own counts. The signal is delivered at FBC speed and the count must be scaled accordingly. 
", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FAB_DCLAIM_ISSUED], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FAB_DCLAIM_ISSUED] }, [ POWER5_PME_PM_GRP_IC_MISS ] = { .pme_name = "PM_GRP_IC_MISS", .pme_code = 0x120e7, .pme_short_desc = "Group experienced I cache miss", .pme_long_desc = "Number of groups, counted at dispatch, that have encountered an icache miss redirect. Every group constructed from a fetch group that missed the instruction cache will count.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_GRP_IC_MISS], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_GRP_IC_MISS] }, [ POWER5_PME_PM_INST_FROM_L35_SHR ] = { .pme_name = "PM_INST_FROM_L35_SHR", .pme_code = 0x12209d, .pme_short_desc = "Instruction fetched from L3.5 shared", .pme_long_desc = "An instruction fetch group was fetched with shared (S) data from the L3 of a chip on the same module as this processor is located. Fetch groups can contain up to 8 instructions", .pme_event_ids = power5_event_ids[POWER5_PME_PM_INST_FROM_L35_SHR], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_INST_FROM_L35_SHR] }, [ POWER5_PME_PM_LSU_LMQ_FULL_CYC ] = { .pme_name = "PM_LSU_LMQ_FULL_CYC", .pme_code = 0xc30e7, .pme_short_desc = "Cycles LMQ full", .pme_long_desc = "The Load Miss Queue was full.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU_LMQ_FULL_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU_LMQ_FULL_CYC] }, [ POWER5_PME_PM_MRK_DATA_FROM_L2_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_L2_CYC", .pme_code = 0x2c70a0, .pme_short_desc = "Marked load latency from L2", .pme_long_desc = "Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. 
To calculate average latency divide this count by the number of marked misses to the same level.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_DATA_FROM_L2_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_DATA_FROM_L2_CYC] }, [ POWER5_PME_PM_LSU_SRQ_SYNC_CYC ] = { .pme_name = "PM_LSU_SRQ_SYNC_CYC", .pme_code = 0x830e5, .pme_short_desc = "SRQ sync duration", .pme_long_desc = "Cycles that a sync instruction is active in the Store Request Queue.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU_SRQ_SYNC_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU_SRQ_SYNC_CYC] }, [ POWER5_PME_PM_LSU0_BUSY_REJECT ] = { .pme_name = "PM_LSU0_BUSY_REJECT", .pme_code = 0xc20e3, .pme_short_desc = "LSU0 busy due to reject", .pme_long_desc = "Total cycles the Load Store Unit 0 is busy rejecting instructions. ", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU0_BUSY_REJECT], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU0_BUSY_REJECT] }, [ POWER5_PME_PM_LSU_REJECT_ERAT_MISS ] = { .pme_name = "PM_LSU_REJECT_ERAT_MISS", .pme_code = 0x1c6090, .pme_short_desc = "LSU reject due to ERAT miss", .pme_long_desc = "Total cycles the Load Store Unit is busy rejecting instructions due to an ERAT miss. Combined unit 0 + 1. Requests that miss the Derat are rejected and retried until the request hits in the Erat.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU_REJECT_ERAT_MISS], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU_REJECT_ERAT_MISS] }, [ POWER5_PME_PM_MRK_DATA_FROM_RMEM_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_RMEM_CYC", .pme_code = 0x4c70a1, .pme_short_desc = "Marked load latency from remote memory", .pme_long_desc = "Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. 
To calculate average latency divide this count by the number of marked misses to the same level.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_DATA_FROM_RMEM_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_DATA_FROM_RMEM_CYC] }, [ POWER5_PME_PM_DATA_FROM_L375_SHR ] = { .pme_name = "PM_DATA_FROM_L375_SHR", .pme_code = 0x3c309e, .pme_short_desc = "Data loaded from L3.75 shared", .pme_long_desc = "The processor's Data Cache was reloaded with shared (S) data from the L3 of a chip on a different module than this processor is located due to a demand load.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_DATA_FROM_L375_SHR], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_DATA_FROM_L375_SHR] }, [ POWER5_PME_PM_FPU0_FMOV_FEST ] = { .pme_name = "PM_FPU0_FMOV_FEST", .pme_code = 0x10c0, .pme_short_desc = "FPU0 executed FMOV or FEST instructions", .pme_long_desc = "FPU0 has executed a move kind of instruction or one of the estimate instructions. This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FPU0_FMOV_FEST], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FPU0_FMOV_FEST] }, [ POWER5_PME_PM_PTEG_FROM_L25_MOD ] = { .pme_name = "PM_PTEG_FROM_L25_MOD", .pme_code = 0x283097, .pme_short_desc = "PTEG loaded from L2.5 modified", .pme_long_desc = "A Page Table Entry was loaded into the TLB with modified (M) data from the L2 of a chip on the same module as this processor is located due to a demand load.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_PTEG_FROM_L25_MOD], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_PTEG_FROM_L25_MOD] }, [ POWER5_PME_PM_LD_REF_L1_LSU0 ] = { .pme_name = "PM_LD_REF_L1_LSU0", .pme_code = 0xc10c0, .pme_short_desc = "LSU0 L1 D cache load references", .pme_long_desc = "Load references to Level 1 Data Cache, by unit 0.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LD_REF_L1_LSU0], .pme_group_vector = 
power5_group_vecs[POWER5_PME_PM_LD_REF_L1_LSU0] }, [ POWER5_PME_PM_THRD_PRIO_7_CYC ] = { .pme_name = "PM_THRD_PRIO_7_CYC", .pme_code = 0x420e6, .pme_short_desc = "Cycles thread running at priority level 7", .pme_long_desc = "Cycles this thread was running at priority level 7.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_THRD_PRIO_7_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_THRD_PRIO_7_CYC] }, [ POWER5_PME_PM_LSU1_FLUSH_SRQ ] = { .pme_name = "PM_LSU1_FLUSH_SRQ", .pme_code = 0xc00c7, .pme_short_desc = "LSU1 SRQ lhs flushes", .pme_long_desc = "A store was flushed because younger load hits and older store that is already in the SRQ or in the same group. ", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU1_FLUSH_SRQ], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU1_FLUSH_SRQ] }, [ POWER5_PME_PM_L2SC_RCST_DISP ] = { .pme_name = "PM_L2SC_RCST_DISP", .pme_code = 0x702c2, .pme_short_desc = "L2 slice C RC store dispatch attempt", .pme_long_desc = "A Read/Claim dispatch for a Store was attempted.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L2SC_RCST_DISP], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L2SC_RCST_DISP] }, [ POWER5_PME_PM_CMPLU_STALL_DIV ] = { .pme_name = "PM_CMPLU_STALL_DIV", .pme_code = 0x411099, .pme_short_desc = "Completion stall caused by DIV instruction", .pme_long_desc = "Following a completion stall (any period when no groups completed) the last instruction to finish before completion resumes was a fixed point divide instruction. This is a subset of PM_CMPLU_STALL_FXU.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_CMPLU_STALL_DIV], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_CMPLU_STALL_DIV] }, [ POWER5_PME_PM_MEM_RQ_DISP_Q12to15 ] = { .pme_name = "PM_MEM_RQ_DISP_Q12to15", .pme_code = 0x732e6, .pme_short_desc = "Memory read queue dispatched to queues 12-15", .pme_long_desc = "A memory operation was dispatched to read queue 12,13,14 or 15. 
This event is sent from the Memory Controller clock domain and must be scaled accordingly.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MEM_RQ_DISP_Q12to15], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MEM_RQ_DISP_Q12to15] }, [ POWER5_PME_PM_INST_FROM_L375_SHR ] = { .pme_name = "PM_INST_FROM_L375_SHR", .pme_code = 0x32209d, .pme_short_desc = "Instruction fetched from L3.75 shared", .pme_long_desc = "An instruction fetch group was fetched with shared (S) data from the L3 of a chip on a different module than this processor is located. Fetch groups can contain up to 8 instructions", .pme_event_ids = power5_event_ids[POWER5_PME_PM_INST_FROM_L375_SHR], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_INST_FROM_L375_SHR] }, [ POWER5_PME_PM_ST_REF_L1 ] = { .pme_name = "PM_ST_REF_L1", .pme_code = 0x3c1090, .pme_short_desc = "L1 D cache store references", .pme_long_desc = "Store references to the Data Cache. Combined Unit 0 + 1.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_ST_REF_L1], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_ST_REF_L1] }, [ POWER5_PME_PM_L3SB_ALL_BUSY ] = { .pme_name = "PM_L3SB_ALL_BUSY", .pme_code = 0x721e4, .pme_short_desc = "L3 slice B active for every cycle all CI/CO machines busy", .pme_long_desc = "Cycles All Castin/Castout machines are busy.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L3SB_ALL_BUSY], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L3SB_ALL_BUSY] }, [ POWER5_PME_PM_FAB_P1toVNorNN_SIDECAR_EMPTY ] = { .pme_name = "PM_FAB_P1toVNorNN_SIDECAR_EMPTY", .pme_code = 0x711c7, .pme_short_desc = "P1 to VN/NN sidecar empty", .pme_long_desc = "Fabric cycles when the Plus-1 jump sidecar (sidecars for mcm to mcm data transfer) is empty. 
The signal is delivered at FBC speed and the count must be scaled accordingly.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FAB_P1toVNorNN_SIDECAR_EMPTY], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FAB_P1toVNorNN_SIDECAR_EMPTY] }, [ POWER5_PME_PM_MRK_DATA_FROM_L275_SHR_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_L275_SHR_CYC", .pme_code = 0x2c70a3, .pme_short_desc = "Marked load latency from L2.75 shared", .pme_long_desc = "Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. To calculate average latency divide this count by the number of marked misses to the same level.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_DATA_FROM_L275_SHR_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_DATA_FROM_L275_SHR_CYC] }, [ POWER5_PME_PM_FAB_HOLDtoNN_EMPTY ] = { .pme_name = "PM_FAB_HOLDtoNN_EMPTY", .pme_code = 0x722e7, .pme_short_desc = "Hold buffer to NN empty", .pme_long_desc = "Fabric cyles when the Next Node out hold-buffers are emtpy. The signal is delivered at FBC speed and the count must be scaled accordingly.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FAB_HOLDtoNN_EMPTY], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FAB_HOLDtoNN_EMPTY] }, [ POWER5_PME_PM_DATA_FROM_LMEM ] = { .pme_name = "PM_DATA_FROM_LMEM", .pme_code = 0x2c3087, .pme_short_desc = "Data loaded from local memory", .pme_long_desc = "The processor's Data Cache was reloaded from memory attached to the same module this proccessor is located on.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_DATA_FROM_LMEM], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_DATA_FROM_LMEM] }, [ POWER5_PME_PM_RUN_CYC ] = { .pme_name = "PM_RUN_CYC", .pme_code = 0x100005, .pme_short_desc = "Run cycles", .pme_long_desc = "Processor Cycles gated by the run latch. Operating systems use the run latch to indicate when they are doing useful work. 
The run latch is typically cleared in the OS idle loop. Gating by the run latch filters out the idle loop.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_RUN_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_RUN_CYC] }, [ POWER5_PME_PM_PTEG_FROM_RMEM ] = { .pme_name = "PM_PTEG_FROM_RMEM", .pme_code = 0x1830a1, .pme_short_desc = "PTEG loaded from remote memory", .pme_long_desc = "A Page Table Entry was loaded into the TLB from memory attached to a different module than this proccessor is located on.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_PTEG_FROM_RMEM], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_PTEG_FROM_RMEM] }, [ POWER5_PME_PM_L2SC_RCLD_DISP ] = { .pme_name = "PM_L2SC_RCLD_DISP", .pme_code = 0x701c2, .pme_short_desc = "L2 slice C RC load dispatch attempt", .pme_long_desc = "A Read/Claim dispatch for a Load was attempted", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L2SC_RCLD_DISP], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L2SC_RCLD_DISP] }, [ POWER5_PME_PM_LSU0_LDF ] = { .pme_name = "PM_LSU0_LDF", .pme_code = 0xc50c0, .pme_short_desc = "LSU0 executed Floating Point load instruction", .pme_long_desc = "A floating point load was executed by LSU0", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU0_LDF], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU0_LDF] }, [ POWER5_PME_PM_LSU_LRQ_S0_VALID ] = { .pme_name = "PM_LSU_LRQ_S0_VALID", .pme_code = 0xc20e2, .pme_short_desc = "LRQ slot 0 valid", .pme_long_desc = "This signal is asserted every cycle that the Load Request Queue slot zero is valid. The SRQ is 32 entries long and is allocated round-robin. 
In SMT mode the LRQ is split between the two threads (16 entries each).", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU_LRQ_S0_VALID], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU_LRQ_S0_VALID] }, [ POWER5_PME_PM_PMC3_OVERFLOW ] = { .pme_name = "PM_PMC3_OVERFLOW", .pme_code = 0x40000a, .pme_short_desc = "PMC3 Overflow", .pme_long_desc = "Overflows from PMC3 are counted. This effectively widens the PMC. The Overflow from the original PMC will not trigger an exception even if the PMU is configured to generate exceptions on overflow.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_PMC3_OVERFLOW], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_PMC3_OVERFLOW] }, [ POWER5_PME_PM_MRK_IMR_RELOAD ] = { .pme_name = "PM_MRK_IMR_RELOAD", .pme_code = 0x820e2, .pme_short_desc = "Marked IMR reloaded", .pme_long_desc = "A DL1 reload occurred due to marked load", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_IMR_RELOAD], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_IMR_RELOAD] }, [ POWER5_PME_PM_MRK_GRP_TIMEO ] = { .pme_name = "PM_MRK_GRP_TIMEO", .pme_code = 0x40000b, .pme_short_desc = "Marked group completion timeout", .pme_long_desc = "The sampling timeout expired indicating that the previously sampled instruction is no longer in the processor", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_GRP_TIMEO], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_GRP_TIMEO] }, [ POWER5_PME_PM_ST_MISS_L1 ] = { .pme_name = "PM_ST_MISS_L1", .pme_code = 0xc10c3, .pme_short_desc = "L1 D cache store misses", .pme_long_desc = "A store missed the dcache. 
Combined Unit 0 + 1.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_ST_MISS_L1], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_ST_MISS_L1] }, [ POWER5_PME_PM_STOP_COMPLETION ] = { .pme_name = "PM_STOP_COMPLETION", .pme_code = 0x300018, .pme_short_desc = "Completion stopped", .pme_long_desc = "RAS Unit has signaled completion to stop", .pme_event_ids = power5_event_ids[POWER5_PME_PM_STOP_COMPLETION], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_STOP_COMPLETION] }, [ POWER5_PME_PM_LSU_BUSY_REJECT ] = { .pme_name = "PM_LSU_BUSY_REJECT", .pme_code = 0x1c2090, .pme_short_desc = "LSU busy due to reject", .pme_long_desc = "Total cycles the Load Store Unit is busy rejecting instructions. Combined unit 0 + 1.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU_BUSY_REJECT], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU_BUSY_REJECT] }, [ POWER5_PME_PM_ISLB_MISS ] = { .pme_name = "PM_ISLB_MISS", .pme_code = 0x800c1, .pme_short_desc = "Instruction SLB misses", .pme_long_desc = "A SLB miss for an instruction fetch as occurred", .pme_event_ids = power5_event_ids[POWER5_PME_PM_ISLB_MISS], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_ISLB_MISS] }, [ POWER5_PME_PM_CYC ] = { .pme_name = "PM_CYC", .pme_code = 0xf, .pme_short_desc = "Processor cycles", .pme_long_desc = "Processor cycles", .pme_event_ids = power5_event_ids[POWER5_PME_PM_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_CYC] }, [ POWER5_PME_PM_THRD_ONE_RUN_CYC ] = { .pme_name = "PM_THRD_ONE_RUN_CYC", .pme_code = 0x10000b, .pme_short_desc = "One of the threads in run cycles", .pme_long_desc = "At least one thread has set its run latch. Operating systems use the run latch to indicate when they are doing useful work. The run latch is typically cleared in the OS idle loop. 
This event does not respect FCWAIT.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_THRD_ONE_RUN_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_THRD_ONE_RUN_CYC] }, [ POWER5_PME_PM_GRP_BR_REDIR_NONSPEC ] = { .pme_name = "PM_GRP_BR_REDIR_NONSPEC", .pme_code = 0x112091, .pme_short_desc = "Group experienced non-speculative branch redirect", .pme_long_desc = "Number of groups, counted at completion, that have encountered a branch redirect.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_GRP_BR_REDIR_NONSPEC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_GRP_BR_REDIR_NONSPEC] }, [ POWER5_PME_PM_LSU1_SRQ_STFWD ] = { .pme_name = "PM_LSU1_SRQ_STFWD", .pme_code = 0xc20e4, .pme_short_desc = "LSU1 SRQ store forwarded", .pme_long_desc = "Data from a store instruction was forwarded to a load on unit 1. A load that misses L1 but becomes a store forward is treated as a load miss and it causes the DL1 load miss event to be counted. It does not go into the LMQ. If a load that hits L1 but becomes a store forward, then it's not treated as a load miss.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU1_SRQ_STFWD], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU1_SRQ_STFWD] }, [ POWER5_PME_PM_L3SC_MOD_INV ] = { .pme_name = "PM_L3SC_MOD_INV", .pme_code = 0x730e5, .pme_short_desc = "L3 slice C transition from modified to invalid", .pme_long_desc = "L3 snooper detects someone doing a store to a line that is truly M in this L3 (i.e. 
L3 going M=>I) Mu|Me are not included since they are formed due to a previous read op Tx is not included since it is considered shared at this point.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L3SC_MOD_INV], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L3SC_MOD_INV] }, [ POWER5_PME_PM_L2_PREF ] = { .pme_name = "PM_L2_PREF", .pme_code = 0xc50c3, .pme_short_desc = "L2 cache prefetches", .pme_long_desc = "A request to prefetch data into L2 was made", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L2_PREF], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L2_PREF] }, [ POWER5_PME_PM_GCT_NOSLOT_BR_MPRED ] = { .pme_name = "PM_GCT_NOSLOT_BR_MPRED", .pme_code = 0x41009c, .pme_short_desc = "No slot in GCT caused by branch mispredict", .pme_long_desc = "Cycles when the Global Completion Table has no slots from this thread because of a branch misprediction.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_GCT_NOSLOT_BR_MPRED], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_GCT_NOSLOT_BR_MPRED] }, [ POWER5_PME_PM_MRK_DATA_FROM_L25_MOD ] = { .pme_name = "PM_MRK_DATA_FROM_L25_MOD", .pme_code = 0x2c7097, .pme_short_desc = "Marked data loaded from L2.5 modified", .pme_long_desc = "The processor's Data Cache was reloaded with modified (M) data from the L2 of a chip on the same module as this processor is located due to a marked load.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_DATA_FROM_L25_MOD], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_DATA_FROM_L25_MOD] }, [ POWER5_PME_PM_L2SB_MOD_INV ] = { .pme_name = "PM_L2SB_MOD_INV", .pme_code = 0x730e1, .pme_short_desc = "L2 slice B transition from modified to invalid", .pme_long_desc = "A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. 
The event is provided on each of the three slices A, B, and C.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L2SB_MOD_INV], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L2SB_MOD_INV] }, [ POWER5_PME_PM_L2SB_ST_REQ ] = { .pme_name = "PM_L2SB_ST_REQ", .pme_code = 0x723e1, .pme_short_desc = "L2 slice B store requests", .pme_long_desc = "A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. The event is provided on each of the three slices A, B, and C.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L2SB_ST_REQ], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L2SB_ST_REQ] }, [ POWER5_PME_PM_MRK_L1_RELOAD_VALID ] = { .pme_name = "PM_MRK_L1_RELOAD_VALID", .pme_code = 0xc70e4, .pme_short_desc = "Marked L1 reload data source valid", .pme_long_desc = "The source information is valid and is for a marked load", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_L1_RELOAD_VALID], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_L1_RELOAD_VALID] }, [ POWER5_PME_PM_L3SB_HIT ] = { .pme_name = "PM_L3SB_HIT", .pme_code = 0x711c4, .pme_short_desc = "L3 slice B hits", .pme_long_desc = "Number of attempts made by this chip cores that resulted in an L3 hit. Reported per L3 slice", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L3SB_HIT], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L3SB_HIT] }, [ POWER5_PME_PM_L2SB_SHR_MOD ] = { .pme_name = "PM_L2SB_SHR_MOD", .pme_code = 0x700c1, .pme_short_desc = "L2 slice B transition from shared to modified", .pme_long_desc = "A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A, B, and C. 
", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L2SB_SHR_MOD], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L2SB_SHR_MOD] }, [ POWER5_PME_PM_EE_OFF_EXT_INT ] = { .pme_name = "PM_EE_OFF_EXT_INT", .pme_code = 0x130e7, .pme_short_desc = "Cycles MSR(EE) bit off and external interrupt pending", .pme_long_desc = "Cycles when an interrupt due to an external exception is pending but external exceptions were masked.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_EE_OFF_EXT_INT], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_EE_OFF_EXT_INT] }, [ POWER5_PME_PM_1PLUS_PPC_CMPL ] = { .pme_name = "PM_1PLUS_PPC_CMPL", .pme_code = 0x100013, .pme_short_desc = "One or more PPC instruction completed", .pme_long_desc = "A group containing at least one PPC instruction completed. For microcoded instructions that span multiple groups, this will only occur once.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_1PLUS_PPC_CMPL], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_1PLUS_PPC_CMPL] }, [ POWER5_PME_PM_L2SC_SHR_MOD ] = { .pme_name = "PM_L2SC_SHR_MOD", .pme_code = 0x700c2, .pme_short_desc = "L2 slice C transition from shared to modified", .pme_long_desc = "A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A, B, and C. ", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L2SC_SHR_MOD], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L2SC_SHR_MOD] }, [ POWER5_PME_PM_PMC6_OVERFLOW ] = { .pme_name = "PM_PMC6_OVERFLOW", .pme_code = 0x30001a, .pme_short_desc = "PMC6 Overflow", .pme_long_desc = "Overflows from PMC6 are counted. This effectively widens the PMC. 
The Overflow from the original PMC will not trigger an exception even if the PMU is configured to generate exceptions on overflow.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_PMC6_OVERFLOW], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_PMC6_OVERFLOW] }, [ POWER5_PME_PM_LSU_LRQ_FULL_CYC ] = { .pme_name = "PM_LSU_LRQ_FULL_CYC", .pme_code = 0x110c2, .pme_short_desc = "Cycles LRQ full", .pme_long_desc = "Cycles when the LRQ is full.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU_LRQ_FULL_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU_LRQ_FULL_CYC] }, [ POWER5_PME_PM_IC_PREF_INSTALL ] = { .pme_name = "PM_IC_PREF_INSTALL", .pme_code = 0x210c7, .pme_short_desc = "Instruction prefetched installed in prefetch buffer", .pme_long_desc = "A prefetch buffer entry (line) is allocated but the request is not a demand fetch.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_IC_PREF_INSTALL], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_IC_PREF_INSTALL] }, [ POWER5_PME_PM_TLB_MISS ] = { .pme_name = "PM_TLB_MISS", .pme_code = 0x180088, .pme_short_desc = "TLB misses", .pme_long_desc = "Total of Data TLB mises + Instruction TLB misses", .pme_event_ids = power5_event_ids[POWER5_PME_PM_TLB_MISS], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_TLB_MISS] }, [ POWER5_PME_PM_GCT_FULL_CYC ] = { .pme_name = "PM_GCT_FULL_CYC", .pme_code = 0x100c0, .pme_short_desc = "Cycles GCT full", .pme_long_desc = "The Global Completion Table is completely full.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_GCT_FULL_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_GCT_FULL_CYC] }, [ POWER5_PME_PM_FXU_BUSY ] = { .pme_name = "PM_FXU_BUSY", .pme_code = 0x200012, .pme_short_desc = "FXU busy", .pme_long_desc = "Cycles when both FXU0 and FXU1 are busy.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FXU_BUSY], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FXU_BUSY] }, [ POWER5_PME_PM_MRK_DATA_FROM_L3_CYC ] = { .pme_name = 
"PM_MRK_DATA_FROM_L3_CYC", .pme_code = 0x2c70a4, .pme_short_desc = "Marked load latency from L3", .pme_long_desc = "Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. To calculate average latency divide this count by the number of marked misses to the same level.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_DATA_FROM_L3_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_DATA_FROM_L3_CYC] }, [ POWER5_PME_PM_LSU_REJECT_LMQ_FULL ] = { .pme_name = "PM_LSU_REJECT_LMQ_FULL", .pme_code = 0x2c6088, .pme_short_desc = "LSU reject due to LMQ full or missed data coming", .pme_long_desc = "Total cycles the Load Store Unit is busy rejecting instructions because the Load Miss Queue was full. The LMQ has eight entries. If all the eight entries are full, subsequent load instructions are rejected. Combined unit 0 + 1.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU_REJECT_LMQ_FULL], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU_REJECT_LMQ_FULL] }, [ POWER5_PME_PM_LSU_SRQ_S0_ALLOC ] = { .pme_name = "PM_LSU_SRQ_S0_ALLOC", .pme_code = 0xc20e5, .pme_short_desc = "SRQ slot 0 allocated", .pme_long_desc = "SRQ Slot zero was allocated", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU_SRQ_S0_ALLOC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU_SRQ_S0_ALLOC] }, [ POWER5_PME_PM_GRP_MRK ] = { .pme_name = "PM_GRP_MRK", .pme_code = 0x100014, .pme_short_desc = "Group marked in IDU", .pme_long_desc = "A group was sampled (marked). The group is called a marked group. One instruction within the group is tagged for detailed monitoring. The sampled instruction is called a marked instructions. 
Events associated with the marked instruction are annotated with the marked term.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_GRP_MRK], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_GRP_MRK] }, [ POWER5_PME_PM_INST_FROM_L25_SHR ] = { .pme_name = "PM_INST_FROM_L25_SHR", .pme_code = 0x122096, .pme_short_desc = "Instruction fetched from L2.5 shared", .pme_long_desc = "An instruction fetch group was fetched with shared (T or SL) data from the L2 of a chip on the same module as this processor is located. Fetch groups can contain up to 8 instructions.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_INST_FROM_L25_SHR], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_INST_FROM_L25_SHR] }, [ POWER5_PME_PM_FPU1_FIN ] = { .pme_name = "PM_FPU1_FIN", .pme_code = 0x10c7, .pme_short_desc = "FPU1 produced a result", .pme_long_desc = "FPU1 finished, produced a result. This only indicates finish, not completion. Floating Point Stores are included in this count but not Floating Point Loads., , ", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FPU1_FIN], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FPU1_FIN] }, [ POWER5_PME_PM_DC_PREF_STREAM_ALLOC ] = { .pme_name = "PM_DC_PREF_STREAM_ALLOC", .pme_code = 0x830e7, .pme_short_desc = "D cache new prefetch stream allocated", .pme_long_desc = "A new Prefetch Stream was allocated.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_DC_PREF_STREAM_ALLOC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_DC_PREF_STREAM_ALLOC] }, [ POWER5_PME_PM_BR_MPRED_TA ] = { .pme_name = "PM_BR_MPRED_TA", .pme_code = 0x230e6, .pme_short_desc = "Branch mispredictions due to target address", .pme_long_desc = "A branch instruction target was incorrectly predicted. 
This will result in a branch mispredict flush unless a flush is detected from an older instruction.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_BR_MPRED_TA], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_BR_MPRED_TA] }, [ POWER5_PME_PM_CRQ_FULL_CYC ] = { .pme_name = "PM_CRQ_FULL_CYC", .pme_code = 0x110c1, .pme_short_desc = "Cycles CR issue queue full", .pme_long_desc = "The issue queue that feeds the Conditional Register unit is full. This condition will prevent dispatch groups from being dispatched. This event only indicates that the queue was full, not that dispatch was prevented.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_CRQ_FULL_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_CRQ_FULL_CYC] }, [ POWER5_PME_PM_L2SA_RCLD_DISP ] = { .pme_name = "PM_L2SA_RCLD_DISP", .pme_code = 0x701c0, .pme_short_desc = "L2 slice A RC load dispatch attempt", .pme_long_desc = "A Read/Claim dispatch for a Load was attempted", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L2SA_RCLD_DISP], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L2SA_RCLD_DISP] }, [ POWER5_PME_PM_SNOOP_WR_RETRY_QFULL ] = { .pme_name = "PM_SNOOP_WR_RETRY_QFULL", .pme_code = 0x710c6, .pme_short_desc = "Snoop read retry due to read queue full", .pme_long_desc = "A snoop request for a write to memory was retried because the write queues were full. When this happens the snoop request is retried and the writes in the write reorder queue are changed to high priority. 
This event is sent from the Memory Controller clock domain and must be scaled accordingly.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_SNOOP_WR_RETRY_QFULL], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_SNOOP_WR_RETRY_QFULL] }, [ POWER5_PME_PM_MRK_DTLB_REF_4K ] = { .pme_name = "PM_MRK_DTLB_REF_4K", .pme_code = 0xc40c3, .pme_short_desc = "Marked Data TLB reference for 4K page", .pme_long_desc = "Data TLB references by a marked instruction for 4KB pages.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_DTLB_REF_4K], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_DTLB_REF_4K] }, [ POWER5_PME_PM_LSU_SRQ_S0_VALID ] = { .pme_name = "PM_LSU_SRQ_S0_VALID", .pme_code = 0xc20e1, .pme_short_desc = "SRQ slot 0 valid", .pme_long_desc = "This signal is asserted every cycle that the Store Request Queue slot zero is valid. The SRQ is 32 entries long and is allocated round-robin. In SMT mode the SRQ is split between the two threads (16 entries each).", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU_SRQ_S0_VALID], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU_SRQ_S0_VALID] }, [ POWER5_PME_PM_LSU0_FLUSH_LRQ ] = { .pme_name = "PM_LSU0_FLUSH_LRQ", .pme_code = 0xc00c2, .pme_short_desc = "LSU0 LRQ flushes", .pme_long_desc = "A load was flushed by unit 0 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU0_FLUSH_LRQ], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU0_FLUSH_LRQ] }, [ POWER5_PME_PM_INST_FROM_L275_MOD ] = { .pme_name = "PM_INST_FROM_L275_MOD", .pme_code = 0x422096, .pme_short_desc = "Instruction fetched from L2.75 modified", .pme_long_desc = "An instruction fetch group was fetched with modified (M) data from the L2 on a different module than this processor is located. 
Fetch groups can contain up to 8 instructions ", .pme_event_ids = power5_event_ids[POWER5_PME_PM_INST_FROM_L275_MOD], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_INST_FROM_L275_MOD] }, [ POWER5_PME_PM_GCT_EMPTY_CYC ] = { .pme_name = "PM_GCT_EMPTY_CYC", .pme_code = 0x200004, .pme_short_desc = "Cycles GCT empty", .pme_long_desc = "The Global Completion Table is completely empty", .pme_event_ids = power5_event_ids[POWER5_PME_PM_GCT_EMPTY_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_GCT_EMPTY_CYC] }, [ POWER5_PME_PM_LARX_LSU0 ] = { .pme_name = "PM_LARX_LSU0", .pme_code = 0x820e7, .pme_short_desc = "Larx executed on LSU0", .pme_long_desc = "A larx (lwarx or ldarx) was executed on side 0 (there is no corresponding unit 1 event since larx instructions can only execute on unit 0)", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LARX_LSU0], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LARX_LSU0] }, [ POWER5_PME_PM_THRD_PRIO_DIFF_5or6_CYC ] = { .pme_name = "PM_THRD_PRIO_DIFF_5or6_CYC", .pme_code = 0x430e6, .pme_short_desc = "Cycles thread priority difference is 5 or 6", .pme_long_desc = "Cycles when this thread's priority is higher than the other thread's priority by 5 or 6.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_THRD_PRIO_DIFF_5or6_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_THRD_PRIO_DIFF_5or6_CYC] }, [ POWER5_PME_PM_SNOOP_RETRY_1AHEAD ] = { .pme_name = "PM_SNOOP_RETRY_1AHEAD", .pme_code = 0x725e6, .pme_short_desc = "Snoop retry due to one ahead collision", .pme_long_desc = "Snoop retry due to one ahead collision", .pme_event_ids = power5_event_ids[POWER5_PME_PM_SNOOP_RETRY_1AHEAD], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_SNOOP_RETRY_1AHEAD] }, [ POWER5_PME_PM_FPU1_FSQRT ] = { .pme_name = "PM_FPU1_FSQRT", .pme_code = 0xc6, .pme_short_desc = "FPU1 executed FSQRT instruction", .pme_long_desc = "FPU1 has executed a square root instruction. 
This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FPU1_FSQRT], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FPU1_FSQRT] }, [ POWER5_PME_PM_MRK_LD_MISS_L1_LSU1 ] = { .pme_name = "PM_MRK_LD_MISS_L1_LSU1", .pme_code = 0x820e4, .pme_short_desc = "LSU1 marked L1 D cache load misses", .pme_long_desc = "Load references that miss the Level 1 Data cache, by LSU1.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_LD_MISS_L1_LSU1], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_LD_MISS_L1_LSU1] }, [ POWER5_PME_PM_MRK_FPU_FIN ] = { .pme_name = "PM_MRK_FPU_FIN", .pme_code = 0x300014, .pme_short_desc = "Marked instruction FPU processing finished", .pme_long_desc = "One of the Floating Point Units finished a marked instruction. Instructions that finish may not necessary complete", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_FPU_FIN], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_FPU_FIN] }, [ POWER5_PME_PM_THRD_PRIO_5_CYC ] = { .pme_name = "PM_THRD_PRIO_5_CYC", .pme_code = 0x420e4, .pme_short_desc = "Cycles thread running at priority level 5", .pme_long_desc = "Cycles this thread was running at priority level 5.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_THRD_PRIO_5_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_THRD_PRIO_5_CYC] }, [ POWER5_PME_PM_MRK_DATA_FROM_LMEM ] = { .pme_name = "PM_MRK_DATA_FROM_LMEM", .pme_code = 0x2c7087, .pme_short_desc = "Marked data loaded from local memory", .pme_long_desc = "The processor's Data Cache was reloaded due to a marked load from memory attached to the same module this proccessor is located on.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_DATA_FROM_LMEM], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_DATA_FROM_LMEM] }, [ POWER5_PME_PM_FPU1_FRSP_FCONV ] = { .pme_name = "PM_FPU1_FRSP_FCONV", .pme_code = 0x10c5, .pme_short_desc = "FPU1 executed FRSP or FCONV instructions", .pme_long_desc = "FPU1 has executed a 
frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FPU1_FRSP_FCONV], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FPU1_FRSP_FCONV] }, [ POWER5_PME_PM_SNOOP_TLBIE ] = { .pme_name = "PM_SNOOP_TLBIE", .pme_code = 0x800c3, .pme_short_desc = "Snoop TLBIE", .pme_long_desc = "A tlbie was snooped from another processor.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_SNOOP_TLBIE], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_SNOOP_TLBIE] }, [ POWER5_PME_PM_L3SB_SNOOP_RETRY ] = { .pme_name = "PM_L3SB_SNOOP_RETRY", .pme_code = 0x731e4, .pme_short_desc = "L3 slice B snoop retries", .pme_long_desc = "Number of times an L3 retried a snoop because it got two in at the same time (one on snp_a, one on snp_b)", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L3SB_SNOOP_RETRY], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L3SB_SNOOP_RETRY] }, [ POWER5_PME_PM_FAB_VBYPASS_EMPTY ] = { .pme_name = "PM_FAB_VBYPASS_EMPTY", .pme_code = 0x731e7, .pme_short_desc = "Vertical bypass buffer empty", .pme_long_desc = "Fabric cycles when the Middle Bypass sidecar is empty. 
The signal is delivered at FBC speed and the count must be scaled accordingly.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FAB_VBYPASS_EMPTY], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FAB_VBYPASS_EMPTY] }, [ POWER5_PME_PM_MRK_DATA_FROM_L275_MOD ] = { .pme_name = "PM_MRK_DATA_FROM_L275_MOD", .pme_code = 0x1c70a3, .pme_short_desc = "Marked data loaded from L2.75 modified", .pme_long_desc = "The processor's Data Cache was reloaded with modified (M) data from the L2 on a different module than this processor is located due to a marked load.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_DATA_FROM_L275_MOD], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_DATA_FROM_L275_MOD] }, [ POWER5_PME_PM_6INST_CLB_CYC ] = { .pme_name = "PM_6INST_CLB_CYC", .pme_code = 0x400c6, .pme_short_desc = "Cycles 6 instructions in CLB", .pme_long_desc = "The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. 
Each thread has its own set of CLB; these events are thread specific.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_6INST_CLB_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_6INST_CLB_CYC] }, [ POWER5_PME_PM_L2SB_RCST_DISP ] = { .pme_name = "PM_L2SB_RCST_DISP", .pme_code = 0x702c1, .pme_short_desc = "L2 slice B RC store dispatch attempt", .pme_long_desc = "A Read/Claim dispatch for a Store was attempted.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L2SB_RCST_DISP], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L2SB_RCST_DISP] }, [ POWER5_PME_PM_FLUSH ] = { .pme_name = "PM_FLUSH", .pme_code = 0x110c7, .pme_short_desc = "Flushes", .pme_long_desc = "Flushes occurred including LSU and Branch flushes.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FLUSH], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FLUSH] }, [ POWER5_PME_PM_L2SC_MOD_INV ] = { .pme_name = "PM_L2SC_MOD_INV", .pme_code = 0x730e2, .pme_short_desc = "L2 slice C transition from modified to invalid", .pme_long_desc = "A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L2SC_MOD_INV], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L2SC_MOD_INV] }, [ POWER5_PME_PM_FPU_DENORM ] = { .pme_name = "PM_FPU_DENORM", .pme_code = 0x102088, .pme_short_desc = "FPU received denormalized data", .pme_long_desc = "The floating point unit has encountered a denormalized operand. 
Combined Unit 0 + Unit 1.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FPU_DENORM], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FPU_DENORM] }, [ POWER5_PME_PM_L3SC_HIT ] = { .pme_name = "PM_L3SC_HIT", .pme_code = 0x711c5, .pme_short_desc = "L3 slice C hits", .pme_long_desc = "Number of attempts made by this chip cores that resulted in an L3 hit. Reported per L3 Slice", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L3SC_HIT], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L3SC_HIT] }, [ POWER5_PME_PM_SNOOP_WR_RETRY_RQ ] = { .pme_name = "PM_SNOOP_WR_RETRY_RQ", .pme_code = 0x706c6, .pme_short_desc = "Snoop write/dclaim retry due to collision with active read queue", .pme_long_desc = "A snoop request for a write or dclaim to memory was retried because it matched the cacheline of an active read. This event is sent from the Memory Controller clock domain and must be scaled accordingly", .pme_event_ids = power5_event_ids[POWER5_PME_PM_SNOOP_WR_RETRY_RQ], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_SNOOP_WR_RETRY_RQ] }, [ POWER5_PME_PM_LSU1_REJECT_SRQ ] = { .pme_name = "PM_LSU1_REJECT_SRQ", .pme_code = 0xc60e4, .pme_short_desc = "LSU1 SRQ lhs rejects", .pme_long_desc = "Total cycles the Load Store Unit 1 is busy rejecting instructions because of Load Hit Store conditions. 
Loads are rejected when data is needed from a previous store instruction but store forwarding is not possible because the data is not fully contained in the Store Data Queue or is not yet available in the Store Data Queue.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU1_REJECT_SRQ], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU1_REJECT_SRQ] }, [ POWER5_PME_PM_IC_PREF_REQ ] = { .pme_name = "PM_IC_PREF_REQ", .pme_code = 0x220e6, .pme_short_desc = "Instruction prefetch requests", .pme_long_desc = "An instruction prefetch request has been made.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_IC_PREF_REQ], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_IC_PREF_REQ] }, [ POWER5_PME_PM_L3SC_ALL_BUSY ] = { .pme_name = "PM_L3SC_ALL_BUSY", .pme_code = 0x721e5, .pme_short_desc = "L3 slice C active for every cycle all CI/CO machines busy", .pme_long_desc = "Cycles All Castin/Castout machines are busy.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L3SC_ALL_BUSY], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L3SC_ALL_BUSY] }, [ POWER5_PME_PM_MRK_GRP_IC_MISS ] = { .pme_name = "PM_MRK_GRP_IC_MISS", .pme_code = 0x412091, .pme_short_desc = "Group experienced marked I cache miss", .pme_long_desc = "A group containing a marked (sampled) instruction experienced an instruction cache miss.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_GRP_IC_MISS], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_GRP_IC_MISS] }, [ POWER5_PME_PM_GCT_NOSLOT_IC_MISS ] = { .pme_name = "PM_GCT_NOSLOT_IC_MISS", .pme_code = 0x21009c, .pme_short_desc = "No slot in GCT caused by I cache miss", .pme_long_desc = "Cycles when the Global Completion Table has no slots from this thread because of an Instruction Cache miss.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_GCT_NOSLOT_IC_MISS], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_GCT_NOSLOT_IC_MISS] }, [ POWER5_PME_PM_MRK_DATA_FROM_L3 ] = { .pme_name = "PM_MRK_DATA_FROM_L3", .pme_code = 0x1c708e, 
.pme_short_desc = "Marked data loaded from L3", .pme_long_desc = "The processor's Data Cache was reloaded from the local L3 due to a marked load.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_DATA_FROM_L3], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_DATA_FROM_L3] }, [ POWER5_PME_PM_GCT_NOSLOT_SRQ_FULL ] = { .pme_name = "PM_GCT_NOSLOT_SRQ_FULL", .pme_code = 0x310084, .pme_short_desc = "No slot in GCT caused by SRQ full", .pme_long_desc = "Cycles when the Global Completion Table has no slots from this thread because the Store Request Queue (SRQ) is full. This happens when the storage subsystem can not process the stores in the SRQ. Groups can not be dispatched until a SRQ entry is available.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_GCT_NOSLOT_SRQ_FULL], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_GCT_NOSLOT_SRQ_FULL] }, [ POWER5_PME_PM_THRD_SEL_OVER_ISU_HOLD ] = { .pme_name = "PM_THRD_SEL_OVER_ISU_HOLD", .pme_code = 0x410c5, .pme_short_desc = "Thread selection overrides caused by ISU holds", .pme_long_desc = "Thread selection was overridden because of an ISU hold.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_THRD_SEL_OVER_ISU_HOLD], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_THRD_SEL_OVER_ISU_HOLD] }, [ POWER5_PME_PM_CMPLU_STALL_DCACHE_MISS ] = { .pme_name = "PM_CMPLU_STALL_DCACHE_MISS", .pme_code = 0x21109a, .pme_short_desc = "Completion stall caused by D cache miss", .pme_long_desc = "Following a completion stall (any period when no groups completed) the last instruction to finish before completion resumes suffered a Data Cache Miss. Data Cache Miss has higher priority than any other Load/Store delay, so if an instruction encounters multiple delays only the Data Cache Miss will be reported and the entire delay period will be charged to Data Cache Miss. 
This is a subset of PM_CMPLU_STALL_LSU.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_CMPLU_STALL_DCACHE_MISS], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_CMPLU_STALL_DCACHE_MISS] }, [ POWER5_PME_PM_L3SA_MOD_INV ] = { .pme_name = "PM_L3SA_MOD_INV", .pme_code = 0x730e3, .pme_short_desc = "L3 slice A transition from modified to invalid", .pme_long_desc = "L3 snooper detects someone doing a store to a line that is truly M in this L3 (i.e. L3 going M=>I) Mu|Me are not included since they are formed due to a prev read op. Tx is not included since it is considered shared at this point.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L3SA_MOD_INV], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L3SA_MOD_INV] }, [ POWER5_PME_PM_LSU_FLUSH_LRQ ] = { .pme_name = "PM_LSU_FLUSH_LRQ", .pme_code = 0x2c0090, .pme_short_desc = "LRQ flushes", .pme_long_desc = "A load was flushed because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. Combined Units 0 and 1.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU_FLUSH_LRQ], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU_FLUSH_LRQ] }, [ POWER5_PME_PM_THRD_PRIO_2_CYC ] = { .pme_name = "PM_THRD_PRIO_2_CYC", .pme_code = 0x420e1, .pme_short_desc = "Cycles thread running at priority level 2", .pme_long_desc = "Cycles this thread was running at priority level 2.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_THRD_PRIO_2_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_THRD_PRIO_2_CYC] }, [ POWER5_PME_PM_LSU_FLUSH_SRQ ] = { .pme_name = "PM_LSU_FLUSH_SRQ", .pme_code = 0x1c0090, .pme_short_desc = "SRQ flushes", .pme_long_desc = "A store was flushed because younger load hits and older store that is already in the SRQ or in the same group. 
Combined Unit 0 + 1.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU_FLUSH_SRQ], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU_FLUSH_SRQ] }, [ POWER5_PME_PM_MRK_LSU_SRQ_INST_VALID ] = { .pme_name = "PM_MRK_LSU_SRQ_INST_VALID", .pme_code = 0xc70e6, .pme_short_desc = "Marked instruction valid in SRQ", .pme_long_desc = "This signal is asserted every cycle when a marked request is resident in the Store Request Queue", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_LSU_SRQ_INST_VALID], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_LSU_SRQ_INST_VALID] }, [ POWER5_PME_PM_L3SA_REF ] = { .pme_name = "PM_L3SA_REF", .pme_code = 0x701c3, .pme_short_desc = "L3 slice A references", .pme_long_desc = "Number of attempts made by this chip cores to find data in the L3. Reported per L3 slice ", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L3SA_REF], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L3SA_REF] }, [ POWER5_PME_PM_L2SC_RC_DISP_FAIL_CO_BUSY_ALL ] = { .pme_name = "PM_L2SC_RC_DISP_FAIL_CO_BUSY_ALL", .pme_code = 0x713c2, .pme_short_desc = "L2 slice C RC dispatch attempt failed due to all CO busy", .pme_long_desc = "A Read/Claim dispatch was rejected because all Castout machines were busy.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L2SC_RC_DISP_FAIL_CO_BUSY_ALL], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L2SC_RC_DISP_FAIL_CO_BUSY_ALL] }, [ POWER5_PME_PM_FPU0_STALL3 ] = { .pme_name = "PM_FPU0_STALL3", .pme_code = 0x20e1, .pme_short_desc = "FPU0 stalled in pipe3", .pme_long_desc = "FPU0 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always).", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FPU0_STALL3], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FPU0_STALL3] }, [ POWER5_PME_PM_GPR_MAP_FULL_CYC ] = { .pme_name = "PM_GPR_MAP_FULL_CYC", .pme_code = 0x130e5, .pme_short_desc = "Cycles GPR mapper full", .pme_long_desc = "The General 
Purpose Register mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. This event only indicates that the mapper was full, not that dispatch was prevented.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_GPR_MAP_FULL_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_GPR_MAP_FULL_CYC] }, [ POWER5_PME_PM_TB_BIT_TRANS ] = { .pme_name = "PM_TB_BIT_TRANS", .pme_code = 0x100018, .pme_short_desc = "Time Base bit transition", .pme_long_desc = "When the selected time base bit (as specified in MMCR0[TBSEL])transitions from 0 to 1 ", .pme_event_ids = power5_event_ids[POWER5_PME_PM_TB_BIT_TRANS], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_TB_BIT_TRANS] }, [ POWER5_PME_PM_MRK_LSU_FLUSH_LRQ ] = { .pme_name = "PM_MRK_LSU_FLUSH_LRQ", .pme_code = 0x381088, .pme_short_desc = "Marked LRQ flushes", .pme_long_desc = "A marked load was flushed because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_LSU_FLUSH_LRQ], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_LSU_FLUSH_LRQ] }, [ POWER5_PME_PM_FPU0_STF ] = { .pme_name = "PM_FPU0_STF", .pme_code = 0x20e2, .pme_short_desc = "FPU0 executed store instruction", .pme_long_desc = "FPU0 has executed a Floating Point Store instruction.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FPU0_STF], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FPU0_STF] }, [ POWER5_PME_PM_MRK_DTLB_MISS ] = { .pme_name = "PM_MRK_DTLB_MISS", .pme_code = 0xc50c6, .pme_short_desc = "Marked Data TLB misses", .pme_long_desc = "Data TLB references by a marked instruction that missed the TLB (all page sizes).", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_DTLB_MISS], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_DTLB_MISS] }, [ POWER5_PME_PM_FPU1_FMA ] = { .pme_name = 
"PM_FPU1_FMA", .pme_code = 0xc5, .pme_short_desc = "FPU1 executed multiply-add instruction", .pme_long_desc = "The floating point unit has executed a multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FPU1_FMA], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FPU1_FMA] }, [ POWER5_PME_PM_L2SA_MOD_TAG ] = { .pme_name = "PM_L2SA_MOD_TAG", .pme_code = 0x720e0, .pme_short_desc = "L2 slice A transition from modified to tagged", .pme_long_desc = "A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L2SA_MOD_TAG], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L2SA_MOD_TAG] }, [ POWER5_PME_PM_LSU1_FLUSH_ULD ] = { .pme_name = "PM_LSU1_FLUSH_ULD", .pme_code = 0xc00c4, .pme_short_desc = "LSU1 unaligned load flushes", .pme_long_desc = "A load was flushed from unit 1 because it was unaligned (crossed a 64 byte boundary, or 32 byte if it missed the L1).", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU1_FLUSH_ULD], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU1_FLUSH_ULD] }, [ POWER5_PME_PM_MRK_LSU0_FLUSH_UST ] = { .pme_name = "PM_MRK_LSU0_FLUSH_UST", .pme_code = 0x810c1, .pme_short_desc = "LSU0 marked unaligned store flushes", .pme_long_desc = "A marked store was flushed from unit 0 because it was unaligned", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_LSU0_FLUSH_UST], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_LSU0_FLUSH_UST] }, [ POWER5_PME_PM_MRK_INST_FIN ] = { .pme_name = "PM_MRK_INST_FIN", .pme_code = 0x300005, .pme_short_desc = "Marked instruction finished", .pme_long_desc = "One of the execution units finished a marked instruction. 
Instructions that finish may not necessary complete", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_INST_FIN], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_INST_FIN] }, [ POWER5_PME_PM_FPU0_FULL_CYC ] = { .pme_name = "PM_FPU0_FULL_CYC", .pme_code = 0x100c3, .pme_short_desc = "Cycles FPU0 issue queue full", .pme_long_desc = "The issue queue for FPU0 cannot accept any more instruction. Dispatch to this issue queue is stopped.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FPU0_FULL_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FPU0_FULL_CYC] }, [ POWER5_PME_PM_LSU_LRQ_S0_ALLOC ] = { .pme_name = "PM_LSU_LRQ_S0_ALLOC", .pme_code = 0xc20e6, .pme_short_desc = "LRQ slot 0 allocated", .pme_long_desc = "LRQ slot zero was allocated", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU_LRQ_S0_ALLOC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU_LRQ_S0_ALLOC] }, [ POWER5_PME_PM_MRK_LSU1_FLUSH_ULD ] = { .pme_name = "PM_MRK_LSU1_FLUSH_ULD", .pme_code = 0x810c4, .pme_short_desc = "LSU1 marked unaligned load flushes", .pme_long_desc = "A marked load was flushed from unit 1 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1)", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_LSU1_FLUSH_ULD], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_LSU1_FLUSH_ULD] }, [ POWER5_PME_PM_MRK_DTLB_REF ] = { .pme_name = "PM_MRK_DTLB_REF", .pme_code = 0x1c4090, .pme_short_desc = "Marked Data TLB reference", .pme_long_desc = "Total number of Data TLB references by a marked instruction for all page sizes. 
Page size is determined at TLB reload time.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_DTLB_REF], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_DTLB_REF] }, [ POWER5_PME_PM_BR_UNCOND ] = { .pme_name = "PM_BR_UNCOND", .pme_code = 0x123087, .pme_short_desc = "Unconditional branch", .pme_long_desc = "An unconditional branch was executed.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_BR_UNCOND], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_BR_UNCOND] }, [ POWER5_PME_PM_THRD_SEL_OVER_L2MISS ] = { .pme_name = "PM_THRD_SEL_OVER_L2MISS", .pme_code = 0x410c3, .pme_short_desc = "Thread selection overrides caused by L2 misses", .pme_long_desc = "Thread selection was overridden because one thread was had a L2 miss pending.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_THRD_SEL_OVER_L2MISS], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_THRD_SEL_OVER_L2MISS] }, [ POWER5_PME_PM_L2SB_SHR_INV ] = { .pme_name = "PM_L2SB_SHR_INV", .pme_code = 0x710c1, .pme_short_desc = "L2 slice B transition from shared to invalid", .pme_long_desc = "A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. This transition was caused by any external snoop request. The event is provided on each of the three slices A, B, and C. NOTE: For this event to be useful the tablewalk duration event should also be counted.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L2SB_SHR_INV], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L2SB_SHR_INV] }, [ POWER5_PME_PM_MEM_LO_PRIO_WR_CMPL ] = { .pme_name = "PM_MEM_LO_PRIO_WR_CMPL", .pme_code = 0x736e6, .pme_short_desc = "Low priority write completed", .pme_long_desc = "A memory write, which was not upgraded to high priority, completed. 
This event is sent from the Memory Controller clock domain and must be scaled accordingly", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MEM_LO_PRIO_WR_CMPL], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MEM_LO_PRIO_WR_CMPL] }, [ POWER5_PME_PM_L3SC_MOD_TAG ] = { .pme_name = "PM_L3SC_MOD_TAG", .pme_code = 0x720e5, .pme_short_desc = "L3 slice C transition from modified to TAG", .pme_long_desc = "L3 snooper detects someone doing a read to a line that is truly M in this L3(i.e. L3 going M->T or M->I(go_Mu case); Mu|Me are not included since they are formed due to a prev read op). Tx is not included since it is considered shared at this point.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L3SC_MOD_TAG], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L3SC_MOD_TAG] }, [ POWER5_PME_PM_MRK_ST_MISS_L1 ] = { .pme_name = "PM_MRK_ST_MISS_L1", .pme_code = 0x820e3, .pme_short_desc = "Marked L1 D cache store misses", .pme_long_desc = "A marked store missed the dcache", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_ST_MISS_L1], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_ST_MISS_L1] }, [ POWER5_PME_PM_GRP_DISP_SUCCESS ] = { .pme_name = "PM_GRP_DISP_SUCCESS", .pme_code = 0x300002, .pme_short_desc = "Group dispatch success", .pme_long_desc = "Number of groups sucessfully dispatched (not rejected)", .pme_event_ids = power5_event_ids[POWER5_PME_PM_GRP_DISP_SUCCESS], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_GRP_DISP_SUCCESS] }, [ POWER5_PME_PM_THRD_PRIO_DIFF_1or2_CYC ] = { .pme_name = "PM_THRD_PRIO_DIFF_1or2_CYC", .pme_code = 0x430e4, .pme_short_desc = "Cycles thread priority difference is 1 or 2", .pme_long_desc = "Cycles when this thread's priority is higher than the other thread's priority by 1 or 2.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_THRD_PRIO_DIFF_1or2_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_THRD_PRIO_DIFF_1or2_CYC] }, [ POWER5_PME_PM_IC_DEMAND_L2_BHT_REDIRECT ] = { .pme_name = 
"PM_IC_DEMAND_L2_BHT_REDIRECT", .pme_code = 0x230e0, .pme_short_desc = "L2 I cache demand request due to BHT redirect", .pme_long_desc = "A demand (not prefetch) miss to the instruction cache was sent to the L2 as a result of a branch prediction redirect (CR mispredict).", .pme_event_ids = power5_event_ids[POWER5_PME_PM_IC_DEMAND_L2_BHT_REDIRECT], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_IC_DEMAND_L2_BHT_REDIRECT] }, [ POWER5_PME_PM_MEM_WQ_DISP_Q8to15 ] = { .pme_name = "PM_MEM_WQ_DISP_Q8to15", .pme_code = 0x733e6, .pme_short_desc = "Memory write queue dispatched to queues 8-15", .pme_long_desc = "A memory operation was dispatched to a write queue in the range between 8 and 15. This event is sent from the Memory Controller clock domain and must be scaled accordingly.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MEM_WQ_DISP_Q8to15], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MEM_WQ_DISP_Q8to15] }, [ POWER5_PME_PM_FPU0_SINGLE ] = { .pme_name = "PM_FPU0_SINGLE", .pme_code = 0x20e3, .pme_short_desc = "FPU0 executed single precision instruction", .pme_long_desc = "FPU0 has executed a single precision instruction.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FPU0_SINGLE], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FPU0_SINGLE] }, [ POWER5_PME_PM_LSU_DERAT_MISS ] = { .pme_name = "PM_LSU_DERAT_MISS", .pme_code = 0x280090, .pme_short_desc = "DERAT misses", .pme_long_desc = "Total D-ERAT Misses. Requests that miss the Derat are rejected and retried until the request hits in the Erat. This may result in multiple erat misses for the same instruction. Combined Unit 0 + 1.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU_DERAT_MISS], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU_DERAT_MISS] }, [ POWER5_PME_PM_THRD_PRIO_1_CYC ] = { .pme_name = "PM_THRD_PRIO_1_CYC", .pme_code = 0x420e0, .pme_short_desc = "Cycles thread running at priority level 1", .pme_long_desc = "Cycles this thread was running at priority level 1. 
Priority level 1 is the lowest and indicates the thread is sleeping.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_THRD_PRIO_1_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_THRD_PRIO_1_CYC] }, [ POWER5_PME_PM_L2SC_RCST_DISP_FAIL_OTHER ] = { .pme_name = "PM_L2SC_RCST_DISP_FAIL_OTHER", .pme_code = 0x732e2, .pme_short_desc = "L2 slice C RC store dispatch attempt failed due to other reasons", .pme_long_desc = "A Read/Claim dispatch for a store failed for some reason other than Full or Collision conditions. Rejected dispatches do not count because they have not yet been attempted.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L2SC_RCST_DISP_FAIL_OTHER], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L2SC_RCST_DISP_FAIL_OTHER] }, [ POWER5_PME_PM_FPU1_FEST ] = { .pme_name = "PM_FPU1_FEST", .pme_code = 0x10c6, .pme_short_desc = "FPU1 executed FEST instruction", .pme_long_desc = "FPU1 has executed an estimate instructions. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FPU1_FEST], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FPU1_FEST] }, [ POWER5_PME_PM_FAB_HOLDtoVN_EMPTY ] = { .pme_name = "PM_FAB_HOLDtoVN_EMPTY", .pme_code = 0x721e7, .pme_short_desc = "Hold buffer to VN empty", .pme_long_desc = "Fabric cycles when the Vertical Node out hold-buffers are emtpy. The signal is delivered at FBC speed and the count must be scaled accordingly.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FAB_HOLDtoVN_EMPTY], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FAB_HOLDtoVN_EMPTY] }, [ POWER5_PME_PM_SNOOP_RD_RETRY_RQ ] = { .pme_name = "PM_SNOOP_RD_RETRY_RQ", .pme_code = 0x705c6, .pme_short_desc = "Snoop read retry due to collision with active read queue", .pme_long_desc = "A snoop request for a read from memory was retried because it matched the cache line of an active read. 
The snoop request is retried because the L2 may be able to source data via intervention for the 2nd read faster than the MC. This event is sent from the Memory Controller clock domain and must be scaled accordingly.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_SNOOP_RD_RETRY_RQ], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_SNOOP_RD_RETRY_RQ] }, [ POWER5_PME_PM_SNOOP_DCLAIM_RETRY_QFULL ] = { .pme_name = "PM_SNOOP_DCLAIM_RETRY_QFULL", .pme_code = 0x720e6, .pme_short_desc = "Snoop dclaim/flush retry due to write/dclaim queues full", .pme_long_desc = "The memory controller A memory write was dispatched to a write queue. This event is sent from the Memory Controller clock domain and must be scaled accordingly.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_SNOOP_DCLAIM_RETRY_QFULL], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_SNOOP_DCLAIM_RETRY_QFULL] }, [ POWER5_PME_PM_MRK_DATA_FROM_L25_SHR_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_L25_SHR_CYC", .pme_code = 0x2c70a2, .pme_short_desc = "Marked load latency from L2.5 shared", .pme_long_desc = "Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. 
To calculate average latency divide this count by the number of marked misses to the same level.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_DATA_FROM_L25_SHR_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_DATA_FROM_L25_SHR_CYC] }, [ POWER5_PME_PM_MRK_ST_CMPL_INT ] = { .pme_name = "PM_MRK_ST_CMPL_INT", .pme_code = 0x300003, .pme_short_desc = "Marked store completed with intervention", .pme_long_desc = "A marked store previously sent to the memory subsystem completed (data home) after requiring intervention", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_ST_CMPL_INT], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_ST_CMPL_INT] }, [ POWER5_PME_PM_FLUSH_BR_MPRED ] = { .pme_name = "PM_FLUSH_BR_MPRED", .pme_code = 0x110c6, .pme_short_desc = "Flush caused by branch mispredict", .pme_long_desc = "A flush was caused by a branch mispredict.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FLUSH_BR_MPRED], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FLUSH_BR_MPRED] }, [ POWER5_PME_PM_L2SB_RCLD_DISP_FAIL_ADDR ] = { .pme_name = "PM_L2SB_RCLD_DISP_FAIL_ADDR", .pme_code = 0x711c1, .pme_short_desc = "L2 slice B RC load dispatch attempt failed due to address collision with RC/CO/SN/SQ", .pme_long_desc = "A Read/Claim dispatch for a load failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L2SB_RCLD_DISP_FAIL_ADDR], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L2SB_RCLD_DISP_FAIL_ADDR] }, [ POWER5_PME_PM_FPU_STF ] = { .pme_name = "PM_FPU_STF", .pme_code = 0x202090, .pme_short_desc = "FPU executed store instruction", .pme_long_desc = "FPU has executed a store instruction. 
Combined Unit 0 + Unit 1.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FPU_STF], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FPU_STF] }, [ POWER5_PME_PM_CMPLU_STALL_FPU ] = { .pme_name = "PM_CMPLU_STALL_FPU", .pme_code = 0x411098, .pme_short_desc = "Completion stall caused by FPU instruction", .pme_long_desc = "Following a completion stall (any period when no groups completed) the last instruction to finish before completion resumes was a floating point instruction.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_CMPLU_STALL_FPU], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_CMPLU_STALL_FPU] }, [ POWER5_PME_PM_THRD_PRIO_DIFF_minus1or2_CYC ] = { .pme_name = "PM_THRD_PRIO_DIFF_minus1or2_CYC", .pme_code = 0x430e2, .pme_short_desc = "Cycles thread priority difference is -1 or -2", .pme_long_desc = "Cycles when this thread's priority is lower than the other thread's priority by 1 or 2.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_THRD_PRIO_DIFF_minus1or2_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_THRD_PRIO_DIFF_minus1or2_CYC] }, [ POWER5_PME_PM_GCT_NOSLOT_CYC ] = { .pme_name = "PM_GCT_NOSLOT_CYC", .pme_code = 0x100004, .pme_short_desc = "Cycles no GCT slot allocated", .pme_long_desc = "Cycles when the Global Completion Table has no slots from this thread.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_GCT_NOSLOT_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_GCT_NOSLOT_CYC] }, [ POWER5_PME_PM_FXU0_BUSY_FXU1_IDLE ] = { .pme_name = "PM_FXU0_BUSY_FXU1_IDLE", .pme_code = 0x300012, .pme_short_desc = "FXU0 busy FXU1 idle", .pme_long_desc = "FXU0 is busy while FXU1 was idle", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FXU0_BUSY_FXU1_IDLE], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FXU0_BUSY_FXU1_IDLE] }, [ POWER5_PME_PM_PTEG_FROM_L35_SHR ] = { .pme_name = "PM_PTEG_FROM_L35_SHR", .pme_code = 0x18309e, .pme_short_desc = "PTEG loaded from L3.5 shared", .pme_long_desc = "A Page Table Entry was loaded into the 
TLB with shared (S) data from the L3 of a chip on the same module as this processor is located, due to a demand load.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_PTEG_FROM_L35_SHR], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_PTEG_FROM_L35_SHR] }, [ POWER5_PME_PM_MRK_LSU_FLUSH_UST ] = { .pme_name = "PM_MRK_LSU_FLUSH_UST", .pme_code = 0x381090, .pme_short_desc = "Marked unaligned store flushes", .pme_long_desc = "A marked store was flushed because it was unaligned", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_LSU_FLUSH_UST], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_LSU_FLUSH_UST] }, [ POWER5_PME_PM_L3SA_HIT ] = { .pme_name = "PM_L3SA_HIT", .pme_code = 0x711c3, .pme_short_desc = "L3 slice A hits", .pme_long_desc = "Number of attempts made by this chip cores that resulted in an L3 hit. Reported per L3 slice", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L3SA_HIT], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L3SA_HIT] }, [ POWER5_PME_PM_MRK_DATA_FROM_L25_SHR ] = { .pme_name = "PM_MRK_DATA_FROM_L25_SHR", .pme_code = 0x1c7097, .pme_short_desc = "Marked data loaded from L2.5 shared", .pme_long_desc = "The processor's Data Cache was reloaded with shared (T or SL) data from the L2 of a chip on the same module as this processor is located due to a marked load.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_DATA_FROM_L25_SHR], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_DATA_FROM_L25_SHR] }, [ POWER5_PME_PM_L2SB_RCST_DISP_FAIL_ADDR ] = { .pme_name = "PM_L2SB_RCST_DISP_FAIL_ADDR", .pme_code = 0x712c1, .pme_short_desc = "L2 slice B RC store dispatch attempt failed due to address collision with RC/CO/SN/SQ", .pme_long_desc = "A Read/Claim dispatch for a store failed because of an address conflict. 
Two RC machines will never both work on the same line or line in the same congruence class at the same time.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L2SB_RCST_DISP_FAIL_ADDR], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L2SB_RCST_DISP_FAIL_ADDR] }, [ POWER5_PME_PM_MRK_DATA_FROM_L35_SHR ] = { .pme_name = "PM_MRK_DATA_FROM_L35_SHR", .pme_code = 0x1c709e, .pme_short_desc = "Marked data loaded from L3.5 shared", .pme_long_desc = "The processor's Data Cache was reloaded with shared (S) data from the L3 of a chip on the same module as this processor is located due to a marked load.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_DATA_FROM_L35_SHR], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_DATA_FROM_L35_SHR] }, [ POWER5_PME_PM_IERAT_XLATE_WR ] = { .pme_name = "PM_IERAT_XLATE_WR", .pme_code = 0x220e7, .pme_short_desc = "Translation written to ierat", .pme_long_desc = "An entry was written into the IERAT as a result of an IERAT miss. This event can be used to count IERAT misses. An ERAT miss that are later ignored will not be counted unless the ERAT is written before the instruction stream is changed.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_IERAT_XLATE_WR], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_IERAT_XLATE_WR] }, [ POWER5_PME_PM_L2SA_ST_REQ ] = { .pme_name = "PM_L2SA_ST_REQ", .pme_code = 0x723e0, .pme_short_desc = "L2 slice A store requests", .pme_long_desc = "A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. 
The event is provided on each of the three slices A, B, and C.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L2SA_ST_REQ], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L2SA_ST_REQ] }, [ POWER5_PME_PM_THRD_SEL_T1 ] = { .pme_name = "PM_THRD_SEL_T1", .pme_code = 0x410c1, .pme_short_desc = "Decode selected thread 1", .pme_long_desc = "Thread selection picked thread 1 for decode.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_THRD_SEL_T1], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_THRD_SEL_T1] }, [ POWER5_PME_PM_IC_DEMAND_L2_BR_REDIRECT ] = { .pme_name = "PM_IC_DEMAND_L2_BR_REDIRECT", .pme_code = 0x230e1, .pme_short_desc = "L2 I cache demand request due to branch redirect", .pme_long_desc = "A demand (not prefetch) miss to the instruction cache was sent to the L2 as a result of a branch prediction redirect (either ALL mispredicted or Target).", .pme_event_ids = power5_event_ids[POWER5_PME_PM_IC_DEMAND_L2_BR_REDIRECT], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_IC_DEMAND_L2_BR_REDIRECT] }, [ POWER5_PME_PM_INST_FROM_LMEM ] = { .pme_name = "PM_INST_FROM_LMEM", .pme_code = 0x222086, .pme_short_desc = "Instruction fetched from local memory", .pme_long_desc = "An instruction fetch group was fetched from memory attached to the same module this proccessor is located on. 
Fetch groups can contain up to 8 instructions", .pme_event_ids = power5_event_ids[POWER5_PME_PM_INST_FROM_LMEM], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_INST_FROM_LMEM] }, /* PM_FPU0_1FLOP: descriptions restored; the table generator had split the comma-separated text, leaving "FPU0 executed add" / " mult". */ [ POWER5_PME_PM_FPU0_1FLOP ] = { .pme_name = "PM_FPU0_1FLOP", .pme_code = 0xc3, .pme_short_desc = "FPU0 executed add, mult, sub, cmp or sel instruction", .pme_long_desc = "The floating point unit has executed an add, mult, sub, compare, fsel, fneg, fabs, fnabs, fres, or frsqrte kind of instruction. These are single FLOP operations.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FPU0_1FLOP], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FPU0_1FLOP] }, [ POWER5_PME_PM_MRK_DATA_FROM_L35_SHR_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_L35_SHR_CYC", .pme_code = 0x2c70a6, .pme_short_desc = "Marked load latency from L3.5 shared", .pme_long_desc = "Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. To calculate average latency divide this count by the number of marked misses to the same level.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_DATA_FROM_L35_SHR_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_DATA_FROM_L35_SHR_CYC] }, [ POWER5_PME_PM_PTEG_FROM_L2 ] = { .pme_name = "PM_PTEG_FROM_L2", .pme_code = 0x183087, .pme_short_desc = "PTEG loaded from L2", .pme_long_desc = "A Page Table Entry was loaded into the TLB from the local L2 due to a demand load", .pme_event_ids = power5_event_ids[POWER5_PME_PM_PTEG_FROM_L2], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_PTEG_FROM_L2] }, [ POWER5_PME_PM_MEM_PW_CMPL ] = { .pme_name = "PM_MEM_PW_CMPL", .pme_code = 0x724e6, .pme_short_desc = "Memory partial-write completed", .pme_long_desc = "Number of Partial Writes completed. 
This event is sent from the Memory Controller clock domain and must be scaled accordingly.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MEM_PW_CMPL], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MEM_PW_CMPL] }, [ POWER5_PME_PM_THRD_PRIO_DIFF_minus5or6_CYC ] = { .pme_name = "PM_THRD_PRIO_DIFF_minus5or6_CYC", .pme_code = 0x430e0, .pme_short_desc = "Cycles thread priority difference is -5 or -6", .pme_long_desc = "Cycles when this thread's priority is lower than the other thread's priority by 5 or 6.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_THRD_PRIO_DIFF_minus5or6_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_THRD_PRIO_DIFF_minus5or6_CYC] }, [ POWER5_PME_PM_L2SB_RCLD_DISP_FAIL_OTHER ] = { .pme_name = "PM_L2SB_RCLD_DISP_FAIL_OTHER", .pme_code = 0x731e1, .pme_short_desc = "L2 slice B RC load dispatch attempt failed due to other reasons", .pme_long_desc = "A Read/Claim dispatch for a load failed for some reason other than Full or Collision conditions.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L2SB_RCLD_DISP_FAIL_OTHER], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L2SB_RCLD_DISP_FAIL_OTHER] }, [ POWER5_PME_PM_FPU0_FIN ] = { .pme_name = "PM_FPU0_FIN", .pme_code = 0x10c3, .pme_short_desc = "FPU0 produced a result", .pme_long_desc = "FPU0 finished, produced a result. This only indicates finish, not completion. Floating Point Stores are included in this count but not Floating Point Loads.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FPU0_FIN], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FPU0_FIN] }, [ POWER5_PME_PM_MRK_DTLB_MISS_4K ] = { .pme_name = "PM_MRK_DTLB_MISS_4K", .pme_code = 0xc40c1, .pme_short_desc = "Marked Data TLB misses for 4K page", .pme_long_desc = "Data TLB references to 4KB pages by a marked instruction that missed the TLB. 
Page size is determined at TLB reload time.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_DTLB_MISS_4K], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_DTLB_MISS_4K] }, [ POWER5_PME_PM_L3SC_SHR_INV ] = { .pme_name = "PM_L3SC_SHR_INV", .pme_code = 0x710c5, .pme_short_desc = "L3 slice C transition from shared to invalid", .pme_long_desc = "L3 snooper detects someone doing a store to a line that is Sx in this L3(i.e. invalidate hit SX and dispatched).", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L3SC_SHR_INV], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L3SC_SHR_INV] }, [ POWER5_PME_PM_GRP_BR_REDIR ] = { .pme_name = "PM_GRP_BR_REDIR", .pme_code = 0x120e6, .pme_short_desc = "Group experienced branch redirect", .pme_long_desc = "Number of groups, counted at dispatch, that have encountered a branch redirect. Every group constructed from a fetch group that has been redirected will count.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_GRP_BR_REDIR], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_GRP_BR_REDIR] }, [ POWER5_PME_PM_L2SC_RCLD_DISP_FAIL_RC_FULL ] = { .pme_name = "PM_L2SC_RCLD_DISP_FAIL_RC_FULL", .pme_code = 0x721e2, .pme_short_desc = "L2 slice C RC load dispatch attempt failed due to all RC full", .pme_long_desc = "A Read/Claim dispatch for a load failed because all RC machines are busy.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L2SC_RCLD_DISP_FAIL_RC_FULL], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L2SC_RCLD_DISP_FAIL_RC_FULL] }, [ POWER5_PME_PM_MRK_LSU_FLUSH_SRQ ] = { .pme_name = "PM_MRK_LSU_FLUSH_SRQ", .pme_code = 0x481088, .pme_short_desc = "Marked SRQ lhs flushes", .pme_long_desc = "A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_LSU_FLUSH_SRQ], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_LSU_FLUSH_SRQ] }, [ POWER5_PME_PM_PTEG_FROM_L275_SHR ] = { .pme_name = 
"PM_PTEG_FROM_L275_SHR", .pme_code = 0x383097, .pme_short_desc = "PTEG loaded from L2.75 shared", .pme_long_desc = "A Page Table Entry was loaded into the TLB with shared (T) data from the L2 on a different module than this processor is located due to a demand load.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_PTEG_FROM_L275_SHR], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_PTEG_FROM_L275_SHR] }, [ POWER5_PME_PM_L2SB_RCLD_DISP_FAIL_RC_FULL ] = { .pme_name = "PM_L2SB_RCLD_DISP_FAIL_RC_FULL", .pme_code = 0x721e1, .pme_short_desc = "L2 slice B RC load dispatch attempt failed due to all RC full", .pme_long_desc = "A Read/Claim dispatch for a load failed because all RC machines are busy.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L2SB_RCLD_DISP_FAIL_RC_FULL], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L2SB_RCLD_DISP_FAIL_RC_FULL] }, [ POWER5_PME_PM_SNOOP_RD_RETRY_WQ ] = { .pme_name = "PM_SNOOP_RD_RETRY_WQ", .pme_code = 0x715c6, .pme_short_desc = "Snoop read retry due to collision with active write queue", .pme_long_desc = "A snoop request for a read from memory was retried because it matched the cache line of an active write. The snoop request is retried and the active write is changed to high priority. This event is sent from the Memory Controller clock domain and must be scaled accordingly.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_SNOOP_RD_RETRY_WQ], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_SNOOP_RD_RETRY_WQ] }, [ POWER5_PME_PM_LSU0_NCLD ] = { .pme_name = "PM_LSU0_NCLD", .pme_code = 0xc50c1, .pme_short_desc = "LSU0 non-cacheable loads", .pme_long_desc = "A non-cacheable load was executed by unit 0.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU0_NCLD], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU0_NCLD] }, [ POWER5_PME_PM_FAB_DCLAIM_RETRIED ] = { .pme_name = "PM_FAB_DCLAIM_RETRIED", .pme_code = 0x730e7, .pme_short_desc = "dclaim retried", .pme_long_desc = "A DCLAIM command was retried. 
Each chip reports its own counts. The signal is delivered at FBC speed and the count must be scaled accordingly.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FAB_DCLAIM_RETRIED], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FAB_DCLAIM_RETRIED] }, [ POWER5_PME_PM_LSU1_BUSY_REJECT ] = { .pme_name = "PM_LSU1_BUSY_REJECT", .pme_code = 0xc20e7, .pme_short_desc = "LSU1 busy due to reject", .pme_long_desc = "Total cycles the Load Store Unit 1 is busy rejecting instructions.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU1_BUSY_REJECT], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU1_BUSY_REJECT] }, [ POWER5_PME_PM_FXLS0_FULL_CYC ] = { .pme_name = "PM_FXLS0_FULL_CYC", .pme_code = 0x110c0, .pme_short_desc = "Cycles FXU0/LS0 queue full", .pme_long_desc = "The issue queue that feeds the Fixed Point unit 0 / Load Store Unit 0 is full. This condition will prevent dispatch groups from being dispatched. This event only indicates that the queue was full, not that dispatch was prevented.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FXLS0_FULL_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FXLS0_FULL_CYC] }, [ POWER5_PME_PM_FPU0_FEST ] = { .pme_name = "PM_FPU0_FEST", .pme_code = 0x10c2, .pme_short_desc = "FPU0 executed FEST instruction", .pme_long_desc = "FPU0 has executed an estimate instructions. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. ", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FPU0_FEST], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FPU0_FEST] }, [ POWER5_PME_PM_DTLB_REF_16M ] = { .pme_name = "PM_DTLB_REF_16M", .pme_code = 0xc40c6, .pme_short_desc = "Data TLB reference for 16M page", .pme_long_desc = "Data TLB references for 16MB pages. 
Includes hits + misses.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_DTLB_REF_16M], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_DTLB_REF_16M] }, [ POWER5_PME_PM_L2SC_RCLD_DISP_FAIL_ADDR ] = { .pme_name = "PM_L2SC_RCLD_DISP_FAIL_ADDR", .pme_code = 0x711c2, .pme_short_desc = "L2 slice C RC load dispatch attempt failed due to address collision with RC/CO/SN/SQ", .pme_long_desc = "A Read/Claim dispatch for a load failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L2SC_RCLD_DISP_FAIL_ADDR], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L2SC_RCLD_DISP_FAIL_ADDR] }, [ POWER5_PME_PM_LSU0_REJECT_ERAT_MISS ] = { .pme_name = "PM_LSU0_REJECT_ERAT_MISS", .pme_code = 0xc60e3, .pme_short_desc = "LSU0 reject due to ERAT miss", .pme_long_desc = "Total cycles the Load Store Unit 0 is busy rejecting instructions due to an ERAT miss. Requests that miss the Derat are rejected and retried until the request hits in the Erat.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU0_REJECT_ERAT_MISS], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU0_REJECT_ERAT_MISS] }, [ POWER5_PME_PM_DATA_FROM_L25_MOD ] = { .pme_name = "PM_DATA_FROM_L25_MOD", .pme_code = 0x2c3097, .pme_short_desc = "Data loaded from L2.5 modified", .pme_long_desc = "The processor's Data Cache was reloaded with modified (M) data from the L2 of a chip on the same module as this processor is located due to a demand load.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_DATA_FROM_L25_MOD], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_DATA_FROM_L25_MOD] }, /* PM_GCT_USAGE_60to79_CYC: cycles in the 60-79% GCT occupancy band named by the event and its short description. */ [ POWER5_PME_PM_GCT_USAGE_60to79_CYC ] = { .pme_name = "PM_GCT_USAGE_60to79_CYC", .pme_code = 0x20001f, .pme_short_desc = "Cycles GCT 60-79% full", .pme_long_desc = "Cycles when the Global Completion Table has between 60% and 79% of its slots used. 
The GCT has 20 entries shared between threads.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_GCT_USAGE_60to79_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_GCT_USAGE_60to79_CYC] }, [ POWER5_PME_PM_DATA_FROM_L375_MOD ] = { .pme_name = "PM_DATA_FROM_L375_MOD", .pme_code = 0x1c30a7, .pme_short_desc = "Data loaded from L3.75 modified", .pme_long_desc = "The processor's Data Cache was reloaded with modified (M) data from the L3 of a chip on the same module as this processor is located due to a demand load.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_DATA_FROM_L375_MOD], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_DATA_FROM_L375_MOD] }, [ POWER5_PME_PM_LSU_LMQ_SRQ_EMPTY_CYC ] = { .pme_name = "PM_LSU_LMQ_SRQ_EMPTY_CYC", .pme_code = 0x200015, .pme_short_desc = "Cycles LMQ and SRQ empty", .pme_long_desc = "Cycles when both the LMQ and SRQ are empty (LSU is idle)", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU_LMQ_SRQ_EMPTY_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU_LMQ_SRQ_EMPTY_CYC] }, [ POWER5_PME_PM_LSU0_REJECT_RELOAD_CDF ] = { .pme_name = "PM_LSU0_REJECT_RELOAD_CDF", .pme_code = 0xc60e2, .pme_short_desc = "LSU0 reject due to reload CDF or tag update collision", .pme_long_desc = "Total cycles the Load Store Unit 0 is busy rejecting instructions because of Critical Data Forward. When critical data arrives from the storage system it is formatted and immediately forwarded, bypassing the data cache, to the destination register using the result bus. Any instruction the requires the result bus in the same cycle is rejected. 
Tag update rejects are caused when an instruction requires access to the Dcache directory or ERAT in the same system when they are being updated.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU0_REJECT_RELOAD_CDF], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU0_REJECT_RELOAD_CDF] }, [ POWER5_PME_PM_0INST_FETCH ] = { .pme_name = "PM_0INST_FETCH", .pme_code = 0x42208d, .pme_short_desc = "No instructions fetched", .pme_long_desc = "No instructions were fetched this cycles (due to IFU hold, redirect, or icache miss)", .pme_event_ids = power5_event_ids[POWER5_PME_PM_0INST_FETCH], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_0INST_FETCH] }, [ POWER5_PME_PM_LSU1_REJECT_RELOAD_CDF ] = { .pme_name = "PM_LSU1_REJECT_RELOAD_CDF", .pme_code = 0xc60e6, .pme_short_desc = "LSU1 reject due to reload CDF or tag update collision", .pme_long_desc = "Total cycles the Load Store Unit 1 is busy rejecting instructions because of Critical Data Forward. When critical data arrives from the storage system it is formatted and immediately forwarded, bypassing the data cache, to the destination register using the result bus. Any instruction the requires the result bus in the same cycle is rejected. 
Tag update rejects are caused when an instruction requires access to the Dcache directory or ERAT in the same system when they are being updated.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU1_REJECT_RELOAD_CDF], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU1_REJECT_RELOAD_CDF] }, [ POWER5_PME_PM_L1_PREF ] = { .pme_name = "PM_L1_PREF", .pme_code = 0xc70e7, .pme_short_desc = "L1 cache data prefetches", .pme_long_desc = "A request to prefetch data into the L1 was made", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L1_PREF], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L1_PREF] }, [ POWER5_PME_PM_MEM_WQ_DISP_Q0to7 ] = { .pme_name = "PM_MEM_WQ_DISP_Q0to7", .pme_code = 0x723e6, .pme_short_desc = "Memory write queue dispatched to queues 0-7", .pme_long_desc = "A memory operation was dispatched to a write queue in the range between 0 and 7. This event is sent from the Memory Controller clock domain and must be scaled accordingly.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MEM_WQ_DISP_Q0to7], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MEM_WQ_DISP_Q0to7] }, [ POWER5_PME_PM_MRK_DATA_FROM_LMEM_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_LMEM_CYC", .pme_code = 0x4c70a0, .pme_short_desc = "Marked load latency from local memory", .pme_long_desc = "Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. To calculate average latency divide this count by the number of marked misses to the same level.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_DATA_FROM_LMEM_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_DATA_FROM_LMEM_CYC] }, [ POWER5_PME_PM_BRQ_FULL_CYC ] = { .pme_name = "PM_BRQ_FULL_CYC", .pme_code = 0x100c5, .pme_short_desc = "Cycles branch queue full", .pme_long_desc = "Cycles when the issue queue that feeds the branch unit is full. 
This condition will prevent dispatch groups from being dispatched. This event only indicates that the queue was full, not that dispatch was prevented.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_BRQ_FULL_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_BRQ_FULL_CYC] }, [ POWER5_PME_PM_GRP_IC_MISS_NONSPEC ] = { .pme_name = "PM_GRP_IC_MISS_NONSPEC", .pme_code = 0x112099, .pme_short_desc = "Group experienced non-speculative I cache miss", .pme_long_desc = "Number of groups, counted at completion, that have encountered an instruction cache miss.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_GRP_IC_MISS_NONSPEC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_GRP_IC_MISS_NONSPEC] }, [ POWER5_PME_PM_PTEG_FROM_L275_MOD ] = { .pme_name = "PM_PTEG_FROM_L275_MOD", .pme_code = 0x1830a3, .pme_short_desc = "PTEG loaded from L2.75 modified", .pme_long_desc = "A Page Table Entry was loaded into the TLB with modified (M) data from the L2 on a different module than this processor is located due to a demand load. ", .pme_event_ids = power5_event_ids[POWER5_PME_PM_PTEG_FROM_L275_MOD], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_PTEG_FROM_L275_MOD] }, [ POWER5_PME_PM_MRK_LD_MISS_L1_LSU0 ] = { .pme_name = "PM_MRK_LD_MISS_L1_LSU0", .pme_code = 0x820e0, .pme_short_desc = "LSU0 marked L1 D cache load misses", .pme_long_desc = "Load references that miss the Level 1 Data cache, by LSU0.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_LD_MISS_L1_LSU0], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_LD_MISS_L1_LSU0] }, [ POWER5_PME_PM_MRK_DATA_FROM_L375_SHR_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_L375_SHR_CYC", .pme_code = 0x2c70a7, .pme_short_desc = "Marked load latency from L3.75 shared", .pme_long_desc = "Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. 
To calculate average latency divide this count by the number of marked misses to the same level.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_DATA_FROM_L375_SHR_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_DATA_FROM_L375_SHR_CYC] }, [ POWER5_PME_PM_LSU_FLUSH ] = { .pme_name = "PM_LSU_FLUSH", .pme_code = 0x110c5, .pme_short_desc = "Flush initiated by LSU", .pme_long_desc = "A flush was initiated by the Load Store Unit", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU_FLUSH], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU_FLUSH] }, [ POWER5_PME_PM_DATA_FROM_L3 ] = { .pme_name = "PM_DATA_FROM_L3", .pme_code = 0x1c308e, .pme_short_desc = "Data loaded from L3", .pme_long_desc = "The processor's Data Cache was reloaded from the local L3 due to a demand load.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_DATA_FROM_L3], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_DATA_FROM_L3] }, [ POWER5_PME_PM_INST_FROM_L2 ] = { .pme_name = "PM_INST_FROM_L2", .pme_code = 0x122086, .pme_short_desc = "Instruction fetched from L2", .pme_long_desc = "An instruction fetch group was fetched from L2. Fetch Groups can contain up to 8 instructions", .pme_event_ids = power5_event_ids[POWER5_PME_PM_INST_FROM_L2], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_INST_FROM_L2] }, [ POWER5_PME_PM_PMC2_OVERFLOW ] = { .pme_name = "PM_PMC2_OVERFLOW", .pme_code = 0x30000a, .pme_short_desc = "PMC2 Overflow", .pme_long_desc = "Overflows from PMC2 are counted. This effectively widens the PMC. The Overflow from the original PMC will not trigger an exception even if the PMU is configured to generate exceptions on overflow.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_PMC2_OVERFLOW], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_PMC2_OVERFLOW] }, [ POWER5_PME_PM_FPU0_DENORM ] = { .pme_name = "PM_FPU0_DENORM", .pme_code = 0x20e0, .pme_short_desc = "FPU0 received denormalized data", .pme_long_desc = "FPU0 has encountered a denormalized operand. 
", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FPU0_DENORM], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FPU0_DENORM] }, [ POWER5_PME_PM_FPU1_FMOV_FEST ] = { .pme_name = "PM_FPU1_FMOV_FEST", .pme_code = 0x10c4, .pme_short_desc = "FPU1 executed FMOV or FEST instructions", .pme_long_desc = "FPU1 has executed a move kind of instruction or one of the estimate instructions. This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FPU1_FMOV_FEST], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FPU1_FMOV_FEST] }, [ POWER5_PME_PM_INST_FETCH_CYC ] = { .pme_name = "PM_INST_FETCH_CYC", .pme_code = 0x220e4, .pme_short_desc = "Cycles at least 1 instruction fetched", .pme_long_desc = "Cycles when at least one instruction was sent from the fetch unit to the decode unit.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_INST_FETCH_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_INST_FETCH_CYC] }, [ POWER5_PME_PM_LSU_LDF ] = { .pme_name = "PM_LSU_LDF", .pme_code = 0x4c5090, .pme_short_desc = "LSU executed Floating Point load instruction", .pme_long_desc = "LSU executed Floating Point load instruction. 
Combined Unit 0 + 1.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU_LDF], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU_LDF] }, [ POWER5_PME_PM_INST_DISP ] = { .pme_name = "PM_INST_DISP", .pme_code = 0x300009, .pme_short_desc = "Instructions dispatched", .pme_long_desc = "Number of PowerPC instructions successfully dispatched.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_INST_DISP], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_INST_DISP] }, [ POWER5_PME_PM_DATA_FROM_L25_SHR ] = { .pme_name = "PM_DATA_FROM_L25_SHR", .pme_code = 0x1c3097, .pme_short_desc = "Data loaded from L2.5 shared", .pme_long_desc = "The processor's Data Cache was reloaded with shared (T or SL) data from the L2 of a chip on the same module as this processor is located due to a demand load.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_DATA_FROM_L25_SHR], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_DATA_FROM_L25_SHR] }, [ POWER5_PME_PM_L1_DCACHE_RELOAD_VALID ] = { .pme_name = "PM_L1_DCACHE_RELOAD_VALID", .pme_code = 0xc30e4, .pme_short_desc = "L1 reload data source valid", .pme_long_desc = "The data source information is valid,the data cache has been reloaded. Prior to POWER5+ this included data cache reloads due to prefetch activity. With POWER5+ this now only includes reloads due to demand loads.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L1_DCACHE_RELOAD_VALID], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L1_DCACHE_RELOAD_VALID] }, [ POWER5_PME_PM_MEM_WQ_DISP_DCLAIM ] = { .pme_name = "PM_MEM_WQ_DISP_DCLAIM", .pme_code = 0x713c6, .pme_short_desc = "Memory write queue dispatched due to dclaim/flush", .pme_long_desc = "A memory dclaim or flush operation was dispatched to a write queue. 
This event is sent from the Memory Controller clock domain and must be scaled accordingly.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MEM_WQ_DISP_DCLAIM], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MEM_WQ_DISP_DCLAIM] }, [ POWER5_PME_PM_FPU_FULL_CYC ] = { .pme_name = "PM_FPU_FULL_CYC", .pme_code = 0x110090, .pme_short_desc = "Cycles FPU issue queue full", .pme_long_desc = "Cycles when one or both FPU issue queues are full. Combined Unit 0 + 1. Use with caution since this is the sum of cycles when Unit 0 was full plus Unit 1 full. It does not indicate when both units were full.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FPU_FULL_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FPU_FULL_CYC] }, [ POWER5_PME_PM_MRK_GRP_ISSUED ] = { .pme_name = "PM_MRK_GRP_ISSUED", .pme_code = 0x100015, .pme_short_desc = "Marked group issued", .pme_long_desc = "A sampled instruction was issued.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_GRP_ISSUED], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_GRP_ISSUED] }, [ POWER5_PME_PM_THRD_PRIO_3_CYC ] = { .pme_name = "PM_THRD_PRIO_3_CYC", .pme_code = 0x420e2, .pme_short_desc = "Cycles thread running at priority level 3", .pme_long_desc = "Cycles this thread was running at priority level 3.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_THRD_PRIO_3_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_THRD_PRIO_3_CYC] }, [ POWER5_PME_PM_FPU_FMA ] = { .pme_name = "PM_FPU_FMA", .pme_code = 0x200088, .pme_short_desc = "FPU executed multiply-add instruction", .pme_long_desc = "This signal is active for one cycle when FPU is executing multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. 
Combined Unit 0 + Unit 1.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FPU_FMA], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FPU_FMA] }, [ POWER5_PME_PM_INST_FROM_L35_MOD ] = { .pme_name = "PM_INST_FROM_L35_MOD", .pme_code = 0x22209d, .pme_short_desc = "Instruction fetched from L3.5 modified", .pme_long_desc = "An instruction fetch group was fetched with modified (M) data from the L3 of a chip on the same module as this processor is located. Fetch groups can contain up to 8 instructions", .pme_event_ids = power5_event_ids[POWER5_PME_PM_INST_FROM_L35_MOD], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_INST_FROM_L35_MOD] }, [ POWER5_PME_PM_MRK_CRU_FIN ] = { .pme_name = "PM_MRK_CRU_FIN", .pme_code = 0x400005, .pme_short_desc = "Marked instruction CRU processing finished", .pme_long_desc = "The Condition Register Unit finished a marked instruction. Instructions that finish may not necessary complete.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_CRU_FIN], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_CRU_FIN] }, [ POWER5_PME_PM_SNOOP_WR_RETRY_WQ ] = { .pme_name = "PM_SNOOP_WR_RETRY_WQ", .pme_code = 0x716c6, .pme_short_desc = "Snoop write/dclaim retry due to collision with active write queue", .pme_long_desc = "A snoop request for a write or dclaim to memory was retried because it matched the cache line of an active write. The snoop request is retried and the active write is changed to high priority. 
This event is sent from the Memory Controller clock domain and must be scaled accordingly.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_SNOOP_WR_RETRY_WQ], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_SNOOP_WR_RETRY_WQ] }, [ POWER5_PME_PM_CMPLU_STALL_REJECT ] = { .pme_name = "PM_CMPLU_STALL_REJECT", .pme_code = 0x41109a, .pme_short_desc = "Completion stall caused by reject", .pme_long_desc = "Following a completion stall (any period when no groups completed) the last instruction to finish before completion resumes suffered a load/store reject. This is a subset of PM_CMPLU_STALL_LSU.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_CMPLU_STALL_REJECT], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_CMPLU_STALL_REJECT] }, [ POWER5_PME_PM_LSU1_REJECT_ERAT_MISS ] = { .pme_name = "PM_LSU1_REJECT_ERAT_MISS", .pme_code = 0xc60e7, .pme_short_desc = "LSU1 reject due to ERAT miss", .pme_long_desc = "Total cycles the Load Store Unit 1 is busy rejecting instructions due to an ERAT miss. Requests that miss the Derat are rejected and retried until the request hits in the Erat.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU1_REJECT_ERAT_MISS], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU1_REJECT_ERAT_MISS] }, [ POWER5_PME_PM_MRK_FXU_FIN ] = { .pme_name = "PM_MRK_FXU_FIN", .pme_code = 0x200014, .pme_short_desc = "Marked instruction FXU processing finished", .pme_long_desc = "One of the Fixed Point Units finished a marked instruction. Instructions that finish may not necessary complete.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_FXU_FIN], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_FXU_FIN] }, [ POWER5_PME_PM_L2SB_RCST_DISP_FAIL_OTHER ] = { .pme_name = "PM_L2SB_RCST_DISP_FAIL_OTHER", .pme_code = 0x732e1, .pme_short_desc = "L2 slice B RC store dispatch attempt failed due to other reasons", .pme_long_desc = "A Read/Claim dispatch for a store failed for some reason other than Full or Collision conditions. 
Rejected dispatches do not count because they have not yet been attempted.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L2SB_RCST_DISP_FAIL_OTHER], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L2SB_RCST_DISP_FAIL_OTHER] }, [ POWER5_PME_PM_L2SC_RC_DISP_FAIL_CO_BUSY ] = { .pme_name = "PM_L2SC_RC_DISP_FAIL_CO_BUSY", .pme_code = 0x703c2, .pme_short_desc = "L2 slice C RC dispatch attempt failed due to RC/CO pair chosen was miss and CO already busy", .pme_long_desc = "A Read/Claim Dispatch was rejected at dispatch because the Castout Machine was busy. In the case of an RC starting up on a miss and the victim is valid, the CO machine must be available for the RC to process the access. If the CO is still busy working on an old castout, then the RC must not-ack the access if it is a miss(re-issued by the CIU). If it is a miss and the CO is available to process the castout, the RC will accept the access. Once the RC has finished, it can restart and process new accesses that result in a hit (or miss that doesn't need a CO) even though the CO is still processing a castout from a previous access.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L2SC_RC_DISP_FAIL_CO_BUSY], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L2SC_RC_DISP_FAIL_CO_BUSY] }, [ POWER5_PME_PM_PMC4_OVERFLOW ] = { .pme_name = "PM_PMC4_OVERFLOW", .pme_code = 0x10000a, .pme_short_desc = "PMC4 Overflow", .pme_long_desc = "Overflows from PMC4 are counted. This effectively widens the PMC. 
The Overflow from the original PMC will not trigger an exception even if the PMU is configured to generate exceptions on overflow.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_PMC4_OVERFLOW], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_PMC4_OVERFLOW] }, [ POWER5_PME_PM_L3SA_SNOOP_RETRY ] = { .pme_name = "PM_L3SA_SNOOP_RETRY", .pme_code = 0x731e3, .pme_short_desc = "L3 slice A snoop retries", .pme_long_desc = "Number of times an L3 retried a snoop because it got two in at the same time (one on snp_a, one on snp_b)", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L3SA_SNOOP_RETRY], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L3SA_SNOOP_RETRY] }, [ POWER5_PME_PM_PTEG_FROM_L35_MOD ] = { .pme_name = "PM_PTEG_FROM_L35_MOD", .pme_code = 0x28309e, .pme_short_desc = "PTEG loaded from L3.5 modified", .pme_long_desc = "A Page Table Entry was loaded into the TLB with modified (M) data from the L3 of a chip on the same module as this processor is located, due to a demand load.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_PTEG_FROM_L35_MOD], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_PTEG_FROM_L35_MOD] }, [ POWER5_PME_PM_INST_FROM_L25_MOD ] = { .pme_name = "PM_INST_FROM_L25_MOD", .pme_code = 0x222096, .pme_short_desc = "Instruction fetched from L2.5 modified", .pme_long_desc = "An instruction fetch group was fetched with modified (M) data from the L2 of a chip on the same module as this processor is located. 
Fetch groups can contain up to 8 instructions.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_INST_FROM_L25_MOD], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_INST_FROM_L25_MOD] }, [ POWER5_PME_PM_THRD_SMT_HANG ] = { .pme_name = "PM_THRD_SMT_HANG", .pme_code = 0x330e7, .pme_short_desc = "SMT hang detected", .pme_long_desc = "A hung thread was detected", .pme_event_ids = power5_event_ids[POWER5_PME_PM_THRD_SMT_HANG], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_THRD_SMT_HANG] }, [ POWER5_PME_PM_CMPLU_STALL_ERAT_MISS ] = { .pme_name = "PM_CMPLU_STALL_ERAT_MISS", .pme_code = 0x41109b, .pme_short_desc = "Completion stall caused by ERAT miss", .pme_long_desc = "Following a completion stall (any period when no groups completed) the last instruction to finish before completion resumes suffered an ERAT miss. This is a subset of PM_CMPLU_STALL_REJECT.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_CMPLU_STALL_ERAT_MISS], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_CMPLU_STALL_ERAT_MISS] }, [ POWER5_PME_PM_L3SA_MOD_TAG ] = { .pme_name = "PM_L3SA_MOD_TAG", .pme_code = 0x720e3, .pme_short_desc = "L3 slice A transition from modified to TAG", .pme_long_desc = "L3 snooper detects someone doing a read to a line that is truly M in this L3(i.e. L3 going M->T or M->I(go_Mu case) Mu|Me are not included since they are formed due to a prev read op). Tx is not included since it is considered shared at this point.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L3SA_MOD_TAG], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L3SA_MOD_TAG] }, [ POWER5_PME_PM_FLUSH_SYNC ] = { .pme_name = "PM_FLUSH_SYNC", .pme_code = 0x330e1, .pme_short_desc = "Flush caused by sync", .pme_long_desc = "This thread has been flushed at dispatch due to a sync, lwsync, ptesync, or tlbsync instruction. 
This allows the other thread to have more machine resources for it to make progress until the sync finishes.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FLUSH_SYNC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FLUSH_SYNC] }, [ POWER5_PME_PM_INST_FROM_L2MISS ] = { .pme_name = "PM_INST_FROM_L2MISS", .pme_code = 0x12209b, .pme_short_desc = "Instruction fetched missed L2", .pme_long_desc = "An instruction fetch group was fetched from beyond the local L2.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_INST_FROM_L2MISS], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_INST_FROM_L2MISS] }, [ POWER5_PME_PM_L2SC_ST_HIT ] = { .pme_name = "PM_L2SC_ST_HIT", .pme_code = 0x733e2, .pme_short_desc = "L2 slice C store hits", .pme_long_desc = "A store request made from the core hit in the L2 directory. The event is provided on each of the three slices A, B, and C.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L2SC_ST_HIT], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L2SC_ST_HIT] }, [ POWER5_PME_PM_MEM_RQ_DISP_Q8to11 ] = { .pme_name = "PM_MEM_RQ_DISP_Q8to11", .pme_code = 0x722e6, .pme_short_desc = "Memory read queue dispatched to queues 8-11", .pme_long_desc = "A memory operation was dispatched to read queue 8,9,10 or 11. 
This event is sent from the Memory Controller clock domain and must be scaled accordingly.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MEM_RQ_DISP_Q8to11], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MEM_RQ_DISP_Q8to11] }, [ POWER5_PME_PM_MRK_GRP_DISP ] = { .pme_name = "PM_MRK_GRP_DISP", .pme_code = 0x100002, .pme_short_desc = "Marked group dispatched", .pme_long_desc = "A group containing a sampled instruction was dispatched", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_GRP_DISP], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_GRP_DISP] }, [ POWER5_PME_PM_L2SB_MOD_TAG ] = { .pme_name = "PM_L2SB_MOD_TAG", .pme_code = 0x720e1, .pme_short_desc = "L2 slice B transition from modified to tagged", .pme_long_desc = "A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L2SB_MOD_TAG], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L2SB_MOD_TAG] }, [ POWER5_PME_PM_CLB_EMPTY_CYC ] = { .pme_name = "PM_CLB_EMPTY_CYC", .pme_code = 0x410c6, .pme_short_desc = "Cycles CLB empty", .pme_long_desc = "Cycles when both thread's CLB is completely empty.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_CLB_EMPTY_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_CLB_EMPTY_CYC] }, [ POWER5_PME_PM_L2SB_ST_HIT ] = { .pme_name = "PM_L2SB_ST_HIT", .pme_code = 0x733e1, .pme_short_desc = "L2 slice B store hits", .pme_long_desc = "A store request made from the core hit in the L2 directory. 
This event is provided on each of the three L2 slices A, B and C.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L2SB_ST_HIT], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L2SB_ST_HIT] }, [ POWER5_PME_PM_MEM_NONSPEC_RD_CANCEL ] = { .pme_name = "PM_MEM_NONSPEC_RD_CANCEL", .pme_code = 0x711c6, .pme_short_desc = "Non speculative memory read cancelled", .pme_long_desc = "A non-speculative read was cancelled because the combined response indicated it was sourced from aother L2 or L3. This event is sent from the Memory Controller clock domain and must be scaled accordingly", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MEM_NONSPEC_RD_CANCEL], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MEM_NONSPEC_RD_CANCEL] }, [ POWER5_PME_PM_BR_PRED_CR_TA ] = { .pme_name = "PM_BR_PRED_CR_TA", .pme_code = 0x423087, .pme_short_desc = "A conditional branch was predicted", .pme_long_desc = " CR and target prediction", .pme_event_ids = power5_event_ids[POWER5_PME_PM_BR_PRED_CR_TA], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_BR_PRED_CR_TA] }, [ POWER5_PME_PM_MRK_LSU0_FLUSH_SRQ ] = { .pme_name = "PM_MRK_LSU0_FLUSH_SRQ", .pme_code = 0x810c3, .pme_short_desc = "LSU0 marked SRQ lhs flushes", .pme_long_desc = "A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_LSU0_FLUSH_SRQ], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_LSU0_FLUSH_SRQ] }, [ POWER5_PME_PM_MRK_LSU_FLUSH_ULD ] = { .pme_name = "PM_MRK_LSU_FLUSH_ULD", .pme_code = 0x481090, .pme_short_desc = "Marked unaligned load flushes", .pme_long_desc = "A marked load was flushed because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1)", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_LSU_FLUSH_ULD], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_LSU_FLUSH_ULD] }, [ POWER5_PME_PM_INST_DISP_ATTEMPT ] = { .pme_name = "PM_INST_DISP_ATTEMPT", .pme_code 
= 0x120e1, .pme_short_desc = "Instructions dispatch attempted", .pme_long_desc = "Number of PowerPC Instructions dispatched (attempted, not filtered by success.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_INST_DISP_ATTEMPT], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_INST_DISP_ATTEMPT] }, [ POWER5_PME_PM_INST_FROM_RMEM ] = { .pme_name = "PM_INST_FROM_RMEM", .pme_code = 0x422086, .pme_short_desc = "Instruction fetched from remote memory", .pme_long_desc = "An instruction fetch group was fetched from memory attached to a different module than this proccessor is located on. Fetch groups can contain up to 8 instructions", .pme_event_ids = power5_event_ids[POWER5_PME_PM_INST_FROM_RMEM], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_INST_FROM_RMEM] }, [ POWER5_PME_PM_ST_REF_L1_LSU0 ] = { .pme_name = "PM_ST_REF_L1_LSU0", .pme_code = 0xc10c1, .pme_short_desc = "LSU0 L1 D cache store references", .pme_long_desc = "Store references to the Data Cache by LSU0.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_ST_REF_L1_LSU0], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_ST_REF_L1_LSU0] }, [ POWER5_PME_PM_LSU0_DERAT_MISS ] = { .pme_name = "PM_LSU0_DERAT_MISS", .pme_code = 0x800c2, .pme_short_desc = "LSU0 DERAT misses", .pme_long_desc = "Total D-ERAT Misses by LSU0. Requests that miss the Derat are rejected and retried until the request hits in the Erat. 
This may result in multiple erat misses for the same instruction.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU0_DERAT_MISS], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU0_DERAT_MISS] }, [ POWER5_PME_PM_L2SB_RCLD_DISP ] = { .pme_name = "PM_L2SB_RCLD_DISP", .pme_code = 0x701c1, .pme_short_desc = "L2 slice B RC load dispatch attempt", .pme_long_desc = "A Read/Claim dispatch for a Load was attempted", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L2SB_RCLD_DISP], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L2SB_RCLD_DISP] }, [ POWER5_PME_PM_FPU_STALL3 ] = { .pme_name = "PM_FPU_STALL3", .pme_code = 0x202088, .pme_short_desc = "FPU stalled in pipe3", .pme_long_desc = "FPU has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). This signal is active during the entire duration of the stall. Combined Unit 0 + Unit 1.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FPU_STALL3], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FPU_STALL3] }, [ POWER5_PME_PM_BR_PRED_CR ] = { .pme_name = "PM_BR_PRED_CR", .pme_code = 0x230e2, .pme_short_desc = "A conditional branch was predicted", .pme_long_desc = " CR prediction", .pme_event_ids = power5_event_ids[POWER5_PME_PM_BR_PRED_CR], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_BR_PRED_CR] }, [ POWER5_PME_PM_MRK_DATA_FROM_L2 ] = { .pme_name = "PM_MRK_DATA_FROM_L2", .pme_code = 0x1c7087, .pme_short_desc = "Marked data loaded from L2", .pme_long_desc = "The processor's Data Cache was reloaded from the local L2 due to a marked load.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_DATA_FROM_L2], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_DATA_FROM_L2] }, [ POWER5_PME_PM_LSU0_FLUSH_SRQ ] = { .pme_name = "PM_LSU0_FLUSH_SRQ", .pme_code = 0xc00c3, .pme_short_desc = "LSU0 SRQ lhs flushes", .pme_long_desc = "A store was flushed by unit 0 because younger load hits and older store that is already in the 
SRQ or in the same group.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU0_FLUSH_SRQ], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU0_FLUSH_SRQ] }, [ POWER5_PME_PM_FAB_PNtoNN_DIRECT ] = { .pme_name = "PM_FAB_PNtoNN_DIRECT", .pme_code = 0x703c7, .pme_short_desc = "PN to NN beat went straight to its destination", .pme_long_desc = "Fabric Data beats that the base chip takes the inbound PN data and passes it through to the outbound NN bus without going into a sidecar. The signal is delivered at FBC speed and the count must be scaled.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FAB_PNtoNN_DIRECT], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FAB_PNtoNN_DIRECT] }, [ POWER5_PME_PM_IOPS_CMPL ] = { .pme_name = "PM_IOPS_CMPL", .pme_code = 0x1, .pme_short_desc = "Internal operations completed", .pme_long_desc = "Number of internal operations that completed.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_IOPS_CMPL], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_IOPS_CMPL] }, [ POWER5_PME_PM_L2SC_SHR_INV ] = { .pme_name = "PM_L2SC_SHR_INV", .pme_code = 0x710c2, .pme_short_desc = "L2 slice C transition from shared to invalid", .pme_long_desc = "A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. This transition was caused by any external snoop request. The event is provided on each of the three slices A, B, and C. NOTE: For this event to be useful the tablewalk duration event should also be counted.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L2SC_SHR_INV], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L2SC_SHR_INV] }, [ POWER5_PME_PM_L2SA_RCST_DISP_FAIL_OTHER ] = { .pme_name = "PM_L2SA_RCST_DISP_FAIL_OTHER", .pme_code = 0x732e0, .pme_short_desc = "L2 slice A RC store dispatch attempt failed due to other reasons", .pme_long_desc = "A Read/Claim dispatch for a store failed for some reason other than Full or Collision conditions. 
Rejected dispatches do not count because they have not yet been attempted.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L2SA_RCST_DISP_FAIL_OTHER], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L2SA_RCST_DISP_FAIL_OTHER] }, [ POWER5_PME_PM_L2SA_RCST_DISP ] = { .pme_name = "PM_L2SA_RCST_DISP", .pme_code = 0x702c0, .pme_short_desc = "L2 slice A RC store dispatch attempt", .pme_long_desc = "A Read/Claim dispatch for a Store was attempted.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L2SA_RCST_DISP], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L2SA_RCST_DISP] }, [ POWER5_PME_PM_SNOOP_RETRY_AB_COLLISION ] = { .pme_name = "PM_SNOOP_RETRY_AB_COLLISION", .pme_code = 0x735e6, .pme_short_desc = "Snoop retry due to a b collision", .pme_long_desc = "Snoop retry due to a b collision", .pme_event_ids = power5_event_ids[POWER5_PME_PM_SNOOP_RETRY_AB_COLLISION], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_SNOOP_RETRY_AB_COLLISION] }, [ POWER5_PME_PM_FAB_PNtoVN_SIDECAR ] = { .pme_name = "PM_FAB_PNtoVN_SIDECAR", .pme_code = 0x733e7, .pme_short_desc = "PN to VN beat went to sidecar first", .pme_long_desc = "Fabric data beats that the base chip takes the inbound PN data and forwards it on to the outbound VN data bus after going into a sidecar first. 
The signal is delivered at FBC speed and the count must be scaled accordingly.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FAB_PNtoVN_SIDECAR], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FAB_PNtoVN_SIDECAR] }, [ POWER5_PME_PM_LSU_LMQ_S0_ALLOC ] = { .pme_name = "PM_LSU_LMQ_S0_ALLOC", .pme_code = 0xc30e6, .pme_short_desc = "LMQ slot 0 allocated", .pme_long_desc = "The first entry in the LMQ was allocated.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU_LMQ_S0_ALLOC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU_LMQ_S0_ALLOC] }, [ POWER5_PME_PM_LSU0_REJECT_LMQ_FULL ] = { .pme_name = "PM_LSU0_REJECT_LMQ_FULL", .pme_code = 0xc60e1, .pme_short_desc = "LSU0 reject due to LMQ full or missed data coming", .pme_long_desc = "Total cycles the Load Store Unit 0 is busy rejecting instructions because the Load Miss Queue was full. The LMQ has eight entries. If all eight entries are full, subsequent load instructions are rejected.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU0_REJECT_LMQ_FULL], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU0_REJECT_LMQ_FULL] }, [ POWER5_PME_PM_SNOOP_PW_RETRY_RQ ] = { .pme_name = "PM_SNOOP_PW_RETRY_RQ", .pme_code = 0x707c6, .pme_short_desc = "Snoop partial-write retry due to collision with active read queue", .pme_long_desc = "A snoop request for a partial write to memory was retried because it matched the cache line of an active read. This event is sent from the Memory Controller clock domain and must be scaled accordingly.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_SNOOP_PW_RETRY_RQ], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_SNOOP_PW_RETRY_RQ] }, [ POWER5_PME_PM_DTLB_REF ] = { .pme_name = "PM_DTLB_REF", .pme_code = 0x2c4090, .pme_short_desc = "Data TLB references", .pme_long_desc = "Total number of Data TLB references for all page sizes. 
Page size is determined at TLB reload time.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_DTLB_REF], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_DTLB_REF] }, [ POWER5_PME_PM_PTEG_FROM_L3 ] = { .pme_name = "PM_PTEG_FROM_L3", .pme_code = 0x18308e, .pme_short_desc = "PTEG loaded from L3", .pme_long_desc = "A Page Table Entry was loaded into the TLB from the local L3 due to a demand load.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_PTEG_FROM_L3], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_PTEG_FROM_L3] }, [ POWER5_PME_PM_FAB_M1toVNorNN_SIDECAR_EMPTY ] = { .pme_name = "PM_FAB_M1toVNorNN_SIDECAR_EMPTY", .pme_code = 0x712c7, .pme_short_desc = "M1 to VN/NN sidecar empty", .pme_long_desc = "Fabric cycles when the Minus-1 jump sidecar (sidecars for mcm to mcm data transfer) is empty. The signal is delivered at FBC speed and the count must be scaled accordingly.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FAB_M1toVNorNN_SIDECAR_EMPTY], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FAB_M1toVNorNN_SIDECAR_EMPTY] }, [ POWER5_PME_PM_LSU_SRQ_EMPTY_CYC ] = { .pme_name = "PM_LSU_SRQ_EMPTY_CYC", .pme_code = 0x400015, .pme_short_desc = "Cycles SRQ empty", .pme_long_desc = "Cycles the Store Request Queue is empty", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU_SRQ_EMPTY_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU_SRQ_EMPTY_CYC] }, [ POWER5_PME_PM_FPU1_STF ] = { .pme_name = "PM_FPU1_STF", .pme_code = 0x20e6, .pme_short_desc = "FPU1 executed store instruction", .pme_long_desc = "FPU1 has executed a Floating Point Store instruction.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FPU1_STF], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FPU1_STF] }, [ POWER5_PME_PM_LSU_LMQ_S0_VALID ] = { .pme_name = "PM_LSU_LMQ_S0_VALID", .pme_code = 0xc30e5, .pme_short_desc = "LMQ slot 0 valid", .pme_long_desc = "This signal is asserted every cycle when the first entry in the LMQ is valid. 
The LMQ had eight entries that are allocated FIFO", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU_LMQ_S0_VALID], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU_LMQ_S0_VALID] }, [ POWER5_PME_PM_GCT_USAGE_00to59_CYC ] = { .pme_name = "PM_GCT_USAGE_00to59_CYC", .pme_code = 0x10001f, .pme_short_desc = "Cycles GCT less than 60% full", .pme_long_desc = "Cycles when the Global Completion Table has fewer than 60% of its slots used. The GCT has 20 entries shared between threads.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_GCT_USAGE_00to59_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_GCT_USAGE_00to59_CYC] }, [ POWER5_PME_PM_DATA_FROM_L2MISS ] = { .pme_name = "PM_DATA_FROM_L2MISS", .pme_code = 0x3c309b, .pme_short_desc = "Data loaded missed L2", .pme_long_desc = "The processor's Data Cache was reloaded but not from the local L2.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_DATA_FROM_L2MISS], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_DATA_FROM_L2MISS] }, [ POWER5_PME_PM_GRP_DISP_BLK_SB_CYC ] = { .pme_name = "PM_GRP_DISP_BLK_SB_CYC", .pme_code = 0x130e1, .pme_short_desc = "Cycles group dispatch blocked by scoreboard", .pme_long_desc = "A scoreboard operation on a non-renamed resource has blocked dispatch.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_GRP_DISP_BLK_SB_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_GRP_DISP_BLK_SB_CYC] }, [ POWER5_PME_PM_FPU_FMOV_FEST ] = { .pme_name = "PM_FPU_FMOV_FEST", .pme_code = 0x301088, .pme_short_desc = "FPU executed FMOV or FEST instructions", .pme_long_desc = "The floating point unit has executed a move kind of instruction or one of the estimate instructions. This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ.. 
Combined Unit 0 + Unit 1.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FPU_FMOV_FEST], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FPU_FMOV_FEST] }, [ POWER5_PME_PM_XER_MAP_FULL_CYC ] = { .pme_name = "PM_XER_MAP_FULL_CYC", .pme_code = 0x100c2, .pme_short_desc = "Cycles XER mapper full", .pme_long_desc = "The XER mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. This event only indicates that the mapper was full, not that dispatch was prevented.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_XER_MAP_FULL_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_XER_MAP_FULL_CYC] }, [ POWER5_PME_PM_FLUSH_SB ] = { .pme_name = "PM_FLUSH_SB", .pme_code = 0x330e2, .pme_short_desc = "Flush caused by scoreboard operation", .pme_long_desc = "This thread has been flushed at dispatch because its scoreboard bit is set indicating that a non-renamed resource is being updated. This allows the other thread to have more machine resources for it to make progress while this thread is stalled.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FLUSH_SB], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FLUSH_SB] }, [ POWER5_PME_PM_MRK_DATA_FROM_L375_SHR ] = { .pme_name = "PM_MRK_DATA_FROM_L375_SHR", .pme_code = 0x3c709e, .pme_short_desc = "Marked data loaded from L3.75 shared", .pme_long_desc = "The processor's Data Cache was reloaded with shared (S) data from the L3 of a chip on a different module than this processor is located due to a marked load.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_DATA_FROM_L375_SHR], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_DATA_FROM_L375_SHR] }, [ POWER5_PME_PM_MRK_GRP_CMPL ] = { .pme_name = "PM_MRK_GRP_CMPL", .pme_code = 0x400013, .pme_short_desc = "Marked group completed", .pme_long_desc = "A group containing a sampled instruction completed. 
Microcoded instructions that span multiple groups will generate this event once per group.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_GRP_CMPL], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_GRP_CMPL] }, [ POWER5_PME_PM_SUSPENDED ] = { .pme_name = "PM_SUSPENDED", .pme_code = 0x0, .pme_short_desc = "Suspended", .pme_long_desc = "The counter is suspended (does not count).", .pme_event_ids = power5_event_ids[POWER5_PME_PM_SUSPENDED], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_SUSPENDED] }, [ POWER5_PME_PM_GRP_IC_MISS_BR_REDIR_NONSPEC ] = { .pme_name = "PM_GRP_IC_MISS_BR_REDIR_NONSPEC", .pme_code = 0x120e5, .pme_short_desc = "Group experienced non-speculative I cache miss or branch redirect", .pme_long_desc = "Group experienced non-speculative I cache miss or branch redirect", .pme_event_ids = power5_event_ids[POWER5_PME_PM_GRP_IC_MISS_BR_REDIR_NONSPEC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_GRP_IC_MISS_BR_REDIR_NONSPEC] }, [ POWER5_PME_PM_SNOOP_RD_RETRY_QFULL ] = { .pme_name = "PM_SNOOP_RD_RETRY_QFULL", .pme_code = 0x700c6, .pme_short_desc = "Snoop read retry due to read queue full", .pme_long_desc = "A snoop request for a read from memory was retried because the read queues were full. This event is sent from the Memory Controller clock domain and must be scaled accordingly.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_SNOOP_RD_RETRY_QFULL], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_SNOOP_RD_RETRY_QFULL] }, [ POWER5_PME_PM_L3SB_MOD_INV ] = { .pme_name = "PM_L3SB_MOD_INV", .pme_code = 0x730e4, .pme_short_desc = "L3 slice B transition from modified to invalid", .pme_long_desc = "L3 snooper detects someone doing a store to a line that is truly M in this L3 (i.e. L3 going M=>I). Mu|Me are not included since they are formed due to a prev read op. 
Tx is not included since it is considered shared at this point.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L3SB_MOD_INV], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L3SB_MOD_INV] }, [ POWER5_PME_PM_DATA_FROM_L35_SHR ] = { .pme_name = "PM_DATA_FROM_L35_SHR", .pme_code = 0x1c309e, .pme_short_desc = "Data loaded from L3.5 shared", .pme_long_desc = "The processor's Data Cache was reloaded with shared (S) data from the L3 of a chip on the same module as this processor is located due to a demand load.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_DATA_FROM_L35_SHR], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_DATA_FROM_L35_SHR] }, [ POWER5_PME_PM_LD_MISS_L1_LSU1 ] = { .pme_name = "PM_LD_MISS_L1_LSU1", .pme_code = 0xc10c6, .pme_short_desc = "LSU1 L1 D cache load misses", .pme_long_desc = "Load references that miss the Level 1 Data cache, by unit 1.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LD_MISS_L1_LSU1], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LD_MISS_L1_LSU1] }, [ POWER5_PME_PM_STCX_FAIL ] = { .pme_name = "PM_STCX_FAIL", .pme_code = 0x820e1, .pme_short_desc = "STCX failed", .pme_long_desc = "A stcx (stwcx or stdcx) failed", .pme_event_ids = power5_event_ids[POWER5_PME_PM_STCX_FAIL], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_STCX_FAIL] }, [ POWER5_PME_PM_DC_PREF_DST ] = { .pme_name = "PM_DC_PREF_DST", .pme_code = 0x830e6, .pme_short_desc = "DST (Data Stream Touch) stream start", .pme_long_desc = "A prefetch stream was started using the DST instruction.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_DC_PREF_DST], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_DC_PREF_DST] }, [ POWER5_PME_PM_GRP_DISP ] = { .pme_name = "PM_GRP_DISP", .pme_code = 0x200002, .pme_short_desc = "Group dispatches", .pme_long_desc = "A group was dispatched", .pme_event_ids = power5_event_ids[POWER5_PME_PM_GRP_DISP], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_GRP_DISP] }, [ POWER5_PME_PM_L2SA_RCLD_DISP_FAIL_ADDR ] = { 
.pme_name = "PM_L2SA_RCLD_DISP_FAIL_ADDR", .pme_code = 0x711c0, .pme_short_desc = "L2 slice A RC load dispatch attempt failed due to address collision with RC/CO/SN/SQ", .pme_long_desc = "A Read/Claim dispatch for a load failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L2SA_RCLD_DISP_FAIL_ADDR], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L2SA_RCLD_DISP_FAIL_ADDR] }, [ POWER5_PME_PM_FPU0_FPSCR ] = { .pme_name = "PM_FPU0_FPSCR", .pme_code = 0x30e0, .pme_short_desc = "FPU0 executed FPSCR instruction", .pme_long_desc = "FPU0 has executed FPSCR move related instruction. This could be mtfsfi*, mtfsb0*, mtfsb1*, mffs*, mtfsf*, mcrsf* where XYZ* means XYZ, XYZs, XYZ., XYZs.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FPU0_FPSCR], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FPU0_FPSCR] }, [ POWER5_PME_PM_DATA_FROM_L2 ] = { .pme_name = "PM_DATA_FROM_L2", .pme_code = 0x1c3087, .pme_short_desc = "Data loaded from L2", .pme_long_desc = "The processor's Data Cache was reloaded from the local L2 due to a demand load.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_DATA_FROM_L2], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_DATA_FROM_L2] }, [ POWER5_PME_PM_FPU1_DENORM ] = { .pme_name = "PM_FPU1_DENORM", .pme_code = 0x20e4, .pme_short_desc = "FPU1 received denormalized data", .pme_long_desc = "FPU1 has encountered a denormalized operand.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FPU1_DENORM], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FPU1_DENORM] }, [ POWER5_PME_PM_FPU_1FLOP ] = { .pme_name = "PM_FPU_1FLOP", .pme_code = 0x100090, .pme_short_desc = "FPU executed one flop instruction", .pme_long_desc = "The floating point unit has executed an add, mult, sub, compare, fsel, fneg, fabs, fnabs, fres, or frsqrte kind of instruction. 
These are single FLOP operations.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FPU_1FLOP], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FPU_1FLOP] }, [ POWER5_PME_PM_L2SC_RCLD_DISP_FAIL_OTHER ] = { .pme_name = "PM_L2SC_RCLD_DISP_FAIL_OTHER", .pme_code = 0x731e2, .pme_short_desc = "L2 slice C RC load dispatch attempt failed due to other reasons", .pme_long_desc = "A Read/Claim dispatch for a load failed for some reason other than Full or Collision conditions.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L2SC_RCLD_DISP_FAIL_OTHER], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L2SC_RCLD_DISP_FAIL_OTHER] }, [ POWER5_PME_PM_L2SC_RCST_DISP_FAIL_RC_FULL ] = { .pme_name = "PM_L2SC_RCST_DISP_FAIL_RC_FULL", .pme_code = 0x722e2, .pme_short_desc = "L2 slice C RC store dispatch attempt failed due to all RC full", .pme_long_desc = "A Read/Claim dispatch for a store failed because all RC machines are busy.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L2SC_RCST_DISP_FAIL_RC_FULL], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L2SC_RCST_DISP_FAIL_RC_FULL] }, [ POWER5_PME_PM_FPU0_FSQRT ] = { .pme_name = "PM_FPU0_FSQRT", .pme_code = 0xc2, .pme_short_desc = "FPU0 executed FSQRT instruction", .pme_long_desc = "FPU0 has executed a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FPU0_FSQRT], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FPU0_FSQRT] }, [ POWER5_PME_PM_LD_REF_L1 ] = { .pme_name = "PM_LD_REF_L1", .pme_code = 0x4c1090, .pme_short_desc = "L1 D cache load references", .pme_long_desc = "Load references to the Level 1 Data Cache. 
Combined unit 0 + 1.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LD_REF_L1], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LD_REF_L1] }, [ POWER5_PME_PM_INST_FROM_L1 ] = { .pme_name = "PM_INST_FROM_L1", .pme_code = 0x22208d, .pme_short_desc = "Instruction fetched from L1", .pme_long_desc = "An instruction fetch group was fetched from L1. Fetch Groups can contain up to 8 instructions", .pme_event_ids = power5_event_ids[POWER5_PME_PM_INST_FROM_L1], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_INST_FROM_L1] }, [ POWER5_PME_PM_TLBIE_HELD ] = { .pme_name = "PM_TLBIE_HELD", .pme_code = 0x130e4, .pme_short_desc = "TLBIE held at dispatch", .pme_long_desc = "Cycles a TLBIE instruction was held at dispatch.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_TLBIE_HELD], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_TLBIE_HELD] }, [ POWER5_PME_PM_DC_PREF_OUT_OF_STREAMS ] = { .pme_name = "PM_DC_PREF_OUT_OF_STREAMS", .pme_code = 0xc50c2, .pme_short_desc = "D cache out of prefetch streams", .pme_long_desc = "A new prefetch stream was detected but no more stream entries were available.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_DC_PREF_OUT_OF_STREAMS], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_DC_PREF_OUT_OF_STREAMS] }, [ POWER5_PME_PM_MRK_DATA_FROM_L25_MOD_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_L25_MOD_CYC", .pme_code = 0x4c70a2, .pme_short_desc = "Marked load latency from L2.5 modified", .pme_long_desc = "Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. 
To calculate average latency divide this count by the number of marked misses to the same level.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_DATA_FROM_L25_MOD_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_DATA_FROM_L25_MOD_CYC] }, [ POWER5_PME_PM_MRK_LSU1_FLUSH_SRQ ] = { .pme_name = "PM_MRK_LSU1_FLUSH_SRQ", .pme_code = 0x810c7, .pme_short_desc = "LSU1 marked SRQ lhs flushes", .pme_long_desc = "A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_LSU1_FLUSH_SRQ], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_LSU1_FLUSH_SRQ] }, [ POWER5_PME_PM_MEM_RQ_DISP_Q0to3 ] = { .pme_name = "PM_MEM_RQ_DISP_Q0to3", .pme_code = 0x702c6, .pme_short_desc = "Memory read queue dispatched to queues 0-3", .pme_long_desc = "A memory operation was dispatched to read queue 0,1,2, or 3. This event is sent from the Memory Controller clock domain and must be scaled accordingly.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MEM_RQ_DISP_Q0to3], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MEM_RQ_DISP_Q0to3] }, [ POWER5_PME_PM_ST_REF_L1_LSU1 ] = { .pme_name = "PM_ST_REF_L1_LSU1", .pme_code = 0xc10c5, .pme_short_desc = "LSU1 L1 D cache store references", .pme_long_desc = "Store references to the Data Cache by LSU1.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_ST_REF_L1_LSU1], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_ST_REF_L1_LSU1] }, [ POWER5_PME_PM_MRK_LD_MISS_L1 ] = { .pme_name = "PM_MRK_LD_MISS_L1", .pme_code = 0x182088, .pme_short_desc = "Marked L1 D cache load misses", .pme_long_desc = "Marked L1 D cache load misses", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_LD_MISS_L1], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_LD_MISS_L1] }, [ POWER5_PME_PM_L1_WRITE_CYC ] = { .pme_name = "PM_L1_WRITE_CYC", .pme_code = 0x230e7, .pme_short_desc = "Cycles writing to instruction L1", .pme_long_desc = 
"Cycles that a cache line was written to the instruction cache.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L1_WRITE_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L1_WRITE_CYC] }, [ POWER5_PME_PM_L2SC_ST_REQ ] = { .pme_name = "PM_L2SC_ST_REQ", .pme_code = 0x723e2, .pme_short_desc = "L2 slice C store requests", .pme_long_desc = "A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. The event is provided on each of the three slices A, B, and C.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L2SC_ST_REQ], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L2SC_ST_REQ] }, [ POWER5_PME_PM_CMPLU_STALL_FDIV ] = { .pme_name = "PM_CMPLU_STALL_FDIV", .pme_code = 0x21109b, .pme_short_desc = "Completion stall caused by FDIV or FQRT instruction", .pme_long_desc = "Following a completion stall (any period when no groups completed) the last instruction to finish before completion resumes was a floating point divide or square root instruction. This is a subset of PM_CMPLU_STALL_FPU.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_CMPLU_STALL_FDIV], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_CMPLU_STALL_FDIV] }, [ POWER5_PME_PM_THRD_SEL_OVER_CLB_EMPTY ] = { .pme_name = "PM_THRD_SEL_OVER_CLB_EMPTY", .pme_code = 0x410c2, .pme_short_desc = "Thread selection overrides caused by CLB empty", .pme_long_desc = "Thread selection was overridden because one thread's CLB was empty.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_THRD_SEL_OVER_CLB_EMPTY], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_THRD_SEL_OVER_CLB_EMPTY] }, [ POWER5_PME_PM_BR_MPRED_CR ] = { .pme_name = "PM_BR_MPRED_CR", .pme_code = 0x230e5, .pme_short_desc = "Branch mispredictions due to CR bit setting", .pme_long_desc = "A conditional branch instruction was incorrectly predicted as taken or not taken. 
The branch execution unit detects a branch mispredict because the CR value is opposite of the predicted value. This will result in a branch redirect flush if not overfidden by a flush of an older instruction.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_BR_MPRED_CR], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_BR_MPRED_CR] }, [ POWER5_PME_PM_L3SB_MOD_TAG ] = { .pme_name = "PM_L3SB_MOD_TAG", .pme_code = 0x720e4, .pme_short_desc = "L3 slice B transition from modified to TAG", .pme_long_desc = "L3 snooper detects someone doing a read to a line that is truly M in this L3(i.e. L3 going M->T or M->I(go_Mu case); Mu|Me are not included since they are formed due to a prev read op). Tx is not included since it is considered shared at this point.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L3SB_MOD_TAG], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L3SB_MOD_TAG] }, [ POWER5_PME_PM_MRK_DATA_FROM_L2MISS ] = { .pme_name = "PM_MRK_DATA_FROM_L2MISS", .pme_code = 0x3c709b, .pme_short_desc = "Marked data loaded missed L2", .pme_long_desc = "DL1 was reloaded from beyond L2 due to a marked demand load.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_DATA_FROM_L2MISS], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_DATA_FROM_L2MISS] }, [ POWER5_PME_PM_LSU_REJECT_SRQ ] = { .pme_name = "PM_LSU_REJECT_SRQ", .pme_code = 0x1c6088, .pme_short_desc = "LSU SRQ lhs rejects", .pme_long_desc = "Total cycles the Load Store Unit is busy rejecting instructions because of Load Hit Store conditions. Loads are rejected when data is needed from a previous store instruction but store forwarding is not possible because the data is not fully contained in the Store Data Queue or is not yet available in the Store Data Queue. 
Combined Unit 0 + 1.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU_REJECT_SRQ], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU_REJECT_SRQ] }, [ POWER5_PME_PM_LD_MISS_L1 ] = { .pme_name = "PM_LD_MISS_L1", .pme_code = 0x3c1088, .pme_short_desc = "L1 D cache load misses", .pme_long_desc = "Load references that miss the Level 1 Data cache. Combined unit 0 + 1.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LD_MISS_L1], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LD_MISS_L1] }, [ POWER5_PME_PM_INST_FROM_PREF ] = { .pme_name = "PM_INST_FROM_PREF", .pme_code = 0x32208d, .pme_short_desc = "Instruction fetched from prefetch", .pme_long_desc = "An instruction fetch group was fetched from the prefetch buffer. Fetch groups can contain up to 8 instructions", .pme_event_ids = power5_event_ids[POWER5_PME_PM_INST_FROM_PREF], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_INST_FROM_PREF] }, [ POWER5_PME_PM_DC_INV_L2 ] = { .pme_name = "PM_DC_INV_L2", .pme_code = 0xc10c7, .pme_short_desc = "L1 D cache entries invalidated from L2", .pme_long_desc = "A dcache invalidated was received from the L2 because a line in L2 was castout.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_DC_INV_L2], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_DC_INV_L2] }, [ POWER5_PME_PM_STCX_PASS ] = { .pme_name = "PM_STCX_PASS", .pme_code = 0x820e5, .pme_short_desc = "Stcx passes", .pme_long_desc = "A stcx (stwcx or stdcx) instruction was successful", .pme_event_ids = power5_event_ids[POWER5_PME_PM_STCX_PASS], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_STCX_PASS] }, [ POWER5_PME_PM_LSU_SRQ_FULL_CYC ] = { .pme_name = "PM_LSU_SRQ_FULL_CYC", .pme_code = 0x110c3, .pme_short_desc = "Cycles SRQ full", .pme_long_desc = "Cycles the Store Request Queue is full.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU_SRQ_FULL_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU_SRQ_FULL_CYC] }, [ POWER5_PME_PM_FPU_FIN ] = { .pme_name = "PM_FPU_FIN", .pme_code = 
0x401088, .pme_short_desc = "FPU produced a result", .pme_long_desc = "FPU finished, produced a result. This only indicates finish, not completion. Combined Unit 0 + Unit 1. Floating Point Stores are included in this count but not Floating Point Loads., , , XYZs", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FPU_FIN], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FPU_FIN] }, [ POWER5_PME_PM_L2SA_SHR_MOD ] = { .pme_name = "PM_L2SA_SHR_MOD", .pme_code = 0x700c0, .pme_short_desc = "L2 slice A transition from shared to modified", .pme_long_desc = "A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A, B, and C. ", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L2SA_SHR_MOD], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L2SA_SHR_MOD] }, [ POWER5_PME_PM_LSU_SRQ_STFWD ] = { .pme_name = "PM_LSU_SRQ_STFWD", .pme_code = 0x1c2088, .pme_short_desc = "SRQ store forwarded", .pme_long_desc = "Data from a store instruction was forwarded to a load. A load that misses L1 but becomes a store forward is treated as a load miss and it causes the DL1 load miss event to be counted. It does not go into the LMQ. If a load that hits L1 but becomes a store forward, then it's not treated as a load miss. Combined Unit 0 + 1.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU_SRQ_STFWD], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU_SRQ_STFWD] }, [ POWER5_PME_PM_0INST_CLB_CYC ] = { .pme_name = "PM_0INST_CLB_CYC", .pme_code = 0x400c0, .pme_short_desc = "Cycles no instructions in CLB", .pme_long_desc = "The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. 
These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_0INST_CLB_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_0INST_CLB_CYC] }, [ POWER5_PME_PM_FXU0_FIN ] = { .pme_name = "PM_FXU0_FIN", .pme_code = 0x130e2, .pme_short_desc = "FXU0 produced a result", .pme_long_desc = "The Fixed Point unit 0 finished an instruction and produced a result. Instructions that finish may not necessary complete.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FXU0_FIN], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FXU0_FIN] }, [ POWER5_PME_PM_L2SB_RCST_DISP_FAIL_RC_FULL ] = { .pme_name = "PM_L2SB_RCST_DISP_FAIL_RC_FULL", .pme_code = 0x722e1, .pme_short_desc = "L2 slice B RC store dispatch attempt failed due to all RC full", .pme_long_desc = "A Read/Claim dispatch for a store failed because all RC machines are busy.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L2SB_RCST_DISP_FAIL_RC_FULL], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L2SB_RCST_DISP_FAIL_RC_FULL] }, [ POWER5_PME_PM_THRD_GRP_CMPL_BOTH_CYC ] = { .pme_name = "PM_THRD_GRP_CMPL_BOTH_CYC", .pme_code = 0x200013, .pme_short_desc = "Cycles group completed by both threads", .pme_long_desc = "Cycles that both threads completed.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_THRD_GRP_CMPL_BOTH_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_THRD_GRP_CMPL_BOTH_CYC] }, [ POWER5_PME_PM_PMC5_OVERFLOW ] = { .pme_name = "PM_PMC5_OVERFLOW", .pme_code = 0x10001a, .pme_short_desc = "PMC5 Overflow", .pme_long_desc = "Overflows from PMC5 are counted. This effectively widens the PMC. 
The Overflow from the original PMC will not trigger an exception even if the PMU is configured to generate exceptions on overflow.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_PMC5_OVERFLOW], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_PMC5_OVERFLOW] }, [ POWER5_PME_PM_FPU0_FDIV ] = { .pme_name = "PM_FPU0_FDIV", .pme_code = 0xc0, .pme_short_desc = "FPU0 executed FDIV instruction", .pme_long_desc = "FPU0 has executed a divide instruction. This could be fdiv, fdivs, fdiv. fdivs.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_FPU0_FDIV], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_FPU0_FDIV] }, [ POWER5_PME_PM_PTEG_FROM_L375_SHR ] = { .pme_name = "PM_PTEG_FROM_L375_SHR", .pme_code = 0x38309e, .pme_short_desc = "PTEG loaded from L3.75 shared", .pme_long_desc = "A Page Table Entry was loaded into the TLB with shared (S) data from the L3 of a chip on a different module than this processor is located, due to a demand load.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_PTEG_FROM_L375_SHR], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_PTEG_FROM_L375_SHR] }, [ POWER5_PME_PM_LD_REF_L1_LSU1 ] = { .pme_name = "PM_LD_REF_L1_LSU1", .pme_code = 0xc10c4, .pme_short_desc = "LSU1 L1 D cache load references", .pme_long_desc = "Load references to Level 1 Data Cache, by unit 1.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LD_REF_L1_LSU1], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LD_REF_L1_LSU1] }, [ POWER5_PME_PM_L2SA_RC_DISP_FAIL_CO_BUSY ] = { .pme_name = "PM_L2SA_RC_DISP_FAIL_CO_BUSY", .pme_code = 0x703c0, .pme_short_desc = "L2 slice A RC dispatch attempt failed due to RC/CO pair chosen was miss and CO already busy", .pme_long_desc = "A Read/Claim Dispatch was rejected at dispatch because the Castout Machine was busy. In the case of an RC starting up on a miss and the victim is valid, the CO machine must be available for the RC to process the access. 
If the CO is still busy working on an old castout, then the RC must not-ack the access if it is a miss(re-issued by the CIU). If it is a miss and the CO is available to process the castout, the RC will accept the access. Once the RC has finished, it can restart and process new accesses that result in a hit (or miss that doesn't need a CO) even though the CO is still processing a castout from a previous access.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L2SA_RC_DISP_FAIL_CO_BUSY], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L2SA_RC_DISP_FAIL_CO_BUSY] }, [ POWER5_PME_PM_HV_CYC ] = { .pme_name = "PM_HV_CYC", .pme_code = 0x20000b, .pme_short_desc = "Hypervisor Cycles", .pme_long_desc = "Cycles when the processor is executing in Hypervisor (MSR[HV] = 1 and MSR[PR]=0)", .pme_event_ids = power5_event_ids[POWER5_PME_PM_HV_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_HV_CYC] }, [ POWER5_PME_PM_THRD_PRIO_DIFF_0_CYC ] = { .pme_name = "PM_THRD_PRIO_DIFF_0_CYC", .pme_code = 0x430e3, .pme_short_desc = "Cycles no thread priority difference", .pme_long_desc = "Cycles when this thread's priority is equal to the other thread's priority.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_THRD_PRIO_DIFF_0_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_THRD_PRIO_DIFF_0_CYC] }, [ POWER5_PME_PM_LR_CTR_MAP_FULL_CYC ] = { .pme_name = "PM_LR_CTR_MAP_FULL_CYC", .pme_code = 0x100c6, .pme_short_desc = "Cycles LR/CTR mapper full", .pme_long_desc = "The LR/CTR mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. 
This event only indicates that the mapper was full, not that dispatch was prevented.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LR_CTR_MAP_FULL_CYC], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LR_CTR_MAP_FULL_CYC] }, [ POWER5_PME_PM_L3SB_SHR_INV ] = { .pme_name = "PM_L3SB_SHR_INV", .pme_code = 0x710c4, .pme_short_desc = "L3 slice B transition from shared to invalid", .pme_long_desc = "L3 snooper detects someone doing a store to a line that is Sx in this L3(i.e. invalidate hit SX and dispatched).", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L3SB_SHR_INV], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L3SB_SHR_INV] }, [ POWER5_PME_PM_DATA_FROM_RMEM ] = { .pme_name = "PM_DATA_FROM_RMEM", .pme_code = 0x1c30a1, .pme_short_desc = "Data loaded from remote memory", .pme_long_desc = "The processor's Data Cache was reloaded from memory attached to a different module than this proccessor is located on.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_DATA_FROM_RMEM], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_DATA_FROM_RMEM] }, [ POWER5_PME_PM_DATA_FROM_L275_MOD ] = { .pme_name = "PM_DATA_FROM_L275_MOD", .pme_code = 0x1c30a3, .pme_short_desc = "Data loaded from L2.75 modified", .pme_long_desc = "The processor's Data Cache was reloaded with modified (M) data from the L2 on a different module than this processor is located due to a demand load. ", .pme_event_ids = power5_event_ids[POWER5_PME_PM_DATA_FROM_L275_MOD], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_DATA_FROM_L275_MOD] }, [ POWER5_PME_PM_LSU0_REJECT_SRQ ] = { .pme_name = "PM_LSU0_REJECT_SRQ", .pme_code = 0xc60e0, .pme_short_desc = "LSU0 SRQ lhs rejects", .pme_long_desc = "Total cycles the Load Store Unit 0 is busy rejecting instructions because of Load Hit Store conditions. 
Loads are rejected when data is needed from a previous store instruction but store forwarding is not possible because the data is not fully contained in the Store Data Queue or is not yet available in the Store Data Queue.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU0_REJECT_SRQ], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU0_REJECT_SRQ] }, [ POWER5_PME_PM_LSU1_DERAT_MISS ] = { .pme_name = "PM_LSU1_DERAT_MISS", .pme_code = 0x800c6, .pme_short_desc = "LSU1 DERAT misses", .pme_long_desc = "A data request (load or store) from LSU Unit 1 missed the ERAT and resulted in an ERAT reload. Multiple instructions may miss the ERAT entry for the same 4K page, but only one reload will occur.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU1_DERAT_MISS], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU1_DERAT_MISS] }, [ POWER5_PME_PM_MRK_LSU_FIN ] = { .pme_name = "PM_MRK_LSU_FIN", .pme_code = 0x400014, .pme_short_desc = "Marked instruction LSU processing finished", .pme_long_desc = "One of the Load/Store Units finished a marked instruction. Instructions that finish may not necessary complete", .pme_event_ids = power5_event_ids[POWER5_PME_PM_MRK_LSU_FIN], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_MRK_LSU_FIN] }, [ POWER5_PME_PM_DTLB_MISS_16M ] = { .pme_name = "PM_DTLB_MISS_16M", .pme_code = 0xc40c4, .pme_short_desc = "Data TLB miss for 16M page", .pme_long_desc = "Data TLB references to 16MB pages that missed the TLB. 
Page size is determined at TLB reload time.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_DTLB_MISS_16M], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_DTLB_MISS_16M] }, [ POWER5_PME_PM_LSU0_FLUSH_UST ] = { .pme_name = "PM_LSU0_FLUSH_UST", .pme_code = 0xc00c1, .pme_short_desc = "LSU0 unaligned store flushes", .pme_long_desc = "A store was flushed from unit 0 because it was unaligned (crossed a 4K boundary).", .pme_event_ids = power5_event_ids[POWER5_PME_PM_LSU0_FLUSH_UST], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_LSU0_FLUSH_UST] }, [ POWER5_PME_PM_L2SC_MOD_TAG ] = { .pme_name = "PM_L2SC_MOD_TAG", .pme_code = 0x720e2, .pme_short_desc = "L2 slice C transition from modified to tagged", .pme_long_desc = "A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L2SC_MOD_TAG], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L2SC_MOD_TAG] }, [ POWER5_PME_PM_L2SB_RC_DISP_FAIL_CO_BUSY ] = { .pme_name = "PM_L2SB_RC_DISP_FAIL_CO_BUSY", .pme_code = 0x703c1, .pme_short_desc = "L2 slice B RC dispatch attempt failed due to RC/CO pair chosen was miss and CO already busy", .pme_long_desc = "A Read/Claim Dispatch was rejected at dispatch because the Castout Machine was busy. In the case of an RC starting up on a miss and the victim is valid, the CO machine must be available for the RC to process the access. If the CO is still busy working on an old castout, then the RC must not-ack the access if it is a miss(re-issued by the CIU). If it is a miss and the CO is available to process the castout, the RC will accept the access. 
Once the RC has finished, it can restart and process new accesses that result in a hit (or miss that doesn't need a CO) even though the CO is still processing a castout from a previous access.", .pme_event_ids = power5_event_ids[POWER5_PME_PM_L2SB_RC_DISP_FAIL_CO_BUSY], .pme_group_vector = power5_group_vecs[POWER5_PME_PM_L2SB_RC_DISP_FAIL_CO_BUSY] } };
/*
 * NOTE(review): presumably the number of entries in the event table that
 * closes just above. That table starts outside this excerpt, so confirm
 * the count against it before relying on this value.
 */
#define POWER5_PME_EVENT_COUNT 474

/*
 * Per-group event id rows, indexed by group number.
 *
 * Each entry of power5_groups[] (defined right after this table) points at
 * one row through .pmg_event_ids = power5_group_event_ids[N]. A row holds
 * POWER5_NUM_EVENT_COUNTERS values; every row here supplies six initializers
 * and the last two are 0 in all of them.
 *
 * NOTE(review): the values look like indices into the event table, one per
 * counter slot. Confirm against the code that consumes pmg_event_ids.
 */
static const int power5_group_event_ids[][POWER5_NUM_EVENT_COUNTERS] = {
	[ 0 ] = { 190, 71, 56, 12, 0, 0 },
	[ 1 ] = { 2, 195, 49, 12, 0, 0 },
	[ 2 ] = { 66, 65, 50, 60, 0, 0 },
	[ 3 ] = { 0, 2, 169, 138, 0, 0 },
	[ 4 ] = { 6, 6, 149, 59, 0, 0 },
	[ 5 ] = { 60, 59, 46, 51, 0, 0 },
	[ 6 ] = { 62, 61, 47, 52, 0, 0 },
	[ 7 ] = { 143, 143, 113, 119, 0, 0 },
	[ 8 ] = { 147, 147, 119, 123, 0, 0 },
	[ 9 ] = { 149, 141, 112, 122, 0, 0 },
	[ 10 ] = { 212, 73, 117, 18, 0, 0 },
	[ 11 ] = { 73, 9, 61, 58, 0, 0 },
	[ 12 ] = { 139, 1, 87, 59, 0, 0 },
	[ 13 ] = { 126, 135, 13, 91, 0, 0 },
	[ 14 ] = { 145, 144, 25, 159, 0, 0 },
	[ 15 ] = { 125, 134, 55, 66, 0, 0 },
	[ 16 ] = { 123, 132, 120, 191, 0, 0 },
	[ 17 ] = { 124, 133, 55, 1, 0, 0 },
	[ 18 ] = { 146, 145, 109, 31, 0, 0 },
	[ 19 ] = { 73, 140, 25, 16, 0, 0 },
	[ 20 ] = { 81, 71, 27, 33, 0, 0 },
	[ 21 ] = { 141, 138, 55, 113, 0, 0 },
	[ 22 ] = { 119, 128, 109, 59, 0, 0 },
	[ 23 ] = { 120, 129, 55, 113, 0, 0 },
	[ 24 ] = { 142, 140, 0, 59, 0, 0 },
	[ 25 ] = { 121, 130, 109, 59, 0, 0 },
	[ 26 ] = { 122, 131, 55, 113, 0, 0 },
	[ 27 ] = { 140, 71, 147, 114, 0, 0 },
	[ 28 ] = { 70, 13, 55, 10, 0, 0 },
	[ 29 ] = { 73, 10, 6, 8, 0, 0 },
	[ 30 ] = { 68, 12, 55, 7, 0, 0 },
	[ 31 ] = { 57, 11, 55, 9, 0, 0 },
	[ 32 ] = { 115, 7, 116, 116, 0, 0 },
	[ 33 ] = { 41, 49, 40, 46, 0, 0 },
	[ 34 ] = { 11, 114, 48, 11, 0, 0 },
	[ 35 ] = { 35, 204, 188, 59, 0, 0 },
	[ 36 ] = { 198, 193, 106, 112, 0, 0 },
	[ 37 ] = { 117, 126, 52, 57, 0, 0 },
	[ 38 ] = { 72, 69, 54, 0, 0, 0 },
	[ 39 ] = { 69, 67, 60, 59, 0, 0 },
	[ 40 ] = { 210, 184, 1, 3, 0, 0 },
	[ 41 ] = { 9, 8, 3, 5, 0, 0 },
	[ 42 ] = { 64, 62, 24, 59, 0, 0 },
	[ 43 ] = { 20, 21, 100, 106, 0, 0 },
	[ 44 ] = { 13, 137, 165, 171, 0, 0 },
	[ 45 ] = { 21, 78, 101, 105, 0, 0 },
	[ 46 ] = { 26, 23, 103, 108, 0, 0 },
	[ 47 ] = { 25, 22, 166, 173, 0, 0 },
	[ 48 ] = { 16, 18, 26, 59, 0, 0 },
	[ 49 ] = { 16, 18, 187, 15, 0, 0 },
	[ 50 ] = { 14, 16, 8, 13, 0, 0 },
	[ 51 ] = { 17, 17, 10, 14, 0, 0 },
	[ 52 ] = { 78, 74, 59, 63, 0, 0 },
	[ 53 ] = { 76, 77, 55, 0, 0, 0 },
	[ 54 ] = { 77, 75, 57, 61, 0, 0 },
	[ 55 ] = { 79, 76, 58, 62, 0, 0 },
	[ 56 ] = { 184, 181, 154, 163, 0, 0 },
	[ 57 ] = { 187, 182, 156, 164, 0, 0 },
	[ 58 ] = { 183, 183, 189, 165, 0, 0 },
	[ 59 ] = { 186, 64, 51, 16, 0, 0 },
	[ 60 ] = { 83, 82, 64, 69, 0, 0 },
	[ 61 ] = { 85, 84, 66, 71, 0, 0 },
	[ 62 ] = { 87, 87, 68, 74, 0, 0 },
	[ 63 ] = { 91, 90, 72, 77, 0, 0 },
	[ 64 ] = { 93, 92, 74, 79, 0, 0 },
	[ 65 ] = { 95, 95, 76, 82, 0, 0 },
	[ 66 ] = { 99, 98, 80, 85, 0, 0 },
	[ 67 ] = { 101, 100, 82, 87, 0, 0 },
	[ 68 ] = { 103, 103, 84, 90, 0, 0 },
	[ 69 ] = { 107, 71, 89, 94, 0, 0 },
	[ 70 ] = { 73, 108, 93, 98, 0, 0 },
	[ 71 ] = { 73, 111, 97, 102, 0, 0 },
	[ 72 ] = { 82, 86, 63, 73, 0, 0 },
	[ 73 ] = { 90, 94, 71, 81, 0, 0 },
	[ 74 ] = { 98, 102, 79, 89, 0, 0 },
	[ 75 ] = { 106, 107, 91, 99, 0, 0 },
	[ 76 ] = { 108, 109, 88, 96, 0, 0 },
	[ 77 ] = { 112, 112, 99, 100, 0, 0 },
	[ 78 ] = { 55, 54, 38, 43, 0, 0 },
	[ 79 ] = { 56, 53, 39, 44, 0, 0 },
	[ 80 ] = { 54, 55, 30, 40, 0, 0 },
	[ 81 ] = { 58, 56, 55, 115, 0, 0 },
	[ 82 ] = { 40, 48, 29, 39, 0, 0 },
	[ 83 ] = { 37, 45, 31, 41, 0, 0 },
	[ 84 ] = { 38, 46, 33, 42, 0, 0 },
	[ 85 ] = { 43, 51, 55, 37, 0, 0 },
	[ 86 ] = { 42, 50, 105, 111, 0, 0 },
	[ 87 ] = { 39, 47, 55, 42, 0, 0 },
	[ 88 ] = { 36, 44, 30, 59, 0, 0 },
	[ 89 ] = { 44, 52, 105, 59, 0, 0 },
	[ 90 ] = { 59, 57, 42, 49, 0, 0 },
	[ 91 ] = { 171, 172, 45, 47, 0, 0 },
	[ 92 ] = { 4, 4, 43, 50, 0, 0 },
	[ 93 ] = { 206, 203, 171, 178, 0, 0 },
	[ 94 ] = { 205, 202, 173, 180, 0, 0 },
	[ 95 ] = { 204, 201, 175, 182, 0, 0 },
	[ 96 ] = { 203, 68, 177, 59, 0, 0 },
	[ 97 ] = { 202, 196, 55, 176, 0, 0 },
	[ 98 ] = { 196, 71, 182, 189, 0, 0 },
	[ 99 ] = { 73, 0, 178, 185, 0, 0 },
	[ 100 ] = { 73, 15, 180, 187, 0, 0 },
	[ 101 ] = { 27, 27, 17, 23, 0, 0 },
	[ 102 ] = { 32, 29, 20, 28, 0, 0 },
	[ 103 ] = { 33, 33, 21, 27, 0, 0 },
	[ 104 ] = { 31, 28, 15, 24, 0, 0 },
	[ 105 ] = { 193, 185, 161, 166, 0, 0 },
	[ 106 ] = { 194, 189, 160, 59, 0, 0 },
	[ 107 ] = { 197, 150, 162, 127, 0, 0 },
	[ 108 ] = { 192, 149, 159, 126, 0, 0 },
	[ 109 ] = { 156, 155, 125, 20, 0, 0 },
	[ 110 ] = { 155, 148, 126, 21, 0, 0 },
	[ 111 ] = { 159, 156, 128, 132, 0, 0 },
	[ 112 ] = { 153, 152, 124, 128, 0, 0 },
	[ 113 ] = { 171, 173, 185, 158, 0, 0 },
	[ 114 ] = { 171, 179, 137, 146, 0, 0 },
	[ 115 ] = { 172, 158, 138, 147, 0, 0 },
	[ 116 ] = { 160, 162, 129, 135, 0, 0 },
	[ 117 ] = { 161, 160, 55, 44, 0, 0 },
	[ 118 ] = { 163, 166, 131, 138, 0, 0 },
	[ 119 ] = { 166, 161, 130, 143, 0, 0 },
	[ 120 ] = { 164, 164, 133, 141, 0, 0 },
	[ 121 ] = { 162, 161, 55, 137, 0, 0 },
	[ 122 ] = { 165, 165, 132, 140, 0, 0 },
	[ 123 ] = { 168, 168, 135, 144, 0, 0 },
	[ 124 ] = { 170, 170, 55, 144, 0, 0 },
	[ 125 ] = { 175, 71, 150, 134, 0, 0 },
	[ 126 ] = { 179, 179, 148, 160, 0, 0 },
	[ 127 ] = { 178, 178, 136, 148, 0, 0 },
	[ 128 ] = { 13, 74, 165, 106, 0, 0 },
	[ 129 ] = { 16, 18, 165, 106, 0, 0 },
	[ 130 ] = { 81, 21, 165, 106, 0, 0 },
	[ 131 ] = { 16, 18, 100, 171, 0, 0 },
	[ 132 ] = { 12, 69, 61, 91, 0, 0 },
	[ 133 ] = { 9, 8, 3, 1, 0, 0 },
	[ 134 ] = { 43, 51, 30, 37, 0, 0 },
	[ 135 ] = { 39, 47, 33, 42, 0, 0 },
	[ 136 ] = { 36, 44, 30, 40, 0, 0 },
	[ 137 ] = { 56, 54, 165, 106, 0, 0 },
	[ 138 ] = { 58, 56, 30, 40, 0, 0 },
	[ 139 ] = { 55, 53, 39, 44, 0, 0 },
	[ 140 ] = { 12, 58, 6, 44, 0, 0 },
	[ 141 ] = { 12, 56, 56, 115, 0, 0 },
	[ 142 ] = { 12, 72, 100, 171, 0, 0 },
	[ 143 ] = { 210, 15, 165, 106, 0, 0 },
	[ 144 ] = { 56, 54, 6, 59, 0, 0 }
};
static const pmg_power_group_t power5_groups[] = { [ 0 ] = { .pmg_name = "pm_utilization", .pmg_desc = "CPI and utilization data", .pmg_event_ids = power5_group_event_ids[0], .pmg_mmcr0 = 0x0000000000000000ULL,
.pmg_mmcr1 = 0x000000000a02121eULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 1 ] = { .pmg_name = "pm_completion", .pmg_desc = "Completion and cycle counts", .pmg_event_ids = power5_group_event_ids[1], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x000000002608261eULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 2 ] = { .pmg_name = "pm_group_dispatch", .pmg_desc = "Group dispatch events", .pmg_event_ids = power5_group_event_ids[2], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x4000000ec6c8c212ULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 3 ] = { .pmg_name = "pm_clb1", .pmg_desc = "CLB fullness", .pmg_event_ids = power5_group_event_ids[3], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x015b000180848c4cULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 4 ] = { .pmg_name = "pm_clb2", .pmg_desc = "CLB fullness", .pmg_event_ids = power5_group_event_ids[4], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x014300028a8ccc02ULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 5 ] = { .pmg_name = "pm_gct_empty", .pmg_desc = "GCT empty reasons", .pmg_event_ids = power5_group_event_ids[5], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x4000000008380838ULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 6 ] = { .pmg_name = "pm_gct_usage", .pmg_desc = "GCT Usage", .pmg_event_ids = power5_group_event_ids[6], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x000000003e3e3e3eULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 7 ] = { .pmg_name = "pm_lsu1", .pmg_desc = "LSU LRQ and LMQ events", .pmg_event_ids = power5_group_event_ids[7], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x000f000fccc4cccaULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 8 ] = { .pmg_name = "pm_lsu2", .pmg_desc = "LSU SRQ events", .pmg_event_ids = power5_group_event_ids[8], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x400e000ecac2ca86ULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 9 ] = { .pmg_name = "pm_lsu3", .pmg_desc = "LSU SRQ and LMQ events", .pmg_event_ids = power5_group_event_ids[9], .pmg_mmcr0 = 
0x0000000000000000ULL, .pmg_mmcr1 = 0x010f000a102aca2aULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 10 ] = { .pmg_name = "pm_prefetch1", .pmg_desc = "Prefetch stream allocation", .pmg_event_ids = power5_group_event_ids[10], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x8432000d36c884ceULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 11 ] = { .pmg_name = "pm_prefetch2", .pmg_desc = "Prefetch events", .pmg_event_ids = power5_group_event_ids[11], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x8103000602cace8eULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 12 ] = { .pmg_name = "pm_prefetch3", .pmg_desc = "L2 prefetch and misc events", .pmg_event_ids = power5_group_event_ids[12], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x047c000820828602ULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 13 ] = { .pmg_name = "pm_prefetch4", .pmg_desc = "Misc prefetch and reject events", .pmg_event_ids = power5_group_event_ids[13], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x063e000ec0c8cc86ULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 14 ] = { .pmg_name = "pm_lsu_reject1", .pmg_desc = "LSU reject events", .pmg_event_ids = power5_group_event_ids[14], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0xc22c000e2010c610ULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 15 ] = { .pmg_name = "pm_lsu_reject2", .pmg_desc = "LSU rejects due to reload CDF or tag update collision", .pmg_event_ids = power5_group_event_ids[15], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x820c000dc4cc02ceULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 16 ] = { .pmg_name = "LSU rejects due to ERAT", .pmg_desc = " held instuctions", .pmg_event_ids = power5_group_event_ids[16], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x420c000fc6cec0c8ULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 17 ] = { .pmg_name = "pm_lsu_reject4", .pmg_desc = "LSU0/1 reject LMQ full", .pmg_event_ids = power5_group_event_ids[17], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x820c000dc2ca02c8ULL, .pmg_mmcra = 0x0000000000000001ULL 
}, [ 18 ] = { .pmg_name = "pm_lsu_reject5", .pmg_desc = "LSU misc reject and flush events", .pmg_event_ids = power5_group_event_ids[18], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x420c000c10208a8eULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 19 ] = { .pmg_name = "pm_flush1", .pmg_desc = "Misc flush events", .pmg_event_ids = power5_group_event_ids[19], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0xc0f000020210c68eULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 20 ] = { .pmg_name = "pm_flush2", .pmg_desc = "Flushes due to scoreboard and sync", .pmg_event_ids = power5_group_event_ids[20], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0xc08000038002c4c2ULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 21 ] = { .pmg_name = "pm_lsu_flush_srq_lrq", .pmg_desc = "LSU flush by SRQ and LRQ events", .pmg_event_ids = power5_group_event_ids[21], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x40c000002020028aULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 22 ] = { .pmg_name = "pm_lsu_flush_lrq", .pmg_desc = "LSU0/1 flush due to LRQ", .pmg_event_ids = power5_group_event_ids[22], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x40c00000848c8a02ULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 23 ] = { .pmg_name = "pm_lsu_flush_srq", .pmg_desc = "LSU0/1 flush due to SRQ", .pmg_event_ids = power5_group_event_ids[23], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x40c00000868e028aULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 24 ] = { .pmg_name = "pm_lsu_flush_unaligned", .pmg_desc = "LSU flush due to unaligned data", .pmg_event_ids = power5_group_event_ids[24], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x80c000021010c802ULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 25 ] = { .pmg_name = "pm_lsu_flush_uld", .pmg_desc = "LSU0/1 flush due to unaligned load", .pmg_event_ids = power5_group_event_ids[25], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x40c0000080888a02ULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 26 ] = { .pmg_name = "pm_lsu_flush_ust", .pmg_desc = "LSU0/1 
flush due to unaligned store", .pmg_event_ids = power5_group_event_ids[26], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x40c00000828a028aULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 27 ] = { .pmg_name = "pm_lsu_flush_full", .pmg_desc = "LSU flush due to LRQ/SRQ full", .pmg_event_ids = power5_group_event_ids[27], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0xc0200009ce0210c0ULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 28 ] = { .pmg_name = "pm_lsu_stall1", .pmg_desc = "LSU Stalls", .pmg_event_ids = power5_group_event_ids[28], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x4000000028300234ULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 29 ] = { .pmg_name = "pm_lsu_stall2", .pmg_desc = "LSU Stalls", .pmg_event_ids = power5_group_event_ids[29], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x4000000002341e36ULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 30 ] = { .pmg_name = "pm_fxu_stall", .pmg_desc = "FXU Stalls", .pmg_event_ids = power5_group_event_ids[30], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x4000000822320232ULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 31 ] = { .pmg_name = "pm_fpu_stall", .pmg_desc = "FPU Stalls", .pmg_event_ids = power5_group_event_ids[31], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x4000000020360230ULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 32 ] = { .pmg_name = "pm_queue_full", .pmg_desc = "BRQ LRQ LMQ queue full", .pmg_event_ids = power5_group_event_ids[32], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x400b0009ce8a84ceULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 33 ] = { .pmg_name = "pm_issueq_full", .pmg_desc = "FPU FX full", .pmg_event_ids = power5_group_event_ids[33], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x40000000868e8088ULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 34 ] = { .pmg_name = "pm_mapper_full1", .pmg_desc = "CR CTR GPR mapper full", .pmg_event_ids = power5_group_event_ids[34], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x40000002888cca82ULL, .pmg_mmcra = 0x0000000000000000ULL 
}, [ 35 ] = { .pmg_name = "pm_mapper_full2", .pmg_desc = "FPR XER mapper full", .pmg_event_ids = power5_group_event_ids[35], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x4103000282843602ULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 36 ] = { .pmg_name = "pm_misc_load", .pmg_desc = "Non-cachable loads and stcx events", .pmg_event_ids = power5_group_event_ids[36], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x0438000cc2ca828aULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 37 ] = { .pmg_name = "pm_ic_demand", .pmg_desc = "ICache demand from BR redirect", .pmg_event_ids = power5_group_event_ids[37], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x800c000fc6cec0c2ULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 38 ] = { .pmg_name = "pm_ic_pref", .pmg_desc = "ICache prefetch", .pmg_event_ids = power5_group_event_ids[38], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x8000000ccecc8e1aULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 39 ] = { .pmg_name = "pm_ic_miss", .pmg_desc = "ICache misses", .pmg_event_ids = power5_group_event_ids[39], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x4003000e32cec802ULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 40 ] = { .pmg_name = "Branch mispredict", .pmg_desc = " TLB and SLB misses", .pmg_event_ids = power5_group_event_ids[40], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x808000031010caccULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 41 ] = { .pmg_name = "pm_branch1", .pmg_desc = "Branch operations", .pmg_event_ids = power5_group_event_ids[41], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x800000030e0e0e0eULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 42 ] = { .pmg_name = "pm_branch2", .pmg_desc = "Branch operations", .pmg_event_ids = power5_group_event_ids[42], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x4000000ccacc8c02ULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 43 ] = { .pmg_name = "pm_L1_tlbmiss", .pmg_desc = "L1 load and TLB misses", .pmg_event_ids = power5_group_event_ids[43], .pmg_mmcr0 = 
0x0000000000000000ULL, .pmg_mmcr1 = 0x00b000008e881020ULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 44 ] = { .pmg_name = "pm_L1_DERAT_miss", .pmg_desc = "L1 store and DERAT misses", .pmg_event_ids = power5_group_event_ids[44], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x00b300000e202086ULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 45 ] = { .pmg_name = "pm_L1_slbmiss", .pmg_desc = "L1 load and SLB misses", .pmg_event_ids = power5_group_event_ids[45], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x00b000008a82848cULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 46 ] = { .pmg_name = "pm_L1_dtlbmiss_4K", .pmg_desc = "L1 load references and 4K Data TLB references and misses", .pmg_event_ids = power5_group_event_ids[46], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x08f0000084808088ULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 47 ] = { .pmg_name = "pm_L1_dtlbmiss_16M", .pmg_desc = "L1 store references and 16M Data TLB references and misses", .pmg_event_ids = power5_group_event_ids[47], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x08f000008c88828aULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 48 ] = { .pmg_name = "pm_dsource1", .pmg_desc = "L3 cache and memory data access", .pmg_event_ids = power5_group_event_ids[48], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x400300001c0e8e02ULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 49 ] = { .pmg_name = "pm_dsource2", .pmg_desc = "L3 cache and memory data access", .pmg_event_ids = power5_group_event_ids[49], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x000300031c0e360eULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 50 ] = { .pmg_name = "pm_dsource_L2", .pmg_desc = "L2 cache data access", .pmg_event_ids = power5_group_event_ids[50], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x000300032e2e2e2eULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 51 ] = { .pmg_name = "pm_dsource_L3", .pmg_desc = "L3 cache data access", .pmg_event_ids = power5_group_event_ids[51], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 
0x000300033c3c3c3cULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 52 ] = { .pmg_name = "pm_isource1", .pmg_desc = "Instruction source information", .pmg_event_ids = power5_group_event_ids[52], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x8000000c1a1a1a0cULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 53 ] = { .pmg_name = "pm_isource2", .pmg_desc = "Instruction source information", .pmg_event_ids = power5_group_event_ids[53], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x8000000c0c0c021aULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 54 ] = { .pmg_name = "pm_isource_L2", .pmg_desc = "L2 instruction source information", .pmg_event_ids = power5_group_event_ids[54], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x8000000c2c2c2c2cULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 55 ] = { .pmg_name = "pm_isource_L3", .pmg_desc = "L3 instruction source information", .pmg_event_ids = power5_group_event_ids[55], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x8000000c3a3a3a3aULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 56 ] = { .pmg_name = "pm_pteg_source1", .pmg_desc = "PTEG source information", .pmg_event_ids = power5_group_event_ids[56], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x000200032e2e2e2eULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 57 ] = { .pmg_name = "pm_pteg_source2", .pmg_desc = "PTEG source information", .pmg_event_ids = power5_group_event_ids[57], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x000200033c3c3c3cULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 58 ] = { .pmg_name = "pm_pteg_source3", .pmg_desc = "PTEG source information", .pmg_event_ids = power5_group_event_ids[58], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x000200030e0e360eULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 59 ] = { .pmg_name = "pm_pteg_source4", .pmg_desc = "L3 PTEG and group disptach events", .pmg_event_ids = power5_group_event_ids[59], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x003200001c04048eULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 60 ] = { 
.pmg_name = "pm_L2SA_ld", .pmg_desc = "L2 slice A load events", .pmg_event_ids = power5_group_event_ids[60], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x3055400580c080c0ULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 61 ] = { .pmg_name = "pm_L2SA_st", .pmg_desc = "L2 slice A store events", .pmg_event_ids = power5_group_event_ids[61], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x3055800580c080c0ULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 62 ] = { .pmg_name = "pm_L2SA_st2", .pmg_desc = "L2 slice A store events", .pmg_event_ids = power5_group_event_ids[62], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x3055c00580c080c0ULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 63 ] = { .pmg_name = "pm_L2SB_ld", .pmg_desc = "L2 slice B load events", .pmg_event_ids = power5_group_event_ids[63], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x3055400582c282c2ULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 64 ] = { .pmg_name = "pm_L2SB_st", .pmg_desc = "L2 slice B store events", .pmg_event_ids = power5_group_event_ids[64], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x3055800582c282c2ULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 65 ] = { .pmg_name = "pm_L2SB_st2", .pmg_desc = "L2 slice B store events", .pmg_event_ids = power5_group_event_ids[65], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x3055c00582c282c2ULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 66 ] = { .pmg_name = "pm_L2SB_ld", .pmg_desc = "L2 slice C load events", .pmg_event_ids = power5_group_event_ids[66], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x3055400584c484c4ULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 67 ] = { .pmg_name = "pm_L2SB_st", .pmg_desc = "L2 slice C store events", .pmg_event_ids = power5_group_event_ids[67], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x3055800584c484c4ULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 68 ] = { .pmg_name = "pm_L2SB_st2", .pmg_desc = "L2 slice C store events", .pmg_event_ids = power5_group_event_ids[68], .pmg_mmcr0 = 0x0000000000000000ULL, 
.pmg_mmcr1 = 0x3055c00584c484c4ULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 69 ] = { .pmg_name = "pm_L3SA_trans", .pmg_desc = "L3 slice A state transistions", .pmg_event_ids = power5_group_event_ids[69], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x3015000ac602c686ULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 70 ] = { .pmg_name = "pm_L3SB_trans", .pmg_desc = "L3 slice B state transistions", .pmg_event_ids = power5_group_event_ids[70], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x3015000602c8c888ULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 71 ] = { .pmg_name = "pm_L3SC_trans", .pmg_desc = "L3 slice C state transistions", .pmg_event_ids = power5_group_event_ids[71], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x3015000602caca8aULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 72 ] = { .pmg_name = "pm_L2SA_trans", .pmg_desc = "L2 slice A state transistions", .pmg_event_ids = power5_group_event_ids[72], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x3055000ac080c080ULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 73 ] = { .pmg_name = "pm_L2SB_trans", .pmg_desc = "L2 slice B state transistions", .pmg_event_ids = power5_group_event_ids[73], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x3055000ac282c282ULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 74 ] = { .pmg_name = "pm_L2SC_trans", .pmg_desc = "L2 slice C state transistions", .pmg_event_ids = power5_group_event_ids[74], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x3055000ac484c484ULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 75 ] = { .pmg_name = "pm_L3SAB_retry", .pmg_desc = "L3 slice A/B snoop retry and all CI/CO busy", .pmg_event_ids = power5_group_event_ids[75], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x3005100fc6c8c6c8ULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 76 ] = { .pmg_name = "pm_L3SAB_hit", .pmg_desc = "L3 slice A/B hit and reference", .pmg_event_ids = power5_group_event_ids[76], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x3050100086888688ULL, .pmg_mmcra = 
0x0000000000000000ULL }, [ 77 ] = { .pmg_name = "pm_L3SC_retry_hit", .pmg_desc = "L3 slice C hit & snoop retry", .pmg_event_ids = power5_group_event_ids[77], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x3055100aca8aca8aULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 78 ] = { .pmg_name = "pm_fpu1", .pmg_desc = "Floating Point events", .pmg_event_ids = power5_group_event_ids[78], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x0000000010101020ULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 79 ] = { .pmg_name = "pm_fpu2", .pmg_desc = "Floating Point events", .pmg_event_ids = power5_group_event_ids[79], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x0000000020202010ULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 80 ] = { .pmg_name = "pm_fpu3", .pmg_desc = "Floating point events", .pmg_event_ids = power5_group_event_ids[80], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x0000000c1010868eULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 81 ] = { .pmg_name = "pm_fpu4", .pmg_desc = "Floating point events", .pmg_event_ids = power5_group_event_ids[81], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x0430000c20200220ULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 82 ] = { .pmg_name = "pm_fpu5", .pmg_desc = "Floating point events by unit", .pmg_event_ids = power5_group_event_ids[82], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x00000000848c848cULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 83 ] = { .pmg_name = "pm_fpu6", .pmg_desc = "Floating point events by unit", .pmg_event_ids = power5_group_event_ids[83], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x0000000cc0c88088ULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 84 ] = { .pmg_name = "pm_fpu7", .pmg_desc = "Floating point events by unit", .pmg_event_ids = power5_group_event_ids[84], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x000000008088828aULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 85 ] = { .pmg_name = "pm_fpu8", .pmg_desc = "Floating point events by unit", .pmg_event_ids = power5_group_event_ids[85], 
.pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x0000000dc2ca02c0ULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 86 ] = { .pmg_name = "pm_fpu9", .pmg_desc = "Floating point events by unit", .pmg_event_ids = power5_group_event_ids[86], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x0430000cc6ce8088ULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 87 ] = { .pmg_name = "pm_fpu10", .pmg_desc = "Floating point events by unit", .pmg_event_ids = power5_group_event_ids[87], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x00000000828a028aULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 88 ] = { .pmg_name = "pm_fpu11", .pmg_desc = "Floating point events by unit", .pmg_event_ids = power5_group_event_ids[88], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x00000000868e8602ULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 89 ] = { .pmg_name = "pm_fpu12", .pmg_desc = "Floating point events by unit", .pmg_event_ids = power5_group_event_ids[89], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x0430000cc4cc8002ULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 90 ] = { .pmg_name = "pm_fxu1", .pmg_desc = "Fixed Point events", .pmg_event_ids = power5_group_event_ids[90], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x0000000024242424ULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 91 ] = { .pmg_name = "pm_fxu2", .pmg_desc = "Fixed Point events", .pmg_event_ids = power5_group_event_ids[91], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x4000000604221020ULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 92 ] = { .pmg_name = "pm_fxu3", .pmg_desc = "Fixed Point events", .pmg_event_ids = power5_group_event_ids[92], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x404000038688c4ccULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 93 ] = { .pmg_name = "pm_smt_priorities1", .pmg_desc = "Thread priority events", .pmg_event_ids = power5_group_event_ids[93], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x0005000fc6ccc6c8ULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 94 ] = { .pmg_name = 
"pm_smt_priorities2", .pmg_desc = "Thread priority events", .pmg_event_ids = power5_group_event_ids[94], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x0005000fc4cacaccULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 95 ] = { .pmg_name = "pm_smt_priorities3", .pmg_desc = "Thread priority events", .pmg_event_ids = power5_group_event_ids[95], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x0005000fc2c8c4c2ULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 96 ] = { .pmg_name = "pm_smt_priorities4", .pmg_desc = "Thread priority events", .pmg_event_ids = power5_group_event_ids[96], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x0005000ac016c002ULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 97 ] = { .pmg_name = "pm_smt_both", .pmg_desc = "Thread common events", .pmg_event_ids = power5_group_event_ids[97], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x0010000016260208ULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 98 ] = { .pmg_name = "pm_smt_selection", .pmg_desc = "Thread selection", .pmg_event_ids = power5_group_event_ids[98], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x0090000086028082ULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 99 ] = { .pmg_name = "pm_smt_selectover1", .pmg_desc = "Thread selection overide", .pmg_event_ids = power5_group_event_ids[99], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x0050000002808488ULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 100 ] = { .pmg_name = "pm_smt_selectover2", .pmg_desc = "Thread selection overide", .pmg_event_ids = power5_group_event_ids[100], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x00100000021e8a86ULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 101 ] = { .pmg_name = "pm_fabric1", .pmg_desc = "Fabric events", .pmg_event_ids = power5_group_event_ids[101], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x305500058ece8eceULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 102 ] = { .pmg_name = "pm_fabric2", .pmg_desc = "Fabric data movement", .pmg_event_ids = power5_group_event_ids[102], .pmg_mmcr0 = 
0x0000000000000000ULL, .pmg_mmcr1 = 0x305500858ece8eceULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 103 ] = { .pmg_name = "pm_fabric3", .pmg_desc = "Fabric data movement", .pmg_event_ids = power5_group_event_ids[103], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x305501858ece8eceULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 104 ] = { .pmg_name = "pm_fabric4", .pmg_desc = "Fabric data movement", .pmg_event_ids = power5_group_event_ids[104], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x705401068ecec68eULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 105 ] = { .pmg_name = "pm_snoop1", .pmg_desc = "Snoop retry", .pmg_event_ids = power5_group_event_ids[105], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x305500058ccc8cccULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 106 ] = { .pmg_name = "pm_snoop2", .pmg_desc = "Snoop read retry", .pmg_event_ids = power5_group_event_ids[106], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x30540a048ccc8c02ULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 107 ] = { .pmg_name = "pm_snoop3", .pmg_desc = "Snoop write retry", .pmg_event_ids = power5_group_event_ids[107], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x30550c058ccc8cccULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 108 ] = { .pmg_name = "pm_snoop4", .pmg_desc = "Snoop partial write retry", .pmg_event_ids = power5_group_event_ids[108], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x30550e058ccc8cccULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 109 ] = { .pmg_name = "pm_mem_rq", .pmg_desc = "Memory read queue dispatch", .pmg_event_ids = power5_group_event_ids[109], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x705402058ccc8cceULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 110 ] = { .pmg_name = "pm_mem_read", .pmg_desc = "Memory read complete and cancel", .pmg_event_ids = power5_group_event_ids[110], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x305404048ccc8c06ULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 111 ] = { .pmg_name = "pm_mem_wq", .pmg_desc = "Memory 
write queue dispatch", .pmg_event_ids = power5_group_event_ids[111], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x305506058ccc8cccULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 112 ] = { .pmg_name = "pm_mem_pwq", .pmg_desc = "Memory partial write queue", .pmg_event_ids = power5_group_event_ids[112], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x305508058ccc8cccULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 113 ] = { .pmg_name = "pm_threshold", .pmg_desc = "Thresholding", .pmg_event_ids = power5_group_event_ids[113], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x0008000404c41628ULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 114 ] = { .pmg_name = "pm_mrk_grp1", .pmg_desc = "Marked group events", .pmg_event_ids = power5_group_event_ids[114], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x0008000404c60a26ULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 115 ] = { .pmg_name = "pm_mrk_grp2", .pmg_desc = "Marked group events", .pmg_event_ids = power5_group_event_ids[115], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x410300022a0ac822ULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 116 ] = { .pmg_name = "pm_mrk_dsource1", .pmg_desc = "Marked data from ", .pmg_event_ids = power5_group_event_ids[116], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x010b00030e404444ULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 117 ] = { .pmg_name = "pm_mrk_dsource2", .pmg_desc = "Marked data from", .pmg_event_ids = power5_group_event_ids[117], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x010b00002e440210ULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 118 ] = { .pmg_name = "pm_mrk_dsource3", .pmg_desc = "Marked data from", .pmg_event_ids = power5_group_event_ids[118], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x010b00031c484c4cULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 119 ] = { .pmg_name = "pm_mrk_dsource4", .pmg_desc = "Marked data from", .pmg_event_ids = power5_group_event_ids[119], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x010b000342462e42ULL, .pmg_mmcra = 
0x0000000000000001ULL }, [ 120 ] = { .pmg_name = "pm_mrk_dsource5", .pmg_desc = "Marked data from", .pmg_event_ids = power5_group_event_ids[120], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x010b00033c4c4040ULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 121 ] = { .pmg_name = "pm_mrk_dsource6", .pmg_desc = "Marked data from", .pmg_event_ids = power5_group_event_ids[121], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x010b000146460246ULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 122 ] = { .pmg_name = "pm_mrk_dsource7", .pmg_desc = "Marked data from", .pmg_event_ids = power5_group_event_ids[122], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x010b00034e4e3c4eULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 123 ] = { .pmg_name = "pm_mrk_lbmiss", .pmg_desc = "Marked TLB and SLB misses", .pmg_event_ids = power5_group_event_ids[123], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x0cf00000828a8c8eULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 124 ] = { .pmg_name = "pm_mrk_lbref", .pmg_desc = "Marked TLB and SLB references", .pmg_event_ids = power5_group_event_ids[124], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x0cf00000868e028eULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 125 ] = { .pmg_name = "pm_mrk_lsmiss", .pmg_desc = "Marked load and store miss", .pmg_event_ids = power5_group_event_ids[125], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x000800081002060aULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 126 ] = { .pmg_name = "pm_mrk_ulsflush", .pmg_desc = "Mark unaligned load and store flushes", .pmg_event_ids = power5_group_event_ids[126], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x0028000406c62020ULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 127 ] = { .pmg_name = "pm_mrk_misc", .pmg_desc = "Misc marked instructions", .pmg_event_ids = power5_group_event_ids[127], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x00080008cc062816ULL, .pmg_mmcra = 0x0000000000000001ULL }, [ 128 ] = { .pmg_name = "pm_lsref_L1", .pmg_desc = "Load/Store operations 
and L1 activity", .pmg_event_ids = power5_group_event_ids[128], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x803300040e1a2020ULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 129 ] = { .pmg_name = "Load/Store operations and L2", .pmg_desc = "L3 activity", .pmg_event_ids = power5_group_event_ids[129], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x003300001c0e2020ULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 130 ] = { .pmg_name = "pm_lsref_tlbmiss", .pmg_desc = "Load/Store operations and TLB misses", .pmg_event_ids = power5_group_event_ids[130], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x00b0000080882020ULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 131 ] = { .pmg_name = "pm_Dmiss", .pmg_desc = "Data cache misses", .pmg_event_ids = power5_group_event_ids[131], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x003300001c0e1086ULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 132 ] = { .pmg_name = "pm_prefetchX", .pmg_desc = "Prefetch events", .pmg_event_ids = power5_group_event_ids[132], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x853300061eccce86ULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 133 ] = { .pmg_name = "pm_branchX", .pmg_desc = "Branch operations", .pmg_event_ids = power5_group_event_ids[133], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x800000030e0e0ec8ULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 134 ] = { .pmg_name = "pm_fpuX1", .pmg_desc = "Floating point events by unit", .pmg_event_ids = power5_group_event_ids[134], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x0000000dc2ca86c0ULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 135 ] = { .pmg_name = "pm_fpuX2", .pmg_desc = "Floating point events by unit", .pmg_event_ids = power5_group_event_ids[135], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x00000000828a828aULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 136 ] = { .pmg_name = "pm_fpuX3", .pmg_desc = "Floating point events by unit", .pmg_event_ids = power5_group_event_ids[136], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 
0x00000000868e868eULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 137 ] = { .pmg_name = "pm_fpuX4", .pmg_desc = "Floating point and L1 events", .pmg_event_ids = power5_group_event_ids[137], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x0030000020102020ULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 138 ] = { .pmg_name = "pm_fpuX5", .pmg_desc = "Floating point events", .pmg_event_ids = power5_group_event_ids[138], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x0000000c2020868eULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 139 ] = { .pmg_name = "pm_fpuX6", .pmg_desc = "Floating point events", .pmg_event_ids = power5_group_event_ids[139], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x0000000010202010ULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 140 ] = { .pmg_name = "pm_hpmcount1", .pmg_desc = "HPM group for set 1 ", .pmg_event_ids = power5_group_event_ids[140], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x000000001e281e10ULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 141 ] = { .pmg_name = "pm_hpmcount2", .pmg_desc = "HPM group for set 2", .pmg_event_ids = power5_group_event_ids[141], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x043000041e201220ULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 142 ] = { .pmg_name = "pm_hpmcount3", .pmg_desc = "HPM group for set 3 ", .pmg_event_ids = power5_group_event_ids[142], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x403000041ec21086ULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 143 ] = { .pmg_name = "pm_hpmcount4", .pmg_desc = "HPM group for set 7", .pmg_event_ids = power5_group_event_ids[143], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x00b00000101e2020ULL, .pmg_mmcra = 0x0000000000000000ULL }, [ 144 ] = { .pmg_name = "pm_1flop_with_fma", .pmg_desc = "One flop instructions plus FMA", .pmg_event_ids = power5_group_event_ids[144], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x0000000020101e02ULL, .pmg_mmcra = 0x0000000000000000ULL } }; #endif papi-5.6.0/ChangeLogP411.txt000664 001750 001750 00000041547 
13216244355 017507 0ustar00jshenry1963jshenry1963000000 000000 2010-09-30 * src/: configure, configure.in: When --with-OS=CLE is enabled, check the kernel version and use perfmon2 for old kernels and perf_events for new kernels. * src/: configure, configure.in: If no sources of perf counters are available, then use the generic_platform substrate instead. Currently the code would always fall back on perfctr even if no perfctr support was available. * src/: configure, configure.in: If you specify --with-perf-events or --with-pe-include but the required perf_event.h header is not available, then have configure fail with an error. * papi.spec: Bump version number to 4.1.1 in affected files. Also bump requirement for kernel from 2.6.31 to 2.6.32. This in prep for the pending release. * src/: configure, Makefile.in, configure.in, papi.h: Bump version number to 4.1.1 in affected files. This in prep for the pending release. * INSTALL.txt: Hope this late commit doesn't interfere with anything. This updates the INSTALL.txt to reflect all of the improvements we've made to perf_event support since the last release. 2010-09-29 * src/Rules.pfm: The -Werror problem was still occurring on ia64/perfmon compiles, as I hadn't updated Rules.pfm * src/: configure, configure.in, perf_events.c, perf_events.h, sys_perf_counter_open.c, sys_perf_event_open.c, syscalls.h: Remove support for the perf_counter interface in kernel 2.6.31. Now supports only the perf_event interface in kernel 2.6.32 and above. 2010-09-22 * src/perf_events.c: Attempt to add mmtimer support to perf_events substrate. * src/: multiplex.c, papi.c, papi_protos.h: The multiplex code currently does not make a final adjustment at the time of MPX_read(). This is to avoid the case where counts could be decreasing if you have multiple reads returning estimated values before the next actual counter read. 
While this code works to keep the results non-decreasing, it can cause significant differences from expected results for final reads, especially if many counters are being multiplexed. This is seen in the sdsc-mpx test. It was failing occasionally on some machines by having error of over 20% (the cutoff for a test error) when multiplexing 11 events. What this fix does is to special case the PAPI_stop() case when multiplexing is enabled, having the PAPI_stop() do a final adjustment. The intermediate PAPI_read() case is not changed. This fixes the sdsc-mpx case, while still passing the mendes-alt case (which checks for non-decreasing values). There is a #define that can be set in multiplex.c to restore the previous behavior. * src/ctests/mendes-alt.c: This is our only test that checks to see if multiplexed values are non-decreasing or not. Unfortunately the test currently doesn't fail if values do go backward. This change causes the test to fail if it finds multiplexed counts that decrease. 2010-09-17 * src/libpfm-3.y/: config.mk, lib/intel_wsm_events.h: Fix conflicts from merge. 2010-09-15 * src/: Makefile.inc, Rules.perfctr-pfm, Rules.pfm_pe: Finally fix the -WExtra problem. The issue was -WExtra was being passed to libpfm, but only in the case where the user had a CFLAGS env variable. It turns out this is due to the following from section 5.7.2 of the gmake manual: Except by explicit request, make exports a variable only if it is either defined in the environment initially or set on the command line, And the fix is also described: If you want to prevent a variable from being exported, use the unexport directive, So I've added an "unexport CFLAGS" directive, which seems to be the right thing as our Makefile explicitly passes CFLAGS to the sub-Makefiles that need it. This seems to fix the build. 
2010-09-13 * src/libpfm-3.y/: docs/man3/libpfm_westmere.3, lib/intel_wsm_events.h, lib/intel_wsm_unc_events.h, lib/pfmlib_intel_nhm.c, lib/pfmlib_priv.h: Fix the missing files from the import (CVS claims this as a "conflict") 2010-09-08 * src/Makefile.inc: Fixed the recipes for [c|f]tests and utils. $(LIBRARY) => $(papiLIBS) (this way we don't build libpapi.a if we won't want it) 2010-09-03 * src/ctests/sdsc.c: Had a "%d" instead of "%lld" in that last commit. * src/ctests/sdsc.c: Give a more detailed error message on the sdsc-mpx test. We're seeing sporadic failures (probably due to results being close to the threshold value) but it's hard to tell on buildbot which counter is failing because the error message didn't print the value. 2010-09-02 * src/papi.c: Remove code that reported ENOSUPP if HW multiplexing is not available. PAPI can automatically perform SW multiplexing if HW is not available. With this part of my previous multiplexing patch reverted, multiplexing seems to work even on 2.6.32 perf_events (by reverting to SW mode on those machines) 2010-08-31 * src/perf_events.c: Explicitly set the disabled flag to zero in perf_events for new events. It was possible with an event set that if you removed an event then added a new one that the disabled flag was obtaining the value from the previously removed event. This fix doesn't seem to break anything, but the code involved is a bit tricky to follow. This fixes the sdsc4-mpx test on sol. * src/components/coretemp/: Rules.coretemp, linux-coretemp.c, linux-coretemp.h: Initial stab at a coretemp component. This component exposes every thing that looks like a useful file under /sys/class/hwmon. 2010-08-30 * src/perf_events.c: F_SETOWN_EX is not available until 2.6.32, so don't use it unless we are running on a recent enough kernel. * src/perf_events.c: Pentium 4 was not supported by perf_events until version 2.6.35. Print an error if we attempt to use it on an older kernel. 
2010-08-27 * src/ctests/overflow_allcounters.c: The "overflow_allcounters" test failed on perfmon2 kernels because the behavior of a counter on overflow differs between the various substrates. Therefore detect if we're running on perfmon2 and print a warning, but still pass the test. * src/libpfm-3.y/lib/: intel_wsm_events.h, intel_wsm_unc_events.h, pfmlib_intel_nhm.c, pfmlib_priv.h: updating * src/libpfm-3.y/docs/man3/libpfm_westmere.3: removing westmere documentation * src/perf_events.c: Fix warning in compile due to missing parameter in a debug statement. * src/ctests/test_utils.c: In the ctests, test_skip() was attempting a PAPI_shutdown() before exiting. On multithreaded tests (that had already spawned threads before the decision to skip) this really causes the programs to end up confused and reports spurious memory errors. So remove the PAPI_shutdown() from test_skip(). There's a comment in test_fail() that indicates this was already done there for similar reasons. 2010-08-26 * src/ctests/byte_profile.c: byte_profile was failing on systems where fp_ops is a derived event. modify the test so it gives a warning instead of failing and avoids using the derived event. * src/perf_events.c: At PAPI_stop() time a counter with overflow enabled is being adjusted by a value equal to the sampling period. It looks like this isn't needed (and is generating an overcount that breaks overflow_allcounters). I'm still checking up on this code; if it turns out to be necessary I may have to revert this later. * src/ctests/overflow_allcounters.c: Add validation check to overflow_allcounters It turns out perf_event kernels overcount overflows for some reason, while perfctr doesn't. I'm investigating. * src/ctests/: overflow_allcounters.c, papi_test.h, test_utils.c: On Power5 and Power6, hardware counters 5 and 6 cannot generate interrupts. This means the overflow_allcounters test was failing because overflow could not be generated for events 5 and 6. 
Add code that special cases Power5 and Power6 for this test (and generate a warning) * src/perf_events.c: Change some debug messages to be warnings instead of errors. * src/: papi.c, ctests/second.c: Fix ctests/second on bluegrass (POWER6) The test was testing domains by trying PAPI_DOM_ALL^PAPI_DOM_SUPERVISOR in an attempt to turn off the SUPERVISOR bit. This fails on Power6 as it leaves the PAPI_DOM_OTHER bit set, which isn't allowed. How did the test earlier measure PAPI_DOM_ALL then, which has all bits set? Well it turns out papi.c silently corrects PAPI_DOM_ALL to be available_domains. But if you fiddle any of the bits this correction is lost. This is probably not the right thing to do, but the best way to fix it is not clear. For now this modifies the "second" test to clear the DOM_OTHER bit too if the domain setting fails with it set. 2010-08-25 * src/: papi.c, papi.h, perf_events.c, ctests/kufrin.c, ctests/mendes-alt.c, ctests/multiplex1.c, ctests/multiplex1_pthreads.c, ctests/multiplex2.c, ctests/multiplex3_pthreads.c, ctests/sdsc.c, ctests/sdsc2.c, ctests/sdsc4.c, ftests/fmultiplex1.F, ftests/fmultiplex2.F: Add support for including the OS version in the component_info_t struct. Use this support under perf_events to disable multiplexing support if the kernel is < 2.6.33 Modify the various multiplexing tests to "skip" if they get a PAPI_ENOSUPP when attempting to set up multiplexing. * src/ctests/all_native_events.c: Update all_native_events ctest to print warning in the case where we skip events because they aren't implemented yet (offcore and uncore mostly). 2010-08-24 * src/ctests/: papi_test.h, profile.c, test_utils.c: Adds a new "test_warn()" function for the ctests. This allows you to let tests pass with a warning. This is useful in cases where you don't want to forget that an option needs implementing, but that the feature being missed isn't important enough to fail the test. The first user of this is the "profile" test. 
We warn that PAPI_PROFIL_RANDOM is not supported on perf_events. * src/perf_events.c: From what I can tell, on perf_events the overflow PAPI_OVERFLOW_FORCE_SW case was improperly falling through in _papi_pe_dispatch_timer() to also run the HARDWARE code. This meant that we were attempting to read non-existent hardware overflow data, causing a lot of errors to be printed to the screen. This shows up in the overflow_force_software test * src/ctests/: ipc.c, multiplex2.c, multiplex3_pthreads.c, test_utils.c: Some minor changes to the ctests. + ipc -- fail if the reported IPC value is zero + multiplex2 -- fail if all 32 counter values report as zero + multiplex3_pthread -- give up sooner if each counter returns zero. otherwise the test can take upwards of an hour to finish and makes the fan on my laptop sound like it's going to explode in the process 2010-08-20 * src/Makefile.inc: Disable CFLAGS += $(EXTRA_CFLAGS) (-Wextra) for now. This will get buildbot running again, and if I can manage to figure out exactly what the Makefiles are doing I'll re-enable it again. * src/perf_events.c: Add support for Pentium 4 under perf events. This requires a 2.6.35 kernel. On p4 perf events requires a special format for the raw event, so we modify the results from libpfm3 to conform to what the kernel expects. * release_procedure.txt: release_procedure updated to reflect files to keep under /doc 2010-08-18 * src/perf_events.c: Patch from Gary Mohr that allows PAPI on perf events to catch permissions problems at the time of configuration, rather than only appearing once papi_start() is called. Quick summary of changes: + Adds a check_permissions() routine PERF_COUNT_HW_INSTRUCTIONS is used as the test event. 
+ check_permissions() is called during PAPI_ATTACH, PAPI_CPU_ATTACH and PAPI_DOMAIN + Various "ctl" structures renamed "pe_ctl" + Some minor debug changes 2010-08-05 * src/perf_events.c: Use F_SETOWN_EX instead of F_SETOWN in tune_up_fd() This fixes a multi-thread overflow bug found with the Rice test-suite. F_SETOWN_EX doesn't exist until Linux 2.6.32. We really need some infrastructure that detects the running kernel at init time and warns that things like F_SETOWN_EX, multiplexing, etc., are unavailable if the kernel is too old. 2010-08-04 * src/: Makefile.inc, cpus.c, cpus.h, genpapifdef.c, papi.c, papi.h, papi_defines.h, papi_internal.c, papi_internal.h, perf_events.c, perf_events.h, threads.h: This is the PAPI_CPU_ATTACH patch from Gary Mohr that also fixes a problem with multiple event sets on perf events. Changes by file: papi.h + Add PAPI_CPU_ATTACHED + Add strutctures needed for CPU_ATTACH Makefile.in + include the new cpus.c file papi_internal.c + add call to _papi_hwi_shutdown_cpu() in _papi_hwi_free_EventSet() + make remap_event_position() non-static + add_native_events() and remove_native_events() use _papi_hwi_get_context() + _papi_hw_read() has some whitespace and debug message changes, and removes an extraneous loop index papi_internal.h + a new CPUS_LOCK is added + cpuinfo struct added to various structures + an inline call called _papi_hwi_get_context() added perf_events.h + a cpu_num field added to control_state_t perf_events.c + open_pe_events() allows per-cpu counting, additional debug was added + set_cpu() function added + new debug messages in set_granularity() and _papi_pe_read() + _papi_pe_ctl() has PAPI_CPU_ATTACH code added + _papi_pe_update_control_state() has the default domain set to be PAPI_DOM_USER instead of pe_ctl->domain genpapifdef.c + PAPI_CPU_ATTACHED added threads.h + an ESI field added to ThreadInfo_t papi.c + many new ABIDBG() debug messages added + PAPI_start() updated to check for CPU_ATTACH conflicts, has whitespace 
fixes, gets context now, if dirty calls update_control_state() + PAPI_stop(), PAPI_reset(), PAPI_read(), PAPI_read_ts(), PAPI_accum(), PAPI_write(), PAPI_cleanup_eventset(), all use _papi_hwi_get_context() to get context + PAPI_read() has some braces added + PAPI_get_opt() and PAPI_set_opt() have CPU_ATTACHED code added. + PAPI_overflow() and PAPI_sprofil() now report errors if CPU_ATTACH enabled cpus.c, cpus.h + New files based on threads.c and threads.h I made some additional changes, based on warnings given by gcc + Added a few missing function prototypes in cpus.h + Update PAPI_MAX_LOCK as it wasn't increased to handle the new addition of CPUS_LOCK + Removed various variables and functions reported as being unused. 2010-08-03 * src/: papi_internal.h, papi_lock.h: The option --with-no-cpu-counters was not supported on AIX. This has been fixed and works now. Also the get_{real|virt}_{cycles|usec} implementations for AIX (checked in Jul 29) have now been tested and work correctly. 2010-07-29 * src/: configure, configure.in, papi_lock.h, papi_vector.c: Added AIX support for the get_{real|virt}_{cycles|usec} functions +++ Fortran tests are now compiling on AIX. Wrong compiler flags were used for the AIX compilers. 2010-07-26 * src/papi_events.csv: add PAPI_L1_DCM for atom * src/x86_cache_info.c: Update the x86 cache_info table. The data from this table now comes from figure 3-17 in the Intel Architectures Software Reference Manual 2A (cpuid instruction section) This fixes an issue on my Atom N270 machine where the L2 cache was not reported. 2010-07-16 * INSTALL.txt, src/perf_events.c, src/perf_events.h: Perf Events now support attach and detach. The patch for supporting this was written by Gary Mohr * src/papi_events.csv: Add a few missing events to Nehalem, based on reading Intel Volume 3b. * src/papi_events.csv: Fix Westmere to not use L1D_ALL_REF:ANY I tested this on a Nehalem which has the proper behavior, unfortunately no Westmere here to test on. 
* src/: papi_events.csv, papi_pfm_events.c, perfctr-x86.c: Enable support for having more than one CPU block with the same name in the .csv file. This allows easier support for sharing events between similar architectures. I *think* this is needed and *think* it shouldn't break anything, but I might have to back it out. Also fixes event support for Pentium Pro / Pentium III/ P6 on perfmon2 and perf events kernels. Also fixed some confusion where perfctr called chips "Intel Core" meaning Core Duo wheras pfmon called "Intel Core" meaning Core2. This was tested on actual Pentium Pro and PIII hardware (as well as on a few Pentium 4 machines plus a Core2 machine) 2010-07-02 * src/: papi_hl.c, ctests/api.c: Added remaining low-level api tests papi-5.6.0/man/man3/PAPI_granularity_option_t.3000664 001750 001750 00000001104 13216244356 023315 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPI_granularity_option_t" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPI_granularity_option_t \- .SH SYNOPSIS .br .PP .SS "Data Fields" .in +1c .ti -1c .RI "int \fBdef_cidx\fP" .br .ti -1c .RI "int \fBeventset\fP" .br .ti -1c .RI "int \fBgranularity\fP" .br .in -1c .SH "Detailed Description" .PP .SH "Field Documentation" .PP .SS "int PAPI_granularity_option_t::def_cidx" this structure requires a component index to set default granularity .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. 
papi-5.6.0/src/perfctr-2.7.x/etc/costs/Duron-750000664 001750 001750 00000001244 13216244367 023001 0ustar00jshenry1963jshenry1963000000 000000 [data from a 750MHz Duron] PERFCTR INIT: vendor 2, family 6, model 3 PERFCTR INIT: NITER == 64 PERFCTR INIT: loop overhead is 88 cycles PERFCTR INIT: rdtsc cost is 11.2 cycles (805 total) PERFCTR INIT: rdpmc cost is 12.9 cycles (918 total) PERFCTR INIT: rdmsr (counter) cost is 51.7 cycles (3400 total) PERFCTR INIT: rdmsr (evntsel) cost is 52.7 cycles (3465 total) PERFCTR INIT: wrmsr (counter) cost is 81.6 cycles (5314 total) PERFCTR INIT: wrmsr (evntsel) cost is 231.6 cycles (14913 total) PERFCTR INIT: read %cr4 cost is 4.7 cycles (390 total) PERFCTR INIT: write %cr4 cost is 62.9 cycles (4119 total) perfctr: driver 2.0-pre3, cpu type AMD K7 at 749671 kHz papi-5.6.0/src/libpfm-3.y/python/src/__init__.py000664 001750 001750 00000000102 13216244363 023416 0ustar00jshenry1963jshenry1963000000 000000 from perfmon_int import * from pmu import * from session import * papi-5.6.0/src/ftests/fmatrixpapi.F000664 001750 001750 00000010506 13216244361 021267 0ustar00jshenry1963jshenry1963000000 000000 C**************************************************************************** C C fmatrixpapi.f C An example of matrix-matrix multiplication and using PAPI high level to C look at the performance. 
written by Kevin London C March 2000 C**************************************************************************** #include "fpapi_test.h" program fmatrixpapi IMPLICIT integer (p) INTEGER ncols1,nrows1,ncols2,nrows2 PARAMETER(nrows1=175,ncols1=225,nrows2=ncols1,ncols2=150) INTEGER i,j,k,num_events,retval C PAPI standardized event to be monitored INTEGER event(2) C PAPI values of the counters INTEGER*8 values(2) REAL*8 p(nrows1,ncols1),q(nrows2,ncols2), & r(nrows1,ncols2),tmp integer tests_quiet, get_quiet external get_quiet tests_quiet = get_quiet() C Setup default values num_events=0 C Open matrix file number 1 for reading C OPEN(UNIT=1,FILE='fmt1',STATUS='OLD') C Open matrix file number 2 for reading C OPEN(UNIT=2,FILE='fmt2',STATUS='OLD') C See how many hardware events at one time are supported C This also initializes the PAPI library call PAPIf_num_counters( num_events ) if ( num_events .LT. 2 ) then print *,'This example program requries the architecture to ', . 'support 2 simultaneous hardware events...shutting down.' call ftest_skip(__FILE__, __LINE__, * 'too few counters', num_events) end if if (tests_quiet .EQ. 0) then print *, 'Number of hardware counters supported: ', num_events end if call PAPIf_query_event(PAPI_FP_INS, retval) if (retval .NE. PAPI_OK) then event(1) = PAPI_TOT_INS else C Total floating point operations event(1) = PAPI_FP_INS end if C Time used event(2) = PAPI_TOT_CYC C matrix 1: read in the matrix values do i=1, nrows1 do j=1,ncols1 p(i,j) = i*j*1.0 end do end do C matrix 2: read in the matrix values do i=1, nrows2 do j=1,ncols2 q(i,j) = i*j*1.0 end do end do C Initialize the result matrix do i=1,nrows1 do j=1, ncols2 r(i,j) = i*j*1.0 end do end do C Set up the counters num_events = 2 call PAPIf_start_counters( event, num_events, retval) if ( retval .NE. 
PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, * 'PAPIf_start_counters', retval) end if C Clear the counter values call PAPIf_read_counters(values, num_events,retval) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, * 'PAPIf_read_counters', retval) end if C Compute the matrix-matrix multiplication do i=1,nrows1 do j=1,ncols2 do k=1,ncols1 r(i,j)=r(i,j) + p(i,k)*q(k,j) end do end do end do C Stop the counters and put the results in the array values call PAPIf_stop_counters(values,num_events,retval) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, * 'PAPIf_stop_counters', retval) end if C Make sure the compiler does not optimize away the multiplication call dummy(r) if (tests_quiet .EQ. 0) then if (event(1) .EQ. PAPI_TOT_INS) then print *, 'TOT Instructions: ',values(1) else print *, 'FP Instructions: ',values(1) end if print *, 'Cycles: ',values(2) if (event(1) .EQ. PAPI_FP_INS) then write(*,'(a,f9.6)') ' Efficiency (flops/cycles):', & real(values(1))/real(values(2)) C Compare measured FLOPS to expected value tmp=2.0*real(nrows1)*real(ncols2)*real(ncols1) if(abs(values(1)-tmp).gt.tmp*0.05)then C Maybe we are counting FMAs? tmp=tmp/2.0 if(abs(values(1)-tmp).gt.tmp*0.05)then print *,'Expected operation count:',2.0*tmp print *,'Or possibly (using FMA): ',tmp print *,'Instead I got: ',values(1) call ftest_fail(__FILE__, __LINE__, * 'Unexpected FLOP count (check vector operations)', 1) end if end if end if end if call ftests_pass(__FILE__) end papi-5.6.0/src/components/cuda/tests/timer.h000664 001750 001750 00000003125 13216244357 023064 0ustar00jshenry1963jshenry1963000000 000000 /** * Copyright 1993-2013 NVIDIA Corporation. All rights reserved. * * Please refer to the NVIDIA end user license agreement (EULA) associated * with this source code for terms and conditions that govern your use of * this software. 
Any use, reproduction, disclosure, or distribution of * this software and related documentation outside the terms of the EULA * is strictly prohibited. * */ #ifndef TIMER_H #define TIMER_H #include #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) #define WIN32_LEAN_AND_MEAN #include #else #include #endif #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) double PCFreq = 0.0; __int64 timerStart = 0; #else struct timeval timerStart; #endif void StartTimer() { #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) LARGE_INTEGER li; if (!QueryPerformanceFrequency(&li)) { printf("QueryPerformanceFrequency failed!\n"); } PCFreq = (double)li.QuadPart/1000.0; QueryPerformanceCounter(&li); timerStart = li.QuadPart; #else gettimeofday(&timerStart, NULL); #endif } // time elapsed in ms double GetTimer() { #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) LARGE_INTEGER li; QueryPerformanceCounter(&li); return (double)(li.QuadPart-timerStart)/PCFreq; #else struct timeval timerStop, timerElapsed; gettimeofday(&timerStop, NULL); timersub(&timerStop, &timerStart, &timerElapsed); return timerElapsed.tv_sec*1000.0+timerElapsed.tv_usec/1000.0; #endif } #endif // TIMER_H papi-5.6.0/src/event_data/power5+/000775 001750 001750 00000000000 13216244361 020730 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/libpfm4/rules.mk000664 001750 001750 00000003004 13216244366 020344 0ustar00jshenry1963jshenry1963000000 000000 # # Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. 
# Contributed by Stephane Eranian # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies # of the Software, and to permit persons to whom the Software is furnished to do so, # subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF # CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE # OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. # # This file is part of libpfm, a performance monitoring support library for # applications on Linux/ia64. # .SUFFIXES: .c .S .o .lo .cpp .S.o: $(CC) $(CFLAGS) -c $*.S .c.o: $(CC) $(CFLAGS) -c $*.c .cpp.o: $(CXX) $(CFLAGS) -c $*.cpp .c.lo: $(CC) -fPIC -DPIC $(CFLAGS) -c $*.c -o $*.lo .S.lo: $(CC) -fPIC -DPIC $(CFLAGS) -c $*.S -o $*.lo papi-5.6.0/src/utils/papi_avail.c000664 001750 001750 00000055163 13216244370 020745 0ustar00jshenry1963jshenry1963000000 000000 // Define the papi_avail man page contents. /** * file papi_avail.c * @brief papi_avail utility. * @page papi_avail * @section Name * papi_avail - provides availability and detailed information for PAPI preset and user defined events. 
* * @section Synopsis * papi_avail [-acdh] [-e event] * * @section Description * papi_avail is a PAPI utility program that reports information about the * current PAPI installation and supported preset and user defined events. * * @section Options *
    *
  • -h Display help information about this utility. *
  • -a Display only the available PAPI events. *
  • -c Display only the available PAPI events after a check. *
  • -d Display PAPI event information in a more detailed format. *
  • -e < event > Display detailed event information for the named event. * This event can be a preset event, a user defined event, or a native event. * If the event is a preset or a user defined event the output shows a list of native * events the event is based on and the formula that is used to compute the event's final value.\n *
* * Event filtering options *
    *
  • --br Display branch related PAPI preset events *
  • --cache Display cache related PAPI preset events *
  • --cnd Display conditional PAPI preset events *
  • --fp Display Floating Point related PAPI preset events *
  • --ins Display instruction related PAPI preset events *
  • --idl Display Stalled or Idle PAPI preset events *
  • --l1 Display level 1 cache related PAPI preset events *
  • --l2 Display level 2 cache related PAPI preset events *
  • --l3 Display level 3 cache related PAPI preset events *
  • --mem Display memory related PAPI preset events *
  • --msc Display miscellaneous PAPI preset events *
  • --tlb Display Translation Lookaside Buffer PAPI preset events *
* @section Bugs * There are no known bugs in this utility. * If you find a bug, it should be reported to the PAPI Mailing List at . *
* @see PAPI_derived_event_files * */ // Define the PAPI_derived_event_files man page contents. /** * @page PAPI_derived_event_files * @brief Describes derived event definition file syntax. * * @section main Derived Events * PAPI provides the ability to define events whose value will be derived from multiple native events. The list of native * events to be used in a derived event and a formula which describes how to use them is provided in an event definition file. * The PAPI team provides an event definition file which describes all of the supported PAPI preset events. PAPI also allows * a user to provide an event definition file that describes a set of user defined events which can extend the events PAPI * normally supports. * * This page documents the syntax of the commands which can appear in an event definition file. * *
* @subsection rules General Rules: *
    *
  • Blank lines are ignored.
  • *
  • Lines that begin with '#' are comments (they are also ignored).
  • *
  • Names shown inside < > below represent values that must be provided by the user.
  • *
  • If a user provided value contains white space, it must be protected with quotes.
  • *
* *
* @subsection commands Commands: * @par CPU,\<pmuName\> * Specifies a PMU name which controls if the PRESET and EVENT commands that follow this line should * be processed. Multiple CPU commands can be entered without PRESET or EVENT commands between them to provide * a list of PMU names to which the derived events that follow will apply. When a PMU name provided in the list * matches a PMU name known to the running system, the events which follow will be created. If none of the PMU * names provided in the list match a PMU name on the running system, the events which follow will be ignored. * When a new CPU command follows either a PRESET or EVENT command, the PMU list is rebuilt.

* * @par PRESET,\<eventName\>,\<derivedType\>,\<eventAttr\>,LDESC,\"\<longDesc\>\",SDESC,\"\<shortDesc\>\",NOTE,\"\<note\>\" * Declare a PAPI preset derived event.

* * @par EVENT,\<eventName\>,\<derivedType\>,\<eventAttr\>,LDESC,\"\<longDesc\>\",SDESC,\"\<shortDesc\>\",NOTE,\"\<note\>\" * Declare a user defined derived event.

* * @par Where: * @par pmuName: * The PMU which the following events should apply to. A list of PMU names supported by your * system can be obtained by running papi_component_avail on your system.
* @par eventName: * Specifies the name used to identify this derived event. This name should be unique within the events on your system.
* @par derivedType: * Specifies the kind of derived event being defined (see 'Derived Types' below).
* @par eventAttr: * Specifies a formula and a list of base events that are used to compute the derived events value. The syntax * of this field depends on the 'derivedType' specified above (see 'Derived Types' below).
* @par longDesc: * Provides the long description of the event.
* @par shortDesc: * Provides the short description of the event.
* @par note: * Provides an event note.
* @par baseEvent (used below): * Identifies an event on which this derived event is based. This may be a native event (possibly with event masks), * an already known preset event, or an already known user event.
* *
* @subsection notes Notes: * The PRESET command has traditionally been used in the PAPI provided preset definition file. * The EVENT command is intended to be used in user defined event definition files. The code treats them * the same so they are interchangeable and they can both be used in either event definition file.
* *
* @subsection types Derived Types: * This describes values allowed in the 'derivedType' field of the PRESET and EVENT commands. It also * shows the syntax of the 'eventAttr' field for each derived type supported by these commands. * All of the derived events provide a list of one or more events which the derived event is based * on (baseEvent). Some derived events provide a formula that specifies how to compute the derived * events value using the baseEvents in the list. The following derived types are supported, the syntax * of the 'eventAttr' parameter for each derived event type is shown in parentheses.

* * @par NOT_DERIVED (\<baseEvent\>): * This derived type defines an alias for the existing event 'baseEvent'.
* @par DERIVED_ADD (\<baseEvent1\>,\<baseEvent2\>): * This derived type defines a new event that will be the sum of two other * events. It has a value of 'baseEvent1' plus 'baseEvent2'.
* @par DERIVED_PS (PAPI_TOT_CYC,\<baseEvent1\>): * This derived type defines a new event that will report the number of 'baseEvent1' events which occurred * per second. It has a value of ((('baseEvent1' * cpu_max_mhz) * 1000000 ) / PAPI_TOT_CYC). The user must * provide PAPI_TOT_CYC as the first event of two events in the event list for this to work correctly.
* @par DERIVED_ADD_PS (PAPI_TOT_CYC,\<baseEvent1\>,\<baseEvent2\>): * This derived type defines a new event that will add together two event counters and then report the number * which occurred per second. It has a value of (((('baseEvent1' + 'baseEvent2') * cpu_max_mhz) * 1000000 ) / PAPI_TOT_CYC). * The user must provide PAPI_TOT_CYC as the first event of three events in the event list for this to work correctly.
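* @par DERIVED_CMPD (\<baseEvent1\>,\<baseEvent2\>): * This derived type takes a list of base events. papi_avail reports events of this type as not derived, the same as NOT_DERIVED events. * @par DERIVED_SUB (\<baseEvent1\>,\<baseEvent2\>): * This derived type defines a new event that will be the difference between two other * events. It has a value of 'baseEvent1' minus 'baseEvent2'.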
* @par DERIVED_POSTFIX (\<pfFormula\>,\<baseEvent0\>,\<baseEvent1\>, ... ,\<baseEventN\>): * This derived type defines a new event whose value is computed from several native events using * a postfix (reverse polish notation) formula. Its value is the result of processing the postfix * formula. The 'pfFormula' is of the form 'N0|N1|N2|5|*|+|-|' where the '|' acts as a token * separator and the tokens N0, N1, and N2 are place holders that represent baseEvent0, baseEvent1, * and baseEvent2 respectively.
* @par DERIVED_INFIX (\<ifFormula\>,\<baseEvent0\>,\<baseEvent1\>, ... ,\<baseEventN\>): * This derived type defines a new event whose value is computed from several native events using * an infix (algebraic notation) formula. Its value is the result of processing the infix * formula. The 'ifFormula' is of the form 'N0-(N1+(N2*5))' where the tokens N0, N1, and N2 * are place holders that represent baseEvent0, baseEvent1, and baseEvent2 respectively.
* *
* @subsection example Example: * In the following example, the events PAPI_SP_OPS, USER_SP_OPS, and ALIAS_SP_OPS will all measure the same events and return * the same value. They just demonstrate different ways to use the PRESET and EVENT event definition commands.

* *
    *
  • # The following lines define pmu names that all share the following events
  • *
  • CPU,nhm
  • *
  • CPU,nhm-ex
  • *
  • \# Events which should be defined for either of the above pmu types
  • *
  • PRESET,PAPI_TOT_CYC,NOT_DERIVED,UNHALTED_CORE_CYCLES
  • *
  • PRESET,PAPI_REF_CYC,NOT_DERIVED,UNHALTED_REFERENCE_CYCLES
  • *
  • PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|3|*|+|,FP_COMP_OPS_EXE:SSE_SINGLE_PRECISION,FP_COMP_OPS_EXE:SSE_FP_PACKED,NOTE,"Using a postfix formula"
  • *
  • EVENT,USER_SP_OPS,DERIVED_INFIX,N0+(N1*3),FP_COMP_OPS_EXE:SSE_SINGLE_PRECISION,FP_COMP_OPS_EXE:SSE_FP_PACKED,NOTE,"Using the same formula in infix format"
  • *
  • EVENT,ALIAS_SP_OPS,NOT_DERIVED,PAPI_SP_OPS,LDESC,"Alias for preset event PAPI_SP_OPS"
  • *
  • # End of event definitions for above pmu names and start of a section for a new pmu name.
  • *
  • CPU,snb
  • *
* */ #include #include #include #include "papi.h" #include "print_header.h" static char * is_derived( PAPI_event_info_t * info ) { if ( strlen( info->derived ) == 0 ) return ( "No" ); else if ( strcmp( info->derived, "NOT_DERIVED" ) == 0 ) return ( "No" ); else if ( strcmp( info->derived, "DERIVED_CMPD" ) == 0 ) return ( "No" ); else return ( "Yes" ); } static void print_help( char **argv ) { printf( "This is the PAPI avail program.\n" ); printf( "It provides availability and details about PAPI Presets and User-defined Events.\n" ); printf( "PAPI Preset Event filters can be combined in a logical OR.\n" ); printf( "Usage: %s [options]\n", argv[0] ); printf( "Options:\n\n" ); printf( "General command options:\n" ); printf( "\t-h, --help Print this help message\n" ); printf( "\t-a, --avail Display only available PAPI preset and user defined events\n" ); printf( "\t-c, --check Display only available PAPI preset and user defined events after an availability check\n" ); printf( "\t-d, --detail Display detailed information about events\n" ); printf( "\t-e EVENTNAME Display detail information about specified event\n" ); printf( "\nEvent filtering options:\n" ); printf( "\t--br Display branch related PAPI preset events\n" ); printf( "\t--cache Display cache related PAPI preset events\n" ); printf( "\t--cnd Display conditional PAPI preset events\n" ); printf( "\t--fp Display Floating Point related PAPI preset events\n" ); printf( "\t--ins Display instruction related PAPI preset events\n" ); printf( "\t--idl Display Stalled or Idle PAPI preset events\n" ); printf( "\t--l1 Display level 1 cache related PAPI preset events\n" ); printf( "\t--l2 Display level 2 cache related PAPI preset events\n" ); printf( "\t--l3 Display level 3 cache related PAPI preset events\n" ); printf( "\t--mem Display memory related PAPI preset events\n" ); printf( "\t--msc Display miscellaneous PAPI preset events\n" ); printf( "\t--tlb Display Translation Lookaside Buffer PAPI preset events\n" ); 
printf( "\n" ); } static int parse_unit_masks( PAPI_event_info_t * info ) { char *pmask; if ( ( pmask = strchr( info->symbol, ':' ) ) == NULL ) { return ( 0 ); } memmove( info->symbol, pmask, ( strlen( pmask ) + 1 ) * sizeof ( char ) ); pmask = strchr( info->long_descr, ':' ); if ( pmask == NULL ) info->long_descr[0] = 0; else memmove( info->long_descr, pmask + sizeof ( char ), ( strlen( pmask ) + 1 ) * sizeof ( char ) ); return 1; } static int checkCounter (int eventcode) { int EventSet = PAPI_NULL; if (PAPI_create_eventset(&EventSet) != PAPI_OK) return 0; if (PAPI_add_event (EventSet, eventcode) != PAPI_OK) return 0; if (PAPI_cleanup_eventset (EventSet) != PAPI_OK) return 0; if (PAPI_destroy_eventset (&EventSet) != PAPI_OK) return 0; return 1; } int main( int argc, char **argv ) { int args, i, j, k; int retval; unsigned int filter = 0; int print_event_info = 0; char *name = NULL; int print_avail_only = PAPI_ENUM_EVENTS; int print_tabular = 1; PAPI_event_info_t info; const PAPI_hw_info_t *hwinfo = NULL; int tot_count = 0; int avail_count = 0; int deriv_count = 0; int check_counter = 0; int event_code; PAPI_event_info_t n_info; /* Parse command line arguments */ for( args = 1; args < argc; args++ ) { if ( strstr( argv[args], "-e" ) ) { print_event_info = 1; name = argv[args + 1]; if ( ( name == NULL ) || ( strlen( name ) == 0 ) ) { print_help( argv ); exit( 1 ); } } else if ( strstr( argv[args], "-c" ) || strstr (argv[args], "--check") ) { print_avail_only = PAPI_PRESET_ENUM_AVAIL; check_counter = 1; } else if ( strstr( argv[args], "-a" )) print_avail_only = PAPI_PRESET_ENUM_AVAIL; else if ( strstr( argv[args], "-d" ) ) print_tabular = 0; else if ( strstr( argv[args], "-h" ) ) { print_help( argv ); exit( 1 ); } else if ( strstr( argv[args], "--br" ) ) filter |= PAPI_PRESET_BIT_BR; else if ( strstr( argv[args], "--cache" ) ) filter |= PAPI_PRESET_BIT_CACH; else if ( strstr( argv[args], "--cnd" ) ) filter |= PAPI_PRESET_BIT_CND; else if ( strstr( argv[args], "--fp" ) 
) filter |= PAPI_PRESET_BIT_FP; else if ( strstr( argv[args], "--ins" ) ) filter |= PAPI_PRESET_BIT_INS; else if ( strstr( argv[args], "--idl" ) ) filter |= PAPI_PRESET_BIT_IDL; else if ( strstr( argv[args], "--l1" ) ) filter |= PAPI_PRESET_BIT_L1; else if ( strstr( argv[args], "--l2" ) ) filter |= PAPI_PRESET_BIT_L2; else if ( strstr( argv[args], "--l3" ) ) filter |= PAPI_PRESET_BIT_L3; else if ( strstr( argv[args], "--mem" ) ) filter |= PAPI_PRESET_BIT_BR; else if ( strstr( argv[args], "--msc" ) ) filter |= PAPI_PRESET_BIT_MSC; else if ( strstr( argv[args], "--tlb" ) ) filter |= PAPI_PRESET_BIT_TLB; } if ( filter == 0 ) { filter = ( unsigned int ) ( -1 ); } /* Init PAPI */ retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) { fprintf(stderr,"Error! PAPI library mismatch!\n"); return 1; } retval = PAPI_set_debug( PAPI_VERB_ECONT ); if ( retval != PAPI_OK ) { fprintf(stderr,"Error with PAPI_set debug!\n"); return 1; } retval=papi_print_header("Available PAPI preset and user defined events plus hardware information.\n", &hwinfo ); if ( retval != PAPI_OK ) { fprintf(stderr,"Error with PAPI_get_hardware_info!\n"); return 1; } /* Code for info on just one event */ if ( print_event_info ) { if ( PAPI_event_name_to_code( name, &event_code ) == PAPI_OK ) { if ( PAPI_get_event_info( event_code, &info ) == PAPI_OK ) { if ( event_code & PAPI_PRESET_MASK ) { printf( "%-30s%s\n%-30s%#-10x\n%-30s%d\n", "Event name:", info.symbol, "Event Code:", info.event_code, "Number of Native Events:", info.count ); printf( "%-29s|%s|\n%-29s|%s|\n%-29s|%s|\n", "Short Description:", info.short_descr, "Long Description:", info.long_descr, "Developer's Notes:", info.note ); printf( "%-29s|%s|\n%-29s|%s|\n", "Derived Type:", info.derived, "Postfix Processing String:", info.postfix ); for( j = 0; j < ( int ) info.count; j++ ) { printf( " Native Code[%d]: %#x |%s|\n", j, info.code[j], info.name[j] ); PAPI_get_event_info( (int) info.code[j], &n_info ); printf(" Number 
of Register Values: %d\n", n_info.count ); for( k = 0; k < ( int ) n_info.count; k++ ) { printf( " Register[%2d]: %#08x |%s|\n", k, n_info.code[k], n_info.name[k] ); } printf( " Native Event Description: |%s|\n\n", n_info.long_descr ); } } else { /* must be a native event code */ printf( "%-30s%s\n%-30s%#-10x\n%-30s%d\n", "Event name:", info.symbol, "Event Code:", info.event_code, "Number of Register Values:", info.count ); printf( "%-29s|%s|\n", "Description:", info.long_descr ); for ( k = 0; k < ( int ) info.count; k++ ) { printf( " Register[%2d]: %#08x |%s|\n", k, info.code[k], info.name[k] ); } /* if unit masks exist but none are specified, process all */ if ( !strchr( name, ':' ) ) { if ( 1 ) { if ( PAPI_enum_event( &event_code, PAPI_NTV_ENUM_UMASKS ) == PAPI_OK ) { printf( "\nUnit Masks:\n" ); do { retval = PAPI_get_event_info(event_code, &info ); if ( retval == PAPI_OK ) { if ( parse_unit_masks( &info ) ) { printf( "%-29s|%s|%s|\n", " Mask Info:", info.symbol, info.long_descr ); for ( k = 0; k < ( int ) info.count;k++ ) { printf( " Register[%2d]: %#08x |%s|\n", k, info.code[k], info.name[k] ); } } } } while ( PAPI_enum_event( &event_code, PAPI_NTV_ENUM_UMASKS ) == PAPI_OK ); } } } } } } else { printf( "Sorry, an event by the name '%s' could not be found.\n" " Is it typed correctly?\n\n", name ); } } else { /* Print *ALL* Events */ for (i=0 ; i<2 ; i++) { // set the event code to fetch preset events the first time through loop and user events the second time through the loop if (i== 0) { event_code = 0 | PAPI_PRESET_MASK; } else { event_code = 0 | PAPI_UE_MASK; } /* For consistency, always ASK FOR the first event, if there is not one then nothing to process */ if (PAPI_enum_event( &event_code, PAPI_ENUM_FIRST ) != PAPI_OK) { continue; } // print heading to show which kind of events follow if (i== 0) { printf( "================================================================================\n" ); printf( " PAPI Preset Events\n" ); printf( 
"================================================================================\n" ); } else { printf( "\n"); // put a blank line after the presets before strarting the user events printf( "================================================================================\n" ); printf( " User Defined Events\n" ); printf( "================================================================================\n" ); } if ( print_tabular ) { printf( " Name Code " ); if ( !print_avail_only ) { printf( "Avail " ); } printf( "Deriv Description (Note)\n" ); } else { printf( "%-13s%-11s%-8s%-16s\n |Long Description|\n" " |Developer's Notes|\n |Derived|\n |PostFix|\n" " Native Code[n]: |name|\n", "Symbol", "Event Code", "Count", "|Short Description|" ); } do { if ( PAPI_get_event_info( event_code, &info ) == PAPI_OK ) { if ( print_tabular ) { // if this is a user defined event or its a preset and matches the preset event filters, display its information if ( (i==1) || (filter & info.event_type)) { if ( print_avail_only ) { if ( info.count ) { if ( (check_counter && checkCounter (event_code)) || !check_counter) { printf( "%-13s%#x %-5s%s", info.symbol, info.event_code, is_derived( &info ), info.long_descr ); } } if ( info.note[0] ) { printf( " (%s)", info.note ); } printf( "\n" ); } else { printf( "%-13s%#x %-6s%-4s %s", info.symbol, info.event_code, ( info.count ? 
"Yes" : "No" ), is_derived( &info ), info.long_descr ); if ( info.note[0] ) { printf( " (%s)", info.note ); } printf( "\n" ); } tot_count++; if ( info.count ) { if ((check_counter && checkCounter (event_code)) || !check_counter ) avail_count++; } if ( !strcmp( is_derived( &info ), "Yes" ) ) { deriv_count++; } } } else { if ( ( print_avail_only && info.count ) || ( print_avail_only == 0 ) ) { if ((check_counter && checkCounter (event_code)) || !check_counter) { printf( "%s\t%#x\t%d\t|%s|\n |%s|\n" " |%s|\n |%s|\n |%s|\n", info.symbol, info.event_code, info.count, info.short_descr, info.long_descr, info.note, info.derived, info.postfix ); for ( j = 0; j < ( int ) info.count; j++ ) { printf( " Native Code[%d]: %#x |%s|\n", j, info.code[j], info.name[j] ); } } } tot_count++; if ( info.count ) { if ((check_counter && checkCounter (event_code)) || !check_counter ) avail_count++; } if ( !strcmp( is_derived( &info ), "Yes" ) ) { deriv_count++; } } } } while (PAPI_enum_event( &event_code, print_avail_only ) == PAPI_OK); } } printf( "--------------------------------------------------------------------------------\n" ); if ( !print_event_info ) { if ( print_avail_only ) { printf( "Of %d available events, %d ", avail_count, deriv_count ); } else { printf( "Of %d possible events, %d are available, of which %d ", tot_count, avail_count, deriv_count ); } if ( deriv_count == 1 ) { printf( "is derived.\n\n" ); } else { printf( "are derived.\n\n" ); } } return 0; } papi-5.6.0/src/libpfm4/lib/pfmlib_intel_ivbep_unc_r3qpi.c000664 001750 001750 00000005326 13216244365 025417 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_intel_ivbep_r3qpi.c : Intel IvyBridge-EP R3QPI uncore PMU * * Copyright (c) 2014 Google Inc. 
All rights reserved * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include #include #include #include #include /* private headers */ #include "pfmlib_priv.h" #include "pfmlib_intel_x86_priv.h" #include "pfmlib_intel_snbep_unc_priv.h" #include "events/intel_ivbep_unc_r3qpi_events.h" /* DEFINE_R3QPI_BOX(n): define the pfmlib_pmu_t descriptor intel_ivbep_unc_r3qpi<n>_support for R3QPI uncore box n. Every box is declared as an uncore-type PMU with 3 counters and no fixed counters; all boxes share the intel_ivbep_unc_r3_pe event table and the snbep_unc_mods attribute descriptions, use the SNB-EP uncore encoding callbacks, and use the generic Intel x86 callbacks for event enumeration, validation and info. The macro is instantiated below for boxes 0, 1 and 2. */ #define DEFINE_R3QPI_BOX(n) \ pfmlib_pmu_t intel_ivbep_unc_r3qpi##n##_support = {\ .desc = "Intel Ivy Bridge-EP R3QPI"#n" uncore", \ .name = "ivbep_unc_r3qpi"#n,\ .perf_name = "uncore_r3qpi_"#n, \ .pmu = PFM_PMU_INTEL_IVBEP_UNC_R3QPI##n, \ .pme_count = LIBPFM_ARRAY_SIZE(intel_ivbep_unc_r3_pe),\ .type = PFM_PMU_TYPE_UNCORE,\ .num_cntrs = 3,\ .num_fixed_cntrs = 0,\ .max_encoding = 1,\ .pe = intel_ivbep_unc_r3_pe,\ .atdesc = snbep_unc_mods,\ .flags = PFMLIB_PMU_FL_RAW_UMASK,\ .pmu_detect = pfm_intel_ivbep_unc_detect,\ .get_event_encoding[PFM_OS_NONE] = pfm_intel_snbep_unc_get_encoding,\ PFMLIB_ENCODE_PERF(pfm_intel_snbep_unc_get_perf_encoding),\ PFMLIB_OS_DETECT(pfm_intel_x86_perf_detect), \ .get_event_first = pfm_intel_x86_get_event_first,\ .get_event_next = pfm_intel_x86_get_event_next,\ .event_is_valid = pfm_intel_x86_event_is_valid,\ .validate_table = pfm_intel_x86_validate_table,\ .get_event_info = pfm_intel_x86_get_event_info,\ .get_event_attr_info = pfm_intel_x86_get_event_attr_info,\ PFMLIB_VALID_PERF_PATTRS(pfm_intel_snbep_unc_perf_validate_pattrs),\ .get_event_nattrs = pfm_intel_x86_get_event_nattrs,\ } DEFINE_R3QPI_BOX(0); DEFINE_R3QPI_BOX(1); DEFINE_R3QPI_BOX(2); papi-5.6.0/src/perfctr-2.7.x/examples/perfex/perfex.c000664 001750 001750 00000035323 13216244370 024313 0ustar00jshenry1963jshenry1963000000 000000 /* $Id: perfex.c,v 1.35 2005/01/16 22:51:20 mikpe Exp $ * * NAME * perfex - a command-line interface to processor performance counters * * SYNOPSIS * perfex [-e event] .. [--p4pe=value] [--p4pmv=value] [-o file] command * perfex { -i | -l | -L } * * DESCRIPTION * The given command is executed; after it is complete, perfex * prints the values of the various hardware performance counters.
* * OPTIONS * -e event | --event=event * Specify an event to be counted. * Multiple event specifiers may be given, limited by the * number of available performance counters in the processor. * * The full syntax of an event specifier is "evntsel/escr@pmc". * All three components are 32-bit processor-specific numbers, * written in decimal or hexadecimal notation. * * "evntsel" is the primary processor-specific event selection * code to use for this event. This field is mandatory. * * "/escr" is used to specify additional event selection data * for Pentium 4 processors. "evntsel" is put in the counter's * CCCR register, and "escr" is put in the associated ESCR * register. * * "@pmc" describes which CPU counter number to assign this * event to. When omitted, the events are assigned in the * order listed, starting from 0. Either all or none of the * event specifiers should use the "@pmc" notation. * Explicit counter assignment via "@pmc" is required on * Pentium 4 and VIA C3 processors. * * The counts, together with an event description are written * to the result file (default is stderr). * * --p4pe=value | --p4_pebs_enable=value * --p4pmv=value | --p4_pebs_matrix_vert=value * Specify the value to be stored in the auxiliary control * register PEBS_ENABLE or PEBS_MATRIX_VERT, which are used * for replay tagging events on Pentium 4 processors. * Note: Intel's documentation states that bit 25 should be * set in PEBS_ENABLE, but this is not true and the driver * will disallow it. * * -i | --info * Instead of running a command, generate output which * identifies the current processor and its capabilities. * * -l | --list * Instead of running a command, generate output which * identifies the current processor and its capabilities, * and lists its countable events. * * -L | --long-list * Like -l, but list the events in a more detailed format. * * -o file | --output=file * Write the results to file instead of stderr. 
* * EXAMPLES * The following commands count the number of retired instructions * in user-mode on an Intel P6 processor: * * perfex -e 0x004100C0 some_program * perfex --event=0x004100C0 some_program * * The following command does the same on an Intel Pentium 4 processor: * * perfex -e 0x00039000/0x04000204@0x8000000C some_program * * Explanation: Program IQ_CCCR0 with required flags, ESCR select 4 * (== CRU_ESCR0), and Enable. Program CRU_ESCR0 with event 2 * (instr_retired), NBOGUSNTAG, CPL>0. Map this event to IQ_COUNTER0 * (0xC) with fast RDPMC enabled. * * The following command counts the number of L1 cache read misses * on a Pentium 4 processor: * * perfex -e 0x0003B000/0x12000204@0x8000000C --p4pe=0x01000001 --p4pmv=0x1 some_program * * Explanation: IQ_CCCR0 is bound to CRU_ESCR2, CRU_ESCR2 is set up * for replay_event with non-bogus uops and CPL>0, and PEBS_ENABLE * and PEBS_MATRIX_VERT are set up for the 1stL_cache_load_miss_retired * metric. Note that bit 25 is NOT set in PEBS_ENABLE. * * DEPENDENCIES * perfex only works on Linux systems which have been modified * to include the perfctr kernel extension. Perfctr is available at * http://www.csd.uu.se/~mikpe/linux/perfctr/. * * NOTES * perfex is superficially similar to IRIX' perfex(1). * The -a, -mp, -s, and -x options are not yet implemented. * * Copyright (C) 1999-2004 Mikael Pettersson */ /* * Theory of operation: * - Parent creates a socketpair(). * - Parent forks. * - Child creates and sets up its perfctrs. * - Child sends its perfctr fd to parent via the socketpair(). * - Child exec:s the command. * - Parent waits for child to exit. * - Parent receives child's perfctr fd via the socketpair(). * - Parent retrieves child's final control and counts via the fd. 
*/ #include #include #include #include #include #include /* for offsetof() */ #include #include #include /* for strerror() */ #include #include "libperfctr.h" #include "arch.h" /* * Our child-to-parent protocol is the following: * There is an int-sized data packet, with an optional 'struct cmsg_fd' * control message attached. * The data packet (which must be present, as control messages don't * work with zero-sized payloads) contains an 'int' status. * If status != 0, then it is an 'errno' value from the child's * perfctr setup code. */ struct cmsg_fd { struct cmsghdr hdr; int fd; /* 64-bit machines pad here, which causes problems since the kernel derives the number of fds from the size. The CMSG_FD_TRUE_SIZE macro gives the true payload size. */ }; #define CMSG_FD_TRUE_SIZE (offsetof(struct cmsg_fd, fd) + sizeof(int)) #define CMSG_FD_PADDED_SIZE sizeof(struct cmsg_fd) static int my_send(int sock, int fd, int status) { struct msghdr msg; struct iovec iov; struct cmsg_fd cmsg_fd; int buf[1]; msg.msg_name = NULL; msg.msg_namelen = 0; msg.msg_flags = 0; buf[0] = status; iov.iov_base = buf; iov.iov_len = sizeof buf; msg.msg_iov = &iov; msg.msg_iovlen = 1; if( status != 0 ) { /* errno, don't send fd */ msg.msg_control = 0; msg.msg_controllen = 0; } else { cmsg_fd.hdr.cmsg_len = CMSG_FD_TRUE_SIZE; cmsg_fd.hdr.cmsg_level = SOL_SOCKET; cmsg_fd.hdr.cmsg_type = SCM_RIGHTS; cmsg_fd.fd = fd; msg.msg_control = &cmsg_fd; msg.msg_controllen = CMSG_FD_TRUE_SIZE; } return sendmsg(sock, &msg, 0) == sizeof buf ? 
0 : -1; } static int my_send_fd(int sock, int fd) { return my_send(sock, fd, 0); } static int my_send_err(int sock) { return my_send(sock, -1, errno); } static int my_receive(int sock, int *fd) { struct msghdr msg; struct iovec iov; struct cmsg_fd cmsg_fd; int buf[1]; msg.msg_name = NULL; msg.msg_namelen = 0; msg.msg_flags = 0; buf[0] = -1; iov.iov_base = buf; iov.iov_len = sizeof(buf); msg.msg_iov = &iov; msg.msg_iovlen = 1; memset(&cmsg_fd, ~0, sizeof cmsg_fd); msg.msg_control = &cmsg_fd; msg.msg_controllen = CMSG_FD_TRUE_SIZE; if( recvmsg(sock, &msg, 0) != sizeof buf ) return -1; if( buf[0] == 0 && msg.msg_control == &cmsg_fd && msg.msg_controllen == CMSG_FD_PADDED_SIZE && cmsg_fd.hdr.cmsg_type == SCM_RIGHTS && cmsg_fd.hdr.cmsg_level == SOL_SOCKET && cmsg_fd.hdr.cmsg_len == CMSG_FD_TRUE_SIZE && cmsg_fd.fd >= 0 ) { *fd = cmsg_fd.fd; return 0; } if( msg.msg_controllen == 0 && buf[0] != 0 ) errno = buf[0]; else errno = EPROTO; return -1; } static int do_open_self(int creat) { int fd; fd = _vperfctr_open(creat); if( fd >= 0 && perfctr_abi_check_fd(fd) < 0 ) { close(fd); return -1; } return fd; } static int do_child(int sock, const struct vperfctr_control *control, char **argv) { int fd; fd = do_open_self(1); if( fd < 0 ) { my_send_err(sock); return 1; } if( _vperfctr_control(fd, control) < 0 ) { my_send_err(sock); return 1; } if( my_send_fd(sock, fd) < 0 ) { my_send_err(sock); /* well, we can try.. 
*/ return 1; } close(fd); close(sock); execvp(argv[0], argv); perror(argv[0]); return 1; } static int do_parent(int sock, int child_pid, FILE *resfile) { int child_status; int fd; struct perfctr_sum_ctrs sum; struct vperfctr_control control; struct perfctr_sum_ctrs children; /* this can be done before or after the recvmsg() */ if( waitpid(child_pid, &child_status, 0) < 0 ) { perror("perfex: waitpid"); return 1; } if( !WIFEXITED(child_status) ) { fprintf(stderr, "perfex: child did not exit normally\n"); return 1; } if( my_receive(sock, &fd) < 0 ) { perror("perfex: receiving fd/status"); return 1; } close(sock); /* XXX: surely we don't need to repeat the ABI check here? */ if( _vperfctr_read_sum(fd, &sum) < 0 ) { perror("perfex: read_sum"); return 1; } if( _vperfctr_read_control(fd, &control) < 0 ) { perror("perfex: read_control"); return 1; } if( _vperfctr_read_children(fd, &children) < 0 ) { perror("perfex: read_children"); return 1; } close(fd); do_print(resfile, &control.cpu_control, &sum, &children); return WEXITSTATUS(child_status); } static int do_perfex(const struct vperfctr_control *control, char **argv, FILE *resfile) { int pid; int sv[2]; if( socketpair(AF_UNIX, SOCK_DGRAM, 0, sv) < 0 ) { perror("perfex: socketpair"); return 1; } pid = fork(); if( pid < 0 ) { perror("perfex: fork"); return 1; } if( pid == 0 ) { close(sv[0]); return do_child(sv[1], control, argv); } else { close(sv[1]); return do_parent(sv[0], pid, resfile); } } static int get_info(struct perfctr_info *info) { int fd; fd = do_open_self(0); if( fd < 0 ) { perror("perfex: open perfctrs"); return -1; } if( perfctr_info(fd, info) < 0 ) { perror("perfex: perfctr_info"); close(fd); return -1; } close(fd); return 0; } static struct perfctr_cpus_info *get_cpus_info(void) { int fd; struct perfctr_cpus_info *cpus_info; fd = do_open_self(0); if( fd < 0 ) { perror("perfex: open perfctrs"); return NULL; } cpus_info = perfctr_cpus_info(fd); if( !cpus_info ) perror("perfex: perfctr_cpus_info"); close(fd); 
return cpus_info; } static int do_info(const struct perfctr_info *info) { struct perfctr_cpus_info *cpus_info; cpus_info = get_cpus_info(); printf("PerfCtr Info:\n"); perfctr_info_print(info); if( cpus_info ) { perfctr_cpus_info_print(cpus_info); free(cpus_info); } return 0; } static void do_print_event(const struct perfctr_event *event, int long_format, const char *event_prefix) { printf("%s%s", event_prefix, event->name); if( long_format ) printf(":0x%02X:0x%X:0x%X", event->evntsel, event->counters_set, event->unit_mask ? event->unit_mask->default_value : 0); printf("\n"); } static void do_print_event_set(const struct perfctr_event_set *event_set, int long_format) { unsigned int i; if( event_set->include ) do_print_event_set(event_set->include, long_format); for(i = 0; i < event_set->nevents; ++i) do_print_event(&event_set->events[i], long_format, event_set->event_prefix); } static int do_list(const struct perfctr_info *info, int long_format) { const struct perfctr_event_set *event_set; unsigned int nrctrs; printf("CPU type %s\n", perfctr_info_cpu_name(info)); printf("%s time-stamp counter available\n", (info->cpu_features & PERFCTR_FEATURE_RDTSC) ? "One" : "No"); nrctrs = perfctr_info_nrctrs(info); printf("%u performance counter%s available\n", nrctrs, (nrctrs == 1) ? "" : "s"); printf("Overflow interrupts%s available\n", (info->cpu_features & PERFCTR_FEATURE_PCINT) ? "" : " not"); event_set = perfctr_cpu_event_set(info->cpu_type); if( !event_set ) { fprintf(stderr, "perfex: perfctr_cpu_event_set(%u) failed\n", info->cpu_type); return 1; } if( !event_set->nevents ) /* the 'generic' CPU type */ return 0; printf("\nEvents Available:\n"); if( long_format ) printf("Name:EvntSel:CounterSet:DefaultUnitMask\n"); do_print_event_set(event_set, long_format); return 0; } /* Hack while phasing out an old number parsing bug. 
*/ static unsigned int strtoul_base = 16; static unsigned int quiet; unsigned long my_strtoul(const char *nptr, char **endptr) { unsigned long val1; val1 = strtoul(nptr, endptr, strtoul_base); if (strtoul_base == 16 && !quiet) { unsigned long val2 = strtoul(nptr, NULL, 0); if (val1 != val2) fprintf(stderr, "perfex: warning: string '%s' is base-dependent, assuming base 16." " Please prefix hexadecimal numbers with '0x'.\n", nptr); } return val1; } static const struct option long_options[] = { { "decimal", 0, NULL, 'd' }, { "event", 1, NULL, 'e' }, { "help", 0, NULL, 'h' }, { "hex", 0, NULL, 'x' }, { "info", 0, NULL, 'i' }, { "list", 0, NULL, 'l' }, { "long-list", 0, NULL, 'L' }, { "output", 1, NULL, 'o' }, ARCH_LONG_OPTIONS { 0 } }; static void do_usage(void) { fprintf(stderr, "Usage: perfex [options] [] ...\n"); fprintf(stderr, "\tperfex -i\n"); fprintf(stderr, "\tperfex -h\n"); fprintf(stderr, "\n"); fprintf(stderr, "Options:\n"); fprintf(stderr, "\t-e | --event=\tEvent to be counted\n"); fprintf(stderr, "\t-h | --help\t\t\tPrint this help text\n"); fprintf(stderr, "\t-o | --output=\tWrite output to file (default is stderr)\n"); fprintf(stderr, "\t-i | --info\t\t\tPrint PerfCtr driver information\n"); fprintf(stderr, "\t-l | --list\t\t\tList available events\n"); fprintf(stderr, "\t-L | --long-list\t\tList available events in long format\n"); fprintf(stderr, "\t-d | --decimal\t\t\tAllow decimal numbers in event specifications\n"); fprintf(stderr, "\t-x | --hex\t\t\tOnly accept hexadecimal numbers in event specifications\n"); do_arch_usage(); } int main(int argc, char **argv) { struct perfctr_info info; struct vperfctr_control control; int n; FILE *resfile; /* prime info, as we'll need it in most cases */ if( get_info(&info) ) return 1; memset(&control, 0, sizeof control); if( info.cpu_features & PERFCTR_FEATURE_RDTSC ) control.cpu_control.tsc_on = 1; n = 0; resfile = stderr; for(;;) { /* the '+' is there to prevent permutation of argv[] */ int ch = 
getopt_long(argc, argv, "+de:hilLo:x", long_options, NULL); switch( ch ) { case -1: /* no more options */ if( optind >= argc ) { fprintf(stderr, "perfex: command missing\n"); return 1; } argv += optind; break; case 'h': do_usage(); return 0; case 'i': return do_info(&info); case 'l': return do_list(&info, 0); case 'L': return do_list(&info, 1); case 'o': if( (resfile = fopen(optarg, "w")) == NULL ) { fprintf(stderr, "perfex: %s: %s\n", optarg, strerror(errno)); return 1; } continue; case 'd': strtoul_base = 0; continue; case 'x': strtoul_base = 16; quiet = 1; continue; case 'e': n = do_event_spec(n, optarg, &control.cpu_control); continue; default: if( do_arch_option(ch, optarg, &control.cpu_control) < 0 ) { do_usage(); return 1; } continue; } break; } return do_perfex(&control, argv, resfile); } papi-5.6.0/src/components/vmware/000775 001750 001750 00000000000 13216244360 021007 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/libpfm4/perf_examples/syst_count.c000664 001750 001750 00000023043 13216244365 024075 0ustar00jshenry1963jshenry1963000000 000000 /* * syst.c - example of a simple system wide monitoring program * * Copyright (c) 2010 Google, Inc * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "perf_util.h"

#define MAX_GROUPS	256
#define MAX_PATH	1024
#ifndef STR
# define _STR(x)	#x
# define STR(x)		_STR(x)
#endif

typedef struct {
	const char *events[MAX_GROUPS];
	int nevents[MAX_GROUPS]; /* #events per group */
	int num_groups;
	int delay;
	int excl;
	int pin;
	int interval;
	int cpu;
	char *cgroup_name;
} options_t;

static options_t options;
static perf_event_desc_t **all_fds;	/* per-CPU event arrays, indexed by CPU number */

/*
 * Find the mount point of a cgroup filesystem that carries the
 * perf_event subsystem and copy it into buf (capacity maxlen bytes).
 * Returns 0 on success, -1 if /proc/mounts cannot be opened, no such
 * mount exists, or the path does not fit into buf.
 */
static int
cgroupfs_find_mountpoint(char *buf, size_t maxlen)
{
	FILE *fp;
	char mountpoint[MAX_PATH+1], tokens[MAX_PATH+1], type[MAX_PATH+1];
	char *token, *saved_ptr = NULL;
	int found = 0;

	fp = fopen("/proc/mounts", "r");
	if (!fp)
		return -1;

	/*
	 * in order to handle split hierarchy, we need to scan /proc/mounts
	 * and inspect every cgroupfs mount point to find one that has
	 * perf_event subsystem
	 */
	while (fscanf(fp, "%*s %"STR(MAX_PATH)"s %"STR(MAX_PATH)"s %"
				STR(MAX_PATH)"s %*d %*d\n",
				mountpoint, type, tokens) == 3) {

		if (!strcmp(type, "cgroup")) {

			token = strtok_r(tokens, ",", &saved_ptr);

			while (token != NULL) {
				if (!strcmp(token, "perf_event")) {
					found = 1;
					break;
				}
				token = strtok_r(NULL, ",", &saved_ptr);
			}
		}
		if (found)
			break;
	}
	fclose(fp);
	if (!found)
		return -1;

	if (strlen(mountpoint) < maxlen) {
		strcpy(buf, mountpoint);
		return 0;
	}
	return -1;
}

/*
 * Open the directory of cgroup 'name' below the perf_event cgroup mount.
 * Exits if no mount point is found; returns the descriptor, or -1 (after
 * a warning) if the cgroup directory cannot be opened.
 */
int
open_cgroup(char *name)
{
	char path[MAX_PATH+1];
	char mnt[MAX_PATH+1];
	int cfd;

	if (cgroupfs_find_mountpoint(mnt, MAX_PATH+1))
		errx(1, "cannot find cgroup fs mount point");

	snprintf(path, MAX_PATH, "%s/%s", mnt, name);

	cfd = open(path, O_RDONLY);
	if (cfd == -1)
		warn("no access to cgroup %s\n", name);

	return cfd;
}

/*
 * Create every event of every group on CPU 'cpu' and record the
 * resulting array in all_fds[cpu].  As a side effect options.nevents[]
 * is filled with the number of events in each group.  'cfd' is the
 * cgroup descriptor, used only when a cgroup name was given.
 * Any failure terminates the program.
 */
void
setup_cpu(int cpu, int cfd)
{
	perf_event_desc_t *fds = NULL;
	int old_total, total = 0, num;
	int i, j, n, ret, is_lead, group_fd;
	unsigned long flags;
	pid_t pid;

	for(i=0, j=0; i < options.num_groups; i++) {
		old_total = total;
		ret = perf_setup_list_events(options.events[i], &fds, &total);
		if (ret)
			errx(1, "cannot setup events\n");

		/* fds may have been reallocated, so refresh the table entry */
		all_fds[cpu] = fds;

		num = total - old_total;

		options.nevents[i] = num;

		for(n=0; n < num; n++, j++) {

			is_lead = perf_is_group_leader(fds, j);
			if (is_lead) {
				/* leaders start disabled; start_cpu() enables them */
				fds[j].hw.disabled = 1;
				group_fd = -1;
			} else {
				fds[j].hw.disabled = 0;
				group_fd = fds[fds[j].group_leader].fd;
			}
			fds[j].hw.size = sizeof(struct perf_event_attr);

			if (options.cgroup_name) {
				/* in cgroup mode the pid argument carries the cgroup fd */
				flags = PERF_FLAG_PID_CGROUP;
				pid = cfd;
				//fds[j].hw.cgroup = 1;
				//fds[j].hw.cgroup_fd = cfd;
			} else {
				flags = 0;
				pid = -1;
			}

			if (options.pin && is_lead)
				fds[j].hw.pinned = 1;

			if (options.excl && is_lead)
				fds[j].hw.exclusive = 1;

			/* request timing information necessary for scaling counts */
			fds[j].hw.read_format = PERF_FORMAT_SCALE;
			fds[j].fd = perf_event_open(&fds[j].hw, pid, cpu, group_fd, flags);
			if (fds[j].fd == -1) {
				if (errno == EACCES)
					err(1, "you need to be root to run system-wide on this machine");

				warn("cannot attach event %s to CPU%ds, aborting", fds[j].name, cpu);
				exit(1);
			}
		}
	}
}

/* Enable every event group on CPU 'c' through its group leader. */
void
start_cpu(int c)
{
	perf_event_desc_t *fds = NULL;
	int j, ret, n = 0;

	fds = all_fds[c];

	if (fds[0].fd == -1)
		return;

	for(j=0; j < options.num_groups; j++) {
		/* group leader always first in each group */
		ret = ioctl(fds[n].fd, PERF_EVENT_IOC_ENABLE, 0);
		if (ret)
			/* report the leader the ioctl was issued on (index n, not the group index j) */
			err(1, "cannot enable event %s\n", fds[n].name);
		n += options.nevents[j];
	}
}

/* Disable every event group on CPU 'c' through its group leader. */
void
stop_cpu(int c)
{
	perf_event_desc_t *fds = NULL;
	int j, ret, n = 0;

	fds = all_fds[c];

	if (fds[0].fd == -1)
		return;

	for(j=0; j < options.num_groups; j++) {
		/* group leader always first in each group */
		ret = ioctl(fds[n].fd, PERF_EVENT_IOC_DISABLE, 0);
		if (ret)
			/* report the leader the ioctl was issued on (index n, not the group index j) */
			err(1, "cannot disable event %s\n", fds[n].name);
		n += options.nevents[j];
	}
}

/*
 * Read and print every event of CPU 'c': scaled count, delta against
 * the previous reading, and the scaling percentage.  The raw values are
 * then saved as the previous reading.
 */
void
read_cpu(int c)
{
	perf_event_desc_t *fds;
	uint64_t val, delta;
	double ratio;
	int i, j, n, ret;

	fds = all_fds[c];

	if (fds[0].fd == -1) {
		printf("CPU%d not monitored\n", c);
		return;
	}

	for(i=0, j = 0; i < options.num_groups; i++) {
		for(n = 0; n < options.nevents[i]; n++, j++) {

			ret = read(fds[j].fd, fds[j].values, sizeof(fds[j].values));
			if (ret != sizeof(fds[j].values)) {
				if (ret == -1)
					err(1, "cannot read event %s : %d", fds[j].name, ret);
				else {
					warnx("CPU%d G%-2d could not read event %s, read=%d", c, i, fds[j].name, ret);
					continue;
				}
			}
			/*
			 * scaling because we may be sharing the PMU and
			 * thus may be multiplexed
			 */
			delta = perf_scale_delta(fds[j].values, fds[j].prev_values);
			val = perf_scale(fds[j].values);
			ratio = perf_scale_ratio(fds[j].values);

			printf("CPU%-3d G%-2d %'20"PRIu64" %'20"PRIu64" %s (scaling %.2f%%, ena=%'"PRIu64", run=%'"PRIu64") %s\n",
				c,
				i,
				val,
				delta,
				fds[j].name,
				(1.0-ratio)*100,
				fds[j].values[1],
				fds[j].values[2],
				options.cgroup_name ? options.cgroup_name : "");

			fds[j].prev_values[0] = fds[j].values[0];
			fds[j].prev_values[1] = fds[j].values[1];
			fds[j].prev_values[2] = fds[j].values[2];

			if (fds[j].values[2] > fds[j].values[1])
				errx(1, "WARNING: time_running > time_enabled %"PRIu64"\n", fds[j].values[2] - fds[j].values[1]);
		}
	}
}

/*
 * Close every event descriptor of CPU 'c' and release its event array.
 */
void
close_cpu(int c)
{
	perf_event_desc_t *fds = NULL;
	int i, j;
	int total = 0;

	fds = all_fds[c];

	if (fds[0].fd == -1)
		return;

	for(i=0; i < options.num_groups; i++) {
		/*
		 * fds[] is one flat array covering all groups; 'total' is the
		 * offset of group i's first event.  Without the offset every
		 * group would re-close the first group's descriptors and the
		 * others would never be closed.
		 */
		for(j=0; j < options.nevents[i]; j++)
			close(fds[total + j].fd);
		total += options.nevents[i];
	}
	perf_free_fds(fds, total);
}

/*
 * Run the measurement on the selected CPU, or on CPUs 0..N-1 (N being
 * the online CPU count) when none was selected: set up, count for
 * options.delay seconds (printing once per second in interval mode),
 * print the results and tear everything down.
 */
void
measure(void)
{
	int c, cmin, cmax;
	int cfd = -1;

	cmin = 0;
	cmax = (int)sysconf(_SC_NPROCESSORS_ONLN);

	if (options.cpu != -1) {
		/* all_fds[] is indexed by CPU number: a negative index is never valid */
		if (options.cpu < 0)
			errx(1, "invalid CPU number %d", options.cpu);
		cmin = options.cpu;
		cmax = cmin + 1;
	}

	/*
	 * Size the table by the highest CPU number used (cmax), not by the
	 * online CPU count, so that a CPU selected with -c is always a
	 * valid index.
	 */
	all_fds = calloc(cmax, sizeof(*all_fds));
	if (!all_fds)
		err(1, "cannot allocate memory for all_fds");

	if (options.cgroup_name) {
		cfd = open_cgroup(options.cgroup_name);
		if (cfd == -1)
			exit(1);
	}

	for(c=cmin ; c < cmax; c++)
		setup_cpu(c, cfd);

	/* the events hold their own reference; the cgroup fd is no longer needed */
	if (options.cgroup_name)
		close(cfd);

	/* NOTE(review): this format string consumes no argument; it looks truncated — confirm the intended message */
	printf("\n", options.delay);

	/*
	 * FIX this for hotplug CPU
	 */

	if (options.interval) {
		struct timespec tv;
		int delay;

		for (delay = 1 ; delay <= options.delay; delay++) {

			for(c=cmin ; c < cmax; c++)
				start_cpu(c);

			if (0) {
				tv.tv_sec = 0;
				tv.tv_nsec = 100000000;
				nanosleep(&tv, NULL);
			} else
				sleep(1);

			for(c=cmin ; c < cmax; c++)
				stop_cpu(c);

			for(c = cmin; c < cmax; c++) {
				printf("# %'ds -----\n", delay);
				read_cpu(c);
			}
		}

	} else {
		for(c=cmin ; c < cmax; c++)
			start_cpu(c);

		sleep(options.delay);

		if (0)
			for(c=cmin ; c < cmax; c++)
				stop_cpu(c);

		for(c = cmin; c < cmax; c++) {
			printf("# -----\n");
			read_cpu(c);
		}
	}

	for(c = cmin; c < cmax; c++)
		close_cpu(c);

	free(all_fds);
}

/* Print the command-line synopsis. */
static void
usage(void)
{
	printf("usage: syst [-c cpu] [-x] [-h] [-p] [-d delay] [-P] [-G cgroup name] [-e event1,event2,...]\n");
}

/*
 * Parse the command line into 'options', apply the defaults (20 second
 * delay, one "cycles,instructions" group), initialize libpfm and run
 * the measurement.
 */
int
main(int argc, char **argv)
{
	int c, ret;

	setlocale(LC_ALL, "");

	options.cpu = -1;

	while ((c=getopt(argc, argv,"hc:e:d:xPpG:")) != -1) {
		switch(c) {
			case 'x':
				options.excl = 1;
				break;
			case 'p':
				options.interval = 1;
				break;
			case 'e':
				if (options.num_groups < MAX_GROUPS) {
					options.events[options.num_groups++] = optarg;
				} else {
					errx(1, "you cannot specify more than %d groups.\n",
						MAX_GROUPS);
				}
				break;
			case 'c':
				options.cpu = atoi(optarg);
				break;
			case 'd':
				options.delay = atoi(optarg);
				break;
			case 'P':
				options.pin = 1;
				break;
			case 'h':
				usage();
				exit(0);
			case 'G':
				options.cgroup_name = optarg;
				break;
			default:
				errx(1, "unknown error");
		}
	}
	if (!options.delay)
		options.delay = 20;

	if (!options.events[0]) {
		options.events[0] = "cycles,instructions";
		options.num_groups = 1;
	}

	ret = pfm_initialize();
	if (ret != PFM_SUCCESS)
		errx(1, "libpfm initialization failed: %s\n", pfm_strerror(ret));

	measure();

	/* free libpfm resources cleanly */
	pfm_terminate();

	return 0;
}
papi-5.6.0/src/libpfm4/docs/man3/libpfm_intel_atom.3000664 001750 001750 00000002524 13216244364 024223 0ustar00jshenry1963jshenry1963000000 000000 .TH LIBPFM 3 "September, 2009" "" "Linux Programmer's Manual" .SH NAME libpfm_intel_atom - support for Intel Atom processors .SH SYNOPSIS .nf .B #include .sp .B PMU name: atom .B PMU desc: Intel Atom .sp .SH DESCRIPTION The library supports all Intel Atom-based processors that includes family 6 model 28. .SH MODIFIERS The following modifiers are supported on Intel Atom processors: .TP .B u Measure at user level which includes privilege levels 1, 2, 3. This corresponds to \fBPFM_PLM3\fR. This is a boolean modifier. .TP .B k Measure at kernel level which includes privilege level 0. This corresponds to \fBPFM_PLM0\fR. This is a boolean modifier. .TP .B i Invert the meaning of the event. The counter will now count cycles in which the event is \fBnot\fR occurring. This is a boolean modifier .TP .B e Enable edge detection, i.e., count only when there is a state transition. This is a boolean modifier. .TP .B c Set the counter mask value. The mask acts as a threshold.
The counter will count the number of cycles in which the number of occurrences of the event is greater than or equal to the threshold. This is an integer modifier with values in the range [0:255]. .TP .B t Measure on both threads at the same time assuming hyper-threading is enabled. This is a boolean modifier. .SH AUTHORS .nf Stephane Eranian .fi .PP papi-5.6.0/src/perfctr-2.7.x/etc/costs/Celeron-466000664 001750 001750 00000001467 13216244367 023314 0ustar00jshenry1963jshenry1963000000 000000 [data from a 466 MHz Celeron (Mendocino)] PERFCTR INIT: vendor 0, family 6, model 6, stepping 5, clock 465241 kHz PERFCTR INIT: NITER == 64 PERFCTR INIT: loop overhead is 168 cycles PERFCTR INIT: rdtsc cost is 38.6 cycles (2641 total) PERFCTR INIT: rdpmc cost is 29.5 cycles (2059 total) PERFCTR INIT: rdmsr (counter) cost is 82.4 cycles (5442 total) PERFCTR INIT: rdmsr (evntsel) cost is 70.7 cycles (4699 total) PERFCTR INIT: wrmsr (counter) cost is 88.2 cycles (5819 total) PERFCTR INIT: wrmsr (evntsel) cost is 79.9 cycles (5283 total) PERFCTR INIT: read cr4 cost is 1.9 cycles (291 total) PERFCTR INIT: write cr4 cost is 42.2 cycles (2870 total) PERFCTR INIT: write LVTPC cost is 34.8 cycles (2401 total) PERFCTR INIT: sync_core cost is 77.3 cycles (5117 total) perfctr: driver 2.7.5, cpu type Intel P6 at 465241 kHz papi-5.6.0/src/Matlab/PAPI_Matlab.vcproj000664 001750 001750 00000014167 13216244356 021775 0ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/perfctr-2.6.x/patches/patch-kernel-2.6.8.1000775 001750 001750 00000037647 13216244367 024331 0ustar00jshenry1963jshenry1963000000 000000 --- linux-2.6.8.1.perfctr26/CREDITS.~1~ 2008-06-22 21:43:08.000000000 +0200 +++ linux-2.6.8.1.perfctr26/CREDITS 2008-06-22 21:46:32.201321000 +0200 @@ -2535,9 +2535,10 @@ S: Ottawa, Ontario S: Canada K2P 0X8 N: Mikael Pettersson -E: mikpe@csd.uu.se -W: http://www.csd.uu.se/~mikpe/ +E: mikpe@it.uu.se +W: http://user.it.uu.se/~mikpe/ D: Miscellaneous fixes +D: Performance-monitoring counters
driver N: Reed H. Petty E: rhp@draper.net --- linux-2.6.8.1.perfctr26/Documentation/ioctl-number.txt.~1~ 2008-06-22 21:43:08.000000000 +0200 +++ linux-2.6.8.1.perfctr26/Documentation/ioctl-number.txt 2008-06-22 21:46:27.231321000 +0200 @@ -187,6 +187,8 @@ Code Seq# Include File Comments 0xB1 00-1F PPPoX 0xCB 00-1F CBM serial IEC bus in development: +0xD0 all performance counters see drivers/perfctr/ + 0xDD 00-3F ZFCP device driver see drivers/s390/scsi/ 0xF3 00-3F video/sisfb.h sisfb (in development) --- linux-2.6.8.1.perfctr26/MAINTAINERS.~1~ 2008-06-22 21:43:08.000000000 +0200 +++ linux-2.6.8.1.perfctr26/MAINTAINERS 2008-06-22 21:46:32.201321000 +0200 @@ -1673,6 +1673,12 @@ M: george@mvista.com L: linux-net@vger.kernel.org S: Supported +PERFORMANCE-MONITORING COUNTERS DRIVER +P: Mikael Pettersson +M: mikpe@it.uu.se +W: http://user.it.uu.se/~mikpe/linux/perfctr/ +S: Maintained + PNP SUPPORT P: Adam Belay M: ambx1@neo.rr.com --- linux-2.6.8.1.perfctr26/arch/i386/Kconfig.~1~ 2008-06-22 21:43:08.000000000 +0200 +++ linux-2.6.8.1.perfctr26/arch/i386/Kconfig 2008-06-22 21:46:27.231321000 +0200 @@ -865,6 +865,8 @@ config REGPARM generate incorrect output with certain kernel constructs when -mregparm=3 is used. +source "drivers/perfctr/Kconfig" + endmenu --- linux-2.6.8.1.perfctr26/arch/i386/kernel/entry.S.~1~ 2008-06-22 21:43:08.000000000 +0200 +++ linux-2.6.8.1.perfctr26/arch/i386/kernel/entry.S 2008-06-22 21:46:27.231321000 +0200 @@ -406,6 +406,16 @@ ENTRY(name) \ /* The include is where all of the SMP etc. 
interrupts come from */ #include "entry_arch.h" +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +ENTRY(perfctr_interrupt) + pushl $LOCAL_PERFCTR_VECTOR-256 + SAVE_ALL + pushl %esp + call smp_perfctr_interrupt + addl $4, %esp + jmp ret_from_intr +#endif + ENTRY(divide_error) pushl $0 # no error code pushl $do_divide_error --- linux-2.6.8.1.perfctr26/arch/i386/kernel/i8259.c.~1~ 2008-06-22 21:43:08.000000000 +0200 +++ linux-2.6.8.1.perfctr26/arch/i386/kernel/i8259.c 2008-06-22 21:46:27.231321000 +0200 @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -427,6 +428,8 @@ void __init init_IRQ(void) */ intr_init_hook(); + perfctr_vector_init(); + /* * Set the clock to HZ Hz, we already have a valid * vector now: --- linux-2.6.8.1.perfctr26/arch/i386/kernel/process.c.~1~ 2008-06-22 21:43:08.000000000 +0200 +++ linux-2.6.8.1.perfctr26/arch/i386/kernel/process.c 2008-06-22 21:46:27.231321000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -304,6 +305,7 @@ void exit_thread(void) tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET; put_cpu(); } + perfctr_exit_thread(&tsk->thread); } void flush_thread(void) @@ -366,6 +368,8 @@ int copy_thread(int nr, unsigned long cl savesegment(fs,p->thread.fs); savesegment(gs,p->thread.gs); + perfctr_copy_task(p, regs); + tsk = current; if (unlikely(NULL != tsk->thread.io_bitmap_ptr)) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); @@ -573,6 +577,9 @@ struct task_struct fastcall * __switch_t */ tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET; } + + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.8.1.perfctr26/arch/ppc/Kconfig.~1~ 2008-06-22 21:43:08.000000000 +0200 +++ linux-2.6.8.1.perfctr26/arch/ppc/Kconfig 2008-06-22 21:46:27.231321000 +0200 @@ -243,6 +243,8 @@ config NOT_COHERENT_CACHE depends on 4xx || 8xx default y +source "drivers/perfctr/Kconfig" + endmenu menu "Platform options" --- linux-2.6.8.1.perfctr26/arch/ppc/kernel/head.S.~1~ 
2008-06-22 21:43:08.000000000 +0200 +++ linux-2.6.8.1.perfctr26/arch/ppc/kernel/head.S 2008-06-22 21:46:27.231321000 +0200 @@ -502,7 +502,11 @@ SystemCall: Trap_0f: EXCEPTION_PROLOG addi r3,r1,STACK_FRAME_OVERHEAD +#ifdef CONFIG_PERFCTR_INTERRUPT_SUPPORT + EXC_XFER_EE(0xf00, do_perfctr_interrupt) +#else EXC_XFER_EE(0xf00, UnknownException) +#endif /* * Handle TLB miss for instruction on 603/603e. --- linux-2.6.8.1.perfctr26/arch/ppc/kernel/process.c.~1~ 2008-06-22 21:43:08.000000000 +0200 +++ linux-2.6.8.1.perfctr26/arch/ppc/kernel/process.c 2008-06-22 21:46:27.231321000 +0200 @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -301,7 +302,9 @@ struct task_struct *__switch_to(struct t #endif /* CONFIG_SPE */ new_thread = &new->thread; old_thread = &current->thread; + perfctr_suspend_thread(&prev->thread); last = _switch(old_thread, new_thread); + perfctr_resume_thread(&current->thread); local_irq_restore(s); return last; } @@ -370,6 +373,7 @@ void exit_thread(void) last_task_used_math = NULL; if (last_task_used_altivec == current) last_task_used_altivec = NULL; + perfctr_exit_thread(&current->thread); } void flush_thread(void) @@ -460,6 +464,8 @@ copy_thread(int nr, unsigned long clone_ p->thread.last_syscall = -1; + perfctr_copy_task(p, regs); + return 0; } --- linux-2.6.8.1.perfctr26/arch/x86_64/Kconfig.~1~ 2008-06-22 21:43:08.000000000 +0200 +++ linux-2.6.8.1.perfctr26/arch/x86_64/Kconfig 2008-06-22 21:46:27.231321000 +0200 @@ -318,6 +318,8 @@ config X86_MCE bool default y +source "drivers/perfctr/Kconfig" + endmenu --- linux-2.6.8.1.perfctr26/arch/x86_64/kernel/entry.S.~1~ 2008-06-22 21:43:08.000000000 +0200 +++ linux-2.6.8.1.perfctr26/arch/x86_64/kernel/entry.S 2008-06-22 21:46:27.231321000 +0200 @@ -557,6 +557,11 @@ ENTRY(spurious_interrupt) apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt #endif +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +ENTRY(perfctr_interrupt) + apicinterrupt LOCAL_PERFCTR_VECTOR,smp_perfctr_interrupt 
+#endif + /* * Exception entry points. */ --- linux-2.6.8.1.perfctr26/arch/x86_64/kernel/i8259.c.~1~ 2008-06-22 21:43:08.000000000 +0200 +++ linux-2.6.8.1.perfctr26/arch/x86_64/kernel/i8259.c 2008-06-22 21:46:27.231321000 +0200 @@ -24,6 +24,7 @@ #include #include #include +#include #include @@ -509,6 +510,8 @@ void __init init_IRQ(void) set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); #endif + perfctr_vector_init(); + /* * Set the clock to HZ Hz, we already have a valid * vector now: --- linux-2.6.8.1.perfctr26/arch/x86_64/kernel/process.c.~1~ 2008-06-22 21:43:08.000000000 +0200 +++ linux-2.6.8.1.perfctr26/arch/x86_64/kernel/process.c 2008-06-22 21:46:27.231321000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -262,6 +263,7 @@ void exit_thread(void) tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET; put_cpu(); } + perfctr_exit_thread(&me->thread); } void flush_thread(void) @@ -365,6 +367,8 @@ int copy_thread(int nr, unsigned long cl asm("movl %%es,%0" : "=m" (p->thread.es)); asm("movl %%ds,%0" : "=m" (p->thread.ds)); + perfctr_copy_task(p, regs); + if (unlikely(me->thread.io_bitmap_ptr != NULL)) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) @@ -514,6 +518,8 @@ struct task_struct *__switch_to(struct t } } + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.8.1.perfctr26/drivers/Makefile.~1~ 2008-06-22 21:43:08.000000000 +0200 +++ linux-2.6.8.1.perfctr26/drivers/Makefile 2008-06-22 21:46:27.231321000 +0200 @@ -50,4 +50,5 @@ obj-$(CONFIG_ISDN) += isdn/ obj-$(CONFIG_MCA) += mca/ obj-$(CONFIG_EISA) += eisa/ obj-$(CONFIG_CPU_FREQ) += cpufreq/ +obj-$(CONFIG_KPERFCTR) += perfctr/ obj-y += firmware/ --- linux-2.6.8.1.perfctr26/fs/exec.c.~1~ 2008-06-22 21:43:09.000000000 +0200 +++ linux-2.6.8.1.perfctr26/fs/exec.c 2008-06-22 21:46:32.201321000 +0200 @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -836,6 +837,7 @@ int flush_old_exec(struct linux_binprm * 
} current->comm[i] = '\0'; + perfctr_flush_thread(&current->thread); flush_thread(); if (bprm->e_uid != current->euid || bprm->e_gid != current->egid || --- linux-2.6.8.1.perfctr26/include/asm-i386/mach-default/irq_vectors.h.~1~ 2008-06-22 15:56:54.000000000 +0200 +++ linux-2.6.8.1.perfctr26/include/asm-i386/mach-default/irq_vectors.h 2008-06-22 21:46:27.231321000 +0200 @@ -56,14 +56,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 --- linux-2.6.8.1.perfctr26/include/asm-i386/mach-visws/irq_vectors.h.~1~ 2008-06-22 15:56:54.000000000 +0200 +++ linux-2.6.8.1.perfctr26/include/asm-i386/mach-visws/irq_vectors.h 2008-06-22 21:46:27.231321000 +0200 @@ -35,14 +35,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. 
(0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 --- linux-2.6.8.1.perfctr26/include/asm-i386/processor.h.~1~ 2008-06-22 21:43:09.000000000 +0200 +++ linux-2.6.8.1.perfctr26/include/asm-i386/processor.h 2008-06-22 21:46:27.231321000 +0200 @@ -422,6 +422,8 @@ struct thread_struct { unsigned int saved_fs, saved_gs; /* IO permissions */ unsigned long *io_bitmap_ptr; +/* performance counters */ + struct vperfctr *perfctr; }; #define INIT_THREAD { \ --- linux-2.6.8.1.perfctr26/include/asm-i386/system.h.~1~ 2008-06-22 21:14:49.000000000 +0200 +++ linux-2.6.8.1.perfctr26/include/asm-i386/system.h 2008-06-22 21:46:27.231321000 +0200 @@ -14,6 +14,7 @@ extern struct task_struct * FASTCALL(__s #define switch_to(prev,next,last) do { \ unsigned long esi,edi; \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile("pushfl\n\t" \ "pushl %%ebp\n\t" \ "movl %%esp,%0\n\t" /* save ESP */ \ --- linux-2.6.8.1.perfctr26/include/asm-ppc/processor.h.~1~ 2008-06-22 20:48:54.000000000 +0200 +++ linux-2.6.8.1.perfctr26/include/asm-ppc/processor.h 2008-06-22 21:46:27.231321000 +0200 @@ -126,6 +126,9 @@ struct thread_struct { unsigned long spefscr; /* SPE & eFP status */ int used_spe; /* set if process has used spe */ #endif /* CONFIG_SPE */ +#ifdef CONFIG_PERFCTR_VIRTUAL + struct vperfctr *perfctr; /* performance counters */ +#endif }; #define ARCH_MIN_TASKALIGN 16 --- linux-2.6.8.1.perfctr26/include/asm-x86_64/hw_irq.h.~1~ 2008-06-22 21:43:09.000000000 +0200 +++ linux-2.6.8.1.perfctr26/include/asm-x86_64/hw_irq.h 2008-06-22 21:46:27.231321000 +0200 @@ -65,14 +65,15 @@ struct hw_interrupt_type; * sources per level' errata. 
*/ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in irq.h */ #ifndef __ASSEMBLY__ --- linux-2.6.8.1.perfctr26/include/asm-x86_64/irq.h.~1~ 2008-06-22 21:14:49.000000000 +0200 +++ linux-2.6.8.1.perfctr26/include/asm-x86_64/irq.h 2008-06-22 21:46:27.231321000 +0200 @@ -29,7 +29,7 @@ */ #define NR_VECTORS 256 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in hw_irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in hw_irq.h */ #ifdef CONFIG_PCI_MSI #define NR_IRQS FIRST_SYSTEM_VECTOR --- linux-2.6.8.1.perfctr26/include/asm-x86_64/processor.h.~1~ 2008-06-22 21:43:09.000000000 +0200 +++ linux-2.6.8.1.perfctr26/include/asm-x86_64/processor.h 2008-06-22 21:46:27.231321000 +0200 @@ -253,6 +253,8 @@ struct thread_struct { unsigned long *io_bitmap_ptr; /* cached TLS descriptors. 
*/ u64 tls_array[GDT_ENTRY_TLS_ENTRIES]; +/* performance counters */ + struct vperfctr *perfctr; } __attribute__((aligned(16))); #define INIT_THREAD {} --- linux-2.6.8.1.perfctr26/include/asm-x86_64/system.h.~1~ 2008-06-22 21:43:09.000000000 +0200 +++ linux-2.6.8.1.perfctr26/include/asm-x86_64/system.h 2008-06-22 21:46:27.231321000 +0200 @@ -26,7 +26,8 @@ #define __EXTRA_CLOBBER \ ,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15" -#define switch_to(prev,next,last) \ +#define switch_to(prev,next,last) do { \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile(SAVE_CONTEXT \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ @@ -46,7 +47,8 @@ [tif_fork] "i" (TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, thread_info)), \ [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ - : "memory", "cc" __EXTRA_CLOBBER) + : "memory", "cc" __EXTRA_CLOBBER); \ +} while (0) extern void load_gs_index(unsigned); --- linux-2.6.8.1.perfctr26/kernel/exit.c.~1~ 2008-06-22 21:43:09.000000000 +0200 +++ linux-2.6.8.1.perfctr26/kernel/exit.c 2008-06-22 21:46:27.231321000 +0200 @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -96,6 +97,7 @@ repeat: p->parent->cmaj_flt += p->maj_flt + p->cmaj_flt; p->parent->cnvcsw += p->nvcsw + p->cnvcsw; p->parent->cnivcsw += p->nivcsw + p->cnivcsw; + perfctr_release_task(p); sched_exit(p); write_unlock_irq(&tasklist_lock); spin_unlock(&p->proc_lock); --- linux-2.6.8.1.perfctr26/kernel/sched.c.~1~ 2008-06-22 21:43:09.000000000 +0200 +++ linux-2.6.8.1.perfctr26/kernel/sched.c 2008-06-22 21:46:27.241321000 +0200 @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -3334,6 +3335,8 @@ int set_cpus_allowed(task_t *p, cpumask_ migration_req_t req; runqueue_t *rq; + perfctr_set_cpus_allowed(p, new_mask); + rq = task_rq_lock(p, &flags); if (!cpus_intersects(new_mask, cpu_online_map)) { ret = -EINVAL; --- 
linux-2.6.8.1.perfctr26/kernel/timer.c.~1~ 2008-06-22 21:43:09.000000000 +0200 +++ linux-2.6.8.1.perfctr26/kernel/timer.c 2008-06-22 21:46:27.241321000 +0200 @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -835,6 +836,7 @@ static void update_one_process(struct ta do_process_times(p, user, system); do_it_virt(p, user); do_it_prof(p); + perfctr_sample_thread(&p->thread); } /* papi-5.6.0/src/components/micpower/Rules.micpower000664 001750 001750 00000000422 13216244357 024200 0ustar00jshenry1963jshenry1963000000 000000 # $Id$ COMPSRCS += components/micpower/linux-micpower.c COMPOBJS += linux-micpower.o linux-micpower.o: components/micpower/linux-micpower.c components/micpower/linux-micpower.h $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/micpower/linux-micpower.c -o linux-micpower.o papi-5.6.0/src/ctests/tenth.c000664 001750 001750 00000014772 13216244361 020130 0ustar00jshenry1963jshenry1963000000 000000 /* * File: tenth.c * Mods: Maynard Johnson * maynardj@us.ibm.com */ #include #include #include "papi.h" #include "papi_test.h" #include "do_loops.h" #define ITERS 100 /* This file performs the following test: start, stop and timer functionality for PAPI_L1_TCM derived event - They are counted in the default counting domain and default granularity, depending on the platform. Usually this is the user domain (PAPI_DOM_USER) and thread context (PAPI_GRN_THR). - Get us. - Start counters - Do flops - Stop and read counters - Get us. 
*/ #if defined(sun) && defined(sparc) #define CACHE_LEVEL "PAPI_L2_TCM" #define EVT1 PAPI_L2_TCM #define EVT2 PAPI_L2_TCA #define EVT3 PAPI_L2_TCH #define EVT1_STR "PAPI_L2_TCM" #define EVT2_STR "PAPI_L2_TCA" #define EVT3_STR "PAPI_L2_TCH" #define MASK1 MASK_L2_TCM #define MASK2 MASK_L2_TCA #define MASK3 MASK_L2_TCH #else #if defined(__powerpc__) #define CACHE_LEVEL "PAPI_L1_DCA" #define EVT1 PAPI_L1_DCA #define EVT2 PAPI_L1_DCW #define EVT3 PAPI_L1_DCR #define EVT1_STR "PAPI_L1_DCA" #define EVT2_STR "PAPI_L1_DCW" #define EVT3_STR "PAPI_L1_DCR" #define MASK1 MASK_L1_DCA #define MASK2 MASK_L1_DCW #define MASK3 MASK_L1_DCR #else #define CACHE_LEVEL "PAPI_L1_TCM" #define EVT1 PAPI_L1_TCM #define EVT2 PAPI_L1_ICM #define EVT3 PAPI_L1_DCM #define EVT1_STR "PAPI_L1_TCM" #define EVT2_STR "PAPI_L1_ICM" #define EVT3_STR "PAPI_L1_DCM" #define MASK1 MASK_L1_TCM #define MASK2 MASK_L1_ICM #define MASK3 MASK_L1_DCM #endif #endif int main( int argc, char **argv ) { int retval, num_tests = 30, tmp; int EventSet1 = PAPI_NULL; int EventSet2 = PAPI_NULL; int EventSet3 = PAPI_NULL; int mask1 = MASK1; int mask2 = MASK2; int mask3 = MASK3; int num_events1; int num_events2; int num_events3; long long **values; int i, j; long long min[3]; long long max[3]; long long sum[3]; int quiet; /* Set TESTS_QUIET variable */ quiet = tests_quiet( argc, argv ); retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) { test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); } /* Make sure that required resources are available */ /* Skip (don't fail!) 
if they are not */ retval = PAPI_query_event( EVT1 ); if ( retval != PAPI_OK ) { test_skip( __FILE__, __LINE__, EVT1_STR, retval ); } retval = PAPI_query_event( EVT2 ); if ( retval != PAPI_OK ) { test_skip( __FILE__, __LINE__, EVT2_STR, retval ); } retval = PAPI_query_event( EVT3 ); if ( retval != PAPI_OK ) { test_skip( __FILE__, __LINE__, EVT3_STR, retval ); } EventSet1 = add_test_events( &num_events1, &mask1, 1 ); EventSet2 = add_test_events( &num_events2, &mask2, 1 ); EventSet3 = add_test_events( &num_events3, &mask3, 1 ); values = allocate_test_space( num_tests, 1 ); /* Warm me up */ do_l1misses( ITERS ); do_misses( 1, 1024 * 1024 * 4 ); for ( i = 0; i < 10; i++ ) { retval = PAPI_start( EventSet1 ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_start", retval ); do_l1misses( ITERS ); do_misses( 1, 1024 * 1024 * 4 ); retval = PAPI_stop( EventSet1, values[( i * 3 ) + 0] ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); retval = PAPI_start( EventSet2 ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_start", retval ); do_l1misses( ITERS ); do_misses( 1, 1024 * 1024 * 4 ); retval = PAPI_stop( EventSet2, values[( i * 3 ) + 1] ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); retval = PAPI_start( EventSet3 ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_start", retval ); do_l1misses( ITERS ); do_misses( 1, 1024 * 1024 * 4 ); retval = PAPI_stop( EventSet3, values[( i * 3 ) + 2] ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); } remove_test_events( &EventSet1, mask1 ); remove_test_events( &EventSet2, mask2 ); remove_test_events( &EventSet3, mask3 ); for ( j = 0; j < 3; j++ ) { min[j] = 65535; max[j] = sum[j] = 0; } for ( i = 0; i < 10; i++ ) { for ( j = 0; j < 3; j++ ) { if ( min[j] > values[( i * 3 ) + j][0] ) min[j] = values[( i * 3 ) + j][0]; if ( max[j] < values[( i * 3 ) + j][0] ) max[j] = values[( i * 3 ) + j][0]; sum[j] 
+= values[( i * 3 ) + j][0]; } } if ( !quiet ) { printf( "Test case 10: start, stop for derived event %s.\n", CACHE_LEVEL ); printf( "--------------------------------------------------------\n" ); tmp = PAPI_get_opt( PAPI_DEFDOM, NULL ); printf( "Default domain is: %d (%s)\n", tmp, stringify_all_domains( tmp ) ); tmp = PAPI_get_opt( PAPI_DEFGRN, NULL ); printf( "Default granularity is: %d (%s)\n", tmp, stringify_granularity( tmp ) ); printf( "Using %d iterations of c += a*b\n", ITERS ); printf( "Repeated 10 times\n" ); printf ( "-------------------------------------------------------------------------\n" ); /* for (i=0;i<10;i++) { printf("Test type : %12s%13s%13s\n", "1", "2", "3"); printf(TAB3, EVT1_STR, values[(i*3)+0][0], (long long)0, (long long)0); printf(TAB3, EVT2_STR, (long long)0, values[(i*3)+1][0], (long long)0); printf(TAB3, EVT3_STR, (long long)0, (long long)0, values[(i*3)+2][0]); printf ("-------------------------------------------------------------------------\n"); } */ printf( "Test type : %12s%13s%13s\n", "min", "max", "sum" ); printf( TAB3, EVT1_STR, min[0], max[0], sum[0] ); printf( TAB3, EVT2_STR, min[1], max[1], sum[1] ); printf( TAB3, EVT3_STR, min[2], max[2], sum[2] ); printf ( "-------------------------------------------------------------------------\n" ); printf( "Verification:\n" ); #if defined(sun) && defined(sparc) printf( TAB1, "Sum 1 approximately equals sum 2 - sum 3 or", ( sum[1] - sum[2] ) ); #else printf( TAB1, "Sum 1 approximately equals sum 2 + sum 3 or", ( sum[1] + sum[2] ) ); #endif } { long long tmin, tmax; #if defined(sun) && defined(sparc) tmax = ( long long ) ( sum[1] - sum[2] ); #else tmax = ( long long ) ( sum[1] + sum[2] ); #endif if (!quiet) { printf( "percent error: %f\n", (( float ) abs( ( int ) ( tmax - sum[0] ) ) / (float) sum[0] ) * 100.0 ); } tmin = ( long long ) ( ( double ) tmax * 0.8 ); tmax = ( long long ) ( ( double ) tmax * 1.2 ); if ( sum[0] > tmax || sum[0] < tmin ) { test_fail( __FILE__, __LINE__, 
CACHE_LEVEL, 1 ); } } test_pass( __FILE__ ); return 0; } papi-5.6.0/src/libpfm4/lib/events/intel_bdx_unc_r3qpi_events.h000664 001750 001750 00000064440 13216244364 026434 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2017 Google Inc. All rights reserved * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. 
 * * PMU: bdx_unc_r3qpi */

/*
 * Unit-mask tables for the bdx_unc_r3qpi PMU.  Each entry pairs a mask
 * name (.uname) with its encoding (.ucode) and a description (.udesc);
 * every .ucode in these tables is a value between 0x100 and 0xf00/0x8000,
 * i.e. it only sets bits 8-15.  The tables are attached to events through
 * the .umasks/.numasks fields of intel_bdx_unc_r3_pe[].
 */

/* Unit masks of UNC_R3_C_HI_AD_CREDITS_EMPTY. */
static intel_x86_umask_t bdx_unc_r3_c_hi_ad_credits_empty[]={
  { .uname = "CBO10", .ucode = 0x400, .udesc = "CBox AD Credits Empty", },
  { .uname = "CBO11", .ucode = 0x800, .udesc = "CBox AD Credits Empty", },
  { .uname = "CBO12", .ucode = 0x1000, .udesc = "CBox AD Credits Empty", },
  { .uname = "CBO13", .ucode = 0x2000, .udesc = "CBox AD Credits Empty", },
  { .uname = "CBO14_16", .ucode = 0x4000, .udesc = "CBox AD Credits Empty", },
  { .uname = "CBO8", .ucode = 0x100, .udesc = "CBox AD Credits Empty", },
  { .uname = "CBO9", .ucode = 0x200, .udesc = "CBox AD Credits Empty", },
  { .uname = "CBO_15_17", .ucode = 0x8000, .udesc = "CBox AD Credits Empty", },
};

/* Unit masks of UNC_R3_C_LO_AD_CREDITS_EMPTY. */
static intel_x86_umask_t bdx_unc_r3_c_lo_ad_credits_empty[]={
  { .uname = "CBO0", .ucode = 0x100, .udesc = "CBox AD Credits Empty", },
  { .uname = "CBO1", .ucode = 0x200, .udesc = "CBox AD Credits Empty", },
  { .uname = "CBO2", .ucode = 0x400, .udesc = "CBox AD Credits Empty", },
  { .uname = "CBO3", .ucode = 0x800, .udesc = "CBox AD Credits Empty", },
  { .uname = "CBO4", .ucode = 0x1000, .udesc = "CBox AD Credits Empty", },
  { .uname = "CBO5", .ucode = 0x2000, .udesc = "CBox AD Credits Empty", },
  { .uname = "CBO6", .ucode = 0x4000, .udesc = "CBox AD Credits Empty", },
  { .uname = "CBO7", .ucode = 0x8000, .udesc = "CBox AD Credits Empty", },
};

/*
 * Unit masks of UNC_R3_HA_R2_BL_CREDITS_EMPTY.
 * NOTE(review): the descriptions say "AD" although the table and its event
 * are named for the BL ring -- confirm against the Intel uncore guide.
 */
static intel_x86_umask_t bdx_unc_r3_ha_r2_bl_credits_empty[]={
  { .uname = "HA0", .ucode = 0x100, .udesc = "HA/R2 AD Credits Empty", },
  { .uname = "HA1", .ucode = 0x200, .udesc = "HA/R2 AD Credits Empty", },
  { .uname = "R2_NCB", .ucode = 0x400, .udesc = "HA/R2 AD Credits Empty", },
  { .uname = "R2_NCS", .ucode = 0x800, .udesc = "HA/R2 AD Credits Empty", },
};

/*
 * Unit masks of UNC_R3_QPI0_AD_CREDITS_EMPTY; the event table also attaches
 * this table to UNC_R3_QPI1_AD_CREDITS_EMPTY and UNC_R3_QPI1_BL_CREDITS_EMPTY.
 * NOTE(review): reuse by the QPI1 *BL* event may be a copy/paste slip (a
 * separate qpi0_bl table exists) -- confirm before relying on it.
 */
static intel_x86_umask_t bdx_unc_r3_qpi0_ad_credits_empty[]={
  { .uname = "VN0_HOM", .ucode = 0x200, .udesc = "VN0 HOM messages", },
  { .uname = "VN0_NDR", .ucode = 0x800, .udesc = "VN0 NDR messages", },
  { .uname = "VN0_SNP", .ucode = 0x400, .udesc = "VN0 SNP messages", },
  { .uname = "VN1_HOM", .ucode = 0x1000, .udesc = "VN1 HOM messages", },
  { .uname = "VN1_NDR", .ucode = 0x4000, .udesc = "VN1 NDR messages", },
  { .uname = "VN1_SNP", .ucode = 0x2000, .udesc = "VN1 SNP messages", },
  { .uname = "VNA", .ucode = 0x100, .udesc = "VNA messages", },
};

/* Unit masks of UNC_R3_QPI0_BL_CREDITS_EMPTY. */
static intel_x86_umask_t bdx_unc_r3_qpi0_bl_credits_empty[]={
  { .uname = "VN1_HOM", .ucode = 0x1000, .udesc = "QPIx BL Credits Empty", },
  { .uname = "VN1_NDR", .ucode = 0x4000, .udesc = "QPIx BL Credits Empty", },
  { .uname = "VN1_SNP", .ucode = 0x2000, .udesc = "QPIx BL Credits Empty", },
  { .uname = "VNA", .ucode = 0x100, .udesc = "QPIx BL Credits Empty", },
};

/*
 * Unit masks shared by UNC_R3_RING_AD_USED, UNC_R3_RING_AK_USED and
 * UNC_R3_RING_BL_USED.  CCW (0xc00) and CW (0x300) are the bitwise OR of
 * their respective _EVEN and _ODD encodings.
 */
static intel_x86_umask_t bdx_unc_r3_ring_ad_used[]={
  { .uname = "CCW", .ucode = 0xc00, .udesc = "Counterclockwise", .uflags = INTEL_X86_NCOMBO, },
  { .uname = "CCW_EVEN", .ucode = 0x400, .udesc = "Counterclockwise and Even", },
  { .uname = "CCW_ODD", .ucode = 0x800, .udesc = "Counterclockwise and Odd", },
  { .uname = "CW", .ucode = 0x300, .udesc = "Clockwise", .uflags = INTEL_X86_NCOMBO, },
  { .uname = "CW_EVEN", .ucode = 0x100, .udesc = "Clockwise and Even", },
  { .uname = "CW_ODD", .ucode = 0x200, .udesc = "Clockwise and Odd", },
};

/* Unit masks of UNC_R3_RING_IV_USED; ANY carries the INTEL_X86_DFL flag. */
static intel_x86_umask_t bdx_unc_r3_ring_iv_used[]={
  { .uname = "ANY", .ucode = 0xf00, .udesc = "Any", .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, },
  { .uname = "CW", .ucode = 0x300, .udesc = "Clockwise", .uflags = INTEL_X86_NCOMBO, },
};

/* Unit masks of UNC_R3_RING_SINK_STARVED (single entry, flagged INTEL_X86_DFL). */
static intel_x86_umask_t bdx_unc_r3_ring_sink_starved[]={
  { .uname = "AK", .ucode = 0x200, .udesc = "AK", .uflags = INTEL_X86_DFL, },
};

/* Unit masks of UNC_R3_RXR_CYCLES_NE. */
static intel_x86_umask_t bdx_unc_r3_rxr_cycles_ne[]={
  { .uname = "HOM", .ucode = 0x100, .udesc = "Ingress Cycles Not Empty -- HOM", },
  { .uname = "NDR", .ucode = 0x400, .udesc = "Ingress Cycles Not Empty -- NDR", },
  { .uname = "SNP", .ucode = 0x200, .udesc = "Ingress Cycles Not Empty -- SNP", },
};

/* Unit masks of UNC_R3_RXR_CYCLES_NE_VN1. */
static intel_x86_umask_t bdx_unc_r3_rxr_cycles_ne_vn1[]={
  { .uname = "DRS", .ucode = 0x800, .udesc = "VN1 Ingress Cycles Not Empty -- DRS", },
  { .uname = "HOM", .ucode = 0x100, .udesc = "VN1 Ingress Cycles Not Empty -- HOM", },
  { .uname = "NCB", .ucode = 0x1000, .udesc = "VN1 Ingress Cycles Not Empty -- NCB", },
  { .uname = "NCS", .ucode = 0x2000, .udesc = "VN1 Ingress Cycles Not Empty -- NCS", },
  { .uname = "NDR", .ucode = 0x400, .udesc = "VN1 Ingress Cycles Not Empty -- NDR", },
  { .uname = "SNP", .ucode = 0x200, .udesc = "VN1 Ingress Cycles Not Empty -- SNP", },
};

/*
 * Unit masks shared by UNC_R3_RXR_INSERTS, UNC_R3_RXR_INSERTS_VN1 and
 * UNC_R3_RXR_OCCUPANCY_VN1.
 */
static intel_x86_umask_t bdx_unc_r3_rxr_inserts[]={
  { .uname = "DRS", .ucode = 0x800, .udesc = "Ingress Allocations -- DRS", },
  { .uname = "HOM", .ucode = 0x100, .udesc = "Ingress Allocations -- HOM", },
  { .uname = "NCB", .ucode = 0x1000, .udesc = "Ingress Allocations -- NCB", },
  { .uname = "NCS", .ucode = 0x2000, .udesc = "Ingress Allocations -- NCS", },
  { .uname = "NDR", .ucode = 0x400, .udesc = "Ingress Allocations -- NDR", },
  { .uname = "SNP", .ucode = 0x200, .udesc = "Ingress Allocations -- SNP", },
};

/* Unit masks of UNC_R3_SBO0_CREDITS_ACQUIRED. */
static intel_x86_umask_t bdx_unc_r3_sbo0_credits_acquired[]={
  { .uname = "AD", .ucode = 0x100, .udesc = "SBo0 Credits Acquired -- For AD Ring", },
  { .uname = "BL", .ucode = 0x200, .udesc = "SBo0 Credits Acquired -- For BL Ring", },
};

/* Unit masks of UNC_R3_SBO1_CREDITS_ACQUIRED. */
static intel_x86_umask_t bdx_unc_r3_sbo1_credits_acquired[]={
  { .uname = "AD", .ucode = 0x100, .udesc = "SBo1 Credits Acquired -- For AD Ring", },
  { .uname = "BL", .ucode = 0x200, .udesc = "SBo1 Credits Acquired -- For BL Ring", },
};

/* Presumably the unit masks of UNC_R3_STALL_NO_SBO_CREDIT -- its .umasks field is outside this excerpt. */
static intel_x86_umask_t bdx_unc_r3_stall_no_sbo_credit[]={
  { .uname = "SBO0_AD", .ucode = 0x100, .udesc = "Stall on No Sbo Credits -- For SBo0, AD Ring", },
  { .uname = "SBO0_BL", .ucode = 0x400, .udesc = "Stall on No Sbo Credits -- For SBo0, BL Ring", },
  { .uname = "SBO1_AD", .ucode = 0x200, .udesc = "Stall on No Sbo Credits -- For SBo1, AD Ring", },
  { .uname = "SBO1_BL", .ucode = 0x800, .udesc = "Stall on No Sbo Credits -- For SBo1, BL Ring", },
};

/*
 * Egress NACK unit masks (the event entry using them is outside this excerpt).
 * NOTE(review): the UP_* descriptions do not match their names (UP_AD says
 * "AK CCW", UP_AK says "BL CW", UP_BL says "BL CCW") -- looks copy/pasted;
 * confirm against the Intel uncore guide before correcting.
 */
static intel_x86_umask_t bdx_unc_r3_txr_nack[]={
  { .uname = "DN_AD", .ucode = 0x100, .udesc = "Egress CCW NACK -- AD CCW", },
  { .uname = "DN_AK", .ucode = 0x400, .udesc = "Egress CCW NACK -- AK CCW", },
  { .uname = "DN_BL", .ucode = 0x200, .udesc = "Egress CCW NACK -- BL CCW", },
  { .uname = "UP_AD", .ucode = 0x800, .udesc = "Egress CCW NACK -- AK CCW", },
  { .uname = "UP_AK", .ucode = 0x2000, .udesc = "Egress CCW NACK -- BL CW", },
  { .uname = "UP_BL", .ucode = 0x1000, .udesc = "Egress CCW NACK -- BL CCW", },
};

/*
 * VN0 credit-reject unit masks, one per message class (the event entry
 * using them is outside this excerpt).
 * NOTE(review): every description reads "Failed on DRS" regardless of the
 * message class -- presumably inherited from the vendor text; verify.
 */
static intel_x86_umask_t bdx_unc_r3_vn0_credits_reject[]={
  { .uname = "DRS", .ucode = 0x800, .udesc = "VN0 Credit Acquisition Failed on DRS -- DRS Message Class", },
  { .uname = "HOM", .ucode = 0x100, .udesc = "VN0 Credit Acquisition Failed on DRS -- HOM Message Class", },
  { .uname = "NCB", .ucode = 0x1000, .udesc = "VN0 Credit Acquisition Failed on DRS -- NCB Message Class", },
  { .uname = "NCS", .ucode = 0x2000, .udesc = "VN0 Credit Acquisition Failed on DRS -- NCS Message Class", },
  { .uname = "NDR", .ucode = 0x400, .udesc = "VN0 Credit Acquisition Failed on DRS -- NDR Message Class", },
  { .uname = "SNP", .ucode = 0x200, .udesc = "VN0 Credit Acquisition Failed on DRS -- SNP Message Class", },
};

/* VN0 credit-used unit masks, one per message class (event entry outside this excerpt). */
static intel_x86_umask_t bdx_unc_r3_vn0_credits_used[]={
  { .uname = "DRS", .ucode = 0x800, .udesc = "VN0 Credit Used -- DRS Message Class", },
  { .uname = "HOM", .ucode = 0x100, .udesc = "VN0 Credit Used -- HOM Message Class", },
  { .uname = "NCB", .ucode = 0x1000, .udesc = "VN0 Credit Used -- NCB Message Class", },
  { .uname = "NCS", .ucode = 0x2000, .udesc = "VN0 Credit Used -- NCS Message Class", },
  { .uname = "NDR", .ucode = 0x400, .udesc = "VN0 Credit Used -- NDR Message Class", },
  { .uname = "SNP", .ucode = 0x200, .udesc = "VN0 Credit Used -- SNP Message Class", },
};

/*
 * VN1 credit-reject unit masks, one per message class (event entry outside
 * this excerpt).  Same names and encodings as the VN0 reject table.
 * NOTE(review): every description reads "Failed on DRS" regardless of the
 * message class -- presumably inherited from the vendor text; verify.
 */
static intel_x86_umask_t bdx_unc_r3_vn1_credits_reject[]={
  { .uname = "DRS", .ucode = 0x800, .udesc = "VN1 Credit Acquisition Failed on DRS -- DRS Message Class", },
  { .uname = "HOM", .ucode = 0x100, .udesc = "VN1 Credit Acquisition Failed on DRS -- HOM Message Class", },
  { .uname = "NCB", .ucode = 0x1000, .udesc = "VN1 Credit Acquisition Failed on DRS -- NCB Message Class", },
  { .uname = "NCS", .ucode = 0x2000, .udesc = "VN1 Credit Acquisition Failed on DRS -- NCS Message Class", },
  { .uname = "NDR", .ucode = 0x400, .udesc = "VN1 Credit Acquisition Failed on DRS -- NDR Message Class", },
  { .uname = "SNP", .ucode = 0x200, .udesc = "VN1 Credit Acquisition Failed on DRS -- SNP Message Class", },
};

/* VN1 credit-used unit masks, one per message class (event entry outside this excerpt). */
static intel_x86_umask_t bdx_unc_r3_vn1_credits_used[]={
  { .uname = "DRS", .ucode = 0x800, .udesc = "VN1 Credit Used -- DRS Message Class", },
  { .uname = "HOM", .ucode = 0x100, .udesc = "VN1 Credit Used -- HOM Message Class", },
  { .uname = "NCB", .ucode = 0x1000, .udesc = "VN1 Credit Used -- NCB Message Class", },
  { .uname = "NCS", .ucode = 0x2000, .udesc = "VN1 Credit Used -- NCS Message Class", },
  { .uname = "NDR", .ucode = 0x400, .udesc = "VN1 Credit Used -- NDR Message Class", },
  { .uname = "SNP", .ucode = 0x200, .udesc = "VN1 Credit Used -- SNP Message Class", },
};

/*
 * VNA credit-acquired unit masks (event entry outside this excerpt).
 * NOTE(review): both the AD and the BL entry are described as "HOM Message
 * Class" -- looks copy/pasted; confirm against the Intel uncore guide.
 */
static intel_x86_umask_t bdx_unc_r3_vna_credits_acquired[]={
  { .uname = "AD", .ucode = 0x100, .udesc = "VNA credit Acquisitions -- HOM Message Class", },
  { .uname = "BL", .ucode = 0x400, .udesc = "VNA credit Acquisitions -- HOM Message Class", },
};

/* VNA credit-reject unit masks, one per message class (event entry outside this excerpt). */
static intel_x86_umask_t bdx_unc_r3_vna_credits_reject[]={
  { .uname = "DRS", .ucode = 0x800, .udesc = "VNA Credit Reject -- DRS Message Class", },
  { .uname = "HOM", .ucode = 0x100, .udesc = "VNA Credit Reject -- HOM Message Class", },
  { .uname = "NCB", .ucode = 0x1000, .udesc = "VNA Credit Reject -- NCB Message Class", },
  { .uname = "NCS", .ucode = 0x2000, .udesc = "VNA Credit Reject -- NCS Message Class", },
  { .uname = "NDR", .ucode = 0x400, .udesc = "VNA Credit Reject -- NDR Message Class", },
  { .uname = "SNP", .ucode = 0x200, .udesc = "VNA Credit Reject -- SNP Message Class", },
};

static intel_x86_entry_t intel_bdx_unc_r3_pe[]={ { .name = "UNC_R3_CLOCKTICKS", .code = 0x1, .desc = "Counts the number of uclks in the QPI uclk domain.
This could be slightly different than the count in the Ubox because of enable/freeze delays. However, because the QPI Agent is close to the Ubox, they generally should not diverge by more than a handful of cycles.", .modmsk = BDX_UNC_R3QPI_ATTRS, .cntmsk = 0x7, }, { .name = "UNC_R3_C_HI_AD_CREDITS_EMPTY", .code = 0x1f, .desc = "No credits available to send to Cbox on the AD Ring (covers higher CBoxes)", .modmsk = BDX_UNC_R3QPI_ATTRS, .cntmsk = 0x3, .ngrp = 1, .umasks = bdx_unc_r3_c_hi_ad_credits_empty, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_r3_c_hi_ad_credits_empty), }, { .name = "UNC_R3_C_LO_AD_CREDITS_EMPTY", .code = 0x22, .desc = "No credits available to send to Cbox on the AD Ring (covers lower CBoxes)", .modmsk = BDX_UNC_R3QPI_ATTRS, .cntmsk = 0x3, .ngrp = 1, .umasks = bdx_unc_r3_c_lo_ad_credits_empty, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_r3_c_lo_ad_credits_empty), }, { .name = "UNC_R3_HA_R2_BL_CREDITS_EMPTY", .code = 0x2d, .desc = "No credits available to send to either HA or R2 on the BL Ring", .modmsk = BDX_UNC_R3QPI_ATTRS, .cntmsk = 0x3, .ngrp = 1, .umasks = bdx_unc_r3_ha_r2_bl_credits_empty, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_r3_ha_r2_bl_credits_empty), }, { .name = "UNC_R3_QPI0_AD_CREDITS_EMPTY", .code = 0x20, .desc = "No credits available to send to QPI0 on the AD Ring", .modmsk = BDX_UNC_R3QPI_ATTRS, .cntmsk = 0x3, .ngrp = 1, .umasks = bdx_unc_r3_qpi0_ad_credits_empty, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_r3_qpi0_ad_credits_empty), }, { .name = "UNC_R3_QPI0_BL_CREDITS_EMPTY", .code = 0x21, .desc = "No credits available to send to QPI0 on the BL Ring", .modmsk = BDX_UNC_R3QPI_ATTRS, .cntmsk = 0x3, .ngrp = 1, .umasks = bdx_unc_r3_qpi0_bl_credits_empty, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_r3_qpi0_bl_credits_empty), }, { .name = "UNC_R3_QPI1_AD_CREDITS_EMPTY", .code = 0x2e, .desc = "No credits available to send to QPI1 on the AD Ring", .modmsk = BDX_UNC_R3QPI_ATTRS, .cntmsk = 0x3, .ngrp = 1, .umasks = bdx_unc_r3_qpi0_ad_credits_empty, .numasks= 
LIBPFM_ARRAY_SIZE(bdx_unc_r3_qpi0_ad_credits_empty), }, { .name = "UNC_R3_QPI1_BL_CREDITS_EMPTY", .code = 0x2f, .desc = "No credits available to send to QPI1 on the BL Ring", .modmsk = BDX_UNC_R3QPI_ATTRS, .cntmsk = 0x3, .ngrp = 1, .umasks = bdx_unc_r3_qpi0_ad_credits_empty, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_r3_qpi0_ad_credits_empty), }, { .name = "UNC_R3_RING_AD_USED", .code = 0x7, .desc = "Counts the number of cycles that the AD ring is being used at this ring stop. This includes when packets are passing by and when packets are being sunk, but does not include when packets are being sent from the ring stop.", .modmsk = BDX_UNC_R3QPI_ATTRS, .cntmsk = 0x7, .ngrp = 1, .umasks = bdx_unc_r3_ring_ad_used, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_r3_ring_ad_used), }, { .name = "UNC_R3_RING_AK_USED", .code = 0x8, .desc = "Counts the number of cycles that the AK ring is being used at this ring stop. This includes when packets are passing by and when packets are being sunk, but does not include when packets are being sent from the ring stop.", .modmsk = BDX_UNC_R3QPI_ATTRS, .cntmsk = 0x7, .ngrp = 1, .umasks = bdx_unc_r3_ring_ad_used, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_r3_ring_ad_used), }, { .name = "UNC_R3_RING_BL_USED", .code = 0x9, .desc = "Counts the number of cycles that the BL ring is being used at this ring stop. This includes when packets are passing by and when packets are being sunk, but does not include when packets are being sent from the ring stop.", .modmsk = BDX_UNC_R3QPI_ATTRS, .cntmsk = 0x7, .ngrp = 1, .umasks = bdx_unc_r3_ring_ad_used, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_r3_ring_ad_used), }, { .name = "UNC_R3_RING_IV_USED", .code = 0xa, .desc = "Counts the number of cycles that the IV ring is being used at this ring stop. 
This includes when packets are passing by and when packets are being sent, but does not include when packets are being sunk into the ring stop.", .modmsk = BDX_UNC_R3QPI_ATTRS, .cntmsk = 0x7, .ngrp = 1, .umasks = bdx_unc_r3_ring_iv_used, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_r3_ring_iv_used), }, { .name = "UNC_R3_RING_SINK_STARVED", .code = 0xe, .desc = "Number of cycles the ringstop is in starvation (per ring)", .modmsk = BDX_UNC_R3QPI_ATTRS, .cntmsk = 0x7, .ngrp = 1, .umasks = bdx_unc_r3_ring_sink_starved, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_r3_ring_sink_starved), }, { .name = "UNC_R3_RXR_CYCLES_NE", .code = 0x10, .desc = "Counts the number of cycles when the QPI Ingress is not empty. This tracks one of the three rings that are used by the QPI agent. This can be used in conjunction with the QPI Ingress Occupancy Accumulator event in order to calculate average queue occupancy. Multiple ingress buffers can be tracked at a given time using multiple counters.", .modmsk = BDX_UNC_R3QPI_ATTRS, .cntmsk = 0x3, .ngrp = 1, .umasks = bdx_unc_r3_rxr_cycles_ne, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_r3_rxr_cycles_ne), }, { .name = "UNC_R3_RXR_CYCLES_NE_VN1", .code = 0x14, .desc = "Counts the number of cycles when the QPI VN1 Ingress is not empty. This tracks one of the three rings that are used by the QPI agent. This can be used in conjunction with the QPI VN1 Ingress Occupancy Accumulator event in order to calculate average queue occupancy. Multiple ingress buffers can be tracked at a given time using multiple counters.", .modmsk = BDX_UNC_R3QPI_ATTRS, .cntmsk = 0x3, .ngrp = 1, .umasks = bdx_unc_r3_rxr_cycles_ne_vn1, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_r3_rxr_cycles_ne_vn1), }, { .name = "UNC_R3_RXR_INSERTS", .code = 0x11, .desc = "Counts the number of allocations into the QPI Ingress. This tracks one of the three rings that are used by the QPI agent. This can be used in conjunction with the QPI Ingress Occupancy Accumulator event in order to calculate average queue latency. 
Multiple ingress buffers can be tracked at a given time using multiple counters.", .modmsk = BDX_UNC_R3QPI_ATTRS, .cntmsk = 0x3, .ngrp = 1, .umasks = bdx_unc_r3_rxr_inserts, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_r3_rxr_inserts), }, { .name = "UNC_R3_RXR_INSERTS_VN1", .code = 0x15, .desc = "Counts the number of allocations into the QPI VN1 Ingress. This tracks one of the three rings that are used by the QPI agent. This can be used in conjunction with the QPI VN1 Ingress Occupancy Accumulator event in order to calculate average queue latency. Multiple ingress buffers can be tracked at a given time using multiple counters.", .modmsk = BDX_UNC_R3QPI_ATTRS, .cntmsk = 0x3, .ngrp = 1, .umasks = bdx_unc_r3_rxr_inserts, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_r3_rxr_inserts), }, { .name = "UNC_R3_RXR_OCCUPANCY_VN1", .code = 0x13, .desc = "Accumulates the occupancy of a given QPI VN1 Ingress queue in each cycles. This tracks one of the three ring Ingress buffers. This can be used with the QPI VN1 Ingress Not Empty event to calculate average occupancy or the QPI VN1 Ingress Allocations event in order to calculate average queuing latency.", .modmsk = BDX_UNC_R3QPI_ATTRS, .cntmsk = 0x1, .ngrp = 1, .umasks = bdx_unc_r3_rxr_inserts, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_r3_rxr_inserts), }, { .name = "UNC_R3_SBO0_CREDITS_ACQUIRED", .code = 0x28, .desc = "Number of Sbo 0 credits acquired in a given cycle, per ring.", .modmsk = BDX_UNC_R3QPI_ATTRS, .cntmsk = 0x3, .ngrp = 1, .umasks = bdx_unc_r3_sbo0_credits_acquired, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_r3_sbo0_credits_acquired), }, { .name = "UNC_R3_SBO1_CREDITS_ACQUIRED", .code = 0x29, .desc = "Number of Sbo 1 credits acquired in a given cycle, per ring.", .modmsk = BDX_UNC_R3QPI_ATTRS, .cntmsk = 0x3, .ngrp = 1, .umasks = bdx_unc_r3_sbo1_credits_acquired, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_r3_sbo1_credits_acquired), }, { .name = "UNC_R3_STALL_NO_SBO_CREDIT", .code = 0x2c, .desc = "Number of cycles Egress is stalled waiting for an Sbo 
credit to become available. Per Sbo, per Ring.", .modmsk = BDX_UNC_R3QPI_ATTRS, .cntmsk = 0x3, .ngrp = 1, .umasks = bdx_unc_r3_stall_no_sbo_credit, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_r3_stall_no_sbo_credit), }, { .name = "UNC_R3_TXR_NACK", .code = 0x26, .desc = "", .modmsk = BDX_UNC_R3QPI_ATTRS, .cntmsk = 0x3, .ngrp = 1, .umasks = bdx_unc_r3_txr_nack, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_r3_txr_nack), }, { .name = "UNC_R3_VN0_CREDITS_REJECT", .code = 0x37, .desc = "Number of times a request failed to acquire a DRS VN0 credit. In order for a request to be transferred across QPI, it must be guaranteed to have a flit buffer on the remote socket to sink into. There are two credit pools, VNA and VN0. VNA is a shared pool used to achieve high performance. The VN0 pool has reserved entries for each message class and is used to prevent deadlock. Requests first attempt to acquire a VNA credit, and then fall back to VN0 if they fail. This therefore counts the number of times when a request failed to acquire either a VNA or VN0 credit and is delayed. This should generally be a rare situation.", .modmsk = BDX_UNC_R3QPI_ATTRS, .cntmsk = 0x3, .ngrp = 1, .umasks = bdx_unc_r3_vn0_credits_reject, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_r3_vn0_credits_reject), }, { .name = "UNC_R3_VN0_CREDITS_USED", .code = 0x36, .desc = "Number of times a VN0 credit was used on the DRS message channel. In order for a request to be transferred across QPI, it must be guaranteed to have a flit buffer on the remote socket to sink into. There are two credit pools, VNA and VN0. VNA is a shared pool used to achieve high performance. The VN0 pool has reserved entries for each message class and is used to prevent deadlock. Requests first attempt to acquire a VNA credit, and then fall back to VN0 if they fail. This counts the number of times a VN0 credit was used. Note that a single VN0 credit holds access to potentially multiple flit buffers. 
For example, a transfer that uses VNA could use 9 flit buffers and in that case uses 9 credits. A transfer on VN0 will only count a single credit even though it may use multiple buffers.", .modmsk = BDX_UNC_R3QPI_ATTRS, .cntmsk = 0x3, .ngrp = 1, .umasks = bdx_unc_r3_vn0_credits_used, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_r3_vn0_credits_used), }, { .name = "UNC_R3_VN1_CREDITS_REJECT", .code = 0x39, .desc = "Number of times a request failed to acquire a VN1 credit. In order for a request to be transferred across QPI, it must be guaranteed to have a flit buffer on the remote socket to sink into. There are two credit pools, VNA and VN1. VNA is a shared pool used to achieve high performance. The VN1 pool has reserved entries for each message class and is used to prevent deadlock. Requests first attempt to acquire a VNA credit, and then fall back to VN1 if they fail. This therefore counts the number of times when a request failed to acquire either a VNA or VN1 credit and is delayed. This should generally be a rare situation.", .modmsk = BDX_UNC_R3QPI_ATTRS, .cntmsk = 0x3, .ngrp = 1, .umasks = bdx_unc_r3_vn1_credits_reject, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_r3_vn1_credits_reject), }, { .name = "UNC_R3_VN1_CREDITS_USED", .code = 0x38, .desc = "Number of times a VN1 credit was used on the DRS message channel. In order for a request to be transferred across QPI, it must be guaranteed to have a flit buffer on the remote socket to sink into. There are two credit pools, VNA and VN1. VNA is a shared pool used to achieve high performance. The VN1 pool has reserved entries for each message class and is used to prevent deadlock. Requests first attempt to acquire a VNA credit, and then fall back to VN1 if they fail. This counts the number of times a VN1 credit was used. Note that a single VN1 credit holds access to potentially multiple flit buffers. For example, a transfer that uses VNA could use 9 flit buffers and in that case uses 9 credits. 
A transfer on VN1 will only count a single credit even though it may use multiple buffers.", .modmsk = BDX_UNC_R3QPI_ATTRS, .cntmsk = 0x3, .ngrp = 1, .umasks = bdx_unc_r3_vn1_credits_used, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_r3_vn1_credits_used), }, { .name = "UNC_R3_VNA_CREDITS_ACQUIRED", .code = 0x33, .desc = "Number of QPI VNA Credit acquisitions. This event can be used in conjunction with the VNA In-Use Accumulator to calculate the average lifetime of a credit holder. VNA credits are used by all message classes in order to communicate across QPI. If a packet is unable to acquire credits, it will then attempt to use credts from the VN0 pool. Note that a single packet may require multiple flit buffers (i.e. when data is being transfered). Therefore, this event will increment by the number of credits acquired in each cycle. Filtering based on message class is not provided. One can count the number of packets transfered in a given message class using an qfclk event.", .modmsk = BDX_UNC_R3QPI_ATTRS, .cntmsk = 0x3, .ngrp = 1, .umasks = bdx_unc_r3_vna_credits_acquired, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_r3_vna_credits_acquired), }, { .name = "UNC_R3_VNA_CREDITS_REJECT", .code = 0x34, .desc = "Number of attempted VNA credit acquisitions that were rejected because the VNA credit pool was full (or almost full). It is possible to filter this event by message class. Some packets use more than one flit buffer, and therefore must acquire multiple credits. Therefore, one could get a reject even if the VNA credits were not fully used up. The VNA pool is generally used to provide the bulk of the QPI bandwidth (as opposed to the VN0 pool which is used to guarantee forward progress). VNA credits can run out if the flit buffer on the receiving side starts to queue up substantially. 
This can happen if the rest of the uncore is unable to drain the requests fast enough.", .modmsk = BDX_UNC_R3QPI_ATTRS, .cntmsk = 0x3, .ngrp = 1, .umasks = bdx_unc_r3_vna_credits_reject, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_r3_vna_credits_reject), }, }; papi-5.6.0/src/libpfm-3.y/examples_v3.x/multiplex.c000664 001750 001750 00000066760 13216244362 024105 0ustar00jshenry1963jshenry1963000000 000000 /* * multiplex2.c - example of kernel-level time-based or overflow-based event multiplexing * * Copyright (c) 2004-2006 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as * published by the Free Software Foundation; either version 2 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA * 02111-1307 USA */ #ifndef _GNU_SOURCE #define _GNU_SOURCE /* for getline */ #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "detect_pmcs.h" #define MIN_FULL_PERIODS 2 #define MAX_EVT_NAME_LEN 128 #define MULTIPLEX_VERSION "0.2" #define SMPL_FREQ_IN_HZ 300 #define NUM_PMCS PMU_MAX_PMCS #define NUM_PMDS PMU_MAX_PMDS #define MAX_NUM_COUNTERS NUM_PMDS #define MAX_PMU_NAME_LEN 32 typedef struct { struct { int opt_plm; /* which privilege level to monitor (more than one possible) */ int opt_debug; /* print debug information */ int opt_verbose; /* verbose output */ int opt_us_format; /* print large numbers with comma for thousands */ int opt_ovfl_switch; /* overflow-based switching */ int opt_is_system; /* use system-wide */ int opt_intr_only; /* interrupts only*/ int opt_no_cmd_out; /* redirect cmd output to /dev/null */ int opt_no_header; /* no header */ } program_opt_flags; unsigned long max_counters; /* maximum number of counter for the platform */ unsigned long session_timeout; uint64_t smpl_period; uint32_t smpl_freq; unsigned long cpu_mhz; pid_t attach_pid; int pin_cmd_cpu; int pin_cpu; struct timespec switch_timeout; } program_options_t; #define opt_plm program_opt_flags.opt_plm #define opt_debug program_opt_flags.opt_debug #define opt_verbose program_opt_flags.opt_verbose #define opt_us_format program_opt_flags.opt_us_format #define opt_ovfl_switch program_opt_flags.opt_ovfl_switch #define opt_is_system program_opt_flags.opt_is_system #define opt_intr_only program_opt_flags.opt_intr_only #define opt_no_cmd_out program_opt_flags.opt_no_cmd_out #define opt_no_header program_opt_flags.opt_no_header typedef struct 
_event_set_t { struct _event_set_t *next; unsigned short id; unsigned int n_events; unsigned int pmcs_base; unsigned int pmds_base; int npmcs; int npmds; unsigned long set_runs; char *event_str; } event_set_t; static program_options_t options; static pfarg_pmr_t *all_pmcs; static pfarg_pmd_attr_t *all_pmds; static uint64_t *all_values; static event_set_t *current_set, *all_sets; static unsigned int num_pmds, num_pmcs, num_sets, total_events; static unsigned long full_periods; static volatile int time_to_quit; static jmp_buf jbuf; static void fatal_error(char *fmt,...) __attribute__((noreturn)); static void vbprintf(char *fmt, ...) { va_list ap; if (options.opt_verbose == 0) return; va_start(ap, fmt); vprintf(fmt, ap); va_end(ap); } static void fatal_error(char *fmt, ...) { va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); exit(1); } /* * unreliable for CPU with variable clock speed */ static unsigned long get_cpu_speed(void) { FILE *fp1; unsigned long f1 = 0, f2 = 0; char buffer[128], *p, *value; memset(buffer, 0, sizeof(buffer)); fp1 = fopen("/proc/cpuinfo", "r"); if (fp1 == NULL) return 0; for (;;) { buffer[0] = '\0'; p = fgets(buffer, 127, fp1); if (p == NULL) break; /* skip blank lines */ if (*p == '\n') continue; p = strchr(buffer, ':'); if (p == NULL) break; /* * p+2: +1 = space, +2= firt character * strlen()-1 gets rid of \n */ *p = '\0'; value = p+2; value[strlen(value)-1] = '\0'; if (!strncmp("cpu MHz", buffer, 7)) { float fl; sscanf(value, "%f", &fl); f1 = lroundf(fl); break; } if (!strncmp("BogoMIPS", buffer, 8)) { float fl; sscanf(value, "%f", &fl); f2 = lroundf(fl); } } fclose(fp1); return f1 == 0 ? 
f2 : f1; } /* * pin task to CPU */ #ifndef __NR_sched_setaffinity #error "you need to define __NR_sched_setaffinity" #endif #define MAX_CPUS 2048 #define NR_CPU_BITS (MAX_CPUS>>3) int pin_cpu(pid_t pid, unsigned int cpu) { uint64_t my_mask[NR_CPU_BITS]; if (cpu >= MAX_CPUS) fatal_error("this program supports only up to %d CPUs\n", MAX_CPUS); my_mask[cpu>>6] = 1ULL << (cpu&63); return syscall(__NR_sched_setaffinity, pid, sizeof(my_mask), &my_mask); } int child(char **arg) { ptrace(PTRACE_TRACEME, 0, NULL, NULL); if (options.pin_cmd_cpu != -1) { pin_cpu(getpid(), options.pin_cmd_cpu); vbprintf("command running on CPU core %d\n", options.pin_cmd_cpu); } if (options.opt_no_cmd_out) { close(1); close(2); } execvp(arg[0], arg); /* not reached */ exit(1); } static void dec2sep(char *str2, char *str, char sep) { int i, l, b, j, c=0; l = strlen(str2); if (l <= 3) { strcpy(str, str2); return; } b = l + l /3 - (l%3 == 0); /* l%3=correction to avoid extraneous comma at the end */ for(i=l, j=0; i >= 0; i--, j++) { if (j) c++; str[b-j] = str2[i]; if (c == 3 && i>0) { str[b-++j] = sep; c = 0; } } } static void print_results(void) { unsigned int i, j, cnt; int ovfl_adj; uint64_t value, set_runs; event_set_t *e; char *p; char tmp1[32], tmp2[32], *str; char mtotal_str[32], *mtotal; char stotal_str[32], *stotal; if (full_periods < num_sets) fatal_error("not all sets have been activated, need to run longer %lu\n", full_periods); /* * print the results * * It is important to realize, that the first event we specified may not * be in PMD4. Not all events can be measured by any monitor. That's why * we need to use the pc[] array to figure out where event i was allocated. 
* */ if (options.opt_no_header == 0) { printf("# %u Hz period = %u usecs\n# %"PRIu64" cycles @ %lu MHz\n", options.smpl_freq, 1000000 / options.smpl_freq, options.smpl_period, options.cpu_mhz); if (options.opt_ovfl_switch == 0) printf("# using time-based multiplexing\n" "# %uus effective switch timeout\n", 1000000 / options.smpl_freq); else printf("# using overflow-based multiplexing\n"); if (options.opt_is_system) printf("# system-wide mode on CPU core %d\n",options.pin_cpu); printf("# %d sets\n", num_sets); printf("# %.2f average run per set\n", (double)full_periods/num_sets); printf("# set measured total #runs scaled total event name\n"); printf("# ------------------------------------------------------------------\n"); } ovfl_adj= options.opt_ovfl_switch ? 1 : 0; for (i=0, e = all_sets, cnt = 0; i < num_sets; i++, e = e->next) { set_runs = e->set_runs; str = e->event_str; for(j=0; j < e->npmds-ovfl_adj; j++, cnt++) { value = all_values[j+e->pmds_base]; sprintf(tmp1, "%"PRIu64, value); if (options.opt_us_format) { dec2sep(tmp1, mtotal_str, ','); } else { strcpy(mtotal_str, tmp1); } mtotal = mtotal_str; /* * scaling */ sprintf(tmp2, "%"PRIu64, ((value*full_periods)/set_runs)); if (options.opt_us_format) { dec2sep(tmp2, stotal_str, ','); } else { strcpy(stotal_str, tmp2); } stotal = stotal_str; printf(" %03d %20s %8"PRIu64" %20s %s\n", i, mtotal, set_runs, stotal, str); p = strchr(str, '\0'); if (p) str = p+1; } /* * skip first event */ if (options.opt_ovfl_switch) cnt++; } } static void update_set(int ctxid) { int count; int base; int ret; int i; base = current_set->pmds_base; /* * we do not read the last counter (cpu_cycles) to avoid overwriting * the reg_value field which will be used for next round * * We need to retry the read in case we get EBUSY because it means that * the child task context is not yet available from inspection by PFM_READ_PMDS2. 
* */ count = current_set->npmds; if (options.opt_ovfl_switch) count--; ret = pfm_read(ctxid, 0, PFM_RW_PMD_ATTR, all_pmds + base, count * sizeof(*all_pmds)); if (ret == -1) fatal_error("error reading set: %s\n", strerror(errno)); /* update counts for this set */ for (i=0; i < count; i++) { all_values[base+i] += all_pmds[base+i].reg_value; /* reset for next round */ all_pmds[base+i].reg_value = 0UL; } } static void switch_sets(int ctxid) { update_set(ctxid); current_set = current_set->next; if (current_set == NULL) current_set = all_sets; current_set->set_runs++; vbprintf("starting set %d run %lu\n", current_set->id, current_set->set_runs); /* * we must reprogram all avaibale PMCs (or PMDS) to ensure that no * state is left over from the previous set and which could conflict * on restart */ if (pfm_write(ctxid, 0, PFM_RW_PMC, all_pmcs+current_set->pmcs_base, current_set->npmcs * sizeof(*all_pmcs)) == -1) fatal_error("error writing pmcs: %s\n", strerror(errno)); if (pfm_write(ctxid, 0, PFM_RW_PMD_ATTR, all_pmds+current_set->pmds_base, current_set->npmds * sizeof(*all_pmds)) == -1) fatal_error("error writing pmds: %s\n", strerror(errno)); full_periods++; if (options.opt_ovfl_switch && pfm_set_state(ctxid, 0, PFM_ST_RESTART) == -1) { if (errno != EBUSY) fatal_error("error pfm_set_state(restart): %s\n", strerror(errno)); /* * in case of EBUSY, it probably means the task has exited now */ } } static void sigintr_handler(int sig) { if (sig == SIGALRM) time_to_quit = 1; else time_to_quit = 2; longjmp(jbuf, 1); } static void sigchld_handler(int sig) { time_to_quit = 1; } static int measure_one_task(char **argv) { int ctxid; struct pollfd pollfd; pfarg_sinfo_t sif; pid_t pid; int status, ret; /* * create the context */ ctxid = pfm_create(0, &sif); if (ctxid == -1 ) { if (errno == ENOSYS) { fatal_error("Your kernel does not have performance monitoring support!\n"); } fatal_error("cannot create session %s\n", strerror(errno)); } /* * set close-on-exec to ensure we will be 
getting the PFM_END_MSG, i.e.,
	 * fd not visible to child.
	 */
	if (fcntl(ctxid, F_SETFD, FD_CLOEXEC))
		fatal_error("cannot set CLOEXEC: %s\n", strerror(errno));

	/*
	 * write registers for first set
	 */
	if (pfm_write(ctxid, 0, PFM_RW_PMC, all_pmcs+current_set->pmcs_base, current_set->npmcs * sizeof(*all_pmcs)) == -1)
		fatal_error("error pfm_write: %s\n", strerror(errno));

	if (pfm_write(ctxid, 0, PFM_RW_PMD_ATTR, all_pmds+current_set->pmds_base, current_set->npmds * sizeof(*all_pmds)) == -1)
		fatal_error("error pfm_write: %s\n", strerror(errno));

	/*
	 * now launch the child code
	 */
	if (options.attach_pid == 0) {
		if ((pid= fork()) == -1) fatal_error("Cannot fork process\n");
		if (pid == 0) exit(child(argv));
	} else {
		/* attach to an already running task instead of forking one */
		pid = options.attach_pid;
		ret = ptrace(PTRACE_ATTACH, pid, NULL, 0);
		if (ret) {
			fatal_error("cannot attach to task %d: %s\n",options.attach_pid, strerror(errno));
		}
	}

	/* wait until the task is stopped (either path above leaves it traced) */
	ret = waitpid(pid, &status, WUNTRACED);
	if (ret < 0 || WIFEXITED(status))
		fatal_error("error command already terminated, exit code %d\n", WEXITSTATUS(status));

	vbprintf("child created and stopped\n");

	/*
	 * now attach the context
	 */
	if (pfm_attach(ctxid, 0, pid) == -1)
		fatal_error("pfm_attach error errno %d\n",errno);

	/* the first set is about to run for the first time */
	current_set->set_runs = 1;

	/*
	 * start monitoring
	 */
	if (pfm_set_state(ctxid, 0, PFM_ST_START) == -1)
		fatal_error("pfm_set_state(start) error errno %d\n",errno);

	/* let the task run */
	ptrace(PTRACE_DETACH, pid, NULL, 0);

	/* sigintr_handler() comes back here with longjmp(jbuf, 1) */
	if (setjmp(jbuf) == 1) {
		if (time_to_quit == 1) {
			printf("timeout expired\n");
		}
		if (time_to_quit == 2)
			printf("session interrupted\n");
		goto finish_line;
	}

	if (options.session_timeout) {
		/*
		 * NOTE(review): the format string has no conversion for the
		 * session_timeout argument; the message text looks truncated
		 * in this copy - confirm against the upstream source.
		 */
		printf("\n", options.session_timeout);
		alarm(options.session_timeout);
	}

	pollfd.fd = ctxid;
	pollfd.events = POLLIN;
	pollfd.revents = 0;

	while(time_to_quit == 0) {
		/*
		 * mainloop. poll timeout is in msecs
		 */
		ret = poll(&pollfd, 1, 1000 / options.smpl_freq);
		switch(ret) {
			case 0:
				/* timeout: stop the task, switch to the next set, resume it */
				ret = ptrace(PTRACE_ATTACH, pid, NULL, 0);
				if (ret) {
					time_to_quit = 1;
					break;
				}
				/* NOTE(review): ret is not checked before status is inspected */
				ret = waitpid(pid, &status, WUNTRACED);
				/*
				 * exit with time_to_quit = 0
				 * to avoid unloading from dead thread
				 */
				if (WIFEXITED(status))
					goto finish_line;

				switch_sets(ctxid);

				ptrace(PTRACE_DETACH, pid, NULL, 0);
				break;
			case -1:
				fatal_error("poll error: %s\n", strerror(errno));
			default:
				/* we don't even read END_MSG */
				time_to_quit = 1;
		}
	}
finish_line:
	/*
	 * cleanup after an alarm timeout
	 */
	if (time_to_quit) {
		/* stop monitored task */
		ptrace(PTRACE_ATTACH, pid, NULL, 0);
		waitpid(pid, NULL, WUNTRACED);

		/* detach context */
		pfm_unload_context(ctxid);
	}

	/* a task we forked is killed, a task we attached to is left running */
	if (options.attach_pid == 0) {
		kill(pid, SIGKILL);
		waitpid(pid, &status, 0);
	} else {
		ptrace(PTRACE_DETACH, pid, NULL, 0);
	}

	/* no results when the session was interrupted (time_to_quit == 2) */
	if (time_to_quit < 2) print_results();

	close(ctxid);

	return 0;
}

/*
 * Measure one CPU in system-wide mode, optionally while running the
 * command in argv.
 */
static int
measure_one_cpu(char **argv)
{
	pfarg_sinfo_t sif;
	int ctxid, status;
	struct pollfd pollfd;
	pid_t pid = 0;
	int ret, timeout;

	/* no CPU requested: use CPU 0 and pin ourselves onto it */
	if (options.pin_cpu == -1) {
		options.pin_cpu = 0;
		printf("forcing monitoring onto CPU core 0\n");
		pin_cpu(getpid(), 0);
	}

	/*
	 * create the context
	 */
	ctxid = pfm_create(PFM_FL_SYSTEM_WIDE, &sif);
	if (ctxid == -1) {
		if (errno == ENOSYS) {
			fatal_error("Your kernel does not have performance monitoring support!\n");
		}
		fatal_error("cannot create session %s\n", strerror(errno));
	}

	/*
	 * set close-on-exec to ensure we will be getting the PFM_END_MSG, i.e.,
	 * fd not visible to child.
	 */
	if (fcntl(ctxid, F_SETFD, FD_CLOEXEC))
		fatal_error("cannot set CLOEXEC: %s\n", strerror(errno));

	/*
	 * Now program the all the registers in one call
	 *
	 * Note that there is a limitation on the size of the argument vector
	 * that can be passed. It is usually set to a page size (16KB).
*/ if (pfm_write(ctxid, 0, PFM_RW_PMC, all_pmcs+current_set->pmcs_base, current_set->npmcs * sizeof(*all_pmcs)) == -1) fatal_error("error: pfm_write errno: %s\n", strerror(errno)); /* * initialize the PMD registers. * * To be read, each PMD must be either written or declared * as being part of a sample (reg_smpl_pmds) */ if (pfm_write(ctxid, 0, PFM_RW_PMD_ATTR, all_pmds+current_set->pmds_base, current_set->npmds * sizeof(*all_pmds)) == -1) fatal_error("pfm_write(PMD) error errno %d\n", strerror(errno)); /* * now launch the child code */ if (*argv) { if ((pid = fork()) == -1) fatal_error("Cannot fork process\n"); if (pid == 0) exit(child(argv)); } /* * wait for the child to exec or be stopped * We do this even in system-wide mode to ensure * that the task does not start until we are ready * to monitor. */ if (pid) { ret = waitpid(pid, &status, WUNTRACED); if (ret < 0 || WIFEXITED(status)) fatal_error("error command already terminated, exit code %d\n", WEXITSTATUS(status)); vbprintf("child created and stopped\n"); } /* * now attach the context */ if (pfm_attach(ctxid, 0, options.pin_cpu) == -1) fatal_error("pfm_attach error errno %d\n",errno); /* * start monitoring */ if (pfm_set_state(ctxid, 0, PFM_ST_START) == -1) fatal_error("pfm_set_state(start) error errno %d\n",errno); if (pid) { signal(SIGCHLD, sigchld_handler); ptrace(PTRACE_DETACH, pid, NULL, 0); } /* * mainloop */ pollfd.fd = ctxid; pollfd.events = POLLIN; pollfd.revents = 0; timeout = options.opt_ovfl_switch ? -1 : (1000 / options.smpl_freq); while (time_to_quit == 0) { ret = poll(&pollfd, 1, timeout); switch(ret) { case 1: case 0: /* *we are consuming the message. 
* to avoid this phase we could use PFM_FL_OVFL_NO_MSG * and use signal based notification */ if (options.opt_ovfl_switch) { ssize_t r; pfarg_msg_t msg; r = read(ctxid, &msg, sizeof(msg)); (void)r; } switch_sets(ctxid); break; default: if (errno != EINTR) fatal_error("poll fails\n"); } } if (full_periods < MIN_FULL_PERIODS) fatal_error("Not enough periods (%lu) to print results\n", full_periods); if (pid) waitpid(pid, &status, 0); print_results(); close(ctxid); return 0; } int mainloop(char **argv) { pfarg_sinfo_t sif; event_set_t *e; pfmlib_input_param_t inp; pfmlib_output_param_t outp; pfmlib_regmask_t impl_counters, used_pmcs; pfmlib_event_t cycle_event; unsigned int i, j; char *p, *str; unsigned int max_counters, allowed_counters; int ret; pfm_get_num_counters(&max_counters); if (max_counters < 2 && options.opt_ovfl_switch) fatal_error("not enough counter to get overflow switching to work\n"); allowed_counters = max_counters; /* * account for overflow counter (cpu cycles) */ if (options.opt_ovfl_switch) allowed_counters--; memset(&used_pmcs, 0, sizeof(used_pmcs)); memset(&impl_counters, 0, sizeof(impl_counters)); pfm_get_impl_counters(&impl_counters); options.smpl_period = (options.cpu_mhz*1000000)/options.smpl_freq; vbprintf("%lu Hz period = %"PRIu64" cycles @ %lu Mhz\n", options.smpl_freq, options.smpl_period, options.cpu_mhz); for (e = all_sets; e; e = e->next) { for (p = str = e->event_str; p ; ) { p = strchr(str, ','); if (p) str = p +1; total_events++; } } /* * account for extra event per set (cycle event) */ if (options.opt_ovfl_switch) { total_events += num_sets; /* * look for our trigger event */ if (pfm_get_cycle_event(&cycle_event) != PFMLIB_SUCCESS) { fatal_error("Cannot find cycle event\n"); } } vbprintf("total_events=%u\n", total_events); all_pmcs = calloc(1, sizeof(pfarg_pmr_t)*total_events); all_pmds = calloc(1, sizeof(pfarg_pmd_attr_t)*total_events); all_values = calloc(1, sizeof(uint64_t)*total_events); if (all_pmcs == NULL || all_pmds == NULL 
|| all_values == NULL)
		fatal_error("cannot allocate event tables\n");

	/*
	 * use the library to figure out assignments for all events of all sets
	 */
	get_sif(options.opt_is_system ? PFM_FL_SYSTEM_WIDE : 0, &sif);

	for (i=0, e = all_sets; i < num_sets; i++, e = e->next) {

		memset(&inp,0, sizeof(inp));
		memset(&outp,0, sizeof(outp));

		/*
		 * build the pfp_unavail_pmcs bitmask by looking
		 * at what perfmon has available. It is not always
		 * the case that all PMU registers are actually available
		 * to applications. For instance, on IA-32 platforms, some
		 * registers may be reserved for the NMI watchdog timer.
		 *
		 * With this bitmap, the library knows which registers NOT to
		 * use. Of course, it is possible that no valid assignment may
		 * be possible if certain PMU registers are not available.
		 */
		detect_unavail_pmu_regs(&sif, &inp.pfp_unavail_pmcs, NULL);

		str = e->event_str;
		/*
		 * look up each comma-separated name; the separator is turned into
		 * a NUL for the lookup and put back afterwards, so event_str is
		 * left unchanged
		 */
		for(j=0, p = str; p && j < allowed_counters; j++) {

			p = strchr(str, ',');
			if (p) *p = '\0';

			if (pfm_find_full_event(str, &inp.pfp_events[j]) != PFMLIB_SUCCESS) {
				fatal_error("Cannot find %s event for set %d event %d\n", str, i, j);
			}
			if (p) {
				*p = ',';
				str = p + 1;
			}
		}
		/* p still set: names remain although allowed_counters were consumed */
		if (p) {
			fatal_error("error in set %d: cannot have more than %d event(s) per set %s\n",
				i,
				allowed_counters,
				options.opt_ovfl_switch ? "(overflow switch mode)": "(hardware limit)");
		}
		/*
		 * add the cycle event as the last event when we switch on overflow
		 */
		if (options.opt_ovfl_switch) {
			inp.pfp_events[j] = cycle_event;
			inp.pfp_event_count = j+1;
			e->n_events = j+1;
		} else {
			e->n_events = j;
			inp.pfp_event_count = j;
		}

		inp.pfp_dfl_plm = options.opt_plm;

		if (options.opt_is_system)
			inp.pfp_flags = PFMLIB_PFP_SYSTEMWIDE;

		vbprintf("PMU programming for set %d\n", i);

		/*
		 * let the library do the hard work
		 */
		if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) {
			fatal_error("cannot configure events for set %d: %s\n", i, pfm_strerror(ret));
		}

		e->id = i;
		e->pmcs_base = num_pmcs;
		e->pmds_base = num_pmds;

		/*
		 * propagate from libpfm to kernel data structures
		 *
		 * NOTE(review): all_pmcs[] and all_pmds[] hold total_events entries;
		 * this assumes the library never returns more registers than
		 * events - confirm.
		 */
		for (j=0; j < outp.pfp_pmc_count; j++, num_pmcs++) {
			all_pmcs[num_pmcs].reg_num = outp.pfp_pmcs[j].reg_num;
			all_pmcs[num_pmcs].reg_value = outp.pfp_pmcs[j].reg_value;
		}
		for (j=0; j < outp.pfp_pmd_count; j++, num_pmds++)
			all_pmds[num_pmds].reg_num = outp.pfp_pmds[j].reg_num;

		e->npmcs = num_pmcs - e->pmcs_base;
		e->npmds = num_pmds - e->pmds_base;

		/*
		 * the last PMD of the set (cycle event) triggers the switch: it
		 * starts at -smpl_period and notifies on overflow
		 */
		if (options.opt_ovfl_switch) {
			all_pmds[num_pmds-1].reg_flags = PFM_REGFL_OVFL_NOTIFY;
			all_pmds[num_pmds-1].reg_value = - options.smpl_period;
			all_pmds[num_pmds-1].reg_short_reset = - options.smpl_period;
			all_pmds[num_pmds-1].reg_long_reset = - options.smpl_period;
		}
		vbprintf("set%d pmc_base=%d pmd_base=%d npmcs=%d npmds=%d\n",
			e->id,
			e->pmcs_base,
			e->pmds_base,
			e->npmcs,
			e->npmds);
	}

	current_set = all_sets;

	signal(SIGALRM, sigintr_handler);
	signal(SIGINT, sigintr_handler);

	if (options.opt_is_system)
		return measure_one_cpu(argv);

	return measure_one_task(argv);
}

/*
 * long options: entries with a NULL flag pointer return their value
 * (1..10) from getopt_long(), the others store 1 directly into the
 * corresponding option field
 */
static struct option multiplex_options[]={
	{ "help", 0, 0, 1},
	{ "freq", 1, 0, 2 },
	{ "kernel-level", 0, 0, 3 },
	{ "user-level", 0, 0, 4 },
	{ "version", 0, 0, 5 },
	{ "set", 1, 0, 6 },
	{ "session-timeout", 1, 0, 7 },
	{ "attach-task", 1, 0, 8 },
	{ "pin-cmd", 1, 0, 9 },
	{ "cpu", 1, 0, 10 },

	{ "verbose", 0, &options.opt_verbose, 1 },
	{ "debug", 0, &options.opt_debug, 1 },
	{ "us-counter-format", 0, &options.opt_us_format, 1},
	{ "ovfl-switch", 0, &options.opt_ovfl_switch, 1},
	{ "system-wide", 0, &options.opt_is_system, 1},
	{ "no-cmd-output", 0, &options.opt_no_cmd_out, 1},
	{ "no-header", 0, &options.opt_no_header, 1},
	{ 0, 0, 0, 0}
};

/*
 * Append two single-event sets to the list of sets: one for the cycle
 * event and one for the instruction retired event, as reported by
 * libpfm. Sets num_sets to 2.
 *
 * NOTE(review): tail starts as NULL, so this presumably must only be
 * called while all_sets is still empty - verify against the caller.
 */
static void
generate_default_sets(void)
{
	event_set_t *es, *tail = NULL;
	pfmlib_event_t events[2];
	size_t len;
	char *name;
	unsigned int i;
	int ret;

	ret = pfm_get_cycle_event(&events[0]);
	if (ret != PFMLIB_SUCCESS)
		fatal_error("cannot find cycle event\n");

	ret = pfm_get_inst_retired_event(&events[1]);
	if (ret != PFMLIB_SUCCESS)
		fatal_error("cannot find instruction retired event\n");

	pfm_get_max_event_name_len(&len);

	for (i=0; i < 2; i++) {
		/* the name buffer is owned by the set and never freed */
		name = malloc(len+1);
		if (name == NULL) {
			fatal_error("cannot allocate space for event name\n");
		}

		pfm_get_full_event_name(&events[i], name, len+1);

		es = (event_set_t *)malloc(sizeof(event_set_t));
		if (es == NULL)
			fatal_error("cannot allocate new event set\n");

		memset(es, 0, sizeof(*es));

		es->event_str = name;
		es->next = NULL;
		es->n_events = 0;

		/* append at the end of the list */
		if (all_sets == NULL)
			all_sets = es;
		else
			tail->next = es;
		tail = es;
	}
	num_sets = i;
}

/*
 * Print the command line help on stdout.
 *
 * NOTE(review): the text lists "-c" twice (counter format and cpu) and
 * mentions --excl-idle, --excl-intr and --intr-only, none of which
 * appear in multiplex_options[] - confirm which options are really
 * supported.
 */
static void
print_usage(char **argv)
{
	printf("usage: %s [OPTIONS]... COMMAND\n", argv[0]);

	printf(	"-h, --help\t\t\t\tdisplay this help and exit\n"
		"-V, --version\t\t\t\toutput version information and exit\n"
		"-u, --user-level\t\t\tmonitor at the user level for all events\n"
		"-k, --kernel-level\t\t\tmonitor at the kernel level for all events\n"
		"-c, --us-counter-format\tprint large counts with comma for thousands\n"
		"-p pid, --attach-task pid\tattach to a running task\n"
		"--set=ev1[,ev2,ev3,ev4,...]\t\tdescribe one set\n"
		"--freq=number\t\t\t\tset set switching frequency in Hz\n"
		"-c cpu, --cpu=cpu\t\t\tCPU to use for system-wide [default current]\n"
		"--ovfl-switch\t\t\t\t\tuse overflow based multiplexing (default: time-based)\n"
		"--verbose\t\t\t\tprint more information during execution\n"
		"--system-wide\t\t\t\tuse system-wide (only one CPU at a time)\n"
		"--excl-idle\t\t\texclude idle task(system-wide only)\n"
		"--excl-intr\t\t\texclude interrupt triggered execution(system-wide only)\n"
		"--intr-only\t\t\tinclude only interrupt triggered execution(system-wide only)\n"
		"--session-timeout=sec\t\t\tsession timeout in seconds (system-wide only)\n"
		"--no-cmd-output\t\t\t\toutput of executed command redirected to /dev/null\n"
		"--pin-cmd=cpu\t\t\t\tpin executed command onto a specific cpu\n"
	);
}

int
main(int argc, char **argv)
{
	char *endptr = NULL;
	pfmlib_options_t pfmlib_options;
	event_set_t *tail = NULL, *es;
	unsigned long long_val;
	int c, ret;

	/* -1 means "not pinned" */
	options.pin_cmd_cpu = options.pin_cpu = -1;

	while ((c=getopt_long(argc, argv,"+vhkuVct:p:", multiplex_options, 0)) != -1) {
		switch(c) {
			case 0: continue; /* fast path for options */

			case 1:
				print_usage(argv);
				exit(0);

			case 'v': options.opt_verbose = 1;
				break;
			case 'c':
				options.opt_us_format = 1;
				break;
			case 2:
				if (options.smpl_freq) fatal_error("sampling frequency set twice\n");
				options.smpl_freq = strtoul(optarg, &endptr, 10);
				if (*endptr != '\0')
					fatal_error("invalid freqyency: %s\n", optarg);
				break;
			case 3:
			case 'k':
				options.opt_plm |= PFM_PLM0;
				break;
			case 4:
			case 'u':
				options.opt_plm |=
PFM_PLM3; break; case 'V': case 5: printf("multiplex version " MULTIPLEX_VERSION " Date: " __DATE__ "\n" "Copyright (C) 2004 Hewlett-Packard Company\n"); exit(0); case 6: es = (event_set_t *)malloc(sizeof(event_set_t)); if (es == NULL) fatal_error("cannot allocate new event set\n"); es->event_str = optarg; es->next = NULL; es->n_events = 0; if (all_sets == NULL) all_sets = es; else tail->next = es; tail = es; num_sets++; break; case 't': case 7: if (options.session_timeout) fatal_error("too many timeouts\n"); if (*optarg == '\0') fatal_error("--session-timeout needs an argument\n"); long_val = strtoul(optarg,&endptr, 10); if (*endptr != '\0') fatal_error("invalid number of seconds for timeout: %s\n", optarg); if (long_val >= UINT_MAX) fatal_error("timeout is too big, must be < %u\n", UINT_MAX); options.session_timeout = (unsigned int)long_val; break; case 'p': case 8: if (options.attach_pid) fatal_error("process to attach specified twice\n"); options.attach_pid = (pid_t)atoi(optarg); break; case 9: if (options.pin_cmd_cpu != -1) fatal_error("cannot pin command twice\n"); options.pin_cmd_cpu = atoi(optarg); break; case 10: if (options.pin_cpu != -1) fatal_error("cannot pin to more than one cpu\n"); options.pin_cpu = atoi(optarg); break; default: fatal_error(""); /* just quit silently now */ } } if (optind == argc && options.opt_is_system == 0 && options.attach_pid == 0) fatal_error("you need to specify a command to measure\n"); /* * pass options to library (optional) */ memset(&pfmlib_options, 0, sizeof(pfmlib_options)); pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ pfmlib_options.pfm_verbose = options.opt_verbose; /* set to 1 for verbose */ pfm_set_options(&pfmlib_options); /* * Initialize pfm library (required before we can use it) */ ret = pfm_initialize(); if (ret != PFMLIB_SUCCESS) fatal_error("Cannot initialize library: %s\n", pfm_strerror(ret)); if ((options.cpu_mhz = get_cpu_speed()) == 0) fatal_error("can't get CPU speed\n"); if (options.smpl_freq 
== 0UL) options.smpl_freq = SMPL_FREQ_IN_HZ; if (options.opt_plm == 0) options.opt_plm = PFM_PLM3; if (num_sets == 0) generate_default_sets(); return mainloop(argv+optind); } papi-5.6.0/man/man3/PAPI_query_named_event.3000664 001750 001750 00000002730 13216244356 022561 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPI_query_named_event" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPI_query_named_event \- .PP Query if a named PAPI event exists\&. .SH SYNOPSIS .br .PP .SH "Detailed Description" .PP .PP \fBC Interface:\fP .RS 4 #include <\fBpapi\&.h\fP> .br int \fBPAPI_query_named_event(const char *EventName)\fP; .RE .PP \fBPAPI_query_named_event()\fP asks the PAPI library if the PAPI named event can be counted on this architecture\&. If the event CAN be counted, the function returns PAPI_OK\&. If the event CANNOT be counted, the function returns an error code\&. This function also can be used to check the syntax of native and user events\&. .PP \fBParameters:\fP .RS 4 \fIEventName\fP -- a defined event such as PAPI_TOT_INS\&. .RE .PP \fBReturn values:\fP .RS 4 \fIPAPI_EINVAL\fP One or more of the arguments is invalid\&. .br \fIPAPI_ENOEVNT\fP The PAPI preset is not available on the underlying hardware\&. .RE .PP \fBExamples\fP .RS 4 .PP .nf * int retval; * // Initialize the library * retval = PAPI_library_init(PAPI_VER_CURRENT); * if (retval != PAPI_VER_CURRENT) { * fprintf(stderr,\"PAPI library init error!\\n\"); * exit(1); * } * if (PAPI_query_named_event("PAPI_TOT_INS") != PAPI_OK) { * fprintf(stderr,\"No instruction counter? How lame\&.\\n\"); * exit(1); * } * .fi .PP .RE .PP \fBSee Also:\fP .RS 4 \fBPAPI_query_event\fP .RE .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. 
papi-5.6.0/src/libpfm-3.y/docs/man3/pfm_regmask_weight.3000664 001750 001750 00000000033 13216244361 024712 0ustar00jshenry1963jshenry1963000000 000000 .so man3/pfm_regmask_set.3 papi-5.6.0/src/libpfm4/lib/events/intel_ivbep_unc_qpi_events.h000664 001750 001750 00000055023 13216244364 026514 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2014 Google Inc. All rights reserved * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. * * This file has been automatically generated. 
* * PMU: ivbep_unc_qpi (Intel IvyBridge-EP QPI uncore) */ static const intel_x86_umask_t ivbep_unc_q_direct2core[]={ { .uname = "FAILURE_CREDITS", .udesc = "Number of spawn failures due to lack of Egress credits", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "FAILURE_CREDITS_RBT", .udesc = "Number of spawn failures due to lack of Egress credit and route-back table (RBT) bit was not set", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO, }, { .uname = "FAILURE_RBT_HIT", .udesc = "Number of spawn failures because route-back table (RBT) specified that the transaction should not trigger a direct2core transaction", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO, }, { .uname = "SUCCESS_RBT_HIT", .udesc = "Number of spawn successes", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "FAILURE_MISS", .udesc = "Number of spawn failures due to RBT tag not matching although the valid bit was set and there was enough Egress credits", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "FAILURE_CREDITS_MISS", .udesc = "Number of spawn failures due to RBT tag not matching and they were not enough Egress credits. 
The valid bit was set", .ucode = 0x2000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "FAILURE_RBT_MISS", .udesc = "Number of spawn failures due to RBT tag not matching, the valid bit was not set but there were enough Egress credits", .ucode = 0x4000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "FAILURE_CREDITS_RBT_MISS", .udesc = "Number of spawn failures due to RBT tag not matching, the valid bit was not set and there were not enough Egress credits", .ucode = 0x8000, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t ivbep_unc_q_rxl_credits_consumed_vn0[]={ { .uname = "DRS", .udesc = "Number of times VN0 consumed for DRS message class", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "HOM", .udesc = "Number of times VN0 consumed for HOM message class", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO, }, { .uname = "NCB", .udesc = "Number of times VN0 consumed for NCB message class", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "NCS", .udesc = "Number of times VN0 consumed for NCS message class", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO, }, { .uname = "NDR", .udesc = "Number of times VN0 consumed for NDR message class", .ucode = 0x2000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "SNP", .udesc = "Number of times VN0 consumed for SNP message class", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t ivbep_unc_q_rxl_credits_consumed_vn1[]={ { .uname = "DRS", .udesc = "Number of times VN1 consumed for DRS message class", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "HOM", .udesc = "Number of times VN1 consumed for HOM message class", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO, }, { .uname = "NCB", .udesc = "Number of times VN1 consumed for NCB message class", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "NCS", .udesc = "Number of times VN1 consumed for NCS message class", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO, }, { .uname = "NDR", .udesc = "Number of times VN1 consumed for NDR 
message class", .ucode = 0x2000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "SNP", .udesc = "Number of times VN1 consumed for SNP message class", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t ivbep_unc_q_rxl_flits_g0[]={ { .uname = "DATA", .udesc = "Number of data flits over QPI", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "IDLE", .udesc = "Number of flits over QPI that do not hold protocol payload", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "NON_DATA", .udesc = "Number of non-NULL non-data flits over QPI", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t ivbep_unc_q_txl_flits_g0[]={ { .uname = "DATA", .udesc = "Number of data flits over QPI", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "NON_DATA", .udesc = "Number of non-NULL non-data flits over QPI", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t ivbep_unc_q_rxl_flits_g1[]={ { .uname = "DRS", .udesc = "Number of flits over QPI on the Data Response (DRS) channel", .ucode = 0x1800, .uflags = INTEL_X86_NCOMBO, }, { .uname = "DRS_DATA", .udesc = "Number of data flits over QPI on the Data Response (DRS) channel", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO, }, { .uname = "DRS_NONDATA", .udesc = "Number of protocol flits over QPI on the Data Response (DRS) channel", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "HOM", .udesc = "Number of flits over QPI on the home channel", .ucode = 0x600, .uflags = INTEL_X86_NCOMBO, }, { .uname = "HOM_NONREQ", .udesc = "Number of non-request flits over QPI on the home channel", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO, }, { .uname = "HOM_REQ", .udesc = "Number of data requests over QPI on the home channel", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "SNP", .udesc = "Number of snoop requests flits over QPI", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t 
ivbep_unc_q_rxl_flits_g2[]={ { .uname = "NCB", .udesc = "Number of non-coherent bypass flits", .ucode = 0xc00, .uflags = INTEL_X86_NCOMBO, }, { .uname = "NCB_DATA", .udesc = "Number of non-coherent data flits", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO, }, { .uname = "NCB_NONDATA", .udesc = "Number of bypass non-data flits", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO, }, { .uname = "NCS", .udesc = "Number of non-coherent standard (NCS) flits", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "NDR_AD", .udesc = "Number of flits received over Non-data response (NDR) channel", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "NDR_AK", .udesc = "Number of flits received on the Non-data response (NDR) channel)", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t ivbep_unc_q_txr_ad_hom_credit_acquired[]={ { .uname = "VN0", .udesc = "for VN0", .ucode = 0x100, }, { .uname = "VN1", .udesc = "for VN1", .ucode = 0x200, }, }; static const intel_x86_umask_t ivbep_unc_q_txr_bl_drs_credit_acquired[]={ { .uname = "VN0", .udesc = "for VN0", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "VN1", .udesc = "for VN1", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "VN_SHR", .udesc = "for shared VN", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_entry_t intel_ivbep_unc_q_pe[]={ { .name = "UNC_Q_CLOCKTICKS", .desc = "Number of qfclks", .code = 0x14, .cntmsk = 0xf, .modmsk = IVBEP_UNC_QPI_ATTRS, }, { .name = "UNC_Q_CTO_COUNT", .desc = "Count of CTO Events", .code = 0x38 | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .modmsk = IVBEP_UNC_QPI_ATTRS, }, { .name = "UNC_Q_DIRECT2CORE", .desc = "Direct 2 Core Spawning", .code = 0x13, .cntmsk = 0xf, .ngrp = 1, .modmsk = IVBEP_UNC_QPI_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_direct2core), .umasks = ivbep_unc_q_direct2core }, { .name = "UNC_Q_L1_POWER_CYCLES", .desc = "Cycles in L1", .code = 0x12, .cntmsk = 0xf, .modmsk = IVBEP_UNC_QPI_ATTRS, }, { 
.name = "UNC_Q_RXL0P_POWER_CYCLES", .desc = "Cycles in L0p", .code = 0x10, .cntmsk = 0xf, .modmsk = IVBEP_UNC_QPI_ATTRS, }, { .name = "UNC_Q_RXL0_POWER_CYCLES", .desc = "Cycles in L0", .code = 0xf, .cntmsk = 0xf, .modmsk = IVBEP_UNC_QPI_ATTRS, }, { .name = "UNC_Q_RXL_BYPASSED", .desc = "Rx Flit Buffer Bypassed", .code = 0x9, .cntmsk = 0xf, .modmsk = IVBEP_UNC_QPI_ATTRS, }, { .name = "UNC_Q_RXL_CREDITS_CONSUMED_VN0", .desc = "VN0 Credit Consumed", .code = 0x1e | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .ngrp = 1, .modmsk = IVBEP_UNC_QPI_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_rxl_credits_consumed_vn0), .umasks = ivbep_unc_q_rxl_credits_consumed_vn0 }, { .name = "UNC_Q_RXL_CREDITS_CONSUMED_VN1", .desc = "VN1 Credit Consumed", .code = 0x39 | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .ngrp = 1, .modmsk = IVBEP_UNC_QPI_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_rxl_credits_consumed_vn1), .umasks = ivbep_unc_q_rxl_credits_consumed_vn1 }, { .name = "UNC_Q_RXL_CREDITS_CONSUMED_VNA", .desc = "VNA Credit Consumed", .code = 0x1d | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .modmsk = IVBEP_UNC_QPI_ATTRS, }, { .name = "UNC_Q_RXL_CYCLES_NE", .desc = "RxQ Cycles Not Empty", .code = 0xa, .cntmsk = 0xf, .modmsk = IVBEP_UNC_QPI_ATTRS, }, { .name = "UNC_Q_RXL_FLITS_G0", .desc = "Flits Received - Group 0", .code = 0x1, .cntmsk = 0xf, .ngrp = 1, .modmsk = IVBEP_UNC_QPI_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_rxl_flits_g0), .umasks = ivbep_unc_q_rxl_flits_g0 }, { .name = "UNC_Q_RXL_FLITS_G1", .desc = "Flits Received - Group 1", .code = 0x2 | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .ngrp = 1, .modmsk = IVBEP_UNC_QPI_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_rxl_flits_g1), .umasks = ivbep_unc_q_rxl_flits_g1 }, { .name = "UNC_Q_RXL_FLITS_G2", .desc = "Flits Received - Group 2", .code = 0x3 | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .ngrp = 1, .modmsk = IVBEP_UNC_QPI_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_rxl_flits_g2), .umasks = 
ivbep_unc_q_rxl_flits_g2 }, { .name = "UNC_Q_RXL_INSERTS", .desc = "Rx Flit Buffer Allocations", .code = 0x8, .cntmsk = 0xf, .modmsk = IVBEP_UNC_QPI_ATTRS, }, { .name = "UNC_Q_RXL_INSERTS_DRS", .desc = "Rx Flit Buffer Allocations - DRS", .code = 0x9 | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .modmsk = IVBEP_UNC_QPI_ATTRS, .ngrp = 1, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_ad_hom_credit_acquired), .umasks = ivbep_unc_q_txr_ad_hom_credit_acquired, }, { .name = "UNC_Q_RXL_INSERTS_HOM", .desc = "Rx Flit Buffer Allocations - HOM", .code = 0xc | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .modmsk = IVBEP_UNC_QPI_ATTRS, .ngrp = 1, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_ad_hom_credit_acquired), .umasks = ivbep_unc_q_txr_ad_hom_credit_acquired, }, { .name = "UNC_Q_RXL_INSERTS_NCB", .desc = "Rx Flit Buffer Allocations - NCB", .code = 0xa | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .modmsk = IVBEP_UNC_QPI_ATTRS, .ngrp = 1, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_ad_hom_credit_acquired), .umasks = ivbep_unc_q_txr_ad_hom_credit_acquired, }, { .name = "UNC_Q_RXL_INSERTS_NCS", .desc = "Rx Flit Buffer Allocations - NCS", .code = 0xb | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .modmsk = IVBEP_UNC_QPI_ATTRS, .ngrp = 1, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_ad_hom_credit_acquired), .umasks = ivbep_unc_q_txr_ad_hom_credit_acquired, }, { .name = "UNC_Q_RXL_INSERTS_NDR", .desc = "Rx Flit Buffer Allocations - NDR", .code = 0xe | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .modmsk = IVBEP_UNC_QPI_ATTRS, .ngrp = 1, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_ad_hom_credit_acquired), .umasks = ivbep_unc_q_txr_ad_hom_credit_acquired, }, { .name = "UNC_Q_RXL_INSERTS_SNP", .desc = "Rx Flit Buffer Allocations - SNP", .code = 0xd | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .modmsk = IVBEP_UNC_QPI_ATTRS, .ngrp = 1, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_ad_hom_credit_acquired), .umasks = ivbep_unc_q_txr_ad_hom_credit_acquired, }, { .name = "UNC_Q_RXL_OCCUPANCY", 
.desc = "RxQ Occupancy - All Packets", .code = 0xb, .cntmsk = 0xf, .modmsk = IVBEP_UNC_QPI_ATTRS, }, { .name = "UNC_Q_RXL_OCCUPANCY_DRS", .desc = "RxQ Occupancy - DRS", .code = 0x15 | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .modmsk = IVBEP_UNC_QPI_ATTRS, .ngrp = 1, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_ad_hom_credit_acquired), .umasks = ivbep_unc_q_txr_ad_hom_credit_acquired, }, { .name = "UNC_Q_RXL_OCCUPANCY_HOM", .desc = "RxQ Occupancy - HOM", .code = 0x18 | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .modmsk = IVBEP_UNC_QPI_ATTRS, .ngrp = 1, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_ad_hom_credit_acquired), .umasks = ivbep_unc_q_txr_ad_hom_credit_acquired, }, { .name = "UNC_Q_RXL_OCCUPANCY_NCB", .desc = "RxQ Occupancy - NCB", .code = 0x16 | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .modmsk = IVBEP_UNC_QPI_ATTRS, .ngrp = 1, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_ad_hom_credit_acquired), .umasks = ivbep_unc_q_txr_ad_hom_credit_acquired, }, { .name = "UNC_Q_RXL_OCCUPANCY_NCS", .desc = "RxQ Occupancy - NCS", .code = 0x17 | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .modmsk = IVBEP_UNC_QPI_ATTRS, .ngrp = 1, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_ad_hom_credit_acquired), .umasks = ivbep_unc_q_txr_ad_hom_credit_acquired, }, { .name = "UNC_Q_RXL_OCCUPANCY_NDR", .desc = "RxQ Occupancy - NDR", .code = 0x1a | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .modmsk = IVBEP_UNC_QPI_ATTRS, .ngrp = 1, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_ad_hom_credit_acquired), .umasks = ivbep_unc_q_txr_ad_hom_credit_acquired, }, { .name = "UNC_Q_RXL_OCCUPANCY_SNP", .desc = "RxQ Occupancy - SNP", .code = 0x19 | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .modmsk = IVBEP_UNC_QPI_ATTRS, .ngrp = 1, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_ad_hom_credit_acquired), .umasks = ivbep_unc_q_txr_ad_hom_credit_acquired, }, { .name = "UNC_Q_TXL0P_POWER_CYCLES", .desc = "Cycles in L0p", .code = 0xd, .cntmsk = 0xf, .modmsk = IVBEP_UNC_QPI_ATTRS, }, { .name = 
"UNC_Q_TXL0_POWER_CYCLES", .desc = "Cycles in L0", .code = 0xc, .cntmsk = 0xf, .modmsk = IVBEP_UNC_QPI_ATTRS, }, { .name = "UNC_Q_TXL_BYPASSED", .desc = "Tx Flit Buffer Bypassed", .code = 0x5, .cntmsk = 0xf, .modmsk = IVBEP_UNC_QPI_ATTRS, }, { .name = "UNC_Q_TXL_CYCLES_NE", .desc = "Tx Flit Buffer Cycles not Empty", .code = 0x6, .cntmsk = 0xf, .modmsk = IVBEP_UNC_QPI_ATTRS, }, { .name = "UNC_Q_TXL_FLITS_G0", .desc = "Flits Transferred - Group 0", .code = 0x0, .cntmsk = 0xf, .ngrp = 1, .modmsk = IVBEP_UNC_QPI_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_txl_flits_g0), .umasks = ivbep_unc_q_txl_flits_g0 }, { .name = "UNC_Q_TXL_FLITS_G1", .desc = "Flits Transferred - Group 1", .code = 0x0 | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .ngrp = 1, .modmsk = IVBEP_UNC_QPI_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_rxl_flits_g1), .umasks = ivbep_unc_q_rxl_flits_g1 /* shared with rxl_flits_g1 */ }, { .name = "UNC_Q_TXL_FLITS_G2", .desc = "Flits Transferred - Group 2", .code = 0x1 | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .ngrp = 1, .modmsk = IVBEP_UNC_QPI_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_rxl_flits_g2), .umasks = ivbep_unc_q_rxl_flits_g2 /* shared with rxl_flits_g2 */ }, { .name = "UNC_Q_TXL_INSERTS", .desc = "Tx Flit Buffer Allocations", .code = 0x4, .cntmsk = 0xf, .modmsk = IVBEP_UNC_QPI_ATTRS, }, { .name = "UNC_Q_TXL_OCCUPANCY", .desc = "Tx Flit Buffer Occupancy", .code = 0x7, .cntmsk = 0xf, .modmsk = IVBEP_UNC_QPI_ATTRS, }, { .name = "UNC_Q_VNA_CREDIT_RETURNS", .desc = "VNA Credits Returned", .code = 0x1c | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .modmsk = IVBEP_UNC_QPI_ATTRS, }, { .name = "UNC_Q_VNA_CREDIT_RETURN_OCCUPANCY", .desc = "VNA Credits Pending Return - Occupancy", .code = 0x1b | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .modmsk = IVBEP_UNC_QPI_ATTRS, }, { .name = "UNC_Q_TXR_AD_HOM_CREDIT_ACQUIRED", .desc = "R3QPI Egress credit occupancy AD HOM", .code = 0x26 | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .ngrp = 1, .modmsk = 
IVBEP_UNC_QPI_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_ad_hom_credit_acquired), .umasks = ivbep_unc_q_txr_ad_hom_credit_acquired, }, { .name = "UNC_Q_TXR_AD_HOM_CREDIT_OCCUPANCY", .desc = "R3QPI Egress credit occupancy AD HOM", .code = 0x22 | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .ngrp = 1, .modmsk = IVBEP_UNC_QPI_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_ad_hom_credit_acquired), /* shared */ .umasks = ivbep_unc_q_txr_ad_hom_credit_acquired, }, { .name = "UNC_Q_TXR_AD_NDR_CREDIT_ACQUIRED", .desc = "R3QPI Egress credit occupancy AD NDR", .code = 0x28 | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .ngrp = 1, .modmsk = IVBEP_UNC_QPI_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_ad_hom_credit_acquired), .umasks = ivbep_unc_q_txr_ad_hom_credit_acquired, }, { .name = "UNC_Q_TXR_AD_NDR_CREDIT_OCCUPANCY", .desc = "R3QPI Egress credit occupancy AD NDR", .code = 0x24 | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .ngrp = 1, .modmsk = IVBEP_UNC_QPI_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_ad_hom_credit_acquired), /* shared */ .umasks = ivbep_unc_q_txr_ad_hom_credit_acquired, }, { .name = "UNC_Q_TXR_AD_SNP_CREDIT_ACQUIRED", .desc = "R3QPI Egress credit occupancy AD SNP", .code = 0x27 | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .ngrp = 1, .modmsk = IVBEP_UNC_QPI_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_ad_hom_credit_acquired), .umasks = ivbep_unc_q_txr_ad_hom_credit_acquired, }, { .name = "UNC_Q_TXR_AD_SNP_CREDIT_OCCUPANCY", .desc = "R3QPI Egress credit occupancy AD SNP", .code = 0x23 | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .ngrp = 1, .modmsk = IVBEP_UNC_QPI_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_ad_hom_credit_acquired), /* shared */ .umasks = ivbep_unc_q_txr_ad_hom_credit_acquired, }, { .name = "UNC_Q_TXR_AK_NDR_CREDIT_ACQUIRED", .desc = "R3QPI Egress credit occupancy AK NDR", .code = 0x29 | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .ngrp = 1, .modmsk = IVBEP_UNC_QPI_ATTRS, .numasks = 
LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_ad_hom_credit_acquired), .umasks = ivbep_unc_q_txr_ad_hom_credit_acquired, }, { .name = "UNC_Q_TXR_AK_NDR_CREDIT_OCCUPANCY", .desc = "R3QPI Egress credit occupancy AD NDR", .code = 0x25 | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .ngrp = 1, .modmsk = IVBEP_UNC_QPI_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_ad_hom_credit_acquired), /* shared */ .umasks = ivbep_unc_q_txr_ad_hom_credit_acquired, }, { .name = "UNC_Q_TXR_BL_DRS_CREDIT_ACQUIRED", .desc = "R3QPI Egress credit occupancy BL DRS", .code = 0x2a | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .ngrp = 1, .modmsk = IVBEP_UNC_QPI_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_bl_drs_credit_acquired), .umasks = ivbep_unc_q_txr_bl_drs_credit_acquired, }, { .name = "UNC_Q_TXR_BL_DRS_CREDIT_OCCUPANCY", .desc = "R3QPI Egress credit occupancy BL DRS", .code = 0x1f | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .ngrp = 1, .modmsk = IVBEP_UNC_QPI_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_bl_drs_credit_acquired), /* shared */ .umasks = ivbep_unc_q_txr_bl_drs_credit_acquired, }, { .name = "UNC_Q_TXR_BL_NCB_CREDIT_ACQUIRED", .desc = "R3QPI Egress credit occupancy BL NCB", .code = 0x2b | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .ngrp = 1, .modmsk = IVBEP_UNC_QPI_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_ad_hom_credit_acquired), .umasks = ivbep_unc_q_txr_ad_hom_credit_acquired, }, { .name = "UNC_Q_TXR_BL_NCB_CREDIT_OCCUPANCY", .desc = "R3QPI Egress credit occupancy BL NCB", .code = 0x20 | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .ngrp = 1, .modmsk = IVBEP_UNC_QPI_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_ad_hom_credit_acquired), /* shared */ .umasks = ivbep_unc_q_txr_ad_hom_credit_acquired, }, { .name = "UNC_Q_TXR_BL_NCS_CREDIT_ACQUIRED", .desc = "R3QPI Egress credit occupancy BL NCS", .code = 0x2c | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .ngrp = 1, .modmsk = IVBEP_UNC_QPI_ATTRS, .numasks = 
LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_ad_hom_credit_acquired), .umasks = ivbep_unc_q_txr_ad_hom_credit_acquired, }, { .name = "UNC_Q_TXR_BL_NCS_CREDIT_OCCUPANCY", .desc = "R3QPI Egress credit occupancy BL NCS", .code = 0x21 | (1ULL << 21), /* sel_ext */ .cntmsk = 0xf, .ngrp = 1, .modmsk = IVBEP_UNC_QPI_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_q_txr_ad_hom_credit_acquired), /* shared */ .umasks = ivbep_unc_q_txr_ad_hom_credit_acquired, }, }; papi-5.6.0/man/man3/PAPIF_remove_events.3000664 001750 001750 00000001062 13216244355 022032 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPIF_remove_events" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPIF_remove_events \- .PP Remove an array of hardware event codes from a PAPI event set\&. .SH SYNOPSIS .br .PP .SH "Detailed Description" .PP .PP \fBFortran Prototype:\fP .RS 4 #include 'fpapi\&.h' .br \fBPAPIF_remove_events\fP( C_INT EventSet, C_INT(*) EventCode, C_INT number, C_INT check ) .RE .PP \fBSee Also:\fP .RS 4 \fBPAPI_remove_events\fP .RE .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. papi-5.6.0/src/utils/papi_mem_info.c000664 001750 001750 00000007370 13216244370 021437 0ustar00jshenry1963jshenry1963000000 000000 /* * This file perfoms the following test: memory info * * Author: Kevin London * london@cs.utk.edu */ /** file papi_mem_info.c * @brief papi_mem_info utility. * @page papi_mem_info * @section NAME * papi_mem_info - provides information on the memory architecture of the current processor. * * @section Synopsis * * @section Description * papi_mem_info is a PAPI utility program that reports information about * the cache memory architecture of the current processor, including number, * types, sizes and associativities of instruction and data caches and * Translation Lookaside Buffers. * * @section Options * This utility has no command line options. * * @section Bugs * There are no known bugs in this utility. 
* If you find a bug, it should be reported to the * PAPI Mailing List at . */ #include #include #include "papi.h" int main( int argc, char **argv ) { const PAPI_hw_info_t *meminfo = NULL; PAPI_mh_level_t *L; int i, j, retval; (void)argc; (void)argv; retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) { fprintf(stderr,"Error! PAPI_library_init\n"); return retval; } meminfo = PAPI_get_hardware_info( ); if (meminfo == NULL ) { fprintf(stderr,"Error! PAPI_get_hardware_info"); return 2; } printf( "Memory Cache and TLB Hierarchy Information.\n" ); printf( "------------------------------------------------------------------------\n" ); /* Extract and report the tlb and cache information */ L = ( PAPI_mh_level_t * ) & ( meminfo->mem_hierarchy.level[0] ); printf( "TLB Information.\n There may be multiple descriptors for each level of TLB\n" ); printf( " if multiple page sizes are supported.\n\n" ); /* Scan the TLB structures */ for ( i = 0; i < meminfo->mem_hierarchy.levels; i++ ) { for ( j = 0; j < PAPI_MH_MAX_LEVELS; j++ ) { switch ( PAPI_MH_CACHE_TYPE( L[i].tlb[j].type ) ) { case PAPI_MH_TYPE_UNIFIED: printf( "L%d Unified TLB:\n", i + 1 ); break; case PAPI_MH_TYPE_DATA: printf( "L%d Data TLB:\n", i + 1 ); break; case PAPI_MH_TYPE_INST: printf( "L%d Instruction TLB:\n", i + 1 ); break; } if ( L[i].tlb[j].type ) { if ( L[i].tlb[j].page_size ) printf( " Page Size: %6d KB\n", L[i].tlb[j].page_size >> 10 ); printf( " Number of Entries: %6d\n", L[i].tlb[j].num_entries ); switch ( L[i].tlb[j].associativity ) { case 0: /* undefined */ break; case 1: printf( " Associativity: Direct Mapped\n\n" ); break; case SHRT_MAX: printf( " Associativity: Full\n\n" ); break; default: printf( " Associativity: %6d\n\n", L[i].tlb[j].associativity ); break; } } } } /* Scan the Cache structures */ printf( "\nCache Information.\n\n" ); for ( i = 0; i < meminfo->mem_hierarchy.levels; i++ ) { for ( j = 0; j < 2; j++ ) { switch ( PAPI_MH_CACHE_TYPE( L[i].cache[j].type ) ) 
{ case PAPI_MH_TYPE_UNIFIED: printf( "L%d Unified Cache:\n", i + 1 ); break; case PAPI_MH_TYPE_DATA: printf( "L%d Data Cache:\n", i + 1 ); break; case PAPI_MH_TYPE_INST: printf( "L%d Instruction Cache:\n", i + 1 ); break; case PAPI_MH_TYPE_TRACE: printf( "L%d Trace Buffer:\n", i + 1 ); break; case PAPI_MH_TYPE_VECTOR: printf( "L%d Vector Cache:\n", i + 1 ); break; } if ( L[i].cache[j].type ) { printf( " Total size: %6d KB\n Line size: %6d B\n Number of Lines: %6d\n Associativity: %6d\n\n", ( L[i].cache[j].size ) >> 10, L[i].cache[j].line_size, L[i].cache[j].num_lines, L[i].cache[j].associativity ); } } } return 0; } papi-5.6.0/src/configure.in000664 001750 001750 00000203560 13216244360 017640 0ustar00jshenry1963jshenry1963000000 000000 # Process this file with autoconf to produce a configure script. # File: configure.in # cross compile sample # ARCH=mips CC=scgcc ./configure --with-arch=mips --host=mips64el-gentoo-linux-gnu- --with-ffsll --with-libpfm4 --with-perf-events --with-virtualtimer=times --with-walltimer=gettimeofday --with-tls=__thread --with-CPU=mips # cross compiling should work differently... 
AC_PREREQ(2.59)
AC_INIT(PAPI, 5.6.0.0, ptools-perfapi@icl.utk.edu)
AC_CONFIG_SRCDIR([papi.c])
AC_CONFIG_HEADER([config.h])

# AS_AC_EXPAND(VAR, VALUE): define VAR in config.h as VALUE with every
# shell variable reference expanded until the text stops changing.
# prefix and exec_prefix are given their defaults for the expansion only.
AC_DEFUN([AS_AC_EXPAND],
  [EXP_VAR=[$1]
   FROM_VAR=[$2]
   prefix_save=$prefix
   exec_prefix_save=$exec_prefix
   if test "x$prefix" = "xNONE"; then
     prefix="$ac_default_prefix"
   fi
   if test "x$exec_prefix" = "xNONE"; then
     exec_prefix=$prefix
   fi
   full_var="$FROM_VAR"
   while true; do
     new_full_var="`eval echo $full_var`"
     if test "x$new_full_var" = "x$full_var"; then break; fi
     full_var=$new_full_var
   done
   full_var=$new_full_var
   AC_DEFINE_UNQUOTED([$1], "$full_var")
   prefix=$prefix_save
   exec_prefix=$exec_prefix_save
])

AC_MSG_CHECKING(for architecture)
AC_ARG_WITH(arch,
  [ --with-arch= Specify architecture (uname -m)],
  [arch=$withval],
  [arch=`uname -m`])
AC_MSG_RESULT($arch)

AC_ARG_WITH(bitmode,
  [ --with-bitmode=<32,64> Specify bit mode of library],
  [bitmode=$withval])

AC_MSG_CHECKING(for OS)
AC_ARG_WITH(OS,
  [ --with-OS= Specify operating system],
  [OS=$withval],
  [OS="`uname | tr '[A-Z]' '[a-z]'`"
   if (test "$OS" = "SunOS" || test "$OS" = "sunos"); then
     OS=solaris
   fi
  ])
AC_MSG_RESULT($OS)

# The version is taken from uname only when the OS is neither bgp nor bgq.
# The two inequalities must be joined with -a: joined with -o the test is
# true for every value of OS.
AC_MSG_CHECKING(for OS version)
AC_ARG_WITH(OSVER,
  [ --with-OSVER= Specify operating system version],
  [OSVER=$withval],
  [if test "$OS" != "bgp" -a "$OS" != "bgq"; then
     OSVER="`uname -r`"
   fi
  ])
AC_MSG_RESULT($OSVER)

AC_MSG_CHECKING(for perf_event workaround level)
AC_ARG_WITH(assumed_kernel,
  [ --with-assumed-kernel= Assume kernel version is for purposes of workarounds],
  [assumed_kernel=$withval; CFLAGS="$CFLAGS -DASSUME_KERNEL=\\\"$with_assumed_kernel\\\""],
  [assumed_kernel="autodetect"] )
AC_MSG_RESULT($assumed_kernel)

AC_MSG_CHECKING([for if MIC should be used])
AC_ARG_WITH(mic,
  [ --with-mic To compile for Intel MIC ],
  [MIC=yes
   tls=__thread
   virtualtimer=cputime_id
   perf_events=yes
   walltimer=clock_realtime_hr
   ffsll=no
   cross_compiling=yes
   arch=k1om],
  [MIC=no])
AC_MSG_RESULT($MIC)
AC_SUBST(MIC)

CFLAGS="$CFLAGS -g"
#If not set, set FFLAGS to null to prevent AC_PROG_F77 from defaulting it 
to -g -O2 if test "x$FFLAGS" = "x"; then FFLAGS="" fi OPTFLAGS="-O2" TOPTFLAGS="-O1" AC_PROG_CC([xlc icc gcc cc]) AC_PROG_F77([xlf ifort gfortran f95 f90 f77]) if test "x$F77" = "x"; then F77= fi AC_CHECK_PROG( [MPICC], mpicc, [mpicc], []) # Lets figure out what CC actually is... # Used in later checks to set compiler specific options if `$CC -V 2>&1 | grep '^Intel(R) C' >/dev/null 2>&1` ; then CC_COMMON_NAME="icc" elif `$CC -v 2>&1 | grep 'gcc version' >/dev/null 2>&1` ; then CC_COMMON_NAME="gcc" elif `$CC -qversion 2>&1 | grep 'IBM XL C' >/dev/null 2>&1`; then CC_COMMON_NAME="xlc" else CC_COMMON_NAME="unknown" fi #prevent icc warnings about overriding optimization settings set by AC_PROG_CC # remark #869: parameter was never referenced # remark #271: trailing comma is nonstandard if test "$CC_COMMON_NAME" = "icc"; then CFLAGS="$CFLAGS -diag-disable 188,869,271" if test "$MIC" = "yes"; then CC="$CC -mmic -fPIC" fi fi if test "$F77" = "ifort" -a "$MIC" = "yes"; then F77="$F77 -mmic -fPIC" fi AC_PROG_AWK AC_PROG_CPP AC_PROG_LN_S AC_PROG_MAKE_SET AC_PROG_RANLIB AC_GNU_SOURCE AC_HEADER_STDC AC_C_INLINE AC_HEADER_TIME AC_CHECK_HEADERS([sys/time.h c_asm.h intrinsics.h mach/mach_time.h sched.h]) AC_CHECK_FUNCS([gethrtime read_real_time time_base_to_time clock_gettime mach_absolute_time sched_getcpu]) # # Check if the system provides dl* symbols without -ldl, and if not, # check for -ldl existance. 
# AC_MSG_CHECKING([for dlopen and dlerror symbols in base system]) AC_TRY_LINK([#include ], [void *p = dlopen ("", 0); char *c = dlerror();], [dlsymbols_in_base="yes"], [dlsymbols_in_base="no"]) if test "${dlsymbols_in_base}" = "yes"; then AC_MSG_RESULT([found]) LDL="" else AC_MSG_RESULT([not found]) AC_MSG_CHECKING([for dlopen and dlerror symbols in -ldl]) SAVED_LIBS=${LIBS} LIBS="${LIBS} -ldl" AC_TRY_LINK([#include ], [void *p = dlopen ("", 0); char *c = dlerror();], [has_ldl="yes"], [has_ldl="no"]) LIBS=${SAVED_LIBS} if test "${has_ldl}" = "yes" ; then AC_MSG_RESULT([found]) LDL="-ldl" else AC_MSG_ERROR([cannot find dlopen and dlerror symbols neither in the base system libraries nor in -ldl]) fi fi AC_SUBST(LDL) if test "$OS" = "CLE"; then virtualtimer=times tls=__thread walltimer=cycle ffsll=yes cross_compiling=yes STATIC="-static" # _rtc is only defined when using the Cray compiler AC_MSG_CHECKING([for _rtc intrinsic]) rtc_ok=yes AC_TRY_LINK([#ifdef HAVE_INTRINSICS_H #include #endif], [_rtc()], [AC_DEFINE(HAVE__RTC,1,[Define for _rtc() intrinsic.])], [rtc_ok=no AC_DEFINE(NO_RTC_INTRINSIC,1,[Define if _rtc() is not found.])]) AC_MSG_RESULT($rtc_ok) elif test "$OS" = "bgp"; then CC=powerpc-bgp-linux-gcc F77=powerpc-bgp-linux-gfortran walltimer=cycle virtualtimer=perfctr tls=no ffsll=yes cross_compiling=yes elif test "$OS" = "bgq"; then AC_ARG_WITH(bgpm_installdir, [ --with-bgpm_installdir= Specify the installation path of BGPM], [BGPM_INSTALL_DIR=$withval CFLAGS="$CFLAGS -I$withval"], [AC_MSG_ERROR([BGQ CPU component requires installation path of BGPM (see --with-bgpm_installdir)])]) bitmode=64 tls=no elif test "$OS" = "linux"; then if test "$arch" = "ppc64" -o "$arch" = "x86_64"; then if test "$bitmode" = "64" -a "$libdir" = '${exec_prefix}/lib'; then libdir='${exec_prefix}/lib64' fi fi elif test "$OS" = "solaris"; then AC_CHECK_TYPE([hrtime_t], [AC_DEFINE(HAVE_HRTIME_T, 1, [Define if hrtime_t is defined in ])],[], [#if HAVE_SYS_TIME_H #include #endif]) if test 
"x$AR" = "x"; then AR=/usr/ccs/bin/ar fi fi if test "x$AR" = "x"; then AR=ar fi if test "$cross_compiling" = "yes" ; then AC_MSG_CHECKING(for native compiler for header generation) AC_ARG_WITH(nativecc, [ --with-nativecc= Specify native C compiler for header generation ], [nativecc=$withval], [nativecc=gcc]) AC_MSG_RESULT($nativecc) fi AC_ARG_WITH(tests, [ --with-tests= Specify which tests to run on install ], [tests=$withval], [tests="ctests ftests"]) AC_MSG_CHECKING(for debug build) AC_ARG_WITH(debug, [ --with-debug= Build a debug version, debug version plus memory tracker or none ], [debug=$withval]) if test "$debug" = "yes"; then if test "$CC_COMMON_NAME" = "gcc"; then CFLAGS="$CFLAGS -g3" fi OPTFLAGS="-O0" PAPICFLAGS+=" -DDEBUG -DPAPI_NO_MEMORY_MANAGEMENT" elif test "$debug" = "memory"; then if test "$CC_COMMON_NAME" = "gcc"; then CFLAGS="$CFLAGS -g3" fi OPTFLAGS="-O0" PAPICFLAGS+=" -DDEBUG" else PAPICFLAGS+="-DPAPI_NO_MEMORY_MANAGEMENT" fi AC_MSG_RESULT($debug) if test "$CC_COMMON_NAME" = "gcc"; then gcc_version=`gcc -v 2>&1 | tail -n 1 | awk '{printf $3}'` major=`echo $gcc_version | sed 's/\([[^.]][[^.]]*\).*/\1/'` minor=`echo $gcc_version | sed 's/[[^.]][[^.]]*.\([[^.]][[^.]]*\).*/\1/'` if (test "$major" -ge 4 || test "$major" = 3 -a "$minor" -ge 4); then CFLAGS+=" -Wextra" else CFLAGS+=" -W" fi # -Wextra => -Woverride-init on gcc >= 4.2 # This issues a warning (error under -Werror) for some libpfm4 code. 
AC_MSG_CHECKING( for -Wno-override-init) oldcflags="$CFLAGS" CFLAGS+=" -Wall -Wextra -Werror -Wno-override-init" AC_COMPILE_IFELSE([AC_LANG_SOURCE( [ struct A { int x; int y; }; int main(void) { struct A a = {.x = 0, .y = 0, .y = 5 }; return a.x; } ])], [HAVE_NO_OVERRIDE_INIT=1], [HAVE_NO_OVERRIDE_INIT=0] ) CFLAGS="$oldcflags" AC_MSG_RESULT($HAVE_NO_OVERRIDE_INIT) fi AC_MSG_CHECKING(for CPU type) AC_ARG_WITH(CPU, [ --with-CPU= Specify CPU type], [CPU=$withval case "$CPU" in core|core2|i7|atom|p4|p3|opteron|athlon) MISCSRCS="$MISCSRCS x86_cpuid_info.c" esac], [case "$OS" in aix) CPU="`/usr/sbin/lsattr -E -l proc0 | grep type | cut -d '_' -f 2 | cut -d ' ' -f 1 | tr '[A-Z]' '[a-z]'`" if test "$CPU" = ""; then CPU="`/usr/sbin/lsattr -E -l proc1 | grep type | cut -d '_' -f 2 | cut -d ' ' -f 1 | tr '[A-Z]' '[a-z]'`" fi ;; freebsd) family=`uname -m` if test "$family" = "amd64"; then MISCSRCS="$MISCSRCS x86_cpuid_info.c" elif test "$family" = "i386"; then MISCSRCS="$MISCSRCS x86_cpuid_info.c" fi ;; darwin) family=`uname -m` MISCSRCS="$MISCSRCS x86_cpuid_info.c" ;; linux) family=`uname -m` if test "$family" = "x86_64"; then MISCSRCS="$MISCSRCS x86_cpuid_info.c" CPU="x86" elif test "$family" = "i686"; then MISCSRCS="$MISCSRCS x86_cpuid_info.c" CPU="x86" elif test "$family" = "ppc64"; then CPU_info="`cat /proc/cpuinfo | grep cpu | cut -d: -f2 | cut -d' ' -f2 | sed '2,$d'`" case "$CPU_info" in PPC970*) CPU="PPC970";; POWER5) CPU="POWER5";; POWER5+) CPU="POWER5+";; POWER6) CPU="POWER6";; POWER7) CPU="POWER7";; esac fi ;; solaris) AC_CHECK_HEADER([libcpc.h], [CFLAGS="$CFLAGS -lcpc" AC_TRY_RUN([#include #include int main() { // Check for libcpc 2 if(CPC_VER_CURRENT == 2) exit(0); exit(1); } ], [cpc_version=2], [cpc_version=0])], [AC_MSG_ERROR([libcpc is needed for running PAPI on Solaris]) ]) processor=`uname -p` machinetype=`uname -m` if test "$processor" = "sparc"; then if test "$machinetype" = "sun4u"; then CPU=ultra AC_CHECK_LIB([cpc], [cpc_take_sample], [], 
[AC_MSG_ERROR([libcpc.a is needed on Solaris, install SUNWcpc]) ]) elif test "$machinetype" = "sun4v"; then CPU=niagara2 if test "$cpc_version" != "2"; then AC_MSG_ERROR([libcpc2 needed for Niagara 2]) fi else AC_MSG_ERROR([$machinetype not supported]) fi else AC_MSG_ERROR([Only SPARC processors are supported on Solaris]) fi ;; bgp) CPU=bgp ;; bgq) CPU=bgq ;; esac ]) AC_MSG_RESULT($CPU) AC_DEFINE_UNQUOTED(CPU,$CPU,[cpu type]) # First set pthread-mutexes based on arch case $arch in aarch64|arm*) pthread_mutexes=yes CFLAGS="$CFLAGS -DUSE_PTHREAD_MUTEXES" echo "forcing use of pthread mutexes... " >&6 ;; esac AC_ARG_WITH(pthread-mutexes, [ --with-pthread-mutexes Specify use of pthread mutexes rather than custom PAPI locks], [pthread_mutexes=yes CFLAGS="$CFLAGS -DUSE_PTHREAD_MUTEXES" ]) AC_ARG_WITH(ffsll, [ --with-ffsll Specify use of the ffsll() function ], [ffsll=$withval], [if test "$cross_compiling" = "yes" ; then AC_MSG_ERROR([ffsll must be specified for cross compile]) fi didcheck=1 AC_CHECK_FUNC(ffsll,[ffsll=yes],[ffsll=no]) ]) if test "$ffsll" = "yes" ; then AC_DEFINE(HAVE_FFSLL, 1, This platform has the ffsll() function) fi if test "$didcheck" != "1"; then AC_MSG_CHECKING(for ffsll) if test "$ffsll" = "yes" ; then AC_DEFINE(HAVE_FFSLL, 1, This platform has the ffsll() function) fi AC_MSG_RESULT($ffsll) fi AC_MSG_CHECKING(for working gettid) AC_LINK_IFELSE([AC_LANG_SOURCE([#include main() { pid_t a = gettid(); }])], [AC_MSG_RESULT(yes) AC_DEFINE(HAVE_GETTID, 1, [Full gettid function])], [AC_MSG_RESULT(no) AC_MSG_CHECKING(for working syscall(SYS_gettid)) AC_LINK_IFELSE([AC_LANG_SOURCE([#include #include main() { pid_t a = syscall(SYS_gettid); }])], [AC_MSG_RESULT(yes) AC_DEFINE(HAVE_SYSCALL_GETTID, 1, [gettid syscall function])], [AC_MSG_RESULT(no)]) ]) AC_ARG_WITH(walltimer, [ --with-walltimer= Specify realtime timer ], [walltimer=$withval], [if test "$cross_compiling" = "yes" ; then AC_MSG_ERROR([walltimer must be specified for cross compile]) fi 
AC_MSG_CHECKING(for working MMTIMER) AC_TRY_RUN([#include #include #include #include #include #include #ifndef MMTIMER_FULLNAME #define MMTIMER_FULLNAME "/dev/mmtimer" #endif int main() { int offset; int fd; if((fd = open(MMTIMER_FULLNAME, O_RDONLY)) == -1) exit(1); if ((offset = ioctl(fd, MMTIMER_GETOFFSET, 0)) < 0) exit(1); close(fd); exit(0); } ], [walltimer="mmtimer" AC_MSG_RESULT(yes)], [AC_MSG_RESULT(no) AC_MSG_CHECKING(for working CLOCK_REALTIME_HR POSIX 1b timer) AC_TRY_RUN([#include #include #include #include #include main() { struct timespec t1, t2; double seconds; if (syscall(__NR_clock_gettime,CLOCK_REALTIME_HR,&t1) == -1) exit(1); sleep(1); if (syscall(__NR_clock_gettime,CLOCK_REALTIME_HR,&t2) == -1) exit(1); seconds = ((double)t2.tv_sec + (double)t2.tv_nsec/1000000000.0) - ((double)t1.tv_sec + (double)t1.tv_nsec/1000000000.0); if (seconds > 1.0) exit(0); else exit(1); } ], [walltimer="clock_realtime_hr" AC_MSG_RESULT(yes)], [AC_MSG_RESULT(no) AC_MSG_CHECKING(for working CLOCK_REALTIME POSIX 1b timer) AC_TRY_RUN([#include #include #include #include #include main() { struct timespec t1, t2; double seconds; if (syscall(__NR_clock_gettime,CLOCK_REALTIME,&t1) == -1) exit(1); sleep(1); if (syscall(__NR_clock_gettime,CLOCK_REALTIME,&t2) == -1) exit(1); seconds = ((double)t2.tv_sec + (double)t2.tv_nsec/1000000000.0) - ((double)t1.tv_sec + (double)t1.tv_nsec/1000000000.0); if (seconds > 1.0) exit(0); else exit(1); } ], [walltimer="clock_realtime" AC_MSG_RESULT(yes) ], [walltimer="cycle" AC_MSG_RESULT(no)]) ]) ]) ]) AC_MSG_CHECKING(for which real time clock to use) if test "$walltimer" = "gettimeofday"; then AC_DEFINE(HAVE_GETTIMEOFDAY, 1, [Normal gettimeofday timer]) elif test "$walltimer" = "mmtimer"; then AC_DEFINE(HAVE_MMTIMER, 1, [Altix memory mapped global cycle counter]) altix="-DALTIX" elif test "$walltimer" = "clock_realtime_hr"; then AC_DEFINE(HAVE_CLOCK_GETTIME, 1, [POSIX 1b clock]) AC_DEFINE(HAVE_CLOCK_GETTIME_REALTIME_HR, 1, [POSIX 1b realtime HR 
clock]) elif test "$walltimer" = "clock_realtime"; then AC_DEFINE(HAVE_CLOCK_GETTIME, 1, [POSIX 1b clock]) AC_DEFINE(HAVE_CLOCK_GETTIME_REALTIME, 1, [POSIX 1b realtime clock]) elif test "$walltimer" = "cycle"; then AC_DEFINE(HAVE_CYCLE, 1, [Native access to a hardware cycle counter]) else AC_MSG_ERROR([Unknown value for walltimer]) fi AC_MSG_RESULT($walltimer) SAVED_LIBS=$LIBS SAVED_LDFLAGS=$LDFLAGS SAVED_CFLAGS=$CFLAGS LIBS="" LDFLAGS="" CFLAGS="-pthread" AC_ARG_WITH(tls, [ --with-tls= This platform supports thread local storage with a keyword ], [tls=$withval], [if test "$cross_compiling" = "yes" ; then AC_MSG_ERROR([tls must be specified for cross compile]) fi AC_MSG_CHECKING(for working __thread) AC_TRY_RUN([#include #include extern __thread int i; static int res1, res2; void thread_main (void *arg) { i = (int)arg; sleep (1); if ((int)arg == 1) res1 = (i == (int)arg); else res2 = (i == (int)arg); } __thread int i; int main () { pthread_t t1, t2; i = 5; pthread_create (&t1, NULL, thread_main, (void *)1); pthread_create (&t2, NULL, thread_main, (void *)2); pthread_join (t1, NULL); pthread_join (t2, NULL); return !(res1 + res2 == 2); } ], [AC_MSG_RESULT(yes) tls="__thread"], [AC_MSG_RESULT(no) tls="no" ]) if test "$OS" = "linux"; then if test "x$tls" = "x__thread"; then # On some linux distributions, TLS works in executables, but linking against # a shared library containing TLS fails with: undefined reference to `__tls_get_addr' rm -f conftest.c conftest.so conftest echo "static __thread int foo; void main () { foo = 5; }" > conftest.c gcc -fPIC --shared -o conftest.so conftest.c > /dev/null 2>&1 gcc -o conftest conftest.so > /dev/null 2>&1 if test ! 
-f conftest; then AC_MSG_WARN([Disabling usage of __thread.]); tls="no" fi rm -f conftest.c conftest.so conftest fi fi]) AC_MSG_CHECKING(for high performance thread local storage) if test "$tls" = "no"; then NOTLS="-DNO_TLS" elif test "x$tls" != "x"; then if test "$tls" = "yes"; then tls="__thread" fi NOTLS="-DUSE_COMPILER_TLS" AC_DEFINE_UNQUOTED(HAVE_THREAD_LOCAL_STORAGE,$tls,[Keyword for per-thread variables]) fi AC_MSG_RESULT($tls) AC_ARG_WITH(virtualtimer, [ --with-virtualtimer= Specify per-thread virtual timer ], [virtualtimer=$withval], [if test "$cross_compiling" = "yes" ; then AC_MSG_ERROR([virtualtimer must be specified for cross compile]) fi AC_MSG_CHECKING(for working CLOCK_THREAD_CPUTIME_ID POSIX 1b timer) AC_TRY_RUN([#include #include #include #include #include #include #include #include #include #include #if !defined( SYS_gettid ) #define SYS_gettid 1105 #endif struct timespec threadone = { 0, 0 }; struct timespec threadtwo = { 0, 0 }; pthread_t threadOne, threadTwo; volatile int done = 0; int gettid() { return syscall( SYS_gettid ); } void *doThreadOne( void * v ) { while (!done) sleep(1); if (syscall(__NR_clock_gettime,CLOCK_THREAD_CPUTIME_ID,&threadone) == -1) { perror("clock_gettime(CLOCK_THREAD_CPUTIME_ID)"); exit(1); } return 0; } void *doThreadTwo( void * v ) { long i, j = 0xdeadbeef; for( i = 0; i < 0xFFFFFFF; ++i ) { j = j ^ i; } if (syscall(__NR_clock_gettime,CLOCK_THREAD_CPUTIME_ID,&threadtwo) == -1) { perror("clock_gettime(CLOCK_THREAD_CPUTIME_ID)"); exit(1); } done = 1; return j; } int main( int argc, char ** argv ) { int status = pthread_create( & threadOne, NULL, doThreadOne, NULL ); assert( status == 0 ); status = pthread_create( & threadTwo, NULL, doThreadTwo, NULL ); assert( status == 0 ); status = pthread_join( threadTwo, NULL ); assert( status == 0 ); status = pthread_join( threadOne, NULL ); assert( status == 0 ); if ((threadone.tv_sec != threadtwo.tv_sec) || (threadone.tv_nsec != threadtwo.tv_nsec)) exit(0); else { 
fprintf(stderr,"T1 %ld %ld T2 %ld %ld\n",threadone.tv_sec,threadone.tv_nsec,threadtwo.tv_sec,threadtwo.tv_nsec); exit(1); } } ], [AC_MSG_RESULT(yes) virtualtimer="clock_thread_cputime_id"], [AC_MSG_RESULT(no) # *** Checks for working per thread timer*** AC_MSG_CHECKING(for working per-thread times() timer) AC_TRY_RUN([#include #include #include #include #include #include #include #include #include #include #if !defined( SYS_gettid ) #define SYS_gettid 1105 #endif long threadone = 0, threadtwo = 0; pthread_t threadOne, threadTwo; volatile int done = 0; int gettid() { return syscall( SYS_gettid ); } int doThreadOne( void * v ) { struct tms tm; int status; while (!done) sleep(1); status = times( & tm ); assert( status != -1 ); threadone = tm.tms_utime; return 0; } int doThreadTwo( void * v ) { struct tms tm; long i, j = 0xdeadbeef; int status; for( i = 0; i < 0xFFFFFFF; ++i ) { j = j ^ i; } status = times( & tm ); assert( status != -1 ); threadtwo = tm.tms_utime; done = 1; return j; } int main( int argc, char ** argv ) { int status = pthread_create( & threadOne, NULL, doThreadOne, NULL ); assert( status == 0 ); status = pthread_create( & threadTwo, NULL, doThreadTwo, NULL ); assert( status == 0 ); status = pthread_join( threadTwo, NULL ); assert( status == 0 ); status = pthread_join( threadOne, NULL ); assert( status == 0 ); return (threadone == threadtwo); } ], [AC_MSG_RESULT(yes) virtualtimer="times"], [AC_MSG_RESULT(no) virtualtimer="default"]) ]) ]) LDFLAGS=$SAVED_LDFLAGS CFLAGS=$SAVED_CFLAGS LIBS=$SAVED_LIBS AC_MSG_CHECKING(for which virtual timer to use) case "$virtualtimer" in times) AC_DEFINE(HAVE_PER_THREAD_TIMES, 1, [Working per thread timer]) ;; getrusage) AC_DEFINE(HAVE_PER_THREAD_GETRUSAGE, 1, [Working per thread getrusage]) ;; clock_thread_cputime_id) AC_DEFINE(HAVE_CLOCK_GETTIME_THREAD, CLOCK_THREAD_CPUTIME_ID, [POSIX 1b per-thread clock]) ;; proc|default|perfctr) AC_DEFINE(USE_PROC_PTTIMER, 1, [Use /proc for per-thread times]) 
AC_DEFINE(USE_PERFCTR_PTTIMER, 1, [Use the perfctr virtual TSC for per-thread times]) ;; esac AC_MSG_RESULT($virtualtimer) if test "$OS" = "aix"; then AC_ARG_WITH(pmapi, [ --with-pmapi= Specify path of pmapi on aix system ], [PMAPI=$withval], [PMAPI="/usr/pmapi"]) LIBS="-L$PMAPI/lib -lpmapi" CPPFLAGS="$CPPFLAGS -I$PMAPI/include" AC_CHECK_LIB([pmapi], [pm_initialize], [PMINIT="-DPM_INITIALIZE"], [AC_CHECK_LIB([pmapi], [pm_init], [PMINIT="-DPM_INIT"], [AC_MSG_ERROR([libpmapi.a not found, rerun configure with different flags]) ]) ]) fi AC_MSG_CHECKING(for static user preset events) AC_ARG_WITH(static_user_events, [ --with-static-user-events Build with a static user events file.], [STATIC_USER_EVENTS=$withval], [STATIC_USER_EVENTS=no]) if test "$STATIC_USER_EVENTS" = "yes"; then PAPICFLAGS+=" -DSTATIC_USER_EVENTS" fi AC_MSG_RESULT($STATIC_USER_EVENTS) AC_MSG_CHECKING(for static PAPI preset events) AC_ARG_WITH(static_papi_events, [ --with-static-papi-events Build with a static papi events file.], [STATIC_PAPI_EVENTS=$withval], [STATIC_PAPI_EVENTS=yes]) if test "$STATIC_PAPI_EVENTS" = "yes"; then PAPICFLAGS+=" -DSTATIC_PAPI_EVENTS_TABLE" fi AC_MSG_RESULT($STATIC_PAPI_EVENTS) AC_MSG_CHECKING(for whether to build static library) AC_ARG_WITH(static_lib, [ --with-static-lib= Build a static library], [static_lib=$withval], [static_lib=yes]) AC_MSG_RESULT($static_lib) AC_MSG_CHECKING(for whether to build shared library) AC_ARG_WITH(shared_lib, [ --with-shared-lib= Build a shared library], [shared_lib=$withval], [shared_lib=yes]) AC_MSG_RESULT($shared_lib) if test "$shared_lib" = "no" -a "$static_lib" = "no"; then AC_MSG_ERROR(Both shared and static libs are disabled) fi if test "$shared_lib" = "yes"; then papiLIBS="shared" fi if test "$static_lib" = "yes"; then papiLIBS="$papiLIBS static" fi AC_MSG_CHECKING(for static compile of tests and utilities) AC_ARG_WITH(static_tools, [ --with-static-tools Specify static compile of tests and utilities], [STATIC="-static" 
AC_MSG_RESULT(yes)],
            [AC_MSG_RESULT(no)])

# Static tests and utilities need the static PAPI library. Only complain when
# static tools were actually requested (STATIC is -static); checking
# static_lib alone rejected a shared-only build even when the tools were to
# be linked against the shared library.
if test "$STATIC" = "-static" -a "$static_lib" = "no"; then
  AC_MSG_ERROR(Building tests and utilities static but no static papi library to be built)
fi

AC_MSG_CHECKING(for linking with papi shared library of tests and utilities)
AC_ARG_WITH(shlib_tools,
            [ --with-shlib-tools Specify linking with papi library of tests and utilities],
            [shlib_tools=yes
             AC_MSG_RESULT(yes)],
            [shlib_tools=no
             AC_MSG_RESULT(no)])

# Choose the library the tests and utilities link against and reject
# combinations where that library is not being built.
if test "$shlib_tools" = "yes"; then
  if test "$shared_lib" != "yes"; then
    AC_MSG_ERROR(Building static but specified shared linking for tests and utilities)
  fi
  if test "$STATIC" = "-static"; then
    AC_MSG_ERROR([Building shared but specified static linking])
  fi
  LINKLIB='$(SHLIB)'
elif test "$shlib_tools" = "no"; then
  if test "$static_lib" != "yes"; then
    AC_MSG_ERROR([Building shared but specified static linking for tests and utilities])
  fi
  LINKLIB='$(LIBRARY)'
fi

##################################################
# perfctr
##################################################
# perfctr holds the detected perfctr minor version: 0 means not in use,
# 6 or 7 select perfctr 2.6.x or 2.7.x.
perfctr=0
force_perfctr=no
AC_ARG_WITH(perfctr,
            [ --with-perfctr Specify perfctr as the performance interface ],
            [force_perfctr=yes
             user_specified_interface=perfctr
             if test "$arch" != "ppc64"; then
               perfctr=6
             else
               perfctr=7
             fi],
            [perfctr=0])

# On native builds the presence of the perfctr device nodes decides the version.
if test "$cross_compiling" = "no" ; then
  AC_CHECK_FILE(/sys/class/perfctr,[perfctr=7],[AC_CHECK_FILE(/dev/perfctr,[perfctr=6])])
fi

if test "$perfctr" != 0; then
  pfm_incdir="libpfm-3.y/include/"
  AC_MSG_CHECKING(for perfctr version)
  if test "$perfctr" = 7 -a "$arch" != "ppc64"; then
    AC_MSG_ERROR([Perfctr 2.7.x only works on PPC64 machines.
Patch kernel with 2.6.x.]) fi AC_MSG_RESULT(2.$perfctr) fi AC_ARG_WITH(perfctr_root, [ --with-perfctr-root= Specify path to source tree (for use by developers only) ], [perfctr_root=$withval user_specified_interface=perfctr]) AC_ARG_WITH(perfctr_prefix, [ --with-perfctr-prefix= Specify prefix to installed perfctr distribution ], [perfctr_prefix=$withval user_specified_interface=perfctr]) AC_ARG_WITH(perfctr_incdir, [ --with-perfctr-incdir= Specify directory of perfctr header files in non-standard location ], [perfctr_incdir=$withval user_specified_interface=perfctr]) AC_ARG_WITH(perfctr_libdir, [ --with-perfctr-libdir= Specify directory of perfctr library in non-standard location ], [perfctr_libdir=$withval user_specified_interface=perfctr]) if test "$perfctr" != 0; then dotest=0 if test "x$perfctr_root" != "x"; then LIBS="-L$perfctr_root/usr.lib -lperfctr" CPPFLAGS="$CPPFLAGS -I$perfctr_root/usr.lib -I$perfctr_root/linux/include" dotest=1 elif test "x$perfctr_prefix" != "x"; then LIBS="-L$perfctr_prefix/lib -lperfctr" CPPFLAGS="$CPPFLAGS -I$perfctr_prefix/include" perfctr_libdir="$perfctr_prefix/lib" perfctr_incdir="$perfctr_prefix/include" dotest=1 else if test "x$perfctr_libdir" != "x"; then LIBS="-L$perfctr_libdir -lperfctr" dotest=1 fi if test "x$perfctr_incdir" != "x"; then CPPFLAGS="-I$perfctr_incdir" dotest=1 fi fi if test "$dotest" = 1; then AC_CHECK_LIB([perfctr], [vperfctr_open], [AC_CHECK_HEADERS([libperfctr.h], [oCFLAGS=$CFLAGS CFLAGS="$CFLAGS -static" AC_TRY_RUN([#include #include "libperfctr.h" int main() { if ((PERFCTR_ABI_VERSION >> 24) != 5) exit(1); exit(0); } ], [perfctr=6], [perfctr=7]) CFLAGS=$oCFLAGS], [AC_MSG_ERROR([libperfctr.h not found, rerun configure with different flags])])], [AC_MSG_ERROR([libperfctr.a not found, rerun configure with different flags]) ]) else AC_DEFINE(HAVE_LIBPERFCTR_H,1,[perfctr header file]) fi fi user_specified_interface=no ################################################## # perfmon 
################################################## old_pfmv2=n perfmon=0 perfmon2=no force_perfmon2=no AC_ARG_WITH(perfmon, [ --with-perfmon= Specify perfmon as the performance interface and specify version], [perfmon=$withval user_specified_interface=perfmon force_perfmon2=yes pfm_incdir="libpfm-3.y/include" perfmon=`echo ${perfmon} | sed 's/^[ \t]*//;s/[ \t]*$//'` perfmon=`echo ${perfmon} | grep -e '[[1-9]]\.[[0-9]][[0-9]]*'` if test "x$perfmon" = "x"; then AC_MSG_ERROR("Badly formed perfmon version string") fi perfmon=`echo ${perfmon} | sed 's/\.//'` if test $perfmon -gt 20; then perfmon2=yes fi if test $perfmon -lt 25; then old_pfmv2=y PFMCFLAGS="-DPFMLIB_OLD_PFMV2" fi], [perfmon=0 if test "$cross_compiling" = "no" ; then AC_CHECK_FILE(/sys/kernel/perfmon/version, [perfmon=`cat /sys/kernel/perfmon/version`], [AC_CHECK_FILE(/proc/perfmon, [perfmon=`cat /proc/perfmon | grep version | cut -d: -f2`], [perfmon=0])]) if test "$perfmon" != 0; then pfm_incdir="libpfm-3.y/include" perfmon=`echo ${perfmon} | sed 's/^[ \t]*//;s/[ \t]*$//'` perfmon=`echo ${perfmon} | grep -e '[[1-9]]\.[[0-9]][[0-9]]*'` perfmon=`echo ${perfmon} | sed 's/\.//'` if test $perfmon -gt 20; then perfmon2=yes fi if test $perfmon -lt 25; then # must be y, not yes, or libpfm breaks old_pfmv2="y" PFMCFLAGS="-DPFMLIB_OLD_PFMV2" fi fi fi]) force_pfm_incdir=no # default AC_ARG_WITH(pfm_root, [ --with-pfm-root= Specify path to source tree (for use by developers only) ], [pfm_root=$withval pfm_incdir=$withval/include pfm_libdir=$withval/lib]) AC_ARG_WITH(pfm_prefix, [ --with-pfm-prefix= Specify prefix to installed pfm distribution ], [pfm_prefix=$withval pfm_incdir=$pfm_prefix/include pfm_libdir=$pfm_prefix/lib]) AC_ARG_WITH(pfm_incdir, [ --with-pfm-incdir= Specify directory of pfm header files in non-standard location ], [pfm_incdir=$withval]) AC_ARG_WITH(pfm_libdir, [ --with-pfm-libdir= Specify directory of pfm library in non-standard location ], [pfm_libdir=$withval]) # if these are both empty, it 
# means we haven't set either pfm_prefix or pfm_root
# which would have set them. Thus it means that we set this to our included
# libpfm4 library. Shame on the person that sets one but not the other.
if test "x$pfm_incdir" = "x" -a "x$pfm_libdir" = "x"; then
  pfm_root="libpfm4"
  pfm_incdir="libpfm4/include"
  pfm_libdir="libpfm4/lib"
fi

##################################################
# Linux perf_event/perf_counter
##################################################
# The MIC option stores its answer in MIC (upper case). Testing the lower
# case name never matched, so perf_events was left uninitialised here and
# could be inherited from the environment.
if test "x$MIC" = "xno"; then
  perf_events=no
fi
force_perf_events=no
disable_uncore=yes

AC_ARG_WITH(perf_events,
            [ --with-perf-events Specify use of Linux Performance Event (requires kernel 2.6.32 or greater)],
            [force_perf_events=yes
             user_specified_interface=pe])

# RDPMC support
# Normalise the option value to true or false in enable_perfevent_rdpmc; the
# no branch previously assigned a misspelled variable name.
AC_ARG_ENABLE(perfevent_rdpmc,
              AS_HELP_STRING([--enable-perfevent-rdpmc], [Enable userspace rdpmc instruction on perf_event, default: yes]),
              [case "${enableval}" in
                 yes) enable_perfevent_rdpmc=true ;;
                 no) enable_perfevent_rdpmc=false ;;
                 *) AC_MSG_ERROR([bad value ${enableval} for --enable-perfevent-rdpmc]) ;;
               esac],
              [enable_perfevent_rdpmc=true])

if test "$enable_perfevent_rdpmc" = "true"; then
  PECFLAGS="$PECFLAGS -DUSE_PERFEVENT_RDPMC=1"
fi

# Uncore support
AC_ARG_WITH(pe_incdir,
            [ --with-pe-incdir= Specify path to the correct perf header file],
            [pe_incdir=$withval
             force_perf_events=yes
             user_specified_interface=pe],
            [pe_incdir=$pfm_incdir/perfmon])

AC_ARG_ENABLE(perf_event_uncore,
              [ --disable-perf-event-uncore Disable perf_event uncore component])

AS_IF([test "x$enable_perf_event_uncore" != "xno"],[
  disable_uncore=no
])

# Check for perf_event.h
if test "$force_perf_events" = "yes"; then
  perf_events="yes"
fi

# Native builds also enable perf_events when the kernel exposes
# perf_event_paranoid and the header is present in pe_incdir.
if test "$cross_compiling" = "no"; then
  AC_CHECK_FILE(/proc/sys/kernel/perf_event_paranoid,[
    AC_CHECK_FILE($pe_incdir/perf_event.h,perf_events="yes")])
fi

if test "$perf_events" = "yes"; then
  PECFLAGS="$PECFLAGS -DPEINCLUDE=\\\"$pe_incdir/perf_event.h\\\""
fi

#
# Sort out the choice of the user vs.
what we detected # # MESSING WITH CFLAGS IS STUPID! # if test "$user_specified_interface" != "no"; then if test "$user_specified_interface" = "perfctr"; then perfmon=0 perf_events="no" else if test "$user_specified_interface" = "perfmon"; then perfctr=0 perf_events="no" PAPICFLAGS+=" $PFMCFLAGS" else if test "$user_specified_interface" = "pe"; then perfctr=0 perfmon=0 PAPICFLAGS+=" $PECFLAGS" else AC_MSG_ERROR("Unknown user_specified_interface=$user_specified_interface perfctr=$perfctr perfmon=$perfmon perfmon2=$perfmon2 perf-events=$perf_events") fi fi fi else if test "$perfmon" != 0; then PAPICFLAGS+=" $PFMCFLAGS" fi if test "$perf_events" = "yes"; then PAPICFLAGS+=" $PECFLAGS" fi fi # # User has made no choice, so we default to the ordering below in the platform section, if # we detect more than one. # # # What does this next section do? It determines whether or not to run the tests for libpfm # based on the settings of pfm_root, pfm_prefix, pfm_incdir, pfm_libdir # if test "$perfmon" != 0 -o "$perf_events" = "yes"; then # if prefix set, then yes if test "x$pfm_prefix" != "x"; then dotest=1 # if root not set and libdir set, then yes elif test "x$pfm_root" = "x" -a "x$pfm_libdir" != "x"; then dotest=1 else dotest=0 fi if test "$dotest" = 1; then LIBS="-L$pfm_libdir -lpfm" CPPFLAGS="$CPPFLAGS -I$pfm_incdir" AC_CHECK_LIB([pfm], [pfm_initialize], [AC_CHECK_HEADERS([perfmon/pfmlib.h], [if test "$arch" = "ia64"; then AC_CHECK_HEADERS([perfmon/pfmlib_montecito.h]) fi AC_CHECK_FUNC(pfm_get_event_description, [AC_DEFINE(HAVE_PFM_GET_EVENT_DESCRIPTION,1,[event description function])],[]) AC_CHECK_MEMBER(pfmlib_reg_t.reg_evt_idx, [AC_DEFINE(HAVE_PFM_REG_EVT_IDX,1,[old reg_evt_idx])],[],[#include "perfmon/pfmlib.h"]) AC_CHECK_MEMBER(pfmlib_output_param_t.pfp_pmd_count, [AC_DEFINE(HAVE_PFMLIB_OUTPUT_PFP_PMD_COUNT,1,[new pfmlib_output_param_t])],[],[#include "perfmon/pfmlib.h"]) AC_CHECK_MEMBER(pfm_msg_t.type, [AC_DEFINE(HAVE_PFM_MSG_TYPE,1,[new pfm_msg_t])],[],[#include 
"perfmon/perfmon.h"]) ], [AC_MSG_ERROR([perfmon/pfmlib.h not found, rerun configure with different flags]) ]) ], [AC_MSG_ERROR([libpfm.a not found, rerun configure with different flags]) ]) else AC_DEFINE(HAVE_PERFMON_PFMLIB_MONTECITO_H,1,[Montecito headers]) AC_DEFINE(HAVE_PFM_GET_EVENT_DESCRIPTION,1,[event description function]) AC_DEFINE(HAVE_PFMLIB_OUTPUT_PFP_PMD_COUNT,1,[new pfmlib_output_param_t]) fi fi ################################################## # Checking platform ################################################## AC_MSG_CHECKING(platform) case "$OS" in aix) MAKEVER="$OS"-"$CPU" ;; bgp) MAKEVER=bgp ;; bgq) MAKEVER=bgq ;; CLE) if test "$perfmon2" = "yes"; then # major_version=`echo $OSVER | sed 's/\([[^.]][[^.]]*\).*/\1/'` # minor_version=`echo $OSVER | sed 's/[[^.]][[^.]]*.\([[^.]][[^.]]*\).*/\1/'` # point_version=`echo $OSVER | sed -e 's/[[^.]][[^.]]*.[[^.]][[^.]]*.\(.*\)/\1/' -e 's/[[^0-9]].*//'` # if (test "$major_version" = 2 -a "$minor_version" = 6 -a "$point_version" -lt 31 -a "$perfmon2" != "yes" ); then MAKEVER="$OS"-perfmon2 else MAKEVER="$OS"-pe fi ;; freebsd) MAKEVER="freebsd" LDFLAGS="-lpmc" # HWPMC driver is available for FreeBSD >= 6 FREEBSD_VERSION=`uname -r | cut -d'.' -f1` if test "${FREEBSD_VERSION}" -lt 6 ; then AC_MSG_ERROR([PAPI requires FreeBSD 6 or greater]) fi # Determine if HWPMC module is on the kernel dmesg | grep hwpmc 2> /dev/null > /dev/null if test "$?" != "0" ; then AC_MSG_ERROR([HWPMC module not found. 
(see INSTALL.TXT)])
    fi

    # Determine the number of counters
    echo "/* Automatically generated file by configure */" > freebsd-config.h
    echo "#ifndef _FREEBSD_CONFIG_H_" >> freebsd-config.h
    echo "#define _FREEBSD_CONFIG_H_" >> freebsd-config.h
    echo "" >> freebsd-config.h

    # NOTE(review): the include directives in the two probes below carry no
    # header names in this copy of the file; that text is kept exactly as
    # found and must be restored from upstream before regenerating configure.
    #
    # The link probe stores its outcome in pmc_init_linked, which is the
    # variable tested right after it. It used to write a differently spelled
    # variable, so a failed link was never detected.
    AC_TRY_LINK([#include #include ],
                [int i = pmc_init();],
                [pmc_init_linked="yes"],
                [pmc_init_linked="no"])
    if test "${pmc_init_linked}" = "no" ; then
      AC_MSG_ERROR([Failed to link hwpmc example])
    fi

    # The run probe reports through its exit status: a zero status leaves
    # num_counters at 0, which is treated as a failure below; any other
    # status is recorded as the counter count.
    AC_TRY_RUN([#include #include
int main()
{
  const struct pmc_cpuinfo *info;
  if (pmc_init() < 0) return 0;
  if (pmc_cpuinfo (&info) < 0) return 0;
  return info->pm_npmc-1;
}
],
               [ num_counters="0" ],
               [ num_counters="$?"])
    if test "${num_counters}" = "0" ; then
      AC_MSG_ERROR([pmc_npmc info returned 0. Determine if the HWPMC module is loaded (see hwpmc(4))])
    fi
    echo "#define HWPMC_NUM_COUNTERS ${num_counters}" >> freebsd-config.h
    echo "" >> freebsd-config.h
    echo "#endif" >> freebsd-config.h
    ;;
  linux)
    # Explicit user choices win, in the order perf_events, perfmon2, perfctr;
    # after that the detected interfaces are tried in the same order.
    if test "$force_perf_events" = "yes" ; then
      MAKEVER="$OS"-pe
    elif test "$force_perfmon2" = "yes" ; then
      MAKEVER="$OS"-perfmon2
    elif test "$force_perfctr" = "yes" ; then
      MAKEVER="$OS"-perfctr-x86
      case "$CPU" in
        itanium2|montecito)
          if test "$bitmode" = "32"; then
            AC_MSG_ERROR([The bitmode you specified is not supported])
          fi
          MAKEVER="$OS"-pfm-"$CPU" ;;
        POWER5|POWER5+|POWER6|POWER7|PPC970)
          MAKEVER="$OS"-perfctr-"$CPU" ;;
      esac
    elif test "$perf_events" = "yes" ; then
      MAKEVER="$OS"-pe
    elif test "$perfmon2" = "yes" ; then
      MAKEVER="$OS"-perfmon2
    elif test "$old_pfmv2" = "y" ; then
      MAKEVER="$OS"-pfm-"$CPU"
    elif test "$perfctr" != 0 ; then
      case "$CPU" in
        itanium2|montecito)
          if test "$bitmode" = "32"; then
            AC_MSG_ERROR([The bitmode you specified is not supported])
          fi
          MAKEVER="$OS"-pfm-"$CPU" ;;
        x86)
          MAKEVER="$OS"-perfctr-x86 ;;
        POWER5|POWER5+|POWER6|POWER7|PPC970)
          MAKEVER="$OS"-perfctr-"$CPU" ;;
        *)
          MAKEVER="$OS"-generic ;;
      esac
    else
      MAKEVER="$OS"-generic
    fi
    ;;
  solaris)
    if test "$bitmode" = "64" -a "`isainfo -v | grep "64"`"
= ""; then AC_MSG_ERROR([The bitmode you specified is not supported]) fi MAKEVER="$OS"-"$CPU" ;; darwin) MAKEVER="$OS" ;; esac AC_MSG_RESULT($MAKEVER) if test "x$MAKEVER" = "x"; then AC_MSG_NOTICE(This platform is not supported so a generic build without CPU counters will be used) MAKEVER="generic_platform" fi ################################################## # Set build macros ################################################## FILENAME=Makefile.inc SHOW_CONF=showconf CTEST_TARGETS="all" FTEST_TARGETS="all" LIBRARY=libpapi.a SHLIB='libpapi.so.AC_PACKAGE_VERSION' VLIB='libpapi.so.$(PAPIVER)' OMPCFLGS=-fopenmp CC_R='$(CC) -pthread' CC_SHR='$(CC) -fPIC -DPIC -shared -Wl,-soname -Wl,$(VLIB) -Xlinker "-rpath" -Xlinker "$(LIBDIR)"' if test "$CC_COMMON_NAME" = "gcc"; then if test "$bitmode" = "32"; then BITFLAGS=-m32 elif test "$bitmode" = "64"; then BITFLAGS=-m64 fi fi OPTFLAGS="$OPTFLAGS" PAPICFLAGS+=" -D_REENTRANT -D_GNU_SOURCE $NOTLS" CFLAGS="$CFLAGS $BITFLAGS" if test "$CC_COMMON_NAME" = "gcc"; then CFLAGS="$CFLAGS -Wall" fi FFLAGS="$CFLAGS $BITFLAGS $FFLAGS -Dlinux" # OS Support if (test "$OS" = "aix"); then OSFILESSRC=aix-memory.c OSLOCK=aix-lock.h OSCONTEXT=aix-context.h elif (test "$OS" = "bgp"); then OSFILESSRC=linux-bgp-memory.c OSLOCK=linux-bgp-lock.h OSCONTEXT=linux-bgp-context.h elif (test "$OS" = "bgq"); then OSFILESSRC=linux-bgq-memory.c OSLOCK=linux-bgq-lock.h OSCONTEXT=linux-context.h elif (test "$OS" = "freebsd"); then OSFILESSRC=freebsd-memory.c OSLOCK="freebsd-lock.h" OSCONTEXT="freebsd-context.h" elif (test "$OS" = "linux"); then OSFILESSRC="linux-memory.c linux-timer.c linux-common.c" OSFILESHDR="linux-memory.h linux-timer.h linux-common.h" OSLOCK="linux-lock.h" OSCONTEXT="linux-context.h" elif (test "$OS" = "solaris"); then OSFILESSRC="solaris-memory.c solaris-common.c" OSFILESHDR="solaris-memory.h solaris-common.h" OSLOCK="solaris-lock.h" OSCONTEXT="solaris-context.h" elif (test "$OS" = "darwin"); then OSFILESSRC="darwin-memory.c darwin-common.c" 
OSFILESHDR="darwin-memory.h darwin-common.h" OSLOCK="darwin-lock.h" OSCONTEXT="darwin-context.h" fi OSFILESOBJ='$(OSFILESSRC:.c=.o)' if (test "$MAKEVER" = "aix-power5" || test "$MAKEVER" = "aix-power6" || test "$MAKEVER" = "aix-power7"); then if test "$bitmode" = "64"; then LIBRARY=libpapi64.a SHLIB=libpapi64.so # By default AIX enforces a limit on heap space #( limiting the heap to share the same 256MB memory segment as stack ) # changing the max data paramater moves the heap off the stack's memory segment BITFLAGS='-q64 -bmaxdata:0x07000000000000' ARG64=-X64 else # If the issue ever comes up, /dsa requires AIX v5.1 or higher # and the Large address-space model (-bmaxdata) requires v4.3 or later # see http://publib.boulder.ibm.com/infocenter/pseries/v5r3/topic/com.ibm.aix.genprogc/doc/genprogc/lrg_prg_support.htm#a179c11c5d SHLIB=libpapi.so BITFLAGS="-bmaxdata:0x80000000/dsa" fi CPUCOMPONENT_NAME=aix CPUCOMPONENT_C=aix.c CPUCOMPONENT_OBJ=aix.o VECTOR=_aix_vector PAPI_EVENTS_CSV="papi_events.csv" MISCHDRS="aix.h components/perfctr_ppc/ppc64_events.h papi_events_table.h" MISCSRCS="aix.c" CFLAGS+='-qenum=4 -Icomponents/perfctr_ppc -DNO_VARARG_MACRO -D_AIX -D_$(CPU_MODEL) -DNEED_FFSLL -DARCH_EVTS=\"$(ARCH_EVENTS).h\" -DCOMP_VECTOR=_ppc64_vectors -DSTATIC_PAPI_EVENTS_TABLE' FFLAGS+='-WF,-D_$(CPU_MODEL) -WF,-DARCH_EVTS=\"$(ARCH_EVENTS).h\"' CFLAGS+='-I$(PMAPI)/include -Icomponents/perfctr_ppc -qmaxmem=-1 -qarch=$(cpu_option) -qtune=$(cpu_option) -qlanglvl=extended $(BITFLAGS)' if test $debug != "yes"; then OPTFLAGS='-O3 -qstrict $(PMINIT)' else OPTFLAGS='$(PMINIT)' fi SMPCFLGS=-qsmp OMPCFLGS='-qsmp=omp' LDFLAGS='-L$(PMAPI)/lib -lpmapi' CC_R=xlc_r CC=xlc CC_SHR="xlc -G -bnoentry" AC_CHECK_PROGS( [MPICC], [mpicc mpcc], []) F77=xlf CPP='xlc -E $(CPPFLAGS)' if test "$MAKEVER" = "aix-power5"; then ARCH_EVENTS=power5_events CPU_MODEL=POWER5 cpu_option=pwr5 DESCR="AIX 5.1.0 or greater with POWER5" if test "$bitmode" = "64"; then DESCR="$DESCR 64 bit build" fi elif test 
"$MAKEVER" = "aix-power6"; then ARCH_EVENTS=power6_events CPU_MODEL=POWER6 cpu_option=pwr6 DESCR="AIX 5.1.0 or greater with POWER6" CPPFLAGS="-qlanglvl=extended" if test "$bitmode" = "64"; then DESCR="$DESCR 64 bit build" fi elif test "$MAKEVER" = "aix-power7"; then ARCH_EVENTS=power7_events CPU_MODEL=POWER7 cpu_option=pwr7 DESCR="AIX 5.1.0 or greater with POWER7" CPPFLAGS="-qlanglvl=extended" if test "$bitmode" = "64"; then DESCR="$DESCR 64 bit build" fi fi elif test "$MAKEVER" = "bgp"; then CPP="$CC -E" CPUCOMPONENT_NAME=linux-bgp CPUCOMPONENT_C=linux-bgp.c CPUCOMPONENT_OBJ=linux-bgp.o VECTOR=_bgp_vectors PAPI_EVENTS_CSV="papi_events.csv" MISCSRCS= CFLAGS='-g -gdwarf-2 -O2 -Wall -I. -I$(BGP_SYSDIR)/arch/include -DCOMP_VECTOR=_bgp_vectors' tests="$tests bgp_tests" SHOW_CONF=show_bgp_conf BGP_SYSDIR=/bgsys/drivers/ppcfloor BGP_GNU_LINUX_PATH='${BGP_SYSDIR}/gnu-linux' LDFLAGS='-L$(BGP_SYSDIR)/runtime/SPI -lSPI.cna' FFLAGS='-g -gdwarf-2 -O2 -Wall -I. -Dlinux' OPTFLAGS="-g -Wall -O3" TOPTFLAGS="-g -Wall -O0" SHLIB=libpapi.so DESCR="Linux for BlueGene/P" LIBS=static CC_SHR='$(CC) -shared -Xlinker "-soname" -Xlinker "$(SHLIB)" -Xlinker "-rpath" -Xlinker "$(LIBDIR)"' OMPCFLGS="" elif test "$MAKEVER" = "bgq"; then FILENAME=Rules.bgpm VECTOR=_bgq_vectors CPUCOMPONENT_NAME=linux-bgq CPUCOMPONENT_C=linux-bgq.c CPUCOMPONENT_OBJ=linux-bgq.o PAPI_EVENTS_CSV="papi_events.csv" MISCSRCS="linux-bgq-common.c" OPTFLAGS="-g -Wall -O3" TOPTFLAGS="-g -Wall -O0" SHLIB=libpapi.so DESCR="Linux for Blue Gene/Q" CC_SHR='$(CC) -fPIC -DPIC -shared -Wl,-soname -Wl,$(SHLIB) -Xlinker "-rpath" -Xlinker "$(LIBDIR)"' OMPCFLGS="" elif test "$MAKEVER" = "CLE-perfmon2"; then FILENAME=Rules.perfmon2 CPUCOMPONENT_NAME=perfmon CPUCOMPONENT_C=perfmon.c CPUCOMPONENT_OBJ=perfmon.o VECTOR=_papi_pfm_vector PAPI_EVENTS_CSV="papi_events.csv" F77=gfortran CFLAGS="$CFLAGS -D__crayxt" FFLAGS="" elif test "$MAKEVER" = "freebsd"; then CPUCOMPONENT_NAME=freebsd CPUCOMPONENT_C=freebsd.c CPUCOMPONENT_OBJ=freebsd.o 
VECTOR=_papi_freebsd_vector PAPI_EVENTS_CSV="freebsd_events.csv" MISCHDRS="freebsd/map-unknown.h freebsd/map.h freebsd/map-p6.h freebsd/map-p6-m.h freebsd/map-p6-3.h freebsd/map-p6-2.h freebsd/map-p6-c.h freebsd/map-k7.h freebsd/map-k8.h freebsd/map-p4.h freebsd/map-atom.h freebsd/map-core.h freebsd/map-core2.h freebsd/map-core2-extreme.h freebsd/map-i7.h freebsd/map-westme\ re.h" MISCSRCS="$MISCSRCS freebsd/map-unknown.c freebsd/map.c freebsd/map-p6.c freebsd/map-p6-m.c freebsd/map-p6-3.c freebsd/map-p6-2.c freebsd/map-p6-c.c freebsd/map-k7.c freebsd/map-k8.c freebsd/map-p4.c freebsd/map-atom.c freebsd/map-core.c freebsd/map-core2.c freebsd/map-core2-extreme.c freebsd/map-i7.c freebsd/map-westme\ re.c" DESCR="FreeBSD -over libpmc- " CFLAGS+=" -I. -Ifreebsd -DPIC -fPIC" CC_SHR='$(CC) -shared -Xlinker "-soname" -Xlinker "libpapi.so" -Xlinker "-rpath" -Xlinker "$(LIBDIR)" -DPIC -fPIC -I. -Ifreebsd' elif test "$MAKEVER" = "linux-generic"; then CPUCOMPONENT_NAME=linux-generic CPUCOMPONENT_C=linux-generic.c CPUCOMPONENT_OBJ=linux-generic.o PAPI_EVENTS_CSV="papi_events.csv" VECTOR=_papi_dummy_vector elif test "$MAKEVER" = "linux-pe"; then FILENAME=Rules.pfm4_pe CPUCOMPONENT_NAME=perf_event components="perf_event" if test "$disable_uncore" = "no"; then components="$components perf_event_uncore" fi elif test "$MAKEVER" = "linux-perfctr-x86"; then FILENAME=Rules.perfctr-pfm CPUCOMPONENT_NAME=perfctr-x86 VERSION=2.6.x components="perfctr" elif (test "$MAKEVER" = "linux-perfctr-POWER5" || test "$MAKEVER" = "linux-perfctr-POWER5+" || test "$MAKEVER" = "linux-perfctr-POWER6" || test "$MAKEVER" = "linux-perfctr-POWER7" || test "$MAKEVER" = "linux-perfctr-PPC970"); then FILENAME=Rules.perfctr CPUCOMPONENT_NAME=perfctr-ppc VERSION=2.7.x components="perfctr_ppc" CPU=ppc64 if test "$MAKEVER" = "linux-perfctr-POWER5"; then CPU_MODEL=POWER5 ARCH_EVENTS=power5_events ARCH_SPEC_EVTS=power5_events_map.c elif test "$MAKEVER" = "linux-perfctr-POWER5+"; then CPU_MODEL=POWER5p 
ARCH_EVENTS=power5+_events ARCH_SPEC_EVTS=power5+_events_map.c elif test "$MAKEVER" = "linux-perfctr-POWER6"; then CPU_MODEL=POWER6 ARCH_EVENTS=power6_events ARCH_SPEC_EVTS=power6_events_map.c elif test "$MAKEVER" = "linux-perfctr-POWER7"; then CPU_MODEL=POWER7 ARCH_EVENTS=power7_events ARCH_SPEC_EVTS=power7_events_map.c elif test "$MAKEVER" = "linux-perfctr-PPC970"; then CPU_MODEL=PPC970 ARCH_EVENTS=ppc970_events ARCH_SPEC_EVTS=ppc970_events_map.c fi CFLAGS="$CFLAGS -DPPC64 -D_$(CPU_MODEL) -D__perfctr__ -DARCH_EVTS=\"$(ARCH_EVENTS).h\" -DCOMP_VECTOR=_ppc64_vectors" FFLAGS='-D_$(CPU_MODEL)' elif test "$MAKEVER" = "linux-perfmon2"; then FILENAME=Rules.perfmon2 CPUCOMPONENT_NAME=perfmon2 components="perfmon2" elif (test "$MAKEVER" = "linux-pfm-ia64" || test "$MAKEVER" = "linux-pfm-itanium2" || test "$MAKEVER" = "linux-pfm-montecito"); then FILENAME=Rules.pfm CPUCOMPONENT_NAME=perfmon-ia64 components="perfmon_ia64" VERSION=3.y if test "$MAKEVER" = "linux-pfm-itanium2"; then CPU=2 else CPU=3 fi CFLAGS="$CFLAGS -DITANIUM$CPU" FFLAGS="$FFLAGS -DITANIUM$CPU" CC_SHR='$(CC) -fPIC -DPIC -shared -Wl,-soname -Wl,$(SHLIB) -Xlinker "-rpath" -Xlinker "$(LIBDIR)"' elif test "$MAKEVER" = "solaris-ultra"; then CPUCOMPONENT_NAME=solaris-ultra CPUCOMPONENT_C=solaris-ultra.c CPUCOMPONENT_OBJ=solaris-ultra.obj VECTOR=_solaris_vector PAPI_EVENTS_CSV="papi_events.csv" DESCR="Solaris 5.8 or greater with UltraSPARC I, II or III" if test "$CC" = "gcc"; then F77=g77 CPP="$CC -E" CC_R="$CC" CC_SHR="$CC -shared -fpic" OPTFLAGS=-O3 CFLAGS="$CFLAGS -DNEED_FFSLL" FFLAGS=$CFLAGS else # Sun Workshop compilers: V5.0 and V6.0 R2 CPP="$CC -E" CC_R="$CC -mt" CC_SHR="$CC -ztext -G -Kpic" CFLAGS="-xtarget=ultra3 -xarch=v8plusa -DNO_VARARG_MACRO -D__EXTENSIONS__ -DPAPI_NO_MEMORY_MANAGEMENT -DCOMP_VECTOR=_solaris_vectors" SMPCFLGS=-xexplicitpar OMPCFLGS=-xopenmp F77=f90 FFLAGS=$CFLAGS NOOPT=-xO0 OPTFLAGS="-g -fast -xtarget=ultra3 -xarch=v8plusa" fi LDFLAGS="$LDFLAGS -lcpc" if test "$bitmode" = "64"; then 
LIBRARY=libpapi64.a SHLIB=libpapi64.so CFLAGS="-xtarget=ultra3 -xarch=v9a -DNO_VARARG_MACRO -D__EXTENSIONS__ -DPAPI_NO_MEMORY_MANAGEMENT -DCOMP_VECTOR=_solaris_vectors" OPTFLAGS="-g -fast -xtarget=ultra3 -xarch=v9a" fi elif test "$MAKEVER" = "solaris-niagara2"; then CPUCOMPONENT_NAME=solaris-niagara2 CPUCOMPONENT_C=solaris-niagara2.c CPUCOMPONENT_OBJ=solaris-niagara2.obj VECTOR=_niagara2_vector PAPI_EVENTS_CSV="papi_events.csv" CFLAGS="-xtarget=native -xarch=native -DNO_VARARG_MACRO -D__EXTENSIONS__ -DCOMP_VECTOR=_niagara2_vector" ORY_MANAGEMENT="-DCOMP_VECTOR=_solaris_vector" DESCR="Solaris 10 with libcpc2 and UltraSPARC T2 (Niagara 2)" CPP="$CC -E" CC_R="$CC -mt" CC_SHR="$CC -ztext -G -Kpic" SMPCFLGS=-xexplicitpar OMPCFLGS=-xopenmp F77=f90 FFLAGS=$CFLAGS NOOPT=-xO0 OPTFLAGS="-fast" FOPTFLAGS=$OPTFLAGS LDFLAGS="$LDFLAGS -lcpc" if test "$bitmode" = "64"; then LIBRARY=libpapi64.a SHLIB=libpapi64.so CFLAGS="$CFLAGS -m64" FFLAGS="$FFLAGS -m64" fi elif test "$MAKEVER" = "darwin"; then DESCR="Darwin" CPUCOMPONENT_NAME=darwin CPUCOMPONENT=linux-generic.c CPUCOMPONENT=linux-generic.obj CFLAGS="-DNEED_FFSLL" CC_SHR='$(CC) -fPIC -DPIC -shared -Wl,-dylib -Xlinker "-rpath" -Xlinker "$(LIBDIR)"' SHLIB=libpapi.dylib elif test "$MAKEVER" = "generic_platform"; then DESCR="Generic platform" fi MISCOBJS='$(MISCSRCS:.c=.o)' if test "$F77" = "pgf77"; then FFLAGS="$FFLAGS -Wall -Mextend" elif test "$F77" = "ifort"; then FFLAGS="$FFLAGS -warn all" elif test "$F77" != "xlf"; then FFLAGS="$FFLAGS -ffixed-line-length-132" fi if test "$CC_COMMON_NAME" = "icc"; then OMPCFLGS=-openmp fi AC_MSG_CHECKING(for components to build) COMPONENT_RULES=components/Rules.components echo "/* Automatically generated by configure */" > components_config.h echo "#ifndef COMPONENTS_CONFIG_H" >> components_config.h echo "#define COMPONENTS_CONFIG_H" >> components_config.h echo "" >> components_config.h AC_ARG_WITH(components, [ --with-components=<"component1 component2"> Specify which components to build ], 
[components="$components $withval"]) # This is an ugly hack to keep building on configurations covered by any-null in the past. if test "$VECTOR" = "_papi_dummy_vector"; then if test "x$components" = "x"; then echo "papi_vector_t ${VECTOR} = {" >> components_config.h echo " .size = { .context = sizeof ( int ), .control_state = sizeof ( int ), .reg_value = sizeof ( int ), .reg_alloc = sizeof ( int ), }, .cmp_info = { .num_native_events = 0, .num_preset_events = 0, .num_cntrs = 0, .name = \"Your system is unsupported! \", .short_name = \"UNSUPPORTED!\" }, .dispatch_timer = NULL, .get_overflow_address = NULL, .start = NULL, .stop = NULL, .read = NULL, .reset = NULL, .write = NULL, .cleanup_eventset = NULL, .stop_profiling = NULL, .init_component = NULL, .init_thread = NULL, .init_control_state = NULL, .update_control_state = NULL, .ctl = NULL, .set_overflow = NULL, .set_profile = NULL, .set_domain = NULL, .ntv_enum_events = NULL, .ntv_name_to_code = NULL, .ntv_code_to_name = NULL, .ntv_code_to_descr = NULL, .ntv_code_to_bits = NULL, .ntv_code_to_info = NULL, .allocate_registers = NULL, .shutdown_thread = NULL, .shutdown_component = NULL, .user = NULL, };" >> components_config.h # but in the face of actual components, we don't have to do hacky size games else VECTOR="" fi elif test "x$VECTOR" != "x"; then echo "extern papi_vector_t ${VECTOR};" >> components_config.h fi for comp in $components; do idx=`echo "$comp" | sed -n "s/\/.*//p" | wc -c` if test "$idx" = 0; then subcomp=$comp else subcomp=`echo $comp | sed -E "s/^.{${idx}}//"` fi COMPONENT_RULES="$COMPONENT_RULES components/$comp/Rules.$subcomp" echo "extern papi_vector_t _${subcomp}_vector;" >> components_config.h done echo "" >> components_config.h echo "struct papi_vectors *_papi_hwd[[]] = {" >> components_config.h if test "x$VECTOR" != "x"; then echo " &${VECTOR}," >> components_config.h fi for comp in $components; do idx=`echo "$comp" | sed -n "s/\/.*//p" | wc -c` if test "$idx" = 0; then subcomp=$comp else 
subcomp=`echo $comp | sed -E "s/^.{${idx}}//"` fi echo " &_${subcomp}_vector," >> components_config.h done echo " NULL" >> components_config.h echo "};" >> components_config.h echo "" >> components_config.h echo "#endif" >> components_config.h # check for component tests for comp in $components; do if test "`find components/$comp -name "tests"`" != "" ; then COMPONENTS="$COMPONENTS $comp" fi done tests="$tests comp_tests" AC_MSG_RESULT($components) AC_MSG_CHECKING(for PAPI event CSV filename to use) if test "x$PAPI_EVENTS_CSV" == "x"; then PAPI_EVENTS_CSV="papi_events.csv" fi AC_MSG_RESULT($PAPI_EVENTS_CSV) AC_SUBST(prefix) AC_SUBST(exec_prefix) AC_SUBST(libdir) AC_SUBST(includedir) AC_SUBST(mandir) AC_SUBST(bindir) AC_SUBST(datadir) AC_SUBST(datarootdir) AC_SUBST(docdir) AC_SUBST(PACKAGE_TARNAME) AC_SUBST(arch) AC_SUBST(MAKEVER) AC_SUBST(PMAPI) AC_SUBST(PMINIT) AC_SUBST(F77) AC_SUBST(CPP) AC_SUBST(CC) AC_SUBST(AR) AC_SUBST(papiLIBS) AC_SUBST(STATIC) AC_SUBST(LDFLAGS) AC_SUBST(altix) AC_SUBST(perfctr_root) AC_SUBST(perfctr_prefix) AC_SUBST(perfctr_incdir) AC_SUBST(perfctr_libdir) AC_SUBST(pfm_root) AC_SUBST(old_pfmv2) AC_SUBST(pfm_prefix) AC_SUBST(pfm_incdir) AC_SUBST(pfm_libdir) AC_SUBST(OS) AC_SUBST(CFLAGS) AC_SUBST(FFLAGS) AC_SUBST(CPPFLAGS) AC_SUBST(PAPI_EVENTS) AC_SUBST(PAPI_EVENTS_CSV) AC_SUBST(SETPATH) AC_SUBST(LINKLIB) AC_SUBST(VERSION) AC_SUBST(CPU) AC_SUBST(FILENAME) AC_SUBST(LIBRARY) AC_SUBST(SHLIB) AC_SUBST(VLIB) AC_SUBST(PAPICFLAGS) AC_SUBST(OPTFLAGS) AC_SUBST(CPUCOMPONENT_NAME) AC_SUBST(CPUCOMPONENT_C) AC_SUBST(CPUCOMPONENT_OBJ) AC_SUBST(OSFILESSRC) AC_SUBST(OSFILESOBJ) AC_SUBST(OSFILESHDR) AC_SUBST(OSLOCK) AC_SUBST(OSCONTEXT) AC_SUBST(DESCR) AC_SUBST(LIBS) AC_SUBST(CTEST_TARGETS) AC_SUBST(CC_R) AC_SUBST(CC_SHR) AC_SUBST(SMPCFLGS) AC_SUBST(OMPCFLGS) AC_SUBST(NOOPT) AC_SUBST(MISCSRCS) AC_SUBST(MISCOBJS) AC_SUBST(POST_BUILD) AC_SUBST(ARCH_EVENTS) AC_SUBST(CPU_MODEL) AC_SUBST(cpu_option) AC_SUBST(ARG64) AC_SUBST(FLAGS) AC_SUBST(MPICC) AC_SUBST(MISCHDRS) 
AC_SUBST(SHLIBDEPS) AC_SUBST(TOPTFLAGS) AC_SUBST(TESTS) AC_SUBST(tests) AC_SUBST(SHOW_CONF) AC_SUBST(BGP_SYSDIR) AC_SUBST(BITFLAGS) AC_SUBST(COMPONENT_RULES) AC_SUBST(COMPONENTS) AC_SUBST(FTEST_TARGETS) AC_SUBST(HAVE_NO_OVERRIDE_INIT) AC_SUBST(BGPM_INSTALL_DIR) AC_SUBST(CC_COMMON_NAME) if test "$cross_compiling" = "yes" ; then AC_MSG_NOTICE(Compiling genpapifdef with $nativecc because cross compiling) $nativecc -I. genpapifdef.c -o genpapifdef else AC_MSG_NOTICE(Compiling genpapifdef with $CC) $CC -I. genpapifdef.c -o genpapifdef fi AC_MSG_NOTICE(Generating fpapi.h) ./genpapifdef -c > fpapi.h AC_MSG_NOTICE(Generating f77papi.h) ./genpapifdef -f77 > f77papi.h AC_MSG_NOTICE(Generating f90papi.h) ./genpapifdef -f90 > f90papi.h AC_MSG_NOTICE($FILENAME will be included in the generated Makefile) AC_CONFIG_FILES([Makefile papi.pc]) AC_CONFIG_FILES([components/Makefile_comp_tests.target testlib/Makefile.target utils/Makefile.target ctests/Makefile.target ftests/Makefile.target validation_tests/Makefile.target]) AC_OUTPUT papi-5.6.0/src/libpfm4/lib/pfmlib_powerpc_perf_event.c000664 001750 001750 00000007232 13216244365 025026 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_powerpc_perf_event.c : perf_event IBM Power/Torrent functions * * Copyright (c) 2011 Google, Inc * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include #include #include #include /* private headers */ #include "pfmlib_priv.h" /* library private */ #include "pfmlib_power_priv.h" /* architecture private */ #include "pfmlib_perf_event_priv.h" int pfm_gen_powerpc_get_perf_encoding(void *this, pfmlib_event_desc_t *e) { pfmlib_pmu_t *pmu = this; struct perf_event_attr *attr = e->os_data; int ret; if (!pmu->get_event_encoding[PFM_OS_NONE]) return PFM_ERR_NOTSUPP; /* * encoding routine changes based on PMU model */ ret = pmu->get_event_encoding[PFM_OS_NONE](this, e); if (ret != PFM_SUCCESS) return ret; attr->type = PERF_TYPE_RAW; attr->config = e->codes[0]; return PFM_SUCCESS; } static int find_pmu_type_by_name(const char *name) { char filename[PATH_MAX]; FILE *fp; int ret, type; if (!name) return PFM_ERR_NOTSUPP; sprintf(filename, "/sys/bus/event_source/devices/%s/type", name); fp = fopen(filename, "r"); if (!fp) return PFM_ERR_NOTSUPP; ret = fscanf(fp, "%d", &type); if (ret != 1) type = PFM_ERR_NOTSUPP; fclose(fp); return type; } int pfm_gen_powerpc_get_nest_perf_encoding(void *this, pfmlib_event_desc_t *e) { pfmlib_pmu_t *pmu = this; struct perf_event_attr *attr = e->os_data; int ret; if (!pmu->get_event_encoding[PFM_OS_NONE]) return PFM_ERR_NOTSUPP; /* * encoding routine changes based on PMU model */ ret = pmu->get_event_encoding[PFM_OS_NONE](this, e); if (ret != PFM_SUCCESS) return ret; ret = find_pmu_type_by_name(pmu->perf_name); if (ret < 0) return ret; attr->type = ret; attr->config = e->codes[0]; return PFM_SUCCESS; } void 
pfm_gen_powerpc_perf_validate_pattrs(void *this, pfmlib_event_desc_t *e) { int i, compact; for (i = 0; i < e->npattrs; i++) { compact = 0; /* umasks never conflict */ if (e->pattrs[i].type == PFM_ATTR_UMASK) continue; /* * remove PMU-provided attributes which are either * not accessible under perf_events or fully controlled * by perf_events, e.g., priv levels filters */ if (e->pattrs[i].ctrl == PFM_ATTR_CTRL_PMU) { } /* * remove perf_event generic attributes not supported * by PPC */ if (e->pattrs[i].ctrl == PFM_ATTR_CTRL_PERF_EVENT) { /* no precise sampling */ if (e->pattrs[i].idx == PERF_ATTR_PR) compact = 1; } if (compact) { pfmlib_compact_pattrs(e, i); i--; } } } papi-5.6.0/src/libpfm-3.y/examples_v2.x/x86/Makefile000664 001750 001750 00000003543 13216244362 023770 0ustar00jshenry1963jshenry1963000000 000000 # # Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. # Contributed by Stephane Eranian # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies # of the Software, and to permit persons to whom the Software is furnished to do so, # subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF # CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE # OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
# TOPDIR := $(shell if [ "$$PWD" != "" ]; then echo $$PWD; else pwd; fi)/../.. include $(TOPDIR)/config.mk include $(TOPDIR)/rules.mk LIBS += -lm ifeq ($(SYS),Linux) TARGETS=smpl_p4_pebs smpl_core_pebs smpl_amd64_ibs smpl_nhm_lbr smpl_pebs endif all: $(TARGETS) $(TARGETS): %:%.o $(PFMLIB) ../detect_pmcs.o $(CC) -o $@ $(CFLAGS) $(LDFLAGS) $^ $(LIBS) clean: $(RM) -f *.o $(TARGETS) *~ distclean: clean install_examples: $(TARGETS) install_examples: @echo installing: $(TARGETS) -mkdir -p $(DESTDIR)$(EXAMPLESDIR)/v2/x86 $(INSTALL) -m 755 $(TARGETS) $(DESTDIR)$(EXAMPLESDIR)/v2/x86 # # examples are installed as part of the RPM install, typically in /usr/share/doc/libpfm-X.Y/ # papi-5.6.0/src/testlib/test_utils.c000664 001750 001750 00000044467 13216244370 021352 0ustar00jshenry1963jshenry1963000000 000000 #include #include #include #include #include "papi.h" #include "papi_test.h" #define TOLERANCE .2 /* Variable to hold reporting status if TRUE, output is suppressed if FALSE output is sent to stdout initialized to FALSE declared here so it can be available globally */ int TESTS_QUIET = 0; static int TESTS_COLOR = 1; static int TEST_WARN = 0; void validate_string( const char *name, char *s ) { if ( ( s == NULL ) || ( strlen( s ) == 0 ) ) { char s2[1024] = ""; sprintf( s2, "%s was NULL or length 0", name ); test_fail( __FILE__, __LINE__, s2, 0 ); } } int approx_equals( double a, double b ) { if ( ( a >= b * ( 1.0 - TOLERANCE ) ) && ( a <= b * ( 1.0 + TOLERANCE ) ) ) return 1; else { printf( "Out of tolerance range %2.2f: %.0f vs %.0f [%.0f,%.0f]\n", TOLERANCE, a, b, b * ( 1.0 - TOLERANCE ), b * ( 1.0 + TOLERANCE ) ); return 0; } } long long ** allocate_test_space( int num_tests, int num_events ) { long long **values; int i; values = ( long long ** ) malloc( ( size_t ) num_tests * sizeof ( long long * ) ); if ( values == NULL ) exit( 1 ); memset( values, 0x0, ( size_t ) num_tests * sizeof ( long long * ) ); for ( i = 0; i < num_tests; i++ ) { values[i] = ( long long * ) 
malloc( ( size_t ) num_events * sizeof ( long long ) ); if ( values[i] == NULL ) exit( 1 ); memset( values[i], 0x00, ( size_t ) num_events * sizeof ( long long ) ); } return ( values ); } void free_test_space( long long **values, int num_tests ) { int i; for ( i = 0; i < num_tests; i++ ) free( values[i] ); free( values ); } int is_event_derived(unsigned int event) { PAPI_event_info_t info; if (event & PAPI_PRESET_MASK) { PAPI_get_event_info(event,&info); if (strcmp(info.derived,"NOT_DERIVED")) { // printf("%#x is derived\n",event); return 1; } } return 0; } int find_nonderived_event( void ) { /* query and set up the right event to monitor */ PAPI_event_info_t info; int potential_evt_to_add[3] = { PAPI_FP_OPS, PAPI_FP_INS, PAPI_TOT_INS }; int i; for ( i = 0; i < 3; i++ ) { if ( PAPI_query_event( potential_evt_to_add[i] ) == PAPI_OK ) { if ( PAPI_get_event_info( potential_evt_to_add[i], &info ) == PAPI_OK ) { if ( ( info.count > 0 ) && !strcmp( info.derived, "NOT_DERIVED" ) ) return ( potential_evt_to_add[i] ); } } } return ( 0 ); } /* Add events to an EventSet, as specified by a mask. 
Returns: number = number of events added */ //struct test_events_t { // unsigned int mask; // unsigned int event; //}; struct test_events_t test_events[MAX_TEST_EVENTS] = { { MASK_TOT_CYC, PAPI_TOT_CYC }, { MASK_TOT_INS, PAPI_TOT_INS }, { MASK_FP_INS, PAPI_FP_INS }, { MASK_L1_TCM, PAPI_L1_TCM }, { MASK_L1_ICM, PAPI_L1_ICM }, { MASK_L1_DCM, PAPI_L1_DCM }, { MASK_L2_TCM, PAPI_L2_TCM }, { MASK_L2_TCA, PAPI_L2_TCA }, { MASK_L2_TCH, PAPI_L2_TCH }, { MASK_BR_CN, PAPI_BR_CN }, { MASK_BR_MSP, PAPI_BR_MSP }, { MASK_BR_PRC, PAPI_BR_PRC }, { MASK_TOT_IIS, PAPI_TOT_IIS}, { MASK_L1_DCR, PAPI_L1_DCR}, { MASK_L1_DCW, PAPI_L1_DCW}, { MASK_L1_DCA, PAPI_L1_DCA}, { MASK_FP_OPS, PAPI_FP_OPS}, }; int add_test_events( int *number, int *mask, int allow_derived ) { int retval,i; int EventSet = PAPI_NULL; char name_string[BUFSIZ]; *number = 0; /* create the eventset */ retval = PAPI_create_eventset( &EventSet ); if ( retval != PAPI_OK ) { test_fail(__FILE__,__LINE__,"Trouble creating eventset",retval); } /* check all the masks */ for(i=0;i 1 ) && ( ( strcasecmp( argv[1], "TESTS_QUIET" ) == 0 ) || ( strcasecmp( argv[1], "-q" ) == 0 ) ) ) { TESTS_QUIET = 1; } /* Always report PAPI errors when testing */ /* Even in quiet mode */ retval = PAPI_set_debug( PAPI_VERB_ECONT ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_set_debug", retval ); } value=getenv("TESTS_COLOR"); if (value!=NULL) { if (value[0]=='y') { TESTS_COLOR=1; } else { TESTS_COLOR=0; } } /* Disable colors if sending to a file */ if (!isatty(fileno(stdout))) { TESTS_COLOR=0; } return TESTS_QUIET; } #define RED "\033[1;31m" #define YELLOW "\033[1;33m" #define GREEN "\033[1;32m" #define NORMAL "\033[0m" static void print_spaces(int count) { int i; for(i=0;i 0 ) { fprintf( stdout, "Error: %s\n", call ); } else if ( retval == 0 ) { #if defined(sgi) fprintf( stdout, "SGI requires root permissions for this test\n" ); #else fprintf( stdout, "Error: %s\n", call ); #endif } else { fprintf( stdout, "Error in %s: %s\n", 
call, PAPI_strerror( retval ) ); } // fprintf( stdout, "\n" ); /* NOTE: Because test_fail is called from thread functions, calling PAPI_shutdown here could prevent some threads from being able to free memory they have allocated. */ if ( PAPI_is_initialized( ) ) { PAPI_shutdown( ); } /* This is stupid. Threads are the rare case */ /* and in any case an exit() should clear everything out */ /* adding back the exit() call */ exit(1); } /* Use a positive value of retval to simply print an error message */ void test_warn( const char *file, int line, const char *call, int retval ) { (void)file; // int line_pad; // line_pad=60-strlen(file); // if (line_pad<0) line_pad=0; char buf[128]; memset( buf, '\0', sizeof ( buf ) ); // fprintf(stdout,"%s",file); // print_spaces(line_pad); if (TEST_WARN==0) fprintf(stdout,"\n"); if (TESTS_COLOR) fprintf( stdout, "%s", YELLOW); fprintf( stdout, "WARNING "); if (TESTS_COLOR) fprintf( stdout, "%s", NORMAL); fprintf( stdout, "Line # %d ", line ); if ( retval == PAPI_ESYS ) { sprintf( buf, "System warning in %s", call ); perror( buf ); } else if ( retval > 0 ) { fprintf( stdout, "Warning: %s\n", call ); } else if ( retval == 0 ) { fprintf( stdout, "Warning: %s\n", call ); } else { fprintf( stdout, "Warning in %s: %s\n", call, PAPI_strerror( retval )); } TEST_WARN++; } void test_skip( const char *file, int line, const char *call, int retval ) { // int line_pad; (void)file; (void)line; (void)call; (void)retval; // line_pad=(60-strlen(file)); // fprintf(stdout,"%s",file); // print_spaces(line_pad); fprintf( stdout, "SKIPPED\n"); exit( 0 ); } void test_print_event_header( const char *call, int evset ) { int *ev_ids; int i, nev; int retval; char evname[PAPI_MAX_STR_LEN]; if ( *call ) fprintf( stdout, "%s", call ); if ((nev = PAPI_get_cmp_opt(PAPI_MAX_MPX_CTRS,NULL,0)) <= 0) { fprintf( stdout, "Can not list event names.\n" ); return; } if ((ev_ids = calloc(nev,sizeof(int))) == NULL) { fprintf( stdout, "Can not list event names.\n" ); return; } 
retval = PAPI_list_events( evset, ev_ids, &nev ); if ( retval == PAPI_OK ) { for ( i = 0; i < nev; i++ ) { PAPI_event_code_to_name( ev_ids[i], evname ); printf( ONEHDR, evname ); } } else { fprintf( stdout, "Can not list event names." ); } fprintf( stdout, "\n" ); free(ev_ids); } int add_two_events( int *num_events, int *papi_event, int *mask ) { int retval; int EventSet = PAPI_NULL; *num_events=2; *papi_event=PAPI_TOT_INS; (void)mask; /* create the eventset */ retval = PAPI_create_eventset( &EventSet ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); } retval = PAPI_add_named_event( EventSet, "PAPI_TOT_CYC"); if ( retval != PAPI_OK ) { if (!TESTS_QUIET) printf("Couldn't add PAPI_TOT_CYC\n"); test_skip(__FILE__,__LINE__,"Couldn't add PAPI_TOT_CYC",0); } retval = PAPI_add_named_event( EventSet, "PAPI_TOT_INS"); if ( retval != PAPI_OK ) { if (!TESTS_QUIET) printf("Couldn't add PAPI_TOT_CYC\n"); test_skip(__FILE__,__LINE__,"Couldn't add PAPI_TOT_CYC",0); } return EventSet; } int add_two_nonderived_events( int *num_events, int *papi_event, int *mask ) { /* query and set up the right event to monitor */ int EventSet = PAPI_NULL; int retval; *num_events=0; #define POTENTIAL_EVENTS 3 unsigned int potential_evt_to_add[POTENTIAL_EVENTS][2] = { {( unsigned int ) PAPI_FP_INS, MASK_FP_INS}, {( unsigned int ) PAPI_FP_OPS, MASK_FP_OPS}, {( unsigned int ) PAPI_TOT_INS, MASK_TOT_INS} }; int i; *mask = 0; /* could leak up to two event sets. 
*/ for(i=0;imodel_string,"POWER6")) || (!strcmp(hw_info->model_string,"POWER5")) ) { test_warn(__FILE__, __LINE__, "Limiting num_counters because of " "LIMITED_PMC on Power5 and Power6",1); counters=4; } } ( *evtcodes ) = ( int * ) calloc( counters, sizeof ( int ) ); retval = PAPI_create_eventset( &EventSet ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); } /* For platform independence, always ASK FOR the first event */ /* Don't just assume it'll be the first numeric value */ i = 0 | PAPI_NATIVE_MASK; retval = PAPI_enum_cmp_event( &i, PAPI_ENUM_FIRST, cidx ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_enum_cmp_event", retval ); } do { retval = PAPI_get_event_info( i, &info ); /* HACK! FIXME */ if (no_software_events && ( strstr(info.symbol,"PERF_COUNT_SW") || strstr(info.long_descr, "PERF_COUNT_SW") ) ) { if (!TESTS_QUIET) { printf("Blocking event %s as a SW event\n", info.symbol); } continue; } if ( s->cntr_umasks ) { k = i; if ( PAPI_enum_cmp_event( &k, PAPI_NTV_ENUM_UMASKS, cidx ) == PAPI_OK ) { do { retval = PAPI_get_event_info( k, &info ); event_code = ( int ) info.event_code; retval = PAPI_add_event( EventSet, event_code ); if ( retval == PAPI_OK ) { ( *evtcodes )[event_found] = event_code; if ( !TESTS_QUIET ) { printf( "event_code[%d] = %#x (%s)\n", event_found, event_code, info.symbol ); } event_found++; } else { if ( !TESTS_QUIET ) { printf( "%#x (%s) can't be added to the EventSet.\n", event_code, info.symbol ); } } } while ( PAPI_enum_cmp_event( &k, PAPI_NTV_ENUM_UMASKS, cidx ) == PAPI_OK && event_found < counters ); } else { event_code = ( int ) info.event_code; retval = PAPI_add_event( EventSet, event_code ); if ( retval == PAPI_OK ) { ( *evtcodes )[event_found] = event_code; if ( !TESTS_QUIET ) { printf( "event_code[%d] = %#x (%s)\n", event_found, event_code, info.symbol ); } event_found++; } } if ( !TESTS_QUIET && retval == PAPI_OK ) { /* */ } } else { event_code = ( int ) 
info.event_code; retval = PAPI_add_event( EventSet, event_code ); if ( retval == PAPI_OK ) { ( *evtcodes )[event_found] = event_code; event_found++; } else { if ( !TESTS_QUIET ) fprintf( stdout, "%#x is not available.\n", event_code ); } } } while ( PAPI_enum_cmp_event( &i, PAPI_ENUM_EVENTS, cidx ) == PAPI_OK && event_found < counters ); *num_events = ( int ) event_found; if (!TESTS_QUIET) printf("Tried to fill %d counters with events, " "found %d\n",counters,event_found); return EventSet; } papi-5.6.0/src/libpfm4/lib/events/intel_bdx_unc_cbo_events.h000664 001750 001750 00000111604 13216244364 026134 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2017 Google Inc. All rights reserved * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. 
* * PMU: bdx_unc_cbo */ #define CBO_FILT_MESIF(a, b, c, d) \ { .uname = "STATE_"#a,\ .udesc = #b" cacheline state",\ .ufilters[0] = 1ULL << (17 + (c)),\ .grpid = d, \ } #define CBO_FILT_MESIFS(d) \ CBO_FILT_MESIF(I, Invalid, 0, d), \ CBO_FILT_MESIF(S, Shared, 1, d), \ CBO_FILT_MESIF(E, Exclusive, 2, d), \ CBO_FILT_MESIF(M, Modified, 3, d), \ CBO_FILT_MESIF(F, Forward, 4, d), \ CBO_FILT_MESIF(D, Debug, 5, d), \ { .uname = "STATE_MP",\ .udesc = "Cacheline is modified but never written, was forwarded in modified state",\ .ufilters[0] = 0x1ULL << (17+6),\ .grpid = d, \ .uflags = INTEL_X86_NCOMBO, \ }, \ { .uname = "STATE_MESIFD",\ .udesc = "Any cache line state",\ .ufilters[0] = 0x7fULL << 17,\ .grpid = d, \ .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, \ } #define CBO_FILT_OPC(d) \ { .uname = "OPC_RFO",\ .udesc = "Demand data RFO (combine with any OPCODE umask)",\ .ufilters[1] = 0x180ULL << 20, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_CRD",\ .udesc = "Demand code read (combine with any OPCODE umask)",\ .ufilters[1] = 0x181ULL << 20, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_DRD",\ .udesc = "Demand data read (combine with any OPCODE umask)",\ .ufilters[1] = 0x182ULL << 20, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_PRD",\ .udesc = "Partial reads (UC) (combine with any OPCODE umask)",\ .ufilters[1] = 0x187ULL << 20, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_WCILF",\ .udesc = "Full Stream store (combine with any OPCODE umask)", \ .ufilters[1] = 0x18cULL << 20, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_WCIL",\ .udesc = "Partial Stream store (combine with any OPCODE umask)", \ .ufilters[1] = 0x18dULL << 20, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_WIL",\ .udesc = "Write Invalidate Line (Partial) (combine with any OPCODE umask)", \ .ufilters[1] = 0x18fULL << 20, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = 
"OPC_PF_RFO",\ .udesc = "Prefetch RFO into LLC but do not pass to L2 (includes hints) (combine with any OPCODE umask)", \ .ufilters[1] = 0x190ULL << 20, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_PF_CODE",\ .udesc = "Prefetch code into LLC but do not pass to L2 (includes hints) (combine with any OPCODE umask)", \ .ufilters[1] = 0x191ULL << 20, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_PF_DATA",\ .udesc = "Prefetch data into LLC but do not pass to L2 (includes hints) (combine with any OPCODE umask)", \ .ufilters[1] = 0x192ULL << 20, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_PCIWIL",\ .udesc = "PCIe write (partial, non-allocating) - partial line MMIO write transactions from IIO (P2P). Not used for coherent transacions. Uncacheable. (combine with any OPCODE umask)", \ .ufilters[1] = 0x193ULL << 20, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_PCIWIF",\ .udesc = "PCIe write (full, non-allocating) - full line MMIO write transactions from IIO (P2P). Not used for coherent transacions. Uncacheable. 
(combine with any OPCODE umask)", \ .ufilters[1] = 0x194ULL << 20, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_PCIITOM",\ .udesc = "PCIe write (allocating) (combine with any OPCODE umask)", \ .ufilters[1] = 0x19cULL << 20, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_PCIRDCUR",\ .udesc = "PCIe read current (combine with any OPCODE umask)", \ .ufilters[1] = 0x19eULL << 20, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_WBMTOI",\ .udesc = "Request writeback modified invalidate line (combine with any OPCODE umask)", \ .ufilters[1] = 0x1c4ULL << 20, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_WBMTOE",\ .udesc = "Request writeback modified set to exclusive (combine with any OPCODE umask)", \ .ufilters[1] = 0x1c5ULL << 20, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_ITOM",\ .udesc = "Request invalidate line. Request exclusive ownership of the line (combine with any OPCODE umask)", \ .ufilters[1] = 0x1c8ULL << 20, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_PCINSRD",\ .udesc = "PCIe non-snoop read (combine with any OPCODE umask)", \ .ufilters[1] = 0x1e4ULL << 20, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_PCINSWR",\ .udesc = "PCIe non-snoop write (partial) (combine with any OPCODE umask)", \ .ufilters[1] = 0x1e5ULL << 20, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_PCINSWRF",\ .udesc = "PCIe non-snoop write (full) (combine with any OPCODE umask)", \ .ufilters[1] = 0x1e6ULL << 20, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ } static intel_x86_umask_t bdx_unc_c_llc_lookup[]={ { .uname = "ANY", .ucode = 0x1100, .udesc = "Cache Lookups -- Any Request", .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, .grpid = 0, }, { .uname = "DATA_READ", .ucode = 0x300, .udesc = "Cache Lookups -- Data Read Request", .grpid = 0, }, { .uname = "NID", .ucode = 0x4100, .udesc = "Cache Lookups -- Lookups that Match NID", 
.umodmsk_req = _SNBEP_UNC_ATTR_NF1,
    .grpid = 1,
    .uflags = INTEL_X86_GRP_DFL_NONE
  },
  { .uname = "READ",
    .ucode = 0x2100,
    .udesc = "Cache Lookups -- Any Read Request",
    .grpid = 0,
  },
  { .uname = "REMOTE_SNOOP",
    .ucode = 0x900,
    .udesc = "Cache Lookups -- External Snoop Request",
    .grpid = 0,
  },
  { .uname = "WRITE",
    .ucode = 0x500,
    .udesc = "Cache Lookups -- Write Requests",
    .grpid = 0,
  },
  /* Cache-line state filters (written to ufilters[0]) form group 2. */
  CBO_FILT_MESIFS(2),
};

/*
 * Unit masks referenced by the UNC_C_LLC_VICTIMS event entry.
 * Group 0 selects the state of the victimized line; group 1 holds the
 * optional NID match, which lists _SNBEP_UNC_ATTR_NF1 in umodmsk_req.
 */
static intel_x86_umask_t bdx_unc_c_llc_victims[]={
  { .uname = "F_STATE",
    .ucode = 0x800,
    .udesc = "Lines in Forward state",
    .grpid = 0,
  },
  /*
   * NOTE(review): I_STATE carries the same ucode (0x400) as S_STATE below
   * and its description also says "S State". It looks like an alias or a
   * copy/paste slip; confirm the intended encoding against the vendor
   * uncore manual before relying on it.
   */
  { .uname = "I_STATE",
    .ucode = 0x400,
    .udesc = "Lines in S State",
    .grpid = 0,
  },
  { .uname = "S_STATE",
    .ucode = 0x400,
    .udesc = "Lines in S state",
    .grpid = 0,
  },
  { .uname = "E_STATE",
    .ucode = 0x200,
    .udesc = "Lines in E state",
    .grpid = 0,
  },
  { .uname = "M_STATE",
    .ucode = 0x100,
    .udesc = "Lines in M state",
    .grpid = 0,
  },
  { .uname = "MISS",
    .ucode = 0x1000,
    .udesc = "Lines Victimized",
    .grpid = 0,
  },
  { .uname = "NID",
    .ucode = 0x4000,
    .udesc = "Lines Victimized -- Victimized Lines that Match NID",
    .uflags = INTEL_X86_GRP_DFL_NONE,
    .umodmsk_req = _SNBEP_UNC_ATTR_NF1,
    .grpid = 1,
  },
};

/*
 * Unit masks referenced by the UNC_C_MISC event entry.
 * No uflags are set on any entry and no default is marked.
 */
static intel_x86_umask_t bdx_unc_c_misc[]={
  { .uname = "CVZERO_PREFETCH_MISS",
    .ucode = 0x2000,
    .udesc = "Cbo Misc -- DRd hitting non-M with raw CV=0",
  },
  { .uname = "CVZERO_PREFETCH_VICTIM",
    .ucode = 0x1000,
    .udesc = "Cbo Misc -- Clean Victim with raw CV=0",
  },
  { .uname = "RFO_HIT_S",
    .ucode = 0x800,
    .udesc = "Cbo Misc -- RFO HitS",
  },
  { .uname = "RSPI_WAS_FSE",
    .ucode = 0x100,
    .udesc = "Cbo Misc -- Silent Snoop Eviction",
  },
  /* NOTE(review): description text after "--" is empty for STARTED. */
  { .uname = "STARTED",
    .ucode = 0x400,
    .udesc = "Cbo Misc -- ",
  },
  { .uname = "WC_ALIASING",
    .ucode = 0x200,
    .udesc = "Cbo Misc -- Write Combining Aliasing",
  },
};

/*
 * Unit masks referenced by the UNC_C_RING_AD_USED event entry.
 * ALL (0xf00) is the OR of the four direction/parity masks that follow;
 * CCW (0xc00) = DOWN_EVEN | DOWN_ODD and CW (0x300) = UP_EVEN | UP_ODD.
 */
static intel_x86_umask_t bdx_unc_c_ring_ad_used[]={
  { .uname = "ALL",
    .ucode = 0xf00,
    .udesc = "AD Ring In Use -- All",
  },
  { .uname = "CCW",
    .ucode = 0xc00,
    .udesc = "AD Ring In Use -- Down",
  },
  { .uname = "CW",
    .ucode = 0x300,
    .udesc = "AD Ring In Use -- Up",
}, { .uname = "DOWN_EVEN", .ucode = 0x400, .udesc = "AD Ring In Use -- Down and Even", }, { .uname = "DOWN_ODD", .ucode = 0x800, .udesc = "AD Ring In Use -- Down and Odd", }, { .uname = "UP_EVEN", .ucode = 0x100, .udesc = "AD Ring In Use -- Up and Even", }, { .uname = "UP_ODD", .ucode = 0x200, .udesc = "AD Ring In Use -- Up and Odd", }, }; static intel_x86_umask_t bdx_unc_c_ring_ak_used[]={ { .uname = "ALL", .ucode = 0xf00, .udesc = "AK Ring In Use -- All", }, { .uname = "CCW", .ucode = 0xc00, .udesc = "AK Ring In Use -- Down", }, { .uname = "CW", .ucode = 0x300, .udesc = "AK Ring In Use -- Up", }, { .uname = "DOWN_EVEN", .ucode = 0x400, .udesc = "AK Ring In Use -- Down and Even", }, { .uname = "DOWN_ODD", .ucode = 0x800, .udesc = "AK Ring In Use -- Down and Odd", }, { .uname = "UP_EVEN", .ucode = 0x100, .udesc = "AK Ring In Use -- Up and Even", }, { .uname = "UP_ODD", .ucode = 0x200, .udesc = "AK Ring In Use -- Up and Odd", }, }; static intel_x86_umask_t bdx_unc_c_ring_bl_used[]={ { .uname = "ALL", .ucode = 0xf00, .udesc = "BL Ring in Use -- Down", }, { .uname = "CCW", .ucode = 0xc00, .udesc = "BL Ring in Use -- Down", }, { .uname = "CW", .ucode = 0x300, .udesc = "BL Ring in Use -- Up", }, { .uname = "DOWN_EVEN", .ucode = 0x400, .udesc = "BL Ring in Use -- Down and Even", }, { .uname = "DOWN_ODD", .ucode = 0x800, .udesc = "BL Ring in Use -- Down and Odd", }, { .uname = "UP_EVEN", .ucode = 0x100, .udesc = "BL Ring in Use -- Up and Even", }, { .uname = "UP_ODD", .ucode = 0x200, .udesc = "BL Ring in Use -- Up and Odd", }, }; static intel_x86_umask_t bdx_unc_c_ring_bounces[]={ { .uname = "AD", .ucode = 0x100, .udesc = "Number of LLC responses that bounced on the Ring. -- AD", }, { .uname = "AK", .ucode = 0x200, .udesc = "Number of LLC responses that bounced on the Ring. -- AK", }, { .uname = "BL", .ucode = 0x400, .udesc = "Number of LLC responses that bounced on the Ring. 
-- BL", }, { .uname = "IV", .ucode = 0x1000, .udesc = "Number of LLC responses that bounced on the Ring. -- Snoops of processors cachee.", }, }; static intel_x86_umask_t bdx_unc_c_ring_iv_used[]={ { .uname = "ANY", .ucode = 0xf00, .udesc = "BL Ring in Use -- Any", .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "DN", .ucode = 0xc00, .udesc = "BL Ring in Use -- Any", .uflags = INTEL_X86_NCOMBO, }, { .uname = "DOWN", .ucode = 0xcc00, .udesc = "BL Ring in Use -- Down", .uflags = INTEL_X86_NCOMBO, }, { .uname = "UP", .ucode = 0x300, .udesc = "BL Ring in Use -- Any", .uflags = INTEL_X86_NCOMBO, }, }; static intel_x86_umask_t bdx_unc_c_rxr_ext_starved[]={ { .uname = "IPQ", .ucode = 0x200, .udesc = "Ingress Arbiter Blocking Cycles -- IRQ", }, { .uname = "IRQ", .ucode = 0x100, .udesc = "Ingress Arbiter Blocking Cycles -- IPQ", }, { .uname = "ISMQ_BIDS", .ucode = 0x800, .udesc = "Ingress Arbiter Blocking Cycles -- ISMQ_BID", }, { .uname = "PRQ", .ucode = 0x400, .udesc = "Ingress Arbiter Blocking Cycles -- PRQ", }, }; static intel_x86_umask_t bdx_unc_c_rxr_inserts[]={ { .uname = "IPQ", .ucode = 0x400, .udesc = "Ingress Allocations -- IPQ", }, { .uname = "IRQ", .ucode = 0x100, .udesc = "Ingress Allocations -- IRQ", }, { .uname = "IRQ_REJ", .ucode = 0x200, .udesc = "Ingress Allocations -- IRQ Rejected", }, { .uname = "PRQ", .ucode = 0x1000, .udesc = "Ingress Allocations -- PRQ", }, { .uname = "PRQ_REJ", .ucode = 0x2000, .udesc = "Ingress Allocations -- PRQ", }, }; static intel_x86_umask_t bdx_unc_c_rxr_ipq_retry[]={ { .uname = "ADDR_CONFLICT", .ucode = 0x400, .udesc = "Probe Queue Retries -- Address Conflict", }, { .uname = "ANY", .ucode = 0x100, .udesc = "Probe Queue Retries -- Any Reject", .uflags = INTEL_X86_DFL, }, { .uname = "FULL", .ucode = 0x200, .udesc = "Probe Queue Retries -- No Egress Credits", }, { .uname = "QPI_CREDITS", .ucode = 0x1000, .udesc = "Probe Queue Retries -- No QPI Credits", }, }; static intel_x86_umask_t bdx_unc_c_rxr_ipq_retry2[]={ { .uname 
= "AD_SBO", .ucode = 0x100, .udesc = "Probe Queue Retries -- No AD Sbo Credits", }, { .uname = "TARGET", .ucode = 0x4000, .udesc = "Probe Queue Retries -- Target Node Filter", }, }; static intel_x86_umask_t bdx_unc_c_rxr_irq_retry[]={ { .uname = "ADDR_CONFLICT", .ucode = 0x400, .udesc = "Ingress Request Queue Rejects -- Address Conflict", }, { .uname = "ANY", .ucode = 0x100, .udesc = "Ingress Request Queue Rejects -- Any Reject", .uflags = INTEL_X86_DFL, }, { .uname = "FULL", .ucode = 0x200, .udesc = "Ingress Request Queue Rejects -- No Egress Credits", }, { .uname = "IIO_CREDITS", .ucode = 0x2000, .udesc = "Ingress Request Queue Rejects -- No IIO Credits", }, { .uname = "NID", .ucode = 0x4000, .udesc = "Ingress Request Queue Rejects -- ", }, { .uname = "QPI_CREDITS", .ucode = 0x1000, .udesc = "Ingress Request Queue Rejects -- No QPI Credits", }, { .uname = "RTID", .ucode = 0x800, .udesc = "Ingress Request Queue Rejects -- No RTIDs", }, }; static intel_x86_umask_t bdx_unc_c_rxr_irq_retry2[]={ { .uname = "AD_SBO", .ucode = 0x100, .udesc = "Ingress Request Queue Rejects -- No AD Sbo Credits", }, { .uname = "BL_SBO", .ucode = 0x200, .udesc = "Ingress Request Queue Rejects -- No BL Sbo Credits", }, { .uname = "TARGET", .ucode = 0x4000, .udesc = "Ingress Request Queue Rejects -- Target Node Filter", }, }; static intel_x86_umask_t bdx_unc_c_rxr_ismq_retry[]={ { .uname = "ANY", .ucode = 0x100, .udesc = "ISMQ Retries -- Any Reject", .uflags = INTEL_X86_DFL, }, { .uname = "FULL", .ucode = 0x200, .udesc = "ISMQ Retries -- No Egress Credits", }, { .uname = "IIO_CREDITS", .ucode = 0x2000, .udesc = "ISMQ Retries -- No IIO Credits", }, { .uname = "NID", .ucode = 0x4000, .udesc = "ISMQ Retries -- ", }, { .uname = "QPI_CREDITS", .ucode = 0x1000, .udesc = "ISMQ Retries -- No QPI Credits", }, { .uname = "RTID", .ucode = 0x800, .udesc = "ISMQ Retries -- No RTIDs", }, { .uname = "WB_CREDITS", .ucode = 0x8000, .udesc = "ISMQ Retries -- ", }, }; static intel_x86_umask_t 
bdx_unc_c_rxr_ismq_retry2[]={ { .uname = "AD_SBO", .ucode = 0x100, .udesc = "ISMQ Request Queue Rejects -- No AD Sbo Credits", }, { .uname = "BL_SBO", .ucode = 0x200, .udesc = "ISMQ Request Queue Rejects -- No BL Sbo Credits", }, { .uname = "TARGET", .ucode = 0x4000, .udesc = "ISMQ Request Queue Rejects -- Target Node Filter", }, }; static intel_x86_umask_t bdx_unc_c_rxr_occupancy[]={ { .uname = "IPQ", .ucode = 0x400, .udesc = "Ingress Occupancy -- IPQ", .uflags = INTEL_X86_NCOMBO, }, { .uname = "IRQ", .ucode = 0x100, .udesc = "Ingress Occupancy -- IRQ", .uflags = INTEL_X86_NCOMBO, }, { .uname = "IRQ_REJ", .ucode = 0x200, .udesc = "Ingress Occupancy -- IRQ Rejected", .uflags = INTEL_X86_NCOMBO, }, { .uname = "PRQ_REJ", .ucode = 0x2000, .udesc = "Ingress Occupancy -- PRQ Rejects", .uflags = INTEL_X86_NCOMBO, }, }; static intel_x86_umask_t bdx_unc_c_sbo_credits_acquired[]={ { .uname = "AD", .ucode = 0x100, .udesc = "SBo Credits Acquired -- For AD Ring", }, { .uname = "BL", .ucode = 0x200, .udesc = "SBo Credits Acquired -- For BL Ring", }, }; static intel_x86_umask_t bdx_unc_c_sbo_credit_occupancy[]={ { .uname = "AD", .ucode = 0x100, .udesc = "SBo Credits Occupancy -- For AD Ring", }, { .uname = "BL", .ucode = 0x200, .udesc = "SBo Credits Occupancy -- For BL Ring", }, }; static intel_x86_umask_t bdx_unc_c_tor_inserts[]={ { .uname = "ALL", .ucode = 0x800, .udesc = "All", .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL | INTEL_X86_EXCL_GRP_GT, .grpid = 0, }, { .uname = "EVICTION", .ucode = 0x400, .udesc = "Evictions", .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, .grpid = 0, }, { .uname = "LOCAL", .ucode = 0x2800, .udesc = "Local Memory", .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, .grpid = 0, }, { .uname = "LOCAL_OPCODE", .ucode = 0x2100, .udesc = "Local Memory - Opcode Matched", .uflags = INTEL_X86_NCOMBO, .grpid = 0, }, { .uname = "MISS_LOCAL", .ucode = 0x2a00, .udesc = "Misses to Local Memory", .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, .grpid = 0, 
}, { .uname = "MISS_LOCAL_OPCODE", .ucode = 0x2300, .udesc = "Misses to Local Memory - Opcode Matched", .uflags = INTEL_X86_NCOMBO, .grpid = 0, }, { .uname = "MISS_OPCODE", .ucode = 0x300, .udesc = "Miss Opcode Match", .uflags = INTEL_X86_NCOMBO, .grpid = 0, }, { .uname = "MISS_REMOTE", .ucode = 0x8a00, .udesc = "Misses to Remote Memory", .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, .grpid = 0, }, { .uname = "MISS_REMOTE_OPCODE", .ucode = 0x8300, .udesc = "Misses to Remote Memory - Opcode Matched", .uflags = INTEL_X86_NCOMBO, .grpid = 0, }, { .uname = "NID_ALL", .ucode = 0x4800, .udesc = "NID Matched", .umodmsk_req = _SNBEP_UNC_ATTR_NF1, .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, .grpid = 0, }, { .uname = "NID_EVICTION", .ucode = 0x4400, .udesc = "NID Matched Evictions", .umodmsk_req = _SNBEP_UNC_ATTR_NF1, .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, .grpid = 0, }, { .uname = "NID_MISS_ALL", .ucode = 0x4a00, .udesc = "NID Matched Miss All", .umodmsk_req = _SNBEP_UNC_ATTR_NF1, .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, .grpid = 0, }, { .uname = "NID_MISS_OPCODE", .ucode = 0x4300, .udesc = "NID and Opcode Matched Miss", .umodmsk_req = _SNBEP_UNC_ATTR_NF1, .uflags = INTEL_X86_NCOMBO, .grpid = 0, }, { .uname = "NID_OPCODE", .ucode = 0x4100, .udesc = "NID and Opcode Matched", .umodmsk_req = _SNBEP_UNC_ATTR_NF1, .uflags = INTEL_X86_NCOMBO, .grpid = 0, }, { .uname = "NID_WB", .ucode = 0x5000, .udesc = "NID Matched Writebacks", .umodmsk_req = _SNBEP_UNC_ATTR_NF1, .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, .grpid = 0, }, { .uname = "OPCODE", .ucode = 0x100, .udesc = "Opcode Match", .uflags = INTEL_X86_NCOMBO, .grpid = 0, }, { .uname = "REMOTE", .ucode = 0x8800, .udesc = "Remote Memory", .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, .grpid = 0, }, { .uname = "REMOTE_OPCODE", .ucode = 0x8100, .udesc = "Remote Memory - Opcode Matched", .uflags = INTEL_X86_NCOMBO, .grpid = 0, }, { .uname = "WB", .ucode = 0x1000, .udesc = 
"Writebacks", .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, .grpid = 0, }, CBO_FILT_OPC(1) }; static intel_x86_umask_t bdx_unc_c_tor_occupancy[]={ { .uname = "ALL", .ucode = 0x800, .udesc = "Any", .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL | INTEL_X86_EXCL_GRP_GT, .grpid = 0, }, { .uname = "EVICTION", .ucode = 0x400, .udesc = "Evictions", .grpid = 0, .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, }, { .uname = "LOCAL", .ucode = 0x2800, .udesc = "Number of transactions in the TOR that are satisfied by locally homed memory", .grpid = 0, .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, }, { .uname = "LOCAL_OPCODE", .ucode = 0x2100, .udesc = "Local Memory - Opcode Matched", .grpid = 0, }, { .uname = "MISS_ALL", .ucode = 0xa00, .udesc = "Miss All", .grpid = 0, .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, }, { .uname = "MISS_LOCAL", .ucode = 0x2a00, .udesc = "Number of miss transactions in the TOR that are satisfied by locally homed memory", .grpid = 0, .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, }, { .uname = "MISS_LOCAL_OPCODE", .ucode = 0x2300, .udesc = "Number of miss opcode-matched transactions inserted into the TOR that are satisfied by locally homed memory", .grpid = 0, }, { .uname = "MISS_OPCODE", .ucode = 0x300, .udesc = "Number of miss transactions inserted into the TOR that match an opcode (must provide opc_* umask)", .grpid = 0, }, { .uname = "MISS_REMOTE_OPCODE", .ucode = 0x8300, .udesc = "Number of miss opcode-matched transactions inserted into the TOR that are satisfied by remote caches or memory", .grpid = 0, }, { .uname = "NID_ALL", .ucode = 0x4800, .udesc = "Number of NID-matched transactions inserted into the TOR (must provide nf=X modifier)", .grpid = 0, }, { .uname = "NID_EVICTION", .ucode = 0x4400, .udesc = "Number of NID-matched eviction transactions inserted into the TOR (must provide nf=X modifier)", .grpid = 0, .umodmsk_req = _SNBEP_UNC_ATTR_NF1, .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, }, { .uname = 
"NID_MISS_ALL", .ucode = 0x4a00, .udesc = "Number of NID-matched miss transactions that were inserted into the TOR (must provide nf=X modifier)", .grpid = 0, .umodmsk_req = _SNBEP_UNC_ATTR_NF1, .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, }, { .uname = "NID_MISS_OPCODE", .ucode = 0x4300, .udesc = "Number of NID and opcode matched miss transactions inserted into the TOR (must provide opc_* umask and nf=X modifier)", .grpid = 0, .umodmsk_req = _SNBEP_UNC_ATTR_NF1, .uflags = INTEL_X86_NCOMBO, }, { .uname = "NID_OPCODE", .ucode = 0x4100, .udesc = "Number of transactions inserted into the TOR that match a NID and opcode (must provide opc_* umask and nf=X modifier)", .grpid = 0, .umodmsk_req = _SNBEP_UNC_ATTR_NF1, .uflags = INTEL_X86_NCOMBO, }, { .uname = "NID_WB", .ucode = 0x5000, .udesc = "Number of NID-matched write back transactions inserted into the TOR (must provide nf=X modifier)", .grpid = 0, .umodmsk_req = _SNBEP_UNC_ATTR_NF1, .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, }, { .uname = "OPCODE", .ucode = 0x100, .udesc = "Number of transactions inserted into the TOR that match an opcode (must provide opc_* umask)", .grpid = 0, .uflags = INTEL_X86_NCOMBO, }, { .uname = "REMOTE", .ucode = 0x8800, .udesc = "Number of transactions inserted into the TOR that are satisfied by remote caches or memory", .grpid = 0, .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, }, { .uname = "REMOTE_OPCODE", .ucode = 0x8100, .udesc = "Number of opcode-matched transactions inserted into the TOR that are satisfied by remote caches or memory", .grpid = 0, .uflags = INTEL_X86_NCOMBO, }, { .uname = "WB", .ucode = 0x1000, .udesc = "Number of write transactions inserted into the TOR", .grpid = 0, .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, }, { .uname = "MISS_REMOTE", .ucode = 0x8a00, .udesc = "Number of miss transactions inserted into the TOR that are satisfied by remote caches or memory", .grpid = 0, .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, }, 
CBO_FILT_OPC(1) }; static intel_x86_umask_t bdx_unc_c_txr_ads_used[]={ { .uname = "AD", .ucode = 0x100, .udesc = "Onto AD Ring", }, { .uname = "AK", .ucode = 0x200, .udesc = "Onto AK Ring", }, { .uname = "BL", .ucode = 0x400, .udesc = "Onto BL Ring", }, }; static intel_x86_umask_t bdx_unc_c_txr_inserts[]={ { .uname = "AD_CACHE", .ucode = 0x100, .udesc = "Egress Allocations -- AD - Cachebo", }, { .uname = "AD_CORE", .ucode = 0x1000, .udesc = "Egress Allocations -- AD - Corebo", }, { .uname = "AK_CACHE", .ucode = 0x200, .udesc = "Egress Allocations -- AK - Cachebo", }, { .uname = "AK_CORE", .ucode = 0x2000, .udesc = "Egress Allocations -- AK - Corebo", }, { .uname = "BL_CACHE", .ucode = 0x400, .udesc = "Egress Allocations -- BL - Cacheno", }, { .uname = "BL_CORE", .ucode = 0x4000, .udesc = "Egress Allocations -- BL - Corebo", }, { .uname = "IV_CACHE", .ucode = 0x800, .udesc = "Egress Allocations -- IV - Cachebo", }, }; static intel_x86_entry_t intel_bdx_unc_c_pe[]={ { .name = "UNC_C_BOUNCE_CONTROL", .code = 0xa, .desc = "TBD", .modmsk = BDX_UNC_CBO_ATTRS, .cntmsk = 0xf, }, { .name = "UNC_C_CLOCKTICKS", .code = 0x0, .desc = "Clock ticks", .modmsk = BDX_UNC_CBO_ATTRS, .cntmsk = 0xf, }, { .name = "UNC_C_COUNTER0_OCCUPANCY", .code = 0x1f, .desc = "Since occupancy counts can only be captured in the Cbos 0 counter, this event allows a user to capture occupancy related information by filtering the Cb0 occupancy count captured in Counter 0. The filtering available is found in the control register - threshold, invert and edge detect. E.g. setting threshold to 1 can effectively monitor how many cycles the monitored queue has an entryy.", .modmsk = BDX_UNC_CBO_ATTRS, .cntmsk = 0xf, }, { .name = "UNC_C_FAST_ASSERTED", .code = 0x9, .desc = "Counts the number of cycles either the local distress or incoming distress signals are asserted. 
Incoming distress includes both up and dn.", .modmsk = BDX_UNC_CBO_ATTRS, .cntmsk = 0x3, }, { .name = "UNC_C_LLC_LOOKUP", .code = 0x34, .desc = "Counts the number of times the LLC was accessed - this includes code, data, prefetches and hints coming from L2. This has numerous filters available. Note the non-standard filtering equation. This event will count requests that lookup the cache multiple times with multiple increments. One must ALWAYS set umask bit 0 and select a state or states to match. Otherwise, the event will count nothing. CBoGlCtrl[22:18] bits correspond to [FMESI] state.", .modmsk = BDX_UNC_CBO_NID_ATTRS, .flags = INTEL_X86_NO_AUTOENCODE, .cntmsk = 0xf, .ngrp = 3, .umasks = bdx_unc_c_llc_lookup, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_c_llc_lookup), }, { .name = "UNC_C_LLC_VICTIMS", .code = 0x37, .desc = "Counts the number of lines that were victimized on a fill. This can be filtered by the state that the line was in.", .modmsk = BDX_UNC_CBO_NID_ATTRS, .flags = INTEL_X86_NO_AUTOENCODE, .cntmsk = 0xf, .ngrp = 2, .umasks = bdx_unc_c_llc_victims, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_c_llc_victims), }, { .name = "UNC_C_MISC", .code = 0x39, .desc = "Miscellaneous events in the Cbo.", .modmsk = BDX_UNC_CBO_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_c_misc, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_c_misc), }, { .name = "UNC_C_RING_AD_USED", .code = 0x1b, .desc = "Counts the number of cycles that the AD ring is being used at this ring stop. This includes when packets are passing by and when packets are being sunk, but does not include when packets are being sent from the ring stop. We really have two rings in BDX -- a clockwise ring and a counter-clockwise ring. On the left side of the ring, the UP direction is on the clockwise ring and DN is on the counter-clockwise ring. On the right side of the ring, this is reversed. The first half of the CBos are on the left side of the ring, and the 2nd half are on the right side of the ring. 
In other words (for example), in a 4c part, Cbo 0 UP AD is NOT the same ring as CBo 2 UP AD because they are on opposite sides of the rhe ring.", .modmsk = BDX_UNC_CBO_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_c_ring_ad_used, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_c_ring_ad_used), }, { .name = "UNC_C_RING_AK_USED", .code = 0x1c, .desc = "Counts the number of cycles that the AK ring is being used at this ring stop. This includes when packets are passing by and when packets are being sunk, but does not include when packets are being sent from the ring stop.We really have two rings in BDX -- a clockwise ring and a counter-clockwise ring. On the left side of the ring, the UP direction is on the clockwise ring and DN is on the counter-clockwise ring. On the right side of the ring, this is reversed. The first half of the CBos are on the left side of the ring, and the 2nd half are on the right side of the ring. In other words (for example), in a 4c part, Cbo 0 UP AD is NOT the same ring as CBo 2 UP AD because they are on opposite sides of the rhe ring.", .modmsk = BDX_UNC_CBO_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_c_ring_ak_used, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_c_ring_ak_used), }, { .name = "UNC_C_RING_BL_USED", .code = 0x1d, .desc = "Counts the number of cycles that the BL ring is being used at this ring stop. This includes when packets are passing by and when packets are being sunk, but does not include when packets are being sent from the ring stop.We really have two rings in BDX -- a clockwise ring and a counter-clockwise ring. On the left side of the ring, the UP direction is on the clockwise ring and DN is on the counter-clockwise ring. On the right side of the ring, this is reversed. The first half of the CBos are on the left side of the ring, and the 2nd half are on the right side of the ring. 
In other words (for example), in a 4c part, Cbo 0 UP AD is NOT the same ring as CBo 2 UP AD because they are on opposite sides of the rhe ring.", .modmsk = BDX_UNC_CBO_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_c_ring_bl_used, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_c_ring_bl_used), }, { .name = "UNC_C_RING_BOUNCES", .code = 0x5, .desc = "", .modmsk = BDX_UNC_CBO_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_c_ring_bounces, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_c_ring_bounces), }, { .name = "UNC_C_RING_IV_USED", .code = 0x1e, .desc = "Counts the number of cycles that the IV ring is being used at this ring stop. This includes when packets are passing by and when packets are being sunk, but does not include when packets are being sent from the ring stop. There is only 1 IV ring in BDX Therefore, if one wants to monitor the Even ring, they should select both UP_EVEN and DN_EVEN. To monitor the Odd ring, they should select both UP_ODD and DN_ DN_ODD.", .modmsk = BDX_UNC_CBO_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_c_ring_iv_used, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_c_ring_iv_used), }, { .name = "UNC_C_RING_SRC_THRTL", .code = 0x7, .desc = "", .modmsk = BDX_UNC_CBO_ATTRS, .cntmsk = 0xf, }, { .name = "UNC_C_RXR_EXT_STARVED", .code = 0x12, .desc = "Counts cycles in external starvation. This occurs when one of the ingress queues is being starved by the other queues.", .modmsk = BDX_UNC_CBO_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_c_rxr_ext_starved, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_c_rxr_ext_starved), }, { .name = "UNC_C_RXR_INSERTS", .code = 0x13, .desc = "Counts number of allocations per cycle into the specified Ingress queue.", .modmsk = BDX_UNC_CBO_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_c_rxr_inserts, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_c_rxr_inserts), }, { .name = "UNC_C_RXR_IPQ_RETRY", .code = 0x31, .desc = "Number of times a snoop (probe) request had to retry. 
Filters exist to cover some of the common cases retries.", .modmsk = BDX_UNC_CBO_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_c_rxr_ipq_retry, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_c_rxr_ipq_retry), }, { .name = "UNC_C_RXR_IPQ_RETRY2", .code = 0x28, .desc = "Number of times a snoop (probe) request had to retry. Filters exist to cover some of the common cases retries.", .modmsk = BDX_UNC_CBO_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_c_rxr_ipq_retry2, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_c_rxr_ipq_retry2), }, { .name = "UNC_C_RXR_IRQ_RETRY", .code = 0x32, .desc = "", .modmsk = BDX_UNC_CBO_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_c_rxr_irq_retry, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_c_rxr_irq_retry), }, { .name = "UNC_C_RXR_IRQ_RETRY2", .code = 0x29, .desc = "", .modmsk = BDX_UNC_CBO_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_c_rxr_irq_retry2, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_c_rxr_irq_retry2), }, { .name = "UNC_C_RXR_ISMQ_RETRY", .code = 0x33, .desc = "Number of times a transaction flowing through the ISMQ had to retry. Transaction pass through the ISMQ as responses for requests that already exist in the Cbo. 
Some examples include: when data is returned or when snoop responses come back from the cores.", .modmsk = BDX_UNC_CBO_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_c_rxr_ismq_retry, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_c_rxr_ismq_retry), }, { .name = "UNC_C_RXR_ISMQ_RETRY2", .code = 0x2a, .desc = "", .modmsk = BDX_UNC_CBO_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_c_rxr_ismq_retry2, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_c_rxr_ismq_retry2), }, { .name = "UNC_C_RXR_OCCUPANCY", .code = 0x11, .desc = "Counts number of entries in the specified Ingress queue in each cycle.", .modmsk = BDX_UNC_CBO_ATTRS, .cntmsk = 0x1, .ngrp = 1, .umasks = bdx_unc_c_rxr_occupancy, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_c_rxr_occupancy), }, { .name = "UNC_C_SBO_CREDITS_ACQUIRED", .code = 0x3d, .desc = "Number of Sbo credits acquired in a given cycle, per ring. Each Cbo is assigned an Sbo it can communicate with.", .modmsk = BDX_UNC_CBO_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_c_sbo_credits_acquired, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_c_sbo_credits_acquired), }, { .name = "UNC_C_SBO_CREDIT_OCCUPANCY", .code = 0x3e, .desc = "Number of Sbo credits in use in a given cycle, per ring. Each Cbo is assigned an Sbo it can communicate with.", .modmsk = BDX_UNC_CBO_ATTRS, .cntmsk = 0x1, .ngrp = 1, .umasks = bdx_unc_c_sbo_credit_occupancy, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_c_sbo_credit_occupancy), }, { .name = "UNC_C_TOR_INSERTS", .code = 0x35, .desc = "Counts the number of entries successfuly inserted into the TOR that match qualifications specified by the subevent. There are a number of subevent filters but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. 
If, for example, one wanted to count DRD Local Misses, one should select MISS_OPC_MATCH and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x1(0x182).", .modmsk = BDX_UNC_CBO_NID_ATTRS | _SNBEP_UNC_ATTR_ISOC | _SNBEP_UNC_ATTR_NC, .flags = INTEL_X86_NO_AUTOENCODE, .cntmsk = 0xf, .ngrp = 2, .umasks = bdx_unc_c_tor_inserts, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_c_tor_inserts), }, { .name = "UNC_C_TOR_OCCUPANCY", .code = 0x36, .desc = "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. There are a number of subevent filters but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select MISS_OPC_MATCH and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x (0x182)", .modmsk = BDX_UNC_CBO_NID_ATTRS | _SNBEP_UNC_ATTR_ISOC | _SNBEP_UNC_ATTR_NC, .flags = INTEL_X86_NO_AUTOENCODE, .cntmsk = 0x1, .ngrp = 2, .umasks = bdx_unc_c_tor_occupancy, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_c_tor_occupancy), }, { .name = "UNC_C_TXR_ADS_USED", .code = 0x4, .desc = "", .modmsk = BDX_UNC_CBO_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_c_txr_ads_used, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_c_txr_ads_used), }, { .name = "UNC_C_TXR_INSERTS", .code = 0x2, .desc = "Number of allocations into the Cbo Egress. 
The Egress is used to queue up requests destined for the ring.", .modmsk = BDX_UNC_CBO_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_c_txr_inserts, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_c_txr_inserts), }, }; papi-5.6.0/src/libpfm4/lib/pfmlib_sparc_niagara.c000664 001750 001750 00000006153 13216244365 023725 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_sparc_niagara.c : SPARC Niagara I, II * * Copyright (c) 2011 Google, Inc * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * Core PMU = architectural perfmon v2 + PEBS */ /* private headers */ #include "pfmlib_priv.h" #include "pfmlib_sparc_priv.h" #include "events/sparc_niagara1_events.h" #include "events/sparc_niagara2_events.h" pfmlib_pmu_t sparc_niagara1_support={ .desc = "Sparc Niagara I", .name = "niagara1", .pmu = PFM_PMU_SPARC_NIAGARA1, .pme_count = LIBPFM_ARRAY_SIZE(niagara1_pe), .type = PFM_PMU_TYPE_CORE, .supported_plm = SPARC_PLM, .max_encoding = 2, .num_cntrs = 2, .pe = niagara1_pe, .atdesc = NULL, .flags = 0, .pmu_detect = pfm_sparc_detect, .get_event_encoding[PFM_OS_NONE] = pfm_sparc_get_encoding, PFMLIB_ENCODE_PERF(pfm_sparc_get_perf_encoding), .get_event_first = pfm_sparc_get_event_first, .get_event_next = pfm_sparc_get_event_next, .event_is_valid = pfm_sparc_event_is_valid, .validate_table = pfm_sparc_validate_table, .get_event_info = pfm_sparc_get_event_info, .get_event_attr_info = pfm_sparc_get_event_attr_info, PFMLIB_VALID_PERF_PATTRS(pfm_sparc_perf_validate_pattrs), .get_event_nattrs = pfm_sparc_get_event_nattrs, }; pfmlib_pmu_t sparc_niagara2_support={ .desc = "Sparc Niagara II", .name = "niagara2", .pmu = PFM_PMU_SPARC_NIAGARA2, .pme_count = LIBPFM_ARRAY_SIZE(niagara2_pe), .type = PFM_PMU_TYPE_CORE, .supported_plm = NIAGARA2_PLM, .num_cntrs = 2, .max_encoding = 2, .pe = niagara2_pe, .atdesc = NULL, .flags = 0, .pmu_detect = pfm_sparc_detect, .get_event_encoding[PFM_OS_NONE] = pfm_sparc_get_encoding, PFMLIB_ENCODE_PERF(pfm_sparc_get_perf_encoding), .get_event_first = pfm_sparc_get_event_first, .get_event_next = pfm_sparc_get_event_next, .event_is_valid = pfm_sparc_event_is_valid, .validate_table = pfm_sparc_validate_table, .get_event_info = pfm_sparc_get_event_info, .get_event_attr_info = pfm_sparc_get_event_attr_info, PFMLIB_VALID_PERF_PATTRS(pfm_sparc_perf_validate_pattrs), .get_event_nattrs = pfm_sparc_get_event_nattrs, }; papi-5.6.0/src/components/bgpm/CNKunit/000775 001750 001750 00000000000 13216244356 021753 5ustar00jshenry1963jshenry1963000000 000000 
papi-5.6.0/man/man3/PAPI_get_shared_lib_info.3000664 001750 001750 00000001647 13216244356 023023 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPI_get_shared_lib_info" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPI_get_shared_lib_info \- .PP Get address info about the shared libraries used by the process\&. .SH SYNOPSIS .br .PP .SH "Detailed Description" .PP In C, this function returns a pointer to a structure containing information about the shared library used by the program\&. There is no Fortran equivalent call\&. .PP \fBNote:\fP .RS 4 This data will be incorporated into the \fBPAPI_get_executable_info\fP call in the future\&. \fBPAPI_get_shared_lib_info\fP will be deprecated and should be used with caution\&. .RE .PP .PP \fBSee Also:\fP .RS 4 \fBPAPI_shlib_info_t\fP .PP \fBPAPI_get_hardware_info\fP .PP \fBPAPI_get_executable_info\fP .PP \fBPAPI_get_dmem_info\fP .PP \fBPAPI_get_opt\fP \fBPAPI_library_init\fP .RE .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. papi-5.6.0/src/papi_preset.h000664 001750 001750 00000004037 13216244366 020017 0ustar00jshenry1963jshenry1963000000 000000 /** * @file papi_preset.h * @author Haihang You * you@cs.utk.edu */ #ifndef _PAPI_PRESET /* _PAPI_PRESET */ #define _PAPI_PRESET /** search element for preset events defined for each platform * @internal */ typedef struct hwi_search { /* eventcode should have a more specific name, like papi_preset! 
-pjm */ unsigned int event_code; /**< Preset code that keys back to sparse preset array */ int derived; /**< Derived type code */ int native[PAPI_EVENTS_IN_DERIVED_EVENT]; /**< array of native event code(s) for this preset event */ char operation[PAPI_2MAX_STR_LEN]; /**< operation string: +,-,*,/,@(number of metrics), $(constant Mhz), %(1000000.0) */ char *note; /**< optional developer notes for this event */ } hwi_search_t; /** collected text and data info for all preset events * @internal */ typedef struct hwi_presets { char *symbol; /**< name of the preset event; i.e. PAPI_TOT_INS, etc. */ char *short_descr; /**< short description of the event for labels, etc. */ char *long_descr; /**< long description (full sentence) */ int derived_int; /**< Derived type code */ unsigned int count; unsigned int event_type; char *postfix; unsigned int code[PAPI_MAX_INFO_TERMS]; char *name[PAPI_MAX_INFO_TERMS]; char *note; } hwi_presets_t; /** This is a general description structure definition for various parameter lists * @internal */ typedef struct hwi_describe { int value; /**< numeric value (from papi.h) */ char *name; /**< name of the element */ char *descr; /**< description of the element */ } hwi_describe_t; extern hwi_search_t *preset_search_map; int _papi_hwi_setup_all_presets( hwi_search_t * findem, int cidx); int _papi_hwi_cleanup_all_presets( void ); int _xml_papi_hwi_setup_all_presets( char *arch); int _papi_load_preset_table( char *name, int type, int cidx ); extern hwi_presets_t _papi_hwi_presets[PAPI_MAX_PRESET_EVENTS]; #endif /* _PAPI_PRESET */ papi-5.6.0/src/libpfm-3.y/include/perfmon/pfmlib_i386_p6.h000664 001750 001750 00000007334 13216244362 025103 0ustar00jshenry1963jshenry1963000000 000000 /* * Intel Pentium II/Pentium Pro/Pentium III/Pentium M PMU specific types and definitions * * Copyright (c) 2005-2007 Hewlett-Packard Development Company, L.P. 
* Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef __PFMLIB_I386_P6_H__ #define __PFMLIB_I386_P6_H__ #include /* * privilege level mask usage for i386-p6: * * PFM_PLM0 = OS (kernel, hypervisor, ..) * PFM_PLM1 = unused (ignored) * PFM_PLM2 = unused (ignored) * PFM_PLM3 = USR (user level) */ #ifdef __cplusplus extern "C" { #endif #define PMU_I386_P6_NUM_COUNTERS 2 /* total numbers of EvtSel/EvtCtr */ #define PMU_I386_P6_NUM_PERFSEL 2 /* total number of EvtSel defined */ #define PMU_I386_P6_NUM_PERFCTR 2 /* total number of EvtCtr defined */ #define PMU_I386_P6_COUNTER_WIDTH 32 /* hardware counter bit width */ /* * This structure provides a detailed way to setup a PMC register. * Once value is loaded, it must be copied (via pmu_reg) to the * perfmon_req_t and passed to the kernel via perfmonctl(). 
*/ typedef union { unsigned long val; /* complete register value */ struct { unsigned long sel_event_mask:8; /* event mask */ unsigned long sel_unit_mask:8; /* unit mask */ unsigned long sel_usr:1; /* user level */ unsigned long sel_os:1; /* system level */ unsigned long sel_edge:1; /* edge detec */ unsigned long sel_pc:1; /* pin control */ unsigned long sel_int:1; /* enable APIC intr */ unsigned long sel_res1:1; /* reserved */ unsigned long sel_en:1; /* enable */ unsigned long sel_inv:1; /* invert counter mask */ unsigned long sel_cnt_mask:8; /* counter mask */ } perfsel; } pfm_i386_p6_sel_reg_t; typedef union { uint64_t val; /* counter value */ /* counting perfctr register */ struct { unsigned long ctr_count:32; /* 32-bit hardware counter */ unsigned long ctr_res1:32; /* reserved */ } perfctr; } pfm_i386_p6_ctr_reg_t; typedef enum { PFM_I386_P6_CNT_MASK_0, PFM_I386_P6_CNT_MASK_1, PFM_I386_P6_CNT_MASK_2, PFM_I386_P6_CNT_MASK_3 } pfm_i386_p6_cnt_mask_t; typedef struct { pfm_i386_p6_cnt_mask_t cnt_mask; /* threshold (cnt_mask) */ unsigned int flags; /* counter specific flag */ } pfmlib_i386_p6_counter_t; #define PFM_I386_P6_SEL_INV 0x1 /* inverse */ #define PFM_I386_P6_SEL_EDGE 0x2 /* edge detect */ /* * P6-specific parameters for the library */ typedef struct { pfmlib_i386_p6_counter_t pfp_i386_p6_counters[PMU_I386_P6_NUM_COUNTERS]; /* extended counter features */ uint64_t reserved[4]; /* for future use */ } pfmlib_i386_p6_input_param_t; typedef struct { uint64_t reserved[8]; /* for future use */ } pfmlib_i386_p6_output_param_t; #ifdef __cplusplus /* extern C */ } #endif #endif /* __PFMLIB_I386_P6_H__ */ papi-5.6.0/src/papi_preset.c000664 001750 001750 00000143754 13216244366 020024 0ustar00jshenry1963jshenry1963000000 000000 /* * File: papi_preset.c * Author: Haihang You * you@cs.utk.edu * Mods: Brian Sheely * bsheely@eecs.utk.edu * Author: Vince Weaver * vweaver1 @ eecs.utk.edu * Merge of the libpfm3/libpfm4/pmapi-ppc64_events preset code */ #include #include 
#include #include "papi.h" #include "papi_internal.h" #include "papi_vector.h" #include "papi_memory.h" #include "papi_preset.h" #include "extras.h" // A place to put user defined events extern hwi_presets_t user_defined_events[]; extern int user_defined_events_count; static int papi_load_derived_events (char *pmu_str, int pmu_type, int cidx, int preset_flag); /* This routine copies values from a dense 'findem' array of events into the sparse global _papi_hwi_presets array, which is assumed to be empty at initialization. Multiple dense arrays can be copied into the sparse array, allowing event overloading at run-time, or allowing a baseline table to be augmented by a model specific table at init time. This method supports adding new events; overriding existing events, or deleting deprecated events. */ int _papi_hwi_setup_all_presets( hwi_search_t * findem, int cidx ) { int i, pnum, did_something = 0; unsigned int preset_index, j, k; /* dense array of events is terminated with a 0 preset. don't do anything if NULL pointer. This allows just notes to be loaded. It's also good defensive programming. */ if ( findem != NULL ) { for ( pnum = 0; ( pnum < PAPI_MAX_PRESET_EVENTS ) && ( findem[pnum].event_code != 0 ); pnum++ ) { /* find the index for the event to be initialized */ preset_index = ( findem[pnum].event_code & PAPI_PRESET_AND_MASK ); /* count and set the number of native terms in this event, these items are contiguous. PAPI_EVENTS_IN_DERIVED_EVENT is arbitrarily defined in the high level to be a reasonable number of terms to use in a derived event linear expression, currently 8. This wastes space for components with less than 8 counters, but keeps the framework independent of the components. The 'native' field below is an arbitrary opaque identifier that points to information on an actual native event. It is not an event code itself (whatever that might mean). By definition, this value can never == PAPI_NULL. 
- dkt */ INTDBG( "Counting number of terms for preset index %d, " "search map index %d.\n", preset_index, pnum ); i = 0; j = 0; while ( i < PAPI_EVENTS_IN_DERIVED_EVENT ) { if ( findem[pnum].native[i] != PAPI_NULL ) { j++; } else if ( j ) { break; } i++; } INTDBG( "This preset has %d terms.\n", j ); _papi_hwi_presets[preset_index].count = j; _papi_hwi_presets[preset_index].derived_int = findem[pnum].derived; for(k=0;kcmp_info.num_preset_events += did_something; return ( did_something ? PAPI_OK : PAPI_ENOEVNT ); } int _papi_hwi_cleanup_all_presets( void ) { int preset_index,cidx; unsigned int j; for ( preset_index = 0; preset_index < PAPI_MAX_PRESET_EVENTS; preset_index++ ) { if ( _papi_hwi_presets[preset_index].postfix != NULL ) { papi_free( _papi_hwi_presets[preset_index].postfix ); _papi_hwi_presets[preset_index].postfix = NULL; } if ( _papi_hwi_presets[preset_index].note != NULL ) { papi_free( _papi_hwi_presets[preset_index].note ); _papi_hwi_presets[preset_index].note = NULL; } for(j=0; j<_papi_hwi_presets[preset_index].count;j++) { papi_free(_papi_hwi_presets[preset_index].name[j]); } } for(cidx=0;cidxcmp_info.num_preset_events = 0; } #if defined(ITANIUM2) || defined(ITANIUM3) /* NOTE: This memory may need to be freed for BG/P builds as well */ if ( preset_search_map != NULL ) { papi_free( preset_search_map ); preset_search_map = NULL; } #endif return PAPI_OK; } #define PAPI_EVENT_FILE "papi_events.csv" /* Trims blank space from both ends of a string (in place). 
Returns pointer to new start address */ static inline char * trim_string( char *in ) { int len, i = 0; char *start = in; if ( in == NULL ) return ( in ); len = ( int ) strlen( in ); if ( len == 0 ) return ( in ); /* Trim left */ while ( i < len ) { if ( isblank( in[i] ) ) { in[i] = '\0'; start++; } else break; i++; } /* Trim right */ i = ( int ) strlen( start ) - 1; while ( i >= 0 ) { if ( isblank( start[i] ) ) start[i] = '\0'; else break; i--; } return ( start ); } /* Calls trim_string to remove blank space; Removes paired punctuation delimiters from beginning and end of string. If the same punctuation appears first and last (quotes, slashes) they are trimmed; Also checks for the following pairs: () <> {} [] */ static inline char * trim_note( char *in ) { int len; char *note, start, end; note = trim_string( in ); if ( note != NULL ) { len = ( int ) strlen( note ); if ( len > 0 ) { if ( ispunct( *note ) ) { start = *note; end = note[len - 1]; if ( ( start == end ) || ( ( start == '(' ) && ( end == ')' ) ) || ( ( start == '<' ) && ( end == '>' ) ) || ( ( start == '{' ) && ( end == '}' ) ) || ( ( start == '[' ) && ( end == ']' ) ) ) { note[len - 1] = '\0'; *note = '\0'; note++; } } } } return note; } static inline int find_event_index(hwi_presets_t *array, int size, char *tmp) { SUBDBG("ENTER: array: %p, size: %d, tmp: %s\n", array, size, tmp); int i; for (i = 0; i < size; i++) { if (array[i].symbol == NULL) { array[i].symbol = papi_strdup(tmp); SUBDBG("EXIT: i: %d\n", i); return i; } if (strcasecmp(tmp, array[i].symbol) == 0) { SUBDBG("EXIT: i: %d\n", i); return i; } } SUBDBG("EXIT: PAPI_EINVAL\n"); return PAPI_EINVAL; } /* Look for an event file 'name' in a couple common locations. 
Return a valid file handle if found */
static FILE *
open_event_table( char *name )
{
	/* Tries, in order: 'name' as given, PAPI_EVENT_FILE in the current
	   directory, then PAPI_EVENT_FILE in the parent directory.  Each fallback
	   path is written into 'name', so on return 'name' holds the last path
	   that was tried.
	   NOTE(review): 'name' must therefore be writable and large enough for
	   "../" PAPI_EVENT_FILE; its size is not known here -- confirm at callers. */
	FILE *table;

	SUBDBG( "Opening %s\n", name );
	table = fopen( name, "r" );
	if ( table == NULL ) {
		SUBDBG( "Open %s failed, trying ./%s.\n", name, PAPI_EVENT_FILE );
		sprintf( name, "%s", PAPI_EVENT_FILE );
		table = fopen( name, "r" );
	}
	if ( table == NULL ) {
		SUBDBG( "Open ./%s failed, trying ../%s.\n", name, PAPI_EVENT_FILE );
		sprintf( name, "../%s", PAPI_EVENT_FILE );
		table = fopen( name, "r" );
	}
	if ( table ) {
		SUBDBG( "Open %s succeeded.\n", name );
	}
	return table;
}

/* parse a single line from either a file or character table
   Strip the trailing newline; return 0 if empty.

   line:   destination buffer; the file branch hands it to fgets() with a
           size of LINE_MAX, so it is treated as LINE_MAX bytes throughout.
   table:  when non-NULL the next line is read from this stream.
   tmp_perfmon_events_table:
           used only when 'table' is NULL; points at the read cursor of an
           in-memory, newline separated table and is advanced past the text
           that was consumed.

   Returns 1 when a line was stored in 'line', 0 when nothing was read
   (end of file / read error, or no characters before the next newline or
   the end of the in-memory table). */
static int
get_event_line( char *line, FILE * table, char **tmp_perfmon_events_table )
{
	int i;

	if ( table ) {
		if ( fgets( line, LINE_MAX, table ) == NULL)
			return 0;

		i = ( int ) strlen( line );
		if (i == 0)
			return 0;
		if ( line[i-1] == '\n' )
			line[i-1] = '\0';
		return 1;
	} else {
		char *src = *tmp_perfmon_events_table;

		/* Copy up to the next newline or the end of the table, but never more
		   than the buffer holds.  An over-long line is split, exactly as
		   fgets() does in the file branch: the rest is returned by the
		   next call. */
		for ( i = 0; i < LINE_MAX - 1 && *src != '\0' && *src != '\n'; i++ )
			line[i] = *src++;
		*tmp_perfmon_events_table = src;
		if (i == 0)
			return 0;
		/* step over the newline that ended this line */
		if ( *src == '\n' ) {
			( *tmp_perfmon_events_table )++;
		}
		line[i] = '\0';
		return 1;
	}
}

// update tokens in formula referring to index "old_index" with tokens referring to index "new_index".
static void
update_ops_string(char **formula, int old_index, int new_index) {
	// The formula is a '|' separated postfix string whose "N<number>" tokens refer
	// to entries of a native event array.  Every token equal to N<old_index> is
	// rewritten as N<new_index>, every token with a larger index is decremented
	// by one, and all other tokens are copied unchanged.  The rebuilt string
	// (each token followed by '|') replaces *formula; the old string is freed.
	// Nothing is done when *formula is NULL or when memory cannot be obtained.
	INTDBG("ENTER: *formula: %s, old_index: %d, new_index: %d\n", *formula?*formula:"NULL", old_index, new_index);

	// longest text "N%d" can produce: 'N', a sign, and the digits of an int
	// (3 decimal digits per byte is a safe over-estimate)
	enum { INDEX_TOKEN_MAX = 3 * sizeof(int) + 2 };

	int cur_index;
	int written;
	char *newFormula;
	char *subtoken;
	char *tok_save_ptr=NULL;
	const char *scan;
	size_t token_count = 1;
	size_t capacity;
	size_t used = 0;

	// if formula is null just return
	if (*formula == NULL) {
		INTDBG("EXIT: Null pointer to formula passed in\n");
		return;
	}

	// A rewritten token may be longer than the one it replaces (N9 -> N10), so
	// size the new formula for the worst case: every token replaced by the
	// longest possible index token, plus its separator, plus the terminator.
	for (scan = *formula; *scan != '\0'; scan++) {
		if (*scan == '|') {
			token_count++;
		}
	}
	capacity = strlen(*formula) + token_count * (INDEX_TOKEN_MAX + 1) + 1;

	// get some space for the new formula we are going to create
	newFormula = papi_calloc(capacity, 1);
	if (newFormula == NULL) {
		// leave the caller's formula untouched (it has not been tokenized yet)
		INTDBG("EXIT: Could not allocate memory for new formula\n");
		return;
	}

	// rebuild the formula token by token (strtok_r chops up the old string, which is freed below)
	subtoken = strtok_r(*formula, "|", &tok_save_ptr);
	while ( subtoken != NULL) {
		// cast: a negative plain char passed to isdigit() is undefined behavior
		if ((subtoken[0] == 'N')  &&  (isdigit((unsigned char)subtoken[1]))) {
			cur_index = atoi(&subtoken[1]);
			if (cur_index == old_index) {
				// matches old index, use the new one
				written = snprintf(newFormula + used, capacity - used, "N%d|", new_index);
			} else if (cur_index > old_index) {
				// current token greater than old index, make it one less than what it was
				written = snprintf(newFormula + used, capacity - used, "N%d|", cur_index-1);
			} else {
				// current token less than old index, copy this part of the original formula into the new formula
				written = snprintf(newFormula + used, capacity - used, "%s|", subtoken);
			}
		} else {
			// copy this part of the original formula into the new formula
			written = snprintf(newFormula + used, capacity - used, "%s|", subtoken);
		}
		// not expected with the capacity computed above; stop rather than write out of bounds
		if (written < 0  ||  (size_t)written >= capacity - used) {
			newFormula[used] = '\0';
			break;
		}
		used += (size_t)written;
		subtoken = strtok_r(NULL, "|", &tok_save_ptr);
	}
	papi_free (*formula);
	*formula = newFormula;

	INTDBG("EXIT: newFormula: %s\n", newFormula);
	return;
}

//
// Handle creating a new derived event of type DERIVED_ADD.  This may create a new formula
// which can be used to compute the results of the new event from the events it depends on.
// This code is also responsible for making sure that all the needed native events are in the // new events native event list and that the formula's referenced to this array are correct. // static void ops_string_append(hwi_presets_t *results, hwi_presets_t *depends_on, int addition) { INTDBG("ENTER: results: %p, depends_on: %p, addition %d\n", results, depends_on, addition); int i; int second_event = 0; char newFormula[PAPI_MIN_STR_LEN] = ""; char work[20]; // if our results already have a formula, start with what was collected so far // this should only happens when processing the second event of a new derived add if (results->postfix != NULL) { INTDBG("Event %s has existing formula %s\n", results->symbol, results->postfix); // get the existing formula strncat(newFormula, results->postfix, sizeof(newFormula)-1); newFormula[sizeof(newFormula)-1] = '\0'; second_event = 1; } // process based on what kind of event the one we depend on is switch (depends_on->derived_int) { case DERIVED_POSTFIX: { // the event we depend on has a formula, append it our new events formula // if event we depend on does not have a formula, report error if (depends_on->postfix == NULL) { INTDBG("Event %s is of type DERIVED_POSTFIX but is missing operation string\n", depends_on->symbol); return; } // may need to renumber the native event index values in the depends on event formula before putting it into new derived event char *temp = papi_strdup(depends_on->postfix); // If this is not the first event of the new derived add, need to adjust native event index values in formula. // At this time we assume that all the native events in the second events formula are unique for the new event // and just bump the indexes by the number of events already known to the new event. Later when we add the events // to the native event list for this new derived event, we will check to see if the native events are already known // to the new derived event and if so adjust the indexes again. 
if (second_event) { for ( i=depends_on->count-1 ; i>=0 ; i--) { update_ops_string(&temp, i, results->count + i); } } // append the existing formula from the event we depend on (but get rid of last '|' character) strncat(newFormula, temp, sizeof(newFormula)-1); newFormula[sizeof(newFormula)-1] = '\0'; papi_free (temp); break; } case DERIVED_ADD: { // the event we depend on has no formula, create a formula for our new event to add together the depends_on native event values // build a formula for this add event sprintf(work, "N%d|N%d|+|", results->count, results->count + 1); strcat(newFormula, work); break; } case DERIVED_SUB: { // the event we depend on has no formula, create a formula for our new event to subtract the depends_on native event values // build a formula for this subtract event sprintf(work, "N%d|N%d|-|", results->count, results->count + 1); strcat(newFormula, work); break; } case NOT_DERIVED: { // the event we depend on has no formula and is itself only based on one native event, create a formula for our new event to include this native event // build a formula for this subtract event sprintf(work, "N%d|", results->count); strcat(newFormula, work); break; } default: { // the event we depend on has unsupported derived type, put out some debug and give up INTDBG("Event %s depends on event %s which has an unsupported derived type of %d\n", results->symbol, depends_on->symbol, depends_on->derived_int); return; } } // if this was the second event, append to the formula an operation to add or subtract the results of the two events if (second_event) { if (addition != 0) { strcat(newFormula, "+|"); } else { strcat(newFormula, "-|"); } // also change the new derived events type to show it has a formula now results->derived_int = DERIVED_POSTFIX; } // we need to free the existing space (created by malloc and we need to create a new one) papi_free (results->postfix); results->postfix = papi_strdup(newFormula); INTDBG("EXIT: newFormula: %s\n", newFormula); 
return; } // merge the 'insertion' formula into the 'original' formula replacing the // 'replaces' token in the 'original' formula. static void ops_string_merge(char **original, char *insertion, int replaces, int start_index) { INTDBG("ENTER: original: %p, *original: %s, insertion: %s, replaces: %d, start_index: %d\n", original, *original, insertion, replaces, start_index); int orig_len=0; int ins_len=0; char *subtoken; char *workBuf; char *workPtr; char *tok_save_ptr=NULL; char *newOriginal; char *newInsertion; char *newFormula; int insert_events; if (*original != NULL) { orig_len = strlen(*original); } if (insertion != NULL) { ins_len = strlen(insertion); } newFormula = papi_calloc (orig_len + ins_len + 40, 1); // if insertion formula is not provided, then the original formula remains basically unchanged. if (insertion == NULL) { // if the original formula has a leading '|' then get rid of it workPtr = *original; if (workPtr[0] == '|') { strcpy(newFormula, &workPtr[1]); } else { strcpy(newFormula, workPtr); } // formula fields are always malloced space so free the previous one papi_free (*original); *original = newFormula; INTDBG("EXIT: newFormula: %s\n", *original); return; } // renumber the token numbers in the insertion formula // also count how many native events are used in this formula insert_events = 0; newInsertion = papi_calloc(ins_len+20, 1); workBuf = papi_calloc(ins_len+10, 1); workPtr = papi_strdup(insertion); subtoken = strtok_r(workPtr, "|", &tok_save_ptr); while ( subtoken != NULL) { // INTDBG("subtoken: %s, newInsertion: %s\n", subtoken, newInsertion); if ((subtoken[0] == 'N') && (isdigit(subtoken[1]))) { insert_events++; int val = atoi(&subtoken[1]); val += start_index; subtoken[1] = '\0'; sprintf (workBuf, "N%d", val); } else { strcpy(workBuf, subtoken); } strcat (newInsertion, workBuf); strcat (newInsertion, "|"); subtoken = strtok_r(NULL, "|", &tok_save_ptr); } papi_free (workBuf); papi_free (workPtr); INTDBG("newInsertion: %s\n", 
newInsertion); // if original formula is not provided, then the updated insertion formula becomes the new formula // but we still had to renumber the native event tokens in case another native event was put into the list first if (*original == NULL) { *original = papi_strdup(newInsertion); INTDBG("EXIT: newFormula: %s\n", newInsertion); papi_free (newInsertion); papi_free (newFormula); return; } // if token to replace not valid, return null (do we also need to check an upper bound ???) if ((replaces < 0)) { papi_free (newInsertion); papi_free (newFormula); INTDBG("EXIT: Invalid value for token in original formula to be replaced\n"); return; } // renumber the token numbers in the original formula // tokens with an index greater than the replaces token need to be incremented by number of events in insertion formula-1 newOriginal = papi_calloc (orig_len+20, 1); workBuf = papi_calloc(orig_len+10, 1); workPtr = papi_strdup(*original); subtoken = strtok_r(workPtr, "|", &tok_save_ptr); while ( subtoken != NULL) { // INTDBG("subtoken: %s, newOriginal: %s\n", subtoken, newOriginal); // prime the work area with the next token, then see if we need to change it strcpy(workBuf, subtoken); if ((subtoken[0] == 'N') && (isdigit(subtoken[1]))) { int val = atoi(&subtoken[1]); if (val > replaces) { val += insert_events-1; subtoken[1] = '\0'; sprintf (workBuf, "N%d", val); } } // put the work buffer into the new original formula strcat (newOriginal, workBuf); strcat (newOriginal, "|"); subtoken = strtok_r(NULL, "|", &tok_save_ptr); } papi_free (workBuf); papi_free (workPtr); INTDBG("newOriginal: %s\n", newOriginal); // replace the specified "replace" tokens in the new original formula with the new insertion formula newFormula[0] = '\0'; workPtr = newOriginal; subtoken = strtok_r(workPtr, "|", &tok_save_ptr); while ( subtoken != NULL) { // INTDBG("subtoken: %s, newFormula: %s\n", subtoken, newFormula); // if this is the token we want to replace with the insertion string, do it now if 
((subtoken[0] == 'N') && (isdigit(subtoken[1])) && (replaces == atoi(&subtoken[1]))) { // copy updated insertion string into the original string (replacing this token) strcat(newFormula, newInsertion); } else { // copy this part of the original formula into the new formula strcat(newFormula, subtoken); strcat(newFormula, "|"); } subtoken = strtok_r(NULL, "|", &tok_save_ptr); } papi_free (newInsertion); papi_free (workPtr); // formula fields are always malloced space so free the previous one papi_free (*original); *original = newFormula; INTDBG("EXIT: newFormula: %s\n", newFormula); return; } // // Check to see if an event the new derived event being created depends on is known. We check both preset and user defined derived events here. // If it is a known derived event then we set the new event being defined to include the necessary native events and formula to compute its // derived value and use it in the correct context of the new derived event being created. Depending on the inputs, the operations strings (formulas) // to be used by the new derived event may need to be created and/or adjusted to reference the correct native event indexes for the new derived event. // The formulas processed by this code must be reverse polish notation (RPN) or postfix format and they must contain place holders (like N0, N1) which // identify indexes into the native event array used to compute the new derived events final value. // // Arguments: // target: event we are looking for // derived_type: type of derived event being created (add, subtract, postfix) // results: where to build the new preset event being defined. // search: table of known existing preset or user events the new derived event is allowed to use (points to a table of either preset or user events). // search_size: number of entries in the search table. 
//
// Return value:
//    1 if an entry of 'search' whose symbol matches 'target' (strcasecmp) was found and merged into 'results',
//    0 if no entry matched.  The scan also stops with 0 at the first entry whose symbol is NULL.
//
static int
check_derived_events(char *target, int derived_type, hwi_presets_t* results, hwi_presets_t * search, int search_size, int token_index)
{
	INTDBG("ENTER: target: %p (%s), results: %p, search: %p, search_size: %d, token_index: %d\n", target, target, results, search, search_size, token_index);
	unsigned int i;
	int j;
	int k;
	// number of native events of the matched entry that were already present in results
	int found = 0;

	for (j=0; j < search_size; j++) {
//		INTDBG("search[%d].symbol: %s, looking for: %s\n", j, search[j].symbol, target);
		// a NULL symbol ends the scan
		if (search[j].symbol == NULL) {
			INTDBG("EXIT: returned: 0\n");
			return 0;
		}

		// if not the event we depend on, just look at next
		if ( strcasecmp( target, search[j].symbol) != 0 ) {
			continue;
		}

		INTDBG("Found a match\n");

		// derived formulas need to be adjusted based on what kind of derived event we are processing
		// the derived type passed to this function is the type of the new event being defined (not the events it is based on)
		// when we get here the formula must be in reverse polish notation (RPN) format
		switch (derived_type) {
			case DERIVED_POSTFIX: {
				// go create a formula to merge the second formula into a spot identified by one of the tokens in
				// the first formula.
				ops_string_merge(&(results->postfix), search[j].postfix, token_index, results->count);
				break;
			}
			case DERIVED_ADD: {
				// the new derived event adds two things together, go handle this target events role in the add
				ops_string_append(results, &search[j], 1);
				break;
			}
			case DERIVED_SUB: {
				// go create a formula to subtract the value generated by the second formula from the value generated by the first formula.
				ops_string_append(results, &search[j], 0);
				break;
			}
			default: {
				// other derived types leave the formula untouched; the native events are still copied below
				INTDBG("Derived type: %d, not currently handled\n", derived_type);
				break;
			}
		}

		// copy event name and code used by the derived event into the results table (place where new derived event is getting created)
		for ( k = 0; k < (int)search[j].count; k++ ) {
//			INTDBG("search[%d]: %p, name[%d]: %s, code[%d]: %#x\n", j, &search[j], k, search[j].name[k], k, search[j].code[k]);
			// if this event is already in the list, just update the formula so that references to this event point to the existing one
			for (i=0 ; i < results->count ; i++) {
				if (results->code[i] == search[j].code[k]) {
					INTDBG("event: %s, code: %#x, already in results at index: %d\n", search[j].name[k], search[j].code[k], i);
					// replace all tokens in the formula that refer to index "results->count + found" with a token that refers to index "i".
					// the index "results->count + found" identifies the index used in the formula for the event we just determined is a duplicate
					update_ops_string(&(results->postfix), results->count + found, i);
					found++;
					break;
				}
			}

			// if we did not find a match, copy native event info into results array
			// NOTE(review): 'found' is never reset inside this loop, so once one duplicate has been seen no later
			// native event of this entry is copied, even if it is not a duplicate -- confirm this is intended.
			if (found == 0) {
				// not a duplicate, go ahead and copy into results and bump number of native events in results
				if (search[j].name[k]) {
					results->name[results->count] = papi_strdup(search[j].name[k]);
				} else {
					// entry has no stored name for this term, fall back to the name we were asked to look up
					results->name[results->count] = papi_strdup(target);
				}
				results->code[results->count] = search[j].code[k];
				INTDBG("results: %p, name[%d]: %s, code[%d]: %#x\n", results, results->count, results->name[results->count], results->count, results->code[results->count]);

				results->count++;
			}
		}

		INTDBG("EXIT: returned: 1\n");
		return 1;
	}

	INTDBG("EXIT: returned: 0\n");
	return 0;
}

// Try to resolve 'target' as a native event name.
// On success the event code and a copy of the name are appended to 'results' (count is incremented) and 1 is returned.
// On any failure 0 is returned and results->count is left unchanged; note that the slot results->code[results->count]
// may already have been written by the name to code conversion.
static int
check_native_events(char *target, hwi_presets_t* results)
{
	INTDBG("ENTER: target: %p (%s), results: %p\n", target, target, results);
	int ret;

	// find this native events code
	if ( ( ret = _papi_hwi_native_name_to_code( target, (int *)(&results->code[results->count])) ) != PAPI_OK ) {
		INTDBG("EXIT: returned: 0, call to convert name to event code failed with ret: %d\n", ret);
		return 0;
	}

	// if the code returned was 0, return to show it is not a valid native event
	if ( results->code[results->count] == 0 ) {
		INTDBG( "EXIT: returned: 0, event code not found\n");
		return 0;
	}

	// if this native event is not for component 0, return to show it can not be used in derived events
	// it should be possible to create derived events for other components as long as all events in the derived event are associated with the same component
	if ( _papi_hwi_component_index(results->code[results->count]) != 0 ) {
		INTDBG( "EXIT: returned: 0, new event not associated with component 0 (current limitation with derived events)\n");
		return 0;
	}

	//	  found = 1;
	INTDBG("\tFound a native event %s\n", target);
	results->name[results->count++] = papi_strdup(target);

	INTDBG( "EXIT: returned: 1\n");
	return 1;
}

// see if the event_name string passed in matches a known event name
// if it does, these calls also update information in the event definition tables to remember the event
// lookup order: preset events, then user defined events, then native events; returns 1 on the first hit, 0 if none match
static int
is_event(char *event_name, int derived_type, hwi_presets_t* results, int token_index)
{
	INTDBG("ENTER: event_name: %p (%s), derived_type: %d, results: %p, token_index: %d\n", event_name, event_name, derived_type, results, token_index);

	/* check if its a preset event */
	if ( check_derived_events(event_name, derived_type, results, &_papi_hwi_presets[0], PAPI_MAX_PRESET_EVENTS, token_index) ) {
		INTDBG("EXIT: found preset event\n");
		return 1;
	}

	/* check if its a user defined event */
	if ( check_derived_events(event_name, derived_type, results, user_defined_events, user_defined_events_count, token_index) ) {
		INTDBG("EXIT: found user event\n");
		return 1;
	}

	/* check if its a native event */
	if ( check_native_events(event_name, results) ) {
		INTDBG("EXIT: found native event\n");
		return 1;
	}

	INTDBG("EXIT: event not found\n");
	return 0;
}

/* Static version of
the events file. */ #if defined(STATIC_PAPI_EVENTS_TABLE) #include "papi_events_table.h" #else static char *papi_events_table = NULL; #endif int _papi_load_preset_table(char *pmu_str, int pmu_type, int cidx) { SUBDBG("ENTER: pmu_str: %s, pmu_type: %d, cidx: %d\n", pmu_str, pmu_type, cidx); int retval; // go load papi preset events (last argument tells function if we are loading presets or user events) retval = papi_load_derived_events(pmu_str, pmu_type, cidx, 1); if (retval != PAPI_OK) { SUBDBG("EXIT: retval: %d\n", retval); return retval; } // go load the user defined event definitions if any are defined retval = papi_load_derived_events(pmu_str, pmu_type, cidx, 0); SUBDBG("EXIT: retval: %d\n", retval); return retval; } // global variables static char stack[2*PAPI_HUGE_STR_LEN]; // stack static int stacktop = -1; // stack length // priority: This function returns the priority of the operator static int priority( char symbol ) { switch( symbol ) { case '@': return -1; case '(': return 0; case '+': case '-': return 1; case '*': case '/': case '%': return 2; default : return 0; } // end switch symbol } // end priority static int push( char symbol ) { if (stacktop >= 2*PAPI_HUGE_STR_LEN - 1) { INTDBG("stack overflow converting algebraic expression (%d,%c)\n", stacktop,symbol ); return -1; //***TODO: Figure out how to exit gracefully } // end if stacktop>MAX stack[++stacktop] = symbol; return 0; } // end push // pop from stack static char pop() { if( stacktop < 0 ) { INTDBG("stack underflow converting algebraic expression\n" ); return '\0'; //***TODO: Figure out how to exit gracefully } // end if empty return( stack[stacktop--] ); } // end pop /* infix_to_postfix: routine that will be called with parameter: char *in characters of infix notation (algebraic formula) returns: char * pointer to string of returned postfix */ static char * infix_to_postfix( char *infix ) { INTDBG("ENTER: in: %s, size: %zu\n", infix, strlen(infix)); static char postfix[2*PAPI_HUGE_STR_LEN]; 
// output unsigned int index; int postfixlen; char token; if ( strlen(infix) > PAPI_HUGE_STR_LEN ) PAPIERROR("A infix string (probably in user-defined presets) is too big (max allowed %d): %s", PAPI_HUGE_STR_LEN, infix ); // initialize stack memset(stack, 0, 2*PAPI_HUGE_STR_LEN); stacktop = -1; push('#'); stacktop = 0; // after initialization of stack to # /* initialize output string */ memset(postfix,0,2*PAPI_HUGE_STR_LEN); postfixlen = 0; for( index=0; index priority(token) ) { postfix[postfixlen++] = pop(); postfix[postfixlen++] = '|'; } push( token ); /* save current operator */ break; default: // if alphanumeric character which is not parenthesis or an operator postfix[postfixlen++] = token; break; } // end switch symbol } // end while /* Write any remaining operators */ if (postfix[postfixlen-1]!='|') postfix[postfixlen++] = '|'; while ( stacktop>0 ) { postfix[postfixlen++] = pop(); postfix[postfixlen++] = '|'; } postfix[postfixlen++] = '\0'; stacktop = -1; INTDBG("EXIT: postfix: %s, size: %zu\n", postfix, strlen(postfix)); return (postfix); } // end infix_to_postfix /* * This function will load event definitions from either a file or an in memory table. It is used to load both preset events * which are defined by the PAPI development team and delivered with the product and user defined events which can be defined * by papi users and provided to papi to be processed at library initialization. Both the preset events and user defined events * support the same event definition syntax. * * Event definition file syntax: * see PAPI_derived_event_files(1) man page. * * Blank lines are ignored * Lines that begin with '#' are comments. * Lines that begin with 'CPU' identify a pmu name and have the following effect. * If this pmu name does not match the pmu_str passed in, it is ignored and we get the next input line. * If this pmu name matches the pmu_str passed in, we set a 'process events' flag. 
* Multiple consecutive 'CPU' lines may be provided and if any of them match the pmu_str passed in, we set a 'process events' flag. * When a 'CPU' line is found following event definition lines, it turns off the 'process events' flag and then does the above checks. * Lines that begin with 'PRESET' or 'EVENT' specify an event definition and are processed as follows. * If the 'process events' flag is not set, the line is ignored and we get the next input line. * If the 'process events' flag is set, the event is processed and the event information is put into the next slot in the results array. * * There are three possible sources of input for preset event definitions. The code will first look for the environment variable * "PAPI_CSV_EVENT_FILE". If found its value will be used as the pathname of where to get the preset information. If not found, * the code will look for a built in table containing preset events. If the built in table was not created during the build of * PAPI then the code will build a pathname of the form "PAPI_DATADIR/PAPI_EVENT_FILE". Each of these are build variables, the * PAPI_DATADIR variable can be given a value during the configure of PAPI at build time, and the PAPI_EVENT_FILE variable has a * hard coded value of "papi_events.csv". * * There is only one way to define user events. The code will look for an environment variable "PAPI_USER_EVENTS_FILE". If found * its value will be used as the pathname of a file which contains user event definitions. The events defined in this file will be * added to the ones known by PAPI when the call to PAPI_library_init is done. * * TODO: * Look into restoring the ability to specify a user defined event file with a call to PAPI_set_opt(PAPI_USER_EVENTS_FILE). * This needs to figure out how to pass a pmu name (could use default pmu from component 0) to this function. 
* * Currently code elsewhere in PAPI limits the events which preset and user events can depend on to those events which are known to component 0. This possibly could * be relaxed to allow events from different components. But since all the events used by any derived event must be added to the same eventset, it will always be a * requirement that all events used by a given derived event must be from the same component. * */ static int papi_load_derived_events (char *pmu_str, int pmu_type, int cidx, int preset_flag) { SUBDBG( "ENTER: pmu_str: %s, pmu_type: %d, cidx: %d, preset_flag: %d\n", pmu_str, pmu_type, cidx, preset_flag); char pmu_name[PAPI_MIN_STR_LEN]; char line[LINE_MAX]; char name[PATH_MAX] = "builtin papi_events_table"; char *event_file_path=NULL; char *event_table_ptr=NULL; int event_type_bits = 0; char *tmpn; char *tok_save_ptr=NULL; FILE *event_file = NULL; hwi_presets_t *results=NULL; int result_size = 0; int *event_count = NULL; int invalid_event; int line_no = 0; /* count of lines read from event definition input */ int derived = 0; int res_idx = 0; /* index into results array for where to store next event */ int preset = 0; int get_events = 0; /* only process derived events after CPU type they apply to is identified */ int found_events = 0; /* flag to track if event definitions (PRESETS) are found since last CPU declaration */ #ifdef PAPI_DATADIR char path[PATH_MAX]; #endif if (preset_flag) { /* try the environment variable first */ if ((tmpn = getenv("PAPI_CSV_EVENT_FILE")) && (strlen(tmpn) > 0)) { event_file_path = tmpn; } /* if no valid environment variable, look for built-in table */ else if (papi_events_table) { event_table_ptr = papi_events_table; } /* if no env var and no built-in, search for default file */ else { #ifdef PAPI_DATADIR sprintf( path, "%s/%s", PAPI_DATADIR, PAPI_EVENT_FILE ); event_file_path = path; #else event_file_path = PAPI_EVENT_FILE; #endif } event_type_bits = PAPI_PRESET_MASK; results = &_papi_hwi_presets[0]; result_size 
= PAPI_MAX_PRESET_EVENTS; event_count = &_papi_hwd[cidx]->cmp_info.num_preset_events; } else { if ((event_file_path = getenv( "PAPI_USER_EVENTS_FILE" )) == NULL ) { SUBDBG("EXIT: User event definition file not provided.\n"); return PAPI_OK; } event_type_bits = PAPI_UE_MASK; results = &user_defined_events[0]; result_size = PAPI_MAX_USER_EVENTS; event_count = &user_defined_events_count; } // if we have an event file pathname, open it and read event definitions from the file if (event_file_path != NULL) { if ((event_file = open_event_table(event_file_path)) == NULL) { // if file open fails, return an error SUBDBG("EXIT: Event file open failed.\n"); return PAPI_ESYS; } strncpy(name, event_file_path, sizeof(name)-1); name[sizeof(name)-1] = '\0'; } else if (event_table_ptr == NULL) { // if we do not have a path name or table pointer, return an error SUBDBG("EXIT: Both event_file_path and event_table_ptr are NULL.\n"); return PAPI_ESYS; } /* copy the pmu identifier, stripping commas if found */ tmpn = pmu_name; while (*pmu_str) { if (*pmu_str != ',') *tmpn++ = *pmu_str; pmu_str++; } *tmpn = '\0'; /* at this point we have either a valid file pointer or built-in table pointer */ while (get_event_line(line, event_file, &event_table_ptr)) { char *t; int i; // increment number of lines we have read line_no++; t = trim_string(strtok_r(line, ",", &tok_save_ptr)); /* Skip blank lines */ if ((t == NULL) || (strlen(t) == 0)) continue; /* Skip comments */ if (t[0] == '#') { continue; } if (strcasecmp(t, "CPU") == 0) { if (get_events != 0 && found_events != 0) { SUBDBG( "Ending event scanning at line %d of %s.\n", line_no, name); get_events = 0; found_events = 0; } t = trim_string(strtok_r(NULL, ",", &tok_save_ptr)); if ((t == NULL) || (strlen(t) == 0)) { PAPIERROR("Expected name after CPU token at line %d of %s -- ignoring", line_no, name); continue; } if (strcasecmp(t, pmu_name) == 0) { int type; SUBDBG( "Process events for PMU %s found at line %d of %s.\n", t, line_no, name); t = 
trim_string(strtok_r(NULL, ",", &tok_save_ptr)); if ((t == NULL) || (strlen(t) == 0)) { SUBDBG("No additional qualifier found, matching on string.\n"); get_events = 1; } else if ((sscanf(t, "%d", &type) == 1) && (type == pmu_type)) { SUBDBG( "Found CPU %s type %d at line %d of %s.\n", pmu_name, type, line_no, name); get_events = 1; } else { SUBDBG( "Additional qualifier match failed %d vs %d.\n", pmu_type, type); } } continue; } if ((strcasecmp(t, "PRESET") == 0) || (strcasecmp(t, "EVENT") == 0)) { if (get_events == 0) continue; found_events = 1; t = trim_string(strtok_r(NULL, ",", &tok_save_ptr)); if ((t == NULL) || (strlen(t) == 0)) { PAPIERROR("Expected name after PRESET token at line %d of %s -- ignoring", line_no, name); continue; } SUBDBG( "Examining event %s\n", t); // see if this event already exists in the results array, if not already known it sets up event in unused entry if ((res_idx = find_event_index (results, result_size, t)) < 0) { PAPIERROR("No room left for event %s -- ignoring", t); continue; } // add the proper event bits (preset or user defined bits) preset = res_idx | event_type_bits; (void) preset; SUBDBG( "Use event code: %#x for %s\n", preset, t); t = trim_string(strtok_r(NULL, ",", &tok_save_ptr)); if ((t == NULL) || (strlen(t) == 0)) { // got an error, make this entry unused papi_free (results[res_idx].symbol); results[res_idx].symbol = NULL; PAPIERROR("Expected derived type after PRESET token at line %d of %s -- ignoring", line_no, name); continue; } if (_papi_hwi_derived_type(t, &derived) != PAPI_OK) { // got an error, make this entry unused papi_free (results[res_idx].symbol); results[res_idx].symbol = NULL; PAPIERROR("Invalid derived name %s after PRESET token at line %d of %s -- ignoring", t, line_no, name); continue; } /****************************************/ /* Have an event, let's start assigning */ /****************************************/ SUBDBG( "Adding event: %s, code: %#x, derived: %d results[%d]: %p.\n", t, preset, 
derived, res_idx, &results[res_idx]); /* results[res_idx].event_code = preset; */ results[res_idx].derived_int = derived; /* Derived support starts here */ /* Special handling for postfix and infix */ if ((derived == DERIVED_POSTFIX) || (derived == DERIVED_INFIX)) { t = trim_string(strtok_r(NULL, ",", &tok_save_ptr)); if ((t == NULL) || (strlen(t) == 0)) { // got an error, make this entry unused papi_free (results[res_idx].symbol); results[res_idx].symbol = NULL; PAPIERROR("Expected Operation string after derived type DERIVED_POSTFIX or DERIVED_INFIX at line %d of %s -- ignoring", line_no, name); continue; } // if it is an algebraic formula, we need to convert it to postfix if (derived == DERIVED_INFIX) { SUBDBG( "Converting InFix operations %s\n", t); t = infix_to_postfix( t ); results[res_idx].derived_int = DERIVED_POSTFIX; } SUBDBG( "Saving PostFix operations %s\n", t); results[res_idx].postfix = papi_strdup(t); } /* All derived terms collected here */ i = 0; invalid_event = 0; results[res_idx].count = 0; do { t = trim_string(strtok_r(NULL, ",", &tok_save_ptr)); if ((t == NULL) || (strlen(t) == 0)) break; if (strcasecmp(t, "NOTE") == 0) break; if (strcasecmp(t, "LDESC") == 0) break; if (strcasecmp(t, "SDESC") == 0) break; SUBDBG( "Adding term (%d) %s to derived event %#x, current native event count: %d.\n", i, t, preset, results[res_idx].count); // show that we do not have an event code yet (the component may create one and update this info) // this also clears any values left over from a previous call _papi_hwi_set_papi_event_code(-1, -1); // make sure that this term in the derived event is a valid event name // this call replaces preset and user event names with the equivalent native events in our results table // it also updates formulas for derived events so that they refer to the correct native event index if (is_event(t, results[res_idx].derived_int, &results[res_idx], i) == 0) { invalid_event = 1; PAPIERROR("Missing event %s, used in derived event %s", t, 
results[res_idx].symbol); break; } i++; } while (results[res_idx].count < PAPI_EVENTS_IN_DERIVED_EVENT); /* preset code list must be PAPI_NULL terminated */ if (i < PAPI_EVENTS_IN_DERIVED_EVENT) { results[res_idx].code[results[res_idx].count] = PAPI_NULL; } if (invalid_event) { // got an error, make this entry unused // preset table is statically allocated, user defined is dynamic if (!preset_flag) papi_free (results[res_idx].symbol); results[res_idx].symbol = NULL; continue; } /* End of derived support */ // if we did not find any terms to base this derived event on, report error if (i == 0) { // got an error, make this entry unused if (!preset_flag) papi_free (results[res_idx].symbol); results[res_idx].symbol = NULL; PAPIERROR("Expected PFM event after DERIVED token at line %d of %s -- ignoring", line_no, name); continue; } if (i == PAPI_EVENTS_IN_DERIVED_EVENT) { t = trim_string(strtok_r(NULL, ",", &tok_save_ptr)); } // if something was provided following the list of events to be used by the operation, process it if ( t!= NULL && strlen(t) > 0 ) { do { // save the field name char *fptr = papi_strdup(t); // get the value to be used with this field t = trim_note(strtok_r(NULL, ",", &tok_save_ptr)); if ( t== NULL || strlen(t) == 0 ) { papi_free(fptr); break; } // Handle optional short descriptions, long descriptions and notes if (strcasecmp(fptr, "SDESC") == 0) { results[res_idx].short_descr = papi_strdup(t); } if (strcasecmp(fptr, "LDESC") == 0) { results[res_idx].long_descr = papi_strdup(t); } if (strcasecmp(fptr, "NOTE") == 0) { results[res_idx].note = papi_strdup(t); } SUBDBG( "Found %s (%s) on line %d\n", fptr, t, line_no); papi_free (fptr); // look for another field name t = trim_string(strtok_r(NULL, ",", &tok_save_ptr)); if ( t== NULL || strlen(t) == 0 ) { break; } } while (t != NULL); } (*event_count)++; continue; } PAPIERROR("Unrecognized token %s at line %d of %s -- ignoring", t, line_no, name); } if (event_file) { fclose(event_file); } SUBDBG("EXIT: 
Done processing derived event file.\n"); return PAPI_OK; } /* The following code is proof of principle for reading preset events from an xml file. It has been tested and works for pentium3. It relys on the expat library and is invoked by adding XMLFLAG = -DXML to the Makefile. It is presently hardcoded to look for "./papi_events.xml" */ #ifdef XML #define BUFFSIZE 8192 #define SPARSE_BEGIN 0 #define SPARSE_EVENT_SEARCH 1 #define SPARSE_EVENT 2 #define SPARSE_DESC 3 #define ARCH_SEARCH 4 #define DENSE_EVENT_SEARCH 5 #define DENSE_NATIVE_SEARCH 6 #define DENSE_NATIVE_DESC 7 #define FINISHED 8 char buffer[BUFFSIZE], *xml_arch; int location = SPARSE_BEGIN, sparse_index = 0, native_index, error = 0; /* The function below, _xml_start(), is a hook into expat's XML * parser. _xml_start() defines how the parser handles the * opening tags in PAPI's XML file. This function can be understood * more easily if you follow along with its logic while looking at * papi_events.xml. The location variable is a global telling us * where we are in the XML file. Have we found our architecture's * events yet? Are we looking at an event definition?...etc. 
*/
/* Arguments:
 *   data: not used by this handler
 *   el:   name of the element being opened
 *   attr: attribute array; this code reads the values at the odd indexes
 *         (attr[1], attr[3], attr[5]) and tests attr[2] for presence
 * Sets the global 'error' to 1 when the document does not match what is expected.
 */
static void
_xml_start( void *data, const char *el, const char **attr )
{
	int native_encoding;

	if ( location == SPARSE_BEGIN && !strcmp( "papistdevents", el ) ) {
		location = SPARSE_EVENT_SEARCH;
	} else if ( location == SPARSE_EVENT_SEARCH && !strcmp( "papievent", el ) ) {
		_papi_hwi_presets[sparse_index].info.symbol = papi_strdup( attr[1] );
//      strcpy(_papi_hwi_presets.info[sparse_index].symbol, attr[1]);
		location = SPARSE_EVENT;
	} else if ( location == SPARSE_EVENT && !strcmp( "desc", el ) ) {
		location = SPARSE_DESC;
	} else if ( location == ARCH_SEARCH && !strcmp( "availevents", el ) &&
				!strcmp( xml_arch, attr[1] ) ) {
		/* only the availevents section whose attribute matches our architecture is processed */
		location = DENSE_EVENT_SEARCH;
	} else if ( location == DENSE_EVENT_SEARCH && !strcmp( "papievent", el ) ) {
		if ( !strcmp( "PAPI_NULL", attr[1] ) ) {
			location = FINISHED;
			return;
		} else if ( PAPI_event_name_to_code( ( char * ) attr[1], &sparse_index ) != PAPI_OK ) {
			/* NOTE(review): processing continues with whatever sparse_index holds after this failure */
			PAPIERROR( "Improper Preset name given in XML file for %s.", attr[1] );
			error = 1;
		}
		sparse_index &= PAPI_PRESET_AND_MASK;

		/* allocate and initialize data space for this event */
		papi_valid_free( _papi_hwi_presets[sparse_index].data );
		_papi_hwi_presets[sparse_index].data = papi_malloc( sizeof ( hwi_preset_data_t ) );
		native_index = 0;
		_papi_hwi_presets[sparse_index].data->native[native_index] = PAPI_NULL;
		_papi_hwi_presets[sparse_index].data->operation[0] = '\0';

		if ( attr[2] ) {	/* derived event */
			/* NOTE(review): _papi_hwi_derived_type() is called with one argument here but with two
			 * arguments in papi_load_derived_events() -- this call probably no longer matches its prototype */
			_papi_hwi_presets[sparse_index].data->derived = _papi_hwi_derived_type( ( char * ) attr[3] );
			/* where does DERIVED POSTSCRIPT get encoded?? */
			if ( _papi_hwi_presets[sparse_index].data->derived == -1 ) {
				PAPIERROR( "No derived type match for %s in Preset XML file.", attr[3] );
				error = 1;
			}

			if ( attr[5] ) {
				_papi_hwi_presets[sparse_index].count = atoi( attr[5] );
			} else {
				PAPIERROR( "No count given for %s in Preset XML file.", attr[1] );
				error = 1;
			}
		} else {
			_papi_hwi_presets[sparse_index].data->derived = NOT_DERIVED;
			_papi_hwi_presets[sparse_index].count = 1;
		}
		location = DENSE_NATIVE_SEARCH;
	} else if ( location == DENSE_NATIVE_SEARCH && !strcmp( "native", el ) ) {
		location = DENSE_NATIVE_DESC;
	} else if ( location == DENSE_NATIVE_DESC && !strcmp( "event", el ) ) {
		if ( _papi_hwi_native_name_to_code( attr[1], &native_encoding ) != PAPI_OK ) {
			printf( "Improper Native name given in XML file for %s\n", attr[1] );
			PAPIERROR( "Improper Native name given in XML file for %s", attr[1] );
			error = 1;
		}
		/* NOTE(review): native_encoding is stored even when the lookup above failed */
		_papi_hwi_presets[sparse_index].data->native[native_index] = native_encoding;
		native_index++;
		/* keep the native list PAPI_NULL terminated */
		_papi_hwi_presets[sparse_index].data->native[native_index] = PAPI_NULL;
	} else if ( location && location != ARCH_SEARCH && location != FINISHED ) {
		PAPIERROR( "Poorly-formed Preset XML document." );
		error = 1;
	}
}

/* The function below, _xml_end(), is a hook into expat's XML
 * parser.  _xml_end() defines how the parser handles the
 * end tags in PAPI's XML file.
 */
static void
_xml_end( void *data, const char *el )
{
	int i;

	if ( location == SPARSE_EVENT_SEARCH && !strcmp( "papistdevents", el ) ) {
		/* end of the list of standard events: clear the info of every remaining preset slot */
		for ( i = sparse_index; i < PAPI_MAX_PRESET_EVENTS; i++ ) {
			_papi_hwi_presets[i].info.symbol = NULL;
			_papi_hwi_presets[i].info.long_descr = NULL;
			_papi_hwi_presets[i].info.short_descr = NULL;
		}
		location = ARCH_SEARCH;
	} else if ( location == DENSE_NATIVE_DESC && !strcmp( "native", el ) ) {
		location = DENSE_EVENT_SEARCH;
	} else if ( location == DENSE_EVENT_SEARCH && !strcmp( "availevents", el ) ) {
		location = FINISHED;
	}
}

/* The function below, _xml_content(), is a hook into expat's XML
 * parser.
_xml_content() defines how the parser handles the * text between tags in PAPI's XML file. The information between * tags is usally text for event descriptions. */ static void _xml_content( void *data, const char *el, const int len ) { int i; if ( location == SPARSE_DESC ) { _papi_hwi_presets[sparse_index].info.long_descr = papi_malloc( len + 1 ); for ( i = 0; i < len; i++ ) _papi_hwi_presets[sparse_index].info.long_descr[i] = el[i]; _papi_hwi_presets[sparse_index].info.long_descr[len] = '\0'; /* the XML data currently doesn't contain a short description */ _papi_hwi_presets[sparse_index].info.short_descr = NULL; sparse_index++; _papi_hwi_presets[sparse_index].data = NULL; location = SPARSE_EVENT_SEARCH; } } int _xml_papi_hwi_setup_all_presets( char *arch, hwi_dev_notes_t * notes ) { int done = 0; FILE *fp = fopen( "./papi_events.xml", "r" ); XML_Parser p = XML_ParserCreate( NULL ); if ( !p ) { PAPIERROR( "Couldn't allocate memory for XML parser." ); fclose(fp); return ( PAPI_ESYS ); } XML_SetElementHandler( p, _xml_start, _xml_end ); XML_SetCharacterDataHandler( p, _xml_content ); if ( fp == NULL ) { PAPIERROR( "Error opening Preset XML file." ); fclose(fp); return ( PAPI_ESYS ); } xml_arch = arch; do { int len; void *buffer = XML_GetBuffer( p, BUFFSIZE ); if ( buffer == NULL ) { PAPIERROR( "Couldn't allocate memory for XML buffer." ); fclose(fp); return ( PAPI_ESYS ); } len = fread( buffer, 1, BUFFSIZE, fp ); if ( ferror( fp ) ) { PAPIERROR( "XML read error." 
); fclose(fp); return ( PAPI_ESYS ); } done = feof( fp ); if ( !XML_ParseBuffer( p, len, len == 0 ) ) { PAPIERROR( "Parse error at line %d:\n%s", XML_GetCurrentLineNumber( p ), XML_ErrorString( XML_GetErrorCode( p ) ) ); fclose(fp); return ( PAPI_ESYS ); } if ( error ) { fclose(fp); return ( PAPI_ESYS ); } } while ( !done ); XML_ParserFree( p ); fclose( fp ); return ( PAPI_OK ); } #endif papi-5.6.0/src/components/appio/tests/iozone/libbif.c000664 001750 001750 00000024201 13216244356 024662 0ustar00jshenry1963jshenry1963000000 000000 /* * Here is a very simple set of routines to write an Excel worksheet * Microsoft BIFF format. The Excel version is set to 2.0 so that it * will work with all versions of Excel. * * Author: Don Capps */ /* * Note: rows and colums should not exceed 255 or this code will * act poorly */ #ifdef Windows #include #endif #include #include #include #if defined(__AIX__) || defined(__FreeBSD__) || defined(__DragonFly__) #include #else #include #endif #if defined(OSV5) || defined(linux) || defined (__FreeBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__APPLE__) || defined(__DragonFly__) #include #endif #if defined(linux) || defined(__DragonFly__) || defined(macosx) #include #include #endif #if (defined(solaris) && defined( __LP64__ )) || defined(__s390x__) || defined(FreeBSD) /* If we are building for 64-bit Solaris, all functions that return pointers * must be declared before they are used; otherwise the compiler will assume * that they return ints and the top 32 bits of the pointer will be lost, * causing segmentation faults. The following includes take care of this. * It should be safe to add these for all other OSs too, but we're only * doing it for Solaris now in case another OS turns out to be a special case. 
*/
#include
#include
#include
#include
#include
#endif
/* Values compared against the result of endian() in do_float() */
/* Little Endian */
#define ENDIAN_1 1
/* Big Endian */
#define ENDIAN_2 2
/* Middle Endian */
#define ENDIAN_3 3
/* Middle Endian */
#define ENDIAN_4 4

/* junk receives the return value of write() calls whose result is otherwise ignored */
int junk, *junkp;

#ifdef HAVE_ANSIC_C
/************************************************************************/
/* Here is the API... Enjoy 						*/
/************************************************************************/
/* Create worksheet 							*/
int create_xls(char *);			/* Args: Filename		*/
					/*    				*/
/* Close worksheet 							*/
void close_xls(int);			/* Args: file descriptor	*/
					/*    				*/
/* Put a 16 bit integer in worksheet 					*/
void do_int(int,int,int,int);		/* Args: file descriptor,	*/
					/*    	  value,		*/
					/*	  row,			*/
					/*	  column		*/

/* Put a double in 8 byte float 					*/
void do_float(int,double,int,int);	/* Args: file descriptor,	*/
					/*    	  value,		*/
					/*	  row,			*/
					/*	  column		*/
/* Put a string in worksheet 						*/
void do_label(int,char *,int,int);	/* Args: file descriptor,	*/
					/*    	  string,		*/
					/*	  row,			*/
					/*	  column		*/
/************************************************************************/

char libbif_version[] = "Libbif Version $Revision$";
void do_eof(int );		/* Used internally */
void do_header(int );		/* Used internally */
int endian(void);
#endif

/* Record opcodes and header constants stored in the first byte of each field pair */
#define BOF 0x9
#define INTEGER 0x2
#define FLOAT 0x3
#define LABEL 0x4
#define EXCEL_VERS 0x2
#define WORKSHEET 0x10

/*
 * The records below are written to the file byte for byte in declaration
 * order.  In each hi_/lo_ pair the hi_ member is written first; do_header()
 * and do_int() store the low-order byte of the value in it.
 */
struct bof_record{		/* Beginning of file */
	char hi_opcode;
	char lo_opcode;
	char hi_length;
	char lo_length;
	char hi_version;	/* Excel version */
	char lo_version;
	char hi_filetype;
	char lo_filetype;
	};
struct int_record {
	char hi_opcode;		/* Type 2 of record */
	char lo_opcode;
	char hi_length;
	char lo_length;
	char hi_row;
	char lo_row;
	char hi_column;
	char lo_column;
	char rgbhi;
	char rgbmed;
	char rgblo;
	char hi_data;
	char lo_data;
	};
struct label_record {
	char hi_opcode;		/* Type 4 of record */
	char lo_opcode;
	char hi_length;
	char lo_length;
	char hi_row;
	char lo_row;
	char hi_column;
	char lo_column;
	char rgbhi;
	char rgbmed;
	char rgblo;
	char string_length;
char str_array[256];	/* Label text buffer */
};

/*
 * In-memory image of a floating point (type 3) record.  The eleven
 * leading char fields mirror the start of int_record; the value itself
 * is carried in 'data'.  do_float() writes the char fields and 'data'
 * with two separate write() calls because of struct padding.
 */
struct float_record { /* Type 3 record */
	char hi_opcode;
	char lo_opcode;
	char hi_length;
	char lo_length;
	char hi_row;
	char lo_row;
	char hi_column;
	char lo_column;
	char rgbhi;
	char rgbmed;
	char rgblo;
	double data;
};

/*
 * Write the EOF record and close the file.
 *
 * fd: file descriptor of the worksheet (as returned by create_xls()).
 *
 * The result of close() is not checked.
 */
#ifdef HAVE_ANSIC_C
void close_xls(int fd)
{
#else
close_xls(fd)
int fd;
{
#endif
	do_eof(fd);
	close(fd);
}

/*
 * Create xls worksheet. Create file and put the BOF record in it.
 *
 * name: path of the worksheet file.  Any existing file of that name is
 *       unlinked first, then the file is created read/write with mode
 *       0666 (O_BINARY is added when built with Windows defined).
 *
 * Returns the open file descriptor.  If open() fails, an error message
 * is printed and the process terminates through exit(-1); the function
 * does not return in that case.
 */
#ifdef HAVE_ANSIC_C
int create_xls(char *name)
{
#else
create_xls(name)
char *name;
{
#endif
	int fd;
	unlink(name);	/* Start from scratch; the open() below uses no O_TRUNC */
#ifdef Windows
	fd=open(name,O_BINARY|O_CREAT|O_RDWR,0666);
#else
	fd=open(name,O_CREAT|O_RDWR,0666);
#endif
	if(fd<0)
	{
		printf("Error opening file %s\n",name);
		exit(-1);
	}
	do_header(fd);
	return(fd);
}

/*
 * Fill in a bof_record (opcode BOF, length 4, version EXCEL_VERS,
 * file type WORKSHEET) and write it to fd.
 *
 * Note on field naming, which applies to every record in this file:
 * each hi_* field comes first in the record and receives the LOW-order
 * byte, the matching lo_* field receives the high-order byte (see the
 * row/column assignments in do_int()).
 *
 * The write() result is only stored in the global 'junk'; it is not
 * checked.
 */
#ifdef HAVE_ANSIC_C
void do_header(int fd) /* Stick the BOF at the beginning of the file */
{
#else
do_header(fd)
int fd;
{
#endif
	struct bof_record bof;
	bof.hi_opcode=BOF;
	bof.lo_opcode = 0x0;
	bof.hi_length=0x4;
	bof.lo_length=0x0;
	bof.hi_version=EXCEL_VERS;
	bof.lo_version=0x0;
	bof.hi_filetype=WORKSHEET;
	bof.lo_filetype=0x0;
	junk=write(fd,&bof,sizeof(struct bof_record));
}

/*
 * Put an integer (16 bit) in the worksheet
 *
 * fd:     file descriptor of the worksheet.
 * val:    value to store; only bits 0-15 are written.
 * row:    cell row, narrowed to short before being split into bytes.
 * column: cell column, narrowed to short before being split into bytes.
 *
 * The write() result is only stored in the global 'junk'; it is not
 * checked.
 */
#ifdef HAVE_ANSIC_C
void do_int(int fd,int val, int row, int column)
{
#else
do_int(fd,val,row,column)
int fd,val,row,column;
{
#endif
	struct int_record intrec;
	short s_row,s_column;
	s_row=(short)row;
	s_column=(short)column;
	intrec.hi_opcode=INTEGER;
	intrec.lo_opcode=0x00;
	intrec.hi_length=0x09;	/* 9 bytes follow the 4 opcode/length bytes */
	intrec.lo_length=0x00;
	intrec.rgbhi=0x0;
	intrec.rgbmed=0x0;
	intrec.rgblo=0x0;
	/* Low-order byte goes into hi_*, high-order byte into lo_* */
	intrec.hi_row=(char)s_row&0xff;
	intrec.lo_row=(char)(s_row>>8)&0xff;
	intrec.hi_column=(char)(s_column&0xff);
	intrec.lo_column=(char)(s_column>>8)&0xff;
	intrec.hi_data=(val & 0xff);
	intrec.lo_data=(val & 0xff00)>>8;
	junk=write(fd,&intrec,13);	/* 13 = 4 header bytes + the 9 counted above */
}

/* Note: This routine converts Big Endian to Little Endian
 * and writes the record out.
 */

/*
 * Put a double in the worksheet as 8 byte float in IEEE format.
*/ #ifdef HAVE_ANSIC_C void do_float(int fd, double value, int row, int column) { #else do_float(fd, value, row, column) int fd; double value; int row,column; { #endif struct float_record floatrec; short s_row,s_column; unsigned char *sptr,*dptr; s_row=(short)row; s_column=(short)column; floatrec.hi_opcode=FLOAT; floatrec.lo_opcode=0x00; floatrec.hi_length=0xf; floatrec.lo_length=0x00; floatrec.rgbhi=0x0; floatrec.rgbmed=0x0; floatrec.rgblo=0x0; floatrec.hi_row=(char)(s_row&0xff); floatrec.lo_row=(char)((s_row>>8)&0xff); floatrec.hi_column=(char)(s_column&0xff); floatrec.lo_column=(char)((s_column>>8)&0xff); sptr =(unsigned char *) &value; dptr =(unsigned char *) &floatrec.data; if(endian()==ENDIAN_2) /* Big Endian */ { dptr[0]=sptr[7]; /* Convert to Little Endian */ dptr[1]=sptr[6]; dptr[2]=sptr[5]; dptr[3]=sptr[4]; dptr[4]=sptr[3]; dptr[5]=sptr[2]; dptr[6]=sptr[1]; dptr[7]=sptr[0]; } if(endian()==ENDIAN_3) /* Middle Endian */ { dptr[0]=sptr[4]; /* 16 bit swapped ARM */ dptr[1]=sptr[5]; dptr[2]=sptr[6]; dptr[3]=sptr[7]; dptr[4]=sptr[0]; dptr[5]=sptr[1]; dptr[6]=sptr[2]; dptr[7]=sptr[3]; } if(endian()==ENDIAN_1) /* Little Endian */ { dptr[0]=sptr[0]; /* Do not convert to Little Endian */ dptr[1]=sptr[1]; dptr[2]=sptr[2]; dptr[3]=sptr[3]; dptr[4]=sptr[4]; dptr[5]=sptr[5]; dptr[6]=sptr[6]; dptr[7]=sptr[7]; } if(endian()==-1) /* Unsupported architecture */ { dptr[0]=0; dptr[1]=0; dptr[2]=0; dptr[3]=0; dptr[4]=0; dptr[5]=0; dptr[6]=0; dptr[7]=0; printf("Excel output not supported on this architecture.\n"); } junk=write(fd,&floatrec,11); /* Don't write floatrec. Padding problems */ junk=write(fd,&floatrec.data,8); /* Write value seperately */ } /* * Put a string as a label in the worksheet. 
*/ #ifdef HAVE_ANSIC_C void do_label(int fd, char *string, int row, int column) { #else do_label(fd, string, row, column) int fd; char *string; int row,column; { #endif struct label_record labelrec; short s_row,s_column; int i; for(i=0;i<255;i++) labelrec.str_array[i]=0; s_row=(short)row; s_column=(short)column; i=strlen(string); labelrec.hi_opcode=LABEL; labelrec.lo_opcode=0x00; labelrec.hi_length=0x08; /* 264 total bytes */ labelrec.lo_length=0x01; labelrec.rgblo=0x0; labelrec.rgbmed=0x0; labelrec.rgbhi=0x0; labelrec.hi_row=(char)(s_row&0xff); labelrec.lo_row=(char)((s_row>>8)&0xff); labelrec.hi_column=(char)(s_column&0xff); labelrec.lo_column=(char)((s_column>>8)&0xff); labelrec.string_length=i; if(i > 255) /* If too long then terminate it early */ string[254]=0; i=strlen(string); strcpy(labelrec.str_array,string); junk=write(fd,&labelrec,sizeof(struct label_record)); } /* * Write the EOF in the file */ #ifdef HAVE_ANSIC_C void do_eof(int fd) { #else do_eof(fd) int fd; { #endif char buf[]={0x0a,0x00,0x00,0x00}; junk=write(fd,buf,4); } /* * Routine to determine the Endian-ness of the system. This * is needed for Iozone to convert doubles (floats) into * Little-endian format. This is needed for Excel to be * able to interpret the file */ int endian(void) { long long foo = 0x0102030405060708LL; long foo1 = 0x012345678; unsigned char *c,c1,c2,c3,c4,c5,c6,c7,c8; c=(unsigned char *)&foo; c1=*c++; c2=*c++; c3=*c++; c4=*c++; c5=*c++; c6=*c++; c7=*c++; c8=*c; /*--------------------------------------------------------------*/ /* printf("%x %x %x %x %x %x %x %x\n",c1,c2,c3,c4,c5,c6,c7,c8); */ /*--------------------------------------------------------------*/ /* Little Endian format ? ( Intel ) */ if( (c1==0x08) && (c2==0x07) && (c3==0x06) && (c4==0x05) && (c5==0x04) && (c6==0x03) && (c7==0x02) && (c8==0x01) ) return(ENDIAN_1); /* Big Endian format ? ( Sparc, Risc... 
*/ if( (c1==0x01) && (c2==0x02) && (c3==0x03) && (c4==0x04) && (c5==0x05) && (c6==0x06) && (c7==0x07) && (c8==0x08) ) return(ENDIAN_2); /* Middle Endian format ? ( ARM ... ) */ if( (c1==0x04) && (c2==0x03) && (c3==0x02) && (c4==0x01) && (c5==0x08) && (c6==0x07) && (c7==0x06) && (c8==0x05) ) return(ENDIAN_3); c=(unsigned char *)&foo1; c1=*c++; c2=*c++; c3=*c++; c4=*c++; /* Another middle endian format ? ( PDP-11 ... ) */ if( (c1==0x34) && (c2==0x12) && (c3==0x78) && (c4==0x56)) return(ENDIAN_4); return(-1); } papi-5.6.0/src/components/rapl/tests/rapl_basic.c000664 001750 001750 00000020360 13216244360 024052 0ustar00jshenry1963jshenry1963000000 000000 /****************************/ /* THIS IS OPEN SOURCE CODE */ /****************************/ /** * @author Vince Weaver * * test case for RAPL component * * @brief * Tests basic functionality of RAPL component */ #include #include #include #include #include "papi.h" #include "papi_test.h" #define MAX_RAPL_EVENTS 64 #ifdef BASIC_TEST void run_test(int quiet) { if (!quiet) { printf("Sleeping 1 second...\n"); } /* Sleep */ sleep(1); } #else #define MATRIX_SIZE 1024 static double a[MATRIX_SIZE][MATRIX_SIZE]; static double b[MATRIX_SIZE][MATRIX_SIZE]; static double c[MATRIX_SIZE][MATRIX_SIZE]; /* Naive matrix multiply */ void run_test(int quiet) { double s; int i,j,k; if (!quiet) { printf("Doing a naive %dx%d MMM...\n",MATRIX_SIZE,MATRIX_SIZE); } for(i=0;i 1 ) { if ( strstr( argv[1], "-w" ) ) { do_wrap = 1; } } #endif /* Set TESTS_QUIET variable */ tests_quiet( argc, argv ); /* PAPI Initialization */ retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) { test_fail(__FILE__, __LINE__,"PAPI_library_init failed\n",retval); } if (!TESTS_QUIET) { printf("Trying all RAPL events\n"); } numcmp = PAPI_num_components(); for(cid=0; cidname,"rapl")) { rapl_cid=cid; if (!TESTS_QUIET) { printf("Found rapl component at cid %d\n",rapl_cid); } if (cmpinfo->disabled) { if (!TESTS_QUIET) { printf("RAPL component 
disabled: %s\n", cmpinfo->disabled_reason); } test_skip(__FILE__,__LINE__,"RAPL component disabled",0); } break; } } /* Component not found */ if (cid==numcmp) { test_skip(__FILE__,__LINE__,"No rapl component found\n",0); } /* Create EventSet */ retval = PAPI_create_eventset( &EventSet ); if (retval != PAPI_OK) { test_fail(__FILE__, __LINE__, "PAPI_create_eventset()",retval); } /* Add all events */ code = PAPI_NATIVE_MASK; r = PAPI_enum_cmp_event( &code, PAPI_ENUM_FIRST, rapl_cid ); while ( r == PAPI_OK ) { retval = PAPI_event_code_to_name( code, event_names[num_events] ); if ( retval != PAPI_OK ) { printf("Error translating %#x\n",code); test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); } retval = PAPI_get_event_info(code,&evinfo); if (retval != PAPI_OK) { test_fail( __FILE__, __LINE__, "Error getting event info\n",retval); } strncpy(units[num_events],evinfo.units,sizeof(units[0])-1); // buffer must be null terminated to safely use strstr operation on it below units[num_events][sizeof(units[0])-1] = '\0'; data_type[num_events] = evinfo.data_type; retval = PAPI_add_event( EventSet, code ); if (retval != PAPI_OK) { break; /* We've hit an event limit */ } num_events++; r = PAPI_enum_cmp_event( &code, PAPI_ENUM_EVENTS, rapl_cid ); } values=calloc(num_events,sizeof(long long)); if (values==NULL) { test_fail(__FILE__, __LINE__, "No memory",retval); } if (!TESTS_QUIET) { printf("\nStarting measurements...\n\n"); } /* Start Counting */ before_time=PAPI_get_real_nsec(); retval = PAPI_start( EventSet); if (retval != PAPI_OK) { test_fail(__FILE__, __LINE__, "PAPI_start()",retval); } /* Run test */ run_test(TESTS_QUIET); /* Stop Counting */ after_time=PAPI_get_real_nsec(); retval = PAPI_stop( EventSet, values); if (retval != PAPI_OK) { test_fail(__FILE__, __LINE__, "PAPI_stop()",retval); } elapsed_time=((double)(after_time-before_time))/1.0e9; if (!TESTS_QUIET) { printf("\nStopping measurements, took %.3fs, gathering results...\n\n", elapsed_time); 
printf("Scaled energy measurements:\n"); for(i=0;i * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include #include #include /* private library and arch headers */ #include "pfmlib_priv.h" #include "pfmlib_s390x_priv.h" #include "pfmlib_perf_event_priv.h" int pfm_s390x_get_perf_encoding(void *this, pfmlib_event_desc_t *e) { pfmlib_pmu_t *pmu = this; struct perf_event_attr *attr = e->os_data; int rc; if (!pmu->get_event_encoding[PFM_OS_NONE]) return PFM_ERR_NOTSUPP; /* set up raw pmu event encoding */ rc = pmu->get_event_encoding[PFM_OS_NONE](this, e); if (rc == PFM_SUCCESS) { /* currently use raw events only */ attr->type = PERF_TYPE_RAW; attr->config = e->codes[0]; } return rc; } papi-5.6.0/src/components/perfmon_ia64/Rules.perfmon_ia64000664 001750 001750 00000000413 13216244360 025302 0ustar00jshenry1963jshenry1963000000 000000 COMPSRCS += components/perfmon_ia64/perfmon-ia64.c COMPOBJS += perfmon-ia64.o perfmon-ia64.o: components/perfmon_ia64/perfmon-ia64.c components/perfmon_ia64/perfmon-ia64.h $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/perfmon_ia64/perfmon-ia64.c -o perfmon-ia64.o papi-5.6.0/src/perfctr-2.6.x/usr.lib/event_set.h000775 001750 001750 00000005335 13216244367 023302 0ustar00jshenry1963jshenry1963000000 000000 /* $Id: event_set.h,v 1.5 2004/02/20 21:32:06 mikpe Exp $ * Common definitions used when creating event set descriptions. 
* * Copyright (C) 2003-2004 Mikael Pettersson */ #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #define UM(um) ((const struct perfctr_unit_mask*)&(um).header) struct perfctr_unit_mask_header { unsigned short default_value; enum perfctr_unit_mask_type type:8; unsigned char nvalues; }; struct perfctr_unit_mask_0 { struct perfctr_unit_mask_header header; }; struct perfctr_unit_mask_1 { struct perfctr_unit_mask_header header; struct perfctr_unit_mask_value values[1]; }; struct perfctr_unit_mask_2 { struct perfctr_unit_mask_header header; struct perfctr_unit_mask_value values[2]; }; struct perfctr_unit_mask_3 { struct perfctr_unit_mask_header header; struct perfctr_unit_mask_value values[3]; }; struct perfctr_unit_mask_4 { struct perfctr_unit_mask_header header; struct perfctr_unit_mask_value values[4]; }; struct perfctr_unit_mask_5 { struct perfctr_unit_mask_header header; struct perfctr_unit_mask_value values[5]; }; struct perfctr_unit_mask_6 { struct perfctr_unit_mask_header header; struct perfctr_unit_mask_value values[6]; }; struct perfctr_unit_mask_7 { struct perfctr_unit_mask_header header; struct perfctr_unit_mask_value values[7]; }; struct perfctr_unit_mask_8 { struct perfctr_unit_mask_header header; struct perfctr_unit_mask_value values[8]; }; struct perfctr_unit_mask_9 { struct perfctr_unit_mask_header header; struct perfctr_unit_mask_value values[9]; }; struct perfctr_unit_mask_13 { struct perfctr_unit_mask_header header; struct perfctr_unit_mask_value values[13]; }; struct perfctr_unit_mask_15 { struct perfctr_unit_mask_header header; struct perfctr_unit_mask_value values[15]; }; extern const struct perfctr_event_set perfctr_p5_event_set; extern const struct perfctr_event_set perfctr_p5mmx_event_set; extern const struct perfctr_event_set perfctr_mii_event_set; extern const struct perfctr_event_set perfctr_wcc6_event_set; extern const struct perfctr_event_set perfctr_wc2_event_set; extern const struct perfctr_event_set perfctr_vc3_event_set; extern const 
struct perfctr_event_set perfctr_ppro_event_set; extern const struct perfctr_event_set perfctr_p2_event_set; extern const struct perfctr_event_set perfctr_p3_event_set; extern const struct perfctr_event_set perfctr_p4_event_set; extern const struct perfctr_event_set perfctr_k7_event_set; extern const struct perfctr_event_set perfctr_k8_event_set; extern const struct perfctr_event_set perfctr_pentm_event_set; extern const struct perfctr_event_set perfctr_k8c_event_set; extern const struct perfctr_event_set perfctr_p4m3_event_set; papi-5.6.0/src/perfctr-2.6.x/examples/signal/Makefile000775 001750 001750 00000001215 13216244366 024302 0ustar00jshenry1963jshenry1963000000 000000 # $Id: Makefile,v 1.5 2004/01/12 01:56:15 mikpe Exp $ SHELL=/bin/sh ARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ -e s/arm.*/arm/ -e s/sa110/arm/) CC=gcc CFLAGS=-O2 -fomit-frame-pointer -Wall CPPFLAGS=-I../../linux/include -I../../usr.lib HDEP=../../usr.lib/libperfctr.h ../../linux/include/linux/perfctr.h ../../linux/include/asm/perfctr.h arch.h TARGET=signal i386_OBJS=x86.o x86_64_OBJS=x86.o ppc_OBJS=ppc.o ARCH_OBJS=$($(ARCH)_OBJS) OBJS=signal.o $(ARCH_OBJS) default: $(TARGET) $(TARGET): $(OBJS) ../../usr.lib/libperfctr.a $(OBJS): $(HDEP) install: distclean realclean: clean clean: rm -f $(TARGET) $(OBJS) core a.out papi-5.6.0/src/papi_debug.h000664 001750 001750 00000014761 13216244366 017610 0ustar00jshenry1963jshenry1963000000 000000 /****************************/ /* THIS IS OPEN SOURCE CODE */ /****************************/ /** * @file papi_debug.h * @author Philip Mucci * mucci@cs.utk.edu * @author Dan Terpstra * terpstra.utk.edu * @author Kevin London * london@cs.utk.edu * @author Haihang You * you@cs.utk.edu */ #ifndef _PAPI_DEBUG_H #define _PAPI_DEBUG_H #ifdef NO_VARARG_MACRO #include #endif #include /* Debug Levels */ #define DEBUG_SUBSTRATE 0x002 #define DEBUG_API 0x004 #define DEBUG_INTERNAL 0x008 #define DEBUG_THREADS 0x010 #define DEBUG_MULTIPLEX 0x020 #define 
DEBUG_OVERFLOW 0x040 #define DEBUG_PROFILE 0x080 #define DEBUG_MEMORY 0x100 #define DEBUG_LEAK 0x200 #define DEBUG_ALL (DEBUG_SUBSTRATE|DEBUG_API|DEBUG_INTERNAL|DEBUG_THREADS|DEBUG_MULTIPLEX|DEBUG_OVERFLOW|DEBUG_PROFILE|DEBUG_MEMORY|DEBUG_LEAK) /* Please get rid of the DBG macro from your code */ extern int _papi_hwi_debug; extern unsigned long int ( *_papi_hwi_thread_id_fn ) ( void ); #ifdef DEBUG #ifdef __GNUC__ #define FUNC __FUNCTION__ #elif defined(__func__) #define FUNC __func__ #else #define FUNC "?" #endif #define DEBUGLABEL(a) if (_papi_hwi_thread_id_fn) fprintf(stderr, "%s:%s:%s:%d:%d:%#lx ",a,__FILE__, FUNC, __LINE__,(int)getpid(),_papi_hwi_thread_id_fn()); else fprintf(stderr, "%s:%s:%s:%d:%d ",a,__FILE__, FUNC, __LINE__, (int)getpid()) #define ISLEVEL(a) (_papi_hwi_debug&a) #define DEBUGLEVEL(a) ((a&DEBUG_SUBSTRATE)?"SUBSTRATE":(a&DEBUG_API)?"API":(a&DEBUG_INTERNAL)?"INTERNAL":(a&DEBUG_THREADS)?"THREADS":(a&DEBUG_MULTIPLEX)?"MULTIPLEX":(a&DEBUG_OVERFLOW)?"OVERFLOW":(a&DEBUG_PROFILE)?"PROFILE":(a&DEBUG_MEMORY)?"MEMORY":(a&DEBUG_LEAK)?"LEAK":"UNKNOWN") #ifndef NO_VARARG_MACRO /* Has variable arg macro support */ #define PAPIDEBUG(level,format, args...) { if(_papi_hwi_debug&level){DEBUGLABEL(DEBUGLEVEL(level));fprintf(stderr,format, ## args);}} /* Macros */ #define SUBDBG(format, args...) (PAPIDEBUG(DEBUG_SUBSTRATE,format, ## args)) #define APIDBG(format, args...) (PAPIDEBUG(DEBUG_API,format, ## args)) #define INTDBG(format, args...) (PAPIDEBUG(DEBUG_INTERNAL,format, ## args)) #define THRDBG(format, args...) (PAPIDEBUG(DEBUG_THREADS,format, ## args)) #define MPXDBG(format, args...) (PAPIDEBUG(DEBUG_MULTIPLEX,format, ## args)) #define OVFDBG(format, args...) (PAPIDEBUG(DEBUG_OVERFLOW,format, ## args)) #define PRFDBG(format, args...) (PAPIDEBUG(DEBUG_PROFILE,format, ## args)) #define MEMDBG(format, args...) (PAPIDEBUG(DEBUG_MEMORY,format, ## args)) #define LEAKDBG(format, args...) 
(PAPIDEBUG(DEBUG_LEAK,format, ## args)) #endif #else #ifndef NO_VARARG_MACRO /* Has variable arg macro support */ #define SUBDBG(format, args...) { ; } #define APIDBG(format, args...) { ; } #define INTDBG(format, args...) { ; } #define THRDBG(format, args...) { ; } #define MPXDBG(format, args...) { ; } #define OVFDBG(format, args...) { ; } #define PRFDBG(format, args...) { ; } #define MEMDBG(format, args...) { ; } #define LEAKDBG(format, args...) { ; } #define PAPIDEBUG(level, format, args...) { ; } #endif #endif /* * Debug functions for platforms without vararg macro support */ #ifdef NO_VARARG_MACRO static void PAPIDEBUG( int level, char *format, va_list args ) { #ifdef DEBUG if ( ISLEVEL( level ) ) { vfprintf( stderr, format, args ); } else #endif return; } static void _SUBDBG( char *format, ... ) { #ifdef DEBUG va_list args; va_start(args, format); PAPIDEBUG( DEBUG_SUBSTRATE, format, args ); va_end(args); #endif } #ifdef DEBUG #define SUBDBG do { \ if (DEBUG_SUBSTRATE & _papi_hwi_debug) {\ DEBUGLABEL( DEBUGLEVEL ( DEBUG_SUBSTRATE ) ); \ } \ } while(0); _SUBDBG #else #define SUBDBG _SUBDBG #endif static void _APIDBG( char *format, ... ) { #ifdef DEBUG va_list args; va_start(args, format); PAPIDEBUG( DEBUG_API, format, args ); va_end(args); #endif } #ifdef DEBUG #define APIDBG do { \ if (DEBUG_API&_papi_hwi_debug) {\ DEBUGLABEL( DEBUGLEVEL ( DEBUG_API ) ); \ } \ } while(0); _APIDBG #else #define APIDBG _APIDBG #endif static void _INTDBG( char *format, ... ) { #ifdef DEBUG va_list args; va_start(args, format); PAPIDEBUG( DEBUG_INTERNAL, format, args ); va_end(args); #endif } #ifdef DEBUG #define INTDBG do { \ if (DEBUG_INTERNAL&_papi_hwi_debug) {\ DEBUGLABEL( DEBUGLEVEL ( DEBUG_INTERNAL ) ); \ } \ } while(0); _INTDBG #else #define INTDBG _INTDBG #endif static void _THRDBG( char *format, ... 
) { #ifdef DEBUG va_list args; va_start(args, format); PAPIDEBUG( DEBUG_THREADS, format, args ); va_end(args); #endif } #ifdef DEBUG #define THRDBG do { \ if (DEBUG_THREADS&_papi_hwi_debug) {\ DEBUGLABEL( DEBUGLEVEL ( DEBUG_THREADS ) ); \ } \ } while(0); _THRDBG #else #define THRDBG _THRDBG #endif static void _MPXDBG( char *format, ... ) { #ifdef DEBUG va_list args; va_start(args, format); PAPIDEBUG( DEBUG_MULTIPLEX, format, args ); va_end(args); #endif } #ifdef DEBUG #define MPXDBG do { \ if (DEBUG_MULTIPLEX&_papi_hwi_debug) {\ DEBUGLABEL( DEBUGLEVEL ( DEBUG_MULTIPLEX ) ); \ } \ } while(0); _MPXDBG #else #define MPXDBG _MPXDBG #endif static void _OVFDBG( char *format, ... ) { #ifdef DEBUG va_list args; va_start(args, format); PAPIDEBUG( DEBUG_OVERFLOW, format, args ); va_end(args); #endif } #ifdef DEBUG #define OVFDBG do { \ if (DEBUG_OVERFLOW&_papi_hwi_debug) {\ DEBUGLABEL( DEBUGLEVEL ( DEBUG_OVERFLOW ) ); \ } \ } while(0); _OVFDBG #else #define OVFDBG _OVFDBG #endif static void _PRFDBG( char *format, ... ) { #ifdef DEBUG va_list args; va_start(args, format); PAPIDEBUG( DEBUG_PROFILE, format, args ); va_end(args); #endif } #ifdef DEBUG #define PRFDBG do { \ if (DEBUG_PROFILE&_papi_hwi_debug) {\ DEBUGLABEL( DEBUGLEVEL ( DEBUG_PROFILE ) ); \ } \ } while(0); _PRFDBG #else #define PRFDBG _PRFDBG #endif static void _MEMDBG( char *format, ... ) { #ifdef DEBUG va_list args; va_start(args, format); PAPIDEBUG( DEBUG_MEMORY, format , args); va_end(args); #endif } #ifdef DEBUG #define MEMDBG do { \ if (DEBUG_MEMORY&_papi_hwi_debug) {\ DEBUGLABEL( DEBUGLEVEL ( DEBUG_MEMORY ) ); \ } \ } while(0); _MEMDBG #else #define MEMDBG _MEMDBG #endif static void _LEAKDBG( char *format, ... 
) { #ifdef DEBUG va_list args; va_start(args, format); PAPIDEBUG( DEBUG_LEAK, format , args); va_end(args); #endif } #ifdef DEBUG #define LEAKDBG do { \ if (DEBUG_LEAK&_papi_hwi_debug) {\ DEBUGLABEL( DEBUGLEVEL ( DEBUG_LEAK ) ); \ } \ } while(0); _LEAKDBG #else #define LEAKDBG _LEAKDBG #endif /* ifdef NO_VARARG_MACRO */ #endif #endif /* PAPI_DEBUG_H */ papi-5.6.0/man/man3/PAPI_set_opt.3000664 001750 001750 00000010571 13216244356 020526 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPI_set_opt" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPI_set_opt \- .PP Set PAPI library or event set options\&. .SH SYNOPSIS .br .PP .SH "Detailed Description" .PP .PP \fBC Interface:\fP .RS 4 #include <\fBpapi\&.h\fP> .br int \fBPAPI_set_opt( int option, PAPI_option_t * ptr )\fP; .RE .PP \fBParameters:\fP .RS 4 \fIoption\fP Defines the option to be set\&. Possible values are briefly described in the table below\&. .br \fIptr\fP Pointer to a structure determined by the selected option\&. See \fBPAPI_option_t\fP for a description of possible structures\&. .RE .PP \fBReturn values:\fP .RS 4 \fIPAPI_OK\fP .br \fIPAPI_EINVAL\fP The specified option or parameter is invalid\&. .br \fIPAPI_ENOEVST\fP The EventSet specified does not exist\&. .br \fIPAPI_EISRUN\fP The EventSet is currently counting events\&. .br \fIPAPI_ECMP\fP The option is not implemented for the current component\&. .br \fIPAPI_ENOINIT\fP PAPI has not been initialized\&. .br \fIPAPI_EINVAL_DOM\fP Invalid domain has been requested\&. .RE .PP \fBPAPI_set_opt()\fP changes the options of the PAPI library or a specific EventSet created by \fBPAPI_create_eventset\fP\&. Some options may require that the EventSet be bound to a component before they can execute successfully\&. This can be done either by adding an event or by explicitly calling \fBPAPI_assign_eventset_component\fP\&. 
.PP Ptr is a pointer to the \fBPAPI_option_t\fP structure, which is actually a union of different structures for different options\&. Not all options require or return information in these structures\&. Each requires different values to be set\&. Some options require a component index to be provided\&. These options are handled implicitly through the option structures\&. .PP \fBNote:\fP .RS 4 Some options, such as PAPI_DOMAIN and PAPI_MULTIPLEX, are also available as separate entry points in both C and Fortran\&. .RE .PP The reader is encouraged to peruse the ctests code in the PAPI distribution for examples of usage of \fBPAPI_set_opt\fP\&. .PP \fBPossible values for the PAPI_set_opt option parameter\fP .RS 4 OPTION DEFINITION PAPI_DEFDOM Set default counting domain for newly created event sets. Requires a component index. PAPI_DEFGRN Set default counting granularity. Requires a component index. PAPI_DEBUG Set the PAPI debug state and the debug handler. The debug state is specified in ptr->debug.level. The debug handler is specified in ptr->debug.handler. For further information regarding debug states and the behavior of the handler, see PAPI_set_debug. PAPI_MULTIPLEX Enable specified EventSet for multiplexing. PAPI_DEF_ITIMER Set the type of itimer used in software multiplexing, overflowing and profiling. PAPI_DEF_MPX_NS Set the sampling time slice in nanoseconds for multiplexing and overflow. PAPI_DEF_ITIMER_NS See PAPI_DEF_MPX_NS. PAPI_ATTACH Attach EventSet specified in ptr->attach.eventset to thread or process id specified in ptr->attach.tid. PAPI_CPU_ATTACH Attach EventSet specified in ptr->cpu.eventset to cpu specified in ptr->cpu.cpu_num. PAPI_DETACH Detach EventSet specified in ptr->attach.eventset from any thread or process id. PAPI_DOMAIN Set domain for EventSet specified in ptr->domain.eventset. Will error if eventset is not bound to a component. PAPI_GRANUL Set granularity for EventSet specified in ptr->granularity.eventset.
Will error if eventset is not bound to a component. PAPI_INHERIT Enable or disable inheritance for specified EventSet. PAPI_DATA_ADDRESS Set data address range to restrict event counting for EventSet specified in ptr->addr.eventset. Starting and ending addresses are specified in ptr->addr.start and ptr->addr.end, respectively. If exact addresses cannot be instantiated, offsets are returned in ptr->addr.start_off and ptr->addr.end_off. Currently implemented on Itanium only. PAPI_INSTR_ADDRESS Set instruction address range as described above. Itanium only. .RE .PP \fBSee Also:\fP .RS 4 \fBPAPI_set_debug\fP .PP \fBPAPI_set_multiplex\fP .PP \fBPAPI_set_domain\fP .PP \fBPAPI_option_t\fP .RE .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. papi-5.6.0/src/ftests/fmultiplex1.F000664 001750 001750 00000030410 13216244361 021211 0ustar00jshenry1963jshenry1963000000 000000 #include "fpapi_test.h" program multiplex1 IMPLICIT integer (p) integer retval integer tests_quiet, get_quiet external get_quiet tests_quiet = get_quiet() if (tests_quiet .EQ. 0) then write (*, 100) NUM_ITERS 100 FORMAT ("multiplex1: Using ", I3, " iterations") write (*,*) "case1: Does PAPI_multiplex_init() not break", *" regular operation?" end if call case1(retval, tests_quiet) if (tests_quiet .EQ. 0) then write (*,*) "case2: Does setmpx/add work?" end if call case2(retval, tests_quiet) if (tests_quiet .EQ. 0) then write (*,*) "case3: Does add/setmpx work?" end if call case3(retval, tests_quiet) if (tests_quiet .EQ. 0) then write (*,*) "case4: Does add/setmpx/add work?" 
end if call case4(retval, tests_quiet) retval = PAPI_VER_CURRENT call PAPIf_library_init(retval) if ( retval.NE.PAPI_VER_CURRENT) then call ftest_fail(__FILE__, __LINE__, & 'PAPI_library_init', retval) end if call ftests_pass(__FILE__) end subroutine init_papi(event) IMPLICIT integer (p) integer retval integer event retval = PAPI_VER_CURRENT call PAPIf_library_init(retval) if ( retval.NE.PAPI_VER_CURRENT) then call ftest_fail(__FILE__, __LINE__, & 'PAPI_library_init', retval) end if call PAPIf_query_event(PAPI_TOT_INS, retval) if (retval .NE. PAPI_OK) then event = PAPI_TOT_CYC else event = PAPI_TOT_INS end if end C Tests that PAPI_multiplex_init does not mess with normal operation. subroutine case1(ret, tests_quiet) IMPLICIT integer (p) integer ret, tests_quiet, event integer retval, EventSet INTEGER*8 values(4) integer fd EventSet = PAPI_NULL call init_papi(event) call init_multiplex() call PAPIf_create_eventset(EventSet, retval) if ( retval.NE.PAPI_OK) then call ftest_fail(__FILE__, __LINE__, & 'PAPIf_create_eventset', retval) end if call PAPIf_add_event( EventSet, event, retval ) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, & 'PAPIf_add_event', retval) end if call PAPIf_add_event( EventSet, PAPI_TOT_CYC, retval ) if ( retval .NE. PAPI_OK ) then call PAPIf_add_event( EventSet, PAPI_TOT_IIS, retval ) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, & 'PAPIf_add_event', retval) end if end if if(tests_quiet .EQ. 0) then write(*,*) 'Event set list' call PrintEventSet(EventSet) end if call do_stuff() call PAPIf_start(EventSet, retval) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, & 'PAPIf_start', retval) end if fd = 1 call do_stuff() call PAPIf_stop(EventSet, values(1), retval) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, & 'PAPIf_stop', retval) end if if (tests_quiet .EQ. 
0) then print *, "case1: ", values(1), values(2) end if call PAPIf_cleanup_eventset(EventSet, retval) if (retval .NE. PAPI_OK) then call ftest_fail(__FILE__, __LINE__, & 'PAPIf_cleanup_eventset', retval) end if call PAPIF_shutdown() ret = SUCCESS end C Tests that PAPI_set_multiplex() works before adding events subroutine case2(ret, tests_quiet) IMPLICIT integer (p) integer ret, tests_quiet, event integer retval, EventSet INTEGER*8 values(4) integer fd EventSet = PAPI_NULL call init_papi(event) call init_multiplex() call PAPIf_create_eventset(EventSet, retval) if ( retval.NE.PAPI_OK) then call ftest_fail(__FILE__, __LINE__, & 'PAPIf_create_eventset', retval) end if call PAPIf_assign_eventset_component(EventSet, 0, retval) if ( retval.NE.PAPI_OK) then call ftest_fail(__FILE__, __LINE__, & 'PAPIf_assign_eventset_component', retval) end if call PAPIf_set_multiplex(EventSet, retval) if ( retval.EQ.PAPI_ENOSUPP) then call ftest_skip(__FILE__, __LINE__, & 'Multiplex not implemented', 1) end if if ( retval.NE.PAPI_OK) then call ftest_fail(__FILE__, __LINE__, & 'papif_set_multiplex', retval) end if call PAPIf_add_event( EventSet, event, retval ) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, & 'PAPIf_add_event', retval) end if call PAPIf_add_event( EventSet, PAPI_TOT_CYC, retval ) if ( retval .NE. PAPI_OK ) then call PAPIf_add_event( EventSet, PAPI_TOT_IIS, retval) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, & 'PAPIf_add_event', retval) end if end if C This print-out is disabled until PAPIf_list_event is working C for multiplexed event sets (change -4711 to 0 when it is working) if(tests_quiet .EQ. 0) then write(*,*) 'Event set list' call PrintEventSet(EventSet) endif call PAPIf_start(EventSet, retval) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, & 'PAPIf_start', retval) end if fd = 1 call do_stuff() call PAPIf_stop(EventSet, values(1), retval) if ( retval .NE. 
PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, & 'PAPIf_stop', retval) end if if (tests_quiet .EQ. 0) then print *, "case2: ", values(1), values(2) end if call PAPIf_cleanup_eventset(EventSet, retval) if (retval .NE. PAPI_OK) then call ftest_fail(__FILE__, __LINE__, & 'PAPIf_cleanup_eventset', retval) end if call PAPIF_shutdown() ret = SUCCESS end C Tests that PAPI_set_multiplex() works after adding events subroutine case3(ret, tests_quiet) IMPLICIT integer (p) integer ret, tests_quiet, event integer retval, EventSet INTEGER*8 values(4) integer fd EventSet = PAPI_NULL call init_papi(event) call init_multiplex() call PAPIf_create_eventset(EventSet, retval) if ( retval.NE.PAPI_OK) then call ftest_fail(__FILE__, __LINE__, & 'PAPIf_create_eventset', retval) end if call PAPIf_add_event( EventSet, event, retval ) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, & 'PAPIf_add_event', retval) end if call PAPIf_add_event( EventSet, PAPI_TOT_CYC, retval ) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, & 'PAPIf_add_event', retval) end if if(tests_quiet .EQ. 0) then write(*,*) 'Event set before call to PAPIf_set_multiplex:' call PrintEventSet(EventSet) endif call PAPIf_set_multiplex(EventSet, retval) if ( retval.NE.PAPI_OK) then call ftest_fail(__FILE__, __LINE__, & 'papif_set_multiplex', retval) end if if(tests_quiet .EQ. 0) then write(*,*) 'Event set after call to PAPIf_set_multiplex:' call PrintEventSet(EventSet) endif call PAPIf_start(EventSet, retval) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, & 'PAPIf_start', retval) end if fd = 1 call do_stuff() call PAPIf_stop(EventSet, values(1), retval) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, & 'PAPIf_stop', retval) end if if (tests_quiet .EQ. 0) then print *, "case3: ", values(1), values(2) end if call PAPIf_cleanup_eventset(EventSet, retval) if (retval .NE. 
PAPI_OK) then call ftest_fail(__FILE__, __LINE__, & 'PAPIf_cleanup_eventset', retval) end if call PAPIF_shutdown() ret = SUCCESS end C Tests that PAPI_set_multiplex() works before adding events C Tests that PAPI_add_event() works after C PAPI_add_event()/PAPI_set_multiplex() subroutine case4(ret, tests_quiet) IMPLICIT integer (p) integer ret, tests_quiet, event integer retval, EventSet INTEGER*8 values(4) integer fd EventSet = PAPI_NULL call init_papi(event) call init_multiplex() call PAPIf_create_eventset(EventSet, retval) if ( retval.NE.PAPI_OK) then call ftest_fail(__FILE__, __LINE__, & 'PAPIf_create_eventset', retval) end if call PAPIf_add_event( EventSet, event, retval ) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, & 'PAPIf_add_event', retval) end if call PAPIf_add_event( EventSet, PAPI_TOT_CYC, retval ) if ( retval .NE. PAPI_OK ) then call PAPIf_add_event( EventSet, PAPI_TOT_IIS, retval ) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, & 'PAPIf_add_event', retval) end if end if if(tests_quiet .EQ. 0) then write(*,*) 'Event set before call to PAPIf_set_multiplex:' call PrintEventSet(EventSet) endif call PAPIf_set_multiplex(EventSet, retval) if ( retval.NE.PAPI_OK) then call ftest_fail(__FILE__, __LINE__, & 'papif_set_multiplex', retval) end if if(tests_quiet .EQ. 0) then write(*,*) 'Event set after call to PAPIf_set_multiplex:' call PrintEventSet(EventSet) endif #if (defined(i386)&&defined(linux))||defined(mips) || (defined(__ia64__) && defined(linux)) || (SUBSTR==aix-power) call PAPIf_add_event( EventSet, PAPI_L1_DCM, retval ) C Try alternative event if the above is not possible to use... if ( retval .EQ. PAPI_ECNFLCT .OR. retval .EQ. PAPI_ENOEVNT ) then call PAPIf_add_event( EventSet, PAPI_L2_DCM, retval ) end if if ( retval .EQ. PAPI_ECNFLCT .OR. retval .EQ. PAPI_ENOEVNT ) then call PAPIf_add_event( EventSet, PAPI_L2_TCM, retval ) end if if ( retval .NE. 
PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, & 'PAPIf_add_event', retval) end if call PAPIf_add_event( EventSet, PAPI_L1_ICM, retval ) C Try alternative event if the above is not possible to use... if ( retval .EQ. PAPI_ECNFLCT .OR. retval .EQ. PAPI_ENOEVNT ) then call PAPIf_add_event( EventSet, PAPI_L1_LDM, retval ) end if if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, & 'PAPIf_add_event', retval) end if #elif (defined(sparc) && defined(sun)) call PAPIf_add_event( EventSet, PAPI_LD_INS, retval ) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, & 'PAPIf_add_event', retval) end if call PAPIf_add_event( EventSet, PAPI_SR_INS, retval ) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, & 'PAPIf_add_event', retval) end if #elif (defined(__alpha)&&defined(__osf__)) call PAPIf_add_event( EventSet, PAPI_TLB_DM, retval ) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, & 'PAPIf_add_event', retval) end if #else print *,'*** Did not match in event selection ***' #endif if(tests_quiet .EQ. 0) then write(*,*) 'Updated event set list:' call PrintEventSet(EventSet) endif call PAPIf_start(EventSet, retval) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, & 'PAPIf_start', retval) end if fd = 1 call do_stuff() call PAPIf_stop(EventSet, values(1), retval) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, & 'PAPIf_stop', retval) end if if (tests_quiet .EQ. 0) then write (*, *) "case4: ", values(1), values(2), values(3), * values(4) end if call PAPIf_cleanup_eventset(EventSet, retval) if (retval .NE. 
PAPI_OK) then call ftest_fail(__FILE__, __LINE__, & 'PAPIf_cleanup_eventset', retval) end if call PAPIF_shutdown() ret = SUCCESS end papi-5.6.0/src/libpfm4/docs/man3/libpfm_amd64_fam16h.3000664 001750 001750 00000003121 13216244363 024136 0ustar00jshenry1963jshenry1963000000 000000 .TH LIBPFM 3 "July, 2017" "" "Linux Programmer's Manual" .SH NAME libpfm_amd64_fam16h - support for AMD64 Family 16h processors .SH SYNOPSIS .nf .B #include <perfmon/pfmlib.h> .sp .B PMU name: amd64_fam16h .B PMU desc: AMD64 Fam16h Jaguar .sp .SH DESCRIPTION The library supports the core PMU of AMD Family 16h processors in both 32 and 64-bit modes. .SH MODIFIERS The following modifiers are supported on AMD64 Family 16h core PMU: .TP .B u Measure at user level which includes privilege levels 1, 2, 3. This corresponds to \fBPFM_PLM3\fR. This is a boolean modifier. .TP .B k Measure at kernel level which includes privilege level 0. This corresponds to \fBPFM_PLM0\fR. This is a boolean modifier. .TP .B h Measure while executing in host mode (when using virtualization). This corresponds to \fBPFM_PLMH\fR. This modifier is available starting with Fam10h. This is a boolean modifier. .TP .B g Measure while executing in guest mode (when using virtualization). This modifier is available starting with Fam10h. This is a boolean modifier. .TP .B i Invert the meaning of the event. The counter will now count cycles in which the event is \fBnot\fR occurring. This is a boolean modifier. .TP .B e Enable edge detection, i.e., count only when there is a state transition. This is a boolean modifier. .TP .B c Set the counter mask value. The mask acts as a threshold. The counter will count the number of cycles in which the number of occurrences of the event is greater than or equal to the threshold. This is an integer modifier with values in the range [0:255]. 
.SH AUTHORS .nf Stephane Eranian .fi .PP papi-5.6.0/src/threads.h000664 001750 001750 00000010162 13216244370 017125 0ustar00jshenry1963jshenry1963000000 000000 /** @file threads.h * CVS: $Id$ * @author ?? */ #ifndef PAPI_THREADS_H #define PAPI_THREADS_H #include #include #include #ifdef HAVE_THREAD_LOCAL_STORAGE #define THREAD_LOCAL_STORAGE_KEYWORD HAVE_THREAD_LOCAL_STORAGE #else #define THREAD_LOCAL_STORAGE_KEYWORD #endif #if defined(ANY_THREAD_GETS_SIGNAL) && !defined(_AIX) #error "lookup_and_set_thread_symbols and _papi_hwi_broadcast_signal have only been tested on AIX" #endif typedef struct _ThreadInfo { unsigned long int tid; unsigned long int allocator_tid; struct _ThreadInfo *next; hwd_context_t **context; void *thread_storage[PAPI_MAX_TLS]; EventSetInfo_t **running_eventset; EventSetInfo_t *from_esi; /* ESI used for last update this control state */ int wants_signal; } ThreadInfo_t; /** The list of threads, gets initialized to master process with TID of getpid() * @internal */ extern volatile ThreadInfo_t *_papi_hwi_thread_head; /* If we have TLS, this variable ALWAYS points to our thread descriptor. It's like magic! 
*/ #if defined(HAVE_THREAD_LOCAL_STORAGE) extern THREAD_LOCAL_STORAGE_KEYWORD ThreadInfo_t *_papi_hwi_my_thread; #endif /** Function that returns an unsigned long int thread identifier * @internal */ extern unsigned long int ( *_papi_hwi_thread_id_fn ) ( void ); /** Function that sends a signal to other threads * @internal */ extern int ( *_papi_hwi_thread_kill_fn ) ( int, int ); extern int _papi_hwi_initialize_thread( ThreadInfo_t ** dest, int tid ); extern int _papi_hwi_init_global_threads( void ); extern int _papi_hwi_shutdown_thread( ThreadInfo_t * thread, int force ); extern int _papi_hwi_shutdown_global_threads( void ); extern int _papi_hwi_broadcast_signal( unsigned int mytid ); extern int _papi_hwi_set_thread_id_fn( unsigned long int ( *id_fn ) ( void ) ); inline_static int _papi_hwi_lock( int lck ) { if ( _papi_hwi_thread_id_fn ) { _papi_hwd_lock( lck ); THRDBG( "Lock %d\n", lck ); } else { ( void ) lck; /* unused if !defined(DEBUG) */ THRDBG( "Skipped lock %d\n", lck ); } return ( PAPI_OK ); } inline_static int _papi_hwi_unlock( int lck ) { if ( _papi_hwi_thread_id_fn ) { _papi_hwd_unlock( lck ); THRDBG( "Unlock %d\n", lck ); } else { ( void ) lck; /* unused if !defined(DEBUG) */ THRDBG( "Skipped unlock %d\n", lck ); } return ( PAPI_OK ); } inline_static ThreadInfo_t * _papi_hwi_lookup_thread( int custom_tid ) { unsigned long int tid; ThreadInfo_t *tmp; if (custom_tid==0) { #ifdef HAVE_THREAD_LOCAL_STORAGE THRDBG( "TLS returning %p\n", _papi_hwi_my_thread ); return ( _papi_hwi_my_thread ); #else if ( _papi_hwi_thread_id_fn == NULL ) { THRDBG( "Threads not initialized, returning master thread at %p\n", _papi_hwi_thread_head ); return ( ( ThreadInfo_t * ) _papi_hwi_thread_head ); } tid = ( *_papi_hwi_thread_id_fn ) ( ); #endif } else { tid=custom_tid; } THRDBG( "Threads initialized, looking for thread %#lx\n", tid ); _papi_hwi_lock( THREADS_LOCK ); tmp = ( ThreadInfo_t * ) _papi_hwi_thread_head; while ( tmp != NULL ) { THRDBG( "Examining thread tid %#lx at 
%p\n", tmp->tid, tmp ); if ( tmp->tid == tid ) break; tmp = tmp->next; if ( tmp == _papi_hwi_thread_head ) { tmp = NULL; break; } } if ( tmp ) { _papi_hwi_thread_head = tmp; THRDBG( "Found thread %ld at %p\n", tid, tmp ); } else { THRDBG( "Did not find tid %ld\n", tid ); } _papi_hwi_unlock( THREADS_LOCK ); return ( tmp ); } inline_static int _papi_hwi_lookup_or_create_thread( ThreadInfo_t ** here, int tid ) { ThreadInfo_t *tmp = _papi_hwi_lookup_thread( tid ); int retval = PAPI_OK; if ( tmp == NULL ) retval = _papi_hwi_initialize_thread( &tmp, tid ); if ( retval == PAPI_OK ) *here = tmp; return ( retval ); } /* Prototypes */ void _papi_hwi_shutdown_the_thread_list( void ); void _papi_hwi_cleanup_thread_list( void ); int _papi_hwi_insert_in_thread_list( ThreadInfo_t * ptr ); ThreadInfo_t *_papi_hwi_lookup_in_thread_list( ); void _papi_hwi_shutdown_the_thread_list( void ); int _papi_hwi_get_thr_context( void ** ); int _papi_hwi_gather_all_thrspec_data( int tag, PAPI_all_thr_spec_t * where ); #endif papi-5.6.0/src/threads.c000664 001750 001750 00000035342 13216244370 017127 0ustar00jshenry1963jshenry1963000000 000000 /****************************/ /* THIS IS OPEN SOURCE CODE */ /****************************/ /* * File: threads.c * Author: Philip Mucci * mucci@cs.utk.edu * Mods: Kevin London * london@cs.utk.edu */ /* This file contains thread allocation and bookkeeping functions */ #include "papi.h" #include "papi_internal.h" #include "papi_vector.h" #include "papi_memory.h" #include #include /*****************/ /* BEGIN GLOBALS */ /*****************/ /* The following globals get initialized and cleared by: extern int _papi_hwi_init_global_threads(void); extern int _papi_hwi_shutdown_thread(ThreadInfo_t *thread); */ /* list of threads, gets initialized to master process with TID of getpid() */ volatile ThreadInfo_t *_papi_hwi_thread_head; /* If we have TLS, this variable ALWAYS points to our thread descriptor. It's like magic! 
*/ #if defined(HAVE_THREAD_LOCAL_STORAGE) THREAD_LOCAL_STORAGE_KEYWORD ThreadInfo_t *_papi_hwi_my_thread; #endif /* Function that returns and unsigned long thread identifier */ unsigned long ( *_papi_hwi_thread_id_fn ) ( void ); /* Function that sends a signal to other threads */ #ifdef ANY_THREAD_GETS_SIGNAL int ( *_papi_hwi_thread_kill_fn ) ( int, int ); #endif /*****************/ /* END GLOBALS */ /*****************/ static int lookup_and_set_thread_symbols( void ) { #if defined(ANY_THREAD_GETS_SIGNAL) int retval; char *error_ptc = NULL, *error_ptk = NULL; void *symbol_ptc = NULL, *symbol_ptk = NULL, *handle = NULL; handle = dlopen( NULL, RTLD_LAZY ); if ( handle == NULL ) { PAPIERROR( "Error from dlopen(NULL, RTLD_LAZY): %d %s", errno, dlerror( ) ); return ( PAPI_ESYS ); } symbol_ptc = dlsym( handle, "pthread_self" ); if ( symbol_ptc == NULL ) { error_ptc = dlerror( ); THRDBG( "dlsym(%p,pthread_self) returned NULL: %s\n", ( error_ptc ? error_ptc : "No error, NULL symbol!" ) ); } symbol_ptk = dlsym( handle, "pthread_kill" ); if ( symbol_ptk == NULL ) { error_ptk = dlerror( ); THRDBG( "dlsym(%p,pthread_kill) returned NULL: %s\n", ( error_ptk ? error_ptk : "No error, NULL symbol!" ) ); } dlclose( handle ); if ( !( ( _papi_hwi_thread_kill_fn && _papi_hwi_thread_id_fn ) || ( !_papi_hwi_thread_kill_fn && !_papi_hwi_thread_id_fn ) ) ) return ( PAPI_EMISC ); _papi_hwi_thread_kill_fn = ( int ( * )( int, int ) ) symbol_ptk; _papi_hwi_thread_id_fn = ( unsigned long ( * )( void ) ) symbol_ptc; #endif return ( PAPI_OK ); } static ThreadInfo_t * allocate_thread( int tid ) { ThreadInfo_t *thread; int i; /* The Thread EventSet is special. It is not in the EventSet list, but is pointed to by each EventSet of that particular thread. 
*/ thread = ( ThreadInfo_t * ) papi_malloc( sizeof ( ThreadInfo_t ) ); if ( thread == NULL ) return ( NULL ); memset( thread, 0x00, sizeof ( ThreadInfo_t ) ); thread->context = ( hwd_context_t ** ) papi_malloc( sizeof ( hwd_context_t * ) * ( size_t ) papi_num_components ); if ( !thread->context ) { papi_free( thread ); return ( NULL ); } thread->running_eventset = ( EventSetInfo_t ** ) papi_malloc( sizeof ( EventSetInfo_t * ) * ( size_t ) papi_num_components ); if ( !thread->running_eventset ) { papi_free( thread->context ); papi_free( thread ); return ( NULL ); } for ( i = 0; i < papi_num_components; i++ ) { thread->context[i] = ( void * ) papi_malloc( ( size_t ) _papi_hwd[i]->size.context ); thread->running_eventset[i] = NULL; if ( thread->context[i] == NULL ) { for ( i--; i >= 0; i-- ) papi_free( thread->context[i] ); papi_free( thread->context ); papi_free( thread ); return ( NULL ); } memset( thread->context[i], 0x00, ( size_t ) _papi_hwd[i]->size.context ); } if ( _papi_hwi_thread_id_fn ) { thread->tid = ( *_papi_hwi_thread_id_fn ) ( ); } else { thread->tid = ( unsigned long ) getpid( ); } thread->allocator_tid=thread->tid; if (tid == 0 ) { } else { thread->tid=tid; } THRDBG( "Allocated thread %ld at %p, allocator: %ld\n", thread->tid, thread, thread->allocator_tid ); return thread; } static void free_thread( ThreadInfo_t ** thread ) { int i; THRDBG( "Freeing thread %ld at %p\n", ( *thread )->tid, *thread ); for ( i = 0; i < papi_num_components; i++ ) { if ( ( *thread )->context[i] ) papi_free( ( *thread )->context[i] ); } if ( ( *thread )->context ) papi_free( ( *thread )->context ); if ( ( *thread )->running_eventset ) papi_free( ( *thread )->running_eventset ); memset( *thread, 0x00, sizeof ( ThreadInfo_t ) ); papi_free( *thread ); *thread = NULL; } static void insert_thread( ThreadInfo_t * entry, int tid ) { _papi_hwi_lock( THREADS_LOCK ); if ( _papi_hwi_thread_head == NULL ) { /* 0 elements */ THRDBG( "_papi_hwi_thread_head is NULL\n" ); entry->next = 
entry; } else if ( _papi_hwi_thread_head->next == _papi_hwi_thread_head ) { /* 1 elements */ THRDBG( "_papi_hwi_thread_head was thread %ld at %p\n", _papi_hwi_thread_head->tid, _papi_hwi_thread_head ); _papi_hwi_thread_head->next = entry; entry->next = ( ThreadInfo_t * ) _papi_hwi_thread_head; } else { /* 2+ elements */ THRDBG( "_papi_hwi_thread_head was thread %ld at %p\n", _papi_hwi_thread_head->tid, _papi_hwi_thread_head ); entry->next = _papi_hwi_thread_head->next; _papi_hwi_thread_head->next = entry; } _papi_hwi_thread_head = entry; THRDBG( "_papi_hwi_thread_head now thread %ld at %p\n", _papi_hwi_thread_head->tid, _papi_hwi_thread_head ); _papi_hwi_unlock( THREADS_LOCK ); #if defined(HAVE_THREAD_LOCAL_STORAGE) /* Don't set the current local thread if we are a fake attach thread */ if (tid==0) { _papi_hwi_my_thread = entry; THRDBG( "TLS for thread %ld is now %p\n", entry->tid, _papi_hwi_my_thread ); } #else ( void ) tid; #endif } static int remove_thread( ThreadInfo_t * entry ) { ThreadInfo_t *tmp = NULL, *prev = NULL; _papi_hwi_lock( THREADS_LOCK ); THRDBG( "_papi_hwi_thread_head was thread %ld at %p\n", _papi_hwi_thread_head->tid, _papi_hwi_thread_head ); /* Find the preceding element and the matched element, short circuit if we've seen the head twice */ for ( tmp = ( ThreadInfo_t * ) _papi_hwi_thread_head; ( entry != tmp ) || ( prev == NULL ); tmp = tmp->next ) { prev = tmp; } if ( tmp != entry ) { THRDBG( "Thread %ld at %p was not found in the thread list!\n", entry->tid, entry ); return ( PAPI_EBUG ); } /* Only 1 element in list */ if ( prev == tmp ) { _papi_hwi_thread_head = NULL; tmp->next = NULL; THRDBG( "_papi_hwi_thread_head now NULL\n" ); } else { prev->next = tmp->next; /* If we're removing the head, better advance it! 
*/ if ( _papi_hwi_thread_head == tmp ) { _papi_hwi_thread_head = tmp->next; THRDBG( "_papi_hwi_thread_head now thread %ld at %p\n", _papi_hwi_thread_head->tid, _papi_hwi_thread_head ); } THRDBG( "Removed thread %p from list\n", tmp ); } _papi_hwi_unlock( THREADS_LOCK ); #if defined(HAVE_THREAD_LOCAL_STORAGE) _papi_hwi_my_thread = NULL; THRDBG( "TLS for thread %ld is now %p\n", entry->tid, _papi_hwi_my_thread ); #endif return PAPI_OK; } int _papi_hwi_initialize_thread( ThreadInfo_t ** dest, int tid ) { int retval; ThreadInfo_t *thread; int i; if ( ( thread = allocate_thread( tid ) ) == NULL ) { *dest = NULL; return PAPI_ENOMEM; } /* Call the component to fill in anything special. */ for ( i = 0; i < papi_num_components; i++ ) { if (_papi_hwd[i]->cmp_info.disabled) continue; retval = _papi_hwd[i]->init_thread( thread->context[i] ); if ( retval ) { free_thread( &thread ); *dest = NULL; return retval; } } insert_thread( thread, tid ); *dest = thread; return PAPI_OK; } #if defined(ANY_THREAD_GETS_SIGNAL) /* This is ONLY defined for systems that enable ANY_THREAD_GETS_SIGNAL since we must forward signals sent to non-PAPI threads. This is NOT compatible with thread local storage, since to broadcast the signal, we need a list of threads. */ int _papi_hwi_broadcast_signal( unsigned int mytid ) { int i, retval, didsomething = 0; volatile ThreadInfo_t *foo = NULL; _papi_hwi_lock( THREADS_LOCK ); for ( foo = _papi_hwi_thread_head; foo != NULL; foo = foo->next ) { /* xxxx Should this be hardcoded to index 0 or walk the list or what? */ for ( i = 0; i < papi_num_components; i++ ) { if ( ( foo->tid != mytid ) && ( foo->running_eventset[i] ) && ( foo->running_eventset[i]-> state & ( PAPI_OVERFLOWING | PAPI_MULTIPLEXING ) ) ) { /* xxxx mpx_info inside _papi_mdi_t _papi_hwi_system_info is commented out. See papi_internal.h for details. 
The multiplex_timer_sig value is now part of that structure */ THRDBG("Thread %ld sending signal %d to thread %ld\n",mytid,foo->tid, (foo->running_eventset[i]->state & PAPI_OVERFLOWING ? _papi_hwd[i]->cmp_info.hardware_intr_sig : _papi_os_info.itimer_sig)); retval = (*_papi_hwi_thread_kill_fn)(foo->tid, (foo->running_eventset[i]->state & PAPI_OVERFLOWING ? _papi_hwd[i]->cmp_info.hardware_intr_sig : _papi_os_info.itimer_sig)); if (retval != 0) return(PAPI_EMISC); } } if ( foo->next == _papi_hwi_thread_head ) break; } _papi_hwi_unlock( THREADS_LOCK ); return ( PAPI_OK ); } #endif /* This is undefined for systems that enable ANY_THREAD_GETS_SIGNAL since we always must enable threads for safety. */ int _papi_hwi_set_thread_id_fn( unsigned long ( *id_fn ) ( void ) ) { #if !defined(ANY_THREAD_GETS_SIGNAL) /* Check for multiple threads still in the list, if so, we can't change it */ if ( _papi_hwi_thread_head->next != _papi_hwi_thread_head ) return ( PAPI_EINVAL ); /* We can't change the thread id function from one to another, only NULL to non-NULL and vice versa. 
*/ if ( ( id_fn != NULL ) && ( _papi_hwi_thread_id_fn != NULL ) ) return ( PAPI_EINVAL ); _papi_hwi_thread_id_fn = id_fn; THRDBG( "Set new thread id function to %p\n", id_fn ); if ( id_fn ) _papi_hwi_thread_head->tid = ( *_papi_hwi_thread_id_fn ) ( ); else _papi_hwi_thread_head->tid = ( unsigned long ) getpid( ); THRDBG( "New master tid is %ld\n", _papi_hwi_thread_head->tid ); #else THRDBG( "Skipping set of thread id function\n" ); #endif return PAPI_OK; } static int _papi_hwi_thread_free_eventsets(long tid) { EventSetInfo_t *ESI; ThreadInfo_t *master; DynamicArray_t *map = &_papi_hwi_system_info.global_eventset_map; int i; master = _papi_hwi_lookup_thread( tid ); _papi_hwi_lock( INTERNAL_LOCK ); for( i = 0; i < map->totalSlots; i++ ) { ESI = map->dataSlotArray[i]; if ( ( ESI ) && (ESI->master!=NULL) ) { if ( ESI->master == master ) { THRDBG("Attempting to remove %d from tid %ld\n",ESI->EventSetIndex,tid); /* Code copied from _papi_hwi_remove_EventSet(ESI); */ _papi_hwi_free_EventSet( ESI ); map->dataSlotArray[i] = NULL; map->availSlots++; map->fullSlots--; } } } _papi_hwi_unlock( INTERNAL_LOCK ); return PAPI_OK; } int _papi_hwi_shutdown_thread( ThreadInfo_t * thread, int force_shutdown ) { int retval = PAPI_OK; unsigned long tid; int i, failure = 0; if ( _papi_hwi_thread_id_fn ) tid = ( *_papi_hwi_thread_id_fn ) ( ); else tid = ( unsigned long ) getpid( ); THRDBG("Want to shutdown thread %ld, alloc %ld, our_tid: %ld\n", thread->tid, thread->allocator_tid, tid); if ((thread->tid==tid) || ( thread->allocator_tid == tid ) || force_shutdown) { _papi_hwi_thread_free_eventsets(tid); remove_thread( thread ); THRDBG( "Shutting down thread %ld at %p\n", thread->tid, thread ); for( i = 0; i < papi_num_components; i++ ) { if (_papi_hwd[i]->cmp_info.disabled) continue; retval = _papi_hwd[i]->shutdown_thread( thread->context[i]); if ( retval != PAPI_OK ) failure = retval; } free_thread( &thread ); return ( failure ); } THRDBG( "Skipping shutdown thread %ld at %p, thread %ld 
not allocator!\n", thread->tid, thread, tid ); return PAPI_EBUG; } /* THESE MUST BE CALLED WITH A GLOBAL LOCK */ int _papi_hwi_shutdown_global_threads( void ) { int err,num_threads,i; ThreadInfo_t *tmp,*next; unsigned long our_tid; tmp = _papi_hwi_lookup_thread( 0 ); if ( tmp == NULL ) { THRDBG( "Did not find my thread for shutdown!\n" ); err = PAPI_EBUG; } else { our_tid=tmp->tid; (void)our_tid; THRDBG("Shutting down %ld\n",our_tid); err = _papi_hwi_shutdown_thread( tmp, 1 ); /* count threads */ tmp = ( ThreadInfo_t * ) _papi_hwi_thread_head; num_threads=0; while(tmp!=NULL) { num_threads++; if (tmp->next==_papi_hwi_thread_head) break; tmp=tmp->next; } /* Shut down all threads allocated by this thread */ /* Urgh it's a circular list where we removed in the loop */ /* so the only sane way to do it is get a count in advance */ tmp = ( ThreadInfo_t * ) _papi_hwi_thread_head; for(i=0;inext; THRDBG("looking at #%d %ld our_tid: %ld alloc_tid: %ld\n", i,tmp->tid,our_tid,tmp->allocator_tid); THRDBG("Also removing thread %ld\n",tmp->tid); err = _papi_hwi_shutdown_thread( tmp, 1 ); tmp=next; } } #ifdef DEBUG if ( ISLEVEL( DEBUG_THREADS ) ) { if ( _papi_hwi_thread_head ) { THRDBG( "Thread head %p still exists!\n", _papi_hwi_thread_head ); } } #endif #if defined(HAVE_THREAD_LOCAL_STORAGE) _papi_hwi_my_thread = NULL; #endif _papi_hwi_thread_head = NULL; _papi_hwi_thread_id_fn = NULL; #if defined(ANY_THREAD_GETS_SIGNAL) _papi_hwi_thread_kill_fn = NULL; #endif return err; } int _papi_hwi_init_global_threads( void ) { int retval; ThreadInfo_t *tmp; _papi_hwi_lock( GLOBAL_LOCK ); #if defined(HAVE_THREAD_LOCAL_STORAGE) _papi_hwi_my_thread = NULL; #endif _papi_hwi_thread_head = NULL; _papi_hwi_thread_id_fn = NULL; #if defined(ANY_THREAD_GETS_SIGNAL) _papi_hwi_thread_kill_fn = NULL; #endif retval = _papi_hwi_initialize_thread( &tmp , 0); if ( retval == PAPI_OK ) { retval = lookup_and_set_thread_symbols( ); } _papi_hwi_unlock( GLOBAL_LOCK ); return ( retval ); } int 
_papi_hwi_gather_all_thrspec_data( int tag, PAPI_all_thr_spec_t * where ) { int didsomething = 0; ThreadInfo_t *foo = NULL; _papi_hwi_lock( THREADS_LOCK ); for ( foo = ( ThreadInfo_t * ) _papi_hwi_thread_head; foo != NULL; foo = foo->next ) { /* If we want thread ID's */ if ( where->id ) memcpy( &where->id[didsomething], &foo->tid, sizeof ( where->id[didsomething] ) ); /* If we want data pointers */ if ( where->data ) where->data[didsomething] = foo->thread_storage[tag]; didsomething++; if ( ( where->id ) || ( where->data ) ) { if ( didsomething >= where->num ) break; } if ( foo->next == _papi_hwi_thread_head ) break; } where->num = didsomething; _papi_hwi_unlock( THREADS_LOCK ); return ( PAPI_OK ); } papi-5.6.0/src/libpfm4/lib/pfmlib_itanium.c000664 001750 001750 00000101613 13216244365 022576 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_itanium.c : support for Itanium-family PMU * * Copyright (c) 2001-2006 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include #include #include #include /* public headers */ #include /* private headers */ #include "pfmlib_priv.h" /* library private */ #include "pfmlib_priv_ia64.h" /* architecture private */ #include "pfmlib_itanium_priv.h" /* PMU private */ #include "itanium_events.h" /* PMU private */ #define is_ear(i) event_is_ear(itanium_pe+(i)) #define is_ear_tlb(i) event_is_tlb_ear(itanium_pe+(i)) #define is_iear(i) event_is_iear(itanium_pe+(i)) #define is_dear(i) event_is_dear(itanium_pe+(i)) #define is_btb(i) event_is_btb(itanium_pe+(i)) #define has_opcm(i) event_opcm_ok(itanium_pe+(i)) #define has_iarr(i) event_iarr_ok(itanium_pe+(i)) #define has_darr(i) event_darr_ok(itanium_pe+(i)) #define evt_use_opcm(e) ((e)->pfp_ita_pmc8.opcm_used != 0 || (e)->pfp_ita_pmc9.opcm_used !=0) #define evt_use_irange(e) ((e)->pfp_ita_irange.rr_used) #define evt_use_drange(e) ((e)->pfp_ita_drange.rr_used) #define evt_umask(e) itanium_pe[(e)].pme_umask /* let's define some handy shortcuts! */ #define pmc_plm pmc_ita_count_reg.pmc_plm #define pmc_ev pmc_ita_count_reg.pmc_ev #define pmc_oi pmc_ita_count_reg.pmc_oi #define pmc_pm pmc_ita_count_reg.pmc_pm #define pmc_es pmc_ita_count_reg.pmc_es #define pmc_umask pmc_ita_count_reg.pmc_umask #define pmc_thres pmc_ita_count_reg.pmc_thres #define pmc_ism pmc_ita_count_reg.pmc_ism /* * Description of the PMC register mappings use by * this module (as reported in pfmlib_reg_t.reg_num): * * 0 -> PMC0 * 1 -> PMC1 * n -> PMCn * * The following are in the model specific rr_br[]: * IBR0 -> 0 * IBR1 -> 1 * ... * IBR7 -> 7 * DBR0 -> 0 * DBR1 -> 1 * ... * DBR7 -> 7 * * We do not use a mapping table, instead we make up the * values on the fly given the base. 
*/ #define PFMLIB_ITA_PMC_BASE 0 static int pfm_ita_detect(void) { int ret = PFMLIB_ERR_NOTSUPP; /* * we support all chips (there is only one!) in the Itanium family */ if (pfm_ia64_get_cpu_family() == 0x07) ret = PFMLIB_SUCCESS; return ret; } /* * Part of the following code will eventually go into a perfmon library */ static int valid_assign(unsigned int *as, pfmlib_regmask_t *r_pmcs, unsigned int cnt) { unsigned int i; for(i=0; i < cnt; i++) { if (as[i]==0) return PFMLIB_ERR_NOASSIGN; /* * take care of restricted PMC registers */ if (pfm_regmask_isset(r_pmcs, as[i])) return PFMLIB_ERR_NOASSIGN; } return PFMLIB_SUCCESS; } /* * Automatically dispatch events to corresponding counters following constraints. */ static int pfm_ita_dispatch_counters(pfmlib_input_param_t *inp, pfmlib_ita_input_param_t *mod_in, pfmlib_output_param_t *outp) { #define has_counter(e,b) (itanium_pe[e].pme_counters & (1 << (b)) ? (b) : 0) pfmlib_ita_input_param_t *param = mod_in; pfm_ita_pmc_reg_t reg; pfmlib_event_t *e; pfmlib_reg_t *pc, *pd; pfmlib_regmask_t *r_pmcs; unsigned int i,j,k,l, m; unsigned int max_l0, max_l1, max_l2, max_l3; unsigned int assign[PMU_ITA_NUM_COUNTERS]; unsigned int cnt; e = inp->pfp_events; pc = outp->pfp_pmcs; pd = outp->pfp_pmds; cnt = inp->pfp_event_count; r_pmcs = &inp->pfp_unavail_pmcs; if (PFMLIB_DEBUG()) { for (m=0; m < cnt; m++) { DPRINT("ev[%d]=%s counters=0x%lx\n", m, itanium_pe[e[m].event].pme_name, itanium_pe[e[m].event].pme_counters); } } if (cnt > PMU_ITA_NUM_COUNTERS) return PFMLIB_ERR_TOOMANY; max_l0 = PMU_ITA_FIRST_COUNTER + PMU_ITA_NUM_COUNTERS; max_l1 = PMU_ITA_FIRST_COUNTER + PMU_ITA_NUM_COUNTERS*(cnt>1); max_l2 = PMU_ITA_FIRST_COUNTER + PMU_ITA_NUM_COUNTERS*(cnt>2); max_l3 = PMU_ITA_FIRST_COUNTER + PMU_ITA_NUM_COUNTERS*(cnt>3); DPRINT("max_l0=%u max_l1=%u max_l2=%u max_l3=%u\n", max_l0, max_l1, max_l2, max_l3); /* * This code needs fixing. It is not very pretty and * won't handle more than 4 counters if more become * available ! 
* For now, worst case in the loop nest: 4! (factorial) */ for (i=PMU_ITA_FIRST_COUNTER; i < max_l0; i++) { assign[0]= has_counter(e[0].event,i); if (max_l1 == PMU_ITA_FIRST_COUNTER && valid_assign(assign, r_pmcs, cnt) == PFMLIB_SUCCESS) goto done; for (j=PMU_ITA_FIRST_COUNTER; j < max_l1; j++) { if (j == i) continue; assign[1] = has_counter(e[1].event,j); if (max_l2 == PMU_ITA_FIRST_COUNTER && valid_assign(assign, r_pmcs, cnt) == PFMLIB_SUCCESS) goto done; for (k=PMU_ITA_FIRST_COUNTER; k < max_l2; k++) { if(k == i || k == j) continue; assign[2] = has_counter(e[2].event,k); if (max_l3 == PMU_ITA_FIRST_COUNTER && valid_assign(assign, r_pmcs, cnt) == PFMLIB_SUCCESS) goto done; for (l=PMU_ITA_FIRST_COUNTER; l < max_l3; l++) { if(l == i || l == j || l == k) continue; assign[3] = has_counter(e[3].event,l); if (valid_assign(assign, r_pmcs, cnt) == PFMLIB_SUCCESS) goto done; } } } } /* we cannot satisfy the constraints */ return PFMLIB_ERR_NOASSIGN; done: for (j=0; j < cnt ; j++ ) { reg.pmc_val = 0; /* clear all */ /* if plm is 0, then assume not specified per-event and use default */ reg.pmc_plm = e[j].plm ? e[j].plm : inp->pfp_dfl_plm; reg.pmc_oi = 1; /* overflow interrupt */ reg.pmc_pm = inp->pfp_flags & PFMLIB_PFP_SYSTEMWIDE ? 1 : 0; reg.pmc_thres = param ? param->pfp_ita_counters[j].thres: 0; reg.pmc_ism = param ? param->pfp_ita_counters[j].ism : PFMLIB_ITA_ISM_BOTH; reg.pmc_umask = is_ear(e[j].event) ? 
0x0 : evt_umask(e[j].event); reg.pmc_es = itanium_pe[e[j].event].pme_code; pc[j].reg_num = assign[j]; pc[j].reg_value = reg.pmc_val; pc[j].reg_addr = assign[j]; pc[j].reg_alt_addr= assign[j]; pd[j].reg_num = assign[j]; pd[j].reg_addr = assign[j]; pd[j].reg_alt_addr = assign[j]; __pfm_vbprintf("[PMC%u(pmc%u)=0x%06lx thres=%d es=0x%02x plm=%d umask=0x%x pm=%d ism=0x%x oi=%d] %s\n", assign[j], assign[j], reg.pmc_val, reg.pmc_thres, reg.pmc_es,reg.pmc_plm, reg.pmc_umask, reg.pmc_pm, reg.pmc_ism, reg.pmc_oi, itanium_pe[e[j].event].pme_name); __pfm_vbprintf("[PMD%u(pmd%u)]\n", pd[j].reg_num, pd[j].reg_num); } /* number of PMC registers programmed */ outp->pfp_pmc_count = cnt; outp->pfp_pmd_count = cnt; return PFMLIB_SUCCESS; } static int pfm_dispatch_iear(pfmlib_input_param_t *inp, pfmlib_ita_input_param_t *mod_in, pfmlib_output_param_t *outp) { pfm_ita_pmc_reg_t reg; pfmlib_ita_input_param_t *param = mod_in; pfmlib_ita_input_param_t fake_param; pfmlib_reg_t *pc, *pd; unsigned int pos1, pos2; int iear_idx = -1; unsigned int i, count; pc = outp->pfp_pmcs; pd = outp->pfp_pmds; pos1 = outp->pfp_pmc_count; pos2 = outp->pfp_pmd_count; count = inp->pfp_event_count; for (i=0; i < count; i++) { if (is_iear(inp->pfp_events[i].event)) iear_idx = i; } if (param == NULL || mod_in->pfp_ita_iear.ear_used == 0) { /* * case 3: no I-EAR event, no (or nothing) in param->pfp_ita2_iear.ear_used */ if (iear_idx == -1) return PFMLIB_SUCCESS; memset(&fake_param, 0, sizeof(fake_param)); param = &fake_param; pfm_ita_get_ear_mode(inp->pfp_events[iear_idx].event, ¶m->pfp_ita_iear.ear_mode); param->pfp_ita_iear.ear_umask = evt_umask(inp->pfp_events[iear_idx].event); param->pfp_ita_iear.ear_ism = PFMLIB_ITA_ISM_BOTH; /* force both instruction sets */ DPRINT("I-EAR event with no info\n"); } /* sanity check on the mode */ if (param->pfp_ita_iear.ear_mode < 0 || param->pfp_ita_iear.ear_mode > 2) return PFMLIB_ERR_INVAL; /* * case 2: ear_used=1, event is defined, we use the param info as it is more 
precise * case 4: ear_used=1, no event (free running I-EAR), use param info */ reg.pmc_val = 0; /* if plm is 0, then assume not specified per-event and use default */ reg.pmc10_ita_reg.iear_plm = param->pfp_ita_iear.ear_plm ? param->pfp_ita_iear.ear_plm : inp->pfp_dfl_plm; reg.pmc10_ita_reg.iear_pm = inp->pfp_flags & PFMLIB_PFP_SYSTEMWIDE ? 1 : 0; reg.pmc10_ita_reg.iear_tlb = param->pfp_ita_iear.ear_mode; reg.pmc10_ita_reg.iear_umask = param->pfp_ita_iear.ear_umask; reg.pmc10_ita_reg.iear_ism = param->pfp_ita_iear.ear_ism; if (pfm_regmask_isset(&inp->pfp_unavail_pmcs, 10)) return PFMLIB_ERR_NOASSIGN; pc[pos1].reg_num = 10; /* PMC10 is I-EAR config register */ pc[pos1].reg_value = reg.pmc_val; pc[pos1].reg_addr = 10; pc[pos1].reg_alt_addr= 10; pos1++; pd[pos2].reg_num = 0; pd[pos2].reg_addr = 0; pd[pos2].reg_alt_addr = 0; pos2++; pd[pos2].reg_num = 1; pd[pos2].reg_addr = 1; pd[pos2].reg_alt_addr = 1; pos2++; __pfm_vbprintf("[PMC10(pmc10)=0x%lx tlb=%s plm=%d pm=%d ism=0x%x umask=0x%x]\n", reg.pmc_val, reg.pmc10_ita_reg.iear_tlb ? 
"Yes" : "No", reg.pmc10_ita_reg.iear_plm, reg.pmc10_ita_reg.iear_pm, reg.pmc10_ita_reg.iear_ism, reg.pmc10_ita_reg.iear_umask); __pfm_vbprintf("[PMD0(pmd0)]\n[PMD1(pmd1)\n"); /* update final number of entries used */ outp->pfp_pmc_count = pos1; outp->pfp_pmd_count = pos2; return PFMLIB_SUCCESS; } static int pfm_dispatch_dear(pfmlib_input_param_t *inp, pfmlib_ita_input_param_t *mod_in, pfmlib_output_param_t *outp) { pfm_ita_pmc_reg_t reg; pfmlib_ita_input_param_t *param = mod_in; pfmlib_ita_input_param_t fake_param; pfmlib_reg_t *pc, *pd; unsigned int pos1, pos2; int dear_idx = -1; unsigned int i, count; pc = outp->pfp_pmcs; pd = outp->pfp_pmds; pos1 = outp->pfp_pmc_count; pos2 = outp->pfp_pmd_count; count = inp->pfp_event_count; for (i=0; i < count; i++) { if (is_dear(inp->pfp_events[i].event)) dear_idx = i; } if (param == NULL || param->pfp_ita_dear.ear_used == 0) { /* * case 3: no D-EAR event, no (or nothing) in param->pfp_ita2_dear.ear_used */ if (dear_idx == -1) return PFMLIB_SUCCESS; memset(&fake_param, 0, sizeof(fake_param)); param = &fake_param; pfm_ita_get_ear_mode(inp->pfp_events[dear_idx].event, ¶m->pfp_ita_dear.ear_mode); param->pfp_ita_dear.ear_umask = evt_umask(inp->pfp_events[dear_idx].event); param->pfp_ita_dear.ear_ism = PFMLIB_ITA_ISM_BOTH; /* force both instruction sets */ DPRINT("D-EAR event with no info\n"); } /* sanity check on the mode */ if (param->pfp_ita_dear.ear_mode > 2) return PFMLIB_ERR_INVAL; /* * case 2: ear_used=1, event is defined, we use the param info as it is more precise * case 4: ear_used=1, no event (free running D-EAR), use param info */ reg.pmc_val = 0; /* if plm is 0, then assume not specified per-event and use default */ reg.pmc11_ita_reg.dear_plm = param->pfp_ita_dear.ear_plm ? param->pfp_ita_dear.ear_plm : inp->pfp_dfl_plm; reg.pmc11_ita_reg.dear_pm = inp->pfp_flags & PFMLIB_PFP_SYSTEMWIDE ? 
1 : 0; reg.pmc11_ita_reg.dear_tlb = param->pfp_ita_dear.ear_mode; reg.pmc11_ita_reg.dear_ism = param->pfp_ita_dear.ear_ism; reg.pmc11_ita_reg.dear_umask = param->pfp_ita_dear.ear_umask; reg.pmc11_ita_reg.dear_pt = param->pfp_ita_drange.rr_used ? 0: 1; if (pfm_regmask_isset(&inp->pfp_unavail_pmcs, 11)) return PFMLIB_ERR_NOASSIGN; pc[pos1].reg_num = 11; /* PMC11 is D-EAR config register */ pc[pos1].reg_value = reg.pmc_val; pc[pos1].reg_addr = 11; pos1++; pd[pos2].reg_num = 2; pd[pos2].reg_addr = 2; pd[pos2].reg_alt_addr = 2; pos2++; pd[pos2].reg_num = 3; pd[pos2].reg_addr = 3; pd[pos2].reg_alt_addr = 3; pos2++; pd[pos2].reg_num = 17; pd[pos2].reg_addr = 17; pd[pos2].reg_alt_addr = 17; pos2++; __pfm_vbprintf("[PMC11(pmc11)=0x%lx tlb=%s plm=%d pm=%d ism=0x%x umask=0x%x pt=%d]\n", reg.pmc_val, reg.pmc11_ita_reg.dear_tlb ? "Yes" : "No", reg.pmc11_ita_reg.dear_plm, reg.pmc11_ita_reg.dear_pm, reg.pmc11_ita_reg.dear_ism, reg.pmc11_ita_reg.dear_umask, reg.pmc11_ita_reg.dear_pt); __pfm_vbprintf("[PMD2(pmd2)]\n[PMD3(pmd3)\nPMD17(pmd17)\n"); /* update final number of entries used */ outp->pfp_pmc_count = pos1; outp->pfp_pmd_count = pos2; return PFMLIB_SUCCESS; } static int pfm_dispatch_opcm(pfmlib_input_param_t *inp, pfmlib_ita_input_param_t *mod_in, pfmlib_output_param_t *outp) { pfmlib_ita_input_param_t *param = mod_in; pfm_ita_pmc_reg_t reg; pfmlib_reg_t *pc = outp->pfp_pmcs; int pos = outp->pfp_pmc_count; if (param == NULL) return PFMLIB_SUCCESS; if (param->pfp_ita_pmc8.opcm_used) { reg.pmc_val = param->pfp_ita_pmc8.pmc_val; if (pfm_regmask_isset(&inp->pfp_unavail_pmcs, 8)) return PFMLIB_ERR_NOASSIGN; pc[pos].reg_num = 8; pc[pos].reg_value = reg.pmc_val; pc[pos].reg_addr = 8; pc[pos].reg_alt_addr = 8; pos++; __pfm_vbprintf("[PMC8(pmc8)=0x%lx m=%d i=%d f=%d b=%d match=0x%x mask=0x%x]\n", reg.pmc_val, reg.pmc8_9_ita_reg.m, reg.pmc8_9_ita_reg.i, reg.pmc8_9_ita_reg.f, reg.pmc8_9_ita_reg.b, reg.pmc8_9_ita_reg.match, reg.pmc8_9_ita_reg.mask); } if (param->pfp_ita_pmc9.opcm_used) 
{ reg.pmc_val = param->pfp_ita_pmc9.pmc_val; if (pfm_regmask_isset(&inp->pfp_unavail_pmcs, 9)) return PFMLIB_ERR_NOASSIGN; pc[pos].reg_num = 9; pc[pos].reg_value = reg.pmc_val; pc[pos].reg_addr = 9; pc[pos].reg_alt_addr = 9; pos++; __pfm_vbprintf("[PMC9(pmc9)=0x%lx m=%d i=%d f=%d b=%d match=0x%x mask=0x%x]\n", reg.pmc_val, reg.pmc8_9_ita_reg.m, reg.pmc8_9_ita_reg.i, reg.pmc8_9_ita_reg.f, reg.pmc8_9_ita_reg.b, reg.pmc8_9_ita_reg.match, reg.pmc8_9_ita_reg.mask); } outp->pfp_pmc_count = pos; return PFMLIB_SUCCESS; } static int pfm_dispatch_btb(pfmlib_input_param_t *inp, pfmlib_ita_input_param_t *mod_in, pfmlib_output_param_t *outp) { pfm_ita_pmc_reg_t reg; pfmlib_ita_input_param_t *param = mod_in; pfmlib_ita_input_param_t fake_param; pfmlib_reg_t *pc, *pd; int found_btb=0; unsigned int i, count; unsigned int pos1, pos2; reg.pmc_val = 0; pc = outp->pfp_pmcs; pd = outp->pfp_pmds; pos1 = outp->pfp_pmc_count; pos2 = outp->pfp_pmd_count; count = inp->pfp_event_count; for (i=0; i < count; i++) { if (is_btb(inp->pfp_events[i].event)) found_btb = 1; } if (param == NULL || param->pfp_ita_btb.btb_used == 0) { /* * case 3: no BTB event, no param */ if (found_btb == 0) return PFMLIB_SUCCESS; /* * case 1: BTB event, no param, capture all branches */ memset(&fake_param, 0, sizeof(fake_param)); param = &fake_param; param->pfp_ita_btb.btb_tar = 0x1; /* capture TAR */ param->pfp_ita_btb.btb_tm = 0x3; /* all branches */ param->pfp_ita_btb.btb_ptm = 0x3; /* all branches */ param->pfp_ita_btb.btb_ppm = 0x3; /* all branches */ param->pfp_ita_btb.btb_tac = 0x1; /* capture TAC */ param->pfp_ita_btb.btb_bac = 0x1; /* capture BAC */ DPRINT("BTB event with no info\n"); } /* * case 2: BTB event, param * case 4: no BTB event, param (free running mode) */ /* if plm is 0, then assume not specified per-event and use default */ reg.pmc12_ita_reg.btbc_plm = param->pfp_ita_btb.btb_plm ? param->pfp_ita_btb.btb_plm : inp->pfp_dfl_plm; reg.pmc12_ita_reg.btbc_pm = inp->pfp_flags & PFMLIB_PFP_SYSTEMWIDE ? 
1 : 0; reg.pmc12_ita_reg.btbc_tar = param->pfp_ita_btb.btb_tar & 0x1; reg.pmc12_ita_reg.btbc_tm = param->pfp_ita_btb.btb_tm & 0x3; reg.pmc12_ita_reg.btbc_ptm = param->pfp_ita_btb.btb_ptm & 0x3; reg.pmc12_ita_reg.btbc_ppm = param->pfp_ita_btb.btb_ppm & 0x3; reg.pmc12_ita_reg.btbc_bpt = param->pfp_ita_btb.btb_tac & 0x1; reg.pmc12_ita_reg.btbc_bac = param->pfp_ita_btb.btb_bac & 0x1; if (pfm_regmask_isset(&inp->pfp_unavail_pmcs, 12)) return PFMLIB_ERR_NOASSIGN; pc[pos1].reg_num = 12; pc[pos1].reg_value = reg.pmc_val; pc[pos1].reg_value = 12; pos1++; __pfm_vbprintf("[PMC12(pmc12)=0x%lx plm=%d pm=%d tar=%d tm=%d ptm=%d ppm=%d bpt=%d bac=%d]\n", reg.pmc_val, reg.pmc12_ita_reg.btbc_plm, reg.pmc12_ita_reg.btbc_pm, reg.pmc12_ita_reg.btbc_tar, reg.pmc12_ita_reg.btbc_tm, reg.pmc12_ita_reg.btbc_ptm, reg.pmc12_ita_reg.btbc_ppm, reg.pmc12_ita_reg.btbc_bpt, reg.pmc12_ita_reg.btbc_bac); /* * PMD16 is included in list of used PMD */ for(i=8; i < 17; i++, pos2++) { pd[pos2].reg_num = i; pd[pos2].reg_addr = i; pd[pos2].reg_alt_addr = i; __pfm_vbprintf("[PMD%u(pmd%u)]\n", pd[pos2].reg_num, pd[pos2].reg_num); } /* update final number of entries used */ outp->pfp_pmc_count = pos1; outp->pfp_pmd_count = pos2; return PFMLIB_SUCCESS; } /* * mode = 0 -> check code (enforce bundle alignment) * mode = 1 -> check data */ static int check_intervals(pfmlib_ita_input_rr_t *irr, int mode, int *n_intervals) { int i; pfmlib_ita_input_rr_desc_t *lim = irr->rr_limits; for(i=0; i < 4; i++) { /* end marker */ if (lim[i].rr_start == 0 && lim[i].rr_end == 0) break; /* invalid entry */ if (lim[i].rr_start >= lim[i].rr_end) return PFMLIB_ERR_IRRINVAL; if (mode == 0 && (lim[i].rr_start & 0xf || lim[i].rr_end & 0xf)) return PFMLIB_ERR_IRRALIGN; } *n_intervals = i; return PFMLIB_SUCCESS; } static void do_normal_rr(unsigned long start, unsigned long end, pfmlib_reg_t *br, int nbr, int dir, int *idx, int *reg_idx, int plm) { unsigned long size, l_addr, c; unsigned long l_offs = 0, r_offs = 0; unsigned long 
l_size, r_size; dbreg_t db; int p2; if (nbr < 1 || end <= start) return; size = end - start; DPRINT("start=0x%016lx end=0x%016lx size=0x%lx bytes (%lu bundles) nbr=%d dir=%d\n", start, end, size, size >> 4, nbr, dir); p2 = pfm_ia64_fls(size); c = ALIGN_DOWN(end, p2); DPRINT("largest power of two possible: 2^%d=0x%lx, crossing=0x%016lx\n", p2, 1UL << p2, c); if ((c - (1UL<= start) { l_addr = c - (1UL << p2); } else { p2--; if ((c + (1UL<>l_offs: 0x%lx\n", l_offs); } } else if (dir == 1 && r_size != 0 && nbr == 1) { p2++; l_addr = start; if (PFMLIB_DEBUG()) { r_offs = l_addr+(1UL<>r_offs: 0x%lx\n", r_offs); } } l_size = l_addr - start; r_size = end - l_addr-(1UL<>largest chunk: 2^%d @0x%016lx-0x%016lx\n", p2, l_addr, l_addr+(1UL<>before: 0x%016lx-0x%016lx\n", start, l_addr); if (r_size && !r_offs) DPRINT(">>after : 0x%016lx-0x%016lx\n", l_addr+(1UL<>1; if (nbr & 0x1) { /* * our simple heuristic is: * we assign the largest number of registers to the largest * of the two chunks */ if (l_size > r_size) { l_nbr++; } else { r_nbr++; } } do_normal_rr(start, l_addr, br, l_nbr, 0, idx, reg_idx, plm); do_normal_rr(l_addr+(1UL<rr_start, in_rr->rr_end, n_pairs); __pfm_vbprintf("start offset: -0x%lx end_offset: +0x%lx\n", out_rr->rr_soff, out_rr->rr_eoff); for (j=0; j < n_pairs; j++, base_idx += 2) { d.val = dbr[base_idx+1].reg_value; r_end = dbr[base_idx].reg_value+((~(d.db.db_mask)) & ~(0xffUL << 56)); __pfm_vbprintf("brp%u: db%u: 0x%016lx db%u: plm=0x%x mask=0x%016lx end=0x%016lx\n", dbr[base_idx].reg_num>>1, dbr[base_idx].reg_num, dbr[base_idx].reg_value, dbr[base_idx+1].reg_num, d.db.db_plm, (unsigned long) d.db.db_mask, r_end); } } static int compute_normal_rr(pfmlib_ita_input_rr_t *irr, int dfl_plm, int n, int *base_idx, pfmlib_ita_output_rr_t *orr) { pfmlib_ita_input_rr_desc_t *in_rr; pfmlib_ita_output_rr_desc_t *out_rr; unsigned long r_end; pfmlib_reg_t *br; dbreg_t d; int i, j, br_index, reg_idx, prev_index; in_rr = irr->rr_limits; out_rr = orr->rr_infos; br = 
orr->rr_br; reg_idx = *base_idx; br_index = 0; for (i=0; i < n; i++, in_rr++, out_rr++) { /* * running out of registers */ if (br_index == 8) break; prev_index = br_index; do_normal_rr( in_rr->rr_start, in_rr->rr_end, br, 4 - (reg_idx>>1), /* how many pairs available */ 0, &br_index, ®_idx, in_rr->rr_plm ? in_rr->rr_plm : dfl_plm); DPRINT("br_index=%d reg_idx=%d\n", br_index, reg_idx); /* * compute offsets */ out_rr->rr_soff = out_rr->rr_eoff = 0; for(j=prev_index; j < br_index; j+=2) { d.val = br[j+1].reg_value; r_end = br[j].reg_value+((~(d.db.db_mask)+1) & ~(0xffUL << 56)); if (br[j].reg_value <= in_rr->rr_start) out_rr->rr_soff = in_rr->rr_start - br[j].reg_value; if (r_end >= in_rr->rr_end) out_rr->rr_eoff = r_end - in_rr->rr_end; } if (PFMLIB_VERBOSE()) print_one_range(in_rr, out_rr, br, prev_index, (br_index-prev_index)>>1); } /* do not have enough registers to cover all the ranges */ if (br_index == 8 && i < n) return PFMLIB_ERR_TOOMANY; orr->rr_nbr_used = br_index; return PFMLIB_SUCCESS; } static int pfm_dispatch_irange(pfmlib_input_param_t *inp, pfmlib_ita_input_param_t *mod_in, pfmlib_output_param_t *outp, pfmlib_ita_output_param_t *mod_out) { pfm_ita_pmc_reg_t reg; pfmlib_ita_input_param_t *param = mod_in; pfmlib_reg_t *pc = outp->pfp_pmcs; pfmlib_ita_input_rr_t *irr; pfmlib_ita_output_rr_t *orr; int pos = outp->pfp_pmc_count; int ret, base_idx = 0; int n_intervals; if (param == NULL || param->pfp_ita_irange.rr_used == 0) return PFMLIB_SUCCESS; if (mod_out == NULL) return PFMLIB_ERR_INVAL; irr = ¶m->pfp_ita_irange; orr = &mod_out->pfp_ita_irange; ret = check_intervals(irr, 0, &n_intervals); if (ret != PFMLIB_SUCCESS) return ret; if (n_intervals < 1) return PFMLIB_ERR_IRRINVAL; DPRINT("n_intervals=%d\n", n_intervals); ret = compute_normal_rr(irr, inp->pfp_dfl_plm, n_intervals, &base_idx, orr); if (ret != PFMLIB_SUCCESS) { return ret == PFMLIB_ERR_TOOMANY ? 
PFMLIB_ERR_IRRTOOMANY : ret; } reg.pmc_val = 0; reg.pmc13_ita_reg.irange_ta = 0x0; if (pfm_regmask_isset(&inp->pfp_unavail_pmcs, 13)) return PFMLIB_ERR_NOASSIGN; pc[pos].reg_num = 13; pc[pos].reg_value = reg.pmc_val; pc[pos].reg_addr = 13; pc[pos].reg_alt_addr= 13; pos++; __pfm_vbprintf("[PMC13(pmc13)=0x%lx ta=%d]\n", reg.pmc_val, reg.pmc13_ita_reg.irange_ta); outp->pfp_pmc_count = pos; return PFMLIB_SUCCESS; } static int pfm_dispatch_drange(pfmlib_input_param_t *inp, pfmlib_ita_input_param_t *mod_in, pfmlib_output_param_t *outp, pfmlib_ita_output_param_t *mod_out) { pfmlib_ita_input_param_t *param = mod_in; pfmlib_event_t *e = inp->pfp_events; pfmlib_reg_t *pc = outp->pfp_pmcs; pfmlib_ita_input_rr_t *irr; pfmlib_ita_output_rr_t *orr; pfm_ita_pmc_reg_t reg; unsigned int i, count; int pos = outp->pfp_pmc_count; int ret, base_idx = 0; int n_intervals; if (param == NULL || param->pfp_ita_drange.rr_used == 0) return PFMLIB_SUCCESS; if (mod_out == NULL) return PFMLIB_ERR_INVAL; irr = ¶m->pfp_ita_drange; orr = &mod_out->pfp_ita_drange; ret = check_intervals(irr, 1 , &n_intervals); if (ret != PFMLIB_SUCCESS) return ret; if (n_intervals < 1) return PFMLIB_ERR_DRRINVAL; DPRINT("n_intervals=%d\n", n_intervals); ret = compute_normal_rr(irr, inp->pfp_dfl_plm, n_intervals, &base_idx, orr); if (ret != PFMLIB_SUCCESS) { return ret == PFMLIB_ERR_TOOMANY ? 
PFMLIB_ERR_DRRTOOMANY : ret;
	}
	count = inp->pfp_event_count;
	for (i=0; i < count; i++) {
		if (is_dear(e[i].event)) return PFMLIB_SUCCESS; /* will be done there */
	}

	reg.pmc_val = 0UL;

	/*
	 * here we have no other choice but to use the default priv level as there is no
	 * specific D-EAR event provided
	 */
	reg.pmc11_ita_reg.dear_plm = inp->pfp_dfl_plm;

	if (pfm_regmask_isset(&inp->pfp_unavail_pmcs, 11))
		return PFMLIB_ERR_NOASSIGN;

	pc[pos].reg_num     = 11;
	pc[pos].reg_value   = reg.pmc_val;
	pc[pos].reg_addr    = 11;
	pc[pos].reg_alt_addr= 11;
	pos++;

	__pfm_vbprintf("[PMC11(pmc11)=0x%lx tlb=%s plm=%d pm=%d ism=0x%x umask=0x%x pt=%d]\n",
			reg.pmc_val,
			reg.pmc11_ita_reg.dear_tlb ? "Yes" : "No",
			reg.pmc11_ita_reg.dear_plm,
			reg.pmc11_ita_reg.dear_pm,
			reg.pmc11_ita_reg.dear_ism,
			reg.pmc11_ita_reg.dear_umask,
			reg.pmc11_ita_reg.dear_pt);

	outp->pfp_pmc_count = pos;

	return PFMLIB_SUCCESS;
}

/*
 * Verify that every requested event supports the qualifiers enabled in mod_in
 * (code range, data range, opcode matching).
 *
 * mod_in is dereferenced unconditionally, so the caller must pass a non-NULL
 * pointer. Returns PFMLIB_ERR_FEATCOMB for the first event lacking a requested
 * qualifier, PFMLIB_SUCCESS otherwise.
 */
static int
check_qualifier_constraints(pfmlib_input_param_t *inp, pfmlib_ita_input_param_t *mod_in)
{
	pfmlib_event_t *e = inp->pfp_events;
	unsigned int i, count;

	count = inp->pfp_event_count;
	for(i=0; i < count; i++) {
		/*
		 * skip check for counter which requested it. Use at your own risk.
		 * Not all counters have necessarily been validated for use with
		 * qualifiers. Typically the event is counted as if no constraint
		 * existed.
		 */
		if (mod_in->pfp_ita_counters[i].flags & PFMLIB_ITA_FL_EVT_NO_QUALCHECK) continue;

		if (evt_use_irange(mod_in) && has_iarr(e[i].event) == 0) return PFMLIB_ERR_FEATCOMB;
		if (evt_use_drange(mod_in) && has_darr(e[i].event) == 0) return PFMLIB_ERR_FEATCOMB;
		if (evt_use_opcm(mod_in) && has_opcm(e[i].event) == 0) return PFMLIB_ERR_FEATCOMB;
	}
	return PFMLIB_SUCCESS;
}

/*
 * When a code or data range restriction is in use, reject any event whose
 * non-zero per-event plm differs from inp->pfp_dfl_plm.
 *
 * mod_in is dereferenced unconditionally, so the caller must pass a non-NULL
 * pointer. Returns PFMLIB_ERR_FEATCOMB on a mismatch, PFMLIB_SUCCESS otherwise.
 */
static int
check_range_plm(pfmlib_input_param_t *inp, pfmlib_ita_input_param_t *mod_in)
{
	unsigned int i, count;

	if (mod_in->pfp_ita_drange.rr_used == 0 && mod_in->pfp_ita_irange.rr_used == 0) return PFMLIB_SUCCESS;

	/*
	 * range restriction applies to all events, therefore we must have a consistent
	 * set of plm and they must match the pfp_dfl_plm which is used to setup the debug
	 * registers
	 */
	count = inp->pfp_event_count;
	for(i=0; i < count; i++) {
		if (inp->pfp_events[i].plm  && inp->pfp_events[i].plm != inp->pfp_dfl_plm) return PFMLIB_ERR_FEATCOMB;
	}
	return PFMLIB_SUCCESS;
}

/*
 * Entry point used for the dispatch_events callback: runs the qualifier
 * checks, then each dispatcher in turn, returning the first error encountered.
 */
static int
pfm_ita_dispatch_events(pfmlib_input_param_t *inp, void *model_in, pfmlib_output_param_t *outp, void *model_out)
{
	int ret;
	pfmlib_ita_input_param_t *mod_in  = (pfmlib_ita_input_param_t *)model_in;
	pfmlib_ita_output_param_t *mod_out = (pfmlib_ita_output_param_t *)model_out;

	/*
	 * nothing will come out of this combination
	 */
	if (mod_out && mod_in == NULL) return PFMLIB_ERR_INVAL;

	/* check opcode match, range restriction qualifiers */
	if (mod_in && check_qualifier_constraints(inp, mod_in) != PFMLIB_SUCCESS) return PFMLIB_ERR_FEATCOMB;

	/* check for problems with range restriction and per-event plm */
	if (mod_in && check_range_plm(inp, mod_in) != PFMLIB_SUCCESS) return PFMLIB_ERR_FEATCOMB;

	ret = pfm_ita_dispatch_counters(inp, mod_in, outp);
	if (ret != PFMLIB_SUCCESS) return ret;

	/* now check for I-EAR */
	ret = pfm_dispatch_iear(inp, mod_in, outp);
	if (ret != PFMLIB_SUCCESS) return ret;

	/* now check for D-EAR */
	ret = pfm_dispatch_dear(inp, mod_in, outp);
	if (ret != PFMLIB_SUCCESS) return ret;

	/* now check for Opcode matchers */
	ret
= pfm_dispatch_opcm(inp, mod_in, outp); if (ret != PFMLIB_SUCCESS) return ret; ret = pfm_dispatch_btb(inp, mod_in, outp); if (ret != PFMLIB_SUCCESS) return ret; ret = pfm_dispatch_irange(inp, mod_in, outp, mod_out);; if (ret != PFMLIB_SUCCESS) return ret; ret = pfm_dispatch_drange(inp, mod_in, outp, mod_out);; return ret; } /* XXX: return value is also error code */ int pfm_ita_get_event_maxincr(unsigned int i, unsigned int *maxincr) { if (i >= PME_ITA_EVENT_COUNT || maxincr == NULL) return PFMLIB_ERR_INVAL; *maxincr = itanium_pe[i].pme_maxincr; return PFMLIB_SUCCESS; } int pfm_ita_is_ear(unsigned int i) { return i >= PME_ITA_EVENT_COUNT || ! is_ear(i) ? 0 : 1; } int pfm_ita_is_dear(unsigned int i) { return i >= PME_ITA_EVENT_COUNT || ! is_dear(i) ? 0 : 1; } int pfm_ita_is_dear_tlb(unsigned int i) { return i >= PME_ITA_EVENT_COUNT || ! (is_dear(i) && is_ear_tlb(i)) ? 0 : 1; } int pfm_ita_is_dear_cache(unsigned int i) { return i >= PME_ITA_EVENT_COUNT || ! (is_dear(i) && !is_ear_tlb(i)) ? 0 : 1; } int pfm_ita_is_iear(unsigned int i) { return i >= PME_ITA_EVENT_COUNT || ! is_iear(i) ? 0 : 1; } int pfm_ita_is_iear_tlb(unsigned int i) { return i >= PME_ITA_EVENT_COUNT || ! (is_iear(i) && is_ear_tlb(i)) ? 0 : 1; } int pfm_ita_is_iear_cache(unsigned int i) { return i >= PME_ITA_EVENT_COUNT || ! (is_iear(i) && !is_ear_tlb(i)) ? 0 : 1; } int pfm_ita_is_btb(unsigned int i) { return i >= PME_ITA_EVENT_COUNT || ! is_btb(i) ? 0 : 1; } int pfm_ita_support_iarr(unsigned int i) { return i >= PME_ITA_EVENT_COUNT || ! has_iarr(i) ? 0 : 1; } int pfm_ita_support_darr(unsigned int i) { return i >= PME_ITA_EVENT_COUNT || ! has_darr(i) ? 0 : 1; } int pfm_ita_support_opcm(unsigned int i) { return i >= PME_ITA_EVENT_COUNT || ! has_opcm(i) ? 0 : 1; } int pfm_ita_get_ear_mode(unsigned int i, pfmlib_ita_ear_mode_t *m) { if (!is_ear(i) || m == NULL) return PFMLIB_ERR_INVAL; *m = is_ear_tlb(i) ? 
PFMLIB_ITA_EAR_TLB_MODE : PFMLIB_ITA_EAR_CACHE_MODE; return PFMLIB_SUCCESS; } static int pfm_ita_get_event_code(unsigned int i, unsigned int cnt, int *code) { if (cnt != PFMLIB_CNT_FIRST && (cnt < 4 || cnt > 7)) return PFMLIB_ERR_INVAL; *code = (int)itanium_pe[i].pme_code; return PFMLIB_SUCCESS; } /* * This function is accessible directly to the user */ int pfm_ita_get_event_umask(unsigned int i, unsigned long *umask) { if (i >= PME_ITA_EVENT_COUNT || umask == NULL) return PFMLIB_ERR_INVAL; *umask = evt_umask(i); return PFMLIB_SUCCESS; } static char * pfm_ita_get_event_name(unsigned int i) { return itanium_pe[i].pme_name; } static void pfm_ita_get_event_counters(unsigned int j, pfmlib_regmask_t *counters) { unsigned int i; unsigned long m; memset(counters, 0, sizeof(*counters)); m =itanium_pe[j].pme_counters; for(i=0; m ; i++, m>>=1) { if (m & 0x1) pfm_regmask_set(counters, i); } } static void pfm_ita_get_impl_pmcs(pfmlib_regmask_t *impl_pmcs) { unsigned int i = 0; /* all pmcs are contiguous */ for(i=0; i < PMU_ITA_NUM_PMCS; i++) pfm_regmask_set(impl_pmcs, i); } static void pfm_ita_get_impl_pmds(pfmlib_regmask_t *impl_pmds) { unsigned int i = 0; /* all pmds are contiguous */ for(i=0; i < PMU_ITA_NUM_PMDS; i++) pfm_regmask_set(impl_pmds, i); } static void pfm_ita_get_impl_counters(pfmlib_regmask_t *impl_counters) { unsigned int i = 0; /* counting pmds are contiguous */ for(i=4; i < 8; i++) pfm_regmask_set(impl_counters, i); } static void pfm_ita_get_hw_counter_width(unsigned int *width) { *width = PMU_ITA_COUNTER_WIDTH; } static int pfm_ita_get_cycle_event(pfmlib_event_t *e) { e->event = PME_ITA_CPU_CYCLES; return PFMLIB_SUCCESS; } static int pfm_ita_get_inst_retired(pfmlib_event_t *e) { e->event = PME_ITA_IA64_INST_RETIRED; return PFMLIB_SUCCESS; } pfm_pmu_support_t itanium_support={ .pmu_name = "itanium", .pmu_type = PFMLIB_ITANIUM_PMU, .pme_count = PME_ITA_EVENT_COUNT, .pmc_count = PMU_ITA_NUM_PMCS, .pmd_count = PMU_ITA_NUM_PMDS, .num_cnt = PMU_ITA_NUM_COUNTERS, 
.get_event_code = pfm_ita_get_event_code, .get_event_name = pfm_ita_get_event_name, .get_event_counters = pfm_ita_get_event_counters, .dispatch_events = pfm_ita_dispatch_events, .pmu_detect = pfm_ita_detect, .get_impl_pmcs = pfm_ita_get_impl_pmcs, .get_impl_pmds = pfm_ita_get_impl_pmds, .get_impl_counters = pfm_ita_get_impl_counters, .get_hw_counter_width = pfm_ita_get_hw_counter_width, .get_cycle_event = pfm_ita_get_cycle_event, .get_inst_retired_event = pfm_ita_get_inst_retired /* no event description available for Itanium */ }; papi-5.6.0/doc/Makefile000664 001750 001750 00000001701 13216244355 016742 0ustar00jshenry1963jshenry1963000000 000000 .PHONY: clean clobber distclean install force_me all all: man @echo "Built PAPI user documentation" html: force_me doxygen Doxyfile-html man: man/man1 man/man3 man/man3: ../src/papi.h ../src/papi.c ../src/papi_hl.c ../src/papi_fwrappers.c doxygen Doxyfile-man3 man/man1: ../src/utils/papi_avail.c ../src/utils/papi_clockres.c ../src/utils/papi_command_line.c ../src/utils/papi_component_avail.c ../src/utils/papi_cost.c ../src/utils/papi_decode.c ../src/utils/papi_error_codes.c ../src/utils/papi_event_chooser.c ../src/utils/papi_xml_event_info.c ../src/utils/papi_mem_info.c ../src/utils/papi_multiplex_cost.c ../src/utils/papi_native_avail.c ../src/utils/papi_version.c doxygen Doxyfile-man1 clean: rm -rf man html doxyerror distclean clobber: clean install: man -rm -f man/man3/HighLevelInfo.3 -rm -f man/man3/papi_data_structures.3 -rm -r ../man/man1/*.1 ../man/man3/*.3 -cp -R man/man1/*.1 ../man/man1 -cp -R man/man3/*.3 ../man/man3 papi-5.6.0/src/perfctr-2.6.x/etc/000775 001750 001750 00000000000 13216244366 020320 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/libpfm-3.y/examples_v2.x/task.c000664 001750 001750 00000017303 13216244362 023010 0ustar00jshenry1963jshenry1963000000 000000 /* * task.c - example of a task monitoring another one * * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. 
* Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "detect_pmcs.h" #define NUM_PMCS PFMLIB_MAX_PMCS #define NUM_PMDS PFMLIB_MAX_PMDS #define MAX_EVT_NAME_LEN 128 static void fatal_error(char *fmt,...) __attribute__((noreturn)); static void fatal_error(char *fmt, ...) { va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); exit(1); } int child(char **arg) { /* * will cause the program to stop before executing the first * user level instruction. We can only attach (load) a context * if the task is in the STOPPED state. 
*/ ptrace(PTRACE_TRACEME, 0, NULL, NULL); /* * execute the requested command */ execvp(arg[0], arg); fatal_error("cannot exec: %s\n", arg[0]); /* not reached */ } int parent(char **arg) { pfmlib_input_param_t inp; pfmlib_output_param_t outp; pfarg_ctx_t ctx[1]; pfarg_pmc_t pc[NUM_PMCS]; pfarg_pmd_t pd[NUM_PMDS]; pfarg_load_t load_args; unsigned int i, num_counters; int status, ret; int ctx_fd; pid_t pid; char name[MAX_EVT_NAME_LEN]; memset(pc, 0, sizeof(pc)); memset(pd, 0, sizeof(pd)); memset(ctx, 0, sizeof(ctx)); memset(&inp,0, sizeof(inp)); memset(&outp,0, sizeof(outp)); memset(&load_args,0, sizeof(load_args)); pfm_get_num_counters(&num_counters); if (pfm_get_cycle_event(&inp.pfp_events[0]) != PFMLIB_SUCCESS) fatal_error("cannot find cycle event\n"); if (pfm_get_inst_retired_event(&inp.pfp_events[1]) != PFMLIB_SUCCESS) fatal_error("cannot find inst retired event\n"); i = 2; /* * set the privilege mode: * PFM_PLM3 : user level * PFM_PLM0 : kernel level */ inp.pfp_dfl_plm = PFM_PLM3; /* * how many counters we use */ if (num_counters < i) { i = num_counters; printf("too many events provided (max=%d events), using first %d event(s)\n", num_counters, i); } inp.pfp_event_count = i; /* * now create a context. we will later attach it to the task we are creating. */ ctx_fd = pfm_create_context(ctx, NULL, NULL, 0); if (ctx_fd == -1) { if (errno == ENOSYS) { fatal_error("Your kernel does not have performance monitoring support!\n"); } fatal_error("Can't create PFM context %s\n", strerror(errno)); } /* * build the pfp_unavail_pmcs bitmask by looking * at what perfmon has available. It is not always * the case that all PMU registers are actually available * to applications. For instance, on IA-32 platforms, some * registers may be reserved for the NMI watchdog timer. * * With this bitmap, the library knows which registers NOT to * use. Of source, it is possible that no valid assignement may * be possible if certina PMU registers are not available. 
*/ detect_unavail_pmcs(ctx_fd, &inp.pfp_unavail_pmcs); /* * let the library figure out the values for the PMCS */ if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) { fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); } /* * Now prepare the argument to initialize the PMDs and PMCS. * We use pfp_pmc_count to determine the number of PMC to intialize. * We use pfp_pmd_count to determine the number of PMD to initialize. * Some events/features may cause extra PMCs to be used, leading to: * - pfp_pmc_count may be >= pfp_event_count * - pfp_pmd_count may be >= pfp_event_count */ for (i=0; i < outp.pfp_pmc_count; i++) { pc[i].reg_num = outp.pfp_pmcs[i].reg_num; pc[i].reg_value = outp.pfp_pmcs[i].reg_value; } for(i=0; i < outp.pfp_pmd_count; i++) pd[i].reg_num = outp.pfp_pmds[i].reg_num; /* * Now program the registers */ if (pfm_write_pmcs(ctx_fd, pc, outp.pfp_pmc_count) == -1) fatal_error("pfm_write_pmcs error errno %d\n",errno); if (pfm_write_pmds(ctx_fd, pd, outp.pfp_pmd_count) == -1) fatal_error("pfm_write_pmds error errno %d\n",errno); /* * Create the child task */ if ((pid=fork()) == -1) fatal_error("Cannot fork process\n"); /* * and launch the child code */ if (pid == 0) { close(ctx_fd); exit(child(arg)); } /* * wait for the child to exec */ waitpid(pid, &status, WUNTRACED); /* * check if process exited early */ if (WIFEXITED(status)) fatal_error("command %s exited too early with status %d\n", arg[0], WEXITSTATUS(status)); /* * the task is stopped at this point */ /* * now we load (i.e., attach) the context to ourself */ load_args.load_pid = pid; if (pfm_load_context(ctx_fd, &load_args) == -1) fatal_error("pfm_load_context error errno %d\n",errno); /* * activate monitoring. The task is still STOPPED at this point. Monitoring * will not take effect until the execution of the task is resumed. 
*/ if (pfm_start(ctx_fd, NULL) == -1) fatal_error("pfm_start error errno %d\n",errno); /* * now resume execution of the task, effectively activating * monitoring. */ ptrace(PTRACE_DETACH, pid, NULL, 0); /* * now the task is running */ /* * simply wait for completion */ waitpid(pid, &status, 0); /* * the task has disappeared at this point but our context is still * present and contains all the latest counts. */ /* * now simply read the results. */ if (pfm_read_pmds(ctx_fd, pd, inp.pfp_event_count) == -1) fatal_error("pfm_read_pmds error errno %d\n",errno); /* * print the results * * It is important to realize, that the first event we specified may not * be in PMD4. Not all events can be measured by any monitor. That's why * we need to use the pc[] array to figure out where event i was allocated. * */ for (i=0; i < inp.pfp_event_count; i++) { pfm_get_full_event_name(&inp.pfp_events[i], name, MAX_EVT_NAME_LEN); printf("PMD%-3u %20"PRIu64" %s\n", pd[i].reg_num, pd[i].reg_value, name); } /* * free the context */ close(ctx_fd); return 0; } int main(int argc, char **argv) { pfmlib_options_t pfmlib_options; int ret; if (argc < 2) { fatal_error("You must specify a command to execute\n"); } /* * pass options to library (optional) */ memset(&pfmlib_options, 0, sizeof(pfmlib_options)); pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ pfmlib_options.pfm_verbose= 1; /* set to 1 for verbose */ pfm_set_options(&pfmlib_options); /* * Initialize pfm library (required before we can use it) */ ret = pfm_initialize(); if (ret != PFMLIB_SUCCESS) fatal_error("Cannot initialize library: %s\n", pfm_strerror(ret)); return parent(argv+1); } papi-5.6.0/src/libpfm4/lib/pfmlib_powerpc_nest.c000664 001750 001750 00000004600 13216244365 023636 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_powerpc_nest.c */ #include "pfmlib_priv.h" #include "pfmlib_power_priv.h" #include "events/powerpc_nest_events.h" static int pfm_powerpc_nest_detect(void* this) { if (__is_processor(PV_POWER8)) 
return PFM_SUCCESS; return PFM_ERR_NOTSUPP; } pfmlib_pmu_t powerpc_nest_mcs_read_support={ .desc = "POWERPC_NEST_MCS_RD_BW", .name = "powerpc_nest_mcs_read", .pmu = PFM_PMU_POWERPC_NEST_MCS_READ_BW, .perf_name = "Nest_MCS_Read_BW", .pme_count = LIBPFM_ARRAY_SIZE(powerpc_nest_read_pe), .type = PFM_PMU_TYPE_UNCORE, .num_cntrs = 4, .num_fixed_cntrs = 0, .max_encoding = 1, .pe = powerpc_nest_read_pe, .pmu_detect = pfm_powerpc_nest_detect, .get_event_encoding[PFM_OS_NONE] = pfm_gen_powerpc_get_encoding, PFMLIB_ENCODE_PERF(pfm_gen_powerpc_get_nest_perf_encoding), PFMLIB_VALID_PERF_PATTRS(pfm_gen_powerpc_perf_validate_pattrs), .get_event_first = pfm_gen_powerpc_get_event_first, .get_event_next = pfm_gen_powerpc_get_event_next, .event_is_valid = pfm_gen_powerpc_event_is_valid, .validate_table = pfm_gen_powerpc_validate_table, .get_event_info = pfm_gen_powerpc_get_event_info, .get_event_attr_info = pfm_gen_powerpc_get_event_attr_info, }; pfmlib_pmu_t powerpc_nest_mcs_write_support={ .desc = "POWERPC_NEST_MCS_WR_BW", .name = "powerpc_nest_mcs_write", .pmu = PFM_PMU_POWERPC_NEST_MCS_WRITE_BW, .perf_name = "Nest_MCS_Write_BW", .pme_count = LIBPFM_ARRAY_SIZE(powerpc_nest_write_pe), .type = PFM_PMU_TYPE_UNCORE, .num_cntrs = 4, .num_fixed_cntrs = 0, .max_encoding = 1, .pe = powerpc_nest_write_pe, .pmu_detect = pfm_powerpc_nest_detect, .get_event_encoding[PFM_OS_NONE] = pfm_gen_powerpc_get_encoding, PFMLIB_ENCODE_PERF(pfm_gen_powerpc_get_nest_perf_encoding), PFMLIB_VALID_PERF_PATTRS(pfm_gen_powerpc_perf_validate_pattrs), .get_event_first = pfm_gen_powerpc_get_event_first, .get_event_next = pfm_gen_powerpc_get_event_next, .event_is_valid = pfm_gen_powerpc_event_is_valid, .validate_table = pfm_gen_powerpc_validate_table, .get_event_info = pfm_gen_powerpc_get_event_info, .get_event_attr_info = pfm_gen_powerpc_get_event_attr_info, }; papi-5.6.0/src/libpfm-3.y/examples_v3.x/x86/smpl_core_pebs_sys.c000664 001750 001750 00000027354 13216244362 026375 0ustar00jshenry1963jshenry1963000000 
000000 /* * smpl_core_pebs_sys.c - Intel Core processor PEBS system-wide example * * Copyright (c) 2008 Google, Inc * Contributed by Stephane Eranian * * Based on code: * Copyright (c) 2006 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "../detect_pmcs.h" #define SMPL_EVENT "INSTRUCTIONS_RETIRED" /* not all event support PEBS */ #define NUM_PMCS 16 #define NUM_PMDS 16 #define SMPL_PERIOD 100000ULL /* must not use more bits than actual HW counter width */ typedef pfm_pebs_core_smpl_hdr_t smpl_hdr_t; typedef pfm_pebs_core_smpl_entry_t smpl_entry_t; typedef pfm_pebs_core_smpl_arg_t smpl_arg_t; #define FMT_NAME PFM_PEBS_CORE_SMPL_NAME static uint64_t collected_samples; static void fatal_error(char *fmt,...) __attribute__((noreturn)); static void fatal_error(char *fmt, ...) { va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); exit(1); } static void warning(char *fmt, ...) { va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); } int child(char **arg) { /* * force the task to stop before executing the first * user level instruction */ execvp(arg[0], arg); /* not reached */ exit(1); } static void process_smpl_buf(smpl_hdr_t *hdr) { static uint64_t last_overflow = ~0; /* initialize to biggest value possible */ static uint64_t last_count; smpl_entry_t *ent; uint64_t entry; unsigned long count; count = (hdr->ds.pebs_index - hdr->ds.pebs_buf_base)/sizeof(*ent); if (hdr->overflows == last_overflow && last_count == count) { warning("skipping identical set of samples %"PRIu64" = %"PRIu64"\n", hdr->overflows, last_overflow); return; } last_count = count; last_overflow = hdr->overflows; /* * the beginning of the buffer does not necessarily follow the header * due to alignement. 
*/ ent = (smpl_entry_t *)((unsigned long)(hdr+1)+ hdr->start_offs); entry = collected_samples; while(count--) { /* * print some of the machine registers of each sample */ printf("entry %06"PRIu64" eflags:0x%08llx EAX:0x%08llx ESP:0x%08llx IP:0x%08llx\n", entry, (unsigned long long)ent->eflags, (unsigned long long)ent->eax, (unsigned long long)ent->esp, (unsigned long long)ent->ip); ent++; entry++; } collected_samples = entry; } /* * pin task to CPU */ #ifndef __NR_sched_setaffinity #error "you need to define __NR_sched_setaffinity" #endif #define MAX_CPUS 2048 #define NR_CPU_BITS (MAX_CPUS>>3) int pin_cpu(pid_t pid, unsigned int cpu) { uint64_t my_mask[NR_CPU_BITS]; if (cpu >= MAX_CPUS) fatal_error("this program supports only up to %d CPUs\n", MAX_CPUS); my_mask[cpu>>6] = 1ULL << (cpu&63); return syscall(__NR_sched_setaffinity, pid, sizeof(my_mask), &my_mask); } static volatile int done; static void handler(int n) { done = 1; } int main(int argc, char **argv) { pfmlib_input_param_t inp; pfmlib_output_param_t outp; pfmlib_core_input_param_t mod_inp; pfmlib_options_t pfmlib_options; pfarg_pmr_t pc[NUM_PMCS]; pfarg_pmd_attr_t pd[NUM_PMDS]; pfarg_sinfo_t sif; struct pollfd fds; smpl_arg_t buf_arg; pfarg_msg_t msg; smpl_hdr_t *hdr; void *buf_addr; uint64_t pebs_size; pid_t pid; int ret, fd, type; unsigned int i; uint32_t ctx_flags; if (argc < 2) fatal_error("you need to pass a program to sample\n"); if (pfm_initialize() != PFMLIB_SUCCESS) fatal_error("libpfm intialization failed\n"); /* * check we are on an Intel Core PMU */ pfm_get_pmu_type(&type); if (type != PFMLIB_INTEL_CORE_PMU && type != PFMLIB_INTEL_ATOM_PMU) fatal_error("This program only works with an Intel Core processor\n"); /* * pass options to library (optional) */ memset(&pfmlib_options, 0, sizeof(pfmlib_options)); pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ pfmlib_options.pfm_verbose = 1; /* set to 1 for verbose */ pfm_set_options(&pfmlib_options); memset(pd, 0, sizeof(pd)); memset(pc, 0, 
sizeof(pc)); memset(&inp, 0, sizeof(inp)); memset(&outp, 0, sizeof(outp)); memset(&mod_inp, 0, sizeof(mod_inp)); memset(&sif, 0, sizeof(sif)); memset(&buf_arg, 0, sizeof(buf_arg)); memset(&fds, 0, sizeof(fds)); /* * search for our sampling event */ if (pfm_find_full_event(SMPL_EVENT, &inp.pfp_events[0]) != PFMLIB_SUCCESS) fatal_error("cannot find sampling event %s\n", SMPL_EVENT); inp.pfp_event_count = 1; inp.pfp_dfl_plm = PFM_PLM3; /* * important: inform libpfm we do use PEBS */ mod_inp.pfp_core_pebs.pebs_used = 1; /* * sampling buffer parameters */ pebs_size = 3 * getpagesize(); buf_arg.buf_size = pebs_size; /* * sampling period cannot use more bits than HW counter can supoprt */ buf_arg.cnt_reset = -SMPL_PERIOD; /* * We want a system-wide context for sampling */ ctx_flags = PFM_FL_SYSTEM_WIDE | PFM_FL_SMPL_FMT; /* * trigger notification (interrupt) when reaching the very end of * the buffer */ buf_arg.intr_thres = (pebs_size/sizeof(smpl_entry_t))*90/100; /* * we want to measure CPU0, thus we pin ourself to the CPU before invoking * perfmon. This ensures that the sampling buffer will be allocated on the * same NUMA node. 
*/ ret = pin_cpu(getpid(), 0); if (ret) fatal_error("cannot pin on CPU0"); /* * create session and sampling buffer */ fd = pfm_create(ctx_flags, &sif, FMT_NAME, &buf_arg, sizeof(buf_arg)); if (fd == -1) { if (errno == ENOSYS) { fatal_error("Your kernel does not have performance monitoring support!\n"); } fatal_error("cannot create session %s, maybe you do not have the PEBS sampling format in the kernel.\nCheck /sys/kernel/perfmon/formats\n", strerror(errno)); } /* * map buffer into our address space */ buf_addr = mmap(NULL, (size_t)buf_arg.buf_size, PROT_READ, MAP_PRIVATE, fd, 0); printf("session [%d] buffer mapped @%p\n", fd, buf_addr); if (buf_addr == MAP_FAILED) fatal_error("cannot mmap sampling buffer errno %d\n", errno); hdr = (smpl_hdr_t *)buf_addr; printf("pebs_base=0x%llx pebs_end=0x%llx index=0x%llx\n" "intr=0x%llx version=%u.%u\n" "entry_size=%zu ds_size=%zu\n", (unsigned long long)hdr->ds.pebs_buf_base, (unsigned long long)hdr->ds.pebs_abs_max, (unsigned long long)hdr->ds.pebs_index, (unsigned long long)hdr->ds.pebs_intr_thres, PFM_VERSION_MAJOR(hdr->version), PFM_VERSION_MINOR(hdr->version), sizeof(smpl_entry_t), sizeof(hdr->ds)); if (PFM_VERSION_MAJOR(hdr->version) < 1) fatal_error("invalid buffer format version\n"); /* * get which PMC registers are available */ detect_unavail_pmu_regs(&sif, &inp.pfp_unavail_pmcs, NULL); /* * let libpfm figure out how to assign event onto PMU registers */ if (pfm_dispatch_events(&inp, &mod_inp, &outp, NULL) != PFMLIB_SUCCESS) fatal_error("cannot assign event %s\n", SMPL_EVENT); /* * propagate PMC setup from libpfm to perfmon */ for (i=0; i < outp.pfp_pmc_count; i++) { pc[i].reg_num = outp.pfp_pmcs[i].reg_num; pc[i].reg_value = outp.pfp_pmcs[i].reg_value; /* * must disable 64-bit emulation on the PMC0 counter. * PMC0 is the only counter useable with PEBS. We must disable * 64-bit emulation to avoid getting interrupts for each * sampling period, PEBS takes care of this part. 
*/ if (pc[i].reg_num == 0) pc[i].reg_flags = PFM_REGFL_NO_EMUL64; } /* * propagate PMD set from libpfm to perfmon */ for (i=0; i < outp.pfp_pmd_count; i++) pd[i].reg_num = outp.pfp_pmds[i].reg_num; /* * setup sampling period for first counter * we want notification on overflow, i.e., when buffer is full */ pd[0].reg_flags = PFM_REGFL_OVFL_NOTIFY; pd[0].reg_value = -SMPL_PERIOD; pd[0].reg_long_reset = -SMPL_PERIOD; pd[0].reg_short_reset = -SMPL_PERIOD; /* * Now program the registers */ if (pfm_write(fd, 0, PFM_RW_PMC, pc, outp.pfp_pmc_count * sizeof(*pc)) == -1) fatal_error("pfm_write error errno %d\n",errno); if (pfm_write(fd, 0, PFM_RW_PMD_ATTR, pd, outp.pfp_pmd_count * sizeof(*pd)) == -1) fatal_error("pfm_write(PMD) error errno %d\n",errno); /* * attach the session to CPU0 */ if (pfm_attach(fd, 0, 0) == -1) fatal_error("pfm_attach error errno %d\n",errno); /* * Create the child task */ signal(SIGCHLD, handler); if ((pid=fork()) == -1) fatal_error("Cannot fork process\n"); if (pid == 0) { /* child does not inherit context file descriptor */ close(fd); /* if child is too short-lived we may not measure it */ child(argv+1); } /* * start monitoring */ if (pfm_set_state(fd, 0, PFM_ST_START) == -1) fatal_error("pfm_set_state(start) error errno %d\n",errno); fds.fd = fd; fds.events = POLLIN; /* * core loop */ for(;done == 0;) { /* * Must use a timeout to avoid a race condition * with the SIGCHLD signal */ ret = poll(&fds, 1, 500); /* * if timeout expired, then check done */ if (ret == 0) continue; if (ret == -1) { if(ret == -1 && errno == EINTR) { warning("read interrupted, retrying\n"); continue; } fatal_error("poll failed: %s\n", strerror(errno)); } ret = read(fd, &msg, sizeof(msg)); if (ret == -1) fatal_error("cannot read perfmon msg: %s\n", strerror(errno)); switch(msg.type) { case PFM_MSG_OVFL: /* the sampling buffer is full */ process_smpl_buf(hdr); /* * reactivate monitoring once we are done with the samples * in syste-wide, interface guarantees monitoring is 
active * upon return from the pfm_restart() syscall */ if (pfm_set_state(fd, 0, PFM_ST_RESTART) == -1) fatal_error("pfm_set_state(restart) error errno %d\n",errno); break; default: fatal_error("unknown message type %d\n", msg.type); } } /* * cleanup child */ waitpid(pid, NULL, 0); /* * stop monitoring, this is required in order to guarantee that the PEBS buffer * header is updated with the latest position, such that we see see the final * samples */ if (pfm_set_state(fd, 0, PFM_ST_STOP) == -1) fatal_error("pfm_set_state(stop) error errno %d\n",errno); /* * check for any leftover samples. Must have monitoring stopped * for this operation to have guarantee it is up to date */ process_smpl_buf(hdr); /* * close session */ close(fd); /* * unmap sampling buffer and actually free the perfmon session */ munmap(buf_addr, (size_t)buf_arg.buf_size); return 0; } papi-5.6.0/src/libpfm4/lib/events/intel_x86_arch_events.h000664 001750 001750 00000006422 13216244364 025312 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2006-2007 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. */ /* * architected events for architectural perfmon v1 and v2 as defined by the IA-32 developer's manual * Vol 3B, table 18-6 (May 2007) */ static intel_x86_entry_t intel_x86_arch_pe[]={ {.name = "UNHALTED_CORE_CYCLES", .code = 0x003c, .cntmsk = 0x200000000ull, /* temporary */ .desc = "count core clock cycles whenever the clock signal on the specific core is running (not halted)" }, {.name = "INSTRUCTION_RETIRED", .code = 0x00c0, .cntmsk = 0x100000000ull, /* temporary */ .desc = "count the number of instructions at retirement. For instructions that consists of multiple micro-ops, this event counts the retirement of the last micro-op of the instruction", }, {.name = "UNHALTED_REFERENCE_CYCLES", .code = 0x013c, .cntmsk = 0x400000000ull, /* temporary */ .desc = "count reference clock cycles while the clock signal on the specific core is running. The reference clock operates at a fixed frequency, irrespective of core frequency changes due to performance state transitions", }, {.name = "LLC_REFERENCES", .code = 0x4f2e, .desc = "count each request originating from the core to reference a cache line in the last level cache. The count may include speculation, but excludes cache line fills due to hardware prefetch", }, {.name = "LLC_MISSES", .code = 0x412e, .desc = "count each cache miss condition for references to the last level cache. The event count may include speculation, but excludes cache line fills due to hardware prefetch", }, {.name = "BRANCH_INSTRUCTIONS_RETIRED", .code = 0x00c4, .desc = "count branch instructions at retirement. 
/*
 * CPU load generator: count the primes in [1, limit] by naive trial
 * division. The quadratic cost is the point of the routine, so the
 * search is intentionally left unoptimized.
 *
 * Returns the number of primes found (0 when limit < 2).
 */
int ompcpuloadprimes( int limit )
{
    int candidate;
    int primes = 0;

#pragma omp parallel for schedule(dynamic) reduction(+ : primes)
    for (candidate = 1; candidate <= limit; candidate++) {
        int divisor;

        /* advance to the smallest divisor >= 2, or one past the candidate */
        for (divisor = 2; divisor <= candidate && candidate % divisor != 0; divisor++)
            ;

        /* a prime's smallest divisor >= 2 is the number itself */
        primes += (divisor == candidate);
    }

    return primes;
}
(cid==numcmp) { fprintf(stderr,"No libmsr component found\n"); exit(1); } /* Find events in the component */ code = PAPI_NATIVE_MASK; enum_retval = PAPI_enum_cmp_event( &code, PAPI_ENUM_FIRST, cid ); while ( enum_retval == PAPI_OK ) { retval = PAPI_event_code_to_name( code, event_name ); if ( retval != PAPI_OK ) { printf("Error translating %#x\n",code); exit(1); } printf("Found: %s\n",event_name); strncpy(events[num_events],event_name,BUFSIZ); sprintf(filenames[num_events],"results.%s",event_name); num_events++; if (num_events==MAX_EVENTS) { printf("Too many events! %d\n",num_events); exit(1); } enum_retval = PAPI_enum_cmp_event( &code, PAPI_ENUM_EVENTS, cid ); } if (num_events==0) { printf("Error! No libmsr events found!\n"); exit(1); } /* Open output file */ char fileoutname[]="libmsr_write_test_output.txt"; fileout=fopen( fileoutname ,"w" ); if ( fileout==NULL) { fprintf( stderr,"Could not open %s\n",fileoutname ); exit(1); } /* Create EventSet */ retval = PAPI_create_eventset( &EventSet ); if (retval != PAPI_OK) { fprintf(stderr,"Error creating eventset!\n"); } for(i=0;i * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. * * This file has been automatically generated. * * PMU: nhm (Intel Nehalem) */ static const intel_x86_umask_t nhm_arith[]={ { .uname = "CYCLES_DIV_BUSY", .udesc = "Counts the number of cycles the divider is busy executing divide or square root operations. The divide can be integer, X87 or Streaming SIMD Extensions (SSE). The square root operation can be either X87 or SSE.", .ucode = 0x100, .uflags= INTEL_X86_NCOMBO, }, { .uname = "DIV", .udesc = "Counts the number of divide or square root operations. The divide can be integer, X87 or Streaming SIMD Extensions (SSE). The square root operation can be either X87 or SSE.", .uequiv = "CYCLES_DIV_BUSY:c=1:i=1:e=1", .ucode = 0x100 | INTEL_X86_MOD_EDGE | INTEL_X86_MOD_INV | (0x1 << INTEL_X86_CMASK_BIT), .uflags= INTEL_X86_NCOMBO, }, { .uname = "MUL", .udesc = "Counts the number of multiply operations executed. 
/*
 * Unit-mask table named for the BPU_CLEARS event.
 *
 * ANY (0x300) is the bitwise OR of EARLY (0x100) and LATE (0x200) and is
 * the entry carrying INTEL_X86_DFL.
 * NOTE(review): INTEL_X86_NCOMBO / INTEL_X86_DFL presumably mean "cannot be
 * combined" / "default umask" -- confirm against the flag definitions.
 */
static const intel_x86_umask_t nhm_bpu_clears[]={
   { .uname  = "EARLY",
     .udesc  = "Early Branch Prediction Unit clears",
     .ucode = 0x100,
     .uflags= INTEL_X86_NCOMBO,
   },
   { .uname  = "LATE",
     .udesc  = "Late Branch Prediction Unit clears",
     .ucode = 0x200,
     .uflags= INTEL_X86_NCOMBO,
   },
   { .uname  = "ANY",
     .udesc  = "Count any Branch Prediction Unit clears",
     .ucode = 0x300,
     .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL,
   },
};
"Taken branches executed", .ucode = 0x4000, .uflags= INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t nhm_br_inst_retired[]={ { .uname = "ALL_BRANCHES", .udesc = "Retired branch instructions (Precise Event)", .ucode = 0x0, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS | INTEL_X86_DFL, }, { .uname = "CONDITIONAL", .udesc = "Retired conditional branch instructions (Precise Event)", .ucode = 0x100, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "NEAR_CALL", .udesc = "Retired near call instructions (Precise Event)", .ucode = 0x200, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, }; static const intel_x86_umask_t nhm_br_misp_exec[]={ { .uname = "ANY", .udesc = "Mispredicted branches executed", .ucode = 0x7f00, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "COND", .udesc = "Mispredicted conditional branches executed", .ucode = 0x100, .uflags= INTEL_X86_NCOMBO, }, { .uname = "DIRECT", .udesc = "Mispredicted unconditional branches executed", .ucode = 0x200, .uflags= INTEL_X86_NCOMBO, }, { .uname = "DIRECT_NEAR_CALL", .udesc = "Mispredicted non call branches executed", .ucode = 0x1000, .uflags= INTEL_X86_NCOMBO, }, { .uname = "INDIRECT_NEAR_CALL", .udesc = "Mispredicted indirect call branches executed", .ucode = 0x2000, .uflags= INTEL_X86_NCOMBO, }, { .uname = "INDIRECT_NON_CALL", .udesc = "Mispredicted indirect non call branches executed", .ucode = 0x400, .uflags= INTEL_X86_NCOMBO, }, { .uname = "NEAR_CALLS", .udesc = "Mispredicted call branches executed", .ucode = 0x3000, .uflags= INTEL_X86_NCOMBO, }, { .uname = "NON_CALLS", .udesc = "Mispredicted non call branches executed", .ucode = 0x700, .uflags= INTEL_X86_NCOMBO, }, { .uname = "RETURN_NEAR", .udesc = "Mispredicted return branches executed", .ucode = 0x800, .uflags= INTEL_X86_NCOMBO, }, { .uname = "TAKEN", .udesc = "Mispredicted taken branches executed", .ucode = 0x4000, .uflags= INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t nhm_br_misp_retired[]={ { .uname = "NEAR_CALL", .udesc = 
/*
 * Unit-mask table named for the CPU_CLK_UNHALTED event.
 *
 * THREAD_P (code 0x0) is the entry carrying INTEL_X86_DFL.
 * TOTAL_CYCLES is not a distinct hardware code: it reuses the THREAD_P code
 * with INTEL_X86_MOD_INV and a counter-mask value of 2 OR'ed in, which is
 * what its .uequiv string "THREAD_P:c=2:i=1" spells out.
 */
static const intel_x86_umask_t nhm_cpu_clk_unhalted[]={
   { .uname  = "THREAD_P",
     .udesc  = "Cycles when thread is not halted (programmable counter)",
     .ucode = 0x0,
     .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL,
   },
   { .uname  = "REF_P",
     .udesc  = "Reference base clock (133 Mhz) cycles when thread is not halted",
     .ucode = 0x100,
     .uflags= INTEL_X86_NCOMBO,
   },
   { .uname  = "TOTAL_CYCLES",
     .udesc  = "Total number of elapsed cycles. Does not work when C-state enabled",
     .uequiv = "THREAD_P:c=2:i=1",
     /* invert + cmask=2 applied on top of the THREAD_P code */
     .ucode = 0x0 | INTEL_X86_MOD_INV | (0x2 << INTEL_X86_CMASK_BIT),
     .uflags= INTEL_X86_NCOMBO,
   },
};
/*
 * Unit-mask table named for the FP_ASSIST event.
 *
 * All three entries carry INTEL_X86_PEBS (their descriptions mark them as
 * precise events); ALL is additionally the INTEL_X86_DFL entry.
 * Note the codes are not in table order: ALL=0x100, OUTPUT=0x200,
 * INPUT=0x400.
 */
static const intel_x86_umask_t nhm_fp_assist[]={
   { .uname  = "ALL",
     .udesc  = "Floating point assists (Precise Event)",
     .ucode = 0x100,
     .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS | INTEL_X86_DFL,
   },
   { .uname  = "INPUT",
     .udesc  = "Floating point assists for invalid input value (Precise Event)",
     .ucode = 0x400,
     .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS,
   },
   { .uname  = "OUTPUT",
     .udesc  = "Floating point assists for invalid output value (Precise Event)",
     .ucode = 0x200,
     .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS,
   },
};
"SSE and SSE2 FP Uops", .ucode = 0x400, .uflags= INTEL_X86_NCOMBO, }, { .uname = "SSE_FP_PACKED", .udesc = "SSE FP packed Uops", .ucode = 0x1000, .uflags= INTEL_X86_NCOMBO, }, { .uname = "SSE_FP_SCALAR", .udesc = "SSE FP scalar Uops", .ucode = 0x2000, .uflags= INTEL_X86_NCOMBO, }, { .uname = "SSE_SINGLE_PRECISION", .udesc = "SSE* FP single precision Uops", .ucode = 0x4000, .uflags= INTEL_X86_NCOMBO, }, { .uname = "SSE2_INTEGER", .udesc = "SSE2 integer Uops", .ucode = 0x800, .uflags= INTEL_X86_NCOMBO, }, { .uname = "X87", .udesc = "Computational floating-point operations executed", .ucode = 0x100, .uflags= INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t nhm_fp_mmx_trans[]={ { .uname = "ANY", .udesc = "All Floating Point to and from MMX transitions", .ucode = 0x300, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "TO_FP", .udesc = "Transitions from MMX to Floating Point instructions", .ucode = 0x100, .uflags= INTEL_X86_NCOMBO, }, { .uname = "TO_MMX", .udesc = "Transitions from Floating Point to MMX instructions", .ucode = 0x200, .uflags= INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t nhm_ifu_ivc[]={ { .uname = "FULL", .udesc = "Instruction Fetche unit victim cache full", .ucode = 0x100, .uflags= INTEL_X86_NCOMBO, }, { .uname = "L1I_EVICTION", .udesc = "L1 Instruction cache evictions", .ucode = 0x200, .uflags= INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t nhm_ild_stall[]={ { .uname = "ANY", .udesc = "Any Instruction Length Decoder stall cycles", .uequiv = "IQ_FULL:LCP:MRU:REGEN", .ucode = 0xf00, .uflags= INTEL_X86_DFL, }, { .uname = "IQ_FULL", .udesc = "Instruction Queue full stall cycles", .ucode = 0x400, }, { .uname = "LCP", .udesc = "Length Change Prefix stall cycles", .ucode = 0x100, }, { .uname = "MRU", .udesc = "Stall cycles due to BPU MRU bypass", .ucode = 0x200, }, { .uname = "REGEN", .udesc = "Regen stall cycles", .ucode = 0x800, }, }; static const intel_x86_umask_t nhm_inst_decoded[]={ { .uname = "DEC0", .udesc = 
/*
 * Unit-mask table named for the L1D_CACHE_LD event, one entry per cache
 * line state plus an aggregate.
 *
 * MESI (0xf00) is the bitwise OR of I_STATE (0x100), S_STATE (0x200),
 * E_STATE (0x400) and M_STATE (0x800) and is the INTEL_X86_DFL entry.
 * NOTE(review): unlike the MESI entries of nhm_l1d_cache_st and
 * nhm_l1d_wb_l2, this MESI entry does not carry INTEL_X86_NCOMBO --
 * confirm whether that difference is intended.
 */
static const intel_x86_umask_t nhm_l1d_cache_ld[]={
   { .uname  = "E_STATE",
     .udesc  = "L1 data cache read in E state",
     .ucode = 0x400,
     .uflags= INTEL_X86_NCOMBO,
   },
   { .uname  = "I_STATE",
     .udesc  = "L1 data cache read in I state (misses)",
     .ucode = 0x100,
     .uflags= INTEL_X86_NCOMBO,
   },
   { .uname  = "M_STATE",
     .udesc  = "L1 data cache read in M state",
     .ucode = 0x800,
     .uflags= INTEL_X86_NCOMBO,
   },
   { .uname  = "MESI",
     .udesc  = "L1 data cache reads",
     .ucode = 0xf00,
     .uflags= INTEL_X86_DFL,
   },
   { .uname  = "S_STATE",
     .udesc  = "L1 data cache read in S state",
     .ucode = 0x200,
     .uflags= INTEL_X86_NCOMBO,
   },
};
/*
 * Unit-mask table named for the L1D_PREFETCH event.
 *
 * Three single-bit codes (REQUESTS=0x100, MISS=0x200, TRIGGERS=0x400);
 * no entry is flagged INTEL_X86_DFL.
 */
static const intel_x86_umask_t nhm_l1d_prefetch[]={
   { .uname  = "MISS",
     .udesc  = "L1D hardware prefetch misses",
     .ucode = 0x200,
     .uflags= INTEL_X86_NCOMBO,
   },
   { .uname  = "REQUESTS",
     .udesc  = "L1D hardware prefetch requests",
     .ucode = 0x100,
     .uflags= INTEL_X86_NCOMBO,
   },
   { .uname  = "TRIGGERS",
     .udesc  = "L1D hardware prefetch requests triggered",
     .ucode = 0x400,
     .uflags= INTEL_X86_NCOMBO,
   },
};
INTEL_X86_DFL, }, }; static const intel_x86_umask_t nhm_l1i[]={ { .uname = "CYCLES_STALLED", .udesc = "L1I instruction fetch stall cycles", .ucode = 0x400, .uflags= INTEL_X86_NCOMBO, }, { .uname = "HITS", .udesc = "L1I instruction fetch hits", .ucode = 0x100, .uflags= INTEL_X86_NCOMBO, }, { .uname = "MISSES", .udesc = "L1I instruction fetch misses", .ucode = 0x200, .uflags= INTEL_X86_NCOMBO, }, { .uname = "READS", .udesc = "L1I Instruction fetches", .ucode = 0x300, .uflags= INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t nhm_l2_data_rqsts[]={ { .uname = "ANY", .udesc = "All L2 data requests", .ucode = 0xff00, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "DEMAND_E_STATE", .udesc = "L2 data demand loads in E state", .ucode = 0x400, .uflags= INTEL_X86_NCOMBO, }, { .uname = "DEMAND_I_STATE", .udesc = "L2 data demand loads in I state (misses)", .ucode = 0x100, .uflags= INTEL_X86_NCOMBO, }, { .uname = "DEMAND_M_STATE", .udesc = "L2 data demand loads in M state", .ucode = 0x800, .uflags= INTEL_X86_NCOMBO, }, { .uname = "DEMAND_MESI", .udesc = "L2 data demand requests", .ucode = 0xf00, .uflags= INTEL_X86_NCOMBO, }, { .uname = "DEMAND_S_STATE", .udesc = "L2 data demand loads in S state", .ucode = 0x200, .uflags= INTEL_X86_NCOMBO, }, { .uname = "PREFETCH_E_STATE", .udesc = "L2 data prefetches in E state", .ucode = 0x4000, .uflags= INTEL_X86_NCOMBO, }, { .uname = "PREFETCH_I_STATE", .udesc = "L2 data prefetches in the I state (misses)", .ucode = 0x1000, .uflags= INTEL_X86_NCOMBO, }, { .uname = "PREFETCH_M_STATE", .udesc = "L2 data prefetches in M state", .ucode = 0x8000, .uflags= INTEL_X86_NCOMBO, }, { .uname = "PREFETCH_MESI", .udesc = "All L2 data prefetches", .ucode = 0xf000, .uflags= INTEL_X86_NCOMBO, }, { .uname = "PREFETCH_S_STATE", .udesc = "L2 data prefetches in the S state", .ucode = 0x2000, .uflags= INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t nhm_l2_hw_prefetch[]={ { .uname = "HIT", .udesc = "Count L2 HW prefetcher detector hits", 
.ucode = 0x100, .uflags= INTEL_X86_NCOMBO, }, { .uname = "ALLOC", .udesc = "Count L2 HW prefetcher allocations", .ucode = 0x200, .uflags= INTEL_X86_NCOMBO, }, { .uname = "DATA_TRIGGER", .udesc = "Count L2 HW data prefetcher triggered", .ucode = 0x400, .uflags= INTEL_X86_NCOMBO, }, { .uname = "CODE_TRIGGER", .udesc = "Count L2 HW code prefetcher triggered", .ucode = 0x800, .uflags= INTEL_X86_NCOMBO, }, { .uname = "DCA_TRIGGER", .udesc = "Count L2 HW DCA prefetcher triggered", .ucode = 0x1000, .uflags= INTEL_X86_NCOMBO, }, { .uname = "KICK_START", .udesc = "Count L2 HW prefetcher kick started", .ucode = 0x2000, .uflags= INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t nhm_l2_lines_in[]={ { .uname = "ANY", .udesc = "L2 lines allocated", .ucode = 0x700, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "E_STATE", .udesc = "L2 lines allocated in the E state", .ucode = 0x400, .uflags= INTEL_X86_NCOMBO, }, { .uname = "S_STATE", .udesc = "L2 lines allocated in the S state", .ucode = 0x200, .uflags= INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t nhm_l2_lines_out[]={ { .uname = "ANY", .udesc = "L2 lines evicted", .ucode = 0xf00, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "DEMAND_CLEAN", .udesc = "L2 lines evicted by a demand request", .ucode = 0x100, .uflags= INTEL_X86_NCOMBO, }, { .uname = "DEMAND_DIRTY", .udesc = "L2 modified lines evicted by a demand request", .ucode = 0x200, .uflags= INTEL_X86_NCOMBO, }, { .uname = "PREFETCH_CLEAN", .udesc = "L2 lines evicted by a prefetch request", .ucode = 0x400, .uflags= INTEL_X86_NCOMBO, }, { .uname = "PREFETCH_DIRTY", .udesc = "L2 modified lines evicted by a prefetch request", .ucode = 0x800, .uflags= INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t nhm_l2_rqsts[]={ { .uname = "MISS", .udesc = "All L2 misses", .ucode = 0xaa00, .uflags= INTEL_X86_NCOMBO, }, { .uname = "REFERENCES", .udesc = "All L2 requests", .ucode = 0xff00, .uflags= INTEL_X86_NCOMBO, }, { .uname = "IFETCH_HIT", .udesc = "L2 
instruction fetch hits", .ucode = 0x1000, .uflags= INTEL_X86_NCOMBO, }, { .uname = "IFETCH_MISS", .udesc = "L2 instruction fetch misses", .ucode = 0x2000, .uflags= INTEL_X86_NCOMBO, }, { .uname = "IFETCHES", .udesc = "L2 instruction fetches", .ucode = 0x3000, .uflags= INTEL_X86_NCOMBO, }, { .uname = "LD_HIT", .udesc = "L2 load hits", .ucode = 0x100, .uflags= INTEL_X86_NCOMBO, }, { .uname = "LD_MISS", .udesc = "L2 load misses", .ucode = 0x200, .uflags= INTEL_X86_NCOMBO, }, { .uname = "LOADS", .udesc = "L2 requests", .ucode = 0x300, .uflags= INTEL_X86_NCOMBO, }, { .uname = "PREFETCH_HIT", .udesc = "L2 prefetch hits", .ucode = 0x4000, .uflags= INTEL_X86_NCOMBO, }, { .uname = "PREFETCH_MISS", .udesc = "L2 prefetch misses", .ucode = 0x8000, .uflags= INTEL_X86_NCOMBO, }, { .uname = "PREFETCHES", .udesc = "All L2 prefetches", .ucode = 0xc000, .uflags= INTEL_X86_NCOMBO, }, { .uname = "RFO_HIT", .udesc = "L2 RFO hits", .ucode = 0x400, .uflags= INTEL_X86_NCOMBO, }, { .uname = "RFO_MISS", .udesc = "L2 RFO misses", .ucode = 0x800, .uflags= INTEL_X86_NCOMBO, }, { .uname = "RFOS", .udesc = "L2 RFO requests", .ucode = 0xc00, .uflags= INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t nhm_l2_transactions[]={ { .uname = "ANY", .udesc = "All L2 transactions", .ucode = 0x8000, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "FILL", .udesc = "L2 fill transactions", .ucode = 0x2000, .uflags= INTEL_X86_NCOMBO, }, { .uname = "IFETCH", .udesc = "L2 instruction fetch transactions", .ucode = 0x400, .uflags= INTEL_X86_NCOMBO, }, { .uname = "L1D_WB", .udesc = "L1D writeback to L2 transactions", .ucode = 0x1000, .uflags= INTEL_X86_NCOMBO, }, { .uname = "LOAD", .udesc = "L2 Load transactions", .ucode = 0x100, .uflags= INTEL_X86_NCOMBO, }, { .uname = "PREFETCH", .udesc = "L2 prefetch transactions", .ucode = 0x800, .uflags= INTEL_X86_NCOMBO, }, { .uname = "RFO", .udesc = "L2 RFO transactions", .ucode = 0x200, .uflags= INTEL_X86_NCOMBO, }, { .uname = "WB", .udesc = "L2 writeback to 
LLC transactions", .ucode = 0x4000, .uflags= INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t nhm_l2_write[]={ { .uname = "LOCK_E_STATE", .udesc = "L2 demand lock RFOs in E state", .ucode = 0x4000, .uflags= INTEL_X86_NCOMBO, }, { .uname = "LOCK_I_STATE", .udesc = "L2 demand lock RFOs in I state (misses)", .ucode = 0x1000, .uflags= INTEL_X86_NCOMBO, }, { .uname = "LOCK_S_STATE", .udesc = "L2 demand lock RFOs in S state", .ucode = 0x2000, .uflags= INTEL_X86_NCOMBO, }, { .uname = "LOCK_HIT", .udesc = "All demand L2 lock RFOs that hit the cache", .ucode = 0xe000, .uflags= INTEL_X86_NCOMBO, }, { .uname = "LOCK_M_STATE", .udesc = "L2 demand lock RFOs in M state", .ucode = 0x8000, .uflags= INTEL_X86_NCOMBO, }, { .uname = "LOCK_MESI", .udesc = "All demand L2 lock RFOs", .ucode = 0xf000, .uflags= INTEL_X86_NCOMBO, }, { .uname = "RFO_HIT", .udesc = "All L2 demand store RFOs that hit the cache", .ucode = 0xe00, .uflags= INTEL_X86_NCOMBO, }, { .uname = "RFO_I_STATE", .udesc = "L2 demand store RFOs in I state (misses)", .ucode = 0x100, .uflags= INTEL_X86_NCOMBO, }, { .uname = "RFO_E_STATE", .udesc = "L2 demand store RFOs in the E state (exclusive)", .ucode = 0x400, .uflags= INTEL_X86_NCOMBO, }, { .uname = "RFO_M_STATE", .udesc = "L2 demand store RFOs in M state", .ucode = 0x800, .uflags= INTEL_X86_NCOMBO, }, { .uname = "RFO_MESI", .udesc = "All L2 demand store RFOs", .ucode = 0xf00, .uflags= INTEL_X86_NCOMBO, }, { .uname = "RFO_S_STATE", .udesc = "L2 demand store RFOs in S state", .ucode = 0x200, .uflags= INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t nhm_large_itlb[]={ { .uname = "HIT", .udesc = "Large ITLB hit", .ucode = 0x100, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, }; static const intel_x86_umask_t nhm_load_dispatch[]={ { .uname = "ANY", .udesc = "All loads dispatched", .ucode = 0x700, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "MOB", .udesc = "Loads dispatched from the MOB", .ucode = 0x400, .uflags= INTEL_X86_NCOMBO, }, { .uname = "RS", 
.udesc = "Loads dispatched that bypass the MOB", .ucode = 0x100, .uflags= INTEL_X86_NCOMBO, }, { .uname = "RS_DELAYED", .udesc = "Loads dispatched from stage 305", .ucode = 0x200, .uflags= INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t nhm_longest_lat_cache[]={ { .uname = "REFERENCE", .udesc = "Longest latency cache reference", .ucode = 0x4f00, .uflags= INTEL_X86_NCOMBO, }, { .uname = "MISS", .udesc = "Longest latency cache miss", .ucode = 0x4100, .uflags= INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t nhm_lsd[]={ { .uname = "ACTIVE", .udesc = "Cycles when uops were delivered by the LSD", .ucode = 0x100 | (0x1 << INTEL_X86_CMASK_BIT), .modhw = _INTEL_X86_ATTR_C, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "INACTIVE", .udesc = "Cycles no uops were delivered by the LSD", .uequiv = "ACTIVE:i=1", .ucode = 0x100 | INTEL_X86_MOD_INV | (0x1 << INTEL_X86_CMASK_BIT), }, }; static const intel_x86_umask_t nhm_machine_clears[]={ { .uname = "SMC", .udesc = "Self-Modifying Code detected", .ucode = 0x400, .uflags= INTEL_X86_NCOMBO, }, { .uname = "CYCLES", .udesc = "Cycles machine clear asserted", .ucode = 0x100, .uflags= INTEL_X86_NCOMBO, }, { .uname = "MEM_ORDER", .udesc = "Execution pipeline restart due to Memory ordering conflicts", .ucode = 0x200, .uflags= INTEL_X86_NCOMBO, }, { .uname = "FUSION_ASSIST", .udesc = "Counts the number of macro-fusion assists", .ucode = 0x1000, .uflags= INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t nhm_macro_insts[]={ { .uname = "DECODED", .udesc = "Instructions decoded", .ucode = 0x100, .uflags= INTEL_X86_NCOMBO, }, { .uname = "FUSIONS_DECODED", .udesc = "Macro-fused instructions decoded", .ucode = 0x100, .uflags= INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t nhm_memory_disambiguation[]={ { .uname = "RESET", .udesc = "Counts memory disambiguation reset cycles", .ucode = 0x100, .uflags= INTEL_X86_NCOMBO, }, { .uname = "WATCHDOG", .udesc = "Counts the number of times the memory disambiguation watchdog 
kicked in", .ucode = 0x400, .uflags= INTEL_X86_NCOMBO, }, { .uname = "WATCH_CYCLES", .udesc = "Counts the cycles that the memory disambiguation watchdog is active", .ucode = 0x800, .uflags= INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t nhm_mem_inst_retired[]={ { .uname = "LATENCY_ABOVE_THRESHOLD", .udesc = "Memory instructions retired above programmed clocks, minimum threshold value is 3, (Precise Event and ldlat required)", .ucode = 0x1000, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS | INTEL_X86_LDLAT, }, { .uname = "LOADS", .udesc = "Instructions retired which contains a load (Precise Event)", .ucode = 0x100, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "STORES", .udesc = "Instructions retired which contains a store (Precise Event)", .ucode = 0x200, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, }; static const intel_x86_umask_t nhm_mem_load_retired[]={ { .uname = "DTLB_MISS", .udesc = "Retired loads that miss the DTLB (Precise Event)", .ucode = 0x8000, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "HIT_LFB", .udesc = "Retired loads that miss L1D and hit an previously allocated LFB (Precise Event)", .ucode = 0x4000, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "L1D_HIT", .udesc = "Retired loads that hit the L1 data cache (Precise Event)", .ucode = 0x100, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "L2_HIT", .udesc = "Retired loads that hit the L2 cache (Precise Event)", .ucode = 0x200, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "L3_MISS", .udesc = "Retired loads that miss the L3 cache (Precise Event)", .ucode = 0x1000, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "LLC_MISS", .udesc = "This is an alias for L3_MISS", .uequiv = "L3_MISS", .ucode = 0x1000, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "L3_UNSHARED_HIT", .udesc = "Retired loads that hit valid versions in the L3 cache (Precise Event)", .ucode = 0x400, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { 
.uname = "LLC_UNSHARED_HIT", .udesc = "This is an alias for L3_UNSHARED_HIT", .uequiv = "L3_UNSHARED_HIT", .ucode = 0x400, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "OTHER_CORE_L2_HIT_HITM", .udesc = "Retired loads that hit sibling core's L2 in modified or unmodified states (Precise Event)", .ucode = 0x800, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, }; static const intel_x86_umask_t nhm_mem_store_retired[]={ { .uname = "DTLB_MISS", .udesc = "Retired stores that miss the DTLB (Precise Event)", .ucode = 0x100, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS | INTEL_X86_DFL, }, }; static const intel_x86_umask_t nhm_mem_uncore_retired[]={ { .uname = "OTHER_CORE_L2_HITM", .udesc = "Load instructions retired that HIT modified data in sibling core (Precise Event)", .ucode = 0x200, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "REMOTE_CACHE_LOCAL_HOME_HIT", .udesc = "Load instructions retired remote cache HIT data source (Precise Event)", .ucode = 0x800, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "REMOTE_DRAM", .udesc = "Load instructions retired remote DRAM and remote home-remote cache HITM (Precise Event)", .ucode = 0x1000, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "LOCAL_DRAM", .udesc = "Load instructions retired with a data source of local DRAM or locally homed remote hitm (Precise Event)", .ucode = 0x2000, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "L3_DATA_MISS_UNKNOWN", .udesc = "Load instructions retired where the memory reference missed L3 and data source is unknown (Model 46 only, Precise Event)", .ucode = 0x100, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, .umodel = PFM_PMU_INTEL_NHM_EX, }, { .uname = "UNCACHEABLE", .udesc = "Load instructions retired where the memory reference missed L1, L2, L3 caches and to perform I/O (Model 46 only, Precise Event)", .ucode = 0x8000, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, .umodel = PFM_PMU_INTEL_NHM_EX, }, }; static const intel_x86_umask_t 
nhm_offcore_requests[]={ { .uname = "ANY", .udesc = "All offcore requests", .ucode = 0x8000, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "ANY_READ", .udesc = "Offcore read requests", .ucode = 0x800, .uflags= INTEL_X86_NCOMBO, }, { .uname = "ANY_RFO", .udesc = "Offcore RFO requests", .ucode = 0x1000, .uflags= INTEL_X86_NCOMBO, }, { .uname = "DEMAND_READ_CODE", .udesc = "Counts number of offcore demand code read requests. Does not count L2 prefetch requests.", .ucode = 0x200, .uflags= INTEL_X86_NCOMBO, }, { .uname = "DEMAND_READ_DATA", .udesc = "Offcore demand data read requests", .ucode = 0x100, .uflags= INTEL_X86_NCOMBO, }, { .uname = "DEMAND_RFO", .udesc = "Offcore demand RFO requests", .ucode = 0x400, .uflags= INTEL_X86_NCOMBO, }, { .uname = "L1D_WRITEBACK", .udesc = "Offcore L1 data cache writebacks", .ucode = 0x4000, .uflags= INTEL_X86_NCOMBO, }, { .uname = "UNCACHED_MEM", .udesc = "Counts number of offcore uncached memory requests", .ucode = 0x2000, .uflags= INTEL_X86_NCOMBO, }, };

/* nhm_pic_accesses: unit masks for TPR reads and writes; no default entry. */
static const intel_x86_umask_t nhm_pic_accesses[] = {
	{ .uname = "TPR_READS", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO,
	  .udesc = "Counts number of TPR reads" },
	{ .uname = "TPR_WRITES", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO,
	  .udesc = "Counts number of TPR writes" },
};

/*
 * nhm_rat_stalls: unit masks for RAT stall cycles by cause. ANY (0xf00) is
 * the OR of the four per-cause codes and is the default; unlike the other
 * entries its .uflags is INTEL_X86_DFL only, without INTEL_X86_NCOMBO.
 */
static const intel_x86_umask_t nhm_rat_stalls[] = {
	{ .uname = "FLAGS", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO,
	  .udesc = "Flag stall cycles" },
	{ .uname = "REGISTERS", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO,
	  .udesc = "Partial register stall cycles" },
	{ .uname = "ROB_READ_PORT", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO,
	  .udesc = "ROB read port stalls cycles" },
	{ .uname = "SCOREBOARD", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO,
	  .udesc = "Scoreboard stall cycles" },
	{ .uname = "ANY", .ucode = 0xf00, .uflags = INTEL_X86_DFL,
	  .udesc = "All RAT stall cycles" },
};

static const intel_x86_umask_t nhm_resource_stalls[]={ { .uname = "FPCW", .udesc = "FPU control word write stall cycles", .ucode =
0x2000, }, { .uname = "LOAD", .udesc = "Load buffer stall cycles", .ucode = 0x200, }, { .uname = "MXCSR", .udesc = "MXCSR rename stall cycles", .ucode = 0x4000, }, { .uname = "RS_FULL", .udesc = "Reservation Station full stall cycles", .ucode = 0x400, }, { .uname = "STORE", .udesc = "Store buffer stall cycles", .ucode = 0x800, }, { .uname = "OTHER", .udesc = "Other Resource related stall cycles", .ucode = 0x8000, }, { .uname = "ROB_FULL", .udesc = "ROB full stall cycles", .ucode = 0x1000, }, { .uname = "ANY", .udesc = "Resource related stall cycles", .ucode = 0x100, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, };

/*
 * nhm_simd_int_128: unit masks for 128-bit SIMD integer operations, one
 * entry per operation class. No entry is flagged INTEL_X86_DFL.
 */
static const intel_x86_umask_t nhm_simd_int_128[] = {
	{ .uname = "PACK", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO,
	  .udesc = "128 bit SIMD integer pack operations" },
	{ .uname = "PACKED_ARITH", .ucode = 0x2000, .uflags = INTEL_X86_NCOMBO,
	  .udesc = "128 bit SIMD integer arithmetic operations" },
	{ .uname = "PACKED_LOGICAL", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO,
	  .udesc = "128 bit SIMD integer logical operations" },
	{ .uname = "PACKED_MPY", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO,
	  .udesc = "128 bit SIMD integer multiply operations" },
	{ .uname = "PACKED_SHIFT", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO,
	  .udesc = "128 bit SIMD integer shift operations" },
	{ .uname = "SHUFFLE_MOVE", .ucode = 0x4000, .uflags = INTEL_X86_NCOMBO,
	  .udesc = "128 bit SIMD integer shuffle/move operations" },
	{ .uname = "UNPACK", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO,
	  .udesc = "128 bit SIMD integer unpack operations" },
};

static const intel_x86_umask_t nhm_simd_int_64[]={ { .uname = "PACK", .udesc = "SIMD integer 64 bit pack operations", .ucode = 0x400, .uflags= INTEL_X86_NCOMBO, }, { .uname = "PACKED_ARITH", .udesc = "SIMD integer 64 bit arithmetic operations", .ucode = 0x2000, .uflags= INTEL_X86_NCOMBO, }, { .uname = "PACKED_LOGICAL", .udesc = "SIMD integer 64 bit logical operations", .ucode = 0x1000, .uflags= INTEL_X86_NCOMBO, }, { .uname = "PACKED_MPY", .udesc =
"SIMD integer 64 bit packed multiply operations", .ucode = 0x100, .uflags= INTEL_X86_NCOMBO, }, { .uname = "PACKED_SHIFT", .udesc = "SIMD integer 64 bit shift operations", .ucode = 0x200, .uflags= INTEL_X86_NCOMBO, }, { .uname = "SHUFFLE_MOVE", .udesc = "SIMD integer 64 bit shuffle/move operations", .ucode = 0x4000, .uflags= INTEL_X86_NCOMBO, }, { .uname = "UNPACK", .udesc = "SIMD integer 64 bit unpack operations", .ucode = 0x800, .uflags= INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t nhm_snoop_response[]={ { .uname = "HIT", .udesc = "Thread responded HIT to snoop", .ucode = 0x100, .uflags= INTEL_X86_NCOMBO, }, { .uname = "HITE", .udesc = "Thread responded HITE to snoop", .ucode = 0x200, .uflags= INTEL_X86_NCOMBO, }, { .uname = "HITM", .udesc = "Thread responded HITM to snoop", .ucode = 0x400, .uflags= INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t nhm_sq_misc[]={ { .uname = "PROMOTION", .udesc = "Counts the number of L2 secondary misses that hit the Super Queue", .ucode = 0x100, .uflags= INTEL_X86_NCOMBO, }, { .uname = "PROMOTION_POST_GO", .udesc = "Counts the number of L2 secondary misses during the Super Queue filling L2", .ucode = 0x200, .uflags= INTEL_X86_NCOMBO, }, { .uname = "LRU_HINTS", .udesc = "Counts number of Super Queue LRU hints sent to L3", .ucode = 0x400, .uflags= INTEL_X86_NCOMBO, }, { .uname = "FILL_DROPPED", .udesc = "Counts the number of SQ L2 fills dropped due to L2 busy", .ucode = 0x800, .uflags= INTEL_X86_NCOMBO, }, { .uname = "SPLIT_LOCK", .udesc = "Super Queue lock splits across a cache line", .ucode = 0x1000, .uflags= INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t nhm_sse_mem_exec[]={ { .uname = "NTA", .udesc = "Streaming SIMD L1D NTA prefetch miss", .ucode = 0x100, .uflags= INTEL_X86_DFL, }, }; static const intel_x86_umask_t nhm_ssex_uops_retired[]={ { .uname = "PACKED_DOUBLE", .udesc = "SIMD Packed-Double Uops retired (Precise Event)", .ucode = 0x400, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = 
"PACKED_SINGLE", .udesc = "SIMD Packed-Single Uops retired (Precise Event)", .ucode = 0x100, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "SCALAR_DOUBLE", .udesc = "SIMD Scalar-Double Uops retired (Precise Event)", .ucode = 0x800, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "SCALAR_SINGLE", .udesc = "SIMD Scalar-Single Uops retired (Precise Event)", .ucode = 0x200, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "VECTOR_INTEGER", .udesc = "SIMD Vector Integer Uops retired (Precise Event)", .ucode = 0x1000, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, }; static const intel_x86_umask_t nhm_store_blocks[]={ { .uname = "AT_RET", .udesc = "Loads delayed with at-Retirement block code", .ucode = 0x400, .uflags= INTEL_X86_NCOMBO, }, { .uname = "L1D_BLOCK", .udesc = "Cacheable loads delayed with L1D block code", .ucode = 0x800, .uflags= INTEL_X86_NCOMBO, }, { .uname = "NOT_STA", .udesc = "Loads delayed due to a store blocked for unknown data", .ucode = 0x100, .uflags= INTEL_X86_NCOMBO, }, { .uname = "STA", .udesc = "Loads delayed due to a store blocked for an unknown address", .ucode = 0x200, .uflags= INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t nhm_uops_decoded[]={ { .uname = "ESP_FOLDING", .udesc = "Stack pointer instructions decoded", .ucode = 0x400, .uflags= INTEL_X86_NCOMBO, }, { .uname = "ESP_SYNC", .udesc = "Stack pointer sync operations", .ucode = 0x800, .uflags= INTEL_X86_NCOMBO, }, { .uname = "MS", .udesc = "Uops decoded by Microcode Sequencer", .ucode = 0x200, .uflags= INTEL_X86_NCOMBO, }, { .uname = "MS_CYCLES_ACTIVE", .udesc = "Cycles in which at least one uop is decoded by Microcode Sequencer", .uequiv = "MS:c=1", .ucode = 0x200 | (0x1 << INTEL_X86_CMASK_BIT), .uflags= INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t nhm_uops_executed[]={ { .uname = "PORT0", .udesc = "Uops executed on port 0", .ucode = 0x100, .uflags= INTEL_X86_NCOMBO, }, { .uname = "PORT1", .udesc = "Uops executed on port 1", .ucode = 
0x200, .uflags= INTEL_X86_NCOMBO, }, { .uname = "PORT2_CORE", .udesc = "Uops executed on port 2 on any thread (core count only)", .ucode = 0x400 | INTEL_X86_MOD_ANY, .modhw = _INTEL_X86_ATTR_T, .uflags= INTEL_X86_NCOMBO, }, { .uname = "PORT3_CORE", .udesc = "Uops executed on port 3 on any thread (core count only)", .ucode = 0x800 | INTEL_X86_MOD_ANY, .modhw = _INTEL_X86_ATTR_T, .uflags= INTEL_X86_NCOMBO, }, { .uname = "PORT4_CORE", .udesc = "Uops executed on port 4 on any thread (core count only)", .ucode = 0x1000 | INTEL_X86_MOD_ANY, .modhw = _INTEL_X86_ATTR_T, .uflags= INTEL_X86_NCOMBO, }, { .uname = "PORT5", .udesc = "Uops executed on port 5", .ucode = 0x2000, .uflags= INTEL_X86_NCOMBO, }, { .uname = "PORT015", .udesc = "Uops issued on ports 0, 1 or 5", .ucode = 0x4000, .uflags= INTEL_X86_NCOMBO, }, { .uname = "PORT234_CORE", .udesc = "Uops issued on ports 2, 3 or 4 on any thread (core count only)", .ucode = 0x8000 | INTEL_X86_MOD_ANY, .modhw = _INTEL_X86_ATTR_T, .uflags= INTEL_X86_NCOMBO, }, { .uname = "PORT015_STALL_CYCLES", .udesc = "Cycles no Uops issued on ports 0, 1 or 5", .uequiv = "PORT015:c=1:i=1", .ucode = 0x4000 | INTEL_X86_MOD_INV | (0x1 << INTEL_X86_CMASK_BIT), .uflags= INTEL_X86_NCOMBO, }, };

/*
 * nhm_uops_issued: unit masks for issued uops. ANY is the default.
 * STALLED_CYCLES reuses the ANY code (0x100), ORs in INTEL_X86_MOD_INV and a
 * value of 1 shifted by INTEL_X86_CMASK_BIT, and is declared equivalent to
 * "ANY:c=1:i=1".
 */
static const intel_x86_umask_t nhm_uops_issued[] = {
	{ .uname = "ANY", .ucode = 0x100,
	  .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL,
	  .udesc = "Uops issued" },
	{ .uname = "STALLED_CYCLES",
	  .ucode = 0x100 | INTEL_X86_MOD_INV | (0x1 << INTEL_X86_CMASK_BIT),
	  .uequiv = "ANY:c=1:i=1",
	  .uflags = INTEL_X86_NCOMBO,
	  .udesc = "Cycles stalled no issued uops" },
	{ .uname = "FUSED", .ucode = 0x200,
	  .uflags = INTEL_X86_NCOMBO,
	  .udesc = "Fused Uops issued" },
};

static const intel_x86_umask_t nhm_uops_retired[]={ { .uname = "ANY", .udesc = "Uops retired (Precise Event)", .ucode = 0x100, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS | INTEL_X86_DFL, }, { .uname = "RETIRE_SLOTS", .udesc = "Retirement slots used (Precise Event)", .ucode = 0x200, .uflags= INTEL_X86_NCOMBO |
INTEL_X86_PEBS, }, { .uname = "ACTIVE_CYCLES", .udesc = "Cycles Uops are being retired (Precise Event)", .uequiv = "ANY:c=1", .ucode = 0x100 | (0x1 << INTEL_X86_CMASK_BIT), .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "STALL_CYCLES", .udesc = "Cycles No Uops retired (Precise Event)", .uequiv = "ANY:c=1:i=1", .ucode = 0x100 | INTEL_X86_MOD_INV | (0x1 << INTEL_X86_CMASK_BIT), .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "MACRO_FUSED", .udesc = "Macro-fused Uops retired (Precise Event)", .ucode = 0x400, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, }; static const intel_x86_umask_t nhm_offcore_response_0[]={ { .uname = "DMND_DATA_RD", .udesc = "Request: counts the number of demand and DCU prefetch data reads of full and partial cachelines as well as demand data page table entry cacheline reads. Does not count L2 data read prefetches or instruction fetches", .ucode = 0x100, .grpid = 0, }, { .uname = "DMND_RFO", .udesc = "Request: counts the number of demand and DCU prefetch reads for ownership (RFO) requests generated by a write to data cacheline. Does not count L2 RFO", .ucode = 0x200, .grpid = 0, }, { .uname = "DMND_IFETCH", .udesc = "Request: counts the number of demand and DCU prefetch instruction cacheline reads. 
Does not count L2 code read prefetches", .ucode = 0x400, .grpid = 0, }, { .uname = "WB", .udesc = "Request: counts the number of writeback (modified to exclusive) transactions", .ucode = 0x800, .grpid = 0, }, { .uname = "PF_DATA_RD", .udesc = "Request: counts the number of data cacheline reads generated by L2 prefetchers", .ucode = 0x1000, .grpid = 0, }, { .uname = "PF_RFO", .udesc = "Request: counts the number of RFO requests generated by L2 prefetchers", .ucode = 0x2000, .grpid = 0, }, { .uname = "PF_IFETCH", .udesc = "Request: counts the number of code reads generated by L2 prefetchers", .ucode = 0x4000, .grpid = 0, }, { .uname = "OTHER", .udesc = "Request: counts one of the following transaction types, including L3 invalidate, I/O, full or partial writes, WC or non-temporal stores, CLFLUSH, Fences, lock, unlock, split lock", .ucode = 0x8000, .grpid = 0, }, { .uname = "ANY_IFETCH", .udesc = "Request: combination of PF_IFETCH | DMND_IFETCH", .uequiv = "PF_IFETCH:DMND_IFETCH", .ucode = 0x4400, .grpid = 0, }, { .uname = "ANY_REQUEST", .udesc = "Request: combination of all requests umasks", .uequiv = "DMND_DATA_RD:DMND_RFO:DMND_IFETCH:WB:PF_DATA_RD:PF_RFO:PF_IFETCH:OTHER", .ucode = 0xff00, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, .grpid = 0, }, { .uname = "ANY_DATA", .udesc = "Request: any data read/write request", .uequiv = "DMND_DATA_RD:PF_DATA_RD:DMND_RFO:PF_RFO", .ucode = 0x3300, .grpid = 0, }, { .uname = "ANY_DATA_RD", .udesc = "Request: any data read in request", .uequiv = "DMND_DATA_RD:PF_DATA_RD", .ucode = 0x1100, .grpid = 0, }, { .uname = "ANY_RFO", .udesc = "Request: combination of DMND_RFO | PF_RFO", .uequiv = "DMND_RFO:PF_RFO", .ucode = 0x2200, .grpid = 0, }, { .uname = "UNCORE_HIT", .udesc = "Response: counts L3 Hit: local or remote home requests that hit L3 cache in the uncore with no coherency actions required (snooping)", .ucode = 0x10000, .grpid = 1, }, { .uname = "OTHER_CORE_HIT_SNP", .udesc = "Response: counts L3 Hit: local or remote home 
requests that hit L3 cache in the uncore and was serviced by another core with a cross core snoop where no modified copies were found (clean)", .ucode = 0x20000, .grpid = 1, }, { .uname = "OTHER_CORE_HITM", .udesc = "Response: counts L3 Hit: local or remote home requests that hit L3 cache in the uncore and was serviced by another core with a cross core snoop where modified copies were found (HITM)", .ucode = 0x40000, .grpid = 1, }, { .uname = "REMOTE_CACHE_HITM", .udesc = "Response: counts L3 Hit: local or remote home requests that hit a remote L3 cacheline in modified (HITM) state", .ucode = 0x80000, .grpid = 1, }, { .uname = "REMOTE_CACHE_FWD", .udesc = "Response: counts L3 Miss: local homed requests that missed the L3 cache and was serviced by forwarded data following a cross package snoop where no modified copies found. (Remote home requests are not counted)", .ucode = 0x100000, .grpid = 1, }, { .uname = "REMOTE_DRAM", .udesc = "Response: counts L3 Miss: remote home requests that missed the L3 cache and were serviced by remote DRAM", .ucode = 0x200000, .grpid = 1, }, { .uname = "LOCAL_DRAM", .udesc = "Response: counts L3 Miss: local home requests that missed the L3 cache and were serviced by local DRAM", .ucode = 0x400000, .grpid = 1, }, { .uname = "NON_DRAM", .udesc = "Response: Non-DRAM requests that were serviced by IOH", .ucode = 0x800000, .grpid = 1, }, { .uname = "ANY_CACHE_DRAM", .udesc = "Response: requests serviced by any source but IOH", .uequiv = "UNCORE_HIT:OTHER_CORE_HIT_SNP:OTHER_CORE_HITM:REMOTE_CACHE_FWD:REMOTE_CACHE_HITM:REMOTE_DRAM:LOCAL_DRAM", .ucode = 0x7f0000, .grpid = 1, }, { .uname = "ANY_DRAM", .udesc = "Response: requests serviced by local or remote DRAM", .uequiv = "REMOTE_DRAM:LOCAL_DRAM", .ucode = 0x600000, .grpid = 1, }, { .uname = "ANY_LLC_MISS", .udesc = "Response: requests that missed in L3", .uequiv = "REMOTE_CACHE_HITM:REMOTE_CACHE_FWD:REMOTE_DRAM:LOCAL_DRAM:NON_DRAM", .ucode = 0xf80000, .grpid = 1, }, { .uname = 
"LOCAL_CACHE_DRAM", .udesc = "Response: requests hit local core or uncore caches or local DRAM", .uequiv = "UNCORE_HIT:OTHER_CORE_HIT_SNP:OTHER_CORE_HITM:LOCAL_DRAM", .ucode = 0x470000, .grpid = 1, }, { .uname = "REMOTE_CACHE_DRAM", .udesc = "Response: requests that miss L3 and hit remote caches or DRAM", .uequiv = "REMOTE_CACHE_HITM:REMOTE_CACHE_FWD:REMOTE_DRAM", .ucode = 0x380000, .grpid = 1, }, { .uname = "ANY_RESPONSE", .udesc = "Response: combination of all response umasks", .uequiv = "UNCORE_HIT:OTHER_CORE_HIT_SNP:OTHER_CORE_HITM:REMOTE_CACHE_FWD:REMOTE_CACHE_HITM:REMOTE_DRAM:LOCAL_DRAM:NON_DRAM", .ucode = 0xff0000, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, .grpid = 1, }, }; static const intel_x86_entry_t intel_nhm_pe[]={ { .name = "UNHALTED_CORE_CYCLES", .desc = "Count core clock cycles whenever the clock signal on the specific core is running (not halted)", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0x20000000full, .code = 0x3c, }, { .name = "INSTRUCTION_RETIRED", .desc = "Count the number of instructions at retirement", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0x10000000full, .code = 0xc0, }, { .name = "INSTRUCTIONS_RETIRED", .desc = "This is an alias for INSTRUCTION_RETIRED", .modmsk = INTEL_V3_ATTRS, .equiv = "INSTRUCTION_RETIRED", .cntmsk = 0x10000000full, .code = 0xc0, }, { .name = "UNHALTED_REFERENCE_CYCLES", .desc = "Unhalted reference cycles", .modmsk = INTEL_FIXED3_ATTRS, .cntmsk = 0x400000000ull, .code = 0x0300, /* pseudo encoding */ .flags = INTEL_X86_FIXED, }, { .name = "LLC_REFERENCES", .desc = "Count each request originating equiv the core to reference a cache line in the last level cache. The count may include speculation, but excludes cache line fills due to hardware prefetch. 
Alias to L2_RQSTS:SELF_DEMAND_MESI", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0x4f2e, }, { .name = "LAST_LEVEL_CACHE_REFERENCES", .desc = "This is an alias for LLC_REFERENCES", .modmsk = INTEL_V3_ATTRS, .equiv = "LLC_REFERENCES", .cntmsk = 0xf, .code = 0x4f2e, }, { .name = "LLC_MISSES", .desc = "Count each cache miss condition for references to the last level cache. The event count may include speculation, but excludes cache line fills due to hardware prefetch. Alias to event L2_RQSTS:SELF_DEMAND_I_STATE", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0x412e, }, { .name = "LAST_LEVEL_CACHE_MISSES", .desc = "This is an equiv for LLC_MISSES", .modmsk = INTEL_V3_ATTRS, .equiv = "LLC_MISSES", .cntmsk = 0xf, .code = 0x412e, }, { .name = "BRANCH_INSTRUCTIONS_RETIRED", .desc = "Count branch instructions at retirement. Specifically, this event counts the retirement of the last micro-op of a branch instruction.", .modmsk = INTEL_V3_ATTRS, .equiv = "BR_INST_RETIRED:ALL_BRANCHES", .cntmsk = 0xf, .code = 0xc4, }, { .name = "ARITH", .desc = "Counts arithmetic multiply and divide operations", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0x14, .numasks = LIBPFM_ARRAY_SIZE(nhm_arith), .ngrp = 1, .umasks = nhm_arith, }, { .name = "BACLEAR", .desc = "Branch address calculator", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0xe6, .numasks = LIBPFM_ARRAY_SIZE(nhm_baclear), .ngrp = 1, .umasks = nhm_baclear, }, { .name = "BACLEAR_FORCE_IQ", .desc = "Instruction queue forced BACLEAR", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0x1a7, }, { .name = "BOGUS_BR", .desc = "Counts the number of bogus branches.", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0x1e4, }, { .name = "BPU_CLEARS", .desc = "Branch prediction Unit clears", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0xe8, .numasks = LIBPFM_ARRAY_SIZE(nhm_bpu_clears), .ngrp = 1, .umasks = nhm_bpu_clears, }, { .name = "BPU_MISSED_CALL_RET", .desc = "Branch prediction unit missed call or return", .modmsk = 
INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0x1e5, }, { .name = "BR_INST_DECODED", .desc = "Branch instructions decoded", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0x1e0, }, { .name = "BR_INST_EXEC", .desc = "Branch instructions executed", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0x88, .numasks = LIBPFM_ARRAY_SIZE(nhm_br_inst_exec), .ngrp = 1, .umasks = nhm_br_inst_exec, }, { .name = "BR_INST_RETIRED", .desc = "Retired branch instructions", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0xc4, .flags= INTEL_X86_PEBS, .numasks = LIBPFM_ARRAY_SIZE(nhm_br_inst_retired), .ngrp = 1, .umasks = nhm_br_inst_retired, }, { .name = "BR_MISP_EXEC", .desc = "Mispredicted branches executed", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0x89, .numasks = LIBPFM_ARRAY_SIZE(nhm_br_misp_exec), .ngrp = 1, .umasks = nhm_br_misp_exec, }, { .name = "BR_MISP_RETIRED", .desc = "Count Mispredicted Branch Activity", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0xc5, .flags= INTEL_X86_PEBS, .numasks = LIBPFM_ARRAY_SIZE(nhm_br_misp_retired), .ngrp = 1, .umasks = nhm_br_misp_retired, }, { .name = "CACHE_LOCK_CYCLES", .desc = "Cache lock cycles", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0x3, .code = 0x63, .numasks = LIBPFM_ARRAY_SIZE(nhm_cache_lock_cycles), .ngrp = 1, .umasks = nhm_cache_lock_cycles, }, { .name = "CPU_CLK_UNHALTED", .desc = "Cycles when processor is not in halted state", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0x3c, .numasks = LIBPFM_ARRAY_SIZE(nhm_cpu_clk_unhalted), .ngrp = 1, .umasks = nhm_cpu_clk_unhalted, }, { .name = "DTLB_LOAD_MISSES", .desc = "Data TLB load misses", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0x8, .numasks = LIBPFM_ARRAY_SIZE(nhm_dtlb_load_misses), .ngrp = 1, .umasks = nhm_dtlb_load_misses, }, { .name = "DTLB_MISSES", .desc = "Data TLB misses", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0x49, .numasks = LIBPFM_ARRAY_SIZE(nhm_dtlb_misses), .ngrp = 1, .umasks = nhm_dtlb_misses, }, { .name = "EPT", .desc = "Extended Page 
Directory", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0x4f, .numasks = LIBPFM_ARRAY_SIZE(nhm_ept), .ngrp = 1, .umasks = nhm_ept, }, { .name = "ES_REG_RENAMES", .desc = "ES segment renames", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0x1d5, }, { .name = "FP_ASSIST", .desc = "Floating point assists", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0xf7, .flags= INTEL_X86_PEBS, .numasks = LIBPFM_ARRAY_SIZE(nhm_fp_assist), .ngrp = 1, .umasks = nhm_fp_assist, }, { .name = "FP_COMP_OPS_EXE", .desc = "Floating point computational micro-ops", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0x10, .numasks = LIBPFM_ARRAY_SIZE(nhm_fp_comp_ops_exe), .ngrp = 1, .umasks = nhm_fp_comp_ops_exe, }, { .name = "FP_MMX_TRANS", .desc = "Floating Point to and from MMX transitions", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0xcc, .numasks = LIBPFM_ARRAY_SIZE(nhm_fp_mmx_trans), .ngrp = 1, .umasks = nhm_fp_mmx_trans, }, { .name = "IFU_IVC", .desc = "Instruction Fetch unit victim cache", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0x81, .numasks = LIBPFM_ARRAY_SIZE(nhm_ifu_ivc), .ngrp = 1, .umasks = nhm_ifu_ivc, }, { .name = "ILD_STALL", .desc = "Instruction Length Decoder stalls", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0x87, .numasks = LIBPFM_ARRAY_SIZE(nhm_ild_stall), .ngrp = 1, .umasks = nhm_ild_stall, }, { .name = "INST_DECODED", .desc = "Instructions decoded", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0x18, .numasks = LIBPFM_ARRAY_SIZE(nhm_inst_decoded), .ngrp = 1, .umasks = nhm_inst_decoded, }, { .name = "INST_QUEUE_WRITES", .desc = "Instructions written to instruction queue.", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0x117, }, { .name = "INST_QUEUE_WRITE_CYCLES", .desc = "Cycles instructions are written to the instruction queue", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0x11e, }, { .name = "INST_RETIRED", .desc = "Instructions retired", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0xc0, .flags= INTEL_X86_PEBS, .numasks = 
LIBPFM_ARRAY_SIZE(nhm_inst_retired), .ngrp = 1, .umasks = nhm_inst_retired, }, { .name = "IO_TRANSACTIONS", .desc = "I/O transactions", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0x16c, }, { .name = "ITLB_FLUSH", .desc = "Counts the number of ITLB flushes", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0x1ae, }, { .name = "ITLB_MISSES", .desc = "Instruction TLB misses", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0x85, .numasks = LIBPFM_ARRAY_SIZE(nhm_dtlb_misses), .ngrp = 1, .umasks = nhm_dtlb_misses, /* identical to actual umasks list for this event */ }, { .name = "ITLB_MISS_RETIRED", .desc = "Retired instructions that missed the ITLB (Precise Event)", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0x20c8, .flags= INTEL_X86_PEBS, }, { .name = "L1D", .desc = "L1D cache", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0x3, .code = 0x51, .numasks = LIBPFM_ARRAY_SIZE(nhm_l1d), .ngrp = 1, .umasks = nhm_l1d, }, { .name = "L1D_ALL_REF", .desc = "L1D references", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0x3, .code = 0x43, .numasks = LIBPFM_ARRAY_SIZE(nhm_l1d_all_ref), .ngrp = 1, .umasks = nhm_l1d_all_ref, }, { .name = "L1D_CACHE_LD", .desc = "L1D cacheable loads. 
WARNING: event may overcount loads", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0x3, .code = 0x40, .numasks = LIBPFM_ARRAY_SIZE(nhm_l1d_cache_ld), .ngrp = 1, .umasks = nhm_l1d_cache_ld, }, { .name = "L1D_CACHE_LOCK", .desc = "L1 data cache load lock", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0x3, .code = 0x42, .numasks = LIBPFM_ARRAY_SIZE(nhm_l1d_cache_lock), .ngrp = 1, .umasks = nhm_l1d_cache_lock, }, { .name = "L1D_CACHE_LOCK_FB_HIT", .desc = "L1D load lock accepted in fill buffer", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0x3, .code = 0x153, }, { .name = "L1D_CACHE_PREFETCH_LOCK_FB_HIT", .desc = "L1D prefetch load lock accepted in fill buffer", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0x3, .code = 0x152, }, { .name = "L1D_CACHE_ST", .desc = "L1 data cache stores", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0x3, .code = 0x41, .numasks = LIBPFM_ARRAY_SIZE(nhm_l1d_cache_st), .ngrp = 1, .umasks = nhm_l1d_cache_st, }, { .name = "L1D_PREFETCH", .desc = "L1D hardware prefetch", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0x3, .code = 0x4e, .numasks = LIBPFM_ARRAY_SIZE(nhm_l1d_prefetch), .ngrp = 1, .umasks = nhm_l1d_prefetch, }, { .name = "L1D_WB_L2", .desc = "L1 writebacks to L2", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0x28, .numasks = LIBPFM_ARRAY_SIZE(nhm_l1d_wb_l2), .ngrp = 1, .umasks = nhm_l1d_wb_l2, }, { .name = "L1I", .desc = "L1I instruction fetches", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0x80, .numasks = LIBPFM_ARRAY_SIZE(nhm_l1i), .ngrp = 1, .umasks = nhm_l1i, }, { .name = "L1I_OPPORTUNISTIC_HITS", .desc = "Opportunistic hits in streaming", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0x183, }, { .name = "L2_DATA_RQSTS", .desc = "L2 data requests", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0x26, .numasks = LIBPFM_ARRAY_SIZE(nhm_l2_data_rqsts), .ngrp = 1, .umasks = nhm_l2_data_rqsts, }, { .name = "L2_HW_PREFETCH", .desc = "L2 HW prefetches", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0xf3, .numasks = LIBPFM_ARRAY_SIZE(nhm_l2_hw_prefetch), .ngrp = 1, .umasks = 
nhm_l2_hw_prefetch, }, { .name = "L2_LINES_IN", .desc = "L2 lines allocated", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0xf1, .numasks = LIBPFM_ARRAY_SIZE(nhm_l2_lines_in), .ngrp = 1, .umasks = nhm_l2_lines_in, }, { .name = "L2_LINES_OUT", .desc = "L2 lines evicted", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0xf2, .numasks = LIBPFM_ARRAY_SIZE(nhm_l2_lines_out), .ngrp = 1, .umasks = nhm_l2_lines_out, }, { .name = "L2_RQSTS", .desc = "L2 requests", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0x24, .numasks = LIBPFM_ARRAY_SIZE(nhm_l2_rqsts), .ngrp = 1, .umasks = nhm_l2_rqsts, }, { .name = "L2_TRANSACTIONS", .desc = "L2 transactions", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0xf0, .numasks = LIBPFM_ARRAY_SIZE(nhm_l2_transactions), .ngrp = 1, .umasks = nhm_l2_transactions, }, { .name = "L2_WRITE", .desc = "L2 demand lock/store RFO", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0x27, .numasks = LIBPFM_ARRAY_SIZE(nhm_l2_write), .ngrp = 1, .umasks = nhm_l2_write, }, { .name = "LARGE_ITLB", .desc = "Large instruction TLB", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0x82, .numasks = LIBPFM_ARRAY_SIZE(nhm_large_itlb), .ngrp = 1, .umasks = nhm_large_itlb, }, { .name = "LOAD_DISPATCH", .desc = "Loads dispatched", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0x13, .numasks = LIBPFM_ARRAY_SIZE(nhm_load_dispatch), .ngrp = 1, .umasks = nhm_load_dispatch, }, { .name = "LOAD_HIT_PRE", .desc = "Load operations conflicting with software prefetches", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0x3, .code = 0x14c, }, { .name = "LONGEST_LAT_CACHE", .desc = "Longest latency cache reference", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0x2e, .numasks = LIBPFM_ARRAY_SIZE(nhm_longest_lat_cache), .ngrp = 1, .umasks = nhm_longest_lat_cache, }, { .name = "LSD", .desc = "Loop stream detector", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0xa8, .numasks = LIBPFM_ARRAY_SIZE(nhm_lsd), .ngrp = 1, .umasks = nhm_lsd, }, { .name = "MACHINE_CLEARS", .desc = 
"Machine Clear", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0xc3, .numasks = LIBPFM_ARRAY_SIZE(nhm_machine_clears), .ngrp = 1, .umasks = nhm_machine_clears, }, { .name = "MACRO_INSTS", .desc = "Macro-fused instructions", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0xd0, .numasks = LIBPFM_ARRAY_SIZE(nhm_macro_insts), .ngrp = 1, .umasks = nhm_macro_insts, }, { .name = "MEMORY_DISAMBIGUATION", .desc = "Memory Disambiguation Activity", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0x9, .numasks = LIBPFM_ARRAY_SIZE(nhm_memory_disambiguation), .ngrp = 1, .umasks = nhm_memory_disambiguation, }, { .name = "MEM_INST_RETIRED", .desc = "Memory instructions retired", .modmsk = INTEL_V3_ATTRS | _INTEL_X86_ATTR_LDLAT, .cntmsk = 0xf, .code = 0xb, .flags= INTEL_X86_PEBS, .numasks = LIBPFM_ARRAY_SIZE(nhm_mem_inst_retired), .ngrp = 1, .umasks = nhm_mem_inst_retired, }, { .name = "MEM_LOAD_RETIRED", .desc = "Retired loads", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0xcb, .flags= INTEL_X86_PEBS, .numasks = LIBPFM_ARRAY_SIZE(nhm_mem_load_retired), .ngrp = 1, .umasks = nhm_mem_load_retired, }, { .name = "MEM_STORE_RETIRED", .desc = "Retired stores", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0xc, .flags= INTEL_X86_PEBS, .numasks = LIBPFM_ARRAY_SIZE(nhm_mem_store_retired), .ngrp = 1, .umasks = nhm_mem_store_retired, }, { .name = "MEM_UNCORE_RETIRED", .desc = "Load instructions retired which hit offcore", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0xf, .flags= INTEL_X86_PEBS, .numasks = LIBPFM_ARRAY_SIZE(nhm_mem_uncore_retired), .ngrp = 1, .umasks = nhm_mem_uncore_retired, }, { .name = "OFFCORE_REQUESTS", .desc = "Offcore memory requests", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0xb0, .numasks = LIBPFM_ARRAY_SIZE(nhm_offcore_requests), .ngrp = 1, .umasks = nhm_offcore_requests, }, { .name = "OFFCORE_REQUESTS_SQ_FULL", .desc = "Counts cycles the Offcore Request buffer or Super Queue is full.", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0x1b2, 
}, { .name = "PARTIAL_ADDRESS_ALIAS", .desc = "False dependencies due to partial address forming", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0x107, }, { .name = "PIC_ACCESSES", .desc = "Programmable interrupt controller", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0xba, .numasks = LIBPFM_ARRAY_SIZE(nhm_pic_accesses), .ngrp = 1, .umasks = nhm_pic_accesses, }, { .name = "RAT_STALLS", .desc = "Register allocation table stalls", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0xd2, .numasks = LIBPFM_ARRAY_SIZE(nhm_rat_stalls), .ngrp = 1, .umasks = nhm_rat_stalls, }, { .name = "RESOURCE_STALLS", .desc = "Processor stalls", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0xa2, .numasks = LIBPFM_ARRAY_SIZE(nhm_resource_stalls), .ngrp = 1, .umasks = nhm_resource_stalls, }, { .name = "SEG_RENAME_STALLS", .desc = "Segment rename stall cycles", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0x1d4, }, { .name = "SEGMENT_REG_LOADS", .desc = "Counts number of segment register loads", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0x1f8, }, { .name = "SIMD_INT_128", .desc = "128 bit SIMD integer operations", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0x12, .numasks = LIBPFM_ARRAY_SIZE(nhm_simd_int_128), .ngrp = 1, .umasks = nhm_simd_int_128, }, { .name = "SIMD_INT_64", .desc = "64 bit SIMD integer operations", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0xfd, .numasks = LIBPFM_ARRAY_SIZE(nhm_simd_int_64), .ngrp = 1, .umasks = nhm_simd_int_64, }, { .name = "SNOOP_RESPONSE", .desc = "Snoop", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0xb8, .numasks = LIBPFM_ARRAY_SIZE(nhm_snoop_response), .ngrp = 1, .umasks = nhm_snoop_response, }, { .name = "SQ_FULL_STALL_CYCLES", .desc = "Counts cycles the Offcore Request buffer or Super Queue is full and request(s) are outstanding.", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0x1f6, }, { .name = "SQ_MISC", .desc = "Super Queue Activity Related to L2 Cache Access", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, 
.code = 0xf4, .numasks = LIBPFM_ARRAY_SIZE(nhm_sq_misc), .ngrp = 1, .umasks = nhm_sq_misc, }, { .name = "SSE_MEM_EXEC", .desc = "Streaming SIMD executed", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0x4b, .numasks = LIBPFM_ARRAY_SIZE(nhm_sse_mem_exec), .ngrp = 1, .umasks = nhm_sse_mem_exec, }, { .name = "SSEX_UOPS_RETIRED", .desc = "SIMD micro-ops retired", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0xc7, .flags= INTEL_X86_PEBS, .numasks = LIBPFM_ARRAY_SIZE(nhm_ssex_uops_retired), .ngrp = 1, .umasks = nhm_ssex_uops_retired, }, { .name = "STORE_BLOCKS", .desc = "Delayed loads", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0x6, .numasks = LIBPFM_ARRAY_SIZE(nhm_store_blocks), .ngrp = 1, .umasks = nhm_store_blocks, }, { .name = "TWO_UOP_INSTS_DECODED", .desc = "Two micro-ops instructions decoded", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0x119, }, { .name = "UOPS_DECODED_DEC0", .desc = "Micro-ops decoded by decoder 0", .modmsk =0x0, .cntmsk = 0xf, .code = 0x13d, }, { .name = "UOPS_DECODED", .desc = "Micro-ops decoded", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0xd1, .numasks = LIBPFM_ARRAY_SIZE(nhm_uops_decoded), .ngrp = 1, .umasks = nhm_uops_decoded, }, { .name = "UOPS_EXECUTED", .desc = "Micro-ops executed", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0xb1, .numasks = LIBPFM_ARRAY_SIZE(nhm_uops_executed), .ngrp = 1, .umasks = nhm_uops_executed, }, { .name = "UOPS_ISSUED", .desc = "Micro-ops issued", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0xe, .numasks = LIBPFM_ARRAY_SIZE(nhm_uops_issued), .ngrp = 1, .umasks = nhm_uops_issued, }, { .name = "UOPS_RETIRED", .desc = "Micro-ops retired", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0xc2, .flags= INTEL_X86_PEBS, .numasks = LIBPFM_ARRAY_SIZE(nhm_uops_retired), .ngrp = 1, .umasks = nhm_uops_retired, }, { .name = "UOP_UNFUSION", .desc = "Micro-ops unfusions due to FP exceptions", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0x1db, }, { .name = "OFFCORE_RESPONSE_0", .desc = 
"Offcore response 0 (must provide at least one request and one response umasks)", .modmsk = INTEL_V3_ATTRS, .cntmsk = 0xf, .code = 0x1b7, .flags= INTEL_X86_NHM_OFFCORE, .numasks = LIBPFM_ARRAY_SIZE(nhm_offcore_response_0), .ngrp = 2, .umasks = nhm_offcore_response_0, }, }; papi-5.6.0/src/perfctr-2.7.x/usr.lib/event_set.h000664 001750 001750 00000005335 13216244370 023272 0ustar00jshenry1963jshenry1963000000 000000 /* $Id: event_set.h,v 1.5 2004/02/20 21:32:06 mikpe Exp $ * Common definitions used when creating event set descriptions. * * Copyright (C) 2003-2004 Mikael Pettersson */ #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #define UM(um) ((const struct perfctr_unit_mask*)&(um).header) struct perfctr_unit_mask_header { unsigned short default_value; enum perfctr_unit_mask_type type:8; unsigned char nvalues; }; struct perfctr_unit_mask_0 { struct perfctr_unit_mask_header header; }; struct perfctr_unit_mask_1 { struct perfctr_unit_mask_header header; struct perfctr_unit_mask_value values[1]; }; struct perfctr_unit_mask_2 { struct perfctr_unit_mask_header header; struct perfctr_unit_mask_value values[2]; }; struct perfctr_unit_mask_3 { struct perfctr_unit_mask_header header; struct perfctr_unit_mask_value values[3]; }; struct perfctr_unit_mask_4 { struct perfctr_unit_mask_header header; struct perfctr_unit_mask_value values[4]; }; struct perfctr_unit_mask_5 { struct perfctr_unit_mask_header header; struct perfctr_unit_mask_value values[5]; }; struct perfctr_unit_mask_6 { struct perfctr_unit_mask_header header; struct perfctr_unit_mask_value values[6]; }; struct perfctr_unit_mask_7 { struct perfctr_unit_mask_header header; struct perfctr_unit_mask_value values[7]; }; struct perfctr_unit_mask_8 { struct perfctr_unit_mask_header header; struct perfctr_unit_mask_value values[8]; }; struct perfctr_unit_mask_9 { struct perfctr_unit_mask_header header; struct perfctr_unit_mask_value values[9]; }; struct perfctr_unit_mask_13 { struct perfctr_unit_mask_header header; 
struct perfctr_unit_mask_value values[13]; }; struct perfctr_unit_mask_15 { struct perfctr_unit_mask_header header; struct perfctr_unit_mask_value values[15]; }; extern const struct perfctr_event_set perfctr_p5_event_set; extern const struct perfctr_event_set perfctr_p5mmx_event_set; extern const struct perfctr_event_set perfctr_mii_event_set; extern const struct perfctr_event_set perfctr_wcc6_event_set; extern const struct perfctr_event_set perfctr_wc2_event_set; extern const struct perfctr_event_set perfctr_vc3_event_set; extern const struct perfctr_event_set perfctr_ppro_event_set; extern const struct perfctr_event_set perfctr_p2_event_set; extern const struct perfctr_event_set perfctr_p3_event_set; extern const struct perfctr_event_set perfctr_p4_event_set; extern const struct perfctr_event_set perfctr_k7_event_set; extern const struct perfctr_event_set perfctr_k8_event_set; extern const struct perfctr_event_set perfctr_pentm_event_set; extern const struct perfctr_event_set perfctr_k8c_event_set; extern const struct perfctr_event_set perfctr_p4m3_event_set; papi-5.6.0/src/libpfm4/lib/events/intel_ivbep_unc_imc_events.h000664 001750 001750 00000043723 13216244364 026477 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2014 Google Inc. All rights reserved * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. * * PMU: ivbep_unc_imc (Intel IvyBridge-EP IMC uncore PMU) */ static const intel_x86_umask_t ivbep_unc_m_cas_count[]={ { .uname = "ALL", .udesc = "Counts total number of DRAM CAS commands issued on this channel", .ucode = 0xf00, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "RD", .udesc = "Counts all DRAM reads on this channel, incl. underfills", .ucode = 0x300, .uflags = INTEL_X86_NCOMBO, }, { .uname = "RD_REG", .udesc = "Counts number of DRAM read CAS commands issued on this channel, incl. 
regular read CAS and those with implicit precharge", .ucode = 0x100, }, { .uname = "RD_UNDERFILL", .udesc = "Counts number of underfill reads issued by the memory controller", .ucode = 0x200, }, { .uname = "WR", .udesc = "Counts number of DRAM write CAS commands on this channel", .ucode = 0xc00, .uflags = INTEL_X86_NCOMBO, }, { .uname = "WR_RMM", .udesc = "Counts Number of opportunistic DRAM write CAS commands issued on this channel", .ucode = 0x800, }, { .uname = "WR_WMM", .udesc = "Counts number of DRAM write CAS commands issued on this channel while in Write-Major mode", .ucode = 0x400, }, { .uname = "RD_RMM", .udesc = "Counts Number of opportunistic DRAM read CAS commands issued on this channel", .ucode = 0x1000, }, { .uname = "RD_WMM", .udesc = "Counts number of DRAM read CAS commands issued on this channel while in Write-Major mode", .ucode = 0x2000, }, }; static const intel_x86_umask_t ivbep_unc_m_dram_refresh[]={ { .uname = "HIGH", .udesc = "TBD", .ucode = 0x400, }, { .uname = "PANIC", .udesc = "TBD", .ucode = 0x200, }, }; static const intel_x86_umask_t ivbep_unc_m_major_modes[]={ { .uname = "ISOCH", .udesc = "Counts cycles in ISOCH Major mode", .ucode = 0x800, }, { .uname = "PARTIAL", .udesc = "Counts cycles in Partial Major mode", .ucode = 0x400, }, { .uname = "READ", .udesc = "Counts cycles in Read Major mode", .ucode = 0x100, }, { .uname = "WRITE", .udesc = "Counts cycles in Write Major mode", .ucode = 0x200, }, }; static const intel_x86_umask_t ivbep_unc_m_power_cke_cycles[]={ { .uname = "RANK0", .udesc = "Count cycles for rank 0", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "RANK1", .udesc = "Count cycles for rank 1", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "RANK2", .udesc = "Count cycles for rank 2", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO, }, { .uname = "RANK3", .udesc = "Count cycles for rank 3", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO, }, { .uname = "RANK4", .udesc = "Count cycles for rank 4", .ucode = 0x1000, 
.uflags = INTEL_X86_NCOMBO, }, { .uname = "RANK5", .udesc = "Count cycles for rank 5", .ucode = 0x2000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "RANK6", .udesc = "Count cycles for rank 6", .ucode = 0x4000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "RANK7", .udesc = "Count cycles for rank 7", .ucode = 0x8000, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t ivbep_unc_m_preemption[]={ { .uname = "RD_PREEMPT_RD", .udesc = "Counts read over read preemptions", .ucode = 0x100, }, { .uname = "RD_PREEMPT_WR", .udesc = "Counts read over write preemptions", .ucode = 0x200, }, }; static const intel_x86_umask_t ivbep_unc_m_pre_count[]={ { .uname = "PAGE_CLOSE", .udesc = "Counts number of DRAM precharge commands sent on this channel as a result of the page close counter expiring", .ucode = 0x200, }, { .uname = "PAGE_MISS", .udesc = "Counts number of DRAM precharge commands sent on this channel as a result of page misses", .ucode = 0x100, }, { .uname = "RD", .udesc = "Precharge due to read", .ucode = 0x400, }, { .uname = "WR", .udesc = "Precharge due to write", .ucode = 0x800, }, { .uname = "BYP", .udesc = "Precharge due to bypass", .ucode = 0x1000, }, }; static const intel_x86_umask_t ivbep_unc_m_act_count[]={ { .uname = "RD", .udesc = "Activate due to read", .ucode = 0x100, }, { .uname = "WR", .udesc = "Activate due to write", .ucode = 0x200, }, { .uname = "BYP", .udesc = "Activate due to bypass", .ucode = 0x800, }, }; static const intel_x86_umask_t ivbep_unc_m_byp_cmds[]={ { .uname = "ACT", .udesc = "ACT command issued by 2 cycle bypass", .ucode = 0x100, }, { .uname = "CAS", .udesc = "CAS command issued by 2 cycle bypass", .ucode = 0x200, }, { .uname = "PRE", .udesc = "PRE command issued by 2 cycle bypass", .ucode = 0x400, }, }; static const intel_x86_umask_t ivbep_unc_m_rd_cas_prio[]={ { .uname = "LOW", .udesc = "Read CAS issued with low priority", .ucode = 0x100, }, { .uname = "MED", .udesc = "Read CAS issued with medium priority", .ucode = 0x200, }, { 
.uname = "HIGH", .udesc = "Read CAS issued with high priority", .ucode = 0x400, }, { .uname = "PANIC", .udesc = "Read CAS issued with panic non isoch priority (starved)", .ucode = 0x800, }, }; static const intel_x86_umask_t ivbep_unc_m_rd_cas_rank0[]={ { .uname = "BANK0", .udesc = "Bank 0", .ucode = 0x100, }, { .uname = "BANK1", .udesc = "Bank 1", .ucode = 0x200, }, { .uname = "BANK2", .udesc = "Bank 2", .ucode = 0x400, }, { .uname = "BANK3", .udesc = "Bank 3", .ucode = 0x800, }, { .uname = "BANK4", .udesc = "Bank 4", .ucode = 0x1000, }, { .uname = "BANK5", .udesc = "Bank 5", .ucode = 0x2000, }, { .uname = "BANK6", .udesc = "Bank 6", .ucode = 0x4000, }, { .uname = "BANK7", .udesc = "Bank 7", .ucode = 0x8000, } }; static const intel_x86_umask_t ivbep_unc_m_vmse_wr_push[]={ { .uname = "WMM", .udesc = "VMSE write push issued in WMM", .ucode = 0x100, }, { .uname = "RMM", .udesc = "VMSE write push issued in RMM", .ucode = 0x200, } }; static const intel_x86_umask_t ivbep_unc_m_wmm_to_rmm[]={ { .uname = "LOW_THRES", .udesc = "Transition from WMM to RMM because of starve counter", .ucode = 0x100, }, { .uname = "STARVE", .udesc = "TBD", .ucode = 0x200, }, { .uname = "VMSE_RETRY", .udesc = "TBD", .ucode = 0x400, } }; static const intel_x86_entry_t intel_ivbep_unc_m_pe[]={ { .name = "UNC_M_CLOCKTICKS", .desc = "IMC Uncore clockticks (fixed counter)", .modmsk = 0x0, .cntmsk = 0x100000000ull, .code = 0xff, /* perf pseudo encoding for fixed counter */ .flags = INTEL_X86_FIXED, }, { .name = "UNC_M_DCLOCKTICKS", .desc = "IMC Uncore clockticks (generic counters)", .modmsk = IVBEP_UNC_IMC_ATTRS, .cntmsk = 0xf, .code = 0x00, /*encoding for generic counters */ }, { .name = "UNC_M_ACT_COUNT", .desc = "DRAM Activate Count", .code = 0x1, .cntmsk = 0xf, .ngrp = 1, .modmsk = IVBEP_UNC_IMC_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_act_count), .umasks = ivbep_unc_m_act_count }, { .name = "UNC_M_CAS_COUNT", .desc = "DRAM RD_CAS and WR_CAS Commands.", .code = 0x4, .cntmsk = 0xf, .ngrp = 
1, .modmsk = IVBEP_UNC_IMC_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_cas_count), .umasks = ivbep_unc_m_cas_count }, { .name = "UNC_M_DRAM_PRE_ALL", .desc = "DRAM Precharge All Commands", .code = 0x6, .cntmsk = 0xf, .modmsk = IVBEP_UNC_IMC_ATTRS, }, { .name = "UNC_M_DRAM_REFRESH", .desc = "Number of DRAM Refreshes Issued", .code = 0x5, .cntmsk = 0xf, .ngrp = 1, .modmsk = IVBEP_UNC_IMC_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_dram_refresh), .umasks = ivbep_unc_m_dram_refresh }, { .name = "UNC_M_ECC_CORRECTABLE_ERRORS", .desc = "ECC Correctable Errors", .code = 0x9, .cntmsk = 0xf, .modmsk = IVBEP_UNC_IMC_ATTRS, }, { .name = "UNC_M_MAJOR_MODES", .desc = "Cycles in a Major Mode", .code = 0x7, .cntmsk = 0xf, .ngrp = 1, .modmsk = IVBEP_UNC_IMC_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_major_modes), .umasks = ivbep_unc_m_major_modes }, { .name = "UNC_M_POWER_CHANNEL_DLLOFF", .desc = "Channel DLLOFF Cycles", .code = 0x84, .cntmsk = 0xf, .modmsk = IVBEP_UNC_IMC_ATTRS, }, { .name = "UNC_M_POWER_CHANNEL_PPD", .desc = "Channel PPD Cycles", .code = 0x85, .cntmsk = 0xf, .modmsk = IVBEP_UNC_IMC_ATTRS, }, { .name = "UNC_M_POWER_CKE_CYCLES", .desc = "CKE_ON_CYCLES by Rank", .code = 0x83, .cntmsk = 0xf, .ngrp = 1, .modmsk = IVBEP_UNC_IMC_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_power_cke_cycles), .umasks = ivbep_unc_m_power_cke_cycles }, { .name = "UNC_M_POWER_CRITICAL_THROTTLE_CYCLES", .desc = "Critical Throttle Cycles", .code = 0x86, .cntmsk = 0xf, .modmsk = IVBEP_UNC_IMC_ATTRS, }, { .name = "UNC_M_POWER_SELF_REFRESH", .desc = "Clock-Enabled Self-Refresh", .code = 0x43, .cntmsk = 0xf, .modmsk = IVBEP_UNC_IMC_ATTRS, }, { .name = "UNC_M_POWER_THROTTLE_CYCLES", .desc = "Throttle Cycles", .code = 0x41, .cntmsk = 0xf, .ngrp = 1, .modmsk = IVBEP_UNC_IMC_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_power_cke_cycles), .umasks = ivbep_unc_m_power_cke_cycles /* identical to snbep_unc_m_power_cke_cycles */ }, { .name = "UNC_M_PREEMPTION", .desc = "Read Preemption 
Count", .code = 0x8, .cntmsk = 0xf, .ngrp = 1, .modmsk = IVBEP_UNC_IMC_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_preemption), .umasks = ivbep_unc_m_preemption }, { .name = "UNC_M_PRE_COUNT", .desc = "DRAM Precharge commands.", .code = 0x2, .cntmsk = 0xf, .ngrp = 1, .modmsk = IVBEP_UNC_IMC_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_pre_count), .umasks = ivbep_unc_m_pre_count }, { .name = "UNC_M_RPQ_CYCLES_NE", .desc = "Read Pending Queue Not Empty", .code = 0x11, .cntmsk = 0xf, .modmsk = IVBEP_UNC_IMC_ATTRS, }, { .name = "UNC_M_RPQ_INSERTS", .desc = "Read Pending Queue Allocations", .code = 0x10, .cntmsk = 0xf, .modmsk = IVBEP_UNC_IMC_ATTRS, }, { .name = "UNC_M_WPQ_CYCLES_FULL", .desc = "Write Pending Queue Full Cycles", .code = 0x22, .cntmsk = 0xf, .modmsk = IVBEP_UNC_IMC_ATTRS, }, { .name = "UNC_M_WPQ_CYCLES_NE", .desc = "Write Pending Queue Not Empty", .code = 0x21, .cntmsk = 0xf, .modmsk = IVBEP_UNC_IMC_ATTRS, }, { .name = "UNC_M_WPQ_INSERTS", .desc = "Write Pending Queue Allocations", .code = 0x20, .cntmsk = 0xf, .modmsk = IVBEP_UNC_IMC_ATTRS, }, { .name = "UNC_M_WPQ_READ_HIT", .desc = "Write Pending Queue CAM Match", .code = 0x23, .cntmsk = 0xf, .modmsk = IVBEP_UNC_IMC_ATTRS, }, { .name = "UNC_M_WPQ_WRITE_HIT", .desc = "Write Pending Queue CAM Match", .code = 0x24, .cntmsk = 0xf, .modmsk = IVBEP_UNC_IMC_ATTRS, }, { .name = "UNC_M_BYP_CMDS", .desc = "Bypass command event", .code = 0xa1, .cntmsk = 0xf, .ngrp = 1, .modmsk = IVBEP_UNC_IMC_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_byp_cmds), .umasks = ivbep_unc_m_byp_cmds }, { .name = "UNC_M_RD_CAS_PRIO", .desc = "Read CAS priority", .code = 0xa0, .cntmsk = 0xf, .ngrp = 1, .modmsk = IVBEP_UNC_IMC_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_rd_cas_prio), .umasks = ivbep_unc_m_rd_cas_prio }, { .name = "UNC_M_RD_CAS_RANK0", .desc = "Read CAS access to Rank 0", .code = 0xb0, .cntmsk = 0xf, .ngrp = 1, .modmsk = IVBEP_UNC_IMC_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_rd_cas_rank0), .umasks = 
ivbep_unc_m_rd_cas_rank0 }, { .name = "UNC_M_RD_CAS_RANK1", .desc = "Read CAS access to Rank 1", .code = 0xb1, .cntmsk = 0xf, .ngrp = 1, .modmsk = IVBEP_UNC_IMC_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_rd_cas_rank0), /* shared */ .umasks = ivbep_unc_m_rd_cas_rank0 }, { .name = "UNC_M_RD_CAS_RANK2", .desc = "Read CAS access to Rank 2", .code = 0xb2, .cntmsk = 0xf, .ngrp = 1, .modmsk = IVBEP_UNC_IMC_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_rd_cas_rank0), /* shared */ .umasks = ivbep_unc_m_rd_cas_rank0 }, { .name = "UNC_M_RD_CAS_RANK3", .desc = "Read CAS access to Rank 3", .code = 0xb3, .cntmsk = 0xf, .ngrp = 1, .modmsk = IVBEP_UNC_IMC_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_rd_cas_rank0), /* shared */ .umasks = ivbep_unc_m_rd_cas_rank0 }, { .name = "UNC_M_RD_CAS_RANK4", .desc = "Read CAS access to Rank 4", .code = 0xb4, .cntmsk = 0xf, .ngrp = 1, .modmsk = IVBEP_UNC_IMC_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_rd_cas_rank0), /* shared */ .umasks = ivbep_unc_m_rd_cas_rank0 }, { .name = "UNC_M_RD_CAS_RANK5", .desc = "Read CAS access to Rank 5", .code = 0xb5, .cntmsk = 0xf, .ngrp = 1, .modmsk = IVBEP_UNC_IMC_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_rd_cas_rank0), /* shared */ .umasks = ivbep_unc_m_rd_cas_rank0 }, { .name = "UNC_M_RD_CAS_RANK6", .desc = "Read CAS access to Rank 6", .code = 0xb6, .cntmsk = 0xf, .ngrp = 1, .modmsk = IVBEP_UNC_IMC_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_rd_cas_rank0), /* shared */ .umasks = ivbep_unc_m_rd_cas_rank0 }, { .name = "UNC_M_RD_CAS_RANK7", .desc = "Read CAS access to Rank 7", .code = 0xb7, .cntmsk = 0xf, .ngrp = 1, .modmsk = IVBEP_UNC_IMC_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_rd_cas_rank0), /* shared */ .umasks = ivbep_unc_m_rd_cas_rank0 }, { .name = "UNC_M_VMSE_MXB_WR_OCCUPANCY", .desc = "VMSE MXB write buffer occupancy", .code = 0x91, .cntmsk = 0xf, .modmsk = IVBEP_UNC_IMC_ATTRS, }, { .name = "UNC_M_VMSE_WR_PUSH", .desc = "VMSE WR push issued", .code = 0x90, .cntmsk = 0xf, 
.ngrp = 1, .modmsk = IVBEP_UNC_IMC_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_vmse_wr_push), .umasks = ivbep_unc_m_vmse_wr_push }, { .name = "UNC_M_WMM_TO_RMM", .desc = "Transitions from WMM to RMM because of low threshold", .code = 0xc0, .cntmsk = 0xf, .ngrp = 1, .modmsk = IVBEP_UNC_IMC_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_wmm_to_rmm), .umasks = ivbep_unc_m_wmm_to_rmm }, { .name = "UNC_M_WRONG_MM", .desc = "Not getting the requested major mode", .code = 0xc1, .cntmsk = 0xf, .modmsk = IVBEP_UNC_IMC_ATTRS, }, { .name = "UNC_M_WR_CAS_RANK0", .desc = "Write CAS access to Rank 0", .code = 0xb8, .cntmsk = 0xf, .ngrp = 1, .modmsk = IVBEP_UNC_IMC_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_rd_cas_rank0), /* shared */ .umasks = ivbep_unc_m_rd_cas_rank0 }, { .name = "UNC_M_WR_CAS_RANK1", .desc = "Write CAS access to Rank 1", .code = 0xb9, .cntmsk = 0xf, .ngrp = 1, .modmsk = IVBEP_UNC_IMC_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_rd_cas_rank0), /* shared */ .umasks = ivbep_unc_m_rd_cas_rank0 }, { .name = "UNC_M_WR_CAS_RANK2", .desc = "Write CAS access to Rank 2", .code = 0xba, .cntmsk = 0xf, .ngrp = 1, .modmsk = IVBEP_UNC_IMC_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_rd_cas_rank0), /* shared */ .umasks = ivbep_unc_m_rd_cas_rank0 }, { .name = "UNC_M_WR_CAS_RANK3", .desc = "Write CAS access to Rank 3", .code = 0xbb, .cntmsk = 0xf, .ngrp = 1, .modmsk = IVBEP_UNC_IMC_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_rd_cas_rank0), /* shared */ .umasks = ivbep_unc_m_rd_cas_rank0 }, { .name = "UNC_M_WR_CAS_RANK4", .desc = "Write CAS access to Rank 4", .code = 0xbc, .cntmsk = 0xf, .ngrp = 1, .modmsk = IVBEP_UNC_IMC_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_rd_cas_rank0), /* shared */ .umasks = ivbep_unc_m_rd_cas_rank0 }, { .name = "UNC_M_WR_CAS_RANK5", .desc = "Write CAS access to Rank 5", .code = 0xbd, .cntmsk = 0xf, .ngrp = 1, .modmsk = IVBEP_UNC_IMC_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_rd_cas_rank0), /* shared */ .umasks = 
ivbep_unc_m_rd_cas_rank0 }, { .name = "UNC_M_WR_CAS_RANK6", .desc = "Write CAS access to Rank 6", .code = 0xbe, .cntmsk = 0xf, .ngrp = 1, .modmsk = IVBEP_UNC_IMC_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_rd_cas_rank0), /* shared */ .umasks = ivbep_unc_m_rd_cas_rank0 }, { .name = "UNC_M_WR_CAS_RANK7", .desc = "Write CAS access to Rank 7", .code = 0xbf, .cntmsk = 0xf, .ngrp = 1, .modmsk = IVBEP_UNC_IMC_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_m_rd_cas_rank0), /* shared */ .umasks = ivbep_unc_m_rd_cas_rank0 }, }; papi-5.6.0/src/perfctr-2.7.x/etc/costs/Athlon-800000664 001750 001750 00000001362 13216244367 023134 0ustar00jshenry1963jshenry1963000000 000000 [data from an 800 MHz Athlon] PERFCTR INIT: vendor 2, family 6, model 4, stepping 2, clock 800450 kHz PERFCTR INIT: NITER == 64 PERFCTR INIT: loop overhead is 164 cycles PERFCTR INIT: rdtsc cost is 13.9 cycles (1057 total) PERFCTR INIT: rdpmc cost is 12.3 cycles (956 total) PERFCTR INIT: rdmsr (counter) cost is 51.7 cycles (3478 total) PERFCTR INIT: rdmsr (evntsel) cost is 52.5 cycles (3527 total) PERFCTR INIT: wrmsr (counter) cost is 82.9 cycles (5472 total) PERFCTR INIT: wrmsr (evntsel) cost is 231.5 cycles (14983 total) PERFCTR INIT: read cr4 cost is 2.0 cycles (295 total) PERFCTR INIT: write cr4 cost is 63.1 cycles (4208 total) PERFCTR INIT: sync_core cost is 73.0 cycles (4840 total) perfctr: driver 2.7.5, cpu type AMD K7/K8 at 800450 kHz papi-5.6.0/src/libpfm4/lib/events/amd64_events_fam16h.h000664 001750 001750 00000106612 13216244364 024554 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2017 by Vince Weaver * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the 
Software is furnished to do so,
 * subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
 * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
 * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
 * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
 * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * This file is part of libpfm, a performance monitoring support library for
 * applications on Linux.
 *
 * PMU: amd64_fam16h (AMD64 Fam16h)
 */

/*
 * Dispatched FPU (event 0x0): unit masks of the DISPATCHED_FPU event.
 * Each named entry selects one bit; "ALL" (0x3) is the OR of both bits and
 * carries AMD64_FL_DFL (presumably "default when no unit mask is given" --
 * confirm against the flag definition).
 */
static const amd64_umask_t amd64_fam16h_dispatched_fpu[]={
  { .uname = "PIPE0", .udesc = "Pipe0 dispatches", .ucode = 0x1, },
  { .uname = "PIPE1", .udesc = "Pipe1 dispatches", .ucode = 0x2, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x3, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

/*
 * Retired SSE/AVX (event 0x03): unit masks of RETIRED_SSE_AVX_OPERATIONS.
 * Low nibble = single precision, high nibble = double precision.
 * NOTE(review): the named bits OR to 0x77 but "ALL" is 0x7f, i.e. it also
 * sets bit 0x8 for which no sub-event is listed here -- confirm against the
 * processor manual whether that bit is meaningful.
 */
static const amd64_umask_t amd64_fam16h_retired_sse_operations[]={
  { .uname = "SINGLE_ADD_SUB_OPS", .udesc = "Single precision add/subtract ops", .ucode = 0x1, },
  { .uname = "SINGLE_MUL_OPS", .udesc = "Single precision multiply ops", .ucode = 0x2, },
  { .uname = "SINGLE_DIV_OPS", .udesc = "Single precision divide/square root ops", .ucode = 0x4, },
  { .uname = "DOUBLE_ADD_SUB_OPS", .udesc = "Double precision add/subtract ops", .ucode = 0x10, },
  { .uname = "DOUBLE_MUL_OPS", .udesc = "Double precision multiply ops", .ucode = 0x20, },
  { .uname = "DOUBLE_DIV_OPS", .udesc = "Double precision divide/square root ops", .ucode = 0x40, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x7f, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

/* Retired serializing ops (event 0x05): unit masks of RETIRED_SERIALIZING_OPS */
static const amd64_umask_t
amd64_fam16h_retired_serializing_ops[]={
  /* One bit per uop class; "ALL" (0xf) is the OR of the four bits. */
  { .uname = "SSE_BOTTOM_EXECUTING_UOPS", .udesc = "SSE bottom-executing uops retired", .ucode = 0x1, },
  { .uname = "SSE_CONTROL_RENAMING_UOPS", .udesc = "SSE control-renaming uops retired", .ucode = 0x2, },
  { .uname = "X87_BOTTOM_EXECUTING_UOPS", .udesc = "X87 bottom-executing uops retired", .ucode = 0x4, },
  { .uname = "X87_CONTROL_RENAMING_UOPS", .udesc = "X87 control-renaming uops retired", .ucode = 0x8, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0xf, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

/*
 * Retired x87 ops (event 0x11): unit masks of RETIRED_X87_OPERATIONS.
 * NOTE(review): the three named bits OR to 0x7, yet "ALL" is 0xf and thus
 * also sets bit 0x8, which has no named sub-event in this table -- verify
 * against the processor manual before relying on the default encoding.
 */
static const amd64_umask_t amd64_fam16h_retired_x87_ops[]={
  { .uname = "ADD_AND_SUB", .udesc = "Add and subtract", .ucode = 0x1, },
  { .uname = "MULTIPLY", .udesc = "Multiply", .ucode = 0x2, },
  { .uname = "DIVIDE_AND_FSQRT", .udesc = "Divide and fsqrt", .ucode = 0x4, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0xf, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

/*
 * Segment Register Loads (event 0x20): one bit per segment register,
 * "ALL" (0x7f) is the OR of the seven bits.
 */
static const amd64_umask_t amd64_fam16h_segment_register_loads[]={
  { .uname = "ES", .udesc = "ES", .ucode = 0x1, },
  { .uname = "CS", .udesc = "CS", .ucode = 0x2, },
  { .uname = "SS", .udesc = "SS", .ucode = 0x4, },
  { .uname = "DS", .udesc = "DS", .ucode = 0x8, },
  { .uname = "FS", .udesc = "FS", .ucode = 0x10, },
  { .uname = "GS", .udesc = "GS", .ucode = 0x20, },
  { .uname = "HS", .udesc = "HS", .ucode = 0x40, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x7f, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

/*
 * Pipeline Restart Due to Probe Hit: in amd64_fam16h_pe this table is attached
 * to PIPELINE_RESTART_DUE_TO_PROBE_HIT (event 0x22); event 0x21
 * (self-modifying code) is declared there without unit masks.
 */
static const amd64_umask_t amd64_fam16h_pipeline_restart[]={
  { .uname = "INVALIDATING_PROBES", .udesc = "Evictions caused by invalidating probes", .ucode = 0x1, },
  { .uname = "FILLS", .udesc = "Evictions caused by fills", .ucode = 0x2, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x3, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

/* Locked Operations (event 0x24): unit masks of LOCKED_OPS */
static const amd64_umask_t
amd64_fam16h_locked_ops[]={
  /* "ALL" (0x7) is the OR of the three bits below. */
  { .uname = "EXECUTED", .udesc = "The number of locked instructions executed", .ucode = 0x1, },
  { .uname = "CYCLES_TO_ACQUIRE", .udesc = "The number of cycles to acquire bus lock", .ucode = 0x2, },
  { .uname = "CYCLES_TO_UNLOCK", .udesc = "The number of cycles to unlock cache line", .ucode = 0x4, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x7, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

/* LS Dispatch (event 0x29): unit masks of LS_DISPATCH, split by operation kind */
static const amd64_umask_t amd64_fam16h_ls_dispatch[]={
  { .uname = "LOADS", .udesc = "The number of loads", .ucode = 0x1, },
  { .uname = "STORES", .udesc = "The number of stores", .ucode = 0x2, },
  { .uname = "LOAD_OP_STORES", .udesc = "The number of load-op-stores", .ucode = 0x4, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x7, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

/*
 * Cancelled Store to Load Forward Operations (event 0x2a): one bit per
 * cancellation reason.
 */
static const amd64_umask_t amd64_fam16h_cancelled_store_to_load_forward_operations[]={
  { .uname = "ADDRESS_MISMATCHES", .udesc = "Address mismatches (starting byte not the same).", .ucode = 0x1, },
  { .uname = "STORE_IS_SMALLER_THAN_LOAD", .udesc = "Store is smaller than load.", .ucode = 0x2, },
  { .uname = "MISALIGNED", .udesc = "Misaligned.", .ucode = 0x4, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x7, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

/*
 * Data cache refills (event 0x42): unit masks of DATA_CACHE_REFILLS, one bit
 * per line state; "ALL" (0x1f) is the OR of the five bits.
 */
static const amd64_umask_t amd64_fam16h_data_cache_refills[]={
  { .uname = "NON_CACHABLE", .udesc = "Non-cachable", .ucode = 0x1, },
  { .uname = "SHARED", .udesc = "Shared", .ucode = 0x2, },
  { .uname = "EXCLUSIVE", .udesc = "Exclusive", .ucode = 0x4, },
  { .uname = "OWNED", .udesc = "Owned", .ucode = 0x8, },
  { .uname = "MODIFIED", .udesc = "Modified", .ucode = 0x10, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x1f, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

/*
 * Cache refills from northbridge (event 0x43): unit masks of
 * DATA_CACHE_REFILLS_FROM_NORTHBRIDGE.
 */
static const amd64_umask_t
amd64_fam16h_data_cache_refills_from_system[]={
  /* Same bit layout as amd64_fam16h_data_cache_refills; "ALL" is 0x1f. */
  { .uname = "NON_CACHABLE", .udesc = "non-cachable", .ucode = 0x1, },
  { .uname = "SHARED", .udesc = "Shared", .ucode = 0x2, },
  { .uname = "EXCLUSIVE", .udesc = "Exclusive", .ucode = 0x4, },
  { .uname = "OWNED", .udesc = "Owned", .ucode = 0x8, },
  { .uname = "MODIFIED", .udesc = "Modified", .ucode = 0x10, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x1f, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

/* Data cache lines evicted (event 0x44): unit masks of DATA_CACHE_LINES_EVICTED */
static const amd64_umask_t amd64_fam16h_data_cache_lines_evicted[]={
  { .uname = "EVICTED", .udesc = "Evicted from probe", .ucode = 0x1, },
  { .uname = "SHARED", .udesc = "Shared eviction", .ucode = 0x2, },
  { .uname = "EXCLUSIVE", .udesc = "Exclusive eviction", .ucode = 0x4, },
  { .uname = "OWNED", .udesc = "Owned eviction", .ucode = 0x8, },
  { .uname = "MODIFIED", .udesc = "Modified eviction", .ucode = 0x10, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x1f, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

/*
 * DTLB Miss (event 0x46): unit masks of DTLB_MISS, split by access kind
 * (store/load) and TLB level (L1/L2).
 */
static const amd64_umask_t amd64_fam16h_dtlb_miss[]={
  { .uname = "STORES_L1TLB", .udesc = "Stores that miss L1TLB", .ucode = 0x1, },
  { .uname = "LOADS_L1TLB", .udesc = "Loads that miss L1TLB", .ucode = 0x2, },
  { .uname = "STORES_L2TLB", .udesc = "Stores that miss L2TLB", .ucode = 0x4, },
  { .uname = "LOADS_L2TLB", .udesc = "Loads that miss L2TLB", .ucode = 0x8, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0xf, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

/* Misaligned accesses (event 0x47): unit masks of MISALIGNED_ACCESSES */
static const amd64_umask_t amd64_fam16h_misaligned_accesses[]={
  { .uname = "MISALIGN_16B", .udesc = "Misaligns that cross 16 Byte boundary", .ucode = 0x1, },
  { .uname = "MISALIGN_4KB", .udesc = "Misaligns that cross a 4kB boundary", .ucode = 0x2, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x3, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

/*
 * Prefetch Instructions Dispatched (event 0x4b): unit masks of
 * PREFETCH_INSTRUCTIONS_DISPATCHED.
 */
static const
amd64_umask_t amd64_fam16h_prefetch_instructions_dispatched[]={
  /* One bit per prefetch flavour; "ALL" (0x7) is the OR of the three bits. */
  { .uname = "LOAD", .udesc = "Load (Prefetch, PrefetchT0/T1/T2)", .ucode = 0x1, },
  { .uname = "STORE", .udesc = "Store (PrefetchW)", .ucode = 0x2, },
  { .uname = "NTA", .udesc = "NTA (PrefetchNTA)", .ucode = 0x4, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x7, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

/* L1 DTLB Hit (event 0x4d): unit masks of L1_DTLB_HIT, split by page size */
static const amd64_umask_t amd64_fam16h_l1_dtlb_hit[]={
  { .uname = "L1_4K_TLB_HIT", .udesc = "L1 4K TLB hit", .ucode = 0x1, },
  { .uname = "L1_2M_TLB_HIT", .udesc = "L1 2M TLB hit", .ucode = 0x2, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x3, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

/* Ineffective SW Prefetch (event 0x52): unit masks of INEFFECTIVE_SW_PREFETCHES */
static const amd64_umask_t amd64_fam16h_ineffective_sw_prefetches[]={
  { .uname = "SW_PREFETCH_DATA_CACHE", .udesc = "Software prefetch hit in data cache", .ucode = 0x1, },
  { .uname = "SW_PREFETCH_PENDING_FILL", .udesc = "Software prefetch hit a pending fill", .ucode = 0x2, },
  { .uname = "SW_PREFETCH_MAB", .udesc = "Software prefetches that don't get a MAB", .ucode = 0x4, },
  { .uname = "SW_PREFETCH_HIT_L2", .udesc = "Software prefetches that hit in L2", .ucode = 0x8, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0xf, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

/*
 * Uncachable Memory (event 0x61): unit masks of COMMAND_RELATED_UNCACHABLE.
 * Reads use the low nibble, writes the high nibble; "ALL" (0x33) is their OR.
 */
static const amd64_umask_t amd64_fam16h_uncachable_memory[]={
  { .uname = "READ_BYTE", .udesc = "Read byte", .ucode = 0x1, },
  { .uname = "READ_DOUBLEWORD", .udesc = "Read doubleword", .ucode = 0x2, },
  { .uname = "WRITE_BYTE", .udesc = "Write byte", .ucode = 0x10, },
  { .uname = "WRITE_DOUBLEWORD", .udesc = "Write doubleword", .ucode = 0x20, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x33, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

/*
 * Read Block Operations (event 0x62): unit masks of COMMAND_RELATED_READ_BLOCK.
 * Low nibble = plain read-block requests, high nibble = speculative ones.
 */
static const amd64_umask_t amd64_fam16h_read_block[]={
  { .uname = "READ_BLOCK", .udesc = "Read block", .ucode = 0x1, },
  {
.uname = "RDBLKMOD", .udesc = "RdBlkMod", .ucode = 0x2, }, { .uname = "READ_BLOCK_SHARED", .udesc = "Read block shared", .ucode = 0x4, }, { .uname = "READ_BLOCK_SPEC", .udesc = "Read block speculative", .ucode = 0x10, }, { .uname = "READ_BLOCK_SPEC_MOD", .udesc = "Read block speculative modified", .ucode = 0x20, }, { .uname = "READ_BLOCK_SPEC_SHARED", .udesc = "Read block speculative shared", .ucode = 0x40, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x77, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; /* Change to Dirty 0x63 */ static const amd64_umask_t amd64_fam16h_change_dirty[]={ { .uname = "CHANGE_DIRTY", .udesc = "Change to dirty", .ucode = 0x10, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x10, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; /* Memory Requests 0x65 */ static const amd64_umask_t amd64_fam16h_memory_requests[]={ { .uname = "NON_CACHEABLE", .udesc = "Requests to non-cacheable (UC) memory", .ucode = 0x1, }, { .uname = "WRITE_COMBINING", .udesc = "Requests to write-combining (WC) memory or WC buffer flushes to WB memory", .ucode = 0x2, }, { .uname = "STREAMING_STORE", .udesc = "Streaming store (SS) requests", .ucode = 0x80, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x83, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; /* Data Cache Prefetches 0x67 */ static const amd64_umask_t amd64_fam16h_data_prefetches[]={ { .uname = "ATTEMPTED", .udesc = "Prefetch attempts", .ucode = 0x2, }, { .uname = "MAB", .udesc = "Hits on MAB", .ucode = 0x8, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0xa, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; /* MAB Requests 0x68 and 0x69 */ static const amd64_umask_t amd64_fam16h_mab_requests[]={ { .uname = "DC_MISS0", .udesc = "Data cache miss buffer 0", .ucode = 0x1, }, { .uname = "DC_MISS1", .udesc = "Data cache miss buffer 1", .ucode = 0x2, }, { .uname = "DC_MISS2", .udesc = "Data cache miss buffer 2", .ucode = 0x4, }, { .uname = 
"DC_MISS3", .udesc = "Data cache miss buffer 3", .ucode = 0x8, }, { .uname = "DC_MISS4", .udesc = "Data cache miss buffer 4", .ucode = 0x10, }, { .uname = "DC_MISS5", .udesc = "Data cache miss buffer 5", .ucode = 0x20, }, { .uname = "DC_MISS6", .udesc = "Data cache miss buffer 6", .ucode = 0x40, }, { .uname = "DC_MISS7", .udesc = "Data cache miss buffer 7", .ucode = 0x80, }, { .uname = "IC_MISS0", .udesc = "Instruction cache miss buffer 0", .ucode = 0x100, }, { .uname = "IC_MISS1", .udesc = "Instruction cache miss buffer 1", .ucode = 0x200, }, { .uname = "DC_ANY", .udesc = "Any data cache miss buffer", .ucode = 0x800, }, { .uname = "IC_ANY", .udesc = "Any instruction cache miss buffer", .ucode = 0x1000, }, }; /* System Response by Coherence 0x6c */ static const amd64_umask_t amd64_fam16h_system_responses[]={ { .uname = "EXCLUSIVE", .udesc = "Exclusive", .ucode = 0x1, }, { .uname = "MODIFIED", .udesc = "Modified", .ucode = 0x2, }, { .uname = "SHARED", .udesc = "Shared", .ucode = 0x4, }, { .uname = "OWNED", .udesc = "Owned", .ucode = 0x8, }, { .uname = "DATA_ERROR", .udesc = "Data Error", .ucode = 0x10, }, { .uname = "CHANGE_DIRTY", .udesc = "Change to dirty success", .ucode = 0x20, }, { .uname = "UNCACHEABLE", .udesc = "Uncacheable", .ucode = 0x40, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x7f, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; /* Data written to system 0x6d */ static const amd64_umask_t amd64_fam16h_data_written_to_system[]={ { .uname = "DATA_LINE_EVICTIONS", .udesc = "Data line evictions", .ucode = 0x1, }, { .uname = "INSTRUCTION_ATTRIBUTE_EVICTIONS", .udesc = "Instruction attribute evictions", .ucode = 0x2, }, { .uname = "BYTE_ENABLE_MASK_UNCACHEABLE", .udesc = "Byte enable mask for uncacheabe or I/O store", .ucode = 0x4, }, { .uname = "DATA_FOR_UNCACHEABLE", .udesc = "Data for uncacheabe or I/O store", .ucode = 0x8, }, { .uname = "BYTE_ENABLE_MASK_WRITE_COMBINE", .udesc = "Byte enable mask for write combine context flush", 
.ucode = 0x10, }, { .uname = "DATA_FOR_WRITE_COMBINE", .udesc = "Data for write combine contet flush", .ucode = 0x20, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x3f, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; /* cache cross invalidate 0x75 */ static const amd64_umask_t amd64_fam16h_cache_cross_invalidates[]={ { .uname = "DC_INVALIDATES_IC", .udesc = "Modification of instructions of data too close to code", .ucode = 0x1, }, { .uname = "DC_INVALIDATES_DC", .udesc = "CD or WBINVD", .ucode = 0x2, }, { .uname = "IC_INVALIDATES_IC", .udesc = "aliasing", .ucode = 0x4, }, { .uname = "IC_INVALIDATES_DC_DIRTY", .udesc = "Exection of modified instruction or data too close to code", .ucode = 0x8, }, { .uname = "IC_HITS_DC_CLEAN_LINE", .udesc = "Reading code", .ucode = 0x10, }, { .uname = "DC_PROBE_REJECTED_EARLY", .udesc = "DC probe rejected early", .ucode = 0x20, }, { .uname = "DC_PROBE_REJECTED_LATE", .udesc = "DC probe rejected late", .ucode = 0x40, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x7f, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; /* PDC Miss 0x162 */ static const amd64_umask_t amd64_fam16h_pdc_miss[]={ { .uname = "HOST_PDE_LEVEL", .udesc = "Host: PDE level", .ucode = 0x1, }, { .uname = "HOST_PDPE_LEVEL", .udesc = "Host: PDPE level", .ucode = 0x2, }, { .uname = "HOST_PML4E_LEVEL", .udesc = "Host: PML4E level", .ucode = 0x4, }, { .uname = "GUEST_PDE_LEVEL", .udesc = "Guest: PDE level", .ucode = 0x10, }, { .uname = "GUEST_PDPE_LEVEL", .udesc = "Guest: PDPE level", .ucode = 0x20, }, { .uname = "GUEST_PML4E_LEVEL", .udesc = "Guest: PML4E level", .ucode = 0x40, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x77, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; /* ITLB Miss 0x85 */ static const amd64_umask_t amd64_fam16h_itlb_miss[]={ { .uname = "4K_PAGE_FETCHES", .udesc = "Instruction fetches to a 4K page.", .ucode = 0x1, }, { .uname = "2M_PAGE_FETCHES", .udesc = "Instruction fetches to a 2M 
page.", .ucode = 0x2, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x3, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; /* Instruction Cache Lines Invalidated 0x8c */ static const amd64_umask_t amd64_fam16h_instruction_cache_lines_invalidated[]={ { .uname = "IC_INVALIDATE_LS_PROBE", .udesc = "Instruction cache invalidate due to LS probe", .ucode = 0x1, }, { .uname = "IC_INVALIDATE_BU_PROBE", .udesc = "Instruction cache invalidate due to BU probe", .ucode = 0x2, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x3, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; /* Retired indirect branch info (0x19a) */ static const amd64_umask_t amd64_fam16h_retired_branch_info[]={ { .uname = "RETIRED", .udesc = "Retired indirect branch instruction.", .ucode = 0x1, }, { .uname = "MISPREDICTED", .udesc = "Retired mispredicted near unconditional jump.", .ucode = 0x2, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x3, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; /* Retired MMX/FP instructions 0xcb */ static const amd64_umask_t amd64_fam16h_retired_mmx_and_fp_instructions[]={ { .uname = "X87", .udesc = "X87 instructions", .ucode = 0x1, }, { .uname = "SSE", .udesc = "SSE, SSE2, SSE3, MNI instructions", .ucode = 0x2, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x3, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; /* FPU exceptions 0xdb */ static const amd64_umask_t amd64_fam16h_fpu_exceptions[]={ { .uname = "X87_RECLASS_MICROFAULTS", .udesc = "X87 reclass microfaults", .ucode = 0x1, }, { .uname = "SSE_RETYPE_MICROFAULTS", .udesc = "SSE retype microfaults", .ucode = 0x2, }, { .uname = "SSE_RECLASS_MICROFAULTS", .udesc = "SSE reclass microfaults", .ucode = 0x4, }, { .uname = "SSE_AND_X87_MICROTRAPS", .udesc = "SSE and x87 microtraps", .ucode = 0x8, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0xf, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_entry_t amd64_fam16h_pe[]={ { 
.name = "DISPATCHED_FPU", .desc = "Dispatched FPU Operations", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x0,
    .numasks = LIBPFM_ARRAY_SIZE(amd64_fam16h_dispatched_fpu), .ngrp = 1, .umasks = amd64_fam16h_dispatched_fpu, },
  /* Entries without .numasks/.ngrp/.umasks have no unit masks (fields default to zero). */
  { .name = "FP_SCHEDULER_EMPTY", .desc = "Cycles in which the FPU is Empty", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x1, },
  { .name = "DISPATCHED_FPU_OPS_FAST_FLAG", .desc = "Dispatched Fast Flag FPU Operations", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x2, },
  { .name = "RETIRED_SSE_AVX_OPERATIONS", .desc = "Retired SSE/AVX Operations", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x3,
    .numasks = LIBPFM_ARRAY_SIZE(amd64_fam16h_retired_sse_operations), .ngrp = 1, .umasks = amd64_fam16h_retired_sse_operations, },
  { .name = "RETIRED_SERIALIZING_OPS", .desc = "Retired Serializing Ops", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x5,
    .numasks = LIBPFM_ARRAY_SIZE(amd64_fam16h_retired_serializing_ops), .ngrp = 1, .umasks = amd64_fam16h_retired_serializing_ops, },
  { .name = "RETIRED_X87_OPERATIONS", .desc = "Retired x87 operations", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x11,
    .numasks = LIBPFM_ARRAY_SIZE(amd64_fam16h_retired_x87_ops), .ngrp = 1, .umasks = amd64_fam16h_retired_x87_ops, },
  { .name = "SEGMENT_REGISTER_LOADS", .desc = "Segment Register Loads", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x20,
    .numasks = LIBPFM_ARRAY_SIZE(amd64_fam16h_segment_register_loads), .ngrp = 1, .umasks = amd64_fam16h_segment_register_loads, },
  { .name = "PIPELINE_RESTART_DUE_TO_SELF_MODIFYING_CODE", .desc = "Pipeline Restart Due to Self-Modifying Code", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x21, },
  /* The pipeline_restart unit masks belong to this event (0x22), not to 0x21. */
  { .name = "PIPELINE_RESTART_DUE_TO_PROBE_HIT", .desc = "Pipeline Restart Due to Probe Hit", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x22,
    .numasks = LIBPFM_ARRAY_SIZE(amd64_fam16h_pipeline_restart), .ngrp = 1, .umasks = amd64_fam16h_pipeline_restart, },
  { .name = "LOCKED_OPS", .desc = "Locked Operations", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x24, .numasks =
LIBPFM_ARRAY_SIZE(amd64_fam16h_locked_ops), .ngrp = 1, .umasks = amd64_fam16h_locked_ops, },
  { .name = "RETIRED_CLFLUSH_INSTRUCTIONS", .desc = "Retired CLFLUSH Instructions", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x26, },
  { .name = "RETIRED_CPUID_INSTRUCTIONS", .desc = "Retired CPUID Instructions", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x27, },
  { .name = "LS_DISPATCH", .desc = "Transactions dispatched to load-store unit", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x29,
    .numasks = LIBPFM_ARRAY_SIZE(amd64_fam16h_ls_dispatch), .ngrp = 1, .umasks = amd64_fam16h_ls_dispatch, },
  { .name = "CANCELLED_STORE_TO_LOAD_FORWARD_OPERATIONS", .desc = "Cancelled Store to Load Forward Operations", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x2a,
    .numasks = LIBPFM_ARRAY_SIZE(amd64_fam16h_cancelled_store_to_load_forward_operations), .ngrp = 1, .umasks = amd64_fam16h_cancelled_store_to_load_forward_operations, },
  /* Data cache events (0x40 onwards). */
  { .name = "DATA_CACHE_ACCESSES", .desc = "Data Cache Accesses", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x40, },
  { .name = "DATA_CACHE_MISSES", .desc = "Data Cache Misses", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x41, },
  { .name = "DATA_CACHE_REFILLS", .desc = "Data Cache Refills from L2 or Northbridge", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x42,
    .numasks = LIBPFM_ARRAY_SIZE(amd64_fam16h_data_cache_refills), .ngrp = 1, .umasks = amd64_fam16h_data_cache_refills, },
  /* Event is named ..._FROM_NORTHBRIDGE while its unit-mask table is named ..._from_system. */
  { .name = "DATA_CACHE_REFILLS_FROM_NORTHBRIDGE", .desc = "Data Cache Refills from the Northbridge", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x43,
    .numasks = LIBPFM_ARRAY_SIZE(amd64_fam16h_data_cache_refills_from_system), .ngrp = 1, .umasks = amd64_fam16h_data_cache_refills_from_system, },
  { .name = "DATA_CACHE_LINES_EVICTED", .desc = "Data Cache Lines Evicted", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x44,
    .numasks = LIBPFM_ARRAY_SIZE(amd64_fam16h_data_cache_lines_evicted), .ngrp = 1, .umasks = amd64_fam16h_data_cache_lines_evicted, },
  { .name = "L1_DTLB_MISS_AND_L2_DTLB_HIT", .desc = "L1 DTLB Miss and L2 DTLB Hit", .modmsk =
AMD64_FAM10H_ATTRS, .code = 0x45, },
  { .name = "DTLB_MISS", .desc = "L1 DTLB and L2 DTLB Miss", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x46,
    .numasks = LIBPFM_ARRAY_SIZE(amd64_fam16h_dtlb_miss), .ngrp = 1, .umasks = amd64_fam16h_dtlb_miss, },
  { .name = "MISALIGNED_ACCESSES", .desc = "Misaligned Accesses", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x47,
    .numasks = LIBPFM_ARRAY_SIZE(amd64_fam16h_misaligned_accesses), .ngrp = 1, .umasks = amd64_fam16h_misaligned_accesses, },
  { .name = "PREFETCH_INSTRUCTIONS_DISPATCHED", .desc = "Prefetch Instructions Dispatched", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x4b,
    .numasks = LIBPFM_ARRAY_SIZE(amd64_fam16h_prefetch_instructions_dispatched), .ngrp = 1, .umasks = amd64_fam16h_prefetch_instructions_dispatched, },
  { .name = "DCACHE_MISSES_BY_LOCKED_INSTRUCTIONS", .desc = "DCACHE Misses by Locked Instructions", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x4c, },
  { .name = "L1_DTLB_HIT", .desc = "L1 DTLB Hit", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x4d,
    .numasks = LIBPFM_ARRAY_SIZE(amd64_fam16h_l1_dtlb_hit), .ngrp = 1, .umasks = amd64_fam16h_l1_dtlb_hit, },
  { .name = "INEFFECTIVE_SW_PREFETCHES", .desc = "Ineffective Software Prefetches", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x52,
    .numasks = LIBPFM_ARRAY_SIZE(amd64_fam16h_ineffective_sw_prefetches), .ngrp = 1, .umasks = amd64_fam16h_ineffective_sw_prefetches, },
  { .name = "GLOBAL_TLB_FLUSHES", .desc = "Global TLB Flushes", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x54, },
  /* fam30h only */
  /* NOTE(review): the table itself does not show how many of the following entries the remark above covers. */
  { .name = "COMMAND_RELATED_UNCACHABLE", .desc = "Commands related to uncachable memory and I/O", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x61,
    .numasks = LIBPFM_ARRAY_SIZE(amd64_fam16h_uncachable_memory), .ngrp = 1, .umasks = amd64_fam16h_uncachable_memory, },
  { .name = "COMMAND_RELATED_READ_BLOCK", .desc = "Commands related to read block operations", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x62,
    .numasks = LIBPFM_ARRAY_SIZE(amd64_fam16h_read_block), .ngrp = 1, .umasks = amd64_fam16h_read_block, },
  { .name = "COMMAND_RELATED_DIRTY", .desc = "Commands related to change dirty operations", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x63,
    .numasks = LIBPFM_ARRAY_SIZE(amd64_fam16h_change_dirty), .ngrp = 1, .umasks = amd64_fam16h_change_dirty, },
  { .name = "MEMORY_REQUESTS", .desc = "Memory Requests by Type", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x65,
    .numasks = LIBPFM_ARRAY_SIZE(amd64_fam16h_memory_requests), .ngrp = 1, .umasks = amd64_fam16h_memory_requests, },
  { .name = "DATA_PREFETCHES", .desc = "Data Prefetches", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x67,
    .numasks = LIBPFM_ARRAY_SIZE(amd64_fam16h_data_prefetches), .ngrp = 1, .umasks = amd64_fam16h_data_prefetches, },
  /* MAB_REQUESTS (0x68) and MAB_WAIT_CYCLES (0x69) deliberately share one unit-mask table. */
  { .name = "MAB_REQUESTS", .desc = "Miss address buffer requests", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x68,
    .numasks = LIBPFM_ARRAY_SIZE(amd64_fam16h_mab_requests), .ngrp = 1, .umasks = amd64_fam16h_mab_requests, },
  { .name = "MAB_WAIT_CYCLES", .desc = "Miss address buffer wait cycles", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x69,
    .numasks = LIBPFM_ARRAY_SIZE(amd64_fam16h_mab_requests), .ngrp = 1, .umasks = amd64_fam16h_mab_requests, },
  { .name = "SYSTEM_RESPONSES", .desc = "L2I Responses by Coherency State", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x6c,
    .numasks = LIBPFM_ARRAY_SIZE(amd64_fam16h_system_responses), .ngrp = 1, .umasks = amd64_fam16h_system_responses, },
  { .name = "DATA_WRITTEN_TO_SYSTEM", .desc = "16-byte transfers written to system", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x6d,
    .numasks = LIBPFM_ARRAY_SIZE(amd64_fam16h_data_written_to_system), .ngrp = 1, .umasks = amd64_fam16h_data_written_to_system, },
  { .name = "CACHE_CROSS_INVALIDATES", .desc = "Internal probes causing cache lines to be invalidated", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x75,
    .numasks = LIBPFM_ARRAY_SIZE(amd64_fam16h_cache_cross_invalidates), .ngrp = 1, .umasks = amd64_fam16h_cache_cross_invalidates, },
  { .name = "CPU_CLK_UNHALTED", .desc = "CPU Clocks not Halted", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x76, },
  /* PDC_MISS uses an extended event code (0x162) and therefore sits out of numeric order here. */
  { .name = "PDC_MISS", .desc = "Number of PDC misses", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x162,
    .numasks = LIBPFM_ARRAY_SIZE(amd64_fam16h_pdc_miss), .ngrp = 1, .umasks = amd64_fam16h_pdc_miss, },
  /* Instruction cache / ITLB events (0x80 onwards). */
  { .name = "INSTRUCTION_CACHE_FETCHES", .desc = "Instruction Cache Fetches", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x80, },
  { .name = "INSTRUCTION_CACHE_MISSES", .desc = "Instruction Cache Misses", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x81, },
  { .name = "INSTRUCTION_CACHE_REFILLS_FROM_L2", .desc = "Instruction Cache Refills from L2", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x82, },
  { .name = "INSTRUCTION_CACHE_REFILLS_FROM_SYSTEM", .desc = "Instruction Cache Refills from System", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x83, },
  { .name = "L1_ITLB_MISS_AND_L2_ITLB_HIT", .desc = "L1 ITLB Miss and L2 ITLB Hit", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x84, },
  { .name = "ITLB_MISS", .desc = "Instruction fetches that miss in 4k and 2M ITLB", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x85,
    .numasks = LIBPFM_ARRAY_SIZE(amd64_fam16h_itlb_miss), .ngrp = 1, .umasks = amd64_fam16h_itlb_miss, },
  { .name = "INSTRUCTION_FETCH_STALL", .desc = "Instruction Fetch Stall", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x87, },
  { .name = "RETURN_STACK_HITS", .desc = "Return Stack Hits", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x88, },
  { .name = "RETURN_STACK_OVERFLOWS", .desc = "Return Stack Overflows", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x89, },
  { .name = "INSTRUCTION_CACHE_VICTIMS", .desc = "Instruction Cache Victims", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x8b, },
  { .name = "INSTRUCTION_CACHE_LINES_INVALIDATED", .desc = "Instruction Cache Lines Invalidated", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x8c,
    .numasks = LIBPFM_ARRAY_SIZE(amd64_fam16h_instruction_cache_lines_invalidated), .ngrp = 1, .umasks = amd64_fam16h_instruction_cache_lines_invalidated, },
  { .name = "ITLB_RELOADS", .desc = "ITLB Reloads", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x99, },
  { .name = "ITLB_RELOADS_ABORTED", .desc = "ITLB reloads aborted",
.modmsk = AMD64_FAM10H_ATTRS, .code = 0x9a, },
  /* Extended event code (0x19a); listed between 0x9a and the 0xc0 retire events. */
  { .name = "RETIRED_INDIRECT_BRANCH_INFO", .desc = "Retired indirect branch info", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x19a,
    .numasks = LIBPFM_ARRAY_SIZE(amd64_fam16h_retired_branch_info), .ngrp = 1, .umasks = amd64_fam16h_retired_branch_info, },
  /* Retirement / branch events (0xc0-0xcb); none of 0xc0-0xca take unit masks. */
  { .name = "RETIRED_INSTRUCTIONS", .desc = "Retired Instructions", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xc0, },
  { .name = "RETIRED_UOPS", .desc = "Retired uops", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xc1, },
  { .name = "RETIRED_BRANCH_INSTRUCTIONS", .desc = "Retired Branch Instructions", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xc2, },
  { .name = "RETIRED_MISPREDICTED_BRANCH_INSTRUCTIONS", .desc = "Retired Mispredicted Branch Instructions", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xc3, },
  { .name = "RETIRED_TAKEN_BRANCH_INSTRUCTIONS", .desc = "Retired Taken Branch Instructions", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xc4, },
  { .name = "RETIRED_TAKEN_BRANCH_INSTRUCTIONS_MISPREDICTED", .desc = "Retired Taken Branch Instructions Mispredicted", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xc5, },
  { .name = "RETIRED_FAR_CONTROL_TRANSFERS", .desc = "Retired Far Control Transfers", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xc6, },
  { .name = "RETIRED_BRANCH_RESYNCS", .desc = "Retired Branch Resyncs", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xc7, },
  { .name = "RETIRED_NEAR_RETURNS", .desc = "Retired Near Returns", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xc8, },
  { .name = "RETIRED_NEAR_RETURNS_MISPREDICTED", .desc = "Retired Near Returns Mispredicted", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xc9, },
  { .name = "RETIRED_MISPREDICTED_TAKEN", .desc = "Retired mispredicted taken branches due to target mismatch", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xca, },
  { .name = "RETIRED_MMX_AND_FP_INSTRUCTIONS", .desc = "Retired MMX/FP Instructions", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xcb,
    .numasks = LIBPFM_ARRAY_SIZE(amd64_fam16h_retired_mmx_and_fp_instructions), .ngrp = 1, .umasks =
amd64_fam16h_retired_mmx_and_fp_instructions, }, { .name = "INTERRUPTS_MASKED_CYCLES", .desc = "Interrupts-Masked Cycles", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xcd, }, { .name = "INTERRUPTS_MASKED_CYCLES_WITH_INTERRUPT_PENDING", .desc = "Interrupts-Masked Cycles with Interrupt Pending", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xce, }, { .name = "INTERRUPTS_TAKEN", .desc = "Interrupts Taken", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xcf, }, { .name = "FPU_EXCEPTIONS", .desc = "FPU Exceptions", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xdb, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam16h_fpu_exceptions), .ngrp = 1, .umasks = amd64_fam16h_fpu_exceptions, }, { .name = "DR0_BREAKPOINT_MATCHES", .desc = "DR0 Breakpoint Matches", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xdc, }, { .name = "DR1_BREAKPOINT_MATCHES", .desc = "DR1 Breakpoint Matches", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xdd, }, { .name = "DR2_BREAKPOINT_MATCHES", .desc = "DR2 Breakpoint Matches", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xde, }, { .name = "DR3_BREAKPOINT_MATCHES", .desc = "DR3 Breakpoint Matches", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xdf, }, { .name = "TAGGED_IBS_OPS", .desc = "Ops tagged by IBS", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x1cf, }, { .name = "TAGGED_IBS_OPS_RETIRED", .desc = "Ops tagged by IBS that retired", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x1d0, }, }; papi-5.6.0/src/freebsd/map-westmere.h000664 001750 001750 00000045523 13216244361 021524 0ustar00jshenry1963jshenry1963000000 000000 /****************************/ /* THIS IS OPEN SOURCE CODE */ /****************************/ /* * File: map-westmere.h * Author: George Neville-Neil * gnn@freebsd.org */ #ifndef FreeBSD_MAP_WESTMERE #define FreeBSD_MAP_WESTMERE enum NativeEvent_Value_WestmereProcessor { PNE_WESTMERE_LOAD_BLOCK_OVERLAP_STORE= PAPI_NATIVE_MASK , PNE_WESTMERE_SB_DRAIN_ANY, PNE_WESTMERE_MISALIGN_MEMORY_STORE, PNE_WESTMERE_STORE_BLOCKS_AT_RET, PNE_WESTMERE_STORE_BLOCKS_L1D_BLOCK, PNE_WESTMERE_PARTIAL_ADDRESS_ALIAS, 
PNE_WESTMERE_DTLB_LOAD_MISSES_ANY, PNE_WESTMERE_DTLB_LOAD_MISSES_WALK_COMPLETED, PNE_WESTMERE_DTLB_LOAD_MISSES_WALK_CYCLES, PNE_WESTMERE_DTLB_LOAD_MISSES_STLB_HIT, PNE_WESTMERE_DTLB_LOAD_MISSES_PDE_MISS, PNE_WESTMERE_MEM_INST_RETIRED_LOADS, PNE_WESTMERE_MEM_INST_RETIRED_STORES, PNE_WESTMERE_MEM_INST_RETIRED_LATENCY_ABOVE_THRESHOLD, PNE_WESTMERE_MEM_STORE_RETIRED_DTLB_MISS, PNE_WESTMERE_UOPS_ISSUED_ANY, PNE_WESTMERE_UOPS_ISSUED_STALLED_CYCLES, PNE_WESTMERE_UOPS_ISSUED_FUSED, PNE_WESTMERE_MEM_UNCORE_RETIRED_LOCAL_HITM, PNE_WESTMERE_MEM_UNCORE_RETIRED_LOCAL_DRAM_AND_REMOTE_CACHE_HIT, PNE_WESTMERE_MEM_UNCORE_RETIRED_LOCAL_DRAM, PNE_WESTMERE_MEM_UNCORE_RETIRED_REMOTE_DRAM, PNE_WESTMERE_MEM_UNCORE_RETIRED_UNCACHEABLE, PNE_WESTMERE_FP_COMP_OPS_EXE_X87, PNE_WESTMERE_FP_COMP_OPS_EXE_MMX, PNE_WESTMERE_FP_COMP_OPS_EXE_SSE_FP, PNE_WESTMERE_FP_COMP_OPS_EXE_SSE2_INTEGER, PNE_WESTMERE_FP_COMP_OPS_EXE_SSE_FP_PACKED, PNE_WESTMERE_FP_COMP_OPS_EXE_SSE_FP_SCALAR, PNE_WESTMERE_FP_COMP_OPS_EXE_SSE_SINGLE_PRECISION, PNE_WESTMERE_FP_COMP_OPS_EXE_SSE_DOUBLE_PRECISION, PNE_WESTMERE_SIMD_INT_128_PACKED_MPY, PNE_WESTMERE_SIMD_INT_128_PACKED_SHIFT, PNE_WESTMERE_SIMD_INT_128_PACK, PNE_WESTMERE_SIMD_INT_128_UNPACK, PNE_WESTMERE_SIMD_INT_128_PACKED_LOGICAL, PNE_WESTMERE_SIMD_INT_128_PACKED_ARITH, PNE_WESTMERE_SIMD_INT_128_SHUFFLE_MOVE, PNE_WESTMERE_LOAD_DISPATCH_RS, PNE_WESTMERE_LOAD_DISPATCH_RS_DELAYED, PNE_WESTMERE_LOAD_DISPATCH_MOB, PNE_WESTMERE_LOAD_DISPATCH_ANY, PNE_WESTMERE_ARITH_CYCLES_DIV_BUSY, PNE_WESTMERE_ARITH_MUL, PNE_WESTMERE_INST_QUEUE_WRITES, PNE_WESTMERE_INST_DECODED_DEC0, PNE_WESTMERE_TWO_UOP_INSTS_DECODED, PNE_WESTMERE_INST_QUEUE_WRITE_CYCLES, PNE_WESTMERE_LSD_OVERFLOW, PNE_WESTMERE_L2_RQSTS_LD_HIT, PNE_WESTMERE_L2_RQSTS_LD_MISS, PNE_WESTMERE_L2_RQSTS_LOADS, PNE_WESTMERE_L2_RQSTS_RFO_HIT, PNE_WESTMERE_L2_RQSTS_RFO_MISS, PNE_WESTMERE_L2_RQSTS_RFOS, PNE_WESTMERE_L2_RQSTS_IFETCH_HIT, PNE_WESTMERE_L2_RQSTS_IFETCH_MISS, PNE_WESTMERE_L2_RQSTS_IFETCHES, 
PNE_WESTMERE_L2_RQSTS_PREFETCH_HIT, PNE_WESTMERE_L2_RQSTS_PREFETCH_MISS, PNE_WESTMERE_L2_RQSTS_PREFETCHES, PNE_WESTMERE_L2_RQSTS_MISS, PNE_WESTMERE_L2_RQSTS_REFERENCES, PNE_WESTMERE_L2_DATA_RQSTS_DEMAND_I_STATE, PNE_WESTMERE_L2_DATA_RQSTS_DEMAND_S_STATE, PNE_WESTMERE_L2_DATA_RQSTS_DEMAND_E_STATE, PNE_WESTMERE_L2_DATA_RQSTS_DEMAND_M_STATE, PNE_WESTMERE_L2_DATA_RQSTS_DEMAND_MESI, PNE_WESTMERE_L2_DATA_RQSTS_PREFETCH_I_STATE, PNE_WESTMERE_L2_DATA_RQSTS_PREFETCH_S_STATE, PNE_WESTMERE_L2_DATA_RQSTS_PREFETCH_E_STATE, PNE_WESTMERE_L2_DATA_RQSTS_PREFETCH_M_STATE, PNE_WESTMERE_L2_DATA_RQSTS_PREFETCH_MESI, PNE_WESTMERE_L2_DATA_RQSTS_ANY, PNE_WESTMERE_L2_WRITE_RFO_I_STATE, PNE_WESTMERE_L2_WRITE_RFO_S_STATE, PNE_WESTMERE_L2_WRITE_RFO_M_STATE, PNE_WESTMERE_L2_WRITE_RFO_HIT, PNE_WESTMERE_L2_WRITE_RFO_MESI, PNE_WESTMERE_L2_WRITE_LOCK_I_STATE, PNE_WESTMERE_L2_WRITE_LOCK_S_STATE, PNE_WESTMERE_L2_WRITE_LOCK_E_STATE, PNE_WESTMERE_L2_WRITE_LOCK_M_STATE, PNE_WESTMERE_L2_WRITE_LOCK_HIT, PNE_WESTMERE_L2_WRITE_LOCK_MESI, PNE_WESTMERE_L1D_WB_L2_I_STATE, PNE_WESTMERE_L1D_WB_L2_S_STATE, PNE_WESTMERE_L1D_WB_L2_E_STATE, PNE_WESTMERE_L1D_WB_L2_M_STATE, PNE_WESTMERE_L1D_WB_L2_MESI, PNE_WESTMERE_L3_LAT_CACHE_REFERENCE, PNE_WESTMERE_L3_LAT_CACHE_MISS, PNE_WESTMERE_CPU_CLK_UNHALTED_THREAD_P, PNE_WESTMERE_CPU_CLK_UNHALTED_REF_P, PNE_WESTMERE_DTLB_MISSES_ANY, PNE_WESTMERE_DTLB_MISSES_WALK_COMPLETED, PNE_WESTMERE_DTLB_MISSES_WALK_CYCLES, PNE_WESTMERE_DTLB_MISSES_STLB_HIT, PNE_WESTMERE_DTLB_MISSES_LARGE_WALK_COMPLETED, PNE_WESTMERE_LOAD_HIT_PRE, PNE_WESTMERE_L1D_PREFETCH_REQUESTS, PNE_WESTMERE_L1D_PREFETCH_MISS, PNE_WESTMERE_L1D_PREFETCH_TRIGGERS, PNE_WESTMERE_EPT_WALK_CYCLES, PNE_WESTMERE_L1D_REPL, PNE_WESTMERE_L1D_M_REPL, PNE_WESTMERE_L1D_M_EVICT, PNE_WESTMERE_L1D_M_SNOOP_EVICT, PNE_WESTMERE_L1D_CACHE_PREFETCH_LOCK_FB_HIT, PNE_WESTMERE_L1D_CACHE_LOCK_FB_HIT, PNE_WESTMERE_OFFCORE_REQUESTS_OUTSTANDING_DEMAND_READ_DATA, PNE_WESTMERE_OFFCORE_REQUESTS_OUTSTANDING_DEMAND_READ_CODE, 
PNE_WESTMERE_OFFCORE_REQUESTS_OUTSTANDING_DEMAND_RFO, PNE_WESTMERE_OFFCORE_REQUESTS_OUTSTANDING_ANY_READ, PNE_WESTMERE_CACHE_LOCK_CYCLES_L1D_L2, PNE_WESTMERE_CACHE_LOCK_CYCLES_L1D, PNE_WESTMERE_IO_TRANSACTIONS, PNE_WESTMERE_L1I_HITS, PNE_WESTMERE_L1I_MISSES, PNE_WESTMERE_L1I_READS, PNE_WESTMERE_L1I_CYCLES_STALLED, PNE_WESTMERE_LARGE_ITLB_HIT, PNE_WESTMERE_ITLB_MISSES_ANY, PNE_WESTMERE_ITLB_MISSES_WALK_COMPLETED, PNE_WESTMERE_ITLB_MISSES_WALK_CYCLES, PNE_WESTMERE_ITLB_MISSES_LARGE_WALK_COMPLETED, PNE_WESTMERE_ILD_STALL_LCP, PNE_WESTMERE_ILD_STALL_MRU, PNE_WESTMERE_ILD_STALL_IQ_FULL, PNE_WESTMERE_ILD_STALL_REGEN, PNE_WESTMERE_ILD_STALL_ANY, PNE_WESTMERE_BR_INST_EXEC_COND, PNE_WESTMERE_BR_INST_EXEC_DIRECT, PNE_WESTMERE_BR_INST_EXEC_INDIRECT_NON_CALL, PNE_WESTMERE_BR_INST_EXEC_NON_CALLS, PNE_WESTMERE_BR_INST_EXEC_RETURN_NEAR, PNE_WESTMERE_BR_INST_EXEC_DIRECT_NEAR_CALL, PNE_WESTMERE_BR_INST_EXEC_INDIRECT_NEAR_CALL, PNE_WESTMERE_BR_INST_EXEC_NEAR_CALLS, PNE_WESTMERE_BR_INST_EXEC_TAKEN, PNE_WESTMERE_BR_INST_EXEC_ANY, PNE_WESTMERE_BR_MISP_EXEC_COND, PNE_WESTMERE_BR_MISP_EXEC_DIRECT, PNE_WESTMERE_BR_MISP_EXEC_INDIRECT_NON_CALL, PNE_WESTMERE_BR_MISP_EXEC_NON_CALLS, PNE_WESTMERE_BR_MISP_EXEC_RETURN_NEAR, PNE_WESTMERE_BR_MISP_EXEC_DIRECT_NEAR_CALL, PNE_WESTMERE_BR_MISP_EXEC_INDIRECT_NEAR_CALL, PNE_WESTMERE_BR_MISP_EXEC_NEAR_CALLS, PNE_WESTMERE_BR_MISP_EXEC_TAKEN, PNE_WESTMERE_BR_MISP_EXEC_ANY, PNE_WESTMERE_RESOURCE_STALLS_ANY, PNE_WESTMERE_RESOURCE_STALLS_LOAD, PNE_WESTMERE_RESOURCE_STALLS_RS_FULL, PNE_WESTMERE_RESOURCE_STALLS_STORE, PNE_WESTMERE_RESOURCE_STALLS_ROB_FULL, PNE_WESTMERE_RESOURCE_STALLS_FPCW, PNE_WESTMERE_RESOURCE_STALLS_MXCSR, PNE_WESTMERE_RESOURCE_STALLS_OTHER, PNE_WESTMERE_MACRO_INSTS_FUSIONS_DECODED, PNE_WESTMERE_BACLEAR_FORCE_IQ, PNE_WESTMERE_LSD_UOPS, PNE_WESTMERE_ITLB_FLUSH, PNE_WESTMERE_OFFCORE_REQUESTS_DEMAND_READ_DATA, PNE_WESTMERE_OFFCORE_REQUESTS_DEMAND_READ_CODE, PNE_WESTMERE_OFFCORE_REQUESTS_DEMAND_RFO, PNE_WESTMERE_OFFCORE_REQUESTS_ANY_READ, 
PNE_WESTMERE_OFFCORE_REQUESTS_ANY_RFO, PNE_WESTMERE_OFFCORE_REQUESTS_L1D_WRITEBACK, PNE_WESTMERE_OFFCORE_REQUESTS_ANY, PNE_WESTMERE_UOPS_EXECUTED_PORT0, PNE_WESTMERE_UOPS_EXECUTED_PORT1, PNE_WESTMERE_UOPS_EXECUTED_PORT2_CORE, PNE_WESTMERE_UOPS_EXECUTED_PORT3_CORE, PNE_WESTMERE_UOPS_EXECUTED_PORT4_CORE, PNE_WESTMERE_UOPS_EXECUTED_CORE_ACTIVE_CYCLES_NO_PORT5, PNE_WESTMERE_UOPS_EXECUTED_PORT5, PNE_WESTMERE_UOPS_EXECUTED_CORE_ACTIVE_CYCLES, PNE_WESTMERE_UOPS_EXECUTED_PORT015, PNE_WESTMERE_UOPS_EXECUTED_PORT234, PNE_WESTMERE_OFFCORE_REQUESTS_SQ_FULL, PNE_WESTMERE_SNOOPQ_REQUESTS_OUTSTANDING_DATA, PNE_WESTMERE_SNOOPQ_REQUESTS_OUTSTANDING_INVALIDATE, PNE_WESTMERE_SNOOPQ_REQUESTS_OUTSTANDING_CODE, PNE_WESTMERE_SNOOPQ_REQUESTS_CODE, PNE_WESTMERE_SNOOPQ_REQUESTS_DATA, PNE_WESTMERE_SNOOPQ_REQUESTS_INVALIDATE, PNE_WESTMERE_OFF_CORE_RESPONSE_0, PNE_WESTMERE_SNOOP_RESPONSE_HIT, PNE_WESTMERE_SNOOP_RESPONSE_HITE, PNE_WESTMERE_SNOOP_RESPONSE_HITM, PNE_WESTMERE_OFF_CORE_RESPONSE_1, PNE_WESTMERE_INST_RETIRED_ANY_P, PNE_WESTMERE_INST_RETIRED_X87, PNE_WESTMERE_INST_RETIRED_MMX, PNE_WESTMERE_UOPS_RETIRED_ANY, PNE_WESTMERE_UOPS_RETIRED_RETIRE_SLOTS, PNE_WESTMERE_UOPS_RETIRED_MACRO_FUSED, PNE_WESTMERE_MACHINE_CLEARS_CYCLES, PNE_WESTMERE_MACHINE_CLEARS_MEM_ORDER, PNE_WESTMERE_MACHINE_CLEARS_SMC, PNE_WESTMERE_BR_INST_RETIRED_ANY_P, PNE_WESTMERE_BR_INST_RETIRED_CONDITIONAL, PNE_WESTMERE_BR_INST_RETIRED_NEAR_CALL, PNE_WESTMERE_BR_INST_RETIRED_ALL_BRANCHES, PNE_WESTMERE_BR_MISP_RETIRED_ANY_P, PNE_WESTMERE_BR_MISP_RETIRED_CONDITIONAL, PNE_WESTMERE_BR_MISP_RETIRED_NEAR_CALL, PNE_WESTMERE_BR_MISP_RETIRED_ALL_BRANCHES, PNE_WESTMERE_SSEX_UOPS_RETIRED_PACKED_SINGLE, PNE_WESTMERE_SSEX_UOPS_RETIRED_SCALAR_SINGLE, PNE_WESTMERE_SSEX_UOPS_RETIRED_PACKED_DOUBLE, PNE_WESTMERE_SSEX_UOPS_RETIRED_SCALAR_DOUBLE, PNE_WESTMERE_SSEX_UOPS_RETIRED_VECTOR_INTEGER, PNE_WESTMERE_ITLB_MISS_RETIRED, PNE_WESTMERE_MEM_LOAD_RETIRED_L1D_HIT, PNE_WESTMERE_MEM_LOAD_RETIRED_L2_HIT, 
PNE_WESTMERE_MEM_LOAD_RETIRED_L3_UNSHARED_HIT, PNE_WESTMERE_MEM_LOAD_RETIRED_OTHER_CORE_L2_HIT_HITM, PNE_WESTMERE_MEM_LOAD_RETIRED_L3_MISS, PNE_WESTMERE_MEM_LOAD_RETIRED_HIT_LFB, PNE_WESTMERE_MEM_LOAD_RETIRED_DTLB_MISS, PNE_WESTMERE_FP_MMX_TRANS_TO_FP, PNE_WESTMERE_FP_MMX_TRANS_TO_MMX, PNE_WESTMERE_FP_MMX_TRANS_ANY, PNE_WESTMERE_MACRO_INSTS_DECODED, PNE_WESTMERE_UOPS_DECODED_STALL_CYCLES, PNE_WESTMERE_UOPS_DECODED_MS, PNE_WESTMERE_UOPS_DECODED_ESP_FOLDING, PNE_WESTMERE_UOPS_DECODED_ESP_SYNC, PNE_WESTMERE_RAT_STALLS_FLAGS, PNE_WESTMERE_RAT_STALLS_REGISTERS, PNE_WESTMERE_RAT_STALLS_ROB_READ_PORT, PNE_WESTMERE_RAT_STALLS_SCOREBOARD, PNE_WESTMERE_RAT_STALLS_ANY, PNE_WESTMERE_SEG_RENAME_STALLS, PNE_WESTMERE_ES_REG_RENAMES, PNE_WESTMERE_UOP_UNFUSION, PNE_WESTMERE_BR_INST_DECODED, PNE_WESTMERE_BPU_MISSED_CALL_RET, PNE_WESTMERE_BACLEAR_CLEAR, PNE_WESTMERE_BACLEAR_BAD_TARGET, PNE_WESTMERE_BPU_CLEARS_EARLY, PNE_WESTMERE_BPU_CLEARS_LATE, PNE_WESTMERE_THREAD_ACTIVE, PNE_WESTMERE_L2_TRANSACTIONS_LOAD, PNE_WESTMERE_L2_TRANSACTIONS_RFO, PNE_WESTMERE_L2_TRANSACTIONS_IFETCH, PNE_WESTMERE_L2_TRANSACTIONS_PREFETCH, PNE_WESTMERE_L2_TRANSACTIONS_L1D_WB, PNE_WESTMERE_L2_TRANSACTIONS_FILL, PNE_WESTMERE_L2_TRANSACTIONS_WB, PNE_WESTMERE_L2_TRANSACTIONS_ANY, PNE_WESTMERE_L2_LINES_IN_S_STATE, PNE_WESTMERE_L2_LINES_IN_E_STATE, PNE_WESTMERE_L2_LINES_IN_ANY, PNE_WESTMERE_L2_LINES_OUT_DEMAND_CLEAN, PNE_WESTMERE_L2_LINES_OUT_DEMAND_DIRTY, PNE_WESTMERE_L2_LINES_OUT_PREFETCH_CLEAN, PNE_WESTMERE_L2_LINES_OUT_PREFETCH_DIRTY, PNE_WESTMERE_L2_LINES_OUT_ANY, PNE_WESTMERE_SQ_MISC_LRU_HINTS, PNE_WESTMERE_SQ_MISC_SPLIT_LOCK, PNE_WESTMERE_SQ_FULL_STALL_CYCLES, PNE_WESTMERE_FP_ASSIST_ALL, PNE_WESTMERE_FP_ASSIST_OUTPUT, PNE_WESTMERE_FP_ASSIST_INPUT, PNE_WESTMERE_SIMD_INT_64_PACKED_MPY, PNE_WESTMERE_SIMD_INT_64_PACKED_SHIFT, PNE_WESTMERE_SIMD_INT_64_PACK, PNE_WESTMERE_SIMD_INT_64_UNPACK, PNE_WESTMERE_SIMD_INT_64_PACKED_LOGICAL, PNE_WESTMERE_SIMD_INT_64_PACKED_ARITH, PNE_WESTMERE_SIMD_INT_64_SHUFFLE_MOVE, 
PNE_WESTMERE_INSTR_RETIRED_ANY, PNE_WESTMERE_CPU_CLK_UNHALTED_CORE, PNE_WESTMERE_CPU_CLK_UNHALTED_REF, PNE_WESTMERE_GQ_CYCLES_FULL_READ_TRACKER, PNE_WESTMERE_GQ_CYCLES_FULL_WRITE_TRACKER, PNE_WESTMERE_GQ_CYCLES_FULL_PEER_PROBE_TRACKER, PNE_WESTMERE_GQ_CYCLES_NOT_EMPTY_READ_TRACKER, PNE_WESTMERE_GQ_CYCLES_NOT_EMPTY_WRITE_TRACKER, PNE_WESTMERE_GQ_CYCLES_NOT_EMPTY_PEER_PROBE_TRACKER, PNE_WESTMERE_GQ_OCCUPANCY_READ_TRACKER, PNE_WESTMERE_GQ_ALLOC_READ_TRACKER, PNE_WESTMERE_GQ_ALLOC_RT_L3_MISS, PNE_WESTMERE_GQ_ALLOC_RT_TO_L3_RESP, PNE_WESTMERE_GQ_ALLOC_RT_TO_RTID_ACQUIRED, PNE_WESTMERE_GQ_ALLOC_WT_TO_RTID_ACQUIRED, PNE_WESTMERE_GQ_ALLOC_WRITE_TRACKER, PNE_WESTMERE_GQ_ALLOC_PEER_PROBE_TRACKER, PNE_WESTMERE_GQ_DATA_FROM_QPI, PNE_WESTMERE_GQ_DATA_FROM_QMC, PNE_WESTMERE_GQ_DATA_FROM_L3, PNE_WESTMERE_GQ_DATA_FROM_CORES_02, PNE_WESTMERE_GQ_DATA_FROM_CORES_13, PNE_WESTMERE_GQ_DATA_TO_QPI_QMC, PNE_WESTMERE_GQ_DATA_TO_L3, PNE_WESTMERE_GQ_DATA_TO_CORES, PNE_WESTMERE_SNP_RESP_TO_LOCAL_HOME_I_STATE, PNE_WESTMERE_SNP_RESP_TO_LOCAL_HOME_S_STATE, PNE_WESTMERE_SNP_RESP_TO_LOCAL_HOME_FWD_S_STATE, PNE_WESTMERE_SNP_RESP_TO_LOCAL_HOME_FWD_I_STATE, PNE_WESTMERE_SNP_RESP_TO_LOCAL_HOME_CONFLICT, PNE_WESTMERE_SNP_RESP_TO_LOCAL_HOME_WB, PNE_WESTMERE_SNP_RESP_TO_REMOTE_HOME_I_STATE, PNE_WESTMERE_SNP_RESP_TO_REMOTE_HOME_S_STATE, PNE_WESTMERE_SNP_RESP_TO_REMOTE_HOME_FWD_S_STATE, PNE_WESTMERE_SNP_RESP_TO_REMOTE_HOME_FWD_I_STATE, PNE_WESTMERE_SNP_RESP_TO_REMOTE_HOME_CONFLICT, PNE_WESTMERE_SNP_RESP_TO_REMOTE_HOME_WB, PNE_WESTMERE_SNP_RESP_TO_REMOTE_HOME_HITM, PNE_WESTMERE_L3_HITS_READ, PNE_WESTMERE_L3_HITS_WRITE, PNE_WESTMERE_L3_HITS_PROBE, PNE_WESTMERE_L3_HITS_ANY, PNE_WESTMERE_L3_MISS_READ, PNE_WESTMERE_L3_MISS_WRITE, PNE_WESTMERE_L3_MISS_PROBE, PNE_WESTMERE_L3_MISS_ANY, PNE_WESTMERE_L3_LINES_IN_M_STATE, PNE_WESTMERE_L3_LINES_IN_E_STATE, PNE_WESTMERE_L3_LINES_IN_S_STATE, PNE_WESTMERE_L3_LINES_IN_F_STATE, PNE_WESTMERE_L3_LINES_IN_ANY, PNE_WESTMERE_L3_LINES_OUT_M_STATE, 
PNE_WESTMERE_L3_LINES_OUT_E_STATE, PNE_WESTMERE_L3_LINES_OUT_S_STATE, PNE_WESTMERE_L3_LINES_OUT_I_STATE, PNE_WESTMERE_L3_LINES_OUT_F_STATE, PNE_WESTMERE_L3_LINES_OUT_ANY, PNE_WESTMERE_GQ_SNOOP_GOTO_S, PNE_WESTMERE_GQ_SNOOP_GOTO_I, PNE_WESTMERE_GQ_SNOOP_GOTO_S_HIT, PNE_WESTMERE_GQ_SNOOP_GOTO_I_HIT, PNE_WESTMERE_QHL_REQUESTS_IOH_READS, PNE_WESTMERE_QHL_REQUESTS_IOH_WRITES, PNE_WESTMERE_QHL_REQUESTS_REMOTE_READS, PNE_WESTMERE_QHL_REQUESTS_REMOTE_WRITES, PNE_WESTMERE_QHL_REQUESTS_LOCAL_READS, PNE_WESTMERE_QHL_REQUESTS_LOCAL_WRITES, PNE_WESTMERE_QHL_CYCLES_FULL_IOH, PNE_WESTMERE_QHL_CYCLES_FULL_REMOTE, PNE_WESTMERE_QHL_CYCLES_FULL_LOCAL, PNE_WESTMERE_QHL_CYCLES_NOT_EMPTY_IOH, PNE_WESTMERE_QHL_CYCLES_NOT_EMPTY_REMOTE, PNE_WESTMERE_QHL_CYCLES_NOT_EMPTY_LOCAL, PNE_WESTMERE_QHL_OCCUPANCY_IOH, PNE_WESTMERE_QHL_OCCUPANCY_REMOTE, PNE_WESTMERE_QHL_OCCUPANCY_LOCAL, PNE_WESTMERE_QHL_ADDRESS_CONFLICTS_2WAY, PNE_WESTMERE_QHL_ADDRESS_CONFLICTS_3WAY, PNE_WESTMERE_QHL_CONFLICT_CYCLES_IOH, PNE_WESTMERE_QHL_CONFLICT_CYCLES_REMOTE, PNE_WESTMERE_QHL_CONFLICT_CYCLES_LOCAL, PNE_WESTMERE_QHL_TO_QMC_BYPASS, PNE_WESTMERE_QMC_ISOC_FULL_READ_CH0, PNE_WESTMERE_QMC_ISOC_FULL_READ_CH1, PNE_WESTMERE_QMC_ISOC_FULL_READ_CH2, PNE_WESTMERE_QMC_ISOC_FULL_WRITE_CH0, PNE_WESTMERE_QMC_ISOC_FULL_WRITE_CH1, PNE_WESTMERE_QMC_ISOC_FULL_WRITE_CH2, PNE_WESTMERE_QMC_BUSY_READ_CH0, PNE_WESTMERE_QMC_BUSY_READ_CH1, PNE_WESTMERE_QMC_BUSY_READ_CH2, PNE_WESTMERE_QMC_BUSY_WRITE_CH0, PNE_WESTMERE_QMC_BUSY_WRITE_CH1, PNE_WESTMERE_QMC_BUSY_WRITE_CH2, PNE_WESTMERE_QMC_OCCUPANCY_CH0, PNE_WESTMERE_QMC_OCCUPANCY_CH1, PNE_WESTMERE_QMC_OCCUPANCY_CH2, PNE_WESTMERE_QMC_OCCUPANCY_ANY, PNE_WESTMERE_QMC_ISSOC_OCCUPANCY_CH0, PNE_WESTMERE_QMC_ISSOC_OCCUPANCY_CH1, PNE_WESTMERE_QMC_ISSOC_OCCUPANCY_CH2, PNE_WESTMERE_QMC_ISSOC_READS_ANY, PNE_WESTMERE_QMC_NORMAL_READS_CH0, PNE_WESTMERE_QMC_NORMAL_READS_CH1, PNE_WESTMERE_QMC_NORMAL_READS_CH2, PNE_WESTMERE_QMC_NORMAL_READS_ANY, PNE_WESTMERE_QMC_HIGH_PRIORITY_READS_CH0, 
PNE_WESTMERE_QMC_HIGH_PRIORITY_READS_CH1, PNE_WESTMERE_QMC_HIGH_PRIORITY_READS_CH2, PNE_WESTMERE_QMC_HIGH_PRIORITY_READS_ANY, PNE_WESTMERE_QMC_CRITICAL_PRIORITY_READS_CH0, PNE_WESTMERE_QMC_CRITICAL_PRIORITY_READS_CH1, PNE_WESTMERE_QMC_CRITICAL_PRIORITY_READS_CH2, PNE_WESTMERE_QMC_CRITICAL_PRIORITY_READS_ANY, PNE_WESTMERE_QMC_WRITES_FULL_CH0, PNE_WESTMERE_QMC_WRITES_FULL_CH1, PNE_WESTMERE_QMC_WRITES_FULL_CH2, PNE_WESTMERE_QMC_WRITES_FULL_ANY, PNE_WESTMERE_QMC_WRITES_PARTIAL_CH0, PNE_WESTMERE_QMC_WRITES_PARTIAL_CH1, PNE_WESTMERE_QMC_WRITES_PARTIAL_CH2, PNE_WESTMERE_QMC_WRITES_PARTIAL_ANY, PNE_WESTMERE_QMC_CANCEL_CH0, PNE_WESTMERE_QMC_CANCEL_CH1, PNE_WESTMERE_QMC_CANCEL_CH2, PNE_WESTMERE_QMC_CANCEL_ANY, PNE_WESTMERE_QMC_PRIORITY_UPDATES_CH0, PNE_WESTMERE_QMC_PRIORITY_UPDATES_CH1, PNE_WESTMERE_QMC_PRIORITY_UPDATES_CH2, PNE_WESTMERE_QMC_PRIORITY_UPDATES_ANY, PNE_WESTMERE_IMC_RETRY_CH0, PNE_WESTMERE_IMC_RETRY_CH1, PNE_WESTMERE_IMC_RETRY_CH2, PNE_WESTMERE_IMC_RETRY_ANY, PNE_WESTMERE_QHL_FRC_ACK_CNFLTS_IOH, PNE_WESTMERE_QHL_FRC_ACK_CNFLTS_REMOTE, PNE_WESTMERE_QHL_FRC_ACK_CNFLTS_LOCAL, PNE_WESTMERE_QHL_FRC_ACK_CNFLTS_ANY, PNE_WESTMERE_QHL_SLEEPS_IOH_ORDER, PNE_WESTMERE_QHL_SLEEPS_REMOTE_ORDER, PNE_WESTMERE_QHL_SLEEPS_LOCAL_ORDER, PNE_WESTMERE_QHL_SLEEPS_IOH_CONFLICT, PNE_WESTMERE_QHL_SLEEPS_REMOTE_CONFLICT, PNE_WESTMERE_QHL_SLEEPS_LOCAL_CONFLICT, PNE_WESTMERE_ADDR_OPCODE_MATCH_IOH, PNE_WESTMERE_ADDR_OPCODE_MATCH_REMOTE, PNE_WESTMERE_ADDR_OPCODE_MATCH_LOCAL, PNE_WESTMERE_QPI_TX_STALLED_SINGLE_FLIT_HOME_LINK_0, PNE_WESTMERE_QPI_TX_STALLED_SINGLE_FLIT_SNOOP_LINK_0, PNE_WESTMERE_QPI_TX_STALLED_SINGLE_FLIT_NDR_LINK_0, PNE_WESTMERE_QPI_TX_STALLED_SINGLE_FLIT_HOME_LINK_1, PNE_WESTMERE_QPI_TX_STALLED_SINGLE_FLIT_SNOOP_LINK_1, PNE_WESTMERE_QPI_TX_STALLED_SINGLE_FLIT_NDR_LINK_1, PNE_WESTMERE_QPI_TX_STALLED_SINGLE_FLIT_LINK_0, PNE_WESTMERE_QPI_TX_STALLED_SINGLE_FLIT_LINK_1, PNE_WESTMERE_QPI_TX_STALLED_MULTI_FLIT_DRS_LINK_0, PNE_WESTMERE_QPI_TX_STALLED_MULTI_FLIT_NCB_LINK_0, 
PNE_WESTMERE_QPI_TX_STALLED_MULTI_FLIT_NCS_LINK_0, PNE_WESTMERE_QPI_TX_STALLED_MULTI_FLIT_DRS_LINK_1, PNE_WESTMERE_QPI_TX_STALLED_MULTI_FLIT_NCB_LINK_1, PNE_WESTMERE_QPI_TX_STALLED_MULTI_FLIT_NCS_LINK_1, PNE_WESTMERE_QPI_TX_STALLED_MULTI_FLIT_LINK_0, PNE_WESTMERE_QPI_TX_STALLED_MULTI_FLIT_LINK_1, PNE_WESTMERE_QPI_TX_HEADER_FULL_LINK_0, PNE_WESTMERE_QPI_TX_HEADER_BUSY_LINK_0, PNE_WESTMERE_QPI_TX_HEADER_FULL_LINK_1, PNE_WESTMERE_QPI_TX_HEADER_BUSY_LINK_1, PNE_WESTMERE_QPI_RX_NO_PPT_CREDIT_STALLS_LINK_0, PNE_WESTMERE_QPI_RX_NO_PPT_CREDIT_STALLS_LINK_1, PNE_WESTMERE_DRAM_OPEN_CH0, PNE_WESTMERE_DRAM_OPEN_CH1, PNE_WESTMERE_DRAM_OPEN_CH2, PNE_WESTMERE_DRAM_PAGE_CLOSE_CH0, PNE_WESTMERE_DRAM_PAGE_CLOSE_CH1, PNE_WESTMERE_DRAM_PAGE_CLOSE_CH2, PNE_WESTMERE_DRAM_PAGE_MISS_CH0, PNE_WESTMERE_DRAM_PAGE_MISS_CH1, PNE_WESTMERE_DRAM_PAGE_MISS_CH2, PNE_WESTMERE_DRAM_READ_CAS_CH0, PNE_WESTMERE_DRAM_READ_CAS_AUTOPRE_CH0, PNE_WESTMERE_DRAM_READ_CAS_CH1, PNE_WESTMERE_DRAM_READ_CAS_AUTOPRE_CH1, PNE_WESTMERE_DRAM_READ_CAS_CH2, PNE_WESTMERE_DRAM_READ_CAS_AUTOPRE_CH2, PNE_WESTMERE_DRAM_WRITE_CAS_CH0, PNE_WESTMERE_DRAM_WRITE_CAS_AUTOPRE_CH0, PNE_WESTMERE_DRAM_WRITE_CAS_CH1, PNE_WESTMERE_DRAM_WRITE_CAS_AUTOPRE_CH1, PNE_WESTMERE_DRAM_WRITE_CAS_CH2, PNE_WESTMERE_DRAM_WRITE_CAS_AUTOPRE_CH2, PNE_WESTMERE_DRAM_REFRESH_CH0, PNE_WESTMERE_DRAM_REFRESH_CH1, PNE_WESTMERE_DRAM_REFRESH_CH2, PNE_WESTMERE_DRAM_PRE_ALL_CH0, PNE_WESTMERE_DRAM_PRE_ALL_CH1, PNE_WESTMERE_DRAM_PRE_ALL_CH2, PNE_WESTMERE_DRAM_THERMAL_THROTTLED, PNE_WESTMERE_THERMAL_THROTTLING_TEMP_CORE_0, PNE_WESTMERE_THERMAL_THROTTLING_TEMP_CORE_1, PNE_WESTMERE_THERMAL_THROTTLING_TEMP_CORE_2, PNE_WESTMERE_THERMAL_THROTTLING_TEMP_CORE_3, PNE_WESTMERE_THERMAL_THROTTLED_TEMP_CORE_0, PNE_WESTMERE_THERMAL_THROTTLED_TEMP_CORE_1, PNE_WESTMERE_THERMAL_THROTTLED_TEMP_CORE_2, PNE_WESTMERE_THERMAL_THROTTLED_TEMP_CORE_3, PNE_WESTMERE_PROCHOT_ASSERTION, PNE_WESTMERE_THERMAL_THROTTLING_PROCHOT_CORE_0, PNE_WESTMERE_THERMAL_THROTTLING_PROCHOT_CORE_1, 
PNE_WESTMERE_THERMAL_THROTTLING_PROCHOT_CORE_2, PNE_WESTMERE_THERMAL_THROTTLING_PROCHOT_CORE_3, PNE_WESTMERE_TURBO_MODE_CORE_0, PNE_WESTMERE_TURBO_MODE_CORE_1, PNE_WESTMERE_TURBO_MODE_CORE_2, PNE_WESTMERE_TURBO_MODE_CORE_3, PNE_WESTMERE_CYCLES_UNHALTED_L3_FLL_ENABLE, PNE_WESTMERE_CYCLES_UNHALTED_L3_FLL_DISABLE, PNE_WESTMERE_PNE_WESTMERE_NATNAME_GUARD, }; extern Native_Event_LabelDescription_t WestmereProcessor_info[]; extern hwi_search_t WestmereProcessor_map[]; #endif papi-5.6.0/src/perfctr-2.7.x/linux/drivers/perfctr/ppc.c000664 001750 001750 00000072465 13216244370 024767 0ustar00jshenry1963jshenry1963000000 000000 /* $Id: ppc.c,v 1.43 2007/10/06 13:02:07 mikpe Exp $ * PPC32 performance-monitoring counters driver. * * Copyright (C) 2004-2007 Mikael Pettersson */ #include #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) #include #endif #include #include #include #include #include #include /* tb_ticks_per_jiffy, get_tbl() */ #include "ppc_tests.h" /* Support for lazy evntsel and perfctr SPR updates. */ struct per_cpu_cache { /* roughly a subset of perfctr_cpu_state */ unsigned int id; /* cache owner id */ /* Physically indexed cache of the MMCRs. */ unsigned int ppc_mmcr[3]; }; static DEFINE_PER_CPU(struct per_cpu_cache, per_cpu_cache); #define __get_cpu_cache(cpu) (&per_cpu(per_cpu_cache, cpu)) #define get_cpu_cache() (&__get_cpu_var(per_cpu_cache)) /* Structure for counter snapshots, as 32-bit values. */ struct perfctr_low_ctrs { unsigned int tsc; unsigned int pmc[6]; }; enum pm_type { PM_NONE, PM_604, PM_604e, PM_750, /* XXX: Minor event set diffs between IBM and Moto. 
*/ PM_7400, PM_7450, }; static enum pm_type pm_type; static unsigned int new_id(void) { static DEFINE_SPINLOCK(lock); static unsigned int counter; int id; spin_lock(&lock); id = ++counter; spin_unlock(&lock); return id; } #ifdef CONFIG_PERFCTR_INTERRUPT_SUPPORT static void perfctr_default_ihandler(unsigned long pc) { } static perfctr_ihandler_t perfctr_ihandler = perfctr_default_ihandler; void do_perfctr_interrupt(struct pt_regs *regs) { preempt_disable(); (*perfctr_ihandler)(instruction_pointer(regs)); preempt_enable_no_resched(); } static inline int perfctr_reserve_pmc_hardware(void) { return reserve_pmc_hardware(do_perfctr_interrupt); } void perfctr_cpu_set_ihandler(perfctr_ihandler_t ihandler) { perfctr_ihandler = ihandler ? ihandler : perfctr_default_ihandler; } #else static inline int perfctr_reserve_pmc_hardware(void) { return reserve_pmc_hardware(NULL); } #define perfctr_cstatus_has_ictrs(cstatus) 0 #endif static inline void perfctr_release_pmc_hardware(void) { release_pmc_hardware(); } #if defined(CONFIG_SMP) && defined(CONFIG_PERFCTR_INTERRUPT_SUPPORT) static inline void set_isuspend_cpu(struct perfctr_cpu_state *state, int cpu) { state->isuspend_cpu = cpu; } static inline int is_isuspend_cpu(const struct perfctr_cpu_state *state, int cpu) { return state->isuspend_cpu == cpu; } static inline void clear_isuspend_cpu(struct perfctr_cpu_state *state) { state->isuspend_cpu = NR_CPUS; } #else static inline void set_isuspend_cpu(struct perfctr_cpu_state *state, int cpu) { } static inline int is_isuspend_cpu(const struct perfctr_cpu_state *state, int cpu) { return 1; } static inline void clear_isuspend_cpu(struct perfctr_cpu_state *state) { } #endif /* The ppc driver internally uses cstatus & (1<<30) to record that a context has an asynchronously changing MMCR0. 
*/

/* Return cstatus with the driver-private "MMCR0 quirk" flag (bit 30) set. */
static inline unsigned int perfctr_cstatus_set_mmcr0_quirk(unsigned int cstatus)
{
	const unsigned int quirk_bit = 1 << 30;

	cstatus |= quirk_bit;
	return cstatus;
}

/* Non-zero (the raw bit value, not 1) when the quirk flag is set in cstatus. */
static inline int perfctr_cstatus_has_mmcr0_quirk(unsigned int cstatus)
{
	const unsigned int quirk_bit = 1 << 30;

	return cstatus & quirk_bit;
}

/****************************************************************
 *								*
 * Driver procedures.						*
 *								*
 ****************************************************************/

/*
 * The PowerPC 604/750/74xx family.
 *
 * Common features
 * ---------------
 * - Per counter event selection data in subfields of control registers.
 *   MMCR0 contains both global control and PMC1/PMC2 event selectors.
 * - Overflow interrupt support is present in all processors, but an
 *   erratum makes it difficult to use in 750/7400/7410 processors.
 * - There is no concept of per-counter qualifiers:
 *   - User-mode/supervisor-mode restrictions are global.
 *   - Two groups of counters, PMC1 and PMC2-PMCn (n being the
 *     highest-numbered PMC). Each group
 *     has a single overflow interrupt/event enable/disable flag.
 * - The instructions used to read (mfspr) and write (mtspr) the control
 *   and counter registers (SPRs) only support hardcoded register numbers.
 *   There is no support for accessing an SPR via a runtime value.
 * - Each counter supports its own unique set of events. However, events
 *   0-1 are common for PMC1-PMC4, and events 2-4 are common for PMC1-PMC4.
 * - There is no separate high-resolution core clock counter.
 *   The time-base counter is available, but it typically runs an order of
 *   magnitude slower than the core clock.
 *   Any performance counter can be programmed to count core clocks, but
 *   doing this (a) reserves one PMC, and (b) needs indirect accesses
 *   since the SPR number in general isn't known at compile-time.
 *
 * 604
 * ---
 * 604 has MMCR0, PMC1, PMC2, SIA, and SDA.
 *
 * MMCR0[THRESHOLD] is not automatically multiplied.
 *
 * On the 604, software must always reset MMCR0[ENINT] after
 * taking a PMI. This is not the case for the 604e.
 *
 * 604e
 * ----
 * 604e adds MMCR1, PMC3, and PMC4.
* Bus-to-core multiplier is available via HID1[PLL_CFG]. * * MMCR0[THRESHOLD] is automatically multiplied by 4. * * When the 604e vectors to the PMI handler, it automatically * clears any pending PMIs. Unlike the 604, the 604e does not * require MMCR0[ENINT] to be cleared (and possibly reset) * before external interrupts can be re-enabled. * * 750 * --- * 750 adds user-readable MMCRn/PMCn/SIA registers, and removes SDA. * * MMCR0[THRESHOLD] is not automatically multiplied. * * Motorola MPC750UM.pdf, page C-78, states: "The performance monitor * of the MPC755 functions the same as that of the MPC750, (...), except * that for both the MPC750 and MPC755, no combination of the thermal * assist unit, the decrementer register, and the performance monitor * can be used at any one time. If exceptions for any two of these * functional blocks are enabled together, multiple exceptions caused * by any of these three blocks cause unpredictable results." * * IBM 750CXe_Err_DD2X.pdf, Erratum #13, states that a PMI which * occurs immediately after a delayed decrementer exception can * corrupt SRR0, causing the processor to hang. It also states that * PMIs via TB bit transitions can be used to simulate the decrementer. * * 750FX adds dual-PLL support and programmable core frequency switching. * * 750FX DD2.3 fixed the DEC/PMI SRR0 corruption erratum. * * 74xx * ---- * 7400 adds MMCR2 and BAMR. * * MMCR0[THRESHOLD] is multiplied by 2 or 32, as specified * by MMCR2[THRESHMULT]. * * 74xx changes the semantics of several MMCR0 control bits, * compared to 604/750. * * PPC7410 Erratum No. 10: Like the MPC750 TAU/DECR/PMI erratum. * Erratum No. 14 marks TAU as unsupported in 7410, but this leaves * perfmon and decrementer interrupts as being mutually exclusive. * Affects PPC7410 1.0-1.2 (PVR 0x800C1100-0x800C1102). 1.3 and up * (PVR 0x800C1103 up) are Ok. * * 7450 adds PMC5 and PMC6. 
* * 7455/7445 V3.3 (PVR 80010303) and later use the 7457 PLL table, * earlier revisions use the 7450 PLL table */ static inline unsigned int read_pmc(unsigned int pmc) { switch (pmc) { default: /* impossible, but silences gcc warning */ case 0: return mfspr(SPRN_PMC1); case 1: return mfspr(SPRN_PMC2); case 2: return mfspr(SPRN_PMC3); case 3: return mfspr(SPRN_PMC4); case 4: return mfspr(SPRN_PMC5); case 5: return mfspr(SPRN_PMC6); } } static void ppc_read_counters(struct perfctr_cpu_state *state, struct perfctr_low_ctrs *ctrs) { unsigned int cstatus, nrctrs, i; cstatus = state->user.cstatus; if (perfctr_cstatus_has_tsc(cstatus)) ctrs->tsc = get_tbl(); nrctrs = perfctr_cstatus_nractrs(cstatus); for(i = 0; i < nrctrs; ++i) { unsigned int pmc = state->control.pmc_map[i]; ctrs->pmc[i] = read_pmc(pmc); } } static unsigned int pmc_max_event(unsigned int pmc) { switch (pmc) { default: /* impossible, but silences gcc warning */ case 0: return 127; case 1: return 63; case 2: return 31; case 3: return 31; case 4: return 31; case 5: return 63; } } static unsigned int get_nr_pmcs(void) { switch (pm_type) { case PM_7450: return 6; case PM_7400: case PM_750: case PM_604e: return 4; case PM_604: return 2; default: /* PM_NONE, but silences gcc warning */ return 0; } } static int ppc_check_control(struct perfctr_cpu_state *state) { unsigned int i, nractrs, nrctrs, pmc_mask, pmi_mask, pmc; unsigned int nr_pmcs, evntsel[6]; nr_pmcs = get_nr_pmcs(); nractrs = state->control.header.nractrs; nrctrs = nractrs + state->control.header.nrictrs; if (nrctrs < nractrs || nrctrs > nr_pmcs) return -EINVAL; pmc_mask = 0; pmi_mask = 0; evntsel[1-1] = (state->control.mmcr0 >> (31-25)) & 0x7F; evntsel[2-1] = (state->control.mmcr0 >> (31-31)) & 0x3F; evntsel[3-1] = (state->control.mmcr1 >> (31- 4)) & 0x1F; evntsel[4-1] = (state->control.mmcr1 >> (31- 9)) & 0x1F; evntsel[5-1] = (state->control.mmcr1 >> (31-14)) & 0x1F; evntsel[6-1] = (state->control.mmcr1 >> (31-20)) & 0x3F; for(i = 0; i < nrctrs; 
++i) { pmc = state->control.pmc_map[i]; if (pmc >= nr_pmcs || (pmc_mask & (1<= nractrs) pmi_mask |= (1< pmc_max_event(pmc)) return -EINVAL; } /* unused event selectors must be zero */ for(i = 0; i < ARRAY_SIZE(evntsel); ++i) if (!(pmc_mask & (1<control.mmcr2 & MMCR2_RESERVED) return -EINVAL; break; default: if (state->control.mmcr2) return -EINVAL; } /* check MMCR1; non-existent event selectors are taken care of by the "unused event selectors must be zero" check above */ if (state->control.mmcr1 & MMCR1__RESERVED) return -EINVAL; /* We do not yet handle TBEE as the only exception cause, so PMXE requires at least one interrupt-mode counter. */ if ((state->control.mmcr0 & MMCR0_PMXE) && !state->control.header.nrictrs) return -EINVAL; state->id = new_id(); /* * MMCR0[FC] and MMCR0[TRIGGER] may change on 74xx if FCECE or * TRIGGER is set. At suspends we must read MMCR0 back into * the state and the cache and then freeze the counters, and * at resumes we must unfreeze the counters and reload MMCR0. */ switch (pm_type) { case PM_7450: case PM_7400: if (state->control.mmcr0 & (MMCR0_FCECE | MMCR0_TRIGGER)) state->user.cstatus = perfctr_cstatus_set_mmcr0_quirk(state->user.cstatus); default: ; } /* The MMCR0 handling for FCECE and TRIGGER is also needed for PMXE. 
*/ if (state->control.mmcr0 & (MMCR0_PMXE | MMCR0_FCECE | MMCR0_TRIGGER)) state->user.cstatus = perfctr_cstatus_set_mmcr0_quirk(state->user.cstatus); return 0; } #ifdef CONFIG_PERFCTR_INTERRUPT_SUPPORT /* PRE: perfctr_cstatus_has_ictrs(state->cstatus) != 0 */ /* PRE: counters frozen */ static void ppc_isuspend(struct perfctr_cpu_state *state) { struct per_cpu_cache *cache; unsigned int cstatus, nrctrs, i; int cpu; cpu = smp_processor_id(); set_isuspend_cpu(state, cpu); /* early to limit cpu's live range */ cache = __get_cpu_cache(cpu); cstatus = state->user.cstatus; nrctrs = perfctr_cstatus_nrctrs(cstatus); for(i = perfctr_cstatus_nractrs(cstatus); i < nrctrs; ++i) { unsigned int pmc = state->control.pmc_map[i]; unsigned int now = read_pmc(pmc); state->user.pmc[i].sum += now - state->user.pmc[i].start; state->user.pmc[i].start = now; } /* cache->id is still == state->id */ } static void ppc_iresume(const struct perfctr_cpu_state *state) { struct per_cpu_cache *cache; unsigned int cstatus, nrctrs, i; int cpu; unsigned int pmc[6]; cpu = smp_processor_id(); cache = __get_cpu_cache(cpu); if (cache->id == state->id) { /* Clearing cache->id to force write_control() to unfreeze MMCR0 would be done here, but it is subsumed by resume()'s MMCR0 reload logic. */ if (is_isuspend_cpu(state, cpu)) return; /* skip reload of PMCs */ } /* * The CPU state wasn't ours. * * The counters must be frozen before being reinitialised, * to prevent unexpected increments and missed overflows. * * All unused counters must be reset to a non-overflow state. 
*/
	/* Freeze the counters first, unless the cached MMCR0 says they
	   are frozen already; keep the cache in step with the hardware. */
	if (!(cache->ppc_mmcr[0] & MMCR0_FC)) {
		cache->ppc_mmcr[0] |= MMCR0_FC;
		mtspr(SPRN_MMCR0, cache->ppc_mmcr[0]);
	}
	/* Build an image of all six PMCs: zero everywhere except for the
	   interrupt-mode counters, which get their start values. */
	memset(&pmc[0], 0, sizeof pmc);
	cstatus = state->user.cstatus;
	nrctrs = perfctr_cstatus_nrctrs(cstatus);
	for(i = perfctr_cstatus_nractrs(cstatus); i < nrctrs; ++i)
		pmc[state->control.pmc_map[i]] = state->user.pmc[i].start;

	/* Deliberate fall-through: each CPU type writes its own PMCs
	   and then those of every case label below it. */
	switch (pm_type) {
	case PM_7450:
		mtspr(SPRN_PMC6, pmc[6-1]);
		mtspr(SPRN_PMC5, pmc[5-1]);
		/* fall through */
	case PM_7400:
	case PM_750:
	case PM_604e:
		mtspr(SPRN_PMC4, pmc[4-1]);
		mtspr(SPRN_PMC3, pmc[3-1]);
		/* fall through */
	case PM_604:
		mtspr(SPRN_PMC2, pmc[2-1]);
		mtspr(SPRN_PMC1, pmc[1-1]);
		/* fall through */
	case PM_NONE:
		;
	}
	/* cache->id remains != state->id */
}
#endif

/*
 * Load the MMCR values of state into the hardware of the current CPU.
 * The per-CPU cache remembers which state last did so (cache->id) and
 * what each MMCR holds, so the whole function is skipped when the
 * cache is already owned by state, and individual mtspr instructions
 * are skipped for registers whose value would not change.
 */
static void ppc_write_control(const struct perfctr_cpu_state *state)
{
	struct per_cpu_cache *cache;
	unsigned int value;

	cache = get_cpu_cache();
	if (cache->id == state->id)
		return;
	/*
	 * Order matters here: update threshmult and event
	 * selectors before updating global control, which
	 * potentially enables PMIs.
	 *
	 * Since mtspr doesn't accept a runtime value for the
	 * SPR number, unroll the loop so each mtspr targets
	 * a constant SPR.
	 *
	 * For processors without MMCR2, we ensure that the
	 * cache and the state indicate the same value for it,
	 * preventing any actual mtspr to it. Ditto for MMCR1.
*/ value = state->control.mmcr2; if (value != cache->ppc_mmcr[2]) { cache->ppc_mmcr[2] = value; mtspr(SPRN_MMCR2, value); } value = state->control.mmcr1; if (value != cache->ppc_mmcr[1]) { cache->ppc_mmcr[1] = value; mtspr(SPRN_MMCR1, value); } value = state->control.mmcr0; if (value != cache->ppc_mmcr[0]) { cache->ppc_mmcr[0] = value; mtspr(SPRN_MMCR0, value); } cache->id = state->id; } static void ppc_clear_counters(void) { switch (pm_type) { case PM_7450: case PM_7400: mtspr(SPRN_MMCR2, 0); mtspr(SPRN_BAMR, 0); case PM_750: case PM_604e: mtspr(SPRN_MMCR1, 0); case PM_604: mtspr(SPRN_MMCR0, 0); case PM_NONE: ; } switch (pm_type) { case PM_7450: mtspr(SPRN_PMC6, 0); mtspr(SPRN_PMC5, 0); case PM_7400: case PM_750: case PM_604e: mtspr(SPRN_PMC4, 0); mtspr(SPRN_PMC3, 0); case PM_604: mtspr(SPRN_PMC2, 0); mtspr(SPRN_PMC1, 0); case PM_NONE: ; } } /* * Driver methods, internal and exported. */ static void perfctr_cpu_write_control(const struct perfctr_cpu_state *state) { return ppc_write_control(state); } static void perfctr_cpu_read_counters(struct perfctr_cpu_state *state, struct perfctr_low_ctrs *ctrs) { return ppc_read_counters(state, ctrs); } #ifdef CONFIG_PERFCTR_INTERRUPT_SUPPORT static void perfctr_cpu_isuspend(struct perfctr_cpu_state *state) { return ppc_isuspend(state); } static void perfctr_cpu_iresume(const struct perfctr_cpu_state *state) { return ppc_iresume(state); } /* Call perfctr_cpu_ireload() just before perfctr_cpu_resume() to bypass internal caching and force a reload if the I-mode PMCs. 
*/ void perfctr_cpu_ireload(struct perfctr_cpu_state *state) { state->control.mmcr0 |= MMCR0_PMXE; #ifdef CONFIG_SMP clear_isuspend_cpu(state); #else get_cpu_cache()->id = 0; #endif } /* PRE: the counters have been suspended and sampled by perfctr_cpu_suspend() */ unsigned int perfctr_cpu_identify_overflow(struct perfctr_cpu_state *state) { unsigned int cstatus, nrctrs, i, pmc_mask; cstatus = state->user.cstatus; nrctrs = perfctr_cstatus_nrctrs(cstatus); pmc_mask = 0; for(i = perfctr_cstatus_nractrs(cstatus); i < nrctrs; ++i) { if ((int)state->user.pmc[i].start < 0) { /* PPC-specific */ unsigned int pmc = state->control.pmc_map[i]; /* XXX: "+=" to correct for overshots */ state->user.pmc[i].start = state->control.ireset[pmc]; pmc_mask |= (1 << i); } } if (!pmc_mask && (state->control.mmcr0 & MMCR0_TBEE)) pmc_mask = (1<<8); /* fake TB bit flip indicator */ return pmc_mask; } static inline int check_ireset(struct perfctr_cpu_state *state) { unsigned int nrctrs, i; i = state->control.header.nractrs; nrctrs = i + state->control.header.nrictrs; for(; i < nrctrs; ++i) { unsigned int pmc = state->control.pmc_map[i]; if ((int)state->control.ireset[pmc] < 0) /* PPC-specific */ return -EINVAL; state->user.pmc[i].start = state->control.ireset[pmc]; } return 0; } #else /* CONFIG_PERFCTR_INTERRUPT_SUPPORT */ static inline void perfctr_cpu_isuspend(struct perfctr_cpu_state *state) { } static inline void perfctr_cpu_iresume(const struct perfctr_cpu_state *state) { } static inline int check_ireset(struct perfctr_cpu_state *state) { return 0; } #endif /* CONFIG_PERFCTR_INTERRUPT_SUPPORT */ static int check_control(struct perfctr_cpu_state *state) { return ppc_check_control(state); } int perfctr_cpu_update_control(struct perfctr_cpu_state *state, int is_global) { int err; clear_isuspend_cpu(state); state->user.cstatus = 0; /* disallow i-mode counters if we cannot catch the interrupts */ if (!(perfctr_info.cpu_features & PERFCTR_FEATURE_PCINT) && state->control.header.nrictrs) return 
-EPERM; err = check_control(state); /* may initialise state->cstatus */ if (err < 0) return err; err = check_ireset(state); if (err < 0) { state->user.cstatus = 0; return err; } state->user.cstatus |= perfctr_mk_cstatus(state->control.header.tsc_on, state->control.header.nractrs, state->control.header.nrictrs); return 0; } /* * get_reg_offset() maps SPR numbers to offsets into struct perfctr_cpu_control, * suitable for accessing control data of type unsigned int. */ static const struct { unsigned int spr; unsigned int offset; } reg_offsets[] = { { SPRN_MMCR0, offsetof(struct perfctr_cpu_control, mmcr0) }, { SPRN_MMCR1, offsetof(struct perfctr_cpu_control, mmcr1) }, { SPRN_MMCR2, offsetof(struct perfctr_cpu_control, mmcr2) }, { SPRN_PMC1, offsetof(struct perfctr_cpu_control, ireset[1-1]) }, { SPRN_PMC2, offsetof(struct perfctr_cpu_control, ireset[2-1]) }, { SPRN_PMC3, offsetof(struct perfctr_cpu_control, ireset[3-1]) }, { SPRN_PMC4, offsetof(struct perfctr_cpu_control, ireset[4-1]) }, { SPRN_PMC5, offsetof(struct perfctr_cpu_control, ireset[5-1]) }, { SPRN_PMC6, offsetof(struct perfctr_cpu_control, ireset[6-1]) }, }; static int get_reg_offset(unsigned int spr) { unsigned int i; for(i = 0; i < ARRAY_SIZE(reg_offsets); ++i) if (spr == reg_offsets[i].spr) return reg_offsets[i].offset; return -1; } static int access_regs(struct perfctr_cpu_control *control, void *argp, unsigned int argbytes, int do_write) { struct perfctr_cpu_reg *regs; unsigned int i, nr_regs, *where; int offset; nr_regs = argbytes / sizeof(struct perfctr_cpu_reg); if (nr_regs * sizeof(struct perfctr_cpu_reg) != argbytes) return -EINVAL; regs = (struct perfctr_cpu_reg*)argp; for(i = 0; i < nr_regs; ++i) { offset = get_reg_offset(regs[i].nr); if (offset < 0) return -EINVAL; where = (unsigned int*)((char*)control + offset); if (do_write) *where = regs[i].value; else regs[i].value = *where; } return argbytes; } int perfctr_cpu_control_write(struct perfctr_cpu_control *control, unsigned int domain, const 
void *srcp, unsigned int srcbytes) { if (domain != PERFCTR_DOMAIN_CPU_REGS) return -EINVAL; return access_regs(control, (void*)srcp, srcbytes, 1); } int perfctr_cpu_control_read(const struct perfctr_cpu_control *control, unsigned int domain, void *dstp, unsigned int dstbytes) { if (domain != PERFCTR_DOMAIN_CPU_REGS) return -EINVAL; return access_regs((struct perfctr_cpu_control*)control, dstp, dstbytes, 0); } void perfctr_cpu_suspend(struct perfctr_cpu_state *state) { unsigned int i, cstatus, nractrs; struct perfctr_low_ctrs now; write_perfseq_begin(&state->user.sequence); if (perfctr_cstatus_has_mmcr0_quirk(state->user.cstatus)) { unsigned int mmcr0 = mfspr(SPRN_MMCR0); mtspr(SPRN_MMCR0, mmcr0 | MMCR0_FC); get_cpu_cache()->ppc_mmcr[0] = mmcr0 | MMCR0_FC; state->control.mmcr0 = mmcr0; } if (perfctr_cstatus_has_ictrs(state->user.cstatus)) perfctr_cpu_isuspend(state); perfctr_cpu_read_counters(state, &now); cstatus = state->user.cstatus; if (perfctr_cstatus_has_tsc(cstatus)) state->user.tsc_sum += now.tsc - state->user.tsc_start; nractrs = perfctr_cstatus_nractrs(cstatus); for(i = 0; i < nractrs; ++i) state->user.pmc[i].sum += now.pmc[i] - state->user.pmc[i].start; write_perfseq_end(&state->user.sequence); } void perfctr_cpu_resume(struct perfctr_cpu_state *state) { write_perfseq_begin(&state->user.sequence); if (perfctr_cstatus_has_ictrs(state->user.cstatus)) perfctr_cpu_iresume(state); if (perfctr_cstatus_has_mmcr0_quirk(state->user.cstatus)) get_cpu_cache()->id = 0; /* force reload of MMCR0 */ perfctr_cpu_write_control(state); //perfctr_cpu_read_counters(state, &state->start); { struct perfctr_low_ctrs now; unsigned int i, cstatus, nrctrs; perfctr_cpu_read_counters(state, &now); cstatus = state->user.cstatus; if (perfctr_cstatus_has_tsc(cstatus)) state->user.tsc_start = now.tsc; nrctrs = perfctr_cstatus_nractrs(cstatus); for(i = 0; i < nrctrs; ++i) state->user.pmc[i].start = now.pmc[i]; } write_perfseq_end(&state->user.sequence); } void perfctr_cpu_sample(struct 
perfctr_cpu_state *state) { unsigned int i, cstatus, nractrs; struct perfctr_low_ctrs now; write_perfseq_begin(&state->user.sequence); perfctr_cpu_read_counters(state, &now); cstatus = state->user.cstatus; if (perfctr_cstatus_has_tsc(cstatus)) { state->user.tsc_sum += now.tsc - state->user.tsc_start; state->user.tsc_start = now.tsc; } nractrs = perfctr_cstatus_nractrs(cstatus); for(i = 0; i < nractrs; ++i) { state->user.pmc[i].sum += now.pmc[i] - state->user.pmc[i].start; state->user.pmc[i].start = now.pmc[i]; } write_perfseq_end(&state->user.sequence); } static void perfctr_cpu_clear_counters(void) { struct per_cpu_cache *cache; cache = get_cpu_cache(); memset(cache, 0, sizeof *cache); cache->id = -1; ppc_clear_counters(); } /**************************************************************** * * * Processor detection and initialisation procedures. * * * ****************************************************************/ /* Derive CPU core frequency from TB frequency and PLL_CFG. */ enum pll_type { PLL_NONE, /* for e.g. 604 which has no HID1[PLL_CFG] */ PLL_604e, PLL_750, PLL_750FX, PLL_7400, PLL_7450, PLL_7457, }; /* These are the known bus-to-core ratios, indexed by PLL_CFG. Multiplied by 2 since half-multiplier steps are present. 
*/ static unsigned char cfg_ratio_604e[16] __initdata = { // *2 2, 2, 14, 2, 4, 13, 5, 9, 6, 11, 8, 10, 3, 12, 7, 0 }; static unsigned char cfg_ratio_750[16] __initdata = { // *2 5, 15, 14, 2, 4, 13, 20, 9, // 0b0110 is 18 if L1_TSTCLK=0, but that is abnormal 6, 11, 8, 10, 16, 12, 7, 0 }; static unsigned char cfg_ratio_750FX[32] __initdata = { // *2 0, 0, 2, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 0 }; static unsigned char cfg_ratio_7400[16] __initdata = { // *2 18, 15, 14, 2, 4, 13, 5, 9, 6, 11, 8, 10, 16, 12, 7, 0 }; static unsigned char cfg_ratio_7450[32] __initdata = { // *2 1, 0, 15, 30, 14, 0, 2, 0, 4, 0, 13, 26, 5, 0, 9, 18, 6, 0, 11, 22, 8, 20, 10, 24, 16, 28, 12, 32, 7, 0, 0, 0 }; static unsigned char cfg_ratio_7457[32] __initdata = { // *2 23, 34, 15, 30, 14, 36, 2, 40, 4, 42, 13, 26, 17, 48, 19, 18, 6, 21, 11, 22, 8, 20, 10, 24, 16, 28, 12, 32, 27, 56, 0, 25 }; static unsigned int __init tb_to_core_ratio(enum pll_type pll_type) { unsigned char *cfg_ratio; unsigned int shift = 28, mask = 0xF, hid1, pll_cfg, ratio; switch (pll_type) { case PLL_604e: cfg_ratio = cfg_ratio_604e; break; case PLL_750: cfg_ratio = cfg_ratio_750; break; case PLL_750FX: cfg_ratio = cfg_ratio_750FX; hid1 = mfspr(SPRN_HID1); switch ((hid1 >> 16) & 0x3) { /* HID1[PI0,PS] */ case 0: /* PLL0 with external config */ shift = 31-4; /* access HID1[PCE] */ break; case 2: /* PLL0 with internal config */ shift = 31-20; /* access HID1[PC0] */ break; case 1: case 3: /* PLL1 */ shift = 31-28; /* access HID1[PC1] */ break; } mask = 0x1F; break; case PLL_7400: cfg_ratio = cfg_ratio_7400; break; case PLL_7450: cfg_ratio = cfg_ratio_7450; shift = 12; mask = 0x1F; break; case PLL_7457: cfg_ratio = cfg_ratio_7457; shift = 12; mask = 0x1F; break; default: return 0; } hid1 = mfspr(SPRN_HID1); pll_cfg = (hid1 >> shift) & mask; ratio = cfg_ratio[pll_cfg]; if (!ratio) printk(KERN_WARNING "perfctr: unknown PLL_CFG 0x%x\n", pll_cfg); return 
(4/2) * ratio; } static unsigned int __init pll_to_core_khz(enum pll_type pll_type) { unsigned int tb_to_core = tb_to_core_ratio(pll_type); perfctr_info.tsc_to_cpu_mult = tb_to_core; return tb_ticks_per_jiffy * tb_to_core * (HZ/10) / (1000/10); } /* Extract core and timebase frequencies from Open Firmware. */ #ifdef CONFIG_PPC_OF static unsigned int __init of_to_core_khz(void) { struct device_node *cpu; unsigned int *fp, core, tb; cpu = find_type_devices("cpu"); if (!cpu) return 0; fp = (unsigned int*)get_property(cpu, "clock-frequency", NULL); if (!fp || !(core = *fp)) return 0; fp = (unsigned int*)get_property(cpu, "timebase-frequency", NULL); if (!fp || !(tb = *fp)) return 0; perfctr_info.tsc_to_cpu_mult = core / tb; return core / 1000; } #else static inline unsigned int of_to_core_khz(void) { return 0; } #endif static unsigned int __init detect_cpu_khz(enum pll_type pll_type) { unsigned int khz; khz = pll_to_core_khz(pll_type); if (khz) return khz; khz = of_to_core_khz(); if (khz) return khz; printk(KERN_WARNING "perfctr: unable to determine CPU speed\n"); return 0; } static int __init known_init(void) { static char known_name[] __initdata = "PowerPC 60x/7xx/74xx"; unsigned int features; enum pll_type pll_type; unsigned int pvr; int have_mmcr1; features = PERFCTR_FEATURE_RDTSC | PERFCTR_FEATURE_RDPMC; have_mmcr1 = 1; pvr = mfspr(SPRN_PVR); switch (PVR_VER(pvr)) { case 0x0004: /* 604 */ pm_type = PM_604; pll_type = PLL_NONE; features = PERFCTR_FEATURE_RDTSC; have_mmcr1 = 0; break; case 0x0009: /* 604e; */ case 0x000A: /* 604ev */ pm_type = PM_604e; pll_type = PLL_604e; features = PERFCTR_FEATURE_RDTSC; break; case 0x0008: /* 750/740 */ pm_type = PM_750; pll_type = PLL_750; break; case 0x7000: case 0x7001: /* IBM750FX */ if ((pvr & 0xFF0F) >= 0x0203) features |= PERFCTR_FEATURE_PCINT; pm_type = PM_750; pll_type = PLL_750FX; break; case 0x7002: /* IBM750GX */ features |= PERFCTR_FEATURE_PCINT; pm_type = PM_750; pll_type = PLL_750FX; break; case 0x000C: /* 7400 */ 
pm_type = PM_7400; pll_type = PLL_7400; break; case 0x800C: /* 7410 */ if ((pvr & 0xFFFF) >= 0x1103) features |= PERFCTR_FEATURE_PCINT; pm_type = PM_7400; pll_type = PLL_7400; break; case 0x8000: /* 7451/7441 */ features |= PERFCTR_FEATURE_PCINT; pm_type = PM_7450; pll_type = PLL_7450; break; case 0x8001: /* 7455/7445 */ features |= PERFCTR_FEATURE_PCINT; pm_type = PM_7450; pll_type = ((pvr & 0xFFFF) < 0x0303) ? PLL_7450 : PLL_7457; break; case 0x8002: /* 7457/7447 */ case 0x8003: /* 7447A */ features |= PERFCTR_FEATURE_PCINT; pm_type = PM_7450; pll_type = PLL_7457; break; case 0x8004: /* 7448 */ features |= PERFCTR_FEATURE_PCINT; pm_type = PM_7450; pll_type = PLL_NONE; /* known to differ from 7447A, no details yet */ break; default: return -ENODEV; } perfctr_info.cpu_features = features; perfctr_cpu_name = known_name; perfctr_info.cpu_khz = detect_cpu_khz(pll_type); perfctr_ppc_init_tests(have_mmcr1); return 0; } static int __init unknown_init(void) { static char unknown_name[] __initdata = "Generic PowerPC with TB"; unsigned int khz; khz = detect_cpu_khz(PLL_NONE); if (!khz) return -ENODEV; perfctr_info.cpu_features = PERFCTR_FEATURE_RDTSC; perfctr_cpu_name = unknown_name; perfctr_info.cpu_khz = khz; pm_type = PM_NONE; return 0; } static void perfctr_cpu_clear_one(void *ignore) { /* PREEMPT note: when called via on_each_cpu(), this is in IRQ context with preemption disabled. */ perfctr_cpu_clear_counters(); } static void perfctr_cpu_reset(void) { on_each_cpu(perfctr_cpu_clear_one, NULL, 1, 1); perfctr_cpu_set_ihandler(NULL); } static int init_done; int __init perfctr_cpu_init(void) { int err; perfctr_info.cpu_features = 0; err = known_init(); if (err) { err = unknown_init(); if (err) goto out; } init_done = 1; out: return err; } void __exit perfctr_cpu_exit(void) { } /**************************************************************** * * * Hardware reservation. 
* * * ****************************************************************/ static DEFINE_MUTEX(mutex); static const char *current_service = 0; const char *perfctr_cpu_reserve(const char *service) { const char *ret; if (!init_done) return "unsupported hardware"; mutex_lock(&mutex); ret = current_service; if (ret) goto out_unlock; ret = "unknown driver (oprofile?)"; if (perfctr_reserve_pmc_hardware() < 0) goto out_unlock; current_service = service; perfctr_cpu_reset(); ret = NULL; out_unlock: mutex_unlock(&mutex); return ret; } void perfctr_cpu_release(const char *service) { mutex_lock(&mutex); if (service != current_service) { printk(KERN_ERR "%s: attempt by %s to release while reserved by %s\n", __FUNCTION__, service, current_service); } else { /* power down the counters */ perfctr_cpu_reset(); current_service = 0; perfctr_release_pmc_hardware(); } mutex_unlock(&mutex); } papi-5.6.0/src/Matlab/flops.readme000664 001750 001750 00000005716 13216244356 021041 0ustar00jshenry1963jshenry1963000000 000000 Running PAPI FLOPS in the MATLAB Environment If you want to measure the rate of floating point operations in a MATLAB program, PAPI FLOPS is a good way to do it. PAPI FLOPS uses the PAPI library and underlying hardware counters to accurately compute both the total number of floating point operations and the rate of floating point execution in a section of MATLAB code. For more information on the flops function, you can type 'help flops' inside MATLAB. FLOPS - Has 3 execution possibilities: Initialize FLOP counting or reset the counters with: FLOPS(0) Record the number of floating point instructions since initialization: ops = FLOPS Record the number of floating point instructions and the incremental rate of floating point execution since initialization: [ops, mflips] = FLOPS PAPI_flops.c, when compiled, turns into a callable function in MATLAB. In order to use this function, you need to know a little about mex. 
mex is simply the compiler you use to make your code run in the MATLAB environment. If you don't know how to use mex, you might want to acquaint yourself a bit. "mex -setup" might be needed if you encounter problems, but the simplest explanation might be to substitute "mex" for "gcc" and you are on your way. All the other rules for compiling PAPI are the same. mex compilations can be done inside or outside of the Matlab environment, but in this case, it is recommended that you compile outside of Matlab. For some reason, compiling inside does not work on some systems. So far, the Linux environment and the Windows environment have been tested, but _in theory_ this code should work anywhere PAPI and Matlab both work. The following instructions are for a Linux/Unix environment: Assuming papi.h is present in /usr/local/include and libpapi.a is present in /usr/local/lib, the commands below should work. If not, you may need to alter the compile strings and/or the #include statement in PAPI_flops.c. Also, the compile string will be different for different platforms. For instance, if you want to compile and run on a Linux machine, assuming PAPI_flops.c is in your current working directory (you'll have a different compile string on a different architecture): 1. Compile the wrapper: mex -I/usr/local/include PAPI_flops.c /usr/local/lib/libpapi.a -output flops 2. Start Matlab: matlab 3. Run the code: a. Play with FLOPS - the first line initializes the counting; the second returns the number of floating point instructions since the first call, and the third line does the same as the second AND reports the incremental rate of floating point execution since the last call: FLOPS(0) ins = FLOPS [ins, mflips] = FLOPS b. Try the example m files included with the distribution: FlopsInnerProduct.m FlopsMatrixVector.m FlopsMatrixMatrix.m FlopsSampler.m Contact jthomas@cs.utk.edu with any questions regarding PAPI calls in Matlab - either errors or questions.
Also, this has just been implemented, so changes could be coming.......... papi-5.6.0/src/freebsd/map-westmere.c000664 001750 001750 00000230626 13216244361 021517 0ustar00jshenry1963jshenry1963000000 000000 /****************************/ /* THIS IS OPEN SOURCE CODE */ /****************************/ /* * File: map-westmere.c * Author: George Neville-Neil * gnn@freebsd.org * Harald Servat * redcrash@gmail.com */ #include "freebsd.h" #include "papiStdEventDefs.h" #include "map.h" /**************************************************************************** Westmere SUBSTRATE Westmere SUBSTRATE Westmere SUBSTRATE Westmere SUBSTRATE Westmere SUBSTRATE ****************************************************************************/ /* NativeEvent_Value_Westmere must match Westmere_info */ Native_Event_LabelDescription_t WestmereProcessor_info[] = { {"LOAD_BLOCK.OVERLAP_STORE", "Loads that partially overlap an earlier store"}, {"SB_DRAIN.ANY", "All Store buffer stall cycles"}, {"MISALIGN_MEMORY.STORE", "All store referenced with misaligned address"}, {"STORE_BLOCKS.AT_RET", "Counts number of loads delayed with at-Retirement block code. 
The following loads need to be executed at retirement and wait for all senior stores on the same thread to be drained: load splitting across 4K boundary (page split), load accessing uncacheable (UC or USWC) memory, load lock, and load with page table in UC or USWC memory region."}, {"STORE_BLOCKS.L1D_BLOCK", "Cacheable loads delayed with L1D block code"}, {"PARTIAL_ADDRESS_ALIAS", "Counts false dependency due to partial address aliasing"}, {"DTLB_LOAD_MISSES.ANY", "Counts all load misses that cause a page walk"}, {"DTLB_LOAD_MISSES.WALK_COMPLETED", "Counts number of completed page walks due to load miss in the STLB."}, {"DTLB_LOAD_MISSES.WALK_CYCLES", "Cycles PMH is busy with a page walk due to a load miss in the STLB."}, {"DTLB_LOAD_MISSES.STLB_HIT", "Number of cache load STLB hits"}, {"DTLB_LOAD_MISSES.PDE_MISS", "Number of DTLB cache load misses where the low part of the linear tophysical address translation was missed."}, {"MEM_INST_RETIRED.LOADS", "Counts the number of instructions with an architecturally-visible store retired on the architected path. In conjunction with ld_lat facility"}, {"MEM_INST_RETIRED.STORES", "Counts the number of instructions with an architecturally-visible store retired on the architected path. In conjunction with ld_lat facility"}, {"MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD", "Counts the number of instructions exceeding the latency specified with ld_lat facility. In conjunction with ld_lat facility"}, {"MEM_STORE_RETIRED.DTLB_MISS", "The event counts the number of retired stores that missed the DTLB. The DTLB miss is not counted if the store operation causes a fault. Does not counter prefetches. Counts both primary and secondary misses to the TLB"}, {"UOPS_ISSUED.ANY", "Counts the number of Uops issued by the Register Allocation Table to the Reservation Station, i.e. 
the UOPs issued from the front end to the back end."}, {"UOPS_ISSUED.STALLED_CYCLES", "Counts the number of cycles no Uops issued by the Register Allocation Table to the Reservation Station, i.e. the UOPs issued from the front end to the back end."}, {"UOPS_ISSUED.FUSED", "Counts the number of fused Uops that were issued from the Register Allocation Table to the Reservation Station."}, {"MEM_UNCORE_RETIRED.LOCAL_HITM", "Load instructions retired that HIT modified data in sibling core (Precise Event)"}, {"MEM_UNCORE_RETIRED.LOCAL_DRAM_AND_REMOTE_CACHE_HIT", "Load instructions retired local dram and remote cache HIT data sources (Precise Event)"}, {"MEM_UNCORE_RETIRED.LOCAL_DRAM", "Load instructions retired with a data source of local DRAM or locally homed remote cache HITM (Precise Event)"}, {"MEM_UNCORE_RETIRED.REMOTE_DRAM", "Load instructions retired remote DRAM and remote home-remote cache HITM (Precise Event)"}, {"MEM_UNCORE_RETIRED.UNCACHEABLE", "Load instructions retired I/O (Precise Event)"}, {"FP_COMP_OPS_EXE.X87", "Counts the number of FP Computational Uops Executed. The number of FADD, FSUB, FCOM, FMULs, integer MULsand IMULs, FDIVs, FPREMs, FSQRTS, integer DIVs, and IDIVs. 
This event does not distinguish an FADD used in the middle of a transcendental flow from a separate FADD instruction."}, {"FP_COMP_OPS_EXE.MMX", "Counts number of MMX Uops executed."}, {"FP_COMP_OPS_EXE.SSE_FP", "Counts number of SSE and SSE2 FP uops executed."}, {"FP_COMP_OPS_EXE.SSE2_INTEGER", "Counts number of SSE2 integer uops executed."}, {"FP_COMP_OPS_EXE.SSE_FP_PACKED", "Counts number of SSE FP packed uops executed."}, {"FP_COMP_OPS_EXE.SSE_FP_SCALAR", "Counts number of SSE FP scalar uops executed."}, {"FP_COMP_OPS_EXE.SSE_SINGLE_PRECISION", "Counts number of SSE* FP single precision uops executed."}, {"FP_COMP_OPS_EXE.SSE_DOUBLE_PRECISION", "Counts number of SSE* FP double precision uops executed."}, {"SIMD_INT_128.PACKED_MPY", "Counts number of 128 bit SIMD integer multiply operations."}, {"SIMD_INT_128.PACKED_SHIFT", "Counts number of 128 bit SIMD integer shift operations."}, {"SIMD_INT_128.PACK", "Counts number of 128 bit SIMD integer pack operations."}, {"SIMD_INT_128.UNPACK", "Counts number of 128 bit SIMD integer unpack operations."}, {"SIMD_INT_128.PACKED_LOGICAL", "Counts number of 128 bit SIMD integer logical operations."}, {"SIMD_INT_128.PACKED_ARITH", "Counts number of 128 bit SIMD integer arithmetic operations."}, {"SIMD_INT_128.SHUFFLE_MOVE", "Counts number of 128 bit SIMD integer shuffle and move operations."}, {"LOAD_DISPATCH.RS", "Counts number of loads dispatched from the Reservation Station that bypass the Memory Order Buffer."}, {"LOAD_DISPATCH.RS_DELAYED", "Counts the number of delayed RS dispatches at the stage latch. 
If an RS dispatch can not bypass to LB, it has another chance to dispatch from the one-cycle delayed staging latch before it is written into the LB."}, {"LOAD_DISPATCH.MOB", "Counts the number of loads dispatched from the Reservation Station to the Memory Order Buffer."}, {"LOAD_DISPATCH.ANY", "Counts all loads dispatched from the Reservation Station."}, {"ARITH.CYCLES_DIV_BUSY", "Counts the number of cycles the divider is busy executing divide or square root operations. The divide can be integer, X87 or Streaming SIMD Extensions (SSE). The square root operation can be either X87 or SSE. Set 'edge =1, invert=1, cmask=1' to count the number of divides. Count may be incorrect When SMT is on."}, {"ARITH.MUL", "Counts the number of multiply operations executed. This includes integer as well as floating point multiply operations but excludes DPPS mul and MPSAD. Count may be incorrect When SMT is on."}, {"INST_QUEUE_WRITES", "Counts the number of instructions written into the instruction queue every cycle."}, {"INST_DECODED.DEC0", "Counts number of instructions that require decoder 0 to be decoded. Usually, this means that the instruction maps to more than 1 uop"}, {"TWO_UOP_INSTS_DECODED", "An instruction that generates two uops was decoded"}, {"INST_QUEUE_WRITE_CYCLES", "This event counts the number of cycles during which instructions are written to the instruction queue. Dividing this counter by the number of instructions written to the instruction queue (INST_QUEUE_WRITES) yields the average number of instructions decoded each cycle. If this number is less than four and the pipe stalls, this indicates that the decoder is failing to decode enough instructions per cycle to sustain the 4-wide pipeline. If SSE* instructions that are 6 bytes or longer arrive one after another, then front end throughput may limit execution speed. 
"}, {"LSD_OVERFLOW", "Number of loops that can not stream from the instruction queue."}, {"L2_RQSTS.LD_HIT", "Counts number of loads that hit the L2 cache. L2 loads include both L1D demand misses as well as L1D prefetches. L2 loads can be rejected for various reasons. Only non rejected loads are counted."}, {"L2_RQSTS.LD_MISS", "Counts the number of loads that miss the L2 cache. L2 loads include both L1D demand misses as well as L1D prefetches."}, {"L2_RQSTS.LOADS", "Counts all L2 load requests. L2 loads include both L1D demand misses as well as L1D prefetches."}, {"L2_RQSTS.RFO_HIT", "Counts the number of store RFO requests that hit the L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches. Count includes WC memory requests, where the data is not fetched but the permission to write the line is required."}, {"L2_RQSTS.RFO_MISS", "Counts the number of store RFO requests that miss the L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches."}, {"L2_RQSTS.RFOS", "Counts all L2 store RFO requests. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches."}, {"L2_RQSTS.IFETCH_HIT", "Counts number of instruction fetches that hit the L2 cache. L2 instruction fetches include both L1I demand misses as well as L1I instruction prefetches."}, {"L2_RQSTS.IFETCH_MISS", "Counts number of instruction fetches that miss the L2 cache. L2 instruction fetches include both L1I demand misses as well as L1I instruction prefetches."}, {"L2_RQSTS.IFETCHES", "Counts all instruction fetches. 
L2 instruction fetches include both L1I demand misses as well as L1I instruction prefetches."}, {"L2_RQSTS.PREFETCH_HIT", "Counts L2 prefetch hits for both code and data."}, {"L2_RQSTS.PREFETCH_MISS", "Counts L2 prefetch misses for both code and data."}, {"L2_RQSTS.PREFETCHES", "Counts all L2 prefetches for both code and data."}, {"L2_RQSTS.MISS", "Counts all L2 misses for both code and data."}, {"L2_RQSTS.REFERENCES", "Counts all L2 requests for both code and data."}, {"L2_DATA_RQSTS.DEMAND.I_STATE", "Counts number of L2 data demand loads where the cache line to be loaded is in the I (invalid) state, i.e. a cache miss. L2 demand loads are both L1D demand misses and L1D prefetches."}, {"L2_DATA_RQSTS.DEMAND.S_STATE", "Counts number of L2 data demand loads where the cache line to be loaded is in the S (shared) state. L2 demand loads are both L1D demand misses and L1D prefetches."}, {"L2_DATA_RQSTS.DEMAND.E_STATE", "Counts number of L2 data demand loads where the cache line to be loaded is in the E (exclusive) state. L2 demand loads are both L1D demand misses and L1D prefetches."}, {"L2_DATA_RQSTS.DEMAND.M_STATE", "Counts number of L2 data demand loads where the cache line to be loaded is in the M (modified) state. L2 demand loads are both L1D demand misses and L1D prefetches."}, {"L2_DATA_RQSTS.DEMAND.MESI", "Counts all L2 data demand requests. L2 demand loads are both L1D demand misses and L1D prefetches."}, {"L2_DATA_RQSTS.PREFETCH.I_STATE", "Counts number of L2 prefetch data loads where the cache line to be loaded is in the I (invalid) state, i.e. a cache miss."}, {"L2_DATA_RQSTS.PREFETCH.S_STATE", "Counts number of L2 prefetch data loads where the cache line to be loaded is in the S (shared) state. 
A prefetch RFO will miss on an S state line, while a prefetch read will hit on an S state line."}, {"L2_DATA_RQSTS.PREFETCH.E_STATE", "Counts number of L2 prefetch data loads where the cache line to be loaded is in the E (exclusive) state."}, {"L2_DATA_RQSTS.PREFETCH.M_STATE", "Counts number of L2 prefetch data loads where the cache line to be loaded is in the M (modified) state."}, {"L2_DATA_RQSTS.PREFETCH.MESI", "Counts all L2 prefetch requests."}, {"L2_DATA_RQSTS.ANY", "Counts all L2 data requests."}, {"L2_WRITE.RFO.I_STATE", "Counts number of L2 demand store RFO requests where the cache line to be loaded is in the I (invalid) state, i.e, a cache miss. The L1D prefetcher does not issue a RFO prefetch. This is a demand RFO request."}, {"L2_WRITE.RFO.S_STATE", "Counts number of L2 store RFO requests where the cache line to be loaded is in the S (shared) state. The L1D prefetcher does not issue a RFO prefetch. This is a demand RFO request."}, {"L2_WRITE.RFO.M_STATE", "Counts number of L2 store RFO requests where the cache line to be loaded is in the M (modified) state. The L1D prefetcher does not issue a RFO prefetch. This is a demand RFO request."}, {"L2_WRITE.RFO.HIT", "Counts number of L2 store RFO requests where the cache line to be loaded is in either the S, E or M states. The L1D prefetcher does not issue a RFO prefetch."}, {"L2_WRITE.RFO.MESI", "Counts all L2 store RFO requests.The L1D prefetcher does not issue a RFO prefetch. This is a demand RFO request."}, {"L2_WRITE.LOCK.I_STATE", "Counts number of L2 demand lock RFO requests where the cache line to be loaded is in the I (invalid) state, i.e. 
a cache miss."}, {"L2_WRITE.LOCK.S_STATE", "Counts number of L2 lock RFO requests where the cache line to be loaded is in the S (shared) state."}, {"L2_WRITE.LOCK.E_STATE", "Counts number of L2 demand lock RFO requests where the cache line to be loaded is in the E (exclusive) state."}, {"L2_WRITE.LOCK.M_STATE", "Counts number of L2 demand lock RFO requests where the cache line to be loaded is in the M (modified) state."}, {"L2_WRITE.LOCK.HIT", "Counts number of L2 demand lock RFO requests where the cache line to be loaded is in either the S, E, or M state."}, {"L2_WRITE.LOCK.MESI", "Counts all L2 demand lock RFO requests."}, {"L1D_WB_L2.I_STATE", "Counts number of L1 writebacks to the L2 where the cache line to be written is in the I (invalid) state, i.e. a cache miss."}, {"L1D_WB_L2.S_STATE", "Counts number of L1 writebacks to the L2 where the cache line to be written is in the S state."}, {"L1D_WB_L2.E_STATE", "Counts number of L1 writebacks to the L2 where the cache line to be written is in the E (exclusive) state."}, {"L1D_WB_L2.M_STATE", "Counts number of L1 writebacks to the L2 where the cache line to be written is in the M (modified) state."}, {"L1D_WB_L2.MESI", "Counts all L1 writebacks to the L2."}, {"L3_LAT_CACHE.REFERENCE", "Counts uncore Last Level Cache references. Because cache hierarchy, cache sizes and other implementation-specific characteristics; value comparison to estimate performance differences is not recommended. See Table A-1."}, {"L3_LAT_CACHE.MISS", "Counts uncore Last Level Cache misses. Because cache hierarchy, cache sizes and other implementation-specific characteristics; value comparison to estimate performance differences is not recommended. See Table A-1."}, {"CPU_CLK_UNHALTED.THREAD_P", "Counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. 
See Table A-1."}, {"CPU_CLK_UNHALTED.REF_P", "Increments at the frequency of TSC when not halted. See Table A-1."}, {"DTLB_MISSES.ANY", "Counts the number of misses in the STLB which causes a page walk."}, {"DTLB_MISSES.WALK_COMPLETED", "Counts number of misses in the STLB which resulted in a completed page walk."}, {"DTLB_MISSES.WALK_CYCLES", "Counts cycles of page walk due to misses in the STLB."}, {"DTLB_MISSES.STLB_HIT", "Counts the number of DTLB first level misses that hit in the second level TLB. This event is only relevant if the core contains multiple DTLB levels."}, {"DTLB_MISSES.LARGE_WALK_COMPLETED", "Counts number of completed large page walks due to misses in the STLB."}, {"LOAD_HIT_PRE", "Counts load operations sent to the L1 data cache while a previous SSE prefetch instruction to the same cache line has started prefetching but has not yet finished."}, {"L1D_PREFETCH.REQUESTS", "Counts number of hardware prefetch requests dispatched out of the prefetch FIFO."}, {"L1D_PREFETCH.MISS", "Counts number of hardware prefetch requests that miss the L1D. There are two prefetchers in the L1D. A streamer, which predicts lines sequentially after this one should be fetched, and the IP prefetcher that remembers access patterns for the current instruction. The streamer prefetcher stops on an L1D hit, while the IP prefetcher does not."}, {"L1D_PREFETCH.TRIGGERS", "Counts number of prefetch requests triggered by the Finite State Machine and pushed into the prefetch FIFO. Some of the prefetch requests are dropped due to overwrites or competition between the IP index prefetcher and streamer prefetcher. The prefetch FIFO contains 4 entries."}, {"EPT.WALK_CYCLES", "Counts Extended Page walk cycles."}, {"L1D.REPL", "Counts the number of lines brought into the L1 data cache.Counter 0, 1 only."}, {"L1D.M_REPL", "Counts the number of modified lines brought into the L1 data cache. 
Counter 0, 1 only."}, {"L1D.M_EVICT", "Counts the number of modified lines evicted from the L1 data cache due to replacement. Counter 0, 1 only."}, {"L1D.M_SNOOP_EVICT", "Counts the number of modified lines evicted from the L1 data cache due to snoop HITM intervention. Counter 0, 1 only."}, {"L1D_CACHE_PREFETCH_LOCK_FB_HIT", "Counts the number of cacheable load lock speculated instructions accepted into the fill buffer."}, {"L1D_CACHE_LOCK_FB_HIT", "Counts the number of cacheable load lock speculated or retired instructions accepted into the fill buffer."}, {"OFFCORE_REQUESTS_OUTSTANDING.DEMAND.READ_DATA", "Counts weighted cycles of offcore demand data read requests. Does not include L2 prefetch requests. Counter 0."}, {"OFFCORE_REQUESTS_OUTSTANDING.DEMAND.READ_CODE", "Counts weighted cycles of offcore demand code read requests. Does not include L2 prefetch requests. Counter 0."}, {"OFFCORE_REQUESTS_OUTSTANDING.DEMAND.RFO", "Counts weighted cycles of offcore demand RFO requests. Does not include L2 prefetch requests. Counter 0."}, {"OFFCORE_REQUESTS_OUTSTANDING.ANY.READ", "Counts weighted cycles of offcore read requests of any kind. Include L2 prefetch requests. Counter 0."}, {"CACHE_LOCK_CYCLES.L1D_L2", "Cycle count during which the L1D and L2 are locked. A lock is asserted when there is a locked memory access, due to uncacheable memory, a locked operation that spans two cache lines, or a page walk from an uncacheable page table. Counter 0, 1 only. L1D and L2 locks have a very high performance penalty and it is highly recommended to avoid such accesses."}, {"CACHE_LOCK_CYCLES.L1D", "Counts the number of cycles that cacheline in the L1 data cache unit is locked. Counter 0, 1 only."}, {"IO_TRANSACTIONS", "Counts the number of completed I/O transactions."}, {"L1I.HITS", "Counts all instruction fetches that hit the L1 instruction cache."}, {"L1I.MISSES", "Counts all instruction fetches that miss the L1I cache. 
This includes instruction cache misses, streaming buffer misses, victim cache misses and uncacheable fetches. An instruction fetch miss is counted only once and not once for every cycle it is outstanding."}, {"L1I.READS", "Counts all instruction fetches, including uncacheable fetches that bypass the L1I."}, {"L1I.CYCLES_STALLED", "Cycle counts for which an instruction fetch stalls due to a L1I cache miss, ITLB miss or ITLB fault."}, {"LARGE_ITLB.HIT", "Counts number of large ITLB hits."}, {"ITLB_MISSES.ANY", "Counts the number of misses in all levels of the ITLB which causes a page walk."}, {"ITLB_MISSES.WALK_COMPLETED", "Counts number of misses in all levels of the ITLB which resulted in a completed page walk."}, {"ITLB_MISSES.WALK_CYCLES", "Counts ITLB miss page walk cycles."}, {"ITLB_MISSES.LARGE_WALK_COMPLETED", "Counts number of completed large page walks due to misses in the STLB."}, {"ILD_STALL.LCP", "Cycles Instruction Length Decoder stalls due to length changing prefixes: 66, 67 or REX.W (for EM64T) instructions which change the length of the decoded instruction."}, {"ILD_STALL.MRU", "Instruction Length Decoder stall cycles due to Branch Prediction Unit (BPU). 
Most Recently Used (MRU) bypass."}, {"ILD_STALL.IQ_FULL", "Stall cycles due to a full instruction queue."}, {"ILD_STALL.REGEN", "Counts the number of regen stalls."}, {"ILD_STALL.ANY", "Counts any cycles the Instruction Length Decoder is stalled."}, {"BR_INST_EXEC.COND", "Counts the number of conditional near branch instructions executed, but not necessarily retired."}, {"BR_INST_EXEC.DIRECT", "Counts all unconditional near branch instructions excluding calls and indirect branches."}, {"BR_INST_EXEC.INDIRECT_NON_CALL", "Counts the number of executed indirect near branch instructions that are not calls."}, {"BR_INST_EXEC.NON_CALLS", "Counts all non call near branch instructions executed, but not necessarily retired."}, {"BR_INST_EXEC.RETURN_NEAR", "Counts indirect near branches that have a return mnemonic."}, {"BR_INST_EXEC.DIRECT_NEAR_CALL", "Counts unconditional near call branch instructions, excluding non call branch, executed."}, {"BR_INST_EXEC.INDIRECT_NEAR_CALL", "Counts indirect near calls, including both register and memory indirect, executed."}, {"BR_INST_EXEC.NEAR_CALLS", "Counts all near call branches executed, but not necessarily retired."}, {"BR_INST_EXEC.TAKEN", "Counts taken near branches executed, but not necessarily retired."}, {"BR_INST_EXEC.ANY", "Counts all near executed branches (not necessarily retired). This includes only instructions and not micro-op branches. Frequent branching is not necessarily a major performance issue. 
However frequent branch mispredictions may be a problem."}, {"BR_MISP_EXEC.COND", "Counts the number of mispredicted conditional near branch instructions executed, but not necessarily retired."}, {"BR_MISP_EXEC.DIRECT", "Counts mispredicted macro unconditional near branch instructions, excluding calls and indirect branches (should always be 0)."}, {"BR_MISP_EXEC.INDIRECT_NON_CALL", "Counts the number of executed mispredicted indirect near branch instructions that are not calls."}, {"BR_MISP_EXEC.NON_CALLS", "Counts mispredicted non call near branches executed, but not necessarily retired."}, {"BR_MISP_EXEC.RETURN_NEAR", "Counts mispredicted indirect branches that have a near return mnemonic."}, {"BR_MISP_EXEC.DIRECT_NEAR_CALL", "Counts mispredicted non-indirect near calls executed, (should always be 0)."}, {"BR_MISP_EXEC.INDIRECT_NEAR_CALL", "Counts mispredicted indirect near calls executed, including both register and memory indirect."}, {"BR_MISP_EXEC.NEAR_CALLS", "Counts all mispredicted near call branches executed, but not necessarily retired."}, {"BR_MISP_EXEC.TAKEN", "Counts executed mispredicted near branches that are taken, but not necessarily retired."}, {"BR_MISP_EXEC.ANY", "Counts the number of mispredicted near branch instructions that were executed, but not necessarily retired."}, {"RESOURCE_STALLS.ANY", "Counts the number of Allocator resource related stalls. Includes register renaming buffer entries, memory buffer entries. In addition to resource related stalls, this event counts some other events. Includes stalls arising during branch misprediction recovery, such as if retirement of the mispredicted branch is delayed and stalls arising while store buffer is draining from synchronizing operations. 
Does not include stalls due to SuperQ (off core) queue full, too many cache misses, etc."}, {"RESOURCE_STALLS.LOAD", "Counts the cycles of stall due to lack of load buffer for load operation."}, {"RESOURCE_STALLS.RS_FULL", "This event counts the number of cycles when the number of instructions in the pipeline waiting for execution reaches the limit the processor can handle. A high count of this event indicates that there are long latency operations in the pipe (possibly load and store operations that miss the L2 cache, or instructions dependent upon instructions further down the pipeline that have yet to retire). When RS is full, new instructions can not enter the reservation station and start execution."}, {"RESOURCE_STALLS.STORE", "This event counts the number of cycles that a resource related stall will occur due to the number of store instructions reaching the limit of the pipeline, (i.e. all store buffers are used). The stall ends when a store instruction commits its data to the cache or memory."}, {"RESOURCE_STALLS.ROB_FULL", "Counts the cycles of stall due to re-order buffer full."}, {"RESOURCE_STALLS.FPCW", "Counts the number of cycles while execution was stalled due to writing the floating-point unit (FPU) control word."}, {"RESOURCE_STALLS.MXCSR", "Stalls due to the MXCSR register rename occurring too close to a previous MXCSR rename. The MXCSR provides control and status for the MMX registers."}, {"RESOURCE_STALLS.OTHER", "Counts the number of cycles while execution was stalled due to other resource issues."}, {"MACRO_INSTS.FUSIONS_DECODED", "Counts the number of instructions decoded that are macro-fused but not necessarily executed or retired."}, {"BACLEAR_FORCE_IQ", "Counts number of times a BACLEAR was forced by the Instruction Queue. The IQ is also responsible for providing conditional branch prediction direction based on a static scheme and dynamic data provided by the L2 Branch Prediction Unit. 
If the conditional branch target is not found in the Target Array and the IQ predicts that the branch is taken, then the IQ will force the Branch Address Calculator to issue a BACLEAR. Each BACLEAR asserted by the BAC generates approximately an 8 cycle bubble in the instruction fetch pipeline."}, {"LSD.UOPS", "Counts the number of micro-ops delivered by loop stream detector. Use cmask=1 and invert to count cycles."}, {"ITLB_FLUSH", "Counts the number of ITLB flushes."}, {"OFFCORE_REQUESTS.DEMAND.READ_DATA", "Counts number of offcore demand data read requests. Does not count L2 prefetch requests."}, {"OFFCORE_REQUESTS.DEMAND.READ_CODE", "Counts number of offcore demand code read requests. Does not count L2 prefetch requests."}, {"OFFCORE_REQUESTS.DEMAND.RFO", "Counts number of offcore demand RFO requests. Does not count L2 prefetch requests."}, {"OFFCORE_REQUESTS.ANY.READ", "Counts number of offcore read requests. Includes L2 prefetch requests."}, {"OFFCORE_REQUESTS.ANY.RFO", "Counts number of offcore RFO requests. Includes L2 prefetch requests."}, {"OFFCORE_REQUESTS.L1D_WRITEBACK", "Counts number of L1D writebacks to the uncore."}, {"OFFCORE_REQUESTS.ANY", "Counts all offcore requests."}, {"UOPS_EXECUTED.PORT0", "Counts number of Uops executed that were issued on port 0. Port 0 handles integer arithmetic, SIMD and FP add Uops."}, {"UOPS_EXECUTED.PORT1", "Counts number of Uops executed that were issued on port 1. Port 1 handles integer arithmetic, SIMD, integer shift, FP multiply and FP divide Uops."}, {"UOPS_EXECUTED.PORT2_CORE", "Counts number of Uops executed that were issued on port 2. Port 2 handles the load Uops. This is a core count only and can not be collected per thread."}, {"UOPS_EXECUTED.PORT3_CORE", "Counts number of Uops executed that were issued on port 3. Port 3 handles store Uops. This is a core count only and can not be collected per thread."}, {"UOPS_EXECUTED.PORT4_CORE", "Counts number of Uops executed that were issued on port 4. 
Port 4 handles the value to be stored for the store Uops issued on port 3. This is a core count only and can not be collected per thread."}, {"UOPS_EXECUTED.CORE_ACTIVE_CYCLES_NO_PORT5", "Counts number of cycles there are one or more uops being executed and were issued on ports 0-4. This is a core count only and can not be collected per thread."}, {"UOPS_EXECUTED.PORT5", "Counts number of Uops executed that were issued on port 5."}, {"UOPS_EXECUTED.CORE_ACTIVE_CYCLES", "Counts number of cycles there are one or more uops being executed on any ports. This is a core count only and can not be collected per thread."}, {"UOPS_EXECUTED.PORT015", "Counts number of Uops executed that were issued on port 0, 1, or 5. Use cmask=1, invert=1 to count stall cycles."}, {"UOPS_EXECUTED.PORT234", "Counts number of Uops executed that were issued on port 2, 3, or 4."}, {"OFFCORE_REQUESTS_SQ_FULL", "Counts number of cycles the SQ is full to handle off-core requests."}, {"SNOOPQ_REQUESTS_OUTSTANDING.DATA", "Counts weighted cycles of snoopq requests for data. Counter 0 only. Use cmask=1 to count cycles not empty."}, {"SNOOPQ_REQUESTS_OUTSTANDING.INVALIDATE", "Counts weighted cycles of snoopq invalidate requests. Counter 0 only. Use cmask=1 to count cycles not empty."}, {"SNOOPQ_REQUESTS_OUTSTANDING.CODE", "Counts weighted cycles of snoopq requests for code. Counter 0 only. Use cmask=1 to count cycles not empty."}, {"SNOOPQ_REQUESTS.CODE", "Counts the number of snoop code requests."}, {"SNOOPQ_REQUESTS.DATA", "Counts the number of snoop data requests."}, {"SNOOPQ_REQUESTS.INVALIDATE", "Counts the number of snoop invalidate requests."}, {"OFF_CORE_RESPONSE_0", "see Section 30.6.1.3, Off-core Response Performance Monitoring in the Processor Core. 
Requires programming MSR 01A6H."}, {"SNOOP_RESPONSE.HIT", "Counts HIT snoop response sent by this thread in response to a snoop request."}, {"SNOOP_RESPONSE.HITE", "Counts HIT E snoop response sent by this thread in response to a snoop request."}, {"SNOOP_RESPONSE.HITM", "Counts HIT M snoop response sent by this thread in response to a snoop request."}, {"OFF_CORE_RESPONSE_1", "See Section 30.6.1.3, Off-core Response Performance Monitoring in the Processor Core. Use MSR 01A7H."}, {"INST_RETIRED.ANY_P", "See Table A-1 Notes: INST_RETIRED.ANY is counted by a designated fixed counter. INST_RETIRED.ANY_P is counted by a programmable counter and is an architectural performance event. Event is supported if CPUID.A.EBX[1] = 0. Counting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions."}, {"INST_RETIRED.X87", "Counts the number of floating point computational operations retired floating point computational operations executed by the assist handler and sub-operations of complex floating point instructions like transcendental instructions."}, {"INST_RETIRED.MMX", "Counts the number of retired: MMX instructions."}, {"UOPS_RETIRED.ANY", "Counts the number of micro-ops retired, (macro-fused=1, micro- fused=2, others=1; maximum count of 8 per cycle). Most instructions are composed of one or two micro-ops. Some instructions are decoded into longer sequences such as repeat instructions, floating point transcendental instructions, and assists. Use cmask=1 and invert to count active cycles or stalled cycles"}, {"UOPS_RETIRED.RETIRE_SLOTS", "Counts the number of retirement slots used each cycle"}, {"UOPS_RETIRED.MACRO_FUSED", "Counts number of macro-fused uops retired."}, {"MACHINE_CLEARS.CYCLES", "Counts the cycles machine clear is asserted."}, {"MACHINE_CLEARS.MEM_ORDER", "Counts the number of machine clears due to memory order conflicts."}, {"MACHINE_CLEARS.SMC", "Counts the number of times that a program writes to a code section. 
Self-modifying code causes a severe penalty in all Intel 64 and IA-32 processors. The modified cache line is written back to the L2 and L3 caches."}, {"BR_INST_RETIRED.ANY_P", "See Table A-1"}, {"BR_INST_RETIRED.CONDITIONAL", "Counts the number of conditional branch instructions retired."}, {"BR_INST_RETIRED.NEAR_CALL", "Counts the number of direct & indirect near unconditional calls retired."}, {"BR_INST_RETIRED.ALL_BRANCHES", "Counts the number of branch instructions retired."}, {"BR_MISP_RETIRED.ANY_P", "See Table A-1."}, {"BR_MISP_RETIRED.CONDITIONAL", "Counts mispredicted conditional retired calls."}, {"BR_MISP_RETIRED.NEAR_CALL", "Counts mispredicted direct & indirect near unconditional retired calls."}, {"BR_MISP_RETIRED.ALL_BRANCHES", "Counts all mispredicted retired calls."}, {"SSEX_UOPS_RETIRED.PACKED_SINGLE", "Counts SIMD packed single-precision floating point Uops retired."}, {"SSEX_UOPS_RETIRED.SCALAR_SINGLE", "Counts SIMD scalar single-precision floating point Uops retired."}, {"SSEX_UOPS_RETIRED.PACKED_DOUBLE", "Counts SIMD packed double-precision floating point Uops retired."}, {"SSEX_UOPS_RETIRED.SCALAR_DOUBLE", "Counts SIMD scalar double-precision floating point Uops retired."}, {"SSEX_UOPS_RETIRED.VECTOR_INTEGER", "Counts 128-bit SIMD vector integer Uops retired."}, {"ITLB_MISS_RETIRED", "Counts the number of retired instructions that missed the ITLB when the instruction was fetched."}, {"MEM_LOAD_RETIRED.L1D_HIT", "Counts number of retired loads that hit the L1 data cache."}, {"MEM_LOAD_RETIRED.L2_HIT", "Counts number of retired loads that hit the L2 data cache."}, {"MEM_LOAD_RETIRED.L3_UNSHARED_HIT", "Counts number of retired loads that hit their own, unshared lines in the L3 cache."}, {"MEM_LOAD_RETIRED.OTHER_CORE_L2_HIT_HITM", "Counts number of retired loads that hit in a sibling core's L2 (on die core). Since the L3 is inclusive of all cores on the package, this is an L3 hit. 
This counts both clean or modified hits."}, {"MEM_LOAD_RETIRED.L3_MISS", "Counts number of retired loads that miss the L3 cache. The load was satisfied by a remote socket, local memory or an IOH."}, {"MEM_LOAD_RETIRED.HIT_LFB", "Counts number of retired loads that miss the L1D and the address is located in an allocated line fill buffer and will soon be committed to cache. This is counting secondary L1D misses."}, {"MEM_LOAD_RETIRED.DTLB_MISS", "Counts the number of retired loads that missed the DTLB. The DTLB miss is not counted if the load operation causes a fault. This event counts loads from cacheable memory only. The event does not count loads by software prefetches. Counts both primary and secondary misses to the TLB."}, {"FP_MMX_TRANS.TO_FP", "Counts the first floating-point instruction following any MMX instruction. You can use this event to estimate the penalties for the transitions between floating-point and MMX technology states."}, {"FP_MMX_TRANS.TO_MMX", "Counts the first MMX instruction following a floating-point instruction. You can use this event to estimate the penalties for the transitions between floating-point and MMX technology states."}, {"FP_MMX_TRANS.ANY", "Counts all transitions from floating point to MMX instructions and from MMX instructions to floating point instructions. You can use this event to estimate the penalties for the transitions between floating-point and MMX technology states."}, {"MACRO_INSTS.DECODED", "Counts the number of instructions decoded, (but not necessarily executed or retired)."}, {"UOPS_DECODED.STALL_CYCLES", "Counts the cycles of decoder stalls."}, {"UOPS_DECODED.MS", "Counts the number of Uops decoded by the Microcode Sequencer, MS. The MS delivers uops when the instruction is more than 4 uops long or a microcode assist is occurring."}, {"UOPS_DECODED.ESP_FOLDING", "Counts number of stack pointer (ESP) instructions decoded: push, pop, call, ret, etc. 
ESP instructions do not generate a Uop to increment or decrement ESP. Instead, they update an ESP_Offset register that keeps track of the delta to the current value of the ESP register."}, {"UOPS_DECODED.ESP_SYNC", "Counts number of stack pointer (ESP) sync operations where an ESP instruction is corrected by adding the ESP offset register to the current value of the ESP register."}, {"RAT_STALLS.FLAGS", "Counts the number of cycles during which execution stalled due to several reasons, one of which is a partial flag register stall. A partial register stall may occur when two conditions are met: 1) an instruction modifies some, but not all, of the flags in the flag register and 2) the next instruction, which depends on flags, depends on flags that were not modified by this instruction."}, {"RAT_STALLS.REGISTERS", "This event counts the number of cycles instruction execution latency became longer than the defined latency because the instruction used a register that was partially written by previous instruction."}, {"RAT_STALLS.ROB_READ_PORT", "Counts the number of cycles when ROB read port stalls occurred, which did not allow new micro-ops to enter the out-of-order pipeline. Note that, at this stage in the pipeline, additional stalls may occur at the same cycle and prevent the stalled micro-ops from entering the pipe. In such a case, micro-ops retry entering the execution pipe in the next cycle and the ROB-read port stall is counted again."}, {"RAT_STALLS.SCOREBOARD", "Counts the cycles where we stall due to microarchitecturally required serialization. Microcode scoreboarding stalls."}, {"RAT_STALLS.ANY", "Counts all Register Allocation Table stall cycles due to: Cycles when ROB read port stalls occurred, which did not allow new micro-ops to enter the execution pipe. Cycles when partial register stalls occurred Cycles when flag stalls occurred Cycles floating-point unit (FPU) status word stalls occurred. 
To count each of these conditions separately use the events: RAT_STALLS.ROB_READ_PORT, RAT_STALLS.PARTIAL, RAT_STALLS.FLAGS, and RAT_STALLS.FPSW."}, {"SEG_RENAME_STALLS", "Counts the number of stall cycles due to the lack of renaming resources for the ES, DS, FS, and GS segment registers. If a segment is renamed but not retired and a second update to the same segment occurs, a stall occurs in the front- end of the pipeline until the renamed segment retires."}, {"ES_REG_RENAMES", "Counts the number of times the ES segment register is renamed."}, {"UOP_UNFUSION", "Counts unfusion events due to floating point exception to a fused uop."}, {"BR_INST_DECODED", "Counts the number of branch instructions decoded."}, {"BPU_MISSED_CALL_RET", "Counts number of times the Branch Prediction Unit missed predicting a call or return branch."}, {"BACLEAR.CLEAR", "Counts the number of times the front end is resteered, mainly when the Branch Prediction Unit cannot provide a correct prediction and this is corrected by the Branch Address Calculator at the front end. This can occur if the code has many branches such that they cannot be consumed by the BPU. Each BACLEAR asserted by the BAC generates approximately an 8 cycle bubble in the instruction fetch pipeline. The effect on total execution time depends on the surrounding code."}, {"BACLEAR.BAD_TARGET", "Counts number of Branch Address Calculator clears (BACLEAR) asserted due to conditional branch instructions in which there was a target hit but the direction was wrong. Each BACLEAR asserted by the BAC generates approximately an 8 cycle bubble in the instruction fetch pipeline."}, {"BPU_CLEARS.EARLY", "Counts early (normal) Branch Prediction Unit clears: BPU predicted a taken branch after incorrectly assuming that it was not taken. The BPU clear leads to 2 cycle bubble in the Front End."}, {"BPU_CLEARS.LATE", "Counts late Branch Prediction Unit clears due to Most Recently Used conflicts. 
The BPU clear leads to a 3 cycle bubble in the Front End."}, {"THREAD_ACTIVE", "Counts cycles threads are active."}, {"L2_TRANSACTIONS.LOAD", "Counts L2 load operations due to HW prefetch or demand loads."}, {"L2_TRANSACTIONS.RFO", "Counts L2 RFO operations due to HW prefetch or demand RFOs."}, {"L2_TRANSACTIONS.IFETCH", "Counts L2 instruction fetch operations due to HW prefetch or demand ifetch."}, {"L2_TRANSACTIONS.PREFETCH", "Counts L2 prefetch operations."}, {"L2_TRANSACTIONS.L1D_WB", "Counts L1D writeback operations to the L2."}, {"L2_TRANSACTIONS.FILL", "Counts L2 cache line fill operations due to load, RFO, L1D writeback or prefetch."}, {"L2_TRANSACTIONS.WB", "Counts L2 writeback operations to the L3."}, {"L2_TRANSACTIONS.ANY", "Counts all L2 cache operations."}, {"L2_LINES_IN.S_STATE", "Counts the number of cache lines allocated in the L2 cache in the S (shared) state."}, {"L2_LINES_IN.E_STATE", "Counts the number of cache lines allocated in the L2 cache in the E (exclusive) state."}, {"L2_LINES_IN.ANY", "Counts the number of cache lines allocated in the L2 cache."}, {"L2_LINES_OUT.DEMAND_CLEAN", "Counts L2 clean cache lines evicted by a demand request."}, {"L2_LINES_OUT.DEMAND_DIRTY", "Counts L2 dirty (modified) cache lines evicted by a demand request."}, {"L2_LINES_OUT.PREFETCH_CLEAN", "Counts L2 clean cache line evicted by a prefetch request."}, {"L2_LINES_OUT.PREFETCH_DIRTY", "Counts L2 modified cache line evicted by a prefetch request."}, {"L2_LINES_OUT.ANY", "Counts all L2 cache lines evicted for any reason."}, {"SQ_MISC.LRU_HINTS", "Counts number of Super Queue LRU hints sent to L3."}, {"SQ_MISC.SPLIT_LOCK", "Counts the number of SQ lock splits across a cache line."}, {"SQ_FULL_STALL_CYCLES", "Counts cycles the Super Queue is full. Neither of the threads on this core will be able to access the uncore."}, {"FP_ASSIST.ALL", "Counts the number of floating point operations executed that required micro-code assist intervention. 
Assists are required in the following cases: SSE instructions, (Denormal input when the DAZ flag is off or Underflow result when the FTZ flag is off): x87 instructions, (NaN or denormal are loaded to a register or used as input from memory, Division by 0 or Underflow output)."}, {"FP_ASSIST.OUTPUT", "Counts number of floating point micro-code assist when the output value (destination register) is invalid."}, {"FP_ASSIST.INPUT", "Counts number of floating point micro-code assist when the input value (one of the source operands to an FP instruction) is invalid."}, {"SIMD_INT_64.PACKED_MPY", "Counts number of SIMD integer 64 bit packed multiply operations."}, {"SIMD_INT_64.PACKED_SHIFT", "Counts number of SIMD integer 64 bit packed shift operations."}, {"SIMD_INT_64.PACK", "Counts number of SIMD integer 64 bit pack operations."}, {"SIMD_INT_64.UNPACK", "Counts number of SIMD integer 64 bit unpack operations."}, {"SIMD_INT_64.PACKED_LOGICAL", "Counts number of SIMD integer 64 bit logical operations."}, {"SIMD_INT_64.PACKED_ARITH", "Counts number of SIMD integer 64 bit arithmetic operations."}, {"SIMD_INT_64.SHUFFLE_MOVE", "Counts number of SIMD integer 64 bit shift or move operations."}, {"INSTR_RETIRED_ANY", ""}, {"CPU_CLK_UNHALTED_CORE", ""}, {"CPU_CLK_UNHALTED_REF", ""}, {"GQ_CYCLES_FULL.READ_TRACKER", "Uncore cycles Global Queue read tracker is full."}, {"GQ_CYCLES_FULL.WRITE_TRACKER", "Uncore cycles Global Queue write tracker is full."}, {"GQ_CYCLES_FULL.PEER_PROBE_TRACKER", "Uncore cycles Global Queue peer probe tracker is full. The peer probe tracker queue tracks snoops from the IOH and remote sockets."}, {"GQ_CYCLES_NOT_EMPTY.READ_TRACKER", "Uncore cycles were Global Queue read tracker has at least one valid entry."}, {"GQ_CYCLES_NOT_EMPTY.WRITE_TRACKER", "Uncore cycles were Global Queue write tracker has at least one valid entry."}, {"GQ_CYCLES_NOT_EMPTY.PEER_PROBE_TRACKER", "Uncore cycles were Global Queue peer probe tracker has at least one valid entry. 
The peer probe tracker queue tracks IOH and remote socket snoops."}, {"GQ_OCCUPANCY.READ_TRACKER", "Increments the number of queue entries (code read, data read, and RFOs) in the read tracker. The GQ read tracker allocate to deallocate occupancy count is divided by the count to obtain the average read tracker latency."}, {"GQ_ALLOC.READ_TRACKER", "Counts the number of read tracker allocate to deallocate entries. The GQ read tracker allocate to deallocate occupancy count is divided by the count to obtain the average read tracker latency."}, {"GQ_ALLOC.RT_L3_MISS", "Counts the number GQ read tracker entries for which a full cache line read has missed the L3. The GQ read tracker L3 miss to fill occupancy count is divided by this count to obtain the average cache line read L3 miss latency. The latency represents the time after which the L3 has determined that the cache line has missed. The time between a GQ read tracker allocation and the L3 determining that the cache line has missed is the average L3 hit latency. The total L3 cache line read miss latency is the hit latency + L3 miss latency."}, {"GQ_ALLOC.RT_TO_L3_RESP", "Counts the number of GQ read tracker entries that are allocated in the read tracker queue that hit or miss the L3. The GQ read tracker L3 hit occupancy count is divided by this count to obtain the average L3 hit latency."}, {"GQ_ALLOC.RT_TO_RTID_ACQUIRED", "Counts the number of GQ read tracker entries that are allocated in the read tracker, have missed in the L3 and have not acquired a Request Transaction ID. The GQ read tracker L3 miss to RTID acquired occupancy count is divided by this count to obtain the average latency for a read L3 miss to acquire an RTID."}, {"GQ_ALLOC.WT_TO_RTID_ACQUIRED", "Counts the number of GQ write tracker entries that are allocated in the write tracker, have missed in the L3 and have not acquired a Request Transaction ID. 
The GQ write tracker L3 miss to RTID occupancy count is divided by this count to obtain the average latency for a write L3 miss to acquire an RTID."}, {"GQ_ALLOC.WRITE_TRACKER", "Counts the number of GQ write tracker entries that are allocated in the write tracker queue that miss the L3. The GQ write tracker occupancy count is divided by the this count to obtain the average L3 write miss latency."}, {"GQ_ALLOC.PEER_PROBE_TRACKER", "Counts the number of GQ peer probe tracker (snoop) entries that are allocated in the peer probe tracker queue that miss the L3. The GQ peer probe occupancy count is divided by this count to obtain the average L3 peer probe miss latency."}, {"GQ_DATA.FROM_QPI", "Cycles Global Queue Quickpath Interface input data port is busy importing data from the Quickpath Interface. Each cycle the input port can transfer 8 or 16 bytes of data."}, {"GQ_DATA.FROM_QMC", "Cycles Global Queue Quickpath Memory Interface input data port is busy importing data from the Quickpath Memory Interface. Each cycle the input port can transfer 8 or 16 bytes of data."}, {"GQ_DATA.FROM_L3", "Cycles GQ L3 input data port is busy importing data from the Last Level Cache. Each cycle the input port can transfer 32 bytes of data."}, {"GQ_DATA.FROM_CORES_02", "Cycles GQ Core 0 and 2 input data port is busy importing data from processor cores 0 and 2. Each cycle the input port can transfer 32 bytes of data."}, {"GQ_DATA.FROM_CORES_13", "Cycles GQ Core 1 and 3 input data port is busy importing data from processor cores 1 and 3. Each cycle the input port can transfer 32 bytes of data."}, {"GQ_DATA.TO_QPI_QMC", "Cycles GQ QPI and QMC output data port is busy sending data to the Quickpath Interface or Quickpath Memory Interface. Each cycle the output port can transfer 32 bytes of data."}, {"GQ_DATA.TO_L3", "Cycles GQ L3 output data port is busy sending data to the Last Level Cache. 
Each cycle the output port can transfer 32 bytes of data."}, {"GQ_DATA.TO_CORES", "Cycles GQ Core output data port is busy sending data to the Cores. Each cycle the output port can transfer 32 bytes of data."}, {"SNP_RESP_TO_LOCAL_HOME.I_STATE", "Number of snoop responses to the local home that L3 does not have the referenced cache line."}, {"SNP_RESP_TO_LOCAL_HOME.S_STATE", "Number of snoop responses to the local home that L3 has the referenced line cached in the S state."}, {"SNP_RESP_TO_LOCAL_HOME.FWD_S_STATE", "Number of responses to code or data read snoops to the local home that the L3 has the referenced cache line in the E state. The L3 cache line state is changed to the S state and the line is forwarded to the local home in the S state."}, {"SNP_RESP_TO_LOCAL_HOME.FWD_I_STATE", "Number of responses to read invalidate snoops to the local home that the L3 has the referenced cache line in the M state. The L3 cache line state is invalidated and the line is forwarded to the local home in the M state."}, {"SNP_RESP_TO_LOCAL_HOME.CONFLICT", "Number of conflict snoop responses sent to the local home."}, {"SNP_RESP_TO_LOCAL_HOME.WB", "Number of responses to code or data read snoops to the local home that the L3 has the referenced line cached in the M state."}, {"SNP_RESP_TO_REMOTE_HOME.I_STATE", "Number of snoop responses to a remote home that L3 does not have the referenced cache line."}, {"SNP_RESP_TO_REMOTE_HOME.S_STATE", "Number of snoop responses to a remote home that L3 has the referenced line cached in the S state."}, {"SNP_RESP_TO_REMOTE_HOME.FWD_S_STATE", "Number of responses to code or data read snoops to a remote home that the L3 has the referenced cache line in the E state. The L3 cache line state is changed to the S state and the line is forwarded to the remote home in the S state."}, {"SNP_RESP_TO_REMOTE_HOME.FWD_I_STATE", "Number of responses to read invalidate snoops to a remote home that the L3 has the referenced cache line in the M state. 
The L3 cache line state is invalidated and the line is forwarded to the remote home in the M state."}, {"SNP_RESP_TO_REMOTE_HOME.CONFLICT", "Number of conflict snoop responses sent to the local home."}, {"SNP_RESP_TO_REMOTE_HOME.WB", "Number of responses to code or data read snoops to a remote home that the L3 has the referenced line cached in the M state."}, {"SNP_RESP_TO_REMOTE_HOME.HITM", "Number of HITM snoop responses to a remote home."}, {"L3_HITS.READ", "Number of code read, data read and RFO requests that hit in the L3."}, {"L3_HITS.WRITE", "Number of writeback requests that hit in the L3. Writebacks from the cores will always result in L3 hits due to the inclusive property of the L3."}, {"L3_HITS.PROBE", "Number of snoops from IOH or remote sockets that hit in the L3."}, {"L3_HITS.ANY", "Number of reads and writes that hit the L3."}, {"L3_MISS.READ", "Number of code read, data read and RFO requests that miss the L3."}, {"L3_MISS.WRITE", "Number of writeback requests that miss the L3. Should always be zero as writebacks from the cores will always result in L3 hits due to the inclusive property of the L3."}, {"L3_MISS.PROBE", "Number of snoops from IOH or remote sockets that miss the L3."}, {"L3_MISS.ANY", "Number of reads and writes that miss the L3."}, {"L3_LINES_IN.M_STATE", "Counts the number of L3 lines allocated in M state. The only time a cache line is allocated in the M state is when the line was forwarded in M state is forwarded due to a Snoop Read Invalidate Own request."}, {"L3_LINES_IN.E_STATE", "Counts the number of L3 lines allocated in E state."}, {"L3_LINES_IN.S_STATE", "Counts the number of L3 lines allocated in S state."}, {"L3_LINES_IN.F_STATE", "Counts the number of L3 lines allocated in F state."}, {"L3_LINES_IN.ANY", "Counts the number of L3 lines allocated in any state."}, {"L3_LINES_OUT.M_STATE", "Counts the number of L3 lines victimized that were in the M state. 
When the victim cache line is in M state, the line is written to its home cache agent which can be either local or remote."}, {"L3_LINES_OUT.E_STATE", "Counts the number of L3 lines victimized that were in the E state."}, {"L3_LINES_OUT.S_STATE", "Counts the number of L3 lines victimized that were in the S state."}, {"L3_LINES_OUT.I_STATE", "Counts the number of L3 lines victimized that were in the I state."}, {"L3_LINES_OUT.F_STATE", "Counts the number of L3 lines victimized that were in the F state."}, {"L3_LINES_OUT.ANY", "Counts the number of L3 lines victimized in any state."}, {"GQ_SNOOP.GOTO_S", "Counts the number of remote snoops that have requested a cache line be set to the S state."}, {"GQ_SNOOP.GOTO_I", "Counts the number of remote snoops that have requested a cache line be set to the I state."}, {"GQ_SNOOP.GOTO_S_HIT", "Counts the number of remote snoops that have requested a cache line be set to the S state from E state. Requires writing MSR 301H with mask = 2H"}, {"GQ_SNOOP.GOTO_I_HIT", "Counts the number of remote snoops that have requested a cache line be set to the S state from F (forward) state. 
Requires writing MSR 301H with mask = 8H"}, {"QHL_REQUESTS.IOH_READS", "Counts number of Quickpath Home Logic read requests from the IOH."}, {"QHL_REQUESTS.IOH_WRITES", "Counts number of Quickpath Home Logic write requests from the IOH."}, {"QHL_REQUESTS.REMOTE_READS", "Counts number of Quickpath Home Logic read requests from a remote socket."}, {"QHL_REQUESTS.REMOTE_WRITES", "Counts number of Quickpath Home Logic write requests from a remote socket."}, {"QHL_REQUESTS.LOCAL_READS", "Counts number of Quickpath Home Logic read requests from the local socket."}, {"QHL_REQUESTS.LOCAL_WRITES", "Counts number of Quickpath Home Logic write requests from the local socket."}, {"QHL_CYCLES_FULL.IOH", "Counts uclk cycles all entries in the Quickpath Home Logic IOH are full."}, {"QHL_CYCLES_FULL.REMOTE", "Counts uclk cycles all entries in the Quickpath Home Logic remote tracker are full."}, {"QHL_CYCLES_FULL.LOCAL", "Counts uclk cycles all entries in the Quickpath Home Logic local tracker are full."}, {"QHL_CYCLES_NOT_EMPTY.IOH", "Counts uclk cycles all entries in the Quickpath Home Logic IOH is busy."}, {"QHL_CYCLES_NOT_EMPTY.REMOTE", "Counts uclk cycles all entries in the Quickpath Home Logic remote tracker is busy."}, {"QHL_CYCLES_NOT_EMPTY.LOCAL", "Counts uclk cycles all entries in the Quickpath Home Logic local tracker is busy."}, {"QHL_OCCUPANCY.IOH", "QHL IOH tracker allocate to deallocate read occupancy."}, {"QHL_OCCUPANCY.REMOTE", "QHL remote tracker allocate to deallocate read occupancy."}, {"QHL_OCCUPANCY.LOCAL", "QHL local tracker allocate to deallocate read occupancy."}, {"QHL_ADDRESS_CONFLICTS.2WAY", "Counts number of QHL Active Address Table (AAT) entries that saw a max of 2 conflicts. The AAT is a structure that tracks requests that are in conflict. The requests themselves are in the home tracker entries. 
The count is reported when an AAT entry deallocates."}, {"QHL_ADDRESS_CONFLICTS.3WAY", "Counts number of QHL Active Address Table (AAT) entries that saw a max of 3 conflicts. The AAT is a structure that tracks requests that are in conflict. The requests themselves are in the home tracker entries. The count is reported when an AAT entry deallocates."}, {"QHL_CONFLICT_CYCLES.IOH", "Counts cycles the Quickpath Home Logic IOH Tracker contains two or more requests with an address conflict. A max of 3 requests can be in conflict."}, {"QHL_CONFLICT_CYCLES.REMOTE", "Counts cycles the Quickpath Home Logic Remote Tracker contains two or more requests with an address conflict. A max of 3 requests can be in conflict."}, {"QHL_CONFLICT_CYCLES.LOCAL", "Counts cycles the Quickpath Home Logic Local Tracker contains two or more requests with an address conflict. A max of 3 requests can be in conflict."}, {"QHL_TO_QMC_BYPASS", "Counts number or requests to the Quickpath Memory Controller that bypass the Quickpath Home Logic. All local accesses can be bypassed. 
For remote requests, only read requests can be bypassed."}, {"QMC_ISOC_FULL.READ.CH0", "Counts cycles all the entries in the DRAM channel 0 high priority queue are occupied with isochronous read requests."}, {"QMC_ISOC_FULL.READ.CH1", "Counts cycles all the entries in the DRAM channel 1 high priority queue are occupied with isochronous read requests."}, {"QMC_ISOC_FULL.READ.CH2", "Counts cycles all the entries in the DRAM channel 2 high priority queue are occupied with isochronous read requests."}, {"QMC_ISOC_FULL.WRITE.CH0", "Counts cycles all the entries in the DRAM channel 0 high priority queue are occupied with isochronous write requests."}, {"QMC_ISOC_FULL.WRITE.CH1", "Counts cycles all the entries in the DRAM channel 1 high priority queue are occupied with isochronous write requests."}, {"QMC_ISOC_FULL.WRITE.CH2", "Counts cycles all the entries in the DRAM channel 2 high priority queue are occupied with isochronous write requests."}, {"QMC_BUSY.READ.CH0", "Counts cycles where Quickpath Memory Controller has at least 1 outstanding read request to DRAM channel 0."}, {"QMC_BUSY.READ.CH1", "Counts cycles where Quickpath Memory Controller has at least 1 outstanding read request to DRAM channel 1."}, {"QMC_BUSY.READ.CH2", "Counts cycles where Quickpath Memory Controller has at least 1 outstanding read request to DRAM channel 2."}, {"QMC_BUSY.WRITE.CH0", "Counts cycles where Quickpath Memory Controller has at least 1 outstanding write request to DRAM channel 0."}, {"QMC_BUSY.WRITE.CH1", "Counts cycles where Quickpath Memory Controller has at least 1 outstanding write request to DRAM channel 1."}, {"QMC_BUSY.WRITE.CH2", "Counts cycles where Quickpath Memory Controller has at least 1 outstanding write request to DRAM channel 2."}, {"QMC_OCCUPANCY.CH0", "IMC channel 0 normal read request occupancy."}, {"QMC_OCCUPANCY.CH1", "IMC channel 1 normal read request occupancy."}, {"QMC_OCCUPANCY.CH2", "IMC channel 2 normal read request occupancy."}, {"QMC_OCCUPANCY.ANY", 
"Normal read request occupancy for any channel."}, {"QMC_ISSOC_OCCUPANCY.CH0", "IMC channel 0 issoc read request occupancy."}, {"QMC_ISSOC_OCCUPANCY.CH1", "IMC channel 1 issoc read request occupancy."}, {"QMC_ISSOC_OCCUPANCY.CH2", "IMC channel 2 issoc read request occupancy."}, {"QMC_ISSOC_READS.ANY", "IMC issoc read request occupancy."}, {"QMC_NORMAL_READS.CH0", "Counts the number of Quickpath Memory Controller channel 0 medium and low priority read requests. The QMC channel 0 normal read occupancy divided by this count provides the average QMC channel 0 read latency."}, {"QMC_NORMAL_READS.CH1", "Counts the number of Quickpath Memory Controller channel 1 medium and low priority read requests. The QMC channel 1 normal read occupancy divided by this count provides the average QMC channel 1 read latency."}, {"QMC_NORMAL_READS.CH2", "Counts the number of Quickpath Memory Controller channel 2 medium and low priority read requests. The QMC channel 2 normal read occupancy divided by this count provides the average QMC channel 2 read latency."}, {"QMC_NORMAL_READS.ANY", "Counts the number of Quickpath Memory Controller medium and low priority read requests. 
The QMC normal read occupancy divided by this count provides the average QMC read latency."}, {"QMC_HIGH_PRIORITY_READS.CH0", "Counts the number of Quickpath Memory Controller channel 0 high priority isochronous read requests."}, {"QMC_HIGH_PRIORITY_READS.CH1", "Counts the number of Quickpath Memory Controller channel 1 high priority isochronous read requests."}, {"QMC_HIGH_PRIORITY_READS.CH2", "Counts the number of Quickpath Memory Controller channel 2 high priority isochronous read requests."}, {"QMC_HIGH_PRIORITY_READS.ANY", "Counts the number of Quickpath Memory Controller high priority isochronous read requests."}, {"QMC_CRITICAL_PRIORITY_READS.CH0", "Counts the number of Quickpath Memory Controller channel 0 critical priority isochronous read requests."}, {"QMC_CRITICAL_PRIORITY_READS.CH1", "Counts the number of Quickpath Memory Controller channel 1 critical priority isochronous read requests."}, {"QMC_CRITICAL_PRIORITY_READS.CH2", "Counts the number of Quickpath Memory Controller channel 2 critical priority isochronous read requests."}, {"QMC_CRITICAL_PRIORITY_READS.ANY", "Counts the number of Quickpath Memory Controller critical priority isochronous read requests."}, {"QMC_WRITES.FULL.CH0", "Counts number of full cache line writes to DRAM channel 0."}, {"QMC_WRITES.FULL.CH1", "Counts number of full cache line writes to DRAM channel 1."}, {"QMC_WRITES.FULL.CH2", "Counts number of full cache line writes to DRAM channel 2."}, {"QMC_WRITES.FULL.ANY", "Counts number of full cache line writes to DRAM."}, {"QMC_WRITES.PARTIAL.CH0", "Counts number of partial cache line writes to DRAM channel 0."}, {"QMC_WRITES.PARTIAL.CH1", "Counts number of partial cache line writes to DRAM channel 1."}, {"QMC_WRITES.PARTIAL.CH2", "Counts number of partial cache line writes to DRAM channel 2."}, {"QMC_WRITES.PARTIAL.ANY", "Counts number of partial cache line writes to DRAM."}, {"QMC_CANCEL.CH0", "Counts number of DRAM channel 0 cancel requests."}, {"QMC_CANCEL.CH1", "Counts number 
of DRAM channel 1 cancel requests."}, {"QMC_CANCEL.CH2", "Counts number of DRAM channel 2 cancel requests."}, {"QMC_CANCEL.ANY", "Counts number of DRAM cancel requests."}, {"QMC_PRIORITY_UPDATES.CH0", "Counts number of DRAM channel 0 priority updates. A priority update occurs when an ISOC high or critical request is received by the QHL and there is a matching request with normal priority that has already been issued to the QMC. In this instance, the QHL will send a priority update to QMC to expedite the request."}, {"QMC_PRIORITY_UPDATES.CH1", "Counts number of DRAM channel 1 priority updates. A priority update occurs when an ISOC high or critical request is received by the QHL and there is a matching request with normal priority that has already been issued to the QMC. In this instance, the QHL will send a priority update to QMC to expedite the request."}, {"QMC_PRIORITY_UPDATES.CH2", "Counts number of DRAM channel 2 priority updates. A priority update occurs when an ISOC high or critical request is received by the QHL and there is a matching request with normal priority that has already been issued to the QMC. In this instance, the QHL will send a priority update to QMC to expedite the request."}, {"QMC_PRIORITY_UPDATES.ANY", "Counts number of DRAM priority updates. A priority update occurs when an ISOC high or critical request is received by the QHL and there is a matching request with normal priority that has already been issued to the QMC. In this instance, the QHL will send a priority update to QMC to expedite the request."}, {"IMC_RETRY.CH0", "Counts number of IMC DRAM channel 0 retries. DRAM retry only occurs when configured in RAS mode."}, {"IMC_RETRY.CH1", "Counts number of IMC DRAM channel 1 retries. DRAM retry only occurs when configured in RAS mode."}, {"IMC_RETRY.CH2", "Counts number of IMC DRAM channel 2 retries. DRAM retry only occurs when configured in RAS mode."}, {"IMC_RETRY.ANY", "Counts number of IMC DRAM retries from any channel. 
DRAM retry only occurs when configured in RAS mode."}, {"QHL_FRC_ACK_CNFLTS.IOH", "Counts number of Force Acknowledge Conflict messages sent by the Quickpath Home Logic to the IOH."}, {"QHL_FRC_ACK_CNFLTS.REMOTE", "Counts number of Force Acknowledge Conflict messages sent by the Quickpath Home Logic to the remote home."}, {"QHL_FRC_ACK_CNFLTS.LOCAL", "Counts number of Force Acknowledge Conflict messages sent by the Quickpath Home Logic to the local home."}, {"QHL_FRC_ACK_CNFLTS.ANY", "Counts number of Force Acknowledge Conflict messages sent by the Quickpath Home Logic."}, {"QHL_SLEEPS.IOH_ORDER", "Counts number of occurrences a request was put to sleep due to IOH ordering (write after read) conflicts. While in the sleep state, the request is not eligible to be scheduled to the QMC."}, {"QHL_SLEEPS.REMOTE_ORDER", "Counts number of occurrences a request was put to sleep due to remote socket ordering (write after read) conflicts. While in the sleep state, the request is not eligible to be scheduled to the QMC."}, {"QHL_SLEEPS.LOCAL_ORDER", "Counts number of occurrences a request was put to sleep due to local socket ordering (write after read) conflicts. While in the sleep state, the request is not eligible to be scheduled to the QMC."}, {"QHL_SLEEPS.IOH_CONFLICT", "Counts number of occurrences a request was put to sleep due to IOH address conflicts. While in the sleep state, the request is not eligible to be scheduled to the QMC."}, {"QHL_SLEEPS.REMOTE_CONFLICT", "Counts number of occurrences a request was put to sleep due to remote socket address conflicts. While in the sleep state, the request is not eligible to be scheduled to the QMC."}, {"QHL_SLEEPS.LOCAL_CONFLICT", "Counts number of occurrences a request was put to sleep due to local socket address conflicts. 
While in the sleep state, the request is not eligible to be scheduled to the QMC."}, {"ADDR_OPCODE_MATCH.IOH", "Counts number of requests from the IOH, address/opcode of request is qualified by mask value written to MSR 396H. The following mask values are supported: 0: NONE 40000000_00000000H:RSPFWDI 40001A00_00000000H:RSPFWDS 40001D00_00000000H:RSPIWB Match opcode/address by writing MSR 396H with mask supported mask value."}, {"ADDR_OPCODE_MATCH.REMOTE", "Counts number of requests from the remote socket, address/opcode of request is qualified by mask value written to MSR 396H. The following mask values are supported: 0: NONE 40000000_00000000H:RSPFWDI 40001A00_00000000H:RSPFWDS 40001D00_00000000H:RSPIWB Match opcode/address by writing MSR 396H with mask supported mask value."}, {"ADDR_OPCODE_MATCH.LOCAL", "Counts number of requests from the local socket, address/opcode of request is qualified by mask value written to MSR 396H. The following mask values are supported: 0: NONE 40000000_00000000H:RSPFWDI 40001A00_00000000H:RSPFWDS 40001D00_00000000H:RSPIWB Match opcode/address by writing MSR 396H with mask supported mask value."}, {"QPI_TX_STALLED_SINGLE_FLIT.HOME.LINK_0", "Counts cycles the Quickpath outbound link 0 HOME virtual channel is stalled due to lack of a VNA and VN0 credit. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, {"QPI_TX_STALLED_SINGLE_FLIT.SNOOP.LINK_0", "Counts cycles the Quickpath outbound link 0 SNOOP virtual channel is stalled due to lack of a VNA and VN0 credit. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, {"QPI_TX_STALLED_SINGLE_FLIT.NDR.LINK_0", "Counts cycles the Quickpath outbound link 0 non-data response virtual channel is stalled due to lack of a VNA and VN0 credit. 
Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, {"QPI_TX_STALLED_SINGLE_FLIT.HOME.LINK_1", "Counts cycles the Quickpath outbound link 1 HOME virtual channel is stalled due to lack of a VNA and VN0 credit. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, {"QPI_TX_STALLED_SINGLE_FLIT.SNOOP.LINK_1", "Counts cycles the Quickpath outbound link 1 SNOOP virtual channel is stalled due to lack of a VNA and VN0 credit. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, {"QPI_TX_STALLED_SINGLE_FLIT.NDR.LINK_1", "Counts cycles the Quickpath outbound link 1 non-data response virtual channel is stalled due to lack of a VNA and VN0 credit. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, {"QPI_TX_STALLED_SINGLE_FLIT.LINK_0", "Counts cycles the Quickpath outbound link 0 virtual channels are stalled due to lack of a VNA and VN0 credit. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, {"QPI_TX_STALLED_SINGLE_FLIT.LINK_1", "Counts cycles the Quickpath outbound link 1 virtual channels are stalled due to lack of a VNA and VN0 credit. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, {"QPI_TX_STALLED_MULTI_FLIT.DRS.LINK_0", "Counts cycles the Quickpath outbound link 0 Data ResponSe virtual channel is stalled due to lack of VNA and VN0 credits. 
Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, {"QPI_TX_STALLED_MULTI_FLIT.NCB.LINK_0", "Counts cycles the Quickpath outbound link 0 Non-Coherent Bypass virtual channel is stalled due to lack of VNA and VN0 credits. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, {"QPI_TX_STALLED_MULTI_FLIT.NCS.LINK_0", "Counts cycles the Quickpath outbound link 0 Non-Coherent Standard virtual channel is stalled due to lack of VNA and VN0 credits. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, {"QPI_TX_STALLED_MULTI_FLIT.DRS.LINK_1", "Counts cycles the Quickpath outbound link 1 Data ResponSe virtual channel is stalled due to lack of VNA and VN0 credits. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, {"QPI_TX_STALLED_MULTI_FLIT.NCB.LINK_1", "Counts cycles the Quickpath outbound link 1 Non-Coherent Bypass virtual channel is stalled due to lack of VNA and VN0 credits. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, {"QPI_TX_STALLED_MULTI_FLIT.NCS.LINK_1", "Counts cycles the Quickpath outbound link 1 Non-Coherent Standard virtual channel is stalled due to lack of VNA and VN0 credits. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, {"QPI_TX_STALLED_MULTI_FLIT.LINK_0", "Counts cycles the Quickpath outbound link 0 virtual channels are stalled due to lack of VNA and VN0 credits. 
Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, {"QPI_TX_STALLED_MULTI_FLIT.LINK_1", "Counts cycles the Quickpath outbound link 1 virtual channels are stalled due to lack of VNA and VN0 credits. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, {"QPI_TX_HEADER.FULL.LINK_0", "Number of cycles that the header buffer in the Quickpath Interface outbound link 0 is full."}, {"QPI_TX_HEADER.BUSY.LINK_0", "Number of cycles that the header buffer in the Quickpath Interface outbound link 0 is busy."}, {"QPI_TX_HEADER.FULL.LINK_1", "Number of cycles that the header buffer in the Quickpath Interface outbound link 1 is full."}, {"QPI_TX_HEADER.BUSY.LINK_1", "Number of cycles that the header buffer in the Quickpath Interface outbound link 1 is busy."}, {"QPI_RX_NO_PPT_CREDIT.STALLS.LINK_0", "Number of cycles that snoop packets incoming to the Quickpath Interface link0 are stalled and not sent to the GQ because the GQ Peer Probe Tracker (PPT) does not have any available entries."}, {"QPI_RX_NO_PPT_CREDIT.STALLS.LINK_1", "Number of cycles that snoop packets incoming to the Quickpath Interface link 1 are stalled and not sent to the GQ because the GQ Peer Probe Tracker (PPT) does not have any available entries."}, {"DRAM_OPEN.CH0", "Counts number of DRAM Channel 0 open commands issued either for read or write. To read or write data, the referenced DRAM page must first be opened."}, {"DRAM_OPEN.CH1", "Counts number of DRAM Channel 1 open commands issued either for read or write. To read or write data, the referenced DRAM page must first be opened."}, {"DRAM_OPEN.CH2", "Counts number of DRAM Channel 2 open commands issued either for read or write. 
To read or write data, the referenced DRAM page must first be opened."}, {"DRAM_PAGE_CLOSE.CH0", "DRAM channel 0 command issued to CLOSE a page due to page idle timer expiration. Closing a page is done by issuing a precharge."}, {"DRAM_PAGE_CLOSE.CH1", "DRAM channel 1 command issued to CLOSE a page due to page idle timer expiration. Closing a page is done by issuing a precharge."}, {"DRAM_PAGE_CLOSE.CH2", "DRAM channel 2 command issued to CLOSE a page due to page idle timer expiration. Closing a page is done by issuing a precharge."}, {"DRAM_PAGE_MISS.CH0", "Counts the number of precharges (PRE) that were issued to DRAM channel 0 because there was a page miss. A page miss refers to a situation in which a page is currently open and another page from the same bank needs to be opened. The new page experiences a page miss. Closing of the old page is done by issuing a precharge."}, {"DRAM_PAGE_MISS.CH1", "Counts the number of precharges (PRE) that were issued to DRAM channel 1 because there was a page miss. A page miss refers to a situation in which a page is currently open and another page from the same bank needs to be opened. The new page experiences a page miss. Closing of the old page is done by issuing a precharge."}, {"DRAM_PAGE_MISS.CH2", "Counts the number of precharges (PRE) that were issued to DRAM channel 2 because there was a page miss. A page miss refers to a situation in which a page is currently open and another page from the same bank needs to be opened. The new page experiences a page miss. 
Closing of the old page is done by issuing a precharge."}, {"DRAM_READ_CAS.CH0", "Counts the number of times a read CAS command was issued on DRAM channel 0."}, {"DRAM_READ_CAS.AUTOPRE_CH0", "Counts the number of times a read CAS command was issued on DRAM channel 0 where the command issued used the auto-precharge (auto page close) mode."}, {"DRAM_READ_CAS.CH1", "Counts the number of times a read CAS command was issued on DRAM channel 1."}, {"DRAM_READ_CAS.AUTOPRE_CH1", "Counts the number of times a read CAS command was issued on DRAM channel 1 where the command issued used the auto-precharge (auto page close) mode."}, {"DRAM_READ_CAS.CH2", "Counts the number of times a read CAS command was issued on DRAM channel 2."}, {"DRAM_READ_CAS.AUTOPRE_CH2", "Counts the number of times a read CAS command was issued on DRAM channel 2 where the command issued used the auto-precharge (auto page close) mode."}, {"DRAM_WRITE_CAS.CH0", "Counts the number of times a write CAS command was issued on DRAM channel 0."}, {"DRAM_WRITE_CAS.AUTOPRE_CH0", "Counts the number of times a write CAS command was issued on DRAM channel 0 where the command issued used the auto-precharge (auto page close) mode."}, {"DRAM_WRITE_CAS.CH1", "Counts the number of times a write CAS command was issued on DRAM channel 1."}, {"DRAM_WRITE_CAS.AUTOPRE_CH1", "Counts the number of times a write CAS command was issued on DRAM channel 1 where the command issued used the auto-precharge (auto page close) mode."}, {"DRAM_WRITE_CAS.CH2", "Counts the number of times a write CAS command was issued on DRAM channel 2."}, {"DRAM_WRITE_CAS.AUTOPRE_CH2", "Counts the number of times a write CAS command was issued on DRAM channel 2 where the command issued used the auto-precharge (auto page close) mode."}, {"DRAM_REFRESH.CH0", "Counts number of DRAM channel 0 refresh commands. DRAM loses data content over time. 
In order to keep correct data content, the data values have to be refreshed periodically."}, {"DRAM_REFRESH.CH1", "Counts number of DRAM channel 1 refresh commands. DRAM loses data content over time. In order to keep correct data content, the data values have to be refreshed periodically."}, {"DRAM_REFRESH.CH2", "Counts number of DRAM channel 2 refresh commands. DRAM loses data content over time. In order to keep correct data content, the data values have to be refreshed periodically."}, {"DRAM_PRE_ALL.CH0", "Counts number of DRAM Channel 0 precharge-all (PREALL) commands that close all open pages in a rank. PREALL is issued when the DRAM needs to be refreshed or needs to go into a power down mode."}, {"DRAM_PRE_ALL.CH1", "Counts number of DRAM Channel 1 precharge-all (PREALL) commands that close all open pages in a rank. PREALL is issued when the DRAM needs to be refreshed or needs to go into a power down mode."}, {"DRAM_PRE_ALL.CH2", "Counts number of DRAM Channel 2 precharge-all (PREALL) commands that close all open pages in a rank. 
PREALL is issued when the DRAM needs to be refreshed or needs to go into a power down mode."}, {"DRAM_THERMAL_THROTTLED", "Uncore cycles DRAM was throttled due to its temperature being above the thermal throttling threshold."}, {"THERMAL_THROTTLING_TEMP.CORE_0", "Cycles that the PCU records that core 0 is above the thermal throttling threshold temperature."}, {"THERMAL_THROTTLING_TEMP.CORE_1", "Cycles that the PCU records that core 1 is above the thermal throttling threshold temperature."}, {"THERMAL_THROTTLING_TEMP.CORE_2", "Cycles that the PCU records that core 2 is above the thermal throttling threshold temperature."}, {"THERMAL_THROTTLING_TEMP.CORE_3", "Cycles that the PCU records that core 3 is above the thermal throttling threshold temperature."}, {"THERMAL_THROTTLED_TEMP.CORE_0", "Cycles that the PCU records that core 0 is in the power throttled state due to cores temperature being above the thermal throttling threshold."}, {"THERMAL_THROTTLED_TEMP.CORE_1", "Cycles that the PCU records that core 1 is in the power throttled state due to cores temperature being above the thermal throttling threshold."}, {"THERMAL_THROTTLED_TEMP.CORE_2", "Cycles that the PCU records that core 2 is in the power throttled state due to cores temperature being above the thermal throttling threshold."}, {"THERMAL_THROTTLED_TEMP.CORE_3", "Cycles that the PCU records that core 3 is in the power throttled state due to cores temperature being above the thermal throttling threshold."}, {"PROCHOT_ASSERTION", "Number of system assertions of PROCHOT indicating the entire processor has exceeded the thermal limit."}, {"THERMAL_THROTTLING_PROCHOT.CORE_0", "Cycles that the PCU records that core 0 is a low power state due to the system asserting PROCHOT the entire processor has exceeded the thermal limit."}, {"THERMAL_THROTTLING_PROCHOT.CORE_1", "Cycles that the PCU records that core 1 is a low power state due to the system asserting PROCHOT the entire processor has exceeded the thermal 
limit."}, {"THERMAL_THROTTLING_PROCHOT.CORE_2", "Cycles that the PCU records that core 2 is a low power state due to the system asserting PROCHOT the entire processor has exceeded the thermal limit."}, {"THERMAL_THROTTLING_PROCHOT.CORE_3", "Cycles that the PCU records that core 3 is a low power state due to the system asserting PROCHOT the entire processor has exceeded the thermal limit."}, {"TURBO_MODE.CORE_0", "Uncore cycles that core 0 is operating in turbo mode."}, {"TURBO_MODE.CORE_1", "Uncore cycles that core 1 is operating in turbo mode."}, {"TURBO_MODE.CORE_2", "Uncore cycles that core 2 is operating in turbo mode."}, {"TURBO_MODE.CORE_3", "Uncore cycles that core 3 is operating in turbo mode."}, {"CYCLES_UNHALTED_L3_FLL_ENABLE", "Uncore cycles that at least one core is unhalted and all L3 ways are enabled."}, {"CYCLES_UNHALTED_L3_FLL_DISABLE", "Uncore cycles that at least one core is unhalted and all L3 ways are disabled."}, { NULL, NULL } }; papi-5.6.0/src/libpfm4/lib/events/intel_knl_unc_edc_events.h000664 001750 001750 00000005516 13216244364 026137 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2016 Intel Corp. All rights reserved * Contributed by Peinan Zhang * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. * * PMU: knl_unc_edc (Intel Knights Landing EDC_UCLK, EDC_ECLK uncore PMUs) */ static const intel_x86_umask_t knl_unc_edc_uclk_access_count[]={ { .uname = "HIT_CLEAN", .udesc = "Hit E", .ucode = 0x0100, }, { .uname = "HIT_DIRTY", .udesc = "Hit M", .ucode = 0x0200, }, { .uname = "MISS_CLEAN", .udesc = "Miss E", .ucode = 0x0400, }, { .uname = "MISS_DIRTY", .udesc = "Miss M", .ucode = 0x0800, }, { .uname = "MISS_INVALID", .udesc = "Miss I", .ucode = 0x1000, }, { .uname = "MISS_GARBAGE", .udesc = "Miss G", .ucode = 0x2000, }, }; static const intel_x86_entry_t intel_knl_unc_edc_uclk_pe[]={ { .name = "UNC_E_U_CLOCKTICKS", .desc = "EDC UCLK clockticks (generic counters)", .code = 0x00, /*encoding for generic counters */ .cntmsk = 0xf, }, { .name = "UNC_E_EDC_ACCESS", .desc = "Number of EDC Access Hits or Misses.", .code = 0x02, .cntmsk = 0xf, .ngrp = 1, .numasks = LIBPFM_ARRAY_SIZE(knl_unc_edc_uclk_access_count), .umasks = knl_unc_edc_uclk_access_count }, }; static const intel_x86_entry_t intel_knl_unc_edc_eclk_pe[]={ { .name = "UNC_E_E_CLOCKTICKS", .desc = "EDC ECLK clockticks (generic counters)", .code = 0x00, /*encoding for generic counters */ .cntmsk = 0xf, }, { .name = "UNC_E_RPQ_INSERTS", .desc = "Counts total number of EDC RPQ insers", .code = 0x0101, .cntmsk = 0xf, }, { .name = "UNC_E_WPQ_INSERTS", .desc = "Counts total number of EDC WPQ insers", .code = 0x0102, .cntmsk = 0xf, }, }; papi-5.6.0/src/libpfm4/lib/events/sparc_ultra3i_events.h000664 001750 001750 00000024127 13216244365 025253 0ustar00jshenry1963jshenry1963000000 000000 static const sparc_entry_t ultra3i_pe[] = { /* These two must always be 
first. */ { .name = "Cycle_cnt", .desc = "Accumulated cycles", .ctrl = PME_CTRL_S0 | PME_CTRL_S1, .code = 0x0, }, { .name = "Instr_cnt", .desc = "Number of instructions completed", .ctrl = PME_CTRL_S0 | PME_CTRL_S1, .code = 0x1, }, /* PIC0 events common to all UltraSPARC processors */ { .name = "Dispatch0_IC_miss", .desc = "I-buffer is empty from I-Cache miss", .ctrl = PME_CTRL_S0, .code = 0x2, }, { .name = "IC_ref", .desc = "I-cache references", .ctrl = PME_CTRL_S0, .code = 0x8, }, { .name = "DC_rd", .desc = "D-cache read references (including accesses that subsequently trap)", .ctrl = PME_CTRL_S0, .code = 0x9, }, { .name = "DC_wr", .desc = "D-cache store accesses (including cacheable stores that subsequently trap)", .ctrl = PME_CTRL_S0, .code = 0xa, }, { .name = "EC_ref", .desc = "E-cache references", .ctrl = PME_CTRL_S0, .code = 0xc, }, { .name = "EC_snoop_inv", .desc = "L2-cache invalidates generated from a snoop by a remote processor", .ctrl = PME_CTRL_S0, .code = 0xe, }, /* PIC1 events common to all UltraSPARC processors */ { .name = "Dispatch0_mispred", .desc = "I-buffer is empty from Branch misprediction", .ctrl = PME_CTRL_S1, .code = 0x2, }, { .name = "EC_wb", .desc = "Dirty sub-blocks that produce writebacks due to L2-cache miss events", .ctrl = PME_CTRL_S1, .code = 0xd, }, { .name = "EC_snoop_cb", .desc = "L2-cache copybacks generated from a snoop by a remote processor", .ctrl = PME_CTRL_S1, .code = 0xe, }, /* PIC0 events common to all UltraSPARC-III/III+/IIIi processors */ { .name = "Dispatch0_br_target", .desc = "I-buffer is empty due to a branch target address calculation", .ctrl = PME_CTRL_S0, .code = 0x3, }, { .name = "Dispatch0_2nd_br", .desc = "Stall cycles due to having two branch instructions line-up in one 4-instruction group causing the second branch in the group to be re-fetched, delaying it's entrance into the I-buffer", .ctrl = PME_CTRL_S0, .code = 0x4, }, { .name = "Rstall_storeQ", .desc = "R-stage stall for a store instruction which is 
the next instruction to be executed, but it stalled due to the store queue being full", .ctrl = PME_CTRL_S0, .code = 0x5, }, { .name = "Rstall_IU_use", .desc = "R-stage stall for an event that the next instruction to be executed depends on the result of a preceding integer instruction in the pipeline that is not yet available", .ctrl = PME_CTRL_S0, .code = 0x6, }, { .name = "EC_write_hit_RTO", .desc = "W-cache exclusive requests that hit L2-cache in S, O, or Os state and thus, do a read-to-own bus transaction", .ctrl = PME_CTRL_S0, .code = 0xd, }, { .name = "EC_rd_miss", .desc = "L2-cache miss events (including atomics) from D-cache events", .ctrl = PME_CTRL_S0, .code = 0xf, }, { .name = "PC_port0_rd", .desc = "P-cache cacheable FP loads to the first port (general purpose load path to D-cache and P-cache via MS pipeline)", .ctrl = PME_CTRL_S0, .code = 0x10, }, { .name = "SI_snoop", .desc = "Counts snoops from remote processor(s) including RTS, RTSR, RTO, RTOR, RS, RSR, RTSM, and WS", .ctrl = PME_CTRL_S0, .code = 0x11, }, { .name = "SI_ciq_flow", .desc = "Counts system clock cycles when the flow control (PauseOut) signal is asserted", .ctrl = PME_CTRL_S0, .code = 0x12, }, { .name = "SI_owned", .desc = "Counts events where owned_in is asserted on bus requests from the local processor", .ctrl = PME_CTRL_S0, .code = 0x13, }, { .name = "SW_count0", .desc = "Counts software-generated occurrences of 'sethi %hi(0xfc000), %g0' instruction", .ctrl = PME_CTRL_S0, .code = 0x14, }, { .name = "IU_Stat_Br_miss_taken", .desc = "Retired branches that were predicted to be taken, but in fact were not taken", .ctrl = PME_CTRL_S0, .code = 0x15, }, { .name = "IU_Stat_Br_Count_taken", .desc = "Retired taken branches", .ctrl = PME_CTRL_S0, .code = 0x16, }, { .name = "Dispatch0_rs_mispred", .desc = "I-buffer is empty due to a Return Address Stack misprediction", .ctrl = PME_CTRL_S0, .code = 0x4, }, { .name = "FA_pipe_completion", .desc = "Instructions that complete execution on the FPG ALU 
pipelines", .ctrl = PME_CTRL_S0, .code = 0x18, }, /* PIC1 events common to all UltraSPARC-III/III+/IIIi processors */ { .name = "IC_miss_cancelled", .desc = "I-cache misses cancelled due to mis-speculation, recycle, or other events", .ctrl = PME_CTRL_S1, .code = 0x3, }, { .name = "Re_FPU_bypass", .desc = "Stall due to recirculation when an FPU bypass condition that does not have a direct bypass path occurs", .ctrl = PME_CTRL_S1, .code = 0x5, }, { .name = "Re_DC_miss", .desc = "Stall due to loads that miss D-cache and get recirculated", .ctrl = PME_CTRL_S1, .code = 0x6, }, { .name = "Re_EC_miss", .desc = "Stall due to loads that miss L2-cache and get recirculated", .ctrl = PME_CTRL_S1, .code = 0x7, }, { .name = "IC_miss", .desc = "I-cache misses, including fetches from mis-speculated execution paths which are later cancelled", .ctrl = PME_CTRL_S1, .code = 0x8, }, { .name = "DC_rd_miss", .desc = "Recirculated loads that miss the D-cache", .ctrl = PME_CTRL_S1, .code = 0x9, }, { .name = "DC_wr_miss", .desc = "D-cache store accesses that miss D-cache", .ctrl = PME_CTRL_S1, .code = 0xa, }, { .name = "Rstall_FP_use", .desc = "R-stage stall for an event that the next instruction to be executed depends on the result of a preceding floating-point instruction in the pipeline that is not yet available", .ctrl = PME_CTRL_S1, .code = 0xb, }, { .name = "EC_misses", .desc = "E-cache misses", .ctrl = PME_CTRL_S1, .code = 0xc, }, { .name = "EC_ic_miss", .desc = "L2-cache read misses from I-cache requests", .ctrl = PME_CTRL_S1, .code = 0xf, }, { .name = "Re_PC_miss", .desc = "Stall due to recirculation when a prefetch cache miss occurs on a prefetch predicted second load", .ctrl = PME_CTRL_S1, .code = 0x10, }, { .name = "ITLB_miss", .desc = "I-TLB miss traps taken", .ctrl = PME_CTRL_S1, .code = 0x11, }, { .name = "DTLB_miss", .desc = "Memory reference instructions which trap due to D-TLB miss", .ctrl = PME_CTRL_S1, .code = 0x12, }, { .name = "WC_miss", .desc = "W-cache misses", .ctrl 
= PME_CTRL_S1, .code = 0x13, }, { .name = "WC_snoop_cb", .desc = "W-cache copybacks generated by a snoop from a remote processor", .ctrl = PME_CTRL_S1, .code = 0x14, }, { .name = "WC_scrubbed", .desc = "W-cache hits to clean lines", .ctrl = PME_CTRL_S1, .code = 0x15, }, { .name = "WC_wb_wo_read", .desc = "W-cache writebacks not requiring a read", .ctrl = PME_CTRL_S1, .code = 0x16, }, { .name = "PC_soft_hit", .desc = "FP loads that hit a P-cache line that was prefetched by a software-prefetch instruction", .ctrl = PME_CTRL_S1, .code = 0x18, }, { .name = "PC_snoop_inv", .desc = "P-cache invalidates that were generated by a snoop from a remote processor and stores by a local processor", .ctrl = PME_CTRL_S1, .code = 0x19, }, { .name = "PC_hard_hit", .desc = "FP loads that hit a P-cache line that was prefetched by a hardware prefetch", .ctrl = PME_CTRL_S1, .code = 0x1a, }, { .name = "PC_port1_rd", .desc = "P-cache cacheable FP loads to the second port (memory and out-of-pipeline instruction execution loads via the A0 and A1 pipelines)", .ctrl = PME_CTRL_S1, .code = 0x1b, }, { .name = "SW_count1", .desc = "Counts software-generated occurrences of 'sethi %hi(0xfc000), %g0' instruction", .ctrl = PME_CTRL_S1, .code = 0x1c, }, { .name = "IU_Stat_Br_miss_untaken", .desc = "Retired branches that were predicted to be untaken, but in fact were taken", .ctrl = PME_CTRL_S1, .code = 0x1d, }, { .name = "IU_Stat_Br_Count_untaken", .desc = "Retired untaken branches", .ctrl = PME_CTRL_S1, .code = 0x1e, }, { .name = "PC_MS_miss", .desc = "FP loads through the MS pipeline that miss P-cache", .ctrl = PME_CTRL_S1, .code = 0x1f, }, { .name = "Re_RAW_miss", .desc = "Stall due to recirculation when there is a load in the E-stage which has a non-bypassable read-after-write hazard with an earlier store instruction", .ctrl = PME_CTRL_S1, .code = 0x26, }, { .name = "FM_pipe_completion", .desc = "Instructions that complete execution on the FPG Multiply pipelines", .ctrl = PME_CTRL_S0, .code = 
0x27, }, /* PIC0 memory controller events specific to UltraSPARC-IIIi processors */ { .name = "MC_read_dispatched", .desc = "DDR 64-byte reads dispatched by the MIU", .ctrl = PME_CTRL_S0, .code = 0x20, }, { .name = "MC_write_dispatched", .desc = "DDR 64-byte writes dispatched by the MIU", .ctrl = PME_CTRL_S0, .code = 0x21, }, { .name = "MC_read_returned_to_JBU", .desc = "64-byte reads that return data to JBU", .ctrl = PME_CTRL_S0, .code = 0x22, }, { .name = "MC_msl_busy_stall", .desc = "Stall cycles due to msl_busy", .ctrl = PME_CTRL_S0, .code = 0x23, }, { .name = "MC_mdb_overflow_stall", .desc = "Stall cycles due to potential memory data buffer overflow", .ctrl = PME_CTRL_S0, .code = 0x24, }, { .name = "MC_miu_spec_request", .desc = "Speculative requests accepted by MIU", .ctrl = PME_CTRL_S0, .code = 0x25, }, /* PIC1 memory controller events specific to UltraSPARC-IIIi processors */ { .name = "MC_reads", .desc = "64-byte reads by the MSL", .ctrl = PME_CTRL_S1, .code = 0x20, }, { .name = "MC_writes", .desc = "64-byte writes by the MSL", .ctrl = PME_CTRL_S1, .code = 0x21, }, { .name = "MC_page_close_stall", .desc = "DDR page conflicts", .ctrl = PME_CTRL_S1, .code = 0x22, }, /* PIC1 events specific to UltraSPARC-III+/IIIi */ { .name = "Re_DC_missovhd", .desc = "Used to measure D-cache stall counts separately for L2-cache hits and misses. This counter is used with the recirculation and cache access events to separately calculate the D-cache loads that hit and miss the L2-cache", .ctrl = PME_CTRL_S1, .code = 0x4, }, }; #define PME_SPARC_ULTRA3I_EVENT_COUNT (sizeof(ultra3i_pe)/sizeof(sparc_entry_t)) papi-5.6.0/src/ctests/overflow_single_event.c000664 001750 001750 00000012354 13216244360 023404 0ustar00jshenry1963jshenry1963000000 000000 /* * File: overflow_single_event.c * Author: Philip Mucci * mucci@cs.utk.edu */ /* This file performs the following test: overflow dispatch of an eventset with just a single event. 
The Eventset contains: + PAPI_FP_INS (overflow monitor) - Start eventset 1 - Do flops - Stop and measure eventset 1 - Set up overflow on eventset 1 - Start eventset 1 - Do flops - Stop eventset 1 */ #include #include #include #include "papi.h" #include "papi_test.h" #include "do_loops.h" #define OVER_FMT "handler(%d ) Overflow at %p overflow_vector=%#llx!\n" #define OUT_FMT "%-12s : %16lld%16lld\n" static int total = 0; /* total overflows */ void handler( int EventSet, void *address, long long overflow_vector, void *context ) { ( void ) context; if ( !TESTS_QUIET ) { fprintf( stderr, OVER_FMT, EventSet, address, overflow_vector ); } total++; } int main( int argc, char **argv ) { int EventSet = PAPI_NULL; long long values[2] = { 0, 0 }; long long min, max; int num_flops = NUM_FLOPS, retval; int PAPI_event = 0, mythreshold; char event_name[PAPI_MAX_STR_LEN]; const PAPI_hw_info_t *hw_info = NULL; int quiet; /* Set TESTS_QUIET variable */ quiet=tests_quiet( argc, argv ); retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) { test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); } hw_info = PAPI_get_hardware_info( ); if ( hw_info == NULL ) test_fail( __FILE__, __LINE__, "PAPI_get_hardware_info", 2 ); /* Ugh */ if ( ( !strncmp( hw_info->model_string, "UltraSPARC", 10 ) && !( strncmp( hw_info->vendor_string, "SUN", 3 ) ) ) || ( !strncmp( hw_info->model_string, "AMD K7", 6 ) ) || ( !strncmp( hw_info->vendor_string, "Cray", 4 ) ) || ( strstr( hw_info->model_string, "POWER3" ) ) ) { /* query and set up the right instruction to monitor */ if ( PAPI_query_event( PAPI_TOT_INS ) == PAPI_OK ) { PAPI_event = PAPI_TOT_INS; } else { test_fail( __FILE__, __LINE__, "PAPI_TOT_INS not available on this Sun platform!", 0 ); } } else { /* query and set up the right instruction to monitor */ PAPI_event = find_nonderived_event( ); } if (PAPI_event==0) { if (!quiet) printf("Trouble adding event\n"); test_skip(__FILE__,__LINE__,"Event trouble",1); } if (( 
PAPI_event == PAPI_FP_OPS ) || ( PAPI_event == PAPI_FP_INS )) { mythreshold = THRESHOLD; } else { #if defined(linux) mythreshold = ( int ) hw_info->cpu_max_mhz * 20000; #else mythreshold = THRESHOLD * 2; #endif } retval = PAPI_create_eventset( &EventSet ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); } retval = PAPI_add_event( EventSet, PAPI_event ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_add_event", retval ); } retval = PAPI_start( EventSet ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_start", retval ); } do_flops( NUM_FLOPS ); retval = PAPI_stop( EventSet, &values[0] ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); } retval = PAPI_overflow( EventSet, PAPI_event, mythreshold, 0, handler ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_overflow", retval ); } retval = PAPI_start( EventSet ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_start", retval ); } do_flops( NUM_FLOPS ); retval = PAPI_stop( EventSet, &values[1] ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); } /* double ugh */ #if defined(linux) || defined(__ia64__) || defined(_POWER4) num_flops *= 2; #endif if ( !quiet ) { if ( ( retval = PAPI_event_code_to_name( PAPI_event, event_name ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); printf ( "Test case: Overflow dispatch of 1st event in set with 1 event.\n" ); printf ( "--------------------------------------------------------------\n" ); printf( "Threshold for overflow is: %d\n", mythreshold ); printf( "Using %d iterations of c += a*b\n", NUM_FLOPS ); printf( "-----------------------------------------------\n" ); printf( "Test type : %16d%16d\n", 1, 2 ); printf( OUT_FMT, event_name, values[0], values[1] ); printf( "Overflows : %16s%16d\n", "", total ); printf( "-----------------------------------------------\n" ); printf( 
"Verification:\n" ); /* if (PAPI_event == PAPI_FP_INS) printf("Row 1 approximately equals %d %d\n", num_flops, num_flops); printf("Column 1 approximately equals column 2\n"); */ printf( "Row 3 approximately equals %u +- %u %%\n", ( unsigned ) ( ( values[0] ) / ( long long ) mythreshold ), ( unsigned ) ( OVR_TOLERANCE * 100.0 ) ); } /* min = (long long)(values[0]*(1.0-TOLERANCE)); max = (long long)(values[0]*(1.0+TOLERANCE)); if ( values[1] > max || values[1] < min ) test_fail(__FILE__, __LINE__, event_name, 1); */ min = ( long long ) ( ( ( double ) values[0] * ( 1.0 - OVR_TOLERANCE ) ) / ( double ) mythreshold ); max = ( long long ) ( ( ( double ) values[0] * ( 1.0 + OVR_TOLERANCE ) ) / ( double ) mythreshold ); if ( total > max || total < min ) test_fail( __FILE__, __LINE__, "Overflows", 1 ); test_pass( __FILE__ ); return 0; } papi-5.6.0/src/libpfm-3.y/lib/pfmlib_sparc.c000664 001750 001750 00000033303 13216244363 022561 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (C) 2007 David S. Miller (davem@davemloft.net) * * Based upon gen_powerpc code which is: * Copyright (C) IBM Corporation, 2007. All rights reserved. * Contributed by Corey Ashford (cjashfor@us.ibm.com) * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. * * pfmlib_sparc.c * * Support for libpfm for Sparc processors. */ #ifndef _GNU_SOURCE #define _GNU_SOURCE /* for getline */ #endif #include #include #include #include /* private headers */ #include "pfmlib_priv.h" #include "pfmlib_sparc_priv.h" #include "ultra12_events.h" #include "ultra3_events.h" #include "ultra3i_events.h" #include "ultra3plus_events.h" #include "ultra4plus_events.h" #include "niagara1_events.h" #include "niagara2_events.h" static char *get_event_name(int event) { switch (sparc_support.pmu_type) { case PFMLIB_SPARC_ULTRA12_PMU: return ultra12_pe[event].pme_name; case PFMLIB_SPARC_ULTRA3_PMU: return ultra3_pe[event].pme_name; case PFMLIB_SPARC_ULTRA3I_PMU: return ultra3i_pe[event].pme_name; case PFMLIB_SPARC_ULTRA3PLUS_PMU: return ultra3plus_pe[event].pme_name; case PFMLIB_SPARC_ULTRA4PLUS_PMU: return ultra4plus_pe[event].pme_name; case PFMLIB_SPARC_NIAGARA1_PMU: return niagara1_pe[event].pme_name; case PFMLIB_SPARC_NIAGARA2_PMU: return niagara2_pe[event].pme_name; } return (char *)-1; } static char *get_event_desc(int event) { switch (sparc_support.pmu_type) { case PFMLIB_SPARC_ULTRA12_PMU: return ultra12_pe[event].pme_desc; case PFMLIB_SPARC_ULTRA3_PMU: return ultra3_pe[event].pme_desc; case PFMLIB_SPARC_ULTRA3I_PMU: return ultra3i_pe[event].pme_desc; case PFMLIB_SPARC_ULTRA3PLUS_PMU: return ultra3plus_pe[event].pme_desc; case PFMLIB_SPARC_ULTRA4PLUS_PMU: return ultra4plus_pe[event].pme_desc; case PFMLIB_SPARC_NIAGARA1_PMU: return niagara1_pe[event].pme_desc; case PFMLIB_SPARC_NIAGARA2_PMU: return niagara2_pe[event].pme_desc; } return (char *)-1; } static char get_ctrl(int event) { switch (sparc_support.pmu_type) { case PFMLIB_SPARC_ULTRA12_PMU: return ultra12_pe[event].pme_ctrl; 
case PFMLIB_SPARC_ULTRA3_PMU: return ultra3_pe[event].pme_ctrl; case PFMLIB_SPARC_ULTRA3I_PMU: return ultra3i_pe[event].pme_ctrl; case PFMLIB_SPARC_ULTRA3PLUS_PMU: return ultra3plus_pe[event].pme_ctrl; case PFMLIB_SPARC_ULTRA4PLUS_PMU: return ultra4plus_pe[event].pme_ctrl; case PFMLIB_SPARC_NIAGARA1_PMU: return niagara1_pe[event].pme_ctrl; case PFMLIB_SPARC_NIAGARA2_PMU: return niagara2_pe[event].pme_ctrl; } return 0xff; } static int get_val(int event) { switch (sparc_support.pmu_type) { case PFMLIB_SPARC_ULTRA12_PMU: return ultra12_pe[event].pme_val; case PFMLIB_SPARC_ULTRA3_PMU: return ultra3_pe[event].pme_val; case PFMLIB_SPARC_ULTRA3I_PMU: return ultra3i_pe[event].pme_val; case PFMLIB_SPARC_ULTRA3PLUS_PMU: return ultra3plus_pe[event].pme_val; case PFMLIB_SPARC_ULTRA4PLUS_PMU: return ultra4plus_pe[event].pme_val; case PFMLIB_SPARC_NIAGARA1_PMU: return niagara1_pe[event].pme_val; case PFMLIB_SPARC_NIAGARA2_PMU: return niagara2_pe[event].pme_val; } return -1; } static int pfm_sparc_get_event_code(unsigned int event, unsigned int pmd, int *code) { *code = get_val(event); return 0; } static char *pfm_sparc_get_event_name(unsigned int event) { return get_event_name(event); } static char *pfm_sparc_get_event_mask_name(unsigned int event, unsigned int mask) { pme_sparc_mask_entry_t *e; if (sparc_support.pmu_type != PFMLIB_SPARC_NIAGARA2_PMU) return ""; e = &niagara2_pe[event]; return e->pme_masks[mask].mask_name; } static void pfm_sparc_get_event_counters(unsigned int event, pfmlib_regmask_t *counters) { if (sparc_support.pmu_type == PFMLIB_SPARC_NIAGARA2_PMU) { counters->bits[0] = (1 << 0) | (1 << 1); } else { char ctrl = get_ctrl(event); counters->bits[0] = 0; if (ctrl & PME_CTRL_S0) counters->bits[0] |= (1 << 0); if (ctrl & PME_CTRL_S1) counters->bits[0] |= (1 << 1); } } static unsigned int pfm_sparc_get_num_event_masks(unsigned int event) { if (sparc_support.pmu_type != PFMLIB_SPARC_NIAGARA2_PMU) return 0; return (event == 0 ? 
0 : EVENT_MASK_BITS); } /* Bits common to all PCR implementations */ #define PCR_PRIV (0x1UL << 0) #define PCR_SYS_TRACE (0x1UL << 1) #define PCR_USER_TRACE (0x1UL << 2) /* The S0 and S1 fields determine which events are monitored in * the assosciated PIC (PIC0 vs. PIC1 respectively). For ultra12 * these fields are 4 bits, on ultra3/3i/3+/4+ they are 6 bits. * For Niagara-1 there is only S0 and it is 3 bits in size. * Niagara-1's PIC1 is hard-coded to record retired instructions. */ #define PCR_S0_SHIFT 4 #define PCR_S0 (0x1fUL << PCR_S0_SHIFT) #define PCR_S1_SHIFT 11 #define PCR_S1 (0x1fUL << PCR_S1_SHIFT) /* Niagara-2 specific PCR bits. It supports event masking. */ #define PCR_N2_HYP_TRACE (0x1UL << 3) #define PCR_N2_TOE0 (0x1UL << 4) #define PCR_N2_TOE1 (0x1UL << 5) #define PCR_N2_SL0_SHIFT 14 #define PCR_N2_SL0 (0xf << PCR_N2_SL0_SHIFT) #define PCR_N2_MASK0_SHIFT 6 #define PCR_N2_MASK0 (0xff << PCR_N2_MASK0_SHIFT) #define PCR_N2_SL1_SHIFT 27 #define PCR_N2_SL1 (0xf << PCR_N2_SL1_SHIFT) #define PCR_N2_MASK1_SHIFT 19 #define PCR_N2_MASK1 (0xff << PCR_N2_MASK1_SHIFT) static int pfm_sparc_dispatch_events(pfmlib_input_param_t *input, void *model_input, pfmlib_output_param_t *output, void *model_output) { unsigned long long pcr, vals[2]; unsigned int plm, i; int niagara2; char ctrls[2]; if (input->pfp_event_count > 2) return PFMLIB_ERR_TOOMANY; plm = ((input->pfp_events[0].plm != 0) ? 
input->pfp_events[0].plm : input->pfp_dfl_plm); for (i = 1; i < input->pfp_event_count; i++) { if (input->pfp_events[i].plm == 0) { /* it's ok if the default is the same as plm */ if (plm != input->pfp_dfl_plm) return PFMLIB_ERR_NOASSIGN; } else { if (plm != input->pfp_events[i].plm) return PFMLIB_ERR_NOASSIGN; } } niagara2 = 0; if (sparc_support.pmu_type == PFMLIB_SPARC_NIAGARA2_PMU) niagara2 = 1; pcr = 0; if (plm & PFM_PLM3) pcr |= PCR_USER_TRACE; if (plm & PFM_PLM0) pcr |= PCR_SYS_TRACE; if (niagara2 && (plm & PFM_PLM1)) pcr |= PCR_N2_HYP_TRACE; for (i = 0; i < input->pfp_event_count; i++) { pfmlib_event_t *e = &input->pfp_events[i]; ctrls[i] = get_ctrl(e->event); vals[i] = get_val(e->event); if (i == 1) { if ((ctrls[0] & ctrls[1]) == 0) continue; if (ctrls[0] == (PME_CTRL_S0|PME_CTRL_S1)) { if (ctrls[1] == (PME_CTRL_S0|PME_CTRL_S1)) { ctrls[0] = PME_CTRL_S0; ctrls[1] = PME_CTRL_S1; } else { ctrls[0] &= ~ctrls[1]; } } else if (ctrls[1] == (PME_CTRL_S0|PME_CTRL_S1)) { ctrls[1] &= ~ctrls[0]; } else return PFMLIB_ERR_INVAL; } } if (input->pfp_event_count == 1) { if (ctrls[0] == (PME_CTRL_S0|PME_CTRL_S1)) ctrls[0] = PME_CTRL_S0; } for (i = 0; i < input->pfp_event_count; i++) { unsigned long long val = vals[i]; char ctrl = ctrls[i]; switch (ctrl) { case PME_CTRL_S0: output->pfp_pmds[i].reg_num = 0; pcr |= (val << (niagara2 ? PCR_N2_SL0_SHIFT : PCR_S0_SHIFT)); break; case PME_CTRL_S1: output->pfp_pmds[i].reg_num = 1; pcr |= (val << (niagara2 ? 
PCR_N2_SL1_SHIFT : PCR_S1_SHIFT)); break; default: return PFMLIB_ERR_INVAL; } if (niagara2) { pfmlib_event_t *e = &input->pfp_events[i]; unsigned int j, shift; if (ctrl == PME_CTRL_S0) { pcr |= PCR_N2_TOE0; shift = PCR_N2_MASK0_SHIFT; } else { pcr |= PCR_N2_TOE1; shift = PCR_N2_MASK1_SHIFT; } for (j = 0; j < e->num_masks; j++) { unsigned int mask; mask = e->unit_masks[j]; if (mask >= EVENT_MASK_BITS) return PFMLIB_ERR_INVAL; pcr |= (1ULL << (shift + mask)); } } output->pfp_pmds[i].reg_value = 0; output->pfp_pmds[i].reg_addr = 0; output->pfp_pmds[i].reg_alt_addr = 0; output->pfp_pmds[i].reg_reserved1 = 0; output->pfp_pmd_count = i + 1; } output->pfp_pmcs[0].reg_value = pcr; output->pfp_pmcs[0].reg_addr = 0; output->pfp_pmcs[0].reg_num = 0; output->pfp_pmcs[0].reg_reserved1 = 0; output->pfp_pmc_count = 1; return PFMLIB_SUCCESS; } static int pmu_name_to_pmu_type(char *name) { if (!strcmp(name, "ultra12")) return PFMLIB_SPARC_ULTRA12_PMU; if (!strcmp(name, "ultra3")) return PFMLIB_SPARC_ULTRA3_PMU; if (!strcmp(name, "ultra3i")) return PFMLIB_SPARC_ULTRA3I_PMU; if (!strcmp(name, "ultra3+")) return PFMLIB_SPARC_ULTRA3PLUS_PMU; if (!strcmp(name, "ultra4+")) return PFMLIB_SPARC_ULTRA4PLUS_PMU; if (!strcmp(name, "niagara2")) return PFMLIB_SPARC_NIAGARA2_PMU; if (!strcmp(name, "niagara")) return PFMLIB_SPARC_NIAGARA1_PMU; return -1; } static int pfm_sparc_pmu_detect(void) { int ret, pmu_type, pme_count; char buffer[32]; ret = __pfm_getcpuinfo_attr("pmu", buffer, sizeof(buffer)); if (ret == -1) return PFMLIB_ERR_NOTSUPP; pmu_type = pmu_name_to_pmu_type(buffer); if (pmu_type == -1) return PFMLIB_ERR_NOTSUPP; switch (pmu_type) { default: return PFMLIB_ERR_NOTSUPP; case PFMLIB_SPARC_ULTRA12_PMU: pme_count = PME_ULTRA12_EVENT_COUNT; break; case PFMLIB_SPARC_ULTRA3_PMU: pme_count = PME_ULTRA3_EVENT_COUNT; break; case PFMLIB_SPARC_ULTRA3I_PMU: pme_count = PME_ULTRA3I_EVENT_COUNT; break; case PFMLIB_SPARC_ULTRA3PLUS_PMU: pme_count = PME_ULTRA3PLUS_EVENT_COUNT; break; case 
PFMLIB_SPARC_ULTRA4PLUS_PMU: pme_count = PME_ULTRA4PLUS_EVENT_COUNT; break; case PFMLIB_SPARC_NIAGARA1_PMU: pme_count = PME_NIAGARA1_EVENT_COUNT; break; case PFMLIB_SPARC_NIAGARA2_PMU: pme_count = PME_NIAGARA2_EVENT_COUNT; break; } sparc_support.pmu_type = pmu_type; sparc_support.pmu_name = strdup(buffer); sparc_support.pme_count = pme_count; return PFMLIB_SUCCESS; } static void pfm_sparc_get_impl_pmcs(pfmlib_regmask_t *impl_pmcs) { impl_pmcs->bits[0] = 0x1; } static void pfm_sparc_get_impl_pmds(pfmlib_regmask_t *impl_pmds) { impl_pmds->bits[0] = 0x3; } static void pfm_sparc_get_impl_counters(pfmlib_regmask_t *impl_counters) { pfm_sparc_get_impl_pmds(impl_counters); } static void pfm_sparc_get_hw_counter_width(unsigned int *width) { *width = 32; } static int pfm_sparc_get_event_desc(unsigned int event, char **desc) { *desc = strdup(get_event_desc(event)); return 0; } static int pfm_sparc_get_event_mask_desc(unsigned int event, unsigned int mask, char **desc) { if (sparc_support.pmu_type != PFMLIB_SPARC_NIAGARA2_PMU) { *desc = strdup(""); } else { pme_sparc_mask_entry_t *e; e = &niagara2_pe[event]; *desc = strdup(e->pme_masks[mask].mask_desc); } return 0; } static int pfm_sparc_get_event_mask_code(unsigned int event, unsigned int mask, unsigned int *code) { if (sparc_support.pmu_type != PFMLIB_SPARC_NIAGARA2_PMU) *code = 0; else *code = mask; return 0; } static int pfm_sparc_get_cycle_event(pfmlib_event_t *e) { switch (sparc_support.pmu_type) { case PFMLIB_SPARC_ULTRA12_PMU: case PFMLIB_SPARC_ULTRA3_PMU: case PFMLIB_SPARC_ULTRA3I_PMU: case PFMLIB_SPARC_ULTRA3PLUS_PMU: case PFMLIB_SPARC_ULTRA4PLUS_PMU: e->event = 0; break; case PFMLIB_SPARC_NIAGARA1_PMU: case PFMLIB_SPARC_NIAGARA2_PMU: default: return PFMLIB_ERR_NOTSUPP; } return PFMLIB_SUCCESS; } static int pfm_sparc_get_inst_retired(pfmlib_event_t *e) { unsigned int i; switch (sparc_support.pmu_type) { case PFMLIB_SPARC_ULTRA12_PMU: case PFMLIB_SPARC_ULTRA3_PMU: case PFMLIB_SPARC_ULTRA3I_PMU: case 
PFMLIB_SPARC_ULTRA3PLUS_PMU: case PFMLIB_SPARC_ULTRA4PLUS_PMU: e->event = 1; break; case PFMLIB_SPARC_NIAGARA1_PMU: e->event = 0; break; case PFMLIB_SPARC_NIAGARA2_PMU: e->event = 1; e->num_masks = EVENT_MASK_BITS; for (i = 0; i < e->num_masks; i++) e->unit_masks[i] = i; break; default: return PFMLIB_ERR_NOTSUPP; } return PFMLIB_SUCCESS; } /** * sparc_support **/ pfm_pmu_support_t sparc_support = { /* the next 3 fields are initialized in pfm_sparc_pmu_detect */ .pmu_name = NULL, .pmu_type = PFMLIB_UNKNOWN_PMU, .pme_count = 0, .pmd_count = 2, .pmc_count = 1, .num_cnt = 2, .get_event_code = pfm_sparc_get_event_code, .get_event_name = pfm_sparc_get_event_name, .get_event_mask_name = pfm_sparc_get_event_mask_name, .get_event_counters = pfm_sparc_get_event_counters, .get_num_event_masks = pfm_sparc_get_num_event_masks, .dispatch_events = pfm_sparc_dispatch_events, .pmu_detect = pfm_sparc_pmu_detect, .get_impl_pmcs = pfm_sparc_get_impl_pmcs, .get_impl_pmds = pfm_sparc_get_impl_pmds, .get_impl_counters = pfm_sparc_get_impl_counters, .get_hw_counter_width = pfm_sparc_get_hw_counter_width, .get_event_desc = pfm_sparc_get_event_desc, .get_event_mask_desc = pfm_sparc_get_event_mask_desc, .get_event_mask_code = pfm_sparc_get_event_mask_code, .get_cycle_event = pfm_sparc_get_cycle_event, .get_inst_retired_event = pfm_sparc_get_inst_retired }; papi-5.6.0/src/components/infiniband/Rules.infiniband000664 001750 001750 00000000441 13216244357 024731 0ustar00jshenry1963jshenry1963000000 000000 COMPSRCS += components/infiniband/linux-infiniband.c COMPOBJS += linux-infiniband.o linux-infiniband.o: components/infiniband/linux-infiniband.c components/infiniband/pscanf.h $(HEADERS) $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/infiniband/linux-infiniband.c -o linux-infiniband.o papi-5.6.0/src/libpfm-3.y/lib/pfmlib_gen_ia64.c000664 001750 001750 00000032506 13216244363 023051 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_gen_ia64.c : support default architected IA-64 PMU features 
*
 * Copyright (c) 2001-2006 Hewlett-Packard Development Company, L.P.
 * Contributed by Stephane Eranian
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do so,
 * subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
 * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
 * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
 * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
 * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
/*
 * NOTE(review): the next six directives have no header name -- the names
 * appear to have been lost when this file was extracted.  They must be
 * restored from upstream before this file can be compiled.
 */
#include
#include
#include
#include
#include
#include

#include "pfmlib_priv.h"		/* library private */
#include "pfmlib_priv_ia64.h"		/* architecture private */

/* upper bound on the number of events one dispatch request may carry */
#define PMU_GEN_IA64_MAX_COUNTERS	4

/*
 * number of architected events
 */
#define PME_GEN_COUNT	2

/*
 * Description of the PMC register mappings use by
 * this module (as reported in pfmlib_reg_t.reg_num):
 *
 * 0 -> PMC0
 * 1 -> PMC1
 * n -> PMCn
 */
#define PFMLIB_GEN_IA64_PMC_BASE	0

/*
 * generic event as described by architecture
 */
typedef	struct {
	unsigned long pme_code:8;	/* major event code */
	unsigned long pme_ig:56;	/* ignored */
} pme_gen_ia64_code_t;

/*
 * union of all possible entry codes. All encodings must fit in 64bit
 */
typedef union {
	unsigned long  	    pme_vcode;		/* the code as one plain value */
	pme_gen_ia64_code_t pme_gen_code;	/* the same storage, split into fields */
} pme_gen_ia64_entry_code_t;

/*
 * entry in the event table (one table per implementation)
 */
typedef struct pme_entry {
	char				*pme_name;
	pme_gen_ia64_entry_code_t	pme_entry_code;	/* event code */
	pfmlib_regmask_t		pme_counters;	/* counter bitmask */
} pme_gen_ia64_entry_t;

/* let's define some handy shortcuts ! */
#define pmc_plm		pmc_gen_count_reg.pmc_plm
#define pmc_ev		pmc_gen_count_reg.pmc_ev
#define pmc_oi		pmc_gen_count_reg.pmc_oi
#define pmc_pm		pmc_gen_count_reg.pmc_pm
#define pmc_es		pmc_gen_count_reg.pmc_es

/*
 * this table is patched by initialization code
 *
 * Only the names are given here; the PME_IA64_GEN_* macros defined between
 * the entries are the indices of those entries.
 */
static pme_gen_ia64_entry_t generic_pe[PME_GEN_COUNT]={
#define PME_IA64_GEN_CPU_CYCLES	0
	{ "CPU_CYCLES", },
#define PME_IA64_GEN_INST_RETIRED	1
	{ "IA64_INST_RETIRED", },
};

/* PMU parameters; assigned by the initialization routines of this file */
static int pfm_gen_ia64_counter_width;
static int pfm_gen_ia64_counters;
static pfmlib_regmask_t pfm_gen_ia64_impl_pmcs;
static pfmlib_regmask_t pfm_gen_ia64_impl_pmds;
/*
 * Description of the PMC register mappings use by
 * this module (as reported in pfmlib_reg_t.reg_num):
 *
 * 0 -> PMC0
 * 1 -> PMC1
 * n -> PMCn
 * We do not use a mapping table, instead we make up the
 * values on the fly given the base.
 *
 * NOTE(review): this repeats, with the same value, the definition of
 * PFMLIB_GEN_IA64_PMC_BASE made earlier in this file.
 */
#define PFMLIB_GEN_IA64_PMC_BASE	0

/*
 * convert text range (e.g.
4-15 18 12-26) into actual bitmask * range argument is modified */ static int parse_counter_range(char *range, pfmlib_regmask_t *b) { char *p, c; int start, end; if (range[strlen(range)-1] == '\n') range[strlen(range)-1] = '\0'; while(range) { p = range; while (*p && *p != ' ' && *p != '-') p++; if (*p == '\0') break; c = *p; *p = '\0'; start = atoi(range); range = p+1; if (c == '-') { p++; while (*p && *p != ' ' && *p != '-') p++; if (*p) *p++ = '\0'; end = atoi(range); range = p; } else { end = start; } if (end >= PFMLIB_REG_MAX|| start >= PFMLIB_REG_MAX) goto invalid; for (; start <= end; start++) pfm_regmask_set(b, start); } return 0; invalid: fprintf(stderr, "%s.%s : bitmask too small need %d bits\n", __FILE__, __FUNCTION__, start); return -1; } static int pfm_gen_ia64_initialize(void) { FILE *fp; char *p; char buffer[64]; int matches = 0; fp = fopen("/proc/pal/cpu0/perfmon_info", "r"); if (fp == NULL) return PFMLIB_ERR_NOTSUPP; for (;;) { p = fgets(buffer, sizeof(buffer)-1, fp); if (p == NULL) break; if ((p = strchr(buffer, ':')) == NULL) break; *p = '\0'; if (!strncmp("Counter width", buffer, 13)) { pfm_gen_ia64_counter_width = atoi(p+2); matches++; continue; } if (!strncmp("PMC/PMD pairs", buffer, 13)) { pfm_gen_ia64_counters = atoi(p+2); matches++; continue; } if (!strncmp("Cycle event number", buffer, 18)) { generic_pe[0].pme_entry_code.pme_vcode = atoi(p+2); matches++; continue; } if (!strncmp("Retired event number", buffer, 20)) { generic_pe[1].pme_entry_code.pme_vcode = atoi(p+2); matches++; continue; } if (!strncmp("Cycles count capable", buffer, 20)) { if (parse_counter_range(p+2, &generic_pe[0].pme_counters) == -1) return -1; matches++; continue; } if (!strncmp("Retired bundles count capable", buffer, 29)) { if (parse_counter_range(p+2, &generic_pe[1].pme_counters) == -1) return -1; matches++; continue; } if (!strncmp("Implemented PMC", buffer, 15)) { if (parse_counter_range(p+2, &pfm_gen_ia64_impl_pmcs) == -1) return -1; matches++; continue; } if 
(!strncmp("Implemented PMD", buffer, 15)) { if (parse_counter_range(p+2, &pfm_gen_ia64_impl_pmds) == -1) return -1; matches++; continue; } } pfm_regmask_weight(&pfm_gen_ia64_impl_pmcs, &generic_ia64_support.pmc_count); pfm_regmask_weight(&pfm_gen_ia64_impl_pmds, &generic_ia64_support.pmd_count); fclose(fp); return matches == 8 ? PFMLIB_SUCCESS : PFMLIB_ERR_NOTSUPP; } static void pfm_gen_ia64_forced_initialize(void) { unsigned int i; pfm_gen_ia64_counter_width = 47; pfm_gen_ia64_counters = 4; generic_pe[0].pme_entry_code.pme_vcode = 18; generic_pe[1].pme_entry_code.pme_vcode = 8; memset(&pfm_gen_ia64_impl_pmcs, 0, sizeof(pfmlib_regmask_t)); memset(&pfm_gen_ia64_impl_pmds, 0, sizeof(pfmlib_regmask_t)); for(i=0; i < 8; i++) pfm_regmask_set(&pfm_gen_ia64_impl_pmcs, i); for(i=4; i < 8; i++) pfm_regmask_set(&pfm_gen_ia64_impl_pmds, i); memset(&generic_pe[0].pme_counters, 0, sizeof(pfmlib_regmask_t)); memset(&generic_pe[1].pme_counters, 0, sizeof(pfmlib_regmask_t)); for(i=4; i < 8; i++) { pfm_regmask_set(&generic_pe[0].pme_counters, i); pfm_regmask_set(&generic_pe[1].pme_counters, i); } generic_ia64_support.pmc_count = 8; generic_ia64_support.pmd_count = 4; generic_ia64_support.num_cnt = 4; } static int pfm_gen_ia64_detect(void) { /* PMU is architected, so guaranteed to be present */ return PFMLIB_SUCCESS; } static int pfm_gen_ia64_init(void) { if (forced_pmu != PFMLIB_NO_PMU) { pfm_gen_ia64_forced_initialize(); } else if (pfm_gen_ia64_initialize() == -1) return PFMLIB_ERR_NOTSUPP; return PFMLIB_SUCCESS; } static int valid_assign(unsigned int *as, pfmlib_regmask_t *r_pmcs, unsigned int cnt) { unsigned int i; for(i=0; i < cnt; i++) { if (as[i]==0) return 0; /* * take care of restricted PMC registers */ if (pfm_regmask_isset(r_pmcs, as[i])) return 0; } return 1; } /* * Automatically dispatch events to corresponding counters following constraints. 
* Upon return the pfarg_reg_t structure is ready to be submitted to kernel */ static int pfm_gen_ia64_dispatch_counters(pfmlib_input_param_t *inp, pfmlib_output_param_t *outp) { #define has_counter(e,b) (pfm_regmask_isset(&generic_pe[e].pme_counters, b) ? b : 0) unsigned int max_l0, max_l1, max_l2, max_l3; unsigned int assign[PMU_GEN_IA64_MAX_COUNTERS]; pfm_gen_ia64_pmc_reg_t reg; pfmlib_event_t *e; pfmlib_reg_t *pc, *pd; pfmlib_regmask_t *r_pmcs; unsigned int i,j,k,l; unsigned int cnt; e = inp->pfp_events; pc = outp->pfp_pmcs; pd = outp->pfp_pmds; cnt = inp->pfp_event_count; r_pmcs = &inp->pfp_unavail_pmcs; if (cnt > PMU_GEN_IA64_MAX_COUNTERS) return PFMLIB_ERR_TOOMANY; max_l0 = PMU_GEN_IA64_FIRST_COUNTER + PMU_GEN_IA64_MAX_COUNTERS; max_l1 = PMU_GEN_IA64_FIRST_COUNTER + PMU_GEN_IA64_MAX_COUNTERS*(cnt>1); max_l2 = PMU_GEN_IA64_FIRST_COUNTER + PMU_GEN_IA64_MAX_COUNTERS*(cnt>2); max_l3 = PMU_GEN_IA64_FIRST_COUNTER + PMU_GEN_IA64_MAX_COUNTERS*(cnt>3); if (PFMLIB_DEBUG()) { DPRINT("max_l0=%u max_l1=%u max_l2=%u max_l3=%u\n", max_l0, max_l1, max_l2, max_l3); } /* * This code needs fixing. It is not very pretty and * won't handle more than 4 counters if more become * available ! * For now, worst case in the loop nest: 4! 
(factorial) */ for (i=PMU_GEN_IA64_FIRST_COUNTER; i < max_l0; i++) { assign[0]= has_counter(e[0].event,i); if (max_l1 == PMU_GEN_IA64_FIRST_COUNTER && valid_assign(assign, r_pmcs, cnt)) goto done; for (j=PMU_GEN_IA64_FIRST_COUNTER; j < max_l1; j++) { if (j == i) continue; assign[1] = has_counter(e[1].event,j); if (max_l2 == PMU_GEN_IA64_FIRST_COUNTER && valid_assign(assign, r_pmcs, cnt)) goto done; for (k=PMU_GEN_IA64_FIRST_COUNTER; k < max_l2; k++) { if(k == i || k == j) continue; assign[2] = has_counter(e[2].event,k); if (max_l3 == PMU_GEN_IA64_FIRST_COUNTER && valid_assign(assign, r_pmcs, cnt)) goto done; for (l=PMU_GEN_IA64_FIRST_COUNTER; l < max_l3; l++) { if(l == i || l == j || l == k) continue; assign[3] = has_counter(e[3].event,l); if (valid_assign(assign, r_pmcs, cnt)) goto done; } } } } /* we cannot satisfy the constraints */ return PFMLIB_ERR_NOASSIGN; done: memset(pc, 0, cnt*sizeof(pfmlib_reg_t)); memset(pd, 0, cnt*sizeof(pfmlib_reg_t)); for (j=0; j < cnt ; j++ ) { reg.pmc_val = 0; /* clear all */ /* if not specified per event, then use default (could be zero: measure nothing) */ reg.pmc_plm = e[j].plm ? e[j].plm: inp->pfp_dfl_plm; reg.pmc_oi = 1; /* overflow interrupt */ reg.pmc_pm = inp->pfp_flags & PFMLIB_PFP_SYSTEMWIDE? 
1 : 0; reg.pmc_es = generic_pe[e[j].event].pme_entry_code.pme_gen_code.pme_code; pc[j].reg_num = assign[j]; pc[j].reg_value = reg.pmc_val; pc[j].reg_addr = PFMLIB_GEN_IA64_PMC_BASE+j; pd[j].reg_num = assign[j]; pd[j].reg_addr = assign[j]; __pfm_vbprintf("[PMC%u(pmc%u)=0x%lx,es=0x%02x,plm=%d pm=%d] %s\n", assign[j], assign[j], reg.pmc_val, reg.pmc_es,reg.pmc_plm, reg.pmc_pm, generic_pe[e[j].event].pme_name); __pfm_vbprintf("[PMD%u(pmd%u)]\n", pd[j].reg_num, pd[j].reg_num); } /* number of PMC programmed */ outp->pfp_pmc_count = cnt; outp->pfp_pmd_count = cnt; return PFMLIB_SUCCESS; } static int pfm_gen_ia64_dispatch_events(pfmlib_input_param_t *inp, void *dummy1, pfmlib_output_param_t *outp, void *dummy2) { return pfm_gen_ia64_dispatch_counters(inp, outp); } static int pfm_gen_ia64_get_event_code(unsigned int i, unsigned int cnt, int *code) { if (cnt != PFMLIB_CNT_FIRST && (cnt < 4 || cnt > 7)) return PFMLIB_ERR_INVAL; *code = (int)generic_pe[i].pme_entry_code.pme_gen_code.pme_code; return PFMLIB_SUCCESS; } static char * pfm_gen_ia64_get_event_name(unsigned int i) { return generic_pe[i].pme_name; } static void pfm_gen_ia64_get_event_counters(unsigned int j, pfmlib_regmask_t *counters) { unsigned int i; memset(counters, 0, sizeof(*counters)); for(i=0; i < pfm_gen_ia64_counters; i++) { if (pfm_regmask_isset(&generic_pe[j].pme_counters, i)) pfm_regmask_set(counters, i); } } static void pfm_gen_ia64_get_impl_pmcs(pfmlib_regmask_t *impl_pmcs) { *impl_pmcs = pfm_gen_ia64_impl_pmcs; } static void pfm_gen_ia64_get_impl_pmds(pfmlib_regmask_t *impl_pmds) { *impl_pmds = pfm_gen_ia64_impl_pmds; } static void pfm_gen_ia64_get_impl_counters(pfmlib_regmask_t *impl_counters) { unsigned int i = 0; /* pmd4-pmd7 */ for(i=4; i < 8; i++) pfm_regmask_set(impl_counters, i); } static void pfm_gen_ia64_get_hw_counter_width(unsigned int *width) { *width = pfm_gen_ia64_counter_width; } static int pfm_gen_ia64_get_event_desc(unsigned int ev, char **str) { switch(ev) { case 
PME_IA64_GEN_CPU_CYCLES: *str = strdup("CPU cycles"); break; case PME_IA64_GEN_INST_RETIRED: *str = strdup("IA-64 instructions retired"); break; default: *str = NULL; } return PFMLIB_SUCCESS; } static int pfm_gen_ia64_get_cycle_event(pfmlib_event_t *e) { e->event = PME_IA64_GEN_CPU_CYCLES; return PFMLIB_SUCCESS; } static int pfm_gen_ia64_get_inst_retired(pfmlib_event_t *e) { e->event = PME_IA64_GEN_INST_RETIRED; return PFMLIB_SUCCESS; } pfm_pmu_support_t generic_ia64_support={ .pmu_name ="IA-64", .pmu_type = PFMLIB_GEN_IA64_PMU, .pme_count = PME_GEN_COUNT, .pmc_count = 4+4, .pmd_count = PMU_GEN_IA64_MAX_COUNTERS, .num_cnt = PMU_GEN_IA64_MAX_COUNTERS, .get_event_code = pfm_gen_ia64_get_event_code, .get_event_name = pfm_gen_ia64_get_event_name, .get_event_counters = pfm_gen_ia64_get_event_counters, .dispatch_events = pfm_gen_ia64_dispatch_events, .pmu_detect = pfm_gen_ia64_detect, .pmu_init = pfm_gen_ia64_init, .get_impl_pmcs = pfm_gen_ia64_get_impl_pmcs, .get_impl_pmds = pfm_gen_ia64_get_impl_pmds, .get_impl_counters = pfm_gen_ia64_get_impl_counters, .get_hw_counter_width = pfm_gen_ia64_get_hw_counter_width, .get_event_desc = pfm_gen_ia64_get_event_desc, .get_cycle_event = pfm_gen_ia64_get_cycle_event, .get_inst_retired_event = pfm_gen_ia64_get_inst_retired }; papi-5.6.0/src/libpfm4/docs/man3/libpfm_intel_snbep_unc_ubo.3000664 001750 001750 00000005336 13216244364 026110 0ustar00jshenry1963jshenry1963000000 000000 .TH LIBPFM 3 "August, 2012" "" "Linux Programmer's Manual" .SH NAME libpfm_intel_snbep_unc_ubo - support for Intel Sandy Bridge-EP U-Box uncore PMU .SH SYNOPSIS .nf .B #include .sp .B PMU name: snbep_unc_ubo .B PMU desc: Intel Sandy Bridge-EP U-Box uncore PMU .sp .SH DESCRIPTION The library supports the Intel Sandy Bridge system configuration unit (U-Box) uncore PMU. This PMU model only exists on Sandy Bridge model 45. There is only one U-Box PMU per processor socket. 
.SH MODIFIERS The following modifiers are supported on Intel Sandy Bridge U-Box uncore PMU: .TP .B i Invert the meaning of the event. The counter will now count HA cycles in which the event is \fBnot\fR occurring. This is a boolean modifier. .TP .B e Enable edge detection, i.e., count only when there is a state transition from no occurrence of the event to at least one occurrence. This modifier must be combined with a threshold modifier (t) with a value greater than or equal to one. This is a boolean modifier. .TP .B t Set the threshold value. When set to a non-zero value, the counter counts the number of HA cycles in which the number of occurrences of the event is greater than or equal to the threshold. This is an integer modifier with values in the range [0:15]. .TP .B oi Invert the meaning of the occupancy event POWER_STATE_OCCUPANCY. The counter will now count PCU cycles in which the event is \fBnot\fR occurring. This is a boolean modifier. .TP .B oe Enable edge detection for the occupancy event POWER_STATE_OCCUPANCY. The event now counts only when there is a state transition from no occurrence of the event to at least one occurrence. This modifier must be combined with a threshold modifier (t) with a value greater than or equal to one. This is a boolean modifier. .TP .B ff Enable frequency band filtering. This modifier applies only to the UNC_P_FREQ_BANDx_CYCLES events, where x is [0-3]. The modifier expects an integer in the range [0-255]. The value is interpreted as a frequency value to be multiplied by 100MHz. Thus if the value is 32, then all cycles where the processor is running at 3.2GHz or more are counted. .SH Frequency band filtering There are 4 events which support frequency band filtering, namely, UNC_P_FREQ_BAND0_CYCLES, UNC_P_FREQ_BAND1_CYCLES, UNC_P_FREQ_BAND2_CYCLES, UNC_P_FREQ_BAND3_CYCLES. The frequency filter (available via the ff modifier) is stored in a PMU shared register which holds all 4 possible frequency bands, one per event.
However, the library generates the encoding for each event individually because it processes events one at a time. The caller or the underlying kernel interface may have to merge the band filter settings to program the filter register properly. .SH AUTHORS .nf Stephane Eranian .fi .PP papi-5.6.0/src/perfctr-2.6.x/linux/include/asm-arm/perfctr.h000664 001750 001750 00000011721 13216244367 025505 0ustar00jshenry1963jshenry1963000000 000000 /* $Id: perfctr.h,v 1.1.2.2 2009/06/11 12:33:51 mikpe Exp $ * ARM/XScale Performance-Monitoring Counters driver * * Copyright (C) 2005-2007, 2009 Mikael Pettersson */ #ifndef _ASM_ARM_PERFCTR_H #define _ASM_ARM_PERFCTR_H /* perfctr_info.cpu_type values */ #define PERFCTR_ARM_XSC1 1 #define PERFCTR_ARM_XSC2 2 struct perfctr_sum_ctrs { unsigned long long tsc; unsigned long long pmc[4]; }; struct perfctr_cpu_control { unsigned int tsc_on; unsigned int nractrs; /* # of a-mode counters */ unsigned int nrictrs; /* # of i-mode counters */ unsigned int pmc_map[4]; unsigned int evntsel[4]; /* one per counter, even on P5 */ int ireset[4]; /* < 0, for i-mode counters */ unsigned int _reserved1; unsigned int _reserved2; unsigned int _reserved3; unsigned int _reserved4; }; struct perfctr_cpu_state { unsigned int cstatus; struct { /* k1 is opaque in the user ABI */ unsigned int id; int isuspend_cpu; } k1; /* The two tsc fields must be inlined. Placing them in a sub-struct causes unwanted internal padding on x86-64.
*/
	unsigned int tsc_start;
	unsigned long long tsc_sum;
	/* per-counter state */
	struct {
		unsigned int map;
		unsigned int start;
		unsigned long long sum;
	} pmc[4];	/* the size is not part of the user ABI */
#ifdef __KERNEL__
	/* kernel-only part: per-CPU-type register images and the control data */
	union {
		struct {
			unsigned int pmnc;
		} xsc1;
		struct {
			unsigned int evtsel;
			unsigned int inten;
		} xsc2;
	} arm;
	struct perfctr_cpu_control control;
#endif
};

/* cstatus is a re-encoding of control.tsc_on/nractrs/nrictrs
   which should have less overhead in most cases

   Layout produced by perfctr_mk_cstatus():
     bit 31       tsc_on
     bits 16..    nrictrs
     bits 8..     nractrs+nrictrs
     bits 0..     nractrs
   The accessors below mask each count field with 0x7F. */

/* pack the three control fields into a cstatus word */
static inline
unsigned int perfctr_mk_cstatus(unsigned int tsc_on, unsigned int nractrs,
				unsigned int nrictrs)
{
	return (tsc_on<<31) | (nrictrs<<16) | ((nractrs+nrictrs)<<8) | nractrs;
}

/* non-zero when any field of cstatus is set */
static inline unsigned int perfctr_cstatus_enabled(unsigned int cstatus)
{
	return cstatus;
}

/* non-zero when the tsc_on bit (bit 31) is set */
static inline int perfctr_cstatus_has_tsc(unsigned int cstatus)
{
	return (int)cstatus < 0;	/* test and jump on sign */
}

/* the nractrs field */
static inline unsigned int perfctr_cstatus_nractrs(unsigned int cstatus)
{
	return cstatus & 0x7F;		/* and with imm8 */
}

/* the nractrs+nrictrs field */
static inline unsigned int perfctr_cstatus_nrctrs(unsigned int cstatus)
{
	return (cstatus >> 8) & 0x7F;
}

/* non-zero when the nrictrs field is non-zero (the field is not shifted down) */
static inline unsigned int perfctr_cstatus_has_ictrs(unsigned int cstatus)
{
	return cstatus & (0x7F << 16);
}

/*
 * 'struct siginfo' support for perfctr overflow signals.
 * In unbuffered mode, si_code is set to SI_PMC_OVF and a bitmask
 * describing which perfctrs overflowed is put in si_pmc_ovf_mask.
 * A bitmask is used since more than one perfctr can have overflowed
 * by the time the interrupt handler runs.
 *
 * glibc's headers don't seem to define __SI_FAULT or __SI_CODE(),
 * and including the kernel's definitions as well may cause redefinition
 * errors, so the user and kernel values are different #defines here.
 */
#ifdef __KERNEL__
#define SI_PMC_OVF	(__SI_FAULT|'P')
#else
#define SI_PMC_OVF	('P')
#endif
#define si_pmc_ovf_mask	_sifields._pad[0] /* XXX: use an unsigned field later */

/* version number for user-visible CPU-specific data */
#define PERFCTR_CPU_VERSION	0	/* XXX: not yet cast in stone */

#ifdef __KERNEL__

#if defined(CONFIG_PERFCTR) || defined(CONFIG_PERFCTR_MODULE)

/* Driver init/exit. */
extern int perfctr_cpu_init(void);
extern void perfctr_cpu_exit(void);

/* CPU type name. */
extern char *perfctr_cpu_name;

/* Hardware reservation. */
extern const char *perfctr_cpu_reserve(const char *service);
extern void perfctr_cpu_release(const char *service);

/* PRE: state has no running interrupt-mode counters.
   Check that the new control data is valid.
   Update the driver's private control data.
   Returns a negative error code if the control data is invalid. */
extern int perfctr_cpu_update_control(struct perfctr_cpu_state *state, cpumask_t *cpumask);

/* Read a-mode counters. Subtract from start and accumulate into sums.
   Must be called with preemption disabled. */
extern void perfctr_cpu_suspend(struct perfctr_cpu_state *state);

/* Write control registers. Read a-mode counters into start.
   Must be called with preemption disabled. */
extern void perfctr_cpu_resume(struct perfctr_cpu_state *state);

/* Perform an efficient combined suspend/resume operation.
   Must be called with preemption disabled. */
extern void perfctr_cpu_sample(struct perfctr_cpu_state *state);

/* The type of a perfctr overflow interrupt handler.
   It will be called in IRQ context, with preemption disabled. */
typedef void (*perfctr_ihandler_t)(unsigned long pc);

/* Operations related to overflow interrupt handling.
*/ #ifdef CONFIG_PERFCTR_INTERRUPT_SUPPORT extern void perfctr_cpu_set_ihandler(perfctr_ihandler_t); extern void perfctr_cpu_ireload(struct perfctr_cpu_state*); extern unsigned int perfctr_cpu_identify_overflow(struct perfctr_cpu_state*); #else static inline void perfctr_cpu_set_ihandler(perfctr_ihandler_t x) { } #endif static inline int perfctr_cpu_has_pending_interrupt(const struct perfctr_cpu_state *state) { return 0; } #endif /* CONFIG_PERFCTR */ #endif /* __KERNEL__ */ #endif /* _ASM_ARM_PERFCTR_H */ papi-5.6.0/src/components/host_micpower/tests/host_micpower_basic.c000664 001750 001750 00000005632 13216244357 027735 0ustar00jshenry1963jshenry1963000000 000000 /****************************/ /* THIS IS OPEN SOURCE CODE */ /****************************/ /** * @author Vince Weaver * * test case for micpower component * Based on coretemp test code by Vince Weaver * * * @brief * Tests basic component functionality */ #include #include #include #include "papi.h" #include "papi_test.h" #define NUM_EVENTS 1 int main (int argc, char **argv) { int retval,cid,numcmp; int EventSet = PAPI_NULL; long long values[NUM_EVENTS]; int code; char event_name[PAPI_MAX_STR_LEN]; int total_events=0; int r; const PAPI_component_info_t *cmpinfo = NULL; /* Set TESTS_QUIET variable */ tests_quiet( argc, argv ); /* PAPI Initialization */ retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) { test_fail(__FILE__, __LINE__,"PAPI_library_init failed\n",retval); } numcmp = PAPI_num_components(); for(cid=0; cidname); } if ( 0 != strncmp(cmpinfo->name,"host_micpower",13)) { continue; } code = PAPI_NATIVE_MASK; r = PAPI_enum_cmp_event( &code, PAPI_ENUM_FIRST, cid ); while ( r == PAPI_OK ) { retval = PAPI_event_code_to_name( code, event_name ); if ( retval != PAPI_OK ) { printf("Error translating %#x\n",code); test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); } if (!TESTS_QUIET) printf("%#x %s ",code,event_name); EventSet = PAPI_NULL; retval = 
PAPI_create_eventset( &EventSet ); if (retval != PAPI_OK) { test_fail(__FILE__, __LINE__, "PAPI_create_eventset()",retval); } retval = PAPI_add_event( EventSet, code ); if (retval != PAPI_OK) { test_fail(__FILE__, __LINE__, "PAPI_add_event()",retval); } retval = PAPI_start( EventSet); if (retval != PAPI_OK) { test_fail(__FILE__, __LINE__, "PAPI_start()",retval); } retval = PAPI_stop( EventSet, values); if (retval != PAPI_OK) { test_fail(__FILE__, __LINE__, "PAPI_stop()",retval); } if (!TESTS_QUIET) printf(" value: %lld\n",values[0]); retval = PAPI_cleanup_eventset( EventSet ); if (retval != PAPI_OK) { test_fail(__FILE__, __LINE__, "PAPI_cleanup_eventset()",retval); } retval = PAPI_destroy_eventset( &EventSet ); if (retval != PAPI_OK) { test_fail(__FILE__, __LINE__, "PAPI_destroy_eventset()",retval); } total_events++; r = PAPI_enum_cmp_event( &code, PAPI_ENUM_EVENTS, cid ); } } if (total_events==0) { test_skip(__FILE__,__LINE__,"No events from host_micpower found",0); } test_pass( __FILE__ ); return 0; } papi-5.6.0/src/ftests/fdmemtest.F000664 001750 001750 00000002506 13216244361 020734 0ustar00jshenry1963jshenry1963000000 000000 #include "fpapi_test.h" program dmemtest IMPLICIT integer (p) INTEGER retval INTEGER*8 dmeminfo(PAPIF_DMEM_MAXVAL) integer tests_quiet, get_quiet external get_quiet real EventSet tests_quiet = get_quiet() EventSet = PAPI_NULL retval = PAPI_VER_CURRENT call PAPIf_library_init(retval) if ( retval.NE.PAPI_VER_CURRENT) then call ftest_fail(__FILE__, __LINE__, . 'PAPI_library_init', retval) end if CALL PAPIf_get_dmem_info(dmeminfo, retval) if ( retval.NE.PAPI_OK) then stop end if if (tests_quiet .EQ. 
0) then print *, "Mem Size: ", dmeminfo(PAPIF_DMEM_VMSIZE) print *, "Mem Resident: ", dmeminfo(PAPIF_DMEM_RESIDENT) print *, "Mem High Water: ", dmeminfo(PAPIF_DMEM_HIGH_WATER) print *, "Mem Shared: ", dmeminfo(PAPIF_DMEM_SHARED) print *, "Mem Text: ", dmeminfo(PAPIF_DMEM_TEXT) print *, "Mem Library: ", dmeminfo(PAPIF_DMEM_LIBRARY) print *, "Mem Heap: ", dmeminfo(PAPIF_DMEM_HEAP) print *, "Mem Locked: ", dmeminfo(PAPIF_DMEM_LOCKED) print *, "Mem Stack: ", dmeminfo(PAPIF_DMEM_STACK) print *, "Mem Pagesize: ", dmeminfo(PAPIF_DMEM_PAGESIZE) end if call ftests_pass(__FILE__) end papi-5.6.0/src/ctests/overflow_twoevents.c000664 001750 001750 00000020572 13216244360 022761 0ustar00jshenry1963jshenry1963000000 000000 /* * File: overflow_twoevents.c * Author: min@cs.utk.edu * Min Zhou * Mods: Philip Mucci * mucci@cs.utk.edu */ /* This file performs the following test: overflow dispatch on 2 counters. */ #include #include #include #include "papi.h" #include "papi_test.h" #include "do_loops.h" #define OVER_FMT "handler(%d) Overflow at %p! vector=%#llx\n" #define OUT_FMT "%-12s : %18lld%18lld%18lld\n" #define VEC_FMT " at vector %#llx, event %-12s : %6d\n" typedef struct { long long mask; int count; } ocount_t; /* there are two experiments: batch and interleaf; for each experiment there are three possible vectors, one counter overflows, the other counter overflows, both overflow */ static ocount_t overflow_counts[2][3] = { {{0, 0}, {0, 0}, {0, 0}}, {{0, 0}, {0, 0}, {0, 0}} }; static int total_unknown = 0; static void handler( int mode, void *address, long long overflow_vector, void *context ) { ( void ) context; /*unused */ int i; if ( !TESTS_QUIET ) { fprintf( stderr, OVER_FMT, mode, address, overflow_vector ); } /* Look for the overflow_vector entry */ for ( i = 0; i < 3; i++ ) { if ( overflow_counts[mode][i].mask == overflow_vector ) { overflow_counts[mode][i].count++; return; } } /* Didn't find it so add it. 
*/ for ( i = 0; i < 3; i++ ) { if ( overflow_counts[mode][i].mask == ( long long ) 0 ) { overflow_counts[mode][i].mask = overflow_vector; overflow_counts[mode][i].count = 1; return; } } /* Unknown entry!?! */ total_unknown++; } static void handler_batch( int EventSet, void *address, long long overflow_vector, void *context ) { ( void ) EventSet; /*unused */ handler( 0, address, overflow_vector, context ); } static void handler_interleaf( int EventSet, void *address, long long overflow_vector, void *context ) { ( void ) EventSet; /*unused */ handler( 1, address, overflow_vector, context ); } int main( int argc, char **argv ) { int EventSet = PAPI_NULL; long long ( values[3] )[2]; int retval; int PAPI_event, k, idx[4]; char event_name[3][PAPI_MAX_STR_LEN]; int num_events1; int threshold = THRESHOLD; int quiet; /* Set TESTS_QUIET variable */ quiet=tests_quiet( argc, argv ); retval = PAPI_library_init( PAPI_VER_CURRENT ); if (retval != PAPI_VER_CURRENT ) { test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); } retval = PAPI_create_eventset( &EventSet ); if (retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); } /* decide which of PAPI_FP_INS, PAPI_FP_OPS or PAPI_TOT_INS to add, depending on the availability and derived status of the event on this platform */ if ( ( PAPI_event = find_nonderived_event( ) ) == 0 ) { if (!quiet) printf("No events found!\n"); test_skip( __FILE__, __LINE__, "no PAPI_event", 0 ); } if ( ( retval = PAPI_add_event( EventSet, PAPI_event ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_add_event", retval ); if ( ( retval = PAPI_add_event( EventSet, PAPI_TOT_CYC ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_add_event", retval ); retval = PAPI_start( EventSet ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_start", retval ); do_flops( NUM_FLOPS ); if ( ( retval = PAPI_stop( EventSet, values[0] ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); /* Set both 
overflows after adding both events (batch) */ retval = PAPI_overflow( EventSet, PAPI_event, threshold, 0, handler_batch ); if (retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_overflow", retval ); } retval = PAPI_overflow( EventSet, PAPI_TOT_CYC, threshold, 0, handler_batch ); if (retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_overflow", retval ); } if ( ( retval = PAPI_start( EventSet ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_start", retval ); do_flops( NUM_FLOPS ); retval = PAPI_stop( EventSet, values[1] ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); num_events1 = 1; retval = PAPI_get_overflow_event_index( EventSet, 1, &idx[0], &num_events1 ); if ( retval != PAPI_OK ) { printf( "PAPI_get_overflow_event_index error: %s\n", PAPI_strerror( retval ) ); } num_events1 = 1; retval = PAPI_get_overflow_event_index( EventSet, 2, &idx[1], &num_events1 ); if ( retval != PAPI_OK ) { printf( "PAPI_get_overflow_event_index error: %s\n", PAPI_strerror( retval ) ); } if ( ( retval = PAPI_cleanup_eventset( EventSet ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset", retval ); /* Add each event and set its overflow (interleaved) */ if ( ( retval = PAPI_add_event( EventSet, PAPI_event ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_add_event", retval ); if ( ( retval = PAPI_overflow( EventSet, PAPI_event, threshold, 0, handler_interleaf ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_overflow", retval ); if ( ( retval = PAPI_add_event( EventSet, PAPI_TOT_CYC ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_add_event", retval ); if ( ( retval = PAPI_overflow( EventSet, PAPI_TOT_CYC, threshold, 0, handler_interleaf ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_overflow", retval ); if ( ( retval = PAPI_start( EventSet ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_start", retval ); do_flops( NUM_FLOPS ); if ( ( retval = PAPI_stop( EventSet, values[2] ) ) != PAPI_OK 
) test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); num_events1 = 1; retval = PAPI_get_overflow_event_index( EventSet, 1, &idx[2], &num_events1 ); if ( retval != PAPI_OK ) { printf( "PAPI_get_overflow_event_index error: %s\n", PAPI_strerror( retval ) ); } num_events1 = 1; retval = PAPI_get_overflow_event_index( EventSet, 2, &idx[3], &num_events1 ); if ( retval != PAPI_OK ) { printf( "PAPI_get_overflow_event_index error: %s\n", PAPI_strerror( retval ) ); } if ( ( retval = PAPI_cleanup_eventset( EventSet ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset", retval ); retval = PAPI_event_code_to_name( PAPI_event, event_name[0] ); if (retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); } retval = PAPI_event_code_to_name( PAPI_TOT_CYC, event_name[1] ); if (retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); } strcpy( event_name[2], "Unknown" ); if (!TESTS_QUIET) { printf( "Test case: Overflow dispatch of both events in set with 2 events.\n" ); printf( "---------------------------------------------------------------\n" ); printf( "Threshold for overflow is: %d\n", threshold ); printf( "Using %d iterations of c += a*b\n", NUM_FLOPS ); printf( "-----------------------------------------------\n" ); printf( "Test type : %18s%18s%18s\n", "1 (no overflow)", "2 (batch)", "3 (interleaf)" ); printf( OUT_FMT, event_name[0], ( values[0] )[0], ( values[1] )[0], ( values[2] )[0] ); printf( OUT_FMT, event_name[1], ( values[0] )[1], ( values[1] )[1], ( values[2] )[1] ); printf( "\n" ); printf( "Predicted overflows at event %-12s : %6d\n", event_name[0], ( int ) ( ( values[0] )[0] / threshold ) ); printf( "Predicted overflows at event %-12s : %6d\n", event_name[1], ( int ) ( ( values[0] )[1] / threshold ) ); printf( "\nBatch overflows (add, add, over, over):\n" ); for ( k = 0; k < 2; k++ ) { if ( overflow_counts[0][k].mask ) { printf( VEC_FMT, ( long long ) overflow_counts[0][k].mask, 
event_name[idx[k]], overflow_counts[0][k].count ); } } printf( "\nInterleaved overflows (add, over, add, over):\n" ); for ( k = 0; k < 2; k++ ) { if ( overflow_counts[1][k].mask ) printf( VEC_FMT, ( long long ) overflow_counts[1][k].mask, event_name[idx[k + 2]], overflow_counts[1][k].count ); } printf( "\nCases 2+3 Unknown overflows: %d\n", total_unknown ); printf( "-----------------------------------------------\n" ); } if ( overflow_counts[0][0].count == 0 || overflow_counts[0][1].count == 0 ) test_fail( __FILE__, __LINE__, "a batch counter had no overflows", 1 ); if ( overflow_counts[1][0].count == 0 || overflow_counts[1][1].count == 0 ) test_fail( __FILE__, __LINE__, "an interleaved counter had no overflows", 1 ); if ( total_unknown > 0 ) test_fail( __FILE__, __LINE__, "Unknown counter had overflows", 1 ); test_pass( __FILE__ ); return 0; } papi-5.6.0/src/libpfm-3.y/include/perfmon/pfmlib_comp_powerpc.h000664 001750 001750 00000003344 13216244362 026477 0ustar00jshenry1963jshenry1963000000 000000 /* * PowerPC compiler specific macros * * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef __PFMLIB_COMP_POWERPC_H__ #define __PFMLIB_COMP_POWERPC_H__ #ifndef __PFMLIB_COMP_H__ #error "you should never include this file directly, use pfmlib_comp.h" #endif #ifndef __powerpc__ #error "you should not be including this file" #endif #ifdef __cplusplus extern "C" { #endif static inline unsigned long pfmlib_popcnt(unsigned long v) { unsigned long sum = 0; for(; v ; v >>=1) { if (v & 0x1) sum++; } return sum; } #ifdef __cplusplus /* extern C */ } #endif #endif /* __PFMLIB_COMP_POWERPC_H__ */ papi-5.6.0/src/ctests/get_event_component.c000664 001750 001750 00000003650 13216244360 023040 0ustar00jshenry1963jshenry1963000000 000000 /* * File: get_event_component.c * Author: Vince Weaver * vweaver1@eecs.utk.edu */ /* This test makes sure PAPI_get_event_component() works */ #include #include "papi.h" #include "papi_test.h" int main( int argc, char **argv ) { int i; int retval; PAPI_event_info_t info; int numcmp, cid, our_cid; const PAPI_component_info_t* cmpinfo; /* Set TESTS_QUIET variable */ tests_quiet( argc, argv ); /* Init PAPI library */ retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) { test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); } numcmp = PAPI_num_components( ); /* Loop through all components */ for( cid = 0; cid < numcmp; cid++ ) { cmpinfo = PAPI_get_component_info( cid ); if (cmpinfo == NULL) { test_fail( __FILE__, __LINE__, "PAPI_get_component_info", 2 ); } if (cmpinfo->disabled && !TESTS_QUIET) { printf( "Name: %-23s %s\n", cmpinfo->name ,cmpinfo->description); printf(" \\-> Disabled: %s\n",cmpinfo->disabled_reason); continue; } i = 0 | PAPI_NATIVE_MASK; retval = PAPI_enum_cmp_event( &i, PAPI_ENUM_FIRST, cid ); if (retval!=PAPI_OK) 
continue; do { if (PAPI_get_event_info( i, &info ) != PAPI_OK) { if (!TESTS_QUIET) { printf("Getting information about event: %#x failed\n", i); } continue; } our_cid=PAPI_get_event_component(i); if (our_cid!=cid) { if (!TESTS_QUIET) { printf("%d %d %s\n",cid,our_cid,info.symbol); } test_fail( __FILE__, __LINE__, "component mismatch", 1 ); } if (!TESTS_QUIET) { printf("%d %d %s\n",cid,our_cid,info.symbol); } } while ( PAPI_enum_cmp_event( &i, PAPI_ENUM_EVENTS, cid ) == PAPI_OK ); } test_pass( __FILE__ ); return 0; } papi-5.6.0/src/perfctr-2.6.x/linux/drivers/perfctr/global.h000775 001750 001750 00000000704 13216244366 025444 0ustar00jshenry1963jshenry1963000000 000000 /* $Id: global.h,v 1.7.2.1 2005/01/22 14:04:03 mikpe Exp $ * Global-mode performance-monitoring counters. * * Copyright (C) 2000-2005 Mikael Pettersson */ #ifdef CONFIG_PERFCTR_GLOBAL extern int gperfctr_ioctl(struct file*, unsigned int, unsigned long); extern void gperfctr_init(void); #else extern int gperfctr_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { return -EINVAL; } static inline void gperfctr_init(void) { } #endif papi-5.6.0/src/perfctr-2.7.x/examples/self/x86.c000664 001750 001750 00000004550 13216244370 023105 0ustar00jshenry1963jshenry1963000000 000000 /* $Id: x86.c,v 1.5 2005/03/14 01:48:42 mikpe Exp $ * x86-specific code. * * Copyright (C) 1999-2004 Mikael Pettersson */ #include #include #include #include "libperfctr.h" #include "arch.h" void do_setup(const struct perfctr_info *info, struct perfctr_cpu_control *cpu_control) { unsigned int tsc_on = 1; unsigned int nractrs = 1; unsigned int pmc_map0 = 0; unsigned int evntsel0 = 0; memset(cpu_control, 0, sizeof *cpu_control); /* Attempt to set up control to count clocks via the TSC and retired instructions via PMC0. 
*/ switch( info->cpu_type ) { case PERFCTR_X86_GENERIC: nractrs = 0; /* no PMCs available */ break; #if !defined(__x86_64__) case PERFCTR_X86_INTEL_P5: case PERFCTR_X86_INTEL_P5MMX: case PERFCTR_X86_CYRIX_MII: /* event 0x16 (INSTRUCTIONS_EXECUTED), count at CPL 3 */ evntsel0 = 0x16 | (2 << 6); break; case PERFCTR_X86_INTEL_P6: case PERFCTR_X86_INTEL_PII: case PERFCTR_X86_INTEL_PIII: case PERFCTR_X86_INTEL_PENTM: case PERFCTR_X86_AMD_K7: #endif case PERFCTR_X86_AMD_K8: case PERFCTR_X86_AMD_K8C: /* event 0xC0 (INST_RETIRED), count at CPL > 0, Enable */ evntsel0 = 0xC0 | (1 << 16) | (1 << 22); break; #if !defined(__x86_64__) case PERFCTR_X86_WINCHIP_C6: tsc_on = 0; /* no working TSC available */ evntsel0 = 0x02; /* X86_INSTRUCTIONS */ break; case PERFCTR_X86_WINCHIP_2: tsc_on = 0; /* no working TSC available */ evntsel0 = 0x16; /* INSTRUCTIONS_EXECUTED */ break; case PERFCTR_X86_VIA_C3: pmc_map0 = 1; /* redirect PMC0 to PERFCTR1 */ evntsel0 = 0xC0; /* INSTRUCTIONS_EXECUTED */ break; case PERFCTR_X86_INTEL_P4: case PERFCTR_X86_INTEL_P4M2: #endif case PERFCTR_X86_INTEL_P4M3: /* PMC0: IQ_COUNTER0 with fast RDPMC */ pmc_map0 = 0x0C | (1 << 31); /* IQ_CCCR0: required flags, ESCR 4 (CRU_ESCR0), Enable */ evntsel0 = (0x3 << 16) | (4 << 13) | (1 << 12); /* CRU_ESCR0: event 2 (instr_retired), NBOGUSNTAG, CPL>0 */ cpu_control->p4.escr[0] = (2 << 25) | (1 << 9) | (1 << 2); break; default: fprintf(stderr, "cpu type %u (%s) not supported\n", info->cpu_type, perfctr_info_cpu_name(info)); exit(1); } cpu_control->tsc_on = tsc_on; cpu_control->nractrs = nractrs; cpu_control->pmc_map[0] = pmc_map0; cpu_control->evntsel[0] = evntsel0; } papi-5.6.0/src/perfctr-2.7.x/etc/costs/Duron-700000664 001750 001750 00000001374 13216244367 023000 0ustar00jshenry1963jshenry1963000000 000000 [data from a 700MHz Duron] PERFCTR INIT: vendor 2, family 6, model 3, stepping 1, clock 707080 kHz PERFCTR INIT: NITER == 64 PERFCTR INIT: loop overhead is 109 cycles PERFCTR INIT: rdtsc cost is 16.9 cycles (1193 
total) PERFCTR INIT: rdpmc cost is 13.2 cycles (954 total) PERFCTR INIT: rdmsr (counter) cost is 51.5 cycles (3405 total) PERFCTR INIT: rdmsr (evntsel) cost is 52.2 cycles (3454 total) PERFCTR INIT: wrmsr (counter) cost is 83.1 cycles (5430 total) PERFCTR INIT: wrmsr (evntsel) cost is 231.2 cycles (14908 total) PERFCTR INIT: read cr4 cost is 1.8 cycles (229 total) PERFCTR INIT: write cr4 cost is 62.6 cycles (4121 total) PERFCTR INIT: write LVTPC cost is 5.0 cycles (431 total) perfctr: driver 2.7.3, cpu type AMD K7/K8 at 707080 kHz papi-5.6.0/src/perfctr-2.6.x/linux/drivers/perfctr/global.c000775 001750 001750 00000014474 13216244366 025450 0ustar00jshenry1963jshenry1963000000 000000 /* $Id: global.c,v 1.38.2.7 2009/06/11 08:11:31 mikpe Exp $ * Global-mode performance-monitoring counters via /dev/perfctr. * * Copyright (C) 2000-2006, 2008, 2009 Mikael Pettersson * * XXX: Doesn't do any authentication yet. Should we limit control * to root, or base it on having write access to /dev/perfctr? 
*/ #include #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) #include #endif #define __NO_VERSION__ #include #include #include #include #include #include #include "compat.h" #include "global.h" #include "marshal.h" static const char this_service[] = __FILE__; static int hardware_is_ours = 0; static struct timer_list sampling_timer; static DEFINE_MUTEX(control_mutex); static unsigned int nr_active_cpus = 0; struct gperfctr { struct perfctr_cpu_state cpu_state; spinlock_t lock; } ____cacheline_aligned; static struct gperfctr per_cpu_gperfctr[NR_CPUS] __cacheline_aligned; static int reserve_hardware(void) { const char *other; if (hardware_is_ours) return 0; other = perfctr_cpu_reserve(this_service); if (other) { printk(KERN_ERR __FILE__ ":%s: failed because hardware is taken by '%s'\n", __FUNCTION__, other); return -EBUSY; } hardware_is_ours = 1; __module_get(THIS_MODULE); return 0; } static void release_hardware(void) { int i; nr_active_cpus = 0; if (hardware_is_ours) { hardware_is_ours = 0; if (sampling_timer.data) del_timer(&sampling_timer); sampling_timer.data = 0; perfctr_cpu_release(this_service); module_put(THIS_MODULE); for(i = 0; i < NR_CPUS; ++i) per_cpu_gperfctr[i].cpu_state.cstatus = 0; } } static void sample_this_cpu(void *unused) { /* PREEMPT note: when called via smp_call_function(), this is in IRQ context with preemption disabled. 
/*
 * Convert a duration in microseconds to timer ticks (jiffies), rounding up
 * so that any non-zero duration maps to at least one tick.
 *
 * The ceiling is computed as quotient plus a remainder correction instead of
 * adding (usecs-per-jiffy - 1) to the input before dividing: that addition
 * wraps for inputs close to ULONG_MAX and would silently turn a very long
 * interval into a very short one.
 */
static unsigned long usectojiffies(unsigned long usec)
{
	const unsigned long usec_per_jiffy = 1000000 / HZ;

	return usec / usec_per_jiffy + (usec % usec_per_jiffy != 0);
}
*/ struct gperfctr *perfctr; perfctr = &per_cpu_gperfctr[smp_processor_id()]; if (perfctr_cstatus_enabled(perfctr->cpu_state.cstatus)) perfctr_cpu_resume(&perfctr->cpu_state); } static void start_all_cpus(void) { on_each_cpu(start_this_cpu, NULL, 1); } static int gperfctr_control(struct perfctr_struct_buf *argp) { int ret; struct gperfctr *perfctr; struct gperfctr_cpu_control cpu_control; ret = perfctr_copy_from_user(&cpu_control, argp, &gperfctr_cpu_control_sdesc); if (ret) return ret; if (cpu_control.cpu >= NR_CPUS || !cpu_online(cpu_control.cpu) || perfctr_cpu_is_forbidden(cpu_control.cpu)) return -EINVAL; /* we don't permit i-mode counters */ if (cpu_control.cpu_control.nrictrs != 0) return -EPERM; mutex_lock(&control_mutex); ret = -EBUSY; if (hardware_is_ours) goto out_unlock; /* you have to stop them first */ perfctr = &per_cpu_gperfctr[cpu_control.cpu]; spin_lock(&perfctr->lock); perfctr->cpu_state.tsc_start = 0; perfctr->cpu_state.tsc_sum = 0; memset(&perfctr->cpu_state.pmc, 0, sizeof perfctr->cpu_state.pmc); perfctr->cpu_state.control = cpu_control.cpu_control; ret = perfctr_cpu_update_control(&perfctr->cpu_state, NULL); spin_unlock(&perfctr->lock); if (ret < 0) goto out_unlock; if (perfctr_cstatus_enabled(perfctr->cpu_state.cstatus)) ++nr_active_cpus; ret = nr_active_cpus; out_unlock: mutex_unlock(&control_mutex); return ret; } static int gperfctr_start(unsigned int interval_usec) { int ret; if (interval_usec && interval_usec < 10000) return -EINVAL; mutex_lock(&control_mutex); ret = nr_active_cpus; if (ret > 0) { if (reserve_hardware() < 0) { ret = -EBUSY; } else { start_all_cpus(); start_sampling_timer(interval_usec); } } mutex_unlock(&control_mutex); return ret; } static int gperfctr_stop(void) { mutex_lock(&control_mutex); release_hardware(); mutex_unlock(&control_mutex); return 0; } static int gperfctr_read(struct perfctr_struct_buf *argp) { struct gperfctr *perfctr; struct gperfctr_cpu_state state; int err; err = perfctr_copy_from_user(&state, argp, 
&gperfctr_cpu_state_only_cpu_sdesc); if (err) return err; if (state.cpu >= NR_CPUS || !cpu_online(state.cpu)) return -EINVAL; if (!sampling_timer.data) sample_one_cpu(state.cpu); perfctr = &per_cpu_gperfctr[state.cpu]; spin_lock(&perfctr->lock); state.cpu_control = perfctr->cpu_state.control; //state.sum = perfctr->cpu_state.sum; { int j; state.sum.tsc = perfctr->cpu_state.tsc_sum; for(j = 0; j < ARRAY_SIZE(state.sum.pmc); ++j) state.sum.pmc[j] = perfctr->cpu_state.pmc[j].sum; } spin_unlock(&perfctr->lock); return perfctr_copy_to_user(argp, &state, &gperfctr_cpu_state_sdesc); } int gperfctr_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { switch (cmd) { case GPERFCTR_CONTROL: return gperfctr_control((struct perfctr_struct_buf*)arg); case GPERFCTR_READ: return gperfctr_read((struct perfctr_struct_buf*)arg); case GPERFCTR_STOP: return gperfctr_stop(); case GPERFCTR_START: return gperfctr_start(arg); } return -EINVAL; } void __init gperfctr_init(void) { int i; for(i = 0; i < NR_CPUS; ++i) per_cpu_gperfctr[i].lock = SPIN_LOCK_UNLOCKED; } papi-5.6.0/man/man3/PAPI_query_event.3000664 001750 001750 00000002753 13216244356 021422 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPI_query_event" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPI_query_event \- .PP Query if PAPI event exists\&. .SH SYNOPSIS .br .PP .SH "Detailed Description" .PP .PP \fBC Interface:\fP .RS 4 #include <\fBpapi\&.h\fP> .br int \fBPAPI_query_event(int EventCode)\fP; .RE .PP \fBPAPI_query_event()\fP asks the PAPI library if the PAPI Preset event can be counted on this architecture\&. If the event CAN be counted, the function returns PAPI_OK\&. If the event CANNOT be counted, the function returns an error code\&. This function also can be used to check the syntax of native and user events\&. .PP \fBParameters:\fP .RS 4 \fIEventCode\fP -- a defined event such as PAPI_TOT_INS\&. 
.RE .PP \fBReturn values:\fP .RS 4 \fIPAPI_EINVAL\fP One or more of the arguments is invalid\&. .br \fIPAPI_ENOEVNT\fP The PAPI preset is not available on the underlying hardware\&. .RE .PP \fBExamples\fP .RS 4 .PP .nf * int retval; * // Initialize the library * retval = PAPI_library_init(PAPI_VER_CURRENT); * if (retval != PAPI_VER_CURRENT) { * fprintf(stderr,\"PAPI library init error!\\n\"); * exit(1); * } * if (PAPI_query_event(PAPI_TOT_INS) != PAPI_OK) { * fprintf(stderr,\"No instruction counter? How lame\&.\\n\"); * exit(1); * } * .fi .PP .RE .PP \fBSee Also:\fP .RS 4 \fBPAPI_remove_event\fP .PP \fBPAPI_remove_events\fP .PP PAPI_presets .PP PAPI_native .RE .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. papi-5.6.0/src/components/net/CHANGES000664 001750 001750 00000000766 13216244357 021306 0ustar00jshenry1963jshenry1963000000 000000 Net component changelog: 2011-11-07 Jose Pedro Oliveira * Dynamically detects the network interfaces (i.e. the ones listed in /proc/net/dev) * No longer needs to fork/exec the external ifconfig command and parse its output. It now reads the Linux kernel network statistics directly from /proc/net/dev. * Each network interface now has 16 events instead of 13 (all counters in /proc/net/dev). 
* Adds support for PAPI_event_name_to_code() * Adds a couple of small tests/examples papi-5.6.0/src/libpfm4/lib/events/000775 001750 001750 00000000000 13216244365 020735 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/components/infiniband_umad/tests/000775 001750 001750 00000000000 13216244357 023765 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/libpfm4/docs/man3/pfm_strerror.3000664 001750 001750 00000002245 13216244364 023263 0ustar00jshenry1963jshenry1963000000 000000 .TH LIBPFM 3 "September, 2009" "" "Linux Programmer's Manual" .SH NAME pfm_strerror \- return constant string describing error code .SH SYNOPSIS .nf .B #include .sp .BI "const char *pfm_strerror(int "code); .sp .SH DESCRIPTION This function returns a string which describes the libpfm error value in \fBcode\fR. The string returned by the call is \fBread-only\fR. The function must \fBonly\fR be used with libpfm calls documented to return specific error codes. The value \-1 is not considered a specific error code. Strings and \fBpfm_pmu_t\fR return values cannot be used with this function. Typically \fBNULL\fR is returned in case of error for string values, and \fBPFM_PMU_NONE\fR is returned for \fBpfm_pmu_t\fR values. The function is also not designed to handle OS system call errors, i.e., errno values. .SH RETURN The function returns a pointer to the constant string describing the error code. The string is in English. If code is invalid then a default error message is returned. .SH ERRORS If the error code is invalid, then the function returns a pointer to a string which says "unknown error code". 
.SH AUTHOR Stephane Eranian .PP papi-5.6.0/src/components/micpower/tests/000775 001750 001750 00000000000 13216244357 022503 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/man/man3/PAPIF_start.3000664 001750 001750 00000000741 13216244355 020311 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPIF_start" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPIF_start \- .PP Start counting hardware events in an event set\&. .SH SYNOPSIS .br .PP .SH "Detailed Description" .PP .PP \fBFortran Interface:\fP .RS 4 #include 'fpapi\&.h' .br \fBPAPIF_start( C_INT EventSet, C_INT check )\fP .RE .PP \fBSee Also:\fP .RS 4 \fBPAPI_start\fP .RE .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. papi-5.6.0/src/components/rapl/Rules.rapl000664 001750 001750 00000000301 13216244360 022410 0ustar00jshenry1963jshenry1963000000 000000 COMPSRCS += components/rapl/linux-rapl.c COMPOBJS += linux-rapl.o linux-rapl.o: components/rapl/linux-rapl.c $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/rapl/linux-rapl.c -o linux-rapl.o papi-5.6.0/src/examples/PAPI_get_real_cyc.c000664 001750 001750 00000003312 13216244361 022534 0ustar00jshenry1963jshenry1963000000 000000 /****************************************************************************** * This is an example to show how to use low level function PAPI_get_real_cyc * * and PAPI_get_real_usec. 
/*
 * Dummy workload for the timing example: performs a fixed number of
 * integer additions and divisions so there is something to measure.
 * The computed value is discarded; the function always returns 0.
 */
int your_slow_code(void)
{
	int i;
	int tmp = 0;	/* read on the first iteration, so it must start defined */

	for (i = 1; i < 20000; i++) {
		tmp = (tmp + 100) / i;
	}
	return 0;
}
cycles (291 total) PERFCTR INIT: write cr4 cost is 62.8 cycles (4180 total) PERFCTR INIT: write LVTPC cost is 9.3 cycles (755 total) PERFCTR INIT: sync_core cost is 73.9 cycles (4887 total) perfctr: driver 2.7.10, cpu type AMD K7/K8 at 1746816 kHz papi-5.6.0/src/libpfm-3.y/lib/ultra3i_events.h000664 001750 001750 00000026007 13216244363 023077 0ustar00jshenry1963jshenry1963000000 000000 static pme_sparc_entry_t ultra3i_pe[] = { /* These two must always be first. */ { .pme_name = "Cycle_cnt", .pme_desc = "Accumulated cycles", .pme_ctrl = PME_CTRL_S0 | PME_CTRL_S1, .pme_val = 0x0, }, { .pme_name = "Instr_cnt", .pme_desc = "Number of instructions completed", .pme_ctrl = PME_CTRL_S0 | PME_CTRL_S1, .pme_val = 0x1, }, /* PIC0 events common to all UltraSPARC processors */ { .pme_name = "Dispatch0_IC_miss", .pme_desc = "I-buffer is empty from I-Cache miss", .pme_ctrl = PME_CTRL_S0, .pme_val = 0x2, }, { .pme_name = "IC_ref", .pme_desc = "I-cache refrences", .pme_ctrl = PME_CTRL_S0, .pme_val = 0x8, }, { .pme_name = "DC_rd", .pme_desc = "D-cache read references (including accesses that subsequently trap)", .pme_ctrl = PME_CTRL_S0, .pme_val = 0x9, }, { .pme_name = "DC_wr", .pme_desc = "D-cache store accesses (including cacheable stores that subsequently trap)", .pme_ctrl = PME_CTRL_S0, .pme_val = 0xa, }, { .pme_name = "EC_ref", .pme_desc = "E-cache references", .pme_ctrl = PME_CTRL_S0, .pme_val = 0xc, }, { .pme_name = "EC_snoop_inv", .pme_desc = "L2-cache invalidates generated from a snoop by a remote processor", .pme_ctrl = PME_CTRL_S0, .pme_val = 0xe, }, /* PIC1 events common to all UltraSPARC processors */ { .pme_name = "Dispatch0_mispred", .pme_desc = "I-buffer is empty from Branch misprediction", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x2, }, { .pme_name = "EC_wb", .pme_desc = "Dirty sub-blocks that produce writebacks due to L2-cache miss events", .pme_ctrl = PME_CTRL_S1, .pme_val = 0xd, }, { .pme_name = "EC_snoop_cb", .pme_desc = "L2-cache copybacks generated from a snoop 
by a remote processor", .pme_ctrl = PME_CTRL_S1, .pme_val = 0xe, }, /* PIC0 events common to all UltraSPARC-III/III+/IIIi processors */ { .pme_name = "Dispatch0_br_target", .pme_desc = "I-buffer is empty due to a branch target address calculation", .pme_ctrl = PME_CTRL_S0, .pme_val = 0x3, }, { .pme_name = "Dispatch0_2nd_br", .pme_desc = "Stall cycles due to having two branch instructions line-up in one 4-instruction group causing the second branch in the group to be re-fetched, delaying it's entrance into the I-buffer", .pme_ctrl = PME_CTRL_S0, .pme_val = 0x4, }, { .pme_name = "Rstall_storeQ", .pme_desc = "R-stage stall for a store instruction which is the next instruction to be executed, but it stailled due to the store queue being full", .pme_ctrl = PME_CTRL_S0, .pme_val = 0x5, }, { .pme_name = "Rstall_IU_use", .pme_desc = "R-stage stall for an event that the next instruction to be executed depends on the result of a preceeding integer instruction in the pipeline that is not yet available", .pme_ctrl = PME_CTRL_S0, .pme_val = 0x6, }, { .pme_name = "EC_write_hit_RTO", .pme_desc = "W-cache exclusive requests that hit L2-cache in S, O, or Os state and thus, do a read-to-own bus transaction", .pme_ctrl = PME_CTRL_S0, .pme_val = 0xd, }, { .pme_name = "EC_rd_miss", .pme_desc = "L2-cache miss events (including atomics) from D-cache events", .pme_ctrl = PME_CTRL_S0, .pme_val = 0xf, }, { .pme_name = "PC_port0_rd", .pme_desc = "P-cache cacheable FP loads to the first port (general purpose load path to D-cache and P-cache via MS pipeline)", .pme_ctrl = PME_CTRL_S0, .pme_val = 0x10, }, { .pme_name = "SI_snoop", .pme_desc = "Counts snoops from remote processor(s) including RTS, RTSR, RTO, RTOR, RS, RSR, RTSM, and WS", .pme_ctrl = PME_CTRL_S0, .pme_val = 0x11, }, { .pme_name = "SI_ciq_flow", .pme_desc = "Counts system clock cycles when the flow control (PauseOut) signal is asserted", .pme_ctrl = PME_CTRL_S0, .pme_val = 0x12, }, { .pme_name = "SI_owned", .pme_desc = "Counts 
events where owned_in is asserted on bus requests from the local processor", .pme_ctrl = PME_CTRL_S0, .pme_val = 0x13, }, { .pme_name = "SW_count0", .pme_desc = "Counts software-generated occurrences of 'sethi %hi(0xfc000), %g0' instruction", .pme_ctrl = PME_CTRL_S0, .pme_val = 0x14, }, { .pme_name = "IU_Stat_Br_miss_taken", .pme_desc = "Retired branches that were predicted to be taken, but in fact were not taken", .pme_ctrl = PME_CTRL_S0, .pme_val = 0x15, }, { .pme_name = "IU_Stat_Br_Count_taken", .pme_desc = "Retired taken branches", .pme_ctrl = PME_CTRL_S0, .pme_val = 0x16, }, { .pme_name = "Dispatch0_rs_mispred", .pme_desc = "I-buffer is empty due to a Return Address Stack misprediction", .pme_ctrl = PME_CTRL_S0, .pme_val = 0x4, }, { .pme_name = "FA_pipe_completion", .pme_desc = "Instructions that complete execution on the FPG ALU pipelines", .pme_ctrl = PME_CTRL_S0, .pme_val = 0x18, }, /* PIC1 events common to all UltraSPARC-III/III+/IIIi processors */ { .pme_name = "IC_miss_cancelled", .pme_desc = "I-cache misses cancelled due to mis-speculation, recycle, or other events", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x3, }, { .pme_name = "Re_FPU_bypass", .pme_desc = "Stall due to recirculation when an FPU bypass condition that does not have a direct bypass path occurs", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x5, }, { .pme_name = "Re_DC_miss", .pme_desc = "Stall due to loads that miss D-cache and get recirculated", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x6, }, { .pme_name = "Re_EC_miss", .pme_desc = "Stall due to loads that miss L2-cache and get recirculated", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x7, }, { .pme_name = "IC_miss", .pme_desc = "I-cache misses, including fetches from mis-speculated execution paths which are later cancelled", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x8, }, { .pme_name = "DC_rd_miss", .pme_desc = "Recirculated loads that miss the D-cache", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x9, }, { .pme_name = "DC_wr_miss", .pme_desc = "D-cache store accesses 
that miss D-cache", .pme_ctrl = PME_CTRL_S1, .pme_val = 0xa, }, { .pme_name = "Rstall_FP_use", .pme_desc = "R-stage stall for an event that the next instruction to be executed depends on the result of a preceeding floating-point instruction in the pipeline that is not yet available", .pme_ctrl = PME_CTRL_S1, .pme_val = 0xb, }, { .pme_name = "EC_misses", .pme_desc = "E-cache misses", .pme_ctrl = PME_CTRL_S1, .pme_val = 0xc, }, { .pme_name = "EC_ic_miss", .pme_desc = "L2-cache read misses from I-cache requests", .pme_ctrl = PME_CTRL_S1, .pme_val = 0xf, }, { .pme_name = "Re_PC_miss", .pme_desc = "Stall due to recirculation when a prefetch cache miss occurs on a prefetch predicted second load", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x10, }, { .pme_name = "ITLB_miss", .pme_desc = "I-TLB miss traps taken", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x11, }, { .pme_name = "DTLB_miss", .pme_desc = "Memory reference instructions which trap due to D-TLB miss", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x12, }, { .pme_name = "WC_miss", .pme_desc = "W-cache misses", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x13, }, { .pme_name = "WC_snoop_cb", .pme_desc = "W-cache copybacks generated by a snoop from a remote processor", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x14, }, { .pme_name = "WC_scrubbed", .pme_desc = "W-cache hits to clean lines", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x15, }, { .pme_name = "WC_wb_wo_read", .pme_desc = "W-cache writebacks not requiring a read", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x16, }, { .pme_name = "PC_soft_hit", .pme_desc = "FP loads that hit a P-cache line that was prefetched by a software-prefetch instruction", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x18, }, { .pme_name = "PC_snoop_inv", .pme_desc = "P-cache invalidates that were generated by a snoop from a remote processor and stores by a local processor", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x19, }, { .pme_name = "PC_hard_hit", .pme_desc = "FP loads that hit a P-cache line that was prefetched by a hardware prefetch", 
.pme_ctrl = PME_CTRL_S1, .pme_val = 0x1a, }, { .pme_name = "PC_port1_rd", .pme_desc = "P-cache cacheable FP loads to the second port (memory and out-of-pipeline instruction execution loads via the A0 and A1 pipelines)", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x1b, }, { .pme_name = "SW_count1", .pme_desc = "Counts software-generated occurrences of 'sethi %hi(0xfc000), %g0' instruction", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x1c, }, { .pme_name = "IU_Stat_Br_miss_untaken", .pme_desc = "Retired branches that were predicted to be untaken, but in fact were taken", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x1d, }, { .pme_name = "IU_Stat_Br_Count_untaken", .pme_desc = "Retired untaken branches", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x1e, }, { .pme_name = "PC_MS_miss", .pme_desc = "FP loads through the MS pipeline that miss P-cache", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x1f, }, { .pme_name = "Re_RAW_miss", .pme_desc = "Stall due to recirculation when there is a load in the E-stage which has a non-bypassable read-after-write hazard with an earlier store instruction", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x26, }, { .pme_name = "FM_pipe_completion", .pme_desc = "Instructions that complete execution on the FPG Multiply pipelines", .pme_ctrl = PME_CTRL_S0, .pme_val = 0x27, }, /* PIC0 memory controller events specific to UltraSPARC-IIIi processors */ { .pme_name = "MC_read_dispatched", .pme_desc = "DDR 64-byte reads dispatched by the MIU", .pme_ctrl = PME_CTRL_S0, .pme_val = 0x20, }, { .pme_name = "MC_write_dispatched", .pme_desc = "DDR 64-byte writes dispatched by the MIU", .pme_ctrl = PME_CTRL_S0, .pme_val = 0x21, }, { .pme_name = "MC_read_returned_to_JBU", .pme_desc = "64-byte reads that return data to JBU", .pme_ctrl = PME_CTRL_S0, .pme_val = 0x22, }, { .pme_name = "MC_msl_busy_stall", .pme_desc = "Stall cycles due to msl_busy", .pme_ctrl = PME_CTRL_S0, .pme_val = 0x23, }, { .pme_name = "MC_mdb_overflow_stall", .pme_desc = "Stall cycles due to potential memory data buffer overflow", 
.pme_ctrl = PME_CTRL_S0, .pme_val = 0x24, }, { .pme_name = "MC_miu_spec_request", .pme_desc = "Speculative requests accepted by MIU", .pme_ctrl = PME_CTRL_S0, .pme_val = 0x25, }, /* PIC1 memory controller events specific to UltraSPARC-IIIi processors */ { .pme_name = "MC_reads", .pme_desc = "64-byte reads by the MSL", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x20, }, { .pme_name = "MC_writes", .pme_desc = "64-byte writes by the MSL", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x21, }, { .pme_name = "MC_page_close_stall", .pme_desc = "DDR page conflicts", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x22, }, /* PIC1 events specific to UltraSPARC-III+/IIIi */ { .pme_name = "Re_DC_missovhd", .pme_desc = "Used to measure D-cache stall counts seperatedly for L2-cache hits and misses. This counter is used with the recirculation and cache access events to seperately calculate the D-cache loads that hit and miss the L2-cache", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x4, }, }; #define PME_ULTRA3I_EVENT_COUNT (sizeof(ultra3i_pe)/sizeof(pme_sparc_entry_t)) papi-5.6.0/src/components/perf_event/tests/Makefile000664 001750 001750 00000003677 13216244357 024470 0ustar00jshenry1963jshenry1963000000 000000 NAME=perf_event include ../../Makefile_comp_tests.target TESTS = broken_events nmi_watchdog perf_event_offcore_response perf_event_system_wide perf_event_user_kernel DOLOOPS= $(testlibdir)/do_loops.o perf_event_tests: $(TESTS) event_name_lib.o: event_name_lib.c event_name_lib.h $(CC) $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c event_name_lib.c broken_events.o: broken_events.c $(CC) $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c broken_events.c broken_events: broken_events.o event_name_lib.o $(UTILOBJS) $(PAPILIB) $(CC) $(INCLUDE) -o broken_events broken_events.o event_name_lib.o $(UTILOBJS) $(PAPILIB) $(LDFLAGS) nmi_watchdog.o: nmi_watchdog.c $(CC) $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c nmi_watchdog.c nmi_watchdog: nmi_watchdog.o event_name_lib.o $(UTILOBJS) $(PAPILIB) $(CC) $(INCLUDE) -o nmi_watchdog nmi_watchdog.o 
$(UTILOBJS) $(PAPILIB) $(LDFLAGS) perf_event_offcore_response.o: perf_event_offcore_response.c event_name_lib.h $(CC) $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c perf_event_offcore_response.c perf_event_offcore_response: perf_event_offcore_response.o event_name_lib.o $(UTILOBJS) $(DOLOOPS) $(PAPILIB) $(CC) $(INCLUDE) -o perf_event_offcore_response perf_event_offcore_response.o event_name_lib.o $(UTILOBJS) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) perf_event_system_wide.o: perf_event_system_wide.c $(CC) $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c perf_event_system_wide.c perf_event_system_wide: perf_event_system_wide.o $(UTILOBJS) $(DOLOOPS) $(PAPILIB) $(CC) $(INCLUDE) -o perf_event_system_wide perf_event_system_wide.o $(UTILOBJS) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) perf_event_user_kernel.o: perf_event_user_kernel.c event_name_lib.h $(CC) $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c perf_event_user_kernel.c perf_event_user_kernel: perf_event_user_kernel.o event_name_lib.o $(UTILOBJS) $(DOLOOPS) $(PAPILIB) $(CC) $(INCLUDE) -o perf_event_user_kernel perf_event_user_kernel.o event_name_lib.o $(UTILOBJS) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) clean: rm -f $(TESTS) *.o *~ papi-5.6.0/src/libpfm4/docs/man3/libpfm_mips_74k.3000664 001750 001750 00000003630 13216244364 023524 0ustar00jshenry1963jshenry1963000000 000000 .TH LIBPFM 3 "September, 2011" "" "Linux Programmer's Manual" .SH NAME libpfm_mips_74k - support for MIPS 74k processors .SH SYNOPSIS .nf .B #include .sp .B PMU name: mips_74k .B PMU desc: MIPS 74k .sp .SH DESCRIPTION The library supports MIPS 74k processors in big or little endian modes. .SH ENCODINGS On this processor, what is measured by an event depends on the event code and on the counter it is programmed on. Usually the meaning of the event code changes between odd and even indexed counters. For instance, event code \fB0x2\fR means 'PREDICTED_JR31' when programmed on even-indexed counters and it means 'JR_31_MISPREDICTIONS' when programmed on odd-indexed counters. 
To correctly measure an event, one needs both the event encoding and a list of possible counters.
When \fBpfm_get_os_event_encoding()\fR is used with \fBPFM_OS_NONE\fR to return the raw PMU encoding,
the library returns two values: the event encoding as per the architecture manual and a bitmask of valid
counters to program it on. For instance, for 'JR_31_MISPREDICTIONS' the library returns codes[0] = 0x4a,
codes[1]= 0xa (supported on counter 1, 3).
The encoding for a specific kernel interface may vary and is handled internally by the library.
.SH MODIFIERS
The following modifiers are supported on MIPS 74k.
.TP
.B u
Measure at user level. This corresponds to \fBPFM_PLM3\fR. This is a boolean modifier.
.TP
.B k
Measure at kernel level. This corresponds to \fBPFM_PLM0\fR. This is a boolean modifier.
.TP
.B e
Measure at exception level. This corresponds to \fBPFM_PLM2\fR. This is a boolean modifier.
.TP
.B s
Measure at supervisor level. This corresponds to \fBPFM_PLM1\fR. This is a boolean modifier.

It should be noted that those modifiers are available for encoding as raw mode with \fBPFM_OS_NONE\fR
but they may not all be present with specific kernel interfaces.
.SH AUTHORS
.nf
Stephane Eranian
.fi
.PP
}, {.pme_name="JR_INSNS_COMPLETED", .pme_code = 0x0000000d, .pme_counters = 0x1, .pme_desc = "JR instruction that completed execution" }, {.pme_name="CYCLES", .pme_code = 0x00000000, .pme_counters = 0x1, .pme_desc = "CPU cycles" }, {.pme_name="REPLAY_DUE_TO_LOAD_DEPENDENT_SPEC_DISPATCH", .pme_code = 0x00000008, .pme_counters = 0x1, .pme_desc = "Replays due to load-dependent speculative dispatch" }, {.pme_name="LSU_REPLAYS", .pme_code = 0x0000000e, .pme_counters = 0x1, .pme_desc = "LSU requested replays" }, {.pme_name="FP_INSNS_COMPLETED", .pme_code = 0x00000003, .pme_counters = 0x1, .pme_desc = "Instructions completed in FPU datapath (computational event" }, {.pme_name="FPU_EXCEPTIONS_TAKEN", .pme_code = 0x0000000a, .pme_counters = 0x1, .pme_desc = "Taken FPU exceptions" }, {.pme_name="TLB_REFILLS_TAKEN", .pme_code = 0x00000004, .pme_counters = 0x1, .pme_desc = "Taken TLB refill exceptions" }, {.pme_name="RPS_MISSPREDICTS", .pme_code = 0x0000000c, .pme_counters = 0x1, .pme_desc = "JR instructions that mispredicted using the Return Prediction Stack (RPS)" }, {.pme_name="INSN_ISSUED", .pme_code = 0x00000001, .pme_counters = 0x1, .pme_desc = "Dispatched/issued instructions" }, {.pme_name="INSNS_COMPLETED", .pme_code = 0x0000000f, .pme_counters = 0x1, .pme_desc = "Instruction that completed execution (with or without exception)" }, {.pme_name="BRANCHES_COMPLETED", .pme_code = 0x00000006, .pme_counters = 0x1, .pme_desc = "Branches that completed execution" }, {.pme_name="JTLB_EXCEPTIONS", .pme_code = 0x00000007, .pme_counters = 0x1, .pme_desc = "Taken Joint-TLB exceptions" }, {.pme_name="FETCH_GROUPS", .pme_code = 0x00000002, .pme_counters = 0x1, .pme_desc = "Fetch groups entering CPU execution pipes" }, }; static pme_gen_mips64_entry_t gen_mips64_24K_pe[] = { {.pme_name="DCACHE_MISS", .pme_code = 0x00000b0b, .pme_counters = 0x3, .pme_desc = "Data cache misses" }, {.pme_name="REPLAY_TRAPS_NOT_UTLB", .pme_code = 0x00001200, .pme_counters = 0x2, .pme_desc = "``replay 
traps'' (other than micro-TLB related)" }, {.pme_name="ITLB_ACCESSES", .pme_code = 0x00000005, .pme_counters = 0x1, .pme_desc = "Instruction micro-TLB accesses" }, {.pme_name="INSTRUCTIONS", .pme_code = 0x00000101, .pme_counters = 0x3, .pme_desc = "Instructions completed" }, {.pme_name="LOADS_COMPLETED", .pme_code = 0x0000000f, .pme_counters = 0x1, .pme_desc = "Loads completed (including FP)" }, {.pme_name="SC_COMPLETE_BUT_FAILED", .pme_code = 0x00001300, .pme_counters = 0x2, .pme_desc = "sc instructions completed, but store failed (because the link bit had been cleared)." }, {.pme_name="JTLB_DATA_MISSES", .pme_code = 0x00000800, .pme_counters = 0x2, .pme_desc = "Joint TLB data (non-instruction) misses" }, {.pme_name="L2_MISSES", .pme_code = 0x00001616, .pme_counters = 0x3, .pme_desc = "L2 cache misses" }, {.pme_name="SC_COMPLETED", .pme_code = 0x00000013, .pme_counters = 0x1, .pme_desc = "sc instructions completed" }, {.pme_name="SUPERFLUOUS_INSTRUCTIONS", .pme_code = 0x00001400, .pme_counters = 0x2, .pme_desc = "``superfluous'' prefetch instructions (data was already in cache)." 
}, {.pme_name="DCACHE_WRITEBACKS", .pme_code = 0x00000a00, .pme_counters = 0x2, .pme_desc = "Data cache writebacks" }, {.pme_name="JR_31_MISSPREDICTS", .pme_code = 0x00000300, .pme_counters = 0x2, .pme_desc = "jr r31 (return) mispredictions" }, {.pme_name="JTLB_DATA_ACCESSES", .pme_code = 0x00000007, .pme_counters = 0x1, .pme_desc = "Joint TLB instruction accesses" }, {.pme_name="ICACHE_MISSES", .pme_code = 0x00000900, .pme_counters = 0x2, .pme_desc = "Instruction cache misses" }, {.pme_name="STALLS", .pme_code = 0x00000012, .pme_counters = 0x1, .pme_desc = "Stalls" }, {.pme_name="INTEGER_INSNS_COMPLETED", .pme_code = 0x0000000e, .pme_counters = 0x1, .pme_desc = "Integer instructions completed" }, {.pme_name="INTEGER_MUL_DIV_COMPLETED", .pme_code = 0x00001100, .pme_counters = 0x2, .pme_desc = "integer multiply/divide unit instructions completed" }, {.pme_name="STORES_COMPLETED", .pme_code = 0x00000f00, .pme_counters = 0x2, .pme_desc = "Stores completed (including FP)" }, {.pme_name="MIPS16_INSTRUCTIONS_COMPLETED", .pme_code = 0x00001000, .pme_counters = 0x2, .pme_desc = "MIPS16 instructions completed" }, {.pme_name="BRANCHES_LAUNCHED", .pme_code = 0x00000002, .pme_counters = 0x1, .pme_desc = "Branch instructions launched (whether completed or mispredicted)" }, {.pme_name="SCACHE_ACCESSES", .pme_code = 0x00001500, .pme_counters = 0x2, .pme_desc = "L2 cache accesses" }, {.pme_name="JR_31_LAUNCHED", .pme_code = 0x00000003, .pme_counters = 0x1, .pme_desc = "jr r31 (return) instructions launched (whether completed or mispredicted)" }, {.pme_name="PREFETCH_COMPLETED", .pme_code = 0x00000014, .pme_counters = 0x1, .pme_desc = "Prefetch instructions completed" }, {.pme_name="EXCEPTIONS_TAKEN", .pme_code = 0x00000017, .pme_counters = 0x1, .pme_desc = "Exceptions taken" }, {.pme_name="JR_NON_31_LAUNCHED", .pme_code = 0x00000004, .pme_counters = 0x1, .pme_desc = "jr (not r31) issues, which cost the same as a mispredict." 
}, {.pme_name="DTLB_ACCESSES", .pme_code = 0x00000006, .pme_counters = 0x1, .pme_desc = "Data micro-TLB accesses" }, {.pme_name="JTLB_INSTRUCTION_ACCESSES", .pme_code = 0x00000008, .pme_counters = 0x1, .pme_desc = "Joint TLB data (non-instruction) accesses" }, {.pme_name="CACHE_FIXUPS", .pme_code = 0x00000018, .pme_counters = 0x1, .pme_desc = "``cache fixup'' events (specific to the 24K family microarchitecture)." }, {.pme_name="INSTRUCTION_CACHE_ACCESSES", .pme_code = 0x00000009, .pme_counters = 0x1, .pme_desc = "Instruction cache accesses" }, {.pme_name="DTLB_MISSES", .pme_code = 0x00000600, .pme_counters = 0x2, .pme_desc = "Data micro-TLB misses" }, {.pme_name="J_JAL_INSNS_COMPLETED", .pme_code = 0x00000010, .pme_counters = 0x1, .pme_desc = "j/jal instructions completed" }, {.pme_name="DCACHE_ACCESSES", .pme_code = 0x0000000a, .pme_counters = 0x1, .pme_desc = "Data cache accesses" }, {.pme_name="BRANCH_MISSPREDICTS", .pme_code = 0x00000200, .pme_counters = 0x2, .pme_desc = "Branch mispredictions" }, {.pme_name="SCACHE_WRITEBACKS", .pme_code = 0x00000015, .pme_counters = 0x1, .pme_desc = "L2 cache writebacks" }, {.pme_name="CYCLES", .pme_code = 0x00000000, .pme_counters = 0x3, .pme_desc = "Cycles" }, {.pme_name="JTLB_INSN_MISSES", .pme_code = 0x00000700, .pme_counters = 0x2, .pme_desc = "Joint TLB instruction misses" }, {.pme_name="FPU_INSNS_NON_LOAD_STORE_COMPLETED", .pme_code = 0x00000e00, .pme_counters = 0x2, .pme_desc = "FPU instructions completed (not including loads/stores)" }, {.pme_name="NOPS_COMPLETED", .pme_code = 0x00000011, .pme_counters = 0x1, .pme_desc = "no-ops completed, ie instructions writing $0" }, {.pme_name="ITLB_MISSES", .pme_code = 0x00000500, .pme_counters = 0x2, .pme_desc = "Instruction micro-TLB misses" }, }; static pme_gen_mips64_entry_t gen_mips64_25K_pe[] = { {.pme_name="INSNS_FETCHED_FROM_ICACHE", .pme_code = 0x00001818, .pme_counters = 0x3, .pme_desc = "Total number of instructions fetched from the I-Cache" }, 
{.pme_name="FP_EXCEPTIONS_TAKEN", .pme_code = 0x00000b0b, .pme_counters = 0x3, .pme_desc = "Taken FPU exceptions" }, {.pme_name="INSN_ISSUED", .pme_code = 0x00000101, .pme_counters = 0x3, .pme_desc = "Dispatched/issued instructions" }, {.pme_name="STORE_INSNS_ISSUED", .pme_code = 0x00000505, .pme_counters = 0x3, .pme_desc = "Store instructions issued" }, {.pme_name="L2_MISSES", .pme_code = 0x00001e1e, .pme_counters = 0x3, .pme_desc = "L2 Cache miss" }, {.pme_name="REPLAYS_LOAD_DEP_DISPATCH", .pme_code = 0x00002323, .pme_counters = 0x3, .pme_desc = "replays due to load-dependent speculative dispatch" }, {.pme_name="BRANCHES_JUMPS_ISSUED", .pme_code = 0x00000606, .pme_counters = 0x3, .pme_desc = "Branch/Jump instructions issued" }, {.pme_name="REPLAYS_LSU_LOAD_DEP_FPU", .pme_code = 0x00002121, .pme_counters = 0x3, .pme_desc = "LSU requested replays, load-dependent speculative dispatch, FPU exception prediction" }, {.pme_name="INSNS_COMPLETE", .pme_code = 0x00000808, .pme_counters = 0x3, .pme_desc = "Instruction that completed execution (with or without exception)" }, {.pme_name="JTLB_MISSES_LOADS_STORES", .pme_code = 0x00001313, .pme_counters = 0x3, .pme_desc = "Raw count of Joint-TLB misses for loads/stores" }, {.pme_name="CACHEABLE_DCACHE_REQUEST", .pme_code = 0x00001d1d, .pme_counters = 0x3, .pme_desc = "number of cacheable requests to D-Cache" }, {.pme_name="DCACHE_WRITEBACKS", .pme_code = 0x00001c1c, .pme_counters = 0x3, .pme_desc = "D-Cache number of write-backs" }, {.pme_name="ICACHE_MISSES", .pme_code = 0x00001a1a, .pme_counters = 0x3, .pme_desc = "I-Cache miss" }, {.pme_name="ICACHE_PSEUDO_HITS", .pme_code = 0x00002626, .pme_counters = 0x3, .pme_desc = "I-Cache pseudo-hits" }, {.pme_name="FP_EXCEPTION_PREDICTED", .pme_code = 0x00000c0c, .pme_counters = 0x3, .pme_desc = "Predicted FPU exceptions" }, {.pme_name="LOAD_STORE_ISSUED", .pme_code = 0x00002727, .pme_counters = 0x3, .pme_desc = "Load/store instructions issued" }, {.pme_name="REPLAYS_WBB_FULL", 
.pme_code = 0x00002424, .pme_counters = 0x3, .pme_desc = "replays due to WBB full" }, {.pme_name="L2_WBACKS", .pme_code = 0x00001f1f, .pme_counters = 0x3, .pme_desc = "L2 Cache number of write-backs" }, {.pme_name="JR_COMPLETED", .pme_code = 0x00001010, .pme_counters = 0x3, .pme_desc = "JR instruction that completed execution" }, {.pme_name="JR_RPD_MISSPREDICTED", .pme_code = 0x00000f0f, .pme_counters = 0x3, .pme_desc = "JR instructions that mispredicted using the Return Prediction Stack" }, {.pme_name="JTLB_IFETCH_REFILL_EXCEPTIONS", .pme_code = 0x00001515, .pme_counters = 0x3, .pme_desc = "Joint-TLB refill exceptions due to instruction fetch" }, {.pme_name="DUAL_ISSUED_PAIRS", .pme_code = 0x00000707, .pme_counters = 0x3, .pme_desc = "Dual-issued pairs" }, {.pme_name="FSB_FULL_REPLAYS", .pme_code = 0x00002525, .pme_counters = 0x3, .pme_desc = "replays due to FSB full" }, {.pme_name="JTLB_REFILL_EXCEPTIONS", .pme_code = 0x00001717, .pme_counters = 0x3, .pme_desc = "total Joint-TLB Instruction exceptions (refill)" }, {.pme_name="INT_INSNS_ISSUED", .pme_code = 0x00000303, .pme_counters = 0x3, .pme_desc = "Integer instructions issued" }, {.pme_name="FP_INSNS_ISSUED", .pme_code = 0x00000202, .pme_counters = 0x3, .pme_desc = "FPU instructions issued" }, {.pme_name="BRANCHES_MISSPREDICTED", .pme_code = 0x00000d0d, .pme_counters = 0x3, .pme_desc = "Branches that mispredicted before completing execution" }, {.pme_name="FETCH_GROUPS_IN_PIPE", .pme_code = 0x00000909, .pme_counters = 0x3, .pme_desc = "Fetch groups entering CPU execution pipes" }, {.pme_name="CACHEABLE_L2_REQS", .pme_code = 0x00002020, .pme_counters = 0x3, .pme_desc = "Number of cacheable requests to L2" }, {.pme_name="JTLB_DATA_ACCESS_REFILL_EXCEPTIONS", .pme_code = 0x00001616, .pme_counters = 0x3, .pme_desc = "Joint-TLB refill exceptions due to data access" }, {.pme_name="UTLB_MISSES", .pme_code = 0x00001111, .pme_counters = 0x3, .pme_desc = "U-TLB misses" }, {.pme_name="LOAD_INSNS_ISSUED", .pme_code = 
0x00000404, .pme_counters = 0x3, .pme_desc = "Load instructions issued" }, {.pme_name="JTLB_MISSES_IFETCH", .pme_code = 0x00001212, .pme_counters = 0x3, .pme_desc = "Raw count of Joint-TLB misses for instruction fetch" }, {.pme_name="CYCLES", .pme_code = 0x00000000, .pme_counters = 0x3, .pme_desc = "CPU cycles" }, {.pme_name="LSU_REQ_REPLAYS", .pme_code = 0x00002222, .pme_counters = 0x3, .pme_desc = "LSU requested replays" }, {.pme_name="INSN_REQ_FROM_IFU_BIU", .pme_code = 0x00001919, .pme_counters = 0x3, .pme_desc = "instruction requests from the IFU to the BIU" }, {.pme_name="JTLB_EXCEPTIONS", .pme_code = 0x00001414, .pme_counters = 0x3, .pme_desc = "Refill, Invalid and Modified TLB exceptions" }, {.pme_name="BRANCHES_COMPLETED", .pme_code = 0x00000e0e, .pme_counters = 0x3, .pme_desc = "Branches that completed execution" }, {.pme_name="INSN_FP_DATAPATH_COMPLETED", .pme_code = 0x00000a0a, .pme_counters = 0x3, .pme_desc = "Instructions completed in FPU datapath (computational instructions only)" }, {.pme_name="DCACHE_MISSES", .pme_code = 0x00001b1b, .pme_counters = 0x3, .pme_desc = "D-Cache miss" }, }; static pme_gen_mips64_entry_t gen_mips64_34K_pe[] = { {.pme_name="YIELD_INSNS", .pme_code = 0x00220022, .pme_counters = 0x5, .pme_desc = "yield instructions." }, {.pme_name="BRANCH_MISPREDICT_STALLS", .pme_code = 0x002e002e, .pme_counters = 0x5, .pme_desc = "Branch mispredict stalls" }, {.pme_name="SC_FAILED_INSNS", .pme_code = 0x00130013, .pme_counters = 0x5, .pme_desc = "sc instructions completed, but store failed (because the link bit had been cleared)." 
}, {.pme_name="ITC_LOAD_STORE_STALLS", .pme_code = 0x00280028, .pme_counters = 0x5, .pme_desc = "ITC load/store stalls" }, {.pme_name="ITC_LOADS", .pme_code = 0x00200020, .pme_counters = 0x5, .pme_desc = "ITC Loads" }, {.pme_name="LOADS_COMPLETED", .pme_code = 0x000f000f, .pme_counters = 0x5, .pme_desc = "Loads completed (including FP)" }, {.pme_name="BRANCH_INSNS_LAUNCHED", .pme_code = 0x00020002, .pme_counters = 0x5, .pme_desc = "Branch instructions launched (whether completed or mispredicted)" }, {.pme_name="DATA_SIDE_SCRATCHPAD_ACCESS_STALLS", .pme_code = 0x002b002b, .pme_counters = 0x5, .pme_desc = "Data-side scratchpad access stalls" }, {.pme_name="FB_ENTRY_ALLOCATED", .pme_code = 0x00300030, .pme_counters = 0x5, .pme_desc = "FB entry allocated" }, {.pme_name="CP2_STALLS", .pme_code = 0x002a002a, .pme_counters = 0x5, .pme_desc = "CP2 stalls" }, {.pme_name="FSB_25_50_FULL", .pme_code = 0x00320032, .pme_counters = 0x5, .pme_desc = "FSB 25-50% full" }, {.pme_name="CACHE_FIXUP_EVENTS", .pme_code = 0x00180018, .pme_counters = 0x5, .pme_desc = "cache fixup events (specific to the 34K family microarchitecture)" }, {.pme_name="IFU_FB_FULL_REFETCHES", .pme_code = 0x00300030, .pme_counters = 0x5, .pme_desc = "IFU FB full re-fetches" }, {.pme_name="L1_DCACHE_MISS_STALLS", .pme_code = 0x00250025, .pme_counters = 0x5, .pme_desc = "L1 D-cache miss stalls" }, {.pme_name="INT_MUL_DIV_UNIT_INSNS_COMPLETED", .pme_code = 0x00110011, .pme_counters = 0x5, .pme_desc = "integer multiply/divide unit instructions completed" }, {.pme_name="JTLB_INSN_ACCESSES", .pme_code = 0x00070007, .pme_counters = 0x5, .pme_desc = "Joint TLB instruction accesses" }, {.pme_name="ALU_STALLS", .pme_code = 0x00190019, .pme_counters = 0x5, .pme_desc = "ALU stalls" }, {.pme_name="FPU_STALLS", .pme_code = 0x00290029, .pme_counters = 0x5, .pme_desc = "FPU stalls" }, {.pme_name="JTLB_DATA_ACCESSES", .pme_code = 0x00080008, .pme_counters = 0x5, .pme_desc = "Joint TLB data (non-instruction) accesses" }, 
{.pme_name="INTEGER_INSNS_COMPLETED", .pme_code = 0x000e000e, .pme_counters = 0x5, .pme_desc = "Integer instructions completed" }, {.pme_name="MFC2_MTC2_INSNS", .pme_code = 0x00230023, .pme_counters = 0x5, .pme_desc = "CP2 move to/from instructions." }, {.pme_name="STORES_COMPLETED", .pme_code = 0x000f000f, .pme_counters = 0x5, .pme_desc = "Stores completed (including FP)" }, {.pme_name="JR_NON_31_INSN_EXECED", .pme_code = 0x00040004, .pme_counters = 0x5, .pme_desc = "jr $xx (not $31), which cost the same as a mispredict." }, {.pme_name="EXCEPTIONS_TAKEN", .pme_code = 0x00170017, .pme_counters = 0x5, .pme_desc = "Exceptions taken" }, {.pme_name="L2_MISS_PENDING_CYCLES", .pme_code = 0x00270027, .pme_counters = 0x5, .pme_desc = "Cycles where L2 miss is pending" }, {.pme_name="LDQ_FULL_PIPE_STALLS", .pme_code = 0x00350035, .pme_counters = 0x5, .pme_desc = "LDQ full pipeline stalls" }, {.pme_name="DTLB_ACCESSES", .pme_code = 0x00060006, .pme_counters = 0x5, .pme_desc = "Data micro-TLB accesses" }, {.pme_name="SUPERFLUOUS_PREFETCHES", .pme_code = 0x00140014, .pme_counters = 0x5, .pme_desc = "``superfluous'' prefetch instructions (data was already in cache)." 
}, {.pme_name="LDQ_LESS_25_FULL", .pme_code = 0x00340034, .pme_counters = 0x5, .pme_desc = "LDQ < 25% full" }, {.pme_name="FORK_INSTRUCTIONS", .pme_code = 0x00220022, .pme_counters = 0x5, .pme_desc = "fork instructions" }, {.pme_name="UNCACHED_LOAD_STALLS", .pme_code = 0x00280028, .pme_counters = 0x5, .pme_desc = "Uncached load stalls" }, {.pme_name="FSB_FULL_PIPE_STALLS", .pme_code = 0x00330033, .pme_counters = 0x5, .pme_desc = "FSB full pipeline stalls" }, {.pme_name="MDU_STALLS", .pme_code = 0x00290029, .pme_counters = 0x5, .pme_desc = "MDU stalls" }, {.pme_name="FSB_LESS_25_FULL", .pme_code = 0x00320032, .pme_counters = 0x5, .pme_desc = "FSB < 25% full" }, {.pme_name="UNCACHED_LOADS", .pme_code = 0x00210021, .pme_counters = 0x5, .pme_desc = "Uncached Loads" }, {.pme_name="NO_OPS_COMPLETED", .pme_code = 0x00110011, .pme_counters = 0x5, .pme_desc = "no-ops completed, ie instructions writing $0" }, {.pme_name="DATA_SIDE_SCRATCHPAD_RAM_LOGIC", .pme_code = 0x001d001d, .pme_counters = 0x5, .pme_desc = "Data-side scratchpad RAM logic" }, {.pme_name="CYCLES_INSN_NOT_IN_SKID_BUFFER", .pme_code = 0x00180018, .pme_counters = 0x5, .pme_desc = "Cycles lost when an unblocked thread's instruction isn't in the skid buffer, and must be re-fetched from I-cache." 
}, {.pme_name="ITC_LOGIC", .pme_code = 0x001f001f, .pme_counters = 0x5, .pme_desc = "ITC logic" }, {.pme_name="L2_IMISS_STALLS", .pme_code = 0x00260026, .pme_counters = 0x5, .pme_desc = "L2 I-miss stalls" }, {.pme_name="DSP_RESULT_SATURATED", .pme_code = 0x00240024, .pme_counters = 0x5, .pme_desc = "DSP result saturated" }, {.pme_name="INSTRUCTIONS", .pme_code = 0x01010101, .pme_counters = 0xf, .pme_desc = "Instructions completed" }, {.pme_name="ITLB_ACCESSES", .pme_code = 0x00050005, .pme_counters = 0x5, .pme_desc = "Instruction micro-TLB accesses" }, {.pme_name="CP2_REG_TO_REG_INSNS", .pme_code = 0x00230023, .pme_counters = 0x5, .pme_desc = "CP2 register-to-register instructions" }, {.pme_name="SC_INSNS_COMPLETED", .pme_code = 0x00130013, .pme_counters = 0x5, .pme_desc = "sc instructions completed" }, {.pme_name="COREEXTEND_STALLS", .pme_code = 0x002a002a, .pme_counters = 0x5, .pme_desc = "CorExtend stalls" }, {.pme_name="LOAD_USE_STALLS", .pme_code = 0x002d002d, .pme_counters = 0x5, .pme_desc = "Load to Use stalls" }, {.pme_name="JR_31_INSN_EXECED", .pme_code = 0x00030003, .pme_counters = 0x5, .pme_desc = "jr $31 (return) instructions executed." }, {.pme_name="JR_31_MISPREDICTS", .pme_code = 0x00030003, .pme_counters = 0x5, .pme_desc = "jr $31 mispredictions." }, {.pme_name="REPLAY_CYCLES", .pme_code = 0x00120012, .pme_counters = 0x5, .pme_desc = "Cycles lost due to ``replays'' - when a thread blocks, its instructions in the pipeline are discarded to allow other threads to advance." 
}, {.pme_name="L2_MISSES", .pme_code = 0x16161616, .pme_counters = 0xf, .pme_desc = "L2 cache misses" }, {.pme_name="JTLB_DATA_MISSES", .pme_code = 0x00080008, .pme_counters = 0x5, .pme_desc = "Joint TLB data (non-instruction) misses" }, {.pme_name="SYSTEM_INTERFACE", .pme_code = 0x001e001e, .pme_counters = 0x5, .pme_desc = "System interface" }, {.pme_name="BRANCH_MISPREDICTS", .pme_code = 0x00020002, .pme_counters = 0x5, .pme_desc = "Branch mispredictions" }, {.pme_name="ITC_STORES", .pme_code = 0x00200020, .pme_counters = 0x5, .pme_desc = "ITC Stores" }, {.pme_name="LDQ_OVER_50_FULL", .pme_code = 0x00350035, .pme_counters = 0x5, .pme_desc = "LDQ > 50% full" }, {.pme_name="FSB_OVER_50_FULL", .pme_code = 0x00330033, .pme_counters = 0x5, .pme_desc = "FSB > 50% full" }, {.pme_name="STALLS_NO_ROOM_PENDING_WRITE", .pme_code = 0x002c002c, .pme_counters = 0x5, .pme_desc = "Stalls when no more room to store pending write." }, {.pme_name="JR_31_NOT_PREDICTED", .pme_code = 0x00040004, .pme_counters = 0x5, .pme_desc = "jr $31 not predicted (stack mismatch)." }, {.pme_name="EXTERNAL_YIELD_MANAGER_LOGIC", .pme_code = 0x001f001f, .pme_counters = 0x5, .pme_desc = "External Yield Manager logic" }, {.pme_name="DCACHE_WRITEBACKS", .pme_code = 0x000a000a, .pme_counters = 0x5, .pme_desc = "Data cache writebacks" }, {.pme_name="RELAX_BUBBLES", .pme_code = 0x002f002f, .pme_counters = 0x5, .pme_desc = "``Relax bubbles'' - when thread scheduler chooses to schedule nothing to reduce power consumption." 
}, {.pme_name="ICACHE_MISSES", .pme_code = 0x00090009, .pme_counters = 0x5, .pme_desc = "Instruction cache misses" }, {.pme_name="MIPS16_INSNS_COMPLETED", .pme_code = 0x00100010, .pme_counters = 0x5, .pme_desc = "MIPS16 instructions completed" }, {.pme_name="OTHER_INTERLOCK_STALLS", .pme_code = 0x002e002e, .pme_counters = 0x5, .pme_desc = "Other interlock stalls" }, {.pme_name="L2_CACHE_WRITEBACKS", .pme_code = 0x00150015, .pme_counters = 0x5, .pme_desc = "L2 cache writebacks" }, {.pme_name="WBB_LESS_25_FULL", .pme_code = 0x00360036, .pme_counters = 0x5, .pme_desc = "WBB < 25% full" }, {.pme_name="L2_DCACHE_MISS_STALLS", .pme_code = 0x00260026, .pme_counters = 0x5, .pme_desc = "L2 D-miss stalls" }, {.pme_name="CACHE_INSTRUCTION_STALLS", .pme_code = 0x002c002c, .pme_counters = 0x5, .pme_desc = "Stalls due to cache instructions" }, {.pme_name="L1_DCACHE_MISS_PENDING_CYCLES", .pme_code = 0x00270027, .pme_counters = 0x5, .pme_desc = "Cycles where L1 D-cache miss pending" }, {.pme_name="ALU_TO_AGEN_STALLS", .pme_code = 0x002d002d, .pme_counters = 0x5, .pme_desc = "ALU to AGEN stalls" }, {.pme_name="L2_ACCESSES", .pme_code = 0x00150015, .pme_counters = 0x5, .pme_desc = "L2 cache accesses" }, {.pme_name="J_JAL_INSN_COMPLETED", .pme_code = 0x00100010, .pme_counters = 0x5, .pme_desc = "j/jal instructions completed" }, {.pme_name="ALL_STALLS", .pme_code = 0x00120012, .pme_counters = 0x5, .pme_desc = "All stalls (no action in RF pipe stage)" }, {.pme_name="DSP_INSTRUCTIONS", .pme_code = 0x00240024, .pme_counters = 0x5, .pme_desc = "DSP instructions" }, {.pme_name="UNCACHED_STORES", .pme_code = 0x00210021, .pme_counters = 0x5, .pme_desc = "Uncached Stores" }, {.pme_name="WBB_FULL_PIPE_STALLS", .pme_code = 0x00370037, .pme_counters = 0x5, .pme_desc = "WBB full pipeline stalls" }, {.pme_name="INSN_CACHE_ACCESSES", .pme_code = 0x00090009, .pme_counters = 0x5, .pme_desc = "Instruction cache accesses" }, {.pme_name="EXT_POLICY_MANAGER", .pme_code = 0x001c001c, .pme_counters = 0x5, 
.pme_desc = "External policy manager" }, {.pme_name="WBB_OVER_50_FULL", .pme_code = 0x00370037, .pme_counters = 0x5, .pme_desc = "WBB > 50% full" }, {.pme_name="DTLB_MISSES", .pme_code = 0x00060006, .pme_counters = 0x5, .pme_desc = "Data micro-TLB misses" }, {.pme_name="DCACHE_ACCESSES", .pme_code = 0x000a000a, .pme_counters = 0x5, .pme_desc = "Data cache accesses" }, {.pme_name="COREEXTEND_LOGIC", .pme_code = 0x001e001e, .pme_counters = 0x5, .pme_desc = "CorExtend logic" }, {.pme_name="LDQ_25_50_FULL", .pme_code = 0x00340034, .pme_counters = 0x5, .pme_desc = "LDQ 25-50% full" }, {.pme_name="PREFETCH_INSNS_COMPLETED", .pme_code = 0x00140014, .pme_counters = 0x5, .pme_desc = "Prefetch instructions completed" }, {.pme_name="CYCLES", .pme_code = 0x00000000, .pme_counters = 0xf, .pme_desc = "Cycles" }, {.pme_name="L1_ICACHE_MISS_STALLS", .pme_code = 0x00250025, .pme_counters = 0x5, .pme_desc = "L1 I-cache miss stalls" }, {.pme_name="JTLB_INSN_MISSES", .pme_code = 0x00070007, .pme_counters = 0x5, .pme_desc = "Joint TLB instruction misses" }, {.pme_name="COP2", .pme_code = 0x001c001c, .pme_counters = 0x5, .pme_desc = "Co-Processor 2" }, {.pme_name="FPU_INSNS_COMPLETED", .pme_code = 0x000e000e, .pme_counters = 0x5, .pme_desc = "FPU instructions completed (not including loads/stores)" }, {.pme_name="ITLB_MISSES", .pme_code = 0x00050005, .pme_counters = 0x5, .pme_desc = "Instruction micro-TLB misses" }, {.pme_name="IFU_STALLS", .pme_code = 0x00190019, .pme_counters = 0x5, .pme_desc = "IFU stalls (when no instruction offered) ALU stalls" }, {.pme_name="WBB_25_50_FULL", .pme_code = 0x00360036, .pme_counters = 0x5, .pme_desc = "WBB 25-50% full" }, {.pme_name="DCACHE_MISSES", .pme_code = 0x0b0b0b0b, .pme_counters = 0xf, .pme_desc = "Data cache misses" }, }; static pme_gen_mips64_entry_t gen_mips64_5K_pe[] = { {.pme_name="DCACHE_LINE_EVICTED", .pme_code = 0x00000600, .pme_counters = 0x2, .pme_desc = "Data cache line evicted" }, {.pme_name="LOADS_EXECED", .pme_code = 0x00000202, 
.pme_counters = 0x3, .pme_desc = "Load/pref(x)/sync/cache-ops executed" }, {.pme_name="INSN_SCHEDULED", .pme_code = 0x0000000a, .pme_counters = 0x1, .pme_desc = "Instruction scheduled" }, {.pme_name="DUAL_ISSUED_INSNS", .pme_code = 0x0000000e, .pme_counters = 0x1, .pme_desc = "Dual issued instructions executed" }, {.pme_name="BRANCHES_MISSPREDICTED", .pme_code = 0x00000800, .pme_counters = 0x2, .pme_desc = "Branch mispredicted" }, {.pme_name="CONFLICT_STALL_M_STAGE", .pme_code = 0x00000a00, .pme_counters = 0x2, .pme_desc = "Instruction stall in M stage due to scheduling conflicts" }, {.pme_name="STORES_EXECED", .pme_code = 0x00000303, .pme_counters = 0x3, .pme_desc = "Stores (including conditional stores) executed" }, {.pme_name="DCACHE_MISS", .pme_code = 0x00000900, .pme_counters = 0x2, .pme_desc = "Data cache miss" }, {.pme_name="INSN_FETCHED", .pme_code = 0x00000001, .pme_counters = 0x1, .pme_desc = "Instructions fetched" }, {.pme_name="TLB_MISS_EXCEPTIONS", .pme_code = 0x00000700, .pme_counters = 0x2, .pme_desc = "TLB miss exceptions" }, {.pme_name="COP2_INSNS_EXECED", .pme_code = 0x00000f00, .pme_counters = 0x2, .pme_desc = "COP2 instructions executed" }, {.pme_name="FAILED_COND_STORES", .pme_code = 0x00000005, .pme_counters = 0x1, .pme_desc = "Failed conditional stores" }, {.pme_name="INSNS_EXECED", .pme_code = 0x0000010f, .pme_counters = 0x3, .pme_desc = "Instructions executed" }, {.pme_name="ICACHE_MISS", .pme_code = 0x00000009, .pme_counters = 0x1, .pme_desc = "Instruction cache miss" }, {.pme_name="COND_STORES_EXECED", .pme_code = 0x00000404, .pme_counters = 0x3, .pme_desc = "Conditional stores executed" }, {.pme_name="FP_INSNS_EXECED", .pme_code = 0x00000500, .pme_counters = 0x2, .pme_desc = "Floating-point instructions executed" }, {.pme_name="DTLB_MISSES", .pme_code = 0x00000008, .pme_counters = 0x1, .pme_desc = "DTLB miss" }, {.pme_name="BRANCHES_EXECED", .pme_code = 0x00000006, .pme_counters = 0x1, .pme_desc = "Branches executed" }, 
{.pme_name="CYCLES", .pme_code = 0x00000000, .pme_counters = 0x3, .pme_desc = "Cycles" }, {.pme_name="ITLB_MISSES", .pme_code = 0x00000007, .pme_counters = 0x1, .pme_desc = "ITLB miss" }, }; static pme_gen_mips64_entry_t gen_mips64_r10000_pe[] = { {.pme_name="BRANCHES_RESOLVED", .pme_code = 0x00000006, .pme_counters = 0x1, .pme_desc = "Branches resolved" }, {.pme_name="TLB_REFILL_EXCEPTIONS", .pme_code = 0x00000700, .pme_counters = 0x2, .pme_desc = "TLB refill exceptions" }, {.pme_name="EXTERNAL_INTERVENTION_RQ", .pme_code = 0x0000000c, .pme_counters = 0x1, .pme_desc = "External intervention requests" }, {.pme_name="STORES_GRADUATED", .pme_code = 0x00000300, .pme_counters = 0x2, .pme_desc = "Stores graduated" }, {.pme_name="SCACHE_WAY_MISPREDICTED_INSN", .pme_code = 0x0000000b, .pme_counters = 0x1, .pme_desc = "Secondary cache way mispredicted (instruction)" }, {.pme_name="INSTRUCTION_CACHE_MISSES", .pme_code = 0x00000009, .pme_counters = 0x1, .pme_desc = "Instruction cache misses" }, {.pme_name="SCACHE_MISSES_DATA", .pme_code = 0x00000a00, .pme_counters = 0x2, .pme_desc = "Secondary cache misses (data)" }, {.pme_name="QUADWORDS_WB_FROM_PRIMARY_DCACHE", .pme_code = 0x00000600, .pme_counters = 0x2, .pme_desc = "Quadwords written back from primary data cache" }, {.pme_name="EXTERNAL_INVALIDATE_RQ_HITS_SCACHE", .pme_code = 0x00000d00, .pme_counters = 0x2, .pme_desc = "External invalidate request is determined to have hit in secondary cache" }, {.pme_name="LOAD_PREFETC_SYNC_CACHEOP_ISSUED", .pme_code = 0x00000002, .pme_counters = 0x1, .pme_desc = "Load / prefetch / sync / CacheOp issued" }, {.pme_name="STORES_OR_STORE_PREF_TO_SHD_SCACHE_BLOCKS", .pme_code = 0x00000f00, .pme_counters = 0x2, .pme_desc = "Stores or prefetches with store hint to Shared secondary cache blocks" }, {.pme_name="STORE_COND_ISSUED", .pme_code = 0x00000004, .pme_counters = 0x1, .pme_desc = "Store conditional issued" }, {.pme_name="BRANCHES_MISPREDICTED", .pme_code = 0x00000800, .pme_counters = 
0x2, .pme_desc = "Branches mispredicted" }, {.pme_name="EXTERNAL_INVALIDATE_RQ", .pme_code = 0x0000000d, .pme_counters = 0x1, .pme_desc = "External invalidate requests" }, {.pme_name="LOAD_PREFETC_SYNC_CACHEOP_GRADUATED", .pme_code = 0x00000200, .pme_counters = 0x2, .pme_desc = "Load / prefetch / sync / CacheOp graduated" }, {.pme_name="INSTRUCTIONS_ISSUED", .pme_code = 0x00000001, .pme_counters = 0x1, .pme_desc = "Instructions issued" }, {.pme_name="INSTRUCTION_GRADUATED", .pme_code = 0x0000000f, .pme_counters = 0x1, .pme_desc = "Instructions graduated" }, {.pme_name="EXTERNAL_INTERVENTION_RQ_HITS_SCACHE", .pme_code = 0x00000c00, .pme_counters = 0x2, .pme_desc = "External intervention request is determined to have hit in secondary cache" }, {.pme_name="SCACHE_MISSES_INSTRUCTION", .pme_code = 0x0000000a, .pme_counters = 0x1, .pme_desc = "Secondary cache misses (instruction)" }, {.pme_name="SCACHE_LOAD_STORE_CACHEOP_OPERATIONS", .pme_code = 0x00000900, .pme_counters = 0x2, .pme_desc = "Secondary cache load / store and cache-ops operations" }, {.pme_name="STORES_OR_STORE_PREF_TO_CLEANEXCLUSIVE_SCACHE_BLOCKS", .pme_code = 0x00000e00, .pme_counters = 0x2, .pme_desc = "Stores or prefetches with store hint to CleanExclusive secondary cache blocks" }, {.pme_name="INSTRUCTIONS_GRADUATED", .pme_code = 0x00000100, .pme_counters = 0x2, .pme_desc = "Instructions graduated" }, {.pme_name="FP_INSTRUCTON_GRADUATED", .pme_code = 0x00000500, .pme_counters = 0x2, .pme_desc = "Floating-point instructions graduated" }, {.pme_name="STORES_ISSUED", .pme_code = 0x00000003, .pme_counters = 0x1, .pme_desc = "Stores issued" }, {.pme_name="CYCLES", .pme_code = 0x00000000, .pme_counters = 0x3, .pme_desc = "Cycles" }, {.pme_name="CORRECTABLE_ECC_ERRORS_SCACHE", .pme_code = 0x00000008, .pme_counters = 0x1, .pme_desc = "Correctable ECC errors on secondary cache data" }, {.pme_name="QUADWORDS_WB_FROM_SCACHE", .pme_code = 0x00000007, .pme_counters = 0x1, .pme_desc = "Quadwords written back from 
secondary cache" }, {.pme_name="STORE_COND_GRADUATED", .pme_code = 0x00000400, .pme_counters = 0x2, .pme_desc = "Store conditional graduated" }, {.pme_name="FUNCTIONAL_UNIT_COMPLETION_CYCLES", .pme_code = 0x0000000e, .pme_counters = 0x1, .pme_desc = "Functional unit completion cycles" }, {.pme_name="FAILED_STORE_CONDITIONAL", .pme_code = 0x00000005, .pme_counters = 0x1, .pme_desc = "Failed store conditional" }, {.pme_name="SCACHE_WAY_MISPREDICTED_DATA", .pme_code = 0x00000b00, .pme_counters = 0x2, .pme_desc = "Secondary cache way mispredicted (data)" }, }; static pme_gen_mips64_entry_t gen_mips64_r12000_pe[] = { {.pme_name="INTERVENTION_REQUESTS", .pme_code = 0x0c0c0c0c, .pme_counters = 0xf, .pme_desc = "External intervention requests" }, {.pme_name="QUADWORDS", .pme_code = 0x16161616, .pme_counters = 0xf, .pme_desc = "Quadwords written back from primary data cache" }, {.pme_name="MISPREDICTED_BRANCHES", .pme_code = 0x18181818, .pme_counters = 0xf, .pme_desc = "Mispredicted branches" }, {.pme_name="DECODED_STORES", .pme_code = 0x03030303, .pme_counters = 0xf, .pme_desc = "Decoded stores" }, {.pme_name="TLB_MISSES", .pme_code = 0x17171717, .pme_counters = 0xf, .pme_desc = "TLB misses" }, {.pme_name="GRADUATED_FP_INSTRUCTIONS", .pme_code = 0x15151515, .pme_counters = 0xf, .pme_desc = "Graduated floating point instructions" }, {.pme_name="EXTERNAL_REQUESTS", .pme_code = 0x0d0d0d0d, .pme_counters = 0xf, .pme_desc = "External invalidate requests" }, {.pme_name="GRADUATED_STORES", .pme_code = 0x13131313, .pme_counters = 0xf, .pme_desc = "Graduated stores" }, {.pme_name="PREFETCH_MISSES_IN_DCACHE", .pme_code = 0x11111111, .pme_counters = 0xf, .pme_desc = "Primary data cache misses by prefetch instructions" }, {.pme_name="STORE_PREFETCH_EXCLUSIVE_SHARED_SC_BLOCK", .pme_code = 0x1f1f1f1f, .pme_counters = 0xf, .pme_desc = "Store/prefetch exclusive to shared block in secondary" }, {.pme_name="DECODED_LOADS", .pme_code = 0x02020202, .pme_counters = 0xf, .pme_desc = "Decoded 
loads" }, {.pme_name="GRADUATED_STORE_CONDITIONALS", .pme_code = 0x14141414, .pme_counters = 0xf, .pme_desc = "Graduated store conditionals" }, {.pme_name="INSTRUCTION_SECONDARY_CACHE_MISSES", .pme_code = 0x0a0a0a0a, .pme_counters = 0xf, .pme_desc = "Secondary cache misses (instruction)" }, {.pme_name="STATE_OF_EXTERNAL_INVALIDATION_HIT", .pme_code = 0x1d1d1d1d, .pme_counters = 0xf, .pme_desc = "State of external invalidation hits in secondary cache" }, {.pme_name="SECONDARY_CACHE_WAY_MISSPREDICTED", .pme_code = 0x0b0b0b0b, .pme_counters = 0xf, .pme_desc = "Secondary cache way mispredicted (instruction)" }, {.pme_name="DECODED_INSTRUCTIONS", .pme_code = 0x01010101, .pme_counters = 0xf, .pme_desc = "Decoded instructions" }, {.pme_name="SCACHE_MISSES", .pme_code = 0x1a1a1a1a, .pme_counters = 0xf, .pme_desc = "Secondary cache misses (data)" }, {.pme_name="ICACHE_MISSES", .pme_code = 0x09090909, .pme_counters = 0xf, .pme_desc = "Instruction cache misses" }, {.pme_name="SCACHE_WAY_MISPREDICTION", .pme_code = 0x1b1b1b1b, .pme_counters = 0xf, .pme_desc = "Misprediction from scache way prediction table (data)" }, {.pme_name="STATE_OF_SCACHE_INTERVENTION_HIT", .pme_code = 0x1c1c1c1c, .pme_counters = 0xf, .pme_desc = "State of external intervention hit in secondary cache" }, {.pme_name="GRADUATED_LOADS", .pme_code = 0x12121212, .pme_counters = 0xf, .pme_desc = "Graduated loads" }, {.pme_name="PREFETCH_INSTRUCTIONS_EXECUTED", .pme_code = 0x10101010, .pme_counters = 0xf, .pme_desc = "Executed prefetch instructions" }, {.pme_name="MISS_TABLE_OCCUPANCY", .pme_code = 0x04040404, .pme_counters = 0xf, .pme_desc = "Miss Handling Table Occupancy" }, {.pme_name="INSTRUCTIONS_GRADUATED", .pme_code = 0x0f0f0f0f, .pme_counters = 0xf, .pme_desc = "Instructions graduated" }, {.pme_name="QUADWORDS_WRITEBACK_FROM_SC", .pme_code = 0x07070707, .pme_counters = 0xf, .pme_desc = "Quadwords written back from secondary cache" }, {.pme_name="CORRECTABLE_ECC_ERRORS", .pme_code = 0x08080808, 
.pme_counters = 0xf, .pme_desc = "Correctable ECC errors on secondary cache data" }, {.pme_name="CYCLES", .pme_code = 0x00000000, .pme_counters = 0xf, .pme_desc = "Cycles" }, {.pme_name="RESOLVED_BRANCH_CONDITIONAL", .pme_code = 0x06060606, .pme_counters = 0xf, .pme_desc = "Resolved conditional branches" }, {.pme_name="STORE_PREFETCH_EXCLUSIVE_TO_CLEAN_SC_BLOCK", .pme_code = 0x1e1e1e1e, .pme_counters = 0xf, .pme_desc = "Store/prefetch exclusive to clean block in secondary cache" }, {.pme_name="FAILED_STORE_CONDITIONAL", .pme_code = 0x05050505, .pme_counters = 0xf, .pme_desc = "Failed store conditional" }, {.pme_name="DCACHE_MISSES", .pme_code = 0x19191919, .pme_counters = 0xf, .pme_desc = "Primary data cache misses" }, }; static pme_gen_mips64_entry_t gen_mips64_rm7000_pe[] = { {.pme_name="SLIP_CYCLES_PENDING_NON_BLKING_LOAD", .pme_code = 0x00001a1a, .pme_counters = 0x3, .pme_desc = "Slip cycles due to pending non-blocking loads" }, {.pme_name="STORE_INSTRUCTIONS_ISSUED", .pme_code = 0x00000505, .pme_counters = 0x3, .pme_desc = "Store instructions issued" }, {.pme_name="BRANCH_PREFETCHES", .pme_code = 0x00000707, .pme_counters = 0x3, .pme_desc = "Branch prefetches" }, {.pme_name="PCACHE_WRITEBACKS", .pme_code = 0x00001414, .pme_counters = 0x3, .pme_desc = "Primary cache writebacks" }, {.pme_name="STALL_CYCLES_PENDING_NON_BLKING_LOAD", .pme_code = 0x00001f1f, .pme_counters = 0x3, .pme_desc = "Stall cycles due to pending non-blocking loads - stall start of exception" }, {.pme_name="STALL_CYCLES", .pme_code = 0x00000909, .pme_counters = 0x3, .pme_desc = "Stall cycles" }, {.pme_name="CACHE_MISSES", .pme_code = 0x00001616, .pme_counters = 0x3, .pme_desc = "Cache misses" }, {.pme_name="DUAL_ISSUED_PAIRS", .pme_code = 0x00000606, .pme_counters = 0x3, .pme_desc = "Dual issued pairs" }, {.pme_name="SLIP_CYCLES_DUE_MULTIPLIER_BUSY", .pme_code = 0x00001818, .pme_counters = 0x3, .pme_desc = "Slip Cycles due to multiplier busy" }, {.pme_name="INTEGER_INSTRUCTIONS_ISSUED", 
.pme_code = 0x00000303, .pme_counters = 0x3, .pme_desc = "Integer instructions issued" }, {.pme_name="SCACHE_WRITEBACKS", .pme_code = 0x00001313, .pme_counters = 0x3, .pme_desc = "Secondary cache writebacks" }, {.pme_name="DCACHE_MISS_STALL_CYCLES", .pme_code = 0x00001515, .pme_counters = 0x3, .pme_desc = "Dcache miss stall cycles (cycles where both cache miss tokens taken and a third try is requested)" }, {.pme_name="MULTIPLIER_STALL_CYCLES", .pme_code = 0x00001e1e, .pme_counters = 0x3, .pme_desc = "Multiplier stall cycles" }, {.pme_name="WRITE_BUFFER_FULL_STALL_CYCLES", .pme_code = 0x00001c1c, .pme_counters = 0x3, .pme_desc = "Write buffer full stall cycles" }, {.pme_name="FP_INSTRUCTIONS_ISSUED", .pme_code = 0x00000202, .pme_counters = 0x3, .pme_desc = "Floating-point instructions issued" }, {.pme_name="JTLB_DATA_MISSES", .pme_code = 0x00001010, .pme_counters = 0x3, .pme_desc = "Joint TLB data misses" }, {.pme_name="FP_EXCEPTION_STALL_CYCLES", .pme_code = 0x00001717, .pme_counters = 0x3, .pme_desc = "FP possible exception cycles" }, {.pme_name="SCACHE_MISSES", .pme_code = 0x00000a0a, .pme_counters = 0x3, .pme_desc = "Secondary cache misses" }, {.pme_name="BRANCHES_ISSUED", .pme_code = 0x00001212, .pme_counters = 0x3, .pme_desc = "Branches issued" }, {.pme_name="ICACHE_MISSES", .pme_code = 0x00000b0b, .pme_counters = 0x3, .pme_desc = "Instruction cache misses" }, {.pme_name="INSTRUCTIONS_ISSUED", .pme_code = 0x00000101, .pme_counters = 0x3, .pme_desc = "Total instructions issued" }, {.pme_name="JTLB_INSTRUCTION_MISSES", .pme_code = 0x00000f0f, .pme_counters = 0x3, .pme_desc = "Joint TLB instruction misses" }, {.pme_name="LOAD_INSTRUCTIONS_ISSUED", .pme_code = 0x00000404, .pme_counters = 0x3, .pme_desc = "Load instructions issued" }, {.pme_name="EXTERNAL_CACHE_MISSES", .pme_code = 0x00000808, .pme_counters = 0x3, .pme_desc = "External Cache Misses" }, {.pme_name="BRANCHES_TAKEN", .pme_code = 0x00001111, .pme_counters = 0x3, .pme_desc = "Branches taken" }, 
{.pme_name="DTLB_MISSES", .pme_code = 0x00000d0d, .pme_counters = 0x3, .pme_desc = "Data TLB misses" }, {.pme_name="CACHE_INSTRUCTION_STALL_CYCLES", .pme_code = 0x00001d1d, .pme_counters = 0x3, .pme_desc = "Cache instruction stall cycles" }, {.pme_name="CYCLES", .pme_code = 0x00000000, .pme_counters = 0x3, .pme_desc = "Clock cycles" }, {.pme_name="COP0_SLIP_CYCLES", .pme_code = 0x00001919, .pme_counters = 0x3, .pme_desc = "Coprocessor 0 slip cycles" }, {.pme_name="ITLB_MISSES", .pme_code = 0x00000e0e, .pme_counters = 0x3, .pme_desc = "Instruction TLB misses" }, {.pme_name="DCACHE_MISSES", .pme_code = 0x00000c0c, .pme_counters = 0x3, .pme_desc = "Data cache misses" }, }; static pme_gen_mips64_entry_t gen_mips64_rm9000_pe[] = { {.pme_name="FP_POSSIBLE_EXCEPTION_CYCLES", .pme_code = 0x00001717, .pme_counters = 0x3, .pme_desc = "Floating-point possible exception cycles" }, {.pme_name="STORE_INSTRUCTIONS_ISSUED", .pme_code = 0x00000505, .pme_counters = 0x3, .pme_desc = "Store instructions issued" }, {.pme_name="STALL_CYCLES", .pme_code = 0x00000909, .pme_counters = 0x3, .pme_desc = "Stall cycles" }, {.pme_name="L2_WRITEBACKS", .pme_code = 0x00001313, .pme_counters = 0x3, .pme_desc = "L2 cache writebacks" }, {.pme_name="NONBLOCKING_LOAD_SLIP_CYCLES", .pme_code = 0x00001a1a, .pme_counters = 0x3, .pme_desc = "Slip cycles due to pending non-blocking loads" }, {.pme_name="NONBLOCKING_LOAD_PENDING_EXCEPTION_STALL_CYCLES", .pme_code = 0x00001e1e, .pme_counters = 0x3, .pme_desc = "Stall cycles due to pending non-blocking loads - stall start of exception" }, {.pme_name="BRANCH_MISSPREDICTS", .pme_code = 0x00000707, .pme_counters = 0x3, .pme_desc = "Branch mispredictions" }, {.pme_name="DCACHE_MISS_STALL_CYCLES", .pme_code = 0x00001515, .pme_counters = 0x3, .pme_desc = "Dcache-miss stall cycles" }, {.pme_name="WRITE_BUFFER_FULL_STALL_CYCLES", .pme_code = 0x00001b1b, .pme_counters = 0x3, .pme_desc = "Stall cycles due to a full write buffer" }, {.pme_name="INT_INSTRUCTIONS_ISSUED", 
.pme_code = 0x00000303, .pme_counters = 0x3, .pme_desc = "Integer instructions issued" }, {.pme_name="FP_INSTRUCTIONS_ISSUED", .pme_code = 0x00000202, .pme_counters = 0x3, .pme_desc = "Floating-point instructions issued" }, {.pme_name="JTLB_DATA_MISSES", .pme_code = 0x00001010, .pme_counters = 0x3, .pme_desc = "Joint TLB data misses" }, {.pme_name="L2_CACHE_MISSES", .pme_code = 0x00000a0a, .pme_counters = 0x3, .pme_desc = "L2 cache misses" }, {.pme_name="DCACHE_WRITEBACKS", .pme_code = 0x00001414, .pme_counters = 0x3, .pme_desc = "Dcache writebacks" }, {.pme_name="BRANCHES_ISSUED", .pme_code = 0x00001212, .pme_counters = 0x3, .pme_desc = "Branch instructions issued" }, {.pme_name="ICACHE_MISSES", .pme_code = 0x00000b0b, .pme_counters = 0x3, .pme_desc = "Icache misses" }, {.pme_name="INSTRUCTIONS_ISSUED", .pme_code = 0x00000101, .pme_counters = 0x3, .pme_desc = "Instructions issued" }, {.pme_name="MULTIPLIER_BUSY_SLIP_CYCLES", .pme_code = 0x00001818, .pme_counters = 0x3, .pme_desc = "Slip cycles due to busy multiplier" }, {.pme_name="INSTRUCTIONS_DUAL_ISSUED", .pme_code = 0x00000606, .pme_counters = 0x3, .pme_desc = "Dual-issued instruction pairs" }, {.pme_name="CACHE_INSN_STALL_CYCLES", .pme_code = 0x00001c1c, .pme_counters = 0x3, .pme_desc = "Stall cycles due to cache instructions" }, {.pme_name="JTLB_INSTRUCTION_MISSES", .pme_code = 0x00000f0f, .pme_counters = 0x3, .pme_desc = "Joint TLB instruction misses" }, {.pme_name="LOAD_INSTRUCTIONS_ISSUED", .pme_code = 0x00000404, .pme_counters = 0x3, .pme_desc = "Load instructions issued" }, {.pme_name="CACHE_REMISSES", .pme_code = 0x00001616, .pme_counters = 0x3, .pme_desc = "Cache remisses" }, {.pme_name="BRANCHES_TAKEN", .pme_code = 0x00001111, .pme_counters = 0x3, .pme_desc = "Branches taken" }, {.pme_name="DTLB_MISSES", .pme_code = 0x00000d0d, .pme_counters = 0x3, .pme_desc = "Data TLB misses" }, {.pme_name="CYCLES", .pme_code = 0x00000000, .pme_counters = 0x3, .pme_desc = "Processor clock cycles" }, 
{.pme_name="COP0_SLIP_CYCLES", .pme_code = 0x00001919, .pme_counters = 0x3, .pme_desc = "Co-processor 0 slip cycles" }, {.pme_name="ITLB_MISSES", .pme_code = 0x00000e0e, .pme_counters = 0x3, .pme_desc = "Instruction TLB misses" }, {.pme_name="DCACHE_MISSES", .pme_code = 0x00000c0c, .pme_counters = 0x3, .pme_desc = "Dcache misses" }, }; /* Event table for the sb1 model. Each entry carries a name, an event code, a pme_counters value (presumably a bitmask of the counters able to count the event -- confirm) and a human-readable description. */ static pme_gen_mips64_entry_t gen_mips64_sb1_pe[] = { {.pme_name="DATA_DEPENDENCY_REPLAY", .pme_code = 0x1e1e1e1e, .pme_counters = 0xf, .pme_desc = "Data dependency replay" }, {.pme_name="DCACHE_READ_MISS", .pme_code = 0x0f0f0f00, .pme_counters = 0xe, .pme_desc = "Dcache read results in a miss" }, {.pme_name="R_RESP_OTHER_CORE_D_MOD", .pme_code = 0x19191900, .pme_counters = 0xe, .pme_desc = "Read response comes from the other core with D_MOD set" }, {.pme_name="RQ_LENGTH", .pme_code = 0x01010100, .pme_counters = 0xe, .pme_desc = "Read queue length" }, {.pme_name="READ_RQ_NOPS_SENT_TO_ABUS", .pme_code = 0x14141400, .pme_counters = 0xe, .pme_desc = "Read requests and NOPs sent to ZB Abus" }, {.pme_name="R_RESP_OTHER_CORE", .pme_code = 0x18181800, .pme_counters = 0xe, .pme_desc = "Read response comes from the other core" }, {.pme_name="SNOOP_RQ_HITS", .pme_code = 0x16161600, .pme_counters = 0xe, .pme_desc = "Snoop request hits anywhere" }, {.pme_name="LOAD_SURVIVED_STAGE4", .pme_code = 0x08080800, .pme_counters = 0xe, .pme_desc = "Load survived stage 4" }, {.pme_name="BRANCH_PREDICTED_TAKEN", .pme_code = 0x2e2e2e00, .pme_counters = 0xe, .pme_desc = "Predicted taken conditional branch" }, /* description previously duplicated the ISSUE_L0 text */ {.pme_name="ISSUE_L1", .pme_code = 0x29292900, .pme_counters = 0xe, .pme_desc = "Issue to L1" }, {.pme_name="ANY_REPLAY", .pme_code = 0x1f1f1f1f, .pme_counters = 0xf, .pme_desc = "Any replay except mispredict" }, {.pme_name="LD_ST_HITS_PREFETCH_IN_QUEUE", .pme_code = 0x06060600, .pme_counters = 0xe, .pme_desc = "Load/store hits prefetch in read queue" }, {.pme_name="NOT_DATA_READY", .pme_code = 0x23232300, .pme_counters = 0xe, .pme_desc = "Not data 
ready" }, {.pme_name="DCFIFO", .pme_code = 0x1c1c1c1c, .pme_counters = 0xf, .pme_desc = "DCFIFO" }, {.pme_name="ISSUE_E1", .pme_code = 0x2b2b2b00, .pme_counters = 0xe, .pme_desc = "Issue to E1" }, {.pme_name="PREFETCH_HITS_CACHE_OR_READ_Q", .pme_code = 0x05050500, .pme_counters = 0xe, .pme_desc = "Prefetch hits in cache or read queue" }, {.pme_name="BRANCH_STAGE4", .pme_code = 0x2c2c2c00, .pme_counters = 0xe, .pme_desc = "Branch survived stage 4" }, {.pme_name="SNOOP_ADDR_Q_FULL", .pme_code = 0x17171700, .pme_counters = 0xe, .pme_desc = "Snoop address queue is full" }, {.pme_name="CONSUMER_WAITING_FOR_LOAD", .pme_code = 0x22222200, .pme_counters = 0xe, .pme_desc = "load consumer waiting for dfill" }, {.pme_name="VICTIM_WRITEBACK", .pme_code = 0x0d0d0d00, .pme_counters = 0xe, .pme_desc = "A writeback occurs due to replacement" }, {.pme_name="BRANCH_MISSPREDICTS", .pme_code = 0x2f2f2f00, .pme_counters = 0xe, .pme_desc = "Branch mispredicts" }, {.pme_name="UPGRADE_SHARED_TO_EXCLUSIVE", .pme_code = 0x07070700, .pme_counters = 0xe, .pme_desc = "A line is upgraded from shared to exclusive" }, {.pme_name="READ_HITS_READ_Q", .pme_code = 0x04040400, .pme_counters = 0xe, .pme_desc = "Read hits in read queue" }, {.pme_name="INSN_STAGE4", .pme_code = 0x27272700, .pme_counters = 0xe, .pme_desc = "One or more instructions survives stage 4" }, {.pme_name="UNCACHED_RQ_LENGTH", .pme_code = 0x02020200, .pme_counters = 0xe, .pme_desc = "Number of valid uncached entries in read queue" }, {.pme_name="READ_RQ_SENT_TO_ABUS", .pme_code = 0x17171700, .pme_counters = 0xe, .pme_desc = "Read requests sent to ZB Abus" }, {.pme_name="DCACHE_FILL_SHARED_LINE", .pme_code = 0x0b0b0b00, .pme_counters = 0xe, .pme_desc = "Dcache is filled with shared line" }, {.pme_name="ISSUE_CONFLICT_DUE_IMISS", .pme_code = 0x25252500, .pme_counters = 0xe, .pme_desc = "issue conflict due to imiss using LS0" }, {.pme_name="NO_VALID_INSN", .pme_code = 0x21212100, .pme_counters = 0xe, .pme_desc = "No valid instr to 
issue" }, {.pme_name="ISSUE_E0", .pme_code = 0x2a2a2a00, .pme_counters = 0xe, .pme_desc = "Issue to E0" }, {.pme_name="INSN_SURVIVED_STAGE7", .pme_code = 0x00000000, .pme_counters = 0xe, .pme_desc = "Instruction survived stage 7" }, {.pme_name="BRANCH_REALLY_TAKEN", .pme_code = 0x2d2d2d00, .pme_counters = 0xe, .pme_desc = "Conditional branch was really taken" }, {.pme_name="STORE_COND_FAILED", .pme_code = 0x1a1a1a00, .pme_counters = 0xe, .pme_desc = "Failed store conditional" }, {.pme_name="MAX_ISSUE", .pme_code = 0x20202000, .pme_counters = 0xe, .pme_desc = "Max issue" }, {.pme_name="BIU_STALLS_ON_ZB_ADDR_BUS", .pme_code = 0x11111100, .pme_counters = 0xe, .pme_desc = "BIU stalls on ZB addr bus" }, {.pme_name="STORE_SURVIVED_STAGE4", .pme_code = 0x09090900, .pme_counters = 0xe, .pme_desc = "Store survived stage 4" }, {.pme_name="RESOURCE_CONSTRAINT", .pme_code = 0x24242400, .pme_counters = 0xe, .pme_desc = "Resource (L0/1 E0/1) constraint" }, {.pme_name="DCACHE_FILL_REPLAY", .pme_code = 0x1b1b1b1b, .pme_counters = 0xf, .pme_desc = "Dcache fill replay" }, {.pme_name="BIU_STALLS_ON_ZB_DATA_BUS", .pme_code = 0x12121200, .pme_counters = 0xe, .pme_desc = "BIU stalls on ZB data bus" }, {.pme_name="ISSUE_CONFLICT_DUE_DFILL", .pme_code = 0x26262600, .pme_counters = 0xe, .pme_desc = "issue conflict due to dfill using LS0/1" }, {.pme_name="WRITEBACK_RETURNS", .pme_code = 0x0f0f0f00, .pme_counters = 0xe, .pme_desc = "Number of instruction returns" }, {.pme_name="DCACHE_FILLED_SHD_NONC_EXC", .pme_code = 0x0a0a0a00, .pme_counters = 0xe, .pme_desc = "Dcache is filled (shared, nonc, exclusive)" }, {.pme_name="ISSUE_L0", .pme_code = 0x28282800, .pme_counters = 0xe, .pme_desc = "Issue to L0" }, {.pme_name="CYCLES", .pme_code = 0x10101010, .pme_counters = 0xf, .pme_desc = "Elapsed cycles" }, {.pme_name="MBOX_RQ_WHEN_BIU_BUSY", .pme_code = 0x0e0e0e00, .pme_counters = 0xe, .pme_desc = "MBOX requests to BIU when BIU busy" }, {.pme_name="MBOX_REPLAY", .pme_code = 0x1d1d1d1d, 
.pme_counters = 0xf, .pme_desc = "MBOX replay" }, }; static pme_gen_mips64_entry_t gen_mips64_vr5432_pe[] = { {.pme_name="INSTRUCTIONS_EXECUTED", .pme_code = 0x00000101, .pme_counters = 0x3, .pme_desc = "(Instructions executed)/2 and truncated" }, {.pme_name="JTLB_REFILLS", .pme_code = 0x00000707, .pme_counters = 0x3, .pme_desc = "JTLB refills" }, {.pme_name="BRANCHES", .pme_code = 0x00000404, .pme_counters = 0x3, .pme_desc = "Branch execution (no jumps or jump registers)" }, {.pme_name="FP_INSTRUCTIONS", .pme_code = 0x00000505, .pme_counters = 0x3, .pme_desc = "(FP instruction execution) / 2 and truncated excluding cp1 loads and stores" }, {.pme_name="BRANCHES_MISPREDICTED", .pme_code = 0x00000a0a, .pme_counters = 0x3, .pme_desc = "Branches mispredicted" }, {.pme_name="DOUBLEWORDS_FLUSHED", .pme_code = 0x00000606, .pme_counters = 0x3, .pme_desc = "Doublewords flushed to main memory (no uncached stores)" }, {.pme_name="ICACHE_MISSES", .pme_code = 0x00000909, .pme_counters = 0x3, .pme_desc = "Instruction cache misses (no D-cache misses)" }, {.pme_name="LOAD_PREF_CACHE_INSTRUCTIONS", .pme_code = 0x00000202, .pme_counters = 0x3, .pme_desc = "Load, prefetch/CacheOps execution (no sync)" }, {.pme_name="CYCLES", .pme_code = 0x00000000, .pme_counters = 0x3, .pme_desc = "Processor cycles (PClock)" }, {.pme_name="DCACHE_MISSES", .pme_code = 0x00000808, .pme_counters = 0x3, .pme_desc = "Data cache misses (no I-cache misses)" }, {.pme_name="STORES", .pme_code = 0x00000303, .pme_counters = 0x3, .pme_desc = "Store execution" }, }; static pme_gen_mips64_entry_t gen_mips64_vr5500_pe[] = { {.pme_name="INSTRUCTIONS_EXECUTED", .pme_code = 0x00000101, .pme_counters = 0x3, .pme_desc = "Instructions executed" }, {.pme_name="JTLB_REFILLS", .pme_code = 0x00000707, .pme_counters = 0x3, .pme_desc = "TLB refill" }, {.pme_name="BRANCHES", .pme_code = 0x00000404, .pme_counters = 0x3, .pme_desc = "Execution of branch instruction" }, {.pme_name="FP_INSTRUCTIONS", .pme_code = 0x00000505, 
.pme_counters = 0x3, .pme_desc = "Execution of floating-point instruction" }, {.pme_name="BRANCHES_MISPREDICTED", .pme_code = 0x00000a0a, .pme_counters = 0x3, .pme_desc = "Branch prediction miss" }, {.pme_name="DOUBLEWORDS_FLUSHED", .pme_code = 0x00000606, .pme_counters = 0x3, .pme_desc = "Doubleword flush to main memory" }, {.pme_name="ICACHE_MISSES", .pme_code = 0x00000909, .pme_counters = 0x3, .pme_desc = "Instruction cache miss" }, {.pme_name="LOAD_PREF_CACHE_INSTRUCTIONS", .pme_code = 0x00000202, .pme_counters = 0x3, .pme_desc = "Execution of load/prefetch/cache instruction" }, {.pme_name="CYCLES", .pme_code = 0x00000000, .pme_counters = 0x3, .pme_desc = "Processor clock cycles" }, {.pme_name="DCACHE_MISSES", .pme_code = 0x00000808, .pme_counters = 0x3, .pme_desc = "Data cache miss" }, {.pme_name="STORES", .pme_code = 0x00000303, .pme_counters = 0x3, .pme_desc = "Execution of store instruction" }, }; papi-5.6.0/src/freebsd-memory.h000664 001750 001750 00000000170 13216244361 020411 0ustar00jshenry1963jshenry1963000000 000000 int _freebsd_get_memory_info( PAPI_hw_info_t *hw_info, int id); int _papi_freebsd_get_dmem_info(PAPI_dmem_info_t *d); papi-5.6.0/src/perfctr-2.6.x/etc/costs/Athlon-850000775 001750 001750 00000001265 13216244366 023144 0ustar00jshenry1963jshenry1963000000 000000 [data from an 850 Mhz Athlon] PERFCTR INIT: vendor 2, family 6, model 4, stepping 2, clock 850063 kHz PERFCTR INIT: NITER == 64 PERFCTR INIT: loop overhead is 90 cycles PERFCTR INIT: rdtsc cost is 15.2 cycles (1064 total) PERFCTR INIT: rdpmc cost is 13.3 cycles (947 total) PERFCTR INIT: rdmsr (counter) cost is 51.5 cycles (3391 total) PERFCTR INIT: rdmsr (evntsel) cost is 52.6 cycles (3458 total) PERFCTR INIT: wrmsr (counter) cost is 79.7 cycles (5191 total) PERFCTR INIT: wrmsr (evntsel) cost is 231.8 cycles (14928 total) PERFCTR INIT: read cr4 cost is 7.0 cycles (540 total) PERFCTR INIT: write cr4 cost is 62.9 cycles (4121 total) perfctr: driver 2.3.4, cpu type AMD K7 at 850063 
kHz papi-5.6.0/src/freebsd-memory.c000664 001750 001750 00000002076 13216244361 020413 0ustar00jshenry1963jshenry1963000000 000000 /****************************/ /* THIS IS OPEN SOURCE CODE */ /****************************/ /* * File: freebsd-memory.c * Author: Harald Servat * redcrash@gmail.com * Mod: James Ralph * ralph@cs.utk.edu */ #include "papi.h" #include "papi_internal.h" #include "x86_cpuid_info.h" #define UNREFERENCED(x) (void)x #if defined(__i386__)||defined(__x86_64__) static int x86_get_memory_info( PAPI_hw_info_t *hw_info ) { int retval = PAPI_OK; switch ( hw_info->vendor ) { case PAPI_VENDOR_AMD: case PAPI_VENDOR_INTEL: retval = _x86_cache_info( &hw_info->mem_hierarchy ); break; default: PAPIERROR( "Unknown vendor in memory information call for x86." ); return PAPI_ENOIMPL; } return retval; } #endif int _freebsd_get_memory_info( PAPI_hw_info_t *hw_info, int id) { UNREFERENCED(id); UNREFERENCED(hw_info); #if defined(__i386__)||defined(__x86_64__) x86_get_memory_info( hw_info ); #endif return PAPI_ENOIMPL; } int _papi_freebsd_get_dmem_info(PAPI_dmem_info_t *d) { /* TODO */ d->pagesize = getpagesize(); return PAPI_OK; } papi-5.6.0/src/libpfm4/lib/pfmlib_itanium2.c000664 001750 001750 00000173430 13216244365 022666 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_itanium2.c : support for the Itanium2 PMU family * * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. 
* Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include #include #include #include /* public headers */ #include /* private headers */ #include "pfmlib_priv.h" /* library private */ #include "pfmlib_priv_ia64.h" /* architecture private */ #include "pfmlib_itanium2_priv.h" /* PMU private */ #include "itanium2_events.h" /* PMU private */ #define is_ear(i) event_is_ear(itanium2_pe+(i)) #define is_ear_tlb(i) event_is_ear_tlb(itanium2_pe+(i)) #define is_ear_alat(i) event_is_ear_alat(itanium2_pe+(i)) #define is_ear_cache(i) event_is_ear_cache(itanium2_pe+(i)) #define is_iear(i) event_is_iear(itanium2_pe+(i)) #define is_dear(i) event_is_dear(itanium2_pe+(i)) #define is_btb(i) event_is_btb(itanium2_pe+(i)) #define has_opcm(i) event_opcm_ok(itanium2_pe+(i)) #define has_iarr(i) event_iarr_ok(itanium2_pe+(i)) #define has_darr(i) event_darr_ok(itanium2_pe+(i)) #define evt_use_opcm(e) ((e)->pfp_ita2_pmc8.opcm_used != 0 || (e)->pfp_ita2_pmc9.opcm_used !=0) #define evt_use_irange(e) ((e)->pfp_ita2_irange.rr_used) #define evt_use_drange(e) ((e)->pfp_ita2_drange.rr_used) #define evt_grp(e) (int)itanium2_pe[e].pme_qualifiers.pme_qual.pme_group #define evt_set(e) (int)itanium2_pe[e].pme_qualifiers.pme_qual.pme_set #define evt_umask(e) itanium2_pe[e].pme_umask #define FINE_MODE_BOUNDARY_BITS 12 #define FINE_MODE_MASK ~((1U<<12)-1) /* NOTE(review): the mask is computed in unsigned int, so if it is ANDed with an address type wider than unsigned int the upper address bits are cleared rather than preserved; the expansion is also unparenthesized and does not use FINE_MODE_BOUNDARY_BITS -- confirm whether a full-width mask was intended */ /* let's define some handy shortcuts! */ #define pmc_plm pmc_ita2_counter_reg.pmc_plm #define pmc_ev pmc_ita2_counter_reg.pmc_ev #define pmc_oi pmc_ita2_counter_reg.pmc_oi #define pmc_pm pmc_ita2_counter_reg.pmc_pm #define pmc_es pmc_ita2_counter_reg.pmc_es #define pmc_umask pmc_ita2_counter_reg.pmc_umask #define pmc_thres pmc_ita2_counter_reg.pmc_thres #define pmc_ism pmc_ita2_counter_reg.pmc_ism static char * pfm_ita2_get_event_name(unsigned int i); /* * Description of the PMC register mappings use by * this module (as reported in pfmlib_reg_t.reg_num): * * 0 -> PMC0 * 1 -> PMC1 * n -> PMCn * * The following are in the model specific rr_br[]: * IBR0 -> 0 * IBR1 -> 1 * ... 
* IBR7 -> 7 * DBR0 -> 0 * DBR1 -> 1 * ... * DBR7 -> 7 * * We do not use a mapping table, instead we make up the * values on the fly given the base. */ /* * The Itanium2 PMU has a bug in the fine mode implementation. * It only sees ranges with a granularity of two bundles. * So we prepare for the day they fix it. */ static int has_fine_mode_bug; static int pfm_ita2_detect(void) { int tmp; int ret = PFMLIB_ERR_NOTSUPP; tmp = pfm_ia64_get_cpu_family(); if (tmp == 0x1f) { has_fine_mode_bug = 1; ret = PFMLIB_SUCCESS; } return ret; } /* * Check the event for incompatibilities. This is useful * for L1 and L2 related events. Due to wire limitations, * some caches events are separated into sets. There * are 5 sets for the L1D cache group and 6 sets for L2 group. * It is NOT possible to simultaneously measure events from * differents sets within a group. For instance, you cannot * measure events from set0 and set1 in L1D cache group. However * it is possible to measure set0 in L1D and set1 in L2 at the same * time. * * This function verifies that the set constraint are respected. */ static int check_cross_groups_and_umasks(pfmlib_input_param_t *inp) { unsigned long ref_umask, umask; int g, s; unsigned int cnt = inp->pfp_event_count; pfmlib_event_t *e = inp->pfp_events; unsigned int i, j; /* * XXX: could possibly be optimized */ for (i=0; i < cnt; i++) { g = evt_grp(e[i].event); s = evt_set(e[i].event); if (g == PFMLIB_ITA2_EVT_NO_GRP) continue; ref_umask = evt_umask(e[i].event); for (j=i+1; j < cnt; j++) { if (evt_grp(e[j].event) != g) continue; if (evt_set(e[j].event) != s) return PFMLIB_ERR_EVTSET; /* only care about L2 cache group */ if (g != PFMLIB_ITA2_EVT_L2_CACHE_GRP || (s == 1 || s == 2)) continue; umask = evt_umask(e[j].event); /* * there is no assignement possible if the event in PMC4 * has a umask (ref_umask) and an event (from the same * set) also has a umask AND it is different. 
For some * sets, the umasks are shared, therefore the value * programmed into PMC4 determines the umask for all * the other events (with umask) from the set. */ if (umask && ref_umask != umask) return PFMLIB_ERR_NOASSIGN; } } return PFMLIB_SUCCESS; } /* * Certain prefetch events must be treated specially when instruction range restriction * is in use because they can only be constrained by IBRP1 in fine-mode. Other events * will use IBRP0 if tagged as a demand fetch OR IBPR1 if tagged as a prefetch match. * From the library's point of view there is no way of distinguishing this, so we leave * it up to the user to interpret the results. * * Events which can be qualified by the two pairs depending on their tag: * - IBP_BUNPAIRS_IN * - L1I_FETCH_RAB_HIT * - L1I_FETCH_ISB_HIT * - L1I_FILLS * * This function returns the number of qualifying prefetch events found * * XXX: not clear which events do qualify as prefetch events. */ static int prefetch_events[]={ PME_ITA2_L1I_PREFETCHES, PME_ITA2_L1I_STRM_PREFETCHES, PME_ITA2_L2_INST_PREFETCHES }; #define NPREFETCH_EVENTS sizeof(prefetch_events)/sizeof(int) static int check_prefetch_events(pfmlib_input_param_t *inp) { int code; int prefetch_codes[NPREFETCH_EVENTS]; unsigned int i, j, count; int c; int found = 0; for(i=0; i < NPREFETCH_EVENTS; i++) { pfm_get_event_code(prefetch_events[i], &code); prefetch_codes[i] = code; } count = inp->pfp_event_count; for(i=0; i < count; i++) { pfm_get_event_code(inp->pfp_events[i].event, &c); for(j=0; j < NPREFETCH_EVENTS; j++) { if (c == prefetch_codes[j]) found++; } } return found; } /* * IA64_INST_RETIRED (and subevents) is the only event which can be measured on all * 4 IBR when non-fine mode is not possible. 
* * This function returns: * - the number of events matching the IA64_INST_RETIRED code * - in retired_mask the bottom 4 bits indicates which of the 4 INST_RETIRED event * is present */ static unsigned int check_inst_retired_events(pfmlib_input_param_t *inp, unsigned long *retired_mask) { int code; int c; unsigned int i, count, found = 0; unsigned long umask, mask; pfm_get_event_code(PME_ITA2_IA64_INST_RETIRED_THIS, &code); count = inp->pfp_event_count; mask = 0; for(i=0; i < count; i++) { pfm_get_event_code(inp->pfp_events[i].event, &c); if (c == code) { pfm_ita2_get_event_umask(inp->pfp_events[i].event, &umask); switch(umask) { case 0: mask |= 1; break; case 1: mask |= 2; break; case 2: mask |= 4; break; case 3: mask |= 8; break; } found++; } } if (retired_mask) *retired_mask = mask; return found; } static int check_fine_mode_possible(pfmlib_ita2_input_rr_t *rr, int n) { pfmlib_ita2_input_rr_desc_t *lim = rr->rr_limits; int i; for(i=0; i < n; i++) { if ((lim[i].rr_start & FINE_MODE_MASK) != (lim[i].rr_end & FINE_MODE_MASK)) return 0; } return 1; } /* * mode = 0 -> check code (enforce bundle alignment) * mode = 1 -> check data */ static int check_intervals(pfmlib_ita2_input_rr_t *irr, int mode, unsigned int *n_intervals) { unsigned int i; pfmlib_ita2_input_rr_desc_t *lim = irr->rr_limits; for(i=0; i < 4; i++) { /* end marker */ if (lim[i].rr_start == 0 && lim[i].rr_end == 0) break; /* invalid entry */ if (lim[i].rr_start >= lim[i].rr_end) return PFMLIB_ERR_IRRINVAL; if (mode == 0 && (lim[i].rr_start & 0xf || lim[i].rr_end & 0xf)) return PFMLIB_ERR_IRRALIGN; } *n_intervals = i; return PFMLIB_SUCCESS; } static int valid_assign(pfmlib_event_t *e, unsigned int *as, pfmlib_regmask_t *r_pmcs, unsigned int cnt) { unsigned long pmc4_umask = 0, umask; char *name; int l1_grp_present = 0, l2_grp_present = 0; unsigned int i; int c, failure; int need_pmc5, need_pmc4; int pmc5_evt = -1, pmc4_evt = -1; if (PFMLIB_DEBUG()) { unsigned int j; for(j=0;jpfp_event_count; for(i=0; i < 
count; i++) { for (j=0; j < NCANCEL_EVENTS; j++) { pfm_get_event_code(inp->pfp_events[i].event, &code); if (code == cancel_codes[j]) { if (idx != -1) { return PFMLIB_ERR_INVAL; } idx = inp->pfp_events[i].event; } } } return PFMLIB_SUCCESS; } /* * Automatically dispatch events to corresponding counters following constraints. * Upon return the pfarg_regt structure is ready to be submitted to kernel */ static int pfm_ita2_dispatch_counters(pfmlib_input_param_t *inp, pfmlib_ita2_input_param_t *mod_in, pfmlib_output_param_t *outp) { #define has_counter(e,b) (itanium2_pe[e].pme_counters & (1 << (b)) ? (b) : 0) pfmlib_ita2_input_param_t *param = mod_in; pfm_ita2_pmc_reg_t reg; pfmlib_event_t *e; pfmlib_reg_t *pc, *pd; pfmlib_regmask_t *r_pmcs; unsigned int i,j,k,l; int ret; unsigned int max_l0, max_l1, max_l2, max_l3; unsigned int assign[PMU_ITA2_NUM_COUNTERS]; unsigned int m, cnt; e = inp->pfp_events; pc = outp->pfp_pmcs; pd = outp->pfp_pmds; cnt = inp->pfp_event_count; r_pmcs = &inp->pfp_unavail_pmcs; if (PFMLIB_DEBUG()) for (m=0; m < cnt; m++) { DPRINT("ev[%d]=%s counters=0x%lx\n", m, itanium2_pe[e[m].event].pme_name, itanium2_pe[e[m].event].pme_counters); } if (cnt > PMU_ITA2_NUM_COUNTERS) return PFMLIB_ERR_TOOMANY; ret = check_cross_groups_and_umasks(inp); if (ret != PFMLIB_SUCCESS) return ret; ret = check_cancel_events(inp); if (ret != PFMLIB_SUCCESS) return ret; max_l0 = PMU_ITA2_FIRST_COUNTER + PMU_ITA2_NUM_COUNTERS; max_l1 = PMU_ITA2_FIRST_COUNTER + PMU_ITA2_NUM_COUNTERS*(cnt>1); max_l2 = PMU_ITA2_FIRST_COUNTER + PMU_ITA2_NUM_COUNTERS*(cnt>2); max_l3 = PMU_ITA2_FIRST_COUNTER + PMU_ITA2_NUM_COUNTERS*(cnt>3); DPRINT("max_l0=%u max_l1=%u max_l2=%u max_l3=%u\n", max_l0, max_l1, max_l2, max_l3); /* * For now, worst case in the loop nest: 4! 
(factorial) */ for (i=PMU_ITA2_FIRST_COUNTER; i < max_l0; i++) { assign[0] = has_counter(e[0].event,i); if (max_l1 == PMU_ITA2_FIRST_COUNTER && valid_assign(e, assign, r_pmcs, cnt) == PFMLIB_SUCCESS) goto done; for (j=PMU_ITA2_FIRST_COUNTER; j < max_l1; j++) { if (j == i) continue; assign[1] = has_counter(e[1].event,j); if (max_l2 == PMU_ITA2_FIRST_COUNTER && valid_assign(e, assign, r_pmcs, cnt) == PFMLIB_SUCCESS) goto done; for (k=PMU_ITA2_FIRST_COUNTER; k < max_l2; k++) { if(k == i || k == j) continue; assign[2] = has_counter(e[2].event,k); if (max_l3 == PMU_ITA2_FIRST_COUNTER && valid_assign(e, assign, r_pmcs, cnt) == PFMLIB_SUCCESS) goto done; for (l=PMU_ITA2_FIRST_COUNTER; l < max_l3; l++) { if(l == i || l == j || l == k) continue; assign[3] = has_counter(e[3].event,l); if (valid_assign(e, assign, r_pmcs, cnt) == PFMLIB_SUCCESS) goto done; } } } } /* we cannot satisfy the constraints */ return PFMLIB_ERR_NOASSIGN; done: for (j=0; j < cnt ; j++ ) { reg.pmc_val = 0; /* clear all, bits 26-27 must be zero for proper operations */ /* if plm is 0, then assume not specified per-event and use default */ reg.pmc_plm = inp->pfp_events[j].plm ? inp->pfp_events[j].plm : inp->pfp_dfl_plm; reg.pmc_oi = 1; /* overflow interrupt */ reg.pmc_pm = inp->pfp_flags & PFMLIB_PFP_SYSTEMWIDE ? 1 : 0; reg.pmc_thres = param ? param->pfp_ita2_counters[j].thres: 0; reg.pmc_ism = param ? param->pfp_ita2_counters[j].ism : PFMLIB_ITA2_ISM_BOTH; reg.pmc_umask = is_ear(e[j].event) ? 0x0 : itanium2_pe[e[j].event].pme_umask; reg.pmc_es = itanium2_pe[e[j].event].pme_code; /* * Note that we don't force PMC4.pmc_ena = 1 because the kernel takes care of this for us. 
* This way we don't have to program something in PMC4 even when we don't use it */ pc[j].reg_num = assign[j]; pc[j].reg_value = reg.pmc_val; pc[j].reg_addr = pc[j].reg_alt_addr = assign[j]; pd[j].reg_num = assign[j]; /* the PMD uses the same index as its PMC: set both the primary and the alternate address, as done for pc[j] */ pd[j].reg_addr = pd[j].reg_alt_addr = assign[j]; __pfm_vbprintf("[PMC%u(pmc%u)=0x%06lx thres=%d es=0x%02x plm=%d umask=0x%x pm=%d ism=0x%x oi=%d] %s\n", assign[j], assign[j], reg.pmc_val, reg.pmc_thres, reg.pmc_es,reg.pmc_plm, reg.pmc_umask, reg.pmc_pm, reg.pmc_ism, reg.pmc_oi, itanium2_pe[e[j].event].pme_name); __pfm_vbprintf("[PMD%u(pmd%u)]\n", pd[j].reg_num, pd[j].reg_num); } /* number of PMC registers programmed */ outp->pfp_pmc_count = cnt; outp->pfp_pmd_count = cnt; return PFMLIB_SUCCESS; } /* * Set up the I-EAR: appends the PMC10 configuration and the PMD0/PMD1 entries * to outp, after the entries already counted in pfp_pmc_count/pfp_pmd_count. * The configuration comes from mod_in when its ear_used flag is set; otherwise * it is derived from the first I-EAR event found in inp, and if there is none * the function returns PFMLIB_SUCCESS without programming anything. * Returns PFMLIB_ERR_INVAL for an EAR mode other than TLB or cache and * PFMLIB_ERR_NOASSIGN when PMC10 is marked unavailable. */ static int pfm_dispatch_iear(pfmlib_input_param_t *inp, pfmlib_ita2_input_param_t *mod_in, pfmlib_output_param_t *outp) { pfm_ita2_pmc_reg_t reg; pfmlib_ita2_input_param_t *param = mod_in; pfmlib_reg_t *pc, *pd; pfmlib_ita2_input_param_t fake_param; unsigned int pos1, pos2; unsigned int i, count; pc = outp->pfp_pmcs; pd = outp->pfp_pmds; pos1 = outp->pfp_pmc_count; pos2 = outp->pfp_pmd_count; count = inp->pfp_event_count; for (i=0; i < count; i++) { if (is_iear(inp->pfp_events[i].event)) break; } if (param == NULL || param->pfp_ita2_iear.ear_used == 0) { /* * case 3: no I-EAR event, no (or nothing) in param->pfp_ita2_iear.ear_used */ if (i == count) return PFMLIB_SUCCESS; memset(&fake_param, 0, sizeof(fake_param)); param = &fake_param; /* * case 1: extract all information for event (name) */ pfm_ita2_get_ear_mode(inp->pfp_events[i].event, &param->pfp_ita2_iear.ear_mode); param->pfp_ita2_iear.ear_umask = evt_umask(inp->pfp_events[i].event); param->pfp_ita2_iear.ear_ism = PFMLIB_ITA2_ISM_BOTH; /* force both instruction sets */ DPRINT("I-EAR event with no info\n"); } /* * case 2: ear_used=1, event is defined, we use the param info as it is more precise * case 4: ear_used=1, no event (free running I-EAR), use param info */ reg.pmc_val = 0; if 
(param->pfp_ita2_iear.ear_mode == PFMLIB_ITA2_EAR_TLB_MODE) { /* if plm is 0, then assume not specified per-event and use default */ reg.pmc10_ita2_tlb_reg.iear_plm = param->pfp_ita2_iear.ear_plm ? param->pfp_ita2_iear.ear_plm : inp->pfp_dfl_plm; reg.pmc10_ita2_tlb_reg.iear_pm = inp->pfp_flags & PFMLIB_PFP_SYSTEMWIDE ? 1 : 0; reg.pmc10_ita2_tlb_reg.iear_ct = 0x0; reg.pmc10_ita2_tlb_reg.iear_umask = param->pfp_ita2_iear.ear_umask; reg.pmc10_ita2_tlb_reg.iear_ism = param->pfp_ita2_iear.ear_ism; } else if (param->pfp_ita2_iear.ear_mode == PFMLIB_ITA2_EAR_CACHE_MODE) { /* if plm is 0, then assume not specified per-event and use default */ reg.pmc10_ita2_cache_reg.iear_plm = param->pfp_ita2_iear.ear_plm ? param->pfp_ita2_iear.ear_plm : inp->pfp_dfl_plm; reg.pmc10_ita2_cache_reg.iear_pm = inp->pfp_flags & PFMLIB_PFP_SYSTEMWIDE ? 1 : 0; reg.pmc10_ita2_cache_reg.iear_ct = 0x1; reg.pmc10_ita2_cache_reg.iear_umask = param->pfp_ita2_iear.ear_umask; reg.pmc10_ita2_cache_reg.iear_ism = param->pfp_ita2_iear.ear_ism; } else { DPRINT("ALAT mode not supported in I-EAR mode\n"); return PFMLIB_ERR_INVAL; } if (pfm_regmask_isset(&inp->pfp_unavail_pmcs, 10)) return PFMLIB_ERR_NOASSIGN; pc[pos1].reg_num = 10; /* PMC10 is I-EAR config register */ pc[pos1].reg_value = reg.pmc_val; pc[pos1].reg_addr = pc[pos1].reg_alt_addr = 10; pos1++; pd[pos2].reg_num = 0; pd[pos2].reg_addr = pd[pos2].reg_alt_addr= 0; pos2++; pd[pos2].reg_num = 1; pd[pos2].reg_addr = pd[pos2].reg_alt_addr = 1; pos2++; if (param->pfp_ita2_iear.ear_mode == PFMLIB_ITA2_EAR_TLB_MODE) { __pfm_vbprintf("[PMC10(pmc10)=0x%lx ctb=tlb plm=%d pm=%d ism=0x%x umask=0x%x]\n", reg.pmc_val, reg.pmc10_ita2_tlb_reg.iear_plm, reg.pmc10_ita2_tlb_reg.iear_pm, reg.pmc10_ita2_tlb_reg.iear_ism, reg.pmc10_ita2_tlb_reg.iear_umask); } else { __pfm_vbprintf("[PMC10(pmc10)=0x%lx ctb=cache plm=%d pm=%d ism=0x%x umask=0x%x]\n", reg.pmc_val, reg.pmc10_ita2_cache_reg.iear_plm, reg.pmc10_ita2_cache_reg.iear_pm, reg.pmc10_ita2_cache_reg.iear_ism, 
reg.pmc10_ita2_cache_reg.iear_umask); } __pfm_vbprintf("[PMD0(pmd0)]\n[PMD1(pmd1)\n"); /* update final number of entries used */ outp->pfp_pmc_count = pos1; outp->pfp_pmd_count = pos2; return PFMLIB_SUCCESS; } static int pfm_dispatch_dear(pfmlib_input_param_t *inp, pfmlib_ita2_input_param_t *mod_in, pfmlib_output_param_t *outp) { pfm_ita2_pmc_reg_t reg; pfmlib_ita2_input_param_t *param = mod_in; pfmlib_reg_t *pc, *pd; pfmlib_ita2_input_param_t fake_param; unsigned int pos1, pos2; unsigned int i, count; pc = outp->pfp_pmcs; pd = outp->pfp_pmds; pos1 = outp->pfp_pmc_count; pos2 = outp->pfp_pmd_count; count = inp->pfp_event_count; for (i=0; i < count; i++) { if (is_dear(inp->pfp_events[i].event)) break; } if (param == NULL || param->pfp_ita2_dear.ear_used == 0) { /* * case 3: no D-EAR event, no (or nothing) in param->pfp_ita2_dear.ear_used */ if (i == count) return PFMLIB_SUCCESS; memset(&fake_param, 0, sizeof(fake_param)); param = &fake_param; /* * case 1: extract all information for event (name) */ pfm_ita2_get_ear_mode(inp->pfp_events[i].event, ¶m->pfp_ita2_dear.ear_mode); param->pfp_ita2_dear.ear_umask = evt_umask(inp->pfp_events[i].event); param->pfp_ita2_dear.ear_ism = PFMLIB_ITA2_ISM_BOTH; /* force both instruction sets */ DPRINT("D-EAR event with no info\n"); } /* sanity check on the mode */ if ( param->pfp_ita2_dear.ear_mode != PFMLIB_ITA2_EAR_CACHE_MODE && param->pfp_ita2_dear.ear_mode != PFMLIB_ITA2_EAR_TLB_MODE && param->pfp_ita2_dear.ear_mode != PFMLIB_ITA2_EAR_ALAT_MODE) return PFMLIB_ERR_INVAL; /* * case 2: ear_used=1, event is defined, we use the param info as it is more precise * case 4: ear_used=1, no event (free running D-EAR), use param info */ reg.pmc_val = 0; /* if plm is 0, then assume not specified per-event and use default */ reg.pmc11_ita2_reg.dear_plm = param->pfp_ita2_dear.ear_plm ? param->pfp_ita2_dear.ear_plm : inp->pfp_dfl_plm; reg.pmc11_ita2_reg.dear_pm = inp->pfp_flags & PFMLIB_PFP_SYSTEMWIDE ? 
1 : 0; reg.pmc11_ita2_reg.dear_mode = param->pfp_ita2_dear.ear_mode; reg.pmc11_ita2_reg.dear_umask = param->pfp_ita2_dear.ear_umask; reg.pmc11_ita2_reg.dear_ism = param->pfp_ita2_dear.ear_ism; if (pfm_regmask_isset(&inp->pfp_unavail_pmcs, 11)) return PFMLIB_ERR_NOASSIGN; pc[pos1].reg_num = 11; /* PMC11 is D-EAR config register */ pc[pos1].reg_value = reg.pmc_val; pc[pos1].reg_addr = pc[pos1].reg_alt_addr = 11; pos1++; pd[pos2].reg_num = 2; pd[pos2].reg_addr = pd[pos2].reg_alt_addr = 2; pos2++; pd[pos2].reg_num = 3; pd[pos2].reg_addr = pd[pos2].reg_alt_addr = 3; pos2++; pd[pos2].reg_num = 17; pd[pos2].reg_addr = pd[pos2].reg_alt_addr = 17; pos2++; __pfm_vbprintf("[PMC11(pmc11)=0x%lx mode=%s plm=%d pm=%d ism=0x%x umask=0x%x]\n", reg.pmc_val, reg.pmc11_ita2_reg.dear_mode == 0 ? "L1D" : (reg.pmc11_ita2_reg.dear_mode == 1 ? "L1DTLB" : "ALAT"), reg.pmc11_ita2_reg.dear_plm, reg.pmc11_ita2_reg.dear_pm, reg.pmc11_ita2_reg.dear_ism, reg.pmc11_ita2_reg.dear_umask); __pfm_vbprintf("[PMD2(pmd2)]\n[PMD3(pmd3)\nPMD17(pmd17)\n"); /* update final number of entries used */ outp->pfp_pmc_count = pos1; outp->pfp_pmd_count = pos2; return PFMLIB_SUCCESS; } static int pfm_dispatch_opcm(pfmlib_input_param_t *inp, pfmlib_ita2_input_param_t *mod_in, pfmlib_output_param_t *outp, pfmlib_ita2_output_param_t *mod_out) { pfmlib_ita2_input_param_t *param = mod_in; pfmlib_reg_t *pc = outp->pfp_pmcs; pfm_ita2_pmc_reg_t reg, pmc15; unsigned int i, has_1st_pair, has_2nd_pair, count; unsigned int pos = outp->pfp_pmc_count; if (param == NULL) return PFMLIB_SUCCESS; /* not constrained by PMC8 nor PMC9 */ pmc15.pmc_val = 0xffffffff; /* XXX: use PAL instead. PAL value is 0xfffffff0 */ if (param->pfp_ita2_irange.rr_used && mod_out == NULL) return PFMLIB_ERR_INVAL; if (param->pfp_ita2_pmc8.opcm_used || (param->pfp_ita2_irange.rr_used && mod_out->pfp_ita2_irange.rr_nbr_used!=0) ) { reg.pmc_val = param->pfp_ita2_pmc8.opcm_used ? 
param->pfp_ita2_pmc8.pmc_val : 0xffffffff3fffffff; if (param->pfp_ita2_irange.rr_used) { reg.pmc8_9_ita2_reg.opcm_ig_ad = 0; reg.pmc8_9_ita2_reg.opcm_inv = param->pfp_ita2_irange.rr_flags & PFMLIB_ITA2_RR_INV ? 1 : 0; } else { /* clear range restriction fields when none is used */ reg.pmc8_9_ita2_reg.opcm_ig_ad = 1; reg.pmc8_9_ita2_reg.opcm_inv = 0; } /* force bit 2 to 1 */ reg.pmc8_9_ita2_reg.opcm_bit2 = 1; if (pfm_regmask_isset(&inp->pfp_unavail_pmcs, 8)) return PFMLIB_ERR_NOASSIGN; pc[pos].reg_num = 8; pc[pos].reg_value = reg.pmc_val; pc[pos].reg_addr = pc[pos].reg_addr = 8; pos++; /* * will be constrained by PMC8 */ if (param->pfp_ita2_pmc8.opcm_used) { has_1st_pair = has_2nd_pair = 0; count = inp->pfp_event_count; for(i=0; i < count; i++) { if (inp->pfp_events[i].event == PME_ITA2_IA64_TAGGED_INST_RETIRED_IBRP0_PMC8) has_1st_pair=1; if (inp->pfp_events[i].event == PME_ITA2_IA64_TAGGED_INST_RETIRED_IBRP2_PMC8) has_2nd_pair=1; } if (has_1st_pair || has_2nd_pair == 0) pmc15.pmc15_ita2_reg.opcmc_ibrp0_pmc8 = 0; if (has_2nd_pair || has_1st_pair == 0) pmc15.pmc15_ita2_reg.opcmc_ibrp2_pmc8 = 0; } __pfm_vbprintf("[PMC8(pmc8)=0x%lx m=%d i=%d f=%d b=%d match=0x%x mask=0x%x inv=%d ig_ad=%d]\n", reg.pmc_val, reg.pmc8_9_ita2_reg.opcm_m, reg.pmc8_9_ita2_reg.opcm_i, reg.pmc8_9_ita2_reg.opcm_f, reg.pmc8_9_ita2_reg.opcm_b, reg.pmc8_9_ita2_reg.opcm_match, reg.pmc8_9_ita2_reg.opcm_mask, reg.pmc8_9_ita2_reg.opcm_inv, reg.pmc8_9_ita2_reg.opcm_ig_ad); } if (param->pfp_ita2_pmc9.opcm_used) { /* * PMC9 can only be used to qualify IA64_INST_RETIRED_* events */ if (check_inst_retired_events(inp, NULL) != inp->pfp_event_count) return PFMLIB_ERR_FEATCOMB; reg.pmc_val = param->pfp_ita2_pmc9.pmc_val; /* ig_ad, inv are ignored for PMC9, to avoid confusion we force default values */ reg.pmc8_9_ita2_reg.opcm_ig_ad = 1; reg.pmc8_9_ita2_reg.opcm_inv = 0; /* force bit 2 to 1 */ reg.pmc8_9_ita2_reg.opcm_bit2 = 1; if (pfm_regmask_isset(&inp->pfp_unavail_pmcs, 9)) return PFMLIB_ERR_NOASSIGN; 
pc[pos].reg_num = 9; pc[pos].reg_value = reg.pmc_val; pc[pos].reg_addr = pc[pos].reg_alt_addr = 9; pos++; /* * will be constrained by PMC9 */ has_1st_pair = has_2nd_pair = 0; count = inp->pfp_event_count; for(i=0; i < count; i++) { if (inp->pfp_events[i].event == PME_ITA2_IA64_TAGGED_INST_RETIRED_IBRP1_PMC9) has_1st_pair=1; if (inp->pfp_events[i].event == PME_ITA2_IA64_TAGGED_INST_RETIRED_IBRP3_PMC9) has_2nd_pair=1; } if (has_1st_pair || has_2nd_pair == 0) pmc15.pmc15_ita2_reg.opcmc_ibrp1_pmc9 = 0; if (has_2nd_pair || has_1st_pair == 0) pmc15.pmc15_ita2_reg.opcmc_ibrp3_pmc9 = 0; __pfm_vbprintf("[PMC9(pmc9)=0x%lx m=%d i=%d f=%d b=%d match=0x%x mask=0x%x]\n", reg.pmc_val, reg.pmc8_9_ita2_reg.opcm_m, reg.pmc8_9_ita2_reg.opcm_i, reg.pmc8_9_ita2_reg.opcm_f, reg.pmc8_9_ita2_reg.opcm_b, reg.pmc8_9_ita2_reg.opcm_match, reg.pmc8_9_ita2_reg.opcm_mask); } if (pfm_regmask_isset(&inp->pfp_unavail_pmcs, 15)) return PFMLIB_ERR_NOASSIGN; pc[pos].reg_num = 15; pc[pos].reg_value = pmc15.pmc_val; pc[pos].reg_addr = pc[pos].reg_alt_addr = 15; pos++; __pfm_vbprintf("[PMC15(pmc15)=0x%lx ibrp0_pmc8=%d ibrp1_pmc9=%d ibrp2_pmc8=%d ibrp3_pmc9=%d]\n", pmc15.pmc_val, pmc15.pmc15_ita2_reg.opcmc_ibrp0_pmc8, pmc15.pmc15_ita2_reg.opcmc_ibrp1_pmc9, pmc15.pmc15_ita2_reg.opcmc_ibrp2_pmc8, pmc15.pmc15_ita2_reg.opcmc_ibrp3_pmc9); outp->pfp_pmc_count = pos; return PFMLIB_SUCCESS; } static int pfm_dispatch_btb(pfmlib_input_param_t *inp, pfmlib_ita2_input_param_t *mod_in, pfmlib_output_param_t *outp) { pfmlib_event_t *e= inp->pfp_events; pfm_ita2_pmc_reg_t reg; pfmlib_ita2_input_param_t *param = mod_in; pfmlib_reg_t *pc, *pd; pfmlib_ita2_input_param_t fake_param; int found_btb = 0, found_bad_dear = 0; int has_btb_param; unsigned int i, pos1, pos2; unsigned int count; pc = outp->pfp_pmcs; pd = outp->pfp_pmds; pos1 = outp->pfp_pmc_count; pos2 = outp->pfp_pmd_count; /* * explicit BTB settings */ has_btb_param = param && param->pfp_ita2_btb.btb_used; reg.pmc_val = 0UL; /* * we need to scan all events looking 
for DEAR ALAT/TLB due to incompatibility */ count = inp->pfp_event_count; for (i=0; i < count; i++) { if (is_btb(e[i].event)) found_btb = 1; /* * keep track of the first BTB event */ /* look only for DEAR TLB */ if (is_dear(e[i].event) && (is_ear_tlb(e[i].event) || is_ear_alat(e[i].event))) { found_bad_dear = 1; } } DPRINT("found_btb=%d found_bar_dear=%d\n", found_btb, found_bad_dear); /* * did not find D-EAR TLB/ALAT event, need to check param structure */ if (found_bad_dear == 0 && param && param->pfp_ita2_dear.ear_used == 1) { if ( param->pfp_ita2_dear.ear_mode == PFMLIB_ITA2_EAR_TLB_MODE || param->pfp_ita2_dear.ear_mode == PFMLIB_ITA2_EAR_ALAT_MODE) found_bad_dear = 1; } /* * no explicit BTB event and no special case to deal with (cover part of case 3) */ if (found_btb == 0 && has_btb_param == 0 && found_bad_dear == 0) return PFMLIB_SUCCESS; if (has_btb_param == 0) { /* * case 3: no BTB event, btb_used=0 but found_bad_dear=1, need to cleanup PMC12 */ if (found_btb == 0) goto assign_zero; /* * case 1: we have a BTB event but no param, default setting is to capture * all branches. */ memset(&fake_param, 0, sizeof(fake_param)); param = &fake_param; param->pfp_ita2_btb.btb_ds = 0; /* capture branch targets */ param->pfp_ita2_btb.btb_tm = 0x3; /* all branches */ param->pfp_ita2_btb.btb_ptm = 0x3; /* all branches */ param->pfp_ita2_btb.btb_ppm = 0x3; /* all branches */ param->pfp_ita2_btb.btb_brt = 0x0; /* all branches */ DPRINT("BTB event with no info\n"); } /* * case 2: BTB event in the list, param provided * case 4: no BTB event, param provided (free running mode) */ reg.pmc12_ita2_reg.btbc_plm = param->pfp_ita2_btb.btb_plm ? param->pfp_ita2_btb.btb_plm : inp->pfp_dfl_plm; reg.pmc12_ita2_reg.btbc_pm = inp->pfp_flags & PFMLIB_PFP_SYSTEMWIDE ? 
1 : 0; reg.pmc12_ita2_reg.btbc_ds = param->pfp_ita2_btb.btb_ds & 0x1; reg.pmc12_ita2_reg.btbc_tm = param->pfp_ita2_btb.btb_tm & 0x3; reg.pmc12_ita2_reg.btbc_ptm = param->pfp_ita2_btb.btb_ptm & 0x3; reg.pmc12_ita2_reg.btbc_ppm = param->pfp_ita2_btb.btb_ppm & 0x3; reg.pmc12_ita2_reg.btbc_brt = param->pfp_ita2_btb.btb_brt & 0x3; /* * if DEAR-ALAT or DEAR-TLB is set then PMC12 must be set to zero (see documentation p. 87) * * D-EAR ALAT/TLB and BTB cannot be used at the same time. * From documentation: PMC12 must be zero in this mode; else the wrong IP for misses * coming right after a mispredicted branch. * * D-EAR cache is fine. */ assign_zero: if (found_bad_dear && reg.pmc_val != 0UL) return PFMLIB_ERR_EVTINCOMP; if (pfm_regmask_isset(&inp->pfp_unavail_pmcs, 12)) return PFMLIB_ERR_NOASSIGN; memset(pc+pos1, 0, sizeof(pfmlib_reg_t)); pc[pos1].reg_num = 12; pc[pos1].reg_value = reg.pmc_val; pc[pos1].reg_addr = pc[pos1].reg_alt_addr = 12; pos1++; __pfm_vbprintf("[PMC12(pmc12)=0x%lx plm=%d pm=%d ds=%d tm=%d ptm=%d ppm=%d brt=%d]\n", reg.pmc_val, reg.pmc12_ita2_reg.btbc_plm, reg.pmc12_ita2_reg.btbc_pm, reg.pmc12_ita2_reg.btbc_ds, reg.pmc12_ita2_reg.btbc_tm, reg.pmc12_ita2_reg.btbc_ptm, reg.pmc12_ita2_reg.btbc_ppm, reg.pmc12_ita2_reg.btbc_brt); /* * only add BTB PMD when actually using BTB. 
* Not needed when dealing with D-EAR TLB and DEAR-ALAT * PMC12 restriction */ if (found_btb || has_btb_param) { /* * PMD16 is included in list of used PMD */ for(i=8; i < 17; i++, pos2++) { pd[pos2].reg_num = i; pd[pos2].reg_addr = pd[pos2].reg_alt_addr = i; __pfm_vbprintf("[PMD%u(pmd%u)]\n", pd[pos2].reg_num, pd[pos2].reg_num); } } /* update final number of entries used */ outp->pfp_pmc_count = pos1; outp->pfp_pmd_count = pos2; return PFMLIB_SUCCESS; } static void do_normal_rr(unsigned long start, unsigned long end, pfmlib_reg_t *br, int nbr, int dir, int *idx, int *reg_idx, int plm) { unsigned long size, l_addr, c; unsigned long l_offs = 0, r_offs = 0; unsigned long l_size, r_size; dbreg_t db; int p2; if (nbr < 1 || end <= start) return; size = end - start; DPRINT("start=0x%016lx end=0x%016lx size=0x%lx bytes (%lu bundles) nbr=%d dir=%d\n", start, end, size, size >> 4, nbr, dir); p2 = pfm_ia64_fls(size); c = ALIGN_DOWN(end, p2); DPRINT("largest power of two possible: 2^%d=0x%lx, crossing=0x%016lx\n", p2, 1UL << p2, c); if ((c - (1UL<= start) { l_addr = c - (1UL << p2); } else { p2--; if ((c + (1UL<>l_offs: 0x%lx\n", l_offs); } } else if (dir == 1 && r_size != 0 && nbr == 1) { p2++; l_addr = start; if (PFMLIB_DEBUG()) { r_offs = l_addr+(1UL<>r_offs: 0x%lx\n", r_offs); } } l_size = l_addr - start; r_size = end - l_addr-(1UL<>largest chunk: 2^%d @0x%016lx-0x%016lx\n", p2, l_addr, l_addr+(1UL<>before: 0x%016lx-0x%016lx\n", start, l_addr); if (r_size && !r_offs) printf(">>after : 0x%016lx-0x%016lx\n", l_addr+(1UL<>1; if (nbr & 0x1) { /* * our simple heuristic is: * we assign the largest number of registers to the largest * of the two chunks */ if (l_size > r_size) { l_nbr++; } else { r_nbr++; } } do_normal_rr(start, l_addr, br, l_nbr, 0, idx, reg_idx, plm); do_normal_rr(l_addr+(1UL<rr_start, in_rr->rr_end, n_pairs, fine_mode ? ", fine_mode" : "", rr_flags & PFMLIB_ITA2_RR_INV ? 
", inversed" : ""); __pfm_vbprintf("start offset: -0x%lx end_offset: +0x%lx\n", out_rr->rr_soff, out_rr->rr_eoff); for (j=0; j < n_pairs; j++, base_idx+=2) { d.val = dbr[base_idx+1].reg_value; r_end = dbr[base_idx].reg_value+((~(d.db.db_mask)) & ~(0xffUL << 56)); if (fine_mode) __pfm_vbprintf("brp%u: db%u: 0x%016lx db%u: plm=0x%x mask=0x%016lx\n", dbr[base_idx].reg_num>>1, dbr[base_idx].reg_num, dbr[base_idx].reg_value, dbr[base_idx+1].reg_num, d.db.db_plm, d.db.db_mask); else __pfm_vbprintf("brp%u: db%u: 0x%016lx db%u: plm=0x%x mask=0x%016lx end=0x%016lx\n", dbr[base_idx].reg_num>>1, dbr[base_idx].reg_num, dbr[base_idx].reg_value, dbr[base_idx+1].reg_num, d.db.db_plm, d.db.db_mask, r_end); } } /* * base_idx = base register index to use (for IBRP1, base_idx = 2) */ static int compute_fine_rr(pfmlib_ita2_input_rr_t *irr, int dfl_plm, int n, int *base_idx, pfmlib_ita2_output_rr_t *orr) { int i; pfmlib_reg_t *br; pfmlib_ita2_input_rr_desc_t *in_rr; pfmlib_ita2_output_rr_desc_t *out_rr; unsigned long addr; int reg_idx; dbreg_t db; in_rr = irr->rr_limits; out_rr = orr->rr_infos; br = orr->rr_br+orr->rr_nbr_used; reg_idx = *base_idx; db.val = 0; db.db.db_mask = FINE_MODE_MASK; if (n > 2) return PFMLIB_ERR_IRRTOOMANY; for (i=0; i < n; i++, reg_idx += 2, in_rr++, br+= 4) { /* * setup lower limit pair * * because of the PMU bug, we must align down to the closest bundle-pair * aligned address. 5 => 32-byte aligned address */ addr = has_fine_mode_bug ? ALIGN_DOWN(in_rr->rr_start, 5) : in_rr->rr_start; out_rr->rr_soff = in_rr->rr_start - addr; /* * adjust plm for each range */ db.db.db_plm = in_rr->rr_plm ? 
in_rr->rr_plm : (unsigned long)dfl_plm; br[0].reg_num = reg_idx; br[0].reg_value = addr; br[0].reg_addr = br[0].reg_alt_addr = reg_idx; br[1].reg_num = reg_idx+1; br[1].reg_value = db.val; br[1].reg_addr = br[1].reg_alt_addr = reg_idx+1; /* * setup upper limit pair * * * In fine mode, the bundle address stored in the upper limit debug * registers is included in the count, so we substract 0x10 to exclude it. * * because of the PMU bug, we align the (corrected) end to the nearest * 32-byte aligned address + 0x10. With this correction and depending * on the correction, we may count one * * */ addr = in_rr->rr_end - 0x10; if (has_fine_mode_bug && (addr & 0x1f) == 0) addr += 0x10; out_rr->rr_eoff = addr - in_rr->rr_end + 0x10; br[2].reg_num = reg_idx+4; br[2].reg_value = addr; br[2].reg_addr = br[2].reg_alt_addr = reg_idx+4; br[3].reg_num = reg_idx+5; br[3].reg_value = db.val; br[3].reg_addr = br[3].reg_alt_addr = reg_idx+5; if (PFMLIB_VERBOSE()) print_one_range(in_rr, out_rr, br, 0, 2, 1, irr->rr_flags); } orr->rr_nbr_used += i<<2; /* update base_idx, for subsequent calls */ *base_idx = reg_idx; return PFMLIB_SUCCESS; } /* * base_idx = base register index to use (for IBRP1, base_idx = 2) */ static int compute_single_rr(pfmlib_ita2_input_rr_t *irr, int dfl_plm, int *base_idx, pfmlib_ita2_output_rr_t *orr) { unsigned long size, end, start; unsigned long p_start, p_end; pfmlib_ita2_input_rr_desc_t *in_rr; pfmlib_ita2_output_rr_desc_t *out_rr; pfmlib_reg_t *br; dbreg_t db; int reg_idx; int l, m; in_rr = irr->rr_limits; out_rr = orr->rr_infos; br = orr->rr_br+orr->rr_nbr_used; start = in_rr->rr_start; end = in_rr->rr_end; size = end - start; reg_idx = *base_idx; l = pfm_ia64_fls(size); m = l; if (size & ((1UL << l)-1)) { if (l>62) { printf("range: [0x%lx-0x%lx] too big\n", start, end); return PFMLIB_ERR_IRRTOOBIG; } m++; } DPRINT("size=%ld, l=%d m=%d, internal: 0x%lx full: 0x%lx\n", size, l, m, 1UL << l, 1UL << m); for (; m < 64; m++) { p_start = ALIGN_DOWN(start, m); p_end 
= p_start+(1UL<= end) goto found; } return PFMLIB_ERR_IRRINVAL; found: DPRINT("m=%d p_start=0x%lx p_end=0x%lx\n", m, p_start,p_end); /* when the event is not IA64_INST_RETIRED, then we MUST use ibrp0 */ br[0].reg_num = reg_idx; br[0].reg_value = p_start; br[0].reg_addr = br[0].reg_alt_addr = reg_idx; db.val = 0; db.db.db_mask = ~((1UL << m)-1); db.db.db_plm = in_rr->rr_plm ? in_rr->rr_plm : (unsigned long)dfl_plm; br[1].reg_num = reg_idx + 1; br[1].reg_value = db.val; br[1].reg_addr = br[1].reg_alt_addr = reg_idx + 1; out_rr->rr_soff = start - p_start; out_rr->rr_eoff = p_end - end; if (PFMLIB_VERBOSE()) print_one_range(in_rr, out_rr, br, 0, 1, 0, irr->rr_flags); orr->rr_nbr_used += 2; /* update base_idx, for subsequent calls */ *base_idx = reg_idx; return PFMLIB_SUCCESS; } static int compute_normal_rr(pfmlib_ita2_input_rr_t *irr, int dfl_plm, int n, int *base_idx, pfmlib_ita2_output_rr_t *orr) { pfmlib_ita2_input_rr_desc_t *in_rr; pfmlib_ita2_output_rr_desc_t *out_rr; unsigned long r_end; pfmlib_reg_t *br; dbreg_t d; int i, j; int br_index, reg_idx, prev_index; in_rr = irr->rr_limits; out_rr = orr->rr_infos; br = orr->rr_br+orr->rr_nbr_used; reg_idx = *base_idx; br_index = 0; for (i=0; i < n; i++, in_rr++, out_rr++) { /* * running out of registers */ if (br_index == 8) break; prev_index = br_index; do_normal_rr( in_rr->rr_start, in_rr->rr_end, br, 4 - (reg_idx>>1), /* how many pairs available */ 0, &br_index, ®_idx, in_rr->rr_plm ? 
in_rr->rr_plm : dfl_plm); DPRINT("br_index=%d reg_idx=%d\n", br_index, reg_idx); /* * compute offsets */ out_rr->rr_soff = out_rr->rr_eoff = 0; for(j=prev_index; j < br_index; j+=2) { d.val = br[j+1].reg_value; r_end = br[j].reg_value+((~(d.db.db_mask)+1) & ~(0xffUL << 56)); if (br[j].reg_value <= in_rr->rr_start) out_rr->rr_soff = in_rr->rr_start - br[j].reg_value; if (r_end >= in_rr->rr_end) out_rr->rr_eoff = r_end - in_rr->rr_end; } if (PFMLIB_VERBOSE()) print_one_range(in_rr, out_rr, br, prev_index, (br_index-prev_index)>>1, 0, irr->rr_flags); } /* do not have enough registers to cover all the ranges */ if (br_index == 8 && i < n) return PFMLIB_ERR_TOOMANY; orr->rr_nbr_used += br_index; /* update base_idx, for subsequent calls */ *base_idx = reg_idx; return PFMLIB_SUCCESS; } static int pfm_dispatch_irange(pfmlib_input_param_t *inp, pfmlib_ita2_input_param_t *mod_in, pfmlib_output_param_t *outp, pfmlib_ita2_output_param_t *mod_out) { pfm_ita2_pmc_reg_t reg; pfmlib_ita2_input_param_t *param = mod_in; pfmlib_ita2_input_rr_t *irr; pfmlib_ita2_output_rr_t *orr; pfmlib_reg_t *pc = outp->pfp_pmcs; unsigned int i, pos = outp->pfp_pmc_count, count; int ret; unsigned int retired_only, retired_count, fine_mode, prefetch_count; unsigned int n_intervals; int base_idx = 0; unsigned long retired_mask; if (param == NULL) return PFMLIB_SUCCESS; if (param->pfp_ita2_irange.rr_used == 0) return PFMLIB_SUCCESS; if (mod_out == NULL) return PFMLIB_ERR_INVAL; irr = ¶m->pfp_ita2_irange; orr = &mod_out->pfp_ita2_irange; ret = check_intervals(irr, 0, &n_intervals); if (ret != PFMLIB_SUCCESS) return ret; if (n_intervals < 1) return PFMLIB_ERR_IRRINVAL; retired_count = check_inst_retired_events(inp, &retired_mask); retired_only = retired_count == inp->pfp_event_count; prefetch_count = check_prefetch_events(inp); fine_mode = irr->rr_flags & PFMLIB_ITA2_RR_NO_FINE_MODE ? 
0 : check_fine_mode_possible(irr, n_intervals); DPRINT("n_intervals=%d retired_only=%d retired_count=%d prefetch_count=%d fine_mode=%d\n", n_intervals, retired_only, retired_count, prefetch_count, fine_mode); /* * On Itanium2, there are more constraints on what can be measured with irange. * * - The fine mode is the best because you directly set the lower and upper limits of * the range. This uses 2 ibr pairs for range (ibrp0/ibrp2 and ibp1/ibrp3). Therefore * at most 2 fine mode ranges can be defined. There is a limit on the size and alignment * of the range to allow fine mode: the range must be less than 4KB in size AND the lower * and upper limits must NOT cross a 4KB page boundary. The fine mode works will all events. * * - if the fine mode fails, then for all events, except IA64_TAGGED_INST_RETIRED_*, only * the first pair of ibr is available: ibrp0. This imposes some severe restrictions on the * size and alignement of the range. It can be bigger than 4KB and must be properly aligned * on its size. The library relaxes these constraints by allowing the covered areas to be * larger than the expected range. It may start before and end after. You can determine how * far off the range is in either direction for each range by looking at the rr_soff (start * offset) and rr_eoff (end offset). * * - if the events include certain prefetch events then only IBRP1 can be used in fine mode * See 10.3.5.1 Exception 1. * * - Finally, when the events are ONLY IA64_TAGGED_INST_RETIRED_* then all IBR pairs can be used * to cover the range giving us more flexibility to approximate the range when it is not * properly aligned on its size (see 10.3.5.2 Exception 2). 
*/ if (fine_mode == 0 && retired_only == 0 && n_intervals > 1) return PFMLIB_ERR_IRRTOOMANY; /* we do not default to non-fine mode to support more ranges */ if (n_intervals > 2 && fine_mode == 1) return PFMLIB_ERR_IRRTOOMANY; if (fine_mode == 0) { if (retired_only) { ret = compute_normal_rr(irr, inp->pfp_dfl_plm, n_intervals, &base_idx, orr); } else { /* unless we have only prefetch and instruction retired events, * we cannot satisfy the request because the other events cannot * be measured on anything but IBRP0. */ if (prefetch_count && (prefetch_count+retired_count) != inp->pfp_event_count) return PFMLIB_ERR_FEATCOMB; base_idx = prefetch_count ? 2 : 0; ret = compute_single_rr(irr, inp->pfp_dfl_plm, &base_idx, orr); } } else { if (prefetch_count && n_intervals != 1) return PFMLIB_ERR_IRRTOOMANY; base_idx = prefetch_count ? 2 : 0; ret = compute_fine_rr(irr, inp->pfp_dfl_plm, n_intervals, &base_idx, orr); } if (ret != PFMLIB_SUCCESS) { return ret == PFMLIB_ERR_TOOMANY ? PFMLIB_ERR_IRRTOOMANY : ret; } reg.pmc_val = 0xdb6; /* default value */ count = orr->rr_nbr_used; for (i=0; i < count; i++) { switch(orr->rr_br[i].reg_num) { case 0: reg.pmc14_ita2_reg.iarc_ibrp0 = 0; break; case 2: reg.pmc14_ita2_reg.iarc_ibrp1 = 0; break; case 4: reg.pmc14_ita2_reg.iarc_ibrp2 = 0; break; case 6: reg.pmc14_ita2_reg.iarc_ibrp3 = 0; break; } } if (retired_only && (param->pfp_ita2_pmc8.opcm_used ||param->pfp_ita2_pmc9.opcm_used)) { /* * PMC8 + IA64_INST_RETIRED only works if irange on IBRP0 and/or IBRP2 * PMC9 + IA64_INST_RETIRED only works if irange on IBRP1 and/or IBRP3 */ count = orr->rr_nbr_used; for (i=0; i < count; i++) { if (orr->rr_br[i].reg_num == 0 && param->pfp_ita2_pmc9.opcm_used) return PFMLIB_ERR_FEATCOMB; if (orr->rr_br[i].reg_num == 2 && param->pfp_ita2_pmc8.opcm_used) return PFMLIB_ERR_FEATCOMB; if (orr->rr_br[i].reg_num == 4 && param->pfp_ita2_pmc9.opcm_used) return PFMLIB_ERR_FEATCOMB; if (orr->rr_br[i].reg_num == 6 && param->pfp_ita2_pmc8.opcm_used) return 
/*
 * Lookup table indexed by a 3-bit code built from the I (code range),
 * O (opcode match) and D (data range) usage bits, with I as the most
 * significant bit and D as the least significant one. The value is what
 * gets written in the matching cfg_dbrpX field of PMC13.
 */
static const unsigned long iod_tab[8]={
	[0x0] = 3,	/* --- */
	[0x1] = 2,	/* --D */
	[0x2] = 3,	/* -O- : should not be used */
	[0x3] = 0,	/* -OD : =IOD safe because default IBR is harmless */
	[0x4] = 1,	/* I-- : =IO safe because by default OPC is turned off */
	[0x5] = 0,	/* I-D : =IOD safe because by default opc is turned off */
	[0x6] = 1,	/* IO- */
	[0x7] = 0	/* IOD */
};
*/ static int pfm_dispatch_drange(pfmlib_input_param_t *inp, pfmlib_ita2_input_param_t *mod_in, pfmlib_output_param_t *outp, pfmlib_ita2_output_param_t *mod_out) { pfmlib_ita2_input_param_t *param = mod_in; pfmlib_reg_t *pc = outp->pfp_pmcs; pfmlib_ita2_input_rr_t *irr; pfmlib_ita2_output_rr_t *orr, *orr2; pfm_ita2_pmc_reg_t pmc13; pfm_ita2_pmc_reg_t pmc14; unsigned int i, pos = outp->pfp_pmc_count; int iod_codes[4], dfl_val_pmc8, dfl_val_pmc9; unsigned int n_intervals; int ret; int base_idx = 0; int fine_mode = 0; #define DR_USED 0x1 /* data range is used */ #define OP_USED 0x2 /* opcode matching is used */ #define IR_USED 0x4 /* code range is used */ if (param == NULL) return PFMLIB_SUCCESS; /* * if only pmc8/pmc9 opcode matching is used, we do not need to change * the default value of pmc13 regardless of the events being measured. */ if ( param->pfp_ita2_drange.rr_used == 0 && param->pfp_ita2_irange.rr_used == 0) return PFMLIB_SUCCESS; /* * it seems like the ignored bits need to have special values * otherwise this does not work. */ pmc13.pmc_val = 0x2078fefefefe; /* * initialize iod codes */ iod_codes[0] = iod_codes[1] = iod_codes[2] = iod_codes[3] = 0; /* * setup default iod value, we need to separate because * if drange is used we do not know in advance which DBR will be used * therefore we need to apply dfl_val later */ dfl_val_pmc8 = param->pfp_ita2_pmc8.opcm_used ? OP_USED : 0; dfl_val_pmc9 = param->pfp_ita2_pmc9.opcm_used ? OP_USED : 0; if (param->pfp_ita2_drange.rr_used == 1) { if (mod_out == NULL) return PFMLIB_ERR_INVAL; irr = ¶m->pfp_ita2_drange; orr = &mod_out->pfp_ita2_drange; ret = check_intervals(irr, 1, &n_intervals); if (ret != PFMLIB_SUCCESS) return ret; if (n_intervals < 1) return PFMLIB_ERR_DRRINVAL; ret = compute_normal_rr(irr, inp->pfp_dfl_plm, n_intervals, &base_idx, orr); if (ret != PFMLIB_SUCCESS) { return ret == PFMLIB_ERR_TOOMANY ? PFMLIB_ERR_DRRTOOMANY : ret; } /* * Update iod_codes to reflect the use of the DBR constraint. 
*/ for (i=0; i < orr->rr_nbr_used; i++) { if (orr->rr_br[i].reg_num == 0) iod_codes[0] |= DR_USED | dfl_val_pmc8; if (orr->rr_br[i].reg_num == 2) iod_codes[1] |= DR_USED | dfl_val_pmc9; if (orr->rr_br[i].reg_num == 4) iod_codes[2] |= DR_USED | dfl_val_pmc8; if (orr->rr_br[i].reg_num == 6) iod_codes[3] |= DR_USED | dfl_val_pmc9; } } /* * XXX: assume dispatch_irange executed before calling this function */ if (param->pfp_ita2_irange.rr_used == 1) { orr2 = &mod_out->pfp_ita2_irange; if (mod_out == NULL) return PFMLIB_ERR_INVAL; /* * we need to find out whether or not the irange is using * fine mode. If this is the case, then we only need to * program pmc13 for the ibr pairs which designate the lower * bounds of a range. For instance, if IBRP0/IBRP2 are used, * then we only need to program pmc13.cfg_dbrp0 and pmc13.ena_dbrp0, * the PMU will automatically use IBRP2, even though pmc13.ena_dbrp2=0. */ for(i=0; i <= pos; i++) { if (pc[i].reg_num == 14) { pmc14.pmc_val = pc[i].reg_value; if (pmc14.pmc14_ita2_reg.iarc_fine == 1) fine_mode = 1; break; } } /* * Update to reflect the use of the IBR constraint */ for (i=0; i < orr2->rr_nbr_used; i++) { if (orr2->rr_br[i].reg_num == 0) iod_codes[0] |= IR_USED | dfl_val_pmc8; if (orr2->rr_br[i].reg_num == 2) iod_codes[1] |= IR_USED | dfl_val_pmc9; if (fine_mode == 0 && orr2->rr_br[i].reg_num == 4) iod_codes[2] |= IR_USED | dfl_val_pmc8; if (fine_mode == 0 && orr2->rr_br[i].reg_num == 6) iod_codes[3] |= IR_USED | dfl_val_pmc9; } } if (param->pfp_ita2_irange.rr_used == 0 && param->pfp_ita2_drange.rr_used ==0) { iod_codes[0] = iod_codes[2] = dfl_val_pmc8; iod_codes[1] = iod_codes[3] = dfl_val_pmc9; } /* * update the cfg dbrpX field. If we put a constraint on a cfg dbrp, then * we must enable it in the corresponding ena_dbrpX */ pmc13.pmc13_ita2_reg.darc_ena_dbrp0 = iod_codes[0] ? 1 : 0; pmc13.pmc13_ita2_reg.darc_cfg_dbrp0 = iod_tab[iod_codes[0]]; pmc13.pmc13_ita2_reg.darc_ena_dbrp1 = iod_codes[1] ? 
1 : 0; pmc13.pmc13_ita2_reg.darc_cfg_dbrp1 = iod_tab[iod_codes[1]]; pmc13.pmc13_ita2_reg.darc_ena_dbrp2 = iod_codes[2] ? 1 : 0; pmc13.pmc13_ita2_reg.darc_cfg_dbrp2 = iod_tab[iod_codes[2]]; pmc13.pmc13_ita2_reg.darc_ena_dbrp3 = iod_codes[3] ? 1 : 0; pmc13.pmc13_ita2_reg.darc_cfg_dbrp3 = iod_tab[iod_codes[3]]; if (pfm_regmask_isset(&inp->pfp_unavail_pmcs, 13)) return PFMLIB_ERR_NOASSIGN; pc[pos].reg_num = 13; pc[pos].reg_value = pmc13.pmc_val; pc[pos].reg_addr = pc[pos].reg_alt_addr = 13; pos++; __pfm_vbprintf("[PMC13(pmc13)=0x%lx cfg_dbrp0=%d cfg_dbrp1=%d cfg_dbrp2=%d cfg_dbrp3=%d ena_dbrp0=%d ena_dbrp1=%d ena_dbrp2=%d ena_dbrp3=%d]\n", pmc13.pmc_val, pmc13.pmc13_ita2_reg.darc_cfg_dbrp0, pmc13.pmc13_ita2_reg.darc_cfg_dbrp1, pmc13.pmc13_ita2_reg.darc_cfg_dbrp2, pmc13.pmc13_ita2_reg.darc_cfg_dbrp3, pmc13.pmc13_ita2_reg.darc_ena_dbrp0, pmc13.pmc13_ita2_reg.darc_ena_dbrp1, pmc13.pmc13_ita2_reg.darc_ena_dbrp2, pmc13.pmc13_ita2_reg.darc_ena_dbrp3); outp->pfp_pmc_count = pos; return PFMLIB_SUCCESS; } /* * check_qualifier_constraints: for every requested event, return * PFMLIB_ERR_FEATCOMB when a qualifier reported by evt_use_irange(), * evt_use_drange() or evt_use_opcm() is in use but the matching * has_iarr()/has_darr()/has_opcm() test on the event yields 0. * Returns PFMLIB_SUCCESS otherwise. */ static int check_qualifier_constraints(pfmlib_input_param_t *inp, pfmlib_ita2_input_param_t *mod_in) { pfmlib_ita2_input_param_t *param = mod_in; pfmlib_event_t *e = inp->pfp_events; unsigned int i, count; count = inp->pfp_event_count; for(i=0; i < count; i++) { /* * skip the check for any counter which requested it. Use at your own risk. * Not all counters have necessarily been validated for use with * qualifiers. Typically the event is counted as if no constraint * existed. 
*/ if (param->pfp_ita2_counters[i].flags & PFMLIB_ITA2_FL_EVT_NO_QUALCHECK) continue; if (evt_use_irange(param) && has_iarr(e[i].event) == 0) return PFMLIB_ERR_FEATCOMB; if (evt_use_drange(param) && has_darr(e[i].event) == 0) return PFMLIB_ERR_FEATCOMB; if (evt_use_opcm(param) && has_opcm(e[i].event) == 0) return PFMLIB_ERR_FEATCOMB; } return PFMLIB_SUCCESS; } /* * check_range_plm: when a data or code range restriction is in use, * every event with a non-zero plm must have a plm equal to * inp->pfp_dfl_plm; otherwise PFMLIB_ERR_FEATCOMB is returned. * Returns PFMLIB_SUCCESS when no range is used or all events match. */ static int check_range_plm(pfmlib_input_param_t *inp, pfmlib_ita2_input_param_t *mod_in) { pfmlib_ita2_input_param_t *param = mod_in; unsigned int i, count; if (param->pfp_ita2_drange.rr_used == 0 && param->pfp_ita2_irange.rr_used == 0) return PFMLIB_SUCCESS; /* * range restriction applies to all events, therefore we must have a consistent * set of plm and they must match the pfp_dfl_plm which is used to setup the debug * registers */ count = inp->pfp_event_count; for(i=0; i < count; i++) { if (inp->pfp_events[i].plm && inp->pfp_events[i].plm != inp->pfp_dfl_plm) return PFMLIB_ERR_FEATCOMB; } return PFMLIB_SUCCESS; } /* * pfm_ita2_dispatch_events: validates the model-specific input, then * runs the dispatch stages one after the other and returns the first * status that is not PFMLIB_SUCCESS. 
*/ static int pfm_ita2_dispatch_events(pfmlib_input_param_t *inp, void *model_in, pfmlib_output_param_t *outp, void *model_out) { int ret; pfmlib_ita2_input_param_t *mod_in = (pfmlib_ita2_input_param_t *)model_in; pfmlib_ita2_output_param_t *mod_out = (pfmlib_ita2_output_param_t *)model_out; /* * nothing will come out of this combination */ if (mod_out && mod_in == NULL) return PFMLIB_ERR_INVAL; /* check opcode match, range restriction qualifiers */ if (mod_in && check_qualifier_constraints(inp, mod_in) != PFMLIB_SUCCESS) return PFMLIB_ERR_FEATCOMB; /* check for problems with range restriction and per-event plm */ if (mod_in && check_range_plm(inp, mod_in) != PFMLIB_SUCCESS) return PFMLIB_ERR_FEATCOMB; ret = pfm_ita2_dispatch_counters(inp, mod_in, outp); if (ret != PFMLIB_SUCCESS) return ret; /* now check for I-EAR */ ret = pfm_dispatch_iear(inp, mod_in, outp); if (ret != PFMLIB_SUCCESS) return ret; /* now check for D-EAR */ ret = pfm_dispatch_dear(inp, mod_in, outp); if (ret != PFMLIB_SUCCESS) 
return ret; /* XXX: must be done before dispatch_opcm() and dispatch_drange() */ ret = pfm_dispatch_irange(inp, mod_in, outp, mod_out);; if (ret != PFMLIB_SUCCESS) return ret; ret = pfm_dispatch_drange(inp, mod_in, outp, mod_out);; if (ret != PFMLIB_SUCCESS) return ret; /* now check for Opcode matchers */ ret = pfm_dispatch_opcm(inp, mod_in, outp, mod_out); if (ret != PFMLIB_SUCCESS) return ret; ret = pfm_dispatch_btb(inp, mod_in, outp); return ret; } /* XXX: return value is also error code */ int pfm_ita2_get_event_maxincr(unsigned int i, unsigned int *maxincr) { if (i >= PME_ITA2_EVENT_COUNT || maxincr == NULL) return PFMLIB_ERR_INVAL; *maxincr = itanium2_pe[i].pme_maxincr; return PFMLIB_SUCCESS; } int pfm_ita2_is_ear(unsigned int i) { return i < PME_ITA2_EVENT_COUNT && is_ear(i); } int pfm_ita2_is_dear(unsigned int i) { return i < PME_ITA2_EVENT_COUNT && is_dear(i); } int pfm_ita2_is_dear_tlb(unsigned int i) { return i < PME_ITA2_EVENT_COUNT && is_dear(i) && is_ear_tlb(i); } int pfm_ita2_is_dear_cache(unsigned int i) { return i < PME_ITA2_EVENT_COUNT && is_dear(i) && is_ear_cache(i); } int pfm_ita2_is_dear_alat(unsigned int i) { return i < PME_ITA2_EVENT_COUNT && is_ear_alat(i); } int pfm_ita2_is_iear(unsigned int i) { return i < PME_ITA2_EVENT_COUNT && is_iear(i); } int pfm_ita2_is_iear_tlb(unsigned int i) { return i < PME_ITA2_EVENT_COUNT && is_iear(i) && is_ear_tlb(i); } int pfm_ita2_is_iear_cache(unsigned int i) { return i < PME_ITA2_EVENT_COUNT && is_iear(i) && is_ear_cache(i); } int pfm_ita2_is_btb(unsigned int i) { return i < PME_ITA2_EVENT_COUNT && is_btb(i); } int pfm_ita2_support_iarr(unsigned int i) { return i < PME_ITA2_EVENT_COUNT && has_iarr(i); } int pfm_ita2_support_darr(unsigned int i) { return i < PME_ITA2_EVENT_COUNT && has_darr(i); } int pfm_ita2_support_opcm(unsigned int i) { return i < PME_ITA2_EVENT_COUNT && has_opcm(i); } int pfm_ita2_get_ear_mode(unsigned int i, pfmlib_ita2_ear_mode_t *m) { pfmlib_ita2_ear_mode_t r; if (!is_ear(i) || m 
== NULL) return PFMLIB_ERR_INVAL; r = PFMLIB_ITA2_EAR_TLB_MODE; if (is_ear_tlb(i)) goto done; r = PFMLIB_ITA2_EAR_CACHE_MODE; if (is_ear_cache(i)) goto done; r = PFMLIB_ITA2_EAR_ALAT_MODE; if (is_ear_alat(i)) goto done; return PFMLIB_ERR_INVAL; done: *m = r; return PFMLIB_SUCCESS; } static int pfm_ita2_get_event_code(unsigned int i, unsigned int cnt, int *code) { if (cnt != PFMLIB_CNT_FIRST && (cnt < 4 || cnt > 7)) return PFMLIB_ERR_INVAL; *code = (int)itanium2_pe[i].pme_code; return PFMLIB_SUCCESS; } /* * This function is accessible directly to the user */ int pfm_ita2_get_event_umask(unsigned int i, unsigned long *umask) { if (i >= PME_ITA2_EVENT_COUNT || umask == NULL) return PFMLIB_ERR_INVAL; *umask = evt_umask(i); return PFMLIB_SUCCESS; } int pfm_ita2_get_event_group(unsigned int i, int *grp) { if (i >= PME_ITA2_EVENT_COUNT || grp == NULL) return PFMLIB_ERR_INVAL; *grp = evt_grp(i); return PFMLIB_SUCCESS; } int pfm_ita2_get_event_set(unsigned int i, int *set) { if (i >= PME_ITA2_EVENT_COUNT || set == NULL) return PFMLIB_ERR_INVAL; *set = evt_set(i) == 0xf ? PFMLIB_ITA2_EVT_NO_SET : evt_set(i); return PFMLIB_SUCCESS; } /* external interface */ int pfm_ita2_irange_is_fine(pfmlib_output_param_t *outp, pfmlib_ita2_output_param_t *mod_out) { pfmlib_ita2_output_param_t *param = mod_out; pfm_ita2_pmc_reg_t reg; unsigned int i, count; /* some sanity checks */ if (outp == NULL || param == NULL) return 0; if (outp->pfp_pmc_count >= PFMLIB_MAX_PMCS) return 0; if (param->pfp_ita2_irange.rr_nbr_used == 0) return 0; /* * we look for pmc14 as it contains the bit indicating if fine mode is used */ count = outp->pfp_pmc_count; for(i=0; i < count; i++) { if (outp->pfp_pmcs[i].reg_num == 14) goto found; } return 0; found: reg.pmc_val = outp->pfp_pmcs[i].reg_value; return reg.pmc14_ita2_reg.iarc_fine ? 
1 : 0; } static char * pfm_ita2_get_event_name(unsigned int i) { return itanium2_pe[i].pme_name; } static void pfm_ita2_get_event_counters(unsigned int j, pfmlib_regmask_t *counters) { unsigned int i; unsigned long m; memset(counters, 0, sizeof(*counters)); m =itanium2_pe[j].pme_counters; for(i=0; m ; i++, m>>=1) { if (m & 0x1) pfm_regmask_set(counters, i); } } static void pfm_ita2_get_impl_pmcs(pfmlib_regmask_t *impl_pmcs) { unsigned int i = 0; /* all pmcs are contiguous */ for(i=0; i < PMU_ITA2_NUM_PMCS; i++) pfm_regmask_set(impl_pmcs, i); } static void pfm_ita2_get_impl_pmds(pfmlib_regmask_t *impl_pmds) { unsigned int i = 0; /* all pmds are contiguous */ for(i=0; i < PMU_ITA2_NUM_PMDS; i++) pfm_regmask_set(impl_pmds, i); } static void pfm_ita2_get_impl_counters(pfmlib_regmask_t *impl_counters) { unsigned int i = 0; /* counting pmds are contiguous */ for(i=4; i < 8; i++) pfm_regmask_set(impl_counters, i); } static void pfm_ita2_get_hw_counter_width(unsigned int *width) { *width = PMU_ITA2_COUNTER_WIDTH; } static int pfm_ita2_get_event_description(unsigned int ev, char **str) { char *s; s = itanium2_pe[ev].pme_desc; if (s) { *str = strdup(s); } else { *str = NULL; } return PFMLIB_SUCCESS; } static int pfm_ita2_get_cycle_event(pfmlib_event_t *e) { e->event = PME_ITA2_CPU_CYCLES; return PFMLIB_SUCCESS; } static int pfm_ita2_get_inst_retired(pfmlib_event_t *e) { e->event = PME_ITA2_IA64_INST_RETIRED; return PFMLIB_SUCCESS; } pfm_pmu_support_t itanium2_support={ .pmu_name = "itanium2", .pmu_type = PFMLIB_ITANIUM2_PMU, .pme_count = PME_ITA2_EVENT_COUNT, .pmc_count = PMU_ITA2_NUM_PMCS, .pmd_count = PMU_ITA2_NUM_PMDS, .num_cnt = PMU_ITA2_NUM_COUNTERS, .get_event_code = pfm_ita2_get_event_code, .get_event_name = pfm_ita2_get_event_name, .get_event_counters = pfm_ita2_get_event_counters, .dispatch_events = pfm_ita2_dispatch_events, .pmu_detect = pfm_ita2_detect, .get_impl_pmcs = pfm_ita2_get_impl_pmcs, .get_impl_pmds = pfm_ita2_get_impl_pmds, .get_impl_counters = 
pfm_ita2_get_impl_counters, .get_hw_counter_width = pfm_ita2_get_hw_counter_width, .get_event_desc = pfm_ita2_get_event_description, .get_cycle_event = pfm_ita2_get_cycle_event, .get_inst_retired_event = pfm_ita2_get_inst_retired }; papi-5.6.0/src/perfctr-2.7.x/examples/perfex/000775 001750 001750 00000000000 13216244370 022650 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/validation_tests/papi_br_ucn.c000664 001750 001750 00000004001 13216244370 023316 0ustar00jshenry1963jshenry1963000000 000000 /* This file attempts to test the unconditional branch instruction */ /* performance counter PAPI_BR_UCN */ /* by Vince Weaver, */ #include #include #include #include #include "papi.h" #include "papi_test.h" #include "display_error.h" #include "testcode.h" int main(int argc, char **argv) { int num_runs=100,i; long long high=0,low=0,average=0,expected=500000; double error; long long count,total=0; int quiet=0,retval,ins_result; int eventset=PAPI_NULL; quiet=tests_quiet(argc,argv); if (!quiet) { printf("\nTesting the PAPI_BR_UCN event.\n"); } /* Init the PAPI library */ retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) { test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); } retval=PAPI_create_eventset(&eventset); if (retval!=PAPI_OK) { test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); } retval=PAPI_add_named_event(eventset,"PAPI_BR_UCN"); if (retval!=PAPI_OK) { test_skip( __FILE__, __LINE__, "adding PAPI_BR_UCN", retval ); } if (!quiet) { printf("Testing a loop with %lld unconditional branches (%d times):\n", expected,num_runs); } for(i=0;ihigh) high=count; if ((low==0) || (count 1.0) || (error<-1.0)) { if (!quiet) printf("Instruction count off by more than 1%%\n"); test_fail( __FILE__, __LINE__, "Error too high", 1 ); } if (!quiet) printf("\n"); test_pass( __FILE__ ); PAPI_shutdown(); return 0; } papi-5.6.0/src/ftests/zero.F000664 001750 001750 00000007437 13216244361 017733 
0ustar00jshenry1963jshenry1963000000 000000 #include "fpapi_test.h" program zero integer*8 values(10) integer es, event integer*8 uso, usn, cyco, cycn integer domain, granularity character*(PAPI_MAX_STR_LEN) domainstr, grnstr character*(PAPI_MAX_STR_LEN) name integer retval Integer last_char, n External last_char integer tests_quiet, get_quiet external get_quiet tests_quiet = get_quiet() es = PAPI_NULL retval = PAPI_VER_CURRENT call PAPIf_library_init(retval) if ( retval.NE.PAPI_VER_CURRENT) then call ftest_fail(__FILE__, __LINE__, . 'PAPI_library_init', retval) end if call PAPIf_query_event(PAPI_FP_INS, retval) if (retval .NE. PAPI_OK) then event = PAPI_TOT_INS else event = PAPI_FP_INS end if call PAPIf_create_eventset(es, retval) if ( retval.NE.PAPI_OK) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_create_eventset', * retval) end if call PAPIf_add_event( es, event, retval ) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_add_event', retval) end if call PAPIf_add_event( es, PAPI_TOT_CYC, retval ) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_add_event', retval) end if call PAPIf_get_real_usec(uso) call PAPIf_get_real_cyc(cyco) call PAPIf_start(es, retval) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_start', retval) end if call fdo_flops(NUM_FLOPS) call PAPIf_stop(es, values(1), retval) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_stop', retval) end if call PAPIf_get_real_usec(usn) call PAPIf_get_real_cyc(cycn) if (tests_quiet .EQ. 0) then print *, "Test case 0: start, stop." print *, "-----------------------------------------------", * "--------------------------" end if call PAPIf_get_domain(es, domain, PAPI_DEFDOM, retval) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_get_domain', retval) end if call stringify_domain(domain, domainstr) if (tests_quiet .EQ. 
0) then write (*,800) "Default domain is :", domain, domainstr end if call PAPIf_get_granularity(es, granularity, PAPI_DEFGRN, * retval) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_get_granularity', * retval) end if call stringify_granularity(granularity, grnstr) if (tests_quiet .EQ. 0) then call PAPIf_event_code_to_name (event, name, retval) if ( retval.NE.PAPI_OK) then call ftest_fail(__FILE__, __LINE__, * 'PAPIf_event_code_to_name', retval) end if n=last_char(name) write (*,800) "Default granularity is:", granularity, grnstr 800 format(a25, i3, " ", a70) write (*,810) "Using", NUM_FLOPS, $ " iterations of c = c + a * b" 810 format(a7, i9, a) print *, "-----------------------------------------------", * "--------------------------" write (*,100) "Test type", 1 write (*,100) name(1:n), values(1) write (*,100) "PAPI_TOT_CYC", values(2) write (*,100) "Real usec", usn-uso write (*,100) "Real cycles", cycn-cyco 100 format(a13, ":", i12) print *, "-----------------------------------------------", * "--------------------------" print *, "Verification: none" endif call ftests_pass(__FILE__) end papi-5.6.0/src/perfctr-2.7.x/etc/costs/Athlon-1.66000664 001750 001750 00000002722 13216244367 023220 0ustar00jshenry1963jshenry1963000000 000000 [data from a 1.66 GHz Athlon XP 2000+] PERFCTR INIT: vendor 2, family 6, model 6, stepping 2, clock 1679042 kHz PERFCTR INIT: NITER == 64 PERFCTR INIT: loop overhead is 157 cycles PERFCTR INIT: rdtsc cost is 14.0 cycles (1054 total) PERFCTR INIT: rdpmc cost is 14.9 cycles (1113 total) PERFCTR INIT: rdmsr (counter) cost is 51.7 cycles (3472 total) PERFCTR INIT: rdmsr (evntsel) cost is 52.6 cycles (3526 total) PERFCTR INIT: wrmsr (counter) cost is 82.9 cycles (5463 total) PERFCTR INIT: wrmsr (evntsel) cost is 231.9 cycles (14999 total) PERFCTR INIT: read cr4 cost is 4.7 cycles (464 total) PERFCTR INIT: write cr4 cost is 62.8 cycles (4180 total) PERFCTR INIT: sync_core cost is 73.9 cycles (4887 total) 
perfctr: driver 2.7.8, cpu type AMD K7/K8 at 1679042 kHz PERFCTR INIT: vendor 2, family 6, model 6, stepping 2, clock 1659642 kHz PERFCTR INIT: NITER == 64 PERFCTR INIT: loop overhead is 236 cycles PERFCTR INIT: rdtsc cost is 17.1 cycles (1336 total) PERFCTR INIT: rdpmc cost is 15.4 cycles (1225 total) PERFCTR INIT: rdmsr (counter) cost is 49.4 cycles (3402 total) PERFCTR INIT: rdmsr (evntsel) cost is 50.2 cycles (3455 total) PERFCTR INIT: wrmsr (counter) cost is 83.3 cycles (5573 total) PERFCTR INIT: wrmsr (evntsel) cost is 229.6 cycles (14936 total) PERFCTR INIT: read cr4 cost is 0.1 cycles (245 total) PERFCTR INIT: write cr4 cost is 60.7 cycles (4124 total) PERFCTR INIT: write LVTPC cost is 3.0 cycles (432 total) perfctr: driver 2.6.6, cpu type AMD K7 at 1659642 kHz papi-5.6.0/src/libpfm-3.y/docs/man3/libpfm_montecito.3000664 001750 001750 00000061670 13216244361 024420 0ustar00jshenry1963jshenry1963000000 000000 .TH LIBPFM 3 "November, 2003" "" "Linux Programmer's Manual" .SH NAME libpfm_montecito - support for Itanium 2 9000 (Montecito) processor specific PMU features .SH SYNOPSIS .nf .B #include .B #include .sp .BI "int pfm_mont_is_ear(unsigned int " i ");" .BI "int pfm_mont_is_dear(unsigned int " i ");" .BI "int pfm_mont_is_dear_tlb(unsigned int " i ");" .BI "int pfm_mont_is_dear_cache(unsigned int " i ");" .BI "int pfm_mont_is_dear_alat(unsigned int " i ");" .BI "int pfm_mont_is_iear(unsigned int " i ");" .BI "int pfm_mont_is_iear_tlb(unsigned int " i ");" .BI "int pfm_mont_is_iear_cache(unsigned int " i ");" .BI "int pfm_mont_is_etb(unsigned int " i ");" .BI "int pfm_mont_support_opcm(unsigned int " i ");" .BI "int pfm_mont_support_iarr(unsigned int " i ");" .BI "int pfm_mont_support_darr(unsigned int " i ");" .BI "int pfm_mont_get_event_maxincr(unsigned int "i ", unsigned int *"maxincr ");" .BI "int pfm_mont_get_event_umask(unsigned int "i ", unsigned long *"umask ");" .BI "int pfm_mont_get_event_group(unsigned int "i ", int *"grp ");" .BI "int 
pfm_mont_get_event_set(unsigned int "i ", int *"set ");" .BI "int pfm_mont_get_event_type(unsigned int "i ", int *"type ");" .BI "int pfm_mont_get_ear_mode(unsigned int "i ", pfmlib_mont_ear_mode_t *"mode ");" .BI "int pfm_mont_irange_is_fine(pfmlib_output_param_t *"outp ", pfmlib_mont_output_param_t *"mod_out ");" .sp .SH DESCRIPTION The libpfm library provides full support for all the Itanium 2 9000 (Montecito) processor specific features of the PMU. The interface is defined in \fBpfmlib_montecito.h\fR. It consists of a set of functions and structures which describe and allow access to the model specific PMU features. .sp The Itanium 2 9000 (Montecito) processor specific functions presented here are mostly used to retrieve the characteristics of an event. Given an opaque event descriptor, obtained by the \fBpfm_find_event()\fR or its derivative functions, they return a boolean value indicating whether this event supports this feature or is of a particular kind. .sp The \fBpfm_mont_is_ear()\fR function returns 1 if the event designated by \fBi\fR corresponds to an EAR event, i.e., an Event Address Register type of event. Otherwise 0 is returned. For instance, \fBDATA_EAR_CACHE_LAT4\fR is an EAR event, but \fBCPU_OP_CYCLES_ALL\fR is not. It can be a data or instruction EAR event. .sp The \fBpfm_mont_is_dear()\fR function returns 1 if the event designated by \fBi\fR corresponds to a Data EAR event. Otherwise 0 is returned. It can be a cache or TLB EAR event. .sp The \fBpfm_mont_is_dear_tlb()\fR function returns 1 if the event designated by \fBi\fR corresponds to a Data EAR TLB event. Otherwise 0 is returned. .sp The \fBpfm_mont_is_dear_cache()\fR function returns 1 if the event designated by \fBi\fR corresponds to a Data EAR cache event. Otherwise 0 is returned. .sp The \fBpfm_mont_is_dear_alat()\fR function returns 1 if the event designated by \fBi\fR corresponds to an ALAT EAR cache event. Otherwise 0 is returned. 
.sp The \fBpfm_mont_is_iear()\fR function returns 1 if the event designated by \fBi\fR corresponds to an instruction EAR event. Otherwise 0 is returned. It can be a cache or TLB instruction EAR event. .sp The \fBpfm_mont_is_iear_tlb()\fR function returns 1 if the event designated by \fBi\fR corresponds to an instruction EAR TLB event. Otherwise 0 is returned. .sp The \fBpfm_mont_is_iear_cache()\fR function returns 1 if the event designated by \fBi\fR corresponds to an instruction EAR cache event. Otherwise 0 is returned. .sp The \fBpfm_mont_support_opcm()\fR function returns 1 if the event designated by \fBi\fR supports opcode matching, i.e., can this event be measured accurately when opcode matching via PMC32/PMC34 is active. Not all events supports this feature. .sp The \fBpfm_mont_support_iarr()\fR function returns 1 if the event designated by \fBi\fR supports code address range restrictions, i.e., can this event be measured accurately when code range restriction is active. Otherwise 0 is returned. Not all events supports this feature. .sp The \fBpfm_mont_support_darr()\fR function returns 1 if the event designated by \fBi\fR supports data address range restrictions, i.e., can this event be measured accurately when data range restriction is active. Otherwise 0 is returned. Not all events supports this feature. .sp The \fBpfm_mont_get_event_maxincr()\fR function returns in \fBmaxincr\fR the maximum number of occurrences per cycle for the event designated by \fBi\fR. Certain Itanium 2 9000 (Montecito) events can occur more than once per cycle. When an event occurs more than once per cycle, the PMD counter will be incremented accordingly. It is possible to restrict measurement when event occur more than once per cycle. 
For instance, \fBNOPS_RETIRED\fR can happen up to 6 times/cycle which means that the threshold can be adjusted between 0 and 5, where 5 would mean that the PMD counter would be incremented by 1 only when the nop instruction is executed more than 5 times/cycle. This function returns the maximum number of occurrences of the event per cycle, and is the non-inclusive upper bound for the threshold to program in the PMC register. .sp The \fBpfm_mont_get_event_umask()\fR function returns in \fBumask\fR the umask for the event designated by \fBi\fR. .sp The \fBpfm_mont_get_event_group()\fR function returns in \fBgrp\fR the group to which the event designated by \fBi\fR belongs. The notion of group is used for L1D and L2D cache events only. For all other events, a group is irrelevant and can be ignored. If the event is an L2D cache event then the value of \fBgrp\fR will be \fBPFMLIB_MONT_EVT_L2D_CACHE_GRP\fR. Similarly, if the event is an L1D cache event, the value of \fBgrp\fR will be \fBPFMLIB_MONT_EVT_L1D_CACHE_GRP\fR. In all other cases, the value of \fBgrp\fR will be \fBPFMLIB_MONT_EVT_NO_GRP\fR. .sp The \fBpfm_mont_get_event_set()\fR function returns in \fBset\fR the set to which the event designated by \fBi\fR belongs. A set is a subdivision of a group and is therefore only relevant for L1 and L2 cache events. An event can only belong to one group and one set. This partitioning of the cache events is due to some hardware limitations which impose some restrictions on events. For a given group, events from different sets cannot be measured at the same time. If the event does not belong to a group then the value of \fBset\fR is \fBPFMLIB_MONT_EVT_NO_SET\fR. .sp The \fBpfm_mont_get_event_type()\fR function returns in \fBtype\fR the type of the event designated by \fBi\fR. 
The Itanium 2 9000 (Montecito) events can have any one of the following types: .sp .TP .B PFMLIB_MONT_EVT_ACTIVE The event can only occur when the processor thread that generated it is currently active .TP .B PFMLIB_MONT_EVT_FLOATING The event can be generated when the processor thread is inactive .TP .B PFMLIB_MONT_EVT_CAUSAL The event does not belong to a processor thread .TP .B PFMLIB_MONT_EVT_SELF_FLOATING Hybrid event. It is floating if measured with .me. It is causal otherwise. .LP .sp The \fBpfm_mont_irange_is_fine()\fR function returns 1 if the configuration description passed in \fBoutp\fR, the generic output parameters and \fBmod_out\fR, the Itanium 2 9000 (Montecito) specific output parameters, use code range restriction in fine mode. Otherwise the function returns 0. This function can only be called after a call to the \fBpfm_dispatch_events()\fR function returns successfully and had the data structures pointed to by \fBoutp\fR and \fBmod_out\fR as output parameters. .sp The \fBpfm_mont_get_ear_mode()\fR function returns in \fBmode\fR the EAR mode of the event designated by \fBi\fR. If the event is not an EAR event, then \fBPFMLIB_ERR_INVAL\fR is returned and mode is not updated. Otherwise mode can have the following values: .TP .B PFMLIB_MONT_EAR_TLB_MODE The event is a TLB EAR. It can be either data or instruction TLB EAR. .TP .B PFMLIB_MONT_EAR_CACHE_MODE The event is a cache EAR. It can be either data or instruction cache EAR. .TP .B PFMLIB_MONT_EAR_ALAT_MODE The event is an ALAT EAR. It can only be a data EAR event. .sp .LP When the Itanium 2 9000 (Montecito) specific features are needed to support a measurement their descriptions must be passed as model-specific input arguments to the \fBpfm_dispatch_events()\fR function. The Itanium 2 9000 (Montecito) specific input arguments are described in the \fBpfmlib_mont_input_param_t\fR structure and the output parameters in \fBpfmlib_mont_output_param_t\fR. 
They are defined as follows: .sp .nf typedef struct { unsigned int flags; unsigned int thres; } pfmlib_mont_counter_t; typedef struct { unsigned char opcm_used; unsigned char opcm_m; unsigned char opcm_i; unsigned char opcm_f; unsigned char opcm_b; unsigned long opcm_match; unsigned long opcm_mask; } pfmlib_mont_opcm_t; typedef struct { unsigned char etb_used; unsigned int etb_plm; unsigned char etb_ds; unsigned char etb_tm; unsigned char etb_ptm; unsigned char etb_ppm; unsigned char etb_brt; } pfmlib_mont_etb_t; typedef struct { unsigned char ipear_used; unsigned int ipear_plm; unsigned short ipear_delay; } pfmlib_mont_ipear_t; typedef enum { PFMLIB_MONT_EAR_CACHE_MODE= 0, PFMLIB_MONT_EAR_TLB_MODE = 1, PFMLIB_MONT_EAR_ALAT_MODE = 2 } pfmlib_mont_ear_mode_t; typedef struct { unsigned char ear_used; pfmlib_mont_ear_mode_t ear_mode; unsigned int ear_plm; unsigned long ear_umask; } pfmlib_mont_ear_t; typedef struct { unsigned int rr_plm; unsigned long rr_start; unsigned long rr_end; } pfmlib_mont_input_rr_desc_t; typedef struct { unsigned long rr_soff; unsigned long rr_eoff; } pfmlib_mont_output_rr_desc_t; typedef struct { unsigned int rr_flags; pfmlib_mont_input_rr_desc_t rr_limits[4]; unsigned char rr_used; } pfmlib_mont_input_rr_t; typedef struct { unsigned int rr_nbr_used; pfmlib_mont_output_rr_desc_t rr_infos[4]; pfmlib_reg_t rr_br[8]; } pfmlib_mont_output_rr_t; typedef struct { pfmlib_mont_counter_t pfp_mont_counters[PMU_MONT_NUM_COUNTERS]; unsigned long pfp_mont_flags; pfmlib_mont_opcm_t pfp_mont_opcm1; pfmlib_mont_opcm_t pfp_mont_opcm2; pfmlib_mont_ear_t pfp_mont_iear; pfmlib_mont_ear_t pfp_mont_dear; pfmlib_mont_ipear_t pfp_mont_ipear; pfmlib_mont_etb_t pfp_mont_etb; pfmlib_mont_input_rr_t pfp_mont_drange; pfmlib_mont_input_rr_t pfp_mont_irange; } pfmlib_mont_input_param_t; typedef struct { pfmlib_mont_output_rr_t pfp_mont_drange; pfmlib_mont_output_rr_t pfp_mont_irange; } pfmlib_mont_output_param_t; .fi .sp .SH PER-EVENT OPTIONS .sp The Itanium 2 9000 
(Montecito) processor provides one per-event feature for counters: thresholding. It can be set using the \fBpfp_mont_counters\fR data structure for each event. .sp The \fBthres\fR indicates the threshold for the event. A threshold of \fBn\fR means that the counter will be incremented by one only when the event occurs more than \fBn\fR times per cycle. The \fBflags\fR field contains event-specific flags. The currently defined flags are: .sp .TP PFMLIB_MONT_FL_EVT_NO_QUALCHECK When this flag is set it indicates that the library should ignore the qualifiers constraints for this event. Qualifiers includes opcode matching, code and data range restrictions. When an event is marked as not supporting a particular qualifier, it usually means that it is ignored, i.e., the extra level of filtering is ignored. For instance, the FE_BUBBLE_ALL event does not support code range restrictions and by default the library will refuse to program it if range restriction is also requested. Using the flag will override the check and the call to the \fBpfm_dispatch_events()\fR function will succeed. In this case, FE_BUBBLE_ALL will be measured for the entire program and not just for the code range requested. For certain measurements this is perfectly acceptable as the range restriction will only be applied relevant to events which support it. Make sure you understand which events do not support certain qualifiers before using this flag. .LP .SH OPCODE MATCHING .sp The \fBpfp_mont_opcm1\fR and \fBpfp_mont_opcm2\fR fields of type \fBpfmlib_mont_opcm_t\fR contain the description of what to do with the opcode matchers. The Itanium 2 9000 (Montecito) processor supports opcode matching via PMC32 and PMC34. When this feature is used the \fBopcm_used\fR field must be set to 1, otherwise it is ignored by the library. The Itanium 2 9000 (Montecito) processor implements two full 41-bit opcode matchers. As such, it is possible to match all instructions individually. 
It is possible to match a single instruction or an instruction pattern based on opcode or slot type. The slots are specified in: .TP .B opcm_m Match when the instruction is in an M-slot (memory) .TP .B opcm_i Match when the instruction is in an I-slot (ALU) .TP .B opcm_f Match when the instruction is in an F-slot (FPU) .TP .B opcm_b Match when the instruction is in a B-slot (Branch) .sp .LP Any combination of slot settings is supported. To match all slot types, simply set all fields to 1. .sp The 41-bit opcode is specified in \fBopcm_match\fR and a 41-bit mask is passed in \fBopcm_mask\fR. When a bit is set in \fBopcm_mask\fR the corresponding bit is ignored in \fBopcm_match\fR. .SH EVENT ADDRESS REGISTERS .sp The \fBpfp_mont_iear\fR field of type \fBpfmlib_mont_ear_t\fR describes what to do with instruction Event Address Registers (I-EARs). Again if this feature is used the \fBear_used\fR must be set to 1, otherwise it will be ignored by the library. The \fBear_mode\fR must be set to either one of \fBPFMLIB_MONT_EAR_TLB_MODE\fR, \fBPFMLIB_MONT_EAR_CACHE_MODE\fR to indicate the type of EAR to program. The umask to store into PMC10 must be in \fBear_umask\fR. The privilege level mask at which the I-EAR will be monitored must be set in \fBear_plm\fR which can be any combination of \fBPFM_PLM0\fR, \fBPFM_PLM1\fR, \fBPFM_PLM2\fR, \fBPFM_PLM3\fR. If \fBear_plm\fR is 0 then the default privilege level mask in \fBpfp_dfl_plm\fR is used. .sp The \fBpfp_mont_dear\fR field of type \fBpfmlib_mont_ear_t\fR describes what to do with data Event Address Registers (D-EARs). The description is identical to the I-EARs except that it applies to PMC11 and that a \fBear_mode\fR of \fBPFMLIB_MONT_EAR_ALAT_MODE\fR is possible. In general, there are four different methods to program the EAR (data or instruction): .TP .B Method 1 There is an EAR event in the list of events to monitor and \fBear_used\fR is cleared. 
In this case the EAR will be programmed (PMC10 or PMC11) based on the information encoded in the event. A counting monitor (PMC4/PMD4-PMC7/PMD7) will be programmed to count \fBDATA_EAR_EVENT\fR or \fBL1I_EAR_EVENTS\fR depending on the type of EAR. .TP .B Method 2 There is an EAR event in the list of events to monitor and \fBear_used\fR is set. In this case the EAR will be programmed (PMC10 or PMC11) using the information in the \fBpfp_mont_iear\fR or \fBpfp_mont_dear\fR structure because it contains more detailed information, such as privilege level and instruction set. A counting monitor (PMC4/PMD4-PMC7/PMD7) will be programmed to count DATA_EAR_EVENT or L1I_EAR_EVENTS depending on the type of EAR. .TP .B Method 3 There is no EAR event in the list of events to monitor and \fBear_used\fR is cleared. In this case no EAR is programmed. .TP .B Method 4 There is no EAR event in the list of events to monitor and \fBear_used\fR is set. In this case the EAR will be programmed (PMC10 or PMC11) using the information in the \fBpfp_mont_iear\fR or \fBpfp_mont_dear\fR structure. This is the free running mode for the EAR. .sp .SH EXECUTION TRACE BUFFER The \fBpfp_mont_etb\fR of type \fBpfmlib_mont_etb_t\fR field is used to configure the Execution Trace Buffer (ETB). If the \fBetb_used\fR is set, then the library will take the configuration into account, otherwise any ETB configuration will be ignored. The various fields in this structure provide means to filter out the kind of changes in the control flow (branches, traps, rfi, ...) that get recorded in the ETB. Each one represents an element of the branch architecture of the Itanium 2 9000 (Montecito) processor. Refer to the Itanium 2 9000 (Montecito) specific documentation for more details on the branch architecture. The fields are as follows: .TP .B etb_tm If this field is 0, then no branch is captured. If this field is 1, then non taken branches are captured. If this field is 2, then taken branches are captured. 
Finally if this field is 3 then all branches are captured. .TP .B etb_ptm If this field is 0, then no branch is captured. If this field is 1, then branches with a mispredicted target address are captured. If this field is 2, then branches with correctly predicted target address are captured. Finally if this field is 3 then all branches are captured regardless of target address prediction. .TP .B etb_ppm If this field is 0, then no branch is captured. If this field is 1, then branches with a mispredicted path (taken/non taken) are captured. If this field is 2, then branches with correctly predicted path are captured. Finally if this field is 3 then all branches are captured regardless of their path prediction. .TP .B etb_brt If this field is 0, then no branch is captured. If this field is 1, then only IP-relative branches are captured. If this field is 2, then only return branches are captured. Finally if this field is 3 then only non-return indirect branches are captured. .TP .B etb_plm This is the privilege level mask at which the ETB captures branches. It can be any combination of \fBPFM_PLM0\fR, \fBPFM_PLM1\fR, \fBPFM_PLM2\fR, \fBPFM_PLM3\fR. If \fBetb_plm\fR is 0 then the default privilege level mask in \fBpfp_dfl_plm\fR is used. .sp There are 4 methods to program the ETB and they are as follows: .sp .TP .B Method 1 The \fBETB_EVENT\fR is in the list of events to monitor and \fBetb_used\fR is cleared. In this case, the ETB will be configured (PMC39) to record ALL branches. A counting monitor will be programmed to count \fBETB_EVENT\fR. .TP .B Method 2 The \fBETB_EVENT\fR is in the list of events to monitor and \fBetb_used\fR is set. In this case, the ETB will be configured (PMC39) using the information in the \fBpfp_mont_etb\fR structure. A counting monitor (PMC4/PMD4-PMC7/PMD7) will be programmed to count \fBETB_EVENT\fR. .TP .B Method 3 The \fBETB_EVENT\fR is not in the list of events to monitor and \fBetb_used\fR is set. 
In this case, the ETB will be configured (PMC39) using the information in the \fBpfp_mont_etb\fR structure. This is the free running mode for the ETB. .TP .B Method 4 The \fBETB_EVENT\fR is not in the list of events to monitor and \fBetb_used\fR is cleared. In this case, the ETB is not programmed. .SH DATA AND CODE RANGE RESTRICTIONS The \fBpfp_mont_drange\fR and \fBpfp_mont_irange\fR fields control the range restrictions for the data and code respectively. The idea is that the application passes a set of ranges, each designated by a start and end address. Upon return from the \fBpfm_dispatch_events()\fR function, the application gets back the set of registers and their values that need to be programmed via a kernel interface. Range restriction is implemented using the debug registers. There is a limited number of debug registers and they go in pairs. With 8 data debug registers, a maximum of 4 distinct ranges can be specified. The same applies to code range restrictions. Moreover, there are some severe constraints on the alignment and size of the ranges. Given that the size of a range is specified using a bitmask, there can be situations where the actual range is larger than the requested range. For code ranges, the Itanium 2 9000 (Montecito) processor can use what is called a fine mode, where a range is designated using two pairs of code debug registers. In this mode, the bitmask is not used, the start and end addresses are directly specified. Not all code ranges qualify for fine mode, the size of the range must be 64KB or less and the range cannot cross a 64KB page boundary. The library will make a best effort in choosing the right mode for each range. For code ranges, it will try the fine mode first and will default to using the bitmask mode otherwise. Fine mode applies to all code debug registers or none, i.e., you cannot have a range using fine mode and another using the bitmask. 
The Itanium 2 9000 (Montecito) processor somewhat limits the use of multiple pairs to accurately cover a code range. This can only be done for \fBIA64_INST_RETIRED\fR and even then, you need several events to collect the counts. For all other events, only one pair can be used, which leads to more inaccuracy due to approximation. Data ranges can use multiple debug register pairs to gain more accuracy. The library will never cover less than what is requested. The algorithm will use more than one pair of debug registers whenever possible to get a more precise range. Hence, up to 4 pairs can be used to describe a single range. If range restriction is to be used, the \fBrr_used\fR field must be set to one, otherwise settings will be ignored. The ranges are described by the \fBpfmlib_mont_input_rr_t\fR structure. Up to 4 ranges can be defined. Each range is described by an entry in \fBrr_limits\fR. Some flags for all ranges can be defined in \fBrr_flags\fR. Currently defined flags are: .sp .TP .B PFMLIB_MONT_RR_INV Invert the code ranges. The qualifying events will be measured when executing outside the specified ranges. .TP .B PFMLIB_MONT_RR_NO_FINE_MODE Force non fine mode for all code ranges (mostly for debug) .sp .LP The \fBpfmlib_mont_input_rr_desc_t\fR structure is defined as follows: .TP .B rr_plm The privilege level at which the range is active. It can be any combination of \fBPFM_PLM0\fR, \fBPFM_PLM1\fR, \fBPFM_PLM2\fR, \fBPFM_PLM3\fR. If \fBrr_plm\fR is 0 then the default privilege level mask in \fBpfp_dfl_plm\fR is used. The privilege level is only relevant for code ranges, data ranges ignore the setting. .TP .B rr_start This is the start address of the range. Any address is supported but for code range it must be bundle aligned, i.e., 16-byte aligned. .TP .B rr_end This is the end address of the range. Any address is supported but for code range it must be bundle aligned, i.e., 16-byte aligned. 
.sp .LP The library will provide the values for the debug registers as well as some information about the actual ranges in the output parameters and more precisely in the \fBpfmlib_mont_output_rr_t\fR structure for each range. The structure is defined as follows: .TP .B rr_nbr_used Contains the number of debug registers used to cover the range. This is necessarily an even number as debug registers always go in pairs. The value of this field is between 0 and 7. .TP .B rr_br This table contains the list of debug registers necessary to cover the ranges. Each element is of type \fBpfmlib_reg_t\fR. The \fBreg_num\fR field contains the debug register index while \fBreg_value\fR contains the debug register value. Both the index and value must be copied into the kernel specific argument to program the debug registers. The library never programs them. .TP .B rr_infos Contains information about the ranges defined. Because of alignment restrictions, the actual range covered by the debug registers may be larger than the requested range. This table describes the differences between the requested and actual ranges expressed as offsets: .TP .B rr_soff Contains the start offset of the actual range described by the debug registers. If zero, it means the library was able to match exactly the beginning of the range. Otherwise it represents the number of bytes by which the actual range precedes the requested range. .TP .B rr_eoff Contains the end offset of the actual range described by the debug registers. If zero, it means the library was able to match exactly the end of the range. Otherwise it represents the number of bytes by which the actual range exceeds the requested range. .sp .LP .SH IP EVENT CAPTURE (IP-EAR) The Execution Trace Buffer (ETB) can be configured to record the addresses of consecutive retiring instructions. In this case the ETB contains IP addresses and not branch-related information. 
This feature cannot be used in conjunction with regular branch captures as described above. To active this feature the \fBipear_used\fR field of the \fBpfmlib_mont_ipear_t\fR must be set to 1. The other fields in this structure are used as follows: .sp .TP .B ipear_plm The privilege level of the instructions to capture. It can be any combination of \fBPFM_PLM0\fR, \fBPFM_PLM1\fR, \fBPFM_PLM2\fR, \fBPFM_PLM3\fR. If \fBetb_plm\fR is 0 then the default privilege level mask in \fBpfp_dfl_plm\fR is used. .TP .B ipear_delay The number of cycles by which to delay the freeze of the ETB after a PMU interrupt (which freeze the rest of counters). .LP .sp .SH ERRORS Refer to the description of the \fBpfm_dispatch_events()\fR function for errors when using the Itanium 2 9000 (Montecito) specific input and output arguments. .SH SEE ALSO pfm_dispatch_events(3) and set of examples shipped with the library .SH AUTHOR Stephane Eranian .PP papi-5.6.0/src/perfctr-2.7.x/etc/costs/Athlon-1.64000664 001750 001750 00000001365 13216244367 023220 0ustar00jshenry1963jshenry1963000000 000000 [data from a 1.64 GHz Athlon] PERFCTR INIT: vendor 2, family 6, model 8, stepping 0, clock 1639332 kHz PERFCTR INIT: NITER == 64 PERFCTR INIT: loop overhead is 108 cycles PERFCTR INIT: rdtsc cost is 14.3 cycles (1025 total) PERFCTR INIT: rdpmc cost is 18.0 cycles (1261 total) PERFCTR INIT: rdmsr (counter) cost is 53.2 cycles (3516 total) PERFCTR INIT: rdmsr (evntsel) cost is 52.6 cycles (3476 total) PERFCTR INIT: wrmsr (counter) cost is 81.5 cycles (5327 total) PERFCTR INIT: wrmsr (evntsel) cost is 231.2 cycles (14907 total) PERFCTR INIT: read cr4 cost is 1.7 cycles (218 total) PERFCTR INIT: write cr4 cost is 62.4 cycles (4105 total) PERFCTR INIT: write LVTPC cost is 4.4 cycles (395 total) perfctr: driver 2.7.3, cpu type AMD K7/K8 at 1639332 kHz papi-5.6.0/src/libpfm4/lib/events/intel_bdx_unc_ha_events.h000664 001750 001750 00000122560 13216244364 025764 0ustar00jshenry1963jshenry1963000000 000000 /* * 
Copyright (c) 2017 Google Inc. All rights reserved * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. 
* * PMU: bdx_unc_ha */ static intel_x86_umask_t bdx_unc_h_bypass_imc[]={ { .uname = "NOT_TAKEN", .ucode = 0x200, .udesc = "HA to iMC Bypass -- Not Taken", }, { .uname = "TAKEN", .ucode = 0x100, .udesc = "HA to iMC Bypass -- Taken", }, }; static intel_x86_umask_t bdx_unc_h_directory_lookup[]={ { .uname = "NO_SNP", .ucode = 0x200, .udesc = "Directory Lookups -- Snoop Not Needed", }, { .uname = "SNP", .ucode = 0x100, .udesc = "Directory Lookups -- Snoop Needed", }, }; static intel_x86_umask_t bdx_unc_h_directory_update[]={ { .uname = "ANY", .ucode = 0x300, .udesc = "Directory Updates -- Any Directory Update", .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "CLEAR", .ucode = 0x200, .udesc = "Directory Updates -- Directory Clear", }, { .uname = "SET", .ucode = 0x100, .udesc = "Directory Updates -- Directory Set", }, }; static intel_x86_umask_t bdx_unc_h_hitme_hit[]={ { .uname = "ACKCNFLTWBI", .ucode = 0x400, .udesc = "Counts Number of Hits in HitMe Cache -- op is AckCnfltWbI", }, { .uname = "ALL", .ucode = 0xff00, .udesc = "Counts Number of Hits in HitMe Cache -- All Requests", .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "ALLOCS", .ucode = 0x7000, .udesc = "Counts Number of Hits in HitMe Cache -- Allocations", .uflags = INTEL_X86_NCOMBO, }, { .uname = "EVICTS", .ucode = 0x4200, .udesc = "Counts Number of Hits in HitMe Cache -- Allocations", .uflags = INTEL_X86_NCOMBO, }, { .uname = "HOM", .ucode = 0xf00, .udesc = "Counts Number of Hits in HitMe Cache -- HOM Requests", .uflags = INTEL_X86_NCOMBO, }, { .uname = "INVALS", .ucode = 0x2600, .udesc = "Counts Number of Hits in HitMe Cache -- Invalidations", .uflags = INTEL_X86_NCOMBO, }, { .uname = "READ_OR_INVITOE", .ucode = 0x100, .udesc = "Counts Number of Hits in HitMe Cache -- op is RdCode, RdData, RdDataMigratory, RdInvOwn, RdCur or InvItoE", }, { .uname = "RSP", .ucode = 0x8000, .udesc = "Counts Number of Hits in HitMe Cache -- op is RspI, RspIWb, RspS, RspSWb, RspCnflt or RspCnfltWbI", }, { 
.uname = "RSPFWDI_LOCAL", .ucode = 0x2000, .udesc = "Counts Number of Hits in HitMe Cache -- op is RspIFwd or RspIFwdWb for a local request", }, { .uname = "RSPFWDI_REMOTE", .ucode = 0x1000, .udesc = "Counts Number of Hits in HitMe Cache -- op is RspIFwd or RspIFwdWb for a remote request", }, { .uname = "RSPFWDS", .ucode = 0x4000, .udesc = "Counts Number of Hits in HitMe Cache -- op is RsSFwd or RspSFwdWb", }, { .uname = "WBMTOE_OR_S", .ucode = 0x800, .udesc = "Counts Number of Hits in HitMe Cache -- op is WbMtoE or WbMtoS", }, { .uname = "WBMTOI", .ucode = 0x200, .udesc = "Counts Number of Hits in HitMe Cache -- op is WbMtoI", }, }; static intel_x86_umask_t bdx_unc_h_hitme_hit_pv_bits_set[]={ { .uname = "ACKCNFLTWBI", .ucode = 0x400, .udesc = "Accumulates Number of PV bits set on HitMe Cache Hits -- op is AckCnfltWbI", }, { .uname = "ALL", .ucode = 0xff00, .udesc = "Accumulates Number of PV bits set on HitMe Cache Hits -- All Requests", .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "HOM", .ucode = 0xf00, .udesc = "Accumulates Number of PV bits set on HitMe Cache Hits -- HOM Requests", }, { .uname = "READ_OR_INVITOE", .ucode = 0x100, .udesc = "Accumulates Number of PV bits set on HitMe Cache Hits -- op is RdCode, RdData, RdDataMigratory, RdInvOwn, RdCur or InvItoE", }, { .uname = "RSP", .ucode = 0x8000, .udesc = "Accumulates Number of PV bits set on HitMe Cache Hits -- op is RspI, RspIWb, RspS, RspSWb, RspCnflt or RspCnfltWbI", }, { .uname = "RSPFWDI_LOCAL", .ucode = 0x2000, .udesc = "Accumulates Number of PV bits set on HitMe Cache Hits -- op is RspIFwd or RspIFwdWb for a local request", }, { .uname = "RSPFWDI_REMOTE", .ucode = 0x1000, .udesc = "Accumulates Number of PV bits set on HitMe Cache Hits -- op is RspIFwd or RspIFwdWb for a remote request", }, { .uname = "RSPFWDS", .ucode = 0x4000, .udesc = "Accumulates Number of PV bits set on HitMe Cache Hits -- op is RsSFwd or RspSFwdWb", }, { .uname = "WBMTOE_OR_S", .ucode = 0x800, .udesc = "Accumulates 
Number of PV bits set on HitMe Cache Hits -- op is WbMtoE or WbMtoS", }, { .uname = "WBMTOI", .ucode = 0x200, .udesc = "Accumulates Number of PV bits set on HitMe Cache Hits -- op is WbMtoI", }, }; static intel_x86_umask_t bdx_unc_h_hitme_lookup[]={ { .uname = "ACKCNFLTWBI", .ucode = 0x400, .udesc = "Counts Number of times HitMe Cache is accessed -- op is AckCnfltWbI", }, { .uname = "ALL", .ucode = 0xff00, .udesc = "Counts Number of times HitMe Cache is accessed -- All Requests", .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "ALLOCS", .ucode = 0x7000, .udesc = "Counts Number of times HitMe Cache is accessed -- Allocations", }, { .uname = "HOM", .ucode = 0xf00, .udesc = "Counts Number of times HitMe Cache is accessed -- HOM Requests", .uflags = INTEL_X86_NCOMBO, }, { .uname = "INVALS", .ucode = 0x2600, .udesc = "Counts Number of times HitMe Cache is accessed -- Invalidations", .uflags = INTEL_X86_NCOMBO, }, { .uname = "READ_OR_INVITOE", .ucode = 0x100, .udesc = "Counts Number of times HitMe Cache is accessed -- op is RdCode, RdData, RdDataMigratory, RdInvOwn, RdCur or InvItoE", }, { .uname = "RSP", .ucode = 0x8000, .udesc = "Counts Number of times HitMe Cache is accessed -- op is RspI, RspIWb, RspS, RspSWb, RspCnflt or RspCnfltWbI", }, { .uname = "RSPFWDI_LOCAL", .ucode = 0x2000, .udesc = "Counts Number of times HitMe Cache is accessed -- op is RspIFwd or RspIFwdWb for a local request", }, { .uname = "RSPFWDI_REMOTE", .ucode = 0x1000, .udesc = "Counts Number of times HitMe Cache is accessed -- op is RspIFwd or RspIFwdWb for a remote request", }, { .uname = "RSPFWDS", .ucode = 0x4000, .udesc = "Counts Number of times HitMe Cache is accessed -- op is RsSFwd or RspSFwdWb", }, { .uname = "WBMTOE_OR_S", .ucode = 0x800, .udesc = "Counts Number of times HitMe Cache is accessed -- op is WbMtoE or WbMtoS", }, { .uname = "WBMTOI", .ucode = 0x200, .udesc = "Counts Number of times HitMe Cache is accessed -- op is WbMtoI", }, }; static intel_x86_umask_t 
bdx_unc_h_igr_no_credit_cycles[]={
  /* Unit masks for UNC_H_IGR_NO_CREDIT_CYCLES: one bit per (ring, QPI link) credit pool. */
  { .uname = "AD_QPI0",
    .ucode = 0x100,
    .udesc = "Cycles without QPI Ingress Credits -- AD to QPI Link 0",
  },
  { .uname = "AD_QPI1",
    .ucode = 0x200,
    .udesc = "Cycles without QPI Ingress Credits -- AD to QPI Link 1",
  },
  { .uname = "AD_QPI2",
    .ucode = 0x1000,
    /* Description previously read "BL to QPI Link 0", contradicting the umask name. */
    .udesc = "Cycles without QPI Ingress Credits -- AD to QPI Link 2",
  },
  { .uname = "BL_QPI0",
    .ucode = 0x400,
    .udesc = "Cycles without QPI Ingress Credits -- BL to QPI Link 0",
  },
  { .uname = "BL_QPI1",
    .ucode = 0x800,
    .udesc = "Cycles without QPI Ingress Credits -- BL to QPI Link 1",
  },
  { .uname = "BL_QPI2",
    .ucode = 0x2000,
    /* Description previously duplicated BL_QPI1 ("BL to QPI Link 1"). */
    .udesc = "Cycles without QPI Ingress Credits -- BL to QPI Link 2",
  },
};

/* Unit masks for UNC_H_IMC_READS: only the normal-priority mask is defined. */
static intel_x86_umask_t bdx_unc_h_imc_reads[]={
  { .uname = "NORMAL",
    .ucode = 0x100,
    .udesc = "HA to iMC Normal Priority Reads Issued -- Normal Priority",
    .uflags = INTEL_X86_DFL,
  },
};

/* Unit masks for UNC_H_IMC_WRITES: ALL (default) is the OR of the four individual masks. */
static intel_x86_umask_t bdx_unc_h_imc_writes[]={
  { .uname = "ALL",
    .ucode = 0xf00,
    .udesc = "HA to iMC Full Line Writes Issued -- All Writes",
    .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL,
  },
  { .uname = "FULL",
    .ucode = 0x100,
    .udesc = "HA to iMC Full Line Writes Issued -- Full Line Non-ISOCH",
  },
  { .uname = "FULL_ISOCH",
    .ucode = 0x400,
    .udesc = "HA to iMC Full Line Writes Issued -- ISOCH Full Line",
  },
  { .uname = "PARTIAL",
    .ucode = 0x200,
    .udesc = "HA to iMC Full Line Writes Issued -- Partial Non-ISOCH",
  },
  { .uname = "PARTIAL_ISOCH",
    .ucode = 0x800,
    .udesc = "HA to iMC Full Line Writes Issued -- ISOCH Partial",
  },
};

/* Unit masks for UNC_H_OSB (OSB snoop broadcasts). */
static intel_x86_umask_t bdx_unc_h_osb[]={
  { .uname = "CANCELLED",
    .ucode = 0x1000,
    .udesc = "OSB Snoop Broadcast -- Cancelled",
  },
  { .uname = "INVITOE_LOCAL",
    .ucode = 0x400,
    .udesc = "OSB Snoop Broadcast -- Local InvItoE",
  },
  { .uname = "READS_LOCAL",
    .ucode = 0x200,
    .udesc = "OSB Snoop Broadcast -- Local Reads",
  },
  { .uname = "READS_LOCAL_USEFUL",
    .ucode = 0x2000,
    .udesc = "OSB Snoop Broadcast -- Reads Local - Useful",
  },
  { .uname = "REMOTE", .ucode = 0x800, .udesc = "OSB Snoop Broadcast
-- Remote", }, { .uname = "REMOTE_USEFUL", .ucode = 0x4000, .udesc = "OSB Snoop Broadcast -- Remote - Useful", }, }; static intel_x86_umask_t bdx_unc_h_osb_edr[]={ { .uname = "ALL", .ucode = 0x100, .udesc = "OSB Early Data Return -- All", .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "READS_LOCAL_I", .ucode = 0x200, .udesc = "OSB Early Data Return -- Reads to Local I", }, { .uname = "READS_LOCAL_S", .ucode = 0x800, .udesc = "OSB Early Data Return -- Reads to Local S", }, { .uname = "READS_REMOTE_I", .ucode = 0x400, .udesc = "OSB Early Data Return -- Reads to Remote I", }, { .uname = "READS_REMOTE_S", .ucode = 0x1000, .udesc = "OSB Early Data Return -- Reads to Remote S", }, }; static intel_x86_umask_t bdx_unc_h_requests[]={ { .uname = "INVITOE_LOCAL", .ucode = 0x1000, .udesc = "Read and Write Requests -- Local InvItoEs", }, { .uname = "INVITOE_REMOTE", .ucode = 0x2000, .udesc = "Read and Write Requests -- Remote InvItoEs", }, { .uname = "READS", .ucode = 0x300, .udesc = "Read and Write Requests -- Reads", .uflags = INTEL_X86_NCOMBO, }, { .uname = "READS_LOCAL", .ucode = 0x100, .udesc = "Read and Write Requests -- Local Reads", }, { .uname = "READS_REMOTE", .ucode = 0x200, .udesc = "Read and Write Requests -- Remote Reads", }, { .uname = "WRITES", .ucode = 0xc00, .udesc = "Read and Write Requests -- Writes", .uflags = INTEL_X86_NCOMBO, }, { .uname = "WRITES_LOCAL", .ucode = 0x400, .udesc = "Read and Write Requests -- Local Writes", }, { .uname = "WRITES_REMOTE", .ucode = 0x800, .udesc = "Read and Write Requests -- Remote Writes", }, }; static intel_x86_umask_t bdx_unc_h_ring_ad_used[]={ { .uname = "CCW", .ucode = 0xc00, .udesc = "Counterclockwise", .uflags = INTEL_X86_NCOMBO, }, { .uname = "CCW_EVEN", .ucode = 0x400, .udesc = "Counterclockwise and Even", }, { .uname = "CCW_ODD", .ucode = 0x800, .udesc = "Counterclockwise and Odd", }, { .uname = "CW", .ucode = 0x300, .udesc = "Clockwise", .uflags = INTEL_X86_NCOMBO, }, { .uname = "CW_EVEN", .ucode = 
0x100, .udesc = "Clockwise and Even", }, { .uname = "CW_ODD", .ucode = 0x200, .udesc = "Clockwise and Odd", }, }; static intel_x86_umask_t bdx_unc_h_rpq_cycles_no_reg_credits[]={ { .uname = "CHN0", .ucode = 0x100, .udesc = "iMC RPQ Credits Empty - Regular -- Channel 0", .uflags = INTEL_X86_NCOMBO, }, { .uname = "CHN1", .ucode = 0x200, .udesc = "iMC RPQ Credits Empty - Regular -- Channel 1", .uflags = INTEL_X86_NCOMBO, }, { .uname = "CHN2", .ucode = 0x400, .udesc = "iMC RPQ Credits Empty - Regular -- Channel 2", .uflags = INTEL_X86_NCOMBO, }, { .uname = "CHN3", .ucode = 0x800, .udesc = "iMC RPQ Credits Empty - Regular -- Channel 3", .uflags = INTEL_X86_NCOMBO, }, }; static intel_x86_umask_t bdx_unc_h_sbo0_credits_acquired[]={ { .uname = "AD", .ucode = 0x100, .udesc = "For AD Ring", }, { .uname = "BL", .ucode = 0x200, .udesc = "For BL Ring", }, }; static intel_x86_umask_t bdx_unc_h_snoops_rsp_after_data[]={ { .uname = "LOCAL", .ucode = 0x100, .udesc = "Data beat the Snoop Responses -- Local Requests", .uflags = INTEL_X86_NCOMBO, }, { .uname = "REMOTE", .ucode = 0x200, .udesc = "Data beat the Snoop Responses -- Remote Requests", .uflags = INTEL_X86_NCOMBO, }, }; static intel_x86_umask_t bdx_unc_h_snoop_cycles_ne[]={ { .uname = "ALL", .ucode = 0x300, .udesc = "Cycles with Snoops Outstanding -- All Requests", .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "LOCAL", .ucode = 0x100, .udesc = "Cycles with Snoops Outstanding -- Local Requests", }, { .uname = "REMOTE", .ucode = 0x200, .udesc = "Cycles with Snoops Outstanding -- Remote Requests", }, }; static intel_x86_umask_t bdx_unc_h_snoop_occupancy[]={ { .uname = "LOCAL", .ucode = 0x100, .udesc = "Tracker Snoops Outstanding Accumulator -- Local Requests", .uflags = INTEL_X86_NCOMBO, }, { .uname = "REMOTE", .ucode = 0x200, .udesc = "Tracker Snoops Outstanding Accumulator -- Remote Requests", .uflags = INTEL_X86_NCOMBO, }, }; static intel_x86_umask_t bdx_unc_h_snoop_resp[]={ { .uname = "RSPCNFLCT", .ucode = 
0x4000, .udesc = "Snoop Responses Received -- RSPCNFLCT*", }, { .uname = "RSPI", .ucode = 0x100, .udesc = "Snoop Responses Received -- RspI", }, { .uname = "RSPIFWD", .ucode = 0x400, .udesc = "Snoop Responses Received -- RspIFwd", }, { .uname = "RSPS", .ucode = 0x200, .udesc = "Snoop Responses Received -- RspS", }, { .uname = "RSPSFWD", .ucode = 0x800, .udesc = "Snoop Responses Received -- RspSFwd", }, { .uname = "RSP_FWD_WB", .ucode = 0x2000, .udesc = "Snoop Responses Received -- Rsp*Fwd*WB", }, { .uname = "RSP_WB", .ucode = 0x1000, .udesc = "Snoop Responses Received -- Rsp*WB", }, }; static intel_x86_umask_t bdx_unc_h_snp_resp_recv_local[]={ { .uname = "OTHER", .ucode = 0x8000, .udesc = "Snoop Responses Received Local -- Other", }, { .uname = "RSPCNFLCT", .ucode = 0x4000, .udesc = "Snoop Responses Received Local -- RspCnflct", }, { .uname = "RSPI", .ucode = 0x100, .udesc = "Snoop Responses Received Local -- RspI", }, { .uname = "RSPIFWD", .ucode = 0x400, .udesc = "Snoop Responses Received Local -- RspIFwd", }, { .uname = "RSPS", .ucode = 0x200, .udesc = "Snoop Responses Received Local -- RspS", }, { .uname = "RSPSFWD", .ucode = 0x800, .udesc = "Snoop Responses Received Local -- RspSFwd", }, { .uname = "RSPxFWDxWB", .ucode = 0x2000, .udesc = "Snoop Responses Received Local -- Rsp*FWD*WB", }, { .uname = "RSPxWB", .ucode = 0x1000, .udesc = "Snoop Responses Received Local -- Rsp*WB", }, }; static intel_x86_umask_t bdx_unc_h_stall_no_sbo_credit[]={ { .uname = "SBO0_AD", .ucode = 0x100, .udesc = "Stall on No Sbo Credits -- For SBo0, AD Ring", }, { .uname = "SBO0_BL", .ucode = 0x400, .udesc = "Stall on No Sbo Credits -- For SBo0, BL Ring", }, { .uname = "SBO1_AD", .ucode = 0x200, .udesc = "Stall on No Sbo Credits -- For SBo1, AD Ring", }, { .uname = "SBO1_BL", .ucode = 0x800, .udesc = "Stall on No Sbo Credits -- For SBo1, BL Ring", }, }; static intel_x86_umask_t bdx_unc_h_tad_requests_g0[]={ { .uname = "REGION0", .ucode = 0x100, .udesc = "HA Requests to a TAD Region - 
Group 0 -- TAD Region 0", .uflags = INTEL_X86_NCOMBO, }, { .uname = "REGION1", .ucode = 0x200, .udesc = "HA Requests to a TAD Region - Group 0 -- TAD Region 1", .uflags = INTEL_X86_NCOMBO, }, { .uname = "REGION2", .ucode = 0x400, .udesc = "HA Requests to a TAD Region - Group 0 -- TAD Region 2", .uflags = INTEL_X86_NCOMBO, }, { .uname = "REGION3", .ucode = 0x800, .udesc = "HA Requests to a TAD Region - Group 0 -- TAD Region 3", .uflags = INTEL_X86_NCOMBO, }, { .uname = "REGION4", .ucode = 0x1000, .udesc = "HA Requests to a TAD Region - Group 0 -- TAD Region 4", .uflags = INTEL_X86_NCOMBO, }, { .uname = "REGION5", .ucode = 0x2000, .udesc = "HA Requests to a TAD Region - Group 0 -- TAD Region 5", .uflags = INTEL_X86_NCOMBO, }, { .uname = "REGION6", .ucode = 0x4000, .udesc = "HA Requests to a TAD Region - Group 0 -- TAD Region 6", .uflags = INTEL_X86_NCOMBO, }, { .uname = "REGION7", .ucode = 0x8000, .udesc = "HA Requests to a TAD Region - Group 0 -- TAD Region 7", .uflags = INTEL_X86_NCOMBO, }, }; static intel_x86_umask_t bdx_unc_h_tad_requests_g1[]={ { .uname = "REGION10", .ucode = 0x400, .udesc = "HA Requests to a TAD Region - Group 1 -- TAD Region 10", .uflags = INTEL_X86_NCOMBO, }, { .uname = "REGION11", .ucode = 0x800, .udesc = "HA Requests to a TAD Region - Group 1 -- TAD Region 11", .uflags = INTEL_X86_NCOMBO, }, { .uname = "REGION8", .ucode = 0x100, .udesc = "HA Requests to a TAD Region - Group 1 -- TAD Region 8", .uflags = INTEL_X86_NCOMBO, }, { .uname = "REGION9", .ucode = 0x200, .udesc = "HA Requests to a TAD Region - Group 1 -- TAD Region 9", .uflags = INTEL_X86_NCOMBO, }, }; static intel_x86_umask_t bdx_unc_h_tracker_cycles_full[]={ { .uname = "ALL", .ucode = 0x200, .udesc = "Tracker Cycles Full -- Cycles Completely Used", .uflags = INTEL_X86_DFL, }, { .uname = "GP", .ucode = 0x100, .udesc = "Tracker Cycles Full -- Cycles GP Completely Used", }, }; static intel_x86_umask_t bdx_unc_h_tracker_cycles_ne[]={ { .uname = "ALL", .ucode = 0x300, .udesc = "Tracker 
Cycles Not Empty -- All Requests", .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "LOCAL", .ucode = 0x100, .udesc = "Tracker Cycles Not Empty -- Local Requests", }, { .uname = "REMOTE", .ucode = 0x200, .udesc = "Tracker Cycles Not Empty -- Remote Requests", }, }; static intel_x86_umask_t bdx_unc_h_tracker_occupancy[]={ { .uname = "INVITOE_LOCAL", .ucode = 0x4000, .udesc = "Tracker Occupancy Accumultor -- Local InvItoE Requests", }, { .uname = "INVITOE_REMOTE", .ucode = 0x8000, .udesc = "Tracker Occupancy Accumultor -- Remote InvItoE Requests", }, { .uname = "READS_LOCAL", .ucode = 0x400, .udesc = "Tracker Occupancy Accumultor -- Local Read Requests", }, { .uname = "READS_REMOTE", .ucode = 0x800, .udesc = "Tracker Occupancy Accumultor -- Remote Read Requests", }, { .uname = "WRITES_LOCAL", .ucode = 0x1000, .udesc = "Tracker Occupancy Accumultor -- Local Write Requests", }, { .uname = "WRITES_REMOTE", .ucode = 0x2000, .udesc = "Tracker Occupancy Accumultor -- Remote Write Requests", }, }; static intel_x86_umask_t bdx_unc_h_tracker_pending_occupancy[]={ { .uname = "LOCAL", .ucode = 0x100, .udesc = "Data Pending Occupancy Accumultor -- Local Requests", .uflags = INTEL_X86_NCOMBO, }, { .uname = "REMOTE", .ucode = 0x200, .udesc = "Data Pending Occupancy Accumultor -- Remote Requests", .uflags = INTEL_X86_NCOMBO, }, }; static intel_x86_umask_t bdx_unc_h_txr_ad_cycles_full[]={ { .uname = "ALL", .ucode = 0x300, .udesc = "All", .uflags = INTEL_X86_DFL, }, { .uname = "SCHED0", .ucode = 0x100, .udesc = "Scheduler 0", }, { .uname = "SCHED1", .ucode = 0x200, .udesc = "Scheduler 1", }, }; static intel_x86_umask_t bdx_unc_h_txr_bl[]={ { .uname = "DRS_CACHE", .ucode = 0x100, .udesc = "Outbound DRS Ring Transactions to Cache -- Data to Cache", }, { .uname = "DRS_CORE", .ucode = 0x200, .udesc = "Outbound DRS Ring Transactions to Cache -- Data to Core", }, { .uname = "DRS_QPI", .ucode = 0x400, .udesc = "Outbound DRS Ring Transactions to Cache -- Data to QPI", }, }; static 
intel_x86_umask_t bdx_unc_h_txr_starved[]={ { .uname = "AK", .ucode = 0x100, .udesc = "Injection Starvation -- For AK Ring", }, { .uname = "BL", .ucode = 0x200, .udesc = "Injection Starvation -- For BL Ring", }, }; static intel_x86_umask_t bdx_unc_h_wpq_cycles_no_reg_credits[]={ { .uname = "CHN0", .ucode = 0x100, .udesc = "HA iMC CHN0 WPQ Credits Empty - Regular -- Channel 0", .uflags = INTEL_X86_NCOMBO, }, { .uname = "CHN1", .ucode = 0x200, .udesc = "HA iMC CHN0 WPQ Credits Empty - Regular -- Channel 1", .uflags = INTEL_X86_NCOMBO, }, { .uname = "CHN2", .ucode = 0x400, .udesc = "HA iMC CHN0 WPQ Credits Empty - Regular -- Channel 2", .uflags = INTEL_X86_NCOMBO, }, { .uname = "CHN3", .ucode = 0x800, .udesc = "HA iMC CHN0 WPQ Credits Empty - Regular -- Channel 3", .uflags = INTEL_X86_NCOMBO, }, }; static intel_x86_entry_t intel_bdx_unc_h_pe[]={ /* ADDR_OPC_MATCH not supported (linux kernel has no support for HA OPC yet*/ { .name = "UNC_H_BT_CYCLES_NE", .code = 0x42, .desc = "Cycles the Backup Tracker (BT) is not empty. The BT is the actual HOM tracker in IVT.", .modmsk = BDX_UNC_HA_ATTRS, .cntmsk = 0xf, }, { .name = "UNC_H_BT_OCCUPANCY", .code = 0x43, .desc = "Accumulates the occupancy of te HA BT pool in every cycle. This can be used with the 'not empty' stat to calculate the average queue occupancy or the 'allocations' stat to calculate average queue latency. HA BTs are allocated as son as a request enters the HA and are released after the snoop response and data return and the response is returned to the ring", .modmsk = BDX_UNC_HA_ATTRS, .cntmsk = 0xf, }, { .name = "UNC_H_BYPASS_IMC", .code = 0x14, .desc = "Counts the number of times when the HA was able to bypass was attempted. This is a latency optimization for situations when there is light loadings on the memory subsystem. 
This can be filted by when the bypass was taken and when it was not.", .modmsk = BDX_UNC_HA_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_h_bypass_imc, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_h_bypass_imc), }, { .name = "UNC_H_CONFLICT_CYCLES", .code = 0xb, .desc = "TBD", .modmsk = BDX_UNC_HA_ATTRS, .cntmsk = 0xf, }, { .name = "UNC_H_CLOCKTICKS", .code = 0x0, .desc = "Counts the number of uclks in the HA. This will be slightly different than the count in the Ubox because of enable/freeze delays. The HA is on the other side of the die from the fixed Ubox uclk counter, so the drift could be somewhat larger than in units that are closer like the QPI Agent.", .modmsk = BDX_UNC_HA_ATTRS, .cntmsk = 0xf, }, { .name = "UNC_H_DIRECT2CORE_COUNT", .code = 0x11, .desc = "Number of Direct2Core messages sent", .modmsk = BDX_UNC_HA_ATTRS, .cntmsk = 0xf, }, { .name = "UNC_H_DIRECT2CORE_CYCLES_DISABLED", .code = 0x12, .desc = "Number of cycles in which Direct2Core was disabled", .modmsk = BDX_UNC_HA_ATTRS, .cntmsk = 0xf, }, { .name = "UNC_H_DIRECT2CORE_TXN_OVERRIDE", .code = 0x13, .desc = "Number of Reads where Direct2Core overridden", .modmsk = BDX_UNC_HA_ATTRS, .cntmsk = 0xf, }, { .name = "UNC_H_DIRECTORY_LAT_OPT", .code = 0x41, .desc = "Directory Latency Optimization Data Return Path Taken. When directory mode is enabled and the directory retuned for a read is Dir=I, then data can be returned using a faster path if certain conditions are met (credits, free pipeline, etc).", .modmsk = BDX_UNC_HA_ATTRS, .cntmsk = 0xf, }, { .name = "UNC_H_DIRECTORY_LOOKUP", .code = 0xc, .desc = "Counts the number of transactions that looked up the directory. 
Can be filtered by requests that had to snoop and those that did not have to.", .modmsk = BDX_UNC_HA_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_h_directory_lookup, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_h_directory_lookup), }, { .name = "UNC_H_DIRECTORY_UPDATE", .code = 0xd, .desc = "Counts the number of directory updates that were required. These result in writes to the memory controller. This can be filtered by directory sets and directory clears.", .modmsk = BDX_UNC_HA_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_h_directory_update, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_h_directory_update), }, { .name = "UNC_H_HITME_HIT", .code = 0x71, .desc = "", .modmsk = BDX_UNC_HA_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_h_hitme_hit, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_h_hitme_hit), }, { .name = "UNC_H_HITME_HIT_PV_BITS_SET", .code = 0x72, .desc = "", .modmsk = BDX_UNC_HA_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_h_hitme_hit_pv_bits_set, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_h_hitme_hit_pv_bits_set), }, { .name = "UNC_H_HITME_LOOKUP", .code = 0x70, .desc = "", .modmsk = BDX_UNC_HA_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_h_hitme_lookup, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_h_hitme_lookup), }, { .name = "UNC_H_IGR_NO_CREDIT_CYCLES", .code = 0x22, .desc = "Counts the number of cycles when the HA does not have credits to send messages to the QPI Agent. This can be filtered by the different credit pools and the different links.", .modmsk = BDX_UNC_HA_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_h_igr_no_credit_cycles, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_h_igr_no_credit_cycles), }, { .name = "UNC_H_IMC_READS", .code = 0x17, .desc = "Count of the number of reads issued to any of the memory controller channels. 
This can be filtered by the priority of the reads.", .modmsk = BDX_UNC_HA_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_h_imc_reads, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_h_imc_reads), }, { .name = "UNC_H_IMC_RETRY", .code = 0x1e, .desc = "", .modmsk = BDX_UNC_HA_ATTRS, .cntmsk = 0xf, }, { .name = "UNC_H_IMC_WRITES", .code = 0x1a, .desc = "Counts the total number of full line writes issued from the HA into the memory controller. This counts for all four channels. It can be filtered by full/partial and ISOCH/non-ISOCH.", .modmsk = BDX_UNC_HA_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_h_imc_writes, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_h_imc_writes), }, { .name = "UNC_H_OSB", .code = 0x53, .desc = "Count of OSB snoop broadcasts. Counts by 1 per request causing OSB snoops to be broadcast. Does not count all the snoops generated by OSB.", .modmsk = BDX_UNC_HA_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_h_osb, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_h_osb), }, { .name = "UNC_H_OSB_EDR", .code = 0x54, .desc = "Counts the number of transactions that broadcast snoop due to OSB, but found clean data in memory and was able to do early data return", .modmsk = BDX_UNC_HA_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_h_osb_edr, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_h_osb_edr), }, { .name = "UNC_H_REQUESTS", .code = 0x1, .desc = "Counts the total number of read requests made into the Home Agent. Reads include all read opcodes (including RFO). Writes include all writes (streaming, evictions, HitM, etc).", .modmsk = BDX_UNC_HA_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_h_requests, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_h_requests), }, { .name = "UNC_H_RING_AD_USED", .code = 0x3e, .desc = "Counts the number of cycles that the AD ring is being used at this ring stop. 
This includes when packets are passing by and when packets are being sunk, but does not include when packets are being sent from the ring stop.", .modmsk = BDX_UNC_HA_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_h_ring_ad_used, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_h_ring_ad_used), }, { .name = "UNC_H_RING_AK_USED", .code = 0x3f, .desc = "Counts the number of cycles that the AK ring is being used at this ring stop. This includes when packets are passing by and when packets are being sunk, but does not include when packets are being sent from the ring stop.", .modmsk = BDX_UNC_HA_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_h_ring_ad_used, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_h_ring_ad_used), }, { .name = "UNC_H_RING_BL_USED", .code = 0x40, .desc = "Counts the number of cycles that the BL ring is being used at this ring stop. This includes when packets are passing by and when packets are being sunk, but does not include when packets are being sent from the ring stop.", .modmsk = BDX_UNC_HA_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_h_ring_ad_used, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_h_ring_ad_used), }, { .name = "UNC_H_RPQ_CYCLES_NO_REG_CREDITS", .code = 0x15, .desc = "Counts the number of cycles when there are no regular credits available for posting reads from the HA into the iMC. In order to send reads into the memory controller, the HA must first acquire a credit for the iMCs RPQ (read pending queue). This queue is broken into regular credits/buffers that are used by general reads, and special requests such as ISOCH reads. This count only tracks the regular credits Common high banwidth workloads should be able to make use of all of the regular buffers, but it will be difficult (and uncommon) to make use of both the regular and special buffers at the same time. One can filter based on the memory controller channel. 
One or more channels can be tracked at a given iven time.", .modmsk = BDX_UNC_HA_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_h_rpq_cycles_no_reg_credits, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_h_rpq_cycles_no_reg_credits), }, { .name = "UNC_H_SBO0_CREDITS_ACQUIRED", .code = 0x68, .desc = "Number of Sbo 0 credits acquired in a given cycle, per ring.", .modmsk = BDX_UNC_HA_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_h_sbo0_credits_acquired, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_h_sbo0_credits_acquired), }, { .name = "UNC_H_SBO0_CREDIT_OCCUPANCY", .code = 0x6a, .desc = "Number of Sbo 0 credits in use in a given cycle, per ring.", .modmsk = BDX_UNC_HA_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_h_sbo0_credits_acquired, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_h_sbo0_credits_acquired), }, { .name = "UNC_H_SBO1_CREDITS_ACQUIRED", .code = 0x69, .desc = "Number of Sbo 1 credits acquired in a given cycle, per ring.", .modmsk = BDX_UNC_HA_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_h_sbo0_credits_acquired, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_h_sbo0_credits_acquired), }, { .name = "UNC_H_SBO1_CREDIT_OCCUPANCY", .code = 0x6b, .desc = "Number of Sbo 1 credits in use in a given cycle, per ring.", .modmsk = BDX_UNC_HA_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_h_sbo0_credits_acquired, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_h_sbo0_credits_acquired), }, { .name = "UNC_H_SNOOPS_RSP_AFTER_DATA", .code = 0xa, .desc = "Counts the number of reads when the snoop was on the critical path to the data return.", .modmsk = BDX_UNC_HA_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_h_snoops_rsp_after_data, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_h_snoops_rsp_after_data), }, { .name = "UNC_H_SNOOP_CYCLES_NE", .code = 0x8, .desc = "Counts cycles when one or more snoops are outstanding.", .modmsk = BDX_UNC_HA_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_h_snoop_cycles_ne, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_h_snoop_cycles_ne), }, { .name = "UNC_H_SNOOP_OCCUPANCY", 
.code = 0x9, .desc = "Accumulates the occupancy of either the local HA tracker pool that have snoops pending in every cycle. This can be used in conjection with the not empty stat to calculate average queue occupancy or the allocations stat in order to calculate average queue latency. HA trackers are allocated as soon as a request enters the HA if an HT (HomeTracker) entry is available and this occupancy is decremented when all the snoop responses have retureturned.", .modmsk = BDX_UNC_HA_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_h_snoop_occupancy, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_h_snoop_occupancy), }, { .name = "UNC_H_SNOOP_RESP", .code = 0x21, .desc = "Counts the total number of RspI snoop responses received. Whenever a snoops are issued, one or more snoop responses will be returned depending on the topology of the system. In systems larger than 2s, when multiple snoops are returned this will count all the snoops that are received. For example, if 3 snoops were issued and returned RspI, RspS, and RspSFwd; then each of these sub-events would increment by 1.", .modmsk = BDX_UNC_HA_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_h_snoop_resp, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_h_snoop_resp), }, { .name = "UNC_H_SNP_RESP_RECV_LOCAL", .code = 0x60, .desc = "Number of snoop responses received for a Local request", .modmsk = BDX_UNC_HA_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_h_snp_resp_recv_local, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_h_snp_resp_recv_local), }, { .name = "UNC_H_STALL_NO_SBO_CREDIT", .code = 0x6c, .desc = "Number of cycles Egress is stalled waiting for an Sbo credit to become available. Per Sbo, per Ring.", .modmsk = BDX_UNC_HA_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_h_stall_no_sbo_credit, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_h_stall_no_sbo_credit), }, { .name = "UNC_H_TAD_REQUESTS_G0", .code = 0x1b, .desc = "Counts the number of HA requests to a given TAD region. 
There are up to 11 TAD (target address decode) regions in each home agent. All requests destined for the memory controller must first be decoded to determine which TAD region they are in. This event is filtered based on the TAD region ID, and covers regions 0 to 7. This event is useful for understanding how applications are using the memory that is spread across the different memory regions. It is particularly useful for Monroe systems that use the TAD to enable individual channels to enter self-refresh to save powewer.", .modmsk = BDX_UNC_HA_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_h_tad_requests_g0, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_h_tad_requests_g0), }, { .name = "UNC_H_TAD_REQUESTS_G1", .code = 0x1c, .desc = "Counts the number of HA requests to a given TAD region. There are up to 11 TAD (target address decode) regions in each home agent. All requests destined for the memory controller must first be decoded to determine which TAD region they are in. This event is filtered based on the TAD region ID, and covers regions 8 to 10. This event is useful for understanding how applications are using the memory that is spread across the different memory regions. It is particularly useful for Monroe systems that use the TAD to enable individual channels to enter self-refresh to save powewer.", .modmsk = BDX_UNC_HA_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_h_tad_requests_g1, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_h_tad_requests_g1), }, { .name = "UNC_H_TRACKER_CYCLES_FULL", .code = 0x2, .desc = "Counts the number of cycles when the local HA tracker pool is completely used. This can be used with edge detect to identify the number of situations when the pool became fully utilized. This should not be confused with RTID credit usage -- which must be tracked inside each cbo individually -- but represents the actual tracker buffer structure. In other words, the system could be starved for RTIDs but not fill up the HA trackers. 
HA trackers are allocated as soon as a request enters the HA and is released after the snoop response and data return (or post in the case of a write) and the response is returned on the ring.", .modmsk = BDX_UNC_HA_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_h_tracker_cycles_full, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_h_tracker_cycles_full), }, { .name = "UNC_H_TRACKER_CYCLES_NE", .code = 0x3, .desc = "Counts the number of cycles when the local HA tracker pool is not empty. This can be used with edge detect to identify the number of situations when the pool became empty. This should not be confused with RTID credit usage -- which must be tracked inside each cbo individually -- but represents the actual tracker buffer structure. In other words, this buffer could be completely empty, but there may still be credits in use by the CBos. This stat can be used in conjunction with the occupancy accumulation stat in order to calculate average queue occpancy. HA trackers are allocated as soon as a request enters the HA if an HT (Home Tracker) entry is available and is released after the snoop response and data return (or post in the case of a write) and the response is returned on the ring.", .modmsk = BDX_UNC_HA_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_h_tracker_cycles_ne, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_h_tracker_cycles_ne), }, { .name = "UNC_H_TRACKER_OCCUPANCY", .code = 0x4, .desc = "Accumulates the occupancy of the local HA tracker pool in every cycle. This can be used in conjection with the not empty stat to calculate average queue occupancy or the allocations stat in order to calculate average queue latency. 
HA trackers are allocated as soon as a request enters the HA if a HT (Home Tracker) entry is available and is released after the snoop response and data return (or post in the case of a write) and the response is returned on the rhe ring.", .modmsk = BDX_UNC_HA_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_h_tracker_occupancy, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_h_tracker_occupancy), }, { .name = "UNC_H_TRACKER_PENDING_OCCUPANCY", .code = 0x5, .desc = "Accumulates the number of transactions that have data from the memory controller until they get scheduled to the Egress. This can be used to calculate the queuing latency for two things. (1) If the system is waiting for snoops, this will increase. (2) If the system cant schedule to the Egress because of either (a) Egress Credits or (b) QPI BL IGR credits for remote requestss.", .modmsk = BDX_UNC_HA_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_h_tracker_pending_occupancy, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_h_tracker_pending_occupancy), }, { .name = "UNC_H_TXR_AD_CYCLES_FULL", .code = 0x2a, .desc = "AD Egress Full", .modmsk = BDX_UNC_HA_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_h_txr_ad_cycles_full, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_h_txr_ad_cycles_full), }, { .name = "UNC_H_TXR_AK_CYCLES_FULL", .code = 0x32, .desc = "AK Egress Full", .modmsk = BDX_UNC_HA_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_h_txr_ad_cycles_full, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_h_txr_ad_cycles_full), /* shared */ }, { .name = "UNC_H_TXR_BL", .code = 0x10, .desc = "Counts the number of DRS messages sent out on the BL ring. 
This can be filtered by the destination.", .modmsk = BDX_UNC_HA_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_h_txr_bl, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_h_txr_bl), }, { .name = "UNC_H_TXR_BL_CYCLES_FULL", .code = 0x36, .desc = "BL Egress Full", .modmsk = BDX_UNC_HA_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_h_txr_ad_cycles_full, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_h_txr_ad_cycles_full), /* shared */ }, { .name = "UNC_H_TXR_STARVED", .code = 0x6d, .desc = "Counts injection starvation. This starvation is triggered when the Egress cannot send a transaction onto the ring for a long period of time.", .modmsk = BDX_UNC_HA_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_h_txr_starved, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_h_txr_starved), }, { .name = "UNC_H_WPQ_CYCLES_NO_REG_CREDITS", .code = 0x18, .desc = "Counts the number of cycles when there are no regular credits available for posting writes from the HA into the iMC. In order to send writes into the memory controller, the HA must first acquire a credit for the iMCs WPQ (write pending queue). This queue is broken into regular credits/buffers that are used by general writes, and special requests such as ISOCH writes. This count only tracks the regular credits Common high banwidth workloads should be able to make use of all of the regular buffers, but it will be difficult (and uncommon) to make use of both the regular and special buffers at the same time. One can filter based on the memory controller channel. 
One or more channels can be tracked at a given iven time.", .modmsk = BDX_UNC_HA_ATTRS, .cntmsk = 0xf, .ngrp = 1, .umasks = bdx_unc_h_wpq_cycles_no_reg_credits, .numasks= LIBPFM_ARRAY_SIZE(bdx_unc_h_wpq_cycles_no_reg_credits), }, }; papi-5.6.0/src/libpfm4/docs/man3/libpfm_intel_bdw.3000664 001750 001750 00000007713 13216244364 024044 0ustar00jshenry1963jshenry1963000000 000000 .TH LIBPFM 3 "October, 2014" "" "Linux Programmer's Manual" .SH NAME libpfm_intel_bdw - support for Intel Broadwell core PMU .SH SYNOPSIS .nf .B #include .sp .B PMU name: bdw .B PMU desc: Intel Broadwell .sp .SH DESCRIPTION The library supports the Intel Broadwell core PMU. It should be noted that this PMU model only covers each core's PMU and not the socket level PMU. On Broadwell, the number of generic counters depends on the Hyperthreading (HT) mode. When HT is on, then only 4 generic counters are available. When HT is off, then 8 generic counters are available. The \fBpfm_get_pmu_info()\fR function returns the maximum number of generic counters in \fBnum_cntrs\fr. .SH MODIFIERS The following modifiers are supported on Intel Broadwell processors: .TP .B u Measure at user level which includes privilege levels 1, 2, 3. This corresponds to \fBPFM_PLM3\fR. This is a boolean modifier. .TP .B k Measure at kernel level which includes privilege level 0. This corresponds to \fBPFM_PLM0\fR. This is a boolean modifier. .TP .B i Invert the meaning of the event. The counter will now count cycles in which the event is \fBnot\fR occurring. This is a boolean modifier .TP .B e Enable edge detection, i.e., count only when there is a state transition from no occurrence of the event to at least one occurrence. This modifier must be combined with a counter mask modifier (m) with a value greater or equal to one. This is a boolean modifier. .TP .B c Set the counter mask value. The mask acts as a threshold. 
The counter will count the number of cycles in which the number of occurrences of the event is greater than or equal to the threshold. This is an integer modifier with values in the range [0:255]. .TP .B t Measure on both threads at the same time assuming hyper-threading is enabled. This is a boolean modifier. .TP .B ldlat Pass a latency threshold to the MEM_TRANS_RETIRED:LOAD_LATENCY event. This is an integer attribute that must be in the range [3:65535]. It is required for this event. Note that the event must be used with precise sampling (PEBS). .TP .B intx Monitor the event only when executing inside a transactional memory region (in tx). Event does not count otherwise. This is a boolean modifier. Default value is 0. .TP .B intxcp Do not count occurrences of the event when they are inside an aborted transactional memory region. This is a boolean modifier. Default value is 0. .SH OFFCORE_RESPONSE events Intel Broadwell provides two offcore_response events. They are called OFFCORE_RESPONSE_0 and OFFCORE_RESPONSE_1. Those events need special treatment in the performance monitoring infrastructure because each event uses an extra register to store some settings. Thus, in case multiple offcore_response events are monitored simultaneously, the kernel needs to manage the sharing of that extra register. The offcore_response events are exposed as normal events by the library. The extra settings are exposed as regular umasks. The library takes care of encoding the events according to the underlying kernel interface. On Intel Broadwell, the umasks are divided into three categories: request, supplier and snoop. The user must provide at least one umask for each category. The categories are shown in the umask descriptions. There is also the special response umask called \fBANY_RESPONSE\fR. When this umask is used then it overrides any supplier and snoop umasks. In other words, users can specify either \fBANY_RESPONSE\fR \fBOR\fR any combination of supplier + snoops.
In case no supplier or snoop is specified, the library defaults to using \fBANY_RESPONSE\fR. For instance, the following are valid event selections: .TP .B OFFCORE_RESPONSE_0:DMND_DATA_RD:ANY_RESPONSE .TP .B OFFCORE_RESPONSE_0:ANY_REQUEST .TP .B OFFCORE_RESPONSE_0:ANY_RFO:LLC_HITM:SNOOP_ANY .P But the following are illegal: .TP .B OFFCORE_RESPONSE_0:ANY_RFO:LLC_HITM:ANY_RESPONSE .TP .B OFFCORE_RESPONSE_0:ANY_RFO:LLC_HITM:SNOOP_ANY:ANY_RESPONSE .SH AUTHORS .nf Stephane Eranian .fi .PP papi-5.6.0/src/libpfm4/docs/man3/libpfm_intel_bdx_unc_irp.3000664 001750 001750 00000002744 13216244364 025563 0ustar00jshenry1963jshenry1963000000 000000 .TH LIBPFM 3 "June, 2017" "" "Linux Programmer's Manual" .SH NAME libpfm_intel_bdx_unc_irp - support for Intel Broadwell Server IRP uncore PMU .SH SYNOPSIS .nf .B #include <perfmon/pfmlib.h> .sp .B PMU name: bdx_unc_irp .B PMU desc: Intel Broadwell Server IRP uncore PMU .sp .SH DESCRIPTION The library supports the Intel Broadwell Server IRP (IIO coherency) uncore PMU. This PMU model only exists on various Broadwell server models (79, 86). .SH MODIFIERS The following modifiers are supported on Intel Broadwell server IRP uncore PMU: .TP .B e Enable edge detection, i.e., count only when there is a state transition from no occurrence of the event to at least one occurrence. This modifier must be combined with a threshold modifier (t) with a value greater than or equal to one. This is a boolean modifier. .TP .B t Set the threshold value. When set to a non-zero value, the counter counts the number of cycles in which the number of occurrences of the event is greater than or equal to the threshold. This is an integer modifier with values in the range [0:255]. .TP .B i Invert the meaning of the threshold or edge filter. If set, the event counts when strictly less than N occurrences occur per cycle if threshold is set to N. When invert is set, then threshold must be set to a non-zero value.
If set, the event counts when the event transitions from occurring to not occurring (falling edge) when edge detection is set. This is a boolean modifier .SH AUTHORS .nf Stephane Eranian .if .PP papi-5.6.0/src/ctests/reset_multiplex.c000664 001750 001750 00000020330 13216244360 022215 0ustar00jshenry1963jshenry1963000000 000000 /* This file performs the same tests as the reset test but does it with the events multiplexed. This is mostly to test perf_event, where resetting multiplexed events is handled differently than grouped events. */ #include #include "papi.h" #include "papi_test.h" #include "do_loops.h" int main( int argc, char **argv ) { int retval, num_tests = 9, num_events, tmp, i; long long **values; int EventSet = PAPI_NULL; int PAPI_event, mask; char event_name[PAPI_MAX_STR_LEN], add_event_str[PAPI_MAX_STR_LEN]; int quiet; /* Set TESTS_QUIET variable */ quiet = tests_quiet( argc, argv ); /* Init the PAPI library */ retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) { test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); } retval = PAPI_multiplex_init( ); if ( retval == PAPI_ENOSUPP) { test_skip(__FILE__, __LINE__, "Multiplex not supported", 1); } else if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_multiplex_init", retval ); } /* add PAPI_TOT_CYC and one of the events in PAPI_FP_INS, PAPI_FP_OPS or PAPI_TOT_INS, depending on the availability of the event on the platform */ EventSet = add_two_events( &num_events, &PAPI_event, &mask ); /* Set multiplexing on the eventset */ retval = PAPI_set_multiplex( EventSet ); if ( retval != PAPI_OK) { test_fail(__FILE__, __LINE__, "Setting multiplex", retval); } retval = PAPI_event_code_to_name( PAPI_event, event_name ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); } sprintf( add_event_str, "PAPI_add_event[%s]", event_name ); values = allocate_test_space( num_tests, num_events ); /*===== Test 1: Start/Stop 
=======================*/ retval = PAPI_start( EventSet ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_start", retval ); } do_flops( NUM_FLOPS ); retval = PAPI_stop( EventSet, values[0] ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); } /*===== Test 2 Start/Stop =======================*/ retval = PAPI_start( EventSet ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_start", retval ); } do_flops( NUM_FLOPS ); retval = PAPI_stop( EventSet, values[1] ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); } /*===== Test 3: Reset/Start/Stop =======================*/ retval = PAPI_reset( EventSet ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_reset", retval ); } retval = PAPI_start( EventSet ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_start", retval ); } do_flops( NUM_FLOPS ); retval = PAPI_stop( EventSet, values[2] ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); } /*===== Test 4: Start/Read =======================*/ retval = PAPI_start( EventSet ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_start", retval ); } do_flops( NUM_FLOPS / 2 ); retval = PAPI_read( EventSet, values[3] ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_read", retval ); } /*===== Test 5: Read =======================*/ do_flops( NUM_FLOPS / 2 ); retval = PAPI_read( EventSet, values[4] ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_read", retval ); } /*===== Test 6: Read/Accum =======================*/ do_flops( NUM_FLOPS / 2 ); retval = PAPI_read( EventSet, values[5] ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_read", retval ); } retval = PAPI_accum( EventSet, values[5] ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_accum", retval ); } /*===== Test 7: Read =======================*/ do_flops( NUM_FLOPS / 2 ); retval = 
PAPI_read( EventSet, values[6] ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_read", retval ); } /*===== Test 8 Reset/Stop =======================*/ retval = PAPI_reset( EventSet ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_reset", retval ); } do_flops( NUM_FLOPS / 2 ); retval = PAPI_stop( EventSet, values[7] ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); } /*===== Test 9: Reset/Read =======================*/ retval = PAPI_reset( EventSet ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_reset", retval ); } retval = PAPI_read( EventSet, values[8] ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_read", retval ); } remove_test_events( &EventSet, mask ); if (!quiet) { printf( "Test case: Start/Stop/Read/Accum/Reset.\n" ); printf( "----------------------------------------------------------------\n" ); tmp = PAPI_get_opt( PAPI_DEFDOM, NULL ); printf( "Default domain is: %d (%s)\n", tmp, stringify_all_domains( tmp ) ); tmp = PAPI_get_opt( PAPI_DEFGRN, NULL ); printf( "Default granularity is: %d (%s)\n", tmp, stringify_granularity( tmp ) ); printf( "Using %d iterations of c += a*b\n", NUM_FLOPS ); printf( "-------------------------------------------------------------------------\n" ); sprintf( add_event_str, "%s:", event_name ); printf( " PAPI_TOT_CYC %s\n", event_name ); printf( "1. start,ops,stop %10lld %10lld\n", values[0][0], values[0][1] ); printf( "2. start,ops,stop %10lld %10lld\n", values[1][0], values[1][1] ); printf( "3. reset,start,ops,stop %10lld %10lld\n", values[2][0], values[2][1] ); printf( "4. start,ops/2,read %10lld %10lld\n", values[3][0], values[3][1] ); printf( "5. ops/2,read %10lld %10lld\n", values[4][0], values[4][1] ); printf( "6. ops/2,accum %10lld %10lld\n", values[5][0], values[5][1] ); printf( "7. ops/2,read %10lld %10lld\n", values[6][0], values[6][1] ); printf( "8. 
reset,ops/2,stop %10lld %10lld\n", values[7][0], values[7][1] ); printf( "9. reset,read %10lld %10lld\n", values[8][0], values[8][1] ); printf( "-------------------------------------------------------------------------\n" ); printf( "Verification:\n" ); printf( "Row 1 approximately equals rows 2 and 3 \n" ); printf( "Row 4 approximately equals 1/2 of row 3\n" ); printf( "Row 5 approximately equals twice row 4\n" ); printf( "Row 6 approximately equals 6 times row 4\n" ); printf( "Rows 7 and 8 approximately equal row 4\n" ); printf( "Row 9 equals 0\n" ); printf( "%% difference between %s 1 & 2: %.2f\n", "PAPI_TOT_CYC", 100.0 * ( float ) values[0][0] / ( float ) values[1][0] ); printf( "%% difference between %s 1 & 2: %.2f\n", add_event_str, 100.0 * ( float ) values[0][1] / ( float ) values[1][1] ); } for ( i = 0; i <= 1; i++ ) { if ( !approx_equals ( ( double ) values[0][i], ( double ) values[1][i] ) ) test_fail( __FILE__, __LINE__, ( ( i == 0 ) ? "PAPI_TOT_CYC" : add_event_str ), 1 ); if ( !approx_equals ( ( double ) values[1][i], ( double ) values[2][i] ) ) test_fail( __FILE__, __LINE__, ( ( i == 0 ) ? "PAPI_TOT_CYC" : add_event_str ), 1 ); if ( !approx_equals ( ( double ) values[2][i], ( double ) values[3][i] * 2.0 ) ) test_fail( __FILE__, __LINE__, ( ( i == 0 ) ? "PAPI_TOT_CYC" : add_event_str ), 1 ); if ( !approx_equals ( ( double ) values[2][i], ( double ) values[4][i] ) ) test_fail( __FILE__, __LINE__, ( ( i == 0 ) ? "PAPI_TOT_CYC" : add_event_str ), 1 ); if ( !approx_equals ( ( double ) values[5][i], ( double ) values[3][i] * 6.0 ) ) test_fail( __FILE__, __LINE__, ( ( i == 0 ) ? "PAPI_TOT_CYC" : add_event_str ), 1 ); if ( !approx_equals ( ( double ) values[6][i], ( double ) values[3][i] ) ) test_fail( __FILE__, __LINE__, ( ( i == 0 ) ? "PAPI_TOT_CYC" : add_event_str ), 1 ); if ( !approx_equals ( ( double ) values[7][i], ( double ) values[3][i] ) ) test_fail( __FILE__, __LINE__, ( ( i == 0 ) ? 
"PAPI_TOT_CYC" : add_event_str ), 1 ); if ( values[8][i] != 0LL ) test_fail( __FILE__, __LINE__, ( ( i == 0 ) ? "PAPI_TOT_CYC" : add_event_str ), 1 ); } test_pass( __FILE__ ); return 0; } papi-5.6.0/src/components/cuda/tests/HelloWorld.cu000664 001750 001750 00000011101 13216244357 024170 0ustar00jshenry1963jshenry1963000000 000000 /****************************/ /* THIS IS OPEN SOURCE CODE */ /****************************/ /** * @file HelloWorld.c * @author Heike Jagode * jagode@eecs.utk.edu * Mods: * * test case for Example component * * * @brief * This file is a very simple HelloWorld C example which serves (together * with its Makefile) as a guideline on how to add tests to components. * The papi configure and papi Makefile will take care of the compilation * of the component tests (if all tests are added to a directory named * 'tests' in the specific component dir). * See components/README for more details. * * The string "Hello World!" is mangled and then restored. */ #include #include #include "papi.h" #include "papi_test.h" #define NUM_EVENTS 1 #define PAPI 1 // Prototypes __global__ void helloWorld(char*); // Host function int main(int argc, char** argv) { #ifdef PAPI int retval, i; int EventSet = PAPI_NULL; long long values[NUM_EVENTS]; /* REPLACE THE EVENT NAME 'PAPI_FP_OPS' WITH A CUDA EVENT FOR THE CUDA DEVICE YOU ARE RUNNING ON. 
// Host function
//
// Optionally wraps a tiny CUDA "Hello World!" kernel in PAPI counters.
// The host mangles the string (str[j] -= j), the device kernel undoes it
// (str[idx] += idx), and the restored string is printed.  When PAPI is
// defined, the events named in EventName[] are counted around the kernel
// and printed afterwards.
//
// argc/argv are only passed to tests_quiet().  Returns 0 after
// test_pass(), or 1 when no requested event could be resolved.
int main(int argc, char** argv)
{
	/* Set TESTS_QUIET variable.
	   Declared outside the PAPI guard because the CUDA section below
	   reads it as well. */
	int quiet = tests_quiet( argc, argv );

#ifdef PAPI
	int retval, i;
	int EventSet = PAPI_NULL;
	/* Zeroed so the final report never prints indeterminate data when
	   PAPI_start/PAPI_stop fail (those failures are only logged). */
	long long values[NUM_EVENTS] = { 0 };
	/* REPLACE THE EVENT NAME 'PAPI_FP_OPS' WITH A CUDA EVENT
	   FOR THE CUDA DEVICE YOU ARE RUNNING ON.
	   RUN papi_native_avail to get a list of CUDA events that are
	   supported on your machine */
	//char *EventName[] = { "PAPI_FP_OPS" };
	char const *EventName[] = { "cuda:::event:elapsed_cycles_sm:device=0" };
	int events[NUM_EVENTS];
	/* Name of the event stored at the same index in events[]/values[]. */
	char const *addedName[NUM_EVENTS];
	int eventCount = 0;

	/* PAPI Initialization */
	retval = PAPI_library_init( PAPI_VER_CURRENT );
	if( retval != PAPI_VER_CURRENT ) {
		if (!quiet) printf("PAPI init failed\n");
		test_fail(__FILE__,__LINE__,
			"PAPI_library_init failed", 0 );
	}

	if (!quiet) {
		printf( "PAPI_VERSION : %4d %6d %7d\n",
			PAPI_VERSION_MAJOR( PAPI_VERSION ),
			PAPI_VERSION_MINOR( PAPI_VERSION ),
			PAPI_VERSION_REVISION( PAPI_VERSION ) );
	}

	/* convert PAPI native events to PAPI code */
	for( i = 0; i < NUM_EVENTS; i++ ){
		int code = 0;

		retval = PAPI_event_name_to_code( (char *)EventName[i], &code );
		if( retval != PAPI_OK ) {
			fprintf( stderr, "PAPI_event_name_to_code failed\n" );
			continue;
		}
		/* Pack resolved events densely: PAPI_add_events() below consumes
		   the first eventCount entries, so a failed lookup must not
		   leave a hole in the array. */
		events[eventCount] = code;
		addedName[eventCount] = EventName[i];
		eventCount++;
		if (!quiet) printf( "Name %s --- Code: %#x\n", EventName[i], code );
	}

	/* if we did not find any valid events, just report test failed. */
	if (eventCount == 0) {
		if (!quiet) printf( "Test FAILED: no valid events found.\n");
		test_skip(__FILE__,__LINE__,"No events found",0);
		return 1;
	}

	retval = PAPI_create_eventset( &EventSet );
	if( retval != PAPI_OK ) {
		if (!quiet) printf( "PAPI_create_eventset failed\n" );
		test_fail(__FILE__,__LINE__,"Cannot create eventset",retval);
	}

	// If multiple GPUs/contexts were being used,
	// you need to switch to each device before adding its events
	// e.g. cudaSetDevice( 0 );
	retval = PAPI_add_events( EventSet, events, eventCount );
	if( retval != PAPI_OK ) {
		fprintf( stderr, "PAPI_add_events failed\n" );
	}

	retval = PAPI_start( EventSet );
	if( retval != PAPI_OK ) {
		fprintf( stderr, "PAPI_start failed\n" );
	}
#endif

	int j;

	// desired output
	char str[] = "Hello World!";

	// mangle contents of output
	// the null character is left intact for simplicity
	for(j = 0; j < 12; j++) {
		str[j] -= j;
		//printf("str=%s\n", str);
	}

	// allocate memory on the device
	// (starts as NULL so a failed cudaMalloc does not leave a wild pointer)
	char *d_str = NULL;
	size_t size = sizeof(str);
	cudaMalloc((void**)&d_str, size);

	// copy the string to the device
	cudaMemcpy(d_str, str, size, cudaMemcpyHostToDevice);

	// set the grid and block sizes
	dim3 dimGrid(2); // one block per word
	dim3 dimBlock(6); // one thread per character

	// invoke the kernel
	helloWorld<<< dimGrid, dimBlock >>>(d_str);

	// retrieve the results from the device
	cudaMemcpy(str, d_str, size, cudaMemcpyDeviceToHost);

	// free up the allocated memory on the device
	cudaFree(d_str);

	if (!quiet) printf("END: %s\n", str);

#ifdef PAPI
	retval = PAPI_stop( EventSet, values );
	if( retval != PAPI_OK )
		fprintf( stderr, "PAPI_stop failed\n" );

	retval = PAPI_cleanup_eventset(EventSet);
	if( retval != PAPI_OK )
		fprintf(stderr, "PAPI_cleanup_eventset failed\n");

	retval = PAPI_destroy_eventset(&EventSet);
	if (retval != PAPI_OK)
		fprintf(stderr, "PAPI_destroy_eventset failed\n");

	PAPI_shutdown();

	for( i = 0; i < eventCount; i++ )
		if (!quiet) printf( "%12lld \t\t --> %s \n", values[i], addedName[i] );
#endif

	test_pass(__FILE__);

	return 0;
}


// Device kernel
//
// Each thread restores one character of the mangled string by adding its
// global thread index back.  The kernel does no bounds checking, so the
// launch configuration must not create more threads than there are
// characters to restore (main launches 2 blocks of 6 threads for the
// 12 mangled characters).
__global__ void
helloWorld(char* str)
{
	// determine where in the thread grid we are
	int idx = blockIdx.x * blockDim.x + threadIdx.x;

	// unmangle output
	str[idx] += idx;
}
* Contributed by Corey Ashford (cjashfor@us.ibm.com) * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/

/* private headers */
#include "pfmlib_priv.h"
#include "pfmlib_power_priv.h"
#include "events/power7_events.h"

/*
 * pfm_power7_detect - check whether the host CPU is a POWER7 variant.
 *
 * this: opaque pointer handed in by the caller; it is not used here
 *       (presumably the PMU descriptor being probed -- confirm).
 *
 * Returns PFM_SUCCESS when __is_processor() reports either PV_POWER7 or
 * PV_POWER7p, and PFM_ERR_NOTSUPP otherwise.
 */
static int pfm_power7_detect(void* this)
{
	if (__is_processor(PV_POWER7) || __is_processor(PV_POWER7p))
		return PFM_SUCCESS;
	return PFM_ERR_NOTSUPP;
}

/*
 * PMU descriptor for POWER7.
 *
 * The event table is power7_pe (its length is taken with
 * LIBPFM_ARRAY_SIZE), detection goes through pfm_power7_detect() above,
 * and every other callback is one of the shared pfm_gen_powerpc_*
 * routines.
 */
pfmlib_pmu_t power7_support={
	.desc = "POWER7",
	.name = "power7",
	.pmu = PFM_PMU_POWER7,
	.pme_count = LIBPFM_ARRAY_SIZE(power7_pe),
	.type = PFM_PMU_TYPE_CORE,
	.num_cntrs = 4,
	.num_fixed_cntrs = 2,
	.max_encoding = 1,
	.pe = power7_pe,
	.pmu_detect = pfm_power7_detect,
	.get_event_encoding[PFM_OS_NONE] = pfm_gen_powerpc_get_encoding,
	PFMLIB_ENCODE_PERF(pfm_gen_powerpc_get_perf_encoding),
	PFMLIB_VALID_PERF_PATTRS(pfm_gen_powerpc_perf_validate_pattrs),
	.get_event_first = pfm_gen_powerpc_get_event_first,
	.get_event_next = pfm_gen_powerpc_get_event_next,
	.event_is_valid = pfm_gen_powerpc_event_is_valid,
	.validate_table = pfm_gen_powerpc_validate_table,
	.get_event_info = pfm_gen_powerpc_get_event_info,
	.get_event_attr_info = pfm_gen_powerpc_get_event_attr_info,
};
papi-5.6.0/src/libpfm-3.y/lib/pfmlib_crayx2_priv.h000664 001750 001750 00000007070 13216244363 023730 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2007 Cray Inc. * Contributed by Steve Kaufmann based on code from * Copyright (c) 2001-2006 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software.
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */

/*
 * NOTE(review): the include guard is spelled __PMLIB_... (without the
 * 'F'). It is used consistently in this header, so it still works as a
 * guard.
 */
#ifndef __PMLIB_CRAYX2_PRIV_H__
#define __PMLIB_CRAYX2_PRIV_H__ 1

/*
 * NOTE(review): the #include directive below has no operand in this copy
 * of the file; the header name must be restored before this compiles.
 * The PFM_*_PMD_* constants used further down presumably come from it --
 * confirm.
 */
#include

/* Chips (substrates) that contain performance counters. */
#define PME_CRAYX2_CHIP_CPU 1
#define PME_CRAYX2_CHIP_CACHE 2
#define PME_CRAYX2_CHIP_MEMORY 3

/* Number of chips monitored per single process. */
#define PME_CRAYX2_CPU_CHIPS 1
#define PME_CRAYX2_CACHE_CHIPS 1
#define PME_CRAYX2_MEMORY_CHIPS 16

/* Number of events per physical counter. */
#define PME_CRAYX2_EVENTS_PER_COUNTER 4

/* Number of counters per chip (CPU, L2 Cache, Memory) */
#define PME_CRAYX2_CPU_CTRS_PER_CHIP PFM_CPU_PMD_COUNT
#define PME_CRAYX2_CACHE_CTRS_PER_CHIP PFM_CACHE_PMD_PER_CHIP
#define PME_CRAYX2_MEMORY_CTRS_PER_CHIP PFM_MEM_PMD_PER_CHIP

/*
 * Number of events per chip type (CPU, L2 Cache, Memory): each total is
 * chips * counters per chip * events per counter.
 */
#define PME_CRAYX2_CPU_EVENTS \
	(PME_CRAYX2_CPU_CHIPS*PME_CRAYX2_CPU_CTRS_PER_CHIP*PME_CRAYX2_EVENTS_PER_COUNTER)
#define PME_CRAYX2_CACHE_EVENTS \
	(PME_CRAYX2_CACHE_CHIPS*PME_CRAYX2_CACHE_CTRS_PER_CHIP*PME_CRAYX2_EVENTS_PER_COUNTER)
#define PME_CRAYX2_MEMORY_EVENTS \
	(PME_CRAYX2_MEMORY_CHIPS*PME_CRAYX2_MEMORY_CTRS_PER_CHIP*PME_CRAYX2_EVENTS_PER_COUNTER)

/*
 * No unit masks are (currently) used. The value still sizes the
 * pme_umasks[] array embedded in pme_crayx2_entry_t below.
 */
#define PFMLIB_CRAYX2_MAX_UMASK 1

/* One unit-mask descriptor: name, description and code. */
typedef struct {
	const char *pme_uname; /* unit mask name */
	const char *pme_udesc; /* event/umask description */
	unsigned int pme_ucode; /* unit mask code */
} pme_crayx2_umask_t;

/* Description of each performance counter event available on all
 * substrates. Listed contiguously for all substrates.
 */
typedef struct {
	const char *pme_name; /* event name */
	const char *pme_desc; /* event description */
	unsigned int pme_code; /* event code */
	unsigned int pme_flags; /* flags */
	unsigned int pme_numasks; /* number of unit masks */
	pme_crayx2_umask_t pme_umasks[PFMLIB_CRAYX2_MAX_UMASK]; /* unit masks (chip numbers) */
	unsigned int pme_chip; /* substrate/chip containing counter */
	unsigned int pme_ctr; /* counter on chip */
	unsigned int pme_event; /* event number on counter */
	unsigned int pme_chipno; /* chip# upon which the event lies */
	unsigned int pme_base; /* PMD base reg_num for this chip */
	unsigned int pme_nctrs; /* PMDs/counters per chip */
	unsigned int pme_nchips; /* number of chips per process */
} pme_crayx2_entry_t;

#endif /* __PMLIB_CRAYX2_PRIV_H__ */
papi-5.6.0/src/libpfm-3.y/lib/powerpc_events.h000664 001750 001750 00000002255 13216244363 023172 0ustar00jshenry1963jshenry1963000000 000000 /* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE.
* * powerpc_events.h */ #ifndef _POWERPC_EVENTS_H_ #define _POWERPC_EVENTS_H_ #define PME_INSTR_COMPLETED 1 #endif papi-5.6.0/src/validation_tests/display_error.h000664 001750 001750 00000000221 13216244370 023720 0ustar00jshenry1963jshenry1963000000 000000 double display_error(long long average, long long high, long long low, long long expected, int quiet); papi-5.6.0/src/examples/000775 001750 001750 00000000000 13216244361 017140 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/man/man3/PAPI_shlib_info_t.3000664 001750 001750 00000000610 13216244356 021501 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPI_shlib_info_t" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPI_shlib_info_t \- .SH SYNOPSIS .br .PP .SS "Data Fields" .in +1c .ti -1c .RI "\fBPAPI_address_map_t\fP * \fBmap\fP" .br .ti -1c .RI "int \fBcount\fP" .br .in -1c .SH "Detailed Description" .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. papi-5.6.0/src/libpfm-3.y/lib/gen_ia32_events.h000664 001750 001750 00000006754 13216244363 023112 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2006-2007 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. */

/*
 * architected events for architectural perfmon v1 and v2 as defined by the IA-32 developer's manual
 * Vol 3B, table 18-6 (May 2007)
 *
 * The order of the entries matters: the PME_GEN_IA32_* index macros
 * defined after the table refer to entries by position.
 * Only the first three entries set .pme_fixed (presumably the matching
 * fixed-counter register index -- confirm against pme_gen_ia32_entry_t).
 */
static pme_gen_ia32_entry_t gen_ia32_all_pe[]={
	{.pme_name = "UNHALTED_CORE_CYCLES",
	 .pme_code = 0x003c,
	 .pme_fixed = 17,
	 .pme_desc = "count core clock cycles whenever the clock signal on the specific core is running (not halted)"
	},
	{.pme_name = "INSTRUCTIONS_RETIRED",
	 .pme_code = 0x00c0,
	 .pme_fixed = 16,
	 .pme_desc = "count the number of instructions at retirement. For instructions that consists of multiple micro-ops, this event counts the retirement of the last micro-op of the instruction",
	},
	{.pme_name = "UNHALTED_REFERENCE_CYCLES",
	 .pme_code = 0x013c,
	 .pme_fixed = 18,
	 .pme_desc = "count reference clock cycles while the clock signal on the specific core is running. The reference clock operates at a fixed frequency, irrespective of core freqeuncy changes due to performance state transitions",
	},
	{.pme_name = "LAST_LEVEL_CACHE_REFERENCES",
	 .pme_code = 0x4f2e,
	 .pme_desc = "count each request originating from the core to reference a cache line in the last level cache. The count may include speculation, but excludes cache line fills due to hardware prefetch",
	},
	{.pme_name = "LAST_LEVEL_CACHE_MISSES",
	 .pme_code = 0x412e,
	 .pme_desc = "count each cache miss condition for references to the last level cache. The event count may include speculation, but excludes cache line fills due to hardware prefetch",
	},
	{.pme_name = "BRANCH_INSTRUCTIONS_RETIRED",
	 .pme_code = 0x00c4,
	 .pme_desc = "count branch instructions at retirement. Specifically, this event counts the retirement of the last micro-op of a branch instruction",
	},
	{.pme_name = "MISPREDICTED_BRANCH_RETIRED",
	 .pme_code = 0x00c5,
	 .pme_desc = "count mispredicted branch instructions at retirement. Specifically, this event counts at retirement of the last micro-op of a branch instruction in the architectural path of the execution and experienced misprediction in the branch prediction hardware",
	}
};
/* positions of the first two entries in gen_ia32_all_pe[]; keep in sync with the table order */
#define PME_GEN_IA32_UNHALTED_CORE_CYCLES 0
#define PME_GEN_IA32_INSTRUCTIONS_RETIRED 1
/* number of entries in gen_ia32_all_pe[] */
#define PFMLIB_GEN_IA32_EVENT_COUNT (sizeof(gen_ia32_all_pe)/sizeof(pme_gen_ia32_entry_t))
papi-5.6.0/src/validation_tests/display_error.c000664 001750 001750 00000001304 13216244370 023716 0ustar00jshenry1963jshenry1963000000 000000
/*
 * NOTE(review): the two #include directives below have no operand in this
 * copy of the file; the header names must be restored before this compiles.
 */
#include
#include
#include "display_error.h"

/*
 * display_error - report how far a measured average is from an expected value.
 *
 * average:  measured average
 * high:     highest measured value (only printed)
 * low:      lowest measured value (only printed)
 * expected: reference value the average is compared against
 * quiet:    when non-zero, nothing is printed
 *
 * Returns the signed relative error in percent:
 * ((average - expected) / expected) * 100.
 *
 * NOTE(review): expected is used as the divisor without a check, so
 * expected == 0 does not produce a meaningful percentage; callers
 * presumably never pass 0 -- confirm.
 */
double display_error(long long average, long long high, long long low, long long expected, int quiet)
{
	double error;

	error=(((double)average-expected)/expected)*100.0;

	if (!quiet) {
		printf(" Expected: %lld\n", expected);
		printf(" High: %lld Low: %lld Average: %lld\n", high,low,average);
		printf(" ( note, a small value above %lld may be expected due\n", expected);
		printf(" to overhead and interrupt noise, among other reasons)\n");
		printf(" Average Error = %.2f%%\n",error);
	}

	return error;
}
papi-5.6.0/src/components/rapl/utils/000775 001750 001750 00000000000 13216244360 021604 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/libpfm-3.y/examples_ia64_v2.0/ita2_dear.c000664 001750 001750 00000026571 13216244362 024422 0ustar00jshenry1963jshenry1963000000 000000 /* * ita2_dear.c - example of how use the D-EAR with the Itanium 2 PMU * * Copyright (c) 2003-2006 Hewlett-Packard Development Company, L.P.
* Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux/ia64. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #define NUM_PMCS PFMLIB_MAX_PMCS #define NUM_PMDS PFMLIB_MAX_PMDS #define MAX_EVT_NAME_LEN 128 #define MAX_PMU_NAME_LEN 32 #define SMPL_PERIOD (40) #define EVENT_NAME "data_ear_cache_lat4" #define M_PMD(x) (1UL<<(x)) #define DEAR_REGS_MASK (M_PMD(2)|M_PMD(3)|M_PMD(17)) typedef pfm_default_smpl_hdr_t dear_hdr_t; typedef pfm_default_smpl_entry_t dear_entry_t; typedef pfm_default_smpl_ctx_arg_t dear_ctx_t; #define DEAR_FMT_UUID PFM_DEFAULT_SMPL_UUID static pfm_uuid_t buf_fmt_id = DEAR_FMT_UUID; static void *smpl_vaddr; static unsigned long entry_size; static int id; #if defined(__ECC) && defined(__INTEL_COMPILER) /* if you do not have this file, your compiler is too old */ #include #define hweight64(x) _m64_popcnt(x) #elif defined(__GNUC__) static __inline__ int hweight64 (unsigned long x) { unsigned long result; __asm__ ("popcnt %0=%1" : "=r" (result) : "r" (x)); return (int)result; } #else #error "you need to provide inline assembly from your compiler" #endif long do_test(unsigned long size) { unsigned long i, sum = 0; int *array; printf("buffer size %.1fMB\n", (size*sizeof(int))/1024.0); array = (int *)malloc(size * sizeof(int)); if (array == NULL ) { printf("line = %d No memory available!\n", __LINE__); exit(1); } for(i=0; ihdr_overflows <= last_ovfl && last_ovfl != ~0UL) { printf("skipping identical set of samples %lu <= %lu\n", hdr->hdr_overflows, last_ovfl); return; } pos = (unsigned long)(hdr+1); /* * walk through all the entries recored in the buffer */ for(i=0; i < hdr->hdr_count; i++) { ret = 0; ent = (dear_entry_t *)pos; /* * print entry header */ safe_printf("Entry %ld PID:%d CPU:%d STAMP:0x%lx IIP:0x%016lx\n", smpl_entry++, ent->pid, ent->cpu, ent->tstamp, ent->ip); /* * point to first recorded register (always contiguous with entry header) */ reg = (pfm_ita2_pmd_reg_t*)(ent+1); safe_printf("PMD2 : 0x%016lx\n", reg->pmd_val); reg++; 
safe_printf("PMD3 : 0x%016lx, latency %u\n", reg->pmd_val, reg->pmd3_ita2_reg.dear_latency); reg++; safe_printf("PMD17: 0x%016lx, valid %c, address 0x%016lx\n", reg->pmd_val, reg->pmd17_ita2_reg.dear_vl ? 'Y': 'N', (reg->pmd17_ita2_reg.dear_iaddr << 4) | (unsigned long)reg->pmd17_ita2_reg.dear_slot); /* * move to next entry */ pos += entry_size; } } static void overflow_handler(int n, struct siginfo *info, struct sigcontext *sc) { /* dangerous */ printf("Notification received\n"); process_smpl_buffer(); /* * And resume monitoring */ if (perfmonctl(id, PFM_RESTART,NULL, 0) == -1) { perror("PFM_RESTART"); exit(1); } } int main(void) { pfarg_reg_t pd[NUM_PMDS]; pfarg_reg_t pc[NUM_PMCS]; pfmlib_input_param_t inp; pfmlib_output_param_t outp; pfmlib_event_t ev; dear_ctx_t ctx[1]; pfarg_load_t load_args; pfmlib_options_t pfmlib_options; struct sigaction act; unsigned int i; int ret, type = 0; /* * Initialize pfm library (required before we can use it) */ if (pfm_initialize() != PFMLIB_SUCCESS) { fatal_error("Can't initialize library\n"); } /* * Let's make sure we run this on the right CPU */ pfm_get_pmu_type(&type); if (type != PFMLIB_ITANIUM2_PMU) { char model[MAX_PMU_NAME_LEN]; pfm_get_pmu_name(model, MAX_PMU_NAME_LEN); fatal_error("this program does not work with %s PMU\n", model); } /* * Install the overflow handler (SIGIO) */ memset(&act, 0, sizeof(act)); act.sa_handler = (sig_t)overflow_handler; sigaction (SIGIO, &act, 0); /* * pass options to library (optional) */ memset(&pfmlib_options, 0, sizeof(pfmlib_options)); pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ pfmlib_options.pfm_verbose = 1; /* set to 1 for debug */ pfm_set_options(&pfmlib_options); memset(pd, 0, sizeof(pd)); memset(pc, 0, sizeof(pc)); memset(pc, 0, sizeof(pc)); memset(ctx, 0, sizeof(ctx)); memset(&load_args, 0, sizeof(load_args)); /* * prepare parameters to library. we don't use any Itanium * specific features here. so the pfp_model is NULL. 
*/ memset(&inp,0, sizeof(inp)); memset(&outp,0, sizeof(outp)); /* * To count the number of occurence of this instruction, we must * program a counting monitor with the IA64_TAGGED_INST_RETIRED_PMC8 * event. */ if (pfm_find_full_event(EVENT_NAME, &ev) != PFMLIB_SUCCESS) { fatal_error("cannot find event %s\n", EVENT_NAME); } /* * set the (global) privilege mode: * PFM_PLM0 : kernel level only */ inp.pfp_dfl_plm = PFM_PLM3|PFM_PLM0; /* * how many counters we use */ inp.pfp_event_count = 1; /* * propagate the event descriptor */ inp.pfp_events[0] = ev; /* * let the library figure out the values for the PMCS * * We use all global settings for this EAR. */ if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) { fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); } /* * prepare context structure. * * format specific parameters MUST be concatenated to the regular * pfarg_context_t structure. For convenience, the default sampling * format provides a data structure that already combines the pfarg_context_t * with what is needed fot this format. */ /* * We initialize the format specific information. * The format is identified by its UUID which must be copied * into the ctx_buf_fmt_id field. */ memcpy(ctx[0].ctx_arg.ctx_smpl_buf_id, buf_fmt_id, sizeof(pfm_uuid_t)); /* * the size of the buffer is indicated in bytes (not entries). * * The kernel will record into the buffer up to a certain point. * No partial samples are ever recorded. 
*/ ctx[0].buf_arg.buf_size = 4096; /* * now create the context for self monitoring/per-task */ if (perfmonctl(0, PFM_CREATE_CONTEXT, ctx, 1) == -1 ) { if (errno == ENOSYS) { fatal_error("Your kernel does not have performance monitoring support!\n"); } fatal_error("Can't create PFM context %s\n", strerror(errno)); } /* * extract the file descriptor we will use to * identify this newly created context */ id = ctx[0].ctx_arg.ctx_fd; printf("Sampling buffer mapped at %p\n", ctx[0].ctx_arg.ctx_smpl_vaddr); smpl_vaddr = ctx[0].ctx_arg.ctx_smpl_vaddr; /* * Now prepare the argument to initialize the PMDs and PMCS. * We must pfp_pmc_count to determine the number of PMC to intialize. * We must use pfp_event_count to determine the number of PMD to initialize. * Some events causes extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. * * This step is new compared to libpfm-2.x. It is necessary because the library no * longer knows about the kernel data structures. */ for (i=0; i < outp.pfp_pmc_count; i++) { pc[i].reg_num = outp.pfp_pmcs[i].reg_num; pc[i].reg_value = outp.pfp_pmcs[i].reg_value; } /* * the PMC controlling the event ALWAYS come first, that's why this loop * is safe even when extra PMC are needed to support a particular event. 
*/ for (i=0; i < inp.pfp_event_count; i++) { pd[i].reg_num = pc[i].reg_num; } /* * indicate we want notification when buffer is full */ pc[0].reg_flags |= PFM_REGFL_OVFL_NOTIFY; /* * indicate which PMD to include in the sample */ pc[0].reg_smpl_pmds[0] = DEAR_REGS_MASK; /* * compute size of each sample: fixed-size header + all our DEAR regs */ entry_size = sizeof(dear_entry_t)+(hweight64(DEAR_REGS_MASK)<<3); /* * initialize the PMD and the sampling period */ pd[0].reg_value = (~0UL) - SMPL_PERIOD +1; pd[0].reg_long_reset = (~0UL) - SMPL_PERIOD +1; pd[0].reg_short_reset = (~0UL) - SMPL_PERIOD +1; /* * Now program the registers * * We don't use the save variable to indicate the number of elements passed to * the kernel because, as we said earlier, pc may contain more elements than * the number of events we specified, i.e., contains more thann coutning monitors. */ if (perfmonctl(id, PFM_WRITE_PMCS, pc, outp.pfp_pmc_count) == -1) { fatal_error("perfmonctl error PFM_WRITE_PMCS errno %d\n",errno); } if (perfmonctl(id, PFM_WRITE_PMDS, pd, inp.pfp_event_count) == -1) { fatal_error("perfmonctl error PFM_WRITE_PMDS errno %d\n",errno); } /* * attach context to stopped task */ load_args.load_pid = getpid(); if (perfmonctl(id, PFM_LOAD_CONTEXT, &load_args, 1) == -1) { fatal_error("perfmonctl error PFM_LOAD_CONTEXT errno %d\n",errno); } /* * setup asynchronous notification on the file descriptor */ ret = fcntl(id, F_SETFL, fcntl(id, F_GETFL, 0) | O_ASYNC); if (ret == -1) { fatal_error("cannot set ASYNC: %s\n", strerror(errno)); } /* * get ownership of the descriptor */ ret = fcntl(id, F_SETOWN, getpid()); if (ret == -1) { fatal_error("cannot setown: %s\n", strerror(errno)); } /* * Let's roll now. */ pfm_self_start(id); do_test(10000); pfm_self_stop(id); /* * We must call the processing routine to cover the last entries recorded * in the sampling buffer, i.e. 
which may not be full */ process_smpl_buffer(); /* * let's stop this now */ close(id); return 0; } papi-5.6.0/src/perfctr-2.6.x/usr.lib/arch.h000775 001750 001750 00000000645 13216244367 022222 0ustar00jshenry1963jshenry1963000000 000000 /* $Id: arch.h,v 1.1.2.1 2007/02/11 20:15:03 mikpe Exp $ * Architecture-specific code for performance counters library. * * Copyright (C) 2004-2007 Mikael Pettersson */ #ifndef __LIB_PERFCTR_ARCH_H #define __LIB_PERFCTR_ARCH_H #if defined(__i386__) || defined(__x86_64__) #include "x86.h" #elif defined(__powerpc__) #include "ppc.h" #elif defined(__arm__) #include "arm.h" #endif #endif /* __LIB_PERFCTR_ARCH_H */ papi-5.6.0/src/libpfm4/lib/events/intel_knl_events.h000664 001750 001750 00000123374 13216244364 024462 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2016 Intel Corp. All rights reserved * Contributed by Peinan Zhang * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * This file is part of libpfm, a performance monitoring support library for * applications on Linux. * * PMU: knl (Intel Knights Landing) */ static const intel_x86_umask_t knl_icache[]={ { .uname = "HIT", .udesc = "Counts all instruction fetches that hit the instruction cache.", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "MISSES", .udesc = "Counts all instruction fetches that miss the instruction cache or produce memory requests. An instruction fetch miss is counted only once and not once for every cycle it is outstanding.", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ACCESSES", .udesc = "Counts all instruction fetches, including uncacheable fetches.", .ucode = 0x300, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, }; static const intel_x86_umask_t knl_uops_retired[]={ { .uname = "ALL", .udesc = "Counts the number of micro-ops retired.", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "MS", .udesc = "Counts the number of micro-ops retired that are from the complex flows issued by the micro-sequencer (MS).", .ucode = 0x0100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "SCALAR_SIMD", .udesc = "Counts the number of scalar SSE, AVX, AVX2, AVX-512 micro-ops retired. More specifically, it counts scalar SSE, AVX, AVX2, AVX-512 micro-ops except for loads (memory-to-register mov-type micro ops), division, sqrt.", .ucode = 0x2000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "PACKED_SIMD", .udesc = "Counts the number of vector SSE, AVX, AVX2, AVX-512 micro-ops retired. 
More specifically, it counts packed SSE, AVX, AVX2, AVX-512 micro-ops (both floating point and integer) except for loads (memory-to-register mov-type micro-ops), packed byte and word multiplies.", .ucode = 0x4000, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t knl_inst_retired[]={ { .uname = "ANY_P", .udesc = "Instructions retired using generic counter (precise event)", .ucode = 0x0, .uflags = INTEL_X86_PEBS | INTEL_X86_DFL, }, { .uname = "ANY", .udesc = "Instructions retired using generic counter (precise event)", .uequiv = "ANY_P", .ucode = 0x0, .uflags = INTEL_X86_PEBS, }, }; static const intel_x86_umask_t knl_l2_requests_reject[]={ { .uname = "ALL", .udesc = "Counts the number of MEC requests from the L2Q that reference a cache line excluding SW prefetches filling only to L2 cache and L1 evictions (automatically exlcudes L2HWP, UC, WC) that were rejected - Multiple repeated rejects should be counted multiple times.", .ucode = 0x000, .uflags = INTEL_X86_DFL, }, }; static const intel_x86_umask_t knl_core_reject[]={ { .uname = "ALL", .udesc = "Counts the number of MEC requests that were not accepted into the L2Q because of any L2 queue reject condition. There is no concept of at-ret here. 
It might include requests due to instructions in the speculative path", .ucode = 0x000, .uflags = INTEL_X86_DFL, }, }; static const intel_x86_umask_t knl_machine_clears[]={ { .uname = "SMC", .udesc = "Counts the number of times that the machine clears due to program modifying data within 1K of a recently fetched code page.", .ucode = 0x0100, .uflags = INTEL_X86_DFL, }, { .uname = "MEMORY_ORDERING", .udesc = "Counts the number of times the machine clears due to memory ordering hazards", .ucode = 0x0200, }, { .uname = "FP_ASSIST", .udesc = "Counts the number of floating operations retired that required microcode assists", .ucode = 0x0400, }, { .uname = "ALL", .udesc = "Counts all nukes", .ucode = 0x0800, }, { .uname = "ANY", .udesc = "Counts all nukes", .uequiv = "ALL", .ucode = 0x0800, }, }; static const intel_x86_umask_t knl_br_inst_retired[]={ { .uname = "ANY", .udesc = "Counts the number of branch instructions retired (Precise Event)", .ucode = 0x0, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL | INTEL_X86_PEBS, }, { .uname = "ALL_BRANCHES", .udesc = "Counts the number of branch instructions retired", .uequiv = "ANY", .ucode = 0x0, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "JCC", .udesc = "Counts the number of branch instructions retired that were conditional jumps.", .ucode = 0x7e00, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "TAKEN_JCC", .udesc = "Counts the number of branch instructions retired that were conditional jumps and predicted taken.", .ucode = 0xfe00, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "CALL", .udesc = "Counts the number of near CALL branch instructions retired.", .ucode = 0xf900, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "REL_CALL", .udesc = "Counts the number of near relative CALL branch instructions retired.", .ucode = 0xfd00, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "IND_CALL", .udesc = "Counts the number of near indirect CALL branch instructions retired. 
(Precise Event)", .ucode = 0xfb00, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "RETURN", .udesc = "Counts the number of near RET branch instructions retired. (Precise Event)", .ucode = 0xf700, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "NON_RETURN_IND", .udesc = "Counts the number of branch instructions retired that were near indirect CALL or near indirect JMP. (Precise Event)", .ucode = 0xeb00, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "FAR_BRANCH", .udesc = "Counts the number of far branch instructions retired. (Precise Event)", .uequiv = "FAR", .ucode = 0xbf00, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "FAR", .udesc = "Counts the number of far branch instructions retired. (Precise Event)", .ucode = 0xbf00, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, }; static const intel_x86_umask_t knl_fetch_stall[]={ { .uname = "ICACHE_FILL_PENDING_CYCLES", .udesc = "Counts the number of core cycles the fetch stalls because of an icache miss. 
This is a cumulative count of core cycles the fetch stalled for all icache misses", .ucode = 0x0400, .uflags = INTEL_X86_DFL | INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t knl_baclears[]={ { .uname = "ALL", .udesc = "Counts the number of times the front end resteers for any branch as a result of another branch handling mechanism in the front end.", .ucode = 0x100, .uflags = INTEL_X86_DFL | INTEL_X86_NCOMBO, }, { .uname = "ANY", .udesc = "Counts the number of times the front end resteers for any branch as a result of another branch handling mechanism in the front end.", .uequiv = "ALL", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "RETURN", .udesc = "Counts the number of times the front end resteers for RET branches as a result of another branch handling mechanism in the front end.", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO, }, { .uname = "COND", .udesc = "Counts the number of times the front end resteers for conditional branches as a result of another branch handling mechanism in the front end.", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t knl_cpu_clk_unhalted[]={ { .uname = "THREAD_P", .udesc = "thread cycles when core is not halted", .ucode = 0x0, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "BUS", .udesc = "Bus cycles when core is not halted. This event can give a measurement of the elapsed time. This events has a constant ratio with CPU_CLK_UNHALTED:REF event, which is the maximum bus to processor frequency ratio", .uequiv = "REF_P", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "REF_P", .udesc = "Number of reference cycles that the cpu is not in a halted state. The core enters the halted state when it is running the HLT instruction. In mobile systems, the core frequency may change from time to time. 
This event is not affected by core frequency changes but counts as if the core is running a the same maximum frequency all the time", .ucode = 0x100, /* architectural UnHalted Reference Cycles encoding (umask 0x01); BUS names REF_P as its .uequiv, so both entries must carry the same .ucode */ .uflags = INTEL_X86_NCOMBO, }, }; /* Unit masks for MEM_UOPS_RETIRED. Entries carrying .uequiv are alternate names for the entry they reference and share its .ucode. */ static const intel_x86_umask_t knl_mem_uops_retired[]={ { .uname = "L1_MISS_LOADS", .udesc = "Counts the number of load micro-ops retired that miss in L1 D cache.", .ucode = 0x100, }, { .uname = "LD_DCU_MISS", .udesc = "Counts the number of load micro-ops retired that miss in L1 D cache.", .uequiv = "L1_MISS_LOADS", .ucode = 0x100, }, { .uname = "L2_HIT_LOADS", .udesc = "Counts the number of load micro-ops retired that hit in the L2.", .ucode = 0x200, .uflags = INTEL_X86_PEBS, }, { .uname = "L2_MISS_LOADS", .udesc = "Counts the number of load micro-ops retired that miss in the L2.", .ucode = 0x400, .uflags = INTEL_X86_PEBS, }, { .uname = "LD_L2_MISS", .udesc = "Counts the number of load micro-ops retired that miss in the L2.", .uequiv = "L2_MISS_LOADS", .ucode = 0x400, .uflags = INTEL_X86_PEBS, }, { .uname = "DTLB_MISS_LOADS", .udesc = "Counts the number of load micro-ops retired that cause a DTLB miss.", .ucode = 0x800, .uflags = INTEL_X86_PEBS, }, { .uname = "UTLB_MISS_LOADS", .udesc = "Counts the number of load micro-ops retired that caused micro TLB miss.", .ucode = 0x1000, }, { .uname = "LD_UTLB_MISS", .udesc = "Counts the number of load micro-ops retired that caused micro TLB miss.", .uequiv = "UTLB_MISS_LOADS", .ucode = 0x1000, }, { .uname = "HITM", .udesc = "Counts the loads retired that get the data from the other core in the same tile in M state.", .ucode = 0x2000, .uflags = INTEL_X86_PEBS, }, { .uname = "ALL_LOADS", .udesc = "Counts all the load micro-ops retired.", .ucode = 0x4000, .uflags = INTEL_X86_DFL, }, { .uname = "ANY_LD", .udesc = "Counts all the load micro-ops retired.", .uequiv = "ALL_LOADS", .ucode = 0x4000, }, { .uname = "ALL_STORES", .udesc = "Counts all the store micro-ops retired.", .ucode = 0x8000, }, { .uname = "ANY_ST", .udesc = "Counts all the store 
micro-ops retired.", .uequiv = "ALL_STORES", .ucode = 0x8000, }, }; static const intel_x86_umask_t knl_page_walks[]={ { .uname = "D_SIDE_CYCLES", .udesc = "Counts the total D-side page walks that are completed or started. The page walks started in the speculative path will also be counted.", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "D_SIDE_WALKS", .udesc = "Counts the total number of core cycles for all the D-side page walks. The cycles for page walks started in speculative path will also be included.", .ucode = 0x100 | INTEL_X86_MOD_EDGE | (1ULL << INTEL_X86_CMASK_BIT), .modhw = _INTEL_X86_ATTR_E | _INTEL_X86_ATTR_C, .uflags = INTEL_X86_NCOMBO, }, { .uname = "I_SIDE_CYCLES", .udesc = "Counts the total I-side page walks that are completed.", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "I_SIDE_WALKS", .udesc = "Counts the total number of core cycles for all the I-side page walks. The cycles for page walks started in speculative path will also be included.", .ucode = 0x200 | INTEL_X86_MOD_EDGE | (1ULL << INTEL_X86_CMASK_BIT), .modhw = _INTEL_X86_ATTR_E | _INTEL_X86_ATTR_C, .uflags = INTEL_X86_NCOMBO, }, { .uname = "CYCLES", .udesc = "Counts the total page walks completed (I-side and D-side)", .ucode = 0x300, .modhw = _INTEL_X86_ATTR_E | _INTEL_X86_ATTR_C, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "WALKS", .udesc = "Counts the total number of core cycles for all the page walks. 
The cycles for page walks started in speculative path will also be included.", .ucode = 0x300 | INTEL_X86_MOD_EDGE | (1ULL << INTEL_X86_CMASK_BIT), .modhw = _INTEL_X86_ATTR_E | _INTEL_X86_ATTR_C, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t knl_l2_rqsts[]={ { .uname = "MISS", .udesc = "Counts the number of L2 cache misses", .ucode = 0x4100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "REFERENCE", .udesc = "Counts the total number of L2 cache references.", .ucode = 0x4f00, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, }; static const intel_x86_umask_t knl_recycleq[]={ { .uname = "LD_BLOCK_ST_FORWARD", .udesc = "Counts the number of occurrences a retired load gets blocked because its address partially overlaps with a store (Precise Event).", .ucode = 0x0100, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "LD_BLOCK_STD_NOTREADY", .udesc = "Counts the number of occurrences a retired load gets blocked because its address overlaps with a store whose data is not ready.", .ucode = 0x0200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ST_SPLITS", .udesc = "Counts the number of occurrences a retired store that is a cache line split. Each split should be counted only once.", .ucode = 0x0400, .uflags = INTEL_X86_NCOMBO, }, { .uname = "LD_SPLITS", .udesc = "Counts the number of occurrences a retired load that is a cache line split. Each split should be counted only once (Precise Event).", .ucode = 0x0800, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "LOCK", .udesc = "Counts all the retired locked loads. 
It does not include stores because we would double count if we count stores.", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "STA_FULL", .udesc = "Counts the store micro-ops retired that were pushed in the rehad queue because the store address buffer is full.", .ucode = 0x2000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ANY_LD", .udesc = "Counts any retired load that was pushed into the recycle queue for any reason.", .ucode = 0x4000, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "ANY_ST", .udesc = "Counts any retired store that was pushed into the recycle queue for any reason.", .ucode = 0x8000, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t knl_offcore_response_0[]={ { .uname = "DMND_DATA_RD", .udesc = "Counts demand cacheable data and L1 prefetch data reads", .ucode = 1ULL << (0 + 8), .grpid = 0, }, { .uname = "DMND_RFO", .udesc = "Counts Demand cacheable data writes", .ucode = 1ULL << (1 + 8), .grpid = 0, }, { .uname = "DMND_CODE_RD", .udesc = "Counts demand code reads and prefetch code reads", .ucode = 1ULL << (2 + 8), .grpid = 0, }, { .uname = "PF_L2_RFO", .udesc = "Counts L2 data RFO prefetches (includes PREFETCHW instruction)", .ucode = 1ULL << (5 + 8), .grpid = 0, }, { .uname = "PF_L2_CODE_RD", .udesc = "Request: number of code reads generated by L2 prefetchers", .ucode = 1ULL << (6 + 8), .grpid = 0, }, { .uname = "PARTIAL_READS", .udesc = "Counts Partial reads (UC or WC and is valid only for Outstanding response type).", .ucode = 1ULL << (7 + 8), .grpid = 0, }, { .uname = "PARTIAL_WRITES", .udesc = "Counts Partial writes (UC or WT or WP and should be programmed on PMC1)", .ucode = 1ULL << (8 + 8), .grpid = 0, }, { .uname = "UC_CODE_READS", .udesc = "Counts UC code reads (valid only for Outstanding response type)", .ucode = 1ULL << (9 + 8), .grpid = 0, }, { .uname = "BUS_LOCKS", .udesc = "Counts Bus locks and split lock requests", .ucode = 1ULL << (10 + 8), .grpid = 0, }, { .uname = "FULL_STREAMING_STORES", .udesc 
= "Counts Full streaming stores (WC and should be programmed on PMC1)", .ucode = 1ULL << (11 + 8), .grpid = 0, }, { .uname = "PF_SOFTWARE", .udesc = "Counts Software prefetches", .ucode = 1ULL << (12 + 8), .grpid = 0, }, { .uname = "PF_L1_DATA_RD", .udesc = "Counts L1 data HW prefetches", .ucode = 1ULL << (13 + 8), .grpid = 0, }, { .uname = "PARTIAL_STREAMING_STORES", .udesc = "Counts Partial streaming stores (WC and should be programmed on PMC1)", .ucode = 1ULL << (14 + 8), .grpid = 0, }, { .uname = "STREAMING_STORES", .udesc = "Counts all streaming stores (WC and should be programmed on PMC1)", .ucode = (1ULL << 14 | 1ULL << 11) << 8, .uequiv = "PARTIAL_STREAMING_STORES:FULL_STREAMING_STORES", .grpid = 0, }, { .uname = "ANY_REQUEST", .udesc = "Counts any request", .ucode = 1ULL << (15 + 8), .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, .grpid = 0, }, { .uname = "ANY_DATA_RD", .udesc = "Counts Demand cacheable data and L1 prefetch data read requests", .ucode = (1ULL << 0 | 1ULL << 7 | 1ULL << 12 | 1ULL << 13) << 8, .uequiv = "DMND_DATA_RD:PARTIAL_READS:PF_SOFTWARE:PF_L1_DATA_RD", .grpid = 0, }, { .uname = "ANY_RFO", .udesc = "Counts Demand cacheable data write requests", .ucode = (1ULL << 1 | 1ULL << 5) << 8, .grpid = 0, }, { .uname = "ANY_CODE_RD", .udesc = "Counts Demand code reads and prefetch code read requests", .ucode = (1ULL << 2 | 1ULL << 6) << 8, .uequiv = "DMND_CODE_RD:PF_L2_CODE_RD", .grpid = 0, }, { .uname = "ANY_READ", .udesc = "Counts any Read request", .ucode = (1ULL << 0 | 1ULL << 1 | 1ULL << 2 | 1ULL << 5 | 1ULL << 6 | 1ULL << 7 | 1ULL << 9 | 1ULL << 12 | 1ULL << 13 ) << 8, .uequiv = "DMND_DATA_RD:DMND_RFO:DMND_CODE_RD:PF_L2_RFO:PF_L2_CODE_RD:PARTIAL_READS:UC_CODE_READS:PF_SOFTWARE:PF_L1_DATA_RD", .grpid = 0, }, { .uname = "ANY_PF_L2", .udesc = "Counts any Prefetch requests", .ucode = (1ULL << 5 | 1ULL << 6) << 8, .uequiv = "PF_L2_RFO:PF_L2_CODE_RD", .grpid = 0, }, { .uname = "ANY_RESPONSE", .udesc = "Accounts for any response", .ucode = (1ULL << 
16) << 8, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL | INTEL_X86_EXCL_GRP_GT, .grpid = 1, }, { .uname = "DDR_NEAR", .udesc = "Accounts for data responses from DRAM Local.", .ucode = (1ULL << 31 | 1ULL << 23 ) << 8, .grpid = 1, }, { .uname = "DDR_FAR", .udesc = "Accounts for data responses from DRAM Far.", .ucode = (1ULL << 32 | 1ULL << 24 ) << 8, /* far responses pair the source bit with bit 32, as MCDRAM_FAR does; the aggregate DDR mask (32|31|24|23) is the union of DDR_NEAR and DDR_FAR */ .grpid = 1, }, { .uname = "MCDRAM_NEAR", .udesc = "Accounts for data responses from MCDRAM Local.", .ucode = (1ULL << 31 | 1ULL << 21 ) << 8, .grpid = 1, }, { .uname = "MCDRAM_FAR", .udesc = "Accounts for data responses from MCDRAM Far or Other tile L2 hit far.", .ucode = (1ULL << 32 | 1ULL << 22 ) << 8, .grpid = 1, }, { .uname = "L2_HIT_NEAR_TILE_E_F", .udesc = "Accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state.", .ucode = (1ULL << 35 | 1ULL << 19 ) << 8, .grpid = 1, }, { .uname = "L2_HIT_NEAR_TILE_M", .udesc = "Accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state.", .ucode = (1ULL << 36 | 1ULL << 19 ) << 8, .grpid = 1, }, { .uname = "L2_HIT_FAR_TILE_E_F", .udesc = "Accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in E/F state. Valid only for SNC4 cluster mode.", .ucode = (1ULL << 35 | 1ULL << 22 ) << 8, .grpid = 1, }, { .uname = "L2_HIT_FAR_TILE_M", .udesc = "Accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state.", .ucode = (1ULL << 36 | 1ULL << 22 ) << 8, .grpid = 1, }, { .uname = "NON_DRAM", .udesc = "accounts for responses from any NON_DRAM system address. 
This includes MMIO transactions", .ucode = (1ULL << 37 | 1ULL << 17 ) << 8, .grpid = 1, }, { .uname = "MCDRAM", .udesc = "accounts for responses from MCDRAM (local and far)", .ucode = (1ULL << 32 | 1ULL << 31 | 1ULL << 22 | 1ULL << 21 ) << 8, .grpid = 1, }, { .uname = "DDR", .udesc = "accounts for responses from DDR (local and far)", .ucode = (1ULL << 32 | 1ULL << 31 | 1ULL << 24 | 1ULL << 23 ) << 8, .grpid = 1, }, { .uname = "L2_HIT_NEAR_TILE", .udesc = " accounts for responses from snoop request hit with data forwarded from its Near-other tile L2 in E/F/M state", .ucode = (1ULL << 36 | 1ULL << 35 | 1ULL << 20 | 1ULL << 19 ) << 8, .grpid = 1, }, { .uname = "L2_HIT_FAR_TILE", .udesc = "accounts for responses from snoop request hit with data forwarded from it Far(not in the same quadrant as the request)-other tile L2 in E/F/M state. Valid only in SNC4 Cluster mode.", .ucode = (1ULL << 36 | 1ULL << 35 | 1ULL << 22 ) << 8, .grpid = 1, }, { .uname = "OUTSTANDING", .udesc = "outstanding, per weighted cycle, from the time of the request to when any response is received. 
The oustanding response should be programmed only on PMC0.", .ucode = (1ULL << 38) << 8, .uflags = INTEL_X86_GRP_DFL_NONE | INTEL_X86_EXCL_GRP_BUT_0, /* can only be combined with request type bits (grpid = 0) */ .grpid = 2, }, }; static const intel_x86_umask_t knl_offcore_response_1[]={ { .uname = "DMND_DATA_RD", .udesc = "Counts demand cacheable data and L1 prefetch data reads", .ucode = 1ULL << (0 + 8), .grpid = 0, }, { .uname = "DMND_RFO", .udesc = "Counts Demand cacheable data writes", .ucode = 1ULL << (1 + 8), .grpid = 0, }, { .uname = "DMND_CODE_RD", .udesc = "Counts demand code reads and prefetch code reads", .ucode = 1ULL << (2 + 8), .grpid = 0, }, { .uname = "PF_L2_RFO", .udesc = "Counts L2 data RFO prefetches (includes PREFETCHW instruction)", .ucode = 1ULL << (5 + 8), .grpid = 0, }, { .uname = "PF_L2_CODE_RD", .udesc = "Request: number of code reads generated by L2 prefetchers", .ucode = 1ULL << (6 + 8), .grpid = 0, }, { .uname = "PARTIAL_READS", .udesc = "Counts Partial reads (UC or WC and is valid only for Outstanding response type).", .ucode = 1ULL << (7 + 8), .grpid = 0, }, { .uname = "PARTIAL_WRITES", .udesc = "Counts Partial writes (UC or WT or WP and should be programmed on PMC1)", .ucode = 1ULL << (8 + 8), .grpid = 0, }, { .uname = "UC_CODE_READS", .udesc = "Counts UC code reads (valid only for Outstanding response type)", .ucode = 1ULL << (9 + 8), .grpid = 0, }, { .uname = "BUS_LOCKS", .udesc = "Counts Bus locks and split lock requests", .ucode = 1ULL << (10 + 8), .grpid = 0, }, { .uname = "FULL_STREAMING_STORES", .udesc = "Counts Full streaming stores (WC and should be programmed on PMC1)", .ucode = 1ULL << (11 + 8), .grpid = 0, }, { .uname = "PF_SOFTWARE", .udesc = "Counts Software prefetches", .ucode = 1ULL << (12 + 8), .grpid = 0, }, { .uname = "PF_L1_DATA_RD", .udesc = "Counts L1 data HW prefetches", .ucode = 1ULL << (13 + 8), .grpid = 0, }, { .uname = "PARTIAL_STREAMING_STORES", .udesc = "Counts Partial streaming stores (WC and should be 
programmed on PMC1)", .ucode = 1ULL << (14 + 8), .grpid = 0, }, { .uname = "STREAMING_STORES", .udesc = "Counts all streaming stores (WC and should be programmed on PMC1)", .ucode = (1ULL << 14 | 1ULL << 11) << 8, .uequiv = "PARTIAL_STREAMING_STORES:FULL_STREAMING_STORES", .grpid = 0, }, { .uname = "ANY_REQUEST", .udesc = "Counts any request", .ucode = 1ULL << (15 + 8), .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, .grpid = 0, }, { .uname = "ANY_DATA_RD", .udesc = "Counts Demand cacheable data and L1 prefetch data read requests", .ucode = (1ULL << 0 | 1ULL << 7 | 1ULL << 12 | 1ULL << 13) << 8, .uequiv = "DMND_DATA_RD:PARTIAL_READS:PF_SOFTWARE:PF_L1_DATA_RD", .grpid = 0, }, { .uname = "ANY_RFO", .udesc = "Counts Demand cacheable data write requests", .ucode = (1ULL << 1 | 1ULL << 5) << 8, .grpid = 0, }, { .uname = "ANY_CODE_RD", .udesc = "Counts Demand code reads and prefetch code read requests", .ucode = (1ULL << 2 | 1ULL << 6) << 8, .uequiv = "DMND_CODE_RD:PF_L2_CODE_RD", .grpid = 0, }, { .uname = "ANY_READ", .udesc = "Counts any Read request", .ucode = (1ULL << 0 | 1ULL << 1 | 1ULL << 2 | 1ULL << 5 | 1ULL << 6 | 1ULL << 7 | 1ULL << 9 | 1ULL << 12 | 1ULL << 13 ) << 8, .uequiv = "DMND_DATA_RD:DMND_RFO:DMND_CODE_RD:PF_L2_RFO:PF_L2_CODE_RD:PARTIAL_READS:UC_CODE_READS:PF_SOFTWARE:PF_L1_DATA_RD", .grpid = 0, }, { .uname = "ANY_PF_L2", .udesc = "Counts any Prefetch requests", .ucode = (1ULL << 5 | 1ULL << 6) << 8, .uequiv = "PF_L2_RFO:PF_L2_CODE_RD", .grpid = 0, }, { .uname = "ANY_RESPONSE", .udesc = "Accounts for any response", .ucode = (1ULL << 16) << 8, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL | INTEL_X86_EXCL_GRP_GT, .grpid = 1, }, { .uname = "DDR_NEAR", .udesc = "Accounts for data responses from DRAM Local.", .ucode = (1ULL << 31 | 1ULL << 23 ) << 8, .grpid = 1, }, { .uname = "DDR_FAR", .udesc = "Accounts for data responses from DRAM Far.", .ucode = (1ULL << 32 | 1ULL << 24 ) << 8, /* far responses pair the source bit with bit 32, as MCDRAM_FAR does; the aggregate DDR mask (32|31|24|23) is the union of DDR_NEAR and DDR_FAR */ .grpid = 1, }, { .uname = "MCDRAM_NEAR", .udesc = "Accounts for data responses from 
MCDRAM Local.", .ucode = (1ULL << 31 | 1ULL << 21 ) << 8, .grpid = 1, }, { .uname = "MCDRAM_FAR", .udesc = "Accounts for data responses from MCDRAM Far or Other tile L2 hit far.", .ucode = (1ULL << 32 | 1ULL << 22 ) << 8, .grpid = 1, }, { .uname = "L2_HIT_NEAR_TILE_E_F", .udesc = "Accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state.", .ucode = (1ULL << 35 | 1ULL << 19 ) << 8, .grpid = 1, }, { .uname = "L2_HIT_NEAR_TILE_M", .udesc = "Accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state.", .ucode = (1ULL << 36 | 1ULL << 19 ) << 8, .grpid = 1, }, { .uname = "L2_HIT_FAR_TILE_E_F", .udesc = "Accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in E/F state. Valid only for SNC4 cluster mode.", .ucode = (1ULL << 35 | 1ULL << 22 ) << 8, .grpid = 1, }, { .uname = "L2_HIT_FAR_TILE_M", .udesc = "Accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state.", .ucode = (1ULL << 36 | 1ULL << 22 ) << 8, .grpid = 1, }, { .uname = "NON_DRAM", .udesc = "accounts for responses from any NON_DRAM system address. 
This includes MMIO transactions", .ucode = (1ULL << 37 | 1ULL << 17 ) << 8, .grpid = 1, }, { .uname = "MCDRAM", .udesc = "accounts for responses from MCDRAM (local and far)", .ucode = (1ULL << 32 | 1ULL << 31 | 1ULL << 22 | 1ULL << 21 ) << 8, .grpid = 1, }, { .uname = "DDR", .udesc = "accounts for responses from DDR (local and far)", .ucode = (1ULL << 32 | 1ULL << 31 | 1ULL << 24 | 1ULL << 23 ) << 8, .grpid = 1, }, { .uname = "L2_HIT_NEAR_TILE", .udesc = " accounts for responses from snoop request hit with data forwarded from its Near-other tile L2 in E/F/M state", .ucode = (1ULL << 36 | 1ULL << 35 | 1ULL << 20 | 1ULL << 19 ) << 8, .grpid = 1, }, { .uname = "L2_HIT_FAR_TILE", .udesc = "accounts for responses from snoop request hit with data forwarded from it Far(not in the same quadrant as the request)-other tile L2 in E/F/M state. Valid only in SNC4 Cluster mode.", .ucode = (1ULL << 36 | 1ULL << 35 | 1ULL << 22 ) << 8, .grpid = 1, }, }; static const intel_x86_umask_t knl_br_misp_retired[]={ { .uname = "ALL_BRANCHES", .udesc = "All mispredicted branches (Precise Event)", .uequiv = "ANY", .ucode = 0x0000, /* architectural encoding */ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "ANY", .udesc = "All mispredicted branches (Precise Event)", .ucode = 0x0000, /* architectural encoding */ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS | INTEL_X86_DFL, }, { .uname = "JCC", .udesc = "Number of mispredicted conditional branch instructions retired (Precise Event)", .ucode = 0x7e00, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "NON_RETURN_IND", .udesc = "Number of mispredicted non-return branch instructions retired (Precise Event)", .ucode = 0xeb00, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "RETURN", .udesc = "Number of mispredicted return branch instructions retired (Precise Event)", .ucode = 0xf700, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "IND_CALL", .udesc = "Number of mispredicted indirect call branch 
instructions retired (Precise Event)", .ucode = 0xfb00, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "TAKEN_JCC", .udesc = "Number of mispredicted taken conditional branch instructions retired (Precise Event)", .ucode = 0xfe00, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "CALL", .udesc = "Counts the number of mispredicted near CALL branch instructions retired.", .ucode = 0xf900, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "REL_CALL", .udesc = "Counts the number of mispredicted near relative CALL branch instructions retired.", .ucode = 0xfd00, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "FAR_BRANCH", .udesc = "Counts the number of mispredicted far branch instructions retired.", .ucode = 0xbf00, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, }; static const intel_x86_umask_t knl_no_alloc_cycles[]={ { .uname = "ROB_FULL", .udesc = "Counts the number of core cycles when no micro-ops are allocated and the ROB is full", .ucode = 0x0100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "MISPREDICTS", .udesc = "Counts the number of core cycles when no micro-ops are allocated and the alloc pipe is stalled waiting for a mispredicted branch to retire.", .ucode = 0x0400, .uflags = INTEL_X86_NCOMBO, }, { .uname = "RAT_STALL", .udesc = "Counts the number of core cycles when no micro-ops are allocated and a RATstall (caused by reservation station full) is asserted.", .ucode = 0x2000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "NOT_DELIVERED", .udesc = "Counts the number of core cycles when no micro-ops are allocated, the IQ is empty, and no other condition is blocking allocation.", .ucode = 0x9000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ALL", .udesc = "Counts the total number of core cycles when no micro-ops are allocated for any reason.", .ucode = 0x7f00, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "ANY", .udesc = "Counts the total number of core cycles when no micro-ops are allocated for any reason.", .uequiv 
= "ALL", .ucode = 0x7f00, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t knl_rs_full_stall[]={ { .uname = "MEC", .udesc = "Counts the number of core cycles when allocation pipeline is stalled and is waiting for a free MEC reservation station entry.", .ucode = 0x0100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ANY", .udesc = "Counts the total number of core cycles the Alloc pipeline is stalled when any one of the reservation stations is full.", .ucode = 0x1f00, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, }; static const intel_x86_umask_t knl_cycles_div_busy[]={ { .uname = "ALL", .udesc = "Counts the number of core cycles when divider is busy. Does not imply a stall waiting for the divider.", .ucode = 0x0100, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, }; static const intel_x86_umask_t knl_ms_decoded[]={ { .uname = "ENTRY", .udesc = "Counts the number of times the MSROM starts a flow of uops.", .ucode = 0x0100, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, }; static const intel_x86_umask_t knl_decode_restriction[]={ { .uname = "PREDECODE_WRONG", .udesc = "Number of times the prediction (from the predecode cache) for instruction length is incorrect", .ucode = 0x0100, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, }; static const intel_x86_entry_t intel_knl_pe[]={ { .name = "UNHALTED_CORE_CYCLES", .desc = "Unhalted core cycles", .modmsk = INTEL_V3_ATTRS, /* any thread only supported in fixed counter */ .cntmsk = 0x200000003ull, .code = 0x3c, }, { .name = "UNHALTED_REFERENCE_CYCLES", .desc = "Unhalted reference cycle", .modmsk = INTEL_FIXED3_ATTRS, .cntmsk = 0x400000000ull, .code = 0x0300, /* pseudo encoding */ .flags = INTEL_X86_FIXED, }, { .name = "INSTRUCTION_RETIRED", .desc = "Instructions retired (any thread modifier supported in fixed counter)", .modmsk = INTEL_V3_ATTRS, /* any thread only supported in fixed counter */ .cntmsk = 0x100000003ull, .code = 0xc0, }, { .name = "INSTRUCTIONS_RETIRED", .desc = "This is an alias for 
INSTRUCTION_RETIRED (any thread modifier supported in fixed counter)", .modmsk = INTEL_V3_ATTRS, /* any thread only supported in fixed counter */ .equiv = "INSTRUCTION_RETIRED", .cntmsk = 0x100000003ull, /* same counter mask as INSTRUCTION_RETIRED, which this entry aliases */ .code = 0xc0, }, { .name = "LLC_REFERENCES", .desc = "Last level of cache references", .modmsk = INTEL_V2_ATTRS, .cntmsk = 0x3, .code = 0x4f2e, }, { .name = "LAST_LEVEL_CACHE_REFERENCES", .desc = "This is an alias for LLC_REFERENCES", .modmsk = INTEL_V2_ATTRS, .equiv = "LLC_REFERENCES", .cntmsk = 0x3, .code = 0x4f2e, }, { .name = "LLC_MISSES", .desc = "Last level of cache misses", .modmsk = INTEL_V2_ATTRS, .cntmsk = 0x3, .code = 0x412e, }, { .name = "LAST_LEVEL_CACHE_MISSES", .desc = "This is an alias for LLC_MISSES", .modmsk = INTEL_V2_ATTRS, .equiv = "LLC_MISSES", .cntmsk = 0x3, .code = 0x412e, }, { .name = "BRANCH_INSTRUCTIONS_RETIRED", .desc = "Branch instructions retired", .modmsk = INTEL_V2_ATTRS, .equiv = "BR_INST_RETIRED:ANY", .cntmsk = 0x3, .code = 0xc4, }, { .name = "MISPREDICTED_BRANCH_RETIRED", .desc = "Mispredicted branch instruction retired", .equiv = "BR_MISP_RETIRED:ANY", .modmsk = INTEL_V2_ATTRS, .cntmsk = 0x3, .code = 0xc5, .flags = INTEL_X86_PEBS, }, /* begin model specific events */ { .name = "ICACHE", .desc = "Instruction fetches", .modmsk = INTEL_V2_ATTRS, .cntmsk = 0x3, .code = 0x80, .numasks = LIBPFM_ARRAY_SIZE(knl_icache), .ngrp = 1, .umasks = knl_icache, }, { .name = "UOPS_RETIRED", .desc = "Micro-ops retired", .modmsk = INTEL_V2_ATTRS, .cntmsk = 0x3, .code = 0xc2, .numasks = LIBPFM_ARRAY_SIZE(knl_uops_retired), .ngrp = 1, .umasks = knl_uops_retired, }, { .name = "INST_RETIRED", .desc = "Instructions retired", .modmsk = INTEL_V2_ATTRS, .cntmsk = 0x3, .code = 0xc0, .flags = INTEL_X86_PEBS, .numasks = LIBPFM_ARRAY_SIZE(knl_inst_retired), .ngrp = 1, .umasks = knl_inst_retired, }, { .name = "CYCLES_DIV_BUSY", .desc = "Counts the number of core cycles when divider is busy.", .modmsk = INTEL_V2_ATTRS, .cntmsk = 0x3, .code = 0xcd, .numasks = 
LIBPFM_ARRAY_SIZE(knl_cycles_div_busy), .ngrp = 1, .umasks = knl_cycles_div_busy, }, { .name = "RS_FULL_STALL", .desc = "Counts the number of core cycles when allocation pipeline is stalled.", .modmsk = INTEL_V2_ATTRS, .cntmsk = 0x3, .code = 0xcb, .numasks = LIBPFM_ARRAY_SIZE(knl_rs_full_stall), .ngrp = 1, .umasks = knl_rs_full_stall, }, { .name = "L2_REQUESTS", .desc = "L2 cache requests", .modmsk = INTEL_V2_ATTRS, .cntmsk = 0x3, .code = 0x2e, .numasks = LIBPFM_ARRAY_SIZE(knl_l2_rqsts), .ngrp = 1, .umasks = knl_l2_rqsts, }, { .name = "MACHINE_CLEARS", .desc = "Counts the number of times that the machine clears.", .modmsk = INTEL_V2_ATTRS, .cntmsk = 0x3, .code = 0xc3, .numasks = LIBPFM_ARRAY_SIZE(knl_machine_clears), .ngrp = 1, .umasks = knl_machine_clears, }, { .name = "BR_INST_RETIRED", .desc = "Retired branch instructions", .modmsk = INTEL_V2_ATTRS, .cntmsk = 0x3, .code = 0xc4, .numasks = LIBPFM_ARRAY_SIZE(knl_br_inst_retired), .flags = INTEL_X86_PEBS, .ngrp = 1, .umasks = knl_br_inst_retired, }, { .name = "BR_MISP_RETIRED", .desc = "Counts the number of mispredicted branch instructions retired.", .modmsk = INTEL_V2_ATTRS, .cntmsk = 0x3, .code = 0xc5, .flags = INTEL_X86_PEBS, .numasks = LIBPFM_ARRAY_SIZE(knl_br_misp_retired), .ngrp = 1, .umasks = knl_br_misp_retired, }, { .name = "MS_DECODED", .desc = "Number of times the MSROM starts a flow of uops.", .modmsk = INTEL_V2_ATTRS, .cntmsk = 0x3, .code = 0xe7, .numasks = LIBPFM_ARRAY_SIZE(knl_ms_decoded), .ngrp = 1, .umasks = knl_ms_decoded, }, { .name = "FETCH_STALL", .desc = "Counts the number of core cycles the fetch stalls.", .modmsk = INTEL_V2_ATTRS, .cntmsk = 0x3, .code = 0x86, .numasks = LIBPFM_ARRAY_SIZE(knl_fetch_stall), .ngrp = 1, .umasks = knl_fetch_stall, }, { .name = "BACLEARS", .desc = "Branch address calculator", .modmsk = INTEL_V2_ATTRS, .cntmsk = 0x3, .code = 0xe6, .numasks = LIBPFM_ARRAY_SIZE(knl_baclears), .ngrp = 1, .umasks = knl_baclears, }, { .name = "NO_ALLOC_CYCLES", .desc = "Front-end 
allocation", .modmsk = INTEL_V2_ATTRS, .cntmsk = 0x3, .code = 0xca, .numasks = LIBPFM_ARRAY_SIZE(knl_no_alloc_cycles), .ngrp = 1, .umasks = knl_no_alloc_cycles, }, { .name = "CPU_CLK_UNHALTED", .desc = "Core cycles when core is not halted", .modmsk = INTEL_V2_ATTRS, .cntmsk = 0x3, .code = 0x3c, .numasks = LIBPFM_ARRAY_SIZE(knl_cpu_clk_unhalted), .ngrp = 1, .umasks = knl_cpu_clk_unhalted, }, { .name = "MEM_UOPS_RETIRED", .desc = "Counts the number of load micro-ops retired.", .modmsk = INTEL_V2_ATTRS, .cntmsk = 0x3, .code = 0x4, .flags = INTEL_X86_PEBS, .numasks = LIBPFM_ARRAY_SIZE(knl_mem_uops_retired), .ngrp = 1, .umasks = knl_mem_uops_retired, }, { .name = "PAGE_WALKS", .desc = "Number of page-walks executed", .modmsk = INTEL_V2_ATTRS, .cntmsk = 0x3, .code = 0x5, .numasks = LIBPFM_ARRAY_SIZE(knl_page_walks), .ngrp = 1, .umasks = knl_page_walks, }, { .name = "L2_REQUESTS_REJECT", .desc = "Counts the number of MEC requests from the L2Q that reference a cache line were rejected.", .modmsk = INTEL_V2_ATTRS, .cntmsk = 0x3, .code = 0x30, .numasks = LIBPFM_ARRAY_SIZE(knl_l2_requests_reject), .ngrp = 1, .umasks = knl_l2_requests_reject, }, { .name = "CORE_REJECT_L2Q", .desc = "Number of requests not accepted into the L2Q because of any L2 queue reject condition.", .modmsk = INTEL_V2_ATTRS, .cntmsk = 0x3, .code = 0x31, .numasks = LIBPFM_ARRAY_SIZE(knl_core_reject), .ngrp = 1, .umasks = knl_core_reject, }, { .name = "RECYCLEQ", .desc = "Counts the number of occurrences a retired load gets blocked.", .modmsk = INTEL_V2_ATTRS, .cntmsk = 0x3, .code = 0x03, .flags = INTEL_X86_PEBS, .numasks = LIBPFM_ARRAY_SIZE(knl_recycleq), .ngrp = 1, .umasks = knl_recycleq, }, { .name = "OFFCORE_RESPONSE_0", .desc = "Offcore response event (must provide at least one request type and either any_response or any combination of supplier + snoop)", .modmsk = INTEL_V2_ATTRS, .cntmsk = 0xf, .code = 0x01b7, .flags = INTEL_X86_NHM_OFFCORE, .numasks = LIBPFM_ARRAY_SIZE(knl_offcore_response_0), .ngrp = 
3, .umasks = knl_offcore_response_0, }, { .name = "OFFCORE_RESPONSE_1", .desc = "Offcore response event (must provide at least one request type and either any_response or any combination of supplier + snoop)", .modmsk = INTEL_V2_ATTRS, .cntmsk = 0xf, .code = 0x02b7, .flags = INTEL_X86_NHM_OFFCORE, .numasks = LIBPFM_ARRAY_SIZE(knl_offcore_response_1), .ngrp = 2, .umasks = knl_offcore_response_1, }, }; papi-5.6.0/src/components/vmware/README000664 001750 001750 00000001025 13216244360 021665 0ustar00jshenry1963jshenry1963000000 000000 /** * @file: README * CVS: $Id$ * @author: Dan Terpstra * terpstra@icl.utk.edu * @defgroup papi_components Components * @brief Component Specific Readme file: VMware */ /** @page component_readme Component Readme @section Component Specific Information vmware/ To make the generic VMware component do --with-vmware_incdir=< path_to_VMWare_Guest_SDK > from the component directory. For further information see the VMwareComponentDocument.txt file in the component directory, or the ComponentGuide pdf file. */ papi-5.6.0/src/components/net/README000664 001750 001750 00000003347 13216244357 021171 0ustar00jshenry1963jshenry1963000000 000000 COMPONENT net SUMMARY Linux network component DESCRIPTION This network component enables PAPI-C to access the network statistics exported by the Linux kernel through the /proc pseudo-file system (file /proc/net/dev). This component will dynamically create a native events table based on the number of interfaces listed in /proc/net/dev (16 entries for each network interface). 
Event names ------------------------- ".rx.bytes", ".rx.packets", ".rx.errors", ".rx.dropped", ".rx.fifo", ".rx.frame", ".rx.compressed", ".rx.multicast", ".tx.bytes", ".tx.packets", ".tx.errors", ".tx.dropped", ".tx.fifo", ".tx.colls", ".tx.carrier", ".tx.compressed" By default the Linux kernel only updates the network statistics once every second (see the references listed in the "SEE ALSO" section for some problems you may come across and for how to change the default polling period). Note: The Linux network statistics are updated by code that resides in the file net/core/dev.c. AUTHOR Initially written by Haihang You. Re-written by Jose Pedro Oliveira in order to source data directly from /proc/net/dev. SEE ALSO * Network Stats Anomaly http://collectl.sourceforge.net/NetworkStats.html * Occasionally corrupted network stats in /proc/net/dev http://kerneltrap.org/mailarchive/linux-netdev/2008/1/14/566936 http://kerneltrap.org/mailarchive/linux-netdev/2008/1/14/567512 # 2011-11-05 jpo # vim:set ai ts=4 sw=4 sts=4 et: papi-5.6.0/src/examples/PAPI_get_virt_cyc.c000664 001750 001750 00000003311 13216244361 022574 0ustar00jshenry1963jshenry1963000000 000000 /****************************************************************************** * This is an example to show how to use low level function PAPI_get_virt_cyc * * and PAPI_get_virt_usec. 
* ******************************************************************************/ #include #include #include "papi.h" /* This needs to be included every time you use PAPI */ int i; double tmp; int your_slow_code() { for(i=1; i<200000; i++) { tmp= (tmp+i)/2; } return 0; } int main() { long long s,s1, e, e1; int retval; /**************************************************************************** * This part initializes the library and compares the version number of the * * header file, to the version of the library, if these don't match then it * * is likely that PAPI won't work correctly.If there is an error, retval * * keeps track of the version number. * ****************************************************************************/ if((retval = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT ) { printf("Library initialization error! \n"); exit(1); } /* Here you get initial cycles and time */ /* No error checking is done here because this function call is always successful */ s = PAPI_get_virt_cyc(); your_slow_code(); /*Here you get final cycles and time */ e = PAPI_get_virt_cyc(); s1= PAPI_get_virt_usec(); your_slow_code(); e1= PAPI_get_virt_usec(); printf("Virtual cycles : %lld\nVirtual time(ms): %lld\n",e-s,e1-s1); /* clean up */ PAPI_shutdown(); exit(0); } papi-5.6.0/man/man3/PAPI_cleanup_eventset.3000664 001750 001750 00000002624 13216244356 022415 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPI_cleanup_eventset" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPI_cleanup_eventset \- .PP Empty and destroy an EventSet\&. .SH SYNOPSIS .br .PP .SH "Detailed Description" .PP .PP .nf @par C Interface: \#include @n int PAPI_cleanup_eventset( int EventSet ); .fi .PP .PP \fBPAPI_cleanup_eventset\fP removes all events from a PAPI event set and turns off profiling and overflow for all events in the EventSet\&. This can not be called if the EventSet is not stopped\&. 
.PP \fBParameters:\fP .RS 4 \fIEventSet\fP An integer handle for a PAPI event set as created by \fBPAPI_create_eventset\fP\&. .RE .PP \fBReturn values:\fP .RS 4 \fIPAPI_EINVAL\fP One or more of the arguments is invalid\&. Attempting to destroy a non-empty event set or passing in a null pointer to be destroyed\&. .br \fIPAPI_ENOEVST\fP The EventSet specified does not exist\&. .br \fIPAPI_EISRUN\fP The EventSet is currently counting events\&. .br \fIPAPI_EBUG\fP Internal error, send mail to ptools-perfapi@icl.utk.edu and complain\&. .RE .PP \fBExamples:\fP .RS 4 .PP .nf * // Remove all events in the eventset * if ( PAPI_cleanup_eventset( EventSet ) != PAPI_OK ) * handle_error( 1 ); * .fi .PP .RE .PP .PP .nf @see PAPI_profil @n PAPI_create_eventset @n PAPI_add_event @n PAPI_stop.fi .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. papi-5.6.0/src/perfctr-2.7.x/etc/costs/Athlon-1.53000664 001750 001750 00000001465 13216244367 023217 0ustar00jshenry1963jshenry1963000000 000000 [data from a 1.53 GHz Athlon 1800+] PERFCTR INIT: vendor 2, family 6, model 6, stepping 2, clock 1534121 kHz PERFCTR INIT: NITER == 64 PERFCTR INIT: loop overhead is 166 cycles PERFCTR INIT: rdtsc cost is 14.1 cycles (1070 total) PERFCTR INIT: rdpmc cost is 13.9 cycles (1059 total) PERFCTR INIT: rdmsr (counter) cost is 51.7 cycles (3481 total) PERFCTR INIT: rdmsr (evntsel) cost is 52.7 cycles (3541 total) PERFCTR INIT: wrmsr (counter) cost is 82.9 cycles (5472 total) PERFCTR INIT: wrmsr (evntsel) cost is 231.6 cycles (14993 total) PERFCTR INIT: read cr4 cost is 2.0 cycles (300 total) PERFCTR INIT: write cr4 cost is 62.9 cycles (4196 total) PERFCTR INIT: write LVTPC cost is 9.2 cycles (757 total) PERFCTR INIT: sync_core cost is 73.7 cycles (4883 total) perfctr: driver 2.7.14, cpu type AMD K7/K8 at 1534121 kHz papi-5.6.0/man/man3/PAPI_get_event_info.3000664 001750 001750 00000002036 13216244356 022041 0ustar00jshenry1963jshenry1963000000 000000 .TH 
"PAPI_get_event_info" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPI_get_event_info \- .PP Get the event's name and description info\&. .SH SYNOPSIS .br .PP .SH "Detailed Description" .PP .PP \fBParameters:\fP .RS 4 \fIEventCode\fP event code (preset or native) .br \fIinfo\fP structure with the event information \fBPAPI_event_info_t\fP .RE .PP \fBReturn values:\fP .RS 4 \fIPAPI_EINVAL\fP One or more of the arguments is invalid\&. .br \fIPAPI_ENOTPRESET\fP The PAPI preset mask was set, but the hardware event specified is not a valid PAPI preset\&. .br \fIPAPI_ENOEVNT\fP The PAPI preset is not available on the underlying hardware\&. .RE .PP This function fills the event information into a structure\&. In Fortran, some fields of the structure are returned explicitly\&. This function works with existing PAPI preset and native event codes\&. .PP \fBSee Also:\fP .RS 4 \fBPAPI_event_name_to_code\fP .RE .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. papi-5.6.0/src/libpfm4/examples/showevtinfo.c000664 001750 001750 00000050231 13216244364 023220 0ustar00jshenry1963jshenry1963000000 000000 /* * showevtinfo.c - show event information * * Copyright (c) 2010 Google, Inc * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. */ #include #include #include #include #include #include #include #include #include #include #include #define MAXBUF 1024 #define COMBO_MAX 18 static struct { int compact; int sort; uint8_t encode; uint8_t combo; uint8_t combo_lim; uint8_t name_only; uint8_t desc; char *csv_sep; pfm_event_info_t efilter; pfm_event_attr_info_t ufilter; pfm_os_t os; uint64_t mask; } options; typedef struct { uint64_t code; int idx; } code_info_t; static void show_event_info_compact(pfm_event_info_t *info); static const char *srcs[PFM_ATTR_CTRL_MAX]={ [PFM_ATTR_CTRL_UNKNOWN] = "???", [PFM_ATTR_CTRL_PMU] = "PMU", [PFM_ATTR_CTRL_PERF_EVENT] = "perf_event", }; #ifdef PFMLIB_WINDOWS int set_env_var(const char *var, const char *value, int ov) { size_t len; char *str; int ret; len = strlen(var) + 1 + strlen(value) + 1; str = malloc(len); if (!str) return PFM_ERR_NOMEM; sprintf(str, "%s=%s", var, value); ret = putenv(str); free(str); return ret ? 
PFM_ERR_INVAL : PFM_SUCCESS;
}
#else
/*
 * Non-Windows variant of set_env_var(): forwards var, value and ov
 * unchanged to setenv() and returns setenv()'s result.
 */
static inline int set_env_var(const char *var, const char *value, int ov)
{
	return setenv(var, value, ov);
}
#endif

/*
 * Return non-zero when the first ':' found in s is immediately followed
 * by another ':' (the "pmu::event" form), 0 otherwise, including when s
 * contains no ':' at all.
 */
static int
event_has_pname(char *s)
{
	char *p;
	return (p = strchr(s, ':')) && *(p+1) == ':';
}

/*
 * Encode the event string buf and print its codes.
 *
 * Exactly max_encoding fields are written: for each slot below the count
 * returned by pfm_get_event_encoding() the code is printed in hex, and
 * every slot (filled or not) is followed by options.csv_sep.
 *
 * The plm argument is not used by the body; PFM_PLM0|PFM_PLM3 is always
 * passed to the encoder.
 *
 * Returns 0 on success and -1 when encoding fails. A PFM_ERR_NOTFOUND
 * failure terminates the program through errx().
 */
static int
print_codes(char *buf, int plm, int max_encoding)
{
	uint64_t *codes = NULL;
	int j, ret, count = 0;

	/* codes is NULL on entry and freed below, so the array is presumably
	 * allocated by the library -- confirm against the libpfm4 docs */
	ret = pfm_get_event_encoding(buf, PFM_PLM0|PFM_PLM3, NULL, NULL, &codes, &count);
	if (ret != PFM_SUCCESS) {
		if (ret == PFM_ERR_NOTFOUND)
			errx(1, "encoding failed, try setting env variable LIBPFM_ENCODE_INACTIVE=1");
		return -1;
	}
	for(j = 0; j < max_encoding; j++) {
		if (j < count)
			printf("0x%"PRIx64, codes[j]);
		printf("%s", options.csv_sep);
	}
	free(codes);
	return 0;
}

/*
 * Check that the event string buf can be encoded, printing nothing.
 * Returns 0 when pfm_get_event_encoding() succeeds and -1 otherwise.
 * As in print_codes(), plm is not used by the body.
 */
static int
check_valid(char *buf, int plm)
{
	uint64_t *codes = NULL;
	int ret, count = 0;

	ret = pfm_get_event_encoding(buf, PFM_PLM0|PFM_PLM3, NULL, NULL, &codes, &count);
	if (ret != PFM_SUCCESS)
		return -1;
	free(codes);
	return 0;
}

/*
 * Test an attribute against the flag filters held in options.ufilter.
 *
 * Both the requested flags and the attribute's flags are folded into a
 * small bitmask (bit 0: is_dfl, bit 1: is_precise).
 *
 * Returns 1 when no filter flag was requested; otherwise non-zero only if
 * the attribute carries at least one of the requested flags.
 */
static int
match_ufilters(pfm_event_attr_info_t *info)
{
	uint32_t ufilter1 = 0;	/* flags requested by the user */
	uint32_t ufilter2 = 0;	/* flags present on this attribute */

	if (options.ufilter.is_dfl)
		ufilter1 |= 0x1;

	if (info->is_dfl)
		ufilter2 |= 0x1;

	if (options.ufilter.is_precise)
		ufilter1 |= 0x2;

	if (info->is_precise)
		ufilter2 |= 0x2;

	if (!ufilter1)
		return 1;

	/* at least one filter matches */
	return ufilter1 & ufilter2;
}

/*
 * Test an event against the flag filters.
 *
 * Returns 0 at once when the precise event filter is set and the event is
 * not precise. Otherwise returns 1 as soon as one attribute passes
 * match_ufilters(); attributes whose info cannot be retrieved are skipped.
 * n counts the unit-mask attributes seen, and decides the final result
 * when no attribute matched.
 */
static int
match_efilters(pfm_event_info_t *info)
{
	pfm_event_attr_info_t ainfo;
	int n = 0;
	int i, ret;

	if (options.efilter.is_precise && !info->is_precise)
		return 0;

	memset(&ainfo, 0, sizeof(ainfo));
	ainfo.size = sizeof(ainfo);

	pfm_for_each_event_attr(i, info) {
		ret = pfm_get_event_attr_info(info->idx, i, options.os, &ainfo);
		if (ret != PFM_SUCCESS)
			continue;
		if (match_ufilters(&ainfo))
			return 1;
		if (ainfo.type == PFM_ATTR_UMASK)
			n++;
	}
	/* an event with unit masks but no matching one fails; one without passes */
	return n ?
0 : 1; } static void show_event_info_combo(pfm_event_info_t *info) { pfm_event_attr_info_t *ainfo; pfm_pmu_info_t pinfo; char buf[MAXBUF]; size_t len; int numasks = 0; int i, j, ret; uint64_t total, m, u; memset(&pinfo, 0, sizeof(pinfo)); pinfo.size = sizeof(pinfo); ret = pfm_get_pmu_info(info->pmu, &pinfo); if (ret != PFM_SUCCESS) errx(1, "cannot get PMU info"); ainfo = calloc(info->nattrs, sizeof(*ainfo)); if (!ainfo) err(1, "event %s : ", info->name); /* * extract attribute information and count number * of umasks * * we cannot just drop non umasks because we need * to keep attributes in order for the enumeration * of 2^n */ pfm_for_each_event_attr(i, info) { ainfo[i].size = sizeof(*ainfo); ret = pfm_get_event_attr_info(info->idx, i, options.os, &ainfo[i]); if (ret != PFM_SUCCESS) errx(1, "cannot get attribute info: %s", pfm_strerror(ret)); if (ainfo[i].type == PFM_ATTR_UMASK) numasks++; } if (numasks > options.combo_lim) { warnx("event %s has too many umasks to print all combinations, dropping to simple enumeration", info->name); free(ainfo); show_event_info_compact(info); return; } if (numasks) { if (info->nattrs > (int)((sizeof(total)<<3))) { warnx("too many umasks, cannot show all combinations for event %s", info->name); goto end; } total = 1ULL << info->nattrs; for (u = 1; u < total; u++) { len = sizeof(buf); len -= snprintf(buf, len, "%s::%s", pinfo.name, info->name); if (len <= 0) { warnx("event name too long%s", info->name); goto end; } for(m = u, j = 0; m; m >>=1, j++) { if (m & 0x1ULL) { /* we have hit a non umasks attribute, skip */ if (ainfo[j].type != PFM_ATTR_UMASK) break; if (len < (1 + strlen(ainfo[j].name))) { warnx("umasks combination too long for event %s", buf); break; } strncat(buf, ":", len-1);buf[len-1] = '\0'; len--; strncat(buf, ainfo[j].name, len-1);buf[len-1] = '\0'; len -= strlen(ainfo[j].name); } } /* if found a valid umask combination, check encoding */ if (m == 0) { if (options.encode) ret = print_codes(buf, PFM_PLM0|PFM_PLM3, 
pinfo.max_encoding); else ret = check_valid(buf, PFM_PLM0|PFM_PLM3); if (!ret) printf("%s\n", buf); } } } else { snprintf(buf, sizeof(buf)-1, "%s::%s", pinfo.name, info->name); buf[sizeof(buf)-1] = '\0'; ret = options.encode ? print_codes(buf, PFM_PLM0|PFM_PLM3, pinfo.max_encoding) : 0; if (!ret) printf("%s\n", buf); } end: free(ainfo); } static void show_event_info_compact(pfm_event_info_t *info) { pfm_event_attr_info_t ainfo; pfm_pmu_info_t pinfo; char buf[MAXBUF]; int i, ret, um = 0; memset(&ainfo, 0, sizeof(ainfo)); memset(&pinfo, 0, sizeof(pinfo)); pinfo.size = sizeof(pinfo); ainfo.size = sizeof(ainfo); ret = pfm_get_pmu_info(info->pmu, &pinfo); if (ret != PFM_SUCCESS) errx(1, "cannot get pmu info: %s", pfm_strerror(ret)); if (options.name_only) { if (options.encode) printf("0x%-10"PRIx64, info->code); printf("%s\n", info->name); return; } pfm_for_each_event_attr(i, info) { ret = pfm_get_event_attr_info(info->idx, i, options.os, &ainfo); if (ret != PFM_SUCCESS) errx(1, "cannot get attribute info: %s", pfm_strerror(ret)); if (ainfo.type != PFM_ATTR_UMASK) continue; if (!match_ufilters(&ainfo)) continue; snprintf(buf, sizeof(buf)-1, "%s::%s:%s", pinfo.name, info->name, ainfo.name); buf[sizeof(buf)-1] = '\0'; ret = 0; if (options.encode) { ret = print_codes(buf, PFM_PLM0|PFM_PLM3, pinfo.max_encoding); } if (!ret) { printf("%s", buf); if (options.desc) { printf("%s", options.csv_sep); printf("\"%s. 
%s.\"", info->desc, ainfo.desc); } putchar('\n'); } um++; } if (um == 0) { if (!match_efilters(info)) return; snprintf(buf, sizeof(buf)-1, "%s::%s", pinfo.name, info->name); buf[sizeof(buf)-1] = '\0'; if (options.encode) { ret = print_codes(buf, PFM_PLM0|PFM_PLM3, pinfo.max_encoding); if (ret) return; } printf("%s", buf); if (options.desc) { printf("%s", options.csv_sep); printf("\"%s.\"", info->desc); } putchar('\n'); } } int compare_codes(const void *a, const void *b) { const code_info_t *aa = a; const code_info_t *bb = b; uint64_t m = options.mask; if ((aa->code & m) < (bb->code &m)) return -1; if ((aa->code & m) == (bb->code & m)) return 0; return 1; } static void print_event_flags(pfm_event_info_t *info) { int n = 0; if (info->is_precise) { printf("[precise] "); n++; } if (!n) printf("None"); } static void print_attr_flags(pfm_event_attr_info_t *info) { int n = 0; if (info->is_dfl) { printf("[default] "); n++; } if (info->is_precise) { printf("[precise] "); n++; } if (!n) printf("None "); } static void show_event_info(pfm_event_info_t *info) { pfm_event_attr_info_t ainfo; pfm_pmu_info_t pinfo; int mod = 0, um = 0; int i, ret; const char *src; if (options.name_only) { printf("%s\n", info->name); return; } memset(&ainfo, 0, sizeof(ainfo)); memset(&pinfo, 0, sizeof(pinfo)); pinfo.size = sizeof(pinfo); ainfo.size = sizeof(ainfo); if (!match_efilters(info)) return; ret = pfm_get_pmu_info(info->pmu, &pinfo); if (ret) errx(1, "cannot get pmu info: %s", pfm_strerror(ret)); printf("#-----------------------------\n" "IDX : %d\n" "PMU name : %s (%s)\n" "Name : %s\n" "Equiv : %s\n", info->idx, pinfo.name, pinfo.desc, info->name, info->equiv ? info->equiv : "None"); printf("Flags : "); print_event_flags(info); putchar('\n'); printf("Desc : %s\n", info->desc ? 
info->desc : "no description available"); printf("Code : 0x%"PRIx64"\n", info->code); pfm_for_each_event_attr(i, info) { ret = pfm_get_event_attr_info(info->idx, i, options.os, &ainfo); if (ret != PFM_SUCCESS) errx(1, "cannot retrieve event %s attribute info: %s", info->name, pfm_strerror(ret)); if (ainfo.ctrl >= PFM_ATTR_CTRL_MAX) { warnx("event: %s has unsupported attribute source %d", info->name, ainfo.ctrl); ainfo.ctrl = PFM_ATTR_CTRL_UNKNOWN; } src = srcs[ainfo.ctrl]; switch(ainfo.type) { case PFM_ATTR_UMASK: if (!match_ufilters(&ainfo)) continue; printf("Umask-%02u : 0x%02"PRIx64" : %s : [%s] : ", um, ainfo.code, src, ainfo.name); print_attr_flags(&ainfo); putchar(':'); if (ainfo.equiv) printf(" Alias to %s", ainfo.equiv); else printf(" %s", ainfo.desc); putchar('\n'); um++; break; case PFM_ATTR_MOD_BOOL: printf("Modif-%02u : 0x%02"PRIx64" : %s : [%s] : %s (boolean)\n", mod, ainfo.code, src, ainfo.name, ainfo.desc); mod++; break; case PFM_ATTR_MOD_INTEGER: printf("Modif-%02u : 0x%02"PRIx64" : %s : [%s] : %s (integer)\n", mod, ainfo.code, src, ainfo.name, ainfo.desc); mod++; break; default: printf("Attr-%02u : 0x%02"PRIx64" : %s : [%s] : %s\n", i, ainfo.code, ainfo.name, src, ainfo.desc); } } } static int show_info(char *event, regex_t *preg) { pfm_pmu_info_t pinfo; pfm_event_info_t info; int i, j, ret, match = 0, pname; size_t len, l = 0; char *fullname = NULL; memset(&pinfo, 0, sizeof(pinfo)); memset(&info, 0, sizeof(info)); pinfo.size = sizeof(pinfo); info.size = sizeof(info); pname = event_has_pname(event); /* * scan all supported events, incl. 
those * from undetected PMU models */ pfm_for_all_pmus(j) { ret = pfm_get_pmu_info(j, &pinfo); if (ret != PFM_SUCCESS) continue; /* no pmu prefix, just look for detected PMU models */ if (!pname && !pinfo.is_present) continue; for (i = pinfo.first_event; i != -1; i = pfm_get_event_next(i)) { ret = pfm_get_event_info(i, options.os, &info); if (ret != PFM_SUCCESS) errx(1, "cannot get event info: %s", pfm_strerror(ret)); len = strlen(info.name) + strlen(pinfo.name) + 1 + 2; if (len > l) { l = len; fullname = realloc(fullname, l); if (!fullname) err(1, "cannot allocate memory"); } sprintf(fullname, "%s::%s", pinfo.name, info.name); if (regexec(preg, fullname, 0, NULL, 0) == 0) { if (options.compact) if (options.combo) show_event_info_combo(&info); else show_event_info_compact(&info); else show_event_info(&info); match++; } } } if (fullname) free(fullname); return match; } static int show_info_sorted(char *event, regex_t *preg) { pfm_pmu_info_t pinfo; pfm_event_info_t info; unsigned int j; int i, ret, n, match = 0; size_t len, l = 0; char *fullname = NULL; code_info_t *codes; memset(&pinfo, 0, sizeof(pinfo)); memset(&info, 0, sizeof(info)); pinfo.size = sizeof(pinfo); info.size = sizeof(info); pfm_for_all_pmus(j) { ret = pfm_get_pmu_info(j, &pinfo); if (ret != PFM_SUCCESS) continue; codes = malloc(pinfo.nevents * sizeof(*codes)); if (!codes) err(1, "cannot allocate memory\n"); /* scans all supported events */ n = 0; for (i = pinfo.first_event; i != -1; i = pfm_get_event_next(i)) { ret = pfm_get_event_info(i, options.os, &info); if (ret != PFM_SUCCESS) errx(1, "cannot get event info: %s", pfm_strerror(ret)); if (info.pmu != j) continue; codes[n].idx = info.idx; codes[n].code = info.code; n++; } qsort(codes, n, sizeof(*codes), compare_codes); for(i=0; i < n; i++) { ret = pfm_get_event_info(codes[i].idx, options.os, &info); if (ret != PFM_SUCCESS) errx(1, "cannot get event info: %s", pfm_strerror(ret)); len = strlen(info.name) + strlen(pinfo.name) + 1 + 2; if (len > l) { l 
= len; fullname = realloc(fullname, l); if (!fullname) err(1, "cannot allocate memory"); } sprintf(fullname, "%s::%s", pinfo.name, info.name); if (regexec(preg, fullname, 0, NULL, 0) == 0) { if (options.compact) show_event_info_compact(&info); else show_event_info(&info); match++; } } free(codes); } if (fullname) free(fullname); return match; } static void usage(void) { printf("showevtinfo [-L] [-E] [-h] [-s] [-m mask]\n" "-L\t\tlist one event per line (compact mode)\n" "-E\t\tlist one event per line with encoding (compact mode)\n" "-M\t\tdisplay all valid unit masks combination (use with -L or -E)\n" "-h\t\tget help\n" "-s\t\tsort event by PMU and by code based on -m mask\n" "-l\t\tmaximum number of umasks to list all combinations (default: %d)\n" "-F\t\tshow only events and attributes with certain flags (precise,...)\n" "-m mask\t\thexadecimal event code mask, bits to match when sorting\n" "-x sep\t\tuse sep as field separator in compact mode\n" "-D\t\t\tprint event description in compact mode\n" "-O os\t\tshow attributes for the specific operating system\n", COMBO_MAX); } /* * keep: [pmu::]event * drop everything else */ static void drop_event_attributes(char *str) { char *p; p = strchr(str, ':'); if (!p) return; str = p+1; /* keep PMU name */ if (*str == ':') str++; /* stop string at 1st attribute */ p = strchr(str, ':'); if (p) *p = '\0'; } #define EVENT_FLAGS(n, f, l) { .name = n, .ebit = f, .ubit = l } struct attr_flags { const char *name; int ebit; /* bit position in pfm_event_info_t.flags, -1 means ignore */ int ubit; /* bit position in pfm_event_attr_info_t.flags, -1 means ignore */ }; static const struct attr_flags event_flags[]={ EVENT_FLAGS("precise", 0, 1), EVENT_FLAGS("pebs", 0, 1), EVENT_FLAGS("default", -1, 0), EVENT_FLAGS("dfl", -1, 0), EVENT_FLAGS(NULL, 0, 0) }; static void parse_filters(char *arg) { const struct attr_flags *attr; char *p; while (arg) { p = strchr(arg, ','); if (p) *p++ = 0; for (attr = event_flags; attr->name; attr++) { if 
(!strcasecmp(attr->name, arg)) { switch(attr->ebit) { case 0: options.efilter.is_precise = 1; break; case -1: break; default: errx(1, "unknown event flag %d", attr->ebit); } switch (attr->ubit) { case 0: options.ufilter.is_dfl = 1; break; case 1: options.ufilter.is_precise = 1; break; case -1: break; default: errx(1, "unknown umaks flag %d", attr->ubit); } break; } } arg = p; } } static const struct { char *name; pfm_os_t os; } supported_oses[]={ { .name = "none", .os = PFM_OS_NONE }, { .name = "raw", .os = PFM_OS_NONE }, { .name = "pmu", .os = PFM_OS_NONE }, { .name = "perf", .os = PFM_OS_PERF_EVENT}, { .name = "perf_ext", .os = PFM_OS_PERF_EVENT_EXT}, { .name = NULL, } }; static const char *pmu_types[]={ "unknown type", "core", "uncore", "OS generic", }; static void setup_os(char *ostr) { int i; for (i = 0; supported_oses[i].name; i++) { if (!strcmp(supported_oses[i].name, ostr)) { options.os = supported_oses[i].os; return; } } fprintf(stderr, "unknown OS layer %s, choose from:", ostr); for (i = 0; supported_oses[i].name; i++) { if (i) fputc(',', stderr); fprintf(stderr, " %s", supported_oses[i].name); } fputc('\n', stderr); exit(1); } int main(int argc, char **argv) { static char *argv_all[2] = { ".*", NULL }; pfm_pmu_info_t pinfo; char *endptr = NULL; char default_sep[2] = "\t"; char *ostr = NULL; char **args; int i, match; regex_t preg; int ret, c; memset(&pinfo, 0, sizeof(pinfo)); pinfo.size = sizeof(pinfo); while ((c=getopt(argc, argv,"hELsm:MNl:F:x:DO:")) != -1) { switch(c) { case 'L': options.compact = 1; break; case 'F': parse_filters(optarg); break; case 'E': options.compact = 1; options.encode = 1; break; case 'M': options.combo = 1; break; case 'N': options.name_only = 1; break; case 's': options.sort = 1; break; case 'D': options.desc = 1; break; case 'l': options.combo_lim = atoi(optarg); break; case 'x': options.csv_sep = optarg; break; case 'O': ostr = optarg; break; case 'm': options.mask = strtoull(optarg, &endptr, 16); if (*endptr) errx(1, "mask 
must be in hexadecimal\n"); break; case 'h': usage(); exit(0); default: errx(1, "unknown option error"); } } /* to allow encoding of events from non detected PMU models */ ret = set_env_var("LIBPFM_ENCODE_INACTIVE", "1", 1); if (ret != PFM_SUCCESS) errx(1, "cannot force inactive encoding"); ret = pfm_initialize(); if (ret != PFM_SUCCESS) errx(1, "cannot initialize libpfm: %s", pfm_strerror(ret)); if (options.mask == 0) options.mask = ~0; if (optind == argc) { args = argv_all; } else { args = argv + optind; } if (!options.csv_sep) options.csv_sep = default_sep; /* avoid combinatorial explosion */ if (options.combo_lim == 0) options.combo_lim = COMBO_MAX; if (ostr) setup_os(ostr); else options.os = PFM_OS_NONE; if (!options.compact) { int total_supported_events = 0; int total_available_events = 0; printf("Supported PMU models:\n"); pfm_for_all_pmus(i) { ret = pfm_get_pmu_info(i, &pinfo); if (ret != PFM_SUCCESS) continue; printf("\t[%d, %s, \"%s\"]\n", i, pinfo.name, pinfo.desc); } printf("Detected PMU models:\n"); pfm_for_all_pmus(i) { ret = pfm_get_pmu_info(i, &pinfo); if (ret != PFM_SUCCESS) continue; if (pinfo.is_present) { if (pinfo.type >= PFM_PMU_TYPE_MAX) pinfo.type = PFM_PMU_TYPE_UNKNOWN; printf("\t[%d, %s, \"%s\", %d events, %d max encoding, %d counters, %s PMU]\n", i, pinfo.name, pinfo.desc, pinfo.nevents, pinfo.max_encoding, pinfo.num_cntrs + pinfo.num_fixed_cntrs, pmu_types[pinfo.type]); total_supported_events += pinfo.nevents; } total_available_events += pinfo.nevents; } printf("Total events: %d available, %d supported\n", total_available_events, total_supported_events); } while(*args) { /* drop umasks and modifiers */ drop_event_attributes(*args); if (regcomp(&preg, *args, REG_ICASE)) errx(1, "error in regular expression for event \"%s\"", *argv); if (options.sort) match = show_info_sorted(*args, &preg); else match = show_info(*args, &preg); if (match == 0) errx(1, "event %s not found", *args); args++; } regfree(&preg); pfm_terminate(); return 0; } 
papi-5.6.0/src/libpfm-3.y/TODO000664 001750 001750 00000000433 13216244361 017672 0ustar00jshenry1963jshenry1963000000 000000 TODO list: ---------- - add Linux/ia64 perfmon support to GNU libc, this would avoid having the perfmon.h perfmon_default_smpl.h headers here. - add library interface to help setup system-wide mode SMP on Linux/ia64 - add support for cumulative calls to pfm_dispatch_events() papi-5.6.0/src/libpfm-3.y/docs/man3/pfm_get_event_description.3000664 001750 001750 00000000036 13216244361 026300 0ustar00jshenry1963jshenry1963000000 000000 .so man3/pfm_get_event_name.3 papi-5.6.0/src/components/cuda/tests/cuda_ld_preload_example.c000664 001750 001750 00000011674 13216244357 026563 0ustar00jshenry1963jshenry1963000000 000000 /* Example of using LD_PRELOAD with the CUDA component. Asim YarKhan This is designed to work with the simpleMultiGPU_no_counters binary in the PAPI CUDA component tests directory. First trace the library calls in simpleMultiGPU_no_counters binary using ltrace. Note in the ltrace output that the CUDA C APIs are different from the CUDA calls visible to nvcc. Then figure out appropriate place to attach the PAPI calls. The initialization is attached to the first entry to cudaSetDevice. Each cudaSetDevice is also used to setup the PAPI events for that device. It was harder to figure out where to attach the PAPI_start. After running some tests, I attached it to the 18th invocation of gettimeofday (kind of arbitrary! Sorry!). The PAPI_stop was attached to the first invocation of cudaFreeHost. 
*/ #define _GNU_SOURCE #include #include #include "papi.h" #define MAXDEVICES 5 int EventSet = PAPI_NULL; int devseen[MAXDEVICES] = {0}; static void *dl1; int (*PAPI_library_init_ptr)(int version); /**< initialize the PAPI library */ int (*PAPI_create_eventset_ptr)(int *EventSet); /**< create a new empty PAPI event set */ int (*PAPI_add_named_event_ptr)(int EventSet, char *EventName); /**< add an event by name to a PAPI event set */ int (*PAPI_start_ptr)(int EventSet); /**< start counting hardware events in an event set */ int (*PAPI_stop_ptr)(int EventSet, long long * values); /**< stop counting hardware events in an event set and return current events */ int cudaSetDevice(int devnum, int n1, int n2, int n3, void *ptr1) { static int onetime = 0; int retval, retval_cudaSetDevice; //printf("cudaSetDevice wrapper %d\n", devnum); if ( onetime==0 ) { onetime=1; // Load the papi library dynamically and read the relevant functions dl1 = dlopen( "libpapi.so", RTLD_NOW | RTLD_GLOBAL ); if ( dl1==NULL ) printf("Intercept cudaSetDevice: Cannot load libpapi.so\n"); PAPI_library_init_ptr = dlsym( dl1, "PAPI_library_init" ); PAPI_create_eventset_ptr = dlsym( dl1, "PAPI_create_eventset" ); PAPI_add_named_event_ptr = dlsym( dl1, "PAPI_add_named_event" ); PAPI_start_ptr = dlsym( dl1, "PAPI_start" ); PAPI_stop_ptr = dlsym( dl1, "PAPI_stop" ); // Start using PAPI printf("Intercept cudaSetDevice: Initializing PAPI on device %d\n", devnum); retval = (PAPI_library_init_ptr)( PAPI_VER_CURRENT ); if( retval != PAPI_VER_CURRENT ) fprintf( stdout, "PAPI_library_init failed\n" ); printf( "PAPI version: %d.%d.%d\n", PAPI_VERSION_MAJOR( PAPI_VERSION ), PAPI_VERSION_MINOR( PAPI_VERSION ), PAPI_VERSION_REVISION( PAPI_VERSION ) ); retval = (PAPI_create_eventset_ptr)( &EventSet ); if( retval != PAPI_OK ) fprintf( stdout, "PAPI_create_eventset failed\n" ); } int (*original_function)(int devnum, int n1, int n2, int n3, void *ptr1); original_function = dlsym(RTLD_NEXT, "cudaSetDevice"); 
retval_cudaSetDevice = (*original_function)( devnum, n1, n2, n3, ptr1 ); if ( devseen[devnum]==0 ) { devseen[devnum]=1; char tmpEventName[120]; printf("Intercept cudaSetDevice: Attaching events for device on device %d\n", devnum); snprintf( tmpEventName, 110, "cuda:::device:%d:%s", devnum, "inst_executed" ); retval = (PAPI_add_named_event_ptr)( EventSet, tmpEventName ); if (retval!=PAPI_OK) printf( "Could not add event %s\n", tmpEventName ); } return retval_cudaSetDevice; } int gettimeofday(void *ptr1, void *ptr2) { static int onetime = 0; onetime++; // printf("gettimeofday onetime %d\n", onetime); // Use above print statement to determine that the N-th gettime of day works if ( onetime==17 ) { printf("Intercept gettimeofday: Attaching PAPI_start to the %d th call to gettimeofday (this may need to be adjusted)\n", onetime); int retval = (PAPI_start_ptr)( EventSet ); printf("Starting PAPI\n"); if( retval!=PAPI_OK ) fprintf( stdout, "PAPI_start failed\n" ); } int (*original_function)(void *ptr1, void *ptr2); original_function = dlsym(RTLD_NEXT, "gettimeofday"); return (*original_function)(ptr1, ptr2); } int cudaFreeHost(void *ptr1, void *ptr2, int n1, int n2, void *ptr3) { static int onetime = 0; long long values[10]; int retval, devnum; onetime++; if ( onetime==1 ) { printf("Intercept cudaFreeHost: Used to get PAPI results\n" ); retval = (PAPI_stop_ptr)( EventSet, values ); if( retval != PAPI_OK ) fprintf( stderr, "PAPI_stop failed\n" ); for( devnum = 0; devnum < MAXDEVICES && devseen[devnum]==1 ; devnum++ ) printf( "PAPI counterValue: cuda::device:%d:%s: %12lld \n", devnum, "inst_executed", values[devnum] ); } int (*original_function)(void *ptr1, void *ptr2, int n1, int n2, void *ptr3); original_function = dlsym(RTLD_NEXT, "cudaFreeHost"); return (*original_function)(ptr1, ptr2, n1, n2, ptr3); } papi-5.6.0/src/validation_tests/papi_br_ins.c000664 001750 001750 00000004370 13216244370 023333 0ustar00jshenry1963jshenry1963000000 000000 /* This file attempts to test 
the retired branches instruction */ /* performance counter PAPI_BR_INS */ /* by Vince Weaver, */ /* This test seems to work on: */ /* + x86 */ /* + ARMv6 (Raspberry Pi) */ /* It is known to not work on: */ /* + ARMv7 (Pi2 CortexA7 Panda CortexA9 */ /* failure is odd, 1/3 missing but not consistent */ /* something weird with event 0xC PC_WRITE ? */ #include #include #include #include #include "papi.h" #include "papi_test.h" #include "display_error.h" #include "testcode.h" int main(int argc, char **argv) { int num_runs=100,i; long long high=0,low=0,average=0,expected=1500000; double error; long long count,total=0; int quiet=0,retval,ins_result; int eventset=PAPI_NULL; quiet=tests_quiet(argc,argv); if (!quiet) { printf("\nTesting the PAPI_BR_INS event.\n"); } /* Init the PAPI library */ retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) { test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); } retval=PAPI_create_eventset(&eventset); if (retval!=PAPI_OK) { test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); } retval=PAPI_add_named_event(eventset,"PAPI_BR_INS"); if (retval!=PAPI_OK) { test_skip( __FILE__, __LINE__, "adding PAPI_BR_INS", retval ); } if (!quiet) { printf("Testing a loop with %lld branches (%d times):\n", expected,num_runs); } for(i=0;ihigh) high=count; if ((low==0) || (count 1.0) || (error<-1.0)) { if (!quiet) printf("Instruction count off by more than 1%%\n"); test_fail( __FILE__, __LINE__, "Error too high", 1 ); } if (!quiet) printf("\n"); test_pass( __FILE__ ); PAPI_shutdown(); return 0; } papi-5.6.0/src/libpfm4/lib/events/power9_events.h000664 001750 001750 00001254414 13216244365 023732 0ustar00jshenry1963jshenry1963000000 000000 /* * File: power9_events.h * CVS: * Author: Will Schmidt * will_schmidt@vnet.ibm.com * Author: Carl Love * cel@us.ibm.com * * Mods: * Initial content generated by Will Schmidt. (Jan 31, 2017). * Refresh/update generated Jun 06, 2017 by Will Schmidt. 
* missing _ALT events added, Nov 16, 2017 by Will Schmidt. * * Contributed by * (C) Copyright IBM Corporation, 2017. All Rights Reserved. * * Note: This code was automatically generated and should not be modified by * hand. * * Documentation on the PMU events will be published at: * ... */ #ifndef __POWER9_EVENTS_H__ #define __POWER9_EVENTS_H__ #define POWER9_PME_PM_1FLOP_CMPL 0 #define POWER9_PME_PM_1PLUS_PPC_CMPL 1 #define POWER9_PME_PM_1PLUS_PPC_DISP 2 #define POWER9_PME_PM_2FLOP_CMPL 3 #define POWER9_PME_PM_4FLOP_CMPL 4 #define POWER9_PME_PM_8FLOP_CMPL 5 #define POWER9_PME_PM_ANY_THRD_RUN_CYC 6 #define POWER9_PME_PM_BACK_BR_CMPL 7 #define POWER9_PME_PM_BANK_CONFLICT 8 #define POWER9_PME_PM_BFU_BUSY 9 #define POWER9_PME_PM_BR_2PATH 10 #define POWER9_PME_PM_BR_CMPL 11 #define POWER9_PME_PM_BR_CORECT_PRED_TAKEN_CMPL 12 #define POWER9_PME_PM_BR_MPRED_CCACHE 13 #define POWER9_PME_PM_BR_MPRED_CMPL 14 #define POWER9_PME_PM_BR_MPRED_LSTACK 15 #define POWER9_PME_PM_BR_MPRED_PCACHE 16 #define POWER9_PME_PM_BR_MPRED_TAKEN_CR 17 #define POWER9_PME_PM_BR_MPRED_TAKEN_TA 18 #define POWER9_PME_PM_BR_PRED_CCACHE 19 #define POWER9_PME_PM_BR_PRED_LSTACK 20 #define POWER9_PME_PM_BR_PRED_PCACHE 21 #define POWER9_PME_PM_BR_PRED_TAKEN_CR 22 #define POWER9_PME_PM_BR_PRED_TA 23 #define POWER9_PME_PM_BR_PRED 24 #define POWER9_PME_PM_BR_TAKEN_CMPL 25 #define POWER9_PME_PM_BRU_FIN 26 #define POWER9_PME_PM_BR_UNCOND 27 #define POWER9_PME_PM_BTAC_BAD_RESULT 28 #define POWER9_PME_PM_BTAC_GOOD_RESULT 29 #define POWER9_PME_PM_CHIP_PUMP_CPRED 30 #define POWER9_PME_PM_CLB_HELD 31 #define POWER9_PME_PM_CMPLU_STALL_ANY_SYNC 32 #define POWER9_PME_PM_CMPLU_STALL_BRU 33 #define POWER9_PME_PM_CMPLU_STALL_CRYPTO 34 #define POWER9_PME_PM_CMPLU_STALL_DCACHE_MISS 35 #define POWER9_PME_PM_CMPLU_STALL_DFLONG 36 #define POWER9_PME_PM_CMPLU_STALL_DFU 37 #define POWER9_PME_PM_CMPLU_STALL_DMISS_L21_L31 38 #define POWER9_PME_PM_CMPLU_STALL_DMISS_L2L3_CONFLICT 39 #define POWER9_PME_PM_CMPLU_STALL_DMISS_L2L3 40 
/*
 * POWER9 PMU event index constants (continuation of the table that starts
 * at 0 earlier in this header).
 *
 * Each macro name is the POWER9_PME_ prefix followed by the event's PM_*
 * name, and each expands to a plain decimal integer.  Within this part of
 * the table the values are unique and increase by exactly one per entry,
 * from 41 through 908.
 *
 * NOTE(review): presumably each value is the position of the event in the
 * POWER9 event description array defined further down in this header --
 * confirm before relying on it.  If that holds, inserting, removing or
 * reordering an entry means renumbering every macro after it and keeping
 * that array in the same order.
 *
 * Names ending in _ALT are a second entry for a base event name that is
 * already in the table (the file header records that missing _ALT events
 * were added on Nov 16, 2017).  NOTE(review): what distinguishes an _ALT
 * entry from its base entry is not visible here -- check the event table.
 *
 * NOTE(review): the two PM_SPACEHOLDER_* entries (223, 224) look like
 * placeholders that keep the numbering dense -- confirm.
 *
 * The file header states that this code is generated and should not be
 * modified by hand; prefer regenerating it over editing single entries.
 */
#define POWER9_PME_PM_CMPLU_STALL_DMISS_L3MISS 41
#define POWER9_PME_PM_CMPLU_STALL_DMISS_LMEM 42
#define POWER9_PME_PM_CMPLU_STALL_DMISS_REMOTE 43
#define POWER9_PME_PM_CMPLU_STALL_DPLONG 44
#define POWER9_PME_PM_CMPLU_STALL_DP 45
#define POWER9_PME_PM_CMPLU_STALL_EIEIO 46
#define POWER9_PME_PM_CMPLU_STALL_EMQ_FULL 47
#define POWER9_PME_PM_CMPLU_STALL_ERAT_MISS 48
#define POWER9_PME_PM_CMPLU_STALL_EXCEPTION 49
#define POWER9_PME_PM_CMPLU_STALL_EXEC_UNIT 50
#define POWER9_PME_PM_CMPLU_STALL_FLUSH_ANY_THREAD 51
#define POWER9_PME_PM_CMPLU_STALL_FXLONG 52
#define POWER9_PME_PM_CMPLU_STALL_FXU 53
#define POWER9_PME_PM_CMPLU_STALL_HWSYNC 54
#define POWER9_PME_PM_CMPLU_STALL_LARX 55
#define POWER9_PME_PM_CMPLU_STALL_LHS 56
#define POWER9_PME_PM_CMPLU_STALL_LMQ_FULL 57
#define POWER9_PME_PM_CMPLU_STALL_LOAD_FINISH 58
#define POWER9_PME_PM_CMPLU_STALL_LRQ_FULL 59
#define POWER9_PME_PM_CMPLU_STALL_LRQ_OTHER 60
#define POWER9_PME_PM_CMPLU_STALL_LSAQ_ARB 61
#define POWER9_PME_PM_CMPLU_STALL_LSU_FIN 62
#define POWER9_PME_PM_CMPLU_STALL_LSU_FLUSH_NEXT 63
#define POWER9_PME_PM_CMPLU_STALL_LSU_MFSPR 64
#define POWER9_PME_PM_CMPLU_STALL_LSU 65
#define POWER9_PME_PM_CMPLU_STALL_LWSYNC 66
#define POWER9_PME_PM_CMPLU_STALL_MTFPSCR 67
#define POWER9_PME_PM_CMPLU_STALL_NESTED_TBEGIN 68
#define POWER9_PME_PM_CMPLU_STALL_NESTED_TEND 69
#define POWER9_PME_PM_CMPLU_STALL_NTC_DISP_FIN 70
#define POWER9_PME_PM_CMPLU_STALL_NTC_FLUSH 71
#define POWER9_PME_PM_CMPLU_STALL_OTHER_CMPL 72
#define POWER9_PME_PM_CMPLU_STALL_PASTE 73
#define POWER9_PME_PM_CMPLU_STALL_PM 74
#define POWER9_PME_PM_CMPLU_STALL_SLB 75
#define POWER9_PME_PM_CMPLU_STALL_SPEC_FINISH 76
#define POWER9_PME_PM_CMPLU_STALL_SRQ_FULL 77
#define POWER9_PME_PM_CMPLU_STALL_STCX 78
#define POWER9_PME_PM_CMPLU_STALL_ST_FWD 79
#define POWER9_PME_PM_CMPLU_STALL_STORE_DATA 80
#define POWER9_PME_PM_CMPLU_STALL_STORE_FIN_ARB 81
#define POWER9_PME_PM_CMPLU_STALL_STORE_FINISH 82
#define POWER9_PME_PM_CMPLU_STALL_STORE_PIPE_ARB 83
#define POWER9_PME_PM_CMPLU_STALL_SYNC_PMU_INT 84
#define POWER9_PME_PM_CMPLU_STALL_TEND 85
#define POWER9_PME_PM_CMPLU_STALL_THRD 86
#define POWER9_PME_PM_CMPLU_STALL_TLBIE 87
#define POWER9_PME_PM_CMPLU_STALL 88
#define POWER9_PME_PM_CMPLU_STALL_VDPLONG 89
#define POWER9_PME_PM_CMPLU_STALL_VDP 90
#define POWER9_PME_PM_CMPLU_STALL_VFXLONG 91
#define POWER9_PME_PM_CMPLU_STALL_VFXU 92
#define POWER9_PME_PM_CO0_BUSY 93
#define POWER9_PME_PM_CO0_BUSY_ALT 94
#define POWER9_PME_PM_CO_DISP_FAIL 95
#define POWER9_PME_PM_CO_TM_SC_FOOTPRINT 96
#define POWER9_PME_PM_CO_USAGE 97
#define POWER9_PME_PM_CYC 98
#define POWER9_PME_PM_DARQ0_0_3_ENTRIES 99
#define POWER9_PME_PM_DARQ0_10_12_ENTRIES 100
#define POWER9_PME_PM_DARQ0_4_6_ENTRIES 101
#define POWER9_PME_PM_DARQ0_7_9_ENTRIES 102
#define POWER9_PME_PM_DARQ1_0_3_ENTRIES 103
#define POWER9_PME_PM_DARQ1_10_12_ENTRIES 104
#define POWER9_PME_PM_DARQ1_4_6_ENTRIES 105
#define POWER9_PME_PM_DARQ1_7_9_ENTRIES 106
#define POWER9_PME_PM_DARQ_STORE_REJECT 107
#define POWER9_PME_PM_DARQ_STORE_XMIT 108
#define POWER9_PME_PM_DATA_CHIP_PUMP_CPRED 109
#define POWER9_PME_PM_DATA_FROM_DL2L3_MOD 110
#define POWER9_PME_PM_DATA_FROM_DL2L3_SHR 111
#define POWER9_PME_PM_DATA_FROM_DL4 112
#define POWER9_PME_PM_DATA_FROM_DMEM 113
#define POWER9_PME_PM_DATA_FROM_L21_MOD 114
#define POWER9_PME_PM_DATA_FROM_L21_SHR 115
#define POWER9_PME_PM_DATA_FROM_L2_DISP_CONFLICT_LDHITST 116
#define POWER9_PME_PM_DATA_FROM_L2_DISP_CONFLICT_OTHER 117
#define POWER9_PME_PM_DATA_FROM_L2_MEPF 118
#define POWER9_PME_PM_DATA_FROM_L2MISS_MOD 119
#define POWER9_PME_PM_DATA_FROM_L2MISS 120
#define POWER9_PME_PM_DATA_FROM_L2_NO_CONFLICT 121
#define POWER9_PME_PM_DATA_FROM_L2 122
#define POWER9_PME_PM_DATA_FROM_L31_ECO_MOD 123
#define POWER9_PME_PM_DATA_FROM_L31_ECO_SHR 124
#define POWER9_PME_PM_DATA_FROM_L31_MOD 125
#define POWER9_PME_PM_DATA_FROM_L31_SHR 126
#define POWER9_PME_PM_DATA_FROM_L3_DISP_CONFLICT 127
#define POWER9_PME_PM_DATA_FROM_L3_MEPF 128
#define POWER9_PME_PM_DATA_FROM_L3MISS_MOD 129
#define POWER9_PME_PM_DATA_FROM_L3MISS 130
#define POWER9_PME_PM_DATA_FROM_L3_NO_CONFLICT 131
#define POWER9_PME_PM_DATA_FROM_L3 132
#define POWER9_PME_PM_DATA_FROM_LL4 133
#define POWER9_PME_PM_DATA_FROM_LMEM 134
#define POWER9_PME_PM_DATA_FROM_MEMORY 135
#define POWER9_PME_PM_DATA_FROM_OFF_CHIP_CACHE 136
#define POWER9_PME_PM_DATA_FROM_ON_CHIP_CACHE 137
#define POWER9_PME_PM_DATA_FROM_RL2L3_MOD 138
#define POWER9_PME_PM_DATA_FROM_RL2L3_SHR 139
#define POWER9_PME_PM_DATA_FROM_RL4 140
#define POWER9_PME_PM_DATA_FROM_RMEM 141
#define POWER9_PME_PM_DATA_GRP_PUMP_CPRED 142
#define POWER9_PME_PM_DATA_GRP_PUMP_MPRED_RTY 143
#define POWER9_PME_PM_DATA_GRP_PUMP_MPRED 144
#define POWER9_PME_PM_DATA_PUMP_CPRED 145
#define POWER9_PME_PM_DATA_PUMP_MPRED 146
#define POWER9_PME_PM_DATA_STORE 147
#define POWER9_PME_PM_DATA_SYS_PUMP_CPRED 148
#define POWER9_PME_PM_DATA_SYS_PUMP_MPRED_RTY 149
#define POWER9_PME_PM_DATA_SYS_PUMP_MPRED 150
#define POWER9_PME_PM_DATA_TABLEWALK_CYC 151
#define POWER9_PME_PM_DC_DEALLOC_NO_CONF 152
#define POWER9_PME_PM_DC_PREF_CONF 153
#define POWER9_PME_PM_DC_PREF_CONS_ALLOC 154
#define POWER9_PME_PM_DC_PREF_FUZZY_CONF 155
#define POWER9_PME_PM_DC_PREF_HW_ALLOC 156
#define POWER9_PME_PM_DC_PREF_STRIDED_CONF 157
#define POWER9_PME_PM_DC_PREF_SW_ALLOC 158
#define POWER9_PME_PM_DC_PREF_XCONS_ALLOC 159
#define POWER9_PME_PM_DECODE_FUSION_CONST_GEN 160
#define POWER9_PME_PM_DECODE_FUSION_EXT_ADD 161
#define POWER9_PME_PM_DECODE_FUSION_LD_ST_DISP 162
#define POWER9_PME_PM_DECODE_FUSION_OP_PRESERV 163
#define POWER9_PME_PM_DECODE_HOLD_ICT_FULL 164
#define POWER9_PME_PM_DECODE_LANES_NOT_AVAIL 165
#define POWER9_PME_PM_DERAT_MISS_16G 166
#define POWER9_PME_PM_DERAT_MISS_16M 167
#define POWER9_PME_PM_DERAT_MISS_1G 168
#define POWER9_PME_PM_DERAT_MISS_2M 169
#define POWER9_PME_PM_DERAT_MISS_4K 170
#define POWER9_PME_PM_DERAT_MISS_64K 171
#define POWER9_PME_PM_DFU_BUSY 172
#define POWER9_PME_PM_DISP_CLB_HELD_BAL 173
#define POWER9_PME_PM_DISP_CLB_HELD_SB 174
#define POWER9_PME_PM_DISP_CLB_HELD_TLBIE 175
#define POWER9_PME_PM_DISP_HELD_HB_FULL 176
#define POWER9_PME_PM_DISP_HELD_ISSQ_FULL 177
#define POWER9_PME_PM_DISP_HELD_SYNC_HOLD 178
#define POWER9_PME_PM_DISP_HELD_TBEGIN 179
#define POWER9_PME_PM_DISP_HELD 180
#define POWER9_PME_PM_DISP_STARVED 181
#define POWER9_PME_PM_DP_QP_FLOP_CMPL 182
#define POWER9_PME_PM_DPTEG_FROM_DL2L3_MOD 183
#define POWER9_PME_PM_DPTEG_FROM_DL2L3_SHR 184
#define POWER9_PME_PM_DPTEG_FROM_DL4 185
#define POWER9_PME_PM_DPTEG_FROM_DMEM 186
#define POWER9_PME_PM_DPTEG_FROM_L21_MOD 187
#define POWER9_PME_PM_DPTEG_FROM_L21_SHR 188
#define POWER9_PME_PM_DPTEG_FROM_L2_MEPF 189
#define POWER9_PME_PM_DPTEG_FROM_L2MISS 190
#define POWER9_PME_PM_DPTEG_FROM_L2_NO_CONFLICT 191
#define POWER9_PME_PM_DPTEG_FROM_L2 192
#define POWER9_PME_PM_DPTEG_FROM_L31_ECO_MOD 193
#define POWER9_PME_PM_DPTEG_FROM_L31_ECO_SHR 194
#define POWER9_PME_PM_DPTEG_FROM_L31_MOD 195
#define POWER9_PME_PM_DPTEG_FROM_L31_SHR 196
#define POWER9_PME_PM_DPTEG_FROM_L3_DISP_CONFLICT 197
#define POWER9_PME_PM_DPTEG_FROM_L3_MEPF 198
#define POWER9_PME_PM_DPTEG_FROM_L3MISS 199
#define POWER9_PME_PM_DPTEG_FROM_L3_NO_CONFLICT 200
#define POWER9_PME_PM_DPTEG_FROM_L3 201
#define POWER9_PME_PM_DPTEG_FROM_LL4 202
#define POWER9_PME_PM_DPTEG_FROM_LMEM 203
#define POWER9_PME_PM_DPTEG_FROM_MEMORY 204
#define POWER9_PME_PM_DPTEG_FROM_OFF_CHIP_CACHE 205
#define POWER9_PME_PM_DPTEG_FROM_ON_CHIP_CACHE 206
#define POWER9_PME_PM_DPTEG_FROM_RL2L3_MOD 207
#define POWER9_PME_PM_DPTEG_FROM_RL2L3_SHR 208
#define POWER9_PME_PM_DPTEG_FROM_RL4 209
#define POWER9_PME_PM_DPTEG_FROM_RMEM 210
#define POWER9_PME_PM_DSIDE_L2MEMACC 211
#define POWER9_PME_PM_DSIDE_MRU_TOUCH 212
#define POWER9_PME_PM_DSIDE_OTHER_64B_L2MEMACC 213
#define POWER9_PME_PM_DSLB_MISS 214
#define POWER9_PME_PM_DSLB_MISS_ALT 215
#define POWER9_PME_PM_DTLB_MISS_16G 216
#define POWER9_PME_PM_DTLB_MISS_16M 217
#define POWER9_PME_PM_DTLB_MISS_1G 218
#define POWER9_PME_PM_DTLB_MISS_2M 219
#define POWER9_PME_PM_DTLB_MISS_4K 220
#define POWER9_PME_PM_DTLB_MISS_64K 221
#define POWER9_PME_PM_DTLB_MISS 222
#define POWER9_PME_PM_SPACEHOLDER_0000040062 223
#define POWER9_PME_PM_SPACEHOLDER_0000040064 224
#define POWER9_PME_PM_EAT_FORCE_MISPRED 225
#define POWER9_PME_PM_EAT_FULL_CYC 226
#define POWER9_PME_PM_EE_OFF_EXT_INT 227
#define POWER9_PME_PM_EXT_INT 228
#define POWER9_PME_PM_FLOP_CMPL 229
#define POWER9_PME_PM_FLUSH_COMPLETION 230
#define POWER9_PME_PM_FLUSH_DISP_SB 231
#define POWER9_PME_PM_FLUSH_DISP_TLBIE 232
#define POWER9_PME_PM_FLUSH_DISP 233
#define POWER9_PME_PM_FLUSH_HB_RESTORE_CYC 234
#define POWER9_PME_PM_FLUSH_LSU 235
#define POWER9_PME_PM_FLUSH_MPRED 236
#define POWER9_PME_PM_FLUSH 237
#define POWER9_PME_PM_FMA_CMPL 238
#define POWER9_PME_PM_FORCED_NOP 239
#define POWER9_PME_PM_FREQ_DOWN 240
#define POWER9_PME_PM_FREQ_UP 241
#define POWER9_PME_PM_FXU_1PLUS_BUSY 242
#define POWER9_PME_PM_FXU_BUSY 243
#define POWER9_PME_PM_FXU_FIN 244
#define POWER9_PME_PM_FXU_IDLE 245
#define POWER9_PME_PM_GRP_PUMP_CPRED 246
#define POWER9_PME_PM_GRP_PUMP_MPRED_RTY 247
#define POWER9_PME_PM_GRP_PUMP_MPRED 248
#define POWER9_PME_PM_HV_CYC 249
#define POWER9_PME_PM_HWSYNC 250
#define POWER9_PME_PM_IBUF_FULL_CYC 251
#define POWER9_PME_PM_IC_DEMAND_CYC 252
#define POWER9_PME_PM_IC_DEMAND_L2_BHT_REDIRECT 253
#define POWER9_PME_PM_IC_DEMAND_L2_BR_REDIRECT 254
#define POWER9_PME_PM_IC_DEMAND_REQ 255
#define POWER9_PME_PM_IC_INVALIDATE 256
#define POWER9_PME_PM_IC_MISS_CMPL 257
#define POWER9_PME_PM_IC_MISS_ICBI 258
#define POWER9_PME_PM_IC_PREF_CANCEL_HIT 259
#define POWER9_PME_PM_IC_PREF_CANCEL_L2 260
#define POWER9_PME_PM_IC_PREF_CANCEL_PAGE 261
#define POWER9_PME_PM_IC_PREF_REQ 262
#define POWER9_PME_PM_IC_PREF_WRITE 263
#define POWER9_PME_PM_IC_RELOAD_PRIVATE 264
#define POWER9_PME_PM_ICT_EMPTY_CYC 265
#define POWER9_PME_PM_ICT_NOSLOT_BR_MPRED_ICMISS 266
#define POWER9_PME_PM_ICT_NOSLOT_BR_MPRED 267
#define POWER9_PME_PM_ICT_NOSLOT_CYC 268
#define POWER9_PME_PM_ICT_NOSLOT_DISP_HELD_HB_FULL 269
#define POWER9_PME_PM_ICT_NOSLOT_DISP_HELD_ISSQ 270
#define POWER9_PME_PM_ICT_NOSLOT_DISP_HELD_SYNC 271
#define POWER9_PME_PM_ICT_NOSLOT_DISP_HELD_TBEGIN 272
#define POWER9_PME_PM_ICT_NOSLOT_DISP_HELD 273
#define POWER9_PME_PM_ICT_NOSLOT_IC_L3MISS 274
#define POWER9_PME_PM_ICT_NOSLOT_IC_L3 275
#define POWER9_PME_PM_ICT_NOSLOT_IC_MISS 276
#define POWER9_PME_PM_IERAT_RELOAD_16M 277
#define POWER9_PME_PM_IERAT_RELOAD_4K 278
#define POWER9_PME_PM_IERAT_RELOAD_64K 279
#define POWER9_PME_PM_IERAT_RELOAD 280
#define POWER9_PME_PM_IFETCH_THROTTLE 281
#define POWER9_PME_PM_INST_CHIP_PUMP_CPRED 282
#define POWER9_PME_PM_INST_CMPL 283
#define POWER9_PME_PM_INST_DISP 284
#define POWER9_PME_PM_INST_FROM_DL2L3_MOD 285
#define POWER9_PME_PM_INST_FROM_DL2L3_SHR 286
#define POWER9_PME_PM_INST_FROM_DL4 287
#define POWER9_PME_PM_INST_FROM_DMEM 288
#define POWER9_PME_PM_INST_FROM_L1 289
#define POWER9_PME_PM_INST_FROM_L21_MOD 290
#define POWER9_PME_PM_INST_FROM_L21_SHR 291
#define POWER9_PME_PM_INST_FROM_L2_DISP_CONFLICT_LDHITST 292
#define POWER9_PME_PM_INST_FROM_L2_DISP_CONFLICT_OTHER 293
#define POWER9_PME_PM_INST_FROM_L2_MEPF 294
#define POWER9_PME_PM_INST_FROM_L2MISS 295
#define POWER9_PME_PM_INST_FROM_L2_NO_CONFLICT 296
#define POWER9_PME_PM_INST_FROM_L2 297
#define POWER9_PME_PM_INST_FROM_L31_ECO_MOD 298
#define POWER9_PME_PM_INST_FROM_L31_ECO_SHR 299
#define POWER9_PME_PM_INST_FROM_L31_MOD 300
#define POWER9_PME_PM_INST_FROM_L31_SHR 301
#define POWER9_PME_PM_INST_FROM_L3_DISP_CONFLICT 302
#define POWER9_PME_PM_INST_FROM_L3_MEPF 303
#define POWER9_PME_PM_INST_FROM_L3MISS_MOD 304
#define POWER9_PME_PM_INST_FROM_L3MISS 305
#define POWER9_PME_PM_INST_FROM_L3_NO_CONFLICT 306
#define POWER9_PME_PM_INST_FROM_L3 307
#define POWER9_PME_PM_INST_FROM_LL4 308
#define POWER9_PME_PM_INST_FROM_LMEM 309
#define POWER9_PME_PM_INST_FROM_MEMORY 310
#define POWER9_PME_PM_INST_FROM_OFF_CHIP_CACHE 311
#define POWER9_PME_PM_INST_FROM_ON_CHIP_CACHE 312
#define POWER9_PME_PM_INST_FROM_RL2L3_MOD 313
#define POWER9_PME_PM_INST_FROM_RL2L3_SHR 314
#define POWER9_PME_PM_INST_FROM_RL4 315
#define POWER9_PME_PM_INST_FROM_RMEM 316
#define POWER9_PME_PM_INST_GRP_PUMP_CPRED 317
#define POWER9_PME_PM_INST_GRP_PUMP_MPRED_RTY 318
#define POWER9_PME_PM_INST_GRP_PUMP_MPRED 319
#define POWER9_PME_PM_INST_IMC_MATCH_CMPL 320
#define POWER9_PME_PM_INST_PUMP_CPRED 321
#define POWER9_PME_PM_INST_PUMP_MPRED 322
#define POWER9_PME_PM_INST_SYS_PUMP_CPRED 323
#define POWER9_PME_PM_INST_SYS_PUMP_MPRED_RTY 324
#define POWER9_PME_PM_INST_SYS_PUMP_MPRED 325
#define POWER9_PME_PM_IOPS_CMPL 326
#define POWER9_PME_PM_IPTEG_FROM_DL2L3_MOD 327
#define POWER9_PME_PM_IPTEG_FROM_DL2L3_SHR 328
#define POWER9_PME_PM_IPTEG_FROM_DL4 329
#define POWER9_PME_PM_IPTEG_FROM_DMEM 330
#define POWER9_PME_PM_IPTEG_FROM_L21_MOD 331
#define POWER9_PME_PM_IPTEG_FROM_L21_SHR 332
#define POWER9_PME_PM_IPTEG_FROM_L2_MEPF 333
#define POWER9_PME_PM_IPTEG_FROM_L2MISS 334
#define POWER9_PME_PM_IPTEG_FROM_L2_NO_CONFLICT 335
#define POWER9_PME_PM_IPTEG_FROM_L2 336
#define POWER9_PME_PM_IPTEG_FROM_L31_ECO_MOD 337
#define POWER9_PME_PM_IPTEG_FROM_L31_ECO_SHR 338
#define POWER9_PME_PM_IPTEG_FROM_L31_MOD 339
#define POWER9_PME_PM_IPTEG_FROM_L31_SHR 340
#define POWER9_PME_PM_IPTEG_FROM_L3_DISP_CONFLICT 341
#define POWER9_PME_PM_IPTEG_FROM_L3_MEPF 342
#define POWER9_PME_PM_IPTEG_FROM_L3MISS 343
#define POWER9_PME_PM_IPTEG_FROM_L3_NO_CONFLICT 344
#define POWER9_PME_PM_IPTEG_FROM_L3 345
#define POWER9_PME_PM_IPTEG_FROM_LL4 346
#define POWER9_PME_PM_IPTEG_FROM_LMEM 347
#define POWER9_PME_PM_IPTEG_FROM_MEMORY 348
#define POWER9_PME_PM_IPTEG_FROM_OFF_CHIP_CACHE 349
#define POWER9_PME_PM_IPTEG_FROM_ON_CHIP_CACHE 350
#define POWER9_PME_PM_IPTEG_FROM_RL2L3_MOD 351
#define POWER9_PME_PM_IPTEG_FROM_RL2L3_SHR 352
#define POWER9_PME_PM_IPTEG_FROM_RL4 353
#define POWER9_PME_PM_IPTEG_FROM_RMEM 354
#define POWER9_PME_PM_ISIDE_DISP_FAIL_ADDR 355
#define POWER9_PME_PM_ISIDE_DISP_FAIL_OTHER 356
#define POWER9_PME_PM_ISIDE_DISP 357
#define POWER9_PME_PM_ISIDE_L2MEMACC 358
#define POWER9_PME_PM_ISIDE_MRU_TOUCH 359
#define POWER9_PME_PM_ISLB_MISS 360
#define POWER9_PME_PM_ISLB_MISS_ALT 361
#define POWER9_PME_PM_ISQ_0_8_ENTRIES 362
#define POWER9_PME_PM_ISQ_36_44_ENTRIES 363
#define POWER9_PME_PM_ISU0_ISS_HOLD_ALL 364
#define POWER9_PME_PM_ISU1_ISS_HOLD_ALL 365
#define POWER9_PME_PM_ISU2_ISS_HOLD_ALL 366
#define POWER9_PME_PM_ISU3_ISS_HOLD_ALL 367
#define POWER9_PME_PM_ISYNC 368
#define POWER9_PME_PM_ITLB_MISS 369
#define POWER9_PME_PM_L1_DCACHE_RELOADED_ALL 370
#define POWER9_PME_PM_L1_DCACHE_RELOAD_VALID 371
#define POWER9_PME_PM_L1_DEMAND_WRITE 372
#define POWER9_PME_PM_L1_ICACHE_MISS 373
#define POWER9_PME_PM_L1_ICACHE_RELOADED_ALL 374
#define POWER9_PME_PM_L1_ICACHE_RELOADED_PREF 375
#define POWER9_PME_PM_L1PF_L2MEMACC 376
#define POWER9_PME_PM_L1_PREF 377
#define POWER9_PME_PM_L1_SW_PREF 378
#define POWER9_PME_PM_L2_CASTOUT_MOD 379
#define POWER9_PME_PM_L2_CASTOUT_SHR 380
#define POWER9_PME_PM_L2_CHIP_PUMP 381
#define POWER9_PME_PM_L2_DC_INV 382
#define POWER9_PME_PM_L2_DISP_ALL_L2MISS 383
#define POWER9_PME_PM_L2_GROUP_PUMP 384
#define POWER9_PME_PM_L2_GRP_GUESS_CORRECT 385
#define POWER9_PME_PM_L2_GRP_GUESS_WRONG 386
#define POWER9_PME_PM_L2_IC_INV 387
#define POWER9_PME_PM_L2_INST_MISS 388
#define POWER9_PME_PM_L2_INST_MISS_ALT 389
#define POWER9_PME_PM_L2_INST 390
#define POWER9_PME_PM_L2_INST_ALT 391
#define POWER9_PME_PM_L2_LD_DISP 392
#define POWER9_PME_PM_L2_LD_DISP_ALT 393
#define POWER9_PME_PM_L2_LD_HIT 394
#define POWER9_PME_PM_L2_LD_HIT_ALT 395
#define POWER9_PME_PM_L2_LD_MISS_128B 396
#define POWER9_PME_PM_L2_LD_MISS_64B 397
#define POWER9_PME_PM_L2_LD_MISS 398
#define POWER9_PME_PM_L2_LD 399
#define POWER9_PME_PM_L2_LOC_GUESS_CORRECT 400
#define POWER9_PME_PM_L2_LOC_GUESS_WRONG 401
#define POWER9_PME_PM_L2_RCLD_DISP_FAIL_ADDR 402
#define POWER9_PME_PM_L2_RCLD_DISP_FAIL_OTHER 403
#define POWER9_PME_PM_L2_RCLD_DISP 404
#define POWER9_PME_PM_L2_RCST_DISP_FAIL_ADDR 405
#define POWER9_PME_PM_L2_RCST_DISP_FAIL_OTHER 406
#define POWER9_PME_PM_L2_RCST_DISP 407
#define POWER9_PME_PM_L2_RC_ST_DONE 408
#define POWER9_PME_PM_L2_RTY_LD 409
#define POWER9_PME_PM_L2_RTY_LD_ALT 410
#define POWER9_PME_PM_L2_RTY_ST 411
#define POWER9_PME_PM_L2_RTY_ST_ALT 412
#define POWER9_PME_PM_L2_SN_M_RD_DONE 413
#define POWER9_PME_PM_L2_SN_M_WR_DONE 414
#define POWER9_PME_PM_L2_SN_M_WR_DONE_ALT 415
#define POWER9_PME_PM_L2_SN_SX_I_DONE 416
#define POWER9_PME_PM_L2_ST_DISP 417
#define POWER9_PME_PM_L2_ST_DISP_ALT 418
#define POWER9_PME_PM_L2_ST_HIT 419
#define POWER9_PME_PM_L2_ST_HIT_ALT 420
#define POWER9_PME_PM_L2_ST_MISS_128B 421
#define POWER9_PME_PM_L2_ST_MISS_64B 422
#define POWER9_PME_PM_L2_ST_MISS 423
#define POWER9_PME_PM_L2_ST 424
#define POWER9_PME_PM_L2_SYS_GUESS_CORRECT 425
#define POWER9_PME_PM_L2_SYS_GUESS_WRONG 426
#define POWER9_PME_PM_L2_SYS_PUMP 427
#define POWER9_PME_PM_L3_CI_HIT 428
#define POWER9_PME_PM_L3_CI_MISS 429
#define POWER9_PME_PM_L3_CINJ 430
#define POWER9_PME_PM_L3_CI_USAGE 431
#define POWER9_PME_PM_L3_CO0_BUSY 432
#define POWER9_PME_PM_L3_CO0_BUSY_ALT 433
#define POWER9_PME_PM_L3_CO_L31 434
#define POWER9_PME_PM_L3_CO_LCO 435
#define POWER9_PME_PM_L3_CO_MEM 436
#define POWER9_PME_PM_L3_CO_MEPF 437
#define POWER9_PME_PM_L3_CO_MEPF_ALT 438
#define POWER9_PME_PM_L3_CO 439
#define POWER9_PME_PM_L3_GRP_GUESS_CORRECT 440
#define POWER9_PME_PM_L3_GRP_GUESS_WRONG_HIGH 441
#define POWER9_PME_PM_L3_GRP_GUESS_WRONG_LOW 442
#define POWER9_PME_PM_L3_HIT 443
#define POWER9_PME_PM_L3_L2_CO_HIT 444
#define POWER9_PME_PM_L3_L2_CO_MISS 445
#define POWER9_PME_PM_L3_LAT_CI_HIT 446
#define POWER9_PME_PM_L3_LAT_CI_MISS 447
#define POWER9_PME_PM_L3_LD_HIT 448
#define POWER9_PME_PM_L3_LD_MISS 449
#define POWER9_PME_PM_L3_LD_PREF 450
#define POWER9_PME_PM_L3_LOC_GUESS_CORRECT 451
#define POWER9_PME_PM_L3_LOC_GUESS_WRONG 452
#define POWER9_PME_PM_L3_MISS 453
#define POWER9_PME_PM_L3_P0_CO_L31 454
#define POWER9_PME_PM_L3_P0_CO_MEM 455
#define POWER9_PME_PM_L3_P0_CO_RTY 456
#define POWER9_PME_PM_L3_P0_CO_RTY_ALT 457
#define POWER9_PME_PM_L3_P0_GRP_PUMP 458
#define POWER9_PME_PM_L3_P0_LCO_DATA 459
#define POWER9_PME_PM_L3_P0_LCO_NO_DATA 460
#define POWER9_PME_PM_L3_P0_LCO_RTY 461
#define POWER9_PME_PM_L3_P0_NODE_PUMP 462
#define POWER9_PME_PM_L3_P0_PF_RTY 463
#define POWER9_PME_PM_L3_P0_PF_RTY_ALT 464
#define POWER9_PME_PM_L3_P0_SYS_PUMP 465
#define POWER9_PME_PM_L3_P1_CO_L31 466
#define POWER9_PME_PM_L3_P1_CO_MEM 467
#define POWER9_PME_PM_L3_P1_CO_RTY 468
#define POWER9_PME_PM_L3_P1_CO_RTY_ALT 469
#define POWER9_PME_PM_L3_P1_GRP_PUMP 470
#define POWER9_PME_PM_L3_P1_LCO_DATA 471
#define POWER9_PME_PM_L3_P1_LCO_NO_DATA 472
#define POWER9_PME_PM_L3_P1_LCO_RTY 473
#define POWER9_PME_PM_L3_P1_NODE_PUMP 474
#define POWER9_PME_PM_L3_P1_PF_RTY 475
#define POWER9_PME_PM_L3_P1_PF_RTY_ALT 476
#define POWER9_PME_PM_L3_P1_SYS_PUMP 477
#define POWER9_PME_PM_L3_P2_LCO_RTY 478
#define POWER9_PME_PM_L3_P3_LCO_RTY 479
#define POWER9_PME_PM_L3_PF0_BUSY 480
#define POWER9_PME_PM_L3_PF0_BUSY_ALT 481
#define POWER9_PME_PM_L3_PF_HIT_L3 482
#define POWER9_PME_PM_L3_PF_MISS_L3 483
#define POWER9_PME_PM_L3_PF_OFF_CHIP_CACHE 484
#define POWER9_PME_PM_L3_PF_OFF_CHIP_MEM 485
#define POWER9_PME_PM_L3_PF_ON_CHIP_CACHE 486
#define POWER9_PME_PM_L3_PF_ON_CHIP_MEM 487
#define POWER9_PME_PM_L3_PF_USAGE 488
#define POWER9_PME_PM_L3_RD0_BUSY 489
#define POWER9_PME_PM_L3_RD0_BUSY_ALT 490
#define POWER9_PME_PM_L3_RD_USAGE 491
#define POWER9_PME_PM_L3_SN0_BUSY 492
#define POWER9_PME_PM_L3_SN0_BUSY_ALT 493
#define POWER9_PME_PM_L3_SN_USAGE 494
#define POWER9_PME_PM_L3_SW_PREF 495
#define POWER9_PME_PM_L3_SYS_GUESS_CORRECT 496
#define POWER9_PME_PM_L3_SYS_GUESS_WRONG 497
#define POWER9_PME_PM_L3_TRANS_PF 498
#define POWER9_PME_PM_L3_WI0_BUSY 499
#define POWER9_PME_PM_L3_WI0_BUSY_ALT 500
#define POWER9_PME_PM_L3_WI_USAGE 501
#define POWER9_PME_PM_LARX_FIN 502
#define POWER9_PME_PM_LD_CMPL 503
#define POWER9_PME_PM_LD_L3MISS_PEND_CYC 504
#define POWER9_PME_PM_LD_MISS_L1_FIN 505
#define POWER9_PME_PM_LD_MISS_L1 506
#define POWER9_PME_PM_LD_REF_L1 507
#define POWER9_PME_PM_LINK_STACK_CORRECT 508
#define POWER9_PME_PM_LINK_STACK_INVALID_PTR 509
#define POWER9_PME_PM_LINK_STACK_WRONG_ADD_PRED 510
#define POWER9_PME_PM_LMQ_EMPTY_CYC 511
#define POWER9_PME_PM_LMQ_MERGE 512
#define POWER9_PME_PM_LRQ_REJECT 513
#define POWER9_PME_PM_LS0_DC_COLLISIONS 514
#define POWER9_PME_PM_LS0_ERAT_MISS_PREF 515
#define POWER9_PME_PM_LS0_LAUNCH_HELD_PREF 516
#define POWER9_PME_PM_LS0_PTE_TABLEWALK_CYC 517
#define POWER9_PME_PM_LS0_TM_DISALLOW 518
#define POWER9_PME_PM_LS0_UNALIGNED_LD 519
#define POWER9_PME_PM_LS0_UNALIGNED_ST 520
#define POWER9_PME_PM_LS1_DC_COLLISIONS 521
#define POWER9_PME_PM_LS1_ERAT_MISS_PREF 522
#define POWER9_PME_PM_LS1_LAUNCH_HELD_PREF 523
#define POWER9_PME_PM_LS1_PTE_TABLEWALK_CYC 524
#define POWER9_PME_PM_LS1_TM_DISALLOW 525
#define POWER9_PME_PM_LS1_UNALIGNED_LD 526
#define POWER9_PME_PM_LS1_UNALIGNED_ST 527
#define POWER9_PME_PM_LS2_DC_COLLISIONS 528
#define POWER9_PME_PM_LS2_ERAT_MISS_PREF 529
#define POWER9_PME_PM_LS2_TM_DISALLOW 530
#define POWER9_PME_PM_LS2_UNALIGNED_LD 531
#define POWER9_PME_PM_LS2_UNALIGNED_ST 532
#define POWER9_PME_PM_LS3_DC_COLLISIONS 533
#define POWER9_PME_PM_LS3_ERAT_MISS_PREF 534
#define POWER9_PME_PM_LS3_TM_DISALLOW 535
#define POWER9_PME_PM_LS3_UNALIGNED_LD 536
#define POWER9_PME_PM_LS3_UNALIGNED_ST 537
#define POWER9_PME_PM_LSU0_1_LRQF_FULL_CYC 538
#define POWER9_PME_PM_LSU0_ERAT_HIT 539
#define POWER9_PME_PM_LSU0_FALSE_LHS 540
#define POWER9_PME_PM_LSU0_L1_CAM_CANCEL 541
#define POWER9_PME_PM_LSU0_LDMX_FIN 542
#define POWER9_PME_PM_LSU0_LMQ_S0_VALID 543
#define POWER9_PME_PM_LSU0_LRQ_S0_VALID_CYC 544
#define POWER9_PME_PM_LSU0_SET_MPRED 545
#define POWER9_PME_PM_LSU0_SRQ_S0_VALID_CYC 546
#define POWER9_PME_PM_LSU0_STORE_REJECT 547
#define POWER9_PME_PM_LSU0_TM_L1_HIT 548
#define POWER9_PME_PM_LSU0_TM_L1_MISS 549
#define POWER9_PME_PM_LSU1_ERAT_HIT 550
#define POWER9_PME_PM_LSU1_FALSE_LHS 551
#define POWER9_PME_PM_LSU1_L1_CAM_CANCEL 552
#define POWER9_PME_PM_LSU1_LDMX_FIN 553
#define POWER9_PME_PM_LSU1_SET_MPRED 554
#define POWER9_PME_PM_LSU1_STORE_REJECT 555
#define POWER9_PME_PM_LSU1_TM_L1_HIT 556
#define POWER9_PME_PM_LSU1_TM_L1_MISS 557
#define POWER9_PME_PM_LSU2_3_LRQF_FULL_CYC 558
#define POWER9_PME_PM_LSU2_ERAT_HIT 559
#define POWER9_PME_PM_LSU2_FALSE_LHS 560
#define POWER9_PME_PM_LSU2_L1_CAM_CANCEL 561
#define POWER9_PME_PM_LSU2_LDMX_FIN 562
#define POWER9_PME_PM_LSU2_SET_MPRED 563
#define POWER9_PME_PM_LSU2_STORE_REJECT 564
#define POWER9_PME_PM_LSU2_TM_L1_HIT 565
#define POWER9_PME_PM_LSU2_TM_L1_MISS 566
#define POWER9_PME_PM_LSU3_ERAT_HIT 567
#define POWER9_PME_PM_LSU3_FALSE_LHS 568
#define POWER9_PME_PM_LSU3_L1_CAM_CANCEL 569
#define POWER9_PME_PM_LSU3_LDMX_FIN 570
#define POWER9_PME_PM_LSU3_SET_MPRED 571
#define POWER9_PME_PM_LSU3_STORE_REJECT 572
#define POWER9_PME_PM_LSU3_TM_L1_HIT 573
#define POWER9_PME_PM_LSU3_TM_L1_MISS 574
#define POWER9_PME_PM_LSU_DERAT_MISS 575
#define POWER9_PME_PM_LSU_FIN 576
#define POWER9_PME_PM_LSU_FLUSH_ATOMIC 577
#define POWER9_PME_PM_LSU_FLUSH_CI 578
#define POWER9_PME_PM_LSU_FLUSH_EMSH 579
#define POWER9_PME_PM_LSU_FLUSH_LARX_STCX 580
#define POWER9_PME_PM_LSU_FLUSH_LHL_SHL 581
#define POWER9_PME_PM_LSU_FLUSH_LHS 582
#define POWER9_PME_PM_LSU_FLUSH_NEXT 583
#define POWER9_PME_PM_LSU_FLUSH_OTHER 584
#define POWER9_PME_PM_LSU_FLUSH_RELAUNCH_MISS 585
#define POWER9_PME_PM_LSU_FLUSH_SAO 586
#define POWER9_PME_PM_LSU_FLUSH_UE 587
#define POWER9_PME_PM_LSU_FLUSH_WRK_ARND 588
#define POWER9_PME_PM_LSU_LMQ_FULL_CYC 589
#define POWER9_PME_PM_LSU_LMQ_SRQ_EMPTY_CYC 590
#define POWER9_PME_PM_LSU_NCST 591
#define POWER9_PME_PM_LSU_REJECT_ERAT_MISS 592
#define POWER9_PME_PM_LSU_REJECT_LHS 593
#define POWER9_PME_PM_LSU_REJECT_LMQ_FULL 594
#define POWER9_PME_PM_LSU_SRQ_FULL_CYC 595
#define POWER9_PME_PM_LSU_STCX_FAIL 596
#define POWER9_PME_PM_LSU_STCX 597
#define POWER9_PME_PM_LWSYNC 598
#define POWER9_PME_PM_MATH_FLOP_CMPL 599
#define POWER9_PME_PM_MEM_CO 600
#define POWER9_PME_PM_MEM_LOC_THRESH_IFU 601
#define POWER9_PME_PM_MEM_LOC_THRESH_LSU_HIGH 602
#define POWER9_PME_PM_MEM_LOC_THRESH_LSU_MED 603
#define POWER9_PME_PM_MEM_PREF 604
#define POWER9_PME_PM_MEM_READ 605
#define POWER9_PME_PM_MEM_RWITM 606
#define POWER9_PME_PM_MRK_BACK_BR_CMPL 607
#define POWER9_PME_PM_MRK_BR_2PATH 608
#define POWER9_PME_PM_MRK_BR_CMPL 609
#define POWER9_PME_PM_MRK_BR_MPRED_CMPL 610
#define POWER9_PME_PM_MRK_BR_TAKEN_CMPL 611
#define POWER9_PME_PM_MRK_BRU_FIN 612
#define POWER9_PME_PM_MRK_DATA_FROM_DL2L3_MOD_CYC 613
#define POWER9_PME_PM_MRK_DATA_FROM_DL2L3_MOD 614
#define POWER9_PME_PM_MRK_DATA_FROM_DL2L3_SHR_CYC 615
#define POWER9_PME_PM_MRK_DATA_FROM_DL2L3_SHR 616
#define POWER9_PME_PM_MRK_DATA_FROM_DL4_CYC 617
#define POWER9_PME_PM_MRK_DATA_FROM_DL4 618
#define POWER9_PME_PM_MRK_DATA_FROM_DMEM_CYC 619
#define POWER9_PME_PM_MRK_DATA_FROM_DMEM 620
#define POWER9_PME_PM_MRK_DATA_FROM_L21_MOD_CYC 621
#define POWER9_PME_PM_MRK_DATA_FROM_L21_MOD 622
#define POWER9_PME_PM_MRK_DATA_FROM_L21_SHR_CYC 623
#define POWER9_PME_PM_MRK_DATA_FROM_L21_SHR 624
#define POWER9_PME_PM_MRK_DATA_FROM_L2_CYC 625
#define POWER9_PME_PM_MRK_DATA_FROM_L2_DISP_CONFLICT_LDHITST_CYC 626
#define POWER9_PME_PM_MRK_DATA_FROM_L2_DISP_CONFLICT_LDHITST 627
#define POWER9_PME_PM_MRK_DATA_FROM_L2_DISP_CONFLICT_OTHER_CYC 628
#define POWER9_PME_PM_MRK_DATA_FROM_L2_DISP_CONFLICT_OTHER 629
#define POWER9_PME_PM_MRK_DATA_FROM_L2_MEPF_CYC 630
#define POWER9_PME_PM_MRK_DATA_FROM_L2_MEPF 631
#define POWER9_PME_PM_MRK_DATA_FROM_L2MISS_CYC 632
#define POWER9_PME_PM_MRK_DATA_FROM_L2MISS 633
#define POWER9_PME_PM_MRK_DATA_FROM_L2_NO_CONFLICT_CYC 634
#define POWER9_PME_PM_MRK_DATA_FROM_L2_NO_CONFLICT 635
#define POWER9_PME_PM_MRK_DATA_FROM_L2 636
#define POWER9_PME_PM_MRK_DATA_FROM_L31_ECO_MOD_CYC 637
#define POWER9_PME_PM_MRK_DATA_FROM_L31_ECO_MOD 638
#define POWER9_PME_PM_MRK_DATA_FROM_L31_ECO_SHR_CYC 639
#define POWER9_PME_PM_MRK_DATA_FROM_L31_ECO_SHR 640
#define POWER9_PME_PM_MRK_DATA_FROM_L31_MOD_CYC 641
#define POWER9_PME_PM_MRK_DATA_FROM_L31_MOD 642
#define POWER9_PME_PM_MRK_DATA_FROM_L31_SHR_CYC 643
#define POWER9_PME_PM_MRK_DATA_FROM_L31_SHR 644
#define POWER9_PME_PM_MRK_DATA_FROM_L3_CYC 645
#define POWER9_PME_PM_MRK_DATA_FROM_L3_DISP_CONFLICT_CYC 646
#define POWER9_PME_PM_MRK_DATA_FROM_L3_DISP_CONFLICT 647
#define POWER9_PME_PM_MRK_DATA_FROM_L3_MEPF_CYC 648
#define POWER9_PME_PM_MRK_DATA_FROM_L3_MEPF 649
#define POWER9_PME_PM_MRK_DATA_FROM_L3MISS_CYC 650
#define POWER9_PME_PM_MRK_DATA_FROM_L3MISS 651
#define POWER9_PME_PM_MRK_DATA_FROM_L3_NO_CONFLICT_CYC 652
#define POWER9_PME_PM_MRK_DATA_FROM_L3_NO_CONFLICT 653
#define POWER9_PME_PM_MRK_DATA_FROM_L3 654
#define POWER9_PME_PM_MRK_DATA_FROM_LL4_CYC 655
#define POWER9_PME_PM_MRK_DATA_FROM_LL4 656
#define POWER9_PME_PM_MRK_DATA_FROM_LMEM_CYC 657
#define POWER9_PME_PM_MRK_DATA_FROM_LMEM 658
#define POWER9_PME_PM_MRK_DATA_FROM_MEMORY_CYC 659
#define POWER9_PME_PM_MRK_DATA_FROM_MEMORY 660
#define POWER9_PME_PM_MRK_DATA_FROM_OFF_CHIP_CACHE_CYC 661
#define POWER9_PME_PM_MRK_DATA_FROM_OFF_CHIP_CACHE 662
#define POWER9_PME_PM_MRK_DATA_FROM_ON_CHIP_CACHE_CYC 663
#define POWER9_PME_PM_MRK_DATA_FROM_ON_CHIP_CACHE 664
#define POWER9_PME_PM_MRK_DATA_FROM_RL2L3_MOD_CYC 665
#define POWER9_PME_PM_MRK_DATA_FROM_RL2L3_MOD 666
#define POWER9_PME_PM_MRK_DATA_FROM_RL2L3_SHR_CYC 667
#define POWER9_PME_PM_MRK_DATA_FROM_RL2L3_SHR 668
#define POWER9_PME_PM_MRK_DATA_FROM_RL4_CYC 669
#define POWER9_PME_PM_MRK_DATA_FROM_RL4 670
#define POWER9_PME_PM_MRK_DATA_FROM_RMEM_CYC 671
#define POWER9_PME_PM_MRK_DATA_FROM_RMEM 672
#define POWER9_PME_PM_MRK_DCACHE_RELOAD_INTV 673
#define POWER9_PME_PM_MRK_DERAT_MISS_16G 674
#define POWER9_PME_PM_MRK_DERAT_MISS_16M 675
#define POWER9_PME_PM_MRK_DERAT_MISS_1G 676
#define POWER9_PME_PM_MRK_DERAT_MISS_2M 677
#define POWER9_PME_PM_MRK_DERAT_MISS_4K 678
#define POWER9_PME_PM_MRK_DERAT_MISS_64K 679
#define POWER9_PME_PM_MRK_DERAT_MISS 680
#define POWER9_PME_PM_MRK_DFU_FIN 681
#define POWER9_PME_PM_MRK_DPTEG_FROM_DL2L3_MOD 682
#define POWER9_PME_PM_MRK_DPTEG_FROM_DL2L3_SHR 683
#define POWER9_PME_PM_MRK_DPTEG_FROM_DL4 684
#define POWER9_PME_PM_MRK_DPTEG_FROM_DMEM 685
#define POWER9_PME_PM_MRK_DPTEG_FROM_L21_MOD 686
#define POWER9_PME_PM_MRK_DPTEG_FROM_L21_SHR 687
#define POWER9_PME_PM_MRK_DPTEG_FROM_L2_MEPF 688
#define POWER9_PME_PM_MRK_DPTEG_FROM_L2MISS 689
#define POWER9_PME_PM_MRK_DPTEG_FROM_L2_NO_CONFLICT 690
#define POWER9_PME_PM_MRK_DPTEG_FROM_L2 691
#define POWER9_PME_PM_MRK_DPTEG_FROM_L31_ECO_MOD 692
#define POWER9_PME_PM_MRK_DPTEG_FROM_L31_ECO_SHR 693
#define POWER9_PME_PM_MRK_DPTEG_FROM_L31_MOD 694
#define POWER9_PME_PM_MRK_DPTEG_FROM_L31_SHR 695
#define POWER9_PME_PM_MRK_DPTEG_FROM_L3_DISP_CONFLICT 696
#define POWER9_PME_PM_MRK_DPTEG_FROM_L3_MEPF 697
#define POWER9_PME_PM_MRK_DPTEG_FROM_L3MISS 698
#define POWER9_PME_PM_MRK_DPTEG_FROM_L3_NO_CONFLICT 699
#define POWER9_PME_PM_MRK_DPTEG_FROM_L3 700
#define POWER9_PME_PM_MRK_DPTEG_FROM_LL4 701
#define POWER9_PME_PM_MRK_DPTEG_FROM_LMEM 702
#define POWER9_PME_PM_MRK_DPTEG_FROM_MEMORY 703
#define POWER9_PME_PM_MRK_DPTEG_FROM_OFF_CHIP_CACHE 704
#define POWER9_PME_PM_MRK_DPTEG_FROM_ON_CHIP_CACHE 705
#define POWER9_PME_PM_MRK_DPTEG_FROM_RL2L3_MOD 706
#define POWER9_PME_PM_MRK_DPTEG_FROM_RL2L3_SHR 707
#define POWER9_PME_PM_MRK_DPTEG_FROM_RL4 708
#define POWER9_PME_PM_MRK_DPTEG_FROM_RMEM 709
#define POWER9_PME_PM_MRK_DTLB_MISS_16G 710
#define POWER9_PME_PM_MRK_DTLB_MISS_16M 711
#define POWER9_PME_PM_MRK_DTLB_MISS_1G 712
#define POWER9_PME_PM_MRK_DTLB_MISS_4K 713
#define POWER9_PME_PM_MRK_DTLB_MISS_64K 714
#define POWER9_PME_PM_MRK_DTLB_MISS 715
#define POWER9_PME_PM_MRK_FAB_RSP_BKILL_CYC 716
#define POWER9_PME_PM_MRK_FAB_RSP_BKILL 717
#define POWER9_PME_PM_MRK_FAB_RSP_CLAIM_RTY 718
#define POWER9_PME_PM_MRK_FAB_RSP_DCLAIM_CYC 719
#define POWER9_PME_PM_MRK_FAB_RSP_DCLAIM 720
#define POWER9_PME_PM_MRK_FAB_RSP_RD_RTY 721
#define POWER9_PME_PM_MRK_FAB_RSP_RD_T_INTV 722
#define POWER9_PME_PM_MRK_FAB_RSP_RWITM_CYC 723
#define POWER9_PME_PM_MRK_FAB_RSP_RWITM_RTY 724
#define POWER9_PME_PM_MRK_FXU_FIN 725
#define POWER9_PME_PM_MRK_IC_MISS 726
#define POWER9_PME_PM_MRK_INST_CMPL 727
#define POWER9_PME_PM_MRK_INST_DECODED 728
#define POWER9_PME_PM_MRK_INST_DISP 729
#define POWER9_PME_PM_MRK_INST_FIN 730
#define POWER9_PME_PM_MRK_INST_FROM_L3MISS 731
#define POWER9_PME_PM_MRK_INST_ISSUED 732
#define POWER9_PME_PM_MRK_INST_TIMEO 733
#define POWER9_PME_PM_MRK_INST 734
#define POWER9_PME_PM_MRK_L1_ICACHE_MISS 735
#define POWER9_PME_PM_MRK_L1_RELOAD_VALID 736
#define POWER9_PME_PM_MRK_L2_RC_DISP 737
#define POWER9_PME_PM_MRK_L2_RC_DONE 738
#define POWER9_PME_PM_MRK_L2_TM_REQ_ABORT 739
#define POWER9_PME_PM_MRK_L2_TM_ST_ABORT_SISTER 740
#define POWER9_PME_PM_MRK_LARX_FIN 741
#define POWER9_PME_PM_MRK_LD_MISS_EXPOSED_CYC 742
#define POWER9_PME_PM_MRK_LD_MISS_L1_CYC 743
#define POWER9_PME_PM_MRK_LD_MISS_L1 744
#define POWER9_PME_PM_MRK_LSU_DERAT_MISS 745
#define POWER9_PME_PM_MRK_LSU_FIN 746
#define POWER9_PME_PM_MRK_LSU_FLUSH_ATOMIC 747
#define POWER9_PME_PM_MRK_LSU_FLUSH_EMSH 748
#define POWER9_PME_PM_MRK_LSU_FLUSH_LARX_STCX 749
#define POWER9_PME_PM_MRK_LSU_FLUSH_LHL_SHL 750
#define POWER9_PME_PM_MRK_LSU_FLUSH_LHS 751
#define POWER9_PME_PM_MRK_LSU_FLUSH_RELAUNCH_MISS 752
#define POWER9_PME_PM_MRK_LSU_FLUSH_SAO 753
#define POWER9_PME_PM_MRK_LSU_FLUSH_UE 754
#define POWER9_PME_PM_MRK_NTC_CYC 755
#define POWER9_PME_PM_MRK_NTF_FIN 756
#define POWER9_PME_PM_MRK_PROBE_NOP_CMPL 757
#define POWER9_PME_PM_MRK_RUN_CYC 758
#define POWER9_PME_PM_MRK_STALL_CMPLU_CYC 759
#define POWER9_PME_PM_MRK_ST_CMPL_INT 760
#define POWER9_PME_PM_MRK_ST_CMPL 761
#define POWER9_PME_PM_MRK_STCX_FAIL 762
#define POWER9_PME_PM_MRK_STCX_FIN 763
#define POWER9_PME_PM_MRK_ST_DONE_L2 764
#define POWER9_PME_PM_MRK_ST_DRAIN_TO_L2DISP_CYC 765
#define POWER9_PME_PM_MRK_ST_FWD 766
#define POWER9_PME_PM_MRK_ST_L2DISP_TO_CMPL_CYC 767
#define POWER9_PME_PM_MRK_ST_NEST 768
#define POWER9_PME_PM_MRK_TEND_FAIL 769
#define POWER9_PME_PM_MRK_VSU_FIN 770
#define POWER9_PME_PM_MULT_MRK 771
#define POWER9_PME_PM_NEST_REF_CLK 772
#define POWER9_PME_PM_NON_DATA_STORE 773
#define POWER9_PME_PM_NON_FMA_FLOP_CMPL 774
#define POWER9_PME_PM_NON_MATH_FLOP_CMPL 775
#define POWER9_PME_PM_NON_TM_RST_SC 776
#define POWER9_PME_PM_NTC_ALL_FIN 777
#define POWER9_PME_PM_NTC_FIN 778
#define POWER9_PME_PM_NTC_ISSUE_HELD_ARB 779
#define POWER9_PME_PM_NTC_ISSUE_HELD_DARQ_FULL 780
#define POWER9_PME_PM_NTC_ISSUE_HELD_OTHER 781
#define POWER9_PME_PM_PARTIAL_ST_FIN 782
#define POWER9_PME_PM_PMC1_OVERFLOW 783
#define POWER9_PME_PM_PMC1_REWIND 784
#define POWER9_PME_PM_PMC1_SAVED 785
#define POWER9_PME_PM_PMC2_OVERFLOW 786
#define POWER9_PME_PM_PMC2_REWIND 787
#define POWER9_PME_PM_PMC2_SAVED 788
#define POWER9_PME_PM_PMC3_OVERFLOW 789
#define POWER9_PME_PM_PMC3_REWIND 790
#define POWER9_PME_PM_PMC3_SAVED 791
#define POWER9_PME_PM_PMC4_OVERFLOW 792
#define POWER9_PME_PM_PMC4_REWIND 793
#define POWER9_PME_PM_PMC4_SAVED 794
#define POWER9_PME_PM_PMC5_OVERFLOW 795
#define POWER9_PME_PM_PMC6_OVERFLOW 796
#define POWER9_PME_PM_PROBE_NOP_DISP 797
#define POWER9_PME_PM_PTE_PREFETCH 798
#define POWER9_PME_PM_PTESYNC 799
#define POWER9_PME_PM_PUMP_CPRED 800
#define POWER9_PME_PM_PUMP_MPRED 801
#define POWER9_PME_PM_RADIX_PWC_L1_HIT 802
#define POWER9_PME_PM_RADIX_PWC_L1_PDE_FROM_L2 803
#define POWER9_PME_PM_RADIX_PWC_L1_PDE_FROM_L3MISS 804
#define POWER9_PME_PM_RADIX_PWC_L1_PDE_FROM_L3 805
#define POWER9_PME_PM_RADIX_PWC_L2_HIT 806
#define POWER9_PME_PM_RADIX_PWC_L2_PDE_FROM_L2 807
#define POWER9_PME_PM_RADIX_PWC_L2_PDE_FROM_L3 808
#define POWER9_PME_PM_RADIX_PWC_L2_PTE_FROM_L2 809
#define POWER9_PME_PM_RADIX_PWC_L2_PTE_FROM_L3MISS 810
#define POWER9_PME_PM_RADIX_PWC_L2_PTE_FROM_L3 811
#define POWER9_PME_PM_RADIX_PWC_L3_HIT 812
#define POWER9_PME_PM_RADIX_PWC_L3_PDE_FROM_L2 813
#define POWER9_PME_PM_RADIX_PWC_L3_PDE_FROM_L3 814
#define POWER9_PME_PM_RADIX_PWC_L3_PTE_FROM_L2 815
#define POWER9_PME_PM_RADIX_PWC_L3_PTE_FROM_L3MISS 816
#define POWER9_PME_PM_RADIX_PWC_L3_PTE_FROM_L3 817
#define POWER9_PME_PM_RADIX_PWC_L4_PTE_FROM_L2 818
#define POWER9_PME_PM_RADIX_PWC_L4_PTE_FROM_L3MISS 819
#define POWER9_PME_PM_RADIX_PWC_L4_PTE_FROM_L3 820
#define POWER9_PME_PM_RADIX_PWC_MISS 821
#define POWER9_PME_PM_RC0_BUSY 822
#define POWER9_PME_PM_RC0_BUSY_ALT 823
#define POWER9_PME_PM_RC_USAGE 824
#define POWER9_PME_PM_RD_CLEARING_SC 825
#define POWER9_PME_PM_RD_FORMING_SC 826
#define POWER9_PME_PM_RD_HIT_PF 827
#define POWER9_PME_PM_RUN_CYC_SMT2_MODE 828
#define POWER9_PME_PM_RUN_CYC_SMT4_MODE 829
#define POWER9_PME_PM_RUN_CYC_ST_MODE 830
#define POWER9_PME_PM_RUN_CYC 831
#define POWER9_PME_PM_RUN_INST_CMPL 832
#define POWER9_PME_PM_RUN_PURR 833
#define POWER9_PME_PM_RUN_SPURR 834
#define POWER9_PME_PM_S2Q_FULL 835
#define POWER9_PME_PM_SCALAR_FLOP_CMPL 836
#define POWER9_PME_PM_SHL_CREATED 837
#define POWER9_PME_PM_SHL_ST_DEP_CREATED 838
#define POWER9_PME_PM_SHL_ST_DISABLE 839
#define POWER9_PME_PM_SLB_TABLEWALK_CYC 840
#define POWER9_PME_PM_SN0_BUSY 841
#define POWER9_PME_PM_SN0_BUSY_ALT 842
#define POWER9_PME_PM_SN_HIT 843
#define POWER9_PME_PM_SN_INVL 844
#define POWER9_PME_PM_SN_MISS 845
#define POWER9_PME_PM_SNOOP_TLBIE 846
#define POWER9_PME_PM_SNP_TM_HIT_M 847
#define POWER9_PME_PM_SNP_TM_HIT_T 848
#define POWER9_PME_PM_SN_USAGE 849
#define POWER9_PME_PM_SP_FLOP_CMPL 850
#define POWER9_PME_PM_SRQ_EMPTY_CYC 851
#define POWER9_PME_PM_SRQ_SYNC_CYC 852
#define POWER9_PME_PM_STALL_END_ICT_EMPTY 853
#define POWER9_PME_PM_ST_CAUSED_FAIL 854
#define POWER9_PME_PM_ST_CMPL 855
#define POWER9_PME_PM_STCX_FAIL 856
#define POWER9_PME_PM_STCX_FIN 857
#define POWER9_PME_PM_STCX_SUCCESS_CMPL 858
#define POWER9_PME_PM_ST_FIN 859
#define POWER9_PME_PM_ST_FWD 860
#define POWER9_PME_PM_ST_MISS_L1 861
#define POWER9_PME_PM_STOP_FETCH_PENDING_CYC 862
#define POWER9_PME_PM_SUSPENDED 863
#define POWER9_PME_PM_SYNC_MRK_BR_LINK 864
#define POWER9_PME_PM_SYNC_MRK_BR_MPRED 865
#define POWER9_PME_PM_SYNC_MRK_FX_DIVIDE 866
#define POWER9_PME_PM_SYNC_MRK_L2HIT 867
#define POWER9_PME_PM_SYNC_MRK_L2MISS 868
#define POWER9_PME_PM_SYNC_MRK_L3MISS 869
#define POWER9_PME_PM_SYNC_MRK_PROBE_NOP 870
#define POWER9_PME_PM_SYS_PUMP_CPRED 871
#define POWER9_PME_PM_SYS_PUMP_MPRED_RTY 872
#define POWER9_PME_PM_SYS_PUMP_MPRED 873
#define POWER9_PME_PM_TABLEWALK_CYC_PREF 874
#define POWER9_PME_PM_TABLEWALK_CYC 875
#define POWER9_PME_PM_TAGE_CORRECT_TAKEN_CMPL 876
#define POWER9_PME_PM_TAGE_CORRECT 877
#define POWER9_PME_PM_TAGE_OVERRIDE_WRONG_SPEC 878
#define POWER9_PME_PM_TAGE_OVERRIDE_WRONG 879
#define POWER9_PME_PM_TAKEN_BR_MPRED_CMPL 880
#define POWER9_PME_PM_TB_BIT_TRANS 881
#define POWER9_PME_PM_TEND_PEND_CYC 882
#define POWER9_PME_PM_THRD_ALL_RUN_CYC 883
#define POWER9_PME_PM_THRD_CONC_RUN_INST 884
#define POWER9_PME_PM_THRD_PRIO_0_1_CYC 885
#define POWER9_PME_PM_THRD_PRIO_2_3_CYC 886
#define POWER9_PME_PM_THRD_PRIO_4_5_CYC 887
#define POWER9_PME_PM_THRD_PRIO_6_7_CYC 888
#define POWER9_PME_PM_THRESH_ACC 889
#define POWER9_PME_PM_THRESH_EXC_1024 890
#define POWER9_PME_PM_THRESH_EXC_128 891
#define POWER9_PME_PM_THRESH_EXC_2048 892
#define POWER9_PME_PM_THRESH_EXC_256 893
#define POWER9_PME_PM_THRESH_EXC_32 894
#define POWER9_PME_PM_THRESH_EXC_4096 895
#define POWER9_PME_PM_THRESH_EXC_512 896
#define POWER9_PME_PM_THRESH_EXC_64 897
#define POWER9_PME_PM_THRESH_MET 898
#define POWER9_PME_PM_THRESH_NOT_MET 899
#define POWER9_PME_PM_TLB_HIT 900
#define POWER9_PME_PM_TLBIE_FIN 901
#define POWER9_PME_PM_TLB_MISS 902
#define POWER9_PME_PM_TM_ABORTS 903
#define POWER9_PME_PM_TMA_REQ_L2 904
#define POWER9_PME_PM_TM_CAM_OVERFLOW 905
#define POWER9_PME_PM_TM_CAP_OVERFLOW 906
#define POWER9_PME_PM_TM_FAIL_CONF_NON_TM 907
#define POWER9_PME_PM_TM_FAIL_CONF_TM 908
#define POWER9_PME_PM_TM_FAIL_FOOTPRINT_OVERFLOW 909 #define POWER9_PME_PM_TM_FAIL_NON_TX_CONFLICT 910 #define POWER9_PME_PM_TM_FAIL_SELF 911 #define POWER9_PME_PM_TM_FAIL_TLBIE 912 #define POWER9_PME_PM_TM_FAIL_TX_CONFLICT 913 #define POWER9_PME_PM_TM_FAV_CAUSED_FAIL 914 #define POWER9_PME_PM_TM_FAV_TBEGIN 915 #define POWER9_PME_PM_TM_LD_CAUSED_FAIL 916 #define POWER9_PME_PM_TM_LD_CONF 917 #define POWER9_PME_PM_TM_NESTED_TBEGIN 918 #define POWER9_PME_PM_TM_NESTED_TEND 919 #define POWER9_PME_PM_TM_NON_FAV_TBEGIN 920 #define POWER9_PME_PM_TM_OUTER_TBEGIN_DISP 921 #define POWER9_PME_PM_TM_OUTER_TBEGIN 922 #define POWER9_PME_PM_TM_OUTER_TEND 923 #define POWER9_PME_PM_TM_PASSED 924 #define POWER9_PME_PM_TM_RST_SC 925 #define POWER9_PME_PM_TM_SC_CO 926 #define POWER9_PME_PM_TM_ST_CAUSED_FAIL 927 #define POWER9_PME_PM_TM_ST_CONF 928 #define POWER9_PME_PM_TM_TABORT_TRECLAIM 929 #define POWER9_PME_PM_TM_TRANS_RUN_CYC 930 #define POWER9_PME_PM_TM_TRANS_RUN_INST 931 #define POWER9_PME_PM_TM_TRESUME 932 #define POWER9_PME_PM_TM_TSUSPEND 933 #define POWER9_PME_PM_TM_TX_PASS_RUN_CYC 934 #define POWER9_PME_PM_TM_TX_PASS_RUN_INST 935 #define POWER9_PME_PM_VECTOR_FLOP_CMPL 936 #define POWER9_PME_PM_VECTOR_LD_CMPL 937 #define POWER9_PME_PM_VECTOR_ST_CMPL 938 #define POWER9_PME_PM_VSU_DP_FSQRT_FDIV 939 #define POWER9_PME_PM_VSU_FIN 940 #define POWER9_PME_PM_VSU_FSQRT_FDIV 941 #define POWER9_PME_PM_VSU_NON_FLOP_CMPL 942 #define POWER9_PME_PM_XLATE_HPT_MODE 943 #define POWER9_PME_PM_XLATE_MISS 944 #define POWER9_PME_PM_XLATE_RADIX_MODE 945 #define POWER9_PME_PM_BR_2PATH_ALT 946 #define POWER9_PME_PM_CYC_ALT 947 #define POWER9_PME_PM_CYC_ALT2 948 #define POWER9_PME_PM_CYC_ALT3 949 #define POWER9_PME_PM_INST_CMPL_ALT 950 #define POWER9_PME_PM_INST_CMPL_ALT2 951 #define POWER9_PME_PM_INST_CMPL_ALT3 952 #define POWER9_PME_PM_INST_DISP_ALT 953 #define POWER9_PME_PM_LD_MISS_L1_ALT 954 #define POWER9_PME_PM_SUSPENDED_ALT 955 #define POWER9_PME_PM_SUSPENDED_ALT2 956 #define 
POWER9_PME_PM_SUSPENDED_ALT3 957 static const pme_power_entry_t power9_pe[] = { [ POWER9_PME_PM_1FLOP_CMPL ] = { .pme_name = "PM_1FLOP_CMPL", .pme_code = 0x0000045050, .pme_short_desc = "one flop (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg) operation completed", .pme_long_desc = "one flop (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg) operation completed", }, [ POWER9_PME_PM_1PLUS_PPC_CMPL ] = { .pme_name = "PM_1PLUS_PPC_CMPL", .pme_code = 0x00000100F2, .pme_short_desc = "1 or more ppc insts finished", .pme_long_desc = "1 or more ppc insts finished", }, [ POWER9_PME_PM_1PLUS_PPC_DISP ] = { .pme_name = "PM_1PLUS_PPC_DISP", .pme_code = 0x00000400F2, .pme_short_desc = "Cycles at least one Instr Dispatched", .pme_long_desc = "Cycles at least one Instr Dispatched", }, [ POWER9_PME_PM_2FLOP_CMPL ] = { .pme_name = "PM_2FLOP_CMPL", .pme_code = 0x000004D052, .pme_short_desc = "DP vector version of fmul, fsub, fcmp, fsel, fabs, fnabs, fres ,fsqrte, fneg ", .pme_long_desc = "DP vector version of fmul, fsub, fcmp, fsel, fabs, fnabs, fres ,fsqrte, fneg ", }, [ POWER9_PME_PM_4FLOP_CMPL ] = { .pme_name = "PM_4FLOP_CMPL", .pme_code = 0x0000045052, .pme_short_desc = "4 FLOP instruction completed", .pme_long_desc = "4 FLOP instruction completed", }, [ POWER9_PME_PM_8FLOP_CMPL ] = { .pme_name = "PM_8FLOP_CMPL", .pme_code = 0x000004D054, .pme_short_desc = "8 FLOP instruction completed", .pme_long_desc = "8 FLOP instruction completed", }, [ POWER9_PME_PM_ANY_THRD_RUN_CYC ] = { .pme_name = "PM_ANY_THRD_RUN_CYC", .pme_code = 0x00000100FA, .pme_short_desc = "Cycles in which at least one thread has the run latch set", .pme_long_desc = "Cycles in which at least one thread has the run latch set", }, [ POWER9_PME_PM_BACK_BR_CMPL ] = { .pme_name = "PM_BACK_BR_CMPL", .pme_code = 0x000002505E, .pme_short_desc = "Branch instruction completed with a target address less than current instruction address", .pme_long_desc = "Branch instruction completed with a 
target address less than current instruction address", }, [ POWER9_PME_PM_BANK_CONFLICT ] = { .pme_name = "PM_BANK_CONFLICT", .pme_code = 0x0000004880, .pme_short_desc = "Read blocked due to interleave conflict.", .pme_long_desc = "Read blocked due to interleave conflict. The ifar logic will detect an interleave conflict and kill the data that was read that cycle.", }, [ POWER9_PME_PM_BFU_BUSY ] = { .pme_name = "PM_BFU_BUSY", .pme_code = 0x000003005C, .pme_short_desc = "Cycles in which all 4 Binary Floating Point units are busy.", .pme_long_desc = "Cycles in which all 4 Binary Floating Point units are busy. The BFU is running at capacity", }, /* See also alternate entries for 0000020036 / POWER9_PME_PM_BR_2PATH with code(s) 0000040036 at the bottom of this table. \n */ [ POWER9_PME_PM_BR_2PATH ] = { .pme_name = "PM_BR_2PATH", .pme_code = 0x0000020036, .pme_short_desc = "Branches that are not strongly biased", .pme_long_desc = "Branches that are not strongly biased", }, [ POWER9_PME_PM_BR_CMPL ] = { .pme_name = "PM_BR_CMPL", .pme_code = 0x000004D05E, .pme_short_desc = "Any Branch instruction completed", .pme_long_desc = "Any Branch instruction completed", }, [ POWER9_PME_PM_BR_CORECT_PRED_TAKEN_CMPL ] = { .pme_name = "PM_BR_CORECT_PRED_TAKEN_CMPL", .pme_code = 0x000000489C, .pme_short_desc = "Conditional Branch Completed in which the HW correctly predicted the direction as taken.", .pme_long_desc = "Conditional Branch Completed in which the HW correctly predicted the direction as taken. 
Counted at completion time", }, [ POWER9_PME_PM_BR_MPRED_CCACHE ] = { .pme_name = "PM_BR_MPRED_CCACHE", .pme_code = 0x00000040AC, .pme_short_desc = "Conditional Branch Completed that was Mispredicted due to the Count Cache Target Prediction", .pme_long_desc = "Conditional Branch Completed that was Mispredicted due to the Count Cache Target Prediction", }, [ POWER9_PME_PM_BR_MPRED_CMPL ] = { .pme_name = "PM_BR_MPRED_CMPL", .pme_code = 0x00000400F6, .pme_short_desc = "Number of Branch Mispredicts", .pme_long_desc = "Number of Branch Mispredicts", }, [ POWER9_PME_PM_BR_MPRED_LSTACK ] = { .pme_name = "PM_BR_MPRED_LSTACK", .pme_code = 0x00000048AC, .pme_short_desc = "Conditional Branch Completed that was Mispredicted due to the Link Stack Target Prediction", .pme_long_desc = "Conditional Branch Completed that was Mispredicted due to the Link Stack Target Prediction", }, [ POWER9_PME_PM_BR_MPRED_PCACHE ] = { .pme_name = "PM_BR_MPRED_PCACHE", .pme_code = 0x00000048B0, .pme_short_desc = "Conditional Branch Completed that was Mispredicted due to pattern cache prediction", .pme_long_desc = "Conditional Branch Completed that was Mispredicted due to pattern cache prediction", }, [ POWER9_PME_PM_BR_MPRED_TAKEN_CR ] = { .pme_name = "PM_BR_MPRED_TAKEN_CR", .pme_code = 0x00000040B8, .pme_short_desc = "A Conditional Branch that resolved to taken was mispredicted as not taken (due to the BHT Direction Prediction).", .pme_long_desc = "A Conditional Branch that resolved to taken was mispredicted as not taken (due to the BHT Direction Prediction).", }, [ POWER9_PME_PM_BR_MPRED_TAKEN_TA ] = { .pme_name = "PM_BR_MPRED_TAKEN_TA", .pme_code = 0x00000048B8, .pme_short_desc = "Conditional Branch Completed that was Mispredicted due to the Target Address Prediction from the Count Cache or Link Stack.", .pme_long_desc = "Conditional Branch Completed that was Mispredicted due to the Target Address Prediction from the Count Cache or Link Stack. 
Only XL-form branches that resolved Taken set this event.", }, [ POWER9_PME_PM_BR_PRED_CCACHE ] = { .pme_name = "PM_BR_PRED_CCACHE", .pme_code = 0x00000040A4, .pme_short_desc = "Conditional Branch Completed that used the Count Cache for Target Prediction", .pme_long_desc = "Conditional Branch Completed that used the Count Cache for Target Prediction", }, [ POWER9_PME_PM_BR_PRED_LSTACK ] = { .pme_name = "PM_BR_PRED_LSTACK", .pme_code = 0x00000040A8, .pme_short_desc = "Conditional Branch Completed that used the Link Stack for Target Prediction", .pme_long_desc = "Conditional Branch Completed that used the Link Stack for Target Prediction", }, [ POWER9_PME_PM_BR_PRED_PCACHE ] = { .pme_name = "PM_BR_PRED_PCACHE", .pme_code = 0x00000048A0, .pme_short_desc = "Conditional branch completed that used pattern cache prediction", .pme_long_desc = "Conditional branch completed that used pattern cache prediction", }, [ POWER9_PME_PM_BR_PRED_TAKEN_CR ] = { .pme_name = "PM_BR_PRED_TAKEN_CR", .pme_code = 0x00000040B0, .pme_short_desc = "Conditional Branch that had its direction predicted.", .pme_long_desc = "Conditional Branch that had its direction predicted. I-form branches do not set this event. In addition, B-form branches which do not use the BHT do not set this event - these are branches with BO-field set to 'always taken' and branches", }, [ POWER9_PME_PM_BR_PRED_TA ] = { .pme_name = "PM_BR_PRED_TA", .pme_code = 0x00000040B4, .pme_short_desc = "Conditional Branch Completed that had its target address predicted.", .pme_long_desc = "Conditional Branch Completed that had its target address predicted. Only XL-form branches set this event. This equal the sum of CCACHE, LSTACK, and PCACHE", }, [ POWER9_PME_PM_BR_PRED ] = { .pme_name = "PM_BR_PRED", .pme_code = 0x000000409C, .pme_short_desc = "Conditional Branch Executed in which the HW predicted the Direction or Target.", .pme_long_desc = "Conditional Branch Executed in which the HW predicted the Direction or Target. 
Includes taken and not taken and is counted at execution time", }, [ POWER9_PME_PM_BR_TAKEN_CMPL ] = { .pme_name = "PM_BR_TAKEN_CMPL", .pme_code = 0x00000200FA, .pme_short_desc = "New event for Branch Taken", .pme_long_desc = "New event for Branch Taken", }, [ POWER9_PME_PM_BRU_FIN ] = { .pme_name = "PM_BRU_FIN", .pme_code = 0x0000010068, .pme_short_desc = "Branch Instruction Finished", .pme_long_desc = "Branch Instruction Finished", }, [ POWER9_PME_PM_BR_UNCOND ] = { .pme_name = "PM_BR_UNCOND", .pme_code = 0x00000040A0, .pme_short_desc = "Unconditional Branch Completed.", .pme_long_desc = "Unconditional Branch Completed. HW branch prediction was not used for this branch. This can be an I-form branch, a B-form branch with BO-field set to branch always, or a B-form branch which was covenrted to a Resolve.", }, [ POWER9_PME_PM_BTAC_BAD_RESULT ] = { .pme_name = "PM_BTAC_BAD_RESULT", .pme_code = 0x00000050B0, .pme_short_desc = "BTAC thinks branch will be taken but it is either predicted not-taken by the BHT, or the target address is wrong (less common).", .pme_long_desc = "BTAC thinks branch will be taken but it is either predicted not-taken by the BHT, or the target address is wrong (less common). 
In both cases, a redirect will happen", }, [ POWER9_PME_PM_BTAC_GOOD_RESULT ] = { .pme_name = "PM_BTAC_GOOD_RESULT", .pme_code = 0x00000058B0, .pme_short_desc = "BTAC predicts a taken branch and the BHT agrees, and the target address is correct", .pme_long_desc = "BTAC predicts a taken branch and the BHT agrees, and the target address is correct", }, [ POWER9_PME_PM_CHIP_PUMP_CPRED ] = { .pme_name = "PM_CHIP_PUMP_CPRED", .pme_code = 0x0000010050, .pme_short_desc = "Initial and Final Pump Scope was chip pump (prediction=correct) for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)", .pme_long_desc = "Initial and Final Pump Scope was chip pump (prediction=correct) for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)", }, [ POWER9_PME_PM_CLB_HELD ] = { .pme_name = "PM_CLB_HELD", .pme_code = 0x000000208C, .pme_short_desc = "CLB (control logic block - indicates quadword fetch block) Hold: Any Reason", .pme_long_desc = "CLB (control logic block - indicates quadword fetch block) Hold: Any Reason", }, [ POWER9_PME_PM_CMPLU_STALL_ANY_SYNC ] = { .pme_name = "PM_CMPLU_STALL_ANY_SYNC", .pme_code = 0x000001E05A, .pme_short_desc = "Cycles in which the NTC sync instruction (isync, lwsync or hwsync) is not allowed to complete", .pme_long_desc = "Cycles in which the NTC sync instruction (isync, lwsync or hwsync) is not allowed to complete", }, [ POWER9_PME_PM_CMPLU_STALL_BRU ] = { .pme_name = "PM_CMPLU_STALL_BRU", .pme_code = 0x000004D018, .pme_short_desc = "Completion stall due to a Branch Unit", .pme_long_desc = "Completion stall due to a Branch Unit", }, [ POWER9_PME_PM_CMPLU_STALL_CRYPTO ] = { .pme_name = "PM_CMPLU_STALL_CRYPTO", .pme_code = 0x000004C01E, .pme_short_desc = "Finish stall because the NTF instruction was routed to the crypto execution pipe and was waiting to finish", .pme_long_desc = "Finish stall because the NTF instruction was routed to the crypto execution pipe and was waiting to finish", }, 
[ POWER9_PME_PM_CMPLU_STALL_DCACHE_MISS ] = { .pme_name = "PM_CMPLU_STALL_DCACHE_MISS", .pme_code = 0x000002C012, .pme_short_desc = "Finish stall because the NTF instruction was a load that missed the L1 and was waiting for the data to return from the nest", .pme_long_desc = "Finish stall because the NTF instruction was a load that missed the L1 and was waiting for the data to return from the nest", }, [ POWER9_PME_PM_CMPLU_STALL_DFLONG ] = { .pme_name = "PM_CMPLU_STALL_DFLONG", .pme_code = 0x000001005A, .pme_short_desc = "Finish stall because the NTF instruction was a multi-cycle instruction issued to the Decimal Floating Point execution pipe and waiting to finish.", .pme_long_desc = "Finish stall because the NTF instruction was a multi-cycle instruction issued to the Decimal Floating Point execution pipe and waiting to finish. Includes decimal floating point instructions + 128 bit binary floating point instructions. Qualified by multicycle", }, [ POWER9_PME_PM_CMPLU_STALL_DFU ] = { .pme_name = "PM_CMPLU_STALL_DFU", .pme_code = 0x000002D012, .pme_short_desc = "Finish stall because the NTF instruction was issued to the Decimal Floating Point execution pipe and waiting to finish.", .pme_long_desc = "Finish stall because the NTF instruction was issued to the Decimal Floating Point execution pipe and waiting to finish. Includes decimal floating point instructions + 128 bit binary floating point instructions. 
Not qualified by multicycle", }, [ POWER9_PME_PM_CMPLU_STALL_DMISS_L21_L31 ] = { .pme_name = "PM_CMPLU_STALL_DMISS_L21_L31", .pme_code = 0x000002C018, .pme_short_desc = "Completion stall by Dcache miss which resolved on chip ( excluding local L2/L3)", .pme_long_desc = "Completion stall by Dcache miss which resolved on chip ( excluding local L2/L3)", }, [ POWER9_PME_PM_CMPLU_STALL_DMISS_L2L3_CONFLICT ] = { .pme_name = "PM_CMPLU_STALL_DMISS_L2L3_CONFLICT", .pme_code = 0x000004C016, .pme_short_desc = "Completion stall due to cache miss that resolves in the L2 or L3 with a conflict", .pme_long_desc = "Completion stall due to cache miss that resolves in the L2 or L3 with a conflict", }, [ POWER9_PME_PM_CMPLU_STALL_DMISS_L2L3 ] = { .pme_name = "PM_CMPLU_STALL_DMISS_L2L3", .pme_code = 0x000001003C, .pme_short_desc = "Completion stall by Dcache miss which resolved in L2/L3", .pme_long_desc = "Completion stall by Dcache miss which resolved in L2/L3", }, [ POWER9_PME_PM_CMPLU_STALL_DMISS_L3MISS ] = { .pme_name = "PM_CMPLU_STALL_DMISS_L3MISS", .pme_code = 0x000004C01A, .pme_short_desc = "Completion stall due to cache miss resolving missed the L3", .pme_long_desc = "Completion stall due to cache miss resolving missed the L3", }, [ POWER9_PME_PM_CMPLU_STALL_DMISS_LMEM ] = { .pme_name = "PM_CMPLU_STALL_DMISS_LMEM", .pme_code = 0x0000030038, .pme_short_desc = "Completion stall due to cache miss that resolves in local memory", .pme_long_desc = "Completion stall due to cache miss that resolves in local memory", }, [ POWER9_PME_PM_CMPLU_STALL_DMISS_REMOTE ] = { .pme_name = "PM_CMPLU_STALL_DMISS_REMOTE", .pme_code = 0x000002C01C, .pme_short_desc = "Completion stall by Dcache miss which resolved from remote chip (cache or memory)", .pme_long_desc = "Completion stall by Dcache miss which resolved from remote chip (cache or memory)", }, [ POWER9_PME_PM_CMPLU_STALL_DPLONG ] = { .pme_name = "PM_CMPLU_STALL_DPLONG", .pme_code = 0x000003405C, .pme_short_desc = "Finish stall because the NTF 
instruction was a scalar multi-cycle instruction issued to the Double Precision execution pipe and waiting to finish.", .pme_long_desc = "Finish stall because the NTF instruction was a scalar multi-cycle instruction issued to the Double Precision execution pipe and waiting to finish. Includes binary floating point instructions in 32 and 64 bit binary floating point format. Qualified by NOT vector AND multicycle", }, [ POWER9_PME_PM_CMPLU_STALL_DP ] = { .pme_name = "PM_CMPLU_STALL_DP", .pme_code = 0x000001005C, .pme_short_desc = "Finish stall because the NTF instruction was a scalar instruction issued to the Double Precision execution pipe and waiting to finish.", .pme_long_desc = "Finish stall because the NTF instruction was a scalar instruction issued to the Double Precision execution pipe and waiting to finish. Includes binary floating point instructions in 32 and 64 bit binary floating point format. Not qualified multicycle. Qualified by NOT vector", }, [ POWER9_PME_PM_CMPLU_STALL_EIEIO ] = { .pme_name = "PM_CMPLU_STALL_EIEIO", .pme_code = 0x000004D01A, .pme_short_desc = "Finish stall because the NTF instruction is an EIEIO waiting for response from L2", .pme_long_desc = "Finish stall because the NTF instruction is an EIEIO waiting for response from L2", }, [ POWER9_PME_PM_CMPLU_STALL_EMQ_FULL ] = { .pme_name = "PM_CMPLU_STALL_EMQ_FULL", .pme_code = 0x0000030004, .pme_short_desc = "Finish stall because the next to finish instruction suffered an ERAT miss and the EMQ was full", .pme_long_desc = "Finish stall because the next to finish instruction suffered an ERAT miss and the EMQ was full", }, [ POWER9_PME_PM_CMPLU_STALL_ERAT_MISS ] = { .pme_name = "PM_CMPLU_STALL_ERAT_MISS", .pme_code = 0x000004C012, .pme_short_desc = "Finish stall because the NTF instruction was a load or store that suffered a translation miss", .pme_long_desc = "Finish stall because the NTF instruction was a load or store that suffered a translation miss", }, [ 
POWER9_PME_PM_CMPLU_STALL_EXCEPTION ] = { .pme_name = "PM_CMPLU_STALL_EXCEPTION", .pme_code = 0x000003003A, .pme_short_desc = "Cycles in which the NTC instruction is not allowed to complete because it was interrupted by ANY exception, which has to be serviced before the instruction can complete", .pme_long_desc = "Cycles in which the NTC instruction is not allowed to complete because it was interrupted by ANY exception, which has to be serviced before the instruction can complete", }, [ POWER9_PME_PM_CMPLU_STALL_EXEC_UNIT ] = { .pme_name = "PM_CMPLU_STALL_EXEC_UNIT", .pme_code = 0x000002D018, .pme_short_desc = "Completion stall due to execution units (FXU/VSU/CRU)", .pme_long_desc = "Completion stall due to execution units (FXU/VSU/CRU)", }, [ POWER9_PME_PM_CMPLU_STALL_FLUSH_ANY_THREAD ] = { .pme_name = "PM_CMPLU_STALL_FLUSH_ANY_THREAD", .pme_code = 0x000001E056, .pme_short_desc = "Cycles in which the NTC instruction is not allowed to complete because any of the 4 threads in the same core suffered a flush, which blocks completion", .pme_long_desc = "Cycles in which the NTC instruction is not allowed to complete because any of the 4 threads in the same core suffered a flush, which blocks completion", }, [ POWER9_PME_PM_CMPLU_STALL_FXLONG ] = { .pme_name = "PM_CMPLU_STALL_FXLONG", .pme_code = 0x000004D016, .pme_short_desc = "Completion stall due to a long latency scalar fixed point instruction (division, square root)", .pme_long_desc = "Completion stall due to a long latency scalar fixed point instruction (division, square root)", }, [ POWER9_PME_PM_CMPLU_STALL_FXU ] = { .pme_name = "PM_CMPLU_STALL_FXU", .pme_code = 0x000002D016, .pme_short_desc = "Finish stall due to a scalar fixed point or CR instruction in the execution pipeline.", .pme_long_desc = "Finish stall due to a scalar fixed point or CR instruction in the execution pipeline. 
These instructions get routed to the ALU, ALU2, and DIV pipes", }, [ POWER9_PME_PM_CMPLU_STALL_HWSYNC ] = { .pme_name = "PM_CMPLU_STALL_HWSYNC", .pme_code = 0x0000030036, .pme_short_desc = "completion stall due to hwsync", .pme_long_desc = "completion stall due to hwsync", }, [ POWER9_PME_PM_CMPLU_STALL_LARX ] = { .pme_name = "PM_CMPLU_STALL_LARX", .pme_code = 0x000001002A, .pme_short_desc = "Finish stall because the NTF instruction was a larx waiting to be satisfied", .pme_long_desc = "Finish stall because the NTF instruction was a larx waiting to be satisfied", }, [ POWER9_PME_PM_CMPLU_STALL_LHS ] = { .pme_name = "PM_CMPLU_STALL_LHS", .pme_code = 0x000002C01A, .pme_short_desc = "Finish stall because the NTF instruction was a load that hit on an older store and it was waiting for store data", .pme_long_desc = "Finish stall because the NTF instruction was a load that hit on an older store and it was waiting for store data", }, [ POWER9_PME_PM_CMPLU_STALL_LMQ_FULL ] = { .pme_name = "PM_CMPLU_STALL_LMQ_FULL", .pme_code = 0x000004C014, .pme_short_desc = "Finish stall because the NTF instruction was a load that missed in the L1 and the LMQ was unable to accept this load miss request because it was full", .pme_long_desc = "Finish stall because the NTF instruction was a load that missed in the L1 and the LMQ was unable to accept this load miss request because it was full", }, [ POWER9_PME_PM_CMPLU_STALL_LOAD_FINISH ] = { .pme_name = "PM_CMPLU_STALL_LOAD_FINISH", .pme_code = 0x000004D014, .pme_short_desc = "Finish stall because the NTF instruction was a load instruction with all its dependencies satisfied just going through the LSU pipe to finish", .pme_long_desc = "Finish stall because the NTF instruction was a load instruction with all its dependencies satisfied just going through the LSU pipe to finish", }, [ POWER9_PME_PM_CMPLU_STALL_LRQ_FULL ] = { .pme_name = "PM_CMPLU_STALL_LRQ_FULL", .pme_code = 0x000002D014, .pme_short_desc = "Finish stall because the NTF 
instruction was a load that was held in LSAQ (load-store address queue) because the LRQ (load-reorder queue) was full", .pme_long_desc = "Finish stall because the NTF instruction was a load that was held in LSAQ (load-store address queue) because the LRQ (load-reorder queue) was full", }, [ POWER9_PME_PM_CMPLU_STALL_LRQ_OTHER ] = { .pme_name = "PM_CMPLU_STALL_LRQ_OTHER", .pme_code = 0x0000010004, .pme_short_desc = "Finish stall due to LRQ miscellaneous reasons, lost arbitration to LMQ slot, bank collisions, set prediction cleanup, set prediction multihit and others", .pme_long_desc = "Finish stall due to LRQ miscellaneous reasons, lost arbitration to LMQ slot, bank collisions, set prediction cleanup, set prediction multihit and others", }, [ POWER9_PME_PM_CMPLU_STALL_LSAQ_ARB ] = { .pme_name = "PM_CMPLU_STALL_LSAQ_ARB", .pme_code = 0x000004E016, .pme_short_desc = "Finish stall because the NTF instruction was a load or store that was held in LSAQ because an older instruction from SRQ or LRQ won arbitration to the LSU pipe when this instruction tried to launch", .pme_long_desc = "Finish stall because the NTF instruction was a load or store that was held in LSAQ because an older instruction from SRQ or LRQ won arbitration to the LSU pipe when this instruction tried to launch", }, [ POWER9_PME_PM_CMPLU_STALL_LSU_FIN ] = { .pme_name = "PM_CMPLU_STALL_LSU_FIN", .pme_code = 0x000001003A, .pme_short_desc = "Finish stall because the NTF instruction was an LSU op (other than a load or a store) with all its dependencies met and just going through the LSU pipe to finish", .pme_long_desc = "Finish stall because the NTF instruction was an LSU op (other than a load or a store) with all its dependencies met and just going through the LSU pipe to finish", }, [ POWER9_PME_PM_CMPLU_STALL_LSU_FLUSH_NEXT ] = { .pme_name = "PM_CMPLU_STALL_LSU_FLUSH_NEXT", .pme_code = 0x000002E01A, .pme_short_desc = "Completion stall of one cycle because the LSU requested to flush the next iop in the 
sequence.", .pme_long_desc = "Completion stall of one cycle because the LSU requested to flush the next iop in the sequence. It takes 1 cycle for the ISU to process this request before the LSU instruction is allowed to complete", }, [ POWER9_PME_PM_CMPLU_STALL_LSU_MFSPR ] = { .pme_name = "PM_CMPLU_STALL_LSU_MFSPR", .pme_code = 0x0000034056, .pme_short_desc = "Finish stall because the NTF instruction was a mfspr instruction targeting an LSU SPR and it was waiting for the register data to be returned", .pme_long_desc = "Finish stall because the NTF instruction was a mfspr instruction targeting an LSU SPR and it was waiting for the register data to be returned", }, [ POWER9_PME_PM_CMPLU_STALL_LSU ] = { .pme_name = "PM_CMPLU_STALL_LSU", .pme_code = 0x000002C010, .pme_short_desc = "Completion stall by LSU instruction", .pme_long_desc = "Completion stall by LSU instruction", }, [ POWER9_PME_PM_CMPLU_STALL_LWSYNC ] = { .pme_name = "PM_CMPLU_STALL_LWSYNC", .pme_code = 0x0000010036, .pme_short_desc = "completion stall due to lwsync", .pme_long_desc = "completion stall due to lwsync", }, [ POWER9_PME_PM_CMPLU_STALL_MTFPSCR ] = { .pme_name = "PM_CMPLU_STALL_MTFPSCR", .pme_code = 0x000004E012, .pme_short_desc = "Completion stall because the ISU is updating the register and notifying the Effective Address Table (EAT)", .pme_long_desc = "Completion stall because the ISU is updating the register and notifying the Effective Address Table (EAT)", }, [ POWER9_PME_PM_CMPLU_STALL_NESTED_TBEGIN ] = { .pme_name = "PM_CMPLU_STALL_NESTED_TBEGIN", .pme_code = 0x000001E05C, .pme_short_desc = "Completion stall because the ISU is updating the TEXASR to keep track of the nested tbegin.", .pme_long_desc = "Completion stall because the ISU is updating the TEXASR to keep track of the nested tbegin. 
This is a short delay, and it includes ROT", }, [ POWER9_PME_PM_CMPLU_STALL_NESTED_TEND ] = { .pme_name = "PM_CMPLU_STALL_NESTED_TEND", .pme_code = 0x000003003C, .pme_short_desc = "Completion stall because the ISU is updating the TEXASR to keep track of the nested tend and decrement the TEXASR nested level.", .pme_long_desc = "Completion stall because the ISU is updating the TEXASR to keep track of the nested tend and decrement the TEXASR nested level. This is a short delay", }, [ POWER9_PME_PM_CMPLU_STALL_NTC_DISP_FIN ] = { .pme_name = "PM_CMPLU_STALL_NTC_DISP_FIN", .pme_code = 0x000004E018, .pme_short_desc = "Finish stall because the NTF instruction was one that must finish at dispatch.", .pme_long_desc = "Finish stall because the NTF instruction was one that must finish at dispatch.", }, [ POWER9_PME_PM_CMPLU_STALL_NTC_FLUSH ] = { .pme_name = "PM_CMPLU_STALL_NTC_FLUSH", .pme_code = 0x000002E01E, .pme_short_desc = "Completion stall due to ntc flush", .pme_long_desc = "Completion stall due to ntc flush", }, [ POWER9_PME_PM_CMPLU_STALL_OTHER_CMPL ] = { .pme_name = "PM_CMPLU_STALL_OTHER_CMPL", .pme_code = 0x0000030006, .pme_short_desc = "Instructions the core completed while this tread was stalled", .pme_long_desc = "Instructions the core completed while this tread was stalled", }, [ POWER9_PME_PM_CMPLU_STALL_PASTE ] = { .pme_name = "PM_CMPLU_STALL_PASTE", .pme_code = 0x000002C016, .pme_short_desc = "Finish stall because the NTF instruction was a paste waiting for response from L2", .pme_long_desc = "Finish stall because the NTF instruction was a paste waiting for response from L2", }, [ POWER9_PME_PM_CMPLU_STALL_PM ] = { .pme_name = "PM_CMPLU_STALL_PM", .pme_code = 0x000003000A, .pme_short_desc = "Finish stall because the NTF instruction was issued to the Permute execution pipe and waiting to finish.", .pme_long_desc = "Finish stall because the NTF instruction was issued to the Permute execution pipe and waiting to finish. 
Includes permute and decimal fixed point instructions (128 bit BCD arithmetic) + a few 128 bit fixpoint add/subtract instructions with carry. Not qualified by vector or multicycle", }, [ POWER9_PME_PM_CMPLU_STALL_SLB ] = { .pme_name = "PM_CMPLU_STALL_SLB", .pme_code = 0x000001E052, .pme_short_desc = "Finish stall because the NTF instruction was awaiting L2 response for an SLB", .pme_long_desc = "Finish stall because the NTF instruction was awaiting L2 response for an SLB", }, [ POWER9_PME_PM_CMPLU_STALL_SPEC_FINISH ] = { .pme_name = "PM_CMPLU_STALL_SPEC_FINISH", .pme_code = 0x0000030028, .pme_short_desc = "Finish stall while waiting for the non-speculative finish of either a stcx waiting for its result or a load waiting for non-critical sectors of data and ECC", .pme_long_desc = "Finish stall while waiting for the non-speculative finish of either a stcx waiting for its result or a load waiting for non-critical sectors of data and ECC", }, [ POWER9_PME_PM_CMPLU_STALL_SRQ_FULL ] = { .pme_name = "PM_CMPLU_STALL_SRQ_FULL", .pme_code = 0x0000030016, .pme_short_desc = "Finish stall because the NTF instruction was a store that was held in LSAQ because the SRQ was full", .pme_long_desc = "Finish stall because the NTF instruction was a store that was held in LSAQ because the SRQ was full", }, [ POWER9_PME_PM_CMPLU_STALL_STCX ] = { .pme_name = "PM_CMPLU_STALL_STCX", .pme_code = 0x000002D01C, .pme_short_desc = "Finish stall because the NTF instruction was a stcx waiting for response from L2", .pme_long_desc = "Finish stall because the NTF instruction was a stcx waiting for response from L2", }, [ POWER9_PME_PM_CMPLU_STALL_ST_FWD ] = { .pme_name = "PM_CMPLU_STALL_ST_FWD", .pme_code = 0x000004C01C, .pme_short_desc = "Completion stall due to store forward", .pme_long_desc = "Completion stall due to store forward", }, [ POWER9_PME_PM_CMPLU_STALL_STORE_DATA ] = { .pme_name = "PM_CMPLU_STALL_STORE_DATA", .pme_code = 0x0000030026, .pme_short_desc = "Finish stall because the next to 
finish instruction was a store waiting on data", .pme_long_desc = "Finish stall because the next to finish instruction was a store waiting on data", }, [ POWER9_PME_PM_CMPLU_STALL_STORE_FIN_ARB ] = { .pme_name = "PM_CMPLU_STALL_STORE_FIN_ARB", .pme_code = 0x0000030014, .pme_short_desc = "Finish stall because the NTF instruction was a store waiting for a slot in the store finish pipe.", .pme_long_desc = "Finish stall because the NTF instruction was a store waiting for a slot in the store finish pipe. This means the instruction is ready to finish but there are instructions ahead of it, using the finish pipe", }, [ POWER9_PME_PM_CMPLU_STALL_STORE_FINISH ] = { .pme_name = "PM_CMPLU_STALL_STORE_FINISH", .pme_code = 0x000002C014, .pme_short_desc = "Finish stall because the NTF instruction was a store with all its dependencies met, just waiting to go through the LSU pipe to finish", .pme_long_desc = "Finish stall because the NTF instruction was a store with all its dependencies met, just waiting to go through the LSU pipe to finish", }, [ POWER9_PME_PM_CMPLU_STALL_STORE_PIPE_ARB ] = { .pme_name = "PM_CMPLU_STALL_STORE_PIPE_ARB", .pme_code = 0x000004C010, .pme_short_desc = "Finish stall because the NTF instruction was a store waiting for the next relaunch opportunity after an internal reject.", .pme_long_desc = "Finish stall because the NTF instruction was a store waiting for the next relaunch opportunity after an internal reject. 
This means the instruction is ready to relaunch and tried once but lost arbitration", }, [ POWER9_PME_PM_CMPLU_STALL_SYNC_PMU_INT ] = { .pme_name = "PM_CMPLU_STALL_SYNC_PMU_INT", .pme_code = 0x000002C01E, .pme_short_desc = "Cycles in which the NTC instruction is waiting for a synchronous PMU interrupt", .pme_long_desc = "Cycles in which the NTC instruction is waiting for a synchronous PMU interrupt", }, [ POWER9_PME_PM_CMPLU_STALL_TEND ] = { .pme_name = "PM_CMPLU_STALL_TEND", .pme_code = 0x000001E050, .pme_short_desc = "Finish stall because the NTF instruction was a tend instruction awaiting response from L2", .pme_long_desc = "Finish stall because the NTF instruction was a tend instruction awaiting response from L2", }, [ POWER9_PME_PM_CMPLU_STALL_THRD ] = { .pme_name = "PM_CMPLU_STALL_THRD", .pme_code = 0x000001001C, .pme_short_desc = "Completion Stalled because the thread was blocked", .pme_long_desc = "Completion Stalled because the thread was blocked", }, [ POWER9_PME_PM_CMPLU_STALL_TLBIE ] = { .pme_name = "PM_CMPLU_STALL_TLBIE", .pme_code = 0x000002E01C, .pme_short_desc = "Finish stall because the NTF instruction was a tlbie waiting for response from L2", .pme_long_desc = "Finish stall because the NTF instruction was a tlbie waiting for response from L2", }, [ POWER9_PME_PM_CMPLU_STALL ] = { .pme_name = "PM_CMPLU_STALL", .pme_code = 0x000001E054, .pme_short_desc = "Nothing completed and ICT not empty", .pme_long_desc = "Nothing completed and ICT not empty", }, [ POWER9_PME_PM_CMPLU_STALL_VDPLONG ] = { .pme_name = "PM_CMPLU_STALL_VDPLONG", .pme_code = 0x000003C05A, .pme_short_desc = "Finish stall because the NTF instruction was a scalar multi-cycle instruction issued to the Double Precision execution pipe and waiting to finish.", .pme_long_desc = "Finish stall because the NTF instruction was a scalar multi-cycle instruction issued to the Double Precision execution pipe and waiting to finish. 
Includes binary floating point instructions in 32 and 64 bit binary floating point format. Qualified by NOT vector AND multicycle", }, [ POWER9_PME_PM_CMPLU_STALL_VDP ] = { .pme_name = "PM_CMPLU_STALL_VDP", .pme_code = 0x000004405C, .pme_short_desc = "Finish stall because the NTF instruction was a vector instruction issued to the Double Precision execution pipe and waiting to finish.", .pme_long_desc = "Finish stall because the NTF instruction was a vector instruction issued to the Double Precision execution pipe and waiting to finish. Includes binary floating point instructions in 32 and 64 bit binary floating point format. Not qualified multicycle. Qualified by vector", }, [ POWER9_PME_PM_CMPLU_STALL_VFXLONG ] = { .pme_name = "PM_CMPLU_STALL_VFXLONG", .pme_code = 0x000002E018, .pme_short_desc = "Completion stall due to a long latency vector fixed point instruction (division, square root)", .pme_long_desc = "Completion stall due to a long latency vector fixed point instruction (division, square root)", }, [ POWER9_PME_PM_CMPLU_STALL_VFXU ] = { .pme_name = "PM_CMPLU_STALL_VFXU", .pme_code = 0x000003C05C, .pme_short_desc = "Finish stall due to a vector fixed point instruction in the execution pipeline.", .pme_long_desc = "Finish stall due to a vector fixed point instruction in the execution pipeline. These instructions get routed to the ALU, ALU2, and DIV pipes", }, [ POWER9_PME_PM_CO0_BUSY ] = { .pme_name = "PM_CO0_BUSY", .pme_code = 0x000003608C, .pme_short_desc = "CO mach 0 Busy.", .pme_long_desc = "CO mach 0 Busy. Used by PMU to sample ave CO lifetime (mach0 used as sample point)", }, [ POWER9_PME_PM_CO0_BUSY_ALT ] = { .pme_name = "PM_CO0_BUSY_ALT", .pme_code = 0x000004608C, .pme_short_desc = "CO mach 0 Busy.", .pme_long_desc = "CO mach 0 Busy. 
Used by PMU to sample ave CO lifetime (mach0 used as sample point)", }, [ POWER9_PME_PM_CO_DISP_FAIL ] = { .pme_name = "PM_CO_DISP_FAIL", .pme_code = 0x0000016886, .pme_short_desc = "CO dispatch failed due to all CO machines being busy", .pme_long_desc = "CO dispatch failed due to all CO machines being busy", }, [ POWER9_PME_PM_CO_TM_SC_FOOTPRINT ] = { .pme_name = "PM_CO_TM_SC_FOOTPRINT", .pme_code = 0x0000026086, .pme_short_desc = "L2 did a cleanifdirty CO to the L3 (ie created an SC line in the L3) OR L2 TM_store hit dirty HPC line and L3 indicated SC line formed in L3 on RDR bus", .pme_long_desc = "L2 did a cleanifdirty CO to the L3 (ie created an SC line in the L3) OR L2 TM_store hit dirty HPC line and L3 indicated SC line formed in L3 on RDR bus", }, [ POWER9_PME_PM_CO_USAGE ] = { .pme_name = "PM_CO_USAGE", .pme_code = 0x000002688C, .pme_short_desc = "Continuous 16 cycle (2to1) window where this signals rotates thru sampling each CO machine busy.", .pme_long_desc = "Continuous 16 cycle (2to1) window where this signals rotates thru sampling each CO machine busy. PMU uses this wave to then do 16 cyc count to sample total number of machs running", }, /* See also alternate entries for 000001001E / POWER9_PME_PM_CYC with code(s) 000002001E 000003001E 000004001E at the bottom of this table. */
[ POWER9_PME_PM_CYC ] = { .pme_name = "PM_CYC", .pme_code = 0x000001001E, .pme_short_desc = "Processor cycles", .pme_long_desc = "Processor cycles", }, [ POWER9_PME_PM_DARQ0_0_3_ENTRIES ] = { .pme_name = "PM_DARQ0_0_3_ENTRIES", .pme_code = 0x000004D04A, .pme_short_desc = "Cycles in which 3 or less DARQ entries (out of 12) are in use", .pme_long_desc = "Cycles in which 3 or less DARQ entries (out of 12) are in use", }, [ POWER9_PME_PM_DARQ0_10_12_ENTRIES ] = { .pme_name = "PM_DARQ0_10_12_ENTRIES", .pme_code = 0x000001D058, .pme_short_desc = "Cycles in which 10 or more DARQ entries (out of 12) are in use", .pme_long_desc = "Cycles in which 10 or more DARQ entries (out of 12) are in use", }, [ POWER9_PME_PM_DARQ0_4_6_ENTRIES ] = { .pme_name = "PM_DARQ0_4_6_ENTRIES", .pme_code = 0x000003504E, .pme_short_desc = "Cycles in which 4, 5, or 6 DARQ entries (out of 12) are in use", .pme_long_desc = "Cycles in which 4, 5, or 6 DARQ entries (out of 12) are in use", }, [ POWER9_PME_PM_DARQ0_7_9_ENTRIES ] = { .pme_name = "PM_DARQ0_7_9_ENTRIES", .pme_code = 0x000002E050, .pme_short_desc = "Cycles in which 7,8, or 9 DARQ entries (out of 12) are in use", .pme_long_desc = "Cycles in which 7,8, or 9 DARQ entries (out of 12) are in use", }, [ POWER9_PME_PM_DARQ1_0_3_ENTRIES ] = { .pme_name = "PM_DARQ1_0_3_ENTRIES", .pme_code = 0x000004C122, .pme_short_desc = "Cycles in which 3 or fewer DARQ1 entries (out of 12) are in use", .pme_long_desc = "Cycles in which 3 or fewer DARQ1 entries (out of 12) are in use", }, [ POWER9_PME_PM_DARQ1_10_12_ENTRIES ] = { .pme_name = "PM_DARQ1_10_12_ENTRIES", .pme_code = 0x0000020058, .pme_short_desc = "Cycles in which 10 or more DARQ1 entries (out of 12) are in use", .pme_long_desc = "Cycles in which 10 or more DARQ1 entries (out of 12) are in use", }, [ POWER9_PME_PM_DARQ1_4_6_ENTRIES ] = { .pme_name = "PM_DARQ1_4_6_ENTRIES", .pme_code = 0x000003E050, .pme_short_desc = "Cycles in which 4, 5, or 6 DARQ1 entries (out of 12) are in use", .pme_long_desc
= "Cycles in which 4, 5, or 6 DARQ1 entries (out of 12) are in use", }, [ POWER9_PME_PM_DARQ1_7_9_ENTRIES ] = { .pme_name = "PM_DARQ1_7_9_ENTRIES", .pme_code = 0x000002005A, .pme_short_desc = "Cycles in which 7 to 9 DARQ1 entries (out of 12) are in use", .pme_long_desc = "Cycles in which 7 to 9 DARQ1 entries (out of 12) are in use", }, [ POWER9_PME_PM_DARQ_STORE_REJECT ] = { .pme_name = "PM_DARQ_STORE_REJECT", .pme_code = 0x000004405E, .pme_short_desc = "The DARQ attempted to transmit a store into an LSAQ or SRQ entry but It was rejected.", .pme_long_desc = "The DARQ attempted to transmit a store into an LSAQ or SRQ entry but It was rejected. Divide by PM_DARQ_STORE_XMIT to get reject ratio", }, [ POWER9_PME_PM_DARQ_STORE_XMIT ] = { .pme_name = "PM_DARQ_STORE_XMIT", .pme_code = 0x0000030064, .pme_short_desc = "The DARQ attempted to transmit a store into an LSAQ or SRQ entry.", .pme_long_desc = "The DARQ attempted to transmit a store into an LSAQ or SRQ entry. Includes rejects. Not qualified by thread, so it includes counts for the whole core", }, [ POWER9_PME_PM_DATA_CHIP_PUMP_CPRED ] = { .pme_name = "PM_DATA_CHIP_PUMP_CPRED", .pme_code = 0x000001C050, .pme_short_desc = "Initial and Final Pump Scope was chip pump (prediction=correct) for a demand load", .pme_long_desc = "Initial and Final Pump Scope was chip pump (prediction=correct) for a demand load", }, [ POWER9_PME_PM_DATA_FROM_DL2L3_MOD ] = { .pme_name = "PM_DATA_FROM_DL2L3_MOD", .pme_code = 0x000004C048, .pme_short_desc = "The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a demand load", .pme_long_desc = "The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a demand load", }, [ POWER9_PME_PM_DATA_FROM_DL2L3_SHR ] = { .pme_name = "PM_DATA_FROM_DL2L3_SHR", .pme_code = 0x000003C048, .pme_short_desc = "The 
processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a demand load", .pme_long_desc = "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a demand load", }, [ POWER9_PME_PM_DATA_FROM_DL4 ] = { .pme_name = "PM_DATA_FROM_DL4", .pme_code = 0x000003C04C, .pme_short_desc = "The processor's data cache was reloaded from another chip's L4 on a different Node or Group (Distant) due to a demand load", .pme_long_desc = "The processor's data cache was reloaded from another chip's L4 on a different Node or Group (Distant) due to a demand load", }, [ POWER9_PME_PM_DATA_FROM_DMEM ] = { .pme_name = "PM_DATA_FROM_DMEM", .pme_code = 0x000004C04C, .pme_short_desc = "The processor's data cache was reloaded from another chip's memory on the same Node or Group (Distant) due to a demand load", .pme_long_desc = "The processor's data cache was reloaded from another chip's memory on the same Node or Group (Distant) due to a demand load", }, [ POWER9_PME_PM_DATA_FROM_L21_MOD ] = { .pme_name = "PM_DATA_FROM_L21_MOD", .pme_code = 0x000004C046, .pme_short_desc = "The processor's data cache was reloaded with Modified (M) data from another core's L2 on the same chip due to a demand load", .pme_long_desc = "The processor's data cache was reloaded with Modified (M) data from another core's L2 on the same chip due to a demand load", }, [ POWER9_PME_PM_DATA_FROM_L21_SHR ] = { .pme_name = "PM_DATA_FROM_L21_SHR", .pme_code = 0x000003C046, .pme_short_desc = "The processor's data cache was reloaded with Shared (S) data from another core's L2 on the same chip due to a demand load", .pme_long_desc = "The processor's data cache was reloaded with Shared (S) data from another core's L2 on the same chip due to a demand load", }, [ POWER9_PME_PM_DATA_FROM_L2_DISP_CONFLICT_LDHITST ] = { .pme_name = 
"PM_DATA_FROM_L2_DISP_CONFLICT_LDHITST", .pme_code = 0x000003C040, .pme_short_desc = "The processor's data cache was reloaded from local core's L2 with load hit store conflict due to a demand load", .pme_long_desc = "The processor's data cache was reloaded from local core's L2 with load hit store conflict due to a demand load", }, [ POWER9_PME_PM_DATA_FROM_L2_DISP_CONFLICT_OTHER ] = { .pme_name = "PM_DATA_FROM_L2_DISP_CONFLICT_OTHER", .pme_code = 0x000004C040, .pme_short_desc = "The processor's data cache was reloaded from local core's L2 with dispatch conflict due to a demand load", .pme_long_desc = "The processor's data cache was reloaded from local core's L2 with dispatch conflict due to a demand load", }, [ POWER9_PME_PM_DATA_FROM_L2_MEPF ] = { .pme_name = "PM_DATA_FROM_L2_MEPF", .pme_code = 0x000002C040, .pme_short_desc = "The processor's data cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state due to a demand load", .pme_long_desc = "The processor's data cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state due to a demand load", }, [ POWER9_PME_PM_DATA_FROM_L2MISS_MOD ] = { .pme_name = "PM_DATA_FROM_L2MISS_MOD", .pme_code = 0x000001C04E, .pme_short_desc = "The processor's data cache was reloaded from a location other than the local core's L2 due to a demand load", .pme_long_desc = "The processor's data cache was reloaded from a location other than the local core's L2 due to a demand load", }, [ POWER9_PME_PM_DATA_FROM_L2MISS ] = { .pme_name = "PM_DATA_FROM_L2MISS", .pme_code = 0x00000200FE, .pme_short_desc = "Demand LD - L2 Miss (not L2 hit)", .pme_long_desc = "Demand LD - L2 Miss (not L2 hit)", }, [ POWER9_PME_PM_DATA_FROM_L2_NO_CONFLICT ] = { .pme_name = "PM_DATA_FROM_L2_NO_CONFLICT", .pme_code = 0x000001C040, .pme_short_desc = "The processor's data cache was reloaded from local core's L2 without conflict due to a demand load", .pme_long_desc = "The processor's data cache was reloaded from 
local core's L2 without conflict due to a demand load", }, [ POWER9_PME_PM_DATA_FROM_L2 ] = { .pme_name = "PM_DATA_FROM_L2", .pme_code = 0x000001C042, .pme_short_desc = "The processor's data cache was reloaded from local core's L2 due to a demand load", .pme_long_desc = "The processor's data cache was reloaded from local core's L2 due to a demand load", }, [ POWER9_PME_PM_DATA_FROM_L31_ECO_MOD ] = { .pme_name = "PM_DATA_FROM_L31_ECO_MOD", .pme_code = 0x000004C044, .pme_short_desc = "The processor's data cache was reloaded with Modified (M) data from another core's ECO L3 on the same chip due to a demand load", .pme_long_desc = "The processor's data cache was reloaded with Modified (M) data from another core's ECO L3 on the same chip due to a demand load", }, [ POWER9_PME_PM_DATA_FROM_L31_ECO_SHR ] = { .pme_name = "PM_DATA_FROM_L31_ECO_SHR", .pme_code = 0x000003C044, .pme_short_desc = "The processor's data cache was reloaded with Shared (S) data from another core's ECO L3 on the same chip due to a demand load", .pme_long_desc = "The processor's data cache was reloaded with Shared (S) data from another core's ECO L3 on the same chip due to a demand load", }, [ POWER9_PME_PM_DATA_FROM_L31_MOD ] = { .pme_name = "PM_DATA_FROM_L31_MOD", .pme_code = 0x000002C044, .pme_short_desc = "The processor's data cache was reloaded with Modified (M) data from another core's L3 on the same chip due to a demand load", .pme_long_desc = "The processor's data cache was reloaded with Modified (M) data from another core's L3 on the same chip due to a demand load", }, [ POWER9_PME_PM_DATA_FROM_L31_SHR ] = { .pme_name = "PM_DATA_FROM_L31_SHR", .pme_code = 0x000001C046, .pme_short_desc = "The processor's data cache was reloaded with Shared (S) data from another core's L3 on the same chip due to a demand load", .pme_long_desc = "The processor's data cache was reloaded with Shared (S) data from another core's L3 on the same chip due to a demand load", }, [ 
POWER9_PME_PM_DATA_FROM_L3_DISP_CONFLICT ] = { .pme_name = "PM_DATA_FROM_L3_DISP_CONFLICT", .pme_code = 0x000003C042, .pme_short_desc = "The processor's data cache was reloaded from local core's L3 with dispatch conflict due to a demand load", .pme_long_desc = "The processor's data cache was reloaded from local core's L3 with dispatch conflict due to a demand load", }, [ POWER9_PME_PM_DATA_FROM_L3_MEPF ] = { .pme_name = "PM_DATA_FROM_L3_MEPF", .pme_code = 0x000002C042, .pme_short_desc = "The processor's data cache was reloaded from local core's L3 without dispatch conflicts hit on Mepf state due to a demand load", .pme_long_desc = "The processor's data cache was reloaded from local core's L3 without dispatch conflicts hit on Mepf state due to a demand load", }, [ POWER9_PME_PM_DATA_FROM_L3MISS_MOD ] = { .pme_name = "PM_DATA_FROM_L3MISS_MOD", .pme_code = 0x000004C04E, .pme_short_desc = "The processor's data cache was reloaded from a location other than the local core's L3 due to a demand load", .pme_long_desc = "The processor's data cache was reloaded from a location other than the local core's L3 due to a demand load", }, [ POWER9_PME_PM_DATA_FROM_L3MISS ] = { .pme_name = "PM_DATA_FROM_L3MISS", .pme_code = 0x00000300FE, .pme_short_desc = "Demand LD - L3 Miss (not L2 hit and not L3 hit)", .pme_long_desc = "Demand LD - L3 Miss (not L2 hit and not L3 hit)", }, [ POWER9_PME_PM_DATA_FROM_L3_NO_CONFLICT ] = { .pme_name = "PM_DATA_FROM_L3_NO_CONFLICT", .pme_code = 0x000001C044, .pme_short_desc = "The processor's data cache was reloaded from local core's L3 without conflict due to a demand load", .pme_long_desc = "The processor's data cache was reloaded from local core's L3 without conflict due to a demand load", }, [ POWER9_PME_PM_DATA_FROM_L3 ] = { .pme_name = "PM_DATA_FROM_L3", .pme_code = 0x000004C042, .pme_short_desc = "The processor's data cache was reloaded from local core's L3 due to a demand load", .pme_long_desc = "The processor's data cache was reloaded from 
local core's L3 due to a demand load", }, [ POWER9_PME_PM_DATA_FROM_LL4 ] = { .pme_name = "PM_DATA_FROM_LL4", .pme_code = 0x000001C04C, .pme_short_desc = "The processor's data cache was reloaded from the local chip's L4 cache due to a demand load", .pme_long_desc = "The processor's data cache was reloaded from the local chip's L4 cache due to a demand load", }, [ POWER9_PME_PM_DATA_FROM_LMEM ] = { .pme_name = "PM_DATA_FROM_LMEM", .pme_code = 0x000002C048, .pme_short_desc = "The processor's data cache was reloaded from the local chip's Memory due to a demand load", .pme_long_desc = "The processor's data cache was reloaded from the local chip's Memory due to a demand load", }, [ POWER9_PME_PM_DATA_FROM_MEMORY ] = { .pme_name = "PM_DATA_FROM_MEMORY", .pme_code = 0x00000400FE, .pme_short_desc = "The processor's data cache was reloaded from a memory location including L4 from local remote or distant due to a demand load", .pme_long_desc = "The processor's data cache was reloaded from a memory location including L4 from local remote or distant due to a demand load", }, [ POWER9_PME_PM_DATA_FROM_OFF_CHIP_CACHE ] = { .pme_name = "PM_DATA_FROM_OFF_CHIP_CACHE", .pme_code = 0x000004C04A, .pme_short_desc = "The processor's data cache was reloaded either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a demand load", .pme_long_desc = "The processor's data cache was reloaded either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a demand load", }, [ POWER9_PME_PM_DATA_FROM_ON_CHIP_CACHE ] = { .pme_name = "PM_DATA_FROM_ON_CHIP_CACHE", .pme_code = 0x000001C048, .pme_short_desc = "The processor's data cache was reloaded either shared or modified data from another core's L2/L3 on the same chip due to a demand load", .pme_long_desc = "The processor's data cache was reloaded either shared or modified data from another core's L2/L3 on the same chip due to a demand load", }, [ 
POWER9_PME_PM_DATA_FROM_RL2L3_MOD ] = { .pme_name = "PM_DATA_FROM_RL2L3_MOD", .pme_code = 0x000002C046, .pme_short_desc = "The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a demand load", .pme_long_desc = "The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a demand load", }, [ POWER9_PME_PM_DATA_FROM_RL2L3_SHR ] = { .pme_name = "PM_DATA_FROM_RL2L3_SHR", .pme_code = 0x000001C04A, .pme_short_desc = "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a demand load", .pme_long_desc = "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a demand load", }, [ POWER9_PME_PM_DATA_FROM_RL4 ] = { .pme_name = "PM_DATA_FROM_RL4", .pme_code = 0x000002C04A, .pme_short_desc = "The processor's data cache was reloaded from another chip's L4 on the same Node or Group ( Remote) due to a demand load", .pme_long_desc = "The processor's data cache was reloaded from another chip's L4 on the same Node or Group ( Remote) due to a demand load", }, [ POWER9_PME_PM_DATA_FROM_RMEM ] = { .pme_name = "PM_DATA_FROM_RMEM", .pme_code = 0x000003C04A, .pme_short_desc = "The processor's data cache was reloaded from another chip's memory on the same Node or Group ( Remote) due to a demand load", .pme_long_desc = "The processor's data cache was reloaded from another chip's memory on the same Node or Group ( Remote) due to a demand load", }, [ POWER9_PME_PM_DATA_GRP_PUMP_CPRED ] = { .pme_name = "PM_DATA_GRP_PUMP_CPRED", .pme_code = 0x000002C050, .pme_short_desc = "Initial and Final Pump Scope was group pump (prediction=correct) for a demand load", .pme_long_desc = "Initial and Final Pump Scope was group pump 
(prediction=correct) for a demand load", }, [ POWER9_PME_PM_DATA_GRP_PUMP_MPRED_RTY ] = { .pme_name = "PM_DATA_GRP_PUMP_MPRED_RTY", .pme_code = 0x000001C052, .pme_short_desc = "Final Pump Scope (Group) ended up larger than Initial Pump Scope (Chip) for a demand load", .pme_long_desc = "Final Pump Scope (Group) ended up larger than Initial Pump Scope (Chip) for a demand load", }, [ POWER9_PME_PM_DATA_GRP_PUMP_MPRED ] = { .pme_name = "PM_DATA_GRP_PUMP_MPRED", .pme_code = 0x000002C052, .pme_short_desc = "Final Pump Scope (Group) ended up either larger or smaller than Initial Pump Scope for a demand load", .pme_long_desc = "Final Pump Scope (Group) ended up either larger or smaller than Initial Pump Scope for a demand load", }, [ POWER9_PME_PM_DATA_PUMP_CPRED ] = { .pme_name = "PM_DATA_PUMP_CPRED", .pme_code = 0x000001C054, .pme_short_desc = "Pump prediction correct.", .pme_long_desc = "Pump prediction correct. Counts across all types of pumps for a demand load", }, [ POWER9_PME_PM_DATA_PUMP_MPRED ] = { .pme_name = "PM_DATA_PUMP_MPRED", .pme_code = 0x000004C052, .pme_short_desc = "Pump misprediction.", .pme_long_desc = "Pump misprediction. 
Counts across all types of pumps for a demand load", }, [ POWER9_PME_PM_DATA_STORE ] = { .pme_name = "PM_DATA_STORE", .pme_code = 0x000000F0A0, .pme_short_desc = "All ops that drain from s2q to L2 containing data", .pme_long_desc = "All ops that drain from s2q to L2 containing data", }, [ POWER9_PME_PM_DATA_SYS_PUMP_CPRED ] = { .pme_name = "PM_DATA_SYS_PUMP_CPRED", .pme_code = 0x000003C050, .pme_short_desc = "Initial and Final Pump Scope was system pump (prediction=correct) for a demand load", .pme_long_desc = "Initial and Final Pump Scope was system pump (prediction=correct) for a demand load", }, [ POWER9_PME_PM_DATA_SYS_PUMP_MPRED_RTY ] = { .pme_name = "PM_DATA_SYS_PUMP_MPRED_RTY", .pme_code = 0x000004C050, .pme_short_desc = "Final Pump Scope (system) ended up larger than Initial Pump Scope (Chip/Group) for a demand load", .pme_long_desc = "Final Pump Scope (system) ended up larger than Initial Pump Scope (Chip/Group) for a demand load", }, [ POWER9_PME_PM_DATA_SYS_PUMP_MPRED ] = { .pme_name = "PM_DATA_SYS_PUMP_MPRED", .pme_code = 0x000003C052, .pme_short_desc = "Final Pump Scope (system) mispredicted.", .pme_long_desc = "Final Pump Scope (system) mispredicted. Either the original scope was too small (Chip/Group) or the original scope was System and it should have been smaller. Counts for a demand load", }, [ POWER9_PME_PM_DATA_TABLEWALK_CYC ] = { .pme_name = "PM_DATA_TABLEWALK_CYC", .pme_code = 0x000003001A, .pme_short_desc = "Data Tablewalk Cycles.", .pme_long_desc = "Data Tablewalk Cycles. Could be 1 or 2 active tablewalks. Includes data prefetches.", }, [ POWER9_PME_PM_DC_DEALLOC_NO_CONF ] = { .pme_name = "PM_DC_DEALLOC_NO_CONF", .pme_code = 0x000000F8AC, .pme_short_desc = "A demand load referenced a line in an active fuzzy prefetch stream.", .pme_long_desc = "A demand load referenced a line in an active fuzzy prefetch stream. 
The stream could have been allocated through the hardware prefetch mechanism or through software.Fuzzy stream confirm (out of order effects, or pf cant keep up)", }, [ POWER9_PME_PM_DC_PREF_CONF ] = { .pme_name = "PM_DC_PREF_CONF", .pme_code = 0x000000F0A8, .pme_short_desc = "A demand load referenced a line in an active prefetch stream.", .pme_long_desc = "A demand load referenced a line in an active prefetch stream. The stream could have been allocated through the hardware prefetch mechanism or through software. Includes forwards and backwards streams", }, [ POWER9_PME_PM_DC_PREF_CONS_ALLOC ] = { .pme_name = "PM_DC_PREF_CONS_ALLOC", .pme_code = 0x000000F0B4, .pme_short_desc = "Prefetch stream allocated in the conservative phase by either the hardware prefetch mechanism or software prefetch", .pme_long_desc = "Prefetch stream allocated in the conservative phase by either the hardware prefetch mechanism or software prefetch", }, [ POWER9_PME_PM_DC_PREF_FUZZY_CONF ] = { .pme_name = "PM_DC_PREF_FUZZY_CONF", .pme_code = 0x000000F8A8, .pme_short_desc = "A demand load referenced a line in an active fuzzy prefetch stream.", .pme_long_desc = "A demand load referenced a line in an active fuzzy prefetch stream. The stream could have been allocated through the hardware prefetch mechanism or through software.Fuzzy stream confirm (out of order effects, or pf cant keep up)", }, [ POWER9_PME_PM_DC_PREF_HW_ALLOC ] = { .pme_name = "PM_DC_PREF_HW_ALLOC", .pme_code = 0x000000F0A4, .pme_short_desc = "Prefetch stream allocated by the hardware prefetch mechanism", .pme_long_desc = "Prefetch stream allocated by the hardware prefetch mechanism", }, [ POWER9_PME_PM_DC_PREF_STRIDED_CONF ] = { .pme_name = "PM_DC_PREF_STRIDED_CONF", .pme_code = 0x000000F0AC, .pme_short_desc = "A demand load referenced a line in an active strided prefetch stream.", .pme_long_desc = "A demand load referenced a line in an active strided prefetch stream. 
The stream could have been allocated through the hardware prefetch mechanism or through software.", }, [ POWER9_PME_PM_DC_PREF_SW_ALLOC ] = { .pme_name = "PM_DC_PREF_SW_ALLOC", .pme_code = 0x000000F8A4, .pme_short_desc = "Prefetch stream allocated by software prefetching", .pme_long_desc = "Prefetch stream allocated by software prefetching", }, [ POWER9_PME_PM_DC_PREF_XCONS_ALLOC ] = { .pme_name = "PM_DC_PREF_XCONS_ALLOC", .pme_code = 0x000000F8B4, .pme_short_desc = "Prefetch stream allocated in the Ultra conservative phase by either the hardware prefetch mechanism or software prefetch", .pme_long_desc = "Prefetch stream allocated in the Ultra conservative phase by either the hardware prefetch mechanism or software prefetch", }, [ POWER9_PME_PM_DECODE_FUSION_CONST_GEN ] = { .pme_name = "PM_DECODE_FUSION_CONST_GEN", .pme_code = 0x00000048B4, .pme_short_desc = "32-bit constant generation", .pme_long_desc = "32-bit constant generation", }, [ POWER9_PME_PM_DECODE_FUSION_EXT_ADD ] = { .pme_name = "PM_DECODE_FUSION_EXT_ADD", .pme_code = 0x0000005084, .pme_short_desc = "32-bit extended addition", .pme_long_desc = "32-bit extended addition", }, [ POWER9_PME_PM_DECODE_FUSION_LD_ST_DISP ] = { .pme_name = "PM_DECODE_FUSION_LD_ST_DISP", .pme_code = 0x00000048A8, .pme_short_desc = "32-bit displacement D-form and 16-bit displacement X-form", .pme_long_desc = "32-bit displacement D-form and 16-bit displacement X-form", }, [ POWER9_PME_PM_DECODE_FUSION_OP_PRESERV ] = { .pme_name = "PM_DECODE_FUSION_OP_PRESERV", .pme_code = 0x0000005088, .pme_short_desc = "Destructive op operand preservation", .pme_long_desc = "Destructive op operand preservation", }, [ POWER9_PME_PM_DECODE_HOLD_ICT_FULL ] = { .pme_name = "PM_DECODE_HOLD_ICT_FULL", .pme_code = 0x00000058A8, .pme_short_desc = "Counts the number of cycles in which the IFU was not able to decode and transmit one or more instructions because all itags were in use.", .pme_long_desc = "Counts the number of cycles in which the IFU was not 
able to decode and transmit one or more instructions because all itags were in use. This means the ICT is full for this thread", }, [ POWER9_PME_PM_DECODE_LANES_NOT_AVAIL ] = { .pme_name = "PM_DECODE_LANES_NOT_AVAIL", .pme_code = 0x0000005884, .pme_short_desc = "Decode has something to transmit but dispatch lanes are not available", .pme_long_desc = "Decode has something to transmit but dispatch lanes are not available", }, [ POWER9_PME_PM_DERAT_MISS_16G ] = { .pme_name = "PM_DERAT_MISS_16G", .pme_code = 0x000004C054, .pme_short_desc = "Data ERAT Miss (Data TLB Access) page size 16G", .pme_long_desc = "Data ERAT Miss (Data TLB Access) page size 16G", }, [ POWER9_PME_PM_DERAT_MISS_16M ] = { .pme_name = "PM_DERAT_MISS_16M", .pme_code = 0x000003C054, .pme_short_desc = "Data ERAT Miss (Data TLB Access) page size 16M", .pme_long_desc = "Data ERAT Miss (Data TLB Access) page size 16M", }, [ POWER9_PME_PM_DERAT_MISS_1G ] = { .pme_name = "PM_DERAT_MISS_1G", .pme_code = 0x000002C05A, .pme_short_desc = "Data ERAT Miss (Data TLB Access) page size 1G.", .pme_long_desc = "Data ERAT Miss (Data TLB Access) page size 1G. Implies radix translation", }, [ POWER9_PME_PM_DERAT_MISS_2M ] = { .pme_name = "PM_DERAT_MISS_2M", .pme_code = 0x000001C05A, .pme_short_desc = "Data ERAT Miss (Data TLB Access) page size 2M.", .pme_long_desc = "Data ERAT Miss (Data TLB Access) page size 2M. 
Implies radix translation", }, [ POWER9_PME_PM_DERAT_MISS_4K ] = { .pme_name = "PM_DERAT_MISS_4K", .pme_code = 0x000001C056, .pme_short_desc = "Data ERAT Miss (Data TLB Access) page size 4K", .pme_long_desc = "Data ERAT Miss (Data TLB Access) page size 4K", }, [ POWER9_PME_PM_DERAT_MISS_64K ] = { .pme_name = "PM_DERAT_MISS_64K", .pme_code = 0x000002C054, .pme_short_desc = "Data ERAT Miss (Data TLB Access) page size 64K", .pme_long_desc = "Data ERAT Miss (Data TLB Access) page size 64K", }, [ POWER9_PME_PM_DFU_BUSY ] = { .pme_name = "PM_DFU_BUSY", .pme_code = 0x000004D04C, .pme_short_desc = "Cycles in which all 4 Decimal Floating Point units are busy.", .pme_long_desc = "Cycles in which all 4 Decimal Floating Point units are busy. The DFU is running at capacity", }, [ POWER9_PME_PM_DISP_CLB_HELD_BAL ] = { .pme_name = "PM_DISP_CLB_HELD_BAL", .pme_code = 0x000000288C, .pme_short_desc = "Dispatch/CLB Hold: Balance Flush", .pme_long_desc = "Dispatch/CLB Hold: Balance Flush", }, [ POWER9_PME_PM_DISP_CLB_HELD_SB ] = { .pme_name = "PM_DISP_CLB_HELD_SB", .pme_code = 0x0000002090, .pme_short_desc = "Dispatch/CLB Hold: Scoreboard", .pme_long_desc = "Dispatch/CLB Hold: Scoreboard", }, [ POWER9_PME_PM_DISP_CLB_HELD_TLBIE ] = { .pme_name = "PM_DISP_CLB_HELD_TLBIE", .pme_code = 0x0000002890, .pme_short_desc = "Dispatch Hold: Due to TLBIE", .pme_long_desc = "Dispatch Hold: Due to TLBIE", }, [ POWER9_PME_PM_DISP_HELD_HB_FULL ] = { .pme_name = "PM_DISP_HELD_HB_FULL", .pme_code = 0x000003D05C, .pme_short_desc = "Dispatch held due to History Buffer full.", .pme_long_desc = "Dispatch held due to History Buffer full. Could be GPR/VSR/VMR/FPR/CR/XVF; CR; XVF (XER/VSCR/FPSCR)", }, [ POWER9_PME_PM_DISP_HELD_ISSQ_FULL ] = { .pme_name = "PM_DISP_HELD_ISSQ_FULL", .pme_code = 0x0000020006, .pme_short_desc = "Dispatch held due to Issue q full.", .pme_long_desc = "Dispatch held due to Issue q full. 
Includes issue queue and branch queue", }, [ POWER9_PME_PM_DISP_HELD_SYNC_HOLD ] = { .pme_name = "PM_DISP_HELD_SYNC_HOLD", .pme_code = 0x000004003C, .pme_short_desc = "Cycles in which dispatch is held because of a synchronizing instruction in the pipeline", .pme_long_desc = "Cycles in which dispatch is held because of a synchronizing instruction in the pipeline", }, [ POWER9_PME_PM_DISP_HELD_TBEGIN ] = { .pme_name = "PM_DISP_HELD_TBEGIN", .pme_code = 0x00000028B0, .pme_short_desc = "This outer tbegin transaction cannot be dispatched until the previous tend instruction completes", .pme_long_desc = "This outer tbegin transaction cannot be dispatched until the previous tend instruction completes", }, [ POWER9_PME_PM_DISP_HELD ] = { .pme_name = "PM_DISP_HELD", .pme_code = 0x0000010006, .pme_short_desc = "Dispatch Held", .pme_long_desc = "Dispatch Held", }, [ POWER9_PME_PM_DISP_STARVED ] = { .pme_name = "PM_DISP_STARVED", .pme_code = 0x0000030008, .pme_short_desc = "Dispatched Starved", .pme_long_desc = "Dispatched Starved", }, [ POWER9_PME_PM_DP_QP_FLOP_CMPL ] = { .pme_name = "PM_DP_QP_FLOP_CMPL", .pme_code = 0x000004D05C, .pme_short_desc = "Double-Precion or Quad-Precision instruction completed", .pme_long_desc = "Double-Precion or Quad-Precision instruction completed", }, [ POWER9_PME_PM_DPTEG_FROM_DL2L3_MOD ] = { .pme_name = "PM_DPTEG_FROM_DL2L3_MOD", .pme_code = 0x000004E048, .pme_short_desc = "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. 
Only PTE reloads are included", }, [ POWER9_PME_PM_DPTEG_FROM_DL2L3_SHR ] = { .pme_name = "PM_DPTEG_FROM_DL2L3_SHR", .pme_code = 0x000003E048, .pme_short_desc = "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", }, [ POWER9_PME_PM_DPTEG_FROM_DL4 ] = { .pme_name = "PM_DPTEG_FROM_DL4", .pme_code = 0x000003E04C, .pme_short_desc = "A Page Table Entry was loaded into the TLB from another chip's L4 on a different Node or Group (Distant) due to a data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB from another chip's L4 on a different Node or Group (Distant) due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", }, [ POWER9_PME_PM_DPTEG_FROM_DMEM ] = { .pme_name = "PM_DPTEG_FROM_DMEM", .pme_code = 0x000004E04C, .pme_short_desc = "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group (Distant) due to a data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group (Distant) due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. 
Only PTE reloads are included", }, [ POWER9_PME_PM_DPTEG_FROM_L21_MOD ] = { .pme_name = "PM_DPTEG_FROM_L21_MOD", .pme_code = 0x000004E046, .pme_short_desc = "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L2 on the same chip due to a data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L2 on the same chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", }, [ POWER9_PME_PM_DPTEG_FROM_L21_SHR ] = { .pme_name = "PM_DPTEG_FROM_L21_SHR", .pme_code = 0x000003E046, .pme_short_desc = "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L2 on the same chip due to a data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L2 on the same chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", }, [ POWER9_PME_PM_DPTEG_FROM_L2_MEPF ] = { .pme_name = "PM_DPTEG_FROM_L2_MEPF", .pme_code = 0x000002E040, .pme_short_desc = "A Page Table Entry was loaded into the TLB from local core's L2 hit without dispatch conflicts on Mepf state.", .pme_long_desc = "A Page Table Entry was loaded into the TLB from local core's L2 hit without dispatch conflicts on Mepf state. due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", }, [ POWER9_PME_PM_DPTEG_FROM_L2MISS ] = { .pme_name = "PM_DPTEG_FROM_L2MISS", .pme_code = 0x000001E04E, .pme_short_desc = "A Page Table Entry was loaded into the TLB from a location other than the local core's L2 due to a data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB from a location other than the local core's L2 due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. 
Only PTE reloads are included", }, [ POWER9_PME_PM_DPTEG_FROM_L2_NO_CONFLICT ] = { .pme_name = "PM_DPTEG_FROM_L2_NO_CONFLICT", .pme_code = 0x000001E040, .pme_short_desc = "A Page Table Entry was loaded into the TLB from local core's L2 without conflict due to a data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB from local core's L2 without conflict due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", }, [ POWER9_PME_PM_DPTEG_FROM_L2 ] = { .pme_name = "PM_DPTEG_FROM_L2", .pme_code = 0x000001E042, .pme_short_desc = "A Page Table Entry was loaded into the TLB from local core's L2 due to a data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB from local core's L2 due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", }, [ POWER9_PME_PM_DPTEG_FROM_L31_ECO_MOD ] = { .pme_name = "PM_DPTEG_FROM_L31_ECO_MOD", .pme_code = 0x000004E044, .pme_short_desc = "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's ECO L3 on the same chip due to a data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's ECO L3 on the same chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", }, [ POWER9_PME_PM_DPTEG_FROM_L31_ECO_SHR ] = { .pme_name = "PM_DPTEG_FROM_L31_ECO_SHR", .pme_code = 0x000003E044, .pme_short_desc = "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's ECO L3 on the same chip due to a data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's ECO L3 on the same chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. 
Only PTE reloads are included", }, [ POWER9_PME_PM_DPTEG_FROM_L31_MOD ] = { .pme_name = "PM_DPTEG_FROM_L31_MOD", .pme_code = 0x000002E044, .pme_short_desc = "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L3 on the same chip due to a data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L3 on the same chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", }, [ POWER9_PME_PM_DPTEG_FROM_L31_SHR ] = { .pme_name = "PM_DPTEG_FROM_L31_SHR", .pme_code = 0x000001E046, .pme_short_desc = "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L3 on the same chip due to a data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L3 on the same chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", }, [ POWER9_PME_PM_DPTEG_FROM_L3_DISP_CONFLICT ] = { .pme_name = "PM_DPTEG_FROM_L3_DISP_CONFLICT", .pme_code = 0x000003E042, .pme_short_desc = "A Page Table Entry was loaded into the TLB from local core's L3 with dispatch conflict due to a data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB from local core's L3 with dispatch conflict due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", }, [ POWER9_PME_PM_DPTEG_FROM_L3_MEPF ] = { .pme_name = "PM_DPTEG_FROM_L3_MEPF", .pme_code = 0x000002E042, .pme_short_desc = "A Page Table Entry was loaded into the TLB from local core's L3 without dispatch conflicts hit on Mepf state.", .pme_long_desc = "A Page Table Entry was loaded into the TLB from local core's L3 without dispatch conflicts hit on Mepf state. due to a data side request. 
When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", }, [ POWER9_PME_PM_DPTEG_FROM_L3MISS ] = { .pme_name = "PM_DPTEG_FROM_L3MISS", .pme_code = 0x000004E04E, .pme_short_desc = "A Page Table Entry was loaded into the TLB from a location other than the local core's L3 due to a data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB from a location other than the local core's L3 due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", }, [ POWER9_PME_PM_DPTEG_FROM_L3_NO_CONFLICT ] = { .pme_name = "PM_DPTEG_FROM_L3_NO_CONFLICT", .pme_code = 0x000001E044, .pme_short_desc = "A Page Table Entry was loaded into the TLB from local core's L3 without conflict due to a data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB from local core's L3 without conflict due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", }, [ POWER9_PME_PM_DPTEG_FROM_L3 ] = { .pme_name = "PM_DPTEG_FROM_L3", .pme_code = 0x000004E042, .pme_short_desc = "A Page Table Entry was loaded into the TLB from local core's L3 due to a data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB from local core's L3 due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", }, [ POWER9_PME_PM_DPTEG_FROM_LL4 ] = { .pme_name = "PM_DPTEG_FROM_LL4", .pme_code = 0x000001E04C, .pme_short_desc = "A Page Table Entry was loaded into the TLB from the local chip's L4 cache due to a data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB from the local chip's L4 cache due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. 
Only PTE reloads are included", }, [ POWER9_PME_PM_DPTEG_FROM_LMEM ] = { .pme_name = "PM_DPTEG_FROM_LMEM", .pme_code = 0x000002E048, .pme_short_desc = "A Page Table Entry was loaded into the TLB from the local chip's Memory due to a data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB from the local chip's Memory due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", }, [ POWER9_PME_PM_DPTEG_FROM_MEMORY ] = { .pme_name = "PM_DPTEG_FROM_MEMORY", .pme_code = 0x000002E04C, .pme_short_desc = "A Page Table Entry was loaded into the TLB from a memory location including L4 from local remote or distant due to a data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB from a memory location including L4 from local remote or distant due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", }, [ POWER9_PME_PM_DPTEG_FROM_OFF_CHIP_CACHE ] = { .pme_name = "PM_DPTEG_FROM_OFF_CHIP_CACHE", .pme_code = 0x000004E04A, .pme_short_desc = "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. 
Only PTE reloads are included", }, [ POWER9_PME_PM_DPTEG_FROM_ON_CHIP_CACHE ] = { .pme_name = "PM_DPTEG_FROM_ON_CHIP_CACHE", .pme_code = 0x000001E048, .pme_short_desc = "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on the same chip due to a data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on the same chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", }, [ POWER9_PME_PM_DPTEG_FROM_RL2L3_MOD ] = { .pme_name = "PM_DPTEG_FROM_RL2L3_MOD", .pme_code = 0x000002E046, .pme_short_desc = "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", }, [ POWER9_PME_PM_DPTEG_FROM_RL2L3_SHR ] = { .pme_name = "PM_DPTEG_FROM_RL2L3_SHR", .pme_code = 0x000001E04A, .pme_short_desc = "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. 
Only PTE reloads are included", }, [ POWER9_PME_PM_DPTEG_FROM_RL4 ] = { .pme_name = "PM_DPTEG_FROM_RL4", .pme_code = 0x000002E04A, .pme_short_desc = "A Page Table Entry was loaded into the TLB from another chip's L4 on the same Node or Group ( Remote) due to a data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB from another chip's L4 on the same Node or Group ( Remote) due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", }, [ POWER9_PME_PM_DPTEG_FROM_RMEM ] = { .pme_name = "PM_DPTEG_FROM_RMEM", .pme_code = 0x000003E04A, .pme_short_desc = "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group ( Remote) due to a data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group ( Remote) due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. 
Only PTE reloads are included", }, [ POWER9_PME_PM_DSIDE_L2MEMACC ] = { .pme_name = "PM_DSIDE_L2MEMACC", .pme_code = 0x0000036092, .pme_short_desc = "Valid when first beat of data comes in for an D-side fetch where data came EXCLUSIVELY from memory (excluding hpcread64 accesses), i.", .pme_long_desc = "Valid when first beat of data comes in for an D-side fetch where data came EXCLUSIVELY from memory (excluding hpcread64 accesses), i.e., total memory accesses by RCs", }, [ POWER9_PME_PM_DSIDE_MRU_TOUCH ] = { .pme_name = "PM_DSIDE_MRU_TOUCH", .pme_code = 0x0000026884, .pme_short_desc = "D-side L2 MRU touch sent to L2", .pme_long_desc = "D-side L2 MRU touch sent to L2", }, [ POWER9_PME_PM_DSIDE_OTHER_64B_L2MEMACC ] = { .pme_name = "PM_DSIDE_OTHER_64B_L2MEMACC", .pme_code = 0x0000036892, .pme_short_desc = "Valid when first beat of data comes in for an D-side fetch where data came EXCLUSIVELY from memory that was for hpc_read64, (RC had to fetch other 64B of a line from MC) i.", .pme_long_desc = "Valid when first beat of data comes in for an D-side fetch where data came EXCLUSIVELY from memory that was for hpc_read64, (RC had to fetch other 64B of a line from MC) i.e., number of times RC had to go to memory to get 'missing' 64B", }, [ POWER9_PME_PM_DSLB_MISS ] = { .pme_name = "PM_DSLB_MISS", .pme_code = 0x000000D0A8, .pme_short_desc = "Data SLB Miss - Total of all segment sizes", .pme_long_desc = "Data SLB Miss - Total of all segment sizes", }, [ POWER9_PME_PM_DSLB_MISS_ALT ] = { .pme_name = "PM_DSLB_MISS_ALT", .pme_code = 0x0000010016, .pme_short_desc = "gate_and(sd_pc_c0_comp_valid AND sd_pc_c0_comp_thread(0:1)=tid,sd_pc_c0_comp_ppc_count(0:3)) + gate_and(sd_pc_c1_comp_valid AND sd_pc_c1_comp_thread(0:1)=tid,sd_pc_c1_comp_ppc_count(0:3))", .pme_long_desc = "gate_and(sd_pc_c0_comp_valid AND sd_pc_c0_comp_thread(0:1)=tid,sd_pc_c0_comp_ppc_count(0:3)) + gate_and(sd_pc_c1_comp_valid AND sd_pc_c1_comp_thread(0:1)=tid,sd_pc_c1_comp_ppc_count(0:3))", }, [ 
POWER9_PME_PM_DTLB_MISS_16G ] = { .pme_name = "PM_DTLB_MISS_16G", .pme_code = 0x000001C058, .pme_short_desc = "Data TLB Miss page size 16G", .pme_long_desc = "Data TLB Miss page size 16G", }, [ POWER9_PME_PM_DTLB_MISS_16M ] = { .pme_name = "PM_DTLB_MISS_16M", .pme_code = 0x000004C056, .pme_short_desc = "Data TLB Miss page size 16M", .pme_long_desc = "Data TLB Miss page size 16M", }, [ POWER9_PME_PM_DTLB_MISS_1G ] = { .pme_name = "PM_DTLB_MISS_1G", .pme_code = 0x000004C05A, .pme_short_desc = "Data TLB reload (after a miss) page size 1G.", .pme_long_desc = "Data TLB reload (after a miss) page size 1G. Implies radix translation was used", }, [ POWER9_PME_PM_DTLB_MISS_2M ] = { .pme_name = "PM_DTLB_MISS_2M", .pme_code = 0x000001C05C, .pme_short_desc = "Data TLB reload (after a miss) page size 2M.", .pme_long_desc = "Data TLB reload (after a miss) page size 2M. Implies radix translation was used", }, [ POWER9_PME_PM_DTLB_MISS_4K ] = { .pme_name = "PM_DTLB_MISS_4K", .pme_code = 0x000002C056, .pme_short_desc = "Data TLB Miss page size 4k", .pme_long_desc = "Data TLB Miss page size 4k", }, [ POWER9_PME_PM_DTLB_MISS_64K ] = { .pme_name = "PM_DTLB_MISS_64K", .pme_code = 0x000003C056, .pme_short_desc = "Data TLB Miss page size 64K", .pme_long_desc = "Data TLB Miss page size 64K", }, [ POWER9_PME_PM_DTLB_MISS ] = { .pme_name = "PM_DTLB_MISS", .pme_code = 0x00000300FC, .pme_short_desc = "Data PTEG reload", .pme_long_desc = "Data PTEG reload", }, [ POWER9_PME_PM_SPACEHOLDER_0000040062 ] = { .pme_name = "PM_SPACEHOLDER_0000040062", .pme_code = 0x0000040062, .pme_short_desc = "SPACE_HOLDER for event 0000040062", .pme_long_desc = "SPACE_HOLDER for event 0000040062", }, [ POWER9_PME_PM_SPACEHOLDER_0000040064 ] = { .pme_name = "PM_SPACEHOLDER_0000040064", .pme_code = 0x0000040064, .pme_short_desc = "SPACE_HOLDER for event 0000040064", .pme_long_desc = "SPACE_HOLDER for event 0000040064", }, [ POWER9_PME_PM_EAT_FORCE_MISPRED ] = { .pme_name = "PM_EAT_FORCE_MISPRED", .pme_code = 
0x00000050A8, .pme_short_desc = "XL-form branch was mispredicted due to the predicted target address missing from EAT.", .pme_long_desc = "XL-form branch was mispredicted due to the predicted target address missing from EAT. The EAT forces a mispredict in this case since there is no predicated target to validate. This is a rare case that may occur when the EAT is full and a branch is issued", }, [ POWER9_PME_PM_EAT_FULL_CYC ] = { .pme_name = "PM_EAT_FULL_CYC", .pme_code = 0x0000004084, .pme_short_desc = "Cycles No room in EAT", .pme_long_desc = "Cycles No room in EAT", }, [ POWER9_PME_PM_EE_OFF_EXT_INT ] = { .pme_name = "PM_EE_OFF_EXT_INT", .pme_code = 0x0000002080, .pme_short_desc = "CyclesMSR[EE] is off and external interrupts are active", .pme_long_desc = "CyclesMSR[EE] is off and external interrupts are active", }, [ POWER9_PME_PM_EXT_INT ] = { .pme_name = "PM_EXT_INT", .pme_code = 0x00000200F8, .pme_short_desc = "external interrupt", .pme_long_desc = "external interrupt", }, [ POWER9_PME_PM_FLOP_CMPL ] = { .pme_name = "PM_FLOP_CMPL", .pme_code = 0x000004505E, .pme_short_desc = "Floating Point Operation Finished", .pme_long_desc = "Floating Point Operation Finished", }, [ POWER9_PME_PM_FLUSH_COMPLETION ] = { .pme_name = "PM_FLUSH_COMPLETION", .pme_code = 0x0000030012, .pme_short_desc = "The instruction that was next to complete did not complete because it suffered a flush", .pme_long_desc = "The instruction that was next to complete did not complete because it suffered a flush", }, [ POWER9_PME_PM_FLUSH_DISP_SB ] = { .pme_name = "PM_FLUSH_DISP_SB", .pme_code = 0x0000002088, .pme_short_desc = "Dispatch Flush: Scoreboard", .pme_long_desc = "Dispatch Flush: Scoreboard", }, [ POWER9_PME_PM_FLUSH_DISP_TLBIE ] = { .pme_name = "PM_FLUSH_DISP_TLBIE", .pme_code = 0x0000002888, .pme_short_desc = "Dispatch Flush: TLBIE", .pme_long_desc = "Dispatch Flush: TLBIE", }, [ POWER9_PME_PM_FLUSH_DISP ] = { .pme_name = "PM_FLUSH_DISP", .pme_code = 0x0000002880, .pme_short_desc = 
"Dispatch flush", .pme_long_desc = "Dispatch flush", }, [ POWER9_PME_PM_FLUSH_HB_RESTORE_CYC ] = { .pme_name = "PM_FLUSH_HB_RESTORE_CYC", .pme_code = 0x0000002084, .pme_short_desc = "Cycles in which no new instructions can be dispatched to the ICT after a flush.", .pme_long_desc = "Cycles in which no new instructions can be dispatched to the ICT after a flush. History buffer recovery", }, [ POWER9_PME_PM_FLUSH_LSU ] = { .pme_name = "PM_FLUSH_LSU", .pme_code = 0x00000058A4, .pme_short_desc = "LSU flushes.", .pme_long_desc = "LSU flushes. Includes all lsu flushes", }, [ POWER9_PME_PM_FLUSH_MPRED ] = { .pme_name = "PM_FLUSH_MPRED", .pme_code = 0x00000050A4, .pme_short_desc = "Branch mispredict flushes.", .pme_long_desc = "Branch mispredict flushes. Includes target and address misprecition", }, [ POWER9_PME_PM_FLUSH ] = { .pme_name = "PM_FLUSH", .pme_code = 0x00000400F8, .pme_short_desc = "Flush (any type)", .pme_long_desc = "Flush (any type)", }, [ POWER9_PME_PM_FMA_CMPL ] = { .pme_name = "PM_FMA_CMPL", .pme_code = 0x0000045054, .pme_short_desc = "two flops operation completed (fmadd, fnmadd, fmsub, fnmsub) Scalar instructions only.", .pme_long_desc = "two flops operation completed (fmadd, fnmadd, fmsub, fnmsub) Scalar instructions only. 
", }, [ POWER9_PME_PM_FORCED_NOP ] = { .pme_name = "PM_FORCED_NOP", .pme_code = 0x000000509C, .pme_short_desc = "Instruction was forced to execute as a nop because it was found to behave like a nop (have no effect) at decode time", .pme_long_desc = "Instruction was forced to execute as a nop because it was found to behave like a nop (have no effect) at decode time", }, [ POWER9_PME_PM_FREQ_DOWN ] = { .pme_name = "PM_FREQ_DOWN", .pme_code = 0x000003000C, .pme_short_desc = "Power Management: Below Threshold B", .pme_long_desc = "Power Management: Below Threshold B", }, [ POWER9_PME_PM_FREQ_UP ] = { .pme_name = "PM_FREQ_UP", .pme_code = 0x000004000C, .pme_short_desc = "Power Management: Above Threshold A", .pme_long_desc = "Power Management: Above Threshold A", }, [ POWER9_PME_PM_FXU_1PLUS_BUSY ] = { .pme_name = "PM_FXU_1PLUS_BUSY", .pme_code = 0x000003000E, .pme_short_desc = "At least one of the 4 FXU units is busy", .pme_long_desc = "At least one of the 4 FXU units is busy", }, [ POWER9_PME_PM_FXU_BUSY ] = { .pme_name = "PM_FXU_BUSY", .pme_code = 0x000002000E, .pme_short_desc = "Cycles in which all 4 FXUs are busy.", .pme_long_desc = "Cycles in which all 4 FXUs are busy. The FXU is running at capacity", }, [ POWER9_PME_PM_FXU_FIN ] = { .pme_name = "PM_FXU_FIN", .pme_code = 0x0000040004, .pme_short_desc = "The fixed point unit Unit finished an instruction.", .pme_long_desc = "The fixed point unit Unit finished an instruction. 
Instructions that finish may not necessary complete.", }, [ POWER9_PME_PM_FXU_IDLE ] = { .pme_name = "PM_FXU_IDLE", .pme_code = 0x0000024052, .pme_short_desc = "Cycles in which FXU0, FXU1, FXU2, and FXU3 are all idle", .pme_long_desc = "Cycles in which FXU0, FXU1, FXU2, and FXU3 are all idle", }, [ POWER9_PME_PM_GRP_PUMP_CPRED ] = { .pme_name = "PM_GRP_PUMP_CPRED", .pme_code = 0x0000020050, .pme_short_desc = "Initial and Final Pump Scope and data sourced across this scope was group pump for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)", .pme_long_desc = "Initial and Final Pump Scope and data sourced across this scope was group pump for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)", }, [ POWER9_PME_PM_GRP_PUMP_MPRED_RTY ] = { .pme_name = "PM_GRP_PUMP_MPRED_RTY", .pme_code = 0x0000010052, .pme_short_desc = "Final Pump Scope (Group) ended up larger than Initial Pump Scope (Chip) for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)", .pme_long_desc = "Final Pump Scope (Group) ended up larger than Initial Pump Scope (Chip) for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)", }, [ POWER9_PME_PM_GRP_PUMP_MPRED ] = { .pme_name = "PM_GRP_PUMP_MPRED", .pme_code = 0x0000020052, .pme_short_desc = "Final Pump Scope (Group) ended up either larger or smaller than Initial Pump Scope for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)", .pme_long_desc = "Final Pump Scope (Group) ended up either larger or smaller than Initial Pump Scope for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)", }, [ POWER9_PME_PM_HV_CYC ] = { .pme_name = "PM_HV_CYC", .pme_code = 0x000002000A, .pme_short_desc = "Cycles in which msr_hv is high.", .pme_long_desc = "Cycles in which msr_hv is high. 
Note that this event does not take msr_pr into consideration", }, [ POWER9_PME_PM_HWSYNC ] = { .pme_name = "PM_HWSYNC", .pme_code = 0x00000050A0, .pme_short_desc = "Hwsync instruction decoded and transferred", .pme_long_desc = "Hwsync instruction decoded and transferred", }, [ POWER9_PME_PM_IBUF_FULL_CYC ] = { .pme_name = "PM_IBUF_FULL_CYC", .pme_code = 0x0000004884, .pme_short_desc = "Cycles No room in ibuff", .pme_long_desc = "Cycles No room in ibuff", }, [ POWER9_PME_PM_IC_DEMAND_CYC ] = { .pme_name = "PM_IC_DEMAND_CYC", .pme_code = 0x0000010018, .pme_short_desc = "Final Pump Scope (Group) ended up larger than Initial Pump Scope (Chip) for a demand load", .pme_long_desc = "Final Pump Scope (Group) ended up larger than Initial Pump Scope (Chip) for a demand load", }, [ POWER9_PME_PM_IC_DEMAND_L2_BHT_REDIRECT ] = { .pme_name = "PM_IC_DEMAND_L2_BHT_REDIRECT", .pme_code = 0x0000004098, .pme_short_desc = "L2 I cache demand request due to BHT redirect, branch redirect ( 2 bubbles 3 cycles)", .pme_long_desc = "L2 I cache demand request due to BHT redirect, branch redirect ( 2 bubbles 3 cycles)", }, [ POWER9_PME_PM_IC_DEMAND_L2_BR_REDIRECT ] = { .pme_name = "PM_IC_DEMAND_L2_BR_REDIRECT", .pme_code = 0x0000004898, .pme_short_desc = "L2 I cache demand request due to branch Mispredict ( 15 cycle path)", .pme_long_desc = "L2 I cache demand request due to branch Mispredict ( 15 cycle path)", }, [ POWER9_PME_PM_IC_DEMAND_REQ ] = { .pme_name = "PM_IC_DEMAND_REQ", .pme_code = 0x0000004088, .pme_short_desc = "Demand Instruction fetch request", .pme_long_desc = "Demand Instruction fetch request", }, [ POWER9_PME_PM_IC_INVALIDATE ] = { .pme_name = "PM_IC_INVALIDATE", .pme_code = 0x0000005888, .pme_short_desc = "Ic line invalidated", .pme_long_desc = "Ic line invalidated", }, [ POWER9_PME_PM_IC_MISS_CMPL ] = { .pme_name = "PM_IC_MISS_CMPL", .pme_code = 0x0000045058, .pme_short_desc = "Non-speculative icache miss, counted at completion", .pme_long_desc = "Non-speculative icache 
miss, counted at completion", }, [ POWER9_PME_PM_IC_MISS_ICBI ] = { .pme_name = "PM_IC_MISS_ICBI", .pme_code = 0x0000005094, .pme_short_desc = "threaded version, IC Misses where we got EA dir hit but no sector valids were on.", .pme_long_desc = "threaded version, IC Misses where we got EA dir hit but no sector valids were on. ICBI took line out", }, [ POWER9_PME_PM_IC_PREF_CANCEL_HIT ] = { .pme_name = "PM_IC_PREF_CANCEL_HIT", .pme_code = 0x0000004890, .pme_short_desc = "Prefetch Canceled due to icache hit", .pme_long_desc = "Prefetch Canceled due to icache hit", }, [ POWER9_PME_PM_IC_PREF_CANCEL_L2 ] = { .pme_name = "PM_IC_PREF_CANCEL_L2", .pme_code = 0x0000004094, .pme_short_desc = "L2 Squashed a demand or prefetch request", .pme_long_desc = "L2 Squashed a demand or prefetch request", }, [ POWER9_PME_PM_IC_PREF_CANCEL_PAGE ] = { .pme_name = "PM_IC_PREF_CANCEL_PAGE", .pme_code = 0x0000004090, .pme_short_desc = "Prefetch Canceled due to page boundary", .pme_long_desc = "Prefetch Canceled due to page boundary", }, [ POWER9_PME_PM_IC_PREF_REQ ] = { .pme_name = "PM_IC_PREF_REQ", .pme_code = 0x0000004888, .pme_short_desc = "Instruction prefetch requests", .pme_long_desc = "Instruction prefetch requests", }, [ POWER9_PME_PM_IC_PREF_WRITE ] = { .pme_name = "PM_IC_PREF_WRITE", .pme_code = 0x000000488C, .pme_short_desc = "Instruction prefetch written into IL1", .pme_long_desc = "Instruction prefetch written into IL1", }, [ POWER9_PME_PM_IC_RELOAD_PRIVATE ] = { .pme_name = "PM_IC_RELOAD_PRIVATE", .pme_code = 0x0000004894, .pme_short_desc = "Reloading line was brought in private for a specific thread.", .pme_long_desc = "Reloading line was brought in private for a specific thread. Most lines are brought in shared for all eight threads. If RA does not match then invalidates and then brings it shared to other thread. 
In P7 line brought in private , then line was invalidat", }, [ POWER9_PME_PM_ICT_EMPTY_CYC ] = { .pme_name = "PM_ICT_EMPTY_CYC", .pme_code = 0x0000020008, .pme_short_desc = "Cycles in which the ICT is completely empty.", .pme_long_desc = "Cycles in which the ICT is completely empty. No itags are assigned to any thread", }, [ POWER9_PME_PM_ICT_NOSLOT_BR_MPRED_ICMISS ] = { .pme_name = "PM_ICT_NOSLOT_BR_MPRED_ICMISS", .pme_code = 0x0000034058, .pme_short_desc = "Ict empty for this thread due to Icache Miss and branch mispred", .pme_long_desc = "Ict empty for this thread due to Icache Miss and branch mispred", }, [ POWER9_PME_PM_ICT_NOSLOT_BR_MPRED ] = { .pme_name = "PM_ICT_NOSLOT_BR_MPRED", .pme_code = 0x000004D01E, .pme_short_desc = "Ict empty for this thread due to branch mispred", .pme_long_desc = "Ict empty for this thread due to branch mispred", }, [ POWER9_PME_PM_ICT_NOSLOT_CYC ] = { .pme_name = "PM_ICT_NOSLOT_CYC", .pme_code = 0x00000100F8, .pme_short_desc = "Number of cycles the ICT has no itags assigned to this thread", .pme_long_desc = "Number of cycles the ICT has no itags assigned to this thread", }, [ POWER9_PME_PM_ICT_NOSLOT_DISP_HELD_HB_FULL ] = { .pme_name = "PM_ICT_NOSLOT_DISP_HELD_HB_FULL", .pme_code = 0x0000030018, .pme_short_desc = "Ict empty for this thread due to dispatch holds because the History Buffer was full.", .pme_long_desc = "Ict empty for this thread due to dispatch holds because the History Buffer was full. 
Could be GPR/VSR/VMR/FPR/CR/XVF; CR; XVF (XER/VSCR/FPSCR)", }, [ POWER9_PME_PM_ICT_NOSLOT_DISP_HELD_ISSQ ] = { .pme_name = "PM_ICT_NOSLOT_DISP_HELD_ISSQ", .pme_code = 0x000002D01E, .pme_short_desc = "Ict empty for this thread due to dispatch hold on this thread due to Issue q full, BRQ full, XVCF Full, Count cache, Link, Tar full", .pme_long_desc = "Ict empty for this thread due to dispatch hold on this thread due to Issue q full, BRQ full, XVCF Full, Count cache, Link, Tar full", }, [ POWER9_PME_PM_ICT_NOSLOT_DISP_HELD_SYNC ] = { .pme_name = "PM_ICT_NOSLOT_DISP_HELD_SYNC", .pme_code = 0x000004D01C, .pme_short_desc = "Dispatch held due to a synchronizing instruction at dispatch", .pme_long_desc = "Dispatch held due to a synchronizing instruction at dispatch", }, [ POWER9_PME_PM_ICT_NOSLOT_DISP_HELD_TBEGIN ] = { .pme_name = "PM_ICT_NOSLOT_DISP_HELD_TBEGIN", .pme_code = 0x0000010064, .pme_short_desc = "the NTC instruction is being held at dispatch because it is a tbegin instruction and there is an older tbegin in the pipeline that must complete before the younger tbegin can dispatch", .pme_long_desc = "the NTC instruction is being held at dispatch because it is a tbegin instruction and there is an older tbegin in the pipeline that must complete before the younger tbegin can dispatch", }, [ POWER9_PME_PM_ICT_NOSLOT_DISP_HELD ] = { .pme_name = "PM_ICT_NOSLOT_DISP_HELD", .pme_code = 0x000004E01A, .pme_short_desc = "Cycles in which the NTC instruction is held at dispatch for any reason", .pme_long_desc = "Cycles in which the NTC instruction is held at dispatch for any reason", }, [ POWER9_PME_PM_ICT_NOSLOT_IC_L3MISS ] = { .pme_name = "PM_ICT_NOSLOT_IC_L3MISS", .pme_code = 0x000004E010, .pme_short_desc = "Ict empty for this thread due to icache misses that were sourced from beyond the local L3.", .pme_long_desc = "Ict empty for this thread due to icache misses that were sourced from beyond the local L3. 
The source could be local/remote/distant memory or another core's cache", }, [ POWER9_PME_PM_ICT_NOSLOT_IC_L3 ] = { .pme_name = "PM_ICT_NOSLOT_IC_L3", .pme_code = 0x000003E052, .pme_short_desc = "Ict empty for this thread due to icache misses that were sourced from the local L3", .pme_long_desc = "Ict empty for this thread due to icache misses that were sourced from the local L3", }, [ POWER9_PME_PM_ICT_NOSLOT_IC_MISS ] = { .pme_name = "PM_ICT_NOSLOT_IC_MISS", .pme_code = 0x000002D01A, .pme_short_desc = "Ict empty for this thread due to Icache Miss", .pme_long_desc = "Ict empty for this thread due to Icache Miss", }, [ POWER9_PME_PM_IERAT_RELOAD_16M ] = { .pme_name = "PM_IERAT_RELOAD_16M", .pme_code = 0x000004006A, .pme_short_desc = "IERAT Reloaded (Miss) for a 16M page", .pme_long_desc = "IERAT Reloaded (Miss) for a 16M page", }, [ POWER9_PME_PM_IERAT_RELOAD_4K ] = { .pme_name = "PM_IERAT_RELOAD_4K", .pme_code = 0x0000020064, .pme_short_desc = "IERAT reloaded (after a miss) for 4K pages", .pme_long_desc = "IERAT reloaded (after a miss) for 4K pages", }, [ POWER9_PME_PM_IERAT_RELOAD_64K ] = { .pme_name = "PM_IERAT_RELOAD_64K", .pme_code = 0x000003006A, .pme_short_desc = "IERAT Reloaded (Miss) for a 64k page", .pme_long_desc = "IERAT Reloaded (Miss) for a 64k page", }, [ POWER9_PME_PM_IERAT_RELOAD ] = { .pme_name = "PM_IERAT_RELOAD", .pme_code = 0x00000100F6, .pme_short_desc = "Number of I-ERAT reloads", .pme_long_desc = "Number of I-ERAT reloads", }, [ POWER9_PME_PM_IFETCH_THROTTLE ] = { .pme_name = "PM_IFETCH_THROTTLE", .pme_code = 0x000003405E, .pme_short_desc = "Cycles in which Instruction fetch throttle was active.", .pme_long_desc = "Cycles in which Instruction fetch throttle was active.", }, [ POWER9_PME_PM_INST_CHIP_PUMP_CPRED ] = { .pme_name = "PM_INST_CHIP_PUMP_CPRED", .pme_code = 0x0000014050, .pme_short_desc = "Initial and Final Pump Scope was chip pump (prediction=correct) for an instruction fetch", .pme_long_desc = "Initial and Final Pump Scope was 
chip pump (prediction=correct) for an instruction fetch", }, /* See also alternate entries for 0000010002 / POWER9_PME_PM_INST_CMPL with code(s) 0000020002 0000030002 0000040002 at the bottom of this table. \n */ [ POWER9_PME_PM_INST_CMPL ] = { .pme_name = "PM_INST_CMPL", .pme_code = 0x0000010002, .pme_short_desc = "Number of PowerPC Instructions that completed.", .pme_long_desc = "Number of PowerPC Instructions that completed.", }, /* See also alternate entries for 00000200F2 / POWER9_PME_PM_INST_DISP with code(s) 00000300F2 at the bottom of this table. \n */ [ POWER9_PME_PM_INST_DISP ] = { .pme_name = "PM_INST_DISP", .pme_code = 0x00000200F2, .pme_short_desc = "# PPC Dispatched", .pme_long_desc = "# PPC Dispatched", }, [ POWER9_PME_PM_INST_FROM_DL2L3_MOD ] = { .pme_name = "PM_INST_FROM_DL2L3_MOD", .pme_code = 0x0000044048, .pme_short_desc = "The processor's Instruction cache was reloaded with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to an instruction fetch (not prefetch)", .pme_long_desc = "The processor's Instruction cache was reloaded with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to an instruction fetch (not prefetch)", }, [ POWER9_PME_PM_INST_FROM_DL2L3_SHR ] = { .pme_name = "PM_INST_FROM_DL2L3_SHR", .pme_code = 0x0000034048, .pme_short_desc = "The processor's Instruction cache was reloaded with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to an instruction fetch (not prefetch)", .pme_long_desc = "The processor's Instruction cache was reloaded with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to an instruction fetch (not prefetch)", }, [ POWER9_PME_PM_INST_FROM_DL4 ] = { .pme_name = "PM_INST_FROM_DL4", .pme_code = 0x000003404C, .pme_short_desc = "The processor's Instruction cache was reloaded from another chip's L4 on a 
different Node or Group (Distant) due to an instruction fetch (not prefetch)", .pme_long_desc = "The processor's Instruction cache was reloaded from another chip's L4 on a different Node or Group (Distant) due to an instruction fetch (not prefetch)", }, [ POWER9_PME_PM_INST_FROM_DMEM ] = { .pme_name = "PM_INST_FROM_DMEM", .pme_code = 0x000004404C, .pme_short_desc = "The processor's Instruction cache was reloaded from another chip's memory on the same Node or Group (Distant) due to an instruction fetch (not prefetch)", .pme_long_desc = "The processor's Instruction cache was reloaded from another chip's memory on the same Node or Group (Distant) due to an instruction fetch (not prefetch)", }, [ POWER9_PME_PM_INST_FROM_L1 ] = { .pme_name = "PM_INST_FROM_L1", .pme_code = 0x0000004080, .pme_short_desc = "Instruction fetches from L1.", .pme_long_desc = "Instruction fetches from L1. L1 instruction hit", }, [ POWER9_PME_PM_INST_FROM_L21_MOD ] = { .pme_name = "PM_INST_FROM_L21_MOD", .pme_code = 0x0000044046, .pme_short_desc = "The processor's Instruction cache was reloaded with Modified (M) data from another core's L2 on the same chip due to an instruction fetch (not prefetch)", .pme_long_desc = "The processor's Instruction cache was reloaded with Modified (M) data from another core's L2 on the same chip due to an instruction fetch (not prefetch)", }, [ POWER9_PME_PM_INST_FROM_L21_SHR ] = { .pme_name = "PM_INST_FROM_L21_SHR", .pme_code = 0x0000034046, .pme_short_desc = "The processor's Instruction cache was reloaded with Shared (S) data from another core's L2 on the same chip due to an instruction fetch (not prefetch)", .pme_long_desc = "The processor's Instruction cache was reloaded with Shared (S) data from another core's L2 on the same chip due to an instruction fetch (not prefetch)", }, [ POWER9_PME_PM_INST_FROM_L2_DISP_CONFLICT_LDHITST ] = { .pme_name = "PM_INST_FROM_L2_DISP_CONFLICT_LDHITST", .pme_code = 0x0000034040, .pme_short_desc = "The processor's Instruction 
cache was reloaded from local core's L2 with load hit store conflict due to an instruction fetch (not prefetch)", .pme_long_desc = "The processor's Instruction cache was reloaded from local core's L2 with load hit store conflict due to an instruction fetch (not prefetch)", }, [ POWER9_PME_PM_INST_FROM_L2_DISP_CONFLICT_OTHER ] = { .pme_name = "PM_INST_FROM_L2_DISP_CONFLICT_OTHER", .pme_code = 0x0000044040, .pme_short_desc = "The processor's Instruction cache was reloaded from local core's L2 with dispatch conflict due to an instruction fetch (not prefetch)", .pme_long_desc = "The processor's Instruction cache was reloaded from local core's L2 with dispatch conflict due to an instruction fetch (not prefetch)", }, [ POWER9_PME_PM_INST_FROM_L2_MEPF ] = { .pme_name = "PM_INST_FROM_L2_MEPF", .pme_code = 0x0000024040, .pme_short_desc = "The processor's Instruction cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state.", .pme_long_desc = "The processor's Instruction cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state. 
due to an instruction fetch (not prefetch)", }, [ POWER9_PME_PM_INST_FROM_L2MISS ] = { .pme_name = "PM_INST_FROM_L2MISS", .pme_code = 0x000001404E, .pme_short_desc = "The processor's Instruction cache was reloaded from a location other than the local core's L2 due to an instruction fetch (not prefetch)", .pme_long_desc = "The processor's Instruction cache was reloaded from a location other than the local core's L2 due to an instruction fetch (not prefetch)", }, [ POWER9_PME_PM_INST_FROM_L2_NO_CONFLICT ] = { .pme_name = "PM_INST_FROM_L2_NO_CONFLICT", .pme_code = 0x0000014040, .pme_short_desc = "The processor's Instruction cache was reloaded from local core's L2 without conflict due to an instruction fetch (not prefetch)", .pme_long_desc = "The processor's Instruction cache was reloaded from local core's L2 without conflict due to an instruction fetch (not prefetch)", }, [ POWER9_PME_PM_INST_FROM_L2 ] = { .pme_name = "PM_INST_FROM_L2", .pme_code = 0x0000014042, .pme_short_desc = "The processor's Instruction cache was reloaded from local core's L2 due to an instruction fetch (not prefetch)", .pme_long_desc = "The processor's Instruction cache was reloaded from local core's L2 due to an instruction fetch (not prefetch)", }, [ POWER9_PME_PM_INST_FROM_L31_ECO_MOD ] = { .pme_name = "PM_INST_FROM_L31_ECO_MOD", .pme_code = 0x0000044044, .pme_short_desc = "The processor's Instruction cache was reloaded with Modified (M) data from another core's ECO L3 on the same chip due to an instruction fetch (not prefetch)", .pme_long_desc = "The processor's Instruction cache was reloaded with Modified (M) data from another core's ECO L3 on the same chip due to an instruction fetch (not prefetch)", }, [ POWER9_PME_PM_INST_FROM_L31_ECO_SHR ] = { .pme_name = "PM_INST_FROM_L31_ECO_SHR", .pme_code = 0x0000034044, .pme_short_desc = "The processor's Instruction cache was reloaded with Shared (S) data from another core's ECO L3 on the same chip due to an instruction fetch (not prefetch)", 
.pme_long_desc = "The processor's Instruction cache was reloaded with Shared (S) data from another core's ECO L3 on the same chip due to an instruction fetch (not prefetch)", }, [ POWER9_PME_PM_INST_FROM_L31_MOD ] = { .pme_name = "PM_INST_FROM_L31_MOD", .pme_code = 0x0000024044, .pme_short_desc = "The processor's Instruction cache was reloaded with Modified (M) data from another core's L3 on the same chip due to an instruction fetch (not prefetch)", .pme_long_desc = "The processor's Instruction cache was reloaded with Modified (M) data from another core's L3 on the same chip due to an instruction fetch (not prefetch)", }, [ POWER9_PME_PM_INST_FROM_L31_SHR ] = { .pme_name = "PM_INST_FROM_L31_SHR", .pme_code = 0x0000014046, .pme_short_desc = "The processor's Instruction cache was reloaded with Shared (S) data from another core's L3 on the same chip due to an instruction fetch (not prefetch)", .pme_long_desc = "The processor's Instruction cache was reloaded with Shared (S) data from another core's L3 on the same chip due to an instruction fetch (not prefetch)", }, [ POWER9_PME_PM_INST_FROM_L3_DISP_CONFLICT ] = { .pme_name = "PM_INST_FROM_L3_DISP_CONFLICT", .pme_code = 0x0000034042, .pme_short_desc = "The processor's Instruction cache was reloaded from local core's L3 with dispatch conflict due to an instruction fetch (not prefetch)", .pme_long_desc = "The processor's Instruction cache was reloaded from local core's L3 with dispatch conflict due to an instruction fetch (not prefetch)", }, [ POWER9_PME_PM_INST_FROM_L3_MEPF ] = { .pme_name = "PM_INST_FROM_L3_MEPF", .pme_code = 0x0000024042, .pme_short_desc = "The processor's Instruction cache was reloaded from local core's L3 without dispatch conflicts hit on Mepf state.", .pme_long_desc = "The processor's Instruction cache was reloaded from local core's L3 without dispatch conflicts hit on Mepf state. 
due to an instruction fetch (not prefetch)", }, [ POWER9_PME_PM_INST_FROM_L3MISS_MOD ] = { .pme_name = "PM_INST_FROM_L3MISS_MOD", .pme_code = 0x000004404E, .pme_short_desc = "The processor's Instruction cache was reloaded from a location other than the local core's L3 due to a instruction fetch", .pme_long_desc = "The processor's Instruction cache was reloaded from a location other than the local core's L3 due to a instruction fetch", }, [ POWER9_PME_PM_INST_FROM_L3MISS ] = { .pme_name = "PM_INST_FROM_L3MISS", .pme_code = 0x00000300FA, .pme_short_desc = "Marked instruction was reloaded from a location beyond the local chiplet", .pme_long_desc = "Marked instruction was reloaded from a location beyond the local chiplet", }, [ POWER9_PME_PM_INST_FROM_L3_NO_CONFLICT ] = { .pme_name = "PM_INST_FROM_L3_NO_CONFLICT", .pme_code = 0x0000014044, .pme_short_desc = "The processor's Instruction cache was reloaded from local core's L3 without conflict due to an instruction fetch (not prefetch)", .pme_long_desc = "The processor's Instruction cache was reloaded from local core's L3 without conflict due to an instruction fetch (not prefetch)", }, [ POWER9_PME_PM_INST_FROM_L3 ] = { .pme_name = "PM_INST_FROM_L3", .pme_code = 0x0000044042, .pme_short_desc = "The processor's Instruction cache was reloaded from local core's L3 due to an instruction fetch (not prefetch)", .pme_long_desc = "The processor's Instruction cache was reloaded from local core's L3 due to an instruction fetch (not prefetch)", }, [ POWER9_PME_PM_INST_FROM_LL4 ] = { .pme_name = "PM_INST_FROM_LL4", .pme_code = 0x000001404C, .pme_short_desc = "The processor's Instruction cache was reloaded from the local chip's L4 cache due to an instruction fetch (not prefetch)", .pme_long_desc = "The processor's Instruction cache was reloaded from the local chip's L4 cache due to an instruction fetch (not prefetch)", }, [ POWER9_PME_PM_INST_FROM_LMEM ] = { .pme_name = "PM_INST_FROM_LMEM", .pme_code = 0x0000024048, .pme_short_desc = 
"The processor's Instruction cache was reloaded from the local chip's Memory due to an instruction fetch (not prefetch)", .pme_long_desc = "The processor's Instruction cache was reloaded from the local chip's Memory due to an instruction fetch (not prefetch)", }, [ POWER9_PME_PM_INST_FROM_MEMORY ] = { .pme_name = "PM_INST_FROM_MEMORY", .pme_code = 0x000002404C, .pme_short_desc = "The processor's Instruction cache was reloaded from a memory location including L4 from local remote or distant due to an instruction fetch (not prefetch)", .pme_long_desc = "The processor's Instruction cache was reloaded from a memory location including L4 from local remote or distant due to an instruction fetch (not prefetch)", }, [ POWER9_PME_PM_INST_FROM_OFF_CHIP_CACHE ] = { .pme_name = "PM_INST_FROM_OFF_CHIP_CACHE", .pme_code = 0x000004404A, .pme_short_desc = "The processor's Instruction cache was reloaded either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to an instruction fetch (not prefetch)", .pme_long_desc = "The processor's Instruction cache was reloaded either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to an instruction fetch (not prefetch)", }, [ POWER9_PME_PM_INST_FROM_ON_CHIP_CACHE ] = { .pme_name = "PM_INST_FROM_ON_CHIP_CACHE", .pme_code = 0x0000014048, .pme_short_desc = "The processor's Instruction cache was reloaded either shared or modified data from another core's L2/L3 on the same chip due to an instruction fetch (not prefetch)", .pme_long_desc = "The processor's Instruction cache was reloaded either shared or modified data from another core's L2/L3 on the same chip due to an instruction fetch (not prefetch)", }, [ POWER9_PME_PM_INST_FROM_RL2L3_MOD ] = { .pme_name = "PM_INST_FROM_RL2L3_MOD", .pme_code = 0x0000024046, .pme_short_desc = "The processor's Instruction cache was reloaded with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), 
as this chip due to an instruction fetch (not prefetch)", .pme_long_desc = "The processor's Instruction cache was reloaded with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to an instruction fetch (not prefetch)", }, [ POWER9_PME_PM_INST_FROM_RL2L3_SHR ] = { .pme_name = "PM_INST_FROM_RL2L3_SHR", .pme_code = 0x000001404A, .pme_short_desc = "The processor's Instruction cache was reloaded with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to an instruction fetch (not prefetch)", .pme_long_desc = "The processor's Instruction cache was reloaded with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to an instruction fetch (not prefetch)", }, [ POWER9_PME_PM_INST_FROM_RL4 ] = { .pme_name = "PM_INST_FROM_RL4", .pme_code = 0x000002404A, .pme_short_desc = "The processor's Instruction cache was reloaded from another chip's L4 on the same Node or Group ( Remote) due to an instruction fetch (not prefetch)", .pme_long_desc = "The processor's Instruction cache was reloaded from another chip's L4 on the same Node or Group ( Remote) due to an instruction fetch (not prefetch)", }, [ POWER9_PME_PM_INST_FROM_RMEM ] = { .pme_name = "PM_INST_FROM_RMEM", .pme_code = 0x000003404A, .pme_short_desc = "The processor's Instruction cache was reloaded from another chip's memory on the same Node or Group ( Remote) due to an instruction fetch (not prefetch)", .pme_long_desc = "The processor's Instruction cache was reloaded from another chip's memory on the same Node or Group ( Remote) due to an instruction fetch (not prefetch)", }, [ POWER9_PME_PM_INST_GRP_PUMP_CPRED ] = { .pme_name = "PM_INST_GRP_PUMP_CPRED", .pme_code = 0x000002C05C, .pme_short_desc = "Initial and Final Pump Scope was group pump (prediction=correct) for an instruction fetch (demand only)", .pme_long_desc = "Initial and Final Pump Scope was group pump (prediction=correct) for 
an instruction fetch (demand only)", }, [ POWER9_PME_PM_INST_GRP_PUMP_MPRED_RTY ] = { .pme_name = "PM_INST_GRP_PUMP_MPRED_RTY", .pme_code = 0x0000014052, .pme_short_desc = "Final Pump Scope (Group) ended up larger than Initial Pump Scope (Chip) for an instruction fetch", .pme_long_desc = "Final Pump Scope (Group) ended up larger than Initial Pump Scope (Chip) for an instruction fetch", }, [ POWER9_PME_PM_INST_GRP_PUMP_MPRED ] = { .pme_name = "PM_INST_GRP_PUMP_MPRED", .pme_code = 0x000002C05E, .pme_short_desc = "Final Pump Scope (Group) ended up either larger or smaller than Initial Pump Scope for an instruction fetch (demand only)", .pme_long_desc = "Final Pump Scope (Group) ended up either larger or smaller than Initial Pump Scope for an instruction fetch (demand only)", }, [ POWER9_PME_PM_INST_IMC_MATCH_CMPL ] = { .pme_name = "PM_INST_IMC_MATCH_CMPL", .pme_code = 0x000004001C, .pme_short_desc = "IMC Match Count", .pme_long_desc = "IMC Match Count", }, [ POWER9_PME_PM_INST_PUMP_CPRED ] = { .pme_name = "PM_INST_PUMP_CPRED", .pme_code = 0x0000014054, .pme_short_desc = "Pump prediction correct.", .pme_long_desc = "Pump prediction correct. Counts across all types of pumps for an instruction fetch", }, [ POWER9_PME_PM_INST_PUMP_MPRED ] = { .pme_name = "PM_INST_PUMP_MPRED", .pme_code = 0x0000044052, .pme_short_desc = "Pump misprediction.", .pme_long_desc = "Pump misprediction. 
Counts across all types of pumps for an instruction fetch", }, [ POWER9_PME_PM_INST_SYS_PUMP_CPRED ] = { .pme_name = "PM_INST_SYS_PUMP_CPRED", .pme_code = 0x0000034050, .pme_short_desc = "Initial and Final Pump Scope was system pump (prediction=correct) for an instruction fetch", .pme_long_desc = "Initial and Final Pump Scope was system pump (prediction=correct) for an instruction fetch", }, [ POWER9_PME_PM_INST_SYS_PUMP_MPRED_RTY ] = { .pme_name = "PM_INST_SYS_PUMP_MPRED_RTY", .pme_code = 0x0000044050, .pme_short_desc = "Final Pump Scope (system) ended up larger than Initial Pump Scope (Chip/Group) for an instruction fetch", .pme_long_desc = "Final Pump Scope (system) ended up larger than Initial Pump Scope (Chip/Group) for an instruction fetch", }, [ POWER9_PME_PM_INST_SYS_PUMP_MPRED ] = { .pme_name = "PM_INST_SYS_PUMP_MPRED", .pme_code = 0x0000034052, .pme_short_desc = "Final Pump Scope (system) mispredicted.", .pme_long_desc = "Final Pump Scope (system) mispredicted. Either the original scope was too small (Chip/Group) or the original scope was System and it should have been smaller. 
Counts for an instruction fetch", }, [ POWER9_PME_PM_IOPS_CMPL ] = { .pme_name = "PM_IOPS_CMPL", .pme_code = 0x0000024050, .pme_short_desc = "Internal Operations completed", .pme_long_desc = "Internal Operations completed", }, [ POWER9_PME_PM_IPTEG_FROM_DL2L3_MOD ] = { .pme_name = "PM_IPTEG_FROM_DL2L3_MOD", .pme_code = 0x0000045048, .pme_short_desc = "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a instruction side request", .pme_long_desc = "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a instruction side request", }, [ POWER9_PME_PM_IPTEG_FROM_DL2L3_SHR ] = { .pme_name = "PM_IPTEG_FROM_DL2L3_SHR", .pme_code = 0x0000035048, .pme_short_desc = "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a instruction side request", .pme_long_desc = "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a instruction side request", }, [ POWER9_PME_PM_IPTEG_FROM_DL4 ] = { .pme_name = "PM_IPTEG_FROM_DL4", .pme_code = 0x000003504C, .pme_short_desc = "A Page Table Entry was loaded into the TLB from another chip's L4 on a different Node or Group (Distant) due to a instruction side request", .pme_long_desc = "A Page Table Entry was loaded into the TLB from another chip's L4 on a different Node or Group (Distant) due to a instruction side request", }, [ POWER9_PME_PM_IPTEG_FROM_DMEM ] = { .pme_name = "PM_IPTEG_FROM_DMEM", .pme_code = 0x000004504C, .pme_short_desc = "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group (Distant) due to a instruction side request", .pme_long_desc = "A Page Table Entry was loaded into the TLB from 
another chip's memory on the same Node or Group (Distant) due to a instruction side request", }, [ POWER9_PME_PM_IPTEG_FROM_L21_MOD ] = { .pme_name = "PM_IPTEG_FROM_L21_MOD", .pme_code = 0x0000045046, .pme_short_desc = "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L2 on the same chip due to a instruction side request", .pme_long_desc = "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L2 on the same chip due to a instruction side request", }, [ POWER9_PME_PM_IPTEG_FROM_L21_SHR ] = { .pme_name = "PM_IPTEG_FROM_L21_SHR", .pme_code = 0x0000035046, .pme_short_desc = "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L2 on the same chip due to a instruction side request", .pme_long_desc = "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L2 on the same chip due to a instruction side request", }, [ POWER9_PME_PM_IPTEG_FROM_L2_MEPF ] = { .pme_name = "PM_IPTEG_FROM_L2_MEPF", .pme_code = 0x0000025040, .pme_short_desc = "A Page Table Entry was loaded into the TLB from local core's L2 hit without dispatch conflicts on Mepf state.", .pme_long_desc = "A Page Table Entry was loaded into the TLB from local core's L2 hit without dispatch conflicts on Mepf state. 
due to a instruction side request", }, [ POWER9_PME_PM_IPTEG_FROM_L2MISS ] = { .pme_name = "PM_IPTEG_FROM_L2MISS", .pme_code = 0x000001504E, .pme_short_desc = "A Page Table Entry was loaded into the TLB from a location other than the local core's L2 due to a instruction side request", .pme_long_desc = "A Page Table Entry was loaded into the TLB from a location other than the local core's L2 due to a instruction side request", }, [ POWER9_PME_PM_IPTEG_FROM_L2_NO_CONFLICT ] = { .pme_name = "PM_IPTEG_FROM_L2_NO_CONFLICT", .pme_code = 0x0000015040, .pme_short_desc = "A Page Table Entry was loaded into the TLB from local core's L2 without conflict due to a instruction side request", .pme_long_desc = "A Page Table Entry was loaded into the TLB from local core's L2 without conflict due to a instruction side request", }, [ POWER9_PME_PM_IPTEG_FROM_L2 ] = { .pme_name = "PM_IPTEG_FROM_L2", .pme_code = 0x0000015042, .pme_short_desc = "A Page Table Entry was loaded into the TLB from local core's L2 due to a instruction side request", .pme_long_desc = "A Page Table Entry was loaded into the TLB from local core's L2 due to a instruction side request", }, [ POWER9_PME_PM_IPTEG_FROM_L31_ECO_MOD ] = { .pme_name = "PM_IPTEG_FROM_L31_ECO_MOD", .pme_code = 0x0000045044, .pme_short_desc = "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's ECO L3 on the same chip due to a instruction side request", .pme_long_desc = "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's ECO L3 on the same chip due to a instruction side request", }, [ POWER9_PME_PM_IPTEG_FROM_L31_ECO_SHR ] = { .pme_name = "PM_IPTEG_FROM_L31_ECO_SHR", .pme_code = 0x0000035044, .pme_short_desc = "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's ECO L3 on the same chip due to a instruction side request", .pme_long_desc = "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's ECO L3 on the same 
chip due to a instruction side request", }, [ POWER9_PME_PM_IPTEG_FROM_L31_MOD ] = { .pme_name = "PM_IPTEG_FROM_L31_MOD", .pme_code = 0x0000025044, .pme_short_desc = "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L3 on the same chip due to a instruction side request", .pme_long_desc = "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L3 on the same chip due to a instruction side request", }, [ POWER9_PME_PM_IPTEG_FROM_L31_SHR ] = { .pme_name = "PM_IPTEG_FROM_L31_SHR", .pme_code = 0x0000015046, .pme_short_desc = "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L3 on the same chip due to a instruction side request", .pme_long_desc = "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L3 on the same chip due to a instruction side request", }, [ POWER9_PME_PM_IPTEG_FROM_L3_DISP_CONFLICT ] = { .pme_name = "PM_IPTEG_FROM_L3_DISP_CONFLICT", .pme_code = 0x0000035042, .pme_short_desc = "A Page Table Entry was loaded into the TLB from local core's L3 with dispatch conflict due to a instruction side request", .pme_long_desc = "A Page Table Entry was loaded into the TLB from local core's L3 with dispatch conflict due to a instruction side request", }, [ POWER9_PME_PM_IPTEG_FROM_L3_MEPF ] = { .pme_name = "PM_IPTEG_FROM_L3_MEPF", .pme_code = 0x0000025042, .pme_short_desc = "A Page Table Entry was loaded into the TLB from local core's L3 without dispatch conflicts hit on Mepf state.", .pme_long_desc = "A Page Table Entry was loaded into the TLB from local core's L3 without dispatch conflicts hit on Mepf state. 
due to a instruction side request", }, [ POWER9_PME_PM_IPTEG_FROM_L3MISS ] = { .pme_name = "PM_IPTEG_FROM_L3MISS", .pme_code = 0x000004504E, .pme_short_desc = "A Page Table Entry was loaded into the TLB from a location other than the local core's L3 due to a instruction side request", .pme_long_desc = "A Page Table Entry was loaded into the TLB from a location other than the local core's L3 due to a instruction side request", }, [ POWER9_PME_PM_IPTEG_FROM_L3_NO_CONFLICT ] = { .pme_name = "PM_IPTEG_FROM_L3_NO_CONFLICT", .pme_code = 0x0000015044, .pme_short_desc = "A Page Table Entry was loaded into the TLB from local core's L3 without conflict due to a instruction side request", .pme_long_desc = "A Page Table Entry was loaded into the TLB from local core's L3 without conflict due to a instruction side request", }, [ POWER9_PME_PM_IPTEG_FROM_L3 ] = { .pme_name = "PM_IPTEG_FROM_L3", .pme_code = 0x0000045042, .pme_short_desc = "A Page Table Entry was loaded into the TLB from local core's L3 due to a instruction side request", .pme_long_desc = "A Page Table Entry was loaded into the TLB from local core's L3 due to a instruction side request", }, [ POWER9_PME_PM_IPTEG_FROM_LL4 ] = { .pme_name = "PM_IPTEG_FROM_LL4", .pme_code = 0x000001504C, .pme_short_desc = "A Page Table Entry was loaded into the TLB from the local chip's L4 cache due to a instruction side request", .pme_long_desc = "A Page Table Entry was loaded into the TLB from the local chip's L4 cache due to a instruction side request", }, [ POWER9_PME_PM_IPTEG_FROM_LMEM ] = { .pme_name = "PM_IPTEG_FROM_LMEM", .pme_code = 0x0000025048, .pme_short_desc = "A Page Table Entry was loaded into the TLB from the local chip's Memory due to a instruction side request", .pme_long_desc = "A Page Table Entry was loaded into the TLB from the local chip's Memory due to a instruction side request", }, [ POWER9_PME_PM_IPTEG_FROM_MEMORY ] = { .pme_name = "PM_IPTEG_FROM_MEMORY", .pme_code = 0x000002504C, .pme_short_desc = "A Page 
Table Entry was loaded into the TLB from a memory location including L4 from local remote or distant due to a instruction side request", .pme_long_desc = "A Page Table Entry was loaded into the TLB from a memory location including L4 from local remote or distant due to a instruction side request", }, [ POWER9_PME_PM_IPTEG_FROM_OFF_CHIP_CACHE ] = { .pme_name = "PM_IPTEG_FROM_OFF_CHIP_CACHE", .pme_code = 0x000004504A, .pme_short_desc = "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a instruction side request", .pme_long_desc = "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a instruction side request", }, [ POWER9_PME_PM_IPTEG_FROM_ON_CHIP_CACHE ] = { .pme_name = "PM_IPTEG_FROM_ON_CHIP_CACHE", .pme_code = 0x0000015048, .pme_short_desc = "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on the same chip due to a instruction side request", .pme_long_desc = "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on the same chip due to a instruction side request", }, [ POWER9_PME_PM_IPTEG_FROM_RL2L3_MOD ] = { .pme_name = "PM_IPTEG_FROM_RL2L3_MOD", .pme_code = 0x0000025046, .pme_short_desc = "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a instruction side request", .pme_long_desc = "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a instruction side request", }, [ POWER9_PME_PM_IPTEG_FROM_RL2L3_SHR ] = { .pme_name = "PM_IPTEG_FROM_RL2L3_SHR", .pme_code = 0x000001504A, .pme_short_desc = "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or 
L3 on the same Node or Group (Remote), as this chip due to a instruction side request", .pme_long_desc = "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a instruction side request", }, [ POWER9_PME_PM_IPTEG_FROM_RL4 ] = { .pme_name = "PM_IPTEG_FROM_RL4", .pme_code = 0x000002504A, .pme_short_desc = "A Page Table Entry was loaded into the TLB from another chip's L4 on the same Node or Group ( Remote) due to a instruction side request", .pme_long_desc = "A Page Table Entry was loaded into the TLB from another chip's L4 on the same Node or Group ( Remote) due to a instruction side request", }, [ POWER9_PME_PM_IPTEG_FROM_RMEM ] = { .pme_name = "PM_IPTEG_FROM_RMEM", .pme_code = 0x000003504A, .pme_short_desc = "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group ( Remote) due to a instruction side request", .pme_long_desc = "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group ( Remote) due to a instruction side request", }, [ POWER9_PME_PM_ISIDE_DISP_FAIL_ADDR ] = { .pme_name = "PM_ISIDE_DISP_FAIL_ADDR", .pme_code = 0x000002608A, .pme_short_desc = "All I-side dispatch attempts for this thread that failed due to a addr collision with another machine (excludes i_l2mru_tch_reqs)", .pme_long_desc = "All I-side dispatch attempts for this thread that failed due to a addr collision with another machine (excludes i_l2mru_tch_reqs)", }, [ POWER9_PME_PM_ISIDE_DISP_FAIL_OTHER ] = { .pme_name = "PM_ISIDE_DISP_FAIL_OTHER", .pme_code = 0x000002688A, .pme_short_desc = "All I-side dispatch attempts for this thread that failed due to a reason other than addrs collision (excludes i_l2mru_tch_reqs)", .pme_long_desc = "All I-side dispatch attempts for this thread that failed due to a reason other than addrs collision (excludes i_l2mru_tch_reqs)", }, [ POWER9_PME_PM_ISIDE_DISP ] = { .pme_name = 
"PM_ISIDE_DISP", .pme_code = 0x000001688A, .pme_short_desc = "All I-side dispatch attempts for this thread (excludes i_l2mru_tch_reqs)", .pme_long_desc = "All I-side dispatch attempts for this thread (excludes i_l2mru_tch_reqs)", }, [ POWER9_PME_PM_ISIDE_L2MEMACC ] = { .pme_name = "PM_ISIDE_L2MEMACC", .pme_code = 0x0000026890, .pme_short_desc = "Valid when first beat of data comes in for an I-side fetch where data came from memory", .pme_long_desc = "Valid when first beat of data comes in for an I-side fetch where data came from memory", }, [ POWER9_PME_PM_ISIDE_MRU_TOUCH ] = { .pme_name = "PM_ISIDE_MRU_TOUCH", .pme_code = 0x0000046880, .pme_short_desc = "I-side L2 MRU touch sent to L2 for this thread", .pme_long_desc = "I-side L2 MRU touch sent to L2 for this thread", }, [ POWER9_PME_PM_ISLB_MISS ] = { .pme_name = "PM_ISLB_MISS", .pme_code = 0x000000D8A8, .pme_short_desc = "Instruction SLB Miss - Total of all segment sizes", .pme_long_desc = "Instruction SLB Miss - Total of all segment sizes", }, [ POWER9_PME_PM_ISLB_MISS_ALT ] = { .pme_name = "PM_ISLB_MISS_ALT", .pme_code = 0x0000040006, .pme_short_desc = "Number of ISLB misses for this thread", .pme_long_desc = "Number of ISLB misses for this thread", }, [ POWER9_PME_PM_ISQ_0_8_ENTRIES ] = { .pme_name = "PM_ISQ_0_8_ENTRIES", .pme_code = 0x000003005A, .pme_short_desc = "Cycles in which 8 or less Issue Queue entries are in use.", .pme_long_desc = "Cycles in which 8 or less Issue Queue entries are in use. This is a shared event, not per thread", }, [ POWER9_PME_PM_ISQ_36_44_ENTRIES ] = { .pme_name = "PM_ISQ_36_44_ENTRIES", .pme_code = 0x000004000A, .pme_short_desc = "Cycles in which 36 or more Issue Queue entries are in use.", .pme_long_desc = "Cycles in which 36 or more Issue Queue entries are in use. This is a shared event, not per thread. 
There are 44 issue queue entries across 4 slices in the whole core", }, [ POWER9_PME_PM_ISU0_ISS_HOLD_ALL ] = { .pme_name = "PM_ISU0_ISS_HOLD_ALL", .pme_code = 0x0000003080, .pme_short_desc = "All ISU rejects", .pme_long_desc = "All ISU rejects", }, [ POWER9_PME_PM_ISU1_ISS_HOLD_ALL ] = { .pme_name = "PM_ISU1_ISS_HOLD_ALL", .pme_code = 0x0000003084, .pme_short_desc = "All ISU rejects", .pme_long_desc = "All ISU rejects", }, [ POWER9_PME_PM_ISU2_ISS_HOLD_ALL ] = { .pme_name = "PM_ISU2_ISS_HOLD_ALL", .pme_code = 0x0000003880, .pme_short_desc = "All ISU rejects", .pme_long_desc = "All ISU rejects", }, [ POWER9_PME_PM_ISU3_ISS_HOLD_ALL ] = { .pme_name = "PM_ISU3_ISS_HOLD_ALL", .pme_code = 0x0000003884, .pme_short_desc = "All ISU rejects", .pme_long_desc = "All ISU rejects", }, [ POWER9_PME_PM_ISYNC ] = { .pme_name = "PM_ISYNC", .pme_code = 0x0000002884, .pme_short_desc = "Isync completion count per thread", .pme_long_desc = "Isync completion count per thread", }, [ POWER9_PME_PM_ITLB_MISS ] = { .pme_name = "PM_ITLB_MISS", .pme_code = 0x00000400FC, .pme_short_desc = "ITLB Reloaded.", .pme_long_desc = "ITLB Reloaded. Counts 1 per ITLB miss for HPT but multiple for radix depending on number of levels traveresed", }, [ POWER9_PME_PM_L1_DCACHE_RELOADED_ALL ] = { .pme_name = "PM_L1_DCACHE_RELOADED_ALL", .pme_code = 0x000001002C, .pme_short_desc = "L1 data cache reloaded for demand.", .pme_long_desc = "L1 data cache reloaded for demand. 
If MMCR1[16] is 1, prefetches will be included as well", }, [ POWER9_PME_PM_L1_DCACHE_RELOAD_VALID ] = { .pme_name = "PM_L1_DCACHE_RELOAD_VALID", .pme_code = 0x00000300F6, .pme_short_desc = "DL1 reloaded due to Demand Load", .pme_long_desc = "DL1 reloaded due to Demand Load", }, [ POWER9_PME_PM_L1_DEMAND_WRITE ] = { .pme_name = "PM_L1_DEMAND_WRITE", .pme_code = 0x000000408C, .pme_short_desc = "Instruction Demand sectors written into IL1", .pme_long_desc = "Instruction Demand sectors written into IL1", }, [ POWER9_PME_PM_L1_ICACHE_MISS ] = { .pme_name = "PM_L1_ICACHE_MISS", .pme_code = 0x00000200FD, .pme_short_desc = "Demand iCache Miss", .pme_long_desc = "Demand iCache Miss", }, [ POWER9_PME_PM_L1_ICACHE_RELOADED_ALL ] = { .pme_name = "PM_L1_ICACHE_RELOADED_ALL", .pme_code = 0x0000040012, .pme_short_desc = "Counts all Icache reloads includes demand, prefetch, prefetch turned into demand and demand turned into prefetch", .pme_long_desc = "Counts all Icache reloads includes demand, prefetch, prefetch turned into demand and demand turned into prefetch", }, [ POWER9_PME_PM_L1_ICACHE_RELOADED_PREF ] = { .pme_name = "PM_L1_ICACHE_RELOADED_PREF", .pme_code = 0x0000030068, .pme_short_desc = "Counts all Icache prefetch reloads ( includes demand turned into prefetch)", .pme_long_desc = "Counts all Icache prefetch reloads ( includes demand turned into prefetch)", }, [ POWER9_PME_PM_L1PF_L2MEMACC ] = { .pme_name = "PM_L1PF_L2MEMACC", .pme_code = 0x0000016890, .pme_short_desc = "Valid when first beat of data comes in for an L1PF where data came from memory", .pme_long_desc = "Valid when first beat of data comes in for an L1PF where data came from memory", }, [ POWER9_PME_PM_L1_PREF ] = { .pme_name = "PM_L1_PREF", .pme_code = 0x0000020054, .pme_short_desc = "A data line was written to the L1 due to a hardware or software prefetch", .pme_long_desc = "A data line was written to the L1 due to a hardware or software prefetch", }, [ POWER9_PME_PM_L1_SW_PREF ] = { .pme_name = 
"PM_L1_SW_PREF", .pme_code = 0x000000E880, .pme_short_desc = "Software L1 Prefetches, including SW Transient Prefetches", .pme_long_desc = "Software L1 Prefetches, including SW Transient Prefetches", }, [ POWER9_PME_PM_L2_CASTOUT_MOD ] = { .pme_name = "PM_L2_CASTOUT_MOD", .pme_code = 0x0000016082, .pme_short_desc = "L2 Castouts - Modified (M,Mu,Me)", .pme_long_desc = "L2 Castouts - Modified (M,Mu,Me)", }, [ POWER9_PME_PM_L2_CASTOUT_SHR ] = { .pme_name = "PM_L2_CASTOUT_SHR", .pme_code = 0x0000016882, .pme_short_desc = "L2 Castouts - Shared (Tx,Sx)", .pme_long_desc = "L2 Castouts - Shared (Tx,Sx)", }, [ POWER9_PME_PM_L2_CHIP_PUMP ] = { .pme_name = "PM_L2_CHIP_PUMP", .pme_code = 0x0000046088, .pme_short_desc = "RC requests that were local (aka chip) pump attempts", .pme_long_desc = "RC requests that were local (aka chip) pump attempts", }, [ POWER9_PME_PM_L2_DC_INV ] = { .pme_name = "PM_L2_DC_INV", .pme_code = 0x0000026882, .pme_short_desc = "D-cache invalidates sent over the reload bus to the core", .pme_long_desc = "D-cache invalidates sent over the reload bus to the core", }, [ POWER9_PME_PM_L2_DISP_ALL_L2MISS ] = { .pme_name = "PM_L2_DISP_ALL_L2MISS", .pme_code = 0x0000046080, .pme_short_desc = "All successful Ld/St dispatches for this thread that were an L2 miss (excludes i_l2mru_tch_reqs)", .pme_long_desc = "All successful Ld/St dispatches for this thread that were an L2 miss (excludes i_l2mru_tch_reqs)", }, [ POWER9_PME_PM_L2_GROUP_PUMP ] = { .pme_name = "PM_L2_GROUP_PUMP", .pme_code = 0x0000046888, .pme_short_desc = "RC requests that were on group (aka nodel) pump attempts", .pme_long_desc = "RC requests that were on group (aka nodel) pump attempts", }, [ POWER9_PME_PM_L2_GRP_GUESS_CORRECT ] = { .pme_name = "PM_L2_GRP_GUESS_CORRECT", .pme_code = 0x0000026088, .pme_short_desc = "L2 guess grp (GS or NNS) and guess was correct (data intra-group AND ^on-chip)", .pme_long_desc = "L2 guess grp (GS or NNS) and guess was correct (data intra-group AND ^on-chip)", }, [ 
POWER9_PME_PM_L2_GRP_GUESS_WRONG ] = { .pme_name = "PM_L2_GRP_GUESS_WRONG", .pme_code = 0x0000026888, .pme_short_desc = "L2 guess grp (GS or NNS) and guess was not correct (ie data on-chip OR beyond-group)", .pme_long_desc = "L2 guess grp (GS or NNS) and guess was not correct (ie data on-chip OR beyond-group)", }, [ POWER9_PME_PM_L2_IC_INV ] = { .pme_name = "PM_L2_IC_INV", .pme_code = 0x0000026082, .pme_short_desc = "I-cache Invalidates sent over the realod bus to the core", .pme_long_desc = "I-cache Invalidates sent over the realod bus to the core", }, [ POWER9_PME_PM_L2_INST_MISS ] = { .pme_name = "PM_L2_INST_MISS", .pme_code = 0x0000036880, .pme_short_desc = "All successful I-side dispatches that were an L2 miss for this thread (excludes i_l2mru_tch reqs)", .pme_long_desc = "All successful I-side dispatches that were an L2 miss for this thread (excludes i_l2mru_tch reqs)", }, [ POWER9_PME_PM_L2_INST_MISS_ALT ] = { .pme_name = "PM_L2_INST_MISS_ALT", .pme_code = 0x000004609E, .pme_short_desc = "All successful I-side dispatches that were an L2 miss for this thread (excludes i_l2mru_tch reqs)", .pme_long_desc = "All successful I-side dispatches that were an L2 miss for this thread (excludes i_l2mru_tch reqs)", }, [ POWER9_PME_PM_L2_INST ] = { .pme_name = "PM_L2_INST", .pme_code = 0x0000036080, .pme_short_desc = "All successful I-side dispatches for this thread (excludes i_l2mru_tch reqs)", .pme_long_desc = "All successful I-side dispatches for this thread (excludes i_l2mru_tch reqs)", }, [ POWER9_PME_PM_L2_INST_ALT ] = { .pme_name = "PM_L2_INST_ALT", .pme_code = 0x000003609E, .pme_short_desc = "All successful I-side dispatches for this thread (excludes i_l2mru_tch reqs)", .pme_long_desc = "All successful I-side dispatches for this thread (excludes i_l2mru_tch reqs)", }, [ POWER9_PME_PM_L2_LD_DISP ] = { .pme_name = "PM_L2_LD_DISP", .pme_code = 0x000001609E, .pme_short_desc = "All successful D-side load dispatches for this thread (L2 miss + L2 hits)", .pme_long_desc = 
"All successful D-side load dispatches for this thread (L2 miss + L2 hits)", }, [ POWER9_PME_PM_L2_LD_DISP_ALT ] = { .pme_name = "PM_L2_LD_DISP_ALT", .pme_code = 0x0000036082, .pme_short_desc = "All successful I-or-D side load dispatches for this thread (excludes i_l2mru_tch_reqs)", .pme_long_desc = "All successful I-or-D side load dispatches for this thread (excludes i_l2mru_tch_reqs)", }, [ POWER9_PME_PM_L2_LD_HIT ] = { .pme_name = "PM_L2_LD_HIT", .pme_code = 0x000002609E, .pme_short_desc = "All successful D-side load dispatches that were L2 hits for this thread", .pme_long_desc = "All successful D-side load dispatches that were L2 hits for this thread", }, [ POWER9_PME_PM_L2_LD_HIT_ALT ] = { .pme_name = "PM_L2_LD_HIT_ALT", .pme_code = 0x0000036882, .pme_short_desc = "All successful I-or-D side load dispatches for this thread that were L2 hits (excludes i_l2mru_tch_reqs)", .pme_long_desc = "All successful I-or-D side load dispatches for this thread that were L2 hits (excludes i_l2mru_tch_reqs)", }, [ POWER9_PME_PM_L2_LD_MISS_128B ] = { .pme_name = "PM_L2_LD_MISS_128B", .pme_code = 0x0000016092, .pme_short_desc = "All successful D-side load dispatches that were an L2 miss (NOT Sx,Tx,Mx) for this thread and the RC calculated the request should be for 128B (i.", .pme_long_desc = "All successful D-side load dispatches that were an L2 miss (NOT Sx,Tx,Mx) for this thread and the RC calculated the request should be for 128B (i.e., M=0)", }, [ POWER9_PME_PM_L2_LD_MISS_64B ] = { .pme_name = "PM_L2_LD_MISS_64B", .pme_code = 0x0000026092, .pme_short_desc = "All successful D-side load dispatches that were an L2 miss (NOT Sx,Tx,Mx) for this thread and the RC calculated the request should be for 64B(i.", .pme_long_desc = "All successful D-side load dispatches that were an L2 miss (NOT Sx,Tx,Mx) for this thread and the RC calculated the request should be for 64B(i.e., M=1)", }, [ POWER9_PME_PM_L2_LD_MISS ] = { .pme_name = "PM_L2_LD_MISS", .pme_code = 0x0000026080, 
.pme_short_desc = "All successful D-Side Load dispatches that were an L2 miss for this thread", .pme_long_desc = "All successful D-Side Load dispatches that were an L2 miss for this thread", }, [ POWER9_PME_PM_L2_LD ] = { .pme_name = "PM_L2_LD", .pme_code = 0x0000016080, .pme_short_desc = "All successful D-side Load dispatches for this thread (L2 miss + L2 hits)", .pme_long_desc = "All successful D-side Load dispatches for this thread (L2 miss + L2 hits)", }, [ POWER9_PME_PM_L2_LOC_GUESS_CORRECT ] = { .pme_name = "PM_L2_LOC_GUESS_CORRECT", .pme_code = 0x0000016088, .pme_short_desc = "L2 guess local (LNS) and guess was correct (ie data local)", .pme_long_desc = "L2 guess local (LNS) and guess was correct (ie data local)", }, [ POWER9_PME_PM_L2_LOC_GUESS_WRONG ] = { .pme_name = "PM_L2_LOC_GUESS_WRONG", .pme_code = 0x0000016888, .pme_short_desc = "L2 guess local (LNS) and guess was not correct (ie data not on chip)", .pme_long_desc = "L2 guess local (LNS) and guess was not correct (ie data not on chip)", }, [ POWER9_PME_PM_L2_RCLD_DISP_FAIL_ADDR ] = { .pme_name = "PM_L2_RCLD_DISP_FAIL_ADDR", .pme_code = 0x0000016884, .pme_short_desc = "All I-od-D side load dispatch attempts for this thread that failed due to address collision with RC/CO/SN/SQ machine (excludes i_l2mru_tch_reqs)", .pme_long_desc = "All I-od-D side load dispatch attempts for this thread that failed due to address collision with RC/CO/SN/SQ machine (excludes i_l2mru_tch_reqs)", }, [ POWER9_PME_PM_L2_RCLD_DISP_FAIL_OTHER ] = { .pme_name = "PM_L2_RCLD_DISP_FAIL_OTHER", .pme_code = 0x0000026084, .pme_short_desc = "All I-or-D side load dispatch attempts for this thread that failed due to reason other than address collision (excludes i_l2mru_tch_reqs)", .pme_long_desc = "All I-or-D side load dispatch attempts for this thread that failed due to reason other than address collision (excludes i_l2mru_tch_reqs)", }, [ POWER9_PME_PM_L2_RCLD_DISP ] = { .pme_name = "PM_L2_RCLD_DISP", .pme_code = 0x0000016084, 
.pme_short_desc = "All I-or-D side load dispatch attempts for this thread (excludes i_l2mru_tch_reqs)", .pme_long_desc = "All I-or-D side load dispatch attempts for this thread (excludes i_l2mru_tch_reqs)", }, [ POWER9_PME_PM_L2_RCST_DISP_FAIL_ADDR ] = { .pme_name = "PM_L2_RCST_DISP_FAIL_ADDR", .pme_code = 0x0000036884, .pme_short_desc = "All D-side store dispatch attempts for this thread that failed due to address collision with RC/CO/SN/SQ", .pme_long_desc = "All D-side store dispatch attempts for this thread that failed due to address collision with RC/CO/SN/SQ", }, [ POWER9_PME_PM_L2_RCST_DISP_FAIL_OTHER ] = { .pme_name = "PM_L2_RCST_DISP_FAIL_OTHER", .pme_code = 0x0000046084, .pme_short_desc = "All D-side store dispatch attempts for this thread that failed due to reason other than address collision", .pme_long_desc = "All D-side store dispatch attempts for this thread that failed due to reason other than address collision", }, [ POWER9_PME_PM_L2_RCST_DISP ] = { .pme_name = "PM_L2_RCST_DISP", .pme_code = 0x0000036084, .pme_short_desc = "All D-side store dispatch attempts for this thread", .pme_long_desc = "All D-side store dispatch attempts for this thread", }, [ POWER9_PME_PM_L2_RC_ST_DONE ] = { .pme_name = "PM_L2_RC_ST_DONE", .pme_code = 0x0000036086, .pme_short_desc = "RC did store to line that was Tx or Sx", .pme_long_desc = "RC did store to line that was Tx or Sx", }, [ POWER9_PME_PM_L2_RTY_LD ] = { .pme_name = "PM_L2_RTY_LD", .pme_code = 0x000003688A, .pme_short_desc = "RC retries on PB for any load from core (excludes DCBFs)", .pme_long_desc = "RC retries on PB for any load from core (excludes DCBFs)", }, [ POWER9_PME_PM_L2_RTY_LD_ALT ] = { .pme_name = "PM_L2_RTY_LD_ALT", .pme_code = 0x000003689E, .pme_short_desc = "RC retries on PB for any load from core (excludes DCBFs)", .pme_long_desc = "RC retries on PB for any load from core (excludes DCBFs)", }, [ POWER9_PME_PM_L2_RTY_ST ] = { .pme_name = "PM_L2_RTY_ST", .pme_code = 0x000003608A, .pme_short_desc = 
"RC retries on PB for any store from core (excludes DCBFs)", .pme_long_desc = "RC retries on PB for any store from core (excludes DCBFs)", }, [ POWER9_PME_PM_L2_RTY_ST_ALT ] = { .pme_name = "PM_L2_RTY_ST_ALT", .pme_code = 0x000004689E, .pme_short_desc = "RC retries on PB for any store from core (excludes DCBFs)", .pme_long_desc = "RC retries on PB for any store from core (excludes DCBFs)", }, [ POWER9_PME_PM_L2_SN_M_RD_DONE ] = { .pme_name = "PM_L2_SN_M_RD_DONE", .pme_code = 0x0000046086, .pme_short_desc = "SNP dispatched for a read and was M (true M)", .pme_long_desc = "SNP dispatched for a read and was M (true M)", }, [ POWER9_PME_PM_L2_SN_M_WR_DONE ] = { .pme_name = "PM_L2_SN_M_WR_DONE", .pme_code = 0x0000016086, .pme_short_desc = "SNP dispatched for a write and was M (true M); for DMA cacheinj this will pulse if rty/push is required (won't pulse if cacheinj is accepted)", .pme_long_desc = "SNP dispatched for a write and was M (true M); for DMA cacheinj this will pulse if rty/push is required (won't pulse if cacheinj is accepted)", }, [ POWER9_PME_PM_L2_SN_M_WR_DONE_ALT ] = { .pme_name = "PM_L2_SN_M_WR_DONE_ALT", .pme_code = 0x0000046886, .pme_short_desc = "SNP dispatched for a write and was M (true M); for DMA cacheinj this will pulse if rty/push is required (won't pulse if cacheinj is accepted)", .pme_long_desc = "SNP dispatched for a write and was M (true M); for DMA cacheinj this will pulse if rty/push is required (won't pulse if cacheinj is accepted)", }, [ POWER9_PME_PM_L2_SN_SX_I_DONE ] = { .pme_name = "PM_L2_SN_SX_I_DONE", .pme_code = 0x0000036886, .pme_short_desc = "SNP dispatched and went from Sx to Ix", .pme_long_desc = "SNP dispatched and went from Sx to Ix", }, [ POWER9_PME_PM_L2_ST_DISP ] = { .pme_name = "PM_L2_ST_DISP", .pme_code = 0x0000046082, .pme_short_desc = "All successful D-side store dispatches for this thread", .pme_long_desc = "All successful D-side store dispatches for this thread", }, [ POWER9_PME_PM_L2_ST_DISP_ALT ] = { .pme_name = 
"PM_L2_ST_DISP_ALT", .pme_code = 0x000001689E, .pme_short_desc = "All successful D-side store dispatches for this thread (L2 miss + L2 hits)", .pme_long_desc = "All successful D-side store dispatches for this thread (L2 miss + L2 hits)", }, [ POWER9_PME_PM_L2_ST_HIT ] = { .pme_name = "PM_L2_ST_HIT", .pme_code = 0x0000046882, .pme_short_desc = "All successful D-side store dispatches for this thread that were L2 hits", .pme_long_desc = "All successful D-side store dispatches for this thread that were L2 hits", }, [ POWER9_PME_PM_L2_ST_HIT_ALT ] = { .pme_name = "PM_L2_ST_HIT_ALT", .pme_code = 0x000002689E, .pme_short_desc = "All successful D-side store dispatches that were L2 hits for this thread", .pme_long_desc = "All successful D-side store dispatches that were L2 hits for this thread", }, [ POWER9_PME_PM_L2_ST_MISS_128B ] = { .pme_name = "PM_L2_ST_MISS_128B", .pme_code = 0x0000016892, .pme_short_desc = "All successful D-side store dispatches that were an L2 miss (NOT Sx,Tx,Mx) for this thread and the RC calculated the request should be for 128B (i.", .pme_long_desc = "All successful D-side store dispatches that were an L2 miss (NOT Sx,Tx,Mx) for this thread and the RC calculated the request should be for 128B (i.e., M=0)", }, [ POWER9_PME_PM_L2_ST_MISS_64B ] = { .pme_name = "PM_L2_ST_MISS_64B", .pme_code = 0x0000026892, .pme_short_desc = "All successful D-side store dispatches that were an L2 miss (NOT Sx,Tx,Mx) for this thread and the RC calculated the request should be for 64B (i.", .pme_long_desc = "All successful D-side store dispatches that were an L2 miss (NOT Sx,Tx,Mx) for this thread and the RC calculated the request should be for 64B (i.e., M=1)", }, [ POWER9_PME_PM_L2_ST_MISS ] = { .pme_name = "PM_L2_ST_MISS", .pme_code = 0x0000026880, .pme_short_desc = "All successful D-Side Store dispatches that were an L2 miss for this thread", .pme_long_desc = "All successful D-Side Store dispatches that were an L2 miss for this thread", }, [ POWER9_PME_PM_L2_ST ] = 
{ .pme_name = "PM_L2_ST", .pme_code = 0x0000016880, .pme_short_desc = "All successful D-side store dispatches for this thread (L2 miss + L2 hits)", .pme_long_desc = "All successful D-side store dispatches for this thread (L2 miss + L2 hits)", }, [ POWER9_PME_PM_L2_SYS_GUESS_CORRECT ] = { .pme_name = "PM_L2_SYS_GUESS_CORRECT", .pme_code = 0x0000036088, .pme_short_desc = "L2 guess system (VGS or RNS) and guess was correct (ie data beyond-group)", .pme_long_desc = "L2 guess system (VGS or RNS) and guess was correct (ie data beyond-group)", }, [ POWER9_PME_PM_L2_SYS_GUESS_WRONG ] = { .pme_name = "PM_L2_SYS_GUESS_WRONG", .pme_code = 0x0000036888, .pme_short_desc = "L2 guess system (VGS or RNS) and guess was not correct (ie data ^beyond-group)", .pme_long_desc = "L2 guess system (VGS or RNS) and guess was not correct (ie data ^beyond-group)", }, [ POWER9_PME_PM_L2_SYS_PUMP ] = { .pme_name = "PM_L2_SYS_PUMP", .pme_code = 0x000004688A, .pme_short_desc = "RC requests that were system pump attempts", .pme_long_desc = "RC requests that were system pump attempts", }, [ POWER9_PME_PM_L3_CI_HIT ] = { .pme_name = "PM_L3_CI_HIT", .pme_code = 0x00000260A2, .pme_short_desc = "L3 Castins Hit (total count)", .pme_long_desc = "L3 Castins Hit (total count)", }, [ POWER9_PME_PM_L3_CI_MISS ] = { .pme_name = "PM_L3_CI_MISS", .pme_code = 0x00000268A2, .pme_short_desc = "L3 castins miss (total count)", .pme_long_desc = "L3 castins miss (total count)", }, [ POWER9_PME_PM_L3_CINJ ] = { .pme_name = "PM_L3_CINJ", .pme_code = 0x00000368A4, .pme_short_desc = "L3 castin of cache inject", .pme_long_desc = "L3 castin of cache inject", }, [ POWER9_PME_PM_L3_CI_USAGE ] = { .pme_name = "PM_L3_CI_USAGE", .pme_code = 0x00000168AC, .pme_short_desc = "Rotating sample of 16 CI or CO actives", .pme_long_desc = "Rotating sample of 16 CI or CO actives", }, [ POWER9_PME_PM_L3_CO0_BUSY ] = { .pme_name = "PM_L3_CO0_BUSY", .pme_code = 0x00000368AC, .pme_short_desc = "Lifetime, sample of CO machine 0 valid", 
.pme_long_desc = "Lifetime, sample of CO machine 0 valid", }, [ POWER9_PME_PM_L3_CO0_BUSY_ALT ] = { .pme_name = "PM_L3_CO0_BUSY_ALT", .pme_code = 0x00000468AC, .pme_short_desc = "Lifetime, sample of CO machine 0 valid", .pme_long_desc = "Lifetime, sample of CO machine 0 valid", }, [ POWER9_PME_PM_L3_CO_L31 ] = { .pme_name = "PM_L3_CO_L31", .pme_code = 0x00000268A0, .pme_short_desc = "L3 CO to L3.", .pme_long_desc = "L3 CO to L3.1 OR of port 0 and 1 (lossy = may undercount if two cresps come in the same cyc)", }, [ POWER9_PME_PM_L3_CO_LCO ] = { .pme_name = "PM_L3_CO_LCO", .pme_code = 0x00000360A4, .pme_short_desc = "Total L3 COs occurred on LCO L3.", .pme_long_desc = "Total L3 COs occurred on LCO L3.1 (good cresp, may end up in mem on a retry)", }, [ POWER9_PME_PM_L3_CO_MEM ] = { .pme_name = "PM_L3_CO_MEM", .pme_code = 0x00000260A0, .pme_short_desc = "L3 CO to memory OR of port 0 and 1 (lossy = may undercount if two cresp come in the same cyc)", .pme_long_desc = "L3 CO to memory OR of port 0 and 1 (lossy = may undercount if two cresp come in the same cyc)", }, [ POWER9_PME_PM_L3_CO_MEPF ] = { .pme_name = "PM_L3_CO_MEPF", .pme_code = 0x000003E05E, .pme_short_desc = "L3 castouts in Mepf state for this thread", .pme_long_desc = "L3 castouts in Mepf state for this thread", }, [ POWER9_PME_PM_L3_CO_MEPF_ALT ] = { .pme_name = "PM_L3_CO_MEPF_ALT", .pme_code = 0x00000168A0, .pme_short_desc = "L3 CO of line in Mep state (includes casthrough to memory).", .pme_long_desc = "L3 CO of line in Mep state (includes casthrough to memory). 
The Mepf state indicates that a line was brought in to satisfy an L3 prefetch request", }, [ POWER9_PME_PM_L3_CO ] = { .pme_name = "PM_L3_CO", .pme_code = 0x00000360A8, .pme_short_desc = "L3 castout occurring (does not include casthrough or log writes (cinj/dmaw))", .pme_long_desc = "L3 castout occurring (does not include casthrough or log writes (cinj/dmaw))", }, [ POWER9_PME_PM_L3_GRP_GUESS_CORRECT ] = { .pme_name = "PM_L3_GRP_GUESS_CORRECT", .pme_code = 0x00000168B2, .pme_short_desc = "Initial scope=group (GS or NNS) and data from same group (near) (pred successful)", .pme_long_desc = "Initial scope=group (GS or NNS) and data from same group (near) (pred successful)", }, [ POWER9_PME_PM_L3_GRP_GUESS_WRONG_HIGH ] = { .pme_name = "PM_L3_GRP_GUESS_WRONG_HIGH", .pme_code = 0x00000368B2, .pme_short_desc = "Initial scope=group (GS or NNS) but data from local node.", .pme_long_desc = "Initial scope=group (GS or NNS) but data from local node. Prediction too high", }, [ POWER9_PME_PM_L3_GRP_GUESS_WRONG_LOW ] = { .pme_name = "PM_L3_GRP_GUESS_WRONG_LOW", .pme_code = 0x00000360B2, .pme_short_desc = "Initial scope=group (GS or NNS) but data from outside group (far or rem).", .pme_long_desc = "Initial scope=group (GS or NNS) but data from outside group (far or rem). 
Prediction too Low", }, [ POWER9_PME_PM_L3_HIT ] = { .pme_name = "PM_L3_HIT", .pme_code = 0x00000160A4, .pme_short_desc = "L3 Hits (L2 miss hitting L3, including data/instrn/xlate)", .pme_long_desc = "L3 Hits (L2 miss hitting L3, including data/instrn/xlate)", }, [ POWER9_PME_PM_L3_L2_CO_HIT ] = { .pme_name = "PM_L3_L2_CO_HIT", .pme_code = 0x00000360A2, .pme_short_desc = "L2 CO hits", .pme_long_desc = "L2 CO hits", }, [ POWER9_PME_PM_L3_L2_CO_MISS ] = { .pme_name = "PM_L3_L2_CO_MISS", .pme_code = 0x00000368A2, .pme_short_desc = "L2 CO miss", .pme_long_desc = "L2 CO miss", }, [ POWER9_PME_PM_L3_LAT_CI_HIT ] = { .pme_name = "PM_L3_LAT_CI_HIT", .pme_code = 0x00000460A2, .pme_short_desc = "L3 Lateral Castins Hit", .pme_long_desc = "L3 Lateral Castins Hit", }, [ POWER9_PME_PM_L3_LAT_CI_MISS ] = { .pme_name = "PM_L3_LAT_CI_MISS", .pme_code = 0x00000468A2, .pme_short_desc = "L3 Lateral Castins Miss", .pme_long_desc = "L3 Lateral Castins Miss", }, [ POWER9_PME_PM_L3_LD_HIT ] = { .pme_name = "PM_L3_LD_HIT", .pme_code = 0x00000260A4, .pme_short_desc = "L3 Hits for demand LDs", .pme_long_desc = "L3 Hits for demand LDs", }, [ POWER9_PME_PM_L3_LD_MISS ] = { .pme_name = "PM_L3_LD_MISS", .pme_code = 0x00000268A4, .pme_short_desc = "L3 Misses for demand LDs", .pme_long_desc = "L3 Misses for demand LDs", }, [ POWER9_PME_PM_L3_LD_PREF ] = { .pme_name = "PM_L3_LD_PREF", .pme_code = 0x000000F0B0, .pme_short_desc = "L3 load prefetch, sourced from a hardware or software stream, was sent to the nest", .pme_long_desc = "L3 load prefetch, sourced from a hardware or software stream, was sent to the nest", }, [ POWER9_PME_PM_L3_LOC_GUESS_CORRECT ] = { .pme_name = "PM_L3_LOC_GUESS_CORRECT", .pme_code = 0x00000160B2, .pme_short_desc = "initial scope=node/chip (LNS) and data from local node (local) (pred successful) - always PFs only", .pme_long_desc = "initial scope=node/chip (LNS) and data from local node (local) (pred successful) - always PFs only", }, [ POWER9_PME_PM_L3_LOC_GUESS_WRONG ] = 
{ .pme_name = "PM_L3_LOC_GUESS_WRONG", .pme_code = 0x00000268B2, .pme_short_desc = "Initial scope=node (LNS) but data from out side local node (near or far or rem).", .pme_long_desc = "Initial scope=node (LNS) but data from out side local node (near or far or rem). Prediction too Low", }, [ POWER9_PME_PM_L3_MISS ] = { .pme_name = "PM_L3_MISS", .pme_code = 0x00000168A4, .pme_short_desc = "L3 Misses (L2 miss also missing L3, including data/instrn/xlate)", .pme_long_desc = "L3 Misses (L2 miss also missing L3, including data/instrn/xlate)", }, [ POWER9_PME_PM_L3_P0_CO_L31 ] = { .pme_name = "PM_L3_P0_CO_L31", .pme_code = 0x00000460AA, .pme_short_desc = "L3 CO to L3.", .pme_long_desc = "L3 CO to L3.1 (LCO) port 0 with or without data", }, [ POWER9_PME_PM_L3_P0_CO_MEM ] = { .pme_name = "PM_L3_P0_CO_MEM", .pme_code = 0x00000360AA, .pme_short_desc = "L3 CO to memory port 0 with or without data", .pme_long_desc = "L3 CO to memory port 0 with or without data", }, [ POWER9_PME_PM_L3_P0_CO_RTY ] = { .pme_name = "PM_L3_P0_CO_RTY", .pme_code = 0x00000360AE, .pme_short_desc = "L3 CO received retry port 0 (memory only), every retry counted", .pme_long_desc = "L3 CO received retry port 0 (memory only), every retry counted", }, [ POWER9_PME_PM_L3_P0_CO_RTY_ALT ] = { .pme_name = "PM_L3_P0_CO_RTY_ALT", .pme_code = 0x00000460AE, .pme_short_desc = "L3 CO received retry port 2 (memory only), every retry counted", .pme_long_desc = "L3 CO received retry port 2 (memory only), every retry counted", }, [ POWER9_PME_PM_L3_P0_GRP_PUMP ] = { .pme_name = "PM_L3_P0_GRP_PUMP", .pme_code = 0x00000260B0, .pme_short_desc = "L3 PF sent with grp scope port 0, counts even retried requests", .pme_long_desc = "L3 PF sent with grp scope port 0, counts even retried requests", }, [ POWER9_PME_PM_L3_P0_LCO_DATA ] = { .pme_name = "PM_L3_P0_LCO_DATA", .pme_code = 0x00000260AA, .pme_short_desc = "LCO sent with data port 0", .pme_long_desc = "LCO sent with data port 0", }, [ POWER9_PME_PM_L3_P0_LCO_NO_DATA ] = { 
.pme_name = "PM_L3_P0_LCO_NO_DATA", .pme_code = 0x00000160AA, .pme_short_desc = "Dataless L3 LCO sent port 0", .pme_long_desc = "Dataless L3 LCO sent port 0", }, [ POWER9_PME_PM_L3_P0_LCO_RTY ] = { .pme_name = "PM_L3_P0_LCO_RTY", .pme_code = 0x00000160B4, .pme_short_desc = "L3 initiated LCO received retry on port 0 (can try 4 times)", .pme_long_desc = "L3 initiated LCO received retry on port 0 (can try 4 times)", }, [ POWER9_PME_PM_L3_P0_NODE_PUMP ] = { .pme_name = "PM_L3_P0_NODE_PUMP", .pme_code = 0x00000160B0, .pme_short_desc = "L3 PF sent with nodal scope port 0, counts even retried requests", .pme_long_desc = "L3 PF sent with nodal scope port 0, counts even retried requests", }, [ POWER9_PME_PM_L3_P0_PF_RTY ] = { .pme_name = "PM_L3_P0_PF_RTY", .pme_code = 0x00000160AE, .pme_short_desc = "L3 PF received retry port 0, every retry counted", .pme_long_desc = "L3 PF received retry port 0, every retry counted", }, [ POWER9_PME_PM_L3_P0_PF_RTY_ALT ] = { .pme_name = "PM_L3_P0_PF_RTY_ALT", .pme_code = 0x00000260AE, .pme_short_desc = "L3 PF received retry port 2, every retry counted", .pme_long_desc = "L3 PF received retry port 2, every retry counted", }, [ POWER9_PME_PM_L3_P0_SYS_PUMP ] = { .pme_name = "PM_L3_P0_SYS_PUMP", .pme_code = 0x00000360B0, .pme_short_desc = "L3 PF sent with sys scope port 0, counts even retried requests", .pme_long_desc = "L3 PF sent with sys scope port 0, counts even retried requests", }, [ POWER9_PME_PM_L3_P1_CO_L31 ] = { .pme_name = "PM_L3_P1_CO_L31", .pme_code = 0x00000468AA, .pme_short_desc = "L3 CO to L3.", .pme_long_desc = "L3 CO to L3.1 (LCO) port 1 with or without data", }, [ POWER9_PME_PM_L3_P1_CO_MEM ] = { .pme_name = "PM_L3_P1_CO_MEM", .pme_code = 0x00000368AA, .pme_short_desc = "L3 CO to memory port 1 with or without data", .pme_long_desc = "L3 CO to memory port 1 with or without data", }, [ POWER9_PME_PM_L3_P1_CO_RTY ] = { .pme_name = "PM_L3_P1_CO_RTY", .pme_code = 0x00000368AE, .pme_short_desc = "L3 CO received retry port 1 
(memory only), every retry counted", .pme_long_desc = "L3 CO received retry port 1 (memory only), every retry counted", }, [ POWER9_PME_PM_L3_P1_CO_RTY_ALT ] = { .pme_name = "PM_L3_P1_CO_RTY_ALT", .pme_code = 0x00000468AE, .pme_short_desc = "L3 CO received retry port 3 (memory only), every retry counted", .pme_long_desc = "L3 CO received retry port 3 (memory only), every retry counted", }, [ POWER9_PME_PM_L3_P1_GRP_PUMP ] = { .pme_name = "PM_L3_P1_GRP_PUMP", .pme_code = 0x00000268B0, .pme_short_desc = "L3 PF sent with grp scope port 1, counts even retried requests", .pme_long_desc = "L3 PF sent with grp scope port 1, counts even retried requests", }, [ POWER9_PME_PM_L3_P1_LCO_DATA ] = { .pme_name = "PM_L3_P1_LCO_DATA", .pme_code = 0x00000268AA, .pme_short_desc = "LCO sent with data port 1", .pme_long_desc = "LCO sent with data port 1", }, [ POWER9_PME_PM_L3_P1_LCO_NO_DATA ] = { .pme_name = "PM_L3_P1_LCO_NO_DATA", .pme_code = 0x00000168AA, .pme_short_desc = "Dataless L3 LCO sent port 1", .pme_long_desc = "Dataless L3 LCO sent port 1", }, [ POWER9_PME_PM_L3_P1_LCO_RTY ] = { .pme_name = "PM_L3_P1_LCO_RTY", .pme_code = 0x00000168B4, .pme_short_desc = "L3 initiated LCO received retry on port 1 (can try 4 times)", .pme_long_desc = "L3 initiated LCO received retry on port 1 (can try 4 times)", }, [ POWER9_PME_PM_L3_P1_NODE_PUMP ] = { .pme_name = "PM_L3_P1_NODE_PUMP", .pme_code = 0x00000168B0, .pme_short_desc = "L3 PF sent with nodal scope port 1, counts even retried requests", .pme_long_desc = "L3 PF sent with nodal scope port 1, counts even retried requests", }, [ POWER9_PME_PM_L3_P1_PF_RTY ] = { .pme_name = "PM_L3_P1_PF_RTY", .pme_code = 0x00000168AE, .pme_short_desc = "L3 PF received retry port 1, every retry counted", .pme_long_desc = "L3 PF received retry port 1, every retry counted", }, [ POWER9_PME_PM_L3_P1_PF_RTY_ALT ] = { .pme_name = "PM_L3_P1_PF_RTY_ALT", .pme_code = 0x00000268AE, .pme_short_desc = "L3 PF received retry port 3, every retry counted", 
.pme_long_desc = "L3 PF received retry port 3, every retry counted", }, [ POWER9_PME_PM_L3_P1_SYS_PUMP ] = { .pme_name = "PM_L3_P1_SYS_PUMP", .pme_code = 0x00000368B0, .pme_short_desc = "L3 PF sent with sys scope port 1, counts even retried requests", .pme_long_desc = "L3 PF sent with sys scope port 1, counts even retried requests", }, [ POWER9_PME_PM_L3_P2_LCO_RTY ] = { .pme_name = "PM_L3_P2_LCO_RTY", .pme_code = 0x00000260B4, .pme_short_desc = "L3 initiated LCO received retry on port 2 (can try 4 times)", .pme_long_desc = "L3 initiated LCO received retry on port 2 (can try 4 times)", }, [ POWER9_PME_PM_L3_P3_LCO_RTY ] = { .pme_name = "PM_L3_P3_LCO_RTY", .pme_code = 0x00000268B4, .pme_short_desc = "L3 initiated LCO received retry on port 3 (can try 4 times)", .pme_long_desc = "L3 initiated LCO received retry on port 3 (can try 4 times)", }, [ POWER9_PME_PM_L3_PF0_BUSY ] = { .pme_name = "PM_L3_PF0_BUSY", .pme_code = 0x00000360B4, .pme_short_desc = "Lifetime, sample of PF machine 0 valid", .pme_long_desc = "Lifetime, sample of PF machine 0 valid", }, [ POWER9_PME_PM_L3_PF0_BUSY_ALT ] = { .pme_name = "PM_L3_PF0_BUSY_ALT", .pme_code = 0x00000460B4, .pme_short_desc = "Lifetime, sample of PF machine 0 valid", .pme_long_desc = "Lifetime, sample of PF machine 0 valid", }, [ POWER9_PME_PM_L3_PF_HIT_L3 ] = { .pme_name = "PM_L3_PF_HIT_L3", .pme_code = 0x00000260A8, .pme_short_desc = "L3 PF hit in L3 (abandoned)", .pme_long_desc = "L3 PF hit in L3 (abandoned)", }, [ POWER9_PME_PM_L3_PF_MISS_L3 ] = { .pme_name = "PM_L3_PF_MISS_L3", .pme_code = 0x00000160A0, .pme_short_desc = "L3 PF missed in L3", .pme_long_desc = "L3 PF missed in L3", }, [ POWER9_PME_PM_L3_PF_OFF_CHIP_CACHE ] = { .pme_name = "PM_L3_PF_OFF_CHIP_CACHE", .pme_code = 0x00000368A0, .pme_short_desc = "L3 PF from Off chip cache", .pme_long_desc = "L3 PF from Off chip cache", }, [ POWER9_PME_PM_L3_PF_OFF_CHIP_MEM ] = { .pme_name = "PM_L3_PF_OFF_CHIP_MEM", .pme_code = 0x00000468A0, .pme_short_desc = "L3 PF from Off 
chip memory", .pme_long_desc = "L3 PF from Off chip memory", }, [ POWER9_PME_PM_L3_PF_ON_CHIP_CACHE ] = { .pme_name = "PM_L3_PF_ON_CHIP_CACHE", .pme_code = 0x00000360A0, .pme_short_desc = "L3 PF from On chip cache", .pme_long_desc = "L3 PF from On chip cache", }, [ POWER9_PME_PM_L3_PF_ON_CHIP_MEM ] = { .pme_name = "PM_L3_PF_ON_CHIP_MEM", .pme_code = 0x00000460A0, .pme_short_desc = "L3 PF from On chip memory", .pme_long_desc = "L3 PF from On chip memory", }, [ POWER9_PME_PM_L3_PF_USAGE ] = { .pme_name = "PM_L3_PF_USAGE", .pme_code = 0x00000260AC, .pme_short_desc = "Rotating sample of 32 PF actives", .pme_long_desc = "Rotating sample of 32 PF actives", }, [ POWER9_PME_PM_L3_RD0_BUSY ] = { .pme_name = "PM_L3_RD0_BUSY", .pme_code = 0x00000368B4, .pme_short_desc = "Lifetime, sample of RD machine 0 valid", .pme_long_desc = "Lifetime, sample of RD machine 0 valid", }, [ POWER9_PME_PM_L3_RD0_BUSY_ALT ] = { .pme_name = "PM_L3_RD0_BUSY_ALT", .pme_code = 0x00000468B4, .pme_short_desc = "Lifetime, sample of RD machine 0 valid", .pme_long_desc = "Lifetime, sample of RD machine 0 valid", }, [ POWER9_PME_PM_L3_RD_USAGE ] = { .pme_name = "PM_L3_RD_USAGE", .pme_code = 0x00000268AC, .pme_short_desc = "Rotating sample of 16 RD actives", .pme_long_desc = "Rotating sample of 16 RD actives", }, [ POWER9_PME_PM_L3_SN0_BUSY ] = { .pme_name = "PM_L3_SN0_BUSY", .pme_code = 0x00000360AC, .pme_short_desc = "Lifetime, sample of snooper machine 0 valid", .pme_long_desc = "Lifetime, sample of snooper machine 0 valid", }, [ POWER9_PME_PM_L3_SN0_BUSY_ALT ] = { .pme_name = "PM_L3_SN0_BUSY_ALT", .pme_code = 0x00000460AC, .pme_short_desc = "Lifetime, sample of snooper machine 0 valid", .pme_long_desc = "Lifetime, sample of snooper machine 0 valid", }, [ POWER9_PME_PM_L3_SN_USAGE ] = { .pme_name = "PM_L3_SN_USAGE", .pme_code = 0x00000160AC, .pme_short_desc = "Rotating sample of 16 snoop valids", .pme_long_desc = "Rotating sample of 16 snoop valids", }, [ POWER9_PME_PM_L3_SW_PREF ] = { .pme_name = 
"PM_L3_SW_PREF", .pme_code = 0x000000F8B0, .pme_short_desc = "L3 load prefetch, sourced from a software prefetch stream, was sent to the nest", .pme_long_desc = "L3 load prefetch, sourced from a software prefetch stream, was sent to the nest", }, [ POWER9_PME_PM_L3_SYS_GUESS_CORRECT ] = { .pme_name = "PM_L3_SYS_GUESS_CORRECT", .pme_code = 0x00000260B2, .pme_short_desc = "Initial scope=system (VGS or RNS) and data from outside group (far or rem)(pred successful)", .pme_long_desc = "Initial scope=system (VGS or RNS) and data from outside group (far or rem)(pred successful)", }, [ POWER9_PME_PM_L3_SYS_GUESS_WRONG ] = { .pme_name = "PM_L3_SYS_GUESS_WRONG", .pme_code = 0x00000460B2, .pme_short_desc = "Initial scope=system (VGS or RNS) but data from local or near.", .pme_long_desc = "Initial scope=system (VGS or RNS) but data from local or near. Prediction too high", }, [ POWER9_PME_PM_L3_TRANS_PF ] = { .pme_name = "PM_L3_TRANS_PF", .pme_code = 0x00000468A4, .pme_short_desc = "L3 Transient prefetch received from L2", .pme_long_desc = "L3 Transient prefetch received from L2", }, [ POWER9_PME_PM_L3_WI0_BUSY ] = { .pme_name = "PM_L3_WI0_BUSY", .pme_code = 0x00000160B6, .pme_short_desc = "Rotating sample of 8 WI valid", .pme_long_desc = "Rotating sample of 8 WI valid", }, [ POWER9_PME_PM_L3_WI0_BUSY_ALT ] = { .pme_name = "PM_L3_WI0_BUSY_ALT", .pme_code = 0x00000260B6, .pme_short_desc = "Rotating sample of 8 WI valid (duplicate)", .pme_long_desc = "Rotating sample of 8 WI valid (duplicate)", }, [ POWER9_PME_PM_L3_WI_USAGE ] = { .pme_name = "PM_L3_WI_USAGE", .pme_code = 0x00000168A8, .pme_short_desc = "Lifetime, sample of Write Inject machine 0 valid", .pme_long_desc = "Lifetime, sample of Write Inject machine 0 valid", }, [ POWER9_PME_PM_LARX_FIN ] = { .pme_name = "PM_LARX_FIN", .pme_code = 0x000003C058, .pme_short_desc = "Larx finished", .pme_long_desc = "Larx finished", }, [ POWER9_PME_PM_LD_CMPL ] = { .pme_name = "PM_LD_CMPL", .pme_code = 0x000004003E, .pme_short_desc = 
"count of Loads completed", .pme_long_desc = "count of Loads completed", }, [ POWER9_PME_PM_LD_L3MISS_PEND_CYC ] = { .pme_name = "PM_LD_L3MISS_PEND_CYC", .pme_code = 0x0000010062, .pme_short_desc = "Cycles L3 miss was pending for this thread", .pme_long_desc = "Cycles L3 miss was pending for this thread", }, [ POWER9_PME_PM_LD_MISS_L1_FIN ] = { .pme_name = "PM_LD_MISS_L1_FIN", .pme_code = 0x000002C04E, .pme_short_desc = "Number of load instructions that finished with an L1 miss.", .pme_long_desc = "Number of load instructions that finished with an L1 miss. Note that even if a load spans multiple slices this event will increment only once per load op.", }, /* See also alternate entries for 000003E054 / POWER9_PME_PM_LD_MISS_L1 with code(s) 00000400F0 at the bottom of this table. \n */ [ POWER9_PME_PM_LD_MISS_L1 ] = { .pme_name = "PM_LD_MISS_L1", .pme_code = 0x000003E054, .pme_short_desc = "Load Missed L1, counted at execution time (can be greater than loads finished).", .pme_long_desc = "Load Missed L1, counted at execution time (can be greater than loads finished). LMQ merges are not included in this count. i.e. if a load instruction misses on an address that is already allocated on the LMQ, this event will not increment for that load). 
Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load.", }, [ POWER9_PME_PM_LD_REF_L1 ] = { .pme_name = "PM_LD_REF_L1", .pme_code = 0x00000100FC, .pme_short_desc = "All L1 D cache load references counted at finish, gated by reject", .pme_long_desc = "All L1 D cache load references counted at finish, gated by reject", }, [ POWER9_PME_PM_LINK_STACK_CORRECT ] = { .pme_name = "PM_LINK_STACK_CORRECT", .pme_code = 0x00000058A0, .pme_short_desc = "Link stack predicts right address", .pme_long_desc = "Link stack predicts right address", }, [ POWER9_PME_PM_LINK_STACK_INVALID_PTR ] = { .pme_name = "PM_LINK_STACK_INVALID_PTR", .pme_code = 0x0000005898, .pme_short_desc = "It is most often caused by certain types of flush where the pointer is not available.", .pme_long_desc = "It is most often caused by certain types of flush where the pointer is not available. Can result in the data in the link stack becoming unusable.", }, [ POWER9_PME_PM_LINK_STACK_WRONG_ADD_PRED ] = { .pme_name = "PM_LINK_STACK_WRONG_ADD_PRED", .pme_code = 0x0000005098, .pme_short_desc = "Link stack predicts wrong address, because of link stack design limitation or software violating the coding conventions", .pme_long_desc = "Link stack predicts wrong address, because of link stack design limitation or software violating the coding conventions", }, [ POWER9_PME_PM_LMQ_EMPTY_CYC ] = { .pme_name = "PM_LMQ_EMPTY_CYC", .pme_code = 0x000002E05E, .pme_short_desc = "Cycles in which the LMQ has no pending load misses for this thread", .pme_long_desc = "Cycles in which the LMQ has no pending load misses for this thread", }, [ POWER9_PME_PM_LMQ_MERGE ] = { .pme_name = "PM_LMQ_MERGE", .pme_code = 0x000001002E, .pme_short_desc = "A demand miss collides with a prefetch for the same line", .pme_long_desc = "A demand miss collides with a prefetch for the same line", }, [ POWER9_PME_PM_LRQ_REJECT ] = { .pme_name = "PM_LRQ_REJECT", .pme_code = 
0x000002E05A, .pme_short_desc = "Internal LSU reject from LRQ.", .pme_long_desc = "Internal LSU reject from LRQ. Rejects cause the load to go back to LRQ, but it stays contained within the LSU once it gets issued. This event counts the number of times the LRQ attempts to relaunch an instruction after a reject. Any load can suffer multiple rejects", }, [ POWER9_PME_PM_LS0_DC_COLLISIONS ] = { .pme_name = "PM_LS0_DC_COLLISIONS", .pme_code = 0x000000D090, .pme_short_desc = "Read-write data cache collisions", .pme_long_desc = "Read-write data cache collisions", }, [ POWER9_PME_PM_LS0_ERAT_MISS_PREF ] = { .pme_name = "PM_LS0_ERAT_MISS_PREF", .pme_code = 0x000000E084, .pme_short_desc = "LS0 Erat miss due to prefetch", .pme_long_desc = "LS0 Erat miss due to prefetch", }, [ POWER9_PME_PM_LS0_LAUNCH_HELD_PREF ] = { .pme_name = "PM_LS0_LAUNCH_HELD_PREF", .pme_code = 0x000000C09C, .pme_short_desc = "Number of times a load or store instruction was unable to launch/relaunch because a high priority prefetch used that relaunch cycle", .pme_long_desc = "Number of times a load or store instruction was unable to launch/relaunch because a high priority prefetch used that relaunch cycle", }, [ POWER9_PME_PM_LS0_PTE_TABLEWALK_CYC ] = { .pme_name = "PM_LS0_PTE_TABLEWALK_CYC", .pme_code = 0x000000E0BC, .pme_short_desc = "Cycles when a tablewalk is pending on this thread on table 0", .pme_long_desc = "Cycles when a tablewalk is pending on this thread on table 0", }, [ POWER9_PME_PM_LS0_TM_DISALLOW ] = { .pme_name = "PM_LS0_TM_DISALLOW", .pme_code = 0x000000E0B4, .pme_short_desc = "A TM-ineligible instruction tries to execute inside a transaction and the LSU disallows it", .pme_long_desc = "A TM-ineligible instruction tries to execute inside a transaction and the LSU disallows it", }, [ POWER9_PME_PM_LS0_UNALIGNED_LD ] = { .pme_name = "PM_LS0_UNALIGNED_LD", .pme_code = 0x000000C094, .pme_short_desc = "Load instructions whose data crosses a double-word boundary, which causes it to require an 
additional slice than than what normally would be required of the load of that size.", .pme_long_desc = "Load instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the load of that size. If the load wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty", }, [ POWER9_PME_PM_LS0_UNALIGNED_ST ] = { .pme_name = "PM_LS0_UNALIGNED_ST", .pme_code = 0x000000F0B8, .pme_short_desc = "Store instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the Store of that size.", .pme_long_desc = "Store instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the Store of that size. If the Store wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty", }, [ POWER9_PME_PM_LS1_DC_COLLISIONS ] = { .pme_name = "PM_LS1_DC_COLLISIONS", .pme_code = 0x000000D890, .pme_short_desc = "Read-write data cache collisions", .pme_long_desc = "Read-write data cache collisions", }, [ POWER9_PME_PM_LS1_ERAT_MISS_PREF ] = { .pme_name = "PM_LS1_ERAT_MISS_PREF", .pme_code = 0x000000E884, .pme_short_desc = "LS1 Erat miss due to prefetch", .pme_long_desc = "LS1 Erat miss due to prefetch", }, [ POWER9_PME_PM_LS1_LAUNCH_HELD_PREF ] = { .pme_name = "PM_LS1_LAUNCH_HELD_PREF", .pme_code = 0x000000C89C, .pme_short_desc = "Number of times a load or store instruction was unable to launch/relaunch because a high priority prefetch used that relaunch cycle", .pme_long_desc = "Number of times a load or store instruction was unable to launch/relaunch because a high priority prefetch used that relaunch cycle", }, [ POWER9_PME_PM_LS1_PTE_TABLEWALK_CYC ] = { .pme_name = "PM_LS1_PTE_TABLEWALK_CYC", .pme_code = 0x000000E8BC, .pme_short_desc = "Cycles when a tablewalk is pending on this thread on table 1", .pme_long_desc = 
"Cycles when a tablewalk is pending on this thread on table 1", }, [ POWER9_PME_PM_LS1_TM_DISALLOW ] = { .pme_name = "PM_LS1_TM_DISALLOW", .pme_code = 0x000000E8B4, .pme_short_desc = "A TM-ineligible instruction tries to execute inside a transaction and the LSU disallows it", .pme_long_desc = "A TM-ineligible instruction tries to execute inside a transaction and the LSU disallows it", }, [ POWER9_PME_PM_LS1_UNALIGNED_LD ] = { .pme_name = "PM_LS1_UNALIGNED_LD", .pme_code = 0x000000C894, .pme_short_desc = "Load instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the load of that size.", .pme_long_desc = "Load instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the load of that size. If the load wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty", }, [ POWER9_PME_PM_LS1_UNALIGNED_ST ] = { .pme_name = "PM_LS1_UNALIGNED_ST", .pme_code = 0x000000F8B8, .pme_short_desc = "Store instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the Store of that size.", .pme_long_desc = "Store instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the Store of that size. 
If the Store wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty", }, [ POWER9_PME_PM_LS2_DC_COLLISIONS ] = { .pme_name = "PM_LS2_DC_COLLISIONS", .pme_code = 0x000000D094, .pme_short_desc = "Read-write data cache collisions", .pme_long_desc = "Read-write data cache collisions", }, [ POWER9_PME_PM_LS2_ERAT_MISS_PREF ] = { .pme_name = "PM_LS2_ERAT_MISS_PREF", .pme_code = 0x000000E088, .pme_short_desc = "LS0 Erat miss due to prefetch", .pme_long_desc = "LS0 Erat miss due to prefetch", }, [ POWER9_PME_PM_LS2_TM_DISALLOW ] = { .pme_name = "PM_LS2_TM_DISALLOW", .pme_code = 0x000000E0B8, .pme_short_desc = "A TM-ineligible instruction tries to execute inside a transaction and the LSU disallows it", .pme_long_desc = "A TM-ineligible instruction tries to execute inside a transaction and the LSU disallows it", }, [ POWER9_PME_PM_LS2_UNALIGNED_LD ] = { .pme_name = "PM_LS2_UNALIGNED_LD", .pme_code = 0x000000C098, .pme_short_desc = "Load instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the load of that size.", .pme_long_desc = "Load instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the load of that size. If the load wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty", }, [ POWER9_PME_PM_LS2_UNALIGNED_ST ] = { .pme_name = "PM_LS2_UNALIGNED_ST", .pme_code = 0x000000F0BC, .pme_short_desc = "Store instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the Store of that size.", .pme_long_desc = "Store instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the Store of that size. 
If the Store wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty", }, [ POWER9_PME_PM_LS3_DC_COLLISIONS ] = { .pme_name = "PM_LS3_DC_COLLISIONS", .pme_code = 0x000000D894, .pme_short_desc = "Read-write data cache collisions", .pme_long_desc = "Read-write data cache collisions", }, [ POWER9_PME_PM_LS3_ERAT_MISS_PREF ] = { .pme_name = "PM_LS3_ERAT_MISS_PREF", .pme_code = 0x000000E888, .pme_short_desc = "LS1 Erat miss due to prefetch", .pme_long_desc = "LS1 Erat miss due to prefetch", }, [ POWER9_PME_PM_LS3_TM_DISALLOW ] = { .pme_name = "PM_LS3_TM_DISALLOW", .pme_code = 0x000000E8B8, .pme_short_desc = "A TM-ineligible instruction tries to execute inside a transaction and the LSU disallows it", .pme_long_desc = "A TM-ineligible instruction tries to execute inside a transaction and the LSU disallows it", }, [ POWER9_PME_PM_LS3_UNALIGNED_LD ] = { .pme_name = "PM_LS3_UNALIGNED_LD", .pme_code = 0x000000C898, .pme_short_desc = "Load instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the load of that size.", .pme_long_desc = "Load instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the load of that size. If the load wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty", }, [ POWER9_PME_PM_LS3_UNALIGNED_ST ] = { .pme_name = "PM_LS3_UNALIGNED_ST", .pme_code = 0x000000F8BC, .pme_short_desc = "Store instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the Store of that size.", .pme_long_desc = "Store instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the Store of that size. 
If the Store wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty", }, [ POWER9_PME_PM_LSU0_1_LRQF_FULL_CYC ] = { .pme_name = "PM_LSU0_1_LRQF_FULL_CYC", .pme_code = 0x000000D0BC, .pme_short_desc = "Counts the number of cycles the LRQF is full.", .pme_long_desc = "Counts the number of cycles the LRQF is full. LRQF is the queue that holds loads between finish and completion. If it fills up, instructions stay in LRQ until completion, potentially backing up the LRQ", }, [ POWER9_PME_PM_LSU0_ERAT_HIT ] = { .pme_name = "PM_LSU0_ERAT_HIT", .pme_code = 0x000000E08C, .pme_short_desc = "Primary ERAT hit.", .pme_long_desc = "Primary ERAT hit. There is no secondary ERAT", }, [ POWER9_PME_PM_LSU0_FALSE_LHS ] = { .pme_name = "PM_LSU0_FALSE_LHS", .pme_code = 0x000000C0A0, .pme_short_desc = "False LHS match detected", .pme_long_desc = "False LHS match detected", }, [ POWER9_PME_PM_LSU0_L1_CAM_CANCEL ] = { .pme_name = "PM_LSU0_L1_CAM_CANCEL", .pme_code = 0x000000F090, .pme_short_desc = "ls0 l1 tm cam cancel", .pme_long_desc = "ls0 l1 tm cam cancel", }, [ POWER9_PME_PM_LSU0_LDMX_FIN ] = { .pme_name = "PM_LSU0_LDMX_FIN", .pme_code = 0x000000D088, .pme_short_desc = "New P9 instruction LDMX.", .pme_long_desc = "New P9 instruction LDMX. The definition of this new PMU event is (from the ldmx RFC02491): The thread has executed an ldmx instruction that accessed a doubleword that contains an effective address within an enabled section of the Load Monitored region. 
This event, therefore, should not occur if the FSCR has disabled the load monitored facility (FSCR[52]) or disabled the EBB facility (FSCR[56]).", }, [ POWER9_PME_PM_LSU0_LMQ_S0_VALID ] = { .pme_name = "PM_LSU0_LMQ_S0_VALID", .pme_code = 0x000000D8B8, .pme_short_desc = "Slot 0 of LMQ valid", .pme_long_desc = "Slot 0 of LMQ valid", }, [ POWER9_PME_PM_LSU0_LRQ_S0_VALID_CYC ] = { .pme_name = "PM_LSU0_LRQ_S0_VALID_CYC", .pme_code = 0x000000D8B4, .pme_short_desc = "Slot 0 of LRQ valid", .pme_long_desc = "Slot 0 of LRQ valid", }, [ POWER9_PME_PM_LSU0_SET_MPRED ] = { .pme_name = "PM_LSU0_SET_MPRED", .pme_code = 0x000000D080, .pme_short_desc = "Set prediction(set-p) miss.", .pme_long_desc = "Set prediction(set-p) miss. The entry was not found in the Set prediction table", }, [ POWER9_PME_PM_LSU0_SRQ_S0_VALID_CYC ] = { .pme_name = "PM_LSU0_SRQ_S0_VALID_CYC", .pme_code = 0x000000D0B4, .pme_short_desc = "Slot 0 of SRQ valid", .pme_long_desc = "Slot 0 of SRQ valid", }, [ POWER9_PME_PM_LSU0_STORE_REJECT ] = { .pme_name = "PM_LSU0_STORE_REJECT", .pme_code = 0x000000F088, .pme_short_desc = "All internal store rejects cause the instruction to go back to the SRQ and go to sleep until woken up to try again after the condition has been met", .pme_long_desc = "All internal store rejects cause the instruction to go back to the SRQ and go to sleep until woken up to try again after the condition has been met", }, [ POWER9_PME_PM_LSU0_TM_L1_HIT ] = { .pme_name = "PM_LSU0_TM_L1_HIT", .pme_code = 0x000000E094, .pme_short_desc = "Load tm hit in L1", .pme_long_desc = "Load tm hit in L1", }, [ POWER9_PME_PM_LSU0_TM_L1_MISS ] = { .pme_name = "PM_LSU0_TM_L1_MISS", .pme_code = 0x000000E09C, .pme_short_desc = "Load tm L1 miss", .pme_long_desc = "Load tm L1 miss", }, [ POWER9_PME_PM_LSU1_ERAT_HIT ] = { .pme_name = "PM_LSU1_ERAT_HIT", .pme_code = 0x000000E88C, .pme_short_desc = "Primary ERAT hit.", .pme_long_desc = "Primary ERAT hit. 
There is no secondary ERAT", }, [ POWER9_PME_PM_LSU1_FALSE_LHS ] = { .pme_name = "PM_LSU1_FALSE_LHS", .pme_code = 0x000000C8A0, .pme_short_desc = "False LHS match detected", .pme_long_desc = "False LHS match detected", }, [ POWER9_PME_PM_LSU1_L1_CAM_CANCEL ] = { .pme_name = "PM_LSU1_L1_CAM_CANCEL", .pme_code = 0x000000F890, .pme_short_desc = "ls1 l1 tm cam cancel", .pme_long_desc = "ls1 l1 tm cam cancel", }, [ POWER9_PME_PM_LSU1_LDMX_FIN ] = { .pme_name = "PM_LSU1_LDMX_FIN", .pme_code = 0x000000D888, .pme_short_desc = "New P9 instruction LDMX.", .pme_long_desc = "New P9 instruction LDMX. The definition of this new PMU event is (from the ldmx RFC02491): The thread has executed an ldmx instruction that accessed a doubleword that contains an effective address within an enabled section of the Load Monitored region. This event, therefore, should not occur if the FSCR has disabled the load monitored facility (FSCR[52]) or disabled the EBB facility (FSCR[56]).", }, [ POWER9_PME_PM_LSU1_SET_MPRED ] = { .pme_name = "PM_LSU1_SET_MPRED", .pme_code = 0x000000D880, .pme_short_desc = "Set prediction(set-p) miss.", .pme_long_desc = "Set prediction(set-p) miss. 
The entry was not found in the Set prediction table", }, [ POWER9_PME_PM_LSU1_STORE_REJECT ] = { .pme_name = "PM_LSU1_STORE_REJECT", .pme_code = 0x000000F888, .pme_short_desc = "All internal store rejects cause the instruction to go back to the SRQ and go to sleep until woken up to try again after the condition has been met", .pme_long_desc = "All internal store rejects cause the instruction to go back to the SRQ and go to sleep until woken up to try again after the condition has been met", }, [ POWER9_PME_PM_LSU1_TM_L1_HIT ] = { .pme_name = "PM_LSU1_TM_L1_HIT", .pme_code = 0x000000E894, .pme_short_desc = "Load tm hit in L1", .pme_long_desc = "Load tm hit in L1", }, [ POWER9_PME_PM_LSU1_TM_L1_MISS ] = { .pme_name = "PM_LSU1_TM_L1_MISS", .pme_code = 0x000000E89C, .pme_short_desc = "Load tm L1 miss", .pme_long_desc = "Load tm L1 miss", }, [ POWER9_PME_PM_LSU2_3_LRQF_FULL_CYC ] = { .pme_name = "PM_LSU2_3_LRQF_FULL_CYC", .pme_code = 0x000000D8BC, .pme_short_desc = "Counts the number of cycles the LRQF is full.", .pme_long_desc = "Counts the number of cycles the LRQF is full. LRQF is the queue that holds loads between finish and completion. If it fills up, instructions stay in LRQ until completion, potentially backing up the LRQ", }, [ POWER9_PME_PM_LSU2_ERAT_HIT ] = { .pme_name = "PM_LSU2_ERAT_HIT", .pme_code = 0x000000E090, .pme_short_desc = "Primary ERAT hit.", .pme_long_desc = "Primary ERAT hit. 
There is no secondary ERAT", }, [ POWER9_PME_PM_LSU2_FALSE_LHS ] = { .pme_name = "PM_LSU2_FALSE_LHS", .pme_code = 0x000000C0A4, .pme_short_desc = "False LHS match detected", .pme_long_desc = "False LHS match detected", }, [ POWER9_PME_PM_LSU2_L1_CAM_CANCEL ] = { .pme_name = "PM_LSU2_L1_CAM_CANCEL", .pme_code = 0x000000F094, .pme_short_desc = "ls2 l1 tm cam cancel", .pme_long_desc = "ls2 l1 tm cam cancel", }, [ POWER9_PME_PM_LSU2_LDMX_FIN ] = { .pme_name = "PM_LSU2_LDMX_FIN", .pme_code = 0x000000D08C, .pme_short_desc = "New P9 instruction LDMX.", .pme_long_desc = "New P9 instruction LDMX. The definition of this new PMU event is (from the ldmx RFC02491): The thread has executed an ldmx instruction that accessed a doubleword that contains an effective address within an enabled section of the Load Monitored region. This event, therefore, should not occur if the FSCR has disabled the load monitored facility (FSCR[52]) or disabled the EBB facility (FSCR[56]).", }, [ POWER9_PME_PM_LSU2_SET_MPRED ] = { .pme_name = "PM_LSU2_SET_MPRED", .pme_code = 0x000000D084, .pme_short_desc = "Set prediction(set-p) miss.", .pme_long_desc = "Set prediction(set-p) miss. 
The entry was not found in the Set prediction table", }, [ POWER9_PME_PM_LSU2_STORE_REJECT ] = { .pme_name = "PM_LSU2_STORE_REJECT", .pme_code = 0x000000F08C, .pme_short_desc = "All internal store rejects cause the instruction to go back to the SRQ and go to sleep until woken up to try again after the condition has been met", .pme_long_desc = "All internal store rejects cause the instruction to go back to the SRQ and go to sleep until woken up to try again after the condition has been met", }, [ POWER9_PME_PM_LSU2_TM_L1_HIT ] = { .pme_name = "PM_LSU2_TM_L1_HIT", .pme_code = 0x000000E098, .pme_short_desc = "Load tm hit in L1", .pme_long_desc = "Load tm hit in L1", }, [ POWER9_PME_PM_LSU2_TM_L1_MISS ] = { .pme_name = "PM_LSU2_TM_L1_MISS", .pme_code = 0x000000E0A0, .pme_short_desc = "Load tm L1 miss", .pme_long_desc = "Load tm L1 miss", }, [ POWER9_PME_PM_LSU3_ERAT_HIT ] = { .pme_name = "PM_LSU3_ERAT_HIT", .pme_code = 0x000000E890, .pme_short_desc = "Primary ERAT hit.", .pme_long_desc = "Primary ERAT hit. There is no secondary ERAT", }, [ POWER9_PME_PM_LSU3_FALSE_LHS ] = { .pme_name = "PM_LSU3_FALSE_LHS", .pme_code = 0x000000C8A4, .pme_short_desc = "False LHS match detected", .pme_long_desc = "False LHS match detected", }, [ POWER9_PME_PM_LSU3_L1_CAM_CANCEL ] = { .pme_name = "PM_LSU3_L1_CAM_CANCEL", .pme_code = 0x000000F894, .pme_short_desc = "ls3 l1 tm cam cancel", .pme_long_desc = "ls3 l1 tm cam cancel", }, [ POWER9_PME_PM_LSU3_LDMX_FIN ] = { .pme_name = "PM_LSU3_LDMX_FIN", .pme_code = 0x000000D88C, .pme_short_desc = "New P9 instruction LDMX.", .pme_long_desc = "New P9 instruction LDMX. The definition of this new PMU event is (from the ldmx RFC02491): The thread has executed an ldmx instruction that accessed a doubleword that contains an effective address within an enabled section of the Load Monitored region. 
This event, therefore, should not occur if the FSCR has disabled the load monitored facility (FSCR[52]) or disabled the EBB facility (FSCR[56]).", }, [ POWER9_PME_PM_LSU3_SET_MPRED ] = { .pme_name = "PM_LSU3_SET_MPRED", .pme_code = 0x000000D884, .pme_short_desc = "Set prediction(set-p) miss.", .pme_long_desc = "Set prediction(set-p) miss. The entry was not found in the Set prediction table", }, [ POWER9_PME_PM_LSU3_STORE_REJECT ] = { .pme_name = "PM_LSU3_STORE_REJECT", .pme_code = 0x000000F88C, .pme_short_desc = "All internal store rejects cause the instruction to go back to the SRQ and go to sleep until woken up to try again after the condition has been met", .pme_long_desc = "All internal store rejects cause the instruction to go back to the SRQ and go to sleep until woken up to try again after the condition has been met", }, [ POWER9_PME_PM_LSU3_TM_L1_HIT ] = { .pme_name = "PM_LSU3_TM_L1_HIT", .pme_code = 0x000000E898, .pme_short_desc = "Load tm hit in L1", .pme_long_desc = "Load tm hit in L1", }, [ POWER9_PME_PM_LSU3_TM_L1_MISS ] = { .pme_name = "PM_LSU3_TM_L1_MISS", .pme_code = 0x000000E8A0, .pme_short_desc = "Load tm L1 miss", .pme_long_desc = "Load tm L1 miss", }, [ POWER9_PME_PM_LSU_DERAT_MISS ] = { .pme_name = "PM_LSU_DERAT_MISS", .pme_code = 0x00000200F6, .pme_short_desc = "DERAT Reloaded due to a DERAT miss", .pme_long_desc = "DERAT Reloaded due to a DERAT miss", }, [ POWER9_PME_PM_LSU_FIN ] = { .pme_name = "PM_LSU_FIN", .pme_code = 0x0000030066, .pme_short_desc = "LSU Finished a PPC instruction (up to 4 per cycle)", .pme_long_desc = "LSU Finished a PPC instruction (up to 4 per cycle)", }, [ POWER9_PME_PM_LSU_FLUSH_ATOMIC ] = { .pme_name = "PM_LSU_FLUSH_ATOMIC", .pme_code = 0x000000C8A8, .pme_short_desc = "Quad-word loads (lq) are considered atomic because they always span at least 2 slices.", .pme_long_desc = "Quad-word loads (lq) are considered atomic because they always span at least 2 slices. 
If a snoop or store from another thread changes the data the load is accessing between the 2 or 3 pieces of the lq instruction, the lq will be flushed", }, [ POWER9_PME_PM_LSU_FLUSH_CI ] = { .pme_name = "PM_LSU_FLUSH_CI", .pme_code = 0x000000C0A8, .pme_short_desc = "Load was not issued to LSU as a cache inhibited (non-cacheable) load but it was later determined to be cache inhibited", .pme_long_desc = "Load was not issued to LSU as a cache inhibited (non-cacheable) load but it was later determined to be cache inhibited", }, [ POWER9_PME_PM_LSU_FLUSH_EMSH ] = { .pme_name = "PM_LSU_FLUSH_EMSH", .pme_code = 0x000000C0AC, .pme_short_desc = "An ERAT miss was detected after a set-p hit.", .pme_long_desc = "An ERAT miss was detected after a set-p hit. Erat tracker indicates fail due to tlbmiss and the instruction gets flushed because the instruction was working on the wrong address", }, [ POWER9_PME_PM_LSU_FLUSH_LARX_STCX ] = { .pme_name = "PM_LSU_FLUSH_LARX_STCX", .pme_code = 0x000000C8B8, .pme_short_desc = "A larx is flushed because an older larx has an LMQ reservation for the same thread.", .pme_long_desc = "A larx is flushed because an older larx has an LMQ reservation for the same thread. A stcx is flushed because an older stcx is in the LMQ. The flush happens when the older larx/stcx relaunches", }, [ POWER9_PME_PM_LSU_FLUSH_LHL_SHL ] = { .pme_name = "PM_LSU_FLUSH_LHL_SHL", .pme_code = 0x000000C8B4, .pme_short_desc = "The instruction was flushed because of a sequential load/store consistency.", .pme_long_desc = "The instruction was flushed because of a sequential load/store consistency. 
If a load or store hits on an older load that has either been snooped (for loads) or has stale data (for stores).", }, [ POWER9_PME_PM_LSU_FLUSH_LHS ] = { .pme_name = "PM_LSU_FLUSH_LHS", .pme_code = 0x000000C8B0, .pme_short_desc = "Effective Address alias flush : no EA match but Real Address match.", .pme_long_desc = "Effective Address alias flush : no EA match but Real Address match. If the data has not yet been returned for this load, the instruction will just be rejected, but if it has returned data, it will be flushed", }, [ POWER9_PME_PM_LSU_FLUSH_NEXT ] = { .pme_name = "PM_LSU_FLUSH_NEXT", .pme_code = 0x00000020B0, .pme_short_desc = "LSU flush next reported at flush time.", .pme_long_desc = "LSU flush next reported at flush time. Sometimes these also come with an exception", }, [ POWER9_PME_PM_LSU_FLUSH_OTHER ] = { .pme_name = "PM_LSU_FLUSH_OTHER", .pme_code = 0x000000C0BC, .pme_short_desc = "Other LSU flushes including: Sync (sync ack from L2 caused search of LRQ for oldest snooped load, This will either signal a Precise Flush of the oldest snooped loa or a Flush Next PPC); Data Valid Flush Next (several cases of this, one example is store and reload are lined up such that a store-hit-reload scenario exists and the CDF has already launched and has gotten bad/stale data); Bad Data Valid Flush Next (might be a few cases of this, one example is a larxa (D$ hit) return data and dval but can't allocate to LMQ (LMQ full or other reason).", .pme_long_desc = "Other LSU flushes including: Sync (sync ack from L2 caused search of LRQ for oldest snooped load, This will either signal a Precise Flush of the oldest snooped loa or a Flush Next PPC); Data Valid Flush Next (several cases of this, one example is store and reload are lined up such that a store-hit-reload scenario exists and the CDF has already launched and has gotten bad/stale data); Bad Data Valid Flush Next (might be a few cases of this, one example is a larxa (D$ hit) return data and dval but can't allocate 
to LMQ (LMQ full or other reason). Already gave dval but can't watch it for snoop_hit_larx. Need to take the \"bad dval\" back and flush all younger ops)", }, [ POWER9_PME_PM_LSU_FLUSH_RELAUNCH_MISS ] = { .pme_name = "PM_LSU_FLUSH_RELAUNCH_MISS", .pme_code = 0x000000C8AC, .pme_short_desc = "If a load that has already returned data and has to relaunch for any reason then gets a miss (erat, setp, data cache), it will often be flushed at relaunch time because the data might be inconsistent", .pme_long_desc = "If a load that has already returned data and has to relaunch for any reason then gets a miss (erat, setp, data cache), it will often be flushed at relaunch time because the data might be inconsistent", }, [ POWER9_PME_PM_LSU_FLUSH_SAO ] = { .pme_name = "PM_LSU_FLUSH_SAO", .pme_code = 0x000000C0B8, .pme_short_desc = "A load-hit-load condition with Strong Address Ordering will have address compare disabled and flush", .pme_long_desc = "A load-hit-load condition with Strong Address Ordering will have address compare disabled and flush", }, [ POWER9_PME_PM_LSU_FLUSH_UE ] = { .pme_name = "PM_LSU_FLUSH_UE", .pme_code = 0x000000C0B0, .pme_short_desc = "Correctable ECC error on reload data, reported at critical data forward time", .pme_long_desc = "Correctable ECC error on reload data, reported at critical data forward time", }, [ POWER9_PME_PM_LSU_FLUSH_WRK_ARND ] = { .pme_name = "PM_LSU_FLUSH_WRK_ARND", .pme_code = 0x000000C0B4, .pme_short_desc = "LSU workaround flush.", .pme_long_desc = "LSU workaround flush. These flushes are setup with programmable scan only latches to perform various actions when the flush macro receives a trigger from the dbg macros. These actions include things like flushing the next op encountered for a particular thread or flushing the next op that is NTC op that is encountered on a particular slice.
The kind of flush that the workaround is setup to perform is highly variable.", }, [ POWER9_PME_PM_LSU_LMQ_FULL_CYC ] = { .pme_name = "PM_LSU_LMQ_FULL_CYC", .pme_code = 0x000000D0B8, .pme_short_desc = "Counts the number of cycles the LMQ is full", .pme_long_desc = "Counts the number of cycles the LMQ is full", }, [ POWER9_PME_PM_LSU_LMQ_SRQ_EMPTY_CYC ] = { .pme_name = "PM_LSU_LMQ_SRQ_EMPTY_CYC", .pme_code = 0x000002003E, .pme_short_desc = "Cycles in which the LSU is empty for all threads (lmq and srq are completely empty)", .pme_long_desc = "Cycles in which the LSU is empty for all threads (lmq and srq are completely empty)", }, [ POWER9_PME_PM_LSU_NCST ] = { .pme_name = "PM_LSU_NCST", .pme_code = 0x000000C890, .pme_short_desc = "Asserts when a i=1 store op is sent to the nest.", .pme_long_desc = "Asserts when a i=1 store op is sent to the nest. No record of issue pipe (LS0/LS1) is maintained so this is for both pipes. Probably don't need separate LS0 and LS1", }, [ POWER9_PME_PM_LSU_REJECT_ERAT_MISS ] = { .pme_name = "PM_LSU_REJECT_ERAT_MISS", .pme_code = 0x000002E05C, .pme_short_desc = "LSU Reject due to ERAT (up to 4 per cycle)", .pme_long_desc = "LSU Reject due to ERAT (up to 4 per cycle)", }, [ POWER9_PME_PM_LSU_REJECT_LHS ] = { .pme_name = "PM_LSU_REJECT_LHS", .pme_code = 0x000004E05C, .pme_short_desc = "LSU Reject due to LHS (up to 4 per cycle)", .pme_long_desc = "LSU Reject due to LHS (up to 4 per cycle)", }, [ POWER9_PME_PM_LSU_REJECT_LMQ_FULL ] = { .pme_name = "PM_LSU_REJECT_LMQ_FULL", .pme_code = 0x000003001C, .pme_short_desc = "LSU Reject due to LMQ full (up to 4 per cycle)", .pme_long_desc = "LSU Reject due to LMQ full (up to 4 per cycle)", }, [ POWER9_PME_PM_LSU_SRQ_FULL_CYC ] = { .pme_name = "PM_LSU_SRQ_FULL_CYC", .pme_code = 0x000001001A, .pme_short_desc = "Cycles in which the Store Queue is full on all 4 slices.", .pme_long_desc = "Cycles in which the Store Queue is full on all 4 slices. This event is not per thread.
All the threads will see the same count for this core resource", }, [ POWER9_PME_PM_LSU_STCX_FAIL ] = { .pme_name = "PM_LSU_STCX_FAIL", .pme_code = 0x000000F080, .pme_short_desc = "", .pme_long_desc = "", }, [ POWER9_PME_PM_LSU_STCX ] = { .pme_name = "PM_LSU_STCX", .pme_code = 0x000000C090, .pme_short_desc = "STCX sent to nest, i.e. total", .pme_long_desc = "STCX sent to nest, i.e. total", }, [ POWER9_PME_PM_LWSYNC ] = { .pme_name = "PM_LWSYNC", .pme_code = 0x0000005894, .pme_short_desc = "Lwsync instruction decoded and transferred", .pme_long_desc = "Lwsync instruction decoded and transferred", }, [ POWER9_PME_PM_MATH_FLOP_CMPL ] = { .pme_name = "PM_MATH_FLOP_CMPL", .pme_code = 0x000004505C, .pme_short_desc = "Math flop instruction completed", .pme_long_desc = "Math flop instruction completed", }, [ POWER9_PME_PM_MEM_CO ] = { .pme_name = "PM_MEM_CO", .pme_code = 0x000004C058, .pme_short_desc = "Memory castouts from this thread", .pme_long_desc = "Memory castouts from this thread", }, [ POWER9_PME_PM_MEM_LOC_THRESH_IFU ] = { .pme_name = "PM_MEM_LOC_THRESH_IFU", .pme_code = 0x0000010058, .pme_short_desc = "Local Memory above threshold for IFU speculation control", .pme_long_desc = "Local Memory above threshold for IFU speculation control", }, [ POWER9_PME_PM_MEM_LOC_THRESH_LSU_HIGH ] = { .pme_name = "PM_MEM_LOC_THRESH_LSU_HIGH", .pme_code = 0x0000040056, .pme_short_desc = "Local memory above threshold for LSU medium", .pme_long_desc = "Local memory above threshold for LSU medium", }, [ POWER9_PME_PM_MEM_LOC_THRESH_LSU_MED ] = { .pme_name = "PM_MEM_LOC_THRESH_LSU_MED", .pme_code = 0x000001C05E, .pme_short_desc = "Local memory above threshold for data prefetch", .pme_long_desc = "Local memory above threshold for data prefetch", }, [ POWER9_PME_PM_MEM_PREF ] = { .pme_name = "PM_MEM_PREF", .pme_code = 0x000002C058, .pme_short_desc = "Memory prefetch for this thread.", .pme_long_desc = "Memory prefetch for this thread.
Includes L4", }, [ POWER9_PME_PM_MEM_READ ] = { .pme_name = "PM_MEM_READ", .pme_code = 0x0000010056, .pme_short_desc = "Reads from Memory from this thread (includes data/inst/xlate/l1prefetch/inst prefetch).", .pme_long_desc = "Reads from Memory from this thread (includes data/inst/xlate/l1prefetch/inst prefetch). Includes L4", }, [ POWER9_PME_PM_MEM_RWITM ] = { .pme_name = "PM_MEM_RWITM", .pme_code = 0x000003C05E, .pme_short_desc = "Memory Read With Intent to Modify for this thread", .pme_long_desc = "Memory Read With Intent to Modify for this thread", }, [ POWER9_PME_PM_MRK_BACK_BR_CMPL ] = { .pme_name = "PM_MRK_BACK_BR_CMPL", .pme_code = 0x000003515E, .pme_short_desc = "Marked branch instruction completed with a target address less than current instruction address", .pme_long_desc = "Marked branch instruction completed with a target address less than current instruction address", }, [ POWER9_PME_PM_MRK_BR_2PATH ] = { .pme_name = "PM_MRK_BR_2PATH", .pme_code = 0x0000010138, .pme_short_desc = "marked branches which are not strongly biased", .pme_long_desc = "marked branches which are not strongly biased", }, [ POWER9_PME_PM_MRK_BR_CMPL ] = { .pme_name = "PM_MRK_BR_CMPL", .pme_code = 0x000001016E, .pme_short_desc = "Branch Instruction completed", .pme_long_desc = "Branch Instruction completed", }, [ POWER9_PME_PM_MRK_BR_MPRED_CMPL ] = { .pme_name = "PM_MRK_BR_MPRED_CMPL", .pme_code = 0x00000301E4, .pme_short_desc = "Marked Branch Mispredicted", .pme_long_desc = "Marked Branch Mispredicted", }, [ POWER9_PME_PM_MRK_BR_TAKEN_CMPL ] = { .pme_name = "PM_MRK_BR_TAKEN_CMPL", .pme_code = 0x00000101E2, .pme_short_desc = "Marked Branch Taken completed", .pme_long_desc = "Marked Branch Taken completed", }, [ POWER9_PME_PM_MRK_BRU_FIN ] = { .pme_name = "PM_MRK_BRU_FIN", .pme_code = 0x000002013A, .pme_short_desc = "bru marked instr finish", .pme_long_desc = "bru marked instr finish", }, [ POWER9_PME_PM_MRK_DATA_FROM_DL2L3_MOD_CYC ] = { .pme_name = 
"PM_MRK_DATA_FROM_DL2L3_MOD_CYC", .pme_code = 0x000004D12E, .pme_short_desc = "Duration in cycles to reload with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked load", .pme_long_desc = "Duration in cycles to reload with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_DL2L3_MOD ] = { .pme_name = "PM_MRK_DATA_FROM_DL2L3_MOD", .pme_code = 0x000003D14E, .pme_short_desc = "The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked load", .pme_long_desc = "The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_DL2L3_SHR_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_DL2L3_SHR_CYC", .pme_code = 0x000002C128, .pme_short_desc = "Duration in cycles to reload with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked load", .pme_long_desc = "Duration in cycles to reload with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_DL2L3_SHR ] = { .pme_name = "PM_MRK_DATA_FROM_DL2L3_SHR", .pme_code = 0x000001D150, .pme_short_desc = "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked load", .pme_long_desc = "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_DL4_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_DL4_CYC", .pme_code = 0x000002C12C, .pme_short_desc = "Duration 
in cycles to reload from another chip's L4 on a different Node or Group (Distant) due to a marked load", .pme_long_desc = "Duration in cycles to reload from another chip's L4 on a different Node or Group (Distant) due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_DL4 ] = { .pme_name = "PM_MRK_DATA_FROM_DL4", .pme_code = 0x000001D152, .pme_short_desc = "The processor's data cache was reloaded from another chip's L4 on a different Node or Group (Distant) due to a marked load", .pme_long_desc = "The processor's data cache was reloaded from another chip's L4 on a different Node or Group (Distant) due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_DMEM_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_DMEM_CYC", .pme_code = 0x000004E11E, .pme_short_desc = "Duration in cycles to reload from another chip's memory on the same Node or Group (Distant) due to a marked load", .pme_long_desc = "Duration in cycles to reload from another chip's memory on the same Node or Group (Distant) due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_DMEM ] = { .pme_name = "PM_MRK_DATA_FROM_DMEM", .pme_code = 0x000003D14C, .pme_short_desc = "The processor's data cache was reloaded from another chip's memory on the same Node or Group (Distant) due to a marked load", .pme_long_desc = "The processor's data cache was reloaded from another chip's memory on the same Node or Group (Distant) due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_L21_MOD_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_L21_MOD_CYC", .pme_code = 0x000003D148, .pme_short_desc = "Duration in cycles to reload with Modified (M) data from another core's L2 on the same chip due to a marked load", .pme_long_desc = "Duration in cycles to reload with Modified (M) data from another core's L2 on the same chip due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_L21_MOD ] = { .pme_name = "PM_MRK_DATA_FROM_L21_MOD", .pme_code = 0x000004D146, .pme_short_desc = "The processor's data cache was reloaded with Modified (M) data from 
another core's L2 on the same chip due to a marked load", .pme_long_desc = "The processor's data cache was reloaded with Modified (M) data from another core's L2 on the same chip due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_L21_SHR_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_L21_SHR_CYC", .pme_code = 0x000001D154, .pme_short_desc = "Duration in cycles to reload with Shared (S) data from another core's L2 on the same chip due to a marked load", .pme_long_desc = "Duration in cycles to reload with Shared (S) data from another core's L2 on the same chip due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_L21_SHR ] = { .pme_name = "PM_MRK_DATA_FROM_L21_SHR", .pme_code = 0x000002D14E, .pme_short_desc = "The processor's data cache was reloaded with Shared (S) data from another core's L2 on the same chip due to a marked load", .pme_long_desc = "The processor's data cache was reloaded with Shared (S) data from another core's L2 on the same chip due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_L2_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_L2_CYC", .pme_code = 0x0000014156, .pme_short_desc = "Duration in cycles to reload from local core's L2 due to a marked load", .pme_long_desc = "Duration in cycles to reload from local core's L2 due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_L2_DISP_CONFLICT_LDHITST_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_L2_DISP_CONFLICT_LDHITST_CYC", .pme_code = 0x000001415A, .pme_short_desc = "Duration in cycles to reload from local core's L2 with load hit store conflict due to a marked load", .pme_long_desc = "Duration in cycles to reload from local core's L2 with load hit store conflict due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_L2_DISP_CONFLICT_LDHITST ] = { .pme_name = "PM_MRK_DATA_FROM_L2_DISP_CONFLICT_LDHITST", .pme_code = 0x000002D148, .pme_short_desc = "The processor's data cache was reloaded from local core's L2 with load hit store conflict due to a marked load", .pme_long_desc = "The processor's data cache 
was reloaded from local core's L2 with load hit store conflict due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_L2_DISP_CONFLICT_OTHER_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_L2_DISP_CONFLICT_OTHER_CYC", .pme_code = 0x000003D140, .pme_short_desc = "Duration in cycles to reload from local core's L2 with dispatch conflict due to a marked load", .pme_long_desc = "Duration in cycles to reload from local core's L2 with dispatch conflict due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_L2_DISP_CONFLICT_OTHER ] = { .pme_name = "PM_MRK_DATA_FROM_L2_DISP_CONFLICT_OTHER", .pme_code = 0x000002C124, .pme_short_desc = "The processor's data cache was reloaded from local core's L2 with dispatch conflict due to a marked load", .pme_long_desc = "The processor's data cache was reloaded from local core's L2 with dispatch conflict due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_L2_MEPF_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_L2_MEPF_CYC", .pme_code = 0x000003D144, .pme_short_desc = "Duration in cycles to reload from local core's L2 hit without dispatch conflicts on Mepf state.", .pme_long_desc = "Duration in cycles to reload from local core's L2 hit without dispatch conflicts on Mepf state. due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_L2_MEPF ] = { .pme_name = "PM_MRK_DATA_FROM_L2_MEPF", .pme_code = 0x000004C120, .pme_short_desc = "The processor's data cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state.", .pme_long_desc = "The processor's data cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state. 
due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_L2MISS_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_L2MISS_CYC", .pme_code = 0x0000035152, .pme_short_desc = "Duration in cycles to reload from a location other than the local core's L2 due to a marked load", .pme_long_desc = "Duration in cycles to reload from a location other than the local core's L2 due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_L2MISS ] = { .pme_name = "PM_MRK_DATA_FROM_L2MISS", .pme_code = 0x00000401E8, .pme_short_desc = "The processor's data cache was reloaded from a location other than the local core's L2 due to a marked load", .pme_long_desc = "The processor's data cache was reloaded from a location other than the local core's L2 due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_L2_NO_CONFLICT_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_L2_NO_CONFLICT_CYC", .pme_code = 0x0000014158, .pme_short_desc = "Duration in cycles to reload from local core's L2 without conflict due to a marked load", .pme_long_desc = "Duration in cycles to reload from local core's L2 without conflict due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_L2_NO_CONFLICT ] = { .pme_name = "PM_MRK_DATA_FROM_L2_NO_CONFLICT", .pme_code = 0x000002C120, .pme_short_desc = "The processor's data cache was reloaded from local core's L2 without conflict due to a marked load", .pme_long_desc = "The processor's data cache was reloaded from local core's L2 without conflict due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_L2 ] = { .pme_name = "PM_MRK_DATA_FROM_L2", .pme_code = 0x000002C126, .pme_short_desc = "The processor's data cache was reloaded from local core's L2 due to a marked load", .pme_long_desc = "The processor's data cache was reloaded from local core's L2 due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_L31_ECO_MOD_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_L31_ECO_MOD_CYC", .pme_code = 0x0000035158, .pme_short_desc = "Duration in cycles to reload with Modified (M) data from another core's ECO L3 
on the same chip due to a marked load", .pme_long_desc = "Duration in cycles to reload with Modified (M) data from another core's ECO L3 on the same chip due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_L31_ECO_MOD ] = { .pme_name = "PM_MRK_DATA_FROM_L31_ECO_MOD", .pme_code = 0x000004D144, .pme_short_desc = "The processor's data cache was reloaded with Modified (M) data from another core's ECO L3 on the same chip due to a marked load", .pme_long_desc = "The processor's data cache was reloaded with Modified (M) data from another core's ECO L3 on the same chip due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_L31_ECO_SHR_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_L31_ECO_SHR_CYC", .pme_code = 0x000001D142, .pme_short_desc = "Duration in cycles to reload with Shared (S) data from another core's ECO L3 on the same chip due to a marked load", .pme_long_desc = "Duration in cycles to reload with Shared (S) data from another core's ECO L3 on the same chip due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_L31_ECO_SHR ] = { .pme_name = "PM_MRK_DATA_FROM_L31_ECO_SHR", .pme_code = 0x000002D14C, .pme_short_desc = "The processor's data cache was reloaded with Shared (S) data from another core's ECO L3 on the same chip due to a marked load", .pme_long_desc = "The processor's data cache was reloaded with Shared (S) data from another core's ECO L3 on the same chip due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_L31_MOD_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_L31_MOD_CYC", .pme_code = 0x000001D140, .pme_short_desc = "Duration in cycles to reload with Modified (M) data from another core's L3 on the same chip due to a marked load", .pme_long_desc = "Duration in cycles to reload with Modified (M) data from another core's L3 on the same chip due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_L31_MOD ] = { .pme_name = "PM_MRK_DATA_FROM_L31_MOD", .pme_code = 0x000002D144, .pme_short_desc = "The processor's data cache was reloaded with Modified (M) data from 
another core's L3 on the same chip due to a marked load", .pme_long_desc = "The processor's data cache was reloaded with Modified (M) data from another core's L3 on the same chip due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_L31_SHR_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_L31_SHR_CYC", .pme_code = 0x0000035156, .pme_short_desc = "Duration in cycles to reload with Shared (S) data from another core's L3 on the same chip due to a marked load", .pme_long_desc = "Duration in cycles to reload with Shared (S) data from another core's L3 on the same chip due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_L31_SHR ] = { .pme_name = "PM_MRK_DATA_FROM_L31_SHR", .pme_code = 0x000004D124, .pme_short_desc = "The processor's data cache was reloaded with Shared (S) data from another core's L3 on the same chip due to a marked load", .pme_long_desc = "The processor's data cache was reloaded with Shared (S) data from another core's L3 on the same chip due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_L3_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_L3_CYC", .pme_code = 0x0000035154, .pme_short_desc = "Duration in cycles to reload from local core's L3 due to a marked load", .pme_long_desc = "Duration in cycles to reload from local core's L3 due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_L3_DISP_CONFLICT_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_L3_DISP_CONFLICT_CYC", .pme_code = 0x000002C122, .pme_short_desc = "Duration in cycles to reload from local core's L3 with dispatch conflict due to a marked load", .pme_long_desc = "Duration in cycles to reload from local core's L3 with dispatch conflict due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_L3_DISP_CONFLICT ] = { .pme_name = "PM_MRK_DATA_FROM_L3_DISP_CONFLICT", .pme_code = 0x000001D144, .pme_short_desc = "The processor's data cache was reloaded from local core's L3 with dispatch conflict due to a marked load", .pme_long_desc = "The processor's data cache was reloaded from local core's L3 with dispatch 
conflict due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_L3_MEPF_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_L3_MEPF_CYC", .pme_code = 0x000001415C, .pme_short_desc = "Duration in cycles to reload from local core's L3 without dispatch conflicts hit on Mepf state due to a marked load", .pme_long_desc = "Duration in cycles to reload from local core's L3 without dispatch conflicts hit on Mepf state due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_L3_MEPF ] = { .pme_name = "PM_MRK_DATA_FROM_L3_MEPF", .pme_code = 0x000002D142, .pme_short_desc = "The processor's data cache was reloaded from local core's L3 without dispatch conflicts hit on Mepf state.", .pme_long_desc = "The processor's data cache was reloaded from local core's L3 without dispatch conflicts hit on Mepf state. due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_L3MISS_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_L3MISS_CYC", .pme_code = 0x000001415E, .pme_short_desc = "Duration in cycles to reload from a location other than the local core's L3 due to a marked load", .pme_long_desc = "Duration in cycles to reload from a location other than the local core's L3 due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_L3MISS ] = { .pme_name = "PM_MRK_DATA_FROM_L3MISS", .pme_code = 0x00000201E4, .pme_short_desc = "The processor's data cache was reloaded from a location other than the local core's L3 due to a marked load", .pme_long_desc = "The processor's data cache was reloaded from a location other than the local core's L3 due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_L3_NO_CONFLICT_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_L3_NO_CONFLICT_CYC", .pme_code = 0x000004C124, .pme_short_desc = "Duration in cycles to reload from local core's L3 without conflict due to a marked load", .pme_long_desc = "Duration in cycles to reload from local core's L3 without conflict due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_L3_NO_CONFLICT ] = { .pme_name = "PM_MRK_DATA_FROM_L3_NO_CONFLICT", .pme_code 
= 0x000003D146, .pme_short_desc = "The processor's data cache was reloaded from local core's L3 without conflict due to a marked load", .pme_long_desc = "The processor's data cache was reloaded from local core's L3 without conflict due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_L3 ] = { .pme_name = "PM_MRK_DATA_FROM_L3", .pme_code = 0x000004D142, .pme_short_desc = "The processor's data cache was reloaded from local core's L3 due to a marked load", .pme_long_desc = "The processor's data cache was reloaded from local core's L3 due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_LL4_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_LL4_CYC", .pme_code = 0x000002C12E, .pme_short_desc = "Duration in cycles to reload from the local chip's L4 cache due to a marked load", .pme_long_desc = "Duration in cycles to reload from the local chip's L4 cache due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_LL4 ] = { .pme_name = "PM_MRK_DATA_FROM_LL4", .pme_code = 0x000001D14C, .pme_short_desc = "The processor's data cache was reloaded from the local chip's L4 cache due to a marked load", .pme_long_desc = "The processor's data cache was reloaded from the local chip's L4 cache due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_LMEM_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_LMEM_CYC", .pme_code = 0x000004D128, .pme_short_desc = "Duration in cycles to reload from the local chip's Memory due to a marked load", .pme_long_desc = "Duration in cycles to reload from the local chip's Memory due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_LMEM ] = { .pme_name = "PM_MRK_DATA_FROM_LMEM", .pme_code = 0x000003D142, .pme_short_desc = "The processor's data cache was reloaded from the local chip's Memory due to a marked load", .pme_long_desc = "The processor's data cache was reloaded from the local chip's Memory due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_MEMORY_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_MEMORY_CYC", .pme_code = 0x000001D146, .pme_short_desc = "Duration 
in cycles to reload from a memory location including L4 from local remote or distant due to a marked load", .pme_long_desc = "Duration in cycles to reload from a memory location including L4 from local remote or distant due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_MEMORY ] = { .pme_name = "PM_MRK_DATA_FROM_MEMORY", .pme_code = 0x00000201E0, .pme_short_desc = "The processor's data cache was reloaded from a memory location including L4 from local remote or distant due to a marked load", .pme_long_desc = "The processor's data cache was reloaded from a memory location including L4 from local remote or distant due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_OFF_CHIP_CACHE_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_OFF_CHIP_CACHE_CYC", .pme_code = 0x000001D14E, .pme_short_desc = "Duration in cycles to reload either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a marked load", .pme_long_desc = "Duration in cycles to reload either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_OFF_CHIP_CACHE ] = { .pme_name = "PM_MRK_DATA_FROM_OFF_CHIP_CACHE", .pme_code = 0x000002D120, .pme_short_desc = "The processor's data cache was reloaded either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a marked load", .pme_long_desc = "The processor's data cache was reloaded either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_ON_CHIP_CACHE_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_ON_CHIP_CACHE_CYC", .pme_code = 0x000003515A, .pme_short_desc = "Duration in cycles to reload either shared or modified data from another core's L2/L3 on the same chip due to a marked load", .pme_long_desc = "Duration in cycles to reload either shared or modified data from another core's L2/L3 on the same chip 
due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_ON_CHIP_CACHE ] = { .pme_name = "PM_MRK_DATA_FROM_ON_CHIP_CACHE", .pme_code = 0x000004D140, .pme_short_desc = "The processor's data cache was reloaded either shared or modified data from another core's L2/L3 on the same chip due to a marked load", .pme_long_desc = "The processor's data cache was reloaded either shared or modified data from another core's L2/L3 on the same chip due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_RL2L3_MOD_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_RL2L3_MOD_CYC", .pme_code = 0x000002D14A, .pme_short_desc = "Duration in cycles to reload with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked load", .pme_long_desc = "Duration in cycles to reload with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_RL2L3_MOD ] = { .pme_name = "PM_MRK_DATA_FROM_RL2L3_MOD", .pme_code = 0x000001D14A, .pme_short_desc = "The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked load", .pme_long_desc = "The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_RL2L3_SHR_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_RL2L3_SHR_CYC", .pme_code = 0x000004C12A, .pme_short_desc = "Duration in cycles to reload with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked load", .pme_long_desc = "Duration in cycles to reload with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_RL2L3_SHR ] = { .pme_name = "PM_MRK_DATA_FROM_RL2L3_SHR", .pme_code = 0x0000035150, 
.pme_short_desc = "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked load", .pme_long_desc = "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_RL4_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_RL4_CYC", .pme_code = 0x000004D12A, .pme_short_desc = "Duration in cycles to reload from another chip's L4 on the same Node or Group ( Remote) due to a marked load", .pme_long_desc = "Duration in cycles to reload from another chip's L4 on the same Node or Group ( Remote) due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_RL4 ] = { .pme_name = "PM_MRK_DATA_FROM_RL4", .pme_code = 0x000003515C, .pme_short_desc = "The processor's data cache was reloaded from another chip's L4 on the same Node or Group ( Remote) due to a marked load", .pme_long_desc = "The processor's data cache was reloaded from another chip's L4 on the same Node or Group ( Remote) due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_RMEM_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_RMEM_CYC", .pme_code = 0x000002C12A, .pme_short_desc = "Duration in cycles to reload from another chip's memory on the same Node or Group ( Remote) due to a marked load", .pme_long_desc = "Duration in cycles to reload from another chip's memory on the same Node or Group ( Remote) due to a marked load", }, [ POWER9_PME_PM_MRK_DATA_FROM_RMEM ] = { .pme_name = "PM_MRK_DATA_FROM_RMEM", .pme_code = 0x000001D148, .pme_short_desc = "The processor's data cache was reloaded from another chip's memory on the same Node or Group ( Remote) due to a marked load", .pme_long_desc = "The processor's data cache was reloaded from another chip's memory on the same Node or Group ( Remote) due to a marked load", }, [ POWER9_PME_PM_MRK_DCACHE_RELOAD_INTV ] = { .pme_name = "PM_MRK_DCACHE_RELOAD_INTV", .pme_code = 
0x0000040118, .pme_short_desc = "Combined Intervention event", .pme_long_desc = "Combined Intervention event", }, [ POWER9_PME_PM_MRK_DERAT_MISS_16G ] = { .pme_name = "PM_MRK_DERAT_MISS_16G", .pme_code = 0x000004C15C, .pme_short_desc = "Marked Data ERAT Miss (Data TLB Access) page size 16G", .pme_long_desc = "Marked Data ERAT Miss (Data TLB Access) page size 16G", }, [ POWER9_PME_PM_MRK_DERAT_MISS_16M ] = { .pme_name = "PM_MRK_DERAT_MISS_16M", .pme_code = 0x000003D154, .pme_short_desc = "Marked Data ERAT Miss (Data TLB Access) page size 16M", .pme_long_desc = "Marked Data ERAT Miss (Data TLB Access) page size 16M", }, [ POWER9_PME_PM_MRK_DERAT_MISS_1G ] = { .pme_name = "PM_MRK_DERAT_MISS_1G", .pme_code = 0x000003D152, .pme_short_desc = "Marked Data ERAT Miss (Data TLB Access) page size 1G.", .pme_long_desc = "Marked Data ERAT Miss (Data TLB Access) page size 1G. Implies radix translation", }, [ POWER9_PME_PM_MRK_DERAT_MISS_2M ] = { .pme_name = "PM_MRK_DERAT_MISS_2M", .pme_code = 0x000002D152, .pme_short_desc = "Marked Data ERAT Miss (Data TLB Access) page size 2M.", .pme_long_desc = "Marked Data ERAT Miss (Data TLB Access) page size 2M. 
Implies radix translation", }, [ POWER9_PME_PM_MRK_DERAT_MISS_4K ] = { .pme_name = "PM_MRK_DERAT_MISS_4K", .pme_code = 0x000002D150, .pme_short_desc = "Marked Data ERAT Miss (Data TLB Access) page size 4K", .pme_long_desc = "Marked Data ERAT Miss (Data TLB Access) page size 4K", }, [ POWER9_PME_PM_MRK_DERAT_MISS_64K ] = { .pme_name = "PM_MRK_DERAT_MISS_64K", .pme_code = 0x000002D154, .pme_short_desc = "Marked Data ERAT Miss (Data TLB Access) page size 64K", .pme_long_desc = "Marked Data ERAT Miss (Data TLB Access) page size 64K", }, [ POWER9_PME_PM_MRK_DERAT_MISS ] = { .pme_name = "PM_MRK_DERAT_MISS", .pme_code = 0x00000301E6, .pme_short_desc = "Erat Miss (TLB Access) All page sizes", .pme_long_desc = "Erat Miss (TLB Access) All page sizes", }, [ POWER9_PME_PM_MRK_DFU_FIN ] = { .pme_name = "PM_MRK_DFU_FIN", .pme_code = 0x0000020132, .pme_short_desc = "Decimal Unit marked Instruction Finish", .pme_long_desc = "Decimal Unit marked Instruction Finish", }, [ POWER9_PME_PM_MRK_DPTEG_FROM_DL2L3_MOD ] = { .pme_name = "PM_MRK_DPTEG_FROM_DL2L3_MOD", .pme_code = 0x000004F148, .pme_short_desc = "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. 
Only PTE reloads are included", }, [ POWER9_PME_PM_MRK_DPTEG_FROM_DL2L3_SHR ] = { .pme_name = "PM_MRK_DPTEG_FROM_DL2L3_SHR", .pme_code = 0x000003F148, .pme_short_desc = "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", }, [ POWER9_PME_PM_MRK_DPTEG_FROM_DL4 ] = { .pme_name = "PM_MRK_DPTEG_FROM_DL4", .pme_code = 0x000003F14C, .pme_short_desc = "A Page Table Entry was loaded into the TLB from another chip's L4 on a different Node or Group (Distant) due to a marked data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB from another chip's L4 on a different Node or Group (Distant) due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", }, [ POWER9_PME_PM_MRK_DPTEG_FROM_DMEM ] = { .pme_name = "PM_MRK_DPTEG_FROM_DMEM", .pme_code = 0x000004F14C, .pme_short_desc = "A Page Table Entry was loaded into the TLB from another chip's memory on a different Node or Group (Distant) due to a marked data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB from another chip's memory on a different Node or Group (Distant) due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. 
Only PTE reloads are included", }, [ POWER9_PME_PM_MRK_DPTEG_FROM_L21_MOD ] = { .pme_name = "PM_MRK_DPTEG_FROM_L21_MOD", .pme_code = 0x000004F146, .pme_short_desc = "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L2 on the same chip due to a marked data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L2 on the same chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", }, [ POWER9_PME_PM_MRK_DPTEG_FROM_L21_SHR ] = { .pme_name = "PM_MRK_DPTEG_FROM_L21_SHR", .pme_code = 0x000003F146, .pme_short_desc = "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L2 on the same chip due to a marked data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L2 on the same chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", }, [ POWER9_PME_PM_MRK_DPTEG_FROM_L2_MEPF ] = { .pme_name = "PM_MRK_DPTEG_FROM_L2_MEPF", .pme_code = 0x000002F140, .pme_short_desc = "A Page Table Entry was loaded into the TLB from local core's L2 hit without dispatch conflicts on Mepf state.", .pme_long_desc = "A Page Table Entry was loaded into the TLB from local core's L2 hit without dispatch conflicts on Mepf state due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. 
Only PTE reloads are included", }, [ POWER9_PME_PM_MRK_DPTEG_FROM_L2MISS ] = { .pme_name = "PM_MRK_DPTEG_FROM_L2MISS", .pme_code = 0x000001F14E, .pme_short_desc = "A Page Table Entry was loaded into the TLB from a location other than the local core's L2 due to a marked data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB from a location other than the local core's L2 due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", }, [ POWER9_PME_PM_MRK_DPTEG_FROM_L2_NO_CONFLICT ] = { .pme_name = "PM_MRK_DPTEG_FROM_L2_NO_CONFLICT", .pme_code = 0x000001F140, .pme_short_desc = "A Page Table Entry was loaded into the TLB from local core's L2 without conflict due to a marked data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB from local core's L2 without conflict due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", }, [ POWER9_PME_PM_MRK_DPTEG_FROM_L2 ] = { .pme_name = "PM_MRK_DPTEG_FROM_L2", .pme_code = 0x000001F142, .pme_short_desc = "A Page Table Entry was loaded into the TLB from local core's L2 due to a marked data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB from local core's L2 due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", }, [ POWER9_PME_PM_MRK_DPTEG_FROM_L31_ECO_MOD ] = { .pme_name = "PM_MRK_DPTEG_FROM_L31_ECO_MOD", .pme_code = 0x000004F144, .pme_short_desc = "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's ECO L3 on the same chip due to a marked data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's ECO L3 on the same chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. 
Only PTE reloads are included", }, [ POWER9_PME_PM_MRK_DPTEG_FROM_L31_ECO_SHR ] = { .pme_name = "PM_MRK_DPTEG_FROM_L31_ECO_SHR", .pme_code = 0x000003F144, .pme_short_desc = "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's ECO L3 on the same chip due to a marked data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's ECO L3 on the same chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", }, [ POWER9_PME_PM_MRK_DPTEG_FROM_L31_MOD ] = { .pme_name = "PM_MRK_DPTEG_FROM_L31_MOD", .pme_code = 0x000002F144, .pme_short_desc = "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L3 on the same chip due to a marked data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L3 on the same chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", }, [ POWER9_PME_PM_MRK_DPTEG_FROM_L31_SHR ] = { .pme_name = "PM_MRK_DPTEG_FROM_L31_SHR", .pme_code = 0x000001F146, .pme_short_desc = "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L3 on the same chip due to a marked data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L3 on the same chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. 
Only PTE reloads are included", }, [ POWER9_PME_PM_MRK_DPTEG_FROM_L3_DISP_CONFLICT ] = { .pme_name = "PM_MRK_DPTEG_FROM_L3_DISP_CONFLICT", .pme_code = 0x000003F142, .pme_short_desc = "A Page Table Entry was loaded into the TLB from local core's L3 with dispatch conflict due to a marked data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB from local core's L3 with dispatch conflict due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", }, [ POWER9_PME_PM_MRK_DPTEG_FROM_L3_MEPF ] = { .pme_name = "PM_MRK_DPTEG_FROM_L3_MEPF", .pme_code = 0x000002F142, .pme_short_desc = "A Page Table Entry was loaded into the TLB from local core's L3 without dispatch conflicts hit on Mepf state.", .pme_long_desc = "A Page Table Entry was loaded into the TLB from local core's L3 without dispatch conflicts hit on Mepf state due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", }, [ POWER9_PME_PM_MRK_DPTEG_FROM_L3MISS ] = { .pme_name = "PM_MRK_DPTEG_FROM_L3MISS", .pme_code = 0x000004F14E, .pme_short_desc = "A Page Table Entry was loaded into the TLB from a location other than the local core's L3 due to a marked data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB from a location other than the local core's L3 due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", }, [ POWER9_PME_PM_MRK_DPTEG_FROM_L3_NO_CONFLICT ] = { .pme_name = "PM_MRK_DPTEG_FROM_L3_NO_CONFLICT", .pme_code = 0x000001F144, .pme_short_desc = "A Page Table Entry was loaded into the TLB from local core's L3 without conflict due to a marked data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB from local core's L3 without conflict due to a marked data side request. 
When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", }, [ POWER9_PME_PM_MRK_DPTEG_FROM_L3 ] = { .pme_name = "PM_MRK_DPTEG_FROM_L3", .pme_code = 0x000004F142, .pme_short_desc = "A Page Table Entry was loaded into the TLB from local core's L3 due to a marked data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB from local core's L3 due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", }, [ POWER9_PME_PM_MRK_DPTEG_FROM_LL4 ] = { .pme_name = "PM_MRK_DPTEG_FROM_LL4", .pme_code = 0x000001F14C, .pme_short_desc = "A Page Table Entry was loaded into the TLB from the local chip's L4 cache due to a marked data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB from the local chip's L4 cache due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", }, [ POWER9_PME_PM_MRK_DPTEG_FROM_LMEM ] = { .pme_name = "PM_MRK_DPTEG_FROM_LMEM", .pme_code = 0x000002F148, .pme_short_desc = "A Page Table Entry was loaded into the TLB from the local chip's Memory due to a marked data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB from the local chip's Memory due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", }, [ POWER9_PME_PM_MRK_DPTEG_FROM_MEMORY ] = { .pme_name = "PM_MRK_DPTEG_FROM_MEMORY", .pme_code = 0x000002F14C, .pme_short_desc = "A Page Table Entry was loaded into the TLB from a memory location including L4 from local remote or distant due to a marked data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB from a memory location including L4 from local remote or distant due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. 
Only PTE reloads are included", }, [ POWER9_PME_PM_MRK_DPTEG_FROM_OFF_CHIP_CACHE ] = { .pme_name = "PM_MRK_DPTEG_FROM_OFF_CHIP_CACHE", .pme_code = 0x000004F14A, .pme_short_desc = "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a marked data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", }, [ POWER9_PME_PM_MRK_DPTEG_FROM_ON_CHIP_CACHE ] = { .pme_name = "PM_MRK_DPTEG_FROM_ON_CHIP_CACHE", .pme_code = 0x000001F148, .pme_short_desc = "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on the same chip due to a marked data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on the same chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", }, [ POWER9_PME_PM_MRK_DPTEG_FROM_RL2L3_MOD ] = { .pme_name = "PM_MRK_DPTEG_FROM_RL2L3_MOD", .pme_code = 0x000002F146, .pme_short_desc = "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. 
Only PTE reloads are included", }, [ POWER9_PME_PM_MRK_DPTEG_FROM_RL2L3_SHR ] = { .pme_name = "PM_MRK_DPTEG_FROM_RL2L3_SHR", .pme_code = 0x000001F14A, .pme_short_desc = "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", }, [ POWER9_PME_PM_MRK_DPTEG_FROM_RL4 ] = { .pme_name = "PM_MRK_DPTEG_FROM_RL4", .pme_code = 0x000002F14A, .pme_short_desc = "A Page Table Entry was loaded into the TLB from another chip's L4 on the same Node or Group ( Remote) due to a marked data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB from another chip's L4 on the same Node or Group ( Remote) due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", }, [ POWER9_PME_PM_MRK_DPTEG_FROM_RMEM ] = { .pme_name = "PM_MRK_DPTEG_FROM_RMEM", .pme_code = 0x000003F14A, .pme_short_desc = "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group ( Remote) due to a marked data side request.", .pme_long_desc = "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group ( Remote) due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. 
Only PTE reloads are included", }, [ POWER9_PME_PM_MRK_DTLB_MISS_16G ] = { .pme_name = "PM_MRK_DTLB_MISS_16G", .pme_code = 0x000002D15E, .pme_short_desc = "Marked Data TLB Miss page size 16G", .pme_long_desc = "Marked Data TLB Miss page size 16G", }, [ POWER9_PME_PM_MRK_DTLB_MISS_16M ] = { .pme_name = "PM_MRK_DTLB_MISS_16M", .pme_code = 0x000004C15E, .pme_short_desc = "Marked Data TLB Miss page size 16M", .pme_long_desc = "Marked Data TLB Miss page size 16M", }, [ POWER9_PME_PM_MRK_DTLB_MISS_1G ] = { .pme_name = "PM_MRK_DTLB_MISS_1G", .pme_code = 0x000001D15C, .pme_short_desc = "Marked Data TLB reload (after a miss) page size 1G.", .pme_long_desc = "Marked Data TLB reload (after a miss) page size 1G. Implies radix translation was used", }, [ POWER9_PME_PM_MRK_DTLB_MISS_4K ] = { .pme_name = "PM_MRK_DTLB_MISS_4K", .pme_code = 0x000002D156, .pme_short_desc = "Marked Data TLB Miss page size 4k", .pme_long_desc = "Marked Data TLB Miss page size 4k", }, [ POWER9_PME_PM_MRK_DTLB_MISS_64K ] = { .pme_name = "PM_MRK_DTLB_MISS_64K", .pme_code = 0x000003D156, .pme_short_desc = "Marked Data TLB Miss page size 64K", .pme_long_desc = "Marked Data TLB Miss page size 64K", }, [ POWER9_PME_PM_MRK_DTLB_MISS ] = { .pme_name = "PM_MRK_DTLB_MISS", .pme_code = 0x00000401E4, .pme_short_desc = "Marked dtlb miss", .pme_long_desc = "Marked dtlb miss", }, [ POWER9_PME_PM_MRK_FAB_RSP_BKILL_CYC ] = { .pme_name = "PM_MRK_FAB_RSP_BKILL_CYC", .pme_code = 0x000001F152, .pme_short_desc = "cycles L2 RC took for a bkill", .pme_long_desc = "cycles L2 RC took for a bkill", }, [ POWER9_PME_PM_MRK_FAB_RSP_BKILL ] = { .pme_name = "PM_MRK_FAB_RSP_BKILL", .pme_code = 0x0000040154, .pme_short_desc = "Marked store had to do a bkill", .pme_long_desc = "Marked store had to do a bkill", }, [ POWER9_PME_PM_MRK_FAB_RSP_CLAIM_RTY ] = { .pme_name = "PM_MRK_FAB_RSP_CLAIM_RTY", .pme_code = 0x000003015E, .pme_short_desc = "Sampled store did a rwitm and got a rty", .pme_long_desc = "Sampled store did a rwitm and got a 
rty", }, [ POWER9_PME_PM_MRK_FAB_RSP_DCLAIM_CYC ] = { .pme_name = "PM_MRK_FAB_RSP_DCLAIM_CYC", .pme_code = 0x000002F152, .pme_short_desc = "cycles L2 RC took for a dclaim", .pme_long_desc = "cycles L2 RC took for a dclaim", }, [ POWER9_PME_PM_MRK_FAB_RSP_DCLAIM ] = { .pme_name = "PM_MRK_FAB_RSP_DCLAIM", .pme_code = 0x0000030154, .pme_short_desc = "Marked store had to do a dclaim", .pme_long_desc = "Marked store had to do a dclaim", }, [ POWER9_PME_PM_MRK_FAB_RSP_RD_RTY ] = { .pme_name = "PM_MRK_FAB_RSP_RD_RTY", .pme_code = 0x000004015E, .pme_short_desc = "Sampled L2 reads retry count", .pme_long_desc = "Sampled L2 reads retry count", }, [ POWER9_PME_PM_MRK_FAB_RSP_RD_T_INTV ] = { .pme_name = "PM_MRK_FAB_RSP_RD_T_INTV", .pme_code = 0x000001015E, .pme_short_desc = "Sampled Read got a T intervention", .pme_long_desc = "Sampled Read got a T intervention", }, [ POWER9_PME_PM_MRK_FAB_RSP_RWITM_CYC ] = { .pme_name = "PM_MRK_FAB_RSP_RWITM_CYC", .pme_code = 0x000004F150, .pme_short_desc = "cycles L2 RC took for a rwitm", .pme_long_desc = "cycles L2 RC took for a rwitm", }, [ POWER9_PME_PM_MRK_FAB_RSP_RWITM_RTY ] = { .pme_name = "PM_MRK_FAB_RSP_RWITM_RTY", .pme_code = 0x000002015E, .pme_short_desc = "Sampled store did a rwitm and got a rty", .pme_long_desc = "Sampled store did a rwitm and got a rty", }, [ POWER9_PME_PM_MRK_FXU_FIN ] = { .pme_name = "PM_MRK_FXU_FIN", .pme_code = 0x0000020134, .pme_short_desc = "fxu marked instr finish", .pme_long_desc = "fxu marked instr finish", }, [ POWER9_PME_PM_MRK_IC_MISS ] = { .pme_name = "PM_MRK_IC_MISS", .pme_code = 0x000004013A, .pme_short_desc = "Marked instruction experienced I cache miss", .pme_long_desc = "Marked instruction experienced I cache miss", }, [ POWER9_PME_PM_MRK_INST_CMPL ] = { .pme_name = "PM_MRK_INST_CMPL", .pme_code = 0x00000401E0, .pme_short_desc = "marked instruction completed", .pme_long_desc = "marked instruction completed", }, [ POWER9_PME_PM_MRK_INST_DECODED ] = { .pme_name = "PM_MRK_INST_DECODED", .pme_code 
= 0x0000020130, .pme_short_desc = "An instruction was marked at decode time.", .pme_long_desc = "An instruction was marked at decode time. Random Instruction Sampling (RIS) only", }, [ POWER9_PME_PM_MRK_INST_DISP ] = { .pme_name = "PM_MRK_INST_DISP", .pme_code = 0x00000101E0, .pme_short_desc = "The thread has dispatched a randomly sampled marked instruction", .pme_long_desc = "The thread has dispatched a randomly sampled marked instruction", }, [ POWER9_PME_PM_MRK_INST_FIN ] = { .pme_name = "PM_MRK_INST_FIN", .pme_code = 0x0000030130, .pme_short_desc = "marked instruction finished", .pme_long_desc = "marked instruction finished", }, [ POWER9_PME_PM_MRK_INST_FROM_L3MISS ] = { .pme_name = "PM_MRK_INST_FROM_L3MISS", .pme_code = 0x00000401E6, .pme_short_desc = "Marked instruction was reloaded from a location beyond the local chiplet", .pme_long_desc = "Marked instruction was reloaded from a location beyond the local chiplet", }, [ POWER9_PME_PM_MRK_INST_ISSUED ] = { .pme_name = "PM_MRK_INST_ISSUED", .pme_code = 0x0000010132, .pme_short_desc = "Marked instruction issued", .pme_long_desc = "Marked instruction issued", }, [ POWER9_PME_PM_MRK_INST_TIMEO ] = { .pme_name = "PM_MRK_INST_TIMEO", .pme_code = 0x0000040134, .pme_short_desc = "marked Instruction finish timeout (instruction lost)", .pme_long_desc = "marked Instruction finish timeout (instruction lost)", }, [ POWER9_PME_PM_MRK_INST ] = { .pme_name = "PM_MRK_INST", .pme_code = 0x0000024058, .pme_short_desc = "An instruction was marked.", .pme_long_desc = "An instruction was marked. 
Includes both Random Instruction Sampling (RIS) at decode time and Random Event Sampling (RES) at the time the configured event happens", }, [ POWER9_PME_PM_MRK_L1_ICACHE_MISS ] = { .pme_name = "PM_MRK_L1_ICACHE_MISS", .pme_code = 0x00000101E4, .pme_short_desc = "sampled Instruction suffered an icache Miss", .pme_long_desc = "sampled Instruction suffered an icache Miss", }, [ POWER9_PME_PM_MRK_L1_RELOAD_VALID ] = { .pme_name = "PM_MRK_L1_RELOAD_VALID", .pme_code = 0x00000101EA, .pme_short_desc = "Marked demand reload", .pme_long_desc = "Marked demand reload", }, [ POWER9_PME_PM_MRK_L2_RC_DISP ] = { .pme_name = "PM_MRK_L2_RC_DISP", .pme_code = 0x0000020114, .pme_short_desc = "Marked Instruction RC dispatched in L2", .pme_long_desc = "Marked Instruction RC dispatched in L2", }, [ POWER9_PME_PM_MRK_L2_RC_DONE ] = { .pme_name = "PM_MRK_L2_RC_DONE", .pme_code = 0x000003012A, .pme_short_desc = "Marked RC done", .pme_long_desc = "Marked RC done", }, [ POWER9_PME_PM_MRK_L2_TM_REQ_ABORT ] = { .pme_name = "PM_MRK_L2_TM_REQ_ABORT", .pme_code = 0x000001E15E, .pme_short_desc = "TM abort", .pme_long_desc = "TM abort", }, [ POWER9_PME_PM_MRK_L2_TM_ST_ABORT_SISTER ] = { .pme_name = "PM_MRK_L2_TM_ST_ABORT_SISTER", .pme_code = 0x000003E15C, .pme_short_desc = "TM marked store abort for this thread", .pme_long_desc = "TM marked store abort for this thread", }, [ POWER9_PME_PM_MRK_LARX_FIN ] = { .pme_name = "PM_MRK_LARX_FIN", .pme_code = 0x0000040116, .pme_short_desc = "Larx finished", .pme_long_desc = "Larx finished", }, [ POWER9_PME_PM_MRK_LD_MISS_EXPOSED_CYC ] = { .pme_name = "PM_MRK_LD_MISS_EXPOSED_CYC", .pme_code = 0x000001013E, .pme_short_desc = "Marked Load exposed Miss (use edge detect to count #)", .pme_long_desc = "Marked Load exposed Miss (use edge detect to count #)", }, [ POWER9_PME_PM_MRK_LD_MISS_L1_CYC ] = { .pme_name = "PM_MRK_LD_MISS_L1_CYC", .pme_code = 0x000001D056, .pme_short_desc = "Marked ld latency", .pme_long_desc = "Marked ld latency", }, [ 
POWER9_PME_PM_MRK_LD_MISS_L1 ] = { .pme_name = "PM_MRK_LD_MISS_L1", .pme_code = 0x00000201E2, .pme_short_desc = "Marked DL1 Demand Miss counted at exec time.", .pme_long_desc = "Marked DL1 Demand Miss counted at exec time. Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load.", }, [ POWER9_PME_PM_MRK_LSU_DERAT_MISS ] = { .pme_name = "PM_MRK_LSU_DERAT_MISS", .pme_code = 0x0000030162, .pme_short_desc = "Marked derat reload (miss) for any page size", .pme_long_desc = "Marked derat reload (miss) for any page size", }, [ POWER9_PME_PM_MRK_LSU_FIN ] = { .pme_name = "PM_MRK_LSU_FIN", .pme_code = 0x0000040132, .pme_short_desc = "lsu marked instr PPC finish", .pme_long_desc = "lsu marked instr PPC finish", }, [ POWER9_PME_PM_MRK_LSU_FLUSH_ATOMIC ] = { .pme_name = "PM_MRK_LSU_FLUSH_ATOMIC", .pme_code = 0x000000D098, .pme_short_desc = "Quad-word loads (lq) are considered atomic because they always span at least 2 slices.", .pme_long_desc = "Quad-word loads (lq) are considered atomic because they always span at least 2 slices. If a snoop or store from another thread changes the data the load is accessing between the 2 or 3 pieces of the lq instruction, the lq will be flushed", }, [ POWER9_PME_PM_MRK_LSU_FLUSH_EMSH ] = { .pme_name = "PM_MRK_LSU_FLUSH_EMSH", .pme_code = 0x000000D898, .pme_short_desc = "An ERAT miss was detected after a set-p hit.", .pme_long_desc = "An ERAT miss was detected after a set-p hit. Erat tracker indicates fail due to tlbmiss and the instruction gets flushed because the instruction was working on the wrong address", }, [ POWER9_PME_PM_MRK_LSU_FLUSH_LARX_STCX ] = { .pme_name = "PM_MRK_LSU_FLUSH_LARX_STCX", .pme_code = 0x000000D8A4, .pme_short_desc = "A larx is flushed because an older larx has an LMQ reservation for the same thread.", .pme_long_desc = "A larx is flushed because an older larx has an LMQ reservation for the same thread. 
A stcx is flushed because an older stcx is in the LMQ. The flush happens when the older larx/stcx relaunches", }, [ POWER9_PME_PM_MRK_LSU_FLUSH_LHL_SHL ] = { .pme_name = "PM_MRK_LSU_FLUSH_LHL_SHL", .pme_code = 0x000000D8A0, .pme_short_desc = "The instruction was flushed because of a sequential load/store consistency.", .pme_long_desc = "The instruction was flushed because of a sequential load/store consistency. If a load or store hits on an older load that has either been snooped (for loads) or has stale data (for stores).", }, [ POWER9_PME_PM_MRK_LSU_FLUSH_LHS ] = { .pme_name = "PM_MRK_LSU_FLUSH_LHS", .pme_code = 0x000000D0A0, .pme_short_desc = "Effective Address alias flush : no EA match but Real Address match.", .pme_long_desc = "Effective Address alias flush : no EA match but Real Address match. If the data has not yet been returned for this load, the instruction will just be rejected, but if it has returned data, it will be flushed", }, [ POWER9_PME_PM_MRK_LSU_FLUSH_RELAUNCH_MISS ] = { .pme_name = "PM_MRK_LSU_FLUSH_RELAUNCH_MISS", .pme_code = 0x000000D09C, .pme_short_desc = "If a load that has already returned data and has to relaunch for any reason then gets a miss (erat, setp, data cache), it will often be flushed at relaunch time because the data might be inconsistent", .pme_long_desc = "If a load that has already returned data and has to relaunch for any reason then gets a miss (erat, setp, data cache), it will often be flushed at relaunch time because the data might be inconsistent", }, [ POWER9_PME_PM_MRK_LSU_FLUSH_SAO ] = { .pme_name = "PM_MRK_LSU_FLUSH_SAO", .pme_code = 0x000000D0A4, .pme_short_desc = "A load-hit-load condition with Strong Address Ordering will have address compare disabled and flush", .pme_long_desc = "A load-hit-load condition with Strong Address Ordering will have address compare disabled and flush", }, [ POWER9_PME_PM_MRK_LSU_FLUSH_UE ] = { .pme_name = "PM_MRK_LSU_FLUSH_UE", .pme_code = 0x000000D89C, .pme_short_desc = "Correctable 
ECC error on reload data, reported at critical data forward time", .pme_long_desc = "Correctable ECC error on reload data, reported at critical data forward time", }, [ POWER9_PME_PM_MRK_NTC_CYC ] = { .pme_name = "PM_MRK_NTC_CYC", .pme_code = 0x000002011C, .pme_short_desc = "Cycles during which the marked instruction is next to complete (completion is held up because the marked instruction hasn't completed yet)", .pme_long_desc = "Cycles during which the marked instruction is next to complete (completion is held up because the marked instruction hasn't completed yet)", }, [ POWER9_PME_PM_MRK_NTF_FIN ] = { .pme_name = "PM_MRK_NTF_FIN", .pme_code = 0x0000020112, .pme_short_desc = "Marked next to finish instruction finished", .pme_long_desc = "Marked next to finish instruction finished", }, [ POWER9_PME_PM_MRK_PROBE_NOP_CMPL ] = { .pme_name = "PM_MRK_PROBE_NOP_CMPL", .pme_code = 0x000001F05E, .pme_short_desc = "Marked probeNops completed", .pme_long_desc = "Marked probeNops completed", }, [ POWER9_PME_PM_MRK_RUN_CYC ] = { .pme_name = "PM_MRK_RUN_CYC", .pme_code = 0x000001D15E, .pme_short_desc = "Run cycles in which a marked instruction is in the pipeline", .pme_long_desc = "Run cycles in which a marked instruction is in the pipeline", }, [ POWER9_PME_PM_MRK_STALL_CMPLU_CYC ] = { .pme_name = "PM_MRK_STALL_CMPLU_CYC", .pme_code = 0x000003013E, .pme_short_desc = "Number of cycles the marked instruction is experiencing a stall while it is next to complete (NTC)", .pme_long_desc = "Number of cycles the marked instruction is experiencing a stall while it is next to complete (NTC)", }, [ POWER9_PME_PM_MRK_ST_CMPL_INT ] = { .pme_name = "PM_MRK_ST_CMPL_INT", .pme_code = 0x0000030134, .pme_short_desc = "marked store finished with intervention", .pme_long_desc = "marked store finished with intervention", }, [ POWER9_PME_PM_MRK_ST_CMPL ] = { .pme_name = "PM_MRK_ST_CMPL", .pme_code = 0x00000301E2, .pme_short_desc = "Marked store completed and sent to nest", .pme_long_desc = 
"Marked store completed and sent to nest", }, [ POWER9_PME_PM_MRK_STCX_FAIL ] = { .pme_name = "PM_MRK_STCX_FAIL", .pme_code = 0x000003E158, .pme_short_desc = "marked stcx failed", .pme_long_desc = "marked stcx failed", }, [ POWER9_PME_PM_MRK_STCX_FIN ] = { .pme_name = "PM_MRK_STCX_FIN", .pme_code = 0x0000024056, .pme_short_desc = "Number of marked stcx instructions finished.", .pme_long_desc = "Number of marked stcx instructions finished. This includes instructions in the speculative path of a branch that may be flushed", }, [ POWER9_PME_PM_MRK_ST_DONE_L2 ] = { .pme_name = "PM_MRK_ST_DONE_L2", .pme_code = 0x0000010134, .pme_short_desc = "marked store completed in L2 ( RC machine done)", .pme_long_desc = "marked store completed in L2 ( RC machine done)", }, [ POWER9_PME_PM_MRK_ST_DRAIN_TO_L2DISP_CYC ] = { .pme_name = "PM_MRK_ST_DRAIN_TO_L2DISP_CYC", .pme_code = 0x000003F150, .pme_short_desc = "cycles to drain st from core to L2", .pme_long_desc = "cycles to drain st from core to L2", }, [ POWER9_PME_PM_MRK_ST_FWD ] = { .pme_name = "PM_MRK_ST_FWD", .pme_code = 0x000003012C, .pme_short_desc = "Marked st forwards", .pme_long_desc = "Marked st forwards", }, [ POWER9_PME_PM_MRK_ST_L2DISP_TO_CMPL_CYC ] = { .pme_name = "PM_MRK_ST_L2DISP_TO_CMPL_CYC", .pme_code = 0x000001F150, .pme_short_desc = "cycles from L2 rc disp to l2 rc completion", .pme_long_desc = "cycles from L2 rc disp to l2 rc completion", }, [ POWER9_PME_PM_MRK_ST_NEST ] = { .pme_name = "PM_MRK_ST_NEST", .pme_code = 0x0000020138, .pme_short_desc = "Marked store sent to nest", .pme_long_desc = "Marked store sent to nest", }, [ POWER9_PME_PM_MRK_TEND_FAIL ] = { .pme_name = "PM_MRK_TEND_FAIL", .pme_code = 0x00000028A4, .pme_short_desc = "Nested or not nested tend failed for a marked tend instruction", .pme_long_desc = "Nested or not nested tend failed for a marked tend instruction", }, [ POWER9_PME_PM_MRK_VSU_FIN ] = { .pme_name = "PM_MRK_VSU_FIN", .pme_code = 0x0000030132, .pme_short_desc = "VSU marked instr 
finish", .pme_long_desc = "VSU marked instr finish", }, [ POWER9_PME_PM_MULT_MRK ] = { .pme_name = "PM_MULT_MRK", .pme_code = 0x000003D15E, .pme_short_desc = "mult marked instr", .pme_long_desc = "mult marked instr", }, [ POWER9_PME_PM_NEST_REF_CLK ] = { .pme_name = "PM_NEST_REF_CLK", .pme_code = 0x000003006E, .pme_short_desc = "Multiply by 4 to obtain the number of PB cycles", .pme_long_desc = "Multiply by 4 to obtain the number of PB cycles", }, [ POWER9_PME_PM_NON_DATA_STORE ] = { .pme_name = "PM_NON_DATA_STORE", .pme_code = 0x000000F8A0, .pme_short_desc = "All ops that drain from s2q to L2 and contain no data", .pme_long_desc = "All ops that drain from s2q to L2 and contain no data", }, [ POWER9_PME_PM_NON_FMA_FLOP_CMPL ] = { .pme_name = "PM_NON_FMA_FLOP_CMPL", .pme_code = 0x000004D056, .pme_short_desc = "Non FMA instruction completed", .pme_long_desc = "Non FMA instruction completed", }, [ POWER9_PME_PM_NON_MATH_FLOP_CMPL ] = { .pme_name = "PM_NON_MATH_FLOP_CMPL", .pme_code = 0x000004D05A, .pme_short_desc = "Non FLOP operation completed", .pme_long_desc = "Non FLOP operation completed", }, [ POWER9_PME_PM_NON_TM_RST_SC ] = { .pme_name = "PM_NON_TM_RST_SC", .pme_code = 0x00000260A6, .pme_short_desc = "Non-TM snp rst TM SC", .pme_long_desc = "Non-TM snp rst TM SC", }, [ POWER9_PME_PM_NTC_ALL_FIN ] = { .pme_name = "PM_NTC_ALL_FIN", .pme_code = 0x000002001A, .pme_short_desc = "Cycles after all instructions have finished to group completed", .pme_long_desc = "Cycles after all instructions have finished to group completed", }, [ POWER9_PME_PM_NTC_FIN ] = { .pme_name = "PM_NTC_FIN", .pme_code = 0x000002405A, .pme_short_desc = "Cycles in which the oldest instruction in the pipeline (NTC) finishes.", .pme_long_desc = "Cycles in which the oldest instruction in the pipeline (NTC) finishes. 
This event is used to account for cycles in which work is being completed in the CPI stack", }, [ POWER9_PME_PM_NTC_ISSUE_HELD_ARB ] = { .pme_name = "PM_NTC_ISSUE_HELD_ARB", .pme_code = 0x000002E016, .pme_short_desc = "The NTC instruction is being held at dispatch because it lost arbitration onto the issue pipe to another instruction (from the same thread or a different thread)", .pme_long_desc = "The NTC instruction is being held at dispatch because it lost arbitration onto the issue pipe to another instruction (from the same thread or a different thread)", }, [ POWER9_PME_PM_NTC_ISSUE_HELD_DARQ_FULL ] = { .pme_name = "PM_NTC_ISSUE_HELD_DARQ_FULL", .pme_code = 0x000001006A, .pme_short_desc = "The NTC instruction is being held at dispatch because there are no slots in the DARQ for it", .pme_long_desc = "The NTC instruction is being held at dispatch because there are no slots in the DARQ for it", }, [ POWER9_PME_PM_NTC_ISSUE_HELD_OTHER ] = { .pme_name = "PM_NTC_ISSUE_HELD_OTHER", .pme_code = 0x000003D05A, .pme_short_desc = "The NTC instruction is being held at dispatch during regular pipeline cycles, or because the VSU is busy with multi-cycle instructions, or because of a write-back collision with VSU", .pme_long_desc = "The NTC instruction is being held at dispatch during regular pipeline cycles, or because the VSU is busy with multi-cycle instructions, or because of a write-back collision with VSU", }, [ POWER9_PME_PM_PARTIAL_ST_FIN ] = { .pme_name = "PM_PARTIAL_ST_FIN", .pme_code = 0x0000034054, .pme_short_desc = "Any store finished by an LSU slice", .pme_long_desc = "Any store finished by an LSU slice", }, [ POWER9_PME_PM_PMC1_OVERFLOW ] = { .pme_name = "PM_PMC1_OVERFLOW", .pme_code = 0x0000020010, .pme_short_desc = "Overflow from counter 1", .pme_long_desc = "Overflow from counter 1", }, [ POWER9_PME_PM_PMC1_REWIND ] = { .pme_name = "PM_PMC1_REWIND", .pme_code = 0x000004D02C, .pme_short_desc = "", .pme_long_desc = "", }, [ POWER9_PME_PM_PMC1_SAVED ] = { 
.pme_name = "PM_PMC1_SAVED", .pme_code = 0x000004D010, .pme_short_desc = "PMC1 Rewind Value saved", .pme_long_desc = "PMC1 Rewind Value saved", }, [ POWER9_PME_PM_PMC2_OVERFLOW ] = { .pme_name = "PM_PMC2_OVERFLOW", .pme_code = 0x0000030010, .pme_short_desc = "Overflow from counter 2", .pme_long_desc = "Overflow from counter 2", }, [ POWER9_PME_PM_PMC2_REWIND ] = { .pme_name = "PM_PMC2_REWIND", .pme_code = 0x0000030020, .pme_short_desc = "PMC2 Rewind Event (did not match condition)", .pme_long_desc = "PMC2 Rewind Event (did not match condition)", }, [ POWER9_PME_PM_PMC2_SAVED ] = { .pme_name = "PM_PMC2_SAVED", .pme_code = 0x0000010022, .pme_short_desc = "PMC2 Rewind Value saved", .pme_long_desc = "PMC2 Rewind Value saved", }, [ POWER9_PME_PM_PMC3_OVERFLOW ] = { .pme_name = "PM_PMC3_OVERFLOW", .pme_code = 0x0000040010, .pme_short_desc = "Overflow from counter 3", .pme_long_desc = "Overflow from counter 3", }, [ POWER9_PME_PM_PMC3_REWIND ] = { .pme_name = "PM_PMC3_REWIND", .pme_code = 0x000001000A, .pme_short_desc = "PMC3 rewind event.", .pme_long_desc = "PMC3 rewind event. A rewind happens when a speculative event (such as latency or CPI stack) is selected on PMC3 and the stall reason or reload source did not match the one programmed in PMC3. 
When this occurs, the count in PMC3 will not change.", }, [ POWER9_PME_PM_PMC3_SAVED ] = { .pme_name = "PM_PMC3_SAVED", .pme_code = 0x000004D012, .pme_short_desc = "PMC3 Rewind Value saved", .pme_long_desc = "PMC3 Rewind Value saved", }, [ POWER9_PME_PM_PMC4_OVERFLOW ] = { .pme_name = "PM_PMC4_OVERFLOW", .pme_code = 0x0000010010, .pme_short_desc = "Overflow from counter 4", .pme_long_desc = "Overflow from counter 4", }, [ POWER9_PME_PM_PMC4_REWIND ] = { .pme_name = "PM_PMC4_REWIND", .pme_code = 0x0000010020, .pme_short_desc = "PMC4 Rewind Event", .pme_long_desc = "PMC4 Rewind Event", }, [ POWER9_PME_PM_PMC4_SAVED ] = { .pme_name = "PM_PMC4_SAVED", .pme_code = 0x0000030022, .pme_short_desc = "PMC4 Rewind Value saved (matched condition)", .pme_long_desc = "PMC4 Rewind Value saved (matched condition)", }, [ POWER9_PME_PM_PMC5_OVERFLOW ] = { .pme_name = "PM_PMC5_OVERFLOW", .pme_code = 0x0000010024, .pme_short_desc = "Overflow from counter 5", .pme_long_desc = "Overflow from counter 5", }, [ POWER9_PME_PM_PMC6_OVERFLOW ] = { .pme_name = "PM_PMC6_OVERFLOW", .pme_code = 0x0000030024, .pme_short_desc = "Overflow from counter 6", .pme_long_desc = "Overflow from counter 6", }, [ POWER9_PME_PM_PROBE_NOP_DISP ] = { .pme_name = "PM_PROBE_NOP_DISP", .pme_code = 0x0000040014, .pme_short_desc = "ProbeNops dispatched", .pme_long_desc = "ProbeNops dispatched", }, [ POWER9_PME_PM_PTE_PREFETCH ] = { .pme_name = "PM_PTE_PREFETCH", .pme_code = 0x000000F084, .pme_short_desc = "PTE prefetches", .pme_long_desc = "PTE prefetches", }, [ POWER9_PME_PM_PTESYNC ] = { .pme_name = "PM_PTESYNC", .pme_code = 0x000000589C, .pme_short_desc = "ptesync instruction counted when the instruction is decoded and transmitted", .pme_long_desc = "ptesync instruction counted when the instruction is decoded and transmitted", }, [ POWER9_PME_PM_PUMP_CPRED ] = { .pme_name = "PM_PUMP_CPRED", .pme_code = 0x0000010054, .pme_short_desc = "Pump prediction correct.", .pme_long_desc = "Pump prediction correct. 
Counts across all types of pumps for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)", }, [ POWER9_PME_PM_PUMP_MPRED ] = { .pme_name = "PM_PUMP_MPRED", .pme_code = 0x0000040052, .pme_short_desc = "Pump misprediction.", .pme_long_desc = "Pump misprediction. Counts across all types of pumps for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)", }, [ POWER9_PME_PM_RADIX_PWC_L1_HIT ] = { .pme_name = "PM_RADIX_PWC_L1_HIT", .pme_code = 0x000001F056, .pme_short_desc = "A radix translation attempt missed in the TLB and only the first level page walk cache was a hit.", .pme_long_desc = "A radix translation attempt missed in the TLB and only the first level page walk cache was a hit.", }, [ POWER9_PME_PM_RADIX_PWC_L1_PDE_FROM_L2 ] = { .pme_name = "PM_RADIX_PWC_L1_PDE_FROM_L2", .pme_code = 0x000002D026, .pme_short_desc = "A Page Directory Entry was reloaded to a level 1 page walk cache from the core's L2 data cache", .pme_long_desc = "A Page Directory Entry was reloaded to a level 1 page walk cache from the core's L2 data cache", }, [ POWER9_PME_PM_RADIX_PWC_L1_PDE_FROM_L3MISS ] = { .pme_name = "PM_RADIX_PWC_L1_PDE_FROM_L3MISS", .pme_code = 0x000004F056, .pme_short_desc = "A Page Directory Entry was reloaded to a level 1 page walk cache from beyond the core's L3 data cache.", .pme_long_desc = "A Page Directory Entry was reloaded to a level 1 page walk cache from beyond the core's L3 data cache. 
The source could be local/remote/distant memory or another core's cache", }, [ POWER9_PME_PM_RADIX_PWC_L1_PDE_FROM_L3 ] = { .pme_name = "PM_RADIX_PWC_L1_PDE_FROM_L3", .pme_code = 0x000003F058, .pme_short_desc = "A Page Directory Entry was reloaded to a level 1 page walk cache from the core's L3 data cache", .pme_long_desc = "A Page Directory Entry was reloaded to a level 1 page walk cache from the core's L3 data cache", }, [ POWER9_PME_PM_RADIX_PWC_L2_HIT ] = { .pme_name = "PM_RADIX_PWC_L2_HIT", .pme_code = 0x000002D024, .pme_short_desc = "A radix translation attempt missed in the TLB but hit on both the first and second levels of page walk cache.", .pme_long_desc = "A radix translation attempt missed in the TLB but hit on both the first and second levels of page walk cache.", }, [ POWER9_PME_PM_RADIX_PWC_L2_PDE_FROM_L2 ] = { .pme_name = "PM_RADIX_PWC_L2_PDE_FROM_L2", .pme_code = 0x000002D028, .pme_short_desc = "A Page Directory Entry was reloaded to a level 2 page walk cache from the core's L2 data cache", .pme_long_desc = "A Page Directory Entry was reloaded to a level 2 page walk cache from the core's L2 data cache", }, [ POWER9_PME_PM_RADIX_PWC_L2_PDE_FROM_L3 ] = { .pme_name = "PM_RADIX_PWC_L2_PDE_FROM_L3", .pme_code = 0x000003F05A, .pme_short_desc = "A Page Directory Entry was reloaded to a level 2 page walk cache from the core's L3 data cache", .pme_long_desc = "A Page Directory Entry was reloaded to a level 2 page walk cache from the core's L3 data cache", }, [ POWER9_PME_PM_RADIX_PWC_L2_PTE_FROM_L2 ] = { .pme_name = "PM_RADIX_PWC_L2_PTE_FROM_L2", .pme_code = 0x000001F058, .pme_short_desc = "A Page Table Entry was reloaded to a level 2 page walk cache from the core's L2 data cache.", .pme_long_desc = "A Page Table Entry was reloaded to a level 2 page walk cache from the core's L2 data cache. 
This implies that level 3 and level 4 PWC accesses were not necessary for this translation", }, [ POWER9_PME_PM_RADIX_PWC_L2_PTE_FROM_L3MISS ] = { .pme_name = "PM_RADIX_PWC_L2_PTE_FROM_L3MISS", .pme_code = 0x000004F05C, .pme_short_desc = "A Page Table Entry was reloaded to a level 2 page walk cache from beyond the core's L3 data cache.", .pme_long_desc = "A Page Table Entry was reloaded to a level 2 page walk cache from beyond the core's L3 data cache. This implies that level 3 and level 4 PWC accesses were not necessary for this translation. The source could be local/remote/distant memory or another core's cache", }, [ POWER9_PME_PM_RADIX_PWC_L2_PTE_FROM_L3 ] = { .pme_name = "PM_RADIX_PWC_L2_PTE_FROM_L3", .pme_code = 0x000004F058, .pme_short_desc = "A Page Table Entry was reloaded to a level 2 page walk cache from the core's L3 data cache.", .pme_long_desc = "A Page Table Entry was reloaded to a level 2 page walk cache from the core's L3 data cache. This implies that level 3 and level 4 PWC accesses were not necessary for this translation", }, [ POWER9_PME_PM_RADIX_PWC_L3_HIT ] = { .pme_name = "PM_RADIX_PWC_L3_HIT", .pme_code = 0x000003F056, .pme_short_desc = "A radix translation attempt missed in the TLB but hit on the first, second, and third levels of page walk cache.", .pme_long_desc = "A radix translation attempt missed in the TLB but hit on the first, second, and third levels of page walk cache.", }, [ POWER9_PME_PM_RADIX_PWC_L3_PDE_FROM_L2 ] = { .pme_name = "PM_RADIX_PWC_L3_PDE_FROM_L2", .pme_code = 0x000002D02A, .pme_short_desc = "A Page Directory Entry was reloaded to a level 3 page walk cache from the core's L2 data cache", .pme_long_desc = "A Page Directory Entry was reloaded to a level 3 page walk cache from the core's L2 data cache", }, [ POWER9_PME_PM_RADIX_PWC_L3_PDE_FROM_L3 ] = { .pme_name = "PM_RADIX_PWC_L3_PDE_FROM_L3", .pme_code = 0x000001F15C, .pme_short_desc = "A Page Directory Entry was reloaded to a level 3 page walk cache from the core's L3 
data cache", .pme_long_desc = "A Page Directory Entry was reloaded to a level 3 page walk cache from the core's L3 data cache", }, [ POWER9_PME_PM_RADIX_PWC_L3_PTE_FROM_L2 ] = { .pme_name = "PM_RADIX_PWC_L3_PTE_FROM_L2", .pme_code = 0x000002D02E, .pme_short_desc = "A Page Table Entry was reloaded to a level 3 page walk cache from the core's L2 data cache.", .pme_long_desc = "A Page Table Entry was reloaded to a level 3 page walk cache from the core's L2 data cache. This implies that a level 4 PWC access was not necessary for this translation", }, [ POWER9_PME_PM_RADIX_PWC_L3_PTE_FROM_L3MISS ] = { .pme_name = "PM_RADIX_PWC_L3_PTE_FROM_L3MISS", .pme_code = 0x000004F05E, .pme_short_desc = "A Page Table Entry was reloaded to a level 3 page walk cache from beyond the core's L3 data cache.", .pme_long_desc = "A Page Table Entry was reloaded to a level 3 page walk cache from beyond the core's L3 data cache. This implies that a level 4 PWC access was not necessary for this translation. The source could be local/remote/distant memory or another core's cache", }, [ POWER9_PME_PM_RADIX_PWC_L3_PTE_FROM_L3 ] = { .pme_name = "PM_RADIX_PWC_L3_PTE_FROM_L3", .pme_code = 0x000003F05E, .pme_short_desc = "A Page Table Entry was reloaded to a level 3 page walk cache from the core's L3 data cache.", .pme_long_desc = "A Page Table Entry was reloaded to a level 3 page walk cache from the core's L3 data cache. This implies that a level 4 PWC access was not necessary for this translation", }, [ POWER9_PME_PM_RADIX_PWC_L4_PTE_FROM_L2 ] = { .pme_name = "PM_RADIX_PWC_L4_PTE_FROM_L2", .pme_code = 0x000001F05A, .pme_short_desc = "A Page Table Entry was reloaded to a level 4 page walk cache from the core's L2 data cache.", .pme_long_desc = "A Page Table Entry was reloaded to a level 4 page walk cache from the core's L2 data cache. 
This is the deepest level of PWC possible for a translation", }, [ POWER9_PME_PM_RADIX_PWC_L4_PTE_FROM_L3MISS ] = { .pme_name = "PM_RADIX_PWC_L4_PTE_FROM_L3MISS", .pme_code = 0x000003F054, .pme_short_desc = "A Page Table Entry was reloaded to a level 4 page walk cache from beyond the core's L3 data cache.", .pme_long_desc = "A Page Table Entry was reloaded to a level 4 page walk cache from beyond the core's L3 data cache. This is the deepest level of PWC possible for a translation. The source could be local/remote/distant memory or another core's cache", }, [ POWER9_PME_PM_RADIX_PWC_L4_PTE_FROM_L3 ] = { .pme_name = "PM_RADIX_PWC_L4_PTE_FROM_L3", .pme_code = 0x000004F05A, .pme_short_desc = "A Page Table Entry was reloaded to a level 4 page walk cache from the core's L3 data cache.", .pme_long_desc = "A Page Table Entry was reloaded to a level 4 page walk cache from the core's L3 data cache. This is the deepest level of PWC possible for a translation", }, [ POWER9_PME_PM_RADIX_PWC_MISS ] = { .pme_name = "PM_RADIX_PWC_MISS", .pme_code = 0x000004F054, .pme_short_desc = "A radix translation attempt missed in the TLB and all levels of page walk cache.", .pme_long_desc = "A radix translation attempt missed in the TLB and all levels of page walk cache.", }, [ POWER9_PME_PM_RC0_BUSY ] = { .pme_name = "PM_RC0_BUSY", .pme_code = 0x000001608C, .pme_short_desc = "RC mach 0 Busy.", .pme_long_desc = "RC mach 0 Busy. Used by PMU to sample ave RC lifetime (mach0 used as sample point)", }, [ POWER9_PME_PM_RC0_BUSY_ALT ] = { .pme_name = "PM_RC0_BUSY_ALT", .pme_code = 0x000002608C, .pme_short_desc = "RC mach 0 Busy.", .pme_long_desc = "RC mach 0 Busy. 
Used by PMU to sample ave RC lifetime (mach0 used as sample point)", }, [ POWER9_PME_PM_RC_USAGE ] = { .pme_name = "PM_RC_USAGE", .pme_code = 0x000001688C, .pme_short_desc = "Continuous 16 cycle (2to1) window where this signals rotates thru sampling each RC machine busy.", .pme_long_desc = "Continuous 16 cycle (2to1) window where this signals rotates thru sampling each RC machine busy. PMU uses this wave to then do 16 cyc count to sample total number of machs running", }, [ POWER9_PME_PM_RD_CLEARING_SC ] = { .pme_name = "PM_RD_CLEARING_SC", .pme_code = 0x00000468A6, .pme_short_desc = "Read clearing SC", .pme_long_desc = "Read clearing SC", }, [ POWER9_PME_PM_RD_FORMING_SC ] = { .pme_name = "PM_RD_FORMING_SC", .pme_code = 0x00000460A6, .pme_short_desc = "Read forming SC", .pme_long_desc = "Read forming SC", }, [ POWER9_PME_PM_RD_HIT_PF ] = { .pme_name = "PM_RD_HIT_PF", .pme_code = 0x00000268A8, .pme_short_desc = "RD machine hit L3 PF machine", .pme_long_desc = "RD machine hit L3 PF machine", }, [ POWER9_PME_PM_RUN_CYC_SMT2_MODE ] = { .pme_name = "PM_RUN_CYC_SMT2_MODE", .pme_code = 0x000003006C, .pme_short_desc = "Cycles in which this thread's run latch is set and the core is in SMT2 mode", .pme_long_desc = "Cycles in which this thread's run latch is set and the core is in SMT2 mode", }, [ POWER9_PME_PM_RUN_CYC_SMT4_MODE ] = { .pme_name = "PM_RUN_CYC_SMT4_MODE", .pme_code = 0x000002006C, .pme_short_desc = "Cycles in which this thread's run latch is set and the core is in SMT4 mode", .pme_long_desc = "Cycles in which this thread's run latch is set and the core is in SMT4 mode", }, [ POWER9_PME_PM_RUN_CYC_ST_MODE ] = { .pme_name = "PM_RUN_CYC_ST_MODE", .pme_code = 0x000001006C, .pme_short_desc = "Cycles run latch is set and core is in ST mode", .pme_long_desc = "Cycles run latch is set and core is in ST mode", }, [ POWER9_PME_PM_RUN_CYC ] = { .pme_name = "PM_RUN_CYC", .pme_code = 0x00000200F4, .pme_short_desc = "Run_cycles", .pme_long_desc = "Run_cycles", }, [ 
POWER9_PME_PM_RUN_INST_CMPL ] = { .pme_name = "PM_RUN_INST_CMPL", .pme_code = 0x00000400FA, .pme_short_desc = "Run_Instructions", .pme_long_desc = "Run_Instructions", }, [ POWER9_PME_PM_RUN_PURR ] = { .pme_name = "PM_RUN_PURR", .pme_code = 0x00000400F4, .pme_short_desc = "Run_PURR", .pme_long_desc = "Run_PURR", }, [ POWER9_PME_PM_RUN_SPURR ] = { .pme_name = "PM_RUN_SPURR", .pme_code = 0x0000010008, .pme_short_desc = "Run SPURR", .pme_long_desc = "Run SPURR", }, [ POWER9_PME_PM_S2Q_FULL ] = { .pme_name = "PM_S2Q_FULL", .pme_code = 0x000000E080, .pme_short_desc = "Cycles during which the S2Q is full", .pme_long_desc = "Cycles during which the S2Q is full", }, [ POWER9_PME_PM_SCALAR_FLOP_CMPL ] = { .pme_name = "PM_SCALAR_FLOP_CMPL", .pme_code = 0x0000045056, .pme_short_desc = "Scalar flop operation completed", .pme_long_desc = "Scalar flop operation completed", }, [ POWER9_PME_PM_SHL_CREATED ] = { .pme_name = "PM_SHL_CREATED", .pme_code = 0x000000508C, .pme_short_desc = "Store-Hit-Load Table Entry Created", .pme_long_desc = "Store-Hit-Load Table Entry Created", }, [ POWER9_PME_PM_SHL_ST_DEP_CREATED ] = { .pme_name = "PM_SHL_ST_DEP_CREATED", .pme_code = 0x000000588C, .pme_short_desc = "Store-Hit-Load Table Read Hit with entry Enabled", .pme_long_desc = "Store-Hit-Load Table Read Hit with entry Enabled", }, [ POWER9_PME_PM_SHL_ST_DISABLE ] = { .pme_name = "PM_SHL_ST_DISABLE", .pme_code = 0x0000005090, .pme_short_desc = "Store-Hit-Load Table Read Hit with entry Disabled (entry was disabled due to the entry shown to not prevent the flush)", .pme_long_desc = "Store-Hit-Load Table Read Hit with entry Disabled (entry was disabled due to the entry shown to not prevent the flush)", }, [ POWER9_PME_PM_SLB_TABLEWALK_CYC ] = { .pme_name = "PM_SLB_TABLEWALK_CYC", .pme_code = 0x000000F09C, .pme_short_desc = "Cycles when a tablewalk is pending on this thread on the SLB table", .pme_long_desc = "Cycles when a tablewalk is pending on this thread on the SLB table", }, [ 
POWER9_PME_PM_SN0_BUSY ] = { .pme_name = "PM_SN0_BUSY", .pme_code = 0x0000016090, .pme_short_desc = "SN mach 0 Busy.", .pme_long_desc = "SN mach 0 Busy. Used by PMU to sample ave SN lifetime (mach0 used as sample point)", }, [ POWER9_PME_PM_SN0_BUSY_ALT ] = { .pme_name = "PM_SN0_BUSY_ALT", .pme_code = 0x0000026090, .pme_short_desc = "SN mach 0 Busy.", .pme_long_desc = "SN mach 0 Busy. Used by PMU to sample ave SN lifetime (mach0 used as sample point)", }, [ POWER9_PME_PM_SN_HIT ] = { .pme_name = "PM_SN_HIT", .pme_code = 0x00000460A8, .pme_short_desc = "Any port snooper hit L3.", .pme_long_desc = "Any port snooper hit L3. Up to 4 can happen in a cycle but we only count 1", }, [ POWER9_PME_PM_SN_INVL ] = { .pme_name = "PM_SN_INVL", .pme_code = 0x00000368A8, .pme_short_desc = "Any port snooper detects a store to a line in the Sx state and invalidates the line.", .pme_long_desc = "Any port snooper detects a store to a line in the Sx state and invalidates the line. Up to 4 can happen in a cycle but we only count 1", }, [ POWER9_PME_PM_SN_MISS ] = { .pme_name = "PM_SN_MISS", .pme_code = 0x00000468A8, .pme_short_desc = "Any port snooper L3 miss or collision.", .pme_long_desc = "Any port snooper L3 miss or collision. 
Up to 4 can happen in a cycle but we only count 1", }, [ POWER9_PME_PM_SNOOP_TLBIE ] = { .pme_name = "PM_SNOOP_TLBIE", .pme_code = 0x000000F880, .pme_short_desc = "TLBIE snoop", .pme_long_desc = "TLBIE snoop", }, [ POWER9_PME_PM_SNP_TM_HIT_M ] = { .pme_name = "PM_SNP_TM_HIT_M", .pme_code = 0x00000360A6, .pme_short_desc = "Snp TM st hit M/Mu", .pme_long_desc = "Snp TM st hit M/Mu", }, [ POWER9_PME_PM_SNP_TM_HIT_T ] = { .pme_name = "PM_SNP_TM_HIT_T", .pme_code = 0x00000368A6, .pme_short_desc = "Snp TM sthit T/Tn/Te", .pme_long_desc = "Snp TM sthit T/Tn/Te", }, [ POWER9_PME_PM_SN_USAGE ] = { .pme_name = "PM_SN_USAGE", .pme_code = 0x000003688C, .pme_short_desc = "Continuous 16 cycle (2to1) window where this signals rotates thru sampling each SN machine busy.", .pme_long_desc = "Continuous 16 cycle (2to1) window where this signals rotates thru sampling each SN machine busy. PMU uses this wave to then do 16 cyc count to sample total number of machs running", }, [ POWER9_PME_PM_SP_FLOP_CMPL ] = { .pme_name = "PM_SP_FLOP_CMPL", .pme_code = 0x000004505A, .pme_short_desc = "SP instruction completed", .pme_long_desc = "SP instruction completed", }, [ POWER9_PME_PM_SRQ_EMPTY_CYC ] = { .pme_name = "PM_SRQ_EMPTY_CYC", .pme_code = 0x0000040008, .pme_short_desc = "Cycles in which the SRQ has at least one (out of four) empty slice", .pme_long_desc = "Cycles in which the SRQ has at least one (out of four) empty slice", }, [ POWER9_PME_PM_SRQ_SYNC_CYC ] = { .pme_name = "PM_SRQ_SYNC_CYC", .pme_code = 0x000000D0AC, .pme_short_desc = "A sync is in the S2Q (edge detect to count)", .pme_long_desc = "A sync is in the S2Q (edge detect to count)", }, [ POWER9_PME_PM_STALL_END_ICT_EMPTY ] = { .pme_name = "PM_STALL_END_ICT_EMPTY", .pme_code = 0x0000010028, .pme_short_desc = "The number a times the core transitioned from a stall to ICT-empty for this thread", .pme_long_desc = "The number a times the core transitioned from a stall to ICT-empty for this thread", }, [ POWER9_PME_PM_ST_CAUSED_FAIL 
] = { .pme_name = "PM_ST_CAUSED_FAIL", .pme_code = 0x000001608E, .pme_short_desc = "Non-TM Store caused any thread to fail", .pme_long_desc = "Non-TM Store caused any thread to fail", }, [ POWER9_PME_PM_ST_CMPL ] = { .pme_name = "PM_ST_CMPL", .pme_code = 0x00000200F0, .pme_short_desc = "Stores completed from S2Q (2nd-level store queue).", .pme_long_desc = "Stores completed from S2Q (2nd-level store queue).", }, [ POWER9_PME_PM_STCX_FAIL ] = { .pme_name = "PM_STCX_FAIL", .pme_code = 0x000001E058, .pme_short_desc = "stcx failed", .pme_long_desc = "stcx failed", }, [ POWER9_PME_PM_STCX_FIN ] = { .pme_name = "PM_STCX_FIN", .pme_code = 0x000002E014, .pme_short_desc = "Number of stcx instructions finished.", .pme_long_desc = "Number of stcx instructions finished. This includes instructions in the speculative path of a branch that may be flushed", }, [ POWER9_PME_PM_STCX_SUCCESS_CMPL ] = { .pme_name = "PM_STCX_SUCCESS_CMPL", .pme_code = 0x000000C8BC, .pme_short_desc = "Number of stcx instructions that completed successfully", .pme_long_desc = "Number of stcx instructions that completed successfully", }, [ POWER9_PME_PM_ST_FIN ] = { .pme_name = "PM_ST_FIN", .pme_code = 0x0000020016, .pme_short_desc = "Store finish count.", .pme_long_desc = "Store finish count. 
Includes speculative activity", }, [ POWER9_PME_PM_ST_FWD ] = { .pme_name = "PM_ST_FWD", .pme_code = 0x0000020018, .pme_short_desc = "Store forwards that finished", .pme_long_desc = "Store forwards that finished", }, [ POWER9_PME_PM_ST_MISS_L1 ] = { .pme_name = "PM_ST_MISS_L1", .pme_code = 0x00000300F0, .pme_short_desc = "Store Missed L1", .pme_long_desc = "Store Missed L1", }, [ POWER9_PME_PM_STOP_FETCH_PENDING_CYC ] = { .pme_name = "PM_STOP_FETCH_PENDING_CYC", .pme_code = 0x00000048A4, .pme_short_desc = "Fetching is stopped due to an incoming instruction that will result in a flush", .pme_long_desc = "Fetching is stopped due to an incoming instruction that will result in a flush", }, /* See also alternate entries for 0000010000 / POWER9_PME_PM_SUSPENDED with code(s) 0000020000 0000030000 0000040000 at the bottom of this table. \n */ [ POWER9_PME_PM_SUSPENDED ] = { .pme_name = "PM_SUSPENDED", .pme_code = 0x0000010000, .pme_short_desc = "Counter OFF", .pme_long_desc = "Counter OFF", }, [ POWER9_PME_PM_SYNC_MRK_BR_LINK ] = { .pme_name = "PM_SYNC_MRK_BR_LINK", .pme_code = 0x0000015152, .pme_short_desc = "Marked Branch and link branch that can cause a synchronous interrupt", .pme_long_desc = "Marked Branch and link branch that can cause a synchronous interrupt", }, [ POWER9_PME_PM_SYNC_MRK_BR_MPRED ] = { .pme_name = "PM_SYNC_MRK_BR_MPRED", .pme_code = 0x000001515C, .pme_short_desc = "Marked Branch mispredict that can cause a synchronous interrupt", .pme_long_desc = "Marked Branch mispredict that can cause a synchronous interrupt", }, [ POWER9_PME_PM_SYNC_MRK_FX_DIVIDE ] = { .pme_name = "PM_SYNC_MRK_FX_DIVIDE", .pme_code = 0x0000015156, .pme_short_desc = "Marked fixed point divide that can cause a synchronous interrupt", .pme_long_desc = "Marked fixed point divide that can cause a synchronous interrupt", }, [ POWER9_PME_PM_SYNC_MRK_L2HIT ] = { .pme_name = "PM_SYNC_MRK_L2HIT", .pme_code = 0x0000015158, .pme_short_desc = "Marked L2 Hits that can throw a synchronous 
interrupt", .pme_long_desc = "Marked L2 Hits that can throw a synchronous interrupt", }, [ POWER9_PME_PM_SYNC_MRK_L2MISS ] = { .pme_name = "PM_SYNC_MRK_L2MISS", .pme_code = 0x000001515A, .pme_short_desc = "Marked L2 Miss that can throw a synchronous interrupt", .pme_long_desc = "Marked L2 Miss that can throw a synchronous interrupt", }, [ POWER9_PME_PM_SYNC_MRK_L3MISS ] = { .pme_name = "PM_SYNC_MRK_L3MISS", .pme_code = 0x0000015154, .pme_short_desc = "Marked L3 misses that can throw a synchronous interrupt", .pme_long_desc = "Marked L3 misses that can throw a synchronous interrupt", }, [ POWER9_PME_PM_SYNC_MRK_PROBE_NOP ] = { .pme_name = "PM_SYNC_MRK_PROBE_NOP", .pme_code = 0x0000015150, .pme_short_desc = "Marked probeNops which can cause synchronous interrupts", .pme_long_desc = "Marked probeNops which can cause synchronous interrupts", }, [ POWER9_PME_PM_SYS_PUMP_CPRED ] = { .pme_name = "PM_SYS_PUMP_CPRED", .pme_code = 0x0000030050, .pme_short_desc = "Initial and Final Pump Scope was system pump for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)", .pme_long_desc = "Initial and Final Pump Scope was system pump for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)", }, [ POWER9_PME_PM_SYS_PUMP_MPRED_RTY ] = { .pme_name = "PM_SYS_PUMP_MPRED_RTY", .pme_code = 0x0000040050, .pme_short_desc = "Final Pump Scope (system) ended up larger than Initial Pump Scope (Chip/Group) for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)", .pme_long_desc = "Final Pump Scope (system) ended up larger than Initial Pump Scope (Chip/Group) for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)", }, [ POWER9_PME_PM_SYS_PUMP_MPRED ] = { .pme_name = "PM_SYS_PUMP_MPRED", .pme_code = 0x0000030052, .pme_short_desc = "Final Pump Scope (system) mispredicted.", .pme_long_desc = "Final Pump Scope (system) mispredicted. 
Either the original scope was too small (Chip/Group) or the original scope was System and it should have been smaller. Counts for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)", }, [ POWER9_PME_PM_TABLEWALK_CYC_PREF ] = { .pme_name = "PM_TABLEWALK_CYC_PREF", .pme_code = 0x000000F884, .pme_short_desc = "tablewalk qualified for pte prefetches", .pme_long_desc = "tablewalk qualified for pte prefetches", }, [ POWER9_PME_PM_TABLEWALK_CYC ] = { .pme_name = "PM_TABLEWALK_CYC", .pme_code = 0x0000010026, .pme_short_desc = "Cycles when an instruction tablewalk is active", .pme_long_desc = "Cycles when an instruction tablewalk is active", }, [ POWER9_PME_PM_TAGE_CORRECT_TAKEN_CMPL ] = { .pme_name = "PM_TAGE_CORRECT_TAKEN_CMPL", .pme_code = 0x00000050B4, .pme_short_desc = "The TAGE overrode BHT direction prediction and it was correct.", .pme_long_desc = "The TAGE overrode BHT direction prediction and it was correct. Counted at completion for taken branches only", }, [ POWER9_PME_PM_TAGE_CORRECT ] = { .pme_name = "PM_TAGE_CORRECT", .pme_code = 0x00000058B4, .pme_short_desc = "The TAGE overrode BHT direction prediction and it was correct.", .pme_long_desc = "The TAGE overrode BHT direction prediction and it was correct. Includes taken and not taken and is counted at execution time", }, [ POWER9_PME_PM_TAGE_OVERRIDE_WRONG_SPEC ] = { .pme_name = "PM_TAGE_OVERRIDE_WRONG_SPEC", .pme_code = 0x00000058B8, .pme_short_desc = "The TAGE overrode BHT direction prediction and it was correct.", .pme_long_desc = "The TAGE overrode BHT direction prediction and it was correct. Includes taken and not taken and is counted at execution time", }, [ POWER9_PME_PM_TAGE_OVERRIDE_WRONG ] = { .pme_name = "PM_TAGE_OVERRIDE_WRONG", .pme_code = 0x00000050B8, .pme_short_desc = "The TAGE overrode BHT direction prediction but it was incorrect.", .pme_long_desc = "The TAGE overrode BHT direction prediction but it was incorrect. 
Counted at completion for taken branches only", }, [ POWER9_PME_PM_TAKEN_BR_MPRED_CMPL ] = { .pme_name = "PM_TAKEN_BR_MPRED_CMPL", .pme_code = 0x0000020056, .pme_short_desc = "Total number of taken branches that were incorrectly predicted as not-taken.", .pme_long_desc = "Total number of taken branches that were incorrectly predicted as not-taken. This event counts branches completed and does not include speculative instructions", }, [ POWER9_PME_PM_TB_BIT_TRANS ] = { .pme_name = "PM_TB_BIT_TRANS", .pme_code = 0x00000300F8, .pme_short_desc = "timebase event", .pme_long_desc = "timebase event", }, [ POWER9_PME_PM_TEND_PEND_CYC ] = { .pme_name = "PM_TEND_PEND_CYC", .pme_code = 0x000000E8B0, .pme_short_desc = "TEND latency per thread", .pme_long_desc = "TEND latency per thread", }, [ POWER9_PME_PM_THRD_ALL_RUN_CYC ] = { .pme_name = "PM_THRD_ALL_RUN_CYC", .pme_code = 0x000002000C, .pme_short_desc = "Cycles in which all the threads have the run latch set", .pme_long_desc = "Cycles in which all the threads have the run latch set", }, [ POWER9_PME_PM_THRD_CONC_RUN_INST ] = { .pme_name = "PM_THRD_CONC_RUN_INST", .pme_code = 0x00000300F4, .pme_short_desc = "PPC Instructions Finished by this thread when all threads in the core had the run-latch set", .pme_long_desc = "PPC Instructions Finished by this thread when all threads in the core had the run-latch set", }, [ POWER9_PME_PM_THRD_PRIO_0_1_CYC ] = { .pme_name = "PM_THRD_PRIO_0_1_CYC", .pme_code = 0x00000040BC, .pme_short_desc = "Cycles thread running at priority level 0 or 1", .pme_long_desc = "Cycles thread running at priority level 0 or 1", }, [ POWER9_PME_PM_THRD_PRIO_2_3_CYC ] = { .pme_name = "PM_THRD_PRIO_2_3_CYC", .pme_code = 0x00000048BC, .pme_short_desc = "Cycles thread running at priority level 2 or 3", .pme_long_desc = "Cycles thread running at priority level 2 or 3", }, [ POWER9_PME_PM_THRD_PRIO_4_5_CYC ] = { .pme_name = "PM_THRD_PRIO_4_5_CYC", .pme_code = 0x0000005080, .pme_short_desc = "Cycles thread running 
at priority level 4 or 5", .pme_long_desc = "Cycles thread running at priority level 4 or 5", }, [ POWER9_PME_PM_THRD_PRIO_6_7_CYC ] = { .pme_name = "PM_THRD_PRIO_6_7_CYC", .pme_code = 0x0000005880, .pme_short_desc = "Cycles thread running at priority level 6 or 7", .pme_long_desc = "Cycles thread running at priority level 6 or 7", }, [ POWER9_PME_PM_THRESH_ACC ] = { .pme_name = "PM_THRESH_ACC", .pme_code = 0x0000024154, .pme_short_desc = "This event increments every time the threshold event counter ticks.", .pme_long_desc = "This event increments every time the threshold event counter ticks. Thresholding must be enabled (via MMCRA) and the thresholding start event must occur for this counter to increment. It will stop incrementing when the thresholding stop event occurs or when thresholding is disabled, until the next time a configured thresholding start event occurs.", }, [ POWER9_PME_PM_THRESH_EXC_1024 ] = { .pme_name = "PM_THRESH_EXC_1024", .pme_code = 0x00000301EA, .pme_short_desc = "Threshold counter exceeded a value of 1024", .pme_long_desc = "Threshold counter exceeded a value of 1024", }, [ POWER9_PME_PM_THRESH_EXC_128 ] = { .pme_name = "PM_THRESH_EXC_128", .pme_code = 0x00000401EA, .pme_short_desc = "Threshold counter exceeded a value of 128", .pme_long_desc = "Threshold counter exceeded a value of 128", }, [ POWER9_PME_PM_THRESH_EXC_2048 ] = { .pme_name = "PM_THRESH_EXC_2048", .pme_code = 0x00000401EC, .pme_short_desc = "Threshold counter exceeded a value of 2048", .pme_long_desc = "Threshold counter exceeded a value of 2048", }, [ POWER9_PME_PM_THRESH_EXC_256 ] = { .pme_name = "PM_THRESH_EXC_256", .pme_code = 0x00000101E8, .pme_short_desc = "Threshold counter exceed a count of 256", .pme_long_desc = "Threshold counter exceed a count of 256", }, [ POWER9_PME_PM_THRESH_EXC_32 ] = { .pme_name = "PM_THRESH_EXC_32", .pme_code = 0x00000201E6, .pme_short_desc = "Threshold counter exceeded a value of 32", .pme_long_desc = "Threshold counter exceeded a value of 
32", }, [ POWER9_PME_PM_THRESH_EXC_4096 ] = { .pme_name = "PM_THRESH_EXC_4096", .pme_code = 0x00000101E6, .pme_short_desc = "Threshold counter exceed a count of 4096", .pme_long_desc = "Threshold counter exceed a count of 4096", }, [ POWER9_PME_PM_THRESH_EXC_512 ] = { .pme_name = "PM_THRESH_EXC_512", .pme_code = 0x00000201E8, .pme_short_desc = "Threshold counter exceeded a value of 512", .pme_long_desc = "Threshold counter exceeded a value of 512", }, [ POWER9_PME_PM_THRESH_EXC_64 ] = { .pme_name = "PM_THRESH_EXC_64", .pme_code = 0x00000301E8, .pme_short_desc = "Threshold counter exceeded a value of 64", .pme_long_desc = "Threshold counter exceeded a value of 64", }, [ POWER9_PME_PM_THRESH_MET ] = { .pme_name = "PM_THRESH_MET", .pme_code = 0x00000101EC, .pme_short_desc = "threshold exceeded", .pme_long_desc = "threshold exceeded", }, [ POWER9_PME_PM_THRESH_NOT_MET ] = { .pme_name = "PM_THRESH_NOT_MET", .pme_code = 0x000004016E, .pme_short_desc = "Threshold counter did not meet threshold", .pme_long_desc = "Threshold counter did not meet threshold", }, [ POWER9_PME_PM_TLB_HIT ] = { .pme_name = "PM_TLB_HIT", .pme_code = 0x000001F054, .pme_short_desc = "Number of times the TLB had the data required by the instruction.", .pme_long_desc = "Number of times the TLB had the data required by the instruction. 
Applies to both HPT and RPT", }, [ POWER9_PME_PM_TLBIE_FIN ] = { .pme_name = "PM_TLBIE_FIN", .pme_code = 0x0000030058, .pme_short_desc = "tlbie finished", .pme_long_desc = "tlbie finished", }, [ POWER9_PME_PM_TLB_MISS ] = { .pme_name = "PM_TLB_MISS", .pme_code = 0x0000020066, .pme_short_desc = "TLB Miss (I + D)", .pme_long_desc = "TLB Miss (I + D)", }, [ POWER9_PME_PM_TM_ABORTS ] = { .pme_name = "PM_TM_ABORTS", .pme_code = 0x0000030056, .pme_short_desc = "Number of TM transactions aborted", .pme_long_desc = "Number of TM transactions aborted", }, [ POWER9_PME_PM_TMA_REQ_L2 ] = { .pme_name = "PM_TMA_REQ_L2", .pme_code = 0x000000E0A4, .pme_short_desc = "addrs only req to L2 only on the first one,Indication that Load footprint is not expanding", .pme_long_desc = "addrs only req to L2 only on the first one,Indication that Load footprint is not expanding", }, [ POWER9_PME_PM_TM_CAM_OVERFLOW ] = { .pme_name = "PM_TM_CAM_OVERFLOW", .pme_code = 0x00000168A6, .pme_short_desc = "L3 TM cam overflow during L2 co of SC", .pme_long_desc = "L3 TM cam overflow during L2 co of SC", }, [ POWER9_PME_PM_TM_CAP_OVERFLOW ] = { .pme_name = "PM_TM_CAP_OVERFLOW", .pme_code = 0x000004608E, .pme_short_desc = "TM Footprint Capacity Overflow", .pme_long_desc = "TM Footprint Capacity Overflow", }, [ POWER9_PME_PM_TM_FAIL_CONF_NON_TM ] = { .pme_name = "PM_TM_FAIL_CONF_NON_TM", .pme_code = 0x00000028A8, .pme_short_desc = "TM aborted because a conflict occurred with a non-transactional access by another processor", .pme_long_desc = "TM aborted because a conflict occurred with a non-transactional access by another processor", }, [ POWER9_PME_PM_TM_FAIL_CONF_TM ] = { .pme_name = "PM_TM_FAIL_CONF_TM", .pme_code = 0x00000020AC, .pme_short_desc = "TM aborted because a conflict occurred with another transaction.", .pme_long_desc = "TM aborted because a conflict occurred with another transaction.", }, [ POWER9_PME_PM_TM_FAIL_FOOTPRINT_OVERFLOW ] = { .pme_name = "PM_TM_FAIL_FOOTPRINT_OVERFLOW", .pme_code 
= 0x00000020A8, .pme_short_desc = "TM aborted because the tracking limit for transactional storage accesses was exceeded.", .pme_long_desc = "TM aborted because the tracking limit for transactional storage accesses was exceeded.. Asynchronous", }, [ POWER9_PME_PM_TM_FAIL_NON_TX_CONFLICT ] = { .pme_name = "PM_TM_FAIL_NON_TX_CONFLICT", .pme_code = 0x000000E0B0, .pme_short_desc = "Non transactional conflict from LSU, gets reported to TEXASR", .pme_long_desc = "Non transactional conflict from LSU, gets reported to TEXASR", }, [ POWER9_PME_PM_TM_FAIL_SELF ] = { .pme_name = "PM_TM_FAIL_SELF", .pme_code = 0x00000028AC, .pme_short_desc = "TM aborted because a self-induced conflict occurred in Suspended state, due to one of the following: a store to a storage location that was previously accessed transactionally; a dcbf, dcbi, or icbi specify- ing a block that was previously accessed transactionally; a dcbst specifying a block that was previously written transactionally; or a tlbie that specifies a translation that was pre- viously used transactionally", .pme_long_desc = "TM aborted because a self-induced conflict occurred in Suspended state, due to one of the following: a store to a storage location that was previously accessed transactionally; a dcbf, dcbi, or icbi specify- ing a block that was previously accessed transactionally; a dcbst specifying a block that was previously written transactionally; or a tlbie that specifies a translation that was pre- viously used transactionally", }, [ POWER9_PME_PM_TM_FAIL_TLBIE ] = { .pme_name = "PM_TM_FAIL_TLBIE", .pme_code = 0x000000E0AC, .pme_short_desc = "Transaction failed because there was a TLBIE hit in the bloom filter", .pme_long_desc = "Transaction failed because there was a TLBIE hit in the bloom filter", }, [ POWER9_PME_PM_TM_FAIL_TX_CONFLICT ] = { .pme_name = "PM_TM_FAIL_TX_CONFLICT", .pme_code = 0x000000E8AC, .pme_short_desc = "Transactional conflict from LSU, gets reported to TEXASR", .pme_long_desc = "Transactional 
conflict from LSU, gets reported to TEXASR", }, [ POWER9_PME_PM_TM_FAV_CAUSED_FAIL ] = { .pme_name = "PM_TM_FAV_CAUSED_FAIL", .pme_code = 0x000002688E, .pme_short_desc = "TM Load (fav) caused another thread to fail", .pme_long_desc = "TM Load (fav) caused another thread to fail", }, [ POWER9_PME_PM_TM_FAV_TBEGIN ] = { .pme_name = "PM_TM_FAV_TBEGIN", .pme_code = 0x000000209C, .pme_short_desc = "Dispatch time Favored tbegin", .pme_long_desc = "Dispatch time Favored tbegin", }, [ POWER9_PME_PM_TM_LD_CAUSED_FAIL ] = { .pme_name = "PM_TM_LD_CAUSED_FAIL", .pme_code = 0x000001688E, .pme_short_desc = "Non-TM Load caused any thread to fail", .pme_long_desc = "Non-TM Load caused any thread to fail", }, [ POWER9_PME_PM_TM_LD_CONF ] = { .pme_name = "PM_TM_LD_CONF", .pme_code = 0x000002608E, .pme_short_desc = "TM Load (fav or non-fav) ran into conflict (failed)", .pme_long_desc = "TM Load (fav or non-fav) ran into conflict (failed)", }, [ POWER9_PME_PM_TM_NESTED_TBEGIN ] = { .pme_name = "PM_TM_NESTED_TBEGIN", .pme_code = 0x00000020A0, .pme_short_desc = "Completion Tm nested tbegin", .pme_long_desc = "Completion Tm nested tbegin", }, [ POWER9_PME_PM_TM_NESTED_TEND ] = { .pme_name = "PM_TM_NESTED_TEND", .pme_code = 0x0000002098, .pme_short_desc = "Completion time nested tend", .pme_long_desc = "Completion time nested tend", }, [ POWER9_PME_PM_TM_NON_FAV_TBEGIN ] = { .pme_name = "PM_TM_NON_FAV_TBEGIN", .pme_code = 0x000000289C, .pme_short_desc = "Dispatch time non favored tbegin", .pme_long_desc = "Dispatch time non favored tbegin", }, [ POWER9_PME_PM_TM_OUTER_TBEGIN_DISP ] = { .pme_name = "PM_TM_OUTER_TBEGIN_DISP", .pme_code = 0x000004E05E, .pme_short_desc = "Number of outer tbegin instructions dispatched.", .pme_long_desc = "Number of outer tbegin instructions dispatched. The dispatch unit determines whether the tbegin instruction is outer or nested. 
This is a speculative count, which includes flushed instructions", }, [ POWER9_PME_PM_TM_OUTER_TBEGIN ] = { .pme_name = "PM_TM_OUTER_TBEGIN", .pme_code = 0x0000002094, .pme_short_desc = "Completion time outer tbegin", .pme_long_desc = "Completion time outer tbegin", }, [ POWER9_PME_PM_TM_OUTER_TEND ] = { .pme_name = "PM_TM_OUTER_TEND", .pme_code = 0x0000002894, .pme_short_desc = "Completion time outer tend", .pme_long_desc = "Completion time outer tend", }, [ POWER9_PME_PM_TM_PASSED ] = { .pme_name = "PM_TM_PASSED", .pme_code = 0x000002E052, .pme_short_desc = "Number of TM transactions that passed", .pme_long_desc = "Number of TM transactions that passed", }, [ POWER9_PME_PM_TM_RST_SC ] = { .pme_name = "PM_TM_RST_SC", .pme_code = 0x00000268A6, .pme_short_desc = "TM-snp rst RM SC", .pme_long_desc = "TM-snp rst RM SC", }, [ POWER9_PME_PM_TM_SC_CO ] = { .pme_name = "PM_TM_SC_CO", .pme_code = 0x00000160A6, .pme_short_desc = "L3 castout TM SC line", .pme_long_desc = "L3 castout TM SC line", }, [ POWER9_PME_PM_TM_ST_CAUSED_FAIL ] = { .pme_name = "PM_TM_ST_CAUSED_FAIL", .pme_code = 0x000003688E, .pme_short_desc = "TM Store (fav or non-fav) caused another thread to fail", .pme_long_desc = "TM Store (fav or non-fav) caused another thread to fail", }, [ POWER9_PME_PM_TM_ST_CONF ] = { .pme_name = "PM_TM_ST_CONF", .pme_code = 0x000003608E, .pme_short_desc = "TM Store (fav or non-fav) ran into conflict (failed)", .pme_long_desc = "TM Store (fav or non-fav) ran into conflict (failed)", }, [ POWER9_PME_PM_TM_TABORT_TRECLAIM ] = { .pme_name = "PM_TM_TABORT_TRECLAIM", .pme_code = 0x0000002898, .pme_short_desc = "Completion time tabortnoncd, tabortcd, treclaim", .pme_long_desc = "Completion time tabortnoncd, tabortcd, treclaim", }, [ POWER9_PME_PM_TM_TRANS_RUN_CYC ] = { .pme_name = "PM_TM_TRANS_RUN_CYC", .pme_code = 0x0000010060, .pme_short_desc = "run cycles in transactional state", .pme_long_desc = "run cycles in transactional state", }, [ POWER9_PME_PM_TM_TRANS_RUN_INST ] = { 
.pme_name = "PM_TM_TRANS_RUN_INST", .pme_code = 0x0000030060, .pme_short_desc = "Run instructions completed in transactional state (gated by the run latch)", .pme_long_desc = "Run instructions completed in transactional state (gated by the run latch)", }, [ POWER9_PME_PM_TM_TRESUME ] = { .pme_name = "PM_TM_TRESUME", .pme_code = 0x00000020A4, .pme_short_desc = "TM resume instruction completed", .pme_long_desc = "TM resume instruction completed", }, [ POWER9_PME_PM_TM_TSUSPEND ] = { .pme_name = "PM_TM_TSUSPEND", .pme_code = 0x00000028A0, .pme_short_desc = "TM suspend instruction completed", .pme_long_desc = "TM suspend instruction completed", }, [ POWER9_PME_PM_TM_TX_PASS_RUN_CYC ] = { .pme_name = "PM_TM_TX_PASS_RUN_CYC", .pme_code = 0x000002E012, .pme_short_desc = "cycles spent in successful transactions", .pme_long_desc = "cycles spent in successful transactions", }, [ POWER9_PME_PM_TM_TX_PASS_RUN_INST ] = { .pme_name = "PM_TM_TX_PASS_RUN_INST", .pme_code = 0x000004E014, .pme_short_desc = "Run instructions spent in successful transactions", .pme_long_desc = "Run instructions spent in successful transactions", }, [ POWER9_PME_PM_VECTOR_FLOP_CMPL ] = { .pme_name = "PM_VECTOR_FLOP_CMPL", .pme_code = 0x000004D058, .pme_short_desc = "Vector FP instruction completed", .pme_long_desc = "Vector FP instruction completed", }, [ POWER9_PME_PM_VECTOR_LD_CMPL ] = { .pme_name = "PM_VECTOR_LD_CMPL", .pme_code = 0x0000044054, .pme_short_desc = "Number of vector load instructions completed", .pme_long_desc = "Number of vector load instructions completed", }, [ POWER9_PME_PM_VECTOR_ST_CMPL ] = { .pme_name = "PM_VECTOR_ST_CMPL", .pme_code = 0x0000044056, .pme_short_desc = "Number of vector store instructions completed", .pme_long_desc = "Number of vector store instructions completed", }, [ POWER9_PME_PM_VSU_DP_FSQRT_FDIV ] = { .pme_name = "PM_VSU_DP_FSQRT_FDIV", .pme_code = 0x000003D058, .pme_short_desc = "vector versions of fdiv,fsqrt", .pme_long_desc = "vector versions of 
fdiv,fsqrt", }, [ POWER9_PME_PM_VSU_FIN ] = { .pme_name = "PM_VSU_FIN", .pme_code = 0x000002505C, .pme_short_desc = "VSU instruction finished.", .pme_long_desc = "VSU instruction finished. Up to 4 per cycle", }, [ POWER9_PME_PM_VSU_FSQRT_FDIV ] = { .pme_name = "PM_VSU_FSQRT_FDIV", .pme_code = 0x000004D04E, .pme_short_desc = "four flops operation (fdiv,fsqrt) Scalar Instructions only", .pme_long_desc = "four flops operation (fdiv,fsqrt) Scalar Instructions only", }, [ POWER9_PME_PM_VSU_NON_FLOP_CMPL ] = { .pme_name = "PM_VSU_NON_FLOP_CMPL", .pme_code = 0x000004D050, .pme_short_desc = "Non FLOP operation completed", .pme_long_desc = "Non FLOP operation completed", }, [ POWER9_PME_PM_XLATE_HPT_MODE ] = { .pme_name = "PM_XLATE_HPT_MODE", .pme_code = 0x000000F098, .pme_short_desc = "LSU reports every cycle the thread is in HPT translation mode (as opposed to radix mode)", .pme_long_desc = "LSU reports every cycle the thread is in HPT translation mode (as opposed to radix mode)", }, [ POWER9_PME_PM_XLATE_MISS ] = { .pme_name = "PM_XLATE_MISS", .pme_code = 0x000000F89C, .pme_short_desc = "The LSU requested a line from L2 for translation.", .pme_long_desc = "The LSU requested a line from L2 for translation. It may be satisfied from any source beyond L2. 
Includes speculative instructions", }, [ POWER9_PME_PM_XLATE_RADIX_MODE ] = { .pme_name = "PM_XLATE_RADIX_MODE", .pme_code = 0x000000F898, .pme_short_desc = "LSU reports every cycle the thread is in radix translation mode (as opposed to HPT mode)", .pme_long_desc = "LSU reports every cycle the thread is in radix translation mode (as opposed to HPT mode)", }, [ POWER9_PME_PM_BR_2PATH_ALT ] = { .pme_name = "PM_BR_2PATH_ALT", .pme_code = 0x0000040036, .pme_short_desc = "Branches that are not strongly biased", .pme_long_desc = "Branches that are not strongly biased", }, [ POWER9_PME_PM_CYC_ALT ] = { .pme_name = "PM_CYC_ALT", .pme_code = 0x000002001E, .pme_short_desc = "Processor cycles", .pme_long_desc = "Processor cycles", }, [ POWER9_PME_PM_CYC_ALT2 ] = { .pme_name = "PM_CYC_ALT2", .pme_code = 0x000003001E, .pme_short_desc = "Processor cycles", .pme_long_desc = "Processor cycles", }, [ POWER9_PME_PM_CYC_ALT3 ] = { .pme_name = "PM_CYC_ALT3", .pme_code = 0x000004001E, .pme_short_desc = "Processor cycles", .pme_long_desc = "Processor cycles", }, [ POWER9_PME_PM_INST_CMPL_ALT ] = { .pme_name = "PM_INST_CMPL_ALT", .pme_code = 0x0000020002, .pme_short_desc = "Number of PowerPC Instructions that completed.", .pme_long_desc = "Number of PowerPC Instructions that completed.", }, [ POWER9_PME_PM_INST_CMPL_ALT2 ] = { .pme_name = "PM_INST_CMPL_ALT2", .pme_code = 0x0000030002, .pme_short_desc = "Number of PowerPC Instructions that completed.", .pme_long_desc = "Number of PowerPC Instructions that completed.", }, [ POWER9_PME_PM_INST_CMPL_ALT3 ] = { .pme_name = "PM_INST_CMPL_ALT3", .pme_code = 0x0000040002, .pme_short_desc = "Number of PowerPC Instructions that completed.", .pme_long_desc = "Number of PowerPC Instructions that completed.", }, [ POWER9_PME_PM_INST_DISP_ALT ] = { .pme_name = "PM_INST_DISP_ALT", .pme_code = 0x00000300F2, .pme_short_desc = "# PPC Dispatched", .pme_long_desc = "# PPC Dispatched", }, [ POWER9_PME_PM_LD_MISS_L1_ALT ] = { .pme_name = "PM_LD_MISS_L1_ALT", 
.pme_code = 0x00000400F0, .pme_short_desc = "Load Missed L1, counted at execution time (can be greater than loads finished).", .pme_long_desc = "Load Missed L1, counted at execution time (can be greater than loads finished). LMQ merges are not included in this count. i.e. if a load instruction misses on an address that is already allocated on the LMQ, this event will not increment for that load). Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load.", }, [ POWER9_PME_PM_SUSPENDED_ALT ] = { .pme_name = "PM_SUSPENDED_ALT", .pme_code = 0x0000020000, .pme_short_desc = "Counter OFF", .pme_long_desc = "Counter OFF", }, [ POWER9_PME_PM_SUSPENDED_ALT2 ] = { .pme_name = "PM_SUSPENDED_ALT2", .pme_code = 0x0000030000, .pme_short_desc = "Counter OFF", .pme_long_desc = "Counter OFF", }, [ POWER9_PME_PM_SUSPENDED_ALT3 ] = { .pme_name = "PM_SUSPENDED_ALT3", .pme_code = 0x0000040000, .pme_short_desc = "Counter OFF", .pme_long_desc = "Counter OFF", }, /* total 957 */ }; #endif papi-5.6.0/src/libpfm-3.y/docs/man3/pfm_get_impl_pmds.3000664 001750 001750 00000000035 13216244361 024537 0ustar00jshenry1963jshenry1963000000 000000 .so man3/pfm_get_impl_pmcs.3 papi-5.6.0/man/man3/PAPI_epc.3000664 001750 001750 00000004702 13216244356 017617 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPI_epc" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPI_epc \- .PP Simplified call to get arbitrary events per cycle, real and processor time\&. 
.SH SYNOPSIS .br .PP .SH "Detailed Description" .PP .PP \fBC Interface: \fP .RS 4 #include <\fBpapi\&.h\fP> .br int \fBPAPI_epc( int event, float *rtime, float *ptime, long long *ref, long long *core, long long *evt, float *epc )\fP; .RE .PP \fBParameters:\fP .RS 4 \fIevent\fP event code to be measured (0 defaults to PAPI_TOT_INS) .br \fI*rtime\fP total realtime since the first call .br \fI*ptime\fP total process time since the first call .br \fI*ref\fP incremental reference clock cycles since the last call .br \fI*core\fP incremental core clock cycles since the last call .br \fI*evt\fP total events since the first call .br \fI*epc\fP incremental events per cycle since the last call .RE .PP \fBReturn values:\fP .RS 4 \fIPAPI_EINVAL\fP The counters were already started by something other than \fBPAPI_epc()\fP\&. .br \fIPAPI_ENOEVNT\fP One of the requested events does not exist\&. .br \fIPAPI_ENOMEM\fP Insufficient memory to complete the operation\&. .RE .PP The first call to \fBPAPI_epc()\fP will initialize the PAPI High Level interface, set up the counters to monitor the user specified event, PAPI_TOT_CYC, and PAPI_REF_CYC (if it exists) and start the counters\&. .PP Subsequent calls will read the counters and return total real time, total process time, total event counts since the start of the measurement and the core and reference cycle count and EPC rate since the latest call to \fBPAPI_epc()\fP\&. .PP A call to \fBPAPI_stop_counters()\fP will stop the counters from running and then calls such as \fBPAPI_start_counters()\fP or other rate calls can safely be used\&. .PP \fBPAPI_epc\fP can provide a more detailed look at algorithm efficiency in light of clock variability in modern cpus\&. MFLOPS is no longer an adequate description of peak performance if clock rates can arbitrarily speed up or slow down\&. 
By allowing a user specified event and reporting reference cycles, core cycles and real time, \fBPAPI_epc\fP provides the information to compute an accurate effective clock rate, and an accurate measure of computational throughput\&. .PP \fBSee Also:\fP .RS 4 \fBPAPI_flips()\fP .PP \fBPAPI_flops()\fP .PP \fBPAPI_ipc()\fP .PP \fBPAPI_stop_counters()\fP .RE .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. papi-5.6.0/src/components/perfctr/000775 001750 001750 00000000000 13216244357 021161 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/libpfm4/lib/events/sparc_ultra12_events.h000664 001750 001750 00000006160 13216244365 025157 0ustar00jshenry1963jshenry1963000000 000000 /* Event table for the UltraSPARC-I/II/IIi/IIe PMU. Each entry gives the event name (.name), a human-readable description (.desc), a counter-select mask (.ctrl) and the event code (.code). NOTE(review): PME_CTRL_S0 / PME_CTRL_S1 appear to select PIC0 / PIC1 respectively, judging by the section comments below -- confirm against the PME_CTRL_* definitions. The same .code value is reused for different events on S0 and S1. */ static const sparc_entry_t ultra12_pe[] = { /* These two must always be first. They are the only entries selectable on both counters (S0 | S1). */ { .name = "Cycle_cnt", .desc = "Accumulated cycles", .ctrl = PME_CTRL_S0 | PME_CTRL_S1, .code = 0x0, }, { .name = "Instr_cnt", .desc = "Number of instructions completed", .ctrl = PME_CTRL_S0 | PME_CTRL_S1, .code = 0x1, }, /* PIC0 events for UltraSPARC-I/II/IIi/IIe (entries with .ctrl = PME_CTRL_S0 only) */ { .name = "Dispatch0_IC_miss", .desc = "I-buffer is empty from I-Cache miss", .ctrl = PME_CTRL_S0, .code = 0x2, }, { .name = "Dispatch0_storeBuf", .desc = "Store buffer can not hold additional stores", .ctrl = PME_CTRL_S0, .code = 0x3, }, { .name = "IC_ref", .desc = "I-cache references", .ctrl = PME_CTRL_S0, .code = 0x8, }, { .name = "DC_rd", .desc = "D-cache read references (including accesses that subsequently trap)", .ctrl = PME_CTRL_S0, .code = 0x9, }, { .name = "DC_wr", .desc = "D-cache write references (including accesses that subsequently trap)", .ctrl = PME_CTRL_S0, .code = 0xa, }, { .name = "Load_use", .desc = "An instruction in the execute stage depends on an earlier load result that is not yet available", .ctrl = PME_CTRL_S0, .code = 0xb, }, { .name = "EC_ref", .desc = "Total E-cache references", .ctrl = PME_CTRL_S0, .code = 0xc, }, { .name = "EC_write_hit_RDO", .desc = "E-cache hits that do a 
read for ownership UPA transaction", .ctrl = PME_CTRL_S0, .code = 0xd, }, { .name = "EC_snoop_inv", .desc = "E-cache invalidates from the following UPA transactions: S_INV_REQ, S_CPI_REQ", .ctrl = PME_CTRL_S0, .code = 0xe, }, { .name = "EC_rd_hit", .desc = "E-cache read hits from D-cache misses", .ctrl = PME_CTRL_S0, .code = 0xf, }, /* PIC1 events for UltraSPARC-I/II/IIi/IIe (entries with .ctrl = PME_CTRL_S1 only) */ { .name = "Dispatch0_mispred", .desc = "I-buffer is empty from Branch misprediction", .ctrl = PME_CTRL_S1, .code = 0x2, }, { .name = "Dispatch0_FP_use", .desc = "First instruction in the group depends on an earlier floating point result that is not yet available", .ctrl = PME_CTRL_S1, .code = 0x3, }, { .name = "IC_hit", .desc = "I-cache hits", .ctrl = PME_CTRL_S1, .code = 0x8, }, { .name = "DC_rd_hit", .desc = "D-cache read hits", .ctrl = PME_CTRL_S1, .code = 0x9, }, { .name = "DC_wr_hit", .desc = "D-cache write hits", .ctrl = PME_CTRL_S1, .code = 0xa, }, { .name = "Load_use_RAW", .desc = "There is a load use in the execute stage and there is a read-after-write hazard on the oldest outstanding load", .ctrl = PME_CTRL_S1, .code = 0xb, }, { .name = "EC_hit", .desc = "Total E-cache hits", .ctrl = PME_CTRL_S1, .code = 0xc, }, { .name = "EC_wb", .desc = "E-cache misses that do writebacks", .ctrl = PME_CTRL_S1, .code = 0xd, }, { .name = "EC_snoop_cb", .desc = "E-cache snoop copy-backs from the following UPA transactions: S_CPB_REQ, S_CPI_REQ, S_CPD_REQ, S_CPB_MIS_REQ", .ctrl = PME_CTRL_S1, .code = 0xe, }, { .name = "EC_ic_hit", .desc = "E-cache read hits from I-cache misses", .ctrl = PME_CTRL_S1, .code = 0xf, }, }; /* Number of entries in ultra12_pe, computed at compile time from the array size. */ #define PME_SPARC_ULTRA12_EVENT_COUNT (sizeof(ultra12_pe)/sizeof(sparc_entry_t)) papi-5.6.0/src/perfctr-2.7.x/etc/costs/Athlon-1.46000664 001750 001750 00000001310 13216244367 023206 0ustar00jshenry1963jshenry1963000000 000000 [data from a 1.46 (1.7 marketing) GHz Athlon] PERFCTR INIT: vendor 2, family 6, model 6, stepping 2, clock 1466764 kHz PERFCTR INIT: NITER == 64 PERFCTR INIT: 
loop overhead is 90 cycles PERFCTR INIT: rdtsc cost is 18.6 cycles (1283 total) PERFCTR INIT: rdpmc cost is 19.8 cycles (1359 total) PERFCTR INIT: rdmsr (counter) cost is 51.5 cycles (3391 total) PERFCTR INIT: rdmsr (evntsel) cost is 52.4 cycles (3446 total) PERFCTR INIT: wrmsr (counter) cost is 86.6 cycles (5636 total) PERFCTR INIT: wrmsr (evntsel) cost is 231.7 cycles (14920 total) PERFCTR INIT: read cr4 cost is 5.9 cycles (473 total) PERFCTR INIT: write cr4 cost is 64.9 cycles (4246 total) perfctr: driver 2.3.9, cpu type AMD K7 at 1466764 kHz papi-5.6.0/src/libpfm4/debian/README.source000664 001750 001750 00000000131 13216244363 022255 0ustar00jshenry1963jshenry1963000000 000000 Sources were slightly modified to compile with -Werror -Arun Sharma (aruns@google.com) papi-5.6.0/src/libpfm-3.y/lib/intel_corei7_events.h000664 001750 001750 00000176757 13216244363 024121 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2008 Google, Inc * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. */ static pme_nhm_entry_t corei7_pe[]={ /* * BEGIN architected events */ {.pme_name = "UNHALTED_CORE_CYCLES", .pme_code = 0x003c, .pme_cntmsk = 0x2000f, .pme_flags = PFMLIB_NHM_FIXED1, .pme_desc = "count core clock cycles whenever the clock signal on the specific core is running (not halted). Alias to event CPU_CLK_UNHALTED:THREAD" }, {.pme_name = "INSTRUCTION_RETIRED", .pme_code = 0x00c0, .pme_cntmsk = 0x1000f, .pme_flags = PFMLIB_NHM_FIXED0|PFMLIB_NHM_PEBS, .pme_desc = "count the number of instructions at retirement. Alias to event INST_RETIRED:ANY_P", }, {.pme_name = "INSTRUCTIONS_RETIRED", .pme_code = 0x00c0, .pme_cntmsk = 0x1000f, .pme_flags = PFMLIB_NHM_FIXED0|PFMLIB_NHM_PEBS, .pme_desc = "This is an alias for INSTRUCTION_RETIRED", }, {.pme_name = "UNHALTED_REFERENCE_CYCLES", .pme_code = 0x013c, .pme_cntmsk = 0x40000, .pme_flags = PFMLIB_NHM_FIXED2_ONLY, .pme_desc = "Unhalted reference cycles", }, {.pme_name = "LLC_REFERENCES", .pme_code = 0x4f2e, .pme_cntmsk = 0xf, .pme_desc = "count each request originating from the core to reference a cache line in the last level cache. The count may include speculation, but excludes cache line fills due to hardware prefetch. Alias to L2_RQSTS:SELF_DEMAND_MESI", }, {.pme_name = "LAST_LEVEL_CACHE_REFERENCES", .pme_code = 0x4f2e, .pme_cntmsk = 0xf, .pme_desc = "This is an alias for LLC_REFERENCES", }, {.pme_name = "LLC_MISSES", .pme_code = 0x412e, .pme_cntmsk = 0xf, .pme_desc = "count each cache miss condition for references to the last level cache. The event count may include speculation, but excludes cache line fills due to hardware prefetch. 
Alias to event L2_RQSTS:SELF_DEMAND_I_STATE", }, {.pme_name = "LAST_LEVEL_CACHE_MISSES", .pme_code = 0x412e, .pme_cntmsk = 0xf, .pme_desc = "This is an alias for LLC_MISSES", }, {.pme_name = "BRANCH_INSTRUCTIONS_RETIRED", .pme_code = 0x00c4, .pme_cntmsk = 0xf, .pme_desc = "count branch instructions at retirement. Specifically, this event counts the retirement of the last micro-op of a branch instruction. Alias to event BR_INST_RETIRED:ANY", }, /* * BEGIN core specific events */ { .pme_name = "ARITH", .pme_desc = "Counts arithmetic multiply and divide operations", .pme_code = 0x14, .pme_flags = PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "CYCLES_DIV_BUSY", .pme_udesc = "Counts the number of cycles the divider is busy executing divide or square root operations. The divide can be integer, X87 or Streaming SIMD Extensions (SSE). The square root operation can be either X87 or SSE.", .pme_ucode = 0x01, .pme_uflags = 0, }, { .pme_uname = "DIV", .pme_udesc = "Counts the number of divide or square root operations. The divide can be integer, X87 or Streaming SIMD Extensions (SSE). The square root operation can be either X87 or SSE.", .pme_ucode = 0x01 | (1<<16) | (1<<15) | (1<<10), /* cmask=1  invert=1  edge=1 */ .pme_uflags = 0, }, { .pme_uname = "MUL", .pme_udesc = "Counts the number of multiply operations executed. 
This includes integer as well as floating point multiply operations but excludes DPPS mul and MPSAD.", .pme_ucode = 0x02, .pme_uflags = 0, }, }, .pme_numasks = 3 }, { .pme_name = "BACLEAR", .pme_desc = "Branch address calculator", .pme_code = 0xE6, .pme_flags = PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "BAD_TARGET", .pme_udesc = "BACLEAR asserted with bad target address", .pme_ucode = 0x02, .pme_uflags = 0, }, { .pme_uname = "CLEAR", .pme_udesc = "BACLEAR asserted, regardless of cause", .pme_ucode = 0x01, .pme_uflags = 0, }, }, .pme_numasks = 2 }, { .pme_name = "BACLEAR_FORCE_IQ", .pme_desc = "Instruction queue forced BACLEAR", .pme_code = 0x01A7, .pme_flags = 0, }, { .pme_name = "BOGUS_BR", .pme_desc = "Counts the number of bogus branches.", .pme_code = 0x01E4, .pme_flags = 0, }, { .pme_name = "BPU_CLEARS", .pme_desc = "Branch prediction Unit clears", .pme_code = 0xE8, .pme_flags = PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "EARLY", .pme_udesc = "Early Branch Prediciton Unit clears", .pme_ucode = 0x01, .pme_uflags = 0, }, { .pme_uname = "LATE", .pme_udesc = "Late Branch Prediction Unit clears", .pme_ucode = 0x02, .pme_uflags = 0, }, { .pme_uname = "ANY", .pme_udesc = "count any Branch Prediction Unit clears", .pme_ucode = 0x03, .pme_uflags = 0, }, }, .pme_numasks = 3 }, { .pme_name = "BPU_MISSED_CALL_RET", .pme_desc = "Branch prediction unit missed call or return", .pme_code = 0x01E5, .pme_flags = 0, }, { .pme_name = "BR_INST_DECODED", .pme_desc = "Branch instructions decoded", .pme_code = 0x01E0, .pme_flags = 0, }, { .pme_name = "BR_INST_EXEC", .pme_desc = "Branch instructions executed", .pme_code = 0x88, .pme_flags = PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "ANY", .pme_udesc = "Branch instructions executed", .pme_ucode = 0x7F, .pme_uflags = 0, }, { .pme_uname = "COND", .pme_udesc = "Conditional branch instructions executed", .pme_ucode = 0x01, .pme_uflags = 0, }, { .pme_uname = "DIRECT", .pme_udesc = "Unconditional 
branches executed", .pme_ucode = 0x02, .pme_uflags = 0, }, { .pme_uname = "DIRECT_NEAR_CALL", .pme_udesc = "Unconditional call branches executed", .pme_ucode = 0x10, .pme_uflags = 0, }, { .pme_uname = "INDIRECT_NEAR_CALL", .pme_udesc = "Indirect call branches executed", .pme_ucode = 0x20, .pme_uflags = 0, }, { .pme_uname = "INDIRECT_NON_CALL", .pme_udesc = "Indirect non call branches executed", .pme_ucode = 0x04, .pme_uflags = 0, }, { .pme_uname = "NEAR_CALLS", .pme_udesc = "Call branches executed", .pme_ucode = 0x30, .pme_uflags = 0, }, { .pme_uname = "NON_CALLS", .pme_udesc = "All non call branches executed", .pme_ucode = 0x07, .pme_uflags = 0, }, { .pme_uname = "RETURN_NEAR", .pme_udesc = "Indirect return branches executed", .pme_ucode = 0x08, .pme_uflags = 0, }, { .pme_uname = "TAKEN", .pme_udesc = "Taken branches executed", .pme_ucode = 0x40, .pme_uflags = 0, }, }, .pme_numasks = 10 }, { .pme_name = "BR_INST_RETIRED", .pme_desc = "Retired branch instructions", .pme_code = 0xC4, .pme_flags = PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "ALL_BRANCHES", .pme_udesc = "Retired branch instructions (Precise Event)", .pme_ucode = 0x04, .pme_uflags = PFMLIB_NHM_PEBS, }, { .pme_uname = "CONDITIONAL", .pme_udesc = "Retired conditional branch instructions (Precise Event)", .pme_ucode = 0x01, .pme_uflags = PFMLIB_NHM_PEBS, }, { .pme_uname = "NEAR_CALL", .pme_udesc = "Retired near call instructions (Precise Event)", .pme_ucode = 0x02, .pme_uflags = PFMLIB_NHM_PEBS, }, }, .pme_numasks = 3 }, { .pme_name = "BR_MISP_EXEC", .pme_desc = "Mispredicted branches executed", .pme_code = 0x89, .pme_flags = PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "ANY", .pme_udesc = "Mispredicted branches executed", .pme_ucode = 0x7F, .pme_uflags = 0, }, { .pme_uname = "COND", .pme_udesc = "Mispredicted conditional branches executed", .pme_ucode = 0x01, .pme_uflags = 0, }, { .pme_uname = "DIRECT", .pme_udesc = "Mispredicted unconditional branches executed", .pme_ucode = 0x02, 
.pme_uflags = 0, }, { .pme_uname = "DIRECT_NEAR_CALL", .pme_udesc = "Mispredicted non call branches executed", .pme_ucode = 0x10, .pme_uflags = 0, }, { .pme_uname = "INDIRECT_NEAR_CALL", .pme_udesc = "Mispredicted indirect call branches executed", .pme_ucode = 0x20, .pme_uflags = 0, }, { .pme_uname = "INDIRECT_NON_CALL", .pme_udesc = "Mispredicted indirect non call branches executed", .pme_ucode = 0x04, .pme_uflags = 0, }, { .pme_uname = "NEAR_CALLS", .pme_udesc = "Mispredicted call branches executed", .pme_ucode = 0x30, .pme_uflags = 0, }, { .pme_uname = "NON_CALLS", .pme_udesc = "Mispredicted non call branches executed", .pme_ucode = 0x07, .pme_uflags = 0, }, { .pme_uname = "RETURN_NEAR", .pme_udesc = "Mispredicted return branches executed", .pme_ucode = 0x08, .pme_uflags = 0, }, { .pme_uname = "TAKEN", .pme_udesc = "Mispredicted taken branches executed", .pme_ucode = 0x40, .pme_uflags = 0, }, }, .pme_numasks = 10 }, { .pme_name = "BR_MISP_RETIRED", .pme_desc = "Count Mispredicted Branch Activity", .pme_code = 0xC5, .pme_flags = 0, .pme_umasks = { { .pme_uname = "NEAR_CALL", .pme_udesc = "Counts mispredicted direct and indirect near unconditional retired calls", .pme_ucode = 0x02, .pme_uflags = 0, }, }, .pme_numasks = 1 }, { .pme_name = "CACHE_LOCK_CYCLES", .pme_desc = "Cache lock cycles", .pme_code = 0x63, .pme_flags = PFMLIB_NHM_PMC01, .pme_umasks = { { .pme_uname = "L1D", .pme_udesc = "Cycles L1D locked", .pme_ucode = 0x02, .pme_uflags = 0, }, { .pme_uname = "L1D_L2", .pme_udesc = "Cycles L1D and L2 locked", .pme_ucode = 0x01, .pme_uflags = 0, }, }, .pme_numasks = 2 }, { .pme_name = "CPU_CLK_UNHALTED", .pme_desc = "Cycles when processor is not in halted state", .pme_code = 0x3C, .pme_flags = PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "THREAD_P", .pme_udesc = "Cycles when thread is not halted (programmable counter)", .pme_ucode = 0x00, .pme_uflags = 0, }, { .pme_uname = "REF_P", .pme_udesc = "Reference base clock (133 Mhz) cycles when thread is not 
halted", .pme_ucode = 0x01, .pme_uflags = 0, }, }, .pme_numasks = 2 }, { .pme_name = "DTLB_LOAD_MISSES", .pme_desc = "Data TLB load misses", .pme_code = 0x08, .pme_flags = PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "ANY", .pme_udesc = "DTLB load misses", .pme_ucode = 0x01, .pme_uflags = 0, }, { .pme_uname = "PDE_MISS", .pme_udesc = "DTLB load miss caused by low part of address", .pme_ucode = 0x20, .pme_uflags = 0, }, { .pme_uname = "WALK_COMPLETED", .pme_udesc = "DTLB load miss page walks complete", .pme_ucode = 0x02, .pme_uflags = 0, }, { .pme_uname = "STLB_HIT", .pme_udesc = "DTLB second level hit", .pme_ucode = 0x10, .pme_uflags = 0, }, { .pme_uname = "PDP_MISS", .pme_udesc = "Number of DTLB cache load misses where the high part of the linear to physical address translation was missed", .pme_ucode = 0x40, .pme_uflags = 0, }, { .pme_uname = "LARGE_WALK_COMPLETED", .pme_udesc = "Counts number of completed large page walks due to load miss in the STLB", .pme_ucode = 0x80, .pme_uflags = 0, }, }, .pme_numasks = 6 }, { .pme_name = "DTLB_MISSES", .pme_desc = "Data TLB misses", .pme_code = 0x49, .pme_flags = PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "ANY", .pme_udesc = "DTLB misses", .pme_ucode = 0x01, .pme_uflags = 0, }, { .pme_uname = "STLB_HIT", .pme_udesc = "DTLB first level misses but second level hit", .pme_ucode = 0x10, .pme_uflags = 0, }, { .pme_uname = "WALK_COMPLETED", .pme_udesc = "DTLB miss page walks", .pme_ucode = 0x02, .pme_uflags = 0, }, { .pme_uname = "PDE_MISS", .pme_udesc = "Number of DTLB cache misses where the low part of the linear to physical address translation was missed", .pme_ucode = 0x20, .pme_uflags = 0, }, { .pme_uname = "PDP_MISS", .pme_udesc = "Number of DTLB misses where the high part of the linear to physical address translation was missed", .pme_ucode = 0x40, .pme_uflags = 0, }, { .pme_uname = "LARGE_WALK_COMPLETED", .pme_udesc = "Counts number of completed large page walks due to misses in the STLB", 
.pme_ucode = 0x80, .pme_uflags = 0, }, }, .pme_numasks = 6 }, { .pme_name = "EPT", .pme_desc = "Extended Page Directory", .pme_code = 0x4F, .pme_flags = PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "EPDE_MISS", .pme_udesc = "Extended Page Directory Entry miss", .pme_ucode = 0x02, .pme_uflags = 0, }, { .pme_uname = "EPDPE_MISS", .pme_udesc = "Extended Page Directory Pointer miss", .pme_ucode = 0x08, .pme_uflags = 0, }, { .pme_uname = "EPDPE_HIT", .pme_udesc = "Extended Page Directory Pointer hit", .pme_ucode = 0x04, .pme_uflags = 0, }, }, .pme_numasks = 3 }, { .pme_name = "ES_REG_RENAMES", .pme_desc = "ES segment renames", .pme_code = 0x01D5, .pme_flags = 0, }, { .pme_name = "FP_ASSIST", .pme_desc = "Floating point assists", .pme_code = 0xF7, .pme_flags = PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "ALL", .pme_udesc = "Floating point assists (Precise Event)", .pme_ucode = 0x01, .pme_uflags = PFMLIB_NHM_PEBS, }, { .pme_uname = "INPUT", .pme_udesc = "Floating poiint assists for invalid input value (Precise Event)", .pme_ucode = 0x04, .pme_uflags = PFMLIB_NHM_PEBS, }, { .pme_uname = "OUTPUT", .pme_udesc = "Floating point assists for invalid output value (Precise Event)", .pme_ucode = 0x02, .pme_uflags = PFMLIB_NHM_PEBS, }, }, .pme_numasks = 3 }, { .pme_name = "FP_COMP_OPS_EXE", .pme_desc = "Floating point computational micro-ops", .pme_code = 0x10, .pme_flags = PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "MMX", .pme_udesc = "MMX Uops", .pme_ucode = 0x02, .pme_uflags = 0, }, { .pme_uname = "SSE_DOUBLE_PRECISION", .pme_udesc = "SSE* FP double precision Uops", .pme_ucode = 0x80, .pme_uflags = 0, }, { .pme_uname = "SSE_FP", .pme_udesc = "SSE and SSE2 FP Uops", .pme_ucode = 0x04, .pme_uflags = 0, }, { .pme_uname = "SSE_FP_PACKED", .pme_udesc = "SSE FP packed Uops", .pme_ucode = 0x10, .pme_uflags = 0, }, { .pme_uname = "SSE_FP_SCALAR", .pme_udesc = "SSE FP scalar Uops", .pme_ucode = 0x20, .pme_uflags = 0, }, { .pme_uname = 
"SSE_SINGLE_PRECISION", .pme_udesc = "SSE* FP single precision Uops", .pme_ucode = 0x40, .pme_uflags = 0, }, { .pme_uname = "SSE2_INTEGER", .pme_udesc = "SSE2 integer Uops", .pme_ucode = 0x08, .pme_uflags = 0, }, { .pme_uname = "X87", .pme_udesc = "Computational floating-point operations executed", .pme_ucode = 0x01, .pme_uflags = 0, }, }, .pme_numasks = 8 }, { .pme_name = "FP_MMX_TRANS", .pme_desc = "Floating Point to and from MMX transitions", .pme_code = 0xCC, .pme_flags = PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "ANY", .pme_udesc = "All Floating Point to and from MMX transitions", .pme_ucode = 0x03, .pme_uflags = 0, }, { .pme_uname = "TO_FP", .pme_udesc = "Transitions from MMX to Floating Point instructions", .pme_ucode = 0x01, .pme_uflags = 0, }, { .pme_uname = "TO_MMX", .pme_udesc = "Transitions from Floating Point to MMX instructions", .pme_ucode = 0x02, .pme_uflags = 0, }, }, .pme_numasks = 3 }, { .pme_name = "HW_INT", .pme_desc = "Hardware interrupts", .pme_code = 0x1D, .pme_flags = PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "RCV", .pme_udesc = "Number of interrupt received", .pme_ucode = 0x01, .pme_uflags = 0, }, { .pme_uname = "CYCLES_MASKED", .pme_udesc = "Number of cycles interrupt are masked", .pme_ucode = 0x02, .pme_uflags = 0, }, { .pme_uname = "CYCLES_PENDING_AND_MASKED", .pme_udesc = "Number of cycles interrupts are pending and masked", .pme_ucode = 0x04, .pme_uflags = 0, }, }, .pme_numasks = 3 }, { .pme_name = "IFU_IVC", .pme_desc = "Instruction Fetch unit victim cache", .pme_code = 0x81, .pme_flags = PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "FULL", .pme_udesc = "Instruction Fetche unit victim cache full", .pme_ucode = 0x01, .pme_uflags = 0, }, { .pme_uname = "L1I_EVICTION", .pme_udesc = "L1 Instruction cache evictions", .pme_ucode = 0x02, .pme_uflags = 0, }, }, .pme_numasks = 2 }, { .pme_name = "ILD_STALL", .pme_desc = "Instruction Length Decoder stalls", .pme_code = 0x87, .pme_flags = 0, .pme_umasks = 
{ { .pme_uname = "ANY", .pme_udesc = "Any Instruction Length Decoder stall cycles", .pme_ucode = 0x0F, .pme_uflags = 0, }, { .pme_uname = "IQ_FULL", .pme_udesc = "Instruction Queue full stall cycles", .pme_ucode = 0x04, .pme_uflags = 0, }, { .pme_uname = "LCP", .pme_udesc = "Length Change Prefix stall cycles", .pme_ucode = 0x01, .pme_uflags = 0, }, { .pme_uname = "MRU", .pme_udesc = "Stall cycles due to BPU MRU bypass", .pme_ucode = 0x02, .pme_uflags = 0, }, { .pme_uname = "REGEN", .pme_udesc = "Regen stall cycles", .pme_ucode = 0x08, .pme_uflags = 0, }, }, .pme_numasks = 5 }, { .pme_name = "INST_DECODED", .pme_desc = "Instructions decoded", .pme_code = 0x18, .pme_flags = 0, .pme_umasks = { { .pme_uname = "DEC0", .pme_udesc = "Instructions that must be decoded by decoder 0", .pme_ucode = 0x01, .pme_uflags = 0, }, }, .pme_numasks = 1 }, { .pme_name = "INST_QUEUE_WRITES", .pme_desc = "Instructions written to instruction queue.", .pme_code = 0x0117, .pme_flags = 0, }, { .pme_name = "INST_QUEUE_WRITE_CYCLES", .pme_desc = "Cycles instructions are written to the instruction queue", .pme_code = 0x011E, .pme_flags = 0, }, { .pme_name = "INST_RETIRED", .pme_desc = "Instructions retired", .pme_code = 0xC0, .pme_flags = PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "ANY_P", .pme_udesc = "Instructions Retired (Precise Event)", .pme_ucode = 0x00, .pme_uflags = PFMLIB_NHM_PEBS, }, { .pme_uname = "X87", .pme_udesc = "Retired floating-point operations (Precise Event)", .pme_ucode = 0x02, .pme_uflags = PFMLIB_NHM_PEBS, }, }, .pme_numasks = 2 }, { .pme_name = "IO_TRANSACTIONS", .pme_desc = "I/O transactions", .pme_code = 0x016C, .pme_flags = 0, }, { .pme_name = "ITLB_FLUSH", .pme_desc = "Counts the number of ITLB flushes", .pme_code = 0x01AE, .pme_flags = 0, }, { .pme_name = "ITLB_MISSES", .pme_desc = "Instruction TLB misses", .pme_code = 0x85, .pme_flags = PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "ANY", .pme_udesc = "ITLB miss", .pme_ucode = 0x01, 
.pme_uflags = 0, }, { .pme_uname = "WALK_COMPLETED", .pme_udesc = "ITLB miss page walks", .pme_ucode = 0x02, .pme_uflags = 0, }, { .pme_uname = "STLB_HIT", .pme_udesc = "Counts the number of ITLB misses that hit in the second level TLB", .pme_ucode = 0x10, .pme_uflags = 0, }, { .pme_uname = "PDE_MISS", .pme_udesc = "Number of ITLB misses where the low part of the linear to physical address translation was missed", .pme_ucode = 0x20, .pme_uflags = 0, }, { .pme_uname = "PDP_MISS", .pme_udesc = "Number of ITLB misses where the high part of the linear to physical address translation was missed", .pme_ucode = 0x40, .pme_uflags = 0, }, { .pme_uname = "LARGE_WALK_COMPLETED", .pme_udesc = "Counts number of completed large page walks due to misses in the STLB", .pme_ucode = 0x80, .pme_uflags = 0, }, }, .pme_numasks = 6 }, { .pme_name = "ITLB_MISS_RETIRED", .pme_desc = "Retired instructions that missed the ITLB (Precise Event)", .pme_code = 0x20C8, .pme_flags = PFMLIB_NHM_PEBS, }, { .pme_name = "L1D", .pme_desc = "L1D cache", .pme_code = 0x51, .pme_flags = PFMLIB_NHM_PMC01|PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "M_EVICT", .pme_udesc = "L1D cache lines replaced in M state", .pme_ucode = 0x04, .pme_uflags = 0, }, { .pme_uname = "M_REPL", .pme_udesc = "L1D cache lines allocated in the M state", .pme_ucode = 0x02, .pme_uflags = 0, }, { .pme_uname = "M_SNOOP_EVICT", .pme_udesc = "L1D snoop eviction of cache lines in M state", .pme_ucode = 0x08, .pme_uflags = 0, }, { .pme_uname = "REPL", .pme_udesc = "L1 data cache lines allocated", .pme_ucode = 0x01, .pme_uflags = 0, }, }, .pme_numasks = 4 }, { .pme_name = "L1D_ALL_REF", .pme_desc = "L1D references", .pme_code = 0x43, .pme_flags = PFMLIB_NHM_PMC01|PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "ANY", .pme_udesc = "All references to the L1 data cache", .pme_ucode = 0x01, .pme_uflags = 0, }, { .pme_uname = "CACHEABLE", .pme_udesc = "L1 data cacheable reads and writes", .pme_ucode = 0x02, .pme_uflags = 0, 
}, }, .pme_numasks = 2 }, { .pme_name = "L1D_CACHE_LD", .pme_desc = "L1D cacheable loads. WARNING: event may overcount loads", .pme_code = 0x40, .pme_flags = PFMLIB_NHM_PMC01|PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "E_STATE", .pme_udesc = "L1 data cache read in E state", .pme_ucode = 0x04, .pme_uflags = 0, }, { .pme_uname = "I_STATE", .pme_udesc = "L1 data cache read in I state (misses)", .pme_ucode = 0x01, .pme_uflags = 0, }, { .pme_uname = "M_STATE", .pme_udesc = "L1 data cache read in M state", .pme_ucode = 0x08, .pme_uflags = 0, }, { .pme_uname = "MESI", .pme_udesc = "L1 data cache reads", .pme_ucode = 0x0F, .pme_uflags = 0, }, { .pme_uname = "S_STATE", .pme_udesc = "L1 data cache read in S state", .pme_ucode = 0x02, .pme_uflags = 0, }, }, .pme_numasks = 5 }, { .pme_name = "L1D_CACHE_LOCK", .pme_desc = "L1 data cache load lock", .pme_code = 0x42, .pme_flags = PFMLIB_NHM_PMC01|PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "E_STATE", .pme_udesc = "L1 data cache load locks in E state", .pme_ucode = 0x04, .pme_uflags = 0, }, { .pme_uname = "HIT", .pme_udesc = "L1 data cache load lock hits. WARNING: overcounts by 3x", .pme_ucode = 0x01, .pme_uflags = 0, }, { .pme_uname = "M_STATE", .pme_udesc = "L1 data cache load locks in M state. 
WARNING: overcounts by 3x", .pme_ucode = 0x08, .pme_uflags = 0, }, { .pme_uname = "S_STATE", .pme_udesc = "L1 data cache load locks in S state", .pme_ucode = 0x02, .pme_uflags = 0, }, }, .pme_numasks = 4 }, { .pme_name = "L1D_CACHE_LOCK_FB_HIT", .pme_desc = "L1D load lock accepted in fill buffer", .pme_code = 0x0153, .pme_flags = PFMLIB_NHM_PMC01, }, { .pme_name = "L1D_CACHE_PREFETCH_LOCK_FB_HIT", .pme_desc = "L1D prefetch load lock accepted in fill buffer", .pme_code = 0x0152, .pme_flags = PFMLIB_NHM_PMC01, }, { .pme_name = "L1D_CACHE_ST", .pme_desc = "L1 data cache stores", .pme_code = 0x41, .pme_flags = PFMLIB_NHM_PMC01|PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "E_STATE", .pme_udesc = "L1 data cache stores in E state", .pme_ucode = 0x04, .pme_uflags = 0, }, { .pme_uname = "I_STATE", .pme_udesc = "L1 data cache store in the I state", .pme_ucode = 0x01, .pme_uflags = 0, }, { .pme_uname = "M_STATE", .pme_udesc = "L1 data cache stores in M state", .pme_ucode = 0x08, .pme_uflags = 0, }, { .pme_uname = "S_STATE", .pme_udesc = "L1 data cache stores in S state", .pme_ucode = 0x02, .pme_uflags = 0, }, { .pme_uname = "MESI", .pme_udesc = "L1 data cache store in all states", .pme_ucode = 0x0F, .pme_uflags = 0, }, }, .pme_numasks = 5 }, { .pme_name = "L1D_PREFETCH", .pme_desc = "L1D hardware prefetch", .pme_code = 0x4E, .pme_flags = PFMLIB_NHM_PMC01|PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "MISS", .pme_udesc = "L1D hardware prefetch misses", .pme_ucode = 0x02, .pme_uflags = 0, }, { .pme_uname = "REQUESTS", .pme_udesc = "L1D hardware prefetch requests", .pme_ucode = 0x01, .pme_uflags = 0, }, { .pme_uname = "TRIGGERS", .pme_udesc = "L1D hardware prefetch requests triggered", .pme_ucode = 0x04, .pme_uflags = 0, }, }, .pme_numasks = 3 }, { .pme_name = "L1D_WB_L2", .pme_desc = "L1 writebacks to L2", .pme_code = 0x28, .pme_flags = PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "E_STATE", .pme_udesc = "L1 writebacks to L2 in E state", 
.pme_ucode = 0x04, .pme_uflags = 0, }, { .pme_uname = "I_STATE", .pme_udesc = "L1 writebacks to L2 in I state (misses)", .pme_ucode = 0x01, .pme_uflags = 0, }, { .pme_uname = "M_STATE", .pme_udesc = "L1 writebacks to L2 in M state", .pme_ucode = 0x08, .pme_uflags = 0, }, { .pme_uname = "S_STATE", .pme_udesc = "L1 writebacks to L2 in S state", .pme_ucode = 0x02, .pme_uflags = 0, }, { .pme_uname = "MESI", .pme_udesc = "All L1 writebacks to L2", .pme_ucode = 0x0F, .pme_uflags = 0, }, }, .pme_numasks = 5 }, { .pme_name = "L1I", .pme_desc = "L1I instruction fetches", .pme_code = 0x80, .pme_flags = PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "CYCLES_STALLED", .pme_udesc = "L1I instruction fetch stall cycles", .pme_ucode = 0x04, .pme_uflags = 0, }, { .pme_uname = "HITS", .pme_udesc = "L1I instruction fetch hits", .pme_ucode = 0x01, .pme_uflags = 0, }, { .pme_uname = "MISSES", .pme_udesc = "L1I instruction fetch misses", .pme_ucode = 0x02, .pme_uflags = 0, }, { .pme_uname = "READS", .pme_udesc = "L1I Instruction fetches", .pme_ucode = 0x03, .pme_uflags = 0, }, }, .pme_numasks = 4 }, { .pme_name = "L1I_OPPORTUNISTIC_HITS", .pme_desc = "Opportunistic hits in streaming", .pme_code = 0x0183, .pme_flags = 0, }, { .pme_name = "L2_DATA_RQSTS", .pme_desc = "L2 data requests", .pme_code = 0x26, .pme_flags = PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "ANY", .pme_udesc = "All L2 data requests", .pme_ucode = 0xFF, .pme_uflags = 0, }, { .pme_uname = "DEMAND_E_STATE", .pme_udesc = "L2 data demand loads in E state", .pme_ucode = 0x04, .pme_uflags = 0, }, { .pme_uname = "DEMAND_I_STATE", .pme_udesc = "L2 data demand loads in I state (misses)", .pme_ucode = 0x01, .pme_uflags = 0, }, { .pme_uname = "DEMAND_M_STATE", .pme_udesc = "L2 data demand loads in M state", .pme_ucode = 0x08, .pme_uflags = 0, }, { .pme_uname = "DEMAND_MESI", .pme_udesc = "L2 data demand requests", .pme_ucode = 0x0F, .pme_uflags = 0, }, { .pme_uname = "DEMAND_S_STATE", .pme_udesc = "L2 data 
demand loads in S state", .pme_ucode = 0x02, .pme_uflags = 0, }, { .pme_uname = "PREFETCH_E_STATE", .pme_udesc = "L2 data prefetches in E state", .pme_ucode = 0x40, .pme_uflags = 0, }, { .pme_uname = "PREFETCH_I_STATE", .pme_udesc = "L2 data prefetches in the I state (misses)", .pme_ucode = 0x10, .pme_uflags = 0, }, { .pme_uname = "PREFETCH_M_STATE", .pme_udesc = "L2 data prefetches in M state", .pme_ucode = 0x80, .pme_uflags = 0, }, { .pme_uname = "PREFETCH_MESI", .pme_udesc = "All L2 data prefetches", .pme_ucode = 0xF0, .pme_uflags = 0, }, { .pme_uname = "PREFETCH_S_STATE", .pme_udesc = "L2 data prefetches in the S state", .pme_ucode = 0x20, .pme_uflags = 0, }, }, .pme_numasks = 11 }, { .pme_name = "L2_HW_PREFETCH", .pme_desc = "L2 HW prefetches", .pme_code = 0xF3, .pme_flags = PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "HIT", .pme_udesc = "Count L2 HW prefetcher detector hits", .pme_ucode = 0x01, .pme_uflags = 0, }, { .pme_uname = "ALLOC", .pme_udesc = "Count L2 HW prefetcher allocations", .pme_ucode = 0x02, .pme_uflags = 0, }, { .pme_uname = "DATA_TRIGGER", .pme_udesc = "Count L2 HW data prefetcher triggered", .pme_ucode = 0x04, .pme_uflags = 0, }, { .pme_uname = "CODE_TRIGGER", .pme_udesc = "Count L2 HW code prefetcher triggered", .pme_ucode = 0x08, .pme_uflags = 0, }, { .pme_uname = "DCA_TRIGGER", .pme_udesc = "Count L2 HW DCA prefetcher triggered", .pme_ucode = 0x10, .pme_uflags = 0, }, { .pme_uname = "KICK_START", .pme_udesc = "Count L2 HW prefetcher kick started", .pme_ucode = 0x20, .pme_uflags = 0, }, }, .pme_numasks = 6 }, { .pme_name = "L2_LINES_IN", .pme_desc = "L2 lines allocated", .pme_code = 0xF1, .pme_flags = PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "ANY", .pme_udesc = "any L2 lines allocated", .pme_ucode = 0x07, .pme_uflags = 0, }, { .pme_uname = "E_STATE", .pme_udesc = "L2 lines allocated in the E state", .pme_ucode = 0x04, .pme_uflags = 0, }, { .pme_uname = "S_STATE", .pme_udesc = "L2 lines allocated in the S state", 
.pme_ucode = 0x02, .pme_uflags = 0, }, }, .pme_numasks = 3 }, { .pme_name = "L2_LINES_OUT", .pme_desc = "L2 lines evicted", .pme_code = 0xF2, .pme_flags = PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "ANY", .pme_udesc = "L2 lines evicted", .pme_ucode = 0x0F, .pme_uflags = 0, }, { .pme_uname = "DEMAND_CLEAN", .pme_udesc = "L2 lines evicted by a demand request", .pme_ucode = 0x01, .pme_uflags = 0, }, { .pme_uname = "DEMAND_DIRTY", .pme_udesc = "L2 modified lines evicted by a demand request", .pme_ucode = 0x02, .pme_uflags = 0, }, { .pme_uname = "PREFETCH_CLEAN", .pme_udesc = "L2 lines evicted by a prefetch request", .pme_ucode = 0x04, .pme_uflags = 0, }, { .pme_uname = "PREFETCH_DIRTY", .pme_udesc = "L2 modified lines evicted by a prefetch request", .pme_ucode = 0x08, .pme_uflags = 0, }, }, .pme_numasks = 5 }, { .pme_name = "L2_RQSTS", .pme_desc = "L2 requests", .pme_code = 0x24, .pme_flags = PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "MISS", .pme_udesc = "All L2 misses", .pme_ucode = 0xAA, .pme_uflags = 0, }, { .pme_uname = "REFERENCES", .pme_udesc = "All L2 requests", .pme_ucode = 0xFF, .pme_uflags = 0, }, { .pme_uname = "IFETCH_HIT", .pme_udesc = "L2 instruction fetch hits", .pme_ucode = 0x10, .pme_uflags = 0, }, { .pme_uname = "IFETCH_MISS", .pme_udesc = "L2 instruction fetch misses", .pme_ucode = 0x20, .pme_uflags = 0, }, { .pme_uname = "IFETCHES", .pme_udesc = "L2 instruction fetches", .pme_ucode = 0x30, .pme_uflags = 0, }, { .pme_uname = "LD_HIT", .pme_udesc = "L2 load hits", .pme_ucode = 0x01, .pme_uflags = 0, }, { .pme_uname = "LD_MISS", .pme_udesc = "L2 load misses", .pme_ucode = 0x02, .pme_uflags = 0, }, { .pme_uname = "LOADS", .pme_udesc = "L2 requests", .pme_ucode = 0x03, .pme_uflags = 0, }, { .pme_uname = "PREFETCH_HIT", .pme_udesc = "L2 prefetch hits", .pme_ucode = 0x40, .pme_uflags = 0, }, { .pme_uname = "PREFETCH_MISS", .pme_udesc = "L2 prefetch misses", .pme_ucode = 0x80, .pme_uflags = 0, }, { .pme_uname = "PREFETCHES", 
.pme_udesc = "All L2 prefetches", .pme_ucode = 0xC0, .pme_uflags = 0, }, { .pme_uname = "RFO_HIT", .pme_udesc = "L2 RFO hits", .pme_ucode = 0x04, .pme_uflags = 0, }, { .pme_uname = "RFO_MISS", .pme_udesc = "L2 RFO misses", .pme_ucode = 0x08, .pme_uflags = 0, }, { .pme_uname = "RFOS", .pme_udesc = "L2 RFO requests", .pme_ucode = 0x0C, .pme_uflags = 0, }, }, .pme_numasks = 14 }, { .pme_name = "L2_TRANSACTIONS", .pme_desc = "L2 transactions", .pme_code = 0xF0, .pme_flags = PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "ANY", .pme_udesc = "All L2 transactions", .pme_ucode = 0x80, .pme_uflags = 0, }, { .pme_uname = "FILL", .pme_udesc = "L2 fill transactions", .pme_ucode = 0x20, .pme_uflags = 0, }, { .pme_uname = "IFETCH", .pme_udesc = "L2 instruction fetch transactions", .pme_ucode = 0x04, .pme_uflags = 0, }, { .pme_uname = "L1D_WB", .pme_udesc = "L1D writeback to L2 transactions", .pme_ucode = 0x10, .pme_uflags = 0, }, { .pme_uname = "LOAD", .pme_udesc = "L2 Load transactions", .pme_ucode = 0x01, .pme_uflags = 0, }, { .pme_uname = "PREFETCH", .pme_udesc = "L2 prefetch transactions", .pme_ucode = 0x08, .pme_uflags = 0, }, { .pme_uname = "RFO", .pme_udesc = "L2 RFO transactions", .pme_ucode = 0x02, .pme_uflags = 0, }, { .pme_uname = "WB", .pme_udesc = "L2 writeback to LLC transactions", .pme_ucode = 0x40, .pme_uflags = 0, }, }, .pme_numasks = 8 }, { .pme_name = "L2_WRITE", .pme_desc = "L2 demand lock/store RFO", .pme_code = 0x27, .pme_flags = PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "LOCK_E_STATE", .pme_udesc = "L2 demand lock RFOs in E state", .pme_ucode = 0x40, .pme_uflags = 0, }, { .pme_uname = "LOCK_I_STATE", .pme_udesc = "L2 demand lock RFOs in I state (misses)", .pme_ucode = 0x10, .pme_uflags = 0, }, { .pme_uname = "LOCK_S_STATE", .pme_udesc = "L2 demand lock RFOs in S state", .pme_ucode = 0x20, .pme_uflags = 0, }, { .pme_uname = "LOCK_HIT", .pme_udesc = "All demand L2 lock RFOs that hit the cache", .pme_ucode = 0xE0, .pme_uflags = 0, }, { 
.pme_uname = "LOCK_M_STATE", .pme_udesc = "L2 demand lock RFOs in M state", .pme_ucode = 0x80, .pme_uflags = 0, }, { .pme_uname = "LOCK_MESI", .pme_udesc = "All demand L2 lock RFOs", .pme_ucode = 0xF0, .pme_uflags = 0, }, { .pme_uname = "RFO_HIT", .pme_udesc = "All L2 demand store RFOs that hit the cache", .pme_ucode = 0x0E, .pme_uflags = 0, }, { .pme_uname = "RFO_E_STATE", .pme_udesc = "L2 demand store RFOs in the E state (exclusive)", .pme_ucode = 0x04, .pme_uflags = 0, }, { .pme_uname = "RFO_I_STATE", .pme_udesc = "L2 demand store RFOs in I state (misses)", .pme_ucode = 0x01, .pme_uflags = 0, }, { .pme_uname = "RFO_M_STATE", .pme_udesc = "L2 demand store RFOs in M state", .pme_ucode = 0x08, .pme_uflags = 0, }, { .pme_uname = "RFO_MESI", .pme_udesc = "All L2 demand store RFOs", .pme_ucode = 0x0F, .pme_uflags = 0, }, { .pme_uname = "RFO_S_STATE", .pme_udesc = "L2 demand store RFOs in S state", .pme_ucode = 0x02, .pme_uflags = 0, }, }, .pme_numasks = 12 }, { .pme_name = "LARGE_ITLB", .pme_desc = "Large instruction TLB", .pme_code = 0x82, .pme_flags = 0, .pme_umasks = { { .pme_uname = "HIT", .pme_udesc = "Large ITLB hit", .pme_ucode = 0x01, .pme_uflags = 0, }, }, .pme_numasks = 1 }, { .pme_name = "LOAD_DISPATCH", .pme_desc = "Loads dispatched", .pme_code = 0x13, .pme_flags = PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "ANY", .pme_udesc = "All loads dispatched", .pme_ucode = 0x07, .pme_uflags = 0, }, { .pme_uname = "MOB", .pme_udesc = "Loads dispatched from the MOB", .pme_ucode = 0x04, .pme_uflags = 0, }, { .pme_uname = "RS", .pme_udesc = "Loads dispatched that bypass the MOB", .pme_ucode = 0x01, .pme_uflags = 0, }, { .pme_uname = "RS_DELAYED", .pme_udesc = "Loads dispatched from stage 305", .pme_ucode = 0x02, .pme_uflags = 0, }, }, .pme_numasks = 4 }, { .pme_name = "LOAD_HIT_PRE", .pme_desc = "Load operations conflicting with software prefetches", .pme_code = 0x014C, .pme_flags = PFMLIB_NHM_PMC01, }, { .pme_name = "LONGEST_LAT_CACHE", .pme_desc = 
"Longest latency cache reference", .pme_code = 0x2E, .pme_flags = PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "REFERENCE", .pme_udesc = "Longest latency cache reference", .pme_ucode = 0x4F, .pme_uflags = 0, }, { .pme_uname = "MISS", .pme_udesc = "Longest latency cache miss", .pme_ucode = 0x41, .pme_uflags = 0, }, }, .pme_numasks = 2 }, { .pme_name = "LSD", .pme_desc = "Loop stream detector", .pme_code = 0xA8, .pme_flags = PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "ACTIVE", .pme_udesc = "Cycles when uops were delivered by the LSD", .pme_ucode = 0x01 | (1<<16), .pme_uflags = 0, }, { .pme_uname = "INACTIVE", .pme_udesc = "Cycles no uops were delivered by the LSD", .pme_ucode = 0x01 | (1<<16)|(1<<15), .pme_uflags = 0, }, }, .pme_numasks = 2 }, { .pme_name = "MACHINE_CLEARS", .pme_desc = "Machine Clear", .pme_code = 0xC3, .pme_flags = PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "SMC", .pme_udesc = "Self-Modifying Code detected", .pme_ucode = 0x04, .pme_uflags = 0, }, { .pme_uname = "CYCLES", .pme_udesc = "Cycles machine clear asserted", .pme_ucode = 0x01, .pme_uflags = 0, }, { .pme_uname = "MEM_ORDER", .pme_udesc = "Execution pipeline restart due to Memory ordering conflicts", .pme_ucode = 0x02, .pme_uflags = 0, }, { .pme_uname = "FUSION_ASSIST", .pme_udesc = "Counts the number of macro-fusion assists", .pme_ucode = 0x10, .pme_uflags = 0, }, }, .pme_numasks = 4 }, { .pme_name = "MACRO_INSTS", .pme_desc = "Macro-fused instructions", .pme_code = 0xD0, .pme_flags = PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "DECODED", .pme_udesc = "Instructions decoded", .pme_ucode = 0x01, .pme_uflags = 0, }, { .pme_uname = "FUSIONS_DECODED", .pme_udesc = "Macro-fused instructions decoded", .pme_ucode = 0x01, .pme_uflags = 0, }, }, .pme_numasks = 2 }, { .pme_name = "MEMORY_DISAMBIGUATION", .pme_desc = "Memory Disambiguation Activity", .pme_code = 0x09, .pme_flags = PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "RESET", .pme_udesc 
= "Counts memory disambiguation reset cycles", .pme_ucode = 0x01, .pme_uflags = 0, }, { .pme_uname = "WATCHDOG", .pme_udesc = "Counts the number of times the memory disambiguation watchdog kicked in", .pme_ucode = 0x04, .pme_uflags = 0, }, { .pme_uname = "WATCH_CYCLES", .pme_udesc = "Counts the cycles that the memory disambiguation watchdog is active", .pme_ucode = 0x08, .pme_uflags = 0, }, }, .pme_numasks = 3 }, { .pme_name = "MEM_INST_RETIRED", .pme_desc = "Memory instructions retired", .pme_code = 0x0B, .pme_flags = PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "LATENCY_ABOVE_THRESHOLD", .pme_udesc = "Memory instructions retired above programmed clocks, minimum value threhold is 4, requires PEBS", .pme_ucode = 0x10, .pme_uflags = PFMLIB_NHM_PEBS, }, { .pme_uname = "LOADS", .pme_udesc = "Instructions retired which contains a load (Precise Event)", .pme_ucode = 0x01, .pme_uflags = PFMLIB_NHM_PEBS, }, { .pme_uname = "STORES", .pme_udesc = "Instructions retired which contains a store (Precise Event)", .pme_ucode = 0x02, .pme_uflags = PFMLIB_NHM_PEBS, }, }, .pme_numasks = 3 }, { .pme_name = "MEM_LOAD_RETIRED", .pme_desc = "Retired loads", .pme_code = 0xCB, .pme_flags = PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "DTLB_MISS", .pme_udesc = "Retired loads that miss the DTLB (Precise Event)", .pme_ucode = 0x80, .pme_uflags = PFMLIB_NHM_PEBS, }, { .pme_uname = "HIT_LFB", .pme_udesc = "Retired loads that miss L1D and hit an previously allocated LFB (Precise Event)", .pme_ucode = 0x40, .pme_uflags = PFMLIB_NHM_PEBS, }, { .pme_uname = "L1D_HIT", .pme_udesc = "Retired loads that hit the L1 data cache (Precise Event)", .pme_ucode = 0x01, .pme_uflags = PFMLIB_NHM_PEBS, }, { .pme_uname = "L2_HIT", .pme_udesc = "Retired loads that hit the L2 cache (Precise Event)", .pme_ucode = 0x02, .pme_uflags = PFMLIB_NHM_PEBS, }, { .pme_uname = "L3_MISS", .pme_udesc = "Retired loads that miss the LLC cache (Precise Event)", .pme_ucode = 0x10, .pme_uflags = 
PFMLIB_NHM_PEBS, }, { .pme_uname = "LLC_MISS", .pme_udesc = "This is an alias for L3_MISS", .pme_ucode = 0x10, .pme_uflags = PFMLIB_NHM_PEBS, }, { .pme_uname = "L3_UNSHARED_HIT", .pme_udesc = "Retired loads that hit valid versions in the LLC cache (Precise Event)", .pme_ucode = 0x04, .pme_uflags = PFMLIB_NHM_PEBS, }, { .pme_uname = "LLC_UNSHARED_HIT", .pme_udesc = "This is an alias for L3_UNSHARED_HIT", .pme_ucode = 0x04, .pme_uflags = PFMLIB_NHM_PEBS, }, { .pme_uname = "OTHER_CORE_L2_HIT_HITM", .pme_udesc = "Retired loads that hit sibling core's L2 in modified or unmodified states (Precise Event)", .pme_ucode = 0x08, .pme_uflags = PFMLIB_NHM_PEBS, }, }, .pme_numasks = 9 }, { .pme_name = "MEM_STORE_RETIRED", .pme_desc = "Retired stores", .pme_code = 0x0C, .pme_flags = PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "DTLB_MISS", .pme_udesc = "Retired stores that miss the DTLB (Precise Event)", .pme_ucode = 0x01, .pme_uflags = PFMLIB_NHM_PEBS, }, }, .pme_numasks = 1 }, { .pme_name = "MEM_UNCORE_RETIRED", .pme_desc = "Load instructions retired which hit offcore", .pme_code = 0x0F, .pme_flags = PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "OTHER_CORE_L2_HITM", .pme_udesc = "Load instructions retired that HIT modified data in sibling core (Precise Event)", .pme_ucode = 0x02, .pme_uflags = PFMLIB_NHM_PEBS, }, { .pme_uname = "REMOTE_CACHE_LOCAL_HOME_HIT", .pme_udesc = "Load instructions retired remote cache HIT data source (Precise Event)", .pme_ucode = 0x08, .pme_uflags = PFMLIB_NHM_PEBS, }, { .pme_uname = "REMOTE_DRAM", .pme_udesc = "Load instructions retired remote DRAM and remote home-remote cache HITM (Precise Event)", .pme_ucode = 0x10, .pme_uflags = PFMLIB_NHM_PEBS, }, { .pme_uname = "LOCAL_DRAM", .pme_udesc = "Load instructions retired with a data source of local DRAM or locally homed remote hitm (Precise Event)", .pme_ucode = 0x20, .pme_uflags = PFMLIB_NHM_PEBS, }, /* Model 46 only (must be after common umasks) */ { .pme_uname = 
"L3_DATA_MISS_UNKNOWN", .pme_udesc = "Load instructions retired where the memory reference missed L3 and data source is unknown (Model 46 only, Precise Event)", .pme_ucode = 0x01, .pme_umodel = 46, .pme_uflags = PFMLIB_NHM_PEBS, }, /* Model 46 only (must be after common umasks) */ { .pme_uname = "UNCACHEABLE", .pme_udesc = "Load instructions retired where the memory reference missed L1, L2, L3 caches and to perform I/O (Model 46 only, Precise Event)", .pme_ucode = 0x80, .pme_umodel = 46, .pme_uflags = PFMLIB_NHM_PEBS, }, }, .pme_numasks = 6 /* patched at runtime for model 46 */ }, { .pme_name = "OFFCORE_REQUESTS", .pme_desc = "Offcore memory requests", .pme_code = 0xB0, .pme_flags = PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "ANY", .pme_udesc = "All offcore requests", .pme_ucode = 0x80, .pme_uflags = 0, }, { .pme_uname = "ANY_READ", .pme_udesc = "Offcore read requests", .pme_ucode = 0x08, .pme_uflags = 0, }, { .pme_uname = "ANY_RFO", .pme_udesc = "Offcore RFO requests", .pme_ucode = 0x10, .pme_uflags = 0, }, { .pme_uname = "DEMAND_READ_CODE", .pme_udesc = "Counts number of offcore demand code read requests. 
Does not count L2 prefetch requests.", .pme_ucode = 0x02, .pme_uflags = 0, }, { .pme_uname = "DEMAND_READ_DATA", .pme_udesc = "Offcore demand data read requests", .pme_ucode = 0x01, .pme_uflags = 0, }, { .pme_uname = "DEMAND_RFO", .pme_udesc = "Offcore demand RFO requests", .pme_ucode = 0x04, .pme_uflags = 0, }, { .pme_uname = "L1D_WRITEBACK", .pme_udesc = "Offcore L1 data cache writebacks", .pme_ucode = 0x40, .pme_uflags = 0, }, { .pme_uname = "UNCACHED_MEM", .pme_udesc = "Counts number of offcore uncached memory requests", .pme_ucode = 0x20, .pme_uflags = 0, }, }, .pme_numasks = 8 }, { .pme_name = "OFFCORE_REQUESTS_SQ_FULL", .pme_desc = "Counts cycles the Offcore Request buffer or Super Queue is full.", .pme_code = 0x01B2, .pme_flags = 0, }, { .pme_name = "PARTIAL_ADDRESS_ALIAS", .pme_desc = "False dependencies due to partial address aliasing", .pme_code = 0x0107, .pme_flags = 0, }, { .pme_name = "PIC_ACCESSES", .pme_desc = "Programmable interrupt controller", .pme_code = 0xBA, .pme_flags = PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "TPR_READS", .pme_udesc = "Counts number of TPR reads", .pme_ucode = 0x01, .pme_uflags = 0, }, { .pme_uname = "TPR_WRITES", .pme_udesc = "Counts number of TPR writes", .pme_ucode = 0x02, .pme_uflags = 0, }, }, .pme_numasks = 2 }, { .pme_name = "RAT_STALLS", .pme_desc = "Register allocation table stalls", .pme_code = 0xD2, .pme_flags = 0, .pme_umasks = { { .pme_uname = "FLAGS", .pme_udesc = "Flag stall cycles", .pme_ucode = 0x01, .pme_uflags = 0, }, { .pme_uname = "REGISTERS", .pme_udesc = "Partial register stall cycles", .pme_ucode = 0x02, .pme_uflags = 0, }, { .pme_uname = "ROB_READ_PORT", .pme_udesc = "ROB read port stalls cycles", .pme_ucode = 0x04, .pme_uflags = 0, }, { .pme_uname = "SCOREBOARD", .pme_udesc = "Scoreboard stall cycles", .pme_ucode = 0x08, .pme_uflags = 0, }, { .pme_uname = "ANY", .pme_udesc = "All RAT stall cycles", .pme_ucode = 0x0F, .pme_uflags = 0, }, }, .pme_numasks = 5 }, { .pme_name = 
"RESOURCE_STALLS", .pme_desc = "Processor stalls", .pme_code = 0xA2, .pme_flags = 0, .pme_umasks = { { .pme_uname = "FPCW", .pme_udesc = "FPU control word write stall cycles", .pme_ucode = 0x20, .pme_uflags = 0, }, { .pme_uname = "LOAD", .pme_udesc = "Load buffer stall cycles", .pme_ucode = 0x02, .pme_uflags = 0, }, { .pme_uname = "MXCSR", .pme_udesc = "MXCSR rename stall cycles", .pme_ucode = 0x40, .pme_uflags = 0, }, { .pme_uname = "RS_FULL", .pme_udesc = "Reservation Station full stall cycles", .pme_ucode = 0x04, .pme_uflags = 0, }, { .pme_uname = "STORE", .pme_udesc = "Store buffer stall cycles", .pme_ucode = 0x08, .pme_uflags = 0, }, { .pme_uname = "OTHER", .pme_udesc = "Other Resource related stall cycles", .pme_ucode = 0x80, .pme_uflags = 0, }, { .pme_uname = "ROB_FULL", .pme_udesc = "ROB full stall cycles", .pme_ucode = 0x10, .pme_uflags = 0, }, { .pme_uname = "ANY", .pme_udesc = "Resource related stall cycles", .pme_ucode = 0x01, .pme_uflags = 0, }, }, .pme_numasks = 8 }, { .pme_name = "SEG_RENAME_STALLS", .pme_desc = "Segment rename stall cycles", .pme_code = 0x01D4, .pme_flags = 0, }, { .pme_name = "SEGMENT_REG_LOADS", .pme_desc = "Counts number of segment register loads", .pme_code = 0x01F8, .pme_flags = 0, }, { .pme_name = "SIMD_INT_128", .pme_desc = "128 bit SIMD integer operations", .pme_code = 0x12, .pme_flags = PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "PACK", .pme_udesc = "128 bit SIMD integer pack operations", .pme_ucode = 0x04, .pme_uflags = 0, }, { .pme_uname = "PACKED_ARITH", .pme_udesc = "128 bit SIMD integer arithmetic operations", .pme_ucode = 0x20, .pme_uflags = 0, }, { .pme_uname = "PACKED_LOGICAL", .pme_udesc = "128 bit SIMD integer logical operations", .pme_ucode = 0x10, .pme_uflags = 0, }, { .pme_uname = "PACKED_MPY", .pme_udesc = "128 bit SIMD integer multiply operations", .pme_ucode = 0x01, .pme_uflags = 0, }, { .pme_uname = "PACKED_SHIFT", .pme_udesc = "128 bit SIMD integer shift operations", .pme_ucode = 0x02, 
.pme_uflags = 0, }, { .pme_uname = "SHUFFLE_MOVE", .pme_udesc = "128 bit SIMD integer shuffle/move operations", .pme_ucode = 0x40, .pme_uflags = 0, }, { .pme_uname = "UNPACK", .pme_udesc = "128 bit SIMD integer unpack operations", .pme_ucode = 0x08, .pme_uflags = 0, }, }, .pme_numasks = 7 }, { .pme_name = "SIMD_INT_64", .pme_desc = "64 bit SIMD integer operations", .pme_code = 0xFD, .pme_flags = PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "PACK", .pme_udesc = "SIMD integer 64 bit pack operations", .pme_ucode = 0x04, .pme_uflags = 0, }, { .pme_uname = "PACKED_ARITH", .pme_udesc = "SIMD integer 64 bit arithmetic operations", .pme_ucode = 0x20, .pme_uflags = 0, }, { .pme_uname = "PACKED_LOGICAL", .pme_udesc = "SIMD integer 64 bit logical operations", .pme_ucode = 0x10, .pme_uflags = 0, }, { .pme_uname = "PACKED_MPY", .pme_udesc = "SIMD integer 64 bit packed multiply operations", .pme_ucode = 0x01, .pme_uflags = 0, }, { .pme_uname = "PACKED_SHIFT", .pme_udesc = "SIMD integer 64 bit shift operations", .pme_ucode = 0x02, .pme_uflags = 0, }, { .pme_uname = "SHUFFLE_MOVE", .pme_udesc = "SIMD integer 64 bit shuffle/move operations", .pme_ucode = 0x40, .pme_uflags = 0, }, { .pme_uname = "UNPACK", .pme_udesc = "SIMD integer 64 bit unpack operations", .pme_ucode = 0x08, .pme_uflags = 0, }, }, .pme_numasks = 7 }, { .pme_name = "SNOOP_RESPONSE", .pme_desc = "Snoop", .pme_code = 0xB8, .pme_flags = PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "HIT", .pme_udesc = "Thread responded HIT to snoop", .pme_ucode = 0x01, .pme_uflags = 0, }, { .pme_uname = "HITE", .pme_udesc = "Thread responded HITE to snoop", .pme_ucode = 0x02, .pme_uflags = 0, }, { .pme_uname = "HITM", .pme_udesc = "Thread responded HITM to snoop", .pme_ucode = 0x04, .pme_uflags = 0, }, }, .pme_numasks = 3 }, { .pme_name = "SQ_FULL_STALL_CYCLES", .pme_desc = "Counts cycles the Offcore Request buffer or Super Queue is full and request(s) are outstanding.", .pme_code = 0x01F6, .pme_flags = 0, }, { 
.pme_name = "SQ_MISC", .pme_desc = "Super Queue Activity Related to L2 Cache Access", .pme_code = 0xF4, .pme_flags = PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "PROMOTION", .pme_udesc = "Counts the number of L2 secondary misses that hit the Super Queue", .pme_ucode = 0x01, .pme_uflags = 0, }, { .pme_uname = "PROMOTION_POST_GO", .pme_udesc = "Counts the number of L2 secondary misses during the Super Queue filling L2", .pme_ucode = 0x02, .pme_uflags = 0, }, { .pme_uname = "LRU_HINTS", .pme_udesc = "Counts number of Super Queue LRU hints sent to L3", .pme_ucode = 0x04, .pme_uflags = 0, }, { .pme_uname = "FILL_DROPPED", .pme_udesc = "Counts the number of SQ L2 fills dropped due to L2 busy", .pme_ucode = 0x08, .pme_uflags = 0, }, { .pme_uname = "SPLIT_LOCK", .pme_udesc = "Super Queue lock splits across a cache line", .pme_ucode = 0x10, .pme_uflags = 0, }, }, .pme_numasks = 5 }, { .pme_name = "SSE_MEM_EXEC", .pme_desc = "Streaming SIMD executed", .pme_code = 0x4B, .pme_flags = 0, .pme_umasks = { { .pme_uname = "NTA", .pme_udesc = "Streaming SIMD L1D NTA prefetch miss", .pme_ucode = 0x01, .pme_uflags = 0, }, }, .pme_numasks = 1 }, { .pme_name = "SSEX_UOPS_RETIRED", .pme_desc = "SIMD micro-ops retired", .pme_code = 0xC7, .pme_flags = PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "PACKED_DOUBLE", .pme_udesc = "SIMD Packed-Double Uops retired (Precise Event)", .pme_ucode = 0x04, .pme_uflags = PFMLIB_NHM_PEBS, }, { .pme_uname = "PACKED_SINGLE", .pme_udesc = "SIMD Packed-Single Uops retired (Precise Event)", .pme_ucode = 0x01, .pme_uflags = PFMLIB_NHM_PEBS, }, { .pme_uname = "SCALAR_DOUBLE", .pme_udesc = "SIMD Scalar-Double Uops retired (Precise Event)", .pme_ucode = 0x08, .pme_uflags = PFMLIB_NHM_PEBS, }, { .pme_uname = "SCALAR_SINGLE", .pme_udesc = "SIMD Scalar-Single Uops retired (Precise Event)", .pme_ucode = 0x02, .pme_uflags = PFMLIB_NHM_PEBS, }, { .pme_uname = "VECTOR_INTEGER", .pme_udesc = "SIMD Vector Integer Uops retired (Precise Event)", 
.pme_ucode = 0x10, .pme_uflags = PFMLIB_NHM_PEBS, }, }, .pme_numasks = 5 }, { .pme_name = "STORE_BLOCKS", .pme_desc = "Delayed loads", .pme_code = 0x06, .pme_flags = PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "AT_RET", .pme_udesc = "Loads delayed with at-Retirement block code", .pme_ucode = 0x04, .pme_uflags = 0, }, { .pme_uname = "L1D_BLOCK", .pme_udesc = "Cacheable loads delayed with L1D block code", .pme_ucode = 0x08, .pme_uflags = 0, }, { .pme_uname = "NOT_STA", .pme_udesc = "Loads delayed due to a store blocked for unknown data", .pme_ucode = 0x01, .pme_uflags = 0, }, { .pme_uname = "STA", .pme_udesc = "Loads delayed due to a store blocked for an unknown address", .pme_ucode = 0x02, .pme_uflags = 0, }, }, .pme_numasks = 4 }, { .pme_name = "TWO_UOP_INSTS_DECODED", .pme_desc = "Two micro-ops instructions decoded", .pme_code = 0x0119, .pme_flags = 0, }, { .pme_name = "UOPS_DECODED_DEC0", .pme_desc = "Micro-ops decoded by decoder 0", .pme_code = 0x013D, .pme_flags = 0, }, { .pme_name = "UOPS_DECODED", .pme_desc = "Micro-ops decoded", .pme_code = 0xD1, .pme_flags = PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "ESP_FOLDING", .pme_udesc = "Stack pointer instructions decoded", .pme_ucode = 0x04, .pme_uflags = 0, }, { .pme_uname = "ESP_SYNC", .pme_udesc = "Stack pointer sync operations", .pme_ucode = 0x08, .pme_uflags = 0, }, { .pme_uname = "MS", .pme_udesc = "Uops decoded by Microcode Sequencer", .pme_ucode = 0x02, .pme_uflags = 0, }, { .pme_uname = "MS_CYCLES_ACTIVE", .pme_udesc = "cycles in which at least one uop is decoded by Microcode Sequencer", .pme_ucode = 0x2 | (1<< 16), /* counter-mask = 1 */ }, }, .pme_numasks = 4 }, { .pme_name = "UOPS_EXECUTED", .pme_desc = "Micro-ops executed", .pme_code = 0xB1, .pme_flags = PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "PORT0", .pme_udesc = "Uops executed on port 0", .pme_ucode = 0x01, .pme_uflags = 0, }, { .pme_uname = "PORT1", .pme_udesc = "Uops executed on port 1", .pme_ucode = 0x02, 
.pme_uflags = 0, }, { .pme_uname = "PORT2_CORE", .pme_udesc = "Uops executed on port 2 (core count only)", .pme_ucode = 0x04, .pme_uflags = 0, }, { .pme_uname = "PORT3_CORE", .pme_udesc = "Uops executed on port 3 (core count only)", .pme_ucode = 0x08, .pme_uflags = 0, }, { .pme_uname = "PORT4_CORE", .pme_udesc = "Uops executed on port 4 (core count only)", .pme_ucode = 0x10, .pme_uflags = 0, }, { .pme_uname = "PORT5", .pme_udesc = "Uops executed on port 5", .pme_ucode = 0x20, .pme_uflags = 0, }, { .pme_uname = "PORT015", .pme_udesc = "Uops issued on ports 0, 1 or 5", .pme_ucode = 0x40, .pme_uflags = 0, }, { .pme_uname = "PORT234_CORE", .pme_udesc = "Uops issued on ports 2, 3 or 4 (core count only)", .pme_ucode = 0x80, .pme_uflags = 0, }, { .pme_uname = "PORT015_STALL_CYCLES", .pme_udesc = "Cycles no Uops issued on ports 0, 1 or 5", .pme_ucode = 0x40 | (1<<16) | (1<<15), /* counter-mask=1, inv=1 */ .pme_uflags = 0, }, }, .pme_numasks = 9 }, { .pme_name = "UOPS_ISSUED", .pme_desc = "Micro-ops issued", .pme_code = 0x0E, .pme_flags = PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "ANY", .pme_udesc = "Uops issued", .pme_ucode = 0x01, .pme_uflags = 0, }, { .pme_uname = "STALLED_CYCLES", .pme_udesc = "Cycles stalled no issued uops", .pme_ucode = 0x01 | (1<<16) | (1<<15), /* counter-mask=1, inv=1 */ .pme_uflags = 0, }, { .pme_uname = "FUSED", .pme_udesc = "Fused Uops issued", .pme_ucode = 0x02, .pme_uflags = 0, }, }, .pme_numasks = 3 }, { .pme_name = "UOPS_RETIRED", .pme_desc = "Micro-ops retired", .pme_code = 0xC2, .pme_flags = PFMLIB_NHM_UMASK_NCOMBO, .pme_umasks = { { .pme_uname = "ANY", .pme_udesc = "Uops retired (Precise Event)", .pme_ucode = 0x01, .pme_uflags = PFMLIB_NHM_PEBS, }, { .pme_uname = "RETIRE_SLOTS", .pme_udesc = "Retirement slots used (Precise Event)", .pme_ucode = 0x02, .pme_uflags = PFMLIB_NHM_PEBS, }, { .pme_uname = "ACTIVE_CYCLES", .pme_udesc = "Cycles Uops are being retired (Precise Event)", .pme_ucode = 0x01 | (1<< 16), /* counter mask = 1 
*/ .pme_uflags = PFMLIB_NHM_PEBS, }, { .pme_uname = "STALL_CYCLES", .pme_udesc = "Cycles No Uops retired (Precise Event)", .pme_ucode = 0x01 | (1<<16) | (1<<15), /* counter-mask=1, inv=1 */ .pme_uflags = PFMLIB_NHM_PEBS, }, { .pme_uname = "MACRO_FUSED", .pme_udesc = "Macro-fused Uops retired (Precise Event)", .pme_ucode = 0x04, .pme_uflags = PFMLIB_NHM_PEBS, }, }, .pme_numasks = 5 }, { .pme_name = "UOP_UNFUSION", .pme_desc = "Micro-ops unfusions due to FP exceptions", .pme_code = 0x01DB, .pme_flags = 0, }, /* * BEGIN OFFCORE_RESPONSE */ { .pme_name = "OFFCORE_RESPONSE_0", .pme_desc = "Offcore response", .pme_code = 0x01B7, .pme_flags = PFMLIB_NHM_OFFCORE_RSP0, .pme_umasks = { { .pme_uname = "DMND_DATA_RD", .pme_udesc = "Request. Counts the number of demand and DCU prefetch data reads of full and partial cachelines as well as demand data page table entry cacheline reads. Does not count L2 data read prefetches or instruction fetches", .pme_ucode = 0x01, .pme_uflags = 0, }, { .pme_uname = "DMND_RFO", .pme_udesc = "Request. Counts the number of demand and DCU prefetch reads for ownership (RFO) requests generated by a write to data cacheline. Does not count L2 RFO", .pme_ucode = 0x02, .pme_uflags = 0, }, { .pme_uname = "DMND_IFETCH", .pme_udesc = "Request. Counts the number of demand and DCU prefetch instruction cacheline reads. Does not count L2 code read prefetches", .pme_ucode = 0x04, .pme_uflags = 0, }, { .pme_uname = "WB", .pme_udesc = "Request. Counts the number of writeback (modified to exclusive) transactions", .pme_ucode = 0x08, .pme_uflags = 0, }, { .pme_uname = "PF_DATA_RD", .pme_udesc = "Request. Counts the number of data cacheline reads generated by L2 prefetchers", .pme_ucode = 0x10, .pme_uflags = 0, }, { .pme_uname = "PF_RFO", .pme_udesc = "Request. Counts the number of RFO requests generated by L2 prefetchers", .pme_ucode = 0x20, .pme_uflags = 0, }, { .pme_uname = "PF_IFETCH", .pme_udesc = "Request. 
Counts the number of code reads generated by L2 prefetchers", .pme_ucode = 0x40, .pme_uflags = 0, }, { .pme_uname = "OTHER", .pme_udesc = "Request. Counts one of the following transaction types, including L3 invalidate, I/O, full or partial writes, WC or non-temporal stores, CLFLUSH, Fences, lock, unlock, split lock", .pme_ucode = 0x80, .pme_uflags = 0, }, { .pme_uname = "ANY_REQUEST", .pme_udesc = "Request. Counts any request type", .pme_ucode = 0xff, .pme_uflags = 0, }, { .pme_uname = "UNCORE_HIT", .pme_udesc = "Response. Counts L3 Hit: local or remote home requests that hit L3 cache in the uncore with no coherency actions required (snooping)", .pme_ucode = 0x100, .pme_uflags = 0, }, { .pme_uname = "OTHER_CORE_HIT_SNP", .pme_udesc = "Response. Counts L3 Hit: local or remote home requests that hit L3 cache in the uncore and was serviced by another core with a cross core snoop where no modified copies were found (clean)", .pme_ucode = 0x200, .pme_uflags = 0, }, { .pme_uname = "OTHER_CORE_HITM", .pme_udesc = "Response. Counts L3 Hit: local or remote home requests that hit L3 cache in the uncore and was serviced by another core with a cross core snoop where modified copies were found (HITM)", .pme_ucode = 0x400, .pme_uflags = 0, }, { .pme_uname = "REMOTE_CACHE_FWD", .pme_udesc = "Response. Counts L3 Miss: local homed requests that missed the L3 cache and was serviced by forwarded data following a cross package snoop where no modified copies found. (Remote home requests are not counted)", .pme_ucode = 0x1000, .pme_uflags = 0, }, { .pme_uname = "REMOTE_DRAM", .pme_udesc = "Response. Counts L3 Miss: remote home requests that missed the L3 cache and were serviced by remote DRAM", .pme_ucode = 0x2000, .pme_uflags = 0, }, { .pme_uname = "LOCAL_DRAM", .pme_udesc = "Response. Counts L3 Miss: local home requests that missed the L3 cache and were serviced by local DRAM", .pme_ucode = 0x4000, .pme_uflags = 0, }, { .pme_uname = "NON_DRAM", .pme_udesc = "Response. 
Non-DRAM requests that were serviced by IOH", .pme_ucode = 0x8000, .pme_uflags = 0, }, { .pme_uname = "ANY_RESPONSE", .pme_udesc = "Response. Counts any response type", .pme_ucode = 0xf700, .pme_uflags = 0, }, }, .pme_numasks = 17 } }; #define PME_COREI7_UNHALTED_CORE_CYCLES 0 #define PME_COREI7_INSTRUCTIONS_RETIRED 1 #define PME_COREI7_EVENT_COUNT (sizeof(corei7_pe)/sizeof(pme_nhm_entry_t)) papi-5.6.0/src/libpfm-3.y/include/perfmon/pfmlib_comp.h000664 001750 001750 00000003413 13216244362 024735 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2004-2006 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #ifndef __PFMLIB_COMP_H__ #define __PFMLIB_COMP_H__ #ifdef __ia64__ #include #endif #ifdef __x86_64__ #include #endif #ifdef __i386__ #include #endif #ifdef __mips__ #include #endif #ifdef __powerpc__ #include #endif #ifdef __sparc__ #include #endif #ifdef __cell__ #include #endif #ifdef __crayx2 #include #endif #endif /* __PFMLIB_COMP_H__ */ papi-5.6.0/src/components/appio/tests/iozone/client_list000664 001750 001750 00000002433 13216244356 025526 0ustar00jshenry1963jshenry1963000000 000000 # # Lines that start with # in column 0 are comments. # # There are now two formats supported. # Format: 3 fields, space delimited. # Format: 4 fields, space delimited. # # Format: 3 fields, space delimited. # client_name working_dir_on_client path_to_iozone_on_client # Format: 4 fields, space delimited. # client_name working_dir_on_client path_to_iozone_on_client path_to_testfile # # Example: With two clients (format 3 fields) # # client1 /home/user/tmp /home/user/tmp/iozone # client2 /home/user/tmp /home/user/tmp/iozone # # # Example: With two copies of Iozone on each of the two clients # (format 3 fields) # # client1 /home/user/tmp /home/user/tmp/iozone # client1 /home/user/tmp /home/user/tmp/iozone # client2 /home/user/tmp /home/user/tmp/iozone # client2 /home/user/tmp /home/user/tmp/iozone # # Example: With two clients (format 4 fields) # client1 /home/user/tmp /home/user/tmp/iozone /tmp/foo1 # client2 /home/user/tmp /home/user/tmp/iozone /tmp/foo2 # # Example: With two copies of Iozone on each of the two clients # (format 4 fields) # client1 /home/user/tmp /home/user/tmp/iozone /tmp/foo1 # client1 /home/user/tmp /home/user/tmp/iozone /tmp/foo2 # client2 /home/user/tmp /home/user/tmp/iozone /tmp/foo3 # client2 /home/user/tmp /home/user/tmp/iozone /tmp/foo4 papi-5.6.0/src/libpfm4/perf_examples/x86/bts_smpl.c000664 001750 001750 00000015444 13216244365 024141 0ustar00jshenry1963jshenry1963000000 000000 /* * bts_smpl.c - example of Intel Branch Trace Stack sampling * * 
Copyright (c) 2009 Google, Inc * Contributed by Stephane Eranian * * Based on: * Copyright (c) 2003-2006 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "perf_util.h" #define SMPL_PERIOD 24000000ULL typedef struct { int opt_no_show; int opt_inherit; int mmap_pages; } options_t; static jmp_buf jbuf; static uint64_t collected_samples, lost_samples; static perf_event_desc_t *fds; static int num_fds; static options_t options; static struct option the_options[]={ { "help", 0, 0, 1}, { "no-show", 0, &options.opt_no_show, 1}, { 0, 0, 0, 0} }; static void cld_handler(int n) { longjmp(jbuf, 1); } int child(char **arg) { /* * force the task to stop before executing the first * user level instruction */ ptrace(PTRACE_TRACEME, 0, NULL, NULL); execvp(arg[0], arg); /* not reached */ return -1; } struct timeval last_read, this_read; static void process_smpl_buf(perf_event_desc_t *hw) { struct perf_event_header ehdr; int ret; for(;;) { ret = perf_read_buffer(hw, &ehdr, sizeof(ehdr)); if (ret) return; /* nothing to read */ switch(ehdr.type) { case PERF_RECORD_SAMPLE: perf_display_sample(fds, num_fds, hw - fds, &ehdr, stdout); collected_samples++; break; case PERF_RECORD_EXIT: display_exit(hw, stdout); break; case PERF_RECORD_LOST: display_lost(hw, fds, num_fds, stdout); break; case PERF_RECORD_THROTTLE: display_freq(1, hw, stdout); break; case PERF_RECORD_UNTHROTTLE: display_freq(0, hw, stdout); break; default: printf("unknown sample type %d sz=%d\n", ehdr.type, ehdr.size); perf_skip_buffer(hw, ehdr.size - sizeof(ehdr)); } } } int mainloop(char **arg) { static uint64_t ovfl_count; /* static to avoid setjmp issue */ struct pollfd pollfds[1]; size_t map_size = 0; sigset_t bmask; pid_t pid; uint64_t val[2]; int status, ret; if (pfm_initialize() != PFM_SUCCESS) errx(1, "libpfm initialization failed\n"); map_size = (options.mmap_pages+1)*getpagesize(); /* * does allocate fds */ ret = perf_setup_list_events("branches:u", &fds, &num_fds); if (ret || 
!num_fds) errx(1, "cannot setup event"); memset(pollfds, 0, sizeof(pollfds)); /* * Create the child task */ if ((pid=fork()) == -1) err(1, "cannot fork process\n"); if (pid == 0) exit(child(arg)); /* * wait for the child to exec */ ret = waitpid(pid, &status, WUNTRACED); if (ret == -1) err(1, "waitpid failed"); if (WIFEXITED(status)) errx(1, "task %s [%d] exited already status %d\n", arg[0], pid, WEXITSTATUS(status)); fds[0].fd = -1; fds[0].hw.disabled = 0; /* start immediately */ if (options.opt_inherit) fds[0].hw.inherit = 1; fds[0].hw.sample_type = PERF_SAMPLE_IP|PERF_SAMPLE_ADDR; /* * BTS only supported at user level */ if (fds[0].hw.exclude_user ||fds[0].hw.exclude_kernel == 0) errx(1, "BTS currently supported only at the user level\n"); /* * period MUST be one to trigger BTS: tracing not sampling anymore */ fds[0].hw.sample_period = 1; fds[0].hw.exclude_kernel = 1; fds[0].hw.exclude_hv = 1; fds[0].hw.read_format |= PERF_FORMAT_ID; fds[0].fd = perf_event_open(&fds[0].hw, pid, -1, -1, 0); if (fds[0].fd == -1) err(1, "cannot attach event %s", fds[0].name); fds[0].buf = mmap(NULL, map_size, PROT_READ|PROT_WRITE, MAP_SHARED, fds[0].fd, 0); if (fds[0].buf == MAP_FAILED) err(1, "cannot mmap buffer"); /* does not include header page */ fds[0].pgmsk = (options.mmap_pages*getpagesize())-1; ret = read(fds[0].fd, val, sizeof(val)); if (ret == -1) err(1, "cannot read id %zu", sizeof(val)); fds[0].id = val[1]; printf("%"PRIu64" %s\n", fds[0].id, fds[0].name); /* * effectively activate monitoring */ ptrace(PTRACE_DETACH, pid, NULL, 0); signal(SIGCHLD, cld_handler); pollfds[0].fd = fds[0].fd; pollfds[0].events = POLLIN; if (setjmp(jbuf) == 1) goto terminate_session; sigemptyset(&bmask); sigaddset(&bmask, SIGCHLD); /* * core loop */ for(;;) { ret = poll(pollfds, 1, -1); if (ret < 0 && errno == EINTR) break; ovfl_count++; ret = sigprocmask(SIG_SETMASK, &bmask, NULL); if (ret) err(1, "setmask"); process_smpl_buf(&fds[0]); ret = sigprocmask(SIG_UNBLOCK, &bmask, NULL); if (ret) 
err(1, "unblock"); } terminate_session: /* * cleanup child */ wait4(pid, &status, 0, NULL); close(fds[0].fd); /* check for partial event buffer */ process_smpl_buf(&fds[0]); munmap(fds[0].buf, map_size); free(fds); printf("%"PRIu64" samples collected in %"PRIu64" poll events, %"PRIu64" lost samples\n", collected_samples, ovfl_count, lost_samples); return 0; } static void usage(void) { printf("usage: bts_smpl [-h] [--help] [-i] [-m mmap_pages] cmd\n"); } int main(int argc, char **argv) { int c; while ((c=getopt_long(argc, argv,"+hm:p:if", the_options, 0)) != -1) { switch(c) { case 0: continue; case 'i': options.opt_inherit = 1; break; case 'm': if (options.mmap_pages) errx(1, "mmap pages already set\n"); options.mmap_pages = atoi(optarg); break; case 'h': usage(); exit(0); default: errx(1, "unknown option"); } } if (argv[optind] == NULL) errx(1, "you must specify a command to execute\n"); if (!options.mmap_pages) options.mmap_pages = 4; return mainloop(argv+optind); } papi-5.6.0/src/libpfm-3.y/examples_ia64_v2.0/mont_etb.c000664 001750 001750 00000032767 13216244362 024403 0ustar00jshenry1963jshenry1963000000 000000 /* * mont_btb.c - example of how use the BTB with the Dual-Core Itanium 2 PMU * * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. */ #include #include #include #include #include #include #include #include #include #include #include #include #include typedef pfm_default_smpl_hdr_t etb_hdr_t; typedef pfm_default_smpl_entry_t etb_entry_t; typedef pfm_default_smpl_ctx_arg_t etb_ctx_arg_t; #define BTB_FMT_UUID PFM_DEFAULT_SMPL_UUID static pfm_uuid_t buf_fmt_id = BTB_FMT_UUID; #define NUM_PMCS PFMLIB_MAX_PMCS #define NUM_PMDS PFMLIB_MAX_PMDS #define MAX_EVT_NAME_LEN 128 #define MAX_PMU_NAME_LEN 32 /* * The BRANCH_EVENT is increment by 1 for each branch event. Such event is composed of * two entries in the BTB: a source and a target entry. The BTB is full after 4 branch * events. 
*/ #define SMPL_PERIOD (4UL*256) /* * We use a small buffer size to exercise the overflow handler */ #define SMPL_BUF_NENTRIES 64 #define M_PMD(x) (1UL<<(x)) #define ETB_REGS_MASK (M_PMD(38)| M_PMD(39)| \ M_PMD(48)|M_PMD(49)|M_PMD(50)|M_PMD(51)|M_PMD(52)|M_PMD(53)|M_PMD(54)|M_PMD(55)|\ M_PMD(56)|M_PMD(57)|M_PMD(58)|M_PMD(59)|M_PMD(60)|M_PMD(61)|M_PMD(62)|M_PMD(63)) static void *smpl_vaddr; static size_t entry_size; static int id; #if defined(__ECC) && defined(__INTEL_COMPILER) /* if you do not have this file, your compiler is too old */ #include #define hweight64(x) _m64_popcnt(x) #elif defined(__GNUC__) static __inline__ int hweight64 (unsigned long x) { unsigned long result; __asm__ ("popcnt %0=%1" : "=r" (result) : "r" (x)); return (int)result; } #else #error "you need to provide inline assembly from your compiler" #endif /* * we don't use static to make sure the compiler does not inline the function */ long func1(void) { return random();} long func2(void) { return random();} long do_test(unsigned long loop) { long sum = 0; while(loop--) { if (loop & 0x1) sum += func1(); else sum += loop + func2(); } return sum; } static void fatal_error(char *fmt,...) __attribute__((noreturn)); static void fatal_error(char *fmt, ...) { va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); exit(1); } /* * print content of sampling buffer * * XXX: using stdio to print from a signal handler is not safe with multi-threaded * applications */ #define safe_printf printf static void show_etb_reg(int j, pfm_mont_pmd_reg_t reg, pfm_mont_pmd_reg_t pmd39) { unsigned long bruflush, b1, etb_ext; unsigned long addr; int is_valid; is_valid = reg.pmd48_63_etb_mont_reg.etb_s == 0 && reg.pmd48_63_etb_mont_reg.etb_mp == 0 ? 0 : 1; /* * the joy of the ETB extension register layout! 
*/ if (j < 8) etb_ext = (pmd39.pmd_val>>(8*j)) & 0xf; else etb_ext = (pmd39.pmd_val>>(4+8*(j-1))) & 0xf; b1 = etb_ext & 0x1; bruflush = (etb_ext >> 1) & 0x1; safe_printf("\tPMD%-2d: 0x%016lx s=%d mp=%d bru=%ld b1=%ld valid=%c\n", j+48, reg.pmd_val, reg.pmd48_63_etb_mont_reg.etb_s, reg.pmd48_63_etb_mont_reg.etb_mp, bruflush, b1, is_valid ? 'Y' : 'N'); if (!is_valid) return; if (reg.pmd48_63_etb_mont_reg.etb_s) { addr = (reg.pmd48_63_etb_mont_reg.etb_addr+b1)<<4; addr |= reg.pmd48_63_etb_mont_reg.etb_slot < 3 ? reg.pmd48_63_etb_mont_reg.etb_slot : 0; safe_printf("\t Source Address: 0x%016lx\n" "\t Taken=%c Prediction:%s\n\n", addr, reg.pmd48_63_etb_mont_reg.etb_slot < 3 ? 'Y' : 'N', reg.pmd48_63_etb_mont_reg.etb_mp ? "FE Failure" : bruflush ? "BE Failure" : "Success"); } else { safe_printf("\t Target Address:0x%016lx\n\n", (unsigned long)(reg.pmd48_63_etb_mont_reg.etb_addr<<4)); } } static void show_etb(pfm_mont_pmd_reg_t *etb) { int i, last; pfm_mont_pmd_reg_t pmd38, pmd39; pmd38.pmd_val = etb[0].pmd_val; pmd39.pmd_val = etb[1].pmd_val; i = pmd38.pmd38_mont_reg.etbi_full ? pmd38.pmd38_mont_reg.etbi_ebi : 0; last = pmd38.pmd38_mont_reg.etbi_ebi; safe_printf("btb_trace: i=%d last=%d bbi=%d full=%d\n", i, last, pmd38.pmd38_mont_reg.etbi_ebi, pmd38.pmd38_mont_reg.etbi_full); do { show_etb_reg(i, etb[i], pmd39); i = (i+1) % 16; } while (i != last); } void process_smpl_buffer(void) { etb_hdr_t *hdr; etb_entry_t *ent; unsigned long pos; unsigned long smpl_entry = 0; pfm_mont_pmd_reg_t *reg; size_t count; static unsigned long last_ovfl = ~0UL; hdr = (etb_hdr_t *)smpl_vaddr; /* * check that we are not diplaying the previous set of samples again. * Required to take care of the last batch of samples. 
*/ if (hdr->hdr_overflows <= last_ovfl && last_ovfl != ~0UL) { printf("skipping identical set of samples %lu <= %lu\n", hdr->hdr_overflows, last_ovfl); return; } pos = (unsigned long)(hdr+1); count = hdr->hdr_count; /* * walk through all the entries recored in the buffer */ while(count--) { ent = (etb_entry_t *)pos; /* * print entry header */ safe_printf("Entry %ld PID:%d TID:%d CPU:%d STAMP:0x%lx IIP:0x%016lx\n", smpl_entry++, ent->tgid, ent->pid, ent->cpu, ent->tstamp, ent->ip); /* * point to first recorded register (always contiguous with entry header) */ reg = (pfm_mont_pmd_reg_t*)(ent+1); /* * in this particular example, we have pmd48-pmd63 has the ETB. We have also * included pmd38/pmd39 (ETB index and extenseion) has part of the registers * to record. This trick allows us to get the index to decode the sequential * order of the BTB. * * Recorded registers are always recorded in increasing index order. So we know * that where to find pmd38/pmd39. */ show_etb(reg); /* * move to next entry */ pos += entry_size; } } static void overflow_handler(int n, struct siginfo *info, struct sigcontext *sc) { process_smpl_buffer(); /* * And resume monitoring */ if (perfmonctl(id, PFM_RESTART, NULL, 0)) fatal_error("pfm_restart errno %d\n", errno); } int main(void) { int ret; int type = 0; pfarg_reg_t pd[NUM_PMDS]; pfarg_reg_t pc[NUM_PMCS]; pfmlib_input_param_t inp; pfmlib_output_param_t outp; pfmlib_mont_input_param_t mont_inp; etb_ctx_arg_t ctx; pfarg_load_t load_args; pfmlib_options_t pfmlib_options; struct sigaction act; unsigned int i; /* * Initialize pfm library (required before we can use it) */ if (pfm_initialize() != PFMLIB_SUCCESS) fatal_error("Can't initialize library\n"); /* * Let's make sure we run this on the right CPU */ pfm_get_pmu_type(&type); if (type != PFMLIB_MONTECITO_PMU) { char model[MAX_PMU_NAME_LEN]; pfm_get_pmu_name(model, MAX_PMU_NAME_LEN); fatal_error("this program does not work with %s PMU\n", model); } /* * Install the overflow handler (SIGIO) 
*/ memset(&act, 0, sizeof(act)); act.sa_handler = (sig_t)overflow_handler; sigaction (SIGIO, &act, 0); /* * pass options to library (optional) */ memset(&pfmlib_options, 0, sizeof(pfmlib_options)); pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ pfmlib_options.pfm_verbose = 0; /* set to 1 for debug */ pfm_set_options(&pfmlib_options); memset(pd, 0, sizeof(pd)); memset(&ctx, 0, sizeof(ctx)); /* * prepare parameters to library. we don't use any Itanium * specific features here. so the pfp_model is NULL. */ memset(&inp,0, sizeof(inp)); memset(&outp,0, sizeof(outp)); memset(&mont_inp,0, sizeof(mont_inp)); /* * Before calling pfm_find_dispatch(), we must specify what kind * of branches we want to capture. We are interested in all taken * branches * therefore we program we set the various fields to: */ mont_inp.pfp_mont_etb.etb_used = 1; mont_inp.pfp_mont_etb.etb_tm = 0x2; mont_inp.pfp_mont_etb.etb_ptm = 0x3; mont_inp.pfp_mont_etb.etb_ppm = 0x3; mont_inp.pfp_mont_etb.etb_brt = 0x0; mont_inp.pfp_mont_etb.etb_plm = PFM_PLM3; /* * To count the number of occurence of this instruction, we must * program a counting monitor with the IA64_TAGGED_INST_RETIRED_PMC8 * event. */ if (pfm_find_full_event("BRANCH_EVENT", &inp.pfp_events[0]) != PFMLIB_SUCCESS) fatal_error("cannot find event BRANCH_EVENT\n"); /* * set the (global) privilege mode: * PFM_PLM3 : user level only */ inp.pfp_dfl_plm = PFM_PLM3; /* * how many counters we use */ inp.pfp_event_count = 1; /* * let the library figure out the values for the PMCS */ if ((ret=pfm_dispatch_events(&inp, &mont_inp, &outp, NULL)) != PFMLIB_SUCCESS) fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); /* * We initialize the format specific information. * The format is identified by its UUID which must be copied * into the ctx_buf_fmt_id field. */ memcpy(ctx.ctx_arg.ctx_smpl_buf_id, buf_fmt_id, sizeof(pfm_uuid_t)); /* * the size of the buffer is indicated in bytes (not entries). 
* * The kernel will record into the buffer up to a certain point. * No partial samples are ever recorded. */ ctx.buf_arg.buf_size = getpagesize(); /* * now create the context for self monitoring/per-task */ if (perfmonctl(0, PFM_CREATE_CONTEXT, &ctx, 1) == -1 ) { if (errno == ENOSYS) { fatal_error("Your kernel does not have performance monitoring support!\n"); } fatal_error("Can't create PFM context %s\n", strerror(errno)); } /* * extract our file descriptor */ id = ctx.ctx_arg.ctx_fd; /* * retrieve the virtual address at which the sampling * buffer has been mapped */ smpl_vaddr = ctx.ctx_arg.ctx_smpl_vaddr; if (smpl_vaddr == MAP_FAILED) fatal_error("cannot mmap sampling buffer errno %d\n", errno); printf("Sampling buffer mapped at %p\n", smpl_vaddr); /* * Now prepare the argument to initialize the PMDs and PMCS. * We must pfp_pmc_count to determine the number of PMC to intialize. * We must use pfp_event_count to determine the number of PMD to initialize. * Some events cause extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. */ for (i=0; i < outp.pfp_pmc_count; i++) { pc[i].reg_num = outp.pfp_pmcs[i].reg_num; pc[i].reg_value = outp.pfp_pmcs[i].reg_value; } /* * figure out pmd mapping from output pmc * PMD38 returned as used PMD by libpfm, will be reset */ for (i=0; i < outp.pfp_pmd_count; i++) pd[i].reg_num = outp.pfp_pmds[i].reg_num; /* * indicate we want notification when buffer is full and randomization */ pc[0].reg_flags |= PFM_REGFL_OVFL_NOTIFY | PFM_REGFL_RANDOM; /* * Now prepare the argument to initialize the PMD and the sampling period * We know we use only one PMD in this case, therefore pmd[0] corresponds * to our first event which is our sampling period. 
*/ pd[0].reg_value = - SMPL_PERIOD; pd[0].reg_long_reset = - SMPL_PERIOD; pd[0].reg_short_reset = - SMPL_PERIOD; /* * indicate PMD to collect in each sample (good up to PMD63) */ pc[0].reg_smpl_pmds[0] = ETB_REGS_MASK; /* * compute size of each sample: fixed-size header + all our BTB regs */ entry_size = sizeof(etb_entry_t)+(hweight64(ETB_REGS_MASK)<<3); /* * When our counter overflows, we want to ETB index to be reset, so that we keep * in sync. */ pc[0].reg_reset_pmds[0] = M_PMD(38); /* * Now program the registers */ if (perfmonctl(id, PFM_WRITE_PMCS, pc, outp.pfp_pmc_count)) fatal_error("pfm_write_pmcs error errno %d\n",errno); /* * we use 2 registers = 1 for the branch_event + 1 to reset PMD38 */ if (perfmonctl(id, PFM_WRITE_PMDS, pd, outp.pfp_pmd_count)) fatal_error("pfm_write_pmds error errno %d\n",errno); /* * now we load (i.e., attach) the context to ourself */ load_args.load_pid = getpid(); if (perfmonctl(id, PFM_LOAD_CONTEXT, &load_args, 1)) fatal_error("pfm_load_context error errno %d\n",errno); /* * setup asynchronous notification on the file descriptor */ ret = fcntl(id, F_SETFL, fcntl(id, F_GETFL, 0) | O_ASYNC); if (ret == -1) fatal_error("cannot set ASYNC: %s\n", strerror(errno)); /* * get ownership of the descriptor */ ret = fcntl(id, F_SETOWN, getpid()); if (ret == -1) fatal_error("cannot setown: %s\n", strerror(errno)); /* * Let's roll now. */ pfm_self_start(id); do_test(1000); pfm_self_stop(id); /* * We must call the processing routine to cover the last entries recorded * in the sampling buffer. Note that the buffer may not be full at this point. 
* */ process_smpl_buffer(); /* * let's stop this now */ close(id); return 0; } papi-5.6.0/src/libpfm-3.y/lib/ppc970mp_events.h000664 001750 001750 00000445441 13216244363 023102 0ustar00jshenry1963jshenry1963000000 000000 /****************************/ /* THIS IS OPEN SOURCE CODE */ /****************************/ #ifndef __PPC970MP_EVENTS_H__ #define __PPC970MP_EVENTS_H__ /* * File: ppc970mp_events.h * CVS: * Author: Corey Ashford * cjashfor@us.ibm.com * Mods: * * * (C) Copyright IBM Corporation, 2007. All Rights Reserved. * Contributed by Corey Ashford * * Note: This code was automatically generated and should not be modified by * hand. * */ #define PPC970MP_PME_PM_LSU_REJECT_RELOAD_CDF 0 #define PPC970MP_PME_PM_MRK_LSU_SRQ_INST_VALID 1 #define PPC970MP_PME_PM_FPU1_SINGLE 2 #define PPC970MP_PME_PM_FPU0_STALL3 3 #define PPC970MP_PME_PM_TB_BIT_TRANS 4 #define PPC970MP_PME_PM_GPR_MAP_FULL_CYC 5 #define PPC970MP_PME_PM_MRK_ST_CMPL 6 #define PPC970MP_PME_PM_FPU0_STF 7 #define PPC970MP_PME_PM_FPU1_FMA 8 #define PPC970MP_PME_PM_LSU1_FLUSH_ULD 9 #define PPC970MP_PME_PM_MRK_INST_FIN 10 #define PPC970MP_PME_PM_MRK_LSU0_FLUSH_UST 11 #define PPC970MP_PME_PM_LSU_LRQ_S0_ALLOC 12 #define PPC970MP_PME_PM_FPU_FDIV 13 #define PPC970MP_PME_PM_FPU0_FULL_CYC 14 #define PPC970MP_PME_PM_FPU_SINGLE 15 #define PPC970MP_PME_PM_FPU0_FMA 16 #define PPC970MP_PME_PM_MRK_LSU1_FLUSH_ULD 17 #define PPC970MP_PME_PM_LSU1_FLUSH_LRQ 18 #define PPC970MP_PME_PM_DTLB_MISS 19 #define PPC970MP_PME_PM_CMPLU_STALL_FXU 20 #define PPC970MP_PME_PM_MRK_ST_MISS_L1 21 #define PPC970MP_PME_PM_EXT_INT 22 #define PPC970MP_PME_PM_MRK_LSU1_FLUSH_LRQ 23 #define PPC970MP_PME_PM_MRK_ST_GPS 24 #define PPC970MP_PME_PM_GRP_DISP_SUCCESS 25 #define PPC970MP_PME_PM_LSU1_LDF 26 #define PPC970MP_PME_PM_LSU0_SRQ_STFWD 27 #define PPC970MP_PME_PM_CR_MAP_FULL_CYC 28 #define PPC970MP_PME_PM_MRK_LSU0_FLUSH_ULD 29 #define PPC970MP_PME_PM_LSU_DERAT_MISS 30 #define PPC970MP_PME_PM_FPU0_SINGLE 31 #define PPC970MP_PME_PM_FPU1_FDIV 32 
#define PPC970MP_PME_PM_FPU1_FEST 33 #define PPC970MP_PME_PM_FPU0_FRSP_FCONV 34 #define PPC970MP_PME_PM_GCT_EMPTY_SRQ_FULL 35 #define PPC970MP_PME_PM_MRK_ST_CMPL_INT 36 #define PPC970MP_PME_PM_FLUSH_BR_MPRED 37 #define PPC970MP_PME_PM_FXU_FIN 38 #define PPC970MP_PME_PM_FPU_STF 39 #define PPC970MP_PME_PM_DSLB_MISS 40 #define PPC970MP_PME_PM_FXLS1_FULL_CYC 41 #define PPC970MP_PME_PM_CMPLU_STALL_FPU 42 #define PPC970MP_PME_PM_LSU_LMQ_LHR_MERGE 43 #define PPC970MP_PME_PM_MRK_STCX_FAIL 44 #define PPC970MP_PME_PM_FXU0_BUSY_FXU1_IDLE 45 #define PPC970MP_PME_PM_CMPLU_STALL_LSU 46 #define PPC970MP_PME_PM_MRK_DATA_FROM_L25_SHR 47 #define PPC970MP_PME_PM_LSU_FLUSH_ULD 48 #define PPC970MP_PME_PM_MRK_BRU_FIN 49 #define PPC970MP_PME_PM_IERAT_XLATE_WR 50 #define PPC970MP_PME_PM_GCT_EMPTY_BR_MPRED 51 #define PPC970MP_PME_PM_LSU0_BUSY 52 #define PPC970MP_PME_PM_DATA_FROM_MEM 53 #define PPC970MP_PME_PM_FPR_MAP_FULL_CYC 54 #define PPC970MP_PME_PM_FPU1_FULL_CYC 55 #define PPC970MP_PME_PM_FPU0_FIN 56 #define PPC970MP_PME_PM_GRP_BR_REDIR 57 #define PPC970MP_PME_PM_GCT_EMPTY_IC_MISS 58 #define PPC970MP_PME_PM_THRESH_TIMEO 59 #define PPC970MP_PME_PM_FPU_FSQRT 60 #define PPC970MP_PME_PM_MRK_LSU0_FLUSH_LRQ 61 #define PPC970MP_PME_PM_PMC1_OVERFLOW 62 #define PPC970MP_PME_PM_FXLS0_FULL_CYC 63 #define PPC970MP_PME_PM_FPU0_ALL 64 #define PPC970MP_PME_PM_DATA_TABLEWALK_CYC 65 #define PPC970MP_PME_PM_FPU0_FEST 66 #define PPC970MP_PME_PM_DATA_FROM_L25_MOD 67 #define PPC970MP_PME_PM_LSU0_REJECT_ERAT_MISS 68 #define PPC970MP_PME_PM_LSU_LMQ_SRQ_EMPTY_CYC 69 #define PPC970MP_PME_PM_LSU0_REJECT_RELOAD_CDF 70 #define PPC970MP_PME_PM_FPU_FEST 71 #define PPC970MP_PME_PM_0INST_FETCH 72 #define PPC970MP_PME_PM_LD_MISS_L1_LSU0 73 #define PPC970MP_PME_PM_LSU1_REJECT_RELOAD_CDF 74 #define PPC970MP_PME_PM_L1_PREF 75 #define PPC970MP_PME_PM_FPU1_STALL3 76 #define PPC970MP_PME_PM_BRQ_FULL_CYC 77 #define PPC970MP_PME_PM_PMC8_OVERFLOW 78 #define PPC970MP_PME_PM_PMC7_OVERFLOW 79 #define PPC970MP_PME_PM_WORK_HELD 80 
#define PPC970MP_PME_PM_MRK_LD_MISS_L1_LSU0 81 #define PPC970MP_PME_PM_FXU_IDLE 82 #define PPC970MP_PME_PM_INST_CMPL 83 #define PPC970MP_PME_PM_LSU1_FLUSH_UST 84 #define PPC970MP_PME_PM_LSU0_FLUSH_ULD 85 #define PPC970MP_PME_PM_LSU_FLUSH 86 #define PPC970MP_PME_PM_INST_FROM_L2 87 #define PPC970MP_PME_PM_LSU1_REJECT_LMQ_FULL 88 #define PPC970MP_PME_PM_PMC2_OVERFLOW 89 #define PPC970MP_PME_PM_FPU0_DENORM 90 #define PPC970MP_PME_PM_FPU1_FMOV_FEST 91 #define PPC970MP_PME_PM_INST_FETCH_CYC 92 #define PPC970MP_PME_PM_GRP_DISP_REJECT 93 #define PPC970MP_PME_PM_LSU_LDF 94 #define PPC970MP_PME_PM_INST_DISP 95 #define PPC970MP_PME_PM_DATA_FROM_L25_SHR 96 #define PPC970MP_PME_PM_L1_DCACHE_RELOAD_VALID 97 #define PPC970MP_PME_PM_MRK_GRP_ISSUED 98 #define PPC970MP_PME_PM_FPU_FMA 99 #define PPC970MP_PME_PM_MRK_CRU_FIN 100 #define PPC970MP_PME_PM_CMPLU_STALL_REJECT 101 #define PPC970MP_PME_PM_MRK_LSU1_FLUSH_UST 102 #define PPC970MP_PME_PM_MRK_FXU_FIN 103 #define PPC970MP_PME_PM_LSU1_REJECT_ERAT_MISS 104 #define PPC970MP_PME_PM_BR_ISSUED 105 #define PPC970MP_PME_PM_PMC4_OVERFLOW 106 #define PPC970MP_PME_PM_EE_OFF 107 #define PPC970MP_PME_PM_INST_FROM_L25_MOD 108 #define PPC970MP_PME_PM_CMPLU_STALL_ERAT_MISS 109 #define PPC970MP_PME_PM_ITLB_MISS 110 #define PPC970MP_PME_PM_FXU1_BUSY_FXU0_IDLE 111 #define PPC970MP_PME_PM_GRP_DISP_VALID 112 #define PPC970MP_PME_PM_MRK_GRP_DISP 113 #define PPC970MP_PME_PM_LSU_FLUSH_UST 114 #define PPC970MP_PME_PM_FXU1_FIN 115 #define PPC970MP_PME_PM_GRP_CMPL 116 #define PPC970MP_PME_PM_FPU_FRSP_FCONV 117 #define PPC970MP_PME_PM_MRK_LSU0_FLUSH_SRQ 118 #define PPC970MP_PME_PM_CMPLU_STALL_OTHER 119 #define PPC970MP_PME_PM_LSU_LMQ_FULL_CYC 120 #define PPC970MP_PME_PM_ST_REF_L1_LSU0 121 #define PPC970MP_PME_PM_LSU0_DERAT_MISS 122 #define PPC970MP_PME_PM_LSU_SRQ_SYNC_CYC 123 #define PPC970MP_PME_PM_FPU_STALL3 124 #define PPC970MP_PME_PM_LSU_REJECT_ERAT_MISS 125 #define PPC970MP_PME_PM_MRK_DATA_FROM_L2 126 #define PPC970MP_PME_PM_LSU0_FLUSH_SRQ 127 #define 
PPC970MP_PME_PM_FPU0_FMOV_FEST 128 #define PPC970MP_PME_PM_IOPS_CMPL 129 #define PPC970MP_PME_PM_LD_REF_L1_LSU0 130 #define PPC970MP_PME_PM_LSU1_FLUSH_SRQ 131 #define PPC970MP_PME_PM_CMPLU_STALL_DIV 132 #define PPC970MP_PME_PM_GRP_BR_MPRED 133 #define PPC970MP_PME_PM_LSU_LMQ_S0_ALLOC 134 #define PPC970MP_PME_PM_LSU0_REJECT_LMQ_FULL 135 #define PPC970MP_PME_PM_ST_REF_L1 136 #define PPC970MP_PME_PM_MRK_VMX_FIN 137 #define PPC970MP_PME_PM_LSU_SRQ_EMPTY_CYC 138 #define PPC970MP_PME_PM_FPU1_STF 139 #define PPC970MP_PME_PM_RUN_CYC 140 #define PPC970MP_PME_PM_LSU_LMQ_S0_VALID 141 #define PPC970MP_PME_PM_LSU0_LDF 142 #define PPC970MP_PME_PM_LSU_LRQ_S0_VALID 143 #define PPC970MP_PME_PM_PMC3_OVERFLOW 144 #define PPC970MP_PME_PM_MRK_IMR_RELOAD 145 #define PPC970MP_PME_PM_MRK_GRP_TIMEO 146 #define PPC970MP_PME_PM_FPU_FMOV_FEST 147 #define PPC970MP_PME_PM_GRP_DISP_BLK_SB_CYC 148 #define PPC970MP_PME_PM_XER_MAP_FULL_CYC 149 #define PPC970MP_PME_PM_ST_MISS_L1 150 #define PPC970MP_PME_PM_STOP_COMPLETION 151 #define PPC970MP_PME_PM_MRK_GRP_CMPL 152 #define PPC970MP_PME_PM_ISLB_MISS 153 #define PPC970MP_PME_PM_SUSPENDED 154 #define PPC970MP_PME_PM_CYC 155 #define PPC970MP_PME_PM_LD_MISS_L1_LSU1 156 #define PPC970MP_PME_PM_STCX_FAIL 157 #define PPC970MP_PME_PM_LSU1_SRQ_STFWD 158 #define PPC970MP_PME_PM_GRP_DISP 159 #define PPC970MP_PME_PM_L2_PREF 160 #define PPC970MP_PME_PM_FPU1_DENORM 161 #define PPC970MP_PME_PM_DATA_FROM_L2 162 #define PPC970MP_PME_PM_FPU0_FPSCR 163 #define PPC970MP_PME_PM_MRK_DATA_FROM_L25_MOD 164 #define PPC970MP_PME_PM_FPU0_FSQRT 165 #define PPC970MP_PME_PM_LD_REF_L1 166 #define PPC970MP_PME_PM_MRK_L1_RELOAD_VALID 167 #define PPC970MP_PME_PM_1PLUS_PPC_CMPL 168 #define PPC970MP_PME_PM_INST_FROM_L1 169 #define PPC970MP_PME_PM_EE_OFF_EXT_INT 170 #define PPC970MP_PME_PM_PMC6_OVERFLOW 171 #define PPC970MP_PME_PM_LSU_LRQ_FULL_CYC 172 #define PPC970MP_PME_PM_IC_PREF_INSTALL 173 #define PPC970MP_PME_PM_DC_PREF_OUT_OF_STREAMS 174 #define 
PPC970MP_PME_PM_MRK_LSU1_FLUSH_SRQ 175 #define PPC970MP_PME_PM_GCT_FULL_CYC 176 #define PPC970MP_PME_PM_INST_FROM_MEM 177 #define PPC970MP_PME_PM_FLUSH_LSU_BR_MPRED 178 #define PPC970MP_PME_PM_FXU_BUSY 179 #define PPC970MP_PME_PM_ST_REF_L1_LSU1 180 #define PPC970MP_PME_PM_MRK_LD_MISS_L1 181 #define PPC970MP_PME_PM_L1_WRITE_CYC 182 #define PPC970MP_PME_PM_LSU1_BUSY 183 #define PPC970MP_PME_PM_LSU_REJECT_LMQ_FULL 184 #define PPC970MP_PME_PM_CMPLU_STALL_FDIV 185 #define PPC970MP_PME_PM_FPU_ALL 186 #define PPC970MP_PME_PM_LSU_SRQ_S0_ALLOC 187 #define PPC970MP_PME_PM_INST_FROM_L25_SHR 188 #define PPC970MP_PME_PM_GRP_MRK 189 #define PPC970MP_PME_PM_BR_MPRED_CR 190 #define PPC970MP_PME_PM_DC_PREF_STREAM_ALLOC 191 #define PPC970MP_PME_PM_FPU1_FIN 192 #define PPC970MP_PME_PM_LSU_REJECT_SRQ 193 #define PPC970MP_PME_PM_BR_MPRED_TA 194 #define PPC970MP_PME_PM_CRQ_FULL_CYC 195 #define PPC970MP_PME_PM_LD_MISS_L1 196 #define PPC970MP_PME_PM_INST_FROM_PREF 197 #define PPC970MP_PME_PM_STCX_PASS 198 #define PPC970MP_PME_PM_DC_INV_L2 199 #define PPC970MP_PME_PM_LSU_SRQ_FULL_CYC 200 #define PPC970MP_PME_PM_LSU0_FLUSH_LRQ 201 #define PPC970MP_PME_PM_LSU_SRQ_S0_VALID 202 #define PPC970MP_PME_PM_LARX_LSU0 203 #define PPC970MP_PME_PM_GCT_EMPTY_CYC 204 #define PPC970MP_PME_PM_FPU1_ALL 205 #define PPC970MP_PME_PM_FPU1_FSQRT 206 #define PPC970MP_PME_PM_FPU_FIN 207 #define PPC970MP_PME_PM_LSU_SRQ_STFWD 208 #define PPC970MP_PME_PM_MRK_LD_MISS_L1_LSU1 209 #define PPC970MP_PME_PM_FXU0_FIN 210 #define PPC970MP_PME_PM_MRK_FPU_FIN 211 #define PPC970MP_PME_PM_PMC5_OVERFLOW 212 #define PPC970MP_PME_PM_SNOOP_TLBIE 213 #define PPC970MP_PME_PM_FPU1_FRSP_FCONV 214 #define PPC970MP_PME_PM_FPU0_FDIV 215 #define PPC970MP_PME_PM_LD_REF_L1_LSU1 216 #define PPC970MP_PME_PM_HV_CYC 217 #define PPC970MP_PME_PM_LR_CTR_MAP_FULL_CYC 218 #define PPC970MP_PME_PM_FPU_DENORM 219 #define PPC970MP_PME_PM_LSU0_REJECT_SRQ 220 #define PPC970MP_PME_PM_LSU1_REJECT_SRQ 221 #define PPC970MP_PME_PM_LSU1_DERAT_MISS 222 #define 
PPC970MP_PME_PM_IC_PREF_REQ 223 #define PPC970MP_PME_PM_MRK_LSU_FIN 224 #define PPC970MP_PME_PM_MRK_DATA_FROM_MEM 225 #define PPC970MP_PME_PM_CMPLU_STALL_DCACHE_MISS 226 #define PPC970MP_PME_PM_LSU0_FLUSH_UST 227 #define PPC970MP_PME_PM_LSU_FLUSH_LRQ 228 #define PPC970MP_PME_PM_LSU_FLUSH_SRQ 229 static const int ppc970mp_event_ids[][PPC970MP_NUM_EVENT_COUNTERS] = { [ PPC970MP_PME_PM_LSU_REJECT_RELOAD_CDF ] = { -1, -1, -1, -1, -1, 66, -1, -1 }, [ PPC970MP_PME_PM_MRK_LSU_SRQ_INST_VALID ] = { -1, -1, 61, 61, -1, -1, 60, 61 }, [ PPC970MP_PME_PM_FPU1_SINGLE ] = { 23, 22, -1, -1, 23, 22, -1, -1 }, [ PPC970MP_PME_PM_FPU0_STALL3 ] = { 15, 14, -1, -1, 15, 14, -1, -1 }, [ PPC970MP_PME_PM_TB_BIT_TRANS ] = { -1, -1, -1, -1, -1, -1, -1, 67 }, [ PPC970MP_PME_PM_GPR_MAP_FULL_CYC ] = { -1, -1, 27, 28, -1, -1, 27, 27 }, [ PPC970MP_PME_PM_MRK_ST_CMPL ] = { 78, -1, -1, -1, -1, -1, -1, -1 }, [ PPC970MP_PME_PM_FPU0_STF ] = { 16, 15, -1, -1, 16, 15, -1, -1 }, [ PPC970MP_PME_PM_FPU1_FMA ] = { 20, 19, -1, -1, 20, 19, -1, -1 }, [ PPC970MP_PME_PM_LSU1_FLUSH_ULD ] = { 57, 56, -1, -1, 58, 55, -1, -1 }, [ PPC970MP_PME_PM_MRK_INST_FIN ] = { -1, -1, -1, -1, -1, -1, 50, -1 }, [ PPC970MP_PME_PM_MRK_LSU0_FLUSH_UST ] = { -1, -1, 56, 56, -1, -1, 55, 55 }, [ PPC970MP_PME_PM_LSU_LRQ_S0_ALLOC ] = { 65, 65, -1, -1, 66, 64, -1, -1 }, [ PPC970MP_PME_PM_FPU_FDIV ] = { 27, -1, -1, -1, -1, -1, -1, -1 }, [ PPC970MP_PME_PM_FPU0_FULL_CYC ] = { 13, 12, -1, -1, 13, 12, -1, -1 }, [ PPC970MP_PME_PM_FPU_SINGLE ] = { -1, -1, -1, -1, 27, -1, -1, -1 }, [ PPC970MP_PME_PM_FPU0_FMA ] = { 11, 10, -1, -1, 11, 10, -1, -1 }, [ PPC970MP_PME_PM_MRK_LSU1_FLUSH_ULD ] = { -1, -1, 59, 59, -1, -1, 58, 58 }, [ PPC970MP_PME_PM_LSU1_FLUSH_LRQ ] = { 55, 54, -1, -1, 56, 53, -1, -1 }, [ PPC970MP_PME_PM_DTLB_MISS ] = { 6, 5, -1, -1, 6, 5, -1, -1 }, [ PPC970MP_PME_PM_CMPLU_STALL_FXU ] = { -1, -1, -1, -1, 85, -1, -1, -1 }, [ PPC970MP_PME_PM_MRK_ST_MISS_L1 ] = { 79, 75, -1, -1, 76, 76, -1, -1 }, [ PPC970MP_PME_PM_EXT_INT ] = { -1, -1, -1, -1, 
-1, -1, -1, 10 }, [ PPC970MP_PME_PM_MRK_LSU1_FLUSH_LRQ ] = { -1, -1, 57, 57, -1, -1, 56, 56 }, [ PPC970MP_PME_PM_MRK_ST_GPS ] = { -1, -1, -1, -1, -1, 75, -1, -1 }, [ PPC970MP_PME_PM_GRP_DISP_SUCCESS ] = { -1, -1, -1, -1, 33, -1, -1, -1 }, [ PPC970MP_PME_PM_LSU1_LDF ] = { -1, -1, 42, 40, -1, -1, 40, 41 }, [ PPC970MP_PME_PM_LSU0_SRQ_STFWD ] = { 53, 52, -1, -1, 54, 51, -1, -1 }, [ PPC970MP_PME_PM_CR_MAP_FULL_CYC ] = { 1, 1, -1, -1, 2, 1, -1, -1 }, [ PPC970MP_PME_PM_MRK_LSU0_FLUSH_ULD ] = { -1, -1, 55, 55, -1, -1, 54, 54 }, [ PPC970MP_PME_PM_LSU_DERAT_MISS ] = { -1, -1, -1, -1, -1, 62, -1, -1 }, [ PPC970MP_PME_PM_FPU0_SINGLE ] = { 14, 13, -1, -1, 14, 13, -1, -1 }, [ PPC970MP_PME_PM_FPU1_FDIV ] = { 19, 18, -1, -1, 19, 18, -1, -1 }, [ PPC970MP_PME_PM_FPU1_FEST ] = { -1, -1, 17, 18, -1, -1, 17, 18 }, [ PPC970MP_PME_PM_FPU0_FRSP_FCONV ] = { -1, -1, 16, 17, -1, -1, 16, 17 }, [ PPC970MP_PME_PM_GCT_EMPTY_SRQ_FULL ] = { -1, 27, -1, -1, -1, -1, -1, -1 }, [ PPC970MP_PME_PM_MRK_ST_CMPL_INT ] = { -1, -1, 62, -1, -1, -1, -1, -1 }, [ PPC970MP_PME_PM_FLUSH_BR_MPRED ] = { -1, -1, 10, 11, -1, -1, 10, 11 }, [ PPC970MP_PME_PM_FXU_FIN ] = { -1, -1, 26, -1, -1, -1, -1, -1 }, [ PPC970MP_PME_PM_FPU_STF ] = { -1, -1, -1, -1, -1, 26, -1, -1 }, [ PPC970MP_PME_PM_DSLB_MISS ] = { 5, 4, -1, -1, 5, 4, -1, -1 }, [ PPC970MP_PME_PM_FXLS1_FULL_CYC ] = { -1, -1, 23, 24, -1, -1, 23, 24 }, [ PPC970MP_PME_PM_CMPLU_STALL_FPU ] = { -1, -1, -1, -1, -1, -1, 67, -1 }, [ PPC970MP_PME_PM_LSU_LMQ_LHR_MERGE ] = { -1, -1, 45, 43, -1, -1, 43, 45 }, [ PPC970MP_PME_PM_MRK_STCX_FAIL ] = { 77, 74, -1, -1, 75, 74, -1, -1 }, [ PPC970MP_PME_PM_FXU0_BUSY_FXU1_IDLE ] = { -1, -1, -1, -1, -1, -1, 24, -1 }, [ PPC970MP_PME_PM_CMPLU_STALL_LSU ] = { -1, -1, -1, -1, 84, -1, -1, -1 }, [ PPC970MP_PME_PM_MRK_DATA_FROM_L25_SHR ] = { -1, -1, -1, -1, 92, -1, -1, -1 }, [ PPC970MP_PME_PM_LSU_FLUSH_ULD ] = { 64, -1, -1, -1, -1, -1, -1, -1 }, [ PPC970MP_PME_PM_MRK_BRU_FIN ] = { -1, 70, -1, -1, -1, -1, -1, -1 }, [ 
PPC970MP_PME_PM_IERAT_XLATE_WR ] = { -1, -1, 70, 67, -1, -1, 72, 68 }, [ PPC970MP_PME_PM_GCT_EMPTY_BR_MPRED ] = { -1, -1, -1, -1, -1, -1, 71, -1 }, [ PPC970MP_PME_PM_LSU0_BUSY ] = { 85, 80, -1, -1, 81, 81, -1, -1 }, [ PPC970MP_PME_PM_DATA_FROM_MEM ] = { -1, 87, -1, -1, -1, -1, -1, -1 }, [ PPC970MP_PME_PM_FPR_MAP_FULL_CYC ] = { 7, 6, -1, -1, 7, 6, -1, -1 }, [ PPC970MP_PME_PM_FPU1_FULL_CYC ] = { 22, 21, -1, -1, 22, 21, -1, -1 }, [ PPC970MP_PME_PM_FPU0_FIN ] = { -1, -1, 13, 14, -1, -1, 13, 14 }, [ PPC970MP_PME_PM_GRP_BR_REDIR ] = { 31, 30, -1, -1, 31, 30, -1, -1 }, [ PPC970MP_PME_PM_GCT_EMPTY_IC_MISS ] = { -1, -1, -1, -1, 88, -1, -1, -1 }, [ PPC970MP_PME_PM_THRESH_TIMEO ] = { -1, 82, -1, -1, -1, -1, -1, -1 }, [ PPC970MP_PME_PM_FPU_FSQRT ] = { -1, -1, -1, -1, -1, 25, -1, -1 }, [ PPC970MP_PME_PM_MRK_LSU0_FLUSH_LRQ ] = { -1, -1, 53, 53, -1, -1, 52, 52 }, [ PPC970MP_PME_PM_PMC1_OVERFLOW ] = { -1, 76, -1, -1, -1, -1, -1, -1 }, [ PPC970MP_PME_PM_FXLS0_FULL_CYC ] = { -1, -1, 22, 23, -1, -1, 22, 23 }, [ PPC970MP_PME_PM_FPU0_ALL ] = { 8, 7, -1, -1, 8, 7, -1, -1 }, [ PPC970MP_PME_PM_DATA_TABLEWALK_CYC ] = { 4, 3, -1, -1, 4, 3, -1, -1 }, [ PPC970MP_PME_PM_FPU0_FEST ] = { -1, -1, 12, 13, -1, -1, 12, 13 }, [ PPC970MP_PME_PM_DATA_FROM_L25_MOD ] = { -1, -1, -1, -1, -1, 87, -1, -1 }, [ PPC970MP_PME_PM_LSU0_REJECT_ERAT_MISS ] = { 49, 48, -1, -1, 50, 47, -1, -1 }, [ PPC970MP_PME_PM_LSU_LMQ_SRQ_EMPTY_CYC ] = { -1, 64, 48, -1, -1, -1, -1, -1 }, [ PPC970MP_PME_PM_LSU0_REJECT_RELOAD_CDF ] = { 51, 50, -1, -1, 52, 49, -1, -1 }, [ PPC970MP_PME_PM_FPU_FEST ] = { -1, -1, 21, -1, -1, -1, -1, -1 }, [ PPC970MP_PME_PM_0INST_FETCH ] = { -1, -1, -1, 0, -1, -1, -1, -1 }, [ PPC970MP_PME_PM_LD_MISS_L1_LSU0 ] = { -1, -1, 37, 35, -1, -1, 35, 35 }, [ PPC970MP_PME_PM_LSU1_REJECT_RELOAD_CDF ] = { 61, 60, -1, -1, 62, 59, -1, -1 }, [ PPC970MP_PME_PM_L1_PREF ] = { -1, -1, 33, 32, -1, -1, 32, 32 }, [ PPC970MP_PME_PM_FPU1_STALL3 ] = { 24, 23, -1, -1, 24, 23, -1, -1 }, [ PPC970MP_PME_PM_BRQ_FULL_CYC ] = { 0, 0, 
-1, -1, 1, 0, -1, -1 }, [ PPC970MP_PME_PM_PMC8_OVERFLOW ] = { 80, -1, -1, -1, -1, -1, -1, -1 }, [ PPC970MP_PME_PM_PMC7_OVERFLOW ] = { -1, -1, -1, -1, -1, -1, -1, 62 }, [ PPC970MP_PME_PM_WORK_HELD ] = { -1, 83, -1, -1, -1, -1, -1, -1 }, [ PPC970MP_PME_PM_MRK_LD_MISS_L1_LSU0 ] = { 75, 72, -1, -1, 73, 72, -1, -1 }, [ PPC970MP_PME_PM_FXU_IDLE ] = { -1, -1, -1, -1, 28, -1, -1, -1 }, [ PPC970MP_PME_PM_INST_CMPL ] = { 36, 36, 30, 30, 38, 35, 30, 30 }, [ PPC970MP_PME_PM_LSU1_FLUSH_UST ] = { 58, 57, -1, -1, 59, 56, -1, -1 }, [ PPC970MP_PME_PM_LSU0_FLUSH_ULD ] = { 47, 46, -1, -1, 48, 45, -1, -1 }, [ PPC970MP_PME_PM_LSU_FLUSH ] = { -1, -1, 43, 41, -1, -1, 41, 42 }, [ PPC970MP_PME_PM_INST_FROM_L2 ] = { 39, -1, -1, -1, -1, -1, -1, -1 }, [ PPC970MP_PME_PM_LSU1_REJECT_LMQ_FULL ] = { 60, 59, -1, -1, 61, 58, -1, -1 }, [ PPC970MP_PME_PM_PMC2_OVERFLOW ] = { -1, -1, 64, -1, -1, -1, -1, -1 }, [ PPC970MP_PME_PM_FPU0_DENORM ] = { 9, 8, -1, -1, 9, 8, -1, -1 }, [ PPC970MP_PME_PM_FPU1_FMOV_FEST ] = { -1, -1, 19, 20, -1, -1, 19, 20 }, [ PPC970MP_PME_PM_INST_FETCH_CYC ] = { 90, 86, -1, -1, 90, 85, -1, -1 }, [ PPC970MP_PME_PM_GRP_DISP_REJECT ] = { 32, 32, -1, -1, 32, 31, -1, 29 }, [ PPC970MP_PME_PM_LSU_LDF ] = { -1, -1, -1, -1, -1, -1, -1, 43 }, [ PPC970MP_PME_PM_INST_DISP ] = { 37, 37, -1, -1, 39, 36, -1, -1 }, [ PPC970MP_PME_PM_DATA_FROM_L25_SHR ] = { -1, -1, -1, -1, 91, -1, -1, -1 }, [ PPC970MP_PME_PM_L1_DCACHE_RELOAD_VALID ] = { -1, -1, 32, 31, -1, -1, 31, 31 }, [ PPC970MP_PME_PM_MRK_GRP_ISSUED ] = { -1, -1, -1, -1, -1, 70, -1, -1 }, [ PPC970MP_PME_PM_FPU_FMA ] = { -1, 25, -1, -1, -1, -1, -1, -1 }, [ PPC970MP_PME_PM_MRK_CRU_FIN ] = { -1, -1, -1, 50, -1, -1, -1, -1 }, [ PPC970MP_PME_PM_CMPLU_STALL_REJECT ] = { -1, -1, -1, -1, -1, -1, 69, -1 }, [ PPC970MP_PME_PM_MRK_LSU1_FLUSH_UST ] = { -1, -1, 60, 60, -1, -1, 59, 59 }, [ PPC970MP_PME_PM_MRK_FXU_FIN ] = { -1, -1, -1, -1, -1, 69, -1, -1 }, [ PPC970MP_PME_PM_LSU1_REJECT_ERAT_MISS ] = { 59, 58, -1, -1, 60, 57, -1, -1 }, [ 
PPC970MP_PME_PM_BR_ISSUED ] = { -1, -1, 0, 1, -1, -1, 0, 0 }, [ PPC970MP_PME_PM_PMC4_OVERFLOW ] = { -1, -1, -1, -1, 77, -1, -1, -1 }, [ PPC970MP_PME_PM_EE_OFF ] = { -1, -1, 8, 9, -1, -1, 8, 8 }, [ PPC970MP_PME_PM_INST_FROM_L25_MOD ] = { -1, -1, -1, -1, -1, 37, -1, -1 }, [ PPC970MP_PME_PM_CMPLU_STALL_ERAT_MISS ] = { -1, -1, -1, -1, -1, -1, 70, -1 }, [ PPC970MP_PME_PM_ITLB_MISS ] = { 41, 40, -1, -1, 42, 39, -1, -1 }, [ PPC970MP_PME_PM_FXU1_BUSY_FXU0_IDLE ] = { -1, -1, -1, 26, -1, -1, -1, -1 }, [ PPC970MP_PME_PM_GRP_DISP_VALID ] = { 33, 33, -1, -1, 34, 32, -1, -1 }, [ PPC970MP_PME_PM_MRK_GRP_DISP ] = { 72, -1, -1, -1, -1, -1, -1, -1 }, [ PPC970MP_PME_PM_LSU_FLUSH_UST ] = { -1, 63, -1, -1, -1, -1, -1, -1 }, [ PPC970MP_PME_PM_FXU1_FIN ] = { -1, -1, 25, 27, -1, -1, 26, 26 }, [ PPC970MP_PME_PM_GRP_CMPL ] = { -1, -1, -1, -1, -1, -1, 28, -1 }, [ PPC970MP_PME_PM_FPU_FRSP_FCONV ] = { -1, -1, -1, -1, -1, -1, 21, -1 }, [ PPC970MP_PME_PM_MRK_LSU0_FLUSH_SRQ ] = { -1, -1, 54, 54, -1, -1, 53, 53 }, [ PPC970MP_PME_PM_CMPLU_STALL_OTHER ] = { 88, -1, -1, -1, -1, -1, -1, -1 }, [ PPC970MP_PME_PM_LSU_LMQ_FULL_CYC ] = { -1, -1, 44, 42, -1, -1, 42, 44 }, [ PPC970MP_PME_PM_ST_REF_L1_LSU0 ] = { -1, -1, 67, 64, -1, -1, 64, 64 }, [ PPC970MP_PME_PM_LSU0_DERAT_MISS ] = { 44, 43, -1, -1, 45, 42, -1, -1 }, [ PPC970MP_PME_PM_LSU_SRQ_SYNC_CYC ] = { -1, -1, 51, 49, -1, -1, 48, 50 }, [ PPC970MP_PME_PM_FPU_STALL3 ] = { -1, 26, -1, -1, -1, -1, -1, -1 }, [ PPC970MP_PME_PM_LSU_REJECT_ERAT_MISS ] = { -1, -1, -1, -1, 68, -1, -1, -1 }, [ PPC970MP_PME_PM_MRK_DATA_FROM_L2 ] = { 71, -1, -1, -1, -1, -1, -1, -1 }, [ PPC970MP_PME_PM_LSU0_FLUSH_SRQ ] = { 46, 45, -1, -1, 47, 44, -1, -1 }, [ PPC970MP_PME_PM_FPU0_FMOV_FEST ] = { -1, -1, 14, 15, -1, -1, 14, 15 }, [ PPC970MP_PME_PM_IOPS_CMPL ] = { 91, -1, -1, 68, -1, 86, 73, 69 }, [ PPC970MP_PME_PM_LD_REF_L1_LSU0 ] = { -1, -1, 39, 37, -1, -1, 37, 38 }, [ PPC970MP_PME_PM_LSU1_FLUSH_SRQ ] = { 56, 55, -1, -1, 57, 54, -1, -1 }, [ PPC970MP_PME_PM_CMPLU_STALL_DIV ] = { -1, 
-1, -1, -1, -1, -1, 68, -1 }, [ PPC970MP_PME_PM_GRP_BR_MPRED ] = { 30, 29, -1, -1, 30, 29, -1, -1 }, [ PPC970MP_PME_PM_LSU_LMQ_S0_ALLOC ] = { -1, -1, 46, 44, -1, -1, 44, 46 }, [ PPC970MP_PME_PM_LSU0_REJECT_LMQ_FULL ] = { 50, 49, -1, -1, 51, 48, -1, -1 }, [ PPC970MP_PME_PM_ST_REF_L1 ] = { -1, -1, -1, -1, -1, -1, 63, -1 }, [ PPC970MP_PME_PM_MRK_VMX_FIN ] = { -1, -1, 63, -1, -1, -1, -1, -1 }, [ PPC970MP_PME_PM_LSU_SRQ_EMPTY_CYC ] = { -1, -1, -1, 47, -1, -1, -1, -1 }, [ PPC970MP_PME_PM_FPU1_STF ] = { 25, 24, -1, -1, 25, 24, -1, -1 }, [ PPC970MP_PME_PM_RUN_CYC ] = { 81, -1, -1, -1, -1, -1, -1, -1 }, [ PPC970MP_PME_PM_LSU_LMQ_S0_VALID ] = { -1, -1, 47, 45, -1, -1, 45, 47 }, [ PPC970MP_PME_PM_LSU0_LDF ] = { -1, -1, 41, 39, -1, -1, 39, 40 }, [ PPC970MP_PME_PM_LSU_LRQ_S0_VALID ] = { 66, 66, -1, -1, 67, 65, -1, -1 }, [ PPC970MP_PME_PM_PMC3_OVERFLOW ] = { -1, -1, -1, 62, -1, -1, -1, -1 }, [ PPC970MP_PME_PM_MRK_IMR_RELOAD ] = { 73, 71, -1, -1, 72, 71, -1, -1 }, [ PPC970MP_PME_PM_MRK_GRP_TIMEO ] = { -1, -1, -1, -1, 71, -1, -1, -1 }, [ PPC970MP_PME_PM_FPU_FMOV_FEST ] = { -1, -1, -1, -1, -1, -1, -1, 22 }, [ PPC970MP_PME_PM_GRP_DISP_BLK_SB_CYC ] = { -1, -1, 28, 29, -1, -1, 29, 28 }, [ PPC970MP_PME_PM_XER_MAP_FULL_CYC ] = { 87, 84, -1, -1, 83, 83, -1, -1 }, [ PPC970MP_PME_PM_ST_MISS_L1 ] = { -1, -1, 66, 63, -1, -1, 62, 63 }, [ PPC970MP_PME_PM_STOP_COMPLETION ] = { -1, -1, 65, -1, -1, -1, -1, -1 }, [ PPC970MP_PME_PM_MRK_GRP_CMPL ] = { -1, -1, -1, 51, -1, -1, -1, -1 }, [ PPC970MP_PME_PM_ISLB_MISS ] = { 40, 39, -1, -1, 41, 38, -1, -1 }, [ PPC970MP_PME_PM_SUSPENDED ] = { 86, 81, 69, 66, 82, 82, 66, 66 }, [ PPC970MP_PME_PM_CYC ] = { 2, 2, 4, 5, 3, 2, 4, 4 }, [ PPC970MP_PME_PM_LD_MISS_L1_LSU1 ] = { -1, -1, 38, 36, -1, -1, 36, 36 }, [ PPC970MP_PME_PM_STCX_FAIL ] = { 83, 78, -1, -1, 79, 79, -1, -1 }, [ PPC970MP_PME_PM_LSU1_SRQ_STFWD ] = { 63, 62, -1, -1, 64, 61, -1, -1 }, [ PPC970MP_PME_PM_GRP_DISP ] = { -1, 31, -1, -1, -1, -1, -1, -1 }, [ PPC970MP_PME_PM_L2_PREF ] = { -1, -1, 35, 34, -1, 
-1, 34, 34 }, [ PPC970MP_PME_PM_FPU1_DENORM ] = { 18, 17, -1, -1, 18, 17, -1, -1 }, [ PPC970MP_PME_PM_DATA_FROM_L2 ] = { 3, -1, -1, -1, -1, -1, -1, -1 }, [ PPC970MP_PME_PM_FPU0_FPSCR ] = { -1, -1, 15, 16, -1, -1, 15, 16 }, [ PPC970MP_PME_PM_MRK_DATA_FROM_L25_MOD ] = { -1, -1, -1, -1, -1, 88, -1, -1 }, [ PPC970MP_PME_PM_FPU0_FSQRT ] = { 12, 11, -1, -1, 12, 11, -1, -1 }, [ PPC970MP_PME_PM_LD_REF_L1 ] = { -1, -1, -1, -1, -1, -1, -1, 37 }, [ PPC970MP_PME_PM_MRK_L1_RELOAD_VALID ] = { -1, -1, 52, 52, -1, -1, 51, 51 }, [ PPC970MP_PME_PM_1PLUS_PPC_CMPL ] = { -1, -1, -1, -1, 0, -1, -1, -1 }, [ PPC970MP_PME_PM_INST_FROM_L1 ] = { 38, -1, -1, -1, -1, -1, -1, -1 }, [ PPC970MP_PME_PM_EE_OFF_EXT_INT ] = { -1, -1, 9, 10, -1, -1, 9, 9 }, [ PPC970MP_PME_PM_PMC6_OVERFLOW ] = { -1, -1, -1, -1, -1, -1, 61, -1 }, [ PPC970MP_PME_PM_LSU_LRQ_FULL_CYC ] = { -1, -1, 49, 46, -1, -1, 46, 48 }, [ PPC970MP_PME_PM_IC_PREF_INSTALL ] = { 34, 34, -1, -1, 36, 33, -1, -1 }, [ PPC970MP_PME_PM_DC_PREF_OUT_OF_STREAMS ] = { -1, -1, 6, 7, -1, -1, 6, 6 }, [ PPC970MP_PME_PM_MRK_LSU1_FLUSH_SRQ ] = { -1, -1, 58, 58, -1, -1, 57, 57 }, [ PPC970MP_PME_PM_GCT_FULL_CYC ] = { 29, 28, -1, -1, 29, 28, -1, -1 }, [ PPC970MP_PME_PM_INST_FROM_MEM ] = { -1, 38, -1, -1, -1, -1, -1, -1 }, [ PPC970MP_PME_PM_FLUSH_LSU_BR_MPRED ] = { -1, -1, 11, 12, -1, -1, 11, 12 }, [ PPC970MP_PME_PM_FXU_BUSY ] = { -1, -1, -1, -1, -1, 27, -1, -1 }, [ PPC970MP_PME_PM_ST_REF_L1_LSU1 ] = { -1, -1, 68, 65, -1, -1, 65, 65 }, [ PPC970MP_PME_PM_MRK_LD_MISS_L1 ] = { 74, -1, -1, -1, -1, -1, -1, -1 }, [ PPC970MP_PME_PM_L1_WRITE_CYC ] = { -1, -1, 34, 33, -1, -1, 33, 33 }, [ PPC970MP_PME_PM_LSU1_BUSY ] = { 89, 85, -1, -1, 89, 84, -1, -1 }, [ PPC970MP_PME_PM_LSU_REJECT_LMQ_FULL ] = { -1, 67, -1, -1, -1, -1, -1, -1 }, [ PPC970MP_PME_PM_CMPLU_STALL_FDIV ] = { -1, -1, -1, -1, 87, -1, -1, -1 }, [ PPC970MP_PME_PM_FPU_ALL ] = { -1, -1, -1, -1, 26, -1, -1, -1 }, [ PPC970MP_PME_PM_LSU_SRQ_S0_ALLOC ] = { 68, 68, -1, -1, 69, 67, -1, -1 }, [ 
PPC970MP_PME_PM_INST_FROM_L25_SHR ] = { -1, -1, -1, -1, 40, -1, -1, -1 }, [ PPC970MP_PME_PM_GRP_MRK ] = { -1, -1, -1, -1, 35, -1, -1, -1 }, [ PPC970MP_PME_PM_BR_MPRED_CR ] = { -1, -1, 1, 2, -1, -1, 1, 1 }, [ PPC970MP_PME_PM_DC_PREF_STREAM_ALLOC ] = { -1, -1, 7, 8, -1, -1, 7, 7 }, [ PPC970MP_PME_PM_FPU1_FIN ] = { -1, -1, 18, 19, -1, -1, 18, 19 }, [ PPC970MP_PME_PM_LSU_REJECT_SRQ ] = { 67, -1, -1, -1, -1, -1, -1, -1 }, [ PPC970MP_PME_PM_BR_MPRED_TA ] = { -1, -1, 2, 3, -1, -1, 2, 2 }, [ PPC970MP_PME_PM_CRQ_FULL_CYC ] = { -1, -1, 3, 4, -1, -1, 3, 3 }, [ PPC970MP_PME_PM_LD_MISS_L1 ] = { -1, -1, 36, -1, -1, -1, -1, -1 }, [ PPC970MP_PME_PM_INST_FROM_PREF ] = { -1, -1, 31, -1, -1, -1, -1, -1 }, [ PPC970MP_PME_PM_STCX_PASS ] = { 84, 79, -1, -1, 80, 80, -1, -1 }, [ PPC970MP_PME_PM_DC_INV_L2 ] = { -1, -1, 5, 6, -1, -1, 5, 5 }, [ PPC970MP_PME_PM_LSU_SRQ_FULL_CYC ] = { -1, -1, 50, 48, -1, -1, 47, 49 }, [ PPC970MP_PME_PM_LSU0_FLUSH_LRQ ] = { 45, 44, -1, -1, 46, 43, -1, -1 }, [ PPC970MP_PME_PM_LSU_SRQ_S0_VALID ] = { 69, 69, -1, -1, 70, 68, -1, -1 }, [ PPC970MP_PME_PM_LARX_LSU0 ] = { 42, 41, -1, -1, 43, 40, -1, -1 }, [ PPC970MP_PME_PM_GCT_EMPTY_CYC ] = { 28, -1, -1, -1, -1, -1, -1, -1 }, [ PPC970MP_PME_PM_FPU1_ALL ] = { 17, 16, -1, -1, 17, 16, -1, -1 }, [ PPC970MP_PME_PM_FPU1_FSQRT ] = { 21, 20, -1, -1, 21, 20, -1, -1 }, [ PPC970MP_PME_PM_FPU_FIN ] = { -1, -1, -1, 22, -1, -1, -1, -1 }, [ PPC970MP_PME_PM_LSU_SRQ_STFWD ] = { 70, -1, -1, -1, -1, -1, -1, -1 }, [ PPC970MP_PME_PM_MRK_LD_MISS_L1_LSU1 ] = { 76, 73, -1, -1, 74, 73, -1, -1 }, [ PPC970MP_PME_PM_FXU0_FIN ] = { -1, -1, 24, 25, -1, -1, 25, 25 }, [ PPC970MP_PME_PM_MRK_FPU_FIN ] = { -1, -1, -1, -1, -1, -1, 49, -1 }, [ PPC970MP_PME_PM_PMC5_OVERFLOW ] = { -1, -1, -1, -1, -1, 77, -1, -1 }, [ PPC970MP_PME_PM_SNOOP_TLBIE ] = { 82, 77, -1, -1, 78, 78, -1, -1 }, [ PPC970MP_PME_PM_FPU1_FRSP_FCONV ] = { -1, -1, 20, 21, -1, -1, 20, 21 }, [ PPC970MP_PME_PM_FPU0_FDIV ] = { 10, 9, -1, -1, 10, 9, -1, -1 }, [ PPC970MP_PME_PM_LD_REF_L1_LSU1 ] = 
{ -1, -1, 40, 38, -1, -1, 38, 39 }, [ PPC970MP_PME_PM_HV_CYC ] = { -1, -1, 29, -1, -1, -1, -1, -1 }, [ PPC970MP_PME_PM_LR_CTR_MAP_FULL_CYC ] = { 43, 42, -1, -1, 44, 41, -1, -1 }, [ PPC970MP_PME_PM_FPU_DENORM ] = { 26, -1, -1, -1, -1, -1, -1, -1 }, [ PPC970MP_PME_PM_LSU0_REJECT_SRQ ] = { 52, 51, -1, -1, 53, 50, -1, -1 }, [ PPC970MP_PME_PM_LSU1_REJECT_SRQ ] = { 62, 61, -1, -1, 63, 60, -1, -1 }, [ PPC970MP_PME_PM_LSU1_DERAT_MISS ] = { 54, 53, -1, -1, 55, 52, -1, -1 }, [ PPC970MP_PME_PM_IC_PREF_REQ ] = { 35, 35, -1, -1, 37, 34, -1, -1 }, [ PPC970MP_PME_PM_MRK_LSU_FIN ] = { -1, -1, -1, -1, -1, -1, -1, 60 }, [ PPC970MP_PME_PM_MRK_DATA_FROM_MEM ] = { -1, 88, -1, -1, -1, -1, -1, -1 }, [ PPC970MP_PME_PM_CMPLU_STALL_DCACHE_MISS ] = { -1, -1, -1, -1, 86, -1, -1, -1 }, [ PPC970MP_PME_PM_LSU0_FLUSH_UST ] = { 48, 47, -1, -1, 49, 46, -1, -1 }, [ PPC970MP_PME_PM_LSU_FLUSH_LRQ ] = { -1, -1, -1, -1, -1, 63, -1, -1 }, [ PPC970MP_PME_PM_LSU_FLUSH_SRQ ] = { -1, -1, -1, -1, 65, -1, -1, -1 } };
/*
 * ppc970mp_group_vecs - per-event group vector table.
 *
 * One row per PPC970MP_PME_* event index (rows are placed with designated
 * initializers, so the row order here does not matter). Each row holds
 * PPC970MP_NUM_GROUP_VEC unsigned long long words; only the first word is
 * given explicitly below, so any further words (if PPC970MP_NUM_GROUP_VEC
 * is larger than 1) are zero-initialized by the compiler.
 *
 * Each entry of ppc970mp_pe[] points at its own row through
 * .pme_group_vector = ppc970mp_group_vecs[<same event index>].
 *
 * NOTE(review): presumably bit N set means the event is available in
 * event group N, and an all-zero row means the event is in no group --
 * confirm against the code that consumes pme_group_vector.
 */
static const unsigned long long ppc970mp_group_vecs[][PPC970MP_NUM_GROUP_VEC] = { [ PPC970MP_PME_PM_LSU_REJECT_RELOAD_CDF ] = { 0x0000000000000000ULL }, [ PPC970MP_PME_PM_MRK_LSU_SRQ_INST_VALID ] = { 0x0000000800000000ULL }, [ PPC970MP_PME_PM_FPU1_SINGLE ] = { 0x0000000000000000ULL }, [ PPC970MP_PME_PM_FPU0_STALL3 ] = { 0x0000000000002000ULL }, [ PPC970MP_PME_PM_TB_BIT_TRANS ] = { 0x0000000000080000ULL }, [ PPC970MP_PME_PM_GPR_MAP_FULL_CYC ] = { 0x0000000000000000ULL }, [ PPC970MP_PME_PM_MRK_ST_CMPL ] = { 0x0000000800000000ULL }, [ PPC970MP_PME_PM_FPU0_STF ] = { 0x0000000000000000ULL }, [ PPC970MP_PME_PM_FPU1_FMA ] = { 0x0000000000000400ULL }, [ PPC970MP_PME_PM_LSU1_FLUSH_ULD ] = { 0x0000000000008000ULL }, [ PPC970MP_PME_PM_MRK_INST_FIN ] = { 0x0004000200000000ULL }, [ PPC970MP_PME_PM_MRK_LSU0_FLUSH_UST ] = { 0x0000001000000000ULL }, [ PPC970MP_PME_PM_LSU_LRQ_S0_ALLOC ] = { 0x0000000010000000ULL }, [ PPC970MP_PME_PM_FPU_FDIV ] = { 0x0000100000900010ULL }, [ PPC970MP_PME_PM_FPU0_FULL_CYC ] = { 
0x0000000000000080ULL }, [ PPC970MP_PME_PM_FPU_SINGLE ] = { 0x0000000000000000ULL }, [ PPC970MP_PME_PM_FPU0_FMA ] = { 0x0000000000000400ULL }, [ PPC970MP_PME_PM_MRK_LSU1_FLUSH_ULD ] = { 0x0000001000000000ULL }, [ PPC970MP_PME_PM_LSU1_FLUSH_LRQ ] = { 0x0000000000004000ULL }, [ PPC970MP_PME_PM_DTLB_MISS ] = { 0x0000000010600000ULL }, [ PPC970MP_PME_PM_CMPLU_STALL_FXU ] = { 0x0000080000000000ULL }, [ PPC970MP_PME_PM_MRK_ST_MISS_L1 ] = { 0x0000001000000000ULL }, [ PPC970MP_PME_PM_EXT_INT ] = { 0x0000000000000200ULL }, [ PPC970MP_PME_PM_MRK_LSU1_FLUSH_LRQ ] = { 0x0000002000000000ULL }, [ PPC970MP_PME_PM_MRK_ST_GPS ] = { 0x0000000800000000ULL }, [ PPC970MP_PME_PM_GRP_DISP_SUCCESS ] = { 0x0000000000000000ULL }, [ PPC970MP_PME_PM_LSU1_LDF ] = { 0x0000000000000000ULL }, [ PPC970MP_PME_PM_LSU0_SRQ_STFWD ] = { 0x0000000000020000ULL }, [ PPC970MP_PME_PM_CR_MAP_FULL_CYC ] = { 0x0000000000000040ULL }, [ PPC970MP_PME_PM_MRK_LSU0_FLUSH_ULD ] = { 0x0000001000000000ULL }, [ PPC970MP_PME_PM_LSU_DERAT_MISS ] = { 0x0000040100000000ULL }, [ PPC970MP_PME_PM_FPU0_SINGLE ] = { 0x0000000000000000ULL }, [ PPC970MP_PME_PM_FPU1_FDIV ] = { 0x0000000000000400ULL }, [ PPC970MP_PME_PM_FPU1_FEST ] = { 0x0000000000001000ULL }, [ PPC970MP_PME_PM_FPU0_FRSP_FCONV ] = { 0x0000000000000400ULL }, [ PPC970MP_PME_PM_GCT_EMPTY_SRQ_FULL ] = { 0x0000080000000000ULL }, [ PPC970MP_PME_PM_MRK_ST_CMPL_INT ] = { 0x0000000800000000ULL }, [ PPC970MP_PME_PM_FLUSH_BR_MPRED ] = { 0x0000200000000000ULL }, [ PPC970MP_PME_PM_FXU_FIN ] = { 0x0000084000100000ULL }, [ PPC970MP_PME_PM_FPU_STF ] = { 0x0000000000800020ULL }, [ PPC970MP_PME_PM_DSLB_MISS ] = { 0x0000000004000000ULL }, [ PPC970MP_PME_PM_FXLS1_FULL_CYC ] = { 0x0000008000000080ULL }, [ PPC970MP_PME_PM_CMPLU_STALL_FPU ] = { 0x0000100000000000ULL }, [ PPC970MP_PME_PM_LSU_LMQ_LHR_MERGE ] = { 0x0000000000000000ULL }, [ PPC970MP_PME_PM_MRK_STCX_FAIL ] = { 0x0000000000000000ULL }, [ PPC970MP_PME_PM_FXU0_BUSY_FXU1_IDLE ] = { 0x0000004000000000ULL }, [ 
PPC970MP_PME_PM_CMPLU_STALL_LSU ] = { 0x0000020000000000ULL }, [ PPC970MP_PME_PM_MRK_DATA_FROM_L25_SHR ] = { 0x0004000000000000ULL }, [ PPC970MP_PME_PM_LSU_FLUSH_ULD ] = { 0x0000000000000008ULL }, [ PPC970MP_PME_PM_MRK_BRU_FIN ] = { 0x0000000400000000ULL }, [ PPC970MP_PME_PM_IERAT_XLATE_WR ] = { 0x0000000080000000ULL }, [ PPC970MP_PME_PM_GCT_EMPTY_BR_MPRED ] = { 0x0000200000000000ULL }, [ PPC970MP_PME_PM_LSU0_BUSY ] = { 0x0000020003020000ULL }, [ PPC970MP_PME_PM_DATA_FROM_MEM ] = { 0x0003000008000000ULL }, [ PPC970MP_PME_PM_FPR_MAP_FULL_CYC ] = { 0x0000000000000000ULL }, [ PPC970MP_PME_PM_FPU1_FULL_CYC ] = { 0x0000000000000080ULL }, [ PPC970MP_PME_PM_FPU0_FIN ] = { 0x0000000000802800ULL }, [ PPC970MP_PME_PM_GRP_BR_REDIR ] = { 0x0000000000000000ULL }, [ PPC970MP_PME_PM_GCT_EMPTY_IC_MISS ] = { 0x0000200000000000ULL }, [ PPC970MP_PME_PM_THRESH_TIMEO ] = { 0x0000000200000000ULL }, [ PPC970MP_PME_PM_FPU_FSQRT ] = { 0x0000100000100010ULL }, [ PPC970MP_PME_PM_MRK_LSU0_FLUSH_LRQ ] = { 0x0000002000000000ULL }, [ PPC970MP_PME_PM_PMC1_OVERFLOW ] = { 0x0000000000000000ULL }, [ PPC970MP_PME_PM_FXLS0_FULL_CYC ] = { 0x0000008000000080ULL }, [ PPC970MP_PME_PM_FPU0_ALL ] = { 0x0000000000000800ULL }, [ PPC970MP_PME_PM_DATA_TABLEWALK_CYC ] = { 0x0000000020000000ULL }, [ PPC970MP_PME_PM_FPU0_FEST ] = { 0x0000000000001000ULL }, [ PPC970MP_PME_PM_DATA_FROM_L25_MOD ] = { 0x0002400000000000ULL }, [ PPC970MP_PME_PM_LSU0_REJECT_ERAT_MISS ] = { 0x0000000000000000ULL }, [ PPC970MP_PME_PM_LSU_LMQ_SRQ_EMPTY_CYC ] = { 0x0000000000480000ULL }, [ PPC970MP_PME_PM_LSU0_REJECT_RELOAD_CDF ] = { 0x0000000000000000ULL }, [ PPC970MP_PME_PM_FPU_FEST ] = { 0x0000000000000010ULL }, [ PPC970MP_PME_PM_0INST_FETCH ] = { 0x0000010000000000ULL }, [ PPC970MP_PME_PM_LD_MISS_L1_LSU0 ] = { 0x0001000000008000ULL }, [ PPC970MP_PME_PM_LSU1_REJECT_RELOAD_CDF ] = { 0x0000000000000000ULL }, [ PPC970MP_PME_PM_L1_PREF ] = { 0x0000000010000000ULL }, [ PPC970MP_PME_PM_FPU1_STALL3 ] = { 0x0000000000002000ULL }, [ 
PPC970MP_PME_PM_BRQ_FULL_CYC ] = { 0x0000000000000000ULL }, [ PPC970MP_PME_PM_PMC8_OVERFLOW ] = { 0x0000000000000000ULL }, [ PPC970MP_PME_PM_PMC7_OVERFLOW ] = { 0x0000000000000000ULL }, [ PPC970MP_PME_PM_WORK_HELD ] = { 0x0000000000000200ULL }, [ PPC970MP_PME_PM_MRK_LD_MISS_L1_LSU0 ] = { 0x0000002000000000ULL }, [ PPC970MP_PME_PM_FXU_IDLE ] = { 0x000000c000000000ULL }, [ PPC970MP_PME_PM_INST_CMPL ] = { 0x0007fffbffffffffULL }, [ PPC970MP_PME_PM_LSU1_FLUSH_UST ] = { 0x0000000000010000ULL }, [ PPC970MP_PME_PM_LSU0_FLUSH_ULD ] = { 0x0000000000008000ULL }, [ PPC970MP_PME_PM_LSU_FLUSH ] = { 0x0000020000000000ULL }, [ PPC970MP_PME_PM_INST_FROM_L2 ] = { 0x0000800020000000ULL }, [ PPC970MP_PME_PM_LSU1_REJECT_LMQ_FULL ] = { 0x0000000000000000ULL }, [ PPC970MP_PME_PM_PMC2_OVERFLOW ] = { 0x0000000000000000ULL }, [ PPC970MP_PME_PM_FPU0_DENORM ] = { 0x0000000000001000ULL }, [ PPC970MP_PME_PM_FPU1_FMOV_FEST ] = { 0x0000000000001000ULL }, [ PPC970MP_PME_PM_INST_FETCH_CYC ] = { 0x0000010000000000ULL }, [ PPC970MP_PME_PM_GRP_DISP_REJECT ] = { 0x0000000000000101ULL }, [ PPC970MP_PME_PM_LSU_LDF ] = { 0x0000000000800020ULL }, [ PPC970MP_PME_PM_INST_DISP ] = { 0x0000000100000146ULL }, [ PPC970MP_PME_PM_DATA_FROM_L25_SHR ] = { 0x0002400000000000ULL }, [ PPC970MP_PME_PM_L1_DCACHE_RELOAD_VALID ] = { 0x0000000100040000ULL }, [ PPC970MP_PME_PM_MRK_GRP_ISSUED ] = { 0x0000000200000000ULL }, [ PPC970MP_PME_PM_FPU_FMA ] = { 0x0000100000900010ULL }, [ PPC970MP_PME_PM_MRK_CRU_FIN ] = { 0x0000000400000000ULL }, [ PPC970MP_PME_PM_CMPLU_STALL_REJECT ] = { 0x0000040000000000ULL }, [ PPC970MP_PME_PM_MRK_LSU1_FLUSH_UST ] = { 0x0000001000000000ULL }, [ PPC970MP_PME_PM_MRK_FXU_FIN ] = { 0x0000000400000000ULL }, [ PPC970MP_PME_PM_LSU1_REJECT_ERAT_MISS ] = { 0x0000000000000000ULL }, [ PPC970MP_PME_PM_BR_ISSUED ] = { 0x0000800007000000ULL }, [ PPC970MP_PME_PM_PMC4_OVERFLOW ] = { 0x0000000000000000ULL }, [ PPC970MP_PME_PM_EE_OFF ] = { 0x0000000000000200ULL }, [ PPC970MP_PME_PM_INST_FROM_L25_MOD ] = { 
0x0000010000000000ULL }, [ PPC970MP_PME_PM_CMPLU_STALL_ERAT_MISS ] = { 0x0000020000000000ULL }, [ PPC970MP_PME_PM_ITLB_MISS ] = { 0x0000000010200000ULL }, [ PPC970MP_PME_PM_FXU1_BUSY_FXU0_IDLE ] = { 0x0000004000000000ULL }, [ PPC970MP_PME_PM_GRP_DISP_VALID ] = { 0x0000000100000100ULL }, [ PPC970MP_PME_PM_MRK_GRP_DISP ] = { 0x0000000400000000ULL }, [ PPC970MP_PME_PM_LSU_FLUSH_UST ] = { 0x0000000000000008ULL }, [ PPC970MP_PME_PM_FXU1_FIN ] = { 0x0000008000000100ULL }, [ PPC970MP_PME_PM_GRP_CMPL ] = { 0x0000000020080001ULL }, [ PPC970MP_PME_PM_FPU_FRSP_FCONV ] = { 0x0000000000000020ULL }, [ PPC970MP_PME_PM_MRK_LSU0_FLUSH_SRQ ] = { 0x0000002000000000ULL }, [ PPC970MP_PME_PM_CMPLU_STALL_OTHER ] = { 0x0000040000000000ULL }, [ PPC970MP_PME_PM_LSU_LMQ_FULL_CYC ] = { 0x0000000000000000ULL }, [ PPC970MP_PME_PM_ST_REF_L1_LSU0 ] = { 0x0000000000030000ULL }, [ PPC970MP_PME_PM_LSU0_DERAT_MISS ] = { 0x0000000000040000ULL }, [ PPC970MP_PME_PM_LSU_SRQ_SYNC_CYC ] = { 0x0000000040000000ULL }, [ PPC970MP_PME_PM_FPU_STALL3 ] = { 0x0000000000000020ULL }, [ PPC970MP_PME_PM_LSU_REJECT_ERAT_MISS ] = { 0x0000000000000000ULL }, [ PPC970MP_PME_PM_MRK_DATA_FROM_L2 ] = { 0x0004000000000000ULL }, [ PPC970MP_PME_PM_LSU0_FLUSH_SRQ ] = { 0x0000000000004000ULL }, [ PPC970MP_PME_PM_FPU0_FMOV_FEST ] = { 0x0000000000001000ULL }, [ PPC970MP_PME_PM_IOPS_CMPL ] = { 0x0000100000000000ULL }, [ PPC970MP_PME_PM_LD_REF_L1_LSU0 ] = { 0x0000000000008000ULL }, [ PPC970MP_PME_PM_LSU1_FLUSH_SRQ ] = { 0x0000000000004000ULL }, [ PPC970MP_PME_PM_CMPLU_STALL_DIV ] = { 0x0000080000000000ULL }, [ PPC970MP_PME_PM_GRP_BR_MPRED ] = { 0x0000000000000000ULL }, [ PPC970MP_PME_PM_LSU_LMQ_S0_ALLOC ] = { 0x0000400008000000ULL }, [ PPC970MP_PME_PM_LSU0_REJECT_LMQ_FULL ] = { 0x0000000000000000ULL }, [ PPC970MP_PME_PM_ST_REF_L1 ] = { 0x000000010260000eULL }, [ PPC970MP_PME_PM_MRK_VMX_FIN ] = { 0x0000000000000000ULL }, [ PPC970MP_PME_PM_LSU_SRQ_EMPTY_CYC ] = { 0x0000000000000000ULL }, [ PPC970MP_PME_PM_FPU1_STF ] = { 
0x0000000000000000ULL }, [ PPC970MP_PME_PM_RUN_CYC ] = { 0x0000000004000001ULL }, [ PPC970MP_PME_PM_LSU_LMQ_S0_VALID ] = { 0x0000400008000000ULL }, [ PPC970MP_PME_PM_LSU0_LDF ] = { 0x0000000000000000ULL }, [ PPC970MP_PME_PM_LSU_LRQ_S0_VALID ] = { 0x0000000010000000ULL }, [ PPC970MP_PME_PM_PMC3_OVERFLOW ] = { 0x0000000000000000ULL }, [ PPC970MP_PME_PM_MRK_IMR_RELOAD ] = { 0x0000001000000000ULL }, [ PPC970MP_PME_PM_MRK_GRP_TIMEO ] = { 0x0000000800000000ULL }, [ PPC970MP_PME_PM_FPU_FMOV_FEST ] = { 0x0000000000100010ULL }, [ PPC970MP_PME_PM_GRP_DISP_BLK_SB_CYC ] = { 0x0000000000000040ULL }, [ PPC970MP_PME_PM_XER_MAP_FULL_CYC ] = { 0x0000000000000040ULL }, [ PPC970MP_PME_PM_ST_MISS_L1 ] = { 0x0000000000610000ULL }, [ PPC970MP_PME_PM_STOP_COMPLETION ] = { 0x0000000000000201ULL }, [ PPC970MP_PME_PM_MRK_GRP_CMPL ] = { 0x0000000a00000000ULL }, [ PPC970MP_PME_PM_ISLB_MISS ] = { 0x0000000004000000ULL }, [ PPC970MP_PME_PM_SUSPENDED ] = { 0x0000000000000000ULL }, [ PPC970MP_PME_PM_CYC ] = { 0x0007ffffffffffffULL }, [ PPC970MP_PME_PM_LD_MISS_L1_LSU1 ] = { 0x0003000000008000ULL }, [ PPC970MP_PME_PM_STCX_FAIL ] = { 0x0000000000000000ULL }, [ PPC970MP_PME_PM_LSU1_SRQ_STFWD ] = { 0x0000000000020000ULL }, [ PPC970MP_PME_PM_GRP_DISP ] = { 0x0000000000000000ULL }, [ PPC970MP_PME_PM_L2_PREF ] = { 0x0000000010000000ULL }, [ PPC970MP_PME_PM_FPU1_DENORM ] = { 0x0000000000001000ULL }, [ PPC970MP_PME_PM_DATA_FROM_L2 ] = { 0x0003000008000000ULL }, [ PPC970MP_PME_PM_FPU0_FPSCR ] = { 0x0000000000002000ULL }, [ PPC970MP_PME_PM_MRK_DATA_FROM_L25_MOD ] = { 0x0004000000000000ULL }, [ PPC970MP_PME_PM_FPU0_FSQRT ] = { 0x0000000000000800ULL }, [ PPC970MP_PME_PM_LD_REF_L1 ] = { 0x000304004260000eULL }, [ PPC970MP_PME_PM_MRK_L1_RELOAD_VALID ] = { 0x0004000000000000ULL }, [ PPC970MP_PME_PM_1PLUS_PPC_CMPL ] = { 0x0001000000080001ULL }, [ PPC970MP_PME_PM_INST_FROM_L1 ] = { 0x0000010080000000ULL }, [ PPC970MP_PME_PM_EE_OFF_EXT_INT ] = { 0x0000000000000200ULL }, [ PPC970MP_PME_PM_PMC6_OVERFLOW ] = { 
0x0000000000000000ULL }, [ PPC970MP_PME_PM_LSU_LRQ_FULL_CYC ] = { 0x0000000000000080ULL }, [ PPC970MP_PME_PM_IC_PREF_INSTALL ] = { 0x0000000000000000ULL }, [ PPC970MP_PME_PM_DC_PREF_OUT_OF_STREAMS ] = { 0x0000000000000000ULL }, [ PPC970MP_PME_PM_MRK_LSU1_FLUSH_SRQ ] = { 0x0000002000000000ULL }, [ PPC970MP_PME_PM_GCT_FULL_CYC ] = { 0x0000000000000000ULL }, [ PPC970MP_PME_PM_INST_FROM_MEM ] = { 0x0000810020000000ULL }, [ PPC970MP_PME_PM_FLUSH_LSU_BR_MPRED ] = { 0x0000020000000000ULL }, [ PPC970MP_PME_PM_FXU_BUSY ] = { 0x000008c000000000ULL }, [ PPC970MP_PME_PM_ST_REF_L1_LSU1 ] = { 0x0000000000030000ULL }, [ PPC970MP_PME_PM_MRK_LD_MISS_L1 ] = { 0x0000000200000000ULL }, [ PPC970MP_PME_PM_L1_WRITE_CYC ] = { 0x0000200000000000ULL }, [ PPC970MP_PME_PM_LSU1_BUSY ] = { 0x0000020000000000ULL }, [ PPC970MP_PME_PM_LSU_REJECT_LMQ_FULL ] = { 0x0000000000000000ULL }, [ PPC970MP_PME_PM_CMPLU_STALL_FDIV ] = { 0x0000100000000000ULL }, [ PPC970MP_PME_PM_FPU_ALL ] = { 0x0000000000000020ULL }, [ PPC970MP_PME_PM_LSU_SRQ_S0_ALLOC ] = { 0x0000000040000000ULL }, [ PPC970MP_PME_PM_INST_FROM_L25_SHR ] = { 0x0000000000000000ULL }, [ PPC970MP_PME_PM_GRP_MRK ] = { 0x0000000600000000ULL }, [ PPC970MP_PME_PM_BR_MPRED_CR ] = { 0x0000800005000000ULL }, [ PPC970MP_PME_PM_DC_PREF_STREAM_ALLOC ] = { 0x0000000000000000ULL }, [ PPC970MP_PME_PM_FPU1_FIN ] = { 0x0000000000802800ULL }, [ PPC970MP_PME_PM_LSU_REJECT_SRQ ] = { 0x0000000000000000ULL }, [ PPC970MP_PME_PM_BR_MPRED_TA ] = { 0x0000a00005000000ULL }, [ PPC970MP_PME_PM_CRQ_FULL_CYC ] = { 0x0000000000000040ULL }, [ PPC970MP_PME_PM_LD_MISS_L1 ] = { 0x0000040043600006ULL }, [ PPC970MP_PME_PM_INST_FROM_PREF ] = { 0x0000810000000000ULL }, [ PPC970MP_PME_PM_STCX_PASS ] = { 0x0000000000000000ULL }, [ PPC970MP_PME_PM_DC_INV_L2 ] = { 0x0000000020010006ULL }, [ PPC970MP_PME_PM_LSU_SRQ_FULL_CYC ] = { 0x0000000000000080ULL }, [ PPC970MP_PME_PM_LSU0_FLUSH_LRQ ] = { 0x0000000000004000ULL }, [ PPC970MP_PME_PM_LSU_SRQ_S0_VALID ] = { 0x0000000040000000ULL }, [ 
PPC970MP_PME_PM_LARX_LSU0 ] = { 0x0000000000000000ULL }, [ PPC970MP_PME_PM_GCT_EMPTY_CYC ] = { 0x0000200100080200ULL }, [ PPC970MP_PME_PM_FPU1_ALL ] = { 0x0000000000000800ULL }, [ PPC970MP_PME_PM_FPU1_FSQRT ] = { 0x0000000000000800ULL }, [ PPC970MP_PME_PM_FPU_FIN ] = { 0x0000080000100010ULL }, [ PPC970MP_PME_PM_LSU_SRQ_STFWD ] = { 0x0000000000000000ULL }, [ PPC970MP_PME_PM_MRK_LD_MISS_L1_LSU1 ] = { 0x0000002000000000ULL }, [ PPC970MP_PME_PM_FXU0_FIN ] = { 0x0000008000000100ULL }, [ PPC970MP_PME_PM_MRK_FPU_FIN ] = { 0x0000000400000000ULL }, [ PPC970MP_PME_PM_PMC5_OVERFLOW ] = { 0x0000000000000000ULL }, [ PPC970MP_PME_PM_SNOOP_TLBIE ] = { 0x0000000000000000ULL }, [ PPC970MP_PME_PM_FPU1_FRSP_FCONV ] = { 0x0000000000000400ULL }, [ PPC970MP_PME_PM_FPU0_FDIV ] = { 0x0000000000000400ULL }, [ PPC970MP_PME_PM_LD_REF_L1_LSU1 ] = { 0x0000000000008000ULL }, [ PPC970MP_PME_PM_HV_CYC ] = { 0x0000000020080000ULL }, [ PPC970MP_PME_PM_LR_CTR_MAP_FULL_CYC ] = { 0x0000000000000040ULL }, [ PPC970MP_PME_PM_FPU_DENORM ] = { 0x0000000000000020ULL }, [ PPC970MP_PME_PM_LSU0_REJECT_SRQ ] = { 0x0000000000000000ULL }, [ PPC970MP_PME_PM_LSU1_REJECT_SRQ ] = { 0x0000000000000000ULL }, [ PPC970MP_PME_PM_LSU1_DERAT_MISS ] = { 0x0000000000040000ULL }, [ PPC970MP_PME_PM_IC_PREF_REQ ] = { 0x0000000000000000ULL }, [ PPC970MP_PME_PM_MRK_LSU_FIN ] = { 0x0000000400000000ULL }, [ PPC970MP_PME_PM_MRK_DATA_FROM_MEM ] = { 0x0004000000000000ULL }, [ PPC970MP_PME_PM_CMPLU_STALL_DCACHE_MISS ] = { 0x0000040000000000ULL }, [ PPC970MP_PME_PM_LSU0_FLUSH_UST ] = { 0x0000000000010000ULL }, [ PPC970MP_PME_PM_LSU_FLUSH_LRQ ] = { 0x0000000000000008ULL }, [ PPC970MP_PME_PM_LSU_FLUSH_SRQ ] = { 0x0000000000000008ULL } }; static const pme_power_entry_t ppc970mp_pe[] = { [ PPC970MP_PME_PM_LSU_REJECT_RELOAD_CDF ] = { .pme_name = "PM_LSU_REJECT_RELOAD_CDF", .pme_code = 0x6920, .pme_short_desc = "LSU reject due to reload CDF or tag update collision", .pme_long_desc = "LSU reject due to reload CDF or tag update collision", 
.pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LSU_REJECT_RELOAD_CDF], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LSU_REJECT_RELOAD_CDF] }, [ PPC970MP_PME_PM_MRK_LSU_SRQ_INST_VALID ] = { .pme_name = "PM_MRK_LSU_SRQ_INST_VALID", .pme_code = 0x936, .pme_short_desc = "Marked instruction valid in SRQ", .pme_long_desc = "This signal is asserted every cycle when a marked request is resident in the Store Request Queue", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_MRK_LSU_SRQ_INST_VALID], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_MRK_LSU_SRQ_INST_VALID] }, [ PPC970MP_PME_PM_FPU1_SINGLE ] = { .pme_name = "PM_FPU1_SINGLE", .pme_code = 0x127, .pme_short_desc = "FPU1 executed single precision instruction", .pme_long_desc = "This signal is active for one cycle when fp1 is executing single precision instruction.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_FPU1_SINGLE], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_FPU1_SINGLE] }, [ PPC970MP_PME_PM_FPU0_STALL3 ] = { .pme_name = "PM_FPU0_STALL3", .pme_code = 0x121, .pme_short_desc = "FPU0 stalled in pipe3", .pme_long_desc = "This signal indicates that fp0 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). This signal is active during the entire duration of the stall. 
", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_FPU0_STALL3], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_FPU0_STALL3] }, [ PPC970MP_PME_PM_TB_BIT_TRANS ] = { .pme_name = "PM_TB_BIT_TRANS", .pme_code = 0x8005, .pme_short_desc = "Time Base bit transition", .pme_long_desc = "When the selected time base bit (as specified in MMCR0[TBSEL])transitions from 0 to 1 ", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_TB_BIT_TRANS], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_TB_BIT_TRANS] }, [ PPC970MP_PME_PM_GPR_MAP_FULL_CYC ] = { .pme_name = "PM_GPR_MAP_FULL_CYC", .pme_code = 0x335, .pme_short_desc = "Cycles GPR mapper full", .pme_long_desc = "The ISU sends a signal indicating that the gpr mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_GPR_MAP_FULL_CYC], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_GPR_MAP_FULL_CYC] }, [ PPC970MP_PME_PM_MRK_ST_CMPL ] = { .pme_name = "PM_MRK_ST_CMPL", .pme_code = 0x1003, .pme_short_desc = "Marked store instruction completed", .pme_long_desc = "A sampled store has completed (data home)", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_MRK_ST_CMPL], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_MRK_ST_CMPL] }, [ PPC970MP_PME_PM_FPU0_STF ] = { .pme_name = "PM_FPU0_STF", .pme_code = 0x122, .pme_short_desc = "FPU0 executed store instruction", .pme_long_desc = "This signal is active for one cycle when fp0 is executing a store instruction.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_FPU0_STF], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_FPU0_STF] }, [ PPC970MP_PME_PM_FPU1_FMA ] = { .pme_name = "PM_FPU1_FMA", .pme_code = 0x105, .pme_short_desc = "FPU1 executed multiply-add instruction", .pme_long_desc = "This signal is active for one cycle when fp1 is executing multiply-add kind of instruction. 
This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_FPU1_FMA], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_FPU1_FMA] }, [ PPC970MP_PME_PM_LSU1_FLUSH_ULD ] = { .pme_name = "PM_LSU1_FLUSH_ULD", .pme_code = 0x804, .pme_short_desc = "LSU1 unaligned load flushes", .pme_long_desc = "A load was flushed from unit 1 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1)", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LSU1_FLUSH_ULD], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LSU1_FLUSH_ULD] }, [ PPC970MP_PME_PM_MRK_INST_FIN ] = { .pme_name = "PM_MRK_INST_FIN", .pme_code = 0x7005, .pme_short_desc = "Marked instruction finished", .pme_long_desc = "One of the execution units finished a marked instruction. Instructions that finish may not necessary complete", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_MRK_INST_FIN], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_MRK_INST_FIN] }, [ PPC970MP_PME_PM_MRK_LSU0_FLUSH_UST ] = { .pme_name = "PM_MRK_LSU0_FLUSH_UST", .pme_code = 0x711, .pme_short_desc = "LSU0 marked unaligned store flushes", .pme_long_desc = "A marked store was flushed from unit 0 because it was unaligned", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_MRK_LSU0_FLUSH_UST], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_MRK_LSU0_FLUSH_UST] }, [ PPC970MP_PME_PM_LSU_LRQ_S0_ALLOC ] = { .pme_name = "PM_LSU_LRQ_S0_ALLOC", .pme_code = 0x826, .pme_short_desc = "LRQ slot 0 allocated", .pme_long_desc = "LRQ slot zero was allocated", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LSU_LRQ_S0_ALLOC], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LSU_LRQ_S0_ALLOC] }, [ PPC970MP_PME_PM_FPU_FDIV ] = { .pme_name = "PM_FPU_FDIV", .pme_code = 0x1100, .pme_short_desc = "FPU executed FDIV instruction", .pme_long_desc = "This signal is active for one cycle at the end of the microcode executed when 
FPU is executing a divide instruction. This could be fdiv, fdivs, fdiv. fdivs. Combined Unit 0 + Unit 1", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_FPU_FDIV], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_FPU_FDIV] }, [ PPC970MP_PME_PM_FPU0_FULL_CYC ] = { .pme_name = "PM_FPU0_FULL_CYC", .pme_code = 0x303, .pme_short_desc = "Cycles FPU0 issue queue full", .pme_long_desc = "The issue queue for FPU unit 0 cannot accept any more instructions. Issue is stopped", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_FPU0_FULL_CYC], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_FPU0_FULL_CYC] }, [ PPC970MP_PME_PM_FPU_SINGLE ] = { .pme_name = "PM_FPU_SINGLE", .pme_code = 0x5120, .pme_short_desc = "FPU executed single precision instruction", .pme_long_desc = "FPU is executing single precision instruction. Combined Unit 0 + Unit 1", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_FPU_SINGLE], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_FPU_SINGLE] }, [ PPC970MP_PME_PM_FPU0_FMA ] = { .pme_name = "PM_FPU0_FMA", .pme_code = 0x101, .pme_short_desc = "FPU0 executed multiply-add instruction", .pme_long_desc = "This signal is active for one cycle when fp0 is executing multiply-add kind of instruction. 
This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_FPU0_FMA], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_FPU0_FMA] }, [ PPC970MP_PME_PM_MRK_LSU1_FLUSH_ULD ] = { .pme_name = "PM_MRK_LSU1_FLUSH_ULD", .pme_code = 0x714, .pme_short_desc = "LSU1 marked unaligned load flushes", .pme_long_desc = "A marked load was flushed from unit 1 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1)", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_MRK_LSU1_FLUSH_ULD], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_MRK_LSU1_FLUSH_ULD] }, [ PPC970MP_PME_PM_LSU1_FLUSH_LRQ ] = { .pme_name = "PM_LSU1_FLUSH_LRQ", .pme_code = 0x806, .pme_short_desc = "LSU1 LRQ flushes", .pme_long_desc = "A load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LSU1_FLUSH_LRQ], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LSU1_FLUSH_LRQ] }, [ PPC970MP_PME_PM_DTLB_MISS ] = { .pme_name = "PM_DTLB_MISS", .pme_code = 0x704, .pme_short_desc = "Data TLB misses", .pme_long_desc = "A TLB miss for a data request occurred. Requests that miss the TLB may be retried until the instruction is in the next to complete group (unless HID4 is set to allow speculative tablewalks). 
This may result in multiple TLB misses for the same instruction.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_DTLB_MISS], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_DTLB_MISS] }, [ PPC970MP_PME_PM_CMPLU_STALL_FXU ] = { .pme_name = "PM_CMPLU_STALL_FXU", .pme_code = 0x508b, .pme_short_desc = "Completion stall caused by FXU instruction", .pme_long_desc = "Completion stall caused by FXU instruction", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_CMPLU_STALL_FXU], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_CMPLU_STALL_FXU] }, [ PPC970MP_PME_PM_MRK_ST_MISS_L1 ] = { .pme_name = "PM_MRK_ST_MISS_L1", .pme_code = 0x723, .pme_short_desc = "Marked L1 D cache store misses", .pme_long_desc = "A marked store missed the dcache", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_MRK_ST_MISS_L1], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_MRK_ST_MISS_L1] }, [ PPC970MP_PME_PM_EXT_INT ] = { .pme_name = "PM_EXT_INT", .pme_code = 0x8002, .pme_short_desc = "External interrupts", .pme_long_desc = "An external interrupt occurred", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_EXT_INT], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_EXT_INT] }, [ PPC970MP_PME_PM_MRK_LSU1_FLUSH_LRQ ] = { .pme_name = "PM_MRK_LSU1_FLUSH_LRQ", .pme_code = 0x716, .pme_short_desc = "LSU1 marked LRQ flushes", .pme_long_desc = "A marked load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_MRK_LSU1_FLUSH_LRQ], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_MRK_LSU1_FLUSH_LRQ] }, [ PPC970MP_PME_PM_MRK_ST_GPS ] = { .pme_name = "PM_MRK_ST_GPS", .pme_code = 0x6003, .pme_short_desc = "Marked store sent to GPS", .pme_long_desc = "A sampled store has been sent to the memory subsystem", .pme_event_ids = 
ppc970mp_event_ids[PPC970MP_PME_PM_MRK_ST_GPS], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_MRK_ST_GPS] }, [ PPC970MP_PME_PM_GRP_DISP_SUCCESS ] = { .pme_name = "PM_GRP_DISP_SUCCESS", .pme_code = 0x5001, .pme_short_desc = "Group dispatch success", .pme_long_desc = "Number of groups sucessfully dispatched (not rejected)", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_GRP_DISP_SUCCESS], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_GRP_DISP_SUCCESS] }, [ PPC970MP_PME_PM_LSU1_LDF ] = { .pme_name = "PM_LSU1_LDF", .pme_code = 0x734, .pme_short_desc = "LSU1 executed Floating Point load instruction", .pme_long_desc = "A floating point load was executed from LSU unit 1", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LSU1_LDF], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LSU1_LDF] }, [ PPC970MP_PME_PM_LSU0_SRQ_STFWD ] = { .pme_name = "PM_LSU0_SRQ_STFWD", .pme_code = 0x820, .pme_short_desc = "LSU0 SRQ store forwarded", .pme_long_desc = "Data from a store instruction was forwarded to a load on unit 0", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LSU0_SRQ_STFWD], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LSU0_SRQ_STFWD] }, [ PPC970MP_PME_PM_CR_MAP_FULL_CYC ] = { .pme_name = "PM_CR_MAP_FULL_CYC", .pme_code = 0x304, .pme_short_desc = "Cycles CR logical operation mapper full", .pme_long_desc = "The ISU sends a signal indicating that the cr mapper cannot accept any more groups. Dispatch is stopped. 
Note: this condition indicates that a pool of mapper is full but the entire mapper may not be.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_CR_MAP_FULL_CYC], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_CR_MAP_FULL_CYC] }, [ PPC970MP_PME_PM_MRK_LSU0_FLUSH_ULD ] = { .pme_name = "PM_MRK_LSU0_FLUSH_ULD", .pme_code = 0x710, .pme_short_desc = "LSU0 marked unaligned load flushes", .pme_long_desc = "A marked load was flushed from unit 0 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1)", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_MRK_LSU0_FLUSH_ULD], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_MRK_LSU0_FLUSH_ULD] }, [ PPC970MP_PME_PM_LSU_DERAT_MISS ] = { .pme_name = "PM_LSU_DERAT_MISS", .pme_code = 0x6700, .pme_short_desc = "DERAT misses", .pme_long_desc = "Total D-ERAT Misses (Unit 0 + Unit 1). Requests that miss the Derat are rejected and retried until the request hits in the Erat. This may result in multiple erat misses for the same instruction.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LSU_DERAT_MISS], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LSU_DERAT_MISS] }, [ PPC970MP_PME_PM_FPU0_SINGLE ] = { .pme_name = "PM_FPU0_SINGLE", .pme_code = 0x123, .pme_short_desc = "FPU0 executed single precision instruction", .pme_long_desc = "This signal is active for one cycle when fp0 is executing single precision instruction.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_FPU0_SINGLE], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_FPU0_SINGLE] }, [ PPC970MP_PME_PM_FPU1_FDIV ] = { .pme_name = "PM_FPU1_FDIV", .pme_code = 0x104, .pme_short_desc = "FPU1 executed FDIV instruction", .pme_long_desc = "This signal is active for one cycle at the end of the microcode executed when fp1 is executing a divide instruction. This could be fdiv, fdivs, fdiv. 
fdivs.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_FPU1_FDIV], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_FPU1_FDIV] }, [ PPC970MP_PME_PM_FPU1_FEST ] = { .pme_name = "PM_FPU1_FEST", .pme_code = 0x116, .pme_short_desc = "FPU1 executed FEST instruction", .pme_long_desc = "This signal is active for one cycle when fp1 is executing one of the estimate instructions. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. ", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_FPU1_FEST], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_FPU1_FEST] }, [ PPC970MP_PME_PM_FPU0_FRSP_FCONV ] = { .pme_name = "PM_FPU0_FRSP_FCONV", .pme_code = 0x111, .pme_short_desc = "FPU0 executed FRSP or FCONV instructions", .pme_long_desc = "This signal is active for one cycle when fp0 is executing frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_FPU0_FRSP_FCONV], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_FPU0_FRSP_FCONV] }, [ PPC970MP_PME_PM_GCT_EMPTY_SRQ_FULL ] = { .pme_name = "PM_GCT_EMPTY_SRQ_FULL", .pme_code = 0x200b, .pme_short_desc = "GCT empty caused by SRQ full", .pme_long_desc = "GCT empty caused by SRQ full", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_GCT_EMPTY_SRQ_FULL], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_GCT_EMPTY_SRQ_FULL] }, [ PPC970MP_PME_PM_MRK_ST_CMPL_INT ] = { .pme_name = "PM_MRK_ST_CMPL_INT", .pme_code = 0x3003, .pme_short_desc = "Marked store completed with intervention", .pme_long_desc = "A marked store previously sent to the memory subsystem completed (data home) after requiring intervention", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_MRK_ST_CMPL_INT], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_MRK_ST_CMPL_INT] }, [ PPC970MP_PME_PM_FLUSH_BR_MPRED ] = { .pme_name = "PM_FLUSH_BR_MPRED", .pme_code = 0x316, .pme_short_desc = "Flush caused by branch mispredict", 
.pme_long_desc = "Flush caused by branch mispredict", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_FLUSH_BR_MPRED], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_FLUSH_BR_MPRED] }, [ PPC970MP_PME_PM_FXU_FIN ] = { .pme_name = "PM_FXU_FIN", .pme_code = 0x3330, .pme_short_desc = "FXU produced a result", .pme_long_desc = "The fixed point unit (Unit 0 + Unit 1) finished an instruction. Instructions that finish may not necessary complete.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_FXU_FIN], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_FXU_FIN] }, [ PPC970MP_PME_PM_FPU_STF ] = { .pme_name = "PM_FPU_STF", .pme_code = 0x6120, .pme_short_desc = "FPU executed store instruction", .pme_long_desc = "FPU is executing a store instruction. Combined Unit 0 + Unit 1", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_FPU_STF], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_FPU_STF] }, [ PPC970MP_PME_PM_DSLB_MISS ] = { .pme_name = "PM_DSLB_MISS", .pme_code = 0x705, .pme_short_desc = "Data SLB misses", .pme_long_desc = "A SLB miss for a data request occurred. SLB misses trap to the operating system to resolve", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_DSLB_MISS], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_DSLB_MISS] }, [ PPC970MP_PME_PM_FXLS1_FULL_CYC ] = { .pme_name = "PM_FXLS1_FULL_CYC", .pme_code = 0x314, .pme_short_desc = "Cycles FXU1/LS1 queue full", .pme_long_desc = "The issue queue for FXU/LSU unit 0 cannot accept any more instructions. 
Issue is stopped", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_FXLS1_FULL_CYC], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_FXLS1_FULL_CYC] }, [ PPC970MP_PME_PM_CMPLU_STALL_FPU ] = { .pme_name = "PM_CMPLU_STALL_FPU", .pme_code = 0x704b, .pme_short_desc = "Completion stall caused by FPU instruction", .pme_long_desc = "Completion stall caused by FPU instruction", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_CMPLU_STALL_FPU], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_CMPLU_STALL_FPU] }, [ PPC970MP_PME_PM_LSU_LMQ_LHR_MERGE ] = { .pme_name = "PM_LSU_LMQ_LHR_MERGE", .pme_code = 0x935, .pme_short_desc = "LMQ LHR merges", .pme_long_desc = "A dcache miss occured for the same real cache line address as an earlier request already in the Load Miss Queue and was merged into the LMQ entry.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LSU_LMQ_LHR_MERGE], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LSU_LMQ_LHR_MERGE] }, [ PPC970MP_PME_PM_MRK_STCX_FAIL ] = { .pme_name = "PM_MRK_STCX_FAIL", .pme_code = 0x726, .pme_short_desc = "Marked STCX failed", .pme_long_desc = "A marked stcx (stwcx or stdcx) failed", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_MRK_STCX_FAIL], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_MRK_STCX_FAIL] }, [ PPC970MP_PME_PM_FXU0_BUSY_FXU1_IDLE ] = { .pme_name = "PM_FXU0_BUSY_FXU1_IDLE", .pme_code = 0x7002, .pme_short_desc = "FXU0 busy FXU1 idle", .pme_long_desc = "FXU0 is busy while FXU1 was idle", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_FXU0_BUSY_FXU1_IDLE], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_FXU0_BUSY_FXU1_IDLE] }, [ PPC970MP_PME_PM_CMPLU_STALL_LSU ] = { .pme_name = "PM_CMPLU_STALL_LSU", .pme_code = 0x504b, .pme_short_desc = "Completion stall caused by LSU instruction", .pme_long_desc = "Completion stall caused by LSU instruction", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_CMPLU_STALL_LSU], .pme_group_vector = 
ppc970mp_group_vecs[PPC970MP_PME_PM_CMPLU_STALL_LSU] }, [ PPC970MP_PME_PM_MRK_DATA_FROM_L25_SHR ] = { .pme_name = "PM_MRK_DATA_FROM_L25_SHR", .pme_code = 0x5937, .pme_short_desc = "Marked data loaded from L2.5 shared", .pme_long_desc = "DL1 was reloaded with shared (T or SL) data from the L2 of a chip on this MCM due to a marked demand load", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_MRK_DATA_FROM_L25_SHR], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_MRK_DATA_FROM_L25_SHR] }, [ PPC970MP_PME_PM_LSU_FLUSH_ULD ] = { .pme_name = "PM_LSU_FLUSH_ULD", .pme_code = 0x1800, .pme_short_desc = "LRQ unaligned load flushes", .pme_long_desc = "A load was flushed because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1)", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LSU_FLUSH_ULD], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LSU_FLUSH_ULD] }, [ PPC970MP_PME_PM_MRK_BRU_FIN ] = { .pme_name = "PM_MRK_BRU_FIN", .pme_code = 0x2005, .pme_short_desc = "Marked instruction BRU processing finished", .pme_long_desc = "The branch unit finished a marked instruction. Instructions that finish may not necessary complete", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_MRK_BRU_FIN], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_MRK_BRU_FIN] }, [ PPC970MP_PME_PM_IERAT_XLATE_WR ] = { .pme_name = "PM_IERAT_XLATE_WR", .pme_code = 0x430, .pme_short_desc = "Translation written to ierat", .pme_long_desc = "This signal will be asserted each time the I-ERAT is written. This indicates that an ERAT miss has been serviced. ERAT misses will initiate a sequence resulting in the ERAT being written. 
ERAT misses that are later ignored will not be counted unless the ERAT is written before the instruction stream is changed, This should be a fairly accurate count of ERAT missed (best available).", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_IERAT_XLATE_WR], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_IERAT_XLATE_WR] }, [ PPC970MP_PME_PM_GCT_EMPTY_BR_MPRED ] = { .pme_name = "PM_GCT_EMPTY_BR_MPRED", .pme_code = 0x708c, .pme_short_desc = "GCT empty due to branch mispredict", .pme_long_desc = "GCT empty due to branch mispredict", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_GCT_EMPTY_BR_MPRED], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_GCT_EMPTY_BR_MPRED] }, [ PPC970MP_PME_PM_LSU0_BUSY ] = { .pme_name = "PM_LSU0_BUSY", .pme_code = 0x823, .pme_short_desc = "LSU0 busy", .pme_long_desc = "LSU unit 0 is busy rejecting instructions", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LSU0_BUSY], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LSU0_BUSY] }, [ PPC970MP_PME_PM_DATA_FROM_MEM ] = { .pme_name = "PM_DATA_FROM_MEM", .pme_code = 0x2837, .pme_short_desc = "Data loaded from memory", .pme_long_desc = "Data loaded from memory", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_DATA_FROM_MEM], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_DATA_FROM_MEM] }, [ PPC970MP_PME_PM_FPR_MAP_FULL_CYC ] = { .pme_name = "PM_FPR_MAP_FULL_CYC", .pme_code = 0x301, .pme_short_desc = "Cycles FPR mapper full", .pme_long_desc = "The ISU sends a signal indicating that the FPR mapper cannot accept any more groups. Dispatch is stopped. 
Note: this condition indicates that a pool of mapper is full but the entire mapper may not be.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_FPR_MAP_FULL_CYC], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_FPR_MAP_FULL_CYC] }, [ PPC970MP_PME_PM_FPU1_FULL_CYC ] = { .pme_name = "PM_FPU1_FULL_CYC", .pme_code = 0x307, .pme_short_desc = "Cycles FPU1 issue queue full", .pme_long_desc = "The issue queue for FPU unit 1 cannot accept any more instructions. Issue is stopped", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_FPU1_FULL_CYC], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_FPU1_FULL_CYC] }, [ PPC970MP_PME_PM_FPU0_FIN ] = { .pme_name = "PM_FPU0_FIN", .pme_code = 0x113, .pme_short_desc = "FPU0 produced a result", .pme_long_desc = "fp0 finished, produced a result This only indicates finish, not completion. ", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_FPU0_FIN], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_FPU0_FIN] }, [ PPC970MP_PME_PM_GRP_BR_REDIR ] = { .pme_name = "PM_GRP_BR_REDIR", .pme_code = 0x326, .pme_short_desc = "Group experienced branch redirect", .pme_long_desc = "Group experienced branch redirect", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_GRP_BR_REDIR], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_GRP_BR_REDIR] }, [ PPC970MP_PME_PM_GCT_EMPTY_IC_MISS ] = { .pme_name = "PM_GCT_EMPTY_IC_MISS", .pme_code = 0x508c, .pme_short_desc = "GCT empty due to I cache miss", .pme_long_desc = "GCT empty due to I cache miss", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_GCT_EMPTY_IC_MISS], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_GCT_EMPTY_IC_MISS] }, [ PPC970MP_PME_PM_THRESH_TIMEO ] = { .pme_name = "PM_THRESH_TIMEO", .pme_code = 0x2003, .pme_short_desc = "Threshold timeout", .pme_long_desc = "The threshold timer expired", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_THRESH_TIMEO], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_THRESH_TIMEO] }, [ 
PPC970MP_PME_PM_FPU_FSQRT ] = { .pme_name = "PM_FPU_FSQRT", .pme_code = 0x6100, .pme_short_desc = "FPU executed FSQRT instruction", .pme_long_desc = "This signal is active for one cycle at the end of the microcode executed when FPU is executing a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. Combined Unit 0 + Unit 1", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_FPU_FSQRT], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_FPU_FSQRT] }, [ PPC970MP_PME_PM_MRK_LSU0_FLUSH_LRQ ] = { .pme_name = "PM_MRK_LSU0_FLUSH_LRQ", .pme_code = 0x712, .pme_short_desc = "LSU0 marked LRQ flushes", .pme_long_desc = "A marked load was flushed by unit 0 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_MRK_LSU0_FLUSH_LRQ], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_MRK_LSU0_FLUSH_LRQ] }, [ PPC970MP_PME_PM_PMC1_OVERFLOW ] = { .pme_name = "PM_PMC1_OVERFLOW", .pme_code = 0x200a, .pme_short_desc = "PMC1 Overflow", .pme_long_desc = "PMC1 Overflow", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_PMC1_OVERFLOW], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_PMC1_OVERFLOW] }, [ PPC970MP_PME_PM_FXLS0_FULL_CYC ] = { .pme_name = "PM_FXLS0_FULL_CYC", .pme_code = 0x310, .pme_short_desc = "Cycles FXU0/LS0 queue full", .pme_long_desc = "The issue queue for FXU/LSU unit 0 cannot accept any more instructions. 
Issue is stopped", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_FXLS0_FULL_CYC], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_FXLS0_FULL_CYC] }, [ PPC970MP_PME_PM_FPU0_ALL ] = { .pme_name = "PM_FPU0_ALL", .pme_code = 0x103, .pme_short_desc = "FPU0 executed add", .pme_long_desc = " mult", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_FPU0_ALL], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_FPU0_ALL] }, [ PPC970MP_PME_PM_DATA_TABLEWALK_CYC ] = { .pme_name = "PM_DATA_TABLEWALK_CYC", .pme_code = 0x707, .pme_short_desc = "Cycles doing data tablewalks", .pme_long_desc = "This signal is asserted every cycle when a tablewalk is active. While a tablewalk is active any request attempting to access the TLB will be rejected and retried.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_DATA_TABLEWALK_CYC], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_DATA_TABLEWALK_CYC] }, [ PPC970MP_PME_PM_FPU0_FEST ] = { .pme_name = "PM_FPU0_FEST", .pme_code = 0x112, .pme_short_desc = "FPU0 executed FEST instruction", .pme_long_desc = "This signal is active for one cycle when fp0 is executing one of the estimate instructions. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. 
", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_FPU0_FEST], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_FPU0_FEST] }, [ PPC970MP_PME_PM_DATA_FROM_L25_MOD ] = { .pme_name = "PM_DATA_FROM_L25_MOD", .pme_code = 0x6837, .pme_short_desc = "Data loaded from L2.5 modified", .pme_long_desc = "DL1 was reloaded with modified (M) data from the L2 of a chip on this MCM due to a demand load", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_DATA_FROM_L25_MOD], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_DATA_FROM_L25_MOD] }, [ PPC970MP_PME_PM_LSU0_REJECT_ERAT_MISS ] = { .pme_name = "PM_LSU0_REJECT_ERAT_MISS", .pme_code = 0x923, .pme_short_desc = "LSU0 reject due to ERAT miss", .pme_long_desc = "LSU0 reject due to ERAT miss", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LSU0_REJECT_ERAT_MISS], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LSU0_REJECT_ERAT_MISS] }, [ PPC970MP_PME_PM_LSU_LMQ_SRQ_EMPTY_CYC ] = { .pme_name = "PM_LSU_LMQ_SRQ_EMPTY_CYC", .pme_code = 0x2002, .pme_short_desc = "Cycles LMQ and SRQ empty", .pme_long_desc = "Cycles when both the LMQ and SRQ are empty (LSU is idle)", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LSU_LMQ_SRQ_EMPTY_CYC], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LSU_LMQ_SRQ_EMPTY_CYC] }, [ PPC970MP_PME_PM_LSU0_REJECT_RELOAD_CDF ] = { .pme_name = "PM_LSU0_REJECT_RELOAD_CDF", .pme_code = 0x922, .pme_short_desc = "LSU0 reject due to reload CDF or tag update collision", .pme_long_desc = "LSU0 reject due to reload CDF or tag update collision", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LSU0_REJECT_RELOAD_CDF], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LSU0_REJECT_RELOAD_CDF] }, [ PPC970MP_PME_PM_FPU_FEST ] = { .pme_name = "PM_FPU_FEST", .pme_code = 0x3110, .pme_short_desc = "FPU executed FEST instruction", .pme_long_desc = "This signal is active for one cycle when executing one of the estimate instructions. 
This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. Combined Unit 0 + Unit 1.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_FPU_FEST], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_FPU_FEST] }, [ PPC970MP_PME_PM_0INST_FETCH ] = { .pme_name = "PM_0INST_FETCH", .pme_code = 0x442d, .pme_short_desc = "No instructions fetched", .pme_long_desc = "No instructions were fetched this cycles (due to IFU hold, redirect, or icache miss)", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_0INST_FETCH], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_0INST_FETCH] }, [ PPC970MP_PME_PM_LD_MISS_L1_LSU0 ] = { .pme_name = "PM_LD_MISS_L1_LSU0", .pme_code = 0x812, .pme_short_desc = "LSU0 L1 D cache load misses", .pme_long_desc = "A load, executing on unit 0, missed the dcache", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LD_MISS_L1_LSU0], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LD_MISS_L1_LSU0] }, [ PPC970MP_PME_PM_LSU1_REJECT_RELOAD_CDF ] = { .pme_name = "PM_LSU1_REJECT_RELOAD_CDF", .pme_code = 0x926, .pme_short_desc = "LSU1 reject due to reload CDF or tag update collision", .pme_long_desc = "LSU1 reject due to reload CDF or tag update collision", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LSU1_REJECT_RELOAD_CDF], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LSU1_REJECT_RELOAD_CDF] }, [ PPC970MP_PME_PM_L1_PREF ] = { .pme_name = "PM_L1_PREF", .pme_code = 0x731, .pme_short_desc = "L1 cache data prefetches", .pme_long_desc = "A request to prefetch data into the L1 was made", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_L1_PREF], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_L1_PREF] }, [ PPC970MP_PME_PM_FPU1_STALL3 ] = { .pme_name = "PM_FPU1_STALL3", .pme_code = 0x125, .pme_short_desc = "FPU1 stalled in pipe3", .pme_long_desc = "This signal indicates that fp1 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer 
(always). This signal is active during the entire duration of the stall. ", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_FPU1_STALL3], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_FPU1_STALL3] }, [ PPC970MP_PME_PM_BRQ_FULL_CYC ] = { .pme_name = "PM_BRQ_FULL_CYC", .pme_code = 0x305, .pme_short_desc = "Cycles branch queue full", .pme_long_desc = "The ISU sends a signal indicating that the issue queue that feeds the ifu br unit cannot accept any more group (queue is full of groups).", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_BRQ_FULL_CYC], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_BRQ_FULL_CYC] }, [ PPC970MP_PME_PM_PMC8_OVERFLOW ] = { .pme_name = "PM_PMC8_OVERFLOW", .pme_code = 0x100a, .pme_short_desc = "PMC8 Overflow", .pme_long_desc = "PMC8 Overflow", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_PMC8_OVERFLOW], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_PMC8_OVERFLOW] }, [ PPC970MP_PME_PM_PMC7_OVERFLOW ] = { .pme_name = "PM_PMC7_OVERFLOW", .pme_code = 0x800a, .pme_short_desc = "PMC7 Overflow", .pme_long_desc = "PMC7 Overflow", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_PMC7_OVERFLOW], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_PMC7_OVERFLOW] }, [ PPC970MP_PME_PM_WORK_HELD ] = { .pme_name = "PM_WORK_HELD", .pme_code = 0x2001, .pme_short_desc = "Work held", .pme_long_desc = "RAS Unit has signaled completion to stop and there are groups waiting to complete", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_WORK_HELD], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_WORK_HELD] }, [ PPC970MP_PME_PM_MRK_LD_MISS_L1_LSU0 ] = { .pme_name = "PM_MRK_LD_MISS_L1_LSU0", .pme_code = 0x720, .pme_short_desc = "LSU0 L1 D cache load misses", .pme_long_desc = "A marked load, executing on unit 0, missed the dcache", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_MRK_LD_MISS_L1_LSU0], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_MRK_LD_MISS_L1_LSU0] }, [ 
PPC970MP_PME_PM_FXU_IDLE ] = { .pme_name = "PM_FXU_IDLE", .pme_code = 0x5002, .pme_short_desc = "FXU idle", .pme_long_desc = "FXU0 and FXU1 are both idle", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_FXU_IDLE], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_FXU_IDLE] }, [ PPC970MP_PME_PM_INST_CMPL ] = { .pme_name = "PM_INST_CMPL", .pme_code = 0x1, .pme_short_desc = "Instructions completed", .pme_long_desc = "Number of Eligible Instructions that completed. ", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_INST_CMPL], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_INST_CMPL] }, [ PPC970MP_PME_PM_LSU1_FLUSH_UST ] = { .pme_name = "PM_LSU1_FLUSH_UST", .pme_code = 0x805, .pme_short_desc = "LSU1 unaligned store flushes", .pme_long_desc = "A store was flushed from unit 1 because it was unaligned (crossed a 4k boundary)", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LSU1_FLUSH_UST], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LSU1_FLUSH_UST] }, [ PPC970MP_PME_PM_LSU0_FLUSH_ULD ] = { .pme_name = "PM_LSU0_FLUSH_ULD", .pme_code = 0x800, .pme_short_desc = "LSU0 unaligned load flushes", .pme_long_desc = "A load was flushed from unit 0 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1)", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LSU0_FLUSH_ULD], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LSU0_FLUSH_ULD] }, [ PPC970MP_PME_PM_LSU_FLUSH ] = { .pme_name = "PM_LSU_FLUSH", .pme_code = 0x315, .pme_short_desc = "Flush initiated by LSU", .pme_long_desc = "Flush initiated by LSU", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LSU_FLUSH], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LSU_FLUSH] }, [ PPC970MP_PME_PM_INST_FROM_L2 ] = { .pme_name = "PM_INST_FROM_L2", .pme_code = 0x1426, .pme_short_desc = "Instructions fetched from L2", .pme_long_desc = "An instruction fetch group was fetched from L2. 
Fetch Groups can contain up to 8 instructions", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_INST_FROM_L2], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_INST_FROM_L2] }, [ PPC970MP_PME_PM_LSU1_REJECT_LMQ_FULL ] = { .pme_name = "PM_LSU1_REJECT_LMQ_FULL", .pme_code = 0x925, .pme_short_desc = "LSU1 reject due to LMQ full or missed data coming", .pme_long_desc = "LSU1 reject due to LMQ full or missed data coming", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LSU1_REJECT_LMQ_FULL], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LSU1_REJECT_LMQ_FULL] }, [ PPC970MP_PME_PM_PMC2_OVERFLOW ] = { .pme_name = "PM_PMC2_OVERFLOW", .pme_code = 0x300a, .pme_short_desc = "PMC2 Overflow", .pme_long_desc = "PMC2 Overflow", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_PMC2_OVERFLOW], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_PMC2_OVERFLOW] }, [ PPC970MP_PME_PM_FPU0_DENORM ] = { .pme_name = "PM_FPU0_DENORM", .pme_code = 0x120, .pme_short_desc = "FPU0 received denormalized data", .pme_long_desc = "This signal is active for one cycle when one of the operands is denormalized.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_FPU0_DENORM], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_FPU0_DENORM] }, [ PPC970MP_PME_PM_FPU1_FMOV_FEST ] = { .pme_name = "PM_FPU1_FMOV_FEST", .pme_code = 0x114, .pme_short_desc = "FPU1 executing FMOV or FEST instructions", .pme_long_desc = "This signal is active for one cycle when fp1 is executing a move kind of instruction or one of the estimate instructions.. 
This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_FPU1_FMOV_FEST], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_FPU1_FMOV_FEST] }, [ PPC970MP_PME_PM_INST_FETCH_CYC ] = { .pme_name = "PM_INST_FETCH_CYC", .pme_code = 0x424, .pme_short_desc = "Cycles at least 1 instruction fetched", .pme_long_desc = "Asserted each cycle when the IFU sends at least one instruction to the IDU. ", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_INST_FETCH_CYC], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_INST_FETCH_CYC] }, [ PPC970MP_PME_PM_GRP_DISP_REJECT ] = { .pme_name = "PM_GRP_DISP_REJECT", .pme_code = 0x324, .pme_short_desc = "Group dispatch rejected", .pme_long_desc = "A group that previously attempted dispatch was rejected.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_GRP_DISP_REJECT], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_GRP_DISP_REJECT] }, [ PPC970MP_PME_PM_LSU_LDF ] = { .pme_name = "PM_LSU_LDF", .pme_code = 0x8730, .pme_short_desc = "LSU executed Floating Point load instruction", .pme_long_desc = "LSU executed Floating Point load instruction", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LSU_LDF], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LSU_LDF] }, [ PPC970MP_PME_PM_INST_DISP ] = { .pme_name = "PM_INST_DISP", .pme_code = 0x320, .pme_short_desc = "Instructions dispatched", .pme_long_desc = "The ISU sends the number of instructions dispatched.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_INST_DISP], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_INST_DISP] }, [ PPC970MP_PME_PM_DATA_FROM_L25_SHR ] = { .pme_name = "PM_DATA_FROM_L25_SHR", .pme_code = 0x5837, .pme_short_desc = "Data loaded from L2.5 shared", .pme_long_desc = "DL1 was reloaded with shared (T or SL) data from the L2 of a chip on this MCM due to a demand load", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_DATA_FROM_L25_SHR], 
.pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_DATA_FROM_L25_SHR] }, [ PPC970MP_PME_PM_L1_DCACHE_RELOAD_VALID ] = { .pme_name = "PM_L1_DCACHE_RELOAD_VALID", .pme_code = 0x834, .pme_short_desc = "L1 reload data source valid", .pme_long_desc = "The data source information is valid", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_L1_DCACHE_RELOAD_VALID], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_L1_DCACHE_RELOAD_VALID] }, [ PPC970MP_PME_PM_MRK_GRP_ISSUED ] = { .pme_name = "PM_MRK_GRP_ISSUED", .pme_code = 0x6005, .pme_short_desc = "Marked group issued", .pme_long_desc = "A sampled instruction was issued", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_MRK_GRP_ISSUED], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_MRK_GRP_ISSUED] }, [ PPC970MP_PME_PM_FPU_FMA ] = { .pme_name = "PM_FPU_FMA", .pme_code = 0x2100, .pme_short_desc = "FPU executed multiply-add instruction", .pme_long_desc = "This signal is active for one cycle when FPU is executing multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. Combined Unit 0 + Unit 1", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_FPU_FMA], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_FPU_FMA] }, [ PPC970MP_PME_PM_MRK_CRU_FIN ] = { .pme_name = "PM_MRK_CRU_FIN", .pme_code = 0x4005, .pme_short_desc = "Marked instruction CRU processing finished", .pme_long_desc = "The Condition Register Unit finished a marked instruction. 
Instructions that finish may not necessary complete", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_MRK_CRU_FIN], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_MRK_CRU_FIN] }, [ PPC970MP_PME_PM_CMPLU_STALL_REJECT ] = { .pme_name = "PM_CMPLU_STALL_REJECT", .pme_code = 0x70cb, .pme_short_desc = "Completion stall caused by reject", .pme_long_desc = "Completion stall caused by reject", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_CMPLU_STALL_REJECT], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_CMPLU_STALL_REJECT] }, [ PPC970MP_PME_PM_MRK_LSU1_FLUSH_UST ] = { .pme_name = "PM_MRK_LSU1_FLUSH_UST", .pme_code = 0x715, .pme_short_desc = "LSU1 marked unaligned store flushes", .pme_long_desc = "A marked store was flushed from unit 1 because it was unaligned (crossed a 4k boundary)", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_MRK_LSU1_FLUSH_UST], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_MRK_LSU1_FLUSH_UST] }, [ PPC970MP_PME_PM_MRK_FXU_FIN ] = { .pme_name = "PM_MRK_FXU_FIN", .pme_code = 0x6004, .pme_short_desc = "Marked instruction FXU processing finished", .pme_long_desc = "Marked instruction FXU processing finished", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_MRK_FXU_FIN], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_MRK_FXU_FIN] }, [ PPC970MP_PME_PM_LSU1_REJECT_ERAT_MISS ] = { .pme_name = "PM_LSU1_REJECT_ERAT_MISS", .pme_code = 0x927, .pme_short_desc = "LSU1 reject due to ERAT miss", .pme_long_desc = "LSU1 reject due to ERAT miss", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LSU1_REJECT_ERAT_MISS], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LSU1_REJECT_ERAT_MISS] }, [ PPC970MP_PME_PM_BR_ISSUED ] = { .pme_name = "PM_BR_ISSUED", .pme_code = 0x431, .pme_short_desc = "Branches issued", .pme_long_desc = "This signal will be asserted each time the ISU issues a branch instruction. 
This signal will be asserted each time the ISU selects a branch instruction to issue.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_BR_ISSUED], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_BR_ISSUED] }, [ PPC970MP_PME_PM_PMC4_OVERFLOW ] = { .pme_name = "PM_PMC4_OVERFLOW", .pme_code = 0x500a, .pme_short_desc = "PMC4 Overflow", .pme_long_desc = "PMC4 Overflow", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_PMC4_OVERFLOW], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_PMC4_OVERFLOW] }, [ PPC970MP_PME_PM_EE_OFF ] = { .pme_name = "PM_EE_OFF", .pme_code = 0x333, .pme_short_desc = "Cycles MSR(EE) bit off", .pme_long_desc = "The number of Cycles MSR(EE) bit was off.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_EE_OFF], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_EE_OFF] }, [ PPC970MP_PME_PM_INST_FROM_L25_MOD ] = { .pme_name = "PM_INST_FROM_L25_MOD", .pme_code = 0x6426, .pme_short_desc = "Instruction fetched from L2.5 modified", .pme_long_desc = "Instruction fetched from L2.5 modified", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_INST_FROM_L25_MOD], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_INST_FROM_L25_MOD] }, [ PPC970MP_PME_PM_CMPLU_STALL_ERAT_MISS ] = { .pme_name = "PM_CMPLU_STALL_ERAT_MISS", .pme_code = 0x704c, .pme_short_desc = "Completion stall caused by ERAT miss", .pme_long_desc = "Completion stall caused by ERAT miss", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_CMPLU_STALL_ERAT_MISS], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_CMPLU_STALL_ERAT_MISS] }, [ PPC970MP_PME_PM_ITLB_MISS ] = { .pme_name = "PM_ITLB_MISS", .pme_code = 0x700, .pme_short_desc = "Instruction TLB misses", .pme_long_desc = "A TLB miss for an Instruction Fetch has occurred", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_ITLB_MISS], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_ITLB_MISS] }, [ PPC970MP_PME_PM_FXU1_BUSY_FXU0_IDLE ] = { .pme_name = "PM_FXU1_BUSY_FXU0_IDLE", .pme_code = 
0x4002, .pme_short_desc = "FXU1 busy FXU0 idle", .pme_long_desc = "FXU0 was idle while FXU1 was busy", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_FXU1_BUSY_FXU0_IDLE], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_FXU1_BUSY_FXU0_IDLE] }, [ PPC970MP_PME_PM_GRP_DISP_VALID ] = { .pme_name = "PM_GRP_DISP_VALID", .pme_code = 0x323, .pme_short_desc = "Group dispatch valid", .pme_long_desc = "Dispatch has been attempted for a valid group. Some groups may be rejected. The total number of successful dispatches is the number of dispatch valid minus dispatch reject.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_GRP_DISP_VALID], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_GRP_DISP_VALID] }, [ PPC970MP_PME_PM_MRK_GRP_DISP ] = { .pme_name = "PM_MRK_GRP_DISP", .pme_code = 0x1002, .pme_short_desc = "Marked group dispatched", .pme_long_desc = "A group containing a sampled instruction was dispatched", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_MRK_GRP_DISP], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_MRK_GRP_DISP] }, [ PPC970MP_PME_PM_LSU_FLUSH_UST ] = { .pme_name = "PM_LSU_FLUSH_UST", .pme_code = 0x2800, .pme_short_desc = "SRQ unaligned store flushes", .pme_long_desc = "A store was flushed because it was unaligned", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LSU_FLUSH_UST], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LSU_FLUSH_UST] }, [ PPC970MP_PME_PM_FXU1_FIN ] = { .pme_name = "PM_FXU1_FIN", .pme_code = 0x336, .pme_short_desc = "FXU1 produced a result", .pme_long_desc = "The Fixed Point unit 1 finished an instruction and produced a result", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_FXU1_FIN], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_FXU1_FIN] }, [ PPC970MP_PME_PM_GRP_CMPL ] = { .pme_name = "PM_GRP_CMPL", .pme_code = 0x7003, .pme_short_desc = "Group completed", .pme_long_desc = "A group completed. 
Microcoded instructions that span multiple groups will generate this event once per group.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_GRP_CMPL], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_GRP_CMPL] }, [ PPC970MP_PME_PM_FPU_FRSP_FCONV ] = { .pme_name = "PM_FPU_FRSP_FCONV", .pme_code = 0x7110, .pme_short_desc = "FPU executed FRSP or FCONV instructions", .pme_long_desc = "This signal is active for one cycle when executing frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. Combined Unit 0 + Unit 1", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_FPU_FRSP_FCONV], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_FPU_FRSP_FCONV] }, [ PPC970MP_PME_PM_MRK_LSU0_FLUSH_SRQ ] = { .pme_name = "PM_MRK_LSU0_FLUSH_SRQ", .pme_code = 0x713, .pme_short_desc = "LSU0 marked SRQ flushes", .pme_long_desc = "A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_MRK_LSU0_FLUSH_SRQ], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_MRK_LSU0_FLUSH_SRQ] }, [ PPC970MP_PME_PM_CMPLU_STALL_OTHER ] = { .pme_name = "PM_CMPLU_STALL_OTHER", .pme_code = 0x100b, .pme_short_desc = "Completion stall caused by other reason", .pme_long_desc = "Completion stall caused by other reason", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_CMPLU_STALL_OTHER], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_CMPLU_STALL_OTHER] }, [ PPC970MP_PME_PM_LSU_LMQ_FULL_CYC ] = { .pme_name = "PM_LSU_LMQ_FULL_CYC", .pme_code = 0x837, .pme_short_desc = "Cycles LMQ full", .pme_long_desc = "The LMQ was full", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LSU_LMQ_FULL_CYC], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LSU_LMQ_FULL_CYC] }, [ PPC970MP_PME_PM_ST_REF_L1_LSU0 ] = { .pme_name = "PM_ST_REF_L1_LSU0", .pme_code = 0x811, .pme_short_desc = "LSU0 L1 D cache store references", 
.pme_long_desc = "A store executed on unit 0", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_ST_REF_L1_LSU0], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_ST_REF_L1_LSU0] }, [ PPC970MP_PME_PM_LSU0_DERAT_MISS ] = { .pme_name = "PM_LSU0_DERAT_MISS", .pme_code = 0x702, .pme_short_desc = "LSU0 DERAT misses", .pme_long_desc = "A data request (load or store) from LSU Unit 0 missed the ERAT and resulted in an ERAT reload. Multiple instructions may miss the ERAT entry for the same 4K page, but only one reload will occur.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LSU0_DERAT_MISS], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LSU0_DERAT_MISS] }, [ PPC970MP_PME_PM_LSU_SRQ_SYNC_CYC ] = { .pme_name = "PM_LSU_SRQ_SYNC_CYC", .pme_code = 0x735, .pme_short_desc = "SRQ sync duration", .pme_long_desc = "This signal is asserted every cycle when a sync is in the SRQ.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LSU_SRQ_SYNC_CYC], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LSU_SRQ_SYNC_CYC] }, [ PPC970MP_PME_PM_FPU_STALL3 ] = { .pme_name = "PM_FPU_STALL3", .pme_code = 0x2120, .pme_short_desc = "FPU stalled in pipe3", .pme_long_desc = "FPU has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). This signal is active during the entire duration of the stall. 
Combined Unit 0 + Unit 1", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_FPU_STALL3], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_FPU_STALL3] }, [ PPC970MP_PME_PM_LSU_REJECT_ERAT_MISS ] = { .pme_name = "PM_LSU_REJECT_ERAT_MISS", .pme_code = 0x5920, .pme_short_desc = "LSU reject due to ERAT miss", .pme_long_desc = "LSU reject due to ERAT miss", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LSU_REJECT_ERAT_MISS], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LSU_REJECT_ERAT_MISS] }, [ PPC970MP_PME_PM_MRK_DATA_FROM_L2 ] = { .pme_name = "PM_MRK_DATA_FROM_L2", .pme_code = 0x1937, .pme_short_desc = "Marked data loaded from L2", .pme_long_desc = "DL1 was reloaded from the local L2 due to a marked demand load", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_MRK_DATA_FROM_L2], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_MRK_DATA_FROM_L2] }, [ PPC970MP_PME_PM_LSU0_FLUSH_SRQ ] = { .pme_name = "PM_LSU0_FLUSH_SRQ", .pme_code = 0x803, .pme_short_desc = "LSU0 SRQ flushes", .pme_long_desc = "A store was flushed because younger load hits and older store that is already in the SRQ or in the same group.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LSU0_FLUSH_SRQ], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LSU0_FLUSH_SRQ] }, [ PPC970MP_PME_PM_FPU0_FMOV_FEST ] = { .pme_name = "PM_FPU0_FMOV_FEST", .pme_code = 0x110, .pme_short_desc = "FPU0 executed FMOV or FEST instructions", .pme_long_desc = "This signal is active for one cycle when fp0 is executing a move kind of instruction or one of the estimate instructions.. 
This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_FPU0_FMOV_FEST], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_FPU0_FMOV_FEST] }, [ PPC970MP_PME_PM_IOPS_CMPL ] = { .pme_name = "PM_IOPS_CMPL", .pme_code = 0x1001, .pme_short_desc = "IOPS instructions completed", .pme_long_desc = "Number of IOPS Instructions that completed.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_IOPS_CMPL], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_IOPS_CMPL] }, [ PPC970MP_PME_PM_LD_REF_L1_LSU0 ] = { .pme_name = "PM_LD_REF_L1_LSU0", .pme_code = 0x810, .pme_short_desc = "LSU0 L1 D cache load references", .pme_long_desc = "A load executed on unit 0", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LD_REF_L1_LSU0], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LD_REF_L1_LSU0] }, [ PPC970MP_PME_PM_LSU1_FLUSH_SRQ ] = { .pme_name = "PM_LSU1_FLUSH_SRQ", .pme_code = 0x807, .pme_short_desc = "LSU1 SRQ flushes", .pme_long_desc = "A store was flushed because younger load hits and older store that is already in the SRQ or in the same group. 
", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LSU1_FLUSH_SRQ], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LSU1_FLUSH_SRQ] }, [ PPC970MP_PME_PM_CMPLU_STALL_DIV ] = { .pme_name = "PM_CMPLU_STALL_DIV", .pme_code = 0x708b, .pme_short_desc = "Completion stall caused by DIV instruction", .pme_long_desc = "Completion stall caused by DIV instruction", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_CMPLU_STALL_DIV], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_CMPLU_STALL_DIV] }, [ PPC970MP_PME_PM_GRP_BR_MPRED ] = { .pme_name = "PM_GRP_BR_MPRED", .pme_code = 0x327, .pme_short_desc = "Group experienced a branch mispredict", .pme_long_desc = "Group experienced a branch mispredict", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_GRP_BR_MPRED], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_GRP_BR_MPRED] }, [ PPC970MP_PME_PM_LSU_LMQ_S0_ALLOC ] = { .pme_name = "PM_LSU_LMQ_S0_ALLOC", .pme_code = 0x836, .pme_short_desc = "LMQ slot 0 allocated", .pme_long_desc = "The first entry in the LMQ was allocated.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LSU_LMQ_S0_ALLOC], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LSU_LMQ_S0_ALLOC] }, [ PPC970MP_PME_PM_LSU0_REJECT_LMQ_FULL ] = { .pme_name = "PM_LSU0_REJECT_LMQ_FULL", .pme_code = 0x921, .pme_short_desc = "LSU0 reject due to LMQ full or missed data coming", .pme_long_desc = "LSU0 reject due to LMQ full or missed data coming", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LSU0_REJECT_LMQ_FULL], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LSU0_REJECT_LMQ_FULL] }, [ PPC970MP_PME_PM_ST_REF_L1 ] = { .pme_name = "PM_ST_REF_L1", .pme_code = 0x7810, .pme_short_desc = "L1 D cache store references", .pme_long_desc = "Total DL1 Store references", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_ST_REF_L1], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_ST_REF_L1] }, [ PPC970MP_PME_PM_MRK_VMX_FIN ] = { .pme_name = "PM_MRK_VMX_FIN", .pme_code = 
0x3005, .pme_short_desc = "Marked instruction VMX processing finished", .pme_long_desc = "Marked instruction VMX processing finished", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_MRK_VMX_FIN], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_MRK_VMX_FIN] }, [ PPC970MP_PME_PM_LSU_SRQ_EMPTY_CYC ] = { .pme_name = "PM_LSU_SRQ_EMPTY_CYC", .pme_code = 0x4003, .pme_short_desc = "Cycles SRQ empty", .pme_long_desc = "The Store Request Queue is empty", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LSU_SRQ_EMPTY_CYC], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LSU_SRQ_EMPTY_CYC] }, [ PPC970MP_PME_PM_FPU1_STF ] = { .pme_name = "PM_FPU1_STF", .pme_code = 0x126, .pme_short_desc = "FPU1 executed store instruction", .pme_long_desc = "This signal is active for one cycle when fp1 is executing a store instruction.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_FPU1_STF], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_FPU1_STF] }, [ PPC970MP_PME_PM_RUN_CYC ] = { .pme_name = "PM_RUN_CYC", .pme_code = 0x1005, .pme_short_desc = "Run cycles", .pme_long_desc = "Processor Cycles gated by the run latch", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_RUN_CYC], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_RUN_CYC] }, [ PPC970MP_PME_PM_LSU_LMQ_S0_VALID ] = { .pme_name = "PM_LSU_LMQ_S0_VALID", .pme_code = 0x835, .pme_short_desc = "LMQ slot 0 valid", .pme_long_desc = "This signal is asserted every cycle when the first entry in the LMQ is valid. 
The LMQ had eight entries that are allocated FIFO", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LSU_LMQ_S0_VALID], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LSU_LMQ_S0_VALID] }, [ PPC970MP_PME_PM_LSU0_LDF ] = { .pme_name = "PM_LSU0_LDF", .pme_code = 0x730, .pme_short_desc = "LSU0 executed Floating Point load instruction", .pme_long_desc = "A floating point load was executed from LSU unit 0", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LSU0_LDF], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LSU0_LDF] }, [ PPC970MP_PME_PM_LSU_LRQ_S0_VALID ] = { .pme_name = "PM_LSU_LRQ_S0_VALID", .pme_code = 0x822, .pme_short_desc = "LRQ slot 0 valid", .pme_long_desc = "This signal is asserted every cycle that the Load Request Queue slot zero is valid. The SRQ is 32 entries long and is allocated round-robin.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LSU_LRQ_S0_VALID], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LSU_LRQ_S0_VALID] }, [ PPC970MP_PME_PM_PMC3_OVERFLOW ] = { .pme_name = "PM_PMC3_OVERFLOW", .pme_code = 0x400a, .pme_short_desc = "PMC3 Overflow", .pme_long_desc = "PMC3 Overflow", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_PMC3_OVERFLOW], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_PMC3_OVERFLOW] }, [ PPC970MP_PME_PM_MRK_IMR_RELOAD ] = { .pme_name = "PM_MRK_IMR_RELOAD", .pme_code = 0x722, .pme_short_desc = "Marked IMR reloaded", .pme_long_desc = "A DL1 reload occured due to marked load", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_MRK_IMR_RELOAD], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_MRK_IMR_RELOAD] }, [ PPC970MP_PME_PM_MRK_GRP_TIMEO ] = { .pme_name = "PM_MRK_GRP_TIMEO", .pme_code = 0x5005, .pme_short_desc = "Marked group completion timeout", .pme_long_desc = "The sampling timeout expired indicating that the previously sampled instruction is no longer in the processor", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_MRK_GRP_TIMEO], .pme_group_vector = 
ppc970mp_group_vecs[PPC970MP_PME_PM_MRK_GRP_TIMEO] }, [ PPC970MP_PME_PM_FPU_FMOV_FEST ] = { .pme_name = "PM_FPU_FMOV_FEST", .pme_code = 0x8110, .pme_short_desc = "FPU executing FMOV or FEST instructions", .pme_long_desc = "This signal is active for one cycle when executing a move kind of instruction or one of the estimate instructions.. This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ . Combined Unit 0 + Unit 1", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_FPU_FMOV_FEST], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_FPU_FMOV_FEST] }, [ PPC970MP_PME_PM_GRP_DISP_BLK_SB_CYC ] = { .pme_name = "PM_GRP_DISP_BLK_SB_CYC", .pme_code = 0x331, .pme_short_desc = "Cycles group dispatch blocked by scoreboard", .pme_long_desc = "The ISU sends a signal indicating that dispatch is blocked by scoreboard.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_GRP_DISP_BLK_SB_CYC], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_GRP_DISP_BLK_SB_CYC] }, [ PPC970MP_PME_PM_XER_MAP_FULL_CYC ] = { .pme_name = "PM_XER_MAP_FULL_CYC", .pme_code = 0x302, .pme_short_desc = "Cycles XER mapper full", .pme_long_desc = "The ISU sends a signal indicating that the xer mapper cannot accept any more groups. Dispatch is stopped. 
Note: this condition indicates that a pool of mapper is full but the entire mapper may not be.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_XER_MAP_FULL_CYC], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_XER_MAP_FULL_CYC] }, [ PPC970MP_PME_PM_ST_MISS_L1 ] = { .pme_name = "PM_ST_MISS_L1", .pme_code = 0x813, .pme_short_desc = "L1 D cache store misses", .pme_long_desc = "A store missed the dcache", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_ST_MISS_L1], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_ST_MISS_L1] }, [ PPC970MP_PME_PM_STOP_COMPLETION ] = { .pme_name = "PM_STOP_COMPLETION", .pme_code = 0x3001, .pme_short_desc = "Completion stopped", .pme_long_desc = "RAS Unit has signaled completion to stop", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_STOP_COMPLETION], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_STOP_COMPLETION] }, [ PPC970MP_PME_PM_MRK_GRP_CMPL ] = { .pme_name = "PM_MRK_GRP_CMPL", .pme_code = 0x4004, .pme_short_desc = "Marked group completed", .pme_long_desc = "A group containing a sampled instruction completed. 
Microcoded instructions that span multiple groups will generate this event once per group.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_MRK_GRP_CMPL], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_MRK_GRP_CMPL] }, [ PPC970MP_PME_PM_ISLB_MISS ] = { .pme_name = "PM_ISLB_MISS", .pme_code = 0x701, .pme_short_desc = "Instruction SLB misses", .pme_long_desc = "A SLB miss for an instruction fetch as occurred", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_ISLB_MISS], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_ISLB_MISS] }, [ PPC970MP_PME_PM_SUSPENDED ] = { .pme_name = "PM_SUSPENDED", .pme_code = 0x0, .pme_short_desc = "Suspended", .pme_long_desc = "Suspended", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_SUSPENDED], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_SUSPENDED] }, [ PPC970MP_PME_PM_CYC ] = { .pme_name = "PM_CYC", .pme_code = 0x7, .pme_short_desc = "Processor cycles", .pme_long_desc = "Processor cycles", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_CYC], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_CYC] }, [ PPC970MP_PME_PM_LD_MISS_L1_LSU1 ] = { .pme_name = "PM_LD_MISS_L1_LSU1", .pme_code = 0x816, .pme_short_desc = "LSU1 L1 D cache load misses", .pme_long_desc = "A load, executing on unit 1, missed the dcache", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LD_MISS_L1_LSU1], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LD_MISS_L1_LSU1] }, [ PPC970MP_PME_PM_STCX_FAIL ] = { .pme_name = "PM_STCX_FAIL", .pme_code = 0x721, .pme_short_desc = "STCX failed", .pme_long_desc = "A stcx (stwcx or stdcx) failed", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_STCX_FAIL], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_STCX_FAIL] }, [ PPC970MP_PME_PM_LSU1_SRQ_STFWD ] = { .pme_name = "PM_LSU1_SRQ_STFWD", .pme_code = 0x824, .pme_short_desc = "LSU1 SRQ store forwarded", .pme_long_desc = "Data from a store instruction was forwarded to a load on unit 1", .pme_event_ids = 
ppc970mp_event_ids[PPC970MP_PME_PM_LSU1_SRQ_STFWD], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LSU1_SRQ_STFWD] }, [ PPC970MP_PME_PM_GRP_DISP ] = { .pme_name = "PM_GRP_DISP", .pme_code = 0x2004, .pme_short_desc = "Group dispatches", .pme_long_desc = "A group was dispatched", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_GRP_DISP], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_GRP_DISP] }, [ PPC970MP_PME_PM_L2_PREF ] = { .pme_name = "PM_L2_PREF", .pme_code = 0x733, .pme_short_desc = "L2 cache prefetches", .pme_long_desc = "A request to prefetch data into L2 was made", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_L2_PREF], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_L2_PREF] }, [ PPC970MP_PME_PM_FPU1_DENORM ] = { .pme_name = "PM_FPU1_DENORM", .pme_code = 0x124, .pme_short_desc = "FPU1 received denormalized data", .pme_long_desc = "This signal is active for one cycle when one of the operands is denormalized.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_FPU1_DENORM], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_FPU1_DENORM] }, [ PPC970MP_PME_PM_DATA_FROM_L2 ] = { .pme_name = "PM_DATA_FROM_L2", .pme_code = 0x1837, .pme_short_desc = "Data loaded from L2", .pme_long_desc = "DL1 was reloaded from the local L2 due to a demand load", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_DATA_FROM_L2], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_DATA_FROM_L2] }, [ PPC970MP_PME_PM_FPU0_FPSCR ] = { .pme_name = "PM_FPU0_FPSCR", .pme_code = 0x130, .pme_short_desc = "FPU0 executed FPSCR instruction", .pme_long_desc = "This signal is active for one cycle when fp0 is executing fpscr move related instruction. This could be mtfsfi*, mtfsb0*, mtfsb1*. 
mffs*, mtfsf*, mcrsf* where XYZ* means XYZ, XYZs, XYZ., XYZs", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_FPU0_FPSCR], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_FPU0_FPSCR] }, [ PPC970MP_PME_PM_MRK_DATA_FROM_L25_MOD ] = { .pme_name = "PM_MRK_DATA_FROM_L25_MOD", .pme_code = 0x6937, .pme_short_desc = "Marked data loaded from L2.5 modified", .pme_long_desc = "DL1 was reloaded with modified (M) data from the L2 of a chip on this MCM due to a marked demand load", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_MRK_DATA_FROM_L25_MOD], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_MRK_DATA_FROM_L25_MOD] }, [ PPC970MP_PME_PM_FPU0_FSQRT ] = { .pme_name = "PM_FPU0_FSQRT", .pme_code = 0x102, .pme_short_desc = "FPU0 executed FSQRT instruction", .pme_long_desc = "This signal is active for one cycle at the end of the microcode executed when fp0 is executing a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_FPU0_FSQRT], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_FPU0_FSQRT] }, [ PPC970MP_PME_PM_LD_REF_L1 ] = { .pme_name = "PM_LD_REF_L1", .pme_code = 0x8810, .pme_short_desc = "L1 D cache load references", .pme_long_desc = "Total DL1 Load references", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LD_REF_L1], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LD_REF_L1] }, [ PPC970MP_PME_PM_MRK_L1_RELOAD_VALID ] = { .pme_name = "PM_MRK_L1_RELOAD_VALID", .pme_code = 0x934, .pme_short_desc = "Marked L1 reload data source valid", .pme_long_desc = "The source information is valid and is for a marked load", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_MRK_L1_RELOAD_VALID], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_MRK_L1_RELOAD_VALID] }, [ PPC970MP_PME_PM_1PLUS_PPC_CMPL ] = { .pme_name = "PM_1PLUS_PPC_CMPL", .pme_code = 0x5003, .pme_short_desc = "One or more PPC instruction completed", .pme_long_desc = "A group 
containing at least one PPC instruction completed. For microcoded instructions that span multiple groups, this will only occur once.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_1PLUS_PPC_CMPL], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_1PLUS_PPC_CMPL] }, [ PPC970MP_PME_PM_INST_FROM_L1 ] = { .pme_name = "PM_INST_FROM_L1", .pme_code = 0x142d, .pme_short_desc = "Instruction fetched from L1", .pme_long_desc = "An instruction fetch group was fetched from L1. Fetch Groups can contain up to 8 instructions", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_INST_FROM_L1], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_INST_FROM_L1] }, [ PPC970MP_PME_PM_EE_OFF_EXT_INT ] = { .pme_name = "PM_EE_OFF_EXT_INT", .pme_code = 0x337, .pme_short_desc = "Cycles MSR(EE) bit off and external interrupt pending", .pme_long_desc = "Cycles MSR(EE) bit off and external interrupt pending", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_EE_OFF_EXT_INT], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_EE_OFF_EXT_INT] }, [ PPC970MP_PME_PM_PMC6_OVERFLOW ] = { .pme_name = "PM_PMC6_OVERFLOW", .pme_code = 0x700a, .pme_short_desc = "PMC6 Overflow", .pme_long_desc = "PMC6 Overflow", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_PMC6_OVERFLOW], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_PMC6_OVERFLOW] }, [ PPC970MP_PME_PM_LSU_LRQ_FULL_CYC ] = { .pme_name = "PM_LSU_LRQ_FULL_CYC", .pme_code = 0x312, .pme_short_desc = "Cycles LRQ full", .pme_long_desc = "The ISU sends this signal when the LRQ is full.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LSU_LRQ_FULL_CYC], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LSU_LRQ_FULL_CYC] }, [ PPC970MP_PME_PM_IC_PREF_INSTALL ] = { .pme_name = "PM_IC_PREF_INSTALL", .pme_code = 0x427, .pme_short_desc = "Instruction prefetched installed in prefetch", .pme_long_desc = "New line coming into the prefetch buffer", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_IC_PREF_INSTALL], 
.pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_IC_PREF_INSTALL] }, [ PPC970MP_PME_PM_DC_PREF_OUT_OF_STREAMS ] = { .pme_name = "PM_DC_PREF_OUT_OF_STREAMS", .pme_code = 0x732, .pme_short_desc = "D cache out of streams", .pme_long_desc = "out of streams", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_DC_PREF_OUT_OF_STREAMS], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_DC_PREF_OUT_OF_STREAMS] }, [ PPC970MP_PME_PM_MRK_LSU1_FLUSH_SRQ ] = { .pme_name = "PM_MRK_LSU1_FLUSH_SRQ", .pme_code = 0x717, .pme_short_desc = "LSU1 marked SRQ flushes", .pme_long_desc = "A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_MRK_LSU1_FLUSH_SRQ], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_MRK_LSU1_FLUSH_SRQ] }, [ PPC970MP_PME_PM_GCT_FULL_CYC ] = { .pme_name = "PM_GCT_FULL_CYC", .pme_code = 0x300, .pme_short_desc = "Cycles GCT full", .pme_long_desc = "The ISU sends a signal indicating the gct is full. 
", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_GCT_FULL_CYC], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_GCT_FULL_CYC] }, [ PPC970MP_PME_PM_INST_FROM_MEM ] = { .pme_name = "PM_INST_FROM_MEM", .pme_code = 0x2426, .pme_short_desc = "Instruction fetched from memory", .pme_long_desc = "Instruction fetched from memory", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_INST_FROM_MEM], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_INST_FROM_MEM] }, [ PPC970MP_PME_PM_FLUSH_LSU_BR_MPRED ] = { .pme_name = "PM_FLUSH_LSU_BR_MPRED", .pme_code = 0x317, .pme_short_desc = "Flush caused by LSU or branch mispredict", .pme_long_desc = "Flush caused by LSU or branch mispredict", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_FLUSH_LSU_BR_MPRED], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_FLUSH_LSU_BR_MPRED] }, [ PPC970MP_PME_PM_FXU_BUSY ] = { .pme_name = "PM_FXU_BUSY", .pme_code = 0x6002, .pme_short_desc = "FXU busy", .pme_long_desc = "FXU0 and FXU1 are both busy", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_FXU_BUSY], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_FXU_BUSY] }, [ PPC970MP_PME_PM_ST_REF_L1_LSU1 ] = { .pme_name = "PM_ST_REF_L1_LSU1", .pme_code = 0x815, .pme_short_desc = "LSU1 L1 D cache store references", .pme_long_desc = "A store executed on unit 1", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_ST_REF_L1_LSU1], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_ST_REF_L1_LSU1] }, [ PPC970MP_PME_PM_MRK_LD_MISS_L1 ] = { .pme_name = "PM_MRK_LD_MISS_L1", .pme_code = 0x1720, .pme_short_desc = "Marked L1 D cache load misses", .pme_long_desc = "Marked L1 D cache load misses", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_MRK_LD_MISS_L1], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_MRK_LD_MISS_L1] }, [ PPC970MP_PME_PM_L1_WRITE_CYC ] = { .pme_name = "PM_L1_WRITE_CYC", .pme_code = 0x434, .pme_short_desc = "Cycles writing to instruction L1", .pme_long_desc = "This signal is asserted 
each cycle a cache write is active.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_L1_WRITE_CYC], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_L1_WRITE_CYC] }, [ PPC970MP_PME_PM_LSU1_BUSY ] = { .pme_name = "PM_LSU1_BUSY", .pme_code = 0x827, .pme_short_desc = "LSU1 busy", .pme_long_desc = "LSU unit 0 is busy rejecting instructions ", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LSU1_BUSY], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LSU1_BUSY] }, [ PPC970MP_PME_PM_LSU_REJECT_LMQ_FULL ] = { .pme_name = "PM_LSU_REJECT_LMQ_FULL", .pme_code = 0x2920, .pme_short_desc = "LSU reject due to LMQ full or missed data coming", .pme_long_desc = "LSU reject due to LMQ full or missed data coming", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LSU_REJECT_LMQ_FULL], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LSU_REJECT_LMQ_FULL] }, [ PPC970MP_PME_PM_CMPLU_STALL_FDIV ] = { .pme_name = "PM_CMPLU_STALL_FDIV", .pme_code = 0x504c, .pme_short_desc = "Completion stall caused by FDIV or FQRT instruction", .pme_long_desc = "Completion stall caused by FDIV or FQRT instruction", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_CMPLU_STALL_FDIV], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_CMPLU_STALL_FDIV] }, [ PPC970MP_PME_PM_FPU_ALL ] = { .pme_name = "PM_FPU_ALL", .pme_code = 0x5100, .pme_short_desc = "FPU executed add", .pme_long_desc = " mult", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_FPU_ALL], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_FPU_ALL] }, [ PPC970MP_PME_PM_LSU_SRQ_S0_ALLOC ] = { .pme_name = "PM_LSU_SRQ_S0_ALLOC", .pme_code = 0x825, .pme_short_desc = "SRQ slot 0 allocated", .pme_long_desc = "SRQ Slot zero was allocated", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LSU_SRQ_S0_ALLOC], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LSU_SRQ_S0_ALLOC] }, [ PPC970MP_PME_PM_INST_FROM_L25_SHR ] = { .pme_name = "PM_INST_FROM_L25_SHR", .pme_code = 0x5426, .pme_short_desc = 
"Instruction fetched from L2.5 shared", .pme_long_desc = "Instruction fetched from L2.5 shared", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_INST_FROM_L25_SHR], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_INST_FROM_L25_SHR] }, [ PPC970MP_PME_PM_GRP_MRK ] = { .pme_name = "PM_GRP_MRK", .pme_code = 0x5004, .pme_short_desc = "Group marked in IDU", .pme_long_desc = "A group was sampled (marked)", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_GRP_MRK], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_GRP_MRK] }, [ PPC970MP_PME_PM_BR_MPRED_CR ] = { .pme_name = "PM_BR_MPRED_CR", .pme_code = 0x432, .pme_short_desc = "Branch mispredictions due to CR bit setting", .pme_long_desc = "This signal is asserted when the branch execution unit detects a branch mispredict because the CR value is opposite of the predicted value. This signal is asserted after a branch issue event and will result in a branch redirect flush if not overridden by a flush of an older instruction.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_BR_MPRED_CR], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_BR_MPRED_CR] }, [ PPC970MP_PME_PM_DC_PREF_STREAM_ALLOC ] = { .pme_name = "PM_DC_PREF_STREAM_ALLOC", .pme_code = 0x737, .pme_short_desc = "D cache new prefetch stream allocated", .pme_long_desc = "A new Prefetch Stream was allocated", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_DC_PREF_STREAM_ALLOC], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_DC_PREF_STREAM_ALLOC] }, [ PPC970MP_PME_PM_FPU1_FIN ] = { .pme_name = "PM_FPU1_FIN", .pme_code = 0x117, .pme_short_desc = "FPU1 produced a result", .pme_long_desc = "fp1 finished, produced a result. This only indicates finish, not completion. 
", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_FPU1_FIN], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_FPU1_FIN] }, [ PPC970MP_PME_PM_LSU_REJECT_SRQ ] = { .pme_name = "PM_LSU_REJECT_SRQ", .pme_code = 0x1920, .pme_short_desc = "LSU SRQ rejects", .pme_long_desc = "LSU SRQ rejects", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LSU_REJECT_SRQ], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LSU_REJECT_SRQ] }, [ PPC970MP_PME_PM_BR_MPRED_TA ] = { .pme_name = "PM_BR_MPRED_TA", .pme_code = 0x433, .pme_short_desc = "Branch mispredictions due to target address", .pme_long_desc = "branch miss predict due to a target address prediction. This signal will be asserted each time the branch execution unit detects an incorrect target address prediction. This signal will be asserted after a valid branch execution unit issue and will cause a branch mispredict flush unless a flush is detected from an older instruction.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_BR_MPRED_TA], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_BR_MPRED_TA] }, [ PPC970MP_PME_PM_CRQ_FULL_CYC ] = { .pme_name = "PM_CRQ_FULL_CYC", .pme_code = 0x311, .pme_short_desc = "Cycles CR issue queue full", .pme_long_desc = "The ISU sends a signal indicating that the issue queue that feeds the ifu cr unit cannot accept any more group (queue is full of groups).", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_CRQ_FULL_CYC], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_CRQ_FULL_CYC] }, [ PPC970MP_PME_PM_LD_MISS_L1 ] = { .pme_name = "PM_LD_MISS_L1", .pme_code = 0x3810, .pme_short_desc = "L1 D cache load misses", .pme_long_desc = "Total DL1 Load references that miss the DL1", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LD_MISS_L1], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LD_MISS_L1] }, [ PPC970MP_PME_PM_INST_FROM_PREF ] = { .pme_name = "PM_INST_FROM_PREF", .pme_code = 0x342d, .pme_short_desc = "Instructions fetched from prefetch", 
.pme_long_desc = "An instruction fetch group was fetched from the prefetch buffer. Fetch Groups can contain up to 8 instructions", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_INST_FROM_PREF], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_INST_FROM_PREF] }, [ PPC970MP_PME_PM_STCX_PASS ] = { .pme_name = "PM_STCX_PASS", .pme_code = 0x725, .pme_short_desc = "Stcx passes", .pme_long_desc = "A stcx (stwcx or stdcx) instruction was successful", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_STCX_PASS], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_STCX_PASS] }, [ PPC970MP_PME_PM_DC_INV_L2 ] = { .pme_name = "PM_DC_INV_L2", .pme_code = 0x817, .pme_short_desc = "L1 D cache entries invalidated from L2", .pme_long_desc = "A dcache invalidated was received from the L2 because a line in L2 was castout.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_DC_INV_L2], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_DC_INV_L2] }, [ PPC970MP_PME_PM_LSU_SRQ_FULL_CYC ] = { .pme_name = "PM_LSU_SRQ_FULL_CYC", .pme_code = 0x313, .pme_short_desc = "Cycles SRQ full", .pme_long_desc = "The ISU sends this signal when the srq is full.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LSU_SRQ_FULL_CYC], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LSU_SRQ_FULL_CYC] }, [ PPC970MP_PME_PM_LSU0_FLUSH_LRQ ] = { .pme_name = "PM_LSU0_FLUSH_LRQ", .pme_code = 0x802, .pme_short_desc = "LSU0 LRQ flushes", .pme_long_desc = "A load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LSU0_FLUSH_LRQ], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LSU0_FLUSH_LRQ] }, [ PPC970MP_PME_PM_LSU_SRQ_S0_VALID ] = { .pme_name = "PM_LSU_SRQ_S0_VALID", .pme_code = 0x821, .pme_short_desc = "SRQ slot 0 valid", .pme_long_desc = 
"This signal is asserted every cycle that the Store Request Queue slot zero is valid. The SRQ is 32 entries long and is allocated round-robin.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LSU_SRQ_S0_VALID], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LSU_SRQ_S0_VALID] }, [ PPC970MP_PME_PM_LARX_LSU0 ] = { .pme_name = "PM_LARX_LSU0", .pme_code = 0x727, .pme_short_desc = "Larx executed on LSU0", .pme_long_desc = "A larx (lwarx or ldarx) was executed on side 0 (there is no coresponding unit 1 event since larx instructions can only execute on unit 0)", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LARX_LSU0], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LARX_LSU0] }, [ PPC970MP_PME_PM_GCT_EMPTY_CYC ] = { .pme_name = "PM_GCT_EMPTY_CYC", .pme_code = 0x1004, .pme_short_desc = "Cycles GCT empty", .pme_long_desc = "The Global Completion Table is completely empty", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_GCT_EMPTY_CYC], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_GCT_EMPTY_CYC] }, [ PPC970MP_PME_PM_FPU1_ALL ] = { .pme_name = "PM_FPU1_ALL", .pme_code = 0x107, .pme_short_desc = "FPU1 executed add", .pme_long_desc = " mult", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_FPU1_ALL], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_FPU1_ALL] }, [ PPC970MP_PME_PM_FPU1_FSQRT ] = { .pme_name = "PM_FPU1_FSQRT", .pme_code = 0x106, .pme_short_desc = "FPU1 executed FSQRT instruction", .pme_long_desc = "This signal is active for one cycle at the end of the microcode executed when fp1 is executing a square root instruction. 
This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_FPU1_FSQRT], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_FPU1_FSQRT] }, [ PPC970MP_PME_PM_FPU_FIN ] = { .pme_name = "PM_FPU_FIN", .pme_code = 0x4110, .pme_short_desc = "FPU produced a result", .pme_long_desc = "FPU finished, produced a result This only indicates finish, not completion. Combined Unit 0 + Unit 1", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_FPU_FIN], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_FPU_FIN] }, [ PPC970MP_PME_PM_LSU_SRQ_STFWD ] = { .pme_name = "PM_LSU_SRQ_STFWD", .pme_code = 0x1820, .pme_short_desc = "SRQ store forwarded", .pme_long_desc = "Data from a store instruction was forwarded to a load", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LSU_SRQ_STFWD], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LSU_SRQ_STFWD] }, [ PPC970MP_PME_PM_MRK_LD_MISS_L1_LSU1 ] = { .pme_name = "PM_MRK_LD_MISS_L1_LSU1", .pme_code = 0x724, .pme_short_desc = "LSU1 L1 D cache load misses", .pme_long_desc = "A marked load, executing on unit 1, missed the dcache", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_MRK_LD_MISS_L1_LSU1], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_MRK_LD_MISS_L1_LSU1] }, [ PPC970MP_PME_PM_FXU0_FIN ] = { .pme_name = "PM_FXU0_FIN", .pme_code = 0x332, .pme_short_desc = "FXU0 produced a result", .pme_long_desc = "The Fixed Point unit 0 finished an instruction and produced a result", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_FXU0_FIN], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_FXU0_FIN] }, [ PPC970MP_PME_PM_MRK_FPU_FIN ] = { .pme_name = "PM_MRK_FPU_FIN", .pme_code = 0x7004, .pme_short_desc = "Marked instruction FPU processing finished", .pme_long_desc = "One of the Floating Point Units finished a marked instruction. 
Instructions that finish may not necessary complete", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_MRK_FPU_FIN], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_MRK_FPU_FIN] }, [ PPC970MP_PME_PM_PMC5_OVERFLOW ] = { .pme_name = "PM_PMC5_OVERFLOW", .pme_code = 0x600a, .pme_short_desc = "PMC5 Overflow", .pme_long_desc = "PMC5 Overflow", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_PMC5_OVERFLOW], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_PMC5_OVERFLOW] }, [ PPC970MP_PME_PM_SNOOP_TLBIE ] = { .pme_name = "PM_SNOOP_TLBIE", .pme_code = 0x703, .pme_short_desc = "Snoop TLBIE", .pme_long_desc = "A TLB miss for a data request occurred. Requests that miss the TLB may be retried until the instruction is in the next to complete group (unless HID4 is set to allow speculative tablewalks). This may result in multiple TLB misses for the same instruction.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_SNOOP_TLBIE], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_SNOOP_TLBIE] }, [ PPC970MP_PME_PM_FPU1_FRSP_FCONV ] = { .pme_name = "PM_FPU1_FRSP_FCONV", .pme_code = 0x115, .pme_short_desc = "FPU1 executed FRSP or FCONV instructions", .pme_long_desc = "This signal is active for one cycle when fp1 is executing frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_FPU1_FRSP_FCONV], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_FPU1_FRSP_FCONV] }, [ PPC970MP_PME_PM_FPU0_FDIV ] = { .pme_name = "PM_FPU0_FDIV", .pme_code = 0x100, .pme_short_desc = "FPU0 executed FDIV instruction", .pme_long_desc = "This signal is active for one cycle at the end of the microcode executed when fp0 is executing a divide instruction. This could be fdiv, fdivs, fdiv. 
fdivs.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_FPU0_FDIV], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_FPU0_FDIV] }, [ PPC970MP_PME_PM_LD_REF_L1_LSU1 ] = { .pme_name = "PM_LD_REF_L1_LSU1", .pme_code = 0x814, .pme_short_desc = "LSU1 L1 D cache load references", .pme_long_desc = "A load executed on unit 1", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LD_REF_L1_LSU1], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LD_REF_L1_LSU1] }, [ PPC970MP_PME_PM_HV_CYC ] = { .pme_name = "PM_HV_CYC", .pme_code = 0x3004, .pme_short_desc = "Hypervisor Cycles", .pme_long_desc = "Cycles when the processor is executing in Hypervisor (MSR[HV] = 1 and MSR[PR]=0)", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_HV_CYC], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_HV_CYC] }, [ PPC970MP_PME_PM_LR_CTR_MAP_FULL_CYC ] = { .pme_name = "PM_LR_CTR_MAP_FULL_CYC", .pme_code = 0x306, .pme_short_desc = "Cycles LR/CTR mapper full", .pme_long_desc = "The ISU sends a signal indicating that the lr/ctr mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LR_CTR_MAP_FULL_CYC], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LR_CTR_MAP_FULL_CYC] }, [ PPC970MP_PME_PM_FPU_DENORM ] = { .pme_name = "PM_FPU_DENORM", .pme_code = 0x1120, .pme_short_desc = "FPU received denormalized data", .pme_long_desc = "This signal is active for one cycle when one of the operands is denormalized. 
Combined Unit 0 + Unit 1", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_FPU_DENORM], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_FPU_DENORM] }, [ PPC970MP_PME_PM_LSU0_REJECT_SRQ ] = { .pme_name = "PM_LSU0_REJECT_SRQ", .pme_code = 0x920, .pme_short_desc = "LSU0 SRQ rejects", .pme_long_desc = "LSU0 SRQ rejects", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LSU0_REJECT_SRQ], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LSU0_REJECT_SRQ] }, [ PPC970MP_PME_PM_LSU1_REJECT_SRQ ] = { .pme_name = "PM_LSU1_REJECT_SRQ", .pme_code = 0x924, .pme_short_desc = "LSU1 SRQ rejects", .pme_long_desc = "LSU1 SRQ rejects", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LSU1_REJECT_SRQ], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LSU1_REJECT_SRQ] }, [ PPC970MP_PME_PM_LSU1_DERAT_MISS ] = { .pme_name = "PM_LSU1_DERAT_MISS", .pme_code = 0x706, .pme_short_desc = "LSU1 DERAT misses", .pme_long_desc = "A data request (load or store) from LSU Unit 1 missed the ERAT and resulted in an ERAT reload. Multiple instructions may miss the ERAT entry for the same 4K page, but only one reload will occur.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LSU1_DERAT_MISS], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LSU1_DERAT_MISS] }, [ PPC970MP_PME_PM_IC_PREF_REQ ] = { .pme_name = "PM_IC_PREF_REQ", .pme_code = 0x426, .pme_short_desc = "Instruction prefetch requests", .pme_long_desc = "Asserted when a non-canceled prefetch is made to the cache interface unit (CIU).", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_IC_PREF_REQ], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_IC_PREF_REQ] }, [ PPC970MP_PME_PM_MRK_LSU_FIN ] = { .pme_name = "PM_MRK_LSU_FIN", .pme_code = 0x8004, .pme_short_desc = "Marked instruction LSU processing finished", .pme_long_desc = "One of the Load/Store Units finished a marked instruction. 
Instructions that finish may not necessary complete", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_MRK_LSU_FIN], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_MRK_LSU_FIN] }, [ PPC970MP_PME_PM_MRK_DATA_FROM_MEM ] = { .pme_name = "PM_MRK_DATA_FROM_MEM", .pme_code = 0x2937, .pme_short_desc = "Marked data loaded from memory", .pme_long_desc = "Marked data loaded from memory", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_MRK_DATA_FROM_MEM], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_MRK_DATA_FROM_MEM] }, [ PPC970MP_PME_PM_CMPLU_STALL_DCACHE_MISS ] = { .pme_name = "PM_CMPLU_STALL_DCACHE_MISS", .pme_code = 0x50cb, .pme_short_desc = "Completion stall caused by D cache miss", .pme_long_desc = "Completion stall caused by D cache miss", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_CMPLU_STALL_DCACHE_MISS], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_CMPLU_STALL_DCACHE_MISS] }, [ PPC970MP_PME_PM_LSU0_FLUSH_UST ] = { .pme_name = "PM_LSU0_FLUSH_UST", .pme_code = 0x801, .pme_short_desc = "LSU0 unaligned store flushes", .pme_long_desc = "A store was flushed from unit 0 because it was unaligned (crossed a 4k boundary)", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LSU0_FLUSH_UST], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LSU0_FLUSH_UST] }, [ PPC970MP_PME_PM_LSU_FLUSH_LRQ ] = { .pme_name = "PM_LSU_FLUSH_LRQ", .pme_code = 0x6800, .pme_short_desc = "LRQ flushes", .pme_long_desc = "A load was flushed because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LSU_FLUSH_LRQ], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LSU_FLUSH_LRQ] }, [ PPC970MP_PME_PM_LSU_FLUSH_SRQ ] = { .pme_name = "PM_LSU_FLUSH_SRQ", .pme_code = 0x5800, .pme_short_desc = "SRQ flushes", .pme_long_desc = "A store was flushed 
because younger load hits and older store that is already in the SRQ or in the same group.", .pme_event_ids = ppc970mp_event_ids[PPC970MP_PME_PM_LSU_FLUSH_SRQ], .pme_group_vector = ppc970mp_group_vecs[PPC970MP_PME_PM_LSU_FLUSH_SRQ] } }; #define PPC970MP_PME_EVENT_COUNT 230 static const int ppc970mp_group_event_ids[][PPC970MP_NUM_EVENT_COUNTERS] = { [ 0 ] = { 81, 2, 65, 30, 0, 2, 28, 29 }, [ 1 ] = { 2, 2, 36, 6, 39, 35, 63, 37 }, [ 2 ] = { 36, 2, 36, 6, 39, 35, 63, 37 }, [ 3 ] = { 64, 63, 4, 30, 65, 63, 63, 37 }, [ 4 ] = { 27, 25, 21, 22, 3, 25, 30, 22 }, [ 5 ] = { 26, 26, 4, 30, 26, 26, 21, 43 }, [ 6 ] = { 87, 1, 3, 29, 44, 36, 30, 4 }, [ 7 ] = { 13, 21, 22, 24, 3, 35, 46, 49 }, [ 8 ] = { 37, 2, 24, 27, 34, 31, 30, 4 }, [ 9 ] = { 28, 83, 65, 10, 3, 35, 8, 10 }, [ 10 ] = { 10, 18, 16, 21, 11, 19, 30, 4 }, [ 11 ] = { 12, 20, 13, 19, 8, 16, 30, 4 }, [ 12 ] = { 9, 17, 14, 20, 3, 35, 12, 18 }, [ 13 ] = { 15, 23, 13, 19, 3, 35, 4, 16 }, [ 14 ] = { 45, 54, 4, 5, 47, 54, 30, 4 }, [ 15 ] = { 47, 56, 39, 38, 3, 35, 35, 36 }, [ 16 ] = { 48, 57, 67, 65, 3, 35, 62, 5 }, [ 17 ] = { 53, 62, 67, 65, 81, 2, 30, 4 }, [ 18 ] = { 44, 53, 4, 5, 38, 2, 31, 4 }, [ 19 ] = { 28, 64, 29, 5, 0, 35, 28, 67 }, [ 20 ] = { 27, 25, 26, 22, 3, 25, 30, 22 }, [ 21 ] = { 6, 40, 36, 63, 3, 35, 63, 37 }, [ 22 ] = { 6, 64, 36, 63, 3, 35, 63, 37 }, [ 23 ] = { 27, 25, 13, 19, 3, 26, 30, 43 }, [ 24 ] = { 36, 2, 36, 1, 81, 2, 1, 2 }, [ 25 ] = { 36, 2, 36, 1, 3, 81, 63, 37 }, [ 26 ] = { 81, 4, 0, 2, 41, 2, 30, 2 }, [ 27 ] = { 3, 87, 30, 5, 38, 2, 44, 47 }, [ 28 ] = { 6, 40, 30, 5, 66, 65, 32, 34 }, [ 29 ] = { 39, 38, 29, 30, 4, 2, 28, 5 }, [ 30 ] = { 68, 69, 36, 49, 38, 35, 4, 37 }, [ 31 ] = { 38, 36, 70, 5, 38, 2, 30, 4 }, [ 32 ] = { 28, 33, 32, 30, 39, 62, 63, 4 }, [ 33 ] = { 74, 82, 4, 51, 35, 70, 50, 30 }, [ 34 ] = { 72, 70, 4, 50, 35, 69, 49, 60 }, [ 35 ] = { 78, 2, 62, 51, 71, 75, 60, 30 }, [ 36 ] = { 79, 71, 56, 60, 3, 35, 54, 58 }, [ 37 ] = { 75, 73, 53, 57, 3, 35, 53, 57 }, [ 38 ] = { 36, 36, 26, 
26, 28, 27, 24, 4 }, [ 39 ] = { 36, 2, 23, 23, 28, 27, 25, 26 }, [ 40 ] = { 38, 38, 31, 0, 90, 37, 4, 30 }, [ 41 ] = { 85, 85, 43, 12, 84, 35, 70, 4 }, [ 42 ] = { 88, 36, 36, 5, 86, 62, 69, 37 }, [ 43 ] = { 36, 27, 26, 22, 85, 27, 68, 4 }, [ 44 ] = { 27, 25, 30, 68, 87, 25, 67, 4 }, [ 45 ] = { 28, 36, 10, 3, 88, 2, 71, 33 }, [ 46 ] = { 36, 36, 4, 5, 91, 87, 44, 47 }, [ 47 ] = { 39, 38, 31, 1, 3, 35, 1, 2 }, [ 48 ] = { 3, 87, 30, 35, 0, 2, 36, 37 }, [ 49 ] = { 3, 87, 30, 5, 91, 87, 36, 37 }, [ 50 ] = { 71, 88, 30, 5, 92, 88, 50, 51 } }; static const pmg_power_group_t ppc970mp_groups[] = { [ 0 ] = { .pmg_name = "pm_slice0", .pmg_desc = "Time Slice 0", .pmg_event_ids = ppc970mp_group_event_ids[0], .pmg_mmcr0 = 0x000000000000051eULL, .pmg_mmcr1 = 0x000000000a46f18cULL, .pmg_mmcra = 0x0000000000002000ULL }, [ 1 ] = { .pmg_name = "pm_eprof", .pmg_desc = "Group for use with eprof", .pmg_event_ids = ppc970mp_group_event_ids[1], .pmg_mmcr0 = 0x0000000000000f1eULL, .pmg_mmcr1 = 0x4003001005f09000ULL, .pmg_mmcra = 0x0000000000002000ULL }, [ 2 ] = { .pmg_name = "pm_basic", .pmg_desc = "Basic performance indicators", .pmg_event_ids = ppc970mp_group_event_ids[2], .pmg_mmcr0 = 0x000000000000091eULL, .pmg_mmcr1 = 0x4003001005f09000ULL, .pmg_mmcra = 0x0000000000002000ULL }, [ 3 ] = { .pmg_name = "pm_lsu", .pmg_desc = "Information on the Load Store Unit", .pmg_event_ids = ppc970mp_group_event_ids[3], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x000f00007a400000ULL, .pmg_mmcra = 0x0000000000002000ULL }, [ 4 ] = { .pmg_name = "pm_fpu1", .pmg_desc = "Floating Point events", .pmg_event_ids = ppc970mp_group_event_ids[4], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x00000000001e0480ULL, .pmg_mmcra = 0x0000000000002000ULL }, [ 5 ] = { .pmg_name = "pm_fpu2", .pmg_desc = "Floating Point events", .pmg_event_ids = ppc970mp_group_event_ids[5], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x000020e87a400000ULL, .pmg_mmcra = 0x0000000000002000ULL }, [ 6 ] = { .pmg_name = 
"pm_isu_rename", .pmg_desc = "ISU Rename Pool Events", .pmg_event_ids = ppc970mp_group_event_ids[6], .pmg_mmcr0 = 0x0000000000001228ULL, .pmg_mmcr1 = 0x400000218e6d84bcULL, .pmg_mmcra = 0x0000000000002000ULL }, [ 7 ] = { .pmg_name = "pm_isu_queues1", .pmg_desc = "ISU Rename Pool Events", .pmg_event_ids = ppc970mp_group_event_ids[7], .pmg_mmcr0 = 0x000000000000132eULL, .pmg_mmcr1 = 0x40000000851e994cULL, .pmg_mmcra = 0x0000000000002000ULL }, [ 8 ] = { .pmg_name = "pm_isu_flow", .pmg_desc = "ISU Instruction Flow Events", .pmg_event_ids = ppc970mp_group_event_ids[8], .pmg_mmcr0 = 0x000000000000181eULL, .pmg_mmcr1 = 0x400000b3d7b7c4bcULL, .pmg_mmcra = 0x0000000000002000ULL }, [ 9 ] = { .pmg_name = "pm_isu_work", .pmg_desc = "ISU Indicators of Work Blockage", .pmg_event_ids = ppc970mp_group_event_ids[9], .pmg_mmcr0 = 0x0000000000000402ULL, .pmg_mmcr1 = 0x400000050fde9d88ULL, .pmg_mmcra = 0x0000000000002000ULL }, [ 10 ] = { .pmg_name = "pm_fpu3", .pmg_desc = "Floating Point events by unit", .pmg_event_ids = ppc970mp_group_event_ids[10], .pmg_mmcr0 = 0x0000000000001028ULL, .pmg_mmcr1 = 0x000000008d6354bcULL, .pmg_mmcra = 0x0000000000002000ULL }, [ 11 ] = { .pmg_name = "pm_fpu4", .pmg_desc = "Floating Point events by unit", .pmg_event_ids = ppc970mp_group_event_ids[11], .pmg_mmcr0 = 0x000000000000122cULL, .pmg_mmcr1 = 0x000000009de774bcULL, .pmg_mmcra = 0x0000000000002000ULL }, [ 12 ] = { .pmg_name = "pm_fpu5", .pmg_desc = "Floating Point events by unit", .pmg_event_ids = ppc970mp_group_event_ids[12], .pmg_mmcr0 = 0x0000000000001838ULL, .pmg_mmcr1 = 0x000000c0851e9958ULL, .pmg_mmcra = 0x0000000000002000ULL }, [ 13 ] = { .pmg_name = "pm_fpu7", .pmg_desc = "Floating Point events by unit", .pmg_event_ids = ppc970mp_group_event_ids[13], .pmg_mmcr0 = 0x000000000000193aULL, .pmg_mmcr1 = 0x000000c89dde97e0ULL, .pmg_mmcra = 0x0000000000002000ULL }, [ 14 ] = { .pmg_name = "pm_lsu_flush", .pmg_desc = "LSU Flush Events", .pmg_event_ids = ppc970mp_group_event_ids[14], .pmg_mmcr0 = 
0x000000000000122cULL, .pmg_mmcr1 = 0x000c00007be774bcULL, .pmg_mmcra = 0x0000000000002000ULL }, [ 15 ] = { .pmg_name = "pm_lsu_load1", .pmg_desc = "LSU Load Events", .pmg_event_ids = ppc970mp_group_event_ids[15], .pmg_mmcr0 = 0x0000000000001028ULL, .pmg_mmcr1 = 0x000f0000851e9958ULL, .pmg_mmcra = 0x0000000000002000ULL }, [ 16 ] = { .pmg_name = "pm_lsu_store1", .pmg_desc = "LSU Store Events", .pmg_event_ids = ppc970mp_group_event_ids[16], .pmg_mmcr0 = 0x000000000000112aULL, .pmg_mmcr1 = 0x000f00008d5e99dcULL, .pmg_mmcra = 0x0000000000002000ULL }, [ 17 ] = { .pmg_name = "pm_lsu_store2", .pmg_desc = "LSU Store Events", .pmg_event_ids = ppc970mp_group_event_ids[17], .pmg_mmcr0 = 0x0000000000001838ULL, .pmg_mmcr1 = 0x0003c0d08d76f4bcULL, .pmg_mmcra = 0x0000000000002000ULL }, [ 18 ] = { .pmg_name = "pm_lsu7", .pmg_desc = "Information on the Load Store Unit", .pmg_event_ids = ppc970mp_group_event_ids[18], .pmg_mmcr0 = 0x000000000000122cULL, .pmg_mmcr1 = 0x000830047bd2fe3cULL, .pmg_mmcra = 0x0000000000002000ULL }, [ 19 ] = { .pmg_name = "pm_misc", .pmg_desc = "Misc Events for testing", .pmg_event_ids = ppc970mp_group_event_ids[19], .pmg_mmcr0 = 0x0000000000000404ULL, .pmg_mmcr1 = 0x0000000023c69194ULL, .pmg_mmcra = 0x0000000000002000ULL }, [ 20 ] = { .pmg_name = "pm_pe_bench1", .pmg_desc = "PE Benchmarker group for FP analysis", .pmg_event_ids = ppc970mp_group_event_ids[20], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x10001002001e0480ULL, .pmg_mmcra = 0x0000000000002000ULL }, [ 21 ] = { .pmg_name = "pm_pe_bench4", .pmg_desc = "PE Benchmarker group for L1 and TLB", .pmg_event_ids = ppc970mp_group_event_ids[21], .pmg_mmcr0 = 0x0000000000001420ULL, .pmg_mmcr1 = 0x000b000004de9000ULL, .pmg_mmcra = 0x0000000000002000ULL }, [ 22 ] = { .pmg_name = "pm_hpmcount1", .pmg_desc = "Hpmcount group for L1 and TLB behavior", .pmg_event_ids = ppc970mp_group_event_ids[22], .pmg_mmcr0 = 0x0000000000001404ULL, .pmg_mmcr1 = 0x000b000004de9000ULL, .pmg_mmcra = 0x0000000000002000ULL }, [ 
23 ] = {
		.pmg_name = "pm_hpmcount2",
		.pmg_desc = "Hpmcount group for computation",
		.pmg_event_ids = ppc970mp_group_event_ids[23],
		.pmg_mmcr0 = 0x0000000000000000ULL,
		.pmg_mmcr1 = 0x000020289dde0480ULL,
		.pmg_mmcra = 0x0000000000002000ULL
	},
	[ 24 ] = {
		.pmg_name = "pm_l1andbr",
		.pmg_desc = "L1 misses and branch misspredict analysis",
		.pmg_event_ids = ppc970mp_group_event_ids[24],
		.pmg_mmcr0 = 0x000000000000091eULL,
		.pmg_mmcr1 = 0x8003c01d0676fd6cULL,
		.pmg_mmcra = 0x0000000000002000ULL
	},
	/*
	 * Group 25: the table generator split this entry's description at
	 * its comma, which left the first half of the sentence in .pmg_name
	 * and the second half in .pmg_desc. Every other group uses a short
	 * "pm_*" identifier as its name, so restore that shape here.
	 * NOTE(review): "pm_imix" is the short name used for this group in
	 * the IBM/oprofile PPC970 group lists -- confirm against the
	 * generator input.
	 */
	[ 25 ] = {
		.pmg_name = "pm_imix",
		.pmg_desc = "Instruction mix: loads, stores and branches",
		.pmg_event_ids = ppc970mp_group_event_ids[25],
		.pmg_mmcr0 = 0x000000000000091eULL,
		.pmg_mmcr1 = 0x8003c021065fb000ULL,
		.pmg_mmcra = 0x0000000000002000ULL
	},
	[ 26 ] = {
		.pmg_name = "pm_branch",
		.pmg_desc = "SLB and branch misspredict analysis",
		.pmg_event_ids = ppc970mp_group_event_ids[26],
		.pmg_mmcr0 = 0x000000000000052aULL,
		.pmg_mmcr1 = 0x8008000bcea2f4ecULL,
		.pmg_mmcra = 0x0000000000002000ULL
	},
	[ 27 ] = {
		.pmg_name = "pm_data",
		.pmg_desc = "data source and LMQ",
		.pmg_event_ids = ppc970mp_group_event_ids[27],
		.pmg_mmcr0 = 0x000000000000070eULL,
		.pmg_mmcr1 = 0x0000300c4bd2ff74ULL,
		.pmg_mmcra = 0x0000000000002000ULL
	},
	[ 28 ] = {
		.pmg_name = "pm_tlb",
		.pmg_desc = "TLB and LRQ plus data prefetch",
		.pmg_event_ids = ppc970mp_group_event_ids[28],
		.pmg_mmcr0 = 0x0000000000001420ULL,
		.pmg_mmcr1 = 0x0008e03c4bfdacecULL,
		.pmg_mmcra = 0x0000000000002000ULL
	},
	[ 29 ] = {
		.pmg_name = "pm_isource",
		.pmg_desc = "inst source and tablewalk",
		.pmg_event_ids = ppc970mp_group_event_ids[29],
		.pmg_mmcr0 = 0x000000000000060cULL,
		.pmg_mmcr1 = 0x800b00c0226ef1dcULL,
		.pmg_mmcra = 0x0000000000002000ULL
	},
	[ 30 ] = {
		.pmg_name = "pm_sync",
		.pmg_desc = "Sync and SRQ",
		.pmg_event_ids = ppc970mp_group_event_ids[30],
		.pmg_mmcr0 = 0x0000000000001d32ULL,
		.pmg_mmcr1 = 0x0003e0c107529780ULL,
		.pmg_mmcra = 0x0000000000002000ULL
	},
	[ 31 ] = {
		.pmg_name = "pm_ierat",
		.pmg_desc = "IERAT",
		.pmg_event_ids = ppc970mp_group_event_ids[31],
.pmg_mmcr0 = 0x0000000000000d12ULL, .pmg_mmcr1 = 0x80000082c3d2f4bcULL, .pmg_mmcra = 0x0000000000002000ULL }, [ 32 ] = { .pmg_name = "pm_derat", .pmg_desc = "DERAT", .pmg_event_ids = ppc970mp_group_event_ids[32], .pmg_mmcr0 = 0x0000000000000436ULL, .pmg_mmcr1 = 0x100b7052e274003cULL, .pmg_mmcra = 0x0000000000002000ULL }, [ 33 ] = { .pmg_name = "pm_mark1", .pmg_desc = "Information on marked instructions", .pmg_event_ids = ppc970mp_group_event_ids[33], .pmg_mmcr0 = 0x0000000000000006ULL, .pmg_mmcr1 = 0x00008080790852a4ULL, .pmg_mmcra = 0x0000000000002001ULL }, [ 34 ] = { .pmg_name = "pm_mark2", .pmg_desc = "Marked Instructions Processing Flow", .pmg_event_ids = ppc970mp_group_event_ids[34], .pmg_mmcr0 = 0x000000000000020aULL, .pmg_mmcr1 = 0x0000000079484210ULL, .pmg_mmcra = 0x0000000000002001ULL }, [ 35 ] = { .pmg_name = "pm_mark3", .pmg_desc = "Marked Stores Processing Flow", .pmg_event_ids = ppc970mp_group_event_ids[35], .pmg_mmcr0 = 0x000000000000031eULL, .pmg_mmcr1 = 0x00203004190a3f24ULL, .pmg_mmcra = 0x0000000000002001ULL }, [ 36 ] = { .pmg_name = "pm_lsu_mark1", .pmg_desc = "Load Store Unit Marked Events", .pmg_event_ids = ppc970mp_group_event_ids[36], .pmg_mmcr0 = 0x0000000000001b34ULL, .pmg_mmcr1 = 0x000280c08d5e9850ULL, .pmg_mmcra = 0x0000000000002001ULL }, [ 37 ] = { .pmg_name = "pm_lsu_mark2", .pmg_desc = "Load Store Unit Marked Events", .pmg_event_ids = ppc970mp_group_event_ids[37], .pmg_mmcr0 = 0x0000000000001838ULL, .pmg_mmcr1 = 0x000280c0959e99dcULL, .pmg_mmcra = 0x0000000000002001ULL }, [ 38 ] = { .pmg_name = "pm_fxu1", .pmg_desc = "Fixed Point events by unit", .pmg_event_ids = ppc970mp_group_event_ids[38], .pmg_mmcr0 = 0x0000000000000912ULL, .pmg_mmcr1 = 0x100010020084213cULL, .pmg_mmcra = 0x0000000000002000ULL }, [ 39 ] = { .pmg_name = "pm_fxu2", .pmg_desc = "Fixed Point events by unit", .pmg_event_ids = ppc970mp_group_event_ids[39], .pmg_mmcr0 = 0x000000000000091eULL, .pmg_mmcr1 = 0x4000000ca4042d78ULL, .pmg_mmcra = 0x0000000000002000ULL }, [ 40 ] 
= { .pmg_name = "pm_ifu", .pmg_desc = "pm_ifu", .pmg_event_ids = ppc970mp_group_event_ids[40], .pmg_mmcr0 = 0x0000000000000d0cULL, .pmg_mmcr1 = 0x800000f06b7867a4ULL, .pmg_mmcra = 0x0000000000002000ULL }, [ 41 ] = { .pmg_name = "pm_cpi_stack1", .pmg_desc = "CPI stack analysis", .pmg_event_ids = ppc970mp_group_event_ids[41], .pmg_mmcr0 = 0x0000000000001b3eULL, .pmg_mmcr1 = 0x4000c0c0add6963dULL, .pmg_mmcra = 0x0000000000002000ULL }, [ 42 ] = { .pmg_name = "pm_cpi_stack2", .pmg_desc = "CPI stack analysis", .pmg_event_ids = ppc970mp_group_event_ids[42], .pmg_mmcr0 = 0x0000000000000b12ULL, .pmg_mmcr1 = 0x000b000003d60583ULL, .pmg_mmcra = 0x0000000000002000ULL }, [ 43 ] = { .pmg_name = "pm_cpi_stack3", .pmg_desc = "CPI stack analysis", .pmg_event_ids = ppc970mp_group_event_ids[43], .pmg_mmcr0 = 0x0000000000000916ULL, .pmg_mmcr1 = 0x10001002001625beULL, .pmg_mmcra = 0x0000000000002000ULL }, [ 44 ] = { .pmg_name = "pm_cpi_stack4", .pmg_desc = "CPI stack analysis", .pmg_event_ids = ppc970mp_group_event_ids[44], .pmg_mmcr0 = 0x0000000000000000ULL, .pmg_mmcr1 = 0x00000000485805bdULL, .pmg_mmcra = 0x0000000000002000ULL }, [ 45 ] = { .pmg_name = "pm_cpi_stack5", .pmg_desc = "CPI stack analysis", .pmg_event_ids = ppc970mp_group_event_ids[45], .pmg_mmcr0 = 0x0000000000000412ULL, .pmg_mmcr1 = 0x90014009b6d8f672ULL, .pmg_mmcra = 0x0000000000002000ULL }, [ 46 ] = { .pmg_name = "pm_data2", .pmg_desc = "data source and LMQ", .pmg_event_ids = ppc970mp_group_event_ids[46], .pmg_mmcr0 = 0x0000000000000912ULL, .pmg_mmcr1 = 0x0000300c7bce7f74ULL, .pmg_mmcra = 0x0000000000002000ULL }, [ 47 ] = { .pmg_name = "pm_fetch_branch", .pmg_desc = "Instruction fetch and branch events", .pmg_event_ids = ppc970mp_group_event_ids[47], .pmg_mmcr0 = 0x000000000000060cULL, .pmg_mmcr1 = 0x800000cd6e5e9d6cULL, .pmg_mmcra = 0x0000000000002000ULL }, [ 48 ] = { .pmg_name = "pm_l1l2_miss", .pmg_desc = "L1 and L2 miss events", .pmg_event_ids = ppc970mp_group_event_ids[48], .pmg_mmcr0 = 0x000000000000070eULL, 
.pmg_mmcr1 = 0x000330004c86fb00ULL, .pmg_mmcra = 0x0000000000002000ULL }, [ 49 ] = { .pmg_name = "pm_data_from", .pmg_desc = "Data From L2 instructions", .pmg_event_ids = ppc970mp_group_event_ids[49], .pmg_mmcr0 = 0x000000000000070eULL, .pmg_mmcr1 = 0x000330004bce7b00ULL, .pmg_mmcra = 0x0000000000002000ULL }, [ 50 ] = { .pmg_name = "pm_mark_data_from", .pmg_desc = "Marked Data From L2 instructions", .pmg_event_ids = ppc970mp_group_event_ids[50], .pmg_mmcr0 = 0x000000000000070eULL, .pmg_mmcr1 = 0x002030084bce72f0ULL, .pmg_mmcra = 0x0000000000002001ULL } }; #endif papi-5.6.0/src/components/Rules.components000664 001750 001750 00000000132 13216244356 022710 0ustar00jshenry1963jshenry1963000000 000000 # $Id$ # This file is intended to prevent an empty include compile error in Makefile.inc papi-5.6.0/src/examples/add_event/Papi_add_env_event.c000664 001750 001750 00000011304 13216244361 025006 0ustar00jshenry1963jshenry1963000000 000000 /* * This example shows how to use PAPI_library_init, PAPI_create_eventset, * PAPI_add_event, * PAPI_start and PAPI_stop. These 5 functions * will allow a user to do most of the performance information gathering * that they would need. PAPI_read could also be used if you don't want * to stop the EventSet from running but only check the counts. * * Also, we will use PAPI_perror for * error information. * * In addition, a new call was created called PAPI_add_env_event * that allows a user to setup environment variable to read * which event should be monitored this allows different events * to be monitored at runtime without recompiling, the syntax * is as follows: * PAPI_add_env_event(int *EventSet, int *Event, char *env_variable); * EventSet is the same as in PAPI_add_event * Event is the default event to monitor if the environment variable * does not exist and differs from PAPI_add_event as it is * a pointer. 
* env_varialbe is the name of the environment variable to look for * the event code, this can be a name, number or hex, for example * PAPI_L1_DCM could be defined in the environment variable as * all of the following: PAPI_L1_DCM, 0x80000000, or -2147483648 * * To use only add_event you would change the calls to * PAPI_add_env_event(int *EventSet, int *Event, char *env_variable); * to PAPI_add_event(int *EventSet, int Event); * * We will also use PAPI_event_code_to_name since the event may have * changed. * Author: Kevin London * email: london@cs.utk.edu */ #include #include #include "papi.h" /* This needs to be included anytime you use PAPI */ int PAPI_add_env_event(int *EventSet, int *Event, char *env_variable); int main(){ int retval,i; int EventSet=PAPI_NULL; int event_code=PAPI_TOT_INS; /* By default monitor total instructions */ char errstring[PAPI_MAX_STR_LEN]; char event_name[PAPI_MAX_STR_LEN]; float a[1000],b[1000],c[1000]; long long values; /* This initializes the library and checks the version number of the * header file, to the version of the library, if these don't match * then it is likely that PAPI won't work correctly. */ if ((retval = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT ){ /* This call loads up what the error means into errstring * if retval == PAPI_ESYS then it might be beneficial * to call perror as well to see what system call failed */ PAPI_perror("PAPI_library_init"); exit(-1); } /* Create space for the EventSet */ if ( (retval=PAPI_create_eventset( &EventSet ))!=PAPI_OK){ PAPI_perror(retval, errstring, PAPI_MAX_STR_LEN); exit(-1); } /* After this call if the environment variable PAPI_EVENT is set, * event_code may contain something different than total instructions. 
*/ if ( (retval=PAPI_add_env_event(&EventSet, &event_code, "PAPI_EVENT"))!=PAPI_OK){ PAPI_perror("PAPI_add_env_event"); exit(-1); } /* Now lets start counting */ if ( (retval = PAPI_start(EventSet)) != PAPI_OK ){ PAPI_perror("PAPI_start"); exit(-1); } /* Some work to take up some time, the PAPI_start/PAPI_stop (and/or * PAPI_read) should surround what you want to monitor. */ for ( i=0;i<1000;i++){ a[i] = b[i]-c[i]; c[i] = a[i]*1.2; } if ( (retval = PAPI_stop(EventSet, &values) ) != PAPI_OK ){ PAPI_perror("PAPI_stop"); exit(-1); } if ( (retval=PAPI_event_code_to_name( event_code, event_name))!=PAPI_OK){ PAPI_perror("PAPI_event_code_to_name"); exit(-1); } printf("Ending values for %s: %lld\n", event_name,values); /* Remove PAPI instrumentation, this is necessary on platforms * that need to release shared memory segments and is always * good practice. */ PAPI_shutdown(); exit(0); } int PAPI_add_env_event(int *EventSet, int *EventCode, char *env_variable){ int real_event=*EventCode; char *eventname; int retval; if ( env_variable != NULL ){ if ( (eventname=getenv(env_variable)) ) { if ( eventname[0] == 'P' ) { /* Use the PAPI name */ retval=PAPI_event_name_to_code(eventname, &real_event ); if ( retval != PAPI_OK ) real_event = *EventCode; } else{ if ( strlen(eventname)>1 && eventname[1]=='x') sscanf(eventname, "%#x", &real_event); else real_event = atoi(eventname); } } } if ( (retval = PAPI_add_event( *EventSet, real_event))!= PAPI_OK ){ if ( real_event != *EventCode ) { if ( (retval = PAPI_add_event( *EventSet, *EventCode)) == PAPI_OK ){ real_event = *EventCode; } } } *EventCode = real_event; return retval; } papi-5.6.0/src/libpfm-3.y/examples_ia64_v2.0/ita2_irr.c000664 001750 001750 00000025765 13216244362 024307 0ustar00jshenry1963jshenry1963000000 000000 /* * ita2_irr.c - example of how to use code range restriction with the Itanium2 PMU * * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. 
* Contributed by Stephane Eranian
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do so,
 * subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
 * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
 * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
 * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
 * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * This file is part of libpfm, a performance monitoring support library for
 * applications on Linux/ia64.
 */
/*
 * NOTE(review): the header names of the ten #include directives below are
 * missing in this copy of the file (they look like they were stripped
 * during extraction) -- restore them from the upstream source.
 */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

/* sizes of the register argument arrays handed to perfmonctl() */
#define NUM_PMCS PFMLIB_MAX_PMCS
#define NUM_PMDS PFMLIB_MAX_PMDS

#define MAX_EVT_NAME_LEN	128
#define MAX_PMU_NAME_LEN	32

/* number of elements in each vector processed by saxpy()/saxpy2() */
#define VECTOR_SIZE	1000000UL

/* an event to measure, together with the count we expect to read back */
typedef struct {
	char *event_name;
	unsigned long expected_value;
} event_desc_t;

/*
 * NULL-terminated list of events to measure.
 * The expected value is 2 * VECTOR_SIZE: only one of the two saxpy
 * copies is inside the measured code range (see main()).
 */
static event_desc_t event_list[]={
	{ "fp_ops_retired", VECTOR_SIZE<<1 },
	{ NULL, 0UL }
};

static void fatal_error(char *fmt,...) __attribute__((noreturn));

/*
 * Print a printf-style message on stderr and terminate the program
 * with exit status 1. Never returns.
 */
static void
fatal_error(char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	va_end(ap);

	exit(1);
}

/*
 * Compute c[i] = 2*a[i] + b[i] for the first size elements.
 * main() uses the address of this function as the start of the
 * monitored code range.
 */
void
saxpy(double *a, double *b, double *c, unsigned long size)
{
	unsigned long i;

	for(i=0; i < size; i++) {
		c[i] = 2*a[i] + b[i];
	}
}

/*
 * Exact copy of saxpy(). main() uses the address of this function as the
 * end of the monitored code range, so it must stay right after saxpy().
 */
void
saxpy2(double *a, double *b, double *c, unsigned long size)
{
	unsigned long i;

	for(i=0; i < size; i++) {
		c[i] = 2*a[i] + b[i];
	}
}

/*
 * Workload: allocate three zero-filled vectors of VECTOR_SIZE doubles and
 * run saxpy() followed by saxpy2() on them. Terminates the program if an
 * allocation fails. The vectors are not freed before returning.
 * Always returns 0.
 */
static int
do_test(void)
{
	unsigned long size;
	double *a, *b, *c;

	size = VECTOR_SIZE;

	a = malloc(size*sizeof(double));
	b = malloc(size*sizeof(double));
	c = malloc(size*sizeof(double));

	if (a == NULL || b == NULL || c == NULL) fatal_error("Cannot allocate vectors\n");

	memset(a, 0, size*sizeof(double));
	memset(b, 0, size*sizeof(double));
	memset(c, 0, size*sizeof(double));

	saxpy(a,b,c, size);
	saxpy2(a,b,c, size);

	return 0;
}

/*
 * Self-monitoring example: restrict counting of the events in event_list
 * to the code range [saxpy, saxpy2), run do_test() and print the counts
 * next to their expected values.
 * argc and argv are not used.
 * Returns 0 on success; every failure goes through fatal_error().
 */
int
main(int argc, char **argv)
{
	event_desc_t *p;
	unsigned long range_start, range_end;
	int ret, type = 0;
	pfmlib_input_param_t inp;
	pfmlib_output_param_t outp;
	pfmlib_ita2_input_param_t ita2_inp;
	pfmlib_ita2_output_param_t ita2_outp;
	pfarg_reg_t pd[NUM_PMDS];
	pfarg_reg_t pc[NUM_PMCS];
	pfarg_dbreg_t ibrs[8];
	pfarg_context_t ctx[1];
	pfarg_load_t load_args;
	pfmlib_options_t pfmlib_options;
	struct fd { /* function descriptor */
		unsigned long addr;
		unsigned long gp;
	} *fd;
	unsigned int i;
	int id;
	char name[MAX_EVT_NAME_LEN];

	/*
	 * Initialize pfm library (required before we can use it)
	 */
	if (pfm_initialize() != PFMLIB_SUCCESS) {
		fatal_error("Can't initialize library\n");
	}

	/*
	 * Let's make sure we run this on the right CPU family
	 */
	pfm_get_pmu_type(&type);
	if (type != PFMLIB_ITANIUM2_PMU) {
		char model[MAX_PMU_NAME_LEN];
		pfm_get_pmu_name(model, MAX_PMU_NAME_LEN);
		fatal_error("this program does not work with %s PMU\n", model);
	}

	/*
	 * pass options to library (optional)
	 */
	memset(&pfmlib_options, 0, sizeof(pfmlib_options));
	pfmlib_options.pfm_debug = 1; /* set to 1 for debug */
	pfmlib_options.pfm_verbose = 1; /* set to 1 for verbose */
	pfm_set_options(&pfmlib_options);

	/*
	 * Compute the range we are interested in
	 *
	 * On IA-64, the function pointer does not point directly
	 * to the function but to a descriptor which contains two
	 * unsigned long: the first one is the actual start address
	 * of the function, the second is the gp (global pointer)
	 * to load into r1 before jumping into the function. Unless
	 * we're jumping into a shared library the gp is the same as
	 * the current gp.
	 *
	 * In this artificial example, we also rely on the compiler/linker
	 * NOT reordering code layout. We depend on saxpy2() being just
	 * after saxpy().
	 *
	 */
	fd = (struct fd *)saxpy;
	range_start = fd->addr;

	fd = (struct fd *)saxpy2;
	range_end = fd->addr;

	/* start from zeroed arguments for every library call and syscall below */
	memset(pc, 0, sizeof(pc));
	memset(pd, 0, sizeof(pd));
	memset(ctx, 0, sizeof(ctx));
	memset(ibrs,0, sizeof(ibrs));
	memset(&load_args,0, sizeof(load_args));
	memset(&inp,0, sizeof(inp));
	memset(&outp,0, sizeof(outp));
	memset(&ita2_inp,0, sizeof(ita2_inp));
	memset(&ita2_outp,0, sizeof(ita2_outp));

	/*
	 * find requested event
	 */
	p = event_list;
	for (i=0; p->event_name ; i++, p++) {
		if (pfm_find_event(p->event_name, &inp.pfp_events[i].event) != PFMLIB_SUCCESS) {
			fatal_error("cannot find %s event\n", p->event_name);
		}
	}

	/*
	 * set the privilege mode:
	 * 	PFM_PLM3 : user level only
	 */
	inp.pfp_dfl_plm = PFM_PLM3;

	/*
	 * how many counters we use
	 */
	inp.pfp_event_count = i;

	/*
	 * We use the library to figure out how to program the debug registers
	 * to cover the code range we are interested in. The rr_end parameter
	 * must point to the byte after the last element of the range (C-style range).
	 *
	 * Because of the masking mechanism and therefore alignment constraints used to implement
	 * this feature, it may not be possible to exactly cover a given range. It may be that
	 * the coverage exceeds the desired range. So it is possible to capture noise if
	 * the surrounding addresses are also heavily used. You can figure out by how much the
	 * actual range is off compared to the requested range by checking the rr_soff and rr_eoff
	 * fields on return from the library call.
	 *
	 * Upon return, the rr_br array is programmed and the number of debug registers (not pairs)
	 * used to cover the range is in rr_nbr_used.
	 *
	 * In the case of code range restriction on Itanium 2, the library will try to use the fine
	 * mode first and then it will default to using multiple pairs to cover the range.
	 */
	ita2_inp.pfp_ita2_irange.rr_used = 1; /* indicate we use code range restriction */
	ita2_inp.pfp_ita2_irange.rr_limits[0].rr_start = range_start;
	ita2_inp.pfp_ita2_irange.rr_limits[0].rr_end = range_end;

	/*
	 * let the library figure out the values for the PMCS
	 */
	if ((ret=pfm_dispatch_events(&inp, &ita2_inp, &outp, &ita2_outp)) != PFMLIB_SUCCESS) {
		fatal_error("cannot configure events: %s\n", pfm_strerror(ret));
	}

	/*
	 * print offsets
	 * (rr_nbr_used counts registers, hence the shift to get pairs)
	 */
	printf("code range : [0x%016lx-0x%016lx)\n"
	       "start_offset:-0x%lx end_offset:+0x%lx\n"
	       "%d pairs of debug registers used\n",
	       range_start,
	       range_end,
	       ita2_outp.pfp_ita2_irange.rr_infos[0].rr_soff,
	       ita2_outp.pfp_ita2_irange.rr_infos[0].rr_eoff,
	       ita2_outp.pfp_ita2_irange.rr_nbr_used >> 1);

	/*
	 * now create the context for self monitoring/per-task
	 */
	if (perfmonctl(0, PFM_CREATE_CONTEXT, ctx, 1) == -1) {
		if (errno == ENOSYS) {
			fatal_error("Your kernel does not have performance monitoring support!\n");
		}
		fatal_error("Can't create PFM context %s\n", strerror(errno));
	}

	/*
	 * extract our file descriptor
	 */
	id = ctx[0].ctx_fd;

	/*
	 * Now prepare the argument to initialize the PMDs and PMCS.
	 * We must use pfp_pmc_count to determine the number of PMC to initialize.
	 * We must use pfp_event_count to determine the number of PMD to initialize.
	 * Some events cause extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count.
	 *
	 * This step is new compared to libpfm-2.x. It is necessary because the library no
	 * longer knows about the kernel data structures.
	 */
	for (i=0; i < outp.pfp_pmc_count; i++) {
		pc[i].reg_num = outp.pfp_pmcs[i].reg_num;
		pc[i].reg_value = outp.pfp_pmcs[i].reg_value;
	}

	/*
	 * the PMC controlling the event ALWAYS come first, that's why this loop
	 * is safe even when extra PMC are needed to support a particular event.
	 */
	for (i=0; i < inp.pfp_event_count; i++) {
		pd[i].reg_num = pc[i].reg_num;
	}

	/*
	 * propagate the setup for the debug registers from the library to the arguments
	 * to the perfmonctl() syscall. The library does not know the type of the syscall
	 * anymore.
	 */
	for (i=0; i < ita2_outp.pfp_ita2_irange.rr_nbr_used; i++) {
		ibrs[i].dbreg_num = ita2_outp.pfp_ita2_irange.rr_br[i].reg_num;
		ibrs[i].dbreg_value = ita2_outp.pfp_ita2_irange.rr_br[i].reg_value;
	}

	/*
	 * Program the code debug registers.
	 *
	 * IMPORTANT: programming the debug register MUST always be done before the PMCs
	 * otherwise the kernel will fail on PFM_WRITE_PMCS. This is for security reasons.
	 */
	if (perfmonctl(id, PFM_WRITE_IBRS, ibrs, ita2_outp.pfp_ita2_irange.rr_nbr_used) == -1) {
		fatal_error("child: perfmonctl error PFM_WRITE_IBRS errno %d\n",errno);
	}

	/*
	 * Now program the registers
	 *
	 * We don't use the same variable to indicate the number of elements passed to
	 * the kernel because, as we said earlier, pc may contain more elements than
	 * the number of events we specified, i.e., contains more than counting monitors.
	 */
	if (perfmonctl(id, PFM_WRITE_PMCS, pc, outp.pfp_pmc_count) == -1) {
		fatal_error("child: perfmonctl error PFM_WRITE_PMCS errno %d\n",errno);
	}

	if (perfmonctl(id, PFM_WRITE_PMDS, pd, inp.pfp_event_count) == -1) {
		fatal_error("child: perfmonctl error PFM_WRITE_PMDS errno %d\n",errno);
	}

	/*
	 * now we load (i.e., attach) the context to ourself
	 */
	load_args.load_pid = getpid();
	if (perfmonctl(id, PFM_LOAD_CONTEXT, &load_args, 1) == -1) {
		fatal_error("perfmonctl error PFM_LOAD_CONTEXT errno %d\n",errno);
	}

	/*
	 * Let's roll now.
	 *
	 * We run two distinct copies of the same function but we restrict measurement
	 * to the first one (saxpy). Therefore the expected count is half what you would
	 * get if code range restriction was not used. The core loop in both cases uses
	 * two floating point operations per iteration.
	 */
	pfm_self_start(id);

	do_test();

	pfm_self_stop(id);

	/*
	 * now read the results
	 */
	if (perfmonctl(id, PFM_READ_PMDS, pd, inp.pfp_event_count) == -1) {
		fatal_error( "perfmonctl error READ_PMDS errno %d\n",errno);
	}

	/*
	 * print the results
	 *
	 * It is important to realize, that the first event we specified may not
	 * be in PMD4. Not all events can be measured by any monitor. That's why
	 * we need to use the pc[] array to figure out where event i was allocated.
	 */
	for (i=0; i < inp.pfp_event_count; i++) {
		pfm_get_full_event_name(&inp.pfp_events[i], name, MAX_EVT_NAME_LEN);
		printf("PMD%u %20lu %s (expected %lu)\n",
			pd[i].reg_num,
			pd[i].reg_value,
			name,
			event_list[i].expected_value);
	}

	/*
	 * let's stop this now
	 */
	close(id);

	return 0;
}
papi-5.6.0/src/libpfm4/lib/pfmlib_amd64_fam15h.c000664 001750 001750 00000006575 13216244365 023217 0ustar00jshenry1963jshenry1963000000 000000
/*
 * pfmlib_amd64_fam15h.c : AMD64 Family 15h
 *
 * Copyright (c) 2011 Google, Inc
 * Contributed by Stephane Eranian
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do so,
 * subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * */ /* private headers */ #include "pfmlib_priv.h" #include "pfmlib_amd64_priv.h" #include "events/amd64_events_fam15h.h" #include "events/amd64_events_fam15h_nb.h" pfmlib_pmu_t amd64_fam15h_interlagos_support={ .desc = "AMD64 Fam15h Interlagos", .name = "amd64_fam15h_interlagos", .pmu = PFM_PMU_AMD64_FAM15H_INTERLAGOS, .pmu_rev = 0, .pme_count = LIBPFM_ARRAY_SIZE(amd64_fam15h_pe), .type = PFM_PMU_TYPE_CORE, .supported_plm = AMD64_FAM10H_PLM, .num_cntrs = 6, .max_encoding = 1, .pe = amd64_fam15h_pe, .atdesc = amd64_mods, .flags = PFMLIB_PMU_FL_RAW_UMASK, .cpu_family = PFM_PMU_AMD64_FAM15H_INTERLAGOS, .pmu_detect = pfm_amd64_family_detect, .get_event_encoding[PFM_OS_NONE] = pfm_amd64_get_encoding, PFMLIB_ENCODE_PERF(pfm_amd64_get_perf_encoding), .get_event_first = pfm_amd64_get_event_first, .get_event_next = pfm_amd64_get_event_next, .event_is_valid = pfm_amd64_event_is_valid, .validate_table = pfm_amd64_validate_table, .get_event_info = pfm_amd64_get_event_info, .get_event_attr_info = pfm_amd64_get_event_attr_info, PFMLIB_VALID_PERF_PATTRS(pfm_amd64_perf_validate_pattrs), .get_event_nattrs = pfm_amd64_get_event_nattrs, }; pfmlib_pmu_t amd64_fam15h_nb_support={ .desc = "AMD64 Fam15h NorthBridge", .name = "amd64_fam15h_nb", .pmu = PFM_PMU_AMD64_FAM15H_NB, .perf_name = "amd_nb", .pmu_rev = 0, .pme_count = LIBPFM_ARRAY_SIZE(amd64_fam15h_nb_pe), .type = PFM_PMU_TYPE_UNCORE, .supported_plm = 0, /* no plm support */ .num_cntrs = 4, .max_encoding = 1, .pe = amd64_fam15h_nb_pe, .atdesc = amd64_mods, .flags 
= PFMLIB_PMU_FL_RAW_UMASK, .cpu_family = PFM_PMU_AMD64_FAM15H_INTERLAGOS, .pmu_detect = pfm_amd64_family_detect, .get_event_encoding[PFM_OS_NONE] = pfm_amd64_get_encoding, PFMLIB_ENCODE_PERF(pfm_amd64_get_perf_encoding), .get_event_first = pfm_amd64_get_event_first, .get_event_next = pfm_amd64_get_event_next, .event_is_valid = pfm_amd64_event_is_valid, .validate_table = pfm_amd64_validate_table, .get_event_info = pfm_amd64_get_event_info, .get_event_attr_info = pfm_amd64_get_event_attr_info, PFMLIB_VALID_PERF_PATTRS(pfm_amd64_nb_perf_validate_pattrs), .get_event_nattrs = pfm_amd64_get_event_nattrs, }; papi-5.6.0/src/libpfm4/lib/events/arm_cortex_a9_events.h000664 001750 001750 00000020355 13216244364 025232 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2010 University of Tennessee * Contributed by Vince Weaver * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. 
*/ /* * the various event names are the same as those given in the * file linux-2.6/arch/arm/kernel/perf_event.c */ /* * Cortex A9 r2p2 Event Table * based on Table 11-7 from the "Cortex A9 Technical Reference Manual" */ static const arm_entry_t arm_cortex_a9_pe []={ /* * ARMv7 events */ {.name = "PMNC_SW_INCR", .code = 0x00, .desc = "Incremented by writes to the Software Increment Register" }, {.name = "IFETCH_MISS", .code = 0x01, .desc = "Instruction fetches that cause lowest-level cache miss" }, {.name = "ITLB_MISS", .code = 0x02, .desc = "Instruction fetches that cause lowest-level TLB miss" }, {.name = "DCACHE_REFILL", .code = 0x03, .desc = "Data read or writes that cause lowest-level cache miss" }, {.name = "DCACHE_ACCESS", .code = 0x04, .desc = "Data read or writes that cause lowest-level cache access" }, {.name = "DTLB_REFILL", .code = 0x05, .desc = "Data read or writes that cause lowest-level TLB refill" }, {.name = "DREAD", .code = 0x06, .desc = "Data read architecturally executed" }, {.name = "DWRITE", .code = 0x07, .desc = "Data write architecturally executed" }, {.name = "EXC_TAKEN", .code = 0x09, .desc = "Counts each exception taken" }, {.name = "EXC_EXECUTED", .code = 0x0a, .desc = "Exception returns architecturally executed" }, {.name = "CID_WRITE", .code = 0x0b, .desc = "Instruction writes to Context ID Register, architecturally executed" }, {.name = "PC_WRITE", .code = 0x0c, .desc = "Software change of PC. 
Equivalent to branches" }, {.name = "PC_IMM_BRANCH", .code = 0x0d, .desc = "Immediate branches architecturally executed" }, {.name = "UNALIGNED_ACCESS", .code = 0x0f, .desc = "Unaligned accesses architecturally executed" }, {.name = "PC_BRANCH_MIS_PRED", .code = 0x10, .desc = "Branches mispredicted or not predicted" }, {.name = "CLOCK_CYCLES", .code = 0x11, .desc = "Clock cycles" }, {.name = "PC_BRANCH_MIS_USED", .code = 0x12, .desc = "Branches that could have been predicted" }, /* * Cortex A9 specific events */ {.name = "JAVA_HW_BYTECODE_EXEC", .code = 0x40, .desc = "Java bytecodes decoded, including speculative (approximate)" }, {.name = "JAVA_SW_BYTECODE_EXEC", .code = 0x41, .desc = "Software Java bytecodes decoded, including speculative (approximate)" }, {.name = "JAZELLE_BRANCH_EXEC", .code = 0x42, .desc = "Jazelle backward branches executed. Includes branches that are flushed because of previous load/store which abort late (approximate)" }, {.name = "COHERENT_LINE_MISS", .code = 0x50, .desc = "Coherent linefill misses which also miss on other processors" }, {.name = "COHERENT_LINE_HIT", .code = 0x51, .desc = "Coherent linefill requests that hit on another processor" }, {.name = "ICACHE_DEP_STALL_CYCLES", .code = 0x60, .desc = "Cycles processor is stalled waiting for instruction cache and the instruction cache is performing at least one linefill (approximate)" }, {.name = "DCACHE_DEP_STALL_CYCLES", .code = 0x61, .desc = "Cycles processor is stalled waiting for data cache" }, {.name = "TLB_MISS_DEP_STALL_CYCLES", .code = 0x62, .desc = "Cycles processor is stalled waiting for completion of TLB walk (approximate)" }, {.name = "STREX_EXECUTED_PASSED", .code = 0x63, .desc = "Number of STREX instructions executed and passed" }, {.name = "STREX_EXECUTED_FAILED", .code = 0x64, .desc = "Number of STREX instructions executed and failed" }, {.name = "DATA_EVICTION", .code = 0x65, .desc = "Data eviction requests due to linefill in data cache" }, {.name = 
"ISSUE_STAGE_NO_INST", .code = 0x66, .desc = "Cycles the issue stage does not dispatch any instructions" }, {.name = "ISSUE_STAGE_EMPTY", .code = 0x67, .desc = "Cycles where issue stage is empty" }, {.name = "INST_OUT_OF_RENAME_STAGE", .code = 0x68, .desc = "Number of instructions going through register renaming stage (approximate)" }, {.name = "PREDICTABLE_FUNCT_RETURNS", .code = 0x6e, .desc = "Number of predictable function returns whose condition codes do not fail (approximate)" }, {.name = "MAIN_UNIT_EXECUTED_INST", .code = 0x70, .desc = "Instructions executed in the main execution, multiply, ALU pipelines (approximate)" }, {.name = "SECOND_UNIT_EXECUTED_INST", .code = 0x71, .desc = "Instructions executed in the second execution pipeline" }, {.name = "LD_ST_UNIT_EXECUTED_INST", .code = 0x72, .desc = "Instructions executed in the Load/Store unit" }, {.name = "FP_EXECUTED_INST", .code = 0x73, .desc = "Floating point instructions going through register renaming stage" }, {.name = "NEON_EXECUTED_INST", .code = 0x74, .desc = "NEON instructions going through register renaming stage (approximate)" }, {.name = "PLD_FULL_DEP_STALL_CYCLES", .code = 0x80, .desc = "Cycles processor is stalled because PLD slots are full (approximate)" }, {.name = "DATA_WR_DEP_STALL_CYCLES", .code = 0x81, .desc = "Cycles processor is stalled due to writes to external memory (approximate)" }, {.name = "ITLB_MISS_DEP_STALL_CYCLES", .code = 0x82, .desc = "Cycles stalled due to main instruction TLB miss (approximate)" }, {.name = "DTLB_MISS_DEP_STALL_CYCLES", .code = 0x83, .desc = "Cycles stalled due to main data TLB miss (approximate)" }, {.name = "MICRO_ITLB_MISS_DEP_STALL_CYCLES", .code = 0x84, .desc = "Cycles stalled due to micro instruction TLB miss (approximate)" }, {.name = "MICRO_DTLB_MISS_DEP_STALL_CYCLES", .code = 0x85, .desc = "Cycles stalled due to micro data TLB miss (approximate)" }, {.name = "DMB_DEP_STALL_CYCLES", .code = 0x86, .desc = "Cycles stalled due to DMB memory barrier 
(approximate)" }, {.name = "INTGR_CLK_ENABLED_CYCLES", .code = 0x8a, .desc = "Cycles during which integer core clock is enabled (approximate)" }, {.name = "DATA_ENGINE_CLK_EN_CYCLES", .code = 0x8b, .desc = "Cycles during which Data Engine clock is enabled (approximate)" }, {.name = "ISB_INST", .code = 0x90, .desc = "Number of ISB instructions architecturally executed" }, {.name = "DSB_INST", .code = 0x91, .desc = "Number of DSB instructions architecturally executed" }, {.name = "DMB_INST", .code = 0x92, .desc = "Number of DMB instructions architecturally executed (approximate)" }, {.name = "EXT_INTERRUPTS", .code = 0x93, .desc = "Number of External interrupts (approximate)" }, {.name = "PLE_CACHE_LINE_RQST_COMPLETED", .code = 0xa0, .desc = "PLE cache line requests completed" }, {.name = "PLE_CACHE_LINE_RQST_SKIPPED", .code = 0xa1, .desc = "PLE cache line requests skipped" }, {.name = "PLE_FIFO_FLUSH", .code = 0xa2, .desc = "PLE FIFO flushes" }, {.name = "PLE_RQST_COMPLETED", .code = 0xa3, .desc = "PLE requests completed" }, {.name = "PLE_FIFO_OVERFLOW", .code = 0xa4, .desc = "PLE FIFO overflows" }, {.name = "PLE_RQST_PROG", .code = 0xa5, .desc = "PLE requests programmed" }, {.name = "CPU_CYCLES", .code = 0xff, .desc = "CPU cycles" }, }; #define ARM_CORTEX_A9_EVENT_COUNT (sizeof(arm_cortex_a9_pe)/sizeof(arm_entry_t)) papi-5.6.0/src/components/net/tests/000775 001750 001750 00000000000 13216244357 021444 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/libpfm4/lib/events/intel_p6_events.h000664 001750 001750 00000061060 13216244364 024214 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2011 Google, Inc * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or 
sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. * * This file has been automatically generated. * * PMU: p6 (Intel P6 Processor Family) */ static const intel_x86_umask_t p6_l2_ifetch[]={ { .uname = "I", .udesc = "Invalid state", .ucode = 0x100, }, { .uname = "S", .udesc = "Shared state", .ucode = 0x200, }, { .uname = "E", .udesc = "Exclusive state", .ucode = 0x400, }, { .uname = "M", .udesc = "Modified state", .ucode = 0x800, }, }; static const intel_x86_umask_t p6_bus_drdy_clocks[]={ { .uname = "SELF", .udesc = "Clocks when processor is driving bus", .ucode = 0x0, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "ANY", .udesc = "Clocks when any agent is driving bus", .ucode = 0x2000, .uflags= INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t p6_mmx_instr_type_exec[]={ { .uname = "MUL", .udesc = "MMX packed multiply instructions executed", .ucode = 0x100, }, { .uname = "SHIFT", .udesc = "MMX packed shift instructions executed", .ucode = 0x200, }, { .uname = "PACK", .udesc = "MMX pack operation instructions executed", .ucode = 0x400, }, { .uname = "UNPACK", .udesc = "MMX unpack operation instructions executed", .ucode = 0x800, }, { .uname = "LOGICAL", .udesc = "MMX 
packed logical instructions executed", .ucode = 0x1000, }, { .uname = "ARITH", .udesc = "MMX packed arithmetic instructions executed", .ucode = 0x2000, }, }; static const intel_x86_umask_t p6_fp_mmx_trans[]={ { .uname = "TO_FP", .udesc = "From MMX instructions to floating-point instructions", .ucode = 0x0, .uflags= INTEL_X86_NCOMBO, }, { .uname = "TO_MMX", .udesc = "From floating-point instructions to MMX instructions", .ucode = 0x100, .uflags= INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t p6_seg_rename_stalls[]={ { .uname = "ES", .udesc = "Segment register ES", .ucode = 0x100, }, { .uname = "DS", .udesc = "Segment register DS", .ucode = 0x200, }, { .uname = "FS", .udesc = "Segment register FS", .ucode = 0x400, }, { .uname = "GS", .udesc = "Segment register GS", .ucode = 0x800, }, }; static const intel_x86_umask_t p6_emon_kni_pref_dispatched[]={ { .uname = "NTA", .udesc = "Prefetch NTA", .ucode = 0x0, .uflags= INTEL_X86_NCOMBO, }, { .uname = "T1", .udesc = "Prefetch T1", .ucode = 0x100, .uflags= INTEL_X86_NCOMBO, }, { .uname = "T2", .udesc = "Prefetch T2", .ucode = 0x200, .uflags= INTEL_X86_NCOMBO, }, { .uname = "WEAK", .udesc = "Weakly ordered stores", .ucode = 0x300, .uflags= INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t p6_emon_kni_inst_retired[]={ { .uname = "PACKED_SCALAR", .udesc = "Packed and scalar instructions", .ucode = 0x0, .uflags= INTEL_X86_NCOMBO, }, { .uname = "SCALAR", .udesc = "Scalar only", .ucode = 0x100, .uflags= INTEL_X86_NCOMBO, }, }; static const intel_x86_entry_t intel_p6_pe[]={ { .name = "CPU_CLK_UNHALTED", .desc = "Number cycles during which the processor is not halted", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x79, }, { .name = "INST_RETIRED", .desc = "Number of instructions retired", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xc0, }, { .name = "DATA_MEM_REFS", .desc = "All loads from any memory type. All stores to any memory typeEach part of a split is counted separately. 
The internal logic counts not only memory loads and stores but also internal retries. 80-bit floating point accesses are double counted, since they are decomposed into a 16-bit exponent load and a 64-bit mantissa load. Memory accesses are only counted when they are actually performed (such as a load that gets squashed because a previous cache miss is outstanding to the same address, and which finally gets performed, is only counted once). Does not include I/O accesses or other non-memory accesses", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x43, }, { .name = "DCU_LINES_IN", .desc = "Total lines allocated in the DCU", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x45, }, { .name = "DCU_M_LINES_IN", .desc = "Number of M state lines allocated in the DCU", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x46, }, { .name = "DCU_M_LINES_OUT", .desc = "Number of M state lines evicted from the DCU. This includes evictions via snoop HITM, intervention or replacement", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x47, }, { .name = "DCU_MISS_OUTSTANDING", .desc = "Weighted number of cycle while a DCU miss is outstanding, incremented by the number of cache misses at any particular time. Cacheable read requests only are considered. Uncacheable requests are excluded Read-for-ownerships are counted, as well as line fills, invalidates, and stores", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x48, }, { .name = "IFU_IFETCH", .desc = "Number of instruction fetches, both cacheable and noncacheable including UC fetches", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x80, }, { .name = "IFU_IFETCH_MISS", .desc = "Number of instruction fetch misses. All instructions fetches that do not hit the IFU (i.e., that produce memory requests). 
Includes UC accesses", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x81, }, { .name = "ITLB_MISS", .desc = "Number of ITLB misses", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x85, }, { .name = "IFU_MEM_STALL", .desc = "Number of cycles instruction fetch is stalled for any reason. Includes IFU cache misses, ITLB misses, ITLB faults, and other minor stalls", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x86, }, { .name = "ILD_STALL", .desc = "Number of cycles that the instruction length decoder is stalled", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x87, }, { .name = "L2_IFETCH", .desc = "Number of L2 instruction fetches. This event indicates that a normal instruction fetch was received by the L2. The count includes only L2 cacheable instruction fetches: it does not include UC instruction fetches It does not include ITLB miss accesses", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x28, .numasks = LIBPFM_ARRAY_SIZE(p6_l2_ifetch), .ngrp = 1, .umasks = p6_l2_ifetch, }, { .name = "L2_ST", .desc = "Number of L2 data stores. This event indicates that a normal, unlocked, store memory access was received by the L2. Specifically, it indicates that the DCU sent a read-for ownership request to the L2. It also includes Invalid to Modified requests sent by the DCU to the L2. It includes only L2 cacheable memory accesses; it does not include I/O accesses, other non-memory accesses, or memory accesses such as UC/WT memory accesses. 
It does include L2 cacheable TLB miss memory accesses", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x2a, .numasks = LIBPFM_ARRAY_SIZE(p6_l2_ifetch), .ngrp = 1, .umasks = p6_l2_ifetch, /* identical to actual umasks list for this event */ }, { .name = "L2_M_LINES_INM", .desc = "Number of modified lines allocated in the L2", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x25, }, { .name = "L2_RQSTS", .desc = "Total number of L2 requests", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x2e, .numasks = LIBPFM_ARRAY_SIZE(p6_l2_ifetch), .ngrp = 1, .umasks = p6_l2_ifetch, /* identical to actual umasks list for this event */ }, { .name = "L2_ADS", .desc = "Number of L2 address strobes", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x21, }, { .name = "L2_DBUS_BUSY", .desc = "Number of cycles during which the L2 cache data bus was busy", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x22, }, { .name = "L2_DBUS_BUSY_RD", .desc = "Number of cycles during which the data bus was busy transferring read data from L2 to the processor", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x23, }, { .name = "BUS_DRDY_CLOCKS", .desc = "Number of clocks during which DRDY# is asserted. Utilization of the external system data bus during data transfers", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x62, .numasks = LIBPFM_ARRAY_SIZE(p6_bus_drdy_clocks), .ngrp = 1, .umasks = p6_bus_drdy_clocks, }, { .name = "BUS_LOCK_CLOCKS", .desc = "Number of clocks during which LOCK# is asserted on the external system bus", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x63, .numasks = LIBPFM_ARRAY_SIZE(p6_bus_drdy_clocks), .ngrp = 1, .umasks = p6_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_REQ_OUTSTANDING", .desc = "Number of bus requests outstanding. 
This counter is incremented by the number of cacheable read bus requests outstanding in any given cycle", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x60, }, { .name = "BUS_TRANS_BRD", .desc = "Number of burst read transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x65, .numasks = LIBPFM_ARRAY_SIZE(p6_bus_drdy_clocks), .ngrp = 1, .umasks = p6_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRANS_RFO", .desc = "Number of completed read for ownership transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x66, .numasks = LIBPFM_ARRAY_SIZE(p6_bus_drdy_clocks), .ngrp = 1, .umasks = p6_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRANS_WB", .desc = "Number of completed write back transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x67, .numasks = LIBPFM_ARRAY_SIZE(p6_bus_drdy_clocks), .ngrp = 1, .umasks = p6_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRAN_IFETCH", .desc = "Number of completed instruction fetch transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x68, .numasks = LIBPFM_ARRAY_SIZE(p6_bus_drdy_clocks), .ngrp = 1, .umasks = p6_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRAN_INVAL", .desc = "Number of completed invalidate transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x69, .numasks = LIBPFM_ARRAY_SIZE(p6_bus_drdy_clocks), .ngrp = 1, .umasks = p6_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRAN_PWR", .desc = "Number of completed partial write transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x6a, .numasks = LIBPFM_ARRAY_SIZE(p6_bus_drdy_clocks), .ngrp = 1, .umasks = p6_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRANS_P", .desc = "Number of completed partial transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 
0x3, .code = 0x6b, .numasks = LIBPFM_ARRAY_SIZE(p6_bus_drdy_clocks), .ngrp = 1, .umasks = p6_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRANS_IO", .desc = "Number of completed I/O transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x6c, .numasks = LIBPFM_ARRAY_SIZE(p6_bus_drdy_clocks), .ngrp = 1, .umasks = p6_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRAN_DEF", .desc = "Number of completed deferred transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x6d, .numasks = LIBPFM_ARRAY_SIZE(p6_bus_drdy_clocks), .ngrp = 1, .umasks = p6_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRAN_BURST", .desc = "Number of completed burst transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x6e, .numasks = LIBPFM_ARRAY_SIZE(p6_bus_drdy_clocks), .ngrp = 1, .umasks = p6_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRAN_ANY", .desc = "Number of all completed bus transactions. Address bus utilization can be calculated knowing the minimum address bus occupancy. 
Includes special cycles, etc.", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x70, .numasks = LIBPFM_ARRAY_SIZE(p6_bus_drdy_clocks), .ngrp = 1, .umasks = p6_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRAN_MEM", .desc = "Number of completed memory transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x6f, .numasks = LIBPFM_ARRAY_SIZE(p6_bus_drdy_clocks), .ngrp = 1, .umasks = p6_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_DATA_RECV", .desc = "Number of bus clock cycles during which this processor is receiving data", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x64, }, { .name = "BUS_BNR_DRV", .desc = "Number of bus clock cycles during which this processor is driving the BNR# pin", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x61, }, { .name = "BUS_HIT_DRV", .desc = "Number of bus clock cycles during which this processor is driving the HIT# pin", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x7a, }, { .name = "BUS_HITM_DRV", .desc = "Number of bus clock cycles during which this processor is driving the HITM# pin", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x7b, }, { .name = "BUS_SNOOP_STALL", .desc = "Number of clock cycles during which the bus is snoop stalled", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x7e, }, { .name = "FLOPS", .desc = "Number of computational floating-point operations retired. Excludes floating-point computational operations that cause traps or assists. Includes internal sub-operations for complex floating-point instructions like transcendentals. Excludes floating point loads and stores", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x1, .code = 0xc1, }, { .name = "FP_COMP_OPS_EXE", .desc = "Number of computational floating-point operations executed. The number of FADD, FSUB, FCOM, FMULs, integer MULs and IMULs, FDIVs, FPREMs, FSQRTS, integer DIVs, and IDIVs. 
This number does not include the number of cycles, but the number of operations. This event does not distinguish an FADD used in the middle of a transcendental flow from a separate FADD instruction", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x1, .code = 0x10, }, { .name = "FP_ASSIST", .desc = "Number of floating-point exception cases handled by microcode.", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x2, .code = 0x11, }, { .name = "MUL", .desc = "Number of multiplies.This count includes integer as well as FP multiplies and is speculative", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x2, .code = 0x12, }, { .name = "DIV", .desc = "Number of divides.This count includes integer as well as FP divides and is speculative", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x2, .code = 0x13, }, { .name = "CYCLES_DIV_BUSY", .desc = "Number of cycles during which the divider is busy, and cannot accept new divides. This includes integer and FP divides, FPREM, FPSQRT, etc. and is speculative", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x1, .code = 0x14, }, { .name = "LD_BLOCKS", .desc = "Number of load operations delayed due to store buffer blocks. Includes counts caused by preceding stores whose addresses are unknown, preceding stores whose addresses are known but whose data is unknown, and preceding stores that conflicts with the load but which incompletely overlap the load", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x3, }, { .name = "SB_DRAINS", .desc = "Number of store buffer drain cycles. Incremented every cycle the store buffer is draining. Draining is caused by serializing operations like CPUID, synchronizing operations like XCHG, interrupt acknowledgment, as well as other conditions (such as cache flushing).", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x4, }, { .name = "MISALIGN_MEM_REF", .desc = "Number of misaligned data memory references. 
Incremented by 1 every cycle during which, either the processor's load or store pipeline dispatches a misaligned micro-op Counting is performed if it is the first or second half or if it is blocked, squashed, or missed. In this context, misaligned means crossing a 64-bit boundary", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x5, }, { .name = "UOPS_RETIRED", .desc = "Number of micro-ops retired", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xc2, }, { .name = "INST_DECODED", .desc = "Number of instructions decoded", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xd0, }, { .name = "HW_INT_RX", .desc = "Number of hardware interrupts received", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xc8, }, { .name = "CYCLES_INT_MASKED", .desc = "Number of processor cycles for which interrupts are disabled", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xc6, }, { .name = "CYCLES_INT_PENDING_AND_MASKED", .desc = "Number of processor cycles for which interrupts are disabled and interrupts are pending.", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xc7, }, { .name = "BR_INST_RETIRED", .desc = "Number of branch instructions retired", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xc4, }, { .name = "BR_MISS_PRED_RETIRED", .desc = "Number of mispredicted branches retired", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xc5, }, { .name = "BR_TAKEN_RETIRED", .desc = "Number of taken branches retired", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xc9, }, { .name = "BR_MISS_PRED_TAKEN_RET", .desc = "Number of taken mispredicted branches retired", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xca, }, { .name = "BR_INST_DECODED", .desc = "Number of branch instructions decoded", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xe0, }, { .name = "BTB_MISSES", .desc = "Number of branches for which the BTB did not produce a prediction", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xe2, }, { .name = "BR_BOGUS", .desc = "Number of bogus 
branches", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xe4, }, { .name = "BACLEARS", .desc = "Number of times BACLEAR is asserted. This is the number of times that a static branch prediction was made, in which the branch decoder decided to make a branch prediction because the BTB did not", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xe6, }, { .name = "RESOURCE_STALLS", .desc = "Incremented by 1 during every cycle for which there is a resource related stall. Includes register renaming buffer entries, memory buffer entries. Does not include stalls due to bus queue full, too many cache misses, etc. In addition to resource related stalls, this event counts some other events. Includes stalls arising during branch misprediction recovery, such as if retirement of the mispredicted branch is delayed and stalls arising while store buffer is draining from synchronizing operations", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xa2, }, { .name = "PARTIAL_RAT_STALLS", .desc = "Number of cycles or events for partial stalls. 
This includes flag partial stalls", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xd2, }, { .name = "SEGMENT_REG_LOADS", .desc = "Number of segment register loads.", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x6, }, { .name = "MMX_SAT_INSTR_EXEC", .desc = "Number of MMX saturating instructions executed", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xb1, }, { .name = "MMX_UOPS_EXEC", .desc = "Number of MMX micro-ops executed", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xb2, }, { .name = "MMX_INSTR_TYPE_EXEC", .desc = "Number of MMX instructions executed by type", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xb3, .numasks = LIBPFM_ARRAY_SIZE(p6_mmx_instr_type_exec), .ngrp = 1, .umasks = p6_mmx_instr_type_exec, }, { .name = "FP_MMX_TRANS", .desc = "Number of MMX transitions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xcc, .numasks = LIBPFM_ARRAY_SIZE(p6_fp_mmx_trans), .ngrp = 1, .umasks = p6_fp_mmx_trans, }, { .name = "MMX_ASSIST", .desc = "Number of MMX micro-ops executed", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xcd, }, { .name = "SEG_RENAME_STALLS", .desc = "Number of Segment Register Renaming Stalls", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xd4, .numasks = LIBPFM_ARRAY_SIZE(p6_seg_rename_stalls), .ngrp = 1, .umasks = p6_seg_rename_stalls, }, { .name = "SEG_REG_RENAMES", .desc = "Number of Segment Register Renames", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xd5, .numasks = LIBPFM_ARRAY_SIZE(p6_seg_rename_stalls), .ngrp = 1, .umasks = p6_seg_rename_stalls, /* identical to actual umasks list for this event */ }, { .name = "RET_SEG_RENAMES", .desc = "Number of segment register rename events retired", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xd6, }, { .name = "EMON_KNI_PREF_DISPATCHED", .desc = "Number of Streaming SIMD extensions prefetch/weakly-ordered instructions dispatched (speculative prefetches are included in counting). 
Pentium III and later", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x7, .numasks = LIBPFM_ARRAY_SIZE(p6_emon_kni_pref_dispatched), .ngrp = 1, .umasks = p6_emon_kni_pref_dispatched, }, { .name = "EMON_KNI_PREF_MISS", .desc = "Number of prefetch/weakly-ordered instructions that miss all caches. Pentium III and later", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x4b, .numasks = LIBPFM_ARRAY_SIZE(p6_emon_kni_pref_dispatched), .ngrp = 1, .umasks = p6_emon_kni_pref_dispatched, /* identical to actual umasks list for this event */ }, { .name = "L2_LD", .desc = "Number of L2 data loads. This event indicates that a normal, unlocked, load memory access was received by the L2. It includes only L2 cacheable memory accesses; it does not include I/O accesses, other non-memory accesses, or memory accesses such as UC/WT memory accesses. It does include L2 cacheable TLB miss memory accesses", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x29, .numasks = LIBPFM_ARRAY_SIZE(p6_l2_ifetch), .ngrp = 1, .umasks = p6_l2_ifetch, /* identical to actual umasks list for this event */ }, { .name = "L2_LINES_IN", .desc = "Number of lines allocated in the L2", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x24, }, { .name = "L2_LINES_OUT", .desc = "Number of lines removed from the L2 for any reason", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x26, }, { .name = "L2_M_LINES_OUTM", .desc = "Number of modified lines removed from the L2 for any reason", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x27, }, { .name = "EMON_KNI_INST_RETIRED", .desc = "Number of SSE instructions retired. Pentium III and later", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xd8, .numasks = LIBPFM_ARRAY_SIZE(p6_emon_kni_inst_retired), .ngrp = 1, .umasks = p6_emon_kni_inst_retired, }, { .name = "EMON_KNI_COMP_INST_RET", .desc = "Number of SSE computation instructions retired. 
Pentium III and later", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xd9, .numasks = LIBPFM_ARRAY_SIZE(p6_emon_kni_inst_retired), .ngrp = 1, .umasks = p6_emon_kni_inst_retired, /* identical to actual umasks list for this event */ }, }; papi-5.6.0/src/libpfm-3.y/python/src/pmu.py000664 001750 001750 00000006635 13216244363 022501 0ustar00jshenry1963jshenry1963000000 000000 #!/usr/bin/env python # # Copyright (c) 2008 Google, Inc. # Contributed by Arun Sharma # # Permission is hereby granted, free of charge, to any person obtaining a # copy of this software and associated documentation files (the "Software"), # to deal in the Software without restriction, including without limitation # the rights to use, copy, modify, merge, publish, distribute, sublicense, # and/or sell copies of the Software, and to permit persons to whom the # Software is furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included # in all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR # OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR # OTHER DEALINGS IN THE SOFTWARE. 
# import os from perfmon import * def public_members(self): s = "{ " for k, v in self.__dict__.iteritems(): if not k[0] == '_': s += "%s : %s, " % (k, v) s += " }" return s class System: def __init__(self): self.ncpus = os.sysconf('SC_NPROCESSORS_ONLN') self.pmu = PMU() def __repr__(self): return public_members(self) class Event: def __init__(self): pass def __repr__(self): return '\n' + public_members(self) class EventMask: def __init__(self): pass def __repr__(self): return '\n\t' + public_members(self) class PMU: def __init__(self): pfm_initialize() self.type = pfm_py_get_pmu_type() self.name = pfm_get_pmu_name(PFMON_MAX_EVTNAME_LEN)[1] self.width = pfm_py_get_hw_counter_width() # What does the PMU support? self.__implemented_pmcs = pfmlib_regmask_t() self.__implemented_pmds = pfmlib_regmask_t() self.__implemented_counters = pfmlib_regmask_t() pfm_get_impl_pmcs(self.__implemented_pmcs) pfm_get_impl_pmds(self.__implemented_pmds) pfm_get_impl_counters(self.__implemented_counters) self.implemented_pmcs = self.__implemented_pmcs.weight() self.implemented_pmds = self.__implemented_pmds.weight() self.implemented_counters = self.__implemented_counters.weight() self.__events = None def __parse_events(self): nevents = pfm_py_get_num_events() self.__events = [] for idx in range(0, nevents): e = Event() e.name = pfm_py_get_event_name(idx) e.code = pfm_py_get_event_code(idx) e.__counters = pfmlib_regmask_t() pfm_get_event_counters(idx, e.__counters) # Now the event masks e.masks = [] nmasks = pfm_py_get_num_event_masks(idx) for mask_idx in range(0, nmasks): em = EventMask() em.name = pfm_py_get_event_mask_name(idx, mask_idx) em.code = pfm_py_get_event_mask_code(idx, mask_idx) em.desc = pfm_py_get_event_mask_description(idx, mask_idx) e.masks.append(em) self.__events.append(e) def events(self): if not self.__events: self.__parse_events() return self.__events def __repr__(self): return public_members(self) if __name__ == '__main__': from perfmon import * s = System() print s 
print s.pmu.events() papi-5.6.0/src/libpfm-3.y/docs/man3/pfm_get_event_mask_description.3000664 001750 001750 00000000036 13216244361 027313 0ustar00jshenry1963jshenry1963000000 000000 .so man3/pfm_get_event_name.3 papi-5.6.0/src/libpfm4/lib/pfmlib_sparc_priv.h000664 001750 001750 00000003272 13216244365 023307 0ustar00jshenry1963jshenry1963000000 000000 #ifndef __PFMLIB_SPARC_PRIV_H__ #define __PFMLIB_SPARC_PRIV_H__ typedef struct { char *uname; /* mask name */ char *udesc; /* mask description */ int ubit; /* umask bit position */ } sparc_mask_t; #define EVENT_MASK_BITS 8 typedef struct { char *name; /* event name */ char *desc; /* event description */ char ctrl; /* S0 or S1 */ char __pad; int code; /* S0/S1 encoding */ int numasks; /* number of entries in masks */ sparc_mask_t umasks[EVENT_MASK_BITS]; } sparc_entry_t; typedef union { unsigned int val; struct { unsigned int ctrl_s0 : 1; unsigned int ctrl_s1 : 1; unsigned int reserved1 : 14; unsigned int code : 8; unsigned int umask : 8; } config; } pfm_sparc_reg_t; #define PME_CTRL_S0 1 #define PME_CTRL_S1 2 #define SPARC_ATTR_K 0 #define SPARC_ATTR_U 1 #define SPARC_ATTR_H 2 #define SPARC_PLM (PFM_PLM0|PFM_PLM3) #define NIAGARA2_PLM (SPARC_PLM|PFM_PLMH) extern int pfm_sparc_detect(void *this); extern int pfm_sparc_get_encoding(void *this, pfmlib_event_desc_t *e); extern int pfm_sparc_get_event_first(void *this); extern int pfm_sparc_get_event_next(void *this, int idx); extern int pfm_sparc_event_is_valid(void *this, int pidx); extern int pfm_sparc_validate_table(void *this, FILE *fp); extern int pfm_sparc_get_event_attr_info(void *this, int pidx, int attr_idx, pfmlib_event_attr_info_t *info); extern int pfm_sparc_get_event_info(void *this, int idx, pfm_event_info_t *info); extern unsigned int pfm_sparc_get_event_nattrs(void *this, int pidx); extern void pfm_sparc_perf_validate_pattrs(void *this, pfmlib_event_desc_t *e); extern int pfm_sparc_get_perf_encoding(void *this, pfmlib_event_desc_t *e); #endif /* 
__PFMLIB_SPARC_PRIV_H__ */ papi-5.6.0/src/perfctr-2.7.x/etc/costs/Athlon-1.33000664 001750 001750 00000001470 13216244367 023211 0ustar00jshenry1963jshenry1963000000 000000 [data from a 1.33 GHz Athlon XP 1500+] PERFCTR INIT: vendor 2, family 6, model 6, stepping 2, clock 1343234 kHz PERFCTR INIT: NITER == 64 PERFCTR INIT: loop overhead is 169 cycles PERFCTR INIT: rdtsc cost is 13.3 cycles (1021 total) PERFCTR INIT: rdpmc cost is 13.3 cycles (1026 total) PERFCTR INIT: rdmsr (counter) cost is 51.9 cycles (3492 total) PERFCTR INIT: rdmsr (evntsel) cost is 53.0 cycles (3564 total) PERFCTR INIT: wrmsr (counter) cost is 79.9 cycles (5284 total) PERFCTR INIT: wrmsr (evntsel) cost is 231.6 cycles (14997 total) PERFCTR INIT: read cr4 cost is 2.2 cycles (313 total) PERFCTR INIT: write cr4 cost is 63.0 cycles (4205 total) PERFCTR INIT: write LVTPC cost is 10.3 cycles (830 total) PERFCTR INIT: sync_core cost is 73.6 cycles (4883 total) perfctr: driver 2.7.5, cpu type AMD K7/K8 at 1343234 kHz papi-5.6.0/man/man3/PAPI_get_hardware_info.3000664 001750 001750 00000002331 13216244356 022513 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPI_get_hardware_info" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPI_get_hardware_info \- .PP get information about the system hardware .SH SYNOPSIS .br .PP .SH "Detailed Description" .PP .PP .nf In C, this function returns a pointer to a structure containing information about the hardware on which the program runs. In Fortran, the values of the structure are returned explicitly. @retval PAPI_EINVAL One or more of the arguments is invalid. .fi .PP .PP .PP .nf @note The C structure contains detailed information about cache and TLB sizes. This information is not available from Fortran. 
@par Examples: .fi .PP .PP .nf const PAPI_hw_info_t *hwinfo = NULL; if (PAPI_library_init(PAPI_VER_CURRENT) != PAPI_VER_CURRENT) exit(1); if ((hwinfo = PAPI_get_hardware_info()) == NULL) exit(1); printf("%d CPUs at %f Mhz\&.\en",hwinfo->totalcpus,hwinfo->mhz); * .fi .PP .PP \fBSee Also:\fP .RS 4 \fBPAPI_hw_info_t\fP .PP \fBPAPI_get_executable_info\fP, \fBPAPI_get_opt\fP, \fBPAPI_get_dmem_info\fP, \fBPAPI_library_init\fP .RE .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. papi-5.6.0/src/libpfm4/docs/man3/libpfm_intel_ivbep_unc_irp.3000664 001750 001750 00000002061 13216244364 026103 0ustar00jshenry1963jshenry1963000000 000000 .TH LIBPFM 3 "February, 2014" "" "Linux Programmer's Manual" .SH NAME libpfm_intel_ivbep_unc_irp - support for Intel Ivy Bridge-EP IRP uncore PMU .SH SYNOPSIS .nf .B #include .sp .B PMU name: ivbep_unc_irp .B PMU desc: Intel Ivy Bridge-EP IRP uncore PMU .sp .SH DESCRIPTION The library supports the Intel Ivy Bridge uncore PMU. This PMU model only exists on Ivy Bridge model 62. .SH MODIFIERS The following modifiers are supported on Intel Ivy Bridge IRP uncore PMU: .TP .B e Enable edge detection, i.e., count only when there is a state transition from no occurrence of the event to at least one occurrence. This modifier must be combined with a threshold modifier (t) with a value greater or equal to one. This is a boolean modifier. .TP .B t Set the threshold value. When set to a non-zero value, the counter counts the number of cycles in which the number of occurrences of the event is greater or equal to the threshold. This is an integer modifier with values in the range [0:255]. 
.SH AUTHORS .nf Stephane Eranian .if .PP papi-5.6.0/src/libpfm4/include/perfmon/err.h000775 001750 001750 00000003256 13216244364 022725 0ustar00jshenry1963jshenry1963000000 000000 /* * err.h: substitute header for compiling on Windows with MingGW * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef __PFM_ERR_H__ #define __PFM_ERR_H__ #ifndef PFMLIB_WINDOWS #include #else /* PFMLIB_WINDOWS */ #define warnx(...) do { \ fprintf (stderr, __VA_ARGS__); \ fprintf (stderr, "\n"); \ } while (0) #define errx(code, ...) do { \ fprintf (stderr, __VA_ARGS__); \ fprintf (stderr, "\n"); \ exit (code); \ } while (0) #define err(code, ...) 
do { \ fprintf (stderr, __VA_ARGS__); \ fprintf (stderr, " : %s\n", strerror(errno)); \ exit (code); \ } while (0) #endif #endif /* __PFM_ERR_H__ */ papi-5.6.0/src/libpfm-3.y/examples_v2.x/check_events.c000664 001750 001750 00000010162 13216244362 024503 0ustar00jshenry1963jshenry1963000000 000000 /* * check_events.c - check if event assignment is possible * * Copyright (c) 2008 Google, Inc * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. */ #include #include #include #include #include #include #include #include #include #include #define NUM_PMCS PFMLIB_MAX_PMCS #define NUM_PMDS PFMLIB_MAX_PMDS #define MAX_PMU_NAME_LEN 32 static void fatal_error(char *fmt,...) __attribute__((noreturn)); static void fatal_error(char *fmt, ...) 
{ va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); exit(1); } /* * The goal of this program is to exercise the event assignment * code for a specific PMU model. This program is independent of * the kernel API. */ int main(int argc, char **argv) { char **p; unsigned int i; int ret; pfmlib_input_param_t inp; pfmlib_output_param_t outp; pfmlib_options_t pfmlib_options; char model[MAX_PMU_NAME_LEN]; unsigned int num_counters; /* * pass options to library (optional) */ memset(&pfmlib_options, 0, sizeof(pfmlib_options)); pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ pfmlib_options.pfm_verbose = 0; /* set to 1 for verbose */ pfm_set_options(&pfmlib_options); /* * Initialize pfm library (required before we can use it) */ ret = pfm_initialize(); if (ret != PFMLIB_SUCCESS) fatal_error("Cannot initialize library: %s\n", pfm_strerror(ret)); pfm_get_pmu_name(model, MAX_PMU_NAME_LEN); printf("PMU model: %s\n", model); pfm_get_num_counters(&num_counters); printf("%u counters available\n", num_counters); /* * prepare parameters to library. */ memset(&inp,0, sizeof(inp)); memset(&outp,0, sizeof(outp)); /* * be nice to user! 
*/ if (argc > 1) { p = argv+1; for (i=0; *p ; i++, p++) { ret = pfm_find_full_event(*p, &inp.pfp_events[i]); if (ret != PFMLIB_SUCCESS) fatal_error("event %s: %s\n", *p, pfm_strerror(ret)); } } else { if (pfm_get_cycle_event(&inp.pfp_events[0]) != PFMLIB_SUCCESS) fatal_error("cannot find cycle event\n"); if (pfm_get_inst_retired_event(&inp.pfp_events[1]) != PFMLIB_SUCCESS) fatal_error("cannot find inst retired event\n"); i = 2; } /* * set the default privilege mode for all counters: * PFM_PLM3 : user level only */ inp.pfp_dfl_plm = PFM_PLM3; if (i > num_counters) { i = num_counters; printf("too many events provided (max=%d events), using first %d event(s)\n", num_counters, i); } /* * how many counters we use */ inp.pfp_event_count = i; /* * let the library figure out the values for the PMCS */ if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); for (i=0; i < outp.pfp_pmc_count; i++) printf("PMC%u=0x%llx\n", outp.pfp_pmcs[i].reg_num, outp.pfp_pmcs[i].reg_value); for (i=0; i < outp.pfp_pmd_count; i++) printf("PMD%u\n", outp.pfp_pmds[i].reg_num); return 0; } papi-5.6.0/src/ctests/prof_utils.h000664 001750 001750 00000003316 13216244360 021170 0ustar00jshenry1963jshenry1963000000 000000 /* * File: prof_utils.h * Author: Dan Terpstra * terpstra@cs.utk.edu * Mods: Maynard Johnson * maynardj@us.ibm.com */ /* This file contains utility definitions useful for all profiling tests It should be #included in: - profile.c, - sprofile.c, - profile_pthreads.c, - profile_twoevents.c, - earprofile.c, - future profiling tests. 
*/ /* value for scale parameter that sets scale to 1 */ #define FULL_SCALE 65536 /* Internal prototype */ int prof_events(int num_tests); void prof_print_address(const char *title, const PAPI_exe_info_t *prginfo); void prof_print_prof_info(caddr_t start, caddr_t end, int threshold, char *event_name); void prof_alloc(int num, unsigned long plength); void prof_head(unsigned long blength, int bucket_size, int num_buckets, const char *header); void prof_out(caddr_t start, int n, int bucket, int num_buckets, unsigned int scale); unsigned long prof_size(unsigned long plength, unsigned scale, int bucket, int *num_buckets); int prof_check(int n, int bucket, int num_buckets); int prof_buckets(int bucket); void do_no_profile(int quiet); /* variables global to profiling tests */ extern long long **values; extern char event_name[PAPI_MAX_STR_LEN]; extern int PAPI_event; extern int EventSet; extern void *profbuf[5]; /* Itanium returns function descriptors instead of function addresses. I couldn't find the following structure in a header file, so I duplicated it below. */ #if (defined(ITANIUM1) || defined(ITANIUM2)) struct fdesc { void *ip; /* entry point (code address) */ void *gp; /* global-pointer */ }; #elif defined(__powerpc64__) struct fdesc { void * ip; // function entry point void * toc; void * env; }; #endif papi-5.6.0/src/components/perf_event/perf_event_lib.h000664 001750 001750 00000004423 13216244357 025010 0ustar00jshenry1963jshenry1963000000 000000 /* Various definitions */ /* This is arbitrary. 
Typically you can add up to ~1000 before */
/* you run out of fds */
#define PERF_EVENT_MAX_MPX_COUNTERS 384

/* We really don't need fancy definitions for these */

/*
 * State kept for a single event: its file descriptors, mode flags,
 * mmap buffer bookkeeping, target cpu and perf_event_attr configuration.
 */
typedef struct
{
  int group_leader_fd;            /* fd of group leader */
  int event_fd;                   /* fd of event */
  int event_opened;               /* event successfully opened */
  int profiling;                  /* event is profiling */
  int sampling;                   /* event is a sampling event */
  uint32_t nr_mmap_pages;         /* number pages in the mmap buffer */
  void *mmap_buf;                 /* used for control/profiling */
  uint64_t tail;                  /* current read location in mmap buffer */
  uint64_t mask;                  /* mask used for wrapping the pages */
  int cpu;                        /* cpu associated with this event */
  struct perf_event_attr attr;    /* perf_event config structure */
} pe_event_info_t;

/*
 * A control state: settings that apply to the whole state plus fixed-size
 * arrays holding up to PERF_EVENT_MAX_MPX_COUNTERS events and counts.
 */
typedef struct
{
  int num_events;                 /* number of events in control state */
  unsigned int domain;            /* control-state wide domain */
  unsigned int granularity;       /* granularity */
  unsigned int multiplexed;       /* multiplexing enable */
  unsigned int overflow;          /* overflow enable */
  unsigned int inherit;           /* inherit enable */
  unsigned int overflow_signal;   /* overflow signal */
  int cidx;                       /* current component */
  int cpu;                        /* which cpu to measure */
  pid_t tid;                      /* thread we are monitoring */
  pe_event_info_t events[PERF_EVENT_MAX_MPX_COUNTERS];
  long long counts[PERF_EVENT_MAX_MPX_COUNTERS];
} pe_control_t;

/*
 * Context: initialization/run state, the component id and a pointer to
 * the native event table.
 */
typedef struct
{
  int initialized;                /* are we initialized? */
  int state;                      /* are we opened and/or running? */
  int cidx;                       /* our component id */
  struct native_event_table_t *event_table; /* our event table */
} pe_context_t;
papi-5.6.0/src/libpfm4/lib/events/power4_events.h000664 001750 001750 00000241706 13216244365 023724 0ustar00jshenry1963jshenry1963000000 000000 /****************************/ /* THIS IS OPEN SOURCE CODE */ /****************************/ #ifndef __POWER4_EVENTS_H__ #define __POWER4_EVENTS_H__ /* * File: power4_events.h * CVS: * Author: Corey Ashford * cjashfor@us.ibm.com * Mods: * * * (C) Copyright IBM Corporation, 2009.
All Rights Reserved. * Contributed by Corey Ashford * * Note: This code was automatically generated and should not be modified by * hand. * */ #define POWER4_PME_PM_MRK_LSU_SRQ_INST_VALID 0 #define POWER4_PME_PM_FPU1_SINGLE 1 #define POWER4_PME_PM_DC_PREF_OUT_STREAMS 2 #define POWER4_PME_PM_FPU0_STALL3 3 #define POWER4_PME_PM_TB_BIT_TRANS 4 #define POWER4_PME_PM_GPR_MAP_FULL_CYC 5 #define POWER4_PME_PM_MRK_ST_CMPL 6 #define POWER4_PME_PM_MRK_LSU_FLUSH_LRQ 7 #define POWER4_PME_PM_FPU0_STF 8 #define POWER4_PME_PM_FPU1_FMA 9 #define POWER4_PME_PM_L2SA_MOD_TAG 10 #define POWER4_PME_PM_MRK_DATA_FROM_L275_SHR 11 #define POWER4_PME_PM_1INST_CLB_CYC 12 #define POWER4_PME_PM_LSU1_FLUSH_ULD 13 #define POWER4_PME_PM_MRK_INST_FIN 14 #define POWER4_PME_PM_MRK_LSU0_FLUSH_UST 15 #define POWER4_PME_PM_FPU_FDIV 16 #define POWER4_PME_PM_LSU_LRQ_S0_ALLOC 17 #define POWER4_PME_PM_FPU0_FULL_CYC 18 #define POWER4_PME_PM_FPU_SINGLE 19 #define POWER4_PME_PM_FPU0_FMA 20 #define POWER4_PME_PM_MRK_LSU1_FLUSH_ULD 21 #define POWER4_PME_PM_LSU1_FLUSH_LRQ 22 #define POWER4_PME_PM_L2SA_ST_HIT 23 #define POWER4_PME_PM_L2SB_SHR_INV 24 #define POWER4_PME_PM_DTLB_MISS 25 #define POWER4_PME_PM_MRK_ST_MISS_L1 26 #define POWER4_PME_PM_EXT_INT 27 #define POWER4_PME_PM_MRK_LSU1_FLUSH_LRQ 28 #define POWER4_PME_PM_MRK_ST_GPS 29 #define POWER4_PME_PM_GRP_DISP_SUCCESS 30 #define POWER4_PME_PM_LSU1_LDF 31 #define POWER4_PME_PM_FAB_CMD_ISSUED 32 #define POWER4_PME_PM_LSU0_SRQ_STFWD 33 #define POWER4_PME_PM_CR_MAP_FULL_CYC 34 #define POWER4_PME_PM_MRK_LSU0_FLUSH_ULD 35 #define POWER4_PME_PM_LSU_DERAT_MISS 36 #define POWER4_PME_PM_FPU0_SINGLE 37 #define POWER4_PME_PM_FPU1_FDIV 38 #define POWER4_PME_PM_FPU1_FEST 39 #define POWER4_PME_PM_FPU0_FRSP_FCONV 40 #define POWER4_PME_PM_MRK_ST_CMPL_INT 41 #define POWER4_PME_PM_FXU_FIN 42 #define POWER4_PME_PM_FPU_STF 43 #define POWER4_PME_PM_DSLB_MISS 44 #define POWER4_PME_PM_DATA_FROM_L275_SHR 45 #define POWER4_PME_PM_FXLS1_FULL_CYC 46 #define POWER4_PME_PM_L3B0_DIR_MIS 47 
#define POWER4_PME_PM_2INST_CLB_CYC 48 #define POWER4_PME_PM_MRK_STCX_FAIL 49 #define POWER4_PME_PM_LSU_LMQ_LHR_MERGE 50 #define POWER4_PME_PM_FXU0_BUSY_FXU1_IDLE 51 #define POWER4_PME_PM_L3B1_DIR_REF 52 #define POWER4_PME_PM_MRK_LSU_FLUSH_UST 53 #define POWER4_PME_PM_MRK_DATA_FROM_L25_SHR 54 #define POWER4_PME_PM_LSU_FLUSH_ULD 55 #define POWER4_PME_PM_MRK_BRU_FIN 56 #define POWER4_PME_PM_IERAT_XLATE_WR 57 #define POWER4_PME_PM_LSU0_BUSY 58 #define POWER4_PME_PM_L2SA_ST_REQ 59 #define POWER4_PME_PM_DATA_FROM_MEM 60 #define POWER4_PME_PM_FPR_MAP_FULL_CYC 61 #define POWER4_PME_PM_FPU1_FULL_CYC 62 #define POWER4_PME_PM_FPU0_FIN 63 #define POWER4_PME_PM_3INST_CLB_CYC 64 #define POWER4_PME_PM_DATA_FROM_L35 65 #define POWER4_PME_PM_L2SA_SHR_INV 66 #define POWER4_PME_PM_MRK_LSU_FLUSH_SRQ 67 #define POWER4_PME_PM_THRESH_TIMEO 68 #define POWER4_PME_PM_FPU_FSQRT 69 #define POWER4_PME_PM_MRK_LSU0_FLUSH_LRQ 70 #define POWER4_PME_PM_FXLS0_FULL_CYC 71 #define POWER4_PME_PM_DATA_TABLEWALK_CYC 72 #define POWER4_PME_PM_FPU0_ALL 73 #define POWER4_PME_PM_FPU0_FEST 74 #define POWER4_PME_PM_DATA_FROM_L25_MOD 75 #define POWER4_PME_PM_LSU_LMQ_SRQ_EMPTY_CYC 76 #define POWER4_PME_PM_FPU_FEST 77 #define POWER4_PME_PM_0INST_FETCH 78 #define POWER4_PME_PM_LARX_LSU1 79 #define POWER4_PME_PM_LD_MISS_L1_LSU0 80 #define POWER4_PME_PM_L1_PREF 81 #define POWER4_PME_PM_FPU1_STALL3 82 #define POWER4_PME_PM_BRQ_FULL_CYC 83 #define POWER4_PME_PM_LARX 84 #define POWER4_PME_PM_MRK_DATA_FROM_L35 85 #define POWER4_PME_PM_WORK_HELD 86 #define POWER4_PME_PM_MRK_LD_MISS_L1_LSU0 87 #define POWER4_PME_PM_FXU_IDLE 88 #define POWER4_PME_PM_INST_CMPL 89 #define POWER4_PME_PM_LSU1_FLUSH_UST 90 #define POWER4_PME_PM_LSU0_FLUSH_ULD 91 #define POWER4_PME_PM_INST_FROM_L2 92 #define POWER4_PME_PM_DATA_FROM_L3 93 #define POWER4_PME_PM_FPU0_DENORM 94 #define POWER4_PME_PM_FPU1_FMOV_FEST 95 #define POWER4_PME_PM_GRP_DISP_REJECT 96 #define POWER4_PME_PM_INST_FETCH_CYC 97 #define POWER4_PME_PM_LSU_LDF 98 #define 
POWER4_PME_PM_INST_DISP 99 #define POWER4_PME_PM_L2SA_MOD_INV 100 #define POWER4_PME_PM_DATA_FROM_L25_SHR 101 #define POWER4_PME_PM_FAB_CMD_RETRIED 102 #define POWER4_PME_PM_L1_DCACHE_RELOAD_VALID 103 #define POWER4_PME_PM_MRK_GRP_ISSUED 104 #define POWER4_PME_PM_FPU_FULL_CYC 105 #define POWER4_PME_PM_FPU_FMA 106 #define POWER4_PME_PM_MRK_CRU_FIN 107 #define POWER4_PME_PM_MRK_LSU1_FLUSH_UST 108 #define POWER4_PME_PM_MRK_FXU_FIN 109 #define POWER4_PME_PM_BR_ISSUED 110 #define POWER4_PME_PM_EE_OFF 111 #define POWER4_PME_PM_INST_FROM_L3 112 #define POWER4_PME_PM_ITLB_MISS 113 #define POWER4_PME_PM_FXLS_FULL_CYC 114 #define POWER4_PME_PM_FXU1_BUSY_FXU0_IDLE 115 #define POWER4_PME_PM_GRP_DISP_VALID 116 #define POWER4_PME_PM_L2SC_ST_HIT 117 #define POWER4_PME_PM_MRK_GRP_DISP 118 #define POWER4_PME_PM_L2SB_MOD_TAG 119 #define POWER4_PME_PM_INST_FROM_L25_L275 120 #define POWER4_PME_PM_LSU_FLUSH_UST 121 #define POWER4_PME_PM_L2SB_ST_HIT 122 #define POWER4_PME_PM_FXU1_FIN 123 #define POWER4_PME_PM_L3B1_DIR_MIS 124 #define POWER4_PME_PM_4INST_CLB_CYC 125 #define POWER4_PME_PM_GRP_CMPL 126 #define POWER4_PME_PM_DC_PREF_L2_CLONE_L3 127 #define POWER4_PME_PM_FPU_FRSP_FCONV 128 #define POWER4_PME_PM_5INST_CLB_CYC 129 #define POWER4_PME_PM_MRK_LSU0_FLUSH_SRQ 130 #define POWER4_PME_PM_MRK_LSU_FLUSH_ULD 131 #define POWER4_PME_PM_8INST_CLB_CYC 132 #define POWER4_PME_PM_LSU_LMQ_FULL_CYC 133 #define POWER4_PME_PM_ST_REF_L1_LSU0 134 #define POWER4_PME_PM_LSU0_DERAT_MISS 135 #define POWER4_PME_PM_LSU_SRQ_SYNC_CYC 136 #define POWER4_PME_PM_FPU_STALL3 137 #define POWER4_PME_PM_MRK_DATA_FROM_L2 138 #define POWER4_PME_PM_FPU0_FMOV_FEST 139 #define POWER4_PME_PM_LSU0_FLUSH_SRQ 140 #define POWER4_PME_PM_LD_REF_L1_LSU0 141 #define POWER4_PME_PM_L2SC_SHR_INV 142 #define POWER4_PME_PM_LSU1_FLUSH_SRQ 143 #define POWER4_PME_PM_LSU_LMQ_S0_ALLOC 144 #define POWER4_PME_PM_ST_REF_L1 145 #define POWER4_PME_PM_LSU_SRQ_EMPTY_CYC 146 #define POWER4_PME_PM_FPU1_STF 147 #define POWER4_PME_PM_L3B0_DIR_REF 148 
#define POWER4_PME_PM_RUN_CYC 149 #define POWER4_PME_PM_LSU_LMQ_S0_VALID 150 #define POWER4_PME_PM_LSU_LRQ_S0_VALID 151 #define POWER4_PME_PM_LSU0_LDF 152 #define POWER4_PME_PM_MRK_IMR_RELOAD 153 #define POWER4_PME_PM_7INST_CLB_CYC 154 #define POWER4_PME_PM_MRK_GRP_TIMEO 155 #define POWER4_PME_PM_FPU_FMOV_FEST 156 #define POWER4_PME_PM_GRP_DISP_BLK_SB_CYC 157 #define POWER4_PME_PM_XER_MAP_FULL_CYC 158 #define POWER4_PME_PM_ST_MISS_L1 159 #define POWER4_PME_PM_STOP_COMPLETION 160 #define POWER4_PME_PM_MRK_GRP_CMPL 161 #define POWER4_PME_PM_ISLB_MISS 162 #define POWER4_PME_PM_CYC 163 #define POWER4_PME_PM_LD_MISS_L1_LSU1 164 #define POWER4_PME_PM_STCX_FAIL 165 #define POWER4_PME_PM_LSU1_SRQ_STFWD 166 #define POWER4_PME_PM_GRP_DISP 167 #define POWER4_PME_PM_DATA_FROM_L2 168 #define POWER4_PME_PM_L2_PREF 169 #define POWER4_PME_PM_FPU0_FPSCR 170 #define POWER4_PME_PM_FPU1_DENORM 171 #define POWER4_PME_PM_MRK_DATA_FROM_L25_MOD 172 #define POWER4_PME_PM_L2SB_ST_REQ 173 #define POWER4_PME_PM_L2SB_MOD_INV 174 #define POWER4_PME_PM_FPU0_FSQRT 175 #define POWER4_PME_PM_LD_REF_L1 176 #define POWER4_PME_PM_MRK_L1_RELOAD_VALID 177 #define POWER4_PME_PM_L2SB_SHR_MOD 178 #define POWER4_PME_PM_INST_FROM_L1 179 #define POWER4_PME_PM_1PLUS_PPC_CMPL 180 #define POWER4_PME_PM_EE_OFF_EXT_INT 181 #define POWER4_PME_PM_L2SC_SHR_MOD 182 #define POWER4_PME_PM_LSU_LRQ_FULL_CYC 183 #define POWER4_PME_PM_IC_PREF_INSTALL 184 #define POWER4_PME_PM_MRK_LSU1_FLUSH_SRQ 185 #define POWER4_PME_PM_GCT_FULL_CYC 186 #define POWER4_PME_PM_INST_FROM_MEM 187 #define POWER4_PME_PM_FXU_BUSY 188 #define POWER4_PME_PM_ST_REF_L1_LSU1 189 #define POWER4_PME_PM_MRK_LD_MISS_L1 190 #define POWER4_PME_PM_MRK_LSU1_INST_FIN 191 #define POWER4_PME_PM_L1_WRITE_CYC 192 #define POWER4_PME_PM_BIQ_IDU_FULL_CYC 193 #define POWER4_PME_PM_MRK_LSU0_INST_FIN 194 #define POWER4_PME_PM_L2SC_ST_REQ 195 #define POWER4_PME_PM_LSU1_BUSY 196 #define POWER4_PME_PM_FPU_ALL 197 #define POWER4_PME_PM_LSU_SRQ_S0_ALLOC 198 #define 
POWER4_PME_PM_GRP_MRK 199 #define POWER4_PME_PM_FPU1_FIN 200 #define POWER4_PME_PM_DC_PREF_STREAM_ALLOC 201 #define POWER4_PME_PM_BR_MPRED_CR 202 #define POWER4_PME_PM_BR_MPRED_TA 203 #define POWER4_PME_PM_CRQ_FULL_CYC 204 #define POWER4_PME_PM_INST_FROM_PREF 205 #define POWER4_PME_PM_LD_MISS_L1 206 #define POWER4_PME_PM_STCX_PASS 207 #define POWER4_PME_PM_DC_INV_L2 208 #define POWER4_PME_PM_LSU_SRQ_FULL_CYC 209 #define POWER4_PME_PM_LSU0_FLUSH_LRQ 210 #define POWER4_PME_PM_LSU_SRQ_S0_VALID 211 #define POWER4_PME_PM_LARX_LSU0 212 #define POWER4_PME_PM_GCT_EMPTY_CYC 213 #define POWER4_PME_PM_FPU1_ALL 214 #define POWER4_PME_PM_FPU1_FSQRT 215 #define POWER4_PME_PM_FPU_FIN 216 #define POWER4_PME_PM_L2SA_SHR_MOD 217 #define POWER4_PME_PM_MRK_LD_MISS_L1_LSU1 218 #define POWER4_PME_PM_LSU_SRQ_STFWD 219 #define POWER4_PME_PM_FXU0_FIN 220 #define POWER4_PME_PM_MRK_FPU_FIN 221 #define POWER4_PME_PM_LSU_BUSY 222 #define POWER4_PME_PM_INST_FROM_L35 223 #define POWER4_PME_PM_FPU1_FRSP_FCONV 224 #define POWER4_PME_PM_SNOOP_TLBIE 225 #define POWER4_PME_PM_FPU0_FDIV 226 #define POWER4_PME_PM_LD_REF_L1_LSU1 227 #define POWER4_PME_PM_MRK_DATA_FROM_L275_MOD 228 #define POWER4_PME_PM_HV_CYC 229 #define POWER4_PME_PM_6INST_CLB_CYC 230 #define POWER4_PME_PM_LR_CTR_MAP_FULL_CYC 231 #define POWER4_PME_PM_L2SC_MOD_INV 232 #define POWER4_PME_PM_FPU_DENORM 233 #define POWER4_PME_PM_DATA_FROM_L275_MOD 234 #define POWER4_PME_PM_LSU1_DERAT_MISS 235 #define POWER4_PME_PM_IC_PREF_REQ 236 #define POWER4_PME_PM_MRK_LSU_FIN 237 #define POWER4_PME_PM_MRK_DATA_FROM_L3 238 #define POWER4_PME_PM_MRK_DATA_FROM_MEM 239 #define POWER4_PME_PM_LSU0_FLUSH_UST 240 #define POWER4_PME_PM_LSU_FLUSH_LRQ 241 #define POWER4_PME_PM_LSU_FLUSH_SRQ 242 #define POWER4_PME_PM_L2SC_MOD_TAG 243 static const pme_power_entry_t power4_pe[] = { [ POWER4_PME_PM_MRK_LSU_SRQ_INST_VALID ] = { .pme_name = "PM_MRK_LSU_SRQ_INST_VALID", .pme_code = 0x933, .pme_short_desc = "Marked instruction valid in SRQ", .pme_long_desc = "This 
signal is asserted every cycle when a marked request is resident in the Store Request Queue", }, [ POWER4_PME_PM_FPU1_SINGLE ] = { .pme_name = "PM_FPU1_SINGLE", .pme_code = 0x127, .pme_short_desc = "FPU1 executed single precision instruction", .pme_long_desc = "This signal is active for one cycle when fp1 is executing single precision instruction.", }, [ POWER4_PME_PM_DC_PREF_OUT_STREAMS ] = { .pme_name = "PM_DC_PREF_OUT_STREAMS", .pme_code = 0xc36, .pme_short_desc = "Out of prefetch streams", .pme_long_desc = "A new prefetch stream was detected, but no more stream entries were available", }, [ POWER4_PME_PM_FPU0_STALL3 ] = { .pme_name = "PM_FPU0_STALL3", .pme_code = 0x121, .pme_short_desc = "FPU0 stalled in pipe3", .pme_long_desc = "This signal indicates that fp0 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). This signal is active during the entire duration of the stall. ", }, [ POWER4_PME_PM_TB_BIT_TRANS ] = { .pme_name = "PM_TB_BIT_TRANS", .pme_code = 0x8005, .pme_short_desc = "Time Base bit transition", .pme_long_desc = "When the selected time base bit (as specified in MMCR0[TBSEL])transitions from 0 to 1 ", }, [ POWER4_PME_PM_GPR_MAP_FULL_CYC ] = { .pme_name = "PM_GPR_MAP_FULL_CYC", .pme_code = 0x235, .pme_short_desc = "Cycles GPR mapper full", .pme_long_desc = "The ISU sends a signal indicating that the gpr mapper cannot accept any more groups. Dispatch is stopped. 
Note: this condition indicates that a pool of mapper is full but the entire mapper may not be.", }, [ POWER4_PME_PM_MRK_ST_CMPL ] = { .pme_name = "PM_MRK_ST_CMPL", .pme_code = 0x1003, .pme_short_desc = "Marked store instruction completed", .pme_long_desc = "A sampled store has completed (data home)", }, [ POWER4_PME_PM_MRK_LSU_FLUSH_LRQ ] = { .pme_name = "PM_MRK_LSU_FLUSH_LRQ", .pme_code = 0x3910, .pme_short_desc = "Marked LRQ flushes", .pme_long_desc = "A marked load was flushed because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte.", }, [ POWER4_PME_PM_FPU0_STF ] = { .pme_name = "PM_FPU0_STF", .pme_code = 0x122, .pme_short_desc = "FPU0 executed store instruction", .pme_long_desc = "This signal is active for one cycle when fp0 is executing a store instruction.", }, [ POWER4_PME_PM_FPU1_FMA ] = { .pme_name = "PM_FPU1_FMA", .pme_code = 0x105, .pme_short_desc = "FPU1 executed multiply-add instruction", .pme_long_desc = "This signal is active for one cycle when fp1 is executing multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs.", }, [ POWER4_PME_PM_L2SA_MOD_TAG ] = { .pme_name = "PM_L2SA_MOD_TAG", .pme_code = 0xf06, .pme_short_desc = "L2 slice A transition from modified to tagged", .pme_long_desc = "A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. 
The event is provided on each of the three slices A,B,and C.", }, [ POWER4_PME_PM_MRK_DATA_FROM_L275_SHR ] = { .pme_name = "PM_MRK_DATA_FROM_L275_SHR", .pme_code = 0x6c76, .pme_short_desc = "Marked data loaded from L2.75 shared", .pme_long_desc = "DL1 was reloaded with shared (T) data from the L2 of another MCM due to a marked demand load", }, [ POWER4_PME_PM_1INST_CLB_CYC ] = { .pme_name = "PM_1INST_CLB_CYC", .pme_code = 0x450, .pme_short_desc = "Cycles 1 instruction in CLB", .pme_long_desc = "The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer. Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) correspanding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue. This signal gives a real time history of the number of instruction quads valid in the instruction queue.", }, [ POWER4_PME_PM_LSU1_FLUSH_ULD ] = { .pme_name = "PM_LSU1_FLUSH_ULD", .pme_code = 0xc04, .pme_short_desc = "LSU1 unaligned load flushes", .pme_long_desc = "A load was flushed from unit 1 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1)", }, [ POWER4_PME_PM_MRK_INST_FIN ] = { .pme_name = "PM_MRK_INST_FIN", .pme_code = 0x7005, .pme_short_desc = "Marked instruction finished", .pme_long_desc = "One of the execution units finished a marked instruction. Instructions that finish may not necessary complete", }, [ POWER4_PME_PM_MRK_LSU0_FLUSH_UST ] = { .pme_name = "PM_MRK_LSU0_FLUSH_UST", .pme_code = 0x911, .pme_short_desc = "LSU0 marked unaligned store flushes", .pme_long_desc = "A marked store was flushed from unit 0 because it was unaligned", }, [ POWER4_PME_PM_FPU_FDIV ] = { .pme_name = "PM_FPU_FDIV", .pme_code = 0x1100, .pme_short_desc = "FPU executed FDIV instruction", .pme_long_desc = "This signal is active for one cycle at the end of the microcode executed when FPU is executing a divide instruction. 
This could be fdiv, fdivs, fdiv. fdivs. Combined Unit 0 + Unit 1", }, [ POWER4_PME_PM_LSU_LRQ_S0_ALLOC ] = { .pme_name = "PM_LSU_LRQ_S0_ALLOC", .pme_code = 0xc26, .pme_short_desc = "LRQ slot 0 allocated", .pme_long_desc = "LRQ slot zero was allocated", }, [ POWER4_PME_PM_FPU0_FULL_CYC ] = { .pme_name = "PM_FPU0_FULL_CYC", .pme_code = 0x203, .pme_short_desc = "Cycles FPU0 issue queue full", .pme_long_desc = "The issue queue for FPU unit 0 cannot accept any more instructions. Issue is stopped", }, [ POWER4_PME_PM_FPU_SINGLE ] = { .pme_name = "PM_FPU_SINGLE", .pme_code = 0x5120, .pme_short_desc = "FPU executed single precision instruction", .pme_long_desc = "FPU is executing single precision instruction. Combined Unit 0 + Unit 1", }, [ POWER4_PME_PM_FPU0_FMA ] = { .pme_name = "PM_FPU0_FMA", .pme_code = 0x101, .pme_short_desc = "FPU0 executed multiply-add instruction", .pme_long_desc = "This signal is active for one cycle when fp0 is executing multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs.", }, [ POWER4_PME_PM_MRK_LSU1_FLUSH_ULD ] = { .pme_name = "PM_MRK_LSU1_FLUSH_ULD", .pme_code = 0x914, .pme_short_desc = "LSU1 marked unaligned load flushes", .pme_long_desc = "A marked load was flushed from unit 1 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1)", }, [ POWER4_PME_PM_LSU1_FLUSH_LRQ ] = { .pme_name = "PM_LSU1_FLUSH_LRQ", .pme_code = 0xc06, .pme_short_desc = "LSU1 LRQ flushes", .pme_long_desc = "A load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte.", }, [ POWER4_PME_PM_L2SA_ST_HIT ] = { .pme_name = "PM_L2SA_ST_HIT", .pme_code = 0xf11, .pme_short_desc = "L2 slice A store hits", .pme_long_desc = "A store request made from the core hit in the L2 directory. 
This event is provided on each of the three L2 slices A,B, and C.", }, [ POWER4_PME_PM_L2SB_SHR_INV ] = { .pme_name = "PM_L2SB_SHR_INV", .pme_code = 0xf21, .pme_short_desc = "L2 slice B transition from shared to invalid", .pme_long_desc = "A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. This transition was caused by any external snoop request. The event is provided on each of the three slices A,B,and C. NOTE: For this event to be useful the tablewalk duration event should also be counted.", }, [ POWER4_PME_PM_DTLB_MISS ] = { .pme_name = "PM_DTLB_MISS", .pme_code = 0x904, .pme_short_desc = "Data TLB misses", .pme_long_desc = "A TLB miss for a data request occurred. Requests that miss the TLB may be retried until the instruction is in the next to complete group (unless HID4 is set to allow speculative tablewalks). This may result in multiple TLB misses for the same instruction.", }, [ POWER4_PME_PM_MRK_ST_MISS_L1 ] = { .pme_name = "PM_MRK_ST_MISS_L1", .pme_code = 0x923, .pme_short_desc = "Marked L1 D cache store misses", .pme_long_desc = "A marked store missed the dcache", }, [ POWER4_PME_PM_EXT_INT ] = { .pme_name = "PM_EXT_INT", .pme_code = 0x8002, .pme_short_desc = "External interrupts", .pme_long_desc = "An external interrupt occurred", }, [ POWER4_PME_PM_MRK_LSU1_FLUSH_LRQ ] = { .pme_name = "PM_MRK_LSU1_FLUSH_LRQ", .pme_code = 0x916, .pme_short_desc = "LSU1 marked LRQ flushes", .pme_long_desc = "A marked load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte.", }, [ POWER4_PME_PM_MRK_ST_GPS ] = { .pme_name = "PM_MRK_ST_GPS", .pme_code = 0x6003, .pme_short_desc = "Marked store sent to GPS", .pme_long_desc = "A sampled store has been sent to the memory subsystem", }, [ POWER4_PME_PM_GRP_DISP_SUCCESS ] = { 
.pme_name = "PM_GRP_DISP_SUCCESS", .pme_code = 0x5001, .pme_short_desc = "Group dispatch success", .pme_long_desc = "Number of groups sucessfully dispatched (not rejected)", }, [ POWER4_PME_PM_LSU1_LDF ] = { .pme_name = "PM_LSU1_LDF", .pme_code = 0x934, .pme_short_desc = "LSU1 executed Floating Point load instruction", .pme_long_desc = "A floating point load was executed from LSU unit 1", }, [ POWER4_PME_PM_FAB_CMD_ISSUED ] = { .pme_name = "PM_FAB_CMD_ISSUED", .pme_code = 0xf16, .pme_short_desc = "Fabric command issued", .pme_long_desc = "A bus command was issued on the MCM to MCM fabric from the local (this chip's) Fabric Bus Controller. This event is scaled to the fabric frequency and must be adjusted for a true count. i.e. if the fabric is running 2:1, divide the count by 2.", }, [ POWER4_PME_PM_LSU0_SRQ_STFWD ] = { .pme_name = "PM_LSU0_SRQ_STFWD", .pme_code = 0xc20, .pme_short_desc = "LSU0 SRQ store forwarded", .pme_long_desc = "Data from a store instruction was forwarded to a load on unit 0", }, [ POWER4_PME_PM_CR_MAP_FULL_CYC ] = { .pme_name = "PM_CR_MAP_FULL_CYC", .pme_code = 0x204, .pme_short_desc = "Cycles CR logical operation mapper full", .pme_long_desc = "The ISU sends a signal indicating that the cr mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be.", }, [ POWER4_PME_PM_MRK_LSU0_FLUSH_ULD ] = { .pme_name = "PM_MRK_LSU0_FLUSH_ULD", .pme_code = 0x910, .pme_short_desc = "LSU0 marked unaligned load flushes", .pme_long_desc = "A marked load was flushed from unit 0 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1)", }, [ POWER4_PME_PM_LSU_DERAT_MISS ] = { .pme_name = "PM_LSU_DERAT_MISS", .pme_code = 0x6900, .pme_short_desc = "DERAT misses", .pme_long_desc = "Total D-ERAT Misses (Unit 0 + Unit 1). Requests that miss the Derat are rejected and retried until the request hits in the Erat. 
This may result in multiple erat misses for the same instruction.", }, [ POWER4_PME_PM_FPU0_SINGLE ] = { .pme_name = "PM_FPU0_SINGLE", .pme_code = 0x123, .pme_short_desc = "FPU0 executed single precision instruction", .pme_long_desc = "This signal is active for one cycle when fp0 is executing single precision instruction.", }, [ POWER4_PME_PM_FPU1_FDIV ] = { .pme_name = "PM_FPU1_FDIV", .pme_code = 0x104, .pme_short_desc = "FPU1 executed FDIV instruction", .pme_long_desc = "This signal is active for one cycle at the end of the microcode executed when fp1 is executing a divide instruction. This could be fdiv, fdivs, fdiv. fdivs.", }, [ POWER4_PME_PM_FPU1_FEST ] = { .pme_name = "PM_FPU1_FEST", .pme_code = 0x116, .pme_short_desc = "FPU1 executed FEST instruction", .pme_long_desc = "This signal is active for one cycle when fp1 is executing one of the estimate instructions. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. ", }, [ POWER4_PME_PM_FPU0_FRSP_FCONV ] = { .pme_name = "PM_FPU0_FRSP_FCONV", .pme_code = 0x111, .pme_short_desc = "FPU0 executed FRSP or FCONV instructions", .pme_long_desc = "fThis signal is active for one cycle when fp0 is executing frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs.", }, [ POWER4_PME_PM_MRK_ST_CMPL_INT ] = { .pme_name = "PM_MRK_ST_CMPL_INT", .pme_code = 0x3003, .pme_short_desc = "Marked store completed with intervention", .pme_long_desc = "A marked store previously sent to the memory subsystem completed (data home) after requiring intervention", }, [ POWER4_PME_PM_FXU_FIN ] = { .pme_name = "PM_FXU_FIN", .pme_code = 0x3230, .pme_short_desc = "FXU produced a result", .pme_long_desc = "The fixed point unit (Unit 0 + Unit 1) finished a marked instruction. 
Instructions that finish may not necessary complete.", }, [ POWER4_PME_PM_FPU_STF ] = { .pme_name = "PM_FPU_STF", .pme_code = 0x6120, .pme_short_desc = "FPU executed store instruction", .pme_long_desc = "FPU is executing a store instruction. Combined Unit 0 + Unit 1", }, [ POWER4_PME_PM_DSLB_MISS ] = { .pme_name = "PM_DSLB_MISS", .pme_code = 0x905, .pme_short_desc = "Data SLB misses", .pme_long_desc = "A SLB miss for a data request occurred. SLB misses trap to the operating system to resolve", }, [ POWER4_PME_PM_DATA_FROM_L275_SHR ] = { .pme_name = "PM_DATA_FROM_L275_SHR", .pme_code = 0x6c66, .pme_short_desc = "Data loaded from L2.75 shared", .pme_long_desc = "DL1 was reloaded with shared (T) data from the L2 of another MCM due to a demand load", }, [ POWER4_PME_PM_FXLS1_FULL_CYC ] = { .pme_name = "PM_FXLS1_FULL_CYC", .pme_code = 0x214, .pme_short_desc = "Cycles FXU1/LS1 queue full", .pme_long_desc = "The issue queue for FXU/LSU unit 1 cannot accept any more instructions. Issue is stopped", }, [ POWER4_PME_PM_L3B0_DIR_MIS ] = { .pme_name = "PM_L3B0_DIR_MIS", .pme_code = 0xf01, .pme_short_desc = "L3 bank 0 directory misses", .pme_long_desc = "A reference was made to the local L3 directory by a local CPU and it missed in the L3. Only requests from on-MCM CPUs are counted. This event is scaled to the L3 speed and the count must be scaled. i.e. if the L3 is running 3:1, divide the count by 3", }, [ POWER4_PME_PM_2INST_CLB_CYC ] = { .pme_name = "PM_2INST_CLB_CYC", .pme_code = 0x451, .pme_short_desc = "Cycles 2 instructions in CLB", .pme_long_desc = "The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer. Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) correspanding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue. 
This signal gives a real time history of the number of instruction quads valid in the instruction queue.", }, [ POWER4_PME_PM_MRK_STCX_FAIL ] = { .pme_name = "PM_MRK_STCX_FAIL", .pme_code = 0x925, .pme_short_desc = "Marked STCX failed", .pme_long_desc = "A marked stcx (stwcx or stdcx) failed", }, [ POWER4_PME_PM_LSU_LMQ_LHR_MERGE ] = { .pme_name = "PM_LSU_LMQ_LHR_MERGE", .pme_code = 0x926, .pme_short_desc = "LMQ LHR merges", .pme_long_desc = "A dcache miss occurred for the same real cache line address as an earlier request already in the Load Miss Queue and was merged into the LMQ entry.", }, [ POWER4_PME_PM_FXU0_BUSY_FXU1_IDLE ] = { .pme_name = "PM_FXU0_BUSY_FXU1_IDLE", .pme_code = 0x7002, .pme_short_desc = "FXU0 busy FXU1 idle", .pme_long_desc = "FXU0 is busy while FXU1 was idle", }, [ POWER4_PME_PM_L3B1_DIR_REF ] = { .pme_name = "PM_L3B1_DIR_REF", .pme_code = 0xf02, .pme_short_desc = "L3 bank 1 directory references", .pme_long_desc = "A reference was made to the local L3 directory by a local CPU. Only requests from on-MCM CPUs are counted. This event is scaled to the L3 speed and the count must be scaled. i.e. 
if the L3 is running 3:1, divide the count by 3", }, [ POWER4_PME_PM_MRK_LSU_FLUSH_UST ] = { .pme_name = "PM_MRK_LSU_FLUSH_UST", .pme_code = 0x7910, .pme_short_desc = "Marked unaligned store flushes", .pme_long_desc = "A marked store was flushed because it was unaligned", }, [ POWER4_PME_PM_MRK_DATA_FROM_L25_SHR ] = { .pme_name = "PM_MRK_DATA_FROM_L25_SHR", .pme_code = 0x5c76, .pme_short_desc = "Marked data loaded from L2.5 shared", .pme_long_desc = "DL1 was reloaded with shared (T or SL) data from the L2 of a chip on this MCM due to a marked demand load", }, [ POWER4_PME_PM_LSU_FLUSH_ULD ] = { .pme_name = "PM_LSU_FLUSH_ULD", .pme_code = 0x1c00, .pme_short_desc = "LRQ unaligned load flushes", .pme_long_desc = "A load was flushed because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1)", }, [ POWER4_PME_PM_MRK_BRU_FIN ] = { .pme_name = "PM_MRK_BRU_FIN", .pme_code = 0x2005, .pme_short_desc = "Marked instruction BRU processing finished", .pme_long_desc = "The branch unit finished a marked instruction. Instructions that finish may not necessary complete", }, [ POWER4_PME_PM_IERAT_XLATE_WR ] = { .pme_name = "PM_IERAT_XLATE_WR", .pme_code = 0x327, .pme_short_desc = "Translation written to ierat", .pme_long_desc = "This signal will be asserted each time the I-ERAT is written. This indicates that an ERAT miss has been serviced. ERAT misses will initiate a sequence resulting in the ERAT being written. 
ERAT misses that are later ignored will not be counted unless the ERAT is written before the instruction stream is changed, This should be a fairly accurate count of ERAT missed (best available).", }, [ POWER4_PME_PM_LSU0_BUSY ] = { .pme_name = "PM_LSU0_BUSY", .pme_code = 0xc33, .pme_short_desc = "LSU0 busy", .pme_long_desc = "LSU unit 0 is busy rejecting instructions", }, [ POWER4_PME_PM_L2SA_ST_REQ ] = { .pme_name = "PM_L2SA_ST_REQ", .pme_code = 0xf10, .pme_short_desc = "L2 slice A store requests", .pme_long_desc = "A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. The event is provided on each of the three slices A,B,and C.", }, [ POWER4_PME_PM_DATA_FROM_MEM ] = { .pme_name = "PM_DATA_FROM_MEM", .pme_code = 0x2c66, .pme_short_desc = "Data loaded from memory", .pme_long_desc = "DL1 was reloaded from memory due to a demand load", }, [ POWER4_PME_PM_FPR_MAP_FULL_CYC ] = { .pme_name = "PM_FPR_MAP_FULL_CYC", .pme_code = 0x201, .pme_short_desc = "Cycles FPR mapper full", .pme_long_desc = "The ISU sends a signal indicating that the FPR mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be.", }, [ POWER4_PME_PM_FPU1_FULL_CYC ] = { .pme_name = "PM_FPU1_FULL_CYC", .pme_code = 0x207, .pme_short_desc = "Cycles FPU1 issue queue full", .pme_long_desc = "The issue queue for FPU unit 1 cannot accept any more instructions. Issue is stopped", }, [ POWER4_PME_PM_FPU0_FIN ] = { .pme_name = "PM_FPU0_FIN", .pme_code = 0x113, .pme_short_desc = "FPU0 produced a result", .pme_long_desc = "fp0 finished, produced a result This only indicates finish, not completion. ", }, [ POWER4_PME_PM_3INST_CLB_CYC ] = { .pme_name = "PM_3INST_CLB_CYC", .pme_code = 0x452, .pme_short_desc = "Cycles 3 instructions in CLB", .pme_long_desc = "The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer. 
Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) correspanding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue. This signal gives a real time history of the number of instruction quads valid in the instruction queue.", }, [ POWER4_PME_PM_DATA_FROM_L35 ] = { .pme_name = "PM_DATA_FROM_L35", .pme_code = 0x3c66, .pme_short_desc = "Data loaded from L3.5", .pme_long_desc = "DL1 was reloaded from the L3 of another MCM due to a demand load", }, [ POWER4_PME_PM_L2SA_SHR_INV ] = { .pme_name = "PM_L2SA_SHR_INV", .pme_code = 0xf05, .pme_short_desc = "L2 slice A transition from shared to invalid", .pme_long_desc = "A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. This transition was caused by any external snoop request. The event is provided on each of the three slices A,B,and C. NOTE: For this event to be useful the tablewalk duration event should also be counted.", }, [ POWER4_PME_PM_MRK_LSU_FLUSH_SRQ ] = { .pme_name = "PM_MRK_LSU_FLUSH_SRQ", .pme_code = 0x4910, .pme_short_desc = "Marked SRQ flushes", .pme_long_desc = "A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group.", }, [ POWER4_PME_PM_THRESH_TIMEO ] = { .pme_name = "PM_THRESH_TIMEO", .pme_code = 0x2003, .pme_short_desc = "Threshold timeout", .pme_long_desc = "The threshold timer expired", }, [ POWER4_PME_PM_FPU_FSQRT ] = { .pme_name = "PM_FPU_FSQRT", .pme_code = 0x6100, .pme_short_desc = "FPU executed FSQRT instruction", .pme_long_desc = "This signal is active for one cycle at the end of the microcode executed when FPU is executing a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. 
Combined Unit 0 + Unit 1", }, [ POWER4_PME_PM_MRK_LSU0_FLUSH_LRQ ] = { .pme_name = "PM_MRK_LSU0_FLUSH_LRQ", .pme_code = 0x912, .pme_short_desc = "LSU0 marked LRQ flushes", .pme_long_desc = "A marked load was flushed by unit 0 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte.", }, [ POWER4_PME_PM_FXLS0_FULL_CYC ] = { .pme_name = "PM_FXLS0_FULL_CYC", .pme_code = 0x210, .pme_short_desc = "Cycles FXU0/LS0 queue full", .pme_long_desc = "The issue queue for FXU/LSU unit 0 cannot accept any more instructions. Issue is stopped", }, [ POWER4_PME_PM_DATA_TABLEWALK_CYC ] = { .pme_name = "PM_DATA_TABLEWALK_CYC", .pme_code = 0x936, .pme_short_desc = "Cycles doing data tablewalks", .pme_long_desc = "This signal is asserted every cycle when a tablewalk is active. While a tablewalk is active any request attempting to access the TLB will be rejected and retried.", }, [ POWER4_PME_PM_FPU0_ALL ] = { .pme_name = "PM_FPU0_ALL", .pme_code = 0x103, .pme_short_desc = "FPU0 executed add, mult, sub, cmp or sel instruction", .pme_long_desc = "This signal is active for one cycle when fp0 is executing an add, mult, sub, compare, or fsel kind of instruction. This could be fadd*, fmul*, fsub*, fcmp**, fsel where XYZ* means XYZ, XYZs, XYZ., XYZs. and XYZ** means XYZu, XYZo", }, [ POWER4_PME_PM_FPU0_FEST ] = { .pme_name = "PM_FPU0_FEST", .pme_code = 0x112, .pme_short_desc = "FPU0 executed FEST instruction", .pme_long_desc = "This signal is active for one cycle when fp0 is executing one of the estimate instructions. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. 
", }, [ POWER4_PME_PM_DATA_FROM_L25_MOD ] = { .pme_name = "PM_DATA_FROM_L25_MOD", .pme_code = 0x8c66, .pme_short_desc = "Data loaded from L2.5 modified", .pme_long_desc = "DL1 was reloaded with modified (M) data from the L2 of a chip on this MCM due to a demand load", }, [ POWER4_PME_PM_LSU_LMQ_SRQ_EMPTY_CYC ] = { .pme_name = "PM_LSU_LMQ_SRQ_EMPTY_CYC", .pme_code = 0x2002, .pme_short_desc = "Cycles LMQ and SRQ empty", .pme_long_desc = "Cycles when both the LMQ and SRQ are empty (LSU is idle)", }, [ POWER4_PME_PM_FPU_FEST ] = { .pme_name = "PM_FPU_FEST", .pme_code = 0x3110, .pme_short_desc = "FPU executed FEST instruction", .pme_long_desc = "This signal is active for one cycle when executing one of the estimate instructions. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. Combined Unit 0 + Unit 1.", }, [ POWER4_PME_PM_0INST_FETCH ] = { .pme_name = "PM_0INST_FETCH", .pme_code = 0x8327, .pme_short_desc = "No instructions fetched", .pme_long_desc = "No instructions were fetched this cycles (due to IFU hold, redirect, or icache miss)", }, [ POWER4_PME_PM_LARX_LSU1 ] = { .pme_name = "PM_LARX_LSU1", .pme_code = 0xc77, .pme_short_desc = "Larx executed on LSU1", .pme_long_desc = "Invalid event, larx instructions are never executed on unit 1", }, [ POWER4_PME_PM_LD_MISS_L1_LSU0 ] = { .pme_name = "PM_LD_MISS_L1_LSU0", .pme_code = 0xc12, .pme_short_desc = "LSU0 L1 D cache load misses", .pme_long_desc = "A load, executing on unit 0, missed the dcache", }, [ POWER4_PME_PM_L1_PREF ] = { .pme_name = "PM_L1_PREF", .pme_code = 0xc35, .pme_short_desc = "L1 cache data prefetches", .pme_long_desc = "A request to prefetch data into the L1 was made", }, [ POWER4_PME_PM_FPU1_STALL3 ] = { .pme_name = "PM_FPU1_STALL3", .pme_code = 0x125, .pme_short_desc = "FPU1 stalled in pipe3", .pme_long_desc = "This signal indicates that fp1 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). 
This signal is active during the entire duration of the stall. ", }, [ POWER4_PME_PM_BRQ_FULL_CYC ] = { .pme_name = "PM_BRQ_FULL_CYC", .pme_code = 0x205, .pme_short_desc = "Cycles branch queue full", .pme_long_desc = "The ISU sends a signal indicating that the issue queue that feeds the ifu br unit cannot accept any more group (queue is full of groups).", }, [ POWER4_PME_PM_LARX ] = { .pme_name = "PM_LARX", .pme_code = 0x4c70, .pme_short_desc = "Larx executed", .pme_long_desc = "A Larx (lwarx or ldarx) was executed. This is the combined count from LSU0 + LSU1, but these instructions only execute on LSU0", }, [ POWER4_PME_PM_MRK_DATA_FROM_L35 ] = { .pme_name = "PM_MRK_DATA_FROM_L35", .pme_code = 0x3c76, .pme_short_desc = "Marked data loaded from L3.5", .pme_long_desc = "DL1 was reloaded from the L3 of another MCM due to a marked demand load", }, [ POWER4_PME_PM_WORK_HELD ] = { .pme_name = "PM_WORK_HELD", .pme_code = 0x2001, .pme_short_desc = "Work held", .pme_long_desc = "RAS Unit has signaled completion to stop and there are groups waiting to complete", }, [ POWER4_PME_PM_MRK_LD_MISS_L1_LSU0 ] = { .pme_name = "PM_MRK_LD_MISS_L1_LSU0", .pme_code = 0x920, .pme_short_desc = "LSU0 L1 D cache load misses", .pme_long_desc = "A marked load, executing on unit 0, missed the dcache", }, [ POWER4_PME_PM_FXU_IDLE ] = { .pme_name = "PM_FXU_IDLE", .pme_code = 0x5002, .pme_short_desc = "FXU idle", .pme_long_desc = "FXU0 and FXU1 are both idle", }, [ POWER4_PME_PM_INST_CMPL ] = { .pme_name = "PM_INST_CMPL", .pme_code = 0x8001, .pme_short_desc = "Instructions completed", .pme_long_desc = "Number of Eligible Instructions that completed. 
", }, [ POWER4_PME_PM_LSU1_FLUSH_UST ] = { .pme_name = "PM_LSU1_FLUSH_UST", .pme_code = 0xc05, .pme_short_desc = "LSU1 unaligned store flushes", .pme_long_desc = "A store was flushed from unit 1 because it was unaligned (crossed a 4k boundary)", }, [ POWER4_PME_PM_LSU0_FLUSH_ULD ] = { .pme_name = "PM_LSU0_FLUSH_ULD", .pme_code = 0xc00, .pme_short_desc = "LSU0 unaligned load flushes", .pme_long_desc = "A load was flushed from unit 0 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1)", }, [ POWER4_PME_PM_INST_FROM_L2 ] = { .pme_name = "PM_INST_FROM_L2", .pme_code = 0x3327, .pme_short_desc = "Instructions fetched from L2", .pme_long_desc = "An instruction fetch group was fetched from L2. Fetch Groups can contain up to 8 instructions", }, [ POWER4_PME_PM_DATA_FROM_L3 ] = { .pme_name = "PM_DATA_FROM_L3", .pme_code = 0x1c66, .pme_short_desc = "Data loaded from L3", .pme_long_desc = "DL1 was reloaded from the local L3 due to a demand load", }, [ POWER4_PME_PM_FPU0_DENORM ] = { .pme_name = "PM_FPU0_DENORM", .pme_code = 0x120, .pme_short_desc = "FPU0 received denormalized data", .pme_long_desc = "This signal is active for one cycle when one of the operands is denormalized.", }, [ POWER4_PME_PM_FPU1_FMOV_FEST ] = { .pme_name = "PM_FPU1_FMOV_FEST", .pme_code = 0x114, .pme_short_desc = "FPU1 executing FMOV or FEST instructions", .pme_long_desc = "This signal is active for one cycle when fp1 is executing a move kind of instruction or one of the estimate instructions.. 
This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ", }, [ POWER4_PME_PM_GRP_DISP_REJECT ] = { .pme_name = "PM_GRP_DISP_REJECT", .pme_code = 0x8003, .pme_short_desc = "Group dispatch rejected", .pme_long_desc = "A group that previously attempted dispatch was rejected.", }, [ POWER4_PME_PM_INST_FETCH_CYC ] = { .pme_name = "PM_INST_FETCH_CYC", .pme_code = 0x323, .pme_short_desc = "Cycles at least 1 instruction fetched", .pme_long_desc = "Asserted each cycle when the IFU sends at least one instruction to the IDU. ", }, [ POWER4_PME_PM_LSU_LDF ] = { .pme_name = "PM_LSU_LDF", .pme_code = 0x8930, .pme_short_desc = "LSU executed Floating Point load instruction", .pme_long_desc = "LSU executed Floating Point load instruction", }, [ POWER4_PME_PM_INST_DISP ] = { .pme_name = "PM_INST_DISP", .pme_code = 0x221, .pme_short_desc = "Instructions dispatched", .pme_long_desc = "The ISU sends the number of instructions dispatched.", }, [ POWER4_PME_PM_L2SA_MOD_INV ] = { .pme_name = "PM_L2SA_MOD_INV", .pme_code = 0xf07, .pme_short_desc = "L2 slice A transition from modified to invalid", .pme_long_desc = "A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A,B,and C.", }, [ POWER4_PME_PM_DATA_FROM_L25_SHR ] = { .pme_name = "PM_DATA_FROM_L25_SHR", .pme_code = 0x5c66, .pme_short_desc = "Data loaded from L2.5 shared", .pme_long_desc = "DL1 was reloaded with shared (T or SL) data from the L2 of a chip on this MCM due to a demand load", }, [ POWER4_PME_PM_FAB_CMD_RETRIED ] = { .pme_name = "PM_FAB_CMD_RETRIED", .pme_code = 0xf17, .pme_short_desc = "Fabric command retried", .pme_long_desc = "A bus command on the MCM to MCM fabric was retried. 
This event is the total count of all retried fabric commands for the local MCM (all four chips report the same value). This event is scaled to the fabric frequency and must be adjusted for a true count. i.e. if the fabric is running 2:1, divide the count by 2.", }, [ POWER4_PME_PM_L1_DCACHE_RELOAD_VALID ] = { .pme_name = "PM_L1_DCACHE_RELOAD_VALID", .pme_code = 0xc64, .pme_short_desc = "L1 reload data source valid", .pme_long_desc = "The data source information is valid", }, [ POWER4_PME_PM_MRK_GRP_ISSUED ] = { .pme_name = "PM_MRK_GRP_ISSUED", .pme_code = 0x6005, .pme_short_desc = "Marked group issued", .pme_long_desc = "A sampled instruction was issued", }, [ POWER4_PME_PM_FPU_FULL_CYC ] = { .pme_name = "PM_FPU_FULL_CYC", .pme_code = 0x5200, .pme_short_desc = "Cycles FPU issue queue full", .pme_long_desc = "Cycles when one or both FPU issue queues are full", }, [ POWER4_PME_PM_FPU_FMA ] = { .pme_name = "PM_FPU_FMA", .pme_code = 0x2100, .pme_short_desc = "FPU executed multiply-add instruction", .pme_long_desc = "This signal is active for one cycle when FPU is executing multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. Combined Unit 0 + Unit 1", }, [ POWER4_PME_PM_MRK_CRU_FIN ] = { .pme_name = "PM_MRK_CRU_FIN", .pme_code = 0x4005, .pme_short_desc = "Marked instruction CRU processing finished", .pme_long_desc = "The Condition Register Unit finished a marked instruction. 
Instructions that finish may not necessary complete", }, [ POWER4_PME_PM_MRK_LSU1_FLUSH_UST ] = { .pme_name = "PM_MRK_LSU1_FLUSH_UST", .pme_code = 0x915, .pme_short_desc = "LSU1 marked unaligned store flushes", .pme_long_desc = "A marked store was flushed from unit 1 because it was unaligned (crossed a 4k boundary)", }, [ POWER4_PME_PM_MRK_FXU_FIN ] = { .pme_name = "PM_MRK_FXU_FIN", .pme_code = 0x6004, .pme_short_desc = "Marked instruction FXU processing finished", .pme_long_desc = "One of the Fixed Point Units finished a marked instruction. Instructions that finish may not necessary complete", }, [ POWER4_PME_PM_BR_ISSUED ] = { .pme_name = "PM_BR_ISSUED", .pme_code = 0x330, .pme_short_desc = "Branches issued", .pme_long_desc = "This signal will be asserted each time the ISU issues a branch instruction. This signal will be asserted each time the ISU selects a branch instruction to issue.", }, [ POWER4_PME_PM_EE_OFF ] = { .pme_name = "PM_EE_OFF", .pme_code = 0x233, .pme_short_desc = "Cycles MSR(EE) bit off", .pme_long_desc = "The number of Cycles MSR(EE) bit was off.", }, [ POWER4_PME_PM_INST_FROM_L3 ] = { .pme_name = "PM_INST_FROM_L3", .pme_code = 0x5327, .pme_short_desc = "Instruction fetched from L3", .pme_long_desc = "An instruction fetch group was fetched from L3. 
Fetch Groups can contain up to 8 instructions", }, [ POWER4_PME_PM_ITLB_MISS ] = { .pme_name = "PM_ITLB_MISS", .pme_code = 0x900, .pme_short_desc = "Instruction TLB misses", .pme_long_desc = "A TLB miss for an Instruction Fetch has occurred", }, [ POWER4_PME_PM_FXLS_FULL_CYC ] = { .pme_name = "PM_FXLS_FULL_CYC", .pme_code = 0x8210, .pme_short_desc = "Cycles FXLS queue is full", .pme_long_desc = "Cycles when one or both FXU/LSU issue queue are full", }, [ POWER4_PME_PM_FXU1_BUSY_FXU0_IDLE ] = { .pme_name = "PM_FXU1_BUSY_FXU0_IDLE", .pme_code = 0x4002, .pme_short_desc = "FXU1 busy FXU0 idle", .pme_long_desc = "FXU0 was idle while FXU1 was busy", }, [ POWER4_PME_PM_GRP_DISP_VALID ] = { .pme_name = "PM_GRP_DISP_VALID", .pme_code = 0x223, .pme_short_desc = "Group dispatch valid", .pme_long_desc = "Dispatch has been attempted for a valid group. Some groups may be rejected. The total number of successful dispatches is the number of dispatch valid minus dispatch reject.", }, [ POWER4_PME_PM_L2SC_ST_HIT ] = { .pme_name = "PM_L2SC_ST_HIT", .pme_code = 0xf15, .pme_short_desc = "L2 slice C store hits", .pme_long_desc = "A store request made from the core hit in the L2 directory. This event is provided on each of the three L2 slices A,B, and C.", }, [ POWER4_PME_PM_MRK_GRP_DISP ] = { .pme_name = "PM_MRK_GRP_DISP", .pme_code = 0x1002, .pme_short_desc = "Marked group dispatched", .pme_long_desc = "A group containing a sampled instruction was dispatched", }, [ POWER4_PME_PM_L2SB_MOD_TAG ] = { .pme_name = "PM_L2SB_MOD_TAG", .pme_code = 0xf22, .pme_short_desc = "L2 slice B transition from modified to tagged", .pme_long_desc = "A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. 
The event is provided on each of the three slices A,B,and C.", }, [ POWER4_PME_PM_INST_FROM_L25_L275 ] = { .pme_name = "PM_INST_FROM_L25_L275", .pme_code = 0x2327, .pme_short_desc = "Instruction fetched from L2.5/L2.75", .pme_long_desc = "An instruction fetch group was fetched from the L2 of another chip. Fetch Groups can contain up to 8 instructions", }, [ POWER4_PME_PM_LSU_FLUSH_UST ] = { .pme_name = "PM_LSU_FLUSH_UST", .pme_code = 0x2c00, .pme_short_desc = "SRQ unaligned store flushes", .pme_long_desc = "A store was flushed because it was unaligned", }, [ POWER4_PME_PM_L2SB_ST_HIT ] = { .pme_name = "PM_L2SB_ST_HIT", .pme_code = 0xf13, .pme_short_desc = "L2 slice B store hits", .pme_long_desc = "A store request made from the core hit in the L2 directory. This event is provided on each of the three L2 slices A,B, and C.", }, [ POWER4_PME_PM_FXU1_FIN ] = { .pme_name = "PM_FXU1_FIN", .pme_code = 0x236, .pme_short_desc = "FXU1 produced a result", .pme_long_desc = "The Fixed Point unit 1 finished an instruction and produced a result", }, [ POWER4_PME_PM_L3B1_DIR_MIS ] = { .pme_name = "PM_L3B1_DIR_MIS", .pme_code = 0xf03, .pme_short_desc = "L3 bank 1 directory misses", .pme_long_desc = "A reference was made to the local L3 directory by a local CPU and it missed in the L3. Only requests from on-MCM CPUs are counted. This event is scaled to the L3 speed and the count must be scaled. i.e. if the L3 is running 3:1, divide the count by 3", }, [ POWER4_PME_PM_4INST_CLB_CYC ] = { .pme_name = "PM_4INST_CLB_CYC", .pme_code = 0x453, .pme_short_desc = "Cycles 4 instructions in CLB", .pme_long_desc = "The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer. Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) correspanding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue. 
This signal gives a real time history of the number of instruction quads valid in the instruction queue.", }, [ POWER4_PME_PM_GRP_CMPL ] = { .pme_name = "PM_GRP_CMPL", .pme_code = 0x7003, .pme_short_desc = "Group completed", .pme_long_desc = "A group completed. Microcoded instructions that span multiple groups will generate this event once per group.", }, [ POWER4_PME_PM_DC_PREF_L2_CLONE_L3 ] = { .pme_name = "PM_DC_PREF_L2_CLONE_L3", .pme_code = 0xc27, .pme_short_desc = "L2 prefetch cloned with L3", .pme_long_desc = "A prefetch request was made to the L2 with a cloned request sent to the L3", }, [ POWER4_PME_PM_FPU_FRSP_FCONV ] = { .pme_name = "PM_FPU_FRSP_FCONV", .pme_code = 0x7110, .pme_short_desc = "FPU executed FRSP or FCONV instructions", .pme_long_desc = "This signal is active for one cycle when executing frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. Combined Unit 0 + Unit 1", }, [ POWER4_PME_PM_5INST_CLB_CYC ] = { .pme_name = "PM_5INST_CLB_CYC", .pme_code = 0x454, .pme_short_desc = "Cycles 5 instructions in CLB", .pme_long_desc = "The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer. Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) correspanding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue. 
This signal gives a real time history of the number of instruction quads valid in the instruction queue.", }, [ POWER4_PME_PM_MRK_LSU0_FLUSH_SRQ ] = { .pme_name = "PM_MRK_LSU0_FLUSH_SRQ", .pme_code = 0x913, .pme_short_desc = "LSU0 marked SRQ flushes", .pme_long_desc = "A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group.", }, [ POWER4_PME_PM_MRK_LSU_FLUSH_ULD ] = { .pme_name = "PM_MRK_LSU_FLUSH_ULD", .pme_code = 0x8910, .pme_short_desc = "Marked unaligned load flushes", .pme_long_desc = "A marked load was flushed because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1)", }, [ POWER4_PME_PM_8INST_CLB_CYC ] = { .pme_name = "PM_8INST_CLB_CYC", .pme_code = 0x457, .pme_short_desc = "Cycles 8 instructions in CLB", .pme_long_desc = "The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer. Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) correspanding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue. This signal gives a real time history of the number of instruction quads valid in the instruction queue.", }, [ POWER4_PME_PM_LSU_LMQ_FULL_CYC ] = { .pme_name = "PM_LSU_LMQ_FULL_CYC", .pme_code = 0x927, .pme_short_desc = "Cycles LMQ full", .pme_long_desc = "The LMQ was full", }, [ POWER4_PME_PM_ST_REF_L1_LSU0 ] = { .pme_name = "PM_ST_REF_L1_LSU0", .pme_code = 0xc11, .pme_short_desc = "LSU0 L1 D cache store references", .pme_long_desc = "A store executed on unit 0", }, [ POWER4_PME_PM_LSU0_DERAT_MISS ] = { .pme_name = "PM_LSU0_DERAT_MISS", .pme_code = 0x902, .pme_short_desc = "LSU0 DERAT misses", .pme_long_desc = "A data request (load or store) from LSU Unit 0 missed the ERAT and resulted in an ERAT reload. 
Multiple instructions may miss the ERAT entry for the same 4K page, but only one reload will occur.", }, [ POWER4_PME_PM_LSU_SRQ_SYNC_CYC ] = { .pme_name = "PM_LSU_SRQ_SYNC_CYC", .pme_code = 0x932, .pme_short_desc = "SRQ sync duration", .pme_long_desc = "This signal is asserted every cycle when a sync is in the SRQ.", }, [ POWER4_PME_PM_FPU_STALL3 ] = { .pme_name = "PM_FPU_STALL3", .pme_code = 0x2120, .pme_short_desc = "FPU stalled in pipe3", .pme_long_desc = "FPU has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). This signal is active during the entire duration of the stall. Combined Unit 0 + Unit 1", }, [ POWER4_PME_PM_MRK_DATA_FROM_L2 ] = { .pme_name = "PM_MRK_DATA_FROM_L2", .pme_code = 0x4c76, .pme_short_desc = "Marked data loaded from L2", .pme_long_desc = "DL1 was reloaded from the local L2 due to a marked demand load", }, [ POWER4_PME_PM_FPU0_FMOV_FEST ] = { .pme_name = "PM_FPU0_FMOV_FEST", .pme_code = 0x110, .pme_short_desc = "FPU0 executed FMOV or FEST instructions", .pme_long_desc = "This signal is active for one cycle when fp0 is executing a move kind of instruction or one of the estimate instructions.. 
This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ", }, [ POWER4_PME_PM_LSU0_FLUSH_SRQ ] = { .pme_name = "PM_LSU0_FLUSH_SRQ", .pme_code = 0xc03, .pme_short_desc = "LSU0 SRQ flushes", .pme_long_desc = "A store was flushed because younger load hits and older store that is already in the SRQ or in the same group.", }, [ POWER4_PME_PM_LD_REF_L1_LSU0 ] = { .pme_name = "PM_LD_REF_L1_LSU0", .pme_code = 0xc10, .pme_short_desc = "LSU0 L1 D cache load references", .pme_long_desc = "A load executed on unit 0", }, [ POWER4_PME_PM_L2SC_SHR_INV ] = { .pme_name = "PM_L2SC_SHR_INV", .pme_code = 0xf25, .pme_short_desc = "L2 slice C transition from shared to invalid", .pme_long_desc = "A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. This transition was caused by any external snoop request. The event is provided on each of the three slices A,B,and C. NOTE: For this event to be useful the tablewalk duration event should also be counted.", }, [ POWER4_PME_PM_LSU1_FLUSH_SRQ ] = { .pme_name = "PM_LSU1_FLUSH_SRQ", .pme_code = 0xc07, .pme_short_desc = "LSU1 SRQ flushes", .pme_long_desc = "A store was flushed because younger load hits and older store that is already in the SRQ or in the same group. 
", }, [ POWER4_PME_PM_LSU_LMQ_S0_ALLOC ] = { .pme_name = "PM_LSU_LMQ_S0_ALLOC", .pme_code = 0x935, .pme_short_desc = "LMQ slot 0 allocated", .pme_long_desc = "The first entry in the LMQ was allocated.", }, [ POWER4_PME_PM_ST_REF_L1 ] = { .pme_name = "PM_ST_REF_L1", .pme_code = 0x7c10, .pme_short_desc = "L1 D cache store references", .pme_long_desc = "Total DL1 Store references", }, [ POWER4_PME_PM_LSU_SRQ_EMPTY_CYC ] = { .pme_name = "PM_LSU_SRQ_EMPTY_CYC", .pme_code = 0x4003, .pme_short_desc = "Cycles SRQ empty", .pme_long_desc = "The Store Request Queue is empty", }, [ POWER4_PME_PM_FPU1_STF ] = { .pme_name = "PM_FPU1_STF", .pme_code = 0x126, .pme_short_desc = "FPU1 executed store instruction", .pme_long_desc = "This signal is active for one cycle when fp1 is executing a store instruction.", }, [ POWER4_PME_PM_L3B0_DIR_REF ] = { .pme_name = "PM_L3B0_DIR_REF", .pme_code = 0xf00, .pme_short_desc = "L3 bank 0 directory references", .pme_long_desc = "A reference was made to the local L3 directory by a local CPU. Only requests from on-MCM CPUs are counted. This event is scaled to the L3 speed and the count must be scaled. i.e. if the L3 is running 3:1, divide the count by 3", }, [ POWER4_PME_PM_RUN_CYC ] = { .pme_name = "PM_RUN_CYC", .pme_code = 0x1005, .pme_short_desc = "Run cycles", .pme_long_desc = "Processor Cycles gated by the run latch", }, [ POWER4_PME_PM_LSU_LMQ_S0_VALID ] = { .pme_name = "PM_LSU_LMQ_S0_VALID", .pme_code = 0x931, .pme_short_desc = "LMQ slot 0 valid", .pme_long_desc = "This signal is asserted every cycle when the first entry in the LMQ is valid. The LMQ had eight entries that are allocated FIFO", }, [ POWER4_PME_PM_LSU_LRQ_S0_VALID ] = { .pme_name = "PM_LSU_LRQ_S0_VALID", .pme_code = 0xc22, .pme_short_desc = "LRQ slot 0 valid", .pme_long_desc = "This signal is asserted every cycle that the Load Request Queue slot zero is valid. 
The SRQ is 32 entries long and is allocated round-robin.", }, [ POWER4_PME_PM_LSU0_LDF ] = { .pme_name = "PM_LSU0_LDF", .pme_code = 0x930, .pme_short_desc = "LSU0 executed Floating Point load instruction", .pme_long_desc = "A floating point load was executed from LSU unit 0", }, [ POWER4_PME_PM_MRK_IMR_RELOAD ] = { .pme_name = "PM_MRK_IMR_RELOAD", .pme_code = 0x922, .pme_short_desc = "Marked IMR reloaded", .pme_long_desc = "A DL1 reload occurred due to marked load", }, [ POWER4_PME_PM_7INST_CLB_CYC ] = { .pme_name = "PM_7INST_CLB_CYC", .pme_code = 0x456, .pme_short_desc = "Cycles 7 instructions in CLB", .pme_long_desc = "The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer. Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) correspanding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue. This signal gives a real time history of the number of instruction quads valid in the instruction queue.", }, [ POWER4_PME_PM_MRK_GRP_TIMEO ] = { .pme_name = "PM_MRK_GRP_TIMEO", .pme_code = 0x5005, .pme_short_desc = "Marked group completion timeout", .pme_long_desc = "The sampling timeout expired indicating that the previously sampled instruction is no longer in the processor", }, [ POWER4_PME_PM_FPU_FMOV_FEST ] = { .pme_name = "PM_FPU_FMOV_FEST", .pme_code = 0x8110, .pme_short_desc = "FPU executing FMOV or FEST instructions", .pme_long_desc = "This signal is active for one cycle when executing a move kind of instruction or one of the estimate instructions.. This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ . 
Combined Unit 0 + Unit 1", }, [ POWER4_PME_PM_GRP_DISP_BLK_SB_CYC ] = { .pme_name = "PM_GRP_DISP_BLK_SB_CYC", .pme_code = 0x231, .pme_short_desc = "Cycles group dispatch blocked by scoreboard", .pme_long_desc = "The ISU sends a signal indicating that dispatch is blocked by scoreboard.", }, [ POWER4_PME_PM_XER_MAP_FULL_CYC ] = { .pme_name = "PM_XER_MAP_FULL_CYC", .pme_code = 0x202, .pme_short_desc = "Cycles XER mapper full", .pme_long_desc = "The ISU sends a signal indicating that the xer mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be.", }, [ POWER4_PME_PM_ST_MISS_L1 ] = { .pme_name = "PM_ST_MISS_L1", .pme_code = 0xc23, .pme_short_desc = "L1 D cache store misses", .pme_long_desc = "A store missed the dcache", }, [ POWER4_PME_PM_STOP_COMPLETION ] = { .pme_name = "PM_STOP_COMPLETION", .pme_code = 0x3001, .pme_short_desc = "Completion stopped", .pme_long_desc = "RAS Unit has signaled completion to stop", }, [ POWER4_PME_PM_MRK_GRP_CMPL ] = { .pme_name = "PM_MRK_GRP_CMPL", .pme_code = 0x4004, .pme_short_desc = "Marked group completed", .pme_long_desc = "A group containing a sampled instruction completed. 
Microcoded instructions that span multiple groups will generate this event once per group.", }, [ POWER4_PME_PM_ISLB_MISS ] = { .pme_name = "PM_ISLB_MISS", .pme_code = 0x901, .pme_short_desc = "Instruction SLB misses", .pme_long_desc = "A SLB miss for an instruction fetch as occurred", }, [ POWER4_PME_PM_CYC ] = { .pme_name = "PM_CYC", .pme_code = 0x7, .pme_short_desc = "Processor cycles", .pme_long_desc = "Processor cycles", }, [ POWER4_PME_PM_LD_MISS_L1_LSU1 ] = { .pme_name = "PM_LD_MISS_L1_LSU1", .pme_code = 0xc16, .pme_short_desc = "LSU1 L1 D cache load misses", .pme_long_desc = "A load, executing on unit 1, missed the dcache", }, [ POWER4_PME_PM_STCX_FAIL ] = { .pme_name = "PM_STCX_FAIL", .pme_code = 0x921, .pme_short_desc = "STCX failed", .pme_long_desc = "A stcx (stwcx or stdcx) failed", }, [ POWER4_PME_PM_LSU1_SRQ_STFWD ] = { .pme_name = "PM_LSU1_SRQ_STFWD", .pme_code = 0xc24, .pme_short_desc = "LSU1 SRQ store forwarded", .pme_long_desc = "Data from a store instruction was forwarded to a load on unit 1", }, [ POWER4_PME_PM_GRP_DISP ] = { .pme_name = "PM_GRP_DISP", .pme_code = 0x2004, .pme_short_desc = "Group dispatches", .pme_long_desc = "A group was dispatched", }, [ POWER4_PME_PM_DATA_FROM_L2 ] = { .pme_name = "PM_DATA_FROM_L2", .pme_code = 0x4c66, .pme_short_desc = "Data loaded from L2", .pme_long_desc = "DL1 was reloaded from the local L2 due to a demand load", }, [ POWER4_PME_PM_L2_PREF ] = { .pme_name = "PM_L2_PREF", .pme_code = 0xc34, .pme_short_desc = "L2 cache prefetches", .pme_long_desc = "A request to prefetch data into L2 was made", }, [ POWER4_PME_PM_FPU0_FPSCR ] = { .pme_name = "PM_FPU0_FPSCR", .pme_code = 0x130, .pme_short_desc = "FPU0 executed FPSCR instruction", .pme_long_desc = "This signal is active for one cycle when fp0 is executing fpscr move related instruction. This could be mtfsfi*, mtfsb0*, mtfsb1*. 
mffs*, mtfsf*, mcrsf* where XYZ* means XYZ, XYZs, XYZ., XYZs", }, [ POWER4_PME_PM_FPU1_DENORM ] = { .pme_name = "PM_FPU1_DENORM", .pme_code = 0x124, .pme_short_desc = "FPU1 received denormalized data", .pme_long_desc = "This signal is active for one cycle when one of the operands is denormalized.", }, [ POWER4_PME_PM_MRK_DATA_FROM_L25_MOD ] = { .pme_name = "PM_MRK_DATA_FROM_L25_MOD", .pme_code = 0x8c76, .pme_short_desc = "Marked data loaded from L2.5 modified", .pme_long_desc = "DL1 was reloaded with modified (M) data from the L2 of a chip on this MCM due to a marked demand load", }, [ POWER4_PME_PM_L2SB_ST_REQ ] = { .pme_name = "PM_L2SB_ST_REQ", .pme_code = 0xf12, .pme_short_desc = "L2 slice B store requests", .pme_long_desc = "A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. The event is provided on each of the three slices A,B,and C.", }, [ POWER4_PME_PM_L2SB_MOD_INV ] = { .pme_name = "PM_L2SB_MOD_INV", .pme_code = 0xf23, .pme_short_desc = "L2 slice B transition from modified to invalid", .pme_long_desc = "A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A,B,and C.", }, [ POWER4_PME_PM_FPU0_FSQRT ] = { .pme_name = "PM_FPU0_FSQRT", .pme_code = 0x102, .pme_short_desc = "FPU0 executed FSQRT instruction", .pme_long_desc = "This signal is active for one cycle at the end of the microcode executed when fp0 is executing a square root instruction. 
This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs.", }, [ POWER4_PME_PM_LD_REF_L1 ] = { .pme_name = "PM_LD_REF_L1", .pme_code = 0x8c10, .pme_short_desc = "L1 D cache load references", .pme_long_desc = "Total DL1 Load references", }, [ POWER4_PME_PM_MRK_L1_RELOAD_VALID ] = { .pme_name = "PM_MRK_L1_RELOAD_VALID", .pme_code = 0xc74, .pme_short_desc = "Marked L1 reload data source valid", .pme_long_desc = "The source information is valid and is for a marked load", }, [ POWER4_PME_PM_L2SB_SHR_MOD ] = { .pme_name = "PM_L2SB_SHR_MOD", .pme_code = 0xf20, .pme_short_desc = "L2 slice B transition from shared to modified", .pme_long_desc = "A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A,B,and C. ", }, [ POWER4_PME_PM_INST_FROM_L1 ] = { .pme_name = "PM_INST_FROM_L1", .pme_code = 0x6327, .pme_short_desc = "Instruction fetched from L1", .pme_long_desc = "An instruction fetch group was fetched from L1. Fetch Groups can contain up to 8 instructions", }, [ POWER4_PME_PM_1PLUS_PPC_CMPL ] = { .pme_name = "PM_1PLUS_PPC_CMPL", .pme_code = 0x5003, .pme_short_desc = "One or more PPC instruction completed", .pme_long_desc = "A group containing at least one PPC instruction completed. 
For microcoded instructions that span multiple groups, this will only occur once.", }, [ POWER4_PME_PM_EE_OFF_EXT_INT ] = { .pme_name = "PM_EE_OFF_EXT_INT", .pme_code = 0x237, .pme_short_desc = "Cycles MSR(EE) bit off and external interrupt pending", .pme_long_desc = "Cycles MSR(EE) bit off and external interrupt pending", }, [ POWER4_PME_PM_L2SC_SHR_MOD ] = { .pme_name = "PM_L2SC_SHR_MOD", .pme_code = 0xf24, .pme_short_desc = "L2 slice C transition from shared to modified", .pme_long_desc = "A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A,B,and C. ", }, [ POWER4_PME_PM_LSU_LRQ_FULL_CYC ] = { .pme_name = "PM_LSU_LRQ_FULL_CYC", .pme_code = 0x212, .pme_short_desc = "Cycles LRQ full", .pme_long_desc = "The isu sends this signal when the lrq is full.", }, [ POWER4_PME_PM_IC_PREF_INSTALL ] = { .pme_name = "PM_IC_PREF_INSTALL", .pme_code = 0x325, .pme_short_desc = "Instruction prefetched installed in prefetch buffer", .pme_long_desc = "This signal is asserted when a prefetch buffer entry (line) is allocated but the request is not a demand fetch.", }, [ POWER4_PME_PM_MRK_LSU1_FLUSH_SRQ ] = { .pme_name = "PM_MRK_LSU1_FLUSH_SRQ", .pme_code = 0x917, .pme_short_desc = "LSU1 marked SRQ flushes", .pme_long_desc = "A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group.", }, [ POWER4_PME_PM_GCT_FULL_CYC ] = { .pme_name = "PM_GCT_FULL_CYC", .pme_code = 0x200, .pme_short_desc = "Cycles GCT full", .pme_long_desc = "The ISU sends a signal indicating the gct is full. 
", }, [ POWER4_PME_PM_INST_FROM_MEM ] = { .pme_name = "PM_INST_FROM_MEM", .pme_code = 0x1327, .pme_short_desc = "Instruction fetched from memory", .pme_long_desc = "An instruction fetch group was fetched from memory. Fetch Groups can contain up to 8 instructions", }, [ POWER4_PME_PM_FXU_BUSY ] = { .pme_name = "PM_FXU_BUSY", .pme_code = 0x6002, .pme_short_desc = "FXU busy", .pme_long_desc = "FXU0 and FXU1 are both busy", }, [ POWER4_PME_PM_ST_REF_L1_LSU1 ] = { .pme_name = "PM_ST_REF_L1_LSU1", .pme_code = 0xc15, .pme_short_desc = "LSU1 L1 D cache store references", .pme_long_desc = "A store executed on unit 1", }, [ POWER4_PME_PM_MRK_LD_MISS_L1 ] = { .pme_name = "PM_MRK_LD_MISS_L1", .pme_code = 0x1920, .pme_short_desc = "Marked L1 D cache load misses", .pme_long_desc = "Marked L1 D cache load misses", }, [ POWER4_PME_PM_MRK_LSU1_INST_FIN ] = { .pme_name = "PM_MRK_LSU1_INST_FIN", .pme_code = 0xc32, .pme_short_desc = "LSU1 finished a marked instruction", .pme_long_desc = "LSU unit 1 finished a marked instruction", }, [ POWER4_PME_PM_L1_WRITE_CYC ] = { .pme_name = "PM_L1_WRITE_CYC", .pme_code = 0x333, .pme_short_desc = "Cycles writing to instruction L1", .pme_long_desc = "This signal is asserted each cycle a cache write is active.", }, [ POWER4_PME_PM_BIQ_IDU_FULL_CYC ] = { .pme_name = "PM_BIQ_IDU_FULL_CYC", .pme_code = 0x324, .pme_short_desc = "Cycles BIQ or IDU full", .pme_long_desc = "This signal will be asserted each time either the IDU is full or the BIQ is full.", }, [ POWER4_PME_PM_MRK_LSU0_INST_FIN ] = { .pme_name = "PM_MRK_LSU0_INST_FIN", .pme_code = 0xc31, .pme_short_desc = "LSU0 finished a marked instruction", .pme_long_desc = "LSU unit 0 finished a marked instruction", }, [ POWER4_PME_PM_L2SC_ST_REQ ] = { .pme_name = "PM_L2SC_ST_REQ", .pme_code = 0xf14, .pme_short_desc = "L2 slice C store requests", .pme_long_desc = "A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. 
The event is provided on each of the three slices A,B,and C.", }, [ POWER4_PME_PM_LSU1_BUSY ] = { .pme_name = "PM_LSU1_BUSY", .pme_code = 0xc37, .pme_short_desc = "LSU1 busy", .pme_long_desc = "LSU unit 1 is busy rejecting instructions ", }, [ POWER4_PME_PM_FPU_ALL ] = { .pme_name = "PM_FPU_ALL", .pme_code = 0x5100, .pme_short_desc = "FPU executed add, mult, sub, cmp or sel instruction", .pme_long_desc = "This signal is active for one cycle when FPU is executing an add, mult, sub, compare, or fsel kind of instruction. This could be fadd*, fmul*, fsub*, fcmp**, fsel where XYZ* means XYZ, XYZs, XYZ., XYZs. and XYZ** means XYZu, XYZo. Combined Unit 0 + Unit 1", }, [ POWER4_PME_PM_LSU_SRQ_S0_ALLOC ] = { .pme_name = "PM_LSU_SRQ_S0_ALLOC", .pme_code = 0xc25, .pme_short_desc = "SRQ slot 0 allocated", .pme_long_desc = "SRQ Slot zero was allocated", }, [ POWER4_PME_PM_GRP_MRK ] = { .pme_name = "PM_GRP_MRK", .pme_code = 0x5004, .pme_short_desc = "Group marked in IDU", .pme_long_desc = "A group was sampled (marked)", }, [ POWER4_PME_PM_FPU1_FIN ] = { .pme_name = "PM_FPU1_FIN", .pme_code = 0x117, .pme_short_desc = "FPU1 produced a result", .pme_long_desc = "fp1 finished, produced a result. This only indicates finish, not completion. ", }, [ POWER4_PME_PM_DC_PREF_STREAM_ALLOC ] = { .pme_name = "PM_DC_PREF_STREAM_ALLOC", .pme_code = 0x907, .pme_short_desc = "D cache new prefetch stream allocated", .pme_long_desc = "A new Prefetch Stream was allocated", }, [ POWER4_PME_PM_BR_MPRED_CR ] = { .pme_name = "PM_BR_MPRED_CR", .pme_code = 0x331, .pme_short_desc = "Branch mispredictions due CR bit setting", .pme_long_desc = "This signal is asserted when the branch execution unit detects a branch mispredict because the CR value is opposite of the predicted value. 
This signal is asserted after a branch issue event and will result in a branch redirect flush if not overridden by a flush of an older instruction.", }, [ POWER4_PME_PM_BR_MPRED_TA ] = { .pme_name = "PM_BR_MPRED_TA", .pme_code = 0x332, .pme_short_desc = "Branch mispredictions due to target address", .pme_long_desc = "branch miss predict due to a target address prediction. This signal will be asserted each time the branch execution unit detects an incorrect target address prediction. This signal will be asserted after a valid branch execution unit issue and will cause a branch mispredict flush unless a flush is detected from an older instruction.", }, [ POWER4_PME_PM_CRQ_FULL_CYC ] = { .pme_name = "PM_CRQ_FULL_CYC", .pme_code = 0x211, .pme_short_desc = "Cycles CR issue queue full", .pme_long_desc = "The ISU sends a signal indicating that the issue queue that feeds the ifu cr unit cannot accept any more group (queue is full of groups).", }, [ POWER4_PME_PM_INST_FROM_PREF ] = { .pme_name = "PM_INST_FROM_PREF", .pme_code = 0x7327, .pme_short_desc = "Instructions fetched from prefetch", .pme_long_desc = "An instruction fetch group was fetched from the prefetch buffer. 
Fetch Groups can contain up to 8 instructions", }, [ POWER4_PME_PM_LD_MISS_L1 ] = { .pme_name = "PM_LD_MISS_L1", .pme_code = 0x3c10, .pme_short_desc = "L1 D cache load misses", .pme_long_desc = "Total DL1 Load references that miss the DL1", }, [ POWER4_PME_PM_STCX_PASS ] = { .pme_name = "PM_STCX_PASS", .pme_code = 0xc75, .pme_short_desc = "Stcx passes", .pme_long_desc = "A stcx (stwcx or stdcx) instruction was successful", }, [ POWER4_PME_PM_DC_INV_L2 ] = { .pme_name = "PM_DC_INV_L2", .pme_code = 0xc17, .pme_short_desc = "L1 D cache entries invalidated from L2", .pme_long_desc = "A dcache invalidated was received from the L2 because a line in L2 was castout.", }, [ POWER4_PME_PM_LSU_SRQ_FULL_CYC ] = { .pme_name = "PM_LSU_SRQ_FULL_CYC", .pme_code = 0x213, .pme_short_desc = "Cycles SRQ full", .pme_long_desc = "The isu sends this signal when the srq is full.", }, [ POWER4_PME_PM_LSU0_FLUSH_LRQ ] = { .pme_name = "PM_LSU0_FLUSH_LRQ", .pme_code = 0xc02, .pme_short_desc = "LSU0 LRQ flushes", .pme_long_desc = "A load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte.", }, [ POWER4_PME_PM_LSU_SRQ_S0_VALID ] = { .pme_name = "PM_LSU_SRQ_S0_VALID", .pme_code = 0xc21, .pme_short_desc = "SRQ slot 0 valid", .pme_long_desc = "This signal is asserted every cycle that the Store Request Queue slot zero is valid. 
The SRQ is 32 entries long and is allocated round-robin.", }, [ POWER4_PME_PM_LARX_LSU0 ] = { .pme_name = "PM_LARX_LSU0", .pme_code = 0xc73, .pme_short_desc = "Larx executed on LSU0", .pme_long_desc = "A larx (lwarx or ldarx) was executed on side 0 (there is no coresponding unit 1 event since larx instructions can only execute on unit 0)", }, [ POWER4_PME_PM_GCT_EMPTY_CYC ] = { .pme_name = "PM_GCT_EMPTY_CYC", .pme_code = 0x1004, .pme_short_desc = "Cycles GCT empty", .pme_long_desc = "The Global Completion Table is completely empty", }, [ POWER4_PME_PM_FPU1_ALL ] = { .pme_name = "PM_FPU1_ALL", .pme_code = 0x107, .pme_short_desc = "FPU1 executed add, mult, sub, cmp or sel instruction", .pme_long_desc = "This signal is active for one cycle when fp1 is executing an add, mult, sub, compare, or fsel kind of instruction. This could be fadd*, fmul*, fsub*, fcmp**, fsel where XYZ* means XYZ, XYZs, XYZ., XYZs. and XYZ** means XYZu, XYZo", }, [ POWER4_PME_PM_FPU1_FSQRT ] = { .pme_name = "PM_FPU1_FSQRT", .pme_code = 0x106, .pme_short_desc = "FPU1 executed FSQRT instruction", .pme_long_desc = "This signal is active for one cycle at the end of the microcode executed when fp1 is executing a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs.", }, [ POWER4_PME_PM_FPU_FIN ] = { .pme_name = "PM_FPU_FIN", .pme_code = 0x4110, .pme_short_desc = "FPU produced a result", .pme_long_desc = "FPU finished, produced a result This only indicates finish, not completion. Combined Unit 0 + Unit 1", }, [ POWER4_PME_PM_L2SA_SHR_MOD ] = { .pme_name = "PM_L2SA_SHR_MOD", .pme_code = 0xf04, .pme_short_desc = "L2 slice A transition from shared to modified", .pme_long_desc = "A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. 
The event is provided on each of the three slices A,B,and C. ", }, [ POWER4_PME_PM_MRK_LD_MISS_L1_LSU1 ] = { .pme_name = "PM_MRK_LD_MISS_L1_LSU1", .pme_code = 0x924, .pme_short_desc = "LSU1 L1 D cache load misses", .pme_long_desc = "A marked load, executing on unit 1, missed the dcache", }, [ POWER4_PME_PM_LSU_SRQ_STFWD ] = { .pme_name = "PM_LSU_SRQ_STFWD", .pme_code = 0x1c20, .pme_short_desc = "SRQ store forwarded", .pme_long_desc = "Data from a store instruction was forwarded to a load", }, [ POWER4_PME_PM_FXU0_FIN ] = { .pme_name = "PM_FXU0_FIN", .pme_code = 0x232, .pme_short_desc = "FXU0 produced a result", .pme_long_desc = "The Fixed Point unit 0 finished an instruction and produced a result", }, [ POWER4_PME_PM_MRK_FPU_FIN ] = { .pme_name = "PM_MRK_FPU_FIN", .pme_code = 0x7004, .pme_short_desc = "Marked instruction FPU processing finished", .pme_long_desc = "One of the Floating Point Units finished a marked instruction. Instructions that finish may not necessary complete", }, [ POWER4_PME_PM_LSU_BUSY ] = { .pme_name = "PM_LSU_BUSY", .pme_code = 0x4c30, .pme_short_desc = "LSU busy", .pme_long_desc = "LSU (unit 0 + unit 1) is busy rejecting instructions ", }, [ POWER4_PME_PM_INST_FROM_L35 ] = { .pme_name = "PM_INST_FROM_L35", .pme_code = 0x4327, .pme_short_desc = "Instructions fetched from L3.5", .pme_long_desc = "An instruction fetch group was fetched from the L3 of another module. Fetch Groups can contain up to 8 instructions", }, [ POWER4_PME_PM_FPU1_FRSP_FCONV ] = { .pme_name = "PM_FPU1_FRSP_FCONV", .pme_code = 0x115, .pme_short_desc = "FPU1 executed FRSP or FCONV instructions", .pme_long_desc = "fThis signal is active for one cycle when fp1 is executing frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs.", }, [ POWER4_PME_PM_SNOOP_TLBIE ] = { .pme_name = "PM_SNOOP_TLBIE", .pme_code = 0x903, .pme_short_desc = "Snoop TLBIE", .pme_long_desc = "A TLB miss for a data request occurred. 
Requests that miss the TLB may be retried until the instruction is in the next to complete group (unless HID4 is set to allow speculative tablewalks). This may result in multiple TLB misses for the same instruction.", }, [ POWER4_PME_PM_FPU0_FDIV ] = { .pme_name = "PM_FPU0_FDIV", .pme_code = 0x100, .pme_short_desc = "FPU0 executed FDIV instruction", .pme_long_desc = "This signal is active for one cycle at the end of the microcode executed when fp0 is executing a divide instruction. This could be fdiv, fdivs, fdiv. fdivs.", }, [ POWER4_PME_PM_LD_REF_L1_LSU1 ] = { .pme_name = "PM_LD_REF_L1_LSU1", .pme_code = 0xc14, .pme_short_desc = "LSU1 L1 D cache load references", .pme_long_desc = "A load executed on unit 1", }, [ POWER4_PME_PM_MRK_DATA_FROM_L275_MOD ] = { .pme_name = "PM_MRK_DATA_FROM_L275_MOD", .pme_code = 0x7c76, .pme_short_desc = "Marked data loaded from L2.75 modified", .pme_long_desc = "DL1 was reloaded with modified (M) data from the L2 of another MCM due to a marked demand load. ", }, [ POWER4_PME_PM_HV_CYC ] = { .pme_name = "PM_HV_CYC", .pme_code = 0x3004, .pme_short_desc = "Hypervisor Cycles", .pme_long_desc = "Cycles when the processor is executing in Hypervisor (MSR[HV] = 0 and MSR[PR]=0)", }, [ POWER4_PME_PM_6INST_CLB_CYC ] = { .pme_name = "PM_6INST_CLB_CYC", .pme_code = 0x455, .pme_short_desc = "Cycles 6 instructions in CLB", .pme_long_desc = "The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer. Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) correspanding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue. 
This signal gives a real time history of the number of instruction quads valid in the instruction queue.", }, [ POWER4_PME_PM_LR_CTR_MAP_FULL_CYC ] = { .pme_name = "PM_LR_CTR_MAP_FULL_CYC", .pme_code = 0x206, .pme_short_desc = "Cycles LR/CTR mapper full", .pme_long_desc = "The ISU sends a signal indicating that the lr/ctr mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be.", }, [ POWER4_PME_PM_L2SC_MOD_INV ] = { .pme_name = "PM_L2SC_MOD_INV", .pme_code = 0xf27, .pme_short_desc = "L2 slice C transition from modified to invalid", .pme_long_desc = "A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A,B,and C.", }, [ POWER4_PME_PM_FPU_DENORM ] = { .pme_name = "PM_FPU_DENORM", .pme_code = 0x1120, .pme_short_desc = "FPU received denormalized data", .pme_long_desc = "This signal is active for one cycle when one of the operands is denormalized. Combined Unit 0 + Unit 1", }, [ POWER4_PME_PM_DATA_FROM_L275_MOD ] = { .pme_name = "PM_DATA_FROM_L275_MOD", .pme_code = 0x7c66, .pme_short_desc = "Data loaded from L2.75 modified", .pme_long_desc = "DL1 was reloaded with modified (M) data from the L2 of another MCM due to a demand load. ", }, [ POWER4_PME_PM_LSU1_DERAT_MISS ] = { .pme_name = "PM_LSU1_DERAT_MISS", .pme_code = 0x906, .pme_short_desc = "LSU1 DERAT misses", .pme_long_desc = "A data request (load or store) from LSU Unit 1 missed the ERAT and resulted in an ERAT reload. 
Multiple instructions may miss the ERAT entry for the same 4K page, but only one reload will occur.", }, [ POWER4_PME_PM_IC_PREF_REQ ] = { .pme_name = "PM_IC_PREF_REQ", .pme_code = 0x326, .pme_short_desc = "Instruction prefetch requests", .pme_long_desc = "Asserted when a non-canceled prefetch is made to the cache interface unit (CIU).", }, [ POWER4_PME_PM_MRK_LSU_FIN ] = { .pme_name = "PM_MRK_LSU_FIN", .pme_code = 0x8004, .pme_short_desc = "Marked instruction LSU processing finished", .pme_long_desc = "One of the Load/Store Units finished a marked instruction. Instructions that finish may not necessary complete", }, [ POWER4_PME_PM_MRK_DATA_FROM_L3 ] = { .pme_name = "PM_MRK_DATA_FROM_L3", .pme_code = 0x1c76, .pme_short_desc = "Marked data loaded from L3", .pme_long_desc = "DL1 was reloaded from the local L3 due to a marked demand load", }, [ POWER4_PME_PM_MRK_DATA_FROM_MEM ] = { .pme_name = "PM_MRK_DATA_FROM_MEM", .pme_code = 0x2c76, .pme_short_desc = "Marked data loaded from memory", .pme_long_desc = "DL1 was reloaded from memory due to a marked demand load", }, [ POWER4_PME_PM_LSU0_FLUSH_UST ] = { .pme_name = "PM_LSU0_FLUSH_UST", .pme_code = 0xc01, .pme_short_desc = "LSU0 unaligned store flushes", .pme_long_desc = "A store was flushed from unit 0 because it was unaligned (crossed a 4k boundary)", }, [ POWER4_PME_PM_LSU_FLUSH_LRQ ] = { .pme_name = "PM_LSU_FLUSH_LRQ", .pme_code = 0x6c00, .pme_short_desc = "LRQ flushes", .pme_long_desc = "A load was flushed because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte.", }, [ POWER4_PME_PM_LSU_FLUSH_SRQ ] = { .pme_name = "PM_LSU_FLUSH_SRQ", .pme_code = 0x5c00, .pme_short_desc = "SRQ flushes", .pme_long_desc = "A store was flushed because younger load hits and older store that is already in the SRQ or in the same group.", }, [ POWER4_PME_PM_L2SC_MOD_TAG ] = { .pme_name = 
"PM_L2SC_MOD_TAG", .pme_code = 0xf26, .pme_short_desc = "L2 slice C transition from modified to tagged", .pme_long_desc = "A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A,B,and C.", } }; #endif papi-5.6.0/src/libpfm4/lib/events/mips_74k_events.h000664 001750 001750 00000047127 13216244364 024141 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2011 Samara Technology Group, Inc * Contributed by Philip Mucci * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * Based on: * MIPS32 74KTM Processor Core Family Software Users' Manual * Document Number: MD00519 Revision 01.05 March 30, 2011 */ static const mips_entry_t mips_74k_pe []={ { .name = "CYCLES", /* BOTH */ .code = 0x0, .desc = "Cycles", }, { .name = "INSTRUCTIONS", /* BOTH */ .code = 0x1, .desc = "Instructions graduated", }, { .name = "PREDICTED_JR_31", .code = 0x2, .desc = "jr $31 (return) instructions whose target is predicted", }, { .name = "JR_31_MISPREDICTIONS", .code = 0x82, .desc = "jr $31 (return) predicted but guessed wrong", }, { .name = "REDIRECT_STALLS", .code = 0x3, .desc = "Cycles where no instruction is fetched because it has no next address candidate. This includes stalls due to register indirect jumps such as jr, stalls following a wait or eret and stalls dues to exceptions from instruction fetch", }, { .name = "JR_31_NO_PREDICTIONS", .code = 0x83, .desc = "jr $31 (return) instructions fetched and not predicted using RPS", }, { .name = "ITLB_ACCESSES", .code = 0x4, .desc = "ITLB accesses", }, { .name = "ITLB_MISSES", .code = 0x84, .desc = "ITLB misses, which result in a JTLB access", }, { .name = "JTLB_INSN_MISSES", .code = 0x85, .desc = "JTLB instruction access misses (will lead to an exception)", }, { .name = "ICACHE_ACCESSES", .code = 0x6, .desc = "Instruction cache accesses. 74K cores have a 128-bit connection to the I-cache and fetch 4 instructions every access. This counts every such access, including accesses for instructions which are eventually discarded. For example, following a branch which is incorrectly predicted, the 74K core will continue to fetch instructions, which will eventually get thrown away", }, { .name = "ICACHE_MISSES", .code = 0x86, .desc = "I-cache misses. 
Includes misses resulting from fetch-ahead and speculation", }, { .name = "ICACHE_MISS_STALLS", .code = 0x7, .desc = "Cycles where no instruction is fetched because we missed in the I-cache", }, { .name = "UNCACHED_IFETCH_STALLS", .code = 0x8, .desc = "Cycles where no instruction is fetched because we're waiting for an I-fetch from uncached memory", }, { .name = "PDTRACE_BACK_STALLS", .code = 0x88, .desc = "PDTrace back stalls", }, { .name = "IFU_REPLAYS", .code = 0x9, .desc = "Number of times the instruction fetch pipeline is flushed and replayed because the IFU buffers are full and unable to accept any instructions", }, { .name = "KILLED_FETCH_SLOTS", .code = 0x89, .desc = "Valid fetch slots killed due to taken branches/jumps or stalling instructions", }, { .name = "DDQ0_FULL_DR_STALLS", .code = 0xd, .desc = "Cycles where no instructions are brought into the IDU because the ALU instruction candidate pool is full", }, { .name = "DDQ1_FULL_DR_STALLS", .code = 0x8d, .desc = "Cycles where no instructions are brought into the IDU because the AGEN instruction candidate pool is full", }, { .name = "ALCB_FULL_DR_STALLS", .code = 0xe, .desc = "Cycles where no instructions can be added to the issue pool, because we have run out of ALU completion buffers (CBs)", }, { .name = "AGCB_FULL_DR_STALLS", .code = 0x8e, .desc = "Cycles where no instructions can be added to the issue pool, because we have run out of AGEN completion buffers (CBs)", }, { .name = "CLDQ_FULL_DR_STALLS", .code = 0xf, .desc = "Cycles where no instructions can be added to the issue pool, because we've used all the FIFO entries in the CLDQ which keep track of data coming back from the FPU", }, { .name = "IODQ_FULL_DR_STALLS", .code = 0x8f, .desc = "Cycles where no instructions can be added to the issue pool, because we've filled the in order FIFO used for coprocessor 1 instructions (IOIQ)", }, { .name = "ALU_EMPTY_CYCLES", .code = 0x10, .desc = "Cycles with no ALU-pipe issue; no instructions available", }, { 
.name = "AGEN_EMPTY_CYCLES", .code = 0x90, .desc = "Cycles with no AGEN-pipe issue; no instructions available", }, { .name = "ALU_OPERANDS_NOT_READY_CYCLES", .code = 0x11, .desc = "Cycles with no ALU-pipe issue; we have instructions, but operands not ready", }, { .name = "AGEN_OPERANDS_NOT_READY_CYCLES", .code = 0x91, .desc = "Cycles with no AGEN-pipe issue; we have instructions, but operands not ready", }, { .name = "ALU_NO_ISSUE_CYCLES", .code = 0x12, .desc = "Cycles with no ALU-pipe issue; we have instructions, but some resource is unavailable. This includes, operands are not ready (same as event 17), div in progress inhibits MDU instructions, CorExtend resource limitation", }, { .name = "AGEN_NO_ISSUE_CYCLES", .code = 0x92, .desc = "Cycles with no AGEN-pipe issue; we have instructions, but some resource is unavailable. This includes, operands are not ready (same as event 17), Non-issued stores blocking ready to issue loads, issued cacheops blocking ready to issue loads", }, { .name = "ALU_BUBBLE_CYCLES", .code = 0x13, .desc = "ALU-pipe bubble issued. The resulting empty pipe stage guarantees that some resource will be unused for a cycle, sometime soon. Used, for example, to guarantee an opportunity to write mfc1 data into a CB", }, { .name = "AGEN_BUBBLE_CYCLES", .code = 0x93, .desc = "AGEN-pipe bubble issued. The resulting empty pipe stage guarantees that some resource will be unused for a cycle, sometime soon. Used, for example, to allow access to the data cache for refill or eviction", }, { .name = "SINGLE_ISSUE_CYCLES", .code = 0x14, .desc = "Cycles when one instruction is issued", }, { .name = "DUAL_ISSUE_CYCLES", .code = 0x94, .desc = "Cycles when two instructions are issued (one ALU, one AGEN)", }, { .name = "OOO_ALU_ISSUE_CYCLES", .code = 0x15, .desc = "Cycles when instructions are issued out of order into the ALU pipe. i.e. 
instruction issued is not the oldest in the pool", }, { .name = "OOO_AGEN_ISSUE_CYCLES", .code = 0x95, .desc = "Cycles when instructions are issued out of order into the AGEN pipe. i.e. instruction issued is not the oldest in the pool", }, { .name = "JALR_JALR_HB_INSNS", .code = 0x16, .desc = "Graduated JAR/JALR.HB", }, { .name = "DCACHE_LINE_REFILL_REQUESTS", .code = 0x96, .desc = "D-Cache line refill (not LD/ST misses)", }, { .name = "DCACHE_LOAD_ACCESSES", .code = 0x17, .desc = "Cacheable loads - Counts all accesses to the D-cache caused by load instructions. This count includes instructions that do not graduate", }, { .name = "DCACHE_ACCESSES", .code = 0x97, .desc = "All D-cache accesses (loads, stores, prefetch, cacheop etc). This count includes instructions that do not graduate", }, { .name = "DCACHE_WRITEBACKS", .code = 0x18, .desc = "D-Cache writebacks", }, { .name = "DCACHE_MISSES", .code = 0x98, .desc = "D-cache misses. This count is per instruction at graduation and includes load, store, prefetch, synci and address based cacheops", }, { .name = "JTLB_DATA_ACCESSES", .code = 0x19, .desc = "JTLB d-side (data side as opposed to instruction side) accesses", }, { .name = "JTLB_DATA_MISSES", .code = 0x99, .desc = "JTLB translation fails on d-side (data side as opposed to instruction side) accesses. This count includes instructions that do not graduate", }, { .name = "LOAD_STORE_REPLAYS", .code = 0x1a, .desc = "Load/store instruction redirects, which happen when the load/store follows too closely on a possibly matching cacheop", }, { .name = "DCACHE_VTAG_MISMATCH", .code = 0x9a, .desc = "The 74K core's D-cache has an auxiliary virtual tag, used to pick the right line early. When (occasionally) the physical tag match and virtual tag match do not line up, it is treated as a cache miss - in processing the miss the virtual tag is corrected for future accesses. 
This event counts those bogus misses", }, { .name = "L2_CACHE_WRITEBACKS", .code = 0x1c, .desc = "L2 cache writebacks", }, { .name = "L2_CACHE_ACCESSES", .code = 0x9c, .desc = "L2 cache accesses", }, { .name = "L2_CACHE_MISSES", .code = 0x1d, .desc = "L2 cache misses", }, { .name = "L2_CACHE_MISS_CYCLES", .code = 0x9d, .desc = "L2 cache miss cycles", }, { .name = "FSB_FULL_STALLS", .code = 0x1e, .desc = "Cycles Fill Store Buffer(FSB) are full and cause a pipe stall", }, { .name = "FSB_OVER_50_FULL", .code = 0x9e, .desc = "Cycles Fill Store Buffer(FSB) > 1/2 full", }, { .name = "LDQ_FULL_STALLS", .code = 0x1f, .desc = "Cycles Load Data Queue (LDQ) are full and cause a pipe stall", }, { .name = "LDQ_OVER_50_FULL", .code = 0x9f, .desc = "Cycles Load Data Queue(LDQ) > 1/2 full", }, { .name = "WBB_FULL_STALLS", .code = 0x20, .desc = "Cycles Writeback Buffer(WBB) are full and cause a pipe stall", }, { .name = "WBB_OVER_50_FULL", .code = 0xa0, .desc = "Cycles Writeback Buffer(WBB) > 1/2 full", }, { .name = "LOAD_MISS_CONSUMER_REPLAYS", .code = 0x23, .desc = "Replays following optimistic issue of instruction dependent on load which missed. 
Counted only when the dependent instruction graduates", }, { .name = "FPU_LOAD_INSNS", .code = 0xa3, .desc = "Floating Point Load instructions graduated", }, { .name = "JR_NON_31_INSNS", .code = 0x24, .desc = "jr (not $31) instructions graduated", }, { .name = "MISPREDICTED_JR_31_INSNS", .code = 0xa4, .desc = "jr $31 mispredicted at graduation", }, { .name = "INT_BRANCH_INSNS", .code = 0x25, .desc = "Integer branch instructions graduated", }, { .name = "FPU_BRANCH_INSNS", .code = 0xa5, .desc = "Floating point branch instructions graduated", }, { .name = "BRANCH_LIKELY_INSNS", .code = 0x26, .desc = "Branch-likely instructions graduated", }, { .name = "MISPREDICTED_BRANCH_LIKELY_INSNS", .code = 0xa6, .desc = "Mispredicted branch-likely instructions graduated", }, { .name = "COND_BRANCH_INSNS", .code = 0x27, .desc = "Conditional branches graduated", }, { .name = "MISPREDICTED_BRANCH_INSNS", .code = 0xa7, .desc = "Mispredicted conditional branches graduated", }, { .name = "INTEGER_INSNS", .code = 0x28, .desc = "Integer instructions graduated (includes nop, ssnop, ehb as well as all arithmetic, logical, shift and extract type operations)", }, { .name = "FPU_INSNS", .code = 0xa8, .desc = "Floating point instructions graduated (but not counting floating point load/store)", }, { .name = "LOAD_INSNS", .code = 0x29, .desc = "Loads graduated (includes floating point)", }, { .name = "STORE_INSNS", .code = 0xa9, .desc = "Stores graduated (includes floating point). 
Of sc instructions, only successful ones are counted", }, { .name = "J_JAL_INSNS", .code = 0x2a, .desc = "j/jal graduated", }, { .name = "MIPS16_INSNS", .code = 0xaa, .desc = "MIPS16e instructions graduated", }, { .name = "NOP_INSNS", .code = 0x2b, .desc = "no-ops graduated - included (sll, nop, ssnop, ehb)", }, { .name = "NT_MUL_DIV_INSNS", .code = 0xab, .desc = "integer multiply/divides graduated", }, { .name = "DSP_INSNS", .code = 0x2c, .desc = "DSP instructions graduated", }, { .name = "ALU_DSP_SATURATION_INSNS", .code = 0xac, .desc = "ALU-DSP instructions graduated, result was saturated", }, { .name = "DSP_BRANCH_INSNS", .code = 0x2d, .desc = "DSP branch instructions graduated", }, { .name = "MDU_DSP_SATURATION_INSNS", .code = 0xad, .desc = "MDU-DSP instructions graduated, result was saturated", }, { .name = "UNCACHED_LOAD_INSNS", .code = 0x2e, .desc = "Uncached loads graduated", }, { .name = "UNCACHED_STORE_INSNS", .code = 0xae, .desc = "Uncached stores graduated", }, { .name = "EJTAG_INSN_TRIGGERS", .code = 0x31, .desc = "EJTAG instruction triggers", }, { .name = "EJTAG_DATA_TRIGGERS", .code = 0xb1, .desc = "EJTAG data triggers", }, { .name = "CP1_BRANCH_MISPREDICTIONS", .code = 0x32, .desc = "CP1 branches mispredicted", }, { .name = "SC_INSNS", .code = 0x33, .desc = "sc instructions graduated", }, { .name = "FAILED_SC_INSNS", .code = 0xb3, .desc = "sc instructions failed", }, { .name = "PREFETCH_INSNS", .code = 0x34, .desc = "prefetch instructions graduated at the top of LSGB", }, { .name = "CACHE_HIT_PREFETCH_INSNS", .code = 0xb4, .desc = "prefetch instructions which did nothing, because they hit in the cache", }, { .name = "NO_INSN_CYCLES", .code = 0x35, .desc = "Cycles where no instructions graduated", }, { .name = "LOAD_MISS_INSNS", .code = 0xb5, .desc = "Load misses graduated. 
Includes floating point loads", }, { .name = "ONE_INSN_CYCLES", .code = 0x36, .desc = "Cycles where one instruction graduated", }, { .name = "TWO_INSNS_CYCLES", .code = 0xb6, .desc = "Cycles where two instructions graduated", }, { .name = "GFIFO_BLOCKED_CYCLES", .code = 0x37, .desc = "GFifo blocked cycles", }, { .name = "FPU_STORE_INSNS", .code = 0xb7, .desc = "Floating point stores graduated", }, { .name = "GFIFO_BLOCKED_TLB_CACHE", .code = 0x38, .desc = "GFifo blocked due to TLB or Cacheop", }, { .name = "NO_INSTRUCTIONS_FROM_REPLAY_CYCLES", .code = 0xb8, .desc = "Number of cycles no instructions graduated from the time the pipe was flushed because of a replay until the first new instruction graduates. This is an indicator of the graduation bandwidth loss due to replay. Often times this replay is a result of event 25 and therefor an indicator of bandwidth lost due to cache misses", }, { .name = "MISPREDICTION_BRANCH_NODELAY_CYCLES", .code = 0x39, /* even counters event 57 (raw 57) */ .desc = "Slot 0 misprediction branch instruction graduation cycles without the delay slot" }, { .name = "MISPREDICTION_BRANCH_DELAY_WAIT_CYCLES", .code = 0xb9, /* even counters event 57 (raw 57) */ .desc = "Cycles waiting for delay slot to graduate on a mispredicted branch", }, { .name = "EXCEPTIONS_TAKEN", .code = 0x3a, .desc = "Exceptions taken", }, { .name = "GRADUATION_REPLAYS", .code = 0xba, .desc = "Replays initiated from graduation", }, { .name = "COREEXTEND_EVENTS", .code = 0x3b, .desc = "Implementation specific CorExtend event. The integrator of this core may connect the core pin UDI_perfcnt_event to an event to be counted. This is intended for use with the CorExtend interface", }, { .name = "DSPRAM_EVENTS", .code = 0xbe, .desc = "Implementation-specific DSPRAM event. 
The integrator of this core may connect the core pin SP_prf_c13_e62_xx to the event to be counted", }, { .name = "L2_CACHE_SINGLE_BIT_ERRORS", .code = 0x3f, .desc = "L2 single-bit errors which were detected", }, { .name = "SYSTEM_EVENT_0", .code = 0x40, .desc = "SI_Event[0] - Implementation-specific system event. The integrator of this core may connect the core pin SI_PCEvent[0] to an event to be counted", }, { .name = "SYSTEM_EVENT_1", .code = 0xc0, .desc = "SI_Event[1] - Implementation-specific system event. The integrator of this core may connect the core pin SI_PCEvent[1] to an event to be counted", }, { .name = "SYSTEM_EVENT_2", .code = 0x41, .desc = "SI_Event[2] - Implementation-specific system event. The integrator of this core may connect the core pin SI_PCEvent[2] to an event to be counted", }, { .name = "SYSTEM_EVENT_3", .code = 0xc1, .desc = "SI_Event[3] - Implementation-specific system event. The integrator of this core may connect the core pin SI_PCEvent[3] to an event to be counted", }, { .name = "SYSTEM_EVENT_4", .code = 0x42, .desc = "SI_Event[4] - Implementation-specific system event. The integrator of this core may connect the core pin SI_PCEvent[4] to an event to be counted", }, { .name = "SYSTEM_EVENT_5", .code = 0xc2, .desc = "SI_Event[5] - Implementation-specific system event. The integrator of this core may connect the core pin SI_PCEvent[5] to an event to be counted", }, { .name = "SYSTEM_EVENT_6", .code = 0x43, .desc = "SI_Event[6] - Implementation-specific system event. The integrator of this core may connect the core pin SI_PCEvent[6] to an event to be counted", }, { .name = "SYSTEM_EVENT_7", .code = 0xc3, .desc = "SI_Event[7] - Implementation-specific system event. 
The integrator of this core may connect the core pin SI_PCEvent[7] to an event to be counted", }, { .name = "OCP_ALL_REQUESTS", .code = 0x44, .desc = "All OCP requests accepted", }, { .name = "OCP_ALL_CACHEABLE_REQUESTS", .code = 0xc4, .desc = "All OCP cacheable requests accepted", }, { .name = "OCP_READ_REQUESTS", .code = 0x45, .desc = "OCP read requests accepted", }, { .name = "OCP_READ_CACHEABLE_REQUESTS", .code = 0xc5, .desc = "OCP cacheable read requests accepted", }, { .name = "OCP_WRITE_REQUESTS", .code = 0x46, .desc = "OCP write requests accepted", }, { .name = "OCP_WRITE_CACHEABLE REQUESTS", .code = 0xc6, .desc = "OCP cacheable write requests accepted", }, { .name = "OCP_WRITE_DATA_SENT", .code = 0xc7, .desc = "OCP write data sent", }, { .name = "OCP_READ_DATA_RECEIVED", .code = 0xc8, .desc = "OCP read data received", }, { .name = "FSB_LESS_25_FULL", .code = 0x4a, .desc = "Cycles fill store buffer (FSB) < 1/4 full", }, { .name = "FSB_25_50_FULL", .code = 0xca, .desc = "Cycles fill store buffer (FSB) 1/4 to 1/2 full", }, { .name = "LDQ_LESS_25_FULL", .code = 0x4b, .desc = "Cycles load data queue (LDQ) < 1/4 full", }, { .name = "LDQ_25_50_FULL", .code = 0xcb, .desc = "Cycles load data queue (LDQ) 1/4 to 1/2 full", }, { .name = "WBB_LESS_25_FULL", .code = 0x4c, .desc = "Cycles writeback buffer (WBB) < 1/4 full", }, { .name = "WBB_25_50_FULL", .code = 0xcc, .desc = "Cycles writeback buffer (WBB) 1/4 to 1/2 full", }, }; papi-5.6.0/src/libpfm4/lib/pfmlib_intel_snbep_unc_imc.c000664 001750 001750 00000005344 13216244365 025133 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_intel_snbep_unc_imc.c : Intel SandyBridge-EP Integrated Memory Controller (IMC) uncore PMU * * Copyright (c) 2012 Google, Inc * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation 
the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include #include #include #include #include /* private headers */ #include "pfmlib_priv.h" #include "pfmlib_intel_x86_priv.h" #include "pfmlib_intel_snbep_unc_priv.h" #include "events/intel_snbep_unc_imc_events.h"

/*
 * DEFINE_IMC_BOX(n) - define the PMU description for IMC box <n>.
 *
 * Expands to the definition of the global intel_snbep_unc_imc<n>_support
 * (name "snbep_unc_imc<n>", perf name "uncore_imc_<n>", PMU id
 * PFM_PMU_INTEL_SNBEP_UNC_IMC<n>). Every box uses the same event table
 * (intel_snbep_unc_m_pe), the same modifier table (snbep_unc_mods) and the
 * same callbacks; only the names and the PMU id depend on n.
 *
 * The expansion intentionally does not end with ';' so that the usual
 * "DEFINE_IMC_BOX(0);" form yields exactly one terminator instead of a
 * stray empty declaration at file scope.
 */
#define DEFINE_IMC_BOX(n) \
pfmlib_pmu_t intel_snbep_unc_imc##n##_support = { \
	.desc = "Intel Sandy Bridge-EP IMC"#n" uncore", \
	.name = "snbep_unc_imc"#n, \
	.perf_name = "uncore_imc_"#n, \
	.pmu = PFM_PMU_INTEL_SNBEP_UNC_IMC##n, \
	.pme_count = LIBPFM_ARRAY_SIZE(intel_snbep_unc_m_pe), \
	.type = PFM_PMU_TYPE_UNCORE, \
	.num_cntrs = 4, \
	.num_fixed_cntrs = 1, \
	.max_encoding = 1, \
	.pe = intel_snbep_unc_m_pe, \
	.atdesc = snbep_unc_mods, \
	.flags = PFMLIB_PMU_FL_RAW_UMASK\
	       | PFMLIB_PMU_FL_NO_SMPL,\
	.pmu_detect = pfm_intel_snbep_unc_detect, \
	.get_event_encoding[PFM_OS_NONE] = pfm_intel_snbep_unc_get_encoding, \
	 PFMLIB_ENCODE_PERF(pfm_intel_snbep_unc_get_perf_encoding), \
	.get_event_first = pfm_intel_x86_get_event_first, \
	.get_event_next = pfm_intel_x86_get_event_next, \
	.event_is_valid = pfm_intel_x86_event_is_valid, \
	.validate_table = pfm_intel_x86_validate_table, \
	.get_event_info = pfm_intel_x86_get_event_info, \
	.get_event_attr_info = pfm_intel_x86_get_event_attr_info, \
	 PFMLIB_VALID_PERF_PATTRS(pfm_intel_snbep_unc_perf_validate_pattrs), \
	.get_event_nattrs = pfm_intel_x86_get_event_nattrs, \
}

/* One PMU description per IMC box (boxes 0 through 3). */
DEFINE_IMC_BOX(0);
DEFINE_IMC_BOX(1);
DEFINE_IMC_BOX(2);
DEFINE_IMC_BOX(3);
papi-5.6.0/src/libpfm-3.y/include/perfmon/perfmon_mips64.h000664 001750 001750 00000000633 13216244362 025317 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2007 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * This file should never be included directly, use * instead. */ #ifndef _PERFMON_MIPS64_H_ #define _PERFMON_MIPS64_H_ #define PFM_ARCH_MAX_PMCS (256+64) /* 256 HW 64 SW */ #define PFM_ARCH_MAX_PMDS (256+64) /* 256 HW 64 SW */ #endif /* _PERFMON_MIPS64_H_ */ papi-5.6.0/src/ctests/prof_utils.c000664 001750 001750 00000023252 13216244360 021164 0ustar00jshenry1963jshenry1963000000 000000 /* * File: prof_utils.c * Author: Dan Terpstra * terpstra@cs.utk.edu */ /* This file contains utility functions useful for all profiling tests It can be used by: - profile.c, - sprofile.c, - profile_pthreads.c, - profile_twoevents.c, - earprofile.c, - future profiling tests. */ #include #include #include #include "papi.h" #include "papi_test.h" #include "do_loops.h" #include "prof_utils.h" /* variables global to profiling tests */ long long **values; char event_name[PAPI_MAX_STR_LEN]; int PAPI_event; int EventSet = PAPI_NULL; void *profbuf[5]; /* Many profiling tests count one of {FP_INS, FP_OPS, TOT_INS} and TOT_CYC. This function creates an event set containing the appropriate pair of events. It also initializes the global event_name string to the event selected. Assumed globals: EventSet, PAPI_event, event_name. 
*/ int prof_events( int num_tests) { int retval; int num_events, mask; /* add PAPI_TOT_CYC and one of the events in PAPI_FP_INS, PAPI_FP_OPS or PAPI_TOT_INS, depends on the availability of the event on the platform */ EventSet = add_two_nonderived_events( &num_events, &PAPI_event, &mask ); if (num_events==0) { return 0; } values = allocate_test_space( num_tests, num_events ); retval = PAPI_event_code_to_name( PAPI_event, event_name ); if (retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); } return mask; } /* This function displays info from the prginfo structure in a standardized format. */ void prof_print_address( const char *title, const PAPI_exe_info_t * prginfo ) { printf( "%s\n", title ); printf ( "----------------------------------------------------------------\n" ); printf( "Text start: %p, Text end: %p, Text length: %#x\n", prginfo->address_info.text_start, prginfo->address_info.text_end, ( unsigned int ) ( prginfo->address_info.text_end - prginfo->address_info.text_start ) ); printf( "Data start: %p, Data end: %p\n", prginfo->address_info.data_start, prginfo->address_info.data_end ); printf( "BSS start : %p, BSS end : %p\n", prginfo->address_info.bss_start, prginfo->address_info.bss_end ); printf ( "----------------------------------------------------------------\n" ); } /* This function displays profining information useful for several profile tests. It (probably inappropriately) assumes use of a common THRESHOLD. This should probably be a passed parameter. Assumed globals: event_name, start, stop. */ void prof_print_prof_info( caddr_t start, caddr_t end, int threshold, char *event_name ) { printf( "Profiling event : %s\n", event_name ); printf( "Profile Threshold: %d\n", threshold ); printf( "Profile Iters : %d\n", ( getenv( "NUM_ITERS" ) ? 
atoi( getenv( "NUM_ITERS" ) ) : NUM_ITERS ) ); printf( "Profile Range : %p to %p\n", start, end ); printf ( "----------------------------------------------------------------\n" ); printf( "\n" ); } /* Most profile tests begin by counting the eventset with no profiling enabled. This function does that work. It assumes that the 'work' routine is do_both(). A better implementation would pass a pointer to the work function. Assumed globals: EventSet, values, event_name. */ void do_no_profile( int quiet ) { int retval; if ( ( retval = PAPI_start( EventSet ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_start", retval ); do_flops( getenv( "NUM_ITERS" ) ? atoi( getenv( "NUM_ITERS" ) ) : NUM_ITERS ); if ( ( retval = PAPI_stop( EventSet, values[0] ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); if (!quiet) { printf( "Test type : \t%s\n", "No profiling" ); printf( TAB1, event_name, ( values[0] )[0] ); printf( TAB1, "PAPI_TOT_CYC", ( values[0] )[1] ); } } /* This routine allocates and initializes up to 5 equal sized profiling buffers. They need to be freed when profiling is completed. The number and size are passed parameters. The profbuf[] array of void * pointers is an assumed global. It should be cast to the required type by the parent routine. */ void prof_alloc( int num, unsigned long blength ) { int i; for ( i = 0; i < num; i++ ) { profbuf[i] = malloc( blength ); if ( profbuf[i] == NULL ) { test_fail( __FILE__, __LINE__, "malloc", PAPI_ESYS ); } memset( profbuf[i], 0x00, blength ); } } /* Given the profiling type (16, 32, or 64) this function returns the bucket size in bytes. NOTE: the bucket size does not ALWAYS correspond to the expected value, esp on architectures like Cray with weird data types. This is necessary because the posix_profile routine in extras.c relies on the data types and sizes produced by the compiler. 
*/ int prof_buckets( int bucket ) { int bucket_size; switch ( bucket ) { case PAPI_PROFIL_BUCKET_16: bucket_size = sizeof ( short ); break; case PAPI_PROFIL_BUCKET_32: bucket_size = sizeof ( int ); break; case PAPI_PROFIL_BUCKET_64: bucket_size = sizeof ( unsigned long long ); break; default: bucket_size = 0; break; } return ( bucket_size ); } /* A standardized header printing routine. No assumed globals. */ void prof_head( unsigned long blength, int bucket, int num_buckets, const char *header ) { int bucket_size = prof_buckets( bucket ); printf ( "\n------------------------------------------------------------\n" ); printf( "PAPI_profil() hash table, Bucket size: %d bits.\n", bucket_size * 8 ); printf( "Number of buckets: %d.\nLength of buffer: %ld bytes.\n", num_buckets, blength ); printf( "------------------------------------------------------------\n" ); printf( "%s\n", header ); } /* This function prints a standardized profile output based on the bucket size. A row consisting of an address and 'n' data elements is displayed for each address with at least one non-zero bucket. Assumes global profbuf[] array pointers. 
*/ void prof_out( caddr_t start, int n, int bucket, int num_buckets, unsigned int scale ) { int i, j; unsigned short buf_16; unsigned int buf_32; unsigned long long buf_64; unsigned short **buf16 = ( unsigned short ** ) profbuf; unsigned int **buf32 = ( unsigned int ** ) profbuf; unsigned long long **buf64 = ( unsigned long long ** ) profbuf; if ( !TESTS_QUIET ) { /* printf("%#lx\n",(unsigned long) start + (unsigned long) (2 * i)); */ /* printf("start: %p; i: %#x; scale: %#x; i*scale: %#x; i*scale >>15: %#x\n", start, i, scale, i*scale, (i*scale)>>15); */ switch ( bucket ) { case PAPI_PROFIL_BUCKET_16: for ( i = 0; i < num_buckets; i++ ) { for ( j = 0, buf_16 = 0; j < n; j++ ) buf_16 |= ( buf16[j] )[i]; if ( buf_16 ) { /* On 32bit builds with gcc 4.3 gcc complained about casting caddr_t => long long * Thus the unsigned long to long long cast */ printf( "%#-16llx", (long long) (unsigned long)start + ( ( ( long long ) i * scale ) >> 15 ) ); for ( j = 0, buf_16 = 0; j < n; j++ ) printf( "\t%d", ( buf16[j] )[i] ); printf( "\n" ); } } break; case PAPI_PROFIL_BUCKET_32: for ( i = 0; i < num_buckets; i++ ) { for ( j = 0, buf_32 = 0; j < n; j++ ) buf_32 |= ( buf32[j] )[i]; if ( buf_32 ) { printf( "%#-16llx", (long long) (unsigned long)start + ( ( ( long long ) i * scale ) >> 15 ) ); for ( j = 0, buf_32 = 0; j < n; j++ ) printf( "\t%d", ( buf32[j] )[i] ); printf( "\n" ); } } break; case PAPI_PROFIL_BUCKET_64: for ( i = 0; i < num_buckets; i++ ) { for ( j = 0, buf_64 = 0; j < n; j++ ) buf_64 |= ( buf64[j] )[i]; if ( buf_64 ) { printf( "%#-16llx", (long long) (unsigned long)start + ( ( ( long long ) i * scale ) >> 15 ) ); for ( j = 0, buf_64 = 0; j < n; j++ ) printf( "\t%lld", ( buf64[j] )[i] ); printf( "\n" ); } } break; } printf ( "------------------------------------------------------------\n\n" ); } } /* This function checks to make sure that some buffer value somewhere is nonzero. If all buffers are empty, zero is returned. This usually indicates a profiling failure. 
Assumes global profbuf[]. */ int prof_check( int n, int bucket, int num_buckets ) { int i, j; int retval = 0; unsigned short **buf16 = ( unsigned short ** ) profbuf; unsigned int **buf32 = ( unsigned int ** ) profbuf; unsigned long long **buf64 = ( unsigned long long ** ) profbuf; switch ( bucket ) { case PAPI_PROFIL_BUCKET_16: for ( i = 0; i < num_buckets; i++ ) for ( j = 0; j < n; j++ ) retval = retval || buf16[j][i]; break; case PAPI_PROFIL_BUCKET_32: for ( i = 0; i < num_buckets; i++ ) for ( j = 0; j < n; j++ ) retval = retval || buf32[j][i]; break; case PAPI_PROFIL_BUCKET_64: for ( i = 0; i < num_buckets; i++ ) for ( j = 0; j < n; j++ ) retval = retval || buf64[j][i]; break; } return ( retval ); } /* Computes the length (in bytes) of the buffer required for profiling. 'plength' is the profile length, or address range to be profiled. By convention, it is assumed that there are half as many buckets as addresses. The scale factor is a fixed point fraction in which 0xffff = ~1 0x8000 = 1/2 0x4000 = 1/4, etc. Thus, the number of profile buckets is (plength/2) * (scale/65536), and the length (in bytes) of the profile buffer is buckets * bucket size. */ unsigned long prof_size( unsigned long plength, unsigned scale, int bucket, int *num_buckets ) { unsigned long blength; long long llength = ( ( long long ) plength * scale ); int bucket_size = prof_buckets( bucket ); *num_buckets = ( int ) ( llength / 65536 / 2 ); blength = ( unsigned long ) ( *num_buckets * bucket_size ); return ( blength ); } papi-5.6.0/src/libpfm-3.y/examples_v2.x/ia64/ita2_rr.c000664 001750 001750 00000025715 13216244362 024161 0ustar00jshenry1963jshenry1963000000 000000 /* * ita2_rr.c - example of how to use data range restriction with the Itanium2 PMU * * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. 
* Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux/ia64. 
*/ #include #include #include #include #include #include #include #include #include #include #if defined(__ECC) && defined(__INTEL_COMPILER) /* if you do not have this file, your compiler is too old */ #include #define clear_psr_ac() __rum(1UL<<3) #elif defined(__GNUC__) static inline void clear_psr_ac(void) { __asm__ __volatile__("rum psr.ac;;" ::: "memory" ); } #else #error "You need to define clear_psr_ac() for your compiler" #endif #define TEST_DATA_COUNT 16 #define N_LOOP 100000000UL #define NUM_PMCS PFMLIB_MAX_PMCS #define NUM_PMDS PFMLIB_MAX_PMDS #define MAX_EVT_NAME_LEN 128 #define MAX_PMU_NAME_LEN 32 /* * here we capture only misaligned_loads because it cannot * be measured with misaligned_stores_retired at the same time */ static char *event_list[]={ "misaligned_loads_retired", NULL }; typedef union { unsigned long l_tab[2]; unsigned int i_tab[4]; unsigned short s_tab[8]; unsigned char c_tab[16]; } test_data_t; static int do_test(test_data_t *data) { unsigned int *l, v; l = (unsigned int *)(data->c_tab+1); if (((unsigned long)l & 0x1) == 0) { printf("Data is not unaligned, can't run test\n"); return -1; } v = *l; v++; *l = v; return 0; } static void fatal_error(char *fmt,...) __attribute__((noreturn)); static void fatal_error(char *fmt, ...) 
{ va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); exit(1); } int main(int argc, char **argv) { char **p; test_data_t *test_data, *test_data_fake; unsigned long range_start, range_end; int ret, type = 0; pfmlib_input_param_t inp; pfmlib_output_param_t outp; pfmlib_ita2_input_param_t ita2_inp; pfmlib_ita2_output_param_t ita2_outp; pfarg_pmd_t pd[NUM_PMDS]; pfarg_pmc_t pc[NUM_PMCS]; pfarg_ctx_t ctx[1]; pfarg_load_t load_args; pfmlib_options_t pfmlib_options; unsigned int i; int id, num_pmcs = 0; char name[MAX_EVT_NAME_LEN]; /* * Initialize pfm library (required before we can use it) */ ret = pfm_initialize(); if (ret != PFMLIB_SUCCESS) fatal_error("Cannot initialize library: %s\n", pfm_strerror(ret)); /* * Let's make sure we run this on the right CPU family */ pfm_get_pmu_type(&type); if (type != PFMLIB_ITANIUM2_PMU) { char model[MAX_PMU_NAME_LEN]; pfm_get_pmu_name(model, MAX_PMU_NAME_LEN); fatal_error("this program does not work with %s PMU\n", model); } /* * pass options to library (optional) */ memset(&pfmlib_options, 0, sizeof(pfmlib_options)); pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ pfmlib_options.pfm_verbose = 1; /* set to 1 for debug */ pfm_set_options(&pfmlib_options); /* * now let's allocate the data structure we will be monitoring */ test_data = (test_data_t *)malloc(sizeof(test_data_t)*TEST_DATA_COUNT); if (test_data == NULL) { fatal_error("cannot allocate test data structure"); } test_data_fake = (test_data_t *)malloc(sizeof(test_data_t)*TEST_DATA_COUNT); if (test_data_fake == NULL) { fatal_error("cannot allocate test data structure"); } /* * Compute the range we are interested in */ range_start = (unsigned long)test_data; range_end = range_start + sizeof(test_data_t)*TEST_DATA_COUNT; memset(pd, 0, sizeof(pd)); memset(pc, 0, sizeof(pc)); memset(ctx, 0, sizeof(ctx)); memset(&load_args, 0, sizeof(load_args)); /* * prepare parameters to library. we don't use any Itanium * specific features here. so the pfp_model is NULL. 
*/ memset(&inp,0, sizeof(inp)); memset(&outp,0, sizeof(outp)); memset(&ita2_inp,0, sizeof(ita2_inp)); memset(&ita2_outp,0, sizeof(ita2_outp)); /* * find requested event */ p = event_list; for (i=0; *p ; i++, p++) { if (pfm_find_event(*p, &inp.pfp_events[i].event) != PFMLIB_SUCCESS) { fatal_error("Cannot find %s event\n", *p); } } /* * set the privilege mode: * PFM_PLM3 : user level only */ inp.pfp_dfl_plm = PFM_PLM3; /* * how many counters we use */ inp.pfp_event_count = i; /* * We use the library to figure out how to program the debug registers * to cover the data range we are interested in. The rr_end parameter * must point to the byte after the last element of the range (C-style range). * * Because of the masking mechanism and therefore alignment constraints used to implement * this feature, it may not be possible to exactly cover a given range. It may be that * the coverage exceeds the desired range. So it is possible to capture noise if * the surrounding addresses are also heavily used. You can figure out by how much the * actual range is off compared to the requested range by checking the rr_soff and rr_eoff * fields in rr_infos on return from the library call. * * Upon return, the rr_dbr array is programmed and the number of debug registers (not pairs) * used to cover the range is in rr_nbr_used. 
*/ ita2_inp.pfp_ita2_drange.rr_used = 1; ita2_inp.pfp_ita2_drange.rr_limits[0].rr_start = range_start; ita2_inp.pfp_ita2_drange.rr_limits[0].rr_end = range_end; /* * let the library figure out the values for the PMCS */ if ((ret=pfm_dispatch_events(&inp, &ita2_inp, &outp, &ita2_outp)) != PFMLIB_SUCCESS) { fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); } printf("data range : [0x%016lx-0x%016lx): %d pair of debug registers used\n" "start_offset:-0x%lx end_offset:+0x%lx\n", range_start, range_end, ita2_outp.pfp_ita2_drange.rr_nbr_used >> 1, ita2_outp.pfp_ita2_drange.rr_infos[0].rr_soff, ita2_outp.pfp_ita2_drange.rr_infos[0].rr_eoff); printf("fake data range: [0x%016lx-0x%016lx)\n", (unsigned long)test_data_fake, (unsigned long)test_data_fake+sizeof(test_data_t)*TEST_DATA_COUNT); /* * now create the context for self monitoring/per-task */ id = pfm_create_context(ctx, NULL, NULL, 0); if (id == -1) { if (errno == ENOSYS) { fatal_error("Your kernel does not have performance monitoring support!\n"); } fatal_error("Can't create PFM context %s\n", strerror(errno)); } /* * Now prepare the argument to initialize the PMDs and PMCS. * We must pfp_pmc_count to determine the number of PMC to intialize. * We must use pfp_event_count to determine the number of PMD to initialize. * Some events causes extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. * * This step is new compared to libpfm-2.x. It is necessary because the library no * longer knows about the kernel data structures. */ for (i=0; i < outp.pfp_pmc_count; i++, num_pmcs++) { pc[num_pmcs].reg_num = outp.pfp_pmcs[i].reg_num; pc[num_pmcs].reg_value = outp.pfp_pmcs[i].reg_value; } /* * propagate the setup for the debug registers from the library to the arguments * to the syscall. 
*/ for (i=0; i < ita2_outp.pfp_ita2_drange.rr_nbr_used; i++, num_pmcs++) { pc[num_pmcs].reg_num = 264+ita2_outp.pfp_ita2_drange.rr_br[i].reg_num; pc[num_pmcs].reg_value = ita2_outp.pfp_ita2_drange.rr_br[i].reg_value; } /* * figure out pmd mapping from output pmc */ for (i=0; i < outp.pfp_pmd_count; i++) pd[i].reg_num = outp.pfp_pmds[i].reg_num; /* * Now program the registers * * We don't use the save variable to indicate the number of elements passed to * the kernel because, as we said earlier, pc may contain more elements than * the number of events we specified, i.e., contains more than coutning monitors. */ if (pfm_write_pmcs(id, pc, num_pmcs) == -1) fatal_error("child: pfm_write_pmc error errno %d\n",errno); if (pfm_write_pmds(id, pd, outp.pfp_pmd_count) == -1) fatal_error( "child: pfm_write_pmds error errno %d\n",errno); /* * now we load (i.e., attach) the context to ourself */ load_args.load_pid = getpid(); if (pfm_load_context(id, &load_args) == -1) { fatal_error("pfm_load_context error errno %d\n",errno); } /* * Let's make sure that the hardware does the unaligned accesses (do not use the * kernel software handler otherwise the PMU won't see the unaligned fault). */ clear_psr_ac(); /* * Let's roll now. * * The idea behind this test is to have two dynamically allocated data structures * which are access in a unaligned fashion. But we want to capture only the unaligned * accesses on one of the two. So the debug registers are programmed to cover the * first one ONLY. Then we activate monotoring and access the two data structures. * This is an artificial example just to demonstrate how to use data address range * restrictions. 
*/ pfm_self_start(id); for(i=0; i < N_LOOP; i++) { do_test(test_data); do_test(test_data_fake); } pfm_self_stop(id); /* * now read the results */ if (pfm_read_pmds(id, pd, inp.pfp_event_count) == -1) fatal_error( "pfm_read_pmds error errno %d\n",errno); /* * print the results * * It is important to realize, that the first event we specified may not * be in PMD4. Not all events can be measured by any monitor. That's why * we need to use the pc[] array to figure out where event i was allocated. * * For this example, we expect to see a value of 1 for misaligned loads. * But it can be two when the test_data and test_data_fake * are allocated very close from each other and the range created with the debug * registers is larger then test_data. * */ for (i=0; i < inp.pfp_event_count; i++) { pfm_get_full_event_name(&inp.pfp_events[i], name, MAX_EVT_NAME_LEN); printf("PMD%-3u %20lu %s (expected %lu)\n", pd[i].reg_num, pd[i].reg_value, name, N_LOOP); if (pd[i].reg_value != N_LOOP) { printf("error: Result should be 1 for %s\n", name); break; } } /* * let's stop this now */ close(id); free(test_data); free(test_data_fake); return 0; } papi-5.6.0/src/perfctr-2.7.x/examples/self/arch.h000664 001750 001750 00000000364 13216244370 023401 0ustar00jshenry1963jshenry1963000000 000000 /* $Id: arch.h,v 1.1 2004/01/11 22:07:12 mikpe Exp $ * Architecture-specific support code. * * Copyright (C) 2004 Mikael Pettersson */ extern void do_setup(const struct perfctr_info *info, struct perfctr_cpu_control *cpu_control); papi-5.6.0/src/components/cuda/tests/nvlink_bandwidth_cupti_only.cu000775 001750 001750 00000051314 13216244357 027724 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright 2015-2016 NVIDIA Corporation. All rights reserved. 
* * Sample to demonstrate use of NVlink CUPTI APIs */ #include #include #include #include #include #define CUPTI_CALL(call) \ do { \ CUptiResult _status = call; \ if (_status != CUPTI_SUCCESS) { \ const char *errstr; \ cuptiGetResultString(_status, &errstr); \ fprintf(stderr, "%s:%d: error: function %s failed with error %s.\n", \ __FILE__, __LINE__, #call, errstr); \ exit(-1); \ } \ } while (0) #define DRIVER_API_CALL(apiFuncCall) \ do { \ CUresult _status = apiFuncCall; \ if (_status != CUDA_SUCCESS) { \ fprintf(stderr, "%s:%d: error: function %s failed with error %d.\n", \ __FILE__, __LINE__, #apiFuncCall, _status); \ exit(-1); \ } \ } while (0) #define RUNTIME_API_CALL(apiFuncCall) \ do { \ cudaError_t _status = apiFuncCall; \ if (_status != cudaSuccess) { \ fprintf(stderr, "%s:%d: error: function %s failed with error %s.\n", \ __FILE__, __LINE__, #apiFuncCall, cudaGetErrorString(_status));\ exit(-1); \ } \ } while (0) #define MEMORY_ALLOCATION_CALL(var) \ do { \ if (var == NULL) { \ fprintf(stderr, "%s:%d: Error: Memory Allocation Failed \n", \ __FILE__, __LINE__); \ exit(-1); \ } \ } while (0) #define MAX_DEVICES (32) #define BLOCK_SIZE (1024) #define GRID_SIZE (512) #define BUF_SIZE (32 * 1024) #define ALIGN_SIZE (8) #define SUCCESS (0) #define NUM_METRIC (4) #define NUM_EVENTS (2) #define MAX_SIZE (64*1024*1024) // 64 MB #define NUM_STREAMS (6) // gp100 has 6 physical copy engines CUpti_ActivityNvLink *nvlinkRec = NULL; int cpuToGpu = 0; int gpuToGpu = 0; int cpuToGpuAccess = 0; int gpuToGpuAccess = 0; extern "C" __global__ void test_nvlink_bandwidth(float *src, float *dst) { int idx = blockIdx.x * blockDim.x + threadIdx.x; dst[idx] = src[idx] * 2.0f; } static void printActivity(CUpti_Activity *record) { if (record->kind == CUPTI_ACTIVITY_KIND_NVLINK) { nvlinkRec = (CUpti_ActivityNvLink *)record; printf("typeDev0 %d, typeDev1 %d, sysmem %d, peer %d, physical links %d, portdev0 %d, %d, %d, %d, portDev1 %d, %d, %d, %d, bandwidth %llu\n", nvlinkRec->typeDev0, 
nvlinkRec->typeDev1, ((nvlinkRec->flag & CUPTI_LINK_FLAG_SYSMEM_ACCESS) ? 1 : 0), ((nvlinkRec->flag & CUPTI_LINK_FLAG_PEER_ACCESS) ? 1 : 0), nvlinkRec->physicalNvLinkCount, nvlinkRec->portDev0[0], nvlinkRec->portDev0[1], nvlinkRec->portDev0[2], nvlinkRec->portDev0[3], nvlinkRec->portDev1[0], nvlinkRec->portDev1[1], nvlinkRec->portDev1[2], nvlinkRec->portDev1[3], (long long unsigned int)nvlinkRec->bandwidth); cpuToGpuAccess |= (nvlinkRec->flag & CUPTI_LINK_FLAG_SYSMEM_ACCESS); gpuToGpuAccess |= (nvlinkRec->flag & CUPTI_LINK_FLAG_PEER_ACCESS); } else { printf("Error : Unexpected CUPTI activity kind.\nExpected Activity kind : CUPTI_ACTIVITY_KIND_NVLINK\n"); } } static void CUPTIAPI bufferRequested(uint8_t **buffer, size_t *size, size_t *maxNumRecords) { *size = BUF_SIZE + ALIGN_SIZE; *buffer = (uint8_t*) calloc(1, *size); MEMORY_ALLOCATION_CALL(*buffer); *maxNumRecords = 0; } static void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer, size_t size, size_t validSize) { CUptiResult status; CUpti_Activity *record = NULL; do { status = cuptiActivityGetNextRecord(buffer, validSize, &record); if(status == CUPTI_SUCCESS) { printActivity(record); } else if (status == CUPTI_ERROR_MAX_LIMIT_REACHED) { break; } else { CUPTI_CALL(status); } } while (1); size_t dropped; CUPTI_CALL(cuptiActivityGetNumDroppedRecords(ctx, streamId, &dropped)); if (dropped != 0) { printf("Dropped %u activity records\n", (unsigned int)dropped); } } #define DIM(x) (sizeof(x)/sizeof(*(x))) void calculateSize(char *result, uint64_t size) { int i; const char *sizes[] = { "TB", "GB", "MB", "KB", "B" }; uint64_t exbibytes = 1024ULL * 1024ULL * 1024ULL * 1024ULL; uint64_t multiplier = exbibytes; for (i = 0; (unsigned)i < DIM(sizes); i++, multiplier /= (uint64_t)1024) { if (size < multiplier) continue; sprintf(result, "%.1f %s", (float) size / multiplier, sizes[i]); return; } strcpy(result, "0"); return; } void readMetricValue(CUpti_EventGroup eventGroup, uint32_t numEvents, CUdevice 
dev, CUpti_MetricID *metricId, uint64_t timeDuration, CUpti_MetricValue *metricValue) { size_t bufferSizeBytes, numCountersRead; uint64_t *eventValueArray = NULL; CUpti_EventID *eventIdArray; size_t arraySizeBytes = 0; size_t numTotalInstancesSize = 0; uint64_t numTotalInstances = 0; uint64_t *aggrEventValueArray = NULL; size_t aggrEventValueArraySize; uint32_t i = 0, j = 0; CUpti_EventDomainID domainId; size_t domainSize; domainSize = sizeof(CUpti_EventDomainID); CUPTI_CALL(cuptiEventGroupGetAttribute(eventGroup, CUPTI_EVENT_GROUP_ATTR_EVENT_DOMAIN_ID, &domainSize, (void *)&domainId)); numTotalInstancesSize = sizeof(uint64_t); CUPTI_CALL(cuptiDeviceGetEventDomainAttribute(dev, domainId, CUPTI_EVENT_DOMAIN_ATTR_TOTAL_INSTANCE_COUNT, &numTotalInstancesSize, (void *)&numTotalInstances)); arraySizeBytes = sizeof(CUpti_EventID) * numEvents; bufferSizeBytes = sizeof(uint64_t) * numEvents * numTotalInstances; eventValueArray = (uint64_t *) malloc(bufferSizeBytes); MEMORY_ALLOCATION_CALL(eventValueArray); eventIdArray = (CUpti_EventID *) malloc(arraySizeBytes); MEMORY_ALLOCATION_CALL(eventIdArray); aggrEventValueArray = (uint64_t *) calloc(numEvents, sizeof(uint64_t)); MEMORY_ALLOCATION_CALL(aggrEventValueArray); aggrEventValueArraySize = sizeof(uint64_t) * numEvents; CUPTI_CALL(cuptiEventGroupReadAllEvents(eventGroup, CUPTI_EVENT_READ_FLAG_NONE, &bufferSizeBytes, eventValueArray, &arraySizeBytes, eventIdArray, &numCountersRead)); for (i = 0; i < numEvents; i++) { for (j = 0; j < numTotalInstances; j++) { aggrEventValueArray[i] += eventValueArray[i + numEvents * j]; //printf("For event %d (id %d) instance %d value %ul aggregate %d = %ul\n", i, eventIdArray[i], j, eventValueArray[i + numEvents * j], i, aggrEventValueArray[i]); } } for (i = 0; i < NUM_METRIC; i++) { CUPTI_CALL(cuptiMetricGetValue(dev, metricId[i], arraySizeBytes, eventIdArray, aggrEventValueArraySize, aggrEventValueArray, timeDuration, &metricValue[i])); } free(eventValueArray); free(eventIdArray); } // 
// Print metric value, we format based on the value kind
// Returns 0 on success, -1 when the metric's value kind is not handled.
int printMetricValue(CUpti_MetricID metricId, CUpti_MetricValue metricValue,
                     const char *metricName, uint64_t timeDuration)
{
    CUpti_MetricValueKind valueKind;
    char str[64];
    uint64_t rawValue;
    size_t valueKindSize = sizeof(valueKind);

    CUPTI_CALL(cuptiMetricGetAttribute(metricId, CUPTI_METRIC_ATTR_VALUE_KIND,
                                       &valueKindSize, &valueKind));

    // every supported kind is printed the same way once converted to uint64_t
    switch (valueKind) {
    case CUPTI_METRIC_VALUE_KIND_DOUBLE:
        rawValue = (uint64_t)metricValue.metricValueDouble;
        break;
    case CUPTI_METRIC_VALUE_KIND_UINT64:
        rawValue = (uint64_t)metricValue.metricValueUint64;
        break;
    case CUPTI_METRIC_VALUE_KIND_INT64:
        rawValue = (uint64_t)metricValue.metricValueInt64;
        break;
    case CUPTI_METRIC_VALUE_KIND_THROUGHPUT:
        rawValue = (uint64_t)metricValue.metricValueThroughput;
        break;
    default:
        fprintf(stderr, "error: unknown value kind\n");
        return -1;
    }

    printf("%s = ", metricName);
    calculateSize(str, rawValue);
    printf("%s\n", str);
    return 0;
}

// Enables the event groups, then times three copy phases between host and
// device over NUM_STREAMS streams: host-to-device, device-to-host, and a
// bidirectional phase using stream pairs (i, i+1). The elapsed CUPTI
// timestamp difference is stored in *timeDuration.
void testCpuToGpu(CUpti_EventGroup *eventGroup, CUdeviceptr *pDevBuffer,
                  float** pHostBuffer, size_t bufferSize,
                  cudaStream_t *cudaStreams,
                  uint64_t *timeDuration, int numEventGroup)
{
    int i;
    uint32_t value = 1;
    uint64_t startTimestamp, endTimestamp;

    for (i = 0; i < numEventGroup; i++) {
        CUPTI_CALL(cuptiEventGroupEnable(eventGroup[i]));
        CUPTI_CALL(cuptiEventGroupSetAttribute(eventGroup[i],
                                               CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES,
                                               sizeof(uint32_t), (void*)&value));
    }

    CUPTI_CALL(cuptiGetTimestamp(&startTimestamp));

    //Unidirectional copy H2D
    for (i = 0; i < NUM_STREAMS; i++) {
        RUNTIME_API_CALL(cudaMemcpyAsync((void *)pDevBuffer[i], pHostBuffer[i], bufferSize,
                                         cudaMemcpyHostToDevice, cudaStreams[i]));
    }
    RUNTIME_API_CALL(cudaDeviceSynchronize());

    //Unidirectional copy D2H
    for (i = 0; i < NUM_STREAMS; i++) {
        RUNTIME_API_CALL(cudaMemcpyAsync(pHostBuffer[i], (void *)pDevBuffer[i], bufferSize,
                                         cudaMemcpyDeviceToHost, cudaStreams[i]));
    }
    RUNTIME_API_CALL(cudaDeviceSynchronize());

    //Bidirectional copy
    for (i = 0; i < NUM_STREAMS; i+=2) {
        RUNTIME_API_CALL(cudaMemcpyAsync((void *)pDevBuffer[i], pHostBuffer[i], bufferSize,
                                         cudaMemcpyHostToDevice, cudaStreams[i]));
        RUNTIME_API_CALL(cudaMemcpyAsync(pHostBuffer[i+1], (void *)pDevBuffer[i+1], bufferSize,
                                         cudaMemcpyDeviceToHost, cudaStreams[i+1]));
    }
    RUNTIME_API_CALL(cudaDeviceSynchronize());

    CUPTI_CALL(cuptiGetTimestamp(&endTimestamp));
    *timeDuration = endTimestamp - startTimestamp;
}

// Enables peer access between devices 0 and 1, fills both device buffer sets
// from the host, enables the event groups and then times device-to-device
// copies in both directions followed by the test_nvlink_bandwidth kernel.
// The elapsed CUPTI timestamp difference is stored in *timeDuration.
void testGpuToGpu(CUpti_EventGroup *eventGroup, CUdeviceptr *pDevBuffer0,
                  CUdeviceptr *pDevBuffer1, float** pHostBuffer,
                  size_t bufferSize, cudaStream_t *cudaStreams,
                  uint64_t *timeDuration, int numEventGroup)
{
    int i;
    uint32_t value = 1;
    uint64_t startTimestamp, endTimestamp;

    RUNTIME_API_CALL(cudaSetDevice(0));
    RUNTIME_API_CALL(cudaDeviceEnablePeerAccess(1, 0));
    RUNTIME_API_CALL(cudaSetDevice(1));
    RUNTIME_API_CALL(cudaDeviceEnablePeerAccess(0, 0));

    //Unidirectional copy H2D
    for (i = 0; i < NUM_STREAMS; i++) {
        RUNTIME_API_CALL(cudaMemcpyAsync((void *)pDevBuffer0[i], pHostBuffer[i], bufferSize,
                                         cudaMemcpyHostToDevice, cudaStreams[i]));
    }
    RUNTIME_API_CALL(cudaDeviceSynchronize());

    for (i = 0; i < NUM_STREAMS; i++) {
        RUNTIME_API_CALL(cudaMemcpyAsync((void *)pDevBuffer1[i], pHostBuffer[i], bufferSize,
                                         cudaMemcpyHostToDevice, cudaStreams[i]));
    }
    RUNTIME_API_CALL(cudaDeviceSynchronize());

    for (i = 0; i < numEventGroup; i++) {
        printf("cuptiEventGroupEnable(eventGroup[%d])\n", i);
        CUPTI_CALL(cuptiEventGroupEnable(eventGroup[i]));
        CUPTI_CALL(cuptiEventGroupSetAttribute(eventGroup[i],
                                               CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES,
                                               sizeof(uint32_t), (void*)&value));
    }

    CUPTI_CALL(cuptiGetTimestamp(&startTimestamp));

    for (i = 0; i < NUM_STREAMS; i++) {
        RUNTIME_API_CALL(cudaMemcpyAsync((void *)pDevBuffer0[i], (void *)pDevBuffer1[i], bufferSize,
                                         cudaMemcpyDeviceToDevice, cudaStreams[i]));
    }
    RUNTIME_API_CALL(cudaDeviceSynchronize());

    for (i = 0; i < NUM_STREAMS; i++) {
        RUNTIME_API_CALL(cudaMemcpyAsync((void *)pDevBuffer1[i], (void *)pDevBuffer0[i], bufferSize,
                                         cudaMemcpyDeviceToDevice, cudaStreams[i]));
    }
    RUNTIME_API_CALL(cudaDeviceSynchronize());

    // NOTE(review): the launch configuration was missing in this copy of the
    // file ("<<>>"); GRID_SIZE x BLOCK_SIZE are the otherwise unused launch
    // constants defined at the top of the file. Confirm against upstream.
    for (i = 0; i < NUM_STREAMS; i++) {
        test_nvlink_bandwidth<<<GRID_SIZE, BLOCK_SIZE>>>((float*)pDevBuffer1[i], (float*)pDevBuffer0[i]);
    }
    // kernel launches are asynchronous: wait for them so that the end
    // timestamp covers their execution, like the copy phases above
    RUNTIME_API_CALL(cudaDeviceSynchronize());

    CUPTI_CALL(cuptiGetTimestamp(&endTimestamp));
    *timeDuration = endTimestamp - startTimestamp;
}

// Prints the command line help.
static void printUsage()
{
    printf("usage: Demonstrate use of NVlink CUPTI APIs\n");
    printf(" -help : display help message\n");
    printf(" --cpu-to-gpu : Show results for data transfer between CPU and GPU \n");
    printf(" --gpu-to-gpu : Show results for data transfer between two GPUs \n");
}

// Sets the cpuToGpu / gpuToGpu globals from the single expected option.
// Exits when the number of arguments is wrong or help is requested; any
// unrecognized option selects the CPU-to-GPU test.
void parseCommandLineArgs(int argc, char *argv[])
{
    if (argc != 2) {
        printf("Invalid number of options\n");
        exit(0);
    }

    if (strcmp(argv[1], "--cpu-to-gpu") == 0) {
        cpuToGpu = 1;
    }
    else if (strcmp(argv[1], "--gpu-to-gpu") == 0) {
        gpuToGpu = 1;
    }
    else if ((strcmp(argv[1], "--help") == 0) ||
             (strcmp(argv[1], "-help") == 0) ||
             (strcmp(argv[1], "-h") == 0)) {
        printUsage();
        exit(0);
    }
    else {
        cpuToGpu = 1;
    }
}

int main(int argc, char *argv[])
{
    int deviceCount = 0, i = 0, j = 0, numEventGroup = 0;
    size_t bufferSize = 0, freeMemory = 0, totalMemory = 0;
    CUpti_EventGroupSets *passes = NULL;
    CUcontext ctx;
    char str[64];
    CUdeviceptr pDevBuffer0[NUM_STREAMS];
    CUdeviceptr pDevBuffer1[NUM_STREAMS];
    float* pHostBuffer[NUM_STREAMS];
    cudaStream_t cudaStreams[NUM_STREAMS] = {0};
    CUpti_EventGroup eventGroup[32];
    CUpti_MetricID metricId[NUM_METRIC];
    uint32_t numEvents[NUM_METRIC];
    CUpti_MetricValue metricValue[NUM_METRIC];
    cudaDeviceProp prop[MAX_DEVICES];
    uint64_t timeDuration;

    // Adding nvlink Metrics.
const char *metricName[NUM_METRIC] = {"nvlink_total_data_transmitted", "nvlink_total_data_received", "nvlink_transmit_throughput", "nvlink_receive_throughput"}; // Parse command line arguments parseCommandLineArgs(argc, argv); CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_NVLINK)); CUPTI_CALL(cuptiActivityRegisterCallbacks(bufferRequested, bufferCompleted)); DRIVER_API_CALL(cuInit(0)); RUNTIME_API_CALL(cudaGetDeviceCount(&deviceCount)); printf("There are %d devices.\n", deviceCount); if (deviceCount == 0) { printf("There is no device supporting CUDA.\n"); exit(-1); } for (i = 0; i < deviceCount; i++) { RUNTIME_API_CALL(cudaGetDeviceProperties(&prop[i], i)); printf("CUDA Device %d Name: %s\n", i, prop[i].name); } // Set memcpy size based on available device memory RUNTIME_API_CALL(cudaMemGetInfo(&freeMemory, &totalMemory)); bufferSize = MAX_SIZE < (freeMemory/4) ? MAX_SIZE : (freeMemory/4); printf("Total Device Memory available : "); calculateSize(str, (uint64_t)totalMemory); printf("%s\n", str); printf("Memcpy size is set to %llu B (%llu MB)\n", (unsigned long long)bufferSize, (unsigned long long)bufferSize/(1024*1024)); for(i = 0; i < NUM_STREAMS; i++) { RUNTIME_API_CALL(cudaStreamCreate(&cudaStreams[i])); } RUNTIME_API_CALL(cudaDeviceSynchronize()); // Nvlink-topology Records are generated even before cudaMemcpy API is called. CUPTI_CALL(cuptiActivityFlushAll(0)); // Transfer Data between Host And Device, if Nvlink is Present // Check condition : nvlinkRec->flag & CUPTI_LINK_FLAG_SYSMEM_ACCESS // True : Nvlink is present between CPU & GPU // False : Nvlink is not present. 
if ((nvlinkRec) && (((cpuToGpu) && (cpuToGpuAccess)) || ((gpuToGpu) && (gpuToGpuAccess)))) { for (i = 0; i < NUM_METRIC; i++) { CUPTI_CALL(cuptiMetricGetIdFromName(0, metricName[i], &metricId[i])); CUPTI_CALL(cuptiMetricGetNumEvents(metricId[i], &numEvents[i])); } DRIVER_API_CALL(cuCtxCreate(&ctx, 0, 0)); CUPTI_CALL(cuptiMetricCreateEventGroupSets(ctx, (sizeof metricId) ,metricId, &passes)); // EventGroups required to profile Nvlink metrics. for (i = 0; i < (signed)passes->numSets; i++) { for (j = 0; j < (signed)passes->sets[i].numEventGroups; j++) { eventGroup[numEventGroup] = passes->sets[i].eventGroups[j]; if (!eventGroup[numEventGroup]) { printf("\n eventGroup initialization failed \n"); exit(-1); } numEventGroup++; } } CUPTI_CALL(cuptiSetEventCollectionMode(ctx, CUPTI_EVENT_COLLECTION_MODE_CONTINUOUS)); // ===== Allocate Memory ===================================== for(i = 0; i < NUM_STREAMS; i++) { RUNTIME_API_CALL(cudaMalloc((void**)&pDevBuffer0[i], bufferSize)); pHostBuffer[i] = (float *)malloc(bufferSize); MEMORY_ALLOCATION_CALL(pHostBuffer[i]); } if (cpuToGpu) { testCpuToGpu(eventGroup, pDevBuffer0, pHostBuffer, bufferSize, cudaStreams, &timeDuration, numEventGroup); printf("Data tranferred between CPU & Device%d : \n", (int)nvlinkRec->typeDev0); } else if(gpuToGpu) { RUNTIME_API_CALL(cudaSetDevice(1)); for(i = 0; i < NUM_STREAMS; i++) { RUNTIME_API_CALL(cudaMalloc((void**)&pDevBuffer1[i], bufferSize)); } testGpuToGpu(eventGroup, pDevBuffer0, pDevBuffer1,pHostBuffer, bufferSize, cudaStreams, &timeDuration, numEventGroup); printf("Data tranferred between Device 0 & Device 1 : \n"); } // Collect Nvlink Metric values for the data transfer via Nvlink for all the eventGroups. 
for (i = 0; i < numEventGroup; i++) { readMetricValue(eventGroup[i], NUM_EVENTS, 0, metricId, timeDuration, metricValue); CUPTI_CALL(cuptiEventGroupDisable(eventGroup[i])); CUPTI_CALL(cuptiEventGroupDestroy(eventGroup[i])); for (i = 0; i < NUM_METRIC; i++) { if (printMetricValue(metricId[i], metricValue[i], metricName[i], timeDuration) != 0) { printf("\n printMetricValue failed \n"); exit(-1); } } } } else { printf("No Nvlink supported device found\n"); } return 0; } papi-5.6.0/src/perfctr-2.7.x/linux/drivers/perfctr/RELEASE-NOTES000664 001750 001750 00000251774 13216244370 025734 0ustar00jshenry1963jshenry1963000000 000000 $Id: RELEASE-NOTES,v 1.341 2007/10/06 13:02:07 mikpe Exp $ RELEASE NOTES ============= Version 2.7.21.1, 2007-10-06 - Bumped copyright dates. Version 2.7.21, 2007-10-03 - Updated many files for deprecation change in kernel 2.6.19. - Updated log messages containing my email address to use my current @it.uu.se address, as @csd.uu.se == /dev/null. - Updated virtual.c for workqueue changes in kernel 2.6.20. - Updated virtual.c for i_blksize change in kernel 2.6.19. - Updated x86.c to match perfctr-2.6.28, including updates for Intel Core and Core2, a P4 HT detection bug fix, and updates for nmi watchdog changes in kernels 2.6.19, 2.6.21, and 2.6.22. - Updated Makefile to unbreak call-site backpatching in x86.c. Version 2.7.20, 2006-08-20 - Moved ppc64 perfctr.h from asm-ppc64/ to asm-powerpc/. - Updated ppc.c to use {reserve,release}_pmc_hardware() to dynamically claim and release the PMC resources. - Updated ppc.c, virtual.c, and x86.c to use new-style mutexes instead of old-style binary semaphores. - Updated virtual.c for get_sb_pseudo() change in kernel 2.6.18. - Updated x86.c for change in kernel 2.6.18. - Updated x86_tests.c for sync_core() change in kernel 2.6.16. Version 2.7.19, 2005-11-08 - Updated ppc64.c to handle that CPU_FTR_PMC8 has been replaced by cur_cpu_spec->num_pmcs. From David Gibson. 
- The dual-core P4s changed the layout rules for the initial APIC ID, which broke the x86 driver on DC P4s. Updated the HT thread ID detection code to match current IA32 SDM Vol3. - x86.c: pm_message_t fix for suspend(), from 2.6.13-mm2. - The ppc32 driver will now compile in kernels that lack Open Firmware support, which is needed for some embedded systems. Version 2.7.17, 2005-05-26 - (All archs) Added u32 seqlock to user-visible state, replacing samplecnt on ppc/ppc64. perfctr_cpu_{suspend,resume,sample}() now bump the seqlock with write_perfseq_begin/end pairs. User-space now does read_perfseq_begin/retry on all archs instead of kludging it with TSC or samplecnt. - Fix counter wraparound issues in ppc64 driver. From David Gibson. - Clean up K8 multicore detection. From 2.6.12-rc5-mm1. - Added code to detect multicore K8s and prevent threads in the thread-centric API from using northbridge events. This avoids resource conflicts, and an erratum in Revision E chips. Forward-port from perfctr-2.6.15. Version 2.7.16, 2005-04-09 - : In user-visible state, make start fields 64 bits (for future-proofing the ABI). Remove map field from pmc[] array to avoid underutilised cache lines. - Change drivers to retrieve mapping from ->control.pmc_map[]. - ppc32/ppc64: Add sampling counter to user-visible state, and increment it in perfctr_cpu_resume() and perfctr_cpu_sample(). - ppc64.c: fix whitespace damage (from -mm branch). Version 2.7.15, 2005-03-31 - : use u64 instead of unsigned long long for the mmcr fields. Version 2.7.14, 2005-03-29 - Swapped cstatus and k1 fields in struct perfctr_cpu_state. Moved now contiguous user-visible fields to struct perfctr_cpu_state_user. Inlined now obsolete k1 struct. Some cleanups. - Added state_user_offset sysfs attribute providing user-space with the offset from the start of the mapping to the user-visible state. - : Change number fields in register descriptors to 64 bits. 
Otherwise i386 binaries break on x86_64 kernels since the descriptors get larger alignment and sizes on x86_64 than on i386. Version 2.7.13, 2005-03-23 - x86: finalise_backpatching() now exercises all control flow paths, to ensure that calls in cloned control flows are backpatched properly. This is needed for gcc-4.0. - x86/ppc: Eliminate power-of-two sizeof assumption in access_regs(). - x86/ppc: Merge check_ireset() and setup_imode_start_values(). - ppc: If check_ireset() fails, clear state->cstatus to undo any settings check_control() may have done. - : Change value fields in register descriptors to 64 bits. This will be needed for ppc64 kernel-space and ppc32 user-space, and may eventually also be needed on x86. - Added David Gibson's ppc64 code for perfctr-2.7.10. - Updated ppc64 code to perfctr-2.7.13. Version 2.7.12, 2005-03-18 - init.c cleanups: remove unused , don't initialise perfctr_info, don't show dummy cpu_type, show driver version directly from VERSION. - ppc.c cleanups: don't initialise perfctr_info.cpu_type. - cleanups: remove cpu_type constants and PERFCTR_CPU_VERSION, use explicitly-sized types in user-visible types, #ifdef __KERNEL__ around perfctr_cpu_control. - cleanups: ditto - cleanups: remove types & constants not used in the kernel (perfctr_cpu_mask, PERFCTR_ABI_VERSION, and vperfctr_state), make perfctr_info kernel-only and remove unused fields, use explicitly-sized types in user-visible types. Version 2.7.11, 2005-03-14 - Switch x86 driver to use physically-indexed control data. * Rearrange struct perfctr_cpu_control. Remove _reserved fields. * On P5 and P5 clones users must now format the two counters' control data into a single CESR image. * On P4 check ESCR value after retrieving the counter's ESCR number. - Switch ppc32 driver to use physically-indexed control data. * Rearrange struct perfctr_cpu_control. Remove _reserved fields. * ppc_mmcr[] array in struct perfctr_cpu_state is no longer needed. 
* In perfctr_cpu_update_control, call check_ireset after check_control, since check_ireset now needs to use the virtual-to-physical map. * Users must now format the 2-6 event selector value into the MMCR0/MMCR1 images. * Verify that unused/non-existent parts of MMCR images are zero. - Move tsc_on/nracrs/nrictrs fields to new struct cpu_control_header. - Add declarations of common arch-specific domain numbers and corresponding data structures to . - Implement perfctr_cpu_control_write()/read() in-kernel API. Only handle PERFCTR_DOMAIN_CPU_REGS, as the other domains will be handled in generic code. - Convert sys_vperfctr_write() to accept triples. Have it interpret code for the common domains, and pass unknown domains to perfctr_cpu_control_write(). - In sys_vperfctr_read(), replace "cmd" by "domain" and complete conversion to fine-grained domains for control data. - Remove _reserved and cpu_control fields from struct vperfctr_control. Version 2.7.10, 2005-02-20 - Added new sys_vperfctr_control(), with UNLINK, SUSPEND, RESUME, and CLEAR sub-commands. Deleted sys_vperfctr_unlink() and sys_vperfctr_iresume(). Changed sys_vperfctr_write() to only update control data and not reenable the context. RESUME now works both for resuming after overflow interrupts and for restarting after changing control data. - Renamed old sys_vperfctr_control() to sys_vperfctr_write(). - Define static spinlocks with DEFINE_SPINLOCK(), following new coding style in 2.6.11-rc1. Version 2.7.9, 2005-01-16 - Global information is now published via sysfs, in /sys/class/perfctr/. Removed the perfctr_info() syscall. Renumbered the remaining perfctr syscalls. Version 2.7.8, 2004-11-24 - Changed sys_vperfctr_control() to handle variable-sized cpu_control data. Moved cpu_control field to end of struct vperfctr_control, added size parameter to sys_vperfctr_control(), and changed do_vperfctr_control() to only copy as many bytes as user-space indicates. 
- x86.c: make interrupts_masked flag share cache line with the id field. - Removed ____cacheline_aligned from per-cpu objects. - Changed per-counter control fields to array-of-struct layout placed at the end of struct perfctr_cpu_control. Version 2.7.7, 2004-11-13 - x86/x86-64 kernel change: perfctr_suspend_thread() must be done in switch_to() before entering __switch_to(), so that the suspend is done while owner still is current. Suspending in __switch_to() leaves a window where an interrupt can be delivered to a task (next) which isn't the owner. - Virtual: Updates to handle x86 delayed interrupts issue. Check for pending interrupts at resume. Split interrupt handler to avoid recursion with suspend or resume. Mask signal before sending it to avoid waking current while in switch_to(). suspend now runs in owner's context. - PPC32: Enable overflow interrupt support on IBM 750GX and IBM 750FX DD2.3 processors. Provide dummy pending interrupt check function for compatibility with new API. - x86/x86-64 fixes for delayed LVTPC interrupts: add per-cpu interrupt mask and per-state pending interrupt flag; isuspend masks interrupts, if any i-mode counters are in overflow state the pending interrupt flag is set; iresume unmasks interrupts; identify_overflow clears the pending interrupt flag - Converted sys_vperfctr_read() to a command token based API. - PPC32: Add support for MPC7447A. Add support for MPC7448, except for decoding its PLL_CFG. Enable overflow interrupt support for all G4 processors starting with 7410 Rev 1.3. - remap_page_range() was changed to remap_pfn_range() in kernels 2.6.9-mm1/2.6.10-rc1. Updated virtual.c accordingly. Version 2.7.6, 2004-10-19 - Repair PERFCTR_INTERRUPT_SUPPORT Kconfig option on x86, which got broken by the PPC32 interrupt support changes. - PPC32: Preliminary performance monitor interrupt support. - PPC32: Correct MMCR0 handling for FCECE/TRIGGER. Read MMCR0 at suspend and then freeze the counters. 
Move this code from read_counters() to suspend(). At resume, reload MMCR0 to unfreeze the counters. Clean up the cstatus checks controlling this behaviour. - virtual.c: replace nrctrs_lock with a mutex. Avoids illegal may-sleep-while-holding-lock, caused by mutex operations in perfctr_cpu_{reserve,release}(). Version 2.7.5.1, 2004-09-19 - Fixed p4_clear_counters() to not access IQ_ESCR{0,1} on P4 models >= 3. - Kconfig: make PERFCTR_VIRTUAL default to y. Version 2.7.5, 2004-08-18 - Added missing spin_lock_init(&perfctr->children_lock) to virtual.c:get_empty_vperfctr(), to prevent hangs on SMP. - Changed __vperfctr_release() to use schedule_work() to do the task_lock(parent) etc in a different thread's context. This is because release_task() has a write lock on the task list lock, and task_lock() is forbidden in that case. When current == parent, this is bypassed and the merge work is done immediately without taking task_lock(). Added children_lock to struct vperfctr, to synchronise accesses (release/update_control/read) to the children array. - Use macros to clean up x86 per-cpu cache accesses. - Fix x86_tests to handle P4 in 64-bit mode. - Do sync_core() before rdtsc() in x86_tests, to avoid bogus benchmarking data on K8. Add sync_core() implementation for the 32-bit kernel. Add sync_core() benchmark. - Changed __vperfctr_release() to put the vperfctr but delay the vperfctr_free() via schedule_work(). __vperfctr_release() is called with a write_lock_irq() active, but vperfctr_free() can sleep, which is illegal in this context. This error caused kernel warnings when CONFIG_DEBUG_SPINLOCK_SLEEP=y. - Added __perfctr_mk_cstatus() to allow x86.c:finalise_backpatching() to create a cstatus with i-mode counters marked as present, but with zero actual counters. This prevents perfctr_cpu_isuspend() from clearing the control register for counter #0 at init-time, when the hardware doesn't belong to this driver. On AMD and P6 this would accidentally disable the NMI watchdog. 
Version 2.7.4, 2004-07-17 - (Re)Implemented inheritance. Locking a task's ->thread.perfctr now always takes the task_lock(), as preempt_disable() doesn't handle concurrent child releases. Added __vperfctr_copy() and __vperfctr_release() [for perfctr_release_task()]. Delayed detach from __vperfctr_exit() to __vperfctr_release(). Take task_lock() in control before updating inheritance id and clearing children counts. Added 4th parameter to sys_vperfctr_read(), for reading the children counts. Added perfctr_release_task(). Changed perfctr_copy_thread(&p->thread) to perfctr_copy_task(p, regs). - Remove cpus_andnot() simulation from cpumask.h, as Linus' 2.6.8-rc1 included the new cpumask code. - Fix ppc_check_control() to allow 7400/7410 processors to specify MMCR2[THRESHMULT]. - Use printk_ratelimit() in __vperfctr_set_cpus_allowed(). - Add comment at vperfctr_alloc() that a whole page is claimed and reserved due to mmap(). - Make perfctr_{init,exit}() static. - Stack usage reductions: kmalloc space for control and sum copies in do_vperfctr_control() and do_vperfctr_read(). Eliminate several unnecessary cpumask_t copies. - PPC32 cleanups: make get_cpu_cache() return pointer not lvalue, eliminate duplicated initialisation/cleanup code. - __user annotations on relevant system calls and helper functions. - Add some URLs to the top-level Kconfig option. - Make PERFCTR_INTERRUPT_SUPPORT a Kconfig-derived option. Ditto PERFCTR_CPUS_FORBIDDEN_MASK_NEEDED. - Kconfig: default CONFIG_PERFCTR_INIT_TESTS to n. - x86_tests: fix CONFIG_X86_LOCAL_APIC=n linkage error. - PPC32: support generic CPUs using only the TB. - PPC32: query OF for CPU/TB frequencies, drop /proc/cpuinfo parsing code. - PPC32: avoid CPU re-detection in tests code. - PPC32: clean up and sync with current perfctr-2.6 code. - Add simulation of cpus_andnot() to cpumask.h, allowing the driver to work in Linus' 2.6.7-rc3 w/o the new cpumask code. - Remove __FILE__ from printk()s in ppc.c. 
2.6 kernels expand __FILE__ to a long path, not just the basename. - Recognize model 13 Pentium-Ms. - Replace cpus_complement() mess with cpus_andnot() in virtual.c. - Change plain #if to #ifdef or #if defined() for PERFCTR_{INTERRUPT_SUPPORT,CPUS_FORBIDDEN_MASK_NEEDED}. - Trailing tabs cleanup in ppc_tests.c. - Add dummy struct declarations in the CONFIG_PERFCTR=n case in . They are needed to prevent warnings from the system call prototypes in . Version 2.7.3, 2004-05-31 - More cleanups for 2.6-mm submission: * Bumped API version to 6.0. * Removed temporary dispatch-on-cmd function in virtual.c. Created helper functions for the fd -> vperfctr/tsk mapping, and used them directly in the sys_vperfctr_ entries. * Replaced sys_perfctr() with new syscalls, one per operation. Merged vperfctr open and creat to a single operation. Merged vperfctr read sum and read control. Merged info/cpus/cpus_forbidden to a single operation. There are now 5 vperfctr syscalls and one get-info syscall. * Added init_done handling to the low-level drivers, to ensure unrecognised HW is left alone. Can't rely on the high-level drivers for this check. * Removed structure marshalling code. Syscall parameters are now copied in the traditional way. * Changed ppc32 isuspend_cpu to be int not void*, for 32/64-bit compatibility in possible future G5/970 support. * Extended ppc32 perfctr_sum_ctrs and perfctr_cpu_control to handle 8 counters, for possible future G5/970 support. * Added recognition of PowerPC 750GX. * Removed global-mode perfctrs, to permit a cleaner API for just the virtual perfctrs. Global-mode perfctrs can be reimplemented as a module later if the low-level driver procedures are EXPORT_SYMBOL():d. * Removed x86_compat.h. Added #include to x86.c and x86_tests.c, and cpu_has_msr #define to x86.c. * Moved x86 CPU type constants from to . * Changed x86-64 to use the x86.c driver code. 
Marked initial targets of backpatchable calls 'noinline' to prevent gcc-3.4.0 on x86-64 from inlining them, which completely breaks the backpatching mechanism. Intel's 64-bit P4 should now work in the x86-64 kernel. * Changed x86-64 to use . Added definition of perfctr_cpus_forbidden_mask to x86_64.c, since enables it on SMP. * Changed isuspend_cpu on x86 to be like x86-64's: it now stores a CPU number instead of a cache pointer. * Moved PowerPC register numbers and bit field definitions to . Requested by Tom Rini. * Changed x86 and x86-64 drivers to restrict CPU type detection to what the drivers actually need. perfctr_info.cpu_type is no longer set up at all, and perfctr_cpu_name is more approximate. The drivers record a simplified CPU type for x86_tests, but this only occurs if PERFCTR_INIT_TESTS is configured. * Changed sys_perfctr() to return long instead of int. This fixed a bug where x86-64 zero-extended negative return values to positive ones. The old ioctl() interface did not have this bug because ->ioctl() is declared to return int, so sys_ioctl() did sign-extension. * Changed P4 driver to set up and check an explicit flag for EXTENDED_CASCADE availability. perfctr_info.cpu_type is now unused except for perfctr_x86_init_tests(). * Changed NR_CPUS arrays to per_cpu data. * Reformatted "if( x )" to "if (x)" and similarly for while and switch statements. Version 2.7.2, 2004-05-14 - Cleanups for 2.6-mm submission: * Removed support for kernels older than 2.6.6. * Removed support for building the driver as a module. * Moved user-only parts of the structure marshalling code to the library. * Replaced ioctl() interface with a new system call. * Removed obsolete debug code and DEBUG config option. * Merged ${ARCH}_setup.c into ${ARCH}.c. * Added CAP_SYS_PTRACE check to global.c's syscall. (Without /dev/perfctr we can't use the file system permissions any more.) * Removed obsolete compat.h. * Added mutex to ppc.c's reserve/release procedures. 
* Replaced NMI_LOCAL_APIC #ifdef:s by CONFIG_X86_LOCAL_APIC #ifdef:s in x86.c. Version 2.7.1, 2004-05-10 - Updated x86.c and x86_64.c for the cleaned up version of the lapic NMI ownership patch which was included in kernel 2.6.6. - In kernel 2.6.6 we no longer need access to nmi_perfctr_msr, so removed EXPORT_SYMBOL() and patches related to this variable. For older kernels we still need access to it. Version 2.7.0, 2004-05-04 - Added reassign_lapic_nmi_watchdog()/release_lapic_nmi_watchdog() API to arch/i386/kernel/nmi.c, to allow different drivers to acquire and release the hardware dynamically. If an enabled watchdog is reassigned to a different driver, then it will automatically be enabled upon release. Updated oprofile to use this API and bail out if the hardware is unavailable. Currently this is only available for the 2.6.6-rc3 kernel. - Updated x86.c to use the new API. Added simulation (without the non-conflict guarantees) for older kernels. - Moved hardware reservation to x86.c's "reserve" procedure. The init code now only does read-only hardware detection. - Added a mutex to the reserve/release procedures, eliminating a long-standing race possibility. - Changed x86.c to reserve and release the hardware around its call to perfctr_x86_init_tests(). - Similarly updated x86_64.c for the new API. Version 2.6.7, 2004-05-04 - Replaced x86_64_tests.{c,h} with x86_tests.{c,h}. - sys_device_{,un}register() was renamed as sysdev_{,un}register() in 2.6.4-rc2. Updated x86.c and x86_64.c accordingly, and added a compatibility definition in compat.h. - Removed unnecessary '#include "compat.h"' from x86_tests.c. - Replaced x86_64_setup.c with x86_setup.c. - Replaced x86_64_compat.h with x86_compat.h. - Moved perfctr_interrupt entry point from x86_setup.c to patch kit, for kernels older than 2.4.21. Cleanup to facilitate future merge of x86_setup.c and x86_64_setup.c. 
Version 2.6.6, 2004-02-21 - Fixed a bug in x86-64's perfctr interrupt entry code in 2.4 kernels, causing it to pass the wrong value for "struct pt_regs*". This was harmless since the retrieved "rip" was unused, but still wrong. Renamed do_perfctr_interrupt to smp_perfctr_interrupt to allow using the 2.4 kernel's standard BUILD_SMP_INTERRUPT macro. - Unmask LVTPC after interrupt on Pentium-M. An oprofile user reports that P-M auto-masks LVTPC just like P4. Preliminary measurements indicate a 40 to 60 cycle cost for the apic write on P4s and P6s, so the unmask is not done unconditionally. - Measure LVTPC write overhead in x86{,_64}_tests.c. - Add Pentium 4 Model 3 detection. - The 2.4.21-193 SuSE kernel does EXPORT_SYMBOL(mmu_cr4_features). Add compat24.h workaround for this. Version 2.6.5, 2004-01-26 - Added perfctr_info.cpu_type constants to . - Init filp->f_mapping in virtual.c for 2.6.2-rc1+ kernels. - Updated p4_check_control(): * Allow ESCR.CPL_T1 to be non-zero when using global-mode counters on HT processors. * Don't require ESCR.CPL_T0 to be non-zero. CPL_T0==0b00 is safe and potentially useful (global counters on HT). * Require CCCR.ACTIVE_THREAD==0b11 on non-HT processors, as documented in the IA32 Volume 3 manual. Old non-HT P4s seem to work Ok for all four values (see perfctr-2.6.0-pre3 notes), but this is neither guaranteed nor useful. - x86.c now detects & records P4 HT-ness also in UP kernels. - Added 'is_global' parameter to perfctr_cpu_update_control(). This flag is ignored on everything except P4 (sigh). Version 2.6.4, 2004-01-12 - Added 'tsc_to_cpu_mult' field to struct perfctr_info, replacing '_reserved1'. This is needed on PowerPC to map time-base ticks to actual time. On x86/AMD64, tsc_to_cpu_mult == 1. - Added support for PowerPC 604/7xx/74xx processors. Overflow interrupts are currently not allowed due to the PMI/DECR erratum. - Replaced perfctr_cpus_mask() with cpus_addr(). 
Updated cpumask.h to define cpus_addr() for kernels older than 2.6.1. Version 2.6.3-pl1, 2004-01-01 - Moved the x86 interrupt handler definition from x86_setup.c to the patch kit for 2.4.21 and later 2.4 kernels, like it already is done for 2.6 kernels. This change is needed due to extensive interrupt handler changes in RedHat's 2.4.21-6.EL kernel. - Simplified : now that early 2.4 kernels no longer are supported, LOCAL_PERFCTR_VECTOR is known to be defined, so CONFIG_X86_LOCAL_APIC implies PERFCTR_INTERRUPT_SUPPORT. Version 2.6.3, 2003-12-21 - Removed gperfctr_cpu_state_only_cpu_sdesc's total_sizeof optimisation. The ABI change in 2.6.2 broke it, leading to the new fields not being cleared and later causing EOVERFLOW. - The perfctr_ioctl32_handler() workaround is now only applied to kernels older than 2.4.23, since 2.4.23 added the "NULL handler == sys_ioctl" logic. Version 2.6.2, 2003-11-23 - Added 16 bytes (four fields) of reserved data to perfctr_info, perfctr_cpu_control, vperfctr_control, gperfctr_cpu_control, and gperfctr_cpu_state. Renumbered marshalling tags for generic structures. Bumped ABI versions. - Only allow use of IQ_ESCR{0,1} on P4 models <= 2. These ESCRs were removed from later models, according to a recent Intel documentation update (252046-006). - Fixes for Fedora Core 1's 2.4.22-1.2115.nptl kernel: * Work around their incomplete and broken cpumask_t backport. * Avoid name conflict due to their on_each_cpu() backport. * Handle their preempt_disable()/enable() macros. - Added new perfctr_cpu_is_forbidden() macro to fix a compilation error affecting AMD64 in SMP 2.6 kernels. SMP cpu_isset() requires that mask is an lvalue, but for AMD64 the mask is a constant. Version 2.6.1, 2003-10-05 - Kernel 2.6.0-test6 changed /proc/self and the /proc/<pid>/ namespace to refer to "processes" (groups of CLONE tasks) instead of actual kernel tasks. This forced the planned transition of the vperfctr API from /proc/<pid>/perfctr to /dev/perfctr to occur immediately. 
Changes: * Moved /dev/perfctr implementation from global.c to init.c. * Implemented VPERFCTR_{CREAT,OPEN}, vperfctr_attach(), and the vperfctrfs pseudo-fs needed to support the magic files. The fs code was ported from perfctr-1.6/3.1, but updated for 2.6 and fixed to permit module unloading in 2.4. * Fixed VPERFCTR_OPEN to accept tsk->thread.perfctr == NULL. (Needed for info querying commands.) * Removed /proc/<pid>/perfctr code. Simplified vperfctr_stub code. * Updated vperfctr_attach() to mimic the old /proc vperfctr_open(). This fixes some synchronisation issues. - Cleanups: * Removed #if checks and code for kernels older than 2.4.16. * Eliminated compat macros that are identical in 2.6 and 2.4. * Moved ptrace_check_attach EXPORT_SYMBOL from x86{,_64}_setup.c to virtual_stub.c. * get_task_by_proc_pid_inode() is now trivial. Eliminated it. * p4_ht_finalise() is now trivial. Eliminated it. - Added MODULE_ALIAS() declaration, eliminating the need for an alias in /etc/modprobe.conf with 2.6 kernels. Added MODULE_ALIAS() compatibility #define in compat24.h. - Added detection of AMD K8 Revision C processors. - Updated K8C detection for Revision C Athlon64s. Version 2.6.0, 2003-09-08 - Handle set_cpus_allowed() when PERFCTR_CPUS_FORBIDDEN_MASK_NEEDED: * Add bad_cpus_allowed flag to struct vperfctr. * Check bad_cpus_allowed in __vperfctr_resume: if resuming with PMCs on forbidden CPU, kill counters and SIGILL current. * __vperfctr_set_cpus_allowed() callback: set bad_cpus_allowed and print warning if mask allows forbidden CPUs. * Use task_lock/unlock instead of preempt_disable/enable to synchronise task_struct accesses. * Ensure sampling_timer and bad_cpus_allowed share cache line. * #include explicitly for 2.4.18 and older kernels; newer kernels include it from . * Hook in virtual_stub.c. * Hook and cpumask_t typedef in . - Simplify #if test for set_cpus_allowed() emulation code. Also don't define it if CONFIG_PERFCTR_VIRTUAL isn't set. 
- cpumask.h only typedefs cpumask_t if hasn't. - Don't hide #include in compat24.h. - Fixed compat24.h to test for MODULE not CONFIG_MODULES at the __module_get/module_put macros. Version 2.6.0-pre5, 2003-08-31 - printk() is not allowed in switch_to(). Disabled debug code which could violate that rule. Changed virtual_stub.c to BUG() instead of printk() if the driver is invoked when not loaded. - Renamed vperfctr_exit2() to vperfctr_unlink() for clarity. - gcc-3.3.1 issued several "dereferencing type-punned pointer will break strict-aliasing rules" warnings for marshal.c. Used explicit unions to fix the warnings and clean up the code. - Removed compat22.h. - cpumask_t was included in standard 2.6.0-test4; replace #ifndef test in cpumask.h with normal kernel version test. - x86-64 fix: sys_ioctl() isn't exported to modules, so call filp->f_op->ioctl() instead in perfctr_ioctl32_handler(). - x86-64 fix: init.c must include not for compatibility with 2.4 kernels. Version 2.6.0-pre4, 2003-08-19 - Fix x86-64 register_ioctl32_conversion() usage for 2.4 kernels: * Supply dummy handler since a NULL handler oopses the kernel. * Test CONFIG_IA32_EMULATION since CONFIG_COMPAT is post-2.4. - Fixed and merged the new API struct marshalling code: * New files marshal.c and marshal.h contain the marshalling code and high-level helper functions (source shared with the library). * User-space structs are struct perfctr_struct_buf and accessed using perfctr_copy_{from,to}_user() with ptr to appropriate descriptor. The cpumask stuff isn't changed. * All ioctls registered as trivially 32-bit compatible on x86-64. * Changed perfctr_info cpu_type/cpu_features from short to int: this avoids the need for UINT16 marshalling support, and cpumask_t caused perfctr_info to change binary representation anyway. - Declared VPERFCTR_{CREAT,OPEN} ioctls, but left them unimplemented. - Fixed vperfctr_open() preemption bug. 
The O_CREAT check+install code could be preempted, leading to remote-control races. - Fixed perfctr_exit_thread() preemption bug. It detached the vperfctr before calling __vperfctr_exit(). If current was preempted before __vperfctr_exit() called vperfctr_suspend(), perfctr_suspend_thread() would fail to suspend the counters. The suspend+detach is now done atomically within __vperfctr_exit(). - Changes to handle 2.6 kernels with the cpumask_t patch (-mm, -osdl): * Convert perfctr_cpus_forbidden_mask accesses to cpumask_t API. Based in part on a patch for the -osdl kernel by Stephen Hemminger. * Remove cpus and cpus_forbidden from struct perfctr_info, since their sizes depend on the kernel configuration. * Add struct perfctr_cpu_mask to export cpumask_t objects sanely (i.e., using ints not longs) to user-space. * Add CPUS and CPUS_FORBIDDEN commands to retrieve these sets. * Add cpumask.h to emulate cpumask_t API in cpumask_t-free kernels. * Move perfctr_cpus_forbidden_mask declaration/#define from to cpumask.h -- necessary since doesn't have access to the driver's compatibility definitions. - Cleaned up perfctr_cpu_ireload(). - Removed struct field offset check from init.c. - 2.4.22-rc1 does EXPORT_SYMBOL(mmu_cr4_features). Added new compat #define to handle this. - Rename x86.c's rdmsrl() to rdmsr_low() to work around msr.h changes in 2.6.0-test3. Also rename rdpmcl() to rdpmc_low(). - Replaced __attribute__((__aligned__(SMP_CACHE_BYTES))) usage with the official ____cacheline_aligned macro. - Detect cpuid 0x69x VIA C3s (Antaur/Nehemiah). Version 2.6.0-pre3, 2003-08-03 - Changed perfctr_info.cpus and cpus_forbidden to be int instead of long, to make x86-32 and x86-64 compatible. This is a temporary solution, as there are patches for >32 CPUs on x86-32. The real solution is to make these sets variable-sized, and have user-space retrieve them with a new command. - Simplified GPERFCTR_CONTROL to update a single CPU instead of a set of CPUs. 
Moved cstatus clearing to release_hardware(). - Moved gperfctr start to new GPERFCTR_START command. - Simplified GPERFCTR_READ to access a single CPU instead of a set of CPUs. - Removed the requirement that CCCR.ACTIVE_THREAD == 3 on P4. HT processors define behaviour for all four possible values, and non-HT processors behave sanely for all four values. - Moved struct perfctr_low_ctrs definition from to the corresponding low-level driver, since it's only used there. - Changed perfctr_info.cpu_khz and vperfctr_control.preserve to be int instead of long. This corrects x86-64 and makes it compatible with x86-32. - Updated x86.c to permit extended cascading on P4M2. - Fixed a bug where the perfctr module's refcount could be zero with code still running in the module (pending returns to exit_thread()). This could race with rmmod in preemptive kernels, and in theory also in SMP kernels. * module owner field added to vperfctr_stub * _vperfctr_exit() in the modular case is now a function in vperfctr_stub.c, which brackets the vperfctr_stub.exit() call with __module_get() and module_put() on vperfctr_stub.owner * updated 2.4 and 2.2 compat definitions of __module_get() and module_put() to work for modules != THIS_MODULE - Replaced uses of (void)try_module_get() with __module_get() as the latter is more appropriate for 2.6 kernels. Updated compat stuff. Version 2.6.0-pre2, 2003-07-13 - vperfctr API fixes: * The new VPERFCTR_READ_CONTROL command retrieves a vperfctr's control data. * Renamed VPERFCTR_SAMPLE to VPERFCTR_READ_SUM, and made it write the sums to a perfctr_sum_ctrs user-space buffer. * Non-write commands are now always permitted on unlinked perfctrs. The first change was needed since the control data no longer is accessible via the mmap()ed state. The other changes clean up and simplify perfex and the library's slow-path read_ctrs() operation. - sys_vperfctr_ functions now mark the tsk parameter as "const" if they don't need write access to it. 
Typically they only need to compare it with current to detect self-access cases. - perfctr_cpu_state no longer makes the perfctr_cpu_control part accessible to user-space (via mmap() of vperfctrs). - Simplified {set,is}_isuspend_cpu() in x86_64.c by having callers pass the CPU number instead of the cache pointer (which was only used to derive the CPU number). - Eliminated NMI_LOCAL_APIC #ifs from x86-64 code since x86-64 always defines it. - x86.c cleanups: the non-PERFCTR_INTERRUPT_SUPPORT case now uses dummy stub functions, eliminated six #ifdefs. - x86_64_setup.c needs . - Protected cpu_has_mmx and cpu_has_ht #defines in x86_compat.h with #ifndef since 2.4.22-pre3 added those #defines. - Eliminated PERFCTR_INTERRUPT_SUPPORT #ifs from x86-64 code since x86-64 always defines CONFIG_X86_LOCAL_APIC. - Removed the P4-specific versions of isuspend() and iresume(). P4 now uses p6_like_{isuspend,iresume}(), just like P6/K7/K8. - Long overdue cleanup in x86.c/x86_64.c: renamed per_cpu_cache pointer variables from 'cpu' to 'cache'. - Added inline functions in virtual.c for registering the overflow handler and for clearing iresume_cstatus. Cleaned out several #if PERFCTR_INTERRUPT_SUPPORT occurrences from the main code. (Partial backport from the abandoned perfctr-3.1 branch.) - Inlined now useless 'struct vperfctr_state' in 'struct vperfctr'. Version 2.6.0-pre1, 2003-07-02 - Rearranged 'struct perfctr_cpu_state' to reduce the number of cache lines needed to be touched by key operations (suspend, resume, sample). Switched from struct-of-arrays to array-of-struct for perfctr counts, and copied pmc_map into the PMC data array. The old representation touched at least 3 cache lines at key operations, the new one only needs one cache line in most cases. The user-space mmap() view of the new representation is binary compatible between x86 and x86-64. - Changed 'isuspend_cpu' in perfctr_cpu_state on x86-64 to be a 32-bit CPU number, to maintain binary compatibility with x86. 
- Removed the union of p5_cesr and id; use id throughout. - Removed _filler and si_signo from 'struct vperfctr_state', making the user-space view of it identical to 'struct perfctr_cpu_state'. Version 2.5.5, 2003-06-15 - Updated x86 driver for 2.5.71 local APIC driver model changes. - Updated x86-64 driver for 2.5.71 NMI watchdog enable/disable API. - x86-64 is broken in 2.5.71 since x86-64 updated to driver model for local APIC and NMI watchdog, at the same time as x86 moved to a newer version of the "system device" driver model. Updated the x86-64 driver for the new model, which is expected to be in x86-64 by 2.5.72 (patch exists for 2.5.71). Version 2.5.4, 2003-06-01 - The generic-x86-with-TSC driver now uses rdpmc_read_counters and p6_write_control instead of its own procedures. - K8 docs are now available. Updated comment in x86.c accordingly. - P4 OVF_PMI+FORCE_OVF counters didn't work at all, resulting in BUG messages from the driver since identify_overflow failed to detect which counters had overflowed, and vperfctr_ihandler left the vperfctr in an inconsistent state. This works now. However, hardware quirks make this configuration only useful for one-shot counters, since resuming generates a new interrupt and the faulting instruction again doesn't complete. The same problem can occur with regular OVF_PMI counters if ireset is a small-magnitude value, like -5. This is a user-space problem; the driver survives. - On P4, OVF_PMI+FORCE_OVF counters must have an ireset value of -1. This allows the regular overflow check to also handle FORCE_OVF counters. Not having this restriction would lead to MAJOR complications in the driver's "detect overflow counters" code. There is no loss of functionality since the ireset value doesn't affect the counter's PMI rate for FORCE_OVF counters. - Moved P4 APIC_LVTPC reinit from p4_isuspend() to identify_overflow(). Reduces context-switch overheads when i-mode counters are active.
- Corrected vperfctr_suspend()'s precondition. - Corrected comment in to state that ireset[] values must be negative rather than non-positive. - Made 'perfctr_cpu_name' __initdata, like its predecessor. Version 2.5.3.1, 2003-05-21 - Replaced 'char *perfctr_cpu_name[]' by 'char *perfctr_cpu_name'. This is needed for x86-64 and other non-x86 architectures. - Changed to use 'long long' for 64-bit sums. This doesn't change the ABI, but improves user-space source code compatibility with 32-bit x86. - Removed the !defined(set_cpus_allowed) check added to compat24.h in 2.5.3. It's wrong for SMP builds with modules and MODVERSIONS, since the set_cpus_allowed() emulation function becomes a #define from include/linux/modules/x86_setup.ver. Instead add the already used HAVE_SET_CPUS_ALLOWED #define to include/linux/config.h in the kernel patch, but make it conditional on CONFIG_X86_64. Version 2.5.3, 2003-05-16 - Added detection code for Pentium M. MISC_ENABLE_PERF_AVAIL is now checked on both P4 and Pentium M. - Added x86_64 driver code. Both x86_64.c and asm-x86_64/perfctr.h are basically simplified versions of corresponding x86 files, with P5 and P4 support removed, 2.2 kernel support removed, and 'long long' for sums replaced by 'long'. The last change is painful for user-space and may be reverted. - compat24.h: don't define set_cpus_allowed() if already #defined, workaround for RawHide's 2.4.20-9.2 x86_64 kernel. - Removed list of supported CPUs from Kconfig. That information belongs elsewhere (and it's a pain to maintain for 2.2/2.4). Version 2.5.2, 2003-04-13 - Minor cleanup: use PROC_I() unconditionally in virtual.c, implement trivial compat macro in compat24.h. - Updated power management code for the local APIC and NMI watchdog driver model changes in kernel 2.5.67. The suspend/resume procedures are still no-ops, however. This revealed a bug in the lapic_nmi_watchdog resume code: it resumes the lapic_nmi_watchdog even when it was disabled before suspend. 
Perfctr's 2.5.67 kernel patch includes a fix. - perfctr_sample_thread() is now used also on UP. Anton Ertl's 2.26GHz UP P4 managed to execute a process for more than 2^32 cycles before suspending it, causing TSC inaccuracies. - RH9's 2.4.20-8 kernel changed cpu_online(), put_task_struct() and remap_page_range() to be more like in 2.5 kernels, and moved the declaration of ptrace_check_attach() from mm.h to ptrace.h, also like in 2.5 kernels, requiring fixes to compat24.h and x86_setup.c. - Added note in x86.c about the new Pentium M processor. Version 2.5.1, 2003-03-23 - Fix P4 HT initialisation. I've seen several boot logs from people running MP P4 Xeons with HT disabled: this produces an ugly "restricting access for CPUs 0x0" message, and would cause P4 HT init to unnecessarily return error in older kernels lacking set_cpus_allowed(). Now only print the message or signal error if non-zero siblings actually are found. - The set_cpus_allowed() emulation doesn't compile in 2.4 kernels older than 2.4.15 due to the p->cpus_running field. Updated version checks to skip it in 2.4.x when x<15. - Fix set_cpus_allowed() emulation compile error on BUG_ON() in 2.4 kernels older than 2.4.19. - Added Nehemiah note/reminder in x86.c:centaur_init(). Version 2.5.0, 2003-03-10 - Reverted the 2.5.0-pre2 change that replaced the PERFCTR_INFO ioctl by read(): it made the API look too weird. Added a PERFCTR_ABI ioctl which only retrieves 'abi_version'. - Cleaned up struct perfctr_info: renamed abi_magic to abi_version, and version to driver_version. Renamed PERFCTR_*_MAGIC too. - Cleaned up struct perfctr_cpu_control: moved evntsel_aux[] into the p4 sub-struct and renamed it as escr[]. Only P4 needs it anyway, and the new name clarifies its purpose. - Renumbered the vperfctr ioctls to the 8-15 range (8-11 are used) and reserved 0-7 (0-1 are used) for generic ioctls. 
- Added 'use_nmi' field to struct gperfctr_control, reserved for future use if/when support for i-mode gperfctrs is implemented. - Replaced some preempt/smp_call_function combinations with 2.5.64's new on_each_cpu() construct. Added compatibility definitions to compat24.h and compat22.h. Version 2.5.0-pre2, 2003-03-03 - Added ABI version to perfctr_info. Replaced PERFCTR_INFO ioctl by read() on the fd, since that allows reading the ABI version even in the case of a version mismatch. Removed binary layout magic number from vperfctr_state. Rearranged perfctr_info to make the 'long' fields 8-byte aligned. - Added #ifdef CONFIG_KPERFCTR to to ensure that isn't included unless CONFIG_KPERFCTR=y. This allows the patched kernel source to compile cleanly also in archs not yet supported by perfctr. - Removed PERFCTR_PROC_PID_MODE #define and replaced it with /*notype*/S_IRUSR in the patch files. - Added perfctr_vector_init() to . Cleaned up arch/i386/kernel/i8259.c patch. - Removed apic_lvtpc_irqs[] array. Removed irq.c patch. - Updated CONFIG_PERFCTR_INIT_TESTS help text to match reality. - Kernel 2.4.21-pre5 added set_cpus_allowed(), which required fixing compat24.h and x86_setup.c. - Fixed init.c for kernel 2.5.63 removing EXPORT_NO_SYMBOLS. - Cleaned up compat.h by moving 2.2/2.4 stuff to separate files. Version 2.5.0-pre1, 2003-02-19 - Repair global perfctr API: the target CPUs are now explicit in the calls to write control and read state. Global perfctrs now work on 2.5 SMP kernels (which no longer have smp_num_cpus or cpu_logical_map()), and HT P4s (asymmetric MPs). - struct perfctr_info has new bitmask fields for the set of CPUs (cpu_online_map) and forbidden CPUs; dropped the nrcpus field. - add cpu_online() compat macro to compat.h - VPERFCTR_STOP is subsumed by VPERFCTR_CONTROL. Removed it. - Detect K8 as K8 not K7. They are not identical. - Makefile cleanup: moved 2.4/2.2 kernel stuff to Makefile24. - Makefile fix: removed export-objs for 2.5 kernels. 
- Kconfig fix: don't mention obsolete .o module suffix. Version 2.4.5, 2003-02-09 - Fixed two minor compile warnings in x86_tests.c for 2.5 kernels. Version 2.4.4, 2003-01-18 - Fixed a bug in iresume() where an interrupt-mode counter could increment unexpectedly, and also miss the overflow interrupt. The following setup would cause the problem: P1 has EVNTSELn in non-interrupt mode, counting some high- frequency event (e.g. INST_RETIRED) in kernel-mode. P2 has EVNTSELn in interrupt-mode, counting some low-frequency event (e.g. MMX_ASSIST) in user-mode. P1 suspends. Since EVNTSELn is in non-interrupt mode, it is not disabled. P2 resumes. First iresume() finds that the CPU cache ID is not P2's, so it reloads PERFCTRn with P2's restart value. Then write_control() reloads EVNTSELn with P2's EVNTSEL. At this point, P2's PERFCTRn has been counting with P1's EVNTSELn since iresume(), so it will no longer equal P2's restart value. And if PERFCTRn overflowed, the overflow will go undetected since P1's EVNTSELn was in non-interrupt mode. To avoid this problem, iresume() now ensures that a counter's control register is disabled before reloading the counter. - Fixed some ugly log messages from the new HT P4 init code: * forbidden_mask would be printed as "0X" (capital X) * finalise_backpatching() could trigger a BUG! printk from p4_write_control() if the CPU the init code runs on was in the forbidden set. At init-time this is not an error. Avoided this by temporarily resetting the forbidden_mask. - Added preliminary support for AMD K8 processors with the regular 32-bit x86 kernel. The K8 performance counters appear to be identical or very similar to the K7 performance counters. Version 2.4.3, 2002-12-11 - Added x86.c:perfctr_cpus_forbidden_mask. This bitmask describes the set of CPUs that must not access the perfctrs. 
On HT P4 MPs, only logical CPU #0 in each package is allowed access -- this avoids the resource conflict that would occur if both logical processors were to access the perfctrs. In other cases (UP or non-HT-P4 MPs) the mask is zero. - vperfctr_control() now calls set_cpus_allowed() to ensure that the task stays away from CPUs in perfctr_cpus_forbidden_mask. This is racy with sys_sched_setaffinity(), and possibly some of the kernel's internal set_cpus_allowed() calls, but the race is unlikely to occur in current 2.4 kernels. - Cleaned up the parameter passing protocol between vperfctr_ioctl() and the individual vperfctr "system call" procedures. - Added safety check in global.c to disallow global-mode perfctrs on asymmetric MPs until the API has been fixed. - Added set_cpus_allowed() implementation for 2.4 kernels, except those that already have it as indicated by HAVE_SET_CPUS_ALLOWED: this symbol is added to by the kernel patch. - 2.2 kernels can't enforce CPU affinity masks, so x86.c warns if a HT P4 MP runs a 2.2 kernel, and falls back to generic x86 mode. Added dummy set_cpus_allowed() macro for 2.2 kernels. - x86_compat.h now implements cpuid_ebx() and cpu_has_ht for old kernels. - Makefile cleanup: Rules.make is obsolete in 2.5. - Compile fixes in x86.c and virtual_stub.c: needs to be included explicitly for the 2.5.50 kernel. Version 2.4.2, 2002-11-25 - Fixed virtual.c:inc_nrctrs() to handle the -EBUSY case correctly. If the HW was busy (e.g. global running), then the first attempt to open a vperfctr would fail but further attempts would succeed. Updated error propagation to distinguish -EBUSY from -ENOMEM. - Updated global.c for preempt-safety. - Made the driver safe for preemptible kernels. This required a lot of analysis, but resulted in relatively few actual code changes. (Backport from the perfctr-3.1 branch.) - Ported to 2.5.48: Replaced MOD_INC_USE_COUNT by try_module_get() and MOD_DEC_USE_COUNT by module_put(). Updated compat.h. 
- Ported to 2.5.45: added Kconfig, removed Config.help. Version 2.4.1, 2002-10-12 - RedHat 8.0's 2.4.18-14 kernel does EXPORT_SYMBOL(cpu_khz) while the vanilla 2.4.18 does not. This clashes with x86_setup.c's EXPORT_SYMBOL(cpu_khz). I've found no easy way to distinguish between these kernels at C preprocessing time, so I changed x86_setup.c to define a trivial perfctr_cpu_khz() function and EXPORT_SYMBOL that one instead. Version 2.4.0, 2002-09-26 - Config.help updated to state that Pentium 4 is supported. - 2.5.32 moved ptrace_check_attach() declaration to ptrace.h. - Removed redundant /proc/pid/perfctr access control check from vperfctr_stub_open(). Since 2.4.0-pre1 this check didn't match the real one, which prevented remote opens when the driver was built as a module. Version 2.4.0-pre2, 2002-08-27 - vperfctr_control() now allows the user to specify that some PMC sums are not to be cleared when updating the control. There is a new bitmap field `preserve' in struct vperfctr_control: if bit i is set then PMC(i)'s sum is not cleared. `preserve' is a simple `unsigned long' for now, since this type fits all currently known CPU types. This change breaks binary compatibility, but user-space code which clears the entire control record before filling in relevant fields will continue to work as before after a recompile. This feature removes a limitation which some people felt was a problem for some usage scenarios. Version 2.4.0-pre1, 2002-08-12 - Initial implementation of a new remote-control API for virtual per-process perfctrs. A monitor process may access a target process' perfctrs via /proc/pid/perfctr and operations on that file, if the monitor holds the target under ptrace ATTACH control. Updated virtual.c to allow remote access. Updated x86.c:perfctr_cpu_ireload() to work also in the remote control case on SMP machines. Version 2.3.12, 2002-08-12 - Trivial comment fixes in compat.h and x86_compat.h.
- Removed __vperfctr_sample(), vperfctr_stub.sample, and bug_sample() from UP builds, since they are needed only on SMP. Version 2.3.11, 2002-07-21 - Accumulated sums are now maintained for interrupt-mode perfctrs. User-space can use the standard syscall-less algorithm for computing these counters' current sums, should that be needed. Version 2.3.10, 2002-07-19 - Added PERFCTR_X86_INTEL_P4M2 CPU type for Model 2 P4s, since they have ESCR Event Mask changes in a few events. - The driver now supports replay tagging events on P4, using the pebs_enable and pebs_matrix_vert control fields added in 2.3.8. - Some Pentium MMX and Pentium Pro processors have an erratum (Pentium erratum #74, Pentium Pro erratum 26) which causes SMM to shut down if CR4.PCE is set. intel_init() now clears the RDPMC feature on the affected steppings, to avoid the problem. - perfctr_cpu_release() now clears the hardware registers and invalidates the per-cpu cache. This should allow the counter hardware to power down when not used, especially on P4. - Callers of update_control() have no active i-mode counters. Documented this as a precondition, and changed update_control() to not call isuspend(). update_control() no longer needs hardware access, which should ease a port to CONFIG_PREEMPT=y. Version 2.3.9, 2002-06-27 - Updated p4_escr_addr() in x86.c to match the latest revision of Intel's IA32 Volume 3 manual, #245472-007. An error in previous revisions of this document caused the driver to program the wrong ESCR in some cases. (CCCRs 12/13/16 with ESCR_SELECT(2) were mapped to SSU_ESCR0 instead of RAT_ESCR0, affecting the uop_type event.) Version 2.3.8, 2002-06-26 - Added counter overflow interrupt support for Intel P4. - 2.5.23 dropped smp_num_cpus and cpu_logical_map(). Added temporary workarounds to x86.c and global.c to allow compilation and testing under 2.5. May have to change the API (esp. global's) to be based on the sparse cpu_online_map instead. - RedHat's 2.4.9-34 defines cpu_relax(). 
Updated compat.h. - Added pebs_enable and pebs_matrix_vert fields (currently unused) to perfctr_cpu_control to support replay tagging events on P4. Updated the perfctr_cpu_state binary layout magic number. - Silenced redefinition warnings for MSR_P6_PERFCTR0 and cpu_has_mmx. - Updated Makefile for the 2.5.19 kernel's Makefile changes. - Merged the P6 and K7 isuspend/iresume/write_control driver code. - Added a VC3 specific clear_counters() procedure. - Removed pointless code from perfctr_cpu_identify_overflow(). - Removed _vperfctr_get/set_thread() wrappers and thread->perfctr clobber checks from the DEBUG code. Removed unused "ibuf" and obsolete si_code fields from vperfctr state and control objects. Updated the vperfctr state magic number. - Fixed the CONFIG_PREEMPT anti-dependency check in Config.in. - vperfctr_control() now preserves the TSC sum on STOP;CONTROL transitions. The failure to do this caused problems for the PAPI P4 support being developed. Version 2.3.7, 2002-04-14 - Kernel 2.5.8-pre3 changed the way APIC/SMP interrupt entries are defined. Defining these with asm() in C is no longer practical, so the kernel patch for 2.5.8-pre3 now defines the perfctr interrupt entry in arch/i386/kernel/entry.S. - Permit use of cascading counters on P4: in the slave counter one sets the CASCADE flag instead of the ENABLE flag. - Added P4 hyperthreading bit field definitions. - Preliminary infrastructure to support a new remote-control interface via ptrace(). Updates to compat.h, virtual.c, virtual_stub.c, and x86_setup.c. ptrace_check_attach() emulation for older kernels is in x86_setup.c since virtual_stub.c isn't compiled if the driver isn't a module. Version 2.3.6, 2002-03-21 - Rewrote sys_vperfctr_control() to do a proper suspend before updating the control, and to skip trying to preserve the TSC start value around the resume. This cleaned up the code and eliminated the bogus "BUG! 
resuming non-suspended perfctr" warnings that control calls to active perfctrs caused. - Rewrote sys_vperfctr_iresume() to not preserve the TSC start value around the resume. Since we had just done a suspend(), this would cause double-accounting of the TSC. Version 2.3.5, 2002-03-17 - Added detection of the VIA C3 Ezra-T processor. - CPU detection now uses current_cpu_data instead of boot_cpu_data, to avoid the boot_cpu_data.x86_vendor bug which is present in all current 2.2/2.4/2.5 kernels. The bug caused the x86_vendor field to be cleared on SMP machines, which in turn tricked the driver to identify MP AMD K7 machines as MP Intel P6, with disastrous results when the wrong MSRs were programmed. - Updated compat.h for /proc/pid/ inode change in 2.5.4. - Added a check to prevent building on preemptible 2.4/2.5 kernels, since the driver isn't yet safe for those. - Put perfctr's configuration help text in Config.help in this directory: kernel 2.5.3-pre5 changed from having a common Configure.help file to having local Config.help files. Version 2.3.4, 2002-01-23 - Updated virtual.c for remap_page_range() change in 2.5.3-pre1. Added emulation for older kernels to compat.h. - Permit use of tagging on P4 for at-retirement counting. This may not yet work as expected, since up-stream (tag producing) counters aren't disabled at context switches: a process may therefore see more tagged uops than expected. - Fixed uses of __FUNCTION__ to comply with changes in GCC 3.0.3. Version 2.3.3, 2001-12-31 - Minor x86.c cleanup: reordered function definitions so that write_control comes after isuspend/iresume: this makes it easier to follow the runtime control flow. - Fixed isuspend()/iresume()'s broken cache checking protocol. The old protocol didn't handle process migration across CPUs in SMP machines correctly, as illustrated by the following scenario: P1 runs on CPU1 and suspends. P1 and CPU1 now have the same cache id (->k1.id).
P1 is resumed and suspended on CPU2: the state in CPU1 is now stale. Then P1 is resumed on CPU1, and no other process has been using CPU1's performance counters since P1's last suspend on CPU1. The old protocol would see matching cache ids and that P1's i-mode EVNTSELs are stopped, so it would accept the cache and resume P1 with CPU1's stale PERFCTRS values. In the new protocol isuspend() records the active CPU in the state object, and iresume() checks if both the CPU and the control id match. The new protocol is also simpler since iresume() no longer checks if the i-mode EVNTSELs are cleared or not. - P6 nasty i-mode to a-mode context switch bug fixed: p6_isuspend() used to simply clear EVNTSEL0's Enable flag in order to stop all i-mode counters. Unfortunately, that was insufficient as shown by the following case (which actually happened). P1 has EVNTSEL0 in a-mode and EVNTSEL1 in i-mode. P1 suspends: PERFCTR1 is stopped but EVNTSEL1 is still in i-mode. P2 has EVNTSEL0 in a-mode and no EVNTSEL1. P2 resumes and updates EVNTSEL0. This activates not only P2's PERFCTR0 but also the dormant PERFCTR1. If PERFCTR1 overflows, then P2 will receive an unexpected interrupt. If PERFCTR1 doesn't overflow, but P2 suspends and P1 resumes, then P1 will find that PERFCTR1 has a larger than expected value. p6_isuspend() and p6_iresume() were changed to ignore the global Enable flag and to disable/enable each i-mode EVNTSEL individually, just like how it's done on the K7. - x86.c cleanups: P5MMX, MII, C6, VC3, P6, K7, and P4 now all use the same rdpmc_read_counters() method. VIA C3 now uses p6_write_control() instead of its own method. - Removed "pmc_map[] must be identity" restriction from P6 and K7. The API uses the virtual counter index to distinguish a-mode and i-mode counters, but P6 events aren't entirely symmetric: this led to some strange cases with the old pmc_map[] rule.
P6 and K7 isuspend() now need access to the control, so update_control() and its callers had to be changed to allow it to isuspend() _before_ the new control is installed. - P4 write_control fixes: changed the ESCR cache to be indexed by MSR offset from 0x3A0, and changed P4 write_control to index the CCCR/ESCR cache with physical instead of virtual indices. Added call to debug_evntsel_cache(), after updating it for pmc_map[]. - Added P4 and Generic support to x86_tests.c, and some cleanups. Version 2.3.2, 2001-11-19 - P4 fix: the mapping from CCCR 17 to its associated ESCRs was wrong due to an off-by-one error in x86.c:p4_escr_addr(). - P4 fix: also clear the PEBS MSRs when initialising the driver. - Minor cleanup in x86.c: replaced the "clear MSRs" loops with calls to a helper procedure. Version 2.3.1, 2001-11-06 - Microscopic P4 cleanups. Testing on my new P4 box has confirmed that the PMAVAIL flag in MSR_IA32_MISC_ENABLE is read-only. Version 2.3, 2001-10-24 - Added support for multiple interrupt-mode virtual perfctrs with automatic restart. Added an identify_overflow() method to x86.c to identify and reset the overflowed counters. Added checks to ensure that the user-specified restart values for interrupt-mode counters are negative. Updated virtual.c's signal delivery interface to pass a bitmask describing which counters overflowed; the siginfo si_code is now fixed as SI_PMC_OVF (fault-class). - Fixed some typos in x86.c. Added a note about the C3 Ezra. - Added EXPORT_NO_SYMBOLS to init.c, for compatibility with announced changes in modutils 2.5. Version 2.2, 2001-10-09 - Added preliminary support for the Pentium 4. Only basic stuff for now: no cascading counters, overflow interrupts, tagged micro-ops, or use of DS/PEBS. The code compiles but hasn't been tested on an actual Pentium 4. Version 2.1.4, 2001-09-30 - No driver-level changes. 
Version 2.1.3, 2001-09-13 - Fixed a compilation problem where virtual_stub couldn't be compiled in modular kernels older than 2.2.20pre10 if KMOD was disabled, due to an incompatible stub definition of request_module(). - Replaced most occurrences of "VIA Cyrix III / C3" with "VIA C3". Version 2.1.2, 2001-09-05 - Added MODULE_LICENSE() tag, for compatibility with the tainted/ non-tainted kernel stuff being put into 2.4.9-ac and modutils. - VIA C3 support is not "preliminary" any more. Testing has revealed that the reserved bits in the C3's EVNTSEL1 have no function and need not be preserved. The driver now fills these bits with zeroes. (Thanks to Dave Jones @ SuSE for running these tests.) - Minor bug fix in the perfctr interrupt assembly code. (Inherited from the 2.4 kernel. Fixed in 2.4.9-ac4.) Version 2.1.1, 2001-08-28 - Preliminary recognition of Pentium 4 processors, including checking the IA32_MISC_ENABLE MSR. - Moved %cr4 access functions from to x86_compat.h, to work around changes in 2.4.9-ac3. - More %cr4 cleanups possible since the removal of dodgy_tsc() in Version 2.1: moved {set,clear}_in_cr4_local() into x86.c, and eliminated the set_in_cr4() compat macro. - Fixed a bug in x86.c:finalise_backpatching(): the fake cstatus mustn't include i-mode counters unless we have PCINT support. Failure to check this caused fatal init-time oopses in some configs (CONFIG_X86_UP_APIC set but no local APIC in the CPU). - Minor comment updates in x86.c due to AMD #22007 Revision J. - Removed '%' before 'cr4' in printouts from x86_tests.c, to avoid the '%' being mutated by log-reading user-space code. Version 2.1, 2001-08-19 - Fixed a call backpatching bug, caused by an incompatibility between the 2.4 and 2.2 kernels' xchg() macros. The 2.2 version lacks a "volatile" causing gcc to remove the entire statement if xchg() is used for side-effect only. Reverted to a plain assignment, which is safe since the 2.0.1 backpatching changes.
- Fixed a bug where an attempt to use /proc/pid/perfctr on an unsupported processor would cause a (well-behaved) kernel oops, due to calling a NULL function pointer in x86.c. vperfctr_open() now returns -ENODEV if virtual.c hasn't been initialised. - Removed the WinChip configuration option, the dodgy_tsc() callback, and the clr_cap_tsc() x86_compat macro. WinChip users should configure for generic 586 or less and use the kernel's "notsc" boot parameter. This cleans up the driver and the 2.4 kernel patches, at the expense of more code in the 2.2 kernel patches to implement "notsc" support. - Minor cleanup: moved version number definition from init.c to a separate file, version.h. Version 2.0.1, 2001-08-14 - The unsynchronised backpatching in x86.c didn't work on SMP, due to Pentium III erratum E49, and similar errata for other P6 processors. (The change in 2.0-pre6 was insufficient.) x86.c now finalises the backpatching at driver init time, by "priming" the relevant code paths. To make this feasible, the isuspend() and iresume() methods are now merged into the other high-level methods; virtual.c became a bit cleaner. - Removed obsolete "WinChip pmc_map[] must be identity" check. Version 2.0, 2001-08-08 - Resurrected partial support for interrupt-mode virtual perfctrs. virtual.c permits a single i-mode perfctr, in addition to TSC and a number of a-mode perfctrs. BUG: The i-mode PMC must be last, which constrains CPUs like the P6 where we currently restrict the pmc_map[] to be the identity mapping. (Not a problem for K7 since it is symmetric, or P4 since it is expected to use a non-identity pmc_map[].) New perfctr_cpu_ireload() procedure to force reload of i-mode PMCs from their start values before resuming. Currently, this just invalidates the CPU cache, which forces the following iresume() and resume() to do the right thing. perfctr_cpu_update_control() now calls setup_imode_start_values() to "prime" i-mode PMCs from the control.ireset[] array.
- Bug fix in perfctr_cpu_update_control(): start by clearing cstatus. Prevents a failed attempt to update the control from leaving the object in a state with old cstatus != 0 but new control. Version 2.0-pre7, 2001-08-07 - Cleaned up the driver's debugging code (virtual, x86). - Internal driver rearrangements. The low-level driver (x86) now handles sampling/suspending/resuming counters. Merged counter state (sums and start values) and CPU control data to a single "CPU state" object. This simplifies the high-level drivers, and permits some optimisations in the low-level driver by avoiding the need to buffer tsc/pmc samples in memory before updating the accumulated sums (not yet implemented). - Removed the read_counters, write_control, disable_rdpmc, and enable_rdpmc methods from , since they have been obsoleted by the new suspend/resume/sample methods. - Rearranged the 'cstatus' encoding slightly by putting 'nractrs' in the low 7 bits; this was done because 'nractrs' is retrieved more often than 'nrctrs'. - Removed the obsolete 'status' field from vperfctr_state. Exported 'cstatus' and its access methods to user-space. (Remove the control.tsc_on/nractrs/nrictrs fields entirely?) - Removed WinChip "fake TSC" support. The user-space library can now sample with slightly less overhead on sane processors. - WinChip and VIA C3 now use p5mmx_read_counters() instead of their own versions. Version 2.0-pre6, 2001-07-27 - New patches for kernels 2.4.6, 2.4.7, and 2.4.7-ac1. - Sampling bug fix for SMP. Normally processes are suspended and resumed many times per second, but on SMP machines it is possible for a process to run for a long time without being suspended. Since sampling is performed at the suspend and resume actions, a performance counter may wrap around more than once between sampling points. When this occurs, the accumulated counts will be highly variable and much lower than expected. 
A software timer is now used to ensure that sampling deadlines aren't missed on SMP machines. (The timer is run by the same code which runs the ITIMER_VIRTUAL interval timer.) - Bug fix in the x86 "redirect call" backpatching routine. To be SMP safe, a bus-locked write to the code must be used. - Bug fix in the internal debugging code (CONFIG_PERFCTR_DEBUG). The "shadow" data structure used to detect if a process' perfctr pointer has been clobbered could cause lockups with SMP kernels. Rewrote the code to be simpler and more robust. - Minor performance tweak for the P5/P5MMX read counters procedures, to work around the P5's cache which doesn't allocate a cache line on a write miss. - To avoid undetected data layout mismatches, the user-space library now checks the data layout version field in a virtual perfctr when it is being mmap:ed into the user's address space. - A few minor cleanups. Version 2.0-pre5, 2001-06-11 - Internally use a single 'cstatus' field instead of the three tsc_on/nractrs/nrictrs fields. Should reduce overhead slightly. - Reorder the fields in cpu_control so that 'cstatus' and other frequently used fields get small offsets -- avoids some disp32 addressing modes in timing-critical code. - Fixed a bug in p6_iresume where it forgot to invalidate the EVNTSEL cache, causing p6_write_control to fail to reload the MSRs. (K7 had a similar bug.) Since i-mode support is disabled at the moment, no-one was actually bitten by this. - Fixed another iresume/write_control cache invalidation bug where a switch to an "uninitialised" CPU would fail to initialise the MSRs. - Added a CONFIG_PERFCTR_DEBUG option to enable internal consistency checks. Currently, this checks that a task's vperfctr pointer isn't clobbered behind our backs, that resume and suspend for a vperfctr are performed on the same CPU, and that the EVNTSEL cache is semi-consistent when reloading is optimised away. 
("semi" because it only checks that the cache agrees with the user's control data, and not that the cache agrees with the MSRs.) - Minor cleanups. Version 2.0-pre4, 2001-04-30 - Cleanups in x86.c. #defines introduced for magic constants. More sharing of procedures between different CPU drivers. Fixed a bug where k7_iresume() could cause k7_write_control() to fail to reload the correct EVNTSELs. The WinChip C6/2/3 driver now "fakes" an incrementing TSC. - General cleanups: s/__inline__/inline/ following Linux kernel coding standards, and renamed the low-level control objects to cpu_control to distinguish them from {v,g}perfctr_control objects. - O_CREAT is now interpreted when /proc/self/perfctr is opened: if the vperfctr does not exist, then it is created; if the vperfctr does exist, then EEXIST is returned (unfortunately O_EXCL doesn't work, since it's intercepted by the VFS layer). "perfex -i" uses this to avoid having to create a vperfctr when only an INFO command is to be issued. libperfctr.c:vperfctr_open() uses this to decide whether to UNLINK the newly opened vperfctr in case of errors or not. - Cleaned up virtual.c's 2.4/2.2 VFS interface code a little, and eliminated the OWNER_THIS_MODULE compat macro. - Added MOD_{INC,DEC}_USE_COUNTs to virtual.c's file_operations open and release procedures for 2.2 kernels. This should simulate 2.4's fops_get/put at >open() and >release(). Version 2.0-pre3, 2001-04-17 - Interrupt-mode virtual perfctrs are temporarily disabled since x86.c doesn't yet detect which PMC overflowed. The old API could be made to work, but it was broken anyway. - Integrated the new P4-ready data structures and APIs. The driver compiles but the user-space stuff hasn't been updated yet, so there may be some remaining bugs. I have not yet committed to all details of this API. 
Some things, like accumulating counters in virtual.c and global.c, are uglier now, and going from a single "status == nrctrs" field to three separate fields (tsc_on, nrctrs, nrictrs) cannot be good for performance. In the new API the control information is split in separate arrays depending on their use, i.e. a struct-of-arrays layout instead of an array-of-struct layout. The advantage of the struct-of-arrays layout is that it should cause fewer cache lines to be touched at the performance-critical operations. The disadvantage is that the layout changes whenever the number of array elements has to be increased -- as is the case for the future Pentium 4 support (18 counters). Version 2.0-pre2, 2001-04-07 - Removed automatic inheritance of per-process virtual perfctrs across fork(). Unless wait4() is modified, it's difficult to communicate the final values back to the parent: the now abandoned code did this in a way which made it impossible to distinguish one child's final counts from another's. Inheritance can be implemented in user-space anyway, so the loss is not great. The interface between the driver and the rest of the kernel is now smaller and simpler than before. - Simulating cpu_khz by a macro in very old kernels broke since there's also a struct field with that name :-( Instead of putting the ugly workaround back in, I decided to drop support for kernels older than 2.2.16. - Preliminary support for the VIA C3 processor -- the C3 is apparently a faster version of the VIA Cyrix III. - Added rdtsc cost deduction to the init tests code, and changed it to output per-instruction costs as well. - More cleanups, making 2.2 compatibility crud less visible. 
Version 2.0-pre1, 2001-03-25 - First round of API and coding changes/cleanups for version 2.0: made perfctr_info.version a string, moved some perfctr_info inits to x86.c and eliminated some redundant variables, removed dead VFS code from virtual.c, removed obsolete K7 tests from x86_tests.c, removed mmu_cr4_features wrappers from x86_compat.h, minor cleanup in virtual_stub.c. - Fixed an include file problem which made some C compilers (not gcc) fail when compiling user-space applications using the driver. - Added missing EXPORT_SYMBOL declarations needed by the UP-APIC PM code when the driver is built as a module. - Preliminary changes in x86.c to deal with UP-APIC power management issues in 2.4-ac kernels. The PM callback is only a stub for now. Version 1.9, 2001-02-13 - Fixed compilation problems for 2.2 and SMP kernels. - Found updated documentation on "VIA Cyrix III". Apparently, there are two distinct chips: the older Joshua (a Cyrix design) and the newer Samuel (a Centaur design). Our current code supported Joshua, but mistook Samuel for Joshua. Corrected the identification of Samuel and added explicit support for it. Samuel's EVNTSEL1 is not well- documented, so there are some new Samuel-specific tests in x86_tests.c. - Added preliminary interrupt-mode support for AMD K7. - Small tweaks to virtual.c's interrupt handling. Version 1.8, 2001-01-23 - Added preliminary interrupt-mode support to virtual perfctrs. Currently for P6 only, and the local APIC must have been enabled. Tested on 2.4.0-ac10 with CONFIG_X86_UP_APIC=y. When an i-mode vperfctr interrupts on overflow, the counters are suspended and a user-specified signal is sent to the process. The user's signal handler can read the trap pc from the mmap:ed vperfctr, and should then issue an IRESUME ioctl to restart the counters. The next version will support buffering and automatic restart. - Some cleanups in the x86.c init and exit code. Removed the implicit smp_call_function() calls from x86_compat.h. 
Version 1.7, 2001-01-01 - Updated Makefile for 2.4.0-test13-pre3 Rules.make changes. - Removed PERFCTR_ATTACH ioctl from /dev/perfctr, making the vperfctrs only accessible via /proc/self/perfctr. Removed the "attach" code from virtual.c, and temporarily commented out the "vperfctr fs" code. Moved /dev/perfctr initialisation and implementation from init.c to global.c. - Eliminated CONFIG_VPERFCTR_PROC, making /proc/pid/perfctr mandatory if CONFIG_PERFCTR_VIRTUAL is set. - Some 2.2/2.4 compatibility cleanups. - VIA Cyrix III detection bug fix. Contrary to VIA's documentation, the Cyrix III vendor field is Centaur, not Cyrix. Version 1.6, 2000-11-21 - Preliminary implementation of /proc/pid/perfctr. Seems to work, but virtual.c and virtual_stub.c is again filled with #if LINUX_VERSION_CODE crap which will need to be cleaned up. The INFO ioctl is now implemented by vperfctrs too, to avoid the need for opening /dev/perfctr. - virtual.c now puts the perfctr pointer in filp->private_data instead of inode->u.generic_ip. The main reason for this change is that proc-fs places a dentry pointer in inode->u.generic_ip. - sys_vperfctr_control() no longer resets the virtual TSC if it already is active. The virtual TSC therefore runs continuously from its first activation until the process stops or unlinks its vperfctrs. - Updates for 2.4.0-test11pre6. Use 2.4-style cpu_has_XXX feature testing macros. Updated x86_compat.h to implement missing cpu_has_mmx and cpu_has_msr, and compatibility macros for 2.2. Changed vperfctr_fs_read_super() to use new_inode(sb) instead of get_empty_inode() + some init code. - Updates for 2.4.0-test9. Fixed x86_compat.h for cpu_khz change. Since drivers/Makefile was converted to the new list style, it became more difficult to handle CONFIG_PERFCTR=m. Changed Config.in to set CONFIG_KPERFCTR=y when CONFIG_PERFCTR != n, resulting in a much cleaner kernel patch for 2.4.0-test9. - Removed d_alloc_root wrapper since 2.2 doesn't need it any more. 
- When building for 2.2.18pre, use some of its 2.4 compatibility features (module_init, module_exit and DECLARE_MUTEX). - Updates for 2.4.0-test8: repaired kernel patch for new parameter in do_fork, and fixed CLONE_PERFCTR conflict with CLONE_THREAD. Version 1.5, 2000-09-03 - Dropped support for intermediate 2.3 and early 2.4.0-test kernels. The code now supports kernels 2.2.xx and 2.4.0-test7 or later only. Cleanups in compat.h and virtual.c. - Rewrote the Makefile to use object file lists instead of conditionals. This gets slightly hairy since kernel extensions are needed even when the driver proper is built as a module. - Removed the definition of CONFIG_PERFCTR_X86 from Config.in. Use the 2.4 standard CONFIG_X86 instead. The 2.2.xx kernel patches now define CONFIG_X86 in arch/i386/config.in. - Cleaned up the vperfctr inheritance filter. Instead of setting a disable flag (CLONE_KTHREAD) when kernel-internal threads are created, I now set CLONE_PERFCTR in sys_fork and sys_vfork. - /dev/perfctr no longer accepts the SAMPLE and UNLINK ioctls. All operations pertaining to a process' virtual perfctrs must be applied to the fd returned from the ATTACH ioctl. - Removed the remote-control features from the virtual perfctrs. Significant simplifications in virtual.c. Removed some now unused stuff from compat.h and virtual_stub.c. Version 1.4, 2000-08-11 - Fixed a memory leak bug in virtual.c. An extraneous dget() in get_vperfctr_filp() prevented reclaiming the dentry and inode allocated for a vperfctr file. - Major changes to the VFS interface in virtual.c. Starting with 2.4.0-test6, inode->i_sb == NULL no longer works. Added code to register a "vperfctr" fs and define a superblock and a mount point. Completely rewrote the dentry init code. Most of the new code is adapted from fs/pipe.c, with simplifications and macros to continue supporting 2.2.x kernels. `ls -l /proc/*/fd/' now prints recognizable names for vperfctr files. - Cleaned up virtual.c slightly. 
Removed "#if 1" tests around the vperfctr inheritance code. Rewrote vperfctr_alloc and vperfctr_free to use the virt_to_page and {Set,Clear}PageReserved macros; also updated compat.h to provide these for older kernels. - Updated for 2.4.0-test3: a dummy `open' file operation is no longer required by drivers/char/misc.c. - Updated for `owner' field in file_operations added in 2.4.0-test2. Removed MOD_{INC,DEC}_USE_COUNT from init.c (except when compiling for 2.2.x) and virtual.c. Added MOD_{INC,DEC}_USE_COUNT to the reserve/release functions in x86.c -- needed because the driver may be active even if no open file refers to it. Using can_unload in the module struct instead is possible but not as tidy. Version 1.3, 2000-06-29 - Implemented inheritance for virtual perfctrs: fork() copies the evntsel data to the child, exit() stops the child's counters but does not detach the vperfctr object, and wait() adds the child's counters to the parent's `children' counters. Added a CLONE_KTHREAD flag to prevent inheritance to threads created implicitly by request_module() and kernel_thread(). - Fixed a half-broken printk() in x86_tests.c. - Added checks to virtual.c to prevent the remote-control interface from trying to activate dead vperfctrs. - Updated vperfctr_attach() for changes in 2.3.99-pre7 and 2.4.0-test2. - Fixed a problem introduced in 1.2 which caused linker errors if CONFIG_PERFCTR=m and CONFIG_PERFCTR_INIT_TESTS=y. - Export CPU kHz via a new field in PERFCTR_INFO ioctl, to enable user-space to map accumulated TSC counts to actual time. Version 1.2, 2000-05-24 - Added support for generic x86 processors with a time-stamp counter but no performance-monitoring counters. By using the driver to virtualise the TSC, accurate cycle-count measurements are now possible on PMC-less processors like the AMD K6. - Removed some of the special-casing of the x86 time-stamp counter. It's now "just another counter", except that no evntsel is needed to enable it. 
- WinChip bug fix: the "fake TSC" code would increment an uninitialised counter. - Reorganised the x86 driver. Moved the optional init-time testing code to a separate source file. - Miscellaneous code cleanups and naming convention changes. Version 1.1, 2000-05-13 - vperfctr_attach() now accepts pid 0 as an alias for the current process. This reduces the number of getpid() calls needed in the user-space library. (Suggested by Ulrich Drepper.) - Added support for the VIA Cyrix III processor. - Tuned the x86 driver interface. Replaced function pointers with stubs which rewrite callers to invoke the correct callees. - Added ARRAY_SIZE definition to compat.h for 2.2.x builds. - Updated for 2.3.48 inode changes. - Moved code closer to 2.3.x coding standards. Removed init_module and cleanup_module, added __exit, module_init, and module_exit, and extended "compat.h" accordingly. Cleaned up and a little. Version 1.0, 2000-01-31 - Prepared the driver to cope with non-x86 architectures: - Moved generic parts of to . - Merged driver's private "x86.h" into . - Config.in now defines CONFIG_PERFCTR_${ARCH}, and Makefile uses it to select appropriate arch-dependent object files - The driver now reads the low 32 bits of the counters, instead of 40 or 48 bits zero-extended to 64 bits. Sums are still 64 bits. This was done to reduce the number of cache lines needed for certain data structures, to simplify and improve the performance of the sampling procedures, and to change 64+(64-64) arithmetic to 64+(32-32) for the benefit of gcc on x86. This change doesn't reduce precision, as long as no event occurs more than 2^32 times between two sampling points. - PERFCTR_GLOBAL_READ now forces all CPUs to be sampled, if the sampling timer isn't running. Version 0.11, 2000-01-30 - Added a missing EXPORT_SYMBOL which prevented the driver from being built as a module in SMP kernels. - Support for the CPU sampling instructions (i.e. 
RDPMC and RDTSC on x86) is now announced explicitly by PERFCTR_INFO. - The x86 hardware driver now keeps CR4.PCE globally enabled. There are two reasons for this. First, the cost of toggling this flag at process suspend/resume is high. Second, changes in kernel 2.3.40 imply that any processor's %cr4 may be updated asynchronously from the global variable mmu_cr4_features. Version 0.10, 2000-01-23 - Added support for global-mode perfctrs (global.c). - There is now a config option controlling whether to perform init-time hardware tests or not. - Added a hardware reserve/release mechanism so that multiple high-level services don't simultaneously use the hardware. - The driver is now officially device . - Tuned the 64-bit tsc/msr/pmc read operations in x86.c. - Support for virtual perfctrs can now be enabled or disabled via CONFIG_PERFCTR_VIRTUAL. - Added support for the WinChip 3 processor. - Split the code into several files: x86.c (x86 drivers), virtual.c (virtualised perfctrs), setup.c (boot-time actions), init.c (driver top-level and init code). Version 0.9, 2000-01-02 - The driver can now be built as a module. - Dropped sys_perfctr() system call and went back to using a /dev/perfctr character device. Generic operations are now ioctl commands on /dev/perfctr, and control operations on virtual perfctrs are ioctl commands on their file descriptors. Initially this change was done because new system calls in 2.3.x made maintenance and binary compatibility with 2.2.x hard, but the new API is actually cleaner than the previous system call. - Moved this code from arch/i386/kernel/ to drivers/perfctr/. Version 0.8, 1999-11-14 - Made the process management callback functions inline to reduce scheduling overhead for processes not using perfctrs. - Changed the 'status' field to contain the number of active counters. Changed read_counters, write_control, and accumulate to use this information to avoid unnecessary work. 
- Fixed a bug in k7_check_control() which caused it to require all four counters to be enabled. - Fixed sys_perfctr() to return -ENODEV instead of -ENOSYS if the processor doesn't support perfctrs. - Some code cleanups. - Evntsel MSRs are updated lazily, and counters are not written to. The following table lists the costs (in cycles) of various instructions which access the counter or evntsel registers. The table was derived from data collected by init-time tests run by previous versions of this driver. Processor P5 P5MMX PII PIII K7 Clock freq. (MHz) 133 233 266 450 500 RDPMC n/a 14 31 36 13 RDMSR (counter) 29 28 81 80 52 WRMSR (counter) 35 37 97 115 80 WRMSR (evntsel) 33 37 88 105 232 Several things are apparent from this table: 1. It's much cheaper to use RDPMC than RDMSR to read the counters. 2. It's much more expensive to reset a counter than to read it. 3. It's expensive to write to an evntsel register. As of version 0.8, this driver uses the following strategies: * The evntsel registers are updated lazily. A per_cpu_control[] array caches the contents of each CPU's evntsel registers, and only when a process requires a different setup are the evntsel registers written to. In most cases, this eliminates the need to reprogram the evntsel registers when switching processes. The older drivers would write to the evntsel registers both at process suspend and resume. * The counter registers are read both at process resume and suspend, and the difference is added to the process' accumulated counters. The older drivers would reset the counters at resume, read them at suspend, and add the values read to the accumulated counters. * Only those registers enabled by the user's control information are manipulated, instead of blindly manipulating all of them. Version 0.7 1999-10-25 - The init-time checks in version 0.6 of this driver showed that RDMSR is a lot slower than RDPMC for reading the PMCs. The driver now uses RDPMC instead of RDMSR whenever possible. 
- Added an mmap() operation to perfctr files. This allows any client to read the accumulated counter state without making a system call. The old "sync to user-provided buffer" method has been removed, as it entailed additional copy operations and only worked for the "active" process. The PERFCTR_READ operation has been replaced by a simpler PERFCTR_SAMPLE operation, for the benefit of pre-MMX Intel P5 processors which cannot sample counters in user-mode. This rewrite actually simplified the code. - The AMD K7 should now be supported correctly. The init-time checks in version 0.6 of this driver revealed that each K7 counter has its own ENable bit. (Thanks to Nathan Slingerland for running the test and reporting the results to me.) - Plugged a potential memory leak in perfctr_attach_task(). - No longer piggyback on prctl(); sys_perfctr() is a real system call. - Some code cleanups. Version 0.6 1999-09-08 - Temporarily added some init-time code that checks the costs of RDPMC/RDMSR/WRMSR operations applied to perfctr MSRs, the semantics of the ENable bit on the Athlon, and gets the boot-time value of the WinChip CESR register. This code can be turned off by #defining INIT_DEBUG to 0. - Preliminary support for the AMD K7 Athlon processor. - The code will now build in both 2.3.x and 2.2.x kernels. Version 0.5 1999-08-29 - The user-space buffer is updated whenever state.status changes, even when a remote command triggers the change. - Reworked and simplified the high-level code. All accesses now require an attached file in order to implement proper accounting and synchronisation. The only exception is UNLINK: a process may always UNLINK its own PMCs. - Fixed counting bug in sys_perfctr_read(). - Improved support for the Intel Pentium III. - Another WinChip fix: fake TSC update at process resume. - The code should now be safe for 'gcc -fstrict-aliasing'. Version 0.4 1999-07-31 - Implemented PERFCTR_ATTACH and PERFCTR_{READ,CONTROL,STOP,UNLINK} on attached perfctrs.
An attached perfctr is represented as a file. - Fixed an error in the WinChip-specific code. - Perfctrs now survive exec(). Version 0.3 1999-07-22 - Interface now via sys_prctl() instead of /dev/perfctr. - Added NYI stubs for accessing other processes' perfctrs. - Moved to dynamic allocation of a task's perfctr state. - Minor code cleanups. Version 0.2 1999-06-07 - Added support for WinChip CPUs. - Restart counters from zero, not their previous values. This corrected a problem for Intel P6 (WRMSR writes 32 bits to a PERFCTR MSR and then sign-extends to 40 bits), and also simplified the code. - Added support for syncing the kernel's counter values to a user- provided buffer each time a process is resumed. This feature, and the fact that the driver enables RDPMC in processes using PMCs, allows user-level computation of a process' accumulated counter values without incurring the overhead of making a system call. Version 0.1 1999-05-30 - First public release. papi-5.6.0/src/perfctr-2.6.x/patches/patch-kernel-2.6.9-55.EL-redhat000664 001750 001750 00000040455 13216244367 026152 0ustar00jshenry1963jshenry1963000000 000000 --- linux-2.6.9-55.EL.perfctr26/CREDITS.~1~ 2004-10-18 23:54:39.000000000 +0200 +++ linux-2.6.9-55.EL.perfctr26/CREDITS 2008-06-22 23:08:24.000000000 +0200 @@ -2583,9 +2583,10 @@ S: Ottawa, Ontario S: Canada K2P 0X8 N: Mikael Pettersson -E: mikpe@csd.uu.se -W: http://www.csd.uu.se/~mikpe/ +E: mikpe@it.uu.se +W: http://user.it.uu.se/~mikpe/ D: Miscellaneous fixes +D: Performance-monitoring counters driver N: Reed H. 
Petty E: rhp@draper.net --- linux-2.6.9-55.EL.perfctr26/Documentation/ioctl-number.txt.~1~ 2004-10-18 23:55:27.000000000 +0200 +++ linux-2.6.9-55.EL.perfctr26/Documentation/ioctl-number.txt 2008-06-22 23:08:18.000000000 +0200 @@ -188,6 +188,8 @@ Code Seq# Include File Comments 0xB1 00-1F PPPoX 0xCB 00-1F CBM serial IEC bus in development: +0xD0 all performance counters see drivers/perfctr/ + 0xDD 00-3F ZFCP device driver see drivers/s390/scsi/ 0xF3 00-3F video/sisfb.h sisfb (in development) --- linux-2.6.9-55.EL.perfctr26/MAINTAINERS.~1~ 2007-06-17 18:08:56.000000000 +0200 +++ linux-2.6.9-55.EL.perfctr26/MAINTAINERS 2008-06-22 23:08:24.000000000 +0200 @@ -1730,6 +1730,12 @@ M: tsbogend@alpha.franken.de L: linux-net@vger.kernel.org S: Maintained +PERFORMANCE-MONITORING COUNTERS DRIVER +P: Mikael Pettersson +M: mikpe@it.uu.se +W: http://user.it.uu.se/~mikpe/linux/perfctr/ +S: Maintained + POSIX CLOCKS and TIMERS P: George Anzinger M: george@mvista.com --- linux-2.6.9-55.EL.perfctr26/arch/i386/Kconfig.~1~ 2007-06-17 18:08:57.000000000 +0200 +++ linux-2.6.9-55.EL.perfctr26/arch/i386/Kconfig 2008-06-22 23:08:18.000000000 +0200 @@ -960,6 +960,8 @@ config REGPARM generate incorrect output with certain kernel constructs when -mregparm=3 is used. +source "drivers/perfctr/Kconfig" + endmenu --- linux-2.6.9-55.EL.perfctr26/arch/i386/kernel/entry.S.~1~ 2007-06-17 18:08:57.000000000 +0200 +++ linux-2.6.9-55.EL.perfctr26/arch/i386/kernel/entry.S 2008-06-22 23:08:18.000000000 +0200 @@ -561,6 +561,16 @@ ENTRY(name) \ /* The include is where all of the SMP etc. 
interrupts come from */ #include "entry_arch.h" +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +ENTRY(perfctr_interrupt) + pushl $LOCAL_PERFCTR_VECTOR-256 + SAVE_ALL + pushl %esp + call smp_perfctr_interrupt + addl $4, %esp + jmp ret_from_intr +#endif + ENTRY(divide_error) pushl $0 # no error code pushl $do_divide_error --- linux-2.6.9-55.EL.perfctr26/arch/i386/kernel/i8259.c.~1~ 2004-10-18 23:55:18.000000000 +0200 +++ linux-2.6.9-55.EL.perfctr26/arch/i386/kernel/i8259.c 2008-06-22 23:08:18.000000000 +0200 @@ -24,6 +24,7 @@ #include #include #include +#include #include @@ -413,6 +414,8 @@ void __init init_IRQ(void) */ intr_init_hook(); + perfctr_vector_init(); + /* * Set the clock to HZ Hz, we already have a valid * vector now: --- linux-2.6.9-55.EL.perfctr26/arch/i386/kernel/process.c.~1~ 2007-06-17 18:08:57.000000000 +0200 +++ linux-2.6.9-55.EL.perfctr26/arch/i386/kernel/process.c 2008-06-22 23:08:18.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -324,6 +325,7 @@ void exit_thread(void) tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET; put_cpu(); } + perfctr_exit_thread(&tsk->thread); } void flush_thread(void) @@ -399,6 +401,8 @@ int copy_thread(int nr, unsigned long cl savesegment(fs,p->thread.fs); savesegment(gs,p->thread.gs); + perfctr_copy_task(p, regs); + tsk = current; if (unlikely(NULL != tsk->thread.io_bitmap_ptr)) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); @@ -661,6 +665,8 @@ struct task_struct fastcall * __switch_t if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) handle_io_bitmap(next, tss); + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.9-55.EL.perfctr26/arch/ppc/Kconfig.~1~ 2004-10-18 23:55:29.000000000 +0200 +++ linux-2.6.9-55.EL.perfctr26/arch/ppc/Kconfig 2008-06-22 23:08:18.000000000 +0200 @@ -243,6 +243,8 @@ config NOT_COHERENT_CACHE depends on 4xx || 8xx default y +source "drivers/perfctr/Kconfig" + endmenu menu "Platform options" 
--- linux-2.6.9-55.EL.perfctr26/arch/ppc/kernel/head.S.~1~ 2007-06-17 18:08:57.000000000 +0200 +++ linux-2.6.9-55.EL.perfctr26/arch/ppc/kernel/head.S 2008-06-22 23:08:18.000000000 +0200 @@ -502,7 +502,11 @@ SystemCall: Trap_0f: EXCEPTION_PROLOG addi r3,r1,STACK_FRAME_OVERHEAD +#ifdef CONFIG_PERFCTR_INTERRUPT_SUPPORT + EXC_XFER_EE(0xf00, do_perfctr_interrupt) +#else EXC_XFER_EE(0xf00, UnknownException) +#endif /* * Handle TLB miss for instruction on 603/603e. --- linux-2.6.9-55.EL.perfctr26/arch/ppc/kernel/process.c.~1~ 2007-06-17 18:08:55.000000000 +0200 +++ linux-2.6.9-55.EL.perfctr26/arch/ppc/kernel/process.c 2008-06-22 23:08:18.000000000 +0200 @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -301,7 +302,9 @@ struct task_struct *__switch_to(struct t #endif /* CONFIG_SPE */ new_thread = &new->thread; old_thread = &current->thread; + perfctr_suspend_thread(&prev->thread); last = _switch(old_thread, new_thread); + perfctr_resume_thread(&current->thread); local_irq_restore(s); return last; } @@ -370,6 +373,7 @@ void exit_thread(void) last_task_used_math = NULL; if (last_task_used_altivec == current) last_task_used_altivec = NULL; + perfctr_exit_thread(&current->thread); } void flush_thread(void) @@ -460,6 +464,8 @@ copy_thread(int nr, unsigned long clone_ p->thread.last_syscall = -1; + perfctr_copy_task(p, regs); + return 0; } --- linux-2.6.9-55.EL.perfctr26/arch/x86_64/Kconfig.~1~ 2007-06-17 18:08:57.000000000 +0200 +++ linux-2.6.9-55.EL.perfctr26/arch/x86_64/Kconfig 2008-06-22 23:08:18.000000000 +0200 @@ -401,6 +401,8 @@ config X86_MCE_AMD Additional support for AMD specific MCE features such as the DRAM Error Threshold.
+source "drivers/perfctr/Kconfig" + endmenu --- linux-2.6.9-55.EL.perfctr26/arch/x86_64/kernel/entry.S.~1~ 2007-06-17 18:08:46.000000000 +0200 +++ linux-2.6.9-55.EL.perfctr26/arch/x86_64/kernel/entry.S 2008-06-22 23:08:18.000000000 +0200 @@ -563,6 +563,11 @@ ENTRY(spurious_interrupt) apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt #endif +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +ENTRY(perfctr_interrupt) + apicinterrupt LOCAL_PERFCTR_VECTOR,smp_perfctr_interrupt +#endif + /* * Exception entry points. */ --- linux-2.6.9-55.EL.perfctr26/arch/x86_64/kernel/i8259.c.~1~ 2007-06-17 18:08:46.000000000 +0200 +++ linux-2.6.9-55.EL.perfctr26/arch/x86_64/kernel/i8259.c 2008-06-22 23:08:18.000000000 +0200 @@ -24,6 +24,7 @@ #include #include #include +#include #include @@ -563,6 +564,8 @@ void __init init_IRQ(void) set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); #endif + perfctr_vector_init(); + /* * Set the clock to HZ Hz, we already have a valid * vector now: --- linux-2.6.9-55.EL.perfctr26/arch/x86_64/kernel/process.c.~1~ 2007-06-17 18:08:57.000000000 +0200 +++ linux-2.6.9-55.EL.perfctr26/arch/x86_64/kernel/process.c 2008-06-22 23:08:18.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -276,6 +277,7 @@ void exit_thread(void) t->io_bitmap_max = 0; put_cpu(); } + perfctr_exit_thread(&me->thread); } void flush_thread(void) @@ -379,6 +381,8 @@ int copy_thread(int nr, unsigned long cl asm("movl %%es,%0" : "=m" (p->thread.es)); asm("movl %%ds,%0" : "=m" (p->thread.ds)); + perfctr_copy_task(p, regs); + if (unlikely(me->thread.io_bitmap_ptr != NULL)) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) { @@ -532,6 +536,8 @@ struct task_struct *__switch_to(struct t } } + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.9-55.EL.perfctr26/drivers/Makefile.~1~ 2007-06-17 18:08:56.000000000 +0200 +++ linux-2.6.9-55.EL.perfctr26/drivers/Makefile 2008-06-22 
23:08:18.000000000 +0200 @@ -62,5 +62,6 @@ obj-$(CONFIG_MCA) += mca/ obj-$(CONFIG_EISA) += eisa/ obj-$(CONFIG_CPU_FREQ) += cpufreq/ obj-$(CONFIG_MMC) += mmc/ +obj-$(CONFIG_KPERFCTR) += perfctr/ obj-$(CONFIG_INFINIBAND) += infiniband/ obj-y += firmware/ --- linux-2.6.9-55.EL.perfctr26/fs/exec.c.~1~ 2007-06-17 18:08:57.000000000 +0200 +++ linux-2.6.9-55.EL.perfctr26/fs/exec.c 2008-06-22 23:08:24.000000000 +0200 @@ -48,6 +48,7 @@ #include #include #include +#include #include #include @@ -906,6 +907,7 @@ int flush_old_exec(struct linux_binprm * set_task_comm(current, tcomm); current->flags &= ~PF_RELOCEXEC; + perfctr_flush_thread(&current->thread); flush_thread(); if (bprm->e_uid != current->euid || bprm->e_gid != current->egid || --- linux-2.6.9-55.EL.perfctr26/include/asm-i386/mach-default/irq_vectors.h.~1~ 2004-10-18 23:53:44.000000000 +0200 +++ linux-2.6.9-55.EL.perfctr26/include/asm-i386/mach-default/irq_vectors.h 2008-06-22 23:08:18.000000000 +0200 @@ -56,14 +56,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 --- linux-2.6.9-55.EL.perfctr26/include/asm-i386/mach-visws/irq_vectors.h.~1~ 2004-10-18 23:53:13.000000000 +0200 +++ linux-2.6.9-55.EL.perfctr26/include/asm-i386/mach-visws/irq_vectors.h 2008-06-22 23:08:18.000000000 +0200 @@ -35,14 +35,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels.
(0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 --- linux-2.6.9-55.EL.perfctr26/include/asm-i386/processor.h.~1~ 2007-06-17 18:08:47.000000000 +0200 +++ linux-2.6.9-55.EL.perfctr26/include/asm-i386/processor.h 2008-06-22 23:08:18.000000000 +0200 @@ -454,6 +454,8 @@ struct thread_struct { unsigned long *io_bitmap_ptr; /* max allowed port in the bitmap, in bytes: */ unsigned long io_bitmap_max; +/* performance counters */ + struct vperfctr *perfctr; }; #define INIT_THREAD { \ --- linux-2.6.9-55.EL.perfctr26/include/asm-i386/system.h.~1~ 2004-10-18 23:53:06.000000000 +0200 +++ linux-2.6.9-55.EL.perfctr26/include/asm-i386/system.h 2008-06-22 23:08:18.000000000 +0200 @@ -14,6 +14,7 @@ extern struct task_struct * FASTCALL(__s #define switch_to(prev,next,last) do { \ unsigned long esi,edi; \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile("pushfl\n\t" \ "pushl %%ebp\n\t" \ "movl %%esp,%0\n\t" /* save ESP */ \ --- linux-2.6.9-55.EL.perfctr26/include/asm-ppc/processor.h.~1~ 2004-10-18 23:53:06.000000000 +0200 +++ linux-2.6.9-55.EL.perfctr26/include/asm-ppc/processor.h 2008-06-22 23:08:18.000000000 +0200 @@ -126,6 +126,9 @@ struct thread_struct { unsigned long spefscr; /* SPE & eFP status */ int used_spe; /* set if process has used spe */ #endif /* CONFIG_SPE */ +#ifdef CONFIG_PERFCTR_VIRTUAL + struct vperfctr *perfctr; /* performance counters */ +#endif }; #define ARCH_MIN_TASKALIGN 16 --- linux-2.6.9-55.EL.perfctr26/include/asm-x86_64/hw_irq.h.~1~ 2007-06-17 18:08:46.000000000 +0200 +++ linux-2.6.9-55.EL.perfctr26/include/asm-x86_64/hw_irq.h 2008-06-22 23:08:18.000000000 +0200 @@ -66,14 +66,15 @@ struct hw_interrupt_type; * sources per level' errata. 
*/ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in irq.h */ #ifndef __ASSEMBLY__ --- linux-2.6.9-55.EL.perfctr26/include/asm-x86_64/irq.h.~1~ 2007-06-17 18:08:52.000000000 +0200 +++ linux-2.6.9-55.EL.perfctr26/include/asm-x86_64/irq.h 2008-06-22 23:08:18.000000000 +0200 @@ -29,7 +29,7 @@ */ #define NR_VECTORS 256 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in hw_irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in hw_irq.h */ #ifdef CONFIG_PCI_MSI #define NR_IRQS FIRST_SYSTEM_VECTOR --- linux-2.6.9-55.EL.perfctr26/include/asm-x86_64/processor.h.~1~ 2007-06-17 18:08:45.000000000 +0200 +++ linux-2.6.9-55.EL.perfctr26/include/asm-x86_64/processor.h 2008-06-22 23:08:18.000000000 +0200 @@ -262,6 +262,8 @@ struct thread_struct { unsigned io_bitmap_max; /* cached TLS descriptors. 
*/ u64 tls_array[GDT_ENTRY_TLS_ENTRIES]; +/* performance counters */ + struct vperfctr *perfctr; } __attribute__((aligned(16))); #define INIT_THREAD {} --- linux-2.6.9-55.EL.perfctr26/include/asm-x86_64/system.h.~1~ 2007-06-17 18:08:46.000000000 +0200 +++ linux-2.6.9-55.EL.perfctr26/include/asm-x86_64/system.h 2008-06-22 23:08:18.000000000 +0200 @@ -26,7 +26,8 @@ #define __EXTRA_CLOBBER \ ,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15" -#define switch_to(prev,next,last) \ +#define switch_to(prev,next,last) do { \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile(SAVE_CONTEXT \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ @@ -46,7 +47,8 @@ [tif_fork] "i" (TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, thread_info)), \ [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ - : "memory", "cc" __EXTRA_CLOBBER) + : "memory", "cc" __EXTRA_CLOBBER); \ +} while (0) extern void load_gs_index(unsigned); --- linux-2.6.9-55.EL.perfctr26/kernel/exit.c.~1~ 2007-06-17 18:08:57.000000000 +0200 +++ linux-2.6.9-55.EL.perfctr26/kernel/exit.c 2008-06-22 23:08:18.000000000 +0200 @@ -25,6 +25,7 @@ #include #include #include +#include #include /* for audit_free() */ #include @@ -91,6 +92,7 @@ repeat: zap_leader = (leader->exit_signal == -1); } + perfctr_release_task(p); sched_exit(p); write_unlock_irq(&tasklist_lock); spin_unlock(&p->proc_lock); --- linux-2.6.9-55.EL.perfctr26/kernel/sched.c.~1~ 2007-06-17 18:08:57.000000000 +0200 +++ linux-2.6.9-55.EL.perfctr26/kernel/sched.c 2008-06-22 23:08:18.000000000 +0200 @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -4006,6 +4007,8 @@ int set_cpus_allowed(task_t *p, cpumask_ migration_req_t req; runqueue_t *rq; + perfctr_set_cpus_allowed(p, new_mask); + rq = task_rq_lock(p, &flags); if (!cpus_intersects(new_mask, cpu_online_map)) { ret = -EINVAL; --- 
linux-2.6.9-55.EL.perfctr26/kernel/timer.c.~1~ 2007-06-17 18:08:57.000000000 +0200 +++ linux-2.6.9-55.EL.perfctr26/kernel/timer.c 2008-06-22 23:08:18.000000000 +0200 @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -858,6 +859,7 @@ static void update_one_process(struct ta do_process_times(p, user, system); do_it_virt(p, user); do_it_prof(p); + perfctr_sample_thread(&p->thread); } /* papi-5.6.0/doc/Doxyfile-man3000664 001750 001750 00000004514 13216244355 017651 0ustar00jshenry1963jshenry1963000000 000000 # This file describes the settings to be used by the documentation system # doxygen (www.doxygen.org) for PAPI utilities man-pages # The following overrides default values in Doxyfile-common # # All text after a hash (#) is considered a comment and will be ignored # The format is: # TAG = value [value, ...] # For lists items can also be appended using: # TAG += value [value, ...] # Values that contain spaces should be placed between quotes (" ") @INCLUDE = Doxyfile-common #--------------------------------------------------------------------------- # configuration options related to the input files #--------------------------------------------------------------------------- # The INPUT tag can be used to specify the files and/or directories that contain # documented source files. You may enter file names like "myfile.cpp" or # directories like "/usr/src/myproject". Separate the files or directories # with spaces. INPUT = ../src/papi.h ../src/papi.c ../src/papi_hl.c \ ../src/papi_fwrappers.c FILE_PATTERNS = *.c *.h # The RECURSIVE tag can be used to turn specify whether or not subdirectories # should be searched for input files as well. Possible values are YES and NO. # If left blank NO is used. 
RECURSIVE = NO #--------------------------------------------------------------------------- # configuration options related to the man page output #--------------------------------------------------------------------------- # If the GENERATE_MAN tag is set to YES (the default) Doxygen will # generate man pages GENERATE_MAN = YES # The MAN_OUTPUT tag is used to specify where the man pages will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `man' will be used as the default path. MAN_OUTPUT = man # The MAN_EXTENSION tag determines the extension that is added to # the generated man pages (default is the subroutine's section .3) MAN_EXTENSION = .3 # If the MAN_LINKS tag is set to YES and Doxygen generates man output, # then it will generate one additional man file for each entity # documented in the real man page(s). These additional files # only source the real man page, but without them the man command # would be unable to find the correct page. The default is NO. MAN_LINKS = NO papi-5.6.0/doc/Doxyfile-man1000664 001750 001750 00000004771 13216244355 017654 0ustar00jshenry1963jshenry1963000000 000000 # This file describes the settings to be used by the documentation system # doxygen (www.doxygen.org) for PAPI utilities man-pages # The following overrides default values in Doxyfile-common # # All text after a hash (#) is considered a comment and will be ignored # The format is: # TAG = value [value, ...] # For lists items can also be appended using: # TAG += value [value, ...] # Values that contain spaces should be placed between quotes (" ") @INCLUDE = Doxyfile-common #--------------------------------------------------------------------------- # configuration options related to the input files #--------------------------------------------------------------------------- # The INPUT tag can be used to specify the files and/or directories that contain # documented source files. 
You may enter file names like "myfile.cpp" or # directories like "/usr/src/myproject". Separate the files or directories # with spaces. INPUT = ../src/utils/ FILE_PATTERNS = *.c # The RECURSIVE tag can be used to turn specify whether or not subdirectories # should be searched for input files as well. Possible values are YES and NO. # If left blank NO is used. RECURSIVE = YES # If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) # defined locally in source files will be included in the documentation. # If set to NO only classes defined in header files are included. EXTRACT_LOCAL_CLASSES = NO #--------------------------------------------------------------------------- # configuration options related to the man page output #--------------------------------------------------------------------------- # If the GENERATE_MAN tag is set to YES (the default) Doxygen will # generate man pages GENERATE_MAN = YES # The MAN_OUTPUT tag is used to specify where the man pages will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `man' will be used as the default path. MAN_OUTPUT = man # The MAN_EXTENSION tag determines the extension that is added to # the generated man pages (default is the subroutine's section .3) MAN_EXTENSION = .1 # If the MAN_LINKS tag is set to YES and Doxygen generates man output, # then it will generate one additional man file for each entity # documented in the real man page(s). These additional files # only source the real man page, but without them the man command # would be unable to find the correct page. The default is NO. MAN_LINKS = NO papi-5.6.0/src/libpfm4/lib/pfmlib_intel_rapl.c000664 001750 001750 00000014130 13216244365 023256 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_intel_rapl.c : Intel RAPL PMU * * Copyright (c) 2013 Google, Inc * Contributed by Stephane Eranian * * Based on: * Copyright (c) 2006 Hewlett-Packard Development Company, L.P. 
* Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * RAPL PMU (SNB, IVB, HSW) */ /* private headers */ #include "pfmlib_priv.h" /* * for now, we reuse the x86 table entry format and callback to avoid duplicating * code. We may revisit this later on */ #include "pfmlib_intel_x86_priv.h" extern pfmlib_pmu_t intel_rapl_support; #define RAPL_COMMON_EVENTS \ { .name = "RAPL_ENERGY_CORES",\ .desc = "Number of Joules consumed by all cores on the package. Unit is 2^-32 Joules",\ .cntmsk = 0x1,\ .code = 0x1,\ },\ { .name = "RAPL_ENERGY_PKG",\ .desc = "Number of Joules consumed by all cores and Last level cache on the package. Unit is 2^-32 Joules",\ .cntmsk = 0x2,\ .code = 0x2,\ } static const intel_x86_entry_t intel_rapl_cln_pe[]={ RAPL_COMMON_EVENTS, { .name = "RAPL_ENERGY_GPU", .desc = "Number of Joules consumed by the builtin GPU. 
Unit is 2^-32 Joules", .cntmsk = 0x8, .code = 0x4, } }; static const intel_x86_entry_t intel_rapl_skl_cln_pe[]={ RAPL_COMMON_EVENTS, { .name = "RAPL_ENERGY_GPU", .desc = "Number of Joules consumed by the builtin GPU. Unit is 2^-32 Joules", .cntmsk = 0x8, .code = 0x4, }, { .name = "RAPL_ENERGY_PSYS", .desc = "Number of Joules consumed by the builtin PSYS. Unit is 2^-32 Joules", .cntmsk = 0x8, .code = 0x5, } }; static const intel_x86_entry_t intel_rapl_srv_pe[]={ RAPL_COMMON_EVENTS, { .name = "RAPL_ENERGY_DRAM", .desc = "Number of Joules consumed by the DRAM. Unit is 2^-32 Joules", .cntmsk = 0x4, .code = 0x3, }, }; static const intel_x86_entry_t intel_rapl_hswep_pe[]={ /* * RAPL_ENERGY_CORES not supported in HSW-EP */ { .name = "RAPL_ENERGY_PKG", .desc = "Number of Joules consumed by all cores and Last level cache on the package. Unit is 2^-32 Joules", .cntmsk = 0x2, .code = 0x2, }, { .name = "RAPL_ENERGY_DRAM", .desc = "Number of Joules consumed by the DRAM. Unit is 2^-32 Joules", .cntmsk = 0x4, .code = 0x3, }, }; static int pfm_rapl_detect(void *this) { int ret; ret = pfm_intel_x86_detect(); if (ret != PFM_SUCCESS) return ret; if (pfm_intel_x86_cfg.family != 6) return PFM_ERR_NOTSUPP; switch(pfm_intel_x86_cfg.model) { case 42: /* Sandy Bridge */ case 58: /* Ivy Bridge */ case 60: /* Haswell */ case 69: /* Haswell */ case 70: /* Haswell */ case 61: /* Broadwell */ case 71: /* Broadwell GT3E */ case 92: /* Goldmont */ /* already setup by default */ break; case 45: /* Sandy Bridg-EP */ case 62: /* Ivy Bridge-EP */ intel_rapl_support.pe = intel_rapl_srv_pe; intel_rapl_support.pme_count = LIBPFM_ARRAY_SIZE(intel_rapl_srv_pe); break; case 78: /* Skylake */ case 94: /* Skylake H/S */ case 142: /* Kabylake */ case 158: /* Kabylake */ intel_rapl_support.pe = intel_rapl_skl_cln_pe; intel_rapl_support.pme_count = LIBPFM_ARRAY_SIZE(intel_rapl_skl_cln_pe); break; case 63: /* Haswell-EP */ case 79: /* Broadwell-EP */ case 86: /* Broadwell D */ case 85: /* Skylake X */ 
intel_rapl_support.pe = intel_rapl_hswep_pe; intel_rapl_support.pme_count = LIBPFM_ARRAY_SIZE(intel_rapl_hswep_pe); break; default : return PFM_ERR_NOTSUPP; } return PFM_SUCCESS; } static int pfm_intel_rapl_get_encoding(void *this, pfmlib_event_desc_t *e) { const intel_x86_entry_t *pe; pe = this_pe(this); e->fstr[0] = '\0'; e->codes[0] = pe[e->event].code; e->count = 1; evt_strcat(e->fstr, "%s", pe[e->event].name); __pfm_vbprintf("[0x%"PRIx64" event=0x%x] %s\n", e->codes[0], e->codes[0], e->fstr); return PFM_SUCCESS; } /* * number modifiers for RAPL * define an empty modifier to avoid firing the * sanity pfm_intel_x86_validate_table(). We are * using this function to avoid duplicating code. */ static const pfmlib_attr_desc_t rapl_mods[]= {}; pfmlib_pmu_t intel_rapl_support={ .desc = "Intel RAPL", .name = "rapl", .perf_name = "power", .pmu = PFM_PMU_INTEL_RAPL, .pme_count = LIBPFM_ARRAY_SIZE(intel_rapl_cln_pe), .type = PFM_PMU_TYPE_UNCORE, .num_cntrs = 0, .num_fixed_cntrs = 3, .max_encoding = 1, .pe = intel_rapl_cln_pe, /* default, maybe updated */ .pmu_detect = pfm_rapl_detect, .atdesc = rapl_mods, .get_event_encoding[PFM_OS_NONE] = pfm_intel_rapl_get_encoding, PFMLIB_ENCODE_PERF(pfm_intel_x86_get_perf_encoding), PFMLIB_OS_DETECT(pfm_intel_x86_perf_detect), \ .get_event_first = pfm_intel_x86_get_event_first, .get_event_next = pfm_intel_x86_get_event_next, .event_is_valid = pfm_intel_x86_event_is_valid, .validate_table = pfm_intel_x86_validate_table, .get_event_info = pfm_intel_x86_get_event_info, .get_event_attr_info = pfm_intel_x86_get_event_attr_info, PFMLIB_VALID_PERF_PATTRS(pfm_intel_x86_perf_validate_pattrs), .get_event_nattrs = pfm_intel_x86_get_event_nattrs, }; papi-5.6.0/src/validation_tests/vector_testcode.c000664 001750 001750 00000011463 13216244370 024243 0ustar00jshenry1963jshenry1963000000 000000 #include #include #include #define NUMBER 100 inline void inline_packed_sse_add( float *aa, float *bb, float *cc ) { __asm__ __volatile__( "movaps (%0), 
%%xmm0;" "movaps (%1), %%xmm1;" "addps %%xmm0, %%xmm1;" "movaps %%xmm1, (%2);"::"r"( aa ), "r"( bb ), "r"( cc ) :"%xmm0", "%xmm1" ); } inline void inline_packed_sse_mul( float *aa, float *bb, float *cc ) { __asm__ __volatile__( "movaps (%0), %%xmm0;" "movaps (%1), %%xmm1;" "mulps %%xmm0, %%xmm1;" "movaps %%xmm1, (%2);"::"r"( aa ), "r"( bb ), "r"( cc ) :"%xmm0", "%xmm1" ); } inline void inline_packed_sse2_add( double *aa, double *bb, double *cc ) { __asm__ __volatile__( "movapd (%0), %%xmm0;" "movapd (%1), %%xmm1;" "addpd %%xmm0, %%xmm1;" "movapd %%xmm1, (%2);"::"r"( aa ), "r"( bb ), "r"( cc ) :"%xmm0", "%xmm1" ); } inline void inline_packed_sse2_mul( double *aa, double *bb, double *cc ) { __asm__ __volatile__( "movapd (%0), %%xmm0;" "movapd (%1), %%xmm1;" "mulpd %%xmm0, %%xmm1;" "movapd %%xmm1, (%2);"::"r"( aa ), "r"( bb ), "r"( cc ) :"%xmm0", "%xmm1" ); } inline void inline_unpacked_sse_add( float *aa, float *bb, float *cc ) { __asm__ __volatile__( "movss (%0), %%xmm0;" "movss (%1), %%xmm1;" "addss %%xmm0, %%xmm1;" "movss %%xmm1, (%2);"::"r"( aa ), "r"( bb ), "r"( cc ) :"%xmm0", "%xmm1" ); } inline void inline_unpacked_sse_mul( float *aa, float *bb, float *cc ) { __asm__ __volatile__( "movss (%0), %%xmm0;" "movss (%1), %%xmm1;" "mulss %%xmm0, %%xmm1;" "movss %%xmm1, (%2);"::"r"( aa ), "r"( bb ), "r"( cc ) :"%xmm0", "%xmm1" ); } inline void inline_unpacked_sse2_add( double *aa, double *bb, double *cc ) { __asm__ __volatile__( "movsd (%0), %%xmm0;" "movsd (%1), %%xmm1;" "addsd %%xmm0, %%xmm1;" "movsd %%xmm1, (%2);"::"r"( aa ), "r"( bb ), "r"( cc ) :"%xmm0", "%xmm1" ); } inline void inline_unpacked_sse2_mul( double *aa, double *bb, double *cc ) { __asm__ __volatile__( "movsd (%0), %%xmm0;" "movsd (%1), %%xmm1;" "mulsd %%xmm0, %%xmm1;" "movsd %%xmm1, (%2);"::"r"( aa ), "r"( bb ), "r"( cc ) :"%xmm0", "%xmm1" ); } int main( int argc, char **argv ) { int i, packed = 0, sse = 0; float a[4] = { 1.0, 2.0, 3.0, 4.0 }; float b[4] = { 2.0, 3.0, 4.0, 5.0 }; float c[4] = { 0.0, 
0.0, 0.0, 0.0 }; double d[4] = { 1.0, 2.0, 3.0, 4.0 }; double e[4] = { 2.0, 3.0, 4.0, 5.0 }; double f[4] = { 0.0, 0.0, 0.0, 0.0 }; if ( argc != 3 ) { bail: printf( "Usage %s: \n", argv[0] ); exit( 1 ); } if ( strcasecmp( argv[1], "packed" ) == 0 ) packed = 1; else if ( strcasecmp( argv[1], "unpacked" ) == 0 ) packed = 0; else goto bail; if ( strcasecmp( argv[2], "sse" ) == 0 ) sse = 1; else if ( strcasecmp( argv[2], "sse2" ) == 0 ) sse = 0; else goto bail; #if 0 if ( ( sse ) && ( system( "cat /proc/cpuinfo | grep sse > /dev/null" ) != 0 ) ) { printf( "This processor does not have SSE.\n" ); exit( 1 ); } if ( ( sse == 0 ) && ( system( "cat /proc/cpuinfo | grep sse2 > /dev/null" ) != 0 ) ) { printf( "This processor does not have SSE2.\n" ); exit( 1 ); } #endif printf( "Vector 1: %f %f %f %f\n", a[0], a[1], a[2], a[3] ); printf( "Vector 2: %f %f %f %f\n\n", b[0], b[1], b[2], b[3] ); if ( ( packed == 0 ) && ( sse == 1 ) ) { for ( i = 0; i < NUMBER; i++ ) { inline_unpacked_sse_add( &a[0], &b[0], &c[0] ); } printf( "%d SSE Unpacked Adds: Result %f\n", NUMBER, c[0] ); for ( i = 0; i < NUMBER; i++ ) { inline_unpacked_sse_mul( &a[0], &b[0], &c[0] ); } printf( "%d SSE Unpacked Muls: Result %f\n", NUMBER, c[0] ); } if ( ( packed == 1 ) && ( sse == 1 ) ) { for ( i = 0; i < NUMBER; i++ ) { inline_packed_sse_add( a, b, c ); } printf( "%d SSE Packed Adds: Result %f %f %f %f\n", NUMBER, c[0], c[1], c[2], c[3] ); for ( i = 0; i < NUMBER; i++ ) { inline_packed_sse_mul( a, b, c ); } printf( "%d SSE Packed Muls: Result %f %f %f %f\n", NUMBER, c[0], c[1], c[2], c[3] ); } if ( ( packed == 0 ) && ( sse == 0 ) ) { for ( i = 0; i < NUMBER; i++ ) { inline_unpacked_sse2_add( &d[0], &e[0], &f[0] ); } printf( "%d SSE2 Unpacked Adds: Result %f\n", NUMBER, c[0] ); for ( i = 0; i < NUMBER; i++ ) { inline_unpacked_sse2_mul( &d[0], &e[0], &f[0] ); } printf( "%d SSE2 Unpacked Muls: Result %f\n", NUMBER, c[0] ); } if ( ( packed == 1 ) && ( sse == 0 ) ) { for ( i = 0; i < NUMBER; i++ ) { 
inline_packed_sse2_add( &d[0], &e[0], &f[0] ); } printf( "%d SSE2 Packed Adds: Result %f\n", NUMBER, c[0] ); for ( i = 0; i < NUMBER; i++ ) { inline_packed_sse2_mul( &d[0], &e[0], &f[0] ); } printf( "%d SSE2 Packed Muls: Result %f\n", NUMBER, c[0] ); } exit( 0 ); } papi-5.6.0/man/man3/PAPIF_set_cmp_domain.3000664 001750 001750 00000001064 13216244355 022134 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPIF_set_cmp_domain" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPIF_set_cmp_domain \- .PP Set the default counting domain for new event sets bound to the specified component\&. .SH SYNOPSIS .br .PP .SH "Detailed Description" .PP .PP \fBFortran Prototype:\fP .RS 4 #include 'fpapi\&.h' .br \fBPAPIF_set_cmp_domain( C_INT domain, C_INT cidx, C_INT check )\fP .RE .PP \fBSee Also:\fP .RS 4 \fBPAPI_set_cmp_domain\fP .RE .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. papi-5.6.0/man/man3/PAPI_get_event_component.3000664 001750 001750 00000001336 13216244356 023112 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPI_get_event_component" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPI_get_event_component \- .PP return component an event belongs to .SH SYNOPSIS .br .PP .SH "Detailed Description" .PP .PP .nf @retval ENOCMP component does not exist @param EventCode EventCode for which we want to know the component index @par Examples: .fi .PP .PP .nf int cidx,eventcode; cidx = PAPI_get_event_component(eventcode); * .fi .PP \fBPAPI_get_event_component()\fP returns the component an event belongs to\&. .PP \fBSee Also:\fP .RS 4 \fBPAPI_get_event_info\fP .RE .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. 
papi-5.6.0/src/solaris-common.h000664 001750 001750 00000003007 13216244370 020435 0ustar00jshenry1963jshenry1963000000 000000 #ifndef _PAPI_SOLARIS_H #define _PAPI_SOLARIS_H #include #include #include #include #include int _solaris_update_shlib_info( papi_mdi_t *mdi ); int _solaris_get_system_info( papi_mdi_t *mdi ); long long _solaris_get_real_usec( void ); long long _solaris_get_real_cycles( void ); long long _solaris_get_virt_usec( void ); /* Assembler prototypes */ extern void cpu_sync( void ); extern caddr_t _start, _end, _etext, _edata; extern rwlock_t lock[PAPI_MAX_LOCK]; #define _papi_hwd_lock(lck) rw_wrlock(&lock[lck]); #define _papi_hwd_unlock(lck) rw_unlock(&lock[lck]); #endif #if 0 #include ! #include "solaris-ultra.h" ! These functions blatantly stolen from perfmon ! The author of the package "perfmon" is Richard J. Enbody ! and the home page for "perfmon" is ! http://www.cps.msu.edu/~enbody/perfmon/index.html ! ! extern void cpu_sync(void); ! ! Make sure all instructinos and memory references before us ! have been completed. .global cpu_sync ENTRY(cpu_sync) membar #Sync ! Wait for all outstanding things to finish retl ! Return to the caller nop ! Delay slot SET_SIZE(cpu_sync) ! ! extern unsigned long long get_tick(void) ! ! Read the tick register and return it .global get_tick ENTRY(get_tick) rd %tick, %o0 ! Get the current value of TICK clruw %o0, %o1 ! put the lower 32 bits into %o1 retl ! Return to the caller srlx %o0, 32, %o0 ! put the upper 32 bits into %o0 SET_SIZE(get_tick) #endif papi-5.6.0/src/libpfm-3.y/examples_v3.x/detect_pmcs.h000664 001750 001750 00000003244 13216244362 024345 0ustar00jshenry1963jshenry1963000000 000000 /* * detect_pmcs.h - detect unavailable PMD/PMC registers based on perfmon2 information * * Copyright (c) 2006-2007 Hewlett-Packard Development Company, L.P. 
* Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. 
*/ #ifndef __DETECT_PMCS_H__ #define __DETECT_PMCS_H__ #include #include extern int get_sif(int flags, pfarg_sinfo_t *sif); extern int detect_unavail_pmu_regs(pfarg_sinfo_t *sif, pfmlib_regmask_t *r_pmcs, pfmlib_regmask_t *r_pmds); #endif /* __DETECT_PMCS_H__ */ papi-5.6.0/man/man3/PAPI_preload_info_t.3000664 001750 001750 00000000753 13216244356 022036 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPI_preload_info_t" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPI_preload_info_t \- .SH SYNOPSIS .br .PP .SS "Data Fields" .in +1c .ti -1c .RI "char \fBlib_preload_env\fP [128]" .br .ti -1c .RI "char \fBlib_preload_sep\fP" .br .ti -1c .RI "char \fBlib_dir_env\fP [128]" .br .ti -1c .RI "char \fBlib_dir_sep\fP" .br .in -1c .SH "Detailed Description" .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. papi-5.6.0/src/solaris-common.c000664 001750 001750 00000052223 13216244370 020434 0ustar00jshenry1963jshenry1963000000 000000 #include "papi.h" #include "papi_internal.h" #include "papi_vector.h" #include "papi_memory.h" #include "solaris-common.h" #include #if 0 /* once the bug in dladdr is fixed by SUN, (now dladdr caused deadlock when used with pthreads) this function can be used again */ int _solaris_update_shlib_info( papi_mdi_t *mdi ) { char fname[80], name[PAPI_HUGE_STR_LEN]; prmap_t newp; int count, t_index; FILE *map_f; void *vaddr; Dl_info dlip; PAPI_address_map_t *tmp = NULL; sprintf( fname, "/proc/%d/map", getpid( ) ); map_f = fopen( fname, "r" ); if ( !map_f ) { PAPIERROR( "fopen(%s) returned < 0", fname ); return ( PAPI_OK ); } /* count the entries we need */ count = 0; t_index = 0; while ( fread( &newp, sizeof ( prmap_t ), 1, map_f ) > 0 ) { vaddr = ( void * ) ( 1 + ( newp.pr_vaddr ) ); // map base address if ( dladdr( vaddr, &dlip ) > 0 ) { count++; if ( ( newp.pr_mflags & MA_EXEC ) && ( newp.pr_mflags & MA_READ ) ) { if ( !( newp.pr_mflags & MA_WRITE ) ) t_index++; } strcpy( 
name, dlip.dli_fname ); if ( strcmp( _papi_hwi_system_info.exe_info.address_info.name, basename( name ) ) == 0 ) { if ( ( newp.pr_mflags & MA_EXEC ) && ( newp.pr_mflags & MA_READ ) ) { if ( !( newp.pr_mflags & MA_WRITE ) ) { _papi_hwi_system_info.exe_info.address_info.text_start = ( caddr_t ) newp.pr_vaddr; _papi_hwi_system_info.exe_info.address_info.text_end = ( caddr_t ) ( newp.pr_vaddr + newp.pr_size ); } else { _papi_hwi_system_info.exe_info.address_info.data_start = ( caddr_t ) newp.pr_vaddr; _papi_hwi_system_info.exe_info.address_info.data_end = ( caddr_t ) ( newp.pr_vaddr + newp.pr_size ); } } } } } rewind( map_f ); tmp = ( PAPI_address_map_t * ) papi_calloc( t_index - 1, sizeof ( PAPI_address_map_t ) ); if ( tmp == NULL ) { PAPIERROR( "Error allocating shared library address map" ); return ( PAPI_ENOMEM ); } t_index = -1; while ( fread( &newp, sizeof ( prmap_t ), 1, map_f ) > 0 ) { vaddr = ( void * ) ( 1 + ( newp.pr_vaddr ) ); // map base address if ( dladdr( vaddr, &dlip ) > 0 ) { // valid name strcpy( name, dlip.dli_fname ); if ( strcmp( _papi_hwi_system_info.exe_info.address_info.name, basename( name ) ) == 0 ) continue; if ( ( newp.pr_mflags & MA_EXEC ) && ( newp.pr_mflags & MA_READ ) ) { if ( !( newp.pr_mflags & MA_WRITE ) ) { t_index++; tmp[t_index].text_start = ( caddr_t ) newp.pr_vaddr; tmp[t_index].text_end = ( caddr_t ) ( newp.pr_vaddr + newp.pr_size ); strncpy( tmp[t_index].name, dlip.dli_fname, PAPI_HUGE_STR_LEN - 1 ); tmp[t_index].name[PAPI_HUGE_STR_LEN - 1] = '\0'; } else { if ( t_index < 0 ) continue; tmp[t_index].data_start = ( caddr_t ) newp.pr_vaddr; tmp[t_index].data_end = ( caddr_t ) ( newp.pr_vaddr + newp.pr_size ); } } } } fclose( map_f ); if ( _papi_hwi_system_info.shlib_info.map ) papi_free( _papi_hwi_system_info.shlib_info.map ); _papi_hwi_system_info.shlib_info.map = tmp; _papi_hwi_system_info.shlib_info.count = t_index + 1; return PAPI_OK; } #endif int _papi_hwi_init_os(void) { struct utsname uname_buffer; uname(&uname_buffer); 
strncpy(_papi_os_info.name,uname_buffer.sysname,PAPI_MAX_STR_LEN); strncpy(_papi_os_info.version,uname_buffer.release,PAPI_MAX_STR_LEN); _papi_os_info.itimer_sig = PAPI_INT_MPX_SIGNAL; _papi_os_info.itimer_num = PAPI_INT_ITIMER; _papi_os_info.itimer_ns = PAPI_INT_MPX_DEF_US * 1000; _papi_os_info.itimer_res_ns = 1; return PAPI_OK; } #if 0 int _ultra_hwd_update_shlib_info( papi_mdi_t *mdi ) { /*??? system call takes very long */ char cmd_line[PAPI_HUGE_STR_LEN + PAPI_HUGE_STR_LEN], fname[L_tmpnam]; char line[256]; char address[16], size[10], flags[64], objname[256]; PAPI_address_map_t *tmp = NULL; FILE *f = NULL; int t_index = 0, i; struct map_record { long address; int size; int flags; char objname[256]; struct map_record *next; } *tmpr, *head, *curr; tmpnam( fname ); SUBDBG( "Temporary name %s\n", fname ); sprintf( cmd_line, "/bin/pmap %d > %s", ( int ) getpid( ), fname ); if ( system( cmd_line ) != 0 ) { PAPIERROR( "Could not run %s to get shared library address map", cmd_line ); return ( PAPI_OK ); } f = fopen( fname, "r" ); if ( f == NULL ) { PAPIERROR( "fopen(%s) returned < 0", fname ); remove( fname ); return ( PAPI_OK ); } /* ignore the first line */ fgets( line, 256, f ); head = curr = NULL; while ( fgets( line, 256, f ) != NULL ) { /* discard the last line */ if ( strncmp( line, " total", 6 ) != 0 ) { sscanf( line, "%s %s %s %s", address, size, flags, objname ); if ( objname[0] == '/' ) { tmpr = ( struct map_record * ) papi_malloc( sizeof ( struct map_record ) ); if ( tmpr == NULL ) return ( -1 ); tmpr->next = NULL; if ( curr ) { curr->next = tmpr; curr = tmpr; } if ( head == NULL ) { curr = head = tmpr; } SUBDBG( "%s\n", objname ); if ( ( strstr( flags, "read" ) && strstr( flags, "exec" ) ) || ( strstr( flags, "r" ) && strstr( flags, "x" ) ) ) { if ( !( strstr( flags, "write" ) || strstr( flags, "w" ) ) ) { /* text segment */ t_index++; tmpr->flags = 1; } else { tmpr->flags = 0; } sscanf( address, "%lx", &tmpr->address ); sscanf( size, "%d", &tmpr->size ); 
tmpr->size *= 1024; strcpy( tmpr->objname, objname ); } } } } tmp = ( PAPI_address_map_t * ) papi_calloc( t_index - 1, sizeof ( PAPI_address_map_t ) ); if ( tmp == NULL ) { PAPIERROR( "Error allocating shared library address map" ); return ( PAPI_ENOMEM ); } t_index = -1; tmpr = curr = head; i = 0; while ( curr != NULL ) { if ( strcmp( _papi_hwi_system_info.exe_info.address_info.name, basename( curr->objname ) ) == 0 ) { if ( curr->flags ) { _papi_hwi_system_info.exe_info.address_info.text_start = ( caddr_t ) curr->address; _papi_hwi_system_info.exe_info.address_info.text_end = ( caddr_t ) ( curr->address + curr->size ); } else { _papi_hwi_system_info.exe_info.address_info.data_start = ( caddr_t ) curr->address; _papi_hwi_system_info.exe_info.address_info.data_end = ( caddr_t ) ( curr->address + curr->size ); } } else { if ( curr->flags ) { t_index++; tmp[t_index].text_start = ( caddr_t ) curr->address; tmp[t_index].text_end = ( caddr_t ) ( curr->address + curr->size ); strncpy( tmp[t_index].name, curr->objname, PAPI_HUGE_STR_LEN - 1 ); tmp[t_index].name[PAPI_HUGE_STR_LEN - 1] = '\0'; } else { if ( t_index < 0 ) continue; tmp[t_index].data_start = ( caddr_t ) curr->address; tmp[t_index].data_end = ( caddr_t ) ( curr->address + curr->size ); } } tmpr = curr->next; /* free the temporary allocated memory */ papi_free( curr ); curr = tmpr; } /* end of while */ remove( fname ); fclose( f ); if ( _papi_hwi_system_info.shlib_info.map ) papi_free( _papi_hwi_system_info.shlib_info.map ); _papi_hwi_system_info.shlib_info.map = tmp; _papi_hwi_system_info.shlib_info.count = t_index + 1; return ( PAPI_OK ); } #endif /* From niagara2 code */ int _solaris_update_shlib_info( papi_mdi_t *mdi ) { char *file = "/proc/self/map"; char *resolve_pattern = "/proc/self/path/%s"; char lastobject[PRMAPSZ]; char link[PAPI_HUGE_STR_LEN]; char path[PAPI_HUGE_STR_LEN]; prmap_t mapping; int fd, count = 0, total = 0, position = -1, first = 1; caddr_t t_min, t_max, d_min, d_max; PAPI_address_map_t 
*pam, *cur; #ifdef DEBUG SUBDBG( "ENTERING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif fd = open( file, O_RDONLY ); if ( fd == -1 ) { return PAPI_ESYS; } memset( lastobject, 0, PRMAPSZ ); #ifdef DEBUG SUBDBG( " -> %s: Preprocessing memory maps from procfs\n", __func__ ); #endif /* Search through the list of mappings in order to identify a) how many mappings are available and b) how many unique mappings are available. */ while ( read( fd, &mapping, sizeof ( prmap_t ) ) > 0 ) { #ifdef DEBUG SUBDBG( " -> %s: Found a new memory map entry\n", __func__ ); #endif /* Another entry found, just the total count of entries. */ total++; /* Is the mapping accessible and not anonymous? */ if ( mapping.pr_mflags & ( MA_READ | MA_WRITE | MA_EXEC ) && !( mapping.pr_mflags & MA_ANON ) ) { /* Test if a new library has been found. If a new library has been found a new entry needs to be counted. */ if ( strcmp( lastobject, mapping.pr_mapname ) != 0 ) { strncpy( lastobject, mapping.pr_mapname, PRMAPSZ ); count++; #ifdef DEBUG SUBDBG( " -> %s: Memory mapping entry valid for %s\n", __func__, mapping.pr_mapname ); #endif } } } #ifdef DEBUG SUBDBG( " -> %s: Preprocessing done, starting to analyze\n", __func__ ); #endif /* Start from the beginning, now fill in the found mappings */ if ( lseek( fd, 0, SEEK_SET ) == -1 ) { return PAPI_ESYS; } memset( lastobject, 0, PRMAPSZ ); /* Allocate memory */ pam = ( PAPI_address_map_t * ) papi_calloc( count, sizeof ( PAPI_address_map_t ) ); while ( read( fd, &mapping, sizeof ( prmap_t ) ) > 0 ) { if ( mapping.pr_mflags & MA_ANON ) { #ifdef DEBUG SUBDBG ( " -> %s: Anonymous mapping (MA_ANON) found for %s, skipping\n", __func__, mapping.pr_mapname ); #endif continue; } /* Check for a new entry */ if ( strcmp( mapping.pr_mapname, lastobject ) != 0 ) { #ifdef DEBUG SUBDBG( " -> %s: Analyzing mapping for %s\n", __func__, mapping.pr_mapname ); #endif cur = &( pam[++position] ); strncpy( lastobject, mapping.pr_mapname, PRMAPSZ ); 
snprintf( link, PAPI_HUGE_STR_LEN, resolve_pattern, lastobject ); memset( path, 0, PAPI_HUGE_STR_LEN ); readlink( link, path, PAPI_HUGE_STR_LEN ); strncpy( cur->name, path, PAPI_HUGE_STR_LEN ); #ifdef DEBUG SUBDBG( " -> %s: Resolved name for %s: %s\n", __func__, mapping.pr_mapname, cur->name ); #endif } if ( mapping.pr_mflags & MA_READ ) { /* Data (MA_WRITE) or text (MA_READ) segment? */ if ( mapping.pr_mflags & MA_WRITE ) { cur->data_start = ( caddr_t ) mapping.pr_vaddr; cur->data_end = ( caddr_t ) ( mapping.pr_vaddr + mapping.pr_size ); if ( strcmp ( cur->name, _papi_hwi_system_info.exe_info.fullname ) == 0 ) { _papi_hwi_system_info.exe_info.address_info.data_start = cur->data_start; _papi_hwi_system_info.exe_info.address_info.data_end = cur->data_end; } if ( first ) d_min = cur->data_start; if ( first ) d_max = cur->data_end; if ( cur->data_start < d_min ) { d_min = cur->data_start; } if ( cur->data_end > d_max ) { d_max = cur->data_end; } } else if ( mapping.pr_mflags & MA_EXEC ) { cur->text_start = ( caddr_t ) mapping.pr_vaddr; cur->text_end = ( caddr_t ) ( mapping.pr_vaddr + mapping.pr_size ); if ( strcmp ( cur->name, _papi_hwi_system_info.exe_info.fullname ) == 0 ) { _papi_hwi_system_info.exe_info.address_info.text_start = cur->text_start; _papi_hwi_system_info.exe_info.address_info.text_end = cur->text_end; } if ( first ) t_min = cur->text_start; if ( first ) t_max = cur->text_end; if ( cur->text_start < t_min ) { t_min = cur->text_start; } if ( cur->text_end > t_max ) { t_max = cur->text_end; } } } first = 0; } close( fd ); /* During the walk of shared objects the upper and lower bound of the segments could be discovered. The bounds are stored in the PAPI info structure. The information is important for the profiling functions of PAPI. 
*/ /* This variant would pass the addresses of all text and data segments _papi_hwi_system_info.exe_info.address_info.text_start = t_min; _papi_hwi_system_info.exe_info.address_info.text_end = t_max; _papi_hwi_system_info.exe_info.address_info.data_start = d_min; _papi_hwi_system_info.exe_info.address_info.data_end = d_max; */ #ifdef DEBUG SUBDBG( " -> %s: Analysis of memory maps done, results:\n", __func__ ); SUBDBG( " -> %s: text_start=%#x, text_end=%#x, text_size=%lld\n", __func__, _papi_hwi_system_info.exe_info.address_info.text_start, _papi_hwi_system_info.exe_info.address_info.text_end, _papi_hwi_system_info.exe_info.address_info.text_end - _papi_hwi_system_info.exe_info.address_info.text_start ); SUBDBG( " -> %s: data_start=%#x, data_end=%#x, data_size=%lld\n", __func__, _papi_hwi_system_info.exe_info.address_info.data_start, _papi_hwi_system_info.exe_info.address_info.data_end, _papi_hwi_system_info.exe_info.address_info.data_end - _papi_hwi_system_info.exe_info.address_info.data_start ); #endif /* Store the map read and the total count of shlibs found */ _papi_hwi_system_info.shlib_info.map = pam; _papi_hwi_system_info.shlib_info.count = count; #ifdef DEBUG SUBDBG( "LEAVING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif return PAPI_OK; } #if 0 int _niagara2_get_system_info( papi_mdi_t *mdi ) { // Used for evaluating return values int retval = 0; // Check for process settings pstatus_t *proc_status; psinfo_t *proc_info; // Used for string truncating char *c_ptr; // For retrieving the executable full name char exec_name[PAPI_HUGE_STR_LEN]; // For retrieving processor information __sol_processor_information_t cpus; #ifdef DEBUG SUBDBG( "ENTERING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif /* Get and set pid */ pid = getpid( ); /* Check for microstate accounting */ proc_status = __sol_get_proc_status( pid ); if ( proc_status->pr_flags & PR_MSACCT == 0 || proc_status->pr_flags & PR_MSFORK == 0 ) { /* Solaris 10 
should have microstate accounting always activated */ return PAPI_ECMP; } /* Fill _papi_hwi_system_info.exe_info.fullname */ proc_info = __sol_get_proc_info( pid ); // If there are arguments, trim the string to the executable name. if ( proc_info->pr_argc > 1 ) { c_ptr = strchr( proc_info->pr_psargs, ' ' ); if ( c_ptr != NULL ) c_ptr = '\0'; } /* If the path can be qualified, use the full path, otherwise the trimmed name. */ if ( realpath( proc_info->pr_psargs, exec_name ) != NULL ) { strncpy( _papi_hwi_system_info.exe_info.fullname, exec_name, PAPI_HUGE_STR_LEN ); } else { strncpy( _papi_hwi_system_info.exe_info.fullname, proc_info->pr_psargs, PAPI_HUGE_STR_LEN ); } /* Fill _papi_hwi_system_info.exe_info.address_info */ // Taken from the old component strncpy( _papi_hwi_system_info.exe_info.address_info.name, basename( _papi_hwi_system_info.exe_info.fullname ), PAPI_HUGE_STR_LEN ); __CHECK_ERR_PAPI( _niagara2_update_shlib_info( &_papi_hwi_system_info ) ); /* Fill _papi_hwi_system_info.hw_info */ // Taken from the old component _papi_hwi_system_info.hw_info.ncpu = sysconf( _SC_NPROCESSORS_ONLN ); _papi_hwi_system_info.hw_info.nnodes = 1; _papi_hwi_system_info.hw_info.vendor = PAPI_VENDOR_SUN; strcpy( _papi_hwi_system_info.hw_info.vendor_string, "SUN" ); _papi_hwi_system_info.hw_info.totalcpus = sysconf( _SC_NPROCESSORS_CONF ); _papi_hwi_system_info.hw_info.model = 1; strcpy( _papi_hwi_system_info.hw_info.model_string, cpc_cciname( cpc ) ); /* The field sparc-version is no longer in prtconf -pv */ _papi_hwi_system_info.hw_info.revision = 1; /* Clock speed */ _papi_hwi_system_info.hw_info.mhz = ( float ) __sol_get_processor_clock( ); _papi_hwi_system_info.hw_info.clock_mhz = __sol_get_processor_clock( ); _papi_hwi_system_info.hw_info.cpu_max_mhz = __sol_get_processor_clock( ); _papi_hwi_system_info.hw_info.cpu_min_mhz = __sol_get_processor_clock( ); /* Fill _niagara2_vector.cmp_info.mem_hierarchy */ _niagara2_get_memory_info( &_papi_hwi_system_info.hw_info, 0 ); /*
Fill _papi_hwi_system_info.sub_info */ strcpy( _niagara2_vector.cmp_info.name, "SunNiagara2" ); strcpy( _niagara2_vector.cmp_info.version, "ALPHA" ); strcpy( _niagara2_vector.cmp_info.support_version, "libcpc2" ); strcpy( _niagara2_vector.cmp_info.kernel_version, "libcpc2" ); /* libcpc2 uses SIGEMT using real hardware signals, no sw emu */ #ifdef DEBUG SUBDBG( "LEAVING FUNCTION >>%s<< at %s:%d\n", __func__, __FILE__, __LINE__ ); #endif return PAPI_OK; } #endif

/*
 * Probe the CPC counter interface and fill the global system description.
 *
 * In order: verifies libcpc access and the CPU version, builds the event
 * tables, enables microstate accounting through /proc/<pid>/ctl, reads the
 * executable name from /proc/<pid>/psinfo, refreshes the address map, fills
 * hw_info (cpu counts, model, vendor, clock) and the component counter
 * description, and finally installs the preset events.
 *
 * mdi is not used; _papi_hwi_system_info and _solaris_vector are written
 * directly.
 *
 * Returns PAPI_OK on success, PAPI_ECMP when the counters or the CPU are not
 * usable, PAPI_ESYS when a /proc file cannot be opened, read or written, or
 * the error returned by build_tables() / _papi_hwi_setup_all_presets().
 */
int
_solaris_get_system_info( papi_mdi_t *mdi )
{
	int retval;
	pid_t pid;
	char maxargs[PAPI_MAX_STR_LEN] = "";
	psinfo_t psi;
	int fd;
	int hz, version;
	char cpuname[PAPI_MAX_STR_LEN], pname[PAPI_HUGE_STR_LEN];
	const char *cciname;
	ssize_t nbytes;

	( void ) mdi;

	/* Check counter access */

	if ( cpc_version( CPC_VER_CURRENT ) != CPC_VER_CURRENT )
		return PAPI_ECMP;
	SUBDBG( "CPC version %d successfully opened\n", CPC_VER_CURRENT );

	if ( cpc_access(  ) == -1 )
		return PAPI_ECMP;

	/* Global variable cpuver */

	cpuver = cpc_getcpuver(  );
	SUBDBG( "Got %d from cpc_getcpuver()\n", cpuver );
	if ( cpuver == -1 )
		return PAPI_ECMP;

#ifdef DEBUG
	{
		if ( ISLEVEL( DEBUG_SUBSTRATE ) ) {
			const char *name;
			int i;

			name = cpc_getcpuref( cpuver );
			if ( name ) {
				SUBDBG( "CPC CPU reference: %s\n", name );
			}
			else {
				SUBDBG( "Could not get a CPC CPU reference\n" );
			}

			for ( i = 0; i < cpc_getnpic( cpuver ); i++ ) {
				SUBDBG( "\n%6s %-40s %8s\n", "Reg", "Symbolic name", "Code" );
				cpc_walk_names( cpuver, i, "%6d %-40s %02x\n",
								print_walk_names );
			}
			SUBDBG( "\n" );
		}
	}
#endif

	/* Initialize other globals */

	if ( ( retval = build_tables(  ) ) != PAPI_OK )
		return retval;

	preset_search_map = preset_table;
	if ( cpuver <= CPC_ULTRA2 ) {
		SUBDBG( "cpuver (==%d) <= CPC_ULTRA2 (==%d)\n", cpuver, CPC_ULTRA2 );
		pcr_shift[0] = CPC_ULTRA_PCR_PIC0_SHIFT;
		pcr_shift[1] = CPC_ULTRA_PCR_PIC1_SHIFT;
	} else if ( cpuver <= LASTULTRA3 ) {
		SUBDBG( "cpuver (==%d) <= CPC_ULTRA3x (==%d)\n", cpuver, LASTULTRA3 );
		pcr_shift[0] = CPC_ULTRA_PCR_PIC0_SHIFT;
		pcr_shift[1] = CPC_ULTRA_PCR_PIC1_SHIFT;
		_solaris_vector.cmp_info.hardware_intr = 1;
		_solaris_vector.cmp_info.hardware_intr_sig = SIGEMT;
	} else
		return PAPI_ECMP;

	/* Path and args */

	pid = getpid(  );
	if ( pid == -1 )
		return ( PAPI_ESYS );

	/* Turn on microstate accounting for this process and any LWPs. */

	snprintf( maxargs, sizeof ( maxargs ), "/proc/%d/ctl", ( int ) pid );
	if ( ( fd = open( maxargs, O_WRONLY ) ) == -1 )
		return ( PAPI_ESYS );
	{
		struct
		{
			long cmd;
			long flags;
		} cmd;
		cmd.cmd = PCSET;
		cmd.flags = PR_MSACCT | PR_MSFORK;
		nbytes = write( fd, &cmd, sizeof ( cmd ) );
		close( fd );
		SUBDBG( "Write PCSET returned %d\n", ( int ) nbytes );
		if ( nbytes != ( ssize_t ) sizeof ( cmd ) )
			return ( PAPI_ESYS );
	}

	/* Get executable info */

	snprintf( maxargs, sizeof ( maxargs ), "/proc/%d/psinfo", ( int ) pid );
	if ( ( fd = open( maxargs, O_RDONLY ) ) == -1 )
		return ( PAPI_ESYS );
	nbytes = read( fd, &psi, sizeof ( psi ) );
	close( fd );
	/* Without a complete psinfo record pr_psargs below would be garbage. */
	if ( nbytes != ( ssize_t ) sizeof ( psi ) )
		return ( PAPI_ESYS );
	psi.pr_psargs[sizeof ( psi.pr_psargs ) - 1] = '\0';

	/* Cut off any arguments to exe */
	{
		char *tmp;
		tmp = strchr( psi.pr_psargs, ' ' );
		if ( tmp != NULL )
			*tmp = '\0';
	}

	/* Prefer the resolved path; fall back to the name as it was typed. */
	if ( realpath( psi.pr_psargs, pname ) )
		strncpy( _papi_hwi_system_info.exe_info.fullname, pname,
				 PAPI_HUGE_STR_LEN - 1 );
	else
		strncpy( _papi_hwi_system_info.exe_info.fullname, psi.pr_psargs,
				 PAPI_HUGE_STR_LEN - 1 );
	_papi_hwi_system_info.exe_info.fullname[PAPI_HUGE_STR_LEN - 1] = '\0';

	/* please don't use pr_fname here, because it can only store less that
	   16 characters */
	strncpy( _papi_hwi_system_info.exe_info.address_info.name,
			 basename( _papi_hwi_system_info.exe_info.fullname ),
			 PAPI_HUGE_STR_LEN - 1 );
	_papi_hwi_system_info.exe_info.address_info.name[PAPI_HUGE_STR_LEN - 1] =
		'\0';

	SUBDBG( "Full Executable is %s\n",
			_papi_hwi_system_info.exe_info.fullname );

	/* Executable regions, reading /proc/pid/maps file */
	/* NOTE(review): the result is overwritten below without being checked,
	   so a failed address-map update is silently tolerated.  The callee
	   keeps the old _ultra_hwd_ name while this file also defines
	   _solaris_update_shlib_info() -- confirm which one is intended. */
	retval = _ultra_hwd_update_shlib_info( &_papi_hwi_system_info );

	/* Hardware info */

	_papi_hwi_system_info.hw_info.ncpu = sysconf( _SC_NPROCESSORS_ONLN );
	_papi_hwi_system_info.hw_info.nnodes = 1;
	_papi_hwi_system_info.hw_info.totalcpus = sysconf( _SC_NPROCESSORS_CONF );

	retval = scan_prtconf( cpuname, PAPI_MAX_STR_LEN, &hz, &version );
	if ( retval == -1 )
		return PAPI_ECMP;

	/* cpc_getcciname() may return NULL; leave model_string untouched then
	   instead of handing NULL to a string copy. */
	cciname = cpc_getcciname( cpuver );
	if ( cciname != NULL ) {
		strncpy( _papi_hwi_system_info.hw_info.model_string, cciname,
				 PAPI_MAX_STR_LEN - 1 );
		_papi_hwi_system_info.hw_info.model_string[PAPI_MAX_STR_LEN - 1] =
			'\0';
	}
	_papi_hwi_system_info.hw_info.model = cpuver;
	strcpy( _papi_hwi_system_info.hw_info.vendor_string, "SUN" );
	_papi_hwi_system_info.hw_info.vendor = PAPI_VENDOR_SUN;
	_papi_hwi_system_info.hw_info.revision = version;

	_papi_hwi_system_info.hw_info.mhz = ( ( float ) hz / 1.0e6 );
	SUBDBG( "hw_info.mhz = %f\n", _papi_hwi_system_info.hw_info.mhz );

	_papi_hwi_system_info.hw_info.cpu_max_mhz =
		_papi_hwi_system_info.hw_info.mhz;
	_papi_hwi_system_info.hw_info.cpu_min_mhz =
		_papi_hwi_system_info.hw_info.mhz;

	/* Number of PMCs */

	retval = cpc_getnpic( cpuver );
	if ( retval < 0 )
		return PAPI_ECMP;

	_solaris_vector.cmp_info.num_cntrs = retval;
	_solaris_vector.cmp_info.fast_real_timer = 1;
	_solaris_vector.cmp_info.fast_virtual_timer = 1;
	_solaris_vector.cmp_info.default_domain = PAPI_DOM_USER;
	_solaris_vector.cmp_info.available_domains =
		PAPI_DOM_USER | PAPI_DOM_KERNEL;

	/* Setup presets */

	retval = _papi_hwi_setup_all_presets( preset_search_map, NULL );
	if ( retval )
		return ( retval );

	return ( PAPI_OK );
}

/* Real time: gethrtime() divided by 1000, reported as microseconds. */
long long
_solaris_get_real_usec( void )
{
	return ( ( long long ) gethrtime(  ) / ( long long ) 1000 );
}

/*
 * Real time expressed in cycles: microseconds multiplied by the maximum
 * clock rate in MHz.  Uses the _solaris_ timer defined just above; the
 * previous callee name (_ultra_hwd_get_real_usec) is not defined in the
 * visible part of this file.
 */
long long
_solaris_get_real_cycles( void )
{
	return ( _solaris_get_real_usec(  ) *
			 ( long long ) _papi_hwi_system_info.hw_info.cpu_max_mhz );
}

/* Virtual time: gethrvtime() divided by 1000, reported as microseconds. */
long long
_solaris_get_virt_usec( void )
{
	return ( ( long long ) gethrvtime(  ) / ( long long ) 1000 );
}
papi-5.6.0/src/ctests/overflow.c000664 001750 001750 00000012715 13216244360 020643 0ustar00jshenry1963jshenry1963000000 000000 /* * File: overflow.c * Author: Philip Mucci * mucci@cs.utk.edu */ /* This file performs the following test: overflow dispatch The Eventset contains: + PAPI_TOT_CYC + PAPI_FP_INS (overflow monitor) - Start eventset 1 - Do flops - Stop and measure eventset 1 - Set up overflow on eventset 1 - Start eventset 1 - Do flops - Stop eventset 1 */ #include #include #include "papi.h" #include "papi_test.h" #include "do_loops.h" #define OVER_FMT "handler(%d ) Overflow at %p!
bit=%#llx \n"
#define OUT_FMT "%-12s : %16lld%16lld\n"

static int total = 0;				   /* total overflows */

/*
 * Overflow callback registered with PAPI_overflow() in main().
 * Reports each overflow on stderr unless TESTS_QUIET is set and counts it in
 * `total`.  The context argument is not used.
 */
void
handler( int EventSet, void *address, long long overflow_vector, void *context )
{
	( void ) context;

	if ( !TESTS_QUIET ) {
		fprintf( stderr, OVER_FMT, EventSet, address, overflow_vector );
	}
	total++;
}

/*
 * Overflow dispatch test.
 *
 * Runs do_flops() once without overflow monitoring (values[0]) and once with
 * an overflow threshold installed on the second event (values[1]), then
 * checks that the number of handler invocations lies within OVR_TOLERANCE of
 * the first run's event count divided by the threshold.
 */
int
main( int argc, char **argv )
{
	int EventSet = PAPI_NULL;
	long long ( values[2] )[2];
	long long min, max;
	int num_flops = NUM_FLOPS, retval;
	int PAPI_event, mythreshold = THRESHOLD;
	char event_name1[PAPI_MAX_STR_LEN];
	const PAPI_hw_info_t *hw_info = NULL;
	int num_events, mask;
	int quiet;

	/* Set TESTS_QUIET variable */
	quiet = tests_quiet( argc, argv );

	/* Init PAPI */
	retval = PAPI_library_init( PAPI_VER_CURRENT );
	if ( retval != PAPI_VER_CURRENT ) {
		test_fail( __FILE__, __LINE__, "PAPI_library_init", retval );
	}

	/* Get hardware info */
	hw_info = PAPI_get_hardware_info(  );
	if ( hw_info == NULL ) {
		test_fail( __FILE__, __LINE__, "PAPI_get_hardware_info", 2 );
	}

	/* add PAPI_TOT_CYC and one of the events in */
	/* PAPI_FP_INS, PAPI_FP_OPS or PAPI_TOT_INS, */
	/* depending on the availability of the event on */
	/* the platform */
	EventSet = add_two_nonderived_events( &num_events, &PAPI_event, &mask );
	if (num_events==0) {
		if (!quiet) printf("Trouble adding event!\n");
		test_skip(__FILE__,__LINE__,"Event add",1);
	}

	if (!quiet) {
		printf("Using %#x for the overflow event\n",PAPI_event);
	}

	/* Pick the overflow threshold: the default for PAPI_FP_INS, otherwise
	   a value scaled from the clock rate (linux) or twice the default. */
	if ( PAPI_event == PAPI_FP_INS ) {
		mythreshold = THRESHOLD;
	}
	else {
#if defined(linux)
		mythreshold = ( int ) hw_info->cpu_max_mhz * 20000;
#else
		mythreshold = THRESHOLD * 2;
#endif
	}

	/* Start the calibration run */
	retval = PAPI_start( EventSet );
	if ( retval != PAPI_OK )
		test_fail( __FILE__, __LINE__, "PAPI_start", retval );

	do_flops( NUM_FLOPS );

	/* stop the calibration run */
	retval = PAPI_stop( EventSet, values[0] );
	if ( retval != PAPI_OK )
		test_fail( __FILE__, __LINE__, "PAPI_stop", retval );

	/* set up overflow handler */
	retval = PAPI_overflow( EventSet, PAPI_event, mythreshold, 0, handler );
	if ( retval != PAPI_OK ) {
		test_fail( __FILE__, __LINE__, "PAPI_overflow", retval );
	}

	/* Start overflow run */
	retval = PAPI_start( EventSet );
	if ( retval != PAPI_OK ) {
		test_fail( __FILE__, __LINE__, "PAPI_start", retval );
	}

	do_flops( num_flops );

	/* stop overflow run */
	retval = PAPI_stop( EventSet, values[1] );
	if ( retval != PAPI_OK )
		test_fail( __FILE__, __LINE__, "PAPI_stop", retval );

	/* a second PAPI_overflow() call with threshold 0 on the same event */
	retval = PAPI_overflow( EventSet, PAPI_event, 0, 0, handler );
	if ( retval != PAPI_OK )
		test_fail( __FILE__, __LINE__, "PAPI_overflow", retval );

	/* Report both runs side by side */
	if ( !TESTS_QUIET ) {
		retval = PAPI_event_code_to_name( PAPI_event, event_name1 );
		if (retval != PAPI_OK ) {
			test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval );
		}

		printf( "Test case: Overflow dispatch of 2nd event in set with 2 events.\n" );
		printf( "---------------------------------------------------------------\n" );
		printf( "Threshold for overflow is: %d\n", mythreshold );
		printf( "Using %d iterations of c += a*b\n", num_flops );
		printf( "-----------------------------------------------\n" );

		printf( "Test type : %16d%16d\n", 1, 2 );
		printf( OUT_FMT, event_name1, ( values[0] )[1], ( values[1] )[1] );
		printf( OUT_FMT, "PAPI_TOT_CYC", ( values[0] )[0], ( values[1] )[0] );
		printf( "Overflows : %16s%16d\n", "", total );
		printf( "-----------------------------------------------\n" );
	}

	retval = PAPI_cleanup_eventset( EventSet );
	if ( retval != PAPI_OK )
		test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset", retval );

	retval = PAPI_destroy_eventset( &EventSet );
	if ( retval != PAPI_OK )
		test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset", retval );

	if ( !TESTS_QUIET ) {
		printf( "Verification:\n" );
		/* num_flops is only used for the message below from here on */
#if defined(linux) || defined(__ia64__) || defined(_POWER4)
		num_flops *= 2;
#endif
		if ( PAPI_event == PAPI_FP_INS || PAPI_event == PAPI_FP_OPS ) {
			printf( "Row 1 approximately equals %d %d\n", num_flops, num_flops );
		}
		printf( "Column 1 approximately equals column 2\n" );
		printf( "Row 3 approximately equals %u +- %u %%\n",
				( unsigned ) ( ( values[0] )[1] / ( long long ) mythreshold ),
				( unsigned ) ( OVR_TOLERANCE * 100.0 ) );
	}

	/*
	   min = (long long)((values[0])[1]*(1.0-TOLERANCE));
	   max = (long long)((values[0])[1]*(1.0+TOLERANCE));
	   if ( (values[0])[1] > max || (values[0])[1] < min )
	   test_fail(__FILE__, __LINE__, event_name, 1);
	 */

	/* Accepted range for the overflow count: the calibration-run count of
	   the monitored event, widened by OVR_TOLERANCE, divided by the
	   threshold. */
	min = ( long long ) ( ( ( double ) values[0][1] * ( 1.0 - OVR_TOLERANCE ) ) / ( double ) mythreshold );
	max = ( long long ) ( ( ( double ) values[0][1] * ( 1.0 + OVR_TOLERANCE ) ) / ( double ) mythreshold );

	if (!quiet) {
		printf( "Overflows: total(%d) > max(%lld) || "
				"total(%d) < min(%lld) \n", total, max, total, min );
	}

	if ( total > max || total < min ) {
		test_fail( __FILE__, __LINE__, "Overflows", 1 );
	}

	test_pass( __FILE__ );

	return 0;
}
papi-5.6.0/src/libpfm-3.y/examples_v3.x/detect_pmcs.c000664 001750 001750 00000005676 13216244362 024353 0ustar00jshenry1963jshenry1963000000 000000 /* * detect_pmu_regs.c - detect unavailable PMD/PMC registers based on perfmon3 information * * Copyright (c) 2006-2007 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. */ #include #include #include #include #include #include #include #include

/*
 * Create a throw-away perfmon session just to collect the register
 * availability bitmaps.
 *
 * When sif is not NULL its two bitmaps are preset to all ones before
 * pfm_create() is called.  The descriptor returned by pfm_create() is
 * closed again immediately.
 *
 * return: 0 when pfm_create() succeeded, -1 otherwise
 */
int
get_sif(int flags, pfarg_sinfo_t *sif)
{
	int fd;

	/* initialize as all available */
	if (sif) {
		memset(sif->sif_avail_pmcs, 0xff, sizeof(sif->sif_avail_pmcs));
		memset(sif->sif_avail_pmds, 0xff, sizeof(sif->sif_avail_pmds));
	}

	fd = pfm_create(flags, sif);
	if (fd > -1)
		close(fd);

	return fd > -1 ? 0 : -1;
}

/*
 * The goal of this function is to help pfm_dispatch_events()
 * in situations where not all PMC/PMD registers are available.
 *
 * It builds bitmasks of *unavailable* PMC/PMD registers from the
 * availability bitmaps in sif (as filled in through get_sif()).
 *
 * arguments:
 * 	sif: pfarg_sinfo_t pointer; when NULL no register is reported as
 * 	     unavailable (the requested masks are only cleared)
 * 	r_pmcs: a bitmask for PMC availability, NULL if not needed
 * 	r_pmds: a bitmask for PMD availability, NULL if not needed
 */
void
detect_unavail_pmu_regs(pfarg_sinfo_t *sif, pfmlib_regmask_t *r_pmcs, pfmlib_regmask_t *r_pmds)
{
	int i, j, max;

	if (r_pmcs)
		memset(r_pmcs, 0, sizeof(*r_pmcs));

	if (r_pmds)
		memset(r_pmds, 0, sizeof(*r_pmds));

	/* get_sif() tolerates a NULL sif, so do the same here */
	if (!sif)
		return;

	if (r_pmcs) {
		/* never scan more 64-bit words than either bitmap provides */
		max = PFMLIB_REG_BV < PFM_PMC_BV ? PFMLIB_REG_BV : PFM_PMC_BV;
		for(i=0; i < max; i++) {
			for(j=0; j < 64; j++) {
				/* a cleared availability bit marks register i*64+j unavailable */
				if ((sif->sif_avail_pmcs[i] & (1ULL << j)) == 0)
					pfm_regmask_set(r_pmcs, (i<<6)+j);
			}
		}
	}

	if (r_pmds) {
		max = PFMLIB_REG_BV < PFM_PMD_BV ? PFMLIB_REG_BV : PFM_PMD_BV;
		for(i=0; i < max; i++) {
			for(j=0; j < 64; j++) {
				if ((sif->sif_avail_pmds[i] & (1ULL << j)) == 0)
					pfm_regmask_set(r_pmds, (i<<6)+j);
			}
		}
	}
}
papi-5.6.0/src/libpfm-3.y/include/perfmon/perfmon_x86_64.h000664 001750 001750 00000000521 13216244362 025127 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2007 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * This file should never be included directly, use * instead. */ #ifndef _PERFMON_X86_64_H_ #define _PERFMON_X86_64_H_ #include #endif /* _PERFMON_X86_64_H_ */ papi-5.6.0/src/perfctr-2.7.x/examples/self/Makefile000664 001750 001750 00000001345 13216244370 023753 0ustar00jshenry1963jshenry1963000000 000000 # $Id: Makefile,v 1.8 2005/04/09 10:51:17 mikpe Exp $ SHELL=/bin/sh ARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ -e s/arm.*/arm/ -e s/sa110/arm/) CC=$(CROSS_COMPILE)gcc CFLAGS=-O2 -fomit-frame-pointer -Wall CPPFLAGS=-I../../linux/include -I../../usr.lib HDEP=../../usr.lib/libperfctr.h ../../linux/include/linux/perfctr.h ../../linux/include/asm/perfctr.h arch.h TARGET=self i386_OBJS=x86.o x86_64_OBJS=x86.o ppc_OBJS=ppc.o ppc64_OBJS=ppc64.o ARCH_OBJS=$($(ARCH)_OBJS) OBJS=self.o $(ARCH_OBJS) ifeq ($(ARCH),ppc64) CPPFLAGS += -DPPC64 endif default: $(TARGET) $(TARGET): $(OBJS) ../../usr.lib/libperfctr.a $(OBJS): $(HDEP) install: distclean realclean mrproper: clean clean: rm -f $(TARGET) $(OBJS) core a.out papi-5.6.0/src/libpfm-3.y/examples_v3.x/showreginfo.c000664 001750 001750 00000014113 13216244362 024375 0ustar00jshenry1963jshenry1963000000 000000 /* * showreginfo.c - show PMU register information * * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
* Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. */ #include #include #include #include #include #include #include #include #include #include #include

static void fatal_error(char *fmt,...) __attribute__((noreturn));

/*
 * Print a printf-style message on stderr and terminate with exit status 1.
 */
static void
fatal_error(char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	va_end(ap);

	exit(1);
}

/*
 * Read the beginning of file fn into buffer as a NUL-terminated string.
 *
 * At most maxlen-1 bytes are read; one trailing newline, if present, is
 * removed.  An empty file yields an empty string.
 *
 * return: 0 on success, -1 when the file cannot be opened or maxlen is 0.
 *         A failing read() is fatal (the program exits).
 */
static int
get_value(char *fn, char *buffer, size_t maxlen)
{
	int fd;
	ssize_t ret;

	/* no room even for the terminator; maxlen-1 would wrap around */
	if (maxlen == 0)
		return -1;

	fd = open(fn, O_RDONLY);
	if (fd == -1)
		return -1;

	ret = read(fd, buffer, maxlen-1);

	close(fd);

	if (ret == -1)
		fatal_error("cannot read from %s\n", fn);

	/*
	 * read() returns 0 at end of file, so indexing with ret-1 must be
	 * guarded. Only a real newline is dropped; a value cut short by
	 * maxlen keeps its last character.
	 */
	if (ret > 0 && buffer[ret-1] == '\n')
		ret--;

	buffer[ret] = '\0';

	return 0;
}
/* * This example shows how to retrieve the PMU register mapping information. * It does not use the libpfm library.
* The mapping gives the translation between the logical register names, * as exposed by the perfmon interface, and the actual hardware registers. * Depending on the PMU and perfmon implementation, not all registers are * necessarily PMU registers, some may correspond to software resources. */ int main(int argc, char **argv) { unsigned long long dfl, rsvd; unsigned long hw_addr; pfarg_ctx_t ctx; char pname[64]; char name[64], buffer[32]; unsigned int i, num_pmcs = 0, num_pmds = 0; int c, ret, ret2 = 0; int use_html = 0; while((c=getopt(argc, argv, "hH")) != -1) { switch(c) { case 'h': printf("usage: showreginfo [-h] [-H]\n"); return 0; case 'H': use_html = 1; break; default: return -1; } } try_again: ret = get_value("/sys/kernel/perfmon/pmu_desc/model", buffer, sizeof(buffer)); if (ret == -1) { /* * try to trigger automatic PMU description loading */ if (ret2 == 0) { memset(&ctx, 0, sizeof(ctx)); ret2 = pfm_create(0, NULL); if (ret2 > -1) { close(ret2); goto try_again; } fatal_error("invalid or missing perfmon support for your CPU (need at least v3.0)\n"); } } if (use_html) { puts(""); puts(""); puts(""); puts(""); puts(""); printf("\n", buffer); puts(""); puts(""); puts(""); } else { printf("model : %s\n", buffer); puts( "----------------------------------------------------------------------------\n" "name | default value | reserved mask | hw address | description\n" "-------+--------------------+--------------------+------------+-------------"); } for(i=0; i < PFM_MAX_PMCS; i++) { sprintf(pname, "/sys/kernel/perfmon/pmu_desc/pmc%d/name", i); ret = get_value(pname, name, sizeof(name)); if (ret) continue; num_pmcs++; sprintf(pname, "/sys/kernel/perfmon/pmu_desc/pmc%d/dfl_val", i); get_value(pname, buffer, sizeof(buffer)); dfl = strtoull(buffer, NULL, 16); sprintf(pname, "/sys/kernel/perfmon/pmu_desc/pmc%d/rsvd_msk", i); get_value(pname, buffer, sizeof(buffer)); rsvd = strtoull(buffer, NULL, 16); sprintf(pname, "/sys/kernel/perfmon/pmu_desc/pmc%d/addr", i); 
get_value(pname, buffer, sizeof(buffer)); hw_addr = strtoul(buffer, NULL, 0); if (use_html) { printf("\n", i, hw_addr, name); } else { printf("pmc%-3d | 0x%016llx | 0x%016llx | 0x%-8lx | %s\n", i, dfl, rsvd, hw_addr, name); } } if (use_html) puts(""); else puts("-------+--------------------+--------------------+------------+-------------"); for(i=0; i < PFM_MAX_PMDS; i++) { sprintf(pname, "/sys/kernel/perfmon/pmu_desc/pmd%d/name", i); ret = get_value(pname, name, sizeof(name)); if (ret) continue; num_pmds++; sprintf(pname, "/sys/kernel/perfmon/pmu_desc/pmd%d/dfl_val", i); get_value(pname, buffer, sizeof(buffer)); dfl = strtoull(buffer, NULL, 16); sprintf(pname, "/sys/kernel/perfmon/pmu_desc/pmd%d/rsvd_msk", i); get_value(pname, buffer, sizeof(buffer)); rsvd = strtoull(buffer, NULL, 16); sprintf(pname, "/sys/kernel/perfmon/pmu_desc/pmd%d/addr", i); get_value(pname, buffer, sizeof(buffer)); hw_addr = strtoul(buffer, NULL, 0); if (use_html) { printf("\n", i, hw_addr, name); } else { printf("pmd%-3d | 0x%016llx | 0x%016llx | 0x%-8lx | %s\n", i, dfl, rsvd, hw_addr, name); } } if (use_html) { puts("
%s
NameHW ADDRDescription
PMC%d0x%lx%s
PMC%d0x%lx%s
"); puts(""); puts(""); } else { puts("----------------------------------------------------------------------------"); printf("%u PMC registers, %u PMD registers\n", num_pmcs, num_pmds); } return 0; } papi-5.6.0/src/000775 001750 001750 00000000000 13216244473 015326 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/man/man3/PAPI_ipc.3000664 001750 001750 00000003556 13216244356 017631 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPI_ipc" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPI_ipc \- .PP Simplified call to get instructions per cycle, real and processor time\&. .SH SYNOPSIS .br .PP .SH "Detailed Description" .PP .PP \fBC Interface: \fP .RS 4 #include <\fBpapi\&.h\fP> .br int \fBPAPI_ipc( float *rtime, float *ptime, long long *ins, float *ipc )\fP; .RE .PP \fBParameters:\fP .RS 4 \fI*rtime\fP total realtime since the first call .br \fI*ptime\fP total process time since the first call .br \fI*ins\fP total instructions since the first call .br \fI*ipc\fP incremental instructions per cycle since the last call .RE .PP \fBReturn values:\fP .RS 4 \fIPAPI_EINVAL\fP The counters were already started by something other than \fBPAPI_ipc()\fP\&. .br \fIPAPI_ENOEVNT\fP The floating point operations event does not exist\&. .br \fIPAPI_ENOMEM\fP Insufficient memory to complete the operation\&. .RE .PP The first call to \fBPAPI_ipc()\fP will initialize the PAPI High Level interface, set up the counters to monitor PAPI_TOT_INS and PAPI_TOT_CYC events and start the counters\&. .PP Subsequent calls will read the counters and return total real time, total process time, total instructions since the start of the measurement and the IPC rate since the latest call to \fBPAPI_ipc()\fP\&. .PP A call to \fBPAPI_stop_counters()\fP will stop the counters from running and then calls such as \fBPAPI_start_counters()\fP or other rate calls can safely be used\&. 
.PP \fBPAPI_ipc\fP should return a ratio greater than 1\&.0, indicating instruction level parallelism within the chip\&. The larger this ratio the more efficiently the program is running\&. .PP \fBSee Also:\fP .RS 4 \fBPAPI_flips()\fP .PP \fBPAPI_flops()\fP .PP \fBPAPI_epc()\fP .PP \fBPAPI_stop_counters()\fP .RE .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. papi-5.6.0/src/libpfm-3.y/examples_v2.x/showreginfo.c000664 001750 001750 00000014136 13216244362 024401 0ustar00jshenry1963jshenry1963000000 000000 /* * showreginfo.c - show PMU register information * * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux.
*/ #include #include #include #include #include #include #include #include #include #include #include static void fatal_error(char *fmt,...) __attribute__((noreturn)); static void fatal_error(char *fmt, ...) { va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); exit(1); } static int get_value(char *fn, char *buffer, size_t maxlen) { int fd; ssize_t ret; fd = open(fn, O_RDONLY); if (fd == -1) return -1; ret = read(fd, buffer, maxlen-1); if (ret == -1) fatal_error("cannot read from %s\n", fn); buffer[ret-1] = '\0'; close(fd); return 0; } /* * This example shows how to retrieve the PMU register mapping information. * It does not use the libpfm library. * The mapping gives the translation between the logical register names, * as exposed by the perfmon interface, and the actual hardware registers. * Depending on the PMU and perfmon implementation, not all registers are * necessarily PMU registers, some may correspond to software resources. */ int main(int argc, char **argv) { unsigned long long dfl, rsvd; unsigned long hw_addr; pfarg_ctx_t ctx; char pname[64]; char name[64], buffer[32]; unsigned int i, num_pmcs = 0, num_pmds = 0; int c, ret, ret2 = 0; int use_html = 0; while((c=getopt(argc, argv, "hH")) != -1) { switch(c) { case 'h': printf("usage: showreginfo [-h] [-H]\n"); return 0; case 'H': use_html = 1; break; default: return -1; } } try_again: ret = get_value("/sys/kernel/perfmon/pmu_desc/model", buffer, sizeof(buffer)); if (ret == -1) { /* * try to trigger automatic PMU description loading */ if (ret2 == 0) { memset(&ctx, 0, sizeof(ctx)); ret2 = pfm_create_context(&ctx, NULL, NULL, 0); if (ret2 > 0) { close(ret2); goto try_again; } fatal_error("invalid or missing perfmon support for your CPU (need at least v2.3)\n"); } } if (use_html) { puts(""); puts(""); puts(""); puts(""); puts(""); printf("\n", buffer); puts(""); puts(""); puts(""); } else { printf("model : %s\n", buffer); puts( 
"----------------------------------------------------------------------------\n" "name | default value | reserved mask | hw address | description\n" "-------+--------------------+--------------------+------------+-------------"); } for(i=0; i < PFM_MAX_PMCS; i++) { sprintf(pname, "/sys/kernel/perfmon/pmu_desc/pmc%d/name", i); ret = get_value(pname, name, sizeof(name)); if (ret) continue; num_pmcs++; sprintf(pname, "/sys/kernel/perfmon/pmu_desc/pmc%d/dfl_val", i); get_value(pname, buffer, sizeof(buffer)); dfl = strtoull(buffer, NULL, 16); sprintf(pname, "/sys/kernel/perfmon/pmu_desc/pmc%d/rsvd_msk", i); get_value(pname, buffer, sizeof(buffer)); rsvd = strtoull(buffer, NULL, 16); sprintf(pname, "/sys/kernel/perfmon/pmu_desc/pmc%d/addr", i); get_value(pname, buffer, sizeof(buffer)); hw_addr = strtoul(buffer, NULL, 0); if (use_html) { printf("\n", i, hw_addr, name); } else { printf("pmc%-3d | 0x%016llx | 0x%016llx | 0x%-8lx | %s\n", i, dfl, rsvd, hw_addr, name); } } if (use_html) puts(""); else puts("-------+--------------------+--------------------+------------+-------------"); for(i=0; i < PFM_MAX_PMDS; i++) { sprintf(pname, "/sys/kernel/perfmon/pmu_desc/pmd%d/name", i); ret = get_value(pname, name, sizeof(name)); if (ret) continue; num_pmds++; sprintf(pname, "/sys/kernel/perfmon/pmu_desc/pmd%d/dfl_val", i); get_value(pname, buffer, sizeof(buffer)); dfl = strtoull(buffer, NULL, 16); sprintf(pname, "/sys/kernel/perfmon/pmu_desc/pmd%d/rsvd_msk", i); get_value(pname, buffer, sizeof(buffer)); rsvd = strtoull(buffer, NULL, 16); sprintf(pname, "/sys/kernel/perfmon/pmu_desc/pmd%d/addr", i); get_value(pname, buffer, sizeof(buffer)); hw_addr = strtoul(buffer, NULL, 0); if (use_html) { printf("\n", i, hw_addr, name); } else { printf("pmd%-3d | 0x%016llx | 0x%016llx | 0x%-8lx | %s\n", i, dfl, rsvd, hw_addr, name); } } if (use_html) { puts("
%s
NameHW ADDRDescription
PMC%d0x%lx%s
PMC%d0x%lx%s
"); puts(""); puts(""); } else { puts("----------------------------------------------------------------------------"); printf("%u PMC registers, %u PMD registers\n", num_pmcs, num_pmds); } return 0; } papi-5.6.0/src/validation_tests/papi_l2_dca.c000664 001750 001750 00000010764 13216244370 023207 0ustar00jshenry1963jshenry1963000000 000000 /* This code attempts to test the L2 Data Cache Acceesses */ /* performance counter PAPI_L2_DCA */ /* Notes: */ /* Should this be equivelent to PAPI_L1_DCM? */ /* (on IVY it is) */ /* On Haswell/Broadwell/Skylake this maps to : */ /* L2_RQSTS:ALL_DEMAND_REFERENCES */ /* Should this include *all* L2 accesses or just those */ /* caused by the user? Prefetch? MESI? */ /* by Vince Weaver, vincent.weaver@maine.edu */ #include #include #include #include "papi.h" #include "papi_test.h" #include "cache_helper.h" #include "display_error.h" #include "testcode.h" #define NUM_RUNS 100 int main(int argc, char **argv) { int i; int eventset=PAPI_NULL; int num_runs=NUM_RUNS; long long high,low,average,expected; long long count,total; int retval; int l1_size,l2_size,l1_linesize,l2_linesize,l2_entries; int arraysize; int quiet,errors=0; double error; double *array; double aSumm = 0.0; quiet=tests_quiet(argc,argv); if (!quiet) { printf("Testing the PAPI_L2_DCA event\n"); } /* Init the PAPI library */ retval = PAPI_library_init(PAPI_VER_CURRENT); if (retval != PAPI_VER_CURRENT) { test_fail(__FILE__,__LINE__,"PAPI_library_init",retval); } retval=PAPI_create_eventset(&eventset); if (retval!=PAPI_OK) { test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); } retval=PAPI_add_named_event(eventset,"PAPI_L2_DCA"); if (retval!=PAPI_OK) { test_skip( __FILE__, __LINE__, "adding PAPI_L2_DCA", retval ); } l1_size=get_cachesize(L1D_CACHE); l1_linesize=get_linesize(L1D_CACHE); l2_size=get_cachesize(L2_CACHE); l2_linesize=get_linesize(L2_CACHE); l2_entries=get_entries(L2_CACHE); if ((l2_size==0) || (l2_linesize==0)) { if (!quiet) { printf("Unable to 
determine size of L2 cache!\n"); } test_skip( __FILE__, __LINE__, "adding PAPI_L2_DCA", retval ); } if (!quiet) { printf("\tDetected %dk L1 DCache, %dB linesize\n", l1_size/1024,l1_linesize); printf("\tDetected %dk L2 DCache, %dB linesize, %d entries\n", l2_size/1024,l2_linesize,l2_entries); } arraysize=l2_size/sizeof(double); if (!quiet) { printf("\tAllocating %zu bytes of memory (%d doubles)\n", arraysize*sizeof(double),arraysize); } array=calloc(arraysize,sizeof(double)); if (array==NULL) { test_fail(__FILE__,__LINE__,"Can't allocate memory",0); } /******************/ /* Testing Writes */ /******************/ if (!quiet) { printf("\nWrite Test: Initializing an array of %d doubles:\n", arraysize); } high=0; low=0; total=0; for(i=0;ihigh) high=count; if ((low==0) || (count 1.0) || (error<-1.0)) { if (!quiet) printf("Instruction count off by more than 1%%\n"); errors++; } if (!quiet) printf("\n"); /******************/ /* Testing Reads */ /******************/ if (!quiet) { printf("\nRead Test: Summing an array of %d doubles:\n", arraysize); } high=0; low=0; total=0; for(i=0;ihigh) high=count; if ((low==0) || (count 1.0) || (error<-1.0)) { if (!quiet) printf("Instruction count off by more than 1%%\n"); errors++; } if (!quiet) { printf("\n"); } /* Warn for now, as we get errors we can't easily */ /* explain on haswell and more recent Intel chips */ if (errors) { test_warn( __FILE__, __LINE__, "Error too high", 1 ); } test_pass(__FILE__); return 0; } papi-5.6.0/src/libpfm-3.y/lib/pfmlib_intel_atom_priv.h000664 001750 001750 00000006530 13216244363 024653 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2008 Google, Inc * Contributed by Stephane Eranian * * Based on: * Copyright (c) 2006-2007 Hewlett-Packard Development Company, L.P. 
* Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux/ia64. 
*/ #ifndef __PFMLIB_INTEL_ATOM_PRIV_H__ #define __PFMLIB_INTEL_ATOM_PRIV_H__ #define PFMLIB_INTEL_ATOM_MAX_UMASK 16 typedef struct { char *pme_uname; /* unit mask name */ char *pme_udesc; /* event/umask description */ unsigned int pme_ucode; /* unit mask code */ unsigned int pme_flags; /* unit mask flags */ } pme_intel_atom_umask_t; typedef struct { char *pme_name; /* event name */ char *pme_desc; /* event description */ unsigned int pme_code; /* event code */ unsigned int pme_numasks; /* number of umasks */ unsigned int pme_flags; /* flags */ unsigned int pme_fixed; /* fixed counter index, < FIXED_CTR0 if unsupported */ pme_intel_atom_umask_t pme_umasks[PFMLIB_INTEL_ATOM_MAX_UMASK]; /* umask desc */ } pme_intel_atom_entry_t; /* * pme_flags value */ /* * pme_flags value (event and unit mask) */ #define PFMLIB_INTEL_ATOM_UMASK_NCOMBO 0x01 /* unit mask cannot be combined (default exclusive) */ #define PFMLIB_INTEL_ATOM_FIXED0 0x02 /* event supported by FIXED_CTR0, can work on generic counters */ #define PFMLIB_INTEL_ATOM_FIXED1 0x04 /* event supported by FIXED_CTR1, can work on generic counters */ #define PFMLIB_INTEL_ATOM_FIXED2_ONLY 0x08 /* works only on FIXED_CTR2 */ #define PFMLIB_INTEL_ATOM_PEBS 0x10 /* support PEBS (precise event) */ #define PFMLIB_INTEL_ATOM_PMC0 0x20 /* works only on IA32_PMC0 */ #define PFMLIB_INTEL_ATOM_PMC1 0x40 /* works only on IA32_PMC1 */ typedef struct { unsigned int version:8; unsigned int num_cnt:8; unsigned int cnt_width:8; unsigned int ebx_length:8; } pmu_eax_t; typedef struct { unsigned int num_cnt:6; unsigned int cnt_width:6; unsigned int reserved:20; } pmu_edx_t; typedef struct { unsigned int no_core_cycle:1; unsigned int no_inst_retired:1; unsigned int no_ref_cycle:1; unsigned int no_llc_ref:1; unsigned int no_llc_miss:1; unsigned int no_br_retired:1; unsigned int no_br_mispred_retired:1; unsigned int reserved:25; } pmu_ebx_t; #endif /* __PFMLIB_INTEL_ATOM_PRIV_H__ */ papi-5.6.0/src/perfctr-2.6.x/usr.lib/misc.c000775 001750 
001750 00000007273 13216244367 022237 0ustar00jshenry1963jshenry1963000000 000000 /* $Id: misc.c,v 1.20.2.1 2005/12/22 22:44:49 mikpe Exp $ * Miscellaneous perfctr operations. * * Copyright (C) 1999-2004 Mikael Pettersson */ #include #include #include #include #include #include #include #include "libperfctr.h" #include "marshal.h" #include "arch.h" int _perfctr_abi_check_fd(int fd, unsigned int user_abi_version) { unsigned int driver_abi_version; if( ioctl(fd, PERFCTR_ABI, &driver_abi_version) < 0 ) { perror("perfctr_abi_check"); return -1; } if( (driver_abi_version ^ user_abi_version) & 0xFF00FF00 ) { fprintf(stderr, "Error: perfctr ABI major version mismatch: " "driver ABI 0x%08X, user ABI 0x%08X\n", driver_abi_version, user_abi_version); errno = EPROTO; return -1; } return 0; } int perfctr_info(int fd, struct perfctr_info *info) { int err = perfctr_ioctl_r(fd, PERFCTR_INFO, info, &perfctr_info_sdesc); if( err < 0 ) return err; perfctr_info_cpu_init(info); return 0; } int perfctr_get_info(struct perfctr_info *info) { int fd, ret; fd = open("/dev/perfctr", O_RDONLY); if (fd < 0) return -1; ret = perfctr_info(fd, info); close(fd); return ret; } struct perfctr_cpus_info *perfctr_cpus_info(int fd) { struct perfctr_cpu_mask dummy; struct perfctr_cpus_info *info; unsigned int cpu_mask_bytes; dummy.nrwords = 0; if( ioctl(fd, PERFCTR_CPUS, &dummy) >= 0 || errno != EOVERFLOW || dummy.nrwords == 0 ) { perror("PERFCTR_CPUS"); return NULL; } cpu_mask_bytes = offsetof(struct perfctr_cpu_mask, mask[dummy.nrwords]); info = malloc(sizeof(struct perfctr_cpus_info) + 2*cpu_mask_bytes); if( !info ) { perror("malloc"); return NULL; } info->cpus = (struct perfctr_cpu_mask*)(info + 1); info->cpus->nrwords = dummy.nrwords; info->cpus_forbidden = (struct perfctr_cpu_mask*)((char*)(info + 1) + cpu_mask_bytes); info->cpus_forbidden->nrwords = dummy.nrwords; if( ioctl(fd, PERFCTR_CPUS, info->cpus) < 0 || ioctl(fd, PERFCTR_CPUS_FORBIDDEN, info->cpus_forbidden) < 0 ) { 
perror("PERFCTR_CPUS"); free(info); return NULL; } return info; } void perfctr_info_print(const struct perfctr_info *info) { static const char * const features[] = { "rdpmc", "rdtsc", "pcint" }; int fi, comma; printf("abi_version\t\t0x%08X\n", info->abi_version); printf("driver_version\t\t%s\n", info->driver_version); printf("cpu_type\t\t%u (%s)\n", info->cpu_type, perfctr_info_cpu_name(info)); printf("cpu_features\t\t%#x (", info->cpu_features); for(comma = 0, fi = 0; fi < sizeof features / sizeof features[0]; ++fi) { unsigned fmask = 1 << fi; if( info->cpu_features & fmask ) { if( comma ) printf(","); printf("%s", features[fi]); comma = 1; } } printf(")\n"); printf("cpu_khz\t\t\t%u\n", info->cpu_khz); printf("tsc_to_cpu_mult\t\t%u%s\n", info->tsc_to_cpu_mult, info->tsc_to_cpu_mult ? "" : " (unspecified, assume 1)"); printf("cpu_nrctrs\t\t%u\n", perfctr_info_nrctrs(info)); } static void print_cpus(const struct perfctr_cpu_mask *cpus) { unsigned int nrcpus, nr, i, cpumask, bitmask; printf("["); nrcpus = 0; for(i = 0; i < cpus->nrwords; ++i) { cpumask = cpus->mask[i]; nr = i * 8 * sizeof(int); for(bitmask = 1; cpumask != 0; ++nr, bitmask <<= 1) { if( cpumask & bitmask ) { cpumask &= ~bitmask; if( nrcpus ) printf(","); ++nrcpus; printf("%u", nr); } } } printf("], total: %u\n", nrcpus); } void perfctr_cpus_info_print(const struct perfctr_cpus_info *info) { printf("cpus\t\t\t"); print_cpus(info->cpus); printf("cpus_forbidden\t\t"); print_cpus(info->cpus_forbidden); } papi-5.6.0/src/perfctr-2.7.x/linux/include/000775 001750 001750 00000000000 13216244370 022323 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/libpfm-3.y/include/perfmon/pfmlib_core.h000664 001750 001750 00000006246 13216244362 024736 0ustar00jshenry1963jshenry1963000000 000000 /* * Intel Core PMU * * Copyright (c) 2006 Hewlett-Packard Development Company, L.P. 
* Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef __PFMLIB_CORE_H__ #define __PFMLIB_CORE_H__ #include /* * privilege level mask usage for Intel Core * * PFM_PLM0 = OS (kernel, hypervisor, ..) 
* PFM_PLM1 = unused (ignored) * PFM_PLM2 = unused (ignored) * PFM_PLM3 = USR (user level) */ #ifdef __cplusplus extern "C" { #endif #define PMU_CORE_NUM_FIXED_COUNTERS 3 /* number of fixed counters */ #define PMU_CORE_NUM_GEN_COUNTERS 2 /* number of generic counters */ #define PMU_CORE_NUM_COUNTERS 5 /* number of counters */ typedef union { unsigned long long val; /* complete register value */ struct { unsigned long sel_event_select:8; /* event mask */ unsigned long sel_unit_mask:8; /* unit mask */ unsigned long sel_usr:1; /* user level */ unsigned long sel_os:1; /* system level */ unsigned long sel_edge:1; /* edge detec */ unsigned long sel_pc:1; /* pin control */ unsigned long sel_int:1; /* enable APIC intr */ unsigned long sel_res1:1; /* reserved */ unsigned long sel_en:1; /* enable */ unsigned long sel_inv:1; /* invert counter mask */ unsigned long sel_cnt_mask:8; /* counter mask */ unsigned long sel_res2:32; } perfevtsel; } pfm_core_sel_reg_t; typedef struct { unsigned long cnt_mask; /* threshold (cnt_mask) */ unsigned int flags; /* counter specific flag */ } pfmlib_core_counter_t; #define PFM_CORE_SEL_INV 0x1 /* inverse */ #define PFM_CORE_SEL_EDGE 0x2 /* edge detect */ /* * model-specific parameters for the library */ typedef struct { unsigned int pebs_used; /* set to 1 if PEBS is used */ } pfmlib_core_pebs_t; typedef struct { pfmlib_core_counter_t pfp_core_counters[PMU_CORE_NUM_COUNTERS]; pfmlib_core_pebs_t pfp_core_pebs; uint64_t reserved[4]; /* for future use */ } pfmlib_core_input_param_t; typedef struct { uint64_t reserved[8]; /* for future use */ } pfmlib_core_output_param_t; #ifdef __cplusplus /* extern C */ } #endif /* * PMU-specific interface */ extern int pfm_core_is_pebs(pfmlib_event_t *e); #endif /* __PFMLIB_CORE_H__ */ papi-5.6.0/man/man3/PAPIF_stop.3000664 001750 001750 00000000762 13216244356 020145 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPIF_stop" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME 
PAPIF_stop \- .PP Stop counting hardware events in an EventSet\&. .SH SYNOPSIS .br .PP .SH "Detailed Description" .PP .PP \fBFortran Interface:\fP .RS 4 #include 'fpapi\&.h' .br \fBPAPIF_stop\fP( C_INT EventSet, C_LONG_LONG(*) values, C_INT check ) .RE .PP \fBSee Also:\fP .RS 4 \fBPAPI_stop\fP .RE .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. papi-5.6.0/src/ctests/remove_events.c000664 001750 001750 00000006536 13216244360 021665 0ustar00jshenry1963jshenry1963000000 000000 /* This test checks if removing events works properly at the low level by Vince Weaver (vweaver1@eecs.utk.edu) */ #include #include "papi.h" #include "papi_test.h" #include "do_loops.h" int main( int argc, char **argv ) { int retval; int EventSet = PAPI_NULL; long long values1[2],values2[2]; const char *event_names[] = {"PAPI_TOT_CYC","PAPI_TOT_INS"}; char add_event_str[PAPI_MAX_STR_LEN]; double instructions_error; long long old_instructions; int quiet; /* Set TESTS_QUIET variable */ quiet = tests_quiet( argc, argv ); /* Init the PAPI library */ retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) { test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); } /* Create an empty event set */ retval = PAPI_create_eventset( &EventSet ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); } /* add the events named above */ retval = PAPI_add_named_event( EventSet, event_names[0] ); if ( retval != PAPI_OK ) { sprintf( add_event_str, "PAPI_add_named_event[%s]", event_names[0] ); if (!quiet) printf("Trouble %s\n",add_event_str); test_skip( __FILE__, __LINE__, add_event_str, retval ); } retval = PAPI_add_named_event( EventSet, event_names[1] ); if ( retval != PAPI_OK ) { sprintf( add_event_str, "PAPI_add_named_event[%s]", event_names[1] ); test_fail( __FILE__, __LINE__, add_event_str, retval ); } /* Start PAPI */ retval = PAPI_start( EventSet ); if ( retval != PAPI_OK ) { test_fail( __FILE__, 
__LINE__, "PAPI_start", retval ); } /* our test code */ do_flops( NUM_FLOPS ); /* Stop PAPI */ retval = PAPI_stop( EventSet, values1 ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); } old_instructions=values1[1]; if ( !quiet ) { printf( "========================\n" ); /* cycles is first, other event second */ sprintf( add_event_str, "%-12s : \t", event_names[0] ); printf( TAB1, add_event_str, values1[0] ); sprintf( add_event_str, "%-12s : \t", event_names[1] ); printf( TAB1, add_event_str, values1[1] ); } /* remove PAPI_TOT_CYC */ retval = PAPI_remove_named_event( EventSet, event_names[0] ); if ( retval != PAPI_OK ) { sprintf( add_event_str, "PAPI_add_named_event[%s]", event_names[0] ); test_fail( __FILE__, __LINE__, add_event_str, retval ); } /* Start PAPI */ retval = PAPI_start( EventSet ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_start", retval ); } /* our test code */ do_flops( NUM_FLOPS ); /* Stop PAPI */ retval = PAPI_stop( EventSet, values2 ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); } /* test if after removing the event, the second event */ /* still points to the proper native event */ /* this only works if IPC != 1 */ if ( !quiet ) { printf( "==========================\n" ); printf( "After removing PAP_TOT_CYC\n"); sprintf( add_event_str, "%-12s : \t", event_names[1] ); printf( TAB1, add_event_str, values2[0] ); instructions_error=((double)old_instructions - (double)values2[0])/ (double)old_instructions; if (instructions_error>10.0) { printf("Error of %.2f%%\n",instructions_error); test_fail( __FILE__, __LINE__, "validation", 0 ); } } test_pass( __FILE__ ); return 0; } papi-5.6.0/src/utils/papi_decode.c000664 001750 001750 00000007442 13216244370 021071 0ustar00jshenry1963jshenry1963000000 000000 /* This file decodes the preset events into a csv format file */ /** file papi_decode.c * @brief papi_decode utility. 
* @page papi_decode * @section NAME * papi_decode - provides availability and detail information for PAPI preset events. * * @section Synopsis * papi_decode [-ah] * * @section Description * papi_decode is a PAPI utility program that converts the PAPI presets * for the existing library into a comma separated value format that can * then be viewed or modified in spreadsheet applications or text editors, * and can be supplied to PAPI_encode_events (3) as a way of adding or * modifying event definitions for specialized applications. * The format for the csv output consists of a line of field names, followed * by a blank line, followed by one line of comma separated values for each * event contained in the preset table. * A portion of this output (for Pentium 4) is shown below: * @code * name,derived,postfix,short_descr,long_descr,note,[native,...] * PAPI_L1_ICM,NOT_DERIVED,,"L1I cache misses","Level 1 instruction cache misses",,BPU_fetch_request_TCMISS * PAPI_L2_TCM,NOT_DERIVED,,"L2 cache misses","Level 2 cache misses",,BSQ_cache_reference_RD_2ndL_MISS_WR_2ndL_MISS * PAPI_TLB_DM,NOT_DERIVED,,"Data TLB misses","Data translation lookaside buffer misses",,page_walk_type_DTMISS * @endcode * * @section Options *
<ul> *
<li>-a Convert only the available PAPI preset events. *
<li>-h Display help information about this utility. * </ul>
* * @section Bugs * There are no known bugs in this utility. * If you find a bug, it should be reported to the * PAPI Mailing List at . */ #include #include #include #include "papi.h" static void print_help( void ) { printf( "This is the PAPI decode utility program.\n" ); printf( "It decodes PAPI preset events into csv formatted text.\n" ); printf( "By default all presets are decoded.\n" ); printf( "The text goes to stdout, but can be piped to a file.\n" ); printf( "Such a file can be edited in a text editor or spreadsheet.\n" ); printf( "It can also be parsed by PAPI_encode_events.\n" ); printf( "Usage:\n\n" ); printf( " decode [options]\n\n" ); printf( "Options:\n\n" ); printf( " -a decode only available PAPI preset events\n" ); printf( " -h print this help message\n" ); printf( "\n" ); } int main( int argc, char **argv ) { int i, j; int retval; int print_avail_only = 0; PAPI_event_info_t info; (void)argc; (void)argv; for ( i = 1; i < argc; i++ ) if ( argv[i] ) { if ( !strcmp( argv[i], "-a" ) ) print_avail_only = PAPI_PRESET_ENUM_AVAIL; else if ( !strcmp( argv[i], "-h" ) ) { print_help( ); exit( 1 ); } else { print_help( ); exit( 1 ); } } retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) { fprintf(stderr,"Error with PAPI_library_init!\n"); return retval; } retval = PAPI_set_debug( PAPI_VERB_ECONT ); if ( retval != PAPI_OK ) { fprintf(stderr,"Error with PAPI_set_debug\n"); return retval; } i = PAPI_PRESET_MASK; printf ( "name,derived,postfix,short_descr,long_descr,note,[native,...]\n\n" ); do { if ( PAPI_get_event_info( i, &info ) == PAPI_OK ) { printf( "%s,%s,%s,", info.symbol, info.derived, info.postfix ); if ( info.short_descr[0] ) { printf( "\"%s\",", info.short_descr ); } else { printf( "," ); } if ( info.long_descr[0] ) { printf( "\"%s\",", info.long_descr ); } else { printf( "," ); } if ( info.note[0] ) printf( "\"%s\"", info.note ); for ( j = 0; j < ( int ) info.count; j++ ) printf( ",%s", info.name[j] ); printf( "\n" ); } } 
while ( PAPI_enum_event( &i, print_avail_only ) == PAPI_OK ); return 0; } papi-5.6.0/src/components/perf_event_uncore/tests/perf_event_uncore_attach.c000664 001750 001750 00000007504 13216244357 031574 0ustar00jshenry1963jshenry1963000000 000000 /* * This file tests uncore events on perf_event kernels * * It uses the older PAPI_set_opt() way of specifying the CPU/granularity * rather than the new :cpu=0 method */ #include #include "papi.h" #include "papi_test.h" #include "do_loops.h" #include "perf_event_uncore_lib.h" int main( int argc, char **argv ) { int retval,quiet; int EventSet = PAPI_NULL; long long values[1]; char *uncore_event=NULL; char event_name[BUFSIZ]; int uncore_cidx=-1; const PAPI_component_info_t *info; /* Set TESTS_QUIET variable */ quiet = tests_quiet( argc, argv ); if (!quiet) { printf("Testing creating an uncore event using PAPI_set_opt() to specify CPU\n"); } /* Init the PAPI library */ retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) { test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); } /* Find the uncore PMU */ uncore_cidx=PAPI_get_component_index("perf_event_uncore"); if (uncore_cidx<0) { if (!quiet) { printf("perf_event_uncore component not found\n"); } test_skip(__FILE__,__LINE__,"perf_event_uncore component not found",0); } /* Check if component disabled */ info=PAPI_get_component_info(uncore_cidx); if (info->disabled) { if (!quiet) { printf("perf_event_uncore component is disabled\n"); } test_skip(__FILE__,__LINE__,"uncore component disabled",0); } /* Get a relevant event name */ uncore_event=get_uncore_event(event_name, BUFSIZ); if (uncore_event==NULL) { if (!quiet) { printf("uncore event name not available\n"); } test_skip( __FILE__, __LINE__, "PAPI does not support uncore on this processor", PAPI_ENOSUPP ); } /* Create an eventset */ retval = PAPI_create_eventset(&EventSet); if (retval != PAPI_OK) { test_fail(__FILE__, __LINE__, "PAPI_create_eventset",retval); } /* Set a component for the 
EventSet */ retval = PAPI_assign_eventset_component(EventSet, uncore_cidx); /* we need to set to a certain cpu for uncore to work */ PAPI_cpu_option_t cpu_opt; cpu_opt.eventset=EventSet; cpu_opt.cpu_num=0; retval = PAPI_set_opt(PAPI_CPU_ATTACH,(PAPI_option_t*)&cpu_opt); if (retval != PAPI_OK) { if (!quiet) { printf("Could not cpu attach\n"); } test_skip( __FILE__, __LINE__, "this test; trying to PAPI_CPU_ATTACH; need to run as root", retval); } /* we need to set the granularity to system-wide for uncore to work */ PAPI_granularity_option_t gran_opt; gran_opt.def_cidx=0; gran_opt.eventset=EventSet; gran_opt.granularity=PAPI_GRN_SYS; retval = PAPI_set_opt(PAPI_GRANUL,(PAPI_option_t*)&gran_opt); if (retval != PAPI_OK) { test_skip( __FILE__, __LINE__, "this test; trying to set PAPI_GRN_SYS", retval); } /* we need to set domain to be as inclusive as possible */ PAPI_domain_option_t domain_opt; domain_opt.def_cidx=0; domain_opt.eventset=EventSet; domain_opt.domain=PAPI_DOM_ALL; retval = PAPI_set_opt(PAPI_DOMAIN,(PAPI_option_t*)&domain_opt); if (retval != PAPI_OK) { if (!quiet) { printf("could not set PAPI_DOM_ALL\n"); } test_skip( __FILE__, __LINE__, "this test; trying to set PAPI_DOM_ALL; need to run as root", retval); } /* Add our uncore event */ retval = PAPI_add_named_event(EventSet, uncore_event); if (retval != PAPI_OK) { if ( !quiet ) { printf("Error trying to use event %s\n", uncore_event); } test_fail(__FILE__, __LINE__, "adding uncore event",retval); } /* Start PAPI */ retval = PAPI_start( EventSet ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_start", retval ); } /* our work code */ do_flops( NUM_FLOPS ); /* Stop PAPI */ retval = PAPI_stop( EventSet, values ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); } if ( !quiet ) { printf("\tUsing event %s\n",uncore_event); printf("\t%s: %lld\n",uncore_event,values[0]); } test_pass( __FILE__ ); return 0; } papi-5.6.0/src/libpfm4/lib/pfmlib_mips_priv.h000664 001750 
001750 00000010214 13216244365 023141 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2011 Samara Technology Group, Inc * Contributed by Philip Mucci * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. 
*/ #ifndef __PFMLIB_MIPS_PRIV_H__ #define __PFMLIB_MIPS_PRIV_H__ /* * This file contains the definitions used for MIPS processors */ /* * event description */ typedef struct { const char *name; /* event name */ const char *desc; /* event description */ unsigned int mask; /* which counters event lives on */ unsigned int code; /* event code */ } mips_entry_t; #if __BYTE_ORDER == __LITTLE_ENDIAN typedef union { uint64_t val; /* complete register value */ struct { unsigned long sel_exl:1; /* int level */ unsigned long sel_os:1; /* system level */ unsigned long sel_sup:1; /* supervisor level */ unsigned long sel_usr:1; /* user level */ unsigned long sel_int:1; /* enable intr */ unsigned long sel_event_mask:7; /* event mask */ unsigned long sel_res1:20; /* reserved */ unsigned long sel_res2:32; /* reserved */ } perfsel64; } pfm_mips_sel_reg_t; #elif __BYTE_ORDER == __BIG_ENDIAN typedef union { uint64_t val; /* complete register value */ struct { unsigned long sel_res2:32; /* reserved */ unsigned long sel_res1:20; /* reserved */ unsigned long sel_event_mask:7; /* event mask */ unsigned long sel_int:1; /* enable intr */ unsigned long sel_usr:1; /* user level */ unsigned long sel_sup:1; /* supervisor level */ unsigned long sel_os:1; /* system level */ unsigned long sel_exl:1; /* int level */ } perfsel64; } pfm_mips_sel_reg_t; #else #error "cannot determine endianess" #endif typedef struct { char model[1024]; int implementer; int architecture; int part; } pfm_mips_config_t; extern pfm_mips_config_t pfm_mips_cfg; #define MIPS_ATTR_K 0 /* system level */ #define MIPS_ATTR_U 1 /* user level */ #define MIPS_ATTR_S 2 /* supervisor level */ #define MIPS_ATTR_E 3 /* exception level */ #define MIPS_NUM_ATTRS 4 #define _MIPS_ATTR_K (1 << MIPS_ATTR_K) #define _MIPS_ATTR_U (1 << MIPS_ATTR_U) #define _MIPS_ATTR_S (1 << MIPS_ATTR_S) #define _MIPS_ATTR_E (1 << MIPS_ATTR_E) #define MIPS_PLM_ALL ( _MIPS_ATTR_K |\ _MIPS_ATTR_U |\ _MIPS_ATTR_S |\ _MIPS_ATTR_E) extern int pfm_mips_detect(void 
*this); extern int pfm_mips_get_encoding(void *this, pfmlib_event_desc_t *e); extern int pfm_mips_get_event_first(void *this); extern int pfm_mips_get_event_next(void *this, int idx); extern int pfm_mips_event_is_valid(void *this, int pidx); extern int pfm_mips_validate_table(void *this, FILE *fp); extern int pfm_mips_get_event_attr_info(void *this, int pidx, int attr_idx, pfmlib_event_attr_info_t *info); extern int pfm_mips_get_event_info(void *this, int idx, pfm_event_info_t *info); extern unsigned int pfm_mips_get_event_nattrs(void *this, int pidx); extern void pfm_mips_perf_validate_pattrs(void *this, pfmlib_event_desc_t *e); extern int pfm_mips_get_perf_encoding(void *this, pfmlib_event_desc_t *e); #endif /* __PFMLIB_MIPS_PRIV_H__ */ papi-5.6.0/src/perfctr-2.6.x/patches/patch-kernel-2.6.32000664 001750 001750 00000037064 13216244367 024235 0ustar00jshenry1963jshenry1963000000 000000 --- linux-2.6.32.perfctr26/CREDITS.~1~ 2009-12-03 12:38:30.000000000 +0100 +++ linux-2.6.32.perfctr26/CREDITS 2009-12-03 12:49:22.000000000 +0100 @@ -2771,6 +2771,7 @@ N: Mikael Pettersson E: mikpe@it.uu.se W: http://user.it.uu.se/~mikpe/linux/ D: Miscellaneous fixes +D: Performance-monitoring counters driver N: Reed H. 
Petty E: rhp@draper.net --- linux-2.6.32.perfctr26/Documentation/ioctl/ioctl-number.txt.~1~ 2009-12-03 12:38:31.000000000 +0100 +++ linux-2.6.32.perfctr26/Documentation/ioctl/ioctl-number.txt 2009-12-03 12:49:22.000000000 +0100 @@ -200,6 +200,8 @@ Code Seq# Include File Comments 0xB1 00-1F PPPoX 0xCB 00-1F CBM serial IEC bus in development: +0xD0 all performance counters see drivers/perfctr/ + 0xDD 00-3F ZFCP device driver see drivers/s390/scsi/ 0xF3 00-3F video/sisfb.h sisfb (in development) --- linux-2.6.32.perfctr26/MAINTAINERS.~1~ 2009-12-03 12:38:31.000000000 +0100 +++ linux-2.6.32.perfctr26/MAINTAINERS 2009-12-03 12:49:22.000000000 +0100 @@ -4119,6 +4119,11 @@ F: arch/*/lib/perf_event.c F: arch/*/kernel/perf_callchain.c F: tools/perf/ +PERFORMANCE-MONITORING COUNTERS DRIVER +M: Mikael Pettersson +W: http://user.it.uu.se/~mikpe/linux/perfctr/ +S: Maintained + PERSONALITY HANDLING M: Christoph Hellwig L: linux-abi-devel@lists.sourceforge.net --- linux-2.6.32.perfctr26/arch/arm/Kconfig.~1~ 2009-12-03 12:38:31.000000000 +0100 +++ linux-2.6.32.perfctr26/arch/arm/Kconfig 2009-12-03 12:49:22.000000000 +0100 @@ -827,6 +827,10 @@ config IWMMXT Enable support for iWMMXt context switching at run time if running on a CPU that supports it. 
+if CPU_XSCALE +source drivers/perfctr/Kconfig +endif + # bool 'Use XScale PMU as timer source' CONFIG_XSCALE_PMU_TIMER config XSCALE_PMU bool --- linux-2.6.32.perfctr26/arch/arm/include/asm/processor.h.~1~ 2009-09-10 13:41:49.000000000 +0200 +++ linux-2.6.32.perfctr26/arch/arm/include/asm/processor.h 2009-12-03 12:49:22.000000000 +0100 @@ -50,6 +50,10 @@ struct thread_struct { unsigned long error_code; /* debugging */ struct debug_info debug; + +#ifdef CONFIG_PERFCTR_VIRTUAL + struct vperfctr *perfctr; /* performance counters */ +#endif }; #define INIT_THREAD { } --- linux-2.6.32.perfctr26/arch/arm/include/asm/system.h.~1~ 2009-06-10 12:00:43.000000000 +0200 +++ linux-2.6.32.perfctr26/arch/arm/include/asm/system.h 2009-12-03 12:49:22.000000000 +0100 @@ -215,7 +215,9 @@ extern struct task_struct *__switch_to(s #define switch_to(prev,next,last) \ do { \ + perfctr_suspend_thread(&(prev)->thread); \ last = __switch_to(prev,task_thread_info(prev), task_thread_info(next)); \ + perfctr_resume_thread(&(current)->thread); \ } while (0) #if defined(CONFIG_CPU_SA1100) || defined(CONFIG_CPU_SA110) --- linux-2.6.32.perfctr26/arch/arm/kernel/process.c.~1~ 2009-12-03 12:38:31.000000000 +0100 +++ linux-2.6.32.perfctr26/arch/arm/kernel/process.c 2009-12-03 12:49:22.000000000 +0100 @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -279,6 +280,7 @@ void show_regs(struct pt_regs * regs) */ void exit_thread(void) { + perfctr_exit_thread(&current->thread); } ATOMIC_NOTIFIER_HEAD(thread_notify_head); @@ -324,6 +326,8 @@ copy_thread(unsigned long clone_flags, u if (clone_flags & CLONE_SETTLS) thread->tp_value = regs->ARM_r3; + perfctr_copy_task(p, regs); + return 0; } --- linux-2.6.32.perfctr26/arch/powerpc/include/asm/processor.h.~1~ 2009-06-10 12:00:43.000000000 +0200 +++ linux-2.6.32.perfctr26/arch/powerpc/include/asm/processor.h 2009-12-03 12:49:22.000000000 +0100 @@ -197,6 +197,9 @@ struct thread_struct { unsigned long spefscr; /* SPE & eFP status */ int
used_spe; /* set if process has used spe */ #endif /* CONFIG_SPE */ +#ifdef CONFIG_PERFCTR_VIRTUAL + struct vperfctr *perfctr; /* performance counters */ +#endif }; #define ARCH_MIN_TASKALIGN 16 --- linux-2.6.32.perfctr26/arch/powerpc/kernel/process.c.~1~ 2009-12-03 12:38:31.000000000 +0100 +++ linux-2.6.32.perfctr26/arch/powerpc/kernel/process.c 2009-12-03 12:49:22.000000000 +0100 @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -409,8 +410,10 @@ struct task_struct *__switch_to(struct t * window where the kernel stack SLB and the kernel stack are out * of sync. Hard disable here. */ + perfctr_suspend_thread(&prev->thread); hard_irq_disable(); last = _switch(old_thread, new_thread); + perfctr_resume_thread(&current->thread); local_irq_restore(flags); @@ -550,6 +553,7 @@ void show_regs(struct pt_regs * regs) void exit_thread(void) { discard_lazy_cpu_state(); + perfctr_exit_thread(&current->thread); } void flush_thread(void) @@ -678,6 +682,8 @@ int copy_thread(unsigned long clone_flag kregs->nip = (unsigned long)ret_from_fork; #endif + perfctr_copy_task(p, regs); + return 0; } --- linux-2.6.32.perfctr26/arch/powerpc/platforms/Kconfig.cputype.~1~ 2009-12-03 12:38:31.000000000 +0100 +++ linux-2.6.32.perfctr26/arch/powerpc/platforms/Kconfig.cputype 2009-12-03 12:49:22.000000000 +0100 @@ -318,4 +318,8 @@ config NOT_COHERENT_CACHE config CHECK_CACHE_COHERENCY bool +if PPC32 +source "drivers/perfctr/Kconfig" +endif + endmenu --- linux-2.6.32.perfctr26/arch/x86/Kconfig.~1~ 2009-12-03 12:38:31.000000000 +0100 +++ linux-2.6.32.perfctr26/arch/x86/Kconfig 2009-12-03 12:49:22.000000000 +0100 @@ -1459,6 +1459,8 @@ config CC_STACKPROTECTOR detected and for those versions, this configuration option is ignored.
(and a warning is printed during bootup) +source "drivers/perfctr/Kconfig" + source kernel/Kconfig.hz config KEXEC --- linux-2.6.32.perfctr26/arch/x86/include/asm/irq_vectors.h.~1~ 2009-09-10 13:41:50.000000000 +0200 +++ linux-2.6.32.perfctr26/arch/x86/include/asm/irq_vectors.h 2009-12-03 12:49:22.000000000 +0100 @@ -102,6 +102,7 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* * Generic system vector for platform specific use @@ -121,7 +122,7 @@ #define MCE_SELF_VECTOR 0xeb /* - * First APIC vector available to drivers: (vectors 0x30-0xee) we + * First APIC vector available to drivers: (vectors 0x30-0xea) we * start at 0x31(0x41) to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ --- linux-2.6.32.perfctr26/arch/x86/include/asm/processor.h.~1~ 2009-12-03 12:38:32.000000000 +0100 +++ linux-2.6.32.perfctr26/arch/x86/include/asm/processor.h 2009-12-03 12:49:22.000000000 +0100 @@ -471,6 +471,8 @@ struct thread_struct { unsigned long iopl; /* Max allowed port in the bitmap, in bytes: */ unsigned io_bitmap_max; +/* performance counters */ + struct vperfctr *perfctr; /* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. 
*/ unsigned long debugctlmsr; /* Debug Store context; see asm/ds.h */ --- linux-2.6.32.perfctr26/arch/x86/include/asm/system.h.~1~ 2009-12-03 12:38:32.000000000 +0100 +++ linux-2.6.32.perfctr26/arch/x86/include/asm/system.h 2009-12-03 12:49:22.000000000 +0100 @@ -55,6 +55,7 @@ do { \ */ \ unsigned long ebx, ecx, edx, esi, edi; \ \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile("pushfl\n\t" /* save flags */ \ "pushl %%ebp\n\t" /* save EBP */ \ "movl %%esp,%[prev_sp]\n\t" /* save ESP */ \ @@ -123,7 +124,8 @@ do { \ #endif /* CC_STACKPROTECTOR */ /* Save restore flags to clear handle leaking NT */ -#define switch_to(prev, next, last) \ +#define switch_to(prev, next, last) do { \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile(SAVE_CONTEXT \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ @@ -146,7 +148,8 @@ do { \ [thread_info] "i" (offsetof(struct task_struct, stack)), \ [current_task] "m" (per_cpu_var(current_task)) \ __switch_canary_iparam \ - : "memory", "cc" __EXTRA_CLOBBER) + : "memory", "cc" __EXTRA_CLOBBER); \ +} while (0) #endif #ifdef __KERNEL__ --- linux-2.6.32.perfctr26/arch/x86/kernel/entry_32.S.~1~ 2009-09-10 13:41:50.000000000 +0200 +++ linux-2.6.32.perfctr26/arch/x86/kernel/entry_32.S 2009-12-03 12:49:22.000000000 +0100 @@ -832,6 +832,23 @@ ENDPROC(name) /* The include is where all of the SMP etc. 
interrupts come from */ #include +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +ENTRY(perfctr_interrupt) + RING0_INT_FRAME + pushl $~(LOCAL_PERFCTR_VECTOR) + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + TRACE_IRQS_OFF + pushl %esp + CFI_ADJUST_CFA_OFFSET 4 + call smp_perfctr_interrupt + addl $4, %esp + CFI_ADJUST_CFA_OFFSET -4 + jmp ret_from_intr + CFI_ENDPROC +ENDPROC(perfctr_interrupt) +#endif + ENTRY(coprocessor_error) RING0_INT_FRAME pushl $0 --- linux-2.6.32.perfctr26/arch/x86/kernel/entry_64.S.~1~ 2009-12-03 12:38:32.000000000 +0100 +++ linux-2.6.32.perfctr26/arch/x86/kernel/entry_64.S 2009-12-03 12:49:22.000000000 +0100 @@ -1020,6 +1020,11 @@ apicinterrupt LOCAL_PENDING_VECTOR \ perf_pending_interrupt smp_perf_pending_interrupt #endif +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +apicinterrupt LOCAL_PERFCTR_VECTOR \ + perfctr_interrupt smp_perfctr_interrupt +#endif + /* * Exception entry points. */ --- linux-2.6.32.perfctr26/arch/x86/kernel/irqinit.c.~1~ 2009-12-03 12:38:32.000000000 +0100 +++ linux-2.6.32.perfctr26/arch/x86/kernel/irqinit.c 2009-12-03 12:49:22.000000000 +0100 @@ -25,6 +25,7 @@ #include #include #include +#include #include /* @@ -235,6 +236,8 @@ void __init native_init_IRQ(void) set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]); } + perfctr_vector_init(); + if (!acpi_ioapic) setup_irq(2, &irq2); --- linux-2.6.32.perfctr26/arch/x86/kernel/process.c.~1~ 2009-12-03 12:38:32.000000000 +0100 +++ linux-2.6.32.perfctr26/arch/x86/kernel/process.c 2009-12-03 12:49:22.000000000 +0100 @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -85,6 +86,7 @@ void exit_thread(void) put_cpu(); kfree(bp); } + perfctr_exit_thread(t); } void flush_thread(void) --- linux-2.6.32.perfctr26/arch/x86/kernel/process_32.c.~1~ 2009-12-03 12:38:32.000000000 +0100 +++ linux-2.6.32.perfctr26/arch/x86/kernel/process_32.c 2009-12-03 12:49:22.000000000 +0100 @@ -27,6 +27,7 @@ #include #include #include +#include 
#include #include #include @@ -259,6 +260,8 @@ int copy_thread(unsigned long clone_flag task_user_gs(p) = get_user_gs(regs); + perfctr_copy_task(p, regs); + tsk = current; if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, @@ -427,6 +430,8 @@ __switch_to(struct task_struct *prev_p, percpu_write(current_task, next_p); + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.32.perfctr26/arch/x86/kernel/process_64.c.~1~ 2009-12-03 12:38:32.000000000 +0100 +++ linux-2.6.32.perfctr26/arch/x86/kernel/process_64.c 2009-12-03 12:49:22.000000000 +0100 @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -303,6 +304,8 @@ int copy_thread(unsigned long clone_flag savesegment(es, p->thread.es); savesegment(ds, p->thread.ds); + perfctr_copy_task(p, regs); + if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) { @@ -495,6 +498,9 @@ __switch_to(struct task_struct *prev_p, */ if (preload_fpu) __math_state_restore(); + + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.32.perfctr26/drivers/Makefile.~1~ 2009-12-03 12:38:32.000000000 +0100 +++ linux-2.6.32.perfctr26/drivers/Makefile 2009-12-03 12:49:22.000000000 +0100 @@ -96,6 +96,7 @@ obj-$(CONFIG_MEMSTICK) += memstick/ obj-$(CONFIG_NEW_LEDS) += leds/ obj-$(CONFIG_INFINIBAND) += infiniband/ obj-$(CONFIG_SGI_SN) += sn/ +obj-$(CONFIG_KPERFCTR) += perfctr/ obj-y += firmware/ obj-$(CONFIG_CRYPTO) += crypto/ obj-$(CONFIG_SUPERH) += sh/ --- linux-2.6.32.perfctr26/fs/exec.c.~1~ 2009-12-03 12:38:35.000000000 +0100 +++ linux-2.6.32.perfctr26/fs/exec.c 2009-12-03 12:49:22.000000000 +0100 @@ -51,6 +51,7 @@ #include #include #include +#include #include #include #include @@ -976,6 +977,7 @@ int flush_old_exec(struct linux_binprm * set_task_comm(current, tcomm); current->flags &= ~PF_RANDOMIZE; + perfctr_flush_thread(&current->thread);
flush_thread(); /* Set the new mm task size. We have to do that late because it may --- linux-2.6.32.perfctr26/kernel/exit.c.~1~ 2009-12-03 12:38:35.000000000 +0100 +++ linux-2.6.32.perfctr26/kernel/exit.c 2009-12-03 12:49:22.000000000 +0100 @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -206,6 +207,7 @@ repeat: leader->exit_state = EXIT_DEAD; } + perfctr_release_task(p); write_unlock_irq(&tasklist_lock); release_thread(p); call_rcu(&p->rcu, delayed_put_task_struct); --- linux-2.6.32.perfctr26/kernel/pid.c.~1~ 2009-12-03 12:38:35.000000000 +0100 +++ linux-2.6.32.perfctr26/kernel/pid.c 2009-12-03 12:49:22.000000000 +0100 @@ -387,6 +387,7 @@ struct task_struct *find_task_by_vpid(pi { return find_task_by_pid_ns(vnr, current->nsproxy->pid_ns); } +EXPORT_SYMBOL(find_task_by_vpid); struct pid *get_task_pid(struct task_struct *task, enum pid_type type) { --- linux-2.6.32.perfctr26/kernel/sched.c.~1~ 2009-12-03 12:38:35.000000000 +0100 +++ linux-2.6.32.perfctr26/kernel/sched.c 2009-12-03 12:49:22.000000000 +0100 @@ -55,6 +55,7 @@ #include #include #include +#include #include #include #include @@ -7054,6 +7055,8 @@ int set_cpus_allowed_ptr(struct task_str struct rq *rq; int ret = 0; + perfctr_set_cpus_allowed(p, *new_mask); /* XXX: convert to _ptr */ + rq = task_rq_lock(p, &flags); if (!cpumask_intersects(new_mask, cpu_online_mask)) { ret = -EINVAL; --- linux-2.6.32.perfctr26/kernel/timer.c.~1~ 2009-12-03 12:38:35.000000000 +0100 +++ linux-2.6.32.perfctr26/kernel/timer.c 2009-12-03 12:49:22.000000000 +0100 @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -1197,6 +1198,7 @@ void update_process_times(int user_tick) /* Note: this timer irq context must be accounted for as well. 
*/ account_process_tick(p, user_tick); + perfctr_sample_thread(&p->thread); run_local_timers(); rcu_check_callbacks(cpu, user_tick); printk_tick(); papi-5.6.0/src/perfctr-2.6.x/linux/include/000775 001750 001750 00000000000 13216244367 022330 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/ctests/hwinfo.c000664 001750 001750 00000004065 13216244360 020271 0ustar00jshenry1963jshenry1963000000 000000 /* This file performs the following test: valid fields in hw_info */ #include #include #include "papi.h" #include "papi_test.h" int main( int argc, char **argv ) { int retval, i, j; const PAPI_hw_info_t *hwinfo = NULL; const PAPI_mh_info_t *mh; tests_quiet( argc, argv ); /* Set TESTS_QUIET variable */ retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); if (!TESTS_QUIET) { printf( "Test case hwinfo.c: " "Check output of PAPI_get_hardware_info.\n"); } hwinfo=PAPI_get_hardware_info(); if ( hwinfo == NULL ) { test_fail( __FILE__, __LINE__, "PAPI_get_hardware_info", 2 ); } mh = &hwinfo->mem_hierarchy; validate_string( hwinfo->vendor_string, "vendor_string" ); validate_string( hwinfo->model_string, "model_string" ); if ( hwinfo->vendor == PAPI_VENDOR_UNKNOWN ) test_fail( __FILE__, __LINE__, "Vendor unknown", 0 ); if ( hwinfo->cpu_max_mhz == 0.0 ) test_fail( __FILE__, __LINE__, "Mhz unknown", 0 ); if ( hwinfo->ncpu < 1 ) test_fail( __FILE__, __LINE__, "ncpu < 1", 0 ); if ( hwinfo->totalcpus < 1 ) test_fail( __FILE__, __LINE__, "totalcpus < 1", 0 ); /* if ( PAPI_get_opt( PAPI_MAX_HWCTRS, NULL ) < 1 ) test_fail( __FILE__, __LINE__, "get_opt(MAX_HWCTRS) < 1", 0 ); if ( PAPI_get_opt( PAPI_MAX_MPX_CTRS, NULL ) < 1 ) test_fail( __FILE__, __LINE__, "get_opt(MAX_MPX_CTRS) < 1", 0 );*/ if ( mh->levels < 0 ) test_fail( __FILE__, __LINE__, "max mh level < 0", 0 ); if (!TESTS_QUIET) { printf( "Max level of TLB or Cache: %d\n", mh->levels ); for ( i = 0; i < mh->levels; i++ ) { for ( j = 
0; j < PAPI_MH_MAX_LEVELS; j++ ) { const PAPI_mh_cache_info_t *c = &mh->level[i].cache[j]; const PAPI_mh_tlb_info_t *t = &mh->level[i].tlb[j]; printf( "Level %d, TLB %d: %d, %d, %d\n", i, j, t->type, t->num_entries, t->associativity ); printf( "Level %d, Cache %d: %d, %d, %d, %d, %d\n", i, j, c->type, c->size, c->line_size, c->num_lines, c->associativity ); } } } test_pass( __FILE__ ); return 0; } papi-5.6.0/src/perfctr-2.6.x/patches/patch-kernel-2.6.31000664 001750 001750 00000037135 13216244367 024233 0ustar00jshenry1963jshenry1963000000 000000 --- linux-2.6.31.perfctr26/CREDITS.~1~ 2009-09-10 13:41:49.000000000 +0200 +++ linux-2.6.31.perfctr26/CREDITS 2009-09-10 14:12:53.000000000 +0200 @@ -2771,6 +2771,7 @@ N: Mikael Pettersson E: mikpe@it.uu.se W: http://user.it.uu.se/~mikpe/linux/ D: Miscellaneous fixes +D: Performance-monitoring counters driver N: Reed H. Petty E: rhp@draper.net --- linux-2.6.31.perfctr26/Documentation/ioctl/ioctl-number.txt.~1~ 2009-09-10 13:41:49.000000000 +0200 +++ linux-2.6.31.perfctr26/Documentation/ioctl/ioctl-number.txt 2009-09-10 14:12:53.000000000 +0200 @@ -198,6 +198,8 @@ Code Seq# Include File Comments 0xB1 00-1F PPPoX 0xCB 00-1F CBM serial IEC bus in development: +0xD0 all performance counters see drivers/perfctr/ + 0xDD 00-3F ZFCP device driver see drivers/s390/scsi/ 0xF3 00-3F video/sisfb.h sisfb (in development) --- linux-2.6.31.perfctr26/MAINTAINERS.~1~ 2009-09-10 13:41:49.000000000 +0200 +++ linux-2.6.31.perfctr26/MAINTAINERS 2009-09-10 14:12:53.000000000 +0200 @@ -3951,6 +3951,11 @@ M: Paul Mackerras M: Ingo Molnar S: Supported +PERFORMANCE-MONITORING COUNTERS DRIVER +M: Mikael Pettersson +W: http://user.it.uu.se/~mikpe/linux/perfctr/ +S: Maintained + PERSONALITY HANDLING M: Christoph Hellwig L: linux-abi-devel@lists.sourceforge.net --- linux-2.6.31.perfctr26/arch/arm/Kconfig.~1~ 2009-09-10 13:41:49.000000000 +0200 +++ linux-2.6.31.perfctr26/arch/arm/Kconfig 2009-09-10 14:12:53.000000000 +0200 @@ -753,6 +753,10 @@ config 
IWMMXT Enable support for iWMMXt context switching at run time if running on a CPU that supports it. +if CPU_XSCALE +source drivers/perfctr/Kconfig +endif + # bool 'Use XScale PMU as timer source' CONFIG_XSCALE_PMU_TIMER config XSCALE_PMU bool --- linux-2.6.31.perfctr26/arch/arm/include/asm/processor.h.~1~ 2009-09-10 13:41:49.000000000 +0200 +++ linux-2.6.31.perfctr26/arch/arm/include/asm/processor.h 2009-09-10 14:12:53.000000000 +0200 @@ -50,6 +50,10 @@ struct thread_struct { unsigned long error_code; /* debugging */ struct debug_info debug; + +#ifdef CONFIG_PERFCTR_VIRTUAL + struct vperfctr *perfctr; /* performance counters */ +#endif }; #define INIT_THREAD { } --- linux-2.6.31.perfctr26/arch/arm/include/asm/system.h.~1~ 2009-06-10 12:00:43.000000000 +0200 +++ linux-2.6.31.perfctr26/arch/arm/include/asm/system.h 2009-09-10 14:12:53.000000000 +0200 @@ -215,7 +215,9 @@ extern struct task_struct *__switch_to(s #define switch_to(prev,next,last) \ do { \ + perfctr_suspend_thread(&(prev)->thread); \ last = __switch_to(prev,task_thread_info(prev), task_thread_info(next)); \ + perfctr_resume_thread(&(current)->thread); \ } while (0) #if defined(CONFIG_CPU_SA1100) || defined(CONFIG_CPU_SA110) --- linux-2.6.31.perfctr26/arch/arm/kernel/process.c.~1~ 2009-09-10 13:41:49.000000000 +0200 +++ linux-2.6.31.perfctr26/arch/arm/kernel/process.c 2009-09-10 14:12:53.000000000 +0200 @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -279,6 +280,7 @@ void show_regs(struct pt_regs * regs) */ void exit_thread(void) { + perfctr_exit_thread(&current->thread); } ATOMIC_NOTIFIER_HEAD(thread_notify_head); @@ -324,6 +326,8 @@ copy_thread(unsigned long clone_flags, u if (clone_flags & CLONE_SETTLS) thread->tp_value = regs->ARM_r3; + perfctr_copy_task(p, regs); + return 0; } --- linux-2.6.31.perfctr26/arch/powerpc/include/asm/processor.h.~1~ 2009-06-10 12:00:43.000000000 +0200 +++ linux-2.6.31.perfctr26/arch/powerpc/include/asm/processor.h 2009-09-10
14:12:53.000000000 +0200 @@ -197,6 +197,9 @@ struct thread_struct { unsigned long spefscr; /* SPE & eFP status */ int used_spe; /* set if process has used spe */ #endif /* CONFIG_SPE */ +#ifdef CONFIG_PERFCTR_VIRTUAL + struct vperfctr *perfctr; /* performance counters */ +#endif }; #define ARCH_MIN_TASKALIGN 16 --- linux-2.6.31.perfctr26/arch/powerpc/kernel/process.c.~1~ 2009-09-10 13:41:50.000000000 +0200 +++ linux-2.6.31.perfctr26/arch/powerpc/kernel/process.c 2009-09-10 14:12:53.000000000 +0200 @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -409,8 +410,10 @@ struct task_struct *__switch_to(struct t * window where the kernel stack SLB and the kernel stack are out * of sync. Hard disable here. */ + perfctr_suspend_thread(&prev->thread); hard_irq_disable(); last = _switch(old_thread, new_thread); + perfctr_resume_thread(&current->thread); local_irq_restore(flags); @@ -550,6 +553,7 @@ void show_regs(struct pt_regs * regs) void exit_thread(void) { discard_lazy_cpu_state(); + perfctr_exit_thread(&current->thread); } void flush_thread(void) @@ -676,6 +680,8 @@ int copy_thread(unsigned long clone_flag kregs->nip = (unsigned long)ret_from_fork; #endif + perfctr_copy_task(p, regs); + return 0; } --- linux-2.6.31.perfctr26/arch/powerpc/platforms/Kconfig.cputype.~1~ 2009-09-10 13:41:50.000000000 +0200 +++ linux-2.6.31.perfctr26/arch/powerpc/platforms/Kconfig.cputype 2009-09-10 14:12:53.000000000 +0200 @@ -290,4 +290,8 @@ config NOT_COHERENT_CACHE config CHECK_CACHE_COHERENCY bool +if PPC32 +source "drivers/perfctr/Kconfig" +endif + endmenu --- linux-2.6.31.perfctr26/arch/x86/Kconfig.~1~ 2009-09-10 13:41:50.000000000 +0200 +++ linux-2.6.31.perfctr26/arch/x86/Kconfig 2009-09-10 14:12:53.000000000 +0200 @@ -1464,6 +1464,8 @@ config CC_STACKPROTECTOR detected and for those versions, this configuration option is ignored.
(and a warning is printed during bootup) +source "drivers/perfctr/Kconfig" + source kernel/Kconfig.hz config KEXEC --- linux-2.6.31.perfctr26/arch/x86/include/asm/irq_vectors.h.~1~ 2009-09-10 13:41:50.000000000 +0200 +++ linux-2.6.31.perfctr26/arch/x86/include/asm/irq_vectors.h 2009-09-10 14:12:53.000000000 +0200 @@ -102,6 +102,7 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* * Generic system vector for platform specific use @@ -121,7 +122,7 @@ #define MCE_SELF_VECTOR 0xeb /* - * First APIC vector available to drivers: (vectors 0x30-0xee) we + * First APIC vector available to drivers: (vectors 0x30-0xea) we * start at 0x31(0x41) to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ --- linux-2.6.31.perfctr26/arch/x86/include/asm/processor.h.~1~ 2009-09-10 13:41:50.000000000 +0200 +++ linux-2.6.31.perfctr26/arch/x86/include/asm/processor.h 2009-09-10 14:12:53.000000000 +0200 @@ -460,6 +460,8 @@ struct thread_struct { unsigned long iopl; /* Max allowed port in the bitmap, in bytes: */ unsigned io_bitmap_max; +/* performance counters */ + struct vperfctr *perfctr; /* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. 
*/ unsigned long debugctlmsr; /* Debug Store context; see asm/ds.h */ --- linux-2.6.31.perfctr26/arch/x86/include/asm/system.h.~1~ 2009-06-10 12:00:43.000000000 +0200 +++ linux-2.6.31.perfctr26/arch/x86/include/asm/system.h 2009-09-10 14:12:53.000000000 +0200 @@ -55,6 +55,7 @@ do { \ */ \ unsigned long ebx, ecx, edx, esi, edi; \ \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile("pushfl\n\t" /* save flags */ \ "pushl %%ebp\n\t" /* save EBP */ \ "movl %%esp,%[prev_sp]\n\t" /* save ESP */ \ @@ -123,7 +124,8 @@ do { \ #endif /* CC_STACKPROTECTOR */ /* Save restore flags to clear handle leaking NT */ -#define switch_to(prev, next, last) \ +#define switch_to(prev, next, last) do { \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile(SAVE_CONTEXT \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ @@ -146,7 +148,8 @@ do { \ [thread_info] "i" (offsetof(struct task_struct, stack)), \ [current_task] "m" (per_cpu_var(current_task)) \ __switch_canary_iparam \ - : "memory", "cc" __EXTRA_CLOBBER) + : "memory", "cc" __EXTRA_CLOBBER); \ +} while (0) #endif #ifdef __KERNEL__ --- linux-2.6.31.perfctr26/arch/x86/kernel/entry_32.S.~1~ 2009-09-10 13:41:50.000000000 +0200 +++ linux-2.6.31.perfctr26/arch/x86/kernel/entry_32.S 2009-09-10 14:12:53.000000000 +0200 @@ -832,6 +832,23 @@ ENDPROC(name) /* The include is where all of the SMP etc. 
interrupts come from */ #include +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +ENTRY(perfctr_interrupt) + RING0_INT_FRAME + pushl $~(LOCAL_PERFCTR_VECTOR) + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + TRACE_IRQS_OFF + pushl %esp + CFI_ADJUST_CFA_OFFSET 4 + call smp_perfctr_interrupt + addl $4, %esp + CFI_ADJUST_CFA_OFFSET -4 + jmp ret_from_intr + CFI_ENDPROC +ENDPROC(perfctr_interrupt) +#endif + ENTRY(coprocessor_error) RING0_INT_FRAME pushl $0 --- linux-2.6.31.perfctr26/arch/x86/kernel/entry_64.S.~1~ 2009-09-10 13:41:50.000000000 +0200 +++ linux-2.6.31.perfctr26/arch/x86/kernel/entry_64.S 2009-09-10 14:12:53.000000000 +0200 @@ -1026,6 +1026,11 @@ apicinterrupt LOCAL_PENDING_VECTOR \ perf_pending_interrupt smp_perf_pending_interrupt #endif +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +apicinterrupt LOCAL_PERFCTR_VECTOR \ + perfctr_interrupt smp_perfctr_interrupt +#endif + /* * Exception entry points. */ --- linux-2.6.31.perfctr26/arch/x86/kernel/irqinit.c.~1~ 2009-09-10 13:41:50.000000000 +0200 +++ linux-2.6.31.perfctr26/arch/x86/kernel/irqinit.c 2009-09-10 14:12:53.000000000 +0200 @@ -25,6 +25,7 @@ #include #include #include +#include #include /* @@ -253,6 +254,8 @@ void __init native_init_IRQ(void) set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]); } + perfctr_vector_init(); + if (!acpi_ioapic) setup_irq(2, &irq2); --- linux-2.6.31.perfctr26/arch/x86/kernel/process.c.~1~ 2009-09-10 13:41:50.000000000 +0200 +++ linux-2.6.31.perfctr26/arch/x86/kernel/process.c 2009-09-10 14:12:53.000000000 +0200 @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -88,6 +89,7 @@ void exit_thread(void) put_cpu(); kfree(bp); } + perfctr_exit_thread(t); } void flush_thread(void) --- linux-2.6.31.perfctr26/arch/x86/kernel/process_32.c.~1~ 2009-09-10 13:41:50.000000000 +0200 +++ linux-2.6.31.perfctr26/arch/x86/kernel/process_32.c 2009-09-10 14:12:53.000000000 +0200 @@ -27,6 +27,7 @@ #include #include #include +#include 
#include #include #include @@ -262,6 +263,8 @@ int copy_thread(unsigned long clone_flag task_user_gs(p) = get_user_gs(regs); + perfctr_copy_task(p, regs); + tsk = current; if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, @@ -425,6 +428,8 @@ __switch_to(struct task_struct *prev_p, percpu_write(current_task, next_p); + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.31.perfctr26/arch/x86/kernel/process_64.c.~1~ 2009-09-10 13:41:50.000000000 +0200 +++ linux-2.6.31.perfctr26/arch/x86/kernel/process_64.c 2009-09-10 14:12:53.000000000 +0200 @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -306,6 +307,8 @@ int copy_thread(unsigned long clone_flag savesegment(es, p->thread.es); savesegment(ds, p->thread.ds); + perfctr_copy_task(p, regs); + if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) { @@ -489,6 +492,9 @@ __switch_to(struct task_struct *prev_p, */ if (tsk_used_math(next_p) && next_p->fpu_counter > 5) math_state_restore(); + + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.31.perfctr26/drivers/Makefile.~1~ 2009-09-10 13:41:50.000000000 +0200 +++ linux-2.6.31.perfctr26/drivers/Makefile 2009-09-10 14:12:53.000000000 +0200 @@ -95,6 +95,7 @@ obj-$(CONFIG_MEMSTICK) += memstick/ obj-$(CONFIG_NEW_LEDS) += leds/ obj-$(CONFIG_INFINIBAND) += infiniband/ obj-$(CONFIG_SGI_SN) += sn/ +obj-$(CONFIG_KPERFCTR) += perfctr/ obj-y += firmware/ obj-$(CONFIG_CRYPTO) += crypto/ obj-$(CONFIG_SUPERH) += sh/ --- linux-2.6.31.perfctr26/fs/exec.c.~1~ 2009-09-10 13:41:55.000000000 +0200 +++ linux-2.6.31.perfctr26/fs/exec.c 2009-09-10 14:12:53.000000000 +0200 @@ -51,6 +51,7 @@ #include #include #include +#include #include #include #include @@ -973,6 +974,7 @@ int flush_old_exec(struct linux_binprm * set_task_comm(current, tcomm); current->flags &= ~PF_RANDOMIZE; + 
perfctr_flush_thread(&current->thread); flush_thread(); /* Set the new mm task size. We have to do that late because it may --- linux-2.6.31.perfctr26/kernel/exit.c.~1~ 2009-09-10 13:41:55.000000000 +0200 +++ linux-2.6.31.perfctr26/kernel/exit.c 2009-09-10 14:12:53.000000000 +0200 @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -206,6 +207,7 @@ repeat: leader->exit_state = EXIT_DEAD; } + perfctr_release_task(p); write_unlock_irq(&tasklist_lock); release_thread(p); call_rcu(&p->rcu, delayed_put_task_struct); --- linux-2.6.31.perfctr26/kernel/pid.c.~1~ 2009-09-10 13:41:55.000000000 +0200 +++ linux-2.6.31.perfctr26/kernel/pid.c 2009-09-10 14:12:53.000000000 +0200 @@ -387,6 +387,7 @@ struct task_struct *find_task_by_vpid(pi { return find_task_by_pid_ns(vnr, current->nsproxy->pid_ns); } +EXPORT_SYMBOL(find_task_by_vpid); struct pid *get_task_pid(struct task_struct *task, enum pid_type type) { --- linux-2.6.31.perfctr26/kernel/sched.c.~1~ 2009-09-10 13:41:55.000000000 +0200 +++ linux-2.6.31.perfctr26/kernel/sched.c 2009-09-10 14:12:53.000000000 +0200 @@ -55,6 +55,7 @@ #include #include #include +#include #include #include #include @@ -6967,6 +6968,8 @@ int set_cpus_allowed_ptr(struct task_str struct rq *rq; int ret = 0; + perfctr_set_cpus_allowed(p, *new_mask); /* XXX: convert to _ptr */ + rq = task_rq_lock(p, &flags); if (!cpumask_intersects(new_mask, cpu_online_mask)) { ret = -EINVAL; --- linux-2.6.31.perfctr26/kernel/timer.c.~1~ 2009-09-10 13:41:55.000000000 +0200 +++ linux-2.6.31.perfctr26/kernel/timer.c 2009-09-10 14:12:53.000000000 +0200 @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -1155,6 +1156,7 @@ void update_process_times(int user_tick) /* Note: this timer irq context must be accounted for as well.
*/ account_process_tick(p, user_tick); + perfctr_sample_thread(&p->thread); run_local_timers(); if (rcu_pending(cpu)) rcu_check_callbacks(cpu, user_tick); papi-5.6.0/src/perfctr-2.6.x/patches/patch-kernel-2.6.30000664 001750 001750 00000037446 13216244367 024237 0ustar00jshenry1963jshenry1963000000 000000 --- linux-2.6.30.perfctr26/CREDITS.~1~ 2009-06-10 12:00:43.000000000 +0200 +++ linux-2.6.30.perfctr26/CREDITS 2009-06-10 12:36:14.000000000 +0200 @@ -2764,6 +2764,7 @@ N: Mikael Pettersson E: mikpe@it.uu.se W: http://user.it.uu.se/~mikpe/linux/ D: Miscellaneous fixes +D: Performance-monitoring counters driver N: Reed H. Petty E: rhp@draper.net --- linux-2.6.30.perfctr26/Documentation/ioctl/ioctl-number.txt.~1~ 2009-06-10 12:00:43.000000000 +0200 +++ linux-2.6.30.perfctr26/Documentation/ioctl/ioctl-number.txt 2009-06-10 12:36:14.000000000 +0200 @@ -195,6 +195,8 @@ Code Seq# Include File Comments 0xB1 00-1F PPPoX 0xCB 00-1F CBM serial IEC bus in development: +0xD0 all performance counters see drivers/perfctr/ + 0xDD 00-3F ZFCP device driver see drivers/s390/scsi/ 0xF3 00-3F video/sisfb.h sisfb (in development) --- linux-2.6.30.perfctr26/MAINTAINERS.~1~ 2009-06-10 12:00:43.000000000 +0200 +++ linux-2.6.30.perfctr26/MAINTAINERS 2009-06-10 12:36:14.000000000 +0200 @@ -4392,6 +4392,12 @@ S: Maintained F: include/linux/delayacct.h F: kernel/delayacct.c +PERFORMANCE-MONITORING COUNTERS DRIVER +P: Mikael Pettersson +M: mikpe@it.uu.se +W: http://user.it.uu.se/~mikpe/linux/perfctr/ +S: Maintained + PERSONALITY HANDLING P: Christoph Hellwig M: hch@infradead.org --- linux-2.6.30.perfctr26/arch/arm/Kconfig.~1~ 2009-06-10 12:00:43.000000000 +0200 +++ linux-2.6.30.perfctr26/arch/arm/Kconfig 2009-06-10 12:36:14.000000000 +0200 @@ -732,6 +732,10 @@ config IWMMXT Enable support for iWMMXt context switching at run time if running on a CPU that supports it. 
+if CPU_XSCALE +source drivers/perfctr/Kconfig +endif + # bool 'Use XScale PMU as timer source' CONFIG_XSCALE_PMU_TIMER config XSCALE_PMU bool --- linux-2.6.30.perfctr26/arch/arm/include/asm/processor.h.~1~ 2009-03-24 18:00:31.000000000 +0100 +++ linux-2.6.30.perfctr26/arch/arm/include/asm/processor.h 2009-06-10 12:36:14.000000000 +0200 @@ -50,6 +50,10 @@ struct thread_struct { unsigned long error_code; /* debugging */ struct debug_info debug; + +#ifdef CONFIG_PERFCTR_VIRTUAL + struct vperfctr *perfctr; /* performance counters */ +#endif }; #define INIT_THREAD { } --- linux-2.6.30.perfctr26/arch/arm/include/asm/system.h.~1~ 2009-06-10 12:00:43.000000000 +0200 +++ linux-2.6.30.perfctr26/arch/arm/include/asm/system.h 2009-06-10 12:36:14.000000000 +0200 @@ -215,7 +215,9 @@ extern struct task_struct *__switch_to(s #define switch_to(prev,next,last) \ do { \ + perfctr_suspend_thread(&(prev)->thread); \ last = __switch_to(prev,task_thread_info(prev), task_thread_info(next)); \ + perfctr_resume_thread(&(current)->thread); \ } while (0) #if defined(CONFIG_CPU_SA1100) || defined(CONFIG_CPU_SA110) --- linux-2.6.30.perfctr26/arch/arm/kernel/process.c.~1~ 2009-06-10 12:00:43.000000000 +0200 +++ linux-2.6.30.perfctr26/arch/arm/kernel/process.c 2009-06-10 12:36:14.000000000 +0200 @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -273,6 +274,7 @@ void show_regs(struct pt_regs * regs) */ void exit_thread(void) { + perfctr_exit_thread(&current->thread); } ATOMIC_NOTIFIER_HEAD(thread_notify_head); @@ -318,6 +320,8 @@ copy_thread(unsigned long clone_flags, u if (clone_flags & CLONE_SETTLS) thread->tp_value = regs->ARM_r3; + perfctr_copy_task(p, regs); + return 0; } --- linux-2.6.30.perfctr26/arch/powerpc/include/asm/processor.h.~1~ 2009-06-10 12:00:43.000000000 +0200 +++ linux-2.6.30.perfctr26/arch/powerpc/include/asm/processor.h 2009-06-10 12:36:14.000000000 +0200 @@ -197,6 +197,9 @@ struct thread_struct { unsigned long spefscr; /* SPE & eFP status */ int
used_spe; /* set if process has used spe */ #endif /* CONFIG_SPE */ +#ifdef CONFIG_PERFCTR_VIRTUAL + struct vperfctr *perfctr; /* performance counters */ +#endif }; #define ARCH_MIN_TASKALIGN 16 --- linux-2.6.30.perfctr26/arch/powerpc/kernel/process.c.~1~ 2009-06-10 12:00:43.000000000 +0200 +++ linux-2.6.30.perfctr26/arch/powerpc/kernel/process.c 2009-06-10 12:36:14.000000000 +0200 @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -409,8 +410,10 @@ struct task_struct *__switch_to(struct t * window where the kernel stack SLB and the kernel stack are out * of sync. Hard disable here. */ + perfctr_suspend_thread(&prev->thread); hard_irq_disable(); last = _switch(old_thread, new_thread); + perfctr_resume_thread(¤t->thread); local_irq_restore(flags); @@ -550,6 +553,7 @@ void show_regs(struct pt_regs * regs) void exit_thread(void) { discard_lazy_cpu_state(); + perfctr_exit_thread(¤t->thread); } void flush_thread(void) @@ -676,6 +680,8 @@ int copy_thread(unsigned long clone_flag kregs->nip = (unsigned long)ret_from_fork; #endif + perfctr_copy_task(p, regs); + return 0; } --- linux-2.6.30.perfctr26/arch/powerpc/platforms/Kconfig.cputype.~1~ 2009-06-10 12:00:43.000000000 +0200 +++ linux-2.6.30.perfctr26/arch/powerpc/platforms/Kconfig.cputype 2009-06-10 12:36:14.000000000 +0200 @@ -276,4 +276,8 @@ config NOT_COHERENT_CACHE config CHECK_CACHE_COHERENCY bool +if PPC32 +source "drivers/perfctr/Kconfig" +endif + endmenu --- linux-2.6.30.perfctr26/arch/x86/Kconfig.~1~ 2009-06-10 12:00:43.000000000 +0200 +++ linux-2.6.30.perfctr26/arch/x86/Kconfig 2009-06-10 12:36:14.000000000 +0200 @@ -1424,6 +1424,8 @@ config CC_STACKPROTECTOR detected and for those versions, this configuration option is ignored. 
(and a warning is printed during bootup) +source "drivers/perfctr/Kconfig" + source kernel/Kconfig.hz config KEXEC --- linux-2.6.30.perfctr26/arch/x86/include/asm/irq_vectors.h.~1~ 2009-06-10 12:00:43.000000000 +0200 +++ linux-2.6.30.perfctr26/arch/x86/include/asm/irq_vectors.h 2009-06-10 12:36:14.000000000 +0200 @@ -106,10 +106,14 @@ */ #define LOCAL_TIMER_VECTOR 0xef +#if !defined(CONFIG_PERF_COUNTERS) +#define LOCAL_PERFCTR_VECTOR 0xee +#else /* * Performance monitoring interrupt vector: */ #define LOCAL_PERF_VECTOR 0xee +#endif /* * Generic system vector for platform specific use @@ -117,7 +121,7 @@ #define GENERIC_INTERRUPT_VECTOR 0xed /* - * First APIC vector available to drivers: (vectors 0x30-0xee) we + * First APIC vector available to drivers: (vectors 0x30-0xec) we * start at 0x31(0x41) to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ --- linux-2.6.30.perfctr26/arch/x86/include/asm/processor.h.~1~ 2009-06-10 12:00:43.000000000 +0200 +++ linux-2.6.30.perfctr26/arch/x86/include/asm/processor.h 2009-06-10 12:36:14.000000000 +0200 @@ -458,6 +458,8 @@ struct thread_struct { unsigned long iopl; /* Max allowed port in the bitmap, in bytes: */ unsigned io_bitmap_max; +/* performance counters */ + struct vperfctr *perfctr; /* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. 
*/ unsigned long debugctlmsr; #ifdef CONFIG_X86_DS --- linux-2.6.30.perfctr26/arch/x86/include/asm/system.h.~1~ 2009-06-10 12:00:43.000000000 +0200 +++ linux-2.6.30.perfctr26/arch/x86/include/asm/system.h 2009-06-10 12:36:14.000000000 +0200 @@ -55,6 +55,7 @@ do { \ */ \ unsigned long ebx, ecx, edx, esi, edi; \ \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile("pushfl\n\t" /* save flags */ \ "pushl %%ebp\n\t" /* save EBP */ \ "movl %%esp,%[prev_sp]\n\t" /* save ESP */ \ @@ -123,7 +124,8 @@ do { \ #endif /* CC_STACKPROTECTOR */ /* Save restore flags to clear handle leaking NT */ -#define switch_to(prev, next, last) \ +#define switch_to(prev, next, last) do { \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile(SAVE_CONTEXT \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ @@ -146,7 +148,8 @@ do { \ [thread_info] "i" (offsetof(struct task_struct, stack)), \ [current_task] "m" (per_cpu_var(current_task)) \ __switch_canary_iparam \ - : "memory", "cc" __EXTRA_CLOBBER) + : "memory", "cc" __EXTRA_CLOBBER); \ +} while (0) #endif #ifdef __KERNEL__ --- linux-2.6.30.perfctr26/arch/x86/kernel/entry_32.S.~1~ 2009-06-10 12:00:43.000000000 +0200 +++ linux-2.6.30.perfctr26/arch/x86/kernel/entry_32.S 2009-06-10 12:36:14.000000000 +0200 @@ -812,6 +812,23 @@ ENDPROC(name) /* The include is where all of the SMP etc. 
interrupts come from */ #include +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +ENTRY(perfctr_interrupt) + RING0_INT_FRAME + pushl $~(LOCAL_PERFCTR_VECTOR) + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + TRACE_IRQS_OFF + pushl %esp + CFI_ADJUST_CFA_OFFSET 4 + call smp_perfctr_interrupt + addl $4, %esp + CFI_ADJUST_CFA_OFFSET -4 + jmp ret_from_intr + CFI_ENDPROC +ENDPROC(perfctr_interrupt) +#endif + ENTRY(coprocessor_error) RING0_INT_FRAME pushl $0 --- linux-2.6.30.perfctr26/arch/x86/kernel/entry_64.S.~1~ 2009-06-10 12:00:43.000000000 +0200 +++ linux-2.6.30.perfctr26/arch/x86/kernel/entry_64.S 2009-06-10 12:36:14.000000000 +0200 @@ -1025,6 +1025,11 @@ apicinterrupt ERROR_APIC_VECTOR \ apicinterrupt SPURIOUS_APIC_VECTOR \ spurious_interrupt smp_spurious_interrupt +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +apicinterrupt LOCAL_PERFCTR_VECTOR \ + perfctr_interrupt smp_perfctr_interrupt +#endif + /* * Exception entry points. */ --- linux-2.6.30.perfctr26/arch/x86/kernel/irqinit_32.c.~1~ 2009-06-10 12:00:43.000000000 +0200 +++ linux-2.6.30.perfctr26/arch/x86/kernel/irqinit_32.c 2009-06-10 12:36:14.000000000 +0200 @@ -20,6 +20,7 @@ #include #include #include +#include #include @@ -189,6 +190,8 @@ void __init native_init_IRQ(void) if (!acpi_ioapic) setup_irq(2, &irq2); + perfctr_vector_init(); + /* * Call quirks after call gates are initialised (usually add in * the architecture specific gates): --- linux-2.6.30.perfctr26/arch/x86/kernel/irqinit_64.c.~1~ 2009-06-10 12:00:43.000000000 +0200 +++ linux-2.6.30.perfctr26/arch/x86/kernel/irqinit_64.c 2009-06-10 12:36:14.000000000 +0200 @@ -21,6 +21,7 @@ #include #include #include +#include #include /* @@ -172,6 +173,8 @@ void __init native_init_IRQ(void) apic_intr_init(); + perfctr_vector_init(); + if (!acpi_ioapic) setup_irq(2, &irq2); } --- linux-2.6.30.perfctr26/arch/x86/kernel/process.c.~1~ 2009-06-10 12:00:43.000000000 +0200 +++ linux-2.6.30.perfctr26/arch/x86/kernel/process.c 2009-06-10 
12:36:14.000000000 +0200 @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -83,6 +84,7 @@ void exit_thread(void) put_cpu(); kfree(bp); } + perfctr_exit_thread(t); ds_exit_thread(current); } --- linux-2.6.30.perfctr26/arch/x86/kernel/process_32.c.~1~ 2009-06-10 12:00:43.000000000 +0200 +++ linux-2.6.30.perfctr26/arch/x86/kernel/process_32.c 2009-06-10 12:36:14.000000000 +0200 @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -265,6 +266,8 @@ int copy_thread(unsigned long clone_flag task_user_gs(p) = get_user_gs(regs); + perfctr_copy_task(p, regs); + tsk = current; if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, @@ -427,6 +430,8 @@ __switch_to(struct task_struct *prev_p, percpu_write(current_task, next_p); + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.30.perfctr26/arch/x86/kernel/process_64.c.~1~ 2009-06-10 12:00:43.000000000 +0200 +++ linux-2.6.30.perfctr26/arch/x86/kernel/process_64.c 2009-06-10 12:36:14.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -309,6 +310,8 @@ int copy_thread(unsigned long clone_flag savesegment(es, p->thread.es); savesegment(ds, p->thread.ds); + perfctr_copy_task(p, regs); + if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) { @@ -491,6 +494,9 @@ __switch_to(struct task_struct *prev_p, */ if (tsk_used_math(next_p) && next_p->fpu_counter > 5) math_state_restore(); + + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.30.perfctr26/drivers/Makefile.~1~ 2009-06-10 12:00:44.000000000 +0200 +++ linux-2.6.30.perfctr26/drivers/Makefile 2009-06-10 12:36:14.000000000 +0200 @@ -94,6 +94,7 @@ obj-$(CONFIG_MEMSTICK) += memstick/ obj-$(CONFIG_NEW_LEDS) += leds/ obj-$(CONFIG_INFINIBAND) += infiniband/ obj-$(CONFIG_SGI_SN) += sn/ 
+obj-$(CONFIG_KPERFCTR) += perfctr/ obj-y += firmware/ obj-$(CONFIG_CRYPTO) += crypto/ obj-$(CONFIG_SUPERH) += sh/ --- linux-2.6.30.perfctr26/fs/exec.c.~1~ 2009-06-10 12:00:46.000000000 +0200 +++ linux-2.6.30.perfctr26/fs/exec.c 2009-06-10 12:36:14.000000000 +0200 @@ -50,6 +50,7 @@ #include #include #include +#include #include #include #include @@ -971,6 +972,7 @@ int flush_old_exec(struct linux_binprm * set_task_comm(current, tcomm); current->flags &= ~PF_RANDOMIZE; + perfctr_flush_thread(¤t->thread); flush_thread(); /* Set the new mm task size. We have to do that late because it may --- linux-2.6.30.perfctr26/kernel/exit.c.~1~ 2009-06-10 12:00:47.000000000 +0200 +++ linux-2.6.30.perfctr26/kernel/exit.c 2009-06-10 12:36:14.000000000 +0200 @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -206,6 +207,7 @@ repeat: leader->exit_state = EXIT_DEAD; } + perfctr_release_task(p); write_unlock_irq(&tasklist_lock); release_thread(p); call_rcu(&p->rcu, delayed_put_task_struct); --- linux-2.6.30.perfctr26/kernel/sched.c.~1~ 2009-06-10 12:00:47.000000000 +0200 +++ linux-2.6.30.perfctr26/kernel/sched.c 2009-06-10 12:36:14.000000000 +0200 @@ -54,6 +54,7 @@ #include #include #include +#include #include #include #include @@ -6646,6 +6647,8 @@ int set_cpus_allowed_ptr(struct task_str struct rq *rq; int ret = 0; + perfctr_set_cpus_allowed(p, *new_mask); /* XXX: convert to _ptr */ + rq = task_rq_lock(p, &flags); if (!cpumask_intersects(new_mask, cpu_online_mask)) { ret = -EINVAL; --- linux-2.6.30.perfctr26/kernel/timer.c.~1~ 2009-06-10 12:00:47.000000000 +0200 +++ linux-2.6.30.perfctr26/kernel/timer.c 2009-06-10 12:36:14.000000000 +0200 @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -1114,6 +1115,7 @@ void update_process_times(int user_tick) /* Note: this timer irq context must be accounted for as well. 
*/ account_process_tick(p, user_tick); + perfctr_sample_thread(&p->thread); run_local_timers(); if (rcu_pending(cpu)) rcu_check_callbacks(cpu, user_tick); papi-5.6.0/src/testlib/papi_test.h000664 001750 001750 00000006361 13216244370 021137 0ustar00jshenry1963jshenry1963000000 000000 /* Standard headers for PAPI test applications. This file is customized to hide Windows / Unix differences. */ #ifdef __cplusplus extern "C" { #endif //#if (!defined(NO_DLFCN) && !defined(_BGL) && !defined(_BGP)) //#include //#endif //#include //#if !defined(__FreeBSD__) && !defined(__APPLE__) //#include //#endif /* Masks to select operations for add_test_events() and remove_test_events() Mask value tells us what events to select. */ #define MASK_FP_OPS 0x80000 #define MASK_L1_DCA 0x40000 /* three new events for POWER4 */ #define MASK_L1_DCW 0x20000 #define MASK_L1_DCR 0x10000 #define MASK_TOT_IIS 0x04000 /* Try this if TOT_INS won't work */ #define MASK_BR_PRC 0x02000 #define MASK_BR_MSP 0x01000 #define MASK_BR_CN 0x00800 #define MASK_L2_TCH 0x00400 #define MASK_L2_TCA 0x00200 #define MASK_L2_TCM 0x00100 #define MASK_L1_DCM 0x00040 #define MASK_L1_ICM 0x00020 #define MASK_L1_TCM 0x00010 #define MASK_FP_INS 0x00004 #define MASK_TOT_INS 0x00002 #define MASK_TOT_CYC 0x00001 #define MAX_TEST_EVENTS 18 struct test_events_t { unsigned int mask; unsigned int event; }; extern struct test_events_t test_events[]; void validate_string(const char *name, char *s); void *get_overflow_address(void *context); void free_test_space(long long ** values, int num_tests); long long **allocate_test_space(int num_tests, int num_events); int add_test_events(int *number, int *mask, int allow_derived); int add_two_events(int *num_events, int *papi_event, int *mask); int add_two_nonderived_events(int *num_events, int *papi_event, int *mask); int add_test_events_r(int *number, int *mask, void *handle); int find_nonderived_event( void ); int enum_add_native_events(int *num_events, int **evtcodes, int 
need_interrupts, int no_software_events, int cidx); int remove_test_events(int *EventSet, int mask); char *stringify_domain(int domain); char *stringify_all_domains(int domains); char *stringify_granularity(int granularity); char *stringify_all_granularities(int granularities); int tests_quiet(int argc, char **argv); void test_pass(const char *filename); void test_fail(const char *file, int line, const char *call, int retval); void test_skip(const char *file, int line, const char *call, int retval); void test_warn(const char *file, int line, const char *call, int retval); void test_print_event_header(const char *call, int evset); int approx_equals(double a, double b); /* Unix systems use %lld to display long long values Windows uses %I64d for the same purpose. Since these occur inside a quoted string, we must #define the entire format string. Below are several common forms of this string for both platforms. */ #define ONEHDR " %12s" #define TAB2HDR "%s %12s %12s\n" #define TAB3HDR "%s %12s %12s %12s\n" #define TAB4HDR "%s %12s %12s %12s %12s\n" #define ONENUM " %12lld" #define TAB1 "%-12s %12lld\n" #define TAB2 "%-12s %12lld %12lld\n" #define TAB3 "%-12s %12lld %12lld %12lld\n" #define TAB4 "%-12s %12lld %12lld %12lld %12lld\n" #define TAB5 "%-12s %12lld %12lld %12lld %12lld %12lld\n" #define TWO12 "%12lld %12lld %s" #define LLDFMT "%lld" #define LLDFMT10 "%10lld" #define LLDFMT12 "%12lld" #define LLDFMT15 "%15lld" extern int TESTS_QUIET; /* Declared in test_utils.c */ #ifdef __cplusplus } #endif papi-5.6.0/man/man3/PAPI_get_component_info.3000664 001750 001750 00000002347 13216244356 022727 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPI_get_component_info" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPI_get_component_info \- .PP get information about a specific software component .SH SYNOPSIS .br .PP .SH "Detailed Description" .PP .PP .nf @param cidx Component index This function returns a pointer to a structure 
containing detailed information about a specific software component in the PAPI library. This includes versioning information, preset and native event information, and more. For full details, see @ref PAPI_component_info_t. @par Examples: .fi .PP .PP .nf const PAPI_component_info_t *cmpinfo = NULL; if (PAPI_library_init(PAPI_VER_CURRENT) != PAPI_VER_CURRENT) exit(1); if ((cmpinfo = PAPI_get_component_info(0)) == NULL) exit(1); printf("This component supports %d Preset Events and %d Native events\&.\n", cmpinfo->num_preset_events, cmpinfo->num_native_events); * .fi .PP .PP \fBSee Also:\fP .RS 4 \fBPAPI_get_executable_info\fP .PP \fBPAPI_get_hardware_info\fP .PP \fBPAPI_get_dmem_info\fP .PP \fBPAPI_get_opt\fP .PP \fBPAPI_component_info_t\fP .RE .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. papi-5.6.0/src/libpfm4/docs/man3/pfm_find_event.3000664 001750 001750 00000007471 13216244364 023530 0ustar00jshenry1963jshenry1963000000 000000 .TH LIBPFM 3 "September, 2009" "" "Linux Programmer's Manual" .SH NAME pfm_find_event \- search for an event .SH SYNOPSIS .nf .B #include .sp .BI "int pfm_find_event(const char *"str ");" .sp .SH DESCRIPTION This function is used to convert an event string passed in \fBstr\fR into an opaque event identifier, i.e., the return value. Events are first manipulated as strings which contain the event name, sub-event names and optional filters and modifiers. This function analyzes the string and tries to find the matching event. The event string is a structured string and it is composed as follows: .TP .B [pmu_name::]event_name[:unit_mask][:modifier|:modifier=val] .PP The various components are separated by \fB:\fR or \fB::\fR, they are defined as follows: .TP .B pmu_name This is an optional prefix to designate a specific PMU model. With the prefix the event which matches the event_name is used. 
In case multiple PMU models are activated, there may be conflicts where identical event names mean the same or different things. In that case, it is necessary to fully specify the event with a pmu_name. That string corresponds to what is returned by \fBpfm_get_pmu_name()\fR. .TP .B event_name This is the event name and is required. The library is not case sensitive on event strings. The event name must match \fBcompletely\fR the actual event name; it cannot be a substring. .TP .B unit_mask The optional unit mask which can be considered like a sub-event of the major event. If an event has unit masks, and there is no default, then at least one unit mask must be passed in the string. Multiple unit masks may be specified for a single event. .TP .B modifier A modifier is an optional filter which is provided by the hardware register hosting the event or by the underlying kernel infrastructure. Typical modifiers include privilege level filters. Some modifiers are simple booleans, in which case just passing their names is equivalent to setting their value to \fBtrue\fR. Other modifiers need a specific value, in which case it is provided after the equal sign. No space is tolerated around the equal sign. The list of modifiers depends on the host PMU and underlying kernel API. They are documented in PMU-specific documentation. Multiple modifiers may be passed. There is no required order between unit masks and modifiers. .PP The library uses the generic term \fBattribute\fR to designate both unit masks and modifiers. Here are a few examples of event strings: .TP .B amd64::RETIRED_INSTRUCTIONS:u Event RETIRED_INSTRUCTIONS on AMD64 processor, measured at user privilege level only .TP .B RS_UOPS_DISPATCHED:c=1:i:u Event RS_UOPS_DISPATCHED measured at user privilege level only, and with counter-mask set to 1 .PP For the purpose of this function, only the pmu_name and event_name are considered, everything else is parsed, thus must be valid, but is ignored. 
The function searches only for one event per call. As a convenience, the function will identify the event up to the first comma. In other words, if \fBstr\fR is equal to "EVENTA,EVENTB", then the function will only look at EVENTA and will not return an error because of an invalid event string. This is handy when parsing constant event strings containing multiple, comma-separated, events. .SH RETURN The function returns the opaque event identifier that corresponds to the event string. In case of error, a negative error code is returned instead. .SH ERRORS .TP .B PFMLIB_ERR_NOINIT The library has not been initialized properly. .TP .B PFMLIB_ERR_INVAL The event string is NULL. .TP .B PFMLIB_ERR_NOMEM The library ran out of memory. .TP .B PFMLIB_ERR_NOTFOUND The event was not found .TP .B PFMLIB_ERR_ATTR Invalid event attribute .TP .B PFMLIB_ERR_ATTR_VAL Invalid event attribute value .TP .B PFMLIB_ERR_TOOMANY Too many event attributes passed .SH AUTHOR Stephane Eranian .PP papi-5.6.0/ChangeLogP511.txt000664 001750 001750 00000017324 13216244355 017504 0ustar00jshenry1963jshenry1963000000 000000 2013-05-21 * 602d8dbc man/man1/papi_avail.1 man/man1/papi_clockres.1 man/man1/papi_command_line.1...: Rebuild man pages for a 5.1.1 release. * 93d9be34 doc/Doxyfile-common papi.spec src/Makefile.in...: Bump version number for a 5.1.1 release. 2013-04-15 * 8e47838d src/components/cuda/linux-cuda.c: When creating two event sets - one for the CUDA and one for the CPU component - the order of event set creation appears crucial. When the CPU event set has been created before the CUDA event set then PAPI_start() for the CUDA event set works fine. However, if the CUDA event set has been created before the CPU event set, then PAPI_start(CUDA_event_set) forces the CUDA control state to be updated one more time, even if the CUDA event set has not been modified. The CUDA control state function did not properly handle this case and hence cause PAPI_start() to fail. This has been fixed. 
2013-05-13 * c93dfa68 src/perf_events.c: perf_event component: update error returns This passes more error return values back to PAPI. Before this change a lot of places were hardcoded to PAPI_EPERM even if sys_perf_event_open() was reporting a different error. 2013-05-08 * d1db58e8 src/configure src/configure.in: Force the use of pthread_mutexes on ARM This lets the system libraries worry about the best way to define mutexes, rather than trying to hand-code in assembly around all of the various issues there are with atomic instructions in the ARM architecture. It might make sense to enable this for *all* Linux architectures, but for now just do it for ARM. * 29662e3e src/linux-lock.h: Commit 59d3d7584b2925bd05b4b5d0f4fe89666eb8494a removed the definition of mb(). mb() was defined as rmb(). This just corrects it back. (Note from VMW -- this fixes some things, but ARM still won't build on a Cortex A9 pandaboard due to the use of the "swp" instruction. Proper fix is probably to enforce posix-mutexes on ARM) 2013-04-22 * ff29fd12 src/run_tests.sh: The test for determining whether to run valgrind was backwards. Correcting that allow the run_test.sh script to stay the same and one just needs to define "VALGRIND=yes" (or any non-null string) to make run_test.sh use valgrind. --- src/run_tests.sh | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/run_tests.sh b/src/run_tests.sh index d1ce205..9337ff2 100755 --- a/src/run_tests.sh +++ b/src/run_tests.sh @@ -19,10 +19,8 @@ else export TESTS_QUIET fi -if [ "x$VALGRIND" = "x" ]; then -# Uncomment the following line to run tests using Valgrind -# VALGRIND="valgrind --leak-check=full"; - VALGRIND=""; +if [ "x$VALGRIND" != "x" ]; then + VALGRIND="valgrind --leak-check=full"; fi #CTESTS=`find ctests -maxdepth 1 -perm -u+x -type f`; -- 2013-03-28 * 1e8101f6 src/run_tests.sh: run_tests.sh: further refine component test find Exclude *.cu when looking for component tests. 
2013-03-25 * 0b600bc5 src/run_tests.sh: run_tests.sh: File mode changes. run_tests.sh is now expected to run from the install location in addition to src. The script tried to remove execute from *.[c|h], now it just excludes *.[c|h] from the find commands. 2013-03-18 * 06f9c43b src/perfctr-x86.c: perfctr: don't read in event table multiple times papi_libpfm3_events.c now reads in the predefined events, we don't also need to do this in perfctr setup_x86_presets() * 48d7330c src/perfctr.c: Fix segfault in perfctr.c The preset lookup uses the cidx index, but in perfctr.c we weren't passing a cidx value (it was being left off). The old perfctr code plays games with defining extern functions so the compiler wasn't giving us a warning. 2013-03-14 * eda94e50 src/components/bgpm/L2unit/linux-L2unit.c src/linux-bgq.c: If a counter is not set to overflow (threshold==0; happens when PAPI_shutdown is called) then we do not want to rebuild the BGPM event set, even if the event set has been used previously and hence "applied or attached". Usually if an event set has been applied or attached prior to setting overflow, the BGPM event set needs to be deleted and recreated (which implies malloc() from within BGPM). Not so, though, if threshold is 0 which is the case when PAPI_shutdown is called. Note, this only applies to Punit and L2unit, not IOunit since an IOunit event set in not applied or attached. 2013-03-13 * 46f6123a src/components/bgpm/IOunit/linux-IOunit.c src/components/bgpm/IOunit/linux-IOunit.h src/components/bgpm/L2unit/linux-L2unit.c...: Overflow issue on BG/Q resolved. Overflow with multiple components worked; overflow with multiple components and multiple events did not work as supposed to. 2013-03-07 * 6a0813f8 src/linux-common.c src/linux-memory.c: Fix the build on Linux-SPARC I dug out an old SPARC machine and fixed the PAPI build on it. 
* 51fe7e53 src/perf_events.c: More comprehensive sys_perf_open to PAPI error mappings This tries to cover more of the errors returned by sys_perf_open and map them to better results. EINVAL is a problem because it can mean Conflict as well as Event not found and many other things, so it's unclear what to do with it. * 1479a67f src/perf_events.c src/sys_perf_event_open.c: Return proper error codes for sys_perf_event_open For some reason on x86 and x86_64 we were trying to set errno manually and thus over-writing the proper errno value, causing all errors to look like PAPI_EPERM This removes that code, as well as adds code to report ENOENT as PAPI_ENOEVENT. With this change, on IVY this happens which looks more correct. ./utils/papi_command_line perf::L1-ICACHE-PREFETCHES Failed adding: perf::L1-ICACHE-PREFETCHES because: Event does not exist command_line.c PASSED 2013-03-06 * 7a3e75e8 src/papi_libpfm4_events.c src/papi_user_events.c: Coverity fixes: Coverity pointed out that there was a case where load_user_eent_table() could leak memory. The change in the location of the papi_free(foo) ensures that the allocated memory is freed. Coverity pointed out one path through the code in _papi_libpfm4_ntv_code_to_descr() that did not free up memory allocated in the function. Added a free on the path in free up that memory. Thanks Will Cohen. 2013-03-04 * b19bd1a2 src/components/rapl/linux-rapl.c: Remove a stray debug statement. Thanks to Harald Servat for catching this. 2013-03-01 * 6e5be510 src/utils/command_line.c: Wrestled some horribly convoluted indexing into shape. The -u and -x options now print as expected (I think). 2013-01-31 * 02bd70ad src/components/nvml/linux-nvml.c: linux-nvml.c: Fix type warning. CUDA and NVML have an signed vs unsigned thing going on in their returned device counts, cast away the warning. 2013-01-23 * a5bed384 src/linux-memory.c src/linux-timer.c: ia64 fixes. Thanks to Tony Jones for patches. 
2013-01-16 * 021db23a src/components/nvml/linux-nvml.c: nvml component: cleanup a memory leak We did not free a buffer at shutdown time. 2013-05-17 * b25fc417 src/perf_events.c: perf_event: allow running with perf_event_paranoid is 2 perf_event_paranoid set to 2 means allow user monitoring only (no kernel domain). The code before this mistakenly disabled all events in this case. Also set the allowed domains to exclude PAPI_DOM_KERNEL. 2013-05-16 * 12768bec src/papi_events.csv: papi_events.csv Revert a little mishap in adding ivbep support Somehow the contents of papi_hl.c ended up in the events file. * 5e97ad7f src/papi_events.csv: Add identifier for ivb_ep 2013-01-29 * e201b8eb src/papi.c: General doxygen cleanup: remove all "No known bugs" messages; correct and cleanup examples for PAPI_code_to_name and PAPI_name_to_code papi-5.6.0/src/perfctr-2.6.x/etc/costs/PentiumIIIXeon-700000775 001750 001750 00000001344 13216244366 024515 0ustar00jshenry1963jshenry1963000000 000000 [data from a 700 MHz Pentium III Xeon (Cascades) (quad Dell PE 6400)] PERFCTR INIT: vendor 0, family 6, model 10, stepping 4, clock 699579 kHz PERFCTR INIT: NITER == 64 PERFCTR INIT: loop overhead is 101 cycles PERFCTR INIT: rdtsc cost is 30.8 cycles (2075 total) PERFCTR INIT: rdpmc cost is 33.0 cycles (2213 total) PERFCTR INIT: rdmsr (counter) cost is 91.2 cycles (5939 total) PERFCTR INIT: rdmsr (evntsel) cost is 72.7 cycles (4754 total) PERFCTR INIT: wrmsr (counter) cost is 80.5 cycles (5257 total) PERFCTR INIT: wrmsr (evntsel) cost is 75.9 cycles (4962 total) PERFCTR INIT: read cr4 cost is 1.7 cycles (212 total) PERFCTR INIT: write cr4 cost is 42.0 cycles (2792 total) PERFCTR INIT: write LVTPC cost is 40.4 cycles (2690 total) papi-5.6.0/src/examples/PAPI_flips.c000664 001750 001750 00000005056 13216244361 021240 0ustar00jshenry1963jshenry1963000000 000000 /***************************************************************************** * This example demonstrates the usage of the high level 
function PAPI_flips * * which measures the number of floating point instructions executed and the * * MegaFlop rate (defined as the number of floating point instructions per * * microsecond). To use PAPI_flips, the platform must support the floating * * point instructions event. * *****************************************************************************/ /***************************************************************************** * The first call to PAPI_flips initializes the PAPI library, sets up the * * counters to monitor PAPI_FP_INS and PAPI_TOT_CYC events, and starts the * * counters. Subsequent calls will read the counters and return total real * * time, total process time, total floating point instructions, and the * * Mflips/s rate since the last call to PAPI_flips. * *****************************************************************************/ #include #include #include "papi.h" main() { float real_time, proc_time,mflips; long long flpins; float ireal_time, iproc_time, imflips; long long iflpins; int retval; /*********************************************************************** * If PAPI_FP_INS is a derived event on your platform, then your * * platform must have at least three counters to support PAPI_flips, * * because PAPI needs one counter to count cycles. For example, * * UltraSparcIII supports PAPI_FP_INS, but it has only two * * available hardware counters and PAPI_FP_INS is a derived event on * * that platform, so PAPI_flips returns an error.
* ***********************************************************************/ if((retval=PAPI_flips(&ireal_time,&iproc_time,&iflpins,&imflips)) < PAPI_OK) { printf("Could not initialise PAPI_flips \n"); printf("Your platform may not support floating point instruction event.\n"); printf("retval: %d\n", retval); exit(1); } your_slow_code(); if((retval=PAPI_flips( &real_time, &proc_time, &flpins, &mflips)) #include #include #include "libperfctr.h" #include "arch.h"
/*
 * do_setup - fill in *cpu_control for this test.
 *
 * info:        read only for info->cpu_type, which selects the register
 *              values loaded below.
 * cpu_control: overwritten completely; any previous contents are discarded.
 *
 * The control block is first zeroed, then tsc_on and nractrs are set to 1 and
 * pmc_map[0] to 0, and finally the ppc64 MMCR0/MMCR1/MMCRA values are chosen
 * by CPU type (POWER4/POWER4+, POWER5, or 970/970MP). For any other cpu_type
 * no branch is taken, so the three MMCR fields keep the zero written by the
 * memset. Nothing is returned and no error is reported in that case.
 */
void do_setup(const struct perfctr_info *info, struct perfctr_cpu_control *cpu_control) {
	/* Zero everything first so that fields not assigned below are 0. */
	memset(cpu_control, 0, sizeof *cpu_control);
	cpu_control->tsc_on = 1; /* NOTE(review): presumably turns on time-base sampling -- confirm in libperfctr.h */
	cpu_control->nractrs = 1; /* a single counter is configured for use */
	cpu_control->pmc_map[0] = 0; /* entry 0 of the counter map is set to 0 */
	/* Set up counter 1 to count PM_INST_CMPL. Other counters are set up
	 * to count some event, but are not used in this test.
	 * The hex constants are model-specific MMCR encodings; their bit
	 * layout is not visible here -- check the processor documentation
	 * before changing them. */
	if ((info->cpu_type == PERFCTR_PPC64_POWER4) || (info->cpu_type == PERFCTR_PPC64_POWER4p)) {
		/* POWER4 and POWER4+ share one register set. */
		cpu_control->ppc64.mmcr0 = 0x4000090EULL;
		cpu_control->ppc64.mmcr1 = 0x1003400045F29420ULL;
		cpu_control->ppc64.mmcra = 0x00002000ULL;
	} else if (info->cpu_type == PERFCTR_PPC64_POWER5) {
		/* POWER5: MMCR0 is explicitly left at 0. */
		cpu_control->ppc64.mmcr0 = 0x00000000ULL;
		cpu_control->ppc64.mmcr1 = 0x8103000602CACE8EULL;
		cpu_control->ppc64.mmcra = 0x00000001ULL;
	} else if (info->cpu_type == PERFCTR_PPC64_970 || info->cpu_type == PERFCTR_PPC64_970MP) {
		/* 970 and 970MP share one register set. */
		cpu_control->ppc64.mmcr0 = 0x0000091EULL;
		cpu_control->ppc64.mmcr1 = 0x4003001005F09000ULL;
		cpu_control->ppc64.mmcra = 0x00002000ULL;
	}
	/* No else branch: unrecognised CPU types keep the all-zero MMCR values. */
}
papi-5.6.0/src/perfctr-2.7.x/linux/drivers/perfctr/version.h000664 001750 001750 00000000033 13216244370 025655 0ustar00jshenry1963jshenry1963000000 000000 #define VERSION "2.7.21.1" papi-5.6.0/src/libpfm4/docs/man3/libpfm_amd64_k7.3000664 001750 001750 00000002427 13216244363 023405 0ustar00jshenry1963jshenry1963000000 000000 .TH LIBPFM 3 "August, 2010" "" "Linux Programmer's Manual" .SH NAME libpfm_amd64_k7 - support for AMD64 K7 processors .SH SYNOPSIS .nf .B #include .sp .B PMU name: amd64_k7 .B PMU desc: AMD64 K7 .sp .SH DESCRIPTION The
library supports AMD K7 processors in both 32-bit and 64-bit modes. They correspond to processor family 6. .SH MODIFIERS The following modifiers are supported on AMD64 K7 processors: .TP .B u Measure at user level which includes privilege levels 1, 2, 3. This corresponds to \fBPFM_PLM3\fR. This is a boolean modifier. .TP .B k Measure at kernel level which includes privilege level 0. This corresponds to \fBPFM_PLM0\fR. This is a boolean modifier. .TP .B i Invert the meaning of the event. The counter will now count cycles in which the event is \fBnot\fR occurring. This is a boolean modifier. .TP .B e Enable edge detection, i.e., count only when there is a state transition. This is a boolean modifier. .TP .B c Set the counter mask value. The mask acts as a threshold. The counter will count the number of cycles in which the number of occurrences of the event is greater than or equal to the threshold. This is an integer modifier with values in the range [0:255]. .SH AUTHORS .nf Stephane Eranian Robert Richter .fi .PP papi-5.6.0/src/libpfm4/lib/events/power5+_events.h000664 001750 001750 00000537451 13216244365 024005 0ustar00jshenry1963jshenry1963000000 000000 /****************************/ /* THIS IS OPEN SOURCE CODE */ /****************************/ #ifndef __POWER5p_EVENTS_H__ #define __POWER5p_EVENTS_H__ /* * File: power5+_events.h * CVS: * Author: Corey Ashford * cjashfor@us.ibm.com * Mods: * * * (C) Copyright IBM Corporation, 2009. All Rights Reserved. * Contributed by Corey Ashford * * Note: This code was automatically generated and should not be modified by * hand.
* */ #define POWER5p_PME_PM_LSU_REJECT_RELOAD_CDF 0 #define POWER5p_PME_PM_FPU1_SINGLE 1 #define POWER5p_PME_PM_L3SB_REF 2 #define POWER5p_PME_PM_THRD_PRIO_DIFF_3or4_CYC 3 #define POWER5p_PME_PM_INST_FROM_L275_SHR 4 #define POWER5p_PME_PM_MRK_DATA_FROM_L375_MOD 5 #define POWER5p_PME_PM_DTLB_MISS_4K 6 #define POWER5p_PME_PM_CLB_FULL_CYC 7 #define POWER5p_PME_PM_MRK_ST_CMPL 8 #define POWER5p_PME_PM_LSU_FLUSH_LRQ_FULL 9 #define POWER5p_PME_PM_MRK_DATA_FROM_L275_SHR 10 #define POWER5p_PME_PM_1INST_CLB_CYC 11 #define POWER5p_PME_PM_MEM_SPEC_RD_CANCEL 12 #define POWER5p_PME_PM_MRK_DTLB_MISS_16M 13 #define POWER5p_PME_PM_FPU_FDIV 14 #define POWER5p_PME_PM_FPU_SINGLE 15 #define POWER5p_PME_PM_FPU0_FMA 16 #define POWER5p_PME_PM_SLB_MISS 17 #define POWER5p_PME_PM_LSU1_FLUSH_LRQ 18 #define POWER5p_PME_PM_L2SA_ST_HIT 19 #define POWER5p_PME_PM_DTLB_MISS 20 #define POWER5p_PME_PM_BR_PRED_TA 21 #define POWER5p_PME_PM_MRK_DATA_FROM_L375_MOD_CYC 22 #define POWER5p_PME_PM_CMPLU_STALL_FXU 23 #define POWER5p_PME_PM_EXT_INT 24 #define POWER5p_PME_PM_MRK_LSU1_FLUSH_LRQ 25 #define POWER5p_PME_PM_MRK_ST_GPS 26 #define POWER5p_PME_PM_LSU1_LDF 27 #define POWER5p_PME_PM_FAB_CMD_ISSUED 28 #define POWER5p_PME_PM_LSU0_SRQ_STFWD 29 #define POWER5p_PME_PM_CR_MAP_FULL_CYC 30 #define POWER5p_PME_PM_L2SA_RCST_DISP_FAIL_RC_FULL 31 #define POWER5p_PME_PM_MRK_LSU0_FLUSH_ULD 32 #define POWER5p_PME_PM_LSU_FLUSH_SRQ_FULL 33 #define POWER5p_PME_PM_MEM_RQ_DISP_Q16to19 34 #define POWER5p_PME_PM_FLUSH_IMBAL 35 #define POWER5p_PME_PM_THRD_PRIO_DIFF_minus3or4_CYC 36 #define POWER5p_PME_PM_DATA_FROM_L35_MOD 37 #define POWER5p_PME_PM_MEM_HI_PRIO_WR_CMPL 38 #define POWER5p_PME_PM_FPU1_FDIV 39 #define POWER5p_PME_PM_MEM_RQ_DISP 40 #define POWER5p_PME_PM_FPU0_FRSP_FCONV 41 #define POWER5p_PME_PM_LWSYNC_HELD 42 #define POWER5p_PME_PM_FXU_FIN 43 #define POWER5p_PME_PM_DSLB_MISS 44 #define POWER5p_PME_PM_DATA_FROM_L275_SHR 45 #define POWER5p_PME_PM_FXLS1_FULL_CYC 46 #define POWER5p_PME_PM_THRD_SEL_T0 47 #define 
POWER5p_PME_PM_PTEG_RELOAD_VALID 48 #define POWER5p_PME_PM_MRK_STCX_FAIL 49 #define POWER5p_PME_PM_LSU_LMQ_LHR_MERGE 50 #define POWER5p_PME_PM_2INST_CLB_CYC 51 #define POWER5p_PME_PM_FAB_PNtoVN_DIRECT 52 #define POWER5p_PME_PM_PTEG_FROM_L2MISS 53 #define POWER5p_PME_PM_CMPLU_STALL_LSU 54 #define POWER5p_PME_PM_MRK_DSLB_MISS 55 #define POWER5p_PME_PM_LSU_FLUSH_ULD 56 #define POWER5p_PME_PM_PTEG_FROM_LMEM 57 #define POWER5p_PME_PM_MRK_BRU_FIN 58 #define POWER5p_PME_PM_MEM_WQ_DISP_WRITE 59 #define POWER5p_PME_PM_MRK_DATA_FROM_L275_MOD_CYC 60 #define POWER5p_PME_PM_LSU1_NCLD 61 #define POWER5p_PME_PM_L2SA_RCLD_DISP_FAIL_OTHER 62 #define POWER5p_PME_PM_SNOOP_PW_RETRY_WQ_PWQ 63 #define POWER5p_PME_PM_FPU1_FULL_CYC 64 #define POWER5p_PME_PM_FPR_MAP_FULL_CYC 65 #define POWER5p_PME_PM_L3SA_ALL_BUSY 66 #define POWER5p_PME_PM_3INST_CLB_CYC 67 #define POWER5p_PME_PM_MEM_PWQ_DISP_Q2or3 68 #define POWER5p_PME_PM_L2SA_SHR_INV 69 #define POWER5p_PME_PM_THRESH_TIMEO 70 #define POWER5p_PME_PM_L2SA_RC_DISP_FAIL_CO_BUSY_ALL 71 #define POWER5p_PME_PM_THRD_SEL_OVER_GCT_IMBAL 72 #define POWER5p_PME_PM_FPU_FSQRT 73 #define POWER5p_PME_PM_PMC1_OVERFLOW 74 #define POWER5p_PME_PM_MRK_LSU0_FLUSH_LRQ 75 #define POWER5p_PME_PM_L3SC_SNOOP_RETRY 76 #define POWER5p_PME_PM_DATA_TABLEWALK_CYC 77 #define POWER5p_PME_PM_THRD_PRIO_6_CYC 78 #define POWER5p_PME_PM_FPU_FEST 79 #define POWER5p_PME_PM_FAB_M1toP1_SIDECAR_EMPTY 80 #define POWER5p_PME_PM_MRK_DATA_FROM_RMEM 81 #define POWER5p_PME_PM_MRK_DATA_FROM_L35_MOD_CYC 82 #define POWER5p_PME_PM_MEM_PWQ_DISP 83 #define POWER5p_PME_PM_FAB_P1toM1_SIDECAR_EMPTY 84 #define POWER5p_PME_PM_LD_MISS_L1_LSU0 85 #define POWER5p_PME_PM_SNOOP_PARTIAL_RTRY_QFULL 86 #define POWER5p_PME_PM_FPU1_STALL3 87 #define POWER5p_PME_PM_GCT_USAGE_80to99_CYC 88 #define POWER5p_PME_PM_WORK_HELD 89 #define POWER5p_PME_PM_INST_CMPL 90 #define POWER5p_PME_PM_LSU1_FLUSH_UST 91 #define POWER5p_PME_PM_FXU_IDLE 92 #define POWER5p_PME_PM_LSU0_FLUSH_ULD 93 #define 
POWER5p_PME_PM_LSU1_REJECT_LMQ_FULL 94 #define POWER5p_PME_PM_GRP_DISP_REJECT 95 #define POWER5p_PME_PM_PTEG_FROM_L25_SHR 96 #define POWER5p_PME_PM_L2SA_MOD_INV 97 #define POWER5p_PME_PM_FAB_CMD_RETRIED 98 #define POWER5p_PME_PM_L3SA_SHR_INV 99 #define POWER5p_PME_PM_L2SB_RC_DISP_FAIL_CO_BUSY_ALL 100 #define POWER5p_PME_PM_L2SA_RCST_DISP_FAIL_ADDR 101 #define POWER5p_PME_PM_L2SA_RCLD_DISP_FAIL_RC_FULL 102 #define POWER5p_PME_PM_PTEG_FROM_L375_MOD 103 #define POWER5p_PME_PM_MRK_LSU1_FLUSH_UST 104 #define POWER5p_PME_PM_BR_ISSUED 105 #define POWER5p_PME_PM_MRK_GRP_BR_REDIR 106 #define POWER5p_PME_PM_EE_OFF 107 #define POWER5p_PME_PM_IERAT_XLATE_WR_LP 108 #define POWER5p_PME_PM_DTLB_REF_64K 109 #define POWER5p_PME_PM_MEM_RQ_DISP_Q4to7 110 #define POWER5p_PME_PM_MEM_FAST_PATH_RD_DISP 111 #define POWER5p_PME_PM_INST_FROM_L3 112 #define POWER5p_PME_PM_ITLB_MISS 113 #define POWER5p_PME_PM_FXU1_BUSY_FXU0_IDLE 114 #define POWER5p_PME_PM_DTLB_REF_4K 115 #define POWER5p_PME_PM_FXLS_FULL_CYC 116 #define POWER5p_PME_PM_GRP_DISP_VALID 117 #define POWER5p_PME_PM_LSU_FLUSH_UST 118 #define POWER5p_PME_PM_FXU1_FIN 119 #define POWER5p_PME_PM_THRD_PRIO_4_CYC 120 #define POWER5p_PME_PM_MRK_DATA_FROM_L35_MOD 121 #define POWER5p_PME_PM_4INST_CLB_CYC 122 #define POWER5p_PME_PM_MRK_DTLB_REF_16M 123 #define POWER5p_PME_PM_INST_FROM_L375_MOD 124 #define POWER5p_PME_PM_GRP_CMPL 125 #define POWER5p_PME_PM_L2SC_RCST_DISP_FAIL_ADDR 126 #define POWER5p_PME_PM_FPU1_1FLOP 127 #define POWER5p_PME_PM_FPU_FRSP_FCONV 128 #define POWER5p_PME_PM_L3SC_REF 129 #define POWER5p_PME_PM_5INST_CLB_CYC 130 #define POWER5p_PME_PM_THRD_L2MISS_BOTH_CYC 131 #define POWER5p_PME_PM_MEM_PW_GATH 132 #define POWER5p_PME_PM_DTLB_REF_16G 133 #define POWER5p_PME_PM_FAB_DCLAIM_ISSUED 134 #define POWER5p_PME_PM_FAB_PNtoNN_SIDECAR 135 #define POWER5p_PME_PM_GRP_IC_MISS 136 #define POWER5p_PME_PM_INST_FROM_L35_SHR 137 #define POWER5p_PME_PM_LSU_LMQ_FULL_CYC 138 #define POWER5p_PME_PM_MRK_DATA_FROM_L2_CYC 139 #define 
POWER5p_PME_PM_LSU_SRQ_SYNC_CYC 140 #define POWER5p_PME_PM_LSU0_BUSY_REJECT 141 #define POWER5p_PME_PM_LSU_REJECT_ERAT_MISS 142 #define POWER5p_PME_PM_MRK_DATA_FROM_RMEM_CYC 143 #define POWER5p_PME_PM_DATA_FROM_L375_SHR 144 #define POWER5p_PME_PM_PTEG_FROM_L25_MOD 145 #define POWER5p_PME_PM_FPU0_FMOV_FEST 146 #define POWER5p_PME_PM_THRD_PRIO_7_CYC 147 #define POWER5p_PME_PM_LSU1_FLUSH_SRQ 148 #define POWER5p_PME_PM_LD_REF_L1_LSU0 149 #define POWER5p_PME_PM_L2SC_RCST_DISP 150 #define POWER5p_PME_PM_CMPLU_STALL_DIV 151 #define POWER5p_PME_PM_MEM_RQ_DISP_Q12to15 152 #define POWER5p_PME_PM_INST_FROM_L375_SHR 153 #define POWER5p_PME_PM_ST_REF_L1 154 #define POWER5p_PME_PM_L3SB_ALL_BUSY 155 #define POWER5p_PME_PM_FAB_P1toVNorNN_SIDECAR_EMPTY 156 #define POWER5p_PME_PM_MRK_DATA_FROM_L275_SHR_CYC 157 #define POWER5p_PME_PM_FAB_HOLDtoNN_EMPTY 158 #define POWER5p_PME_PM_DATA_FROM_LMEM 159 #define POWER5p_PME_PM_RUN_CYC 160 #define POWER5p_PME_PM_PTEG_FROM_RMEM 161 #define POWER5p_PME_PM_L2SC_RCLD_DISP 162 #define POWER5p_PME_PM_LSU_LRQ_S0_VALID 163 #define POWER5p_PME_PM_LSU0_LDF 164 #define POWER5p_PME_PM_PMC3_OVERFLOW 165 #define POWER5p_PME_PM_MRK_IMR_RELOAD 166 #define POWER5p_PME_PM_MRK_GRP_TIMEO 167 #define POWER5p_PME_PM_ST_MISS_L1 168 #define POWER5p_PME_PM_STOP_COMPLETION 169 #define POWER5p_PME_PM_LSU_BUSY_REJECT 170 #define POWER5p_PME_PM_ISLB_MISS 171 #define POWER5p_PME_PM_CYC 172 #define POWER5p_PME_PM_THRD_ONE_RUN_CYC 173 #define POWER5p_PME_PM_GRP_BR_REDIR_NONSPEC 174 #define POWER5p_PME_PM_LSU1_SRQ_STFWD 175 #define POWER5p_PME_PM_L3SC_MOD_INV 176 #define POWER5p_PME_PM_L2_PREF 177 #define POWER5p_PME_PM_GCT_NOSLOT_BR_MPRED 178 #define POWER5p_PME_PM_MRK_DATA_FROM_L25_MOD 179 #define POWER5p_PME_PM_L2SB_ST_REQ 180 #define POWER5p_PME_PM_L2SB_MOD_INV 181 #define POWER5p_PME_PM_MRK_L1_RELOAD_VALID 182 #define POWER5p_PME_PM_L3SB_HIT 183 #define POWER5p_PME_PM_L2SB_SHR_MOD 184 #define POWER5p_PME_PM_EE_OFF_EXT_INT 185 #define POWER5p_PME_PM_1PLUS_PPC_CMPL 186 
#define POWER5p_PME_PM_L2SC_SHR_MOD 187 #define POWER5p_PME_PM_PMC6_OVERFLOW 188 #define POWER5p_PME_PM_IC_PREF_INSTALL 189 #define POWER5p_PME_PM_LSU_LRQ_FULL_CYC 190 #define POWER5p_PME_PM_TLB_MISS 191 #define POWER5p_PME_PM_GCT_FULL_CYC 192 #define POWER5p_PME_PM_FXU_BUSY 193 #define POWER5p_PME_PM_MRK_DATA_FROM_L3_CYC 194 #define POWER5p_PME_PM_LSU_REJECT_LMQ_FULL 195 #define POWER5p_PME_PM_LSU_SRQ_S0_ALLOC 196 #define POWER5p_PME_PM_GRP_MRK 197 #define POWER5p_PME_PM_INST_FROM_L25_SHR 198 #define POWER5p_PME_PM_DC_PREF_STREAM_ALLOC 199 #define POWER5p_PME_PM_FPU1_FIN 200 #define POWER5p_PME_PM_BR_MPRED_TA 201 #define POWER5p_PME_PM_MRK_DTLB_REF_64K 202 #define POWER5p_PME_PM_RUN_INST_CMPL 203 #define POWER5p_PME_PM_CRQ_FULL_CYC 204 #define POWER5p_PME_PM_L2SA_RCLD_DISP 205 #define POWER5p_PME_PM_SNOOP_WR_RETRY_QFULL 206 #define POWER5p_PME_PM_MRK_DTLB_REF_4K 207 #define POWER5p_PME_PM_LSU_SRQ_S0_VALID 208 #define POWER5p_PME_PM_LSU0_FLUSH_LRQ 209 #define POWER5p_PME_PM_INST_FROM_L275_MOD 210 #define POWER5p_PME_PM_GCT_EMPTY_CYC 211 #define POWER5p_PME_PM_LARX_LSU0 212 #define POWER5p_PME_PM_THRD_PRIO_DIFF_5or6_CYC 213 #define POWER5p_PME_PM_SNOOP_RETRY_1AHEAD 214 #define POWER5p_PME_PM_FPU1_FSQRT 215 #define POWER5p_PME_PM_MRK_LD_MISS_L1_LSU1 216 #define POWER5p_PME_PM_MRK_FPU_FIN 217 #define POWER5p_PME_PM_THRD_PRIO_5_CYC 218 #define POWER5p_PME_PM_MRK_DATA_FROM_LMEM 219 #define POWER5p_PME_PM_SNOOP_TLBIE 220 #define POWER5p_PME_PM_FPU1_FRSP_FCONV 221 #define POWER5p_PME_PM_DTLB_MISS_16G 222 #define POWER5p_PME_PM_L3SB_SNOOP_RETRY 223 #define POWER5p_PME_PM_FAB_VBYPASS_EMPTY 224 #define POWER5p_PME_PM_MRK_DATA_FROM_L275_MOD 225 #define POWER5p_PME_PM_L2SB_RCST_DISP 226 #define POWER5p_PME_PM_6INST_CLB_CYC 227 #define POWER5p_PME_PM_FLUSH 228 #define POWER5p_PME_PM_L2SC_MOD_INV 229 #define POWER5p_PME_PM_FPU_DENORM 230 #define POWER5p_PME_PM_L3SC_HIT 231 #define POWER5p_PME_PM_SNOOP_WR_RETRY_RQ 232 #define POWER5p_PME_PM_LSU1_REJECT_SRQ 233 #define 
POWER5p_PME_PM_L3SC_ALL_BUSY 234 #define POWER5p_PME_PM_IC_PREF_REQ 235 #define POWER5p_PME_PM_MRK_GRP_IC_MISS 236 #define POWER5p_PME_PM_GCT_NOSLOT_IC_MISS 237 #define POWER5p_PME_PM_MRK_DATA_FROM_L3 238 #define POWER5p_PME_PM_GCT_NOSLOT_SRQ_FULL 239 #define POWER5p_PME_PM_CMPLU_STALL_DCACHE_MISS 240 #define POWER5p_PME_PM_THRD_SEL_OVER_ISU_HOLD 241 #define POWER5p_PME_PM_LSU_FLUSH_LRQ 242 #define POWER5p_PME_PM_THRD_PRIO_2_CYC 243 #define POWER5p_PME_PM_L3SA_MOD_INV 244 #define POWER5p_PME_PM_LSU_FLUSH_SRQ 245 #define POWER5p_PME_PM_MRK_LSU_SRQ_INST_VALID 246 #define POWER5p_PME_PM_L3SA_REF 247 #define POWER5p_PME_PM_L2SC_RC_DISP_FAIL_CO_BUSY_ALL 248 #define POWER5p_PME_PM_FPU0_STALL3 249 #define POWER5p_PME_PM_TB_BIT_TRANS 250 #define POWER5p_PME_PM_GPR_MAP_FULL_CYC 251 #define POWER5p_PME_PM_MRK_LSU_FLUSH_LRQ 252 #define POWER5p_PME_PM_FPU0_STF 253 #define POWER5p_PME_PM_MRK_DTLB_MISS 254 #define POWER5p_PME_PM_FPU1_FMA 255 #define POWER5p_PME_PM_L2SA_MOD_TAG 256 #define POWER5p_PME_PM_LSU1_FLUSH_ULD 257 #define POWER5p_PME_PM_MRK_INST_FIN 258 #define POWER5p_PME_PM_MRK_LSU0_FLUSH_UST 259 #define POWER5p_PME_PM_FPU0_FULL_CYC 260 #define POWER5p_PME_PM_LSU_LRQ_S0_ALLOC 261 #define POWER5p_PME_PM_MRK_LSU1_FLUSH_ULD 262 #define POWER5p_PME_PM_MRK_DTLB_REF 263 #define POWER5p_PME_PM_BR_UNCOND 264 #define POWER5p_PME_PM_THRD_SEL_OVER_L2MISS 265 #define POWER5p_PME_PM_L2SB_SHR_INV 266 #define POWER5p_PME_PM_MEM_LO_PRIO_WR_CMPL 267 #define POWER5p_PME_PM_MRK_DTLB_MISS_64K 268 #define POWER5p_PME_PM_MRK_ST_MISS_L1 269 #define POWER5p_PME_PM_L3SC_MOD_TAG 270 #define POWER5p_PME_PM_GRP_DISP_SUCCESS 271 #define POWER5p_PME_PM_THRD_PRIO_DIFF_1or2_CYC 272 #define POWER5p_PME_PM_IC_DEMAND_L2_BHT_REDIRECT 273 #define POWER5p_PME_PM_LSU_DERAT_MISS 274 #define POWER5p_PME_PM_MEM_WQ_DISP_Q8to15 275 #define POWER5p_PME_PM_FPU0_SINGLE 276 #define POWER5p_PME_PM_THRD_PRIO_1_CYC 277 #define POWER5p_PME_PM_L2SC_RCST_DISP_FAIL_OTHER 278 #define POWER5p_PME_PM_SNOOP_RD_RETRY_RQ 279 
#define POWER5p_PME_PM_FAB_HOLDtoVN_EMPTY 280 #define POWER5p_PME_PM_FPU1_FEST 281 #define POWER5p_PME_PM_SNOOP_DCLAIM_RETRY_QFULL 282 #define POWER5p_PME_PM_MRK_DATA_FROM_L25_SHR_CYC 283 #define POWER5p_PME_PM_MRK_ST_CMPL_INT 284 #define POWER5p_PME_PM_FLUSH_BR_MPRED 285 #define POWER5p_PME_PM_MRK_DTLB_MISS_16G 286 #define POWER5p_PME_PM_FPU_STF 287 #define POWER5p_PME_PM_L2SB_RCLD_DISP_FAIL_ADDR 288 #define POWER5p_PME_PM_CMPLU_STALL_FPU 289 #define POWER5p_PME_PM_THRD_PRIO_DIFF_minus1or2_CYC 290 #define POWER5p_PME_PM_GCT_NOSLOT_CYC 291 #define POWER5p_PME_PM_FXU0_BUSY_FXU1_IDLE 292 #define POWER5p_PME_PM_PTEG_FROM_L35_SHR 293 #define POWER5p_PME_PM_MRK_DTLB_REF_16G 294 #define POWER5p_PME_PM_MRK_LSU_FLUSH_UST 295 #define POWER5p_PME_PM_MRK_DATA_FROM_L25_SHR 296 #define POWER5p_PME_PM_L3SA_HIT 297 #define POWER5p_PME_PM_MRK_DATA_FROM_L35_SHR 298 #define POWER5p_PME_PM_L2SB_RCST_DISP_FAIL_ADDR 299 #define POWER5p_PME_PM_IERAT_XLATE_WR 300 #define POWER5p_PME_PM_L2SA_ST_REQ 301 #define POWER5p_PME_PM_INST_FROM_LMEM 302 #define POWER5p_PME_PM_THRD_SEL_T1 303 #define POWER5p_PME_PM_IC_DEMAND_L2_BR_REDIRECT 304 #define POWER5p_PME_PM_MRK_DATA_FROM_L35_SHR_CYC 305 #define POWER5p_PME_PM_FPU0_1FLOP 306 #define POWER5p_PME_PM_PTEG_FROM_L2 307 #define POWER5p_PME_PM_MEM_PW_CMPL 308 #define POWER5p_PME_PM_THRD_PRIO_DIFF_minus5or6_CYC 309 #define POWER5p_PME_PM_L2SB_RCLD_DISP_FAIL_OTHER 310 #define POWER5p_PME_PM_MRK_DTLB_MISS_4K 311 #define POWER5p_PME_PM_FPU0_FIN 312 #define POWER5p_PME_PM_L3SC_SHR_INV 313 #define POWER5p_PME_PM_GRP_BR_REDIR 314 #define POWER5p_PME_PM_L2SC_RCLD_DISP_FAIL_RC_FULL 315 #define POWER5p_PME_PM_MRK_LSU_FLUSH_SRQ 316 #define POWER5p_PME_PM_PTEG_FROM_L275_SHR 317 #define POWER5p_PME_PM_L2SB_RCLD_DISP_FAIL_RC_FULL 318 #define POWER5p_PME_PM_SNOOP_RD_RETRY_WQ 319 #define POWER5p_PME_PM_FAB_DCLAIM_RETRIED 320 #define POWER5p_PME_PM_LSU0_NCLD 321 #define POWER5p_PME_PM_LSU1_BUSY_REJECT 322 #define POWER5p_PME_PM_FXLS0_FULL_CYC 323 #define 
POWER5p_PME_PM_DTLB_REF_16M 324 #define POWER5p_PME_PM_FPU0_FEST 325 #define POWER5p_PME_PM_GCT_USAGE_60to79_CYC 326 #define POWER5p_PME_PM_DATA_FROM_L25_MOD 327 #define POWER5p_PME_PM_L2SC_RCLD_DISP_FAIL_ADDR 328 #define POWER5p_PME_PM_LSU0_REJECT_ERAT_MISS 329 #define POWER5p_PME_PM_DATA_FROM_L375_MOD 330 #define POWER5p_PME_PM_LSU_LMQ_SRQ_EMPTY_CYC 331 #define POWER5p_PME_PM_DTLB_MISS_64K 332 #define POWER5p_PME_PM_LSU0_REJECT_RELOAD_CDF 333 #define POWER5p_PME_PM_0INST_FETCH 334 #define POWER5p_PME_PM_LSU1_REJECT_RELOAD_CDF 335 #define POWER5p_PME_PM_MEM_WQ_DISP_Q0to7 336 #define POWER5p_PME_PM_L1_PREF 337 #define POWER5p_PME_PM_MRK_DATA_FROM_LMEM_CYC 338 #define POWER5p_PME_PM_BRQ_FULL_CYC 339 #define POWER5p_PME_PM_GRP_IC_MISS_NONSPEC 340 #define POWER5p_PME_PM_PTEG_FROM_L275_MOD 341 #define POWER5p_PME_PM_MRK_LD_MISS_L1_LSU0 342 #define POWER5p_PME_PM_MRK_DATA_FROM_L375_SHR_CYC 343 #define POWER5p_PME_PM_DATA_FROM_L3 344 #define POWER5p_PME_PM_INST_FROM_L2 345 #define POWER5p_PME_PM_LSU_FLUSH 346 #define POWER5p_PME_PM_PMC2_OVERFLOW 347 #define POWER5p_PME_PM_FPU0_DENORM 348 #define POWER5p_PME_PM_FPU1_FMOV_FEST 349 #define POWER5p_PME_PM_INST_FETCH_CYC 350 #define POWER5p_PME_PM_INST_DISP 351 #define POWER5p_PME_PM_LSU_LDF 352 #define POWER5p_PME_PM_DATA_FROM_L25_SHR 353 #define POWER5p_PME_PM_L1_DCACHE_RELOAD_VALID 354 #define POWER5p_PME_PM_MEM_WQ_DISP_DCLAIM 355 #define POWER5p_PME_PM_MRK_GRP_ISSUED 356 #define POWER5p_PME_PM_FPU_FULL_CYC 357 #define POWER5p_PME_PM_INST_FROM_L35_MOD 358 #define POWER5p_PME_PM_FPU_FMA 359 #define POWER5p_PME_PM_THRD_PRIO_3_CYC 360 #define POWER5p_PME_PM_MRK_CRU_FIN 361 #define POWER5p_PME_PM_SNOOP_WR_RETRY_WQ 362 #define POWER5p_PME_PM_CMPLU_STALL_REJECT 363 #define POWER5p_PME_PM_MRK_FXU_FIN 364 #define POWER5p_PME_PM_LSU1_REJECT_ERAT_MISS 365 #define POWER5p_PME_PM_L2SB_RCST_DISP_FAIL_OTHER 366 #define POWER5p_PME_PM_L2SC_RC_DISP_FAIL_CO_BUSY 367 #define POWER5p_PME_PM_PMC4_OVERFLOW 368 #define 
POWER5p_PME_PM_L3SA_SNOOP_RETRY 369 #define POWER5p_PME_PM_PTEG_FROM_L35_MOD 370 #define POWER5p_PME_PM_INST_FROM_L25_MOD 371 #define POWER5p_PME_PM_THRD_SMT_HANG 372 #define POWER5p_PME_PM_CMPLU_STALL_ERAT_MISS 373 #define POWER5p_PME_PM_L3SA_MOD_TAG 374 #define POWER5p_PME_PM_INST_FROM_L2MISS 375 #define POWER5p_PME_PM_FLUSH_SYNC 376 #define POWER5p_PME_PM_MRK_GRP_DISP 377 #define POWER5p_PME_PM_MEM_RQ_DISP_Q8to11 378 #define POWER5p_PME_PM_L2SC_ST_HIT 379 #define POWER5p_PME_PM_L2SB_MOD_TAG 380 #define POWER5p_PME_PM_CLB_EMPTY_CYC 381 #define POWER5p_PME_PM_L2SB_ST_HIT 382 #define POWER5p_PME_PM_MEM_NONSPEC_RD_CANCEL 383 #define POWER5p_PME_PM_BR_PRED_CR_TA 384 #define POWER5p_PME_PM_MRK_LSU0_FLUSH_SRQ 385 #define POWER5p_PME_PM_MRK_LSU_FLUSH_ULD 386 #define POWER5p_PME_PM_INST_DISP_ATTEMPT 387 #define POWER5p_PME_PM_INST_FROM_RMEM 388 #define POWER5p_PME_PM_ST_REF_L1_LSU0 389 #define POWER5p_PME_PM_LSU0_DERAT_MISS 390 #define POWER5p_PME_PM_FPU_STALL3 391 #define POWER5p_PME_PM_L2SB_RCLD_DISP 392 #define POWER5p_PME_PM_BR_PRED_CR 393 #define POWER5p_PME_PM_MRK_DATA_FROM_L2 394 #define POWER5p_PME_PM_LSU0_FLUSH_SRQ 395 #define POWER5p_PME_PM_FAB_PNtoNN_DIRECT 396 #define POWER5p_PME_PM_IOPS_CMPL 397 #define POWER5p_PME_PM_L2SA_RCST_DISP 398 #define POWER5p_PME_PM_L2SA_RCST_DISP_FAIL_OTHER 399 #define POWER5p_PME_PM_L2SC_SHR_INV 400 #define POWER5p_PME_PM_SNOOP_RETRY_AB_COLLISION 401 #define POWER5p_PME_PM_FAB_PNtoVN_SIDECAR 402 #define POWER5p_PME_PM_LSU0_REJECT_LMQ_FULL 403 #define POWER5p_PME_PM_LSU_LMQ_S0_ALLOC 404 #define POWER5p_PME_PM_SNOOP_PW_RETRY_RQ 405 #define POWER5p_PME_PM_DTLB_REF 406 #define POWER5p_PME_PM_PTEG_FROM_L3 407 #define POWER5p_PME_PM_FAB_M1toVNorNN_SIDECAR_EMPTY 408 #define POWER5p_PME_PM_LSU_SRQ_EMPTY_CYC 409 #define POWER5p_PME_PM_FPU1_STF 410 #define POWER5p_PME_PM_LSU_LMQ_S0_VALID 411 #define POWER5p_PME_PM_GCT_USAGE_00to59_CYC 412 #define POWER5p_PME_PM_FPU_FMOV_FEST 413 #define POWER5p_PME_PM_DATA_FROM_L2MISS 414 #define 
POWER5p_PME_PM_XER_MAP_FULL_CYC 415 #define POWER5p_PME_PM_GRP_DISP_BLK_SB_CYC 416 #define POWER5p_PME_PM_FLUSH_SB 417 #define POWER5p_PME_PM_MRK_DATA_FROM_L375_SHR 418 #define POWER5p_PME_PM_MRK_GRP_CMPL 419 #define POWER5p_PME_PM_SUSPENDED 420 #define POWER5p_PME_PM_SNOOP_RD_RETRY_QFULL 421 #define POWER5p_PME_PM_GRP_IC_MISS_BR_REDIR_NONSPEC 422 #define POWER5p_PME_PM_DATA_FROM_L35_SHR 423 #define POWER5p_PME_PM_L3SB_MOD_INV 424 #define POWER5p_PME_PM_STCX_FAIL 425 #define POWER5p_PME_PM_LD_MISS_L1_LSU1 426 #define POWER5p_PME_PM_GRP_DISP 427 #define POWER5p_PME_PM_DC_PREF_DST 428 #define POWER5p_PME_PM_FPU1_DENORM 429 #define POWER5p_PME_PM_FPU0_FPSCR 430 #define POWER5p_PME_PM_DATA_FROM_L2 431 #define POWER5p_PME_PM_L2SA_RCLD_DISP_FAIL_ADDR 432 #define POWER5p_PME_PM_FPU_1FLOP 433 #define POWER5p_PME_PM_L2SC_RCLD_DISP_FAIL_OTHER 434 #define POWER5p_PME_PM_FPU0_FSQRT 435 #define POWER5p_PME_PM_L2SC_RCST_DISP_FAIL_RC_FULL 436 #define POWER5p_PME_PM_LD_REF_L1 437 #define POWER5p_PME_PM_INST_FROM_L1 438 #define POWER5p_PME_PM_TLBIE_HELD 439 #define POWER5p_PME_PM_DC_PREF_OUT_OF_STREAMS 440 #define POWER5p_PME_PM_MRK_DATA_FROM_L25_MOD_CYC 441 #define POWER5p_PME_PM_MRK_LSU1_FLUSH_SRQ 442 #define POWER5p_PME_PM_MEM_RQ_DISP_Q0to3 443 #define POWER5p_PME_PM_ST_REF_L1_LSU1 444 #define POWER5p_PME_PM_MRK_LD_MISS_L1 445 #define POWER5p_PME_PM_L1_WRITE_CYC 446 #define POWER5p_PME_PM_L2SC_ST_REQ 447 #define POWER5p_PME_PM_CMPLU_STALL_FDIV 448 #define POWER5p_PME_PM_THRD_SEL_OVER_CLB_EMPTY 449 #define POWER5p_PME_PM_BR_MPRED_CR 450 #define POWER5p_PME_PM_L3SB_MOD_TAG 451 #define POWER5p_PME_PM_MRK_DATA_FROM_L2MISS 452 #define POWER5p_PME_PM_LSU_REJECT_SRQ 453 #define POWER5p_PME_PM_LD_MISS_L1 454 #define POWER5p_PME_PM_INST_FROM_PREF 455 #define POWER5p_PME_PM_STCX_PASS 456 #define POWER5p_PME_PM_DC_INV_L2 457 #define POWER5p_PME_PM_LSU_SRQ_FULL_CYC 458 #define POWER5p_PME_PM_FPU_FIN 459 #define POWER5p_PME_PM_LSU_SRQ_STFWD 460 #define POWER5p_PME_PM_L2SA_SHR_MOD 461 #define 
POWER5p_PME_PM_0INST_CLB_CYC 462 #define POWER5p_PME_PM_FXU0_FIN 463 #define POWER5p_PME_PM_L2SB_RCST_DISP_FAIL_RC_FULL 464 #define POWER5p_PME_PM_THRD_GRP_CMPL_BOTH_CYC 465 #define POWER5p_PME_PM_PMC5_OVERFLOW 466 #define POWER5p_PME_PM_FPU0_FDIV 467 #define POWER5p_PME_PM_PTEG_FROM_L375_SHR 468 #define POWER5p_PME_PM_HV_CYC 469 #define POWER5p_PME_PM_L2SA_RC_DISP_FAIL_CO_BUSY 470 #define POWER5p_PME_PM_THRD_PRIO_DIFF_0_CYC 471 #define POWER5p_PME_PM_LR_CTR_MAP_FULL_CYC 472 #define POWER5p_PME_PM_L3SB_SHR_INV 473 #define POWER5p_PME_PM_DATA_FROM_RMEM 474 #define POWER5p_PME_PM_DATA_FROM_L275_MOD 475 #define POWER5p_PME_PM_LSU0_REJECT_SRQ 476 #define POWER5p_PME_PM_LSU1_DERAT_MISS 477 #define POWER5p_PME_PM_MRK_LSU_FIN 478 #define POWER5p_PME_PM_DTLB_MISS_16M 479 #define POWER5p_PME_PM_LSU0_FLUSH_UST 480 #define POWER5p_PME_PM_L2SB_RC_DISP_FAIL_CO_BUSY 481 #define POWER5p_PME_PM_L2SC_MOD_TAG 482 static const pme_power_entry_t power5p_pe[] = { [ POWER5p_PME_PM_LSU_REJECT_RELOAD_CDF ] = { .pme_name = "PM_LSU_REJECT_RELOAD_CDF", .pme_code = 0x2c4090, .pme_short_desc = "LSU reject due to reload CDF or tag update collision", .pme_long_desc = "Total cycles the Load Store Unit is busy rejecting instructions because of Critical Data Forward. When critical data arrives from the storage system it is formatted and immediately forwarded, bypassing the data cache, to the destination register using the result bus. Any instruction the requires the result bus in the same cycle is rejected. Tag update rejects are caused when an instruction requires access to the Dcache directory or ERAT in the same system when they are being updated. 
Combined Unit 0 + 1.", }, [ POWER5p_PME_PM_FPU1_SINGLE ] = { .pme_name = "PM_FPU1_SINGLE", .pme_code = 0x20e7, .pme_short_desc = "FPU1 executed single precision instruction", .pme_long_desc = "FPU1 has executed a single precision instruction.", }, [ POWER5p_PME_PM_L3SB_REF ] = { .pme_name = "PM_L3SB_REF", .pme_code = 0x701c4, .pme_short_desc = "L3 slice B references", .pme_long_desc = "Number of attempts made by this chip cores to find data in the L3. Reported per L3 slice", }, [ POWER5p_PME_PM_THRD_PRIO_DIFF_3or4_CYC ] = { .pme_name = "PM_THRD_PRIO_DIFF_3or4_CYC", .pme_code = 0x430e5, .pme_short_desc = "Cycles thread priority difference is 3 or 4", .pme_long_desc = "Cycles when this thread's priority is higher than the other thread's priority by 3 or 4.", }, [ POWER5p_PME_PM_INST_FROM_L275_SHR ] = { .pme_name = "PM_INST_FROM_L275_SHR", .pme_code = 0x322096, .pme_short_desc = "Instruction fetched from L2.75 shared", .pme_long_desc = "An instruction fetch group was fetched with shared (T) data from the L2 on a different module than this processor is located. Fetch groups can contain up to 8 instructions", }, [ POWER5p_PME_PM_MRK_DATA_FROM_L375_MOD ] = { .pme_name = "PM_MRK_DATA_FROM_L375_MOD", .pme_code = 0x1c70a7, .pme_short_desc = "Marked data loaded from L3.75 modified", .pme_long_desc = "The processor's Data Cache was reloaded with modified (M) data from the L3 of a chip on a different module than this processor is located due to a marked load.", }, [ POWER5p_PME_PM_DTLB_MISS_4K ] = { .pme_name = "PM_DTLB_MISS_4K", .pme_code = 0x1c208d, .pme_short_desc = "Data TLB miss for 4K page", .pme_long_desc = "Data TLB references to 4KB pages that missed the TLB. 
Page size is determined at TLB reload time.", }, [ POWER5p_PME_PM_CLB_FULL_CYC ] = { .pme_name = "PM_CLB_FULL_CYC", .pme_code = 0x220e5, .pme_short_desc = "Cycles CLB full", .pme_long_desc = "Cycles when both thread's CLB is full.", }, [ POWER5p_PME_PM_MRK_ST_CMPL ] = { .pme_name = "PM_MRK_ST_CMPL", .pme_code = 0x100003, .pme_short_desc = "Marked store instruction completed", .pme_long_desc = "A sampled store has completed (data home)", }, [ POWER5p_PME_PM_LSU_FLUSH_LRQ_FULL ] = { .pme_name = "PM_LSU_FLUSH_LRQ_FULL", .pme_code = 0x320e7, .pme_short_desc = "Flush caused by LRQ full", .pme_long_desc = "This thread was flushed at dispatch because its Load Request Queue was full. This allows the other thread to have more machine resources for it to make progress while this thread is stalled.", }, [ POWER5p_PME_PM_MRK_DATA_FROM_L275_SHR ] = { .pme_name = "PM_MRK_DATA_FROM_L275_SHR", .pme_code = 0x3c7097, .pme_short_desc = "Marked data loaded from L2.75 shared", .pme_long_desc = "The processor's Data Cache was reloaded with shared (T) data from the L2 on a different module than this processor is located due to a marked load.", }, [ POWER5p_PME_PM_1INST_CLB_CYC ] = { .pme_name = "PM_1INST_CLB_CYC", .pme_code = 0x400c1, .pme_short_desc = "Cycles 1 instruction in CLB", .pme_long_desc = "The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific.", }, [ POWER5p_PME_PM_MEM_SPEC_RD_CANCEL ] = { .pme_name = "PM_MEM_SPEC_RD_CANCEL", .pme_code = 0x721e6, .pme_short_desc = "Speculative memory read cancelled", .pme_long_desc = "Speculative memory read cancelled (i.e. 
cresp = sourced by L2/L3)", }, [ POWER5p_PME_PM_MRK_DTLB_MISS_16M ] = { .pme_name = "PM_MRK_DTLB_MISS_16M", .pme_code = 0x3c608d, .pme_short_desc = "Marked Data TLB misses for 16M page", .pme_long_desc = "Marked Data TLB misses for 16M page", }, [ POWER5p_PME_PM_FPU_FDIV ] = { .pme_name = "PM_FPU_FDIV", .pme_code = 0x100088, .pme_short_desc = "FPU executed FDIV instruction", .pme_long_desc = "The floating point unit has executed a divide instruction. This could be fdiv, fdivs, fdiv., fdivs.. Combined Unit 0 + Unit 1.", }, [ POWER5p_PME_PM_FPU_SINGLE ] = { .pme_name = "PM_FPU_SINGLE", .pme_code = 0x102090, .pme_short_desc = "FPU executed single precision instruction", .pme_long_desc = "FPU is executing single precision instruction. Combined Unit 0 + Unit 1.", }, [ POWER5p_PME_PM_FPU0_FMA ] = { .pme_name = "PM_FPU0_FMA", .pme_code = 0xc1, .pme_short_desc = "FPU0 executed multiply-add instruction", .pme_long_desc = "The floating point unit has executed a multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs.", }, [ POWER5p_PME_PM_SLB_MISS ] = { .pme_name = "PM_SLB_MISS", .pme_code = 0x280088, .pme_short_desc = "SLB misses", .pme_long_desc = "Total of all Segment Lookaside Buffer (SLB) misses, Instructions + Data.", }, [ POWER5p_PME_PM_LSU1_FLUSH_LRQ ] = { .pme_name = "PM_LSU1_FLUSH_LRQ", .pme_code = 0xc00c6, .pme_short_desc = "LSU1 LRQ flushes", .pme_long_desc = "A load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte.", }, [ POWER5p_PME_PM_L2SA_ST_HIT ] = { .pme_name = "PM_L2SA_ST_HIT", .pme_code = 0x733e0, .pme_short_desc = "L2 slice A store hits", .pme_long_desc = "A store request made from the core hit in the L2 directory. 
This event is provided on each of the three L2 slices A, B, and C.", }, [ POWER5p_PME_PM_DTLB_MISS ] = { .pme_name = "PM_DTLB_MISS", .pme_code = 0x800c4, .pme_short_desc = "Data TLB misses", .pme_long_desc = "Data TLB misses, all page sizes.", }, [ POWER5p_PME_PM_BR_PRED_TA ] = { .pme_name = "PM_BR_PRED_TA", .pme_code = 0x230e3, .pme_short_desc = "A conditional branch was predicted, target prediction", .pme_long_desc = "The target address of a branch instruction was predicted.", }, [ POWER5p_PME_PM_MRK_DATA_FROM_L375_MOD_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_L375_MOD_CYC", .pme_code = 0x4c70a7, .pme_short_desc = "Marked load latency from L3.75 modified", .pme_long_desc = "Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. To calculate average latency divide this count by the number of marked misses to the same level.", }, [ POWER5p_PME_PM_CMPLU_STALL_FXU ] = { .pme_name = "PM_CMPLU_STALL_FXU", .pme_code = 0x211099, .pme_short_desc = "Completion stall caused by FXU instruction", .pme_long_desc = "Following a completion stall (any period when no groups completed) the last instruction to finish before completion resumes was a fixed point instruction.", }, [ POWER5p_PME_PM_EXT_INT ] = { .pme_name = "PM_EXT_INT", .pme_code = 0x400003, .pme_short_desc = "External interrupts", .pme_long_desc = "An interrupt due to an external exception occurred", }, [ POWER5p_PME_PM_MRK_LSU1_FLUSH_LRQ ] = { .pme_name = "PM_MRK_LSU1_FLUSH_LRQ", .pme_code = 0x810c6, .pme_short_desc = "LSU1 marked LRQ flushes", .pme_long_desc = "A marked load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte.", }, [ POWER5p_PME_PM_MRK_ST_GPS ] = { .pme_name = "PM_MRK_ST_GPS", .pme_code = 
0x200003, .pme_short_desc = "Marked store sent to GPS", .pme_long_desc = "A sampled store has been sent to the memory subsystem", }, [ POWER5p_PME_PM_LSU1_LDF ] = { .pme_name = "PM_LSU1_LDF", .pme_code = 0xc50c4, .pme_short_desc = "LSU1 executed Floating Point load instruction", .pme_long_desc = "A floating point load was executed by LSU1", }, [ POWER5p_PME_PM_FAB_CMD_ISSUED ] = { .pme_name = "PM_FAB_CMD_ISSUED", .pme_code = 0x700c7, .pme_short_desc = "Fabric command issued", .pme_long_desc = "Incremented when a chip issues a command on its SnoopA address bus. Each of the two address busses (SnoopA and SnoopB) is capable of one transaction per fabric cycle (one fabric cycle = 2 cpu cycles in normal 2:1 mode), but each chip can only drive the SnoopA bus, and can only drive one transaction every two fabric cycles (i.e., every four cpu cycles). In MCM-based systems, two chips interleave their accesses to each of the two fabric busses (SnoopA, SnoopB) to reach a peak capability of one transaction per cpu clock cycle. The two chips that drive SnoopB are wired so that the chips refer to the bus as SnoopA but it is connected to the other two chips as SnoopB. Note that this event will only be recorded by the FBC on the chip that sourced the operation. The signal is delivered at FBC speed and the count must be scaled.", }, [ POWER5p_PME_PM_LSU0_SRQ_STFWD ] = { .pme_name = "PM_LSU0_SRQ_STFWD", .pme_code = 0xc60e1, .pme_short_desc = "LSU0 SRQ store forwarded", .pme_long_desc = "Data from a store instruction was forwarded to a load on unit 0. A load that misses L1 but becomes a store forward is treated as a load miss and it causes the DL1 load miss event to be counted. It does not go into the LMQ. 
If a load that hits L1 but becomes a store forward, then it's not treated as a load miss.", }, [ POWER5p_PME_PM_CR_MAP_FULL_CYC ] = { .pme_name = "PM_CR_MAP_FULL_CYC", .pme_code = 0x100c4, .pme_short_desc = "Cycles CR logical operation mapper full", .pme_long_desc = "The Conditional Register mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. This event only indicates that the mapper was full, not that dispatch was prevented.", }, [ POWER5p_PME_PM_L2SA_RCST_DISP_FAIL_RC_FULL ] = { .pme_name = "PM_L2SA_RCST_DISP_FAIL_RC_FULL", .pme_code = 0x722e0, .pme_short_desc = "L2 slice A RC store dispatch attempt failed due to all RC full", .pme_long_desc = "A Read/Claim dispatch for a store failed because all RC machines are busy.", }, [ POWER5p_PME_PM_MRK_LSU0_FLUSH_ULD ] = { .pme_name = "PM_MRK_LSU0_FLUSH_ULD", .pme_code = 0x810c1, .pme_short_desc = "LSU0 marked unaligned load flushes", .pme_long_desc = "A marked load was flushed from unit 0 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1)", }, [ POWER5p_PME_PM_LSU_FLUSH_SRQ_FULL ] = { .pme_name = "PM_LSU_FLUSH_SRQ_FULL", .pme_code = 0x330e0, .pme_short_desc = "Flush caused by SRQ full", .pme_long_desc = "This thread was flushed at dispatch because its Store Request Queue was full. This allows the other thread to have more machine resources for it to make progress while this thread is stalled.", }, [ POWER5p_PME_PM_MEM_RQ_DISP_Q16to19 ] = { .pme_name = "PM_MEM_RQ_DISP_Q16to19", .pme_code = 0x727e6, .pme_short_desc = "Memory read queue dispatched to queues 16-19", .pme_long_desc = "A memory operation was dispatched to read queue 16,17,18 or 19. 
This event is sent from the Memory Controller clock domain and must be scaled accordingly.", }, [ POWER5p_PME_PM_FLUSH_IMBAL ] = { .pme_name = "PM_FLUSH_IMBAL", .pme_code = 0x330e3, .pme_short_desc = "Flush caused by thread GCT imbalance", .pme_long_desc = "This thread has been flushed at dispatch because it is stalled and a GCT imbalance exists. GCT thresholds are set in the TSCR register. This allows the other thread to have more machine resources for it to make progress while this thread is stalled.", }, [ POWER5p_PME_PM_THRD_PRIO_DIFF_minus3or4_CYC ] = { .pme_name = "PM_THRD_PRIO_DIFF_minus3or4_CYC", .pme_code = 0x430e1, .pme_short_desc = "Cycles thread priority difference is -3 or -4", .pme_long_desc = "Cycles when this thread's priority is lower than the other thread's priority by 3 or 4.", }, [ POWER5p_PME_PM_DATA_FROM_L35_MOD ] = { .pme_name = "PM_DATA_FROM_L35_MOD", .pme_code = 0x2c309e, .pme_short_desc = "Data loaded from L3.5 modified", .pme_long_desc = "The processor's Data Cache was reloaded with modified (M) data from the L3 of a chip on the same module as this processor is located due to a demand load.", }, [ POWER5p_PME_PM_MEM_HI_PRIO_WR_CMPL ] = { .pme_name = "PM_MEM_HI_PRIO_WR_CMPL", .pme_code = 0x726e6, .pme_short_desc = "High priority write completed", .pme_long_desc = "A memory write, which was upgraded to high priority, completed. Writes can be upgraded to high priority to ensure that read traffic does not lock out writes. This event is sent from the Memory Controller clock domain and must be scaled accordingly.", }, [ POWER5p_PME_PM_FPU1_FDIV ] = { .pme_name = "PM_FPU1_FDIV", .pme_code = 0xc4, .pme_short_desc = "FPU1 executed FDIV instruction", .pme_long_desc = "FPU1 has executed a divide instruction. This could be fdiv, fdivs, fdiv. fdivs.", }, [ POWER5p_PME_PM_MEM_RQ_DISP ] = { .pme_name = "PM_MEM_RQ_DISP", .pme_code = 0x701c6, .pme_short_desc = "Memory read queue dispatched", .pme_long_desc = "A memory read was dispatched. 
This event is sent from the Memory Controller clock domain and must be scaled accordingly.", }, [ POWER5p_PME_PM_FPU0_FRSP_FCONV ] = { .pme_name = "PM_FPU0_FRSP_FCONV", .pme_code = 0x10c1, .pme_short_desc = "FPU0 executed FRSP or FCONV instructions", .pme_long_desc = "FPU0 has executed a frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs.", }, [ POWER5p_PME_PM_LWSYNC_HELD ] = { .pme_name = "PM_LWSYNC_HELD", .pme_code = 0x130e0, .pme_short_desc = "LWSYNC held at dispatch", .pme_long_desc = "Cycles a LWSYNC instruction was held at dispatch. LWSYNC instructions are held at dispatch until all previous loads are done and all previous stores have issued. LWSYNC enters the Store Request Queue and is sent to the storage subsystem but does not wait for a response.", }, [ POWER5p_PME_PM_FXU_FIN ] = { .pme_name = "PM_FXU_FIN", .pme_code = 0x313088, .pme_short_desc = "FXU produced a result", .pme_long_desc = "The fixed point unit (Unit 0 + Unit 1) finished an instruction. Instructions that finish may not necessary complete.", }, [ POWER5p_PME_PM_DSLB_MISS ] = { .pme_name = "PM_DSLB_MISS", .pme_code = 0x800c5, .pme_short_desc = "Data SLB misses", .pme_long_desc = "A SLB miss for a data request occurred. SLB misses trap to the operating system to resolve.", }, [ POWER5p_PME_PM_DATA_FROM_L275_SHR ] = { .pme_name = "PM_DATA_FROM_L275_SHR", .pme_code = 0x3c3097, .pme_short_desc = "Data loaded from L2.75 shared", .pme_long_desc = "The processor's Data Cache was reloaded with shared (T) data from the L2 on a different module than this processor is located due to a demand load.", }, [ POWER5p_PME_PM_FXLS1_FULL_CYC ] = { .pme_name = "PM_FXLS1_FULL_CYC", .pme_code = 0x110c4, .pme_short_desc = "Cycles FXU1/LS1 queue full", .pme_long_desc = "The issue queue that feeds the Fixed Point unit 1 / Load Store Unit 1 is full. This condition will prevent dispatch groups from being dispatched. 
This event only indicates that the queue was full, not that dispatch was prevented.", }, [ POWER5p_PME_PM_THRD_SEL_T0 ] = { .pme_name = "PM_THRD_SEL_T0", .pme_code = 0x410c0, .pme_short_desc = "Decode selected thread 0", .pme_long_desc = "Thread selection picked thread 0 for decode.", }, [ POWER5p_PME_PM_PTEG_RELOAD_VALID ] = { .pme_name = "PM_PTEG_RELOAD_VALID", .pme_code = 0x830e4, .pme_short_desc = "PTEG reload valid", .pme_long_desc = "A Page Table Entry was loaded into the TLB.", }, [ POWER5p_PME_PM_MRK_STCX_FAIL ] = { .pme_name = "PM_MRK_STCX_FAIL", .pme_code = 0x820e6, .pme_short_desc = "Marked STCX failed", .pme_long_desc = "A marked stcx (stwcx or stdcx) failed", }, [ POWER5p_PME_PM_LSU_LMQ_LHR_MERGE ] = { .pme_name = "PM_LSU_LMQ_LHR_MERGE", .pme_code = 0xc70e5, .pme_short_desc = "LMQ LHR merges", .pme_long_desc = "A data cache miss occurred for the same real cache line address as an earlier request already in the Load Miss Queue and was merged into the LMQ entry.", }, [ POWER5p_PME_PM_2INST_CLB_CYC ] = { .pme_name = "PM_2INST_CLB_CYC", .pme_code = 0x400c2, .pme_short_desc = "Cycles 2 instructions in CLB", .pme_long_desc = "The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific.", }, [ POWER5p_PME_PM_FAB_PNtoVN_DIRECT ] = { .pme_name = "PM_FAB_PNtoVN_DIRECT", .pme_code = 0x723e7, .pme_short_desc = "PN to VN beat went straight to its destination", .pme_long_desc = "Fabric Data beats that the base chip takes the inbound PN data and passes it through to the outbound VN bus without going into a sidecar. 
The signal is delivered at FBC speed and the count must be scaled accordingly.", }, [ POWER5p_PME_PM_PTEG_FROM_L2MISS ] = { .pme_name = "PM_PTEG_FROM_L2MISS", .pme_code = 0x38309b, .pme_short_desc = "PTEG loaded from L2 miss", .pme_long_desc = "A Page Table Entry was loaded into the TLB but not from the local L2.", }, [ POWER5p_PME_PM_CMPLU_STALL_LSU ] = { .pme_name = "PM_CMPLU_STALL_LSU", .pme_code = 0x211098, .pme_short_desc = "Completion stall caused by LSU instruction", .pme_long_desc = "Following a completion stall (any period when no groups completed) the last instruction to finish before completion resumes was a load/store instruction.", }, [ POWER5p_PME_PM_MRK_DSLB_MISS ] = { .pme_name = "PM_MRK_DSLB_MISS", .pme_code = 0xc50c7, .pme_short_desc = "Marked Data SLB misses", .pme_long_desc = "A Data SLB miss was caused by a marked instruction.", }, [ POWER5p_PME_PM_LSU_FLUSH_ULD ] = { .pme_name = "PM_LSU_FLUSH_ULD", .pme_code = 0x1c0088, .pme_short_desc = "LRQ unaligned load flushes", .pme_long_desc = "A load was flushed because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1). Combined Unit 0 + 1.", }, [ POWER5p_PME_PM_PTEG_FROM_LMEM ] = { .pme_name = "PM_PTEG_FROM_LMEM", .pme_code = 0x283087, .pme_short_desc = "PTEG loaded from local memory", .pme_long_desc = "A Page Table Entry was loaded into the TLB from memory attached to the same module this proccessor is located on.", }, [ POWER5p_PME_PM_MRK_BRU_FIN ] = { .pme_name = "PM_MRK_BRU_FIN", .pme_code = 0x200005, .pme_short_desc = "Marked instruction BRU processing finished", .pme_long_desc = "The branch unit finished a marked instruction. Instructions that finish may not necessary complete.", }, [ POWER5p_PME_PM_MEM_WQ_DISP_WRITE ] = { .pme_name = "PM_MEM_WQ_DISP_WRITE", .pme_code = 0x703c6, .pme_short_desc = "Memory write queue dispatched due to write", .pme_long_desc = "A memory write was dispatched to a write queue. 
This event is sent from the Memory Controller clock domain and must be scaled accordingly.", }, [ POWER5p_PME_PM_MRK_DATA_FROM_L275_MOD_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_L275_MOD_CYC", .pme_code = 0x4c70a3, .pme_short_desc = "Marked load latency from L2.75 modified", .pme_long_desc = "Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. To calculate average latency divide this count by the number of marked misses to the same level.", }, /* LSU1 event: the description names unit 1, matching the event name and short description. */ [ POWER5p_PME_PM_LSU1_NCLD ] = { .pme_name = "PM_LSU1_NCLD", .pme_code = 0xc50c5, .pme_short_desc = "LSU1 non-cacheable loads", .pme_long_desc = "A non-cacheable load was executed by Unit 1.", }, [ POWER5p_PME_PM_L2SA_RCLD_DISP_FAIL_OTHER ] = { .pme_name = "PM_L2SA_RCLD_DISP_FAIL_OTHER", .pme_code = 0x731e0, .pme_short_desc = "L2 slice A RC load dispatch attempt failed due to other reasons", .pme_long_desc = "A Read/Claim dispatch for a load failed for some reason other than Full or Collision conditions.", }, [ POWER5p_PME_PM_SNOOP_PW_RETRY_WQ_PWQ ] = { .pme_name = "PM_SNOOP_PW_RETRY_WQ_PWQ", .pme_code = 0x717c6, .pme_short_desc = "Snoop partial-write retry due to collision with active write or partial-write queue", .pme_long_desc = "A snoop request for a partial write to memory was retried because it matched the cache line of an active write or partial write. When this happens the snoop request is retried and the active write is changed to high priority. This event is sent from the Memory Controller clock domain and must be scaled accordingly.", }, [ POWER5p_PME_PM_FPU1_FULL_CYC ] = { .pme_name = "PM_FPU1_FULL_CYC", .pme_code = 0x100c7, .pme_short_desc = "Cycles FPU1 issue queue full", .pme_long_desc = "The issue queue for FPU1 cannot accept any more instructions. 
Dispatch to this issue queue is stopped", }, /* Mapper-full event: the description covers the FPR mapper, not executed FLOP instructions. */ [ POWER5p_PME_PM_FPR_MAP_FULL_CYC ] = { .pme_name = "PM_FPR_MAP_FULL_CYC", .pme_code = 0x100c1, .pme_short_desc = "Cycles FPR mapper full", .pme_long_desc = "The Floating Point Register mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. This event only indicates that the mapper was full, not that dispatch was prevented.", }, [ POWER5p_PME_PM_L3SA_ALL_BUSY ] = { .pme_name = "PM_L3SA_ALL_BUSY", .pme_code = 0x721e3, .pme_short_desc = "L3 slice A active for every cycle all CI/CO machines busy", .pme_long_desc = "Cycles All Castin/Castout machines are busy.", }, [ POWER5p_PME_PM_3INST_CLB_CYC ] = { .pme_name = "PM_3INST_CLB_CYC", .pme_code = 0x400c3, .pme_short_desc = "Cycles 3 instructions in CLB", .pme_long_desc = "The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific.", }, [ POWER5p_PME_PM_MEM_PWQ_DISP_Q2or3 ] = { .pme_name = "PM_MEM_PWQ_DISP_Q2or3", .pme_code = 0x734e6, .pme_short_desc = "Memory partial-write queue dispatched to Write Queue 2 or 3", .pme_long_desc = "Memory partial-write queue dispatched to Write Queue 2 or 3. This event is sent from the Memory Controller clock domain and must be scaled accordingly.", }, [ POWER5p_PME_PM_L2SA_SHR_INV ] = { .pme_name = "PM_L2SA_SHR_INV", .pme_code = 0x710c0, .pme_short_desc = "L2 slice A transition from shared to invalid", .pme_long_desc = "A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. This transition was caused by any external snoop request. The event is provided on each of the three slices A, B, and C. 
NOTE: For this event to be useful the tablewalk duration event should also be counted.", }, [ POWER5p_PME_PM_THRESH_TIMEO ] = { .pme_name = "PM_THRESH_TIMEO", .pme_code = 0x30000b, .pme_short_desc = "Threshold timeout", .pme_long_desc = "The threshold timer expired", }, [ POWER5p_PME_PM_L2SA_RC_DISP_FAIL_CO_BUSY_ALL ] = { .pme_name = "PM_L2SA_RC_DISP_FAIL_CO_BUSY_ALL", .pme_code = 0x713c0, .pme_short_desc = "L2 slice A RC dispatch attempt failed due to all CO busy", .pme_long_desc = "A Read/Claim dispatch was rejected because all Castout machines were busy.", }, [ POWER5p_PME_PM_THRD_SEL_OVER_GCT_IMBAL ] = { .pme_name = "PM_THRD_SEL_OVER_GCT_IMBAL", .pme_code = 0x410c4, .pme_short_desc = "Thread selection overrides caused by GCT imbalance", .pme_long_desc = "Thread selection was overridden because of a GCT imbalance.", }, [ POWER5p_PME_PM_FPU_FSQRT ] = { .pme_name = "PM_FPU_FSQRT", .pme_code = 0x200090, .pme_short_desc = "FPU executed FSQRT instruction", .pme_long_desc = "The floating point unit has executed a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. Combined Unit 0 + Unit 1.", }, [ POWER5p_PME_PM_PMC1_OVERFLOW ] = { .pme_name = "PM_PMC1_OVERFLOW", .pme_code = 0x20000a, .pme_short_desc = "PMC1 Overflow", .pme_long_desc = "Overflows from PMC1 are counted. This effectively widens the PMC. 
The Overflow from the original PMC will not trigger an exception even if the PMU is configured to generate exceptions on overflow.", }, [ POWER5p_PME_PM_MRK_LSU0_FLUSH_LRQ ] = { .pme_name = "PM_MRK_LSU0_FLUSH_LRQ", .pme_code = 0x810c2, .pme_short_desc = "LSU0 marked LRQ flushes", .pme_long_desc = "A marked load was flushed by unit 0 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte.", }, [ POWER5p_PME_PM_L3SC_SNOOP_RETRY ] = { .pme_name = "PM_L3SC_SNOOP_RETRY", .pme_code = 0x731e5, .pme_short_desc = "L3 slice C snoop retries", .pme_long_desc = "Number of times an L3 retried a snoop because it got two in at the same time (one on snp_a, one on snp_b)", }, [ POWER5p_PME_PM_DATA_TABLEWALK_CYC ] = { .pme_name = "PM_DATA_TABLEWALK_CYC", .pme_code = 0x800c7, .pme_short_desc = "Cycles doing data tablewalks", .pme_long_desc = "Cycles a translation tablewalk is active. While a tablewalk is active any request attempting to access the TLB will be rejected and retried.", }, [ POWER5p_PME_PM_THRD_PRIO_6_CYC ] = { .pme_name = "PM_THRD_PRIO_6_CYC", .pme_code = 0x420e5, .pme_short_desc = "Cycles thread running at priority level 6", .pme_long_desc = "Cycles this thread was running at priority level 6.", }, [ POWER5p_PME_PM_FPU_FEST ] = { .pme_name = "PM_FPU_FEST", .pme_code = 0x1010a8, .pme_short_desc = "FPU executed FEST instruction", .pme_long_desc = "The floating point unit has executed an estimate instructions. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. Combined Unit 0 + Unit 1.", }, [ POWER5p_PME_PM_FAB_M1toP1_SIDECAR_EMPTY ] = { .pme_name = "PM_FAB_M1toP1_SIDECAR_EMPTY", .pme_code = 0x702c7, .pme_short_desc = "M1 to P1 sidecar empty", .pme_long_desc = "Fabric cycles when the Minus-1 hip/hop sidecars (sidecars for chip to chip data transfer) are empty. 
The signal is delivered at FBC speed and the count must be scaled accordingly.", }, [ POWER5p_PME_PM_MRK_DATA_FROM_RMEM ] = { .pme_name = "PM_MRK_DATA_FROM_RMEM", .pme_code = 0x1c70a1, .pme_short_desc = "Marked data loaded from remote memory", .pme_long_desc = "The processor's Data Cache was reloaded due to a marked load from memory attached to a different module than this proccessor is located on.", }, [ POWER5p_PME_PM_MRK_DATA_FROM_L35_MOD_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_L35_MOD_CYC", .pme_code = 0x4c70a6, .pme_short_desc = "Marked load latency from L3.5 modified", .pme_long_desc = "Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. To calculate average latency divide this count by the number of marked misses to the same level.", }, [ POWER5p_PME_PM_MEM_PWQ_DISP ] = { .pme_name = "PM_MEM_PWQ_DISP", .pme_code = 0x704c6, .pme_short_desc = "Memory partial-write queue dispatched", .pme_long_desc = "Number of Partial Writes dispatched. The MC provides resources to gather partial cacheline writes (Partial line DMA writes & CI-stores) to up to four different cachelines at a time. This event is sent from the Memory Controller clock domain and must be scaled accordingly.", }, [ POWER5p_PME_PM_FAB_P1toM1_SIDECAR_EMPTY ] = { .pme_name = "PM_FAB_P1toM1_SIDECAR_EMPTY", .pme_code = 0x701c7, .pme_short_desc = "P1 to M1 sidecar empty", .pme_long_desc = "Fabric cycles when the Plus-1 hip/hop sidecars (sidecars for chip to chip data transfer) are empty. 
The signal is delivered at FBC speed and the count must be scaled accordingly.", }, [ POWER5p_PME_PM_LD_MISS_L1_LSU0 ] = { .pme_name = "PM_LD_MISS_L1_LSU0", .pme_code = 0xc10c2, .pme_short_desc = "LSU0 L1 D cache load misses", .pme_long_desc = "Load references that miss the Level 1 Data cache, by unit 0.", }, [ POWER5p_PME_PM_SNOOP_PARTIAL_RTRY_QFULL ] = { .pme_name = "PM_SNOOP_PARTIAL_RTRY_QFULL", .pme_code = 0x730e6, .pme_short_desc = "Snoop partial write retry due to partial-write queues full", .pme_long_desc = "A snoop request for a partial write to memory was retried because the write queues that handle partial writes were full. When this happens the active writes are changed to high priority. This event is sent from the Memory Controller clock domain and must be scaled accordingly.", }, [ POWER5p_PME_PM_FPU1_STALL3 ] = { .pme_name = "PM_FPU1_STALL3", .pme_code = 0x20e5, .pme_short_desc = "FPU1 stalled in pipe3", .pme_long_desc = "FPU1 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always).", }, [ POWER5p_PME_PM_GCT_USAGE_80to99_CYC ] = { .pme_name = "PM_GCT_USAGE_80to99_CYC", .pme_code = 0x30001f, .pme_short_desc = "Cycles GCT 80-99% full", .pme_long_desc = "Cycles when the Global Completion Table has between 80% and 99% of its slots used. 
The GCT has 20 entries shared between threads", }, [ POWER5p_PME_PM_WORK_HELD ] = { .pme_name = "PM_WORK_HELD", .pme_code = 0x40000c, .pme_short_desc = "Work held", .pme_long_desc = "RAS Unit has signaled completion to stop and there are groups waiting to complete", }, [ POWER5p_PME_PM_INST_CMPL ] = { .pme_name = "PM_INST_CMPL", .pme_code = 0x100009, .pme_short_desc = "Instructions completed", .pme_long_desc = "Number of PowerPC instructions that completed.", }, [ POWER5p_PME_PM_LSU1_FLUSH_UST ] = { .pme_name = "PM_LSU1_FLUSH_UST", .pme_code = 0xc00c5, .pme_short_desc = "LSU1 unaligned store flushes", .pme_long_desc = "A store was flushed from unit 1 because it was unaligned (crossed a 4K boundary)", }, [ POWER5p_PME_PM_FXU_IDLE ] = { .pme_name = "PM_FXU_IDLE", .pme_code = 0x100012, .pme_short_desc = "FXU idle", .pme_long_desc = "FXU0 and FXU1 are both idle.", }, [ POWER5p_PME_PM_LSU0_FLUSH_ULD ] = { .pme_name = "PM_LSU0_FLUSH_ULD", .pme_code = 0xc00c0, .pme_short_desc = "LSU0 unaligned load flushes", .pme_long_desc = "A load was flushed from unit 0 because it was unaligned (crossed a 64 byte boundary, or 32 byte if it missed the L1)", }, [ POWER5p_PME_PM_LSU1_REJECT_LMQ_FULL ] = { .pme_name = "PM_LSU1_REJECT_LMQ_FULL", .pme_code = 0xc40c5, .pme_short_desc = "LSU1 reject due to LMQ full or missed data coming", .pme_long_desc = "Total cycles the Load Store Unit 1 is busy rejecting instructions because the Load Miss Queue was full. The LMQ has eight entries. 
If all eight entries are full, subsequent load instructions are rejected.", }, [ POWER5p_PME_PM_GRP_DISP_REJECT ] = { .pme_name = "PM_GRP_DISP_REJECT", .pme_code = 0x120e4, .pme_short_desc = "Group dispatch rejected", .pme_long_desc = "A group that previously attempted dispatch was rejected.", }, [ POWER5p_PME_PM_PTEG_FROM_L25_SHR ] = { .pme_name = "PM_PTEG_FROM_L25_SHR", .pme_code = 0x183097, .pme_short_desc = "PTEG loaded from L2.5 shared", .pme_long_desc = "A Page Table Entry was loaded into the TLB with shared (T or SL) data from the L2 of a chip on the same module as this processor is located due to a demand load.", }, [ POWER5p_PME_PM_L2SA_MOD_INV ] = { .pme_name = "PM_L2SA_MOD_INV", .pme_code = 0x730e0, .pme_short_desc = "L2 slice A transition from modified to invalid", .pme_long_desc = "A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C.", }, [ POWER5p_PME_PM_FAB_CMD_RETRIED ] = { .pme_name = "PM_FAB_CMD_RETRIED", .pme_code = 0x710c7, .pme_short_desc = "Fabric command retried", .pme_long_desc = "Incremented when a command issued by a chip on its SnoopA address bus is retried for any reason. The overwhelming majority of retries are due to running out of memory controller queues but retries can also be caused by trying to reference addresses that are in a transient cache state -- e.g. a line is transient after issuing a DCLAIM instruction to a shared line but before the associated store completes. Each chip reports its own counts. 
The signal is delivered at FBC speed and the count must be scaled accordingly.", }, [ POWER5p_PME_PM_L3SA_SHR_INV ] = { .pme_name = "PM_L3SA_SHR_INV", .pme_code = 0x710c3, .pme_short_desc = "L3 slice A transition from shared to invalid", .pme_long_desc = "L3 snooper detects someone doing a store to a line that is Sx in this L3(i.e. invalidate hit SX and dispatched).", }, [ POWER5p_PME_PM_L2SB_RC_DISP_FAIL_CO_BUSY_ALL ] = { .pme_name = "PM_L2SB_RC_DISP_FAIL_CO_BUSY_ALL", .pme_code = 0x713c1, .pme_short_desc = "L2 slice B RC dispatch attempt failed due to all CO busy", .pme_long_desc = "A Read/Claim dispatch was rejected because all Castout machines were busy.", }, [ POWER5p_PME_PM_L2SA_RCST_DISP_FAIL_ADDR ] = { .pme_name = "PM_L2SA_RCST_DISP_FAIL_ADDR", .pme_code = 0x712c0, .pme_short_desc = "L2 slice A RC store dispatch attempt failed due to address collision with RC/CO/SN/SQ", .pme_long_desc = "A Read/Claim dispatch for a store failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time.", }, [ POWER5p_PME_PM_L2SA_RCLD_DISP_FAIL_RC_FULL ] = { .pme_name = "PM_L2SA_RCLD_DISP_FAIL_RC_FULL", .pme_code = 0x721e0, .pme_short_desc = "L2 slice A RC load dispatch attempt failed due to all RC full", .pme_long_desc = "A Read/Claim dispatch for a load failed because all RC machines are busy.", }, [ POWER5p_PME_PM_PTEG_FROM_L375_MOD ] = { .pme_name = "PM_PTEG_FROM_L375_MOD", .pme_code = 0x1830a7, .pme_short_desc = "PTEG loaded from L3.75 modified", .pme_long_desc = "A Page Table Entry was loaded into the TLB with modified (M) data from the L3 of a chip on a different module than this processor is located, due to a demand load.", }, [ POWER5p_PME_PM_MRK_LSU1_FLUSH_UST ] = { .pme_name = "PM_MRK_LSU1_FLUSH_UST", .pme_code = 0x810c5, .pme_short_desc = "LSU1 marked unaligned store flushes", .pme_long_desc = "A marked store was flushed from unit 1 because it was unaligned (crossed a 4k boundary)", 
}, [ POWER5p_PME_PM_BR_ISSUED ] = { .pme_name = "PM_BR_ISSUED", .pme_code = 0x230e4, .pme_short_desc = "Branches issued", .pme_long_desc = "A branch instruction was issued to the branch unit. A branch that was incorrectly predicted may issue and execute multiple times.", }, [ POWER5p_PME_PM_MRK_GRP_BR_REDIR ] = { .pme_name = "PM_MRK_GRP_BR_REDIR", .pme_code = 0x212091, .pme_short_desc = "Group experienced marked branch redirect", .pme_long_desc = "A group containing a marked (sampled) instruction experienced a branch redirect.", }, [ POWER5p_PME_PM_EE_OFF ] = { .pme_name = "PM_EE_OFF", .pme_code = 0x130e3, .pme_short_desc = "Cycles MSR(EE) bit off", .pme_long_desc = "Cycles MSR(EE) bit was off indicating that interrupts due to external exceptions were masked.", }, [ POWER5p_PME_PM_IERAT_XLATE_WR_LP ] = { .pme_name = "PM_IERAT_XLATE_WR_LP", .pme_code = 0x210c6, .pme_short_desc = "Large page translation written to ierat", .pme_long_desc = "An entry was written into the IERAT as a result of an IERAT miss. This event can be used to count IERAT misses. An ERAT miss that are later ignored will not be counted unless the ERAT is written before the instruction stream is changed.", }, [ POWER5p_PME_PM_DTLB_REF_64K ] = { .pme_name = "PM_DTLB_REF_64K", .pme_code = 0x2c2086, .pme_short_desc = "Data TLB reference for 64K page", .pme_long_desc = "Data TLB references for 64KB pages. Includes hits + misses.", }, [ POWER5p_PME_PM_MEM_RQ_DISP_Q4to7 ] = { .pme_name = "PM_MEM_RQ_DISP_Q4to7", .pme_code = 0x712c6, .pme_short_desc = "Memory read queue dispatched to queues 4-7", .pme_long_desc = "A memory operation was dispatched to read queue 4,5,6 or 7. 
This event is sent from the Memory Controller clock domain and must be scaled accordingly.", }, [ POWER5p_PME_PM_MEM_FAST_PATH_RD_DISP ] = { .pme_name = "PM_MEM_FAST_PATH_RD_DISP", .pme_code = 0x731e6, .pme_short_desc = "Fast path memory read dispatched", .pme_long_desc = "Fast path memory read dispatched", }, [ POWER5p_PME_PM_INST_FROM_L3 ] = { .pme_name = "PM_INST_FROM_L3", .pme_code = 0x12208d, .pme_short_desc = "Instruction fetched from L3", .pme_long_desc = "An instruction fetch group was fetched from the local L3. Fetch groups can contain up to 8 instructions", }, [ POWER5p_PME_PM_ITLB_MISS ] = { .pme_name = "PM_ITLB_MISS", .pme_code = 0x800c0, .pme_short_desc = "Instruction TLB misses", .pme_long_desc = "A TLB miss for an Instruction Fetch has occurred", }, [ POWER5p_PME_PM_FXU1_BUSY_FXU0_IDLE ] = { .pme_name = "PM_FXU1_BUSY_FXU0_IDLE", .pme_code = 0x400012, .pme_short_desc = "FXU1 busy FXU0 idle", .pme_long_desc = "FXU0 was idle while FXU1 was busy.", }, [ POWER5p_PME_PM_DTLB_REF_4K ] = { .pme_name = "PM_DTLB_REF_4K", .pme_code = 0x1c2086, .pme_short_desc = "Data TLB reference for 4K page", .pme_long_desc = "Data TLB references for 4KB pages. Includes hits + misses.", }, [ POWER5p_PME_PM_FXLS_FULL_CYC ] = { .pme_name = "PM_FXLS_FULL_CYC", .pme_code = 0x1110a8, .pme_short_desc = "Cycles FXLS queue is full", .pme_long_desc = "Cycles when the issue queues for one or both FXU/LSU units is full. Use with caution since this is the sum of cycles when Unit 0 was full plus Unit 1 full. It does not indicate when both units were full.", }, [ POWER5p_PME_PM_GRP_DISP_VALID ] = { .pme_name = "PM_GRP_DISP_VALID", .pme_code = 0x120e3, .pme_short_desc = "Group dispatch valid", .pme_long_desc = "A group is available for dispatch. 
This does not mean it was successfully dispatched.", }, [ POWER5p_PME_PM_LSU_FLUSH_UST ] = { .pme_name = "PM_LSU_FLUSH_UST", .pme_code = 0x2c0088, .pme_short_desc = "SRQ unaligned store flushes", .pme_long_desc = "A store was flushed because it was unaligned (crossed a 4K boundary). Combined Unit 0 + 1.", }, [ POWER5p_PME_PM_FXU1_FIN ] = { .pme_name = "PM_FXU1_FIN", .pme_code = 0x130e6, .pme_short_desc = "FXU1 produced a result", .pme_long_desc = "The Fixed Point unit 1 finished an instruction and produced a result. Instructions that finish may not necessary complete.", }, [ POWER5p_PME_PM_THRD_PRIO_4_CYC ] = { .pme_name = "PM_THRD_PRIO_4_CYC", .pme_code = 0x420e3, .pme_short_desc = "Cycles thread running at priority level 4", .pme_long_desc = "Cycles this thread was running at priority level 4.", }, [ POWER5p_PME_PM_MRK_DATA_FROM_L35_MOD ] = { .pme_name = "PM_MRK_DATA_FROM_L35_MOD", .pme_code = 0x2c709e, .pme_short_desc = "Marked data loaded from L3.5 modified", .pme_long_desc = "The processor's Data Cache was reloaded with modified (M) data from the L3 of a chip on the same module as this processor is located due to a marked load.", }, [ POWER5p_PME_PM_4INST_CLB_CYC ] = { .pme_name = "PM_4INST_CLB_CYC", .pme_code = 0x400c4, .pme_short_desc = "Cycles 4 instructions in CLB", .pme_long_desc = "The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. 
Each thread has its own set of CLB; these events are thread specific.", }, [ POWER5p_PME_PM_MRK_DTLB_REF_16M ] = { .pme_name = "PM_MRK_DTLB_REF_16M", .pme_code = 0x3c6086, .pme_short_desc = "Marked Data TLB reference for 16M page", .pme_long_desc = "Data TLB references by a marked instruction for 16MB pages.", }, [ POWER5p_PME_PM_INST_FROM_L375_MOD ] = { .pme_name = "PM_INST_FROM_L375_MOD", .pme_code = 0x42209d, .pme_short_desc = "Instruction fetched from L3.75 modified", .pme_long_desc = "An instruction fetch group was fetched with modified (M) data from the L3 of a chip on a different module than this processor is located. Fetch groups can contain up to 8 instructions", }, [ POWER5p_PME_PM_GRP_CMPL ] = { .pme_name = "PM_GRP_CMPL", .pme_code = 0x300013, .pme_short_desc = "Group completed", .pme_long_desc = "A group completed. Microcoded instructions that span multiple groups will generate this event once per group.", }, [ POWER5p_PME_PM_L2SC_RCST_DISP_FAIL_ADDR ] = { .pme_name = "PM_L2SC_RCST_DISP_FAIL_ADDR", .pme_code = 0x712c2, .pme_short_desc = "L2 slice C RC store dispatch attempt failed due to address collision with RC/CO/SN/SQ", .pme_long_desc = "A Read/Claim dispatch for a store failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time.", }, [ POWER5p_PME_PM_FPU1_1FLOP ] = { .pme_name = "PM_FPU1_1FLOP", .pme_code = 0xc7, .pme_short_desc = "FPU1 executed add, mult, sub, cmp or sel instruction", .pme_long_desc = "The floating point unit has executed an add, mult, sub, compare, fsel, fneg, fabs, fnabs, fres, or frsqrte kind of instruction. These are single FLOP operations.", }, [ POWER5p_PME_PM_FPU_FRSP_FCONV ] = { .pme_name = "PM_FPU_FRSP_FCONV", .pme_code = 0x2010a8, .pme_short_desc = "FPU executed FRSP or FCONV instructions", .pme_long_desc = "The floating point unit has executed a frsp or convert kind of instruction. 
This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. Combined Unit 0 + Unit 1.", }, [ POWER5p_PME_PM_L3SC_REF ] = { .pme_name = "PM_L3SC_REF", .pme_code = 0x701c5, .pme_short_desc = "L3 slice C references", .pme_long_desc = "Number of attempts made by this chip cores to find data in the L3. Reported per L3 slice.", }, [ POWER5p_PME_PM_5INST_CLB_CYC ] = { .pme_name = "PM_5INST_CLB_CYC", .pme_code = 0x400c5, .pme_short_desc = "Cycles 5 instructions in CLB", .pme_long_desc = "The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific.", }, [ POWER5p_PME_PM_THRD_L2MISS_BOTH_CYC ] = { .pme_name = "PM_THRD_L2MISS_BOTH_CYC", .pme_code = 0x410c7, .pme_short_desc = "Cycles both threads in L2 misses", .pme_long_desc = "Cycles that both threads have L2 miss pending. If only one thread has a L2 miss pending the other thread is given priority at decode. If both threads have L2 miss pending decode priority is determined by the number of GCT entries used.", }, [ POWER5p_PME_PM_MEM_PW_GATH ] = { .pme_name = "PM_MEM_PW_GATH", .pme_code = 0x714c6, .pme_short_desc = "Memory partial-write gathered", .pme_long_desc = "Two or more partial-writes have been merged into a single memory write. This event is sent from the Memory Controller clock domain and must be scaled accordingly.", }, [ POWER5p_PME_PM_DTLB_REF_16G ] = { .pme_name = "PM_DTLB_REF_16G", .pme_code = 0x4c2086, .pme_short_desc = "Data TLB reference for 16G page", .pme_long_desc = "Data TLB references for 16GB pages. 
Includes hits + misses.", }, [ POWER5p_PME_PM_FAB_DCLAIM_ISSUED ] = { .pme_name = "PM_FAB_DCLAIM_ISSUED", .pme_code = 0x720e7, .pme_short_desc = "dclaim issued", .pme_long_desc = "A DCLAIM command was issued. Each chip reports its own counts. The signal is delivered at FBC speed and the count must be scaled accordingly.", }, [ POWER5p_PME_PM_FAB_PNtoNN_SIDECAR ] = { .pme_name = "PM_FAB_PNtoNN_SIDECAR", .pme_code = 0x713c7, .pme_short_desc = "PN to NN beat went to sidecar first", .pme_long_desc = "Fabric Data beats that the base chip takes the inbound PN data and forwards it on to the outbound NN data bus after going into a sidecar first. The signal is delivered at FBC speed and the count must be scaled.", }, [ POWER5p_PME_PM_GRP_IC_MISS ] = { .pme_name = "PM_GRP_IC_MISS", .pme_code = 0x120e7, .pme_short_desc = "Group experienced I cache miss", .pme_long_desc = "Number of groups, counted at dispatch, that have encountered an icache miss redirect. Every group constructed from a fetch group that missed the instruction cache will count.", }, [ POWER5p_PME_PM_INST_FROM_L35_SHR ] = { .pme_name = "PM_INST_FROM_L35_SHR", .pme_code = 0x12209d, .pme_short_desc = "Instruction fetched from L3.5 shared", .pme_long_desc = "An instruction fetch group was fetched with shared (S) data from the L3 of a chip on the same module as this processor is located. Fetch groups can contain up to 8 instructions", }, [ POWER5p_PME_PM_LSU_LMQ_FULL_CYC ] = { .pme_name = "PM_LSU_LMQ_FULL_CYC", .pme_code = 0xc30e7, .pme_short_desc = "Cycles LMQ full", .pme_long_desc = "The Load Miss Queue was full.", }, [ POWER5p_PME_PM_MRK_DATA_FROM_L2_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_L2_CYC", .pme_code = 0x2c70a0, .pme_short_desc = "Marked load latency from L2", .pme_long_desc = "Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. 
To calculate average latency divide this count by the number of marked misses to the same level.", }, [ POWER5p_PME_PM_LSU_SRQ_SYNC_CYC ] = { .pme_name = "PM_LSU_SRQ_SYNC_CYC", .pme_code = 0x830e5, .pme_short_desc = "SRQ sync duration", .pme_long_desc = "Cycles that a sync instruction is active in the Store Request Queue.", }, [ POWER5p_PME_PM_LSU0_BUSY_REJECT ] = { .pme_name = "PM_LSU0_BUSY_REJECT", .pme_code = 0xc20e1, .pme_short_desc = "LSU0 busy due to reject", .pme_long_desc = "Total cycles the Load Store Unit 0 is busy rejecting instructions.", }, [ POWER5p_PME_PM_LSU_REJECT_ERAT_MISS ] = { .pme_name = "PM_LSU_REJECT_ERAT_MISS", .pme_code = 0x1c4090, .pme_short_desc = "LSU reject due to ERAT miss", .pme_long_desc = "Total cycles the Load Store Unit is busy rejecting instructions due to an ERAT miss. Combined unit 0 + 1. Requests that miss the Derat are rejected and retried until the request hits in the Erat.", }, [ POWER5p_PME_PM_MRK_DATA_FROM_RMEM_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_RMEM_CYC", .pme_code = 0x4c70a1, .pme_short_desc = "Marked load latency from remote memory", .pme_long_desc = "Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. 
To calculate average latency divide this count by the number of marked misses to the same level.", }, [ POWER5p_PME_PM_DATA_FROM_L375_SHR ] = { .pme_name = "PM_DATA_FROM_L375_SHR", .pme_code = 0x3c309e, .pme_short_desc = "Data loaded from L3.75 shared", .pme_long_desc = "The processor's Data Cache was reloaded with shared (S) data from the L3 of a chip on a different module than this processor is located due to a demand load.", }, [ POWER5p_PME_PM_PTEG_FROM_L25_MOD ] = { .pme_name = "PM_PTEG_FROM_L25_MOD", .pme_code = 0x283097, .pme_short_desc = "PTEG loaded from L2.5 modified", .pme_long_desc = "A Page Table Entry was loaded into the TLB with modified (M) data from the L2 of a chip on the same module as this processor is located due to a demand load.", }, [ POWER5p_PME_PM_FPU0_FMOV_FEST ] = { .pme_name = "PM_FPU0_FMOV_FEST", .pme_code = 0x10c0, .pme_short_desc = "FPU0 executed FMOV or FEST instructions", .pme_long_desc = "FPU0 has executed a move kind of instruction or one of the estimate instructions. 
This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ.", }, [ POWER5p_PME_PM_THRD_PRIO_7_CYC ] = { .pme_name = "PM_THRD_PRIO_7_CYC", .pme_code = 0x420e6, .pme_short_desc = "Cycles thread running at priority level 7", .pme_long_desc = "Cycles this thread was running at priority level 7.", }, [ POWER5p_PME_PM_LSU1_FLUSH_SRQ ] = { .pme_name = "PM_LSU1_FLUSH_SRQ", .pme_code = 0xc00c7, .pme_short_desc = "LSU1 SRQ lhs flushes", .pme_long_desc = "A store was flushed because younger load hits and older store that is already in the SRQ or in the same group.", }, [ POWER5p_PME_PM_LD_REF_L1_LSU0 ] = { .pme_name = "PM_LD_REF_L1_LSU0", .pme_code = 0xc10c0, .pme_short_desc = "LSU0 L1 D cache load references", .pme_long_desc = "Load references to Level 1 Data Cache, by unit 0.", }, [ POWER5p_PME_PM_L2SC_RCST_DISP ] = { .pme_name = "PM_L2SC_RCST_DISP", .pme_code = 0x702c2, .pme_short_desc = "L2 slice C RC store dispatch attempt", .pme_long_desc = "A Read/Claim dispatch for a Store was attempted.", }, [ POWER5p_PME_PM_CMPLU_STALL_DIV ] = { .pme_name = "PM_CMPLU_STALL_DIV", .pme_code = 0x411099, .pme_short_desc = "Completion stall caused by DIV instruction", .pme_long_desc = "Following a completion stall (any period when no groups completed) the last instruction to finish before completion resumes was a fixed point divide instruction. This is a subset of PM_CMPLU_STALL_FXU.", }, [ POWER5p_PME_PM_MEM_RQ_DISP_Q12to15 ] = { .pme_name = "PM_MEM_RQ_DISP_Q12to15", .pme_code = 0x732e6, .pme_short_desc = "Memory read queue dispatched to queues 12-15", .pme_long_desc = "A memory operation was dispatched to read queue 12,13,14 or 15. 
This event is sent from the Memory Controller clock domain and must be scaled accordingly.", }, [ POWER5p_PME_PM_INST_FROM_L375_SHR ] = { .pme_name = "PM_INST_FROM_L375_SHR", .pme_code = 0x32209d, .pme_short_desc = "Instruction fetched from L3.75 shared", .pme_long_desc = "An instruction fetch group was fetched with shared (S) data from the L3 of a chip on a different module than this processor is located. Fetch groups can contain up to 8 instructions", }, [ POWER5p_PME_PM_ST_REF_L1 ] = { .pme_name = "PM_ST_REF_L1", .pme_code = 0x2c10a8, .pme_short_desc = "L1 D cache store references", .pme_long_desc = "Store references to the Data Cache. Combined Unit 0 + 1.", }, [ POWER5p_PME_PM_L3SB_ALL_BUSY ] = { .pme_name = "PM_L3SB_ALL_BUSY", .pme_code = 0x721e4, .pme_short_desc = "L3 slice B active for every cycle all CI/CO machines busy", .pme_long_desc = "Cycles All Castin/Castout machines are busy.", }, [ POWER5p_PME_PM_FAB_P1toVNorNN_SIDECAR_EMPTY ] = { .pme_name = "PM_FAB_P1toVNorNN_SIDECAR_EMPTY", .pme_code = 0x711c7, .pme_short_desc = "P1 to VN/NN sidecar empty", .pme_long_desc = "Fabric cycles when the Plus-1 jump sidecar (sidecars for mcm to mcm data transfer) is empty. The signal is delivered at FBC speed and the count must be scaled accordingly.", }, [ POWER5p_PME_PM_MRK_DATA_FROM_L275_SHR_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_L275_SHR_CYC", .pme_code = 0x2c70a3, .pme_short_desc = "Marked load latency from L2.75 shared", .pme_long_desc = "Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. To calculate average latency divide this count by the number of marked misses to the same level.", }, [ POWER5p_PME_PM_FAB_HOLDtoNN_EMPTY ] = { .pme_name = "PM_FAB_HOLDtoNN_EMPTY", .pme_code = 0x722e7, .pme_short_desc = "Hold buffer to NN empty", .pme_long_desc = "Fabric cyles when the Next Node out hold-buffers are emtpy. 
The signal is delivered at FBC speed and the count must be scaled accordingly.", }, [ POWER5p_PME_PM_DATA_FROM_LMEM ] = { .pme_name = "PM_DATA_FROM_LMEM", .pme_code = 0x2c3087, .pme_short_desc = "Data loaded from local memory", .pme_long_desc = "The processor's Data Cache was reloaded from memory attached to the same module this proccessor is located on.", }, [ POWER5p_PME_PM_RUN_CYC ] = { .pme_name = "PM_RUN_CYC", .pme_code = 0x100005, .pme_short_desc = "Run cycles", .pme_long_desc = "Processor Cycles gated by the run latch. Operating systems use the run latch to indicate when they are doing useful work. The run latch is typically cleared in the OS idle loop. Gating by the run latch filters out the idle loop.", }, [ POWER5p_PME_PM_PTEG_FROM_RMEM ] = { .pme_name = "PM_PTEG_FROM_RMEM", .pme_code = 0x1830a1, .pme_short_desc = "PTEG loaded from remote memory", .pme_long_desc = "A Page Table Entry was loaded into the TLB from memory attached to a different module than this proccessor is located on.", }, [ POWER5p_PME_PM_L2SC_RCLD_DISP ] = { .pme_name = "PM_L2SC_RCLD_DISP", .pme_code = 0x701c2, .pme_short_desc = "L2 slice C RC load dispatch attempt", .pme_long_desc = "A Read/Claim dispatch for a Load was attempted", }, [ POWER5p_PME_PM_LSU_LRQ_S0_VALID ] = { .pme_name = "PM_LSU_LRQ_S0_VALID", .pme_code = 0xc60e6, .pme_short_desc = "LRQ slot 0 valid", .pme_long_desc = "This signal is asserted every cycle that the Load Request Queue slot zero is valid. The SRQ is 32 entries long and is allocated round-robin. In SMT mode the LRQ is split between the two threads (16 entries each).", }, [ POWER5p_PME_PM_LSU0_LDF ] = { .pme_name = "PM_LSU0_LDF", .pme_code = 0xc50c0, .pme_short_desc = "LSU0 executed Floating Point load instruction", .pme_long_desc = "A floating point load was executed by LSU0", }, [ POWER5p_PME_PM_PMC3_OVERFLOW ] = { .pme_name = "PM_PMC3_OVERFLOW", .pme_code = 0x40000a, .pme_short_desc = "PMC3 Overflow", .pme_long_desc = "Overflows from PMC3 are counted. 
This effectively widens the PMC. The Overflow from the original PMC will not trigger an exception even if the PMU is configured to generate exceptions on overflow.", }, [ POWER5p_PME_PM_MRK_IMR_RELOAD ] = { .pme_name = "PM_MRK_IMR_RELOAD", .pme_code = 0x820e2, .pme_short_desc = "Marked IMR reloaded", .pme_long_desc = "A DL1 reload occurred due to marked load", }, [ POWER5p_PME_PM_MRK_GRP_TIMEO ] = { .pme_name = "PM_MRK_GRP_TIMEO", .pme_code = 0x40000b, .pme_short_desc = "Marked group completion timeout", .pme_long_desc = "The sampling timeout expired indicating that the previously sampled instruction is no longer in the processor", }, [ POWER5p_PME_PM_ST_MISS_L1 ] = { .pme_name = "PM_ST_MISS_L1", .pme_code = 0xc10c3, .pme_short_desc = "L1 D cache store misses", .pme_long_desc = "A store missed the dcache. Combined Unit 0 + 1.", }, [ POWER5p_PME_PM_STOP_COMPLETION ] = { .pme_name = "PM_STOP_COMPLETION", .pme_code = 0x300018, .pme_short_desc = "Completion stopped", .pme_long_desc = "RAS Unit has signaled completion to stop", }, [ POWER5p_PME_PM_LSU_BUSY_REJECT ] = { .pme_name = "PM_LSU_BUSY_REJECT", .pme_code = 0x2c2088, .pme_short_desc = "LSU busy due to reject", .pme_long_desc = "Total cycles the Load Store Unit is busy rejecting instructions. Combined unit 0 + 1.", }, [ POWER5p_PME_PM_ISLB_MISS ] = { .pme_name = "PM_ISLB_MISS", .pme_code = 0x800c1, .pme_short_desc = "Instruction SLB misses", .pme_long_desc = "A SLB miss for an instruction fetch as occurred", }, [ POWER5p_PME_PM_CYC ] = { .pme_name = "PM_CYC", .pme_code = 0xf, .pme_short_desc = "Processor cycles", .pme_long_desc = "Processor cycles", }, [ POWER5p_PME_PM_THRD_ONE_RUN_CYC ] = { .pme_name = "PM_THRD_ONE_RUN_CYC", .pme_code = 0x10000b, .pme_short_desc = "One of the threads in run cycles", .pme_long_desc = "At least one thread has set its run latch. Operating systems use the run latch to indicate when they are doing useful work. The run latch is typically cleared in the OS idle loop. 
This event does not respect FCWAIT.", }, [ POWER5p_PME_PM_GRP_BR_REDIR_NONSPEC ] = { .pme_name = "PM_GRP_BR_REDIR_NONSPEC", .pme_code = 0x112091, .pme_short_desc = "Group experienced non-speculative branch redirect", .pme_long_desc = "Number of groups, counted at completion, that have encountered a branch redirect.", }, [ POWER5p_PME_PM_LSU1_SRQ_STFWD ] = { .pme_name = "PM_LSU1_SRQ_STFWD", .pme_code = 0xc60e5, .pme_short_desc = "LSU1 SRQ store forwarded", .pme_long_desc = "Data from a store instruction was forwarded to a load on unit 1. A load that misses L1 but becomes a store forward is treated as a load miss and it causes the DL1 load miss event to be counted. It does not go into the LMQ. If a load that hits L1 but becomes a store forward, then it's not treated as a load miss.", }, [ POWER5p_PME_PM_L3SC_MOD_INV ] = { .pme_name = "PM_L3SC_MOD_INV", .pme_code = 0x730e5, .pme_short_desc = "L3 slice C transition from modified to invalid", .pme_long_desc = "L3 snooper detects someone doing a store to a line that is truly M in this L3 (i.e. 
L3 going M=>I) Mu|Me are not included since they are formed due to a previous read op Tx is not included since it is considered shared at this point.", }, [ POWER5p_PME_PM_L2_PREF ] = { .pme_name = "PM_L2_PREF", .pme_code = 0xc50c3, .pme_short_desc = "L2 cache prefetches", .pme_long_desc = "A request to prefetch data into L2 was made", }, [ POWER5p_PME_PM_GCT_NOSLOT_BR_MPRED ] = { .pme_name = "PM_GCT_NOSLOT_BR_MPRED", .pme_code = 0x41009c, .pme_short_desc = "No slot in GCT caused by branch mispredict", .pme_long_desc = "Cycles when the Global Completion Table has no slots from this thread because of a branch misprediction.", }, [ POWER5p_PME_PM_MRK_DATA_FROM_L25_MOD ] = { .pme_name = "PM_MRK_DATA_FROM_L25_MOD", .pme_code = 0x2c7097, .pme_short_desc = "Marked data loaded from L2.5 modified", .pme_long_desc = "The processor's Data Cache was reloaded with modified (M) data from the L2 of a chip on the same module as this processor is located due to a marked load.", }, [ POWER5p_PME_PM_L2SB_ST_REQ ] = { .pme_name = "PM_L2SB_ST_REQ", .pme_code = 0x723e1, .pme_short_desc = "L2 slice B store requests", .pme_long_desc = "A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. The event is provided on each of the three slices A, B, and C.", }, [ POWER5p_PME_PM_L2SB_MOD_INV ] = { .pme_name = "PM_L2SB_MOD_INV", .pme_code = 0x730e1, .pme_short_desc = "L2 slice B transition from modified to invalid", .pme_long_desc = "A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. 
The event is provided on each of the three slices A, B, and C.", }, [ POWER5p_PME_PM_MRK_L1_RELOAD_VALID ] = { .pme_name = "PM_MRK_L1_RELOAD_VALID", .pme_code = 0xc70e4, .pme_short_desc = "Marked L1 reload data source valid", .pme_long_desc = "The source information is valid and is for a marked load", }, [ POWER5p_PME_PM_L3SB_HIT ] = { .pme_name = "PM_L3SB_HIT", .pme_code = 0x711c4, .pme_short_desc = "L3 slice B hits", .pme_long_desc = "Number of attempts made by this chip cores that resulted in an L3 hit. Reported per L3 slice", }, [ POWER5p_PME_PM_L2SB_SHR_MOD ] = { .pme_name = "PM_L2SB_SHR_MOD", .pme_code = 0x700c1, .pme_short_desc = "L2 slice B transition from shared to modified", .pme_long_desc = "A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A, B, and C.", }, [ POWER5p_PME_PM_EE_OFF_EXT_INT ] = { .pme_name = "PM_EE_OFF_EXT_INT", .pme_code = 0x130e7, .pme_short_desc = "Cycles MSR(EE) bit off and external interrupt pending", .pme_long_desc = "Cycles when an interrupt due to an external exception is pending but external exceptions were masked.", }, [ POWER5p_PME_PM_1PLUS_PPC_CMPL ] = { .pme_name = "PM_1PLUS_PPC_CMPL", .pme_code = 0x100013, .pme_short_desc = "One or more PPC instruction completed", .pme_long_desc = "A group containing at least one PPC instruction completed. For microcoded instructions that span multiple groups, this will only occur once.", }, [ POWER5p_PME_PM_L2SC_SHR_MOD ] = { .pme_name = "PM_L2SC_SHR_MOD", .pme_code = 0x700c2, .pme_short_desc = "L2 slice C transition from shared to modified", .pme_long_desc = "A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. 
This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A, B, and C.", }, [ POWER5p_PME_PM_PMC6_OVERFLOW ] = { .pme_name = "PM_PMC6_OVERFLOW", .pme_code = 0x30001a, .pme_short_desc = "PMC6 Overflow", .pme_long_desc = "Overflows from PMC6 are counted. This effectively widens the PMC. The Overflow from the original PMC will not trigger an exception even if the PMU is configured to generate exceptions on overflow.", }, [ POWER5p_PME_PM_IC_PREF_INSTALL ] = { .pme_name = "PM_IC_PREF_INSTALL", .pme_code = 0x210c7, .pme_short_desc = "Instruction prefetched installed in prefetch buffer", .pme_long_desc = "A prefetch buffer entry (line) is allocated but the request is not a demand fetch.", }, [ POWER5p_PME_PM_LSU_LRQ_FULL_CYC ] = { .pme_name = "PM_LSU_LRQ_FULL_CYC", .pme_code = 0x110c2, .pme_short_desc = "Cycles LRQ full", .pme_long_desc = "Cycles when the LRQ is full.", }, [ POWER5p_PME_PM_TLB_MISS ] = { .pme_name = "PM_TLB_MISS", .pme_code = 0x180088, .pme_short_desc = "TLB misses", .pme_long_desc = "Total of Data TLB mises + Instruction TLB misses", }, [ POWER5p_PME_PM_GCT_FULL_CYC ] = { .pme_name = "PM_GCT_FULL_CYC", .pme_code = 0x100c0, .pme_short_desc = "Cycles GCT full", .pme_long_desc = "The Global Completion Table is completely full.", }, [ POWER5p_PME_PM_FXU_BUSY ] = { .pme_name = "PM_FXU_BUSY", .pme_code = 0x200012, .pme_short_desc = "FXU busy", .pme_long_desc = "Cycles when both FXU0 and FXU1 are busy.", }, [ POWER5p_PME_PM_MRK_DATA_FROM_L3_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_L3_CYC", .pme_code = 0x2c70a4, .pme_short_desc = "Marked load latency from L3", .pme_long_desc = "Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. 
To calculate average latency divide this count by the number of marked misses to the same level.", }, [ POWER5p_PME_PM_LSU_REJECT_LMQ_FULL ] = { .pme_name = "PM_LSU_REJECT_LMQ_FULL", .pme_code = 0x2c4088, .pme_short_desc = "LSU reject due to LMQ full or missed data coming", .pme_long_desc = "Total cycles the Load Store Unit is busy rejecting instructions because the Load Miss Queue was full. The LMQ has eight entries. If all the eight entries are full, subsequent load instructions are rejected. Combined unit 0 + 1.", }, [ POWER5p_PME_PM_LSU_SRQ_S0_ALLOC ] = { .pme_name = "PM_LSU_SRQ_S0_ALLOC", .pme_code = 0xc20e7, .pme_short_desc = "SRQ slot 0 allocated", .pme_long_desc = "SRQ Slot zero was allocated", }, [ POWER5p_PME_PM_GRP_MRK ] = { .pme_name = "PM_GRP_MRK", .pme_code = 0x100014, .pme_short_desc = "Group marked in IDU", .pme_long_desc = "A group was sampled (marked). The group is called a marked group. One instruction within the group is tagged for detailed monitoring. The sampled instruction is called a marked instructions. Events associated with the marked instruction are annotated with the marked term.", }, [ POWER5p_PME_PM_INST_FROM_L25_SHR ] = { .pme_name = "PM_INST_FROM_L25_SHR", .pme_code = 0x122096, .pme_short_desc = "Instruction fetched from L2.5 shared", .pme_long_desc = "An instruction fetch group was fetched with shared (T or SL) data from the L2 of a chip on the same module as this processor is located. Fetch groups can contain up to 8 instructions.", }, [ POWER5p_PME_PM_DC_PREF_STREAM_ALLOC ] = { .pme_name = "PM_DC_PREF_STREAM_ALLOC", .pme_code = 0x830e7, .pme_short_desc = "D cache new prefetch stream allocated", .pme_long_desc = "A new Prefetch Stream was allocated.", }, [ POWER5p_PME_PM_FPU1_FIN ] = { .pme_name = "PM_FPU1_FIN", .pme_code = 0x10c7, .pme_short_desc = "FPU1 produced a result", .pme_long_desc = "FPU1 finished, produced a result. This only indicates finish, not completion. 
Floating Point Stores are included in this count but not Floating Point Loads., ,", }, [ POWER5p_PME_PM_BR_MPRED_TA ] = { .pme_name = "PM_BR_MPRED_TA", .pme_code = 0x230e6, .pme_short_desc = "Branch mispredictions due to target address", .pme_long_desc = "A branch instruction target was incorrectly predicted. This will result in a branch mispredict flush unless a flush is detected from an older instruction.", }, [ POWER5p_PME_PM_MRK_DTLB_REF_64K ] = { .pme_name = "PM_MRK_DTLB_REF_64K", .pme_code = 0x2c6086, .pme_short_desc = "Marked Data TLB reference for 64K page", .pme_long_desc = "Data TLB references by a marked instruction for 64KB pages.", }, [ POWER5p_PME_PM_RUN_INST_CMPL ] = { .pme_name = "PM_RUN_INST_CMPL", .pme_code = 0x500009, .pme_short_desc = "Run instructions completed", .pme_long_desc = "Number of run instructions completed.", }, [ POWER5p_PME_PM_CRQ_FULL_CYC ] = { .pme_name = "PM_CRQ_FULL_CYC", .pme_code = 0x110c1, .pme_short_desc = "Cycles CR issue queue full", .pme_long_desc = "The issue queue that feeds the Conditional Register unit is full. This condition will prevent dispatch groups from being dispatched. This event only indicates that the queue was full, not that dispatch was prevented.", }, [ POWER5p_PME_PM_L2SA_RCLD_DISP ] = { .pme_name = "PM_L2SA_RCLD_DISP", .pme_code = 0x701c0, .pme_short_desc = "L2 slice A RC load dispatch attempt", .pme_long_desc = "A Read/Claim dispatch for a Load was attempted", }, [ POWER5p_PME_PM_SNOOP_WR_RETRY_QFULL ] = { .pme_name = "PM_SNOOP_WR_RETRY_QFULL", .pme_code = 0x710c6, .pme_short_desc = "Snoop read retry due to read queue full", .pme_long_desc = "A snoop request for a write to memory was retried because the write queues were full. When this happens the snoop request is retried and the writes in the write reorder queue are changed to high priority. 
This event is sent from the Memory Controller clock domain and must be scaled accordingly.", }, [ POWER5p_PME_PM_MRK_DTLB_REF_4K ] = { .pme_name = "PM_MRK_DTLB_REF_4K", .pme_code = 0x1c6086, .pme_short_desc = "Marked Data TLB reference for 4K page", .pme_long_desc = "Data TLB references by a marked instruction for 4KB pages.", }, [ POWER5p_PME_PM_LSU_SRQ_S0_VALID ] = { .pme_name = "PM_LSU_SRQ_S0_VALID", .pme_code = 0xc20e6, .pme_short_desc = "SRQ slot 0 valid", .pme_long_desc = "This signal is asserted every cycle that the Store Request Queue slot zero is valid. The SRQ is 32 entries long and is allocated round-robin. In SMT mode the SRQ is split between the two threads (16 entries each).", }, [ POWER5p_PME_PM_LSU0_FLUSH_LRQ ] = { .pme_name = "PM_LSU0_FLUSH_LRQ", .pme_code = 0xc00c2, .pme_short_desc = "LSU0 LRQ flushes", .pme_long_desc = "A load was flushed by unit 0 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte.", }, [ POWER5p_PME_PM_INST_FROM_L275_MOD ] = { .pme_name = "PM_INST_FROM_L275_MOD", .pme_code = 0x422096, .pme_short_desc = "Instruction fetched from L2.75 modified", .pme_long_desc = "An instruction fetch group was fetched with modified (M) data from the L2 on a different module than this processor is located. 
Fetch groups can contain up to 8 instructions", }, [ POWER5p_PME_PM_GCT_EMPTY_CYC ] = { .pme_name = "PM_GCT_EMPTY_CYC", .pme_code = 0x200004, .pme_short_desc = "Cycles GCT empty", .pme_long_desc = "The Global Completion Table is completely empty", }, [ POWER5p_PME_PM_LARX_LSU0 ] = { .pme_name = "PM_LARX_LSU0", .pme_code = 0x820e7, .pme_short_desc = "Larx executed on LSU0", .pme_long_desc = "A larx (lwarx or ldarx) was executed on side 0 (there is no corresponding unit 1 event since larx instructions can only execute on unit 0)", }, [ POWER5p_PME_PM_THRD_PRIO_DIFF_5or6_CYC ] = { .pme_name = "PM_THRD_PRIO_DIFF_5or6_CYC", .pme_code = 0x430e6, .pme_short_desc = "Cycles thread priority difference is 5 or 6", .pme_long_desc = "Cycles when this thread's priority is higher than the other thread's priority by 5 or 6.", }, [ POWER5p_PME_PM_SNOOP_RETRY_1AHEAD ] = { .pme_name = "PM_SNOOP_RETRY_1AHEAD", .pme_code = 0x725e6, .pme_short_desc = "Snoop retry due to one ahead collision", .pme_long_desc = "Snoop retry due to one ahead collision", }, [ POWER5p_PME_PM_FPU1_FSQRT ] = { .pme_name = "PM_FPU1_FSQRT", .pme_code = 0xc6, .pme_short_desc = "FPU1 executed FSQRT instruction", .pme_long_desc = "FPU1 has executed a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs.", }, [ POWER5p_PME_PM_MRK_LD_MISS_L1_LSU1 ] = { .pme_name = "PM_MRK_LD_MISS_L1_LSU1", .pme_code = 0x820e4, .pme_short_desc = "LSU1 marked L1 D cache load misses", .pme_long_desc = "Load references that miss the Level 1 Data cache, by LSU1.", }, [ POWER5p_PME_PM_MRK_FPU_FIN ] = { .pme_name = "PM_MRK_FPU_FIN", .pme_code = 0x300014, .pme_short_desc = "Marked instruction FPU processing finished", .pme_long_desc = "One of the Floating Point Units finished a marked instruction. 
Instructions that finish may not necessary complete", }, [ POWER5p_PME_PM_THRD_PRIO_5_CYC ] = { .pme_name = "PM_THRD_PRIO_5_CYC", .pme_code = 0x420e4, .pme_short_desc = "Cycles thread running at priority level 5", .pme_long_desc = "Cycles this thread was running at priority level 5.", }, [ POWER5p_PME_PM_MRK_DATA_FROM_LMEM ] = { .pme_name = "PM_MRK_DATA_FROM_LMEM", .pme_code = 0x2c7087, .pme_short_desc = "Marked data loaded from local memory", .pme_long_desc = "The processor's Data Cache was reloaded due to a marked load from memory attached to the same module this proccessor is located on.", }, [ POWER5p_PME_PM_SNOOP_TLBIE ] = { .pme_name = "PM_SNOOP_TLBIE", .pme_code = 0x800c3, .pme_short_desc = "Snoop TLBIE", .pme_long_desc = "A tlbie was snooped from another processor.", }, [ POWER5p_PME_PM_FPU1_FRSP_FCONV ] = { .pme_name = "PM_FPU1_FRSP_FCONV", .pme_code = 0x10c5, .pme_short_desc = "FPU1 executed FRSP or FCONV instructions", .pme_long_desc = "FPU1 has executed a frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs.", }, [ POWER5p_PME_PM_DTLB_MISS_16G ] = { .pme_name = "PM_DTLB_MISS_16G", .pme_code = 0x4c208d, .pme_short_desc = "Data TLB miss for 16G page", .pme_long_desc = "Data TLB references to 16GB pages that missed the TLB. Page size is determined at TLB reload time.", }, [ POWER5p_PME_PM_L3SB_SNOOP_RETRY ] = { .pme_name = "PM_L3SB_SNOOP_RETRY", .pme_code = 0x731e4, .pme_short_desc = "L3 slice B snoop retries", .pme_long_desc = "Number of times an L3 retried a snoop because it got two in at the same time (one on snp_a, one on snp_b)", }, [ POWER5p_PME_PM_FAB_VBYPASS_EMPTY ] = { .pme_name = "PM_FAB_VBYPASS_EMPTY", .pme_code = 0x731e7, .pme_short_desc = "Vertical bypass buffer empty", .pme_long_desc = "Fabric cycles when the Middle Bypass sidecar is empty. 
The signal is delivered at FBC speed and the count must be scaled accordingly.", }, [ POWER5p_PME_PM_MRK_DATA_FROM_L275_MOD ] = { .pme_name = "PM_MRK_DATA_FROM_L275_MOD", .pme_code = 0x1c70a3, .pme_short_desc = "Marked data loaded from L2.75 modified", .pme_long_desc = "The processor's Data Cache was reloaded with modified (M) data from the L2 on a different module than this processor is located due to a marked load.", }, [ POWER5p_PME_PM_L2SB_RCST_DISP ] = { .pme_name = "PM_L2SB_RCST_DISP", .pme_code = 0x702c1, .pme_short_desc = "L2 slice B RC store dispatch attempt", .pme_long_desc = "A Read/Claim dispatch for a Store was attempted.", }, [ POWER5p_PME_PM_6INST_CLB_CYC ] = { .pme_name = "PM_6INST_CLB_CYC", .pme_code = 0x400c6, .pme_short_desc = "Cycles 6 instructions in CLB", .pme_long_desc = "The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific.", }, [ POWER5p_PME_PM_FLUSH ] = { .pme_name = "PM_FLUSH", .pme_code = 0x110c7, .pme_short_desc = "Flushes", .pme_long_desc = "Flushes occurred including LSU and Branch flushes.", }, [ POWER5p_PME_PM_L2SC_MOD_INV ] = { .pme_name = "PM_L2SC_MOD_INV", .pme_code = 0x730e2, .pme_short_desc = "L2 slice C transition from modified to invalid", .pme_long_desc = "A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. 
The event is provided on each of the three slices A, B, and C.", }, [ POWER5p_PME_PM_FPU_DENORM ] = { .pme_name = "PM_FPU_DENORM", .pme_code = 0x102088, .pme_short_desc = "FPU received denormalized data", .pme_long_desc = "The floating point unit has encountered a denormalized operand. Combined Unit 0 + Unit 1.", }, [ POWER5p_PME_PM_L3SC_HIT ] = { .pme_name = "PM_L3SC_HIT", .pme_code = 0x711c5, .pme_short_desc = "L3 slice C hits", .pme_long_desc = "Number of attempts made by this chip cores that resulted in an L3 hit. Reported per L3 Slice", }, [ POWER5p_PME_PM_SNOOP_WR_RETRY_RQ ] = { .pme_name = "PM_SNOOP_WR_RETRY_RQ", .pme_code = 0x706c6, .pme_short_desc = "Snoop write/dclaim retry due to collision with active read queue", .pme_long_desc = "A snoop request for a write or dclaim to memory was retried because it matched the cacheline of an active read. This event is sent from the Memory Controller clock domain and must be scaled accordingly", }, [ POWER5p_PME_PM_LSU1_REJECT_SRQ ] = { .pme_name = "PM_LSU1_REJECT_SRQ", .pme_code = 0xc40c4, .pme_short_desc = "LSU1 SRQ lhs rejects", .pme_long_desc = "Total cycles the Load Store Unit 1 is busy rejecting instructions because of Load Hit Store conditions. 
Loads are rejected when data is needed from a previous store instruction but store forwarding is not possible because the data is not fully contained in the Store Data Queue or is not yet available in the Store Data Queue.", }, [ POWER5p_PME_PM_L3SC_ALL_BUSY ] = { .pme_name = "PM_L3SC_ALL_BUSY", .pme_code = 0x721e5, .pme_short_desc = "L3 slice C active for every cycle all CI/CO machines busy", .pme_long_desc = "Cycles All Castin/Castout machines are busy.", }, [ POWER5p_PME_PM_IC_PREF_REQ ] = { .pme_name = "PM_IC_PREF_REQ", .pme_code = 0x220e6, .pme_short_desc = "Instruction prefetch requests", .pme_long_desc = "An instruction prefetch request has been made.", }, [ POWER5p_PME_PM_MRK_GRP_IC_MISS ] = { .pme_name = "PM_MRK_GRP_IC_MISS", .pme_code = 0x412091, .pme_short_desc = "Group experienced marked I cache miss", .pme_long_desc = "A group containing a marked (sampled) instruction experienced an instruction cache miss.", }, [ POWER5p_PME_PM_GCT_NOSLOT_IC_MISS ] = { .pme_name = "PM_GCT_NOSLOT_IC_MISS", .pme_code = 0x21009c, .pme_short_desc = "No slot in GCT caused by I cache miss", .pme_long_desc = "Cycles when the Global Completion Table has no slots from this thread because of an Instruction Cache miss.", }, [ POWER5p_PME_PM_MRK_DATA_FROM_L3 ] = { .pme_name = "PM_MRK_DATA_FROM_L3", .pme_code = 0x1c708e, .pme_short_desc = "Marked data loaded from L3", .pme_long_desc = "The processor's Data Cache was reloaded from the local L3 due to a marked load.", }, [ POWER5p_PME_PM_GCT_NOSLOT_SRQ_FULL ] = { .pme_name = "PM_GCT_NOSLOT_SRQ_FULL", .pme_code = 0x310084, .pme_short_desc = "No slot in GCT caused by SRQ full", .pme_long_desc = "Cycles when the Global Completion Table has no slots from this thread because the Store Request Queue (SRQ) is full. This happens when the storage subsystem can not process the stores in the SRQ. 
Groups can not be dispatched until a SRQ entry is available.", }, [ POWER5p_PME_PM_CMPLU_STALL_DCACHE_MISS ] = { .pme_name = "PM_CMPLU_STALL_DCACHE_MISS", .pme_code = 0x21109a, .pme_short_desc = "Completion stall caused by D cache miss", .pme_long_desc = "Following a completion stall (any period when no groups completed) the last instruction to finish before completion resumes suffered a Data Cache Miss. Data Cache Miss has higher priority than any other Load/Store delay, so if an instruction encounters multiple delays only the Data Cache Miss will be reported and the entire delay period will be charged to Data Cache Miss. This is a subset of PM_CMPLU_STALL_LSU.", }, [ POWER5p_PME_PM_THRD_SEL_OVER_ISU_HOLD ] = { .pme_name = "PM_THRD_SEL_OVER_ISU_HOLD", .pme_code = 0x410c5, .pme_short_desc = "Thread selection overrides caused by ISU holds", .pme_long_desc = "Thread selection was overridden because of an ISU hold.", }, [ POWER5p_PME_PM_LSU_FLUSH_LRQ ] = { .pme_name = "PM_LSU_FLUSH_LRQ", .pme_code = 0x2c0090, .pme_short_desc = "LRQ flushes", .pme_long_desc = "A load was flushed because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. Combined Units 0 and 1.", }, [ POWER5p_PME_PM_THRD_PRIO_2_CYC ] = { .pme_name = "PM_THRD_PRIO_2_CYC", .pme_code = 0x420e1, .pme_short_desc = "Cycles thread running at priority level 2", .pme_long_desc = "Cycles this thread was running at priority level 2.", }, [ POWER5p_PME_PM_L3SA_MOD_INV ] = { .pme_name = "PM_L3SA_MOD_INV", .pme_code = 0x730e3, .pme_short_desc = "L3 slice A transition from modified to invalid", .pme_long_desc = "L3 snooper detects someone doing a store to a line that is truly M in this L3 (i.e. L3 going M=>I) Mu|Me are not included since they are formed due to a prev read op. 
Tx is not included since it is considered shared at this point.", }, [ POWER5p_PME_PM_LSU_FLUSH_SRQ ] = { .pme_name = "PM_LSU_FLUSH_SRQ", .pme_code = 0x1c0090, .pme_short_desc = "SRQ flushes", .pme_long_desc = "A store was flushed because younger load hits and older store that is already in the SRQ or in the same group. Combined Unit 0 + 1.", }, [ POWER5p_PME_PM_MRK_LSU_SRQ_INST_VALID ] = { .pme_name = "PM_MRK_LSU_SRQ_INST_VALID", .pme_code = 0xc70e6, .pme_short_desc = "Marked instruction valid in SRQ", .pme_long_desc = "This signal is asserted every cycle when a marked request is resident in the Store Request Queue", }, [ POWER5p_PME_PM_L3SA_REF ] = { .pme_name = "PM_L3SA_REF", .pme_code = 0x701c3, .pme_short_desc = "L3 slice A references", .pme_long_desc = "Number of attempts made by this chip cores to find data in the L3. Reported per L3 slice", }, [ POWER5p_PME_PM_L2SC_RC_DISP_FAIL_CO_BUSY_ALL ] = { .pme_name = "PM_L2SC_RC_DISP_FAIL_CO_BUSY_ALL", .pme_code = 0x713c2, .pme_short_desc = "L2 slice C RC dispatch attempt failed due to all CO busy", .pme_long_desc = "A Read/Claim dispatch was rejected because all Castout machines were busy.", }, [ POWER5p_PME_PM_FPU0_STALL3 ] = { .pme_name = "PM_FPU0_STALL3", .pme_code = 0x20e1, .pme_short_desc = "FPU0 stalled in pipe3", .pme_long_desc = "FPU0 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always).", }, [ POWER5p_PME_PM_TB_BIT_TRANS ] = { .pme_name = "PM_TB_BIT_TRANS", .pme_code = 0x100018, .pme_short_desc = "Time Base bit transition", .pme_long_desc = "When the selected time base bit (as specified in MMCR0[TBSEL])transitions from 0 to 1", }, [ POWER5p_PME_PM_GPR_MAP_FULL_CYC ] = { .pme_name = "PM_GPR_MAP_FULL_CYC", .pme_code = 0x130e5, .pme_short_desc = "Cycles GPR mapper full", .pme_long_desc = "The General Purpose Register mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. 
This event only indicates that the mapper was full, not that dispatch was prevented.", }, [ POWER5p_PME_PM_MRK_LSU_FLUSH_LRQ ] = { .pme_name = "PM_MRK_LSU_FLUSH_LRQ", .pme_code = 0x381088, .pme_short_desc = "Marked LRQ flushes", .pme_long_desc = "A marked load was flushed because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte.", }, [ POWER5p_PME_PM_FPU0_STF ] = { .pme_name = "PM_FPU0_STF", .pme_code = 0x20e2, .pme_short_desc = "FPU0 executed store instruction", .pme_long_desc = "FPU0 has executed a Floating Point Store instruction.", }, [ POWER5p_PME_PM_MRK_DTLB_MISS ] = { .pme_name = "PM_MRK_DTLB_MISS", .pme_code = 0xc50c6, .pme_short_desc = "Marked Data TLB misses", .pme_long_desc = "Data TLB references by a marked instruction that missed the TLB (all page sizes).", }, [ POWER5p_PME_PM_FPU1_FMA ] = { .pme_name = "PM_FPU1_FMA", .pme_code = 0xc5, .pme_short_desc = "FPU1 executed multiply-add instruction", .pme_long_desc = "The floating point unit has executed a multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs.", }, [ POWER5p_PME_PM_L2SA_MOD_TAG ] = { .pme_name = "PM_L2SA_MOD_TAG", .pme_code = 0x720e0, .pme_short_desc = "L2 slice A transition from modified to tagged", .pme_long_desc = "A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. 
The event is provided on each of the three slices A, B, and C.", }, [ POWER5p_PME_PM_LSU1_FLUSH_ULD ] = { .pme_name = "PM_LSU1_FLUSH_ULD", .pme_code = 0xc00c4, .pme_short_desc = "LSU1 unaligned load flushes", .pme_long_desc = "A load was flushed from unit 1 because it was unaligned (crossed a 64 byte boundary, or 32 byte if it missed the L1).", }, [ POWER5p_PME_PM_MRK_INST_FIN ] = { .pme_name = "PM_MRK_INST_FIN", .pme_code = 0x300005, .pme_short_desc = "Marked instruction finished", .pme_long_desc = "One of the execution units finished a marked instruction. Instructions that finish may not necessary complete", }, [ POWER5p_PME_PM_MRK_LSU0_FLUSH_UST ] = { .pme_name = "PM_MRK_LSU0_FLUSH_UST", .pme_code = 0x810c0, .pme_short_desc = "LSU0 marked unaligned store flushes", .pme_long_desc = "A marked store was flushed from unit 0 because it was unaligned", }, [ POWER5p_PME_PM_FPU0_FULL_CYC ] = { .pme_name = "PM_FPU0_FULL_CYC", .pme_code = 0x100c3, .pme_short_desc = "Cycles FPU0 issue queue full", .pme_long_desc = "The issue queue for FPU0 cannot accept any more instruction. Dispatch to this issue queue is stopped.", }, [ POWER5p_PME_PM_LSU_LRQ_S0_ALLOC ] = { .pme_name = "PM_LSU_LRQ_S0_ALLOC", .pme_code = 0xc60e7, .pme_short_desc = "LRQ slot 0 allocated", .pme_long_desc = "LRQ slot zero was allocated", }, [ POWER5p_PME_PM_MRK_LSU1_FLUSH_ULD ] = { .pme_name = "PM_MRK_LSU1_FLUSH_ULD", .pme_code = 0x810c4, .pme_short_desc = "LSU1 marked unaligned load flushes", .pme_long_desc = "A marked load was flushed from unit 1 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1)", }, [ POWER5p_PME_PM_MRK_DTLB_REF ] = { .pme_name = "PM_MRK_DTLB_REF", .pme_code = 0xc60e4, .pme_short_desc = "Marked Data TLB reference", .pme_long_desc = "Total number of Data TLB references by a marked instruction for all page sizes. 
Page size is determined at TLB reload time.", }, [ POWER5p_PME_PM_BR_UNCOND ] = { .pme_name = "PM_BR_UNCOND", .pme_code = 0x123087, .pme_short_desc = "Unconditional branch", .pme_long_desc = "An unconditional branch was executed.", }, [ POWER5p_PME_PM_THRD_SEL_OVER_L2MISS ] = { .pme_name = "PM_THRD_SEL_OVER_L2MISS", .pme_code = 0x410c3, .pme_short_desc = "Thread selection overrides caused by L2 misses", .pme_long_desc = "Thread selection was overridden because one thread was had a L2 miss pending.", }, [ POWER5p_PME_PM_L2SB_SHR_INV ] = { .pme_name = "PM_L2SB_SHR_INV", .pme_code = 0x710c1, .pme_short_desc = "L2 slice B transition from shared to invalid", .pme_long_desc = "A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. This transition was caused by any external snoop request. The event is provided on each of the three slices A, B, and C. NOTE: For this event to be useful the tablewalk duration event should also be counted.", }, [ POWER5p_PME_PM_MEM_LO_PRIO_WR_CMPL ] = { .pme_name = "PM_MEM_LO_PRIO_WR_CMPL", .pme_code = 0x736e6, .pme_short_desc = "Low priority write completed", .pme_long_desc = "A memory write, which was not upgraded to high priority, completed. This event is sent from the Memory Controller clock domain and must be scaled accordingly", }, [ POWER5p_PME_PM_MRK_DTLB_MISS_64K ] = { .pme_name = "PM_MRK_DTLB_MISS_64K", .pme_code = 0x2c608d, .pme_short_desc = "Marked Data TLB misses for 64K page", .pme_long_desc = "Data TLB references to 64KB pages by a marked instruction that missed the TLB. 
Page size is determined at TLB reload time.", }, [ POWER5p_PME_PM_MRK_ST_MISS_L1 ] = { .pme_name = "PM_MRK_ST_MISS_L1", .pme_code = 0x820e3, .pme_short_desc = "Marked L1 D cache store misses", .pme_long_desc = "A marked store missed the dcache", }, [ POWER5p_PME_PM_L3SC_MOD_TAG ] = { .pme_name = "PM_L3SC_MOD_TAG", .pme_code = 0x720e5, .pme_short_desc = "L3 slice C transition from modified to TAG", .pme_long_desc = "L3 snooper detects someone doing a read to a line that is truly M in this L3(i.e. L3 going M->T or M->I(go_Mu case); Mu|Me are not included since they are formed due to a prev read op). Tx is not included since it is considered shared at this point.", }, [ POWER5p_PME_PM_GRP_DISP_SUCCESS ] = { .pme_name = "PM_GRP_DISP_SUCCESS", .pme_code = 0x300002, .pme_short_desc = "Group dispatch success", .pme_long_desc = "Number of groups sucessfully dispatched (not rejected)", }, [ POWER5p_PME_PM_THRD_PRIO_DIFF_1or2_CYC ] = { .pme_name = "PM_THRD_PRIO_DIFF_1or2_CYC", .pme_code = 0x430e4, .pme_short_desc = "Cycles thread priority difference is 1 or 2", .pme_long_desc = "Cycles when this thread's priority is higher than the other thread's priority by 1 or 2.", }, [ POWER5p_PME_PM_IC_DEMAND_L2_BHT_REDIRECT ] = { .pme_name = "PM_IC_DEMAND_L2_BHT_REDIRECT", .pme_code = 0x230e0, .pme_short_desc = "L2 I cache demand request due to BHT redirect", .pme_long_desc = "A demand (not prefetch) miss to the instruction cache was sent to the L2 as a result of a branch prediction redirect (CR mispredict).", }, [ POWER5p_PME_PM_LSU_DERAT_MISS ] = { .pme_name = "PM_LSU_DERAT_MISS", .pme_code = 0x280090, .pme_short_desc = "DERAT misses", .pme_long_desc = "Total D-ERAT Misses. Requests that miss the Derat are rejected and retried until the request hits in the Erat. This may result in multiple erat misses for the same instruction. 
Combined Unit 0 + 1.", }, [ POWER5p_PME_PM_MEM_WQ_DISP_Q8to15 ] = { .pme_name = "PM_MEM_WQ_DISP_Q8to15", .pme_code = 0x733e6, .pme_short_desc = "Memory write queue dispatched to queues 8-15", .pme_long_desc = "A memory operation was dispatched to a write queue in the range between 8 and 15. This event is sent from the Memory Controller clock domain and must be scaled accordingly.", }, [ POWER5p_PME_PM_FPU0_SINGLE ] = { .pme_name = "PM_FPU0_SINGLE", .pme_code = 0x20e3, .pme_short_desc = "FPU0 executed single precision instruction", .pme_long_desc = "FPU0 has executed a single precision instruction.", }, [ POWER5p_PME_PM_THRD_PRIO_1_CYC ] = { .pme_name = "PM_THRD_PRIO_1_CYC", .pme_code = 0x420e0, .pme_short_desc = "Cycles thread running at priority level 1", .pme_long_desc = "Cycles this thread was running at priority level 1. Priority level 1 is the lowest and indicates the thread is sleeping.", }, [ POWER5p_PME_PM_L2SC_RCST_DISP_FAIL_OTHER ] = { .pme_name = "PM_L2SC_RCST_DISP_FAIL_OTHER", .pme_code = 0x732e2, .pme_short_desc = "L2 slice C RC store dispatch attempt failed due to other reasons", .pme_long_desc = "A Read/Claim dispatch for a store failed for some reason other than Full or Collision conditions. Rejected dispatches do not count because they have not yet been attempted.", }, [ POWER5p_PME_PM_SNOOP_RD_RETRY_RQ ] = { .pme_name = "PM_SNOOP_RD_RETRY_RQ", .pme_code = 0x705c6, .pme_short_desc = "Snoop read retry due to collision with active read queue", .pme_long_desc = "A snoop request for a read from memory was retried because it matched the cache line of an active read. The snoop request is retried because the L2 may be able to source data via intervention for the 2nd read faster than the MC. 
This event is sent from the Memory Controller clock domain and must be scaled accordingly.", }, [ POWER5p_PME_PM_FAB_HOLDtoVN_EMPTY ] = { .pme_name = "PM_FAB_HOLDtoVN_EMPTY", .pme_code = 0x721e7, .pme_short_desc = "Hold buffer to VN empty", .pme_long_desc = "Fabric cycles when the Vertical Node out hold-buffers are emtpy. The signal is delivered at FBC speed and the count must be scaled accordingly.", }, [ POWER5p_PME_PM_FPU1_FEST ] = { .pme_name = "PM_FPU1_FEST", .pme_code = 0x10c6, .pme_short_desc = "FPU1 executed FEST instruction", .pme_long_desc = "FPU1 has executed an estimate instructions. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ.", }, [ POWER5p_PME_PM_SNOOP_DCLAIM_RETRY_QFULL ] = { .pme_name = "PM_SNOOP_DCLAIM_RETRY_QFULL", .pme_code = 0x720e6, .pme_short_desc = "Snoop dclaim/flush retry due to write/dclaim queues full", .pme_long_desc = "The memory controller A memory write was dispatched to a write queue. This event is sent from the Memory Controller clock domain and must be scaled accordingly.", }, [ POWER5p_PME_PM_MRK_DATA_FROM_L25_SHR_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_L25_SHR_CYC", .pme_code = 0x2c70a2, .pme_short_desc = "Marked load latency from L2.5 shared", .pme_long_desc = "Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. 
To calculate average latency divide this count by the number of marked misses to the same level.", }, [ POWER5p_PME_PM_MRK_ST_CMPL_INT ] = { .pme_name = "PM_MRK_ST_CMPL_INT", .pme_code = 0x300003, .pme_short_desc = "Marked store completed with intervention", .pme_long_desc = "A marked store previously sent to the memory subsystem completed (data home) after requiring intervention", }, [ POWER5p_PME_PM_FLUSH_BR_MPRED ] = { .pme_name = "PM_FLUSH_BR_MPRED", .pme_code = 0x110c6, .pme_short_desc = "Flush caused by branch mispredict", .pme_long_desc = "A flush was caused by a branch mispredict.", }, [ POWER5p_PME_PM_MRK_DTLB_MISS_16G ] = { .pme_name = "PM_MRK_DTLB_MISS_16G", .pme_code = 0x4c608d, .pme_short_desc = "Marked Data TLB misses for 16G page", .pme_long_desc = "Data TLB references to 16GB pages by a marked instruction that missed the TLB. Page size is determined at TLB reload time.", }, [ POWER5p_PME_PM_FPU_STF ] = { .pme_name = "PM_FPU_STF", .pme_code = 0x202090, .pme_short_desc = "FPU executed store instruction", .pme_long_desc = "FPU has executed a store instruction. Combined Unit 0 + Unit 1.", }, [ POWER5p_PME_PM_L2SB_RCLD_DISP_FAIL_ADDR ] = { .pme_name = "PM_L2SB_RCLD_DISP_FAIL_ADDR", .pme_code = 0x711c1, .pme_short_desc = "L2 slice B RC load dispatch attempt failed due to address collision with RC/CO/SN/SQ", .pme_long_desc = "A Read/Claim dispatch for a load failed because of an address conflict. 
Two RC machines will never both work on the same line or line in the same congruence class at the same time.", }, [ POWER5p_PME_PM_CMPLU_STALL_FPU ] = { .pme_name = "PM_CMPLU_STALL_FPU", .pme_code = 0x411098, .pme_short_desc = "Completion stall caused by FPU instruction", .pme_long_desc = "Following a completion stall (any period when no groups completed) the last instruction to finish before completion resumes was a floating point instruction.", }, [ POWER5p_PME_PM_THRD_PRIO_DIFF_minus1or2_CYC ] = { .pme_name = "PM_THRD_PRIO_DIFF_minus1or2_CYC", .pme_code = 0x430e2, .pme_short_desc = "Cycles thread priority difference is -1 or -2", .pme_long_desc = "Cycles when this thread's priority is lower than the other thread's priority by 1 or 2.", }, [ POWER5p_PME_PM_GCT_NOSLOT_CYC ] = { .pme_name = "PM_GCT_NOSLOT_CYC", .pme_code = 0x100004, .pme_short_desc = "Cycles no GCT slot allocated", .pme_long_desc = "Cycles when the Global Completion Table has no slots from this thread.", }, [ POWER5p_PME_PM_FXU0_BUSY_FXU1_IDLE ] = { .pme_name = "PM_FXU0_BUSY_FXU1_IDLE", .pme_code = 0x300012, .pme_short_desc = "FXU0 busy FXU1 idle", .pme_long_desc = "FXU0 is busy while FXU1 was idle", }, [ POWER5p_PME_PM_PTEG_FROM_L35_SHR ] = { .pme_name = "PM_PTEG_FROM_L35_SHR", .pme_code = 0x18309e, .pme_short_desc = "PTEG loaded from L3.5 shared", .pme_long_desc = "A Page Table Entry was loaded into the TLB with shared (S) data from the L3 of a chip on the same module as this processor is located, due to a demand load.", }, [ POWER5p_PME_PM_MRK_DTLB_REF_16G ] = { .pme_name = "PM_MRK_DTLB_REF_16G", .pme_code = 0x4c6086, .pme_short_desc = "Marked Data TLB reference for 16G page", .pme_long_desc = "Data TLB references by a marked instruction for 16GB pages.", }, [ POWER5p_PME_PM_MRK_LSU_FLUSH_UST ] = { .pme_name = "PM_MRK_LSU_FLUSH_UST", .pme_code = 0x2810a8, .pme_short_desc = "Marked unaligned store flushes", .pme_long_desc = "A marked store was flushed because it was unaligned", }, [ 
POWER5p_PME_PM_MRK_DATA_FROM_L25_SHR ] = { .pme_name = "PM_MRK_DATA_FROM_L25_SHR", .pme_code = 0x1c7097, .pme_short_desc = "Marked data loaded from L2.5 shared", .pme_long_desc = "The processor's Data Cache was reloaded with shared (T or SL) data from the L2 of a chip on the same module as this processor is located due to a marked load.", }, [ POWER5p_PME_PM_L3SA_HIT ] = { .pme_name = "PM_L3SA_HIT", .pme_code = 0x711c3, .pme_short_desc = "L3 slice A hits", .pme_long_desc = "Number of attempts made by this chip cores that resulted in an L3 hit. Reported per L3 slice", }, [ POWER5p_PME_PM_MRK_DATA_FROM_L35_SHR ] = { .pme_name = "PM_MRK_DATA_FROM_L35_SHR", .pme_code = 0x1c709e, .pme_short_desc = "Marked data loaded from L3.5 shared", .pme_long_desc = "The processor's Data Cache was reloaded with shared (S) data from the L3 of a chip on the same module as this processor is located due to a marked load.", }, [ POWER5p_PME_PM_L2SB_RCST_DISP_FAIL_ADDR ] = { .pme_name = "PM_L2SB_RCST_DISP_FAIL_ADDR", .pme_code = 0x712c1, .pme_short_desc = "L2 slice B RC store dispatch attempt failed due to address collision with RC/CO/SN/SQ", .pme_long_desc = "A Read/Claim dispatch for a store failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time.", }, [ POWER5p_PME_PM_IERAT_XLATE_WR ] = { .pme_name = "PM_IERAT_XLATE_WR", .pme_code = 0x220e7, .pme_short_desc = "Translation written to ierat", .pme_long_desc = "An entry was written into the IERAT as a result of an IERAT miss. This event can be used to count IERAT misses. An ERAT miss that are later ignored will not be counted unless the ERAT is written before the instruction stream is changed.", }, [ POWER5p_PME_PM_L2SA_ST_REQ ] = { .pme_name = "PM_L2SA_ST_REQ", .pme_code = 0x723e0, .pme_short_desc = "L2 slice A store requests", .pme_long_desc = "A store request as seen at the L2 directory has been made from the core. 
Stores are counted after gathering in the L2 store queues. The event is provided on each of the three slices A, B, and C.", }, [ POWER5p_PME_PM_INST_FROM_LMEM ] = { .pme_name = "PM_INST_FROM_LMEM", .pme_code = 0x222086, .pme_short_desc = "Instruction fetched from local memory", .pme_long_desc = "An instruction fetch group was fetched from memory attached to the same module this proccessor is located on. Fetch groups can contain up to 8 instructions", }, [ POWER5p_PME_PM_THRD_SEL_T1 ] = { .pme_name = "PM_THRD_SEL_T1", .pme_code = 0x410c1, .pme_short_desc = "Decode selected thread 1", .pme_long_desc = "Thread selection picked thread 1 for decode.", }, [ POWER5p_PME_PM_IC_DEMAND_L2_BR_REDIRECT ] = { .pme_name = "PM_IC_DEMAND_L2_BR_REDIRECT", .pme_code = 0x230e1, .pme_short_desc = "L2 I cache demand request due to branch redirect", .pme_long_desc = "A demand (not prefetch) miss to the instruction cache was sent to the L2 as a result of a branch prediction redirect (either ALL mispredicted or Target).", }, [ POWER5p_PME_PM_MRK_DATA_FROM_L35_SHR_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_L35_SHR_CYC", .pme_code = 0x2c70a6, .pme_short_desc = "Marked load latency from L3.5 shared", .pme_long_desc = "Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. To calculate average latency divide this count by the number of marked misses to the same level.", }, [ POWER5p_PME_PM_FPU0_1FLOP ] = { .pme_name = "PM_FPU0_1FLOP", .pme_code = 0xc3, .pme_short_desc = "FPU0 executed add, mult, sub, cmp or sel instruction", .pme_long_desc = "The floating point unit has executed an add, mult, sub, compare, fsel, fneg, fabs, fnabs, fres, or frsqrte kind of instruction. 
These are single FLOP operations.", }, [ POWER5p_PME_PM_PTEG_FROM_L2 ] = { .pme_name = "PM_PTEG_FROM_L2", .pme_code = 0x183087, .pme_short_desc = "PTEG loaded from L2", .pme_long_desc = "A Page Table Entry was loaded into the TLB from the local L2 due to a demand load", }, [ POWER5p_PME_PM_MEM_PW_CMPL ] = { .pme_name = "PM_MEM_PW_CMPL", .pme_code = 0x724e6, .pme_short_desc = "Memory partial-write completed", .pme_long_desc = "Number of Partial Writes completed. This event is sent from the Memory Controller clock domain and must be scaled accordingly.", }, [ POWER5p_PME_PM_THRD_PRIO_DIFF_minus5or6_CYC ] = { .pme_name = "PM_THRD_PRIO_DIFF_minus5or6_CYC", .pme_code = 0x430e0, .pme_short_desc = "Cycles thread priority difference is -5 or -6", .pme_long_desc = "Cycles when this thread's priority is lower than the other thread's priority by 5 or 6.", }, [ POWER5p_PME_PM_L2SB_RCLD_DISP_FAIL_OTHER ] = { .pme_name = "PM_L2SB_RCLD_DISP_FAIL_OTHER", .pme_code = 0x731e1, .pme_short_desc = "L2 slice B RC load dispatch attempt failed due to other reasons", .pme_long_desc = "A Read/Claim dispatch for a load failed for some reason other than Full or Collision conditions.", }, [ POWER5p_PME_PM_MRK_DTLB_MISS_4K ] = { .pme_name = "PM_MRK_DTLB_MISS_4K", .pme_code = 0x1c608d, .pme_short_desc = "Marked Data TLB misses for 4K page", .pme_long_desc = "Data TLB references to 4KB pages by a marked instruction that missed the TLB. Page size is determined at TLB reload time.", }, [ POWER5p_PME_PM_FPU0_FIN ] = { .pme_name = "PM_FPU0_FIN", .pme_code = 0x10c3, .pme_short_desc = "FPU0 produced a result", .pme_long_desc = "FPU0 finished, produced a result. This only indicates finish, not completion. 
Floating Point Stores are included in this count but not Floating Point Loads.", }, [ POWER5p_PME_PM_L3SC_SHR_INV ] = { .pme_name = "PM_L3SC_SHR_INV", .pme_code = 0x710c5, .pme_short_desc = "L3 slice C transition from shared to invalid", .pme_long_desc = "L3 snooper detects someone doing a store to a line that is Sx in this L3(i.e. invalidate hit SX and dispatched).", }, [ POWER5p_PME_PM_GRP_BR_REDIR ] = { .pme_name = "PM_GRP_BR_REDIR", .pme_code = 0x120e6, .pme_short_desc = "Group experienced branch redirect", .pme_long_desc = "Number of groups, counted at dispatch, that have encountered a branch redirect. Every group constructed from a fetch group that has been redirected will count.", }, [ POWER5p_PME_PM_L2SC_RCLD_DISP_FAIL_RC_FULL ] = { .pme_name = "PM_L2SC_RCLD_DISP_FAIL_RC_FULL", .pme_code = 0x721e2, .pme_short_desc = "L2 slice C RC load dispatch attempt failed due to all RC full", .pme_long_desc = "A Read/Claim dispatch for a load failed because all RC machines are busy.", }, [ POWER5p_PME_PM_MRK_LSU_FLUSH_SRQ ] = { .pme_name = "PM_MRK_LSU_FLUSH_SRQ", .pme_code = 0x481088, .pme_short_desc = "Marked SRQ lhs flushes", .pme_long_desc = "A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group.", }, [ POWER5p_PME_PM_PTEG_FROM_L275_SHR ] = { .pme_name = "PM_PTEG_FROM_L275_SHR", .pme_code = 0x383097, .pme_short_desc = "PTEG loaded from L2.75 shared", .pme_long_desc = "A Page Table Entry was loaded into the TLB with shared (T) data from the L2 on a different module than this processor is located due to a demand load.", }, [ POWER5p_PME_PM_L2SB_RCLD_DISP_FAIL_RC_FULL ] = { .pme_name = "PM_L2SB_RCLD_DISP_FAIL_RC_FULL", .pme_code = 0x721e1, .pme_short_desc = "L2 slice B RC load dispatch attempt failed due to all RC full", .pme_long_desc = "A Read/Claim dispatch for a load failed because all RC machines are busy.", }, [ POWER5p_PME_PM_SNOOP_RD_RETRY_WQ ] = { .pme_name = "PM_SNOOP_RD_RETRY_WQ", .pme_code = 
0x715c6, .pme_short_desc = "Snoop read retry due to collision with active write queue", .pme_long_desc = "A snoop request for a read from memory was retried because it matched the cache line of an active write. The snoop request is retried and the active write is changed to high priority. This event is sent from the Memory Controller clock domain and must be scaled accordingly.", }, [ POWER5p_PME_PM_FAB_DCLAIM_RETRIED ] = { .pme_name = "PM_FAB_DCLAIM_RETRIED", .pme_code = 0x730e7, .pme_short_desc = "dclaim retried", .pme_long_desc = "A DCLAIM command was retried. Each chip reports its own counts. The signal is delivered at FBC speed and the count must be scaled accordingly.", }, [ POWER5p_PME_PM_LSU0_NCLD ] = { .pme_name = "PM_LSU0_NCLD", .pme_code = 0xc50c1, .pme_short_desc = "LSU0 non-cacheable loads", .pme_long_desc = "A non-cacheable load was executed by unit 0.", }, [ POWER5p_PME_PM_LSU1_BUSY_REJECT ] = { .pme_name = "PM_LSU1_BUSY_REJECT", .pme_code = 0xc20e5, .pme_short_desc = "LSU1 busy due to reject", .pme_long_desc = "Total cycles the Load Store Unit 1 is busy rejecting instructions.", }, [ POWER5p_PME_PM_FXLS0_FULL_CYC ] = { .pme_name = "PM_FXLS0_FULL_CYC", .pme_code = 0x110c0, .pme_short_desc = "Cycles FXU0/LS0 queue full", .pme_long_desc = "The issue queue that feeds the Fixed Point unit 0 / Load Store Unit 0 is full. This condition will prevent dispatch groups from being dispatched. This event only indicates that the queue was full, not that dispatch was prevented.", }, [ POWER5p_PME_PM_DTLB_REF_16M ] = { .pme_name = "PM_DTLB_REF_16M", .pme_code = 0x3c2086, .pme_short_desc = "Data TLB reference for 16M page", .pme_long_desc = "Data TLB references for 16MB pages. Includes hits + misses.", }, [ POWER5p_PME_PM_FPU0_FEST ] = { .pme_name = "PM_FPU0_FEST", .pme_code = 0x10c2, .pme_short_desc = "FPU0 executed FEST instruction", .pme_long_desc = "FPU0 has executed an estimate instructions. 
This could be fres* or frsqrte* where XYZ* means XYZ or XYZ.", }, [ POWER5p_PME_PM_GCT_USAGE_60to79_CYC ] = { .pme_name = "PM_GCT_USAGE_60to79_CYC", .pme_code = 0x20001f, .pme_short_desc = "Cycles GCT 60-79% full", .pme_long_desc = "Cycles when the Global Completion Table has between 60% and 70% of its slots used. The GCT has 20 entries shared between threads.", }, [ POWER5p_PME_PM_DATA_FROM_L25_MOD ] = { .pme_name = "PM_DATA_FROM_L25_MOD", .pme_code = 0x2c3097, .pme_short_desc = "Data loaded from L2.5 modified", .pme_long_desc = "The processor's Data Cache was reloaded with modified (M) data from the L2 of a chip on the same module as this processor is located due to a demand load.", }, [ POWER5p_PME_PM_L2SC_RCLD_DISP_FAIL_ADDR ] = { .pme_name = "PM_L2SC_RCLD_DISP_FAIL_ADDR", .pme_code = 0x711c2, .pme_short_desc = "L2 slice C RC load dispatch attempt failed due to address collision with RC/CO/SN/SQ", .pme_long_desc = "A Read/Claim dispatch for a load failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time.", }, [ POWER5p_PME_PM_LSU0_REJECT_ERAT_MISS ] = { .pme_name = "PM_LSU0_REJECT_ERAT_MISS", .pme_code = 0xc40c3, .pme_short_desc = "LSU0 reject due to ERAT miss", .pme_long_desc = "Total cycles the Load Store Unit 0 is busy rejecting instructions due to an ERAT miss. 
Requests that miss the Derat are rejected and retried until the request hits in the Erat.", }, [ POWER5p_PME_PM_DATA_FROM_L375_MOD ] = { .pme_name = "PM_DATA_FROM_L375_MOD", .pme_code = 0x1c30a7, .pme_short_desc = "Data loaded from L3.75 modified", .pme_long_desc = "The processor's Data Cache was reloaded with modified (M) data from the L3 of a chip on the same module as this processor is located due to a demand load.", }, [ POWER5p_PME_PM_LSU_LMQ_SRQ_EMPTY_CYC ] = { .pme_name = "PM_LSU_LMQ_SRQ_EMPTY_CYC", .pme_code = 0x200015, .pme_short_desc = "Cycles LMQ and SRQ empty", .pme_long_desc = "Cycles when both the LMQ and SRQ are empty (LSU is idle)", }, [ POWER5p_PME_PM_DTLB_MISS_64K ] = { .pme_name = "PM_DTLB_MISS_64K", .pme_code = 0x2c208d, .pme_short_desc = "Data TLB miss for 64K page", .pme_long_desc = "Data TLB references to 64KB pages that missed the TLB. Page size is determined at TLB reload time.", }, [ POWER5p_PME_PM_LSU0_REJECT_RELOAD_CDF ] = { .pme_name = "PM_LSU0_REJECT_RELOAD_CDF", .pme_code = 0xc40c2, .pme_short_desc = "LSU0 reject due to reload CDF or tag update collision", .pme_long_desc = "Total cycles the Load Store Unit 0 is busy rejecting instructions because of Critical Data Forward. When critical data arrives from the storage system it is formatted and immediately forwarded, bypassing the data cache, to the destination register using the result bus. Any instruction the requires the result bus in the same cycle is rejected. 
Tag update rejects are caused when an instruction requires access to the Dcache directory or ERAT in the same system when they are being updated.", }, [ POWER5p_PME_PM_0INST_FETCH ] = { .pme_name = "PM_0INST_FETCH", .pme_code = 0x42208d, .pme_short_desc = "No instructions fetched", .pme_long_desc = "No instructions were fetched this cycles (due to IFU hold, redirect, or icache miss)", }, [ POWER5p_PME_PM_LSU1_REJECT_RELOAD_CDF ] = { .pme_name = "PM_LSU1_REJECT_RELOAD_CDF", .pme_code = 0xc40c6, .pme_short_desc = "LSU1 reject due to reload CDF or tag update collision", .pme_long_desc = "Total cycles the Load Store Unit 1 is busy rejecting instructions because of Critical Data Forward. When critical data arrives from the storage system it is formatted and immediately forwarded, bypassing the data cache, to the destination register using the result bus. Any instruction the requires the result bus in the same cycle is rejected. Tag update rejects are caused when an instruction requires access to the Dcache directory or ERAT in the same system when they are being updated.", }, [ POWER5p_PME_PM_MEM_WQ_DISP_Q0to7 ] = { .pme_name = "PM_MEM_WQ_DISP_Q0to7", .pme_code = 0x723e6, .pme_short_desc = "Memory write queue dispatched to queues 0-7", .pme_long_desc = "A memory operation was dispatched to a write queue in the range between 0 and 7. This event is sent from the Memory Controller clock domain and must be scaled accordingly.", }, [ POWER5p_PME_PM_L1_PREF ] = { .pme_name = "PM_L1_PREF", .pme_code = 0xc70e7, .pme_short_desc = "L1 cache data prefetches", .pme_long_desc = "A request to prefetch data into the L1 was made", }, [ POWER5p_PME_PM_MRK_DATA_FROM_LMEM_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_LMEM_CYC", .pme_code = 0x4c70a0, .pme_short_desc = "Marked load latency from local memory", .pme_long_desc = "Cycles a marked load waited for data from this level of the storage system. 
Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. To calculate average latency divide this count by the number of marked misses to the same level.", }, [ POWER5p_PME_PM_BRQ_FULL_CYC ] = { .pme_name = "PM_BRQ_FULL_CYC", .pme_code = 0x100c5, .pme_short_desc = "Cycles branch queue full", .pme_long_desc = "Cycles when the issue queue that feeds the branch unit is full. This condition will prevent dispatch groups from being dispatched. This event only indicates that the queue was full, not that dispatch was prevented.", }, [ POWER5p_PME_PM_GRP_IC_MISS_NONSPEC ] = { .pme_name = "PM_GRP_IC_MISS_NONSPEC", .pme_code = 0x112099, .pme_short_desc = "Group experienced non-speculative I cache miss", .pme_long_desc = "Number of groups, counted at completion, that have encountered an instruction cache miss.", }, [ POWER5p_PME_PM_PTEG_FROM_L275_MOD ] = { .pme_name = "PM_PTEG_FROM_L275_MOD", .pme_code = 0x1830a3, .pme_short_desc = "PTEG loaded from L2.75 modified", .pme_long_desc = "A Page Table Entry was loaded into the TLB with modified (M) data from the L2 on a different module than this processor is located due to a demand load.", }, [ POWER5p_PME_PM_MRK_LD_MISS_L1_LSU0 ] = { .pme_name = "PM_MRK_LD_MISS_L1_LSU0", .pme_code = 0x820e0, .pme_short_desc = "LSU0 marked L1 D cache load misses", .pme_long_desc = "Load references that miss the Level 1 Data cache, by LSU0.", }, [ POWER5p_PME_PM_MRK_DATA_FROM_L375_SHR_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_L375_SHR_CYC", .pme_code = 0x2c70a7, .pme_short_desc = "Marked load latency from L3.75 shared", .pme_long_desc = "Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. 
To calculate average latency divide this count by the number of marked misses to the same level.", }, [ POWER5p_PME_PM_DATA_FROM_L3 ] = { .pme_name = "PM_DATA_FROM_L3", .pme_code = 0x1c308e, .pme_short_desc = "Data loaded from L3", .pme_long_desc = "The processor's Data Cache was reloaded from the local L3 due to a demand load.", }, [ POWER5p_PME_PM_INST_FROM_L2 ] = { .pme_name = "PM_INST_FROM_L2", .pme_code = 0x122086, .pme_short_desc = "Instruction fetched from L2", .pme_long_desc = "An instruction fetch group was fetched from L2. Fetch Groups can contain up to 8 instructions", }, [ POWER5p_PME_PM_LSU_FLUSH ] = { .pme_name = "PM_LSU_FLUSH", .pme_code = 0x110c5, .pme_short_desc = "Flush initiated by LSU", .pme_long_desc = "A flush was initiated by the Load Store Unit", }, [ POWER5p_PME_PM_PMC2_OVERFLOW ] = { .pme_name = "PM_PMC2_OVERFLOW", .pme_code = 0x30000a, .pme_short_desc = "PMC2 Overflow", .pme_long_desc = "Overflows from PMC2 are counted. This effectively widens the PMC. The Overflow from the original PMC will not trigger an exception even if the PMU is configured to generate exceptions on overflow.", }, [ POWER5p_PME_PM_FPU0_DENORM ] = { .pme_name = "PM_FPU0_DENORM", .pme_code = 0x20e0, .pme_short_desc = "FPU0 received denormalized data", .pme_long_desc = "FPU0 has encountered a denormalized operand.", }, [ POWER5p_PME_PM_FPU1_FMOV_FEST ] = { .pme_name = "PM_FPU1_FMOV_FEST", .pme_code = 0x10c4, .pme_short_desc = "FPU1 executed FMOV or FEST instructions", .pme_long_desc = "FPU1 has executed a move kind of instruction or one of the estimate instructions. 
This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ.", }, [ POWER5p_PME_PM_INST_FETCH_CYC ] = { .pme_name = "PM_INST_FETCH_CYC", .pme_code = 0x220e4, .pme_short_desc = "Cycles at least 1 instruction fetched", .pme_long_desc = "Cycles when at least one instruction was sent from the fetch unit to the decode unit.", }, [ POWER5p_PME_PM_INST_DISP ] = { .pme_name = "PM_INST_DISP", .pme_code = 0x300009, .pme_short_desc = "Instructions dispatched", .pme_long_desc = "Number of PowerPC instructions successfully dispatched.", }, [ POWER5p_PME_PM_LSU_LDF ] = { .pme_name = "PM_LSU_LDF", .pme_code = 0x1c50a8, .pme_short_desc = "LSU executed Floating Point load instruction", .pme_long_desc = "LSU executed Floating Point load instruction. Combined Unit 0 + 1.", }, [ POWER5p_PME_PM_DATA_FROM_L25_SHR ] = { .pme_name = "PM_DATA_FROM_L25_SHR", .pme_code = 0x1c3097, .pme_short_desc = "Data loaded from L2.5 shared", .pme_long_desc = "The processor's Data Cache was reloaded with shared (T or SL) data from the L2 of a chip on the same module as this processor is located due to a demand load.", }, [ POWER5p_PME_PM_L1_DCACHE_RELOAD_VALID ] = { .pme_name = "PM_L1_DCACHE_RELOAD_VALID", .pme_code = 0xc30e4, .pme_short_desc = "L1 reload data source valid", .pme_long_desc = "The data source information is valid,the data cache has been reloaded. Prior to POWER5+ this included data cache reloads due to prefetch activity. With POWER5+ this now only includes reloads due to demand loads.", }, [ POWER5p_PME_PM_MEM_WQ_DISP_DCLAIM ] = { .pme_name = "PM_MEM_WQ_DISP_DCLAIM", .pme_code = 0x713c6, .pme_short_desc = "Memory write queue dispatched due to dclaim/flush", .pme_long_desc = "A memory dclaim or flush operation was dispatched to a write queue. 
This event is sent from the Memory Controller clock domain and must be scaled accordingly.", }, [ POWER5p_PME_PM_MRK_GRP_ISSUED ] = { .pme_name = "PM_MRK_GRP_ISSUED", .pme_code = 0x100015, .pme_short_desc = "Marked group issued", .pme_long_desc = "A sampled instruction was issued.", }, [ POWER5p_PME_PM_FPU_FULL_CYC ] = { .pme_name = "PM_FPU_FULL_CYC", .pme_code = 0x110090, .pme_short_desc = "Cycles FPU issue queue full", .pme_long_desc = "Cycles when one or both FPU issue queues are full. Combined Unit 0 + 1. Use with caution since this is the sum of cycles when Unit 0 was full plus Unit 1 full. It does not indicate when both units were full.", }, [ POWER5p_PME_PM_INST_FROM_L35_MOD ] = { .pme_name = "PM_INST_FROM_L35_MOD", .pme_code = 0x22209d, .pme_short_desc = "Instruction fetched from L3.5 modified", .pme_long_desc = "An instruction fetch group was fetched with modified (M) data from the L3 of a chip on the same module as this processor is located. Fetch groups can contain up to 8 instructions", }, [ POWER5p_PME_PM_FPU_FMA ] = { .pme_name = "PM_FPU_FMA", .pme_code = 0x200088, .pme_short_desc = "FPU executed multiply-add instruction", .pme_long_desc = "This signal is active for one cycle when FPU is executing multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. Combined Unit 0 + Unit 1.", }, [ POWER5p_PME_PM_THRD_PRIO_3_CYC ] = { .pme_name = "PM_THRD_PRIO_3_CYC", .pme_code = 0x420e2, .pme_short_desc = "Cycles thread running at priority level 3", .pme_long_desc = "Cycles this thread was running at priority level 3.", }, [ POWER5p_PME_PM_MRK_CRU_FIN ] = { .pme_name = "PM_MRK_CRU_FIN", .pme_code = 0x400005, .pme_short_desc = "Marked instruction CRU processing finished", .pme_long_desc = "The Condition Register Unit finished a marked instruction. 
Instructions that finish may not necessary complete.", }, [ POWER5p_PME_PM_SNOOP_WR_RETRY_WQ ] = { .pme_name = "PM_SNOOP_WR_RETRY_WQ", .pme_code = 0x716c6, .pme_short_desc = "Snoop write/dclaim retry due to collision with active write queue", .pme_long_desc = "A snoop request for a write or dclaim to memory was retried because it matched the cache line of an active write. The snoop request is retried and the active write is changed to high priority. This event is sent from the Memory Controller clock domain and must be scaled accordingly.", }, [ POWER5p_PME_PM_CMPLU_STALL_REJECT ] = { .pme_name = "PM_CMPLU_STALL_REJECT", .pme_code = 0x41109a, .pme_short_desc = "Completion stall caused by reject", .pme_long_desc = "Following a completion stall (any period when no groups completed) the last instruction to finish before completion resumes suffered a load/store reject. This is a subset of PM_CMPLU_STALL_LSU.", }, [ POWER5p_PME_PM_MRK_FXU_FIN ] = { .pme_name = "PM_MRK_FXU_FIN", .pme_code = 0x200014, .pme_short_desc = "Marked instruction FXU processing finished", .pme_long_desc = "One of the Fixed Point Units finished a marked instruction. Instructions that finish may not necessary complete.", }, [ POWER5p_PME_PM_LSU1_REJECT_ERAT_MISS ] = { .pme_name = "PM_LSU1_REJECT_ERAT_MISS", .pme_code = 0xc40c7, .pme_short_desc = "LSU1 reject due to ERAT miss", .pme_long_desc = "Total cycles the Load Store Unit 1 is busy rejecting instructions due to an ERAT miss. Requests that miss the Derat are rejected and retried until the request hits in the Erat.", }, [ POWER5p_PME_PM_L2SB_RCST_DISP_FAIL_OTHER ] = { .pme_name = "PM_L2SB_RCST_DISP_FAIL_OTHER", .pme_code = 0x732e1, .pme_short_desc = "L2 slice B RC store dispatch attempt failed due to other reasons", .pme_long_desc = "A Read/Claim dispatch for a store failed for some reason other than Full or Collision conditions. 
Rejected dispatches do not count because they have not yet been attempted.", }, [ POWER5p_PME_PM_L2SC_RC_DISP_FAIL_CO_BUSY ] = { .pme_name = "PM_L2SC_RC_DISP_FAIL_CO_BUSY", .pme_code = 0x703c2, .pme_short_desc = "L2 slice C RC dispatch attempt failed due to RC/CO pair chosen was miss and CO already busy", .pme_long_desc = "A Read/Claim Dispatch was rejected at dispatch because the Castout Machine was busy. In the case of an RC starting up on a miss and the victim is valid, the CO machine must be available for the RC to process the access. If the CO is still busy working on an old castout, then the RC must not-ack the access if it is a miss(re-issued by the CIU). If it is a miss and the CO is available to process the castout, the RC will accept the access. Once the RC has finished, it can restart and process new accesses that result in a hit (or miss that doesn't need a CO) even though the CO is still processing a castout from a previous access.", }, [ POWER5p_PME_PM_PMC4_OVERFLOW ] = { .pme_name = "PM_PMC4_OVERFLOW", .pme_code = 0x10000a, .pme_short_desc = "PMC4 Overflow", .pme_long_desc = "Overflows from PMC4 are counted. This effectively widens the PMC. 
The Overflow from the original PMC will not trigger an exception even if the PMU is configured to generate exceptions on overflow.", }, [ POWER5p_PME_PM_L3SA_SNOOP_RETRY ] = { .pme_name = "PM_L3SA_SNOOP_RETRY", .pme_code = 0x731e3, .pme_short_desc = "L3 slice A snoop retries", .pme_long_desc = "Number of times an L3 retried a snoop because it got two in at the same time (one on snp_a, one on snp_b)", }, [ POWER5p_PME_PM_PTEG_FROM_L35_MOD ] = { .pme_name = "PM_PTEG_FROM_L35_MOD", .pme_code = 0x28309e, .pme_short_desc = "PTEG loaded from L3.5 modified", .pme_long_desc = "A Page Table Entry was loaded into the TLB with modified (M) data from the L3 of a chip on the same module as this processor is located, due to a demand load.", }, [ POWER5p_PME_PM_INST_FROM_L25_MOD ] = { .pme_name = "PM_INST_FROM_L25_MOD", .pme_code = 0x222096, .pme_short_desc = "Instruction fetched from L2.5 modified", .pme_long_desc = "An instruction fetch group was fetched with modified (M) data from the L2 of a chip on the same module as this processor is located. Fetch groups can contain up to 8 instructions.", }, [ POWER5p_PME_PM_THRD_SMT_HANG ] = { .pme_name = "PM_THRD_SMT_HANG", .pme_code = 0x330e7, .pme_short_desc = "SMT hang detected", .pme_long_desc = "A hung thread was detected", }, [ POWER5p_PME_PM_CMPLU_STALL_ERAT_MISS ] = { .pme_name = "PM_CMPLU_STALL_ERAT_MISS", .pme_code = 0x41109b, .pme_short_desc = "Completion stall caused by ERAT miss", .pme_long_desc = "Following a completion stall (any period when no groups completed) the last instruction to finish before completion resumes suffered an ERAT miss. This is a subset of PM_CMPLU_STALL_REJECT.", }, [ POWER5p_PME_PM_L3SA_MOD_TAG ] = { .pme_name = "PM_L3SA_MOD_TAG", .pme_code = 0x720e3, .pme_short_desc = "L3 slice A transition from modified to TAG", .pme_long_desc = "L3 snooper detects someone doing a read to a line that is truly M in this L3(i.e. 
L3 going M->T or M->I(go_Mu case) Mu|Me are not included since they are formed due to a prev read op). Tx is not included since it is considered shared at this point.", }, [ POWER5p_PME_PM_INST_FROM_L2MISS ] = { .pme_name = "PM_INST_FROM_L2MISS", .pme_code = 0x12209b, .pme_short_desc = "Instruction fetched missed L2", .pme_long_desc = "An instruction fetch group was fetched from beyond the local L2.", }, [ POWER5p_PME_PM_FLUSH_SYNC ] = { .pme_name = "PM_FLUSH_SYNC", .pme_code = 0x330e1, .pme_short_desc = "Flush caused by sync", .pme_long_desc = "This thread has been flushed at dispatch due to a sync, lwsync, ptesync, or tlbsync instruction. This allows the other thread to have more machine resources for it to make progress until the sync finishes.", }, [ POWER5p_PME_PM_MRK_GRP_DISP ] = { .pme_name = "PM_MRK_GRP_DISP", .pme_code = 0x100002, .pme_short_desc = "Marked group dispatched", .pme_long_desc = "A group containing a sampled instruction was dispatched", }, [ POWER5p_PME_PM_MEM_RQ_DISP_Q8to11 ] = { .pme_name = "PM_MEM_RQ_DISP_Q8to11", .pme_code = 0x722e6, .pme_short_desc = "Memory read queue dispatched to queues 8-11", .pme_long_desc = "A memory operation was dispatched to read queue 8,9,10 or 11. This event is sent from the Memory Controller clock domain and must be scaled accordingly.", }, [ POWER5p_PME_PM_L2SC_ST_HIT ] = { .pme_name = "PM_L2SC_ST_HIT", .pme_code = 0x733e2, .pme_short_desc = "L2 slice C store hits", .pme_long_desc = "A store request made from the core hit in the L2 directory. The event is provided on each of the three slices A, B, and C.", }, [ POWER5p_PME_PM_L2SB_MOD_TAG ] = { .pme_name = "PM_L2SB_MOD_TAG", .pme_code = 0x720e1, .pme_short_desc = "L2 slice B transition from modified to tagged", .pme_long_desc = "A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. 
The event is provided on each of the three slices A, B, and C.", }, [ POWER5p_PME_PM_CLB_EMPTY_CYC ] = { .pme_name = "PM_CLB_EMPTY_CYC", .pme_code = 0x410c6, .pme_short_desc = "Cycles CLB empty", .pme_long_desc = "Cycles when both thread's CLB is completely empty.", }, [ POWER5p_PME_PM_L2SB_ST_HIT ] = { .pme_name = "PM_L2SB_ST_HIT", .pme_code = 0x733e1, .pme_short_desc = "L2 slice B store hits", .pme_long_desc = "A store request made from the core hit in the L2 directory. This event is provided on each of the three L2 slices A, B and C.", }, [ POWER5p_PME_PM_MEM_NONSPEC_RD_CANCEL ] = { .pme_name = "PM_MEM_NONSPEC_RD_CANCEL", .pme_code = 0x711c6, .pme_short_desc = "Non speculative memory read cancelled", .pme_long_desc = "A non-speculative read was cancelled because the combined response indicated it was sourced from aother L2 or L3. This event is sent from the Memory Controller clock domain and must be scaled accordingly", }, [ POWER5p_PME_PM_BR_PRED_CR_TA ] = { .pme_name = "PM_BR_PRED_CR_TA", .pme_code = 0x423087, .pme_short_desc = "A conditional branch was predicted, CR and target prediction", .pme_long_desc = "Both the condition (taken or not taken) and the target address of a branch instruction was predicted.", }, [ POWER5p_PME_PM_MRK_LSU0_FLUSH_SRQ ] = { .pme_name = "PM_MRK_LSU0_FLUSH_SRQ", .pme_code = 0x810c3, .pme_short_desc = "LSU0 marked SRQ lhs flushes", .pme_long_desc = "A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group.", }, [ POWER5p_PME_PM_MRK_LSU_FLUSH_ULD ] = { .pme_name = "PM_MRK_LSU_FLUSH_ULD", .pme_code = 0x1810a8, .pme_short_desc = "Marked unaligned load flushes", .pme_long_desc = "A marked load was flushed because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1)", }, [ POWER5p_PME_PM_INST_DISP_ATTEMPT ] = { .pme_name = "PM_INST_DISP_ATTEMPT", .pme_code = 0x120e1, .pme_short_desc = "Instructions dispatch attempted", .pme_long_desc = "Number of PowerPC 
Instructions dispatched (attempted, not filtered by success.", }, [ POWER5p_PME_PM_INST_FROM_RMEM ] = { .pme_name = "PM_INST_FROM_RMEM", .pme_code = 0x422086, .pme_short_desc = "Instruction fetched from remote memory", .pme_long_desc = "An instruction fetch group was fetched from memory attached to a different module than this proccessor is located on. Fetch groups can contain up to 8 instructions", }, [ POWER5p_PME_PM_ST_REF_L1_LSU0 ] = { .pme_name = "PM_ST_REF_L1_LSU0", .pme_code = 0xc10c1, .pme_short_desc = "LSU0 L1 D cache store references", .pme_long_desc = "Store references to the Data Cache by LSU0.", }, [ POWER5p_PME_PM_LSU0_DERAT_MISS ] = { .pme_name = "PM_LSU0_DERAT_MISS", .pme_code = 0x800c2, .pme_short_desc = "LSU0 DERAT misses", .pme_long_desc = "Total D-ERAT Misses by LSU0. Requests that miss the Derat are rejected and retried until the request hits in the Erat. This may result in multiple erat misses for the same instruction.", }, [ POWER5p_PME_PM_FPU_STALL3 ] = { .pme_name = "PM_FPU_STALL3", .pme_code = 0x202088, .pme_short_desc = "FPU stalled in pipe3", .pme_long_desc = "FPU has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). This signal is active during the entire duration of the stall. 
Combined Unit 0 + Unit 1.", }, [ POWER5p_PME_PM_L2SB_RCLD_DISP ] = { .pme_name = "PM_L2SB_RCLD_DISP", .pme_code = 0x701c1, .pme_short_desc = "L2 slice B RC load dispatch attempt", .pme_long_desc = "A Read/Claim dispatch for a Load was attempted", }, [ POWER5p_PME_PM_BR_PRED_CR ] = { .pme_name = "PM_BR_PRED_CR", .pme_code = 0x230e2, .pme_short_desc = "A conditional branch was predicted, CR prediction", .pme_long_desc = "A conditional branch instruction was predicted as taken or not taken.", }, [ POWER5p_PME_PM_MRK_DATA_FROM_L2 ] = { .pme_name = "PM_MRK_DATA_FROM_L2", .pme_code = 0x1c7087, .pme_short_desc = "Marked data loaded from L2", .pme_long_desc = "The processor's Data Cache was reloaded from the local L2 due to a marked load.", }, [ POWER5p_PME_PM_LSU0_FLUSH_SRQ ] = { .pme_name = "PM_LSU0_FLUSH_SRQ", .pme_code = 0xc00c3, .pme_short_desc = "LSU0 SRQ lhs flushes", .pme_long_desc = "A store was flushed by unit 0 because younger load hits and older store that is already in the SRQ or in the same group.", }, [ POWER5p_PME_PM_FAB_PNtoNN_DIRECT ] = { .pme_name = "PM_FAB_PNtoNN_DIRECT", .pme_code = 0x703c7, .pme_short_desc = "PN to NN beat went straight to its destination", .pme_long_desc = "Fabric Data beats that the base chip takes the inbound PN data and passes it through to the outbound NN bus without going into a sidecar. 
The signal is delivered at FBC speed and the count must be scaled.", }, [ POWER5p_PME_PM_IOPS_CMPL ] = { .pme_name = "PM_IOPS_CMPL", .pme_code = 0x1, .pme_short_desc = "Internal operations completed", .pme_long_desc = "Number of internal operations that completed.", }, [ POWER5p_PME_PM_L2SA_RCST_DISP ] = { .pme_name = "PM_L2SA_RCST_DISP", .pme_code = 0x702c0, .pme_short_desc = "L2 slice A RC store dispatch attempt", .pme_long_desc = "A Read/Claim dispatch for a Store was attempted.", }, [ POWER5p_PME_PM_L2SA_RCST_DISP_FAIL_OTHER ] = { .pme_name = "PM_L2SA_RCST_DISP_FAIL_OTHER", .pme_code = 0x732e0, .pme_short_desc = "L2 slice A RC store dispatch attempt failed due to other reasons", .pme_long_desc = "A Read/Claim dispatch for a store failed for some reason other than Full or Collision conditions. Rejected dispatches do not count because they have not yet been attempted.", }, [ POWER5p_PME_PM_L2SC_SHR_INV ] = { .pme_name = "PM_L2SC_SHR_INV", .pme_code = 0x710c2, .pme_short_desc = "L2 slice C transition from shared to invalid", .pme_long_desc = "A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. This transition was caused by any external snoop request. The event is provided on each of the three slices A, B, and C. NOTE: For this event to be useful the tablewalk duration event should also be counted.", }, [ POWER5p_PME_PM_SNOOP_RETRY_AB_COLLISION ] = { .pme_name = "PM_SNOOP_RETRY_AB_COLLISION", .pme_code = 0x735e6, .pme_short_desc = "Snoop retry due to a b collision", .pme_long_desc = "Snoop retry due to a b collision", }, [ POWER5p_PME_PM_FAB_PNtoVN_SIDECAR ] = { .pme_name = "PM_FAB_PNtoVN_SIDECAR", .pme_code = 0x733e7, .pme_short_desc = "PN to VN beat went to sidecar first", .pme_long_desc = "Fabric data beats that the base chip takes the inbound PN data and forwards it on to the outbound VN data bus after going into a sidecar first. 
The signal is delivered at FBC speed and the count must be scaled accordingly.", }, [ POWER5p_PME_PM_LSU0_REJECT_LMQ_FULL ] = { .pme_name = "PM_LSU0_REJECT_LMQ_FULL", .pme_code = 0xc40c1, .pme_short_desc = "LSU0 reject due to LMQ full or missed data coming", .pme_long_desc = "Total cycles the Load Store Unit 0 is busy rejecting instructions because the Load Miss Queue was full. The LMQ has eight entries. If all eight entries are full, subsequent load instructions are rejected.", }, [ POWER5p_PME_PM_LSU_LMQ_S0_ALLOC ] = { .pme_name = "PM_LSU_LMQ_S0_ALLOC", .pme_code = 0xc30e6, .pme_short_desc = "LMQ slot 0 allocated", .pme_long_desc = "The first entry in the LMQ was allocated.", }, [ POWER5p_PME_PM_SNOOP_PW_RETRY_RQ ] = { .pme_name = "PM_SNOOP_PW_RETRY_RQ", .pme_code = 0x707c6, .pme_short_desc = "Snoop partial-write retry due to collision with active read queue", .pme_long_desc = "A snoop request for a partial write to memory was retried because it matched the cache line of an active read. This event is sent from the Memory Controller clock domain and must be scaled accordingly.", }, [ POWER5p_PME_PM_DTLB_REF ] = { .pme_name = "PM_DTLB_REF", .pme_code = 0xc20e4, .pme_short_desc = "Data TLB references", .pme_long_desc = "Total number of Data TLB references for all page sizes. Page size is determined at TLB reload time.", }, [ POWER5p_PME_PM_PTEG_FROM_L3 ] = { .pme_name = "PM_PTEG_FROM_L3", .pme_code = 0x18308e, .pme_short_desc = "PTEG loaded from L3", .pme_long_desc = "A Page Table Entry was loaded into the TLB from the local L3 due to a demand load.", }, [ POWER5p_PME_PM_FAB_M1toVNorNN_SIDECAR_EMPTY ] = { .pme_name = "PM_FAB_M1toVNorNN_SIDECAR_EMPTY", .pme_code = 0x712c7, .pme_short_desc = "M1 to VN/NN sidecar empty", .pme_long_desc = "Fabric cycles when the Minus-1 jump sidecar (sidecars for mcm to mcm data transfer) is empty. 
The signal is delivered at FBC speed and the count must be scaled accordingly.", }, [ POWER5p_PME_PM_LSU_SRQ_EMPTY_CYC ] = { .pme_name = "PM_LSU_SRQ_EMPTY_CYC", .pme_code = 0x400015, .pme_short_desc = "Cycles SRQ empty", .pme_long_desc = "Cycles the Store Request Queue is empty", }, [ POWER5p_PME_PM_FPU1_STF ] = { .pme_name = "PM_FPU1_STF", .pme_code = 0x20e6, .pme_short_desc = "FPU1 executed store instruction", .pme_long_desc = "FPU1 has executed a Floating Point Store instruction.", }, [ POWER5p_PME_PM_LSU_LMQ_S0_VALID ] = { .pme_name = "PM_LSU_LMQ_S0_VALID", .pme_code = 0xc30e5, .pme_short_desc = "LMQ slot 0 valid", .pme_long_desc = "This signal is asserted every cycle when the first entry in the LMQ is valid. The LMQ had eight entries that are allocated FIFO", }, [ POWER5p_PME_PM_GCT_USAGE_00to59_CYC ] = { .pme_name = "PM_GCT_USAGE_00to59_CYC", .pme_code = 0x10001f, .pme_short_desc = "Cycles GCT less than 60% full", .pme_long_desc = "Cycles when the Global Completion Table has fewer than 60% of its slots used. The GCT has 20 entries shared between threads.", }, [ POWER5p_PME_PM_FPU_FMOV_FEST ] = { .pme_name = "PM_FPU_FMOV_FEST", .pme_code = 0x301088, .pme_short_desc = "FPU executed FMOV or FEST instructions", .pme_long_desc = "The floating point unit has executed a move kind of instruction or one of the estimate instructions. This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ.. Combined Unit 0 + Unit 1.", }, [ POWER5p_PME_PM_DATA_FROM_L2MISS ] = { .pme_name = "PM_DATA_FROM_L2MISS", .pme_code = 0x3c309b, .pme_short_desc = "Data loaded missed L2", .pme_long_desc = "The processor's Data Cache was reloaded but not from the local L2.", }, [ POWER5p_PME_PM_XER_MAP_FULL_CYC ] = { .pme_name = "PM_XER_MAP_FULL_CYC", .pme_code = 0x100c2, .pme_short_desc = "Cycles XER mapper full", .pme_long_desc = "The XER mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. 
This event only indicates that the mapper was full, not that dispatch was prevented.", }, [ POWER5p_PME_PM_GRP_DISP_BLK_SB_CYC ] = { .pme_name = "PM_GRP_DISP_BLK_SB_CYC", .pme_code = 0x130e1, .pme_short_desc = "Cycles group dispatch blocked by scoreboard", .pme_long_desc = "A scoreboard operation on a non-renamed resource has blocked dispatch.", }, [ POWER5p_PME_PM_FLUSH_SB ] = { .pme_name = "PM_FLUSH_SB", .pme_code = 0x330e2, .pme_short_desc = "Flush caused by scoreboard operation", .pme_long_desc = "This thread has been flushed at dispatch because its scoreboard bit is set indicating that a non-renamed resource is being updated. This allows the other thread to have more machine resources for it to make progress while this thread is stalled.", }, [ POWER5p_PME_PM_MRK_DATA_FROM_L375_SHR ] = { .pme_name = "PM_MRK_DATA_FROM_L375_SHR", .pme_code = 0x3c709e, .pme_short_desc = "Marked data loaded from L3.75 shared", .pme_long_desc = "The processor's Data Cache was reloaded with shared (S) data from the L3 of a chip on a different module than this processor is located due to a marked load.", }, [ POWER5p_PME_PM_MRK_GRP_CMPL ] = { .pme_name = "PM_MRK_GRP_CMPL", .pme_code = 0x400013, .pme_short_desc = "Marked group completed", .pme_long_desc = "A group containing a sampled instruction completed. Microcoded instructions that span multiple groups will generate this event once per group.", }, [ POWER5p_PME_PM_SUSPENDED ] = { .pme_name = "PM_SUSPENDED", .pme_code = 0x0, .pme_short_desc = "Suspended", .pme_long_desc = "The counter is suspended (does not count).", }, [ POWER5p_PME_PM_SNOOP_RD_RETRY_QFULL ] = { .pme_name = "PM_SNOOP_RD_RETRY_QFULL", .pme_code = 0x700c6, .pme_short_desc = "Snoop read retry due to read queue full", .pme_long_desc = "A snoop request for a read from memory was retried because the read queues were full. 
This event is sent from the Memory Controller clock domain and must be scaled accordingly.", }, [ POWER5p_PME_PM_GRP_IC_MISS_BR_REDIR_NONSPEC ] = { .pme_name = "PM_GRP_IC_MISS_BR_REDIR_NONSPEC", .pme_code = 0x120e5, .pme_short_desc = "Group experienced non-speculative I cache miss or branch redirect", .pme_long_desc = "Group experienced non-speculative I cache miss or branch redirect", }, [ POWER5p_PME_PM_DATA_FROM_L35_SHR ] = { .pme_name = "PM_DATA_FROM_L35_SHR", .pme_code = 0x1c309e, .pme_short_desc = "Data loaded from L3.5 shared", .pme_long_desc = "The processor's Data Cache was reloaded with shared (S) data from the L3 of a chip on the same module as this processor is located due to a demand load.", }, [ POWER5p_PME_PM_L3SB_MOD_INV ] = { .pme_name = "PM_L3SB_MOD_INV", .pme_code = 0x730e4, .pme_short_desc = "L3 slice B transition from modified to invalid", .pme_long_desc = "L3 snooper detects someone doing a store to a line that is truly M in this L3 (i.e. L3 going M=>I). Mu|Me are not included since they are formed due to a prev read op. 
Tx is not included since it is considered shared at this point.", }, [ POWER5p_PME_PM_STCX_FAIL ] = { .pme_name = "PM_STCX_FAIL", .pme_code = 0x820e1, .pme_short_desc = "STCX failed", .pme_long_desc = "A stcx (stwcx or stdcx) failed", }, [ POWER5p_PME_PM_LD_MISS_L1_LSU1 ] = { .pme_name = "PM_LD_MISS_L1_LSU1", .pme_code = 0xc10c5, .pme_short_desc = "LSU1 L1 D cache load misses", .pme_long_desc = "Load references that miss the Level 1 Data cache, by unit 1.", }, [ POWER5p_PME_PM_GRP_DISP ] = { .pme_name = "PM_GRP_DISP", .pme_code = 0x200002, .pme_short_desc = "Group dispatches", .pme_long_desc = "A group was dispatched", }, [ POWER5p_PME_PM_DC_PREF_DST ] = { .pme_name = "PM_DC_PREF_DST", .pme_code = 0x830e6, .pme_short_desc = "DST (Data Stream Touch) stream start", .pme_long_desc = "A prefetch stream was started using the DST instruction.", }, [ POWER5p_PME_PM_FPU1_DENORM ] = { .pme_name = "PM_FPU1_DENORM", .pme_code = 0x20e4, .pme_short_desc = "FPU1 received denormalized data", .pme_long_desc = "FPU1 has encountered a denormalized operand.", }, [ POWER5p_PME_PM_FPU0_FPSCR ] = { .pme_name = "PM_FPU0_FPSCR", .pme_code = 0x30e0, .pme_short_desc = "FPU0 executed FPSCR instruction", .pme_long_desc = "FPU0 has executed FPSCR move related instruction. This could be mtfsfi*, mtfsb0*, mtfsb1*, mffs*, mtfsf*, mcrsf* where XYZ* means XYZ, XYZs, XYZ., XYZs.", }, [ POWER5p_PME_PM_DATA_FROM_L2 ] = { .pme_name = "PM_DATA_FROM_L2", .pme_code = 0x1c3087, .pme_short_desc = "Data loaded from L2", .pme_long_desc = "The processor's Data Cache was reloaded from the local L2 due to a demand load.", }, [ POWER5p_PME_PM_L2SA_RCLD_DISP_FAIL_ADDR ] = { .pme_name = "PM_L2SA_RCLD_DISP_FAIL_ADDR", .pme_code = 0x711c0, .pme_short_desc = "L2 slice A RC load dispatch attempt failed due to address collision with RC/CO/SN/SQ", .pme_long_desc = "A Read/Claim dispatch for a load failed because of an address conflict. 
Two RC machines will never both work on the same line or line in the same congruence class at the same time.", }, [ POWER5p_PME_PM_FPU_1FLOP ] = { .pme_name = "PM_FPU_1FLOP", .pme_code = 0x100090, .pme_short_desc = "FPU executed one flop instruction", .pme_long_desc = "The floating point unit has executed an add, mult, sub, compare, fsel, fneg, fabs, fnabs, fres, or frsqrte kind of instruction. These are single FLOP operations.", }, [ POWER5p_PME_PM_L2SC_RCLD_DISP_FAIL_OTHER ] = { .pme_name = "PM_L2SC_RCLD_DISP_FAIL_OTHER", .pme_code = 0x731e2, .pme_short_desc = "L2 slice C RC load dispatch attempt failed due to other reasons", .pme_long_desc = "A Read/Claim dispatch for a load failed for some reason other than Full or Collision conditions.", }, [ POWER5p_PME_PM_FPU0_FSQRT ] = { .pme_name = "PM_FPU0_FSQRT", .pme_code = 0xc2, .pme_short_desc = "FPU0 executed FSQRT instruction", .pme_long_desc = "FPU0 has executed a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs.", }, [ POWER5p_PME_PM_L2SC_RCST_DISP_FAIL_RC_FULL ] = { .pme_name = "PM_L2SC_RCST_DISP_FAIL_RC_FULL", .pme_code = 0x722e1, .pme_short_desc = "L2 slice C RC store dispatch attempt failed due to all RC full", .pme_long_desc = "A Read/Claim dispatch for a store failed because all RC machines are busy.", }, [ POWER5p_PME_PM_LD_REF_L1 ] = { .pme_name = "PM_LD_REF_L1", .pme_code = 0x1c10a8, .pme_short_desc = "L1 D cache load references", .pme_long_desc = "Load references to the Level 1 Data Cache. Combined unit 0 + 1.", }, [ POWER5p_PME_PM_INST_FROM_L1 ] = { .pme_name = "PM_INST_FROM_L1", .pme_code = 0x22208d, .pme_short_desc = "Instruction fetched from L1", .pme_long_desc = "An instruction fetch group was fetched from L1. 
Fetch Groups can contain up to 8 instructions", }, [ POWER5p_PME_PM_TLBIE_HELD ] = { .pme_name = "PM_TLBIE_HELD", .pme_code = 0x130e4, .pme_short_desc = "TLBIE held at dispatch", .pme_long_desc = "Cycles a TLBIE instruction was held at dispatch.", }, [ POWER5p_PME_PM_DC_PREF_OUT_OF_STREAMS ] = { .pme_name = "PM_DC_PREF_OUT_OF_STREAMS", .pme_code = 0xc50c2, .pme_short_desc = "D cache out of prefetch streams", .pme_long_desc = "A new prefetch stream was detected but no more stream entries were available.", }, [ POWER5p_PME_PM_MRK_DATA_FROM_L25_MOD_CYC ] = { .pme_name = "PM_MRK_DATA_FROM_L25_MOD_CYC", .pme_code = 0x4c70a2, .pme_short_desc = "Marked load latency from L2.5 modified", .pme_long_desc = "Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. To calculate average latency divide this count by the number of marked misses to the same level.", }, [ POWER5p_PME_PM_MRK_LSU1_FLUSH_SRQ ] = { .pme_name = "PM_MRK_LSU1_FLUSH_SRQ", .pme_code = 0x810c7, .pme_short_desc = "LSU1 marked SRQ lhs flushes", .pme_long_desc = "A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group.", }, [ POWER5p_PME_PM_MEM_RQ_DISP_Q0to3 ] = { .pme_name = "PM_MEM_RQ_DISP_Q0to3", .pme_code = 0x702c6, .pme_short_desc = "Memory read queue dispatched to queues 0-3", .pme_long_desc = "A memory operation was dispatched to read queue 0,1,2, or 3. 
This event is sent from the Memory Controller clock domain and must be scaled accordingly.", }, [ POWER5p_PME_PM_ST_REF_L1_LSU1 ] = { .pme_name = "PM_ST_REF_L1_LSU1", .pme_code = 0xc10c4, .pme_short_desc = "LSU1 L1 D cache store references", .pme_long_desc = "Store references to the Data Cache by LSU1.", }, [ POWER5p_PME_PM_MRK_LD_MISS_L1 ] = { .pme_name = "PM_MRK_LD_MISS_L1", .pme_code = 0x182088, .pme_short_desc = "Marked L1 D cache load misses", .pme_long_desc = "Marked L1 D cache load misses", }, [ POWER5p_PME_PM_L1_WRITE_CYC ] = { .pme_name = "PM_L1_WRITE_CYC", .pme_code = 0x230e7, .pme_short_desc = "Cycles writing to instruction L1", .pme_long_desc = "Cycles that a cache line was written to the instruction cache.", }, [ POWER5p_PME_PM_L2SC_ST_REQ ] = { .pme_name = "PM_L2SC_ST_REQ", .pme_code = 0x723e2, .pme_short_desc = "L2 slice C store requests", .pme_long_desc = "A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. The event is provided on each of the three slices A, B, and C.", }, [ POWER5p_PME_PM_CMPLU_STALL_FDIV ] = { .pme_name = "PM_CMPLU_STALL_FDIV", .pme_code = 0x21109b, .pme_short_desc = "Completion stall caused by FDIV or FQRT instruction", .pme_long_desc = "Following a completion stall (any period when no groups completed) the last instruction to finish before completion resumes was a floating point divide or square root instruction. 
This is a subset of PM_CMPLU_STALL_FPU.", }, [ POWER5p_PME_PM_THRD_SEL_OVER_CLB_EMPTY ] = { .pme_name = "PM_THRD_SEL_OVER_CLB_EMPTY", .pme_code = 0x410c2, .pme_short_desc = "Thread selection overrides caused by CLB empty", .pme_long_desc = "Thread selection was overridden because one thread's CLB was empty.", }, [ POWER5p_PME_PM_BR_MPRED_CR ] = { .pme_name = "PM_BR_MPRED_CR", .pme_code = 0x230e5, .pme_short_desc = "Branch mispredictions due to CR bit setting", .pme_long_desc = "A conditional branch instruction was incorrectly predicted as taken or not taken. The branch execution unit detects a branch mispredict because the CR value is opposite of the predicted value. This will result in a branch redirect flush if not overfidden by a flush of an older instruction.", }, [ POWER5p_PME_PM_L3SB_MOD_TAG ] = { .pme_name = "PM_L3SB_MOD_TAG", .pme_code = 0x720e4, .pme_short_desc = "L3 slice B transition from modified to TAG", .pme_long_desc = "L3 snooper detects someone doing a read to a line that is truly M in this L3(i.e. L3 going M->T or M->I(go_Mu case); Mu|Me are not included since they are formed due to a prev read op). Tx is not included since it is considered shared at this point.", }, [ POWER5p_PME_PM_MRK_DATA_FROM_L2MISS ] = { .pme_name = "PM_MRK_DATA_FROM_L2MISS", .pme_code = 0x3c709b, .pme_short_desc = "Marked data loaded missed L2", .pme_long_desc = "DL1 was reloaded from beyond L2 due to a marked demand load.", }, [ POWER5p_PME_PM_LSU_REJECT_SRQ ] = { .pme_name = "PM_LSU_REJECT_SRQ", .pme_code = 0x1c4088, .pme_short_desc = "LSU SRQ lhs rejects", .pme_long_desc = "Total cycles the Load Store Unit is busy rejecting instructions because of Load Hit Store conditions. Loads are rejected when data is needed from a previous store instruction but store forwarding is not possible because the data is not fully contained in the Store Data Queue or is not yet available in the Store Data Queue. 
Combined Unit 0 + 1.", }, [ POWER5p_PME_PM_LD_MISS_L1 ] = { .pme_name = "PM_LD_MISS_L1", .pme_code = 0x3c1088, .pme_short_desc = "L1 D cache load misses", .pme_long_desc = "Load references that miss the Level 1 Data cache. Combined unit 0 + 1.", }, [ POWER5p_PME_PM_INST_FROM_PREF ] = { .pme_name = "PM_INST_FROM_PREF", .pme_code = 0x32208d, .pme_short_desc = "Instruction fetched from prefetch", .pme_long_desc = "An instruction fetch group was fetched from the prefetch buffer. Fetch groups can contain up to 8 instructions", }, [ POWER5p_PME_PM_STCX_PASS ] = { .pme_name = "PM_STCX_PASS", .pme_code = 0x820e5, .pme_short_desc = "Stcx passes", .pme_long_desc = "A stcx (stwcx or stdcx) instruction was successful", }, [ POWER5p_PME_PM_DC_INV_L2 ] = { .pme_name = "PM_DC_INV_L2", .pme_code = 0xc10c7, .pme_short_desc = "L1 D cache entries invalidated from L2", .pme_long_desc = "A dcache invalidated was received from the L2 because a line in L2 was castout.", }, [ POWER5p_PME_PM_LSU_SRQ_FULL_CYC ] = { .pme_name = "PM_LSU_SRQ_FULL_CYC", .pme_code = 0x110c3, .pme_short_desc = "Cycles SRQ full", .pme_long_desc = "Cycles the Store Request Queue is full.", }, [ POWER5p_PME_PM_FPU_FIN ] = { .pme_name = "PM_FPU_FIN", .pme_code = 0x401088, .pme_short_desc = "FPU produced a result", .pme_long_desc = "FPU finished, produced a result. This only indicates finish, not completion. Combined Unit 0 + Unit 1. Floating Point Stores are included in this count but not Floating Point Loads., , , XYZs", }, [ POWER5p_PME_PM_LSU_SRQ_STFWD ] = { .pme_name = "PM_LSU_SRQ_STFWD", .pme_code = 0x2c6088, .pme_short_desc = "SRQ store forwarded", .pme_long_desc = "Data from a store instruction was forwarded to a load. A load that misses L1 but becomes a store forward is treated as a load miss and it causes the DL1 load miss event to be counted. It does not go into the LMQ. If a load that hits L1 but becomes a store forward, then it's not treated as a load miss. 
Combined Unit 0 + 1.", }, [ POWER5p_PME_PM_L2SA_SHR_MOD ] = { .pme_name = "PM_L2SA_SHR_MOD", .pme_code = 0x700c0, .pme_short_desc = "L2 slice A transition from shared to modified", .pme_long_desc = "A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A, B, and C.", }, [ POWER5p_PME_PM_0INST_CLB_CYC ] = { .pme_name = "PM_0INST_CLB_CYC", .pme_code = 0x400c0, .pme_short_desc = "Cycles no instructions in CLB", .pme_long_desc = "The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific.", }, [ POWER5p_PME_PM_FXU0_FIN ] = { .pme_name = "PM_FXU0_FIN", .pme_code = 0x130e2, .pme_short_desc = "FXU0 produced a result", .pme_long_desc = "The Fixed Point unit 0 finished an instruction and produced a result. 
Instructions that finish may not necessary complete.", }, [ POWER5p_PME_PM_L2SB_RCST_DISP_FAIL_RC_FULL ] = { .pme_name = "PM_L2SB_RCST_DISP_FAIL_RC_FULL", .pme_code = 0x722e2, .pme_short_desc = "L2 slice B RC store dispatch attempt failed due to all RC full", .pme_long_desc = "A Read/Claim dispatch for a store failed because all RC machines are busy.", }, [ POWER5p_PME_PM_THRD_GRP_CMPL_BOTH_CYC ] = { .pme_name = "PM_THRD_GRP_CMPL_BOTH_CYC", .pme_code = 0x200013, .pme_short_desc = "Cycles group completed by both threads", .pme_long_desc = "Cycles that both threads completed.", }, [ POWER5p_PME_PM_PMC5_OVERFLOW ] = { .pme_name = "PM_PMC5_OVERFLOW", .pme_code = 0x10001a, .pme_short_desc = "PMC5 Overflow", .pme_long_desc = "Overflows from PMC5 are counted. This effectively widens the PMC. The Overflow from the original PMC will not trigger an exception even if the PMU is configured to generate exceptions on overflow.", }, [ POWER5p_PME_PM_FPU0_FDIV ] = { .pme_name = "PM_FPU0_FDIV", .pme_code = 0xc0, .pme_short_desc = "FPU0 executed FDIV instruction", .pme_long_desc = "FPU0 has executed a divide instruction. This could be fdiv, fdivs, fdiv. 
fdivs.", }, [ POWER5p_PME_PM_PTEG_FROM_L375_SHR ] = { .pme_name = "PM_PTEG_FROM_L375_SHR", .pme_code = 0x38309e, .pme_short_desc = "PTEG loaded from L3.75 shared", .pme_long_desc = "A Page Table Entry was loaded into the TLB with shared (S) data from the L3 of a chip on a different module than this processor is located, due to a demand load.", }, [ POWER5p_PME_PM_HV_CYC ] = { .pme_name = "PM_HV_CYC", .pme_code = 0x20000b, .pme_short_desc = "Hypervisor Cycles", .pme_long_desc = "Cycles when the processor is executing in Hypervisor (MSR[HV] = 1 and MSR[PR]=0)", }, [ POWER5p_PME_PM_L2SA_RC_DISP_FAIL_CO_BUSY ] = { .pme_name = "PM_L2SA_RC_DISP_FAIL_CO_BUSY", .pme_code = 0x703c0, .pme_short_desc = "L2 slice A RC dispatch attempt failed due to RC/CO pair chosen was miss and CO already busy", .pme_long_desc = "A Read/Claim Dispatch was rejected at dispatch because the Castout Machine was busy. In the case of an RC starting up on a miss and the victim is valid, the CO machine must be available for the RC to process the access. If the CO is still busy working on an old castout, then the RC must not-ack the access if it is a miss(re-issued by the CIU). If it is a miss and the CO is available to process the castout, the RC will accept the access. Once the RC has finished, it can restart and process new accesses that result in a hit (or miss that doesn't need a CO) even though the CO is still processing a castout from a previous access.", }, [ POWER5p_PME_PM_THRD_PRIO_DIFF_0_CYC ] = { .pme_name = "PM_THRD_PRIO_DIFF_0_CYC", .pme_code = 0x430e3, .pme_short_desc = "Cycles no thread priority difference", .pme_long_desc = "Cycles when this thread's priority is equal to the other thread's priority.", }, [ POWER5p_PME_PM_LR_CTR_MAP_FULL_CYC ] = { .pme_name = "PM_LR_CTR_MAP_FULL_CYC", .pme_code = 0x100c6, .pme_short_desc = "Cycles LR/CTR mapper full", .pme_long_desc = "The LR/CTR mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. 
This event only indicates that the mapper was full, not that dispatch was prevented.", }, [ POWER5p_PME_PM_L3SB_SHR_INV ] = { .pme_name = "PM_L3SB_SHR_INV", .pme_code = 0x710c4, .pme_short_desc = "L3 slice B transition from shared to invalid", .pme_long_desc = "L3 snooper detects someone doing a store to a line that is Sx in this L3(i.e. invalidate hit SX and dispatched).", }, [ POWER5p_PME_PM_DATA_FROM_RMEM ] = { .pme_name = "PM_DATA_FROM_RMEM", .pme_code = 0x1c30a1, .pme_short_desc = "Data loaded from remote memory", .pme_long_desc = "The processor's Data Cache was reloaded from memory attached to a different module than this proccessor is located on.", }, [ POWER5p_PME_PM_DATA_FROM_L275_MOD ] = { .pme_name = "PM_DATA_FROM_L275_MOD", .pme_code = 0x1c30a3, .pme_short_desc = "Data loaded from L2.75 modified", .pme_long_desc = "The processor's Data Cache was reloaded with modified (M) data from the L2 on a different module than this processor is located due to a demand load.", }, [ POWER5p_PME_PM_LSU0_REJECT_SRQ ] = { .pme_name = "PM_LSU0_REJECT_SRQ", .pme_code = 0xc40c0, .pme_short_desc = "LSU0 SRQ lhs rejects", .pme_long_desc = "Total cycles the Load Store Unit 0 is busy rejecting instructions because of Load Hit Store conditions. Loads are rejected when data is needed from a previous store instruction but store forwarding is not possible because the data is not fully contained in the Store Data Queue or is not yet available in the Store Data Queue.", }, [ POWER5p_PME_PM_LSU1_DERAT_MISS ] = { .pme_name = "PM_LSU1_DERAT_MISS", .pme_code = 0x800c6, .pme_short_desc = "LSU1 DERAT misses", .pme_long_desc = "A data request (load or store) from LSU Unit 1 missed the ERAT and resulted in an ERAT reload. 
Multiple instructions may miss the ERAT entry for the same 4K page, but only one reload will occur.", }, [ POWER5p_PME_PM_MRK_LSU_FIN ] = { .pme_name = "PM_MRK_LSU_FIN", .pme_code = 0x400014, .pme_short_desc = "Marked instruction LSU processing finished", .pme_long_desc = "One of the Load/Store Units finished a marked instruction. Instructions that finish may not necessary complete", }, [ POWER5p_PME_PM_DTLB_MISS_16M ] = { .pme_name = "PM_DTLB_MISS_16M", .pme_code = 0x3c208d, .pme_short_desc = "Data TLB miss for 16M page", .pme_long_desc = "Data TLB references to 16MB pages that missed the TLB. Page size is determined at TLB reload time.", }, [ POWER5p_PME_PM_LSU0_FLUSH_UST ] = { .pme_name = "PM_LSU0_FLUSH_UST", .pme_code = 0xc00c1, .pme_short_desc = "LSU0 unaligned store flushes", .pme_long_desc = "A store was flushed from unit 0 because it was unaligned (crossed a 4K boundary).", }, [ POWER5p_PME_PM_L2SB_RC_DISP_FAIL_CO_BUSY ] = { .pme_name = "PM_L2SB_RC_DISP_FAIL_CO_BUSY", .pme_code = 0x703c1, .pme_short_desc = "L2 slice B RC dispatch attempt failed due to RC/CO pair chosen was miss and CO already busy", .pme_long_desc = "A Read/Claim Dispatch was rejected at dispatch because the Castout Machine was busy. In the case of an RC starting up on a miss and the victim is valid, the CO machine must be available for the RC to process the access. If the CO is still busy working on an old castout, then the RC must not-ack the access if it is a miss(re-issued by the CIU). If it is a miss and the CO is available to process the castout, the RC will accept the access. 
Once the RC has finished, it can restart and process new accesses that result in a hit (or miss that doesn't need a CO) even though the CO is still processing a castout from a previous access.", }, [ POWER5p_PME_PM_L2SC_MOD_TAG ] = { .pme_name = "PM_L2SC_MOD_TAG", .pme_code = 0x720e2, .pme_short_desc = "L2 slice C transition from modified to tagged", .pme_long_desc = "A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C.", } }; #endif papi-5.6.0/src/examples/PAPI_perror.c000664 001750 001750 00000005233 13216244361 021431 0ustar00jshenry1963jshenry1963000000 000000 /***************************************************************************** * PAPI_perror converts PAPI error codes to strings,it fills the string * * destination with the error message corresponding to the error code. * * The function copies length worth of the error description string * * corresponding to code into destination. The resulting string is always * * null terminated. If length is 0, then the string is printed on stderr. * * PAPI_strerror does similar but it just returns the corresponding * * error string from the code. * *****************************************************************************/ #include #include #include "papi.h" /* This needs to be included every time you use PAPI */ int main() { int retval; int EventSet = PAPI_NULL; char error_str[PAPI_MAX_STR_LEN]; /**************************************************************************** * This part initializes the library and compares the version number of the * * header file, to the version of the library, if these don't match then it * * is likely that PAPI won't work correctly.If there is an error, retval * * keeps track of the version number. 
* ****************************************************************************/ if((retval = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT ) { exit(1); } if ((retval = PAPI_create_eventset(&EventSet)) != PAPI_OK) { fprintf(stderr, "PAPI error %d: %s\n",retval,PAPI_strerror(retval)); exit(1); } /* Add Total Instructions Executed to our EventSet */ if ((retval = PAPI_add_event(EventSet, PAPI_TOT_INS)) != PAPI_OK) { PAPI_perror( "PAPI_add_event" ); exit(1); } /* Start counting */ if ((retval = PAPI_start(EventSet)) != PAPI_OK) { PAPI_perror( "PAPI_start" ); exit(1); } /* We are trying to start the counter which has already been started, and this will give an error which will be passed to PAPI_perror via retval and the function will then display the error string on the screen. */ if ((retval = PAPI_start(EventSet)) != PAPI_OK) { PAPI_perror( "PAPI_start" ); } /* The function PAPI_strerror returns the corresponding error string from the error code */ if ((retval = PAPI_start(EventSet)) != PAPI_OK) { printf("%s\n",PAPI_strerror(retval)); } /* finish using PAPI and free all related resources (this is optional, you don't have to use it */ PAPI_shutdown (); exit(0); } papi-5.6.0/doc/000775 001750 001750 00000000000 13216244473 015304 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/libpfm4/lib/events/intel_pii_events.h000664 001750 001750 00000055303 13216244364 024453 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2011 Google, Inc * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and 
this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. * * This file has been automatically generated. * * PMU: pii (Intel Pentium II) */

/*
 * Unit masks selecting a cache-line state: Invalid, Shared, Exclusive or
 * Modified.  Each state has its own ucode bit (0x100 .. 0x800) and no entry
 * carries INTEL_X86_NCOMBO.  Besides L2_IFETCH, the L2_ST and L2_RQSTS
 * entries of intel_pii_pe point at this same list.
 * NOTE(review): ucode looks like the unit mask pre-shifted into bits 8-15 of
 * the event encoding -- confirm against the intel_x86 encoding code.
 */
static const intel_x86_umask_t pii_l2_ifetch[]={
   { .uname = "I",
     .udesc = "Invalid state",
     .ucode = 0x100,
   },
   { .uname = "S",
     .udesc = "Shared state",
     .ucode = 0x200,
   },
   { .uname = "E",
     .udesc = "Exclusive state",
     .ucode = 0x400,
   },
   { .uname = "M",
     .udesc = "Modified state",
     .ucode = 0x800,
   },
};

/*
 * Unit masks choosing whose bus activity is counted: this processor only
 * (SELF, ucode 0x0) or any bus agent (ANY, ucode 0x2000).  SELF is the entry
 * flagged INTEL_X86_DFL; both are flagged INTEL_X86_NCOMBO.
 * NOTE(review): presumably DFL marks the default and NCOMBO forbids combining
 * the two -- confirm against the flag definitions.
 * Shared by BUS_DRDY_CLOCKS and the other BUS_* entries of intel_pii_pe that
 * reference this list.
 */
static const intel_x86_umask_t pii_bus_drdy_clocks[]={
   { .uname = "SELF",
     .udesc = "Clocks when processor is driving bus",
     .ucode = 0x0,
     .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL,
   },
   { .uname = "ANY",
     .udesc = "Clocks when any agent is driving bus",
     .ucode = 0x2000,
     .uflags= INTEL_X86_NCOMBO,
   },
};

/*
 * Unit masks for the MMX instruction categories (multiply, shift, pack,
 * unpack, logical, arithmetic), one ucode bit each from 0x100 to 0x2000.
 * No entry carries a uflags value.
 * NOTE(review): the event entry that uses this list is not visible in this
 * part of the file.
 */
static const intel_x86_umask_t pii_mmx_instr_type_exec[]={
   { .uname = "MUL",
     .udesc = "MMX packed multiply instructions executed",
     .ucode = 0x100,
   },
   { .uname = "SHIFT",
     .udesc = "MMX packed shift instructions executed",
     .ucode = 0x200,
   },
   { .uname = "PACK",
     .udesc = "MMX pack operation instructions executed",
     .ucode = 0x400,
   },
   { .uname = "UNPACK",
     .udesc = "MMX unpack operation instructions executed",
     .ucode = 0x800,
   },
   { .uname = "LOGICAL",
     .udesc = "MMX packed logical instructions executed",
     .ucode = 0x1000,
   },
   { .uname = "ARITH",
     .udesc = "MMX packed arithmetic instructions executed",
     .ucode = 0x2000,
   },
};

/*
 * Unit masks giving the direction of a transition between MMX and
 * floating-point instructions: TO_FP (ucode 0x0) or TO_MMX (ucode 0x100).
 * Both are flagged INTEL_X86_NCOMBO; unlike pii_bus_drdy_clocks, neither is
 * flagged INTEL_X86_DFL.
 */
static const intel_x86_umask_t pii_fp_mmx_trans[]={
   { .uname = "TO_FP",
     .udesc = "From MMX instructions to floating-point instructions",
     .ucode = 0x0,
     .uflags= INTEL_X86_NCOMBO,
   },
   { .uname = "TO_MMX",
     .udesc = "From floating-point instructions to MMX instructions",
     .ucode = 0x100,
     .uflags= INTEL_X86_NCOMBO,
   },
};

/*
 * Unit masks naming a segment register (ES, DS, FS, GS), one ucode bit each
 * from 0x100 to 0x800.  No entry carries a uflags value.
 */
static const intel_x86_umask_t pii_seg_rename_stalls[]={
   { .uname = "ES",
     .udesc = "Segment register ES",
     .ucode = 0x100,
   },
   { .uname = "DS",
     .udesc = "Segment register DS",
     .ucode = 0x200,
   },
   { .uname = "FS",
     .udesc = "Segment register FS",
     .ucode = 0x400,
   },
   { .uname = "GS",
     .udesc = "Segment register GS",
     .ucode = 0x800,
   },
};

static const intel_x86_entry_t intel_pii_pe[]={ { .name = "CPU_CLK_UNHALTED", .desc = "Number cycles during which the processor is not halted", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x79, }, { .name = "INST_RETIRED", .desc = "Number of instructions retired", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xc0, }, { .name = "DATA_MEM_REFS", .desc = "All loads from any memory type. All stores to any memory typeEach part of a split is counted separately. The internal logic counts not only memory loads and stores but also internal retries. 80-bit floating point accesses are double counted, since they are decomposed into a 16-bit exponent load and a 64-bit mantissa load. Memory accesses are only counted when they are actually performed (such as a load that gets squashed because a previous cache miss is outstanding to the same address, and which finally gets performed, is only counted once).
Does not include I/O accesses or other non-memory accesses", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x43, }, { .name = "DCU_LINES_IN", .desc = "Total lines allocated in the DCU", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x45, }, { .name = "DCU_M_LINES_IN", .desc = "Number of M state lines allocated in the DCU", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x46, }, { .name = "DCU_M_LINES_OUT", .desc = "Number of M state lines evicted from the DCU. This includes evictions via snoop HITM, intervention or replacement", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x47, }, { .name = "DCU_MISS_OUTSTANDING", .desc = "Weighted number of cycle while a DCU miss is outstanding, incremented by the number of cache misses at any particular time. Cacheable read requests only are considered. Uncacheable requests are excluded Read-for-ownerships are counted, as well as line fills, invalidates, and stores", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x48, }, { .name = "IFU_IFETCH", .desc = "Number of instruction fetches, both cacheable and noncacheable including UC fetches", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x80, }, { .name = "IFU_IFETCH_MISS", .desc = "Number of instruction fetch misses. All instructions fetches that do not hit the IFU (i.e., that produce memory requests). Includes UC accesses", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x81, }, { .name = "ITLB_MISS", .desc = "Number of ITLB misses", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x85, }, { .name = "IFU_MEM_STALL", .desc = "Number of cycles instruction fetch is stalled for any reason. Includes IFU cache misses, ITLB misses, ITLB faults, and other minor stalls", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x86, }, { .name = "ILD_STALL", .desc = "Number of cycles that the instruction length decoder is stalled", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x87, }, { .name = "L2_IFETCH", .desc = "Number of L2 instruction fetches. 
This event indicates that a normal instruction fetch was received by the L2. The count includes only L2 cacheable instruction fetches: it does not include UC instruction fetches It does not include ITLB miss accesses", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x28, .numasks = LIBPFM_ARRAY_SIZE(pii_l2_ifetch), .ngrp = 1, .umasks = pii_l2_ifetch, }, { .name = "L2_ST", .desc = "Number of L2 data stores. This event indicates that a normal, unlocked, store memory access was received by the L2. Specifically, it indicates that the DCU sent a read-for ownership request to the L2. It also includes Invalid to Modified requests sent by the DCU to the L2. It includes only L2 cacheable memory accesses; it does not include I/O accesses, other non-memory accesses, or memory accesses such as UC/WT memory accesses. It does include L2 cacheable TLB miss memory accesses", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x2a, .numasks = LIBPFM_ARRAY_SIZE(pii_l2_ifetch), .ngrp = 1, .umasks = pii_l2_ifetch, /* identical to actual umasks list for this event */ }, { .name = "L2_M_LINES_INM", .desc = "Number of modified lines allocated in the L2", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x25, }, { .name = "L2_RQSTS", .desc = "Total number of L2 requests", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x2e, .numasks = LIBPFM_ARRAY_SIZE(pii_l2_ifetch), .ngrp = 1, .umasks = pii_l2_ifetch, /* identical to actual umasks list for this event */ }, { .name = "L2_ADS", .desc = "Number of L2 address strobes", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x21, }, { .name = "L2_DBUS_BUSY", .desc = "Number of cycles during which the L2 cache data bus was busy", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x22, }, { .name = "L2_DBUS_BUSY_RD", .desc = "Number of cycles during which the data bus was busy transferring read data from L2 to the processor", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x23, }, { .name = "BUS_DRDY_CLOCKS", .desc = "Number of clocks 
during which DRDY# is asserted. Utilization of the external system data bus during data transfers", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x62, .numasks = LIBPFM_ARRAY_SIZE(pii_bus_drdy_clocks), .ngrp = 1, .umasks = pii_bus_drdy_clocks, }, { .name = "BUS_LOCK_CLOCKS", .desc = "Number of clocks during which LOCK# is asserted on the external system bus", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x63, .numasks = LIBPFM_ARRAY_SIZE(pii_bus_drdy_clocks), .ngrp = 1, .umasks = pii_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_REQ_OUTSTANDING", .desc = "Number of bus requests outstanding. This counter is incremented by the number of cacheable read bus requests outstanding in any given cycle", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x60, }, { .name = "BUS_TRANS_BRD", .desc = "Number of burst read transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x65, .numasks = LIBPFM_ARRAY_SIZE(pii_bus_drdy_clocks), .ngrp = 1, .umasks = pii_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRANS_RFO", .desc = "Number of completed read for ownership transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x66, .numasks = LIBPFM_ARRAY_SIZE(pii_bus_drdy_clocks), .ngrp = 1, .umasks = pii_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRANS_WB", .desc = "Number of completed write back transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x67, .numasks = LIBPFM_ARRAY_SIZE(pii_bus_drdy_clocks), .ngrp = 1, .umasks = pii_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRAN_IFETCH", .desc = "Number of completed instruction fetch transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x68, .numasks = LIBPFM_ARRAY_SIZE(pii_bus_drdy_clocks), .ngrp = 1, .umasks = pii_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRAN_INVAL", 
.desc = "Number of completed invalidate transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x69, .numasks = LIBPFM_ARRAY_SIZE(pii_bus_drdy_clocks), .ngrp = 1, .umasks = pii_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRAN_PWR", .desc = "Number of completed partial write transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x6a, .numasks = LIBPFM_ARRAY_SIZE(pii_bus_drdy_clocks), .ngrp = 1, .umasks = pii_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRANS_P", .desc = "Number of completed partial transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x6b, .numasks = LIBPFM_ARRAY_SIZE(pii_bus_drdy_clocks), .ngrp = 1, .umasks = pii_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRANS_IO", .desc = "Number of completed I/O transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x6c, .numasks = LIBPFM_ARRAY_SIZE(pii_bus_drdy_clocks), .ngrp = 1, .umasks = pii_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRAN_DEF", .desc = "Number of completed deferred transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x6d, .numasks = LIBPFM_ARRAY_SIZE(pii_bus_drdy_clocks), .ngrp = 1, .umasks = pii_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRAN_BURST", .desc = "Number of completed burst transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x6e, .numasks = LIBPFM_ARRAY_SIZE(pii_bus_drdy_clocks), .ngrp = 1, .umasks = pii_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRAN_ANY", .desc = "Number of all completed bus transactions. Address bus utilization can be calculated knowing the minimum address bus occupancy. 
Includes special cycles, etc.", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x70, .numasks = LIBPFM_ARRAY_SIZE(pii_bus_drdy_clocks), .ngrp = 1, .umasks = pii_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_TRAN_MEM", .desc = "Number of completed memory transactions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x6f, .numasks = LIBPFM_ARRAY_SIZE(pii_bus_drdy_clocks), .ngrp = 1, .umasks = pii_bus_drdy_clocks, /* identical to actual umasks list for this event */ }, { .name = "BUS_DATA_RECV", .desc = "Number of bus clock cycles during which this processor is receiving data", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x64, }, { .name = "BUS_BNR_DRV", .desc = "Number of bus clock cycles during which this processor is driving the BNR# pin", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x61, }, { .name = "BUS_HIT_DRV", .desc = "Number of bus clock cycles during which this processor is driving the HIT# pin", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x7a, }, { .name = "BUS_HITM_DRV", .desc = "Number of bus clock cycles during which this processor is driving the HITM# pin", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x7b, }, { .name = "BUS_SNOOP_STALL", .desc = "Number of clock cycles during which the bus is snoop stalled", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x7e, }, { .name = "FLOPS", .desc = "Number of computational floating-point operations retired. Excludes floating-point computational operations that cause traps or assists. Includes internal sub-operations for complex floating-point instructions like transcendentals. Excludes floating point loads and stores", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x1, .code = 0xc1, }, { .name = "FP_COMP_OPS_EXE", .desc = "Number of computational floating-point operations executed. The number of FADD, FSUB, FCOM, FMULs, integer MULs and IMULs, FDIVs, FPREMs, FSQRTS, integer DIVs, and IDIVs. 
This number does not include the number of cycles, but the number of operations. This event does not distinguish an FADD used in the middle of a transcendental flow from a separate FADD instruction", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x1, .code = 0x10, }, { .name = "FP_ASSIST", .desc = "Number of floating-point exception cases handled by microcode.", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x2, .code = 0x11, }, { .name = "MUL", .desc = "Number of multiplies.This count includes integer as well as FP multiplies and is speculative", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x2, .code = 0x12, }, { .name = "DIV", .desc = "Number of divides.This count includes integer as well as FP divides and is speculative", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x2, .code = 0x13, }, { .name = "CYCLES_DIV_BUSY", .desc = "Number of cycles during which the divider is busy, and cannot accept new divides. This includes integer and FP divides, FPREM, FPSQRT, etc. and is speculative", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x1, .code = 0x14, }, { .name = "LD_BLOCKS", .desc = "Number of load operations delayed due to store buffer blocks. Includes counts caused by preceding stores whose addresses are unknown, preceding stores whose addresses are known but whose data is unknown, and preceding stores that conflicts with the load but which incompletely overlap the load", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x3, }, { .name = "SB_DRAINS", .desc = "Number of store buffer drain cycles. Incremented every cycle the store buffer is draining. Draining is caused by serializing operations like CPUID, synchronizing operations like XCHG, interrupt acknowledgment, as well as other conditions (such as cache flushing).", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x4, }, { .name = "MISALIGN_MEM_REF", .desc = "Number of misaligned data memory references. 
Incremented by 1 every cycle during which, either the processor's load or store pipeline dispatches a misaligned micro-op Counting is performed if it is the first or second half or if it is blocked, squashed, or missed. In this context, misaligned means crossing a 64-bit boundary", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x5, }, { .name = "UOPS_RETIRED", .desc = "Number of micro-ops retired", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xc2, }, { .name = "INST_DECODED", .desc = "Number of instructions decoded", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xd0, }, { .name = "HW_INT_RX", .desc = "Number of hardware interrupts received", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xc8, }, { .name = "CYCLES_INT_MASKED", .desc = "Number of processor cycles for which interrupts are disabled", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xc6, }, { .name = "CYCLES_INT_PENDING_AND_MASKED", .desc = "Number of processor cycles for which interrupts are disabled and interrupts are pending.", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xc7, }, { .name = "BR_INST_RETIRED", .desc = "Number of branch instructions retired", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xc4, }, { .name = "BR_MISS_PRED_RETIRED", .desc = "Number of mispredicted branches retired", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xc5, }, { .name = "BR_TAKEN_RETIRED", .desc = "Number of taken branches retired", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xc9, }, { .name = "BR_MISS_PRED_TAKEN_RET", .desc = "Number of taken mispredicted branches retired", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xca, }, { .name = "BR_INST_DECODED", .desc = "Number of branch instructions decoded", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xe0, }, { .name = "BTB_MISSES", .desc = "Number of branches for which the BTB did not produce a prediction", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xe2, }, { .name = "BR_BOGUS", .desc = "Number of bogus 
branches", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xe4, }, { .name = "BACLEARS", .desc = "Number of times BACLEAR is asserted. This is the number of times that a static branch prediction was made, in which the branch decoder decided to make a branch prediction because the BTB did not", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xe6, }, { .name = "RESOURCE_STALLS", .desc = "Incremented by 1 during every cycle for which there is a resource related stall. Includes register renaming buffer entries, memory buffer entries. Does not include stalls due to bus queue full, too many cache misses, etc. In addition to resource related stalls, this event counts some other events. Includes stalls arising during branch misprediction recovery, such as if retirement of the mispredicted branch is delayed and stalls arising while store buffer is draining from synchronizing operations", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xa2, }, { .name = "PARTIAL_RAT_STALLS", .desc = "Number of cycles or events for partial stalls. 
This includes flag partial stalls", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xd2, }, { .name = "SEGMENT_REG_LOADS", .desc = "Number of segment register loads.", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x6, }, { .name = "MMX_INSTR_EXEC", .desc = "Number of MMX instructions executed", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xb0, }, { .name = "MMX_INSTR_RET", .desc = "Number of MMX instructions retired", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xce, }, { .name = "MMX_SAT_INSTR_EXEC", .desc = "Number of MMX saturating instructions executed", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xb1, }, { .name = "MMX_UOPS_EXEC", .desc = "Number of MMX micro-ops executed", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xb2, }, { .name = "MMX_INSTR_TYPE_EXEC", .desc = "Number of MMX instructions executed by type", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xb3, .numasks = LIBPFM_ARRAY_SIZE(pii_mmx_instr_type_exec), .ngrp = 1, .umasks = pii_mmx_instr_type_exec, }, { .name = "FP_MMX_TRANS", .desc = "Number of MMX transitions", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xcc, .numasks = LIBPFM_ARRAY_SIZE(pii_fp_mmx_trans), .ngrp = 1, .umasks = pii_fp_mmx_trans, }, { .name = "MMX_ASSIST", .desc = "Number of MMX micro-ops executed", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xcd, }, { .name = "SEG_RENAME_STALLS", .desc = "Number of Segment Register Renaming Stalls", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xd4, .numasks = LIBPFM_ARRAY_SIZE(pii_seg_rename_stalls), .ngrp = 1, .umasks = pii_seg_rename_stalls, }, { .name = "SEG_REG_RENAMES", .desc = "Number of Segment Register Renames", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0xd5, .numasks = LIBPFM_ARRAY_SIZE(pii_seg_rename_stalls), .ngrp = 1, .umasks = pii_seg_rename_stalls, /* identical to actual umasks list for this event */ }, { .name = "RET_SEG_RENAMES", .desc = "Number of segment register rename events retired", .modmsk = INTEL_X86_ATTRS, 
.cntmsk = 0x3, .code = 0xd6, }, { .name = "L2_LD", .desc = "Number of L2 data loads. This event indicates that a normal, unlocked, load memory access was received by the L2. It includes only L2 cacheable memory accesses; it does not include I/O accesses, other non-memory accesses, or memory accesses such as UC/WT memory accesses. It does include L2 cacheable TLB miss memory accesses", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x29, .numasks = LIBPFM_ARRAY_SIZE(pii_l2_ifetch), .ngrp = 1, .umasks = pii_l2_ifetch, /* identical to actual umasks list for this event */ }, { .name = "L2_LINES_IN", .desc = "Number of lines allocated in the L2", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x24, }, { .name = "L2_LINES_OUT", .desc = "Number of lines removed from the L2 for any reason", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x26, }, { .name = "L2_M_LINES_OUTM", .desc = "Number of modified lines removed from the L2 for any reason", .modmsk = INTEL_X86_ATTRS, .cntmsk = 0x3, .code = 0x27, }, };
papi-5.6.0/src/components/perfctr_ppc/Rules.perfctr_ppc000664 001750 001750 00000000654 13216244357 025353 0ustar00jshenry1963jshenry1963000000 000000
# Build rules for the perfctr_ppc component: appends perfctr.c and
# perfctr-ppc64.c to the component source/object lists and gives each object
# an explicit compile rule.
COMPSRCS += components/perfctr/perfctr.c components/perfctr_ppc/perfctr-ppc64.c
COMPOBJS += perfctr.o perfctr-ppc64.o

perfctr.o: components/perfctr/perfctr.c
	$(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/perfctr/perfctr.c -o perfctr.o

# The recipe compiles the same source file that is named as the prerequisite
# above and in COMPSRCS (directory "perfctr_ppc", file "perfctr-ppc64.c").
perfctr-ppc64.o: components/perfctr_ppc/perfctr-ppc64.c components/perfctr_ppc/perfctr-ppc64.h
	$(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/perfctr_ppc/perfctr-ppc64.c -o perfctr-ppc64.o
papi-5.6.0/man/man3/PAPI_set_thr_specific.3000664 001750 001750 00000003645 13216244356 022372 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPI_set_thr_specific" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPI_set_thr_specific \- .PP Store a pointer to a thread specific data structure\&.
.SH SYNOPSIS .br .PP .SH "Detailed Description" .PP .PP .nf @par Prototype: \#include <papi.h> @n int PAPI_set_thr_specific( int tag, void *ptr ); @param tag An identifier, the value of which is either PAPI_USR1_TLS or PAPI_USR2_TLS. This identifier indicates which of several data structures associated with this thread is to be accessed. @param ptr A pointer to the memory containing the data structure. @retval PAPI_OK @retval PAPI_EINVAL The @em tag argument is out of range. In C, PAPI_set_thr_specific will save @em ptr into an array indexed by @em tag. There are 2 user available locations and @em tag can be either PAPI_USR1_TLS or PAPI_USR2_TLS. The array mentioned above is managed by PAPI and allocated to each thread which has called PAPI_thread_init. There is no Fortran equivalent function. @par Example: .fi .PP .PP .nf int ret; HighLevelInfo *state = NULL; ret = PAPI_thread_init(pthread_self); if (ret != PAPI_OK) handle_error(ret); // Do we have the thread specific data setup yet? ret = PAPI_get_thr_specific(PAPI_USR1_TLS, (void *) &state); if (ret != PAPI_OK || state == NULL) { state = (HighLevelInfo *) malloc(sizeof(HighLevelInfo)); if (state == NULL) return (PAPI_ESYS); memset(state, 0, sizeof(HighLevelInfo)); state->EventSet = PAPI_NULL; ret = PAPI_create_eventset(&state->EventSet); if (ret != PAPI_OK) return (PAPI_ESYS); ret = PAPI_set_thr_specific(PAPI_USR1_TLS, state); if (ret != PAPI_OK) return (ret); } .fi .PP .PP \fBSee Also:\fP .RS 4 \fBPAPI_register_thread\fP \fBPAPI_thread_init\fP \fBPAPI_thread_id\fP \fBPAPI_get_thr_specific\fP .RE .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. papi-5.6.0/src/perfctr-2.6.x/usr.lib/arm.h000664 001750 001750 00000000510 13216244367 022050 0ustar00jshenry1963jshenry1963000000 000000 /* $Id: arm.h,v 1.1.2.1 2007/02/11 20:15:03 mikpe Exp $ * ARM-specific code for performance counters library.
* * Copyright (C) 2005-2007 Mikael Pettersson */ #ifndef __LIB_PERFCTR_ARM_H #define __LIB_PERFCTR_ARM_H #define PAGE_SIZE 4096 #define perfctr_info_cpu_init(info) do{}while(0) #endif /* __LIB_PERFCTR_ARM_H */ papi-5.6.0/src/ctests/pthrtough.c000664 001750 001750 00000004577 13216244360 021033 0ustar00jshenry1963jshenry1963000000 000000 #include #include #include #include #include "papi.h" #include "papi_test.h" #define NITER 1000 void * Thread( void *data ) { int i, ret, evtset; ( void ) data; for ( i = 0; i < NITER; i++ ) { if ( ( ret = PAPI_register_thread( ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_thread_init", ret ); evtset = PAPI_NULL; if ( ( ret = PAPI_create_eventset( &evtset ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_create_eventset", ret ); if ( ( ret = PAPI_destroy_eventset( &evtset ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset", ret ); if ( ( ret = PAPI_unregister_thread( ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_unregister_thread", ret ); } return ( NULL ); } int main( int argc, char *argv[] ) { int j; pthread_t *th = NULL; pthread_attr_t attr; int ret; long nthr; const PAPI_hw_info_t *hwinfo; tests_quiet( argc, argv ); /*Set TESTS_QUIET variable */ ret = PAPI_library_init( PAPI_VER_CURRENT ); if ( ret != PAPI_VER_CURRENT ) test_fail( __FILE__, __LINE__, "PAPI_library_init", ret ); if ( ( ret = PAPI_thread_init( ( unsigned long ( * )( void ) ) ( pthread_self ) ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_thread_init", ret ); pthread_attr_init( &attr ); #ifdef PTHREAD_CREATE_UNDETACHED pthread_attr_setdetachstate( &attr, PTHREAD_CREATE_UNDETACHED ); #endif #ifdef PTHREAD_SCOPE_SYSTEM ret=pthread_attr_setscope( &attr, PTHREAD_SCOPE_SYSTEM ); if ( ret != 0 ) test_skip( __FILE__, __LINE__, "pthread_attr_setscope", ret ); #endif if ( ( hwinfo = PAPI_get_hardware_info( ) ) == NULL ) test_fail( __FILE__, __LINE__, "PAPI_get_hardware_info", 0 ); nthr = hwinfo->ncpu; if ( !TESTS_QUIET ) { 
printf( "Creating %ld threads for %d iterations each of:\n", nthr, NITER ); printf( "\tregister\n" ); printf( "\tcreate_eventset\n" ); printf( "\tdestroy_eventset\n" ); printf( "\tunregister\n" ); } th = ( pthread_t * ) malloc( ( size_t ) nthr * sizeof ( pthread_t ) ); if ( th == NULL ) test_fail( __FILE__, __LINE__, "malloc", PAPI_ESYS ); for ( j = 0; j < nthr; j++ ) { ret = pthread_create( &th[j], &attr, &Thread, NULL ); if ( ret ) test_fail( __FILE__, __LINE__, "pthread_create", PAPI_ESYS ); } for ( j = 0; j < nthr; j++ ) { pthread_join( th[j], NULL ); } test_pass( __FILE__ ); return 0; } papi-5.6.0/src/ctests/disable_component.c000664 001750 001750 00000004147 13216244360 022465 0ustar00jshenry1963jshenry1963000000 000000 /* * File: disable_component.c * Author: Vince Weaver * vweaver1@eecs.utk.edu */ /* This tests the functionality of PAPI_disable_component() */ #include #include "papi.h" #include "papi_test.h" int main( int argc, char **argv ) { int retval; const PAPI_component_info_t* cmpinfo; int numcmp, cid, active_components=0; /* Set TESTS_QUIET variable */ tests_quiet( argc, argv ); /* Disable All Compiled-in Components */ numcmp = PAPI_num_components( ); if (!TESTS_QUIET) printf("Compiled-in components:\n"); for( cid = 0; cid < numcmp; cid++ ) { cmpinfo = PAPI_get_component_info( cid ); if (!TESTS_QUIET) { printf( "Name: %-23s %s\n", cmpinfo->name, cmpinfo->description); } retval=PAPI_disable_component( cid ); if (retval!=PAPI_OK) { test_fail(__FILE__,__LINE__,"Error disabling component",retval); } } /* Initialize the library */ retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) { test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); } /* Try to disable after init, should fail */ retval=PAPI_disable_component( 0 ); if (retval==PAPI_OK) { test_fail( __FILE__, __LINE__, "PAPI_disable_component should fail", retval ); } if (!TESTS_QUIET) printf("\nAfter init components:\n"); for( cid = 0; cid < numcmp; cid++ ) { 
cmpinfo = PAPI_get_component_info( cid ); if (!TESTS_QUIET) { printf( "%d %d Name: %-23s %s\n", cid, PAPI_get_component_index((char *)cmpinfo->name), cmpinfo->name ,cmpinfo->description); } if (cid!=PAPI_get_component_index((char *)cmpinfo->name)) { test_fail( __FILE__, __LINE__, "PAPI_get_component_index mismatch", 2 ); } if (cmpinfo->disabled) { if (!TESTS_QUIET) { printf(" \\-> Disabled: %s\n",cmpinfo->disabled_reason); } } else { active_components++; } } if (active_components>0) { test_fail( __FILE__, __LINE__, "too many active components", retval ); } test_pass( __FILE__ ); return PAPI_OK; } papi-5.6.0/src/libpfm-3.y/docs/man3/pfm_regmask_isset.3000664 001750 001750 00000000033 13216244361 024552 0ustar00jshenry1963jshenry1963000000 000000 .so man3/pfm_regmask_set.3 papi-5.6.0/src/perfctr-2.6.x/patches/patch-kernel-2.6.29000664 001750 001750 00000036562 13216244367 024245 0ustar00jshenry1963jshenry1963000000 000000 --- linux-2.6.29.perfctr26/CREDITS.~1~ 2009-03-24 18:00:31.000000000 +0100 +++ linux-2.6.29.perfctr26/CREDITS 2009-03-24 18:49:12.000000000 +0100 @@ -2755,6 +2755,7 @@ N: Mikael Pettersson E: mikpe@it.uu.se W: http://user.it.uu.se/~mikpe/linux/ D: Miscellaneous fixes +D: Performance-monitoring counters driver N: Reed H. 
Petty E: rhp@draper.net --- linux-2.6.29.perfctr26/Documentation/ioctl/ioctl-number.txt.~1~ 2009-03-24 18:00:31.000000000 +0100 +++ linux-2.6.29.perfctr26/Documentation/ioctl/ioctl-number.txt 2009-03-24 18:49:12.000000000 +0100 @@ -197,6 +197,8 @@ Code Seq# Include File Comments 0xB1 00-1F PPPoX 0xCB 00-1F CBM serial IEC bus in development: +0xD0 all performance counters see drivers/perfctr/ + 0xDD 00-3F ZFCP device driver see drivers/s390/scsi/ 0xF3 00-3F video/sisfb.h sisfb (in development) --- linux-2.6.29.perfctr26/MAINTAINERS.~1~ 2009-03-24 18:00:31.000000000 +0100 +++ linux-2.6.29.perfctr26/MAINTAINERS 2009-03-24 18:49:12.000000000 +0100 @@ -3408,6 +3408,12 @@ M: balbir@linux.vnet.ibm.com L: linux-kernel@vger.kernel.org S: Maintained +PERFORMANCE-MONITORING COUNTERS DRIVER +P: Mikael Pettersson +M: mikpe@it.uu.se +W: http://user.it.uu.se/~mikpe/linux/perfctr/ +S: Maintained + PERSONALITY HANDLING P: Christoph Hellwig M: hch@infradead.org --- linux-2.6.29.perfctr26/arch/arm/Kconfig.~1~ 2009-03-24 18:00:31.000000000 +0100 +++ linux-2.6.29.perfctr26/arch/arm/Kconfig 2009-03-24 18:49:12.000000000 +0100 @@ -696,6 +696,10 @@ config IWMMXT Enable support for iWMMXt context switching at run time if running on a CPU that supports it. 
+if CPU_XSCALE +source drivers/perfctr/Kconfig +endif + # bool 'Use XScale PMU as timer source' CONFIG_XSCALE_PMU_TIMER config XSCALE_PMU bool --- linux-2.6.29.perfctr26/arch/arm/include/asm/processor.h.~1~ 2009-03-24 18:00:31.000000000 +0100 +++ linux-2.6.29.perfctr26/arch/arm/include/asm/processor.h 2009-03-24 18:49:12.000000000 +0100 @@ -50,6 +50,10 @@ struct thread_struct { unsigned long error_code; /* debugging */ struct debug_info debug; + +#ifdef CONFIG_PERFCTR_VIRTUAL + struct vperfctr *perfctr; /* performance counters */ +#endif }; #define INIT_THREAD { } --- linux-2.6.29.perfctr26/arch/arm/include/asm/system.h.~1~ 2009-03-24 18:00:31.000000000 +0100 +++ linux-2.6.29.perfctr26/arch/arm/include/asm/system.h 2009-03-24 18:49:12.000000000 +0100 @@ -209,7 +209,9 @@ extern struct task_struct *__switch_to(s #define switch_to(prev,next,last) \ do { \ + perfctr_suspend_thread(&(prev)->thread); \ last = __switch_to(prev,task_thread_info(prev), task_thread_info(next)); \ + perfctr_resume_thread(&(current)->thread); \ } while (0) #if defined(CONFIG_CPU_SA1100) || defined(CONFIG_CPU_SA110) --- linux-2.6.29.perfctr26/arch/arm/kernel/process.c.~1~ 2008-12-25 15:54:13.000000000 +0100 +++ linux-2.6.29.perfctr26/arch/arm/kernel/process.c 2009-03-24 18:49:12.000000000 +0100 @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -272,6 +273,7 @@ void show_regs(struct pt_regs * regs) */ void exit_thread(void) { + perfctr_exit_thread(¤t->thread); } ATOMIC_NOTIFIER_HEAD(thread_notify_head); @@ -317,6 +319,8 @@ copy_thread(int nr, unsigned long clone_ if (clone_flags & CLONE_SETTLS) thread->tp_value = regs->ARM_r3; + perfctr_copy_task(p, regs); + return 0; } --- linux-2.6.29.perfctr26/arch/powerpc/include/asm/processor.h.~1~ 2009-03-24 18:00:33.000000000 +0100 +++ linux-2.6.29.perfctr26/arch/powerpc/include/asm/processor.h 2009-03-24 18:49:12.000000000 +0100 @@ -197,6 +197,9 @@ struct thread_struct { unsigned long spefscr; /* SPE & eFP status */ int 
used_spe; /* set if process has used spe */ #endif /* CONFIG_SPE */ +#ifdef CONFIG_PERFCTR_VIRTUAL + struct vperfctr *perfctr; /* performance counters */ +#endif }; #define ARCH_MIN_TASKALIGN 16 --- linux-2.6.29.perfctr26/arch/powerpc/kernel/process.c.~1~ 2009-03-24 18:00:33.000000000 +0100 +++ linux-2.6.29.perfctr26/arch/powerpc/kernel/process.c 2009-03-24 18:49:12.000000000 +0100 @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -406,8 +407,10 @@ struct task_struct *__switch_to(struct t * window where the kernel stack SLB and the kernel stack are out * of sync. Hard disable here. */ + perfctr_suspend_thread(&prev->thread); hard_irq_disable(); last = _switch(old_thread, new_thread); + perfctr_resume_thread(¤t->thread); local_irq_restore(flags); @@ -547,6 +550,7 @@ void show_regs(struct pt_regs * regs) void exit_thread(void) { discard_lazy_cpu_state(); + perfctr_exit_thread(¤t->thread); } void flush_thread(void) @@ -673,6 +677,8 @@ int copy_thread(int nr, unsigned long cl kregs->nip = (unsigned long)ret_from_fork; #endif + perfctr_copy_task(p, regs); + return 0; } --- linux-2.6.29.perfctr26/arch/powerpc/platforms/Kconfig.cputype.~1~ 2009-03-24 18:00:33.000000000 +0100 +++ linux-2.6.29.perfctr26/arch/powerpc/platforms/Kconfig.cputype 2009-03-24 18:49:12.000000000 +0100 @@ -264,4 +264,8 @@ config NOT_COHERENT_CACHE config CHECK_CACHE_COHERENCY bool +if PPC32 +source "drivers/perfctr/Kconfig" +endif + endmenu --- linux-2.6.29.perfctr26/arch/x86/Kconfig.~1~ 2009-03-24 18:00:33.000000000 +0100 +++ linux-2.6.29.perfctr26/arch/x86/Kconfig 2009-03-24 18:49:12.000000000 +0100 @@ -1364,6 +1364,8 @@ config CC_STACKPROTECTOR_ALL functions that use large-ish on-stack buffers. By enabling this option, GCC will be asked to do this for ALL functions. 
+source "drivers/perfctr/Kconfig" + source kernel/Kconfig.hz config KEXEC --- linux-2.6.29.perfctr26/arch/x86/include/asm/irq_vectors.h.~1~ 2009-03-24 18:00:33.000000000 +0100 +++ linux-2.6.29.perfctr26/arch/x86/include/asm/irq_vectors.h 2009-03-24 18:49:12.000000000 +0100 @@ -85,9 +85,10 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) we + * First APIC vector available to drivers: (vectors 0x30-0xed) we * start at 0x31(0x41) to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ --- linux-2.6.29.perfctr26/arch/x86/include/asm/processor.h.~1~ 2009-03-24 18:00:33.000000000 +0100 +++ linux-2.6.29.perfctr26/arch/x86/include/asm/processor.h 2009-03-24 18:49:12.000000000 +0100 @@ -433,6 +433,8 @@ struct thread_struct { unsigned long iopl; /* Max allowed port in the bitmap, in bytes: */ unsigned io_bitmap_max; +/* performance counters */ + struct vperfctr *perfctr; /* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. 
*/ unsigned long debugctlmsr; #ifdef CONFIG_X86_DS --- linux-2.6.29.perfctr26/arch/x86/include/asm/system.h.~1~ 2009-03-24 18:00:33.000000000 +0100 +++ linux-2.6.29.perfctr26/arch/x86/include/asm/system.h 2009-03-24 18:49:12.000000000 +0100 @@ -37,6 +37,7 @@ do { \ * __switch_to()) \ */ \ unsigned long ebx, ecx, edx, esi, edi; \ + perfctr_suspend_thread(&(prev)->thread); \ \ asm volatile("pushfl\n\t" /* save flags */ \ "pushl %%ebp\n\t" /* save EBP */ \ @@ -87,7 +88,8 @@ do { \ "r12", "r13", "r14", "r15" /* Save restore flags to clear handle leaking NT */ -#define switch_to(prev, next, last) \ +#define switch_to(prev, next, last) do { \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile(SAVE_CONTEXT \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ @@ -107,7 +109,8 @@ do { \ [tif_fork] "i" (TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, stack)), \ [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ - : "memory", "cc" __EXTRA_CLOBBER) + : "memory", "cc" __EXTRA_CLOBBER); \ +} while (0) #endif #ifdef __KERNEL__ --- linux-2.6.29.perfctr26/arch/x86/kernel/entry_32.S.~1~ 2009-03-24 18:00:34.000000000 +0100 +++ linux-2.6.29.perfctr26/arch/x86/kernel/entry_32.S 2009-03-24 18:49:12.000000000 +0100 @@ -688,6 +688,23 @@ ENDPROC(name) /* The include is where all of the SMP etc. 
interrupts come from */ #include "entry_arch.h" +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +ENTRY(perfctr_interrupt) + RING0_INT_FRAME + pushl $~(LOCAL_PERFCTR_VECTOR) + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + TRACE_IRQS_OFF + pushl %esp + CFI_ADJUST_CFA_OFFSET 4 + call smp_perfctr_interrupt + addl $4, %esp + CFI_ADJUST_CFA_OFFSET -4 + jmp ret_from_intr + CFI_ENDPROC +ENDPROC(perfctr_interrupt) +#endif + ENTRY(coprocessor_error) RING0_INT_FRAME pushl $0 --- linux-2.6.29.perfctr26/arch/x86/kernel/entry_64.S.~1~ 2009-03-24 18:00:34.000000000 +0100 +++ linux-2.6.29.perfctr26/arch/x86/kernel/entry_64.S 2009-03-24 18:49:12.000000000 +0100 @@ -1025,6 +1025,11 @@ apicinterrupt ERROR_APIC_VECTOR \ apicinterrupt SPURIOUS_APIC_VECTOR \ spurious_interrupt smp_spurious_interrupt +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +apicinterrupt LOCAL_PERFCTR_VECTOR \ + perfctr_interrupt smp_perfctr_interrupt +#endif + /* * Exception entry points. */ --- linux-2.6.29.perfctr26/arch/x86/kernel/irqinit_32.c.~1~ 2009-03-24 18:00:34.000000000 +0100 +++ linux-2.6.29.perfctr26/arch/x86/kernel/irqinit_32.c 2009-03-24 18:49:12.000000000 +0100 @@ -20,6 +20,7 @@ #include #include #include +#include #include @@ -174,6 +175,8 @@ void __init native_init_IRQ(void) */ intr_init_hook(); + perfctr_vector_init(); + /* * External FPU? Set up irq13 if so, for * original braindamaged IBM FERR coupling. 
--- linux-2.6.29.perfctr26/arch/x86/kernel/irqinit_64.c.~1~ 2009-03-24 18:00:34.000000000 +0100 +++ linux-2.6.29.perfctr26/arch/x86/kernel/irqinit_64.c 2009-03-24 18:49:12.000000000 +0100 @@ -21,6 +21,7 @@ #include #include #include +#include #include /* @@ -170,6 +171,8 @@ void __init native_init_IRQ(void) apic_intr_init(); + perfctr_vector_init(); + if (!acpi_ioapic) setup_irq(2, &irq2); } --- linux-2.6.29.perfctr26/arch/x86/kernel/process_32.c.~1~ 2009-03-24 18:00:34.000000000 +0100 +++ linux-2.6.29.perfctr26/arch/x86/kernel/process_32.c 2009-03-24 18:49:12.000000000 +0100 @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -248,6 +249,7 @@ void exit_thread(void) tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; put_cpu(); } + perfctr_exit_thread(¤t->thread); ds_exit_thread(current); } @@ -307,6 +309,8 @@ int copy_thread(int nr, unsigned long cl savesegment(gs, p->thread.gs); + perfctr_copy_task(p, regs); + tsk = current; if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, @@ -590,6 +594,8 @@ __switch_to(struct task_struct *prev_p, x86_write_percpu(current_task, next_p); + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.29.perfctr26/arch/x86/kernel/process_64.c.~1~ 2009-03-24 18:00:34.000000000 +0100 +++ linux-2.6.29.perfctr26/arch/x86/kernel/process_64.c 2009-03-24 18:49:12.000000000 +0100 @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -242,6 +243,7 @@ void exit_thread(void) t->io_bitmap_max = 0; put_cpu(); } + perfctr_exit_thread(t); ds_exit_thread(current); } @@ -348,6 +350,8 @@ int copy_thread(int nr, unsigned long cl savesegment(es, p->thread.es); savesegment(ds, p->thread.ds); + perfctr_copy_task(p, regs); + if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) { @@ -650,6 +654,9 @@ __switch_to(struct 
task_struct *prev_p, */ if (tsk_used_math(next_p) && next_p->fpu_counter > 5) math_state_restore(); + + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.29.perfctr26/drivers/Makefile.~1~ 2009-03-24 18:00:34.000000000 +0100 +++ linux-2.6.29.perfctr26/drivers/Makefile 2009-03-24 18:49:12.000000000 +0100 @@ -93,6 +93,7 @@ obj-$(CONFIG_MEMSTICK) += memstick/ obj-$(CONFIG_NEW_LEDS) += leds/ obj-$(CONFIG_INFINIBAND) += infiniband/ obj-$(CONFIG_SGI_SN) += sn/ +obj-$(CONFIG_KPERFCTR) += perfctr/ obj-y += firmware/ obj-$(CONFIG_CRYPTO) += crypto/ obj-$(CONFIG_SUPERH) += sh/ --- linux-2.6.29.perfctr26/fs/exec.c.~1~ 2009-03-24 18:00:46.000000000 +0100 +++ linux-2.6.29.perfctr26/fs/exec.c 2009-03-24 18:49:12.000000000 +0100 @@ -49,6 +49,7 @@ #include #include #include +#include #include #include #include @@ -991,6 +992,7 @@ int flush_old_exec(struct linux_binprm * set_task_comm(current, tcomm); current->flags &= ~PF_RANDOMIZE; + perfctr_flush_thread(¤t->thread); flush_thread(); /* Set the new mm task size. 
We have to do that late because it may --- linux-2.6.29.perfctr26/kernel/exit.c.~1~ 2009-03-24 18:00:55.000000000 +0100 +++ linux-2.6.29.perfctr26/kernel/exit.c 2009-03-24 18:49:12.000000000 +0100 @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -210,6 +211,7 @@ repeat: leader->exit_state = EXIT_DEAD; } + perfctr_release_task(p); write_unlock_irq(&tasklist_lock); release_thread(p); call_rcu(&p->rcu, delayed_put_task_struct); --- linux-2.6.29.perfctr26/kernel/sched.c.~1~ 2009-03-24 18:00:55.000000000 +0100 +++ linux-2.6.29.perfctr26/kernel/sched.c 2009-03-24 18:49:12.000000000 +0100 @@ -54,6 +54,7 @@ #include #include #include +#include #include #include #include @@ -6107,6 +6108,8 @@ int set_cpus_allowed_ptr(struct task_str struct rq *rq; int ret = 0; + perfctr_set_cpus_allowed(p, *new_mask); /* XXX: convert to _ptr */ + rq = task_rq_lock(p, &flags); if (!cpumask_intersects(new_mask, cpu_online_mask)) { ret = -EINVAL; --- linux-2.6.29.perfctr26/kernel/timer.c.~1~ 2009-03-24 18:00:55.000000000 +0100 +++ linux-2.6.29.perfctr26/kernel/timer.c 2009-03-24 18:49:12.000000000 +0100 @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -1029,6 +1030,7 @@ void update_process_times(int user_tick) /* Note: this timer irq context must be accounted for as well. */ account_process_tick(p, user_tick); + perfctr_sample_thread(&p->thread); run_local_timers(); if (rcu_pending(cpu)) rcu_check_callbacks(cpu, user_tick); papi-5.6.0/src/ctests/zero_attach.c000664 001750 001750 00000014441 13216244361 021302 0ustar00jshenry1963jshenry1963000000 000000 /* This file performs the following test: start, stop and timer functionality for attached processes. - It attempts to use the following two counters. It may use less depending on hardware counter resource limitations. These are counted in the default counting domain and default granularity, depending on the platform. 
Usually this is the user domain (PAPI_DOM_USER) and thread context (PAPI_GRN_THR). + PAPI_FP_INS + PAPI_TOT_CYC - Get us. - Start counters - Do flops - Stop and read counters - Get us. */ #include #include #include #include #include #include "papi.h" #include "papi_test.h" #include "do_loops.h" #ifdef _AIX #define _LINUX_SOURCE_COMPAT #endif #if defined(__FreeBSD__) # define PTRACE_ATTACH PT_ATTACH # define PTRACE_CONT PT_CONTINUE #endif int wait_for_attach_and_loop( void ) { kill( getpid( ), SIGSTOP ); do_flops( NUM_FLOPS ); kill( getpid( ), SIGSTOP ); return 0; } int main( int argc, char **argv ) { int status, retval, num_tests = 1, tmp; int EventSet1 = PAPI_NULL; int PAPI_event, mask1; int num_events1; long long **values; long long elapsed_us, elapsed_cyc, elapsed_virt_us, elapsed_virt_cyc; char event_name[PAPI_MAX_STR_LEN], add_event_str[PAPI_MAX_STR_LEN]; const PAPI_component_info_t *cmpinfo; pid_t pid; /* Set TESTS_QUIET variable */ tests_quiet( argc, argv ); /* Initialize the PAPI library */ retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) { test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); } if ( ( cmpinfo = PAPI_get_component_info( 0 ) ) == NULL ) { test_fail( __FILE__, __LINE__, "PAPI_get_component_info", 0 ); } if ( cmpinfo->attach == 0 ) { test_skip( __FILE__, __LINE__, "Platform does not support attaching", 0 ); } pid = fork( ); if ( pid < 0 ) { test_fail( __FILE__, __LINE__, "fork()", PAPI_ESYS ); } if ( pid == 0 ) { exit( wait_for_attach_and_loop( ) ); } /* add PAPI_TOT_CYC and one of the events in PAPI_FP_INS, PAPI_FP_OPS or PAPI_TOT_INS, depending on the availability of the event on the platform */ EventSet1 = add_two_events( &num_events1, &PAPI_event, &mask1 ); if ( cmpinfo->attach_must_ptrace ) { if ( ptrace( PTRACE_ATTACH, pid, NULL, NULL ) == -1 ) { perror( "ptrace(PTRACE_ATTACH)" ); return 1; } if ( waitpid( pid, &status, 0 ) == -1 ) { perror( "waitpid()" ); exit( 1 ); } if ( WIFSTOPPED( status ) == 
0 ) test_fail( __FILE__, __LINE__, "Child process didnt return true to WIFSTOPPED", 0 ); } retval = PAPI_attach( EventSet1, ( unsigned long ) pid ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_attach", retval ); retval = PAPI_event_code_to_name( PAPI_event, event_name ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); sprintf( add_event_str, "PAPI_add_event[%s]", event_name ); /* num_events1 is greater than num_events2 so don't worry. */ values = allocate_test_space( num_tests, num_events1 ); elapsed_us = PAPI_get_real_usec( ); elapsed_cyc = PAPI_get_real_cyc( ); elapsed_virt_us = PAPI_get_virt_usec( ); elapsed_virt_cyc = PAPI_get_virt_cyc( ); /* Wait for the SIGSTOP. */ if ( cmpinfo->attach_must_ptrace ) { if ( ptrace( PTRACE_CONT, pid, NULL, NULL ) == -1 ) { perror( "ptrace(PTRACE_CONT)" ); return 1; } if ( waitpid( pid, &status, 0 ) == -1 ) { perror( "waitpid()" ); exit( 1 ); } if ( WIFSTOPPED( status ) == 0 ) { test_fail( __FILE__, __LINE__, "Child process didn't return true to WIFSTOPPED", 0 ); } if ( WSTOPSIG( status ) != SIGSTOP ) { test_fail( __FILE__, __LINE__, "Child process didn't stop on SIGSTOP", 0 ); } } retval = PAPI_start( EventSet1 ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_start", retval ); } /* Wait for the SIGSTOP. 
*/ if ( cmpinfo->attach_must_ptrace ) { if ( ptrace( PTRACE_CONT, pid, NULL, NULL ) == -1 ) { perror( "ptrace(PTRACE_CONT)" ); return 1; } if ( waitpid( pid, &status, 0 ) == -1 ) { perror( "waitpid()" ); exit( 1 ); } if ( WIFSTOPPED( status ) == 0 ) { test_fail( __FILE__, __LINE__, "Child process didn't return true to WIFSTOPPED", 0 ); } if ( WSTOPSIG( status ) != SIGSTOP ) { test_fail( __FILE__, __LINE__, "Child process didn't stop on SIGSTOP", 0 ); } } elapsed_virt_us = PAPI_get_virt_usec( ) - elapsed_virt_us; elapsed_virt_cyc = PAPI_get_virt_cyc( ) - elapsed_virt_cyc; elapsed_us = PAPI_get_real_usec( ) - elapsed_us; elapsed_cyc = PAPI_get_real_cyc( ) - elapsed_cyc; retval = PAPI_stop( EventSet1, values[0] ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); } remove_test_events( &EventSet1, mask1 ); if ( cmpinfo->attach_must_ptrace ) { if ( ptrace( PTRACE_CONT, pid, NULL, NULL ) == -1 ) { perror( "ptrace(PTRACE_CONT)" ); return 1; } } if ( waitpid( pid, &status, 0 ) == -1 ) { perror( "waitpid()" ); exit( 1 ); } if ( WIFEXITED( status ) == 0 ) { test_fail( __FILE__, __LINE__, "Child process didn't return true to WIFEXITED", 0 ); } if (!TESTS_QUIET) { printf( "Test case: 3rd party attach start, stop.\n" ); printf( "-----------------------------------------------\n" ); tmp = PAPI_get_opt( PAPI_DEFDOM, NULL ); printf( "Default domain is: %d (%s)\n", tmp, stringify_all_domains( tmp ) ); tmp = PAPI_get_opt( PAPI_DEFGRN, NULL ); printf( "Default granularity is: %d (%s)\n", tmp, stringify_granularity( tmp ) ); printf( "Using %d iterations of c += a*b\n", NUM_FLOPS ); printf( "-------------------------------------------------------------------------\n" ); printf( "Test type : \t 1\n" ); sprintf( add_event_str, "%-12s : \t", event_name ); printf( TAB1, add_event_str, values[0][1] ); printf( TAB1, "PAPI_TOT_CYC : \t", values[0][0] ); printf( TAB1, "Real usec : \t", elapsed_us ); printf( TAB1, "Real cycles : \t", elapsed_cyc ); printf( TAB1, 
"Virt usec : \t", elapsed_virt_us ); printf( TAB1, "Virt cycles : \t", elapsed_virt_cyc ); printf( "-------------------------------------------------------------------------\n" ); printf( "Verification: none\n" ); } test_pass( __FILE__ ); return 0; } papi-5.6.0/src/perfctr-2.6.x/patches/patch-kernel-2.6.28000664 001750 001750 00000037036 13216244367 024241 0ustar00jshenry1963jshenry1963000000 000000 --- linux-2.6.28.perfctr26/CREDITS.~1~ 2008-12-25 15:54:13.000000000 +0100 +++ linux-2.6.28.perfctr26/CREDITS 2008-12-25 16:06:06.000000000 +0100 @@ -2739,6 +2739,7 @@ N: Mikael Pettersson E: mikpe@it.uu.se W: http://user.it.uu.se/~mikpe/linux/ D: Miscellaneous fixes +D: Performance-monitoring counters driver N: Reed H. Petty E: rhp@draper.net --- linux-2.6.28.perfctr26/Documentation/ioctl/ioctl-number.txt.~1~ 2008-12-25 15:54:13.000000000 +0100 +++ linux-2.6.28.perfctr26/Documentation/ioctl/ioctl-number.txt 2008-12-25 16:06:06.000000000 +0100 @@ -193,6 +193,8 @@ Code Seq# Include File Comments 0xB1 00-1F PPPoX 0xCB 00-1F CBM serial IEC bus in development: +0xD0 all performance counters see drivers/perfctr/ + 0xDD 00-3F ZFCP device driver see drivers/s390/scsi/ 0xF3 00-3F video/sisfb.h sisfb (in development) --- linux-2.6.28.perfctr26/MAINTAINERS.~1~ 2008-12-25 15:54:13.000000000 +0100 +++ linux-2.6.28.perfctr26/MAINTAINERS 2008-12-25 16:06:06.000000000 +0100 @@ -3355,6 +3355,12 @@ M: balbir@linux.vnet.ibm.com L: linux-kernel@vger.kernel.org S: Maintained +PERFORMANCE-MONITORING COUNTERS DRIVER +P: Mikael Pettersson +M: mikpe@it.uu.se +W: http://user.it.uu.se/~mikpe/linux/perfctr/ +S: Maintained + PERSONALITY HANDLING P: Christoph Hellwig M: hch@infradead.org --- linux-2.6.28.perfctr26/arch/arm/Kconfig.~1~ 2008-12-25 15:54:13.000000000 +0100 +++ linux-2.6.28.perfctr26/arch/arm/Kconfig 2008-12-25 16:06:06.000000000 +0100 @@ -647,6 +647,10 @@ config IWMMXT Enable support for iWMMXt context switching at run time if running on a CPU that supports it. 
+if CPU_XSCALE +source drivers/perfctr/Kconfig +endif + # bool 'Use XScale PMU as timer source' CONFIG_XSCALE_PMU_TIMER config XSCALE_PMU bool --- linux-2.6.28.perfctr26/arch/arm/include/asm/processor.h.~1~ 2008-12-25 15:54:13.000000000 +0100 +++ linux-2.6.28.perfctr26/arch/arm/include/asm/processor.h 2008-12-25 16:06:06.000000000 +0100 @@ -50,6 +50,10 @@ struct thread_struct { unsigned long error_code; /* debugging */ struct debug_info debug; + +#ifdef CONFIG_PERFCTR_VIRTUAL + struct vperfctr *perfctr; /* performance counters */ +#endif }; #define INIT_THREAD { } --- linux-2.6.28.perfctr26/arch/arm/include/asm/system.h.~1~ 2008-12-25 15:54:13.000000000 +0100 +++ linux-2.6.28.perfctr26/arch/arm/include/asm/system.h 2008-12-25 16:06:06.000000000 +0100 @@ -211,7 +211,9 @@ extern struct task_struct *__switch_to(s #define switch_to(prev,next,last) \ do { \ + perfctr_suspend_thread(&(prev)->thread); \ last = __switch_to(prev,task_thread_info(prev), task_thread_info(next)); \ + perfctr_resume_thread(&(current)->thread); \ } while (0) #if defined(CONFIG_CPU_SA1100) || defined(CONFIG_CPU_SA110) --- linux-2.6.28.perfctr26/arch/arm/kernel/process.c.~1~ 2008-12-25 15:54:13.000000000 +0100 +++ linux-2.6.28.perfctr26/arch/arm/kernel/process.c 2008-12-25 16:06:06.000000000 +0100 @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -272,6 +273,7 @@ void show_regs(struct pt_regs * regs) */ void exit_thread(void) { + perfctr_exit_thread(&current->thread); } ATOMIC_NOTIFIER_HEAD(thread_notify_head); @@ -317,6 +319,8 @@ copy_thread(int nr, unsigned long clone_ if (clone_flags & CLONE_SETTLS) thread->tp_value = regs->ARM_r3; + perfctr_copy_task(p, regs); + return 0; } --- linux-2.6.28.perfctr26/arch/powerpc/include/asm/processor.h.~1~ 2008-10-11 10:43:49.000000000 +0200 +++ linux-2.6.28.perfctr26/arch/powerpc/include/asm/processor.h 2008-12-25 16:06:06.000000000 +0100 @@ -199,6 +199,9 @@ struct thread_struct { unsigned long spefscr; /* SPE & eFP status */ int 
used_spe; /* set if process has used spe */ #endif /* CONFIG_SPE */ +#ifdef CONFIG_PERFCTR_VIRTUAL + struct vperfctr *perfctr; /* performance counters */ +#endif }; #define ARCH_MIN_TASKALIGN 16 --- linux-2.6.28.perfctr26/arch/powerpc/kernel/process.c.~1~ 2008-10-11 10:43:50.000000000 +0200 +++ linux-2.6.28.perfctr26/arch/powerpc/kernel/process.c 2008-12-25 16:06:06.000000000 +0100 @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -405,8 +406,10 @@ struct task_struct *__switch_to(struct t * window where the kernel stack SLB and the kernel stack are out * of sync. Hard disable here. */ + perfctr_suspend_thread(&prev->thread); hard_irq_disable(); last = _switch(old_thread, new_thread); + perfctr_resume_thread(&current->thread); local_irq_restore(flags); @@ -544,6 +547,7 @@ void show_regs(struct pt_regs * regs) void exit_thread(void) { discard_lazy_cpu_state(); + perfctr_exit_thread(&current->thread); } void flush_thread(void) @@ -670,6 +674,8 @@ int copy_thread(int nr, unsigned long cl kregs->nip = (unsigned long)ret_from_fork; #endif + perfctr_copy_task(p, regs); + return 0; } --- linux-2.6.28.perfctr26/arch/powerpc/platforms/Kconfig.cputype.~1~ 2008-12-25 15:54:14.000000000 +0100 +++ linux-2.6.28.perfctr26/arch/powerpc/platforms/Kconfig.cputype 2008-12-25 16:06:06.000000000 +0100 @@ -256,4 +256,8 @@ config NOT_COHERENT_CACHE config CHECK_CACHE_COHERENCY bool +if PPC32 +source "drivers/perfctr/Kconfig" +endif + endmenu --- linux-2.6.28.perfctr26/arch/x86/Kconfig.~1~ 2008-12-25 15:54:14.000000000 +0100 +++ linux-2.6.28.perfctr26/arch/x86/Kconfig 2008-12-25 16:06:06.000000000 +0100 @@ -1293,6 +1293,8 @@ config CC_STACKPROTECTOR_ALL functions that use large-ish on-stack buffers. By enabling this option, GCC will be asked to do this for ALL functions. 
+source "drivers/perfctr/Kconfig" + source kernel/Kconfig.hz config KEXEC --- linux-2.6.28.perfctr26/arch/x86/include/asm/irq_vectors.h.~1~ 2008-12-25 15:54:14.000000000 +0100 +++ linux-2.6.28.perfctr26/arch/x86/include/asm/irq_vectors.h 2008-12-25 16:06:06.000000000 +0100 @@ -85,9 +85,10 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) we + * First APIC vector available to drivers: (vectors 0x30-0xed) we * start at 0x31(0x41) to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ --- linux-2.6.28.perfctr26/arch/x86/include/asm/processor.h.~1~ 2008-12-25 15:54:14.000000000 +0100 +++ linux-2.6.28.perfctr26/arch/x86/include/asm/processor.h 2008-12-25 16:06:06.000000000 +0100 @@ -429,6 +429,8 @@ struct thread_struct { unsigned long iopl; /* Max allowed port in the bitmap, in bytes: */ unsigned io_bitmap_max; +/* performance counters */ + struct vperfctr *perfctr; /* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. 
*/ unsigned long debugctlmsr; #ifdef CONFIG_X86_DS --- linux-2.6.28.perfctr26/arch/x86/include/asm/system.h.~1~ 2008-12-25 15:54:14.000000000 +0100 +++ linux-2.6.28.perfctr26/arch/x86/include/asm/system.h 2008-12-25 16:06:06.000000000 +0100 @@ -37,6 +37,7 @@ do { \ * __switch_to()) \ */ \ unsigned long ebx, ecx, edx, esi, edi; \ + perfctr_suspend_thread(&(prev)->thread); \ \ asm volatile("pushfl\n\t" /* save flags */ \ "pushl %%ebp\n\t" /* save EBP */ \ @@ -87,7 +88,8 @@ do { \ "r12", "r13", "r14", "r15" /* Save restore flags to clear handle leaking NT */ -#define switch_to(prev, next, last) \ +#define switch_to(prev, next, last) do { \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile(SAVE_CONTEXT \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ @@ -107,7 +109,8 @@ do { \ [tif_fork] "i" (TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, stack)), \ [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ - : "memory", "cc" __EXTRA_CLOBBER) + : "memory", "cc" __EXTRA_CLOBBER); \ +} while (0) #endif #ifdef __KERNEL__ --- linux-2.6.28.perfctr26/arch/x86/kernel/entry_32.S.~1~ 2008-12-25 15:54:14.000000000 +0100 +++ linux-2.6.28.perfctr26/arch/x86/kernel/entry_32.S 2008-12-25 16:06:06.000000000 +0100 @@ -678,6 +678,23 @@ ENDPROC(name) /* The include is where all of the SMP etc. 
interrupts come from */ #include "entry_arch.h" +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +ENTRY(perfctr_interrupt) + RING0_INT_FRAME + pushl $~(LOCAL_PERFCTR_VECTOR) + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + TRACE_IRQS_OFF + pushl %esp + CFI_ADJUST_CFA_OFFSET 4 + call smp_perfctr_interrupt + addl $4, %esp + CFI_ADJUST_CFA_OFFSET -4 + jmp ret_from_intr + CFI_ENDPROC +ENDPROC(perfctr_interrupt) +#endif + KPROBE_ENTRY(page_fault) RING0_EC_FRAME pushl $do_page_fault --- linux-2.6.28.perfctr26/arch/x86/kernel/entry_64.S.~1~ 2008-12-25 15:54:14.000000000 +0100 +++ linux-2.6.28.perfctr26/arch/x86/kernel/entry_64.S 2008-12-25 16:06:06.000000000 +0100 @@ -870,6 +870,12 @@ ENTRY(spurious_interrupt) apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt END(spurious_interrupt) +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +ENTRY(perfctr_interrupt) + apicinterrupt LOCAL_PERFCTR_VECTOR,smp_perfctr_interrupt +END(perfctr_interrupt) +#endif + /* * Exception entry points. */ --- linux-2.6.28.perfctr26/arch/x86/kernel/irqinit_32.c.~1~ 2008-12-25 15:54:14.000000000 +0100 +++ linux-2.6.28.perfctr26/arch/x86/kernel/irqinit_32.c 2008-12-25 16:06:06.000000000 +0100 @@ -20,6 +20,7 @@ #include #include #include +#include @@ -175,6 +176,8 @@ void __init native_init_IRQ(void) */ intr_init_hook(); + perfctr_vector_init(); + /* * External FPU? Set up irq13 if so, for * original braindamaged IBM FERR coupling. 
--- linux-2.6.28.perfctr26/arch/x86/kernel/irqinit_64.c.~1~ 2008-12-25 15:54:14.000000000 +0100 +++ linux-2.6.28.perfctr26/arch/x86/kernel/irqinit_64.c 2008-12-25 16:06:06.000000000 +0100 @@ -21,6 +21,7 @@ #include #include #include +#include #include /* @@ -224,6 +225,8 @@ void __init native_init_IRQ(void) apic_intr_init(); + perfctr_vector_init(); + if (!acpi_ioapic) setup_irq(2, &irq2); } --- linux-2.6.28.perfctr26/arch/x86/kernel/process_32.c.~1~ 2008-12-25 15:54:14.000000000 +0100 +++ linux-2.6.28.perfctr26/arch/x86/kernel/process_32.c 2008-12-25 16:06:06.000000000 +0100 @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -250,6 +251,7 @@ void exit_thread(void) tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; put_cpu(); } + perfctr_exit_thread(&current->thread); #ifdef CONFIG_X86_DS /* Free any DS contexts that have not been properly released. */ if (unlikely(current->thread.ds_ctx)) { @@ -315,6 +317,8 @@ int copy_thread(int nr, unsigned long cl savesegment(gs, p->thread.gs); + perfctr_copy_task(p, regs); + tsk = current; if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, @@ -629,6 +633,8 @@ struct task_struct * __switch_to(struct x86_write_percpu(current_task, next_p); + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.28.perfctr26/arch/x86/kernel/process_64.c.~1~ 2008-12-25 15:54:14.000000000 +0100 +++ linux-2.6.28.perfctr26/arch/x86/kernel/process_64.c 2008-12-25 16:06:06.000000000 +0100 @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -235,6 +236,7 @@ void exit_thread(void) t->io_bitmap_max = 0; put_cpu(); } + perfctr_exit_thread(t); #ifdef CONFIG_X86_DS /* Free any DS contexts that have not been properly released. 
*/ if (unlikely(t->ds_ctx)) { @@ -347,6 +349,8 @@ int copy_thread(int nr, unsigned long cl savesegment(es, p->thread.es); savesegment(ds, p->thread.ds); + perfctr_copy_task(p, regs); + if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) { @@ -671,6 +675,9 @@ __switch_to(struct task_struct *prev_p, */ if (tsk_used_math(next_p) && next_p->fpu_counter > 5) math_state_restore(); + + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.28.perfctr26/drivers/Makefile.~1~ 2008-12-25 15:54:14.000000000 +0100 +++ linux-2.6.28.perfctr26/drivers/Makefile 2008-12-25 16:06:06.000000000 +0100 @@ -89,6 +89,7 @@ obj-$(CONFIG_MEMSTICK) += memstick/ obj-$(CONFIG_NEW_LEDS) += leds/ obj-$(CONFIG_INFINIBAND) += infiniband/ obj-$(CONFIG_SGI_SN) += sn/ +obj-$(CONFIG_KPERFCTR) += perfctr/ obj-y += firmware/ obj-$(CONFIG_CRYPTO) += crypto/ obj-$(CONFIG_SUPERH) += sh/ --- linux-2.6.28.perfctr26/fs/exec.c.~1~ 2008-12-25 15:54:16.000000000 +0100 +++ linux-2.6.28.perfctr26/fs/exec.c 2008-12-25 16:06:06.000000000 +0100 @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ -999,6 +1000,7 @@ int flush_old_exec(struct linux_binprm * set_task_comm(current, tcomm); current->flags &= ~PF_RANDOMIZE; + perfctr_flush_thread(&current->thread); flush_thread(); /* Set the new mm task size. 
We have to do that late because it may --- linux-2.6.28.perfctr26/kernel/exit.c.~1~ 2008-12-25 15:54:17.000000000 +0100 +++ linux-2.6.28.perfctr26/kernel/exit.c 2008-12-25 16:06:06.000000000 +0100 @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -198,6 +199,7 @@ repeat: leader->exit_state = EXIT_DEAD; } + perfctr_release_task(p); write_unlock_irq(&tasklist_lock); release_thread(p); call_rcu(&p->rcu, delayed_put_task_struct); --- linux-2.6.28.perfctr26/kernel/sched.c.~1~ 2008-12-25 15:54:17.000000000 +0100 +++ linux-2.6.28.perfctr26/kernel/sched.c 2008-12-25 16:06:06.000000000 +0100 @@ -54,6 +54,7 @@ #include #include #include +#include #include #include #include @@ -5967,6 +5968,8 @@ int set_cpus_allowed_ptr(struct task_str struct rq *rq; int ret = 0; + perfctr_set_cpus_allowed(p, *new_mask); /* XXX: convert to _ptr */ + rq = task_rq_lock(p, &flags); if (!cpus_intersects(*new_mask, cpu_online_map)) { ret = -EINVAL; --- linux-2.6.28.perfctr26/kernel/timer.c.~1~ 2008-12-25 15:54:17.000000000 +0100 +++ linux-2.6.28.perfctr26/kernel/timer.c 2008-12-25 16:06:06.000000000 +0100 @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -1044,6 +1045,7 @@ void update_process_times(int user_tick) /* Note: this timer irq context must be accounted for as well. */ account_process_tick(p, user_tick); + perfctr_sample_thread(&p->thread); run_local_timers(); if (rcu_pending(cpu)) rcu_check_callbacks(cpu, user_tick); papi-5.6.0/src/perfctr-2.6.x/patches/patch-kernel-2.6.27000664 001750 001750 00000036650 13216244367 024241 0ustar00jshenry1963jshenry1963000000 000000 --- linux-2.6.27.perfctr26/CREDITS.~1~ 2008-10-11 10:43:49.000000000 +0200 +++ linux-2.6.27.perfctr26/CREDITS 2008-10-11 10:52:26.000000000 +0200 @@ -2728,6 +2728,7 @@ N: Mikael Pettersson E: mikpe@it.uu.se W: http://user.it.uu.se/~mikpe/linux/ D: Miscellaneous fixes +D: Performance-monitoring counters driver N: Reed H. 
Petty E: rhp@draper.net --- linux-2.6.27.perfctr26/Documentation/ioctl-number.txt.~1~ 2008-10-11 10:43:49.000000000 +0200 +++ linux-2.6.27.perfctr26/Documentation/ioctl-number.txt 2008-10-11 10:52:26.000000000 +0200 @@ -190,6 +190,8 @@ Code Seq# Include File Comments 0xB1 00-1F PPPoX 0xCB 00-1F CBM serial IEC bus in development: +0xD0 all performance counters see drivers/perfctr/ + 0xDD 00-3F ZFCP device driver see drivers/s390/scsi/ 0xF3 00-3F video/sisfb.h sisfb (in development) --- linux-2.6.27.perfctr26/MAINTAINERS.~1~ 2008-10-11 10:43:49.000000000 +0200 +++ linux-2.6.27.perfctr26/MAINTAINERS 2008-10-11 10:52:26.000000000 +0200 @@ -3239,6 +3239,12 @@ M: balbir@linux.vnet.ibm.com L: linux-kernel@vger.kernel.org S: Maintained +PERFORMANCE-MONITORING COUNTERS DRIVER +P: Mikael Pettersson +M: mikpe@it.uu.se +W: http://user.it.uu.se/~mikpe/linux/perfctr/ +S: Maintained + PERSONALITY HANDLING P: Christoph Hellwig M: hch@infradead.org --- linux-2.6.27.perfctr26/arch/arm/Kconfig.~1~ 2008-10-11 10:43:49.000000000 +0200 +++ linux-2.6.27.perfctr26/arch/arm/Kconfig 2008-10-11 10:52:26.000000000 +0200 @@ -638,6 +638,10 @@ config IWMMXT Enable support for iWMMXt context switching at run time if running on a CPU that supports it. 
+if CPU_XSCALE +source drivers/perfctr/Kconfig +endif + # bool 'Use XScale PMU as timer source' CONFIG_XSCALE_PMU_TIMER config XSCALE_PMU bool --- linux-2.6.27.perfctr26/arch/arm/include/asm/processor.h.~1~ 2008-10-11 10:43:49.000000000 +0200 +++ linux-2.6.27.perfctr26/arch/arm/include/asm/processor.h 2008-10-11 10:52:26.000000000 +0200 @@ -50,6 +50,10 @@ struct thread_struct { unsigned long error_code; /* debugging */ struct debug_info debug; + +#ifdef CONFIG_PERFCTR_VIRTUAL + struct vperfctr *perfctr; /* performance counters */ +#endif }; #define INIT_THREAD { } --- linux-2.6.27.perfctr26/arch/arm/include/asm/system.h.~1~ 2008-10-11 10:43:49.000000000 +0200 +++ linux-2.6.27.perfctr26/arch/arm/include/asm/system.h 2008-10-11 10:52:26.000000000 +0200 @@ -265,7 +265,9 @@ extern struct task_struct *__switch_to(s #define switch_to(prev,next,last) \ do { \ + perfctr_suspend_thread(&(prev)->thread); \ last = __switch_to(prev,task_thread_info(prev), task_thread_info(next)); \ + perfctr_resume_thread(&(current)->thread); \ } while (0) #if defined(CONFIG_CPU_SA1100) || defined(CONFIG_CPU_SA110) --- linux-2.6.27.perfctr26/arch/arm/kernel/process.c.~1~ 2008-10-11 10:43:49.000000000 +0200 +++ linux-2.6.27.perfctr26/arch/arm/kernel/process.c 2008-10-11 10:52:26.000000000 +0200 @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -301,6 +302,7 @@ void show_fpregs(struct user_fp *regs) */ void exit_thread(void) { + perfctr_exit_thread(&current->thread); } ATOMIC_NOTIFIER_HEAD(thread_notify_head); @@ -346,6 +348,8 @@ copy_thread(int nr, unsigned long clone_ if (clone_flags & CLONE_SETTLS) thread->tp_value = regs->ARM_r3; + perfctr_copy_task(p, regs); + return 0; } --- linux-2.6.27.perfctr26/arch/powerpc/include/asm/processor.h.~1~ 2008-10-11 10:43:49.000000000 +0200 +++ linux-2.6.27.perfctr26/arch/powerpc/include/asm/processor.h 2008-10-11 10:52:26.000000000 +0200 @@ -199,6 +199,9 @@ struct thread_struct { unsigned long spefscr; /* SPE & eFP status */ int 
used_spe; /* set if process has used spe */ #endif /* CONFIG_SPE */ +#ifdef CONFIG_PERFCTR_VIRTUAL + struct vperfctr *perfctr; /* performance counters */ +#endif }; #define ARCH_MIN_TASKALIGN 16 --- linux-2.6.27.perfctr26/arch/powerpc/kernel/process.c.~1~ 2008-10-11 10:43:50.000000000 +0200 +++ linux-2.6.27.perfctr26/arch/powerpc/kernel/process.c 2008-10-11 10:52:26.000000000 +0200 @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -405,8 +406,10 @@ struct task_struct *__switch_to(struct t * window where the kernel stack SLB and the kernel stack are out * of sync. Hard disable here. */ + perfctr_suspend_thread(&prev->thread); hard_irq_disable(); last = _switch(old_thread, new_thread); + perfctr_resume_thread(&current->thread); local_irq_restore(flags); @@ -544,6 +547,7 @@ void show_regs(struct pt_regs * regs) void exit_thread(void) { discard_lazy_cpu_state(); + perfctr_exit_thread(&current->thread); } void flush_thread(void) @@ -670,6 +674,8 @@ int copy_thread(int nr, unsigned long cl kregs->nip = (unsigned long)ret_from_fork; #endif + perfctr_copy_task(p, regs); + return 0; } --- linux-2.6.27.perfctr26/arch/powerpc/platforms/Kconfig.cputype.~1~ 2008-10-11 10:43:50.000000000 +0200 +++ linux-2.6.27.perfctr26/arch/powerpc/platforms/Kconfig.cputype 2008-10-11 10:52:26.000000000 +0200 @@ -254,4 +254,8 @@ config NOT_COHERENT_CACHE config CHECK_CACHE_COHERENCY bool +if PPC32 +source "drivers/perfctr/Kconfig" +endif + endmenu --- linux-2.6.27.perfctr26/arch/x86/Kconfig.~1~ 2008-10-11 10:43:50.000000000 +0200 +++ linux-2.6.27.perfctr26/arch/x86/Kconfig 2008-10-11 10:52:26.000000000 +0200 @@ -1229,6 +1229,8 @@ config CC_STACKPROTECTOR_ALL functions that use large-ish on-stack buffers. By enabling this option, GCC will be asked to do this for ALL functions. 
+source "drivers/perfctr/Kconfig" + source kernel/Kconfig.hz config KEXEC --- linux-2.6.27.perfctr26/arch/x86/kernel/entry_32.S.~1~ 2008-10-11 10:43:50.000000000 +0200 +++ linux-2.6.27.perfctr26/arch/x86/kernel/entry_32.S 2008-10-11 10:52:26.000000000 +0200 @@ -678,6 +678,23 @@ ENDPROC(name) /* The include is where all of the SMP etc. interrupts come from */ #include "entry_arch.h" +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +ENTRY(perfctr_interrupt) + RING0_INT_FRAME + pushl $~(LOCAL_PERFCTR_VECTOR) + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + TRACE_IRQS_OFF + pushl %esp + CFI_ADJUST_CFA_OFFSET 4 + call smp_perfctr_interrupt + addl $4, %esp + CFI_ADJUST_CFA_OFFSET -4 + jmp ret_from_intr + CFI_ENDPROC +ENDPROC(perfctr_interrupt) +#endif + KPROBE_ENTRY(page_fault) RING0_EC_FRAME pushl $do_page_fault --- linux-2.6.27.perfctr26/arch/x86/kernel/entry_64.S.~1~ 2008-10-11 10:43:50.000000000 +0200 +++ linux-2.6.27.perfctr26/arch/x86/kernel/entry_64.S 2008-10-11 10:52:26.000000000 +0200 @@ -889,6 +889,12 @@ ENTRY(spurious_interrupt) apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt END(spurious_interrupt) +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +ENTRY(perfctr_interrupt) + apicinterrupt LOCAL_PERFCTR_VECTOR,smp_perfctr_interrupt +END(perfctr_interrupt) +#endif + /* * Exception entry points. */ --- linux-2.6.27.perfctr26/arch/x86/kernel/irqinit_32.c.~1~ 2008-10-11 10:43:50.000000000 +0200 +++ linux-2.6.27.perfctr26/arch/x86/kernel/irqinit_32.c 2008-10-11 10:52:26.000000000 +0200 @@ -20,6 +20,7 @@ #include #include #include +#include @@ -103,6 +104,8 @@ void __init native_init_IRQ(void) */ intr_init_hook(); + perfctr_vector_init(); + /* * External FPU? Set up irq13 if so, for * original braindamaged IBM FERR coupling. 
--- linux-2.6.27.perfctr26/arch/x86/kernel/irqinit_64.c.~1~ 2008-10-11 10:43:50.000000000 +0200 +++ linux-2.6.27.perfctr26/arch/x86/kernel/irqinit_64.c 2008-10-11 10:52:26.000000000 +0200 @@ -21,6 +21,7 @@ #include #include #include +#include #include /* @@ -217,6 +218,8 @@ void __init native_init_IRQ(void) alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); + perfctr_vector_init(); + if (!acpi_ioapic) setup_irq(2, &irq2); } --- linux-2.6.27.perfctr26/arch/x86/kernel/process_32.c.~1~ 2008-10-11 10:43:50.000000000 +0200 +++ linux-2.6.27.perfctr26/arch/x86/kernel/process_32.c 2008-10-11 10:52:26.000000000 +0200 @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -277,6 +278,7 @@ void exit_thread(void) tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; put_cpu(); } + perfctr_exit_thread(&current->thread); } void flush_thread(void) @@ -334,6 +336,8 @@ int copy_thread(int nr, unsigned long cl savesegment(gs, p->thread.gs); + perfctr_copy_task(p, regs); + tsk = current; if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, @@ -626,6 +630,8 @@ struct task_struct * __switch_to(struct x86_write_percpu(current_task, next_p); + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.27.perfctr26/arch/x86/kernel/process_64.c.~1~ 2008-10-11 10:43:50.000000000 +0200 +++ linux-2.6.27.perfctr26/arch/x86/kernel/process_64.c 2008-10-11 10:52:26.000000000 +0200 @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -240,6 +241,7 @@ void exit_thread(void) t->io_bitmap_max = 0; put_cpu(); } + perfctr_exit_thread(t); } void flush_thread(void) @@ -344,6 +346,8 @@ int copy_thread(int nr, unsigned long cl savesegment(es, p->thread.es); savesegment(ds, p->thread.ds); + perfctr_copy_task(p, regs); + if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = 
kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) { @@ -654,6 +658,9 @@ __switch_to(struct task_struct *prev_p, */ if (tsk_used_math(next_p) && next_p->fpu_counter > 5) math_state_restore(); + + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.27.perfctr26/drivers/Makefile.~1~ 2008-10-11 10:43:50.000000000 +0200 +++ linux-2.6.27.perfctr26/drivers/Makefile 2008-10-11 10:52:26.000000000 +0200 @@ -87,6 +87,7 @@ obj-$(CONFIG_MEMSTICK) += memstick/ obj-$(CONFIG_NEW_LEDS) += leds/ obj-$(CONFIG_INFINIBAND) += infiniband/ obj-$(CONFIG_SGI_SN) += sn/ +obj-$(CONFIG_KPERFCTR) += perfctr/ obj-y += firmware/ obj-$(CONFIG_CRYPTO) += crypto/ obj-$(CONFIG_SUPERH) += sh/ --- linux-2.6.27.perfctr26/fs/exec.c.~1~ 2008-10-11 10:43:52.000000000 +0200 +++ linux-2.6.27.perfctr26/fs/exec.c 2008-10-11 10:52:26.000000000 +0200 @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ -1004,6 +1005,7 @@ int flush_old_exec(struct linux_binprm * set_task_comm(current, tcomm); current->flags &= ~PF_RANDOMIZE; + perfctr_flush_thread(&current->thread); flush_thread(); /* Set the new mm task size. We have to do that late because it may --- linux-2.6.27.perfctr26/include/asm-x86/irq_vectors.h.~1~ 2008-10-11 10:43:53.000000000 +0200 +++ linux-2.6.27.perfctr26/include/asm-x86/irq_vectors.h 2008-10-11 10:52:26.000000000 +0200 @@ -90,9 +90,10 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) we + * First APIC vector available to drivers: (vectors 0x30-0xed) we * start at 0x31(0x41) to spread out vectors evenly between priority * levels. 
(0x80 is the syscall vector) */ --- linux-2.6.27.perfctr26/include/asm-x86/processor.h.~1~ 2008-10-11 10:43:53.000000000 +0200 +++ linux-2.6.27.perfctr26/include/asm-x86/processor.h 2008-10-11 10:52:26.000000000 +0200 @@ -409,6 +409,8 @@ struct thread_struct { unsigned long iopl; /* Max allowed port in the bitmap, in bytes: */ unsigned io_bitmap_max; +/* performance counters */ + struct vperfctr *perfctr; /* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */ unsigned long debugctlmsr; /* Debug Store - if not 0 points to a DS Save Area configuration; --- linux-2.6.27.perfctr26/include/asm-x86/system.h.~1~ 2008-10-11 10:43:53.000000000 +0200 +++ linux-2.6.27.perfctr26/include/asm-x86/system.h 2008-10-11 10:52:26.000000000 +0200 @@ -37,6 +37,7 @@ do { \ * __switch_to()) \ */ \ unsigned long ebx, ecx, edx, esi, edi; \ + perfctr_suspend_thread(&(prev)->thread); \ \ asm volatile("pushfl\n\t" /* save flags */ \ "pushl %%ebp\n\t" /* save EBP */ \ @@ -84,7 +85,8 @@ do { \ "r12", "r13", "r14", "r15" /* Save restore flags to clear handle leaking NT */ -#define switch_to(prev, next, last) \ +#define switch_to(prev, next, last) do { \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile(SAVE_CONTEXT \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ @@ -104,7 +106,8 @@ do { \ [tif_fork] "i" (TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, stack)), \ [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ - : "memory", "cc" __EXTRA_CLOBBER) + : "memory", "cc" __EXTRA_CLOBBER); \ +} while (0) #endif #ifdef __KERNEL__ --- linux-2.6.27.perfctr26/kernel/exit.c.~1~ 2008-10-11 10:43:53.000000000 +0200 +++ linux-2.6.27.perfctr26/kernel/exit.c 2008-10-11 10:52:26.000000000 +0200 @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -193,6 +194,7 @@ repeat: leader->exit_state = EXIT_DEAD; } + perfctr_release_task(p); write_unlock_irq(&tasklist_lock); 
release_thread(p); call_rcu(&p->rcu, delayed_put_task_struct); --- linux-2.6.27.perfctr26/kernel/sched.c.~1~ 2008-10-11 10:43:53.000000000 +0200 +++ linux-2.6.27.perfctr26/kernel/sched.c 2008-10-11 10:52:26.000000000 +0200 @@ -54,6 +54,7 @@ #include #include #include +#include #include #include #include @@ -5882,6 +5883,8 @@ int set_cpus_allowed_ptr(struct task_str struct rq *rq; int ret = 0; + perfctr_set_cpus_allowed(p, *new_mask); /* XXX: convert to _ptr */ + rq = task_rq_lock(p, &flags); if (!cpus_intersects(*new_mask, cpu_online_map)) { ret = -EINVAL; --- linux-2.6.27.perfctr26/kernel/timer.c.~1~ 2008-10-11 10:43:53.000000000 +0200 +++ linux-2.6.27.perfctr26/kernel/timer.c 2008-10-11 10:52:26.000000000 +0200 @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -975,6 +976,7 @@ void update_process_times(int user_tick) /* Note: this timer irq context must be accounted for as well. */ account_process_tick(p, user_tick); + perfctr_sample_thread(&p->thread); run_local_timers(); if (rcu_pending(cpu)) rcu_check_callbacks(cpu, user_tick); papi-5.6.0/src/perfctr-2.6.x/patches/patch-kernel-2.6.26000664 001750 001750 00000042731 13216244367 024235 0ustar00jshenry1963jshenry1963000000 000000 --- linux-2.6.26.perfctr26/CREDITS.~1~ 2008-07-14 10:22:36.000000000 +0200 +++ linux-2.6.26.perfctr26/CREDITS 2008-07-14 10:32:18.000000000 +0200 @@ -2720,6 +2720,7 @@ N: Mikael Pettersson E: mikpe@it.uu.se W: http://user.it.uu.se/~mikpe/linux/ D: Miscellaneous fixes +D: Performance-monitoring counters driver N: Reed H. 
Petty E: rhp@draper.net --- linux-2.6.26.perfctr26/Documentation/ioctl-number.txt.~1~ 2008-07-14 10:22:36.000000000 +0200 +++ linux-2.6.26.perfctr26/Documentation/ioctl-number.txt 2008-07-14 10:32:18.000000000 +0200 @@ -190,6 +190,8 @@ Code Seq# Include File Comments 0xB1 00-1F PPPoX 0xCB 00-1F CBM serial IEC bus in development: +0xD0 all performance counters see drivers/perfctr/ + 0xDD 00-3F ZFCP device driver see drivers/s390/scsi/ 0xF3 00-3F video/sisfb.h sisfb (in development) --- linux-2.6.26.perfctr26/MAINTAINERS.~1~ 2008-07-14 10:22:36.000000000 +0200 +++ linux-2.6.26.perfctr26/MAINTAINERS 2008-07-14 10:32:18.000000000 +0200 @@ -3202,6 +3202,12 @@ M: balbir@linux.vnet.ibm.com L: linux-kernel@vger.kernel.org S: Maintained +PERFORMANCE-MONITORING COUNTERS DRIVER +P: Mikael Pettersson +M: mikpe@it.uu.se +W: http://user.it.uu.se/~mikpe/linux/perfctr/ +S: Maintained + PERSONALITY HANDLING P: Christoph Hellwig M: hch@infradead.org --- linux-2.6.26.perfctr26/arch/arm/Kconfig.~1~ 2008-07-14 10:22:36.000000000 +0200 +++ linux-2.6.26.perfctr26/arch/arm/Kconfig 2008-07-14 10:32:18.000000000 +0200 @@ -572,6 +572,10 @@ config IWMMXT Enable support for iWMMXt context switching at run time if running on a CPU that supports it. 
+if CPU_XSCALE +source drivers/perfctr/Kconfig +endif + # bool 'Use XScale PMU as timer source' CONFIG_XSCALE_PMU_TIMER config XSCALE_PMU bool --- linux-2.6.26.perfctr26/arch/arm/kernel/process.c.~1~ 2008-04-17 18:22:31.000000000 +0200 +++ linux-2.6.26.perfctr26/arch/arm/kernel/process.c 2008-07-14 10:32:18.000000000 +0200 @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -303,6 +304,7 @@ void show_fpregs(struct user_fp *regs) */ void exit_thread(void) { + perfctr_exit_thread(&current->thread); } ATOMIC_NOTIFIER_HEAD(thread_notify_head); @@ -348,6 +350,8 @@ copy_thread(int nr, unsigned long clone_ if (clone_flags & CLONE_SETTLS) thread->tp_value = regs->ARM_r3; + perfctr_copy_task(p, regs); + return 0; } --- linux-2.6.26.perfctr26/arch/powerpc/kernel/process.c.~1~ 2008-07-14 10:22:36.000000000 +0200 +++ linux-2.6.26.perfctr26/arch/powerpc/kernel/process.c 2008-07-14 10:32:18.000000000 +0200 @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -358,8 +359,10 @@ struct task_struct *__switch_to(struct t * window where the kernel stack SLB and the kernel stack are out * of sync. Hard disable here. 
*/ + perfctr_suspend_thread(&prev->thread); hard_irq_disable(); last = _switch(old_thread, new_thread); + perfctr_resume_thread(&current->thread); local_irq_restore(flags); @@ -497,6 +500,7 @@ void show_regs(struct pt_regs * regs) void exit_thread(void) { discard_lazy_cpu_state(); + perfctr_exit_thread(&current->thread); } void flush_thread(void) @@ -618,6 +622,8 @@ int copy_thread(int nr, unsigned long cl kregs->nip = (unsigned long)ret_from_fork; #endif + perfctr_copy_task(p, regs); + return 0; } --- linux-2.6.26.perfctr26/arch/powerpc/platforms/Kconfig.cputype.~1~ 2008-07-14 10:22:36.000000000 +0200 +++ linux-2.6.26.perfctr26/arch/powerpc/platforms/Kconfig.cputype 2008-07-14 10:32:18.000000000 +0200 @@ -234,4 +234,8 @@ config NOT_COHERENT_CACHE config CHECK_CACHE_COHERENCY bool +if PPC32 +source "drivers/perfctr/Kconfig" +endif + endmenu --- linux-2.6.26.perfctr26/arch/x86/Kconfig.~1~ 2008-07-14 10:22:36.000000000 +0200 +++ linux-2.6.26.perfctr26/arch/x86/Kconfig 2008-07-14 10:32:18.000000000 +0200 @@ -1169,6 +1169,8 @@ config CC_STACKPROTECTOR_ALL functions that use large-ish on-stack buffers. By enabling this option, GCC will be asked to do this for ALL functions. +source "drivers/perfctr/Kconfig" + source kernel/Kconfig.hz config KEXEC --- linux-2.6.26.perfctr26/arch/x86/kernel/entry_32.S.~1~ 2008-07-14 10:22:36.000000000 +0200 +++ linux-2.6.26.perfctr26/arch/x86/kernel/entry_32.S 2008-07-14 10:32:18.000000000 +0200 @@ -635,6 +635,23 @@ ENDPROC(name) /* The include is where all of the SMP etc. 
interrupts come from */ #include "entry_arch.h" +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +ENTRY(perfctr_interrupt) + RING0_INT_FRAME + pushl $~(LOCAL_PERFCTR_VECTOR) + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + TRACE_IRQS_OFF + pushl %esp + CFI_ADJUST_CFA_OFFSET 4 + call smp_perfctr_interrupt + addl $4, %esp + CFI_ADJUST_CFA_OFFSET -4 + jmp ret_from_intr + CFI_ENDPROC +ENDPROC(perfctr_interrupt) +#endif + KPROBE_ENTRY(page_fault) RING0_EC_FRAME pushl $do_page_fault --- linux-2.6.26.perfctr26/arch/x86/kernel/entry_64.S.~1~ 2008-07-14 10:22:36.000000000 +0200 +++ linux-2.6.26.perfctr26/arch/x86/kernel/entry_64.S 2008-07-14 10:32:18.000000000 +0200 @@ -728,6 +728,12 @@ ENTRY(spurious_interrupt) apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt END(spurious_interrupt) +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +ENTRY(perfctr_interrupt) + apicinterrupt LOCAL_PERFCTR_VECTOR,smp_perfctr_interrupt +END(perfctr_interrupt) +#endif + /* * Exception entry points. */ --- linux-2.6.26.perfctr26/arch/x86/kernel/i8259_32.c.~1~ 2008-04-17 18:22:32.000000000 +0200 +++ linux-2.6.26.perfctr26/arch/x86/kernel/i8259_32.c 2008-07-14 10:32:18.000000000 +0200 @@ -20,6 +20,7 @@ #include #include #include +#include /* * This is the 'legacy' 8259A Programmable Interrupt Controller, @@ -398,6 +399,8 @@ void __init native_init_IRQ(void) */ intr_init_hook(); + perfctr_vector_init(); + /* * External FPU? Set up irq13 if so, for * original braindamaged IBM FERR coupling. 
--- linux-2.6.26.perfctr26/arch/x86/kernel/i8259_64.c.~1~ 2008-04-17 18:22:32.000000000 +0200 +++ linux-2.6.26.perfctr26/arch/x86/kernel/i8259_64.c 2008-07-14 10:32:18.000000000 +0200 @@ -21,6 +21,7 @@ #include #include #include +#include #include /* @@ -507,6 +508,8 @@ void __init native_init_IRQ(void) set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); + perfctr_vector_init(); + if (!acpi_ioapic) setup_irq(2, &irq2); } --- linux-2.6.26.perfctr26/arch/x86/kernel/process_32.c.~1~ 2008-07-14 10:22:36.000000000 +0200 +++ linux-2.6.26.perfctr26/arch/x86/kernel/process_32.c 2008-07-14 10:32:18.000000000 +0200 @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -316,6 +317,7 @@ void exit_thread(void) tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; put_cpu(); } + perfctr_exit_thread(&current->thread); } void flush_thread(void) @@ -373,6 +375,8 @@ int copy_thread(int nr, unsigned long cl savesegment(gs, p->thread.gs); + perfctr_copy_task(p, regs); + tsk = current; if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, @@ -665,6 +669,8 @@ struct task_struct * __switch_to(struct x86_write_percpu(current_task, next_p); + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.26.perfctr26/arch/x86/kernel/process_64.c.~1~ 2008-07-14 10:22:36.000000000 +0200 +++ linux-2.6.26.perfctr26/arch/x86/kernel/process_64.c 2008-07-14 10:32:18.000000000 +0200 @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -267,6 +268,7 @@ void exit_thread(void) t->io_bitmap_max = 0; put_cpu(); } + perfctr_exit_thread(t); } void flush_thread(void) @@ -371,6 +373,8 @@ int copy_thread(int nr, unsigned long cl asm("mov %%es,%0" : "=m" (p->thread.es)); asm("mov %%ds,%0" : "=m" (p->thread.ds)); + perfctr_copy_task(p, regs); + if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr =
kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) { @@ -665,6 +669,9 @@ __switch_to(struct task_struct *prev_p, */ if (tsk_used_math(next_p) && next_p->fpu_counter > 5) math_state_restore(); + + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.26.perfctr26/drivers/Makefile.~1~ 2008-07-14 10:22:36.000000000 +0200 +++ linux-2.6.26.perfctr26/drivers/Makefile 2008-07-14 10:32:18.000000000 +0200 @@ -83,6 +83,7 @@ obj-$(CONFIG_MEMSTICK) += memstick/ obj-$(CONFIG_NEW_LEDS) += leds/ obj-$(CONFIG_INFINIBAND) += infiniband/ obj-$(CONFIG_SGI_SN) += sn/ +obj-$(CONFIG_KPERFCTR) += perfctr/ obj-y += firmware/ obj-$(CONFIG_CRYPTO) += crypto/ obj-$(CONFIG_SUPERH) += sh/ --- linux-2.6.26.perfctr26/fs/exec.c.~1~ 2008-07-14 10:22:38.000000000 +0200 +++ linux-2.6.26.perfctr26/fs/exec.c 2008-07-14 10:32:18.000000000 +0200 @@ -51,6 +51,7 @@ #include #include #include +#include #include #include @@ -992,6 +993,7 @@ int flush_old_exec(struct linux_binprm * set_task_comm(current, tcomm); current->flags &= ~PF_RANDOMIZE; + perfctr_flush_thread(&current->thread); flush_thread(); /* Set the new mm task size.
We have to do that late because it may --- linux-2.6.26.perfctr26/include/asm-arm/processor.h.~1~ 2008-04-17 18:22:35.000000000 +0200 +++ linux-2.6.26.perfctr26/include/asm-arm/processor.h 2008-07-14 10:32:18.000000000 +0200 @@ -50,6 +50,10 @@ struct thread_struct { unsigned long error_code; /* debugging */ struct debug_info debug; + +#ifdef CONFIG_PERFCTR_VIRTUAL + struct vperfctr *perfctr; /* performance counters */ +#endif }; #define INIT_THREAD { } --- linux-2.6.26.perfctr26/include/asm-arm/system.h.~1~ 2008-07-14 10:22:38.000000000 +0200 +++ linux-2.6.26.perfctr26/include/asm-arm/system.h 2008-07-14 10:32:18.000000000 +0200 @@ -265,7 +265,9 @@ extern struct task_struct *__switch_to(s #define switch_to(prev,next,last) \ do { \ + perfctr_suspend_thread(&(prev)->thread); \ last = __switch_to(prev,task_thread_info(prev), task_thread_info(next)); \ + perfctr_resume_thread(&(current)->thread); \ } while (0) #if defined(CONFIG_CPU_SA1100) || defined(CONFIG_CPU_SA110) --- linux-2.6.26.perfctr26/include/asm-powerpc/processor.h.~1~ 2008-07-14 10:22:38.000000000 +0200 +++ linux-2.6.26.perfctr26/include/asm-powerpc/processor.h 2008-07-14 10:32:18.000000000 +0200 @@ -179,6 +179,9 @@ struct thread_struct { unsigned long spefscr; /* SPE & eFP status */ int used_spe; /* set if process has used spe */ #endif /* CONFIG_SPE */ +#ifdef CONFIG_PERFCTR_VIRTUAL + struct vperfctr *perfctr; /* performance counters */ +#endif }; #define ARCH_MIN_TASKALIGN 16 --- linux-2.6.26.perfctr26/include/asm-x86/hw_irq_64.h.~1~ 2008-07-14 10:22:38.000000000 +0200 +++ linux-2.6.26.perfctr26/include/asm-x86/hw_irq_64.h 2008-07-14 10:32:18.000000000 +0200 @@ -84,14 +84,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x41 to spread out vectors evenly between priority * levels. 
(0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR (IRQ15_VECTOR + 2) -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in irq.h */ #ifndef __ASSEMBLY__ --- linux-2.6.26.perfctr26/include/asm-x86/irq_64.h.~1~ 2008-07-14 10:22:38.000000000 +0200 +++ linux-2.6.26.perfctr26/include/asm-x86/irq_64.h 2008-07-14 10:32:18.000000000 +0200 @@ -29,7 +29,7 @@ */ #define NR_VECTORS 256 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in hw_irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in hw_irq.h */ #define NR_IRQS (NR_VECTORS + (32 * NR_CPUS)) #define NR_IRQ_VECTORS NR_IRQS --- linux-2.6.26.perfctr26/include/asm-x86/mach-default/irq_vectors.h.~1~ 2008-01-25 12:31:58.000000000 +0100 +++ linux-2.6.26.perfctr26/include/asm-x86/mach-default/irq_vectors.h 2008-07-14 10:32:18.000000000 +0200 @@ -56,14 +56,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 --- linux-2.6.26.perfctr26/include/asm-x86/mach-visws/irq_vectors.h.~1~ 2008-01-25 12:31:58.000000000 +0100 +++ linux-2.6.26.perfctr26/include/asm-x86/mach-visws/irq_vectors.h 2008-07-14 10:32:18.000000000 +0200 @@ -35,14 +35,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. 
(0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 --- linux-2.6.26.perfctr26/include/asm-x86/processor.h.~1~ 2008-07-14 10:22:39.000000000 +0200 +++ linux-2.6.26.perfctr26/include/asm-x86/processor.h 2008-07-14 10:32:18.000000000 +0200 @@ -413,6 +413,8 @@ struct thread_struct { unsigned long iopl; /* Max allowed port in the bitmap, in bytes: */ unsigned io_bitmap_max; +/* performance counters */ + struct vperfctr *perfctr; /* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */ unsigned long debugctlmsr; /* Debug Store - if not 0 points to a DS Save Area configuration; --- linux-2.6.26.perfctr26/include/asm-x86/system.h.~1~ 2008-07-14 10:22:39.000000000 +0200 +++ linux-2.6.26.perfctr26/include/asm-x86/system.h 2008-07-14 10:32:18.000000000 +0200 @@ -37,6 +37,7 @@ do { \ * __switch_to()) \ */ \ unsigned long ebx, ecx, edx, esi, edi; \ + perfctr_suspend_thread(&(prev)->thread); \ \ asm volatile("pushfl\n\t" /* save flags */ \ "pushl %%ebp\n\t" /* save EBP */ \ @@ -84,7 +85,8 @@ do { \ "r12", "r13", "r14", "r15" /* Save restore flags to clear handle leaking NT */ -#define switch_to(prev, next, last) \ +#define switch_to(prev, next, last) do { \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile(SAVE_CONTEXT \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ @@ -104,7 +106,8 @@ do { \ [tif_fork] "i" (TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, stack)), \ [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ - : "memory", "cc" __EXTRA_CLOBBER) + : "memory", "cc" __EXTRA_CLOBBER); \ +} while (0) #endif #ifdef __KERNEL__ --- linux-2.6.26.perfctr26/kernel/exit.c.~1~ 2008-07-14 10:22:39.000000000 +0200 +++ linux-2.6.26.perfctr26/kernel/exit.c 2008-07-14 10:32:18.000000000 +0200 @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include 
@@ -184,6 +185,7 @@ repeat: zap_leader = task_detached(leader); } + perfctr_release_task(p); write_unlock_irq(&tasklist_lock); release_thread(p); call_rcu(&p->rcu, delayed_put_task_struct); --- linux-2.6.26.perfctr26/kernel/sched.c.~1~ 2008-07-14 10:22:39.000000000 +0200 +++ linux-2.6.26.perfctr26/kernel/sched.c 2008-07-14 10:32:18.000000000 +0200 @@ -54,6 +54,7 @@ #include #include #include +#include #include #include #include @@ -5565,6 +5566,8 @@ int set_cpus_allowed_ptr(struct task_str struct rq *rq; int ret = 0; + perfctr_set_cpus_allowed(p, *new_mask); /* XXX: convert to _ptr */ + rq = task_rq_lock(p, &flags); if (!cpus_intersects(*new_mask, cpu_online_map)) { ret = -EINVAL; --- linux-2.6.26.perfctr26/kernel/timer.c.~1~ 2008-07-14 10:22:39.000000000 +0200 +++ linux-2.6.26.perfctr26/kernel/timer.c 2008-07-14 10:32:18.000000000 +0200 @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -983,6 +984,7 @@ void update_process_times(int user_tick) /* Note: this timer irq context must be accounted for as well. */ account_process_tick(p, user_tick); + perfctr_sample_thread(&p->thread); run_local_timers(); if (rcu_pending(cpu)) rcu_check_callbacks(cpu, user_tick); papi-5.6.0/src/perfctr-2.6.x/patches/patch-kernel-2.6.25000664 001750 001750 00000042766 13216244367 024244 0ustar00jshenry1963jshenry1963000000 000000 --- linux-2.6.25.perfctr26/CREDITS.~1~ 2008-04-17 18:22:31.000000000 +0200 +++ linux-2.6.25.perfctr26/CREDITS 2008-06-22 15:17:34.000000000 +0200 @@ -2717,6 +2717,7 @@ N: Mikael Pettersson E: mikpe@it.uu.se W: http://user.it.uu.se/~mikpe/linux/ D: Miscellaneous fixes +D: Performance-monitoring counters driver N: Reed H. 
Petty E: rhp@draper.net --- linux-2.6.25.perfctr26/Documentation/ioctl-number.txt.~1~ 2008-04-17 18:22:31.000000000 +0200 +++ linux-2.6.25.perfctr26/Documentation/ioctl-number.txt 2008-06-22 15:17:34.000000000 +0200 @@ -188,6 +188,8 @@ Code Seq# Include File Comments 0xB1 00-1F PPPoX 0xCB 00-1F CBM serial IEC bus in development: +0xD0 all performance counters see drivers/perfctr/ + 0xDD 00-3F ZFCP device driver see drivers/s390/scsi/ 0xF3 00-3F video/sisfb.h sisfb (in development) --- linux-2.6.25.perfctr26/MAINTAINERS.~1~ 2008-04-17 18:22:31.000000000 +0200 +++ linux-2.6.25.perfctr26/MAINTAINERS 2008-06-22 15:17:34.000000000 +0200 @@ -3098,6 +3098,12 @@ M: nagar@watson.ibm.com L: linux-kernel@vger.kernel.org S: Maintained +PERFORMANCE-MONITORING COUNTERS DRIVER +P: Mikael Pettersson +M: mikpe@it.uu.se +W: http://user.it.uu.se/~mikpe/linux/perfctr/ +S: Maintained + PERSONALITY HANDLING P: Christoph Hellwig M: hch@infradead.org --- linux-2.6.25.perfctr26/arch/arm/Kconfig.~1~ 2008-04-17 18:22:31.000000000 +0200 +++ linux-2.6.25.perfctr26/arch/arm/Kconfig 2008-06-22 15:17:34.000000000 +0200 @@ -573,6 +573,10 @@ config IWMMXT Enable support for iWMMXt context switching at run time if running on a CPU that supports it. 
+if CPU_XSCALE +source drivers/perfctr/Kconfig +endif + # bool 'Use XScale PMU as timer source' CONFIG_XSCALE_PMU_TIMER config XSCALE_PMU bool --- linux-2.6.25.perfctr26/arch/arm/kernel/process.c.~1~ 2008-04-17 18:22:31.000000000 +0200 +++ linux-2.6.25.perfctr26/arch/arm/kernel/process.c 2008-06-22 15:17:34.000000000 +0200 @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -303,6 +304,7 @@ void show_fpregs(struct user_fp *regs) */ void exit_thread(void) { + perfctr_exit_thread(&current->thread); } ATOMIC_NOTIFIER_HEAD(thread_notify_head); @@ -348,6 +350,8 @@ copy_thread(int nr, unsigned long clone_ if (clone_flags & CLONE_SETTLS) thread->tp_value = regs->ARM_r3; + perfctr_copy_task(p, regs); + return 0; } --- linux-2.6.25.perfctr26/arch/powerpc/kernel/process.c.~1~ 2008-04-17 18:22:32.000000000 +0200 +++ linux-2.6.25.perfctr26/arch/powerpc/kernel/process.c 2008-06-22 15:17:34.000000000 +0200 @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -358,8 +359,10 @@ struct task_struct *__switch_to(struct t * window where the kernel stack SLB and the kernel stack are out * of sync. Hard disable here.
*/ + perfctr_suspend_thread(&prev->thread); hard_irq_disable(); last = _switch(old_thread, new_thread); + perfctr_resume_thread(&current->thread); local_irq_restore(flags); @@ -497,6 +500,7 @@ void show_regs(struct pt_regs * regs) void exit_thread(void) { discard_lazy_cpu_state(); + perfctr_exit_thread(&current->thread); } void flush_thread(void) @@ -616,6 +620,8 @@ int copy_thread(int nr, unsigned long cl kregs->nip = (unsigned long)ret_from_fork; #endif + perfctr_copy_task(p, regs); + return 0; } --- linux-2.6.25.perfctr26/arch/powerpc/platforms/Kconfig.cputype.~1~ 2008-04-17 18:22:32.000000000 +0200 +++ linux-2.6.25.perfctr26/arch/powerpc/platforms/Kconfig.cputype 2008-06-22 15:17:34.000000000 +0200 @@ -232,4 +232,8 @@ config NOT_COHERENT_CACHE config CHECK_CACHE_COHERENCY bool +if PPC32 +source "drivers/perfctr/Kconfig" +endif + endmenu --- linux-2.6.25.perfctr26/arch/x86/Kconfig.~1~ 2008-04-17 18:22:32.000000000 +0200 +++ linux-2.6.25.perfctr26/arch/x86/Kconfig 2008-06-22 15:17:34.000000000 +0200 @@ -1074,6 +1074,8 @@ config CC_STACKPROTECTOR_ALL functions that use large-ish on-stack buffers. By enabling this option, GCC will be asked to do this for ALL functions. +source "drivers/perfctr/Kconfig" + source kernel/Kconfig.hz config KEXEC --- linux-2.6.25.perfctr26/arch/x86/kernel/entry_32.S.~1~ 2008-04-17 18:22:32.000000000 +0200 +++ linux-2.6.25.perfctr26/arch/x86/kernel/entry_32.S 2008-06-22 15:17:34.000000000 +0200 @@ -640,6 +640,23 @@ ENDPROC(name) /* The include is where all of the SMP etc.
interrupts come from */ #include "entry_arch.h" +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +ENTRY(perfctr_interrupt) + RING0_INT_FRAME + pushl $~(LOCAL_PERFCTR_VECTOR) + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + TRACE_IRQS_OFF + pushl %esp + CFI_ADJUST_CFA_OFFSET 4 + call smp_perfctr_interrupt + addl $4, %esp + CFI_ADJUST_CFA_OFFSET -4 + jmp ret_from_intr + CFI_ENDPROC +ENDPROC(perfctr_interrupt) +#endif + KPROBE_ENTRY(page_fault) RING0_EC_FRAME pushl $do_page_fault --- linux-2.6.25.perfctr26/arch/x86/kernel/entry_64.S.~1~ 2008-04-17 18:22:32.000000000 +0200 +++ linux-2.6.25.perfctr26/arch/x86/kernel/entry_64.S 2008-06-22 15:17:34.000000000 +0200 @@ -730,6 +730,12 @@ ENTRY(spurious_interrupt) apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt END(spurious_interrupt) +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +ENTRY(perfctr_interrupt) + apicinterrupt LOCAL_PERFCTR_VECTOR,smp_perfctr_interrupt +END(perfctr_interrupt) +#endif + /* * Exception entry points. */ --- linux-2.6.25.perfctr26/arch/x86/kernel/i8259_32.c.~1~ 2008-04-17 18:22:32.000000000 +0200 +++ linux-2.6.25.perfctr26/arch/x86/kernel/i8259_32.c 2008-06-22 15:17:34.000000000 +0200 @@ -20,6 +20,7 @@ #include #include #include +#include /* * This is the 'legacy' 8259A Programmable Interrupt Controller, @@ -398,6 +399,8 @@ void __init native_init_IRQ(void) */ intr_init_hook(); + perfctr_vector_init(); + /* * External FPU? Set up irq13 if so, for * original braindamaged IBM FERR coupling. 
--- linux-2.6.25.perfctr26/arch/x86/kernel/i8259_64.c.~1~ 2008-04-17 18:22:32.000000000 +0200 +++ linux-2.6.25.perfctr26/arch/x86/kernel/i8259_64.c 2008-06-22 15:17:34.000000000 +0200 @@ -21,6 +21,7 @@ #include #include #include +#include #include /* @@ -507,6 +508,8 @@ void __init native_init_IRQ(void) set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); + perfctr_vector_init(); + if (!acpi_ioapic) setup_irq(2, &irq2); } --- linux-2.6.25.perfctr26/arch/x86/kernel/process_32.c.~1~ 2008-04-17 18:22:32.000000000 +0200 +++ linux-2.6.25.perfctr26/arch/x86/kernel/process_32.c 2008-06-22 15:17:34.000000000 +0200 @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -430,6 +431,7 @@ void exit_thread(void) tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; put_cpu(); } + perfctr_exit_thread(&current->thread); } void flush_thread(void) @@ -486,6 +488,8 @@ int copy_thread(int nr, unsigned long cl savesegment(gs, p->thread.gs); + perfctr_copy_task(p, regs); + tsk = current; if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, @@ -722,6 +726,8 @@ struct task_struct * __switch_to(struct x86_write_percpu(current_task, next_p); + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.25.perfctr26/arch/x86/kernel/process_64.c.~1~ 2008-04-17 18:22:32.000000000 +0200 +++ linux-2.6.25.perfctr26/arch/x86/kernel/process_64.c 2008-06-22 15:17:34.000000000 +0200 @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -391,6 +392,7 @@ void exit_thread(void) t->io_bitmap_max = 0; put_cpu(); } + perfctr_exit_thread(t); } void flush_thread(void) @@ -494,6 +496,8 @@ int copy_thread(int nr, unsigned long cl asm("mov %%es,%0" : "=m" (p->thread.es)); asm("mov %%ds,%0" : "=m" (p->thread.ds)); + perfctr_copy_task(p, regs); + if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr =
kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) { @@ -699,6 +703,9 @@ __switch_to(struct task_struct *prev_p, */ if (next_p->fpu_counter>5) math_state_restore(); + + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.25.perfctr26/drivers/Makefile.~1~ 2008-04-17 18:22:32.000000000 +0200 +++ linux-2.6.25.perfctr26/drivers/Makefile 2008-06-22 15:17:34.000000000 +0200 @@ -82,6 +82,7 @@ obj-$(CONFIG_MEMSTICK) += memstick/ obj-$(CONFIG_NEW_LEDS) += leds/ obj-$(CONFIG_INFINIBAND) += infiniband/ obj-$(CONFIG_SGI_SN) += sn/ +obj-$(CONFIG_KPERFCTR) += perfctr/ obj-y += firmware/ obj-$(CONFIG_CRYPTO) += crypto/ obj-$(CONFIG_SUPERH) += sh/ --- linux-2.6.25.perfctr26/fs/exec.c.~1~ 2008-04-17 18:22:35.000000000 +0200 +++ linux-2.6.25.perfctr26/fs/exec.c 2008-06-22 15:17:56.000000000 +0200 @@ -51,6 +51,7 @@ #include #include #include +#include #include #include @@ -1006,6 +1007,7 @@ int flush_old_exec(struct linux_binprm * set_task_comm(current, tcomm); current->flags &= ~PF_RANDOMIZE; + perfctr_flush_thread(&current->thread); flush_thread(); /* Set the new mm task size.
We have to do that late because it may --- linux-2.6.25.perfctr26/include/asm-arm/processor.h.~1~ 2008-04-17 18:22:35.000000000 +0200 +++ linux-2.6.25.perfctr26/include/asm-arm/processor.h 2008-06-22 15:17:34.000000000 +0200 @@ -50,6 +50,10 @@ struct thread_struct { unsigned long error_code; /* debugging */ struct debug_info debug; + +#ifdef CONFIG_PERFCTR_VIRTUAL + struct vperfctr *perfctr; /* performance counters */ +#endif }; #define INIT_THREAD { } --- linux-2.6.25.perfctr26/include/asm-arm/system.h.~1~ 2008-04-17 18:22:35.000000000 +0200 +++ linux-2.6.25.perfctr26/include/asm-arm/system.h 2008-06-22 15:17:34.000000000 +0200 @@ -264,7 +264,9 @@ extern struct task_struct *__switch_to(s #define switch_to(prev,next,last) \ do { \ + perfctr_suspend_thread(&(prev)->thread); \ last = __switch_to(prev,task_thread_info(prev), task_thread_info(next)); \ + perfctr_resume_thread(&(current)->thread); \ } while (0) #if defined(CONFIG_CPU_SA1100) || defined(CONFIG_CPU_SA110) --- linux-2.6.25.perfctr26/include/asm-powerpc/processor.h.~1~ 2008-04-17 18:22:36.000000000 +0200 +++ linux-2.6.25.perfctr26/include/asm-powerpc/processor.h 2008-06-22 15:17:34.000000000 +0200 @@ -177,6 +177,9 @@ struct thread_struct { unsigned long spefscr; /* SPE & eFP status */ int used_spe; /* set if process has used spe */ #endif /* CONFIG_SPE */ +#ifdef CONFIG_PERFCTR_VIRTUAL + struct vperfctr *perfctr; /* performance counters */ +#endif }; #define ARCH_MIN_TASKALIGN 16 --- linux-2.6.25.perfctr26/include/asm-x86/hw_irq_64.h.~1~ 2008-04-17 18:22:36.000000000 +0200 +++ linux-2.6.25.perfctr26/include/asm-x86/hw_irq_64.h 2008-06-22 15:17:34.000000000 +0200 @@ -84,14 +84,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x41 to spread out vectors evenly between priority * levels. 
(0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR (IRQ15_VECTOR + 2) -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in irq.h */ #ifndef __ASSEMBLY__ --- linux-2.6.25.perfctr26/include/asm-x86/irq_64.h.~1~ 2008-01-25 12:31:58.000000000 +0100 +++ linux-2.6.25.perfctr26/include/asm-x86/irq_64.h 2008-06-22 15:17:34.000000000 +0200 @@ -29,7 +29,7 @@ */ #define NR_VECTORS 256 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in hw_irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in hw_irq.h */ #define NR_IRQS (NR_VECTORS + (32 *NR_CPUS)) #define NR_IRQ_VECTORS NR_IRQS --- linux-2.6.25.perfctr26/include/asm-x86/mach-default/irq_vectors.h.~1~ 2008-01-25 12:31:58.000000000 +0100 +++ linux-2.6.25.perfctr26/include/asm-x86/mach-default/irq_vectors.h 2008-06-22 15:17:34.000000000 +0200 @@ -56,14 +56,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 --- linux-2.6.25.perfctr26/include/asm-x86/mach-visws/irq_vectors.h.~1~ 2008-01-25 12:31:58.000000000 +0100 +++ linux-2.6.25.perfctr26/include/asm-x86/mach-visws/irq_vectors.h 2008-06-22 15:17:34.000000000 +0200 @@ -35,14 +35,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. 
(0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 --- linux-2.6.25.perfctr26/include/asm-x86/processor.h.~1~ 2008-04-17 18:22:36.000000000 +0200 +++ linux-2.6.25.perfctr26/include/asm-x86/processor.h 2008-06-22 15:17:34.000000000 +0200 @@ -349,6 +349,8 @@ struct thread_struct { unsigned long iopl; /* max allowed port in the bitmap, in bytes: */ unsigned io_bitmap_max; +/* performance counters */ + struct vperfctr *perfctr; /* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */ unsigned long debugctlmsr; /* Debug Store - if not 0 points to a DS Save Area configuration; --- linux-2.6.25.perfctr26/include/asm-x86/system.h.~1~ 2008-04-17 18:22:36.000000000 +0200 +++ linux-2.6.25.perfctr26/include/asm-x86/system.h 2008-06-22 15:17:34.000000000 +0200 @@ -29,6 +29,7 @@ struct task_struct *__switch_to(struct t */ #define switch_to(prev, next, last) do { \ unsigned long esi, edi; \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile("pushfl\n\t" /* Save flags */ \ "pushl %%ebp\n\t" \ "movl %%esp,%0\n\t" /* save ESP */ \ @@ -62,7 +63,8 @@ struct task_struct *__switch_to(struct t "r12", "r13", "r14", "r15" /* Save restore flags to clear handle leaking NT */ -#define switch_to(prev, next, last) \ +#define switch_to(prev, next, last) do { \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile(SAVE_CONTEXT \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ @@ -82,7 +84,8 @@ struct task_struct *__switch_to(struct t [tif_fork] "i" (TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, stack)), \ [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ - : "memory", "cc" __EXTRA_CLOBBER) + : "memory", "cc" __EXTRA_CLOBBER); \ +} while (0) #endif #ifdef __KERNEL__ --- linux-2.6.25.perfctr26/kernel/exit.c.~1~ 2008-04-17 18:22:36.000000000 +0200 +++ 
linux-2.6.25.perfctr26/kernel/exit.c 2008-06-22 15:17:34.000000000 +0200 @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -173,6 +174,7 @@ repeat: zap_leader = (leader->exit_signal == -1); } + perfctr_release_task(p); write_unlock_irq(&tasklist_lock); release_thread(p); call_rcu(&p->rcu, delayed_put_task_struct); --- linux-2.6.25.perfctr26/kernel/sched.c.~1~ 2008-04-17 18:22:36.000000000 +0200 +++ linux-2.6.25.perfctr26/kernel/sched.c 2008-06-22 15:17:34.000000000 +0200 @@ -54,6 +54,7 @@ #include #include #include +#include #include #include #include @@ -5345,6 +5346,8 @@ int set_cpus_allowed(struct task_struct struct rq *rq; int ret = 0; + perfctr_set_cpus_allowed(p, new_mask); + rq = task_rq_lock(p, &flags); if (!cpus_intersects(new_mask, cpu_online_map)) { ret = -EINVAL; --- linux-2.6.25.perfctr26/kernel/timer.c.~1~ 2008-04-17 18:22:36.000000000 +0200 +++ linux-2.6.25.perfctr26/kernel/timer.c 2008-06-22 15:17:34.000000000 +0200 @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -849,6 +850,7 @@ void update_process_times(int user_tick) /* Note: this timer irq context must be accounted for as well. */ account_process_tick(p, user_tick); + perfctr_sample_thread(&p->thread); run_local_timers(); if (rcu_pending(cpu)) rcu_check_callbacks(cpu, user_tick); papi-5.6.0/src/perfctr-2.6.x/patches/patch-kernel-2.6.24000664 001750 001750 00000043566 13216244367 024242 0ustar00jshenry1963jshenry1963000000 000000 --- linux-2.6.24.perfctr26/CREDITS.~1~ 2008-06-22 15:42:29.000000000 +0200 +++ linux-2.6.24.perfctr26/CREDITS 2008-06-22 15:44:14.000000000 +0200 @@ -2710,6 +2710,7 @@ N: Mikael Pettersson E: mikpe@it.uu.se W: http://user.it.uu.se/~mikpe/linux/ D: Miscellaneous fixes +D: Performance-monitoring counters driver N: Reed H. 
Petty E: rhp@draper.net --- linux-2.6.24.perfctr26/Documentation/ioctl-number.txt.~1~ 2008-06-22 15:42:29.000000000 +0200 +++ linux-2.6.24.perfctr26/Documentation/ioctl-number.txt 2008-06-22 15:44:14.000000000 +0200 @@ -187,6 +187,8 @@ Code Seq# Include File Comments 0xB1 00-1F PPPoX 0xCB 00-1F CBM serial IEC bus in development: +0xD0 all performance counters see drivers/perfctr/ + 0xDD 00-3F ZFCP device driver see drivers/s390/scsi/ 0xF3 00-3F video/sisfb.h sisfb (in development) --- linux-2.6.24.perfctr26/MAINTAINERS.~1~ 2008-06-22 15:42:29.000000000 +0200 +++ linux-2.6.24.perfctr26/MAINTAINERS 2008-06-22 15:44:14.000000000 +0200 @@ -2980,6 +2980,12 @@ M: nagar@watson.ibm.com L: linux-kernel@vger.kernel.org S: Maintained +PERFORMANCE-MONITORING COUNTERS DRIVER +P: Mikael Pettersson +M: mikpe@it.uu.se +W: http://user.it.uu.se/~mikpe/linux/perfctr/ +S: Maintained + PERSONALITY HANDLING P: Christoph Hellwig M: hch@infradead.org --- linux-2.6.24.perfctr26/arch/arm/Kconfig.~1~ 2008-06-22 15:42:29.000000000 +0200 +++ linux-2.6.24.perfctr26/arch/arm/Kconfig 2008-06-22 15:44:14.000000000 +0200 @@ -494,6 +494,10 @@ config IWMMXT Enable support for iWMMXt context switching at run time if running on a CPU that supports it. 
+if CPU_XSCALE +source drivers/perfctr/Kconfig +endif + # bool 'Use XScale PMU as timer source' CONFIG_XSCALE_PMU_TIMER config XSCALE_PMU bool --- linux-2.6.24.perfctr26/arch/arm/kernel/process.c.~1~ 2008-06-22 15:42:29.000000000 +0200 +++ linux-2.6.24.perfctr26/arch/arm/kernel/process.c 2008-06-22 15:44:14.000000000 +0200 @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -304,6 +305,7 @@ void show_fpregs(struct user_fp *regs) */ void exit_thread(void) { + perfctr_exit_thread(&current->thread); } ATOMIC_NOTIFIER_HEAD(thread_notify_head); @@ -349,6 +351,8 @@ copy_thread(int nr, unsigned long clone_ if (clone_flags & CLONE_SETTLS) thread->tp_value = regs->ARM_r3; + perfctr_copy_task(p, regs); + return 0; } --- linux-2.6.24.perfctr26/arch/powerpc/kernel/process.c.~1~ 2008-06-22 15:42:30.000000000 +0200 +++ linux-2.6.24.perfctr26/arch/powerpc/kernel/process.c 2008-06-22 15:44:14.000000000 +0200 @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -353,7 +354,9 @@ struct task_struct *__switch_to(struct t account_process_vtime(current); calculate_steal_time(); + perfctr_suspend_thread(&prev->thread); last = _switch(old_thread, new_thread); + perfctr_resume_thread(&current->thread); local_irq_restore(flags); @@ -491,6 +494,7 @@ void show_regs(struct pt_regs * regs) void exit_thread(void) { discard_lazy_cpu_state(); + perfctr_exit_thread(&current->thread); } void flush_thread(void) @@ -610,6 +614,8 @@ int copy_thread(int nr, unsigned long cl kregs->nip = (unsigned long)ret_from_fork; #endif + perfctr_copy_task(p, regs); + return 0; } --- linux-2.6.24.perfctr26/arch/powerpc/platforms/Kconfig.cputype.~1~ 2008-06-22 15:42:30.000000000 +0200 +++ linux-2.6.24.perfctr26/arch/powerpc/platforms/Kconfig.cputype 2008-06-22 15:44:14.000000000 +0200 @@ -234,4 +234,8 @@ config NOT_COHERENT_CACHE config CHECK_CACHE_COHERENCY bool +if PPC32 +source "drivers/perfctr/Kconfig" +endif + endmenu --- linux-2.6.24.perfctr26/arch/x86/Kconfig.~1~ 2008-06-22
15:42:30.000000000 +0200 +++ linux-2.6.24.perfctr26/arch/x86/Kconfig 2008-06-22 15:44:14.000000000 +0200 @@ -1060,6 +1060,8 @@ config CC_STACKPROTECTOR_ALL functions that use large-ish on-stack buffers. By enabling this option, GCC will be asked to do this for ALL functions. +source "drivers/perfctr/Kconfig" + source kernel/Kconfig.hz config KEXEC --- linux-2.6.24.perfctr26/arch/x86/kernel/entry_32.S.~1~ 2008-06-22 15:42:30.000000000 +0200 +++ linux-2.6.24.perfctr26/arch/x86/kernel/entry_32.S 2008-06-22 15:44:14.000000000 +0200 @@ -639,6 +639,23 @@ ENDPROC(name) /* The include is where all of the SMP etc. interrupts come from */ #include "entry_arch.h" +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +ENTRY(perfctr_interrupt) + RING0_INT_FRAME + pushl $~(LOCAL_PERFCTR_VECTOR) + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + TRACE_IRQS_OFF + pushl %esp + CFI_ADJUST_CFA_OFFSET 4 + call smp_perfctr_interrupt + addl $4, %esp + CFI_ADJUST_CFA_OFFSET -4 + jmp ret_from_intr + CFI_ENDPROC +ENDPROC(perfctr_interrupt) +#endif + KPROBE_ENTRY(page_fault) RING0_EC_FRAME pushl $do_page_fault --- linux-2.6.24.perfctr26/arch/x86/kernel/entry_64.S.~1~ 2008-06-22 15:42:30.000000000 +0200 +++ linux-2.6.24.perfctr26/arch/x86/kernel/entry_64.S 2008-06-22 15:44:14.000000000 +0200 @@ -696,6 +696,12 @@ ENTRY(spurious_interrupt) apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt END(spurious_interrupt) +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +ENTRY(perfctr_interrupt) + apicinterrupt LOCAL_PERFCTR_VECTOR,smp_perfctr_interrupt +END(perfctr_interrupt) +#endif + /* * Exception entry points. */ --- linux-2.6.24.perfctr26/arch/x86/kernel/i8259_32.c.~1~ 2008-06-22 15:42:30.000000000 +0200 +++ linux-2.6.24.perfctr26/arch/x86/kernel/i8259_32.c 2008-06-22 15:44:14.000000000 +0200 @@ -20,6 +20,7 @@ #include #include #include +#include #include @@ -413,6 +414,8 @@ void __init native_init_IRQ(void) */ intr_init_hook(); + perfctr_vector_init(); + /* * External FPU? 
Set up irq13 if so, for * original braindamaged IBM FERR coupling. --- linux-2.6.24.perfctr26/arch/x86/kernel/i8259_64.c.~1~ 2008-06-22 15:42:30.000000000 +0200 +++ linux-2.6.24.perfctr26/arch/x86/kernel/i8259_64.c 2008-06-22 15:44:14.000000000 +0200 @@ -21,6 +21,7 @@ #include #include #include +#include /* * Common place to define all x86 IRQ vectors @@ -497,6 +498,8 @@ void __init init_IRQ(void) set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); + perfctr_vector_init(); + if (!acpi_ioapic) setup_irq(2, &irq2); } --- linux-2.6.24.perfctr26/arch/x86/kernel/process_32.c.~1~ 2008-06-22 15:42:30.000000000 +0200 +++ linux-2.6.24.perfctr26/arch/x86/kernel/process_32.c 2008-06-22 15:44:14.000000000 +0200 @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -429,6 +430,7 @@ void exit_thread(void) tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; put_cpu(); } + perfctr_exit_thread(&current->thread); } void flush_thread(void) @@ -480,6 +482,8 @@ int copy_thread(int nr, unsigned long cl savesegment(gs,p->thread.gs); + perfctr_copy_task(p, regs); + tsk = current; if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, @@ -769,6 +773,8 @@ struct task_struct fastcall * __switch_t x86_write_percpu(current_task, next_p); + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.24.perfctr26/arch/x86/kernel/process_64.c.~1~ 2008-06-22 15:42:30.000000000 +0200 +++ linux-2.6.24.perfctr26/arch/x86/kernel/process_64.c 2008-06-22 15:44:14.000000000 +0200 @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -403,6 +404,7 @@ void exit_thread(void) t->io_bitmap_max = 0; put_cpu(); } + perfctr_exit_thread(t); } void flush_thread(void) @@ -511,6 +513,8 @@ int copy_thread(int nr, unsigned long cl asm("mov %%es,%0" : "=m" (p->thread.es)); asm("mov %%ds,%0" : "=m" (p->thread.ds)); + perfctr_copy_task(p,
regs); + if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) { @@ -694,6 +698,9 @@ __switch_to(struct task_struct *prev_p, */ if (next_p->fpu_counter>5) math_state_restore(); + + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.24.perfctr26/drivers/Makefile.~1~ 2008-06-22 15:42:30.000000000 +0200 +++ linux-2.6.24.perfctr26/drivers/Makefile 2008-06-22 15:44:14.000000000 +0200 @@ -80,6 +80,7 @@ obj-$(CONFIG_MMC) += mmc/ obj-$(CONFIG_NEW_LEDS) += leds/ obj-$(CONFIG_INFINIBAND) += infiniband/ obj-$(CONFIG_SGI_SN) += sn/ +obj-$(CONFIG_KPERFCTR) += perfctr/ obj-y += firmware/ obj-$(CONFIG_CRYPTO) += crypto/ obj-$(CONFIG_SUPERH) += sh/ --- linux-2.6.24.perfctr26/fs/exec.c.~1~ 2008-06-22 15:42:32.000000000 +0200 +++ linux-2.6.24.perfctr26/fs/exec.c 2008-06-22 15:43:55.000000000 +0200 @@ -51,6 +51,7 @@ #include #include #include +#include #include #include @@ -1019,6 +1020,7 @@ int flush_old_exec(struct linux_binprm * set_task_comm(current, tcomm); current->flags &= ~PF_RANDOMIZE; + perfctr_flush_thread(&current->thread); flush_thread(); /* Set the new mm task size.
We have to do that late because it may --- linux-2.6.24.perfctr26/include/asm-arm/processor.h.~1~ 2008-06-22 15:42:33.000000000 +0200 +++ linux-2.6.24.perfctr26/include/asm-arm/processor.h 2008-06-22 15:44:14.000000000 +0200 @@ -44,6 +44,10 @@ struct thread_struct { unsigned long error_code; /* debugging */ struct debug_info debug; + +#ifdef CONFIG_PERFCTR_VIRTUAL + struct vperfctr *perfctr; /* performance counters */ +#endif }; #define INIT_THREAD { } --- linux-2.6.24.perfctr26/include/asm-arm/system.h.~1~ 2008-06-22 15:42:33.000000000 +0200 +++ linux-2.6.24.perfctr26/include/asm-arm/system.h 2008-06-22 15:44:14.000000000 +0200 @@ -264,7 +264,9 @@ extern struct task_struct *__switch_to(s #define switch_to(prev,next,last) \ do { \ + perfctr_suspend_thread(&(prev)->thread); \ last = __switch_to(prev,task_thread_info(prev), task_thread_info(next)); \ + perfctr_resume_thread(&(current)->thread); \ } while (0) #if defined(CONFIG_CPU_SA1100) || defined(CONFIG_CPU_SA110) --- linux-2.6.24.perfctr26/include/asm-powerpc/processor.h.~1~ 2008-06-22 15:42:33.000000000 +0200 +++ linux-2.6.24.perfctr26/include/asm-powerpc/processor.h 2008-06-22 15:44:14.000000000 +0200 @@ -157,6 +157,9 @@ struct thread_struct { unsigned long spefscr; /* SPE & eFP status */ int used_spe; /* set if process has used spe */ #endif /* CONFIG_SPE */ +#ifdef CONFIG_PERFCTR_VIRTUAL + struct vperfctr *perfctr; /* performance counters */ +#endif }; #define ARCH_MIN_TASKALIGN 16 --- linux-2.6.24.perfctr26/include/asm-x86/hw_irq_64.h.~1~ 2008-06-22 15:42:33.000000000 +0200 +++ linux-2.6.24.perfctr26/include/asm-x86/hw_irq_64.h 2008-06-22 15:44:14.000000000 +0200 @@ -84,14 +84,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x41 to spread out vectors evenly between priority * levels. 
(0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR (IRQ15_VECTOR + 2) -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in irq.h */ #ifndef __ASSEMBLY__ --- linux-2.6.24.perfctr26/include/asm-x86/irq_64.h.~1~ 2008-01-25 12:31:58.000000000 +0100 +++ linux-2.6.24.perfctr26/include/asm-x86/irq_64.h 2008-06-22 15:44:14.000000000 +0200 @@ -29,7 +29,7 @@ */ #define NR_VECTORS 256 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in hw_irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in hw_irq.h */ #define NR_IRQS (NR_VECTORS + (32 *NR_CPUS)) #define NR_IRQ_VECTORS NR_IRQS --- linux-2.6.24.perfctr26/include/asm-x86/mach-default/irq_vectors.h.~1~ 2008-01-25 12:31:58.000000000 +0100 +++ linux-2.6.24.perfctr26/include/asm-x86/mach-default/irq_vectors.h 2008-06-22 15:44:14.000000000 +0200 @@ -56,14 +56,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 --- linux-2.6.24.perfctr26/include/asm-x86/mach-visws/irq_vectors.h.~1~ 2008-01-25 12:31:58.000000000 +0100 +++ linux-2.6.24.perfctr26/include/asm-x86/mach-visws/irq_vectors.h 2008-06-22 15:44:14.000000000 +0200 @@ -35,14 +35,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. 
(0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 --- linux-2.6.24.perfctr26/include/asm-x86/processor_32.h.~1~ 2008-06-22 15:42:33.000000000 +0200 +++ linux-2.6.24.perfctr26/include/asm-x86/processor_32.h 2008-06-22 15:44:14.000000000 +0200 @@ -369,6 +369,8 @@ struct thread_struct { unsigned long iopl; /* max allowed port in the bitmap, in bytes: */ unsigned long io_bitmap_max; +/* performance counters */ + struct vperfctr *perfctr; }; #define INIT_THREAD { \ --- linux-2.6.24.perfctr26/include/asm-x86/processor_64.h.~1~ 2008-06-22 15:42:33.000000000 +0200 +++ linux-2.6.24.perfctr26/include/asm-x86/processor_64.h 2008-06-22 15:44:14.000000000 +0200 @@ -241,6 +241,8 @@ struct thread_struct { unsigned io_bitmap_max; /* cached TLS descriptors. */ u64 tls_array[GDT_ENTRY_TLS_ENTRIES]; +/* performance counters */ + struct vperfctr *perfctr; } __attribute__((aligned(16))); #define INIT_THREAD { \ --- linux-2.6.24.perfctr26/include/asm-x86/system_32.h.~1~ 2008-06-22 15:42:33.000000000 +0200 +++ linux-2.6.24.perfctr26/include/asm-x86/system_32.h 2008-06-22 15:44:14.000000000 +0200 @@ -18,6 +18,7 @@ extern struct task_struct * FASTCALL(__s */ #define switch_to(prev,next,last) do { \ unsigned long esi,edi; \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile("pushfl\n\t" /* Save flags */ \ "pushl %%ebp\n\t" \ "movl %%esp,%0\n\t" /* save ESP */ \ --- linux-2.6.24.perfctr26/include/asm-x86/system_64.h.~1~ 2008-06-22 15:42:33.000000000 +0200 +++ linux-2.6.24.perfctr26/include/asm-x86/system_64.h 2008-06-22 15:44:14.000000000 +0200 @@ -25,7 +25,8 @@ ,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15" /* Save restore flags to clear handle leaking NT */ -#define switch_to(prev,next,last) \ +#define switch_to(prev,next,last) do { \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile(SAVE_CONTEXT \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ 
"movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ @@ -45,7 +46,8 @@ [tif_fork] "i" (TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, stack)), \ [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ - : "memory", "cc" __EXTRA_CLOBBER) + : "memory", "cc" __EXTRA_CLOBBER); \ +} while (0) extern void load_gs_index(unsigned); --- linux-2.6.24.perfctr26/kernel/exit.c.~1~ 2008-06-22 15:42:33.000000000 +0200 +++ linux-2.6.24.perfctr26/kernel/exit.c 2008-06-22 15:44:14.000000000 +0200 @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -175,6 +176,7 @@ repeat: zap_leader = (leader->exit_signal == -1); } + perfctr_release_task(p); write_unlock_irq(&tasklist_lock); release_thread(p); call_rcu(&p->rcu, delayed_put_task_struct); --- linux-2.6.24.perfctr26/kernel/sched.c.~1~ 2008-06-22 15:42:33.000000000 +0200 +++ linux-2.6.24.perfctr26/kernel/sched.c 2008-06-22 15:44:14.000000000 +0200 @@ -52,6 +52,7 @@ #include #include #include +#include #include #include #include @@ -5071,6 +5072,8 @@ int set_cpus_allowed(struct task_struct struct rq *rq; int ret = 0; + perfctr_set_cpus_allowed(p, new_mask); + rq = task_rq_lock(p, &flags); if (!cpus_intersects(new_mask, cpu_online_map)) { ret = -EINVAL; --- linux-2.6.24.perfctr26/kernel/timer.c.~1~ 2008-06-22 15:42:33.000000000 +0200 +++ linux-2.6.24.perfctr26/kernel/timer.c 2008-06-22 15:44:14.000000000 +0200 @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -841,6 +842,7 @@ void update_process_times(int user_tick) /* Note: this timer irq context must be accounted for as well. 
*/ account_process_tick(p, user_tick); + perfctr_sample_thread(&p->thread); run_local_timers(); if (rcu_pending(cpu)) rcu_check_callbacks(cpu, user_tick); papi-5.6.0/src/perfctr-2.6.x/patches/patch-kernel-2.6.23000664 001750 001750 00000044375 13216244367 024240 0ustar00jshenry1963jshenry1963000000 000000 --- linux-2.6.23.perfctr26/CREDITS.~1~ 2008-06-22 15:56:52.000000000 +0200 +++ linux-2.6.23.perfctr26/CREDITS 2008-06-22 15:58:34.000000000 +0200 @@ -2686,8 +2686,9 @@ S: Canada K2P 0X8 N: Mikael Pettersson E: mikpe@it.uu.se -W: http://www.csd.uu.se/~mikpe/ +W: http://user.it.uu.se/~mikpe/ D: Miscellaneous fixes +D: Performance-monitoring counters driver N: Reed H. Petty E: rhp@draper.net --- linux-2.6.23.perfctr26/Documentation/ioctl-number.txt.~1~ 2008-06-22 15:42:29.000000000 +0200 +++ linux-2.6.23.perfctr26/Documentation/ioctl-number.txt 2008-06-22 15:58:34.000000000 +0200 @@ -187,6 +187,8 @@ Code Seq# Include File Comments 0xB1 00-1F PPPoX 0xCB 00-1F CBM serial IEC bus in development: +0xD0 all performance counters see drivers/perfctr/ + 0xDD 00-3F ZFCP device driver see drivers/s390/scsi/ 0xF3 00-3F video/sisfb.h sisfb (in development) --- linux-2.6.23.perfctr26/MAINTAINERS.~1~ 2008-06-22 15:56:52.000000000 +0200 +++ linux-2.6.23.perfctr26/MAINTAINERS 2008-06-22 15:58:34.000000000 +0200 @@ -2937,6 +2937,12 @@ M: nagar@watson.ibm.com L: linux-kernel@vger.kernel.org S: Maintained +PERFORMANCE-MONITORING COUNTERS DRIVER +P: Mikael Pettersson +M: mikpe@it.uu.se +W: http://user.it.uu.se/~mikpe/linux/perfctr/ +S: Maintained + PERSONALITY HANDLING P: Christoph Hellwig M: hch@infradead.org --- linux-2.6.23.perfctr26/arch/arm/Kconfig.~1~ 2008-06-22 15:56:52.000000000 +0200 +++ linux-2.6.23.perfctr26/arch/arm/Kconfig 2008-06-22 15:58:34.000000000 +0200 @@ -491,6 +491,10 @@ config IWMMXT Enable support for iWMMXt context switching at run time if running on a CPU that supports it. 
+if CPU_XSCALE +source drivers/perfctr/Kconfig +endif + # bool 'Use XScale PMU as timer source' CONFIG_XSCALE_PMU_TIMER config XSCALE_PMU bool --- linux-2.6.23.perfctr26/arch/arm/kernel/process.c.~1~ 2008-06-22 15:56:52.000000000 +0200 +++ linux-2.6.23.perfctr26/arch/arm/kernel/process.c 2008-06-22 15:58:34.000000000 +0200 @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -304,6 +305,7 @@ void show_fpregs(struct user_fp *regs) */ void exit_thread(void) { + perfctr_exit_thread(&current->thread); } ATOMIC_NOTIFIER_HEAD(thread_notify_head); @@ -349,6 +351,8 @@ copy_thread(int nr, unsigned long clone_ if (clone_flags & CLONE_SETTLS) thread->tp_value = regs->ARM_r3; + perfctr_copy_task(p, regs); + return 0; } --- linux-2.6.23.perfctr26/arch/i386/Kconfig.~1~ 2008-06-22 15:56:52.000000000 +0200 +++ linux-2.6.23.perfctr26/arch/i386/Kconfig 2008-06-22 15:58:34.000000000 +0200 @@ -796,6 +796,8 @@ config SECCOMP If unsure, say Y. Only embedded should say N here. +source "drivers/perfctr/Kconfig" + source kernel/Kconfig.hz config KEXEC --- linux-2.6.23.perfctr26/arch/i386/kernel/entry.S.~1~ 2008-06-22 15:56:52.000000000 +0200 +++ linux-2.6.23.perfctr26/arch/i386/kernel/entry.S 2008-06-22 15:58:34.000000000 +0200 @@ -635,6 +635,23 @@ ENDPROC(name) /* The include is where all of the SMP etc.
interrupts come from */ #include "entry_arch.h" +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +ENTRY(perfctr_interrupt) + RING0_INT_FRAME + pushl $~(LOCAL_PERFCTR_VECTOR) + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + TRACE_IRQS_OFF + pushl %esp + CFI_ADJUST_CFA_OFFSET 4 + call smp_perfctr_interrupt + addl $4, %esp + CFI_ADJUST_CFA_OFFSET -4 + jmp ret_from_intr + CFI_ENDPROC +ENDPROC(perfctr_interrupt) +#endif + KPROBE_ENTRY(page_fault) RING0_EC_FRAME pushl $do_page_fault --- linux-2.6.23.perfctr26/arch/i386/kernel/i8259.c.~1~ 2008-06-22 15:56:52.000000000 +0200 +++ linux-2.6.23.perfctr26/arch/i386/kernel/i8259.c 2008-06-22 15:58:34.000000000 +0200 @@ -21,6 +21,7 @@ #include #include #include +#include #include @@ -409,6 +410,8 @@ void __init native_init_IRQ(void) */ intr_init_hook(); + perfctr_vector_init(); + /* * External FPU? Set up irq13 if so, for * original braindamaged IBM FERR coupling. --- linux-2.6.23.perfctr26/arch/i386/kernel/process.c.~1~ 2008-06-22 15:56:52.000000000 +0200 +++ linux-2.6.23.perfctr26/arch/i386/kernel/process.c 2008-06-22 15:58:34.000000000 +0200 @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -396,6 +397,7 @@ void exit_thread(void) tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; put_cpu(); } + perfctr_exit_thread(&current->thread); } void flush_thread(void) @@ -447,6 +449,8 @@ int copy_thread(int nr, unsigned long cl savesegment(gs,p->thread.gs); + perfctr_copy_task(p, regs); + tsk = current; if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, @@ -736,6 +740,8 @@ struct task_struct fastcall * __switch_t x86_write_percpu(current_task, next_p); + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.23.perfctr26/arch/powerpc/kernel/process.c.~1~ 2008-06-22 15:56:52.000000000 +0200 +++ linux-2.6.23.perfctr26/arch/powerpc/kernel/process.c 2008-06-22 15:58:34.000000000 +0200 @@ -30,6 +30,7 @@ #include #include
#include +#include #include #include #include @@ -331,7 +332,9 @@ struct task_struct *__switch_to(struct t account_process_vtime(current); calculate_steal_time(); + perfctr_suspend_thread(&prev->thread); last = _switch(old_thread, new_thread); + perfctr_resume_thread(&current->thread); local_irq_restore(flags); @@ -461,6 +464,7 @@ void show_regs(struct pt_regs * regs) void exit_thread(void) { discard_lazy_cpu_state(); + perfctr_exit_thread(&current->thread); } void flush_thread(void) @@ -575,6 +579,8 @@ int copy_thread(int nr, unsigned long cl kregs->nip = (unsigned long)ret_from_fork; #endif + perfctr_copy_task(p, regs); + return 0; } --- linux-2.6.23.perfctr26/arch/powerpc/platforms/Kconfig.cputype.~1~ 2008-06-22 15:56:52.000000000 +0200 +++ linux-2.6.23.perfctr26/arch/powerpc/platforms/Kconfig.cputype 2008-06-22 15:58:34.000000000 +0200 @@ -218,4 +218,8 @@ config NOT_COHERENT_CACHE config CHECK_CACHE_COHERENCY bool +if PPC32 +source "drivers/perfctr/Kconfig" +endif + endmenu --- linux-2.6.23.perfctr26/arch/x86_64/Kconfig.~1~ 2008-06-22 15:56:53.000000000 +0200 +++ linux-2.6.23.perfctr26/arch/x86_64/Kconfig 2008-06-22 15:58:34.000000000 +0200 @@ -669,6 +669,8 @@ config CC_STACKPROTECTOR_ALL functions that use large-ish on-stack buffers. By enabling this option, GCC will be asked to do this for ALL functions. +source "drivers/perfctr/Kconfig" + source kernel/Kconfig.hz config K8_NB --- linux-2.6.23.perfctr26/arch/x86_64/kernel/entry.S.~1~ 2008-06-22 15:56:53.000000000 +0200 +++ linux-2.6.23.perfctr26/arch/x86_64/kernel/entry.S 2008-06-22 15:58:34.000000000 +0200 @@ -692,6 +692,12 @@ ENTRY(spurious_interrupt) apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt END(spurious_interrupt) +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +ENTRY(perfctr_interrupt) + apicinterrupt LOCAL_PERFCTR_VECTOR,smp_perfctr_interrupt +END(perfctr_interrupt) +#endif + /* * Exception entry points.
*/ --- linux-2.6.23.perfctr26/arch/x86_64/kernel/i8259.c.~1~ 2008-06-22 15:56:53.000000000 +0200 +++ linux-2.6.23.perfctr26/arch/x86_64/kernel/i8259.c 2008-06-22 15:58:34.000000000 +0200 @@ -21,6 +21,7 @@ #include #include #include +#include /* * Common place to define all x86 IRQ vectors @@ -533,6 +534,8 @@ void __init init_IRQ(void) set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); + perfctr_vector_init(); + /* * Set the clock to HZ Hz, we already have a valid * vector now: --- linux-2.6.23.perfctr26/arch/x86_64/kernel/process.c.~1~ 2008-06-22 15:56:53.000000000 +0200 +++ linux-2.6.23.perfctr26/arch/x86_64/kernel/process.c 2008-06-22 15:58:34.000000000 +0200 @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -390,6 +391,7 @@ void exit_thread(void) t->io_bitmap_max = 0; put_cpu(); } + perfctr_exit_thread(t); } void flush_thread(void) @@ -498,6 +500,8 @@ int copy_thread(int nr, unsigned long cl asm("mov %%es,%0" : "=m" (p->thread.es)); asm("mov %%ds,%0" : "=m" (p->thread.ds)); + perfctr_copy_task(p, regs); + if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) { @@ -681,6 +685,9 @@ __switch_to(struct task_struct *prev_p, */ if (next_p->fpu_counter>5) math_state_restore(); + + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.23.perfctr26/drivers/Makefile.~1~ 2008-06-22 15:56:53.000000000 +0200 +++ linux-2.6.23.perfctr26/drivers/Makefile 2008-06-22 15:58:34.000000000 +0200 @@ -80,6 +80,7 @@ obj-$(CONFIG_MMC) += mmc/ obj-$(CONFIG_NEW_LEDS) += leds/ obj-$(CONFIG_INFINIBAND) += infiniband/ obj-$(CONFIG_SGI_SN) += sn/ +obj-$(CONFIG_KPERFCTR) += perfctr/ obj-y += firmware/ obj-$(CONFIG_CRYPTO) += crypto/ obj-$(CONFIG_SUPERH) += sh/ --- linux-2.6.23.perfctr26/fs/exec.c.~1~ 2008-06-22 15:56:54.000000000 +0200 +++ linux-2.6.23.perfctr26/fs/exec.c 2008-06-22 15:58:20.000000000 
+0200 @@ -50,6 +50,7 @@ #include #include #include +#include #include #include @@ -1065,6 +1066,7 @@ int flush_old_exec(struct linux_binprm * set_task_comm(current, tcomm); current->flags &= ~PF_RANDOMIZE; + perfctr_flush_thread(&current->thread); flush_thread(); /* Set the new mm task size. We have to do that late because it may --- linux-2.6.23.perfctr26/include/asm-arm/processor.h.~1~ 2008-06-22 15:42:33.000000000 +0200 +++ linux-2.6.23.perfctr26/include/asm-arm/processor.h 2008-06-22 15:58:34.000000000 +0200 @@ -44,6 +44,10 @@ struct thread_struct { unsigned long error_code; /* debugging */ struct debug_info debug; + +#ifdef CONFIG_PERFCTR_VIRTUAL + struct vperfctr *perfctr; /* performance counters */ +#endif }; #define INIT_THREAD { } --- linux-2.6.23.perfctr26/include/asm-arm/system.h.~1~ 2008-06-22 15:56:54.000000000 +0200 +++ linux-2.6.23.perfctr26/include/asm-arm/system.h 2008-06-22 15:58:34.000000000 +0200 @@ -251,7 +251,9 @@ extern struct task_struct *__switch_to(s #define switch_to(prev,next,last) \ do { \ + perfctr_suspend_thread(&(prev)->thread); \ last = __switch_to(prev,task_thread_info(prev), task_thread_info(next)); \ + perfctr_resume_thread(&(current)->thread); \ } while (0) #if defined(CONFIG_CPU_SA1100) || defined(CONFIG_CPU_SA110) --- linux-2.6.23.perfctr26/include/asm-i386/mach-default/irq_vectors.h.~1~ 2008-06-22 15:56:54.000000000 +0200 +++ linux-2.6.23.perfctr26/include/asm-i386/mach-default/irq_vectors.h 2008-06-22 15:58:34.000000000 +0200 @@ -56,14 +56,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels.
(0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 --- linux-2.6.23.perfctr26/include/asm-i386/mach-visws/irq_vectors.h.~1~ 2008-06-22 15:56:54.000000000 +0200 +++ linux-2.6.23.perfctr26/include/asm-i386/mach-visws/irq_vectors.h 2008-06-22 15:58:34.000000000 +0200 @@ -35,14 +35,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 --- linux-2.6.23.perfctr26/include/asm-i386/processor.h.~1~ 2008-06-22 15:56:54.000000000 +0200 +++ linux-2.6.23.perfctr26/include/asm-i386/processor.h 2008-06-22 15:58:34.000000000 +0200 @@ -363,6 +363,8 @@ struct thread_struct { unsigned long iopl; /* max allowed port in the bitmap, in bytes: */ unsigned long io_bitmap_max; +/* performance counters */ + struct vperfctr *perfctr; }; #define INIT_THREAD { \ --- linux-2.6.23.perfctr26/include/asm-i386/system.h.~1~ 2008-06-22 15:56:54.000000000 +0200 +++ linux-2.6.23.perfctr26/include/asm-i386/system.h 2008-06-22 15:58:34.000000000 +0200 @@ -17,6 +17,7 @@ extern struct task_struct * FASTCALL(__s */ #define switch_to(prev,next,last) do { \ unsigned long esi,edi; \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile("pushfl\n\t" /* Save flags */ \ "pushl %%ebp\n\t" \ "movl %%esp,%0\n\t" /* save ESP */ \ --- linux-2.6.23.perfctr26/include/asm-powerpc/processor.h.~1~ 2008-06-22 15:56:54.000000000 +0200 +++ linux-2.6.23.perfctr26/include/asm-powerpc/processor.h 2008-06-22 15:58:34.000000000 +0200 @@ -157,6 +157,9 @@ struct thread_struct { unsigned long spefscr; /* SPE & eFP status */ int 
used_spe; /* set if process has used spe */ #endif /* CONFIG_SPE */ +#ifdef CONFIG_PERFCTR_VIRTUAL + struct vperfctr *perfctr; /* performance counters */ +#endif }; #define ARCH_MIN_TASKALIGN 16 --- linux-2.6.23.perfctr26/include/asm-x86_64/hw_irq.h.~1~ 2008-06-22 15:56:55.000000000 +0200 +++ linux-2.6.23.perfctr26/include/asm-x86_64/hw_irq.h 2008-06-22 15:58:34.000000000 +0200 @@ -84,14 +84,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x41 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR (IRQ15_VECTOR + 2) -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in irq.h */ #ifndef __ASSEMBLY__ --- linux-2.6.23.perfctr26/include/asm-x86_64/irq.h.~1~ 2008-06-22 15:56:55.000000000 +0200 +++ linux-2.6.23.perfctr26/include/asm-x86_64/irq.h 2008-06-22 15:58:34.000000000 +0200 @@ -29,7 +29,7 @@ */ #define NR_VECTORS 256 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in hw_irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in hw_irq.h */ #define NR_IRQS (NR_VECTORS + (32 *NR_CPUS)) #define NR_IRQ_VECTORS NR_IRQS --- linux-2.6.23.perfctr26/include/asm-x86_64/processor.h.~1~ 2008-06-22 15:56:55.000000000 +0200 +++ linux-2.6.23.perfctr26/include/asm-x86_64/processor.h 2008-06-22 15:58:34.000000000 +0200 @@ -239,6 +239,8 @@ struct thread_struct { unsigned io_bitmap_max; /* cached TLS descriptors. 
*/ u64 tls_array[GDT_ENTRY_TLS_ENTRIES]; +/* performance counters */ + struct vperfctr *perfctr; } __attribute__((aligned(16))); #define INIT_THREAD { \ --- linux-2.6.23.perfctr26/include/asm-x86_64/system.h.~1~ 2008-06-22 15:56:55.000000000 +0200 +++ linux-2.6.23.perfctr26/include/asm-x86_64/system.h 2008-06-22 15:58:34.000000000 +0200 @@ -21,7 +21,8 @@ ,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15" /* Save restore flags to clear handle leaking NT */ -#define switch_to(prev,next,last) \ +#define switch_to(prev,next,last) do { \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile(SAVE_CONTEXT \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ @@ -41,7 +42,8 @@ [tif_fork] "i" (TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, stack)), \ [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ - : "memory", "cc" __EXTRA_CLOBBER) + : "memory", "cc" __EXTRA_CLOBBER); \ +} while (0) extern void load_gs_index(unsigned); --- linux-2.6.23.perfctr26/kernel/exit.c.~1~ 2008-06-22 15:56:55.000000000 +0200 +++ linux-2.6.23.perfctr26/kernel/exit.c 2008-06-22 15:58:34.000000000 +0200 @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -175,6 +176,7 @@ repeat: zap_leader = (leader->exit_signal == -1); } + perfctr_release_task(p); write_unlock_irq(&tasklist_lock); proc_flush_task(p); release_thread(p); --- linux-2.6.23.perfctr26/kernel/sched.c.~1~ 2008-06-22 15:56:55.000000000 +0200 +++ linux-2.6.23.perfctr26/kernel/sched.c 2008-06-22 15:58:34.000000000 +0200 @@ -51,6 +51,7 @@ #include #include #include +#include #include #include #include @@ -4958,6 +4959,8 @@ int set_cpus_allowed(struct task_struct struct rq *rq; int ret = 0; + perfctr_set_cpus_allowed(p, new_mask); + rq = task_rq_lock(p, &flags); if (!cpus_intersects(new_mask, cpu_online_map)) { ret = -EINVAL; --- linux-2.6.23.perfctr26/kernel/timer.c.~1~ 2008-06-22 15:56:55.000000000 +0200 
+++ linux-2.6.23.perfctr26/kernel/timer.c 2008-06-22 15:58:34.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -830,6 +831,7 @@ void update_process_times(int user_tick) account_user_time(p, jiffies_to_cputime(1)); else account_system_time(p, HARDIRQ_OFFSET, jiffies_to_cputime(1)); + perfctr_sample_thread(&p->thread); run_local_timers(); if (rcu_pending(cpu)) rcu_check_callbacks(cpu, user_tick); papi-5.6.0/src/perfctr-2.6.x/patches/patch-kernel-2.6.22000664 001750 001750 00000044275 13216244367 024236 0ustar00jshenry1963jshenry1963000000 000000 --- linux-2.6.22.perfctr26/CREDITS.~1~ 2008-06-22 16:09:12.000000000 +0200 +++ linux-2.6.22.perfctr26/CREDITS 2008-06-22 16:16:39.000000000 +0200 @@ -2685,8 +2685,9 @@ S: Canada K2P 0X8 N: Mikael Pettersson E: mikpe@it.uu.se -W: http://www.csd.uu.se/~mikpe/ +W: http://user.it.uu.se/~mikpe/ D: Miscellaneous fixes +D: Performance-monitoring counters driver N: Reed H. Petty E: rhp@draper.net --- linux-2.6.22.perfctr26/Documentation/ioctl-number.txt.~1~ 2008-06-22 16:09:12.000000000 +0200 +++ linux-2.6.22.perfctr26/Documentation/ioctl-number.txt 2008-06-22 16:16:32.000000000 +0200 @@ -187,6 +187,8 @@ Code Seq# Include File Comments 0xB1 00-1F PPPoX 0xCB 00-1F CBM serial IEC bus in development: +0xD0 all performance counters see drivers/perfctr/ + 0xDD 00-3F ZFCP device driver see drivers/s390/scsi/ 0xF3 00-3F video/sisfb.h sisfb (in development) --- linux-2.6.22.perfctr26/MAINTAINERS.~1~ 2008-06-22 16:09:12.000000000 +0200 +++ linux-2.6.22.perfctr26/MAINTAINERS 2008-06-22 16:16:39.000000000 +0200 @@ -2844,6 +2844,12 @@ M: nagar@watson.ibm.com L: linux-kernel@vger.kernel.org S: Maintained +PERFORMANCE-MONITORING COUNTERS DRIVER +P: Mikael Pettersson +M: mikpe@it.uu.se +W: http://user.it.uu.se/~mikpe/linux/perfctr/ +S: Maintained + PERSONALITY HANDLING P: Christoph Hellwig M: hch@infradead.org --- linux-2.6.22.perfctr26/arch/arm/Kconfig.~1~ 2008-06-22 16:09:12.000000000 +0200 +++ 
linux-2.6.22.perfctr26/arch/arm/Kconfig 2008-06-22 16:16:32.000000000 +0200 @@ -476,6 +476,10 @@ config IWMMXT Enable support for iWMMXt context switching at run time if running on a CPU that supports it. +if CPU_XSCALE +source drivers/perfctr/Kconfig +endif + # bool 'Use XScale PMU as timer source' CONFIG_XSCALE_PMU_TIMER config XSCALE_PMU bool --- linux-2.6.22.perfctr26/arch/arm/kernel/process.c.~1~ 2008-06-22 16:09:12.000000000 +0200 +++ linux-2.6.22.perfctr26/arch/arm/kernel/process.c 2008-06-22 16:16:32.000000000 +0200 @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -300,6 +301,7 @@ void show_fpregs(struct user_fp *regs) */ void exit_thread(void) { + perfctr_exit_thread(&current->thread); } ATOMIC_NOTIFIER_HEAD(thread_notify_head); @@ -345,6 +347,8 @@ copy_thread(int nr, unsigned long clone_ if (clone_flags & CLONE_SETTLS) thread->tp_value = regs->ARM_r3; + perfctr_copy_task(p, regs); + return 0; } --- linux-2.6.22.perfctr26/arch/i386/Kconfig.~1~ 2008-06-22 16:09:12.000000000 +0200 +++ linux-2.6.22.perfctr26/arch/i386/Kconfig 2008-06-22 16:16:32.000000000 +0200 @@ -781,6 +781,8 @@ config SECCOMP If unsure, say Y. Only embedded should say N here. +source "drivers/perfctr/Kconfig" + source kernel/Kconfig.hz config KEXEC --- linux-2.6.22.perfctr26/arch/i386/kernel/entry.S.~1~ 2008-06-22 16:09:12.000000000 +0200 +++ linux-2.6.22.perfctr26/arch/i386/kernel/entry.S 2008-06-22 16:16:32.000000000 +0200 @@ -637,6 +637,23 @@ ENDPROC(name) /* The include is where all of the SMP etc.
interrupts come from */ #include "entry_arch.h" +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +ENTRY(perfctr_interrupt) + RING0_INT_FRAME + pushl $~(LOCAL_PERFCTR_VECTOR) + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + TRACE_IRQS_OFF + pushl %esp + CFI_ADJUST_CFA_OFFSET 4 + call smp_perfctr_interrupt + addl $4, %esp + CFI_ADJUST_CFA_OFFSET -4 + jmp ret_from_intr + CFI_ENDPROC +ENDPROC(perfctr_interrupt) +#endif + KPROBE_ENTRY(page_fault) RING0_EC_FRAME pushl $do_page_fault --- linux-2.6.22.perfctr26/arch/i386/kernel/i8259.c.~1~ 2008-06-22 15:56:52.000000000 +0200 +++ linux-2.6.22.perfctr26/arch/i386/kernel/i8259.c 2008-06-22 16:16:32.000000000 +0200 @@ -21,6 +21,7 @@ #include #include #include +#include #include @@ -409,6 +410,8 @@ void __init native_init_IRQ(void) */ intr_init_hook(); + perfctr_vector_init(); + /* * External FPU? Set up irq13 if so, for * original braindamaged IBM FERR coupling. --- linux-2.6.22.perfctr26/arch/i386/kernel/process.c.~1~ 2008-06-22 16:09:12.000000000 +0200 +++ linux-2.6.22.perfctr26/arch/i386/kernel/process.c 2008-06-22 16:16:32.000000000 +0200 @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -384,6 +385,7 @@ void exit_thread(void) tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; put_cpu(); } + perfctr_exit_thread(&current->thread); } void flush_thread(void) @@ -435,6 +437,8 @@ int copy_thread(int nr, unsigned long cl savesegment(gs,p->thread.gs); + perfctr_copy_task(p, regs); + tsk = current; if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, @@ -719,6 +723,8 @@ struct task_struct fastcall * __switch_t x86_write_percpu(current_task, next_p); + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.22.perfctr26/arch/powerpc/Kconfig.~1~ 2008-06-22 16:09:12.000000000 +0200 +++ linux-2.6.22.perfctr26/arch/powerpc/Kconfig 2008-06-22 16:16:32.000000000 +0200 @@ -393,6 +393,11 @@ config NOT_COHERENT_CACHE config
CONFIG_CHECK_CACHE_COHERENCY bool + +if PPC32 +source "drivers/perfctr/Kconfig" +endif + endmenu source "init/Kconfig" --- linux-2.6.22.perfctr26/arch/powerpc/kernel/process.c.~1~ 2008-06-22 16:09:12.000000000 +0200 +++ linux-2.6.22.perfctr26/arch/powerpc/kernel/process.c 2008-06-22 16:16:32.000000000 +0200 @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -329,7 +330,9 @@ struct task_struct *__switch_to(struct t account_process_vtime(current); calculate_steal_time(); + perfctr_suspend_thread(&prev->thread); last = _switch(old_thread, new_thread); + perfctr_resume_thread(&current->thread); local_irq_restore(flags); @@ -455,6 +458,7 @@ void show_regs(struct pt_regs * regs) void exit_thread(void) { discard_lazy_cpu_state(); + perfctr_exit_thread(&current->thread); } void flush_thread(void) @@ -571,6 +575,8 @@ int copy_thread(int nr, unsigned long cl kregs->nip = (unsigned long)ret_from_fork; #endif + perfctr_copy_task(p, regs); + return 0; } --- linux-2.6.22.perfctr26/arch/x86_64/Kconfig.~1~ 2008-06-22 16:09:12.000000000 +0200 +++ linux-2.6.22.perfctr26/arch/x86_64/Kconfig 2008-06-22 16:16:32.000000000 +0200 @@ -661,6 +661,8 @@ config CC_STACKPROTECTOR_ALL functions that use large-ish on-stack buffers. By enabling this option, GCC will be asked to do this for ALL functions. +source "drivers/perfctr/Kconfig" + source kernel/Kconfig.hz config K8_NB --- linux-2.6.22.perfctr26/arch/x86_64/kernel/entry.S.~1~ 2008-06-22 16:09:12.000000000 +0200 +++ linux-2.6.22.perfctr26/arch/x86_64/kernel/entry.S 2008-06-22 16:16:32.000000000 +0200 @@ -692,6 +692,12 @@ ENTRY(spurious_interrupt) apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt END(spurious_interrupt) +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +ENTRY(perfctr_interrupt) + apicinterrupt LOCAL_PERFCTR_VECTOR,smp_perfctr_interrupt +END(perfctr_interrupt) +#endif + /* * Exception entry points.
*/ --- linux-2.6.22.perfctr26/arch/x86_64/kernel/i8259.c.~1~ 2008-06-22 16:09:12.000000000 +0200 +++ linux-2.6.22.perfctr26/arch/x86_64/kernel/i8259.c 2008-06-22 16:16:32.000000000 +0200 @@ -21,6 +21,7 @@ #include #include #include +#include /* * Common place to define all x86 IRQ vectors @@ -551,6 +552,8 @@ void __init init_IRQ(void) set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); + perfctr_vector_init(); + /* * Set the clock to HZ Hz, we already have a valid * vector now: --- linux-2.6.22.perfctr26/arch/x86_64/kernel/process.c.~1~ 2008-06-22 16:09:13.000000000 +0200 +++ linux-2.6.22.perfctr26/arch/x86_64/kernel/process.c 2008-06-22 16:16:32.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -379,6 +380,7 @@ void exit_thread(void) t->io_bitmap_max = 0; put_cpu(); } + perfctr_exit_thread(t); } void flush_thread(void) @@ -487,6 +489,8 @@ int copy_thread(int nr, unsigned long cl asm("mov %%es,%0" : "=m" (p->thread.es)); asm("mov %%ds,%0" : "=m" (p->thread.ds)); + perfctr_copy_task(p, regs); + if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) { @@ -670,6 +674,9 @@ __switch_to(struct task_struct *prev_p, */ if (next_p->fpu_counter>5) math_state_restore(); + + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.22.perfctr26/drivers/Makefile.~1~ 2008-06-22 16:09:13.000000000 +0200 +++ linux-2.6.22.perfctr26/drivers/Makefile 2008-06-22 16:16:32.000000000 +0200 @@ -74,6 +74,7 @@ obj-$(CONFIG_MMC) += mmc/ obj-$(CONFIG_NEW_LEDS) += leds/ obj-$(CONFIG_INFINIBAND) += infiniband/ obj-$(CONFIG_SGI_SN) += sn/ +obj-$(CONFIG_KPERFCTR) += perfctr/ obj-y += firmware/ obj-$(CONFIG_CRYPTO) += crypto/ obj-$(CONFIG_SUPERH) += sh/ --- linux-2.6.22.perfctr26/fs/exec.c.~1~ 2008-06-22 16:09:14.000000000 +0200 +++ linux-2.6.22.perfctr26/fs/exec.c 2008-06-22 16:16:39.000000000 
+0200 @@ -50,6 +50,7 @@ #include #include #include +#include #include #include @@ -882,6 +883,7 @@ int flush_old_exec(struct linux_binprm * set_task_comm(current, tcomm); current->flags &= ~PF_RANDOMIZE; + perfctr_flush_thread(&current->thread); flush_thread(); /* Set the new mm task size. We have to do that late because it may --- linux-2.6.22.perfctr26/include/asm-arm/processor.h.~1~ 2008-06-22 15:42:33.000000000 +0200 +++ linux-2.6.22.perfctr26/include/asm-arm/processor.h 2008-06-22 16:16:32.000000000 +0200 @@ -44,6 +44,10 @@ struct thread_struct { unsigned long error_code; /* debugging */ struct debug_info debug; + +#ifdef CONFIG_PERFCTR_VIRTUAL + struct vperfctr *perfctr; /* performance counters */ +#endif }; #define INIT_THREAD { } --- linux-2.6.22.perfctr26/include/asm-arm/system.h.~1~ 2008-06-22 16:09:14.000000000 +0200 +++ linux-2.6.22.perfctr26/include/asm-arm/system.h 2008-06-22 16:16:32.000000000 +0200 @@ -251,7 +251,9 @@ extern struct task_struct *__switch_to(s #define switch_to(prev,next,last) \ do { \ + perfctr_suspend_thread(&(prev)->thread); \ last = __switch_to(prev,task_thread_info(prev), task_thread_info(next)); \ + perfctr_resume_thread(&(current)->thread); \ } while (0) /* --- linux-2.6.22.perfctr26/include/asm-i386/mach-default/irq_vectors.h.~1~ 2008-06-22 15:56:54.000000000 +0200 +++ linux-2.6.22.perfctr26/include/asm-i386/mach-default/irq_vectors.h 2008-06-22 16:16:32.000000000 +0200 @@ -56,14 +56,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels.
(0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 --- linux-2.6.22.perfctr26/include/asm-i386/mach-visws/irq_vectors.h.~1~ 2008-06-22 15:56:54.000000000 +0200 +++ linux-2.6.22.perfctr26/include/asm-i386/mach-visws/irq_vectors.h 2008-06-22 16:16:32.000000000 +0200 @@ -35,14 +35,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 --- linux-2.6.22.perfctr26/include/asm-i386/processor.h.~1~ 2008-06-22 16:09:14.000000000 +0200 +++ linux-2.6.22.perfctr26/include/asm-i386/processor.h 2008-06-22 16:16:32.000000000 +0200 @@ -370,6 +370,8 @@ struct thread_struct { unsigned long iopl; /* max allowed port in the bitmap, in bytes: */ unsigned long io_bitmap_max; +/* performance counters */ + struct vperfctr *perfctr; }; #define INIT_THREAD { \ --- linux-2.6.22.perfctr26/include/asm-i386/system.h.~1~ 2008-06-22 16:09:14.000000000 +0200 +++ linux-2.6.22.perfctr26/include/asm-i386/system.h 2008-06-22 16:16:32.000000000 +0200 @@ -17,6 +17,7 @@ extern struct task_struct * FASTCALL(__s */ #define switch_to(prev,next,last) do { \ unsigned long esi,edi; \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile("pushfl\n\t" /* Save flags */ \ "pushl %%ebp\n\t" \ "movl %%esp,%0\n\t" /* save ESP */ \ --- linux-2.6.22.perfctr26/include/asm-powerpc/processor.h.~1~ 2008-06-22 16:09:15.000000000 +0200 +++ linux-2.6.22.perfctr26/include/asm-powerpc/processor.h 2008-06-22 16:16:32.000000000 +0200 @@ -165,6 +165,9 @@ struct thread_struct { unsigned long spefscr; /* SPE & eFP status */ int 
used_spe; /* set if process has used spe */ #endif /* CONFIG_SPE */ +#ifdef CONFIG_PERFCTR_VIRTUAL + struct vperfctr *perfctr; /* performance counters */ +#endif }; #define ARCH_MIN_TASKALIGN 16 --- linux-2.6.22.perfctr26/include/asm-x86_64/hw_irq.h.~1~ 2008-06-22 16:09:15.000000000 +0200 +++ linux-2.6.22.perfctr26/include/asm-x86_64/hw_irq.h 2008-06-22 16:16:32.000000000 +0200 @@ -84,14 +84,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x41 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR (IRQ15_VECTOR + 2) -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in irq.h */ #ifndef __ASSEMBLY__ --- linux-2.6.22.perfctr26/include/asm-x86_64/irq.h.~1~ 2008-06-22 15:56:55.000000000 +0200 +++ linux-2.6.22.perfctr26/include/asm-x86_64/irq.h 2008-06-22 16:16:32.000000000 +0200 @@ -29,7 +29,7 @@ */ #define NR_VECTORS 256 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in hw_irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in hw_irq.h */ #define NR_IRQS (NR_VECTORS + (32 *NR_CPUS)) #define NR_IRQ_VECTORS NR_IRQS --- linux-2.6.22.perfctr26/include/asm-x86_64/processor.h.~1~ 2008-06-22 16:09:15.000000000 +0200 +++ linux-2.6.22.perfctr26/include/asm-x86_64/processor.h 2008-06-22 16:16:32.000000000 +0200 @@ -239,6 +239,8 @@ struct thread_struct { unsigned io_bitmap_max; /* cached TLS descriptors. 
*/ u64 tls_array[GDT_ENTRY_TLS_ENTRIES]; +/* performance counters */ + struct vperfctr *perfctr; } __attribute__((aligned(16))); #define INIT_THREAD { \ --- linux-2.6.22.perfctr26/include/asm-x86_64/system.h.~1~ 2008-06-22 16:09:15.000000000 +0200 +++ linux-2.6.22.perfctr26/include/asm-x86_64/system.h 2008-06-22 16:16:32.000000000 +0200 @@ -21,7 +21,8 @@ ,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15" /* Save restore flags to clear handle leaking NT */ -#define switch_to(prev,next,last) \ +#define switch_to(prev,next,last) do { \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile(SAVE_CONTEXT \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ @@ -41,7 +42,8 @@ [tif_fork] "i" (TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, stack)), \ [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ - : "memory", "cc" __EXTRA_CLOBBER) + : "memory", "cc" __EXTRA_CLOBBER); \ +} while (0) extern void load_gs_index(unsigned); --- linux-2.6.22.perfctr26/kernel/exit.c.~1~ 2008-06-22 16:09:15.000000000 +0200 +++ linux-2.6.22.perfctr26/kernel/exit.c 2008-06-22 16:16:32.000000000 +0200 @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -182,6 +183,7 @@ repeat: zap_leader = (leader->exit_signal == -1); } + perfctr_release_task(p); sched_exit(p); write_unlock_irq(&tasklist_lock); proc_flush_task(p); --- linux-2.6.22.perfctr26/kernel/sched.c.~1~ 2008-06-22 16:09:15.000000000 +0200 +++ linux-2.6.22.perfctr26/kernel/sched.c 2008-06-22 16:16:32.000000000 +0200 @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -5121,6 +5122,8 @@ int set_cpus_allowed(struct task_struct struct rq *rq; int ret = 0; + perfctr_set_cpus_allowed(p, new_mask); + rq = task_rq_lock(p, &flags); if (!cpus_intersects(new_mask, cpu_online_map)) { ret = -EINVAL; --- linux-2.6.22.perfctr26/kernel/timer.c.~1~ 2008-06-22 16:09:15.000000000 +0200 +++ 
linux-2.6.22.perfctr26/kernel/timer.c 2008-06-22 16:16:32.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -816,6 +817,7 @@ void update_process_times(int user_tick) account_user_time(p, jiffies_to_cputime(1)); else account_system_time(p, HARDIRQ_OFFSET, jiffies_to_cputime(1)); + perfctr_sample_thread(&p->thread); run_local_timers(); if (rcu_pending(cpu)) rcu_check_callbacks(cpu, user_tick); papi-5.6.0/src/perfctr-2.6.x/patches/patch-kernel-2.6.21000664 001750 001750 00000044235 13216244367 024231 0ustar00jshenry1963jshenry1963000000 000000 --- linux-2.6.21.perfctr26/CREDITS.~1~ 2008-06-22 16:43:58.000000000 +0200 +++ linux-2.6.21.perfctr26/CREDITS 2008-06-22 16:46:48.000000000 +0200 @@ -2679,8 +2679,9 @@ S: Canada K2P 0X8 N: Mikael Pettersson E: mikpe@it.uu.se -W: http://www.csd.uu.se/~mikpe/ +W: http://user.it.uu.se/~mikpe/ D: Miscellaneous fixes +D: Performance-monitoring counters driver N: Reed H. Petty E: rhp@draper.net --- linux-2.6.21.perfctr26/Documentation/ioctl-number.txt.~1~ 2008-06-22 16:43:58.000000000 +0200 +++ linux-2.6.21.perfctr26/Documentation/ioctl-number.txt 2008-06-22 16:46:44.000000000 +0200 @@ -186,6 +186,8 @@ Code Seq# Include File Comments 0xB1 00-1F PPPoX 0xCB 00-1F CBM serial IEC bus in development: +0xD0 all performance counters see drivers/perfctr/ + 0xDD 00-3F ZFCP device driver see drivers/s390/scsi/ 0xF3 00-3F video/sisfb.h sisfb (in development) --- linux-2.6.21.perfctr26/MAINTAINERS.~1~ 2008-06-22 16:43:58.000000000 +0200 +++ linux-2.6.21.perfctr26/MAINTAINERS 2008-06-22 16:46:48.000000000 +0200 @@ -2641,6 +2641,12 @@ M: nagar@watson.ibm.com L: linux-kernel@vger.kernel.org S: Maintained +PERFORMANCE-MONITORING COUNTERS DRIVER +P: Mikael Pettersson +M: mikpe@it.uu.se +W: http://user.it.uu.se/~mikpe/linux/perfctr/ +S: Maintained + PERSONALITY HANDLING P: Christoph Hellwig M: hch@infradead.org --- linux-2.6.21.perfctr26/arch/arm/Kconfig.~1~ 2008-06-22 16:43:58.000000000 +0200 +++ 
linux-2.6.21.perfctr26/arch/arm/Kconfig 2008-06-22 16:46:44.000000000 +0200 @@ -444,6 +444,10 @@ config IWMMXT Enable support for iWMMXt context switching at run time if running on a CPU that supports it. +if CPU_XSCALE +source drivers/perfctr/Kconfig +endif + # bool 'Use XScale PMU as timer source' CONFIG_XSCALE_PMU_TIMER config XSCALE_PMU bool --- linux-2.6.21.perfctr26/arch/arm/kernel/process.c.~1~ 2008-06-22 16:43:58.000000000 +0200 +++ linux-2.6.21.perfctr26/arch/arm/kernel/process.c 2008-06-22 16:46:44.000000000 +0200 @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -291,6 +292,7 @@ void show_fpregs(struct user_fp *regs) */ void exit_thread(void) { + perfctr_exit_thread(&current->thread); } ATOMIC_NOTIFIER_HEAD(thread_notify_head); @@ -336,6 +338,8 @@ copy_thread(int nr, unsigned long clone_ if (clone_flags & CLONE_SETTLS) thread->tp_value = regs->ARM_r3; + perfctr_copy_task(p, regs); + return 0; } --- linux-2.6.21.perfctr26/arch/i386/Kconfig.~1~ 2008-06-22 16:43:59.000000000 +0200 +++ linux-2.6.21.perfctr26/arch/i386/Kconfig 2008-06-22 16:46:44.000000000 +0200 @@ -773,6 +773,8 @@ config SECCOMP If unsure, say Y. Only embedded should say N here.
+source "drivers/perfctr/Kconfig" + source kernel/Kconfig.hz config KEXEC --- linux-2.6.21.perfctr26/arch/i386/kernel/entry.S.~1~ 2008-06-22 16:43:59.000000000 +0200 +++ linux-2.6.21.perfctr26/arch/i386/kernel/entry.S 2008-06-22 16:46:44.000000000 +0200 @@ -648,6 +648,23 @@ ENDPROC(name) BUILD_INTERRUPT(apic_vmi_timer_interrupt,LOCAL_TIMER_VECTOR) #endif +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +ENTRY(perfctr_interrupt) + RING0_INT_FRAME + pushl $~(LOCAL_PERFCTR_VECTOR) + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + TRACE_IRQS_OFF + pushl %esp + CFI_ADJUST_CFA_OFFSET 4 + call smp_perfctr_interrupt + addl $4, %esp + CFI_ADJUST_CFA_OFFSET -4 + jmp ret_from_intr + CFI_ENDPROC +ENDPROC(perfctr_interrupt) +#endif + KPROBE_ENTRY(page_fault) RING0_EC_FRAME pushl $do_page_fault --- linux-2.6.21.perfctr26/arch/i386/kernel/i8259.c.~1~ 2008-06-22 16:43:59.000000000 +0200 +++ linux-2.6.21.perfctr26/arch/i386/kernel/i8259.c 2008-06-22 16:46:44.000000000 +0200 @@ -22,6 +22,7 @@ #include #include #include +#include #include @@ -410,6 +411,8 @@ void __init native_init_IRQ(void) */ intr_init_hook(); + perfctr_vector_init(); + /* * External FPU? Set up irq13 if so, for * original braindamaged IBM FERR coupling. 
--- linux-2.6.21.perfctr26/arch/i386/kernel/process.c.~1~ 2008-06-22 16:43:59.000000000 +0200 +++ linux-2.6.21.perfctr26/arch/i386/kernel/process.c 2008-06-22 16:46:44.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -379,6 +380,7 @@ void exit_thread(void) tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET; put_cpu(); } + perfctr_exit_thread(&current->thread); } void flush_thread(void) @@ -430,6 +432,8 @@ int copy_thread(int nr, unsigned long cl savesegment(gs,p->thread.gs); + perfctr_copy_task(p, regs); + tsk = current; if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, @@ -714,6 +718,8 @@ struct task_struct fastcall * __switch_t write_pda(pcurrent, next_p); + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.21.perfctr26/arch/powerpc/Kconfig.~1~ 2008-06-22 16:43:59.000000000 +0200 +++ linux-2.6.21.perfctr26/arch/powerpc/Kconfig 2008-06-22 16:46:44.000000000 +0200 @@ -363,6 +363,11 @@ config NOT_COHERENT_CACHE bool depends on 4xx || 8xx || E200 default y + +if PPC32 +source "drivers/perfctr/Kconfig" +endif + endmenu source "init/Kconfig" --- linux-2.6.21.perfctr26/arch/powerpc/kernel/process.c.~1~ 2008-06-22 16:43:59.000000000 +0200 +++ linux-2.6.21.perfctr26/arch/powerpc/kernel/process.c 2008-06-22 16:46:44.000000000 +0200 @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -332,7 +333,9 @@ struct task_struct *__switch_to(struct t account_process_vtime(current); calculate_steal_time(); + perfctr_suspend_thread(&prev->thread); last = _switch(old_thread, new_thread); + perfctr_resume_thread(&current->thread); local_irq_restore(flags); @@ -458,6 +461,7 @@ void show_regs(struct pt_regs * regs) void exit_thread(void) { discard_lazy_cpu_state(); + perfctr_exit_thread(&current->thread); } void flush_thread(void) @@ -575,6 +579,8 @@ int copy_thread(int nr, unsigned long cl p->thread.last_syscall = -1; #endif + perfctr_copy_task(p,
regs); + return 0; } --- linux-2.6.21.perfctr26/arch/x86_64/Kconfig.~1~ 2008-06-22 16:43:59.000000000 +0200 +++ linux-2.6.21.perfctr26/arch/x86_64/Kconfig 2008-06-22 16:46:44.000000000 +0200 @@ -625,6 +625,8 @@ config CC_STACKPROTECTOR_ALL functions that use large-ish on-stack buffers. By enabling this option, GCC will be asked to do this for ALL functions. +source "drivers/perfctr/Kconfig" + source kernel/Kconfig.hz config REORDER --- linux-2.6.21.perfctr26/arch/x86_64/kernel/entry.S.~1~ 2008-06-22 16:43:59.000000000 +0200 +++ linux-2.6.21.perfctr26/arch/x86_64/kernel/entry.S 2008-06-22 16:46:44.000000000 +0200 @@ -692,6 +692,12 @@ ENTRY(spurious_interrupt) apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt END(spurious_interrupt) +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +ENTRY(perfctr_interrupt) + apicinterrupt LOCAL_PERFCTR_VECTOR,smp_perfctr_interrupt +END(perfctr_interrupt) +#endif + /* * Exception entry points. */ --- linux-2.6.21.perfctr26/arch/x86_64/kernel/i8259.c.~1~ 2008-06-22 16:43:59.000000000 +0200 +++ linux-2.6.21.perfctr26/arch/x86_64/kernel/i8259.c 2008-06-22 16:46:44.000000000 +0200 @@ -22,6 +22,7 @@ #include #include #include +#include /* * Common place to define all x86 IRQ vectors @@ -552,6 +553,8 @@ void __init init_IRQ(void) set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); + perfctr_vector_init(); + /* * Set the clock to HZ Hz, we already have a valid * vector now: --- linux-2.6.21.perfctr26/arch/x86_64/kernel/process.c.~1~ 2008-06-22 16:43:59.000000000 +0200 +++ linux-2.6.21.perfctr26/arch/x86_64/kernel/process.c 2008-06-22 16:46:44.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -377,6 +378,7 @@ void exit_thread(void) t->io_bitmap_max = 0; put_cpu(); } + perfctr_exit_thread(t); } void flush_thread(void) @@ -485,6 +487,8 @@ int copy_thread(int nr, unsigned long cl asm("mov %%es,%0" : "=m" 
(p->thread.es)); asm("mov %%ds,%0" : "=m" (p->thread.ds)); + perfctr_copy_task(p, regs); + if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) { @@ -668,6 +672,9 @@ __switch_to(struct task_struct *prev_p, */ if (next_p->fpu_counter>5) math_state_restore(); + + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.21.perfctr26/drivers/Makefile.~1~ 2008-06-22 16:43:59.000000000 +0200 +++ linux-2.6.21.perfctr26/drivers/Makefile 2008-06-22 16:46:44.000000000 +0200 @@ -74,6 +74,7 @@ obj-$(CONFIG_NEW_LEDS) += leds/ obj-$(CONFIG_INFINIBAND) += infiniband/ obj-$(CONFIG_IPATH_CORE) += infiniband/ obj-$(CONFIG_SGI_SN) += sn/ +obj-$(CONFIG_KPERFCTR) += perfctr/ obj-y += firmware/ obj-$(CONFIG_CRYPTO) += crypto/ obj-$(CONFIG_SUPERH) += sh/ --- linux-2.6.21.perfctr26/fs/exec.c.~1~ 2008-06-22 16:44:00.000000000 +0200 +++ linux-2.6.21.perfctr26/fs/exec.c 2008-06-22 16:46:48.000000000 +0200 @@ -50,6 +50,7 @@ #include #include #include +#include #include #include @@ -871,6 +872,7 @@ int flush_old_exec(struct linux_binprm * set_task_comm(current, tcomm); current->flags &= ~PF_RANDOMIZE; + perfctr_flush_thread(&current->thread); flush_thread(); /* Set the new mm task size.
We have to do that late because it may --- linux-2.6.21.perfctr26/include/asm-arm/processor.h.~1~ 2008-06-22 15:42:33.000000000 +0200 +++ linux-2.6.21.perfctr26/include/asm-arm/processor.h 2008-06-22 16:46:44.000000000 +0200 @@ -44,6 +44,10 @@ struct thread_struct { unsigned long error_code; /* debugging */ struct debug_info debug; + +#ifdef CONFIG_PERFCTR_VIRTUAL + struct vperfctr *perfctr; /* performance counters */ +#endif }; #define INIT_THREAD { } --- linux-2.6.21.perfctr26/include/asm-arm/system.h.~1~ 2008-06-22 16:44:00.000000000 +0200 +++ linux-2.6.21.perfctr26/include/asm-arm/system.h 2008-06-22 16:46:44.000000000 +0200 @@ -246,7 +246,9 @@ extern struct task_struct *__switch_to(s #define switch_to(prev,next,last) \ do { \ + perfctr_suspend_thread(&(prev)->thread); \ last = __switch_to(prev,task_thread_info(prev), task_thread_info(next)); \ + perfctr_resume_thread(&(current)->thread); \ } while (0) /* --- linux-2.6.21.perfctr26/include/asm-i386/mach-default/irq_vectors.h.~1~ 2008-06-22 15:56:54.000000000 +0200 +++ linux-2.6.21.perfctr26/include/asm-i386/mach-default/irq_vectors.h 2008-06-22 16:46:44.000000000 +0200 @@ -56,14 +56,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 --- linux-2.6.21.perfctr26/include/asm-i386/mach-visws/irq_vectors.h.~1~ 2008-06-22 15:56:54.000000000 +0200 +++ linux-2.6.21.perfctr26/include/asm-i386/mach-visws/irq_vectors.h 2008-06-22 16:46:44.000000000 +0200 @@ -35,14 +35,15 @@ * sources per level' errata. 
*/ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 --- linux-2.6.21.perfctr26/include/asm-i386/processor.h.~1~ 2008-06-22 16:44:00.000000000 +0200 +++ linux-2.6.21.perfctr26/include/asm-i386/processor.h 2008-06-22 16:46:44.000000000 +0200 @@ -418,6 +418,8 @@ struct thread_struct { unsigned long iopl; /* max allowed port in the bitmap, in bytes: */ unsigned long io_bitmap_max; +/* performance counters */ + struct vperfctr *perfctr; }; #define INIT_THREAD { \ --- linux-2.6.21.perfctr26/include/asm-i386/system.h.~1~ 2008-06-22 16:44:00.000000000 +0200 +++ linux-2.6.21.perfctr26/include/asm-i386/system.h 2008-06-22 16:46:44.000000000 +0200 @@ -17,6 +17,7 @@ extern struct task_struct * FASTCALL(__s */ #define switch_to(prev,next,last) do { \ unsigned long esi,edi; \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile("pushfl\n\t" /* Save flags */ \ "pushl %%ebp\n\t" \ "movl %%esp,%0\n\t" /* save ESP */ \ --- linux-2.6.21.perfctr26/include/asm-powerpc/processor.h.~1~ 2008-06-22 16:44:00.000000000 +0200 +++ linux-2.6.21.perfctr26/include/asm-powerpc/processor.h 2008-06-22 16:46:44.000000000 +0200 @@ -166,6 +166,9 @@ struct thread_struct { unsigned long spefscr; /* SPE & eFP status */ int used_spe; /* set if process has used spe */ #endif /* CONFIG_SPE */ +#ifdef CONFIG_PERFCTR_VIRTUAL + struct vperfctr *perfctr; /* performance counters */ +#endif }; #define ARCH_MIN_TASKALIGN 16 --- linux-2.6.21.perfctr26/include/asm-x86_64/hw_irq.h.~1~ 2008-06-22 16:09:15.000000000 +0200 +++ linux-2.6.21.perfctr26/include/asm-x86_64/hw_irq.h 2008-06-22 16:46:44.000000000 +0200 @@ -84,14 +84,15 @@ * sources per 
level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x41 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR (IRQ15_VECTOR + 2) -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in irq.h */ #ifndef __ASSEMBLY__ --- linux-2.6.21.perfctr26/include/asm-x86_64/irq.h.~1~ 2008-06-22 15:56:55.000000000 +0200 +++ linux-2.6.21.perfctr26/include/asm-x86_64/irq.h 2008-06-22 16:46:44.000000000 +0200 @@ -29,7 +29,7 @@ */ #define NR_VECTORS 256 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in hw_irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in hw_irq.h */ #define NR_IRQS (NR_VECTORS + (32 *NR_CPUS)) #define NR_IRQ_VECTORS NR_IRQS --- linux-2.6.21.perfctr26/include/asm-x86_64/processor.h.~1~ 2008-06-22 16:44:02.000000000 +0200 +++ linux-2.6.21.perfctr26/include/asm-x86_64/processor.h 2008-06-22 16:46:44.000000000 +0200 @@ -274,6 +274,8 @@ struct thread_struct { unsigned io_bitmap_max; /* cached TLS descriptors. 
*/ u64 tls_array[GDT_ENTRY_TLS_ENTRIES]; +/* performance counters */ + struct vperfctr *perfctr; } __attribute__((aligned(16))); #define INIT_THREAD { \ --- linux-2.6.21.perfctr26/include/asm-x86_64/system.h.~1~ 2008-06-22 16:44:02.000000000 +0200 +++ linux-2.6.21.perfctr26/include/asm-x86_64/system.h 2008-06-22 16:46:44.000000000 +0200 @@ -21,7 +21,8 @@ ,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15" /* Save restore flags to clear handle leaking NT */ -#define switch_to(prev,next,last) \ +#define switch_to(prev,next,last) do { \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile(SAVE_CONTEXT \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ @@ -41,7 +42,8 @@ [tif_fork] "i" (TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, thread_info)), \ [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ - : "memory", "cc" __EXTRA_CLOBBER) + : "memory", "cc" __EXTRA_CLOBBER); \ +} while (0) extern void load_gs_index(unsigned); --- linux-2.6.21.perfctr26/kernel/exit.c.~1~ 2008-06-22 16:44:02.000000000 +0200 +++ linux-2.6.21.perfctr26/kernel/exit.c 2008-06-22 16:46:44.000000000 +0200 @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -170,6 +171,7 @@ repeat: zap_leader = (leader->exit_signal == -1); } + perfctr_release_task(p); sched_exit(p); write_unlock_irq(&tasklist_lock); proc_flush_task(p); --- linux-2.6.21.perfctr26/kernel/sched.c.~1~ 2008-06-22 16:44:02.000000000 +0200 +++ linux-2.6.21.perfctr26/kernel/sched.c 2008-06-22 16:46:44.000000000 +0200 @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -4836,6 +4837,8 @@ int set_cpus_allowed(struct task_struct struct rq *rq; int ret = 0; + perfctr_set_cpus_allowed(p, new_mask); + rq = task_rq_lock(p, &flags); if (!cpus_intersects(new_mask, cpu_online_map)) { ret = -EINVAL; --- linux-2.6.21.perfctr26/kernel/timer.c.~1~ 2008-06-22 16:44:02.000000000 +0200 
+++ linux-2.6.21.perfctr26/kernel/timer.c 2008-06-22 16:46:44.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -1213,6 +1214,7 @@ void update_process_times(int user_tick) account_user_time(p, jiffies_to_cputime(1)); else account_system_time(p, HARDIRQ_OFFSET, jiffies_to_cputime(1)); + perfctr_sample_thread(&p->thread); run_local_timers(); if (rcu_pending(cpu)) rcu_check_callbacks(cpu, user_tick); papi-5.6.0/src/perfctr-2.6.x/patches/patch-kernel-2.6.20000664 001750 001750 00000044244 13216244367 024230 0ustar00jshenry1963jshenry1963000000 000000 --- linux-2.6.20.perfctr26/CREDITS.~1~ 2008-06-22 17:08:52.000000000 +0200 +++ linux-2.6.20.perfctr26/CREDITS 2008-06-22 17:10:48.000000000 +0200 @@ -2668,9 +2668,10 @@ S: Ottawa, Ontario S: Canada K2P 0X8 N: Mikael Pettersson -E: mikpe@csd.uu.se -W: http://www.csd.uu.se/~mikpe/ +E: mikpe@it.uu.se +W: http://user.it.uu.se/~mikpe/ D: Miscellaneous fixes +D: Performance-monitoring counters driver N: Reed H. 
Petty E: rhp@draper.net --- linux-2.6.20.perfctr26/Documentation/ioctl-number.txt.~1~ 2008-06-22 17:08:52.000000000 +0200 +++ linux-2.6.20.perfctr26/Documentation/ioctl-number.txt 2008-06-22 17:10:42.000000000 +0200 @@ -187,6 +187,8 @@ Code Seq# Include File Comments 0xB1 00-1F PPPoX 0xCB 00-1F CBM serial IEC bus in development: +0xD0 all performance counters see drivers/perfctr/ + 0xDD 00-3F ZFCP device driver see drivers/s390/scsi/ 0xF3 00-3F video/sisfb.h sisfb (in development) --- linux-2.6.20.perfctr26/MAINTAINERS.~1~ 2008-06-22 17:08:52.000000000 +0200 +++ linux-2.6.20.perfctr26/MAINTAINERS 2008-06-22 17:10:48.000000000 +0200 @@ -2577,6 +2577,12 @@ M: nagar@watson.ibm.com L: linux-kernel@vger.kernel.org S: Maintained +PERFORMANCE-MONITORING COUNTERS DRIVER +P: Mikael Pettersson +M: mikpe@it.uu.se +W: http://user.it.uu.se/~mikpe/linux/perfctr/ +S: Maintained + PERSONALITY HANDLING P: Christoph Hellwig M: hch@infradead.org --- linux-2.6.20.perfctr26/arch/arm/Kconfig.~1~ 2008-06-22 17:08:52.000000000 +0200 +++ linux-2.6.20.perfctr26/arch/arm/Kconfig 2008-06-22 17:10:42.000000000 +0200 @@ -398,6 +398,10 @@ config IWMMXT Enable support for iWMMXt context switching at run time if running on a CPU that supports it. 
+if CPU_XSCALE +source drivers/perfctr/Kconfig +endif + # bool 'Use XScale PMU as timer source' CONFIG_XSCALE_PMU_TIMER config XSCALE_PMU bool --- linux-2.6.20.perfctr26/arch/arm/kernel/process.c.~1~ 2008-06-22 17:08:52.000000000 +0200 +++ linux-2.6.20.perfctr26/arch/arm/kernel/process.c 2008-06-22 17:10:42.000000000 +0200 @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -285,6 +286,7 @@ void show_fpregs(struct user_fp *regs) */ void exit_thread(void) { + perfctr_exit_thread(&current->thread); } ATOMIC_NOTIFIER_HEAD(thread_notify_head); @@ -330,6 +332,8 @@ copy_thread(int nr, unsigned long clone_ if (clone_flags & CLONE_SETTLS) thread->tp_value = regs->ARM_r3; + perfctr_copy_task(p, regs); + return 0; } --- linux-2.6.20.perfctr26/arch/i386/Kconfig.~1~ 2008-06-22 17:08:53.000000000 +0200 +++ linux-2.6.20.perfctr26/arch/i386/Kconfig 2008-06-22 17:10:42.000000000 +0200 @@ -745,6 +745,8 @@ config SECCOMP If unsure, say Y. Only embedded should say N here. +source "drivers/perfctr/Kconfig" + source kernel/Kconfig.hz config KEXEC --- linux-2.6.20.perfctr26/arch/i386/kernel/entry.S.~1~ 2008-06-22 17:08:53.000000000 +0200 +++ linux-2.6.20.perfctr26/arch/i386/kernel/entry.S 2008-06-22 17:10:42.000000000 +0200 @@ -626,6 +626,22 @@ ENTRY(name) \ /* The include is where all of the SMP etc.
interrupts come from */ #include "entry_arch.h" +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +ENTRY(perfctr_interrupt) + RING0_INT_FRAME + pushl $~(LOCAL_PERFCTR_VECTOR) + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + TRACE_IRQS_OFF + pushl %esp + CFI_ADJUST_CFA_OFFSET 4 + call smp_perfctr_interrupt + addl $4, %esp + CFI_ADJUST_CFA_OFFSET -4 + jmp ret_from_intr + CFI_ENDPROC +#endif + KPROBE_ENTRY(page_fault) RING0_EC_FRAME pushl $do_page_fault --- linux-2.6.20.perfctr26/arch/i386/kernel/i8259.c.~1~ 2008-06-22 17:08:53.000000000 +0200 +++ linux-2.6.20.perfctr26/arch/i386/kernel/i8259.c 2008-06-22 17:10:42.000000000 +0200 @@ -22,6 +22,7 @@ #include #include #include +#include #include @@ -409,6 +410,8 @@ void __init native_init_IRQ(void) */ intr_init_hook(); + perfctr_vector_init(); + /* * Set the clock to HZ Hz, we already have a valid * vector now: --- linux-2.6.20.perfctr26/arch/i386/kernel/process.c.~1~ 2008-06-22 17:08:53.000000000 +0200 +++ linux-2.6.20.perfctr26/arch/i386/kernel/process.c 2008-06-22 17:10:42.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -376,6 +377,7 @@ void exit_thread(void) tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET; put_cpu(); } + perfctr_exit_thread(&current->thread); } void flush_thread(void) @@ -427,6 +429,8 @@ int copy_thread(int nr, unsigned long cl savesegment(fs,p->thread.fs); + perfctr_copy_task(p, regs); + tsk = current; if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, @@ -695,6 +699,8 @@ struct task_struct fastcall * __switch_t if (next_p->fpu_counter > 5) math_state_restore(); + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.20.perfctr26/arch/powerpc/Kconfig.~1~ 2008-06-22 17:08:53.000000000 +0200 +++ linux-2.6.20.perfctr26/arch/powerpc/Kconfig 2008-06-22 17:10:42.000000000 +0200 @@ -356,6 +356,11 @@ config NOT_COHERENT_CACHE bool depends on 4xx || 8xx || E200 default y + +if PPC32
+source "drivers/perfctr/Kconfig" +endif + endmenu source "init/Kconfig" --- linux-2.6.20.perfctr26/arch/powerpc/kernel/process.c.~1~ 2008-06-22 17:08:53.000000000 +0200 +++ linux-2.6.20.perfctr26/arch/powerpc/kernel/process.c 2008-06-22 17:10:42.000000000 +0200 @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -332,7 +333,9 @@ struct task_struct *__switch_to(struct t account_process_vtime(current); calculate_steal_time(); + perfctr_suspend_thread(&prev->thread); last = _switch(old_thread, new_thread); + perfctr_resume_thread(&current->thread); local_irq_restore(flags); @@ -458,6 +461,7 @@ void show_regs(struct pt_regs * regs) void exit_thread(void) { discard_lazy_cpu_state(); + perfctr_exit_thread(&current->thread); } void flush_thread(void) @@ -570,6 +574,8 @@ int copy_thread(int nr, unsigned long cl p->thread.last_syscall = -1; #endif + perfctr_copy_task(p, regs); + return 0; } --- linux-2.6.20.perfctr26/arch/x86_64/Kconfig.~1~ 2008-06-22 17:08:53.000000000 +0200 +++ linux-2.6.20.perfctr26/arch/x86_64/Kconfig 2008-06-22 17:10:42.000000000 +0200 @@ -607,6 +607,8 @@ config CC_STACKPROTECTOR_ALL functions that use large-ish on-stack buffers. By enabling this option, GCC will be asked to do this for ALL functions. +source "drivers/perfctr/Kconfig" + source kernel/Kconfig.hz config REORDER --- linux-2.6.20.perfctr26/arch/x86_64/kernel/entry.S.~1~ 2008-06-22 17:08:53.000000000 +0200 +++ linux-2.6.20.perfctr26/arch/x86_64/kernel/entry.S 2008-06-22 17:10:42.000000000 +0200 @@ -689,6 +689,12 @@ ENTRY(spurious_interrupt) apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt END(spurious_interrupt) +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +ENTRY(perfctr_interrupt) + apicinterrupt LOCAL_PERFCTR_VECTOR,smp_perfctr_interrupt +END(perfctr_interrupt) +#endif + /* * Exception entry points. 
*/ --- linux-2.6.20.perfctr26/arch/x86_64/kernel/i8259.c.~1~ 2008-06-22 17:08:53.000000000 +0200 +++ linux-2.6.20.perfctr26/arch/x86_64/kernel/i8259.c 2008-06-22 17:10:42.000000000 +0200 @@ -22,6 +22,7 @@ #include #include #include +#include /* * Common place to define all x86 IRQ vectors @@ -553,6 +554,8 @@ void __init init_IRQ(void) set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); + perfctr_vector_init(); + /* * Set the clock to HZ Hz, we already have a valid * vector now: --- linux-2.6.20.perfctr26/arch/x86_64/kernel/process.c.~1~ 2008-06-22 17:08:53.000000000 +0200 +++ linux-2.6.20.perfctr26/arch/x86_64/kernel/process.c 2008-06-22 17:10:42.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -377,6 +378,7 @@ void exit_thread(void) t->io_bitmap_max = 0; put_cpu(); } + perfctr_exit_thread(t); } void flush_thread(void) @@ -482,6 +484,8 @@ int copy_thread(int nr, unsigned long cl asm("mov %%es,%0" : "=m" (p->thread.es)); asm("mov %%ds,%0" : "=m" (p->thread.ds)); + perfctr_copy_task(p, regs); + if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) { @@ -665,6 +669,9 @@ __switch_to(struct task_struct *prev_p, */ if (next_p->fpu_counter>5) math_state_restore(); + + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.20.perfctr26/drivers/Makefile.~1~ 2008-06-22 17:08:53.000000000 +0200 +++ linux-2.6.20.perfctr26/drivers/Makefile 2008-06-22 17:10:42.000000000 +0200 @@ -73,6 +73,7 @@ obj-$(CONFIG_NEW_LEDS) += leds/ obj-$(CONFIG_INFINIBAND) += infiniband/ obj-$(CONFIG_IPATH_CORE) += infiniband/ obj-$(CONFIG_SGI_SN) += sn/ +obj-$(CONFIG_KPERFCTR) += perfctr/ obj-y += firmware/ obj-$(CONFIG_CRYPTO) += crypto/ obj-$(CONFIG_SUPERH) += sh/ --- linux-2.6.20.perfctr26/fs/exec.c.~1~ 2008-06-22 17:08:54.000000000 +0200 +++ linux-2.6.20.perfctr26/fs/exec.c 2008-06-22 
17:10:48.000000000 +0200 @@ -50,6 +50,7 @@ #include #include #include +#include #include #include @@ -873,6 +874,7 @@ int flush_old_exec(struct linux_binprm * set_task_comm(current, tcomm); current->flags &= ~PF_RANDOMIZE; + perfctr_flush_thread(&current->thread); flush_thread(); /* Set the new mm task size. We have to do that late because it may --- linux-2.6.20.perfctr26/include/asm-arm/processor.h.~1~ 2008-06-22 15:42:33.000000000 +0200 +++ linux-2.6.20.perfctr26/include/asm-arm/processor.h 2008-06-22 17:10:42.000000000 +0200 @@ -44,6 +44,10 @@ struct thread_struct { unsigned long error_code; /* debugging */ struct debug_info debug; + +#ifdef CONFIG_PERFCTR_VIRTUAL + struct vperfctr *perfctr; /* performance counters */ +#endif }; #define INIT_THREAD { } --- linux-2.6.20.perfctr26/include/asm-arm/system.h.~1~ 2008-06-22 17:08:54.000000000 +0200 +++ linux-2.6.20.perfctr26/include/asm-arm/system.h 2008-06-22 17:10:42.000000000 +0200 @@ -220,7 +220,9 @@ extern struct task_struct *__switch_to(s #define switch_to(prev,next,last) \ do { \ + perfctr_suspend_thread(&(prev)->thread); \ last = __switch_to(prev,task_thread_info(prev), task_thread_info(next)); \ + perfctr_resume_thread(&(current)->thread); \ } while (0) /* --- linux-2.6.20.perfctr26/include/asm-i386/mach-default/irq_vectors.h.~1~ 2008-06-22 15:56:54.000000000 +0200 +++ linux-2.6.20.perfctr26/include/asm-i386/mach-default/irq_vectors.h 2008-06-22 17:10:42.000000000 +0200 @@ -56,14 +56,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. 
(0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 --- linux-2.6.20.perfctr26/include/asm-i386/mach-visws/irq_vectors.h.~1~ 2008-06-22 15:56:54.000000000 +0200 +++ linux-2.6.20.perfctr26/include/asm-i386/mach-visws/irq_vectors.h 2008-06-22 17:10:42.000000000 +0200 @@ -35,14 +35,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 --- linux-2.6.20.perfctr26/include/asm-i386/processor.h.~1~ 2008-06-22 17:08:54.000000000 +0200 +++ linux-2.6.20.perfctr26/include/asm-i386/processor.h 2008-06-22 17:10:42.000000000 +0200 @@ -418,6 +418,8 @@ struct thread_struct { unsigned long iopl; /* max allowed port in the bitmap, in bytes: */ unsigned long io_bitmap_max; +/* performance counters */ + struct vperfctr *perfctr; }; #define INIT_THREAD { \ --- linux-2.6.20.perfctr26/include/asm-i386/system.h.~1~ 2008-06-22 16:44:00.000000000 +0200 +++ linux-2.6.20.perfctr26/include/asm-i386/system.h 2008-06-22 17:10:42.000000000 +0200 @@ -17,6 +17,7 @@ extern struct task_struct * FASTCALL(__s */ #define switch_to(prev,next,last) do { \ unsigned long esi,edi; \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile("pushfl\n\t" /* Save flags */ \ "pushl %%ebp\n\t" \ "movl %%esp,%0\n\t" /* save ESP */ \ --- linux-2.6.20.perfctr26/include/asm-powerpc/processor.h.~1~ 2008-06-22 16:44:00.000000000 +0200 +++ linux-2.6.20.perfctr26/include/asm-powerpc/processor.h 2008-06-22 17:10:42.000000000 +0200 @@ -166,6 +166,9 @@ struct thread_struct { unsigned long spefscr; /* SPE & eFP status */ int 
used_spe; /* set if process has used spe */ #endif /* CONFIG_SPE */ +#ifdef CONFIG_PERFCTR_VIRTUAL + struct vperfctr *perfctr; /* performance counters */ +#endif }; #define ARCH_MIN_TASKALIGN 16 --- linux-2.6.20.perfctr26/include/asm-x86_64/hw_irq.h.~1~ 2008-06-22 17:08:54.000000000 +0200 +++ linux-2.6.20.perfctr26/include/asm-x86_64/hw_irq.h 2008-06-22 17:10:42.000000000 +0200 @@ -63,14 +63,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in irq.h */ #ifndef __ASSEMBLY__ --- linux-2.6.20.perfctr26/include/asm-x86_64/irq.h.~1~ 2008-06-22 15:56:55.000000000 +0200 +++ linux-2.6.20.perfctr26/include/asm-x86_64/irq.h 2008-06-22 17:10:42.000000000 +0200 @@ -29,7 +29,7 @@ */ #define NR_VECTORS 256 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in hw_irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in hw_irq.h */ #define NR_IRQS (NR_VECTORS + (32 *NR_CPUS)) #define NR_IRQ_VECTORS NR_IRQS --- linux-2.6.20.perfctr26/include/asm-x86_64/processor.h.~1~ 2008-06-22 16:44:02.000000000 +0200 +++ linux-2.6.20.perfctr26/include/asm-x86_64/processor.h 2008-06-22 17:10:42.000000000 +0200 @@ -274,6 +274,8 @@ struct thread_struct { unsigned io_bitmap_max; /* cached TLS descriptors. 
*/ u64 tls_array[GDT_ENTRY_TLS_ENTRIES]; +/* performance counters */ + struct vperfctr *perfctr; } __attribute__((aligned(16))); #define INIT_THREAD { \ --- linux-2.6.20.perfctr26/include/asm-x86_64/system.h.~1~ 2008-06-22 16:44:02.000000000 +0200 +++ linux-2.6.20.perfctr26/include/asm-x86_64/system.h 2008-06-22 17:10:42.000000000 +0200 @@ -21,7 +21,8 @@ ,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15" /* Save restore flags to clear handle leaking NT */ -#define switch_to(prev,next,last) \ +#define switch_to(prev,next,last) do { \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile(SAVE_CONTEXT \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ @@ -41,7 +42,8 @@ [tif_fork] "i" (TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, thread_info)), \ [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ - : "memory", "cc" __EXTRA_CLOBBER) + : "memory", "cc" __EXTRA_CLOBBER); \ +} while (0) extern void load_gs_index(unsigned); --- linux-2.6.20.perfctr26/kernel/exit.c.~1~ 2008-06-22 17:08:54.000000000 +0200 +++ linux-2.6.20.perfctr26/kernel/exit.c 2008-06-22 17:10:42.000000000 +0200 @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -170,6 +171,7 @@ repeat: zap_leader = (leader->exit_signal == -1); } + perfctr_release_task(p); sched_exit(p); write_unlock_irq(&tasklist_lock); proc_flush_task(p); --- linux-2.6.20.perfctr26/kernel/sched.c.~1~ 2008-06-22 17:08:55.000000000 +0200 +++ linux-2.6.20.perfctr26/kernel/sched.c 2008-06-22 17:10:42.000000000 +0200 @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -5005,6 +5006,8 @@ int set_cpus_allowed(struct task_struct struct rq *rq; int ret = 0; + perfctr_set_cpus_allowed(p, new_mask); + rq = task_rq_lock(p, &flags); if (!cpus_intersects(new_mask, cpu_online_map)) { ret = -EINVAL; --- linux-2.6.20.perfctr26/kernel/timer.c.~1~ 2008-06-22 17:08:55.000000000 +0200 
+++ linux-2.6.20.perfctr26/kernel/timer.c 2008-06-22 17:10:42.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -1110,6 +1111,7 @@ void update_process_times(int user_tick) account_user_time(p, jiffies_to_cputime(1)); else account_system_time(p, HARDIRQ_OFFSET, jiffies_to_cputime(1)); + perfctr_sample_thread(&p->thread); run_local_timers(); if (rcu_pending(cpu)) rcu_check_callbacks(cpu, user_tick); papi-5.6.0/src/perfctr-2.6.x/usr.lib/arm.c000664 001750 001750 00000002363 13216244367 022053 0ustar00jshenry1963jshenry1963000000 000000 /* $Id: arm.c,v 1.1.2.1 2007/02/11 20:15:03 mikpe Exp $ * ARM-specific perfctr library procedures. * * Copyright (C) 2005-2007 Mikael Pettersson */ #include #include "libperfctr.h" /* Return the number of counters for info->cpu_type: 2 for PERFCTR_ARM_XSC1, 4 for PERFCTR_ARM_XSC2, and 0 for any other cpu_type. */ unsigned int perfctr_info_nrctrs(const struct perfctr_info *info) { switch (info->cpu_type) { case PERFCTR_ARM_XSC1: return 2; case PERFCTR_ARM_XSC2: return 4; default: return 0; } } /* Return a string literal naming info->cpu_type: "XScale1" for PERFCTR_ARM_XSC1, "XScale2" for PERFCTR_ARM_XSC2, and "?" for any other cpu_type. */ const char *perfctr_info_cpu_name(const struct perfctr_info *info) { switch (info->cpu_type) { case PERFCTR_ARM_XSC1: return "XScale1"; case PERFCTR_ARM_XSC2: return "XScale2"; default: return "?"; } } /* Print the contents of *control with printf: tsc_on, nractrs, nrictrs (only when non-zero), then pmc_map[i] and evntsel[i] for every i below nractrs + nrictrs. ireset[i] is printed only for indices at or beyond nractrs. */ void perfctr_cpu_control_print(const struct perfctr_cpu_control *control) { unsigned int i, nractrs, nrictrs, nrctrs; nractrs = control->nractrs; nrictrs = control->nrictrs; nrctrs = control->nractrs + nrictrs; printf("tsc_on\t\t\t%u\n", control->tsc_on); printf("nractrs\t\t\t%u\n", nractrs); if (nrictrs) printf("nrictrs\t\t\t%u\n", nrictrs); for(i = 0; i < nrctrs; ++i) { printf("pmc_map[%u]\t\t%u\n", i, control->pmc_map[i]); printf("evntsel[%u]\t\t0x%08X\n", i, control->evntsel[i]); if (i >= nractrs) printf("ireset[%u]\t\t%d\n", i, control->ireset[i]); } } papi-5.6.0/src/ftests/flops.F000664 001750 001750 00000004140 13216244361 020063 0ustar00jshenry1963jshenry1963000000 000000 C A simple example for the use of PAPI, the number of flops you should C get is about INDEX^3 on machines that consider add and multiply one flop C such as SGI, 
and 2*(INDEX^3) on machines that don't consider it one flop, such as INTEL C -Kevin London #include "fpapi_test.h" program flops implicit integer (p) integer index PARAMETER(index=100) REAL*4 matrixa(index,index),matrixb(index,index),mres(index,index) REAL*4 proc_time, mflops, real_time INTEGER*8 flpins INTEGER i,j,k, retval integer tests_quiet, get_quiet external get_quiet tests_quiet = get_quiet() retval = PAPI_VER_CURRENT call PAPIf_library_init(retval) if ( retval.NE.PAPI_VER_CURRENT) then call ftest_fail(__FILE__, __LINE__, . 'PAPI_library_init', retval) end if call PAPIf_query_event(PAPI_FP_INS, retval) if (retval .NE. PAPI_OK) then call ftest_skip(__FILE__, __LINE__, 'PAPI_FP_INS', PAPI_ENOEVNT) end if C Initialize the Matrix arrays do i=1,index do j=1,index matrixa(i,j) = i+j matrixb(i,j) = j-i mres(i,j) = 0.0 end do end do C Setup PAPI library and begin collecting data from the counters call PAPIf_flips( real_time, proc_time, flpins, mflops, retval ) if ( retval.NE.PAPI_OK) then call ftest_fail(__FILE__, __LINE__, 'PAPIf_flips', retval) end if C Matrix-Matrix Multiply do i=1,index do j=1,index do k=1,index mres(i,j) = mres(i,j) + matrixa(i,k)*matrixb(k,j) end do end do end do C Collect the data into the Variables passed in call PAPIf_flips( real_time, proc_time, flpins, mflops, retval) if ( retval.NE.PAPI_OK) then call ftest_fail(__FILE__, __LINE__, 'PAPIf_flips', retval) end if if (tests_quiet .EQ. 0) then print *, 'Real_time: ', real_time print *, ' Proc_time: ', proc_time print *, ' Total flpins: ', flpins print *, ' MFLOPS: ', mflops end if call dummy(mres) call ftests_pass(__FILE__) end papi-5.6.0/src/ftests/case2.F000664 001750 001750 00000006166 13216244361 017747 0ustar00jshenry1963jshenry1963000000 000000 C From Dave McNamara at PSRV. Thanks! C Ported to Fortran by Kevin London C If an event is countable but you've exhausted the counter resources C and you try to add an event, it seems subsequent PAPI_start and/or C PAPI_stop will cause a Seg. Violation. 
C I got around this by calling PAPI to get the # of countable events, C then making sure that I didn't try to add more than these number of C events. I still have a problem if someone adds Level 2 cache misses C and then adds FLOPS 'cause I didn't count FLOPS as actually requiring C 2 counters. #include "fpapi_test.h" program case2 IMPLICIT integer (p) REAL c,a,b INTEGER n INTEGER EventSet INTEGER retval INTEGER I,j INTEGER*8 gl(3) INTEGER last_char EXTERNAL last_char integer tests_quiet, get_quiet external get_quiet tests_quiet = get_quiet() a=0.999 b=1.001 n=1000 i=0 j=0 EventSet = PAPI_NULL retval = PAPI_VER_CURRENT call PAPIf_library_init( retval ) if ( retval.NE.PAPI_VER_CURRENT) then call ftest_fail(__FILE__, __LINE__, . 'PAPI_library_init', retval) end if call PAPIf_create_eventset( EventSet, retval) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_create_eventset', *retval) end if call PAPIf_query_event(PAPI_BR_CN, retval) if (retval .EQ. PAPI_OK) then j = j + 1 end if if (j .NE. 0) then call PAPIf_add_event( EventSet, PAPI_BR_CN, retval ) if ( retval .NE. PAPI_OK ) then if (tests_quiet .EQ. 0) then call PAPIf_perror( 'PAPIf_add_event' ) endif end if end if i = j call PAPIf_query_event(PAPI_TOT_CYC, retval) if (retval .EQ. PAPI_OK) then j = j + 1 end if if (j .EQ. i+1) then call PAPIf_add_event( EventSet, PAPI_TOT_CYC, retval ) if ( retval .NE. PAPI_OK )then if (tests_quiet .EQ. 0) then call PAPIf_perror( 'PAPIf_add_event' ) end if end if end if i = j C Query the same event that is added below call PAPIf_query_event(PAPI_TOT_INS, retval) if (retval .EQ. PAPI_OK) then j = j + 1 end if if (j .EQ. i+1) then call PAPIf_add_event(EventSet,PAPI_TOT_INS,retval) if ( retval .NE. PAPI_OK )then if ( retval .NE. PAPI_ECNFLCT ) then if (tests_quiet .EQ. 0) then call PAPIf_perror( 'PAPIf_add_event' ) end if end if end if end if if (J .GT. 0) then call PAPIf_start(EventSet, retval ) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 
'PAPIf_start', retval) end if end if do i=1,n c = a * b end do if (J .GT. 0) then call PAPIf_stop( EventSet, gl, retval) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_stop', retval) end if end if call ftests_pass(__FILE__) end papi-5.6.0/src/perfctr-2.6.x/usr.lib/event_set_p4.c000775 001750 001750 00000052730 13216244367 023701 0ustar00jshenry1963jshenry1963000000 000000 /* $Id: event_set_p4.c,v 1.5 2004/02/20 21:32:06 mikpe Exp $ * Performance counter event descriptions for Intel P4. * * Copyright (C) 2003-2004 Mikael Pettersson * * This is still preliminary: * - need mapping from enum escr_set to * - the current data structures can't describe all P4 side-conditions * - replace eventsel in struct perfctr_event with a unique cookie? */ #include /* for NULL */ #include "libperfctr.h" #include "event_set.h" /* Identifies the ESCR set an event is counted through. The comment on each enumerator lists the CCCR numbers and the ESCR select value for that set. No explicit values are assigned, so the 20 enumerators are numbered 0..19 in declaration order. */ enum escr_set { ALF_ESCR_0_1, /* CCCR 12/13/14/15/16/17 via ESCR select 0x01 */ BPU_ESCR_0_1, /* CCCR 0/1/2/3 via ESCR select 0x00 */ BSU_ESCR_0_1, /* CCCR 0/1/2/3 via ESCR select 0x07 */ BSU_ESCR_0, /* CCCR 0/1 via ESCR select 0x07 */ BSU_ESCR_1, /* CCCR 2/3 via ESCR select 0x07 */ CRU_ESCR_0_1, /* CCCR 12/13/14/15/16/17 via ESCR select 0x04 */ CRU_ESCR_2_3, /* CCCR 12/13/14/15/16/17 via ESCR select 0x05 */ DAC_ESCR_0_1, /* CCCR 8/9/10/11 via ESCR select 0x05 */ FIRM_ESCR_0_1, /* CCCR 8/9/10/11 via ESCR select 0x01 */ FSB_ESCR_0_1, /* CCCR 0/1/2/3 via ESCR select 0x06 */ FSB_ESCR_0, /* CCCR 0/1 via ESCR select 0x06 */ FSB_ESCR_1, /* CCCR 2/3 via ESCR select 0x06 */ ITLB_ESCR_0_1, /* CCCR 0/1/2/3 via ESCR select 0x03 */ MOB_ESCR_0_1, /* CCCR 0/1/2/3 via ESCR select 0x02 */ MS_ESCR_0_1, /* CCCR 4/5/6/7 via ESCR select 0x00 */ PMH_ESCR_0_1, /* CCCR 0/1/2/3 via ESCR select 0x04 */ RAT_ESCR_0_1, /* CCCR 12/13/14/15/16/17 via ESCR select 0x02 */ SAAT_ESCR_0_1, /* CCCR 8/9/10/11 via ESCR select 0x02 */ TBPU_ESCR_0_1, /* CCCR 4/5/6/7 via ESCR select 0x02 */ TC_ESCR_0_1, /* CCCR 4/5/6/7 via ESCR select 0x01 */ }; /* TC_deliver_mode unit mask: one bit per (logical processor 0, logical processor 1) mode pair; only DD is set by default. */ static const struct 
perfctr_unit_mask_8 p4_um_TC_deliver_mode = { { .type = perfctr_um_type_bitmask, .default_value = 0x01, /* DD */ .nvalues = 8 }, { { 0x01, "DD:both logical processors in deliver mode" }, { 0x02, "DB:logical processor 0 in deliver mode, 1 in build mode" }, { 0x04, "DI:logical processor 0 in deliver mode, 1 is inactive" }, { 0x08, "BD:logical processor 0 in build mode, 1 in deliver mode" }, { 0x10, "BB:both logical processors in build mode" }, { 0x20, "BI:logical processor 0 in build mode, 1 is inactive" }, { 0x40, "ID:logical processor 0 is inactive, 1 in deliver mode" }, { 0x80, "IB:logical processor 0 is inactive, 1 in build mode" } } }; /* BPU_fetch_request unit mask: a single TCMISS bit, which the default value 0x00 leaves clear. */ static const struct perfctr_unit_mask_1 p4_um_BPU_fetch_request = { { .type = perfctr_um_type_bitmask, .default_value = 0x00, .nvalues = 1 }, { { 0x01, "TCMISS:Trace cache lookup miss" } } }; /* ITLB_reference unit mask: the default 0x07 sets all three bits. */ static const struct perfctr_unit_mask_3 p4_um_ITLB_reference = { { .type = perfctr_um_type_bitmask, .default_value = 0x07, .nvalues = 3 }, { { 0x01, "HIT:ITLB hit" }, { 0x02, "MISS:ITLB miss" }, { 0x04, "HIT_UC:Uncacheable ITLB hit" } } }; /* memory_cancel unit mask: the default 0x0C sets both bits. */ static const struct perfctr_unit_mask_2 p4_um_memory_cancel = { { .type = perfctr_um_type_bitmask, .default_value = 0x0C, .nvalues = 2 }, { { 0x04, "ST_RB_FULL:Replayed because no store request buffer is available" }, { 0x08, "64K_CONF:Conflicts due to 64K aliasing" } } }; /* memory_complete unit mask: the default 0x03 sets both bits. */ static const struct perfctr_unit_mask_2 p4_um_memory_complete = { { .type = perfctr_um_type_bitmask, .default_value = 0x03, .nvalues = 2 }, { { 0x01, "LSC:Load split completed, excluding UC/WC loads" }, { 0x02, "SSC:Any split stores completed" } } }; /* load_port_replay unit mask: its only bit, SPLIT_LD, is the default. */ static const struct perfctr_unit_mask_1 p4_um_load_port_replay = { { .type = perfctr_um_type_bitmask, .default_value = 0x02, .nvalues = 1 }, { { 0x02, "SPLIT_LD:Split load" } } }; /* store_port_replay unit mask: its only bit, SPLIT_ST, is the default. */ static const struct perfctr_unit_mask_1 p4_um_store_port_replay = { { .type = perfctr_um_type_bitmask, .default_value = 0x02, .nvalues = 1 }, { { 0x02, "SPLIT_ST:Split store" } } }; /* MOB_load_replay unit mask: the default 0x3A sets all four replay-cause bits. */ static const struct 
perfctr_unit_mask_4 p4_um_MOB_load_replay = { { .type = perfctr_um_type_bitmask, .default_value = 0x3A, .nvalues = 4 }, { { 0x02, "NO_STA:Replayed because of unknown store address" }, { 0x08, "NO_STD:Replayed because of unknown store data" }, { 0x10, "PARTIAL_DATA:Replayed because of partially overlapped data access between the load and store operations" }, { 0x20, "UNALGN_ADDR:Replayed because the lower 4 bits of the linear address do not match between the load and store operations" } } }; /* page_walk_type unit mask: the default 0x03 sets both bits. */ static const struct perfctr_unit_mask_2 p4_um_page_walk_type = { { .type = perfctr_um_type_bitmask, .default_value = 0x03, .nvalues = 2 }, { { 0x01, "DTMISS:Page walk for a data TLB miss" }, { 0x02, "ITMISS:Page walk for an instruction TLB miss" } } }; /* BSQ_cache_reference unit mask: the default 0x73F sets all nine bits. */ static const struct perfctr_unit_mask_9 p4_um_BSQ_cache_reference = { { .type = perfctr_um_type_bitmask, .default_value = 0x73F, .nvalues = 9 }, { { 0x001, "RD_2ndL_HITS:Read 2nd level cache hit Shared" }, { 0x002, "RD_2ndL_HITE:Read 2nd level cache hit Exclusive" }, { 0x004, "RD_2ndL_HITM:Read 2nd level cache hit Modified" }, { 0x008, "RD_3rdL_HITS:Read 3rd level cache hit Shared" }, { 0x010, "RD_3rdL_HITE:Read 3rd level cache hit Exclusive" }, { 0x020, "RD_3rdL_HITM:Read 3rd level cache hit Modified" }, { 0x100, "RD_2ndL_MISS:Read 2nd level cache miss" }, { 0x200, "RD_3rdL_MISS:Read 3rd level cache miss" }, { 0x400, "WR_2ndL_MISS:Writeback lookup from DAC misses the 2nd level cache" } } }; /* review P4M0 and P4M2 diffs according to P4 Code Optim manual */ /* IOQ unit mask: the default 0xEFE1 sets bus request type bit 0 and every listed bit from 0x0020 upward; bus request type bits 1-4 stay clear. */ static const struct perfctr_unit_mask_15 p4_um_IOQ = { { .type = perfctr_um_type_bitmask, .default_value = 0xEFE1, .nvalues = 15 }, /* XXX: how should we describe that bits 0-4 are a single field? 
*/ { { 0x0001, "bus request type bit 0" }, { 0x0002, "bus request type bit 1" }, { 0x0004, "bus request type bit 2" }, { 0x0008, "bus request type bit 3" }, { 0x0010, "bus request type bit 4" }, { 0x0020, "ALL_READ:Count read entries" }, { 0x0040, "ALL_WRITE:Count write entries" }, { 0x0080, "MEM_UC:Count UC memory access entries" }, { 0x0100, "MEM_WC:Count WC memory access entries" }, { 0x0200, "MEM_WT:Count WT memory access entries" }, { 0x0400, "MEM_WP:Count WP memory access entries" }, { 0x0800, "MEM_WB:Count WB memory access entries" }, { 0x2000, "OWN:Count own store requests" }, { 0x4000, "OTHER:Count other and DMA store requests" }, { 0x8000, "PREFETCH:Include HW and SW prefetch requests" } } }; /* FSB_data_activity unit mask: the default 0x1B sets DRDY_DRV, DRDY_OWN, DBSY_DRV and DBSY_OWN; the two _OTHER bits stay clear. */ static const struct perfctr_unit_mask_6 p4_um_FSB_data_activity = { { .type = perfctr_um_type_bitmask, .default_value = 0x1B, .nvalues = 6 }, /* DRDY_OWN is mutually exclusive with DRDY_OTHER */ /* DBSY_OWN is mutually exclusive with DBSY_OTHER */ { { 0x01, "DRDY_DRV:Count when this processor drives data onto the bus" }, { 0x02, "DRDY_OWN:Count when this processor reads data from the bus" }, { 0x04, "DRDY_OTHER:Count when data is on the bus but not being sampled by the processor" }, { 0x08, "DBSY_DRV:Count when this processor reserves the bus for driving data" }, { 0x10, "DBSY_OWN:Count when this processor reserves the bus for sampling data" }, { 0x20, "DBSY_OTHER:Count when the bus is reserved for driving data this processor will not sample" } } }; /* BSQ unit mask: the default 0x0021 sets REQ_TYPE0 and REQ_IO_TYPE only. */ static const struct perfctr_unit_mask_13 p4_um_BSQ = { { .type = perfctr_um_type_bitmask, .default_value = 0x0021, .nvalues = 13 }, { { 0x0001, "REQ_TYPE0:Request type encoding bit 0" }, { 0x0002, "REQ_TYPE1:Request type encoding bit 1" }, { 0x0004, "REQ_LEN0:Request length encoding bit 0" }, { 0x0008, "REQ_LEN1:Request length encoding bit 1" }, { 0x0020, "REQ_IO_TYPE:Request type is input or output" }, { 0x0040, "REQ_LOCK_TYPE:Request type is bus lock" }, { 0x0080, "REQ_CACHE_TYPE:Request type is cacheable" }, { 0x0100, 
"REQ_SPLIT_TYPE:Request type is a bus 8-byte chunk split across 8-byte boundary" }, { 0x0200, "REQ_DEM_TYPE:Request type is a demand (1) or prefetch (0)" }, { 0x0400, "REQ_ORD_TYPE:Request is an ordered type" }, { 0x0800, "MEM_TYPE0:Memory type encoding bit 0" }, { 0x1000, "MEM_TYPE1:Memory type encoding bit 1" }, { 0x2000, "MEM_TYPE2:Memory type encoding bit 2" } } }; /* firm_uop unit mask: its only bit, ALL (0x8000), is the default. */ static const struct perfctr_unit_mask_1 p4_um_firm_uop = { { .type = perfctr_um_type_bitmask, .default_value = 0x8000, .nvalues = 1 }, { { 0x8000, "ALL:count all uops of this type" } } }; /* x87_SIMD_moves_uop unit mask: the default 0x18 sets both bits. */ static const struct perfctr_unit_mask_2 p4_um_x87_SIMD_moves_uop = { { .type = perfctr_um_type_bitmask, .default_value = 0x18, .nvalues = 2 }, { { 0x08, "ALLP0:Count all x87/SIMD store/move uops" }, { 0x10, "ALLP2:count all x87/SIMD load uops" } } }; /* TC_misc unit mask: its only bit, FLUSH, is the default. */ static const struct perfctr_unit_mask_1 p4_um_TC_misc = { { .type = perfctr_um_type_bitmask, .default_value = 0x10, .nvalues = 1 }, { { 0x10, "FLUSH:Number of flushes" } } }; /* global_power_events unit mask: its only bit, Running, is the default. */ static const struct perfctr_unit_mask_1 p4_um_global_power_events = { { .type = perfctr_um_type_bitmask, .default_value = 0x01, .nvalues = 1 }, { { 0x01, "Running:The processor is active" } } }; /* tc_ms_xfer unit mask: its only bit, CISC, is the default. */ static const struct perfctr_unit_mask_1 p4_um_tc_ms_xfer = { { .type = perfctr_um_type_bitmask, .default_value = 0x01, .nvalues = 1 }, { { 0x01, "CISC:A TC to MS transfer ocurred" } } }; /* uop_queue_writes unit mask: the default 0x07 sets all three bits. */ static const struct perfctr_unit_mask_3 p4_um_uop_queue_writes = { { .type = perfctr_um_type_bitmask, .default_value = 0x07, .nvalues = 3 }, { { 0x01, "FROM_TC_BUILD:uops written from TC build mode" }, { 0x02, "FROM_TC_DELIVER:uops written from TC deliver mode" }, { 0x04, "FROM_ROM:uops written from microcode ROM" } } }; /* branch_type unit mask: the default 0x1E sets all four bits. */ static const struct perfctr_unit_mask_4 p4_um_branch_type = { { .type = perfctr_um_type_bitmask, .default_value = 0x1E, .nvalues = 4 }, { { 0x02, "CONDITIONAL:Conditional jumps" }, { 0x04, "CALL:Call branches" }, /* XXX: diff MISPRED/non-MISPRED events? 
*/ { 0x08, "RETURN:Return branches" }, { 0x10, "INDIRECT:Returns, indirect calls, or indirect jumps" } } }; /* resource_stall unit mask: its only bit, SBFULL, is the default. */ static const struct perfctr_unit_mask_1 p4_um_resource_stall = { { .type = perfctr_um_type_bitmask, .default_value = 0x20, .nvalues = 1 }, { { 0x20, "SBFULL:A Stall due to lack of store buffers" } } }; /* WC_Buffer unit mask: the default 0x01 sets WCB_EVICTS only. */ static const struct perfctr_unit_mask_3 p4_um_WC_Buffer = { { .type = perfctr_um_type_bitmask, .default_value = 0x01, .nvalues = 3 }, { { 0x01, "WCB_EVICTS:all causes" }, { 0x02, "WCB_FULL_EVICT:no WC buffer is available" }, /* XXX: 245472-011 no longer lists bit 2, but that looks like a table formatting error. Keeping it for now. */ { 0x04, "WCB_HITM_EVICT:store encountered a Hit Modified condition" } } }; /* b2b_cycles unit mask: the default 0x7E sets all six bits. */ static const struct perfctr_unit_mask_6 p4_um_b2b_cycles = { /* XXX: bits 1-6; no details documented yet */ { .type = perfctr_um_type_bitmask, .default_value = 0x7E, .nvalues = 6 }, { { 0x02, "bit 1" }, { 0x04, "bit 2" }, { 0x08, "bit 3" }, { 0x10, "bit 4" }, { 0x20, "bit 5" }, { 0x40, "bit 6" } } }; /* bnr unit mask: the default 0x07 sets all three bits. */ static const struct perfctr_unit_mask_3 p4_um_bnr = { /* XXX: bits 0-2; no details documented yet */ { .type = perfctr_um_type_bitmask, .default_value = 0x07, .nvalues = 3 }, { { 0x01, "bit 0" }, { 0x02, "bit 1" }, { 0x04, "bit 2" } } }; /* snoop unit mask: the default 0xC4 sets all three bits. */ static const struct perfctr_unit_mask_3 p4_um_snoop = { /* XXX: bits 2, 6, and 7; no details documented yet */ { .type = perfctr_um_type_bitmask, .default_value = 0xC4, .nvalues = 3 }, { { 0x04, "bit 2" }, { 0x40, "bit 6" }, { 0x80, "bit 7" } } }; /* response unit mask: the default 0x306 sets all four bits. */ static const struct perfctr_unit_mask_4 p4_um_response = { /* XXX: bits 1, 2, 8, and 9; no details documented yet */ { .type = perfctr_um_type_bitmask, .default_value = 0x306, .nvalues = 4 }, { { 0x002, "bit 1" }, { 0x004, "bit 2" }, { 0x100, "bit 8" }, { 0x200, "bit 9" } } }; /* nbogus_bogus unit mask: the default 0x01 sets NBOGUS only. */ static const struct perfctr_unit_mask_2 p4_um_nbogus_bogus = { { .type = perfctr_um_type_bitmask, .default_value = 0x01, .nvalues = 2 }, { { 0x01, "NBOGUS:The marked uops are not bogus" }, { 0x02, "BOGUS:The 
marked uops are bogus" } } }; /* execution_event unit mask: the default 0x01 sets NBOGUS0 only. */ static const struct perfctr_unit_mask_8 p4_um_execution_event = { { .type = perfctr_um_type_bitmask, .default_value = 0x01, .nvalues = 8 }, { { 0x01, "NBOGUS0:non-bogus uops with tag bit 0 set" }, { 0x02, "NBOGUS1:non-bogus uops with tag bit 1 set" }, { 0x04, "NBOGUS2:non-bogus uops with tag bit 2 set" }, { 0x08, "NBOGUS3:non-bogus uops with tag bit 3 set" }, { 0x10, "BOGUS0:bogus uops with tag bit 0 set" }, { 0x20, "BOGUS1:bogus uops with tag bit 1 set" }, { 0x40, "BOGUS2:bogus uops with tag bit 2 set" }, { 0x80, "BOGUS3:bogus uops with tag bit 3 set" } } }; /* instr_retired unit mask: the default 0x01 sets NBOGUSNTAG only. */ static const struct perfctr_unit_mask_4 p4_um_instr_retired = { { .type = perfctr_um_type_bitmask, .default_value = 0x01, .nvalues = 4 }, { { 0x01, "NBOGUSNTAG:Non-bogus instructions that are not tagged" }, { 0x02, "NBOGUSTAG:Non-bogus instructions that are tagged" }, { 0x04, "BOGUSNTAG:Bogus instructions that are not tagged" }, { 0x08, "BOGUSTAG:Bogus instructions that are tagged" } } }; /* uop_type unit mask: the default 0x06 sets both bits. */ static const struct perfctr_unit_mask_2 p4_um_uop_type = { { .type = perfctr_um_type_bitmask, .default_value = 0x06, .nvalues = 2 }, { { 0x02, "TAGLOADS:The uop is a load operation" }, { 0x04, "TAGSTORES:The uop is a store operation" } } }; /* branch_retired unit mask: the default 0x0C sets MMTP and MMTM. */ static const struct perfctr_unit_mask_4 p4_um_branch_retired = { { .type = perfctr_um_type_bitmask, .default_value = 0x0C, /* taken branches */ .nvalues = 4 }, { { 0x01, "MMNP:Branch Not-taken Predicted" }, { 0x02, "MMNM:Branch Not-taken Mispredicted" }, { 0x04, "MMTP:Branch Taken Predicted" }, { 0x08, "MMTM:Branch Taken Mispredicted" } } }; /* mispred_branch_retired unit mask: its only bit, NBOGUS, is the default. */ static const struct perfctr_unit_mask_1 p4_um_mispred_branch_retired = { { .type = perfctr_um_type_bitmask, .default_value = 0x01, .nvalues = 1 }, { { 0x01, "NBOGUS:The retired branch is not bogus" } } }; /* x87_assist unit mask: the default 0x1F sets all five bits. */ static const struct perfctr_unit_mask_5 p4_um_x87_assist = { { .type = perfctr_um_type_bitmask, .default_value = 0x1F, .nvalues = 5 }, { { 0x01, "FPSU:FP stack underflow" }, { 0x02, "FPSO:FP stack overflow" }, { 0x04, 
"POAO:x87 output overflow" }, { 0x08, "POAU:x87 output underflow" }, { 0x10, "PREA:x87 input assist" } } }; /* machine_clear unit mask: the default 0x01 sets CLEAR only. */ static const struct perfctr_unit_mask_3 p4_um_machine_clear = { { .type = perfctr_um_type_bitmask, .default_value = 0x01, .nvalues = 3 }, { { 0x01, "CLEAR:Count a portion of the cycles when the machine is cleared" }, { 0x04, "MOCLEAR:Count clears due to memory ordering issues" }, { 0x08, "SMCLEAR:Count clears due to self-modifying code issues" } } }; static const struct perfctr_event p4_events[] = { /* Non-Retirement Events: */ { 0x01, TC_ESCR_0_1, UM(p4_um_TC_deliver_mode), "TC_deliver_mode", "duration of the operating modes of the trace cache and decode engine" }, { 0x03, BPU_ESCR_0_1, UM(p4_um_BPU_fetch_request), "BPU_fetch_request", "instruction fetch requests by the Branch Prediction unit" }, { 0x18, ITLB_ESCR_0_1, UM(p4_um_ITLB_reference), "ITLB_reference", "translations using the Instruction Translation Look-aside Buffer" }, { 0x02, DAC_ESCR_0_1, UM(p4_um_memory_cancel), "memory_cancel", "cancelled requests in the Data cache Address Control unit" }, { 0x08, SAAT_ESCR_0_1, UM(p4_um_memory_complete), "memory_complete", "completed load split, store split, uncacheable split, uncacheable load" }, { 0x04, SAAT_ESCR_0_1, UM(p4_um_load_port_replay), "load_port_replay", /* XXX: only ESCR1 supports at-retirement */ "replayed events at the load port" }, { 0x05, SAAT_ESCR_0_1, UM(p4_um_store_port_replay), "store_port_replay", /* XXX: only ESCR1 supports at-retirement */ "replayed events at the store port" }, { 0x03, MOB_ESCR_0_1, UM(p4_um_MOB_load_replay), "MOB_load_replay", "replayed loads at the memory order buffer" }, { 0x01, PMH_ESCR_0_1, UM(p4_um_page_walk_type), "page_walk_type", "page walks by the page miss handler" }, { 0x0C, BSU_ESCR_0_1, UM(p4_um_BSQ_cache_reference), "BSQ_cache_reference", "cache references seen by the bus unit" }, { 0x03, FSB_ESCR_0_1, UM(p4_um_IOQ), "IOQ_allocation", /* XXX: ESCR1 unavailable if CPUID < 0xF27 */ "bus transactions" }, 
{ 0x1A, FSB_ESCR_1, UM(p4_um_IOQ), "IOQ_active_entries", "number of active IOQ entries" }, { 0x17, FSB_ESCR_0_1, UM(p4_um_FSB_data_activity), "FSB_data_activity", "DRDY or DBSY events on the front side bus" }, { 0x05, BSU_ESCR_0, UM(p4_um_BSQ), "BSQ_allocation", "allocations in the bus sequence unit" }, { 0x06, BSU_ESCR_1, UM(p4_um_BSQ), "bsq_active_entries", "number of active BSQ entries" }, { 0x34, FIRM_ESCR_0_1, UM(p4_um_firm_uop), "SSE_input_assist", "assists requested for SSE and SSE2 input operands" }, { 0x08, FIRM_ESCR_0_1, UM(p4_um_firm_uop), "packed_SP_uop", "packed single-precision uops" }, { 0x0C, FIRM_ESCR_0_1, UM(p4_um_firm_uop), "packed_DP_uop", "packed double-precision uops" }, { 0x0A, FIRM_ESCR_0_1, UM(p4_um_firm_uop), "scalar_SP_uop", "scalar single-precision uops" }, { 0x0E, FIRM_ESCR_0_1, UM(p4_um_firm_uop), "scalar_DP_uop", "scalar double-precision uops" }, { 0x02, FIRM_ESCR_0_1, UM(p4_um_firm_uop), "64bit_MMX_uop", "64 bit SIMD MMX instructions" }, { 0x1A, FIRM_ESCR_0_1, UM(p4_um_firm_uop), "128bit_MMX_uop", "128 bit integer SIMD SSE2 instructions" }, { 0x04, FIRM_ESCR_0_1, UM(p4_um_firm_uop), "x87_FP_uop", "x87 floating-point uops" }, { 0x2E, FIRM_ESCR_0_1, UM(p4_um_x87_SIMD_moves_uop), "x87_SIMD_moves_uop", "x87 FPU, MMX, SSE, or SSE2 load, store, and move uops" }, { 0x06, TC_ESCR_0_1, UM(p4_um_TC_misc), "TC_misc", "miscellaneous events detected by the TC" }, { 0x13, FSB_ESCR_0_1, UM(p4_um_global_power_events), "global_power_events", "time during which the processor is not stopped" }, { 0x05, MS_ESCR_0_1, UM(p4_um_tc_ms_xfer), "tc_ms_xfer", "number of times uop delivery changed from TC to MS ROM" }, { 0x09, MS_ESCR_0_1, UM(p4_um_uop_queue_writes), "uop_queue_writes", "number of valid uops written to the uop queue" }, { 0x05, TBPU_ESCR_0_1, UM(p4_um_branch_type), "retired_mispred_branch_type", "retired mispredicted branches by type" }, { 0x04, TBPU_ESCR_0_1, UM(p4_um_branch_type), "retired_branch_type", "retired branches by type" }, { 0x01, 
ALF_ESCR_0_1, UM(p4_um_resource_stall), "resource_stall", /* XXX: may not be supported in all P4 models */ "stalls in the Allocator" }, { 0x05, DAC_ESCR_0_1, UM(p4_um_WC_Buffer), "WC_Buffer", "write combining buffer operations" }, { 0x16, FSB_ESCR_0_1, UM(p4_um_b2b_cycles), "b2b_cycles", /* XXX: may not be supported in all P4 models */ "back-to-back bus cycles" }, { 0x08, FSB_ESCR_0_1, UM(p4_um_bnr), "bnr", /* XXX: may not be supported in all P4 models */ "bus not ready conditions" }, { 0x06, FSB_ESCR_0_1, UM(p4_um_snoop), "snoop", /* XXX: may not be supported in all P4 models */ "snoop hit modified bus traffic" }, { 0x04, FSB_ESCR_0_1, UM(p4_um_response), "response", /* XXX: may not be supported in all P4 models */ "different types of responses" }, { 0x08, CRU_ESCR_2_3, UM(p4_um_nbogus_bogus), "front_end_event", /* XXX: another ESCR must count uop_type */ /* XXX: can support PEBS */ "retired uops, tagged by the front-end tagging mechanism" }, { 0x0C, CRU_ESCR_2_3, UM(p4_um_execution_event), "execution_event", /* XXX: needs upstream ESCR */ /* XXX: can support PEBS */ "retired uops, tagged by the execution tagging mechanism" }, { 0x09, CRU_ESCR_2_3, UM(p4_um_nbogus_bogus), "replay_event", /* XXX: needs PEBS_ENABLE, PEBS_MATRIX_VERT, and possibly upstream ESCR */ /* XXX: can support PEBS */ "retired uops, tagged by the replay tagging mechanism" }, { 0x02, CRU_ESCR_0_1, UM(p4_um_instr_retired), "instr_retired", "retired instructions" }, { 0x01, CRU_ESCR_0_1, UM(p4_um_nbogus_bogus), "uops_retired", "retired uops" }, { 0x02, RAT_ESCR_0_1, UM(p4_um_uop_type), "uop_type", "tag uops for the front-end tagging mechanism" }, { 0x06, CRU_ESCR_2_3, UM(p4_um_branch_retired), "branch_retired", "retired branches" }, { 0x03, CRU_ESCR_0_1, UM(p4_um_mispred_branch_retired), "mispred_branch_retired", "retired mispredicted branches" }, { 0x03, CRU_ESCR_2_3, UM(p4_um_x87_assist), "x87_assist", "retired x87 instructions that required special handling" }, { 0x02, CRU_ESCR_2_3, 
UM(p4_um_machine_clear), "machine_clear", "cycles or occurrences when the entire pipeline is cleared" }, }; const struct perfctr_event_set perfctr_p4_event_set = { .cpu_type = PERFCTR_X86_INTEL_P4, .event_prefix = "P4_", .include = NULL, .nevents = ARRAY_SIZE(p4_events), .events = p4_events, }; /* * Intel Pentium 4 Model 3 events. */ static const struct perfctr_event p4m3_events[] = { { 0x07, CRU_ESCR_0_1, UM(p4_um_nbogus_bogus), "instr_completed", "retired and completed instructions" }, }; const struct perfctr_event_set perfctr_p4m3_event_set = { .cpu_type = PERFCTR_X86_INTEL_P4M3, .event_prefix = "P4M3_", .include = &perfctr_p4_event_set, .nevents = ARRAY_SIZE(p4m3_events), .events = p4m3_events, }; papi-5.6.0/src/libpfm-3.y/include/perfmon/pfmlib_sicortex.h000664 001750 001750 00000010720 13216244362 025636 0ustar00jshenry1963jshenry1963000000 000000 /* * Generic MIPS64 PMU specific types and definitions * * Contributed by Philip Mucci based on code from * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
 * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
 * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#ifndef __PFMLIB_SICORTEX_H__
#define __PFMLIB_SICORTEX_H__

#include /* MIPS are bi-endian */
#include

/*
 * privilege level mask usage for MIPS:
 *
 * PFM_PLM0 = KERNEL
 * PFM_PLM1 = SUPERVISOR
 * PFM_PLM2 = INTERRUPT
 * PFM_PLM3 = USER
 */

#ifdef __cplusplus
extern "C" {
#endif

/*
 * SiCortex specific
 */

/*
 * Counter select register: the raw 64-bit value (val) overlaid with its
 * bit-field view (perfsel).
 *
 * NOTE(review): the perfsel widths add up to 66 bits
 * (1+1+1+1+1 + 6 + 23 + 32), which is more than the 64-bit val they
 * overlay -- confirm whether sel_res1 was meant to be narrower.
 */
typedef union {
	uint64_t val;				/* complete register value */
	struct {
		unsigned long sel_exl:1;	/* int level */
		unsigned long sel_os:1;		/* system level */
		unsigned long sel_sup:1;	/* supervisor level */
		unsigned long sel_usr:1;	/* user level */
		unsigned long sel_int:1;	/* enable intr */
		unsigned long sel_event_mask:6;	/* event mask */
		unsigned long sel_res1:23;	/* reserved */
		unsigned long sel_res2:32;	/* reserved */
	} perfsel;
} pfm_sicortex_sel_reg_t;

/* Size of the per-counter array in pfmlib_sicortex_input_param_t */
#define PMU_SICORTEX_SCB_NUM_COUNTERS 256

/*
 * SCB register value: the raw 64-bit value (val) overlaid with one
 * bit-field view per ScbPerf* register layout.
 *
 * NOTE(review): every view declares 32 bits except
 * sicortex_ScbPerfBucket_reg, whose fields add up to 33 (15+1+2+15) --
 * confirm the width of its Reserved field.
 */
typedef union {
	uint64_t val;
	struct {
		unsigned long Interval:4;
		unsigned long IntBit:5;
		unsigned long NoInc:1;
		unsigned long AddrAssert:1;
		unsigned long MagicEvent:2;
		unsigned long Reserved:19;
	} sicortex_ScbPerfCtl_reg;
	struct {
		unsigned long HistGte:20;
		unsigned long Reserved:12;
	} sicortex_ScbPerfHist_reg;
	struct {
		unsigned long Bucket:8;
		unsigned long Reserved:24;
	} sicortex_ScbPerfBuckNum_reg;
	struct {
		unsigned long ena:1;
		unsigned long Reserved:31;
	} sicortex_ScbPerfEna_reg;
	struct {
		unsigned long event:15;
		unsigned long hist:1;
		unsigned long ifOther:2;
		unsigned long Reserved:15;
	} sicortex_ScbPerfBucket_reg;
} pmc_sicortex_scb_reg_t;

/*
 * CPU data register value: the raw 64-bit value (val) overlaid with the
 * VPC view (42 bits declared) and the PEA view (64 bits declared).
 */
typedef union {
	uint64_t val;
	struct {
		unsigned long Reserved:2;
		uint64_t VPCL:38;
		unsigned long VPCH:2;
	} sicortex_CpuPerfVPC_reg;
	struct {
		unsigned long Reserved:5;
		unsigned long PEA:31;
		unsigned long Reserved2:12;
		unsigned long ASID:8;
		unsigned long L2STOP:4;
		unsigned long L2STATE:3;
		unsigned long L2HIT:1;
	} sicortex_CpuPerfPEA_reg;
} pmd_sicortex_cpu_reg_t;

/*
 * SCB settings stored once per request
 * (pfmlib_sicortex_input_param_t.pfp_sicortex_scb_global).
 * Field names and widths match the same-named fields of the ScbPerfCtl,
 * ScbPerfHist and ScbPerfBuckNum views above.
 */
typedef struct {
	unsigned long NoInc:1;
	unsigned long Interval:4;
	unsigned long HistGte:20;
	unsigned long Bucket:8;
} pfmlib_sicortex_scb_t;

/*
 * SCB settings stored once per counter
 * (pfmlib_sicortex_input_param_t.pfp_sicortex_scb_counters[]).
 */
typedef struct {
	unsigned long ifOther:2;
	unsigned long hist:1;
} pfmlib_sicortex_scb_counter_t;

/*
 * Single-bit flag values.
 * NOTE(review): presumably OR-ed into pfmlib_sicortex_input_param_t.flags
 * to mark which pfp_sicortex_scb_global fields the caller filled in --
 * confirm against the library code that consumes them.
 */
#define PFMLIB_SICORTEX_INPUT_SCB_NONE (unsigned long)0x0
#define PFMLIB_SICORTEX_INPUT_SCB_INTERVAL (unsigned long)0x1
#define PFMLIB_SICORTEX_INPUT_SCB_NOINC (unsigned long)0x2
#define PFMLIB_SICORTEX_INPUT_SCB_HISTGTE (unsigned long)0x4
#define PFMLIB_SICORTEX_INPUT_SCB_BUCKET (unsigned long)0x8

/* Model-specific input parameters: flags plus per-counter and global SCB settings */
typedef struct {
	unsigned long flags;
	pfmlib_sicortex_scb_counter_t pfp_sicortex_scb_counters[PMU_SICORTEX_SCB_NUM_COUNTERS];
	pfmlib_sicortex_scb_t pfp_sicortex_scb_global;
} pfmlib_sicortex_input_param_t;

/* Model-specific output parameters: only a reserved placeholder field */
typedef struct {
	unsigned long reserved;
} pfmlib_sicortex_output_param_t;

/* CPU counter */
int pfm_sicortex_is_cpu(unsigned int i);
/* SCB counter */
int pfm_sicortex_is_scb(unsigned int i);
/* Reg 25 domain support */
int pfm_sicortex_support_domain(unsigned int i);
/* VPC/PEA sampling support */
int pfm_sicortex_support_vpc_pea(unsigned int i);

#ifdef __cplusplus /* extern C */
}
#endif

#endif /* __PFMLIB_SICORTEX_H__ */
papi-5.6.0/man/man3/PAPI_library_init.3000664 001750 001750 00000003332 13216244356 021535 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPI_library_init" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPI_library_init \- .PP initialize the PAPI library\&. .SH SYNOPSIS .br .PP .SH "Detailed Description" .PP .PP .nf @param version upon initialization, PAPI checks the argument against the internal value of PAPI_VER_CURRENT when the library was compiled. This guards against portability problems when updating the PAPI shared libraries on your system. @retval PAPI_EINVAL papi.h is different from the version used to compile the PAPI library.
@retval PAPI_ENOMEM Insufficient memory to complete the operation. @retval PAPI_ECMP This component does not support the underlying hardware. @retval PAPI_ESYS A system or C library call failed inside PAPI, see the errno variable. PAPI_library_init() initializes the PAPI library. PAPI_is_initialized() checks for initialization. PAPI_library_init() must be called before any low level PAPI functions can be used. If your application is making use of threads, PAPI_thread_init() must also be called prior to making any calls to the library other than PAPI_library_init(). @par Examples: .fi .PP .PP .nf * int retval; * retval = PAPI_library_init(PAPI_VER_CURRENT); * if (retval != PAPI_VER_CURRENT && retval > 0) { * fprintf(stderr,"PAPI library version mismatch!\en"); * exit(1); } * if (retval < 0) * handle_error(retval); * retval = PAPI_is_initialized(); * if (retval != PAPI_LOW_LEVEL_INITED) * handle_error(retval); * .fi .PP .PP \fBSee Also:\fP .RS 4 \fBPAPI_thread_init\fP PAPI .RE .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. papi-5.6.0/src/libpfm-3.y/examples_ia64_v2.0/syst.c000664 001750 001750 00000021146 13216244362 023563 0ustar00jshenry1963jshenry1963000000 000000 /* * syst.c - example of a simple system wide monitoring program * * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software.
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux/ia64. */ #include #include #include #include #include #include #include #include #include #include #include #define NUM_PMCS PFMLIB_MAX_PMCS #define NUM_PMDS PFMLIB_MAX_PMDS #define MAX_EVT_NAME_LEN 128 static void fatal_error(char *fmt,...) __attribute__((noreturn)); static void fatal_error(char *fmt, ...) { va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); exit(1); } #ifndef __NR_sched_setaffinity #ifdef __ia64__ #define __NR_sched_setaffinity 1231 #endif #endif /* * Hack to get this to work without libc support */ int my_setaffinity(pid_t pid, unsigned int len, unsigned long *mask) { return syscall(__NR_sched_setaffinity, pid, len, mask); } int main(int argc, char **argv) { char **p; unsigned long my_mask; pfarg_reg_t pc[NUM_PMCS]; pfarg_reg_t pd[NUM_PMDS]; pfarg_context_t ctx[1]; pfarg_load_t load_args; pfmlib_input_param_t inp; pfmlib_output_param_t outp; pfmlib_options_t pfmlib_options; unsigned int which_cpu; int ret, ctx_fd; unsigned int i; unsigned int num_counters; char name[MAX_EVT_NAME_LEN]; /* * Initialize pfm library (required before we can use it) */ if (pfm_initialize() != PFMLIB_SUCCESS) { printf("Can't initialize library\n"); exit(1); } pfm_get_num_counters(&num_counters); /* * pass options to library (optional) */ memset(&pfmlib_options, 0, sizeof(pfmlib_options)); pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ pfm_set_options(&pfmlib_options); 
memset(pc, 0, sizeof(pc)); memset(pd, 0, sizeof(pd)); memset(ctx, 0, sizeof(ctx)); memset(&inp,0, sizeof(inp)); memset(&outp,0, sizeof(outp)); /* * be nice to user! */ if (argc > 1) { p = argv+1; for (i=0; *p ; i++, p++) { if (pfm_find_event(*p, &inp.pfp_events[i].event) != PFMLIB_SUCCESS) { fatal_error("Cannot find %s event\n", *p); } } } else { if (pfm_get_cycle_event(&inp.pfp_events[0]) != PFMLIB_SUCCESS) { fatal_error("cannot find cycle event\n"); } if (pfm_get_inst_retired_event(&inp.pfp_events[1]) != PFMLIB_SUCCESS) { fatal_error("cannot find inst retired event\n"); } i = 2; } /* * set the privilege mode: * PFM_PLM3 : user level * PFM_PLM0 : kernel level */ inp.pfp_dfl_plm = PFM_PLM3|PFM_PLM0; if (i > num_counters) { i = num_counters; printf("too many events provided (max=%d events), using first %d event(s)\n", num_counters, i); } /* * how many counters we use */ inp.pfp_event_count = i; /* * indicate we are using the monitors for a system-wide session. * This may impact the way the library sets up the PMC values. */ inp.pfp_flags = PFMLIB_PFP_SYSTEMWIDE; /* * let the library figure out the values for the PMCS */ if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) { fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); } /* * In system wide mode, the perfmon context cannot be inherited. * Also in this mode, we cannot use the blocking form of user level notification. */ ctx[0].ctx_flags = PFM_FL_SYSTEM_WIDE; /* * pick a random CPU. Assumes CPU are numbered with no holes */ srandom(getpid()); which_cpu = random() % sysconf(_SC_NPROCESSORS_ONLN); /* * perfmon relies on the application to have the task pinned * on one CPU by the time the PFM_CONTEXT_LOAD command is issued. * The perfmon context will record the active CPU at the time of PFM_CONTEXT_LOAD * and will reject any access coming from another CPU. Therefore it * is advisable to pin the task ASAP before doing any perfmon calls. 
* * On RHAS and 2.5/2.6, this can be easily achieved using the * sched_setaffinity() system call. */ my_mask = 1UL << which_cpu; ret = my_setaffinity(getpid(), sizeof(unsigned long), &my_mask); if (ret == -1) { fatal_error("cannot set affinity to 0x%lx: %s\n", my_mask, strerror(errno)); } /* * after the call the task is pinned to which_cpu */ /* * now create the context for self monitoring/per-task */ if (perfmonctl(0, PFM_CREATE_CONTEXT, ctx, 1) == -1 ) { if (errno == ENOSYS) { fatal_error("Your kernel does not have performance monitoring support!\n"); } fatal_error("Can't create PFM context %s\n", strerror(errno)); } /* * extact our file descriptor */ ctx_fd = ctx->ctx_fd; /* * Now prepare the argument to initialize the PMDs and PMCS. * We must pfp_pmc_count to determine the number of PMC to intialize. * We must use pfp_event_count to determine the number of PMD to initialize. * Some events causes extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. * * This step is new compared to libpfm-2.x. It is necessary because the library no * longer knows about the kernel data structures. */ for (i=0; i < outp.pfp_pmc_count; i++) { pc[i].reg_num = outp.pfp_pmcs[i].reg_num; pc[i].reg_value = outp.pfp_pmcs[i].reg_value; } /* * the PMC controlling the event ALWAYS come first, that's why this loop * is safe even when extra PMC are needed to support a particular event. */ for (i=0; i < inp.pfp_event_count; i++) { pd[i].reg_num = outp.pfp_pmcs[i].reg_num; } /* * Now program the registers * * We don't use the save variable to indicate the number of elements passed to * the kernel because, as we said earlier, pc may contain more elements than * the number of events we specified, i.e., contains more thann coutning monitors. 
*/ if (perfmonctl(ctx_fd, PFM_WRITE_PMCS, pc, outp.pfp_pmc_count) == -1) { fatal_error("perfmonctl error PFM_WRITE_PMCS errno %d\n",errno); } if (perfmonctl(ctx_fd, PFM_WRITE_PMDS, pd, inp.pfp_event_count) == -1) { fatal_error("perfmonctl error PFM_WRITE_PMDS errno %d\n",errno); } /* * for system wide session, we can only attached to ourself */ load_args.load_pid = getpid(); if (perfmonctl(ctx_fd, PFM_LOAD_CONTEXT, &load_args, 1) == -1) { fatal_error("perfmonctl error PFM_LOAD_CONTEXT errno %d\n",errno); } /* * start monitoring. We must go to the kernel because psr.pp cannot be * changed at the user level. */ if (perfmonctl(ctx_fd, PFM_START, 0, 0) == -1) { fatal_error("perfmonctl error PFM_START errno %d\n",errno); } printf("\n", which_cpu); printf("\n"); getchar(); /* * stop monitoring. We must go to the kernel because psr.pp cannot be * changed at the user level. */ if (perfmonctl(ctx_fd, PFM_STOP, 0, 0) == -1) { fatal_error("perfmonctl error PFM_STOP errno %d\n",errno); } printf("\n\n", which_cpu); /* * now read the results */ if (perfmonctl(ctx_fd, PFM_READ_PMDS, pd, inp.pfp_event_count) == -1) { fatal_error( "perfmonctl error READ_PMDS errno %d\n",errno); return -1; } /* * print the results * * It is important to realize, that the first event we specified may not * be in PMD4. Not all events can be measured by any monitor. That's why * we need to use the pc[] array to figure out where event i was allocated. * */ for (i=0; i < inp.pfp_event_count; i++) { pfm_get_full_event_name(&inp.pfp_events[i], name, MAX_EVT_NAME_LEN); printf("CPU%-2d PMD%u %20"PRIu64" %s\n", which_cpu, pd[i].reg_num, pd[i].reg_value, name); } /* * let's stop this now */ close(ctx_fd); return 0; } papi-5.6.0/src/libpfm-3.y/python/src/perfmon_int.i000664 001750 001750 00000012765 13216244363 024021 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2008 Google, Inc. 
* Contributed by Arun Sharma * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. * * Python Bindings for perfmon. 
*/
%module perfmon_int
%{
#include
#include
static PyObject *libpfm_err;
%}

%include "carrays.i"
%include "cstring.i"
%include

/* Some typemaps for corner cases SWIG can't handle */

/* Convert from Python --> C: element-wise copy into the fixed-size member array */
%typemap(memberin) pfmlib_event_t[ANY] {
	int i;
	for (i = 0; i < $1_dim0; i++) {
		$1[i] = $input[i];
	}
}

/* Convert from C --> Python: list of wrapped pointers, one per array element */
%typemap(out) pfmlib_event_t[ANY] {
	int len, i;
	len = $1_dim0;
	$result = PyList_New(len);
	for (i = 0; i < len; i++) {
		PyObject *o = SWIG_NewPointerObj(SWIG_as_voidptr(&$1[i]), SWIGTYPE_p_pfmlib_event_t, 0 | 0 );
		PyList_SetItem($result, i, o);
	}
}

/* Convert from Python --> C: element-wise copy into the fixed-size member array */
%typemap(memberin) pfmlib_reg_t[ANY] {
	int i;
	for (i = 0; i < $1_dim0; i++) {
		$1[i] = $input[i];
	}
}

/* Convert from C --> Python: list of wrapped pointers, one per array element */
%typemap(out) pfmlib_reg_t[ANY] {
	int len, i;
	len = $1_dim0;
	$result = PyList_New(len);
	for (i = 0; i < len; i++) {
		PyObject *o = SWIG_NewPointerObj(SWIG_as_voidptr(&$1[i]), SWIGTYPE_p_pfmlib_reg_t, 0 | 0 );
		PyList_SetItem($result, i, o);
	}
}

/*
 * Convert libpfm errors into exceptions.
 * (These two typemaps used to be defined twice, verbatim; one copy is enough.)
 */

/* A result of -1 raises OSError built from errno */
%typemap(out) os_err_t {
	if (result == -1) {
		PyErr_SetFromErrno(PyExc_OSError);
		SWIG_fail;
	}
	resultobj = SWIG_From_int((int)(result));
};

/* Anything but PFMLIB_SUCCESS raises libpfmError carrying (code, message) */
%typemap(out) pfm_err_t {
	if (result != PFMLIB_SUCCESS) {
		PyObject *obj = Py_BuildValue("(i,s)", result, pfm_strerror(result));
		PyErr_SetObject(libpfm_err, obj);
		/* PyErr_SetObject() takes its own reference; release ours */
		Py_XDECREF(obj);
		SWIG_fail;
	} else {
		PyErr_Clear();
	}
	resultobj = SWIG_From_int((int)(result));
}

%cstring_output_maxsize(char *name, size_t maxlen)
%cstring_output_maxsize(char *name, int maxlen)

%extend pfmlib_regmask_t {
	/* Value reported by pfm_regmask_weight() for this mask (0 if it reports nothing) */
	unsigned int weight() {
		unsigned int w = 0;
		pfm_regmask_weight($self, &w);
		return w;
	}
}

/* Kernel interface */
%include
%array_class(pfarg_pmc_t, pmc)
%array_class(pfarg_pmd_t, pmd)

/* Library interface */
%include

%extend pfarg_ctx_t {
	/* Clear the whole structure (sizeof *self, not the size of the pointer) */
	void zero() {
		memset(self, 0, sizeof(*self));
	}
}

%extend pfarg_load_t {
	/* Clear the whole structure (sizeof *self, not the size of the pointer) */
	void zero() {
		memset(self, 0, sizeof(*self));
	}
}

%init %{
	libpfm_err = PyErr_NewException("perfmon.libpfmError", NULL, NULL);
	PyDict_SetItemString(d, "libpfmError", libpfm_err);
%}

%inline %{
/* Helper functions to avoid pointer classes */

/* Each helper returns the value libpfm stored, or the initial value below if it stored nothing */
int pfm_py_get_pmu_type(void) {
	int tmp = -1;
	pfm_get_pmu_type(&tmp);
	return tmp;
}

unsigned int pfm_py_get_hw_counter_width(void) {
	unsigned int tmp = 0;
	pfm_get_hw_counter_width(&tmp);
	return tmp;
}

unsigned int pfm_py_get_num_events(void) {
	unsigned int tmp = 0;
	pfm_get_num_events(&tmp);
	return tmp;
}

int pfm_py_get_event_code(int idx) {
	int tmp = 0;
	pfm_get_event_code(idx, &tmp);
	return tmp;
}

unsigned int pfm_py_get_num_event_masks(int idx) {
	unsigned int tmp = 0;
	pfm_get_num_event_masks(idx, &tmp);
	return tmp;
}

unsigned int pfm_py_get_event_mask_code(int idx, int i) {
	unsigned int tmp = 0;
	pfm_get_event_mask_code(idx, i, &tmp);
	return tmp;
}

#define PFMON_MAX_EVTNAME_LEN 128

/* Event name as a Python string; empty string if libpfm writes nothing */
PyObject *pfm_py_get_event_name(int idx) {
	char name[PFMON_MAX_EVTNAME_LEN] = "";
	pfm_get_event_name(idx, name, PFMON_MAX_EVTNAME_LEN);
	return PyString_FromString(name);
}

/* Unit-mask name as a Python string; empty string if libpfm writes nothing */
PyObject *pfm_py_get_event_mask_name(int idx, int i) {
	char name[PFMON_MAX_EVTNAME_LEN] = "";
	pfm_get_event_mask_name(idx, i, name, PFMON_MAX_EVTNAME_LEN);
	return PyString_FromString(name);
}

/* Event description as a Python string, or None when libpfm provides none */
PyObject *pfm_py_get_event_description(int idx) {
	char *desc = NULL;
	PyObject *ret;
	if (pfm_get_event_description(idx, &desc) != PFMLIB_SUCCESS || desc == NULL) {
		free(desc);
		Py_RETURN_NONE;
	}
	ret = PyString_FromString(desc);
	free(desc);
	return ret;
}

/* Unit-mask description as a Python string, or None when libpfm provides none */
PyObject *pfm_py_get_event_mask_description(int idx, int i) {
	char *desc = NULL;
	PyObject *ret;
	if (pfm_get_event_mask_description(idx, i, &desc) != PFMLIB_SUCCESS || desc == NULL) {
		free(desc);
		Py_RETURN_NONE;
	}
	ret = PyString_FromString(desc);
	free(desc);
	return ret;
}
%}
papi-5.6.0/man/man3/PAPIF_enum_event.3000664 001750 001750 00000001010 13216244355 021307
0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPIF_enum_event" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPIF_enum_event \- .PP Return the number of events in an event set\&. .SH SYNOPSIS .br .PP .SH "Detailed Description" .PP .PP \fBFortran Interface:\fP .RS 4 #include 'fpapi\&.h' .br \fBPAPIF_enum_event( C_INT EventCode, C_INT modifier, C_INT check )\fP .RE .PP \fBSee Also:\fP .RS 4 \fBPAPI_enum_event\fP .RE .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. papi-5.6.0/src/components/appio/appio.h000664 001750 001750 00000004123 13216244356 022104 0ustar00jshenry1963jshenry1963000000 000000 /****************************/ /* THIS IS OPEN SOURCE CODE */ /****************************/ /** * @file appio.h * CVS: $Id: appio.h,v 1.1.2.4 2012/02/01 05:01:00 tmohan Exp $ * * @author Philip Mucci * phil.mucci@samaratechnologygroup.com * * @author Tushar Mohan * tushar.mohan@samaratechnologygroup.com * * @ingroup papi_components * * @brief appio component * This file contains the source code for a component that enables * PAPI to access application level file and socket I/O information. * It does this through function replacement in the first person and * by trapping syscalls in the third person. */ #ifndef _PAPI_APPIO_H #define _PAPI_APPIO_H #include /************************* DEFINES SECTION ***********************************/ /* Set this equal to the number of elements in _appio_counter_info array */ #define APPIO_MAX_COUNTERS 45 /** Structure that stores private information of each event */ typedef struct APPIO_register { /* This is used by the framework. It likes it to be !=0 to do something */ unsigned int selector; } APPIO_register_t; /* * The following structures mimic the ones used by other components. It is more * convenient to use them like that as programming with PAPI makes specific * assumptions for them. 
*/ /* This structure is used to build the table of events */ typedef struct APPIO_native_event_entry { APPIO_register_t resources; const char* name; const char* description; } APPIO_native_event_entry_t; typedef struct APPIO_reg_alloc { APPIO_register_t ra_bits; } APPIO_reg_alloc_t; typedef struct APPIO_control_state { int num_events; int counter_bits[APPIO_MAX_COUNTERS]; long long values[APPIO_MAX_COUNTERS]; // used for caching } APPIO_control_state_t; typedef struct APPIO_context { APPIO_control_state_t state; } APPIO_context_t; /************************* GLOBALS SECTION *********************************** *******************************************************************************/ #endif /* _PAPI_APPIO_H */ /* vim:set ts=4 sw=4 sts=4 et: */ papi-5.6.0/src/components/appio/appio.c000664 001750 001750 00000062251 13216244356 022105 0ustar00jshenry1963jshenry1963000000 000000 /****************************/ /* THIS IS OPEN SOURCE CODE */ /****************************/ /** * @file appio.c * * @author Philip Mucci * phil.mucci@samaratechnologygroup.com * * @author Tushar Mohan * tusharmohan@gmail.com * * Credit to: * Jose Pedro Oliveira * jpo@di.uminho.pt * whose code in the linux net component was used as a template for * many sections of code in this component. * * @ingroup papi_components * * @brief appio component * This file contains the source code for a component that enables * PAPI to access application level file and socket I/O information. * It does this through function replacement in the first person and * by trapping syscalls in the third person. 
*/ #include #include #include #include #include #include #include #include /* Headers required by PAPI */ #include "papi.h" #include "papi_internal.h" #include "papi_vector.h" #include "papi_memory.h" #include "appio.h" // The PIC test implies it's built for shared linkage #ifdef PIC # include "dlfcn.h" #endif /* #pragma weak dlerror static void *_dlsym_fake(void *handle, const char* symbol) { (void) handle; (void) symbol; return NULL; } void *dlsym(void *handle, const char* symbol) __attribute__ ((weak, alias ("_dlsym_fake"))); */ papi_vector_t _appio_vector; /********************************************************************* * Private ********************************************************************/ //#define APPIO_FOO 1 static APPIO_native_event_entry_t * _appio_native_events; /* If you modify the appio_stats_t below, you MUST update APPIO_MAX_COUNTERS */ static __thread long long _appio_register_current[APPIO_MAX_COUNTERS]; typedef enum { READ_BYTES = 0, READ_CALLS, READ_ERR, READ_INTERRUPTED, READ_WOULD_BLOCK, READ_SHORT, READ_EOF, READ_BLOCK_SIZE, READ_USEC, WRITE_BYTES, WRITE_CALLS, WRITE_ERR, WRITE_SHORT, WRITE_INTERRUPTED, WRITE_WOULD_BLOCK, WRITE_BLOCK_SIZE, WRITE_USEC, OPEN_CALLS, OPEN_ERR, OPEN_FDS, SELECT_USEC, RECV_BYTES, RECV_CALLS, RECV_ERR, RECV_INTERRUPTED, RECV_WOULD_BLOCK, RECV_SHORT, RECV_EOF, RECV_BLOCK_SIZE, RECV_USEC, SOCK_READ_BYTES, SOCK_READ_CALLS, SOCK_READ_ERR, SOCK_READ_SHORT, SOCK_READ_WOULD_BLOCK, SOCK_READ_USEC, SOCK_WRITE_BYTES, SOCK_WRITE_CALLS, SOCK_WRITE_ERR, SOCK_WRITE_SHORT, SOCK_WRITE_WOULD_BLOCK, SOCK_WRITE_USEC, SEEK_CALLS, SEEK_ABS_STRIDE_SIZE, SEEK_USEC } _appio_stats_t ; static const struct appio_counters { const char *name; const char *description; } _appio_counter_info[APPIO_MAX_COUNTERS] = { { "READ_BYTES", "Bytes read"}, { "READ_CALLS", "Number of read calls"}, { "READ_ERR", "Number of read calls that resulted in an error"}, { "READ_INTERRUPTED","Number of read calls that timed out or were interruped"}, { 
"READ_WOULD_BLOCK","Number of read calls that would have blocked"}, { "READ_SHORT", "Number of read calls that returned less bytes than requested"}, { "READ_EOF", "Number of read calls that returned an EOF"}, { "READ_BLOCK_SIZE", "Average block size of reads"}, { "READ_USEC", "Real microseconds spent in reads"}, { "WRITE_BYTES", "Bytes written"}, { "WRITE_CALLS", "Number of write calls"}, { "WRITE_ERR", "Number of write calls that resulted in an error"}, { "WRITE_SHORT", "Number of write calls that wrote less bytes than requested"}, { "WRITE_INTERRUPTED","Number of write calls that timed out or were interrupted"}, { "WRITE_WOULD_BLOCK","Number of write calls that would have blocked"}, { "WRITE_BLOCK_SIZE","Mean block size of writes"}, { "WRITE_USEC", "Real microseconds spent in writes"}, { "OPEN_CALLS", "Number of open calls"}, { "OPEN_ERR", "Number of open calls that resulted in an error"}, { "OPEN_FDS", "Number of currently open descriptors"}, { "SELECT_USEC", "Real microseconds spent in select calls"}, { "RECV_BYTES", "Bytes read in recv/recvmsg/recvfrom"}, { "RECV_CALLS", "Number of recv/recvmsg/recvfrom calls"}, { "RECV_ERR", "Number of recv/recvmsg/recvfrom calls that resulted in an error"}, { "RECV_INTERRUPTED","Number of recv/recvmsg/recvfrom calls that timed out or were interruped"}, { "RECV_WOULD_BLOCK","Number of recv/recvmsg/recvfrom calls that would have blocked"}, { "RECV_SHORT", "Number of recv/recvmsg/recvfrom calls that returned less bytes than requested"}, { "RECV_EOF", "Number of recv/recvmsg/recvfrom calls that returned an EOF"}, { "RECV_BLOCK_SIZE", "Average block size of recv/recvmsg/recvfrom"}, { "RECV_USEC", "Real microseconds spent in recv/recvmsg/recvfrom"}, { "SOCK_READ_BYTES", "Bytes read from socket"}, { "SOCK_READ_CALLS", "Number of read calls on socket"}, { "SOCK_READ_ERR", "Number of read calls on socket that resulted in an error"}, { "SOCK_READ_SHORT", "Number of read calls on socket that returned less bytes than requested"}, { 
"SOCK_READ_WOULD_BLOCK", "Number of read calls on socket that would have blocked"}, { "SOCK_READ_USEC", "Real microseconds spent in read(s) on socket(s)"}, { "SOCK_WRITE_BYTES","Bytes written to socket"}, { "SOCK_WRITE_CALLS","Number of write calls to socket"}, { "SOCK_WRITE_ERR", "Number of write calls to socket that resulted in an error"}, { "SOCK_WRITE_SHORT","Number of write calls to socket that wrote less bytes than requested"}, { "SOCK_WRITE_WOULD_BLOCK","Number of write calls to socket that would have blocked"}, { "SOCK_WRITE_USEC", "Real microseconds spent in write(s) to socket(s)"}, { "SEEK_CALLS", "Number of seek calls"}, { "SEEK_ABS_STRIDE_SIZE", "Average absolute stride size of seeks"}, { "SEEK_USEC", "Real microseconds spent in seek calls"} }; /********************************************************************* *** BEGIN FUNCTIONS USED INTERNALLY SPECIFIC TO THIS COMPONENT **** ********************************************************************/ int __close(int fd); int close(int fd) { int retval; SUBDBG("appio: intercepted close(%d)\n", fd); retval = __close(fd); if ((retval == 0) && (_appio_register_current[OPEN_FDS]>0)) _appio_register_current[OPEN_FDS]--; return retval; } int __open(const char *pathname, int flags, mode_t mode); int open(const char *pathname, int flags, mode_t mode) { int retval; SUBDBG("appio: intercepted open(%s,%d,%d)\n", pathname, flags, mode); retval = __open(pathname,flags,mode); _appio_register_current[OPEN_CALLS]++; if (retval < 0) _appio_register_current[OPEN_ERR]++; else _appio_register_current[OPEN_FDS]++; return retval; } /* we use timeval as a zero value timeout to select in read/write for polling if the operation would block */ struct timeval zerotv; /* this has to be zero, so define it here */ int __select(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, struct timeval *timeout); int select(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, struct timeval *timeout) { int retval; 
SUBDBG("appio: intercepted select(%d,%p,%p,%p,%p)\n", nfds,readfds,writefds,exceptfds,timeout); long long start_ts = PAPI_get_real_usec(); retval = __select(nfds,readfds,writefds,exceptfds,timeout); long long duration = PAPI_get_real_usec() - start_ts; _appio_register_current[SELECT_USEC] += duration; return retval; } off_t __lseek(int fd, off_t offset, int whence); off_t lseek(int fd, off_t offset, int whence) { off_t retval; SUBDBG("appio: intercepted lseek(%d,%ld,%d)\n", fd, offset, whence); long long start_ts = PAPI_get_real_usec(); retval = __lseek(fd, offset, whence); long long duration = PAPI_get_real_usec() - start_ts; int n = _appio_register_current[SEEK_CALLS]++; _appio_register_current[SEEK_USEC] += duration; if (offset < 0) offset = -offset; // get abs offset _appio_register_current[SEEK_ABS_STRIDE_SIZE]= (n * _appio_register_current[SEEK_ABS_STRIDE_SIZE] + offset)/(n+1); // mean absolute stride size return retval; } extern int errno; ssize_t __read(int fd, void *buf, size_t count); ssize_t read(int fd, void *buf, size_t count) { int retval; SUBDBG("appio: intercepted read(%d,%p,%lu)\n", fd, buf, (unsigned long)count); struct stat st; int issocket = 0; if (fstat(fd, &st) == 0) { if ((st.st_mode & S_IFMT) == S_IFSOCK) issocket = 1; } // check if read would block on descriptor fd_set readfds; FD_ZERO(&readfds); FD_SET(fd, &readfds); int ready = __select(fd+1, &readfds, NULL, NULL, &zerotv); if (ready == 0) { _appio_register_current[READ_WOULD_BLOCK]++; if (issocket) _appio_register_current[SOCK_READ_WOULD_BLOCK]++; } long long start_ts = PAPI_get_real_usec(); retval = __read(fd,buf, count); long long duration = PAPI_get_real_usec() - start_ts; int n = _appio_register_current[READ_CALLS]++; // read calls if (issocket) _appio_register_current[SOCK_READ_CALLS]++; // read calls if (retval > 0) { _appio_register_current[READ_BLOCK_SIZE]= (n * _appio_register_current[READ_BLOCK_SIZE] + count)/(n+1); // mean size _appio_register_current[READ_BYTES] += retval; // 
read bytes if (issocket) _appio_register_current[SOCK_READ_BYTES] += retval; if (retval < (int)count) { _appio_register_current[READ_SHORT]++; // read short if (issocket) _appio_register_current[SOCK_READ_SHORT]++; // read short } _appio_register_current[READ_USEC] += duration; if (issocket) _appio_register_current[SOCK_READ_USEC] += duration; } if (retval < 0) { _appio_register_current[READ_ERR]++; // read err if (issocket) _appio_register_current[SOCK_READ_ERR]++; // read err if (EINTR == errno) _appio_register_current[READ_INTERRUPTED]++; // signal interrupted the read //if ((EAGAIN == errno) || (EWOULDBLOCK == errno)) { // _appio_register_current[READ_WOULD_BLOCK]++; //read would block on descriptor marked as non-blocking // if (issocket) _appio_register_current[SOCK_READ_WOULD_BLOCK]++; //read would block on descriptor marked as non-blocking //} } if (retval == 0) _appio_register_current[READ_EOF]++; // read eof return retval; } size_t _IO_fread(void *ptr, size_t size, size_t nmemb, FILE *stream); size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream) { size_t retval; SUBDBG("appio: intercepted fread(%p,%lu,%lu,%p)\n", ptr, (unsigned long) size, (unsigned long) nmemb, (void*) stream); long long start_ts = PAPI_get_real_usec(); retval = _IO_fread(ptr,size,nmemb,stream); long long duration = PAPI_get_real_usec() - start_ts; int n = _appio_register_current[READ_CALLS]++; // read calls if (retval > 0) { _appio_register_current[READ_BLOCK_SIZE]= (n * _appio_register_current[READ_BLOCK_SIZE]+ size*nmemb)/(n+1);//mean size _appio_register_current[READ_BYTES]+= retval * size; // read bytes if (retval < nmemb) _appio_register_current[READ_SHORT]++; // read short _appio_register_current[READ_USEC] += duration; } /* A value of zero returned means one of two things..*/ if (retval == 0) { if (feof(stream)) _appio_register_current[READ_EOF]++; // read eof else _appio_register_current[READ_ERR]++; // read err } return retval; } ssize_t __write(int fd, const void 
*buf, size_t count); ssize_t write(int fd, const void *buf, size_t count) { int retval; SUBDBG("appio: intercepted write(%d,%p,%lu)\n", fd, buf, (unsigned long)count); struct stat st; int issocket = 0; if (fstat(fd, &st) == 0) { if ((st.st_mode & S_IFMT) == S_IFSOCK) issocket = 1; } // check if write would block on descriptor fd_set writefds; FD_ZERO(&writefds); FD_SET(fd, &writefds); int ready = __select(fd+1, NULL, &writefds, NULL, &zerotv); if (ready == 0) { _appio_register_current[WRITE_WOULD_BLOCK]++; if (issocket) _appio_register_current[SOCK_WRITE_WOULD_BLOCK]++; } long long start_ts = PAPI_get_real_usec(); retval = __write(fd,buf, count); long long duration = PAPI_get_real_usec() - start_ts; int n = _appio_register_current[WRITE_CALLS]++; // write calls if (issocket) _appio_register_current[SOCK_WRITE_CALLS]++; // socket write if (retval >= 0) { _appio_register_current[WRITE_BLOCK_SIZE]= (n * _appio_register_current[WRITE_BLOCK_SIZE] + count)/(n+1); // mean size _appio_register_current[WRITE_BYTES]+= retval; // write bytes if (issocket) _appio_register_current[SOCK_WRITE_BYTES] += retval; if (retval < (int)count) { _appio_register_current[WRITE_SHORT]++; // short write if (issocket) _appio_register_current[SOCK_WRITE_SHORT]++; } _appio_register_current[WRITE_USEC] += duration; if (issocket) _appio_register_current[SOCK_WRITE_USEC] += duration; } if (retval < 0) { _appio_register_current[WRITE_ERR]++; // err if (issocket) _appio_register_current[SOCK_WRITE_ERR]++; if (EINTR == errno) _appio_register_current[WRITE_INTERRUPTED]++; // signal interrupted the op //if ((EAGAIN == errno) || (EWOULDBLOCK == errno)) { // _appio_register_current[WRITE_WOULD_BLOCK]++; //op would block on descriptor marked as non-blocking // if (issocket) _appio_register_current[SOCK_WRITE_WOULD_BLOCK]++; //} } return retval; } // The PIC test implies it's built for shared linkage #ifdef PIC static ssize_t (*__recv)(int sockfd, void *buf, size_t len, int flags) = NULL; ssize_t recv(int 
sockfd, void *buf, size_t len, int flags) { int retval; SUBDBG("appio: intercepted recv(%d,%p,%lu,%d)\n", sockfd, buf, (unsigned long)len, flags); if (!__recv) __recv = dlsym(RTLD_NEXT, "recv"); if (!__recv) { fprintf(stderr, "appio,c Internal Error: Could not obtain handle for real recv\n"); exit(1); } // check if recv would block on descriptor fd_set readfds; FD_ZERO(&readfds); FD_SET(sockfd, &readfds); int ready = __select(sockfd+1, &readfds, NULL, NULL, &zerotv); if (ready == 0) _appio_register_current[RECV_WOULD_BLOCK]++; long long start_ts = PAPI_get_real_usec(); retval = __recv(sockfd, buf, len, flags); long long duration = PAPI_get_real_usec() - start_ts; int n = _appio_register_current[RECV_CALLS]++; // read calls if (retval > 0) { _appio_register_current[RECV_BLOCK_SIZE]= (n * _appio_register_current[RECV_BLOCK_SIZE] + len)/(n+1); // mean size _appio_register_current[RECV_BYTES] += retval; // read bytes if (retval < (int)len) _appio_register_current[RECV_SHORT]++; // read short _appio_register_current[RECV_USEC] += duration; } if (retval < 0) { _appio_register_current[RECV_ERR]++; // read err if (EINTR == errno) _appio_register_current[RECV_INTERRUPTED]++; // signal interrupted the read if ((EAGAIN == errno) || (EWOULDBLOCK == errno)) _appio_register_current[RECV_WOULD_BLOCK]++; //read would block on descriptor marked as non-blocking } if (retval == 0) _appio_register_current[RECV_EOF]++; // read eof return retval; } #endif /* PIC */ size_t _IO_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream); size_t fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream) { size_t retval; SUBDBG("appio: intercepted fwrite(%p,%lu,%lu,%p)\n", ptr, (unsigned long) size, (unsigned long) nmemb, (void*) stream); long long start_ts = PAPI_get_real_usec(); retval = _IO_fwrite(ptr,size,nmemb,stream); long long duration = PAPI_get_real_usec() - start_ts; int n = _appio_register_current[WRITE_CALLS]++; // write calls if (retval > 0) { 
_appio_register_current[WRITE_BLOCK_SIZE]= (n * _appio_register_current[WRITE_BLOCK_SIZE] + size*nmemb)/(n+1); // mean block size _appio_register_current[WRITE_BYTES]+= retval * size; // write bytes if (retval < nmemb) _appio_register_current[WRITE_SHORT]++; // short write _appio_register_current[WRITE_USEC] += duration; } if (retval == 0) _appio_register_current[WRITE_ERR]++; // err return retval; } /********************************************************************* *************** BEGIN PAPI's COMPONENT REQUIRED FUNCTIONS ********* *********************************************************************/ /* * This is called whenever a thread is initialized */ static int _appio_init_thread( hwd_context_t *ctx ) { ( void ) ctx; SUBDBG("_appio_init_thread %p\n", ctx); return PAPI_OK; } /* Initialize hardware counters, setup the function vector table * and get hardware information, this routine is called when the * PAPI process is initialized (IE PAPI_library_init) */ static int _appio_init_component( int cidx ) { SUBDBG("_appio_component %d\n", cidx); _appio_native_events = (APPIO_native_event_entry_t *) papi_calloc(APPIO_MAX_COUNTERS, sizeof(APPIO_native_event_entry_t)); if (_appio_native_events == NULL ) { PAPIERROR( "malloc():Could not get memory for events table" ); return PAPI_ENOMEM; } int i; for (i=0; ivalues, 0, APPIO_MAX_COUNTERS*sizeof(appio_ctl->values[0])); return PAPI_OK; } static int _appio_read( hwd_context_t *ctx, hwd_control_state_t *ctl, long long ** events, int flags ) { (void) flags; (void) ctx; SUBDBG("_appio_read %p %p\n", ctx, ctl); APPIO_control_state_t *appio_ctl = (APPIO_control_state_t *) ctl; int i; for ( i=0; inum_events; i++ ) { int index = appio_ctl->counter_bits[i]; SUBDBG("event=%d, index=%d, val=%lld\n", i, index, _appio_register_current[index]); appio_ctl->values[index] = _appio_register_current[index]; } *events = appio_ctl->values; return PAPI_OK; } static int _appio_stop( hwd_context_t *ctx, hwd_control_state_t *ctl ) { (void) 
ctx; SUBDBG("_appio_stop ctx=%p ctl=%p\n", ctx, ctl); APPIO_control_state_t *appio_ctl = (APPIO_control_state_t *) ctl; int i; for ( i=0; inum_events; i++ ) { int index = appio_ctl->counter_bits[i]; SUBDBG("event=%d, index=%d, val=%lld\n", i, index, _appio_register_current[index]); appio_ctl->values[i] = _appio_register_current[index]; } return PAPI_OK; } /* * Thread shutdown */ static int _appio_shutdown_thread( hwd_context_t *ctx ) { ( void ) ctx; return PAPI_OK; } /* * Clean up what was setup in appio_init_component(). */ static int _appio_shutdown_component( void ) { papi_free( _appio_native_events ); return PAPI_OK; } /* This function sets various options in the component * The valid codes being passed in are PAPI_SET_DEFDOM, * PAPI_SET_DOMAIN, PAPI_SETDEFGRN, PAPI_SET_GRANUL and * PAPI_SET_INHERIT */ static int _appio_ctl( hwd_context_t *ctx, int code, _papi_int_option_t *option ) { ( void ) ctx; ( void ) code; ( void ) option; return PAPI_OK; } static int _appio_update_control_state( hwd_control_state_t *ctl, NativeInfo_t *native, int count, hwd_context_t *ctx ) { ( void ) ctx; ( void ) ctl; SUBDBG("_appio_update_control_state ctx=%p ctl=%p num_events=%d\n", ctx, ctl, count); int i, index; APPIO_control_state_t *appio_ctl = (APPIO_control_state_t *) ctl; (void) ctx; for ( i = 0; i < count; i++ ) { index = native[i].ni_event; appio_ctl->counter_bits[i] = index; native[i].ni_position = index; } appio_ctl->num_events = count; return PAPI_OK; } /* * This function has to set the bits needed to count different domains * In particular: PAPI_DOM_USER, PAPI_DOM_KERNEL PAPI_DOM_OTHER * By default return PAPI_EINVAL if none of those are specified * and PAPI_OK with success * PAPI_DOM_USER is only user context is counted * PAPI_DOM_KERNEL is only the Kernel/OS context is counted * PAPI_DOM_OTHER is Exception/transient mode (like user TLB misses) * PAPI_DOM_ALL is all of the domains */ static int _appio_set_domain( hwd_control_state_t *ctl, int domain ) { ( void ) ctl; 
int found = 0; if ( PAPI_DOM_USER == domain ) found = 1; if ( !found ) return PAPI_EINVAL; return PAPI_OK; } static int _appio_reset( hwd_context_t *ctx, hwd_control_state_t *ctl ) { ( void ) ctx; ( void ) ctl; return PAPI_OK; } /* * Native Event functions */ static int _appio_ntv_enum_events( unsigned int *EventCode, int modifier ) { int index; switch ( modifier ) { case PAPI_ENUM_FIRST: *EventCode = 0; return PAPI_OK; break; case PAPI_ENUM_EVENTS: index = *EventCode; if ( index < APPIO_MAX_COUNTERS - 1 ) { *EventCode = *EventCode + 1; return PAPI_OK; } else { return PAPI_ENOEVNT; } break; default: return PAPI_EINVAL; break; } return PAPI_EINVAL; } /* * */ static int _appio_ntv_name_to_code( const char *name, unsigned int *EventCode ) { int i; for ( i=0; i= 0 && index < APPIO_MAX_COUNTERS ) { strncpy( name, _appio_counter_info[index].name, len ); return PAPI_OK; } return PAPI_ENOEVNT; } /* * */ static int _appio_ntv_code_to_descr( unsigned int EventCode, char *desc, int len ) { int index = EventCode; if ( index >= 0 && index < APPIO_MAX_COUNTERS ) { strncpy(desc, _appio_counter_info[index].description, len ); return PAPI_OK; } return PAPI_ENOEVNT; } /* * */ static int _appio_ntv_code_to_bits( unsigned int EventCode, hwd_register_t *bits ) { int index = EventCode; if ( index >= 0 && index < APPIO_MAX_COUNTERS ) { memcpy( ( APPIO_register_t * ) bits, &( _appio_native_events[index].resources ), sizeof ( APPIO_register_t ) ); return PAPI_OK; } return PAPI_ENOEVNT; } /* * */ papi_vector_t _appio_vector = { .cmp_info = { /* default component information (unspecified values are initialized to 0) */ .name = "appio", .short_name = "appio", .version = "1.1.2.4", .CmpIdx = 0, /* set by init_component */ .num_mpx_cntrs = APPIO_MAX_COUNTERS, .num_cntrs = APPIO_MAX_COUNTERS, .default_domain = PAPI_DOM_USER, .available_domains = PAPI_DOM_USER, .default_granularity = PAPI_GRN_THR, .available_granularities = PAPI_GRN_THR, .hardware_intr_sig = PAPI_INT_SIGNAL, /* component specific 
cmp_info initializations */ .fast_real_timer = 0, .fast_virtual_timer = 0, .attach = 0, .attach_must_ptrace = 0, }, /* sizes of framework-opaque component-private structures */ .size = { .context = sizeof ( APPIO_context_t ), .control_state = sizeof ( APPIO_control_state_t ), .reg_value = sizeof ( APPIO_register_t ), .reg_alloc = sizeof ( APPIO_reg_alloc_t ), }, /* function pointers in this component */ .init_thread = _appio_init_thread, .init_component = _appio_init_component, .init_control_state = _appio_init_control_state, .start = _appio_start, .stop = _appio_stop, .read = _appio_read, .shutdown_thread = _appio_shutdown_thread, .shutdown_component = _appio_shutdown_component, .ctl = _appio_ctl, .update_control_state = _appio_update_control_state, .set_domain = _appio_set_domain, .reset = _appio_reset, .ntv_enum_events = _appio_ntv_enum_events, .ntv_name_to_code = _appio_ntv_name_to_code, .ntv_code_to_name = _appio_ntv_code_to_name, .ntv_code_to_descr = _appio_ntv_code_to_descr, .ntv_code_to_bits = _appio_ntv_code_to_bits /* .ntv_bits_to_info = NULL, */ }; /* vim:set ts=4 sw=4 sts=4 et: */ papi-5.6.0/src/libpfm-3.y/include/perfmon/perfmon_sparc.h000664 001750 001750 00000000543 13216244362 025305 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2007 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * This file should never be included directly, use * instead. */ #ifndef _PERFMON_SPARC_H_ #define _PERFMON_SPARC_H_ #define PFM_ARCH_MAX_PMCS 1 #define PFM_ARCH_MAX_PMDS 2 #endif /* _PERFMON_SPARC_H_ */ papi-5.6.0/src/libpfm-3.y/docs/man3/pfm_get_full_event_name.3000664 001750 001750 00000000036 13216244361 025717 0ustar00jshenry1963jshenry1963000000 000000 .so man3/pfm_get_event_name.3 papi-5.6.0/src/ctests/zero_named.c000664 001750 001750 00000011167 13216244361 021124 0ustar00jshenry1963jshenry1963000000 000000 /* This test exercises the PAPI_{query, add, remove}_event APIs for PRESET events. 
It more or less duplicates the functionality of the classic "zero" test. */ #include #include "papi.h" #include "papi_test.h" #include "do_loops.h" int main( int argc, char **argv ) { int retval, num_tests = 1, tmp; int EventSet = PAPI_NULL; int num_events = 2; long long **values; long long elapsed_us, elapsed_cyc, elapsed_virt_us, elapsed_virt_cyc; const char *event_names[] = {"PAPI_TOT_CYC","PAPI_TOT_INS"}; char add_event_str[PAPI_MAX_STR_LEN]; double cycles_error; int quiet; /* Set TESTS_QUIET variable */ quiet=tests_quiet( argc, argv ); /* Init the PAPI library */ retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) { test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); } /* Verify that the named events exist */ retval = PAPI_query_named_event(event_names[0]); if ( retval == PAPI_OK) { retval = PAPI_query_named_event(event_names[1]); } if ( retval != PAPI_OK ) { if (!quiet) printf("Trouble querying events\n"); test_skip( __FILE__, __LINE__, "PAPI_query_named_event", retval ); } /* Create an empty event set */ retval = PAPI_create_eventset( &EventSet ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); /* add the events named above */ retval = PAPI_add_named_event( EventSet, event_names[0] ); if ( retval != PAPI_OK ) { sprintf( add_event_str, "PAPI_add_named_event[%s]", event_names[0] ); test_fail( __FILE__, __LINE__, add_event_str, retval ); } retval = PAPI_add_named_event( EventSet, event_names[1] ); if ( retval != PAPI_OK ) { sprintf( add_event_str, "PAPI_add_named_event[%s]", event_names[1] ); test_fail( __FILE__, __LINE__, add_event_str, retval ); } values = allocate_test_space( num_tests, num_events ); /* Gather before stats */ elapsed_us = PAPI_get_real_usec( ); elapsed_cyc = PAPI_get_real_cyc( ); elapsed_virt_us = PAPI_get_virt_usec( ); elapsed_virt_cyc = PAPI_get_virt_cyc( ); /* Start PAPI */ retval = PAPI_start( EventSet ); if ( retval != PAPI_OK ) { test_fail( __FILE__, 
__LINE__, "PAPI_start", retval ); } /* our test code */ do_flops( NUM_FLOPS ); /* Stop PAPI */ retval = PAPI_stop( EventSet, values[0] ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); } /* Calculate total values */ elapsed_virt_us = PAPI_get_virt_usec( ) - elapsed_virt_us; elapsed_virt_cyc = PAPI_get_virt_cyc( ) - elapsed_virt_cyc; elapsed_us = PAPI_get_real_usec( ) - elapsed_us; elapsed_cyc = PAPI_get_real_cyc( ) - elapsed_cyc; /* remove PAPI_TOT_CYC and PAPI_TOT_INS */ retval = PAPI_remove_named_event( EventSet, event_names[0] ); if ( retval != PAPI_OK ) { sprintf( add_event_str, "PAPI_add_named_event[%s]", event_names[0] ); test_fail( __FILE__, __LINE__, add_event_str, retval ); } retval = PAPI_remove_named_event( EventSet, event_names[1] ); if ( retval != PAPI_OK ) { sprintf( add_event_str, "PAPI_add_named_event[%s]", event_names[1] ); test_fail( __FILE__, __LINE__, add_event_str, retval ); } if ( !quiet ) { printf( "PAPI_{query, add, remove}_named_event API test.\n" ); printf( "-----------------------------------------------\n" ); tmp = PAPI_get_opt( PAPI_DEFDOM, NULL ); printf( "Default domain is: %d (%s)\n", tmp, stringify_all_domains( tmp ) ); tmp = PAPI_get_opt( PAPI_DEFGRN, NULL ); printf( "Default granularity is: %d (%s)\n", tmp, stringify_granularity( tmp ) ); printf( "Using %d iterations of c += a*b\n", NUM_FLOPS ); printf( "-------------------------------------------------------------------------\n" ); printf( "Test type : \t 1\n" ); /* cycles is first, other event second */ sprintf( add_event_str, "%-12s : \t", event_names[0] ); printf( TAB1, add_event_str, values[0][0] ); sprintf( add_event_str, "%-12s : \t", event_names[1] ); printf( TAB1, add_event_str, values[0][1] ); printf( TAB1, "Real usec : \t", elapsed_us ); printf( TAB1, "Real cycles : \t", elapsed_cyc ); printf( TAB1, "Virt usec : \t", elapsed_virt_us ); printf( TAB1, "Virt cycles : \t", elapsed_virt_cyc ); printf( 
"-------------------------------------------------------------------------\n" ); printf( "Verification: PAPI_TOT_CYC should be roughly real_cycles\n" ); cycles_error=100.0*((double)values[0][0] - (double)elapsed_cyc)/ (double)values[0][0]; if (cycles_error>10.0) { printf("Error of %.2f%%\n",cycles_error); test_fail( __FILE__, __LINE__, "validation", 0 ); } } test_pass( __FILE__ ); return 0; } papi-5.6.0/src/libpfm4/lib/pfmlib_intel_hswep_unc_pcu.c000664 001750 001750 00000006730 13216244365 025171 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_intel_hswep_unc_pcu.c : Intel Haswell-EP Power Control Unit (PCU) uncore PMU * * Copyright (c) 2014 Google Inc. All rights reserved * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include #include #include #include #include /* private headers */ #include "pfmlib_priv.h" #include "pfmlib_intel_x86_priv.h" #include "pfmlib_intel_snbep_unc_priv.h" #include "events/intel_hswep_unc_pcu_events.h" static void display_pcu(void *this, pfmlib_event_desc_t *e, void *val) { const intel_x86_entry_t *pe = this_pe(this); pfm_snbep_unc_reg_t *reg = val; pfm_snbep_unc_reg_t f; __pfm_vbprintf("[UNC_PCU=0x%"PRIx64" event=0x%x sel_ext=%d occ_sel=0x%x en=%d " "edge=%d thres=%d occ_inv=%d occ_edge=%d] %s\n", reg->val, reg->ivbep_pcu.unc_event, reg->ivbep_pcu.unc_sel_ext, reg->ivbep_pcu.unc_occ, reg->ivbep_pcu.unc_en, reg->ivbep_pcu.unc_edge, reg->ivbep_pcu.unc_thres, reg->ivbep_pcu.unc_occ_inv, reg->ivbep_pcu.unc_occ_edge, pe[e->event].name); if (e->count == 1) return; f.val = e->codes[1]; __pfm_vbprintf("[UNC_PCU_FILTER=0x%"PRIx64" band0=%u band1=%u band2=%u band3=%u]\n", f.val, f.pcu_filt.filt0, f.pcu_filt.filt1, f.pcu_filt.filt2, f.pcu_filt.filt3); } pfmlib_pmu_t intel_hswep_unc_pcu_support = { .desc = "Intel Haswell-EP PCU uncore", .name = "hswep_unc_pcu", .perf_name = "uncore_pcu", .pmu = PFM_PMU_INTEL_HSWEP_UNC_PCU, .pme_count = LIBPFM_ARRAY_SIZE(intel_hswep_unc_p_pe), .type = PFM_PMU_TYPE_UNCORE, .num_cntrs = 4, .num_fixed_cntrs = 0, .max_encoding = 2, .pe = intel_hswep_unc_p_pe, .atdesc = snbep_unc_mods, .flags = PFMLIB_PMU_FL_RAW_UMASK, .pmu_detect = pfm_intel_hswep_unc_detect, .get_event_encoding[PFM_OS_NONE] = pfm_intel_snbep_unc_get_encoding, PFMLIB_ENCODE_PERF(pfm_intel_snbep_unc_get_perf_encoding), PFMLIB_OS_DETECT(pfm_intel_x86_perf_detect), .get_event_first = pfm_intel_x86_get_event_first, .get_event_next = pfm_intel_x86_get_event_next, .event_is_valid = pfm_intel_x86_event_is_valid, .validate_table = pfm_intel_x86_validate_table, .get_event_info = pfm_intel_x86_get_event_info, .get_event_attr_info = pfm_intel_x86_get_event_attr_info, PFMLIB_VALID_PERF_PATTRS(pfm_intel_snbep_unc_perf_validate_pattrs), .get_event_nattrs = 
pfm_intel_x86_get_event_nattrs, .can_auto_encode = pfm_intel_snbep_unc_can_auto_encode, .display_reg = display_pcu, }; papi-5.6.0/src/darwin-context.h000664 001750 001750 00000000000 13216244361 020427 0ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/perfctr-2.6.x/patches/patch-kernel-2.6.19000664 001750 001750 00000040156 13216244367 024236 0ustar00jshenry1963jshenry1963000000 000000 --- linux-2.6.19.perfctr26/CREDITS.~1~ 2008-06-22 17:20:35.000000000 +0200 +++ linux-2.6.19.perfctr26/CREDITS 2008-06-22 17:26:36.000000000 +0200 @@ -2656,9 +2656,10 @@ S: Ottawa, Ontario S: Canada K2P 0X8 N: Mikael Pettersson -E: mikpe@csd.uu.se -W: http://www.csd.uu.se/~mikpe/ +E: mikpe@it.uu.se +W: http://user.it.uu.se/~mikpe/ D: Miscellaneous fixes +D: Performance-monitoring counters driver N: Reed H. Petty E: rhp@draper.net --- linux-2.6.19.perfctr26/Documentation/ioctl-number.txt.~1~ 2008-06-22 17:20:35.000000000 +0200 +++ linux-2.6.19.perfctr26/Documentation/ioctl-number.txt 2008-06-22 17:26:29.000000000 +0200 @@ -187,6 +187,8 @@ Code Seq# Include File Comments 0xB1 00-1F PPPoX 0xCB 00-1F CBM serial IEC bus in development: +0xD0 all performance counters see drivers/perfctr/ + 0xDD 00-3F ZFCP device driver see drivers/s390/scsi/ 0xF3 00-3F video/sisfb.h sisfb (in development) --- linux-2.6.19.perfctr26/MAINTAINERS.~1~ 2008-06-22 17:20:35.000000000 +0200 +++ linux-2.6.19.perfctr26/MAINTAINERS 2008-06-22 17:26:36.000000000 +0200 @@ -2355,6 +2355,12 @@ M: nagar@watson.ibm.com L: linux-kernel@vger.kernel.org S: Maintained +PERFORMANCE-MONITORING COUNTERS DRIVER +P: Mikael Pettersson +M: mikpe@it.uu.se +W: http://user.it.uu.se/~mikpe/linux/perfctr/ +S: Maintained + PERSONALITY HANDLING P: Christoph Hellwig M: hch@infradead.org --- linux-2.6.19.perfctr26/arch/i386/Kconfig.~1~ 2008-06-22 17:20:35.000000000 +0200 +++ linux-2.6.19.perfctr26/arch/i386/Kconfig 2008-06-22 17:26:29.000000000 +0200 @@ -741,6 +741,8 @@ config SECCOMP If unsure, say Y. Only embedded should say N here. 
+source "drivers/perfctr/Kconfig" + source kernel/Kconfig.hz config KEXEC --- linux-2.6.19.perfctr26/arch/i386/kernel/entry.S.~1~ 2008-06-22 17:20:35.000000000 +0200 +++ linux-2.6.19.perfctr26/arch/i386/kernel/entry.S 2008-06-22 17:26:29.000000000 +0200 @@ -602,6 +602,22 @@ ENTRY(name) \ /* The include is where all of the SMP etc. interrupts come from */ #include "entry_arch.h" +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +ENTRY(perfctr_interrupt) + RING0_INT_FRAME + pushl $~(LOCAL_PERFCTR_VECTOR) + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + TRACE_IRQS_OFF + pushl %esp + CFI_ADJUST_CFA_OFFSET 4 + call smp_perfctr_interrupt + addl $4, %esp + CFI_ADJUST_CFA_OFFSET -4 + jmp ret_from_intr + CFI_ENDPROC +#endif + KPROBE_ENTRY(page_fault) RING0_EC_FRAME pushl $do_page_fault --- linux-2.6.19.perfctr26/arch/i386/kernel/i8259.c.~1~ 2008-06-22 17:20:35.000000000 +0200 +++ linux-2.6.19.perfctr26/arch/i386/kernel/i8259.c 2008-06-22 17:26:29.000000000 +0200 @@ -22,6 +22,7 @@ #include #include #include +#include #include @@ -406,6 +407,8 @@ void __init init_IRQ(void) */ intr_init_hook(); + perfctr_vector_init(); + /* * Set the clock to HZ Hz, we already have a valid * vector now: --- linux-2.6.19.perfctr26/arch/i386/kernel/process.c.~1~ 2008-06-22 17:20:35.000000000 +0200 +++ linux-2.6.19.perfctr26/arch/i386/kernel/process.c 2008-06-22 17:26:29.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -381,6 +382,7 @@ void exit_thread(void) tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET; put_cpu(); } + perfctr_exit_thread(¤t->thread); } void flush_thread(void) @@ -433,6 +435,8 @@ int copy_thread(int nr, unsigned long cl savesegment(fs,p->thread.fs); savesegment(gs,p->thread.gs); + perfctr_copy_task(p, regs); + tsk = current; if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, @@ -697,6 +701,8 @@ struct task_struct fastcall * __switch_t disable_tsc(prev_p, 
next_p); + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.19.perfctr26/arch/powerpc/Kconfig.~1~ 2008-06-22 17:20:36.000000000 +0200 +++ linux-2.6.19.perfctr26/arch/powerpc/Kconfig 2008-06-22 17:26:29.000000000 +0200 @@ -320,6 +320,11 @@ config NOT_COHERENT_CACHE bool depends on 4xx || 8xx || E200 default y + +if PPC32 +source "drivers/perfctr/Kconfig" +endif + endmenu source "init/Kconfig" --- linux-2.6.19.perfctr26/arch/powerpc/kernel/process.c.~1~ 2008-06-22 17:08:53.000000000 +0200 +++ linux-2.6.19.perfctr26/arch/powerpc/kernel/process.c 2008-06-22 17:26:29.000000000 +0200 @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -332,7 +333,9 @@ struct task_struct *__switch_to(struct t account_process_vtime(current); calculate_steal_time(); + perfctr_suspend_thread(&prev->thread); last = _switch(old_thread, new_thread); + perfctr_resume_thread(¤t->thread); local_irq_restore(flags); @@ -458,6 +461,7 @@ void show_regs(struct pt_regs * regs) void exit_thread(void) { discard_lazy_cpu_state(); + perfctr_exit_thread(¤t->thread); } void flush_thread(void) @@ -570,6 +574,8 @@ int copy_thread(int nr, unsigned long cl p->thread.last_syscall = -1; #endif + perfctr_copy_task(p, regs); + return 0; } --- linux-2.6.19.perfctr26/arch/x86_64/Kconfig.~1~ 2008-06-22 17:20:36.000000000 +0200 +++ linux-2.6.19.perfctr26/arch/x86_64/Kconfig 2008-06-22 17:26:29.000000000 +0200 @@ -573,6 +573,8 @@ config CC_STACKPROTECTOR_ALL functions that use large-ish on-stack buffers. By enabling this option, GCC will be asked to do this for ALL functions. 
+source "drivers/perfctr/Kconfig" + source kernel/Kconfig.hz config REORDER --- linux-2.6.19.perfctr26/arch/x86_64/kernel/entry.S.~1~ 2008-06-22 17:20:36.000000000 +0200 +++ linux-2.6.19.perfctr26/arch/x86_64/kernel/entry.S 2008-06-22 17:26:29.000000000 +0200 @@ -713,6 +713,12 @@ ENTRY(spurious_interrupt) apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt END(spurious_interrupt) +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +ENTRY(perfctr_interrupt) + apicinterrupt LOCAL_PERFCTR_VECTOR,smp_perfctr_interrupt +END(perfctr_interrupt) +#endif + /* * Exception entry points. */ --- linux-2.6.19.perfctr26/arch/x86_64/kernel/i8259.c.~1~ 2008-06-22 17:20:36.000000000 +0200 +++ linux-2.6.19.perfctr26/arch/x86_64/kernel/i8259.c 2008-06-22 17:26:29.000000000 +0200 @@ -22,6 +22,7 @@ #include #include #include +#include /* * Common place to define all x86 IRQ vectors @@ -552,6 +553,8 @@ void __init init_IRQ(void) set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); + perfctr_vector_init(); + /* * Set the clock to HZ Hz, we already have a valid * vector now: --- linux-2.6.19.perfctr26/arch/x86_64/kernel/process.c.~1~ 2008-06-22 17:20:36.000000000 +0200 +++ linux-2.6.19.perfctr26/arch/x86_64/kernel/process.c 2008-06-22 17:26:29.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -370,6 +371,7 @@ void exit_thread(void) t->io_bitmap_max = 0; put_cpu(); } + perfctr_exit_thread(t); } void flush_thread(void) @@ -475,6 +477,8 @@ int copy_thread(int nr, unsigned long cl asm("mov %%es,%0" : "=m" (p->thread.es)); asm("mov %%ds,%0" : "=m" (p->thread.ds)); + perfctr_copy_task(p, regs); + if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) { @@ -658,6 +662,9 @@ __switch_to(struct task_struct *prev_p, */ if (next_p->fpu_counter>5) math_state_restore(); + + 
perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.19.perfctr26/drivers/Makefile.~1~ 2008-06-22 17:20:36.000000000 +0200 +++ linux-2.6.19.perfctr26/drivers/Makefile 2008-06-22 17:26:29.000000000 +0200 @@ -72,6 +72,7 @@ obj-$(CONFIG_NEW_LEDS) += leds/ obj-$(CONFIG_INFINIBAND) += infiniband/ obj-$(CONFIG_IPATH_CORE) += infiniband/ obj-$(CONFIG_SGI_SN) += sn/ +obj-$(CONFIG_KPERFCTR) += perfctr/ obj-y += firmware/ obj-$(CONFIG_CRYPTO) += crypto/ obj-$(CONFIG_SUPERH) += sh/ --- linux-2.6.19.perfctr26/fs/exec.c.~1~ 2008-06-22 17:20:37.000000000 +0200 +++ linux-2.6.19.perfctr26/fs/exec.c 2008-06-22 17:26:36.000000000 +0200 @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ -872,6 +873,7 @@ int flush_old_exec(struct linux_binprm * set_task_comm(current, tcomm); current->flags &= ~PF_RANDOMIZE; + perfctr_flush_thread(¤t->thread); flush_thread(); /* Set the new mm task size. We have to do that late because it may --- linux-2.6.19.perfctr26/include/asm-i386/mach-default/irq_vectors.h.~1~ 2008-06-22 15:56:54.000000000 +0200 +++ linux-2.6.19.perfctr26/include/asm-i386/mach-default/irq_vectors.h 2008-06-22 17:26:29.000000000 +0200 @@ -56,14 +56,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 --- linux-2.6.19.perfctr26/include/asm-i386/mach-visws/irq_vectors.h.~1~ 2008-06-22 15:56:54.000000000 +0200 +++ linux-2.6.19.perfctr26/include/asm-i386/mach-visws/irq_vectors.h 2008-06-22 17:26:29.000000000 +0200 @@ -35,14 +35,15 @@ * sources per level' errata. 
*/ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 --- linux-2.6.19.perfctr26/include/asm-i386/processor.h.~1~ 2008-06-22 17:20:37.000000000 +0200 +++ linux-2.6.19.perfctr26/include/asm-i386/processor.h 2008-06-22 17:26:29.000000000 +0200 @@ -467,6 +467,8 @@ struct thread_struct { unsigned long iopl; /* max allowed port in the bitmap, in bytes: */ unsigned long io_bitmap_max; +/* performance counters */ + struct vperfctr *perfctr; }; #define INIT_THREAD { \ --- linux-2.6.19.perfctr26/include/asm-i386/system.h.~1~ 2008-06-22 17:20:37.000000000 +0200 +++ linux-2.6.19.perfctr26/include/asm-i386/system.h 2008-06-22 17:26:29.000000000 +0200 @@ -17,6 +17,7 @@ extern struct task_struct * FASTCALL(__s */ #define switch_to(prev,next,last) do { \ unsigned long esi,edi; \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile("pushfl\n\t" /* Save flags */ \ "pushl %%ebp\n\t" \ "movl %%esp,%0\n\t" /* save ESP */ \ --- linux-2.6.19.perfctr26/include/asm-powerpc/processor.h.~1~ 2008-06-22 17:20:37.000000000 +0200 +++ linux-2.6.19.perfctr26/include/asm-powerpc/processor.h 2008-06-22 17:26:29.000000000 +0200 @@ -170,6 +170,9 @@ struct thread_struct { unsigned long spefscr; /* SPE & eFP status */ int used_spe; /* set if process has used spe */ #endif /* CONFIG_SPE */ +#ifdef CONFIG_PERFCTR_VIRTUAL + struct vperfctr *perfctr; /* performance counters */ +#endif }; #define ARCH_MIN_TASKALIGN 16 --- linux-2.6.19.perfctr26/include/asm-x86_64/hw_irq.h.~1~ 2008-06-22 17:08:54.000000000 +0200 +++ linux-2.6.19.perfctr26/include/asm-x86_64/hw_irq.h 2008-06-22 17:26:29.000000000 +0200 @@ -63,14 +63,15 @@ * sources per 
level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in irq.h */ #ifndef __ASSEMBLY__ --- linux-2.6.19.perfctr26/include/asm-x86_64/irq.h.~1~ 2008-06-22 15:56:55.000000000 +0200 +++ linux-2.6.19.perfctr26/include/asm-x86_64/irq.h 2008-06-22 17:26:29.000000000 +0200 @@ -29,7 +29,7 @@ */ #define NR_VECTORS 256 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in hw_irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in hw_irq.h */ #define NR_IRQS (NR_VECTORS + (32 *NR_CPUS)) #define NR_IRQ_VECTORS NR_IRQS --- linux-2.6.19.perfctr26/include/asm-x86_64/processor.h.~1~ 2008-06-22 17:20:37.000000000 +0200 +++ linux-2.6.19.perfctr26/include/asm-x86_64/processor.h 2008-06-22 17:26:29.000000000 +0200 @@ -274,6 +274,8 @@ struct thread_struct { unsigned io_bitmap_max; /* cached TLS descriptors. 
*/ u64 tls_array[GDT_ENTRY_TLS_ENTRIES]; +/* performance counters */ + struct vperfctr *perfctr; } __attribute__((aligned(16))); #define INIT_THREAD { \ --- linux-2.6.19.perfctr26/include/asm-x86_64/system.h.~1~ 2008-06-22 16:44:02.000000000 +0200 +++ linux-2.6.19.perfctr26/include/asm-x86_64/system.h 2008-06-22 17:26:29.000000000 +0200 @@ -21,7 +21,8 @@ ,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15" /* Save restore flags to clear handle leaking NT */ -#define switch_to(prev,next,last) \ +#define switch_to(prev,next,last) do { \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile(SAVE_CONTEXT \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ @@ -41,7 +42,8 @@ [tif_fork] "i" (TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, thread_info)), \ [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ - : "memory", "cc" __EXTRA_CLOBBER) + : "memory", "cc" __EXTRA_CLOBBER); \ +} while (0) extern void load_gs_index(unsigned); --- linux-2.6.19.perfctr26/kernel/exit.c.~1~ 2008-06-22 17:20:37.000000000 +0200 +++ linux-2.6.19.perfctr26/kernel/exit.c 2008-06-22 17:26:29.000000000 +0200 @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -170,6 +171,7 @@ repeat: zap_leader = (leader->exit_signal == -1); } + perfctr_release_task(p); sched_exit(p); write_unlock_irq(&tasklist_lock); proc_flush_task(p); --- linux-2.6.19.perfctr26/kernel/sched.c.~1~ 2008-06-22 17:20:37.000000000 +0200 +++ linux-2.6.19.perfctr26/kernel/sched.c 2008-06-22 17:26:29.000000000 +0200 @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -4909,6 +4910,8 @@ int set_cpus_allowed(struct task_struct struct rq *rq; int ret = 0; + perfctr_set_cpus_allowed(p, new_mask); + rq = task_rq_lock(p, &flags); if (!cpus_intersects(new_mask, cpu_online_map)) { ret = -EINVAL; --- linux-2.6.19.perfctr26/kernel/timer.c.~1~ 2008-06-22 17:20:37.000000000 +0200 
+++ linux-2.6.19.perfctr26/kernel/timer.c 2008-06-22 17:26:29.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -972,6 +973,7 @@ void update_process_times(int user_tick) account_user_time(p, jiffies_to_cputime(1)); else account_system_time(p, HARDIRQ_OFFSET, jiffies_to_cputime(1)); + perfctr_sample_thread(&p->thread); run_local_timers(); if (rcu_pending(cpu)) rcu_check_callbacks(cpu, user_tick); papi-5.6.0/src/perfctr-2.6.x/patches/patch-kernel-2.6.18000664 001750 001750 00000037562 13216244367 024244 0ustar00jshenry1963jshenry1963000000 000000 --- linux-2.6.18.perfctr26/CREDITS.~1~ 2008-06-22 17:35:05.000000000 +0200 +++ linux-2.6.18.perfctr26/CREDITS 2008-06-22 17:37:20.000000000 +0200 @@ -2629,9 +2629,10 @@ S: Ottawa, Ontario S: Canada K2P 0X8 N: Mikael Pettersson -E: mikpe@csd.uu.se -W: http://www.csd.uu.se/~mikpe/ +E: mikpe@it.uu.se +W: http://user.it.uu.se/~mikpe/ D: Miscellaneous fixes +D: Performance-monitoring counters driver N: Reed H. 
Petty E: rhp@draper.net --- linux-2.6.18.perfctr26/Documentation/ioctl-number.txt.~1~ 2008-06-22 17:20:35.000000000 +0200 +++ linux-2.6.18.perfctr26/Documentation/ioctl-number.txt 2008-06-22 17:37:14.000000000 +0200 @@ -187,6 +187,8 @@ Code Seq# Include File Comments 0xB1 00-1F PPPoX 0xCB 00-1F CBM serial IEC bus in development: +0xD0 all performance counters see drivers/perfctr/ + 0xDD 00-3F ZFCP device driver see drivers/s390/scsi/ 0xF3 00-3F video/sisfb.h sisfb (in development) --- linux-2.6.18.perfctr26/MAINTAINERS.~1~ 2008-06-22 17:35:05.000000000 +0200 +++ linux-2.6.18.perfctr26/MAINTAINERS 2008-06-22 17:37:20.000000000 +0200 @@ -2272,6 +2272,12 @@ M: nagar@watson.ibm.com L: linux-kernel@vger.kernel.org S: Maintained +PERFORMANCE-MONITORING COUNTERS DRIVER +P: Mikael Pettersson +M: mikpe@it.uu.se +W: http://user.it.uu.se/~mikpe/linux/perfctr/ +S: Maintained + PERSONALITY HANDLING P: Christoph Hellwig M: hch@infradead.org --- linux-2.6.18.perfctr26/arch/i386/Kconfig.~1~ 2008-06-22 17:35:05.000000000 +0200 +++ linux-2.6.18.perfctr26/arch/i386/Kconfig 2008-06-22 17:37:14.000000000 +0200 @@ -737,6 +737,8 @@ config SECCOMP If unsure, say Y. Only embedded should say N here. +source "drivers/perfctr/Kconfig" + source kernel/Kconfig.hz config KEXEC --- linux-2.6.18.perfctr26/arch/i386/kernel/entry.S.~1~ 2008-06-22 17:35:05.000000000 +0200 +++ linux-2.6.18.perfctr26/arch/i386/kernel/entry.S 2008-06-22 17:37:14.000000000 +0200 @@ -591,6 +591,22 @@ ENTRY(name) \ /* The include is where all of the SMP etc. 
interrupts come from */ #include "entry_arch.h" +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +ENTRY(perfctr_interrupt) + RING0_INT_FRAME + pushl $~(LOCAL_PERFCTR_VECTOR) + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + TRACE_IRQS_OFF + pushl %esp + CFI_ADJUST_CFA_OFFSET 4 + call smp_perfctr_interrupt + addl $4, %esp + CFI_ADJUST_CFA_OFFSET -4 + jmp ret_from_intr + CFI_ENDPROC +#endif + ENTRY(divide_error) RING0_INT_FRAME pushl $0 # no error code --- linux-2.6.18.perfctr26/arch/i386/kernel/i8259.c.~1~ 2008-06-22 17:35:05.000000000 +0200 +++ linux-2.6.18.perfctr26/arch/i386/kernel/i8259.c 2008-06-22 17:37:14.000000000 +0200 @@ -22,6 +22,7 @@ #include #include #include +#include #include @@ -420,6 +421,8 @@ void __init init_IRQ(void) */ intr_init_hook(); + perfctr_vector_init(); + /* * Set the clock to HZ Hz, we already have a valid * vector now: --- linux-2.6.18.perfctr26/arch/i386/kernel/process.c.~1~ 2008-06-22 17:35:05.000000000 +0200 +++ linux-2.6.18.perfctr26/arch/i386/kernel/process.c 2008-06-22 17:37:14.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -379,6 +380,7 @@ void exit_thread(void) tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET; put_cpu(); } + perfctr_exit_thread(&current->thread); } void flush_thread(void) @@ -431,6 +433,8 @@ int copy_thread(int nr, unsigned long cl savesegment(fs,p->thread.fs); savesegment(gs,p->thread.gs); + perfctr_copy_task(p, regs); + tsk = current; if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); @@ -696,6 +700,8 @@ struct task_struct fastcall * __switch_t disable_tsc(prev_p, next_p); + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.18.perfctr26/arch/powerpc/Kconfig.~1~ 2008-06-22 17:35:06.000000000 +0200 +++ linux-2.6.18.perfctr26/arch/powerpc/Kconfig 2008-06-22 17:37:14.000000000 +0200 @@ -320,6 +320,11 @@ config NOT_COHERENT_CACHE bool depends on 4xx || 8xx || E200 default y + +if 
PPC32 +source "drivers/perfctr/Kconfig" +endif + endmenu source "init/Kconfig" --- linux-2.6.18.perfctr26/arch/powerpc/kernel/process.c.~1~ 2008-06-22 17:35:06.000000000 +0200 +++ linux-2.6.18.perfctr26/arch/powerpc/kernel/process.c 2008-06-22 17:37:14.000000000 +0200 @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -332,7 +333,9 @@ struct task_struct *__switch_to(struct t account_process_vtime(current); calculate_steal_time(); + perfctr_suspend_thread(&prev->thread); last = _switch(old_thread, new_thread); + perfctr_resume_thread(&current->thread); local_irq_restore(flags); @@ -464,6 +467,7 @@ void show_regs(struct pt_regs * regs) void exit_thread(void) { discard_lazy_cpu_state(); + perfctr_exit_thread(&current->thread); } void flush_thread(void) @@ -576,6 +580,8 @@ int copy_thread(int nr, unsigned long cl p->thread.last_syscall = -1; #endif + perfctr_copy_task(p, regs); + return 0; } --- linux-2.6.18.perfctr26/arch/x86_64/Kconfig.~1~ 2008-06-22 17:35:06.000000000 +0200 +++ linux-2.6.18.perfctr26/arch/x86_64/Kconfig 2008-06-22 17:37:14.000000000 +0200 @@ -526,6 +526,8 @@ config SECCOMP If unsure, say Y. Only embedded should say N here. +source "drivers/perfctr/Kconfig" + source kernel/Kconfig.hz config REORDER --- linux-2.6.18.perfctr26/arch/x86_64/kernel/entry.S.~1~ 2008-06-22 17:35:06.000000000 +0200 +++ linux-2.6.18.perfctr26/arch/x86_64/kernel/entry.S 2008-06-22 17:37:14.000000000 +0200 @@ -693,6 +693,12 @@ ENTRY(spurious_interrupt) END(spurious_interrupt) #endif +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +ENTRY(perfctr_interrupt) + apicinterrupt LOCAL_PERFCTR_VECTOR,smp_perfctr_interrupt +END(perfctr_interrupt) +#endif + /* * Exception entry points. 
*/ --- linux-2.6.18.perfctr26/arch/x86_64/kernel/i8259.c.~1~ 2008-06-22 17:35:06.000000000 +0200 +++ linux-2.6.18.perfctr26/arch/x86_64/kernel/i8259.c 2008-06-22 17:37:14.000000000 +0200 @@ -22,6 +22,7 @@ #include #include #include +#include /* * Common place to define all x86 IRQ vectors @@ -590,6 +591,8 @@ void __init init_IRQ(void) set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); #endif + perfctr_vector_init(); + /* * Set the clock to HZ Hz, we already have a valid * vector now: --- linux-2.6.18.perfctr26/arch/x86_64/kernel/process.c.~1~ 2008-06-22 17:35:06.000000000 +0200 +++ linux-2.6.18.perfctr26/arch/x86_64/kernel/process.c 2008-06-22 17:37:14.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -357,6 +358,7 @@ void exit_thread(void) t->io_bitmap_max = 0; put_cpu(); } + perfctr_exit_thread(&me->thread); } void flush_thread(void) @@ -461,6 +463,8 @@ int copy_thread(int nr, unsigned long cl asm("mov %%es,%0" : "=m" (p->thread.es)); asm("mov %%ds,%0" : "=m" (p->thread.ds)); + perfctr_copy_task(p, regs); + if (unlikely(me->thread.io_bitmap_ptr != NULL)) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) { @@ -618,6 +622,8 @@ __switch_to(struct task_struct *prev_p, } } + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.18.perfctr26/drivers/Makefile.~1~ 2008-06-22 17:35:06.000000000 +0200 +++ linux-2.6.18.perfctr26/drivers/Makefile 2008-06-22 17:37:14.000000000 +0200 @@ -71,6 +71,7 @@ obj-$(CONFIG_NEW_LEDS) += leds/ obj-$(CONFIG_INFINIBAND) += infiniband/ obj-$(CONFIG_IPATH_CORE) += infiniband/ obj-$(CONFIG_SGI_SN) += sn/ +obj-$(CONFIG_KPERFCTR) += perfctr/ obj-y += firmware/ obj-$(CONFIG_CRYPTO) += crypto/ obj-$(CONFIG_SUPERH) += sh/ --- linux-2.6.18.perfctr26/fs/exec.c.~1~ 2008-06-22 17:35:07.000000000 +0200 +++ linux-2.6.18.perfctr26/fs/exec.c 2008-06-22 17:37:20.000000000 +0200 @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ 
-875,6 +876,7 @@ int flush_old_exec(struct linux_binprm * set_task_comm(current, tcomm); current->flags &= ~PF_RANDOMIZE; + perfctr_flush_thread(&current->thread); flush_thread(); /* Set the new mm task size. We have to do that late because it may --- linux-2.6.18.perfctr26/include/asm-i386/mach-default/irq_vectors.h.~1~ 2008-06-22 15:56:54.000000000 +0200 +++ linux-2.6.18.perfctr26/include/asm-i386/mach-default/irq_vectors.h 2008-06-22 17:37:14.000000000 +0200 @@ -56,14 +56,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 --- linux-2.6.18.perfctr26/include/asm-i386/mach-visws/irq_vectors.h.~1~ 2008-06-22 15:56:54.000000000 +0200 +++ linux-2.6.18.perfctr26/include/asm-i386/mach-visws/irq_vectors.h 2008-06-22 17:37:14.000000000 +0200 @@ -35,14 +35,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. 
(0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 --- linux-2.6.18.perfctr26/include/asm-i386/processor.h.~1~ 2008-06-22 17:35:07.000000000 +0200 +++ linux-2.6.18.perfctr26/include/asm-i386/processor.h 2008-06-22 17:37:14.000000000 +0200 @@ -471,6 +471,8 @@ struct thread_struct { unsigned long iopl; /* max allowed port in the bitmap, in bytes: */ unsigned long io_bitmap_max; +/* performance counters */ + struct vperfctr *perfctr; }; #define INIT_THREAD { \ --- linux-2.6.18.perfctr26/include/asm-i386/system.h.~1~ 2008-06-22 17:35:07.000000000 +0200 +++ linux-2.6.18.perfctr26/include/asm-i386/system.h 2008-06-22 17:37:14.000000000 +0200 @@ -17,6 +17,7 @@ extern struct task_struct * FASTCALL(__s */ #define switch_to(prev,next,last) do { \ unsigned long esi,edi; \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile("pushfl\n\t" /* Save flags */ \ "pushl %%ebp\n\t" \ "movl %%esp,%0\n\t" /* save ESP */ \ --- linux-2.6.18.perfctr26/include/asm-powerpc/processor.h.~1~ 2008-06-22 17:35:07.000000000 +0200 +++ linux-2.6.18.perfctr26/include/asm-powerpc/processor.h 2008-06-22 17:37:14.000000000 +0200 @@ -169,6 +169,9 @@ struct thread_struct { unsigned long spefscr; /* SPE & eFP status */ int used_spe; /* set if process has used spe */ #endif /* CONFIG_SPE */ +#ifdef CONFIG_PERFCTR_VIRTUAL + struct vperfctr *perfctr; /* performance counters */ +#endif }; #define ARCH_MIN_TASKALIGN 16 --- linux-2.6.18.perfctr26/include/asm-x86_64/hw_irq.h.~1~ 2008-06-22 17:35:10.000000000 +0200 +++ linux-2.6.18.perfctr26/include/asm-x86_64/hw_irq.h 2008-06-22 17:37:14.000000000 +0200 @@ -64,14 +64,15 @@ struct hw_interrupt_type; * sources per level' errata. 
*/ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in irq.h */ #ifndef __ASSEMBLY__ --- linux-2.6.18.perfctr26/include/asm-x86_64/irq.h.~1~ 2008-06-22 17:35:10.000000000 +0200 +++ linux-2.6.18.perfctr26/include/asm-x86_64/irq.h 2008-06-22 17:37:14.000000000 +0200 @@ -29,7 +29,7 @@ */ #define NR_VECTORS 256 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in hw_irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in hw_irq.h */ #ifdef CONFIG_PCI_MSI #define NR_IRQS FIRST_SYSTEM_VECTOR --- linux-2.6.18.perfctr26/include/asm-x86_64/processor.h.~1~ 2008-06-22 17:35:10.000000000 +0200 +++ linux-2.6.18.perfctr26/include/asm-x86_64/processor.h 2008-06-22 17:37:14.000000000 +0200 @@ -274,6 +274,8 @@ struct thread_struct { unsigned io_bitmap_max; /* cached TLS descriptors. 
*/ u64 tls_array[GDT_ENTRY_TLS_ENTRIES]; +/* performance counters */ + struct vperfctr *perfctr; } __attribute__((aligned(16))); #define INIT_THREAD { \ --- linux-2.6.18.perfctr26/include/asm-x86_64/system.h.~1~ 2008-06-22 17:35:10.000000000 +0200 +++ linux-2.6.18.perfctr26/include/asm-x86_64/system.h 2008-06-22 17:37:14.000000000 +0200 @@ -20,7 +20,8 @@ #define __EXTRA_CLOBBER \ ,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15" -#define switch_to(prev,next,last) \ +#define switch_to(prev,next,last) do { \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile(SAVE_CONTEXT \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ @@ -40,7 +41,8 @@ [tif_fork] "i" (TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, thread_info)), \ [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ - : "memory", "cc" __EXTRA_CLOBBER) + : "memory", "cc" __EXTRA_CLOBBER); \ +} while (0) extern void load_gs_index(unsigned); --- linux-2.6.18.perfctr26/kernel/exit.c.~1~ 2008-06-22 17:35:10.000000000 +0200 +++ linux-2.6.18.perfctr26/kernel/exit.c 2008-06-22 17:37:14.000000000 +0200 @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -166,6 +167,7 @@ repeat: zap_leader = (leader->exit_signal == -1); } + perfctr_release_task(p); sched_exit(p); write_unlock_irq(&tasklist_lock); proc_flush_task(p); --- linux-2.6.18.perfctr26/kernel/sched.c.~1~ 2008-06-22 17:35:10.000000000 +0200 +++ linux-2.6.18.perfctr26/kernel/sched.c 2008-06-22 17:37:14.000000000 +0200 @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -4845,6 +4846,8 @@ int set_cpus_allowed(struct task_struct struct rq *rq; int ret = 0; + perfctr_set_cpus_allowed(p, new_mask); + rq = task_rq_lock(p, &flags); if (!cpus_intersects(new_mask, cpu_online_map)) { ret = -EINVAL; --- linux-2.6.18.perfctr26/kernel/timer.c.~1~ 2008-06-22 17:35:10.000000000 +0200 +++ 
linux-2.6.18.perfctr26/kernel/timer.c 2008-06-22 17:37:14.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -1181,6 +1182,7 @@ void update_process_times(int user_tick) account_user_time(p, jiffies_to_cputime(1)); else account_system_time(p, HARDIRQ_OFFSET, jiffies_to_cputime(1)); + perfctr_sample_thread(&p->thread); run_local_timers(); if (rcu_pending(cpu)) rcu_check_callbacks(cpu, user_tick); papi-5.6.0/src/libpfm4/lib/pfmlib_intel_core.c000664 001750 001750 00000005351 13216244365 023255 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_intel_core.c : Intel Core PMU * * Copyright (c) 2009 Google, Inc * Contributed by Stephane Eranian * * Based on: * Copyright (c) 2006 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* * Core PMU = architectural perfmon v2 + PEBS */
/* private headers */
#include "pfmlib_priv.h"
#include "pfmlib_intel_x86_priv.h"
#include "events/intel_core_events.h"

/*
 * CPU model numbers covered by this PMU description; referenced by
 * .cpu_models below, next to .cpu_family = 6.
 * NOTE(review): the trailing 0 looks like an end-of-list marker consumed by
 * pfm_intel_x86_model_detect() -- confirm against that helper.
 */
static const int core_models[] = {
	15, /* Merom */
	23, /* Penryn */
	29, /* Dunnington */
	0
};

/*
 * PMU init callback, installed as .pmu_init in intel_core_support.
 * Records architectural perfmon version 2 in the shared pfm_intel_x86_cfg
 * and returns PFM_SUCCESS. The 'this' argument is not used.
 */
static int
pfm_core_init(void *this)
{
	pfm_intel_x86_cfg.arch_version = 2;
	return PFM_SUCCESS;
}

/*
 * PMU descriptor for "Intel Core" (short name "core").
 * Declares num_cntrs = 2 and num_fixed_cntrs = 3, takes its event table from
 * intel_core_pe, and is matched on CPU family 6 with the models listed in
 * core_models. Every callback except pmu_init is a shared pfm_intel_x86_*
 * routine; pmu_init is the only hook defined in this file.
 */
pfmlib_pmu_t intel_core_support={
	.desc			= "Intel Core",
	.name			= "core",
	.pmu			= PFM_PMU_INTEL_CORE,
	.pme_count		= LIBPFM_ARRAY_SIZE(intel_core_pe),
	.type			= PFM_PMU_TYPE_CORE,
	.num_cntrs		= 2,
	.num_fixed_cntrs	= 3,
	.max_encoding		= 1,
	.supported_plm		= INTEL_X86_PLM,
	.pe			= intel_core_pe,
	.atdesc			= intel_x86_mods,
	.flags			= PFMLIB_PMU_FL_RAW_UMASK
				| INTEL_X86_PMU_FL_ECMASK,
	.cpu_family		= 6,
	.cpu_models		= core_models,
	.pmu_detect		= pfm_intel_x86_model_detect,
	.pmu_init		= pfm_core_init,
	.get_event_encoding[PFM_OS_NONE] = pfm_intel_x86_get_encoding,
	 PFMLIB_ENCODE_PERF(pfm_intel_x86_get_perf_encoding),
	.get_event_first	= pfm_intel_x86_get_event_first,
	.get_event_next		= pfm_intel_x86_get_event_next,
	.event_is_valid		= pfm_intel_x86_event_is_valid,
	.validate_table		= pfm_intel_x86_validate_table,
	.get_event_info		= pfm_intel_x86_get_event_info,
	.get_event_attr_info	= pfm_intel_x86_get_event_attr_info,
	 PFMLIB_VALID_PERF_PATTRS(pfm_intel_x86_perf_validate_pattrs),
	.get_event_nattrs	= pfm_intel_x86_get_event_nattrs,
};
papi-5.6.0/src/perfctr-2.6.x/patches/patch-kernel-2.6.17000664 001750 001750 00000037402 13216244367 024234 0ustar00jshenry1963jshenry1963000000 000000 --- linux-2.6.17.perfctr26/CREDITS.~1~ 2008-06-22 17:55:14.000000000 +0200 +++ linux-2.6.17.perfctr26/CREDITS 2008-06-22 17:57:58.000000000 +0200 @@ -2628,9 +2628,10 @@ S: Ottawa, Ontario S: Canada K2P 0X8 N: Mikael Pettersson -E: mikpe@csd.uu.se -W: http://www.csd.uu.se/~mikpe/ +E: mikpe@it.uu.se +W: http://user.it.uu.se/~mikpe/ D: Miscellaneous fixes +D: Performance-monitoring counters driver N: Reed H. 
Petty E: rhp@draper.net --- linux-2.6.17.perfctr26/Documentation/ioctl-number.txt.~1~ 2008-06-22 17:55:14.000000000 +0200 +++ linux-2.6.17.perfctr26/Documentation/ioctl-number.txt 2008-06-22 17:57:52.000000000 +0200 @@ -186,6 +186,8 @@ Code Seq# Include File Comments 0xB1 00-1F PPPoX 0xCB 00-1F CBM serial IEC bus in development: +0xD0 all performance counters see drivers/perfctr/ + 0xDD 00-3F ZFCP device driver see drivers/s390/scsi/ 0xF3 00-3F video/sisfb.h sisfb (in development) --- linux-2.6.17.perfctr26/MAINTAINERS.~1~ 2008-06-22 17:55:14.000000000 +0200 +++ linux-2.6.17.perfctr26/MAINTAINERS 2008-06-22 17:57:58.000000000 +0200 @@ -2185,6 +2185,12 @@ M: tsbogend@alpha.franken.de L: netdev@vger.kernel.org S: Maintained +PERFORMANCE-MONITORING COUNTERS DRIVER +P: Mikael Pettersson +M: mikpe@it.uu.se +W: http://user.it.uu.se/~mikpe/linux/perfctr/ +S: Maintained + PERSONALITY HANDLING P: Christoph Hellwig M: hch@infradead.org --- linux-2.6.17.perfctr26/arch/i386/Kconfig.~1~ 2008-06-22 17:55:14.000000000 +0200 +++ linux-2.6.17.perfctr26/arch/i386/Kconfig 2008-06-22 17:57:52.000000000 +0200 @@ -708,6 +708,8 @@ config SECCOMP If unsure, say Y. Only embedded should say N here. +source "drivers/perfctr/Kconfig" + source kernel/Kconfig.hz config KEXEC --- linux-2.6.17.perfctr26/arch/i386/kernel/entry.S.~1~ 2008-06-22 17:55:14.000000000 +0200 +++ linux-2.6.17.perfctr26/arch/i386/kernel/entry.S 2008-06-22 17:57:52.000000000 +0200 @@ -436,6 +436,16 @@ ENTRY(name) \ /* The include is where all of the SMP etc. 
interrupts come from */ #include "entry_arch.h" +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +ENTRY(perfctr_interrupt) + pushl $LOCAL_PERFCTR_VECTOR-256 + SAVE_ALL + pushl %esp + call smp_perfctr_interrupt + addl $4, %esp + jmp ret_from_intr +#endif + ENTRY(divide_error) pushl $0 # no error code pushl $do_divide_error --- linux-2.6.17.perfctr26/arch/i386/kernel/i8259.c.~1~ 2008-06-22 17:55:14.000000000 +0200 +++ linux-2.6.17.perfctr26/arch/i386/kernel/i8259.c 2008-06-22 17:57:52.000000000 +0200 @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -421,6 +422,8 @@ void __init init_IRQ(void) */ intr_init_hook(); + perfctr_vector_init(); + /* * Set the clock to HZ Hz, we already have a valid * vector now: --- linux-2.6.17.perfctr26/arch/i386/kernel/process.c.~1~ 2008-06-22 17:55:14.000000000 +0200 +++ linux-2.6.17.perfctr26/arch/i386/kernel/process.c 2008-06-22 17:57:52.000000000 +0200 @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -380,6 +381,7 @@ void exit_thread(void) tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET; put_cpu(); } + perfctr_exit_thread(&tsk->thread); } void flush_thread(void) @@ -431,6 +433,8 @@ int copy_thread(int nr, unsigned long cl savesegment(fs,p->thread.fs); savesegment(gs,p->thread.gs); + perfctr_copy_task(p, regs); + tsk = current; if (unlikely(NULL != tsk->thread.io_bitmap_ptr)) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); @@ -689,6 +693,8 @@ struct task_struct fastcall * __switch_t disable_tsc(prev_p, next_p); + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.17.perfctr26/arch/powerpc/Kconfig.~1~ 2008-06-22 17:55:14.000000000 +0200 +++ linux-2.6.17.perfctr26/arch/powerpc/Kconfig 2008-06-22 17:57:52.000000000 +0200 @@ -299,6 +299,11 @@ config NOT_COHERENT_CACHE bool depends on 4xx || 8xx || E200 default y + +if PPC32 +source "drivers/perfctr/Kconfig" +endif + endmenu source "init/Kconfig" --- 
linux-2.6.17.perfctr26/arch/powerpc/kernel/process.c.~1~ 2008-06-22 17:55:14.000000000 +0200 +++ linux-2.6.17.perfctr26/arch/powerpc/kernel/process.c 2008-06-22 17:57:52.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -333,7 +334,9 @@ struct task_struct *__switch_to(struct t account_process_vtime(current); calculate_steal_time(); + perfctr_suspend_thread(&prev->thread); last = _switch(old_thread, new_thread); + perfctr_resume_thread(&current->thread); local_irq_restore(flags); @@ -465,6 +468,7 @@ void show_regs(struct pt_regs * regs) void exit_thread(void) { discard_lazy_cpu_state(); + perfctr_exit_thread(&current->thread); } void flush_thread(void) @@ -577,6 +581,8 @@ int copy_thread(int nr, unsigned long cl p->thread.last_syscall = -1; #endif + perfctr_copy_task(p, regs); + return 0; } --- linux-2.6.17.perfctr26/arch/x86_64/Kconfig.~1~ 2008-06-22 17:55:14.000000000 +0200 +++ linux-2.6.17.perfctr26/arch/x86_64/Kconfig 2008-06-22 17:57:52.000000000 +0200 @@ -491,6 +491,8 @@ config SECCOMP If unsure, say Y. Only embedded should say N here. +source "drivers/perfctr/Kconfig" + source kernel/Kconfig.hz config REORDER --- linux-2.6.17.perfctr26/arch/x86_64/kernel/entry.S.~1~ 2008-06-22 17:55:14.000000000 +0200 +++ linux-2.6.17.perfctr26/arch/x86_64/kernel/entry.S 2008-06-22 17:57:52.000000000 +0200 @@ -642,6 +642,11 @@ ENTRY(spurious_interrupt) apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt #endif +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +ENTRY(perfctr_interrupt) + apicinterrupt LOCAL_PERFCTR_VECTOR,smp_perfctr_interrupt +#endif + /* * Exception entry points. 
*/ --- linux-2.6.17.perfctr26/arch/x86_64/kernel/i8259.c.~1~ 2008-06-22 17:55:14.000000000 +0200 +++ linux-2.6.17.perfctr26/arch/x86_64/kernel/i8259.c 2008-06-22 17:57:52.000000000 +0200 @@ -23,6 +23,7 @@ #include #include #include +#include /* * Common place to define all x86 IRQ vectors @@ -591,6 +592,8 @@ void __init init_IRQ(void) set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); #endif + perfctr_vector_init(); + /* * Set the clock to HZ Hz, we already have a valid * vector now: --- linux-2.6.17.perfctr26/arch/x86_64/kernel/process.c.~1~ 2008-06-22 17:55:14.000000000 +0200 +++ linux-2.6.17.perfctr26/arch/x86_64/kernel/process.c 2008-06-22 17:57:52.000000000 +0200 @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -358,6 +359,7 @@ void exit_thread(void) t->io_bitmap_max = 0; put_cpu(); } + perfctr_exit_thread(&me->thread); } void flush_thread(void) @@ -459,6 +461,8 @@ int copy_thread(int nr, unsigned long cl asm("mov %%es,%0" : "=m" (p->thread.es)); asm("mov %%ds,%0" : "=m" (p->thread.ds)); + perfctr_copy_task(p, regs); + if (unlikely(me->thread.io_bitmap_ptr != NULL)) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) { @@ -616,6 +620,8 @@ __switch_to(struct task_struct *prev_p, } } + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.17.perfctr26/drivers/Makefile.~1~ 2008-06-22 17:55:14.000000000 +0200 +++ linux-2.6.17.perfctr26/drivers/Makefile 2008-06-22 17:57:52.000000000 +0200 @@ -71,6 +71,7 @@ obj-$(CONFIG_NEW_LEDS) += leds/ obj-$(CONFIG_INFINIBAND) += infiniband/ obj-$(CONFIG_IPATH_CORE) += infiniband/ obj-$(CONFIG_SGI_SN) += sn/ +obj-$(CONFIG_KPERFCTR) += perfctr/ obj-y += firmware/ obj-$(CONFIG_CRYPTO) += crypto/ obj-$(CONFIG_SUPERH) += sh/ --- linux-2.6.17.perfctr26/fs/exec.c.~1~ 2008-06-22 17:55:15.000000000 +0200 +++ linux-2.6.17.perfctr26/fs/exec.c 2008-06-22 17:57:58.000000000 +0200 @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ 
-889,6 +890,7 @@ int flush_old_exec(struct linux_binprm * set_task_comm(current, tcomm); current->flags &= ~PF_RANDOMIZE; + perfctr_flush_thread(&current->thread); flush_thread(); /* Set the new mm task size. We have to do that late because it may --- linux-2.6.17.perfctr26/include/asm-i386/mach-default/irq_vectors.h.~1~ 2008-06-22 15:56:54.000000000 +0200 +++ linux-2.6.17.perfctr26/include/asm-i386/mach-default/irq_vectors.h 2008-06-22 17:57:52.000000000 +0200 @@ -56,14 +56,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 --- linux-2.6.17.perfctr26/include/asm-i386/mach-visws/irq_vectors.h.~1~ 2008-06-22 15:56:54.000000000 +0200 +++ linux-2.6.17.perfctr26/include/asm-i386/mach-visws/irq_vectors.h 2008-06-22 17:57:52.000000000 +0200 @@ -35,14 +35,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. 
(0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 --- linux-2.6.17.perfctr26/include/asm-i386/processor.h.~1~ 2008-06-22 17:55:16.000000000 +0200 +++ linux-2.6.17.perfctr26/include/asm-i386/processor.h 2008-06-22 17:57:52.000000000 +0200 @@ -469,6 +469,8 @@ struct thread_struct { unsigned long iopl; /* max allowed port in the bitmap, in bytes: */ unsigned long io_bitmap_max; +/* performance counters */ + struct vperfctr *perfctr; }; #define INIT_THREAD { \ --- linux-2.6.17.perfctr26/include/asm-i386/system.h.~1~ 2008-06-22 17:55:16.000000000 +0200 +++ linux-2.6.17.perfctr26/include/asm-i386/system.h 2008-06-22 17:57:52.000000000 +0200 @@ -14,6 +14,7 @@ extern struct task_struct * FASTCALL(__s #define switch_to(prev,next,last) do { \ unsigned long esi,edi; \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile("pushl %%ebp\n\t" \ "movl %%esp,%0\n\t" /* save ESP */ \ "movl %5,%%esp\n\t" /* restore ESP */ \ --- linux-2.6.17.perfctr26/include/asm-powerpc/processor.h.~1~ 2008-06-22 17:55:16.000000000 +0200 +++ linux-2.6.17.perfctr26/include/asm-powerpc/processor.h 2008-06-22 17:57:52.000000000 +0200 @@ -169,6 +169,9 @@ struct thread_struct { unsigned long spefscr; /* SPE & eFP status */ int used_spe; /* set if process has used spe */ #endif /* CONFIG_SPE */ +#ifdef CONFIG_PERFCTR_VIRTUAL + struct vperfctr *perfctr; /* performance counters */ +#endif }; #define ARCH_MIN_TASKALIGN 16 --- linux-2.6.17.perfctr26/include/asm-x86_64/hw_irq.h.~1~ 2008-06-22 17:55:16.000000000 +0200 +++ linux-2.6.17.perfctr26/include/asm-x86_64/hw_irq.h 2008-06-22 17:57:52.000000000 +0200 @@ -67,14 +67,15 @@ struct hw_interrupt_type; * sources per level' errata. 
*/ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in irq.h */ #ifndef __ASSEMBLY__ --- linux-2.6.17.perfctr26/include/asm-x86_64/irq.h.~1~ 2008-06-22 17:35:10.000000000 +0200 +++ linux-2.6.17.perfctr26/include/asm-x86_64/irq.h 2008-06-22 17:57:52.000000000 +0200 @@ -29,7 +29,7 @@ */ #define NR_VECTORS 256 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in hw_irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in hw_irq.h */ #ifdef CONFIG_PCI_MSI #define NR_IRQS FIRST_SYSTEM_VECTOR --- linux-2.6.17.perfctr26/include/asm-x86_64/processor.h.~1~ 2008-06-22 17:55:16.000000000 +0200 +++ linux-2.6.17.perfctr26/include/asm-x86_64/processor.h 2008-06-22 17:57:52.000000000 +0200 @@ -264,6 +264,8 @@ struct thread_struct { unsigned io_bitmap_max; /* cached TLS descriptors. 
*/ u64 tls_array[GDT_ENTRY_TLS_ENTRIES]; +/* performance counters */ + struct vperfctr *perfctr; } __attribute__((aligned(16))); #define INIT_THREAD { \ --- linux-2.6.17.perfctr26/include/asm-x86_64/system.h.~1~ 2008-06-22 17:55:16.000000000 +0200 +++ linux-2.6.17.perfctr26/include/asm-x86_64/system.h 2008-06-22 17:57:52.000000000 +0200 @@ -26,7 +26,8 @@ #define __EXTRA_CLOBBER \ ,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15" -#define switch_to(prev,next,last) \ +#define switch_to(prev,next,last) do { \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile(SAVE_CONTEXT \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ @@ -46,7 +47,8 @@ [tif_fork] "i" (TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, thread_info)), \ [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ - : "memory", "cc" __EXTRA_CLOBBER) + : "memory", "cc" __EXTRA_CLOBBER); \ +} while (0) extern void load_gs_index(unsigned); --- linux-2.6.17.perfctr26/kernel/exit.c.~1~ 2008-06-22 17:55:16.000000000 +0200 +++ linux-2.6.17.perfctr26/kernel/exit.c 2008-06-22 17:57:52.000000000 +0200 @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -170,6 +171,7 @@ repeat: zap_leader = (leader->exit_signal == -1); } + perfctr_release_task(p); sched_exit(p); write_unlock_irq(&tasklist_lock); spin_unlock(&p->proc_lock); --- linux-2.6.17.perfctr26/kernel/sched.c.~1~ 2008-06-22 17:55:16.000000000 +0200 +++ linux-2.6.17.perfctr26/kernel/sched.c 2008-06-22 17:57:52.000000000 +0200 @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include @@ -4425,6 +4426,8 @@ int set_cpus_allowed(task_t *p, cpumask_ migration_req_t req; runqueue_t *rq; + perfctr_set_cpus_allowed(p, new_mask); + rq = task_rq_lock(p, &flags); if (!cpus_intersects(new_mask, cpu_online_map)) { ret = -EINVAL; --- linux-2.6.17.perfctr26/kernel/timer.c.~1~ 2008-06-22 17:55:16.000000000 +0200 +++ 
linux-2.6.17.perfctr26/kernel/timer.c 2008-06-22 17:57:52.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -830,6 +831,7 @@ void update_process_times(int user_tick) account_user_time(p, jiffies_to_cputime(1)); else account_system_time(p, HARDIRQ_OFFSET, jiffies_to_cputime(1)); + perfctr_sample_thread(&p->thread); run_local_timers(); if (rcu_pending(cpu)) rcu_check_callbacks(cpu, user_tick); papi-5.6.0/src/perfctr-2.6.x/patches/patch-kernel-2.6.16000664 001750 001750 00000037305 13216244367 024235 0ustar00jshenry1963jshenry1963000000 000000 --- linux-2.6.16.perfctr26/CREDITS.~1~ 2008-06-22 18:08:15.000000000 +0200 +++ linux-2.6.16.perfctr26/CREDITS 2008-06-22 18:11:22.000000000 +0200 @@ -2631,9 +2631,10 @@ S: Ottawa, Ontario S: Canada K2P 0X8 N: Mikael Pettersson -E: mikpe@csd.uu.se -W: http://www.csd.uu.se/~mikpe/ +E: mikpe@it.uu.se +W: http://user.it.uu.se/~mikpe/ D: Miscellaneous fixes +D: Performance-monitoring counters driver N: Reed H. Petty E: rhp@draper.net --- linux-2.6.16.perfctr26/Documentation/ioctl-number.txt.~1~ 2008-06-22 18:08:15.000000000 +0200 +++ linux-2.6.16.perfctr26/Documentation/ioctl-number.txt 2008-06-22 18:11:15.000000000 +0200 @@ -188,6 +188,8 @@ Code Seq# Include File Comments 0xB1 00-1F PPPoX 0xCB 00-1F CBM serial IEC bus in development: +0xD0 all performance counters see drivers/perfctr/ + 0xDD 00-3F ZFCP device driver see drivers/s390/scsi/ 0xF3 00-3F video/sisfb.h sisfb (in development) --- linux-2.6.16.perfctr26/MAINTAINERS.~1~ 2008-06-22 18:08:15.000000000 +0200 +++ linux-2.6.16.perfctr26/MAINTAINERS 2008-06-22 18:11:22.000000000 +0200 @@ -2071,6 +2071,12 @@ M: tsbogend@alpha.franken.de L: netdev@vger.kernel.org S: Maintained +PERFORMANCE-MONITORING COUNTERS DRIVER +P: Mikael Pettersson +M: mikpe@it.uu.se +W: http://user.it.uu.se/~mikpe/linux/perfctr/ +S: Maintained + PHRAM MTD DRIVER P: Jörn Engel M: joern@wh.fh-wedel.de --- linux-2.6.16.perfctr26/arch/i386/Kconfig.~1~ 2008-06-22 
18:08:15.000000000 +0200 +++ linux-2.6.16.perfctr26/arch/i386/Kconfig 2008-06-22 18:11:15.000000000 +0200 @@ -685,6 +685,8 @@ config SECCOMP If unsure, say Y. Only embedded should say N here. +source "drivers/perfctr/Kconfig" + source kernel/Kconfig.hz config KEXEC --- linux-2.6.16.perfctr26/arch/i386/kernel/entry.S.~1~ 2008-06-22 18:08:15.000000000 +0200 +++ linux-2.6.16.perfctr26/arch/i386/kernel/entry.S 2008-06-22 18:11:15.000000000 +0200 @@ -432,6 +432,16 @@ ENTRY(name) \ /* The include is where all of the SMP etc. interrupts come from */ #include "entry_arch.h" +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +ENTRY(perfctr_interrupt) + pushl $LOCAL_PERFCTR_VECTOR-256 + SAVE_ALL + pushl %esp + call smp_perfctr_interrupt + addl $4, %esp + jmp ret_from_intr +#endif + ENTRY(divide_error) pushl $0 # no error code pushl $do_divide_error --- linux-2.6.16.perfctr26/arch/i386/kernel/i8259.c.~1~ 2008-06-22 17:55:14.000000000 +0200 +++ linux-2.6.16.perfctr26/arch/i386/kernel/i8259.c 2008-06-22 18:11:15.000000000 +0200 @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -421,6 +422,8 @@ void __init init_IRQ(void) */ intr_init_hook(); + perfctr_vector_init(); + /* * Set the clock to HZ Hz, we already have a valid * vector now: --- linux-2.6.16.perfctr26/arch/i386/kernel/process.c.~1~ 2008-06-22 18:08:15.000000000 +0200 +++ linux-2.6.16.perfctr26/arch/i386/kernel/process.c 2008-06-22 18:11:15.000000000 +0200 @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -388,6 +389,7 @@ void exit_thread(void) tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET; put_cpu(); } + perfctr_exit_thread(&tsk->thread); } void flush_thread(void) @@ -439,6 +441,8 @@ int copy_thread(int nr, unsigned long cl savesegment(fs,p->thread.fs); savesegment(gs,p->thread.gs); + perfctr_copy_task(p, regs); + tsk = current; if (unlikely(NULL != tsk->thread.io_bitmap_ptr)) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); @@ -697,6 
+701,8 @@ struct task_struct fastcall * __switch_t disable_tsc(prev_p, next_p); + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.16.perfctr26/arch/powerpc/Kconfig.~1~ 2008-06-22 18:08:15.000000000 +0200 +++ linux-2.6.16.perfctr26/arch/powerpc/Kconfig 2008-06-22 18:11:15.000000000 +0200 @@ -268,6 +268,11 @@ config NOT_COHERENT_CACHE bool depends on 4xx || 8xx || E200 default y + +if PPC32 +source "drivers/perfctr/Kconfig" +endif + endmenu source "init/Kconfig" --- linux-2.6.16.perfctr26/arch/powerpc/kernel/process.c.~1~ 2008-06-22 18:08:15.000000000 +0200 +++ linux-2.6.16.perfctr26/arch/powerpc/kernel/process.c 2008-06-22 18:11:15.000000000 +0200 @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -330,7 +331,9 @@ struct task_struct *__switch_to(struct t #endif local_irq_save(flags); + perfctr_suspend_thread(&prev->thread); last = _switch(old_thread, new_thread); + perfctr_resume_thread(¤t->thread); local_irq_restore(flags); @@ -459,6 +462,7 @@ void exit_thread(void) { kprobe_flush_task(current); discard_lazy_cpu_state(); + perfctr_exit_thread(¤t->thread); } void flush_thread(void) @@ -571,6 +575,8 @@ int copy_thread(int nr, unsigned long cl p->thread.last_syscall = -1; #endif + perfctr_copy_task(p, regs); + return 0; } --- linux-2.6.16.perfctr26/arch/x86_64/Kconfig.~1~ 2008-06-22 18:08:16.000000000 +0200 +++ linux-2.6.16.perfctr26/arch/x86_64/Kconfig 2008-06-22 18:11:15.000000000 +0200 @@ -462,6 +462,8 @@ config SECCOMP If unsure, say Y. Only embedded should say N here. 
+source "drivers/perfctr/Kconfig" + source kernel/Kconfig.hz endmenu --- linux-2.6.16.perfctr26/arch/x86_64/kernel/entry.S.~1~ 2008-06-22 18:08:16.000000000 +0200 +++ linux-2.6.16.perfctr26/arch/x86_64/kernel/entry.S 2008-06-22 18:11:15.000000000 +0200 @@ -655,6 +655,11 @@ ENTRY(spurious_interrupt) apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt #endif +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +ENTRY(perfctr_interrupt) + apicinterrupt LOCAL_PERFCTR_VECTOR,smp_perfctr_interrupt +#endif + /* * Exception entry points. */ --- linux-2.6.16.perfctr26/arch/x86_64/kernel/i8259.c.~1~ 2008-06-22 17:55:14.000000000 +0200 +++ linux-2.6.16.perfctr26/arch/x86_64/kernel/i8259.c 2008-06-22 18:11:15.000000000 +0200 @@ -23,6 +23,7 @@ #include #include #include +#include /* * Common place to define all x86 IRQ vectors @@ -591,6 +592,8 @@ void __init init_IRQ(void) set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); #endif + perfctr_vector_init(); + /* * Set the clock to HZ Hz, we already have a valid * vector now: --- linux-2.6.16.perfctr26/arch/x86_64/kernel/process.c.~1~ 2008-06-22 18:08:16.000000000 +0200 +++ linux-2.6.16.perfctr26/arch/x86_64/kernel/process.c 2008-06-22 18:11:15.000000000 +0200 @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -372,6 +373,7 @@ void exit_thread(void) t->io_bitmap_max = 0; put_cpu(); } + perfctr_exit_thread(&me->thread); } void flush_thread(void) @@ -473,6 +475,8 @@ int copy_thread(int nr, unsigned long cl asm("mov %%es,%0" : "=m" (p->thread.es)); asm("mov %%ds,%0" : "=m" (p->thread.ds)); + perfctr_copy_task(p, regs); + if (unlikely(me->thread.io_bitmap_ptr != NULL)) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) { @@ -627,6 +631,8 @@ __switch_to(struct task_struct *prev_p, } } + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.16.perfctr26/drivers/Makefile.~1~ 2008-06-22 18:08:16.000000000 +0200 +++ 
linux-2.6.16.perfctr26/drivers/Makefile 2008-06-22 18:11:15.000000000 +0200 @@ -70,6 +70,7 @@ obj-$(CONFIG_CPU_FREQ) += cpufreq/ obj-$(CONFIG_MMC) += mmc/ obj-$(CONFIG_INFINIBAND) += infiniband/ obj-$(CONFIG_SGI_SN) += sn/ +obj-$(CONFIG_KPERFCTR) += perfctr/ obj-y += firmware/ obj-$(CONFIG_CRYPTO) += crypto/ obj-$(CONFIG_SUPERH) += sh/ --- linux-2.6.16.perfctr26/fs/exec.c.~1~ 2008-06-22 18:08:17.000000000 +0200 +++ linux-2.6.16.perfctr26/fs/exec.c 2008-06-22 18:11:22.000000000 +0200 @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ -883,6 +884,7 @@ int flush_old_exec(struct linux_binprm * set_task_comm(current, tcomm); current->flags &= ~PF_RANDOMIZE; + perfctr_flush_thread(¤t->thread); flush_thread(); /* Set the new mm task size. We have to do that late because it may --- linux-2.6.16.perfctr26/include/asm-i386/mach-default/irq_vectors.h.~1~ 2008-06-22 15:56:54.000000000 +0200 +++ linux-2.6.16.perfctr26/include/asm-i386/mach-default/irq_vectors.h 2008-06-22 18:11:15.000000000 +0200 @@ -56,14 +56,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 --- linux-2.6.16.perfctr26/include/asm-i386/mach-visws/irq_vectors.h.~1~ 2008-06-22 15:56:54.000000000 +0200 +++ linux-2.6.16.perfctr26/include/asm-i386/mach-visws/irq_vectors.h 2008-06-22 18:11:15.000000000 +0200 @@ -35,14 +35,15 @@ * sources per level' errata. 
*/ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 --- linux-2.6.16.perfctr26/include/asm-i386/processor.h.~1~ 2008-06-22 18:08:17.000000000 +0200 +++ linux-2.6.16.perfctr26/include/asm-i386/processor.h 2008-06-22 18:11:15.000000000 +0200 @@ -464,6 +464,8 @@ struct thread_struct { unsigned long iopl; /* max allowed port in the bitmap, in bytes: */ unsigned long io_bitmap_max; +/* performance counters */ + struct vperfctr *perfctr; }; #define INIT_THREAD { \ --- linux-2.6.16.perfctr26/include/asm-i386/system.h.~1~ 2008-06-22 18:08:17.000000000 +0200 +++ linux-2.6.16.perfctr26/include/asm-i386/system.h 2008-06-22 18:11:15.000000000 +0200 @@ -14,6 +14,7 @@ extern struct task_struct * FASTCALL(__s #define switch_to(prev,next,last) do { \ unsigned long esi,edi; \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile("pushl %%ebp\n\t" \ "movl %%esp,%0\n\t" /* save ESP */ \ "movl %5,%%esp\n\t" /* restore ESP */ \ --- linux-2.6.16.perfctr26/include/asm-powerpc/processor.h.~1~ 2008-06-22 18:08:17.000000000 +0200 +++ linux-2.6.16.perfctr26/include/asm-powerpc/processor.h 2008-06-22 18:11:15.000000000 +0200 @@ -193,6 +193,9 @@ struct thread_struct { unsigned long spefscr; /* SPE & eFP status */ int used_spe; /* set if process has used spe */ #endif /* CONFIG_SPE */ +#ifdef CONFIG_PERFCTR_VIRTUAL + struct vperfctr *perfctr; /* performance counters */ +#endif }; #define ARCH_MIN_TASKALIGN 16 --- linux-2.6.16.perfctr26/include/asm-x86_64/hw_irq.h.~1~ 2008-06-22 17:55:16.000000000 +0200 +++ linux-2.6.16.perfctr26/include/asm-x86_64/hw_irq.h 2008-06-22 18:11:15.000000000 +0200 @@ -67,14 +67,15 @@ struct 
hw_interrupt_type; * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in irq.h */ #ifndef __ASSEMBLY__ --- linux-2.6.16.perfctr26/include/asm-x86_64/irq.h.~1~ 2008-06-22 17:35:10.000000000 +0200 +++ linux-2.6.16.perfctr26/include/asm-x86_64/irq.h 2008-06-22 18:11:15.000000000 +0200 @@ -29,7 +29,7 @@ */ #define NR_VECTORS 256 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in hw_irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in hw_irq.h */ #ifdef CONFIG_PCI_MSI #define NR_IRQS FIRST_SYSTEM_VECTOR --- linux-2.6.16.perfctr26/include/asm-x86_64/processor.h.~1~ 2008-06-22 18:08:17.000000000 +0200 +++ linux-2.6.16.perfctr26/include/asm-x86_64/processor.h 2008-06-22 18:11:15.000000000 +0200 @@ -260,6 +260,8 @@ struct thread_struct { unsigned io_bitmap_max; /* cached TLS descriptors. 
*/ u64 tls_array[GDT_ENTRY_TLS_ENTRIES]; +/* performance counters */ + struct vperfctr *perfctr; } __attribute__((aligned(16))); #define INIT_THREAD { \ --- linux-2.6.16.perfctr26/include/asm-x86_64/system.h.~1~ 2008-06-22 18:08:17.000000000 +0200 +++ linux-2.6.16.perfctr26/include/asm-x86_64/system.h 2008-06-22 18:11:15.000000000 +0200 @@ -26,7 +26,8 @@ #define __EXTRA_CLOBBER \ ,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15" -#define switch_to(prev,next,last) \ +#define switch_to(prev,next,last) do { \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile(SAVE_CONTEXT \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ @@ -46,7 +47,8 @@ [tif_fork] "i" (TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, thread_info)), \ [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ - : "memory", "cc" __EXTRA_CLOBBER) + : "memory", "cc" __EXTRA_CLOBBER); \ +} while (0) extern void load_gs_index(unsigned); --- linux-2.6.16.perfctr26/kernel/exit.c.~1~ 2008-06-22 18:08:17.000000000 +0200 +++ linux-2.6.16.perfctr26/kernel/exit.c 2008-06-22 18:11:15.000000000 +0200 @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -102,6 +103,7 @@ repeat: zap_leader = (leader->exit_signal == -1); } + perfctr_release_task(p); sched_exit(p); write_unlock_irq(&tasklist_lock); spin_unlock(&p->proc_lock); --- linux-2.6.16.perfctr26/kernel/sched.c.~1~ 2008-06-22 18:08:17.000000000 +0200 +++ linux-2.6.16.perfctr26/kernel/sched.c 2008-06-22 18:11:15.000000000 +0200 @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include @@ -4393,6 +4394,8 @@ int set_cpus_allowed(task_t *p, cpumask_ migration_req_t req; runqueue_t *rq; + perfctr_set_cpus_allowed(p, new_mask); + rq = task_rq_lock(p, &flags); if (!cpus_intersects(new_mask, cpu_online_map)) { ret = -EINVAL; --- linux-2.6.16.perfctr26/kernel/timer.c.~1~ 2008-06-22 18:08:17.000000000 +0200 +++ 
linux-2.6.16.perfctr26/kernel/timer.c 2008-06-22 18:11:15.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -837,6 +838,7 @@ void update_process_times(int user_tick) account_user_time(p, jiffies_to_cputime(1)); else account_system_time(p, HARDIRQ_OFFSET, jiffies_to_cputime(1)); + perfctr_sample_thread(&p->thread); run_local_timers(); if (rcu_pending(cpu)) rcu_check_callbacks(cpu, user_tick); papi-5.6.0/src/perfctr-2.6.x/patches/patch-kernel-2.6.15000664 001750 001750 00000043703 13216244367 024233 0ustar00jshenry1963jshenry1963000000 000000 --- linux-2.6.15.perfctr26/CREDITS.~1~ 2008-06-22 18:22:10.000000000 +0200 +++ linux-2.6.15.perfctr26/CREDITS 2008-06-22 18:25:45.000000000 +0200 @@ -2631,9 +2631,10 @@ S: Ottawa, Ontario S: Canada K2P 0X8 N: Mikael Pettersson -E: mikpe@csd.uu.se -W: http://www.csd.uu.se/~mikpe/ +E: mikpe@it.uu.se +W: http://user.it.uu.se/~mikpe/ D: Miscellaneous fixes +D: Performance-monitoring counters driver N: Reed H. Petty E: rhp@draper.net --- linux-2.6.15.perfctr26/Documentation/ioctl-number.txt.~1~ 2008-06-22 18:08:15.000000000 +0200 +++ linux-2.6.15.perfctr26/Documentation/ioctl-number.txt 2008-06-22 18:25:38.000000000 +0200 @@ -188,6 +188,8 @@ Code Seq# Include File Comments 0xB1 00-1F PPPoX 0xCB 00-1F CBM serial IEC bus in development: +0xD0 all performance counters see drivers/perfctr/ + 0xDD 00-3F ZFCP device driver see drivers/s390/scsi/ 0xF3 00-3F video/sisfb.h sisfb (in development) --- linux-2.6.15.perfctr26/MAINTAINERS.~1~ 2008-06-22 18:22:10.000000000 +0200 +++ linux-2.6.15.perfctr26/MAINTAINERS 2008-06-22 18:25:45.000000000 +0200 @@ -2027,6 +2027,12 @@ M: tsbogend@alpha.franken.de L: netdev@vger.kernel.org S: Maintained +PERFORMANCE-MONITORING COUNTERS DRIVER +P: Mikael Pettersson +M: mikpe@it.uu.se +W: http://user.it.uu.se/~mikpe/linux/perfctr/ +S: Maintained + PHRAM MTD DRIVER P: Jörn Engel M: joern@wh.fh-wedel.de --- linux-2.6.15.perfctr26/arch/i386/Kconfig.~1~ 2008-06-22 
18:22:11.000000000 +0200 +++ linux-2.6.15.perfctr26/arch/i386/Kconfig 2008-06-22 18:25:38.000000000 +0200 @@ -647,6 +647,8 @@ config SECCOMP If unsure, say Y. Only embedded should say N here. +source "drivers/perfctr/Kconfig" + source kernel/Kconfig.hz config PHYSICAL_START --- linux-2.6.15.perfctr26/arch/i386/kernel/entry.S.~1~ 2008-06-22 18:22:11.000000000 +0200 +++ linux-2.6.15.perfctr26/arch/i386/kernel/entry.S 2008-06-22 18:25:38.000000000 +0200 @@ -430,6 +430,16 @@ ENTRY(name) \ /* The include is where all of the SMP etc. interrupts come from */ #include "entry_arch.h" +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +ENTRY(perfctr_interrupt) + pushl $LOCAL_PERFCTR_VECTOR-256 + SAVE_ALL + pushl %esp + call smp_perfctr_interrupt + addl $4, %esp + jmp ret_from_intr +#endif + ENTRY(divide_error) pushl $0 # no error code pushl $do_divide_error --- linux-2.6.15.perfctr26/arch/i386/kernel/i8259.c.~1~ 2008-06-22 17:55:14.000000000 +0200 +++ linux-2.6.15.perfctr26/arch/i386/kernel/i8259.c 2008-06-22 18:25:38.000000000 +0200 @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -421,6 +422,8 @@ void __init init_IRQ(void) */ intr_init_hook(); + perfctr_vector_init(); + /* * Set the clock to HZ Hz, we already have a valid * vector now: --- linux-2.6.15.perfctr26/arch/i386/kernel/process.c.~1~ 2008-06-22 18:22:11.000000000 +0200 +++ linux-2.6.15.perfctr26/arch/i386/kernel/process.c 2008-06-22 18:25:38.000000000 +0200 @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -387,6 +388,7 @@ void exit_thread(void) tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET; put_cpu(); } + perfctr_exit_thread(&tsk->thread); } void flush_thread(void) @@ -459,6 +461,8 @@ int copy_thread(int nr, unsigned long cl savesegment(fs,p->thread.fs); savesegment(gs,p->thread.gs); + perfctr_copy_task(p, regs); + tsk = current; if (unlikely(NULL != tsk->thread.io_bitmap_ptr)) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); @@ 
-722,6 +726,8 @@ struct task_struct fastcall * __switch_t disable_tsc(prev_p, next_p); + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.15.perfctr26/arch/powerpc/Kconfig.~1~ 2008-06-22 18:22:11.000000000 +0200 +++ linux-2.6.15.perfctr26/arch/powerpc/Kconfig 2008-06-22 18:25:38.000000000 +0200 @@ -237,6 +237,11 @@ config NOT_COHERENT_CACHE bool depends on 4xx || 8xx || E200 default y + +if PPC32 +source "drivers/perfctr/Kconfig" +endif + endmenu source "init/Kconfig" --- linux-2.6.15.perfctr26/arch/powerpc/kernel/head_32.S.~1~ 2008-06-22 18:22:11.000000000 +0200 +++ linux-2.6.15.perfctr26/arch/powerpc/kernel/head_32.S 2008-06-22 18:25:38.000000000 +0200 @@ -458,7 +458,11 @@ SystemCall: Trap_0f: EXCEPTION_PROLOG addi r3,r1,STACK_FRAME_OVERHEAD +#ifdef CONFIG_PERFCTR_INTERRUPT_SUPPORT + EXC_XFER_EE(0xf00, do_perfctr_interrupt) +#else EXC_XFER_EE(0xf00, unknown_exception) +#endif /* * Handle TLB miss for instruction on 603/603e. --- linux-2.6.15.perfctr26/arch/powerpc/kernel/process.c.~1~ 2008-06-22 18:22:11.000000000 +0200 +++ linux-2.6.15.perfctr26/arch/powerpc/kernel/process.c 2008-06-22 18:25:38.000000000 +0200 @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -328,7 +329,9 @@ struct task_struct *__switch_to(struct t #endif local_irq_save(flags); + perfctr_suspend_thread(&prev->thread); last = _switch(old_thread, new_thread); + perfctr_resume_thread(¤t->thread); local_irq_restore(flags); @@ -457,6 +460,7 @@ void exit_thread(void) { kprobe_flush_task(current); discard_lazy_cpu_state(); + perfctr_exit_thread(¤t->thread); } void flush_thread(void) @@ -569,6 +573,8 @@ int copy_thread(int nr, unsigned long cl p->thread.last_syscall = -1; #endif + perfctr_copy_task(p, regs); + return 0; } --- linux-2.6.15.perfctr26/arch/ppc/Kconfig.~1~ 2008-06-22 18:22:11.000000000 +0200 +++ linux-2.6.15.perfctr26/arch/ppc/Kconfig 2008-06-22 18:25:38.000000000 +0200 @@ -288,6 +288,8 @@ config NOT_COHERENT_CACHE depends on 4xx || 8xx || 
E200 default y +source "drivers/perfctr/Kconfig" + endmenu menu "Platform options" --- linux-2.6.15.perfctr26/arch/ppc/kernel/head.S.~1~ 2008-06-22 18:22:11.000000000 +0200 +++ linux-2.6.15.perfctr26/arch/ppc/kernel/head.S 2008-06-22 18:25:38.000000000 +0200 @@ -502,7 +502,11 @@ SystemCall: Trap_0f: EXCEPTION_PROLOG addi r3,r1,STACK_FRAME_OVERHEAD +#ifdef CONFIG_PERFCTR_INTERRUPT_SUPPORT + EXC_XFER_EE(0xf00, do_perfctr_interrupt) +#else EXC_XFER_EE(0xf00, unknown_exception) +#endif /* * Handle TLB miss for instruction on 603/603e. --- linux-2.6.15.perfctr26/arch/ppc/kernel/process.c.~1~ 2008-06-22 18:22:11.000000000 +0200 +++ linux-2.6.15.perfctr26/arch/ppc/kernel/process.c 2008-06-22 18:25:38.000000000 +0200 @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -363,7 +364,9 @@ struct task_struct *__switch_to(struct t #endif /* CONFIG_SPE */ new_thread = &new->thread; old_thread = ¤t->thread; + perfctr_suspend_thread(&prev->thread); last = _switch(old_thread, new_thread); + perfctr_resume_thread(¤t->thread); local_irq_restore(s); return last; } @@ -426,6 +429,7 @@ void exit_thread(void) if (last_task_used_spe == current) last_task_used_spe = NULL; #endif + perfctr_exit_thread(¤t->thread); preempt_enable(); } @@ -521,6 +525,8 @@ copy_thread(int nr, unsigned long clone_ p->thread.last_syscall = -1; + perfctr_copy_task(p, regs); + return 0; } --- linux-2.6.15.perfctr26/arch/x86_64/Kconfig.~1~ 2008-06-22 18:22:11.000000000 +0200 +++ linux-2.6.15.perfctr26/arch/x86_64/Kconfig 2008-06-22 18:25:38.000000000 +0200 @@ -444,6 +444,8 @@ config SECCOMP If unsure, say Y. Only embedded should say N here. 
+source "drivers/perfctr/Kconfig" + source kernel/Kconfig.hz endmenu --- linux-2.6.15.perfctr26/arch/x86_64/kernel/entry.S.~1~ 2008-06-22 18:22:11.000000000 +0200 +++ linux-2.6.15.perfctr26/arch/x86_64/kernel/entry.S 2008-06-22 18:25:38.000000000 +0200 @@ -648,6 +648,11 @@ ENTRY(spurious_interrupt) apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt #endif +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +ENTRY(perfctr_interrupt) + apicinterrupt LOCAL_PERFCTR_VECTOR,smp_perfctr_interrupt +#endif + /* * Exception entry points. */ --- linux-2.6.15.perfctr26/arch/x86_64/kernel/i8259.c.~1~ 2008-06-22 18:22:11.000000000 +0200 +++ linux-2.6.15.perfctr26/arch/x86_64/kernel/i8259.c 2008-06-22 18:25:38.000000000 +0200 @@ -23,6 +23,7 @@ #include #include #include +#include /* * Common place to define all x86 IRQ vectors @@ -592,6 +593,8 @@ void __init init_IRQ(void) set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); #endif + perfctr_vector_init(); + /* * Set the clock to HZ Hz, we already have a valid * vector now: --- linux-2.6.15.perfctr26/arch/x86_64/kernel/process.c.~1~ 2008-06-22 18:22:11.000000000 +0200 +++ linux-2.6.15.perfctr26/arch/x86_64/kernel/process.c 2008-06-22 18:25:38.000000000 +0200 @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -344,6 +345,7 @@ void exit_thread(void) t->io_bitmap_max = 0; put_cpu(); } + perfctr_exit_thread(&me->thread); } void flush_thread(void) @@ -445,6 +447,8 @@ int copy_thread(int nr, unsigned long cl asm("mov %%es,%0" : "=m" (p->thread.es)); asm("mov %%ds,%0" : "=m" (p->thread.ds)); + perfctr_copy_task(p, regs); + if (unlikely(me->thread.io_bitmap_ptr != NULL)) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) { @@ -597,6 +601,8 @@ __switch_to(struct task_struct *prev_p, } } + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.15.perfctr26/drivers/Makefile.~1~ 2008-06-22 18:22:11.000000000 +0200 +++ 
linux-2.6.15.perfctr26/drivers/Makefile 2008-06-22 18:25:38.000000000 +0200 @@ -67,6 +67,7 @@ obj-$(CONFIG_CPU_FREQ) += cpufreq/ obj-$(CONFIG_MMC) += mmc/ obj-$(CONFIG_INFINIBAND) += infiniband/ obj-$(CONFIG_SGI_IOC4) += sn/ +obj-$(CONFIG_KPERFCTR) += perfctr/ obj-y += firmware/ obj-$(CONFIG_CRYPTO) += crypto/ obj-$(CONFIG_SUPERH) += sh/ --- linux-2.6.15.perfctr26/fs/exec.c.~1~ 2008-06-22 18:22:12.000000000 +0200 +++ linux-2.6.15.perfctr26/fs/exec.c 2008-06-22 18:25:45.000000000 +0200 @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ -883,6 +884,7 @@ int flush_old_exec(struct linux_binprm * set_task_comm(current, tcomm); current->flags &= ~PF_RANDOMIZE; + perfctr_flush_thread(¤t->thread); flush_thread(); if (bprm->e_uid != current->euid || bprm->e_gid != current->egid || --- linux-2.6.15.perfctr26/include/asm-i386/mach-default/irq_vectors.h.~1~ 2008-06-22 15:56:54.000000000 +0200 +++ linux-2.6.15.perfctr26/include/asm-i386/mach-default/irq_vectors.h 2008-06-22 18:25:38.000000000 +0200 @@ -56,14 +56,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 --- linux-2.6.15.perfctr26/include/asm-i386/mach-visws/irq_vectors.h.~1~ 2008-06-22 15:56:54.000000000 +0200 +++ linux-2.6.15.perfctr26/include/asm-i386/mach-visws/irq_vectors.h 2008-06-22 18:25:38.000000000 +0200 @@ -35,14 +35,15 @@ * sources per level' errata. 
*/ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 --- linux-2.6.15.perfctr26/include/asm-i386/processor.h.~1~ 2008-06-22 18:22:12.000000000 +0200 +++ linux-2.6.15.perfctr26/include/asm-i386/processor.h 2008-06-22 18:25:38.000000000 +0200 @@ -460,6 +460,8 @@ struct thread_struct { unsigned long iopl; /* max allowed port in the bitmap, in bytes: */ unsigned long io_bitmap_max; +/* performance counters */ + struct vperfctr *perfctr; }; #define INIT_THREAD { \ --- linux-2.6.15.perfctr26/include/asm-i386/system.h.~1~ 2008-06-22 18:22:12.000000000 +0200 +++ linux-2.6.15.perfctr26/include/asm-i386/system.h 2008-06-22 18:25:38.000000000 +0200 @@ -14,6 +14,7 @@ extern struct task_struct * FASTCALL(__s #define switch_to(prev,next,last) do { \ unsigned long esi,edi; \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile("pushl %%ebp\n\t" \ "movl %%esp,%0\n\t" /* save ESP */ \ "movl %5,%%esp\n\t" /* restore ESP */ \ --- linux-2.6.15.perfctr26/include/asm-powerpc/processor.h.~1~ 2008-06-22 18:22:12.000000000 +0200 +++ linux-2.6.15.perfctr26/include/asm-powerpc/processor.h 2008-06-22 18:25:38.000000000 +0200 @@ -194,6 +194,9 @@ struct thread_struct { unsigned long spefscr; /* SPE & eFP status */ int used_spe; /* set if process has used spe */ #endif /* CONFIG_SPE */ +#ifdef CONFIG_PERFCTR_VIRTUAL + struct vperfctr *perfctr; /* performance counters */ +#endif }; #define ARCH_MIN_TASKALIGN 16 --- linux-2.6.15.perfctr26/include/asm-x86_64/hw_irq.h.~1~ 2008-06-22 18:22:12.000000000 +0200 +++ linux-2.6.15.perfctr26/include/asm-x86_64/hw_irq.h 2008-06-22 18:25:38.000000000 +0200 @@ -67,14 +67,15 @@ struct 
hw_interrupt_type; * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in irq.h */ #ifndef __ASSEMBLY__ --- linux-2.6.15.perfctr26/include/asm-x86_64/irq.h.~1~ 2008-06-22 18:22:12.000000000 +0200 +++ linux-2.6.15.perfctr26/include/asm-x86_64/irq.h 2008-06-22 18:25:38.000000000 +0200 @@ -29,7 +29,7 @@ */ #define NR_VECTORS 256 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in hw_irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in hw_irq.h */ #ifdef CONFIG_PCI_MSI #define NR_IRQS FIRST_SYSTEM_VECTOR --- linux-2.6.15.perfctr26/include/asm-x86_64/processor.h.~1~ 2008-06-22 18:22:12.000000000 +0200 +++ linux-2.6.15.perfctr26/include/asm-x86_64/processor.h 2008-06-22 18:25:38.000000000 +0200 @@ -254,6 +254,8 @@ struct thread_struct { unsigned io_bitmap_max; /* cached TLS descriptors. 
*/ u64 tls_array[GDT_ENTRY_TLS_ENTRIES]; +/* performance counters */ + struct vperfctr *perfctr; } __attribute__((aligned(16))); #define INIT_THREAD { \ --- linux-2.6.15.perfctr26/include/asm-x86_64/system.h.~1~ 2008-06-22 18:22:12.000000000 +0200 +++ linux-2.6.15.perfctr26/include/asm-x86_64/system.h 2008-06-22 18:25:38.000000000 +0200 @@ -26,7 +26,8 @@ #define __EXTRA_CLOBBER \ ,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15" -#define switch_to(prev,next,last) \ +#define switch_to(prev,next,last) do { \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile(SAVE_CONTEXT \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ @@ -46,7 +47,8 @@ [tif_fork] "i" (TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, thread_info)), \ [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ - : "memory", "cc" __EXTRA_CLOBBER) + : "memory", "cc" __EXTRA_CLOBBER); \ +} while (0) extern void load_gs_index(unsigned); --- linux-2.6.15.perfctr26/kernel/exit.c.~1~ 2008-06-22 18:22:12.000000000 +0200 +++ linux-2.6.15.perfctr26/kernel/exit.c 2008-06-22 18:25:38.000000000 +0200 @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -101,6 +102,7 @@ repeat: zap_leader = (leader->exit_signal == -1); } + perfctr_release_task(p); sched_exit(p); write_unlock_irq(&tasklist_lock); spin_unlock(&p->proc_lock); --- linux-2.6.15.perfctr26/kernel/sched.c.~1~ 2008-06-22 18:22:12.000000000 +0200 +++ linux-2.6.15.perfctr26/kernel/sched.c 2008-06-22 18:25:38.000000000 +0200 @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -4458,6 +4459,8 @@ int set_cpus_allowed(task_t *p, cpumask_ migration_req_t req; runqueue_t *rq; + perfctr_set_cpus_allowed(p, new_mask); + rq = task_rq_lock(p, &flags); if (!cpus_intersects(new_mask, cpu_online_map)) { ret = -EINVAL; --- linux-2.6.15.perfctr26/kernel/timer.c.~1~ 2008-06-22 18:22:12.000000000 +0200 +++ 
linux-2.6.15.perfctr26/kernel/timer.c 2008-06-22 18:25:38.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -791,6 +792,7 @@ void update_process_times(int user_tick) account_user_time(p, jiffies_to_cputime(1)); else account_system_time(p, HARDIRQ_OFFSET, jiffies_to_cputime(1)); + perfctr_sample_thread(&p->thread); run_local_timers(); if (rcu_pending(cpu)) rcu_check_callbacks(cpu, user_tick); papi-5.6.0/src/perfctr-2.6.x/patches/patch-kernel-2.6.14000664 001750 001750 00000040202 13216244367 024221 0ustar00jshenry1963jshenry1963000000 000000 --- linux-2.6.14.perfctr26/CREDITS.~1~ 2008-06-22 19:09:53.000000000 +0200 +++ linux-2.6.14.perfctr26/CREDITS 2008-06-22 19:12:23.000000000 +0200 @@ -2626,9 +2626,10 @@ S: Ottawa, Ontario S: Canada K2P 0X8 N: Mikael Pettersson -E: mikpe@csd.uu.se -W: http://www.csd.uu.se/~mikpe/ +E: mikpe@it.uu.se +W: http://user.it.uu.se/~mikpe/ D: Miscellaneous fixes +D: Performance-monitoring counters driver N: Reed H. Petty E: rhp@draper.net --- linux-2.6.14.perfctr26/Documentation/ioctl-number.txt.~1~ 2008-06-22 19:09:53.000000000 +0200 +++ linux-2.6.14.perfctr26/Documentation/ioctl-number.txt 2008-06-22 19:12:18.000000000 +0200 @@ -190,6 +190,8 @@ Code Seq# Include File Comments 0xB1 00-1F PPPoX 0xCB 00-1F CBM serial IEC bus in development: +0xD0 all performance counters see drivers/perfctr/ + 0xDD 00-3F ZFCP device driver see drivers/s390/scsi/ 0xF3 00-3F video/sisfb.h sisfb (in development) --- linux-2.6.14.perfctr26/MAINTAINERS.~1~ 2008-06-22 19:09:53.000000000 +0200 +++ linux-2.6.14.perfctr26/MAINTAINERS 2008-06-22 19:12:23.000000000 +0200 @@ -1923,6 +1923,12 @@ M: tsbogend@alpha.franken.de L: netdev@vger.kernel.org S: Maintained +PERFORMANCE-MONITORING COUNTERS DRIVER +P: Mikael Pettersson +M: mikpe@it.uu.se +W: http://user.it.uu.se/~mikpe/linux/perfctr/ +S: Maintained + PHRAM MTD DRIVER P: Jörn Engel M: joern@wh.fh-wedel.de --- linux-2.6.14.perfctr26/arch/i386/Kconfig.~1~ 2008-06-22 
19:09:53.000000000 +0200 +++ linux-2.6.14.perfctr26/arch/i386/Kconfig 2008-06-22 19:12:18.000000000 +0200 @@ -945,6 +945,8 @@ config SECCOMP If unsure, say Y. Only embedded should say N here. +source "drivers/perfctr/Kconfig" + source kernel/Kconfig.hz config PHYSICAL_START --- linux-2.6.14.perfctr26/arch/i386/kernel/entry.S.~1~ 2008-06-22 19:09:53.000000000 +0200 +++ linux-2.6.14.perfctr26/arch/i386/kernel/entry.S 2008-06-22 19:12:18.000000000 +0200 @@ -430,6 +430,16 @@ ENTRY(name) \ /* The include is where all of the SMP etc. interrupts come from */ #include "entry_arch.h" +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +ENTRY(perfctr_interrupt) + pushl $LOCAL_PERFCTR_VECTOR-256 + SAVE_ALL + pushl %esp + call smp_perfctr_interrupt + addl $4, %esp + jmp ret_from_intr +#endif + ENTRY(divide_error) pushl $0 # no error code pushl $do_divide_error --- linux-2.6.14.perfctr26/arch/i386/kernel/i8259.c.~1~ 2008-06-22 17:55:14.000000000 +0200 +++ linux-2.6.14.perfctr26/arch/i386/kernel/i8259.c 2008-06-22 19:12:18.000000000 +0200 @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -421,6 +422,8 @@ void __init init_IRQ(void) */ intr_init_hook(); + perfctr_vector_init(); + /* * Set the clock to HZ Hz, we already have a valid * vector now: --- linux-2.6.14.perfctr26/arch/i386/kernel/process.c.~1~ 2008-06-22 19:09:53.000000000 +0200 +++ linux-2.6.14.perfctr26/arch/i386/kernel/process.c 2008-06-22 19:12:18.000000000 +0200 @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -393,6 +394,7 @@ void exit_thread(void) tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET; put_cpu(); } + perfctr_exit_thread(&tsk->thread); } void flush_thread(void) @@ -472,6 +474,8 @@ int copy_thread(int nr, unsigned long cl savesegment(fs,p->thread.fs); savesegment(gs,p->thread.gs); + perfctr_copy_task(p, regs); + tsk = current; if (unlikely(NULL != tsk->thread.io_bitmap_ptr)) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); @@ 
-733,6 +737,8 @@ struct task_struct fastcall * __switch_t disable_tsc(prev_p, next_p); + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.14.perfctr26/arch/ppc/Kconfig.~1~ 2008-06-22 19:09:53.000000000 +0200 +++ linux-2.6.14.perfctr26/arch/ppc/Kconfig 2008-06-22 19:12:18.000000000 +0200 @@ -288,6 +288,8 @@ config NOT_COHERENT_CACHE depends on 4xx || 8xx || E200 default y +source "drivers/perfctr/Kconfig" + endmenu menu "Platform options" --- linux-2.6.14.perfctr26/arch/ppc/kernel/head.S.~1~ 2008-06-22 19:09:53.000000000 +0200 +++ linux-2.6.14.perfctr26/arch/ppc/kernel/head.S 2008-06-22 19:12:18.000000000 +0200 @@ -502,7 +502,11 @@ SystemCall: Trap_0f: EXCEPTION_PROLOG addi r3,r1,STACK_FRAME_OVERHEAD +#ifdef CONFIG_PERFCTR_INTERRUPT_SUPPORT + EXC_XFER_EE(0xf00, do_perfctr_interrupt) +#else EXC_XFER_EE(0xf00, UnknownException) +#endif /* * Handle TLB miss for instruction on 603/603e. --- linux-2.6.14.perfctr26/arch/ppc/kernel/process.c.~1~ 2008-06-22 19:09:53.000000000 +0200 +++ linux-2.6.14.perfctr26/arch/ppc/kernel/process.c 2008-06-22 19:12:18.000000000 +0200 @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -301,7 +302,9 @@ struct task_struct *__switch_to(struct t #endif /* CONFIG_SPE */ new_thread = &new->thread; old_thread = &current->thread; + perfctr_suspend_thread(&prev->thread); last = _switch(old_thread, new_thread); + perfctr_resume_thread(&current->thread); local_irq_restore(s); return last; } @@ -363,6 +366,7 @@ void exit_thread(void) if (last_task_used_spe == current) last_task_used_spe = NULL; #endif + perfctr_exit_thread(&current->thread); } void flush_thread(void) @@ -455,6 +459,8 @@ copy_thread(int nr, unsigned long clone_ p->thread.last_syscall = -1; + perfctr_copy_task(p, regs); + return 0; } --- linux-2.6.14.perfctr26/arch/x86_64/Kconfig.~1~ 2008-06-22 19:09:53.000000000 +0200 +++ linux-2.6.14.perfctr26/arch/x86_64/Kconfig 2008-06-22 19:12:18.000000000 +0200 @@ -419,6 +419,8 @@ config SECCOMP If unsure, say Y.
Only embedded should say N here. +source "drivers/perfctr/Kconfig" + source kernel/Kconfig.hz endmenu --- linux-2.6.14.perfctr26/arch/x86_64/kernel/entry.S.~1~ 2008-06-22 19:09:53.000000000 +0200 +++ linux-2.6.14.perfctr26/arch/x86_64/kernel/entry.S 2008-06-22 19:12:18.000000000 +0200 @@ -645,6 +645,11 @@ ENTRY(spurious_interrupt) apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt #endif +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +ENTRY(perfctr_interrupt) + apicinterrupt LOCAL_PERFCTR_VECTOR,smp_perfctr_interrupt +#endif + /* * Exception entry points. */ --- linux-2.6.14.perfctr26/arch/x86_64/kernel/i8259.c.~1~ 2008-06-22 19:09:53.000000000 +0200 +++ linux-2.6.14.perfctr26/arch/x86_64/kernel/i8259.c 2008-06-22 19:12:18.000000000 +0200 @@ -23,6 +23,7 @@ #include #include #include +#include /* * Common place to define all x86 IRQ vectors @@ -590,6 +591,8 @@ void __init init_IRQ(void) set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); #endif + perfctr_vector_init(); + /* * Set the clock to HZ Hz, we already have a valid * vector now: --- linux-2.6.14.perfctr26/arch/x86_64/kernel/process.c.~1~ 2008-06-22 19:09:53.000000000 +0200 +++ linux-2.6.14.perfctr26/arch/x86_64/kernel/process.c 2008-06-22 19:12:18.000000000 +0200 @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -345,6 +346,7 @@ void exit_thread(void) t->io_bitmap_max = 0; put_cpu(); } + perfctr_exit_thread(&me->thread); } void flush_thread(void) @@ -454,6 +456,8 @@ int copy_thread(int nr, unsigned long cl asm("mov %%es,%0" : "=m" (p->thread.es)); asm("mov %%ds,%0" : "=m" (p->thread.ds)); + perfctr_copy_task(p, regs); + if (unlikely(me->thread.io_bitmap_ptr != NULL)) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) { @@ -603,6 +607,8 @@ struct task_struct *__switch_to(struct t } } + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.14.perfctr26/drivers/Makefile.~1~ 2008-06-22 
19:09:54.000000000 +0200 +++ linux-2.6.14.perfctr26/drivers/Makefile 2008-06-22 19:12:18.000000000 +0200 @@ -65,5 +65,6 @@ obj-$(CONFIG_CPU_FREQ) += cpufreq/ obj-$(CONFIG_MMC) += mmc/ obj-$(CONFIG_INFINIBAND) += infiniband/ obj-$(CONFIG_SGI_IOC4) += sn/ +obj-$(CONFIG_KPERFCTR) += perfctr/ obj-y += firmware/ obj-$(CONFIG_CRYPTO) += crypto/ --- linux-2.6.14.perfctr26/fs/exec.c.~1~ 2008-06-22 19:09:55.000000000 +0200 +++ linux-2.6.14.perfctr26/fs/exec.c 2008-06-22 19:12:23.000000000 +0200 @@ -48,6 +48,7 @@ #include #include #include +#include #include #include @@ -892,6 +893,7 @@ int flush_old_exec(struct linux_binprm * set_task_comm(current, tcomm); current->flags &= ~PF_RANDOMIZE; + perfctr_flush_thread(&current->thread); flush_thread(); if (bprm->e_uid != current->euid || bprm->e_gid != current->egid || --- linux-2.6.14.perfctr26/include/asm-i386/mach-default/irq_vectors.h.~1~ 2008-06-22 15:56:54.000000000 +0200 +++ linux-2.6.14.perfctr26/include/asm-i386/mach-default/irq_vectors.h 2008-06-22 19:12:18.000000000 +0200 @@ -56,14 +56,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 --- linux-2.6.14.perfctr26/include/asm-i386/mach-visws/irq_vectors.h.~1~ 2008-06-22 15:56:54.000000000 +0200 +++ linux-2.6.14.perfctr26/include/asm-i386/mach-visws/irq_vectors.h 2008-06-22 19:12:18.000000000 +0200 @@ -35,14 +35,15 @@ * sources per level' errata.
*/ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 --- linux-2.6.14.perfctr26/include/asm-i386/processor.h.~1~ 2008-06-22 19:09:55.000000000 +0200 +++ linux-2.6.14.perfctr26/include/asm-i386/processor.h 2008-06-22 19:12:18.000000000 +0200 @@ -458,6 +458,8 @@ struct thread_struct { unsigned long iopl; /* max allowed port in the bitmap, in bytes: */ unsigned long io_bitmap_max; +/* performance counters */ + struct vperfctr *perfctr; }; #define INIT_THREAD { \ --- linux-2.6.14.perfctr26/include/asm-i386/system.h.~1~ 2008-06-22 19:09:55.000000000 +0200 +++ linux-2.6.14.perfctr26/include/asm-i386/system.h 2008-06-22 19:12:18.000000000 +0200 @@ -14,6 +14,7 @@ extern struct task_struct * FASTCALL(__s #define switch_to(prev,next,last) do { \ unsigned long esi,edi; \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile("pushl %%ebp\n\t" \ "movl %%esp,%0\n\t" /* save ESP */ \ "movl %5,%%esp\n\t" /* restore ESP */ \ --- linux-2.6.14.perfctr26/include/asm-ppc/processor.h.~1~ 2008-06-22 19:09:55.000000000 +0200 +++ linux-2.6.14.perfctr26/include/asm-ppc/processor.h 2008-06-22 19:12:18.000000000 +0200 @@ -122,6 +122,9 @@ struct thread_struct { unsigned long spefscr; /* SPE & eFP status */ int used_spe; /* set if process has used spe */ #endif /* CONFIG_SPE */ +#ifdef CONFIG_PERFCTR_VIRTUAL + struct vperfctr *perfctr; /* performance counters */ +#endif }; #define ARCH_MIN_TASKALIGN 16 --- linux-2.6.14.perfctr26/include/asm-x86_64/hw_irq.h.~1~ 2008-06-22 19:09:55.000000000 +0200 +++ linux-2.6.14.perfctr26/include/asm-x86_64/hw_irq.h 2008-06-22 19:12:18.000000000 +0200 @@ -67,14 +67,15 @@ struct 
hw_interrupt_type; * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in irq.h */ #ifndef __ASSEMBLY__ --- linux-2.6.14.perfctr26/include/asm-x86_64/irq.h.~1~ 2008-06-22 18:22:12.000000000 +0200 +++ linux-2.6.14.perfctr26/include/asm-x86_64/irq.h 2008-06-22 19:12:18.000000000 +0200 @@ -29,7 +29,7 @@ */ #define NR_VECTORS 256 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in hw_irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in hw_irq.h */ #ifdef CONFIG_PCI_MSI #define NR_IRQS FIRST_SYSTEM_VECTOR --- linux-2.6.14.perfctr26/include/asm-x86_64/processor.h.~1~ 2008-06-22 19:09:55.000000000 +0200 +++ linux-2.6.14.perfctr26/include/asm-x86_64/processor.h 2008-06-22 19:12:18.000000000 +0200 @@ -252,6 +252,8 @@ struct thread_struct { unsigned io_bitmap_max; /* cached TLS descriptors. 
*/ u64 tls_array[GDT_ENTRY_TLS_ENTRIES]; +/* performance counters */ + struct vperfctr *perfctr; } __attribute__((aligned(16))); #define INIT_THREAD { \ --- linux-2.6.14.perfctr26/include/asm-x86_64/system.h.~1~ 2008-06-22 18:22:12.000000000 +0200 +++ linux-2.6.14.perfctr26/include/asm-x86_64/system.h 2008-06-22 19:12:18.000000000 +0200 @@ -26,7 +26,8 @@ #define __EXTRA_CLOBBER \ ,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15" -#define switch_to(prev,next,last) \ +#define switch_to(prev,next,last) do { \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile(SAVE_CONTEXT \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ @@ -46,7 +47,8 @@ [tif_fork] "i" (TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, thread_info)), \ [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ - : "memory", "cc" __EXTRA_CLOBBER) + : "memory", "cc" __EXTRA_CLOBBER); \ +} while (0) extern void load_gs_index(unsigned); --- linux-2.6.14.perfctr26/kernel/exit.c.~1~ 2008-06-22 19:09:55.000000000 +0200 +++ linux-2.6.14.perfctr26/kernel/exit.c 2008-06-22 19:12:18.000000000 +0200 @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -100,6 +101,7 @@ repeat: zap_leader = (leader->exit_signal == -1); } + perfctr_release_task(p); sched_exit(p); write_unlock_irq(&tasklist_lock); spin_unlock(&p->proc_lock); --- linux-2.6.14.perfctr26/kernel/sched.c.~1~ 2008-06-22 19:09:55.000000000 +0200 +++ linux-2.6.14.perfctr26/kernel/sched.c 2008-06-22 19:12:18.000000000 +0200 @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -4361,6 +4362,8 @@ int set_cpus_allowed(task_t *p, cpumask_ migration_req_t req; runqueue_t *rq; + perfctr_set_cpus_allowed(p, new_mask); + rq = task_rq_lock(p, &flags); if (!cpus_intersects(new_mask, cpu_online_map)) { ret = -EINVAL; --- linux-2.6.14.perfctr26/kernel/timer.c.~1~ 2008-06-22 19:09:55.000000000 +0200 +++ 
linux-2.6.14.perfctr26/kernel/timer.c 2008-06-22 19:12:18.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -846,6 +847,7 @@ void update_process_times(int user_tick) account_user_time(p, jiffies_to_cputime(1)); else account_system_time(p, HARDIRQ_OFFSET, jiffies_to_cputime(1)); + perfctr_sample_thread(&p->thread); run_local_timers(); if (rcu_pending(cpu)) rcu_check_callbacks(cpu, user_tick); papi-5.6.0/src/perfctr-2.6.x/patches/patch-kernel-2.6.13000664 001750 001750 00000040157 13216244367 024231 0ustar00jshenry1963jshenry1963000000 000000 --- linux-2.6.13.perfctr26/CREDITS.~1~ 2008-06-22 19:21:37.000000000 +0200 +++ linux-2.6.13.perfctr26/CREDITS 2008-06-22 19:24:32.000000000 +0200 @@ -2624,9 +2624,10 @@ S: Ottawa, Ontario S: Canada K2P 0X8 N: Mikael Pettersson -E: mikpe@csd.uu.se -W: http://www.csd.uu.se/~mikpe/ +E: mikpe@it.uu.se +W: http://user.it.uu.se/~mikpe/ D: Miscellaneous fixes +D: Performance-monitoring counters driver N: Reed H. Petty E: rhp@draper.net --- linux-2.6.13.perfctr26/Documentation/ioctl-number.txt.~1~ 2008-06-22 19:09:53.000000000 +0200 +++ linux-2.6.13.perfctr26/Documentation/ioctl-number.txt 2008-06-22 19:24:26.000000000 +0200 @@ -190,6 +190,8 @@ Code Seq# Include File Comments 0xB1 00-1F PPPoX 0xCB 00-1F CBM serial IEC bus in development: +0xD0 all performance counters see drivers/perfctr/ + 0xDD 00-3F ZFCP device driver see drivers/s390/scsi/ 0xF3 00-3F video/sisfb.h sisfb (in development) --- linux-2.6.13.perfctr26/MAINTAINERS.~1~ 2008-06-22 19:21:37.000000000 +0200 +++ linux-2.6.13.perfctr26/MAINTAINERS 2008-06-22 19:24:32.000000000 +0200 @@ -1842,6 +1842,12 @@ M: tsbogend@alpha.franken.de L: netdev@vger.kernel.org S: Maintained +PERFORMANCE-MONITORING COUNTERS DRIVER +P: Mikael Pettersson +M: mikpe@it.uu.se +W: http://user.it.uu.se/~mikpe/linux/perfctr/ +S: Maintained + PHRAM MTD DRIVER P: Jörn Engel M: joern@wh.fh-wedel.de --- linux-2.6.13.perfctr26/arch/i386/Kconfig.~1~ 2008-06-22 
19:21:37.000000000 +0200 +++ linux-2.6.13.perfctr26/arch/i386/Kconfig 2008-06-22 19:24:26.000000000 +0200 @@ -941,6 +941,8 @@ config SECCOMP If unsure, say Y. Only embedded should say N here. +source "drivers/perfctr/Kconfig" + source kernel/Kconfig.hz config PHYSICAL_START --- linux-2.6.13.perfctr26/arch/i386/kernel/entry.S.~1~ 2008-06-22 19:21:37.000000000 +0200 +++ linux-2.6.13.perfctr26/arch/i386/kernel/entry.S 2008-06-22 19:24:26.000000000 +0200 @@ -427,6 +427,16 @@ ENTRY(name) \ /* The include is where all of the SMP etc. interrupts come from */ #include "entry_arch.h" +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +ENTRY(perfctr_interrupt) + pushl $LOCAL_PERFCTR_VECTOR-256 + SAVE_ALL + pushl %esp + call smp_perfctr_interrupt + addl $4, %esp + jmp ret_from_intr +#endif + ENTRY(divide_error) pushl $0 # no error code pushl $do_divide_error --- linux-2.6.13.perfctr26/arch/i386/kernel/i8259.c.~1~ 2008-06-22 19:21:37.000000000 +0200 +++ linux-2.6.13.perfctr26/arch/i386/kernel/i8259.c 2008-06-22 19:24:26.000000000 +0200 @@ -24,6 +24,7 @@ #include #include #include +#include #include @@ -424,6 +425,8 @@ void __init init_IRQ(void) */ intr_init_hook(); + perfctr_vector_init(); + /* * Set the clock to HZ Hz, we already have a valid * vector now: --- linux-2.6.13.perfctr26/arch/i386/kernel/process.c.~1~ 2008-06-22 19:21:37.000000000 +0200 +++ linux-2.6.13.perfctr26/arch/i386/kernel/process.c 2008-06-22 19:24:26.000000000 +0200 @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -399,6 +400,7 @@ void exit_thread(void) tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET; put_cpu(); } + perfctr_exit_thread(&tsk->thread); } void flush_thread(void) @@ -478,6 +480,8 @@ int copy_thread(int nr, unsigned long cl savesegment(fs,p->thread.fs); savesegment(gs,p->thread.gs); + perfctr_copy_task(p, regs); + tsk = current; if (unlikely(NULL != tsk->thread.io_bitmap_ptr)) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); @@ 
-728,6 +732,8 @@ struct task_struct fastcall * __switch_t disable_tsc(prev_p, next_p); + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.13.perfctr26/arch/ppc/Kconfig.~1~ 2008-06-22 19:21:37.000000000 +0200 +++ linux-2.6.13.perfctr26/arch/ppc/Kconfig 2008-06-22 19:24:26.000000000 +0200 @@ -279,6 +279,8 @@ config NOT_COHERENT_CACHE depends on 4xx || 8xx || E200 default y +source "drivers/perfctr/Kconfig" + endmenu menu "Platform options" --- linux-2.6.13.perfctr26/arch/ppc/kernel/head.S.~1~ 2008-06-22 19:21:37.000000000 +0200 +++ linux-2.6.13.perfctr26/arch/ppc/kernel/head.S 2008-06-22 19:24:26.000000000 +0200 @@ -502,7 +502,11 @@ SystemCall: Trap_0f: EXCEPTION_PROLOG addi r3,r1,STACK_FRAME_OVERHEAD +#ifdef CONFIG_PERFCTR_INTERRUPT_SUPPORT + EXC_XFER_EE(0xf00, do_perfctr_interrupt) +#else EXC_XFER_EE(0xf00, UnknownException) +#endif /* * Handle TLB miss for instruction on 603/603e. --- linux-2.6.13.perfctr26/arch/ppc/kernel/process.c.~1~ 2008-06-22 19:09:53.000000000 +0200 +++ linux-2.6.13.perfctr26/arch/ppc/kernel/process.c 2008-06-22 19:24:26.000000000 +0200 @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -301,7 +302,9 @@ struct task_struct *__switch_to(struct t #endif /* CONFIG_SPE */ new_thread = &new->thread; old_thread = &current->thread; + perfctr_suspend_thread(&prev->thread); last = _switch(old_thread, new_thread); + perfctr_resume_thread(&current->thread); local_irq_restore(s); return last; } @@ -363,6 +366,7 @@ void exit_thread(void) if (last_task_used_spe == current) last_task_used_spe = NULL; #endif + perfctr_exit_thread(&current->thread); } void flush_thread(void) @@ -455,6 +459,8 @@ copy_thread(int nr, unsigned long clone_ p->thread.last_syscall = -1; + perfctr_copy_task(p, regs); + return 0; } --- linux-2.6.13.perfctr26/arch/x86_64/Kconfig.~1~ 2008-06-22 19:21:38.000000000 +0200 +++ linux-2.6.13.perfctr26/arch/x86_64/Kconfig 2008-06-22 19:24:26.000000000 +0200 @@ -417,6 +417,8 @@ config SECCOMP If unsure, say Y.
Only embedded should say N here. +source "drivers/perfctr/Kconfig" + source kernel/Kconfig.hz endmenu --- linux-2.6.13.perfctr26/arch/x86_64/kernel/entry.S.~1~ 2008-06-22 19:21:38.000000000 +0200 +++ linux-2.6.13.perfctr26/arch/x86_64/kernel/entry.S 2008-06-22 19:24:26.000000000 +0200 @@ -554,6 +554,11 @@ ENTRY(spurious_interrupt) apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt #endif +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +ENTRY(perfctr_interrupt) + apicinterrupt LOCAL_PERFCTR_VECTOR,smp_perfctr_interrupt +#endif + /* * Exception entry points. */ --- linux-2.6.13.perfctr26/arch/x86_64/kernel/i8259.c.~1~ 2008-06-22 19:21:38.000000000 +0200 +++ linux-2.6.13.perfctr26/arch/x86_64/kernel/i8259.c 2008-06-22 19:24:26.000000000 +0200 @@ -24,6 +24,7 @@ #include #include #include +#include #include @@ -579,6 +580,8 @@ void __init init_IRQ(void) set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); #endif + perfctr_vector_init(); + /* * Set the clock to HZ Hz, we already have a valid * vector now: --- linux-2.6.13.perfctr26/arch/x86_64/kernel/process.c.~1~ 2008-06-22 19:21:38.000000000 +0200 +++ linux-2.6.13.perfctr26/arch/x86_64/kernel/process.c 2008-06-22 19:24:26.000000000 +0200 @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -341,6 +342,7 @@ void exit_thread(void) t->io_bitmap_max = 0; put_cpu(); } + perfctr_exit_thread(&me->thread); } void flush_thread(void) @@ -450,6 +452,8 @@ int copy_thread(int nr, unsigned long cl asm("mov %%es,%0" : "=m" (p->thread.es)); asm("mov %%ds,%0" : "=m" (p->thread.ds)); + perfctr_copy_task(p, regs); + if (unlikely(me->thread.io_bitmap_ptr != NULL)) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) { @@ -628,6 +632,8 @@ struct task_struct *__switch_to(struct t disable_tsc(prev_p, next_p); + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.13.perfctr26/drivers/Makefile.~1~ 2008-06-22 19:21:38.000000000 +0200 +++ 
linux-2.6.13.perfctr26/drivers/Makefile 2008-06-22 19:24:26.000000000 +0200 @@ -63,5 +63,6 @@ obj-$(CONFIG_CPU_FREQ) += cpufreq/ obj-$(CONFIG_MMC) += mmc/ obj-$(CONFIG_INFINIBAND) += infiniband/ obj-$(CONFIG_SGI_IOC4) += sn/ +obj-$(CONFIG_KPERFCTR) += perfctr/ obj-y += firmware/ obj-$(CONFIG_CRYPTO) += crypto/ --- linux-2.6.13.perfctr26/fs/exec.c.~1~ 2008-06-22 19:21:39.000000000 +0200 +++ linux-2.6.13.perfctr26/fs/exec.c 2008-06-22 19:24:32.000000000 +0200 @@ -48,6 +48,7 @@ #include #include #include +#include #include #include @@ -896,6 +897,7 @@ int flush_old_exec(struct linux_binprm * set_task_comm(current, tcomm); current->flags &= ~PF_RANDOMIZE; + perfctr_flush_thread(&current->thread); flush_thread(); if (bprm->e_uid != current->euid || bprm->e_gid != current->egid || --- linux-2.6.13.perfctr26/include/asm-i386/mach-default/irq_vectors.h.~1~ 2008-06-22 15:56:54.000000000 +0200 +++ linux-2.6.13.perfctr26/include/asm-i386/mach-default/irq_vectors.h 2008-06-22 19:24:26.000000000 +0200 @@ -56,14 +56,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 --- linux-2.6.13.perfctr26/include/asm-i386/mach-visws/irq_vectors.h.~1~ 2008-06-22 15:56:54.000000000 +0200 +++ linux-2.6.13.perfctr26/include/asm-i386/mach-visws/irq_vectors.h 2008-06-22 19:24:26.000000000 +0200 @@ -35,14 +35,15 @@ * sources per level' errata.
*/ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 --- linux-2.6.13.perfctr26/include/asm-i386/processor.h.~1~ 2008-06-22 19:21:39.000000000 +0200 +++ linux-2.6.13.perfctr26/include/asm-i386/processor.h 2008-06-22 19:24:26.000000000 +0200 @@ -456,6 +456,8 @@ struct thread_struct { unsigned long *io_bitmap_ptr; /* max allowed port in the bitmap, in bytes: */ unsigned long io_bitmap_max; +/* performance counters */ + struct vperfctr *perfctr; }; #define INIT_THREAD { \ --- linux-2.6.13.perfctr26/include/asm-i386/system.h.~1~ 2008-06-22 19:21:39.000000000 +0200 +++ linux-2.6.13.perfctr26/include/asm-i386/system.h 2008-06-22 19:24:26.000000000 +0200 @@ -14,6 +14,7 @@ extern struct task_struct * FASTCALL(__s #define switch_to(prev,next,last) do { \ unsigned long esi,edi; \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile("pushfl\n\t" \ "pushl %%ebp\n\t" \ "movl %%esp,%0\n\t" /* save ESP */ \ --- linux-2.6.13.perfctr26/include/asm-ppc/processor.h.~1~ 2008-06-22 19:09:55.000000000 +0200 +++ linux-2.6.13.perfctr26/include/asm-ppc/processor.h 2008-06-22 19:24:26.000000000 +0200 @@ -122,6 +122,9 @@ struct thread_struct { unsigned long spefscr; /* SPE & eFP status */ int used_spe; /* set if process has used spe */ #endif /* CONFIG_SPE */ +#ifdef CONFIG_PERFCTR_VIRTUAL + struct vperfctr *perfctr; /* performance counters */ +#endif }; #define ARCH_MIN_TASKALIGN 16 --- linux-2.6.13.perfctr26/include/asm-x86_64/hw_irq.h.~1~ 2008-06-22 19:21:39.000000000 +0200 +++ linux-2.6.13.perfctr26/include/asm-x86_64/hw_irq.h 2008-06-22 19:24:26.000000000 +0200 @@ -65,14 +65,15 @@ struct hw_interrupt_type; * 
sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in irq.h */ #ifndef __ASSEMBLY__ --- linux-2.6.13.perfctr26/include/asm-x86_64/irq.h.~1~ 2008-06-22 19:21:39.000000000 +0200 +++ linux-2.6.13.perfctr26/include/asm-x86_64/irq.h 2008-06-22 19:24:26.000000000 +0200 @@ -29,7 +29,7 @@ */ #define NR_VECTORS 256 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in hw_irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in hw_irq.h */ #ifdef CONFIG_PCI_MSI #define NR_IRQS FIRST_SYSTEM_VECTOR --- linux-2.6.13.perfctr26/include/asm-x86_64/processor.h.~1~ 2008-06-22 19:21:39.000000000 +0200 +++ linux-2.6.13.perfctr26/include/asm-x86_64/processor.h 2008-06-22 19:24:26.000000000 +0200 @@ -252,6 +252,8 @@ struct thread_struct { unsigned io_bitmap_max; /* cached TLS descriptors. 
*/ u64 tls_array[GDT_ENTRY_TLS_ENTRIES]; +/* performance counters */ + struct vperfctr *perfctr; } __attribute__((aligned(16))); #define INIT_THREAD {} --- linux-2.6.13.perfctr26/include/asm-x86_64/system.h.~1~ 2008-06-22 19:21:39.000000000 +0200 +++ linux-2.6.13.perfctr26/include/asm-x86_64/system.h 2008-06-22 19:24:26.000000000 +0200 @@ -26,7 +26,8 @@ #define __EXTRA_CLOBBER \ ,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15" -#define switch_to(prev,next,last) \ +#define switch_to(prev,next,last) do { \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile(SAVE_CONTEXT \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ @@ -46,7 +47,8 @@ [tif_fork] "i" (TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, thread_info)), \ [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ - : "memory", "cc" __EXTRA_CLOBBER) + : "memory", "cc" __EXTRA_CLOBBER); \ +} while (0) extern void load_gs_index(unsigned); --- linux-2.6.13.perfctr26/kernel/exit.c.~1~ 2008-06-22 19:21:39.000000000 +0200 +++ linux-2.6.13.perfctr26/kernel/exit.c 2008-06-22 19:24:26.000000000 +0200 @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -100,6 +101,7 @@ repeat: zap_leader = (leader->exit_signal == -1); } + perfctr_release_task(p); sched_exit(p); write_unlock_irq(&tasklist_lock); spin_unlock(&p->proc_lock); --- linux-2.6.13.perfctr26/kernel/sched.c.~1~ 2008-06-22 19:21:39.000000000 +0200 +++ linux-2.6.13.perfctr26/kernel/sched.c 2008-06-22 19:24:26.000000000 +0200 @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -4252,6 +4253,8 @@ int set_cpus_allowed(task_t *p, cpumask_ migration_req_t req; runqueue_t *rq; + perfctr_set_cpus_allowed(p, new_mask); + rq = task_rq_lock(p, &flags); if (!cpus_intersects(new_mask, cpu_online_map)) { ret = -EINVAL; --- linux-2.6.13.perfctr26/kernel/timer.c.~1~ 2008-06-22 19:21:39.000000000 +0200 +++ 
linux-2.6.13.perfctr26/kernel/timer.c 2008-06-22 19:24:26.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -846,6 +847,7 @@ void update_process_times(int user_tick) account_user_time(p, jiffies_to_cputime(1)); else account_system_time(p, HARDIRQ_OFFSET, jiffies_to_cputime(1)); + perfctr_sample_thread(&p->thread); run_local_timers(); if (rcu_pending(cpu)) rcu_check_callbacks(cpu, user_tick); papi-5.6.0/src/perfctr-2.6.x/patches/patch-kernel-2.6.12000664 001750 001750 00000040067 13216244367 024230 0ustar00jshenry1963jshenry1963000000 000000 --- linux-2.6.12.perfctr26/CREDITS.~1~ 2008-06-22 19:39:44.000000000 +0200 +++ linux-2.6.12.perfctr26/CREDITS 2008-06-22 19:41:44.000000000 +0200 @@ -2617,9 +2617,10 @@ S: Ottawa, Ontario S: Canada K2P 0X8 N: Mikael Pettersson -E: mikpe@csd.uu.se -W: http://www.csd.uu.se/~mikpe/ +E: mikpe@it.uu.se +W: http://user.it.uu.se/~mikpe/ D: Miscellaneous fixes +D: Performance-monitoring counters driver N: Reed H. Petty E: rhp@draper.net --- linux-2.6.12.perfctr26/Documentation/ioctl-number.txt.~1~ 2008-06-22 19:09:53.000000000 +0200 +++ linux-2.6.12.perfctr26/Documentation/ioctl-number.txt 2008-06-22 19:41:40.000000000 +0200 @@ -190,6 +190,8 @@ Code Seq# Include File Comments 0xB1 00-1F PPPoX 0xCB 00-1F CBM serial IEC bus in development: +0xD0 all performance counters see drivers/perfctr/ + 0xDD 00-3F ZFCP device driver see drivers/s390/scsi/ 0xF3 00-3F video/sisfb.h sisfb (in development) --- linux-2.6.12.perfctr26/MAINTAINERS.~1~ 2008-06-22 19:39:44.000000000 +0200 +++ linux-2.6.12.perfctr26/MAINTAINERS 2008-06-22 19:41:44.000000000 +0200 @@ -1786,6 +1786,12 @@ M: tsbogend@alpha.franken.de L: netdev@vger.kernel.org S: Maintained +PERFORMANCE-MONITORING COUNTERS DRIVER +P: Mikael Pettersson +M: mikpe@it.uu.se +W: http://user.it.uu.se/~mikpe/linux/perfctr/ +S: Maintained + PHRAM MTD DRIVER P: Jörn Engel M: joern@wh.fh-wedel.de --- linux-2.6.12.perfctr26/arch/i386/Kconfig.~1~ 2008-06-22 
19:39:44.000000000 +0200 +++ linux-2.6.12.perfctr26/arch/i386/Kconfig 2008-06-22 19:41:40.000000000 +0200 @@ -939,6 +939,8 @@ config SECCOMP If unsure, say Y. Only embedded should say N here. +source "drivers/perfctr/Kconfig" + endmenu --- linux-2.6.12.perfctr26/arch/i386/kernel/entry.S.~1~ 2008-06-22 19:21:37.000000000 +0200 +++ linux-2.6.12.perfctr26/arch/i386/kernel/entry.S 2008-06-22 19:41:40.000000000 +0200 @@ -427,6 +427,16 @@ ENTRY(name) \ /* The include is where all of the SMP etc. interrupts come from */ #include "entry_arch.h" +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +ENTRY(perfctr_interrupt) + pushl $LOCAL_PERFCTR_VECTOR-256 + SAVE_ALL + pushl %esp + call smp_perfctr_interrupt + addl $4, %esp + jmp ret_from_intr +#endif + ENTRY(divide_error) pushl $0 # no error code pushl $do_divide_error --- linux-2.6.12.perfctr26/arch/i386/kernel/i8259.c.~1~ 2008-06-22 19:39:44.000000000 +0200 +++ linux-2.6.12.perfctr26/arch/i386/kernel/i8259.c 2008-06-22 19:41:40.000000000 +0200 @@ -24,6 +24,7 @@ #include #include #include +#include #include @@ -412,6 +413,8 @@ void __init init_IRQ(void) */ intr_init_hook(); + perfctr_vector_init(); + /* * Set the clock to HZ Hz, we already have a valid * vector now: --- linux-2.6.12.perfctr26/arch/i386/kernel/process.c.~1~ 2008-06-22 19:39:44.000000000 +0200 +++ linux-2.6.12.perfctr26/arch/i386/kernel/process.c 2008-06-22 19:41:40.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -351,6 +352,7 @@ void exit_thread(void) tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET; put_cpu(); } + perfctr_exit_thread(&tsk->thread); } void flush_thread(void) @@ -423,6 +425,8 @@ int copy_thread(int nr, unsigned long cl savesegment(fs,p->thread.fs); savesegment(gs,p->thread.gs); + perfctr_copy_task(p, regs); + tsk = current; if (unlikely(NULL != tsk->thread.io_bitmap_ptr)) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); @@ -639,6 +643,8 @@ struct task_struct 
fastcall * __switch_t if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) handle_io_bitmap(next, tss); + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.12.perfctr26/arch/ppc/Kconfig.~1~ 2008-06-22 19:39:44.000000000 +0200 +++ linux-2.6.12.perfctr26/arch/ppc/Kconfig 2008-06-22 19:41:40.000000000 +0200 @@ -257,6 +257,8 @@ config NOT_COHERENT_CACHE depends on 4xx || 8xx default y +source "drivers/perfctr/Kconfig" + endmenu menu "Platform options" --- linux-2.6.12.perfctr26/arch/ppc/kernel/head.S.~1~ 2008-06-22 19:21:37.000000000 +0200 +++ linux-2.6.12.perfctr26/arch/ppc/kernel/head.S 2008-06-22 19:41:40.000000000 +0200 @@ -502,7 +502,11 @@ SystemCall: Trap_0f: EXCEPTION_PROLOG addi r3,r1,STACK_FRAME_OVERHEAD +#ifdef CONFIG_PERFCTR_INTERRUPT_SUPPORT + EXC_XFER_EE(0xf00, do_perfctr_interrupt) +#else EXC_XFER_EE(0xf00, UnknownException) +#endif /* * Handle TLB miss for instruction on 603/603e. --- linux-2.6.12.perfctr26/arch/ppc/kernel/process.c.~1~ 2008-06-22 19:09:53.000000000 +0200 +++ linux-2.6.12.perfctr26/arch/ppc/kernel/process.c 2008-06-22 19:41:40.000000000 +0200 @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -301,7 +302,9 @@ struct task_struct *__switch_to(struct t #endif /* CONFIG_SPE */ new_thread = &new->thread; old_thread = ¤t->thread; + perfctr_suspend_thread(&prev->thread); last = _switch(old_thread, new_thread); + perfctr_resume_thread(¤t->thread); local_irq_restore(s); return last; } @@ -363,6 +366,7 @@ void exit_thread(void) if (last_task_used_spe == current) last_task_used_spe = NULL; #endif + perfctr_exit_thread(¤t->thread); } void flush_thread(void) @@ -455,6 +459,8 @@ copy_thread(int nr, unsigned long clone_ p->thread.last_syscall = -1; + perfctr_copy_task(p, regs); + return 0; } --- linux-2.6.12.perfctr26/arch/x86_64/Kconfig.~1~ 2008-06-22 19:39:45.000000000 +0200 +++ linux-2.6.12.perfctr26/arch/x86_64/Kconfig 2008-06-22 19:41:40.000000000 +0200 @@ -381,6 +381,8 @@ config SECCOMP If unsure, say Y. 
Only embedded should say N here. +source "drivers/perfctr/Kconfig" + endmenu # --- linux-2.6.12.perfctr26/arch/x86_64/kernel/entry.S.~1~ 2008-06-22 19:39:45.000000000 +0200 +++ linux-2.6.12.perfctr26/arch/x86_64/kernel/entry.S 2008-06-22 19:41:40.000000000 +0200 @@ -554,6 +554,11 @@ ENTRY(spurious_interrupt) apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt #endif +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +ENTRY(perfctr_interrupt) + apicinterrupt LOCAL_PERFCTR_VECTOR,smp_perfctr_interrupt +#endif + /* * Exception entry points. */ --- linux-2.6.12.perfctr26/arch/x86_64/kernel/i8259.c.~1~ 2008-06-22 19:39:45.000000000 +0200 +++ linux-2.6.12.perfctr26/arch/x86_64/kernel/i8259.c 2008-06-22 19:41:40.000000000 +0200 @@ -24,6 +24,7 @@ #include #include #include +#include #include @@ -568,6 +569,8 @@ void __init init_IRQ(void) set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); #endif + perfctr_vector_init(); + /* * Set the clock to HZ Hz, we already have a valid * vector now: --- linux-2.6.12.perfctr26/arch/x86_64/kernel/process.c.~1~ 2008-06-22 19:39:45.000000000 +0200 +++ linux-2.6.12.perfctr26/arch/x86_64/kernel/process.c 2008-06-22 19:41:40.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -305,6 +306,7 @@ void exit_thread(void) t->io_bitmap_max = 0; put_cpu(); } + perfctr_exit_thread(&me->thread); } void flush_thread(void) @@ -407,6 +409,8 @@ int copy_thread(int nr, unsigned long cl asm("mov %%es,%0" : "=m" (p->thread.es)); asm("mov %%ds,%0" : "=m" (p->thread.ds)); + perfctr_copy_task(p, regs); + if (unlikely(me->thread.io_bitmap_ptr != NULL)) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) { @@ -556,6 +560,8 @@ struct task_struct *__switch_to(struct t } } + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.12.perfctr26/drivers/Makefile.~1~ 2008-06-22 19:39:45.000000000 +0200 +++ linux-2.6.12.perfctr26/drivers/Makefile 2008-06-22 
19:41:40.000000000 +0200 @@ -62,5 +62,6 @@ obj-$(CONFIG_CPU_FREQ) += cpufreq/ obj-$(CONFIG_MMC) += mmc/ obj-$(CONFIG_INFINIBAND) += infiniband/ obj-$(CONFIG_BLK_DEV_SGIIOC4) += sn/ +obj-$(CONFIG_KPERFCTR) += perfctr/ obj-y += firmware/ obj-$(CONFIG_CRYPTO) += crypto/ --- linux-2.6.12.perfctr26/fs/exec.c.~1~ 2008-06-22 19:39:46.000000000 +0200 +++ linux-2.6.12.perfctr26/fs/exec.c 2008-06-22 19:41:44.000000000 +0200 @@ -48,6 +48,7 @@ #include #include #include +#include #include #include @@ -882,6 +883,7 @@ int flush_old_exec(struct linux_binprm * set_task_comm(current, tcomm); current->flags &= ~PF_RANDOMIZE; + perfctr_flush_thread(¤t->thread); flush_thread(); if (bprm->e_uid != current->euid || bprm->e_gid != current->egid || --- linux-2.6.12.perfctr26/include/asm-i386/mach-default/irq_vectors.h.~1~ 2008-06-22 15:56:54.000000000 +0200 +++ linux-2.6.12.perfctr26/include/asm-i386/mach-default/irq_vectors.h 2008-06-22 19:41:40.000000000 +0200 @@ -56,14 +56,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 --- linux-2.6.12.perfctr26/include/asm-i386/mach-visws/irq_vectors.h.~1~ 2008-06-22 15:56:54.000000000 +0200 +++ linux-2.6.12.perfctr26/include/asm-i386/mach-visws/irq_vectors.h 2008-06-22 19:41:40.000000000 +0200 @@ -35,14 +35,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. 
(0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 --- linux-2.6.12.perfctr26/include/asm-i386/processor.h.~1~ 2008-06-22 19:39:46.000000000 +0200 +++ linux-2.6.12.perfctr26/include/asm-i386/processor.h 2008-06-22 19:41:40.000000000 +0200 @@ -456,6 +456,8 @@ struct thread_struct { unsigned long *io_bitmap_ptr; /* max allowed port in the bitmap, in bytes: */ unsigned long io_bitmap_max; +/* performance counters */ + struct vperfctr *perfctr; }; #define INIT_THREAD { \ --- linux-2.6.12.perfctr26/include/asm-i386/system.h.~1~ 2008-06-22 19:21:39.000000000 +0200 +++ linux-2.6.12.perfctr26/include/asm-i386/system.h 2008-06-22 19:41:40.000000000 +0200 @@ -14,6 +14,7 @@ extern struct task_struct * FASTCALL(__s #define switch_to(prev,next,last) do { \ unsigned long esi,edi; \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile("pushfl\n\t" \ "pushl %%ebp\n\t" \ "movl %%esp,%0\n\t" /* save ESP */ \ --- linux-2.6.12.perfctr26/include/asm-ppc/processor.h.~1~ 2008-06-22 19:09:55.000000000 +0200 +++ linux-2.6.12.perfctr26/include/asm-ppc/processor.h 2008-06-22 19:41:40.000000000 +0200 @@ -122,6 +122,9 @@ struct thread_struct { unsigned long spefscr; /* SPE & eFP status */ int used_spe; /* set if process has used spe */ #endif /* CONFIG_SPE */ +#ifdef CONFIG_PERFCTR_VIRTUAL + struct vperfctr *perfctr; /* performance counters */ +#endif }; #define ARCH_MIN_TASKALIGN 16 --- linux-2.6.12.perfctr26/include/asm-x86_64/hw_irq.h.~1~ 2008-06-22 19:21:39.000000000 +0200 +++ linux-2.6.12.perfctr26/include/asm-x86_64/hw_irq.h 2008-06-22 19:41:40.000000000 +0200 @@ -65,14 +65,15 @@ struct hw_interrupt_type; * sources per level' errata. 
*/ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in irq.h */ #ifndef __ASSEMBLY__ --- linux-2.6.12.perfctr26/include/asm-x86_64/irq.h.~1~ 2008-06-22 19:39:46.000000000 +0200 +++ linux-2.6.12.perfctr26/include/asm-x86_64/irq.h 2008-06-22 19:41:40.000000000 +0200 @@ -29,7 +29,7 @@ */ #define NR_VECTORS 256 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in hw_irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in hw_irq.h */ #ifdef CONFIG_PCI_MSI #define NR_IRQS FIRST_SYSTEM_VECTOR --- linux-2.6.12.perfctr26/include/asm-x86_64/processor.h.~1~ 2008-06-22 19:39:46.000000000 +0200 +++ linux-2.6.12.perfctr26/include/asm-x86_64/processor.h 2008-06-22 19:41:40.000000000 +0200 @@ -251,6 +251,8 @@ struct thread_struct { unsigned io_bitmap_max; /* cached TLS descriptors. 
*/ u64 tls_array[GDT_ENTRY_TLS_ENTRIES]; +/* performance counters */ + struct vperfctr *perfctr; } __attribute__((aligned(16))); #define INIT_THREAD {} --- linux-2.6.12.perfctr26/include/asm-x86_64/system.h.~1~ 2008-06-22 19:39:46.000000000 +0200 +++ linux-2.6.12.perfctr26/include/asm-x86_64/system.h 2008-06-22 19:41:40.000000000 +0200 @@ -26,7 +26,8 @@ #define __EXTRA_CLOBBER \ ,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15" -#define switch_to(prev,next,last) \ +#define switch_to(prev,next,last) do { \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile(SAVE_CONTEXT \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ @@ -46,7 +47,8 @@ [tif_fork] "i" (TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, thread_info)), \ [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ - : "memory", "cc" __EXTRA_CLOBBER) + : "memory", "cc" __EXTRA_CLOBBER); \ +} while (0) extern void load_gs_index(unsigned); --- linux-2.6.12.perfctr26/kernel/exit.c.~1~ 2008-06-22 19:39:46.000000000 +0200 +++ linux-2.6.12.perfctr26/kernel/exit.c 2008-06-22 19:41:40.000000000 +0200 @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -95,6 +96,7 @@ repeat: zap_leader = (leader->exit_signal == -1); } + perfctr_release_task(p); sched_exit(p); write_unlock_irq(&tasklist_lock); spin_unlock(&p->proc_lock); --- linux-2.6.12.perfctr26/kernel/sched.c.~1~ 2008-06-22 19:39:46.000000000 +0200 +++ linux-2.6.12.perfctr26/kernel/sched.c 2008-06-22 19:41:40.000000000 +0200 @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -4083,6 +4084,8 @@ int set_cpus_allowed(task_t *p, cpumask_ migration_req_t req; runqueue_t *rq; + perfctr_set_cpus_allowed(p, new_mask); + rq = task_rq_lock(p, &flags); if (!cpus_intersects(new_mask, cpu_online_map)) { ret = -EINVAL; --- linux-2.6.12.perfctr26/kernel/timer.c.~1~ 2008-06-22 19:39:46.000000000 +0200 +++ 
linux-2.6.12.perfctr26/kernel/timer.c 2008-06-22 19:41:40.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -822,6 +823,7 @@ void update_process_times(int user_tick) account_user_time(p, jiffies_to_cputime(1)); else account_system_time(p, HARDIRQ_OFFSET, jiffies_to_cputime(1)); + perfctr_sample_thread(&p->thread); run_local_timers(); if (rcu_pending(cpu)) rcu_check_callbacks(cpu, user_tick); papi-5.6.0/src/perfctr-2.6.x/patches/patch-kernel-2.6.11000775 001750 001750 00000040117 13216244367 024226 0ustar00jshenry1963jshenry1963000000 000000 --- linux-2.6.11.perfctr26/CREDITS.~1~ 2008-06-22 20:17:14.000000000 +0200 +++ linux-2.6.11.perfctr26/CREDITS 2008-06-22 20:20:14.000000000 +0200 @@ -2617,9 +2617,10 @@ S: Ottawa, Ontario S: Canada K2P 0X8 N: Mikael Pettersson -E: mikpe@csd.uu.se -W: http://www.csd.uu.se/~mikpe/ +E: mikpe@it.uu.se +W: http://user.it.uu.se/~mikpe/ D: Miscellaneous fixes +D: Performance-monitoring counters driver N: Reed H. Petty E: rhp@draper.net --- linux-2.6.11.perfctr26/Documentation/ioctl-number.txt.~1~ 2008-06-22 19:09:53.000000000 +0200 +++ linux-2.6.11.perfctr26/Documentation/ioctl-number.txt 2008-06-22 20:20:08.000000000 +0200 @@ -190,6 +190,8 @@ Code Seq# Include File Comments 0xB1 00-1F PPPoX 0xCB 00-1F CBM serial IEC bus in development: +0xD0 all performance counters see drivers/perfctr/ + 0xDD 00-3F ZFCP device driver see drivers/s390/scsi/ 0xF3 00-3F video/sisfb.h sisfb (in development) --- linux-2.6.11.perfctr26/MAINTAINERS.~1~ 2008-06-22 20:17:14.000000000 +0200 +++ linux-2.6.11.perfctr26/MAINTAINERS 2008-06-22 20:20:14.000000000 +0200 @@ -1769,6 +1769,12 @@ M: tsbogend@alpha.franken.de L: linux-net@vger.kernel.org S: Maintained +PERFORMANCE-MONITORING COUNTERS DRIVER +P: Mikael Pettersson +M: mikpe@it.uu.se +W: http://user.it.uu.se/~mikpe/linux/perfctr/ +S: Maintained + PHRAM MTD DRIVER P: Jörn Engel M: joern@wh.fh-wedel.de --- linux-2.6.11.perfctr26/arch/i386/Kconfig.~1~ 2008-06-22 
20:17:14.000000000 +0200 +++ linux-2.6.11.perfctr26/arch/i386/Kconfig 2008-06-22 20:20:08.000000000 +0200 @@ -888,6 +888,8 @@ config REGPARM generate incorrect output with certain kernel constructs when -mregparm=3 is used. +source "drivers/perfctr/Kconfig" + endmenu --- linux-2.6.11.perfctr26/arch/i386/kernel/entry.S.~1~ 2008-06-22 20:17:14.000000000 +0200 +++ linux-2.6.11.perfctr26/arch/i386/kernel/entry.S 2008-06-22 20:20:08.000000000 +0200 @@ -372,6 +372,16 @@ ENTRY(name) \ /* The include is where all of the SMP etc. interrupts come from */ #include "entry_arch.h" +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +ENTRY(perfctr_interrupt) + pushl $LOCAL_PERFCTR_VECTOR-256 + SAVE_ALL + pushl %esp + call smp_perfctr_interrupt + addl $4, %esp + jmp ret_from_intr +#endif + ENTRY(divide_error) pushl $0 # no error code pushl $do_divide_error --- linux-2.6.11.perfctr26/arch/i386/kernel/i8259.c.~1~ 2008-06-22 20:17:14.000000000 +0200 +++ linux-2.6.11.perfctr26/arch/i386/kernel/i8259.c 2008-06-22 20:20:08.000000000 +0200 @@ -24,6 +24,7 @@ #include #include #include +#include #include @@ -413,6 +414,8 @@ void __init init_IRQ(void) */ intr_init_hook(); + perfctr_vector_init(); + /* * Set the clock to HZ Hz, we already have a valid * vector now: --- linux-2.6.11.perfctr26/arch/i386/kernel/process.c.~1~ 2008-06-22 20:17:14.000000000 +0200 +++ linux-2.6.11.perfctr26/arch/i386/kernel/process.c 2008-06-22 20:20:08.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -339,6 +340,7 @@ void exit_thread(void) tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET; put_cpu(); } + perfctr_exit_thread(&tsk->thread); } void flush_thread(void) @@ -400,6 +402,8 @@ int copy_thread(int nr, unsigned long cl savesegment(fs,p->thread.fs); savesegment(gs,p->thread.gs); + perfctr_copy_task(p, regs); + tsk = current; if (unlikely(NULL != tsk->thread.io_bitmap_ptr)) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); @@ -623,6 
+627,8 @@ struct task_struct fastcall * __switch_t if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) handle_io_bitmap(next, tss); + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.11.perfctr26/arch/ppc/Kconfig.~1~ 2008-06-22 20:17:14.000000000 +0200 +++ linux-2.6.11.perfctr26/arch/ppc/Kconfig 2008-06-22 20:20:08.000000000 +0200 @@ -245,6 +245,8 @@ config NOT_COHERENT_CACHE depends on 4xx || 8xx default y +source "drivers/perfctr/Kconfig" + endmenu menu "Platform options" --- linux-2.6.11.perfctr26/arch/ppc/kernel/head.S.~1~ 2008-06-22 20:17:14.000000000 +0200 +++ linux-2.6.11.perfctr26/arch/ppc/kernel/head.S 2008-06-22 20:20:08.000000000 +0200 @@ -502,7 +502,11 @@ SystemCall: Trap_0f: EXCEPTION_PROLOG addi r3,r1,STACK_FRAME_OVERHEAD +#ifdef CONFIG_PERFCTR_INTERRUPT_SUPPORT + EXC_XFER_EE(0xf00, do_perfctr_interrupt) +#else EXC_XFER_EE(0xf00, UnknownException) +#endif /* * Handle TLB miss for instruction on 603/603e. --- linux-2.6.11.perfctr26/arch/ppc/kernel/process.c.~1~ 2008-06-22 19:09:53.000000000 +0200 +++ linux-2.6.11.perfctr26/arch/ppc/kernel/process.c 2008-06-22 20:20:08.000000000 +0200 @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -301,7 +302,9 @@ struct task_struct *__switch_to(struct t #endif /* CONFIG_SPE */ new_thread = &new->thread; old_thread = ¤t->thread; + perfctr_suspend_thread(&prev->thread); last = _switch(old_thread, new_thread); + perfctr_resume_thread(¤t->thread); local_irq_restore(s); return last; } @@ -363,6 +366,7 @@ void exit_thread(void) if (last_task_used_spe == current) last_task_used_spe = NULL; #endif + perfctr_exit_thread(¤t->thread); } void flush_thread(void) @@ -455,6 +459,8 @@ copy_thread(int nr, unsigned long clone_ p->thread.last_syscall = -1; + perfctr_copy_task(p, regs); + return 0; } --- linux-2.6.11.perfctr26/arch/x86_64/Kconfig.~1~ 2008-06-22 20:17:14.000000000 +0200 +++ linux-2.6.11.perfctr26/arch/x86_64/Kconfig 2008-06-22 20:20:08.000000000 +0200 @@ -350,6 +350,9 @@ 
config X86_MCE_INTEL help Additional support for intel specific MCE features such as the thermal monitor. + +source "drivers/perfctr/Kconfig" + endmenu # --- linux-2.6.11.perfctr26/arch/x86_64/kernel/entry.S.~1~ 2008-06-22 20:17:14.000000000 +0200 +++ linux-2.6.11.perfctr26/arch/x86_64/kernel/entry.S 2008-06-22 20:20:08.000000000 +0200 @@ -562,6 +562,11 @@ ENTRY(spurious_interrupt) apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt #endif +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +ENTRY(perfctr_interrupt) + apicinterrupt LOCAL_PERFCTR_VECTOR,smp_perfctr_interrupt +#endif + /* * Exception entry points. */ --- linux-2.6.11.perfctr26/arch/x86_64/kernel/i8259.c.~1~ 2008-06-22 20:17:14.000000000 +0200 +++ linux-2.6.11.perfctr26/arch/x86_64/kernel/i8259.c 2008-06-22 20:20:08.000000000 +0200 @@ -24,6 +24,7 @@ #include #include #include +#include #include @@ -562,6 +563,8 @@ void __init init_IRQ(void) set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); #endif + perfctr_vector_init(); + /* * Set the clock to HZ Hz, we already have a valid * vector now: --- linux-2.6.11.perfctr26/arch/x86_64/kernel/process.c.~1~ 2008-06-22 20:17:14.000000000 +0200 +++ linux-2.6.11.perfctr26/arch/x86_64/kernel/process.c 2008-06-22 20:20:08.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -293,6 +294,7 @@ void exit_thread(void) t->io_bitmap_max = 0; put_cpu(); } + perfctr_exit_thread(&me->thread); } void flush_thread(void) @@ -395,6 +397,8 @@ int copy_thread(int nr, unsigned long cl asm("movl %%es,%0" : "=m" (p->thread.es)); asm("movl %%ds,%0" : "=m" (p->thread.ds)); + perfctr_copy_task(p, regs); + if (unlikely(me->thread.io_bitmap_ptr != NULL)) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) { @@ -544,6 +548,8 @@ struct task_struct *__switch_to(struct t } } + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.11.perfctr26/drivers/Makefile.~1~ 2008-06-22 
20:17:14.000000000 +0200 +++ linux-2.6.11.perfctr26/drivers/Makefile 2008-06-22 20:20:08.000000000 +0200 @@ -61,5 +61,6 @@ obj-$(CONFIG_EISA) += eisa/ obj-$(CONFIG_CPU_FREQ) += cpufreq/ obj-$(CONFIG_MMC) += mmc/ obj-$(CONFIG_INFINIBAND) += infiniband/ +obj-$(CONFIG_KPERFCTR) += perfctr/ obj-y += firmware/ obj-$(CONFIG_CRYPTO) += crypto/ --- linux-2.6.11.perfctr26/fs/exec.c.~1~ 2008-06-22 20:17:15.000000000 +0200 +++ linux-2.6.11.perfctr26/fs/exec.c 2008-06-22 20:20:14.000000000 +0200 @@ -48,6 +48,7 @@ #include #include #include +#include #include #include @@ -877,6 +878,7 @@ int flush_old_exec(struct linux_binprm * tcomm[i] = '\0'; set_task_comm(current, tcomm); + perfctr_flush_thread(¤t->thread); flush_thread(); if (bprm->e_uid != current->euid || bprm->e_gid != current->egid || --- linux-2.6.11.perfctr26/include/asm-i386/mach-default/irq_vectors.h.~1~ 2008-06-22 15:56:54.000000000 +0200 +++ linux-2.6.11.perfctr26/include/asm-i386/mach-default/irq_vectors.h 2008-06-22 20:20:08.000000000 +0200 @@ -56,14 +56,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 --- linux-2.6.11.perfctr26/include/asm-i386/mach-visws/irq_vectors.h.~1~ 2008-06-22 15:56:54.000000000 +0200 +++ linux-2.6.11.perfctr26/include/asm-i386/mach-visws/irq_vectors.h 2008-06-22 20:20:08.000000000 +0200 @@ -35,14 +35,15 @@ * sources per level' errata. 
*/ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 --- linux-2.6.11.perfctr26/include/asm-i386/processor.h.~1~ 2008-06-22 20:17:15.000000000 +0200 +++ linux-2.6.11.perfctr26/include/asm-i386/processor.h 2008-06-22 20:20:08.000000000 +0200 @@ -444,6 +444,8 @@ struct thread_struct { unsigned long *io_bitmap_ptr; /* max allowed port in the bitmap, in bytes: */ unsigned long io_bitmap_max; +/* performance counters */ + struct vperfctr *perfctr; }; #define INIT_THREAD { \ --- linux-2.6.11.perfctr26/include/asm-i386/system.h.~1~ 2008-06-22 20:17:15.000000000 +0200 +++ linux-2.6.11.perfctr26/include/asm-i386/system.h 2008-06-22 20:20:08.000000000 +0200 @@ -14,6 +14,7 @@ extern struct task_struct * FASTCALL(__s #define switch_to(prev,next,last) do { \ unsigned long esi,edi; \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile("pushfl\n\t" \ "pushl %%ebp\n\t" \ "movl %%esp,%0\n\t" /* save ESP */ \ --- linux-2.6.11.perfctr26/include/asm-ppc/processor.h.~1~ 2008-06-22 19:09:55.000000000 +0200 +++ linux-2.6.11.perfctr26/include/asm-ppc/processor.h 2008-06-22 20:20:08.000000000 +0200 @@ -122,6 +122,9 @@ struct thread_struct { unsigned long spefscr; /* SPE & eFP status */ int used_spe; /* set if process has used spe */ #endif /* CONFIG_SPE */ +#ifdef CONFIG_PERFCTR_VIRTUAL + struct vperfctr *perfctr; /* performance counters */ +#endif }; #define ARCH_MIN_TASKALIGN 16 --- linux-2.6.11.perfctr26/include/asm-x86_64/hw_irq.h.~1~ 2008-06-22 19:21:39.000000000 +0200 +++ linux-2.6.11.perfctr26/include/asm-x86_64/hw_irq.h 2008-06-22 20:20:08.000000000 +0200 @@ -65,14 +65,15 @@ struct hw_interrupt_type; * 
sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in irq.h */ #ifndef __ASSEMBLY__ --- linux-2.6.11.perfctr26/include/asm-x86_64/irq.h.~1~ 2008-06-22 19:39:46.000000000 +0200 +++ linux-2.6.11.perfctr26/include/asm-x86_64/irq.h 2008-06-22 20:20:08.000000000 +0200 @@ -29,7 +29,7 @@ */ #define NR_VECTORS 256 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in hw_irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in hw_irq.h */ #ifdef CONFIG_PCI_MSI #define NR_IRQS FIRST_SYSTEM_VECTOR --- linux-2.6.11.perfctr26/include/asm-x86_64/processor.h.~1~ 2008-06-22 20:17:15.000000000 +0200 +++ linux-2.6.11.perfctr26/include/asm-x86_64/processor.h 2008-06-22 20:20:08.000000000 +0200 @@ -253,6 +253,8 @@ struct thread_struct { unsigned io_bitmap_max; /* cached TLS descriptors. 
*/ u64 tls_array[GDT_ENTRY_TLS_ENTRIES]; +/* performance counters */ + struct vperfctr *perfctr; } __attribute__((aligned(16))); #define INIT_THREAD {} --- linux-2.6.11.perfctr26/include/asm-x86_64/system.h.~1~ 2008-06-22 20:17:15.000000000 +0200 +++ linux-2.6.11.perfctr26/include/asm-x86_64/system.h 2008-06-22 20:20:08.000000000 +0200 @@ -26,7 +26,8 @@ #define __EXTRA_CLOBBER \ ,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15" -#define switch_to(prev,next,last) \ +#define switch_to(prev,next,last) do { \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile(SAVE_CONTEXT \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ @@ -46,7 +47,8 @@ [tif_fork] "i" (TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, thread_info)), \ [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ - : "memory", "cc" __EXTRA_CLOBBER) + : "memory", "cc" __EXTRA_CLOBBER); \ +} while (0) extern void load_gs_index(unsigned); --- linux-2.6.11.perfctr26/kernel/exit.c.~1~ 2008-06-22 20:17:15.000000000 +0200 +++ linux-2.6.11.perfctr26/kernel/exit.c 2008-06-22 20:20:08.000000000 +0200 @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -91,6 +92,7 @@ repeat: zap_leader = (leader->exit_signal == -1); } + perfctr_release_task(p); sched_exit(p); write_unlock_irq(&tasklist_lock); spin_unlock(&p->proc_lock); --- linux-2.6.11.perfctr26/kernel/sched.c.~1~ 2008-06-22 20:17:15.000000000 +0200 +++ linux-2.6.11.perfctr26/kernel/sched.c 2008-06-22 20:20:08.000000000 +0200 @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -4115,6 +4116,8 @@ int set_cpus_allowed(task_t *p, cpumask_ migration_req_t req; runqueue_t *rq; + perfctr_set_cpus_allowed(p, new_mask); + rq = task_rq_lock(p, &flags); if (!cpus_intersects(new_mask, cpu_online_map)) { ret = -EINVAL; --- linux-2.6.11.perfctr26/kernel/timer.c.~1~ 2008-06-22 20:17:15.000000000 +0200 +++ 
linux-2.6.11.perfctr26/kernel/timer.c 2008-06-22 20:20:08.000000000 +0200 @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -820,6 +821,7 @@ void update_process_times(int user_tick) account_user_time(p, jiffies_to_cputime(1)); else account_system_time(p, HARDIRQ_OFFSET, jiffies_to_cputime(1)); + perfctr_sample_thread(&p->thread); run_local_timers(); if (rcu_pending(cpu)) rcu_check_callbacks(cpu, user_tick); papi-5.6.0/src/perfctr-2.6.x/patches/patch-kernel-2.6.10000775 001750 001750 00000037631 13216244367 024234 0ustar00jshenry1963jshenry1963000000 000000 --- linux-2.6.10.perfctr26/CREDITS.~1~ 2008-06-22 20:48:52.000000000 +0200 +++ linux-2.6.10.perfctr26/CREDITS 2008-06-22 20:57:42.000000000 +0200 @@ -2589,9 +2589,10 @@ S: Ottawa, Ontario S: Canada K2P 0X8 N: Mikael Pettersson -E: mikpe@csd.uu.se -W: http://www.csd.uu.se/~mikpe/ +E: mikpe@it.uu.se +W: http://user.it.uu.se/~mikpe/ D: Miscellaneous fixes +D: Performance-monitoring counters driver N: Reed H. Petty E: rhp@draper.net --- linux-2.6.10.perfctr26/Documentation/ioctl-number.txt.~1~ 2008-06-22 20:48:52.000000000 +0200 +++ linux-2.6.10.perfctr26/Documentation/ioctl-number.txt 2008-06-22 20:57:10.000000000 +0200 @@ -188,6 +188,8 @@ Code Seq# Include File Comments 0xB1 00-1F PPPoX 0xCB 00-1F CBM serial IEC bus in development: +0xD0 all performance counters see drivers/perfctr/ + 0xDD 00-3F ZFCP device driver see drivers/s390/scsi/ 0xF3 00-3F video/sisfb.h sisfb (in development) --- linux-2.6.10.perfctr26/MAINTAINERS.~1~ 2008-06-22 20:48:52.000000000 +0200 +++ linux-2.6.10.perfctr26/MAINTAINERS 2008-06-22 20:57:42.000000000 +0200 @@ -1731,6 +1731,12 @@ M: tsbogend@alpha.franken.de L: linux-net@vger.kernel.org S: Maintained +PERFORMANCE-MONITORING COUNTERS DRIVER +P: Mikael Pettersson +M: mikpe@it.uu.se +W: http://user.it.uu.se/~mikpe/linux/perfctr/ +S: Maintained + PHRAM MTD DRIVER P: Jörn Engel M: joern@wh.fh-wedel.de --- linux-2.6.10.perfctr26/arch/i386/Kconfig.~1~ 2008-06-22 
20:48:52.000000000 +0200 +++ linux-2.6.10.perfctr26/arch/i386/Kconfig 2008-06-22 20:57:10.000000000 +0200 @@ -874,6 +874,8 @@ config REGPARM generate incorrect output with certain kernel constructs when -mregparm=3 is used. +source "drivers/perfctr/Kconfig" + endmenu --- linux-2.6.10.perfctr26/arch/i386/kernel/entry.S.~1~ 2008-06-22 20:48:52.000000000 +0200 +++ linux-2.6.10.perfctr26/arch/i386/kernel/entry.S 2008-06-22 20:57:10.000000000 +0200 @@ -375,6 +375,16 @@ ENTRY(name) \ /* The include is where all of the SMP etc. interrupts come from */ #include "entry_arch.h" +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +ENTRY(perfctr_interrupt) + pushl $LOCAL_PERFCTR_VECTOR-256 + SAVE_ALL + pushl %esp + call smp_perfctr_interrupt + addl $4, %esp + jmp ret_from_intr +#endif + ENTRY(divide_error) pushl $0 # no error code pushl $do_divide_error --- linux-2.6.10.perfctr26/arch/i386/kernel/i8259.c.~1~ 2008-06-22 20:48:52.000000000 +0200 +++ linux-2.6.10.perfctr26/arch/i386/kernel/i8259.c 2008-06-22 20:57:10.000000000 +0200 @@ -24,6 +24,7 @@ #include #include #include +#include #include @@ -413,6 +414,8 @@ void __init init_IRQ(void) */ intr_init_hook(); + perfctr_vector_init(); + /* * Set the clock to HZ Hz, we already have a valid * vector now: --- linux-2.6.10.perfctr26/arch/i386/kernel/process.c.~1~ 2008-06-22 20:48:52.000000000 +0200 +++ linux-2.6.10.perfctr26/arch/i386/kernel/process.c 2008-06-22 20:57:10.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -323,6 +324,7 @@ void exit_thread(void) tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET; put_cpu(); } + perfctr_exit_thread(&tsk->thread); } void flush_thread(void) @@ -384,6 +386,8 @@ int copy_thread(int nr, unsigned long cl savesegment(fs,p->thread.fs); savesegment(gs,p->thread.gs); + perfctr_copy_task(p, regs); + tsk = current; if (unlikely(NULL != tsk->thread.io_bitmap_ptr)) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); @@ -607,6 
+611,8 @@ struct task_struct fastcall * __switch_t if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) handle_io_bitmap(next, tss); + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.10.perfctr26/arch/ppc/Kconfig.~1~ 2008-06-22 20:48:52.000000000 +0200 +++ linux-2.6.10.perfctr26/arch/ppc/Kconfig 2008-06-22 20:57:10.000000000 +0200 @@ -236,6 +236,8 @@ config NOT_COHERENT_CACHE depends on 4xx || 8xx default y +source "drivers/perfctr/Kconfig" + endmenu menu "Platform options" --- linux-2.6.10.perfctr26/arch/ppc/kernel/head.S.~1~ 2008-06-22 20:17:14.000000000 +0200 +++ linux-2.6.10.perfctr26/arch/ppc/kernel/head.S 2008-06-22 20:57:10.000000000 +0200 @@ -502,7 +502,11 @@ SystemCall: Trap_0f: EXCEPTION_PROLOG addi r3,r1,STACK_FRAME_OVERHEAD +#ifdef CONFIG_PERFCTR_INTERRUPT_SUPPORT + EXC_XFER_EE(0xf00, do_perfctr_interrupt) +#else EXC_XFER_EE(0xf00, UnknownException) +#endif /* * Handle TLB miss for instruction on 603/603e. --- linux-2.6.10.perfctr26/arch/ppc/kernel/process.c.~1~ 2008-06-22 20:48:53.000000000 +0200 +++ linux-2.6.10.perfctr26/arch/ppc/kernel/process.c 2008-06-22 20:57:10.000000000 +0200 @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -301,7 +302,9 @@ struct task_struct *__switch_to(struct t #endif /* CONFIG_SPE */ new_thread = &new->thread; old_thread = ¤t->thread; + perfctr_suspend_thread(&prev->thread); last = _switch(old_thread, new_thread); + perfctr_resume_thread(¤t->thread); local_irq_restore(s); return last; } @@ -374,6 +377,7 @@ void exit_thread(void) if (last_task_used_spe == current) last_task_used_spe = NULL; #endif + perfctr_exit_thread(¤t->thread); } void flush_thread(void) @@ -466,6 +470,8 @@ copy_thread(int nr, unsigned long clone_ p->thread.last_syscall = -1; + perfctr_copy_task(p, regs); + return 0; } --- linux-2.6.10.perfctr26/arch/x86_64/Kconfig.~1~ 2008-06-22 20:48:53.000000000 +0200 +++ linux-2.6.10.perfctr26/arch/x86_64/Kconfig 2008-06-22 20:57:10.000000000 +0200 @@ -338,6 +338,8 @@ 
config X86_MCE machine check error logs. See ftp://ftp.x86-64.org/pub/linux/tools/mcelog +source "drivers/perfctr/Kconfig" + endmenu # --- linux-2.6.10.perfctr26/arch/x86_64/kernel/entry.S.~1~ 2008-06-22 20:48:53.000000000 +0200 +++ linux-2.6.10.perfctr26/arch/x86_64/kernel/entry.S 2008-06-22 20:57:10.000000000 +0200 @@ -560,6 +560,11 @@ ENTRY(spurious_interrupt) apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt #endif +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +ENTRY(perfctr_interrupt) + apicinterrupt LOCAL_PERFCTR_VECTOR,smp_perfctr_interrupt +#endif + /* * Exception entry points. */ --- linux-2.6.10.perfctr26/arch/x86_64/kernel/i8259.c.~1~ 2008-06-22 20:48:53.000000000 +0200 +++ linux-2.6.10.perfctr26/arch/x86_64/kernel/i8259.c 2008-06-22 20:57:10.000000000 +0200 @@ -24,6 +24,7 @@ #include #include #include +#include #include @@ -560,6 +561,8 @@ void __init init_IRQ(void) set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); #endif + perfctr_vector_init(); + /* * Set the clock to HZ Hz, we already have a valid * vector now: --- linux-2.6.10.perfctr26/arch/x86_64/kernel/process.c.~1~ 2008-06-22 20:48:53.000000000 +0200 +++ linux-2.6.10.perfctr26/arch/x86_64/kernel/process.c 2008-06-22 20:57:10.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -276,6 +277,7 @@ void exit_thread(void) t->io_bitmap_max = 0; put_cpu(); } + perfctr_exit_thread(&me->thread); } void flush_thread(void) @@ -378,6 +380,8 @@ int copy_thread(int nr, unsigned long cl asm("movl %%es,%0" : "=m" (p->thread.es)); asm("movl %%ds,%0" : "=m" (p->thread.ds)); + perfctr_copy_task(p, regs); + if (unlikely(me->thread.io_bitmap_ptr != NULL)) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) { @@ -527,6 +531,8 @@ struct task_struct *__switch_to(struct t } } + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.10.perfctr26/drivers/Makefile.~1~ 2008-06-22 20:48:53.000000000 +0200 +++ 
linux-2.6.10.perfctr26/drivers/Makefile 2008-06-22 20:57:10.000000000 +0200 @@ -59,4 +59,5 @@ obj-$(CONFIG_MCA) += mca/ obj-$(CONFIG_EISA) += eisa/ obj-$(CONFIG_CPU_FREQ) += cpufreq/ obj-$(CONFIG_MMC) += mmc/ +obj-$(CONFIG_KPERFCTR) += perfctr/ obj-y += firmware/ --- linux-2.6.10.perfctr26/fs/exec.c.~1~ 2008-06-22 20:48:53.000000000 +0200 +++ linux-2.6.10.perfctr26/fs/exec.c 2008-06-22 20:57:42.000000000 +0200 @@ -47,6 +47,7 @@ #include #include #include +#include #include #include @@ -843,6 +844,7 @@ int flush_old_exec(struct linux_binprm * tcomm[i] = '\0'; set_task_comm(current, tcomm); + perfctr_flush_thread(¤t->thread); flush_thread(); if (bprm->e_uid != current->euid || bprm->e_gid != current->egid || --- linux-2.6.10.perfctr26/include/asm-i386/mach-default/irq_vectors.h.~1~ 2008-06-22 15:56:54.000000000 +0200 +++ linux-2.6.10.perfctr26/include/asm-i386/mach-default/irq_vectors.h 2008-06-22 20:57:10.000000000 +0200 @@ -56,14 +56,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 --- linux-2.6.10.perfctr26/include/asm-i386/mach-visws/irq_vectors.h.~1~ 2008-06-22 15:56:54.000000000 +0200 +++ linux-2.6.10.perfctr26/include/asm-i386/mach-visws/irq_vectors.h 2008-06-22 20:57:10.000000000 +0200 @@ -35,14 +35,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. 
(0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 --- linux-2.6.10.perfctr26/include/asm-i386/processor.h.~1~ 2008-06-22 20:48:54.000000000 +0200 +++ linux-2.6.10.perfctr26/include/asm-i386/processor.h 2008-06-22 20:57:10.000000000 +0200 @@ -436,6 +436,8 @@ struct thread_struct { unsigned long *io_bitmap_ptr; /* max allowed port in the bitmap, in bytes: */ unsigned long io_bitmap_max; +/* performance counters */ + struct vperfctr *perfctr; }; #define INIT_THREAD { \ --- linux-2.6.10.perfctr26/include/asm-i386/system.h.~1~ 2008-06-22 20:48:54.000000000 +0200 +++ linux-2.6.10.perfctr26/include/asm-i386/system.h 2008-06-22 20:57:10.000000000 +0200 @@ -14,6 +14,7 @@ extern struct task_struct * FASTCALL(__s #define switch_to(prev,next,last) do { \ unsigned long esi,edi; \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile("pushfl\n\t" \ "pushl %%ebp\n\t" \ "movl %%esp,%0\n\t" /* save ESP */ \ --- linux-2.6.10.perfctr26/include/asm-ppc/processor.h.~1~ 2008-06-22 20:48:54.000000000 +0200 +++ linux-2.6.10.perfctr26/include/asm-ppc/processor.h 2008-06-22 20:57:10.000000000 +0200 @@ -126,6 +126,9 @@ struct thread_struct { unsigned long spefscr; /* SPE & eFP status */ int used_spe; /* set if process has used spe */ #endif /* CONFIG_SPE */ +#ifdef CONFIG_PERFCTR_VIRTUAL + struct vperfctr *perfctr; /* performance counters */ +#endif }; #define ARCH_MIN_TASKALIGN 16 --- linux-2.6.10.perfctr26/include/asm-x86_64/hw_irq.h.~1~ 2008-06-22 20:48:54.000000000 +0200 +++ linux-2.6.10.perfctr26/include/asm-x86_64/hw_irq.h 2008-06-22 20:57:10.000000000 +0200 @@ -65,14 +65,15 @@ struct hw_interrupt_type; * sources per level' errata. 
*/ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in irq.h */ #ifndef __ASSEMBLY__ --- linux-2.6.10.perfctr26/include/asm-x86_64/irq.h.~1~ 2008-06-22 19:39:46.000000000 +0200 +++ linux-2.6.10.perfctr26/include/asm-x86_64/irq.h 2008-06-22 20:57:10.000000000 +0200 @@ -29,7 +29,7 @@ */ #define NR_VECTORS 256 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in hw_irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in hw_irq.h */ #ifdef CONFIG_PCI_MSI #define NR_IRQS FIRST_SYSTEM_VECTOR --- linux-2.6.10.perfctr26/include/asm-x86_64/processor.h.~1~ 2008-06-22 20:48:54.000000000 +0200 +++ linux-2.6.10.perfctr26/include/asm-x86_64/processor.h 2008-06-22 20:57:10.000000000 +0200 @@ -258,6 +258,8 @@ struct thread_struct { unsigned io_bitmap_max; /* cached TLS descriptors. 
*/ u64 tls_array[GDT_ENTRY_TLS_ENTRIES]; +/* performance counters */ + struct vperfctr *perfctr; } __attribute__((aligned(16))); #define INIT_THREAD {} --- linux-2.6.10.perfctr26/include/asm-x86_64/system.h.~1~ 2008-06-22 20:48:54.000000000 +0200 +++ linux-2.6.10.perfctr26/include/asm-x86_64/system.h 2008-06-22 20:57:10.000000000 +0200 @@ -26,7 +26,8 @@ #define __EXTRA_CLOBBER \ ,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15" -#define switch_to(prev,next,last) \ +#define switch_to(prev,next,last) do { \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile(SAVE_CONTEXT \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ @@ -46,7 +47,8 @@ [tif_fork] "i" (TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, thread_info)), \ [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ - : "memory", "cc" __EXTRA_CLOBBER) + : "memory", "cc" __EXTRA_CLOBBER); \ +} while (0) extern void load_gs_index(unsigned); --- linux-2.6.10.perfctr26/kernel/exit.c.~1~ 2008-06-22 20:48:55.000000000 +0200 +++ linux-2.6.10.perfctr26/kernel/exit.c 2008-06-22 20:57:10.000000000 +0200 @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -91,6 +92,7 @@ repeat: zap_leader = (leader->exit_signal == -1); } + perfctr_release_task(p); sched_exit(p); write_unlock_irq(&tasklist_lock); spin_unlock(&p->proc_lock); --- linux-2.6.10.perfctr26/kernel/sched.c.~1~ 2008-06-22 20:48:55.000000000 +0200 +++ linux-2.6.10.perfctr26/kernel/sched.c 2008-06-22 20:57:10.000000000 +0200 @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -3751,6 +3752,8 @@ int set_cpus_allowed(task_t *p, cpumask_ migration_req_t req; runqueue_t *rq; + perfctr_set_cpus_allowed(p, new_mask); + rq = task_rq_lock(p, &flags); if (!cpus_intersects(new_mask, cpu_online_map)) { ret = -EINVAL; --- linux-2.6.10.perfctr26/kernel/timer.c.~1~ 2008-06-22 20:48:55.000000000 +0200 +++ 
linux-2.6.10.perfctr26/kernel/timer.c 2008-06-22 20:57:10.000000000 +0200 @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -850,6 +851,7 @@ static void update_one_process(struct ta do_process_times(p, user, system); do_it_virt(p, user); do_it_prof(p); + perfctr_sample_thread(&p->thread); } /* papi-5.6.0/src/perfctr-2.6.x/examples/perfex/arch.h000775 001750 001750 00000001621 13216244366 023745 0ustar00jshenry1963jshenry1963000000 000000 /* $Id: arch.h,v 1.2.2.3 2010/06/08 20:48:55 mikpe Exp $ * Architecture-specific support code. * * Copyright (C) 2004-2010 Mikael Pettersson */ #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) extern void do_print(FILE *resfile, const struct perfctr_info *info, const struct perfctr_cpu_control *cpu_control, const struct perfctr_sum_ctrs *sum); extern void do_arch_usage(void); /* Hack while phasing out an old number parsing bug. */ extern unsigned long my_strtoul(const char *nptr, char **endptr); extern unsigned int do_event_spec(unsigned int n, const char *arg, struct perfctr_cpu_control *cpu_control); extern int do_arch_option(int ch, const char *arg, struct perfctr_cpu_control *cpu_control); #if defined(__i386__) || defined(__x86_64__) #include "x86.h" #elif defined(__powerpc__) #include "ppc.h" #elif defined(__arm__) #include "arm.h" #endif papi-5.6.0/src/ctests/val_omp.c000664 001750 001750 00000012173 13216244361 020434 0ustar00jshenry1963jshenry1963000000 000000 /* This file performs the following test: each OMP thread measures flops for its provided tasks, and compares this to expected flop counts, each thread having been provided with a random amount of work, such that the time and order that they complete their measurements varies. Specifically tested is the case where the value returned for some threads actually corresponds to that for another thread reading its counter values at the same time. - It is based on zero_omp.c but ignored much of its functionality. 
- It attempts to use the following two counters. It may use less depending on hardware counter resource limitations. These are counted in the default counting domain and default granularity, depending on the platform. Usually this is the user domain (PAPI_DOM_USER) and thread context (PAPI_GRN_THR). + PAPI_FP_INS + PAPI_TOT_CYC Each thread inside the Thread routine: - Do prework (MAX_FLOPS - flops) - Get cyc. - Get us. - Start counters - Do flops - Stop and read counters - Get us. - Get cyc. - Return flops */ #include "papi_test.h" #ifdef _OPENMP #include #else #error "This compiler does not understand OPENMP" #endif const int MAX_FLOPS = NUM_FLOPS; extern int TESTS_QUIET; /* Declared in test_utils.c */ const PAPI_hw_info_t *hw_info = NULL; long long Thread( int n ) { int retval, num_tests = 1; int EventSet1 = PAPI_NULL; int PAPI_event, mask1; int num_events1; long long flops; long long **values; long long elapsed_us, elapsed_cyc; char event_name[PAPI_MAX_STR_LEN]; /* printf("Thread(n=%d) %#x started\n", n, omp_get_thread_num()); */ num_events1 = 2; /* add PAPI_TOT_CYC and one of the events in PAPI_FP_INS, PAPI_FP_OPS or PAPI_TOT_INS, depending on the availability of the event on the platform */ EventSet1 = add_two_events( &num_events1, &PAPI_event, &mask1 ); retval = PAPI_event_code_to_name( PAPI_event, event_name ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); values = allocate_test_space( num_tests, num_events1 ); do_flops( MAX_FLOPS - n ); /* prework for balance */ elapsed_us = PAPI_get_real_usec( ); elapsed_cyc = PAPI_get_real_cyc( ); retval = PAPI_start( EventSet1 ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_start", retval ); do_flops( n ); retval = PAPI_stop( EventSet1, values[0] ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); flops = ( values[0] )[0]; elapsed_us = PAPI_get_real_usec( ) - elapsed_us; elapsed_cyc = PAPI_get_real_cyc( ) - elapsed_cyc; 
remove_test_events( &EventSet1, mask1 ); if ( !TESTS_QUIET ) { /*printf("Thread %#x %-12s : \t%lld\t%d\n", omp_get_thread_num(), event_name, (values[0])[0], n); */ #if 0 printf( "Thread %#x PAPI_TOT_CYC: \t%lld\n", omp_get_thread_num( ), values[0][0] ); printf( "Thread %#x Real usec : \t%lld\n", omp_get_thread_num( ), elapsed_us ); printf( "Thread %#x Real cycles : \t%lld\n", omp_get_thread_num( ), elapsed_cyc ); #endif } /* It is illegal for the threads to exit in OpenMP */ /* test_pass(__FILE__,0,0); */ free_test_space( values, num_tests ); PAPI_unregister_thread( ); /* printf("Thread %#x finished\n", omp_get_thread_num()); */ return flops; } int main( int argc, char **argv ) { int tid, retval; int maxthr = omp_get_max_threads( ); int flopper = 0; long long *flops = calloc( maxthr, sizeof ( long long ) ); long long *flopi = calloc( maxthr, sizeof ( long long ) ); tests_quiet( argc, argv ); /* Set TESTS_QUIET variable */ if ( maxthr < 2 ) test_skip( __FILE__, __LINE__, "omp_get_num_threads < 2", PAPI_EINVAL ); if ( ( flops == NULL ) || ( flopi == NULL ) ) test_fail( __FILE__, __LINE__, "calloc", PAPI_ENOMEM ); retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); hw_info = PAPI_get_hardware_info( ); if ( hw_info == NULL ) test_fail( __FILE__, __LINE__, "PAPI_get_hardware_info", 2 ); retval = PAPI_thread_init( ( unsigned long ( * )( void ) ) ( omp_get_thread_num ) ); if ( retval != PAPI_OK ) if ( retval == PAPI_ECMP ) test_skip( __FILE__, __LINE__, "PAPI_thread_init", retval ); else test_fail( __FILE__, __LINE__, "PAPI_thread_init", retval ); flopper = Thread( 65536 ) / 65536; printf( "flopper=%d\n", flopper ); for ( int i = 0; i < 100000; i++ ) #pragma omp parallel private(tid) { tid = omp_get_thread_num( ); flopi[tid] = rand( ) * 3; flops[tid] = Thread( ( flopi[tid] / flopper ) % MAX_FLOPS ); #pragma omp barrier #pragma omp master if ( flops[tid] < flopi[tid] ) { printf( 
"test iteration=%d\n", i ); for ( int j = 0; j < omp_get_num_threads( ); j++ ) { printf( "Thread %#x Value %6lld %c %6lld", j, flops[j], ( flops[j] < flopi[j] ) ? '<' : '=', flopi[j] ); for ( int k = 0; k < omp_get_num_threads( ); k++ ) if ( ( k != j ) && ( flops[k] == flops[j] ) ) printf( " == Thread %#x!", k ); printf( "\n" ); } test_fail( __FILE__, __LINE__, "value returned for thread", PAPI_EBUG ); } } test_pass( __FILE__, NULL, 0 ); exit( 0 ); } papi-5.6.0/ChangeLogP440.txt000664 001750 001750 00000012207 13216244355 017500 0ustar00jshenry1963jshenry1963000000 000000 2012-04-17 * 8782daed cvs2cl.pl delete_before_release.sh gitlog2changelog.py...: Update the release machinery for git. gitlog2changelog.py takes the output of git log and parses it to something like a changelog. * 80ff04a9 doc/Doxyfile-html: Cover up an instance of doxygen using full paths. Doxygen ( up to 1.8.0, the most recent at this writing ) would use full paths in directory dependencies ignoring the use relative paths config option. * c556dad1 doc/Doxyfile-common papi.spec src/Makefile.in...: Bump the version for the PAPI 4.4.0 release. 2012-04-14 * 27174c0b src/components/bgpm/CNKunit/CVS/Entries src/components/bgpm/CNKunit/CVS/Repository src/components/bgpm/CNKunit/CVS/Root...: Removed CVS stuff from Q code. * 970a2d50 src/configure src/configure.in src/linux-bgq.c...: Removed papi_events.csv parsing from Q code. (CVS stuff still needs to be taken care of.) 2012-04-13 * 853d6c74 src/libpfm-3.y/lib/intel_corei7_events.h src/libpfm-3.y/lib/intel_wsm_events.h src/libpfm-3.y/lib/pfmlib_intel_nhm.c: Add missing update to libpfm3 Somehow during all of the troubles we had with importing libpfm3 into CVS, we lost some Nehalem/Westmere updates. Tested on a Nehalem machine to make sure this doesn't break anything. 2012-04-12 * 07e4fcd6 INSTALL.txt: Updated INSTALL notes for Q * 2a0f919e src/Makefile.in src/Makefile.inc src/components/README...: Added missing files for Q merge. 
* 0b0f1863 src/Rules.bgpm src/components/bgpm/CNKunit/CVS/Entries src/components/bgpm/CNKunit/CVS/Repository...: Added PAPI support for Blue Gene/Q. 2012-02-17 * 147a4969 src/perfctr-2.6.x/usr.lib/event_set_centaur.o src/perfctr-2.6.x/usr.lib/event_set_p5.o src/perfctr-2.6.x/usr.lib/event_set_p6.o: Remove a few binary files in perfctr-2.6.x 2012-02-23 * 955bd899 src/perfctr-2.6.x/usr.lib/event_set_centaur.os src/perfctr-2.6.x/usr.lib/event_set_p5.os src/perfctr-2.6.x/usr.lib/event_set_p6.os: Removes the last of the binary files from perfctr2.6.x Some binary files were left out in the cold after a mishap trying to configure perfctr for the build test. 2012-02-17 * 5fe239c8 src/perfctr-2.6.x/CHANGES src/perfctr-2.6.x/INSTALL src/perfctr-2.6.x/Makefile...: More cleanups from the migration, latest version of libpfm-3.y perfctr-2.[6,7] Version numbers got really confused in cvs and the git cvsimport didn't know that eg 1.1.1.28 > 1.1 ( see perfctr-2.6.x/CHANGES revision 1.1.1.28.6.1 :~) 2012-03-13 * e7173952 src/libpfm-3.y/examples_v2.x/multiplex.c src/libpfm-3.y/examples_v2.x/pfmsetup.c src/libpfm-3.y/examples_v2.x/rtop.c...: Fix some libpfm3 warnings. libpfm3 is not maintained anymore, so applied these changes locally. libpfm3 is compiled with -Werror so they broke the build with newer gcc even though they are just warnings in example programs. 2012-04-09 * 10528517 src/libpfm-3.y/Makefile src/libpfm-3.y/README src/libpfm-3.y/docs/Makefile...: Copy over libpfm-3.y from cvs. libpfm3 was another one of our skeletons in CVS. Thanks to Steve Kaufmann for keeping us honest. 2012-02-17 * ec8c879e src/aix.c src/components/coretemp/linux-coretemp.c src/components/coretemp_freebsd/coretemp_freebsd.c...: The git conversion reset all of the CVS $Id$ lines to just $Id$ Since we depend on the $Id$ lines for the component names, I had to go back and fix all of them to be the component names again. 
2012-03-09 * 71a2ae4f src/components/lmsensors/linux-lmsensors.c: Fix buffer overrun in lmsensors component Conflicts: src/components/lmsensors/linux-lmsensors.c * ec0e1e9a src/libpfm4/config.mk src/libpfm4/docs/man3/pfm_get_os_event_encoding.3 src/libpfm4/examples/showevtinfo.c...: Update to current git libpfm4 snapshot 2012-02-15 * 1312923e src/libpfm4/debian/changelog src/libpfm4/debian/control src/libpfm4/debian/rules...: The git cvsimport didn't get the latest version of the libpfm4 import. This should be the versions as were in cvs now. 2012-02-24 * 81847628 src/papi_events.csv: Fix broken Pentium 4 Prescott support We were missing the netbusrt_p declaration in papi_events.csv 2012-03-01 * 917afc7f src/papi_internal.c: Add some locking in _papi_hwi_shutdown_global_internal This caused a glibc double-free warning, and was caught by the Valgrind helgrind tool in krentel_pthreads There are some other potential locking issues in PAPI_shutdown, especially when debug is enabled. * f85c092f src/papi.c: Fix possible race in _papi_hwi_gather_all_thrspec_data The valgrind helgrind tool noticed this with the thrspecific test 2012-03-09 * 912311ed src/multiplex.c src/papi_internal.c src/papi_libpfm4_events.c...: Fix issue when using more than 31 multiplexed events on perf_event On perf_event we were setting num_mpx_cntrs to 64. This broke, as the MPX_EventSet struct only allocates room for PAPI_MPX_DEF_DEG events, which is 32. This patch makes perf_event use a value of 32 for num_mpx_cntrs, especially as 64 was arbitrarily chosen at some point (the actual value perf_event can support is static, but I'm pretty sure it is higher than 64). Conflicts: src/papi_libpfm4_events.c papi-5.6.0/src/Matlab/FlopsMatrixMatrix.m000664 001750 001750 00000001337 13216244356 022345 0ustar00jshenry1963jshenry1963000000 000000 function FlopsMatrixMatrix % Compute a Matrix Matrix multiply % on square arrays sized from 50 to 500, % in steps of 50. 
% % Use the PAPI flops call to measure the floating point operations performed. % For each size, display: % - number of floating point operations % - theoretical number of operations % - difference % - per cent error % - mflops/s fprintf(1,'\nPAPI Matrix Matrix Multiply Test'); fprintf(1,'\n%12s %12s %12s %12s %12s %12s\n', 'n', 'ops', '2n^3', 'difference', '% error', 'mflops') for n=50:50:500, a=rand(n);b=rand(n);c=rand(n); flops(0); c=c+a*b; [count,mflops]=flops; fprintf(1,'%12d %12d %12d %12d %12.2f %12.2f\n',n,count,2*n^3,count - 2*n^3, (1.0 - ((2*n^3) / count)) * 100,mflops) endpapi-5.6.0/src/libpfm4/docs/man3/pfm_get_event_attr_info.3000664 001750 001750 00000014075 13216244364 025432 0ustar00jshenry1963jshenry1963000000 000000 .TH LIBPFM 3 "December, 2009" "" "Linux Programmer's Manual" .SH NAME pfm_get_event_attr_info \- get event attribute information .SH SYNOPSIS .nf .B #include .sp .BI "int pfm_get_event_attr_info(int " idx ", int " attr ", pfm_os_t " os ", pfm_event_attr_info_t *" info ");" .sp .SH DESCRIPTION This function returns in \fBinfo\fR information about the attribute designated by \fBattr\fR for the event specified in \fBidx\fR and the os layer in \fBos\fR. The \fBpfm_os_t\fR enumeration provides the following choices: .TP .B PFM_OS_NONE The returned information pertains only to what the PMU hardware exports. No operating system attributes is taken into account. .TP .B PFM_OS_PERF_EVENT The returned information includes the actual PMU hardware and the additional attributes exported by the perf_events kernel interface. The perf_event attributes pertain only the PMU hardware. In case perf_events is not detected, an error is returned. .TP .B PFM_OS_PERF_EVENT_EXT The returned information includes all of what is already provided by \fBPFM_OS_PERF_EVENT\fR plus all the software attributes controlled by perf_events, such as sampling period, precise sampling. 
.PP The \fBpfm_event_attr_info_t\fR structure is defined as follows: .nf typedef struct { const char *name; const char *desc; const char *equiv; size_t size; uint64_t code; pfm_attr_t type; int idx; pfm_attr_ctrl_t ctrl; int reserved1; struct { int is_dfl:1; int is_precise:1; int reserved:30; }; union { uint64_t dfl_val64; const char *dfl_str; int dfl_bool; int dfl_int; }; } pfm_event_attr_info_t; .fi The fields of this structure are defined as follows: .TP .B name This is the name of the attribute. This is a read-only string. .TP .B desc This is the description of the attribute. This is a read-only string. It may contain multiple sentences. .TP .B equiv Certain attributes may be just variations of other attributes for the same event. They may be provided as handy shortcuts to avoid supplying a long list of attributes. For those attributes, this field is not NULL and contains the complete equivalent attribute string. This string, once appended to the event name, may be used in library calls requiring an event string. .TP .B code This is the raw attribute code. For PFM_ATTR_UMASK, this is the unit mask code. For all other attributes, this is an opaque index. .TP .B type This is the type of the attribute. Attributes represent either sub-events or extra filters that can be applied to the event. Filters (also called modifiers) may be tied to the event or the PMU register the event is programmed into. The type of an attribute determines how it must be specified. The following types are defined: .RS .TP .B PFM_ATTR_UMASK This is a unit mask, i.e., a sub-event. It is specified using its name. Depending on the event, it may be possible to specify multiple unit masks. .TP .B PFM_ATTR_MOD_BOOL This is a boolean attribute. It has a value of 0, 1, y or n. The value is specified after the equal sign, e.g., foo=1. As a convenience, the equal sign and value may be omitted, in which case this is equivalent to =1. .TP .B PFM_ATTR_MOD_INTEGER This is an integer attribute.
It has a value which must be passed after the equal sign. The range of valid values depends on the attribute and is usually specified in its description. .PP .RE .TP .B idx This is the attribute index. It is identical to the value of \fBattr\fR passed to the call and is provided for completeness. .TP .B size This field contains the size of the struct passed. This field is used to provide for extensibility of the struct without compromising backward compatibility. The value should be set to \fBsizeof(pfm_event_attr_info_t)\fR. If instead, a value of \fB0\fR is specified, the library assumes the struct passed is identical to the first ABI version whose size is \fBPFM_ATTR_INFO_ABI0\fR. Thus, if fields were added after the first ABI, they will not be set by the library. The library does check that bytes beyond what is implemented are zeroes. .TP .B is_dfl This field indicates whether or not this attribute is set by default. This applies mostly for PFM_ATTR_UMASK. If a unit mask is marked as default, and no unit mask is specified in the event string, then the library uses it by default. Note that there may be multiple defaults per event depending on how unit masks are grouped. .TP .B is_precise This field indicates whether or not this umask supports precise sampling. Precise sampling is a hardware mechanism that avoids instruction address skid when using interrupt-based sampling. On Intel X86 processors, this field indicates that the umask supports Precise Event-Based Sampling (PEBS). .TP .B dfl_val64, dfl_str, dfl_bool, dfl_int This union contains the value of an attribute. For PFM_ATTR_UMASK, this is the unit mask code, for all other types this is the actual value of the attribute. .TP .B ctrl This field indicates which layer or source controls the attribute. The following sources are defined: .RS .TP .B PFM_ATTR_CTRL_UNKNOWN The source controlling the attribute is not known. .TP .B PFM_ATTR_CTRL_PMU The attribute is controlled by the PMU hardware.
.TP .B PFM_ATTR_CTRL_PERF_EVENT The attribute is controlled by the perf_events kernel interface. .RE .TP .B reserved These fields must be set to zero. .PP .SH RETURN If successful, the function returns \fBPFM_SUCCESS\fR and attribute information in \fBinfo\fR, otherwise it returns an error code. .SH ERRORS .TP .B PFM_ERR_NOINIT Library has not been initialized properly. .TP .B PFM_ERR_INVAL The \fBidx\fR or \fBattr\fR arguments are invalid or \fBinfo\fR is \fBNULL\fR or \fBsize\fR is not zero. .TP .B PFM_ERR_NOTSUPP The requested os layer has not been detected on the host system. .SH AUTHOR Stephane Eranian .PP papi-5.6.0/src/.indent.pro000664 001750 001750 00000007555 13216244356 017423 0ustar00jshenry1963jshenry1963000000 000000 /** * PAPI - Indent profile.

* * The purpose of this file is to standardize the PAPI's source code style. * Every new/modified source should be formatted with indent using this * profile before it is checked in again. * * @name .indent.pro * * @version $Revision$
* $Date$
* $Author$ * * @author Heike Jagode */ /* use tabs */ --use-tabs /* set tab size to 4 spaces */ --tab-size4 /* set indentation level to 4 spaces, and these will be turned into * tabs by default */ --indent-level4 /* don't put variables in column 16 */ //--declaration-indentation16 /* maximum length of a line is 80 */ --line-length80 /* breakup the procedure type */ --procnames-start-lines // --dont-break-procedure-type /* break long lines after the boolean operators && and || */ --break-after-boolean-operator /* if long lines are already broken up, GNU indent won't touch them */ --honour-newlines /* If a line has a left parenthesis which is not closed on that line, * then continuation lines will be lined up to start at the character * position just after the left parenthesis */ --continue-at-parentheses /* NO! (see --continue-at-parentheses) */ --continuation-indentation0 /* put braces on line with if, etc.*/ --braces-on-if-line //--braces-after-if-line /* put braces on the line after struct declaration lines */ --braces-after-struct-decl-line /* put braces on the line after function definition lines */ --braces-after-func-def-line /* indent braces 0 spaces */ --brace-indent0 /* NO extra struct/union brace indentation */ --struct-brace-indentation0 /* NO extra case brace indentation! 
*/ --case-brace-indentation0 /* put a space after and before every parenthesis */ --space-after-parentheses /* NO extra parentheses indentation in broken lines */ --paren-indentation0 /* blank line causes problems with multi parameter function prototypes */ --no-blank-lines-after-declarations /* forces blank line after every procedure body */ --blank-lines-after-procedures /* NO newline is forced after each comma in a declaration */ --no-blank-lines-after-commas /* allow optional blank lines */ --leave-optional-blank-lines // --swallow-optional-blank-lines /* do not put comment delimiters on blank lines */ --no-comment-delimiters-on-blank-lines /* the maximum comment column is 79 */ --comment-line-length79 /* do not touch comments starting at column 0 */ --dont-format-first-column-comments /* no extra line comment indentation */ --line-comments-indentation0 /* dont star comments */ --dont-star-comments // --start-left-side-of-comments /* comments to the right of the code start at column 30 */ --comment-indentation30 /* comments after declarations start at column 40 */ --declaration-comment-column40 /* comments after #else #endif start at column 8 */ --else-endif-column8 /* Do not cuddle } and the while of a do {} while; */ --dont-cuddle-do-while /* Do cuddle } and else */ --cuddle-else //--dont-cuddle-else /* a case label indentation of 0 */ --case-indentation0 /* put no space after a cast operator */ //--no-space-after-casts /* no space after function call names; * but space after keywords for, if, while */ --no-space-after-function-call-names //--no-space-after-for //--no-space-after-if //--no-space-after-while /* Do not force space between special statements and semicolon */ --dont-space-special-semicolon // --space-special-semicolon /* put a space between sizeof and its argument :TODO: check */ --blank-before-sizeof /* enable verbose mode */ --verbose // --no-verbosity /* NO space between # and preprocessor directives */ // --leave-preprocessor-space /* format
some comments but not all */ // --dont-format-comments /* NO gnu style as default */ // --gun_style /* K&R default style */ --k-and-r-style /* NO Berkeley default style */ // --original /* read this profile :-) */ // --ignore-profile papi-5.6.0/src/components/vmware/tests/vmware_basic.c000664 001750 001750 00000006741 13216244360 024767 0ustar00jshenry1963jshenry1963000000 000000 /** * @author Vince Weaver * * test case for vmware component * * * @brief * Tests basic vmware functionality */ #include #include #include #include "papi.h" #include "papi_test.h" #define NUM_EVENTS 1 int main (int argc, char **argv) { int retval,cid,numcmp; int EventSet = PAPI_NULL; long long values[NUM_EVENTS]; int code; char event_name[PAPI_MAX_STR_LEN]; int total_events=0; int r; const PAPI_component_info_t *cmpinfo = NULL; /* Set TESTS_QUIET variable */ tests_quiet( argc, argv ); /* PAPI Initialization */ retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) { test_fail(__FILE__, __LINE__,"PAPI_library_init failed\n",retval); } if (!TESTS_QUIET) { printf("Trying all vmware events\n"); } /* Find our Component */ numcmp = PAPI_num_components(); for(cid=0; cidname,"vmware")) { if (!TESTS_QUIET) printf("\tFound vmware component %d - %s\n", cid, cmpinfo->name); } else { continue; } PAPI_event_info_t info; /* Try all events one by one */ code = PAPI_NATIVE_MASK; r = PAPI_enum_cmp_event( &code, PAPI_ENUM_FIRST, cid ); while ( r == PAPI_OK ) { retval=PAPI_get_event_info(code,&info); if (retval!=PAPI_OK) { printf("Error getting event info\n"); test_fail( __FILE__, __LINE__, "PAPI_get_event_info", retval ); } retval = PAPI_event_code_to_name( code, event_name ); if ( retval != PAPI_OK ) { printf("Error translating %#x\n",code); test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); } if (!TESTS_QUIET) printf(" %s ",event_name); EventSet = PAPI_NULL; retval = PAPI_create_eventset( &EventSet ); if (retval != PAPI_OK) { test_fail(__FILE__, __LINE__, 
"PAPI_create_eventset()",retval); } retval = PAPI_add_event( EventSet, code ); if (retval != PAPI_OK) { test_fail(__FILE__, __LINE__, "PAPI_add_event()",retval); } /* start */ retval = PAPI_start( EventSet); if (retval != PAPI_OK) { test_fail(__FILE__, __LINE__, "PAPI_start()",retval); } /* do something */ usleep(100); /* stop */ retval = PAPI_stop( EventSet, values); if (retval != PAPI_OK) { test_fail(__FILE__, __LINE__, "PAPI_start()",retval); } if (!TESTS_QUIET) printf(" value: %lld %s\n",values[0], info.units); retval = PAPI_cleanup_eventset( EventSet ); if (retval != PAPI_OK) { test_fail(__FILE__, __LINE__, "PAPI_cleanup_eventset()",retval); } retval = PAPI_destroy_eventset( &EventSet ); if (retval != PAPI_OK) { test_fail(__FILE__, __LINE__, "PAPI_destroy_eventset()",retval); } total_events++; r = PAPI_enum_cmp_event( &code, PAPI_ENUM_EVENTS, cid ); } } if (total_events==0) { test_skip(__FILE__,__LINE__,"No vmware events found",0); } if (!TESTS_QUIET) { printf("\n"); } test_pass( __FILE__ ); return 0; } papi-5.6.0/src/components/nvml/configure000775 001750 001750 00000345023 13216244357 022406 0ustar00jshenry1963jshenry1963000000 000000 #! /bin/sh # Guess values for system-dependent variables and create Makefiles. # Generated by GNU Autoconf 2.59. # # Copyright (C) 2003 Free Software Foundation, Inc. # This configure script is free software; the Free Software Foundation # gives unlimited permission to copy, distribute and modify it. ## --------------------- ## ## M4sh Initialization. ## ## --------------------- ## # Be Bourne compatible if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then emulate sh NULLCMD=: # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which # is contrary to our usage. Disable this feature. alias -g '${1+"$@"}'='"$@"' elif test -n "${BASH_VERSION+set}" && (set -o posix) >/dev/null 2>&1; then set -o posix fi DUALCASE=1; export DUALCASE # for MKS sh # Support unset when possible. 
if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then as_unset=unset else as_unset=false fi # Work around bugs in pre-3.0 UWIN ksh. $as_unset ENV MAIL MAILPATH PS1='$ ' PS2='> ' PS4='+ ' # NLS nuisances. for as_var in \ LANG LANGUAGE LC_ADDRESS LC_ALL LC_COLLATE LC_CTYPE LC_IDENTIFICATION \ LC_MEASUREMENT LC_MESSAGES LC_MONETARY LC_NAME LC_NUMERIC LC_PAPER \ LC_TELEPHONE LC_TIME do if (set +x; test -z "`(eval $as_var=C; export $as_var) 2>&1`"); then eval $as_var=C; export $as_var else $as_unset $as_var fi done # Required to use basename. if expr a : '\(a\)' >/dev/null 2>&1; then as_expr=expr else as_expr=false fi if (basename /) >/dev/null 2>&1 && test "X`basename / 2>&1`" = "X/"; then as_basename=basename else as_basename=false fi # Name of the executable. as_me=`$as_basename "$0" || $as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ X"$0" : 'X\(//\)$' \| \ X"$0" : 'X\(/\)$' \| \ . : '\(.\)' 2>/dev/null || echo X/"$0" | sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/; q; } /^X\/\(\/\/\)$/{ s//\1/; q; } /^X\/\(\/\).*/{ s//\1/; q; } s/.*/./; q'` # PATH needs CR, and LINENO needs CR and PATH. # Avoid depending upon Character Ranges. as_cr_letters='abcdefghijklmnopqrstuvwxyz' as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' as_cr_Letters=$as_cr_letters$as_cr_LETTERS as_cr_digits='0123456789' as_cr_alnum=$as_cr_Letters$as_cr_digits # The user is always right. if test "${PATH_SEPARATOR+set}" != set; then echo "#! /bin/sh" >conf$$.sh echo "exit 0" >>conf$$.sh chmod +x conf$$.sh if (PATH="/nonexistent;."; conf$$.sh) >/dev/null 2>&1; then PATH_SEPARATOR=';' else PATH_SEPARATOR=: fi rm -f conf$$.sh fi as_lineno_1=$LINENO as_lineno_2=$LINENO as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null` test "x$as_lineno_1" != "x$as_lineno_2" && test "x$as_lineno_3" = "x$as_lineno_2" || { # Find who we are. Look in the path if we contain no path at all # relative or not. 
case $0 in *[\\/]* ) as_myself=$0 ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break done ;; esac # We did not find ourselves, most probably we were run as `sh COMMAND' # in which case we are not to be found in the path. if test "x$as_myself" = x; then as_myself=$0 fi if test ! -f "$as_myself"; then { echo "$as_me: error: cannot find myself; rerun with an absolute path" >&2 { (exit 1); exit 1; }; } fi case $CONFIG_SHELL in '') as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for as_base in sh bash ksh sh5; do case $as_dir in /*) if ("$as_dir/$as_base" -c ' as_lineno_1=$LINENO as_lineno_2=$LINENO as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null` test "x$as_lineno_1" != "x$as_lineno_2" && test "x$as_lineno_3" = "x$as_lineno_2" ') 2>/dev/null; then $as_unset BASH_ENV || test "${BASH_ENV+set}" != set || { BASH_ENV=; export BASH_ENV; } $as_unset ENV || test "${ENV+set}" != set || { ENV=; export ENV; } CONFIG_SHELL=$as_dir/$as_base export CONFIG_SHELL exec "$CONFIG_SHELL" "$0" ${1+"$@"} fi;; esac done done ;; esac # Create $as_me.lineno as a copy of $as_myself, but with $LINENO # uniformly replaced by the line number. The first 'sed' inserts a # line-number line before each line; the second 'sed' does the real # work. The second script uses 'N' to pair each line-number line # with the numbered line, and appends trailing '-' during # substitution so that $LINENO is not a special case at line end. # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the # second 'sed' script. Blame Lee E. McMahon for sed's syntax. 
:-) sed '=' <$as_myself | sed ' N s,$,-, : loop s,^\(['$as_cr_digits']*\)\(.*\)[$]LINENO\([^'$as_cr_alnum'_]\),\1\2\1\3, t loop s,-$,, s,^['$as_cr_digits']*\n,, ' >$as_me.lineno && chmod +x $as_me.lineno || { echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2 { (exit 1); exit 1; }; } # Don't try to exec as it changes $[0], causing all sort of problems # (the dirname of $[0] is not the place where we might find the # original and so on. Autoconf is especially sensible to this). . ./$as_me.lineno # Exit status is that of the last command. exit } case `echo "testing\c"; echo 1,2,3`,`echo -n testing; echo 1,2,3` in *c*,-n*) ECHO_N= ECHO_C=' ' ECHO_T=' ' ;; *c*,* ) ECHO_N=-n ECHO_C= ECHO_T= ;; *) ECHO_N= ECHO_C='\c' ECHO_T= ;; esac if expr a : '\(a\)' >/dev/null 2>&1; then as_expr=expr else as_expr=false fi rm -f conf$$ conf$$.exe conf$$.file echo >conf$$.file if ln -s conf$$.file conf$$ 2>/dev/null; then # We could just check for DJGPP; but this test a) works b) is more generic # and c) will remain valid once DJGPP supports symlinks (DJGPP 2.04). if test -f conf$$.exe; then # Don't use ln at all; we don't have any links as_ln_s='cp -p' else as_ln_s='ln -s' fi elif ln conf$$.file conf$$ 2>/dev/null; then as_ln_s=ln else as_ln_s='cp -p' fi rm -f conf$$ conf$$.exe conf$$.file if mkdir -p . 2>/dev/null; then as_mkdir_p=: else test -d ./-p && rmdir ./-p as_mkdir_p=false fi as_executable_p="test -f" # Sed expression to map a string onto a valid CPP name. as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" # Sed expression to map a string onto a valid variable name. as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" # IFS # We need space, tab and new line, in precisely that order. as_nl=' ' IFS=" $as_nl" # CDPATH. $as_unset CDPATH # Name of the host. # hostname on some systems (SVR3.2, Linux) returns a bogus exit status, # so uname gets run too. 
ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q` exec 6>&1 # # Initializations. # ac_default_prefix=/usr/local ac_config_libobj_dir=. cross_compiling=no subdirs= MFLAGS= MAKEFLAGS= SHELL=${CONFIG_SHELL-/bin/sh} # Maximum number of lines to put in a shell here document. # This variable seems obsolete. It should probably be removed, and # only ac_max_sed_lines should be used. : ${ac_max_here_lines=38} # Identity of this package. PACKAGE_NAME= PACKAGE_TARNAME= PACKAGE_VERSION= PACKAGE_STRING= PACKAGE_BUGREPORT= # Factoring default headers for most tests. ac_includes_default="\ #include #if HAVE_SYS_TYPES_H # include #endif #if HAVE_SYS_STAT_H # include #endif #if STDC_HEADERS # include # include #else # if HAVE_STDLIB_H # include # endif #endif #if HAVE_STRING_H # if !STDC_HEADERS && HAVE_MEMORY_H # include # endif # include #endif #if HAVE_STRINGS_H # include #endif #if HAVE_INTTYPES_H # include #else # if HAVE_STDINT_H # include # endif #endif #if HAVE_UNISTD_H # include #endif" ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC EXEEXT OBJEXT CPP EGREP NVML_INCDIR NVML_LIBDIR CUDA_DIR LIBOBJS LTLIBOBJS' ac_subst_files='' # Initialize some variables set by options. ac_init_help= ac_init_version=false # The variables have the same names as the options, with # dashes changed to underlines. cache_file=/dev/null exec_prefix=NONE no_create= no_recursion= prefix=NONE program_prefix=NONE program_suffix=NONE program_transform_name=s,x,x, silent= site= srcdir= verbose= x_includes=NONE x_libraries=NONE # Installation directory options. 
# These are left unexpanded so users can "make install exec_prefix=/foo" # and all the variables that are supposed to be based on exec_prefix # by default will actually change. # Use braces instead of parens because sh, perl, etc. also accept them. bindir='${exec_prefix}/bin' sbindir='${exec_prefix}/sbin' libexecdir='${exec_prefix}/libexec' datadir='${prefix}/share' sysconfdir='${prefix}/etc' sharedstatedir='${prefix}/com' localstatedir='${prefix}/var' libdir='${exec_prefix}/lib' includedir='${prefix}/include' oldincludedir='/usr/include' infodir='${prefix}/info' mandir='${prefix}/man' ac_prev= for ac_option do # If the previous option needs an argument, assign it. if test -n "$ac_prev"; then eval "$ac_prev=\$ac_option" ac_prev= continue fi ac_optarg=`expr "x$ac_option" : 'x[^=]*=\(.*\)'` # Accept the important Cygnus configure options, so we can diagnose typos. case $ac_option in -bindir | --bindir | --bindi | --bind | --bin | --bi) ac_prev=bindir ;; -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) bindir=$ac_optarg ;; -build | --build | --buil | --bui | --bu) ac_prev=build_alias ;; -build=* | --build=* | --buil=* | --bui=* | --bu=*) build_alias=$ac_optarg ;; -cache-file | --cache-file | --cache-fil | --cache-fi \ | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) ac_prev=cache_file ;; -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) cache_file=$ac_optarg ;; --config-cache | -C) cache_file=config.cache ;; -datadir | --datadir | --datadi | --datad | --data | --dat | --da) ac_prev=datadir ;; -datadir=* | --datadir=* | --datadi=* | --datad=* | --data=* | --dat=* \ | --da=*) datadir=$ac_optarg ;; -disable-* | --disable-*) ac_feature=`expr "x$ac_option" : 'x-*disable-\(.*\)'` # Reject names that are not valid shell variable names. 
expr "x$ac_feature" : ".*[^-_$as_cr_alnum]" >/dev/null && { echo "$as_me: error: invalid feature name: $ac_feature" >&2 { (exit 1); exit 1; }; } ac_feature=`echo $ac_feature | sed 's/-/_/g'` eval "enable_$ac_feature=no" ;; -enable-* | --enable-*) ac_feature=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` # Reject names that are not valid shell variable names. expr "x$ac_feature" : ".*[^-_$as_cr_alnum]" >/dev/null && { echo "$as_me: error: invalid feature name: $ac_feature" >&2 { (exit 1); exit 1; }; } ac_feature=`echo $ac_feature | sed 's/-/_/g'` case $ac_option in *=*) ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"`;; *) ac_optarg=yes ;; esac eval "enable_$ac_feature='$ac_optarg'" ;; -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ | --exec | --exe | --ex) ac_prev=exec_prefix ;; -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ | --exec=* | --exe=* | --ex=*) exec_prefix=$ac_optarg ;; -gas | --gas | --ga | --g) # Obsolete; use --with-gas. 
with_gas=yes ;; -help | --help | --hel | --he | -h) ac_init_help=long ;; -help=r* | --help=r* | --hel=r* | --he=r* | -hr*) ac_init_help=recursive ;; -help=s* | --help=s* | --hel=s* | --he=s* | -hs*) ac_init_help=short ;; -host | --host | --hos | --ho) ac_prev=host_alias ;; -host=* | --host=* | --hos=* | --ho=*) host_alias=$ac_optarg ;; -includedir | --includedir | --includedi | --included | --include \ | --includ | --inclu | --incl | --inc) ac_prev=includedir ;; -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ | --includ=* | --inclu=* | --incl=* | --inc=*) includedir=$ac_optarg ;; -infodir | --infodir | --infodi | --infod | --info | --inf) ac_prev=infodir ;; -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) infodir=$ac_optarg ;; -libdir | --libdir | --libdi | --libd) ac_prev=libdir ;; -libdir=* | --libdir=* | --libdi=* | --libd=*) libdir=$ac_optarg ;; -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ | --libexe | --libex | --libe) ac_prev=libexecdir ;; -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ | --libexe=* | --libex=* | --libe=*) libexecdir=$ac_optarg ;; -localstatedir | --localstatedir | --localstatedi | --localstated \ | --localstate | --localstat | --localsta | --localst \ | --locals | --local | --loca | --loc | --lo) ac_prev=localstatedir ;; -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ | --localstate=* | --localstat=* | --localsta=* | --localst=* \ | --locals=* | --local=* | --loca=* | --loc=* | --lo=*) localstatedir=$ac_optarg ;; -mandir | --mandir | --mandi | --mand | --man | --ma | --m) ac_prev=mandir ;; -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*) mandir=$ac_optarg ;; -nfp | --nfp | --nf) # Obsolete; use --without-fp. 
with_fp=no ;; -no-create | --no-create | --no-creat | --no-crea | --no-cre \ | --no-cr | --no-c | -n) no_create=yes ;; -no-recursion | --no-recursion | --no-recursio | --no-recursi \ | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) no_recursion=yes ;; -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ | --oldin | --oldi | --old | --ol | --o) ac_prev=oldincludedir ;; -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) oldincludedir=$ac_optarg ;; -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) ac_prev=prefix ;; -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) prefix=$ac_optarg ;; -program-prefix | --program-prefix | --program-prefi | --program-pref \ | --program-pre | --program-pr | --program-p) ac_prev=program_prefix ;; -program-prefix=* | --program-prefix=* | --program-prefi=* \ | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) program_prefix=$ac_optarg ;; -program-suffix | --program-suffix | --program-suffi | --program-suff \ | --program-suf | --program-su | --program-s) ac_prev=program_suffix ;; -program-suffix=* | --program-suffix=* | --program-suffi=* \ | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) program_suffix=$ac_optarg ;; -program-transform-name | --program-transform-name \ | --program-transform-nam | --program-transform-na \ | --program-transform-n | --program-transform- \ | --program-transform | --program-transfor \ | --program-transfo | --program-transf \ | --program-trans | --program-tran \ | --progr-tra | --program-tr | --program-t) ac_prev=program_transform_name ;; -program-transform-name=* | --program-transform-name=* \ | --program-transform-nam=* | --program-transform-na=* \ | --program-transform-n=* | 
--program-transform-=* \ | --program-transform=* | --program-transfor=* \ | --program-transfo=* | --program-transf=* \ | --program-trans=* | --program-tran=* \ | --progr-tra=* | --program-tr=* | --program-t=*) program_transform_name=$ac_optarg ;; -q | -quiet | --quiet | --quie | --qui | --qu | --q \ | -silent | --silent | --silen | --sile | --sil) silent=yes ;; -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) ac_prev=sbindir ;; -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ | --sbi=* | --sb=*) sbindir=$ac_optarg ;; -sharedstatedir | --sharedstatedir | --sharedstatedi \ | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ | --sharedst | --shareds | --shared | --share | --shar \ | --sha | --sh) ac_prev=sharedstatedir ;; -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \ | --sha=* | --sh=*) sharedstatedir=$ac_optarg ;; -site | --site | --sit) ac_prev=site ;; -site=* | --site=* | --sit=*) site=$ac_optarg ;; -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) ac_prev=srcdir ;; -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) srcdir=$ac_optarg ;; -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ | --syscon | --sysco | --sysc | --sys | --sy) ac_prev=sysconfdir ;; -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) sysconfdir=$ac_optarg ;; -target | --target | --targe | --targ | --tar | --ta | --t) ac_prev=target_alias ;; -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) target_alias=$ac_optarg ;; -v | -verbose | --verbose | --verbos | --verbo | --verb) verbose=yes ;; -version | --version | --versio | --versi | --vers | -V) ac_init_version=: ;; -with-* | --with-*) ac_package=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` # Reject names that are 
not valid shell variable names. expr "x$ac_package" : ".*[^-_$as_cr_alnum]" >/dev/null && { echo "$as_me: error: invalid package name: $ac_package" >&2 { (exit 1); exit 1; }; } ac_package=`echo $ac_package| sed 's/-/_/g'` case $ac_option in *=*) ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"`;; *) ac_optarg=yes ;; esac eval "with_$ac_package='$ac_optarg'" ;; -without-* | --without-*) ac_package=`expr "x$ac_option" : 'x-*without-\(.*\)'` # Reject names that are not valid shell variable names. expr "x$ac_package" : ".*[^-_$as_cr_alnum]" >/dev/null && { echo "$as_me: error: invalid package name: $ac_package" >&2 { (exit 1); exit 1; }; } ac_package=`echo $ac_package | sed 's/-/_/g'` eval "with_$ac_package=no" ;; --x) # Obsolete; use --with-x. with_x=yes ;; -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ | --x-incl | --x-inc | --x-in | --x-i) ac_prev=x_includes ;; -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) x_includes=$ac_optarg ;; -x-libraries | --x-libraries | --x-librarie | --x-librari \ | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) ac_prev=x_libraries ;; -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) x_libraries=$ac_optarg ;; -*) { echo "$as_me: error: unrecognized option: $ac_option Try \`$0 --help' for more information." >&2 { (exit 1); exit 1; }; } ;; *=*) ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='` # Reject names that are not valid shell variable names. expr "x$ac_envvar" : ".*[^_$as_cr_alnum]" >/dev/null && { echo "$as_me: error: invalid variable name: $ac_envvar" >&2 { (exit 1); exit 1; }; } ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` eval "$ac_envvar='$ac_optarg'" export $ac_envvar ;; *) # FIXME: should be removed in autoconf 3.0. 
echo "$as_me: WARNING: you should use --build, --host, --target" >&2 expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null && echo "$as_me: WARNING: invalid host type: $ac_option" >&2 : ${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option} ;; esac done if test -n "$ac_prev"; then ac_option=--`echo $ac_prev | sed 's/_/-/g'` { echo "$as_me: error: missing argument to $ac_option" >&2 { (exit 1); exit 1; }; } fi # Be sure to have absolute paths. for ac_var in exec_prefix prefix do eval ac_val=$`echo $ac_var` case $ac_val in [\\/$]* | ?:[\\/]* | NONE | '' ) ;; *) { echo "$as_me: error: expected an absolute directory name for --$ac_var: $ac_val" >&2 { (exit 1); exit 1; }; };; esac done # Be sure to have absolute paths. for ac_var in bindir sbindir libexecdir datadir sysconfdir sharedstatedir \ localstatedir libdir includedir oldincludedir infodir mandir do eval ac_val=$`echo $ac_var` case $ac_val in [\\/$]* | ?:[\\/]* ) ;; *) { echo "$as_me: error: expected an absolute directory name for --$ac_var: $ac_val" >&2 { (exit 1); exit 1; }; };; esac done # There might be people who depend on the old broken behavior: `$host' # used to hold the argument of --host etc. # FIXME: To remove some day. build=$build_alias host=$host_alias target=$target_alias # FIXME: To remove some day. if test "x$host_alias" != x; then if test "x$build_alias" = x; then cross_compiling=maybe echo "$as_me: WARNING: If you wanted to set the --build type, don't use --host. If a cross compiler is detected then cross compile mode will be used." >&2 elif test "x$build_alias" != "x$host_alias"; then cross_compiling=yes fi fi ac_tool_prefix= test -n "$host_alias" && ac_tool_prefix=$host_alias- test "$silent" = yes && exec 6>/dev/null # Find the source files, if location was not specified. if test -z "$srcdir"; then ac_srcdir_defaulted=yes # Try the directory containing this script, then its parent. 
ac_confdir=`(dirname "$0") 2>/dev/null || $as_expr X"$0" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$0" : 'X\(//\)[^/]' \| \ X"$0" : 'X\(//\)$' \| \ X"$0" : 'X\(/\)' \| \ . : '\(.\)' 2>/dev/null || echo X"$0" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; } /^X\(\/\/\)[^/].*/{ s//\1/; q; } /^X\(\/\/\)$/{ s//\1/; q; } /^X\(\/\).*/{ s//\1/; q; } s/.*/./; q'` srcdir=$ac_confdir if test ! -r $srcdir/$ac_unique_file; then srcdir=.. fi else ac_srcdir_defaulted=no fi if test ! -r $srcdir/$ac_unique_file; then if test "$ac_srcdir_defaulted" = yes; then { echo "$as_me: error: cannot find sources ($ac_unique_file) in $ac_confdir or .." >&2 { (exit 1); exit 1; }; } else { echo "$as_me: error: cannot find sources ($ac_unique_file) in $srcdir" >&2 { (exit 1); exit 1; }; } fi fi (cd $srcdir && test -r ./$ac_unique_file) 2>/dev/null || { echo "$as_me: error: sources are in $srcdir, but \`cd $srcdir' does not work" >&2 { (exit 1); exit 1; }; } srcdir=`echo "$srcdir" | sed 's%\([^\\/]\)[\\/]*$%\1%'` ac_env_build_alias_set=${build_alias+set} ac_env_build_alias_value=$build_alias ac_cv_env_build_alias_set=${build_alias+set} ac_cv_env_build_alias_value=$build_alias ac_env_host_alias_set=${host_alias+set} ac_env_host_alias_value=$host_alias ac_cv_env_host_alias_set=${host_alias+set} ac_cv_env_host_alias_value=$host_alias ac_env_target_alias_set=${target_alias+set} ac_env_target_alias_value=$target_alias ac_cv_env_target_alias_set=${target_alias+set} ac_cv_env_target_alias_value=$target_alias ac_env_CC_set=${CC+set} ac_env_CC_value=$CC ac_cv_env_CC_set=${CC+set} ac_cv_env_CC_value=$CC ac_env_CFLAGS_set=${CFLAGS+set} ac_env_CFLAGS_value=$CFLAGS ac_cv_env_CFLAGS_set=${CFLAGS+set} ac_cv_env_CFLAGS_value=$CFLAGS ac_env_LDFLAGS_set=${LDFLAGS+set} ac_env_LDFLAGS_value=$LDFLAGS ac_cv_env_LDFLAGS_set=${LDFLAGS+set} ac_cv_env_LDFLAGS_value=$LDFLAGS ac_env_CPPFLAGS_set=${CPPFLAGS+set} ac_env_CPPFLAGS_value=$CPPFLAGS ac_cv_env_CPPFLAGS_set=${CPPFLAGS+set} ac_cv_env_CPPFLAGS_value=$CPPFLAGS 
ac_env_CPP_set=${CPP+set} ac_env_CPP_value=$CPP ac_cv_env_CPP_set=${CPP+set} ac_cv_env_CPP_value=$CPP # # Report the --help message. # if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF \`configure' configures this package to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... To assign environment variables (e.g., CC, CFLAGS...), specify them as VAR=VALUE. See below for descriptions of some of the useful variables. Defaults for the options are specified in brackets. Configuration: -h, --help display this help and exit --help=short display options specific to this package --help=recursive display the short help of all the included packages -V, --version display version information and exit -q, --quiet, --silent do not print \`checking...' messages --cache-file=FILE cache test results in FILE [disabled] -C, --config-cache alias for \`--cache-file=config.cache' -n, --no-create do not create output files --srcdir=DIR find the sources in DIR [configure dir or \`..'] _ACEOF cat <<_ACEOF Installation directories: --prefix=PREFIX install architecture-independent files in PREFIX [$ac_default_prefix] --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX [PREFIX] By default, \`make install' will install all the files in \`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify an installation prefix other than \`$ac_default_prefix' using \`--prefix', for instance \`--prefix=\$HOME'. For better control, use the options below. 
Fine tuning of the installation directories: --bindir=DIR user executables [EPREFIX/bin] --sbindir=DIR system admin executables [EPREFIX/sbin] --libexecdir=DIR program executables [EPREFIX/libexec] --datadir=DIR read-only architecture-independent data [PREFIX/share] --sysconfdir=DIR read-only single-machine data [PREFIX/etc] --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] --localstatedir=DIR modifiable single-machine data [PREFIX/var] --libdir=DIR object code libraries [EPREFIX/lib] --includedir=DIR C header files [PREFIX/include] --oldincludedir=DIR C header files for non-gcc [/usr/include] --infodir=DIR info documentation [PREFIX/info] --mandir=DIR man documentation [PREFIX/man] _ACEOF cat <<\_ACEOF _ACEOF fi if test -n "$ac_init_help"; then cat <<\_ACEOF Optional Packages: --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) --with-nvml-dir= Specify prefix to nvml libraries and headers --with-nvml-incdir= Specify directory of nvml header files (nvml.h) in a specific location --with-nvml-libdir= Specify directory of nvml library (libnvidia-ml.so) in a specific location --with-cuda-dir= Specify path to cuda root directory Some influential environment variables: CC C compiler command CFLAGS C compiler flags LDFLAGS linker flags, e.g. -L if you have libraries in a nonstandard directory CPPFLAGS C/C++ preprocessor flags, e.g. -I if you have headers in a nonstandard directory CPP C preprocessor Use these variables to override the choices made by `configure' or to help it to find libraries and programs with nonstandard names/locations. _ACEOF fi if test "$ac_init_help" = "recursive"; then # If there are subdirs, report their specific --help. ac_popdir=`pwd` for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue test -d $ac_dir || continue ac_builddir=. 
if test "$ac_dir" != .; then ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'` # A "../" for each directory in $ac_dir_suffix. ac_top_builddir=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,../,g'` else ac_dir_suffix= ac_top_builddir= fi case $srcdir in .) # No --srcdir option. We are building in place. ac_srcdir=. if test -z "$ac_top_builddir"; then ac_top_srcdir=. else ac_top_srcdir=`echo $ac_top_builddir | sed 's,/$,,'` fi ;; [\\/]* | ?:[\\/]* ) # Absolute path. ac_srcdir=$srcdir$ac_dir_suffix; ac_top_srcdir=$srcdir ;; *) # Relative path. ac_srcdir=$ac_top_builddir$srcdir$ac_dir_suffix ac_top_srcdir=$ac_top_builddir$srcdir ;; esac # Do not use `cd foo && pwd` to compute absolute paths, because # the directories may not exist. case `pwd` in .) ac_abs_builddir="$ac_dir";; *) case "$ac_dir" in .) ac_abs_builddir=`pwd`;; [\\/]* | ?:[\\/]* ) ac_abs_builddir="$ac_dir";; *) ac_abs_builddir=`pwd`/"$ac_dir";; esac;; esac case $ac_abs_builddir in .) ac_abs_top_builddir=${ac_top_builddir}.;; *) case ${ac_top_builddir}. in .) ac_abs_top_builddir=$ac_abs_builddir;; [\\/]* | ?:[\\/]* ) ac_abs_top_builddir=${ac_top_builddir}.;; *) ac_abs_top_builddir=$ac_abs_builddir/${ac_top_builddir}.;; esac;; esac case $ac_abs_builddir in .) ac_abs_srcdir=$ac_srcdir;; *) case $ac_srcdir in .) ac_abs_srcdir=$ac_abs_builddir;; [\\/]* | ?:[\\/]* ) ac_abs_srcdir=$ac_srcdir;; *) ac_abs_srcdir=$ac_abs_builddir/$ac_srcdir;; esac;; esac case $ac_abs_builddir in .) ac_abs_top_srcdir=$ac_top_srcdir;; *) case $ac_top_srcdir in .) ac_abs_top_srcdir=$ac_abs_builddir;; [\\/]* | ?:[\\/]* ) ac_abs_top_srcdir=$ac_top_srcdir;; *) ac_abs_top_srcdir=$ac_abs_builddir/$ac_top_srcdir;; esac;; esac cd $ac_dir # Check for guested configure; otherwise get Cygnus style configure. 
if test -f $ac_srcdir/configure.gnu; then echo $SHELL $ac_srcdir/configure.gnu --help=recursive elif test -f $ac_srcdir/configure; then echo $SHELL $ac_srcdir/configure --help=recursive elif test -f $ac_srcdir/configure.ac || test -f $ac_srcdir/configure.in; then echo $ac_configure --help else echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2 fi cd $ac_popdir done fi test -n "$ac_init_help" && exit 0 if $ac_init_version; then cat <<\_ACEOF Copyright (C) 2003 Free Software Foundation, Inc. This configure script is free software; the Free Software Foundation gives unlimited permission to copy, distribute and modify it. _ACEOF exit 0 fi exec 5>config.log cat >&5 <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. It was created by $as_me, which was generated by GNU Autoconf 2.59. Invocation command line was $ $0 $@ _ACEOF { cat <<_ASUNAME ## --------- ## ## Platform. ## ## --------- ## hostname = `(hostname || uname -n) 2>/dev/null | sed 1q` uname -m = `(uname -m) 2>/dev/null || echo unknown` uname -r = `(uname -r) 2>/dev/null || echo unknown` uname -s = `(uname -s) 2>/dev/null || echo unknown` uname -v = `(uname -v) 2>/dev/null || echo unknown` /usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown` /bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown` /bin/arch = `(/bin/arch) 2>/dev/null || echo unknown` /usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown` /usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown` hostinfo = `(hostinfo) 2>/dev/null || echo unknown` /bin/machine = `(/bin/machine) 2>/dev/null || echo unknown` /usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown` /bin/universe = `(/bin/universe) 2>/dev/null || echo unknown` _ASUNAME as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
# Finish logging the PATH entries to fd 5, write the "Core tests" banner, then
# rebuild the command line in two passes:
#  - --no-create / --no-recursion and the quiet/silent flags are dropped;
#  - arguments containing blanks or shell metacharacters get their single
#    quotes escaped so they can be stored inside '...';
#  - pass 1 collects everything in ac_configure_args0, pass 2 builds
#    ac_configure_args1 and ac_configure_args while skipping options that
#    already appear, keeping the value that follows a bare "-option".
# The two scratch variables are unset (or emptied and exported) afterwards.
echo "PATH: $as_dir" done } >&5 cat >&5 <<_ACEOF ## ----------- ## ## Core tests. ## ## ----------- ## _ACEOF # Keep a trace of the command line. # Strip out --no-create and --no-recursion so they do not pile up. # Strip out --silent because we don't want to record it for future runs. # Also quote any args containing shell meta-characters. # Make two passes to allow for proper duplicate-argument suppression. ac_configure_args= ac_configure_args0= ac_configure_args1= ac_sep= ac_must_keep_next=false for ac_pass in 1 2 do for ac_arg do case $ac_arg in -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;; -q | -quiet | --quiet | --quie | --qui | --qu | --q \ | -silent | --silent | --silen | --sile | --sil) continue ;; *" "*|*" "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?\"\']*) ac_arg=`echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; esac case $ac_pass in 1) ac_configure_args0="$ac_configure_args0 '$ac_arg'" ;; 2) ac_configure_args1="$ac_configure_args1 '$ac_arg'" if test $ac_must_keep_next = true; then ac_must_keep_next=false # Got value, back to normal. else case $ac_arg in *=* | --config-cache | -C | -disable-* | --disable-* \ | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \ | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \ | -with-* | --with-* | -without-* | --without-* | --x) case "$ac_configure_args0 " in "$ac_configure_args1"*" '$ac_arg' "* ) continue ;; esac ;; -* ) ac_must_keep_next=true ;; esac fi ac_configure_args="$ac_configure_args$ac_sep'$ac_arg'" # Get rid of the leading space. ac_sep=" " ;; esac done done $as_unset ac_configure_args0 || test "${ac_configure_args0+set}" != set || { ac_configure_args0=; export ac_configure_args0; } $as_unset ac_configure_args1 || test "${ac_configure_args1+set}" != set || { ac_configure_args1=; export ac_configure_args1; } # When interrupted or exit'd, cleanup temporary files, and complete # config.log. We remove comments because anyway the quotes in there # would cause problems or look ugly. 
# Exit/signal traps and confdefs.h reset:
#  - the trap on 0 saves $? in exit_status, appends the cache variables
#    (*_cv_*), the $ac_subst_vars output variables, the $ac_subst_files
#    output files and the sorted confdefs.h to config.log (fd 5), removes
#    core files, conftest*, confdefs*, conf$$* and $ac_clean_files, and exits
#    with the saved status;
#  - signals 1 2 13 15 record their number in ac_signal and exit 1;
#  - confdefs.h is then recreated containing a single newline.
# WARNING: Be sure not to use single quotes in there, as some shells, # such as our DU 5.0 friend, will then `close' the trap. trap 'exit_status=$? # Save into config.log some information that might help in debugging. { echo cat <<\_ASBOX ## ---------------- ## ## Cache variables. ## ## ---------------- ## _ASBOX echo # The following way of writing the cache mishandles newlines in values, { (set) 2>&1 | case `(ac_space='"'"' '"'"'; set | grep ac_space) 2>&1` in *ac_space=\ *) sed -n \ "s/'"'"'/'"'"'\\\\'"'"''"'"'/g; s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='"'"'\\2'"'"'/p" ;; *) sed -n \ "s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1=\\2/p" ;; esac; } echo cat <<\_ASBOX ## ----------------- ## ## Output variables. ## ## ----------------- ## _ASBOX echo for ac_var in $ac_subst_vars do eval ac_val=$`echo $ac_var` echo "$ac_var='"'"'$ac_val'"'"'" done | sort echo if test -n "$ac_subst_files"; then cat <<\_ASBOX ## ------------- ## ## Output files. ## ## ------------- ## _ASBOX echo for ac_var in $ac_subst_files do eval ac_val=$`echo $ac_var` echo "$ac_var='"'"'$ac_val'"'"'" done | sort echo fi if test -s confdefs.h; then cat <<\_ASBOX ## ----------- ## ## confdefs.h. ## ## ----------- ## _ASBOX echo sed "/^$/d" confdefs.h | sort echo fi test "$ac_signal" != 0 && echo "$as_me: caught signal $ac_signal" echo "$as_me: exit $exit_status" } >&5 rm -f core *.core && rm -rf conftest* confdefs* conf$$* $ac_clean_files && exit $exit_status ' 0 for ac_signal in 1 2 13 15; do trap 'ac_signal='$ac_signal'; { (exit 1); exit 1; }' $ac_signal done ac_signal=0 # confdefs.h avoids OS command line length limits that DEFS can exceed. rm -rf conftest* confdefs.h # AIX cpp loses on an empty file, so make sure it contains at least a newline. echo >confdefs.h # Predefined preprocessor variables. 
# Seed confdefs.h and load external state:
#  - append the PACKAGE_NAME/TARNAME/VERSION/STRING/BUGREPORT defines;
#  - when CONFIG_SITE is empty, default it to share/config.site and
#    etc/config.site under $prefix (or $ac_default_prefix when prefix is NONE),
#    then log and source every readable file it lists;
#  - source $cache_file when it is a readable regular file (prefixing ./ for
#    relative names); when it is not readable, create it empty.
cat >>confdefs.h <<_ACEOF #define PACKAGE_NAME "$PACKAGE_NAME" _ACEOF cat >>confdefs.h <<_ACEOF #define PACKAGE_TARNAME "$PACKAGE_TARNAME" _ACEOF cat >>confdefs.h <<_ACEOF #define PACKAGE_VERSION "$PACKAGE_VERSION" _ACEOF cat >>confdefs.h <<_ACEOF #define PACKAGE_STRING "$PACKAGE_STRING" _ACEOF cat >>confdefs.h <<_ACEOF #define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT" _ACEOF # Let the site file select an alternate cache file if it wants to. # Prefer explicitly selected file to automatically selected ones. if test -z "$CONFIG_SITE"; then if test "x$prefix" != xNONE; then CONFIG_SITE="$prefix/share/config.site $prefix/etc/config.site" else CONFIG_SITE="$ac_default_prefix/share/config.site $ac_default_prefix/etc/config.site" fi fi for ac_site_file in $CONFIG_SITE; do if test -r "$ac_site_file"; then { echo "$as_me:$LINENO: loading site script $ac_site_file" >&5 echo "$as_me: loading site script $ac_site_file" >&6;} sed 's/^/| /' "$ac_site_file" >&5 . "$ac_site_file" fi done if test -r "$cache_file"; then # Some versions of bash will fail to source /dev/null (special # files actually), so we avoid doing that. if test -f "$cache_file"; then { echo "$as_me:$LINENO: loading cache $cache_file" >&5 echo "$as_me: loading cache $cache_file" >&6;} case $cache_file in [\\/]* | ?:[\\/]* ) . $cache_file;; *) . ./$cache_file;; esac fi else { echo "$as_me:$LINENO: creating cache $cache_file" >&5 echo "$as_me: creating cache $cache_file" >&6;} >$cache_file fi # Check that the precious variables saved in the cache have kept the same # value. 
# Cache consistency check for precious variables. For every variable that has
# an ac_env_<var>_set entry, compare the cached state (ac_cv_env_<var>_set /
# _value) with the current one (ac_env_<var>_set / _value):
#  - set before but not now, or set now but not before  -> error;
#  - set both times with different values               -> error;
# each error is reported on fd 5 and stderr and sets ac_cache_corrupted=:.
# A variable that is set in this run is also turned into a quoted
# "var=value" argument; the case statement that follows adds it to
# ac_configure_args unless that exact argument is already present.
ac_cache_corrupted=false for ac_var in `(set) 2>&1 | sed -n 's/^ac_env_\([a-zA-Z_0-9]*\)_set=.*/\1/p'`; do eval ac_old_set=\$ac_cv_env_${ac_var}_set eval ac_new_set=\$ac_env_${ac_var}_set eval ac_old_val="\$ac_cv_env_${ac_var}_value" eval ac_new_val="\$ac_env_${ac_var}_value" case $ac_old_set,$ac_new_set in set,) { echo "$as_me:$LINENO: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5 echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;} ac_cache_corrupted=: ;; ,set) { echo "$as_me:$LINENO: error: \`$ac_var' was not set in the previous run" >&5 echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} ac_cache_corrupted=: ;; ,);; *) if test "x$ac_old_val" != "x$ac_new_val"; then { echo "$as_me:$LINENO: error: \`$ac_var' has changed since the previous run:" >&5 echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;} { echo "$as_me:$LINENO: former value: $ac_old_val" >&5 echo "$as_me: former value: $ac_old_val" >&2;} { echo "$as_me:$LINENO: current value: $ac_new_val" >&5 echo "$as_me: current value: $ac_new_val" >&2;} ac_cache_corrupted=: fi;; esac # Pass precious variables to config.status. if test "$ac_new_set" = set; then case $ac_new_val in *" "*|*" "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?\"\']*) ac_arg=$ac_var=`echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; *) ac_arg=$ac_var=$ac_new_val ;; esac case " $ac_configure_args " in *" '$ac_arg' "*) ;; # Avoid dups. Use of quotes ensures accuracy. 
# Last arm of the duplicate check on $ac_configure_args: the precious
# variable assignment is not recorded yet, so append it.
*) ac_configure_args="$ac_configure_args '$ac_arg'" ;;
    esac
  fi
done

# Refuse to continue when a precious variable differs from the cached run.
if $ac_cache_corrupted; then
  { echo "$as_me:$LINENO: error: changes in the environment can compromise the build" >&5
echo "$as_me: error: changes in the environment can compromise the build" >&2;}
  { { echo "$as_me:$LINENO: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&5
echo "$as_me: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&2;}
   { (exit 1); exit 1; }; }
fi

# Command templates used by the C compile/link probes.
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu


# Check whether --with-nvml_dir or --without-nvml_dir was given.
if test "${with_nvml_dir+set}" = set; then
  withval="$with_nvml_dir"
  nvml_dir=$withval
fi;

# Check whether --with-nvml_incdir or --without-nvml_incdir was given.
if test "${with_nvml_incdir+set}" = set; then
  withval="$with_nvml_incdir"
  nvml_incdir=$withval
fi;

# Check whether --with-nvml_libdir or --without-nvml_libdir was given.
if test "${with_nvml_libdir+set}" = set; then
  withval="$with_nvml_libdir"
  nvml_libdir=$withval
fi;

# Translate the NVML locations into compiler and linker flags.
# nvml_dotest becomes 1 as soon as any NVML location is known.
# NOTE(review): this script is Autoconf output; the same change presumably
# has to be made in the configure input it was generated from, otherwise it
# is lost on regeneration.
nvml_dotest=0
if test "x$nvml_dir" != "x"; then
  # A single installation prefix implies include/ and lib64/ beneath it and
  # replaces any separately supplied incdir/libdir.
  nvml_incdir="$nvml_dir/include"
  nvml_libdir="$nvml_dir/lib64"
  nvml_dotest=1
else
  if test "x$nvml_libdir" != "x"; then
    # Prepend rather than overwrite, so LIBS from the environment survives.
    LIBS="-L$nvml_libdir $LIBS"
    nvml_dotest=1
  fi
  if test "x$nvml_incdir" != "x"; then
    # Append rather than overwrite, so CPPFLAGS from the environment survives.
    CPPFLAGS="$CPPFLAGS -I$nvml_incdir"
    nvml_dotest=1
  fi
fi

# Add the include directory to CFLAGS only when one is known.  An empty
# nvml_incdir used to yield a bare "-I", which makes the compiler treat the
# next argument as a directory, and it also made CFLAGS non-empty so the
# compiler check further down would keep it instead of choosing its default.
if test "x$nvml_incdir" != "x"; then
  CFLAGS="$CFLAGS -I$nvml_incdir"
fi

ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu
if test -n "$ac_tool_prefix"; then
  # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args.
set dummy ${ac_tool_prefix}gcc; ac_word=$2
echo "$as_me:$LINENO: checking for $ac_word" >&5
# The quoted string of the next command is completed on the following line.
echo $ECHO_N "checking for $ac_word... 
$ECHO_C" >&6 if test "${ac_cv_prog_CC+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_CC="${ac_tool_prefix}gcc" echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then echo "$as_me:$LINENO: result: $CC" >&5 echo "${ECHO_T}$CC" >&6 else echo "$as_me:$LINENO: result: no" >&5 echo "${ECHO_T}no" >&6 fi fi if test -z "$ac_cv_prog_CC"; then ac_ct_CC=$CC # Extract the first word of "gcc", so it can be a program name with args. set dummy gcc; ac_word=$2 echo "$as_me:$LINENO: checking for $ac_word" >&5 echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 if test "${ac_cv_prog_ac_ct_CC+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else if test -n "$ac_ct_CC"; then ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_CC="gcc" echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done fi fi ac_ct_CC=$ac_cv_prog_ac_ct_CC if test -n "$ac_ct_CC"; then echo "$as_me:$LINENO: result: $ac_ct_CC" >&5 echo "${ECHO_T}$ac_ct_CC" >&6 else echo "$as_me:$LINENO: result: no" >&5 echo "${ECHO_T}no" >&6 fi CC=$ac_ct_CC else CC="$ac_cv_prog_CC" fi if test -z "$CC"; then if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. set dummy ${ac_tool_prefix}cc; ac_word=$2 echo "$as_me:$LINENO: checking for $ac_word" >&5 echo $ECHO_N "checking for $ac_word... 
$ECHO_C" >&6 if test "${ac_cv_prog_CC+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_CC="${ac_tool_prefix}cc" echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then echo "$as_me:$LINENO: result: $CC" >&5 echo "${ECHO_T}$CC" >&6 else echo "$as_me:$LINENO: result: no" >&5 echo "${ECHO_T}no" >&6 fi fi if test -z "$ac_cv_prog_CC"; then ac_ct_CC=$CC # Extract the first word of "cc", so it can be a program name with args. set dummy cc; ac_word=$2 echo "$as_me:$LINENO: checking for $ac_word" >&5 echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 if test "${ac_cv_prog_ac_ct_CC+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else if test -n "$ac_ct_CC"; then ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_CC="cc" echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done fi fi ac_ct_CC=$ac_cv_prog_ac_ct_CC if test -n "$ac_ct_CC"; then echo "$as_me:$LINENO: result: $ac_ct_CC" >&5 echo "${ECHO_T}$ac_ct_CC" >&6 else echo "$as_me:$LINENO: result: no" >&5 echo "${ECHO_T}no" >&6 fi CC=$ac_ct_CC else CC="$ac_cv_prog_CC" fi fi if test -z "$CC"; then # Extract the first word of "cc", so it can be a program name with args. set dummy cc; ac_word=$2 echo "$as_me:$LINENO: checking for $ac_word" >&5 echo $ECHO_N "checking for $ac_word... 
$ECHO_C" >&6 if test "${ac_cv_prog_CC+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else ac_prog_rejected=no as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then ac_prog_rejected=yes continue fi ac_cv_prog_CC="cc" echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done if test $ac_prog_rejected = yes; then # We found a bogon in the path, so make sure we never use it. set dummy $ac_cv_prog_CC shift if test $# != 0; then # We chose a different compiler from the bogus one. # However, it has the same basename, so the bogon will be chosen # first if we set CC to just the basename; use the full file name. shift ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@" fi fi fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then echo "$as_me:$LINENO: result: $CC" >&5 echo "${ECHO_T}$CC" >&6 else echo "$as_me:$LINENO: result: no" >&5 echo "${ECHO_T}no" >&6 fi fi if test -z "$CC"; then if test -n "$ac_tool_prefix"; then for ac_prog in cl do # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. set dummy $ac_tool_prefix$ac_prog; ac_word=$2 echo "$as_me:$LINENO: checking for $ac_word" >&5 echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 if test "${ac_cv_prog_CC+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
# Tail of the C compiler search:
#  - finish the PATH scan for "$ac_tool_prefix$ac_prog" and report the result;
#  - if CC is still empty, repeat the scan for the unprefixed program name
#    ("cl") via ac_ct_CC and assign the outcome to CC;
#  - abort with "no acceptable C compiler found in $PATH" (exit 1) when CC is
#    empty after all attempts.
for ac_exec_ext in '' $ac_executable_extensions; do if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_CC="$ac_tool_prefix$ac_prog" echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then echo "$as_me:$LINENO: result: $CC" >&5 echo "${ECHO_T}$CC" >&6 else echo "$as_me:$LINENO: result: no" >&5 echo "${ECHO_T}no" >&6 fi test -n "$CC" && break done fi if test -z "$CC"; then ac_ct_CC=$CC for ac_prog in cl do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 echo "$as_me:$LINENO: checking for $ac_word" >&5 echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 if test "${ac_cv_prog_ac_ct_CC+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else if test -n "$ac_ct_CC"; then ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_CC="$ac_prog" echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done fi fi ac_ct_CC=$ac_cv_prog_ac_ct_CC if test -n "$ac_ct_CC"; then echo "$as_me:$LINENO: result: $ac_ct_CC" >&5 echo "${ECHO_T}$ac_ct_CC" >&6 else echo "$as_me:$LINENO: result: no" >&5 echo "${ECHO_T}no" >&6 fi test -n "$ac_ct_CC" && break done CC=$ac_ct_CC fi fi test -z "$CC" && { { echo "$as_me:$LINENO: error: no acceptable C compiler found in \$PATH See \`config.log' for more details." >&5 echo "$as_me: error: no acceptable C compiler found in \$PATH See \`config.log' for more details." >&2;} { (exit 1); exit 1; }; } # Provide some information about the compiler. 
# Compiler identification and first link probe:
#  - take the compiler command as the first word of $ac_compile and log the
#    result of invoking it with --version, -v and -V to fd 5;
#  - write a minimal conftest.$ac_ext (confdefs.h plus an empty main);
#  - add a.out, a.exe and b.out to ac_clean_files, strip the "-o conftest..."
#    part from $ac_link and run the resulting command to learn what the
#    compiler names its output by default.
echo "$as_me:$LINENO:" \ "checking for C compiler version" >&5 ac_compiler=`set X $ac_compile; echo $2` { (eval echo "$as_me:$LINENO: \"$ac_compiler --version &5\"") >&5 (eval $ac_compiler --version &5) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } { (eval echo "$as_me:$LINENO: \"$ac_compiler -v &5\"") >&5 (eval $ac_compiler -v &5) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } { (eval echo "$as_me:$LINENO: \"$ac_compiler -V &5\"") >&5 (eval $ac_compiler -V &5) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ int main () { ; return 0; } _ACEOF ac_clean_files_save=$ac_clean_files ac_clean_files="$ac_clean_files a.out a.exe b.out" # Try to create an executable without -o first, disregard a.out. # It will help us diagnose broken compilers, and finding out an intuition # of exeext. echo "$as_me:$LINENO: checking for C compiler default output file name" >&5 echo $ECHO_N "checking for C compiler default output file name... $ECHO_C" >&6 ac_link_default=`echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'` if { (eval echo "$as_me:$LINENO: \"$ac_link_default\"") >&5 (eval $ac_link_default) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; then # Find the output, starting from the most likely. This scheme is # not robust to junk in `.', hence go to wildcards (a.*) only as a last # resort. # Be careful to initialize this variable, since it used to be cached. # Otherwise an old cache value of `no' led to `EXEEXT = no' in a Makefile. ac_cv_exeext= # b.out is created by i960 compilers. 
# Determine the default executable name and check that it runs:
#  - walk the candidate output names in order, skipping sources, objects and
#    debug files; [ab].out means no extension, any other dotted name sets
#    (and exports) ac_cv_exeext to the part from the first dot onwards;
#  - if the link probe failed, log the test program and abort with exit 77;
#  - unless cross_compiling is "yes", execute ./$ac_file: success sets
#    cross_compiling=no, failure turns "maybe" into "yes" and is otherwise a
#    fatal "cannot run C compiled programs" error.
for ac_file in a_out.exe a.exe conftest.exe a.out conftest a.* conftest.* b.out do test -f "$ac_file" || continue case $ac_file in *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.o | *.obj ) ;; conftest.$ac_ext ) # This is the source file. ;; [ab].out ) # We found the default executable, but exeext='' is most # certainly right. break;; *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` # FIXME: I believe we export ac_cv_exeext for Libtool, # but it would be cool to find out if it's true. Does anybody # maintain Libtool? --akim. export ac_cv_exeext break;; * ) break;; esac done else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 { { echo "$as_me:$LINENO: error: C compiler cannot create executables See \`config.log' for more details." >&5 echo "$as_me: error: C compiler cannot create executables See \`config.log' for more details." >&2;} { (exit 77); exit 77; }; } fi ac_exeext=$ac_cv_exeext echo "$as_me:$LINENO: result: $ac_file" >&5 echo "${ECHO_T}$ac_file" >&6 # Check the compiler produces executables we can run. If not, either # the compiler is broken, or we cross compile. echo "$as_me:$LINENO: checking whether the C compiler works" >&5 echo $ECHO_N "checking whether the C compiler works... $ECHO_C" >&6 # FIXME: These cross compiler hacks should be removed for Autoconf 3.0 # If not cross compiling, check that we can run a simple program. if test "$cross_compiling" != yes; then if { ac_try='./$ac_file' { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 (eval $ac_try) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; }; then cross_compiling=no else if test "$cross_compiling" = maybe; then cross_compiling=yes else { { echo "$as_me:$LINENO: error: cannot run C compiled programs. If you meant to cross compile, use \`--host'. See \`config.log' for more details." >&5 echo "$as_me: error: cannot run C compiled programs. If you meant to cross compile, use \`--host'. 
See \`config.log' for more details." >&2;} { (exit 1); exit 1; }; } fi fi fi echo "$as_me:$LINENO: result: yes" >&5 echo "${ECHO_T}yes" >&6 rm -f a.out a.exe conftest$ac_cv_exeext b.out ac_clean_files=$ac_clean_files_save # Check the compiler produces executables we can run. If not, either # the compiler is broken, or we cross compile. echo "$as_me:$LINENO: checking whether we are cross compiling" >&5 echo $ECHO_N "checking whether we are cross compiling... $ECHO_C" >&6 echo "$as_me:$LINENO: result: $cross_compiling" >&5 echo "${ECHO_T}$cross_compiling" >&6 echo "$as_me:$LINENO: checking for suffix of executables" >&5 echo $ECHO_N "checking for suffix of executables... $ECHO_C" >&6 if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 (eval $ac_link) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; then # If both `conftest.exe' and `conftest' are `present' (well, observable) # catch `conftest.exe'. For instance with Cygwin, `ls conftest' will # work properly (i.e., refer to `conftest.exe'), while it won't with # `rm'. for ac_file in conftest.exe conftest conftest.*; do test -f "$ac_file" || continue case $ac_file in *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.o | *.obj ) ;; *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` export ac_cv_exeext break;; * ) break;; esac done else { { echo "$as_me:$LINENO: error: cannot compute suffix of executables: cannot compile and link See \`config.log' for more details." >&5 echo "$as_me: error: cannot compute suffix of executables: cannot compile and link See \`config.log' for more details." >&2;} { (exit 1); exit 1; }; } fi rm -f conftest$ac_cv_exeext echo "$as_me:$LINENO: result: $ac_cv_exeext" >&5 echo "${ECHO_T}$ac_cv_exeext" >&6 rm -f conftest.$ac_ext EXEEXT=$ac_cv_exeext ac_exeext=$EXEEXT echo "$as_me:$LINENO: checking for suffix of object files" >&5 echo $ECHO_N "checking for suffix of object files... 
$ECHO_C" >&6 if test "${ac_cv_objext+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ int main () { ; return 0; } _ACEOF rm -f conftest.o conftest.obj if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 (eval $ac_compile) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; then for ac_file in `(ls conftest.o conftest.obj; ls conftest.*) 2>/dev/null`; do case $ac_file in *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg ) ;; *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'` break;; esac done else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 { { echo "$as_me:$LINENO: error: cannot compute suffix of object files: cannot compile See \`config.log' for more details." >&5 echo "$as_me: error: cannot compute suffix of object files: cannot compile See \`config.log' for more details." >&2;} { (exit 1); exit 1; }; } fi rm -f conftest.$ac_cv_objext conftest.$ac_ext fi echo "$as_me:$LINENO: result: $ac_cv_objext" >&5 echo "${ECHO_T}$ac_cv_objext" >&6 OBJEXT=$ac_cv_objext ac_objext=$OBJEXT echo "$as_me:$LINENO: checking whether we are using the GNU C compiler" >&5 echo $ECHO_N "checking whether we are using the GNU C compiler... $ECHO_C" >&6 if test "${ac_cv_c_compiler_gnu+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ int main () { #ifndef __GNUC__ choke me #endif ; return 0; } _ACEOF rm -f conftest.$ac_objext if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 (exit $ac_status); } && { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 (eval $ac_try) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; } && { ac_try='test -s conftest.$ac_objext' { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 (eval $ac_try) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; }; then ac_compiler_gnu=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_compiler_gnu=no fi rm -f conftest.err conftest.$ac_objext conftest.$ac_ext ac_cv_c_compiler_gnu=$ac_compiler_gnu fi echo "$as_me:$LINENO: result: $ac_cv_c_compiler_gnu" >&5 echo "${ECHO_T}$ac_cv_c_compiler_gnu" >&6 GCC=`test $ac_compiler_gnu = yes && echo yes` ac_test_CFLAGS=${CFLAGS+set} ac_save_CFLAGS=$CFLAGS CFLAGS="-g" echo "$as_me:$LINENO: checking whether $CC accepts -g" >&5 echo $ECHO_N "checking whether $CC accepts -g... $ECHO_C" >&6 if test "${ac_cv_prog_cc_g+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ int main () { ; return 0; } _ACEOF rm -f conftest.$ac_objext if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 (eval $ac_try) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; } && { ac_try='test -s conftest.$ac_objext' { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 (eval $ac_try) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 (exit $ac_status); }; }; then ac_cv_prog_cc_g=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_cv_prog_cc_g=no fi rm -f conftest.err conftest.$ac_objext conftest.$ac_ext fi echo "$as_me:$LINENO: result: $ac_cv_prog_cc_g" >&5 echo "${ECHO_T}$ac_cv_prog_cc_g" >&6 if test "$ac_test_CFLAGS" = set; then CFLAGS=$ac_save_CFLAGS elif test $ac_cv_prog_cc_g = yes; then if test "$GCC" = yes; then CFLAGS="-g -O2" else CFLAGS="-g" fi else if test "$GCC" = yes; then CFLAGS="-O2" else CFLAGS= fi fi echo "$as_me:$LINENO: checking for $CC option to accept ANSI C" >&5 echo $ECHO_N "checking for $CC option to accept ANSI C... $ECHO_C" >&6 if test "${ac_cv_prog_cc_stdc+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ac_cv_prog_cc_stdc=no ac_save_CC=$CC cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include #include #include #include /* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */ struct buf { int x; }; FILE * (*rcsopen) (struct buf *, struct stat *, int); static char *e (p, i) char **p; int i; { return p[i]; } static char *f (char * (*g) (char **, int), char **p, ...) { char *s; va_list v; va_start (v,p); s = g (p, va_arg (v,int)); va_end (v); return s; } /* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has function prototypes and stuff, but not '\xHH' hex character constants. These don't provoke an error unfortunately, instead are silently treated as 'x'. The following induces an error, until -std1 is added to get proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an array size at least. It's necessary to write '\x00'==0 to get something that's true only with -std1. */ int osf4_cc_array ['\x00' == 0 ? 
1 : -1]; int test (int i, double x); struct s1 {int (*f) (int a);}; struct s2 {int (*f) (double a);}; int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); int argc; char **argv; int main () { return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; ; return 0; } _ACEOF # Don't try gcc -ansi; that turns off useful extensions and # breaks some systems' header files. # AIX -qlanglvl=ansi # Ultrix and OSF/1 -std1 # HP-UX 10.20 and later -Ae # HP-UX older versions -Aa -D_HPUX_SOURCE # SVR4 -Xc -D__EXTENSIONS__ for ac_arg in "" -qlanglvl=ansi -std1 -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" do CC="$ac_save_CC $ac_arg" rm -f conftest.$ac_objext if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 (eval $ac_try) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; } && { ac_try='test -s conftest.$ac_objext' { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 (eval $ac_try) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; }; then ac_cv_prog_cc_stdc=$ac_arg break else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 fi rm -f conftest.err conftest.$ac_objext done rm -f conftest.$ac_ext conftest.$ac_objext CC=$ac_save_CC fi case "x$ac_cv_prog_cc_stdc" in x|xno) echo "$as_me:$LINENO: result: none needed" >&5 echo "${ECHO_T}none needed" >&6 ;; *) echo "$as_me:$LINENO: result: $ac_cv_prog_cc_stdc" >&5 echo "${ECHO_T}$ac_cv_prog_cc_stdc" >&6 CC="$CC $ac_cv_prog_cc_stdc" ;; esac # Some people use a C++ compiler to compile C. Since we use `exit', # in C++ we need to declare it. 
In case someone uses the same compiler # for both compiling C and C++ we need to have the C++ compiler decide # the declaration of exit, since it's the most demanding environment. cat >conftest.$ac_ext <<_ACEOF #ifndef __cplusplus choke me #endif _ACEOF rm -f conftest.$ac_objext if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 (eval $ac_try) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; } && { ac_try='test -s conftest.$ac_objext' { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 (eval $ac_try) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; }; then for ac_declaration in \ '' \ 'extern "C" void std::exit (int) throw (); using std::exit;' \ 'extern "C" void std::exit (int); using std::exit;' \ 'extern "C" void exit (int) throw ();' \ 'extern "C" void exit (int);' \ 'void exit (int);' do cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ $ac_declaration #include int main () { exit (42); ; return 0; } _ACEOF rm -f conftest.$ac_objext if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 (eval $ac_try) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 (exit $ac_status); }; } && { ac_try='test -s conftest.$ac_objext' { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 (eval $ac_try) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; }; then : else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 continue fi rm -f conftest.err conftest.$ac_objext conftest.$ac_ext cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ $ac_declaration int main () { exit (42); ; return 0; } _ACEOF rm -f conftest.$ac_objext if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 (eval $ac_try) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; } && { ac_try='test -s conftest.$ac_objext' { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 (eval $ac_try) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 (exit $ac_status); }; }; then break else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 fi rm -f conftest.err conftest.$ac_objext conftest.$ac_ext done rm -f conftest* if test -n "$ac_declaration"; then echo '#ifdef __cplusplus' >>confdefs.h echo $ac_declaration >>confdefs.h echo '#endif' >>confdefs.h fi else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 fi rm -f conftest.err conftest.$ac_objext conftest.$ac_ext ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu echo "$as_me:$LINENO: checking how to run the C preprocessor" >&5 echo $ECHO_N "checking how to run the C preprocessor... $ECHO_C" >&6 # On Suns, sometimes $CPP names a directory. if test -n "$CPP" && test -d "$CPP"; then CPP= fi if test -z "$CPP"; then if test "${ac_cv_prog_CPP+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else # Double quotes because CPP needs to be expanded for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp" do ac_preproc_ok=false for ac_c_preproc_warn_flag in '' yes do # Use a header file that comes with gcc, so configuring glibc # with a fresh cross-compiler works. # Prefer to if __STDC__ is defined, since # exists even on freestanding compilers. # On the NeXT, cc -E runs the code through the compiler's parser, # not just through cpp. "Syntax error" is here to catch this case. cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ #ifdef __STDC__ # include #else # include #endif Syntax error _ACEOF if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } >/dev/null; then if test -s conftest.err; then ac_cpp_err=$ac_c_preproc_warn_flag ac_cpp_err=$ac_cpp_err$ac_c_werror_flag else ac_cpp_err= fi else ac_cpp_err=yes fi if test -z "$ac_cpp_err"; then : else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 # Broken: fails on valid input. continue fi rm -f conftest.err conftest.$ac_ext # OK, works on sane cases. Now check whether non-existent headers # can be detected and how. cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include _ACEOF if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } >/dev/null; then if test -s conftest.err; then ac_cpp_err=$ac_c_preproc_warn_flag ac_cpp_err=$ac_cpp_err$ac_c_werror_flag else ac_cpp_err= fi else ac_cpp_err=yes fi if test -z "$ac_cpp_err"; then # Broken: success on invalid input. continue else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 # Passes both tests. ac_preproc_ok=: break fi rm -f conftest.err conftest.$ac_ext done # Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. 
rm -f conftest.err conftest.$ac_ext if $ac_preproc_ok; then break fi done ac_cv_prog_CPP=$CPP fi CPP=$ac_cv_prog_CPP else ac_cv_prog_CPP=$CPP fi echo "$as_me:$LINENO: result: $CPP" >&5 echo "${ECHO_T}$CPP" >&6 ac_preproc_ok=false for ac_c_preproc_warn_flag in '' yes do # Use a header file that comes with gcc, so configuring glibc # with a fresh cross-compiler works. # Prefer to if __STDC__ is defined, since # exists even on freestanding compilers. # On the NeXT, cc -E runs the code through the compiler's parser, # not just through cpp. "Syntax error" is here to catch this case. cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #ifdef __STDC__ # include #else # include #endif Syntax error _ACEOF if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } >/dev/null; then if test -s conftest.err; then ac_cpp_err=$ac_c_preproc_warn_flag ac_cpp_err=$ac_cpp_err$ac_c_werror_flag else ac_cpp_err= fi else ac_cpp_err=yes fi if test -z "$ac_cpp_err"; then : else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 # Broken: fails on valid input. continue fi rm -f conftest.err conftest.$ac_ext # OK, works on sane cases. Now check whether non-existent headers # can be detected and how. cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include _ACEOF if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 (exit $ac_status); } >/dev/null; then if test -s conftest.err; then ac_cpp_err=$ac_c_preproc_warn_flag ac_cpp_err=$ac_cpp_err$ac_c_werror_flag else ac_cpp_err= fi else ac_cpp_err=yes fi if test -z "$ac_cpp_err"; then # Broken: success on invalid input. continue else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 # Passes both tests. ac_preproc_ok=: break fi rm -f conftest.err conftest.$ac_ext done # Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. rm -f conftest.err conftest.$ac_ext if $ac_preproc_ok; then : else { { echo "$as_me:$LINENO: error: C preprocessor \"$CPP\" fails sanity check See \`config.log' for more details." >&5 echo "$as_me: error: C preprocessor \"$CPP\" fails sanity check See \`config.log' for more details." >&2;} { (exit 1); exit 1; }; } fi ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu echo "$as_me:$LINENO: checking for egrep" >&5 echo $ECHO_N "checking for egrep... $ECHO_C" >&6 if test "${ac_cv_prog_egrep+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else if echo a | (grep -E '(a|b)') >/dev/null 2>&1 then ac_cv_prog_egrep='grep -E' else ac_cv_prog_egrep='egrep' fi fi echo "$as_me:$LINENO: result: $ac_cv_prog_egrep" >&5 echo "${ECHO_T}$ac_cv_prog_egrep" >&6 EGREP=$ac_cv_prog_egrep echo "$as_me:$LINENO: checking for ANSI C header files" >&5 echo $ECHO_N "checking for ANSI C header files... $ECHO_C" >&6 if test "${ac_cv_header_stdc+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ #include #include #include #include int main () { ; return 0; } _ACEOF rm -f conftest.$ac_objext if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 (eval $ac_try) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; } && { ac_try='test -s conftest.$ac_objext' { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 (eval $ac_try) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; }; then ac_cv_header_stdc=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_cv_header_stdc=no fi rm -f conftest.err conftest.$ac_objext conftest.$ac_ext if test $ac_cv_header_stdc = yes; then # SunOS 4.x string.h does not declare mem*, contrary to ANSI. cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include _ACEOF if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | $EGREP "memchr" >/dev/null 2>&1; then : else ac_cv_header_stdc=no fi rm -f conftest* fi if test $ac_cv_header_stdc = yes; then # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include _ACEOF if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | $EGREP "free" >/dev/null 2>&1; then : else ac_cv_header_stdc=no fi rm -f conftest* fi if test $ac_cv_header_stdc = yes; then # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi. if test "$cross_compiling" = yes; then : else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. 
*/ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include #if ((' ' & 0x0FF) == 0x020) # define ISLOWER(c) ('a' <= (c) && (c) <= 'z') # define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c)) #else # define ISLOWER(c) \ (('a' <= (c) && (c) <= 'i') \ || ('j' <= (c) && (c) <= 'r') \ || ('s' <= (c) && (c) <= 'z')) # define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c)) #endif #define XOR(e, f) (((e) && !(f)) || (!(e) && (f))) int main () { int i; for (i = 0; i < 256; i++) if (XOR (islower (i), ISLOWER (i)) || toupper (i) != TOUPPER (i)) exit(2); exit (0); } _ACEOF rm -f conftest$ac_exeext if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 (eval $ac_link) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { ac_try='./conftest$ac_exeext' { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 (eval $ac_try) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; }; then : else echo "$as_me: program exited with status $ac_status" >&5 echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ( exit $ac_status ) ac_cv_header_stdc=no fi rm -f core *.core gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext fi fi fi echo "$as_me:$LINENO: result: $ac_cv_header_stdc" >&5 echo "${ECHO_T}$ac_cv_header_stdc" >&6 if test $ac_cv_header_stdc = yes; then cat >>confdefs.h <<\_ACEOF #define STDC_HEADERS 1 _ACEOF fi # On IRIX 5.3, sys/types and inttypes.h are conflicting. for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \ inttypes.h stdint.h unistd.h do as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` echo "$as_me:$LINENO: checking for $ac_header" >&5 echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6 if eval "test \"\${$as_ac_Header+set}\" = set"; then echo $ECHO_N "(cached) $ECHO_C" >&6 else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. 
*/ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ $ac_includes_default #include <$ac_header> _ACEOF rm -f conftest.$ac_objext if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 (eval $ac_try) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; } && { ac_try='test -s conftest.$ac_objext' { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 (eval $ac_try) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; }; then eval "$as_ac_Header=yes" else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 eval "$as_ac_Header=no" fi rm -f conftest.err conftest.$ac_objext conftest.$ac_ext fi echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5 echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6 if test `eval echo '${'$as_ac_Header'}'` = yes; then cat >>confdefs.h <<_ACEOF #define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 _ACEOF fi done if test "${ac_cv_header_nvml_h+set}" = set; then echo "$as_me:$LINENO: checking for nvml.h" >&5 echo $ECHO_N "checking for nvml.h... $ECHO_C" >&6 if test "${ac_cv_header_nvml_h+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 fi echo "$as_me:$LINENO: result: $ac_cv_header_nvml_h" >&5 echo "${ECHO_T}$ac_cv_header_nvml_h" >&6 else # Is the header compilable? echo "$as_me:$LINENO: checking nvml.h usability" >&5 echo $ECHO_N "checking nvml.h usability... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ $ac_includes_default #include _ACEOF rm -f conftest.$ac_objext if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 (eval $ac_compile) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 (eval $ac_try) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; } && { ac_try='test -s conftest.$ac_objext' { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 (eval $ac_try) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; }; then ac_header_compiler=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_header_compiler=no fi rm -f conftest.err conftest.$ac_objext conftest.$ac_ext echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 echo "${ECHO_T}$ac_header_compiler" >&6 # Is the header present? echo "$as_me:$LINENO: checking nvml.h presence" >&5 echo $ECHO_N "checking nvml.h presence... $ECHO_C" >&6 cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include _ACEOF if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 (exit $ac_status); } >/dev/null; then if test -s conftest.err; then ac_cpp_err=$ac_c_preproc_warn_flag ac_cpp_err=$ac_cpp_err$ac_c_werror_flag else ac_cpp_err= fi else ac_cpp_err=yes fi if test -z "$ac_cpp_err"; then ac_header_preproc=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_header_preproc=no fi rm -f conftest.err conftest.$ac_ext echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 echo "${ECHO_T}$ac_header_preproc" >&6 # So? What about this header? case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in yes:no: ) { echo "$as_me:$LINENO: WARNING: nvml.h: accepted by the compiler, rejected by the preprocessor!" >&5 echo "$as_me: WARNING: nvml.h: accepted by the compiler, rejected by the preprocessor!" >&2;} { echo "$as_me:$LINENO: WARNING: nvml.h: proceeding with the compiler's result" >&5 echo "$as_me: WARNING: nvml.h: proceeding with the compiler's result" >&2;} ac_header_preproc=yes ;; no:yes:* ) { echo "$as_me:$LINENO: WARNING: nvml.h: present but cannot be compiled" >&5 echo "$as_me: WARNING: nvml.h: present but cannot be compiled" >&2;} { echo "$as_me:$LINENO: WARNING: nvml.h: check for missing prerequisite headers?" >&5 echo "$as_me: WARNING: nvml.h: check for missing prerequisite headers?" 
>&2;} { echo "$as_me:$LINENO: WARNING: nvml.h: see the Autoconf documentation" >&5 echo "$as_me: WARNING: nvml.h: see the Autoconf documentation" >&2;} { echo "$as_me:$LINENO: WARNING: nvml.h: section \"Present But Cannot Be Compiled\"" >&5 echo "$as_me: WARNING: nvml.h: section \"Present But Cannot Be Compiled\"" >&2;} { echo "$as_me:$LINENO: WARNING: nvml.h: proceeding with the preprocessor's result" >&5 echo "$as_me: WARNING: nvml.h: proceeding with the preprocessor's result" >&2;} { echo "$as_me:$LINENO: WARNING: nvml.h: in the future, the compiler will take precedence" >&5 echo "$as_me: WARNING: nvml.h: in the future, the compiler will take precedence" >&2;} ( cat <<\_ASBOX ## ------------------------------------------ ## ## Report this to the AC_PACKAGE_NAME lists. ## ## ------------------------------------------ ## _ASBOX ) | sed "s/^/$as_me: WARNING: /" >&2 ;; esac echo "$as_me:$LINENO: checking for nvml.h" >&5 echo $ECHO_N "checking for nvml.h... $ECHO_C" >&6 if test "${ac_cv_header_nvml_h+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ac_cv_header_nvml_h=$ac_header_preproc fi echo "$as_me:$LINENO: result: $ac_cv_header_nvml_h" >&5 echo "${ECHO_T}$ac_cv_header_nvml_h" >&6 fi if test $ac_cv_header_nvml_h = yes; then : else { { echo "$as_me:$LINENO: error: NVML component: nvml.h not found: use configure flags to set the path " >&5 echo "$as_me: error: NVML component: nvml.h not found: use configure flags to set the path " >&2;} { (exit 1); exit 1; }; } fi LDFLAGS="$LDFLAGS -L$nvml_libdir -Wl,-rpath,$nvml_libdir" echo "$as_me:$LINENO: checking for nvmlInit in -lnvidia-ml" >&5 echo $ECHO_N "checking for nvmlInit in -lnvidia-ml... $ECHO_C" >&6 if test "${ac_cv_lib_nvidia_ml_nvmlInit+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-lnvidia-ml $LIBS" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ /* Override any gcc2 internal prototype to avoid an error. */ #ifdef __cplusplus extern "C" #endif /* We use char because int might match the return type of a gcc2 builtin and then its argument prototype would still apply. */ char nvmlInit (); int main () { nvmlInit (); ; return 0; } _ACEOF rm -f conftest.$ac_objext conftest$ac_exeext if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 (eval $ac_link) 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 (eval $ac_try) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; } && { ac_try='test -s conftest$ac_exeext' { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 (eval $ac_try) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; }; then ac_cv_lib_nvidia_ml_nvmlInit=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_cv_lib_nvidia_ml_nvmlInit=no fi rm -f conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi echo "$as_me:$LINENO: result: $ac_cv_lib_nvidia_ml_nvmlInit" >&5 echo "${ECHO_T}$ac_cv_lib_nvidia_ml_nvmlInit" >&6 if test $ac_cv_lib_nvidia_ml_nvmlInit = yes; then cat >>confdefs.h <<_ACEOF #define HAVE_LIBNVIDIA_ML 1 _ACEOF LIBS="-lnvidia-ml $LIBS" else { { echo "$as_me:$LINENO: error: NVML component: libnvidia-ml.so not found: use configure flags to set the path" >&5 echo "$as_me: error: NVML component: libnvidia-ml.so not found: use configure flags to set the path" >&2;} { (exit 1); exit 1; }; } fi NVML_INCDIR=$nvml_incdir NVML_LIBDIR=$nvml_libdir # Check whether --with-cuda-dir or --without-cuda-dir was given. 
# --with-cuda-dir=DIR handling.
#
# Reads:  with_cuda_dir (set by the option parser), LDFLAGS, LIBS, and the
#         cache variable ac_cv_lib_cudart_cudaMalloc.
# Writes: CUDA_DIR, LDFLAGS (adds -L/-rpath for DIR/lib64), LIBS (prepends
#         -lcudart), confdefs.h (HAVE_LIBCUDART), ac_config_files, confcache.
# Exits 1 when no usable directory was given or when a program referencing
# cudaMalloc cannot be linked against -lcudart.
#
# NOTE(review): this script's own banner says it was generated by GNU
# Autoconf 2.59; mirror the yes/no guard below in the Autoconf input,
# otherwise it is lost the next time the script is regenerated.
if test "${with_cuda_dir+set}" = set; then
  withval="$with_cuda_dir"
  # `--with-cuda-dir' without a value yields "yes" and `--without-cuda-dir'
  # yields "no"; neither is a directory.  Report the missing path instead of
  # probing -Lyes/lib64 and failing later with a misleading message.
  case $withval in
    yes | no | '')
      { { echo "$as_me:$LINENO: error: Component requires path to cuda library." >&5
echo "$as_me: error: Component requires path to cuda library." >&2;}
   { (exit 1); exit 1; }; }
      ;;
  esac
  CUDA_DIR=$withval
  LDFLAGS="$LDFLAGS -L$withval/lib64 -Wl,-rpath,$withval/lib64"
  echo "$as_me:$LINENO: checking for cudaMalloc in -lcudart" >&5
echo $ECHO_N "checking for cudaMalloc in -lcudart... $ECHO_C" >&6
if test "${ac_cv_lib_cudart_cudaMalloc+set}" = set; then
  echo $ECHO_N "(cached) $ECHO_C" >&6
else
  # Link a stub that references cudaMalloc with -lcudart prepended to LIBS;
  # LIBS is restored afterwards whatever the outcome.
  ac_check_lib_save_LIBS=$LIBS
LIBS="-lcudart $LIBS"
cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */
_ACEOF
cat confdefs.h >>conftest.$ac_ext
cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */

/* Override any gcc2 internal prototype to avoid an error. */
#ifdef __cplusplus
extern "C"
#endif
/* We use char because int might match the return type of a gcc2
   builtin and then its argument prototype would still apply. */
char cudaMalloc ();
int
main ()
{
cudaMalloc ();
  ;
  return 0;
}
_ACEOF
rm -f conftest.$ac_objext conftest$ac_exeext
if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
  (eval $ac_link) 2>conftest.er1
  ac_status=$?
  grep -v '^ *+' conftest.er1 >conftest.err
  rm -f conftest.er1
  cat conftest.err >&5
  echo "$as_me:$LINENO: \$? = $ac_status" >&5
  (exit $ac_status); } &&
   { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
  { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
  (eval $ac_try) 2>&5
  ac_status=$?
  echo "$as_me:$LINENO: \$? = $ac_status" >&5
  (exit $ac_status); }; } &&
   { ac_try='test -s conftest$ac_exeext'
  { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
  (eval $ac_try) 2>&5
  ac_status=$?
  echo "$as_me:$LINENO: \$? = $ac_status" >&5
  (exit $ac_status); }; }; then
  ac_cv_lib_cudart_cudaMalloc=yes
else
  echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5

ac_cv_lib_cudart_cudaMalloc=no
fi
rm -f conftest.err conftest.$ac_objext \
      conftest$ac_exeext conftest.$ac_ext
LIBS=$ac_check_lib_save_LIBS
fi
echo "$as_me:$LINENO: result: $ac_cv_lib_cudart_cudaMalloc" >&5
echo "${ECHO_T}$ac_cv_lib_cudart_cudaMalloc" >&6
# Quoted so that an empty cache value is a clean "no" rather than a
# `test' syntax error.
if test "$ac_cv_lib_cudart_cudaMalloc" = yes; then
  cat >>confdefs.h <<_ACEOF
#define HAVE_LIBCUDART 1
_ACEOF

  LIBS="-lcudart $LIBS"

else
  { { echo "$as_me:$LINENO: error: CUDA cudart library not found!" >&5
echo "$as_me: error: CUDA cudart library not found!" >&2;}
   { (exit 1); exit 1; }; }
fi

else
  { { echo "$as_me:$LINENO: error: Component requires path to cuda library." >&5
echo "$as_me: error: Component requires path to cuda library." >&2;}
   { (exit 1); exit 1; }; }
fi;

# Register Makefile.nvml as an output file for config.status.
ac_config_files="$ac_config_files Makefile.nvml"

# Start the cache file with its explanatory header; the quoted delimiter
# keeps the back-quotes in the text literal.
cat >confcache <<\_ACEOF
# This file is a shell script that caches the results of configure
# tests run on this system so they can be shared between configure
# scripts and configure runs, see configure's option --config-cache.
# It is not useful on other systems. If it contains results you don't
# want to keep, you may remove or edit it.
#
# config.status only pays attention to the cache file if you give it
# the --recheck option to rerun configure.
#
# `ac_cv_env_foo' variables (set or unset) will be overridden when
# loading this file, other *unset* `ac_cv_foo' will be assigned the
# following values.

_ACEOF

# The following way of writing the cache mishandles newlines in values,
# but we know of no workaround that is simple, portable, and efficient.
# So, don't put newlines in cache variables' values.
# Ultrix sh set writes to stderr and can't be redirected directly,
# and sets the high bit in the cache file unless we assign to the vars.
# Save the configure cache, then settle the installation-prefix defaults.
#
#  1. `(set)' dumps the shell variables.  A `case' on how this shell prints
#     ac_space selects one of two sed filters; both keep only *_cv_*
#     assignments, and the first also wraps each value in single quotes
#     for shells whose `set' does not quote.
#  2. A second sed rewrites every kept entry: a value containing `{' or `}'
#     becomes `test "${var+set}" = set || var=value'; any other entry whose
#     name does not start with ac_cv_env becomes `var=${var=value}'.  The
#     result is appended to ./confcache.
#  3. If diff reports that $cache_file and confcache differ, confcache is
#     copied over $cache_file when that file is writable (the "updating
#     cache" message is suppressed for /dev/null); otherwise a "not updating
#     unwritable cache" message is printed.  confcache is then removed.
#  4. prefix falls back to $ac_default_prefix, and exec_prefix to the literal
#     string ${prefix}, when either is still NONE.
#  5. When srcdir is ".", ac_vpsub is set to a sed program that strips
#     $(srcdir), ${srcdir} and @srcdir@ from VPATH assignments.
#
# NOTE(review): the physical line below appears to have had its line breaks
# collapsed (e.g. `sed -n \' is followed by a space, and `#' comments are
# followed by code on the same line).  As written, the first `#' would
# comment out the remainder of the line; restore the line structure from a
# pristine copy of the script before relying on it.
{ (set) 2>&1 | case `(ac_space=' '; set | grep ac_space) 2>&1` in *ac_space=\ *) # `set' does not quote correctly, so add quotes (double-quote # substitution turns \\\\ into \\, and sed turns \\ into \). sed -n \ "s/'/'\\\\''/g; s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p" ;; *) # `set' quotes correctly as required by POSIX, so do not add quotes. sed -n \ "s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1=\\2/p" ;; esac; } | sed ' t clear : clear s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/ t end /^ac_cv_env/!s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ : end' >>confcache if diff $cache_file confcache >/dev/null 2>&1; then :; else if test -w $cache_file; then test "x$cache_file" != "x/dev/null" && echo "updating cache $cache_file" cat confcache >$cache_file else echo "not updating unwritable cache $cache_file" fi fi rm -f confcache test "x$prefix" = xNONE && prefix=$ac_default_prefix # Let make expand exec_prefix. test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' # VPATH may cause trouble with some makes, so we remove $(srcdir), # ${srcdir} and @srcdir@ from VPATH if srcdir is ".", strip leading and # trailing colons and then remove the whole line if VPATH becomes empty # (actually we leave an empty line to preserve line numbers). if test "x$srcdir" = x.; then ac_vpsub='/^[ ]*VPATH[ ]*=/{ s/:*\$(srcdir):*/:/; s/:*\${srcdir}:*/:/; s/:*@srcdir@:*/:/; s/^\([^=]*=[ ]*\):*/\1/; s/:*$//; s/^[^=]*=[ ]*$//; }' fi # Transform confdefs.h into DEFS. # Protect against shell expansion while executing Makefile rules. # Protect against Makefile macro expansion. # # If the first sed substitution is executed (which looks for macros that # take arguments), then we branch to the quote section. Otherwise, # look for a macro that doesn't take arguments.
cat >confdef2opt.sed <<\_ACEOF t clear : clear s,^[ ]*#[ ]*define[ ][ ]*\([^ (][^ (]*([^)]*)\)[ ]*\(.*\),-D\1=\2,g t quote s,^[ ]*#[ ]*define[ ][ ]*\([^ ][^ ]*\)[ ]*\(.*\),-D\1=\2,g t quote d : quote s,[ `~#$^&*(){}\\|;'"<>?],\\&,g s,\[,\\&,g s,\],\\&,g s,\$,$$,g p _ACEOF # We use echo to avoid assuming a particular line-breaking character. # The extra dot is to prevent the shell from consuming trailing # line-breaks from the sub-command output. A line-break within # single-quotes doesn't work because, if this script is created in a # platform that uses two characters for line-breaks (e.g., DOS), tr # would break. ac_LF_and_DOT=`echo; echo .` DEFS=`sed -n -f confdef2opt.sed confdefs.h | tr "$ac_LF_and_DOT" ' .'` rm -f confdef2opt.sed ac_libobjs= ac_ltlibobjs= for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue # 1. Remove the extension, and $U if already installed. ac_i=`echo "$ac_i" | sed 's/\$U\././;s/\.o$//;s/\.obj$//'` # 2. Add them. ac_libobjs="$ac_libobjs $ac_i\$U.$ac_objext" ac_ltlibobjs="$ac_ltlibobjs $ac_i"'$U.lo' done LIBOBJS=$ac_libobjs LTLIBOBJS=$ac_ltlibobjs : ${CONFIG_STATUS=./config.status} ac_clean_files_save=$ac_clean_files ac_clean_files="$ac_clean_files $CONFIG_STATUS" { echo "$as_me:$LINENO: creating $CONFIG_STATUS" >&5 echo "$as_me: creating $CONFIG_STATUS" >&6;} cat >$CONFIG_STATUS <<_ACEOF #! $SHELL # Generated by $as_me. # Run this file to recreate the current configuration. # Compiler output produced by configure, useful for debugging # configure, is in config.log if it exists. debug=false ac_cs_recheck=false ac_cs_silent=false SHELL=\${CONFIG_SHELL-$SHELL} _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF ## --------------------- ## ## M4sh Initialization. ## ## --------------------- ## # Be Bourne compatible if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then emulate sh NULLCMD=: # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which # is contrary to our usage. Disable this feature. 
alias -g '${1+"$@"}'='"$@"' elif test -n "${BASH_VERSION+set}" && (set -o posix) >/dev/null 2>&1; then set -o posix fi DUALCASE=1; export DUALCASE # for MKS sh # Support unset when possible. if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then as_unset=unset else as_unset=false fi # Work around bugs in pre-3.0 UWIN ksh. $as_unset ENV MAIL MAILPATH PS1='$ ' PS2='> ' PS4='+ ' # NLS nuisances. for as_var in \ LANG LANGUAGE LC_ADDRESS LC_ALL LC_COLLATE LC_CTYPE LC_IDENTIFICATION \ LC_MEASUREMENT LC_MESSAGES LC_MONETARY LC_NAME LC_NUMERIC LC_PAPER \ LC_TELEPHONE LC_TIME do if (set +x; test -z "`(eval $as_var=C; export $as_var) 2>&1`"); then eval $as_var=C; export $as_var else $as_unset $as_var fi done # Required to use basename. if expr a : '\(a\)' >/dev/null 2>&1; then as_expr=expr else as_expr=false fi if (basename /) >/dev/null 2>&1 && test "X`basename / 2>&1`" = "X/"; then as_basename=basename else as_basename=false fi # Name of the executable. as_me=`$as_basename "$0" || $as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ X"$0" : 'X\(//\)$' \| \ X"$0" : 'X\(/\)$' \| \ . : '\(.\)' 2>/dev/null || echo X/"$0" | sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/; q; } /^X\/\(\/\/\)$/{ s//\1/; q; } /^X\/\(\/\).*/{ s//\1/; q; } s/.*/./; q'` # PATH needs CR, and LINENO needs CR and PATH. # Avoid depending upon Character Ranges. as_cr_letters='abcdefghijklmnopqrstuvwxyz' as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' as_cr_Letters=$as_cr_letters$as_cr_LETTERS as_cr_digits='0123456789' as_cr_alnum=$as_cr_Letters$as_cr_digits # The user is always right. if test "${PATH_SEPARATOR+set}" != set; then echo "#! /bin/sh" >conf$$.sh echo "exit 0" >>conf$$.sh chmod +x conf$$.sh if (PATH="/nonexistent;."; conf$$.sh) >/dev/null 2>&1; then PATH_SEPARATOR=';' else PATH_SEPARATOR=: fi rm -f conf$$.sh fi as_lineno_1=$LINENO as_lineno_2=$LINENO as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null` test "x$as_lineno_1" != "x$as_lineno_2" && test "x$as_lineno_3" = "x$as_lineno_2" || { # Find who we are. 
Look in the path if we contain no path at all # relative or not. case $0 in *[\\/]* ) as_myself=$0 ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break done ;; esac # We did not find ourselves, most probably we were run as `sh COMMAND' # in which case we are not to be found in the path. if test "x$as_myself" = x; then as_myself=$0 fi if test ! -f "$as_myself"; then { { echo "$as_me:$LINENO: error: cannot find myself; rerun with an absolute path" >&5 echo "$as_me: error: cannot find myself; rerun with an absolute path" >&2;} { (exit 1); exit 1; }; } fi case $CONFIG_SHELL in '') as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for as_base in sh bash ksh sh5; do case $as_dir in /*) if ("$as_dir/$as_base" -c ' as_lineno_1=$LINENO as_lineno_2=$LINENO as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null` test "x$as_lineno_1" != "x$as_lineno_2" && test "x$as_lineno_3" = "x$as_lineno_2" ') 2>/dev/null; then $as_unset BASH_ENV || test "${BASH_ENV+set}" != set || { BASH_ENV=; export BASH_ENV; } $as_unset ENV || test "${ENV+set}" != set || { ENV=; export ENV; } CONFIG_SHELL=$as_dir/$as_base export CONFIG_SHELL exec "$CONFIG_SHELL" "$0" ${1+"$@"} fi;; esac done done ;; esac # Create $as_me.lineno as a copy of $as_myself, but with $LINENO # uniformly replaced by the line number. The first 'sed' inserts a # line-number line before each line; the second 'sed' does the real # work. The second script uses 'N' to pair each line-number line # with the numbered line, and appends trailing '-' during # substitution so that $LINENO is not a special case at line end. # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the # second 'sed' script. Blame Lee E. McMahon for sed's syntax. 
:-) sed '=' <$as_myself | sed ' N s,$,-, : loop s,^\(['$as_cr_digits']*\)\(.*\)[$]LINENO\([^'$as_cr_alnum'_]\),\1\2\1\3, t loop s,-$,, s,^['$as_cr_digits']*\n,, ' >$as_me.lineno && chmod +x $as_me.lineno || { { echo "$as_me:$LINENO: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&5 echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2;} { (exit 1); exit 1; }; } # Don't try to exec as it changes $[0], causing all sort of problems # (the dirname of $[0] is not the place where we might find the # original and so on. Autoconf is especially sensible to this). . ./$as_me.lineno # Exit status is that of the last command. exit } case `echo "testing\c"; echo 1,2,3`,`echo -n testing; echo 1,2,3` in *c*,-n*) ECHO_N= ECHO_C=' ' ECHO_T=' ' ;; *c*,* ) ECHO_N=-n ECHO_C= ECHO_T= ;; *) ECHO_N= ECHO_C='\c' ECHO_T= ;; esac if expr a : '\(a\)' >/dev/null 2>&1; then as_expr=expr else as_expr=false fi rm -f conf$$ conf$$.exe conf$$.file echo >conf$$.file if ln -s conf$$.file conf$$ 2>/dev/null; then # We could just check for DJGPP; but this test a) works b) is more generic # and c) will remain valid once DJGPP supports symlinks (DJGPP 2.04). if test -f conf$$.exe; then # Don't use ln at all; we don't have any links as_ln_s='cp -p' else as_ln_s='ln -s' fi elif ln conf$$.file conf$$ 2>/dev/null; then as_ln_s=ln else as_ln_s='cp -p' fi rm -f conf$$ conf$$.exe conf$$.file if mkdir -p . 2>/dev/null; then as_mkdir_p=: else test -d ./-p && rmdir ./-p as_mkdir_p=false fi as_executable_p="test -f" # Sed expression to map a string onto a valid CPP name. as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" # Sed expression to map a string onto a valid variable name. as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" # IFS # We need space, tab and new line, in precisely that order. as_nl=' ' IFS=" $as_nl" # CDPATH. 
$as_unset CDPATH exec 6>&1 # Open the log real soon, to keep \$[0] and so on meaningful, and to # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. Logging --version etc. is OK. exec 5>>config.log { echo sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX ## Running $as_me. ## _ASBOX } >&5 cat >&5 <<_CSEOF This file was extended by $as_me, which was generated by GNU Autoconf 2.59. Invocation command line was CONFIG_FILES = $CONFIG_FILES CONFIG_HEADERS = $CONFIG_HEADERS CONFIG_LINKS = $CONFIG_LINKS CONFIG_COMMANDS = $CONFIG_COMMANDS $ $0 $@ _CSEOF echo "on `(hostname || uname -n) 2>/dev/null | sed 1q`" >&5 echo >&5 _ACEOF # Files that config.status was made for. if test -n "$ac_config_files"; then echo "config_files=\"$ac_config_files\"" >>$CONFIG_STATUS fi if test -n "$ac_config_headers"; then echo "config_headers=\"$ac_config_headers\"" >>$CONFIG_STATUS fi if test -n "$ac_config_links"; then echo "config_links=\"$ac_config_links\"" >>$CONFIG_STATUS fi if test -n "$ac_config_commands"; then echo "config_commands=\"$ac_config_commands\"" >>$CONFIG_STATUS fi cat >>$CONFIG_STATUS <<\_ACEOF ac_cs_usage="\ \`$as_me' instantiates files from templates according to the current configuration. Usage: $0 [OPTIONS] [FILE]... -h, --help print this help, then exit -V, --version print version number, then exit -q, --quiet do not print progress messages -d, --debug don't remove temporary files --recheck update $as_me by reconfiguring in the same conditions --file=FILE[:TEMPLATE] instantiate the configuration file FILE Configuration files: $config_files Report bugs to ." _ACEOF cat >>$CONFIG_STATUS <<_ACEOF ac_cs_version="\\ config.status configured by $0, generated by GNU Autoconf 2.59, with options \\"`echo "$ac_configure_args" | sed 's/[\\""\`\$]/\\\\&/g'`\\" Copyright (C) 2003 Free Software Foundation, Inc. 
This config.status script is free software; the Free Software Foundation gives unlimited permission to copy, distribute and modify it." srcdir=$srcdir _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF # If no file are specified by the user, then we need to provide default # value. By we need to know if files were specified by the user. ac_need_defaults=: while test $# != 0 do case $1 in --*=*) ac_option=`expr "x$1" : 'x\([^=]*\)='` ac_optarg=`expr "x$1" : 'x[^=]*=\(.*\)'` ac_shift=: ;; -*) ac_option=$1 ac_optarg=$2 ac_shift=shift ;; *) # This is not an option, so the user has probably given explicit # arguments. ac_option=$1 ac_need_defaults=false;; esac case $ac_option in # Handling of the options. _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) ac_cs_recheck=: ;; --version | --vers* | -V ) echo "$ac_cs_version"; exit 0 ;; --he | --h) # Conflict between --help and --header { { echo "$as_me:$LINENO: error: ambiguous option: $1 Try \`$0 --help' for more information." >&5 echo "$as_me: error: ambiguous option: $1 Try \`$0 --help' for more information." >&2;} { (exit 1); exit 1; }; };; --help | --hel | -h ) echo "$ac_cs_usage"; exit 0 ;; --debug | --d* | -d ) debug=: ;; --file | --fil | --fi | --f ) $ac_shift CONFIG_FILES="$CONFIG_FILES $ac_optarg" ac_need_defaults=false;; --header | --heade | --head | --hea ) $ac_shift CONFIG_HEADERS="$CONFIG_HEADERS $ac_optarg" ac_need_defaults=false;; -q | -quiet | --quiet | --quie | --qui | --qu | --q \ | -silent | --silent | --silen | --sile | --sil | --si | --s) ac_cs_silent=: ;; # This is an error. -*) { { echo "$as_me:$LINENO: error: unrecognized option: $1 Try \`$0 --help' for more information." >&5 echo "$as_me: error: unrecognized option: $1 Try \`$0 --help' for more information." 
>&2;} { (exit 1); exit 1; }; } ;; *) ac_config_targets="$ac_config_targets $1" ;; esac shift done ac_configure_extra_args= if $ac_cs_silent; then exec 6>/dev/null ac_configure_extra_args="$ac_configure_extra_args --silent" fi _ACEOF cat >>$CONFIG_STATUS <<_ACEOF if \$ac_cs_recheck; then echo "running $SHELL $0 " $ac_configure_args \$ac_configure_extra_args " --no-create --no-recursion" >&6 exec $SHELL $0 $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion fi _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF for ac_config_target in $ac_config_targets do case "$ac_config_target" in # Handling of arguments. "Makefile.nvml" ) CONFIG_FILES="$CONFIG_FILES Makefile.nvml" ;; *) { { echo "$as_me:$LINENO: error: invalid argument: $ac_config_target" >&5 echo "$as_me: error: invalid argument: $ac_config_target" >&2;} { (exit 1); exit 1; }; };; esac done # If the user did not use the arguments to specify the items to instantiate, # then the envvar interface is used. Set only those that are not. # We use the long form for the default assignment because of an extremely # bizarre bug on SunOS 4.1.3. if $ac_need_defaults; then test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files fi # Have a temporary directory for convenience. Make it in the build tree # simply because there is no reason to put it here, and in addition, # creating and moving files from /tmp can sometimes cause problems. # Create a temporary directory, and hook for its removal unless debugging. $debug || { trap 'exit_status=$?; rm -rf $tmp && exit $exit_status' 0 trap '{ (exit 1); exit 1; }' 1 2 13 15 } # Create a (secure) tmp directory for tmp files. { tmp=`(umask 077 && mktemp -d -q "./confstatXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" } || { tmp=./confstat$$-$RANDOM (umask 077 && mkdir $tmp) } || { echo "$me: cannot create a temporary directory in ." >&2 { (exit 1); exit 1; } } _ACEOF cat >>$CONFIG_STATUS <<_ACEOF # # CONFIG_FILES section. 
# # No need to generate the scripts if there are no CONFIG_FILES. # This happens for instance when ./config.status config.h if test -n "\$CONFIG_FILES"; then # Protect against being on the right side of a sed subst in config.status. sed 's/,@/@@/; s/@,/@@/; s/,;t t\$/@;t t/; /@;t t\$/s/[\\\\&,]/\\\\&/g; s/@@/,@/; s/@@/@,/; s/@;t t\$/,;t t/' >\$tmp/subs.sed <<\\CEOF s,@SHELL@,$SHELL,;t t s,@PATH_SEPARATOR@,$PATH_SEPARATOR,;t t s,@PACKAGE_NAME@,$PACKAGE_NAME,;t t s,@PACKAGE_TARNAME@,$PACKAGE_TARNAME,;t t s,@PACKAGE_VERSION@,$PACKAGE_VERSION,;t t s,@PACKAGE_STRING@,$PACKAGE_STRING,;t t s,@PACKAGE_BUGREPORT@,$PACKAGE_BUGREPORT,;t t s,@exec_prefix@,$exec_prefix,;t t s,@prefix@,$prefix,;t t s,@program_transform_name@,$program_transform_name,;t t s,@bindir@,$bindir,;t t s,@sbindir@,$sbindir,;t t s,@libexecdir@,$libexecdir,;t t s,@datadir@,$datadir,;t t s,@sysconfdir@,$sysconfdir,;t t s,@sharedstatedir@,$sharedstatedir,;t t s,@localstatedir@,$localstatedir,;t t s,@libdir@,$libdir,;t t s,@includedir@,$includedir,;t t s,@oldincludedir@,$oldincludedir,;t t s,@infodir@,$infodir,;t t s,@mandir@,$mandir,;t t s,@build_alias@,$build_alias,;t t s,@host_alias@,$host_alias,;t t s,@target_alias@,$target_alias,;t t s,@DEFS@,$DEFS,;t t s,@ECHO_C@,$ECHO_C,;t t s,@ECHO_N@,$ECHO_N,;t t s,@ECHO_T@,$ECHO_T,;t t s,@LIBS@,$LIBS,;t t s,@CC@,$CC,;t t s,@CFLAGS@,$CFLAGS,;t t s,@LDFLAGS@,$LDFLAGS,;t t s,@CPPFLAGS@,$CPPFLAGS,;t t s,@ac_ct_CC@,$ac_ct_CC,;t t s,@EXEEXT@,$EXEEXT,;t t s,@OBJEXT@,$OBJEXT,;t t s,@CPP@,$CPP,;t t s,@EGREP@,$EGREP,;t t s,@NVML_INCDIR@,$NVML_INCDIR,;t t s,@NVML_LIBDIR@,$NVML_LIBDIR,;t t s,@CUDA_DIR@,$CUDA_DIR,;t t s,@LIBOBJS@,$LIBOBJS,;t t s,@LTLIBOBJS@,$LTLIBOBJS,;t t CEOF _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF # Split the substitutions into bite-sized pieces for seds with # small command number limits, like on Digital OSF/1 and HP-UX. ac_max_sed_lines=48 ac_sed_frag=1 # Number of current file. ac_beg=1 # First line for current file. 
ac_end=$ac_max_sed_lines # Line after last line for current file. ac_more_lines=: ac_sed_cmds= while $ac_more_lines; do if test $ac_beg -gt 1; then sed "1,${ac_beg}d; ${ac_end}q" $tmp/subs.sed >$tmp/subs.frag else sed "${ac_end}q" $tmp/subs.sed >$tmp/subs.frag fi if test ! -s $tmp/subs.frag; then ac_more_lines=false else # The purpose of the label and of the branching condition is to # speed up the sed processing (if there are no `@' at all, there # is no need to browse any of the substitutions). # These are the two extra sed commands mentioned above. (echo ':t /@[a-zA-Z_][a-zA-Z_0-9]*@/!b' && cat $tmp/subs.frag) >$tmp/subs-$ac_sed_frag.sed if test -z "$ac_sed_cmds"; then ac_sed_cmds="sed -f $tmp/subs-$ac_sed_frag.sed" else ac_sed_cmds="$ac_sed_cmds | sed -f $tmp/subs-$ac_sed_frag.sed" fi ac_sed_frag=`expr $ac_sed_frag + 1` ac_beg=$ac_end ac_end=`expr $ac_end + $ac_max_sed_lines` fi done if test -z "$ac_sed_cmds"; then ac_sed_cmds=cat fi fi # test -n "$CONFIG_FILES" _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF for ac_file in : $CONFIG_FILES; do test "x$ac_file" = x: && continue # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in". case $ac_file in - | *:- | *:-:* ) # input from stdin cat >$tmp/stdin ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'` ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;; *:* ) ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'` ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;; * ) ac_file_in=$ac_file.in ;; esac # Compute @srcdir@, @top_srcdir@, and @INSTALL@ for subdirectories. ac_dir=`(dirname "$ac_file") 2>/dev/null || $as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$ac_file" : 'X\(//\)[^/]' \| \ X"$ac_file" : 'X\(//\)$' \| \ X"$ac_file" : 'X\(/\)' \| \ . 
: '\(.\)' 2>/dev/null || echo X"$ac_file" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; } /^X\(\/\/\)[^/].*/{ s//\1/; q; } /^X\(\/\/\)$/{ s//\1/; q; } /^X\(\/\).*/{ s//\1/; q; } s/.*/./; q'` { if $as_mkdir_p; then mkdir -p "$ac_dir" else as_dir="$ac_dir" as_dirs= while test ! -d "$as_dir"; do as_dirs="$as_dir $as_dirs" as_dir=`(dirname "$as_dir") 2>/dev/null || $as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$as_dir" : 'X\(//\)[^/]' \| \ X"$as_dir" : 'X\(//\)$' \| \ X"$as_dir" : 'X\(/\)' \| \ . : '\(.\)' 2>/dev/null || echo X"$as_dir" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; } /^X\(\/\/\)[^/].*/{ s//\1/; q; } /^X\(\/\/\)$/{ s//\1/; q; } /^X\(\/\).*/{ s//\1/; q; } s/.*/./; q'` done test ! -n "$as_dirs" || mkdir $as_dirs fi || { { echo "$as_me:$LINENO: error: cannot create directory \"$ac_dir\"" >&5 echo "$as_me: error: cannot create directory \"$ac_dir\"" >&2;} { (exit 1); exit 1; }; }; } ac_builddir=. if test "$ac_dir" != .; then ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'` # A "../" for each directory in $ac_dir_suffix. ac_top_builddir=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,../,g'` else ac_dir_suffix= ac_top_builddir= fi case $srcdir in .) # No --srcdir option. We are building in place. ac_srcdir=. if test -z "$ac_top_builddir"; then ac_top_srcdir=. else ac_top_srcdir=`echo $ac_top_builddir | sed 's,/$,,'` fi ;; [\\/]* | ?:[\\/]* ) # Absolute path. ac_srcdir=$srcdir$ac_dir_suffix; ac_top_srcdir=$srcdir ;; *) # Relative path. ac_srcdir=$ac_top_builddir$srcdir$ac_dir_suffix ac_top_srcdir=$ac_top_builddir$srcdir ;; esac # Do not use `cd foo && pwd` to compute absolute paths, because # the directories may not exist. case `pwd` in .) ac_abs_builddir="$ac_dir";; *) case "$ac_dir" in .) ac_abs_builddir=`pwd`;; [\\/]* | ?:[\\/]* ) ac_abs_builddir="$ac_dir";; *) ac_abs_builddir=`pwd`/"$ac_dir";; esac;; esac case $ac_abs_builddir in .) ac_abs_top_builddir=${ac_top_builddir}.;; *) case ${ac_top_builddir}. in .) 
ac_abs_top_builddir=$ac_abs_builddir;; [\\/]* | ?:[\\/]* ) ac_abs_top_builddir=${ac_top_builddir}.;; *) ac_abs_top_builddir=$ac_abs_builddir/${ac_top_builddir}.;; esac;; esac case $ac_abs_builddir in .) ac_abs_srcdir=$ac_srcdir;; *) case $ac_srcdir in .) ac_abs_srcdir=$ac_abs_builddir;; [\\/]* | ?:[\\/]* ) ac_abs_srcdir=$ac_srcdir;; *) ac_abs_srcdir=$ac_abs_builddir/$ac_srcdir;; esac;; esac case $ac_abs_builddir in .) ac_abs_top_srcdir=$ac_top_srcdir;; *) case $ac_top_srcdir in .) ac_abs_top_srcdir=$ac_abs_builddir;; [\\/]* | ?:[\\/]* ) ac_abs_top_srcdir=$ac_top_srcdir;; *) ac_abs_top_srcdir=$ac_abs_builddir/$ac_top_srcdir;; esac;; esac if test x"$ac_file" != x-; then { echo "$as_me:$LINENO: creating $ac_file" >&5 echo "$as_me: creating $ac_file" >&6;} rm -f "$ac_file" fi # Let's still pretend it is `configure' which instantiates (i.e., don't # use $as_me), people would be surprised to read: # /* config.h. Generated by config.status. */ if test x"$ac_file" = x-; then configure_input= else configure_input="$ac_file. " fi configure_input=$configure_input"Generated from `echo $ac_file_in | sed 's,.*/,,'` by configure." # First look for the input files in the build tree, otherwise in the # src tree. 
ac_file_inputs=`IFS=: for f in $ac_file_in; do case $f in -) echo $tmp/stdin ;; [\\/$]*) # Absolute (can't be DOS-style, as IFS=:) test -f "$f" || { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5 echo "$as_me: error: cannot find input file: $f" >&2;} { (exit 1); exit 1; }; } echo "$f";; *) # Relative if test -f "$f"; then # Build tree echo "$f" elif test -f "$srcdir/$f"; then # Source tree echo "$srcdir/$f" else # /dev/null tree { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5 echo "$as_me: error: cannot find input file: $f" >&2;} { (exit 1); exit 1; }; } fi;; esac done` || { (exit 1); exit 1; } _ACEOF cat >>$CONFIG_STATUS <<_ACEOF sed "$ac_vpsub $extrasub _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF :t /@[a-zA-Z_][a-zA-Z_0-9]*@/!b s,@configure_input@,$configure_input,;t t s,@srcdir@,$ac_srcdir,;t t s,@abs_srcdir@,$ac_abs_srcdir,;t t s,@top_srcdir@,$ac_top_srcdir,;t t s,@abs_top_srcdir@,$ac_abs_top_srcdir,;t t s,@builddir@,$ac_builddir,;t t s,@abs_builddir@,$ac_abs_builddir,;t t s,@top_builddir@,$ac_top_builddir,;t t s,@abs_top_builddir@,$ac_abs_top_builddir,;t t " $ac_file_inputs | (eval "$ac_sed_cmds") >$tmp/out rm -f $tmp/stdin if test x"$ac_file" != x-; then mv $tmp/out $ac_file else cat $tmp/out rm -f $tmp/out fi done _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF { (exit 0); exit 0; } _ACEOF chmod +x $CONFIG_STATUS ac_clean_files=$ac_clean_files_save # configure is writing to config.log, and then calls config.status. # config.status does its own redirection, appending to config.log. # Unfortunately, on DOS this fails, as config.log is still kept open # by configure, so config.status won't be able to write to it; its # output is simply discarded. So we exec the FD to /dev/null, # effectively closing config.log, so it can be properly (re)opened and # appended to by config.status. When coming back to configure, we # need to make the FD available again. 
if test "$no_create" != yes; then ac_cs_success=: ac_config_status_args= test "$silent" = yes && ac_config_status_args="$ac_config_status_args --quiet" exec 5>/dev/null $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false exec 5>>config.log # Use ||, not &&, to avoid exiting from the if with $? = 1, which # would make configure fail if this is the last instruction. $ac_cs_success || { (exit 1); exit 1; } fi papi-5.6.0/src/libpfm4/python/src/session.py000664 001750 001750 00000004723 13216244366 023037 0ustar00jshenry1963jshenry1963000000 000000 # # Copyright (c) 2008 Google, Inc. # Contributed by Arun Sharma # # Permission is hereby granted, free of charge, to any person obtaining a # copy of this software and associated documentation files (the "Software"), # to deal in the Software without restriction, including without limitation # the rights to use, copy, modify, merge, publish, distribute, sublicense, # and/or sell copies of the Software, and to permit persons to whom the # Software is furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included # in all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR # OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR # OTHER DEALINGS IN THE SOFTWARE. 
# from perfmon import * import os import sys # Common base class class Session: def __init__(self, events): self.system = System() self.event_names = events self.events = [] self.fds = [] for e in events: err, encoding = pfm_get_perf_event_encoding(e, PFM_PLM0 | PFM_PLM3, None, None) self.events.append(encoding) def __del__(self): pass def read(self, fd): # TODO: determine counter width return os.read(fd, 8) class SystemWideSession(Session): def __init__(self, cpus, events): self.cpus = cpus Session.__init__(self, events) def __del__(self): Session.__del__(self) def start(self): self.cpu_fds = [] for c in self.cpus: self.cpu_fds.append([]) cur_cpu_fds = self.cpu_fds[-1] for e in self.events: cur_cpu_fds.append(perf_event_open(e, -1, c, -1, 0)) def read(self, c, i): index = self.cpus.index(c) return Session.read(self, self.cpu_fds[index][i]) class PerThreadSession(Session): def __init__(self, pid, events): self.pid = pid Session.__init__(self, events) def __del__(self): Session.__del__(self) def start(self): for e in self.events: self.fds.append(perf_event_open(e, self.pid, -1, -1, 0)) def read(self, i): return Session.read(self, self.fds[i]) papi-5.6.0/src/validation_tests/matrix_multiply.h000664 001750 001750 00000000340 13216244370 024307 0ustar00jshenry1963jshenry1963000000 000000 long long naive_matrix_multiply_estimated_flops(int quiet); long long naive_matrix_multiply_estimated_loads(int quiet); long long naive_matrix_multiply_estimated_stores(int quiet); double naive_matrix_multiply(int quiet); papi-5.6.0/src/ftests/strtest.F000664 001750 001750 00000024434 13216244361 020460 0ustar00jshenry1963jshenry1963000000 000000 C Strtest - Perform some basic tests of the functionality of the C string passing to and from the PAPI Fortran interface. C C Test 1: Look up an event name from an event code. Use this name C to try and locate the event code using the name received. 
C             Long, short and too short strings are used in the tests
C
C     Test 2: Look up a PAPI error string. Use long, short and too
C             short strings to store the result.
C             NOTE(review): no error-string lookup appears in the code
C             below, and ERRCODE is defined but not referenced in this
C             file -- confirm whether this test was dropped.
C
C     Test 3: Look up and display event descriptions
C             using PAPIf_get_event_info.
C
C     Comments:
C     When using the Fortran interface it may not always be possible to
C     use the PAPI predefined constants as actual arguments. Because of
C     their values, compilers might occasionally give these constants
C     the wrong type. In the code below the assignment code=MSGCODE is
C     used to make sure that the event code gets the right type.
C
#include "fpapi_test.h"

C     Set MSGLEN to the number of characters in the named event
C     in MSGCODE
#define MSGLEN 11
#define MSGCODE PAPI_L1_DCM
#define ERRCODE PAPI_EINVAL

C     Main program: initialises the library, runs checkstr on buffers
C     of several lengths, then runs seven test_papif_descr lookups
C     and finally reports success through ftests_pass.
      program strtest
      implicit integer (p)
C     Buffers of full, double and (relative to MSGLEN) exact, longer
C     and shorter lengths
      CHARACTER*(PAPI_MAX_STR_LEN) papistr
      CHARACTER*(PAPI_MAX_STR_LEN*2) papidblstr
      CHARACTER*(PAPI_MAX_STR_LEN) ckstr
      CHARACTER*(MSGLEN) invstr1
      CHARACTER*(MSGLEN+1) invstr2
      CHARACTER*(MSGLEN+2) invstr3
      CHARACTER*(MSGLEN-1) invstr4
      CHARACTER*(MSGLEN-2) invstr5
      integer check,lastchar
      integer code,papicode
      integer getstrlen
      external getstrlen
      integer tests_quiet, get_quiet
      external get_quiet

      tests_quiet = get_quiet()

C     The test is failed if check does not come back unchanged
      check=PAPI_VER_CURRENT
      call PAPIF_library_init(check)
      if ( check.NE.PAPI_VER_CURRENT) then
        call PAPIF_perror( 'PAPI_library_init' )
        call ftest_fail(__FILE__, __LINE__,
     . 'PAPI_library_init', check)
      end if

C     Go through an integer variable rather than passing the macro
C     directly (see the header comment)
      code=MSGCODE
      if (tests_quiet .EQ. 0) then
        print *,'---------------------------------------------------'
        print *,' Testing PAPIF_name_to_code/PAPIF_code_to_name '
        print *,'---------------------------------------------------'
        print *,' These tests look up an event name and event code'
        print *,' On no occasion should a NULL character be found(+)'
        print *,' When strings are too short, the lookup should fail'
        print *
        print *,' Tests use the event code ',code
        print *
      end if

C     First pass uses the full-size buffer; lastchar is then set to
C     the non-blank length of the name that came back, and checkstr
C     treats every buffer shorter than that as an expected failure.
      lastchar=PAPI_MAX_STR_LEN
      call checkstr(code,ckstr,check,lastchar,tests_quiet)
      lastchar=getstrlen(ckstr)
      call checkstr(code,invstr1,check,lastchar,tests_quiet)
      call checkstr(code,invstr2,check,lastchar,tests_quiet)
      call checkstr(code,invstr3,check,lastchar,tests_quiet)
      call checkstr(code,invstr4,check,lastchar,tests_quiet)
      call checkstr(code,invstr5,check,lastchar,tests_quiet)

      if (tests_quiet .EQ. 0) then
        print *,'---------------------------------------------------'
        print *,' Testing PAPIF_descr_event '
        print *,'---------------------------------------------------'
        print *,' These tests should return a PAPI description for'
        print *,' various event names and argument shapes.'
        print *,' On no occasion should a NULL character be found(+)'
        print *
        print 200,'Test 1'
      end if
C     Test 1: full-size name buffer, double-size description buffer;
C     the code must still equal PAPI_L1_DCM afterwards
      papistr=" "
      papicode=PAPI_L1_DCM
      call test_papif_descr(papistr,papicode,papidblstr,
     . check,tests_quiet)
      call checkcode(papicode,PAPI_L1_DCM,tests_quiet)

      if (tests_quiet .EQ. 0) then
        print *
        print 200,'Test 2'
      end if
C     Test 2: same buffers; the returned name is compared instead
      papistr=" "
      papicode=PAPI_L2_DCM
      call test_papif_descr(papistr,papicode,papidblstr,
     . check,tests_quiet)
      call checkname(papistr,"PAPI_L2_DCM",tests_quiet)

      if (tests_quiet .EQ. 0) then
        print *
        print 200,'Test 3'
      end if
C     Test 3: name buffer of exactly MSGLEN characters, code checked
      invstr1=" "
      papicode=PAPI_L1_ICM
      call test_papif_descr(invstr1,papicode,papidblstr,
     . check,tests_quiet)
      call checkcode(papicode,PAPI_L1_ICM,tests_quiet)

      if (tests_quiet .EQ. 0) then
        print *
        print 200,'Test 4'
      end if
C     Test 4: name buffer of exactly MSGLEN characters, name checked
      invstr1=" "
      papicode=PAPI_L2_ICM
      call test_papif_descr(invstr1,papicode,papidblstr,
     . check,tests_quiet)
      call checkname(invstr1,"PAPI_L2_ICM",tests_quiet)

      if (tests_quiet .EQ. 0) then
        print *
        print 200,'Test 5 (This should get a truncated description)'
      end if
C     Test 5: the description goes into the MSGLEN-character buffer
      invstr2=" "
      papicode=PAPI_L3_DCM
      call test_papif_descr(invstr2,papicode,invstr1,
     . check,tests_quiet)
      call checkcode(papicode,PAPI_L3_DCM,tests_quiet)

      if (tests_quiet .EQ. 0) then
        print *
        print 200,'Test 6 (This should get a truncated description)'
      end if
C     Test 6: as test 5, but the returned name is compared
      invstr2=" "
      papicode=PAPI_L3_ICM
      call test_papif_descr(invstr2,papicode,invstr1,
     . check,tests_quiet)
      call checkname(invstr2,"PAPI_L3_ICM",tests_quiet)

      if (tests_quiet .EQ. 0) then
        print *
        print 200,'Test 7 (This should get a truncated name)'
      end if
C     Test 7: the name buffer is one character shorter than MSGLEN.
C     checkname is only called in verbose mode, where a mismatch is
C     printed; in quiet mode checkname would call ftest_fail.
      invstr4=" "
      papicode=PAPI_L1_DCM
      call test_papif_descr(invstr4,papicode,papistr,
     . check,tests_quiet)
      if (tests_quiet .EQ. 0) then
        call checkname(invstr4,"PAPI_L1_DCM",tests_quiet)
      end if

 200  format(t1,a)
      if (tests_quiet .EQ. 0) then
        print *,'---------------------------------------------------'
        print *,'(+) Fortran implementations that do not provide the'
        print *,' string argument length might show NULL '//
     . 'characters.'
        print *,' This may or may not be OK depending on the '//
     . 'Fortran'
        print *,' compiler. See papi_fwrappers.c and your Fortran'
        print *,' compiler reference manual.'
      end if

      call ftests_pass(__FILE__)
      end

C     checkstr: convert incode to a name in string, then convert that
C     name back to a code, and finally scan string for NULL characters.
C       incode   - event code to look up (copied to a local first)
C       string   - caller buffer receiving the name
C       check    - receives the status of the last PAPIF call made
C       lastchar - a failing call is passed to ftest_fail only when
C                  len(string) is at least this value; otherwise it
C                  is only reported, and only in verbose mode
C       quiet    - 0 enables the printed output
      subroutine checkstr(incode,string,check,lastchar,quiet)
      implicit integer (P)
      integer incode
      integer check,lastchar, quiet
      character*(*) string
      integer code
      integer getstrlen
      external getstrlen

 100  format(t1,a,i4)
      if (quiet .EQ. 0) then
        print 100,"Testing string length ",len(string)
        if(len(string).lt.lastchar)then
          print *,'This call should return an error code.'
        end if
      end if
      code=incode
      call PAPIF_event_code_to_name(code,string,check)
      if(check.ne.PAPI_OK)then
        if (len(string).ge.lastchar)then
          call ftest_fail(__FILE__, __LINE__,
     . 'PAPIF_event_code_to_name', check)
        else
          if (quiet .EQ. 0) then
            call PAPIF_perror( 'PAPIF_event_code_to_name' )
            print *,'*ERROR* '
            print *,'******* '//'Error in checkstr using '//
     $ 'PAPIF_event_code_to_name'
          end if
        end if
      end if

 200  format(t1,a,'"',a,'"')
      if (quiet .EQ. 0) then
        print 200,'The event name is: ',string(1:getstrlen(string))
      end if

C     Reverse lookup, with the same failure policy as above
      call PAPIF_event_name_to_code(string,code,check)
      if(check.ne.PAPI_OK)then
        if (len(string).ge.lastchar)then
          call ftest_fail(__FILE__, __LINE__,
     . 'PAPIF_event_name_to_code', check)
        else
          if (quiet .EQ. 0) then
            call PAPIF_perror( 'PAPIF_event_name_to_code' )
            print *,'*ERROR* '
            print *,'******* '//'Error in checkstr using '//
     $ 'PAPIF_event_name_to_code'
          end if
        end if
      end if

      call findnull(string,quiet)
      if (quiet .EQ. 0) then
        print *
      end if
      return
      end

C     test_papif_descr: call PAPIF_get_event_info with the given code,
C     name and description buffers (label, count, note and flags are
C     local and discarded), print the description in verbose mode, and
C     scan both string and name for NULL characters.
C     On a status other than PAPI_OK the error is only printed in
C     verbose mode; in quiet mode ftest_fail is called.
      subroutine test_papif_descr(name,code,string,check,quiet)
      implicit integer (P)
      integer code,count,flags
      integer check,quiet
      character*(*) name,string
      character*(PAPI_MAX_STR_LEN) label,note
      integer getstrlen
      external getstrlen

C     This API was deprecated with PAPI 3
C     call PAPIF_describe_event(name,code,string,check)
      call PAPIF_get_event_info(code,name,string,label,count,
     $ note,flags,check)
 100  format(t1,a,'"',a,'"')
      if (quiet .EQ. 0) then
        print 100,'The event description is: ',
     $ string(1:getstrlen(string))
      end if
      if(check.ne.PAPI_OK)then
        if (quiet .EQ. 0) then
          call PAPIF_perror( 'PAPI_get_event_info' )
          print *,'*ERROR* '
          print *,'******* '//'Error in test_papif_descr using '//
     $ 'PAPIF_get_event_info'
        else
          call ftest_fail(__FILE__, __LINE__,
     . 'PAPIF_get_event_info', check)
        end if
      end if

      call findnull(string,quiet)
      call findnull(name,quiet)

      return
      end

C     getstrlen: return the position of the last character of string
C     that is not a blank, or 0 when every character is a blank.
      integer function getstrlen(string)
      implicit integer (P)
      character*(*) string
      integer i
C     Scan backwards so the first hit is the last non-blank character
      do i=len(string),1,-1
        if(string(i:i).ne.' ') then
          goto 20
        end if
      end do
      getstrlen=0
      return
 20   continue
      getstrlen=i
      return
      end

C     findnull: look for char(0) anywhere in string. When one is found
C     a message is printed in verbose mode; in quiet mode ftest_fail
C     is called instead.
      subroutine findnull(string,quiet)
      implicit integer (P)
      integer quiet,i
      character*(*) string
      i=index(string,char(0))
      if(i.gt.0)then
        if(quiet.EQ.0)then
          print *,'NULL character found in string!!!'
        else
          call ftest_fail(__FILE__, __LINE__,
     . 'NULL character found in string!!!', 0)
        end if
      end if
      return
      end

C     checkcode: compare code with the expected value check. A
C     mismatch is printed in verbose mode and passed to ftest_fail in
C     quiet mode.
      subroutine checkcode(code,check,quiet)
      implicit integer (P)
      integer code
      integer check,quiet
      if(code.ne.check)then
        if(quiet.EQ.0)then
          print 100,'Code look up failed?'
        else
          call ftest_fail(__FILE__, __LINE__,
     . 'Code look up failed?', 0)
        end if
      end if
 100  format(t2,a)
      return
      end

C     checkname: compare name, cut at its last non-blank character,
C     with the expected string check. A mismatch is printed together
C     with both strings in verbose mode and passed to ftest_fail in
C     quiet mode.
      subroutine checkname(name,check,quiet)
      implicit integer (P)
      character*(*) name
      character*(*) check
      integer i,quiet
      integer getstrlen
      i=getstrlen(name)
      if(name(1:i).ne.check)then
        if (quiet .eq. 0) then
          print 100,'PAPI name incorrect?'
          print 110,'Got: ',name(1:i)
          print 110,'Expected: ',check
        else
          call ftest_fail(__FILE__, __LINE__,
     . 'PAPI name incorrect?', 0)
        end if
      end if
 100  format(t2,a)
 110  format(a12,'"',a,'"')
      return
      end
papi-5.6.0/src/perfctr-2.7.x/usr.lib/global.c000664 001750 001750 00000003261 13216244370 022525 0ustar00jshenry1963jshenry1963000000 000000 /* $Id: global.c,v 1.11 2004/05/13 23:35:27 mikpe Exp $ * Library interface to global-mode performance counters.
* * Copyright (C) 1999-2004 Mikael Pettersson */ #include #include #include #include "libperfctr.h" #include "marshal.h" struct gperfctr { /* XXX: kill this struct */ int fd; }; struct gperfctr *gperfctr_open(void) { struct gperfctr *gperfctr; gperfctr = malloc(sizeof(*gperfctr)); if( gperfctr ) { gperfctr->fd = -1; if( 1 || gperfctr->fd >= 0 ) { if( perfctr_abi_check_fd(gperfctr->fd) >= 0 ) return gperfctr; close(gperfctr->fd); } free(gperfctr); } return NULL; } void gperfctr_close(struct gperfctr *gperfctr) { close(gperfctr->fd); free(gperfctr); } int gperfctr_control(const struct gperfctr *gperfctr, struct gperfctr_cpu_control *arg) { return perfctr_sys_w(gperfctr->fd, GPERFCTR_CONTROL, arg, &gperfctr_cpu_control_sdesc); } int gperfctr_read(const struct gperfctr *gperfctr, struct gperfctr_cpu_state *arg) { return perfctr_sys_wr(gperfctr->fd, GPERFCTR_READ, arg, &gperfctr_cpu_state_only_cpu_sdesc, &gperfctr_cpu_state_sdesc); } int gperfctr_stop(const struct gperfctr *gperfctr) { return _sys_perfctr(GPERFCTR_STOP, gperfctr->fd, 0); } int gperfctr_start(const struct gperfctr *gperfctr, unsigned int interval_usec) { return _sys_perfctr(GPERFCTR_START, gperfctr->fd, (void*)(long)interval_usec); } int gperfctr_info(const struct gperfctr *gperfctr, struct perfctr_info *info) { return perfctr_info(gperfctr->fd, info); } struct perfctr_cpus_info *gperfctr_cpus_info(const struct gperfctr *gperfctr) { return perfctr_cpus_info(gperfctr->fd); } papi-5.6.0/src/testlib/clockcore.h000664 001750 001750 00000000145 13216244370 021105 0ustar00jshenry1963jshenry1963000000 000000 #define CLOCKCORE_VIRT_CYC_FAIL -1 #define CLOCKCORE_VIRT_USEC_FAIL -2 int clockcore( int quiet ); papi-5.6.0/src/libpfm4/docs/man3/libpfm_intel_snb_unc.3000664 001750 001750 00000003613 13216244364 024712 0ustar00jshenry1963jshenry1963000000 000000 .TH LIBPFM 3 "August, 2012" "" "Linux Programmer's Manual" .SH NAME libpfm_intel_snb_unc - support for Intel Sandy Bridge uncore PMU .SH SYNOPSIS .nf .B 
#include <perfmon/pfmlib.h> .sp .B PMU name: snb_unc_cbo0, snb_unc_cbo1, snb_unc_cbo2, snb_unc_cbo3 .B PMU desc: Intel Sandy Bridge C-box uncore .sp .SH DESCRIPTION The library supports the Intel Sandy Bridge client part (model 42) uncore PMU. The support is currently limited to the Coherency Box, the so-called C-Box, for up to 4 physical cores. Each physical core has an associated C-Box which it uses to communicate with the L3 cache. The C-boxes all support the same set of events. However, the Core 0 C-box (snb_unc_cbo0) supports an additional uncore clock ticks event: \fBUNC_CLOCKTICKS\fR. .SH MODIFIERS The following modifiers are supported on the Intel Sandy Bridge C-Box uncore PMU: .TP .B i Invert the meaning of the event. The counter will now count cycles in which the event is \fBnot\fR occurring. This is a boolean modifier. .TP .B e Enable edge detection, i.e., count only when there is a state transition from no occurrence of the event to at least one occurrence. This modifier must be combined with a counter mask modifier (c) with a value greater than or equal to one. This is a boolean modifier. .TP .B c Set the counter mask value. The mask acts as a threshold. The counter will count the number of cycles in which the number of occurrences of the event is greater than or equal to the threshold. This is an integer modifier with values in the range [0:255]. .P Both the \fBUNC_CBO_CACHE_LOOKUP\fR and \fBUNC_CBO_XSNP_RESPONSE\fR events require two umasks to be valid. For \fBUNC_CBO_CACHE_LOOKUP\fR, the first umask must be one of the MESI state umasks and the second has to be one of the filters. For \fBUNC_CBO_XSNP_RESPONSE\fR, the first umask must be one of the snoop types and the second has to be one of the filters.
.SH AUTHORS .nf Stephane Eranian .fi .PP papi-5.6.0/src/components/net/tests/Makefile000664 001750 001750 00000001246 13216244357 023107 0ustar00jshenry1963jshenry1963000000 000000 NAME=net include ../../Makefile_comp_tests.target TESTS = net_list_events net_values_by_code net_values_by_name net_tests: $(TESTS) %.o:%.c $(CC) $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c -o $@ $< net_list_events: net_list_events.o $(UTILOBJS) $(PAPILIB) $(CC) $(CFLAGS) $(INCLUDE) -o $@ net_list_events.o $(UTILOBJS) $(PAPILIB) $(LDFLAGS) net_values_by_code: net_values_by_code.o $(UTILOBJS) $(PAPILIB) $(CC) $(CFLAGS) $(INCLUDE) -o $@ net_values_by_code.o $(UTILOBJS) $(PAPILIB) $(LDFLAGS) net_values_by_name: net_values_by_name.o $(UTILOBJS) $(PAPILIB) $(CC) $(CFLAGS) $(INCLUDE) -o $@ net_values_by_name.o $(UTILOBJS) $(PAPILIB) $(LDFLAGS) clean: rm -f $(TESTS) *.o papi-5.6.0/src/components/host_micpower/README000664 001750 001750 00000002707 13216244357 023264 0ustar00jshenry1963jshenry1963000000 000000 This is a component that exports power information for Intel Xeon Phi cards (MIC). The component makes use of the MicAccessAPI distributed with the Intel Manycore Platform Software Stack (http://software.intel.com/en-us/articles/intel-manycore-platform-software-stack-mpss), specifically the intel-mic-sysmgmt package. A configure script allows for non-default locations for the sysmgmt SDK; see ./configure --help. About the data: PAPI retrieves the data via the MicGetPowerUsage call. Per the SDK documentation: MicGetPowerUsage - Retrieve power usage values of Intel® Xeon Phi™ Coprocessor and components. Data Fields MicPwrPws total0 Total power utilization by Intel® Xeon Phi™ product codenamed “Knights Corner” device, Averaged over Time Window 0 (uWatts). MicPwrPws total1 Total power utilization by Intel® Xeon Phi™ product codenamed “Knights Corner” device, Averaged over Time Window 1 (uWatts). MicPwrPws inst Instantaneous power (uWatts).
MicPwrPws imax Max instantaneous power (uWatts). MicPwrPws pcie PCI-E connector power (uWatts). MicPwrPws c2x3 2x3 connector power (uWatts). MicPwrPws c2x4 2x4 connector power (uWatts). MicPwrVrr vccp Core rail (uVolts). MicPwrVrr vddg Uncore rail (uVolts). MicPwrVrr vddq Memory subsystem rail (uVolts). papi-5.6.0/src/ftests/tenth.F000664 001750 001750 00000015676 13216244361 020102 0ustar00jshenry1963jshenry1963000000 000000 #include "fpapi_test.h" #define ITERS 100 #if defined(sun) && defined(sparc) #define CACHE_LEVEL "PAPI_L2_TCM" #define EVT1 PAPI_L2_TCM #define EVT2 PAPI_L2_TCA #define EVT3 PAPI_L2_TCH #define EVT1_STR "PAPI_L2_TCM" #define EVT2_STR "PAPI_L2_TCA" #define EVT3_STR "PAPI_L2_TCH" #else #if defined(__powerpc__) #define CACHE_LEVEL "PAPI_L1_DCA" #define EVT1 PAPI_L1_DCA #define EVT2 PAPI_L1_DCW #define EVT3 PAPI_L1_DCR #define EVT1_STR "PAPI_L1_DCA" #define EVT2_STR "PAPI_L1_DCW" #define EVT3_STR "PAPI_L1_DCR" #else #define CACHE_LEVEL "PAPI_L1_TCM" #define EVT1 PAPI_L1_TCM #define EVT2 PAPI_L1_ICM #define EVT3 PAPI_L1_DCM #define EVT1_STR "PAPI_L1_TCM" #define EVT2_STR "PAPI_L1_ICM" #define EVT3_STR "PAPI_L1_DCM" #endif #endif program tenth implicit integer (p) integer*8 values(10) integer es1, es2, es3 integer*4 mask1, mask2, mask3 integer domain, granularity character*(PAPI_MAX_STR_LEN) domainstr, grnstr integer retval Integer last_char External last_char integer tests_quiet, get_quiet external get_quiet tests_quiet = get_quiet() es1 = PAPI_NULL es2 = PAPI_NULL es3 = PAPI_NULL mask1 = EVT1 mask2 = EVT2 mask3 = EVT3 retval = PAPI_VER_CURRENT call PAPIf_library_init(retval) if ( retval.NE.PAPI_VER_CURRENT) then call ftest_fail(__FILE__, __LINE__, .
'PAPI_library_init', retval) end if call PAPIf_query_event(mask1, retval) if ( retval.NE.PAPI_OK) then call ftest_skip(__FILE__, __LINE__, .'PAPIf_query_event', retval) end if call PAPIf_query_event(mask2, retval) if ( retval.NE.PAPI_OK) then call ftest_skip(__FILE__, __LINE__, .'PAPIf_query_event', retval) end if call PAPIf_query_event(mask3, retval) if ( retval.NE.PAPI_OK) then call ftest_skip(__FILE__, __LINE__, .'PAPIf_query_event', retval) end if call PAPIf_create_eventset(es1, retval) if ( retval.NE.PAPI_OK) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_create_eventset', *retval) end if call PAPIf_add_event( es1, mask1, retval ) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_add_event', retval) end if call PAPIf_create_eventset(es2, retval) if ( retval.NE.PAPI_OK) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_create_eventset', *retval) end if call PAPIf_add_event( es2, mask2, retval ) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_add_event', retval) end if call PAPIf_create_eventset(es3, retval) if ( retval.NE.PAPI_OK) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_create_eventset', * retval) end if call PAPIf_add_event( es3, mask3, retval ) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_add_event', retval) end if call fdo_l1misses(ITERS) call PAPIf_start(es1, retval) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_start', retval) end if call fdo_l1misses(ITERS) call PAPIf_stop(es1, values(1), retval) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_stop', retval) end if call PAPIf_start(es2, retval) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_start', retval) end if call fdo_l1misses(ITERS) call PAPIf_stop(es2, values(3), retval) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 
'PAPIf_stop', retval) end if call PAPIf_start(es3, retval) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_start', retval) end if call fdo_l1misses(ITERS) call PAPIf_stop(es3, values(5), retval) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_stop', retval) end if call PAPIf_remove_event( es1, mask1, retval ) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_remove_event', retval) end if call PAPIf_remove_event( es2, mask2, retval ) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_remove_event', retval) end if call PAPIf_remove_event( es3, mask3, retval ) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_remove_event', retval) end if if (tests_quiet .EQ. 0) then #if (defined(sun) && defined(sparc)) print *, "Test case 10: start, stop for derived event ", *"PAPI_L2_TCM." #else print *, "Test case 10: start, stop for derived event ", *"PAPI_L1_TCM." #endif print *, "------------------------------------------------------" end if call PAPIf_get_domain(es1, domain, PAPI_DEFDOM, retval) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_get_domain', retval) end if call stringify_domain(domain, domainstr) if (tests_quiet .EQ. 0) then write (*,900) "Default domain is:", domain, domainstr 900 format(a20, i3, " ", a70) end if call PAPIf_get_granularity(es1, granularity, PAPI_DEFGRN, *retval) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_get_granularity', *retval) end if call stringify_granularity(granularity, grnstr) if (tests_quiet .EQ. 
0) then write (*,800) "Default granularity is:", granularity, grnstr 800 format(a25, i3, " ", a20) print *, "Using", NUM_FLOPS, " iterations of c += b*c" print *, "------------------------------------------------------" write (*,500) "Test type", 1, 2, 3 #if (defined(sun) && defined(sparc)) write (*,500) EVT1_STR, values(1), 0, 0 write (*,500) EVT2_STR, 0, values(3), 0 write (*,500) EVT3_STR, 0, 0, values(5) print *, "------------------------------------------------", *"------" print *, "Verification:" print *, "First number row 1 approximately equals (2,2) - (3,3) ", *"or ",(values(3)-values(5)) #else write (*,500) EVT1_STR, values(1), 0, 0 write (*,500) EVT2_STR, 0, values(3), 0 write (*,500) EVT3_STR, 0, 0, values(5) print *, "------------------------------------------------", *"------" print *, "Verification:" print *, "First number row 1 approximately equals (2,2) + (3,3) ", *"or ", (values(3)+values(5)) #endif end if 500 format(A13, ": ", I10, I10, I10) call ftests_pass(__FILE__) end papi-5.6.0/src/libpfm-3.y/examples_v3.x/syst.c000664 001750 001750 00000017447 13216244362 023062 0ustar00jshenry1963jshenry1963000000 000000 /* * syst.c - example of a simple system wide monitoring program * * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include #include #include #include #include #include #include #include #include #include #include #include "detect_pmcs.h" #define NUM_PMCS PFMLIB_MAX_PMCS #define NUM_PMDS PFMLIB_MAX_PMDS #define MAX_EVT_NAME_LEN 128 static void fatal_error(char *fmt,...) __attribute__((noreturn)); static void fatal_error(char *fmt, ...) { va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); exit(1); } /* * pin task to CPU */ #ifndef __NR_sched_setaffinity #error "you need to define __NR_sched_setaffinity" #endif #define MAX_CPUS 2048 #define NR_CPU_BITS (MAX_CPUS>>3) int pin_cpu(pid_t pid, unsigned int cpu) { uint64_t my_mask[NR_CPU_BITS]; if (cpu >= MAX_CPUS) fatal_error("this program supports only up to %d CPUs\n", MAX_CPUS); my_mask[cpu>>6] = 1ULL << (cpu&63); return syscall(__NR_sched_setaffinity, pid, sizeof(my_mask), &my_mask); } int main(int argc, char **argv) { char **p; pfarg_pmr_t pc[NUM_PMCS]; pfarg_pmr_t pd[NUM_PMDS]; pfarg_sinfo_t sif; uint64_t pdo[NUM_PMDS]; pfmlib_input_param_t inp; pfmlib_output_param_t outp; pfmlib_options_t pfmlib_options; unsigned int which_cpu; int ret, ctx_fd; unsigned int i, l; unsigned int num_counters; char name[MAX_EVT_NAME_LEN]; /* * pass options to library (optional) */ memset(&pfmlib_options, 0, sizeof(pfmlib_options)); pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ pfm_set_options(&pfmlib_options); /* * Initialize pfm library (required before we can use it) */ ret = pfm_initialize(); if (ret != PFMLIB_SUCCESS) fatal_error("Cannot initialize 
library: %s\n", pfm_strerror(ret)); pfm_get_num_counters(&num_counters); memset(pc, 0, sizeof(pc)); memset(pd, 0, sizeof(pd)); memset(pdo, 0, sizeof(pdo)); memset(&inp,0, sizeof(inp)); memset(&outp,0, sizeof(outp)); memset(&sif,0, sizeof(sif)); /* * be nice to user! */ if (argc > 1) { p = argv+1; for (i=0; *p ; i++, p++) { if (pfm_find_full_event(*p, &inp.pfp_events[i]) != PFMLIB_SUCCESS) fatal_error("Cannot find %s event\n", *p); } } else { if (pfm_get_cycle_event(&inp.pfp_events[0]) != PFMLIB_SUCCESS) fatal_error("cannot find cycle event\n"); if (pfm_get_inst_retired_event(&inp.pfp_events[1]) != PFMLIB_SUCCESS) fatal_error("cannot find inst retired event\n"); i = 2; } /* * set the privilege mode: * PFM_PLM3 : user level * PFM_PLM0 : kernel level */ inp.pfp_dfl_plm = PFM_PLM3|PFM_PLM0; if (i > num_counters) { i = num_counters; printf("too many events provided (max=%d events), using first %d event(s)\n", num_counters, i); } /* * how many counters we use */ inp.pfp_event_count = i; /* * indicate we are using the monitors for a system-wide session. * This may impact the way the library sets up the PMC values. */ inp.pfp_flags = PFMLIB_PFP_SYSTEMWIDE; /* * pick a random CPU. Assumes CPU are numbered with no holes */ srandom(getpid()); which_cpu = random() % sysconf(_SC_NPROCESSORS_ONLN); /* * The monitored CPU is determined by the processor core * executing the PFM_LOAD_CONTEXT command. To ensure, we * measure the right core, we pin the thread before making * the call. 
*/ ret = pin_cpu(getpid(), which_cpu); if (ret == -1) fatal_error("cannot set affinity to CPU%d: %s\n", which_cpu, strerror(errno)); /* * after the call the task is pinned to which_cpu */ /* * now create the system-wide session */ ctx_fd = pfm_create(PFM_FL_SYSTEM_WIDE, &sif); if (ctx_fd == -1) { if (errno == ENOSYS) { fatal_error("Your kernel does not have performance monitoring support!\n"); } fatal_error("cannot create session %s\n", strerror(errno)); } /* * build the pfp_unavail_pmcs bitmask by looking * at what perfmon has available. It is not always * the case that all PMU registers are actually available * to applications. For instance, on IA-32 platforms, some * registers may be reserved for the NMI watchdog timer. * * With this bitmap, the library knows which registers NOT to * use. Of source, it is possible that no valid assignement may * be possible if certina PMU registers are not available. */ detect_unavail_pmu_regs(&sif, &inp.pfp_unavail_pmcs, NULL); /* * let the library figure out the values for the PMCS */ if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); /* * Now prepare the argument to initialize the PMDs and PMCS. * We use pfp_pmc_count to determine the number of PMC to intialize. * We use pfp_pmd_count to determine the number of PMD to initialize. 
* Some events/features may cause extra PMCs to be used, leading to: * - pfp_pmc_count may be >= pfp_event_count * - pfp_pmd_count may be >= pfp_event_count */ for (i=0; i < outp.pfp_pmc_count; i++) { pc[i].reg_num = outp.pfp_pmcs[i].reg_num; pc[i].reg_value = outp.pfp_pmcs[i].reg_value; } for (i=0; i < outp.pfp_pmd_count; i++) pd[i].reg_num = outp.pfp_pmds[i].reg_num; /* * Now program the registers */ if (pfm_write(ctx_fd, 0, PFM_RW_PMC, pc, outp.pfp_pmc_count * sizeof(*pc)) == -1) fatal_error("pfm_write error errno %d\n",errno); if (pfm_write(ctx_fd, 0, PFM_RW_PMD, pd, outp.pfp_pmd_count * sizeof(*pd)) == -1) fatal_error("pfm_write(PMDS) error errno %d\n",errno); /* * attach the session to the CPU */ if (pfm_attach(ctx_fd, 0, which_cpu) == -1) fatal_error("pfm_attach error errno %d\n",errno); printf("\n", which_cpu); for(l=0; l < 10; l++) { /* * start monitoring */ if (pfm_set_state(ctx_fd, 0, PFM_ST_START) == -1) fatal_error("pfm_set_state(start) error errno %d\n",errno); sleep(2); /* * stop monitoring. * changed at the user level. 
*/ if (pfm_set_state(ctx_fd, 0, PFM_ST_STOP) == -1) fatal_error("pfm_set_state(stop) error errno %d\n",errno); /* * read the results */ if (pfm_read(ctx_fd, 0, PFM_RW_PMD, pd, inp.pfp_event_count * sizeof(*pd)) == -1) fatal_error( "pfm_read error errno %d\n",errno); /* * print the results */ puts("------------------------"); for (i=0; i < inp.pfp_event_count; i++) { pfm_get_full_event_name(&inp.pfp_events[i], name, MAX_EVT_NAME_LEN); printf("CPU%-2d PMD%-3u raw=%-20"PRIu64" delta=%-20"PRIu64" %s\n", which_cpu, pd[i].reg_num, pd[i].reg_value, pd[i].reg_value - pdo[i], name); pdo[i] = pd[i].reg_value; } } /* * destroy everything */ close(ctx_fd); return 0; } papi-5.6.0/src/components/cuda/sampling/test/000775 001750 001750 00000000000 13216244357 023221 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/testlib/clockcore.c000664 001750 001750 00000005726 13216244370 021112 0ustar00jshenry1963jshenry1963000000 000000 #include #include #include #include "papi.h" #include "clockcore.h" #define NUM_ITERS 1000000 static char *func_name[] = { "PAPI_get_real_cyc", "PAPI_get_real_usec", "PAPI_get_virt_cyc", "PAPI_get_virt_usec" }; static int CLOCK_ERROR = 0; static int clock_res_check( int flag, int quiet ) { if ( CLOCK_ERROR ) { return -1; } long long *elapsed_cyc, total_cyc = 0, uniq_cyc = 0, diff_cyc = 0; int i; double min, max, average, std, tmp; elapsed_cyc = ( long long * ) calloc( NUM_ITERS, sizeof ( long long ) ); /* Real */ switch ( flag ) { case 0: for ( i = 0; i < NUM_ITERS; i++ ) elapsed_cyc[i] = ( long long ) PAPI_get_real_cyc( ); break; case 1: for ( i = 0; i < NUM_ITERS; i++ ) elapsed_cyc[i] = ( long long ) PAPI_get_real_usec( ); break; case 2: for ( i = 0; i < NUM_ITERS; i++ ) elapsed_cyc[i] = ( long long ) PAPI_get_virt_cyc( ); break; case 3: for ( i = 0; i < NUM_ITERS; i++ ) elapsed_cyc[i] = ( long long ) PAPI_get_virt_usec( ); break; default: return -1; } min = max = ( double ) ( elapsed_cyc[1] - elapsed_cyc[0] ); for ( i = 1; i < NUM_ITERS; i++ ) 
{ if ( elapsed_cyc[i] - elapsed_cyc[i - 1] < 0 ) { CLOCK_ERROR = 1; fprintf(stderr,"Error! Negative elapsed time\n"); free( elapsed_cyc ); return -1; } diff_cyc = elapsed_cyc[i] - elapsed_cyc[i - 1]; if ( min > diff_cyc ) min = ( double ) diff_cyc; if ( max < diff_cyc ) max = ( double ) diff_cyc; if ( diff_cyc != 0 ) uniq_cyc++; total_cyc += diff_cyc; } average = ( double ) total_cyc / ( NUM_ITERS - 1 ); std = 0; for ( i = 1; i < NUM_ITERS; i++ ) { tmp = ( double ) ( elapsed_cyc[i] - elapsed_cyc[i - 1] ); tmp = tmp - average; std += tmp * tmp; } if ( !quiet ) { std = sqrt( std / ( NUM_ITERS - 2 ) ); printf( "%s: min %.3lf max %.3lf \n", func_name[flag], min, max ); printf( " average %.3lf std %.3lf\n", average, std ); if ( uniq_cyc == NUM_ITERS - 1 ) { printf( "%s : %7.3f <%7.3f\n", func_name[flag], ( double ) total_cyc / ( double ) ( NUM_ITERS ), ( double ) total_cyc / ( double ) uniq_cyc ); } else if ( uniq_cyc ) { printf( "%s : %7.3f %7.3f\n", func_name[flag], ( double ) total_cyc / ( double ) ( NUM_ITERS ), ( double ) total_cyc / ( double ) uniq_cyc ); } else { printf( "%s : %7.3f >%7.3f\n", func_name[flag], ( double ) total_cyc / ( double ) ( NUM_ITERS ), ( double ) total_cyc ); } } free( elapsed_cyc ); return PAPI_OK; } int clockcore( int quiet ) { /* check PAPI_get_real_cyc */ clock_res_check( 0, quiet ); /* check PAPI_get_real_usec */ clock_res_check( 1, quiet ); /* check PAPI_get_virt_cyc */ /* Virtual */ if ( PAPI_get_virt_cyc( ) != -1 ) { clock_res_check( 2, quiet ); } else { return CLOCKCORE_VIRT_CYC_FAIL; } /* check PAPI_get_virt_usec */ if ( PAPI_get_virt_usec( ) != -1 ) { clock_res_check( 3, quiet ); } else { return CLOCKCORE_VIRT_USEC_FAIL; } return PAPI_OK; } papi-5.6.0/src/validation_tests/matrix_multiply.c000664 001750 001750 00000003367 13216244370 024316 0ustar00jshenry1963jshenry1963000000 000000 #include #define NUM_RUNS 3 #define MATRIX_SIZE 512 static double a[MATRIX_SIZE][MATRIX_SIZE]; static double b[MATRIX_SIZE][MATRIX_SIZE]; static 
double c[MATRIX_SIZE][MATRIX_SIZE]; long long naive_matrix_multiply_estimated_flops(int quiet) { long long muls,divs,adds; /* setup */ muls=MATRIX_SIZE*MATRIX_SIZE; divs=MATRIX_SIZE*MATRIX_SIZE; adds=MATRIX_SIZE*MATRIX_SIZE; /* multiply */ muls+=MATRIX_SIZE*MATRIX_SIZE*MATRIX_SIZE; adds+=MATRIX_SIZE*MATRIX_SIZE*MATRIX_SIZE; /* sum */ adds+=MATRIX_SIZE*MATRIX_SIZE; if (!quiet) { printf("Estimated flops: adds: %lld muls: %lld divs: %lld\n", adds,muls,divs); } return adds+muls+divs; } long long naive_matrix_multiply_estimated_loads(int quiet) { long long loads=0; /* setup */ loads+=0; /* multiply */ loads+=MATRIX_SIZE*MATRIX_SIZE*MATRIX_SIZE*2; /* sum */ loads+=MATRIX_SIZE*MATRIX_SIZE; if (!quiet) { printf("Estimated loads: %lld\n",loads); } return loads; } long long naive_matrix_multiply_estimated_stores(int quiet) { long long stores=0; /* setup */ stores+=MATRIX_SIZE*MATRIX_SIZE*2; /* multiply */ stores+=MATRIX_SIZE*MATRIX_SIZE; /* sum */ stores+=1; if (!quiet) { printf("Estimated stores: %lld\n",stores); } return stores; } double naive_matrix_multiply(int quiet) { double s; int i,j,k; for(i=0;i * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. */ #ifndef __PFMLIB_MONTECITO_PRIV_H__ #define __PFMLIB_MONTECITO_PRIV_H__ /* * Event type definitions * * The virtual events are not really defined in the specs but are an artifact used * to quickly and easily setup EAR and/or BTB. The event type encodes the exact feature * which must be configured in combination with a counting monitor. * For instance, DATA_EAR_CACHE_LAT4 is a virtual D-EAR cache event. If the user * requests this event, this will configure a counting monitor to count DATA_EAR_EVENTS * and PMC11 will be configured for cache mode. The latency is encoded in the umask, here * it would correspond to 4 cycles. * */ #define PFMLIB_MONT_EVENT_NORMAL 0x0 /* standard counter */ #define PFMLIB_MONT_EVENT_ETB 0x1 /* virtual event used with ETB configuration */ #define PFMLIB_MONT_EVENT_IEAR_TLB 0x2 /* virtual event used for I-EAR TLB configuration */ #define PFMLIB_MONT_EVENT_IEAR_CACHE 0x3 /* virtual event used for I-EAR cache configuration */ #define PFMLIB_MONT_EVENT_DEAR_TLB 0x4 /* virtual event used for D-EAR TLB configuration */ #define PFMLIB_MONT_EVENT_DEAR_CACHE 0x5 /* virtual event used for D-EAR cache configuration */ #define PFMLIB_MONT_EVENT_DEAR_ALAT 0x6 /* virtual event used for D-EAR ALAT configuration */ #define event_is_ear(e) ((e)->pme_type >= PFMLIB_MONT_EVENT_IEAR_TLB &&(e)->pme_type <= PFMLIB_MONT_EVENT_DEAR_ALAT) #define event_is_iear(e) ((e)->pme_type == PFMLIB_MONT_EVENT_IEAR_TLB || (e)->pme_type == PFMLIB_MONT_EVENT_IEAR_CACHE) #define event_is_dear(e) ((e)->pme_type >= PFMLIB_MONT_EVENT_DEAR_TLB && (e)->pme_type <= PFMLIB_MONT_EVENT_DEAR_ALAT) #define 
event_is_ear_cache(e) ((e)->pme_type == PFMLIB_MONT_EVENT_DEAR_CACHE || (e)->pme_type == PFMLIB_MONT_EVENT_IEAR_CACHE) #define event_is_ear_tlb(e) ((e)->pme_type == PFMLIB_MONT_EVENT_IEAR_TLB || (e)->pme_type == PFMLIB_MONT_EVENT_DEAR_TLB) #define event_is_ear_alat(e) ((e)->pme_type == PFMLIB_MONT_EVENT_DEAR_ALAT) #define event_is_etb(e) ((e)->pme_type == PFMLIB_MONT_EVENT_ETB) /* * Itanium encoding structure * (code must be first 8 bits) */ typedef struct { unsigned long pme_code:8; /* major event code */ unsigned long pme_type:3; /* see definitions above */ unsigned long pme_caf:2; /* Active, Floating, Causal, Self-Floating */ unsigned long pme_ig1:3; /* ignored */ unsigned long pme_umask:16; /* unit mask*/ unsigned long pme_ig:32; /* ignored */ } pme_mont_entry_code_t; typedef union { unsigned long pme_vcode; pme_mont_entry_code_t pme_mont_code; /* must not be larger than vcode */ } pme_mont_code_t; typedef union { unsigned long qual; /* generic qualifier */ struct { unsigned long pme_iar:1; /* instruction address range supported */ unsigned long pme_opm:1; /* opcode match supported */ unsigned long pme_dar:1; /* data address range supported */ unsigned long pme_all:1; /* supports all_thrd=1 */ unsigned long pme_mesi:1; /* event supports MESI */ unsigned long pme_res1:11; /* reserved */ unsigned long pme_group:3; /* event group */ unsigned long pme_set:4; /* event set*/ unsigned long pme_res2:41; /* reserved */ } pme_qual; } pme_mont_qualifiers_t; typedef struct { char *pme_name; pme_mont_code_t pme_entry_code; unsigned long pme_counters; /* supported counters */ unsigned int pme_maxincr; pme_mont_qualifiers_t pme_qualifiers; char *pme_desc; /* text description of the event */ } pme_mont_entry_t; /* * We embed the umask value into the event code. Because it really is * like a subevent. 
* pme_code: * - pme_code bit-field (declared 8 bits wide): major event code * - pme_umask bit-field (declared 16 bits wide): unit mask * The accessor macros below reach these bit-fields through a pme_mont_entry_t. */ #define pme_code pme_entry_code.pme_mont_code.pme_code #define pme_umask pme_entry_code.pme_mont_code.pme_umask /* NOTE(review): pme_mont_qualifiers_t declares only the members qual and pme_qual, and no pme_used field, so the next macro cannot expand to a valid member access - confirm it is unused. */ #define pme_used pme_qualifiers.pme_qual_struct.pme_used #define pme_type pme_entry_code.pme_mont_code.pme_type #define pme_caf pme_entry_code.pme_mont_code.pme_caf /* Qualifier tests: each yields 1 when the corresponding pme_qual capability bit of event entry e is set. */ #define event_opcm_ok(e) ((e)->pme_qualifiers.pme_qual.pme_opm==1) #define event_iarr_ok(e) ((e)->pme_qualifiers.pme_qual.pme_iar==1) #define event_darr_ok(e) ((e)->pme_qualifiers.pme_qual.pme_dar==1) #define event_all_ok(e) ((e)->pme_qualifiers.pme_qual.pme_all==1) #define event_mesi_ok(e) ((e)->pme_qualifiers.pme_qual.pme_mesi==1) #endif /* __PFMLIB_MONTECITO_PRIV_H__ */ papi-5.6.0/src/libpfm4/lib/events/intel_bdw_events.h000664 001750 001750 00000306170 13216244364 024447 0ustar00jshenry1963jshenry1963000000 000000 /* * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
* * This file is part of libpfm, a performance monitoring support library for * applications on Linux. * * PMU: bdw (Intel Broadwell) */ static const intel_x86_umask_t bdw_baclears[]={ { .uname = "ANY", .udesc = "Number of front-end re-steers due to BPU misprediction", .ucode = 0x1f00, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, }; static const intel_x86_umask_t bdw_br_inst_exec[]={ { .uname = "NONTAKEN_CONDITIONAL", .udesc = "All macro conditional nontaken branch instructions", .ucode = 0x4100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "NONTAKEN_COND", .udesc = "All macro conditional nontaken branch instructions", .ucode = 0x4100, .uequiv = "NONTAKEN_CONDITIONAL", .uflags = INTEL_X86_NCOMBO, }, { .uname = "TAKEN_CONDITIONAL", .udesc = "Taken speculative and retired macro-conditional branches", .ucode = 0x8100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "TAKEN_COND", .udesc = "Taken speculative and retired macro-conditional branches", .ucode = 0x8100, .uequiv = "TAKEN_CONDITIONAL", .uflags = INTEL_X86_NCOMBO, }, { .uname = "TAKEN_DIRECT_JUMP", .udesc = "Taken speculative and retired macro-conditional branch instructions excluding calls and indirects", .ucode = 0x8200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "TAKEN_INDIRECT_JUMP_NON_CALL_RET", .udesc = "Taken speculative and retired indirect branches excluding calls and returns", .ucode = 0x8400, .uflags = INTEL_X86_NCOMBO, }, { .uname = "TAKEN_INDIRECT_NEAR_RETURN", .udesc = "Taken speculative and retired indirect branches with return mnemonic", .ucode = 0x8800, .uflags = INTEL_X86_NCOMBO, }, { .uname = "TAKEN_DIRECT_NEAR_CALL", .udesc = "Taken speculative and retired direct near calls", .ucode = 0x9000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ALL_CONDITIONAL", .udesc = "Speculative and retired macro-conditional branches", .ucode = 0xc100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ALL_COND", .udesc = "Speculative and retired macro-conditional branches", .ucode = 0xc100, .uequiv = "ALL_CONDITIONAL", .uflags 
= INTEL_X86_NCOMBO, }, { .uname = "ANY_COND", .udesc = "Speculative and retired macro-conditional branches", .ucode = 0xc100, .uequiv = "ALL_CONDITIONAL", .uflags = INTEL_X86_NCOMBO, }, { .uname = "ALL_DIRECT_JMP", .udesc = "Speculative and retired macro-unconditional branches excluding calls and indirects", .ucode = 0xc200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ALL_INDIRECT_JUMP_NON_CALL_RET", .udesc = "Speculative and retired indirect branches excluding calls and returns", .ucode = 0xc400, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ALL_INDIRECT_NEAR_RETURN", .udesc = "Speculative and retired indirect return branches", .ucode = 0xc800, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ALL_DIRECT_NEAR_CALL", .udesc = "Speculative and retired direct near calls", .ucode = 0xd000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "TAKEN_INDIRECT_NEAR_CALL", .udesc = "All indirect calls, including both register and memory indirect", .ucode = 0xa000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ALL_BRANCHES", .udesc = "All branch instructions executed", .ucode = 0xff00, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, }; static const intel_x86_umask_t bdw_br_inst_retired[]={ { .uname = "CONDITIONAL", .udesc = "Counts all taken and not taken macro conditional branch instructions", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "COND", .udesc = "Counts all taken and not taken macro conditional branch instructions", .ucode = 0x100, .uequiv = "CONDITIONAL", .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "NEAR_CALL", .udesc = "Counts all macro direct and indirect near calls", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "ALL_BRANCHES", .udesc = "Counts all taken and not taken macro branches including far branches (architectural event)", .ucode = 0x0, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL | INTEL_X86_PEBS, }, { .uname = "NEAR_RETURN", .udesc = "Counts the number of near ret instructions retired", .ucode = 0x800, 
.uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "NOT_TAKEN", .udesc = "Counts all not taken macro branch instructions retired", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "NEAR_TAKEN", .udesc = "Counts the number of near branch taken instructions retired", .ucode = 0x2000, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "FAR_BRANCH", .udesc = "Counts the number of far branch instructions retired", .ucode = 0x4000, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, }; static const intel_x86_umask_t bdw_br_misp_exec[]={ { .uname = "NONTAKEN_CONDITIONAL", .udesc = "Not taken speculative and retired mispredicted macro conditional branches", .ucode = 0x4100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "NONTAKEN_COND", .udesc = "Not taken speculative and retired mispredicted macro conditional branches", .ucode = 0x4100, .uequiv = "NONTAKEN_CONDITIONAL", .uflags = INTEL_X86_NCOMBO, }, { .uname = "TAKEN_CONDITIONAL", .udesc = "Taken speculative and retired mispredicted macro conditional branches", .ucode = 0x8100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "TAKEN_COND", .udesc = "Taken speculative and retired mispredicted macro conditional branches", .ucode = 0x8100, .uequiv = "TAKEN_CONDITIONAL", .uflags = INTEL_X86_NCOMBO, }, { .uname = "TAKEN_INDIRECT_JUMP_NON_CALL_RET", .udesc = "Taken speculative and retired mispredicted indirect branches excluding calls and returns", .ucode = 0x8400, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ALL_CONDITIONAL", .udesc = "Speculative and retired mispredicted macro conditional branches", .ucode = 0xc100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ANY_COND", .udesc = "Speculative and retired mispredicted macro conditional branches", .ucode = 0xc100, .uequiv = "ALL_CONDITIONAL", .uflags = INTEL_X86_NCOMBO, }, { .uname = "ALL_INDIRECT_JUMP_NON_CALL_RET", .udesc = "All mispredicted indirect branches that are not calls nor returns", .ucode = 0xc400, .uflags = INTEL_X86_NCOMBO, }, { .uname = 
"ALL_BRANCHES", .udesc = "Speculative and retired mispredicted macro conditional branches", .ucode = 0xff00, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "TAKEN_INDIRECT_NEAR_CALL", .udesc = "Taken speculative and retired mispredicted indirect calls", .ucode = 0xa000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "TAKEN_RETURN_NEAR", .udesc = "Taken speculative and retired mispredicted direct returns", .ucode = 0x8800, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t bdw_br_misp_retired[]={ { .uname = "CONDITIONAL", .udesc = "All mispredicted macro conditional branch instructions", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "COND", .udesc = "All mispredicted macro conditional branch instructions", .ucode = 0x100, .uequiv = "CONDITIONAL", .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "ALL_BRANCHES", .udesc = "All mispredicted macro branches (architectural event)", .ucode = 0x0, /* architectural encoding */ .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS | INTEL_X86_DFL, }, { .uname = "NEAR_TAKEN", .udesc = "Number of near branch instructions retired that were mispredicted and taken", .ucode = 0x2000, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "RET", .udesc = "Number of mispredicted ret instructions retired", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, }; static const intel_x86_umask_t bdw_cpl_cycles[]={ { .uname = "RING0", .udesc = "Unhalted core cycles when the thread is in ring 0", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "RING123", .udesc = "Unhalted core cycles when thread is in rings 1, 2, or 3", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "RING0_TRANS", .udesc = "Number of intervals between processor halts while thread is in ring 0", .ucode = 0x100 | INTEL_X86_MOD_EDGE | (1 << INTEL_X86_CMASK_BIT), /* edge=1 cnt=1 */ .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_E | _INTEL_X86_ATTR_C, }, }; static const intel_x86_umask_t 
bdw_cpu_clk_thread_unhalted[]={ { .uname = "REF_XCLK", .udesc = "Count Xclk pulses (100Mhz) when the core is unhalted", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "REF_XCLK_ANY", .udesc = "Count Xclk pulses (100Mhz) when the at least one thread on the physical core is unhalted", .ucode = 0x100 | INTEL_X86_MOD_ANY, /* any=1 */ .uequiv = "REF_XCLK:t", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_T, }, { .uname = "REF_P", .udesc = "Cycles when the core is unhalted (count at 100 Mhz)", .ucode = 0x100, .uequiv = "REF_XCLK", .uflags= INTEL_X86_NCOMBO, }, { .uname = "THREAD_P", .udesc = "Cycles when thread is not halted", .ucode = 0x000, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "ONE_THREAD_ACTIVE", .udesc = "Counts Xclk (100Mhz) pulses when this thread is unhalted and the other thread is halted", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t bdw_cycle_activity[]={ { .uname = "CYCLES_L2_PENDING", .udesc = "Cycles with pending L2 miss loads (must use with HT off only)", .ucode = 0x0100 | (0x1 << INTEL_X86_CMASK_BIT), .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_C, .ucntmsk= 0xf, }, { .uname = "CYCLES_LDM_PENDING", .udesc = "Cycles with pending memory loads", .ucode = 0x0200 | (0x2 << INTEL_X86_CMASK_BIT), .modhw = _INTEL_X86_ATTR_C, .uequiv = "CYCLES_MEM_ANY", .uflags = INTEL_X86_NCOMBO, }, { .uname = "CYCLES_MEM_ANY", .udesc = "Cycles with pending memory loads", .ucode = 0x0200 | (0x2 << INTEL_X86_CMASK_BIT), .modhw = _INTEL_X86_ATTR_C, .uflags = INTEL_X86_NCOMBO, }, { .uname = "CYCLES_L1D_PENDING", .udesc = "Cycles with pending L1D load cache misses", .ucode = 0x0800 | (0x8 << INTEL_X86_CMASK_BIT), .modhw = _INTEL_X86_ATTR_C, .ucntmsk= 0x4, .uflags = INTEL_X86_NCOMBO, }, { .uname = "STALLS_LDM_PENDING", .udesc = "Executions stalls when there is at least one pending demand load request", .ucode = 0x0600 | (0x6 << INTEL_X86_CMASK_BIT), .modhw = _INTEL_X86_ATTR_C, .ucntmsk= 0x4, .uflags = 
INTEL_X86_NCOMBO, }, { .uname = "STALLS_L1D_PENDING", .udesc = "Executions stalls while there is at least one L1D demand load outstanding", .ucode = 0x0c00 | (0xc << INTEL_X86_CMASK_BIT), .modhw = _INTEL_X86_ATTR_C, .ucntmsk= 0x4, .uflags = INTEL_X86_NCOMBO, }, { .uname = "STALLS_L2_PENDING", .udesc = "Execution stalls while there is at least one L2 demand load pending outstanding", .ucode = 0x0500 | (0x5 << INTEL_X86_CMASK_BIT), .modhw = _INTEL_X86_ATTR_C, .ucntmsk= 0xf, .uflags = INTEL_X86_NCOMBO, }, { .uname = "STALLS_TOTAL", .udesc = "Cycles during which no instructions were executed in the execution stage of the pipeline", .ucode = 0x0400 | (0x4 << INTEL_X86_CMASK_BIT), .modhw = _INTEL_X86_ATTR_C, .ucntmsk= 0xf, .uflags = INTEL_X86_NCOMBO, }, { .uname = "CYCLES_NO_EXECUTE", .udesc = "Cycles during which no instructions were executed in the execution stage of the pipeline", .ucode = 0x0400 | (0x4 << INTEL_X86_CMASK_BIT), .modhw = _INTEL_X86_ATTR_C, .uequiv = "STALLS_TOTAL", .ucntmsk= 0xf, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t bdw_dtlb_load_misses[]={ { .uname = "MISS_CAUSES_A_WALK", .udesc = "Misses in all DTLB levels that cause page walks", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "WALK_COMPLETED_4K", .udesc = "Misses in all TLB levels causes a page walk that completes (4K)", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "WALK_COMPLETED_2M_4M", .udesc = "Misses in all TLB levels causes a page walk of 2MB/4MB page sizes that completes", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO, }, { .uname = "WALK_COMPLETED_1G", .udesc = "Misses in all TLB levels causes a page walk of 1GB page sizes that completes", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO, }, { .uname = "WALK_COMPLETED", .udesc = "Misses in all TLB levels causes a page walk of any page size that completes", .ucode = 0xe00, .uflags = INTEL_X86_NCOMBO, }, { .uname = "WALK_DURATION", .udesc = "Cycles when PMH is busy with page walks", .ucode = 0x1000, 
.uflags = INTEL_X86_NCOMBO, }, { .uname = "STLB_HIT_4K", .udesc = "Misses that miss the DTLB and hit the STLB (4KB)", .ucode = 0x2000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "STLB_HIT_2M", .udesc = "Misses that miss the DTLB and hit the STLB (2MB)", .ucode = 0x4000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "STLB_HIT", .udesc = "Number of cache load STLB hits. No page walk", .ucode = 0x6000, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t bdw_itlb_misses[]={ { .uname = "MISS_CAUSES_A_WALK", .udesc = "Misses in all DTLB levels that cause page walks", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "WALK_COMPLETED_4K", .udesc = "Misses in all TLB levels causes a page walk that completes (4KB)", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "WALK_COMPLETED_2M_4M", .udesc = "Misses in all TLB levels causes a page walk that completes (2MB/4MB)", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO, }, { .uname = "WALK_COMPLETED_1G", .udesc = "Misses in all TLB levels causes a page walk that completes (1GB)", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO, }, { .uname = "WALK_COMPLETED", .udesc = "Misses in all TLB levels causes a page walk of any page size that completes", .ucode = 0xe00, .uflags = INTEL_X86_NCOMBO, }, { .uname = "WALK_DURATION", .udesc = "Cycles when PMH is busy with page walks", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "STLB_HIT_4K", .udesc = "Misses that miss the DTLB and hit the STLB (4KB)", .ucode = 0x2000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "STLB_HIT_2M", .udesc = "Misses that miss the DTLB and hit the STLB (2MB)", .ucode = 0x4000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "STLB_HIT", .udesc = "Number of cache load STLB hits. 
No page walk", .ucode = 0x6000, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t bdw_fp_assist[]={ { .uname = "X87_OUTPUT", .udesc = "Number of X87 FP assists due to output values", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "X87_INPUT", .udesc = "Number of X87 FP assists due to input values", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO, }, { .uname = "SIMD_OUTPUT", .udesc = "Number of SIMD FP assists due to output values", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO, }, { .uname = "SIMD_INPUT", .udesc = "Number of SIMD FP assists due to input values", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ANY", .udesc = "Cycles with any input/output SEE or FP assists", .ucode = 0x1e00 | (1 << INTEL_X86_CMASK_BIT), /* cnt=1 */ .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_C, }, { .uname = "ALL", .udesc = "Cycles with any input and output SSE or FP assist", .ucode = 0x1e00 | (1 << INTEL_X86_CMASK_BIT), /* cnt=1 */ .uequiv = "ANY", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_C, }, }; static const intel_x86_umask_t bdw_icache[]={ { .uname = "MISSES", .udesc = "Number of Instruction Cache, Streaming Buffer and Victim Cache Misses. Includes Uncacheable accesses", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "IFDATA_STALL", .udesc = "Number of cycles where a code fetch is stalled due to L1 miss", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO, }, { .uname = "HIT", .udesc = "Number of Instruction Cache, Streaming Buffer and Victim Cache Reads. 
Includes cacheable and uncacheable accesses and uncacheable fetches", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t bdw_idq[]={ { .uname = "EMPTY", .udesc = "Cycles the Instruction Decode Queue (IDQ) is empty", .ucode = 0x200, .ucntmsk= 0xf, .uflags = INTEL_X86_NCOMBO, }, { .uname = "MITE_UOPS", .udesc = "Number of uops delivered to Instruction Decode Queue (IDQ) from MITE path", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO, }, { .uname = "DSB_UOPS", .udesc = "Number of uops delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO, }, { .uname = "MS_DSB_UOPS", .udesc = "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "MS_MITE_UOPS", .udesc = "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy", .ucode = 0x2000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "MS_UOPS", .udesc = "Number of Uops were delivered into Instruction Decode Queue (IDQ) from MS, initiated by Decode Stream Buffer (DSB) or MITE", .ucode = 0x3000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "MS_UOPS_CYCLES", .udesc = "Number of cycles that Uops were delivered into Instruction Decode Queue (IDQ) when MS_Busy, initiated by Decode Stream Buffer (DSB) or MITE", .ucode = 0x3000 | (1 << INTEL_X86_CMASK_BIT), /* cnt=1 */ .uequiv = "MS_UOPS:c=1", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_C, }, { .uname = "MS_SWITCHES", .udesc = "Number of cycles that Uops were delivered into Instruction Decode Queue (IDQ) when MS_Busy, initiated by Decode Stream Buffer (DSB) or MITE", .ucode = 0x3000 | INTEL_X86_MOD_EDGE | (1 << INTEL_X86_CMASK_BIT), /* edge=1 cnt=1 */ .uequiv = "MS_UOPS:c=1:e", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_E | _INTEL_X86_ATTR_C, }, { .uname = "MITE_UOPS_CYCLES", 
.udesc = "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path", .ucode = 0x400 | (1 << INTEL_X86_CMASK_BIT), /* cnt=1 */ .uequiv = "MITE_UOPS:c=1", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_C, }, { .uname = "DSB_UOPS_CYCLES", .udesc = "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path", .ucode = 0x800 | (1 << INTEL_X86_CMASK_BIT), /* cnt=1 */ .uequiv = "DSB_UOPS:c=1", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_C, }, { .uname = "MS_DSB_UOPS_CYCLES", .udesc = "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy", .ucode = 0x1000 | (1 << INTEL_X86_CMASK_BIT), /* cnt=1 */ .uequiv = "MS_DSB_UOPS:c=1", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_C, }, { .uname = "MS_DSB_OCCUR", .udesc = "Deliveries to Instruction Decode Queue (IDQ) initiated by Decode Stream Buffer (DSB) while Microcode Sequencer (MS) is busy", .ucode = 0x1000 | INTEL_X86_MOD_EDGE | (1 << INTEL_X86_CMASK_BIT), /* edge=1 cnt=1 */ .uequiv = "MS_DSB_UOPS:c=1:e=1", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_E | _INTEL_X86_ATTR_C, }, { .uname = "ALL_DSB_CYCLES_4_UOPS", .udesc = "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops", .ucode = 0x1800 | (4 << INTEL_X86_CMASK_BIT), /* cnt=4 */ .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_C, }, { .uname = "ALL_DSB_CYCLES_ANY_UOPS", .udesc = "Cycles Decode Stream Buffer (DSB) is delivering any Uop", .ucode = 0x1800 | (1 << INTEL_X86_CMASK_BIT), /* cnt=1 */ .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_C, }, { .uname = "ALL_MITE_CYCLES_4_UOPS", .udesc = "Cycles MITE is delivering 4 Uops", .ucode = 0x2400 | (4 << INTEL_X86_CMASK_BIT), /* cnt=4 */ .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_C, }, { .uname = "ALL_MITE_CYCLES_ANY_UOPS", .udesc = "Cycles MITE is delivering any Uop", .ucode = 0x2400 | (1 << 
INTEL_X86_CMASK_BIT), /* cnt=1 */ .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_C, }, { .uname = "ALL_MITE_UOPS", .udesc = "Number of uops delivered to Instruction Decode Queue (IDQ) from any path", .ucode = 0x3c00, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t bdw_idq_uops_not_delivered[]={ { .uname = "CORE", .udesc = "Count number of non-delivered uops to Resource Allocation Table (RAT)", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "CYCLES_0_UOPS_DELIV_CORE", .udesc = "Cycles per thread when 4 or more uops are not delivered to the Resource Allocation Table (RAT) when backend is not stalled", .ucode = 0x100 | (4 << INTEL_X86_CMASK_BIT), /* cnt=4 */ .uflags = INTEL_X86_NCOMBO, .uequiv = "CORE:c=4", .modhw = _INTEL_X86_ATTR_C, }, { .uname = "CYCLES_LE_1_UOP_DELIV_CORE", .udesc = "Cycles per thread when 3 or more uops are not delivered to the Resource Allocation Table (RAT) when backend is not stalled", .ucode = 0x100 | (3 << INTEL_X86_CMASK_BIT), /* cnt=3 */ .uequiv = "CORE:c=3", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_C, }, { .uname = "CYCLES_LE_2_UOP_DELIV_CORE", .udesc = "Cycles with less than 2 uops delivered by the front end", .ucode = 0x100 | (2 << INTEL_X86_CMASK_BIT), /* cnt=2 */ .uequiv = "CORE:c=2", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_C, }, { .uname = "CYCLES_LE_3_UOP_DELIV_CORE", .udesc = "Cycles with less than 3 uops delivered by the front end", .ucode = 0x100 | (1 << INTEL_X86_CMASK_BIT), /* cnt=1 */ .uequiv = "CORE:c=1", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_C, }, { .uname = "CYCLES_FE_WAS_OK", .udesc = "Cycles Front-End (FE) delivered 4 uops or Resource Allocation Table (RAT) was stalling FE", .ucode = 0x100 | INTEL_X86_MOD_INV | (1 << INTEL_X86_CMASK_BIT), /* cnt=1 inv=1 */ .uequiv = "CORE:c=1:i", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_C | _INTEL_X86_ATTR_I, }, }; static const intel_x86_umask_t bdw_inst_retired[]={ { .uname = "ANY_P", .udesc = "Number 
of instructions retired. General Counter - architectural event", .ucode = 0x000, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "ALL", .udesc = "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution (Precise Event)", .ucode = 0x100, .uequiv = "PREC_DIST", .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "TOTAL_CYCLES", .udesc = "Number of cycles using always true condition", .ucode = 0x100 | INTEL_X86_MOD_INV | (10 << INTEL_X86_CMASK_BIT), /* inv=1 cnt=10 */ .uequiv = "PREC_DIST:i=1:c=10", .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, .modhw = _INTEL_X86_ATTR_I | _INTEL_X86_ATTR_C, }, { .uname = "PREC_DIST", .udesc = "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution (Precise event)", .ucode = 0x100, .ucntmsk= 0x2, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "X87", .udesc = "Number of FPU operations retired (instructions with no exceptions)", .ucode = 0x200, .ucntmsk= 0x2, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t bdw_int_misc[]={ { .uname = "RECOVERY_CYCLES", .udesc = "Cycles waiting for the checkpoints in Resource Allocation Table (RAT) to be recovered after Nuke due to all other cases except JEClear (e.g. whenever a ucode assist is needed like SSE exception, memory disambiguation, etc...)", .ucode = 0x300 | (1 << INTEL_X86_CMASK_BIT), /* cnt=1 */ .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_C, }, { .uname = "RECOVERY_CYCLES_ANY", .udesc = "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. 
misprediction or memory nuke)", .ucode = 0x300 | (1 << INTEL_X86_CMASK_BIT) | INTEL_X86_MOD_ANY, /* cnt=1 any=1 */ .uequiv = "RECOVERY_CYCLES:t", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_C | _INTEL_X86_ATTR_T, }, { .uname = "RECOVERY_STALLS_COUNT", .udesc = "Number of occurrences waiting for Machine Clears", .ucode = 0x300 | INTEL_X86_MOD_EDGE | (1 << INTEL_X86_CMASK_BIT), /* edge=1 cnt=1 */ .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_E | _INTEL_X86_ATTR_C, }, { .uname = "RAT_STALL_CYCLES", .udesc = "Cycles when the Resource Allocation Table (RAT) external stall event is sent to the Instruction Decode Queue (IDQ) for the thread. Also includes cycles when the allocator is serving another thread", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t bdw_itlb[]={ { .uname = "ITLB_FLUSH", .udesc = "Flushing of the Instruction TLB (ITLB) pages independent of page size", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, }; static const intel_x86_umask_t bdw_l1d[]={ { .uname = "REPLACEMENT", .udesc = "L1D Data line replacements", .ucode = 0x100, .uflags = INTEL_X86_DFL, }, }; static const intel_x86_umask_t bdw_sq_misc[]={ { .uname = "SPLIT_LOCK", .udesc = "Number of split locks in the super queue (SQ)", .ucode = 0x1000, .uflags = INTEL_X86_DFL, }, }; static const intel_x86_umask_t bdw_l1d_pend_miss[]={ { .uname = "PENDING", .udesc = "Cycles with L1D load misses outstanding", .ucode = 0x100, .ucntmsk = 0x4, .uflags = INTEL_X86_DFL, }, { .uname = "PENDING_CYCLES", .udesc = "Cycles with L1D load misses outstanding", .ucode = 0x100 | (1 << INTEL_X86_CMASK_BIT), /* cnt=1 */ .uequiv = "PENDING:c=1", .ucntmsk = 0x4, .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_C, }, { .uname = "PENDING_CYCLES_ANY", .udesc = "Cycles with L1D load misses outstanding from any thread", .ucode = 0x100 | (1 << INTEL_X86_CMASK_BIT) | INTEL_X86_MOD_ANY, /* cnt=1 any=1 */ .uequiv = "PENDING:c=1:t", .ucntmsk = 0x4, .uflags = 
INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_C | _INTEL_X86_ATTR_T, }, { .uname = "OCCURRENCES", .udesc = "Number L1D miss outstanding", .ucode = 0x100 | INTEL_X86_MOD_EDGE | (1 << INTEL_X86_CMASK_BIT), /* edge=1 cnt=1 */ .uequiv = "PENDING:c=1:e=1", .ucntmsk = 0x4, .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_E | _INTEL_X86_ATTR_C, }, { .uname = "EDGE", .udesc = "Number L1D miss outstanding", .ucode = 0x100 | INTEL_X86_MOD_EDGE | (1 << INTEL_X86_CMASK_BIT), /* edge=1 cnt=1 */ .uequiv = "PENDING:c=1:e=1", .ucntmsk = 0x4, .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_E | _INTEL_X86_ATTR_C, }, { .uname = "FB_FULL", .udesc = "Number of cycles a demand request was blocked due to Fill Buffer (FB) unavailability", .ucode = 0x200 | (1 << INTEL_X86_CMASK_BIT), /* cnt=1 */ .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_C, }, }; static const intel_x86_umask_t bdw_l2_demand_rqsts[]={ { .uname = "WB_HIT", .udesc = "WB requests that hit L2 cache", .ucode = 0x5000, .uflags = INTEL_X86_DFL, }, }; static const intel_x86_umask_t bdw_l2_lines_in[]={ { .uname = "I", .udesc = "L2 cache lines in I state filling L2", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "S", .udesc = "L2 cache lines in S state filling L2", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "E", .udesc = "L2 cache lines in E state filling L2", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ALL", .udesc = "L2 cache lines filling L2", .ucode = 0x700, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "ANY", .udesc = "L2 cache lines filling L2", .uequiv = "ALL", .ucode = 0x700, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t bdw_l2_lines_out[]={ { .uname = "DEMAND_CLEAN", .udesc = "Number of clean L2 cachelines evicted by demand", .ucode = 0x500, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, }; static const intel_x86_umask_t bdw_l2_rqsts[]={ { .uname = "DEMAND_DATA_RD_MISS", .udesc = "Demand Data Read requests that miss L2 cache", .ucode = 
0x2100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "DEMAND_DATA_RD_HIT", .udesc = "Demand Data Read requests that hit L2 cache", .ucode = 0x4100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "DEMAND_RFO_MISS", .udesc = "RFO requests that miss L2 cache", .ucode = 0x2200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "RFO_MISS", .udesc = "RFO requests that miss L2 cache", .ucode = 0x2200, .uequiv = "DEMAND_RFO_MISS", .uflags = INTEL_X86_NCOMBO, }, { .uname = "DEMAND_RFO_HIT", .udesc = "RFO requests that hit L2 cache", .ucode = 0x4200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "RFO_HIT", .udesc = "RFO requests that hit L2 cache", .ucode = 0x4200, .uequiv = "DEMAND_RFO_HIT", .uflags = INTEL_X86_NCOMBO, }, { .uname = "CODE_RD_MISS", .udesc = "L2 cache misses when fetching instructions", .ucode = 0x2400, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ALL_DEMAND_MISS", .udesc = "All demand requests that miss the L2 cache", .ucode = 0x2700, .uflags = INTEL_X86_NCOMBO, }, { .uname = "CODE_RD_HIT", .udesc = "L2 cache hits when fetching instructions, code reads", .ucode = 0x4400, .uflags = INTEL_X86_NCOMBO, }, { .uname = "L2_PF_MISS", .udesc = "Requests from the L2 hardware prefetchers that miss L2 cache", .ucode = 0x3800, .uequiv = "PF_MISS", .uflags = INTEL_X86_NCOMBO, }, { .uname = "PF_MISS", .udesc = "Requests from the L2 hardware prefetchers that miss L2 cache", .ucode = 0x3800, .uflags = INTEL_X86_NCOMBO, }, { .uname = "MISS", .udesc = "All requests that miss the L2 cache", .ucode = 0x3f00, .uflags = INTEL_X86_NCOMBO, }, { .uname = "L2_PF_HIT", .udesc = "Requests from the L2 hardware prefetchers that hit L2 cache", .ucode = 0xd800, .uequiv = "PF_HIT", .uflags = INTEL_X86_NCOMBO, }, { .uname = "PF_HIT", .udesc = "Requests from the L2 hardware prefetchers that hit L2 cache", .ucode = 0xd800, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ALL_DEMAND_DATA_RD", .udesc = "Any data read request to L2 cache", .ucode = 0xe100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ALL_RFO", .udesc = "Any 
data RFO request to L2 cache", .ucode = 0xe200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ALL_CODE_RD", .udesc = "Any code read request to L2 cache", .ucode = 0xe400, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ALL_DEMAND_REFERENCES", .udesc = "All demand requests to L2 cache ", .ucode = 0xe700, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ALL_PF", .udesc = "Any L2 HW prefetch request to L2 cache", .ucode = 0xf800, .uflags = INTEL_X86_NCOMBO, }, { .uname = "REFERENCES", .udesc = "All requests to L2 cache", .ucode = 0xff00, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, }; static const intel_x86_umask_t bdw_l2_trans[]={ { .uname = "DEMAND_DATA_RD", .udesc = "Demand Data Read requests that access L2 cache", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "RFO", .udesc = "RFO requests that access L2 cache", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "CODE_RD", .udesc = "L2 cache accesses when fetching instructions", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ALL_PF", .udesc = "L2 or L3 HW prefetches that access L2 cache, including rejects", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO, }, { .uname = "L1D_WB", .udesc = "L1D writebacks that access L2 cache", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "L2_FILL", .udesc = "L2 fill requests that access L2 cache", .ucode = 0x2000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "L2_WB", .udesc = "L2 writebacks that access L2 cache", .ucode = 0x4000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ALL_REQUESTS", .udesc = "Transactions accessing L2 pipe", .ucode = 0x8000, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, }; static const intel_x86_umask_t bdw_ld_blocks[]={ { .uname = "STORE_FORWARD", .udesc = "Counts the number of loads blocked by overlapping with store buffer entries that cannot be forwarded", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "NO_SR", .udesc = "number of times that split load operations are temporarily blocked because all resources for handling 
the split accesses are in use", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t bdw_ld_blocks_partial[]={ { .uname = "ADDRESS_ALIAS", .udesc = "False dependencies in MOB due to partial compare on address", .ucode = 0x100, .uflags = INTEL_X86_DFL, }, }; static const intel_x86_umask_t bdw_load_hit_pre[]={ { .uname = "HW_PF", .udesc = "Non software-prefetch load dispatches that hit FB allocated for hardware prefetch", .ucode = 0x200, }, { .uname = "SW_PF", .udesc = "Non software-prefetch load dispatches that hit FB allocated for software prefetch", .ucode = 0x100, }, }; static const intel_x86_umask_t bdw_lock_cycles[]={ { .uname = "SPLIT_LOCK_UC_LOCK_DURATION", .udesc = "Cycles in which the L1D and L2 are locked, due to a UC lock or split lock", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "CACHE_LOCK_DURATION", .udesc = "cycles that the L1D is locked", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t bdw_longest_lat_cache[]={ { .uname = "MISS", .udesc = "Core-originated cacheable demand requests missed LLC - architectural event", .ucode = 0x4100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "REFERENCE", .udesc = "Core-originated cacheable demand requests that refer to LLC - architectural event", .ucode = 0x4f00, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t bdw_machine_clears[]={ { .uname = "CYCLES", .udesc = "Cycles there was a Nuke. 
Account for both thread-specific and All Thread Nukes", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "MEMORY_ORDERING", .udesc = "Number of Memory Ordering Machine Clears detected", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "SMC", .udesc = "Number of Self-modifying code (SMC) Machine Clears detected", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO, }, { .uname = "MASKMOV", .udesc = "This event counts the number of executed Intel AVX masked load operations that refer to an illegal address range with the mask bits set to 0", .ucode = 0x2000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "COUNT", .udesc = "Number of machine clears (nukes) of any type", .ucode = 0x100 | INTEL_X86_MOD_EDGE | (1 << INTEL_X86_CMASK_BIT), /* edge=1 cnt=1 */ .uequiv = "CYCLES:c=1:e", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_E | _INTEL_X86_ATTR_C, }, }; static const intel_x86_umask_t bdw_mem_load_uops_l3_hit_retired[]={ { .uname = "XSNP_MISS", .udesc = "Retired load uops which data sources were L3 hit and cross-core snoop missed in on-pkg core cache", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "XSNP_HIT", .udesc = "Retired load uops which data sources were L3 and cross-core snoop hits in on-pkg core cache", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "XSNP_HITM", .udesc = "Load had HitM Response from a core on same socket (shared L3). 
(Non PEBS", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "XSNP_NONE", .udesc = "Retired load uops which data sources were hits in L3 without snoops required", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, }; static const intel_x86_umask_t bdw_mem_load_uops_l3_miss_retired[]={ { .uname = "LOCAL_DRAM", .udesc = "Retired load uops missing L3 cache but hitting local memory (Precise Event)", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS , }, { .uname = "REMOTE_DRAM", .udesc = "Number of retired load uops that missed L3 but were service by remote RAM, snoop not needed, snoop miss, snoop hit data not forwarded (Precise Event)", .ucode = 0x400, .umodel = PFM_PMU_INTEL_BDW_EP, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "REMOTE_HITM", .udesc = "Number of retired load uops whose data sources was remote HITM (Precise Event)", .ucode = 0x1000, .umodel = PFM_PMU_INTEL_BDW_EP, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "REMOTE_FWD", .udesc = "Load uops that miss in the L3 whose data source was forwarded from a remote cache (Precise Event)", .ucode = 0x2000, .umodel = PFM_PMU_INTEL_BDW_EP, .uflags= INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, }; static const intel_x86_umask_t bdw_mem_load_uops_retired[]={ { .uname = "L1_HIT", .udesc = "Retired load uops with L1 cache hits as data source", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "L2_HIT", .udesc = "Retired load uops with L2 cache hits as data source", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "L3_HIT", .udesc = "Retired load uops with L3 cache hits as data source", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "L1_MISS", .udesc = "Retired load uops which missed the L1D", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "L2_MISS", .udesc = "Retired load uops which missed the L2. 
Unknown data source excluded", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "L3_MISS", .udesc = "Retired load uops which missed the L3", .ucode = 0x2000, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "HIT_LFB", .udesc = "Retired load uops which missed L1 but hit line fill buffer (LFB)", .ucode = 0x4000, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, }; static const intel_x86_umask_t bdw_mem_trans_retired[]={ { .uname = "LOAD_LATENCY", .udesc = "Memory load instructions retired above programmed clocks, minimum threshold value is 3 (Precise Event and ldlat required)", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS | INTEL_X86_LDLAT | INTEL_X86_DFL, }, { .uname = "LATENCY_ABOVE_THRESHOLD", .udesc = "Memory load instructions retired above programmed clocks, minimum threshold value is 3 (Precise Event and ldlat required)", .ucode = 0x100, .uequiv = "LOAD_LATENCY", .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS | INTEL_X86_LDLAT | INTEL_X86_NO_AUTOENCODE, }, }; static const intel_x86_umask_t bdw_mem_uops_retired[]={ { .uname = "STLB_MISS_LOADS", .udesc = "Load uops with true STLB miss retired to architected path", .ucode = 0x1100, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "STLB_MISS_STORES", .udesc = "Store uops with true STLB miss retired to architected path", .ucode = 0x1200, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "LOCK_LOADS", .udesc = "Load uops with locked access retired", .ucode = 0x2100, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "SPLIT_LOADS", .udesc = "Line-splitted load uops retired", .ucode = 0x4100, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "SPLIT_STORES", .udesc = "Line-splitted store uops retired", .ucode = 0x4200, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "ALL_LOADS", .udesc = "All load uops retired", .ucode = 0x8100, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "ALL_STORES", .udesc = "All store uops 
retired", .ucode = 0x8200, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, };
/* Unit masks for bdw_misalign_mem_ref: LOADS (0x100) and STORES (0x200). */
static const intel_x86_umask_t bdw_misalign_mem_ref[]={ { .uname = "LOADS", .udesc = "Speculative cache-line split load uops dispatched to the L1D", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "STORES", .udesc = "Speculative cache-line split store-address uops dispatched to L1D", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, };
/* Unit masks for bdw_move_elimination: INT/SIMD, eliminated vs. not eliminated. */
static const intel_x86_umask_t bdw_move_elimination[]={ { .uname = "INT_ELIMINATED", .udesc = "Number of integer Move Elimination candidate uops that were eliminated", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "SIMD_ELIMINATED", .udesc = "Number of SIMD Move Elimination candidate uops that were eliminated", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "INT_NOT_ELIMINATED", .udesc = "Number of integer Move Elimination candidate uops that were not eliminated", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO, }, { .uname = "SIMD_NOT_ELIMINATED", .udesc = "Number of SIMD Move Elimination candidate uops that were not eliminated", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO, }, };
/* Unit masks for bdw_offcore_requests; no entry is marked INTEL_X86_DFL. */
static const intel_x86_umask_t bdw_offcore_requests[]={ { .uname = "DEMAND_DATA_RD", .udesc = "Demand data read requests sent to uncore (use with HT off only)", .ucode = 0x100, .uflags= INTEL_X86_NCOMBO, }, { .uname = "DEMAND_CODE_RD", .udesc = "Demand code read requests sent to uncore (use with HT off only)", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "DEMAND_RFO", .udesc = "Demand RFOs requests sent to uncore (use with HT off only)", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ALL_DATA_RD", .udesc = "Data read requests sent to uncore (use with HT off only)", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO, }, };
/* Unit masks for bdw_other_assists (AVX/SSE transitions and writeback assists). */
static const intel_x86_umask_t bdw_other_assists[]={ { .uname = "AVX_TO_SSE", .udesc = "Number of transitions from AVX-256 to legacy SSE when penalty applicable", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO, }, { .uname = 
"SSE_TO_AVX", .udesc = "Number of transitions from legacy SSE to AVX-256 when penalty applicable", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ANY_WB_ASSIST", .udesc = "Number of times any microcode assist is invoked by HW upon uop writeback", .ucode = 0x4000, .uflags = INTEL_X86_NCOMBO, }, };
/* Unit masks for bdw_resource_stalls; ALL is an alias (.uequiv) of the default ANY. */
static const intel_x86_umask_t bdw_resource_stalls[]={ { .uname = "ANY", .udesc = "Cycles Allocation is stalled due to Resource Related reason", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "ALL", .udesc = "Cycles Allocation is stalled due to Resource Related reason", .ucode = 0x100, .uequiv = "ANY", .uflags = INTEL_X86_NCOMBO, }, { .uname = "RS", .udesc = "Stall cycles caused by absence of eligible entries in Reservation Station (RS)", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO, }, { .uname = "SB", .udesc = "Cycles Allocator is stalled due to Store Buffer full (not including draining from synch)", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ROB", .udesc = "ROB full stall cycles", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO, }, };
/* Unit masks for bdw_rob_misc_events: single default entry LBR_INSERTS. */
static const intel_x86_umask_t bdw_rob_misc_events[]={ { .uname = "LBR_INSERTS", .udesc = "Count each time an new Last Branch Record (LBR) is inserted", .ucode = 0x2000, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, };
/*
 * Unit masks for bdw_rs_events.
 * EMPTY_END reuses umask 0x100 with inv, cmask=1 and edge folded into .ucode;
 * .modhw lists those same three modifiers (I, C, E).
 */
static const intel_x86_umask_t bdw_rs_events[]={ { .uname = "EMPTY_CYCLES", .udesc = "Cycles the Reservation Station (RS) is empty for this thread", .ucode = 0x100, .uflags = INTEL_X86_DFL, }, { .uname = "EMPTY_END", .udesc = "Number of times the reservation station (RS) was empty", .ucode = 0x100 | INTEL_X86_MOD_INV | (1 << INTEL_X86_CMASK_BIT) | INTEL_X86_MOD_EDGE, /* inv=1, cmask=1,edge=1 */ .modhw = _INTEL_X86_ATTR_I | _INTEL_X86_ATTR_C | _INTEL_X86_ATTR_E, }, };
/* Unit masks for bdw_tlb_flush; no entry is marked INTEL_X86_DFL. */
static const intel_x86_umask_t bdw_tlb_flush[]={ { .uname = "DTLB_THREAD", .udesc = "Count number of DTLB flushes of thread-specific entries", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = 
"STLB_ANY", .udesc = "Count number of any STLB flushes", .ucode = 0x2000, .uflags = INTEL_X86_NCOMBO, }, };
/*
 * Unit masks for bdw_uops_executed.
 * THREAD (0x100, default) and CORE (0x200) are the base umasks; the remaining
 * entries are aliases that add a cmask and/or inv to one of them, as spelled
 * out by each entry's .uequiv, and record the fixed modifiers in .modhw.
 */
static const intel_x86_umask_t bdw_uops_executed[]={ { .uname = "CORE", .udesc = "Number of uops executed from any thread", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "THREAD", .udesc = "Number of uops executed per thread each cycle", .ucode = 0x100, .uflags = INTEL_X86_DFL | INTEL_X86_NCOMBO, }, { .uname = "STALL_CYCLES", .udesc = "Number of cycles with no uops executed", .ucode = 0x100 | INTEL_X86_MOD_INV | (1 << INTEL_X86_CMASK_BIT), /* inv=1 cnt=1 */ .uequiv = "THREAD:c=1:i", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_I | _INTEL_X86_ATTR_C, }, { .uname = "CYCLES_GE_1_UOP_EXEC", .udesc = "Cycles where at least 1 uop was executed per thread", .ucode = 0x100 | (1 << INTEL_X86_CMASK_BIT), /* cnt=1 */ .uequiv = "THREAD:c=1", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_C, }, { .uname = "CYCLES_GE_2_UOPS_EXEC", .udesc = "Cycles where at least 2 uops were executed per thread", .ucode = 0x100 | (2 << INTEL_X86_CMASK_BIT), /* cnt=2 */ .uequiv = "THREAD:c=2", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_C, }, { .uname = "CYCLES_GE_3_UOPS_EXEC", .udesc = "Cycles where at least 3 uops were executed per thread", .ucode = 0x100 | (3 << INTEL_X86_CMASK_BIT), /* cnt=3 */ .uequiv = "THREAD:c=3", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_C, }, { .uname = "CYCLES_GE_4_UOPS_EXEC", .udesc = "Cycles where at least 4 uops were executed per thread", .ucode = 0x100 | (4 << INTEL_X86_CMASK_BIT), /* cnt=4 */ .uequiv = "THREAD:c=4", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_C, }, { .uname = "CORE_CYCLES_GE_1", .udesc = "Cycles where at least 1 uop was executed from any thread", .ucode = 0x200 | (1 << INTEL_X86_CMASK_BIT), /* cnt=1 */ .uequiv = "CORE:c=1", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_C, }, { .uname = "CORE_CYCLES_GE_2", .udesc = "Cycles where at least 2 uops were executed from any thread", .ucode 
= 0x200 | (2 << INTEL_X86_CMASK_BIT), /* cnt=2 */ .uequiv = "CORE:c=2", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_C, }, { .uname = "CORE_CYCLES_GE_3", .udesc = "Cycles where at least 3 uops were executed from any thread", .ucode = 0x200 | (3 << INTEL_X86_CMASK_BIT), /* cnt=3 */ .uequiv = "CORE:c=3", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_C, }, { .uname = "CORE_CYCLES_GE_4", .udesc = "Cycles where at least 4 uops were executed from any thread", .ucode = 0x200 | (4 << INTEL_X86_CMASK_BIT), /* cnt=4 */ .uequiv = "CORE:c=4", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_C, }, { .uname = "CORE_CYCLES_NONE", .udesc = "Cycles where no uop is executed on any thread", .ucode = 0x200 | INTEL_X86_MOD_INV, /* inv=1 */ .uequiv = "CORE:i", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_I, }, };
/*
 * Unit masks for bdw_uops_executed_port.
 * PORT_0..PORT_7 use one bit each (0x100 .. 0x8000). Each PORT_n_CORE entry
 * is the matching PORT_n umask with INTEL_X86_MOD_ANY set (.uequiv
 * "PORT_n:t=1", .modhw = _INTEL_X86_ATTR_T); their descriptions are still
 * the placeholder "tbd".
 */
static const intel_x86_umask_t bdw_uops_executed_port[]={ { .uname = "PORT_0", .udesc = "Cycles which a Uop is executed on port 0", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "PORT_1", .udesc = "Cycles which a Uop is executed on port 1", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "PORT_2", .udesc = "Cycles which a Uop is executed on port 2", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO, }, { .uname = "PORT_3", .udesc = "Cycles which a Uop is executed on port 3", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO, }, { .uname = "PORT_4", .udesc = "Cycles which a Uop is executed on port 4", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "PORT_5", .udesc = "Cycles which a Uop is executed on port 5", .ucode = 0x2000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "PORT_6", .udesc = "Cycles which a Uop is executed on port 6", .ucode = 0x4000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "PORT_7", .udesc = "Cycles which a Uop is executed on port 7", .ucode = 0x8000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "PORT_0_CORE", .udesc = "tbd", .ucode = 0x100 | INTEL_X86_MOD_ANY, /* any=1 */ .uequiv = "PORT_0:t=1", .uflags = 
INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_T, }, { .uname = "PORT_1_CORE", .udesc = "tbd", .ucode = 0x200 | INTEL_X86_MOD_ANY, /* any=1 */ .uequiv = "PORT_1:t=1", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_T, }, { .uname = "PORT_2_CORE", .udesc = "tbd", .ucode = 0x400 | INTEL_X86_MOD_ANY, /* any=1 */ .uequiv = "PORT_2:t=1", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_T, }, { .uname = "PORT_3_CORE", .udesc = "tbd", .ucode = 0x800 | INTEL_X86_MOD_ANY, /* any=1 */ .uequiv = "PORT_3:t=1", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_T, }, { .uname = "PORT_4_CORE", .udesc = "tbd", .ucode = 0x1000 | INTEL_X86_MOD_ANY, /* any=1 */ .uequiv = "PORT_4:t=1", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_T, }, { .uname = "PORT_5_CORE", .udesc = "tbd", .ucode = 0x2000 | INTEL_X86_MOD_ANY, /* any=1 */ .uequiv = "PORT_5:t=1", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_T, }, { .uname = "PORT_6_CORE", .udesc = "tbd", .ucode = 0x4000 | INTEL_X86_MOD_ANY, /* any=1 */ .uequiv = "PORT_6:t=1", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_T, }, { .uname = "PORT_7_CORE", .udesc = "tbd", .ucode = 0x8000 | INTEL_X86_MOD_ANY, /* any=1 */ .uequiv = "PORT_7:t=1", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_T, }, };
/*
 * Unit masks for bdw_uops_issued.
 * ANY (0x100) is the default and ALL is its alias via .uequiv.
 */
static const intel_x86_umask_t bdw_uops_issued[]={ { .uname = "ANY", .udesc = "Number of Uops issued by the Resource Allocation Table (RAT) to the Reservation Station (RS)", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "ALL", .udesc = "Number of Uops issued by the Resource Allocation Table (RAT) to the Reservation Station (RS)", .ucode = 0x100, .uequiv = "ANY", .uflags = INTEL_X86_NCOMBO, }, { .uname = "FLAGS_MERGE", .udesc = "Number of flags-merge uops being allocated. Such uops adds delay", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "SLOW_LEA", .udesc = "Number of slow LEA or similar uops allocated. 
Such uop has 3 sources regardless if result of LEA instruction or not", .ucode = 0x2000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "SINGLE_MUL", .udesc = "Number of Multiply packed/scalar single precision uops allocated", .ucode = 0x4000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "STALL_CYCLES", .udesc = "Counts the number of cycles no uops issued by this thread", .ucode = 0x100 | INTEL_X86_MOD_INV | (1 << INTEL_X86_CMASK_BIT), /* inv=1 cnt=1 */ .uequiv = "ANY:c=1:i=1", .uflags = INTEL_X86_NCOMBO, .ucntmsk = 0xf, .modhw = _INTEL_X86_ATTR_I | _INTEL_X86_ATTR_C, }, { .uname = "CORE_STALL_CYCLES", .udesc = "Counts the number of cycles no uops issued on this core", .ucode = 0x100 | INTEL_X86_MOD_ANY | INTEL_X86_MOD_INV | (1 << INTEL_X86_CMASK_BIT), /* any=1 inv=1 cnt=1 */ .uequiv = "ANY:c=1:i=1:t=1", .ucntmsk = 0xf, .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_T | _INTEL_X86_ATTR_I | _INTEL_X86_ATTR_C, }, };
/*
 * Unit masks for bdw_uops_retired.
 * ALL (0x100) is the default and ANY is its alias via .uequiv. The *_CYCLES
 * entries are ALL combined with inv/cmask modifiers, as given by each entry's
 * .uequiv. Every entry is flagged INTEL_X86_PEBS.
 */
static const intel_x86_umask_t bdw_uops_retired[]={ { .uname = "ALL", .udesc = "All uops that actually retired", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS | INTEL_X86_DFL, }, { .uname = "ANY", .udesc = "All uops that actually retired", .ucode = 0x100, .uequiv = "ALL", .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "RETIRE_SLOTS", .udesc = "number of retirement slots used non PEBS", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "STALL_CYCLES", .udesc = "Cycles no executable uops retired (Precise Event)", .ucode = 0x100 | INTEL_X86_MOD_INV | (1 << INTEL_X86_CMASK_BIT), /* inv=1 cnt=1 */ .uequiv = "ALL:i=1:c=1", .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, .modhw = _INTEL_X86_ATTR_I | _INTEL_X86_ATTR_C, }, { .uname = "TOTAL_CYCLES", .udesc = "Number of cycles using always true condition applied to PEBS uops retired event", .ucode = 0x100 | INTEL_X86_MOD_INV | (10 << INTEL_X86_CMASK_BIT), /* inv=1 cnt=10 */ .uequiv = "ALL:i=1:c=10", .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, .modhw = _INTEL_X86_ATTR_I 
| _INTEL_X86_ATTR_C, }, { .uname = "CORE_STALL_CYCLES", .udesc = "Cycles no executable uops retired on core (Precise Event)", .ucode = 0x100 | INTEL_X86_MOD_INV | (1 << INTEL_X86_CMASK_BIT), /* inv=1 cnt=1 */ .uequiv = "ALL:i=1:c=1:t=1", .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, .modhw = _INTEL_X86_ATTR_I | _INTEL_X86_ATTR_C, }, { .uname = "STALL_OCCURRENCES", .udesc = "Number of transitions from stalled to unstalled execution (Precise Event)", .ucode = 0x100 | INTEL_X86_MOD_INV | INTEL_X86_MOD_EDGE| (1 << INTEL_X86_CMASK_BIT), /* inv=1 edge=1 cnt=1 */ .uequiv = "ALL:c=1:i=1:e=1", .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, .modhw = _INTEL_X86_ATTR_I | _INTEL_X86_ATTR_C | _INTEL_X86_ATTR_E, }, }; static const intel_x86_umask_t bdw_offcore_response[]={ { .uname = "DMND_DATA_RD", .udesc = "Request: number of demand and DCU prefetch data reads of full and partial cachelines as well as demand data page table entry cacheline reads. Does not count L2 data read prefetches or instruction fetches", .ucode = 1ULL << (0 + 8), .grpid = 0, }, { .uname = "DMND_RFO", .udesc = "Request: number of demand and DCU prefetch reads for ownership (RFO) requests generated by a write to data cacheline. Does not count L2 RFO prefetches", .ucode = 1ULL << (1 + 8), .grpid = 0, }, { .uname = "DMND_IFETCH", .udesc = "Request: number of demand and DCU prefetch instruction cacheline reads. 
Does not count L2 code read prefetches", .ucode = 1ULL << (2 + 8), .grpid = 0, }, { .uname = "WB", .udesc = "Request: number of writebacks (modified to exclusive) transactions", .ucode = 1ULL << (3 + 8), .grpid = 0, }, { .uname = "PF_DATA_RD", .udesc = "Request: number of data cacheline reads generated by L2 prefetchers", .ucode = 1ULL << (4 + 8), .grpid = 0, }, { .uname = "PF_RFO", .udesc = "Request: number of RFO requests generated by L2 prefetchers", .ucode = 1ULL << (5 + 8), .grpid = 0, }, { .uname = "PF_IFETCH", .udesc = "Request: number of code reads generated by L2 prefetchers", .ucode = 1ULL << (6 + 8), .grpid = 0, }, { .uname = "PF_LLC_DATA_RD", .udesc = "Request: number of L3 prefetcher requests to L2 for loads", .ucode = 1ULL << (7 + 8), .grpid = 0, }, { .uname = "PF_LLC_RFO", .udesc = "Request: number of RFO requests generated by L2 prefetcher", .ucode = 1ULL << (8 + 8), .grpid = 0, }, { .uname = "PF_LLC_IFETCH", .udesc = "Request: number of L2 prefetcher requests to L3 for instruction fetches", .ucode = 1ULL << (9 + 8), .grpid = 0, }, { .uname = "BUS_LOCKS", .udesc = "Request: number bus lock and split lock requests", .ucode = 1ULL << (10 + 8), .grpid = 0, }, { .uname = "STRM_ST", .udesc = "Request: number of streaming store requests", .ucode = 1ULL << (11 + 8), .grpid = 0, }, { .uname = "OTHER", .udesc = "Request: counts one of the following transaction types, including L3 invalidate, I/O, full or partial writes, WC or non-temporal stores, CLFLUSH, Fences, lock, unlock, split lock", .ucode = 1ULL << (15+8), .grpid = 0, }, { .uname = "ANY_IFETCH", .udesc = "Request: combination of PF_IFETCH | DMND_IFETCH | PF_LLC_IFETCH", .uequiv = "PF_IFETCH:DMND_IFETCH:PF_LLC_IFETCH", .ucode = 0x24100, .grpid = 0, }, { .uname = "ANY_REQUEST", .udesc = "Request: combination of all request umasks", .uequiv = "DMND_DATA_RD:DMND_RFO:DMND_IFETCH:WB:PF_DATA_RD:PF_RFO:PF_IFETCH:PF_LLC_DATA_RD:PF_LLC_RFO:PF_LLC_IFETCH:BUS_LOCKS:STRM_ST:OTHER", .ucode = 0x8fff00, .uflags= 
INTEL_X86_NCOMBO | INTEL_X86_DFL, .grpid = 0, },
/* ANY_DATA = DMND_DATA_RD (bit 0) | PF_DATA_RD (bit 4) | PF_LLC_DATA_RD (bit 7) = 0x91 << 8. */
{ .uname = "ANY_DATA", .udesc = "Request: combination of DMND_DATA | PF_DATA_RD | PF_LLC_DATA_RD", .uequiv = "DMND_DATA_RD:PF_DATA_RD:PF_LLC_DATA_RD", .ucode = 0x9100, .grpid = 0, },
/*
 * ANY_RFO = DMND_RFO (bit 1) | PF_RFO (bit 5) | PF_LLC_RFO (bit 8)
 *         = 0x122 << 8 = 0x12200.
 * The previous value 0x10300 set bit 0 (DMND_DATA_RD) and omitted bit 5
 * (PF_RFO), contradicting .uequiv.
 */
{ .uname = "ANY_RFO", .udesc = "Request: combination of DMND_RFO | PF_RFO | PF_LLC_RFO", .uequiv = "DMND_RFO:PF_RFO:PF_LLC_RFO", .ucode = 0x12200, .grpid = 0, },
/* Supplier group (.grpid = 1). Each LLC_* entry is an alias of the L3_* entry named in its .uequiv. */
{ .uname = "ANY_RESPONSE", .udesc = "Response: count any response type", .ucode = 1ULL << (16+8), .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL | INTEL_X86_EXCL_GRP_GT, .grpid = 1, },
{ .uname = "NO_SUPP", .udesc = "Supplier: counts number of times supplier information is not available", .ucode = 1ULL << (17+8), .grpid = 1, },
{ .uname = "L3_HITM", .udesc = "Supplier: counts L3 hits in M-state (initial lookup)", .ucode = 1ULL << (18+8), .grpid = 1, },
{ .uname = "LLC_HITM", .udesc = "Supplier: counts L3 hits in M-state (initial lookup)", .ucode = 1ULL << (18+8), .uequiv = "L3_HITM", .grpid = 1, },
{ .uname = "L3_HITE", .udesc = "Supplier: counts L3 hits in E-state", .ucode = 1ULL << (19+8), .grpid = 1, },
{ .uname = "LLC_HITE", .udesc = "Supplier: counts L3 hits in E-state", .ucode = 1ULL << (19+8), .uequiv = "L3_HITE", .grpid = 1, },
{ .uname = "L3_HITS", .udesc = "Supplier: counts L3 hits in S-state", .ucode = 1ULL << (20+8), .grpid = 1, },
{ .uname = "LLC_HITS", .udesc = "Supplier: counts L3 hits in S-state", .ucode = 1ULL << (20+8), .uequiv = "L3_HITS", .grpid = 1, },
{ .uname = "L3_HITF", .udesc = "Supplier: counts L3 hits in F-state", .ucode = 1ULL << (21+8), .grpid = 1, },
/*
 * LLC_HITF is an alias of L3_HITF and must use the same bit (21).
 * It previously used bit 20, which is the S-state bit (L3_HITS).
 */
{ .uname = "LLC_HITF", .udesc = "Supplier: counts L3 hits in F-state", .ucode = 1ULL << (21+8), .uequiv = "L3_HITF", .grpid = 1, },
{ .uname = "L3_HITMESF", .udesc = "Supplier: counts L3 hits in any state (M, E, S, F)", .ucode = 0xfULL << (18+8), .uequiv = "L3_HITM:L3_HITE:L3_HITS:L3_HITF", .grpid = 1, },
{ .uname = "LLC_HITMESF", .udesc = "Supplier: counts L3 hits in any state (M, E, S, F)", .ucode = 
0xfULL << (18+8), .uequiv = "L3_HITMESF", .grpid = 1, }, { .uname = "L3_HIT", .udesc = "Alias for L3_HITMESF", .ucode = 0xfULL << (18+8), .uequiv = "L3_HITM:L3_HITE:L3_HITS:L3_HITF", .grpid = 1, }, { .uname = "LLC_HIT", .udesc = "Alias for LLC_HITMESF", .ucode = 0xfULL << (18+8), .uequiv = "L3_HITM:L3_HITE:L3_HITS:L3_HITF", .grpid = 1, }, { .uname = "L3_MISS_LOCAL", .udesc = "Supplier: counts L3 misses to local DRAM", .ucode = 1ULL << (26+8), .grpid = 1, }, { .uname = "LLC_MISS_LOCAL", .udesc = "Supplier: counts L3 misses to local DRAM", .ucode = 1ULL << (26+8), .uequiv = "L3_MISS_LOCAL", .grpid = 1, }, { .uname = "LLC_MISS_LOCAL_DRAM", .udesc = "Supplier: counts L3 misses to local DRAM", .ucode = 1ULL << (26+8), .uequiv = "L3_MISS_LOCAL", .grpid = 1, },
/*
 * L3_MISS is defined twice, selected by .umodel: for PFM_PMU_INTEL_BDW it is
 * the local-DRAM bit only (bit 26); for PFM_PMU_INTEL_BDW_EP it covers bits
 * 26-29 (local plus the three remote-hop bits).
 */
{ .uname = "L3_MISS", .udesc = "Supplier: counts L3 misses to local DRAM", .ucode = 1ULL << (26+8), .uequiv = "L3_MISS_LOCAL", .grpid = 1, .umodel = PFM_PMU_INTEL_BDW, }, { .uname = "L3_MISS", .udesc = "Supplier: counts L3 misses to local or remote DRAM", .ucode = 0xfULL << (26+8), .uequiv = "L3_MISS_LOCAL:L3_MISS_REMOTE_HOP0:L3_MISS_REMOTE_HOP1:L3_MISS_REMOTE_HOP2P", .umodel = PFM_PMU_INTEL_BDW_EP, .grpid = 1, },
/* Remote-hop supplier bits 27-29; all are restricted to PFM_PMU_INTEL_BDW_EP via .umodel. */
{ .uname = "L3_MISS_REMOTE_HOP0", .udesc = "Supplier: counts L3 misses to remote DRAM with 0 hop", .ucode = 0x1ULL << (27+8), .umodel = PFM_PMU_INTEL_BDW_EP, .grpid = 1, }, { .uname = "L3_MISS_REMOTE_HOP0_DRAM", .udesc = "Supplier: counts L3 misses to remote DRAM with 0 hop", .ucode = 0x1ULL << (27+8), .uequiv = "L3_MISS_REMOTE_HOP0", .umodel = PFM_PMU_INTEL_BDW_EP, .grpid = 1, }, { .uname = "L3_MISS_REMOTE_HOP1", .udesc = "Supplier: counts L3 misses to remote DRAM with 1 hop", .ucode = 0x1ULL << (28+8), .umodel = PFM_PMU_INTEL_BDW_EP, .grpid = 1, }, { .uname = "L3_MISS_REMOTE_HOP1_DRAM", .udesc = "Supplier: counts L3 misses to remote DRAM with 1 hop", .ucode = 0x1ULL << (28+8), .uequiv = "L3_MISS_REMOTE_HOP1", .umodel = PFM_PMU_INTEL_BDW_EP, .grpid = 1, }, { .uname = "L3_MISS_REMOTE_HOP2P", .udesc = 
"Supplier: counts L3 misses to remote DRAM with 2P hops", .ucode = 0x1ULL << (29+8), .umodel = PFM_PMU_INTEL_BDW_EP, .grpid = 1, }, { .uname = "L3_MISS_REMOTE_HOP2P_DRAM", .udesc = "Supplier: counts L3 misses to remote DRAM with 2P hops", .ucode = 0x1ULL << (29+8), .uequiv = "L3_MISS_REMOTE_HOP2P", .umodel = PFM_PMU_INTEL_BDW_EP, .grpid = 1, }, { .uname = "L3_MISS_REMOTE", .udesc = "Supplier: counts L3 misses to remote node", .uequiv = "L3_MISS_REMOTE_HOP0:L3_MISS_REMOTE_HOP1:L3_MISS_REMOTE_HOP2P", .ucode = 0x7ULL << (27+8), .umodel = PFM_PMU_INTEL_BDW_EP, .grpid = 1, }, { .uname = "L3_MISS_REMOTE_DRAM", .udesc = "Supplier: counts L3 misses to remote node", .ucode = 0x7ULL << (27+8), .uequiv = "L3_MISS_REMOTE", .umodel = PFM_PMU_INTEL_BDW_EP, .grpid = 1, }, { .uname = "SPL_HIT", .udesc = "Supplier: counts L3 supplier hit", .ucode = 0x1ULL << (30+8), .grpid = 1, },
/* Snoop group (.grpid = 2): bits 31-37. */
{ .uname = "SNP_NONE", .udesc = "Snoop: counts number of times no snoop-related information is available", .ucode = 1ULL << (31+8), .grpid = 2, }, { .uname = "SNP_NOT_NEEDED", .udesc = "Snoop: counts the number of times no snoop was needed to satisfy the request", .ucode = 1ULL << (32+8), .grpid = 2, }, { .uname = "SNP_MISS", .udesc = "Snoop: counts number of times a snoop was needed and it missed all snooped caches", .ucode = 1ULL << (33+8), .grpid = 2, }, { .uname = "SNP_NO_FWD", .udesc = "Snoop: counts number of times a snoop was needed and it hit in at leas one snooped cache", .ucode = 1ULL << (34+8), .grpid = 2, }, { .uname = "SNP_FWD", .udesc = "Snoop: counts number of times a snoop was needed and data was forwarded from a remote socket", .ucode = 1ULL << (35+8), .grpid = 2, }, { .uname = "HITM", .udesc = "Snoop: counts number of times a snoop was needed and it hitM-ed in local or remote cache", .ucode = 1ULL << (36+8), .uequiv = "SNP_HITM", .grpid = 2, }, { .uname = "SNP_HITM", .udesc = "Snoop: counts number of times a snoop was needed and it hitM-ed in local or remote cache", .ucode = 1ULL << 
(36+8), .grpid = 2, }, { .uname = "NON_DRAM", .udesc = "Snoop: counts number of times target was a non-DRAM system address. This includes MMIO transactions", .ucode = 1ULL << (37+8), .grpid = 2, },
/* SNP_ANY is the default of the snoop group: all seven snoop bits (0x7f at bit 31). */
{ .uname = "SNP_ANY", .udesc = "Snoop: any snoop reason", .ucode = 0x7fULL << (31+8), .uequiv = "SNP_NONE:SNP_NOT_NEEDED:SNP_MISS:SNP_NO_FWD:SNP_FWD:HITM:NON_DRAM", .uflags = INTEL_X86_DFL, .grpid = 2, }, };
/*
 * Unit masks for bdw_hle_retired: START is the default; only ABORTED is
 * flagged INTEL_X86_PEBS.
 */
static const intel_x86_umask_t bdw_hle_retired[]={ { .uname = "START", .udesc = "Number of times an HLE execution started", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "COMMIT", .udesc = "Number of times an HLE execution successfully committed", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ABORTED", .udesc = "Number of times an HLE execution aborted due to any reasons (multiple categories may count as one) (Precise Event)", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "ABORTED_MISC1", .udesc = "Number of times an HLE execution aborted due to various memory events", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ABORTED_MISC2", .udesc = "Number of times an HLE execution aborted due to uncommon conditions", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ABORTED_MISC3", .udesc = "Number of times an HLE execution aborted due to HLE-unfriendly instructions", .ucode = 0x2000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ABORTED_MISC4", .udesc = "Number of times an HLE execution aborted due to incompatible memory type", .ucode = 0x4000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ABORTED_MISC5", .udesc = "Number of times an HLE execution aborted due to none of the other 4 reasons (e.g., interrupt)", .ucode = 0x8000, .uflags = INTEL_X86_NCOMBO, }, };
/*
 * Unit masks for bdw_rtm_retired: same names and .ucode values as
 * bdw_hle_retired, with RTM wording in the descriptions.
 */
static const intel_x86_umask_t bdw_rtm_retired[]={ { .uname = "START", .udesc = "Number of times an RTM execution started", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "COMMIT", .udesc = "Number of times 
an RTM execution successfully committed", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ABORTED", .udesc = "Number of times an RTM execution aborted due to any reasons (multiple categories may count as one) (Precise Event)", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "ABORTED_MISC1", .udesc = "Number of times an RTM execution aborted due to various memory events", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ABORTED_MISC2", .udesc = "Number of times an RTM execution aborted due to uncommon conditions", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ABORTED_MISC3", .udesc = "Number of times an RTM execution aborted due to RTM-unfriendly instructions", .ucode = 0x2000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ABORTED_MISC4", .udesc = "Number of times an RTM execution aborted due to incompatible memory type", .ucode = 0x4000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ABORTED_MISC5", .udesc = "Number of times an RTM execution aborted due to none of the other 4 reasons (e.g., interrupt)", .ucode = 0x8000, .uflags = INTEL_X86_NCOMBO, }, };
/*
 * Unit masks for bdw_tx_mem: one bit per abort cause (0x100 .. 0x4000).
 * No entry is marked INTEL_X86_DFL.
 */
static const intel_x86_umask_t bdw_tx_mem[]={ { .uname = "ABORT_CONFLICT", .udesc = "Number of times a transactional abort was signaled due to data conflict on a transactionally accessed address", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ABORT_CAPACITY", .udesc = "Number of times a transactional abort was signaled due to data capacity limitation", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ABORT_HLE_STORE_TO_ELIDED_LOCK", .udesc = "Number of times a HLE transactional execution aborted due to a non xrelease prefixed instruction writing to an elided lock in the elision buffer", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ABORT_HLE_ELISION_BUFFER_NOT_EMPTY", .udesc = "Number of times a HLE transactional execution aborted due to NoAllocatedElisionBuffer being non-zero", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO, }, { 
.uname = "ABORT_HLE_ELISION_BUFFER_MISMATCH", .udesc = "Number of times a HLE transaction execution aborted due to xrelease lock not satisfying the address and value requirements in the elision buffer", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ABORT_HLE_ELISION_BUFFER_UNSUPPORTED_ALIGNMENT", .udesc = "Number of times a HLE transaction execution aborted due to an unsupported read alignment from the elision buffer", .ucode = 0x2000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ABORT_HLE_ELISION_BUFFER_FULL", .udesc = "Number of times a HLE clock could not be elided due to ElisionBufferAvailable being zero", .ucode = 0x4000, .uflags = INTEL_X86_NCOMBO, }, };
/* Unit masks for bdw_tx_exec: MISC1..MISC5 (0x100 .. 0x1000); no default entry. */
static const intel_x86_umask_t bdw_tx_exec[]={ { .uname = "MISC1", .udesc = "Number of times a class of instructions that may cause a transactional abort was executed. Since this is the count of execution, it may not always cause a transactional abort", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "MISC2", .udesc = "Number of times a class of instructions that may cause a transactional abort was executed inside a transactional region", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "MISC3", .udesc = "Number of times an instruction execution caused the supported nest count to be exceeded", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO, }, { .uname = "MISC4", .udesc = "Number of times an instruction a xbegin instruction was executed inside HLE transactional region", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO, }, { .uname = "MISC5", .udesc = "Number of times an instruction with HLE xacquire prefix was executed inside a RTM transactional region", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO, }, };
/*
 * Unit masks for bdw_offcore_requests_outstanding.
 * Each *_CYCLES entry is the like-named base umask with cmask=1 folded into
 * .ucode (see its .uequiv).
 * NOTE(review): unlike DEMAND_DATA_RD_GE_6, the *_CYCLES entries do not set
 * .modhw = _INTEL_X86_ATTR_C -- confirm whether that is intentional.
 */
static const intel_x86_umask_t bdw_offcore_requests_outstanding[]={ { .uname = "ALL_DATA_RD_CYCLES", .udesc = "Cycles with cacheable data read transactions in the superQ (use with HT off only)", .uequiv = "ALL_DATA_RD:c=1", .ucode = 0x800 | (0x1 << INTEL_X86_CMASK_BIT), .uflags= 
INTEL_X86_NCOMBO, }, { .uname = "DEMAND_CODE_RD_CYCLES", .udesc = "Cycles with demand code reads transactions in the superQ (use with HT off only)", .uequiv = "DEMAND_CODE_RD:c=1", .ucode = 0x200 | (0x1 << INTEL_X86_CMASK_BIT), .uflags= INTEL_X86_NCOMBO, }, { .uname = "DEMAND_DATA_RD_CYCLES", .udesc = "Cycles with demand data read transactions in the superQ (use with HT off only)", .uequiv = "DEMAND_DATA_RD:c=1", .ucode = 0x100 | (0x1 << INTEL_X86_CMASK_BIT), .uflags= INTEL_X86_NCOMBO, }, { .uname = "ALL_DATA_RD", .udesc = "Cacheable data read transactions in the superQ every cycle (use with HT off only)", .ucode = 0x800, .uflags= INTEL_X86_NCOMBO, }, { .uname = "DEMAND_CODE_RD", .udesc = "Code read transactions in the superQ every cycle (use with HT off only)", .ucode = 0x200, .uflags= INTEL_X86_NCOMBO, }, { .uname = "DEMAND_DATA_RD", .udesc = "Demand data read transactions in the superQ every cycle (use with HT off only)", .ucode = 0x100, .uflags= INTEL_X86_NCOMBO, }, { .uname = "DEMAND_DATA_RD_GE_6", .udesc = "Cycles with at lesat 6 offcore outstanding demand data read requests in the uncore queue", .uequiv = "DEMAND_DATA_RD:c=6", .ucode = 0x100 | (6 << INTEL_X86_CMASK_BIT), .uflags= INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_C, }, { .uname = "DEMAND_RFO", .udesc = "Outstanding RFO (store) transactions in the superQ every cycle (use with HT off only)", .ucode = 0x400, .uflags= INTEL_X86_NCOMBO, }, { .uname = "DEMAND_RFO_CYCLES", .udesc = "Cycles with outstanding RFO (store) transactions in the superQ (use with HT off only)", .uequiv = "DEMAND_RFO:c=1", .ucode = 0x400 | (0x1 << INTEL_X86_CMASK_BIT), .uflags= INTEL_X86_NCOMBO, }, };
/* Unit masks for bdw_ild_stall: single default entry LCP. */
static const intel_x86_umask_t bdw_ild_stall[]={ { .uname = "LCP", .udesc = "Stall caused by changing prefix length of the instruction", .ucode = 0x100, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, };
/*
 * Unit masks for bdw_page_walker_loads.
 * In each .ucode the 0x1000 bit marks DTLB entries and the 0x2000 bit ITLB
 * entries; the low nibble of the high byte (0x100/0x200/0x400/0x800) selects
 * L1/L2/L3/memory. The table has a DTLB_MEMORY entry but no ITLB_MEMORY.
 */
static const intel_x86_umask_t bdw_page_walker_loads[]={ { .uname = "DTLB_L1", .udesc = "Number of DTLB page walker loads that hit in 
the L1D and line fill buffer", .ucode = 0x1100, .uflags= INTEL_X86_NCOMBO, }, { .uname = "ITLB_L1", .udesc = "Number of ITLB page walker loads that hit in the L1I and line fill buffer", .ucode = 0x2100, .uflags= INTEL_X86_NCOMBO, }, { .uname = "DTLB_L2", .udesc = "Number of DTLB page walker loads that hit in the L2", .ucode = 0x1200, .uflags= INTEL_X86_NCOMBO, }, { .uname = "ITLB_L2", .udesc = "Number of ITLB page walker loads that hit in the L2", .ucode = 0x2200, .uflags= INTEL_X86_NCOMBO, }, { .uname = "DTLB_L3", .udesc = "Number of DTLB page walker loads that hit in the L3", .ucode = 0x1400, .uflags= INTEL_X86_NCOMBO, }, { .uname = "ITLB_L3", .udesc = "Number of ITLB page walker loads that hit in the L3", .ucode = 0x2400, .uflags= INTEL_X86_NCOMBO, }, { .uname = "DTLB_MEMORY", .udesc = "Number of DTLB page walker loads that hit memory", .ucode = 0x1800, .uflags= INTEL_X86_NCOMBO, }, };
/*
 * Unit masks for bdw_lsd.
 * UOPS (0x100) is the default; ACTIVE and CYCLES_4_UOPS are UOPS with
 * cmask=1 and cmask=4 respectively (see .uequiv / .modhw).
 */
static const intel_x86_umask_t bdw_lsd[]={ { .uname = "UOPS", .udesc = "Number of uops delivered by the Loop Stream Detector (LSD)", .ucode = 0x100, .uflags= INTEL_X86_DFL, }, { .uname = "ACTIVE", .udesc = "Cycles with uops delivered by the LSD but which did not come from decoder", .ucode = 0x100 | (1 << INTEL_X86_CMASK_BIT), /* cnt=1 */ .uequiv = "UOPS:c=1", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_C, }, { .uname = "CYCLES_4_UOPS", .udesc = "Cycles with 4 uops delivered by the LSD but which did not come from decoder", .ucode = 0x100 | (4 << INTEL_X86_CMASK_BIT), /* cnt=4 */ .uequiv = "UOPS:c=4", .uflags = INTEL_X86_NCOMBO, .modhw = _INTEL_X86_ATTR_C, }, };
/* Unit masks for bdw_dsb2mite_switches: single default entry PENALTY_CYCLES. */
static const intel_x86_umask_t bdw_dsb2mite_switches[]={ { .uname = "PENALTY_CYCLES", .udesc = "Number of DSB to MITE switch true penalty cycles", .ucode = 0x0200, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, };
/* Unit masks for bdw_ept: single default entry WALK_CYCLES. */
static const intel_x86_umask_t bdw_ept[]={ { .uname = "WALK_CYCLES", .udesc = "Cycles for an extended page table walk", .ucode = 0x1000, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, };
/* Unit masks for bdw_arith: single default entry FPU_DIV_ACTIVE. */
static const 
intel_x86_umask_t bdw_arith[]={ { .uname = "FPU_DIV_ACTIVE", .udesc = "Cycles when divider is busy execuing divide operations", .ucode = 0x0100, .uflags= INTEL_X86_DFL, }, };
/*
 * Unit masks for bdw_fp_arith.
 * Six single-bit umasks (0x0100 .. 0x2000) select scalar/128-bit/256-bit by
 * double/single precision. SCALAR, PACKED, SINGLE and DOUBLE are
 * combinations whose .ucode is the OR of the members named in .uequiv.
 * No entry sets .uflags, so none is marked as default.
 */
static const intel_x86_umask_t bdw_fp_arith[]={ { .uname = "SCALAR_DOUBLE", .udesc = "Number of scalar double precision floating-point arithmetic instructions (multiply by 1 to get flops)", .ucode = 0x0100, }, { .uname = "SCALAR_SINGLE", .udesc = "Number of scalar single precision floating-point arithmetic instructions (multiply by 1 to get flops)", .ucode = 0x0200, }, { .uname = "SCALAR", .udesc = "Number of SSE/AVX computational scalar floating-point instructions retired. Applies to SSE* and AVX* scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RSQRT RCP SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element", .ucode = 0x0300, .uequiv = "SCALAR_DOUBLE:SCALAR_SINGLE", }, { .uname = "128B_PACKED_DOUBLE", .udesc = "Number of scalar 128-bit packed double precision floating-point arithmetic instructions (multiply by 2 to get flops)", .ucode = 0x0400, }, { .uname = "128B_PACKED_SINGLE", .udesc = "Number of scalar 128-bit packed single precision floating-point arithmetic instructions (multiply by 4 to get flops)", .ucode = 0x0800, }, { .uname = "256B_PACKED_DOUBLE", .udesc = "Number of scalar 256-bit packed double precision floating-point arithmetic instructions (multiply by 4 to get flops)", .ucode = 0x1000, }, { .uname = "256B_PACKED_SINGLE", .udesc = "Number of scalar 256-bit packed single precision floating-point arithmetic instructions (multiply by 8 to get flops)", .ucode = 0x2000, }, { .uname = "PACKED", .udesc = "Number of SSE/AVX computational packed floating-point instructions retired. Applies to SSE* and AVX*, packed, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RSQRT RCP SQRT DPP FM(N)ADD/SUB. 
DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element", .ucode = 0x3c00, .uequiv = "128B_PACKED_DOUBLE:128B_PACKED_SINGLE:256B_PACKED_SINGLE:256B_PACKED_DOUBLE", }, { .uname = "SINGLE", .udesc = "Number of SSE/AVX computational single precision floating-point instructions retired. Applies to SSE* and AVX*scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element", .ucode = 0x2a00, .uequiv = "256B_PACKED_SINGLE:128B_PACKED_SINGLE:SCALAR_SINGLE", }, { .uname = "DOUBLE", .udesc = "Number of SSE/AVX computational double precision floating-point instructions retired. Applies to SSE* and AVX*scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element", .ucode = 0x1500, .uequiv = "SCALAR_DOUBLE:128B_PACKED_DOUBLE:256B_PACKED_DOUBLE", }, }; static const intel_x86_umask_t bdw_offcore_requests_buffer[]={ { .uname = "SQ_FULL", .udesc = "Number of cycles the offcore requests buffer is full", .ucode = 0x0100, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, }; static const intel_x86_umask_t bdw_uops_dispatches_cancelled[]={ { .uname = "SIMD_PRF", .udesc = "Number of uops cancelled after they were dispatched from the scheduler to the execution units when the total number of physical register read ports exceeds the read bandwidth of the register file. 
This umask applies to instructions: DPPS, DPPS, VPCMPESTRI, PCMPESTRI, VPCMPESTRM, PCMPESTRM, VFMADD*, VFMADDSUB*, VFMSUB*, VMSUBADD*, VFNMADD*, VFNMSUB*", .ucode = 0x0300, .uflags= INTEL_X86_NCOMBO | INTEL_X86_DFL, }, }; static const intel_x86_entry_t intel_bdw_pe[]={ { .name = "UNHALTED_CORE_CYCLES", .desc = "Count core clock cycles whenever the clock signal on the specific core is running (not halted)", .modmsk = INTEL_V4_ATTRS, .cntmsk = 0x20000000full, .code = 0x3c, }, { .name = "UNHALTED_REFERENCE_CYCLES", .desc = "Unhalted reference cycles", .modmsk = INTEL_FIXED3_ATTRS, .cntmsk = 0x400000000ull, .code = 0x0300, /* pseudo encoding */ .flags = INTEL_X86_FIXED, }, { .name = "INSTRUCTION_RETIRED", .desc = "Number of instructions at retirement", .modmsk = INTEL_V4_ATTRS, .cntmsk = 0x10000000full, .code = 0xc0, }, { .name = "INSTRUCTIONS_RETIRED", .desc = "This is an alias for INSTRUCTION_RETIRED", .modmsk = INTEL_V4_ATTRS, .equiv = "INSTRUCTION_RETIRED", .cntmsk = 0x10000000full, .code = 0xc0, }, { .name = "BRANCH_INSTRUCTIONS_RETIRED", .desc = "Count branch instructions at retirement. Specifically, this event counts the retirement of the last micro-op of a branch instruction", .modmsk = INTEL_V4_ATTRS, .equiv = "BR_INST_RETIRED:ALL_BRANCHES", .cntmsk = 0xff, .code = 0xc4, }, { .name = "MISPREDICTED_BRANCH_RETIRED", .desc = "Count mispredicted branch instructions at retirement. 
Specifically, this event counts at retirement of the last micro-op of a branch instruction in the architectural path of the execution and experienced misprediction in the branch prediction hardware", .modmsk = INTEL_V4_ATTRS, .equiv = "BR_MISP_RETIRED:ALL_BRANCHES", .cntmsk = 0xff, .code = 0xc5, }, { .name = "BACLEARS", .desc = "Branch re-steered", .code = 0xe6, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_baclears), .umasks = bdw_baclears }, { .name = "BR_INST_EXEC", .desc = "Branch instructions executed", .code = 0x88, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_br_inst_exec), .umasks = bdw_br_inst_exec }, { .name = "BR_INST_RETIRED", .desc = "Branch instructions retired (Precise Event)", .code = 0xc4, .cntmsk = 0xff, .ngrp = 1, .flags = INTEL_X86_PEBS, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_br_inst_retired), .umasks = bdw_br_inst_retired }, { .name = "BR_MISP_EXEC", .desc = "Mispredicted branches executed", .code = 0x89, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_br_misp_exec), .umasks = bdw_br_misp_exec }, { .name = "BR_MISP_RETIRED", .desc = "Mispredicted retired branches (Precise Event)", .code = 0xc5, .cntmsk = 0xff, .ngrp = 1, .flags = INTEL_X86_PEBS, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_br_misp_retired), .umasks = bdw_br_misp_retired }, { .name = "CPL_CYCLES", .desc = "Unhalted core cycles at a specific ring level", .code = 0x5c, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_cpl_cycles), .umasks = bdw_cpl_cycles }, { .name = "CPU_CLK_THREAD_UNHALTED", .desc = "Count core clock cycles whenever the clock signal on the specific core is running (not halted)", .code = 0x3c, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_cpu_clk_thread_unhalted), .umasks = bdw_cpu_clk_thread_unhalted }, { .name = "CPU_CLK_UNHALTED", 
.desc = "Count core clock cycles whenever the clock signal on the specific core is running (not halted)", .code = 0x3c, .cntmsk = 0xff, .modmsk = INTEL_V4_ATTRS, .equiv = "CPU_CLK_THREAD_UNHALTED", }, { .name = "CYCLE_ACTIVITY", .desc = "Stalled cycles", .code = 0xa3, .cntmsk = 0xf, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_cycle_activity), .umasks = bdw_cycle_activity }, { .name = "DTLB_LOAD_MISSES", .desc = "Data TLB load misses", .code = 0x8, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_dtlb_load_misses), .umasks = bdw_dtlb_load_misses }, { .name = "DTLB_STORE_MISSES", .desc = "Data TLB store misses", .code = 0x49, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_dtlb_load_misses), .umasks = bdw_dtlb_load_misses /* shared */ }, { .name = "FP_ASSIST", .desc = "X87 floating-point assists", .code = 0xca, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_fp_assist), .umasks = bdw_fp_assist }, { .name = "HLE_RETIRED", .desc = "HLE execution (Precise Event)", .code = 0xc8, .cntmsk = 0xff, .ngrp = 1, .flags = INTEL_X86_PEBS, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_hle_retired), .umasks = bdw_hle_retired }, { .name = "ICACHE", .desc = "Instruction Cache", .code = 0x80, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_icache), .umasks = bdw_icache }, { .name = "IDQ", .desc = "IDQ operations", .code = 0x79, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_idq), .umasks = bdw_idq }, { .name = "IDQ_UOPS_NOT_DELIVERED", .desc = "Uops not delivered", .code = 0x9c, .cntmsk = 0xf, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_idq_uops_not_delivered), .umasks = bdw_idq_uops_not_delivered }, { .name = "INST_RETIRED", .desc = "Number of instructions retired (Precise Event)", .code = 0xc0, .cntmsk = 0xff, .ngrp = 1, .flags = 
INTEL_X86_PEBS, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_inst_retired), .umasks = bdw_inst_retired }, { .name = "INT_MISC", .desc = "Miscellaneous interruptions", .code = 0xd, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_int_misc), .umasks = bdw_int_misc }, { .name = "ITLB", .desc = "Instruction TLB", .code = 0xae, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_itlb), .umasks = bdw_itlb }, { .name = "ITLB_MISSES", .desc = "Instruction TLB misses", .code = 0x85, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_itlb_misses), .umasks = bdw_itlb_misses }, { .name = "L1D", .desc = "L1D cache", .code = 0x51, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_l1d), .umasks = bdw_l1d }, { .name = "L1D_PEND_MISS", .desc = "L1D pending misses", .code = 0x48, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_l1d_pend_miss), .umasks = bdw_l1d_pend_miss }, { .name = "L2_DEMAND_RQSTS", .desc = "Demand Data Read requests to L2", .code = 0x27, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_l2_demand_rqsts), .umasks = bdw_l2_demand_rqsts }, { .name = "L2_LINES_IN", .desc = "L2 lines allocated", .code = 0xf1, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_l2_lines_in), .umasks = bdw_l2_lines_in }, { .name = "L2_LINES_OUT", .desc = "L2 lines evicted", .code = 0xf2, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_l2_lines_out), .umasks = bdw_l2_lines_out }, { .name = "L2_RQSTS", .desc = "L2 requests", .code = 0x24, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_l2_rqsts), .umasks = bdw_l2_rqsts }, { .name = "L2_TRANS", .desc = "L2 transactions", .code = 0xf0, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = 
LIBPFM_ARRAY_SIZE(bdw_l2_trans), .umasks = bdw_l2_trans }, { .name = "LD_BLOCKS", .desc = "Blocking loads", .code = 0x3, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_ld_blocks), .umasks = bdw_ld_blocks }, { .name = "LD_BLOCKS_PARTIAL", .desc = "Partial load blocks", .code = 0x7, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_ld_blocks_partial), .umasks = bdw_ld_blocks_partial }, { .name = "LOAD_HIT_PRE", .desc = "Load dispatches", .code = 0x4c, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_load_hit_pre), .umasks = bdw_load_hit_pre }, { .name = "LOCK_CYCLES", .desc = "Locked cycles in L1D and L2", .code = 0x63, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_lock_cycles), .umasks = bdw_lock_cycles }, { .name = "LONGEST_LAT_CACHE", .desc = "L3 cache", .code = 0x2e, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_longest_lat_cache), .umasks = bdw_longest_lat_cache }, { .name = "MACHINE_CLEARS", .desc = "Machine clear asserted", .code = 0xc3, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_machine_clears), .umasks = bdw_machine_clears }, { .name = "MEM_LOAD_UOPS_L3_HIT_RETIRED", .desc = "L3 hit load uops retired (Precise Event)", .code = 0xd2, .cntmsk = 0xf, .ngrp = 1, .flags = INTEL_X86_PEBS, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_mem_load_uops_l3_hit_retired), .umasks = bdw_mem_load_uops_l3_hit_retired }, { .name = "MEM_LOAD_UOPS_LLC_HIT_RETIRED", .desc = "L3 hit load uops retired (Precise Event)", .equiv = "MEM_LOAD_UOPS_L3_HIT_RETIRED", .code = 0xd2, .cntmsk = 0xf, .ngrp = 1, .flags = INTEL_X86_PEBS, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_mem_load_uops_l3_hit_retired), .umasks = bdw_mem_load_uops_l3_hit_retired }, { .name = "MEM_LOAD_UOPS_L3_MISS_RETIRED", .desc = "Load uops retired that missed 
the L3 (Precise Event)", .code = 0xd3, .cntmsk = 0xf, .ngrp = 1, .flags = INTEL_X86_PEBS, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_mem_load_uops_l3_miss_retired), .umasks = bdw_mem_load_uops_l3_miss_retired }, { .name = "MEM_LOAD_UOPS_LLC_MISS_RETIRED", .desc = "Load uops retired that missed the L3 (Precise Event)", .equiv = "MEM_LOAD_UOPS_L3_MISS_RETIRED", .code = 0xd3, .cntmsk = 0xf, .ngrp = 1, .flags = INTEL_X86_PEBS, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_mem_load_uops_l3_miss_retired), .umasks = bdw_mem_load_uops_l3_miss_retired }, { .name = "MEM_LOAD_UOPS_RETIRED", .desc = "Retired load uops (Precise Event)", .code = 0xd1, .cntmsk = 0xf, .ngrp = 1, .flags = INTEL_X86_PEBS, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_mem_load_uops_retired), .umasks = bdw_mem_load_uops_retired }, { .name = "MEM_TRANS_RETIRED", .desc = "Memory transactions retired (Precise Event)", .code = 0xcd, .cntmsk = 0x8, .ngrp = 1, .flags = INTEL_X86_PEBS, .modmsk = INTEL_V4_ATTRS | _INTEL_X86_ATTR_LDLAT, .numasks = LIBPFM_ARRAY_SIZE(bdw_mem_trans_retired), .umasks = bdw_mem_trans_retired }, { .name = "MEM_UOPS_RETIRED", .desc = "Memory uops retired (Precise Event)", .code = 0xd0, .cntmsk = 0xf, .ngrp = 1, .flags = INTEL_X86_PEBS, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_mem_uops_retired), .umasks = bdw_mem_uops_retired }, { .name = "MISALIGN_MEM_REF", .desc = "Misaligned memory references", .code = 0x5, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_misalign_mem_ref), .umasks = bdw_misalign_mem_ref }, { .name = "MOVE_ELIMINATION", .desc = "Move Elimination", .code = 0x58, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_move_elimination), .umasks = bdw_move_elimination }, { .name = "OFFCORE_REQUESTS", .desc = "Demand Data Read requests sent to uncore", .code = 0xb0, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = 
LIBPFM_ARRAY_SIZE(bdw_offcore_requests), .umasks = bdw_offcore_requests }, { .name = "OTHER_ASSISTS", .desc = "Software assist", .code = 0xc1, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_other_assists), .umasks = bdw_other_assists }, { .name = "RESOURCE_STALLS", .desc = "Cycles Allocation is stalled due to Resource Related reason", .code = 0xa2, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_resource_stalls), .umasks = bdw_resource_stalls }, { .name = "ROB_MISC_EVENTS", .desc = "ROB miscellaneous events", .code = 0xcc, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_rob_misc_events), .umasks = bdw_rob_misc_events }, { .name = "RS_EVENTS", .desc = "Reservation Station", .code = 0x5e, .cntmsk = 0xf, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_rs_events), .umasks = bdw_rs_events }, { .name = "RTM_RETIRED", .desc = "Restricted Transaction Memory execution (Precise Event)", .code = 0xc9, .cntmsk = 0xf, .ngrp = 1, .flags = INTEL_X86_PEBS, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_rtm_retired), .umasks = bdw_rtm_retired }, { .name = "TLB_FLUSH", .desc = "TLB flushes", .code = 0xbd, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_tlb_flush), .umasks = bdw_tlb_flush }, { .name = "UOPS_EXECUTED", .desc = "Uops executed", .code = 0xb1, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_uops_executed), .umasks = bdw_uops_executed }, { .name = "LSD", .desc = "Loop stream detector", .code = 0xa8, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_lsd), .umasks = bdw_lsd, }, { .name = "UOPS_EXECUTED_PORT", .desc = "Uops dispatch to specific ports", .code = 0xa1, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_uops_executed_port), .umasks = bdw_uops_executed_port }, { .name = 
"UOPS_ISSUED", .desc = "Uops issued", .code = 0xe, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_uops_issued), .umasks = bdw_uops_issued }, { .name = "ARITH", .desc = "Arithmetic uop", .code = 0x14, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_arith), .umasks = bdw_arith }, { .name = "UOPS_RETIRED", .desc = "Uops retired (Precise Event)", .code = 0xc2, .cntmsk = 0xff, .ngrp = 1, .flags = INTEL_X86_PEBS, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_uops_retired), .umasks = bdw_uops_retired }, { .name = "TX_MEM", .desc = "Transactional memory aborts", .code = 0x54, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_tx_mem), .umasks = bdw_tx_mem, }, { .name = "TX_EXEC", .desc = "Transactional execution", .code = 0x5d, .cntmsk = 0xff, .ngrp = 1, .modmsk = INTEL_V4_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(bdw_tx_exec), .umasks = bdw_tx_exec }, { .name = "OFFCORE_REQUESTS_OUTSTANDING", .desc = "Outstanding offcore requests", .modmsk = INTEL_V4_ATTRS, .cntmsk = 0xff, .code = 0x60, .numasks = LIBPFM_ARRAY_SIZE(bdw_offcore_requests_outstanding), .ngrp = 1, .umasks = bdw_offcore_requests_outstanding, }, { .name = "ILD_STALL", .desc = "Instruction Length Decoder stalls", .modmsk = INTEL_V4_ATTRS, .cntmsk = 0xff, .code = 0x87, .numasks = LIBPFM_ARRAY_SIZE(bdw_ild_stall), .ngrp = 1, .umasks = bdw_ild_stall, }, { .name = "PAGE_WALKER_LOADS", .desc = "Page walker loads", .modmsk = INTEL_V4_ATTRS, .cntmsk = 0xf, .code = 0xbc, .numasks = LIBPFM_ARRAY_SIZE(bdw_page_walker_loads), .ngrp = 1, .umasks = bdw_page_walker_loads, }, { .name = "DSB2MITE_SWITCHES", .desc = "Number of DSB to MITE switches", .modmsk = INTEL_V4_ATTRS, .cntmsk = 0xff, .code = 0xab, .numasks = LIBPFM_ARRAY_SIZE(bdw_dsb2mite_switches), .ngrp = 1, .umasks = bdw_dsb2mite_switches, }, { .name = "EPT", .desc = "Extended page table", .modmsk = INTEL_V4_ATTRS, .cntmsk = 0xff, .code = 0x4f, 
.numasks = LIBPFM_ARRAY_SIZE(bdw_ept), .ngrp = 1, .umasks = bdw_ept, }, { .name = "FP_ARITH", .desc = "Floating-point", .modmsk = INTEL_V4_ATTRS, .cntmsk = 0xf, .code = 0xc7, .numasks = LIBPFM_ARRAY_SIZE(bdw_fp_arith), .ngrp = 1, .umasks = bdw_fp_arith, }, { .name = "OFFCORE_REQUESTS_BUFFER", .desc = "Offcore request buffer", .modmsk = INTEL_V4_ATTRS, .cntmsk = 0xff, .code = 0xb2, .numasks = LIBPFM_ARRAY_SIZE(bdw_offcore_requests_buffer), .ngrp = 1, .umasks = bdw_offcore_requests_buffer, }, { .name = "UOPS_DISPATCHES_CANCELLED", .desc = "Micro-ops cancelled", .modmsk = INTEL_V4_ATTRS, .cntmsk = 0xf, .code = 0xa0, .numasks = LIBPFM_ARRAY_SIZE(bdw_uops_dispatches_cancelled), .ngrp = 1, .umasks = bdw_uops_dispatches_cancelled, }, { .name = "SQ_MISC", .desc = "SuperQueue miscellaneous", .modmsk = INTEL_V4_ATTRS, .cntmsk = 0xf, .code = 0xf4, .numasks = LIBPFM_ARRAY_SIZE(bdw_sq_misc), .ngrp = 1, .umasks = bdw_sq_misc, }, { .name = "OFFCORE_RESPONSE_0", .desc = "Offcore response event (must provide at least one request type and either any_response or any combination of supplier + snoop)", .modmsk = INTEL_V4_ATTRS, .cntmsk = 0xf, .code = 0x1b7, .flags= INTEL_X86_NHM_OFFCORE, .numasks = LIBPFM_ARRAY_SIZE(bdw_offcore_response), .ngrp = 3, .umasks = bdw_offcore_response, }, { .name = "OFFCORE_RESPONSE_1", .desc = "Offcore response event (must provide at least one request type and either any_response or any combination of supplier + snoop)", .modmsk = INTEL_V4_ATTRS, .cntmsk = 0xf, .code = 0x1bb, .flags= INTEL_X86_NHM_OFFCORE, .numasks = LIBPFM_ARRAY_SIZE(bdw_offcore_response), .ngrp = 3, .umasks = bdw_offcore_response, /* identical to actual umasks list for this event */ }, }; papi-5.6.0/src/ftests/Makefile.target.in000664 001750 001750 00000001020 13216244361 022155 0ustar00jshenry1963jshenry1963000000 000000 PACKAGE_TARNAME = @PACKAGE_TARNAME@ prefix = @prefix@ exec_prefix = @exec_prefix@ datarootdir = @datarootdir@ datadir = @datadir@/${PACKAGE_TARNAME} testlibdir = 
$(datadir)/testlib DATADIR = $(DESTDIR)$(datadir) INCLUDE = -I. -I@includedir@ -I$(testlibdir) LIBDIR = @libdir@ LIBRARY = @LIBRARY@ SHLIB=@SHLIB@ PAPILIB = ../@LINKLIB@ TESTLIB = $(testlibdir)/libtestlib.a LDFLAGS = @LDL@ CC = @CC@ F77 = @F77@ CC_R = @CC_R@ CFLAGS = @CFLAGS@ OMPCFLGS = @OMPCFLGS@ FFLAGS = @FFLAGS@ TOPTFLAGS= @TOPTFLAGS@ FTOPTFLAGS= @TOPTFLAGS@ papi-5.6.0/src/components/bgpm/NWunit/linux-NWunit.h000664 001750 001750 00000003220 13216244357 024414 0ustar00jshenry1963jshenry1963000000 000000 /****************************/ /* THIS IS OPEN SOURCE CODE */ /****************************/ /** * @file linux-NWunit.h * @author Heike Jagode * jagode@eecs.utk.edu * Mods: < your name here > * < your email address > * BGPM / NWunit component * * Tested version of bgpm (early access) * * @brief * This file has the source code for a component that enables PAPI-C to * access hardware monitoring counters for BG/Q through the bgpm library. */ #ifndef _PAPI_NWUNIT_H #define _PAPI_NWUNIT_H #include "papi.h" #include "papi_internal.h" #include "papi_vector.h" #include "papi_memory.h" #include "extras.h" #include "../../../linux-bgq-common.h" /************************* DEFINES SECTION *********************************** *******************************************************************************/ /* this number assumes that there will never be more events than indicated */ //#define NWUNIT_MAX_COUNTERS UPC_NW_ALL_LINKCTRS #define NWUNIT_MAX_COUNTERS UPC_NW_NUM_CTRS #define NWUNIT_MAX_EVENTS PEVT_NWUNIT_LAST_EVENT #define OFFSET ( PEVT_IOUNIT_LAST_EVENT + 1 ) /** Structure that stores private information of each event */ typedef struct NWUNIT_register { unsigned int selector; /* Signifies which counter slot is being used */ /* Indexed from 1 as 0 has a special meaning */ } NWUNIT_register_t; typedef struct NWUNIT_reg_alloc { NWUNIT_register_t ra_bits; } NWUNIT_reg_alloc_t; typedef struct NWUNIT_control_state { int EventGroup; long long counts[NWUNIT_MAX_COUNTERS]; } 
NWUNIT_control_state_t; typedef struct NWUNIT_context { NWUNIT_control_state_t state; } NWUNIT_context_t; #endif /* _PAPI_NWUNIT_H */ papi-5.6.0/src/components/host_micpower/configure000775 001750 001750 00000377233 13216244357 024324 0ustar00jshenry1963jshenry1963000000 000000 #! /bin/sh # Guess values for system-dependent variables and create Makefiles. # Generated by GNU Autoconf 2.63 for host_micpower version-0.1. # # Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, # 2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. # This configure script is free software; the Free Software Foundation # gives unlimited permission to copy, distribute and modify it. ## --------------------- ## ## M4sh Initialization. ## ## --------------------- ## # Be more Bourne compatible DUALCASE=1; export DUALCASE # for MKS sh if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then emulate sh NULLCMD=: # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which # is contrary to our usage. Disable this feature. alias -g '${1+"$@"}'='"$@"' setopt NO_GLOB_SUBST else case `(set -o) 2>/dev/null` in *posix*) set -o posix ;; esac fi # PATH needs CR # Avoid depending upon Character Ranges. as_cr_letters='abcdefghijklmnopqrstuvwxyz' as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' as_cr_Letters=$as_cr_letters$as_cr_LETTERS as_cr_digits='0123456789' as_cr_alnum=$as_cr_Letters$as_cr_digits as_nl=' ' export as_nl # Printing a long string crashes Solaris 7 /usr/bin/printf. 
as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo if (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then as_echo='printf %s\n' as_echo_n='printf %s' else if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' as_echo_n='/usr/ucb/echo -n' else as_echo_body='eval expr "X$1" : "X\\(.*\\)"' as_echo_n_body='eval arg=$1; case $arg in *"$as_nl"*) expr "X$arg" : "X\\(.*\\)$as_nl"; arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; esac; expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" ' export as_echo_n_body as_echo_n='sh -c $as_echo_n_body as_echo' fi export as_echo_body as_echo='sh -c $as_echo_body as_echo' fi # The user is always right. if test "${PATH_SEPARATOR+set}" != set; then PATH_SEPARATOR=: (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || PATH_SEPARATOR=';' } fi # Support unset when possible. if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then as_unset=unset else as_unset=false fi # IFS # We need space, tab and new line, in precisely that order. Quoting is # there to prevent editors from complaining about space-tab. # (If _AS_PATH_WALK were called with IFS unset, it would disable word # splitting by setting IFS to empty value.) IFS=" "" $as_nl" # Find who we are. Look in the path if we contain no directory separator. case $0 in *[\\/]* ) as_myself=$0 ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break done IFS=$as_save_IFS ;; esac # We did not find ourselves, most probably we were run as `sh COMMAND' # in which case we are not to be found in the path. if test "x$as_myself" = x; then as_myself=$0 fi if test ! 
-f "$as_myself"; then $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 { (exit 1); exit 1; } fi # Work around bugs in pre-3.0 UWIN ksh. for as_var in ENV MAIL MAILPATH do ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var done PS1='$ ' PS2='> ' PS4='+ ' # NLS nuisances. LC_ALL=C export LC_ALL LANGUAGE=C export LANGUAGE # Required to use basename. if expr a : '\(a\)' >/dev/null 2>&1 && test "X`expr 00001 : '.*\(...\)'`" = X001; then as_expr=expr else as_expr=false fi if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then as_basename=basename else as_basename=false fi # Name of the executable. as_me=`$as_basename -- "$0" || $as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ X"$0" : 'X\(//\)$' \| \ X"$0" : 'X\(/\)' \| . 2>/dev/null || $as_echo X/"$0" | sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/ q } /^X\/\(\/\/\)$/{ s//\1/ q } /^X\/\(\/\).*/{ s//\1/ q } s/.*/./; q'` # CDPATH. $as_unset CDPATH if test "x$CONFIG_SHELL" = x; then if (eval ":") 2>/dev/null; then as_have_required=yes else as_have_required=no fi if test $as_have_required = yes && (eval ": (as_func_return () { (exit \$1) } as_func_success () { as_func_return 0 } as_func_failure () { as_func_return 1 } as_func_ret_success () { return 0 } as_func_ret_failure () { return 1 } exitcode=0 if as_func_success; then : else exitcode=1 echo as_func_success failed. fi if as_func_failure; then exitcode=1 echo as_func_failure succeeded. fi if as_func_ret_success; then : else exitcode=1 echo as_func_ret_success failed. fi if as_func_ret_failure; then exitcode=1 echo as_func_ret_failure succeeded. fi if ( set x; as_func_ret_success y && test x = \"\$1\" ); then : else exitcode=1 echo positional parameters were not saved. 
fi test \$exitcode = 0) || { (exit 1); exit 1; } ( as_lineno_1=\$LINENO as_lineno_2=\$LINENO test \"x\$as_lineno_1\" != \"x\$as_lineno_2\" && test \"x\`expr \$as_lineno_1 + 1\`\" = \"x\$as_lineno_2\") || { (exit 1); exit 1; } ") 2> /dev/null; then : else as_candidate_shells= as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. case $as_dir in /*) for as_base in sh bash ksh sh5; do as_candidate_shells="$as_candidate_shells $as_dir/$as_base" done;; esac done IFS=$as_save_IFS for as_shell in $as_candidate_shells $SHELL; do # Try only shells that exist, to save several forks. if { test -f "$as_shell" || test -f "$as_shell.exe"; } && { ("$as_shell") 2> /dev/null <<\_ASEOF if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then emulate sh NULLCMD=: # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which # is contrary to our usage. Disable this feature. alias -g '${1+"$@"}'='"$@"' setopt NO_GLOB_SUBST else case `(set -o) 2>/dev/null` in *posix*) set -o posix ;; esac fi : _ASEOF }; then CONFIG_SHELL=$as_shell as_have_required=yes if { "$as_shell" 2> /dev/null <<\_ASEOF if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then emulate sh NULLCMD=: # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which # is contrary to our usage. Disable this feature. alias -g '${1+"$@"}'='"$@"' setopt NO_GLOB_SUBST else case `(set -o) 2>/dev/null` in *posix*) set -o posix ;; esac fi : (as_func_return () { (exit $1) } as_func_success () { as_func_return 0 } as_func_failure () { as_func_return 1 } as_func_ret_success () { return 0 } as_func_ret_failure () { return 1 } exitcode=0 if as_func_success; then : else exitcode=1 echo as_func_success failed. fi if as_func_failure; then exitcode=1 echo as_func_failure succeeded. fi if as_func_ret_success; then : else exitcode=1 echo as_func_ret_success failed. 
fi if as_func_ret_failure; then exitcode=1 echo as_func_ret_failure succeeded. fi if ( set x; as_func_ret_success y && test x = "$1" ); then : else exitcode=1 echo positional parameters were not saved. fi test $exitcode = 0) || { (exit 1); exit 1; } ( as_lineno_1=$LINENO as_lineno_2=$LINENO test "x$as_lineno_1" != "x$as_lineno_2" && test "x`expr $as_lineno_1 + 1`" = "x$as_lineno_2") || { (exit 1); exit 1; } _ASEOF }; then break fi fi done if test "x$CONFIG_SHELL" != x; then for as_var in BASH_ENV ENV do ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var done export CONFIG_SHELL exec "$CONFIG_SHELL" "$as_myself" ${1+"$@"} fi if test $as_have_required = no; then echo This script requires a shell more modern than all the echo shells that I found on your system. Please install a echo modern shell, or manually run the script under such a echo shell if you do have one. { (exit 1); exit 1; } fi fi fi (eval "as_func_return () { (exit \$1) } as_func_success () { as_func_return 0 } as_func_failure () { as_func_return 1 } as_func_ret_success () { return 0 } as_func_ret_failure () { return 1 } exitcode=0 if as_func_success; then : else exitcode=1 echo as_func_success failed. fi if as_func_failure; then exitcode=1 echo as_func_failure succeeded. fi if as_func_ret_success; then : else exitcode=1 echo as_func_ret_success failed. fi if as_func_ret_failure; then exitcode=1 echo as_func_ret_failure succeeded. fi if ( set x; as_func_ret_success y && test x = \"\$1\" ); then : else exitcode=1 echo positional parameters were not saved. fi test \$exitcode = 0") || { echo No shell found that supports shell functions. echo Please tell bug-autoconf@gnu.org about your system, echo including any error possibly output before this message. echo This can help us improve future autoconf versions. echo Configuration will now proceed without shell functions. 
} as_lineno_1=$LINENO as_lineno_2=$LINENO test "x$as_lineno_1" != "x$as_lineno_2" && test "x`expr $as_lineno_1 + 1`" = "x$as_lineno_2" || { # Create $as_me.lineno as a copy of $as_myself, but with $LINENO # uniformly replaced by the line number. The first 'sed' inserts a # line-number line after each line using $LINENO; the second 'sed' # does the real work. The second script uses 'N' to pair each # line-number line with the line containing $LINENO, and appends # trailing '-' during substitution so that $LINENO is not a special # case at line end. # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the # scripts with optimization help from Paolo Bonzini. Blame Lee # E. McMahon (1931-1989) for sed's syntax. :-) sed -n ' p /[$]LINENO/= ' <$as_myself | sed ' s/[$]LINENO.*/&-/ t lineno b :lineno N :loop s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/ t loop s/-\n.*// ' >$as_me.lineno && chmod +x "$as_me.lineno" || { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2 { (exit 1); exit 1; }; } # Don't try to exec as it changes $[0], causing all sort of problems # (the dirname of $[0] is not the place where we might find the # original and so on. Autoconf is especially sensitive to this). . "./$as_me.lineno" # Exit status is that of the last command. exit } if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then as_dirname=dirname else as_dirname=false fi ECHO_C= ECHO_N= ECHO_T= case `echo -n x` in -n*) case `echo 'x\c'` in *c*) ECHO_T=' ';; # ECHO_T is single tab character. *) ECHO_C='\c';; esac;; *) ECHO_N='-n';; esac if expr a : '\(a\)' >/dev/null 2>&1 && test "X`expr 00001 : '.*\(...\)'`" = X001; then as_expr=expr else as_expr=false fi rm -f conf$$ conf$$.exe conf$$.file if test -d conf$$.dir; then rm -f conf$$.dir/conf$$.file else rm -f conf$$.dir mkdir conf$$.dir 2>/dev/null fi if (echo >conf$$.file) 2>/dev/null; then if ln -s conf$$.file conf$$ 2>/dev/null; then as_ln_s='ln -s' # ... 
but there are two gotchas: # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. # In both cases, we have to default to `cp -p'. ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || as_ln_s='cp -p' elif ln conf$$.file conf$$ 2>/dev/null; then as_ln_s=ln else as_ln_s='cp -p' fi else as_ln_s='cp -p' fi rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file rmdir conf$$.dir 2>/dev/null if mkdir -p . 2>/dev/null; then as_mkdir_p=: else test -d ./-p && rmdir ./-p as_mkdir_p=false fi if test -x / >/dev/null 2>&1; then as_test_x='test -x' else if ls -dL / >/dev/null 2>&1; then as_ls_L_option=L else as_ls_L_option= fi as_test_x=' eval sh -c '\'' if test -d "$1"; then test -d "$1/."; else case $1 in -*)set "./$1";; esac; case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in ???[sx]*):;;*)false;;esac;fi '\'' sh ' fi as_executable_p=$as_test_x # Sed expression to map a string onto a valid CPP name. as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" # Sed expression to map a string onto a valid variable name. as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" exec 7<&0 </dev/null 6>&1 # Name of the host. # hostname on some systems (SVR3.2, Linux) returns a bogus exit status, # so uname gets run too. ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q` # # Initializations. # ac_default_prefix=/usr/local ac_clean_files= ac_config_libobj_dir=. LIBOBJS= cross_compiling=no subdirs= MFLAGS= MAKEFLAGS= SHELL=${CONFIG_SHELL-/bin/sh} # Identity of this package. PACKAGE_NAME='host_micpower' PACKAGE_TARNAME='host_micpower' PACKAGE_VERSION='version-0.1' PACKAGE_STRING='host_micpower version-0.1' PACKAGE_BUGREPORT='' # Factoring default headers for most tests. 
ac_includes_default="\ #include #ifdef HAVE_SYS_TYPES_H # include #endif #ifdef HAVE_SYS_STAT_H # include #endif #ifdef STDC_HEADERS # include # include #else # ifdef HAVE_STDLIB_H # include # endif #endif #ifdef HAVE_STRING_H # if !defined STDC_HEADERS && defined HAVE_MEMORY_H # include # endif # include #endif #ifdef HAVE_STRINGS_H # include #endif #ifdef HAVE_INTTYPES_H # include #endif #ifdef HAVE_STDINT_H # include #endif #ifdef HAVE_UNISTD_H # include #endif" ac_subst_vars='LTLIBOBJS LIBOBJS EGREP GREP CPP SYSMGMT_LIBS SYSMGMT_CFLAGS OBJEXT EXEEXT ac_ct_CC CPPFLAGS LDFLAGS CFLAGS CC target_alias host_alias build_alias LIBS ECHO_T ECHO_N ECHO_C DEFS mandir localedir libdir psdir pdfdir dvidir htmldir infodir docdir oldincludedir includedir localstatedir sharedstatedir sysconfdir datadir datarootdir libexecdir sbindir bindir program_transform_name prefix exec_prefix PACKAGE_BUGREPORT PACKAGE_STRING PACKAGE_VERSION PACKAGE_TARNAME PACKAGE_NAME PATH_SEPARATOR SHELL' ac_subst_files='' ac_user_opts=' enable_option_checking with_sysmgmt_include_path with_sysmgmt_lib_path ' ac_precious_vars='build_alias host_alias target_alias CC CFLAGS LDFLAGS LIBS CPPFLAGS CPP' # Initialize some variables set by options. ac_init_help= ac_init_version=false ac_unrecognized_opts= ac_unrecognized_sep= # The variables have the same names as the options, with # dashes changed to underlines. cache_file=/dev/null exec_prefix=NONE no_create= no_recursion= prefix=NONE program_prefix=NONE program_suffix=NONE program_transform_name=s,x,x, silent= site= srcdir= verbose= x_includes=NONE x_libraries=NONE # Installation directory options. # These are left unexpanded so users can "make install exec_prefix=/foo" # and all the variables that are supposed to be based on exec_prefix # by default will actually change. # Use braces instead of parens because sh, perl, etc. also accept them. # (The list follows the same order as the GNU Coding Standards.) 
# Installation-directory defaults and command-line option parsing.
#
# The directory variables below are assigned single-quoted, so the
# ${prefix} / ${exec_prefix} / ${datarootdir} references inside them stay
# unexpanded at this point.
#
# The `for ac_option' loop then walks the positional parameters:
#   - if $ac_prev is set, the current word is stored as the value of the
#     variable named by $ac_prev;
#   - for `--opt=value' words the part after the first `=' goes into
#     $ac_optarg, otherwise $ac_optarg is `yes';
#   - --enable-* / --disable-* / --with-* / --without-* set enable_NAME or
#     with_NAME (with -, + and . in NAME mapped to _); names not listed in
#     $ac_user_opts are accumulated in $ac_unrecognized_opts;
#   - `VAR=value' words are validated, eval'ed and exported;
#   - any other word triggers a warning and is used for build_alias,
#     host_alias and target_alias where those are still unset.
# After the loop a dangling $ac_prev is reported as a missing argument, and
# $ac_unrecognized_opts is handled according to $enable_option_checking
# (no: ignored, fatal: error, anything else: warning).
#
# NOTE(review): the abbreviation list for --program-transform-name contains
# `--progr-tra', which looks like a typo for `--program-tra'; it is left as
# generated -- confirm against upstream Autoconf before changing it.
bindir='${exec_prefix}/bin' sbindir='${exec_prefix}/sbin' libexecdir='${exec_prefix}/libexec' datarootdir='${prefix}/share' datadir='${datarootdir}' sysconfdir='${prefix}/etc' sharedstatedir='${prefix}/com' localstatedir='${prefix}/var' includedir='${prefix}/include' oldincludedir='/usr/include' docdir='${datarootdir}/doc/${PACKAGE_TARNAME}' infodir='${datarootdir}/info' htmldir='${docdir}' dvidir='${docdir}' pdfdir='${docdir}' psdir='${docdir}' libdir='${exec_prefix}/lib' localedir='${datarootdir}/locale' mandir='${datarootdir}/man' ac_prev= ac_dashdash= for ac_option do # If the previous option needs an argument, assign it. if test -n "$ac_prev"; then eval $ac_prev=\$ac_option ac_prev= continue fi case $ac_option in *=*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;; *) ac_optarg=yes ;; esac # Accept the important Cygnus configure options, so we can diagnose typos. case $ac_dashdash$ac_option in --) ac_dashdash=yes ;; -bindir | --bindir | --bindi | --bind | --bin | --bi) ac_prev=bindir ;; -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) bindir=$ac_optarg ;; -build | --build | --buil | --bui | --bu) ac_prev=build_alias ;; -build=* | --build=* | --buil=* | --bui=* | --bu=*) build_alias=$ac_optarg ;; -cache-file | --cache-file | --cache-fil | --cache-fi \ | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) ac_prev=cache_file ;; -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) cache_file=$ac_optarg ;; --config-cache | -C) cache_file=config.cache ;; -datadir | --datadir | --datadi | --datad) ac_prev=datadir ;; -datadir=* | --datadir=* | --datadi=* | --datad=*) datadir=$ac_optarg ;; -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \ | --dataroo | --dataro | --datar) ac_prev=datarootdir ;; -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \ | --dataroot=* | --dataroo=* | --dataro=* | --datar=*) 
datarootdir=$ac_optarg ;; -disable-* | --disable-*) ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && { $as_echo "$as_me: error: invalid feature name: $ac_useropt" >&2 { (exit 1); exit 1; }; } ac_useropt_orig=$ac_useropt ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "enable_$ac_useropt" "*) ;; *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--disable-$ac_useropt_orig" ac_unrecognized_sep=', ';; esac eval enable_$ac_useropt=no ;; -docdir | --docdir | --docdi | --doc | --do) ac_prev=docdir ;; -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*) docdir=$ac_optarg ;; -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv) ac_prev=dvidir ;; -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*) dvidir=$ac_optarg ;; -enable-* | --enable-*) ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && { $as_echo "$as_me: error: invalid feature name: $ac_useropt" >&2 { (exit 1); exit 1; }; } ac_useropt_orig=$ac_useropt ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "enable_$ac_useropt" "*) ;; *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--enable-$ac_useropt_orig" ac_unrecognized_sep=', ';; esac eval enable_$ac_useropt=\$ac_optarg ;; -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ | --exec | --exe | --ex) ac_prev=exec_prefix ;; -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ | --exec=* | --exe=* | --ex=*) exec_prefix=$ac_optarg ;; -gas | --gas | --ga | --g) # Obsolete; use --with-gas. 
with_gas=yes ;; -help | --help | --hel | --he | -h) ac_init_help=long ;; -help=r* | --help=r* | --hel=r* | --he=r* | -hr*) ac_init_help=recursive ;; -help=s* | --help=s* | --hel=s* | --he=s* | -hs*) ac_init_help=short ;; -host | --host | --hos | --ho) ac_prev=host_alias ;; -host=* | --host=* | --hos=* | --ho=*) host_alias=$ac_optarg ;; -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht) ac_prev=htmldir ;; -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \ | --ht=*) htmldir=$ac_optarg ;; -includedir | --includedir | --includedi | --included | --include \ | --includ | --inclu | --incl | --inc) ac_prev=includedir ;; -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ | --includ=* | --inclu=* | --incl=* | --inc=*) includedir=$ac_optarg ;; -infodir | --infodir | --infodi | --infod | --info | --inf) ac_prev=infodir ;; -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) infodir=$ac_optarg ;; -libdir | --libdir | --libdi | --libd) ac_prev=libdir ;; -libdir=* | --libdir=* | --libdi=* | --libd=*) libdir=$ac_optarg ;; -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ | --libexe | --libex | --libe) ac_prev=libexecdir ;; -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ | --libexe=* | --libex=* | --libe=*) libexecdir=$ac_optarg ;; -localedir | --localedir | --localedi | --localed | --locale) ac_prev=localedir ;; -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*) localedir=$ac_optarg ;; -localstatedir | --localstatedir | --localstatedi | --localstated \ | --localstate | --localstat | --localsta | --localst | --locals) ac_prev=localstatedir ;; -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*) localstatedir=$ac_optarg ;; -mandir | --mandir | --mandi | --mand | --man | --ma | --m) ac_prev=mandir ;; -mandir=* | --mandir=* | --mandi=* | 
--mand=* | --man=* | --ma=* | --m=*) mandir=$ac_optarg ;; -nfp | --nfp | --nf) # Obsolete; use --without-fp. with_fp=no ;; -no-create | --no-create | --no-creat | --no-crea | --no-cre \ | --no-cr | --no-c | -n) no_create=yes ;; -no-recursion | --no-recursion | --no-recursio | --no-recursi \ | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) no_recursion=yes ;; -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ | --oldin | --oldi | --old | --ol | --o) ac_prev=oldincludedir ;; -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) oldincludedir=$ac_optarg ;; -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) ac_prev=prefix ;; -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) prefix=$ac_optarg ;; -program-prefix | --program-prefix | --program-prefi | --program-pref \ | --program-pre | --program-pr | --program-p) ac_prev=program_prefix ;; -program-prefix=* | --program-prefix=* | --program-prefi=* \ | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) program_prefix=$ac_optarg ;; -program-suffix | --program-suffix | --program-suffi | --program-suff \ | --program-suf | --program-su | --program-s) ac_prev=program_suffix ;; -program-suffix=* | --program-suffix=* | --program-suffi=* \ | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) program_suffix=$ac_optarg ;; -program-transform-name | --program-transform-name \ | --program-transform-nam | --program-transform-na \ | --program-transform-n | --program-transform- \ | --program-transform | --program-transfor \ | --program-transfo | --program-transf \ | --program-trans | --program-tran \ | --progr-tra | --program-tr | --program-t) ac_prev=program_transform_name ;; -program-transform-name=* | 
--program-transform-name=* \ | --program-transform-nam=* | --program-transform-na=* \ | --program-transform-n=* | --program-transform-=* \ | --program-transform=* | --program-transfor=* \ | --program-transfo=* | --program-transf=* \ | --program-trans=* | --program-tran=* \ | --progr-tra=* | --program-tr=* | --program-t=*) program_transform_name=$ac_optarg ;; -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd) ac_prev=pdfdir ;; -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*) pdfdir=$ac_optarg ;; -psdir | --psdir | --psdi | --psd | --ps) ac_prev=psdir ;; -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*) psdir=$ac_optarg ;; -q | -quiet | --quiet | --quie | --qui | --qu | --q \ | -silent | --silent | --silen | --sile | --sil) silent=yes ;; -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) ac_prev=sbindir ;; -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ | --sbi=* | --sb=*) sbindir=$ac_optarg ;; -sharedstatedir | --sharedstatedir | --sharedstatedi \ | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ | --sharedst | --shareds | --shared | --share | --shar \ | --sha | --sh) ac_prev=sharedstatedir ;; -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \ | --sha=* | --sh=*) sharedstatedir=$ac_optarg ;; -site | --site | --sit) ac_prev=site ;; -site=* | --site=* | --sit=*) site=$ac_optarg ;; -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) ac_prev=srcdir ;; -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) srcdir=$ac_optarg ;; -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ | --syscon | --sysco | --sysc | --sys | --sy) ac_prev=sysconfdir ;; -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) sysconfdir=$ac_optarg ;; -target | --target | --targe | 
--targ | --tar | --ta | --t) ac_prev=target_alias ;; -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) target_alias=$ac_optarg ;; -v | -verbose | --verbose | --verbos | --verbo | --verb) verbose=yes ;; -version | --version | --versio | --versi | --vers | -V) ac_init_version=: ;; -with-* | --with-*) ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && { $as_echo "$as_me: error: invalid package name: $ac_useropt" >&2 { (exit 1); exit 1; }; } ac_useropt_orig=$ac_useropt ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "with_$ac_useropt" "*) ;; *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--with-$ac_useropt_orig" ac_unrecognized_sep=', ';; esac eval with_$ac_useropt=\$ac_optarg ;; -without-* | --without-*) ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && { $as_echo "$as_me: error: invalid package name: $ac_useropt" >&2 { (exit 1); exit 1; }; } ac_useropt_orig=$ac_useropt ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "with_$ac_useropt" "*) ;; *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--without-$ac_useropt_orig" ac_unrecognized_sep=', ';; esac eval with_$ac_useropt=no ;; --x) # Obsolete; use --with-x. 
with_x=yes ;; -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ | --x-incl | --x-inc | --x-in | --x-i) ac_prev=x_includes ;; -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) x_includes=$ac_optarg ;; -x-libraries | --x-libraries | --x-librarie | --x-librari \ | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) ac_prev=x_libraries ;; -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) x_libraries=$ac_optarg ;; -*) { $as_echo "$as_me: error: unrecognized option: $ac_option Try \`$0 --help' for more information." >&2 { (exit 1); exit 1; }; } ;; *=*) ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='` # Reject names that are not valid shell variable names. expr "x$ac_envvar" : ".*[^_$as_cr_alnum]" >/dev/null && { $as_echo "$as_me: error: invalid variable name: $ac_envvar" >&2 { (exit 1); exit 1; }; } eval $ac_envvar=\$ac_optarg export $ac_envvar ;; *) # FIXME: should be removed in autoconf 3.0. $as_echo "$as_me: WARNING: you should use --build, --host, --target" >&2 expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null && $as_echo "$as_me: WARNING: invalid host type: $ac_option" >&2 : ${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option} ;; esac done if test -n "$ac_prev"; then ac_option=--`echo $ac_prev | sed 's/_/-/g'` { $as_echo "$as_me: error: missing argument to $ac_option" >&2 { (exit 1); exit 1; }; } fi if test -n "$ac_unrecognized_opts"; then case $enable_option_checking in no) ;; fatal) { $as_echo "$as_me: error: unrecognized options: $ac_unrecognized_opts" >&2 { (exit 1); exit 1; }; } ;; *) $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;; esac fi # Check all directory arguments for consistency. 
# ---------------------------------------------------------------------------
# Post-parse validation, source-directory discovery and informational output.
#
#  - Strip trailing slashes from every installation directory and insist on
#    absolute names (values starting with `$' are let through; unset
#    prefix/exec_prefix are allowed).
#  - Derive build/host/target and the cross-compilation guess.
#  - Locate $srcdir (directory of this script, else `..') and snapshot the
#    precious variables into ac_env_* / ac_cv_env_*.
#  - Print --help / --help=short / --help=recursive or --version and exit.
#  - Start config.log (fd 5) with the invocation line and platform data.
# ---------------------------------------------------------------------------
for ac_var in	exec_prefix prefix bindir sbindir libexecdir datarootdir \
		datadir sysconfdir sharedstatedir localstatedir includedir \
		oldincludedir docdir infodir htmldir dvidir pdfdir psdir \
		libdir localedir mandir
do
  eval ac_val=\$$ac_var
  # Remove trailing slashes.
  case $ac_val in
    */ )
      ac_val=`expr "X$ac_val" : 'X\(.*[^/]\)' \| "X$ac_val" : 'X\(.*\)'`
      eval $ac_var=\$ac_val;;
  esac
  # Be sure to have absolute directory names.
  case $ac_val in
    [\\/$]* | ?:[\\/]* )  continue;;
    NONE | '' ) case $ac_var in *prefix ) continue;; esac;;
  esac
  { $as_echo "$as_me: error: expected an absolute directory name for --$ac_var: $ac_val" >&2
   { (exit 1); exit 1; }; }
done

# There might be people who depend on the old broken behavior: `$host'
# used to hold the argument of --host etc.
# FIXME: To remove some day.
build=$build_alias
host=$host_alias
target=$target_alias

# FIXME: To remove some day.
if test "x$host_alias" != x; then
  if test "x$build_alias" = x; then
    cross_compiling=maybe
    $as_echo "$as_me: WARNING: If you wanted to set the --build type, don't use --host. If a cross compiler is detected then cross compile mode will be used." >&2
  elif test "x$build_alias" != "x$host_alias"; then
    cross_compiling=yes
  fi
fi

ac_tool_prefix=
test -n "$host_alias" && ac_tool_prefix=$host_alias-

# With --silent, progress messages (written to fd 6) are discarded.
test "$silent" = yes && exec 6>/dev/null


# Make sure `pwd' really names the current directory (compare inode numbers).
ac_pwd=`pwd` && test -n "$ac_pwd" &&
ac_ls_di=`ls -di .` &&
ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` ||
  { $as_echo "$as_me: error: working directory cannot be determined" >&2
   { (exit 1); exit 1; }; }
test "X$ac_ls_di" = "X$ac_pwd_ls_di" ||
  { $as_echo "$as_me: error: pwd does not report name of working directory" >&2
   { (exit 1); exit 1; }; }


# Find the source files, if location was not specified.
if test -z "$srcdir"; then
  ac_srcdir_defaulted=yes
  # Try the directory containing this script, then the parent directory.
  ac_confdir=`$as_dirname -- "$as_myself" ||
$as_expr X"$as_myself" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
	 X"$as_myself" : 'X\(//\)[^/]' \| \
	 X"$as_myself" : 'X\(//\)$' \| \
	 X"$as_myself" : 'X\(/\)' \| . 2>/dev/null ||
$as_echo X"$as_myself" |
    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
	    s//\1/
	    q
	  }
	  /^X\(\/\/\)[^/].*/{
	    s//\1/
	    q
	  }
	  /^X\(\/\/\)$/{
	    s//\1/
	    q
	  }
	  /^X\(\/\).*/{
	    s//\1/
	    q
	  }
	  s/.*/./; q'`
  srcdir=$ac_confdir
  if test ! -r "$srcdir/$ac_unique_file"; then
    srcdir=..
  fi
else
  ac_srcdir_defaulted=no
fi
if test ! -r "$srcdir/$ac_unique_file"; then
  test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .."
  { $as_echo "$as_me: error: cannot find sources ($ac_unique_file) in $srcdir" >&2
   { (exit 1); exit 1; }; }
fi
ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work"
ac_abs_confdir=`(
	cd "$srcdir" && test -r "./$ac_unique_file" || { $as_echo "$as_me: error: $ac_msg" >&2
   { (exit 1); exit 1; }; }
	pwd)`
# When building in place, set srcdir=.
if test "$ac_abs_confdir" = "$ac_pwd"; then
  srcdir=.
fi
# Remove unnecessary trailing slashes from srcdir.
# Double slashes in file names in object file debugging info
# mess up M-x gdb in Emacs.
case $srcdir in
*/) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;;
esac
# Remember whether each precious variable was set, and to what value.
for ac_var in $ac_precious_vars; do
  eval ac_env_${ac_var}_set=\${${ac_var}+set}
  eval ac_env_${ac_var}_value=\$${ac_var}
  eval ac_cv_env_${ac_var}_set=\${${ac_var}+set}
  eval ac_cv_env_${ac_var}_value=\$${ac_var}
done

#
# Report the --help message.
#
if test "$ac_init_help" = "long"; then
  # Omit some internal or obsolete options to make the list less imposing.
  # This message is too long to be a string in the A/UX 3.1 sh.
  cat <<_ACEOF
\`configure' configures host_micpower version-0.1 to adapt to many kinds of systems.

Usage: $0 [OPTION]... [VAR=VALUE]...

To assign environment variables (e.g., CC, CFLAGS...), specify them as
VAR=VALUE.  See below for descriptions of some of the useful variables.

Defaults for the options are specified in brackets.

Configuration:
  -h, --help              display this help and exit
      --help=short        display options specific to this package
      --help=recursive    display the short help of all the included packages
  -V, --version           display version information and exit
  -q, --quiet, --silent   do not print \`checking...' messages
      --cache-file=FILE   cache test results in FILE [disabled]
  -C, --config-cache      alias for \`--cache-file=config.cache'
  -n, --no-create         do not create output files
      --srcdir=DIR        find the sources in DIR [configure dir or \`..']

Installation directories:
  --prefix=PREFIX         install architecture-independent files in PREFIX
                          [$ac_default_prefix]
  --exec-prefix=EPREFIX   install architecture-dependent files in EPREFIX
                          [PREFIX]

By default, \`make install' will install all the files in
\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc.  You can specify
an installation prefix other than \`$ac_default_prefix' using \`--prefix',
for instance \`--prefix=\$HOME'.

For better control, use the options below.

Fine tuning of the installation directories:
  --bindir=DIR            user executables [EPREFIX/bin]
  --sbindir=DIR           system admin executables [EPREFIX/sbin]
  --libexecdir=DIR        program executables [EPREFIX/libexec]
  --sysconfdir=DIR        read-only single-machine data [PREFIX/etc]
  --sharedstatedir=DIR    modifiable architecture-independent data [PREFIX/com]
  --localstatedir=DIR     modifiable single-machine data [PREFIX/var]
  --libdir=DIR            object code libraries [EPREFIX/lib]
  --includedir=DIR        C header files [PREFIX/include]
  --oldincludedir=DIR     C header files for non-gcc [/usr/include]
  --datarootdir=DIR       read-only arch.-independent data root [PREFIX/share]
  --datadir=DIR           read-only architecture-independent data [DATAROOTDIR]
  --infodir=DIR           info documentation [DATAROOTDIR/info]
  --localedir=DIR         locale-dependent data [DATAROOTDIR/locale]
  --mandir=DIR            man documentation [DATAROOTDIR/man]
  --docdir=DIR            documentation root [DATAROOTDIR/doc/host_micpower]
  --htmldir=DIR           html documentation [DOCDIR]
  --dvidir=DIR            dvi documentation [DOCDIR]
  --pdfdir=DIR            pdf documentation [DOCDIR]
  --psdir=DIR             ps documentation [DOCDIR]
_ACEOF

  cat <<\_ACEOF
_ACEOF
fi

if test -n "$ac_init_help"; then
  case $ac_init_help in
     short | recursive ) echo "Configuration of host_micpower version-0.1:";;
   esac
  cat <<\_ACEOF

Optional Packages:
  --with-PACKAGE[=ARG]    use PACKAGE [ARG=yes]
  --without-PACKAGE       do not use PACKAGE (same as --with-PACKAGE=no)
  --with-sysmgmt-include-path
                          location of the MPSS sysmgmt api headers, defaults
                          to /opt/intel/mic/sysmgmt/sdk/include
  --with-sysmgmt-lib-path
                          location of the MPSS sysmgmt libraries, feed to the
                          runtime linker; defaults to
                          /opt/intel/mic/sysmgmt/sdk/lib/Linux

Some influential environment variables:
  CC          C compiler command
  CFLAGS      C compiler flags
  LDFLAGS     linker flags, e.g. -L<lib dir> if you have libraries in a
              nonstandard directory <lib dir>
  LIBS        libraries to pass to the linker, e.g. -l<library>
  CPPFLAGS    C/C++/Objective C preprocessor flags, e.g. -I<include dir> if
              you have headers in a nonstandard directory <include dir>
  CPP         C preprocessor

Use these variables to override the choices made by `configure' or to help
it to find libraries and programs with nonstandard names/locations.

_ACEOF
ac_status=$?
fi

if test "$ac_init_help" = "recursive"; then
  # If there are subdirs, report their specific --help.
  for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue
    test -d "$ac_dir" ||
      { cd "$srcdir" && ac_pwd=`pwd` && srcdir=. && test -d "$ac_dir"; } ||
      continue
    ac_builddir=.

case "$ac_dir" in
.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;;
*)
  ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'`
  # A ".." for each directory in $ac_dir_suffix.
  ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'`
  case $ac_top_builddir_sub in
  "") ac_top_builddir_sub=. ac_top_build_prefix= ;;
  *)  ac_top_build_prefix=$ac_top_builddir_sub/ ;;
  esac ;;
esac
ac_abs_top_builddir=$ac_pwd
ac_abs_builddir=$ac_pwd$ac_dir_suffix
# for backward compatibility:
ac_top_builddir=$ac_top_build_prefix

case $srcdir in
  .)  # We are building in place.
    ac_srcdir=.
    ac_top_srcdir=$ac_top_builddir_sub
    ac_abs_top_srcdir=$ac_pwd ;;
  [\\/]* | ?:[\\/]* )  # Absolute name.
    ac_srcdir=$srcdir$ac_dir_suffix;
    ac_top_srcdir=$srcdir
    ac_abs_top_srcdir=$srcdir ;;
  *) # Relative name.
    ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix
    ac_top_srcdir=$ac_top_build_prefix$srcdir
    ac_abs_top_srcdir=$ac_pwd/$srcdir ;;
esac
ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix

    cd "$ac_dir" || { ac_status=$?; continue; }
    # Check for guested configure.
    if test -f "$ac_srcdir/configure.gnu"; then
      echo &&
      $SHELL "$ac_srcdir/configure.gnu" --help=recursive
    elif test -f "$ac_srcdir/configure"; then
      echo &&
      $SHELL "$ac_srcdir/configure" --help=recursive
    else
      $as_echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2
    fi || ac_status=$?
    cd "$ac_pwd" || { ac_status=$?; break; }
  done
fi

# Any form of --help ends the run here.
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
  cat <<\_ACEOF
host_micpower configure version-0.1
generated by GNU Autoconf 2.63

Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
This configure script is free software; the Free Software Foundation
gives unlimited permission to copy, distribute and modify it.
_ACEOF
  exit
fi
cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.

It was created by host_micpower $as_me version-0.1, which was
generated by GNU Autoconf 2.63.  Invocation command line was

  $ $0 $@

_ACEOF
# fd 5 appends to config.log for the rest of the run.
exec 5>>config.log
{
cat <<_ASUNAME
## --------- ##
## Platform. ##
## --------- ##

hostname = `(hostname || uname -n) 2>/dev/null | sed 1q`
uname -m = `(uname -m) 2>/dev/null || echo unknown`
uname -r = `(uname -r) 2>/dev/null || echo unknown`
uname -s = `(uname -s) 2>/dev/null || echo unknown`
uname -v = `(uname -v) 2>/dev/null || echo unknown`

/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown`
/bin/uname -X     = `(/bin/uname -X) 2>/dev/null     || echo unknown`

/bin/arch              = `(/bin/arch) 2>/dev/null              || echo unknown`
/usr/bin/arch -k       = `(/usr/bin/arch -k) 2>/dev/null       || echo unknown`
/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown`
/usr/bin/hostinfo      = `(/usr/bin/hostinfo) 2>/dev/null      || echo unknown`
/bin/machine           = `(/bin/machine) 2>/dev/null           || echo unknown`
/usr/bin/oslevel       = `(/usr/bin/oslevel) 2>/dev/null       || echo unknown`
/bin/universe          = `(/bin/universe) 2>/dev/null          || echo unknown`

_ASUNAME

as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
for as_dir in $PATH
do
  IFS=$as_save_IFS
  test -z "$as_dir" && as_dir=.
  $as_echo "PATH: $as_dir"
done
IFS=$as_save_IFS

} >&5

cat >&5 <<_ACEOF


## ----------- ##
## Core tests. ##
## ----------- ##

_ACEOF


# Keep a trace of the command line.
# Strip out --no-create and --no-recursion so they do not pile up.
# Strip out --silent because we don't want to record it for future runs.
# Also quote any args containing shell meta-characters.
# Make two passes to allow for proper duplicate-argument suppression.
ac_configure_args= ac_configure_args0= ac_configure_args1= ac_must_keep_next=false for ac_pass in 1 2 do for ac_arg do case $ac_arg in -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;; -q | -quiet | --quiet | --quie | --qui | --qu | --q \ | -silent | --silent | --silen | --sile | --sil) continue ;; *\'*) ac_arg=`$as_echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; esac case $ac_pass in 1) ac_configure_args0="$ac_configure_args0 '$ac_arg'" ;; 2) ac_configure_args1="$ac_configure_args1 '$ac_arg'" if test $ac_must_keep_next = true; then ac_must_keep_next=false # Got value, back to normal. else case $ac_arg in *=* | --config-cache | -C | -disable-* | --disable-* \ | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \ | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \ | -with-* | --with-* | -without-* | --without-* | --x) case "$ac_configure_args0 " in "$ac_configure_args1"*" '$ac_arg' "* ) continue ;; esac ;; -* ) ac_must_keep_next=true ;; esac fi ac_configure_args="$ac_configure_args '$ac_arg'" ;; esac done done $as_unset ac_configure_args0 || test "${ac_configure_args0+set}" != set || { ac_configure_args0=; export ac_configure_args0; } $as_unset ac_configure_args1 || test "${ac_configure_args1+set}" != set || { ac_configure_args1=; export ac_configure_args1; } # When interrupted or exit'd, cleanup temporary files, and complete # config.log. We remove comments because anyway the quotes in there # would cause problems or look ugly. # WARNING: Use '\'' to represent an apostrophe within the trap. # WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug. trap 'exit_status=$? # Save into config.log some information that might help in debugging. { echo cat <<\_ASBOX ## ---------------- ## ## Cache variables. 
## ## ---------------- ## _ASBOX echo # The following way of writing the cache mishandles newlines in values, ( for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do eval ac_val=\$$ac_var case $ac_val in #( *${as_nl}*) case $ac_var in #( *_cv_*) { $as_echo "$as_me:$LINENO: WARNING: cache variable $ac_var contains a newline" >&5 $as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; esac case $ac_var in #( _ | IFS | as_nl) ;; #( BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( *) $as_unset $ac_var ;; esac ;; esac done (set) 2>&1 | case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #( *${as_nl}ac_space=\ *) sed -n \ "s/'\''/'\''\\\\'\'''\''/g; s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p" ;; #( *) sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" ;; esac | sort ) echo cat <<\_ASBOX ## ----------------- ## ## Output variables. ## ## ----------------- ## _ASBOX echo for ac_var in $ac_subst_vars do eval ac_val=\$$ac_var case $ac_val in *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; esac $as_echo "$ac_var='\''$ac_val'\''" done | sort echo if test -n "$ac_subst_files"; then cat <<\_ASBOX ## ------------------- ## ## File substitutions. ## ## ------------------- ## _ASBOX echo for ac_var in $ac_subst_files do eval ac_val=\$$ac_var case $ac_val in *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; esac $as_echo "$ac_var='\''$ac_val'\''" done | sort echo fi if test -s confdefs.h; then cat <<\_ASBOX ## ----------- ## ## confdefs.h. 
## ## ----------- ## _ASBOX echo cat confdefs.h echo fi test "$ac_signal" != 0 && $as_echo "$as_me: caught signal $ac_signal" $as_echo "$as_me: exit $exit_status" } >&5 rm -f core *.core core.conftest.* && rm -f -r conftest* confdefs* conf$$* $ac_clean_files && exit $exit_status ' 0 for ac_signal in 1 2 13 15; do trap 'ac_signal='$ac_signal'; { (exit 1); exit 1; }' $ac_signal done ac_signal=0 # confdefs.h avoids OS command line length limits that DEFS can exceed. rm -f -r conftest* confdefs.h # Predefined preprocessor variables. cat >>confdefs.h <<_ACEOF #define PACKAGE_NAME "$PACKAGE_NAME" _ACEOF cat >>confdefs.h <<_ACEOF #define PACKAGE_TARNAME "$PACKAGE_TARNAME" _ACEOF cat >>confdefs.h <<_ACEOF #define PACKAGE_VERSION "$PACKAGE_VERSION" _ACEOF cat >>confdefs.h <<_ACEOF #define PACKAGE_STRING "$PACKAGE_STRING" _ACEOF cat >>confdefs.h <<_ACEOF #define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT" _ACEOF # Let the site file select an alternate cache file if it wants to. # Prefer an explicitly selected file to automatically selected ones. ac_site_file1=NONE ac_site_file2=NONE if test -n "$CONFIG_SITE"; then ac_site_file1=$CONFIG_SITE elif test "x$prefix" != xNONE; then ac_site_file1=$prefix/share/config.site ac_site_file2=$prefix/etc/config.site else ac_site_file1=$ac_default_prefix/share/config.site ac_site_file2=$ac_default_prefix/etc/config.site fi for ac_site_file in "$ac_site_file1" "$ac_site_file2" do test "x$ac_site_file" = xNONE && continue if test -r "$ac_site_file"; then { $as_echo "$as_me:$LINENO: loading site script $ac_site_file" >&5 $as_echo "$as_me: loading site script $ac_site_file" >&6;} sed 's/^/| /' "$ac_site_file" >&5 . "$ac_site_file" fi done if test -r "$cache_file"; then # Some versions of bash will fail to source /dev/null (special # files actually), so we avoid doing that. 
if test -f "$cache_file"; then { $as_echo "$as_me:$LINENO: loading cache $cache_file" >&5 $as_echo "$as_me: loading cache $cache_file" >&6;} case $cache_file in [\\/]* | ?:[\\/]* ) . "$cache_file";; *) . "./$cache_file";; esac fi else { $as_echo "$as_me:$LINENO: creating cache $cache_file" >&5 $as_echo "$as_me: creating cache $cache_file" >&6;} >$cache_file fi # Check that the precious variables saved in the cache have kept the same # value. ac_cache_corrupted=false for ac_var in $ac_precious_vars; do eval ac_old_set=\$ac_cv_env_${ac_var}_set eval ac_new_set=\$ac_env_${ac_var}_set eval ac_old_val=\$ac_cv_env_${ac_var}_value eval ac_new_val=\$ac_env_${ac_var}_value case $ac_old_set,$ac_new_set in set,) { $as_echo "$as_me:$LINENO: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5 $as_echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;} ac_cache_corrupted=: ;; ,set) { $as_echo "$as_me:$LINENO: error: \`$ac_var' was not set in the previous run" >&5 $as_echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} ac_cache_corrupted=: ;; ,);; *) if test "x$ac_old_val" != "x$ac_new_val"; then # differences in whitespace do not lead to failure. 
ac_old_val_w=`echo x $ac_old_val` ac_new_val_w=`echo x $ac_new_val` if test "$ac_old_val_w" != "$ac_new_val_w"; then { $as_echo "$as_me:$LINENO: error: \`$ac_var' has changed since the previous run:" >&5 $as_echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;} ac_cache_corrupted=: else { $as_echo "$as_me:$LINENO: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5 $as_echo "$as_me: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;} eval $ac_var=\$ac_old_val fi { $as_echo "$as_me:$LINENO: former value: \`$ac_old_val'" >&5 $as_echo "$as_me: former value: \`$ac_old_val'" >&2;} { $as_echo "$as_me:$LINENO: current value: \`$ac_new_val'" >&5 $as_echo "$as_me: current value: \`$ac_new_val'" >&2;} fi;; esac # Pass precious variables to config.status. if test "$ac_new_set" = set; then case $ac_new_val in *\'*) ac_arg=$ac_var=`$as_echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; *) ac_arg=$ac_var=$ac_new_val ;; esac case " $ac_configure_args " in *" '$ac_arg' "*) ;; # Avoid dups. Use of quotes ensures accuracy. 
*) ac_configure_args="$ac_configure_args '$ac_arg'" ;; esac fi done if $ac_cache_corrupted; then { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} { $as_echo "$as_me:$LINENO: error: changes in the environment can compromise the build" >&5 $as_echo "$as_me: error: changes in the environment can compromise the build" >&2;} { { $as_echo "$as_me:$LINENO: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&5 $as_echo "$as_me: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&2;} { (exit 1); exit 1; }; } fi ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args. set dummy ${ac_tool_prefix}gcc; ac_word=$2 { $as_echo "$as_me:$LINENO: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if test "${ac_cv_prog_CC+set}" = set; then $as_echo_n "(cached) " >&6 else if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then ac_cv_prog_CC="${ac_tool_prefix}gcc" $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then { $as_echo "$as_me:$LINENO: result: $CC" >&5 $as_echo "$CC" >&6; } else { $as_echo "$as_me:$LINENO: result: no" >&5 $as_echo "no" >&6; } fi fi if test -z "$ac_cv_prog_CC"; then ac_ct_CC=$CC # Extract the first word of "gcc", so it can be a program name with args. set dummy gcc; ac_word=$2 { $as_echo "$as_me:$LINENO: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if test "${ac_cv_prog_ac_ct_CC+set}" = set; then $as_echo_n "(cached) " >&6 else if test -n "$ac_ct_CC"; then ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then ac_cv_prog_ac_ct_CC="gcc" $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_CC=$ac_cv_prog_ac_ct_CC if test -n "$ac_ct_CC"; then { $as_echo "$as_me:$LINENO: result: $ac_ct_CC" >&5 $as_echo "$ac_ct_CC" >&6; } else { $as_echo "$as_me:$LINENO: result: no" >&5 $as_echo "no" >&6; } fi if test "x$ac_ct_CC" = x; then CC="" else case $cross_compiling:$ac_tool_warned in yes:) { $as_echo "$as_me:$LINENO: WARNING: using cross tools not prefixed with host triplet" >&5 $as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac CC=$ac_ct_CC fi else CC="$ac_cv_prog_CC" fi if test -z "$CC"; then if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. 
set dummy ${ac_tool_prefix}cc; ac_word=$2 { $as_echo "$as_me:$LINENO: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if test "${ac_cv_prog_CC+set}" = set; then $as_echo_n "(cached) " >&6 else if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then ac_cv_prog_CC="${ac_tool_prefix}cc" $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then { $as_echo "$as_me:$LINENO: result: $CC" >&5 $as_echo "$CC" >&6; } else { $as_echo "$as_me:$LINENO: result: no" >&5 $as_echo "no" >&6; } fi fi fi if test -z "$CC"; then # Extract the first word of "cc", so it can be a program name with args. set dummy cc; ac_word=$2 { $as_echo "$as_me:$LINENO: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if test "${ac_cv_prog_CC+set}" = set; then $as_echo_n "(cached) " >&6 else if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else ac_prog_rejected=no as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then ac_prog_rejected=yes continue fi ac_cv_prog_CC="cc" $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS if test $ac_prog_rejected = yes; then # We found a bogon in the path, so make sure we never use it. set dummy $ac_cv_prog_CC shift if test $# != 0; then # We chose a different compiler from the bogus one. 
# However, it has the same basename, so the bogon will be chosen # first if we set CC to just the basename; use the full file name. shift ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@" fi fi fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then { $as_echo "$as_me:$LINENO: result: $CC" >&5 $as_echo "$CC" >&6; } else { $as_echo "$as_me:$LINENO: result: no" >&5 $as_echo "no" >&6; } fi fi if test -z "$CC"; then if test -n "$ac_tool_prefix"; then for ac_prog in cl.exe do # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. set dummy $ac_tool_prefix$ac_prog; ac_word=$2 { $as_echo "$as_me:$LINENO: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if test "${ac_cv_prog_CC+set}" = set; then $as_echo_n "(cached) " >&6 else if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then ac_cv_prog_CC="$ac_tool_prefix$ac_prog" $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then { $as_echo "$as_me:$LINENO: result: $CC" >&5 $as_echo "$CC" >&6; } else { $as_echo "$as_me:$LINENO: result: no" >&5 $as_echo "no" >&6; } fi test -n "$CC" && break done fi if test -z "$CC"; then ac_ct_CC=$CC for ac_prog in cl.exe do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 { $as_echo "$as_me:$LINENO: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if test "${ac_cv_prog_ac_ct_CC+set}" = set; then $as_echo_n "(cached) " >&6 else if test -n "$ac_ct_CC"; then ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. 
else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then ac_cv_prog_ac_ct_CC="$ac_prog" $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_CC=$ac_cv_prog_ac_ct_CC if test -n "$ac_ct_CC"; then { $as_echo "$as_me:$LINENO: result: $ac_ct_CC" >&5 $as_echo "$ac_ct_CC" >&6; } else { $as_echo "$as_me:$LINENO: result: no" >&5 $as_echo "no" >&6; } fi test -n "$ac_ct_CC" && break done if test "x$ac_ct_CC" = x; then CC="" else case $cross_compiling:$ac_tool_warned in yes:) { $as_echo "$as_me:$LINENO: WARNING: using cross tools not prefixed with host triplet" >&5 $as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac CC=$ac_ct_CC fi fi fi test -z "$CC" && { { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} { { $as_echo "$as_me:$LINENO: error: no acceptable C compiler found in \$PATH See \`config.log' for more details." >&5 $as_echo "$as_me: error: no acceptable C compiler found in \$PATH See \`config.log' for more details." >&2;} { (exit 1); exit 1; }; }; } # Provide some information about the compiler. $as_echo "$as_me:$LINENO: checking for C compiler version" >&5 set X $ac_compile ac_compiler=$2 { (ac_try="$ac_compiler --version >&5" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" $as_echo "$ac_try_echo") >&5 (eval "$ac_compiler --version >&5") 2>&5 ac_status=$? $as_echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 (exit $ac_status); } { (ac_try="$ac_compiler -v >&5" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" $as_echo "$ac_try_echo") >&5 (eval "$ac_compiler -v >&5") 2>&5 ac_status=$? $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } { (ac_try="$ac_compiler -V >&5" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" $as_echo "$ac_try_echo") >&5 (eval "$ac_compiler -V >&5") 2>&5 ac_status=$? $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ int main () { ; return 0; } _ACEOF ac_clean_files_save=$ac_clean_files ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out" # Try to create an executable without -o first, disregard a.out. # It will help us diagnose broken compilers, and finding out an intuition # of exeext. { $as_echo "$as_me:$LINENO: checking for C compiler default output file name" >&5 $as_echo_n "checking for C compiler default output file name... " >&6; } ac_link_default=`$as_echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'` # The possible output files: ac_files="a.out conftest.exe conftest a.exe a_out.exe b.out conftest.*" ac_rmfiles= for ac_file in $ac_files do case $ac_file in *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; * ) ac_rmfiles="$ac_rmfiles $ac_file";; esac done rm -f $ac_rmfiles if { (ac_try="$ac_link_default" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" $as_echo "$ac_try_echo") >&5 (eval "$ac_link_default") 2>&5 ac_status=$? $as_echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 (exit $ac_status); }; then # Autoconf-2.13 could set the ac_cv_exeext variable to `no'. # So ignore a value of `no', otherwise this would lead to `EXEEXT = no' # in a Makefile. We should not override ac_cv_exeext if it was cached, # so that the user can short-circuit this test for compilers unknown to # Autoconf. for ac_file in $ac_files '' do test -f "$ac_file" || continue case $ac_file in *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; [ab].out ) # We found the default executable, but exeext='' is most # certainly right. break;; *.* ) if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no; then :; else ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` fi # We set ac_cv_exeext here because the later test for it is not # safe: cross compilers may not add the suffix if given an `-o' # argument, so we may need to know it at that point already. # Even if this section looks crufty: it has the advantage of # actually working. break;; * ) break;; esac done test "$ac_cv_exeext" = no && ac_cv_exeext= else ac_file='' fi { $as_echo "$as_me:$LINENO: result: $ac_file" >&5 $as_echo "$ac_file" >&6; } if test -z "$ac_file"; then $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 { { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} { { $as_echo "$as_me:$LINENO: error: C compiler cannot create executables See \`config.log' for more details." >&5 $as_echo "$as_me: error: C compiler cannot create executables See \`config.log' for more details." >&2;} { (exit 77); exit 77; }; }; } fi ac_exeext=$ac_cv_exeext # Check that the compiler produces executables we can run. If not, either # the compiler is broken, or we cross compile. { $as_echo "$as_me:$LINENO: checking whether the C compiler works" >&5 $as_echo_n "checking whether the C compiler works... 
" >&6; } # FIXME: These cross compiler hacks should be removed for Autoconf 3.0 # If not cross compiling, check that we can run a simple program. if test "$cross_compiling" != yes; then if { ac_try='./$ac_file' { (case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" $as_echo "$ac_try_echo") >&5 (eval "$ac_try") 2>&5 ac_status=$? $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; }; then cross_compiling=no else if test "$cross_compiling" = maybe; then cross_compiling=yes else { { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} { { $as_echo "$as_me:$LINENO: error: cannot run C compiled programs. If you meant to cross compile, use \`--host'. See \`config.log' for more details." >&5 $as_echo "$as_me: error: cannot run C compiled programs. If you meant to cross compile, use \`--host'. See \`config.log' for more details." >&2;} { (exit 1); exit 1; }; }; } fi fi fi { $as_echo "$as_me:$LINENO: result: yes" >&5 $as_echo "yes" >&6; } rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out ac_clean_files=$ac_clean_files_save # Check that the compiler produces executables we can run. If not, either # the compiler is broken, or we cross compile. { $as_echo "$as_me:$LINENO: checking whether we are cross compiling" >&5 $as_echo_n "checking whether we are cross compiling... " >&6; } { $as_echo "$as_me:$LINENO: result: $cross_compiling" >&5 $as_echo "$cross_compiling" >&6; } { $as_echo "$as_me:$LINENO: checking for suffix of executables" >&5 $as_echo_n "checking for suffix of executables... " >&6; } if { (ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" $as_echo "$ac_try_echo") >&5 (eval "$ac_link") 2>&5 ac_status=$? $as_echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 (exit $ac_status); }; then # If both `conftest.exe' and `conftest' are `present' (well, observable) # catch `conftest.exe'. For instance with Cygwin, `ls conftest' will # work properly (i.e., refer to `conftest.exe'), while it won't with # `rm'. for ac_file in conftest.exe conftest conftest.*; do test -f "$ac_file" || continue case $ac_file in *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` break;; * ) break;; esac done else { { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} { { $as_echo "$as_me:$LINENO: error: cannot compute suffix of executables: cannot compile and link See \`config.log' for more details." >&5 $as_echo "$as_me: error: cannot compute suffix of executables: cannot compile and link See \`config.log' for more details." >&2;} { (exit 1); exit 1; }; }; } fi rm -f conftest$ac_cv_exeext { $as_echo "$as_me:$LINENO: result: $ac_cv_exeext" >&5 $as_echo "$ac_cv_exeext" >&6; } rm -f conftest.$ac_ext EXEEXT=$ac_cv_exeext ac_exeext=$EXEEXT { $as_echo "$as_me:$LINENO: checking for suffix of object files" >&5 $as_echo_n "checking for suffix of object files... " >&6; } if test "${ac_cv_objext+set}" = set; then $as_echo_n "(cached) " >&6 else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ int main () { ; return 0; } _ACEOF rm -f conftest.o conftest.obj if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" $as_echo "$ac_try_echo") >&5 (eval "$ac_compile") 2>&5 ac_status=$? $as_echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 (exit $ac_status); }; then for ac_file in conftest.o conftest.obj conftest.*; do test -f "$ac_file" || continue; case $ac_file in *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM ) ;; *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'` break;; esac done else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 { { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} { { $as_echo "$as_me:$LINENO: error: cannot compute suffix of object files: cannot compile See \`config.log' for more details." >&5 $as_echo "$as_me: error: cannot compute suffix of object files: cannot compile See \`config.log' for more details." >&2;} { (exit 1); exit 1; }; }; } fi rm -f conftest.$ac_cv_objext conftest.$ac_ext fi { $as_echo "$as_me:$LINENO: result: $ac_cv_objext" >&5 $as_echo "$ac_cv_objext" >&6; } OBJEXT=$ac_cv_objext ac_objext=$OBJEXT { $as_echo "$as_me:$LINENO: checking whether we are using the GNU C compiler" >&5 $as_echo_n "checking whether we are using the GNU C compiler... " >&6; } if test "${ac_cv_c_compiler_gnu+set}" = set; then $as_echo_n "(cached) " >&6 else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ int main () { #ifndef __GNUC__ choke me #endif ; return 0; } _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" $as_echo "$ac_try_echo") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! 
-s conftest.err } && test -s conftest.$ac_objext; then ac_compiler_gnu=yes else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_compiler_gnu=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext ac_cv_c_compiler_gnu=$ac_compiler_gnu fi { $as_echo "$as_me:$LINENO: result: $ac_cv_c_compiler_gnu" >&5 $as_echo "$ac_cv_c_compiler_gnu" >&6; } if test $ac_compiler_gnu = yes; then GCC=yes else GCC= fi ac_test_CFLAGS=${CFLAGS+set} ac_save_CFLAGS=$CFLAGS { $as_echo "$as_me:$LINENO: checking whether $CC accepts -g" >&5 $as_echo_n "checking whether $CC accepts -g... " >&6; } if test "${ac_cv_prog_cc_g+set}" = set; then $as_echo_n "(cached) " >&6 else ac_save_c_werror_flag=$ac_c_werror_flag ac_c_werror_flag=yes ac_cv_prog_cc_g=no CFLAGS="-g" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ int main () { ; return 0; } _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" $as_echo "$ac_try_echo") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext; then ac_cv_prog_cc_g=yes else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 CFLAGS="" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ int main () { ; return 0; } _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" $as_echo "$ac_try_echo") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext; then : else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_c_werror_flag=$ac_save_c_werror_flag CFLAGS="-g" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ int main () { ; return 0; } _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" $as_echo "$ac_try_echo") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! 
-s conftest.err } && test -s conftest.$ac_objext; then ac_cv_prog_cc_g=yes else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext ac_c_werror_flag=$ac_save_c_werror_flag fi { $as_echo "$as_me:$LINENO: result: $ac_cv_prog_cc_g" >&5 $as_echo "$ac_cv_prog_cc_g" >&6; } if test "$ac_test_CFLAGS" = set; then CFLAGS=$ac_save_CFLAGS elif test $ac_cv_prog_cc_g = yes; then if test "$GCC" = yes; then CFLAGS="-g -O2" else CFLAGS="-g" fi else if test "$GCC" = yes; then CFLAGS="-O2" else CFLAGS= fi fi { $as_echo "$as_me:$LINENO: checking for $CC option to accept ISO C89" >&5 $as_echo_n "checking for $CC option to accept ISO C89... " >&6; } if test "${ac_cv_prog_cc_c89+set}" = set; then $as_echo_n "(cached) " >&6 else ac_cv_prog_cc_c89=no ac_save_CC=$CC cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include #include #include #include /* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */ struct buf { int x; }; FILE * (*rcsopen) (struct buf *, struct stat *, int); static char *e (p, i) char **p; int i; { return p[i]; } static char *f (char * (*g) (char **, int), char **p, ...) { char *s; va_list v; va_start (v,p); s = g (p, va_arg (v,int)); va_end (v); return s; } /* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has function prototypes and stuff, but not '\xHH' hex character constants. These don't provoke an error unfortunately, instead are silently treated as 'x'. The following induces an error, until -std is added to get proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an array size at least. It's necessary to write '\x00'==0 to get something that's true only with -std. */ int osf4_cc_array ['\x00' == 0 ? 
1 : -1]; /* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters inside strings and character constants. */ #define FOO(x) 'x' int xlc6_cc_array[FOO(a) == 'x' ? 1 : -1]; int test (int i, double x); struct s1 {int (*f) (int a);}; struct s2 {int (*f) (double a);}; int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); int argc; char **argv; int main () { return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; ; return 0; } _ACEOF for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \ -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" do CC="$ac_save_CC $ac_arg" rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" $as_echo "$ac_try_echo") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! 
-s conftest.err } && test -s conftest.$ac_objext; then ac_cv_prog_cc_c89=$ac_arg else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 fi rm -f core conftest.err conftest.$ac_objext test "x$ac_cv_prog_cc_c89" != "xno" && break done rm -f conftest.$ac_ext CC=$ac_save_CC fi # AC_CACHE_VAL case "x$ac_cv_prog_cc_c89" in x) { $as_echo "$as_me:$LINENO: result: none needed" >&5 $as_echo "none needed" >&6; } ;; xno) { $as_echo "$as_me:$LINENO: result: unsupported" >&5 $as_echo "unsupported" >&6; } ;; *) CC="$CC $ac_cv_prog_cc_c89" { $as_echo "$as_me:$LINENO: result: $ac_cv_prog_cc_c89" >&5 $as_echo "$ac_cv_prog_cc_c89" >&6; } ;; esac ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu # Check whether --with-sysmgmt-include-path was given. if test "${with_sysmgmt_include_path+set}" = set; then withval=$with_sysmgmt_include_path; SYSMGMT_CFLAGS="-I$withval" else SYSMGMT_CFLAGS="-I/opt/intel/mic/sysmgmt/sdk/include" fi # Check whether --with-sysmgmt-lib-path was given. if test "${with_sysmgmt_lib_path+set}" = set; then withval=$with_sysmgmt_lib_path; SYSMGMT_LIBS="-Wl,-rpath,$withval" else SYSMGMT_LIBS="-Wl,-rpath,/opt/intel/mic/sysmgmt/sdk/lib/Linux" fi #AC_ARG_WITH([scif-lib-path], # [AS_HELP_STRING([--with-scif-lib-path],[location of the SCIF library, needed by libMicAccessApi.so]), # [], # []) OLD_CPPFLAGS=$CPPFLAGS CPPFLAGS="-DMICACCESSAPI -DLINUX $SYSMGMT_CFLAGS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu { $as_echo "$as_me:$LINENO: checking how to run the C preprocessor" >&5 $as_echo_n "checking how to run the C preprocessor... " >&6; } # On Suns, sometimes $CPP names a directory. 
if test -n "$CPP" && test -d "$CPP"; then CPP= fi if test -z "$CPP"; then if test "${ac_cv_prog_CPP+set}" = set; then $as_echo_n "(cached) " >&6 else # Double quotes because CPP needs to be expanded for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp" do ac_preproc_ok=false for ac_c_preproc_warn_flag in '' yes do # Use a header file that comes with gcc, so configuring glibc # with a fresh cross-compiler works. # Prefer to if __STDC__ is defined, since # exists even on freestanding compilers. # On the NeXT, cc -E runs the code through the compiler's parser, # not just through cpp. "Syntax error" is here to catch this case. cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #ifdef __STDC__ # include #else # include #endif Syntax error _ACEOF if { (ac_try="$ac_cpp conftest.$ac_ext" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" $as_echo "$ac_try_echo") >&5 (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } >/dev/null && { test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || test ! -s conftest.err }; then : else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 # Broken: fails on valid input. continue fi rm -f conftest.err conftest.$ac_ext # OK, works on sane cases. Now check whether nonexistent headers # can be detected and how. cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ #include _ACEOF if { (ac_try="$ac_cpp conftest.$ac_ext" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" $as_echo "$ac_try_echo") >&5 (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } >/dev/null && { test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || test ! -s conftest.err }; then # Broken: success on invalid input. continue else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 # Passes both tests. ac_preproc_ok=: break fi rm -f conftest.err conftest.$ac_ext done # Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. rm -f conftest.err conftest.$ac_ext if $ac_preproc_ok; then break fi done ac_cv_prog_CPP=$CPP fi CPP=$ac_cv_prog_CPP else ac_cv_prog_CPP=$CPP fi { $as_echo "$as_me:$LINENO: result: $CPP" >&5 $as_echo "$CPP" >&6; } ac_preproc_ok=false for ac_c_preproc_warn_flag in '' yes do # Use a header file that comes with gcc, so configuring glibc # with a fresh cross-compiler works. # Prefer to if __STDC__ is defined, since # exists even on freestanding compilers. # On the NeXT, cc -E runs the code through the compiler's parser, # not just through cpp. "Syntax error" is here to catch this case. cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #ifdef __STDC__ # include #else # include #endif Syntax error _ACEOF if { (ac_try="$ac_cpp conftest.$ac_ext" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" $as_echo "$ac_try_echo") >&5 (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 ac_status=$? 
grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } >/dev/null && { test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || test ! -s conftest.err }; then : else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 # Broken: fails on valid input. continue fi rm -f conftest.err conftest.$ac_ext # OK, works on sane cases. Now check whether nonexistent headers # can be detected and how. cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include _ACEOF if { (ac_try="$ac_cpp conftest.$ac_ext" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" $as_echo "$ac_try_echo") >&5 (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } >/dev/null && { test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || test ! -s conftest.err }; then # Broken: success on invalid input. continue else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 # Passes both tests. ac_preproc_ok=: break fi rm -f conftest.err conftest.$ac_ext done # Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. rm -f conftest.err conftest.$ac_ext if $ac_preproc_ok; then : else { { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} { { $as_echo "$as_me:$LINENO: error: C preprocessor \"$CPP\" fails sanity check See \`config.log' for more details." >&5 $as_echo "$as_me: error: C preprocessor \"$CPP\" fails sanity check See \`config.log' for more details." 
>&2;} { (exit 1); exit 1; }; }; } fi ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu { $as_echo "$as_me:$LINENO: checking for grep that handles long lines and -e" >&5 $as_echo_n "checking for grep that handles long lines and -e... " >&6; } if test "${ac_cv_path_GREP+set}" = set; then $as_echo_n "(cached) " >&6 else if test -z "$GREP"; then ac_path_GREP_found=false # Loop through the user's path and test for each of PROGNAME-LIST as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_prog in grep ggrep; do for ac_exec_ext in '' $ac_executable_extensions; do ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext" { test -f "$ac_path_GREP" && $as_test_x "$ac_path_GREP"; } || continue # Check for GNU ac_path_GREP and select it if it is found. # Check for GNU $ac_path_GREP case `"$ac_path_GREP" --version 2>&1` in *GNU*) ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;; *) ac_count=0 $as_echo_n 0123456789 >"conftest.in" while : do cat "conftest.in" "conftest.in" >"conftest.tmp" mv "conftest.tmp" "conftest.in" cp "conftest.in" "conftest.nl" $as_echo 'GREP' >> "conftest.nl" "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break ac_count=`expr $ac_count + 1` if test $ac_count -gt ${ac_path_GREP_max-0}; then # Best one so far, save it but keep looking for a better one ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_max=$ac_count fi # 10*(2^10) chars as input seems more than enough test $ac_count -gt 10 && break done rm -f conftest.in conftest.tmp conftest.nl conftest.out;; esac $ac_path_GREP_found && break 3 done done done IFS=$as_save_IFS if test -z "$ac_cv_path_GREP"; then { { $as_echo "$as_me:$LINENO: error: no acceptable 
grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&5 $as_echo "$as_me: error: no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&2;} { (exit 1); exit 1; }; } fi else ac_cv_path_GREP=$GREP fi fi { $as_echo "$as_me:$LINENO: result: $ac_cv_path_GREP" >&5 $as_echo "$ac_cv_path_GREP" >&6; } GREP="$ac_cv_path_GREP" { $as_echo "$as_me:$LINENO: checking for egrep" >&5 $as_echo_n "checking for egrep... " >&6; } if test "${ac_cv_path_EGREP+set}" = set; then $as_echo_n "(cached) " >&6 else if echo a | $GREP -E '(a|b)' >/dev/null 2>&1 then ac_cv_path_EGREP="$GREP -E" else if test -z "$EGREP"; then ac_path_EGREP_found=false # Loop through the user's path and test for each of PROGNAME-LIST as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_prog in egrep; do for ac_exec_ext in '' $ac_executable_extensions; do ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext" { test -f "$ac_path_EGREP" && $as_test_x "$ac_path_EGREP"; } || continue # Check for GNU ac_path_EGREP and select it if it is found. 
# Check for GNU $ac_path_EGREP case `"$ac_path_EGREP" --version 2>&1` in *GNU*) ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;; *) ac_count=0 $as_echo_n 0123456789 >"conftest.in" while : do cat "conftest.in" "conftest.in" >"conftest.tmp" mv "conftest.tmp" "conftest.in" cp "conftest.in" "conftest.nl" $as_echo 'EGREP' >> "conftest.nl" "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break ac_count=`expr $ac_count + 1` if test $ac_count -gt ${ac_path_EGREP_max-0}; then # Best one so far, save it but keep looking for a better one ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_max=$ac_count fi # 10*(2^10) chars as input seems more than enough test $ac_count -gt 10 && break done rm -f conftest.in conftest.tmp conftest.nl conftest.out;; esac $ac_path_EGREP_found && break 3 done done done IFS=$as_save_IFS if test -z "$ac_cv_path_EGREP"; then { { $as_echo "$as_me:$LINENO: error: no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&5 $as_echo "$as_me: error: no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&2;} { (exit 1); exit 1; }; } fi else ac_cv_path_EGREP=$EGREP fi fi fi { $as_echo "$as_me:$LINENO: result: $ac_cv_path_EGREP" >&5 $as_echo "$ac_cv_path_EGREP" >&6; } EGREP="$ac_cv_path_EGREP" { $as_echo "$as_me:$LINENO: checking for ANSI C header files" >&5 $as_echo_n "checking for ANSI C header files... " >&6; } if test "${ac_cv_header_stdc+set}" = set; then $as_echo_n "(cached) " >&6 else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ #include #include #include #include int main () { ; return 0; } _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" $as_echo "$ac_try_echo") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext; then ac_cv_header_stdc=yes else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_cv_header_stdc=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext if test $ac_cv_header_stdc = yes; then # SunOS 4.x string.h does not declare mem*, contrary to ANSI. cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include _ACEOF if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | $EGREP "memchr" >/dev/null 2>&1; then : else ac_cv_header_stdc=no fi rm -f conftest* fi if test $ac_cv_header_stdc = yes; then # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include _ACEOF if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | $EGREP "free" >/dev/null 2>&1; then : else ac_cv_header_stdc=no fi rm -f conftest* fi if test $ac_cv_header_stdc = yes; then # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi. if test "$cross_compiling" = yes; then : else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ #include #include #if ((' ' & 0x0FF) == 0x020) # define ISLOWER(c) ('a' <= (c) && (c) <= 'z') # define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c)) #else # define ISLOWER(c) \ (('a' <= (c) && (c) <= 'i') \ || ('j' <= (c) && (c) <= 'r') \ || ('s' <= (c) && (c) <= 'z')) # define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c)) #endif #define XOR(e, f) (((e) && !(f)) || (!(e) && (f))) int main () { int i; for (i = 0; i < 256; i++) if (XOR (islower (i), ISLOWER (i)) || toupper (i) != TOUPPER (i)) return 2; return 0; } _ACEOF rm -f conftest$ac_exeext if { (ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" $as_echo "$ac_try_echo") >&5 (eval "$ac_link") 2>&5 ac_status=$? $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { ac_try='./conftest$ac_exeext' { (case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" $as_echo "$ac_try_echo") >&5 (eval "$ac_try") 2>&5 ac_status=$? $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; }; then : else $as_echo "$as_me: program exited with status $ac_status" >&5 $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ( exit $ac_status ) ac_cv_header_stdc=no fi rm -rf conftest.dSYM rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext fi fi fi { $as_echo "$as_me:$LINENO: result: $ac_cv_header_stdc" >&5 $as_echo "$ac_cv_header_stdc" >&6; } if test $ac_cv_header_stdc = yes; then cat >>confdefs.h <<\_ACEOF #define STDC_HEADERS 1 _ACEOF fi # On IRIX 5.3, sys/types and inttypes.h are conflicting. 
for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \ inttypes.h stdint.h unistd.h do as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` { $as_echo "$as_me:$LINENO: checking for $ac_header" >&5 $as_echo_n "checking for $ac_header... " >&6; } if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then $as_echo_n "(cached) " >&6 else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ $ac_includes_default #include <$ac_header> _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" $as_echo "$ac_try_echo") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext; then eval "$as_ac_Header=yes" else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 eval "$as_ac_Header=no" fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi ac_res=`eval 'as_val=${'$as_ac_Header'} $as_echo "$as_val"'` { $as_echo "$as_me:$LINENO: result: $ac_res" >&5 $as_echo "$ac_res" >&6; } as_val=`eval 'as_val=${'$as_ac_Header'} $as_echo "$as_val"'` if test "x$as_val" = x""yes; then cat >>confdefs.h <<_ACEOF #define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1 _ACEOF fi done for ac_header in MicAccessApi.h do as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then { $as_echo "$as_me:$LINENO: checking for $ac_header" >&5 $as_echo_n "checking for $ac_header... 
" >&6; } if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then $as_echo_n "(cached) " >&6 fi ac_res=`eval 'as_val=${'$as_ac_Header'} $as_echo "$as_val"'` { $as_echo "$as_me:$LINENO: result: $ac_res" >&5 $as_echo "$ac_res" >&6; } else # Is the header compilable? { $as_echo "$as_me:$LINENO: checking $ac_header usability" >&5 $as_echo_n "checking $ac_header usability... " >&6; } cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ $ac_includes_default #include <$ac_header> _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" $as_echo "$ac_try_echo") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext; then ac_header_compiler=yes else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_header_compiler=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext { $as_echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 $as_echo "$ac_header_compiler" >&6; } # Is the header present? { $as_echo "$as_me:$LINENO: checking $ac_header presence" >&5 $as_echo_n "checking $ac_header presence... " >&6; } cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include <$ac_header> _ACEOF if { (ac_try="$ac_cpp conftest.$ac_ext" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" $as_echo "$ac_try_echo") >&5 (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 ac_status=$? 
grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } >/dev/null && { test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || test ! -s conftest.err }; then ac_header_preproc=yes else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_header_preproc=no fi rm -f conftest.err conftest.$ac_ext { $as_echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 $as_echo "$ac_header_preproc" >&6; } # So? What about this header? case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in yes:no: ) { $as_echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5 $as_echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;} { $as_echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5 $as_echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;} ac_header_preproc=yes ;; no:yes:* ) { $as_echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5 $as_echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;} { $as_echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5 $as_echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" 
>&2;} { $as_echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5 $as_echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;} { $as_echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5 $as_echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;} { $as_echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5 $as_echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;} { $as_echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5 $as_echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} ;; esac { $as_echo "$as_me:$LINENO: checking for $ac_header" >&5 $as_echo_n "checking for $ac_header... " >&6; } if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then $as_echo_n "(cached) " >&6 else eval "$as_ac_Header=\$ac_header_preproc" fi ac_res=`eval 'as_val=${'$as_ac_Header'} $as_echo "$as_val"'` { $as_echo "$as_me:$LINENO: result: $ac_res" >&5 $as_echo "$ac_res" >&6; } fi as_val=`eval 'as_val=${'$as_ac_Header'} $as_echo "$as_val"'` if test "x$as_val" = x""yes; then cat >>confdefs.h <<_ACEOF #define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1 _ACEOF else { { $as_echo "$as_me:$LINENO: error: Couldn't find MicAccessApi.h...try installing MPSS from \ http://software.intel.com/en-us/articles/intel-manycore-platform-software-stack-mpss" >&5 $as_echo "$as_me: error: Couldn't find MicAccessApi.h...try installing MPSS from \ http://software.intel.com/en-us/articles/intel-manycore-platform-software-stack-mpss" >&2;} { (exit 1); exit 1; }; } fi done CPPFLAGS=$OLD_CPPFLAGS ac_config_files="$ac_config_files Makefile.host_micpower" cat >confcache <<\_ACEOF # This file is a shell script that caches the results of configure # tests run on this system so they can be shared between configure # scripts and configure runs, see 
configure's option --config-cache. # It is not useful on other systems. If it contains results you don't # want to keep, you may remove or edit it. # # config.status only pays attention to the cache file if you give it # the --recheck option to rerun configure. # # `ac_cv_env_foo' variables (set or unset) will be overridden when # loading this file, other *unset* `ac_cv_foo' will be assigned the # following values. _ACEOF # The following way of writing the cache mishandles newlines in values, # but we know of no workaround that is simple, portable, and efficient. # So, we kill variables containing newlines. # Ultrix sh set writes to stderr and can't be redirected directly, # and sets the high bit in the cache file unless we assign to the vars. ( for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do eval ac_val=\$$ac_var case $ac_val in #( *${as_nl}*) case $ac_var in #( *_cv_*) { $as_echo "$as_me:$LINENO: WARNING: cache variable $ac_var contains a newline" >&5 $as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; esac case $ac_var in #( _ | IFS | as_nl) ;; #( BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( *) $as_unset $ac_var ;; esac ;; esac done (set) 2>&1 | case $as_nl`(ac_space=' '; set) 2>&1` in #( *${as_nl}ac_space=\ *) # `set' does not quote correctly, so add quotes (double-quote # substitution turns \\\\ into \\, and sed turns \\ into \). sed -n \ "s/'/'\\\\''/g; s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p" ;; #( *) # `set' quotes correctly as required by POSIX, so do not add quotes. 
sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" ;; esac | sort ) | sed ' /^ac_cv_env_/b end t clear :clear s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/ t end s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ :end' >>confcache if diff "$cache_file" confcache >/dev/null 2>&1; then :; else if test -w "$cache_file"; then test "x$cache_file" != "x/dev/null" && { $as_echo "$as_me:$LINENO: updating cache $cache_file" >&5 $as_echo "$as_me: updating cache $cache_file" >&6;} cat confcache >$cache_file else { $as_echo "$as_me:$LINENO: not updating unwritable cache $cache_file" >&5 $as_echo "$as_me: not updating unwritable cache $cache_file" >&6;} fi fi rm -f confcache test "x$prefix" = xNONE && prefix=$ac_default_prefix # Let make expand exec_prefix. test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' # Transform confdefs.h into DEFS. # Protect against shell expansion while executing Makefile rules. # Protect against Makefile macro expansion. # # If the first sed substitution is executed (which looks for macros that # take arguments), then branch to the quote section. Otherwise, # look for a macro that doesn't take arguments. ac_script=' :mline /\\$/{ N s,\\\n,, b mline } t clear :clear s/^[ ]*#[ ]*define[ ][ ]*\([^ (][^ (]*([^)]*)\)[ ]*\(.*\)/-D\1=\2/g t quote s/^[ ]*#[ ]*define[ ][ ]*\([^ ][^ ]*\)[ ]*\(.*\)/-D\1=\2/g t quote b any :quote s/[ `~#$^&*(){}\\|;'\''"<>?]/\\&/g s/\[/\\&/g s/\]/\\&/g s/\$/$$/g H :any ${ g s/^\n// s/\n/ /g p } ' DEFS=`sed -n "$ac_script" confdefs.h` ac_libobjs= ac_ltlibobjs= for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue # 1. Remove the extension, and $U if already installed. ac_script='s/\$U\././;s/\.o$//;s/\.obj$//' ac_i=`$as_echo "$ac_i" | sed "$ac_script"` # 2. Prepend LIBOBJDIR. When used with automake>=1.10 LIBOBJDIR # will be set to the directory where LIBOBJS objects are built. 
ac_libobjs="$ac_libobjs \${LIBOBJDIR}$ac_i\$U.$ac_objext" ac_ltlibobjs="$ac_ltlibobjs \${LIBOBJDIR}$ac_i"'$U.lo' done LIBOBJS=$ac_libobjs LTLIBOBJS=$ac_ltlibobjs : ${CONFIG_STATUS=./config.status} ac_write_fail=0 ac_clean_files_save=$ac_clean_files ac_clean_files="$ac_clean_files $CONFIG_STATUS" { $as_echo "$as_me:$LINENO: creating $CONFIG_STATUS" >&5 $as_echo "$as_me: creating $CONFIG_STATUS" >&6;} cat >$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 #! $SHELL # Generated by $as_me. # Run this file to recreate the current configuration. # Compiler output produced by configure, useful for debugging # configure, is in config.log if it exists. debug=false ac_cs_recheck=false ac_cs_silent=false SHELL=\${CONFIG_SHELL-$SHELL} _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 ## --------------------- ## ## M4sh Initialization. ## ## --------------------- ## # Be more Bourne compatible DUALCASE=1; export DUALCASE # for MKS sh if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then emulate sh NULLCMD=: # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which # is contrary to our usage. Disable this feature. alias -g '${1+"$@"}'='"$@"' setopt NO_GLOB_SUBST else case `(set -o) 2>/dev/null` in *posix*) set -o posix ;; esac fi # PATH needs CR # Avoid depending upon Character Ranges. as_cr_letters='abcdefghijklmnopqrstuvwxyz' as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' as_cr_Letters=$as_cr_letters$as_cr_LETTERS as_cr_digits='0123456789' as_cr_alnum=$as_cr_Letters$as_cr_digits as_nl=' ' export as_nl # Printing a long string crashes Solaris 7 /usr/bin/printf. 
as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo if (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then as_echo='printf %s\n' as_echo_n='printf %s' else if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' as_echo_n='/usr/ucb/echo -n' else as_echo_body='eval expr "X$1" : "X\\(.*\\)"' as_echo_n_body='eval arg=$1; case $arg in *"$as_nl"*) expr "X$arg" : "X\\(.*\\)$as_nl"; arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; esac; expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" ' export as_echo_n_body as_echo_n='sh -c $as_echo_n_body as_echo' fi export as_echo_body as_echo='sh -c $as_echo_body as_echo' fi # The user is always right. if test "${PATH_SEPARATOR+set}" != set; then PATH_SEPARATOR=: (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || PATH_SEPARATOR=';' } fi # Support unset when possible. if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then as_unset=unset else as_unset=false fi # IFS # We need space, tab and new line, in precisely that order. Quoting is # there to prevent editors from complaining about space-tab. # (If _AS_PATH_WALK were called with IFS unset, it would disable word # splitting by setting IFS to empty value.) IFS=" "" $as_nl" # Find who we are. Look in the path if we contain no directory separator. case $0 in *[\\/]* ) as_myself=$0 ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break done IFS=$as_save_IFS ;; esac # We did not find ourselves, most probably we were run as `sh COMMAND' # in which case we are not to be found in the path. if test "x$as_myself" = x; then as_myself=$0 fi if test ! 
-f "$as_myself"; then $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 { (exit 1); exit 1; } fi # Work around bugs in pre-3.0 UWIN ksh. for as_var in ENV MAIL MAILPATH do ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var done PS1='$ ' PS2='> ' PS4='+ ' # NLS nuisances. LC_ALL=C export LC_ALL LANGUAGE=C export LANGUAGE # Required to use basename. if expr a : '\(a\)' >/dev/null 2>&1 && test "X`expr 00001 : '.*\(...\)'`" = X001; then as_expr=expr else as_expr=false fi if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then as_basename=basename else as_basename=false fi # Name of the executable. as_me=`$as_basename -- "$0" || $as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ X"$0" : 'X\(//\)$' \| \ X"$0" : 'X\(/\)' \| . 2>/dev/null || $as_echo X/"$0" | sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/ q } /^X\/\(\/\/\)$/{ s//\1/ q } /^X\/\(\/\).*/{ s//\1/ q } s/.*/./; q'` # CDPATH. $as_unset CDPATH as_lineno_1=$LINENO as_lineno_2=$LINENO test "x$as_lineno_1" != "x$as_lineno_2" && test "x`expr $as_lineno_1 + 1`" = "x$as_lineno_2" || { # Create $as_me.lineno as a copy of $as_myself, but with $LINENO # uniformly replaced by the line number. The first 'sed' inserts a # line-number line after each line using $LINENO; the second 'sed' # does the real work. The second script uses 'N' to pair each # line-number line with the line containing $LINENO, and appends # trailing '-' during substitution so that $LINENO is not a special # case at line end. # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the # scripts with optimization help from Paolo Bonzini. Blame Lee # E. McMahon (1931-1989) for sed's syntax. 
:-) sed -n ' p /[$]LINENO/= ' <$as_myself | sed ' s/[$]LINENO.*/&-/ t lineno b :lineno N :loop s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/ t loop s/-\n.*// ' >$as_me.lineno && chmod +x "$as_me.lineno" || { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2 { (exit 1); exit 1; }; } # Don't try to exec as it changes $[0], causing all sort of problems # (the dirname of $[0] is not the place where we might find the # original and so on. Autoconf is especially sensitive to this). . "./$as_me.lineno" # Exit status is that of the last command. exit } if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then as_dirname=dirname else as_dirname=false fi ECHO_C= ECHO_N= ECHO_T= case `echo -n x` in -n*) case `echo 'x\c'` in *c*) ECHO_T=' ';; # ECHO_T is single tab character. *) ECHO_C='\c';; esac;; *) ECHO_N='-n';; esac if expr a : '\(a\)' >/dev/null 2>&1 && test "X`expr 00001 : '.*\(...\)'`" = X001; then as_expr=expr else as_expr=false fi rm -f conf$$ conf$$.exe conf$$.file if test -d conf$$.dir; then rm -f conf$$.dir/conf$$.file else rm -f conf$$.dir mkdir conf$$.dir 2>/dev/null fi if (echo >conf$$.file) 2>/dev/null; then if ln -s conf$$.file conf$$ 2>/dev/null; then as_ln_s='ln -s' # ... but there are two gotchas: # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. # In both cases, we have to default to `cp -p'. ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || as_ln_s='cp -p' elif ln conf$$.file conf$$ 2>/dev/null; then as_ln_s=ln else as_ln_s='cp -p' fi else as_ln_s='cp -p' fi rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file rmdir conf$$.dir 2>/dev/null if mkdir -p . 
2>/dev/null; then as_mkdir_p=: else test -d ./-p && rmdir ./-p as_mkdir_p=false fi if test -x / >/dev/null 2>&1; then as_test_x='test -x' else if ls -dL / >/dev/null 2>&1; then as_ls_L_option=L else as_ls_L_option= fi as_test_x=' eval sh -c '\'' if test -d "$1"; then test -d "$1/."; else case $1 in -*)set "./$1";; esac; case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in ???[sx]*):;;*)false;;esac;fi '\'' sh ' fi as_executable_p=$as_test_x # Sed expression to map a string onto a valid CPP name. as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" # Sed expression to map a string onto a valid variable name. as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" exec 6>&1 # Save the log message, to keep $[0] and so on meaningful, and to # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" This file was extended by host_micpower $as_me version-0.1, which was generated by GNU Autoconf 2.63. Invocation command line was CONFIG_FILES = $CONFIG_FILES CONFIG_HEADERS = $CONFIG_HEADERS CONFIG_LINKS = $CONFIG_LINKS CONFIG_COMMANDS = $CONFIG_COMMANDS $ $0 $@ on `(hostname || uname -n) 2>/dev/null | sed 1q` " _ACEOF case $ac_config_files in *" "*) set x $ac_config_files; shift; ac_config_files=$*;; esac cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 # Files that config.status was made for. config_files="$ac_config_files" _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 ac_cs_usage="\ \`$as_me' instantiates files from templates according to the current configuration. Usage: $0 [OPTION]... [FILE]... -h, --help print this help, then exit -V, --version print version number and configuration settings, then exit -q, --quiet, --silent do not print progress messages -d, --debug don't remove temporary files --recheck update $as_me by reconfiguring in the same conditions --file=FILE[:TEMPLATE] instantiate the configuration file FILE Configuration files: $config_files Report bugs to ." 
_ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_version="\\ host_micpower config.status version-0.1 configured by $0, generated by GNU Autoconf 2.63, with options \\"`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\" Copyright (C) 2008 Free Software Foundation, Inc. This config.status script is free software; the Free Software Foundation gives unlimited permission to copy, distribute and modify it." ac_pwd='$ac_pwd' srcdir='$srcdir' test -n "\$AWK" || AWK=awk _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # The default lists apply if the user does not specify any file. ac_need_defaults=: while test $# != 0 do case $1 in --*=*) ac_option=`expr "X$1" : 'X\([^=]*\)='` ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'` ac_shift=: ;; *) ac_option=$1 ac_optarg=$2 ac_shift=shift ;; esac case $ac_option in # Handling of the options. -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) ac_cs_recheck=: ;; --version | --versio | --versi | --vers | --ver | --ve | --v | -V ) $as_echo "$ac_cs_version"; exit ;; --debug | --debu | --deb | --de | --d | -d ) debug=: ;; --file | --fil | --fi | --f ) $ac_shift case $ac_optarg in *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; esac CONFIG_FILES="$CONFIG_FILES '$ac_optarg'" ac_need_defaults=false;; --he | --h | --help | --hel | -h ) $as_echo "$ac_cs_usage"; exit ;; -q | -quiet | --quiet | --quie | --qui | --qu | --q \ | -silent | --silent | --silen | --sile | --sil | --si | --s) ac_cs_silent=: ;; # This is an error. -*) { $as_echo "$as_me: error: unrecognized option: $1 Try \`$0 --help' for more information." 
>&2 { (exit 1); exit 1; }; } ;; *) ac_config_targets="$ac_config_targets $1" ac_need_defaults=false ;; esac shift done ac_configure_extra_args= if $ac_cs_silent; then exec 6>/dev/null ac_configure_extra_args="$ac_configure_extra_args --silent" fi _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 if \$ac_cs_recheck; then set X '$SHELL' '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion shift \$as_echo "running CONFIG_SHELL=$SHELL \$*" >&6 CONFIG_SHELL='$SHELL' export CONFIG_SHELL exec "\$@" fi _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 exec 5>>config.log { echo sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX ## Running $as_me. ## _ASBOX $as_echo "$ac_log" } >&5 _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # Handling of arguments. for ac_config_target in $ac_config_targets do case $ac_config_target in "Makefile.host_micpower") CONFIG_FILES="$CONFIG_FILES Makefile.host_micpower" ;; *) { { $as_echo "$as_me:$LINENO: error: invalid argument: $ac_config_target" >&5 $as_echo "$as_me: error: invalid argument: $ac_config_target" >&2;} { (exit 1); exit 1; }; };; esac done # If the user did not use the arguments to specify the items to instantiate, # then the envvar interface is used. Set only those that are not. # We use the long form for the default assignment because of an extremely # bizarre bug on SunOS 4.1.3. if $ac_need_defaults; then test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files fi # Have a temporary directory for convenience. Make it in the build tree # simply because there is no reason against having it here, and in addition, # creating and moving files from /tmp can sometimes cause problems. # Hook for its removal unless debugging. # Note that there is a small window in which the directory will not be cleaned: # after its creation but before its name has been assigned to `$tmp'. $debug || { tmp= trap 'exit_status=$? 
{ test -z "$tmp" || test ! -d "$tmp" || rm -fr "$tmp"; } && exit $exit_status ' 0 trap '{ (exit 1); exit 1; }' 1 2 13 15 } # Create a (secure) tmp directory for tmp files. { tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" } || { tmp=./conf$$-$RANDOM (umask 077 && mkdir "$tmp") } || { $as_echo "$as_me: cannot create a temporary directory in ." >&2 { (exit 1); exit 1; } } # Set up the scripts for CONFIG_FILES section. # No need to generate them if there are no CONFIG_FILES. # This happens for instance with `./config.status config.h'. if test -n "$CONFIG_FILES"; then ac_cr=' ' ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' /dev/null` if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then ac_cs_awk_cr='\\r' else ac_cs_awk_cr=$ac_cr fi echo 'BEGIN {' >"$tmp/subs1.awk" && _ACEOF { echo "cat >conf$$subs.awk <<_ACEOF" && echo "$ac_subst_vars" | sed 's/.*/&!$&$ac_delim/' && echo "_ACEOF" } >conf$$subs.sh || { { $as_echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5 $as_echo "$as_me: error: could not make $CONFIG_STATUS" >&2;} { (exit 1); exit 1; }; } ac_delim_num=`echo "$ac_subst_vars" | grep -c '$'` ac_delim='%!_!# ' for ac_last_try in false false false false false :; do . ./conf$$subs.sh || { { $as_echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5 $as_echo "$as_me: error: could not make $CONFIG_STATUS" >&2;} { (exit 1); exit 1; }; } ac_delim_n=`sed -n "s/.*$ac_delim\$/X/p" conf$$subs.awk | grep -c X` if test $ac_delim_n = $ac_delim_num; then break elif $ac_last_try; then { { $as_echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5 $as_echo "$as_me: error: could not make $CONFIG_STATUS" >&2;} { (exit 1); exit 1; }; } else ac_delim="$ac_delim!$ac_delim _$ac_delim!! 
" fi done rm -f conf$$subs.sh cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 cat >>"\$tmp/subs1.awk" <<\\_ACAWK && _ACEOF sed -n ' h s/^/S["/; s/!.*/"]=/ p g s/^[^!]*!// :repl t repl s/'"$ac_delim"'$// t delim :nl h s/\(.\{148\}\).*/\1/ t more1 s/["\\]/\\&/g; s/^/"/; s/$/\\n"\\/ p n b repl :more1 s/["\\]/\\&/g; s/^/"/; s/$/"\\/ p g s/.\{148\}// t nl :delim h s/\(.\{148\}\).*/\1/ t more2 s/["\\]/\\&/g; s/^/"/; s/$/"/ p b :more2 s/["\\]/\\&/g; s/^/"/; s/$/"\\/ p g s/.\{148\}// t delim ' >$CONFIG_STATUS || ac_write_fail=1 rm -f conf$$subs.awk cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 _ACAWK cat >>"\$tmp/subs1.awk" <<_ACAWK && for (key in S) S_is_set[key] = 1 FS = "" } { line = $ 0 nfields = split(line, field, "@") substed = 0 len = length(field[1]) for (i = 2; i < nfields; i++) { key = field[i] keylen = length(key) if (S_is_set[key]) { value = S[key] line = substr(line, 1, len) "" value "" substr(line, len + keylen + 3) len += length(value) + length(field[++i]) substed = 1 } else len += 1 + keylen } print line } _ACAWK _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 if sed "s/$ac_cr//" < /dev/null > /dev/null 2>&1; then sed "s/$ac_cr\$//; s/$ac_cr/$ac_cs_awk_cr/g" else cat fi < "$tmp/subs1.awk" > "$tmp/subs.awk" \ || { { $as_echo "$as_me:$LINENO: error: could not setup config files machinery" >&5 $as_echo "$as_me: error: could not setup config files machinery" >&2;} { (exit 1); exit 1; }; } _ACEOF # VPATH may cause trouble with some makes, so we remove $(srcdir), # ${srcdir} and @srcdir@ from VPATH if srcdir is ".", strip leading and # trailing colons and then remove the whole line if VPATH becomes empty # (actually we leave an empty line to preserve line numbers). 
if test "x$srcdir" = x.; then ac_vpsub='/^[ ]*VPATH[ ]*=/{ s/:*\$(srcdir):*/:/ s/:*\${srcdir}:*/:/ s/:*@srcdir@:*/:/ s/^\([^=]*=[ ]*\):*/\1/ s/:*$// s/^[^=]*=[ ]*$// }' fi cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 fi # test -n "$CONFIG_FILES" eval set X " :F $CONFIG_FILES " shift for ac_tag do case $ac_tag in :[FHLC]) ac_mode=$ac_tag; continue;; esac case $ac_mode$ac_tag in :[FHL]*:*);; :L* | :C*:*) { { $as_echo "$as_me:$LINENO: error: invalid tag $ac_tag" >&5 $as_echo "$as_me: error: invalid tag $ac_tag" >&2;} { (exit 1); exit 1; }; };; :[FH]-) ac_tag=-:-;; :[FH]*) ac_tag=$ac_tag:$ac_tag.in;; esac ac_save_IFS=$IFS IFS=: set x $ac_tag IFS=$ac_save_IFS shift ac_file=$1 shift case $ac_mode in :L) ac_source=$1;; :[FH]) ac_file_inputs= for ac_f do case $ac_f in -) ac_f="$tmp/stdin";; *) # Look for the file first in the build tree, then in the source tree # (if the path is not absolute). The absolute path cannot be DOS-style, # because $ac_f cannot contain `:'. test -f "$ac_f" || case $ac_f in [\\/$]*) false;; *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";; esac || { { $as_echo "$as_me:$LINENO: error: cannot find input file: $ac_f" >&5 $as_echo "$as_me: error: cannot find input file: $ac_f" >&2;} { (exit 1); exit 1; }; };; esac case $ac_f in *\'*) ac_f=`$as_echo "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac ac_file_inputs="$ac_file_inputs '$ac_f'" done # Let's still pretend it is `configure' which instantiates (i.e., don't # use $as_me), people would be surprised to read: # /* config.h. Generated by config.status. */ configure_input='Generated from '` $as_echo "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g' `' by configure.' if test x"$ac_file" != x-; then configure_input="$ac_file. $configure_input" { $as_echo "$as_me:$LINENO: creating $ac_file" >&5 $as_echo "$as_me: creating $ac_file" >&6;} fi # Neutralize special characters interpreted by sed in replacement strings. 
case $configure_input in #( *\&* | *\|* | *\\* ) ac_sed_conf_input=`$as_echo "$configure_input" | sed 's/[\\\\&|]/\\\\&/g'`;; #( *) ac_sed_conf_input=$configure_input;; esac case $ac_tag in *:-:* | *:-) cat >"$tmp/stdin" \ || { { $as_echo "$as_me:$LINENO: error: could not create $ac_file" >&5 $as_echo "$as_me: error: could not create $ac_file" >&2;} { (exit 1); exit 1; }; } ;; esac ;; esac ac_dir=`$as_dirname -- "$ac_file" || $as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$ac_file" : 'X\(//\)[^/]' \| \ X"$ac_file" : 'X\(//\)$' \| \ X"$ac_file" : 'X\(/\)' \| . 2>/dev/null || $as_echo X"$ac_file" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q'` { as_dir="$ac_dir" case $as_dir in #( -*) as_dir=./$as_dir;; esac test -d "$as_dir" || { $as_mkdir_p && mkdir -p "$as_dir"; } || { as_dirs= while :; do case $as_dir in #( *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( *) as_qdir=$as_dir;; esac as_dirs="'$as_qdir' $as_dirs" as_dir=`$as_dirname -- "$as_dir" || $as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$as_dir" : 'X\(//\)[^/]' \| \ X"$as_dir" : 'X\(//\)$' \| \ X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || $as_echo X"$as_dir" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q'` test -d "$as_dir" && break done test -z "$as_dirs" || eval "mkdir $as_dirs" } || test -d "$as_dir" || { { $as_echo "$as_me:$LINENO: error: cannot create directory $as_dir" >&5 $as_echo "$as_me: error: cannot create directory $as_dir" >&2;} { (exit 1); exit 1; }; }; } ac_builddir=. case "$ac_dir" in .) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; *) ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` # A ".." for each directory in $ac_dir_suffix. 
ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` case $ac_top_builddir_sub in "") ac_top_builddir_sub=. ac_top_build_prefix= ;; *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; esac ;; esac ac_abs_top_builddir=$ac_pwd ac_abs_builddir=$ac_pwd$ac_dir_suffix # for backward compatibility: ac_top_builddir=$ac_top_build_prefix case $srcdir in .) # We are building in place. ac_srcdir=. ac_top_srcdir=$ac_top_builddir_sub ac_abs_top_srcdir=$ac_pwd ;; [\\/]* | ?:[\\/]* ) # Absolute name. ac_srcdir=$srcdir$ac_dir_suffix; ac_top_srcdir=$srcdir ac_abs_top_srcdir=$srcdir ;; *) # Relative name. ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix ac_top_srcdir=$ac_top_build_prefix$srcdir ac_abs_top_srcdir=$ac_pwd/$srcdir ;; esac ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix case $ac_mode in :F) # # CONFIG_FILE # _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # If the template does not know about datarootdir, expand it. # FIXME: This hack should be removed a few years after 2.60. ac_datarootdir_hack=; ac_datarootdir_seen= ac_sed_dataroot=' /datarootdir/ { p q } /@datadir@/p /@docdir@/p /@infodir@/p /@localedir@/p /@mandir@/p ' case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in *datarootdir*) ac_datarootdir_seen=yes;; *@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*) { $as_echo "$as_me:$LINENO: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5 $as_echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;} _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_datarootdir_hack=' s&@datadir@&$datadir&g s&@docdir@&$docdir&g s&@infodir@&$infodir&g s&@localedir@&$localedir&g s&@mandir@&$mandir&g s&\\\${datarootdir}&$datarootdir&g' ;; esac _ACEOF # Neutralize VPATH when `$srcdir' = `.'. # Shell code in configure.ac might set extrasub. # FIXME: do we really want to maintain this feature? 
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_sed_extra="$ac_vpsub $extrasub _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 :t /@[a-zA-Z_][a-zA-Z_0-9]*@/!b s|@configure_input@|$ac_sed_conf_input|;t t s&@top_builddir@&$ac_top_builddir_sub&;t t s&@top_build_prefix@&$ac_top_build_prefix&;t t s&@srcdir@&$ac_srcdir&;t t s&@abs_srcdir@&$ac_abs_srcdir&;t t s&@top_srcdir@&$ac_top_srcdir&;t t s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t s&@builddir@&$ac_builddir&;t t s&@abs_builddir@&$ac_abs_builddir&;t t s&@abs_top_builddir@&$ac_abs_top_builddir&;t t $ac_datarootdir_hack " eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$tmp/subs.awk" >$tmp/out \ || { { $as_echo "$as_me:$LINENO: error: could not create $ac_file" >&5 $as_echo "$as_me: error: could not create $ac_file" >&2;} { (exit 1); exit 1; }; } test -z "$ac_datarootdir_hack$ac_datarootdir_seen" && { ac_out=`sed -n '/\${datarootdir}/p' "$tmp/out"`; test -n "$ac_out"; } && { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' "$tmp/out"`; test -z "$ac_out"; } && { $as_echo "$as_me:$LINENO: WARNING: $ac_file contains a reference to the variable \`datarootdir' which seems to be undefined. Please make sure it is defined." >&5 $as_echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir' which seems to be undefined. Please make sure it is defined." 
>&2;} rm -f "$tmp/stdin" case $ac_file in -) cat "$tmp/out" && rm -f "$tmp/out";; *) rm -f "$ac_file" && mv "$tmp/out" "$ac_file";; esac \ || { { $as_echo "$as_me:$LINENO: error: could not create $ac_file" >&5 $as_echo "$as_me: error: could not create $ac_file" >&2;} { (exit 1); exit 1; }; } ;; esac done # for ac_tag { (exit 0); exit 0; } _ACEOF chmod +x $CONFIG_STATUS ac_clean_files=$ac_clean_files_save test $ac_write_fail = 0 || { { $as_echo "$as_me:$LINENO: error: write failure creating $CONFIG_STATUS" >&5 $as_echo "$as_me: error: write failure creating $CONFIG_STATUS" >&2;} { (exit 1); exit 1; }; } # configure is writing to config.log, and then calls config.status. # config.status does its own redirection, appending to config.log. # Unfortunately, on DOS this fails, as config.log is still kept open # by configure, so config.status won't be able to write to it; its # output is simply discarded. So we exec the FD to /dev/null, # effectively closing config.log, so it can be properly (re)opened and # appended to by config.status. When coming back to configure, we # need to make the FD available again. if test "$no_create" != yes; then ac_cs_success=: ac_config_status_args= test "$silent" = yes && ac_config_status_args="$ac_config_status_args --quiet" exec 5>/dev/null $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false exec 5>>config.log # Use ||, not &&, to avoid exiting from the if with $? = 1, which # would make configure fail if this is the last instruction. 
$ac_cs_success || { (exit 1); exit 1; } fi if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then { $as_echo "$as_me:$LINENO: WARNING: unrecognized options: $ac_unrecognized_opts" >&5 $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;} fi papi-5.6.0/src/perfctr-2.6.x/linux/drivers/perfctr/ppc_compat.h000775 001750 001750 00000007705 13216244367 026342 0ustar00jshenry1963jshenry1963000000 000000 /* $Id: ppc_compat.h,v 1.1.2.8 2009/01/23 17:01:02 mikpe Exp $ * Performance-monitoring counters driver. * PPC32-specific compatibility definitions for 2.6 kernels. * * Copyright (C) 2004-2007, 2009 Mikael Pettersson */ #include #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) #include #endif #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16) #include #else static inline int reserve_pmc_hardware(void (*new_perf_irq)(struct pt_regs*)) { return 0; } static inline void release_pmc_hardware(void) { } #endif extern int perfctr_reserve_pmc_hardware(void); static inline void perfctr_release_pmc_hardware(void) { release_pmc_hardware(); } #undef MMCR0_FC #undef MMCR0_FCECE #undef MMCR0_FCM0 #undef MMCR0_FCM1 #undef MMCR0_FCP #undef MMCR0_FCS #undef MMCR0_PMC1CE #undef MMCR0_PMC1SEL #undef MMCR0_PMC2SEL #undef MMCR0_PMXE #undef MMCR0_TBEE #undef MMCR0_TRIGGER #undef MMCR1_PMC3SEL #undef MMCR1_PMC4SEL #undef MMCR1_PMC5SEL #undef MMCR1_PMC6SEL #undef SPRN_MMCR0 #undef SPRN_MMCR1 #undef SPRN_MMCR2 #undef SPRN_PMC1 #undef SPRN_PMC2 #undef SPRN_PMC3 #undef SPRN_PMC4 #undef SPRN_PMC5 #undef SPRN_PMC6 #define SPRN_MMCR0 0x3B8 /* 604 and up */ #define SPRN_PMC1 0x3B9 /* 604 and up */ #define SPRN_PMC2 0x3BA /* 604 and up */ #define SPRN_SIA 0x3BB /* 604 and up */ #define SPRN_MMCR1 0x3BC /* 604e and up */ #define SPRN_PMC3 0x3BD /* 604e and up */ #define SPRN_PMC4 0x3BE /* 604e and up */ #define SPRN_MMCR2 0x3B0 /* 7400 and up */ #define SPRN_BAMR 0x3B7 /* 7400 and up */ #define SPRN_PMC5 0x3B1 /* 7450 and up */ #define SPRN_PMC6 0x3B2 /* 7450 
and up */ /* MMCR0 layout (74xx terminology) */ #define MMCR0_FC 0x80000000 /* Freeze counters unconditionally. */ #define MMCR0_FCS 0x40000000 /* Freeze counters while MSR[PR]=0 (supervisor mode). */ #define MMCR0_FCP 0x20000000 /* Freeze counters while MSR[PR]=1 (user mode). */ #define MMCR0_FCM1 0x10000000 /* Freeze counters while MSR[PM]=1. */ #define MMCR0_FCM0 0x08000000 /* Freeze counters while MSR[PM]=0. */ #define MMCR0_PMXE 0x04000000 /* Enable performance monitor exceptions. * Cleared by hardware when a PM exception occurs. * 604: PMXE is not cleared by hardware. */ #define MMCR0_FCECE 0x02000000 /* Freeze counters on enabled condition or event. * FCECE is treated as 0 if TRIGGER is 1. * 74xx: FC is set when the event occurs. * 604/750: ineffective when PMXE=0. */ #define MMCR0_TBSEL 0x01800000 /* Time base lower (TBL) bit selector. * 00: bit 31, 01: bit 23, 10: bit 19, 11: bit 15. */ #define MMCR0_TBEE 0x00400000 /* Enable event on TBL bit transition from 0 to 1. */ #define MMCR0_THRESHOLD 0x003F0000 /* Threshold value for certain events. */ #define MMCR0_PMC1CE 0x00008000 /* Enable event on PMC1 overflow. */ #define MMCR0_PMCjCE 0x00004000 /* Enable event on PMC2-PMC6 overflow. * 604/750: Overrides FCECE (DISCOUNT). */ #define MMCR0_TRIGGER 0x00002000 /* Disable PMC2-PMC6 until PMC1 overflow or other event. * 74xx: cleared by hardware when the event occurs. */ #define MMCR0_PMC1SEL 0x00001FC0 /* PMC1 event selector, 7 bits. */ #define MMCR0_PMC2SEL 0x0000003F /* PMC2 event selector, 6 bits. */ /* MMCR1 layout (604e-7457) */ #define MMCR1_PMC3SEL 0xF8000000 /* PMC3 event selector, 5 bits. */ #define MMCR1_PMC4SEL 0x07C00000 /* PMC4 event selector, 5 bits. */ #define MMCR1_PMC5SEL 0x003E0000 /* PMC5 event selector, 5 bits. (745x only) */ #define MMCR1_PMC6SEL 0x0001F800 /* PMC6 event selector, 6 bits. 
(745x only) */ #define MMCR1__RESERVED 0x000007FF /* should be zero */ /* MMCR2 layout (7400-7457) */ #define MMCR2_THRESHMULT 0x80000000 /* MMCR0[THRESHOLD] multiplier. */ #define MMCR2_SMCNTEN 0x40000000 /* 7400/7410 only, should be zero. */ #define MMCR2_SMINTEN 0x20000000 /* 7400/7410 only, should be zero. */ #define MMCR2__RESERVED 0x1FFFFFFF /* should be zero */ #define MMCR2_RESERVED (MMCR2_SMCNTEN | MMCR2_SMINTEN | MMCR2__RESERVED) papi-5.6.0/src/components/bgpm/NWunit/linux-NWunit.c000664 001750 001750 00000026273 13216244357 024424 0ustar00jshenry1963jshenry1963000000 000000 /****************************/ /* THIS IS OPEN SOURCE CODE */ /****************************/ /** * @file linux-NWunit.c * @author Heike Jagode * jagode@eecs.utk.edu * Mods: < your name here > * < your email address > * BGPM / NWunit component * * Tested version of bgpm (early access) * * @brief * This file has the source code for a component that enables PAPI-C to * access hardware monitoring counters for BG/Q through the bgpm library. 
*/ #include "linux-NWunit.h" /* Declare our vector in advance */ papi_vector_t _NWunit_vector; /***************************************************************************** ******************* BEGIN PAPI's COMPONENT REQUIRED FUNCTIONS ************* *****************************************************************************/ /* * This is called whenever a thread is initialized */ int NWUNIT_init_thread( hwd_context_t * ctx ) { #ifdef DEBUG_BGQ printf( "NWUNIT_init_thread\n" ); #endif ( void ) ctx; return PAPI_OK; } /* Initialize hardware counters, setup the function vector table * and get hardware information, this routine is called when the * PAPI process is initialized (IE PAPI_library_init) */ int NWUNIT_init_component( int cidx ) { #ifdef DEBUG_BGQ printf( "NWUNIT_init_component\n" ); #endif _NWunit_vector.cmp_info.CmpIdx = cidx; #ifdef DEBUG_BGQ printf( "NWUNIT_init_component cidx = %d\n", cidx ); #endif return ( PAPI_OK ); } /* * Control of counters (Reading/Writing/Starting/Stopping/Setup) * functions */ int NWUNIT_init_control_state( hwd_control_state_t * ptr ) { #ifdef DEBUG_BGQ printf( "NWUNIT_init_control_state\n" ); #endif int retval; NWUNIT_control_state_t * this_state = ( NWUNIT_control_state_t * ) ptr; this_state->EventGroup = Bgpm_CreateEventSet(); retval = _check_BGPM_error( this_state->EventGroup, "Bgpm_CreateEventSet" ); if ( retval < 0 ) return retval; return PAPI_OK; } /* * */ int NWUNIT_start( hwd_context_t * ctx, hwd_control_state_t * ptr ) { #ifdef DEBUG_BGQ printf( "NWUNIT_start\n" ); #endif ( void ) ctx; int retval; NWUNIT_control_state_t * this_state = ( NWUNIT_control_state_t * ) ptr; retval = Bgpm_Attach( this_state->EventGroup, UPC_NW_ALL_LINKS, 0); retval = _check_BGPM_error( retval, "Bgpm_Attach" ); if ( retval < 0 ) return retval; retval = Bgpm_ResetStart( this_state->EventGroup ); retval = _check_BGPM_error( retval, "Bgpm_ResetStart" ); if ( retval < 0 ) return retval; return ( PAPI_OK ); } /* * */ int NWUNIT_stop( hwd_context_t 
* ctx, hwd_control_state_t * ptr ) { #ifdef DEBUG_BGQ printf( "NWUNIT_stop\n" ); #endif ( void ) ctx; int retval; NWUNIT_control_state_t * this_state = ( NWUNIT_control_state_t * ) ptr; retval = Bgpm_Stop( this_state->EventGroup ); retval = _check_BGPM_error( retval, "Bgpm_Stop" ); if ( retval < 0 ) return retval; return ( PAPI_OK ); } /* * */ int NWUNIT_read( hwd_context_t * ctx, hwd_control_state_t * ptr, long_long ** events, int flags ) { #ifdef DEBUG_BGQ printf( "NWUNIT_read\n" ); #endif ( void ) ctx; ( void ) flags; int i, numEvts; NWUNIT_control_state_t * this_state = ( NWUNIT_control_state_t * ) ptr; numEvts = Bgpm_NumEvents( this_state->EventGroup ); if ( numEvts == 0 ) { #ifdef DEBUG_BGPM printf ("Error: ret value is %d for BGPM API function Bgpm_NumEvents.\n", numEvts ); #endif //return ( EXIT_FAILURE ); } for ( i = 0; i < numEvts; i++ ) this_state->counts[i] = _common_getEventValue( i, this_state->EventGroup ); *events = this_state->counts; return ( PAPI_OK ); } /* * */ int NWUNIT_shutdown_thread( hwd_context_t * ctx ) { #ifdef DEBUG_BGQ printf( "NWUNIT_shutdown_thread\n" ); #endif ( void ) ctx; return ( PAPI_OK ); } /* This function sets various options in the component * The valid codes being passed in are PAPI_SET_DEFDOM, * PAPI_SET_DOMAIN, PAPI_SETDEFGRN, PAPI_SET_GRANUL * and PAPI_SET_INHERIT */ int NWUNIT_ctl( hwd_context_t * ctx, int code, _papi_int_option_t * option ) { #ifdef DEBUG_BGQ printf( "NWUNIT_ctl\n" ); #endif ( void ) ctx; ( void ) code; ( void ) option; return ( PAPI_OK ); } //int NWUNIT_ntv_code_to_bits ( unsigned int EventCode, hwd_register_t * bits ); /* * */ int NWUNIT_update_control_state( hwd_control_state_t * ptr, NativeInfo_t * native, int count, hwd_context_t * ctx ) { #ifdef DEBUG_BGQ printf( "NWUNIT_update_control_state: count = %d\n", count ); #endif ( void ) ctx; int retval, index, i; NWUNIT_control_state_t * this_state = ( NWUNIT_control_state_t * ) ptr; // Delete and re-create BGPM eventset retval = 
_common_deleteRecreate( &this_state->EventGroup ); if ( retval < 0 ) return retval; // otherwise, add the events to the eventset for ( i = 0; i < count; i++ ) { index = ( native[i].ni_event ) + OFFSET; native[i].ni_position = i; #ifdef DEBUG_BGQ printf("NWUNIT_update_control_state: ADD event: i = %d, index = %d\n", i, index ); #endif /* Add events to the BGPM eventGroup */ retval = Bgpm_AddEvent( this_state->EventGroup, index ); retval = _check_BGPM_error( retval, "Bgpm_AddEvent" ); if ( retval < 0 ) return retval; } return ( PAPI_OK ); } /* * This function has to set the bits needed to count different domains * In particular: PAPI_DOM_USER, PAPI_DOM_KERNEL PAPI_DOM_OTHER * By default return PAPI_EINVAL if none of those are specified * and PAPI_OK with success * PAPI_DOM_USER is only user context is counted * PAPI_DOM_KERNEL is only the Kernel/OS context is counted * PAPI_DOM_OTHER is Exception/transient mode (like user TLB misses) * PAPI_DOM_ALL is all of the domains */ int NWUNIT_set_domain( hwd_control_state_t * cntrl, int domain ) { #ifdef DEBUG_BGQ printf( "NWUNIT_set_domain\n" ); #endif int found = 0; ( void ) cntrl; if ( PAPI_DOM_USER & domain ) found = 1; if ( PAPI_DOM_KERNEL & domain ) found = 1; if ( PAPI_DOM_OTHER & domain ) found = 1; if ( !found ) return ( PAPI_EINVAL ); return ( PAPI_OK ); } /* * */ int NWUNIT_reset( hwd_context_t * ctx, hwd_control_state_t * ptr ) { #ifdef DEBUG_BGQ printf( "NWUNIT_reset\n" ); #endif ( void ) ctx; int retval; NWUNIT_control_state_t * this_state = ( NWUNIT_control_state_t * ) ptr; /* we can't simply call Bgpm_Reset() since PAPI doesn't have the restriction that an EventSet has to be stopped before resetting is possible. However, BGPM does have this restriction. 
Hence we need to stop, reset and start */ retval = Bgpm_Stop( this_state->EventGroup ); retval = _check_BGPM_error( retval, "Bgpm_Stop" ); if ( retval < 0 ) return retval; retval = Bgpm_ResetStart( this_state->EventGroup ); retval = _check_BGPM_error( retval, "Bgpm_ResetStart" ); if ( retval < 0 ) return retval; return ( PAPI_OK ); } /* * PAPI Cleanup Eventset * * Destroy and re-create the BGPM / NWunit EventSet */ int NWUNIT_cleanup_eventset( hwd_control_state_t * ctrl ) { #ifdef DEBUG_BGQ printf( "NWUNIT_cleanup_eventset\n" ); #endif int retval; NWUNIT_control_state_t * this_state = ( NWUNIT_control_state_t * ) ctrl; // create a new empty bgpm eventset // reason: bgpm doesn't permit to remove events from an eventset; // hence we delete the old eventset and create a new one retval = _common_deleteRecreate( &this_state->EventGroup ); // HJ try to use delete() only if ( retval < 0 ) return retval; return ( PAPI_OK ); } /* * Native Event functions */ int NWUNIT_ntv_enum_events( unsigned int *EventCode, int modifier ) { //printf( "NWUNIT_ntv_enum_events\n" ); switch ( modifier ) { case PAPI_ENUM_FIRST: *EventCode = 0; return ( PAPI_OK ); break; case PAPI_ENUM_EVENTS: { int index = ( *EventCode ) + OFFSET; if ( index < NWUNIT_MAX_EVENTS ) { *EventCode = *EventCode + 1; return ( PAPI_OK ); } else return ( PAPI_ENOEVNT ); break; } default: return ( PAPI_EINVAL ); } return ( PAPI_EINVAL ); } /* * */ int NWUNIT_ntv_name_to_code( const char *name, unsigned int *event_code ) { #ifdef DEBUG_BGQ printf( "NWUNIT_ntv_name_to_code\n" ); #endif int ret; /* Return event id matching a given event label string */ ret = Bgpm_GetEventIdFromLabel ( name ); if ( ret <= 0 ) { #ifdef DEBUG_BGPM printf ("Error: ret value is %d for BGPM API function '%s'.\n", ret, "Bgpm_GetEventIdFromLabel" ); #endif return PAPI_ENOEVNT; } else if ( ret < OFFSET || ret > NWUNIT_MAX_EVENTS ) // not a NWUnit event return PAPI_ENOEVNT; else *event_code = ( ret - OFFSET ) ; return PAPI_OK; } /* * */ int 
NWUNIT_ntv_code_to_name( unsigned int EventCode, char *name, int len ) { #ifdef DEBUG_BGQ //printf( "NWUNIT_ntv_code_to_name\n" ); #endif int index; index = ( EventCode ) + OFFSET; if ( index >= MAX_COUNTERS ) return PAPI_ENOEVNT; strncpy( name, Bgpm_GetEventIdLabel( index ), len ); if ( name == NULL ) { #ifdef DEBUG_BGPM printf ("Error: ret value is NULL for BGPM API function Bgpm_GetEventIdLabel.\n" ); #endif return PAPI_ENOEVNT; } return ( PAPI_OK ); } /* * */ int NWUNIT_ntv_code_to_descr( unsigned int EventCode, char *name, int len ) { #ifdef DEBUG_BGQ //printf( "NWUNIT_ntv_code_to_descr\n" ); #endif int retval, index; index = ( EventCode ) + OFFSET; retval = Bgpm_GetLongDesc( index, name, &len ); retval = _check_BGPM_error( retval, "Bgpm_GetLongDesc" ); if ( retval < 0 ) return retval; return ( PAPI_OK ); } /* * */ int NWUNIT_ntv_code_to_bits( unsigned int EventCode, hwd_register_t * bits ) { #ifdef DEBUG_BGQ printf( "NWUNIT_ntv_code_to_bits\n" ); #endif ( void ) EventCode; ( void ) bits; return ( PAPI_OK ); } /* * */ papi_vector_t _NWunit_vector = { .cmp_info = { /* default component information (unspecified values are initialized to 0) */ .name = "bgpm/NWUnit", .short_name = "NWUnit", .description = "Blue Gene/Q NWUnit component", .num_cntrs = NWUNIT_MAX_COUNTERS, .num_native_events = NWUNIT_MAX_EVENTS-OFFSET+1, .num_mpx_cntrs = NWUNIT_MAX_COUNTERS, .default_domain = PAPI_DOM_USER, .available_domains = PAPI_DOM_USER | PAPI_DOM_KERNEL, .default_granularity = PAPI_GRN_THR, .available_granularities = PAPI_GRN_THR, .hardware_intr_sig = PAPI_INT_SIGNAL, .hardware_intr = 1, .kernel_multiplex = 0, /* component specific cmp_info initializations */ .fast_real_timer = 0, .fast_virtual_timer = 0, .attach = 0, .attach_must_ptrace = 0, } , /* sizes of framework-opaque component-private structures */ .size = { .context = sizeof ( NWUNIT_context_t ), .control_state = sizeof ( NWUNIT_control_state_t ), .reg_value = sizeof ( NWUNIT_register_t ), .reg_alloc = sizeof ( 
NWUNIT_reg_alloc_t ), } , /* function pointers in this component */ .init_thread = NWUNIT_init_thread, .init_component = NWUNIT_init_component, .init_control_state = NWUNIT_init_control_state, .start = NWUNIT_start, .stop = NWUNIT_stop, .read = NWUNIT_read, .shutdown_thread = NWUNIT_shutdown_thread, .cleanup_eventset = NWUNIT_cleanup_eventset, .ctl = NWUNIT_ctl, .update_control_state = NWUNIT_update_control_state, .set_domain = NWUNIT_set_domain, .reset = NWUNIT_reset, .ntv_name_to_code = NWUNIT_ntv_name_to_code, .ntv_enum_events = NWUNIT_ntv_enum_events, .ntv_code_to_name = NWUNIT_ntv_code_to_name, .ntv_code_to_descr = NWUNIT_ntv_code_to_descr, .ntv_code_to_bits = NWUNIT_ntv_code_to_bits }; papi-5.6.0/src/perfctr-2.7.x/etc/000775 001750 001750 00000000000 13216244367 020322 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/components/appio/000775 001750 001750 00000000000 13216244356 020623 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/ctests/zero.c000664 001750 001750 00000012423 13216244361 017754 0ustar00jshenry1963jshenry1963000000 000000 /* zero.c */ /* This is possibly the most important PAPI tests, and is the one */ /* that is often used as a quick test that PAPI is working. */ /* We should make sure that it always passes, if possible. */ /* Traditionally it used FLOPS, due to the importance of this to HPC. */ /* This has been changed to use Instructions/Cycles as some recent */ /* major Intel chips do not have good floating point events and would fail. 
*/ #include #include #include "papi.h" #include "papi_test.h" #include "testcode.h" #define NUM_EVENTS 2 #define NUM_LOOPS 200 int main( int argc, char **argv ) { int retval, tmp, result, i; int EventSet1 = PAPI_NULL; long long values[NUM_EVENTS]; long long elapsed_us, elapsed_cyc, elapsed_virt_us, elapsed_virt_cyc; double ipc; int quiet=0; /* Set TESTS_QUIET variable */ quiet=tests_quiet( argc, argv ); /* Init the PAPI library */ retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) { test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); } /* Initialize the EventSet */ retval=PAPI_create_eventset(&EventSet1); if (retval!=PAPI_OK) { test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); } /* Add PAPI_TOT_CYC */ retval=PAPI_add_named_event(EventSet1,"PAPI_TOT_CYC"); if (retval!=PAPI_OK) { if (!quiet) printf("Trouble adding PAPI_TOT_CYC\n"); test_skip( __FILE__, __LINE__, "adding PAPI_TOT_CYC", retval ); } /* Add PAPI_TOT_INS */ retval=PAPI_add_named_event(EventSet1,"PAPI_TOT_INS"); if (retval!=PAPI_OK) { test_fail( __FILE__, __LINE__, "adding PAPI_TOT_INS", retval ); } /* warm up the processor to pull it out of idle state */ for(i=0;i<100;i++) { result=instructions_million(); } if (result==CODE_UNIMPLEMENTED) { if (!quiet) printf("Instructions testcode not available\n"); test_skip( __FILE__, __LINE__, "No instructions code", retval ); } /* Gather before stats */ elapsed_us = PAPI_get_real_usec( ); elapsed_cyc = PAPI_get_real_cyc( ); elapsed_virt_us = PAPI_get_virt_usec( ); elapsed_virt_cyc = PAPI_get_virt_cyc( ); /* Start PAPI */ retval = PAPI_start( EventSet1 ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_start", retval ); } /* our work code */ for(i=0;i (1000000*NUM_LOOPS)) { printf("%s Error of %.2f%%\n", "PAPI_TOT_INS", (100.0 * (double)(values[1] - (1000000*NUM_LOOPS)))/(1000000*NUM_LOOPS)); test_fail( __FILE__, __LINE__, "Instruction validation", 0 ); } /* Check that TOT_CYC is non-zero */ 
if(values[0]==0) { printf("Cycles is zero\n"); test_fail( __FILE__, __LINE__, "Cycles validation", 0 ); } /* Unless you have an amazing processor, IPC should be < 100 */ if ((ipc <=0.01 ) || (ipc >=100.0)) { printf("Unlikely IPC of %.2f%%\n", ipc); test_fail( __FILE__, __LINE__, "IPC validation", 0 ); } test_pass( __FILE__ ); return 0; } papi-5.6.0/src/freebsd/map-i7.h000664 001750 001750 00000036720 13216244361 020207 0ustar00jshenry1963jshenry1963000000 000000 /****************************/ /* THIS IS OPEN SOURCE CODE */ /****************************/ /* * File: map-i7.h * CVS: $Id: map-i7.h,v 1.1.2.2 2010/03/06 16:12:08 servat Exp $ * Author: George Neville-Neil * gnn@freebsd.org */ #ifndef FreeBSD_MAP_I7 #define FreeBSD_MAP_I7 /* Native event codes for the i7 processor. The first enumerator is set to PAPI_NATIVE_MASK and every following enumerator takes the next consecutive value. map-i7.c notes that this enum must match its i7 info table, so add, remove or reorder entries in both places together. */ enum NativeEvent_Value_i7Processor { PNE_I7_SB_FORWARD_ANY= PAPI_NATIVE_MASK , PNE_I7_LOAD_BLOCK_STD, PNE_I7_LOAD_BLOCK_ADDRESS_OFFSET, PNE_I7_SB_DRAIN_CYCLES, PNE_I7_MISALIGN_MEM_REF_LOAD, PNE_I7_MISALIGN_MEM_REF_STORE, PNE_I7_MISALIGN_MEM_REF_ANY, PNE_I7_STORE_BLOCKS_NOT_STA, PNE_I7_STORE_BLOCKS_STA, PNE_I7_STORE_BLOCKS_AT_RET, PNE_I7_STORE_BLOCKS_L1D_BLOCK, PNE_I7_STORE_BLOCKS_ANY, PNE_I7_PARTIAL_ADDRESS_ALIAS, PNE_I7_DTLB_LOAD_MISSES_ANY, PNE_I7_DTLB_LOAD_MISSES_WALK_COMPLETED, PNE_I7_DTLB_LOAD_MISSES_STLB_HIT, PNE_I7_DTLB_LOAD_MISSES_PDE_MISS, PNE_I7_DTLB_LOAD_MISSES_PDP_MISS, PNE_I7_DTLB_LOAD_MISSES_LARGE_WALK_COMPLETED, PNE_I7_MEMORY_DISAMBIGURATION_RESET, PNE_I7_MEMORY_DISAMBIGURATION_SUCCESS, PNE_I7_MEMORY_DISAMBIGURATION_WATCHDOG, PNE_I7_MEMORY_DISAMBIGURATION_WATCH_CYCLES, PNE_I7_MEM_INST_RETIRED_LOADS, PNE_I7_MEM_INST_RETIRED_STORES, PNE_I7_MEM_STORE_RETIRED_DTLB_MISS, PNE_I7_UOPS_ISSUED_ANY, PNE_I7_UOPS_ISSUED_FUSED, PNE_I7_MEM_UNCORE_RETIRED_OTHER_CORE_L2_HITM, PNE_I7_MEM_UNCORE_RETIRED_REMOTE_CACHE_LOCAL_HOME_HIT, PNE_I7_MEM_UNCORE_RETIRED_REMOTE_DRAM, PNE_I7_MEM_UNCORE_RETIRED_LOCAL_DRAM, PNE_I7_FP_COMP_OPS_EXE_X87, PNE_I7_FP_COMP_OPS_EXE_MMX, PNE_I7_FP_COMP_OPS_EXE_SSE_FP, PNE_I7_FP_COMP_OPS_EXE_SSE2_INTEGER, 
PNE_I7_FP_COMP_OPS_EXE_SSE_FP_PACKED, PNE_I7_FP_COMP_OPS_EXE_SSE_FP_SCALAR, PNE_I7_FP_COMP_OPS_EXE_SSE_SINGLE_PRECISION, PNE_I7_FP_COMP_OPS_EXE_SSE_DOUBLE_PRECISION, PNE_I7_SIMD_INT_128_PACKED_MPY, PNE_I7_SIMD_INT_128_PACKED_SHIFT, PNE_I7_SIMD_INT_128_PACK, PNE_I7_SIMD_INT_128_UNPACK, PNE_I7_SIMD_INT_128_PACKED_LOGICAL, PNE_I7_SIMD_INT_128_PACKED_ARITH, PNE_I7_SIMD_INT_128_SHUFFLE_MOVE, PNE_I7_LOAD_DISPATCH_RS, PNE_I7_LOAD_DISPATCH_RS_DELAYED, PNE_I7_LOAD_DISPATCH_MOB, PNE_I7_LOAD_DISPATCH_ANY, PNE_I7_ARITH_CYCLES_DIV_BUSY, PNE_I7_ARITH_MUL, PNE_I7_INST_QUEUE_WRITES, PNE_I7_INST_DECODED_DEC0, PNE_I7_TWO_UOP_INSTS_DECODED, PNE_I7_HW_INT_RCV, PNE_I7_HW_INT_CYCLES_MASKED, PNE_I7_HW_INT_CYCLES_PENDING_AND_MASKED, PNE_I7_INST_QUEUE_WRITE_CYCLES, PNE_I7_L2_RQSTS_LD_HIT, PNE_I7_L2_RQSTS_LD_MISS, PNE_I7_L2_RQSTS_LOADS, PNE_I7_L2_RQSTS_RFO_HIT, PNE_I7_L2_RQSTS_RFO_MISS, PNE_I7_L2_RQSTS_RFOS, PNE_I7_L2_RQSTS_IFETCH_HIT, PNE_I7_L2_RQSTS_IFETCH_MISS, PNE_I7_L2_RQSTS_IFETCHES, PNE_I7_L2_RQSTS_PREFETCH_HIT, PNE_I7_L2_RQSTS_PREFETCH_MISS, PNE_I7_L2_RQSTS_PREFETCHES, PNE_I7_L2_RQSTS_MISS, PNE_I7_L2_RQSTS_REFERENCES, PNE_I7_L2_DATA_RQSTS_DEMAND_I_STATE, PNE_I7_L2_DATA_RQSTS_DEMAND_S_STATE, PNE_I7_L2_DATA_RQSTS_DEMAND_E_STATE, PNE_I7_L2_DATA_RQSTS_DEMAND_M_STATE, PNE_I7_L2_DATA_RQSTS_DEMAND_MESI, PNE_I7_L2_DATA_RQSTS_PREFETCH_I_STATE, PNE_I7_L2_DATA_RQSTS_PREFETCH_S_STATE, PNE_I7_L2_DATA_RQSTS_PREFETCH_E_STATE, PNE_I7_L2_DATA_RQSTS_PREFETCH_M_STATE, PNE_I7_L2_DATA_RQSTS_PREFETCH_MESI, PNE_I7_L2_DATA_RQSTS_ANY, PNE_I7_L2_WRITE_RFO_I_STATE, PNE_I7_L2_WRITE_RFO_S_STATE, PNE_I7_L2_WRITE_RFO_E_STATE, PNE_I7_L2_WRITE_RFO_M_STATE, PNE_I7_L2_WRITE_RFO_HIT, PNE_I7_L2_WRITE_RFO_MESI, PNE_I7_L2_WRITE_LOCK_I_STATE, PNE_I7_L2_WRITE_LOCK_S_STATE, PNE_I7_L2_WRITE_LOCK_E_STATE, PNE_I7_L2_WRITE_LOCK_M_STATE, PNE_I7_L2_WRITE_LOCK_HIT, PNE_I7_L2_WRITE_LOCK_MESI, PNE_I7_L1D_WB_L2_I_STATE, PNE_I7_L1D_WB_L2_S_STATE, PNE_I7_L1D_WB_L2_E_STATE, PNE_I7_L1D_WB_L2_M_STATE, PNE_I7_L1D_WB_L2_MESI, 
PNE_I7_L3_LAT_CACHE_REFERENCE, PNE_I7_L3_LAT_CACHE_MISS, PNE_I7_CPU_CLK_UNHALTED_THREAD_P, PNE_I7_CPU_CLK_UNHALTED_REF_P, PNE_I7_UOPS_DECODED_DEC0, PNE_I7_L1D_CACHE_LD_I_STATE, PNE_I7_L1D_CACHE_LD_S_STATE, PNE_I7_L1D_CACHE_LD_E_STATE, PNE_I7_L1D_CACHE_LD_M_STATE, PNE_I7_L1D_CACHE_LD_MESI, PNE_I7_L1D_CACHE_ST_I_STATE, PNE_I7_L1D_CACHE_ST_S_STATE, PNE_I7_L1D_CACHE_ST_E_STATE, PNE_I7_L1D_CACHE_ST_M_STATE, PNE_I7_L1D_CACHE_ST_MESI, PNE_I7_L1D_CACHE_LOCK_HIT, PNE_I7_L1D_CACHE_LOCK_S_STATE, PNE_I7_L1D_CACHE_LOCK_E_STATE, PNE_I7_L1D_CACHE_LOCK_M_STATE, PNE_I7_L1D_ALL_REF_ANY, PNE_I7_L1D_ALL_REF_CACHEABLE, PNE_I7_L1D_PEND_MISS_LOAD_BUFFERS_FULL, PNE_I7_DTLB_MISSES_ANY, PNE_I7_DTLB_MISSES_WALK_COMPLETED, PNE_I7_DTLB_MISSES_STLB_HIT, PNE_I7_DTLB_MISSES_PDE_MISS, PNE_I7_DTLB_MISSES_PDP_MISS, PNE_I7_DTLB_MISSES_LARGE_WALK_COMPLETED, PNE_I7_SSE_MEM_EXEC_NTA, PNE_I7_SSE_MEM_EXEC_STREAMING_STORES, PNE_I7_LOAD_HIT_PRE, PNE_I7_SFENCE_CYCLES, PNE_I7_L1D_PREFETCH_REQUESTS, PNE_I7_L1D_PREFETCH_MISS, PNE_I7_L1D_PREFETCH_TRIGGERS, PNE_I7_EPT_EPDE_MISS, PNE_I7_EPT_EPDPE_HIT, PNE_I7_EPT_EPDPE_MISS, PNE_I7_L1D_REPL, PNE_I7_L1D_M_REPL, PNE_I7_L1D_M_EVICT, PNE_I7_L1D_M_SNOOP_EVICT, PNE_I7_L1D_CACHE_PREFETCH_LOCK_FB_HIT, PNE_I7_L1D_CACHE_LOCK_FB_HIT, PNE_I7_OFFCORE_REQUESTS_OUTSTANDING_DEMAND_READ_DATA, PNE_I7_OFFCORE_REQUESTS_OUTSTANDING_DEMAND_READ_CODE, PNE_I7_OFFCORE_REQUESTS_OUTSTANDING_DEMAND_RFO, PNE_I7_OFFCORE_REQUESTS_OUTSTANDING_ANY_READ, PNE_I7_CACHE_LOCK_CYCLES_L1D_L2, PNE_I7_CACHE_LOCK_CYCLES_L1D, PNE_I7_IO_TRANSACTIONS, PNE_I7_L1I_HITS, PNE_I7_L1I_MISSES, PNE_I7_L1I_READS, PNE_I7_L1I_CYCLES_STALLED, PNE_I7_IFU_IVC_FULL, PNE_I7_IFU_IVC_L1I_EVICTION, PNE_I7_LARGE_ITLB_HIT, PNE_I7_L1I_OPPORTUNISTIC_HITS, PNE_I7_ITLB_MISSES_ANY, PNE_I7_ITLB_MISSES_WALK_COMPLETED, PNE_I7_ITLB_MISSES_WALK_CYCLES, PNE_I7_ITLB_MISSES_STLB_HIT, PNE_I7_ITLB_MISSES_PDE_MISS, PNE_I7_ITLB_MISSES_PDP_MISS, PNE_I7_ITLB_MISSES_LARGE_WALK_COMPLETED, PNE_I7_ILD_STALL_ANY, PNE_I7_ILD_STALL_IQ_FULL, 
PNE_I7_ILD_STALL_LCP, PNE_I7_ILD_STALL_MRU, PNE_I7_ILD_STALL_REGEN, PNE_I7_BR_INST_EXEC_ANY, PNE_I7_BR_INST_EXEC_COND, PNE_I7_BR_INST_EXEC_DIRECT, PNE_I7_BR_INST_EXEC_DIRECT_NEAR_CALL, PNE_I7_BR_INST_EXEC_INDIRECT_NEAR_CALL, PNE_I7_BR_INST_EXEC_INDIRECT_NON_CALL, PNE_I7_BR_INST_EXEC_NEAR_CALLS, PNE_I7_BR_INST_EXEC_NON_CALLS, PNE_I7_BR_INST_EXEC_RETURN_NEAR, PNE_I7_BR_INST_EXEC_TAKEN, PNE_I7_BR_MISP_EXEC_COND, PNE_I7_BR_MISP_EXEC_DIRECT, PNE_I7_BR_MISP_EXEC_INDIRECT_NON_CALL, PNE_I7_BR_MISP_EXEC_NON_CALLS, PNE_I7_BR_MISP_EXEC_RETURN_NEAR, PNE_I7_BR_MISP_EXEC_DIRECT_NEAR_CALL, PNE_I7_BR_MISP_EXEC_INDIRECT_NEAR_CALL, PNE_I7_BR_MISP_EXEC_NEAR_CALLS, PNE_I7_BR_MISP_EXEC_TAKEN, PNE_I7_BR_MISP_EXEC_ANY, PNE_I7_RESOURCE_STALLS_ANY, PNE_I7_RESOURCE_STALLS_LOAD, PNE_I7_RESOURCE_STALLS_RS_FULL, PNE_I7_RESOURCE_STALLS_STORE, PNE_I7_RESOURCE_STALLS_ROB_FULL, PNE_I7_RESOURCE_STALLS_FPCW, PNE_I7_RESOURCE_STALLS_MXCSR, PNE_I7_RESOURCE_STALLS_OTHER, PNE_I7_MACRO_INSTS_FUSIONS_DECODED, PNE_I7_BACLEAR_FORCE_IQ, PNE_I7_LSD_UOPS, PNE_I7_ITLB_FLUSH, PNE_I7_OFFCORE_REQUESTS_DEMAND_READ_DATA, PNE_I7_OFFCORE_REQUESTS_DEMAND_READ_CODE, PNE_I7_OFFCORE_REQUESTS_DEMAND_RFO, PNE_I7_OFFCORE_REQUESTS_ANY_READ, PNE_I7_OFFCORE_REQUESTS_ANY_RFO, PNE_I7_OFFCORE_REQUESTS_UNCACHED_MEM, PNE_I7_OFFCORE_REQUESTS_L1D_WRITEBACK, PNE_I7_OFFCORE_REQUESTS_ANY, PNE_I7_UOPS_EXECUTED_PORT0, PNE_I7_UOPS_EXECUTED_PORT1, PNE_I7_UOPS_EXECUTED_PORT2_CORE, PNE_I7_UOPS_EXECUTED_PORT3_CORE, PNE_I7_UOPS_EXECUTED_PORT4_CORE, PNE_I7_UOPS_EXECUTED_PORT5, PNE_I7_UOPS_EXECUTED_CORE_ACTIVE_CYCLES, PNE_I7_UOPS_EXECUTED_PORT015, PNE_I7_UOPS_EXECUTED_PORT234, PNE_I7_OFFCORE_REQUESTS_SQ_FULL, PNE_I7_SNOOPQ_REQUESTS_OUTSTANDING_DATA, PNE_I7_SNOOPQ_REQUESTS_OUTSTANDING_INVALIDATE, PNE_I7_SNOOPQ_REQUESTS_OUTSTANDING_CODE, PNE_I7_OFF_CORE_RESPONSE_0, PNE_I7_SNOOP_RESPONSE_HIT, PNE_I7_SNOOP_RESPONSE_HITE, PNE_I7_SNOOP_RESPONSE_HITM, PNE_I7_PIC_ACCESSES_TPR_READS, PNE_I7_PIC_ACCESSES_TPR_WRITES, PNE_I7_INST_RETIRED_ANY_P, 
PNE_I7_INST_RETIRED_X87, PNE_I7_UOPS_RETIRED_ANY, PNE_I7_UOPS_RETIRED_RETIRE_SLOTS, PNE_I7_UOPS_RETIRED_MACRO_FUSED, PNE_I7_MACHINE_CLEARS_CYCLES, PNE_I7_MACHINE_CLEARS_MEM_ORDER, PNE_I7_MACHINE_CLEARS_SMC, PNE_I7_MACHINE_CLEARS_FUSION_ASSIST, PNE_I7_BR_INST_RETIRED_ALL_BRANCHES, PNE_I7_BR_INST_RETIRED_CONDITIONAL, PNE_I7_BR_INST_RETIRED_NEAR_CALL, PNE_I7_BR_MISP_RETIRED_ALL_BRANCHES, PNE_I7_BR_MISP_RETIRED_NEAR_CALL, PNE_I7_SSEX_UOPS_RETIRED_PACKED_SINGLE, PNE_I7_SSEX_UOPS_RETIRED_SCALAR_SINGLE, PNE_I7_SSEX_UOPS_RETIRED_PACKED_DOUBLE, PNE_I7_SSEX_UOPS_RETIRED_SCALAR_DOUBLE, PNE_I7_SSEX_UOPS_RETIRED_VECTOR_INTEGER, PNE_I7_ITLB_MISS_RETIRED, PNE_I7_MEM_LOAD_RETIRED_L1D_HIT, PNE_I7_MEM_LOAD_RETIRED_L2_HIT, PNE_I7_MEM_LOAD_RETIRED_OTHER_CORE_L2_HIT_HITM, PNE_I7_MEM_LOAD_RETIRED_HIT_LFB, PNE_I7_MEM_LOAD_RETIRED_DTLB_MISS, PNE_I7_MEM_LOAD_RETIRED_L3_MISS, PNE_I7_MEM_LOAD_RETIRED_L3_UNSHARED_HIT, PNE_I7_FP_MMX_TRANS_TO_FP, PNE_I7_FP_MMX_TRANS_TO_MMX, PNE_I7_FP_MMX_TRANS_ANY, PNE_I7_MACRO_INSTS_DECODED, PNE_I7_UOPS_DECODED_MS, PNE_I7_UOPS_DECODED_ESP_FOLDING, PNE_I7_UOPS_DECODED_ESP_SYNC, PNE_I7_RAT_STALLS_FLAGS, PNE_I7_RAT_STALLS_REGISTERS, PNE_I7_RAT_STALLS_ROB_READ_PORT, PNE_I7_RAT_STALLS_SCOREBOARD, PNE_I7_RAT_STALLS_ANY, PNE_I7_SEG_RENAME_STALLS, PNE_I7_ES_REG_RENAMES, PNE_I7_UOP_UNFUSION, PNE_I7_BR_INST_DECODED, PNE_I7_BOGUS_BR, PNE_I7_BPU_MISSED_CALL_RET, PNE_I7_L2_HW_PREFETCH_DATA_TRIGGER, PNE_I7_L2_HW_PREFETCH_CODE_TRIGGER, PNE_I7_L2_HW_PREFETCH_DCA_TRIGGER, PNE_I7_L2_HW_PREFETCH_KICK_START, PNE_I7_SQ_MISC_PROMOTION, PNE_I7_SQ_MISC_PROMOTION_POST_GO, PNE_I7_SQ_MISC_LRU_HINTS, PNE_I7_SQ_MISC_FILL_DROPPED, PNE_I7_SQ_MISC_SPLIT_LOCK, PNE_I7_SQ_FULL_STALL_CYCLES, PNE_I7_FP_ASSIST_ALL, PNE_I7_FP_ASSIST_OUTPUT, PNE_I7_FP_ASSIST_INPUT, PNE_I7_SEGMENT_REG_LOADS, PNE_I7_SIMD_INT_64_PACKED_MPY, PNE_I7_SIMD_INT_64_PACKED_SHIFT, PNE_I7_SIMD_INT_64_PACK, PNE_I7_SIMD_INT_64_UNPACK, PNE_I7_SIMD_INT_64_PACKED_LOGICAL, PNE_I7_SIMD_INT_64_PACKED_ARITH, 
PNE_I7_SIMD_INT_64_SHUFFLE_MOVE, PNE_I7_INSTR_RETIRED_ANY, PNE_I7_CPU_CLK_UNHALTED_CORE, PNE_I7_CPU_CLK_UNHALTED_REF, PNE_I7_GQ_CYCLES_FULL_READ_TRACKER, PNE_I7_GQ_CYCLES_FULL_WRITE_TRACKER, PNE_I7_GQ_CYCLES_FULL_PEER_PROBE_TRACKER, PNE_I7_GQ_CYCLES_NOT_EMPTY_READ_TRACKER, PNE_I7_GQ_CYCLES_NOT_EMPTY_WRITE_TRACKER, PNE_I7_GQ_CYCLES_NOT_EMPTY_PEER_PROBE_TRACKER, PNE_I7_GQ_ALLOC_READ_TRACKER, PNE_I7_GQ_ALLOC_RT_L3_MISS, PNE_I7_GQ_ALLOC_RT_TO_L3_RESP, PNE_I7_GQ_ALLOC_RT_TO_RTID_ACQUIRED, PNE_I7_GQ_ALLOC_WT_TO_RTID_ACQUIRED, PNE_I7_GQ_ALLOC_WRITE_TRACKER, PNE_I7_GQ_ALLOC_PEER_PROBE_TRACKER, PNE_I7_GQ_DATA_FROM_QPI, PNE_I7_GQ_DATA_FROM_QMC, PNE_I7_GQ_DATA_FROM_L3, PNE_I7_GQ_DATA_FROM_CORES_02, PNE_I7_GQ_DATA_FROM_CORES_13, PNE_I7_GQ_DATA_TO_QPI_QMC, PNE_I7_GQ_DATA_TO_L3, PNE_I7_GQ_DATA_TO_CORES, PNE_I7_SNP_RESP_TO_LOCAL_HOME_I_STATE, PNE_I7_SNP_RESP_TO_LOCAL_HOME_S_STATE, PNE_I7_SNP_RESP_TO_LOCAL_HOME_FWD_S_STATE, PNE_I7_SNP_RESP_TO_LOCAL_HOME_FWD_I_STATE, PNE_I7_SNP_RESP_TO_LOCAL_HOME_CONFLICT, PNE_I7_SNP_RESP_TO_LOCAL_HOME_WB, PNE_I7_SNP_RESP_TO_REMOTE_HOME_I_STATE, PNE_I7_SNP_RESP_TO_REMOTE_HOME_S_STATE, PNE_I7_SNP_RESP_TO_REMOTE_HOME_FWD_S_STATE, PNE_I7_SNP_RESP_TO_REMOTE_HOME_FWD_I_STATE, PNE_I7_SNP_RESP_TO_REMOTE_HOME_CONFLICT, PNE_I7_SNP_RESP_TO_REMOTE_HOME_WB, PNE_I7_SNP_RESP_TO_REMOTE_HOME_HITM, PNE_I7_L3_HITS_READ, PNE_I7_L3_HITS_WRITE, PNE_I7_L3_HITS_PROBE, PNE_I7_L3_HITS_ANY, PNE_I7_L3_MISS_READ, PNE_I7_L3_MISS_WRITE, PNE_I7_L3_MISS_PROBE, PNE_I7_L3_MISS_ANY, PNE_I7_L3_LINES_IN_M_STATE, PNE_I7_L3_LINES_IN_E_STATE, PNE_I7_L3_LINES_IN_S_STATE, PNE_I7_L3_LINES_IN_F_STATE, PNE_I7_L3_LINES_IN_ANY, PNE_I7_L3_LINES_OUT_M_STATE, PNE_I7_L3_LINES_OUT_E_STATE, PNE_I7_L3_LINES_OUT_S_STATE, PNE_I7_L3_LINES_OUT_I_STATE, PNE_I7_L3_LINES_OUT_F_STATE, PNE_I7_L3_LINES_OUT_ANY, PNE_I7_QHL_REQUESTS_IOH_READS, PNE_I7_QHL_REQUESTS_IOH_WRITES, PNE_I7_QHL_REQUESTS_REMOTE_READS, PNE_I7_QHL_REQUESTS_REMOTE_WRITES, PNE_I7_QHL_REQUESTS_LOCAL_READS, PNE_I7_QHL_REQUESTS_LOCAL_WRITES, 
PNE_I7_QHL_CYCLES_FULL_IOH, PNE_I7_QHL_CYCLES_FULL_REMOTE, PNE_I7_QHL_CYCLES_FULL_LOCAL, PNE_I7_QHL_CYCLES_NOT_EMPTY_IOH, PNE_I7_QHL_CYCLES_NOT_EMPTY_REMOTE, PNE_I7_QHL_CYCLES_NOT_EMPTY_LOCAL, PNE_I7_QHL_OCCUPANCY_IOH, PNE_I7_QHL_OCCUPANCY_REMOTE, PNE_I7_QHL_OCCUPANCY_LOCAL, PNE_I7_QHL_ADDRESS_CONFLICTS_2WAY, PNE_I7_QHL_ADDRESS_CONFLICTS_3WAY, PNE_I7_QHL_CONFLICT_CYCLES_IOH, PNE_I7_QHL_CONFLICT_CYCLES_REMOTE, PNE_I7_QHL_CONFLICT_CYCLES_LOCAL, PNE_I7_QHL_TO_QMC_BYPASS, PNE_I7_QMC_NORMAL_FULL_READ_CH0, PNE_I7_QMC_NORMAL_FULL_READ_CH1, PNE_I7_QMC_NORMAL_FULL_READ_CH2, PNE_I7_QMC_NORMAL_FULL_WRITE_CH0, PNE_I7_QMC_NORMAL_FULL_WRITE_CH1, PNE_I7_QMC_NORMAL_FULL_WRITE_CH2, PNE_I7_QMC_ISOC_FULL_READ_CH0, PNE_I7_QMC_ISOC_FULL_READ_CH1, PNE_I7_QMC_ISOC_FULL_READ_CH2, PNE_I7_QMC_ISOC_FULL_WRITE_CH0, PNE_I7_QMC_ISOC_FULL_WRITE_CH1, PNE_I7_QMC_ISOC_FULL_WRITE_CH2, PNE_I7_QMC_BUSY_READ_CH0, PNE_I7_QMC_BUSY_READ_CH1, PNE_I7_QMC_BUSY_READ_CH2, PNE_I7_QMC_BUSY_WRITE_CH0, PNE_I7_QMC_BUSY_WRITE_CH1, PNE_I7_QMC_BUSY_WRITE_CH2, PNE_I7_QMC_OCCUPANCY_CH0, PNE_I7_QMC_OCCUPANCY_CH1, PNE_I7_QMC_OCCUPANCY_CH2, PNE_I7_QMC_ISSOC_OCCUPANCY_CH0, PNE_I7_QMC_ISSOC_OCCUPANCY_CH1, PNE_I7_QMC_ISSOC_OCCUPANCY_CH2, PNE_I7_QMC_ISSOC_READS_ANY, PNE_I7_QMC_NORMAL_READS_CH0, PNE_I7_QMC_NORMAL_READS_CH1, PNE_I7_QMC_NORMAL_READS_CH2, PNE_I7_QMC_NORMAL_READS_ANY, PNE_I7_QMC_HIGH_PRIORITY_READS_CH0, PNE_I7_QMC_HIGH_PRIORITY_READS_CH1, PNE_I7_QMC_HIGH_PRIORITY_READS_CH2, PNE_I7_QMC_HIGH_PRIORITY_READS_ANY, PNE_I7_QMC_CRITICAL_PRIORITY_READS_CH0, PNE_I7_QMC_CRITICAL_PRIORITY_READS_CH1, PNE_I7_QMC_CRITICAL_PRIORITY_READS_CH2, PNE_I7_QMC_CRITICAL_PRIORITY_READS_ANY, PNE_I7_QMC_WRITES_FULL_CH0, PNE_I7_QMC_WRITES_FULL_CH1, PNE_I7_QMC_WRITES_FULL_CH2, PNE_I7_QMC_WRITES_FULL_ANY, PNE_I7_QMC_WRITES_PARTIAL_CH0, PNE_I7_QMC_WRITES_PARTIAL_CH1, PNE_I7_QMC_WRITES_PARTIAL_CH2, PNE_I7_QMC_WRITES_PARTIAL_ANY, PNE_I7_QMC_CANCEL_CH0, PNE_I7_QMC_CANCEL_CH1, PNE_I7_QMC_CANCEL_CH2, PNE_I7_QMC_CANCEL_ANY, 
PNE_I7_QMC_PRIORITY_UPDATES_CH0, PNE_I7_QMC_PRIORITY_UPDATES_CH1, PNE_I7_QMC_PRIORITY_UPDATES_CH2, PNE_I7_QMC_PRIORITY_UPDATES_ANY, PNE_I7_QHL_FRC_ACK_CNFLTS_LOCAL, PNE_I7_QPI_TX_STALLED_SINGLE_FLIT_HOME_LINK_0, PNE_I7_QPI_TX_STALLED_SINGLE_FLIT_SNOOP_LINK_0, PNE_I7_QPI_TX_STALLED_SINGLE_FLIT_NDR_LINK_0, PNE_I7_QPI_TX_STALLED_SINGLE_FLIT_HOME_LINK_1, PNE_I7_QPI_TX_STALLED_SINGLE_FLIT_SNOOP_LINK_1, PNE_I7_QPI_TX_STALLED_SINGLE_FLIT_NDR_LINK_1, PNE_I7_QPI_TX_STALLED_SINGLE_FLIT_LINK_0, PNE_I7_QPI_TX_STALLED_SINGLE_FLIT_LINK_1, PNE_I7_QPI_TX_STALLED_MULTI_FLIT_DRS_LINK_0, PNE_I7_QPI_TX_STALLED_MULTI_FLIT_NCB_LINK_0, PNE_I7_QPI_TX_STALLED_MULTI_FLIT_NCS_LINK_0, PNE_I7_QPI_TX_STALLED_MULTI_FLIT_DRS_LINK_1, PNE_I7_QPI_TX_STALLED_MULTI_FLIT_NCB_LINK_1, PNE_I7_QPI_TX_STALLED_MULTI_FLIT_NCS_LINK_1, PNE_I7_QPI_TX_STALLED_MULTI_FLIT_LINK_0, PNE_I7_QPI_TX_STALLED_MULTI_FLIT_LINK_1, PNE_I7_QPI_TX_HEADER_BUSY_LINK_0, PNE_I7_QPI_TX_HEADER_BUSY_LINK_1, PNE_I7_QPI_RX_NO_PPT_CREDIT_STALLS_LINK_0, PNE_I7_QPI_RX_NO_PPT_CREDIT_STALLS_LINK_1, PNE_I7_DRAM_OPEN_CH0, PNE_I7_DRAM_OPEN_CH1, PNE_I7_DRAM_OPEN_CH2, PNE_I7_DRAM_PAGE_CLOSE_CH0, PNE_I7_DRAM_PAGE_CLOSE_CH1, PNE_I7_DRAM_PAGE_CLOSE_CH2, PNE_I7_DRAM_PAGE_MISS_CH0, PNE_I7_DRAM_PAGE_MISS_CH1, PNE_I7_DRAM_PAGE_MISS_CH2, PNE_I7_DRAM_READ_CAS_CH0, PNE_I7_DRAM_READ_CAS_AUTOPRE_CH0, PNE_I7_DRAM_READ_CAS_CH1, PNE_I7_DRAM_READ_CAS_AUTOPRE_CH1, PNE_I7_DRAM_READ_CAS_CH2, PNE_I7_DRAM_READ_CAS_AUTOPRE_CH2, PNE_I7_DRAM_WRITE_CAS_CH0, PNE_I7_DRAM_WRITE_CAS_AUTOPRE_CH0, PNE_I7_DRAM_WRITE_CAS_CH1, PNE_I7_DRAM_WRITE_CAS_AUTOPRE_CH1, PNE_I7_DRAM_WRITE_CAS_CH2, PNE_I7_DRAM_WRITE_CAS_AUTOPRE_CH2, PNE_I7_DRAM_REFRESH_CH0, PNE_I7_DRAM_REFRESH_CH1, PNE_I7_DRAM_REFRESH_CH2, PNE_I7_DRAM_PRE_ALL_CH0, PNE_I7_DRAM_PRE_ALL_CH1, PNE_I7_DRAM_PRE_ALL_CH2, /* Last enumerator; presumably an end-of-list guard rather than a real event -- TODO confirm against users of this enum. */ PNE_I7_NATNAME_GUARD }; /* Tables for the i7 processor; i7Processor_info is defined in map-i7.c. */ extern Native_Event_LabelDescription_t i7Processor_info[]; extern hwi_search_t i7Processor_map[]; #endif papi-5.6.0/man/man1/papi_error_codes.1000664 001750 001750 00000001500 13216244355 
021542 0ustar00jshenry1963jshenry1963000000 000000 .TH "papi_error_codes" 1 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME papi_error_codes \- papi_error_codes utility\&. .PP file error_codes\&.c .SH "NAME" .PP papi_error_codes - lists all currently defined PAPI error codes\&. .SH "Synopsis" .PP papi_error_codes .SH "Description" .PP papi_error_codes is a PAPI utility program that displays all defined error codes from papi\&.h and their error strings from papi_data\&.h\&. If an error string is not defined, a warning is generated\&. This can help trap newly defined error codes for which error strings are not yet defined\&. .SH "Options" .PP This utility has no command line options\&. .SH "Bugs" .PP There are no known bugs in this utility\&. If you find a bug, it should be reported to the PAPI Mailing List at ptools-perfapi@icl.utk.edu\&. papi-5.6.0/src/Matlab/PAPI_Matlab.readme000775 001750 001750 00000014373 13216244356 021731 0ustar00jshenry1963jshenry1963000000 000000 Running PAPI's High Level API in the MATLAB Environment If you have the desire to do this, you most likely already know why you want to make calls to PAPI inside of a MATLAB environment. If you don't know much about what composes PAPI's high level API, you should probably take a look at this: http://icl.cs.utk.edu/projects/papi/files/documentation/PAPI_USER_GUIDE_23.htm#WHAT_IS_HIGH_LEVEL_API This section of the PAPI user guide covers C and FORTRAN calls, but at the moment, you can only make C calls from the MATLAB environment. There is one overall function to call from Matlab; from there, you specify which of the 6 specific functions you want to call, and then the arguments to each. Here are some examples: PAPI_num_counters - Returns the number of available hardware counters on the system. 
Ex: num_counters = PAPI('num') PAPI_flips - Has 3 possibilities: Initialize FLIP counting with: PAPI('flips') Record the number of floating point instructions since initialization: ops = PAPI('flips') Record the number of floating point instructions and the incremental rate of floating point execution since initialization: [ops, mflips] = PAPI('flips') Use PAPI_stop_counters to stop counting flips and reset the counters. PAPI_flops - Identical to PAPI_flips, but counts floating point *operations* rather than instructions. In most cases, these two are identical, but some instructions (e.g. FMA) might contain multiple operations or vice versa. PAPI_ipc - Has 3 possibilities: Initialize instruction per cycle counting with: PAPI('ipc', 0) Record the number of instructions since initialization: ins = PAPI('ipc') Record the number of instructions and the incremental rate of instructions per cycle since initialization: [ins, ipc] = PAPI('ipc') PAPI_start_counters - Specify the events to count (in text form or the actual numeric code; NOTE: make sure to not confuse normal decimal and hexadecimal.) You cannot specify more events than there are hardware counters. To begin counting cycles and instructions: PAPI('start', 'PAPI_TOT_CYC', 'PAPI_TOT_INS'); PAPI_read_counters - Simply specify the variables to read the values into. You cannot specify more variables than there are hardware counters. This will reset the counters. To read the above events you just started: [cycles, instructions] = PAPI('read'); PAPI_accum_counters - This function adds the value you pass to the readings in the hardware counter. You cannot specify more variables than there are hardware counters. This function will reset the counters. To add the values currently in the counters to the previously read values: [cycles, instructions] = PAPI('accum', cycles, instructions); PAPI_stop_counters - This function reads the value of the running hardware counters into the variables you specify. 
You cannot specify more variables than there are hardware counters. To stop the running counters you previously started and record their values: [cycles, instructions] = PAPI('stop'); PAPI_Matlab.c, when compiled, functions simply as a wrapper. In order to use the calls, you need to know a little about mex. mex is simply the compiler you use to make your code run in the MATLAB environment. If you don't know how to use mex, you might want to acquaint yourself a bit. "mex -setup" might be needed if you encounter problems, but the simplest explanation might be to substitute "mex" for "gcc" and you are on your way. All the other rules for compiling PAPI are the same. mex compilations can be done inside or outside of the Matlab environment, but in this case, it is recommended that you compile outside of Matlab. For some reason, compiling inside does not work on some systems. So far, the Linux environment and the Windows environment have been tested, but _in theory_ this code should work anywhere PAPI and Matlab both work. The following instructions are for a Linux/Unix environment: Assuming papi.h is present in /usr/local/include and libpapi.so is present in /usr/local/lib, the below should work. If not, you may need to alter the compile strings and/or the #include statement in PAPI_Matlab.c. Also, the compile string will be different for different platforms. For instance, if you want to compile and run on a Linux machine, assuming PAPI_Matlab.c is in your current working directory (you'll have a different compile string on a different architecture): 1. Compile the wrapper: mex -I/usr/local/include PAPI_Matlab.c /usr/local/lib/libpapi.so -output PAPI 2. Start Matlab: matlab 3. Run the code: a. Find the number of hardware counters on your system: num_counters = PAPI('num') b. 
Play with flips - the first makes sure the counters are stopped and clear; the second initializes the counting; the third returns the number of floating point instructions since the first call, and the fourth line does the same as the third AND reports the incremental rate of floating point execution since the last call: PAPI('stop') PAPI('flips') ins = PAPI('flips') [ins, mflips] = PAPI('flips') c. Play with instructions per cycle - the first makes sure the counters are stopped and clear; the second initializes counting; the third returns the number of instructions since the first call, and the fourth line does the same as the third AND reports the incremental rate of instructions per cycle since the last call: PAPI('stop') PAPI('ipc') ins = PAPI('ipc') [ins, ipc] = PAPI('ipc') d. Try the example m files included with the distribution: PAPIInnerProduct.m PAPIMatrixVector.m PAPIMatrixMatrix.m e. Start counting: PAPI('start', 'PAPI_TOT_CYC', 'PAPI_TOT_INS') f. Read the counters and reset: [cycles, instr] = PAPI('read') g. Add the current value of the counters to a previous read and reset: [cycles, instr] = PAPI('accum', cycles, instr) h. Read the counters and stop them: [cycles, instr] = PAPI('stop') You can pass as many events as you like to be counted or recorded, as long as that number does not exceed the number of available hardware counters. Contact ralph@eecs.utk.edu and/or ptools-perfapi@icl.utk.edu with any questions regarding PAPI calls in Matlab - either errors or questions. Also, this has just been implemented, so changes could be coming.......... papi-5.6.0/man/man1/papi_event_chooser.1000664 001750 001750 00000001377 13216244355 022113 0ustar00jshenry1963jshenry1963000000 000000 .TH "papi_event_chooser" 1 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME papi_event_chooser \- papi_event_chooser utility\&. 
.PP file event_chooser\&.c .SH "NAME" .PP papi_event_chooser - given a list of named events, lists other events that can be counted with them\&. .SH "Synopsis" .PP papi_event_chooser NATIVE | PRESET < event > < event > \&.\&.\&. .SH "Description" .PP papi_event_chooser is a PAPI utility program that reports information about the current PAPI installation and supported preset events\&. .SH "Options" .PP This utility has no command line options\&. .SH "Bugs" .PP There are no known bugs in this utility\&. If you find a bug, it should be reported to the PAPI Mailing List at ptools-perfapi@icl.utk.edu\&. papi-5.6.0/src/freebsd/map-i7.c000664 001750 001750 00000220535 13216244361 020201 0ustar00jshenry1963jshenry1963000000 000000 /****************************/ /* THIS IS OPEN SOURCE CODE */ /****************************/ /* * File: map-i7.c * Author: George Neville-Neil * gnn@freebsd.org * Harald Servat * redcrash@gmail.com */ #include "freebsd.h" #include "papiStdEventDefs.h" #include "map.h" /**************************************************************************** i7 SUBSTRATE i7 SUBSTRATE i7 SUBSTRATE i7 SUBSTRATE i7 SUBSTRATE ****************************************************************************/ /* NativeEvent_Value_i7 must match i7_info */ Native_Event_LabelDescription_t i7Processor_info[] = { {"SB_FORWARD.ANY", "Counts the number of store forwards. 
"}, {"LOAD_BLOCK.STD", "Counts the number of loads blocked by a preceding store with unknown data."}, {"LOAD_BLOCK.ADDRESS_OFFSET", "Counts the number of loads blocked by a preceding store address."}, {"SB_DRAIN.CYCLES", "Counts the cycles of store buffer drains."}, {"MISALIGN_MEM_REF.LOAD", "Counts the number of misaligned load references."}, {"MISALIGN_MEM_REF.STORE", "Counts the number of misaligned store references."}, {"MISALIGN_MEM_REF.ANY", "Counts the number of misaligned memory references."}, {"STORE_BLOCKS.NOT_STA", "This event counts the number of load operations delayed caused by preceding stores whose addresses are known but whose data is unknown, and preceding stores that conflict with the load but which incompletely overlap the load."}, {"STORE_BLOCKS.STA", "This event counts load operations delayed caused by preceding stores whose addresses are unknown (STA block)."}, {"STORE_BLOCKS.AT_RET", "Counts number of loads delayed with at-Retirement block code. The following loads need to be executed at retirement and wait for all senior stores on the same thread to be drained: load splitting across 4K boundary (page split), load accessing uncacheable (UC or USWC) memory, load lock, and load with page table in UC or USWC memory region."}, {"STORE_BLOCKS.L1D_BLOCK", "Cacheable loads delayed with L1D block code."}, {"STORE_BLOCKS.ANY", "All loads delayed due to store blocks."}, {"PARTIAL_ADDRESS_ALIAS", "Counts false dependency due to partial address aliasing."}, {"DTLB_LOAD_MISSES.ANY", "Counts all load misses that cause a page walk."}, {"DTLB_LOAD_MISSES.WALK_COMPLETED", "Counts number of completed page walks due to load miss in the STLB."}, {"DTLB_LOAD_MISSES.STLB_HIT", "Number of cache load STLB hits."}, {"DTLB_LOAD_MISSES.PDE_MISS", "Number of DTLB cache load misses where the low part of the linear to physical address translation was missed."}, {"DTLB_LOAD_MISSES.PDP_MISS", "Number of DTLB cache load misses where the high part of the linear to physical 
address translation was missed."}, {"DTLB_LOAD_MISSES.LARGE_WALK_COMPLETED", "Counts number of completed large page walks due to load miss in the STLB."}, {"MEMORY_DISAMBIGURATION.RESET", "Counts memory disambiguration reset cycles."}, {"MEMORY_DISAMBIGURATION.SUCCESS", "Counts the number of loads that memory disambiguration succeeded."}, {"MEMORY_DISAMBIGURATION.WATCHDOG", "Counts the number of times the memory disambiguration watchdog kicked in."}, {"MEMORY_DISAMBIGURATION.WATCH_CYCLES", "Counts the cycles that the memory disambiguration watchdog is active."}, {"MEM_INST_RETIRED.LOADS", "Counts the number of instructions with an architecturally-visible store retired on the architected path."}, {"MEM_INST_RETIRED.STORES", "Counts the number of instructions with an architecturally-visible store retired on the architected path."}, {"MEM_STORE_RETIRED.DTLB_MISS", "The event counts the number of retired stores that missed the DTLB. The DTLB miss is not counted if the store operation causes a fault. Does not counter prefetches."}, {"UOPS_ISSUED.ANY", "Counts the number of Uops issued by the Register Allocation Table to the Reservation Station, i.e. the UOPs issued from the front end to the back end."}, {"UOPS_ISSUED.FUSED", "Counts the number of fused Uops that were issued from the Register Allocation Table to the Reservation Station."}, {"MEM_UNCORE_RETIRED.OTHER_CORE_L2_HITM", "Counts number of memory load instructions retired where the memory reference hit modified data in a sibling core residing on the same socket."}, {"MEM_UNCORE_RETIRED.REMOTE_CACHE_LOCAL_HOME_HIT", "Counts number of memory load instructions retired where the memory reference missed the L1, L2 and L3 caches and HIT in a remote socket's cache. Only counts locally homed lines."}, {"MEM_UNCORE_RETIRED.REMOTE_DRAM", "Counts number of memory load instructions retired where the memory reference missed the L1, L2 and L3 caches and was remotely homed. 
This includes both DRAM access and HITM in a remote socket's cache for remotely homed lines."}, {"MEM_UNCORE_RETIRED.LOCAL_DRAM", "Counts number of memory load instructions retired where the memory reference missed the L1, L2 and L3 caches and required a local socket memory reference. This includes locally homed cachelines that were in a modified state in another socket."}, {"FP_COMP_OPS_EXE.X87", "Counts the number of FP Computational Uops Executed. The number of FADD, FSUB, FCOM, FMULs, integer MULsand IMULs, FDIVs, FPREMs, FSQRTS, integer DIVs, and IDIVs. This event does not distinguish an FADD used in the middle of a transcendental flow from a separate FADD instruction."}, {"FP_COMP_OPS_EXE.MMX", "Counts number of MMX Uops executed."}, {"FP_COMP_OPS_EXE.SSE_FP", "Counts number of SSE and SSE2 FP uops executed."}, {"FP_COMP_OPS_EXE.SSE2_INTEGER", "Counts number of SSE2 integer uops executed."}, {"FP_COMP_OPS_EXE.SSE_FP_PACKED", "Counts number of SSE FP packed uops executed."}, {"FP_COMP_OPS_EXE.SSE_FP_SCALAR", "Counts number of SSE FP scalar uops executed."}, {"FP_COMP_OPS_EXE.SSE_SINGLE_PRECISION", "Counts number of SSE* FP single precision uops executed."}, {"FP_COMP_OPS_EXE.SSE_DOUBLE_PRECISION", "Counts number of SSE* FP double precision uops executed."}, {"SIMD_INT_128.PACKED_MPY", "Counts number of 128 bit ED_MPY integer multiply operations."}, {"SIMD_INT_128.PACKED_SHIFT", "Counts number of 128 bit SIMD integer shift operations."}, {"SIMD_INT_128.PACK", " Counts number of 128 bit SIMD integer pack operations."}, {"SIMD_INT_128.UNPACK", "Counts number of 128 bit SIMD integer unpack operations."}, {"SIMD_INT_128.PACKED_LOGICAL", "Counts number of 128 bit SIMD integer logical operations."}, {"SIMD_INT_128.PACKED_ARITH", "Counts number of 128 bit SIMD integer arithmetic operations."}, {"SIMD_INT_128.SHUFFLE_MOVE", "Counts number of 128 bit SIMD integer shuffle and move operations."}, {"LOAD_DISPATCH.RS", "Counts number of loads dispatched from the Reservation 
Station that bypass the Memory Order Buffer."}, {"LOAD_DISPATCH.RS_DELAYED", "Counts the number of delayed RS dispatches at the stage latch. If an RS dispatch can not bypass to LB, it has another chance to dispatch from the one-cycle delayed staging latch before it is written into the LB."}, {"LOAD_DISPATCH.MOB", "Counts the number of loads dispatched from the Reservation Station to the Memory Order Buffer."}, {"LOAD_DISPATCH.ANY", "Counts all loads dispatched from the Reservation Station."}, {"ARITH.CYCLES_DIV_BUSY", "Counts the number of cycles the divider is busy executing divide or square root operations. The divide can be integer, X87 or Streaming SIMD Extensions (SSE). The square root operation can be either X87 or SSE."}, {"ARITH.MUL", "Counts the number of multiply operations executed. This includes integer as well as floating point multiply operations but excludes DPPS mul and MPSAD."}, {"INST_QUEUE_WRITES", "Counts the number of instructions written into the instruction queue every cycle."}, {"INST_DECODED.DEC0", "Counts number of instructions that require decoder 0 to be decoded. Usually, this means that the instruction maps to more than 1 uop"}, {"TWO_UOP_INSTS_DECODED", "An instruction that generates two uops was decoded."}, {"HW_INT.RCV", "Number of interrupts received."}, {"HW_INT.CYCLES_MASKED", "Number of cycles interrupts are masked."}, {"HW_INT.CYCLES_PENDING_AND_MASKED", "Number of cycles interrupts are pending and masked."}, {"INST_QUEUE_WRITE_CYCLES", "This event counts the number of cycles during which instructions are written to the instruction queue. Dividing this counter by the number of instructions written to the instruction queue (INST_QUEUE_WRITES) yields the average number of instructions decoded each cycle. If this number is less than four and the pipe stalls, this indicates that the decoder is failing to decode enough instructions per cycle to sustain the 4-wide pipeline. 
If SSE* instructions that are 6 bytes or longer arrive one after another, then front end throughput may limit execution speed. "}, {"L2_RQSTS.LD_HIT", "Counts number of loads that hit the L2 cache. L2 loads include both L1D demand misses as well as L1D prefetches. L2 loads can be rejected for various reasons. Only non rejected loads are counted."}, {"L2_RQSTS.LD_MISS", "Counts the number of loads that miss the L2 cache. L2 loads include both L1D demand misses as well as L1D prefetches."}, {"L2_RQSTS.LOADS", "Counts all L2 load requests. L2 loads include both L1D demand misses as well as L1D prefetches."}, {"L2_RQSTS.RFO_HIT", "Counts the number of store RFO requests that hit the L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches. Count includes WC memory requests, where the data is not fetched but the permission to write the line is required."}, {"L2_RQSTS.RFO_MISS", "Counts the number of store RFO requests that miss the L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches."}, {"L2_RQSTS.RFOS", "Counts all L2 store RFO requests. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches."}, {"L2_RQSTS.IFETCH_HIT", "Counts number of instruction fetches that hit the L2 cache. L2 instruction fetches include both L1I demand misses as well as L1I instruction prefetches."}, {"L2_RQSTS.IFETCH_MISS", "Counts number of instruction fetches that miss the L2 cache. L2 instruction fetches include both L1I demand misses as well as L1I instruction prefetches."}, {"L2_RQSTS.IFETCHES", "Counts all instruction fetches. 
L2 instruction fetches include both L1I demand misses as well as L1I instruction prefetches."}, {"L2_RQSTS.PREFETCH_HIT", "Counts L2 prefetch hits for both code and data."}, {"L2_RQSTS.PREFETCH_MISS", "Counts L2 prefetch misses for both code and data."}, {"L2_RQSTS.PREFETCHES", "Counts all L2 prefetches for both code and data."}, {"L2_RQSTS.MISS", "Counts all L2 misses for both code and data."}, {"L2_RQSTS.REFERENCES", "Counts all L2 requests for both code and data."}, {"L2_DATA_RQSTS.DEMAND.I_STATE", "Counts number of L2 data demand loads where the cache line to be loaded is in the I (invalid) state, i.e. a cache miss. L2 demand loads are both L1D demand misses and L1D prefetches."}, {"L2_DATA_RQSTS.DEMAND.S_STATE", "Counts number of L2 data demand loads where the cache line to be loaded is in the S (shared) state. L2 demand loads are both L1D demand misses and L1D prefetches."}, {"L2_DATA_RQSTS.DEMAND.E_STATE", "Counts number of L2 data demand loads where the cache line to be loaded is in the E (exclusive) state. L2 demand loads are both L1D demand misses and L1D prefetches."}, {"L2_DATA_RQSTS.DEMAND.M_STATE", "Counts number of L2 data demand loads where the cache line to be loaded is in the M (modified) state. L2 demand loads are both L1D demand misses and L1D prefetches."}, {"L2_DATA_RQSTS.DEMAND.MESI", "Counts all L2 data demand requests. L2 demand loads are both L1D demand misses and L1D prefetches."}, {"L2_DATA_RQSTS.PREFETCH.I_STATE", "Counts number of L2 prefetch data loads where the cache line to be loaded is in the I (invalid) state, i.e. a cache miss."}, {"L2_DATA_RQSTS.PREFETCH.S_STATE", "Counts number of L2 prefetch data loads where the cache line to be loaded is in the S (shared) state. 
A prefetch RFO will miss on an S state line, while a prefetch read will hit on an S state line."}, {"L2_DATA_RQSTS.PREFETCH.E_STATE", "Counts number of L2 prefetch data loads where the cache line to be loaded is in the E (exclusive) state."}, {"L2_DATA_RQSTS.PREFETCH.M_STATE", "Counts number of L2 prefetch data loads where the cache line to be loaded is in the M (modified) state."}, {"L2_DATA_RQSTS.PREFETCH.MESI", "Counts all L2 prefetch requests."}, {"L2_DATA_RQSTS.ANY", "Counts all L2 data requests."}, {"L2_WRITE.RFO.I_STATE", "Counts number of L2 demand store RFO requests where the cache line to be loaded is in the I (invalid) state, i.e, a cache miss. The L1D prefetcher does not issue a RFO prefetch. This is a demand RFO request."}, {"L2_WRITE.RFO.S_STATE", "Counts number of L2 store RFO requests where the cache line to be loaded is in the S (shared) state. The L1D prefetcher does not issue a RFO prefetch,. This is a demand RFO request."}, {"L2_WRITE.RFO.E_STATE", "Counts number of L2 store RFO requests where the cache line to be loaded is in the E (exclusive) state. The L1D prefetcher does not issue a RFO prefetch. This is a demand RFO request."}, {"L2_WRITE.RFO.M_STATE", "Counts number of L2 store RFO requests where the cache line to be loaded is in the M (modified) state. The L1D prefetcher does not issue a RFO prefetch. This is a demand RFO request."}, {"L2_WRITE.RFO.HIT", "Counts number of L2 store RFO requests where the cache line to be loaded is in either the S, E or M states. The L1D prefetcher does not issue a RFO prefetch. This is a demand RFO request."}, {"L2_WRITE.RFO.MESI", "Counts all L2 store RFO requests.The L1D prefetcher does not issue a RFO prefetch. This is a demand RFO request."}, {"L2_WRITE.LOCK.I_STATE", "Counts number of L2 demand lock RFO requests where the cache line to be loaded is in the I (invalid) state, i.e. 
a cache miss."}, {"L2_WRITE.LOCK.S_STATE", "Counts number of L2 lock RFO requests where the cache line to be loaded is in the S (shared) state."}, {"L2_WRITE.LOCK.E_STATE", "Counts number of L2 demand lock RFO requests where the cache line to be loaded is in the E (exclusive) state."}, {"L2_WRITE.LOCK.M_STATE", "Counts number of L2 demand lock RFO requests where the cache line to be loaded is in the M (modified) state."}, {"L2_WRITE.LOCK.HIT", "Counts number of L2 demand lock RFO requests where the cache line to be loaded is in either the S, E, or M state."}, {"L2_WRITE.LOCK.MESI", "Counts all L2 demand lock RFO requests."}, {"L1D_WB_L2.I_STATE", "Counts number of L1 writebacks to the L2 where the cache line to be written is in the I (invalid) state, i.e. a cache miss."}, {"L1D_WB_L2.S_STATE", "Counts number of L1 writebacks to the L2 where the cache line to be written is in the S state."}, {"L1D_WB_L2.E_STATE", "Counts number of L1 writebacks to the L2 where the cache line to be written is in the E (exclusive) state."}, {"L1D_WB_L2.M_STATE", "Counts number of L1 writebacks to the L2 where the cache line to be written is in the M (modified) state."}, {"L1D_WB_L2.MESI", "Counts all L1 writebacks to the L2."}, {"L3_LAT_CACHE.REFERENCE", "This event counts requests originating from the core that reference a cache line in the last level cache. The event count includes speculative traffic but excludes cache line fills due to a L2 hardware-prefetch. Because cache hierarchy, cache sizes and other implementation-specific characteristics; value comparison to estimate performance differences is not recommended."}, {"L3_LAT_CACHE.MISS", "This event counts each cache miss condition for references to the last level cache. The event count may include speculative traffic but excludes cache line fills due to L2 hardware-prefetches. 
Because cache hierarchy, cache sizes and other implementation-specific characteristics; value comparison to estimate performance differences is not recommended."}, {"CPU_CLK_UNHALTED.THREAD_P", "Counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling."}, {"CPU_CLK_UNHALTED.REF_P", "Increments at the frequency of TSC when not halted."}, {"UOPS_DECODED.DEC0", "Counts micro-ops decoded by decoder 0."}, {"L1D_CACHE_LD.I_STATE", "Counts L1 data cache read requests where the cache line to be loaded is in the I (invalid) state, i.e. the read request missed the cache. Counter 0, 1 only."}, {"L1D_CACHE_LD.S_STATE", "Counts L1 data cache read requests where the cache line to be loaded is in the S (shared) state. Counter 0, 1 only."}, {"L1D_CACHE_LD.E_STATE", "Counts L1 data cache read requests where the cache line to be loaded is in the E (exclusive) state. Counter 0, 1 only."}, {"L1D_CACHE_LD.M_STATE", "Counts L1 data cache read requests where the cache line to be loaded is in the M (modified) state. Counter 0, 1 only."}, {"L1D_CACHE_LD.MESI", "Counts L1 data cache read requests. Counter 0, 1 only."}, {"L1D_CACHE_ST.I_STATE", "Counts L1 data cache store RFO requests where the cache line to be loaded is in the I state. Counter 0, 1 only."}, {"L1D_CACHE_ST.S_STATE", "Counts L1 data cache store RFO requests where the cache line to be loaded is in the S (shared) state. Counter 0, 1 only."}, {"L1D_CACHE_ST.E_STATE", "Counts L1 data cache store RFO requests where the cache line to be loaded is in the E (exclusive) state. Counter 0, 1 only."}, {"L1D_CACHE_ST.M_STATE", "Counts L1 data cache store RFO requests where cache line to be loaded is in the M (modified) state. Counter 0, 1 only."}, {"L1D_CACHE_ST.MESI", "Counts L1 data cache store RFO requests. 
Counter 0, 1 only."}, {"L1D_CACHE_LOCK.HIT", "Counts retired load locks that hit in the L1 data cache or hit in an already allocated fill buffer. The lock portion of the load lock transaction must hit in the L1D. The initial load will pull the lock into the L1 data cache. Counter 0, 1 only."}, {"L1D_CACHE_LOCK.S_STATE", "Counts L1 data cache retired load locks that hit the target cache line in the shared state. Counter 0, 1 only."}, {"L1D_CACHE_LOCK.E_STATE", "Counts L1 data cache retired load locks that hit the target cache line in the exclusive state. Counter 0, 1 only."}, {"L1D_CACHE_LOCK.M_STATE", "Counts L1 data cache retired load locks that hit the target cache line in the modified state. Counter 0, 1 only."}, {"L1D_ALL_REF.ANY", "Counts all references (uncached, speculated and retired) to the L1 data cache, including all loads and stores with any memory types. The event counts memory accesses only when they are actually performed. For example, a load blocked by unknown store address and later performed is only counted once. The event does not include non- memory accesses, such as I/O accesses. Counter 0, 1 only."}, {"L1D_ALL_REF.CACHEABLE", "Counts all data reads and writes (speculated and retired) from cacheable memory, including locked operations. Counter 0, 1 only."}, {"L1D_PEND_MISS.LOAD_BUFFERS_FULL", "Counts cycles of L1 data cache load fill buffers full. Counter 0, 1 only."}, {"DTLB_MISSES.ANY", "Counts the number of misses in the STLB which causes a page walk."}, {"DTLB_MISSES.WALK_COMPLETED", "Counts number of misses in the STLB which resulted in a completed page walk."}, {"DTLB_MISSES.STLB_HIT", "Counts the number of DTLB first level misses that hit in the second level TLB. 
This event is only relevant if the core contains multiple DTLB levels."}, {"DTLB_MISSES.PDE_MISS", "Number of DTLB cache misses where the low part of the linear to physical address translation was missed."}, {"DTLB_MISSES.PDP_MISS", "Number of DTLB misses where the high part of the linear to physical address translation was missed."}, {"DTLB_MISSES.LARGE_WALK_COMPLETED", "Counts number of completed large page walks due to misses in the STLB."}, {"SSE_MEM_EXEC.NTA", "Counts number of SSE NTA prefetch/weakly-ordered instructions which missed the L1 data cache."}, {"SSE_MEM_EXEC.STREAMING_STORES", "Counts number of SSE non- temporal stores."}, {"LOAD_HIT_PRE", "Counts load operations sent to the L1 data cache while a previous SSE prefetch instruction to the same cache line has started prefetching but has not yet finished."}, {"SFENCE_CYCLES", "Counts store fence cycles."}, {"L1D_PREFETCH.REQUESTS", "Counts number of hardware prefetch requests dispatched out of the prefetch FIFO."}, {"L1D_PREFETCH.MISS", "Counts number of hardware prefetch requests that miss the L1D. There are two prefetchers in the L1D. A streamer, which predicts lines sequentially after this one should be fetched, and the IP prefetcher that remembers access patterns for the current instruction. The streamer prefetcher stops on an L1D hit, while the IP prefetcher does not."}, {"L1D_PREFETCH.TRIGGERS", "Counts number of prefetch requests triggered by the Finite State Machine and pushed into the prefetch FIFO. Some of the prefetch requests are dropped due to overwrites or competition between the IP index prefetcher and streamer prefetcher. The prefetch FIFO contains 4 entries."}, {"EPT.EPDE_MISS", "Counts Extended Page Directory Entry misses. 
The Extended Page Directory cache is used by Virtual Machine operating systems while the guest operating systems use the standard TLB caches."}, {"EPT.EPDPE_HIT", "Counts Extended Page Directory Pointer Entry hits."}, {"EPT.EPDPE_MISS", "Counts Extended Page Directory Pointer Entry misses."}, {"L1D.REPL", "Counts the number of lines brought into the L1 data cache. Counter 0, 1 only."}, {"L1D.M_REPL", "Counts the number of modified lines brought into the L1 data cache. Counter 0, 1 only."}, {"L1D.M_EVICT", "Counts the number of modified lines evicted from the L1 data cache due to replacement. Counter 0, 1 only."}, {"L1D.M_SNOOP_EVICT", "Counts the number of modified lines evicted from the L1 data cache due to snoop HITM intervention. Counter 0, 1 only."}, {"L1D_CACHE_PREFETCH_LOCK_FB_HIT", "Counts the number of cacheable load lock speculated instructions accepted into the fill buffer."}, {"L1D_CACHE_LOCK_FB_HIT", "Counts the number of cacheable load lock speculated or retired instructions accepted into the fill buffer."}, {"OFFCORE_REQUESTS_OUTSTANDING.DEMAND.READ_DATA", "Counts weighted cycles of offcore demand data read requests. Does not include L2 prefetch requests."}, {"OFFCORE_REQUESTS_OUTSTANDING.DEMAND.READ_CODE", "Counts weighted cycles of offcore demand code read requests. Does not include L2 prefetch requests."}, {"OFFCORE_REQUESTS_OUTSTANDING.DEMAND.RFO", "Counts weighted cycles of offcore demand RFO requests. Does not include L2 prefetch requests."}, {"OFFCORE_REQUESTS_OUTSTANDING.ANY.READ", "Counts weighted cycles of offcore read requests of any kind. Include L2 prefetch requests."}, {"CACHE_LOCK_CYCLES.L1D_L2", "Cycle count during which the L1D and L2 are locked. A lock is asserted when there is a locked memory access, due to uncacheable memory, a locked operation that spans two cache lines, or a page walk from an uncacheable page table. 
Counter 0, 1 only.L1D and L2 locks have a very high performance penalty and it is highly recommended to avoid such accesses."}, {"CACHE_LOCK_CYCLES.L1D", "Counts the number of cycles that cacheline in the L1 data cache unit is locked. Counter 0, 1 only."}, {"IO_TRANSACTIONS", "Counts the number of completed I/O transactions."}, {"L1I.HITS", "Counts all instruction fetches that hit the L1 instruction cache."}, {"L1I.MISSES", "Counts all instruction fetches that miss the L1I cache. This includes instruction cache misses, streaming buffer misses, victim cache misses and uncacheable fetches. An instruction fetch miss is counted only once and not once for every cycle it is outstanding."}, {"L1I.READS", "Counts all instruction fetches, including uncacheable fetches that bypass the L1I."}, {"L1I.CYCLES_STALLED", "Cycle counts for which an instruction fetch stalls due to a L1I cache miss, ITLB miss or ITLB fault."}, {"IFU_IVC.FULL", "Instruction Fetch unit victim cache full."}, {"IFU_IVC.L1I_EVICTION", "L1 Instruction cache evictions."}, {"LARGE_ITLB.HIT", "Counts number of large ITLB hits."}, {"L1I_OPPORTUNISTIC_HITS", "Opportunistic hits in streaming."}, {"ITLB_MISSES.ANY", "Counts the number of misses in all levels of the ITLB which causes a page walk."}, {"ITLB_MISSES.WALK_COMPLETED", "Counts number of misses in all levels of the ITLB which resulted in a completed page walk."}, {"ITLB_MISSES.WALK_CYCLES", "Counts ITLB miss page walk cycles."}, {"ITLB_MISSES.STLB_HIT", "Counts the number of ITLB misses that hit in the second level TLB."}, {"ITLB_MISSES.PDE_MISS", "Number of ITLB misses where the low part of the linear to physical address translation was missed."}, {"ITLB_MISSES.PDP_MISS", "Number of ITLB misses where the high part of the linear to physical address translation was missed."}, {"ITLB_MISSES.LARGE_WALK_COMPLETED", "Counts number of completed large page walks due to misses in the STLB."}, {"ILD_STALL.ANY", ""}, {"ILD_STALL.IQ_FULL", ""}, {"ILD_STALL.LCP", 
"Cycles Instruction Length Decoder stalls due to length changing prefixes: 66, 67 or REX.W (for EM64T) instructions which change the length of the decoded instruction."}, {"ILD_STALL.MRU", ""}, {"ILD_STALL.REGEN", ""}, {"BR_INST_EXEC.ANY", "Counts all near executed branches (not necessarily retired). This includes only instructions and not micro-op branches. Frequent branching is not necessarily a major performance issue. However frequent branch mispredictions may be a problem."}, {"BR_INST_EXEC.COND", ""}, {"BR_INST_EXEC.DIRECT", ""}, {"BR_INST_EXEC.DIRECT_NEAR_CALL", ""}, {"BR_INST_EXEC.INDIRECT_NEAR_CALL", ""}, {"BR_INST_EXEC.INDIRECT_NON_CALL", ""}, {"BR_INST_EXEC.NEAR_CALLS", ""}, {"BR_INST_EXEC.NON_CALLS", ""}, {"BR_INST_EXEC.RETURN_NEAR", ""}, {"BR_INST_EXEC.TAKEN", ""}, {"BR_MISP_EXEC.COND", "Counts the number of mispredicted conditional near branch instructions executed, but not necessarily retired."}, {"BR_MISP_EXEC.DIRECT", "Counts mispredicted macro unconditional near branch instructions, excluding calls and indirect branches (should always be 0)."}, {"BR_MISP_EXEC.INDIRECT_NON_CALL", "Counts the number of executed mispredicted indirect near branch instructions that are not calls."}, {"BR_MISP_EXEC.NON_CALLS", "Counts mispredicted non call near branches executed, but not necessarily retired."}, {"BR_MISP_EXEC.RETURN_NEAR", "Counts mispredicted indirect branches that have a rear return mnemonic."}, {"BR_MISP_EXEC.DIRECT_NEAR_CALL", "Counts mispredicted non-indirect near calls executed, (should always be 0)."}, {"BR_MISP_EXEC.INDIRECT_NEAR_CALL", "Counts mispredicted indirect near calls exeucted, including both register and memory indirect."}, {"BR_MISP_EXEC.NEAR_CALLS", "Counts all mispredicted near call branches executed, but not necessarily retired."}, {"BR_MISP_EXEC.TAKEN", "Counts executed mispredicted near branches that are taken, but not necessarily retired."}, {"BR_MISP_EXEC.ANY", "Counts the number of mispredicted near branch instructions that 
were executed, but not necessarily retired."}, {"RESOURCE_STALLS.ANY", "Counts the number of Allocator resource related stalls. Includes register renaming buffer entries, memory buffer entries. In addition to resource related stalls, this event counts some other events. Includes stalls arising during branch misprediction recovery, such as if retirement of the mispredicted branch is delayed and stalls arising while store buffer is draining from synchronizing operations. Does not include stalls due to SuperQ (off core) queue full, too many cache misses, etc."}, {"RESOURCE_STALLS.LOAD", "Counts the cycles of stall due to lack of load buffer for load operation."}, {"RESOURCE_STALLS.RS_FULL", "This event counts the number of cycles when the number of instructions in the pipeline waiting for execution reaches the limit the processor can handle. A high count of this event indicates that there are long latency operations in the pipe (possibly load and store operations that miss the L2 cache, or instructions dependent upon instructions further down the pipeline that have yet to retire. When RS is full, new instructions can not enter the reservation station and start execution."}, {"RESOURCE_STALLS.STORE", "This event counts the number of cycles that a resource related stall will occur due to the number of store instructions reaching the limit of the pipeline, (i.e. all store buffers are used). The stall ends when a store instruction commits its data to the cache or memory."}, {"RESOURCE_STALLS.ROB_FULL", "Counts the cycles of stall due to re- order buffer full."}, {"RESOURCE_STALLS.FPCW", "Counts the number of cycles while execution was stalled due to writing the floating-point unit (FPU) control word."}, {"RESOURCE_STALLS.MXCSR", "Stalls due to the MXCSR register rename occurring to close to a previous MXCSR rename. 
The MXCSR provides control and status for the MMX registers."}, {"RESOURCE_STALLS.OTHER", "Counts the number of cycles while execution was stalled due to other resource issues."}, {"MACRO_INSTS.FUSIONS_DECODED", "Counts the number of instructions decoded that are macro-fused but not necessarily executed or retired."}, {"BACLEAR_FORCE_IQ", "Counts number of times a BACLEAR was forced by the Instruction Queue. The IQ is also responsible for providing conditional branch prediciton direction based on a static scheme and dynamic data provided by the L2 Branch Prediction Unit. If the conditional branch target is not found in the Target Array and the IQ predicts that the branch is taken, then the IQ will force the Branch Address Calculator to issue a BACLEAR. Each BACLEAR asserted by the BAC generates approximately an 8 cycle bubble in the instruction fetch pipeline."}, {"LSD.UOPS", "Counts the number of micro-ops delivered by loop stream detector Use cmask=1 and invert to count cycles."}, {"ITLB.FLUSH", "Counts the number of ITLB flushes."}, {"OFFCORE_REQUESTS.DEMAND.READ_DATA", "Counts number of offcore demand data read requests. Does not count L2 prefetch requests."}, {"OFFCORE_REQUESTS.DEMAND.READ_CODE", "Counts number of offcore demand code read requests. Does not count L2 prefetch requests."}, {"OFFCORE_REQUESTS.DEMAND.RFO", "Counts number of offcore demand RFO requests. Does not count L2 prefetch requests."}, {"OFFCORE_REQUESTS.ANY.READ", "Counts number of offcore read requests. Includes L2 prefetch requests."}, {"OFFCORE_REQUESTS.ANY.RFO", "Counts number of offcore RFO requests. Includes L2 prefetch requests."}, {"OFFCORE_REQUESTS.UNCACHED_MEM", "Counts number of offcore uncached memory requests."}, {"OFFCORE_REQUESTS.L1D_WRITEBACK", "Counts number of L1D writebacks to the uncore."}, {"OFFCORE_REQUESTS.ANY", "Counts all offcore requests."}, {"UOPS_EXECUTED.PORT0", "Counts number of Uops executed that were issued on port 0. 
Port 0 handles integer arithmetic, SIMD and FP add Uops."}, {"UOPS_EXECUTED.PORT1", "Counts number of Uops executed that were issued on port 1. Port 1 handles integer arithmetic, SIMD, integer shift, FP multiply and FP divide Uops."}, {"UOPS_EXECUTED.PORT2_CORE", "Counts number of Uops executed that were issued on port 2. Port 2 handles the load Uops. This is a core count only and can not be collected per thread."}, {"UOPS_EXECUTED.PORT3_CORE", "Counts number of Uops executed that were issued on port 3. Port 3 handles store Uops. This is a core count only and can not be collected per thread."}, {"UOPS_EXECUTED.PORT4_CORE", "Counts number of Uops executed that where issued on port 4. Port 4 handles the value to be stored for the store Uops issued on port 3. This is a core count only and can not be collected per thread."}, {"UOPS_EXECUTED.PORT5", "Counts number of Uops executed that where issued on port 5."}, {"UOPS_EXECUTED.CORE_ACTIVE_CYCLES", "Counts cycles when the Uops are executing."}, {"UOPS_EXECUTED.PORT015", "Counts number of Uops executed that where issued on port 0, 1, or 5. use cmask=1, invert=1 to count stall cycles."}, {"UOPS_EXECUTED.PORT234", "Counts number of Uops executed that where issued on port 2, 3, or 4."}, {"OFFCORE_REQUESTS_SQ_FULL", "Counts number of cycles the SQ is full to handle off-core requests."}, {"SNOOPQ_REQUESTS_OUTSTANDING.DATA", "Counts weighted cycles of snoopq requests for data. Counter 0 only Use cmask=1 to count cycles not empty."}, {"SNOOPQ_REQUESTS_OUTSTANDING.INVALIDATE", "Counts weighted cycles of snoopq invalidate requests. Counter 0 only Use cmask=1 to count cycles not empty."}, {"SNOOPQ_REQUESTS_OUTSTANDING.CODE", "Counts weighted cycles of snoopq requests for code. 
Counter 0 only Use cmask=1 to count cycles not empty."}, {"OFF_CORE_RESPONSE_0", "see Section 19.17.1.3, ?Off-core Response Performance Monitoring in the Processor Core?"}, {"SNOOP_RESPONSE.HIT", "Counts HIT snoop response sent by this thread in response to a snoop request."}, {"SNOOP_RESPONSE.HITE", "Counts HIT E snoop response sent by this thread in response to a snoop request."}, {"SNOOP_RESPONSE.HITM", "Counts HIT M snoop response sent by this thread in response to a snoop request."}, {"PIC_ACCESSES.TPR_READS", "Counts number of TPR reads."}, {"PIC_ACCESSES.TPR_WRITES", "Counts number of TPR writes."}, {"INST_RETIRED.ANY_P", "See Table A-1 Notes: INST_RETIRED.ANY is counted by a designated fixed counter. INST_RETIRED.ANY_P is counted by a programmable counter and is an architectural performance event. Event is supported if CPUID.A.EBX[1] = 0. Counting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions."}, {"INST_RETIRED.X87", "Counts the number of floating point computational operations retired: floating point computational operations executed by the assist handler and sub-operations of complex floating point instructions like transcendental instructions."}, {"UOPS_RETIRED.ANY", "Counts the number of micro-ops retired, (macro-fused=1, micro- fused=2, others=1; maximum count of 8 per cycle). Most instructions are composed of one or two micro- ops. Some instructions are decoded into longer sequences such as repeat instructions, floating point transcendental instructions, and assists. 
Use cmask=1 and invert to count active cycles or stalled cycles."}, {"UOPS_RETIRED.RETIRE_SLOTS", "Counts the number of retirement slots used each cycle."}, {"UOPS_RETIRED.MACRO_FUSED", "Counts number of macro-fused uops retired."}, {"MACHINE_CLEARS.CYCLES", "Counts the cycles machine clear is asserted."}, {"MACHINE_CLEARS.MEM_ORDER", "Counts the number of machine clears due to memory order conflicts."}, {"MACHINE_CLEARS.SMC", "Counts the number of times that a program writes to a code section. Self-modifying code causes a sever penalty in all Intel 64 and IA-32 processors. The modified cache line is written back to the L2 and L3caches."}, {"MACHINE_CLEARS.FUSION_ASSIST", "Counts the number of macro-fusion assists."}, {"BR_INST_RETIRED.ALL_BRANCHES", "See Table A-1."}, {"BR_INST_RETIRED.CONDITIONAL", "Counts the number of conditional branch instructions retired."}, {"BR_INST_RETIRED.NEAR_CALL", "Counts the number of direct & indirect near unconditional calls retired."}, {"BR_MISP_RETIRED.ALL_BRANCHES", "See Table A-1."}, {"BR_MISP_RETIRED.NEAR_CALL", "Counts mispredicted direct & indirect near unconditional retired calls."}, {"SSEX_UOPS_RETIRED.PACKED_SINGLE", "Counts SIMD packed single- precision floating point Uops retired."}, {"SSEX_UOPS_RETIRED.SCALAR_SINGLE", "Counts SIMD calar single-precision floating point Uops retired."}, {"SSEX_UOPS_RETIRED.PACKED_DOUBLE", "Counts SIMD packed double- precision floating point Uops retired."}, {"SSEX_UOPS_RETIRED.SCALAR_DOUBLE", "Counts SIMD scalar double-precision floating point Uops retired."}, {"SSEX_UOPS_RETIRED.VECTOR_INTEGER", "Counts 128-bit SIMD vector integer Uops retired."}, {"ITLB_MISS_RETIRED", "Counts the number of retired instructions that missed the ITLB when the instruction was fetched."}, {"MEM_LOAD_RETIRED.L1D_HIT", "Counts number of retired loads that hit the L1 data cache."}, {"MEM_LOAD_RETIRED.L2_HIT", "Counts number of retired loads that hit the L2 data cache."}, 
{"MEM_LOAD_RETIRED.OTHER_CORE_L2_HIT_HITM", "Counts number of retired loads that hit in a sibling core's L2 (on die core). Since the L3 is inclusive of all cores on the package, this is an L3 hit. This counts both clean or modified hits."}, {"MEM_LOAD_RETIRED.HIT_LFB", "Counts number of retired loads that miss the L1D and the address is located in an allocated line fill buffer and will soon be committed to cache. This is counting secondary L1D misses."}, {"MEM_LOAD_RETIRED.DTLB_MISS", "Counts the number of retired loads that missed the DTLB. The DTLB miss is not counted if the load operation causes a fault. This event counts loads from cacheable memory only. The event does not count loads by software prefetches. Counts both primary and secondary misses to the TLB."}, {"MEM_LOAD_RETIRED.L3_MISS", "Counts number of retired loads that miss the L3 cache."}, {"MEM_LOAD_RETIRED.L3_UNSHARED_HIT", "Couns number of retired loads that hit their own, unshared lines in the L3 cache."}, {"FP_MMX_TRANS.TO_FP", "Counts the first floating-point instruction following any MMX instruction. You can use this event to estimate the penalties for the transitions between floating-point and MMX technology states."}, {"FP_MMX_TRANS.TO_MMX", "Counts the first MMX instruction following a floating-point instruction. You can use this event to estimate the penalties for the transitions between floating-point and MMX technology states."}, {"FP_MMX_TRANS.ANY", "Counts all transitions from floating point to MMX instructions and from MMX instructions to floating point instructions. You can use this event to estimate the penalties for the transitions between floating-point and MMX technology states."}, {"MACRO_INSTS.DECODED", "Counts the number of instructions decoded, (but not necessarily executed or retired)."}, {"UOPS_DECODED.MS", "Counts the number of Uops decoded by the Microcode Sequencer, MS. 
The MS delivers uops when the instruction is more than 4 uops long or a microcode assist is occurring."}, {"UOPS_DECODED.ESP_FOLDING", "Counts number of stack pointer (ESP) instructions decoded: push , pop , call , ret, etc. ESP instructions do not generate a Uop to increment or decrement ESP. Instead, they update an ESP_Offset register that keeps track of the delta to the current value of the ESP register."}, {"UOPS_DECODED.ESP_SYNC", "Counts number of stack pointer (ESP) sync operations where an ESP instruction is corrected by adding the ESP offset register to the current value of the ESP register."}, {"RAT_STALLS.FLAGS", "Counts the number of cycles during which execution stalled due to several reasons, one of which is a partial flag register stall. A partial register stall may occur when two conditions are met: 1) an instruction modifies some, but not all, of the flags in the flag register and 2) the next instruction, which depends on flags, depends on flags that were not modified by this instruction."}, {"RAT_STALLS.REGISTERS", "This event counts the number of cycles instruction execution latency became longer than the defined latency because the instruction used a register that was partially written by previous instruction."}, {"RAT_STALLS.ROB_READ_PORT", "Counts the number of cycles when ROB read port stalls occurred, which did not allow new micro-ops to enter the out-of-order pipeline. Note that, at this stage in the pipeline, additional stalls may occur at the same cycle and prevent the stalled micro-ops from entering the pipe. In such a case, micro-ops retry entering the execution pipe in the next cycle and the ROB-read port stall is counted again."}, {"RAT_STALLS.SCOREBOARD", "Counts the cycles where we stall due to microarchitecturally required serialization. 
Microcode scoreboarding stalls."}, {"RAT_STALLS.ANY", "Counts all Register Allocation Table stall cycles due to: Cycles when ROB read port stalls occurred, which did not allow new micro-ops to enter the execution pipe. Cycles when partial register stalls occurred Cycles when flag stalls occurred Cycles floating-point unit (FPU) status word stalls occurred. To count each of these conditions separately use the events: RAT_STALLS.ROB_READ_PORT, RAT_STALLS.PARTIAL, RAT_STALLS.FLAGS, and RAT_STALLS.FPSW."}, {"SEG_RENAME_STALLS", "Counts the number of stall cycles due to the lack of renaming resources for the ES, DS, FS, and GS segment registers. If a segment is renamed but not retired and a second update to the same segment occurs, a stall occurs in the front-end of the pipeline until the renamed segment retires."}, {"ES_REG_RENAMES", "Counts the number of times the ES segment register is renamed."}, {"UOP_UNFUSION", "Counts unfusion events due to floating point exception to a fused uop."}, {"BR_INST_DECODED", "Counts the number of branch instructions decoded."}, {"BOGUS_BR", "Counts the number of bogus branches."}, {"BPU_MISSED_CALL_RET", "Counts number of times the Branch Prediciton Unit missed predicting a call or return branch."}, {"L2_HW_PREFETCH.DATA_TRIGGER", "Count L2 HW data prefetcher triggered."}, {"L2_HW_PREFETCH.CODE_TRIGGER", "Count L2 HW code prefetcher triggered."}, {"L2_HW_PREFETCH.DCA_TRIGGER", "Count L2 HW DCA prefetcher triggered."}, {"L2_HW_PREFETCH.KICK_START", "Count L2 HW prefetcher kick started."}, {"SQ_MISC.PROMOTION", "Counts the number of L2 secondary misses that hit the Super Queue."}, {"SQ_MISC.PROMOTION_POST_GO", "Counts the number of L2 secondary misses during the Super Queue filling L2."}, {"SQ_MISC.LRU_HINTS", "Counts number of Super Queue LRU hints sent to L3."}, {"SQ_MISC.FILL_DROPPED", "Counts the number of SQ L2 fills dropped due to L2 busy."}, {"SQ_MISC.SPLIT_LOCK", "Counts the number of SQ lock splits across a cache line."}, 
{"SQ_FULL_STALL_CYCLES", "Counts cycles the Super Queue is full. Neither of the threads on this core will be able to access the uncore."}, {"FP_ASSIST.ALL", "Counts the number of floating point operations executed that required micro-code assist intervention. Assists are required in the following cases: SSE instructions, (Denormal input when the DAZ flag is off or Underflow result when the FTZ flag is off): x87 instructions, (NaN or denormal are loaded to a register or used as input from memory, Division by 0 or Underflow output)."}, {"FP_ASSIST.OUTPUT", "Counts number of floating point micro-code assist when the output value (destination register) is invalid."}, {"FP_ASSIST.INPUT", "Counts number of floating point micro-code assist when the input value (one of the source operands to an FP instruction) is invalid."}, {"SEGMENT_REG_LOADS", "Counts number of segment register loads."}, {"SIMD_INT_64.PACKED_MPY", "Counts number of SID integer 64 bit packed multiply operations."}, {"SIMD_INT_64.PACKED_SHIFT", "Counts number of SID integer 64 bit packed shift operations."}, {"SIMD_INT_64.PACK", "Counts number of SID integer 64 bit pack operations."}, {"SIMD_INT_64.UNPACK", "Counts number of SID integer 64 bit unpack operations."}, {"SIMD_INT_64.PACKED_LOGICAL", "Counts number of SID integer 64 bit logical operations."}, {"SIMD_INT_64.PACKED_ARITH", "Counts number of SID integer 64 bit arithmetic operations."}, {"SIMD_INT_64.SHUFFLE_MOVE", "Counts number of SID integer 64 bit shift or move operations."}, {"INSTR_RETIRED_ANY", "Instructions retired (IAF)"}, {"CPU_CLK_UNHALTED_CORE", "Unhalted core cycles (IAF)"}, {"CPU_CLK_UNHALTED_REF", "Unhalted reference cycles (IAF)"}, {"GQ_CYCLES_FULL.READ_TRACKER", "Uncore cycles Global Queue read tracker is full."}, {"GQ_CYCLES_FULL.WRITE_TRACKER", "Uncore cycles Global Queue write tracker is full."}, {"GQ_CYCLES_FULL.PEER_PROBE_TRACKER", "Uncore cycles Global Queue peer probe tracker is full. 
The peer probe tracker queue tracks snoops from the IOH and remote sockets."}, {"GQ_CYCLES_NOT_EMPTY.READ_TRACKER", "Uncore cycles were Global Queue read tracker has at least one valid entry."}, {"GQ_CYCLES_NOT_EMPTY.WRITE_TRACKER", "Uncore cycles were Global Queue write tracker has at least one valid entry."}, {"GQ_CYCLES_NOT_EMPTY.PEER_PROBE_TRACKER", "Uncore cycles were Global Queue peer probe tracker has at least one valid entry. The peer probe tracker queue tracks IOH and remote socket snoops."}, {"GQ_ALLOC.READ_TRACKER", "Counts the number of tread tracker allo- cate to deallocate entries. The GQ read tracker allocate to deal- locate occupancy count is divided by the count to obtain the average read tracker latency."}, {"GQ_ALLOC.RT_L3_MISS", "Counts the number GQ read tracker entries for which a full cache line read has missed the L3. The GQ read tracker L3 miss to fill occupancy count is divided by this count to obtain the average cache line read L3 miss latency. The latency represents the time after which the L3 has determined that the cache line has missed. The time between a GQ read tracker allocation and the L3 determining that the cache line has missed is the average L3 hit latency. The total L3 cache line read miss latency is the hit latency + L3 miss latency."}, {"GQ_ALLOC.RT_TO_L3_RESP", "Counts the number of GQ read tracker entries that are allocated in the read tracker queue that hit or miss the L3. The GQ read tracker L3 hit occupancy count is divided by this count to obtain the average L3 hit latency."}, {"GQ_ALLOC.RT_TO_RTID_ACQUIRED", "Counts the number of GQ read tracker entries that are allocated in the read tracker, have missed in the L3 and have not acquired a Request Transaction ID. 
The GQ read tracker L3 miss to RTID acquired occupancy count is divided by this count to obtain the average latency for a read L3 miss to acquire an RTID."}, {"GQ_ALLOC.WT_TO_RTID_ACQUIRED", "Counts the number of GQ write tracker entries that are allocated in the write tracker, have missed in the L3 and have not acquired a Request Transaction ID. The GQ write tracker L3 miss to RTID occupancy count is divided by this count to obtain the average latency for a write L3 miss to acquire an RTID."}, {"GQ_ALLOC.WRITE_TRACKER", "Counts the number of GQ write tracker entries that are allocated in the write tracker queue that miss the L3. The GQ write tracker occupancy count is divided by the this count to obtain the average L3 write miss latency."}, {"GQ_ALLOC.PEER_PROBE_TRACKER", "Counts the number of GQ peer probe tracker (snoop) entries that are allocated in the peer probe tracker queue that miss the L3. The GQ peer probe occupancy count is divided by this count to obtain the average L3 peer probe miss latency."}, {"GQ_DATA.FROM_QPI", "Cycles Global Queue Quickpath Interface input data port is busy importing data from the Quickpath Inter- face. Each cycle the input port can transfer 8 or 16 bytes of data."}, {"GQ_DATA.FROM_QMC", "Cycles Global Queue Quickpath Memory Interface input data port is busy importing data from the Quick- path Memory Interface. Each cycle the input port can transfer 8 or 16 bytes of data."}, {"GQ_DATA.FROM_L3", "Cycles GQ L3 input data port is busy importing data from the Last Level Cache. Each cycle the input port can transfer 32 bytes of data."}, {"GQ_DATA.FROM_CORES_02", "Cycles GQ Core 0 and 2 input data port is busy importing data from processor cores 0 and 2. Each cycle the input port can transfer 32 bytes of data."}, {"GQ_DATA.FROM_CORES_13", "Cycles GQ Core 1 and 3 input data port is busy importing data from processor cores 1 and 3. 
Each cycle the input port can transfer 32 bytes of data."}, {"GQ_DATA.TO_QPI_QMC", "Cycles GQ QPI and QMC output data port is busy sending data to the Quickpath Interface or Quickpath Memory Interface. Each cycle the output port can transfer 32 bytes of data."}, {"GQ_DATA.TO_L3", "Cycles GQ L3 output data port is busy sending data to the Last Level Cache. Each cycle the output port can transfer 32 bytes of data."}, {"GQ_DATA.TO_CORES", "Cycles GQ Core output data port is busy sending data to the Cores. Each cycle the output port can trans- fer 32 bytes of data."}, {"SNP_RESP_TO_LOCAL_HOME.I_STATE", "Number of snoop responses to the local home that L3 does not have the referenced cache line."}, {"SNP_RESP_TO_LOCAL_HOME.S_STATE", "Number of snoop responses to the local home that L3 has the referenced line cached in the S state."}, {"SNP_RESP_TO_LOCAL_HOME.FWD_S_STATE", "Number of responses to code or data read snoops to the local home that the L3 has the referenced cache line in the E state. The L3 cache line state is changed to the S state and the line is forwarded to the local home in the S state."}, {"SNP_RESP_TO_LOCAL_HOME.FWD_I_STATE", "Number of responses to read invalidate snoops to the local home that the L3 has the referenced cache line in the M state. 
The L3 cache line state is invalidated and the line is forwarded to the local home in the M state."}, {"SNP_RESP_TO_LOCAL_HOME.CONFLICT", "Number of conflict snoop responses sent to the local home."}, {"SNP_RESP_TO_LOCAL_HOME.WB", "Number of responses to code or data read snoops to the local home that the L3 has the referenced line cached in the M state."}, {"SNP_RESP_TO_REMOTE_HOME.I_STATE", "Number of snoop responses to a remote home that L3 does not have the referenced cache line."}, {"SNP_RESP_TO_REMOTE_HOME.S_STATE", "Number of snoop responses to a remote home that L3 has the referenced line cached in the S state."}, {"SNP_RESP_TO_REMOTE_HOME.FWD_S_STATE", "Number of responses to code or data read snoops to a remote home that the L3 has the referenced cache line in the E state. The L3 cache line state is changed to the S state and the line is forwarded to the remote home in the S state."}, {"SNP_RESP_TO_REMOTE_HOME.FWD_I_STATE", "Number of responses to read invalidate snoops to a remote home that the L3 has the referenced cache line in the M state. The L3 cache line state is invalidated and the line is forwarded to the remote home in the M state."}, {"SNP_RESP_TO_REMOTE_HOME.CONFLICT", "Number of conflict snoop responses sent to the local home."}, {"SNP_RESP_TO_REMOTE_HOME.WB", "Number of responses to code or data read snoops to a remote home that the L3 has the referenced line cached in the M state."}, {"SNP_RESP_TO_REMOTE_HOME.HITM", "Number of HITM snoop responses to a remote home."}, {"L3_HITS.READ", "Number of code read, data read and RFO requests that hit in the L3."}, {"L3_HITS.WRITE", "Number of writeback requests that hit in the L3. 
Writebacks from the cores will always result in L3 hits due to the inclusive property of the L3."}, {"L3_HITS.PROBE", "Number of snoops from IOH or remote sock- ets that hit in the L3."}, {"L3_HITS.ANY", "Number of reads and writes that hit the L3."}, {"L3_MISS.READ", "Number of code read, data read and RFO requests that miss the L3."}, {"L3_MISS.WRITE", "Number of writeback requests that miss the L3. Should always be zero as writebacks from the cores will always result in L3 hits due to the inclusive property of the L3."}, {"L3_MISS.PROBE", "Number of snoops from IOH or remote sock- ets that miss the L3."}, {"L3_MISS.ANY", "Number of reads and writes that miss the L3."}, {"L3_LINES_IN.M_STATE", "Counts the number of L3 lines allocated in M state. The only time a cache line is allocated in the M state is when the line was forwarded in M state is forwarded due to a Snoop Read Invalidate Own request."}, {"L3_LINES_IN.E_STATE", "Counts the number of L3 lines allocated in E state."}, {"L3_LINES_IN.S_STATE", "Counts the number of L3 lines allocated in S state."}, {"L3_LINES_IN.F_STATE", "Counts the number of L3 lines allocated in F state."}, {"L3_LINES_IN.ANY", "Counts the number of L3 lines allocated in any state."}, {"L3_LINES_OUT.M_STATE", "Counts the number of L3 lines victimized that were in the M state. 
When the victim cache line is in M state, the line is written to its home cache agent which can be either local or remote."}, {"L3_LINES_OUT.E_STATE", "Counts the number of L3 lines victimized that were in the E state."}, {"L3_LINES_OUT.S_STATE", "Counts the number of L3 lines victimized that were in the S state."}, {"L3_LINES_OUT.I_STATE", "Counts the number of L3 lines victimized that were in the I state."}, {"L3_LINES_OUT.F_STATE", "Counts the number of L3 lines victimized that were in the F state."}, {"L3_LINES_OUT.ANY", "Counts the number of L3 lines victimized in any state."}, {"QHL_REQUESTS.IOH_READS", "Counts number of Quickpath Home Logic read requests from the IOH."}, {"QHL_REQUESTS.IOH_WRITES", "Counts number of Quickpath Home Logic write requests from the IOH."}, {"QHL_REQUESTS.REMOTE_READS", "Counts number of Quickpath Home Logic read requests from a remote socket."}, {"QHL_REQUESTS.REMOTE_WRITES", "Counts number of Quickpath Home Logic write requests from a remote socket."}, {"QHL_REQUESTS.LOCAL_READS", "Counts number of Quickpath Home Logic read requests from the local socket."}, {"QHL_REQUESTS.LOCAL_WRITES", "Counts number of Quickpath Home Logic write requests from the local socket."}, {"QHL_CYCLES_FULL.IOH", "Counts uclk cycles all entries in the Quickpath Home Logic IOH are full."}, {"QHL_CYCLES_FULL.REMOTE", "Counts uclk cycles all entries in the Quickpath Home Logic remote tracker are full."}, {"QHL_CYCLES_FULL.LOCAL", "Counts uclk cycles all entries in the Quickpath Home Logic local tracker are full."}, {"QHL_CYCLES_NOT_EMPTY.IOH", "Counts uclk cycles all entries in the Quickpath Home Logic IOH is busy."}, {"QHL_CYCLES_NOT_EMPTY.REMOTE", "Counts uclk cycles all entries in the Quickpath Home Logic remote tracker is busy."}, {"QHL_CYCLES_NOT_EMPTY.LOCAL", "Counts uclk cycles all entries in the Quickpath Home Logic local tracker is busy."}, {"QHL_OCCUPANCY.IOH", "QHL IOH tracker allocate to deallocate read occupancy."}, {"QHL_OCCUPANCY.REMOTE", 
"QHL remote tracker allocate to deallocate read occupancy."}, {"QHL_OCCUPANCY.LOCAL", "QHL local tracker allocate to deallocate read occupancy."}, {"QHL_ADDRESS_CONFLICTS.2WAY", "Counts number of QHL Active Address Table (AAT) entries that saw a max of 2 conflicts. The AAT is a struc- ture that tracks requests that are in conflict. The requests themselves are in the home tracker entries. The count is reported when an AAT entry deallocates."}, {"QHL_ADDRESS_CONFLICTS.3WAY", "Counts number of QHL Active Address Table (AAT) entries that saw a max of 3 conflicts. The AAT is a struc- ture that tracks requests that are in conflict. The requests themselves are in the home tracker entries. The count is reported when an AAT entry deallocates."}, {"QHL_CONFLICT_CYCLES.IOH", "Counts cycles the Quickpath Home Logic IOH Tracker contains two or more requests with an address conflict. A max of 3 requests can be in conflict."}, {"QHL_CONFLICT_CYCLES.REMOTE", "Counts cycles the Quickpath Home Logic Remote Tracker contains two or more requests with an address con- flict. A max of 3 requests can be in conflict."}, {"QHL_CONFLICT_CYCLES.LOCAL", "Counts cycles the Quickpath Home Logic Local Tracker contains two or more requests with an address con- flict. A max of 3 requests can be in conflict."}, {"QHL_TO_QMC_BYPASS", "Counts number or requests to the Quickpath Memory Controller that bypass the Quickpath Home Logic. All local accesses can be bypassed. 
For remote requests, only read requests can be bypassed."}, {"QMC_NORMAL_FULL.READ.CH0", "Uncore cycles all the entries in the DRAM channel 0 medium or low priority queue are occupied with read requests."}, {"QMC_NORMAL_FULL.READ.CH1", "Uncore cycles all the entries in the DRAM channel 1 medium or low priority queue are occupied with read requests."}, {"QMC_NORMAL_FULL.READ.CH2", "Uncore cycles all the entries in the DRAM channel 2 medium or low priority queue are occupied with read requests."}, {"QMC_NORMAL_FULL.WRITE.CH0", "Uncore cycles all the entries in the DRAM channel 0 medium or low priority queue are occupied with write requests."}, {"QMC_NORMAL_FULL.WRITE.CH1", "Counts cycles all the entries in the DRAM channel 1 medium or low priority queue are occupied with write requests."}, {"QMC_NORMAL_FULL.WRITE.CH2", "Uncore cycles all the entries in the DRAM channel 2 medium or low priority queue are occupied with write requests."}, {"QMC_ISOC_FULL.READ.CH0", "Counts cycles all the entries in the DRAM channel 0 high priority queue are occupied with isochronous read requests."}, {"QMC_ISOC_FULL.READ.CH1", "Counts cycles all the entries in the DRAM channel 1high priority queue are occupied with isochronous read requests."}, {"QMC_ISOC_FULL.READ.CH2", "Counts cycles all the entries in the DRAM channel 2 high priority queue are occupied with isochronous read requests."}, {"QMC_ISOC_FULL.WRITE.CH0", "Counts cycles all the entries in the DRAM channel 0 high priority queue are occupied with isochronous write requests."}, {"QMC_ISOC_FULL.WRITE.CH1", "Counts cycles all the entries in the DRAM channel 1 high priority queue are occupied with isochronous write requests."}, {"QMC_ISOC_FULL.WRITE.CH2", "Counts cycles all the entries in the DRAM channel 2 high priority queue are occupied with isochronous write requests."}, {"QMC_BUSY.READ.CH0", "Counts cycles where Quickpath Memory Con- troller has at least 1 outstanding read request to DRAM channel 0."}, {"QMC_BUSY.READ.CH1", 
"Counts cycles where Quickpath Memory Con- troller has at least 1 outstanding read request to DRAM channel 1."}, {"QMC_BUSY.READ.CH2", "Counts cycles where Quickpath Memory Con- troller has at least 1 outstanding read request to DRAM channel 2."}, {"QMC_BUSY.WRITE.CH0", "Counts cycles where Quickpath Memory Con- troller has at least 1 outstanding write request to DRAM channel 0."}, {"QMC_BUSY.WRITE.CH1", "Counts cycles where Quickpath Memory Con- troller has at least 1 outstanding write request to DRAM channel 1."}, {"QMC_BUSY.WRITE.CH2", "Counts cycles where Quickpath Memory Con- troller has at least 1 outstanding write request to DRAM channel 2."}, {"QMC_OCCUPANCY.CH0", "IMC channel 0 normal read request occupancy."}, {"QMC_OCCUPANCY.CH1", "IMC channel 1 normal read request occupancy."}, {"QMC_OCCUPANCY.CH2", "IMC channel 2 normal read request occupancy."}, {"QMC_ISSOC_OCCUPANCY.CH0", "IMC channel 0 issoc read request occupancy."}, {"QMC_ISSOC_OCCUPANCY.CH1", "IMC channel 1 issoc read request occupancy."}, {"QMC_ISSOC_OCCUPANCY.CH2", "IMC channel 2 issoc read request occu- pancy."}, {"QMC_ISSOC_READS.ANY", "IMC issoc read request occupancy."}, {"QMC_NORMAL_READS.CH0", "Counts the number of Quickpath Memory Con- troller channel 0 medium and low priority read requests. The QMC channel 0 normal read occupancy divided by this count provides the average QMC channel 0 read latency."}, {"QMC_NORMAL_READS.CH1", "Counts the number of Quickpath Memory Con- troller channel 1 medium and low priority read requests. The QMC channel 1 normal read occupancy divided by this count provides the average QMC channel 1 read latency."}, {"QMC_NORMAL_READS.CH2", "Counts the number of Quickpath Memory Con- troller channel 2 medium and low priority read requests. The QMC channel 2 normal read occupancy divided by this count provides the average QMC channel 2 read latency."}, {"QMC_NORMAL_READS.ANY", "Counts the number of Quickpath Memory Con- troller medium and low priority read requests. 
The QMC normal read occupancy divided by this count provides the average QMC read latency."}, {"QMC_HIGH_PRIORITY_READS.CH0", "Counts the number of Quickpath Memory Con- troller channel 0 high priority isochronous read requests."}, {"QMC_HIGH_PRIORITY_READS.CH1", "Counts the number of Quickpath Memory Con- troller channel 1 high priority isochronous read requests."}, {"QMC_HIGH_PRIORITY_READS.CH2", "Counts the number of Quickpath Memory Con- troller channel 2 high priority isochronous read requests."}, {"QMC_HIGH_PRIORITY_READS.ANY", "Counts the number of Quickpath Memory Con- troller high priority isochronous read requests."}, {"QMC_CRITICAL_PRIORITY_READS.CH0", "Counts the number of Quickpath Memory Con- troller channel 0 critical priority isochronous read requests."}, {"QMC_CRITICAL_PRIORITY_READS.CH1", "Counts the number of Quickpath Memory Con- troller channel 1 critical priority isochronous read requests."}, {"QMC_CRITICAL_PRIORITY_READS.CH2", "Counts the number of Quickpath Memory Con- troller channel 2 critical priority isochronous read requests."}, {"QMC_CRITICAL_PRIORITY_READS.ANY", "Counts the number of Quickpath Memory Con- troller critical priority isochronous read requests."}, {"QMC_WRITES.FULL.CH0", "Counts number of full cache line writes to DRAM channel 0."}, {"QMC_WRITES.FULL.CH1", "Counts number of full cache line writes to DRAM channel 1."}, {"QMC_WRITES.FULL.CH2", "Counts number of full cache line writes to DRAM channel 2."}, {"QMC_WRITES.FULL.ANY", "Counts number of full cache line writes to DRAM."}, {"QMC_WRITES.PARTIAL.CH0", "Counts number of partial cache line writes to DRAM channel 0."}, {"QMC_WRITES.PARTIAL.CH1", "Counts number of partial cache line writes to DRAM channel 1."}, {"QMC_WRITES.PARTIAL.CH2", "Counts number of partial cache line writes to DRAM channel 2."}, {"QMC_WRITES.PARTIAL.ANY", "Counts number of partial cache line writes to DRAM."}, {"QMC_CANCEL.CH0", "Counts number of DRAM channel 0 cancel requests."}, 
{"QMC_CANCEL.CH1", "Counts number of DRAM channel 1 cancel requests."}, {"QMC_CANCEL.CH2", "Counts number of DRAM channel 2 cancel requests."}, {"QMC_CANCEL.ANY", "Counts number of DRAM cancel requests."}, {"QMC_PRIORITY_UPDATES.CH0", "Counts number of DRAM channel 0 priority updates. A priority update occurs when an ISOC high or critical request is received by the QHL and there is a matching request with normal priority that has already been issued to the QMC. In this instance, the QHL will send a priority update to QMC to expedite the request."}, {"QMC_PRIORITY_UPDATES.CH1", "Counts number of DRAM channel 1 priority updates. A priority update occurs when an ISOC high or critical request is received by the QHL and there is a matching request with normal priority that has already been issued to the QMC. In this instance, the QHL will send a priority update to QMC to expedite the request."}, {"QMC_PRIORITY_UPDATES.CH2", "Counts number of DRAM channel 2 priority updates. A priority update occurs when an ISOC high or critical request is received by the QHL and there is a matching request with normal priority that has already been issued to the QMC. In this instance, the QHL will send a priority update to QMC to expedite the request."}, {"QMC_PRIORITY_UPDATES.ANY", "Counts number of DRAM priority updates. A priority update occurs when an ISOC high or critical request is received by the QHL and there is a matching request with normal priority that has already been issued to the QMC. In this instance, the QHL will send a priority update to QMC to expedite the request."}, {"QHL_FRC_ACK_CNFLTS.LOCAL", "Counts number of Force Acknowledge Con- flict messages sent by the Quickpath Home Logic to the local home."}, {"QPI_TX_STALLED_SINGLE_FLIT.HOME.LINK_0", "Counts cycles the Quickpath outbound link 0 HOME virtual channel is stalled due to lack of a VNA and VN0 credit. 
Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, {"QPI_TX_STALLED_SINGLE_FLIT.SNOOP.LINK_0", "Counts cycles the Quickpath outbound link 0 SNOOP virtual channel is stalled due to lack of a VNA and VN0 credit. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, {"QPI_TX_STALLED_SINGLE_FLIT.NDR.LINK_0", "Counts cycles the Quickpath outbound link 0 non-data response virtual channel is stalled due to lack of a VNA and VN0 credit. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, {"QPI_TX_STALLED_SINGLE_FLIT.HOME.LINK_1", "Counts cycles the Quickpath outbound link 1 HOME virtual channel is stalled due to lack of a VNA and VN0 credit. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, {"QPI_TX_STALLED_SINGLE_FLIT.SNOOP.LINK_1", "Counts cycles the Quickpath outbound link 1 SNOOP virtual channel is stalled due to lack of a VNA and VN0 credit. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, {"QPI_TX_STALLED_SINGLE_FLIT.NDR.LINK_1", "Counts cycles the Quickpath outbound link 1 non-data response virtual channel is stalled due to lack of a VNA and VN0 credit. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, {"QPI_TX_STALLED_SINGLE_FLIT.LINK_0", "Counts cycles the Quickpath outbound link 0 virtual channels are stalled due to lack of a VNA and VN0 credit. 
Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, {"QPI_TX_STALLED_SINGLE_FLIT.LINK_1", "Counts cycles the Quickpath outbound link 1 virtual channels are stalled due to lack of a VNA and VN0 credit. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, {"QPI_TX_STALLED_MULTI_FLIT.DRS.LINK_0", "Counts cycles the Quickpath outbound link 0 Data ResponSe virtual channel is stalled due to lack of VNA and VN0 credits. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, {"QPI_TX_STALLED_MULTI_FLIT.NCB.LINK_0", "Counts cycles the Quickpath outbound link 0 Non-Coherent Bypass virtual channel is stalled due to lack of VNA and VN0 credits. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, {"QPI_TX_STALLED_MULTI_FLIT.NCS.LINK_0", "Counts cycles the Quickpath outbound link 0 Non-Coherent Standard virtual channel is stalled due to lack of VNA and VN0 credits. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, {"QPI_TX_STALLED_MULTI_FLIT.DRS.LINK_1", "Counts cycles the Quickpath outbound link 1 Data ResponSe virtual channel is stalled due to lack of VNA and VN0 credits. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, {"QPI_TX_STALLED_MULTI_FLIT.NCB.LINK_1", "Counts cycles the Quickpath outbound link 1 Non-Coherent Bypass virtual channel is stalled due to lack of VNA and VN0 credits. 
Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, {"QPI_TX_STALLED_MULTI_FLIT.NCS.LINK_1", "Counts cycles the Quickpath outbound link 1 Non-Coherent Standard virtual channel is stalled due to lack of VNA and VN0 credits. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, {"QPI_TX_STALLED_MULTI_FLIT.LINK_0", "Counts cycles the Quickpath outbound link 0 virtual channels are stalled due to lack of VNA and VN0 cred- its. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, {"QPI_TX_STALLED_MULTI_FLIT.LINK_1", "Counts cycles the Quickpath outbound link 1 virtual channels are stalled due to lack of VNA and VN0 cred- its. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, {"QPI_TX_HEADER.BUSY.LINK_0", "Number of cycles that the header buffer in the Quickpath Interface outbound link 0 is busy."}, {"QPI_TX_HEADER.BUSY.LINK_1", "Number of cycles that the header buffer in the Quickpath Interface outbound link 1 is busy."}, {"QPI_RX_NO_PPT_CREDIT.STALLS.LINK_0", "Number of cycles that snoop packets incom- ing to the Quickpath Interface link 0 are stalled and not sent to the GQ because the GQ Peer Probe Tracker (PPT) does not have any available entries."}, {"QPI_RX_NO_PPT_CREDIT.STALLS.LINK_1", "Number of cycles that snoop packets incom- ing to the Quickpath Interface link 1 are stalled and not sent to the GQ because the GQ Peer Probe Tracker (PPT) does not have any available entries."}, {"DRAM_OPEN.CH0", "Counts number of DRAM Channel 0 open com- mands issued either for read or write. 
To read or write data, the referenced DRAM page must first be opened."}, {"DRAM_OPEN.CH1", "Counts number of DRAM Channel 1 open com- mands issued either for read or write. To read or write data, the referenced DRAM page must first be opened."}, {"DRAM_OPEN.CH2", "Counts number of DRAM Channel 2 open com- mands issued either for read or write. To read or write data, the referenced DRAM page must first be opened."}, {"DRAM_PAGE_CLOSE.CH0", "DRAM channel 0 command issued to CLOSE a page due to page idle timer expiration. Closing a page is done by issuing a precharge."}, {"DRAM_PAGE_CLOSE.CH1", "DRAM channel 1 command issued to CLOSE a page due to page idle timer expiration. Closing a page is done by issuing a precharge."}, {"DRAM_PAGE_CLOSE.CH2", "DRAM channel 2 command issued to CLOSE a page due to page idle timer expiration. Closing a page is done by issuing a precharge."}, {"DRAM_PAGE_MISS.CH0", "Counts the number of precharges (PRE) that were issued to DRAM channel 0 because there was a page miss. A page miss refers to a situation in which a page is currently open and another page from the same bank needs to be opened. The new page experiences a page miss. Closing of the old page is done by issuing a precharge."}, {"DRAM_PAGE_MISS.CH1", "Counts the number of precharges (PRE) that were issued to DRAM channel 1 because there was a page miss. A page miss refers to a situation in which a page is currently open and another page from the same bank needs to be opened. The new page experiences a page miss. Closing of the old page is done by issuing a precharge."}, {"DRAM_PAGE_MISS.CH2", "Counts the number of precharges (PRE) that were issued to DRAM channel 2 because there was a page miss. A page miss refers to a situation in which a page is currently open and another page from the same bank needs to be opened. The new page experiences a page miss. 
Closing of the old page is done by issuing a precharge."}, {"DRAM_READ_CAS.CH0", "Counts the number of times a read CAS com- mand was issued on DRAM channel 0."}, {"DRAM_READ_CAS.AUTOPRE_CH0", "Counts the number of times a read CAS com- mand was issued on DRAM channel 0 where the command issued used the auto-precharge (auto page close) mode."}, {"DRAM_READ_CAS.CH1", "Counts the number of times a read CAS com- mand was issued on DRAM channel 1."}, {"DRAM_READ_CAS.AUTOPRE_CH1", "Counts the number of times a read CAS com- mand was issued on DRAM channel 1 where the command issued used the auto-precharge (auto page close) mode."}, {"DRAM_READ_CAS.CH2", "Counts the number of times a read CAS com- mand was issued on DRAM channel 2."}, {"DRAM_READ_CAS.AUTOPRE_CH2", "Counts the number of times a read CAS com- mand was issued on DRAM channel 2 where the command issued used the auto-precharge (auto page close) mode."}, {"DRAM_WRITE_CAS.CH0", "Counts the number of times a write CAS command was issued on DRAM channel 0."}, {"DRAM_WRITE_CAS.AUTOPRE_CH0", "Counts the number of times a write CAS command was issued on DRAM channel 0 where the command issued used the auto-precharge (auto page close) mode."}, {"DRAM_WRITE_CAS.CH1", "Counts the number of times a write CAS command was issued on DRAM channel 1."}, {"DRAM_WRITE_CAS.AUTOPRE_CH1", "Counts the number of times a write CAS command was issued on DRAM channel 1 where the command issued used the auto-precharge (auto page close) mode."}, {"DRAM_WRITE_CAS.CH2", "Counts the number of times a write CAS command was issued on DRAM channel 2."}, {"DRAM_WRITE_CAS.AUTOPRE_CH2", "Counts the number of times a write CAS command was issued on DRAM channel 2 where the command issued used the auto-precharge (auto page close) mode."}, {"DRAM_REFRESH.CH0", "Counts number of DRAM channel 0 refresh commands. DRAM loses data content over time. 
In order to keep correct data content, the data values have to be refreshed periodically."}, {"DRAM_REFRESH.CH1", "Counts number of DRAM channel 1 refresh commands. DRAM loses data content over time. In order to keep correct data content, the data values have to be refreshed periodically."}, {"DRAM_REFRESH.CH2", "Counts number of DRAM channel 2 refresh commands. DRAM loses data content over time. In order to keep correct data content, the data values have to be refreshed periodically."}, {"DRAM_PRE_ALL.CH0", "Counts number of DRAM Channel 0 precharge- all (PREALL) commands that close all open pages in a rank. PREALL is issued when the DRAM needs to be refreshed or needs to go into a power down mode."}, {"DRAM_PRE_ALL.CH1", "Counts number of DRAM Channel 1 precharge- all (PREALL) commands that close all open pages in a rank. PREALL is issued when the DRAM needs to be refreshed or needs to go into a power down mode."}, {"DRAM_PRE_ALL.CH2", "Counts number of DRAM Channel 2 precharge- all (PREALL) commands that close all open pages in a rank. 
PREALL is issued when the DRAM needs to be refreshed or needs to go into a power down mode."}, { NULL, NULL } }; papi-5.6.0/src/libpfm4/perf_examples/task_cpu.c000664 001750 001750 00000022733 13216244365 023501 0ustar00jshenry1963jshenry1963000000 000000 /* * task_cpu.c - example of per-thread remote monitoring with per-cpu breakdown * * Copyright (c) 2010 Google, Inc * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include "perf_util.h" #define MAX_GROUPS 256 #define MAX_CPUS 64 typedef struct { const char *events[MAX_GROUPS]; int num_groups; int format_group; int inherit; int print; int pin; int ncpus; pid_t pid; } options_t; static options_t options; static volatile int quit; int child(char **arg) { /* * execute the requested command */ execvp(arg[0], arg); errx(1, "cannot exec: %s\n", arg[0]); /* not reached */ } static void read_groups(perf_event_desc_t *fds, int num) { uint64_t *values = NULL; size_t new_sz, sz = 0; int i, evt; ssize_t ret; /* * { u64 nr; * { u64 time_enabled; } && PERF_FORMAT_ENABLED * { u64 time_running; } && PERF_FORMAT_RUNNING * { u64 value; * { u64 id; } && PERF_FORMAT_ID * } cntr[nr]; * } && PERF_FORMAT_GROUP * * we do not use FORMAT_ID in this program */ for (evt = 0; evt < num; ) { int num_evts_to_read; if (options.format_group) { num_evts_to_read = perf_get_group_nevents(fds, num, evt); new_sz = sizeof(uint64_t) * (3 + num_evts_to_read); } else { num_evts_to_read = 1; new_sz = sizeof(uint64_t) * 3; } if (new_sz > sz) { sz = new_sz; values = realloc(values, sz); } if (!values) err(1, "cannot allocate memory for values\n"); ret = read(fds[evt].fd, values, new_sz); if (ret != (ssize_t)new_sz) { /* unsigned */ if (ret == -1) err(1, "cannot read values event %s", fds[evt].name); /* likely pinned and could not be loaded */ warnx("could not read event %d, tried to read %zu bytes, but got %zd", evt, new_sz, ret); } /* * propagate to save area */ for (i = evt; i < (evt + num_evts_to_read); i++) { if (options.format_group) values[0] = values[3 + (i - evt)]; /* * scaling because we may be sharing the PMU and * thus may be multiplexed */ fds[i].values[0] = values[0]; fds[i].values[1] = values[1]; fds[i].values[2] = values[2]; } evt += num_evts_to_read; } if (values) free(values); } static void print_counts(perf_event_desc_t *fds, int num, int cpu) 
{ double ratio; uint64_t val, delta; int i; read_groups(fds, num); for(i=0; i < num; i++) { val = perf_scale(fds[i].values); delta = perf_scale_delta(fds[i].values, fds[i].prev_values); ratio = perf_scale_ratio(fds[i].values); /* separate groups */ if (perf_is_group_leader(fds, i)) putchar('\n'); if (options.print) printf("CPU%-2d %'20"PRIu64" %'20"PRIu64" %s (%.2f%% scaling, ena=%'"PRIu64", run=%'"PRIu64")\n", cpu, val, delta, fds[i].name, (1.0-ratio)*100.0, fds[i].values[1], fds[i].values[2]); else printf("CPU%-2d %'20"PRIu64" %s (%.2f%% scaling, ena=%'"PRIu64", run=%'"PRIu64")\n", cpu, val, fds[i].name, (1.0-ratio)*100.0, fds[i].values[1], fds[i].values[2]); } } static void sig_handler(int n) { quit = 1; } int parent(char **arg) { perf_event_desc_t *fds, *fds_cpus[MAX_CPUS]; int status, ret, i, num_fds = 0, grp, group_fd; int ready[2], go[2], cpu; char buf; pid_t pid; go[0] = go[1] = -1; if (pfm_initialize() != PFM_SUCCESS) errx(1, "libpfm initialization failed"); if (options.ncpus >= MAX_CPUS) errx(1, "maximum number of cpus exceeded (%d)", MAX_CPUS); memset(fds_cpus, 0, sizeof(fds_cpus)); for (cpu=0; cpu < options.ncpus; cpu++) { for (grp = 0; grp < options.num_groups; grp++) { num_fds = 0; ret = perf_setup_list_events(options.events[grp], &fds_cpus[cpu], &num_fds); if (ret || !num_fds) exit(1); } } pid = options.pid; if (!pid) { ret = pipe(ready); if (ret) err(1, "cannot create pipe ready"); ret = pipe(go); if (ret) err(1, "cannot create pipe go"); /* * Create the child task */ if ((pid=fork()) == -1) err(1, "Cannot fork process"); /* * and launch the child code * * The pipe is used to avoid a race condition * between for() and exec(). We need the pid * of the new tak but we want to start measuring * at the first user level instruction. Thus we * need to prevent exec until we have attached * the events. 
*/ if (pid == 0) { close(ready[0]); close(go[1]); /* * let the parent know we exist */ close(ready[1]); if (read(go[0], &buf, 1) == -1) err(1, "unable to read go_pipe"); exit(child(arg)); } close(ready[1]); close(go[0]); if (read(ready[0], &buf, 1) == -1) err(1, "unable to read child_ready_pipe"); close(ready[0]); } for (cpu=0; cpu < options.ncpus; cpu++) { fds = fds_cpus[cpu]; for(i=0; i < num_fds; i++) { int is_group_leader; /* boolean */ is_group_leader = perf_is_group_leader(fds, i); if (is_group_leader) { /* this is the group leader */ group_fd = -1; } else { group_fd = fds[fds[i].group_leader].fd; } /* * create leader disabled with enable_on-exec */ if (!options.pid) { fds[i].hw.disabled = is_group_leader; fds[i].hw.enable_on_exec = is_group_leader; } fds[i].hw.read_format = PERF_FORMAT_SCALE; /* request timing information necessary for scaling counts */ if (is_group_leader && options.format_group) fds[i].hw.read_format |= PERF_FORMAT_GROUP; if (options.inherit) fds[i].hw.inherit = 1; if (options.pin && is_group_leader) fds[i].hw.pinned = 1; fds[i].fd = perf_event_open(&fds[i].hw, pid, cpu, group_fd, 0); if (fds[i].fd == -1) { warn("cannot attach event%d %s", i, fds[i].name); goto error; } } } if (!options.pid && go[1] > -1) close(go[1]); if (options.print) { if (!options.pid) { while(waitpid(pid, &status, WNOHANG) == 0) { sleep(1); for (cpu=0; cpu < options.ncpus; cpu++) { fds = fds_cpus[cpu]; print_counts(fds, num_fds, cpu); } } } else { while(quit == 0) { sleep(1); for (cpu=0; cpu < options.ncpus; cpu++) { fds = fds_cpus[cpu]; print_counts(fds, num_fds, cpu); } } } } else { if (!options.pid) waitpid(pid, &status, 0); else { pause(); for (cpu=0; cpu < options.ncpus; cpu++) { fds = fds_cpus[cpu]; for(i=0; i < num_fds; i++) ioctl(fds[i].fd, PERF_EVENT_IOC_DISABLE, 0); } } for (cpu=0; cpu < options.ncpus; cpu++) { fds = fds_cpus[cpu]; print_counts(fds, num_fds, cpu); } } for (cpu=0; cpu < options.ncpus; cpu++) { fds = fds_cpus[cpu]; for(i=0; i < num_fds; i++) 
close(fds[i].fd); perf_free_fds(fds, num_fds); } /* free libpfm resources cleanly */ pfm_terminate(); return 0; error: free(fds); if (!options.pid) kill(SIGKILL, pid); /* free libpfm resources cleanly */ pfm_terminate(); return -1; } static void usage(void) { printf("usage: task_cpu [-h] [-i] [-g] [-p] [-P] [-t pid] [-e event1,event2,...] cmd\n" "-h\t\tget help\n" "-i\t\tinherit across fork\n" "-f\t\tuse PERF_FORMAT_GROUP for reading up counts (experimental, not working)\n" "-p\t\tprint counts every second\n" "-P\t\tpin events\n" "-t pid\tmeasure existing pid\n" "-e ev,ev\tgroup of events to measure (multiple -e switches are allowed)\n" ); } int main(int argc, char **argv) { int c; setlocale(LC_ALL, ""); while ((c=getopt(argc, argv,"+he:ifpPt:")) != -1) { switch(c) { case 'e': if (options.num_groups < MAX_GROUPS) { options.events[options.num_groups++] = optarg; } else { errx(1, "you cannot specify more than %d groups.\n", MAX_GROUPS); } break; case 'f': options.format_group = 1; break; case 'p': options.print = 1; break; case 'P': options.pin = 1; break; case 'i': options.inherit = 1; break; case 't': options.pid = atoi(optarg); break; case 'h': usage(); exit(0); default: errx(1, "unknown error"); } } options.ncpus = sysconf(_SC_NPROCESSORS_ONLN); if (options.ncpus < 1) errx(1, "cannot determine number of online processors"); if (options.num_groups == 0) { options.events[0] = "cycles,instructions"; options.num_groups = 1; } if (!argv[optind] && !options.pid) errx(1, "you must specify a command to execute or a thread to attach to\n"); signal(SIGINT, sig_handler); return parent(argv+optind); } papi-5.6.0/man/man3/PAPI_mpx_info_t.3000664 001750 001750 00000001352 13216244356 021210 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPI_mpx_info_t" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPI_mpx_info_t \- .SH SYNOPSIS .br .PP .SS "Data Fields" .in +1c .ti -1c .RI "int \fBtimer_sig\fP" .br .ti -1c .RI "int \fBtimer_num\fP" .br 
.ti -1c .RI "int \fBtimer_us\fP" .br .in -1c .SH "Detailed Description" .PP .SH "Field Documentation" .PP .SS "int PAPI_mpx_info_t::timer_num" Number of the itimer or POSIX 1 timer used by the multiplex timer: PAPI_ITIMER .SS "int PAPI_mpx_info_t::timer_sig" Signal number used by the multiplex timer, 0 if not: PAPI_SIGNAL .SS "int PAPI_mpx_info_t::timer_us" uS between switching of sets: PAPI_MPX_DEF_US .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. papi-5.6.0/src/perfctr-2.6.x/etc/costs/Opteron-2.4000775 001750 001750 00000001376 13216244366 023337 0ustar00jshenry1963jshenry1963000000 000000 [data from a 2.4 GHz Opteron 850] PERFCTR INIT: vendor 2, family 15, model 5, stepping 10, clock 2405892 kHz PERFCTR INIT: NITER == 64 PERFCTR INIT: loop overhead is 242 cycles PERFCTR INIT: rdtsc cost is 13.0 cycles (1075 total) PERFCTR INIT: rdpmc cost is 10.0 cycles (882 total) PERFCTR INIT: rdmsr (counter) cost is 50.9 cycles (3505 total) PERFCTR INIT: rdmsr (evntsel) cost is 56.8 cycles (3880 total) PERFCTR INIT: wrmsr (counter) cost is 72.2 cycles (4863 total) PERFCTR INIT: wrmsr (evntsel) cost is 328.8 cycles (21289 total) PERFCTR INIT: read cr4 cost is 6.0 cycles (627 total) PERFCTR INIT: write cr4 cost is 67.0 cycles (4535 total) PERFCTR INIT: write LVTPC cost is 26.9 cycles (1967 total) PERFCTR INIT: sync_core cost is 164.3 cycles (10761 total) papi-5.6.0/src/examples/overflow_pthreads.c000664 001750 001750 00000011410 13216244361 023036 0ustar00jshenry1963jshenry1963000000 000000 /* This file performs the following test: overflow dispatch with pthreads - This example tests the dispatch of overflow calls from PAPI. The event set is counted in the default counting domain and default granularity, depending on the platform. Usually this is the user domain (PAPI_DOM_USER) and thread context (PAPI_GRN_THR). 
The Eventset contains: + PAPI_TOT_INS (overflow monitor) + PAPI_TOT_CYC Each thread will do the followings : - enable overflow - Start eventset 1 - Do flops - Stop eventset 1 - disable overflow */ #include #include #include #include "papi.h" #define THRESHOLD 200000 #define OVER_FMT "handler(%d ) Overflow at %p! bit=%#llx \n" #define ERROR_RETURN(retval) { fprintf(stderr, "Error %d %s:line %d: \n", retval,__FILE__,__LINE__); exit(retval); } int total = 0; void do_flops(int n) { int i; double c = 0.11; double a = 0.5; double b = 6.2; for (i=0; i < n; i++) c += a * b; } /* overflow handler */ void handler(int EventSet, void *address, long long overflow_vector, void *context) { fprintf(stderr, OVER_FMT, EventSet, address, overflow_vector); total++; } void *Thread(void *arg) { int retval; int EventSet1=PAPI_NULL; long long values[2]; long long elapsed_us, elapsed_cyc; fprintf(stderr,"Thread %lx running PAPI\n",PAPI_thread_id()); /* create the event set */ if ( (retval = PAPI_create_eventset(&EventSet1))!=PAPI_OK) ERROR_RETURN(retval); /* query whether the event exists */ if ((retval=PAPI_query_event(PAPI_TOT_INS)) != PAPI_OK) ERROR_RETURN(retval); if ((retval=PAPI_query_event(PAPI_TOT_CYC)) != PAPI_OK) ERROR_RETURN(retval); /* add events to the event set */ if ( (retval = PAPI_add_event(EventSet1, PAPI_TOT_INS))!= PAPI_OK) ERROR_RETURN(retval); if ( (retval = PAPI_add_event(EventSet1, PAPI_TOT_CYC)) != PAPI_OK) ERROR_RETURN(retval); elapsed_us = PAPI_get_real_usec(); elapsed_cyc = PAPI_get_real_cyc(); retval = PAPI_overflow(EventSet1, PAPI_TOT_CYC, THRESHOLD, 0, handler); if(retval !=PAPI_OK) ERROR_RETURN(retval); /* start counting */ if((retval = PAPI_start(EventSet1))!=PAPI_OK) ERROR_RETURN(retval); do_flops(*(int *)arg); if ((retval = PAPI_stop(EventSet1, values))!=PAPI_OK) ERROR_RETURN(retval); elapsed_us = PAPI_get_real_usec() - elapsed_us; elapsed_cyc = PAPI_get_real_cyc() - elapsed_cyc; /* disable overflowing */ retval = PAPI_overflow(EventSet1, PAPI_TOT_CYC, 0, 
0, handler); if(retval !=PAPI_OK) ERROR_RETURN(retval); /* remove the event from the eventset */ retval = PAPI_remove_event(EventSet1, PAPI_TOT_INS); if (retval != PAPI_OK) ERROR_RETURN(retval); retval = PAPI_remove_event(EventSet1, PAPI_TOT_CYC); if (retval != PAPI_OK) ERROR_RETURN(retval); printf("Thread %#x PAPI_TOT_INS : \t%lld\n",(int)PAPI_thread_id(), values[0]); printf(" PAPI_TOT_CYC: \t%lld\n", values[1]); printf(" Real usec : \t%lld\n", elapsed_us); printf(" Real cycles : \t%lld\n", elapsed_cyc); pthread_exit(NULL); } int main(int argc, char **argv) { pthread_t thread_one; pthread_t thread_two; int flops1, flops2; int rc,retval; pthread_attr_t attr; long long elapsed_us, elapsed_cyc; /* papi library initialization */ if ((retval=PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT) { printf("Library initialization error! \n"); exit(1); } /* thread initialization */ retval=PAPI_thread_init((unsigned long(*)(void))(pthread_self)); if (retval != PAPI_OK) ERROR_RETURN(retval); /* return the number of microseconds since some arbitrary starting point */ elapsed_us = PAPI_get_real_usec(); /* return the number of cycles since some arbitrary starting point */ elapsed_cyc = PAPI_get_real_cyc(); /* pthread attribution init */ pthread_attr_init(&attr); pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM); /* create the first thread */ flops1 = 1000000; rc = pthread_create(&thread_one, &attr, Thread, (void *)&flops1); if (rc) ERROR_RETURN(rc); /* create the second thread */ flops2 = 4000000; rc = pthread_create(&thread_two, &attr, Thread, (void *)&flops2); if (rc) ERROR_RETURN(rc); /* wait for the threads to finish */ pthread_attr_destroy(&attr); pthread_join(thread_one, NULL); pthread_join(thread_two, NULL); /* compute the elapsed cycles and microseconds */ elapsed_cyc = PAPI_get_real_cyc() - elapsed_cyc; elapsed_us = PAPI_get_real_usec() - elapsed_us; printf("Master real usec : \t%lld\n", elapsed_us); printf("Master real cycles : \t%lld\n", elapsed_cyc); /* clean 
up */ PAPI_shutdown(); exit(0); } papi-5.6.0/src/libpfm4/lib/events/intel_hswep_unc_sbo_events.h000664 001750 001750 00000017322 13216244364 026527 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2014 Google Inc. All rights reserved * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. 
* * PMU: hswep_unc_sbo (Intel Haswell-EP S-Box uncore PMU) */ static const intel_x86_umask_t hswep_unc_s_ring_ad_used[]={ { .uname = "UP_EVEN", .udesc = "Up and Even ring polarity filter", .ucode = 0x100, }, { .uname = "UP_ODD", .udesc = "Up and odd ring polarity filter", .ucode = 0x200, }, { .uname = "DOWN_EVEN", .udesc = "Down and even ring polarity filter", .ucode = 0x400, }, { .uname = "DOWN_ODD", .udesc = "Down and odd ring polarity filter", .ucode = 0x800, }, { .uname = "UP", .udesc = "Up ring polarity filter", .ucode = 0x3300, }, { .uname = "DOWN", .udesc = "Down ring polarity filter", .ucode = 0xcc00, }, }; static const intel_x86_umask_t hswep_unc_s_ring_bounces[]={ { .uname = "AD_CACHE", .udesc = "AD_CACHE", .ucode = 0x100, }, { .uname = "AK_CORE", .udesc = "Acknowledgments to core", .ucode = 0x200, }, { .uname = "BL_CORE", .udesc = "Data responses to core", .ucode = 0x400, }, { .uname = "IV_CORE", .udesc = "Snoops of processor cache", .ucode = 0x800, }, }; static const intel_x86_umask_t hswep_unc_s_ring_iv_used[]={ { .uname = "ANY", .udesc = "Any filter", .ucode = 0x0f00, .uflags = INTEL_X86_DFL, }, { .uname = "UP", .udesc = "Filter on any up polarity", .ucode = 0x0300, }, { .uname = "DOWN", .udesc = "Filter on any down polarity", .ucode = 0xcc00, }, }; static const intel_x86_umask_t hswep_unc_s_rxr_bypass[]={ { .uname = "AD_CRD", .udesc = "AD credis", .ucode = 0x0100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "AD_BNC", .udesc = "AD bounces", .ucode = 0x0200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "BL_CRD", .udesc = "BL credits", .ucode = 0x0400, .uflags = INTEL_X86_NCOMBO, }, { .uname = "BL_BNC", .udesc = "BL bounces", .ucode = 0x0800, .uflags = INTEL_X86_NCOMBO, }, { .uname = "AK", .udesc = "AK", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "IV", .udesc = "IV", .ucode = 0x2000, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t hswep_unc_s_txr_ads_used[]={ { .uname = "AD", .udesc = "onto AD ring", .ucode = 0x100, .uflags = 
INTEL_X86_NCOMBO, }, { .uname = "AK", .udesc = "Onto AK ring", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "BL", .udesc = "Onto BL ring", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO, } }; static const intel_x86_entry_t intel_hswep_unc_s_pe[]={ { .name = "UNC_S_CLOCKTICKS", .desc = "S-box Uncore clockticks", .modmsk = HSWEP_UNC_SBO_ATTRS, .cntmsk = 0xf, .code = 0x00, }, { .name = "UNC_S_RING_AD_USED", .desc = "Address ring in use. Counts number of cycles ring is being used at this ring stop", .modmsk = HSWEP_UNC_SBO_ATTRS, .cntmsk = 0xf, .code = 0x1b, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_s_ring_ad_used), .ngrp = 1, .umasks = hswep_unc_s_ring_ad_used, }, { .name = "UNC_S_RING_AK_USED", .desc = "Acknowledgement ring in use. Counts number of cycles ring is being used at this ring stop", .modmsk = HSWEP_UNC_SBO_ATTRS, .cntmsk = 0xf, .code = 0x1c, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_s_ring_ad_used), /* identical to RING_AD_USED */ .ngrp = 1, .umasks = hswep_unc_s_ring_ad_used, }, { .name = "UNC_S_RING_BL_USED", .desc = "Bus or Data ring in use. Counts number of cycles ring is being used at this ring stop", .modmsk = HSWEP_UNC_SBO_ATTRS, .cntmsk = 0xf, .code = 0x1d, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_s_ring_ad_used), /* identical to RING_AD_USED */ .ngrp = 1, .umasks = hswep_unc_s_ring_ad_used, }, { .name = "UNC_S_RING_IV_USED", .desc = "Invalidate ring in use. 
Counts number of cycles ring is being used at this ring stop", .modmsk = HSWEP_UNC_SBO_ATTRS, .cntmsk = 0xf, .code = 0x1e, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_s_ring_iv_used), .ngrp = 1, .umasks = hswep_unc_s_ring_iv_used, }, { .name = "UNC_S_RING_BOUNCES", .desc = "Number of LLC responses that bounced in the ring", .modmsk = HSWEP_UNC_SBO_ATTRS, .cntmsk = 0xf, .code = 0x05, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_s_ring_bounces), .ngrp = 1, .umasks = hswep_unc_s_ring_bounces, }, { .name = "UNC_S_FAST_ASSERTED", .desc = "Number of cycles in which the local distress or incoming distress signals are asserted (FaST). Incoming distress includes both up and down", .modmsk = HSWEP_UNC_SBO_ATTRS, .cntmsk = 0xf, .code = 0x09, }, { .name = "UNC_C_BOUNCE_CONTROL", .desc = "Bounce control", .modmsk = HSWEP_UNC_SBO_ATTRS, .cntmsk = 0xf, .code = 0x0a, }, { .name = "UNC_S_RXR_OCCUPANCY", .desc = "Ingress Occupancy", .code = 0x11, .cntmsk = 0x1, .ngrp = 1, .modmsk = HSWEP_UNC_SBO_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_s_rxr_bypass), /* shared with rxr_bypass */ .umasks = hswep_unc_s_rxr_bypass, }, { .name = "UNC_S_RXR_BYPASS", .desc = "Ingress Allocations", .code = 0x12, .cntmsk = 0xf, .ngrp = 1, .modmsk = HSWEP_UNC_SBO_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_s_rxr_bypass), .umasks = hswep_unc_s_rxr_bypass }, { .name = "UNC_S_RXR_INSERTS", .desc = "Ingress Allocations", .code = 0x13, .cntmsk = 0xf, .ngrp = 1, .modmsk = HSWEP_UNC_SBO_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_s_rxr_bypass), /* shared with rxr_bypass */ .umasks = hswep_unc_s_rxr_bypass }, { .name = "UNC_S_TXR_ADS_USED", .desc = "Egress events", .code = 0x04, .cntmsk = 0xf, .ngrp = 1, .modmsk = HSWEP_UNC_SBO_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_s_txr_ads_used), .umasks = hswep_unc_s_txr_ads_used }, { .name = "UNC_S_TXR_INSERTS", .desc = "Egress allocations", .code = 0x02, .cntmsk = 0xf, .ngrp = 1, .modmsk = HSWEP_UNC_SBO_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_s_rxr_bypass), /* 
shared with rxr_bypass */ .umasks = hswep_unc_s_rxr_bypass }, { .name = "UNC_S_TXR_OCCUPANCY", .desc = "Egress allocations", .code = 0x01, .cntmsk = 0xf, .ngrp = 1, .modmsk = HSWEP_UNC_SBO_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_s_rxr_bypass), /* shared with rxr_bypass */ .umasks = hswep_unc_s_rxr_bypass }, }; papi-5.6.0/ChangeLogP560.txt000664 001750 001750 00000350103 13216244355 017503 0ustar00jshenry1963jshenry1963000000 000000 Tue Dec 5 20:10:50 2017 -0800 William Cohen * src/libpfm4/lib/events/power9_events.h, src/libpfm4/tests/validate_power.c: Update libpfm4 Current with commit 206dea666e7c259c7ca53b16f934660344293475 Ensure unique names for IBM Power 9 events Older versions of PAPI use the event name to look up the libpfm event number when doing the enumeration of the available events. If there were multiple events with the same name in libpfm, the earliest one would be selected. This selection would cause the enumeration of events in papi_native_avail to get stuck looping on the first duplicated named event in a pmu. In the case of IBM Power 9 the enumeration would get stuck on PM_CO0_BUSY. Gave each event a unique name to avoid this unfortunate behavior. 2017-11-16 Will Schmidt * src/papi_events.csv: revised papi_derived patch. [PATCH, papi] Updated derived entries for power9. This is a re-implementation of the patch that Will Cohen posted earlier, which uses the (newly defined) PM_LD_MISS_ALT entry instead of the PM_LD_MISS_FIN . Thanks, -Will 2017-12-05 Heike Jagode (jagode@icl.utk.edu) * release_procedure.txt: Updated notes for release procedure. 2017-12-05 Vince Weaver * src/extras.c: extras.c: add string.h include to make the ffsll warning go away 2017-12-04 Heike Jagode (jagode@icl.utk.edu) * src/configure, src/configure.in: Fixed configure bug: Once ffsll support is detected, set HAVE_FFSLL to 1 in config.h. Tested without configure flag --with-ffsll, with --with-ffsll=yes, --with- ffsll=no. 
2017-12-04 Vince Weaver * src/ctests/Makefile.recipies, src/ctests/locks_pthreads.c: ctests: locks_pthreads: adjust run count again linear slowdown makes things run really quickly. This patch scales it down by the square root of the number of cores which is maybe a better compromise. * src/ctests/locks_pthreads.c: ctests: locks_pthreads, minor cleanups 2017-11-20 William Cohen * src/ctests/locks_pthreads.c: Keep locks_pthreads test's amount of work reasonable on many core machines The runtime of locks_pthreads test scaled by the number of processors on the machine because of the serialized increment operation in the test. As more machines are available with 100+ processors the runtime of locks_pthreads is becoming excessive. Revised the test to specify the approximate total number of iterations and split the work among the threads. Fri Dec 4 11:31:46 2015 -0500 sangamesh * src/extras.c, src/papi.h: Revert change that added ffsll to papi.h This reverts commit 2f1ec33a9e585df1b6343a0ea735f79974c080df. commit 2f1ec33a9e585df1b6343a0ea735f79974c080df changed #if (!defined(HAVE_FFSLL) || defined(__bgp__)) int ffsll( long long lli ); #endif --- to --- extern int ffsll( long long lli in extras.c to avoid warning when --with-ffsll is used as config option Thu Apr 20 11:31:38 2017 -0400 Stephen Wood * src/extras.c, src/papi.h: revert part of patch that added extra attributes to ffsll This manually reverts part of: commit 9e199a8aee48f5a2c62d891f0b2c1701b496a9ca cast pointers appropriately to avoid warnings and errors Sun Dec 3 09:42:44 2017 -0800 Will Schmidt * src/libpfm4/lib/events/power9_events.h, src/libpfm4/tests/validate_power.c: Updated libpfm4 Current with: ---------------- commit ed3f51c4690685675cf2766edb90acbc0c1cdb67 (HEAD -> master, origin/master, origin/HEAD) Add alternate event numbers for power9. I had previously missed adding the _ALT entries, which allow some events to be specified on different counters. This patch fills those in. 
This patch also adds a few validation tests for the ALT events. ---------------- 2017-11-28 Heike Jagode (jagode@icl.utk.edu) * src/utils/papi_avail.c, src/utils/papi_native_avail.c: Fixed utility option inconsistencies between papi_avail and papi_native_avail. There are more inconsistencies with other PAPI utilities, which will be addressed eventually. 2017-11-28 Heike Jagode * README.md: README.md edited online with Bitbucket * README.md: README.md edited online with Bitbucket * README.md: README.md edited online with Bitbucket * README.md: README.md edited online with Bitbucket 2017-11-27 Heike Jagode * src/components/powercap/linux-powercap.c: More clean-ups and checking of return values. Mon Nov 13 23:15:53 2017 -0800 Thomas Richter * src/libpfm4/lib/pfmlib_common.c: Update libpfm4 Current with commit f5331b7cbc96d9f9441df6a54a6f3b6e0fab3fb9 better fix for pfmlib_getl() The following commit: commit 9c69edf67f6899d9c6870e9cb54dcd0990974f81 better param check in pfmlib_getl() Fixed parameter checking of pfmlib_getl() but missed one condition on the buffer argument. It is char **buffer. Therefore we need to check if *buffer is not NULL before we can check *len. 2017-11-19 Asim YarKhan * src/components/cuda/linux-cuda.c: CUDA component: Bug fix for releasing and resetting event list When an event addition failed because the event (or metric) requires multiple-runs the eventlist and event-context structure was not being cleaned up properly. This fixes the event cleanup process. 
2017-11-17 Asim YarKhan * src/components/powercap/tests/powercap_basic.c, src/components/powercap/tests/powercap_limit.c: Powercap component: Updated tests to handle no-event-counters (num_cntrs==0) and skip some compiler warnings (argv, argc unused) 2017-11-16 William Cohen * src/components/lmsensors/linux-lmsensors.c: Make more of lmsensors component internal state hidden There are a number of functions pointers stored in variable that are only used within the lmsensors component. Making those static ensures they are not visible outside the lmsensors component. * src/components/lmsensors/linux-lmsensors.c: Make internal cached_counts variable static Want to make as little information about the internals of the PAPI lmsensors component visible to the outside. Thus, making cached_counts variable static. 2017-11-15 William Cohen * src/components/lmsensors/linux-lmsensors.c: Avoid statically limiting the number of lmsensor events allowed Some high-end server machines provide more events than the 512 entries limit imposed by the LM_SENSORS_MAX_COUNTERS define in the lmsensor component (observed 577 entries on one machine). When this limit was exceeded the lmsensor component would write beyond the array bounds causing ctests/all_native_events to crash. Modified the lmsensor code to dynamically allocate the required space for all the available lmsensor entries on the machine. This allows ctests/all_native_events to run to completion. * src/components/appio/appio.c, src/components/coretemp/linux- coretemp.c, src/components/example/example.c, src/components/infiniband/linux-infiniband.c, src/components/lustre /linux-lustre.c, src/components/rapl/linux-rapl.c: Use correct argument order for calloc function calls Some calls to calloc in PAPI have the order of the arguments reversed. According to the calloc man page the number of elements is the first argument and the size of each element is the second argument. 
Due to alignment constraints the second argument might be rounded up. Thus, it is best not to swap the arguments to calloc. 2017-11-15 Philip Vaccaro * src/components/powercap/linux-powercap.c, src/components/powercap/tests/powercap_basic.c: Updates and changes to the powercap component to address a few areas. Various things were changed but mainly things were simplified and made more streamlined. Main focus was on simplifying the management of the system files. Mon Nov 13 23:15:53 2017 -0800 Thomas Richter * src/libpfm4/docs/man3/pfm_get_event_encoding.3, src/libpfm4/docs/man3/pfm_get_os_event_encoding.3, src/libpfm4/lib/events/amd64_events_fam11h.h, src/libpfm4/lib/events/amd64_events_fam12h.h, src/libpfm4/lib/pfmlib_common.c, src/libpfm4/lib/pfmlib_priv.h, src/libpfm4/tests/validate_x86.c: Update libpfm4 Current with commit 9c69edf67f6899d9c6870e9cb54dcd0990974f81 better param check in pfmlib_getl() This patch ensures that len >= 2 because we do: m = l - 2; Reviewed-by: Hendrik Brueckner 2017-11-13 Vince Weaver * src/components/perf_event/pe_libpfm4_events.c: pe_libpfm4_events: properly notice if trying to add invalid umask this passes the broken-event test case and all of the unit tests, but it would be good to test this on codes that do a lot of native event tests. the pe_libpfm4_events code *really* needs a once-over, it is currently a confusing mess. 
* src/components/perf_event/tests/Makefile, src/components/perf_event/tests/broken_events.c, src/components/perf_event/tests/event_name_lib.c, src/components/perf_event/tests/event_name_lib.h: perf_event/tsts: add broken event name test we were wrongly accepting event names with invalid umasks 2017-11-13 Philip Mucci * src/utils/print_header.c: Removed extraneous colon in VM vendor output 2017-11-10 Vince Weaver * src/validation_tests/papi_l1_dcm.c, src/validation_tests/papi_l2_dcm.c, src/validation_tests/papi_l2_dcr.c, src/validation_tests/papi_l2_dcw.c: validation_tests: fix compiler warnings on arm32 On Raspberry Pi we were getting warnings where we were printing sizeof() valus with %ld. Convert to %zu instead. 2017-11-09 Vince Weaver * src/validation_tests/papi_l2_dca.c: validation_tests: papi_l2_dca fix crash on ARM32 On raspberry pi it's not possible to detect L2 cache size so the test was dividing by zero. * src/linux-common.c: linux-common: remove warning on not finding mhz in cpuinfo This was added recently and is not needed. Most ARM32 devices don't have MHz in the cpuinfo file and it's not really a bug. * src/components/perf_event/perf_event.c: perf_event: disable the old pre-Linux-2.6.34 workarounds by default There were a number of bugs in perf_event that PAPI had to work around, but most of these were fixed by 2.6.34 In order to hit these bugs you would need to be running a kernel from before 2010 which wouldn't support any recent hardware. Unfortunately these bugs are hard to test for. We were enabling things based on kernel versions, but this caught vendors (such as Redhat) shipping 2.6.32 kernels that had backported fixes. This fix just #ifdefs things out, if no one complains then we can fully remove the code. 
* src/components/perf_event/perf_event.c: perf_event: decrement the available counter count if NMI_WATCHDOG is stealing one * src/components/perf_event/perf_event.c: perf_event: move the paranoid handling code to its own function * src/components/perf_event/perf_event.c: perf_event: centralize fast_counter_read flag just use the component version of the flag, rather than having a shadow global version. 2017-11-09 William Cohen * src/linux-memory.c: Make the fallback generic_get_memory_info function more robust On the aarch64 processor linux 4.11.0 kernels /sys/devices/system/cpu/cpu0/cache is available, but the index[0-9] subdirectories are not fully populated with information about cache and line size, associativity, or number of sets. These missing files would cause the generic_get_memory_info function to attempt to read data using a NULL file descriptor causing the program to crash. Added checks to see if every fopen was and fscan was successful and just say there is no cache if there is any failure. 2017-11-09 Asim YarKhan * src/components/cuda/linux-cuda.c, src/components/cuda/tests/Makefile, src/components/nvml/tests/Makefile, src/configure, src/configure.in: Enable icc and nvcc to work together in cuda and nvml components. For nvcc to work with Intel icc to compile cuda and nvml components and tests , it needs to use nvcc -ccbin=<$CC- compilerbin> . The compiler name in CC also needs to be clean, so CC= and any other flags are pushed to CFLAGS (changed in src/configure.in script). * src/ctests/mpifirst.c: Minor correction to mpifirst.c test 2017-11-09 Vince Weaver * src/utils/print_header.c: utils: print fast_counter_read (rdpmc) status in the utils header 2017-11-08 William Cohen * src/validation_tests/cache_helper.c: Ensure access to array within bounds Coverity reported the following issues. Need the test to be "type>=MAX_CACHE" rather than "type>MAX_CACHE". 
Error: OVERRUN (CWE-119): papi-5.5.2/src/validation_tests/cache_helper.c:85: cond_at_most: Checking "type > 4" implies that "type" may be up to 4 on the false branch. papi-5.5.2/src/validation_tests/cache_helper.c:90: overrun-local: Overrunning array "cache_info" of 4 24-byte elements at element index 4 (byte offset 96) using index "type" (which evaluates to 4). Error: OVERRUN (CWE-119): papi-5.5.2/src/validation_tests/cache_helper.c:101: cond_at_most: Checking "type > 4" implies that "type" may be up to 4 on the false branch. papi-5.5.2/src/validation_tests/cache_helper.c:106: overrun-local: Overrunning array "cache_info" of 4 24-byte elements at element index 4 (byte offset 96) using index "type" (which evaluates to 4). Error: OVERRUN (CWE-119): papi-5.5.2/src/validation_tests/cache_helper.c:117: cond_at_most: Checking "type > 4" implies that "type" may be up to 4 on the false branch. papi-5.5.2/src/validation_tests/cache_helper.c:122: overrun-local: Overrunning array "cache_info" of 4 24-byte elements at element index 4 (byte offset 96) using index "type" (which evaluates to 4). 
* src/ctests/overflow_pthreads.c: Eliminate coverity overflow warning about expression * src/components/perf_event_uncore/tests/perf_event_uncore_lib.c: Remove dead code from perf_event_uncore_lib.c 2017-11-09 Vince Weaver * src/components/perf_event/perf_event.c: perf_event: don't initialize globals statically from the mucci-5.5.2 tree 2017-11-08 phil@minimalmetrics.com * src/linux-common.c: linux-common: clean up the /proc/cpuinfo parsing code From the mucci-cleanup branch * src/components/perf_event/perf_event.c, .../perf_event_uncore/perf_event_uncore.c, src/papi_libpfm4_events.c, src/papi_libpfm4_events.h: perf_event: clean up _papi_libpfm4_shutdown() From the mucci-cleanup branch * src/utils/print_header.c: utils: clean up the cpuinfo header From the mucci-cleanup branch * src/papi_internal.c, src/papi_internal.h: papi_internal: add PAPI_WARN() function From the mucci-cleanup branch * src/components/perf_event/pe_libpfm4_events.c: perf_event: clean up pe_libpfm4_events From the mucci-cleanup branch -- 2017-11-08 Vince Weaver * src/utils/papi_avail.c: utils/papi_avail: update the manpage info based on changes by Phil Mucci * .../perf_event/tests/perf_event_system_wide.c: perf_event tests: perf_event_system_wide: don't fail if permissions restrict system- wide events right now we just skip if we get EPERM, we should also maybe check the perf_event_paranoid setting and print a more meaningful report * src/ctests/locks_pthreads.c: ctests/locks_pthreads: avoid printing values when in quiet mode 2017-08-31 phil@minimalmetrics.com * src/Makefile.inc: Better symlink creation for shared library in make phase 2017-08-28 phil@minimalmetrics.com * doc/Makefile, src/.gitignore, src/Makefile.inc, src/components/.gitignore, src/components/Makefile_comp_tests, src/ctests/.gitignore, src/ctests/Makefile.recipies, src/ftests/.gitignore, src/ftests/Makefile.recipies, src/testlib/.gitignore, src/utils/.gitignore, src/utils/Makefile, src/validation_tests/.gitignore, 
src/validation_tests/Makefile.recipies: Full cleanup, including removal of .gitignore files that prevented us from realizing we were really cleaning/clobbering properly * src/validation_tests/.gitignore: .gitignore Makefile.target * src/papi.c: Remove PAPI_VERB_ECONT setting by default from initialization path. This prints all kinds of needless errors on virtual platforms. * src/x86_cpuid_info.c: Remove leftover printf 2017-08-21 phil@minimalmetrics.com * src/ctests/locks_pthreads.c: Test now performs a fixed number of iterations, and reports lock/unlock timings per thread. * src/components/perf_event/perf_event.c: Added more descriptive error message to exclude_guest check * src/papi_internal.c: Removed leading newline and trailing . from error messages * src/papi_preset.c: Updated message for derived event failures 2017-11-07 Vince Weaver * src/Makefile.inc, src/ctests/Makefile, src/ctests/Makefile.target.in, src/ftests/Makefile, src/ftests/Makefile.target.in, src/testlib/Makefile.target.in, src/utils/Makefile.target.in, src/validation_tests/Makefile, src/validation_tests/Makefile.target.in: tests: make sure DESTDIR and DATADIR are passed in when doing an install * src/ctests/Makefile, src/ctests/Makefile.target.in, src/ftests/Makefile, src/ftests/Makefile.target.in, src/utils/Makefile, src/utils/Makefile.target.in, src/validation_tests/Makefile, src/validation_tests/Makefile.target.in: ctests/ftests/utils/validation_tests: get shared library linking working again This should let the various tests and utils be linked as shared libraries again. * src/validation_tests/Makefile: validation_tests: add an installation target this makes the validation tests have an install target, like the ctests and ftests * src/ctests/Makefile, src/ftests/Makefile: ctests/ftests: fix "install" target at some point DATADIR was renamed datadir and the install targets were not updated. 
2017-11-07 Asim YarKhan * bitbucket-pipelines.yml: Bitbucket pipeline testing: Inspired by Phil Mucci's branch; copied the functionality tests run in that branch. * src/components/lmsensors/linux-lmsensors.c: lmsensors component: Changed event names to use lm_sensors (only once) instead of LM_SENSORS (twice) to be consistent with other events 2017-11-02 William Cohen * src/components/appio/tests/iozone/gnu3d.dem: gnu3d.dem should not be executed by the test framework This file is a gnuplot file and should not be executed as part of the tests. Removing the executable perms will signal to the testing framework that it shouldn't be executed. * src/components/appio/tests/iozone/Gnuplot.txt: Gnuplot.txt should not be executed by the test framework This file is a readme file and should not be executed as part of the tests. Removing the executable perms will signal to the testing framework that it shouldn't be executed. * .../appio/tests/iozone/iozone_visualizer.pl, src/components/appio/tests/iozone/report.pl: Fix perl scripts so they run on Linux machines The DOS style newlines were preventing Linux from selecting the appropriate interpreter for these scripts and causing these tests to fail. 2017-11-07 Asim YarKhan * src/components/lmsensors/configure: lmsensors component: Regenerate the configure file for the component 2017-11-02 William Cohen * src/components/lmsensors/Makefile.lmsensors.in, src/components/lmsensors/configure.in, src/components/lmsensors /linux-lmsensors.c: Make the lmsensors dynamically load the needed shared library When attempting to build the current git repo of papi the build of the files in the utils subdirectory failed because the lmsensors libraries were not being linked in. Rather than forcing the papi to link in the lmsensor library during the build the lmsensors component has been modified to dynamically load the needed libraries and enable the lmsensors events when available. 
This allows machines missing the lmsensor libraries installed to still use papi. 2017-11-06 Asim YarKhan * src/components/cuda/linux-cuda.c: CUDA component: On architectures without CUDA Metrics (e.g. Tesla C2050), skip metric registration rather than returning errors 2017-11-06 Vince Weaver * src/validation_tests/papi_l2_dca.c, src/validation_tests/papi_l2_dcm.c, src/validation_tests/papi_l2_dcr.c, src/validation_tests/papi_l2_dcw.c: validation_tests: make the papi_l2 tests fail with warnings On Haswell/Broadwell and newer these tests fail for unknown reasons. This isn't new behavior, it's just that the tests are new. It's unlikely we will have time to completely sort this out before the upcoming release, so change the FAIL to WARN so testers won't be unnecessarily alarmed. 2017-11-05 Vince Weaver * src/components/perf_event/perf_event.c, src/configure, src/configure.in: perf_event: enable rdpmc support by default It can still be disabled at configure time with --enable-perfevent- rdpmc=no This speeds up PAPI_read() by at least a factor of 5x (see the ESPT'17 workshop presentation) It is only enabled on Linux 4.13 and newer due to bugs in previous versions. 2017-11-03 Vince Weaver * src/ctests/sdsc-mpx.c: ctests: sdsc: fix issue where the error message is not printed correctly 2017-11-01 Heike Jagode * src/components/powercap/linux-powercap.c: Intermediate check-in: Fixed a whole bunch of careless file handling (missing closing of open files, missing setting of open/close flag, etc). Still more rigorous checks needed. Mon Oct 30 17:16:32 2017 -0700 Stephane Eranian * src/libpfm4/lib/events/intel_skl_events.h: Update libpfm4\n\nCurrent with\n commit 21405fb3c247a0d16861483daf0696cf4fa0cc43 update SW_PREFETCH event for Intel Skylake Event was renamed SW_PREFETCH_ACCESS, but we keep SW_PREFETCH as an alias. Added PREFETCHW umask. Enabled suport for both Skylake client and server as per official event table from 10/27/2017. 
See download.01.org/perfmon/ 2017-10-30 Vince Weaver * src/validation_tests/Makefile.recipies, src/validation_tests/cycles.c, src/validation_tests/cycles_validation.c: validation_tests: add cycles_validation test this is the old zero test, which does a number of cycles tests It should be extended to add more. 2017-10-30 Vince Weaver * src/ctests/attach2.c, src/ctests/attach3.c, src/ctests/calibrate.c, src/ctests/child_overflow.c, src/ctests/code2name.c, src/ctests/earprofile.c, src/ctests/exec_overflow.c, src/ctests/fork_overflow.c, src/ctests/hwinfo.c, src/ctests/mendes- alt.c, src/ctests/prof_utils.c, src/ctests/prof_utils.h, src/ctests/profile.c, src/ctests/remove_events.c, src/ctests/shlib.c, src/ctests/system_child_overflow.c, src/ctests/system_overflow.c, src/ctests/zero_named.c, src/testlib/papi_test.h, src/testlib/test_utils.c: papi: c++11 fixes: fix various ctests that c++ complains on mostly just const warnings, some K+R function declarations, and possibly an actual char/char* bug. * src/papi.c, src/papi.h: papi: c++11 conversion: PAPI_get_component_index() * src/papi.c, src/papi.h: papi: c++11 conversion: convert PAPI_perror() * src/aix.c, src/components/appio/appio.c, src/components/bgpm/CNKunit/linux-CNKunit.c, src/components/bgpm/IOunit/linux-IOunit.c, src/components/bgpm/L2unit/linux-L2unit.c, src/components/bgpm/NWunit/linux-NWunit.c, src/components/emon /linux-emon.c, src/components/net/linux-net.c, src/components/perf_event/pe_libpfm4_events.c, src/components/perf_event/pe_libpfm4_events.h, src/components/perf_event/perf_event.c, .../perf_event_uncore/perf_event_uncore.c, src/components/perfmon_ia64/perfmon-ia64.c, src/freebsd.c, src /linux-bgq.c, src/papi.c, src/papi.h, src/papi_internal.c, src/papi_internal.h, src/papi_libpfm3_events.c, src/papi_libpfm_events.h, src/papi_vector.c, src/papi_vector.h: papi: start converting papi.h to be C++11 clean Most of the issues have to do with string to char * conversion. 
This first patch converts PAPI_event_name_to_code() The issue was first reported by Brian Van Straalen * src/validation_tests/papi_l2_dca.c: validation_tests/papi_l2_dca: update some comments * src/ctests/zero.c, src/validation_tests/cycles.c: ctests/zero: make test pass on recent intel machines The test was failing due to the PAPI_get_real_cycles() validation on recent Intel chips. This is probably something that should be tested in a separate test and not in zero which is supposed to be a bare-bones are-things-working test. 2017-10-27 Philip Vaccaro * src/components/powercap/README: updated powercap README to be more concise. includes more details on interacting with energy counters and power limits. 2017-10-27 Asim YarKhan * src/components/cuda/linux-cuda.c, src/components/nvml/linux-nvml.c: CUDA/NVML components: Handled segfault which can occur when dlclosing libcudart from both components by adding an additional flag to dlopen 2017-10-24 Asim YarKhan * src/components/cuda/linux-cuda.c, src/components/cuda/tests/simpleMultiGPU.cu: CUDA component: Clean up fulltest by moving some output from stdout to SUBDBG, removed some commented out lines * src/components/nvml/linux-nvml.c: nvml component: To support V100 (Volta) updated to get nvmlDevice handle ordered by index rather than pci busid. 2017-10-23 Asim YarKhan * src/components/cuda/linux-cuda.c: CUDA component: Minor fix to remove some unneeded stdout which shows up during fulltest 2017-10-20 Asim YarKhan * src/components/cuda/linux-cuda.c, src/components/cuda/tests/Makefile, src/components/cuda/tests/simpleMultiGPU.cu: CUDA component test update: Remove some debug output. Do not build cupti_only test binary. Thu Oct 19 11:23:44 2017 -0700 Stephane Eranian * src/libpfm4/examples/showevtinfo.c, src/libpfm4/lib/events/intel_skl_events.h: Update libpfm4\n\nCurrent with\n commit 2e98642dd331b15382256caa380834d01b63bef8 Fix Intel Skylake EXE_ACTIVITY.1_PORTS_UTIL event Was missing a umask name. 
2017-10-17 Vince Weaver * src/ctests/version.c: ctests: version, add INCREMENT field at the request of Steve Kaufmann * src/ctests/Makefile.recipies, src/ctests/version.c: ctests: re- enable version test not sure why it was disabled * src/ctests/Makefile.recipies: ctests: alphabetize SERIAL tests in Makefile.recipes 2017-10-13 Philip Vaccaro * src/components/powercap/tests/Makefile, src/components/powercap/tests/powercap_limit.c: added simple limit test for the powercap component. 2017-10-09 Asim YarKhan * src/components/nvml/linux-nvml.c: Big Fix NVML component: Fix problem with names when there are multiple identical GPUs If multiple identical GPUs were available, the names were not mapped correctly. Fixed event names to be "nvml:::Tesla_K40c:device_0:myevent" rather than "nvml:::Tesla_K40c_0:myevent". Fri Sep 29 00:25:09 2017 -0700 Stephane Eranian * src/libpfm4/include/perfmon/perf_event.h, src/libpfm4/lib/events/intel_skl_events.h, src/libpfm4/lib/events/s390x_cpumf_events.h, src/libpfm4/lib/pfmlib_s390x_cpumf.c, src/libpfm4/perf_examples/Makefile, src/libpfm4/perf_examples/branch_smpl.c, src/libpfm4/perf_examples/perf_util.c: Update libpfm4\n\nCurrent with\n commit d1e7c96df60a00a371fdaa3b635ad4a38cee4c2f add new branch_smpl.c perf_events example This patch adds a new example to demo how to sample and parse the PERF_SAMPLE_BRANCH_STACK record format of perf_events. It will dump branches taken from the sampled command. 2017-10-05 Asim YarKhan * src/components/nvml/README, src/components/nvml/linux-nvml.c, src/components/nvml/linux-nvml.h, src/components/nvml/tests/HelloWorld.cu, src/components/nvml/tests/Makefile, .../nvml/tests/nvml_power_limiting_test.cu: Update NVML component: Support for power limiting using NVML PAPI has added support for power limiting using NVML (on supported devices from the Kepler family or later). The executable needs to have root permissions to change the power limits on the device. 
We have added new events to the NVML component to support power management limits. The nvml:::DEVICE:power_management_limit can be written (as well as read), but requires higher permissions (root level). The limit is constrained between a min and a max value, which can be read. When the component is unloaded, the power_management_limit should be reset to the initial value. nvml:::DEVICE:power_management_limit nvml:::DEVICE:power_management_limit_constraint_min nvml:::DEVICE:power_management_limit_constraint_max A new test (nvml/tests/nvml_power_limiting_test.cu) was written to check if the writing functionality works (with the proper hardware and permissions). 2017-10-04 Asim YarKhan * src/components/nvml/linux-nvml.c, src/components/nvml/linux-nvml.h, src/components/nvml/tests/HelloWorld.cu: Style consistency and refactoring via astyle command. No changes to the actual code were made here. 2017-10-04 Vince Weaver * src/components/rapl/linux-rapl.c: rapl: add support for some Intel Atom models Goldmont / Gemini_Lake / Denverton * src/components/rapl/linux-rapl.c: rapl: fix skylake SoC measurement support * src/components/rapl/linux-rapl.c: rapl: add support for skylake SoC energy measurements * src/components/rapl/linux-rapl.c: rapl: add Skylake-X / Kabylake support * src/components/rapl/linux-rapl.c: rapl: centralize the "different DRAM units" code * src/components/rapl/linux-rapl.c: rapl: merge like processors * src/components/rapl/linux-rapl.c: rapl: convert chip detection to a switch statement * src/components/rapl/linux-rapl.c: rapl: update the whitespace a bit 2017-09-12 Heike Jagode (jagode@icl.utk.edu) * .../infiniband_umad/linux-infiniband_umad.c, .../infiniband_umad /linux-infiniband_umad.h: Fixed papi_vector for infiniband_umad component. The array of function pointers that the component defines must use the naming convention papi_vector_t _x_vector where x is the name of the component directory. 
In this case, the name of the component directory is infiniband_umad and not infiniband. This change has not been tested yet due to OFED lib issues on our local machines. There may be more changes required in order to get the infiniband_umad component to work properly. 2017-09-11 Hanumanth * man/man1/papi_avail.1, man/man1/papi_native_avail.1, src/utils/papi_avail.c, src/utils/papi_native_avail.c: Updating man and help pages for papi_avail and papi_native_avail 2017-09-07 Asim YarKhan * src/components/cuda/tests/nvlink_bandwidth.cu, .../cuda/tests/nvlink_bandwidth_cupti_only.cu: Update to CUDA component to support NVLink. The CUDA component has been cleaned up and updated to support NVLink. NVLink metrics can not be measured properly in KERNEL event collection mode, so the CUPTI EventCollectionMode is transparently set to CUPTI_EVENT_COLLECTION_MODE_CONTINUOUS when a NVLink metric is being measured in an eventset. For all other events and metrics, the CUDA component uses the KERNEL event collection mode. A bug in the earlier version was that repeated calls to add CUDA events were failing because some structures were not cleaned up. This should now be fixed. A new nvlink test was added to the CUDA component tests. 
2017-08-31 Phil Mucci * man/man1/papi_avail.1, man/man1/papi_clockres.1, man/man1/papi_command_line.1, man/man1/papi_component_avail.1, man/man1/papi_cost.1, man/man1/papi_decode.1, man/man1/papi_error_codes.1, man/man1/papi_event_chooser.1, man/man1/papi_hybrid_native_avail.1, man/man1/papi_mem_info.1, man/man1/papi_multiplex_cost.1, man/man1/papi_native_avail.1, man/man1/papi_version.1, man/man1/papi_xml_event_info.1, man/man3/PAPI_cleanup_eventset.3, man/man3/PAPI_destroy_eventset.3: Updating options for papi_avail/native_avail as well as all references to old mailing list 2017-08-31 Asim YarKhan * src/components/nvml/linux-nvml.c, src/components/nvml/tests/HelloWorld.cu, src/components/nvml/tests/Makefile: Minor updates to NVML component to enable it to compile and run without complaints 2017-08-30 Vince Weaver * src/validation_tests/papi_br_prc.c, src/validation_tests/papi_br_tkn.c: validation: update papi_br_prc and papi_br_tkn for amd fam15h amd fam15h doesn't have a conditional branch event so the measures have to be against total. for now print warning, maybe we should let it go w/o a warning. * src/papi_events.csv: papi_events: add PAPI_BR_PRC event to amd fam15h * src/papi_events.csv: papi_events: update PAPI_BR_PRC and PAPI_BR_TKN on sandybridge/ivybridge They were using TOTAL branches for the derived branch events rather than CONDITIONAL like the other modern x86 processors were using. 
* src/validation_tests/papi_br_tkn.c: validation_tests: papi_br_tkn: update to only count conditional branches * src/validation_tests/papi_br_prc.c: validation_tests: papi_br_prc: make sure it is comparing conditional branches was doing total branches, which made the test fail on skylake Mon Aug 21 23:55:46 2017 -0700 Stephane Eranian * src/libpfm4/lib/pfmlib_intel_x86.c: Update libpfm4\n\nCurrent with\n commit a290dead7c1f351f8269a265c0d4a5f38a60ba29 fix usage of is_model_event() for Intel X86 This patch fixes a couple of problems introduced by commit: 77a5ac9d43b1 add model field to intel_x86_entry_t The code in pfm_intel_x86_get_event_first() was incorrect. It was calling is_model_event() before checking if the index was within bounds. It should have been the opposite. Same issue in pfm_intel_x86_get_next_event(). This could cause SEGFAULT as reported by Phil Mucci. The patch also fixes the return value of pfm_intel_x86_get_event_first(). It was not calculated correctly. Reported-by: Phil Mucci 2017-08-20 Vince Weaver * src/ctests/Makefile.recipies, src/ctests/failed_events.c: ctests: add failed_events test it tries to create invalid events to make sure the event parser properly handles invalid events. 2017-08-19 Vince Weaver * src/components/perf_event_uncore/tests/Makefile, .../perf_event_uncore/tests/perf_event_uncore.c, .../tests/perf_event_uncore_attach.c: perf_event_uncore: tests: update perf_event_uncore to use :cpu=0 This is the more common way of specifying uncore events. 
Rename the old test that uses PAPI_set_opt() to perf_event_uncore_attach * .../tests/perf_event_uncore_cbox.c, .../tests/perf_event_uncore_lib.c, .../tests/perf_event_uncore_lib.h: perf_event_uncore: tests: update uncore events for recent processors * src/ctests/zero_pthreads.c: ctests: zero_pthreads: remove extraneous printf when in quiet mode * .../tests/perf_event_uncore_lib.c: perf_event_uncore: event list, add recent processors libpfm4 still doesn't support regular Haswell, Broadwell, or Skylake machines * .../perf_event_uncore/tests/perf_event_uncore.c, .../tests/perf_event_uncore_cbox.c, .../tests/perf_event_uncore_multiple.c: perf_event_uncore: tests: print a message indicating the problem on skip also some whitespace cleanups * src/components/perf_event/tests/event_name_lib.c: perf_event: tests: update event_name_lib for recent Intel processors * src/components/perf_event/tests/event_name_lib.c: perf_event: tests: event_name_lib, clean up whitespace * .../perf_event/tests/perf_event_offcore_response.c: perf_event: tests: update perf_event_offcore_response test print an indicator of why we are skipping the test also some gratuitous whitespace cleanups * src/ctests/zero_shmem.c: ctests: zero_shmem: document the code a little better * src/ctests/zero_smp.c: ctests: zero_smp: make it actually do something on Linux Linux can use the pthread code just like AIX although we don't validate the results, so this test could be another candidate for not being necessary anymore. * src/ctests/zero_shmem.c: ctests: zero_shmem: minor cleanups we pretty much always skip this test. Is it needed anymore? What was it testing in the first place? The code it calls (start_pes() ) doesn't seem to exist anymore * src/ctests/zero_omp.c, src/ctests/zero_pthreads.c: ctests: zero_omp and zero_pthread were skipping due to a typo when updating the code I had left a stray ! 
before PAPI_query_event() 2017-08-19 Vince Weaver * src/papi_events.csv: papi_events: the skylake fixes broke hsw/bdw this skylake-x change is way more trouble than it was worth. 2017-08-19 Vince Weaver * src/papi_events.csv: papi_events: on skylake the SNP_FWD umask was renamed to SNP_HIT_WITH_FWD This broke presets on skylake, skylake-x * src/components/perf_event/pe_libpfm4_events.c: perf_event: fix uninitialized descr issue reported by valgrind I don't think this is the skylake-x bug though 2017-08-18 Vince Weaver * src/components/perf_event/pe_libpfm4_events.c: perf_event: clean up some whitespace in pe_libpfm4_events.c * src/linux-memory.c: linux-memory: various errors when compiling with debug enabled the new proc memory code had some mistakes in the debug messages that only appeared when compiled with --with- debug Reported-by: Steve Kaufmann 2017-08-17 Vince Weaver * src/papi_events.csv: papi_events: missed one of the skx event locations 2017-08-16 Vince Weaver * src/papi_events.csv: papi_events: enable Skylake X support Sun Aug 6 00:22:52 2017 -0700 Stephane Eranian * src/libpfm4/include/perfmon/pfmlib.h, src/libpfm4/lib/events/intel_skl_events.h, src/libpfm4/lib/pfmlib_common.c, src/libpfm4/lib/pfmlib_intel_skl.c, src/libpfm4/lib/pfmlib_intel_snbep_unc.c, src/libpfm4/lib/pfmlib_intel_x86.c, src/libpfm4/lib/pfmlib_intel_x86_priv.h, src/libpfm4/lib/pfmlib_priv.h, src/libpfm4/tests/validate_x86.c: Update libpfm4\n\nCurrent with\n commit efd16920194999fdf1146e9dab3f7435608a9479 add support for Intel Skylake X This patch adds support for Intel Skylake X core PMU events. Based on download.01.org/perfmon/SKX/skylakex_core_v25.json. New PMU is called skx. 2017-08-07 Vince Weaver * src/papi_events.csv: papi_events: add initial AMD fam17h support not tested on actual hardware yet * src/papi_events.csv: papi_events: fix the amd_fam16h PMU name The way libpfm4 reports fam16h was modified a bit from my initial patches. fam16h seems to be working now. 
Thu Jul 27 23:30:20 2017 -0700 Stephane Eranian * src/libpfm4/README, src/libpfm4/docs/Makefile, src/libpfm4/docs/man3/libpfm_amd64_fam16h.3, src/libpfm4/docs/man3/libpfm_amd64_fam17h.3, src/libpfm4/docs/man3/libpfm_intel_bdx_unc_cbo.3, src/libpfm4/docs/man3/libpfm_intel_bdx_unc_ha.3, src/libpfm4/docs/man3/libpfm_intel_bdx_unc_imc.3, src/libpfm4/docs/man3/libpfm_intel_bdx_unc_irp.3, src/libpfm4/docs/man3/libpfm_intel_bdx_unc_pcu.3, src/libpfm4/docs/man3/libpfm_intel_bdx_unc_qpi.3, .../docs/man3/libpfm_intel_bdx_unc_r2pcie.3, src/libpfm4/docs/man3/libpfm_intel_bdx_unc_r3qpi.3, src/libpfm4/docs/man3/libpfm_intel_bdx_unc_sbo.3, src/libpfm4/docs/man3/libpfm_intel_bdx_unc_ubo.3, src/libpfm4/examples/showevtinfo.c, src/libpfm4/include/perfmon/pfmlib.h, src/libpfm4/lib/Makefile, src/libpfm4/lib/events/amd64_events_fam16h.h, src/libpfm4/lib/events/amd64_events_fam17h.h, src/libpfm4/lib/events/intel_bdx_unc_cbo_events.h, src/libpfm4/lib/events/intel_bdx_unc_ha_events.h, src/libpfm4/lib/events/intel_bdx_unc_imc_events.h, src/libpfm4/lib/events/intel_bdx_unc_irp_events.h, src/libpfm4/lib/events/intel_bdx_unc_pcu_events.h, src/libpfm4/lib/events/intel_bdx_unc_qpi_events.h, .../lib/events/intel_bdx_unc_r2pcie_events.h, .../lib/events/intel_bdx_unc_r3qpi_events.h, src/libpfm4/lib/events/intel_bdx_unc_sbo_events.h, src/libpfm4/lib/events/intel_bdx_unc_ubo_events.h, src/libpfm4/lib/pfmlib_amd64.c, src/libpfm4/lib/pfmlib_amd64_fam16h.c, src/libpfm4/lib/pfmlib_amd64_fam17h.c, src/libpfm4/lib/pfmlib_amd64_priv.h, src/libpfm4/lib/pfmlib_common.c, src/libpfm4/lib/pfmlib_intel_bdx_unc_cbo.c, src/libpfm4/lib/pfmlib_intel_bdx_unc_ha.c, src/libpfm4/lib/pfmlib_intel_bdx_unc_imc.c, src/libpfm4/lib/pfmlib_intel_bdx_unc_irp.c, src/libpfm4/lib/pfmlib_intel_bdx_unc_pcu.c, src/libpfm4/lib/pfmlib_intel_bdx_unc_qpi.c, src/libpfm4/lib/pfmlib_intel_bdx_unc_r2pcie.c, src/libpfm4/lib/pfmlib_intel_bdx_unc_r3qpi.c, src/libpfm4/lib/pfmlib_intel_bdx_unc_sbo.c, src/libpfm4/lib/pfmlib_intel_bdx_unc_ubo.c, 
src/libpfm4/lib/pfmlib_intel_snbep_unc.c, src/libpfm4/lib/pfmlib_intel_snbep_unc_priv.h, src/libpfm4/lib/pfmlib_priv.h, src/libpfm4/perf_examples/self_count.c, src/libpfm4/tests/validate_x86.c: Update libpfm4 Current with commit 72474c59d88512e49d9be7c4baa4355e8d8ad10a fix typo in AMd Fam17h man page PMU name was mistyped. 2017-08-04 Vince Weaver * src/validation_tests/papi_l1_dcm.c, src/validation_tests/papi_l2_dcm.c: validation_tests: for the DCM tests up the allowed error to 5% We don't want to fail too easily, and 5% seems reasonable. This lets the test pass on ARM64 Dragonboard 401c * src/linux-memory.c: linux-memory: add fallback generic Linux /sys cache size detection This will allow getting cache sizes on architectures we don't have custom code for. Currently this mostly means ARM64. * src/validation_tests/papi_l1_dcm.c, src/validation_tests/papi_l2_dcm.c: validation_tests: don't crash if cachesize reported as zero * src/validation_tests/branches_testcode.c: branches_testcode: add arm64 support 2017-07-27 Vince Weaver * src/papi_events.csv, src/validation_tests/papi_l2_dca.c: validation_tests: trying to find out why PAPI_L2_DCA fails on Haswell it's a mystery still. One alternative is to switch the event to be the same as PAPI_L1_DCM but that seems like it would be cheating. 
* src/validation_tests/papi_l2_dcw.c: validation_tests: papi_l2_dcw: shorten a warning message * src/papi_events.csv: papi_events: note that libpfm4 Kaby Lake support is treated as part of Skylake * src/validation_tests/Makefile.recipies, src/validation_tests/papi_l2_dcw.c: validation_tests: add PAPI_L2_DCW test * src/validation_tests/Makefile.recipies, src/validation_tests/papi_l2_dcr.c: validation_tests: add PAPI_L2_DCR test * src/validation_tests/papi_l2_dcm.c: validation_tests: PAPI_L2_DCM figured out a test that made sense * src/validation_tests/Makefile.recipies, src/validation_tests/papi_l1_dcm.c: validation_tests: add PAPI_L1_DCM test * src/validation_tests/Makefile.recipies, src/validation_tests/cache_testcode.c, src/validation_tests/papi_l2_dcm.c, src/validation_tests/testcode.h: validation_tests: first attempt at papi_l2_dcm test disabled for now, as it's really hard to make a workable cache miss test on modern hardware. 2017-07-26 Vince Weaver * src/ctests/Makefile, src/ctests/Makefile.recipies, src/ctests/child_overflow.c, src/ctests/exec_overflow.c, src/validation_tests/Makefile.recipies, src/validation_tests/busy_work.c, src/validation_tests/testcode.h: ctests: clean up the exec/child overflow tests The exec_overflow test segfaults when using rdpmc This is a bug in Linux. I'm working on getting it fixed. 2017-07-21 Vince Weaver * src/validation_tests/Makefile.recipies, src/validation_tests/cache_helper.c, src/validation_tests/cache_helper.h, src/validation_tests/cache_testcode.c, src/validation_tests/papi_l1_dca.c, src/validation_tests/papi_l2_dca.c, src/validation_tests/testcode.h: validation_tests: add PAPI_L2_DCA test also adds some generic cache testing infrastructure * src/validation_tests/papi_l1_dca.c: validation_tests: PAPI_L1_DCA fixes had to find a machine that actually supported the event. On AMD Fam15h the write count is 3x expected? Need to investigate further. 
* src/validation_tests/papi_br_prc.c: validation_tests: papi_br_prc, properly skip if event not found * src/validation_tests/Makefile.recipies, src/validation_tests/papi_l1_dca.c: validation_tests: add PAPI_L1_DCA test 2017-07-20 Vince Weaver * src/validation_tests/Makefile.recipies, src/validation_tests/papi_br_msp.c, src/validation_tests/papi_br_prc.c: validation_tests: add PAPI_BR_PRC test * src/validation_tests/Makefile.recipies, src/validation_tests/papi_br_tkn.c: validation_tests: add PAPI_BR_TKN test * src/validation_tests/Makefile.recipies, src/validation_tests/papi_br_ntk.c: validation_tests: add PAPI_BR_NTK test 2017-07-07 Vince Weaver * src/papi_events.csv: papi_events: move haswell, skylake, and broadwell to traditional PAPI_REF_CYC there's a slight chance this might break things for people, if so we can revert it. * src/linux-timer.c: linux-timer: fix build warning on non-power build * src/ctests/flops.c, src/validation_tests/flops_testcode.c, src/validation_tests/papi_dp_ops.c, src/validation_tests/papi_fp_ops.c, src/validation_tests/papi_sp_ops.c: validation: make the flops tests handle that POWER has fused multiply-add PAPI_DP_OPS and PAPI_SP_OPS still fail, need to audit what the event is doing * src/papi_events.csv: POWER8: add a few branch preset events they pass the validation tests, not sure why they weren't enabled originally * src/validation_tests/branches_testcode.c: validation: add POWER branches testcode not sure I got the clobbers right * src/components/perf_event/perf_helpers.h, src/validation_tests/papi_tot_ins.c: POWER: fix some compiler warnings 2016-10-18 Phil Mucci * src/linux-timer.c: Ensure stdint gets included for all Linuxen. * src/linux-timer.c: Some Linuxen need stdint to get the uint64_t type. 2016-10-14 Phil Mucci * src/linux-lock.h: Restructured unlock code to avoid warnings. Tested against 80 threads on Power8 2016-10-12 Phil Mucci * src/linux-timer.c: PPC64/PPC fast timer fixup. 
2017-07-07 Vince Weaver * src/linux-timer.c: linux-timer: allow using fast timer for get_real_cycles() on POWER 2016-07-12 Phil Mucci * src/linux-timer.c, src/linux-timer.h: First pass at good rdtsc for Power7/8 2017-07-03 Vince Weaver * src/ctests/flops.c, src/ctests/hl_rates.c, src/validation_tests/Makefile.recipies, src/validation_tests/flops.c, src/validation_tests/flops_testcode.c, src/validation_tests/flops_validation.c, src/validation_tests/papi_dp_ops.c, src/validation_tests/papi_fp_ops.c, src/validation_tests/papi_sp_ops.c, src/validation_tests/testcode.h: validation_tests: add tests for PAPI_SP_OPS and PAPI_DP_OPS extend the flops_testcode as well, to have both float and double versions. * src/validation_tests/papi_ref_cyc.c: validation_tests: papi_ref_cyc: update test to work on older systems it's actually the newer (haswell/broadwell/skylake) that are using a different event than the older systems. Make the test check for the old behavior. 2017-07-02 Vince Weaver * src/ctests/Makefile.recipies, src/ctests/cycle_ratio.c, src/validation_tests/Makefile.recipies, src/validation_tests/flops_testcode.c, src/validation_tests/papi_ref_cyc.c, src/validation_tests/testcode.h: validation_tests: move cycle_ratio test to be papi_ref_cyc test * src/ctests/cycle_ratio.c: ctests: rewrite cycle_ratio test on Intel platforms PAPI_REF_CYC is a fixed 100MHz cycle count the test was making the assumption that PAPI_REF_CYC was equal to the max design freq (not turboboost) and thus as far as I can tell it never would return the right answer. This test should probably be moved to validation_tests. 
2017-07-01 Vince Weaver * src/ctests/Makefile.recipies, src/ctests/branches.c, src/ctests /sdsc-mpx.c, src/ctests/sdsc2.c: ctests: migrate all other users of dummy3() workload * src/ctests/Makefile.recipies, src/ctests/sdsc4-mpx.c, src/validation_tests/flops_testcode.c, src/validation_tests/testcode.h: ctests: move the "dummy3" workload to the common workload library * src/ctests/sdsc4-mpx.c: ctests: sdsc4-mpx: fix failing on recent Intel machines the multiplexing of an event with small results (PAPI_SR_INS in this case) has high variance, so don't use it for validation. There was code trying to do this but it wasn't working. 2017-06-30 Vince Weaver * src/ctests/first.c, src/ctests/matrix-hl.c, src/ctests/zero_omp.c, src/ctests/zero_pthreads.c: ctests: catch lack of CPU component earlier gets rid of extraneous SKIPPED in the output of run_tests.sh * src/components/cuda/tests/HelloWorld.cu, src/components/cuda/tests/Makefile: tests:cuda: make the HelloWorld test more like a standard PAPI test * src/validation_tests/Makefile.recipies: validation_tests: fix linking against a CUDA enabled PAPI Fix suggested by Steve Kaufmann * src/testlib/papi_test.h, src/testlib/test_utils.c: testlib: make it so it can compile with c++ this lets us link against it from the CUDA tests * src/components/cuda/sampling/gpu_activity.c: tests: cuda: fix sampling/gpu_activity to compile without warnings * src/Makefile.inc: tests: make the component tests build command be the same as ctests/ftests * src/ctests/calibrate.c: ctests: calibrate: turn off printf if TEST_QUIET missed this one when testing because test machine skipped it due to lack of floating point events 2017-06-29 Vince Weaver * .../tests/perf_event_amd_northbridge.c, src/ctests/Makefile.recipies, src/ctests/cycle_ratio.c, src/ctests/derived.c, src/ctests/multiplex1_pthreads.c, src/ctests/multiplex3_pthreads.c, src/ctests/overflow.c, src/ctests/overflow_allcounters.c, src/ctests/overflow_index.c, src/ctests/overflow_pthreads.c, 
src/ctests/overflow_twoevents.c, src/ctests/prof_utils.c, src/ctests/prof_utils.h, src/ctests/profile.c, src/ctests/profile_twoevents.c, src/ctests/realtime.c, src/ctests/reset.c, src/ctests/reset_multiplex.c, src/ctests/sdsc-mpx.c, src/ctests/sdsc.c, src/ctests/sdsc4-mpx.c, src/ctests/sdsc4.c, src/ctests/shlib.c, src/ctests/tenth.c, src/ctests/thrspecific.c, src/testlib/papi_test.h: testlib: remove the hack where all printf's are #defined to something else Explicitly check everywhere for TESTS_QUIET or equivalent, rather than using C preprocessor macros to redefine printf * src/papi.c, src/testlib/test_utils.c: tests: set the ctest debug mode to VERBOSE by default for tests the TESTS_QUIET mode was turning *off* verbose debugging, which meant that PAPIERROR() calls wouldn't show up during a ./run_tests.sh * src/components/perf_event/perf_event.c: perf_event: properly initialize the mmap_addr structure It wasn't always being set to NULL, and so on some tests the code would try to munmap() it even though it wasn't mapped. * src/testlib/test_utils.c: tests: enable color in test status messages this has been an optional feature for a long time, if you enabled the environment variable TESTS_COLOR=y this change makes it default to being on (you can disable with export TESTS_COLOR=n); also it should automatically detect if you are piping to a file and disable colors in that case too * src/validation_tests/Makefile, src/validation_tests/Makefile.recipies: validation_tests: always include -lrt on the tests Should be harmless, and I don't always test on an old enough machine to trigger the problem. 
* src/ctests/forkexec.c, src/ctests/forkexec2.c, src/ctests/forkexec3.c, src/ctests/forkexec4.c, src/ctests/multiplex3_pthreads.c, src/ctests/system_child_overflow.c: ctests: make the fork/exec tests only print "PASSED" once this makes the run_test.sh input look a lot nicer * src/run_tests.sh, src/testlib/test_utils.c: tests: make the output from run_tests.sh more compact 2017-06-28 Vince Weaver * .../perf_event/tests/perf_event_system_wide.c: perf_event: tests, make perf_event_system_wide use INS rather than CYC cycles varied too much, making the validation fail * src/validation_tests/Makefile.recipies, src/validation_tests/papi_br_cn.c, src/validation_tests/papi_br_ucn.c: validation_tests: add tests for PAPI_BR_CN and PAPI_BR_UCN * src/validation_tests/flops.c: validation_tests: flops: wasn't falling back properly if no FLOPS event * src/utils/Makefile, src/validation_tests/Makefile.recipies: tests: clean up the Makefiles * src/utils/print_header.c: utils: print_header: print the operating system version in the header * .../tests/perf_event_amd_northbridge.c: perf_event_uncore: the perf_event_amd_northbridge test wasn't working it maybe never worked at all? It was hardcoded to thinking it was running on a 3.9 kernel always. * src/ctests/Makefile, src/ctests/Makefile.recipies, src/ctests/zero.c: ctests: zero: complete transition from FLOPS to INS as metric this will make it more likely to be runnable on modern machines. * src/ctests/vector.c, src/validation_tests/vector_testcode.c: validation_tests: move the unused vector.c code maybe we should remove it. It was never built as far as I can tell. * src/validation_tests/Makefile.recipies, src/validation_tests/flops.c: validation_tests: add a generic flops test based on hl_rates we do a lot of testing of the high-level interface but not as much of the regular PAPI interface. 
* src/ctests/Makefile.recipies, src/ctests/hl_rates.c, src/validation_tests/flops_testcode.c, src/validation_tests/testcode.h: ctests: hl_rates: clean up and fix extraneous error message the error message was due to the way TESTS_QUIET is passed as a command line argument. also made it use the same matrix-multiply code that the flops test uses. also added some validation to the results. * src/ctests/all_events.c: ctests: all_events: issue warning if preset cannot be created specifically this came up on an AMD fam15h system where the PAPI_L1_ICH event cannot be created due to Linux stealing a counter for the NMI watchdog * src/validation_tests/papi_hw_int.c: validation_tests: papi_hw_int explicitly mark large constant as ULL compiler was warning on 32-bit machine * src/validation_tests/papi_ld_ins.c, src/validation_tests/papi_sr_ins.c, src/validation_tests/papi_tot_cyc.c: validation_tests: a few tests had the !quiet check inverted * src/validation_tests/papi_hw_int.c: validation_tests: fix papi_hw_int looping forever somehow the loop exit line got lost * src/validation_tests/Makefile.recipies, src/validation_tests/matrix_multiply.c, src/validation_tests/matrix_multiply.h, src/validation_tests/papi_ld_ins.c, src/validation_tests/papi_sr_ins.c: validation_tests: add PAPI_SR_INS test * src/validation_tests/Makefile.recipies, src/validation_tests/matrix_multiply.c, src/validation_tests/matrix_multiply.h, src/validation_tests/papi_hw_int.c, src/validation_tests/papi_ld_ins.c: validation_tests: add PAPI_LD_INS test * src/run_tests.sh, src/validation_tests/Makefile.recipies, src/validation_tests/papi_hw_int.c: validation_tests: add PAPI_HW_INT test 2017-06-27 Vince Weaver * src/run_tests_exclude.txt: run_tests_exclude: add attach_target not really a test so we shouldn't run it * src/ctests/byte_profile.c, src/ctests/earprofile.c, src/ctests/prof_utils.c, src/ctests/prof_utils.h: ctests/prof_utils: remove prof_init() helper It didn't do much more than a papi_init, probably 
better to have each file do that in the open. * src/ctests/inherit.c, src/ctests/ipc.c, src/ctests/johnmay2.c, src/ctests/krentel_pthreads.c, src/ctests/kufrin.c, src/ctests/low- level.c, src/ctests/mendes-alt.c, src/ctests/multiplex1.c, src/ctests/multiplex1_pthreads.c, src/ctests/multiplex2.c, src/ctests/multiplex3_pthreads.c, src/ctests/overflow.c, src/ctests/overflow2.c, src/ctests/overflow3_pthreads.c, src/ctests/overflow_allcounters.c, src/ctests/overflow_index.c, src/ctests/overflow_one_and_read.c, src/ctests/overflow_single_event.c, src/ctests/overflow_twoevents.c, src/ctests/prof_utils.c, src/ctests/profile.c, src/ctests/profile_pthreads.c, src/ctests/profile_twoevents.c, src/ctests/remove_events.c, src/ctests/sprofile.c, src/ctests/zero.c, src/ctests/zero_flip.c, src/ctests/zero_named.c, src/testlib/test_utils.c: ctests: skip rather than fail if no events available 2017-06-26 Vince Weaver * src/ctests/first.c, src/ctests/mpifirst.c, src/ctests/multiattach.c, src/ctests/multiattach2.c, src/testlib/test_utils.c: testlib: fix add_two_events() was not setting some values, causing many tests to fail * src/ctests/attach2.c, src/ctests/system_overflow.c: ctests: compiler warning caught two lack-of-braces mistakes * src/ctests/byte_profile.c, src/ctests/code2name.c, src/ctests/describe.c, src/testlib/test_utils.c: tests: more changes to skip instead of fail if no events available * src/ctests/Makefile.recipies, src/ctests/child_overflow.c, src/ctests/exec_overflow.c, src/ctests/fork_exec_overflow.c, src/ctests/fork_overflow.c, src/ctests/system_child_overflow.c, src/ctests/system_overflow.c: ctests: break up the for_exec_overflow test it was really four benchmarks with some ifdefs the proper way to do that would be to have a common C file and link against it for the shared routines, rather than using the pre-processor * src/ctests/attach2.c, src/ctests/attach3.c, src/ctests/attach_cpu.c: ctests: have attach tests cleanly skip if no events available * 
src/testlib/test_utils.c: testlib: update add_two_events to skip() if no events found * src/ctests/mendes-alt.c, src/ctests/multiplex2.c, src/ctests/multiplex3_pthreads.c, src/ctests/sdsc.c, src/ctests/sdsc2.c, src/ctests/sdsc4.c, src/testlib/papi_test.h, src/testlib/test_utils.c: testutils: remove init_multiplex() test helper the only benefit it had over calling PAPI_multiplex_init() was a domain workaround for perfctr+power6 systems. Ideally not many of those systems are around anymore, and in any case a proper fix would have the perfctr component handle that, not the testing library. * .../perf_event/tests/perf_event_system_wide.c, .../perf_event/tests/perf_event_user_kernel.c, src/ctests/api.c, src/ctests/byte_profile.c, src/ctests/high-level.c, src/ctests/hl_rates.c, src/validation_tests/papi_br_ins.c, src/validation_tests/papi_br_msp.c, src/validation_tests/papi_tot_cyc.c, src/validation_tests/papi_tot_ins.c: tests: try to "skip" rather than "fail" if no events available * src/ctests/derived.c: ctests: derived: fix warning found on older gcc * src/ctests/high-level2.c: ctests: clean up high-level2 test skip on machine without flops/flips event * src/components/Makefile_comp_tests.target.in: components test: fix another build issue be sure to use local copy of papi.h * src/components/Makefile_comp_tests.target.in: component tests: fix build issue was trying to use the system version of libpapi.a instead of local version * src/components/appio/tests/Makefile, src/components/appio/tests/appio_list_events.c, src/components/appio/tests/appio_values_by_code.c, src/components/coretemp/tests/Makefile, src/components/example/tests/Makefile, src/components/host_micpower/tests/Makefile, src/components/infiniband/tests/Makefile, .../infiniband/tests/infiniband_values_by_code.c, src/components/infiniband_umad/tests/Makefile, .../tests/infiniband_umad_values_by_code.c, src/components/lustre/tests/Makefile, src/components/micpower/tests/Makefile, 
src/components/mx/tests/Makefile, src/components/net/tests/Makefile, src/components/perf_event/tests/Makefile, src/components/perf_event_uncore/tests/Makefile, src/components/powercap/tests/Makefile, src/components/rapl/tests/Makefile, src/components/stealtime/tests/Makefile: components: update component test Makefiles to include Makefile_comp_test.target * src/components/Makefile_comp_tests.target.in: components: update Makefile_comp_test.target.in should now be usable by the components without many Makefile changes * src/components/perf_event/tests/Makefile, src/components/perf_event/tests/nmi_watchdog.c, src/ctests/Makefile.recipies, src/ctests/nmi_watchdog.c: ctests: nmi_watchdog is a perf_event specific test, move it there * src/components/Makefile_comp_tests.target.in, src/components/README, src/components/perf_event/tests/Makefile: components: update the autoconfigure to generate more useful Makefile.target.in although I don't think most components are using it at all 2017-06-26 Asim YarKhan * src/components/cuda/Makefile.cuda.in, src/components/cuda/README, src/components/cuda/Rules.cuda, src/components/cuda/configure, src/components/cuda/configure.in, src/components/cuda/linux-cuda.c, src/components/cuda/sampling/Makefile, src/components/cuda/tests/HelloWorld.cu, src/components/cuda/tests/Makefile, src/components/cuda/tests/simpleMultiGPU.cu: CUDA component update: Support for CUPTI metrics (early release) This commit adds support for CUPTI metrics, which are higher level measures that may be decomposed into multiple lower level CUPTI events. Known problems and limitations in early release of metric support * Only sets of metrics and events that can be gathered in a single pass are supported. Transparent multi-pass support is expected * All metrics are returned as long long integers, which means that CUPTI double precision values will be truncated, possibly severely. * The NVLink metrics have been disabled for this alpha release. 
2017-06-23 Vince Weaver * src/validation_tests/papi_fp_ops.c: validation: papi_fp_ops, skip (not fail) if PAPI_FP_OPS unavailable * src/ctests/Makefile, src/ctests/Makefile.recipies, src/ctests/Makefile.target.in, src/ctests/flops.c: ctests: flops, update to use some of the validate_tests infrastructure * src/validation_tests/Makefile.recipies, src/validation_tests/flops_testcode.c, src/validation_tests/papi_fp_ops.c, src/validation_tests/testcode.h: validation_tests: add papi_fp_ops test tested on an AMD fam15h machine * src/components/powercap/tests/powercap_basic.c: powercap: fix compiler warnings in the powercap_basic test * src/ctests/flops.c: ctests: update flops test * src/ctests/api.c: ctests: update api test only seems to test the high-level API * src/ctests/all_native_events.c: ctests: update all_native_events removed some ancient warnings about uncore/offcore events. Should not be a problem on libpfm4/perf_event * src/ctests/all_events.c: ctests: clean up all_events test * src/components/appio/tests/appio_list_events.c, src/components/appio/tests/appio_test_blocking.c, .../appio/tests/appio_test_fread_fwrite.c, src/components/appio/tests/appio_test_pthreads.c, src/components/appio/tests/appio_test_read_write.c, src/components/appio/tests/appio_test_recv.c, src/components/appio/tests/appio_test_seek.c, src/components/appio/tests/appio_test_select.c, src/components/appio/tests/appio_test_socket.c, src/components/appio/tests/appio_values_by_code.c, src/components/appio/tests/appio_values_by_name.c, src/components/coretemp/tests/coretemp_basic.c, src/components/coretemp/tests/coretemp_pretty.c, src/components/example/tests/example_basic.c, .../example/tests/example_multiple_components.c, .../host_micpower/tests/host_micpower_basic.c, .../infiniband/tests/infiniband_list_events.c, .../infiniband/tests/infiniband_values_by_code.c, .../tests/infiniband_umad_list_events.c, src/components/libmsr/tests/libmsr_basic.c, src/components/lustre/tests/lustre_basic.c, 
src/components/micpower/tests/micpower_basic.c, src/components/mx/tests/mx_basic.c, src/components/mx/tests/mx_elapsed.c, src/components/net/tests/net_list_events.c, src/components/net/tests/net_values_by_code.c, src/components/net/tests/net_values_by_name.c, .../perf_event/tests/perf_event_offcore_response.c, .../perf_event/tests/perf_event_system_wide.c, .../perf_event/tests/perf_event_user_kernel.c, .../tests/perf_event_amd_northbridge.c, .../perf_event_uncore/tests/perf_event_uncore.c, .../tests/perf_event_uncore_cbox.c, .../tests/perf_event_uncore_multiple.c, src/components/powercap/tests/powercap_basic.c, src/components/rapl/tests/rapl_basic.c, src/components/rapl/tests/rapl_overflow.c, src/components/stealtime/tests/stealtime_basic.c, src/components/vmware/tests/vmware_basic.c, src/ctests/all_events.c, src/ctests/all_native_events.c, src/ctests/api.c, src/ctests/attach2.c, src/ctests/attach3.c, src/ctests/attach_cpu.c, src/ctests/branches.c, src/ctests/byte_profile.c, src/ctests/calibrate.c, src/ctests/case1.c, src/ctests/case2.c, src/ctests/clockres_pthreads.c, src/ctests/cmpinfo.c, src/ctests/code2name.c, src/ctests/cycle_ratio.c, src/ctests/data_range.c, src/ctests/derived.c, src/ctests/describe.c, src/ctests/disable_component.c, src/ctests/dmem_info.c, src/ctests/earprofile.c, src/ctests/eventname.c, src/ctests/exec.c, src/ctests/exec2.c, src/ctests/exeinfo.c, src/ctests/first.c, src/ctests/flops.c, src/ctests/fork.c, src/ctests/fork2.c, src/ctests/fork_exec_overflow.c, src/ctests/forkexec.c, src/ctests/forkexec2.c, src/ctests/forkexec3.c, src/ctests/forkexec4.c, src/ctests/get_event_component.c, src/ctests/high-level.c, src/ctests/high-level2.c, src/ctests/hl_rates.c, src/ctests/hwinfo.c, src/ctests/inherit.c, src/ctests/ipc.c, src/ctests/johnmay2.c, src/ctests/krentel_pthreads.c, src/ctests/kufrin.c, src/ctests/locks_pthreads.c, src/ctests/low-level.c, src/ctests /matrix-hl.c, src/ctests/max_multiplex.c, src/ctests/memory.c, src/ctests/mendes-alt.c, 
src/ctests/multiattach.c, src/ctests/multiattach2.c, src/ctests/multiplex1.c, src/ctests/multiplex1_pthreads.c, src/ctests/multiplex2.c, src/ctests/multiplex3_pthreads.c, src/ctests/nmi_watchdog.c, src/ctests/omptough.c, src/ctests/overflow.c, src/ctests/overflow2.c, src/ctests/overflow3_pthreads.c, src/ctests/overflow_allcounters.c, src/ctests/overflow_force_software.c, src/ctests/overflow_index.c, src/ctests/overflow_one_and_read.c, src/ctests/overflow_pthreads.c, src/ctests/overflow_single_event.c, src/ctests/overflow_twoevents.c, src/ctests/p4_lst_ins.c, src/ctests/profile.c, src/ctests/profile_pthreads.c, src/ctests/profile_twoevents.c, src/ctests/pthrtough.c, src/ctests/pthrtough2.c, src/ctests/realtime.c, src/ctests/remove_events.c, src/ctests/reset.c, src/ctests/reset_multiplex.c, src/ctests/sdsc.c, src/ctests/sdsc2.c, src/ctests/sdsc4.c, src/ctests/second.c, src/ctests/shlib.c, src/ctests/sprofile.c, src/ctests/tenth.c, src/ctests/thrspecific.c, src/ctests/timer_overflow.c, src/ctests/virttime.c, src/ctests/zero.c, src/ctests/zero_attach.c, src/ctests/zero_flip.c, src/ctests/zero_fork.c, src/ctests/zero_named.c, src/ctests/zero_omp.c, src/ctests/zero_pthreads.c, src/ctests/zero_smp.c, src/testlib/papi_test.h, src/testlib/test_utils.c, src/validation_tests/papi_br_ins.c, src/validation_tests/papi_br_msp.c, src/validation_tests/papi_tot_cyc.c, src/validation_tests/papi_tot_ins.c: testlib: remove the "free variables" option from test_pass() It was only used by a small handfull of tests, and wasn't really strictly necessary anyway. test_pass() should pass the test and that's all. * src/ctests/zero.c: ctests: zero: start cleaning up this test * src/validation_tests/Makefile.recipies: validation_tests: clock_gettime() requires -lrt on older versions of glibc 2017-06-22 Will Schmidt * src/linux-memory.c, src/papi_events.csv: PAPI power9 event list presets Here is an initial set of events and changes to help support Power9. 
This is based on similar changes that were made for power8 when initial support was added there. I've updated the event names to match what we expect to have in power9, and have done compile/build/ sniff tests. 2017-06-22 Vince Weaver * src/ftests/Makefile.target.in: ftests: fortran tests weren't getting the TOPTFLAGS var set * src/testlib/test_utils.c: testlib: fix colors not turning off in pass/fail indicator * src/ctests/api.c, src/ctests/attach2.c, src/ctests/attach3.c, src/ctests/attach_cpu.c, src/ctests/inherit.c, src/ctests/multiattach.c, src/ctests/multiattach2.c, src/ctests/zero_attach.c, src/testlib/papi_test.h, src/testlib/test_utils.c: testlib: update the way pass/fail is printed It's been bugging me for years that they don't line up * src/run_tests.sh: run_tests.sh: run the validation tests too * src/Makefile.inc: Makefile.inc: make it compile the validation_tests * src/validation_tests/Makefile.recipies, src/validation_tests/papi_br_msp.c: validation-tests: add papi_br_msp test * src/validation_tests/Makefile.recipies, src/validation_tests/branches_testcode.c, src/validation_tests/matrix_multiply.c, src/validation_tests/matrix_multiply.h, src/validation_tests/papi_br_ins.c, src/validation_tests/testcode.h: validation_tests: add papi_br_ins test * src/validation_tests/Makefile.recipies, src/validation_tests/papi_tot_cyc.c: validation_tests: add papi_tot_cyc test * src/Makefile.inc: fix "make install-all" had some extraneous ".." after some previous changes * src/configure, src/configure.in, src/validation_tests/Makefile.target.in, src/validation_tests/papi_tot_ins.c: validation_tests: update configure so it sets up the Makefile * src/testlib/papi_test.h, src/testlib/test_utils.c: testlib: papi_print_header() lives with the utils code now * src/testlib/papi_test.h, src/testlib/test_utils.c: testlib: make tests_quiet() return an integer This way we don't have to depend on the global var TESTS_QUIET if we don't want to. 
* src/validation_tests/Makefile, src/validation_tests/Makefile.recipies, src/validation_tests/Makefile.target.in, src/validation_tests/display_error.c, src/validation_tests/display_error.h, src/validation_tests/instructions_testcode.c, src/validation_tests/papi_tot_ins.c, src/validation_tests/testcode.h: validation_tests: add initial papi_tot_ins test it is not hooked up to the build system yet * src/ctests/multiplex1.c, src/ctests/multiplex2.c, src/ctests/second.c, src/ctests/sprofile.c, src/ctests/virttime.c, src/ctests/zero_attach.c, src/ctests/zero_flip.c, src/ctests/zero_fork.c, src/ctests/zero_omp.c, src/ctests/zero_pthreads.c: ctests: more printf/TESTS_QUIET conversions * src/testlib/fpapi_test.h: ftests: missing define was making second.F fail * src/ctests/johnmay2.c, src/ctests/krentel_pthreads.c, src/ctests/kufrin.c, src/ctests/locks_pthreads.c, src/ctests/memory.c, src/ctests/multiattach.c, src/ctests/multiattach2.c, src/ctests/multiplex1.c: ctests: more printf/TESTS_QUIET fixes 2017-06-21 Vince Weaver * src/ctests/all_events.c, src/ctests/all_native_events.c, src/ctests/attach2.c, src/ctests/attach3.c, src/ctests/attach_cpu.c, src/ctests/byte_profile.c, src/ctests/calibrate.c, src/ctests/cmpinfo.c, src/ctests/code2name.c, src/ctests/cycle_ratio.c, src/ctests/exeinfo.c, src/ctests/fork_exec_overflow.c, src/ctests/hl_rates.c, src/ctests/hwinfo.c: ctests: explicitly block printfs with TESTS_QUIET There was some hackery with the preprocessor to avoid this but that wasn't a good solution. 
* src/testlib/do_loops.h, src/testlib/papi_test.h, src/testlib/test_utils.c: testlib: minor papi_test.h cleanups * .../perf_event/tests/perf_event_offcore_response.c, .../perf_event/tests/perf_event_system_wide.c, .../perf_event/tests/perf_event_user_kernel.c, .../tests/perf_event_amd_northbridge.c, .../perf_event_uncore/tests/perf_event_uncore.c, .../perf_event_uncore/tests/perf_event_uncore_cbox.c, .../tests/perf_event_uncore_multiple.c, src/ctests/attach2.c, src/ctests/attach3.c, src/ctests/attach_cpu.c, src/ctests/attach_target.c, src/ctests/branches.c, src/ctests/burn.c, src/ctests/byte_profile.c, src/ctests/cycle_ratio.c, src/ctests/derived.c, src/ctests/dmem_info.c, src/ctests/earprofile.c, src/ctests/first.c, src/ctests/high-level.c, src/ctests/inherit.c, src/ctests/johnmay2.c, src/ctests/krentel_pthreads.c, src/ctests/kufrin.c, src/ctests/locks_pthreads.c, src/ctests/low- level.c, src/ctests/matrix-hl.c, src/ctests/memory.c, src/ctests/multiattach.c, src/ctests/multiattach2.c, src/ctests/multiplex1.c, src/ctests/multiplex1_pthreads.c, src/ctests/multiplex2.c, src/ctests/multiplex3_pthreads.c, src/ctests/overflow.c, src/ctests/overflow2.c, src/ctests/overflow3_pthreads.c, src/ctests/overflow_allcounters.c, src/ctests/overflow_force_software.c, src/ctests/overflow_index.c, src/ctests/overflow_one_and_read.c, src/ctests/overflow_single_event.c, src/ctests/overflow_twoevents.c, src/ctests/p4_lst_ins.c, src/ctests/prof_utils.c, src/ctests/profile.c, src/ctests/profile_twoevents.c, src/ctests/remove_events.c, src/ctests/reset.c, src/ctests/reset_multiplex.c, src/ctests/sdsc.c, src/ctests/sdsc2.c, src/ctests/sdsc4.c, src/ctests/second.c, src/ctests/sprofile.c, src/ctests/tenth.c, src/ctests/zero.c, src/ctests/zero_attach.c, src/ctests/zero_flip.c, src/ctests/zero_fork.c, src/ctests/zero_named.c, src/ctests/zero_omp.c, src/ctests/zero_pthreads.c, src/ctests/zero_shmem.c, src/ctests/zero_smp.c, src/testlib/Makefile, src/testlib/fpapi_test.h, 
src/testlib/papi_test.h, src/testlib/test_utils.h: testlib: more papi_test.h reduction * src/testlib/Makefile: testlib: turn off optimization on the validation loops it's making tests fail, need to go back and be sure we are properly tricking the compiler. * src/Makefile.inc, src/components/Makefile_comp_tests, src/components/perf_event/tests/Makefile, src/components/perf_event_uncore/tests/Makefile, src/components/rapl/tests/Makefile, src/components/rapl/tests/rapl_overflow.c, src/ctests/Makefile, src/ctests/Makefile.recipies, src/ctests/overflow_pthreads.c, src/ctests/profile_pthreads.c, src/ftests/Makefile, src/ftests/Makefile.recipies, src/ftests/Makefile.target.in, src/testlib/Makefile, src/testlib/do_loops.c, src/testlib/do_loops.h, src/testlib/papi_test.h: testlib: start splitting the validation code off from the pass/fail code * src/components/perf_event/tests/perf_event_offcore_response.c, src/components/perf_event/tests/perf_event_system_wide.c, src/components/perf_event/tests/perf_event_user_kernel.c, src/compo nents/perf_event_uncore/tests/perf_event_amd_northbridge.c, src/components/perf_event_uncore/tests/perf_event_uncore.c, src/components/perf_event_uncore/tests/perf_event_uncore_cbox.c, sr c/components/perf_event_uncore/tests/perf_event_uncore_multiple.c, src/components/rapl/tests/rapl_basic.c, src/components/rapl/tests/rapl_overflow.c, src/ctests/all_native_events.c, src/ctests/attach2.c, src/ctests/attach3.c, src/ctests/attach_cpu.c, src/ctests/attach_target.c, src/ctests/branches.c, src/ctests/burn.c, src/ctests/byte_profile.c, src/ctests/calibrate.c, src/ctests/case1.c, src/ctests/case2.c, src/ctests/clockres_pthreads.c, src/ctests/cmpinfo.c, src/ctests/code2name.c, src/ctests/cycle_ratio.c, src/ctests/data_range.c, src/ctests/derived.c, src/ctests/describe.c, src/ctests/disable_component.c, src/ctests/dmem_info.c, src/ctests/earprofile.c, src/ctests/eventname.c, src/ctests/exec.c, src/ctests/exec2.c, src/ctests/exeinfo.c, src/ctests/first.c, 
src/ctests/flops.c, src/ctests/fork.c, src/ctests/fork2.c, src/ctests/forkexec.c, src/ctests/forkexec2.c, src/ctests/forkexec3.c, src/ctests/forkexec4.c, src/ctests/get_event_component.c, src/ctests/high-level.c, src/ctests/high-level2.c, src/ctests/hl_rates.c, src/ctests/hwinfo.c, src/ctests/inherit.c, src/ctests/ipc.c, src/ctests/johnmay2.c, src/ctests/krentel_pthreads.c, src/ctests/kufrin.c, src/ctests/locks_pthreads.c, src/ctests/low-level.c, src/ctests /matrix-hl.c, src/ctests/memory.c, src/ctests/mendes-alt.c, src/ctests/multiattach.c, src/ctests/multiattach2.c, src/ctests/multiplex1.c, src/ctests/multiplex1_pthreads.c, src/ctests/multiplex2.c, src/ctests/multiplex3_pthreads.c, src/ctests/nmi_watchdog.c, src/ctests/omptough.c, src/ctests/overflow.c, src/ctests/overflow2.c, src/ctests/overflow3_pthreads.c, src/ctests/overflow_allcounters.c, src/ctests/overflow_force_software.c, src/ctests/overflow_index.c, src/ctests/overflow_one_and_read.c, src/ctests/overflow_pthreads.c, src/ctests/overflow_single_event.c, src/ctests/overflow_twoevents.c, src/ctests/p4_lst_ins.c, src/ctests/prof_utils.c, src/ctests/profile.c, src/ctests/profile_pthreads.c, src/ctests/profile_twoevents.c, src/ctests/pthrtough.c, src/ctests/pthrtough2.c, src/ctests/realtime.c, src/ctests/remove_events.c, src/ctests/reset.c, src/ctests/reset_multiplex.c, src/ctests/sdsc.c, src/ctests/sdsc2.c, src/ctests/sdsc4.c, src/ctests/second.c, src/ctests/shlib.c, src/ctests/sprofile.c, src/ctests/tenth.c, src/ctests/thrspecific.c, src/ctests/timer_overflow.c, src/ctests/virttime.c, src/ctests/zero.c, src/ctests/zero_attach.c, src/ctests/zero_flip.c, src/ctests/zero_fork.c, src/ctests/zero_named.c, src/ctests/zero_omp.c, src/ctests/zero_pthreads.c, src/ctests/zero_shmem.c, src/ctests/zero_smp.c, src/testlib/do_loops.c, src/testlib/papi_test.h, src/testlib/test_utils.c: testlib: remove include of papi.h Need to explicitly include it in your test if you need it. 
* src/testlib/Makefile, src/testlib/do_loops.c, src/testlib/do_loops.h, src/testlib/dummy.c, src/utils/Makefile, src/utils/papi_command_line.c, src/utils/papi_cost.c: utils: remove last uses of testlib * src/utils/Makefile, src/utils/papi_hybrid_native_avail.c: utils: update papi_hybrid_native_avail to not depend on testlib * src/utils/papi_multiplex_cost.c: utils: clean up papi_multiplex_cost remove dependencies on papi_test.h print message warning that it can take a long time to run * .../perf_event/tests/perf_event_offcore_response.c, .../perf_event/tests/perf_event_system_wide.c, .../perf_event/tests/perf_event_user_kernel.c, .../perf_event_uncore/perf_event_uncore.c, .../tests/perf_event_amd_northbridge.c, .../perf_event_uncore/tests/perf_event_uncore.c, .../tests/perf_event_uncore_cbox.c, .../tests/perf_event_uncore_multiple.c, src/components/rapl/tests/rapl_basic.c, src/components/rapl/tests/rapl_overflow.c, src/ctests/all_native_events.c, src/ctests/attach2.c, src/ctests/attach3.c, src/ctests/branches.c, src/ctests/byte_profile.c, src/ctests/calibrate.c, src/ctests/data_range.c, src/ctests/describe.c, src/ctests/disable_component.c, src/ctests/earprofile.c, src/ctests/exec.c, src/ctests/exec2.c, src/ctests/exeinfo.c, src/ctests/first.c, src/ctests/forkexec.c, src/ctests/forkexec2.c, src/ctests/forkexec3.c, src/ctests/forkexec4.c, src/ctests/get_event_component.c, src/ctests/inherit.c, src/ctests/krentel_pthreads.c, src/ctests/kufrin.c, src/ctests/matrix-hl.c, src/ctests/multiplex1.c, src/ctests/multiplex1_pthreads.c, src/ctests/multiplex2.c, src/ctests/nmi_watchdog.c, src/ctests/overflow_allcounters.c, src/ctests/overflow_force_software.c, src/ctests/overflow_pthreads.c, src/ctests/overflow_single_event.c, src/ctests/overflow_twoevents.c, src/ctests/prof_utils.c, src/ctests/profile_pthreads.c, src/ctests/remove_events.c, src/ctests/reset.c, src/ctests/reset_multiplex.c, src/ctests/sdsc.c, src/ctests/sdsc2.c, src/ctests/sdsc4.c, src/ctests/second.c, 
src/ctests/shlib.c, src/ctests/timer_overflow.c, src/ctests/zero_named.c, src/testlib/do_loops.c, src/testlib/papi_test.h, src/testlib/test_utils.c, src/utils/Makefile, src/utils/cost_utils.c, src/utils/papi_command_line.c, src/utils/papi_cost.c, src/utils/papi_event_chooser.c: testlib: more header removal from papi_test.h * src/components/perf_event/tests/perf_event_system_wide.c, src/ctests/attach2.c, src/ctests/attach3.c, src/ctests/multiattach.c, src/ctests/multiattach2.c, src/ctests/zero_attach.c, src/testlib/papi_test.h, src/utils/cost_utils.c: testlib: remove a few more includes from papi_test.h * src/components/rapl/tests/rapl_basic.c, src/ctests/all_events.c, src/ctests/all_native_events.c, src/ctests/api.c, src/ctests/attach2.c, src/ctests/attach3.c, src/ctests/attach_cpu.c, src/ctests/attach_target.c, src/ctests/branches.c, src/ctests/burn.c, src/ctests/calibrate.c, src/ctests/case1.c, src/ctests/case2.c, src/ctests/clockres_pthreads.c, src/ctests/code2name.c, src/ctests/cycle_ratio.c, src/ctests/data_range.c, src/ctests/derived.c, src/ctests/describe.c, src/ctests/dmem_info.c, src/ctests/earprofile.c, src/ctests/eventname.c, src/ctests/exec.c, src/ctests/exec2.c, src/ctests/exeinfo.c, src/ctests/flops.c, src/ctests/fork.c, src/ctests/fork2.c, src/ctests/forkexec.c, src/ctests/forkexec2.c, src/ctests/forkexec3.c, src/ctests/forkexec4.c, src/ctests/high- level.c, src/ctests/high-level2.c, src/ctests/hl_rates.c, src/ctests/hwinfo.c, src/ctests/inherit.c, src/ctests/ipc.c, src/ctests/johnmay2.c, src/ctests/kufrin.c, src/ctests/locks_pthreads.c, src/ctests/low-level.c, src/ctests/max_multiplex.c, src/ctests/memory.c, src/ctests/multiattach.c, src/ctests/multiattach2.c, src/ctests/multiplex1.c, src/ctests/multiplex1_pthreads.c, src/ctests/multiplex2.c, src/ctests/multiplex3_pthreads.c, src/ctests/overflow.c, src/ctests/overflow2.c, src/ctests/overflow3_pthreads.c, src/ctests/overflow_allcounters.c, src/ctests/overflow_force_software.c, 
src/ctests/overflow_index.c, src/ctests/overflow_one_and_read.c, src/ctests/overflow_pthreads.c, src/ctests/overflow_single_event.c, src/ctests/overflow_twoevents.c, src/ctests/p4_lst_ins.c, src/ctests/prof_utils.c, src/ctests/profile.c, src/ctests/profile_pthreads.c, src/ctests/profile_twoevents.c, src/ctests/pthrtough.c, src/ctests/pthrtough2.c, src/ctests/realtime.c, src/ctests/sdsc.c, src/ctests/sdsc2.c, src/ctests/sdsc4.c, src/ctests/second.c, src/ctests/shlib.c, src/ctests/sprofile.c, src/ctests/tenth.c, src/ctests/thrspecific.c, src/ctests/timer_overflow.c, src/ctests/virttime.c, src/ctests/zero.c, src/ctests/zero_attach.c, src/ctests/zero_flip.c, src/ctests/zero_fork.c, src/ctests/zero_omp.c, src/ctests/zero_pthreads.c, src/ctests/zero_shmem.c, src/ctests/zero_smp.c, src/testlib/do_loops.c, src/testlib/dummy.c, src/testlib/papi_test.h, src/testlib/test_utils.c, src/utils/papi_command_line.c, src/utils/papi_cost.c: testlib: split some headers out of papi_test.h Too much is going on in that header, no need to have every include in the world in it. Trying to make the testcode more standalone so it is easier to follow. 
* src/testlib/Makefile, src/testlib/Makefile.target.in: testlib: let testlib build properly from within the testlib directory * src/testlib/clockcore.c: testlib: clockcore wasn't protecting all the output with !quiet * src/ctests/Makefile: ctests: make sure tests link against the right papi.h file * src/Makefile.inc, src/ctests/Makefile, src/ctests/Makefile.target.in: ctests: allow running "make" in the ctests directory to work 2017-06-20 Vince Weaver * src/Matlab/PAPI_Matlab.readme, src/papi.c, src/utils/papi_avail.c, src/utils/papi_clockres.c, src/utils/papi_command_line.c, src/utils/papi_component_avail.c, src/utils/papi_cost.c, src/utils/papi_decode.c, src/utils/papi_error_codes.c, src/utils/papi_event_chooser.c, src/utils/papi_hybrid_native_avail.c, src/utils/papi_mem_info.c, src/utils/papi_multiplex_cost.c, src/utils/papi_native_avail.c, src/utils/papi_version.c, src/utils/papi_xml_event_info.c: update the ptools-perfapi e-mail address in the auto-generated manpages it was still using the old ptools.org address. * doc/Makefile: docs: fix the manpage build after renaming the utils Thanks to Steve Kaufmann for catching this. 
* src/utils/Makefile, src/utils/papi_native_avail.c: utils: papi_native_avail: remove extraneous testing code * src/utils/Makefile, src/utils/papi_mem_info.c: utils: papi_mem_info: remove extraneous test code * src/utils/Makefile, src/utils/papi_xml_event_info.c: utils: papi_xml_event_info: remove extraneous test code * src/utils/Makefile, src/utils/papi_decode.c: utils: papi_decode: remove extraneous test code * src/utils/Makefile, src/utils/papi_error_codes.c: utils: papi_error_codes: remove extraneous test code * src/utils/Makefile, src/utils/papi_component_avail.c: utils: papi_component_avail: remove extraneous test code * src/ctests/clockres_pthreads.c, src/testlib/clockcore.c, src/testlib/clockcore.h, src/testlib/papi_test.h, src/utils/Makefile, src/utils/papi_clockres.c: utils: papi_clockres, remove extraneous test code * src/utils/Makefile, src/utils/papi_avail.c, src/utils/print_header.c, src/utils/print_header.h: utils: update papi_avail to not depend on testlibs It's not a test. * src/utils/Makefile: utils: add target for papi_hybrid_native_avail do not build it by default though? Should only be built if compiling for MIC? 
* src/utils/Makefile, src/utils/avail.c, src/utils/clockres.c, src/utils/command_line.c, src/utils/component.c, src/utils/cost.c, src/utils/decode.c, src/utils/error_codes.c, src/utils/event_chooser.c, src/utils/event_info.c, src/utils/hybrid_native_avail.c, src/utils/mem_info.c, src/utils/multiplex_cost.c, src/utils/native_avail.c, src/utils/papi_avail.c, src/utils/papi_clockres.c, src/utils/papi_command_line.c, src/utils/papi_component_avail.c, src/utils/papi_cost.c, src/utils/papi_decode.c, src/utils/papi_error_codes.c, src/utils/papi_event_chooser.c, src/utils/papi_hybrid_native_avail.c, src/utils/papi_mem_info.c, src/utils/papi_multiplex_cost.c, src/utils/papi_native_avail.c, src/utils/papi_xml_event_info.c: utils: rename the utils so the executable matches the filename This has bothered me for years, you want to fix "papi_native_avail" but there is no file in the tree called "papi_native_avail.c" * src/utils/Makefile, src/utils/papi_version.c, src/utils/version.c: utils: rename version.c to papi_version.c Also minor cleanups to the utility. * src/Makefile.inc, src/configure, src/configure.in, src/utils/Makefile, src/utils/Makefile.target.in: utils: clean up Makefile and build process of utils Now should be able to run "make" in the utils subdir and have it build. Also move the list of util files to build out of configure as I don't think there's any reason for having them there. * src/components/perf_event/pe_libpfm4_events.c: perf: fall back to operating system default events if libpfm4 lacks support This will allow use of PAPI on machines that Linux has support for, but libpfm4 has not added events yet. Still some limitations, for example the PAPI preset events won't work. 
* src/components/perf_event/pe_libpfm4_events.c, src/components/perf_event/perf_event.c: perf: report better errors if libpfm4 initialization fails * src/components/perf_event/pe_libpfm4_events.c: perf: pe_libpfm4_events: minor whitespace fixup * src/components/perf_event/pe_libpfm4_events.c: perf: pe_libpfm4_events: whitespace changes to make code easier to follow 2017-06-19 Vince Weaver * src/ctests/code2name.c: ctests/code2name: fix uninitialized variable warning * src/ctests/calibrate.c: ctests/calibrate: fix uninitialized variable warning * src/ctests/thrspecific.c: ctests: thrspecific fix so it finishes It's actually really unclear what this code is trying to test, but with optimization enabled it hung forever. Marking the variable being spun on as volatile fixes things but I think there is more wrong with the test than just that. * src/ctests/branches.c, src/ctests/sdsc.c, src/ctests/sdsc4.c: ctests: fix tests using "dummy3()" as a workload Now that we enable optimization on the ctests this breaks some of the benchmarks. dummy3() was being optimized away which caused segfaults and other problems. The tests don't crash now, but they still fail. Still investigating. 2016-10-12 Phil Mucci * src/configure: Regenerated configure with recent autoconf * src/configure.in: By default, we want -O1 on tests (TOPTFLAGS). -O0 is too literal and causes a number of tests who depend on peephole optimization to run. * src/utils/Makefile: Utils are installed therefore they should be built with production flags not test/debug flags * src/Makefile.inc: Make clean should not clean up libpfm. Thats for make distclean. We're not developing libpfm! 2016-07-04 Phil Mucci * src/ctests/mendes-alt.c, src/ctests/zero.c: Moved functions definitions to top of file to eliminate non-ANSI-C prototypes inside main. 
Modified message in zero to not turbo boost will also cause errors (cycles > real-time-cycle * src/Makefile.in, src/Makefile.inc, src/configure, src/configure.in: Remove EXTRA_CFLAGS, now CFLAGS. Added FTOPTS so compiling Fortran tests have same flags as ctests. Fix proper testing at configure time of libpfm for proper combinations of libpfm options * src/ftests/Makefile: Homogenize include flags * src/ctests/Makefile: Homogenize include flags * src/testlib/Makefile: Removed unnecessary defs and options * src/utils/Makefile: Removed unnecessary definitions and compiler options 2016-07-01 Phil Mucci * src/Makefile.in, src/Makefile.inc, src/Rules.perfctr-pfm, src/Rules.perfmon2, src/Rules.pfm4_pe, src/components/Makefile_comp_tests.target.in, src/components/perf_event/pe_libpfm4_events.c, src/configure, src/configure.in, src/ctests/Makefile, src/ctests/Makefile.target.in, src/ftests/Makefile, src/ftests/Makefile.target.in: Makefile.in: - Removed DEBUGFLAGS, NOTLS, PAPI_EVENTS_TABLE from being generated. These were not properly used. - Added LIBCFLAGS generated from configure for CFLAGS that ONLY apply to the library and the library code. NOT tests nor utilities. Previously we were propagating all kinds of bogus flags to the tests and utils. - CFLAGS is now properly set for compiler flags not defines etc. Makefile.inc: - Put papi_events_table.h in the right place. This is always the same name. Previous attempts at parameterizing this were broken and/or unnecessary. - Added dependency for the above in the right place and ALWAYS generate it, regardless of whether we actually include it in the library (vs load the CSV at runtime). Rules.perfctr-pfm - Removed conditional removal of events table during clean. Rules.perfmon2 - Removed conditional removal of events table during clean. Rules.pfm4_pe - Stopped mussing with CFLAGS which would pollute child builds but refer to LIBCFLAGS. CFLAGS is for everything! - Removed conditional removal of events table during clean. 
- Removed duplicate reference to papi_events_table.h components/perf_event/pe_libpfm4_events.c: - Removed HARDCODED include of a libpfm4 private header file. Wrong path and unnecessary include. This would break if you linked against another libpfm using any of the config options. components/perf_event/peu_libpfm4_events.c: - Removed HARDCODED include of a libpfm4 private header file. Wrong path and unnecessary include. This would break if you linked against another libpfm using any of the config options. components/Makefile_comp_tests.target.in: - Refer to datarootdir to make autoconf happy configure/configure.in: Regenerated using autoconf 2.69 and many modifications to serious brokenness. Lots of fixes: - Sanitize options for static inclusion of user and papi presets - Fix options that do not print out a result - Fix debug=yes to not include PAPI_MEMORY_MANAGEMENT. That's only enabled with debug=memory. This will reduce false positives when we debug. We don't want our own malloc/free changing behavior when we are trying to debug! - Fix CFLAGS/LIBCFLAGS/DEBUGFLAGS. configure now exports a variable called PAPICFLAGS which gets stuffed into LIBCFLAGS in Makefile.in. This variable IS ONLY for compiler flags relevant to the library. Previously we were exporting all sorts of stuff that would make our passes behave differently than user code. _GNU_SOURCE and -D_REENTRANT. That stuff is for the library and components. Not user code. - Update compile tests to use AC_LANG_SOURCE as required. - Fix clock timer checking output to now say what timer we picked instead of just skipping an answer - Same for virtual clock timer - Remove broken --with-papi-events option. - Fixed --with-static-tools option - Fixed/added --with-static-papi-events option (default) and --with-static-user-events option. - Fixed modalities of configuring whether to build a static/shared or both. - Fixed link of tests with shared libraries when above options don't support it. Modality again. 
Remove SETPATH/LIBPATH define, which won't work for ANY combination of --with-pfm-prefix/root/libdir except our included library. Woefully broken and would result in many false positive failures. If you are going to run the tests on the shared library it is now the user's responsibility to set LD_LIBRARY_PATH/LIBPATH correctly. I suspect this may irritate some, but broken 90% of the time is no excuse for correct 10% of the time especially when it could generate bug reports falsely. - Fixed with-static-tools, with-shlib-tools options to correct modalities. - Fixed all modalities with --with-pfm-prefix/root/libdir/incdir. Previously the build, configure and source files were still referring to pieces of code INSIDE our libpfm4 resulting in version skew and breakage. The way to test this stuff is to use --root or --prefix after removing the internal libpfm4 library. - Removed unnecessary and confusing force_pfm_incdir - Fixed with-pe-incdir option which, like before was most of the time referring to the libpfm4 included header file. Not good if one has a custom kernel! PECFLAGS now only appended to PAPICFLAGS(LIBCFLAGS). - Removal of DEBUGFLAGS. aix.c needs testing. Anyone have one? - Fixed CFLAGS for BSD - Add message for papi_events.csv ctests/Makefile ftests/Makefile - Don't redefine CC/CC_R/CFLAGS/FFLAGS. - Make these files consistent ctests/Makefile.target.in ftests/Makefile.target.in - refer to datarootdir as required 2016-06-27 Phil Mucci * src/testlib/Makefile, src/testlib/Makefile.target.in: Added explicit target for libtestlib.a. The all target should have been marked as .PHONY so as to avoid constant rebuilding. Also, we really should merge these two files into a master and an include. Maintaining two makefiles stinks! 2017-06-16 Vince Weaver * src/papi_fwrappers.c: fwrappers: papif_unregister_thread was misspelled as papif_unregster_thread This was noticed by Vedran Novakovic For an extremely long time (10+ years?) 
the fortran wrapper was misspelled as papif_unregster_thread() It's probably too late to fix this without potentially breaking things, so just add a duplicate function with the proper spelling and leave the old one too. * src/papi_preset.c: papi_preset: fix compiler warning This really confusing warning has been around for a while. gcc-6.3 reports it in a really odd way: papi_preset.c: In function ‘check_derived_events’: papi_preset.c:513:19: warning: ‘__s’ may be used uninitialized in this function$ int val = atoi(&subtoken[1]); ^~~~~~~~~~~~ papi_preset.c:464:1: note: ‘__s’ was declared here ops_string_merge(char **original, char *insertion, int replaces, int start_ind$ ^~~~~~~~~~~~~~~~ But there is no __s variable, or anything to do with where the arrows are pointing. gcc-5 gives a better warning: papi_preset.c: In function ‘check_derived_events’: papi_preset.c:513:14: warning: ‘tok_save_ptr’ may be used uninitialized in this$ int val = atoi(&subtoken[1]); ^ papi_preset.c:472:8: note: ‘tok_save_ptr’ was declared here char *tok_save_ptr; So the thing it seems to be complaining about is that the *saveptr parameter to strtok_r() is not set to NULL. According to the manpage I don't think this should be needed? But I think it should be safe to initialize it anyway. Tue Jun 6 11:09:17 2017 -0500 Will Schmidt * src/libpfm4/lib/events/power9_events.h, src/libpfm4/perf_examples/self_count.c, src/libpfm4/tests/validate_power.c: Update libpfm4 Current with commit ce5b320031f75f9a9881333c13902d5541f91cc8 add power9 entries to validate_power.c Hi, Update the validate_power test to include power9 entries. 
sniff-test run output: $ ./validate Libpfm structure tests: libpfm ABI version : 0 pfm_pmu_info_t : Passed pfm_event_info_t : Passed pfm_event_attr_info_t : Passed pfm_pmu_encode_arg_t : Passed pfm_perf_encode_arg_t : Passed Libpfm internal table tests: checking power9 (946 events): Passed Architecture specific tests: 20 PowerPC events: 0 errors All tests passed 2017-06-15 Vince Weaver * src/components/perf_event/pe_libpfm4_events.c, src/components/perf_event/pe_libpfm4_events.h, .../perf_event_uncore/Rules.perf_event_uncore, .../perf_event_uncore/perf_event_uncore.c, .../perf_event_uncore/peu_libpfm4_events.c, .../perf_event_uncore/peu_libpfm4_events.h: perf_event: merge the libpfm4 helper libraries perf_event and perf_event_uncore had their own almost exactly the same libpfm4 helper libraries. Maintaining both was a chore, and it looks like it is possible to just share one copy. This does mean that it is now not possible to configure the perf_event_uncore component without perf_event being enabled, but I am not sure if that was even possible to begin with. * src/components/perf_event/pe_libpfm4_events.c, .../perf_event_uncore/perf_event_uncore.c, .../perf_event_uncore/peu_libpfm4_events.c, .../perf_event_uncore/peu_libpfm4_events.h: perf_event_uncore: make the libpfm4 routines match even more * src/components/perf_event/pe_libpfm4_events.c, .../perf_event_uncore/peu_libpfm4_events.c: perf_event: make perf_event and perf_event uncore libpfm4 more similar it's a bad idea to have more or less two copies of the same code * src/components/perf_event/pe_libpfm4_events.c, .../perf_event_uncore/peu_libpfm4_events.c: perf_event: Avoid unintended libpfm build dependency due to PFM_PMU_MAX enum This patch is based on one sent by William Cohen The libpfm pfmlib.h file enumerates each of the performance monitoring units (PMUs) it can program in pfm_pmu_t type. The last enum in this type is PFM_PMU_MAX. 
Depending on which specific version of libpfm being used this specific value could vary. The problem is that PFM_PMU_MAX is statically defined in the pfmlib.h file and this was being used as a loop bounds when iterating to determine which PMUs are potentially available. If PAPI was built with an older version of libpfm and then run with a newer libpfm shared library on a machine with a larger PFM_PMU_MAX value, none of the PMUs past the smaller PFM_PMU_MAX used for the build would be examined or enabled. 2017-06-15 Heike Jagode (jagode@icl.utk.edu) * src/components/infiniband/linux-infiniband.c: Updated infiniband component so that it works for mofed driver version 4.0, where directory counters_ext in sysfs fs has changed to hw_counters. This update to the component makes it work for both directory names: - counters_ext for mofed driver version <4.0, and - hw_counters for mofed driver version >=4.0 This change has not been fully tested yet due to missing access to machine with updated version of mofed driver. (CORAL machines will have an updated version of this driver.) 2017-05-04 Vince Weaver * src/components/rapl/linux-rapl.c: rapl: broadwell-ep DRAM units are special (like Haswell-EP) The Linux kernel perf interface had this wrong too. I noticed this in my cluster computing class, the Broadwell-EP DRAM results were unrealistically high values. Fri Apr 21 17:33:15 2017 -0700 William Cohen * src/libpfm4/README, src/libpfm4/include/perfmon/pfmlib.h, src/libpfm4/lib/Makefile, src/libpfm4/lib/events/power9_events.h, src/libpfm4/lib/pfmlib_common.c, src/libpfm4/lib/pfmlib_power9.c, src/libpfm4/lib/pfmlib_power_priv.h, src/libpfm4/lib/pfmlib_priv.h, src/libpfm4/lib/pfmlib_s390x_cpumf.c: Update libpfm4\n\nCurrent with\n commit 8385268c98553cb5dec9ca86bbad3e5c44a2ab16 fix internal pfm_event_attr_info_t use for S390X Commit 321133e converted most of the architectures to use the internal perflib_event_attr_info_t type. 
However, the s390 was missed in that previous commit. This patch corrects the issue so libpfm compiles on s390. 2017-04-20 Stephen Wood * src/extras.c, src/papi.h, src/papi_fwrappers.c, src/papi_hl.c, src/papi_internal.c: cast pointers appropriately to avoid warnings and errors 2017-04-19 Sangamesh Ragate * src/papi_events.csv: Mapped PAPI_L2_ICM preset event to PM_INST_FROM_L2MISS native event for Power8 2017-04-06 Asim YarKhan * src/ftests/fmatrixlowpapi.F: Fixed: This fortran test exceeded 72 columns and made the default Intel ifort compilation unhappy Wed Apr 5 23:35:44 2017 -0700 Andreas Beckmann * src/libpfm4/docs/man3/libpfm_arm_ac53.3, src/libpfm4/docs/man3/libpfm_arm_ac57.3, src/libpfm4/docs/man3/libpfm_arm_xgene.3, src/libpfm4/lib/Makefile, src/libpfm4/lib/events/arm_cortex_a53_events.h, src/libpfm4/lib/events/intel_glm_events.h, src/libpfm4/lib/events/intel_hswep_unc_imc_events.h, src/libpfm4/lib/events/intel_ivbep_unc_imc_events.h, src/libpfm4/lib/events/intel_knl_events.h, src/libpfm4/lib/events/intel_knl_unc_cha_events.h, src/libpfm4/lib/events/power4_events.h, src/libpfm4/lib/events/ppc970_events.h, src/libpfm4/lib/events/ppc970mp_events.h, src/libpfm4/perf_examples/self_smpl_multi.c: Update libpfm4\n\nCurrent with\n commit 71a960d9c17b663137a2023ce63edd2f3ca115f5 fix various event description typos This patch fixes the typos in several event descriptions for Intel, Arm, and Power event tables. 2017-03-30 William Cohen * src/ftests/cost.F, src/ftests/first.F, src/ftests/fmatrixlowpapi.F, src/ftests/second.F: Eliminate warnings about implicit type conversions in Fortran tests The gfortran compiler on Fedora 25 was giving warnings indicating that a few of the tests were doing implicit type conversion between reals and ints. Those implicit conversions have been made explicit to eliminate the fortran compiler warning messages. 
Tue Apr 4 09:42:25 2017 -0700 Stephane Eranian * src/libpfm4/include/perfmon/pfmlib.h, src/libpfm4/lib/pfmlib_amd64.c, src/libpfm4/lib/pfmlib_amd64_priv.h, src/libpfm4/lib/pfmlib_arm.c, src/libpfm4/lib/pfmlib_arm_priv.h, src/libpfm4/lib/pfmlib_common.c, src/libpfm4/lib/pfmlib_intel_netburst.c, src/libpfm4/lib/pfmlib_intel_nhm_unc.c, src/libpfm4/lib/pfmlib_intel_snbep_unc.c, src/libpfm4/lib/pfmlib_intel_snbep_unc_priv.h, src/libpfm4/lib/pfmlib_intel_x86.c, src/libpfm4/lib/pfmlib_intel_x86_perf_event.c, src/libpfm4/lib/pfmlib_intel_x86_priv.h, src/libpfm4/lib/pfmlib_mips.c, src/libpfm4/lib/pfmlib_mips_priv.h, src/libpfm4/lib/pfmlib_perf_event.c, src/libpfm4/lib/pfmlib_perf_event_pmu.c, src/libpfm4/lib/pfmlib_perf_event_raw.c, src/libpfm4/lib/pfmlib_power_priv.h, src/libpfm4/lib/pfmlib_powerpc.c, src/libpfm4/lib/pfmlib_priv.h, src/libpfm4/lib/pfmlib_sparc.c, src/libpfm4/lib/pfmlib_sparc_priv.h, src/libpfm4/lib/pfmlib_torrent.c, src/libpfm4/tests/validate.c, src/libpfm4/tests/validate_x86.c: Update libpfm4\n\nCurrent with\n commit 5e311841e5d70efb93d11826109cb5acab6e051c enable 38-bit raw umasks for Intel offcore_response events This patch enables support for passing and encoding of 38-bit offcore_response matrix umask. Without the patch, the raw umask was limited to 32-bit which is not enough to cover all the possible bits of the offcore_response event available since Intel SandyBridge. $ examples/check_events offcore_response_0:0xffffff Requested Event: offcore_response_0:0xffffff Actual Event: ivb::OFFCORE_RESPONSE_0:0xffffff:k=1:u=1:e=0:i=0:c=0:t=0 PMU : Intel Ivy Bridge IDX : 155189325 Codes : 0x5301b7 0xffffff The patch also adds tests to the validation code. 2017-03-29 Vince Weaver * src/components/perfctr/perfctr-x86.c: perfctr: fix perfctr component to actually work Simple one-line typo means perfctr was not working, probably for years. I've tested on a 2.6.32-perfctr kernel and it works again. 
2017-03-28 Vince Weaver * src/papi_events.csv: papi_events: add AMD fam16h jaguar events These will become useful if/when the contributed libpfm4 jaguar patches get applied. 2017-03-27 Vince Weaver * src/papi_events.csv: events: p4: change the PAPI_TOT_CYC event PAPI_TOT_CYC wasn't working on Pentium4 because the GLOBAL_POWER_EVENT:RUNNING event was being grabbed by the hardware watchdog. perf cycles:u was still working, that's because the kernel transparently remaps the cycles event to an alias when global_power_event's slot is taken. The aliased event is the unwieldy: execution_event:nbogus0:nbogus1:nbogus2:nbogus3:bogus0:bogus1:bogus2:bogus3:cmpl:thr=15 which does seem to give the right results. Use this event instead by default on Pentium 4 * src/components/perf_event/perf_event.c: perf_event: fix warning when compiling with debug enabled the flags field is an unsigned long, not an int 2017-03-22 Vince Weaver * src/components/perf_event/perf_event.c: perf_event: don't allocate a mmap page if not rdpmc or sampling * src/components/perf_event/perf_event.c: perf_event: only allocate 1 mmap page (rather than 3) if not sampling Next step is to allocate 0 mmap pages unless rdpmc is enabled * src/components/perf_event/perf_event.c, src/components/perf_event/perf_event_lib.h: perf_event: update the _pe_set_overflow() call Working on making it more obvious which events are sampling (and thus need mmap buffers) or not. Also there were some bugs in the handling of having multiple overflow sources per eventset, though I'm not sure if PAPI actually handles that. * src/components/perf_event/perf_event.c: perf_event: turn off fast_counter_read if mmaps fail By default on Linux perf_event can't use more than 516kB of mmap space. So perf_event-rdpmc would fail after you added a large number (>32) of events. This shows up on the kufrin benchmark on some machines. This fix makes PAPI fall back to non-rdpmc if an mmap error happens. 
I'm also going to try to tune the mmap usage a bit to make the limits a bit higher. 2017-03-21 Asim YarKhan * src/configure: configure script updated using autoconf-2.59 2017-03-20 Vince Weaver * src/components/perf_event/perf_event.c, src/configure.in: configure: enable rdpmc with --enable-perfevent-rdpmc=yes Make this an option to configure. Defaults to no. Need to find a machine with autoconf 2.59 on and I'll regenerate configure as well. 2017-03-16 Vince Weaver * src/components/perf_event/perf_event.c: perf_event: try to work around exclude_guest issue run a test at startup to see if events with exclude_guest fail. libpfm4 sets this by default, but older kernels will fail because this was previously a reserved (must be zero) field. 2017-03-14 Vince Weaver * src/ctests/multiattach.c: tests: multiattach: whitespace/comments/clarifications digging through the code trying to figure out why it fails with rdpmc enabled. it turns out it is seeing wrong running/enabled multiplexing results even though we aren't multiplexing tracking this down is a pain because we can't strace/ltrace due to the code using ptrace to start/stop processes. 2017-03-09 Vince Weaver * src/components/perf_event/perf_event.c: perf_event: can't mmap() an inherited event this is why the inherit test was failing * src/components/perf_event/perf_event.c, src/components/perf_event/perf_helpers.h: perf_event: add rdpmc support (but disabled) finally add the rdpmc code, but it still fails on a few tests so it is disabled by default. * src/components/perf_event/perf_event.c, src/components/perf_event/perf_event_lib.h: perf_event: make all events come with a mmap buffer This wastes some address space, but having separate codepaths for rdpmc/regular/sampling/profiling would be hard to maintain. Had to remove some assumptions from the profiling/sampling code that mmap_buf means sampling is happening. 
* src/components/perf_event/perf_event.c: perf_event: add check for paranoid==3 Recent distributions are *completely* disabling perf_event by default with their vendor kernels (this is not upstream yet). Have PAPI detect and disable the perf_event component if this is detected. * src/components/perf_event/perf_event.c: perf_event: split close_pe_events() into two functions * src/components/perf_event/perf_event.c, src/components/perf_event/perf_helpers.h: perf_event: more whitespace / rearrangement should not be any changes to actual code, is just whitespace/comment/function movement I know changes like this make the git history harder to follow, but it really helps when trying to follow the code when working on major changes. 2017-03-08 Vince Weaver * src/components/perf_event/perf_event.c: perf_event: more whitespace/comment cleanups digging through the code, still prepping for rdpmc 2017-03-07 Vince Weaver * src/components/perf_event/perf_helpers.h: perf_event: rdpmc: need to sign extend offset too Otherwise things stop working after a PAPI_reset() * src/components/perf_event/perf_event.c: perf_event: split up _pe_read() makes the code a bit easier to follow. also prep for rdpmc() * src/components/perf_event/perf_event.c: perf_event: clean up whitespace in _pe_read 2017-03-08 Vince Weaver * src/ctests/first.c: ctests: first: white space cleanups minor things noticed when trying to figure out why it was failing with rdpmc (the answer was rdpmc code not handling PAPI_reset()) 2017-03-07 Vince Weaver * src/components/perf_event/perf_helpers.h: perf_event: recent changes broke build on non-x86 an ifdef was in the wrong location. * src/components/perf_event/perf_event.c, src/components/perf_event/perf_helpers.h: perf_event: update rdpmc detection * src/utils/component.c: utils: component_avail: clean up -d (detailed) results print rdpmc status, as well as line things up. Also don't print redundant info, now that a lot more fields are printed by default. 
* src/utils/component.c: utils: component_avail: whitespace/grammar fixes * src/components/perf_event/Rules.perf_event, src/components/perf_event/perf_helpers.h: perf_event: add mmap/rdpmc routine we don't use it yet 2017-03-06 Vince Weaver * src/components/perf_event/perf_helpers.h: perf_event: add rdtsc() and rdpmc() inline-assembly * src/components/perf_event/perf_event.c, src/components/perf_event/perf_helpers.h: perf_event: move perf_event_open() code to a helper file We'll be adding some other helpers to this file too. 2017-03-03 Vince Weaver * src/components/perf_event/perf_event.c: perf_event: move bug_sync_read() check out of line we should eventually just phase out a lot of these checks for older kernels, but it gets tricky as long as RHEL is shipping 2.6.32. With this change on my IVB machine PAPI_read() cost went from mean cycles : 932.158549 std deviation: 358.752461 to mean cycles : 896.642644 std deviation: 305.568268 * src/components/perf_event/pe_libpfm4_events.c, src/components/perf_event/pe_libpfm4_events.h, src/components/perf_event/perf_event.c: perf_event: remove _pe_libpfm4_get_cidx() helper function easier to explicitly pass it to the libpfm4 event code * src/components/perf_event/perf_event_lib.h: perf_event: wakeup_mode field is no longer used * src/components/perf_event/perf_event.c: perf_event: remove WAKEUP_MODE_ defines These date back to initial perf_event support, but were never used. Probably were meant in case advanced sampling/profiling was ever implemented, but it wasn't. 
* src/components/perf_event/perf_event.c: perf_event.c: split setup_mmap() to its own function non-sampling events will need to have mmap buffers when we move to rdpmc() * src/components/perf_event/perf_event.c: perf_event: rename tune_up_fd to configure_fd_for_sampling makes it a bit more clear what is going on * src/components/perf_event/perf_event.c: perf_event: remove extraneous whitespace 2017-02-24 Vince Weaver * src/utils/cost.c: papi_cost: wasn't properly resetting the event search after POSTFIX This means some architectures could have skipped the ADD/SUB test even though such events were available. Wed Feb 22 01:16:42 2017 -0800 Stephane Eranian * src/libpfm4/lib/events/intel_bdw_events.h, src/libpfm4/lib/events/intel_skl_events.h, src/libpfm4/lib/pfmlib_intel_rapl.c, src/libpfm4/tests/validate_x86.c: Update libpfm4\n\nCurrent with\n commit 1bd352eef242f53e130c3b025bbf7881a5fb5d1e update Intel RAPL processor support Added Kabylake, Skylake X Added PSYS RAPL event for Skylake client. 2017-02-17 Vince Weaver * src/utils/cost.c: papi_cost: clear eventset before derived add test we weren't clearing the eventset after the derived postfix test to the add test was actually measuring two derived events. This was noticed on broadwell-ep where papi_cost would fail due to the lack of enough counters to have both the postfix and add events at the same time. 2017-01-23 Asim YarKhan * RELEASENOTES.txt: Fixing the date in the RELEASENOTES file. papi-5.6.0/src/libpfm4/lib/pfmlib_arm_armv8.c000664 001750 001750 00000011661 13216244365 023027 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_arm_armv8.c : support for ARMv8 processors * * Copyright (c) 2014 Google Inc. 
All rights reserved * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* */ /* NOTE(review): the three bare #include directives below carry no header name in this copy of the file; presumably the angle-bracket targets were stripped -- confirm against upstream libpfm4 before building. */ #include #include #include /* private headers */ #include "pfmlib_priv.h" /* library private */ #include "pfmlib_arm_priv.h" #include "events/arm_cortex_a57_events.h" /* A57 event tables */ #include "events/arm_cortex_a53_events.h" /* A53 event tables */ #include "events/arm_xgene_events.h" /* Applied Micro X-Gene tables */ /* Detect an ARM Cortex A57. Calls pfm_arm_detect(this) and returns PFM_SUCCESS only when that call succeeds and pfm_arm_cfg reports implementer 0x41 with part 0xd07. Every other outcome, including any pfm_arm_detect() failure code, is reported as PFM_ERR_NOTSUPP. */ static int pfm_arm_detect_cortex_a57(void *this) { int ret; ret = pfm_arm_detect(this); if (ret != PFM_SUCCESS) return PFM_ERR_NOTSUPP; if ((pfm_arm_cfg.implementer == 0x41) && /* implementer code for ARM */ (pfm_arm_cfg.part == 0xd07)) { /* part number for Cortex A57 */ return PFM_SUCCESS; } return PFM_ERR_NOTSUPP; } /* Detect an ARM Cortex A53. Same flow as the A57 routine, but the accepted pair is implementer 0x41 with part 0xd03; anything else yields PFM_ERR_NOTSUPP. */ static int pfm_arm_detect_cortex_a53(void *this) { int ret; ret = pfm_arm_detect(this); if (ret != PFM_SUCCESS) return PFM_ERR_NOTSUPP; if ((pfm_arm_cfg.implementer == 0x41) && /* implementer code for ARM */ (pfm_arm_cfg.part == 0xd03)) { /* part number for Cortex A53 */ return PFM_SUCCESS; } return PFM_ERR_NOTSUPP; } /* Detect an Applied Micro X-Gene. Same flow as the routines above, but the accepted pair is implementer 0x50 with part 0x000; anything else yields PFM_ERR_NOTSUPP. */ static int pfm_arm_detect_xgene(void *this) { int ret; ret = pfm_arm_detect(this); if (ret != PFM_SUCCESS) return PFM_ERR_NOTSUPP; if ((pfm_arm_cfg.implementer == 0x50) && /* implementer code for Applied Micro */ (pfm_arm_cfg.part == 0x000)) { /* part number for Applied Micro X-Gene */ return PFM_SUCCESS; } return PFM_ERR_NOTSUPP; } /* ARM Cortex A57 support: PMU descriptor that binds the arm_cortex_a57_pe event table and the pfm_arm_detect_cortex_a57 probe to the shared pfm_arm_* callbacks; it sets .num_cntrs = 6 and .max_encoding = 1. */ pfmlib_pmu_t arm_cortex_a57_support={ .desc = "ARM Cortex A57", .name = "arm_ac57", .pmu = PFM_PMU_ARM_CORTEX_A57, .pme_count = LIBPFM_ARRAY_SIZE(arm_cortex_a57_pe), .type = PFM_PMU_TYPE_CORE, .pe = arm_cortex_a57_pe, .pmu_detect = pfm_arm_detect_cortex_a57, .max_encoding = 1, .num_cntrs = 6, .get_event_encoding[PFM_OS_NONE] = pfm_arm_get_encoding, PFMLIB_ENCODE_PERF(pfm_arm_get_perf_encoding), .get_event_first = pfm_arm_get_event_first, .get_event_next = pfm_arm_get_event_next, .event_is_valid = pfm_arm_event_is_valid, .validate_table = pfm_arm_validate_table, .get_event_info = pfm_arm_get_event_info, .get_event_attr_info = pfm_arm_get_event_attr_info, PFMLIB_VALID_PERF_PATTRS(pfm_arm_perf_validate_pattrs), .get_event_nattrs = pfm_arm_get_event_nattrs, }; /* ARM
Cortex A53 support */ pfmlib_pmu_t arm_cortex_a53_support={ .desc = "ARM Cortex A53", .name = "arm_ac53", .pmu = PFM_PMU_ARM_CORTEX_A53, .pme_count = LIBPFM_ARRAY_SIZE(arm_cortex_a53_pe), .type = PFM_PMU_TYPE_CORE, .pe = arm_cortex_a53_pe, .pmu_detect = pfm_arm_detect_cortex_a53, .max_encoding = 1, .num_cntrs = 6, .get_event_encoding[PFM_OS_NONE] = pfm_arm_get_encoding, PFMLIB_ENCODE_PERF(pfm_arm_get_perf_encoding), .get_event_first = pfm_arm_get_event_first, .get_event_next = pfm_arm_get_event_next, .event_is_valid = pfm_arm_event_is_valid, .validate_table = pfm_arm_validate_table, .get_event_info = pfm_arm_get_event_info, .get_event_attr_info = pfm_arm_get_event_attr_info, PFMLIB_VALID_PERF_PATTRS(pfm_arm_perf_validate_pattrs), .get_event_nattrs = pfm_arm_get_event_nattrs, }; /* Applied Micro X-Gene support */ pfmlib_pmu_t arm_xgene_support={ .desc = "Applied Micro X-Gene", .name = "arm_xgene", .pmu = PFM_PMU_ARM_XGENE, .pme_count = LIBPFM_ARRAY_SIZE(arm_xgene_pe), .type = PFM_PMU_TYPE_CORE, .pe = arm_xgene_pe, .pmu_detect = pfm_arm_detect_xgene, .max_encoding = 1, .num_cntrs = 4, .get_event_encoding[PFM_OS_NONE] = pfm_arm_get_encoding, PFMLIB_ENCODE_PERF(pfm_arm_get_perf_encoding), .get_event_first = pfm_arm_get_event_first, .get_event_next = pfm_arm_get_event_next, .event_is_valid = pfm_arm_event_is_valid, .validate_table = pfm_arm_validate_table, .get_event_info = pfm_arm_get_event_info, .get_event_attr_info = pfm_arm_get_event_attr_info, PFMLIB_VALID_PERF_PATTRS(pfm_arm_perf_validate_pattrs), .get_event_nattrs = pfm_arm_get_event_nattrs, }; papi-5.6.0/src/ctests/ipc.c000664 001750 001750 00000003756 13216244360 017560 0ustar00jshenry1963jshenry1963000000 000000 /* * A simple example for the use of PAPI, using PAPI_ipc * -Kevin London */ #include #include #include "papi.h" #include "papi_test.h" #define INDEX 500 int main( int argc, char **argv ) { extern void dummy( void * ); float matrixa[INDEX][INDEX], matrixb[INDEX][INDEX], mresult[INDEX][INDEX]; float 
real_time, proc_time, ipc; long long ins; int retval; int i, j, k; int quiet; /* Set TESTS_QUIET variable */ quiet=tests_quiet( argc, argv ); /* Initialize the Matrix arrays */ for( i = 0; i < INDEX; i++ ) { for( j= 0; j < INDEX; j++ ) { mresult[i][j] = 0.0; matrixa[i][j] = matrixb[i][j] = ( float ) rand( ) * ( float ) 1.1; } } /* Setup PAPI library and begin collecting data from the counters */ retval = PAPI_ipc( &real_time, &proc_time, &ins, &ipc ); if (retval < PAPI_OK ) { if (!quiet) printf("Trouble starting IPC\n"); test_skip( __FILE__, __LINE__, "PAPI_ipc", retval ); } /* Matrix-Matrix multiply */ for ( i = 0; i < INDEX; i++ ) for ( j = 0; j < INDEX; j++ ) for ( k = 0; k < INDEX; k++ ) mresult[i][j] = mresult[i][j] + matrixa[i][k] * matrixb[k][j]; /* Collect the data into the variables passed in */ if ( ( retval = PAPI_ipc( &real_time, &proc_time, &ins, &ipc ) ) < PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_ipc", retval ); dummy( ( void * ) mresult ); if ( !quiet ) { printf( "Real_time: %f Proc_time: %f Total ins: ", real_time, proc_time ); printf( LLDFMT, ins ); printf( " IPC: %f\n", ipc ); } /* This should not happen unless the optimizer */ /* gets too good */ if (ins < INDEX*INDEX) { test_fail( __FILE__, __LINE__, "Instruction count too low.", 5 ); } /* Something is broken, or else you have a really */ /* slow processor */ if (ipc<0.01 ) { test_fail( __FILE__, __LINE__, "IPC equals zero.", 5 ); } test_pass( __FILE__ ); return 0; } papi-5.6.0/src/libpfm4/docs/man3/libpfm_amd64.3000664 001750 001750 00000001215 13216244363 022776 0ustar00jshenry1963jshenry1963000000 000000 .TH LIBPFM 3 "August, 2010" "" "Linux Programmer's Manual" .SH NAME libpfm_amd64 - support for AMD64 processors .SH SYNOPSIS .nf .B #include .sp .SH DESCRIPTION The library supports all AMD64 processors in both 32 and 64-bit modes. 
The support is broken down into three groups: .TP .B AMD K7 processors (family 6) .TP .B AMD K8 processors (family 15) .TP .B AMD Family 10h processors (family 16) .sp .TP Each group has a distinct man page. See links below. .SH SEE ALSO libpfm_amd64_k7(3), libpfm_amd64_k8(3), libpfm_amd64_fam10h(3) .SH AUTHORS .nf Stephane Eranian Robert Richter .fi .PP papi-5.6.0/src/papi_bipartite.h000664 001750 001750 00000014310 13216244366 020473 0ustar00jshenry1963jshenry1963000000 000000 /* * File: papi_bipartite.h * Author: Dan Terpstra * terpstra@eecs.utk.edu * Mods: * */ /* This file contains one function: _papi_bipartite_alloc() Its role is to act as an execution harness for implementing a recursive Modified Bipartite Graph allocation of counter resources for those platforms that don't have built-in smart counter allocation. It is intended to be #included in the cpu component source to minimize other disruption to the build process. This routine presumes the existence of a half dozen "bpt_" helper routines. Prototypes for these routines are given below. success return 1 fail return 0 */ /* This function examines the event to determine if it can be mapped to counter ctr. Returns true if it can, false if it can't. */ static int _bpt_map_avail( hwd_reg_alloc_t * dst, int ctr ); /* This function forces the event to be mapped to only counter ctr. Returns nothing. */ static void _bpt_map_set( hwd_reg_alloc_t * dst, int ctr ); /* This function examines the event to determine if it has a single exclusive mapping. Returns true if exclusive, false if non-exclusive. */ static int _bpt_map_exclusive( hwd_reg_alloc_t * dst ); /* This function compares the dst and src events to determine if any resources are shared. Typically the src event is exclusive, so this detects a conflict if true. Returns true if conflict, false if no conflict. 
*/ static int _bpt_map_shared( hwd_reg_alloc_t * dst, hwd_reg_alloc_t * src ); /* This function removes shared resources available to the src event from the resources available to the dst event, and reduces the rank of the dst event accordingly. Typically, the src event will be exclusive, but the code shouldn't assume it. Returns nothing. */ static void _bpt_map_preempt( hwd_reg_alloc_t * dst, hwd_reg_alloc_t * src ); static void _bpt_map_update( hwd_reg_alloc_t * dst, hwd_reg_alloc_t * src ); static int _papi_bipartite_alloc( hwd_reg_alloc_t * event_list, int count, int cidx ) { int i, j; char *ptr = ( char * ) event_list; int idx_q[count]; /* queue of indexes of lowest rank events */ int map_q[count]; /* queue of mapped events (TRUE if mapped) */ int head, tail; int size = _papi_hwd[cidx]->size.reg_alloc; /* build a queue of indexes to all events that live on one counter only (rank == 1) */ head = 0; /* points to top of queue */ tail = 0; /* points to bottom of queue */ for ( i = 0; i < count; i++ ) { map_q[i] = 0; if ( _bpt_map_exclusive( ( hwd_reg_alloc_t * ) & ptr[size * i] ) ) idx_q[tail++] = i; } /* scan the single counter queue looking for events that share counters. If two events can live only on one counter, return failure. If the second event lives on more than one counter, remove shared counter from its selector and reduce its rank. Mark first event as mapped to its counter. 
*/ while ( head < tail ) { for ( i = 0; i < count; i++ ) { if ( i != idx_q[head] ) { if ( _bpt_map_shared( ( hwd_reg_alloc_t * ) & ptr[size * i], ( hwd_reg_alloc_t * ) & ptr[size * idx_q [head]] ) ) { /* both share a counter; if second is exclusive, mapping fails */ if ( _bpt_map_exclusive( ( hwd_reg_alloc_t * ) & ptr[size * i] ) ) return 0; else { _bpt_map_preempt( ( hwd_reg_alloc_t * ) & ptr[size * i], ( hwd_reg_alloc_t * ) & ptr[size * idx_q [head]] ); if ( _bpt_map_exclusive( ( hwd_reg_alloc_t * ) & ptr[size * i] ) ) idx_q[tail++] = i; } } } } map_q[idx_q[head]] = 1; /* mark this event as mapped */ head++; } if ( tail == count ) { return 1; /* idx_q includes all events; everything is successfully mapped */ } else { char *rest_event_list; char *copy_rest_event_list; int remainder; rest_event_list = papi_calloc( _papi_hwd[cidx]->cmp_info.num_cntrs, size ); copy_rest_event_list = papi_calloc( _papi_hwd[cidx]->cmp_info.num_cntrs, size ); if ( !rest_event_list || !copy_rest_event_list ) { if ( rest_event_list ) papi_free( rest_event_list ); if ( copy_rest_event_list ) papi_free( copy_rest_event_list ); return ( 0 ); } /* copy all unmapped events to a second list and make a backup */ for ( i = 0, j = 0; i < count; i++ ) { if ( map_q[i] == 0 ) { memcpy( ©_rest_event_list[size * j++], &ptr[size * i], ( size_t ) size ); } } remainder = j; memcpy( rest_event_list, copy_rest_event_list, ( size_t ) size * ( size_t ) remainder ); /* try each possible mapping until you fail or find one that works */ for ( i = 0; i < _papi_hwd[cidx]->cmp_info.num_cntrs; i++ ) { /* for the first unmapped event, try every possible counter */ if ( _bpt_map_avail( ( hwd_reg_alloc_t * ) rest_event_list, i ) ) { _bpt_map_set( ( hwd_reg_alloc_t * ) rest_event_list, i ); /* remove selected counter from all other unmapped events */ for ( j = 1; j < remainder; j++ ) { if ( _bpt_map_shared( ( hwd_reg_alloc_t * ) & rest_event_list[size * j], ( hwd_reg_alloc_t * ) rest_event_list ) ) _bpt_map_preempt( ( 
hwd_reg_alloc_t * ) & rest_event_list[size * j], ( hwd_reg_alloc_t * ) rest_event_list ); } /* if recursive call to allocation works, break out of the loop */ if ( _papi_bipartite_alloc ( ( hwd_reg_alloc_t * ) rest_event_list, remainder, cidx ) ) break; /* recursive mapping failed; copy the backup list and try the next combination */ memcpy( rest_event_list, copy_rest_event_list, ( size_t ) size * ( size_t ) remainder ); } } if ( i == _papi_hwd[cidx]->cmp_info.num_cntrs ) { papi_free( rest_event_list ); papi_free( copy_rest_event_list ); return 0; /* fail to find mapping */ } for ( i = 0, j = 0; i < count; i++ ) { if ( map_q[i] == 0 ) _bpt_map_update( ( hwd_reg_alloc_t * ) & ptr[size * i], ( hwd_reg_alloc_t * ) & rest_event_list[size * j++] ); } papi_free( rest_event_list ); papi_free( copy_rest_event_list ); return 1; } } papi-5.6.0/src/validation_tests/flops_validation.c000664 001750 001750 00000017166 13216244370 024412 0ustar00jshenry1963jshenry1963000000 000000 /* flops.c, based on the hl_rates.c ctest * * This test runs a "classic" matrix multiply * and then runs it again with the inner loop swapped. * the swapped version should have better MFLIPS/MFLOPS/IPC and we test that. 
*/ #include #include #include "papi.h" #include "papi_test.h" #include "testcode.h" int main( int argc, char **argv ) { int retval; double rtime, ptime, mflips, mflops, ipc; long long flips=0, flops=0, ins[2]; double rtime_start,rtime_end; double ptime_start,ptime_end; double rtime_classic,rtime_swapped; double mflips_classic,mflips_swapped; double mflops_classic,mflops_swapped; double ipc_classic,ipc_swapped; int quiet,event_added_flips,event_added_flops,event_added_ipc; int eventset=PAPI_NULL; /* Set TESTS_QUIET variable */ quiet=tests_quiet( argc, argv ); /* Init the PAPI library */ retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) { test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); } /* Create the eventset */ retval=PAPI_create_eventset(&eventset); if (retval!=PAPI_OK) { test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); } /* Initialize the test matrix */ flops_float_init_matrix(); /************************/ /* FLIPS */ /************************/ if (!quiet) { printf( "\n----------------------------------\n" ); printf( "PAPI_flips\n"); } /* Add FP_INS event */ retval=PAPI_add_named_event(eventset,"PAPI_FP_INS"); if (retval!=PAPI_OK) { if (!quiet) fprintf(stderr,"PAPI_FP_INS not available!\n"); event_added_flips=0; } else { event_added_flips=1; } if (event_added_flips) { PAPI_start(eventset); } rtime_start=PAPI_get_real_usec(); ptime_start=PAPI_get_virt_usec(); // Flips classic flops_float_matrix_matrix_multiply(); rtime_end=PAPI_get_real_usec(); ptime_end=PAPI_get_virt_usec(); if (event_added_flips) { PAPI_stop(eventset,&flips); } rtime=rtime_end-rtime_start; ptime=ptime_end-ptime_start; mflips=flips/rtime; if (!quiet) { printf( "\nClassic\n"); printf( "real time: %lf\n", rtime); printf( "process time: %lf\n", ptime); printf( "FP Instructions: %lld\n", flips); printf( "MFLIPS %lf\n", mflips); } mflips_classic=mflips; // Flips swapped rtime_start=PAPI_get_real_usec(); ptime_start=PAPI_get_virt_usec(); if 
(event_added_flips) { PAPI_reset(eventset); PAPI_start(eventset); } flops_float_swapped_matrix_matrix_multiply(); rtime_end=PAPI_get_real_usec(); ptime_end=PAPI_get_virt_usec(); if (event_added_flips) { PAPI_stop(eventset,&flips); } rtime=rtime_end-rtime_start; ptime=ptime_end-ptime_start; mflips=flips/rtime; if (!quiet) { printf( "\nSwapped\n"); printf( "real time: %f\n", rtime); printf( "process time: %f\n", ptime); printf( "FP Instructions: %lld\n", flips); printf( "MFLIPS %f\n", mflips); } mflips_swapped=mflips; // turn off flips if (event_added_flips) { retval=PAPI_remove_named_event(eventset,"PAPI_FP_INS"); if (retval!=PAPI_OK) { test_fail( __FILE__, __LINE__, "PAPI_remove_named_event", retval ); } } /************************/ /* FLOPS */ /************************/ if (!quiet) { printf( "\n----------------------------------\n" ); printf( "PAPI_flops\n"); } /* Add FP_OPS event */ retval=PAPI_add_named_event(eventset,"PAPI_FP_OPS"); if (retval!=PAPI_OK) { if (!quiet) fprintf(stderr,"PAPI_FP_OPS not available!\n"); event_added_flops=0; } else { event_added_flops=1; } if (event_added_flops) { PAPI_start(eventset); } rtime_start=PAPI_get_real_usec(); ptime_start=PAPI_get_virt_usec(); // Classic flops flops_float_matrix_matrix_multiply(); rtime_end=PAPI_get_real_usec(); ptime_end=PAPI_get_virt_usec(); if (event_added_flops) { PAPI_stop(eventset,&flops); } rtime=rtime_end-rtime_start; ptime=ptime_end-ptime_start; mflops=flops/rtime; if (!quiet) { printf( "\nClassic\n"); printf( "real time: %f\n", rtime); printf( "process time: %f\n", ptime); printf( "FP Operations: %lld\n", flops); printf( "MFLOPS %f\n", mflops); } mflops_classic=mflops; // Swapped flops rtime_start=PAPI_get_real_usec(); ptime_start=PAPI_get_virt_usec(); if (event_added_flops) { PAPI_reset(eventset); PAPI_start(eventset); } flops_float_swapped_matrix_matrix_multiply(); rtime_end=PAPI_get_real_usec(); ptime_end=PAPI_get_virt_usec(); if (event_added_flops) { PAPI_stop(eventset,&flops); } 
rtime=rtime_end-rtime_start; ptime=ptime_end-ptime_start; mflops=flops/rtime; if (!quiet) { printf( "\nSwapped\n"); printf( "real time: %f\n", rtime); printf( "process time: %f\n", ptime); printf( "FP Operations: %lld\n", flops); printf( "MFLOPS %f\n", mflops); } mflops_swapped=mflops; // turn off flops if (event_added_flops) { retval=PAPI_remove_named_event(eventset,"PAPI_FP_OPS"); if (retval!=PAPI_OK) { test_fail( __FILE__, __LINE__, "PAPI_remove_named_event", retval ); } } /************************/ /* IPC */ /************************/ if (!quiet) { printf( "\n----------------------------------\n" ); printf( "PAPI_ipc\n"); } /* Add PAPI_TOT_INS event */ retval=PAPI_add_named_event(eventset,"PAPI_TOT_INS"); if (retval!=PAPI_OK) { if (!quiet) fprintf(stderr,"PAPI_TOT_INS not available!\n"); event_added_ipc=0; } else { event_added_ipc=1; } if (event_added_ipc) { /* Add PAPI_TOT_CYC event */ retval=PAPI_add_named_event(eventset,"PAPI_TOT_CYC"); if (retval!=PAPI_OK) { if (!quiet) fprintf(stderr,"PAPI_TOT_CYC not available!\n"); event_added_ipc=0; } else { event_added_ipc=1; } } if (event_added_ipc) { PAPI_start(eventset); } rtime_start=PAPI_get_real_usec(); ptime_start=PAPI_get_virt_usec(); // Classic ipc flops_float_matrix_matrix_multiply(); rtime_end=PAPI_get_real_usec(); ptime_end=PAPI_get_virt_usec(); if (event_added_ipc) { PAPI_stop(eventset,ins); } rtime=rtime_end-rtime_start; ptime=ptime_end-ptime_start; ipc=(double)ins[0]/(double)ins[1]; if (!quiet) { printf( "\nClassic\n"); printf( "real time: %lf\n", rtime); printf( "process time: %lf\n", ptime); printf( "Instructions: %lld\n", ins[0]); printf( "Cycles: %lld\n", ins[1]); printf( "IPC %lf\n", ipc); } ipc_classic=ipc; rtime_classic=rtime; // Swapped ipc if (event_added_ipc) { PAPI_reset(eventset); PAPI_start(eventset); } rtime_start=PAPI_get_real_usec(); ptime_start=PAPI_get_virt_usec(); flops_float_swapped_matrix_matrix_multiply(); rtime_end=PAPI_get_real_usec(); ptime_end=PAPI_get_virt_usec(); if 
(event_added_ipc) { PAPI_stop(eventset,ins); } rtime=rtime_end-rtime_start; ptime=ptime_end-ptime_start; ipc=(double)ins[0]/(double)ins[1]; if (!quiet) { printf( "\nSwapped\n"); printf( "real time: %lf\n", rtime); printf( "process time: %lf\n", ptime); printf( "Instructions: %lld\n", ins[0]); printf( "Cycles: %lld\n", ins[1]); printf( "IPC %lf\n", ipc); } ipc_swapped=ipc; rtime_swapped=rtime; /* Validate */ if (event_added_flips) { if (mflips_swappedrtime_classic) { test_fail(__FILE__,__LINE__, "time should be better when swapped",0); } test_pass( __FILE__ ); return 0; } papi-5.6.0/src/libpfm-3.y/lib/ultra3plus_events.h000664 001750 001750 00000032063 13216244363 023631 0ustar00jshenry1963jshenry1963000000 000000 static pme_sparc_entry_t ultra3plus_pe[] = { /* These two must always be first. */ { .pme_name = "Cycle_cnt", .pme_desc = "Accumulated cycles", .pme_ctrl = PME_CTRL_S0 | PME_CTRL_S1, .pme_val = 0x0, }, { .pme_name = "Instr_cnt", .pme_desc = "Number of instructions completed", .pme_ctrl = PME_CTRL_S0 | PME_CTRL_S1, .pme_val = 0x1, }, /* PIC0 events common to all UltraSPARC processors */ { .pme_name = "Dispatch0_IC_miss", .pme_desc = "I-buffer is empty from I-Cache miss", .pme_ctrl = PME_CTRL_S0, .pme_val = 0x2, }, { .pme_name = "IC_ref", .pme_desc = "I-cache refrences", .pme_ctrl = PME_CTRL_S0, .pme_val = 0x8, }, { .pme_name = "DC_rd", .pme_desc = "D-cache read references (including accesses that subsequently trap)", .pme_ctrl = PME_CTRL_S0, .pme_val = 0x9, }, { .pme_name = "DC_wr", .pme_desc = "D-cache store accesses (including cacheable stores that subsequently trap)", .pme_ctrl = PME_CTRL_S0, .pme_val = 0xa, }, { .pme_name = "EC_ref", .pme_desc = "E-cache references", .pme_ctrl = PME_CTRL_S0, .pme_val = 0xc, }, { .pme_name = "EC_snoop_inv", .pme_desc = "L2-cache invalidates generated from a snoop by a remote processor", .pme_ctrl = PME_CTRL_S0, .pme_val = 0xe, }, /* PIC1 events common to all UltraSPARC processors */ { .pme_name = "Dispatch0_mispred", 
.pme_desc = "I-buffer is empty from Branch misprediction", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x2, }, { .pme_name = "EC_wb", .pme_desc = "Dirty sub-blocks that produce writebacks due to L2-cache miss events", .pme_ctrl = PME_CTRL_S1, .pme_val = 0xd, }, { .pme_name = "EC_snoop_cb", .pme_desc = "L2-cache copybacks generated from a snoop by a remote processor", .pme_ctrl = PME_CTRL_S1, .pme_val = 0xe, }, /* PIC0 events common to all UltraSPARC-III/III+/IIIi processors */ { .pme_name = "Dispatch0_br_target", .pme_desc = "I-buffer is empty due to a branch target address calculation", .pme_ctrl = PME_CTRL_S0, .pme_val = 0x3, }, { .pme_name = "Dispatch0_2nd_br", .pme_desc = "Stall cycles due to having two branch instructions line-up in one 4-instruction group causing the second branch in the group to be re-fetched, delaying it's entrance into the I-buffer", .pme_ctrl = PME_CTRL_S0, .pme_val = 0x4, }, { .pme_name = "Rstall_storeQ", .pme_desc = "R-stage stall for a store instruction which is the next instruction to be executed, but it stailled due to the store queue being full", .pme_ctrl = PME_CTRL_S0, .pme_val = 0x5, }, { .pme_name = "Rstall_IU_use", .pme_desc = "R-stage stall for an event that the next instruction to be executed depends on the result of a preceeding integer instruction in the pipeline that is not yet available", .pme_ctrl = PME_CTRL_S0, .pme_val = 0x6, }, { .pme_name = "EC_write_hit_RTO", .pme_desc = "W-cache exclusive requests that hit L2-cache in S, O, or Os state and thus, do a read-to-own bus transaction", .pme_ctrl = PME_CTRL_S0, .pme_val = 0xd, }, { .pme_name = "EC_rd_miss", .pme_desc = "L2-cache miss events (including atomics) from D-cache events", .pme_ctrl = PME_CTRL_S0, .pme_val = 0xf, }, { .pme_name = "PC_port0_rd", .pme_desc = "P-cache cacheable FP loads to the first port (general purpose load path to D-cache and P-cache via MS pipeline)", .pme_ctrl = PME_CTRL_S0, .pme_val = 0x10, }, { .pme_name = "SI_snoop", .pme_desc = "Counts snoops from 
remote processor(s) including RTS, RTSR, RTO, RTOR, RS, RSR, RTSM, and WS", .pme_ctrl = PME_CTRL_S0, .pme_val = 0x11, }, { .pme_name = "SI_ciq_flow", .pme_desc = "Counts system clock cycles when the flow control (PauseOut) signal is asserted", .pme_ctrl = PME_CTRL_S0, .pme_val = 0x12, }, { .pme_name = "SI_owned", .pme_desc = "Counts events where owned_in is asserted on bus requests from the local processor", .pme_ctrl = PME_CTRL_S0, .pme_val = 0x13, }, { .pme_name = "SW_count0", .pme_desc = "Counts software-generated occurrences of 'sethi %hi(0xfc000), %g0' instruction", .pme_ctrl = PME_CTRL_S0, .pme_val = 0x14, }, { .pme_name = "IU_Stat_Br_miss_taken", .pme_desc = "Retired branches that were predicted to be taken, but in fact were not taken", .pme_ctrl = PME_CTRL_S0, .pme_val = 0x15, }, { .pme_name = "IU_Stat_Br_Count_taken", .pme_desc = "Retired taken branches", .pme_ctrl = PME_CTRL_S0, .pme_val = 0x16, }, { .pme_name = "Dispatch0_rs_mispred", .pme_desc = "I-buffer is empty due to a Return Address Stack misprediction", .pme_ctrl = PME_CTRL_S0, .pme_val = 0x4, }, { .pme_name = "FA_pipe_completion", .pme_desc = "Instructions that complete execution on the FPG ALU pipelines", .pme_ctrl = PME_CTRL_S0, .pme_val = 0x18, }, /* PIC1 events common to all UltraSPARC-III/III+/IIIi processors */ { .pme_name = "IC_miss_cancelled", .pme_desc = "I-cache misses cancelled due to mis-speculation, recycle, or other events", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x3, }, { .pme_name = "Re_FPU_bypass", .pme_desc = "Stall due to recirculation when an FPU bypass condition that does not have a direct bypass path occurs", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x5, }, { .pme_name = "Re_DC_miss", .pme_desc = "Stall due to loads that miss D-cache and get recirculated", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x6, }, { .pme_name = "Re_EC_miss", .pme_desc = "Stall due to loads that miss L2-cache and get recirculated", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x7, }, { .pme_name = "IC_miss", .pme_desc = 
"I-cache misses, including fetches from mis-speculated execution paths which are later cancelled", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x8, }, { .pme_name = "DC_rd_miss", .pme_desc = "Recirculated loads that miss the D-cache", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x9, }, { .pme_name = "DC_wr_miss", .pme_desc = "D-cache store accesses that miss D-cache", .pme_ctrl = PME_CTRL_S1, .pme_val = 0xa, }, { .pme_name = "Rstall_FP_use", .pme_desc = "R-stage stall for an event that the next instruction to be executed depends on the result of a preceeding floating-point instruction in the pipeline that is not yet available", .pme_ctrl = PME_CTRL_S1, .pme_val = 0xb, }, { .pme_name = "EC_misses", .pme_desc = "E-cache misses", .pme_ctrl = PME_CTRL_S1, .pme_val = 0xc, }, { .pme_name = "EC_ic_miss", .pme_desc = "L2-cache read misses from I-cache requests", .pme_ctrl = PME_CTRL_S1, .pme_val = 0xf, }, { .pme_name = "Re_PC_miss", .pme_desc = "Stall due to recirculation when a prefetch cache miss occurs on a prefetch predicted second load", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x10, }, { .pme_name = "ITLB_miss", .pme_desc = "I-TLB miss traps taken", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x11, }, { .pme_name = "DTLB_miss", .pme_desc = "Memory reference instructions which trap due to D-TLB miss", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x12, }, { .pme_name = "WC_miss", .pme_desc = "W-cache misses", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x13, }, { .pme_name = "WC_snoop_cb", .pme_desc = "W-cache copybacks generated by a snoop from a remote processor", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x14, }, { .pme_name = "WC_scrubbed", .pme_desc = "W-cache hits to clean lines", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x15, }, { .pme_name = "WC_wb_wo_read", .pme_desc = "W-cache writebacks not requiring a read", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x16, }, { .pme_name = "PC_soft_hit", .pme_desc = "FP loads that hit a P-cache line that was prefetched by a software-prefetch instruction", .pme_ctrl = PME_CTRL_S1, 
.pme_val = 0x18, }, { .pme_name = "PC_snoop_inv", .pme_desc = "P-cache invalidates that were generated by a snoop from a remote processor and stores by a local processor", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x19, }, { .pme_name = "PC_hard_hit", .pme_desc = "FP loads that hit a P-cache line that was prefetched by a hardware prefetch", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x1a, }, { .pme_name = "PC_port1_rd", .pme_desc = "P-cache cacheable FP loads to the second port (memory and out-of-pipeline instruction execution loads via the A0 and A1 pipelines)", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x1b, }, { .pme_name = "SW_count1", .pme_desc = "Counts software-generated occurrences of 'sethi %hi(0xfc000), %g0' instruction", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x1c, }, { .pme_name = "IU_Stat_Br_miss_untaken", .pme_desc = "Retired branches that were predicted to be untaken, but in fact were taken", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x1d, }, { .pme_name = "IU_Stat_Br_Count_untaken", .pme_desc = "Retired untaken branches", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x1e, }, { .pme_name = "PC_MS_miss", .pme_desc = "FP loads through the MS pipeline that miss P-cache", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x1f, }, { .pme_name = "Re_RAW_miss", .pme_desc = "Stall due to recirculation when there is a load in the E-stage which has a non-bypassable read-after-write hazard with an earlier store instruction", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x26, }, { .pme_name = "FM_pipe_completion", .pme_desc = "Instructions that complete execution on the FPG Multiply pipelines", .pme_ctrl = PME_CTRL_S0, .pme_val = 0x27, }, /* PIC0 memory controller events common to UltraSPARC-III/III+ processors */ { .pme_name = "MC_reads_0", .pme_desc = "Read requests completed to memory bank 0", .pme_ctrl = PME_CTRL_S0, .pme_val = 0x20, }, { .pme_name = "MC_reads_1", .pme_desc = "Read requests completed to memory bank 1", .pme_ctrl = PME_CTRL_S0, .pme_val = 0x21, }, { .pme_name = "MC_reads_2", .pme_desc = "Read requests 
completed to memory bank 2", .pme_ctrl = PME_CTRL_S0, .pme_val = 0x22, }, { .pme_name = "MC_reads_3", .pme_desc = "Read requests completed to memory bank 3", .pme_ctrl = PME_CTRL_S0, .pme_val = 0x23, }, { .pme_name = "MC_stalls_0", .pme_desc = "Clock cycles that requests were stalled in the MCU queues because bank 0 was busy with a previous request", .pme_ctrl = PME_CTRL_S0, .pme_val = 0x24, }, { .pme_name = "MC_stalls_2", .pme_desc = "Clock cycles that requests were stalled in the MCU queues because bank 2 was busy with a previous request", .pme_ctrl = PME_CTRL_S0, .pme_val = 0x25, }, /* PIC1 memory controller events common to all UltraSPARC-III/III+ processors */ { .pme_name = "MC_writes_0", .pme_desc = "Write requests completed to memory bank 0", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x20, }, { .pme_name = "MC_writes_1", .pme_desc = "Write requests completed to memory bank 1", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x21, }, { .pme_name = "MC_writes_2", .pme_desc = "Write requests completed to memory bank 2", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x22, }, { .pme_name = "MC_writes_3", .pme_desc = "Write requests completed to memory bank 3", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x23, }, { .pme_name = "MC_stalls_1", .pme_desc = "Clock cycles that requests were stalled in the MCU queues because bank 1 was busy with a previous request", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x24, }, { .pme_name = "MC_stalls_3", .pme_desc = "Clock cycles that requests were stalled in the MCU queues because bank 3 was busy with a previous request", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x25, }, /* PIC0 events specific to UltraSPARC-III+ processors */ { .pme_name = "EC_wb_remote", .pme_desc = "Counts the retry event when any victimization for which the processor generates an R_WB transaction to non_LPA address region", .pme_ctrl = PME_CTRL_S0, .pme_val = 0x19, }, { .pme_name = "EC_miss_local", .pme_desc = "Counts any transaction to an LPA for which the processor issues an RTS/RTO/RS transaction", 
.pme_ctrl = PME_CTRL_S0, .pme_val = 0x1a, }, { .pme_name = "EC_miss_mtag_remote", .pme_desc = "Counts any transaction to an LPA in which the processor is required to generate a retry transaction", .pme_ctrl = PME_CTRL_S0, .pme_val = 0x1b, }, /* PIC1 events specific to UltraSPARC-III+/IIIi processors */ { .pme_name = "Re_DC_missovhd", .pme_desc = "Used to measure D-cache stall counts seperatedly for L2-cache hits and misses. This counter is used with the recirculation and cache access events to seperately calculate the D-cache loads that hit and miss the L2-cache", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x4, }, /* PIC1 events specific to UltraSPARC-III+ processors */ { .pme_name = "EC_miss_mtag_remote", .pme_desc = "Counts any transaction to an LPA in which the processor is required to generate a retry transaction", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x28, }, { .pme_name = "EC_miss_remote", .pme_desc = "Counts the events triggered whenever the processor generates a remote (R_*) transaction and the address is to a non-LPA portion (remote) of the physical address space, or an R_WS transaction due to block-store/block-store-commit to any address space (LPA or non-LPA), or an R-RTO due to store/swap request on Os state to LPA space", .pme_ctrl = PME_CTRL_S1, .pme_val = 0x29, }, }; #define PME_ULTRA3PLUS_EVENT_COUNT (sizeof(ultra3plus_pe)/sizeof(pme_sparc_entry_t)) papi-5.6.0/src/libpfm-3.y/docs/Makefile000664 001750 001750 00000006100 13216244361 021567 0ustar00jshenry1963jshenry1963000000 000000 # # Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. 
# Contributed by Stephane Eranian # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies # of the Software, and to permit persons to whom the Software is furnished to do so, # subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF # CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE # OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. # TOPDIR := $(shell if [ "$$PWD" != "" ]; then echo $$PWD; else pwd; fi)/.. 
include $(TOPDIR)/config.mk include $(TOPDIR)/rules.mk ifeq ($(CONFIG_PFMLIB_ARCH_IA64),y) ARCH_MAN=libpfm_itanium.3 libpfm_itanium2.3 libpfm_montecito.3 endif ifeq ($(CONFIG_PFMLIB_ARCH_I386),y) ARCH_MAN=libpfm_p6.3 libpfm_core.3 libpfm_amd64.3 libpfm_atom.3 libpfm_nehalem.3 endif ifeq ($(CONFIG_PFMLIB_ARCH_X86_64),y) ARCH_MAN=libpfm_amd64.3 libpfm_core.3 libpfm_atom.3 libpfm_nehalem.3 endif ifeq ($(CONFIG_PFMLIB_ARCH_MIPS64),y) endif ifeq ($(CONFIG_PFMLIB_ARCH_POWERPC),y) ARCH_MAN=libpfm_powerpc.3 endif ifeq ($(CONFIG_PFMLIB_ARCH_CRAYXT),y) endif ifeq ($(CONFIG_PFMLIB_CELL),y) endif GEN_MAN= libpfm.3 pfm_dispatch_events.3 pfm_find_event.3 pfm_find_event_bycode.3 \ pfm_find_event_bycode_next.3 pfm_find_event_mask.3 pfm_find_full_event.3 \ pfm_force_pmu.3 pfm_get_cycle_event.3 pfm_get_event_code.3 pfm_get_event_code_counter.3 \ pfm_get_event_counters.3 pfm_get_event_description.3 pfm_get_event_mask_code.3 \ pfm_get_event_mask_description.3 pfm_get_event_mask_name.3 pfm_get_event_name.3 \ pfm_get_full_event_name.3 pfm_get_hw_counter_width.3 pfm_get_impl_counters.3 \ pfm_get_impl_pmcs.3 pfm_get_impl_pmds.3 pfm_get_inst_retired.3 pfm_get_max_event_name_len.3 \ pfm_get_num_counters.3 pfm_get_num_events.3 pfm_get_num_pmcs.3 \ pfm_get_num_pmds.3 pfm_get_pmu_name.3 pfm_get_pmu_name_bytype.3 \ pfm_get_pmu_type.3 pfm_get_version.3 pfm_initialize.3 \ pfm_list_supported_pmus.3 pfm_pmu_is_supported.3 pfm_regmask_and.3 \ pfm_regmask_clr.3 pfm_regmask_copy.3 pfm_regmask_eq.3 pfm_regmask_isset.3 \ pfm_regmask_or.3 pfm_regmask_set.3 pfm_regmask_weight.3 pfm_set_options.3 \ pfm_strerror.3 MAN=$(GEN_MAN) $(ARCH_MAN) install: -mkdir -p $(DESTDIR)$(MANDIR)/man3 ( cd man3; $(INSTALL) -m 644 $(MAN) $(DESTDIR)$(MANDIR)/man3 ) papi-5.6.0/src/examples/Makefile.IRIX64000664 001750 001750 00000000742 13216244361 021527 0ustar00jshenry1963jshenry1963000000 000000 PAPIINC = .. 
PAPILIB = ../libpapi.a CC = gcc CFLAGS = -I$(PAPIINC) LDFLAGS = $(PAPILIB) TARGETS = PAPI_set_domain sprofile multiplex PAPI_state PAPI_reset PAPI_profil PAPI_perror PAPI_get_virt_cyc PAPI_get_real_cyc PAPI_get_opt PAPI_hw_info PAPI_get_executable_info PAPI_ipc PAPI_flops PAPI_flips PAPI_overflow PAPI_add_remove_event high_level PAPI_add_remove_events all: $(TARGETS) $(TARGETS): $$@.c $(CC) $? -o $@ $(CFLAGS) $(LDFLAGS) clean: rm -f *.o $(TARGETS) papi-5.6.0/src/ftests/second.F000664 001750 001750 00000021053 13216244361 020215 0ustar00jshenry1963jshenry1963000000 000000 #include "fpapi_test.h" program second implicit integer (p) integer domain, granularity character*(PAPI_MAX_STR_LEN) domainstr, grnstr integer*8 values(10), max, min integer es1, es2, es3 integer retval Integer last_char External last_char integer tests_quiet, get_quiet external get_quiet #if (defined(sgi) && defined(host_mips)) integer id integer*4 getuid #endif #if (defined(sgi) && defined(host_mips)) id = getuid() #endif tests_quiet = get_quiet() es1 = PAPI_NULL es2 = PAPI_NULL es3 = PAPI_NULL retval = PAPI_VER_CURRENT call PAPIf_library_init(retval) if ( retval.NE.PAPI_VER_CURRENT) then call ftest_fail(__FILE__, __LINE__, . 'PAPI_library_init', retval) end if call PAPIf_query_event(PAPI_TOT_INS, retval) if (retval.NE.PAPI_OK) then call ftest_skip(__FILE__, __LINE__, 'PAPI_FP_INS', PAPI_ENOEVNT) end if call PAPIf_create_eventset(es1, retval) if ( retval.NE.PAPI_OK) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_create_eventset', *retval) end if call PAPIf_add_event( es1, PAPI_TOT_INS, retval ) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_add_event', retval) end if call PAPIf_add_event( es1, PAPI_TOT_CYC, retval ) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_add_event', retval) end if call PAPIf_create_eventset(es2, retval) if ( retval.NE.PAPI_OK) then call ftest_fail(__FILE__, __LINE__, . 
'PAPIf_create_eventset', *retval) end if call PAPIf_add_event( es2, PAPI_TOT_INS, retval ) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_add_event', retval) end if call PAPIf_add_event( es2, PAPI_TOT_CYC, retval ) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_add_event', retval) end if call PAPIf_create_eventset(es3, retval) if ( retval.NE.PAPI_OK) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_create_eventset', *retval) end if call PAPIf_add_event( es3, PAPI_TOT_INS, retval ) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_add_event', retval) end if call PAPIf_add_event( es3, PAPI_TOT_CYC, retval ) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_add_event', retval) end if call PAPIf_set_event_domain(es1, PAPI_DOM_ALL, retval) if ( retval.NE.PAPI_OK) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_set_domain', retval) end if call PAPIf_set_event_domain(es2, PAPI_DOM_KERNEL, retval) if ( retval.NE.PAPI_OK) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_set_domain', retval) end if call PAPIf_set_event_domain(es3, PAPI_DOM_USER, retval) if ( retval.NE.PAPI_OK) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_set_domain', retval) end if call PAPIf_start(es1, retval) call fdo_flops(NUM_FLOPS) if (retval.eq.PAPI_OK) then call PAPIf_stop(es1, values(1), retval) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_stop', retval) end if end if call PAPIf_start(es2, retval) call fdo_flops(NUM_FLOPS) if (retval.eq.PAPI_OK) then call PAPIf_stop(es2, values(3), retval) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_stop', retval) end if end if call PAPIf_start(es3, retval) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_start', retval) end if call fdo_flops(NUM_FLOPS) call PAPIf_stop(es3, values(5), retval) if ( retval .NE. 
PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_stop', retval) end if call PAPIf_remove_event( es1, PAPI_TOT_INS, retval ) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_remove_event', retval) end if call PAPIf_remove_event( es1, PAPI_TOT_CYC, retval ) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_remove_event', retval) end if call PAPIf_remove_event( es2, PAPI_TOT_INS, retval ) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_remove_event', retval) end if call PAPIf_remove_event( es2, PAPI_TOT_CYC, retval ) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_remove_event', retval) end if call PAPIf_remove_event( es3, PAPI_TOT_INS, retval ) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_remove_event', retval) end if call PAPIf_remove_event( es3, PAPI_TOT_CYC, retval ) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_remove_event', retval) end if if (tests_quiet .EQ. 0) then print *, 'Test case 2: Non-overlapping start, stop, read', *' for all 3 domains.' print *, '-------------------------------------------------'// * '------------------------------' end if call PAPIf_get_domain(es1, domain, PAPI_DEFDOM, retval) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_get_domain', retval) end if call stringify_domain(domain, domainstr) if (tests_quiet .EQ. 0) then write (*,900) 'Default domain is:', domain, domainstr end if 900 format(a20, i3, ' ', a70) call PAPIf_get_granularity(es1, granularity, PAPI_DEFGRN, *retval) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_get_granularity', *retval) end if call stringify_granularity(granularity, grnstr) if (tests_quiet .EQ. 0) then write (*,800) 'Default granularity is:', granularity, grnstr end if 800 format(a25, i3, ' ', a20) if (tests_quiet .EQ. 
0) then print *, 'Using', NUM_FLOPS, ' iterations of c += b*c' print *, '-------------------------------------------------'// * '------------------------------' print *, 'Test type : PAPI_DOM_ALL PAPI_DOM_KERNEL', *' PAPI_DOM_USER' write (*,200) 'PAPI_TOT_INS', values(1), values(3), values(5) write (*,200) 'PAPI_TOT_CYC', values(2), values(4), values(6) 200 format(A15, ': ', I15, I15, I15) print *, '-------------------------------------------------'// * '------------------------------' print *, 'Verification:' print *, 'Row 1 approximately equals N 0 N' print *, 'Column 1 approximately equals column 2 plus column 3' #if defined(sgi) && defined(host_mips) print * print *, '* IRIX requires root for PAPI_DOM_KERNEL', *' and PAPI_DOM_ALL.' print *, '* The first two columns will be invalid if not', *' run as root for IRIX.' #endif end if #if (defined(sgi) && defined(host_mips)) if (id.NE.0) then min = NUM_FLOPS*0.9 max = NUM_FLOPS*1.1 if ((values(5) .lt. min) .OR. (values(5) .gt. max)) then call ftest_fail(__FILE__, __LINE__, . 'PAPI_FP_INS', 1) end if else min = values(5)*0.9 max = values(5)*1.1 if ((values(1) .lt. min) .OR. (values(1) .gt. max)) then call ftest_fail(__FILE__, __LINE__, . 'PAPI_FP_INS', 1) end if min = values(2)*0.9 max = values(2)*1.1 if (((values(4)+values(6)) .lt. min) .OR. * ((values(4)+values(6)) .gt. max)) then call ftest_fail(__FILE__, __LINE__, 'PAPI_TOT_CYC', 1) end if endif #else min = INT(REAL(values(5))*0.9) max = INT(REAL(values(5))*1.1) if ((values(1) .lt. min) .OR. (values(1) .gt. max)) then call ftest_fail(__FILE__, __LINE__, . 'PAPI_FP_INS', 1) end if min = INT(REAL(values(2))*0.8) max = INT(REAL(values(2))*1.2) if (((values(4)+values(6)) .lt. min) .OR. * ((values(4)+values(6)) .gt. max)) then call ftest_fail(__FILE__, __LINE__, . 
'PAPI_TOT_CYC', 1) end if #endif call ftests_pass(__FILE__) end papi-5.6.0/src/perfctr-2.7.x/usr.lib/event_set_p5.c000664 001750 001750 00000032502 13216244370 023665 0ustar00jshenry1963jshenry1963000000 000000 /* $Id: event_set_p5.c,v 1.1 2003/02/16 21:08:54 mikpe Exp $ * Performance counter event descriptions for Intel P5 and P5 MMX * processors, and Cyrix 6x86/MII/III processors. * * Copyright (C) 2003 Mikael Pettersson * * References * ---------- * [IA32, Volume 3] "Intel Architecture Software Developer's Manual, * Volume 3: System Programming Guide". Intel document number 245472-009. * (at http://developer.intel.com/) * * [Cyrix 6x86MX] "Cyrix 6x86MX Processor". * [Cyrix MII] "Cyrix M II Data Book". * [Cyrix III] "Cyrix III Processor DataBook" Ver. 1.0, 1/25/00. * Note: This "Cyrix III" was code-named "Joshua", and it was apparently * cancelled by VIA due to disappointing performance. * (MII and III docs at http://www.viatech.com/) */ #include /* for NULL */ #include "libperfctr.h" #include "event_set.h" /* * Intel Pentium (P5) events. */ static const struct perfctr_event p5_events[] = { { 0x00, 0x3, NULL, "DATA_READ", "Number of memory data reads (internal data cache hit and " "miss combined)." }, { 0x01, 0x3, NULL, "DATA_WRITE", "Number of memory data writes (internal data cache hit and " "miss combined), I/O is not included." }, { 0x02, 0x3, NULL, "DATA_TLB_MISS", "Number of misses to the data cache translation look-aside " "buffer." }, { 0x03, 0x3, NULL, "DATA_READ_MISS", "Number of memory read accesses that miss the internal data " "cache whether or not the access is cacheable or noncacheable." }, { 0x04, 0x3, NULL, "DATA_WRITE_MISS", "Number of memory write accesses that miss the internal data " "cache whether or not the access is cacheable or noncacheable." }, { 0x05, 0x3, NULL, "WRITE_HIT_TO_M_OR_E_STATE_LINES", "Number of write hits to exclusive or modified lines in the " "data cache." 
}, { 0x06, 0x3, NULL, "DATA_CACHE_LINES_WRITTEN_BACK", "Number of dirty lines (all) that are written back, regardless " "of the cause." }, { 0x07, 0x3, NULL, "EXTERNAL_SNOOPS", "Number of accepted external snoops whether they hit in the code " "cache or data cache or neither." }, { 0x08, 0x3, NULL, "EXTERNAL_DATA_CACHE_SNOOP_HITS", "Number of external snoops to the data cache." }, { 0x09, 0x3, NULL, "MEMORY_ACCESSES_IN_BOTH_PIPES", "Number of data memory reads or writes that are paired in both " "pipes of the pipeline." }, { 0x0A, 0x3, NULL, "BANK_CONFLICTS", "Number of actual bank conflicts." }, { 0x0B, 0x3, NULL, "MISALIGNED_DATA_MEMORY_OR_IO_REFERENCES", "Number of memory or I/O reads or writes that are misaligned." }, { 0x0C, 0x3, NULL, "CODE_READ", "Number of instruction reads whether the read is cacheable or " "noncacheable." }, { 0x0D, 0x3, NULL, "CODE_TLB_MISS", "Number of instruction reads that miss the code TLB whether " "the read is cacheable or noncacheable." }, { 0x0E, 0x3, NULL, "CODE_CACHE_MISS", "Number of instruction reads that miss the internal code cache " "whether the read is cacheable or noncacheable." }, { 0x0F, 0x3, NULL, "ANY_SEGMENT_REGISTER_LOADED", "Number of writes into any segment register in real or protected " "mode including the LDTR, GDTR, IDTR, and TR." }, /* 0x10: reserved */ /* 0x11: reserved */ { 0x12, 0x3, NULL, "BRANCHES", "Number of taken and not taken branches, including conditional " "branches, jumps, calls, returns, software interrupts, and " "interrupt returns." }, { 0x13, 0x3, NULL, "BTB_HITS", "Number of BTB hits that occur." }, { 0x14, 0x3, NULL, "TAKEN_BRANCH_OR_BTB_HIT", "Number of taken branches or BTB hits that occur." }, { 0x15, 0x3, NULL, "PIPELINE_FLUSHES", "Number of pipeline flushes that occur." }, { 0x16, 0x3, NULL, "INSTRUCTIONS_EXECUTED", "Number of instructions executed (up to two per clock)." 
}, { 0x17, 0x3, NULL, "INSTRUCTIONS_EXECUTED_V_PIPE", /* XXX: was INSTRUCTIONS_EXECUTED_IN_V_PIPE */ "Number of instructions executed in the V_pipe. It indicates " "the number of instructions that were paired." }, { 0x18, 0x3, NULL, "BUS_CYCLE_DURATION", "Number of clocks while a bus cycle is in progress." }, { 0x19, 0x3, NULL, "WRITE_BUFFER_FULL_STALL_DURATION", "Number of clocks while the pipeline is stalled due to full " "write buffers." }, { 0x1A, 0x3, NULL, "WAITING_FOR_DATA_MEMORY_READ_STALL_DURATION", "Number of clocks while the pipeline is stalled while waiting " "for data memory reads." }, { 0x1B, 0x3, NULL, "STALL_ON_WRITE_TO_AN_E_OR_M_STATE_LINE", "Number of stalls on writes to E- or M-state lines." }, { 0x1C, 0x3, NULL, "LOCKED_BUS_CYCLE", "Number of locked bus cycles that occur as the result of " "LOCK prefix or LOCK instruction, page-table updates, and " "descriptor table updates." }, { 0x1D, 0x3, NULL, "IO_READ_OR_WRITE_CYCLE", "Number of bus cycles directed to I/O space." }, { 0x1E, 0x3, NULL, "NONCACHEABLE_MEMORY_READS", "Number of noncacheable instruction or data memory read bus cycles." }, { 0x1F, 0x3, NULL, "PIPELINE_AGI_STALLS", "Number of adress generation interlock (AGI) stalls." }, /* 0x20: reserved */ /* 0x21: reserved */ { 0x22, 0x3, NULL, "FLOPS", "Number of floating-point operations that occur." }, { 0x23, 0x3, NULL, "BREAKPOINT_MATCH_ON_DR0_REGISTER", "Number of matches on DR0 breakpoint." }, { 0x24, 0x3, NULL, "BREAKPOINT_MATCH_ON_DR1_REGISTER", "Number of matches on DR1 breakpoint." }, { 0x25, 0x3, NULL, "BREAKPOINT_MATCH_ON_DR2_REGISTER", "Number of matches on DR2 breakpoint." }, { 0x26, 0x3, NULL, "BREAKPOINT_MATCH_ON_DR3_REGISTER", "Number of matches on DR3 breakpoint." }, { 0x27, 0x3, NULL, "HARDWARE_INTERRUPTS", "Number of taken INTR and NMI interrupts." }, { 0x28, 0x3, NULL, "DATA_READ_OR_WRITE", "Number of memory data reads and/or writes (internal data cache " "hit and miss combined)." 
}, { 0x29, 0x3, NULL, "DATA_READ_MISS_OR_WRITE_MISS", "Number of memory read and/or write accesses that miss the " "internal data cache whether or not the acceess is cacheable " "or noncacheable." }, }; const struct perfctr_event_set perfctr_p5_event_set = { .cpu_type = PERFCTR_X86_INTEL_P5, .event_prefix = "P5_", .include = NULL, .nevents = ARRAY_SIZE(p5_events), .events = p5_events, }; /* * Intel Pentium MMX (P5MMX) events. */ static const struct perfctr_event p5mmx_and_mii_events[] = { { 0x2B, 0x1, NULL, "MMX_INSTRUCTIONS_EXECUTED_U_PIPE", "Number of MMX instructions executed in the U-pipe." }, { 0x2B, 0x2, NULL, "MMX_INSTRUCTIONS_EXECUTED_V_PIPE", "Number of MMX instructions executed in the V-pipe." }, { 0x2D, 0x1, NULL, "EMMS_INSTRUCTIONS_EXECUTED", "Number of EMMS instructions executed." }, { 0x2D, 0x2, NULL, "TRANSITIONS_BETWEEN_MMX_AND_FP_INSTRUCTIONS", "Number of transitions between MMX and floating-point instructions " "or vice versa." }, { 0x2F, 0x1, NULL, "SATURATING_MMX_INSTRUCTIONS_EXECUTED", "Number of saturating MMX instructions executed, independently of " "whether they actually saturated." }, { 0x2F, 0x2, NULL, "SATURATIONS_PERFORMED", "Number of MMX instructions that used saturating arithmetic and " "that at least one of its results actually saturated." }, { 0x31, 0x1, NULL, "MMX_INSTRUCTION_DATA_READS", "Number of MMX instruction data reads." }, { 0x32, 0x2, NULL, "TAKEN_BRANCHES", "Number of taken branches." }, { 0x37, 0x1, NULL, "MISPREDICTED_OR_UNPREDICTED_RETURNS", "Number of returns predicted incorrectly or not predicted at all." }, { 0x37, 0x2, NULL, "PREDICTED_RETURNS", "Number of predicted returns (whether they are predicted correctly " "and incorrectly)." }, { 0x38, 0x1, NULL, "MMX_MULTIPLY_UNIT_INTERLOCK", "Number of clocks the pipe is stalled since the destination of " "previous MMX instruction is not ready yet." 
}, { 0x38, 0x2, NULL, "MOVD_MOVQ_STORE_STALL_DUE_TO_PREVIOUS_MMX_OPERATION", "Number of clocks a MOVD/MOVQ instruction store is stalled in D2 " "stage due to a previous MMX operation with a destination to be " "used in the store instruction." }, { 0x39, 0x1, NULL, "RETURNS", "Number of returns executed." }, { 0x3A, 0x1, NULL, "BTB_FALSE_ENTRIES", "Number of false entries in the Branch Target Buffer." }, { 0x3A, 0x2, NULL, "BTB_MISS_PREDICTION_ON_NOT_TAKEN_BRANCH", "Number of times the BTB predicted a not-taken branch as taken." }, { 0x3B, 0x1, NULL, "FULL_WRITE_BUFFER_STALL_DURATION_WHILE_EXECUTING_MMX_INSTRUCTIONS", "Number of clocks while the pipeline is stalled due to full write " "buffers while executing MMX instructions." }, { 0x3B, 0x2, NULL, "STALL_ON_MMX_INSTRUCTION_WRITE_TO_E_OR_M_STATE_LINE", "Number of clocks during stalls on MMX instructions writing " "to E- or M-state lines." }, }; static const struct perfctr_event_set p5mmx_and_mii_event_set = { .cpu_type = PERFCTR_X86_INTEL_P5MMX, .event_prefix = "P5MMX_", .include = &perfctr_p5_event_set, .nevents = ARRAY_SIZE(p5mmx_and_mii_events), .events = p5mmx_and_mii_events, }; static const struct perfctr_event p5mmx_events[] = { { 0x2A, 0x1, NULL, "BUS_OWNERSHIP_LATENCY", "The time from LRM bus ownership request to bus ownership granted." }, { 0x2A, 0x2, NULL, "BUS_OWNERSHIP_TRANSFERS", "The number of bus ownership transfers." }, { 0x2C, 0x1, NULL, "CACHE_M_STATE_LINE_SHARING", "Number of times a processor identified a hit to a modified line " "due to a memory access in the other processor." }, { 0x2C, 0x2, NULL, "CACHE_LINE_SHARING", "Number of shared data lines in the L1 cache." }, { 0x2E, 0x1, NULL, "BUS_UTILIZATION_DUE_TO_PROCESSOR_ACTIVITY", "Number of clocks the bus is busy due to the processor's own activity." }, { 0x2E, 0x2, NULL, "WRITES_TO_NONCACHEABLE_MEMORY", "Number of write accesses to noncacheable memory." 
}, { 0x30, 0x1, NULL, "NUMBER_OF_CYCLES_NOT_IN_HALT_STATE", "Number of cycles the processor is not idle due to HLT instruction." }, { 0x30, 0x2, NULL, "DATA_CACHE_TLB_MISS_STALL_DURATION", "Number of clocks the pipeline is stalled due to a data cache " "translation look-aside buffer (TLB) miss." }, { 0x31, 0x2, NULL, "MMX_INSTRUCTION_DATA_READ_MISSES", "Number of MMX instruction data read misses." }, { 0x32, 0x1, NULL, "FLOATING_POINT_STALLS_DURATION", "Number of clocks while pipe is stalled due to a floating-point freeze." }, { 0x33, 0x1, NULL, "D1_STARVATION_AND_FIFO_IS_EMPTY", "Number of times D1 stage cannot issue ANY instructions since the " "FIFO buffer is empty." }, { 0x33, 0x2, NULL, "D1_STARVATION_AND_ONLY_ONE_INSTRUCTION_IN_FIFO", "Number of times the D1 stage issues just a single instruction since " "the FIFO buffer had just one instruction ready." }, { 0x34, 0x1, NULL, "MMX_INSTRUCTION_DATA_WRITES", "Number of data writes caused by MMX instructions." }, { 0x34, 0x2, NULL, "MMX_INSTRUCTION_DATA_WRITE_MISSES", "Number of data write misses caused by MMX instructions." }, { 0x35, 0x1, NULL, "PIPELINE_FLUSHES_DUE_TO_WRONG_BRANCH_PREDICTIONS", "Number of pipeline flushes due to wrong branch prediction resolved " "in either the E-stage or the WB-stage." }, { 0x35, 0x2, NULL, "PIPELINE_FLUSHES_DUE_TO_WRONG_BRANCH_PREDICTIONS_RESOLVED_IN_WB_STAGE", "Number of pipeline flushes due to wrong branch prediction resolved " "in the WB-stage." }, { 0x36, 0x1, NULL, "MISALIGNED_DATA_MEMORY_REFERENCE_ON_MMX_INSTRUCTIONS", "Number of misaligned data memory references when executing MMX " "instructions." }, { 0x36, 0x2, NULL, "PIPELINE_ISTALL_FOR_MMX_INSTRUCTION_DATA_MEMORY_READS", "Number of clocks during pipeline stalls caused by waits from MMX " "instructions data memory reads." 
}, /* 0x39, counter 1: reserved */ }; const struct perfctr_event_set perfctr_p5mmx_event_set = { .cpu_type = PERFCTR_X86_INTEL_P5MMX, .event_prefix = "P5MMX_", .include = &p5mmx_and_mii_event_set, .nevents = ARRAY_SIZE(p5mmx_events), .events = p5mmx_events, }; /* * Cyrix 6x86MX, MII, and III events. */ static const struct perfctr_event mii_events[] = { { 0x039, 0x2, NULL, "RSB_OVERFLOWS" }, /* NOTE: The manuals list the following events as having codes 40-48. However, the 7-bit event code is actually split in the CESR, using bits 0-5 and 10, and similarly for the high half of the CESR. Since the driver also parses the other fields (bits 6-9) in a user's evntsel, the events are listed here with their actual in-CESR values. */ { 0x400, 0x3, NULL, "L2_TLB_MISSES" }, { 0x401, 0x3, NULL, "L1_TLB_DATA_MISS" }, { 0x402, 0x3, NULL, "L1_TLB_CODE_MISS" }, { 0x403, 0x3, NULL, "L1_TLB_MISS" }, { 0x404, 0x3, NULL, "TLB_FLUSHES" }, { 0x405, 0x3, NULL, "TLB_PAGE_INVALIDATES" }, { 0x406, 0x3, NULL, "TLB_PAGE_INVALIDATES_THAT_HIT" }, { 0x408, 0x3, NULL, "INSTRUCTIONS_DECODED" }, }; const struct perfctr_event_set perfctr_mii_event_set = { .cpu_type = PERFCTR_X86_CYRIX_MII, .event_prefix = "MII_", .include = &p5mmx_and_mii_event_set, .nevents = ARRAY_SIZE(mii_events), .events = mii_events, }; papi-5.6.0/src/components/libmsr/tests/000775 001750 001750 00000000000 13216244357 022146 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/ctests/realtime.c000664 001750 001750 00000005637 13216244360 020607 0ustar00jshenry1963jshenry1963000000 000000 #include #include #include #include "papi.h" #include "papi_test.h" int main( int argc, char **argv ) { int retval; long long elapsed_us, elapsed_cyc; const PAPI_hw_info_t *hw_info; int quiet; /* Set TESTS_QUIET variable */ quiet = tests_quiet( argc, argv ); retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) { test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); } hw_info = PAPI_get_hardware_info( 
); if ( hw_info == NULL ) { test_fail( __FILE__, __LINE__, "PAPI_get_hardware_info", 2 ); } elapsed_us = PAPI_get_real_usec( ); elapsed_cyc = PAPI_get_real_cyc( ); if (!quiet) { printf( "Testing real time clock. (CPU Max %d MHz, CPU Min %d MHz)\n", hw_info->cpu_max_mhz, hw_info->cpu_min_mhz ); printf( "Sleeping for 10 seconds.\n" ); } sleep( 10 ); elapsed_us = PAPI_get_real_usec( ) - elapsed_us; elapsed_cyc = PAPI_get_real_cyc( ) - elapsed_cyc; if (!quiet) { printf( "%lld us. %lld cyc.\n", elapsed_us, elapsed_cyc ); printf( "%f Computed MHz.\n", ( float ) elapsed_cyc / ( float ) elapsed_us ); } /* Elapsed microseconds and elapsed cycles are not as unambiguous as they appear. On Pentium III and 4, for example, cycles is a measured value, while useconds is computed from cycles and mhz. MHz is read from /proc/cpuinfo (on linux). Thus, any error in MHz is propagated to useconds. Conversely, on ultrasparc useconds are extracted from a system call (gethrtime()) and cycles are computed from useconds. Also, MHz comes from a scan of system info, Thus any error in gethrtime() propagates to both cycles and useconds, and cycles can be further impacted by errors in reported MHz. Without knowing the error bars on these system values, we can't really specify error ranges for our reported values, but we *DO* know that errors for at least one instance of Pentium 4 (torc17@utk) are on the order of one part per thousand. Newer multicore Intel processors seem to have broken the relationship between the clock rate reported in /proc/cpuinfo and the actual computed clock. To accomodate this artifact, the test no longer fails, but merely reports results out of range. */ if ( elapsed_us < 9000000 ) { if (!quiet) printf( "NOTE: Elapsed real time less than 9 seconds (%lld us)!\n",elapsed_us ); test_fail(__FILE__,__LINE__,"Real time too short",1); } if ( elapsed_us > 11000000 ) { if (!quiet) printf( "NOTE: Elapsed real time greater than 11 seconds! 
(%lld us)\n", elapsed_us ); test_fail(__FILE__,__LINE__,"Real time too long",1); } if ( ( float ) elapsed_cyc < 9.0 * hw_info->cpu_max_mhz * 1000000.0 ) if (!quiet) printf( "NOTE: Elapsed real cycles less than 9*MHz*1000000.0!\n" ); if ( ( float ) elapsed_cyc > 11.0 * hw_info->cpu_max_mhz * 1000000.0 ) if (!quiet) printf( "NOTE: Elapsed real cycles greater than 11*MHz*1000000.0!\n" ); test_pass( __FILE__ ); return 0; } papi-5.6.0/src/libpfm-3.y/examples_v3.x/notify_self.c000664 001750 001750 00000020614 13216244362 024367 0ustar00jshenry1963jshenry1963000000 000000 /* * notify_self.c - example of how you can use overflow notifications * * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include "detect_pmcs.h" #define SMPL_PERIOD 1000000000ULL static volatile unsigned long notification_received; #define NUM_PMCS PFMLIB_MAX_PMCS #define NUM_PMDS PFMLIB_MAX_PMDS static pfarg_pmr_t pdx[1]; static int ctx_fd; static char *event1_name; static void fatal_error(char *fmt,...) __attribute__((noreturn)); static void fatal_error(char *fmt, ...) { va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); exit(1); } static void warning(char *fmt, ...) { va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); } static void sigio_handler(int n, struct siginfo *info, struct sigcontext *sc) { pfarg_msg_t msg; int fd = ctx_fd; int r; if (fd != ctx_fd) fatal_error("handler does not get valid file descriptor\n"); if (event1_name && pfm_read(fd, 0, PFM_RW_PMD, pdx, sizeof(*pdx)) == -1) fatal_error("pfm_read(PMD): %s", strerror(errno)); retry: r = read(fd, &msg, sizeof(msg)); if (r != sizeof(msg)) { if(r == -1 && errno == EINTR) { warning("read interrupted, retrying\n"); goto retry; } fatal_error("cannot read overflow message: %s\n", strerror(errno)); } if (msg.type != PFM_MSG_OVFL) fatal_error("unexpected msg type: %d\n",msg.type); /* * increment our notification counter */ notification_received++; /* * XXX: risky to do printf() in signal handler! 
*/ if (event1_name) printf("Notification %lu: %"PRIu64" %s ip=0x%llx\n", notification_received, pdx[0].reg_value, event1_name, (unsigned long long)msg.pfm_ovfl_msg.msg_ovfl_ip); else printf("Notification %lu ip=0x%llx\n", notification_received, (unsigned long long)msg.pfm_ovfl_msg.msg_ovfl_ip); /* * And resume monitoring */ if (pfm_set_state(fd, 0, PFM_ST_RESTART) == -1) fatal_error("pfm_set_state(restart): %d\n", errno); } /* * infinite loop waiting for notification to get out */ void busyloop(void) { /* * busy loop to burn CPU cycles */ for(;notification_received < 3;) ; } #define BPL (sizeof(uint64_t)<<3) #define LBPL 6 static inline void pfm_bv_set(uint64_t *bv, uint16_t rnum) { bv[rnum>>LBPL] |= 1UL << (rnum&(BPL-1)); } int main(int argc, char **argv) { pfmlib_input_param_t inp; pfmlib_output_param_t outp; pfarg_pmr_t pc[NUM_PMCS]; pfarg_pmd_attr_t pd[NUM_PMDS]; pfarg_sinfo_t sif; pfmlib_options_t pfmlib_options; struct sigaction act; unsigned int i, num_counters; size_t len; int ret; /* * pass options to library (optional) */ memset(&pfmlib_options, 0, sizeof(pfmlib_options)); pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ pfmlib_options.pfm_verbose = 1; /* set to 1 for verbose */ pfm_set_options(&pfmlib_options); /* * Initialize pfm library (required before we can use it) */ ret = pfm_initialize(); if (ret != PFMLIB_SUCCESS) fatal_error("Cannot initialize library: %s\n", pfm_strerror(ret)); /* * Install the signal handler (SIGIO) */ memset(&act, 0, sizeof(act)); act.sa_handler = (sig_t)sigio_handler; sigaction (SIGIO, &act, 0); memset(pc, 0, sizeof(pc)); memset(pd, 0, sizeof(pd)); memset(&inp,0, sizeof(inp)); memset(&outp,0, sizeof(outp)); memset(&sif,0, sizeof(sif)); pfm_get_num_counters(&num_counters); if (pfm_get_cycle_event(&inp.pfp_events[0]) != PFMLIB_SUCCESS) fatal_error("cannot find cycle event\n"); if (pfm_get_inst_retired_event(&inp.pfp_events[1]) != PFMLIB_SUCCESS) fatal_error("cannot find inst retired event\n"); i = 2; /* * set the 
default privilege mode for all counters: * PFM_PLM3 : user level only */ inp.pfp_dfl_plm = PFM_PLM3; if (i > num_counters) { i = num_counters; printf("too many events provided (max=%d events), using first %d event(s)\n", num_counters, i); } inp.pfp_event_count = i; /* * how many counters we use */ if (i > 1) { pfm_get_max_event_name_len(&len); event1_name = malloc(len+1); if (event1_name == NULL) fatal_error("cannot allocate event name\n"); pfm_get_full_event_name(&inp.pfp_events[1], event1_name, len+1); } /* * now create the session for self monitoring/per-task */ ctx_fd = pfm_create(0, &sif); if (ctx_fd == -1) { if (errno == ENOSYS) { fatal_error("Your kernel does not have performance monitoring support!\n"); } fatal_error("cannot create session %s\n", strerror(errno)); } /* * build the pfp_unavail_pmcs bitmask by looking * at what perfmon has available. It is not always * the case that all PMU registers are actually available * to applications. For instance, on IA-32 platforms, some * registers may be reserved for the NMI watchdog timer. * * With this bitmap, the library knows which registers NOT to * use. Of source, it is possible that no valid assignement may * be possible if certina PMU registers are not available. */ detect_unavail_pmu_regs(&sif, &inp.pfp_unavail_pmcs, NULL); /* * let the library figure out the values for the PMCS */ if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) fatal_error("Cannot configure events: %s\n", pfm_strerror(ret)); /* * Now prepare the argument to initialize the PMDs and PMCS. 
*/ for (i=0; i < outp.pfp_pmc_count; i++) { pc[i].reg_num = outp.pfp_pmcs[i].reg_num; pc[i].reg_value = outp.pfp_pmcs[i].reg_value; } for (i=0; i < outp.pfp_pmd_count; i++) pd[i].reg_num = outp.pfp_pmds[i].reg_num; /* * We want to get notified when the counter used for our first * event overflows */ pd[0].reg_flags |= PFM_REGFL_OVFL_NOTIFY; /* * nothing to sample when only one counter */ if (inp.pfp_event_count > 1) { pfm_bv_set(pd[0].reg_reset_pmds, pd[1].reg_num); pdx[0].reg_num = pd[1].reg_num; } /* * we arm the first counter, such that it will overflow * after SMPL_PERIOD events have been observed */ pd[0].reg_value = - SMPL_PERIOD; pd[0].reg_long_reset = - SMPL_PERIOD; pd[0].reg_short_reset = - SMPL_PERIOD; /* * Now program the registers */ if (pfm_write(ctx_fd, 0, PFM_RW_PMC, pc, outp.pfp_pmc_count * sizeof(*pc))) fatal_error("pfm_write error errno %d\n",errno); if (pfm_write(ctx_fd, 0, PFM_RW_PMD_ATTR, pd, outp.pfp_pmd_count * sizeof(*pd))) fatal_error("pfm_write error errno %d\n",errno); /* * we want to monitor ourself */ if (pfm_attach(ctx_fd, 0, getpid())) fatal_error("pfm_attach error errno %d\n",errno); /* * setup asynchronous notification on the file descriptor */ ret = fcntl(ctx_fd, F_SETFL, fcntl(ctx_fd, F_GETFL, 0) | O_ASYNC); if (ret == -1) fatal_error("cannot set ASYNC: %s\n", strerror(errno)); /* * get ownership of the descriptor */ ret = fcntl(ctx_fd, F_SETOWN, getpid()); if (ret == -1) fatal_error("cannot setown: %s\n", strerror(errno)); /* * Let's roll now */ if (pfm_set_state(ctx_fd, 0, PFM_ST_START)) fatal_error("pfm_set_state(start) error errno %d\n", errno); busyloop(); if (pfm_set_state(ctx_fd, 0, PFM_ST_STOP)) fatal_error("pfm_set_state(stop) error errno %d\n", errno); /* * destroy our session */ close(ctx_fd); if (event1_name) free(event1_name); return 0; } papi-5.6.0/src/perfctr-2.6.x/examples/signal/arch.h000775 001750 001750 00000000466 13216244366 023737 0ustar00jshenry1963jshenry1963000000 000000 /* $Id: arch.h,v 1.1.2.1 2004/11/28 
22:40:31 mikpe Exp $ * Architecture-specific support code. * * Copyright (C) 2004 Mikael Pettersson */ extern unsigned long ucontext_pc(const struct ucontext *uc); extern void do_setup(const struct perfctr_info *info, struct perfctr_cpu_control *cpu_control); papi-5.6.0/src/perfctr-2.6.x/examples/signal/ppc.c000775 001750 001750 00000002225 13216244366 023572 0ustar00jshenry1963jshenry1963000000 000000 /* $Id: ppc.c,v 1.1.2.2 2004/12/19 13:53:11 mikpe Exp $ * PPC32-specific code. * * Copyright (C) 2004 Mikael Pettersson */ #include #include #include #include #include "libperfctr.h" #include "arch.h" unsigned long ucontext_pc(const struct ucontext *uc) { /* glibc-2.3.3 (YDL4) changed the type of uc->uc_mcontext, * breaking code which worked in glibc-2.3.1 (YDL3.0.1). * This formulation works with both, and is cleaner than * selecting glibc-2.3.3 specific code with "#ifdef NGREG". */ return uc->uc_mcontext.regs->nip; } void do_setup(const struct perfctr_info *info, struct perfctr_cpu_control *cpu_control) { memset(cpu_control, 0, sizeof *cpu_control); cpu_control->tsc_on = 1; cpu_control->nractrs = 0; cpu_control->nrictrs = 1; cpu_control->pmc_map[0] = 0; /* INSTRUCTIONS_COMPLETED */ cpu_control->evntsel[0] = 0x02; /* overflow after 100 events */ cpu_control->ireset[0] = 0x80000000-100; /* not kernel mode, enable interrupts, enable PMC1 interrupts */ cpu_control->ppc.mmcr0 = (1<<(31-1)) | (1<<(31-5)) | (1<<(31-16)); } papi-5.6.0/src/libpfm-3.y/lib/pfmlib_montecito.c000664 001750 001750 00000211732 13216244363 023456 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_montecito.c : support for the Dual-Core Itanium2 processor * * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. 
* Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include #include #include #include #include /* public headers */ #include /* private headers */ #include "pfmlib_priv.h" /* library private */ #include "pfmlib_priv_ia64.h" /* architecture private */ #include "pfmlib_montecito_priv.h" /* PMU private */ #include "montecito_events.h" /* PMU private */ #define is_ear(i) event_is_ear(montecito_pe+(i)) #define is_ear_tlb(i) event_is_ear_tlb(montecito_pe+(i)) #define is_ear_alat(i) event_is_ear_alat(montecito_pe+(i)) #define is_ear_cache(i) event_is_ear_cache(montecito_pe+(i)) #define is_iear(i) event_is_iear(montecito_pe+(i)) #define is_dear(i) event_is_dear(montecito_pe+(i)) #define is_etb(i) event_is_etb(montecito_pe+(i)) #define has_opcm(i) event_opcm_ok(montecito_pe+(i)) #define has_iarr(i) event_iarr_ok(montecito_pe+(i)) #define has_darr(i) event_darr_ok(montecito_pe+(i)) #define has_all(i) event_all_ok(montecito_pe+(i)) #define has_mesi(i) event_mesi_ok(montecito_pe+(i)) #define evt_use_opcm(e) ((e)->pfp_mont_opcm1.opcm_used != 0 || (e)->pfp_mont_opcm2.opcm_used !=0) #define evt_use_irange(e) ((e)->pfp_mont_irange.rr_used) #define evt_use_drange(e) ((e)->pfp_mont_drange.rr_used) #define evt_grp(e) (int)montecito_pe[e].pme_qualifiers.pme_qual.pme_group #define evt_set(e) (int)montecito_pe[e].pme_qualifiers.pme_qual.pme_set #define evt_umask(e) montecito_pe[e].pme_umask #define evt_type(e) (int)montecito_pe[e].pme_type #define evt_caf(e) (int)montecito_pe[e].pme_caf #define FINE_MODE_BOUNDARY_BITS 16 #define FINE_MODE_MASK ~((1U< PMC0 * 1 -> PMC1 * n -> PMCn * * The following are in the model specific rr_br[]: * IBR0 -> 0 * IBR1 -> 1 * ... * IBR7 -> 7 * DBR0 -> 0 * DBR1 -> 1 * ... * DBR7 -> 7 * * We do not use a mapping table, instead we make up the * values on the fly given the base. */ static int pfm_mont_detect(void) { int tmp; int ret = PFMLIB_ERR_NOTSUPP; tmp = pfm_ia64_get_cpu_family(); if (tmp == 0x20) { ret = PFMLIB_SUCCESS; } return ret; } /* * Check the event for incompatibilities. 
This is useful * for L1D and L2D related events. Due to wire limitations, * some caches events are separated into sets. There * are 6 sets for the L1D cache group and 8 sets for L2D group. * It is NOT possible to simultaneously measure events from * differents sets for L1D. For instance, you cannot * measure events from set0 and set1 in L1D cache group. The L2D * group allows up to two different sets to be active at the same * time. The first set is selected by the event in PMC4 and the second * set by the event in PMC6. Once the set is selected for PMC4, * the same set is locked for PMC5 and PMC8. Similarly, once the * set is selected for PMC6, the same set is locked for PMC7 and * PMC9. * * This function verifies that only one set of L1D is selected * and that no more than 2 sets are selected for L2D */ static int check_cross_groups(pfmlib_input_param_t *inp, unsigned int *l1d_event, unsigned long *l2d_set1_mask, unsigned long *l2d_set2_mask) { int g, s, s1, s2; unsigned int cnt = inp->pfp_event_count; pfmlib_event_t *e = inp->pfp_events; unsigned int i, j; unsigned long l2d_mask1 = 0, l2d_mask2 = 0; unsigned int l1d_event_idx = UNEXISTING_SET; /* * Let check the L1D constraint first * * There is no umask restriction for this group */ for (i=0; i < cnt; i++) { g = evt_grp(e[i].event); s = evt_set(e[i].event); if (g != PFMLIB_MONT_EVT_L1D_CACHE_GRP) continue; DPRINT("i=%u g=%d s=%d\n", i, g, s); l1d_event_idx = i; for (j=i+1; j < cnt; j++) { if (evt_grp(e[j].event) != g) continue; /* * if there is another event from the same group * but with a different set, then we return an error */ if (evt_set(e[j].event) != s) return PFMLIB_ERR_EVTSET; } } /* * Check that we have only up to two distinct * sets for L2D */ s1 = s2 = -1; for (i=0; i < cnt; i++) { g = evt_grp(e[i].event); if (g != PFMLIB_MONT_EVT_L2D_CACHE_GRP) continue; s = evt_set(e[i].event); /* * we have seen this set before, continue */ if (s1 == s) { l2d_mask1 |= 1UL << i; continue; } if (s2 == s) { 
l2d_mask2 |= 1UL << i; continue; } /* * record first of second set seen */ if (s1 == -1) { s1 = s; l2d_mask1 |= 1UL << i; } else if (s2 == -1) { s2 = s; l2d_mask2 |= 1UL << i; } else { /* * found a third set, that's not possible */ return PFMLIB_ERR_EVTSET; } } *l1d_event = l1d_event_idx; *l2d_set1_mask = l2d_mask1; *l2d_set2_mask = l2d_mask2; return PFMLIB_SUCCESS; } /* * Certain prefetch events must be treated specially when instruction range restriction * is used because they can only be constrained by IBRP1 in fine-mode. Other events * will use IBRP0 if tagged as a demand fetch OR IBPR1 if tagged as a prefetch match. * * Events which can be qualified by the two pairs depending on their tag: * - ISB_BUNPAIRS_IN * - L1I_FETCH_RAB_HIT * - L1I_FETCH_ISB_HIT * - L1I_FILLS * * This function returns the number of qualifying prefetch events found */ static int prefetch_events[]={ PME_MONT_L1I_PREFETCHES, PME_MONT_L1I_STRM_PREFETCHES, PME_MONT_L2I_PREFETCHES }; #define NPREFETCH_EVENTS sizeof(prefetch_events)/sizeof(int) static int prefetch_dual_events[]= { PME_MONT_ISB_BUNPAIRS_IN, PME_MONT_L1I_FETCH_RAB_HIT, PME_MONT_L1I_FETCH_ISB_HIT, PME_MONT_L1I_FILLS }; #define NPREFETCH_DUAL_EVENTS sizeof(prefetch_dual_events)/sizeof(int) /* * prefetch events must use IBRP1, unless they are dual and the user specified * PFMLIB_MONT_IRR_DEMAND_FETCH in rr_flags */ static int check_prefetch_events(pfmlib_input_param_t *inp, pfmlib_mont_input_rr_t *irr, unsigned int *count, int *base_idx, int *dup) { int code; int prefetch_codes[NPREFETCH_EVENTS]; int prefetch_dual_codes[NPREFETCH_DUAL_EVENTS]; unsigned int i, j; int c, flags; int found = 0, found_ibrp0 = 0, found_ibrp1 = 0; flags = irr->rr_flags & (PFMLIB_MONT_IRR_DEMAND_FETCH|PFMLIB_MONT_IRR_PREFETCH_MATCH); for(i=0; i < NPREFETCH_EVENTS; i++) { pfm_get_event_code(prefetch_events[i], &code); prefetch_codes[i] = code; } for(i=0; i < NPREFETCH_DUAL_EVENTS; i++) { pfm_get_event_code(prefetch_dual_events[i], &code); 
prefetch_dual_codes[i] = code; } for(i=0; i < inp->pfp_event_count; i++) { pfm_get_event_code(inp->pfp_events[i].event, &c); for(j=0; j < NPREFETCH_EVENTS; j++) { if (c == prefetch_codes[j]) { found++; found_ibrp1++; } } /* * for the dual events, users must specify one or both of the * PFMLIB_MONT_IRR_DEMAND_FETCH or PFMLIB_MONT_IRR_PREFETCH_MATCH */ for(j=0; j < NPREFETCH_DUAL_EVENTS; j++) { if (c == prefetch_dual_codes[j]) { found++; if (flags == 0) return PFMLIB_ERR_IRRFLAGS; if (flags & PFMLIB_MONT_IRR_DEMAND_FETCH) found_ibrp0++; if (flags & PFMLIB_MONT_IRR_PREFETCH_MATCH) found_ibrp1++; } } } *count = found; *dup = 0; /* * if both found_ibrp0 and found_ibrp1 > 0, then we need to duplicate * the range in ibrp0 to ibrp1. */ if (found) { *base_idx = found_ibrp0 ? 0 : 2; if (found_ibrp1 && found_ibrp0) *dup = 1; } return 0; } /* * look for CPU_OP_CYCLES_QUAL * Return: * 1 if found * 0 otherwise */ static int has_cpu_cycles_qual(pfmlib_input_param_t *inp) { unsigned int i; int code, c; pfm_get_event_code(PME_MONT_CPU_OP_CYCLES_QUAL, &code); for(i=0; i < inp->pfp_event_count; i++) { pfm_get_event_code(inp->pfp_events[i].event, &c); if (c == code) return 1; } return 0; } /* * IA64_INST_RETIRED (and subevents) is the only event which can be measured on all * 4 IBR when non-fine mode is not possible. 
* * This function returns: * - the number of events match the IA64_INST_RETIRED code * - in retired_mask to bottom 4 bits indicates which of the 4 INST_RETIRED event * is present */ static unsigned int check_inst_retired_events(pfmlib_input_param_t *inp, unsigned long *retired_mask) { int code; int c; unsigned int i, count, found = 0; unsigned long umask, mask; pfm_get_event_code(PME_MONT_IA64_INST_RETIRED, &code); count = inp->pfp_event_count; mask = 0; for(i=0; i < count; i++) { pfm_get_event_code(inp->pfp_events[i].event, &c); if (c == code) { pfm_mont_get_event_umask(inp->pfp_events[i].event, &umask); switch(umask) { case 0: mask |= 1; break; case 1: mask |= 2; break; case 2: mask |= 4; break; case 3: mask |= 8; break; } found++; } } if (retired_mask) *retired_mask = mask; return found; } static int check_fine_mode_possible(pfmlib_mont_input_rr_t *rr, int n) { pfmlib_mont_input_rr_desc_t *lim = rr->rr_limits; int i; for(i=0; i < n; i++) { if ((lim[i].rr_start & FINE_MODE_MASK) != (lim[i].rr_end & FINE_MODE_MASK)) return 0; } return 1; } /* * mode = 0 -> check code (enforce bundle alignment) * mode = 1 -> check data */ static int check_intervals(pfmlib_mont_input_rr_t *irr, int mode, unsigned int *n_intervals) { unsigned int i; pfmlib_mont_input_rr_desc_t *lim = irr->rr_limits; for(i=0; i < 4; i++) { /* end marker */ if (lim[i].rr_start == 0 && lim[i].rr_end == 0) break; /* invalid entry */ if (lim[i].rr_start >= lim[i].rr_end) return PFMLIB_ERR_IRRINVAL; if (mode == 0 && (lim[i].rr_start & 0xf || lim[i].rr_end & 0xf)) return PFMLIB_ERR_IRRALIGN; } *n_intervals = i; return PFMLIB_SUCCESS; } /* * It is not possible to measure more than one of the * L2D_OZQ_CANCELS0, L2D_OZQ_CANCELS1 at the same time. 
*/ static int cancel_events[]= { PME_MONT_L2D_OZQ_CANCELS0_ACQ, PME_MONT_L2D_OZQ_CANCELS1_ANY }; #define NCANCEL_EVENTS sizeof(cancel_events)/sizeof(int) static int check_cancel_events(pfmlib_input_param_t *inp) { unsigned int i, j, count; int code; int cancel_codes[NCANCEL_EVENTS]; int idx = -1; for(i=0; i < NCANCEL_EVENTS; i++) { pfm_get_event_code(cancel_events[i], &code); cancel_codes[i] = code; } count = inp->pfp_event_count; for(i=0; i < count; i++) { for (j=0; j < NCANCEL_EVENTS; j++) { pfm_get_event_code(inp->pfp_events[i].event, &code); if (code == cancel_codes[j]) { if (idx != -1) { return PFMLIB_ERR_INVAL; } idx = inp->pfp_events[i].event; } } } return PFMLIB_SUCCESS; } /* * Automatically dispatch events to corresponding counters following constraints. */ static unsigned int l2d_set1_cnts[]={ 4, 5, 8 }; static unsigned int l2d_set2_cnts[]={ 6, 7, 9 }; static int pfm_mont_dispatch_counters(pfmlib_input_param_t *inp, pfmlib_mont_input_param_t *mod_in, pfmlib_output_param_t *outp) { pfmlib_mont_input_param_t *param = mod_in; pfm_mont_pmc_reg_t reg; pfmlib_event_t *e; pfmlib_reg_t *pc, *pd; pfmlib_regmask_t avail_cntrs, impl_cntrs; unsigned int i,j, k, max_cnt; unsigned int assign[PMU_MONT_NUM_COUNTERS]; unsigned int m, cnt; unsigned int l1d_set; unsigned long l2d_set1_mask, l2d_set2_mask, evt_mask, mesi; unsigned long not_assigned_events, cnt_mask; int l2d_set1_p, l2d_set2_p; int ret; e = inp->pfp_events; pc = outp->pfp_pmcs; pd = outp->pfp_pmds; cnt = inp->pfp_event_count; if (PFMLIB_DEBUG()) for (m=0; m < cnt; m++) { DPRINT("ev[%d]=%s counters=0x%lx\n", m, montecito_pe[e[m].event].pme_name, montecito_pe[e[m].event].pme_counters); } if (cnt > PMU_MONT_NUM_COUNTERS) return PFMLIB_ERR_TOOMANY; l1d_set = UNEXISTING_SET; ret = check_cross_groups(inp, &l1d_set, &l2d_set1_mask, &l2d_set2_mask); if (ret != PFMLIB_SUCCESS) return ret; ret = check_cancel_events(inp); if (ret != PFMLIB_SUCCESS) return ret; /* * at this point, we know that: * - we have at most 1 L1D 
set * - we have at most 2 L2D sets * - cancel events are compatible */ DPRINT("l1d_set=%u l2d_set1_mask=0x%lx l2d_set2_mask=0x%lx\n", l1d_set, l2d_set1_mask, l2d_set2_mask); /* * first, place L1D cache event in PMC5 * * this is the strongest constraint */ pfm_get_impl_counters(&impl_cntrs); pfm_regmask_andnot(&avail_cntrs, &impl_cntrs, &inp->pfp_unavail_pmcs); not_assigned_events = 0; DPRINT("avail_cntrs=0x%lx\n", avail_cntrs.bits[0]); /* * we do not check ALL_THRD here because at least * one event has to be in PMC5 for this group */ if (l1d_set != UNEXISTING_SET) { if (!pfm_regmask_isset(&avail_cntrs, 5)) return PFMLIB_ERR_NOASSIGN; assign[l1d_set] = 5; pfm_regmask_clr(&avail_cntrs, 5); } l2d_set1_p = l2d_set2_p = 0; /* * assign L2D set1 and set2 counters */ for (i=0; i < cnt ; i++) { evt_mask = 1UL << i; /* * place l2d set1 events. First 3 go to designated * counters, the rest is placed elsewhere in the final * pass */ if (l2d_set1_p < 3 && (l2d_set1_mask & evt_mask)) { assign[i] = l2d_set1_cnts[l2d_set1_p]; if (!pfm_regmask_isset(&avail_cntrs, assign[i])) return PFMLIB_ERR_NOASSIGN; pfm_regmask_clr(&avail_cntrs, assign[i]); l2d_set1_p++; continue; } /* * same as above but for l2d set2 */ if (l2d_set2_p < 3 && (l2d_set2_mask & evt_mask)) { assign[i] = l2d_set2_cnts[l2d_set2_p]; if (!pfm_regmask_isset(&avail_cntrs, assign[i])) return PFMLIB_ERR_NOASSIGN; pfm_regmask_clr(&avail_cntrs, assign[i]); l2d_set2_p++; continue; } /* * if not l2d nor l1d, then defer placement until final pass */ if (i != l1d_set) not_assigned_events |= evt_mask; DPRINT("phase 1: i=%u avail_cntrs=0x%lx l2d_set1_p=%d l2d_set2_p=%d not_assigned=0x%lx\n", i, avail_cntrs.bits[0], l2d_set1_p, l2d_set2_p, not_assigned_events); } /* * assign BUS_* ER_* events (work only in PMC4-PMC9) */ evt_mask = not_assigned_events; for (i=0; evt_mask ; i++, evt_mask >>=1) { if ((evt_mask & 0x1) == 0) continue; cnt_mask = montecito_pe[e[i].event].pme_counters; /* * only interested in events with restricted set of 
counters */ if (cnt_mask == 0xfff0) continue; for(j=0; cnt_mask; j++, cnt_mask >>=1) { if ((cnt_mask & 0x1) == 0) continue; DPRINT("phase 2: i=%d j=%d cnt_mask=0x%lx avail_cntrs=0x%lx not_assigned_evnts=0x%lx\n", i, j, cnt_mask, avail_cntrs.bits[0], not_assigned_events); if (!pfm_regmask_isset(&avail_cntrs, j)) continue; assign[i] = j; not_assigned_events &= ~(1UL << i); pfm_regmask_clr(&avail_cntrs, j); break; } if (cnt_mask == 0) return PFMLIB_ERR_NOASSIGN; } /* * assign the rest of the events (no constraints) */ evt_mask = not_assigned_events; max_cnt = PMU_MONT_FIRST_COUNTER + PMU_MONT_NUM_COUNTERS; for (i=0, j=0; evt_mask ; i++, evt_mask >>=1) { DPRINT("phase 3a: i=%d j=%d evt_mask=0x%lx avail_cntrs=0x%lx not_assigned_evnts=0x%lx\n", i, j, evt_mask, avail_cntrs.bits[0], not_assigned_events); if ((evt_mask & 0x1) == 0) continue; while(j < max_cnt && !pfm_regmask_isset(&avail_cntrs, j)) { DPRINT("phase 3: i=%d j=%d evt_mask=0x%lx avail_cntrs=0x%lx not_assigned_evnts=0x%lx\n", i, j, evt_mask, avail_cntrs.bits[0], not_assigned_events); j++; } if (j == max_cnt) return PFMLIB_ERR_NOASSIGN; assign[i] = j; j++; } for (j=0; j < cnt ; j++ ) { mesi = 0; /* * XXX: we do not support .all placement just yet */ if (param && param->pfp_mont_counters[j].flags & PFMLIB_MONT_FL_EVT_ALL_THRD) { DPRINT(".all mode is not yet supported by libpfm\n"); return PFMLIB_ERR_NOTSUPP; } if (has_mesi(e[j].event)) { for(k=0;k< e[j].num_masks; k++) { mesi |= 1UL << e[j].unit_masks[k]; } /* by default we capture everything */ if (mesi == 0) mesi = 0xf; } reg.pmc_val = 0; /* clear all, bits 26-27 must be zero for proper operations */ /* if plm is 0, then assume not specified per-event and use default */ reg.pmc_plm = inp->pfp_events[j].plm ? inp->pfp_events[j].plm : inp->pfp_dfl_plm; reg.pmc_oi = 0; /* let the user/OS deal with this field */ reg.pmc_pm = inp->pfp_flags & PFMLIB_PFP_SYSTEMWIDE ? 1 : 0; reg.pmc_thres = param ? 
param->pfp_mont_counters[j].thres: 0; reg.pmc_ism = 0x2; /* force IA-64 mode */ reg.pmc_umask = is_ear(e[j].event) ? 0x0 : montecito_pe[e[j].event].pme_umask; reg.pmc_es = montecito_pe[e[j].event].pme_code; reg.pmc_all = 0; /* XXX force self for now */ reg.pmc_m = (mesi>>3) & 0x1; reg.pmc_e = (mesi>>2) & 0x1; reg.pmc_s = (mesi>>1) & 0x1; reg.pmc_i = mesi & 0x1; /* * Note that we don't force PMC4.pmc_ena = 1 because the kernel takes care of this for us. * This way we don't have to program something in PMC4 even when we don't use it */ pc[j].reg_num = assign[j]; pc[j].reg_value = reg.pmc_val; pc[j].reg_addr = pc[j].reg_alt_addr = assign[j]; pd[j].reg_num = assign[j]; pd[j].reg_addr = pd[j].reg_alt_addr = assign[j]; __pfm_vbprintf("[PMC%u(pmc%u)=0x%06lx m=%d e=%d s=%d i=%d thres=%d all=%d es=0x%02x plm=%d umask=0x%x pm=%d ism=0x%x oi=%d] %s\n", assign[j], assign[j], reg.pmc_val, reg.pmc_m, reg.pmc_e, reg.pmc_s, reg.pmc_i, reg.pmc_thres, reg.pmc_all, reg.pmc_es,reg.pmc_plm, reg.pmc_umask, reg.pmc_pm, reg.pmc_ism, reg.pmc_oi, montecito_pe[e[j].event].pme_name); __pfm_vbprintf("[PMD%u(pmd%u)]\n", pd[j].reg_num, pd[j].reg_num); } /* number of PMC registers programmed */ outp->pfp_pmc_count = cnt; outp->pfp_pmd_count = cnt; return PFMLIB_SUCCESS; } static int pfm_dispatch_iear(pfmlib_input_param_t *inp, pfmlib_mont_input_param_t *mod_in, pfmlib_output_param_t *outp) { pfm_mont_pmc_reg_t reg; pfmlib_mont_input_param_t *param = mod_in; pfmlib_reg_t *pc, *pd; pfmlib_mont_input_param_t fake_param; unsigned int pos1, pos2; unsigned int i, count; pc = outp->pfp_pmcs; pd = outp->pfp_pmds; pos1 = outp->pfp_pmc_count; pos2 = outp->pfp_pmd_count; count = inp->pfp_event_count; for (i=0; i < count; i++) { if (is_iear(inp->pfp_events[i].event)) break; } if (param == NULL || param->pfp_mont_iear.ear_used == 0) { /* * case 3: no I-EAR event, no (or nothing) in param->pfp_mont_iear.ear_used */ if (i == count) return PFMLIB_SUCCESS; memset(&fake_param, 0, sizeof(fake_param)); param = 
&fake_param; /* * case 1: extract all information for event (name) */ pfm_mont_get_ear_mode(inp->pfp_events[i].event, ¶m->pfp_mont_iear.ear_mode); param->pfp_mont_iear.ear_umask = evt_umask(inp->pfp_events[i].event); DPRINT("I-EAR event with no info\n"); } /* * case 2: ear_used=1, event is defined, we use the param info as it is more precise * case 4: ear_used=1, no event (free running I-EAR), use param info */ reg.pmc_val = 0; if (param->pfp_mont_iear.ear_mode == PFMLIB_MONT_EAR_TLB_MODE) { /* if plm is 0, then assume not specified per-event and use default */ reg.pmc37_mont_tlb_reg.iear_plm = param->pfp_mont_iear.ear_plm ? param->pfp_mont_iear.ear_plm : inp->pfp_dfl_plm; reg.pmc37_mont_tlb_reg.iear_pm = inp->pfp_flags & PFMLIB_PFP_SYSTEMWIDE ? 1 : 0; reg.pmc37_mont_tlb_reg.iear_ct = 0x0; reg.pmc37_mont_tlb_reg.iear_umask = param->pfp_mont_iear.ear_umask; } else if (param->pfp_mont_iear.ear_mode == PFMLIB_MONT_EAR_CACHE_MODE) { /* if plm is 0, then assume not specified per-event and use default */ reg.pmc37_mont_cache_reg.iear_plm = param->pfp_mont_iear.ear_plm ? param->pfp_mont_iear.ear_plm : inp->pfp_dfl_plm; reg.pmc37_mont_cache_reg.iear_pm = inp->pfp_flags & PFMLIB_PFP_SYSTEMWIDE ? 
1 : 0; reg.pmc37_mont_cache_reg.iear_ct = 0x1; reg.pmc37_mont_cache_reg.iear_umask = param->pfp_mont_iear.ear_umask; } else { DPRINT("ALAT mode not supported in I-EAR mode\n"); return PFMLIB_ERR_INVAL; } if (pfm_regmask_isset(&inp->pfp_unavail_pmcs, 37)) return PFMLIB_ERR_NOASSIGN; pc[pos1].reg_num = 37; /* PMC37 is I-EAR config register */ pc[pos1].reg_value = reg.pmc_val; pc[pos1].reg_addr = pc[pos1].reg_alt_addr = 37; pos1++; pd[pos2].reg_num = 34; pd[pos2].reg_addr = pd[pos2].reg_alt_addr = 34; pos2++; pd[pos2].reg_num = 35; pd[pos2].reg_addr = pd[pos2].reg_alt_addr = 35; pos2++; if (param->pfp_mont_iear.ear_mode == PFMLIB_MONT_EAR_TLB_MODE) { __pfm_vbprintf("[PMC37(pmc37)=0x%lx ctb=tlb plm=%d pm=%d umask=0x%x]\n", reg.pmc_val, reg.pmc37_mont_tlb_reg.iear_plm, reg.pmc37_mont_tlb_reg.iear_pm, reg.pmc37_mont_tlb_reg.iear_umask); } else { __pfm_vbprintf("[PMC37(pmc37)=0x%lx ctb=cache plm=%d pm=%d umask=0x%x]\n", reg.pmc_val, reg.pmc37_mont_cache_reg.iear_plm, reg.pmc37_mont_cache_reg.iear_pm, reg.pmc37_mont_cache_reg.iear_umask); } __pfm_vbprintf("[PMD34(pmd34)]\n[PMD35(pmd35)\n"); /* update final number of entries used */ outp->pfp_pmc_count = pos1; outp->pfp_pmd_count = pos2; return PFMLIB_SUCCESS; } static int pfm_dispatch_dear(pfmlib_input_param_t *inp, pfmlib_mont_input_param_t *mod_in, pfmlib_output_param_t *outp) { pfm_mont_pmc_reg_t reg; pfmlib_mont_input_param_t *param = mod_in; pfmlib_reg_t *pc, *pd; pfmlib_mont_input_param_t fake_param; unsigned int pos1, pos2; unsigned int i, count; pc = outp->pfp_pmcs; pd = outp->pfp_pmds; pos1 = outp->pfp_pmc_count; pos2 = outp->pfp_pmd_count; count = inp->pfp_event_count; for (i=0; i < count; i++) { if (is_dear(inp->pfp_events[i].event)) break; } if (param == NULL || param->pfp_mont_dear.ear_used == 0) { /* * case 3: no D-EAR event, no (or nothing) in param->pfp_mont_dear.ear_used */ if (i == count) return PFMLIB_SUCCESS; memset(&fake_param, 0, sizeof(fake_param)); param = &fake_param; /* * case 1: extract all 
information for event (name) */ pfm_mont_get_ear_mode(inp->pfp_events[i].event, ¶m->pfp_mont_dear.ear_mode); param->pfp_mont_dear.ear_umask = evt_umask(inp->pfp_events[i].event); DPRINT("D-EAR event with no info\n"); } /* sanity check on the mode */ if ( param->pfp_mont_dear.ear_mode != PFMLIB_MONT_EAR_CACHE_MODE && param->pfp_mont_dear.ear_mode != PFMLIB_MONT_EAR_TLB_MODE && param->pfp_mont_dear.ear_mode != PFMLIB_MONT_EAR_ALAT_MODE) return PFMLIB_ERR_INVAL; /* * case 2: ear_used=1, event is defined, we use the param info as it is more precise * case 4: ear_used=1, no event (free running D-EAR), use param info */ reg.pmc_val = 0; /* if plm is 0, then assume not specified per-event and use default */ reg.pmc40_mont_reg.dear_plm = param->pfp_mont_dear.ear_plm ? param->pfp_mont_dear.ear_plm : inp->pfp_dfl_plm; reg.pmc40_mont_reg.dear_pm = inp->pfp_flags & PFMLIB_PFP_SYSTEMWIDE ? 1 : 0; reg.pmc40_mont_reg.dear_mode = param->pfp_mont_dear.ear_mode; reg.pmc40_mont_reg.dear_umask = param->pfp_mont_dear.ear_umask; reg.pmc40_mont_reg.dear_ism = 0x2; /* force IA-64 mode */ if (pfm_regmask_isset(&inp->pfp_unavail_pmcs, 40)) return PFMLIB_ERR_NOASSIGN; pc[pos1].reg_num = 40; /* PMC11 is D-EAR config register */ pc[pos1].reg_value = reg.pmc_val; pc[pos1].reg_addr = pc[pos1].reg_alt_addr = 40; pos1++; pd[pos2].reg_num = 32; pd[pos2].reg_addr = pd[pos2].reg_alt_addr = 32; pos2++; pd[pos2].reg_num = 33; pd[pos2].reg_addr = pd[pos2].reg_alt_addr = 33; pos2++; pd[pos2].reg_num = 36; pd[pos2].reg_addr = pd[pos2].reg_alt_addr = 36; pos2++; __pfm_vbprintf("[PMC40(pmc40)=0x%lx mode=%s plm=%d pm=%d ism=0x%x umask=0x%x]\n", reg.pmc_val, reg.pmc40_mont_reg.dear_mode == 0 ? "L1D" : (reg.pmc40_mont_reg.dear_mode == 1 ? 
"L1DTLB" : "ALAT"), reg.pmc40_mont_reg.dear_plm, reg.pmc40_mont_reg.dear_pm, reg.pmc40_mont_reg.dear_ism, reg.pmc40_mont_reg.dear_umask); __pfm_vbprintf("[PMD32(pmd32)]\n[PMD33(pmd33)\nPMD36(pmd36)\n"); /* update final number of entries used */ outp->pfp_pmc_count = pos1; outp->pfp_pmd_count = pos2; return PFMLIB_SUCCESS; } static int pfm_dispatch_opcm(pfmlib_input_param_t *inp, pfmlib_mont_input_param_t *mod_in, pfmlib_output_param_t *outp, pfmlib_mont_output_param_t *mod_out) { pfmlib_mont_input_param_t *param = mod_in; pfmlib_reg_t *pc = outp->pfp_pmcs; pfm_mont_pmc_reg_t reg1, reg2, pmc36; unsigned int i, has_1st_pair, has_2nd_pair, count; unsigned int pos = outp->pfp_pmc_count; int used_pmc32, used_pmc34; if (param == NULL) return PFMLIB_SUCCESS; #define PMC36_DFL_VAL 0xfffffff0 /* * mandatory default value for PMC36 as described in the documentation * all monitoring is opcode constrained. Better make sure the match/mask * is set to match everything! It looks weird for the default value! */ pmc36.pmc_val = PMC36_DFL_VAL; reg1.pmc_val = 0x030f01ffffffffff; reg2.pmc_val = 0; used_pmc32 = param->pfp_mont_opcm1.opcm_used; used_pmc34 = param->pfp_mont_opcm2.opcm_used; /* * check in any feature is used. 
* PMC36 must be setup when opcode matching is used OR when code range restriction is used */ if (used_pmc32 == 0 && used_pmc34 == 0 && param->pfp_mont_irange.rr_used == 0) return 0; /* * check for rr_nbr_used to make sure that the range request produced something on output */ if (used_pmc32 || (param->pfp_mont_irange.rr_used && mod_out->pfp_mont_irange.rr_nbr_used) ) { /* * if not used, ignore all bits */ if (used_pmc32) { reg1.pmc32_34_mont_reg.opcm_mask = param->pfp_mont_opcm1.opcm_mask; reg1.pmc32_34_mont_reg.opcm_b = param->pfp_mont_opcm1.opcm_b; reg1.pmc32_34_mont_reg.opcm_f = param->pfp_mont_opcm1.opcm_f; reg1.pmc32_34_mont_reg.opcm_i = param->pfp_mont_opcm1.opcm_i; reg1.pmc32_34_mont_reg.opcm_m = param->pfp_mont_opcm1.opcm_m; reg2.pmc33_35_mont_reg.opcm_match = param->pfp_mont_opcm1.opcm_match; } if (param->pfp_mont_irange.rr_used) { reg1.pmc32_34_mont_reg.opcm_ig_ad = 0; reg1.pmc32_34_mont_reg.opcm_inv = param->pfp_mont_irange.rr_flags & PFMLIB_MONT_RR_INV ? 1 : 0; } else { /* clear range restriction fields when none is used */ reg1.pmc32_34_mont_reg.opcm_ig_ad = 1; reg1.pmc32_34_mont_reg.opcm_inv = 0; } if (pfm_regmask_isset(&inp->pfp_unavail_pmcs, 32)) return PFMLIB_ERR_NOASSIGN; pc[pos].reg_num = 32; pc[pos].reg_value = reg1.pmc_val; pc[pos].reg_addr = pc[pos].reg_alt_addr = 32; pos++; /* * will be constrained by PMC32 */ if (used_pmc32) { if (pfm_regmask_isset(&inp->pfp_unavail_pmcs, 33)) return PFMLIB_ERR_NOASSIGN; /* * used pmc33 only when we have active opcode matching */ pc[pos].reg_num = 33; pc[pos].reg_value = reg2.pmc_val; pc[pos].reg_addr = pc[pos].reg_alt_addr = 33; pos++; has_1st_pair = has_2nd_pair = 0; count = inp->pfp_event_count; for(i=0; i < count; i++) { if (inp->pfp_events[i].event == PME_MONT_IA64_TAGGED_INST_RETIRED_IBRP0_PMC32_33) has_1st_pair=1; if (inp->pfp_events[i].event == PME_MONT_IA64_TAGGED_INST_RETIRED_IBRP2_PMC32_33) has_2nd_pair=1; } if (has_1st_pair || has_2nd_pair == 0) pmc36.pmc36_mont_reg.opcm_ch0_ig_opcm = 0; if 
(has_2nd_pair || has_1st_pair == 0) pmc36.pmc36_mont_reg.opcm_ch2_ig_opcm = 0; } __pfm_vbprintf("[PMC32(pmc32)=0x%lx m=%d i=%d f=%d b=%d mask=0x%lx inv=%d ig_ad=%d]\n", reg1.pmc_val, reg1.pmc32_34_mont_reg.opcm_m, reg1.pmc32_34_mont_reg.opcm_i, reg1.pmc32_34_mont_reg.opcm_f, reg1.pmc32_34_mont_reg.opcm_b, reg1.pmc32_34_mont_reg.opcm_mask, reg1.pmc32_34_mont_reg.opcm_inv, reg1.pmc32_34_mont_reg.opcm_ig_ad); if (used_pmc32) __pfm_vbprintf("[PMC33(pmc33)=0x%lx match=0x%lx]\n", reg2.pmc_val, reg2.pmc33_35_mont_reg.opcm_match); } /* * will be constrained by PMC34 */ if (used_pmc34) { reg1.pmc_val = 0x01ffffffffff; /* pmc34 default value */ reg2.pmc_val = 0; reg1.pmc32_34_mont_reg.opcm_mask = param->pfp_mont_opcm2.opcm_mask; reg1.pmc32_34_mont_reg.opcm_b = param->pfp_mont_opcm2.opcm_b; reg1.pmc32_34_mont_reg.opcm_f = param->pfp_mont_opcm2.opcm_f; reg1.pmc32_34_mont_reg.opcm_i = param->pfp_mont_opcm2.opcm_i; reg1.pmc32_34_mont_reg.opcm_m = param->pfp_mont_opcm2.opcm_m; reg2.pmc33_35_mont_reg.opcm_match = param->pfp_mont_opcm2.opcm_match; if (pfm_regmask_isset(&inp->pfp_unavail_pmcs, 34)) return PFMLIB_ERR_NOASSIGN; if (pfm_regmask_isset(&inp->pfp_unavail_pmcs, 35)) return PFMLIB_ERR_NOASSIGN; pc[pos].reg_num = 34; pc[pos].reg_value = reg1.pmc_val; pc[pos].reg_addr = pc[pos].reg_alt_addr = 34; pos++; pc[pos].reg_num = 35; pc[pos].reg_value = reg2.pmc_val; pc[pos].reg_addr = pc[pos].reg_alt_addr = 35; pos++; has_1st_pair = has_2nd_pair = 0; count = inp->pfp_event_count; for(i=0; i < count; i++) { if (inp->pfp_events[i].event == PME_MONT_IA64_TAGGED_INST_RETIRED_IBRP1_PMC34_35) has_1st_pair=1; if (inp->pfp_events[i].event == PME_MONT_IA64_TAGGED_INST_RETIRED_IBRP3_PMC34_35) has_2nd_pair=1; } if (has_1st_pair || has_2nd_pair == 0) pmc36.pmc36_mont_reg.opcm_ch1_ig_opcm = 0; if (has_2nd_pair || has_1st_pair == 0) pmc36.pmc36_mont_reg.opcm_ch3_ig_opcm = 0; __pfm_vbprintf("[PMC34(pmc34)=0x%lx m=%d i=%d f=%d b=%d mask=0x%lx]\n", reg1.pmc_val, reg1.pmc32_34_mont_reg.opcm_m, 
reg1.pmc32_34_mont_reg.opcm_i, reg1.pmc32_34_mont_reg.opcm_f, reg1.pmc32_34_mont_reg.opcm_b, reg1.pmc32_34_mont_reg.opcm_mask); __pfm_vbprintf("[PMC35(pmc35)=0x%lx match=0x%lx]\n", reg2.pmc_val, reg2.pmc33_35_mont_reg.opcm_match); } if (pmc36.pmc_val != PMC36_DFL_VAL) { if (pfm_regmask_isset(&inp->pfp_unavail_pmcs, 36)) return PFMLIB_ERR_NOASSIGN; pc[pos].reg_num = 36; pc[pos].reg_value = pmc36.pmc_val; pc[pos].reg_addr = pc[pos].reg_alt_addr = 36; pos++; __pfm_vbprintf("[PMC36(pmc36)=0x%lx ch0_ig_op=%d ch1_ig_op=%d ch2_ig_op=%d ch3_ig_op=%d]\n", pmc36.pmc_val, pmc36.pmc36_mont_reg.opcm_ch0_ig_opcm, pmc36.pmc36_mont_reg.opcm_ch1_ig_opcm, pmc36.pmc36_mont_reg.opcm_ch2_ig_opcm, pmc36.pmc36_mont_reg.opcm_ch3_ig_opcm); } outp->pfp_pmc_count = pos; return PFMLIB_SUCCESS; } static int pfm_dispatch_etb(pfmlib_input_param_t *inp, pfmlib_mont_input_param_t *mod_in, pfmlib_output_param_t *outp) { pfmlib_event_t *e= inp->pfp_events; pfm_mont_pmc_reg_t reg; pfmlib_mont_input_param_t *param = mod_in; pfmlib_reg_t *pc, *pd; pfmlib_mont_input_param_t fake_param; int found_etb = 0, found_bad_dear = 0; int has_etb_param; unsigned int i, pos1, pos2; unsigned int count; pc = outp->pfp_pmcs; pd = outp->pfp_pmds; pos1 = outp->pfp_pmc_count; pos2 = outp->pfp_pmd_count; /* * explicit ETB settings */ has_etb_param = param && param->pfp_mont_etb.etb_used; reg.pmc_val = 0UL; /* * we need to scan all events looking for DEAR ALAT/TLB due to incompatibility. 
* In this case PMC39 must be forced to zero */ count = inp->pfp_event_count; for (i=0; i < count; i++) { if (is_etb(e[i].event)) found_etb = 1; /* * keep track of the first ETB event */ /* look only for DEAR TLB */ if (is_dear(e[i].event) && (is_ear_tlb(e[i].event) || is_ear_alat(e[i].event))) { found_bad_dear = 1; } } DPRINT("found_etb=%d found_bar_dear=%d\n", found_etb, found_bad_dear); /* * did not find D-EAR TLB/ALAT event, need to check param structure */ if (found_bad_dear == 0 && param && param->pfp_mont_dear.ear_used == 1) { if ( param->pfp_mont_dear.ear_mode == PFMLIB_MONT_EAR_TLB_MODE || param->pfp_mont_dear.ear_mode == PFMLIB_MONT_EAR_ALAT_MODE) found_bad_dear = 1; } /* * no explicit ETB event and no special case to deal with (cover part of case 3) */ if (found_etb == 0 && has_etb_param == 0 && found_bad_dear == 0) return PFMLIB_SUCCESS; if (has_etb_param == 0) { /* * case 3: no ETB event, etb_used=0 but found_bad_dear=1, need to cleanup PMC12 */ if (found_etb == 0) goto assign_zero; /* * case 1: we have a ETB event but no param, default setting is to capture * all branches. */ memset(&fake_param, 0, sizeof(fake_param)); param = &fake_param; param->pfp_mont_etb.etb_tm = 0x3; /* all branches */ param->pfp_mont_etb.etb_ptm = 0x3; /* all branches */ param->pfp_mont_etb.etb_ppm = 0x3; /* all branches */ param->pfp_mont_etb.etb_brt = 0x0; /* all branches */ DPRINT("ETB event with no info\n"); } /* * case 2: ETB event in the list, param provided * case 4: no ETB event, param provided (free running mode) */ reg.pmc39_mont_reg.etbc_plm = param->pfp_mont_etb.etb_plm ? param->pfp_mont_etb.etb_plm : inp->pfp_dfl_plm; reg.pmc39_mont_reg.etbc_pm = inp->pfp_flags & PFMLIB_PFP_SYSTEMWIDE ? 
1 : 0; reg.pmc39_mont_reg.etbc_ds = 0; /* 1 is reserved */ reg.pmc39_mont_reg.etbc_tm = param->pfp_mont_etb.etb_tm & 0x3; reg.pmc39_mont_reg.etbc_ptm = param->pfp_mont_etb.etb_ptm & 0x3; reg.pmc39_mont_reg.etbc_ppm = param->pfp_mont_etb.etb_ppm & 0x3; reg.pmc39_mont_reg.etbc_brt = param->pfp_mont_etb.etb_brt & 0x3; /* * if DEAR-ALAT or DEAR-TLB is set then PMC12 must be set to zero (see documentation p. 87) * * D-EAR ALAT/TLB and ETB cannot be used at the same time. * From documentation: PMC12 must be zero in this mode; else the wrong IP for misses * coming right after a mispredicted branch. * * D-EAR cache is fine. */ assign_zero: if (found_bad_dear && reg.pmc_val != 0UL) return PFMLIB_ERR_EVTINCOMP; if (pfm_regmask_isset(&inp->pfp_unavail_pmcs, 39)) return PFMLIB_ERR_NOASSIGN; pc[pos1].reg_num = 39; pc[pos1].reg_value = reg.pmc_val; pc[pos1].reg_addr = pc[pos1].reg_alt_addr = 39; pos1++; __pfm_vbprintf("[PMC39(pmc39)=0x%lx plm=%d pm=%d ds=%d tm=%d ptm=%d ppm=%d brt=%d]\n", reg.pmc_val, reg.pmc39_mont_reg.etbc_plm, reg.pmc39_mont_reg.etbc_pm, reg.pmc39_mont_reg.etbc_ds, reg.pmc39_mont_reg.etbc_tm, reg.pmc39_mont_reg.etbc_ptm, reg.pmc39_mont_reg.etbc_ppm, reg.pmc39_mont_reg.etbc_brt); /* * only add ETB PMDs when actually using BTB. 
* Not needed when dealing with D-EAR TLB and DEAR-ALAT * PMC39 restriction */ if (found_etb || has_etb_param) { pd[pos2].reg_num = 38; pd[pos2].reg_addr = pd[pos2].reg_alt_addr = 38; pos2++; pd[pos2].reg_num = 39; pd[pos2].reg_addr = pd[pos2].reg_alt_addr = 39; pos2++; __pfm_vbprintf("[PMD38(pmd38)]\n[PMD39(pmd39)\n"); for(i=48; i < 64; i++, pos2++) { pd[pos2].reg_num = i; pd[pos2].reg_addr = pd[pos2].reg_alt_addr = i; __pfm_vbprintf("[PMD%u(pmd%u)]\n", pd[pos2].reg_num, pd[pos2].reg_num); } } /* update final number of entries used */ outp->pfp_pmc_count = pos1; outp->pfp_pmd_count = pos2; return PFMLIB_SUCCESS; } static void do_normal_rr(unsigned long start, unsigned long end, pfmlib_reg_t *br, int nbr, int dir, int *idx, int *reg_idx, int plm) { unsigned long size, l_addr, c; unsigned long l_offs = 0, r_offs = 0; unsigned long l_size, r_size; dbreg_t db; int p2; if (nbr < 1 || end <= start) return; size = end - start; DPRINT("start=0x%016lx end=0x%016lx size=0x%lx bytes (%lu bundles) nbr=%d dir=%d\n", start, end, size, size >> 4, nbr, dir); p2 = pfm_ia64_fls(size); c = ALIGN_DOWN(end, p2); DPRINT("largest power of two possible: 2^%d=0x%lx, crossing=0x%016lx\n", p2, 1UL << p2, c); if ((c - (1UL<= start) { l_addr = c - (1UL << p2); } else { p2--; if ((c + (1UL<>l_offs: 0x%lx\n", l_offs); } } else if (dir == 1 && r_size != 0 && nbr == 1) { p2++; l_addr = start; if (PFMLIB_DEBUG()) { r_offs = l_addr+(1UL<>r_offs: 0x%lx\n", r_offs); } } l_size = l_addr - start; r_size = end - l_addr-(1UL<>largest chunk: 2^%d @0x%016lx-0x%016lx\n", p2, l_addr, l_addr+(1UL<>before: 0x%016lx-0x%016lx\n", start, l_addr); if (r_size && !r_offs) printf(">>after : 0x%016lx-0x%016lx\n", l_addr+(1UL<>1; if (nbr & 0x1) { /* * our simple heuristic is: * we assign the largest number of registers to the largest * of the two chunks */ if (l_size > r_size) { l_nbr++; } else { r_nbr++; } } do_normal_rr(start, l_addr, br, l_nbr, 0, idx, reg_idx, plm); do_normal_rr(l_addr+(1UL<rr_start, in_rr->rr_end, 
n_pairs, fine_mode ? ", fine_mode" : "", rr_flags & PFMLIB_MONT_RR_INV ? ", inversed" : ""); __pfm_vbprintf("start offset: -0x%lx end_offset: +0x%lx\n", out_rr->rr_soff, out_rr->rr_eoff); for (j=0; j < n_pairs; j++, base_idx+=2) { d.val = dbr[base_idx+1].reg_value; r_end = dbr[base_idx].reg_value+((~(d.db.db_mask)) & ~(0xffUL << 56)); if (fine_mode) __pfm_vbprintf("brp%u: db%u: 0x%016lx db%u: plm=0x%x mask=0x%016lx\n", dbr[base_idx].reg_num>>1, dbr[base_idx].reg_num, dbr[base_idx].reg_value, dbr[base_idx+1].reg_num, d.db.db_plm, d.db.db_mask); else __pfm_vbprintf("brp%u: db%u: 0x%016lx db%u: plm=0x%x mask=0x%016lx end=0x%016lx\n", dbr[base_idx].reg_num>>1, dbr[base_idx].reg_num, dbr[base_idx].reg_value, dbr[base_idx+1].reg_num, d.db.db_plm, d.db.db_mask, r_end); } } /* * base_idx = base register index to use (for IBRP1, base_idx = 2) */ static int compute_fine_rr(pfmlib_mont_input_rr_t *irr, int dfl_plm, int n, int *base_idx, pfmlib_mont_output_rr_t *orr) { int i; pfmlib_reg_t *br; pfmlib_mont_input_rr_desc_t *in_rr; pfmlib_mont_output_rr_desc_t *out_rr; unsigned long addr; int reg_idx; dbreg_t db; in_rr = irr->rr_limits; out_rr = orr->rr_infos; br = orr->rr_br+orr->rr_nbr_used; reg_idx = *base_idx; db.val = 0; db.db.db_mask = FINE_MODE_MASK; if (n > 2) return PFMLIB_ERR_IRRTOOMANY; for (i=0; i < n; i++, reg_idx += 2, in_rr++, br+= 4) { /* * setup lower limit pair * * because of the PMU can only see addresses on a 2-bundle boundary, we must align * down to the closest bundle-pair aligned address. 5 => 32-byte aligned address */ addr = ALIGN_DOWN(in_rr->rr_start, 5); out_rr->rr_soff = in_rr->rr_start - addr; /* * adjust plm for each range */ db.db.db_plm = in_rr->rr_plm ? 
in_rr->rr_plm : (unsigned long)dfl_plm; br[0].reg_num = reg_idx; br[0].reg_value = addr; br[0].reg_addr = br[0].reg_alt_addr = 1+reg_idx; br[1].reg_num = reg_idx+1; br[1].reg_value = db.val; br[1].reg_addr = br[1].reg_alt_addr = 1+reg_idx+1; /* * setup upper limit pair * * * In fine mode, the bundle address stored in the upper limit debug * registers is included in the count, so we substract 0x10 to exclude it. * * because of the PMU bug, we align the (corrected) end to the nearest * 32-byte aligned address + 0x10. With this correction and depending * on the correction, we may count one * * */ addr = in_rr->rr_end - 0x10; if ((addr & 0x1f) == 0) addr += 0x10; out_rr->rr_eoff = addr - in_rr->rr_end + 0x10; br[2].reg_num = reg_idx+4; br[2].reg_value = addr; br[2].reg_addr = br[2].reg_alt_addr = 1+reg_idx+4; br[3].reg_num = reg_idx+5; br[3].reg_value = db.val; br[3].reg_addr = br[3].reg_alt_addr = 1+reg_idx+5; if (PFMLIB_VERBOSE()) print_one_range(in_rr, out_rr, br, 0, 2, 1, irr->rr_flags); } orr->rr_nbr_used += i<<2; /* update base_idx, for subsequent calls */ *base_idx = reg_idx; return PFMLIB_SUCCESS; } /* * base_idx = base register index to use (for IBRP1, base_idx = 2) */ static int compute_single_rr(pfmlib_mont_input_rr_t *irr, int dfl_plm, int *base_idx, pfmlib_mont_output_rr_t *orr) { unsigned long size, end, start; unsigned long p_start, p_end; pfmlib_mont_input_rr_desc_t *in_rr; pfmlib_mont_output_rr_desc_t *out_rr; pfmlib_reg_t *br; dbreg_t db; int reg_idx; int l, m; in_rr = irr->rr_limits; out_rr = orr->rr_infos; br = orr->rr_br+orr->rr_nbr_used; start = in_rr->rr_start; end = in_rr->rr_end; size = end - start; reg_idx = *base_idx; l = pfm_ia64_fls(size); m = l; if (size & ((1UL << l)-1)) { if (l>62) { printf("range: [0x%lx-0x%lx] too big\n", start, end); return PFMLIB_ERR_IRRTOOBIG; } m++; } DPRINT("size=%ld, l=%d m=%d, internal: 0x%lx full: 0x%lx\n", size, l, m, 1UL << l, 1UL << m); for (; m < 64; m++) { p_start = ALIGN_DOWN(start, m); p_end = 
p_start+(1UL<= end) goto found; } return PFMLIB_ERR_IRRINVAL; found: DPRINT("m=%d p_start=0x%lx p_end=0x%lx\n", m, p_start,p_end); /* when the event is not IA64_INST_RETIRED, then we MUST use ibrp0 */ br[0].reg_num = reg_idx; br[0].reg_value = p_start; br[0].reg_addr = br[0].reg_alt_addr = 1+reg_idx; db.val = 0; db.db.db_mask = ~((1UL << m)-1); db.db.db_plm = in_rr->rr_plm ? in_rr->rr_plm : (unsigned long)dfl_plm; br[1].reg_num = reg_idx + 1; br[1].reg_value = db.val; br[1].reg_addr = br[1].reg_alt_addr = 1+reg_idx+1; out_rr->rr_soff = start - p_start; out_rr->rr_eoff = p_end - end; if (PFMLIB_VERBOSE()) print_one_range(in_rr, out_rr, br, 0, 1, 0, irr->rr_flags); orr->rr_nbr_used += 2; /* update base_idx, for subsequent calls */ *base_idx = reg_idx; return PFMLIB_SUCCESS; } static int compute_normal_rr(pfmlib_mont_input_rr_t *irr, int dfl_plm, int n, int *base_idx, pfmlib_mont_output_rr_t *orr) { pfmlib_mont_input_rr_desc_t *in_rr; pfmlib_mont_output_rr_desc_t *out_rr; unsigned long r_end; pfmlib_reg_t *br; dbreg_t d; int i, j; int br_index, reg_idx, prev_index; in_rr = irr->rr_limits; out_rr = orr->rr_infos; br = orr->rr_br+orr->rr_nbr_used; reg_idx = *base_idx; br_index = 0; for (i=0; i < n; i++, in_rr++, out_rr++) { /* * running out of registers */ if (br_index == 8) break; prev_index = br_index; do_normal_rr( in_rr->rr_start, in_rr->rr_end, br, 4 - (reg_idx>>1), /* how many pairs available */ 0, &br_index, ®_idx, in_rr->rr_plm ? 
in_rr->rr_plm : dfl_plm); DPRINT("br_index=%d reg_idx=%d\n", br_index, reg_idx); /* * compute offsets */ out_rr->rr_soff = out_rr->rr_eoff = 0; for(j=prev_index; j < br_index; j+=2) { d.val = br[j+1].reg_value; r_end = br[j].reg_value+((~(d.db.db_mask)+1) & ~(0xffUL << 56)); if (br[j].reg_value <= in_rr->rr_start) out_rr->rr_soff = in_rr->rr_start - br[j].reg_value; if (r_end >= in_rr->rr_end) out_rr->rr_eoff = r_end - in_rr->rr_end; } if (PFMLIB_VERBOSE()) print_one_range(in_rr, out_rr, br, prev_index, (br_index-prev_index)>>1, 0, irr->rr_flags); } /* do not have enough registers to cover all the ranges */ if (br_index == 8 && i < n) return PFMLIB_ERR_TOOMANY; orr->rr_nbr_used += br_index; /* update base_idx, for subsequent calls */ *base_idx = reg_idx; return PFMLIB_SUCCESS; } static int pfm_dispatch_irange(pfmlib_input_param_t *inp, pfmlib_mont_input_param_t *mod_in, pfmlib_output_param_t *outp, pfmlib_mont_output_param_t *mod_out) { pfm_mont_pmc_reg_t reg; pfmlib_mont_input_param_t *param = mod_in; pfmlib_mont_input_rr_t *irr; pfmlib_mont_output_rr_t *orr; pfmlib_reg_t *pc = outp->pfp_pmcs; unsigned long retired_mask; unsigned int i, pos = outp->pfp_pmc_count, count; unsigned int retired_only, retired_count, fine_mode, prefetch_count; unsigned int n_intervals; int base_idx = 0, dup = 0; int ret; if (param == NULL) return PFMLIB_SUCCESS; if (param->pfp_mont_irange.rr_used == 0) return PFMLIB_SUCCESS; if (mod_out == NULL) return PFMLIB_ERR_INVAL; irr = ¶m->pfp_mont_irange; orr = &mod_out->pfp_mont_irange; ret = check_intervals(irr, 0, &n_intervals); if (ret != PFMLIB_SUCCESS) return ret; if (n_intervals < 1) return PFMLIB_ERR_IRRINVAL; retired_count = check_inst_retired_events(inp, &retired_mask); retired_only = retired_count == inp->pfp_event_count; fine_mode = irr->rr_flags & PFMLIB_MONT_RR_NO_FINE_MODE ? 
0 : check_fine_mode_possible(irr, n_intervals); DPRINT("n_intervals=%d retired_only=%d retired_count=%d fine_mode=%d\n", n_intervals, retired_only, retired_count, fine_mode); /* * On montecito, there are more constraints on what can be measured with irange. * * - The fine mode is the best because you directly set the lower and upper limits of * the range. This uses 2 ibr pairs for range (ibrp0/ibrp2 and ibp1/ibrp3). Therefore * at most 2 fine mode ranges can be defined. The boundaries of the range must be in the * same 64KB page. The fine mode works will all events. * * - if the fine mode fails, then for all events, except IA64_TAGGED_INST_RETIRED_*, only * the first pair of ibr is available: ibrp0. This imposes some severe restrictions on the * size and alignement of the range. It can be bigger than 64KB and must be properly aligned * on its size. The library relaxes these constraints by allowing the covered areas to be * larger than the expected range. It may start before and end after the requested range. * You can determine the amount of overrun in either direction for each range by looking at * the rr_soff (start offset) and rr_eoff (end offset). * * - if the events include certain prefetch events then only IBRP1 can be used. * See 3.3.5.2 Exception 1. * * - Finally, when the events are ONLY IA64_TAGGED_INST_RETIRED_* then all IBR pairs can be used * to cover the range giving us more flexibility to approximate the range when it is not * properly aligned on its size (see 10.3.5.2 Exception 2). But the corresponding * IA64_TAGGED_INST_RETIRED_* must be present. 
*/ if (fine_mode == 0 && retired_only == 0 && n_intervals > 1) return PFMLIB_ERR_IRRTOOMANY; /* we do not default to non-fine mode to support more ranges */ if (n_intervals > 2 && fine_mode == 1) return PFMLIB_ERR_IRRTOOMANY; ret = check_prefetch_events(inp, irr, &prefetch_count, &base_idx, &dup); if (ret) return ret; DPRINT("prefetch_count=%u base_idx=%d dup=%d\n", prefetch_count, base_idx, dup); /* * CPU_OP_CYCLES.QUAL supports code range restrictions but it returns * meaningful values (fine/coarse mode) only when IBRP1 is not used. */ if ((base_idx > 0 || dup) && has_cpu_cycles_qual(inp)) return PFMLIB_ERR_FEATCOMB; if (fine_mode == 0) { if (retired_only) { /* can take multiple intervals */ ret = compute_normal_rr(irr, inp->pfp_dfl_plm, n_intervals, &base_idx, orr); } else { /* unless we have only prefetch and instruction retired events, * we cannot satisfy the request because the other events cannot * be measured on anything but IBRP0. */ if ((prefetch_count+retired_count) != inp->pfp_event_count) return PFMLIB_ERR_FEATCOMB; ret = compute_single_rr(irr, inp->pfp_dfl_plm, &base_idx, orr); if (ret == PFMLIB_SUCCESS && dup) ret = compute_single_rr(irr, inp->pfp_dfl_plm, &base_idx, orr); } } else { if (prefetch_count && n_intervals != 1) return PFMLIB_ERR_IRRTOOMANY; /* except is retired_only, can take only one interval */ ret = compute_fine_rr(irr, inp->pfp_dfl_plm, n_intervals, &base_idx, orr); if (ret == PFMLIB_SUCCESS && dup) ret = compute_fine_rr(irr, inp->pfp_dfl_plm, n_intervals, &base_idx, orr); } if (ret != PFMLIB_SUCCESS) return ret == PFMLIB_ERR_TOOMANY ? 
PFMLIB_ERR_IRRTOOMANY : ret; reg.pmc_val = 0xdb6; /* default value */ count = orr->rr_nbr_used; for (i=0; i < count; i++) { switch(orr->rr_br[i].reg_num) { case 0: reg.pmc38_mont_reg.iarc_ig_ibrp0 = 0; break; case 2: reg.pmc38_mont_reg.iarc_ig_ibrp1 = 0; break; case 4: reg.pmc38_mont_reg.iarc_ig_ibrp2 = 0; break; case 6: reg.pmc38_mont_reg.iarc_ig_ibrp3 = 0; break; } } if (fine_mode) { reg.pmc38_mont_reg.iarc_fine = 1; } else if (retired_only) { /* * we need to check that the user provided all the events needed to cover * all the ibr pairs used to cover the range */ if ((retired_mask & 0x1) == 0 && reg.pmc38_mont_reg.iarc_ig_ibrp0 == 0) return PFMLIB_ERR_IRRINVAL; if ((retired_mask & 0x2) == 0 && reg.pmc38_mont_reg.iarc_ig_ibrp1 == 0) return PFMLIB_ERR_IRRINVAL; if ((retired_mask & 0x4) == 0 && reg.pmc38_mont_reg.iarc_ig_ibrp2 == 0) return PFMLIB_ERR_IRRINVAL; if ((retired_mask & 0x8) == 0 && reg.pmc38_mont_reg.iarc_ig_ibrp3 == 0) return PFMLIB_ERR_IRRINVAL; } if (pfm_regmask_isset(&inp->pfp_unavail_pmcs, 38)) return PFMLIB_ERR_NOASSIGN; pc[pos].reg_num = 38; pc[pos].reg_value = reg.pmc_val; pc[pos].reg_addr = pc[pos].reg_alt_addr = 38; pos++; __pfm_vbprintf("[PMC38(pmc38)=0x%lx ig_ibrp0=%d ig_ibrp1=%d ig_ibrp2=%d ig_ibrp3=%d fine=%d]\n", reg.pmc_val, reg.pmc38_mont_reg.iarc_ig_ibrp0, reg.pmc38_mont_reg.iarc_ig_ibrp1, reg.pmc38_mont_reg.iarc_ig_ibrp2, reg.pmc38_mont_reg.iarc_ig_ibrp3, reg.pmc38_mont_reg.iarc_fine); outp->pfp_pmc_count = pos; return PFMLIB_SUCCESS; } static const unsigned long iod_tab[8]={ /* --- */ 3, /* --D */ 2, /* -O- */ 3, /* should not be used */ /* -OD */ 0, /* =IOD safe because default IBR is harmless */ /* I-- */ 1, /* =IO safe because by defaut OPC is turned off */ /* I-D */ 0, /* =IOD safe because by default opc is turned off */ /* IO- */ 1, /* IOD */ 0 }; /* * IMPORTANT: MUST BE CALLED *AFTER* pfm_dispatch_irange() to make sure we see * the irange programming to adjust pmc41. 
*/ static int pfm_dispatch_drange(pfmlib_input_param_t *inp, pfmlib_mont_input_param_t *mod_in, pfmlib_output_param_t *outp, pfmlib_mont_output_param_t *mod_out) { pfmlib_mont_input_param_t *param = mod_in; pfmlib_reg_t *pc = outp->pfp_pmcs; pfmlib_mont_input_rr_t *irr; pfmlib_mont_output_rr_t *orr, *orr2; pfm_mont_pmc_reg_t pmc38; pfm_mont_pmc_reg_t reg; unsigned int i, pos = outp->pfp_pmc_count; int iod_codes[4], dfl_val_pmc32, dfl_val_pmc34; unsigned int n_intervals; int ret; int base_idx = 0; int fine_mode = 0; #define DR_USED 0x1 /* data range is used */ #define OP_USED 0x2 /* opcode matching is used */ #define IR_USED 0x4 /* code range is used */ if (param == NULL) return PFMLIB_SUCCESS; /* * if only pmc32/pmc33 opcode matching is used, we do not need to change * the default value of pmc41 regardless of the events being measured. */ if ( param->pfp_mont_drange.rr_used == 0 && param->pfp_mont_irange.rr_used == 0) return PFMLIB_SUCCESS; /* * it seems like the ignored bits need to have special values * otherwise this does not work. */ reg.pmc_val = 0x2078fefefefe; /* * initialize iod codes */ iod_codes[0] = iod_codes[1] = iod_codes[2] = iod_codes[3] = 0; /* * setup default iod value, we need to separate because * if drange is used we do not know in advance which DBR will be used * therefore we need to apply dfl_val later */ dfl_val_pmc32 = param->pfp_mont_opcm1.opcm_used ? OP_USED : 0; dfl_val_pmc34 = param->pfp_mont_opcm2.opcm_used ? OP_USED : 0; if (param->pfp_mont_drange.rr_used == 1) { if (mod_out == NULL) return PFMLIB_ERR_INVAL; irr = ¶m->pfp_mont_drange; orr = &mod_out->pfp_mont_drange; ret = check_intervals(irr, 1, &n_intervals); if (ret != PFMLIB_SUCCESS) return ret; if (n_intervals < 1) return PFMLIB_ERR_DRRINVAL; ret = compute_normal_rr(irr, inp->pfp_dfl_plm, n_intervals, &base_idx, orr); if (ret != PFMLIB_SUCCESS) { return ret == PFMLIB_ERR_TOOMANY ? PFMLIB_ERR_DRRTOOMANY : ret; } /* * Update iod_codes to reflect the use of the DBR constraint. 
*/ for (i=0; i < orr->rr_nbr_used; i++) { if (orr->rr_br[i].reg_num == 0) iod_codes[0] |= DR_USED | dfl_val_pmc32; if (orr->rr_br[i].reg_num == 2) iod_codes[1] |= DR_USED | dfl_val_pmc34; if (orr->rr_br[i].reg_num == 4) iod_codes[2] |= DR_USED | dfl_val_pmc32; if (orr->rr_br[i].reg_num == 6) iod_codes[3] |= DR_USED | dfl_val_pmc34; } } /* * XXX: assume dispatch_irange executed before calling this function */ if (param->pfp_mont_irange.rr_used == 1) { orr2 = &mod_out->pfp_mont_irange; if (mod_out == NULL) return PFMLIB_ERR_INVAL; /* * we need to find out whether or not the irange is using * fine mode. If this is the case, then we only need to * program pmc41 for the ibr pairs which designate the lower * bounds of a range. For instance, if IBRP0/IBRP2 are used, * then we only need to program pmc13.cfg_dbrp0 and pmc13.ena_dbrp0, * the PMU will automatically use IBRP2, even though pmc13.ena_dbrp2=0. */ for(i=0; i <= pos; i++) { if (pc[i].reg_num == 38) { pmc38.pmc_val = pc[i].reg_value; if (pmc38.pmc38_mont_reg.iarc_fine == 1) fine_mode = 1; break; } } /* * Update to reflect the use of the IBR constraint */ for (i=0; i < orr2->rr_nbr_used; i++) { if (orr2->rr_br[i].reg_num == 0) iod_codes[0] |= IR_USED | dfl_val_pmc32; if (orr2->rr_br[i].reg_num == 2) iod_codes[1] |= IR_USED | dfl_val_pmc34; if (fine_mode == 0 && orr2->rr_br[i].reg_num == 4) iod_codes[2] |= IR_USED | dfl_val_pmc32; if (fine_mode == 0 && orr2->rr_br[i].reg_num == 6) iod_codes[3] |= IR_USED | dfl_val_pmc34; } } if (param->pfp_mont_irange.rr_used == 0 && param->pfp_mont_drange.rr_used ==0) { iod_codes[0] = iod_codes[2] = dfl_val_pmc32; iod_codes[1] = iod_codes[3] = dfl_val_pmc34; } /* * update the cfg dbrpX field. If we put a constraint on a cfg dbrp, then * we must enable it in the corresponding ena_dbrpX */ reg.pmc41_mont_reg.darc_ena_dbrp0 = iod_codes[0] ? 1 : 0; reg.pmc41_mont_reg.darc_cfg_dtag0 = iod_tab[iod_codes[0]]; reg.pmc41_mont_reg.darc_ena_dbrp1 = iod_codes[1] ? 
1 : 0; reg.pmc41_mont_reg.darc_cfg_dtag1 = iod_tab[iod_codes[1]]; reg.pmc41_mont_reg.darc_ena_dbrp2 = iod_codes[2] ? 1 : 0; reg.pmc41_mont_reg.darc_cfg_dtag2 = iod_tab[iod_codes[2]]; reg.pmc41_mont_reg.darc_ena_dbrp3 = iod_codes[3] ? 1 : 0; reg.pmc41_mont_reg.darc_cfg_dtag3 = iod_tab[iod_codes[3]]; if (pfm_regmask_isset(&inp->pfp_unavail_pmcs, 41)) return PFMLIB_ERR_NOASSIGN; pc[pos].reg_num = 41; pc[pos].reg_value = reg.pmc_val; pc[pos].reg_addr = pc[pos].reg_alt_addr = 41; pos++; __pfm_vbprintf("[PMC41(pmc41)=0x%lx cfg_dtag0=%d cfg_dtag1=%d cfg_dtag2=%d cfg_dtag3=%d ena_dbrp0=%d ena_dbrp1=%d ena_dbrp2=%d ena_dbrp3=%d]\n", reg.pmc_val, reg.pmc41_mont_reg.darc_cfg_dtag0, reg.pmc41_mont_reg.darc_cfg_dtag1, reg.pmc41_mont_reg.darc_cfg_dtag2, reg.pmc41_mont_reg.darc_cfg_dtag3, reg.pmc41_mont_reg.darc_ena_dbrp0, reg.pmc41_mont_reg.darc_ena_dbrp1, reg.pmc41_mont_reg.darc_ena_dbrp2, reg.pmc41_mont_reg.darc_ena_dbrp3); outp->pfp_pmc_count = pos; return PFMLIB_SUCCESS; } static int check_qualifier_constraints(pfmlib_input_param_t *inp, pfmlib_mont_input_param_t *mod_in) { pfmlib_mont_input_param_t *param = mod_in; pfmlib_event_t *e = inp->pfp_events; unsigned int i, count; count = inp->pfp_event_count; for(i=0; i < count; i++) { /* * skip check for counter which requested it. Use at your own risk. * No all counters have necessarily been validated for use with * qualifiers. Typically the event is counted as if no constraint * existed. 
*/ if (param->pfp_mont_counters[i].flags & PFMLIB_MONT_FL_EVT_NO_QUALCHECK) continue; if (evt_use_irange(param) && has_iarr(e[i].event) == 0) return PFMLIB_ERR_FEATCOMB; if (evt_use_drange(param) && has_darr(e[i].event) == 0) return PFMLIB_ERR_FEATCOMB; if (evt_use_opcm(param) && has_opcm(e[i].event) == 0) return PFMLIB_ERR_FEATCOMB; } return PFMLIB_SUCCESS; } static int check_range_plm(pfmlib_input_param_t *inp, pfmlib_mont_input_param_t *mod_in) { pfmlib_mont_input_param_t *param = mod_in; unsigned int i, count; if (param->pfp_mont_drange.rr_used == 0 && param->pfp_mont_irange.rr_used == 0) return PFMLIB_SUCCESS; /* * range restriction applies to all events, therefore we must have a consistent * set of plm and they must match the pfp_dfl_plm which is used to setup the debug * registers */ count = inp->pfp_event_count; for(i=0; i < count; i++) { if (inp->pfp_events[i].plm && inp->pfp_events[i].plm != inp->pfp_dfl_plm) return PFMLIB_ERR_FEATCOMB; } return PFMLIB_SUCCESS; } static int pfm_dispatch_ipear(pfmlib_input_param_t *inp, pfmlib_mont_input_param_t *mod_in, pfmlib_output_param_t *outp) { pfm_mont_pmc_reg_t reg; pfmlib_mont_input_param_t *param = mod_in; pfmlib_event_t *e = inp->pfp_events; pfmlib_reg_t *pc, *pd; unsigned int pos1, pos2; unsigned int i, count; pc = outp->pfp_pmcs; pd = outp->pfp_pmds; pos1 = outp->pfp_pmc_count; pos2 = outp->pfp_pmd_count; /* * check if there is something to do */ if (param == NULL || param->pfp_mont_ipear.ipear_used == 0) return PFMLIB_SUCCESS; /* * we need to look for use of ETB, because IP-EAR and ETB cannot be used at the * same time */ if (param->pfp_mont_etb.etb_used) return PFMLIB_ERR_FEATCOMB; /* * look for implicit ETB used because of BRANCH_EVENT */ count = inp->pfp_event_count; for (i=0; i < count; i++) { if (is_etb(e[i].event)) return PFMLIB_ERR_FEATCOMB; } reg.pmc_val = 0; reg.pmc42_mont_reg.ipear_plm = param->pfp_mont_ipear.ipear_plm ? 
param->pfp_mont_ipear.ipear_plm : inp->pfp_dfl_plm; reg.pmc42_mont_reg.ipear_pm = inp->pfp_flags & PFMLIB_PFP_SYSTEMWIDE ? 1 : 0; reg.pmc42_mont_reg.ipear_mode = 4; reg.pmc42_mont_reg.ipear_delay = param->pfp_mont_ipear.ipear_delay; if (pfm_regmask_isset(&inp->pfp_unavail_pmcs, 42)) return PFMLIB_ERR_NOASSIGN; pc[pos1].reg_num = 42; pc[pos1].reg_value = reg.pmc_val; pc[pos1].reg_addr = pc[pos1].reg_alt_addr = 42; pos1++; __pfm_vbprintf("[PMC42(pmc42)=0x%lx plm=%d pm=%d mode=%d delay=%d]\n", reg.pmc_val, reg.pmc42_mont_reg.ipear_plm, reg.pmc42_mont_reg.ipear_pm, reg.pmc42_mont_reg.ipear_mode, reg.pmc42_mont_reg.ipear_delay); pd[pos2].reg_num = 38; pd[pos2].reg_addr = pd[pos2].reg_alt_addr = 38; pos2++; pd[pos2].reg_num = 39; pd[pos2].reg_addr = pd[pos2].reg_alt_addr = 39; pos2++; __pfm_vbprintf("[PMD38(pmd38)]\n[PMD39(pmd39)\n"); for(i=48; i < 64; i++, pos2++) { pd[pos2].reg_num = i; pd[pos2].reg_addr = pd[pos2].reg_alt_addr = i; __pfm_vbprintf("[PMD%u(pmd%u)]\n", pd[pos2].reg_num, pd[pos2].reg_num); } outp->pfp_pmc_count = pos1; outp->pfp_pmd_count = pos2; return PFMLIB_SUCCESS; } static int pfm_mont_dispatch_events(pfmlib_input_param_t *inp, void *model_in, pfmlib_output_param_t *outp, void *model_out) { int ret; pfmlib_mont_input_param_t *mod_in = (pfmlib_mont_input_param_t *)model_in; pfmlib_mont_output_param_t *mod_out = (pfmlib_mont_output_param_t *)model_out; /* * nothing will come out of this combination */ if (mod_out && mod_in == NULL) return PFMLIB_ERR_INVAL; /* check opcode match, range restriction qualifiers */ if (mod_in && check_qualifier_constraints(inp, mod_in) != PFMLIB_SUCCESS) return PFMLIB_ERR_FEATCOMB; /* check for problems with range restriction and per-event plm */ if (mod_in && check_range_plm(inp, mod_in) != PFMLIB_SUCCESS) return PFMLIB_ERR_FEATCOMB; ret = pfm_mont_dispatch_counters(inp, mod_in, outp); if (ret != PFMLIB_SUCCESS) return ret; /* now check for I-EAR */ ret = pfm_dispatch_iear(inp, mod_in, outp); if (ret != PFMLIB_SUCCESS) 
return ret; /* now check for D-EAR */ ret = pfm_dispatch_dear(inp, mod_in, outp); if (ret != PFMLIB_SUCCESS) return ret; /* XXX: must be done before dispatch_opcm() and dispatch_drange() */ ret = pfm_dispatch_irange(inp, mod_in, outp, mod_out);; if (ret != PFMLIB_SUCCESS) return ret; ret = pfm_dispatch_drange(inp, mod_in, outp, mod_out);; if (ret != PFMLIB_SUCCESS) return ret; /* now check for Opcode matchers */ ret = pfm_dispatch_opcm(inp, mod_in, outp, mod_out); if (ret != PFMLIB_SUCCESS) return ret; /* now check for ETB */ ret = pfm_dispatch_etb(inp, mod_in, outp); if (ret != PFMLIB_SUCCESS) return ret; /* now check for IP-EAR */ ret = pfm_dispatch_ipear(inp, mod_in, outp); return ret; } /* XXX: return value is also error code */ int pfm_mont_get_event_maxincr(unsigned int i, unsigned int *maxincr) { if (i >= PME_MONT_EVENT_COUNT || maxincr == NULL) return PFMLIB_ERR_INVAL; *maxincr = montecito_pe[i].pme_maxincr; return PFMLIB_SUCCESS; } int pfm_mont_is_ear(unsigned int i) { return i < PME_MONT_EVENT_COUNT && is_ear(i); } int pfm_mont_is_dear(unsigned int i) { return i < PME_MONT_EVENT_COUNT && is_dear(i); } int pfm_mont_is_dear_tlb(unsigned int i) { return i < PME_MONT_EVENT_COUNT && is_dear(i) && is_ear_tlb(i); } int pfm_mont_is_dear_cache(unsigned int i) { return i < PME_MONT_EVENT_COUNT && is_dear(i) && is_ear_cache(i); } int pfm_mont_is_dear_alat(unsigned int i) { return i < PME_MONT_EVENT_COUNT && is_ear_alat(i); } int pfm_mont_is_iear(unsigned int i) { return i < PME_MONT_EVENT_COUNT && is_iear(i); } int pfm_mont_is_iear_tlb(unsigned int i) { return i < PME_MONT_EVENT_COUNT && is_iear(i) && is_ear_tlb(i); } int pfm_mont_is_iear_cache(unsigned int i) { return i < PME_MONT_EVENT_COUNT && is_iear(i) && is_ear_cache(i); } int pfm_mont_is_etb(unsigned int i) { return i < PME_MONT_EVENT_COUNT && is_etb(i); } int pfm_mont_support_iarr(unsigned int i) { return i < PME_MONT_EVENT_COUNT && has_iarr(i); } int pfm_mont_support_darr(unsigned int i) { return i < 
PME_MONT_EVENT_COUNT && has_darr(i); } int pfm_mont_support_opcm(unsigned int i) { return i < PME_MONT_EVENT_COUNT && has_opcm(i); } int pfm_mont_support_all(unsigned int i) { return i < PME_MONT_EVENT_COUNT && has_all(i); } int pfm_mont_get_ear_mode(unsigned int i, pfmlib_mont_ear_mode_t *m) { pfmlib_mont_ear_mode_t r; if (!is_ear(i) || m == NULL) return PFMLIB_ERR_INVAL; r = PFMLIB_MONT_EAR_TLB_MODE; if (is_ear_tlb(i)) goto done; r = PFMLIB_MONT_EAR_CACHE_MODE; if (is_ear_cache(i)) goto done; r = PFMLIB_MONT_EAR_ALAT_MODE; if (is_ear_alat(i)) goto done; return PFMLIB_ERR_INVAL; done: *m = r; return PFMLIB_SUCCESS; } static int pfm_mont_get_event_code(unsigned int i, unsigned int cnt, int *code) { if (cnt != PFMLIB_CNT_FIRST && (cnt < 4 || cnt > 15)) return PFMLIB_ERR_INVAL; *code = (int)montecito_pe[i].pme_code; return PFMLIB_SUCCESS; } /* * This function is accessible directly to the user */ int pfm_mont_get_event_umask(unsigned int i, unsigned long *umask) { if (i >= PME_MONT_EVENT_COUNT || umask == NULL) return PFMLIB_ERR_INVAL; *umask = evt_umask(i); return PFMLIB_SUCCESS; } int pfm_mont_get_event_group(unsigned int i, int *grp) { if (i >= PME_MONT_EVENT_COUNT || grp == NULL) return PFMLIB_ERR_INVAL; *grp = evt_grp(i); return PFMLIB_SUCCESS; } int pfm_mont_get_event_set(unsigned int i, int *set) { if (i >= PME_MONT_EVENT_COUNT || set == NULL) return PFMLIB_ERR_INVAL; *set = evt_set(i) == 0xf ? 
PFMLIB_MONT_EVT_NO_SET : evt_set(i); return PFMLIB_SUCCESS; } int pfm_mont_get_event_type(unsigned int i, int *type) { if (i >= PME_MONT_EVENT_COUNT || type == NULL) return PFMLIB_ERR_INVAL; *type = evt_caf(i); return PFMLIB_SUCCESS; } /* external interface */ int pfm_mont_irange_is_fine(pfmlib_output_param_t *outp, pfmlib_mont_output_param_t *mod_out) { pfmlib_mont_output_param_t *param = mod_out; pfm_mont_pmc_reg_t reg; unsigned int i, count; /* some sanity checks */ if (outp == NULL || param == NULL) return 0; if (outp->pfp_pmc_count >= PFMLIB_MAX_PMCS) return 0; if (param->pfp_mont_irange.rr_nbr_used == 0) return 0; /* * we look for pmc38 as it contains the bit indicating if fine mode is used */ count = outp->pfp_pmc_count; for(i=0; i < count; i++) { if (outp->pfp_pmcs[i].reg_num == 38) goto found; } return 0; found: reg.pmc_val = outp->pfp_pmcs[i].reg_value; return reg.pmc38_mont_reg.iarc_fine ? 1 : 0; } static char * pfm_mont_get_event_name(unsigned int i) { return montecito_pe[i].pme_name; } static void pfm_mont_get_event_counters(unsigned int j, pfmlib_regmask_t *counters) { unsigned int i; unsigned long m; memset(counters, 0, sizeof(*counters)); m =montecito_pe[j].pme_counters; for(i=0; m ; i++, m>>=1) { if (m & 0x1) pfm_regmask_set(counters, i); } } static void pfm_mont_get_impl_pmcs(pfmlib_regmask_t *impl_pmcs) { unsigned int i = 0; for(i=0; i < 16; i++) pfm_regmask_set(impl_pmcs, i); for(i=32; i < 43; i++) pfm_regmask_set(impl_pmcs, i); } static void pfm_mont_get_impl_pmds(pfmlib_regmask_t *impl_pmds) { unsigned int i = 0; for(i=4; i < 16; i++) pfm_regmask_set(impl_pmds, i); for(i=32; i < 40; i++) pfm_regmask_set(impl_pmds, i); for(i=48; i < 64; i++) pfm_regmask_set(impl_pmds, i); } static void pfm_mont_get_impl_counters(pfmlib_regmask_t *impl_counters) { unsigned int i = 0; /* counter pmds are contiguous */ for(i=4; i < 16; i++) pfm_regmask_set(impl_counters, i); } static void pfm_mont_get_hw_counter_width(unsigned int *width) { *width = 
PMU_MONT_COUNTER_WIDTH; } static int pfm_mont_get_event_description(unsigned int ev, char **str) { char *s; s = montecito_pe[ev].pme_desc; if (s) { *str = strdup(s); } else { *str = NULL; } return PFMLIB_SUCCESS; } static int pfm_mont_get_cycle_event(pfmlib_event_t *e) { e->event = PME_MONT_CPU_OP_CYCLES_ALL; return PFMLIB_SUCCESS; } static int pfm_mont_get_inst_retired(pfmlib_event_t *e) { e->event = PME_MONT_IA64_INST_RETIRED; return PFMLIB_SUCCESS; } static unsigned int pfm_mont_get_num_event_masks(unsigned int event) { return has_mesi(event) ? 4 : 0; } static char * pfm_mont_get_event_mask_name(unsigned int event, unsigned int mask) { switch(mask) { case 0: return "I"; case 1: return "S"; case 2: return "E"; case 3: return "M"; } return NULL; } static int pfm_mont_get_event_mask_desc(unsigned int event, unsigned int mask, char **desc) { switch(mask) { case 0: *desc = strdup("invalid"); break; case 1: *desc = strdup("shared"); break; case 2: *desc = strdup("exclusive"); break; case 3: *desc = strdup("modified"); break; default: return PFMLIB_ERR_INVAL; } return PFMLIB_SUCCESS; } static int pfm_mont_get_event_mask_code(unsigned int event, unsigned int mask, unsigned int *code) { *code = mask; return PFMLIB_SUCCESS; } pfm_pmu_support_t montecito_support={ .pmu_name = "dual-core Itanium 2", .pmu_type = PFMLIB_MONTECITO_PMU, .pme_count = PME_MONT_EVENT_COUNT, .pmc_count = PMU_MONT_NUM_PMCS, .pmd_count = PMU_MONT_NUM_PMDS, .num_cnt = PMU_MONT_NUM_COUNTERS, .get_event_code = pfm_mont_get_event_code, .get_event_name = pfm_mont_get_event_name, .get_event_counters = pfm_mont_get_event_counters, .dispatch_events = pfm_mont_dispatch_events, .pmu_detect = pfm_mont_detect, .get_impl_pmcs = pfm_mont_get_impl_pmcs, .get_impl_pmds = pfm_mont_get_impl_pmds, .get_impl_counters = pfm_mont_get_impl_counters, .get_hw_counter_width = pfm_mont_get_hw_counter_width, .get_event_desc = pfm_mont_get_event_description, .get_cycle_event = pfm_mont_get_cycle_event, .get_inst_retired_event = 
pfm_mont_get_inst_retired, .get_num_event_masks = pfm_mont_get_num_event_masks, .get_event_mask_name = pfm_mont_get_event_mask_name, .get_event_mask_desc = pfm_mont_get_event_mask_desc, .get_event_mask_code = pfm_mont_get_event_mask_code }; papi-5.6.0/src/libpfm-3.y/examples_ia64_v2.0/task_smpl.c000664 001750 001750 00000031320 13216244362 024551 0ustar00jshenry1963jshenry1963000000 000000 /* * task_smpl.c - example of a task sampling another one using a randomized sampling period * * Copyright (c) 2003-2006 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux/ia64. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include typedef pfm_default_smpl_arg_t smpl_fmt_arg_t; typedef pfm_default_smpl_hdr_t smpl_hdr_t; typedef pfm_default_smpl_entry_t smpl_entry_t; typedef pfm_default_smpl_ctx_arg_t ctx_arg_t; typedef int ctxid_t; #define FMT_UUID PFM_DEFAULT_SMPL_UUID #define NUM_PMCS PFMLIB_MAX_PMCS #define NUM_PMDS PFMLIB_MAX_PMDS #define FIRST_COUNTER 4 static unsigned long collect_samples; static void *buf_addr; static pfm_uuid_t buf_fmt_id = FMT_UUID; static void fatal_error(char *fmt,...) __attribute__((noreturn)); static void warning(char *fmt, ...) { va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); } static void fatal_error(char *fmt, ...) { va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); exit(1); } int child(char **arg) { /* * force the task to stop before executing the first * user level instruction */ ptrace(PTRACE_TRACEME, 0, NULL, NULL); execvp(arg[0], arg); /* not reached */ exit(1); } static __inline__ int bit_weight(unsigned long x) { int sum = 0; for (; x ; x>>=1) { if (x & 0x1UL) sum++; } return sum; } static void process_smpl_buf(int id, unsigned long smpl_pmd_mask, int need_restart) { static unsigned long last_overflow = ~0UL; /* initialize to biggest value possible */ smpl_hdr_t *hdr = (smpl_hdr_t *)buf_addr; smpl_entry_t *ent; unsigned long count, entry, *reg, pos, msk; unsigned long entry_size; int j; printf("processing %s buffer at %p\n", need_restart==0 ? "leftover" : "", hdr); if (hdr->hdr_overflows <= last_overflow && last_overflow != ~0UL) { warning("skipping identical set of samples %lu <= %lu\n", hdr->hdr_overflows, last_overflow); return; } last_overflow = hdr->hdr_overflows; count = hdr->hdr_count; ent = (smpl_entry_t *)(hdr+1); pos = (unsigned long)ent; entry = collect_samples; /* * in this example program, we use fixed-size entries, therefore we * can compute the entry size in advance. 
Perfmon-2 supports variable * size entries. */ entry_size = sizeof(smpl_entry_t)+(bit_weight(smpl_pmd_mask)<<3); while(count--) { printf("entry %ld PID:%d CPU:%d IIP:0x%016lx\n", entry, ent->pid, ent->cpu, ent->ip); printf("\tOVFL: %d LAST_VAL: %lu\n", ent->ovfl_pmd, -ent->last_reset_val); /* * print body: additional PMDs recorded * PMD are recorded in increasing index order */ reg = (unsigned long *)(ent+1); for(j=0, msk = smpl_pmd_mask; msk; msk >>=1, j++) { if ((msk & 0x1) == 0) continue; printf("PMD%-2d = 0x%016lx\n", j, *reg); reg++; } /* * we could have removed this and used: * ent = (smpl_entry_t *)reg * instead. */ pos += entry_size; ent = (smpl_entry_t *)pos; entry++; } collect_samples = entry; /* * reactivate monitoring once we are done with the samples * * Note that this call can fail with EBUSY in non-blocking mode * as the task may have disappeared while we were processing * the samples. */ if (need_restart && perfmonctl(id, PFM_RESTART, 0, 0) == -1) { if (errno != EBUSY) fatal_error("perfmonctl error PFM_RESTART errno %d\n",errno); else warning("PFM_RESTART: task has probably terminated \n"); } } int mainloop(char **arg) { ctx_arg_t ctx; pfmlib_input_param_t inp; pfmlib_output_param_t outp; pfarg_reg_t pd[NUM_PMDS]; pfarg_reg_t pc[NUM_PMCS]; pfarg_load_t load_args; pfm_msg_t msg; unsigned long ovfl_count = 0UL; unsigned long sample_period; unsigned long smpl_pmd_mask = 0UL; pid_t pid; int status, ret, fd; unsigned int i, num_counters; /* * intialize all locals */ memset(&ctx, 0, sizeof(ctx)); memset(&inp,0, sizeof(inp)); memset(&outp,0, sizeof(outp)); memset(pd, 0, sizeof(pd)); memset(pc, 0, sizeof(pc)); /* * locate events */ pfm_get_num_counters(&num_counters); if (pfm_get_cycle_event(&inp.pfp_events[0]) != PFMLIB_SUCCESS) fatal_error("cannot find cycle event\n"); if (pfm_get_inst_retired_event(&inp.pfp_events[1]) != PFMLIB_SUCCESS) fatal_error("cannot find inst retired event\n"); i = 2; if (i > num_counters) { i = num_counters; printf("too many 
events provided (max=%d events), using first %d event(s)\n", num_counters, i); } /* * set the privilege mode: * PFM_PLM3 : user level * PFM_PLM0 : kernel level */ inp.pfp_dfl_plm = PFM_PLM3; /* * how many counters we use */ inp.pfp_event_count = i; /* * let the library figure out the values for the PMCS */ if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) { fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); } /* * Now prepare the argument to initialize the PMDs and PMCS. * We must pfp_pmc_count to determine the number of PMC to intialize. * We must use pfp_event_count to determine the number of PMD to initialize. * Some events causes extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. * * This step is new compared to libpfm-2.x. It is necessary because the library no * longer knows about the kernel data structures. */ for (i=0; i < outp.pfp_pmc_count; i++) { pc[i].reg_num = outp.pfp_pmcs[i].reg_num; pc[i].reg_value = outp.pfp_pmcs[i].reg_value; } /* * the PMC controlling the event ALWAYS come first, that's why this loop * is safe even when extra PMC are needed to support a particular event. */ for (i=0; i < inp.pfp_event_count; i++) { pd[i].reg_num = pc[i].reg_num; /* build sampling mask */ smpl_pmd_mask |= 1UL << pc[i].reg_num; } printf("smpl_pmd_mask=0x%lx\n", smpl_pmd_mask); /* * now we indicate what to record when each counter overflows. * In our case, we only have one sampling period and it is set for the * first event. Here we indicate that when the sampling period expires * then we want to record the value of all the other counters. * * We exclude the first counter in this case. */ smpl_pmd_mask &= ~(1UL << pc[0].reg_num); pc[0].reg_smpl_pmds[0] = smpl_pmd_mask; /* * we our sampling counter overflow, we want to be notified. * The notification will come ONLY when the sampling buffer * becomes full. * * We also activate randomization of the sampling period. 
*/ pc[0].reg_flags |= PFM_REGFL_OVFL_NOTIFY | PFM_REGFL_RANDOM; /* * we also want to reset the other PMDs on * every overflow. If we do not set * this, the non-overflowed counters * will be untouched. */ pc[0].reg_reset_pmds[0] |= smpl_pmd_mask; sample_period = 1000000UL; pd[0].reg_value = (~0) - sample_period + 1; pd[0].reg_short_reset = (~0) - sample_period + 1; pd[0].reg_long_reset = (~0) - sample_period + 1; /* * setup randomization parameters, we allow a range of up to +256 here. */ pd[0].reg_random_seed = 5; pd[0].reg_random_mask = 0xff; printf("programming %u PMCS and %u PMDS\n", outp.pfp_pmc_count, inp.pfp_event_count); /* * prepare context structure. * * format specific parameters MUST be concatenated to the regular * pfarg_context_t structure. For convenience, the default sampling * format provides a data structure that already combines the pfarg_context_t * with what is needed fot this format. */ /* * We initialize the format specific information. * The format is identified by its UUID which must be copied * into the ctx_buf_fmt_id field. */ memcpy(ctx.ctx_arg.ctx_smpl_buf_id, buf_fmt_id, sizeof(pfm_uuid_t)); /* * the size of the buffer is indicated in bytes (not entries). * * The kernel will record into the buffer up to a certain point. * No partial samples are ever recorded. */ ctx.buf_arg.buf_size = 8192; /* * now create our perfmon context. 
*/ if (perfmonctl(0, PFM_CREATE_CONTEXT, &ctx, 1) == -1 ) { if (errno == ENOSYS) { fatal_error("Your kernel does not have performance monitoring support!\n"); } fatal_error("Can't create PFM context %s\n", strerror(errno)); } /* * extract the file descriptor we will use to * identify this newly created context */ fd = ctx.ctx_arg.ctx_fd; /* * retrieve the virtual address at which the sampling * buffer has been mapped */ buf_addr = ctx.ctx_arg.ctx_smpl_vaddr; printf("context [%d] buffer mapped @%p\n", fd, buf_addr); /* * Now program the registers */ if (perfmonctl(fd, PFM_WRITE_PMCS, pc, outp.pfp_pmc_count) == -1) { fatal_error("perfmonctl error PFM_WRITE_PMCS errno %d\n",errno); } /* * initialize the PMDs */ if (perfmonctl(fd, PFM_WRITE_PMDS, pd, inp.pfp_event_count) == -1) { fatal_error("perfmonctl error PFM_WRITE_PMDS errno %d\n",errno); } /* * Create the child task */ if ((pid=fork()) == -1) fatal_error("Cannot fork process\n"); /* * In order to get the PFM_END_MSG message, it is important * to ensure that the child task does not inherit the file * descriptor of the context. By default, file descriptor * are inherited during exec(). We explicitely close it * here. We could have set it up through fcntl(FD_CLOEXEC) * to achieve the same thing. */ if (pid == 0) { close(fd); child(arg); } /* * wait for the child to exec */ waitpid(pid, &status, WUNTRACED); /* * process is stopped at this point */ if (WIFEXITED(status)) { warning("task %s [%d] exited already status %d\n", arg[0], pid, WEXITSTATUS(status)); goto terminate_session; } /* * attach context to stopped task */ load_args.load_pid = pid; if (perfmonctl(fd, PFM_LOAD_CONTEXT, &load_args, 1) == -1) { fatal_error("perfmonctl error PFM_LOAD_CONTEXT errno %d\n",errno); } /* * activate monitoring for stopped task. * (nothing will be measured at this point */ if (perfmonctl(fd, PFM_START, NULL, 0) == -1) { fatal_error(" perfmonctl error PFM_START errno %d\n",errno); } /* * detach child. 
Side effect includes * activation of monitoring. */ ptrace(PTRACE_DETACH, pid, NULL, 0); /* * core loop */ for(;;) { /* * wait for overflow/end notification messages */ ret = read(fd, &msg, sizeof(msg)); if (ret == -1) { fatal_error("cannot read perfmon msg: %s\n", strerror(errno)); } switch(msg.type) { case PFM_MSG_OVFL: /* the sampling buffer is full */ process_smpl_buf(fd, smpl_pmd_mask, 1); ovfl_count++; break; case PFM_MSG_END: /* monitored task terminated */ printf("task terminated\n"); goto terminate_session; default: fatal_error("unknown message type %d\n", msg.type); } } terminate_session: /* * cleanup child */ waitpid(pid, &status, 0); /* * check for any leftover samples */ process_smpl_buf(fd, smpl_pmd_mask, 0); /* * destroy perfmon context */ close(fd); printf("%lu samples collected in %lu buffer overflows\n", collect_samples, ovfl_count); return 0; } int main(int argc, char **argv) { pfmlib_options_t pfmlib_options; if (argc < 2) fatal_error("You must specify a command to execute\n"); /* * Initialize pfm library (required before we can use it) */ if (pfm_initialize() != PFMLIB_SUCCESS) { fatal_error("Can't initialize library\n"); } /* * pass options to library (optional) */ memset(&pfmlib_options, 0, sizeof(pfmlib_options)); pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ pfmlib_options.pfm_verbose = 0; /* set to 1 for verbose */ pfm_set_options(&pfmlib_options); return mainloop(argv+1); } papi-5.6.0/src/libpfm-3.y/examples_v3.x/Makefile000664 001750 001750 00000005555 13216244362 023351 0ustar00jshenry1963jshenry1963000000 000000 # # Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. 
# Contributed by Stephane Eranian # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies # of the Software, and to permit persons to whom the Software is furnished to do so, # subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF # CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE # OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. # TOPDIR := $(shell if [ "$$PWD" != "" ]; then echo $$PWD; else pwd; fi)/.. include $(TOPDIR)/config.mk include $(TOPDIR)/rules.mk DIRS= ifeq ($(ARCH),ia64) DIRS +=ia64 endif ifeq ($(ARCH),ia32) DIRS +=x86 endif ifeq ($(ARCH),x86_64) DIRS +=x86 endif ifeq ($(CONFIG_PFMLIB_ARCH_CRAYXT),y) CFLAGS += -DCONFIG_PFMLIB_ARCH_CRAYXT endif CFLAGS+= -I. 
-D_GNU_SOURCE LIBS += -lm ifeq ($(SYS),Linux) CFLAGS+= -pthread LIBS += -lrt endif TARGET_GEN=showevtinfo check_events ifeq ($(SYS),Linux) TARGET_LINUX +=self task task_attach task_attach_timeout syst \ notify_self notify_self2 notify_self3 \ multiplex multiplex2 set_notify whichpmu \ showreginfo task_smpl task_smpl_user \ pfmsetup self_smpl_multi self_pipe \ notify_self_fork XTRA += rtop endif all: $(TARGET_GEN) $(TARGET_LINUX) $(XTRA) @set -e ; for d in $(DIRS) ; do $(MAKE) -C $$d $@ ; done # Many systems don't have ncurses installed rtop: rtop.o detect_pmcs.o $(PFMLIB) -$(CC) $(CFLAGS) $(LDFLAGS) -D_GNU_SOURCE -o $@ $^ $(LIBS) -lpthread -lncurses $(TARGET_LINUX): %:%.o detect_pmcs.o $(PFMLIB) $(CC) $(CFLAGS) -o $@ $(LDFLAGS) $^ $(LIBS) $(TARGET_GEN): %:%.o $(PFMLIB) $(CC) $(CFLAGS) -o $@ $(LDFLAGS) $^ $(LIBS) clean: @set -e ; for d in $(DIRS) ; do $(MAKE) -C $$d $@ ; done $(RM) -f *.o $(TARGET_LINUX) $(TARGET_GEN) $(XTRA) *~ distclean: clean install_examples: $(TARGET_LINUX) $(TARGET_GEN) install_examples: @echo installing: $(TARGET_LINUX) $(TARGET_GEN) -mkdir -p $(DESTDIR)$(EXAMPLESDIR) $(INSTALL) -m 755 $(TARGET_LINUX) $(TARGET_GEN) $(DESTDIR)$(EXAMPLESDIR) @set -e ; for d in $(DIRS) ; do $(MAKE) -C $$d $@ ; done # # examples are installed as part of the RPM install, typically in /usr/share/doc/libpfm-X.Y/ # papi-5.6.0/man/man3/PAPIF_write.3000664 001750 001750 00000000754 13216244356 020313 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPIF_write" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPIF_write \- .PP Write counter values into counters\&. .SH SYNOPSIS .br .PP .SH "Detailed Description" .PP .PP \fBFortran Interface:\fP .RS 4 #include 'fpapi\&.h' .br \fBPAPIF_write\fP( C_INT EventSet, C_LONG_LONG(*) values, C_INT check ) .RE .PP \fBSee Also:\fP .RS 4 \fBPAPI_write\fP .RE .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. 
papi-5.6.0/src/libpfm4/lib/events/intel_ivbep_unc_ubo_events.h000664 001750 001750 00000006121 13216244364 026503 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2014 Google Inc. All rights reserved * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. 
* * PMU: ivbep_unc_ubo (Intel IvyBridge-EP U-Box uncore PMU) */ static const intel_x86_umask_t ivbep_unc_u_event_msg[]={ { .uname = "DOORBELL_RCVD", .udesc = "TBD", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO, }, { .uname = "INT_PRIO", .udesc = "TBD", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "IPI_RCVD", .udesc = "TBD", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO, }, { .uname = "MSI_RCVD", .udesc = "TBD", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "VLW_RCVD", .udesc = "TBD", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t ivbep_unc_u_phold_cycles[]={ { .uname = "ASSERT_TO_ACK", .udesc = "Number of cycles asserted to ACK", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ACK_TO_DEASSERT", .udesc = "Number of cycles ACK to deassert", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_entry_t intel_ivbep_unc_u_pe[]={ { .name = "UNC_U_EVENT_MSG", .desc = "VLW Received", .code = 0x42, .cntmsk = 0x3, .ngrp = 1, .modmsk = IVBEP_UNC_UBO_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_u_event_msg), .umasks = ivbep_unc_u_event_msg }, { .name = "UNC_U_LOCK_CYCLES", .desc = "IDI Lock/SplitLock Cycles", .code = 0x44, .cntmsk = 0x3, .modmsk = IVBEP_UNC_UBO_ATTRS, }, { .name = "UNC_U_PHOLD_CYCLES", .desc = "Cycles PHOLD asserts to Ack", .code = 0x45, .cntmsk = 0x3, .ngrp = 1, .modmsk = IVBEP_UNC_UBO_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(ivbep_unc_u_phold_cycles), .umasks = ivbep_unc_u_phold_cycles }, { .name = "UNC_U_RACU_REQUESTS", .desc = "RACU requests", .code = 0x46, .cntmsk = 0x3, .modmsk = IVBEP_UNC_UBO_ATTRS, }, }; papi-5.6.0/src/ctests/overflow_force_software.c000664 001750 001750 00000022445 13216244360 023734 0ustar00jshenry1963jshenry1963000000 000000 /* * File: overflow_force_software.c * Author: Kevin London * london@cs.utk.edu * Mods: Maynard Johnson * maynardj@us.ibm.com * Philip Mucci * mucci@cs.utk.edu * Haihang You * you@cs.utk.edu * * */ /* This file performs the 
following test: overflow dispatch of an eventset with just a single event. Using both Hardware and software overflows The Eventset contains: + PAPI_FP_INS (overflow monitor) - Start eventset 1 - Do flops - Stop and measure eventset 1 - Set up overflow on eventset 1 - Start eventset 1 - Do flops - Stop eventset 1 - Set up forced software overflow on eventset 1 - Start eventset 1 - Do flops - Stop eventset 1 */ #include #include #include #include #include "papi.h" #include "papi_test.h" #include "do_loops.h" #define OVER_FMT "handler(%d) Overflow at %p overflow_vector=%#llx!\n" #define OUT_FMT "%-12s : %16lld%16d%16lld\n" #define SOFT_TOLERANCE 0.90 #define MY_NUM_TESTS 5 static int total[MY_NUM_TESTS] = { 0, }; /* total overflows */ static int use_total = 0; /* which total field to bump */ static long long values[MY_NUM_TESTS] = { 0, }; void handler( int EventSet, void *address, long long overflow_vector, void *context ) { ( void ) context; if ( !TESTS_QUIET ) { fprintf( stderr, OVER_FMT, EventSet, address, overflow_vector ); } total[use_total]++; } int main( int argc, char **argv ) { int EventSet = PAPI_NULL; long long hard_min, hard_max, soft_min, soft_max; int retval; int PAPI_event = 0, mythreshold; char event_name[PAPI_MAX_STR_LEN]; PAPI_option_t opt; PAPI_event_info_t info; PAPI_option_t itimer; const PAPI_hw_info_t *hw_info = NULL; tests_quiet( argc, argv ); /* Set TESTS_QUIET variable */ retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); /* query and set up the right instruction to monitor */ if ( PAPI_query_event( PAPI_FP_INS ) == PAPI_OK ) { if ( PAPI_query_event( PAPI_FP_INS ) == PAPI_OK ) { PAPI_get_event_info( PAPI_FP_INS, &info ); if ( info.count == 1 || !strcmp( info.derived, "DERIVED_CMPD" ) ) PAPI_event = PAPI_FP_INS; } } if ( PAPI_event == 0 ) { if ( PAPI_query_event( PAPI_FP_OPS ) == PAPI_OK ) { PAPI_get_event_info( PAPI_FP_OPS, &info ); if ( info.count == 
1 || !strcmp( info.derived, "DERIVED_CMPD" ) ) PAPI_event = PAPI_FP_OPS; } } if ( PAPI_event == 0 ) { if ( PAPI_query_event( PAPI_TOT_INS ) == PAPI_OK ) { PAPI_get_event_info( PAPI_TOT_INS, &info ); if ( info.count == 1 || !strcmp( info.derived, "DERIVED_CMPD" ) ) PAPI_event = PAPI_TOT_INS; } } if ( PAPI_event == 0 ) test_skip( __FILE__, __LINE__, "No suitable event for this test found!", 0 ); hw_info = PAPI_get_hardware_info( ); if ( hw_info == NULL ) test_fail( __FILE__, __LINE__, "PAPI_get_hardware_info", 2 ); if ( PAPI_event == PAPI_FP_INS ) mythreshold = THRESHOLD; else #if defined(linux) mythreshold = ( int ) hw_info->cpu_max_mhz * 20000; #else mythreshold = THRESHOLD * 2; #endif retval = PAPI_create_eventset( &EventSet ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); retval = PAPI_add_event( EventSet, PAPI_event ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_add_event", retval ); retval = PAPI_get_opt( PAPI_COMPONENTINFO, &opt ); if ( retval != PAPI_OK ) test_skip( __FILE__, __LINE__, "Platform does not support Hardware overflow", 0 ); do_stuff( ); /* Do reference count */ retval = PAPI_start( EventSet ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_start", retval ); do_stuff( ); retval = PAPI_stop( EventSet, &values[use_total] ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); use_total++; /* Now do hardware overflow reference count */ retval = PAPI_overflow( EventSet, PAPI_event, mythreshold, 0, handler ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_overflow", retval ); retval = PAPI_start( EventSet ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_start", retval ); do_stuff( ); retval = PAPI_stop( EventSet, &values[use_total] ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); use_total++; retval = PAPI_overflow( EventSet, PAPI_event, 0, 0, handler ); if ( retval != PAPI_OK ) 
test_fail( __FILE__, __LINE__, "PAPI_overflow", retval ); /* Now do software overflow reference count, uses SIGPROF */ retval = PAPI_overflow( EventSet, PAPI_event, mythreshold, PAPI_OVERFLOW_FORCE_SW, handler ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_overflow", retval ); retval = PAPI_start( EventSet ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_start", retval ); do_stuff( ); retval = PAPI_stop( EventSet, &values[use_total] ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); use_total++; retval = PAPI_overflow( EventSet, PAPI_event, 0, PAPI_OVERFLOW_FORCE_SW, handler ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_overflow", retval ); /* Now do software overflow with SIGVTALRM */ memset( &itimer, 0, sizeof ( itimer ) ); itimer.itimer.itimer_num = ITIMER_VIRTUAL; itimer.itimer.itimer_sig = SIGVTALRM; if ( PAPI_set_opt( PAPI_DEF_ITIMER, &itimer ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_set_opt", retval ); retval = PAPI_overflow( EventSet, PAPI_event, mythreshold, PAPI_OVERFLOW_FORCE_SW, handler ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_overflow", retval ); retval = PAPI_start( EventSet ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_start", retval ); do_stuff( ); retval = PAPI_stop( EventSet, &values[use_total] ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); use_total++; retval = PAPI_overflow( EventSet, PAPI_event, 0, PAPI_OVERFLOW_FORCE_SW, handler ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_overflow", retval ); /* Now do software overflow with SIGALRM */ memset( &itimer, 0, sizeof ( itimer ) ); itimer.itimer.itimer_num = ITIMER_REAL; itimer.itimer.itimer_sig = SIGALRM; if ( PAPI_set_opt( PAPI_DEF_ITIMER, &itimer ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_set_opt", retval ); retval = PAPI_overflow( EventSet, PAPI_event, mythreshold, PAPI_OVERFLOW_FORCE_SW, handler 
); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_overflow", retval ); retval = PAPI_start( EventSet ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_start", retval ); do_stuff( ); retval = PAPI_stop( EventSet, &values[use_total] ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); use_total++; retval = PAPI_overflow( EventSet, PAPI_event, 0, PAPI_OVERFLOW_FORCE_SW, handler ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_overflow", retval ); if ( !TESTS_QUIET ) { if ( ( retval = PAPI_event_code_to_name( PAPI_event, event_name ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); printf ( "Test case: Software overflow of various types with 1 event in set.\n" ); printf ( "------------------------------------------------------------------------------\n" ); printf( "Threshold for overflow is: %d\n", mythreshold ); printf ( "------------------------------------------------------------------------------\n" ); printf( "Test type : %11s%13s%13s%13s%13s\n", "Reference", "Hardware", "ITIMER_PROF", "ITIMER_VIRT", "ITIMER_REAL" ); printf( "%-12s: %11lld%13lld%13lld%13lld%13lld\n", info.symbol, values[0], values[1], values[2], values[3], values[4] ); printf( "Overflows : %11d%13d%13d%13d%13d\n", total[0], total[1], total[2], total[3], total[4] ); printf ( "------------------------------------------------------------------------------\n" ); printf( "Verification:\n" ); printf ( "Overflow in Column 2 greater than or equal to overflows in Columns 3, 4, 5\n" ); printf( "Overflow in Columns 3, 4, 5 greater than 0\n" ); } hard_min = ( long long ) ( ( ( double ) values[0] * ( 1.0 - OVR_TOLERANCE ) ) / ( double ) mythreshold ); hard_max = ( long long ) ( ( ( double ) values[0] * ( 1.0 + OVR_TOLERANCE ) ) / ( double ) mythreshold ); soft_min = ( long long ) ( ( ( double ) values[0] * ( 1.0 - SOFT_TOLERANCE ) ) / ( double ) mythreshold ); soft_max = ( long long ) ( ( ( double ) 
values[0] * ( 1.0 + SOFT_TOLERANCE ) ) / ( double ) mythreshold ); if ( total[1] > hard_max || total[1] < hard_min ) test_fail( __FILE__, __LINE__, "Hardware Overflows outside limits", 1 ); if ( total[2] > soft_max || total[3] > soft_max || total[4] > soft_max ) test_fail( __FILE__, __LINE__, "Software Overflows exceed theoretical maximum", 1 ); if ( total[2] < soft_min || total[3] < soft_min || total[4] < soft_min ) printf( "WARNING: Software Overflow occuring but suspiciously low\n" ); if ( ( total[2] == 0 ) || ( total[3] == 0 ) || ( total[4] == 0 ) ) test_fail( __FILE__, __LINE__, "Software Overflows", 1 ); test_pass( __FILE__ ); return 0; } papi-5.6.0/src/libpfm-3.y/examples_v3.x/ia64/mont_irr.c000664 001750 001750 00000024721 13216244362 024445 0ustar00jshenry1963jshenry1963000000 000000 /* * mont_irr.c - example of how to use code range restriction with the Dual-Core Itanium 2 PMU * * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. */ #include #include #include #include #include #include #include #include #include #include #define NUM_PMCS PFMLIB_MAX_PMCS #define NUM_PMDS PFMLIB_MAX_PMDS #define MAX_EVT_NAME_LEN 128 #define MAX_PMU_NAME_LEN 32 #define VECTOR_SIZE 1000000UL typedef struct { char *event_name; unsigned long expected_value; } event_desc_t; static event_desc_t event_list[]={ { "fp_ops_retired", VECTOR_SIZE<<1 }, { NULL, 0UL } }; static void fatal_error(char *fmt,...) __attribute__((noreturn)); static void fatal_error(char *fmt, ...) { va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); exit(1); } void saxpy(double *a, double *b, double *c, unsigned long size) { unsigned long i; for(i=0; i < size; i++) { c[i] = 2*a[i] + b[i]; } printf("saxpy done\n"); } void saxpy2(double *a, double *b, double *c, unsigned long size) { unsigned long i; for(i=0; i < size; i++) { c[i] = 2*a[i] + b[i]; } printf("saxpy2 done\n"); } static int do_test(void) { unsigned long size; double *a, *b, *c; size = VECTOR_SIZE; a = malloc(size*sizeof(double)); b = malloc(size*sizeof(double)); c = malloc(size*sizeof(double)); if (a == NULL || b == NULL || c == NULL) fatal_error("Cannot allocate vectors\n"); memset(a, 0, size*sizeof(double)); memset(b, 0, size*sizeof(double)); memset(c, 0, size*sizeof(double)); saxpy(a,b,c, size); saxpy2(a,b,c, size); return 0; } int main(int argc, char **argv) { event_desc_t *p; unsigned long range_start, range_end; int ret, type = 0; pfmlib_input_param_t inp; pfmlib_output_param_t outp; pfmlib_mont_input_param_t mont_inp; pfmlib_mont_output_param_t mont_outp; pfarg_pmr_t pd[NUM_PMDS]; pfarg_pmr_t 
pc[NUM_PMCS]; pfarg_pmr_t ibrs[8]; pfmlib_options_t pfmlib_options; struct fd { /* function descriptor */ unsigned long addr; unsigned long gp; } *fd; unsigned int i; int id; char name[MAX_EVT_NAME_LEN]; /* * Initialize pfm library (required before we can use it) */ if (pfm_initialize() != PFMLIB_SUCCESS) fatal_error("Can't initialize library\n"); /* * Let's make sure we run this on the right CPU family */ pfm_get_pmu_type(&type); if (type != PFMLIB_MONTECITO_PMU) { char model[MAX_PMU_NAME_LEN]; pfm_get_pmu_name(model, MAX_PMU_NAME_LEN); fatal_error("this program does not work with %s PMU\n", model); } /* * pass options to library (optional) */ memset(&pfmlib_options, 0, sizeof(pfmlib_options)); pfmlib_options.pfm_debug = 1; /* set to 1 for debug */ pfmlib_options.pfm_verbose = 1; /* set to 1 for verbose */ pfm_set_options(&pfmlib_options); /* * Compute the range we are interested in * * On IA-64, the function pointer does not point directly * to the function but to a descriptor which contains two * unsigned long: the first one is the actual start address * of the function, the second is the gp (global pointer) * to load into r1 before jumping into the function. Unlesss * we're jumping into a shared library the gp is the same as * the current gp. * * In the artificial example, we also rely on the compiler/linker * NOT reordering code layout. We depend on saxpy2() being just * after saxpy(). 
* */ fd = (struct fd *)saxpy; range_start = fd->addr; fd = (struct fd *)saxpy2; range_end = fd->addr; /* * linker may reorder saxpy() and saxpy2() */ if (range_end < range_start) { unsigned long tmp; tmp = range_start; range_start = range_end; range_end = tmp; } memset(pc, 0, sizeof(pc)); memset(pd, 0, sizeof(pd)); memset(ibrs,0, sizeof(ibrs)); memset(&inp,0, sizeof(inp)); memset(&outp,0, sizeof(outp)); memset(&mont_inp,0, sizeof(mont_inp)); memset(&mont_outp,0, sizeof(mont_outp)); /* * find requested event */ p = event_list; for (i=0; p->event_name ; i++, p++) { if (pfm_find_event(p->event_name, &inp.pfp_events[i].event) != PFMLIB_SUCCESS) { fatal_error("cannot find %s event\n", p->event_name); } } /* * set the privilege mode: * PFM_PLM3 : user level only */ inp.pfp_dfl_plm = PFM_PLM3; /* * how many counters we use */ inp.pfp_event_count = i; /* * We use the library to figure out how to program the debug registers * to cover the data range we are interested in. The rr_end parameter * must point to the byte after the last element of the range (C-style range). * * Because of the masking mechanism and therefore alignment constraints used to implement * this feature, it may not be possible to exactly cover a given range. It may be that * the coverage exceeds the desired range. So it is possible to capture noise if * the surrounding addresses are also heavily used. You can figure out by how much the * actual range is off compared to the requested range by checking the rr_soff and rr_eoff * fields on return from the library call. * * Upon return, the rr_dbr array is programmed and the number of debug registers (not pairs) * used to cover the range is in rr_nbr_used. * * In the case of code range restriction on Itanium 2, the library will try to use the fine * mode first and then it will default to using multiple pairs to cover the range. 
*/ mont_inp.pfp_mont_irange.rr_used = 1; /* indicate we use code range restriction */ mont_inp.pfp_mont_irange.rr_limits[0].rr_start = range_start; mont_inp.pfp_mont_irange.rr_limits[0].rr_end = range_end; /* * let the library figure out the values for the PMCS */ if ((ret=pfm_dispatch_events(&inp, &mont_inp, &outp, &mont_outp)) != PFMLIB_SUCCESS) fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); /* * print offsets */ printf("code range : [0x%016lx-0x%016lx)\n" "start_offset:-0x%lx end_offset:+0x%lx\n" "%d pairs of debug registers used\n", range_start, range_end, mont_outp.pfp_mont_irange.rr_infos[0].rr_soff, mont_outp.pfp_mont_irange.rr_infos[0].rr_eoff, mont_outp.pfp_mont_irange.rr_nbr_used >> 1); /* * now create the session */ id = pfm_create(0, NULL); if (id == -1) { if (errno == ENOSYS) fatal_error("Your kernel does not have performance monitoring support!\n"); fatal_error("cannot create session %s\n", strerror(errno)); } /* * Now prepare the argument to initialize the PMDs and PMCS. * We must pfp_pmc_count to determine the number of PMC to intialize. * We must use pfp_event_count to determine the number of PMD to initialize. * Some events cause extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. */ for (i=0; i < outp.pfp_pmc_count; i++) { pc[i].reg_num = outp.pfp_pmcs[i].reg_num; pc[i].reg_value = outp.pfp_pmcs[i].reg_value; } /* * figure out pmd mapping from output pmc */ for (i=0; i < outp.pfp_pmd_count; i++) pd[i].reg_num = outp.pfp_pmds[i].reg_num; /* * propagate IBR settings. 
IBRS are mapped to PMC256-PMC263 */ for (i=0; i < mont_outp.pfp_mont_irange.rr_nbr_used; i++) { ibrs[i].reg_num = 256+mont_outp.pfp_mont_irange.rr_br[i].reg_num; ibrs[i].reg_value = mont_outp.pfp_mont_irange.rr_br[i].reg_value; } /* * Now program the registers * * We don't use the save variable to indicate the number of elements passed to * the kernel because, as we said earlier, pc may contain more elements than * the number of events we specified, i.e., contains more than coutning monitors. */ if (pfm_write(id, 0, PFM_RW_PMC, pc, outp.pfp_pmc_count * sizeof(*pc))) fatal_error("pfm_write error errno %d\n",errno); /* * Program the code debug registers. */ if (pfm_write(id, 0, PFM_RW_PMC, ibrs, mont_outp.pfp_mont_irange.rr_nbr_used * sizeof(*ibrs))) fatal_error("pfm_write error for IBRS errno %d\n",errno); if (pfm_write(id, 0, PFM_RW_PMD, pd, outp.pfp_pmd_count * sizeof(*pd)) == -1) fatal_error("pfm_write error errno %d\n",errno); /* * now we attach session */ if (pfm_attach(id, 0, getpid())) fatal_error("pfm_attach error errno %d\n",errno); /* * Let's roll now. * * We run two distinct copies of the same function but we restrict measurement * to the first one (saxpy). Therefore the expected count is half what you would * get if code range restriction was not used. The core loop in both case uses * two floating point operation per iteration. */ if (pfm_set_state(id, 0, PFM_ST_START)) fatal_error("pfm_set_state error errno %d\n",errno); do_test(); if (pfm_set_state(id, 0, PFM_ST_STOP)) fatal_error("pfm_set_state error errno %d\n",errno); /* * now read the results */ if (pfm_read(id, 0, PFM_RW_PMD, pd, inp.pfp_event_count * sizeof(*pd)) == -1) fatal_error( "pfm_read error errno %d\n",errno); /* * print the results * * It is important to realize, that the first event we specified may not * be in PMD4. Not all events can be measured by any monitor. That's why * we need to use the pc[] array to figure out where event i was allocated. 
*/ for (i=0; i < inp.pfp_event_count; i++) { pfm_get_full_event_name(&inp.pfp_events[i], name, MAX_EVT_NAME_LEN); printf("PMD%-3u %20lu %s (expected %lu)\n", pd[i].reg_num, pd[i].reg_value, name, event_list[i].expected_value); } /* * let's stop this now */ close(id); return 0; } papi-5.6.0/src/libpfm4/docs/man3/libpfm_arm_ac7.3000664 001750 001750 00000001425 13216244363 023377 0ustar00jshenry1963jshenry1963000000 000000 .TH LIBPFM 3 "August, 2012" "" "Linux Programmer's Manual" .SH NAME libpfm_arm_ac7 - support for Arm Cortex A7 PMU .SH SYNOPSIS .nf .B #include .sp .B PMU name: arm_ac7 .B PMU desc: ARM Cortex A7 .sp .SH DESCRIPTION The library supports the ARM Cortex A7 core PMU. This PMU supports 4 counters and privilege levels filtering. .SH MODIFIERS The following modifiers are supported on ARM Cortex A7: .TP .B u Measure at the user level. This corresponds to \fBPFM_PLM3\fR. This is a boolean modifier. .TP .B k Measure at the kernel level. This corresponds to \fBPFM_PLM0\fR. This is a boolean modifier. .TP .B hv Measure at the hypervisor level. This corresponds to \fBPFM_PLMH\fR. This is a boolean modifier. .SH AUTHORS .nf Stephane Eranian .if .PP papi-5.6.0/src/libpfm-3.y/lib/pfmlib_intel_atom.c000664 001750 001750 00000052422 13216244363 023607 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_intel_atom.c : Intel Atom PMU * * Copyright (c) 2008 Google, Inc * Contributed by Stephane Eranian * * Based on work: * Copyright (c) 2006 Hewlett-Packard Development Company, L.P. 
* Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * * This file implements support for Intel Core PMU as specified in the following document: * "IA-32 Intel Architecture Software Developer's Manual - Volume 3B: System * Programming Guide" * * Intel Atom = architectural v3 + PEBS */ #include #include #include #include #include /* public headers */ #include /* private headers */ #include "pfmlib_priv.h" #include "pfmlib_intel_atom_priv.h" #include "intel_atom_events.h" /* let's define some handy shortcuts! 
*/ #define sel_event_select perfevtsel.sel_event_select #define sel_unit_mask perfevtsel.sel_unit_mask #define sel_usr perfevtsel.sel_usr #define sel_os perfevtsel.sel_os #define sel_edge perfevtsel.sel_edge #define sel_pc perfevtsel.sel_pc #define sel_int perfevtsel.sel_int #define sel_en perfevtsel.sel_en #define sel_inv perfevtsel.sel_inv #define sel_cnt_mask perfevtsel.sel_cnt_mask #define sel_any perfevtsel.sel_any #define has_pebs(i) (intel_atom_pe[i].pme_flags & PFMLIB_INTEL_ATOM_PEBS) /* * Description of the PMC register mappings: * * 0 -> PMC0 -> PERFEVTSEL0 * 1 -> PMC1 -> PERFEVTSEL1 * 16 -> PMC16 -> FIXED_CTR_CTRL * 17 -> PMC17 -> PEBS_ENABLED * * Description of the PMD register mapping: * * 0 -> PMD0 -> PMC0 * 1 -> PMD1 -> PMC1 * 16 -> PMD2 -> FIXED_CTR0 * 17 -> PMD3 -> FIXED_CTR1 * 18 -> PMD4 -> FIXED_CTR2 */ #define INTEL_ATOM_SEL_BASE 0x186 #define INTEL_ATOM_CTR_BASE 0xc1 #define FIXED_CTR_BASE 0x309 #define PFMLIB_INTEL_ATOM_ALL_FLAGS \ (PFM_INTEL_ATOM_SEL_INV|PFM_INTEL_ATOM_SEL_EDGE|PFM_INTEL_ATOM_SEL_ANYTHR) static pfmlib_regmask_t intel_atom_impl_pmcs, intel_atom_impl_pmds; static int highest_counter; static int pfm_intel_atom_detect(void) { int ret, family, model; char buffer[128]; ret = __pfm_getcpuinfo_attr("vendor_id", buffer, sizeof(buffer)); if (ret == -1) return PFMLIB_ERR_NOTSUPP; if (strcmp(buffer, "GenuineIntel")) return PFMLIB_ERR_NOTSUPP; ret = __pfm_getcpuinfo_attr("cpu family", buffer, sizeof(buffer)); if (ret == -1) return PFMLIB_ERR_NOTSUPP; family = atoi(buffer); ret = __pfm_getcpuinfo_attr("model", buffer, sizeof(buffer)); if (ret == -1) return PFMLIB_ERR_NOTSUPP; model = atoi(buffer); /* * Atom : family 6 model 28 */ return family == 6 && model == 28 ? 
PFMLIB_SUCCESS : PFMLIB_ERR_NOTSUPP; } static int pfm_intel_atom_init(void) { int i; /* generic counters */ pfm_regmask_set(&intel_atom_impl_pmcs, 0); pfm_regmask_set(&intel_atom_impl_pmds, 0); pfm_regmask_set(&intel_atom_impl_pmcs, 1); pfm_regmask_set(&intel_atom_impl_pmds, 1); /* fixed counters */ pfm_regmask_set(&intel_atom_impl_pmcs, 16); pfm_regmask_set(&intel_atom_impl_pmds, 16); pfm_regmask_set(&intel_atom_impl_pmds, 17); pfm_regmask_set(&intel_atom_impl_pmds, 18); /* lbr */ pfm_regmask_set(&intel_atom_impl_pmds, 19); for(i=0; i < 16; i++) pfm_regmask_set(&intel_atom_impl_pmds, i); highest_counter = 18; /* PEBS */ pfm_regmask_set(&intel_atom_impl_pmcs, 17); return PFMLIB_SUCCESS; } static int pfm_intel_atom_is_fixed(pfmlib_event_t *e, unsigned int f) { unsigned int fl, flc, i; unsigned int mask = 0; fl = intel_atom_pe[e->event].pme_flags; /* * first pass: check if event as a whole supports fixed counters */ switch(f) { case 0: mask = PFMLIB_INTEL_ATOM_FIXED0; break; case 1: mask = PFMLIB_INTEL_ATOM_FIXED1; break; case 2: mask = PFMLIB_INTEL_ATOM_FIXED2_ONLY; break; default: return 0; } if (fl & mask) return 1; /* * second pass: check if unit mask supports fixed counter * * reject if mask not found OR if not all unit masks have * same fixed counter mask */ flc = 0; for(i=0; i < e->num_masks; i++) { fl = intel_atom_pe[e->event].pme_umasks[e->unit_masks[i]].pme_flags; if (fl & mask) flc++; } return flc > 0 && flc == e->num_masks ? 1 : 0; } /* * IMPORTANT: the interface guarantees that pfp_pmds[] elements are returned in the order the events * were submitted. 
*/ static int pfm_intel_atom_dispatch_counters(pfmlib_input_param_t *inp, pfmlib_intel_atom_input_param_t *param, pfmlib_output_param_t *outp) { #define HAS_OPTIONS(x) (cntrs && (cntrs[x].flags || cntrs[x].cnt_mask)) #define is_fixed_pmc(a) (a == 16 || a == 17 || a == 18) pfmlib_intel_atom_counter_t *cntrs; pfm_intel_atom_sel_reg_t reg; pfmlib_event_t *e; pfmlib_reg_t *pc, *pd; pfmlib_regmask_t *r_pmcs; uint64_t val; unsigned long plm; unsigned long long fixed_ctr; unsigned int npc, npmc0, npmc1, nf2; unsigned int i, j, n, k, ucode, use_pebs = 0, done_pebs; unsigned int assign_pc[PMU_INTEL_ATOM_NUM_COUNTERS]; unsigned int next_gen, last_gen; npc = npmc0 = npmc1 = nf2 = 0; e = inp->pfp_events; pc = outp->pfp_pmcs; pd = outp->pfp_pmds; n = inp->pfp_event_count; r_pmcs = &inp->pfp_unavail_pmcs; cntrs = param ? param->pfp_intel_atom_counters : NULL; use_pebs = param ? param->pfp_intel_atom_pebs_used : 0; if (n > PMU_INTEL_ATOM_NUM_COUNTERS) return PFMLIB_ERR_TOOMANY; /* * initilize to empty */ for(i=0; i < PMU_INTEL_ATOM_NUM_COUNTERS; i++) assign_pc[i] = -1; /* * error checking */ for(i=0; i < n; i++) { /* * only supports two priv levels for perf counters */ if (e[i].plm & (PFM_PLM1|PFM_PLM2)) return PFMLIB_ERR_INVAL; /* * check for valid flags */ if (cntrs && cntrs[i].flags & ~PFMLIB_INTEL_ATOM_ALL_FLAGS) return PFMLIB_ERR_INVAL; if (intel_atom_pe[e[i].event].pme_flags & PFMLIB_INTEL_ATOM_UMASK_NCOMBO && e[i].num_masks > 1) { DPRINT("events does not support unit mask combination\n"); return PFMLIB_ERR_NOASSIGN; } /* * check event-level single register constraint (PMC0, PMC1, FIXED_CTR2) * fail if more than two events requested for the same counter */ if (intel_atom_pe[e[i].event].pme_flags & PFMLIB_INTEL_ATOM_PMC0) { if (++npmc0 > 1) { DPRINT("two events compete for a PMC0\n"); return PFMLIB_ERR_NOASSIGN; } } /* * check if PMC1 is available and if only one event is dependent on it */ if (intel_atom_pe[e[i].event].pme_flags & PFMLIB_INTEL_ATOM_PMC1) { if (++npmc1 > 1) 
{ DPRINT("two events compete for a PMC1\n"); return PFMLIB_ERR_NOASSIGN; } } /* * UNHALTED_REFERENCE_CYCLES can only be measured on FIXED_CTR2 */ if (intel_atom_pe[e[i].event].pme_flags & PFMLIB_INTEL_ATOM_FIXED2_ONLY) { if (++nf2 > 1) { DPRINT("two events compete for FIXED_CTR2\n"); return PFMLIB_ERR_NOASSIGN; } if (cntrs && ((cntrs[i].flags & (PFM_INTEL_ATOM_SEL_EDGE|PFM_INTEL_ATOM_SEL_INV)) || cntrs[i].cnt_mask)) { DPRINT("UNHALTED_REFERENCE_CYCLES only accepts anythr filter\n"); return PFMLIB_ERR_NOASSIGN; } } /* * unit-mask level constraint checking (PMC0, PMC1, FIXED_CTR2) */ for(j=0; j < e[i].num_masks; j++) { unsigned int flags; flags = intel_atom_pe[e[i].event].pme_umasks[e[i].unit_masks[j]].pme_flags; if (flags & PFMLIB_INTEL_ATOM_FIXED2_ONLY) { if (++nf2 > 1) { DPRINT("two events compete for FIXED_CTR2\n"); return PFMLIB_ERR_NOASSIGN; } if (HAS_OPTIONS(i)) { DPRINT("fixed counters do not support inversion/counter-mask\n"); return PFMLIB_ERR_NOASSIGN; } } } } next_gen = 0; /* first generic counter */ last_gen = 1; /* last generic counter */ /* * strongest constraint first: works only in IA32_PMC0, IA32_PMC1, FIXED_CTR2 * * When PEBS is used, we pick the first PEBS event and * place it into PMC0. Subsequent PEBS events, will go * in the other counters. */ done_pebs = 0; for(i=0; i < n; i++) { if ((intel_atom_pe[e[i].event].pme_flags & PFMLIB_INTEL_ATOM_PMC0) || (use_pebs && pfm_intel_atom_has_pebs(e+i) && done_pebs == 0)) { if (pfm_regmask_isset(r_pmcs, 0)) return PFMLIB_ERR_NOASSIGN; assign_pc[i] = 0; next_gen = 1; done_pebs = 1; } if (intel_atom_pe[e[i].event].pme_flags & PFMLIB_INTEL_ATOM_PMC1) { if (pfm_regmask_isset(r_pmcs, 1)) return PFMLIB_ERR_NOASSIGN; assign_pc[i] = 1; if (next_gen == 1) next_gen = 2; else next_gen = 0; } } /* * next constraint: fixed counters * * We abuse the mapping here for assign_pc to make it easier * to provide the correct values for pd[]. 
* We use: * - 16 : fixed counter 0 (pmc16, pmd16) * - 17 : fixed counter 1 (pmc16, pmd17) * - 18 : fixed counter 1 (pmc16, pmd18) */ fixed_ctr = pfm_regmask_isset(r_pmcs, 16) ? 0 : 0x7; if (fixed_ctr) { for(i=0; i < n; i++) { /* fixed counters do not support event options (filters) */ if (HAS_OPTIONS(i)) { if (use_pebs && pfm_intel_atom_has_pebs(e+i)) continue; if (cntrs[i].flags != PFM_INTEL_ATOM_SEL_ANYTHR) continue; } if ((fixed_ctr & 0x1) && pfm_intel_atom_is_fixed(e+i, 0)) { assign_pc[i] = 16; fixed_ctr &= ~1; } if ((fixed_ctr & 0x2) && pfm_intel_atom_is_fixed(e+i, 1)) { assign_pc[i] = 17; fixed_ctr &= ~2; } if ((fixed_ctr & 0x4) && pfm_intel_atom_is_fixed(e+i, 2)) { assign_pc[i] = 18; fixed_ctr &= ~4; } } } /* * assign what is left */ for(i=0; i < n; i++) { if (assign_pc[i] == -1) { for(; next_gen <= last_gen; next_gen++) { if (!pfm_regmask_isset(r_pmcs, next_gen)) break; } if (next_gen <= last_gen) assign_pc[i] = next_gen++; else { DPRINT("cannot assign generic counters\n"); return PFMLIB_ERR_NOASSIGN; } } } j = 0; /* setup fixed counters */ reg.val = 0; k = 0; for (i=0; i < n ; i++ ) { if (!is_fixed_pmc(assign_pc[i])) continue; val = 0; /* if plm is 0, then assume not specified per-event and use default */ plm = e[i].plm ? 
e[i].plm : inp->pfp_dfl_plm; if (plm & PFM_PLM0) val |= 1ULL; if (plm & PFM_PLM3) val |= 2ULL; if (cntrs && cntrs[i].flags & PFM_INTEL_ATOM_SEL_ANYTHR) val |= 4ULL; val |= 1ULL << 3; /* force APIC int (kernel may force it anyway) */ reg.val |= val << ((assign_pc[i]-16)<<2); } if (reg.val) { pc[npc].reg_num = 16; pc[npc].reg_value = reg.val; pc[npc].reg_addr = 0x38D; pc[npc].reg_alt_addr = 0x38D; __pfm_vbprintf("[FIXED_CTRL(pmc%u)=0x%"PRIx64" pmi0=1 en0=0x%"PRIx64" any0=%d pmi1=1 en1=0x%"PRIx64" any1=%d pmi2=1 en2=0x%"PRIx64" any2=%d] ", pc[npc].reg_num, reg.val, reg.val & 0x3ULL, !!(reg.val & 0x4ULL), (reg.val>>4) & 0x3ULL, !!((reg.val>>4) & 0x4ULL), (reg.val>>8) & 0x3ULL, !!((reg.val>>8) & 0x4ULL)); if ((fixed_ctr & 0x1) == 0) __pfm_vbprintf("INSTRUCTIONS_RETIRED "); if ((fixed_ctr & 0x2) == 0) __pfm_vbprintf("UNHALTED_CORE_CYCLES "); if ((fixed_ctr & 0x4) == 0) __pfm_vbprintf("UNHALTED_REFERENCE_CYCLES "); __pfm_vbprintf("\n"); npc++; if ((fixed_ctr & 0x1) == 0) __pfm_vbprintf("[FIXED_CTR0(pmd16)]\n"); if ((fixed_ctr & 0x2) == 0) __pfm_vbprintf("[FIXED_CTR1(pmd17)]\n"); if ((fixed_ctr & 0x4) == 0) __pfm_vbprintf("[FIXED_CTR2(pmd18)]\n"); } for (i=0; i < n ; i++ ) { /* skip fixed counters */ if (is_fixed_pmc(assign_pc[i])) continue; reg.val = 0; /* assume reserved bits are zerooed */ /* if plm is 0, then assume not specified per-event and use default */ plm = e[i].plm ? e[i].plm : inp->pfp_dfl_plm; val = intel_atom_pe[e[i].event].pme_code; reg.sel_event_select = val & 0xff; ucode = (val >> 8) & 0xff; for(k=0; k < e[i].num_masks; k++) ucode |= intel_atom_pe[e[i].event].pme_umasks[e[i].unit_masks[k]].pme_ucode; val |= ucode << 8; reg.sel_unit_mask = ucode; reg.sel_usr = plm & PFM_PLM3 ? 1 : 0; reg.sel_os = plm & PFM_PLM0 ? 
1 : 0; reg.sel_en = 1; /* force enable bit to 1 */ reg.sel_int = 1; /* force APIC int to 1 */ reg.sel_cnt_mask = val >>24; reg.sel_inv = val >> 23; reg.sel_edge = val >> 18; reg.sel_any = val >> 21;; if (cntrs) { if (!reg.sel_cnt_mask) { /* * counter mask is 8-bit wide, do not silently * wrap-around */ if (cntrs[i].cnt_mask > 255) return PFMLIB_ERR_INVAL; reg.sel_cnt_mask = cntrs[i].cnt_mask; } if (!reg.sel_edge) reg.sel_edge = cntrs[i].flags & PFM_INTEL_ATOM_SEL_EDGE ? 1 : 0; if (!reg.sel_inv) reg.sel_inv = cntrs[i].flags & PFM_INTEL_ATOM_SEL_INV ? 1 : 0; if (!reg.sel_any) reg.sel_any = cntrs[i].flags & PFM_INTEL_ATOM_SEL_ANYTHR? 1 : 0; } pc[npc].reg_num = assign_pc[i]; pc[npc].reg_value = reg.val; pc[npc].reg_addr = INTEL_ATOM_SEL_BASE+assign_pc[i]; pc[npc].reg_alt_addr= INTEL_ATOM_SEL_BASE+assign_pc[i]; __pfm_vbprintf("[PERFEVTSEL%u(pmc%u)=0x%"PRIx64" event_sel=0x%x umask=0x%x os=%d usr=%d en=%d int=%d inv=%d edge=%d cnt_mask=%d anythr=%d] %s\n", pc[npc].reg_num, pc[npc].reg_num, reg.val, reg.sel_event_select, reg.sel_unit_mask, reg.sel_os, reg.sel_usr, reg.sel_en, reg.sel_int, reg.sel_inv, reg.sel_edge, reg.sel_cnt_mask, reg.sel_any, intel_atom_pe[e[i].event].pme_name); __pfm_vbprintf("[PMC%u(pmd%u)]\n", pc[npc].reg_num, pc[npc].reg_num); npc++; } /* * setup pmds: must be in the same order as the events */ for (i=0; i < n ; i++) { if (is_fixed_pmc(assign_pc[i])) { /* setup pd array */ pd[i].reg_num = assign_pc[i]; pd[i].reg_addr = FIXED_CTR_BASE+assign_pc[i]-16; pd[i].reg_alt_addr = 0x40000000+assign_pc[i]-16; } else { pd[i].reg_num = assign_pc[i]; pd[i].reg_addr = INTEL_ATOM_CTR_BASE+assign_pc[i]; /* index to use with RDPMC */ pd[i].reg_alt_addr = assign_pc[i]; } } outp->pfp_pmd_count = i; /* * setup PEBS_ENABLE */ if (use_pebs && done_pebs) { /* * check that PEBS_ENABLE is available */ if (pfm_regmask_isset(r_pmcs, 17)) return PFMLIB_ERR_NOASSIGN; pc[npc].reg_num = 17; pc[npc].reg_value = 1ULL; pc[npc].reg_addr = 0x3f1; /* IA32_PEBS_ENABLE */ 
pc[npc].reg_alt_addr = 0x3f1; /* IA32_PEBS_ENABLE */ __pfm_vbprintf("[PEBS_ENABLE(pmc%u)=0x%"PRIx64" ena=%d]\n", pc[npc].reg_num, pc[npc].reg_value, pc[npc].reg_value & 0x1ull); npc++; } outp->pfp_pmc_count = npc; return PFMLIB_SUCCESS; } static int pfm_intel_atom_dispatch_events(pfmlib_input_param_t *inp, void *model_in, pfmlib_output_param_t *outp, void *model_out) { pfmlib_intel_atom_input_param_t *mod_in = (pfmlib_intel_atom_input_param_t *)model_in; if (inp->pfp_dfl_plm & (PFM_PLM1|PFM_PLM2)) { DPRINT("invalid plm=%x\n", inp->pfp_dfl_plm); return PFMLIB_ERR_INVAL; } return pfm_intel_atom_dispatch_counters(inp, mod_in, outp); } static int pfm_intel_atom_get_event_code(unsigned int i, unsigned int cnt, int *code) { if (cnt != PFMLIB_CNT_FIRST && (cnt > highest_counter || !pfm_regmask_isset(&intel_atom_impl_pmds, cnt))) return PFMLIB_ERR_INVAL; *code = intel_atom_pe[i].pme_code; return PFMLIB_SUCCESS; } static void pfm_intel_atom_get_event_counters(unsigned int j, pfmlib_regmask_t *counters) { unsigned int n, i; unsigned int has_f0, has_f1, has_f2; memset(counters, 0, sizeof(*counters)); n = intel_atom_pe[j].pme_numasks; has_f0 = has_f1 = has_f2 = 0; for (i=0; i < n; i++) { if (intel_atom_pe[j].pme_umasks[i].pme_flags & PFMLIB_INTEL_ATOM_FIXED0) has_f0 = 1; if (intel_atom_pe[j].pme_umasks[i].pme_flags & PFMLIB_INTEL_ATOM_FIXED1) has_f1 = 1; if (intel_atom_pe[j].pme_umasks[i].pme_flags & PFMLIB_INTEL_ATOM_FIXED2_ONLY) has_f2 = 1; } if (has_f0 == 0) has_f0 = intel_atom_pe[j].pme_flags & PFMLIB_INTEL_ATOM_FIXED0; if (has_f1 == 0) has_f1 = intel_atom_pe[j].pme_flags & PFMLIB_INTEL_ATOM_FIXED1; if (has_f2 == 0) has_f2 = intel_atom_pe[j].pme_flags & PFMLIB_INTEL_ATOM_FIXED2_ONLY; if (has_f0) pfm_regmask_set(counters, 16); if (has_f1) pfm_regmask_set(counters, 17); if (has_f2) pfm_regmask_set(counters, 18); /* the event on FIXED_CTR2 is exclusive CPU_CLK_UNHALTED:REF */ if (!has_f2) { pfm_regmask_set(counters, 0); pfm_regmask_set(counters, 1); if 
(intel_atom_pe[j].pme_flags & PFMLIB_INTEL_ATOM_PMC0) pfm_regmask_clr(counters, 1); if (intel_atom_pe[j].pme_flags & PFMLIB_INTEL_ATOM_PMC1) pfm_regmask_clr(counters, 0); } } static void pfm_intel_atom_get_impl_pmcs(pfmlib_regmask_t *impl_pmcs) { *impl_pmcs = intel_atom_impl_pmcs; } static void pfm_intel_atom_get_impl_pmds(pfmlib_regmask_t *impl_pmds) { *impl_pmds = intel_atom_impl_pmds; } static void pfm_intel_atom_get_impl_counters(pfmlib_regmask_t *impl_counters) { pfm_regmask_set(impl_counters, 0); pfm_regmask_set(impl_counters, 1); pfm_regmask_set(impl_counters, 16); pfm_regmask_set(impl_counters, 17); pfm_regmask_set(impl_counters, 18); } /* * Even though, CPUID 0xa returns in eax the actual counter * width, the architecture specifies that writes are limited * to lower 32-bits. As such, only the lower 32-bit have full * degree of freedom. That is the "useable" counter width. */ #define PMU_INTEL_ATOM_COUNTER_WIDTH 32 static void pfm_intel_atom_get_hw_counter_width(unsigned int *width) { /* * Even though, CPUID 0xa returns in eax the actual counter * width, the architecture specifies that writes are limited * to lower 32-bits. As such, only the lower 31 bits have full * degree of freedom. That is the "useable" counter width. 
*/ *width = PMU_INTEL_ATOM_COUNTER_WIDTH; } static char * pfm_intel_atom_get_event_name(unsigned int i) { return intel_atom_pe[i].pme_name; } static int pfm_intel_atom_get_event_description(unsigned int ev, char **str) { char *s; s = intel_atom_pe[ev].pme_desc; if (s) { *str = strdup(s); } else { *str = NULL; } return PFMLIB_SUCCESS; } static char * pfm_intel_atom_get_event_mask_name(unsigned int ev, unsigned int midx) { return intel_atom_pe[ev].pme_umasks[midx].pme_uname; } static int pfm_intel_atom_get_event_mask_desc(unsigned int ev, unsigned int midx, char **str) { char *s; s = intel_atom_pe[ev].pme_umasks[midx].pme_udesc; if (s) { *str = strdup(s); } else { *str = NULL; } return PFMLIB_SUCCESS; } static unsigned int pfm_intel_atom_get_num_event_masks(unsigned int ev) { return intel_atom_pe[ev].pme_numasks; } static int pfm_intel_atom_get_event_mask_code(unsigned int ev, unsigned int midx, unsigned int *code) { *code =intel_atom_pe[ev].pme_umasks[midx].pme_ucode; return PFMLIB_SUCCESS; } static int pfm_intel_atom_get_cycle_event(pfmlib_event_t *e) { e->event = PME_INTEL_ATOM_UNHALTED_CORE_CYCLES; return PFMLIB_SUCCESS; } static int pfm_intel_atom_get_inst_retired(pfmlib_event_t *e) { e->event = PME_INTEL_ATOM_INSTRUCTIONS_RETIRED; return PFMLIB_SUCCESS; } /* * this function is directly accessible by external caller * library initialization is not required, though recommended */ int pfm_intel_atom_has_pebs(pfmlib_event_t *e) { unsigned int i, n=0; if (e == NULL || e->event >= PME_INTEL_ATOM_EVENT_COUNT) return 0; if (intel_atom_pe[e->event].pme_flags & PFMLIB_INTEL_ATOM_PEBS) return 1; /* * ALL unit mask must support PEBS for this test to return true */ for(i=0; i < e->num_masks; i++) { /* check for valid unit mask */ if (e->unit_masks[i] >= intel_atom_pe[e->event].pme_numasks) return 0; if (intel_atom_pe[e->event].pme_umasks[e->unit_masks[i]].pme_flags & PFMLIB_INTEL_ATOM_PEBS) n++; } return n > 0 && n == e->num_masks; } pfm_pmu_support_t intel_atom_support={ 
.pmu_name = "Intel Atom", .pmu_type = PFMLIB_INTEL_ATOM_PMU, .pme_count = PME_INTEL_ATOM_EVENT_COUNT, .pmc_count = 4, .pmd_count = 22, .num_cnt = 5, .get_event_code = pfm_intel_atom_get_event_code, .get_event_name = pfm_intel_atom_get_event_name, .get_event_counters = pfm_intel_atom_get_event_counters, .dispatch_events = pfm_intel_atom_dispatch_events, .pmu_detect = pfm_intel_atom_detect, .pmu_init = pfm_intel_atom_init, .get_impl_pmcs = pfm_intel_atom_get_impl_pmcs, .get_impl_pmds = pfm_intel_atom_get_impl_pmds, .get_impl_counters = pfm_intel_atom_get_impl_counters, .get_hw_counter_width = pfm_intel_atom_get_hw_counter_width, .get_event_desc = pfm_intel_atom_get_event_description, .get_num_event_masks = pfm_intel_atom_get_num_event_masks, .get_event_mask_name = pfm_intel_atom_get_event_mask_name, .get_event_mask_code = pfm_intel_atom_get_event_mask_code, .get_event_mask_desc = pfm_intel_atom_get_event_mask_desc, .get_cycle_event = pfm_intel_atom_get_cycle_event, .get_inst_retired_event = pfm_intel_atom_get_inst_retired }; papi-5.6.0/src/components/perfctr_ppc/ppc970_events_map.c000664 001750 001750 00000014774 13216244360 025441 0ustar00jshenry1963jshenry1963000000 000000 /****************************/ /* THIS IS OPEN SOURCE CODE */ /****************************/ /* * File: ppc970_events_map.c * Author: Maynard Johnson * maynardj@us.ibm.com * Mods: * * * This file MUST be kept synchronised with the events file. 
* */ #include "perfctr-ppc64.h" PPC64_native_map_t native_name_map[MAX_NATNAME_MAP_INDEX] = { {"PM_BRQ_FULL_CYC", -1} , {"PM_CR_MAP_FULL_CYC", -1} , {"PM_CYC", -1} , {"PM_DATA_FROM_L2", -1} , {"PM_DATA_TABLEWALK_CYC", -1} , {"PM_DSLB_MISS", -1} , {"PM_DTLB_MISS", -1} , {"PM_FPR_MAP_FULL_CYC", -1} , {"PM_FPU0_ALL", -1} , {"PM_FPU0_DENORM", -1} , {"PM_FPU0_FDIV", -1} , {"PM_FPU0_FMA", -1} , {"PM_FPU0_FSQRT", -1} , {"PM_FPU0_FULL_CYC", -1} , {"PM_FPU0_SINGLE", -1} , {"PM_FPU0_STALL3", -1} , {"PM_FPU0_STF", -1} , {"PM_FPU1_ALL", -1} , {"PM_FPU1_DENORM", -1} , {"PM_FPU1_FDIV", -1} , {"PM_FPU1_FMA", -1} , {"PM_FPU1_FSQRT", -1} , {"PM_FPU1_FULL_CYC", -1} , {"PM_FPU1_SINGLE", -1} , {"PM_FPU1_STALL3", -1} , {"PM_FPU1_STF", -1} , {"PM_FPU_DENORM", -1} , {"PM_FPU_FDIV", -1} , {"PM_GCT_EMPTY_CYC", -1} , {"PM_GCT_FULL_CYC", -1} , {"PM_GRP_BR_MPRED", -1} , {"PM_GRP_BR_REDIR", -1} , {"PM_GRP_DISP_REJECT", -1} , {"PM_GRP_DISP_VALID", -1} , {"PM_IC_PREF_INSTALL", -1} , {"PM_IC_PREF_REQ", -1} , {"PM_IERAT_XLATE_WR", -1} , {"PM_INST_CMPL", -1} , {"PM_INST_DISP", -1} , {"PM_INST_FROM_L1", -1} , {"PM_INST_FROM_L2", -1} , {"PM_ISLB_MISS", -1} , {"PM_ITLB_MISS", -1} , {"PM_LARX_LSU0", -1} , {"PM_LR_CTR_MAP_FULL_CYC", -1} , {"PM_LSU0_DERAT_MISS", -1} , {"PM_LSU0_FLUSH_LRQ", -1} , {"PM_LSU0_FLUSH_SRQ", -1} , {"PM_LSU0_FLUSH_ULD", -1} , {"PM_LSU0_FLUSH_UST", -1} , {"PM_LSU0_REJECT_ERAT_MISS", -1} , {"PM_LSU0_REJECT_LMQ_FULL", -1} , {"PM_LSU0_REJECT_RELOAD_CDF", -1} , {"PM_LSU0_REJECT_SRQ", -1} , {"PM_LSU0_SRQ_STFWD", -1} , {"PM_LSU1_DERAT_MISS", -1} , {"PM_LSU1_FLUSH_LRQ", -1} , {"PM_LSU1_FLUSH_SRQ", -1} , {"PM_LSU1_FLUSH_ULD", -1} , {"PM_LSU1_FLUSH_UST", -1} , {"PM_LSU1_REJECT_ERAT_MISS", -1} , {"PM_LSU1_REJECT_LMQ_FULL", -1} , {"PM_LSU1_REJECT_RELOAD_CDF", -1} , {"PM_LSU1_REJECT_SRQ", -1} , {"PM_LSU1_SRQ_STFWD", -1} , {"PM_LSU_FLUSH_ULD", -1} , {"PM_LSU_LRQ_S0_ALLOC", -1} , {"PM_LSU_LRQ_S0_VALID", -1} , {"PM_LSU_REJECT_SRQ", -1} , {"PM_LSU_SRQ_S0_ALLOC", -1} , {"PM_LSU_SRQ_S0_VALID", -1} 
, {"PM_LSU_SRQ_STFWD", -1} , {"PM_MRK_DATA_FROM_L2", -1} , {"PM_MRK_GRP_DISP", -1} , {"PM_MRK_IMR_RELOAD", -1} , {"PM_MRK_LD_MISS_L1", -1} , {"PM_MRK_LD_MISS_L1_LSU0", -1} , {"PM_MRK_LD_MISS_L1_LSU1", -1} , {"PM_MRK_STCX_FAIL", -1} , {"PM_MRK_ST_CMPL", -1} , {"PM_MRK_ST_MISS_L1", -1} , {"PM_PMC8_OVERFLOW", -1} , {"PM_RUN_CYC", -1} , {"PM_SNOOP_TLBIE", -1} , {"PM_STCX_FAIL", -1} , {"PM_STCX_PASS", -1} , {"PM_ST_MISS_L1", -1} , {"PM_SUSPENDED", -1} , {"PM_XER_MAP_FULL_CYC", -1} , {"PM_FPU_FMA", -1} , {"PM_FPU_STALL3", -1} , {"PM_GCT_EMPTY_SRQ_FULL", -1} , {"PM_GRP_DISP", -1} , {"PM_INST_FROM_MEM", -1} , {"PM_LSU_FLUSH_UST", -1} , {"PM_LSU_LMQ_SRQ_EMPTY_CYC", -1} , {"PM_LSU_REJECT_LMQ_FULL", -1} , {"PM_MRK_BRU_FIN", -1} , {"PM_PMC1_OVERFLOW", -1} , {"PM_THRESH_TIMEO", -1} , {"PM_WORK_HELD", -1} , {"PM_BR_ISSUED", -1} , {"PM_BR_MPRED_CR", -1} , {"PM_BR_MPRED_TA", -1} , {"PM_CRQ_FULL_CYC", -1} , {"PM_DATA_FROM_MEM", -1} , {"PM_DC_INV_L2", -1} , {"PM_DC_PREF_OUT_OF_STREAMS", -1} , {"PM_DC_PREF_STREAM_ALLOC", -1} , {"PM_EE_OFF", -1} , {"PM_EE_OFF_EXT_INT", -1} , {"PM_FLUSH_BR_MPRED", -1} , {"PM_FLUSH_LSU_BR_MPRED", -1} , {"PM_FPU0_FEST", -1} , {"PM_FPU0_FIN", -1} , {"PM_FPU0_FMOV_FEST", -1} , {"PM_FPU0_FPSCR", -1} , {"PM_FPU0_FRSP_FCONV", -1} , {"PM_FPU1_FEST", -1} , {"PM_FPU1_FIN", -1} , {"PM_FPU1_FMOV_FEST", -1} , {"PM_FPU1_FRSP_FCONV", -1} , {"PM_FPU_FEST", -1} , {"PM_FXLS0_FULL_CYC", -1} , {"PM_FXLS1_FULL_CYC", -1} , {"PM_FXU0_FIN", -1} , {"PM_FXU1_FIN", -1} , {"PM_FXU_FIN", -1} , {"PM_GPR_MAP_FULL_CYC", -1} , {"PM_GRP_DISP_BLK_SB_CYC", -1} , {"PM_HV_CYC", -1} , {"PM_INST_FROM_PREF", -1} , {"PM_L1_DCACHE_RELOAD_VALID", -1} , {"PM_L1_PREF", -1} , {"PM_L1_WRITE_CYC", -1} , {"PM_L2_PREF", -1} , {"PM_LD_MISS_L1", -1} , {"PM_LD_MISS_L1_LSU0", -1} , {"PM_LD_MISS_L1_LSU1", -1} , {"PM_LD_REF_L1_LSU0", -1} , {"PM_LD_REF_L1_LSU1", -1} , {"PM_LSU0_LDF", -1} , {"PM_LSU1_LDF", -1} , {"PM_LSU_FLUSH", -1} , {"PM_LSU_LMQ_FULL_CYC", -1} , {"PM_LSU_LMQ_LHR_MERGE", -1} , 
{"PM_LSU_LMQ_S0_ALLOC", -1} , {"PM_LSU_LMQ_S0_VALID", -1} , {"PM_LSU_LRQ_FULL_CYC", -1} , {"PM_LSU_SRQ_FULL_CYC", -1} , {"PM_LSU_SRQ_SYNC_CYC", -1} , {"PM_MRK_DATA_FROM_MEM", -1} , {"PM_MRK_L1_RELOAD_VALID", -1} , {"PM_MRK_LSU0_FLUSH_LRQ", -1} , {"PM_MRK_LSU0_FLUSH_SRQ", -1} , {"PM_MRK_LSU0_FLUSH_ULD", -1} , {"PM_MRK_LSU0_FLUSH_UST", -1} , {"PM_MRK_LSU1_FLUSH_LRQ", -1} , {"PM_MRK_LSU1_FLUSH_SRQ", -1} , {"PM_MRK_LSU1_FLUSH_ULD", -1} , {"PM_MRK_LSU1_FLUSH_UST", -1} , {"PM_MRK_LSU_SRQ_INST_VALID", -1} , {"PM_MRK_ST_CMPL_INT", -1} , {"PM_MRK_VMX_FIN", -1} , {"PM_PMC2_OVERFLOW", -1} , {"PM_STOP_COMPLETION", -1} , {"PM_ST_REF_L1_LSU0", -1} , {"PM_ST_REF_L1_LSU1", -1} , {"PM_0INST_FETCH", -1} , {"PM_FPU_FIN", -1} , {"PM_FXU1_BUSY_FXU0_IDLE", -1} , {"PM_LSU_SRQ_EMPTY_CYC", -1} , {"PM_MRK_CRU_FIN", -1} , {"PM_MRK_GRP_CMPL", -1} , {"PM_PMC3_OVERFLOW", -1} , {"PM_1PLUS_PPC_CMPL", -1} , {"PM_DATA_FROM_L25_SHR", -1} , {"PM_FPU_ALL", -1} , {"PM_FPU_SINGLE", -1} , {"PM_FXU_IDLE", -1} , {"PM_GRP_DISP_SUCCESS", -1} , {"PM_GRP_MRK", -1} , {"PM_INST_FROM_L25_SHR", -1} , {"PM_LSU_FLUSH_SRQ", -1} , {"PM_LSU_REJECT_ERAT_MISS", -1} , {"PM_MRK_DATA_FROM_L25_SHR", -1} , {"PM_MRK_GRP_TIMEO", -1} , {"PM_PMC4_OVERFLOW", -1} , {"PM_DATA_FROM_L25_MOD", -1} , {"PM_FPU_FSQRT", -1} , {"PM_FPU_STF", -1} , {"PM_FXU_BUSY", -1} , {"PM_INST_FROM_L25_MOD", -1} , {"PM_LSU_DERAT_MISS", -1} , {"PM_LSU_FLUSH_LRQ", -1} , {"PM_LSU_REJECT_RELOAD_CDF", -1} , {"PM_MRK_DATA_FROM_L25_MOD", -1} , {"PM_MRK_FXU_FIN", -1} , {"PM_MRK_GRP_ISSUED", -1} , {"PM_MRK_ST_GPS", -1} , {"PM_PMC5_OVERFLOW", -1} , {"PM_FPU_FRSP_FCONV", -1} , {"PM_FXU0_BUSY_FXU1_IDLE", -1} , {"PM_GRP_CMPL", -1} , {"PM_MRK_FPU_FIN", -1} , {"PM_MRK_INST_FIN", -1} , {"PM_PMC6_OVERFLOW", -1} , {"PM_ST_REF_L1", -1} , {"PM_EXT_INT", -1} , {"PM_FPU_FMOV_FEST", -1} , {"PM_LD_REF_L1", -1} , {"PM_LSU_LDF", -1} , {"PM_MRK_LSU_FIN", -1} , {"PM_PMC7_OVERFLOW", -1} , {"PM_TB_BIT_TRANS", -1} }; papi-5.6.0/src/libpfm-3.y/examples_v2.x/ia64/ita_btb.c000664 001750 
001750 00000031157 13216244362 024220 0ustar00jshenry1963jshenry1963000000 000000 /* * ita_btb.c - example of how use the BTB with the Itanium PMU * * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux/ia64. */ #include #include #include #include #include #include #include #include #include #include #include #include #include typedef pfm_dfl_smpl_hdr_t btb_hdr_t; typedef pfm_dfl_smpl_entry_t btb_entry_t; typedef pfm_dfl_smpl_arg_t smpl_arg_t; #define NUM_PMCS PFMLIB_MAX_PMCS #define NUM_PMDS PFMLIB_MAX_PMDS #define MAX_EVT_NAME_LEN 128 #define MAX_PMU_NAME_LEN 32 /* * The BRANCH_EVENT is increment by 1 for each branch event. Such event is composed of * two entries in the BTB: a source and a target entry. The BTB is full after 4 branch * events. 
*/ #define SMPL_PERIOD (4UL*256) /* * We use a small buffer size to exercise the overflow handler */ #define SMPL_BUF_NENTRIES 64 static void *smpl_vaddr; static unsigned int entry_size; static int id; #define BPL (sizeof(uint64_t)<<3) #define LBPL 6 static inline void pfm_bv_set(uint64_t *bv, uint16_t rnum) { bv[rnum>>LBPL] |= 1UL << (rnum&(BPL-1)); } /* * we don't use static to make sure the compiler does not inline the function */ long func1(void) { return 0;} long do_test(unsigned long loop) { long sum = 0; while(loop--) { if (loop & 0x1) sum += func1(); else sum += loop; } return sum; } static void fatal_error(char *fmt,...) __attribute__((noreturn)); static void fatal_error(char *fmt, ...) { va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); exit(1); } /* * print content of sampling buffer * * XXX: using stdio to print from a signal handler is not safe with multi-threaded * applications */ #define safe_printf printf static int show_btb_reg(int j, pfm_ita_pmd_reg_t reg) { int ret; int is_valid = reg.pmd8_15_ita_reg.btb_b == 0 && reg.pmd8_15_ita_reg.btb_mp == 0 ? 0 :1; ret = safe_printf("\tPMD%-2d: 0x%016lx b=%d mp=%d valid=%c\n", j, reg.pmd_val, reg.pmd8_15_ita_reg.btb_b, reg.pmd8_15_ita_reg.btb_mp, is_valid ? 'Y' : 'N'); if (!is_valid) return ret; if (reg.pmd8_15_ita_reg.btb_b) { unsigned long addr; addr = reg.pmd8_15_ita_reg.btb_addr<<4; addr |= reg.pmd8_15_ita_reg.btb_slot < 3 ? reg.pmd8_15_ita_reg.btb_slot : 0; ret = safe_printf("\t Source Address: 0x%016lx\n" "\t Taken=%c Prediction: %s\n\n", addr, reg.pmd8_15_ita_reg.btb_slot < 3 ? 'Y' : 'N', reg.pmd8_15_ita_reg.btb_mp ? "Failure" : "Success"); } else { ret = safe_printf("\t Target Address: 0x%016lx\n\n", (unsigned long)(reg.pmd8_15_ita_reg.btb_addr<<4)); } return ret; } static void show_btb(pfm_ita_pmd_reg_t *btb, pfm_ita_pmd_reg_t *pmd16) { int i, last; i = (pmd16->pmd16_ita_reg.btbi_full) ? 
pmd16->pmd16_ita_reg.btbi_bbi : 0; last = pmd16->pmd16_ita_reg.btbi_bbi; safe_printf("btb_trace: i=%d last=%d bbi=%d full=%d\n", i, last,pmd16->pmd16_ita_reg.btbi_bbi, pmd16->pmd16_ita_reg.btbi_full); do { show_btb_reg(i+8, btb[i]); i = (i+1) % 8; } while (i != last); } static void process_smpl_buffer(void) { btb_hdr_t *hdr; btb_entry_t *ent; unsigned long pos; unsigned long smpl_entry = 0; pfm_ita_pmd_reg_t *reg, *pmd16; unsigned long i; int ret; static unsigned long last_ovfl = ~0UL; hdr = (btb_hdr_t *)smpl_vaddr; /* * check that we are not diplaying the previous set of samples again. * Required to take care of the last batch of samples. */ if (hdr->hdr_overflows <= last_ovfl && last_ovfl != ~0UL) { printf("skipping identical set of samples %lu <= %lu\n", hdr->hdr_overflows, last_ovfl); return; } pos = (unsigned long)(hdr+1); /* * walk through all the entries recored in the buffer */ for(i=0; i < hdr->hdr_count; i++) { ret = 0; ent = (btb_entry_t *)pos; /* * print entry header */ safe_printf("Entry %ld PID:%d TID:%d CPU:%d STAMP:0x%lx IIP:0x%016lx\n", smpl_entry++, ent->tgid, ent->pid, ent->cpu, ent->tstamp, ent->ip); /* * point to first recorded register (always contiguous with entry header) */ reg = (pfm_ita_pmd_reg_t*)(ent+1); /* * in this particular example, we have pmd8-pmd15 has the BTB. We have also * included pmd16 (BTB index) has part of the registers to record. This trick * allows us to get the index to decode the sequential order of the BTB. * * Recorded registers are always recorded in increasing order. So we know * that pmd16 is at a fixed offset (+8*sizeof(unsigned long)) from pmd8. 
*/ pmd16 = reg+8; show_btb(reg, pmd16); /* * move to next entry */ pos += entry_size; } } static void overflow_handler(int n, struct siginfo *info, struct sigcontext *sc) { /* dangerous */ printf("Notification received\n"); process_smpl_buffer(); /* * And resume monitoring */ if (pfm_restart(id) == -1) { perror("pfm_restart"); exit(1); } } int main(void) { int ret; int type = 0; pfmlib_input_param_t inp; pfmlib_output_param_t outp; pfmlib_ita_input_param_t ita_inp; pfarg_pmd_t pd[NUM_PMDS]; pfarg_pmc_t pc[NUM_PMCS]; pfarg_ctx_t ctx; smpl_arg_t buf_arg; pfarg_load_t load_args; pfmlib_options_t pfmlib_options; struct sigaction act; unsigned int i; /* * Initialize pfm library (required before we can use it) */ ret = pfm_initialize(); if (ret != PFMLIB_SUCCESS) fatal_error("Cannot initialize library: %s\n", pfm_strerror(ret)); /* * Let's make sure we run this on the right CPU */ pfm_get_pmu_type(&type); if (type != PFMLIB_ITANIUM_PMU) { char model[MAX_PMU_NAME_LEN]; pfm_get_pmu_name(model, MAX_PMU_NAME_LEN); fatal_error("this program does not work with %s PMU\n", model); } /* * Install the overflow handler (SIGIO) */ memset(&act, 0, sizeof(act)); act.sa_handler = (sig_t)overflow_handler; sigaction (SIGIO, &act, 0); /* * pass options to library (optional) */ memset(&pfmlib_options, 0, sizeof(pfmlib_options)); pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ pfmlib_options.pfm_verbose = 0; /* set to 1 for debug */ pfm_set_options(&pfmlib_options); memset(pd, 0, sizeof(pd)); memset(&ctx, 0, sizeof(ctx)); memset(&buf_arg, 0, sizeof(buf_arg)); memset(&inp, 0, sizeof(inp)); memset(&outp, 0, sizeof(outp)); memset(&ita_inp,0, sizeof(ita_inp)); /* * Before calling pfm_find_dispatch(), we must specify what kind * of branches we want to capture. 
We are interesteed in all the mispredicted branches, * therefore we program we set the various fields of the BTB config to: */ ita_inp.pfp_ita_btb.btb_used = 1; ita_inp.pfp_ita_btb.btb_tar = 0x1; ita_inp.pfp_ita_btb.btb_tm = 0x2; ita_inp.pfp_ita_btb.btb_ptm = 0x3; ita_inp.pfp_ita_btb.btb_tac = 0x1; ita_inp.pfp_ita_btb.btb_bac = 0x1; ita_inp.pfp_ita_btb.btb_ppm = 0x3; ita_inp.pfp_ita_btb.btb_plm = PFM_PLM3; /* * To count the number of occurence of this instruction, we must * program a counting monitor with the IA64_TAGGED_INST_RETIRED_PMC8 * event. */ if (pfm_find_full_event("BRANCH_EVENT", &inp.pfp_events[0]) != PFMLIB_SUCCESS) fatal_error("cannot find event BRANCH_EVENT\n"); /* * set the (global) privilege mode: * PFM_PLM3 : user level only */ inp.pfp_dfl_plm = PFM_PLM3; /* * how many counters we use */ inp.pfp_event_count = 1; /* * let the library figure out the values for the PMCS */ if ((ret=pfm_dispatch_events(&inp, &ita_inp, &outp, NULL)) != PFMLIB_SUCCESS) fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); /* * the size of the buffer is indicated in bytes (not entries). * * The kernel will record into the buffer up to a certain point. * No partial samples are ever recorded. */ buf_arg.buf_size = getpagesize(); /* * now create the context for self monitoring/per-task */ id = pfm_create_context(&ctx, "default", &buf_arg, sizeof(buf_arg)); if (id == -1) { if (errno == ENOSYS) { fatal_error("Your kernel does not have performance monitoring support!\n"); } fatal_error("Can't create PFM context %s\n", strerror(errno)); } /* * retrieve the virtual address at which the sampling * buffer has been mapped */ smpl_vaddr = mmap(NULL, (size_t)buf_arg.buf_size, PROT_READ, MAP_PRIVATE, id, 0); if (smpl_vaddr == MAP_FAILED) fatal_error("cannot mmap sampling buffer errno %d\n", errno); printf("Sampling buffer mapped at %p\n", smpl_vaddr); /* * Now prepare the argument to initialize the PMDs and PMCS. 
* We must pfp_pmc_count to determine the number of PMC to intialize. * We must use pfp_event_count to determine the number of PMD to initialize. * Some events cause extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. * * This step is new compared to libpfm-2.x. It is necessary because the library no * longer knows about the kernel data structures. */ for (i=0; i < outp.pfp_pmc_count; i++) { pc[i].reg_num = outp.pfp_pmcs[i].reg_num; pc[i].reg_value = outp.pfp_pmcs[i].reg_value; } /* * figure out pmd mapping from output pmc * PMD16 is part of the set of used PMD returned by libpfm. * It will be reset automatically */ for (i=0; i < outp.pfp_pmd_count; i++) pd[i].reg_num = outp.pfp_pmds[i].reg_num; /* * indicate we want notification when buffer is full */ pd[0].reg_flags |= PFM_REGFL_OVFL_NOTIFY; /* * Now prepare the argument to initialize the PMD and the sampling period * We know we use only one PMD in this case, therefore pmd[0] corresponds * to our first event which is our sampling period. */ pd[0].reg_value = - SMPL_PERIOD; pd[0].reg_long_reset = - SMPL_PERIOD; pd[0].reg_short_reset = - SMPL_PERIOD; pfm_bv_set(pd[0].reg_smpl_pmds, 16); entry_size = sizeof(btb_entry_t) + 1 * 8; for(i=8; i < 16; i++) { pfm_bv_set(pd[0].reg_smpl_pmds, i); entry_size += 8; } /* * When our counter overflows, we want to BTB index to be reset, so that we keep * in sync. This is required to make it possible to interpret pmd16 on overflow * to avoid repeating the same branch several times. */ pfm_bv_set(pd[0].reg_reset_pmds, 16); /* * Now program the registers * * We don't use the save variable to indicate the number of elements passed to * the kernel because, as we said earlier, pc may contain more elements than * the number of events we specified, i.e., contains more thann coutning monitors. 
*/ if (pfm_write_pmcs(id, pc, outp.pfp_pmc_count) == -1) fatal_error("pfm_write_pmcs error errno %d\n",errno); if (pfm_write_pmds(id, pd, outp.pfp_pmd_count) == -1) fatal_error("pfm_write_pmds error errno %d\n",errno); /* * now we load (i.e., attach) the context to ourself */ load_args.load_pid = getpid(); if (pfm_load_context(id, &load_args) == -1) fatal_error("pfm_load_context error errno %d\n",errno); /* * setup asynchronous notification on the file descriptor */ ret = fcntl(id, F_SETFL, fcntl(id, F_GETFL, 0) | O_ASYNC); if (ret == -1) fatal_error("cannot set ASYNC: %s\n", strerror(errno)); /* * get ownership of the descriptor */ ret = fcntl(id, F_SETOWN, getpid()); if (ret == -1) fatal_error("cannot setown: %s\n", strerror(errno)); /* * Let's roll now. */ pfm_self_start(id); do_test(100000); pfm_self_stop(id); /* * We must call the processing routine to cover the last entries recorded * in the sampling buffer. Note that the buffer may not be full at this point. * */ process_smpl_buffer(); /* * let's stop this now */ munmap(smpl_vaddr, (size_t)buf_arg.buf_size); close(id); return 0; } papi-5.6.0/src/libpfm4/config.mk000664 001750 001750 00000013117 13216244363 020462 0ustar00jshenry1963jshenry1963000000 000000 # # Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. # Contributed by Stephane Eranian # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies # of the Software, and to permit persons to whom the Software is furnished to do so, # subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. 
# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF # CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE # OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. # # This file is part of libpfm, a performance monitoring support library for # applications on Linux. # # # This file defines the global compilation settings. # It is included by every Makefile # # SYS := $(shell uname -s) ARCH := $(shell uname -m) ifeq (i686,$(findstring i686,$(ARCH))) override ARCH=i386 endif ifeq (i586,$(findstring i586,$(ARCH))) override ARCH=i386 endif ifeq (i486,$(findstring i486,$(ARCH))) override ARCH=i386 endif ifeq (i386,$(findstring i386,$(ARCH))) override ARCH=i386 endif ifeq (i86pc,$(findstring i86pc,$(ARCH))) override ARCH=i386 endif ifeq (x86,$(findstring x86,$(ARCH))) override ARCH=x86_64 endif ifeq ($(ARCH),x86_64) override ARCH=x86_64 endif ifeq ($(ARCH),amd64) override ARCH=x86_64 endif ifeq (ppc,$(findstring ppc,$(ARCH))) override ARCH=powerpc endif ifeq (sparc64,$(findstring sparc64,$(ARCH))) override ARCH=sparc endif ifeq (armv6,$(findstring armv6,$(ARCH))) override ARCH=arm endif ifeq (armv7,$(findstring armv7,$(ARCH))) override ARCH=arm endif ifeq (armv7,$(findstring armv7,$(ARCH))) override ARCH=arm endif ifeq (aarch32,$(findstring aarch32,$(ARCH))) override ARCH=arm endif ifeq (armv8l,$(findstring armv8l,$(ARCH))) override ARCH=arm endif ifeq (mips64,$(findstring mips64,$(ARCH))) override ARCH=mips endif ifeq (mips,$(findstring mips,$(ARCH))) override ARCH=mips endif ifeq (MINGW,$(findstring MINGW,$(SYS))) override SYS=WINDOWS endif # # CONFIG_PFMLIB_SHARED: y=compile static and shared versions, n=static only # CONFIG_PFMLIB_DEBUG: enable debugging output 
support # CONFIG_PFMLIB_NOPYTHON: do not generate the python support, incompatible # with PFMLIB_SHARED=n # CONFIG_PFMLIB_SHARED?=y CONFIG_PFMLIB_DEBUG?=y CONFIG_PFMLIB_NOPYTHON?=y # # Cell Broadband Engine is reported as PPC but needs special handling. # ifeq ($(SYS),Linux) MACHINE := $(shell grep -q 'Cell Broadband Engine' /proc/cpuinfo && echo cell) ifeq (cell,$(MACHINE)) override ARCH=cell endif endif # # Library version # VERSION=4 REVISION=8 AGE=0 # # Where should things (lib, headers, man) go in the end. # PREFIX=/usr/local LIBDIR=$(PREFIX)/lib INCDIR=$(PREFIX)/include MANDIR=$(PREFIX)/share/man DOCDIR=$(PREFIX)/share/doc/libpfm-$(VERSION).$(REVISION).$(AGE) # # System header files # # SYSINCDIR : where to find standard header files (default to .) SYSINCDIR=. # # Configuration Paramaters for libpfm library # ifeq ($(ARCH),ia64) CONFIG_PFMLIB_ARCH_IA64=y endif ifeq ($(ARCH),x86_64) CONFIG_PFMLIB_ARCH_X86_64=y CONFIG_PFMLIB_ARCH_X86=y endif ifeq ($(ARCH),i386) CONFIG_PFMLIB_ARCH_I386=y CONFIG_PFMLIB_ARCH_X86=y endif ifeq ($(ARCH),mips) CONFIG_PFMLIB_ARCH_MIPS=y endif ifeq ($(ARCH),powerpc) CONFIG_PFMLIB_ARCH_POWERPC=y endif ifeq ($(ARCH),sparc) CONFIG_PFMLIB_ARCH_SPARC=y endif ifeq ($(ARCH),arm) CONFIG_PFMLIB_ARCH_ARM=y endif ifeq ($(ARCH),aarch64) CONFIG_PFMLIB_ARCH_ARM64=y endif ifeq ($(ARCH),arm64) CONFIG_PFMLIB_ARCH_ARM64=y endif ifeq ($(ARCH),s390x) CONFIG_PFMLIB_ARCH_S390X=y endif ifeq ($(ARCH),cell) CONFIG_PFMLIB_CELL=y endif # # you shouldn't have to touch anything beyond this point # # # The entire package can be compiled using # icc the Intel Itanium Compiler (7.x,8.x, 9.x) # or GNU C #CC=icc CC?=gcc LIBS= INSTALL=install LDCONFIG=ldconfig LN?=ln -sf PFMINCDIR=$(TOPDIR)/include PFMLIBDIR=$(TOPDIR)/lib # # -Wextra: to enable extra compiler sanity checks (e.g., signed vs. 
unsigned) # -Wno-unused-parameter: to avoid warnings on unused foo(void *this) parameter # DBG?=-g -Wall -Werror -Wextra -Wno-unused-parameter ifeq ($(SYS),Darwin) # older gcc-4.2 does not like -Wextra and some of our initialization code # Xcode uses a gcc version which is too old for some static initializers CC=clang DBG?=-g -Wall -Werror LDCONFIG=true endif ifeq ($(SYS),FreeBSD) # gcc-4.2 does not like -Wextra and some of our initialization code DBG=-g -Wall -Werror endif CFLAGS+=$(OPTIM) $(DBG) -I$(SYSINCDIR) -I$(PFMINCDIR) MKDEP=makedepend PFMLIB=$(PFMLIBDIR)/libpfm.a ifeq ($(CONFIG_PFMLIB_DEBUG),y) CFLAGS += -DCONFIG_PFMLIB_DEBUG endif CTAGS?=ctags # # Python is for use with perf_events # so it only works on Linux # ifneq ($(SYS),Linux) CONFIG_PFMLIB_NOPYTHON=y endif # # mark that we are compiling on Linux # ifeq ($(SYS),Linux) CFLAGS+= -DCONFIG_PFMLIB_OS_LINUX endif # # compile examples statically if library is # compile static # not compatible with python support, so disable for now # ifeq ($(CONFIG_PFMLIB_SHARED),n) LDFLAGS+= -static CONFIG_PFMLIB_NOPYTHON=y endif ifeq ($(SYS),WINDOWS) CFLAGS +=-DPFMLIB_WINDOWS endif papi-5.6.0/man/man3/PAPI_num_hwctrs.3000664 001750 001750 00000000602 13216244356 021234 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPI_num_hwctrs" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPI_num_hwctrs \- .PP Return the number of hardware counters on the cpu\&. .SH SYNOPSIS .br .PP .SH "Detailed Description" .PP .PP \fBSee Also:\fP .RS 4 \fBPAPI_num_cmp_hwctrs\fP .RE .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. papi-5.6.0/src/libpfm-3.y/include/perfmon/pfmlib_os_mips64.h000664 001750 001750 00000003735 13216244362 025631 0ustar00jshenry1963jshenry1963000000 000000 /* * Contributed by Philip Mucci based on code from * Copyright (c) 2004-2006 Hewlett-Packard Development Company, L.P. 
* Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef __PFMLIB_OS_MIPS64_H__ #define __PFMLIB_OS_MIPS64_H__ #ifndef __PFMLIB_OS_H__ #error "you should never include this file directly, use pfmlib_os.h" #endif #include #if !defined(__mips__) #error "you should not be including this file" #endif #ifndef __PFMLIB_OS_COMPILE #include /* * macros version of pfm_self_start/pfm_self_stop to be used in per-process self-monitoring sessions. * they are also defined as real functions. * * DO NOT USE on system-wide sessions. 
*/ static inline int pfm_self_start(int fd) { return pfm_start(fd, NULL); } static inline int pfm_self_stop(int fd) { return pfm_stop(fd); } #endif /* __PFMLIB_OS_COMPILE */ #endif /* __PFMLIB_OS_MIPS64_H__ */ papi-5.6.0/src/components/appio/tests/appio_values_by_name.c000664 001750 001750 00000006722 13216244356 026321 0ustar00jshenry1963jshenry1963000000 000000 /****************************/ /* THIS IS OPEN SOURCE CODE */ /****************************/ /** * @author Tushar Mohan * * test case for the appio component * (adapted from test in linux-net component) * * @brief * Prints the values of several (but not all) appio events specified by names */ #include #include #include #include #include #include #include #include "papi.h" #include "papi_test.h" #define NUM_EVENTS 11 int main (int argc, char **argv) { int i, retval; int EventSet = PAPI_NULL; char *event_name[NUM_EVENTS] = { "READ_BYTES", "READ_CALLS", "READ_USEC", "READ_EOF", "READ_SHORT", "READ_ERR", "WRITE_BYTES", "WRITE_CALLS", "WRITE_USEC", "WRITE_ERR", "WRITE_SHORT" }; int event_code[NUM_EVENTS] = { 0, 0, 0, 0, 0, 0, 0, 0, 0}; long long event_value[NUM_EVENTS]; int total_events=0; /* Set TESTS_QUIET variable */ tests_quiet( argc, argv ); /* PAPI Initialization */ retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) { test_fail(__FILE__, __LINE__,"PAPI_library_init failed\n",retval); } if (!TESTS_QUIET) { printf("Appio events by name\n"); } /* Map names to codes */ for ( i=0; i 0) { write(fdout, buf, bytes); } close(fdin); close(fdout); retval = PAPI_stop( EventSet, event_value ); if (retval != PAPI_OK) { test_fail(__FILE__, __LINE__, "PAPI_start()", retval); } if (!TESTS_QUIET) { for ( i=0; i */ /* Note! There are many many many things that can go wrong */ /* when trying to get a sane floating point measurement. 
*/ #include #include #include #include #include "papi.h" #include "papi_test.h" #include "display_error.h" #include "testcode.h" int main(int argc, char **argv) { int num_runs=100,i; long long high=0,low=0,average=0,expected=1500000; double error,double_result; long long count,total=0; int quiet=0,retval,ins_result; int eventset=PAPI_NULL; quiet=tests_quiet(argc,argv); if (!quiet) { printf("\nTesting the PAPI_FP_OPS event.\n\n"); } /* Init the PAPI library */ retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) { test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); } /* Create the eventset */ retval=PAPI_create_eventset(&eventset); if (retval!=PAPI_OK) { test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); } /* Add FP_OPS event */ retval=PAPI_add_named_event(eventset,"PAPI_FP_OPS"); if (retval!=PAPI_OK) { if (!quiet) fprintf(stderr,"PAPI_FP_OPS not available!\n"); test_skip( __FILE__, __LINE__, "adding PAPI_FP_OPS", retval ); } /**************************************/ /* Test a loop with no floating point */ /**************************************/ total=0; high=0; low=0; expected=0; if (!quiet) { printf("Testing a loop with %lld floating point (%d times):\n", expected,num_runs); } for(i=0;ihigh) high=count; if ((low==0) || (count10) { if (!quiet) printf("Unexpected FP event value\n"); test_fail( __FILE__, __LINE__, "Unexpected FP event", 1 ); } if (!quiet) printf("\n"); /*******************************************/ /* Test a single-precision matrix multiply */ /*******************************************/ total=0; high=0; low=0; expected=flops_float_init_matrix(); num_runs=3; if (!quiet) { printf("Testing a matrix multiply with %lld single-precision FP operations (%d times)\n", expected,num_runs); } for(i=0;ihigh) high=count; if ((low==0) || (count 1.0) || (error<-1.0)) { if (!quiet) printf("Instruction count off by more than 1%%\n"); test_fail( __FILE__, __LINE__, "Error too high", 1 ); } if (!quiet) printf("\n"); 
/*******************************************/ /* Test a double-precision matrix multiply */ /*******************************************/ total=0; high=0; low=0; expected=flops_double_init_matrix(); num_runs=3; if (!quiet) { printf("Testing a matrix multiply with %lld double-precision FP operations (%d times)\n", expected,num_runs); } for(i=0;ihigh) high=count; if ((low==0) || (count 1.0) || (error<-1.0)) { if (!quiet) printf("Instruction count off by more than 1%%\n"); test_fail( __FILE__, __LINE__, "Error too high", 1 ); } if (!quiet) printf("\n"); test_pass( __FILE__ ); PAPI_shutdown(); return 0; } papi-5.6.0/src/utils/papi_xml_event_info.c000664 001750 001750 00000027410 13216244370 022657 0ustar00jshenry1963jshenry1963000000 000000 /** file papi_xml_event_info.c * @page papi_xml_event_info * @brief papi_xml_event_info utility. * @section NAME * papi_xml_event_info - provides detailed information for PAPI events in XML format * * @section Synopsis * * @section Description * papi_xml_event_info is a PAPI utility program that reports information * about the events available on the current platform in an XML format. * * It will attempt to create an EventSet with each event in it, which * can be slow. * * @section Options *

    *
  • -h print help message *
  • -p print only preset events *
  • -n print only native events *
  • -c COMPONENT print only events from component number COMPONENT *
  • event1, event2, ... print only events that can be created in the same * event set with the events event1, event2, etc. *
* * @section Bugs * There are no known bugs in this utility. * If you find a bug, it should be reported to the * PAPI Mailing List at . */ #include #include #include "papi.h" static int EventSet; static int preset = 1; static int native = 1; static int cidx = -1; /**********************************************************************/ /* Take a string and print a version with properly escaped XML */ /**********************************************************************/ static int xmlize( const char *msg, FILE *f ) { const char *op; if ( !msg ) return PAPI_OK; for ( op = msg; *op != '\0'; op++ ) { switch ( *op ) { case '"': fprintf( f, """ ); break; case '&': fprintf( f, "&" ); break; case '\'': fprintf( f, "'" ); break; case '<': fprintf( f, "<" ); break; case '>': fprintf( f, ">" ); break; default: fprintf( f, "%c", *op); } } return PAPI_OK; } /*************************************/ /* print hardware info in XML format */ /*************************************/ static int papi_xml_hwinfo( FILE * f ) { const PAPI_hw_info_t *hwinfo; if ( ( hwinfo = PAPI_get_hardware_info( ) ) == NULL ) return PAPI_ESYS; fprintf( f, "\n" ); fprintf( f, " vendor_string, f ); fprintf( f,"\"/>\n"); fprintf( f, " \n", hwinfo->vendor ); fprintf( f, " model_string, f ); fprintf( f, "\"/>\n"); fprintf( f, " \n", hwinfo->model ); fprintf( f, " \n", hwinfo->revision ); fprintf( f, " \n" ); fprintf( f, " \n", hwinfo->cpuid_family ); fprintf( f, " \n", hwinfo->cpuid_model ); fprintf( f, " \n", hwinfo->cpuid_stepping ); fprintf( f, " \n" ); fprintf( f, " \n", hwinfo->cpu_max_mhz ); fprintf( f, " \n", hwinfo->cpu_min_mhz ); fprintf( f, " \n", hwinfo->threads ); fprintf( f, " \n", hwinfo->cores ); fprintf( f, " \n", hwinfo->sockets ); fprintf( f, " \n", hwinfo->nnodes ); fprintf( f, " \n", hwinfo->ncpu ); fprintf( f, " \n", hwinfo->totalcpus ); fprintf( f, "\n" ); return PAPI_OK; } /****************************************************************/ /* Test if event can be added to an eventset */ 
/* (there might be existing events if specified on command line */ /****************************************************************/ static int test_event( int evt ) { int retval; retval = PAPI_add_event( EventSet, evt ); if ( retval != PAPI_OK ) { return retval; } if ( ( retval = PAPI_remove_event( EventSet, evt ) ) != PAPI_OK ) { fprintf( stderr, "Error removing event from eventset\n" ); exit( 1 ); } return PAPI_OK; } /***************************************/ /* Convert an event to XML */ /***************************************/ static void xmlize_event( FILE * f, PAPI_event_info_t * info, int num ) { if ( num >= 0 ) { fprintf( f, " symbol, f ); fprintf( f, "\" desc=\""); xmlize( info->long_descr, f ); fprintf( f, "\">\n"); } else { fprintf( f," symbol, f ); fprintf( f,"\" desc=\""); xmlize( info->long_descr, f ); fprintf( f,"\"> \n"); } } /****************************************/ /* Print all preset events */ /****************************************/ static void enum_preset_events( FILE * f, int cidx) { int i, num; int retval; PAPI_event_info_t info; i = PAPI_PRESET_MASK; fprintf( f, " \n" ); num = -1; retval = PAPI_enum_cmp_event( &i, PAPI_ENUM_FIRST, cidx ); while ( retval == PAPI_OK ) { num++; retval = PAPI_get_event_info( i, &info ); if ( retval != PAPI_OK ) { retval = PAPI_enum_cmp_event( &i, PAPI_ENUM_EVENTS, cidx ); continue; } if ( test_event( i ) == PAPI_OK ) { xmlize_event( f, &info, num ); fprintf( f, " \n" ); } retval = PAPI_enum_cmp_event( &i, PAPI_ENUM_EVENTS, cidx ); } fprintf( f, " \n" ); } /****************************************/ /* Print all native events */ /****************************************/ static void enum_native_events( FILE * f, int cidx) { int i, k, num; int retval; PAPI_event_info_t info; i = PAPI_NATIVE_MASK; fprintf( f, " \n" ); num = -1; retval = PAPI_enum_cmp_event( &i, PAPI_ENUM_FIRST, cidx ); while ( retval == PAPI_OK ) { num++; retval = PAPI_get_event_info( i, &info ); if ( retval != PAPI_OK ) { retval = 
PAPI_enum_cmp_event( &i, PAPI_ENUM_EVENTS, cidx ); continue; } /* enumerate any umasks */ k = i; if ( PAPI_enum_cmp_event( &k, PAPI_NTV_ENUM_UMASKS, cidx ) == PAPI_OK ) { /* Test if event can be added */ if ( test_event( k ) == PAPI_OK ) { /* add the event */ xmlize_event( f, &info, num ); /* add the event's unit masks */ do { retval = PAPI_get_event_info( k, &info ); if ( retval == PAPI_OK ) { if ( test_event( k )!=PAPI_OK ) { break; } xmlize_event( f, &info, -1 ); } } while ( PAPI_enum_cmp_event( &k, PAPI_NTV_ENUM_UMASKS, cidx ) == PAPI_OK); fprintf( f, "
\n" ); } } else { /* this event has no unit masks; test & write the event */ if ( test_event( i ) == PAPI_OK ) { xmlize_event( f, &info, num ); fprintf( f, "
\n" ); } } retval = PAPI_enum_cmp_event( &i, PAPI_ENUM_EVENTS, cidx ); } fprintf( f, " \n" ); } /****************************************/ /* Print usage information */ /****************************************/ static void usage( char *argv[] ) { fprintf( stderr, "Usage: %s [options] [[event1] event2 ...]\n", argv[0] ); fprintf( stderr, " options: -h print help message\n" ); fprintf( stderr, " -p print only preset events\n" ); fprintf( stderr, " -n print only native events\n" ); fprintf( stderr," -c n print only events for component index n\n" ); fprintf( stderr, "If event1, event2, etc., are specified, then only events\n"); fprintf( stderr, "that can be run in addition to these events will be printed\n\n"); } static void parse_command_line (int argc, char **argv, int numc) { int i,retval; for( i = 1; i < argc; i++ ) { if ( argv[i][0] == '-' ) { switch ( argv[i][1] ) { case 'c': /* only events for specified component */ /* UGH, what is this, the IOCCC? */ cidx = (i+1) < argc ? atoi( argv[(i++)+1] ) : -1; if ( cidx < 0 || cidx >= numc ) { fprintf( stderr,"Error: component index %d out of bounds (0..%d)\n", cidx, numc - 1 ); usage( argv ); exit(1); } break; case 'p': /* only preset events */ preset = 1; native = 0; break; case 'n': /* only native events */ native = 1; preset = 0; break; case 'h': /* print help */ usage( argv ); exit(0); break; default: fprintf( stderr, "Error: unknown option: %s\n", argv[i] ); usage( argv ); exit(1); } } else { /* If event names are specified, add them to the */ /* EventSet and test if other events can be run with them */ int code = -1; retval = PAPI_event_name_to_code( argv[i], &code ); retval = PAPI_query_event( code ); if ( retval != PAPI_OK ) { fprintf( stderr, "Error: unknown event: %s\n", argv[i] ); usage( argv ); exit(1); } retval = PAPI_add_event( EventSet, code ); if ( retval != PAPI_OK ) { fprintf( stderr, "Error: event %s cannot be counted with others\n", argv[i] ); usage( argv ); exit(1); } } } } int main( int argc, char 
**argv) { int retval; const PAPI_component_info_t *comp; int numc = 0; retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) { fprintf(stderr,"Error! PAPI_library_init\n"); return retval; } /* report any return codes less than 0? */ /* Why? */ #if 0 retval = PAPI_set_debug( PAPI_VERB_ECONT ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_set_debug", retval ); } #endif /* Create EventSet to use */ EventSet = PAPI_NULL; retval = PAPI_create_eventset( &EventSet ); if ( retval != PAPI_OK ) { fprintf(stderr,"Error! PAPI_create_eventset\n"); return retval; } /* Get number of components */ numc = PAPI_num_components( ); /* parse command line arguments */ parse_command_line(argc,argv,numc); /* print XML header */ fprintf( stdout, "\n" ); fprintf( stdout, "\n" ); /* print hardware info */ papi_xml_hwinfo( stdout ); /* If a specific component specified, only print events from there */ if ( cidx >= 0 ) { comp = PAPI_get_component_info( cidx ); fprintf( stdout, "\n", cidx, cidx ? "Unknown" : "CPU", comp->name ); if ( native ) enum_native_events( stdout, cidx); if ( preset ) enum_preset_events( stdout, cidx); fprintf( stdout, "\n" ); } else { /* Otherwise, print info for all components */ for ( cidx = 0; cidx < numc; cidx++ ) { comp = PAPI_get_component_info( cidx ); fprintf( stdout, "\n", cidx, cidx ? "Unknown" : "CPU", comp->name ); if ( native ) enum_native_events( stdout, cidx ); if ( preset ) enum_preset_events( stdout, cidx ); fprintf( stdout, "\n" ); /* clean out eventset */ retval = PAPI_cleanup_eventset( EventSet ); if ( retval != PAPI_OK ) { fprintf(stderr,"Error! PAPI_cleanup_eventset\n"); return retval; } retval = PAPI_destroy_eventset( &EventSet ); if ( retval != PAPI_OK ) { fprintf(stderr,"Error! PAPI_destroy_eventset\n"); return retval; } EventSet = PAPI_NULL; retval = PAPI_create_eventset( &EventSet ); if ( retval != PAPI_OK ) { fprintf(stderr,"Error! 
PAPI_create_eventset\n"); return retval; } /* re-parse command line to set up any events specified */ parse_command_line (argc, argv, numc); } } fprintf( stdout, "\n" ); return 0; } papi-5.6.0/man/man3/PAPI_stop.3000664 001750 001750 00000003525 13216244356 020037 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPI_stop" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPI_stop \- .PP Stop counting hardware events in an event set\&. .SH SYNOPSIS .br .PP .SH "Detailed Description" .PP .PP \fBC Interface:\fP .RS 4 #include <\fBpapi\&.h\fP> .br int \fBPAPI_stop( int EventSet, long long * values )\fP; .RE .PP \fBParameters:\fP .RS 4 \fIEventSet\fP -- an integer handle for a PAPI event set as created by \fBPAPI_create_eventset\fP .br \fIvalues\fP -- an array to hold the counter values of the counting events .RE .PP \fBReturn values:\fP .RS 4 \fIPAPI_OK\fP .br \fIPAPI_EINVAL\fP One or more of the arguments is invalid\&. .br \fIPAPI_ESYS\fP A system or C library call failed inside PAPI, see the errno variable\&. .br \fIPAPI_ENOEVST\fP The EventSet specified does not exist\&. .br \fIPAPI_ENOTRUN\fP The EventSet is currently not running\&. .RE .PP \fBPAPI_stop\fP halts the counting of a previously defined event set and the counter values contained in that EventSet are copied into the values array Assumes an initialized PAPI library and a properly added event set\&. 
.PP \fBExample:\fP .RS 4 .PP .nf * int EventSet = PAPI_NULL; * long long values[2]; * int ret; * * ret = PAPI_create_eventset(&EventSet); * if (ret != PAPI_OK) handle_error(ret); * * // Add Total Instructions Executed to our EventSet * ret = PAPI_add_event(EventSet, PAPI_TOT_INS); * if (ret != PAPI_OK) handle_error(ret); * * // Start counting * ret = PAPI_start(EventSet); * if (ret != PAPI_OK) handle_error(ret); * poorly_tuned_function(); * ret = PAPI_stop(EventSet, values); * if (ret != PAPI_OK) handle_error(ret); * printf("%lld\\n",values[0]); * .fi .PP .RE .PP \fBSee Also:\fP .RS 4 \fBPAPI_create_eventset\fP \fBPAPI_start\fP .RE .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. papi-5.6.0/man/man3/PAPI_mh_info_t.3000664 001750 001750 00000000606 13216244356 021011 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPI_mh_info_t" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPI_mh_info_t \- .PP Memory hierarchy (mh) description: a level count plus an array of per-level entries\&. .SH SYNOPSIS .br .PP .SS "Data Fields" .in +1c .ti -1c .RI "int \fBlevels\fP" .br .ti -1c .RI "\fBPAPI_mh_level_t\fP \fBlevel\fP [4]" .br .in -1c .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. papi-5.6.0/src/perfctr-2.6.x/examples/self/arch.h000775 001750 001750 00000000364 13216244366 023410 0ustar00jshenry1963jshenry1963000000 000000 /* $Id: arch.h,v 1.1 2004/01/11 22:07:12 mikpe Exp $ * Architecture-specific support code. * * Copyright (C) 2004 Mikael Pettersson */ extern void do_setup(const struct perfctr_info *info, struct perfctr_cpu_control *cpu_control); papi-5.6.0/src/components/libmsr/configure000775 001750 001750 00000401530 13216244357 022716 0ustar00jshenry1963jshenry1963000000 000000 #! /bin/sh # Guess values for system-dependent variables and create Makefiles. # Generated by GNU Autoconf 2.63. 
# # Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, # 2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. # This configure script is free software; the Free Software Foundation # gives unlimited permission to copy, distribute and modify it. ## --------------------- ## ## M4sh Initialization. ## ## --------------------- ## # Be more Bourne compatible DUALCASE=1; export DUALCASE # for MKS sh if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then emulate sh NULLCMD=: # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which # is contrary to our usage. Disable this feature. alias -g '${1+"$@"}'='"$@"' setopt NO_GLOB_SUBST else case `(set -o) 2>/dev/null` in *posix*) set -o posix ;; esac fi # PATH needs CR # Avoid depending upon Character Ranges. as_cr_letters='abcdefghijklmnopqrstuvwxyz' as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' as_cr_Letters=$as_cr_letters$as_cr_LETTERS as_cr_digits='0123456789' as_cr_alnum=$as_cr_Letters$as_cr_digits as_nl=' ' export as_nl # Printing a long string crashes Solaris 7 /usr/bin/printf. as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo if (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then as_echo='printf %s\n' as_echo_n='printf %s' else if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' as_echo_n='/usr/ucb/echo -n' else as_echo_body='eval expr "X$1" : "X\\(.*\\)"' as_echo_n_body='eval arg=$1; case $arg in *"$as_nl"*) expr "X$arg" : "X\\(.*\\)$as_nl"; arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; esac; expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" ' export as_echo_n_body as_echo_n='sh -c $as_echo_n_body as_echo' fi export as_echo_body as_echo='sh -c $as_echo_body as_echo' fi # The user is always right. 
if test "${PATH_SEPARATOR+set}" != set; then PATH_SEPARATOR=: (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || PATH_SEPARATOR=';' } fi # Support unset when possible. if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then as_unset=unset else as_unset=false fi # IFS # We need space, tab and new line, in precisely that order. Quoting is # there to prevent editors from complaining about space-tab. # (If _AS_PATH_WALK were called with IFS unset, it would disable word # splitting by setting IFS to empty value.) IFS=" "" $as_nl" # Find who we are. Look in the path if we contain no directory separator. case $0 in *[\\/]* ) as_myself=$0 ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break done IFS=$as_save_IFS ;; esac # We did not find ourselves, most probably we were run as `sh COMMAND' # in which case we are not to be found in the path. if test "x$as_myself" = x; then as_myself=$0 fi if test ! -f "$as_myself"; then $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 { (exit 1); exit 1; } fi # Work around bugs in pre-3.0 UWIN ksh. for as_var in ENV MAIL MAILPATH do ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var done PS1='$ ' PS2='> ' PS4='+ ' # NLS nuisances. LC_ALL=C export LC_ALL LANGUAGE=C export LANGUAGE # Required to use basename. if expr a : '\(a\)' >/dev/null 2>&1 && test "X`expr 00001 : '.*\(...\)'`" = X001; then as_expr=expr else as_expr=false fi if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then as_basename=basename else as_basename=false fi # Name of the executable. as_me=`$as_basename -- "$0" || $as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ X"$0" : 'X\(//\)$' \| \ X"$0" : 'X\(/\)' \| . 
2>/dev/null || $as_echo X/"$0" | sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/ q } /^X\/\(\/\/\)$/{ s//\1/ q } /^X\/\(\/\).*/{ s//\1/ q } s/.*/./; q'` # CDPATH. $as_unset CDPATH if test "x$CONFIG_SHELL" = x; then if (eval ":") 2>/dev/null; then as_have_required=yes else as_have_required=no fi if test $as_have_required = yes && (eval ": (as_func_return () { (exit \$1) } as_func_success () { as_func_return 0 } as_func_failure () { as_func_return 1 } as_func_ret_success () { return 0 } as_func_ret_failure () { return 1 } exitcode=0 if as_func_success; then : else exitcode=1 echo as_func_success failed. fi if as_func_failure; then exitcode=1 echo as_func_failure succeeded. fi if as_func_ret_success; then : else exitcode=1 echo as_func_ret_success failed. fi if as_func_ret_failure; then exitcode=1 echo as_func_ret_failure succeeded. fi if ( set x; as_func_ret_success y && test x = \"\$1\" ); then : else exitcode=1 echo positional parameters were not saved. fi test \$exitcode = 0) || { (exit 1); exit 1; } ( as_lineno_1=\$LINENO as_lineno_2=\$LINENO test \"x\$as_lineno_1\" != \"x\$as_lineno_2\" && test \"x\`expr \$as_lineno_1 + 1\`\" = \"x\$as_lineno_2\") || { (exit 1); exit 1; } ") 2> /dev/null; then : else as_candidate_shells= as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. case $as_dir in /*) for as_base in sh bash ksh sh5; do as_candidate_shells="$as_candidate_shells $as_dir/$as_base" done;; esac done IFS=$as_save_IFS for as_shell in $as_candidate_shells $SHELL; do # Try only shells that exist, to save several forks. if { test -f "$as_shell" || test -f "$as_shell.exe"; } && { ("$as_shell") 2> /dev/null <<\_ASEOF if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then emulate sh NULLCMD=: # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which # is contrary to our usage. Disable this feature. 
alias -g '${1+"$@"}'='"$@"' setopt NO_GLOB_SUBST else case `(set -o) 2>/dev/null` in *posix*) set -o posix ;; esac fi : _ASEOF }; then CONFIG_SHELL=$as_shell as_have_required=yes if { "$as_shell" 2> /dev/null <<\_ASEOF if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then emulate sh NULLCMD=: # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which # is contrary to our usage. Disable this feature. alias -g '${1+"$@"}'='"$@"' setopt NO_GLOB_SUBST else case `(set -o) 2>/dev/null` in *posix*) set -o posix ;; esac fi : (as_func_return () { (exit $1) } as_func_success () { as_func_return 0 } as_func_failure () { as_func_return 1 } as_func_ret_success () { return 0 } as_func_ret_failure () { return 1 } exitcode=0 if as_func_success; then : else exitcode=1 echo as_func_success failed. fi if as_func_failure; then exitcode=1 echo as_func_failure succeeded. fi if as_func_ret_success; then : else exitcode=1 echo as_func_ret_success failed. fi if as_func_ret_failure; then exitcode=1 echo as_func_ret_failure succeeded. fi if ( set x; as_func_ret_success y && test x = "$1" ); then : else exitcode=1 echo positional parameters were not saved. fi test $exitcode = 0) || { (exit 1); exit 1; } ( as_lineno_1=$LINENO as_lineno_2=$LINENO test "x$as_lineno_1" != "x$as_lineno_2" && test "x`expr $as_lineno_1 + 1`" = "x$as_lineno_2") || { (exit 1); exit 1; } _ASEOF }; then break fi fi done if test "x$CONFIG_SHELL" != x; then for as_var in BASH_ENV ENV do ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var done export CONFIG_SHELL exec "$CONFIG_SHELL" "$as_myself" ${1+"$@"} fi if test $as_have_required = no; then echo This script requires a shell more modern than all the echo shells that I found on your system. Please install a echo modern shell, or manually run the script under such a echo shell if you do have one. 
{ (exit 1); exit 1; } fi fi fi (eval "as_func_return () { (exit \$1) } as_func_success () { as_func_return 0 } as_func_failure () { as_func_return 1 } as_func_ret_success () { return 0 } as_func_ret_failure () { return 1 } exitcode=0 if as_func_success; then : else exitcode=1 echo as_func_success failed. fi if as_func_failure; then exitcode=1 echo as_func_failure succeeded. fi if as_func_ret_success; then : else exitcode=1 echo as_func_ret_success failed. fi if as_func_ret_failure; then exitcode=1 echo as_func_ret_failure succeeded. fi if ( set x; as_func_ret_success y && test x = \"\$1\" ); then : else exitcode=1 echo positional parameters were not saved. fi test \$exitcode = 0") || { echo No shell found that supports shell functions. echo Please tell bug-autoconf@gnu.org about your system, echo including any error possibly output before this message. echo This can help us improve future autoconf versions. echo Configuration will now proceed without shell functions. } as_lineno_1=$LINENO as_lineno_2=$LINENO test "x$as_lineno_1" != "x$as_lineno_2" && test "x`expr $as_lineno_1 + 1`" = "x$as_lineno_2" || { # Create $as_me.lineno as a copy of $as_myself, but with $LINENO # uniformly replaced by the line number. The first 'sed' inserts a # line-number line after each line using $LINENO; the second 'sed' # does the real work. The second script uses 'N' to pair each # line-number line with the line containing $LINENO, and appends # trailing '-' during substitution so that $LINENO is not a special # case at line end. # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the # scripts with optimization help from Paolo Bonzini. Blame Lee # E. McMahon (1931-1989) for sed's syntax. 
:-) sed -n ' p /[$]LINENO/= ' <$as_myself | sed ' s/[$]LINENO.*/&-/ t lineno b :lineno N :loop s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/ t loop s/-\n.*// ' >$as_me.lineno && chmod +x "$as_me.lineno" || { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2 { (exit 1); exit 1; }; } # Don't try to exec as it changes $[0], causing all sort of problems # (the dirname of $[0] is not the place where we might find the # original and so on. Autoconf is especially sensitive to this). . "./$as_me.lineno" # Exit status is that of the last command. exit } if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then as_dirname=dirname else as_dirname=false fi ECHO_C= ECHO_N= ECHO_T= case `echo -n x` in -n*) case `echo 'x\c'` in *c*) ECHO_T=' ';; # ECHO_T is single tab character. *) ECHO_C='\c';; esac;; *) ECHO_N='-n';; esac if expr a : '\(a\)' >/dev/null 2>&1 && test "X`expr 00001 : '.*\(...\)'`" = X001; then as_expr=expr else as_expr=false fi rm -f conf$$ conf$$.exe conf$$.file if test -d conf$$.dir; then rm -f conf$$.dir/conf$$.file else rm -f conf$$.dir mkdir conf$$.dir 2>/dev/null fi if (echo >conf$$.file) 2>/dev/null; then if ln -s conf$$.file conf$$ 2>/dev/null; then as_ln_s='ln -s' # ... but there are two gotchas: # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. # In both cases, we have to default to `cp -p'. ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || as_ln_s='cp -p' elif ln conf$$.file conf$$ 2>/dev/null; then as_ln_s=ln else as_ln_s='cp -p' fi else as_ln_s='cp -p' fi rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file rmdir conf$$.dir 2>/dev/null if mkdir -p . 
2>/dev/null; then as_mkdir_p=: else test -d ./-p && rmdir ./-p as_mkdir_p=false fi if test -x / >/dev/null 2>&1; then as_test_x='test -x' else if ls -dL / >/dev/null 2>&1; then as_ls_L_option=L else as_ls_L_option= fi as_test_x=' eval sh -c '\'' if test -d "$1"; then test -d "$1/."; else case $1 in -*)set "./$1";; esac; case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in ???[sx]*):;;*)false;;esac;fi '\'' sh ' fi as_executable_p=$as_test_x # Sed expression to map a string onto a valid CPP name. as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" # Sed expression to map a string onto a valid variable name. as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" exec 7<&0 &1 # Name of the host. # hostname on some systems (SVR3.2, Linux) returns a bogus exit status, # so uname gets run too. ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q` # # Initializations. # ac_default_prefix=/usr/local ac_clean_files= ac_config_libobj_dir=. LIBOBJS= cross_compiling=no subdirs= MFLAGS= MAKEFLAGS= SHELL=${CONFIG_SHELL-/bin/sh} # Identity of this package. PACKAGE_NAME= PACKAGE_TARNAME= PACKAGE_VERSION= PACKAGE_STRING= PACKAGE_BUGREPORT= # Factoring default headers for most tests. 
ac_includes_default="\ #include #ifdef HAVE_SYS_TYPES_H # include #endif #ifdef HAVE_SYS_STAT_H # include #endif #ifdef STDC_HEADERS # include # include #else # ifdef HAVE_STDLIB_H # include # endif #endif #ifdef HAVE_STRING_H # if !defined STDC_HEADERS && defined HAVE_MEMORY_H # include # endif # include #endif #ifdef HAVE_STRINGS_H # include #endif #ifdef HAVE_INTTYPES_H # include #endif #ifdef HAVE_STDINT_H # include #endif #ifdef HAVE_UNISTD_H # include #endif" ac_subst_vars='LTLIBOBJS LIBOBJS CUDA_DIR LIBMSR_LIBDIR LIBMSR_INCDIR EGREP GREP CPP OBJEXT EXEEXT ac_ct_CC CPPFLAGS LDFLAGS CFLAGS CC target_alias host_alias build_alias LIBS ECHO_T ECHO_N ECHO_C DEFS mandir localedir libdir psdir pdfdir dvidir htmldir infodir docdir oldincludedir includedir localstatedir sharedstatedir sysconfdir datadir datarootdir libexecdir sbindir bindir program_transform_name prefix exec_prefix PACKAGE_BUGREPORT PACKAGE_STRING PACKAGE_VERSION PACKAGE_TARNAME PACKAGE_NAME PATH_SEPARATOR SHELL' ac_subst_files='' ac_user_opts=' enable_option_checking with_libmsr_incdir with_libmsr_libdir ' ac_precious_vars='build_alias host_alias target_alias CC CFLAGS LDFLAGS LIBS CPPFLAGS CPP' # Initialize some variables set by options. ac_init_help= ac_init_version=false ac_unrecognized_opts= ac_unrecognized_sep= # The variables have the same names as the options, with # dashes changed to underlines. cache_file=/dev/null exec_prefix=NONE no_create= no_recursion= prefix=NONE program_prefix=NONE program_suffix=NONE program_transform_name=s,x,x, silent= site= srcdir= verbose= x_includes=NONE x_libraries=NONE # Installation directory options. # These are left unexpanded so users can "make install exec_prefix=/foo" # and all the variables that are supposed to be based on exec_prefix # by default will actually change. # Use braces instead of parens because sh, perl, etc. also accept them. # (The list follows the same order as the GNU Coding Standards.) 
bindir='${exec_prefix}/bin' sbindir='${exec_prefix}/sbin' libexecdir='${exec_prefix}/libexec' datarootdir='${prefix}/share' datadir='${datarootdir}' sysconfdir='${prefix}/etc' sharedstatedir='${prefix}/com' localstatedir='${prefix}/var' includedir='${prefix}/include' oldincludedir='/usr/include' docdir='${datarootdir}/doc/${PACKAGE}' infodir='${datarootdir}/info' htmldir='${docdir}' dvidir='${docdir}' pdfdir='${docdir}' psdir='${docdir}' libdir='${exec_prefix}/lib' localedir='${datarootdir}/locale' mandir='${datarootdir}/man' ac_prev= ac_dashdash= for ac_option do # If the previous option needs an argument, assign it. if test -n "$ac_prev"; then eval $ac_prev=\$ac_option ac_prev= continue fi case $ac_option in *=*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;; *) ac_optarg=yes ;; esac # Accept the important Cygnus configure options, so we can diagnose typos. case $ac_dashdash$ac_option in --) ac_dashdash=yes ;; -bindir | --bindir | --bindi | --bind | --bin | --bi) ac_prev=bindir ;; -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) bindir=$ac_optarg ;; -build | --build | --buil | --bui | --bu) ac_prev=build_alias ;; -build=* | --build=* | --buil=* | --bui=* | --bu=*) build_alias=$ac_optarg ;; -cache-file | --cache-file | --cache-fil | --cache-fi \ | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) ac_prev=cache_file ;; -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) cache_file=$ac_optarg ;; --config-cache | -C) cache_file=config.cache ;; -datadir | --datadir | --datadi | --datad) ac_prev=datadir ;; -datadir=* | --datadir=* | --datadi=* | --datad=*) datadir=$ac_optarg ;; -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \ | --dataroo | --dataro | --datar) ac_prev=datarootdir ;; -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \ | --dataroot=* | --dataroo=* | --dataro=* | --datar=*) datarootdir=$ac_optarg 
;; -disable-* | --disable-*) ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && { $as_echo "$as_me: error: invalid feature name: $ac_useropt" >&2 { (exit 1); exit 1; }; } ac_useropt_orig=$ac_useropt ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "enable_$ac_useropt" "*) ;; *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--disable-$ac_useropt_orig" ac_unrecognized_sep=', ';; esac eval enable_$ac_useropt=no ;; -docdir | --docdir | --docdi | --doc | --do) ac_prev=docdir ;; -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*) docdir=$ac_optarg ;; -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv) ac_prev=dvidir ;; -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*) dvidir=$ac_optarg ;; -enable-* | --enable-*) ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && { $as_echo "$as_me: error: invalid feature name: $ac_useropt" >&2 { (exit 1); exit 1; }; } ac_useropt_orig=$ac_useropt ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "enable_$ac_useropt" "*) ;; *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--enable-$ac_useropt_orig" ac_unrecognized_sep=', ';; esac eval enable_$ac_useropt=\$ac_optarg ;; -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ | --exec | --exe | --ex) ac_prev=exec_prefix ;; -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ | --exec=* | --exe=* | --ex=*) exec_prefix=$ac_optarg ;; -gas | --gas | --ga | --g) # Obsolete; use --with-gas. 
with_gas=yes ;; -help | --help | --hel | --he | -h) ac_init_help=long ;; -help=r* | --help=r* | --hel=r* | --he=r* | -hr*) ac_init_help=recursive ;; -help=s* | --help=s* | --hel=s* | --he=s* | -hs*) ac_init_help=short ;; -host | --host | --hos | --ho) ac_prev=host_alias ;; -host=* | --host=* | --hos=* | --ho=*) host_alias=$ac_optarg ;; -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht) ac_prev=htmldir ;; -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \ | --ht=*) htmldir=$ac_optarg ;; -includedir | --includedir | --includedi | --included | --include \ | --includ | --inclu | --incl | --inc) ac_prev=includedir ;; -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ | --includ=* | --inclu=* | --incl=* | --inc=*) includedir=$ac_optarg ;; -infodir | --infodir | --infodi | --infod | --info | --inf) ac_prev=infodir ;; -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) infodir=$ac_optarg ;; -libdir | --libdir | --libdi | --libd) ac_prev=libdir ;; -libdir=* | --libdir=* | --libdi=* | --libd=*) libdir=$ac_optarg ;; -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ | --libexe | --libex | --libe) ac_prev=libexecdir ;; -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ | --libexe=* | --libex=* | --libe=*) libexecdir=$ac_optarg ;; -localedir | --localedir | --localedi | --localed | --locale) ac_prev=localedir ;; -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*) localedir=$ac_optarg ;; -localstatedir | --localstatedir | --localstatedi | --localstated \ | --localstate | --localstat | --localsta | --localst | --locals) ac_prev=localstatedir ;; -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*) localstatedir=$ac_optarg ;; -mandir | --mandir | --mandi | --mand | --man | --ma | --m) ac_prev=mandir ;; -mandir=* | --mandir=* | --mandi=* | 
--mand=* | --man=* | --ma=* | --m=*) mandir=$ac_optarg ;; -nfp | --nfp | --nf) # Obsolete; use --without-fp. with_fp=no ;; -no-create | --no-create | --no-creat | --no-crea | --no-cre \ | --no-cr | --no-c | -n) no_create=yes ;; -no-recursion | --no-recursion | --no-recursio | --no-recursi \ | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) no_recursion=yes ;; -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ | --oldin | --oldi | --old | --ol | --o) ac_prev=oldincludedir ;; -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) oldincludedir=$ac_optarg ;; -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) ac_prev=prefix ;; -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) prefix=$ac_optarg ;; -program-prefix | --program-prefix | --program-prefi | --program-pref \ | --program-pre | --program-pr | --program-p) ac_prev=program_prefix ;; -program-prefix=* | --program-prefix=* | --program-prefi=* \ | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) program_prefix=$ac_optarg ;; -program-suffix | --program-suffix | --program-suffi | --program-suff \ | --program-suf | --program-su | --program-s) ac_prev=program_suffix ;; -program-suffix=* | --program-suffix=* | --program-suffi=* \ | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) program_suffix=$ac_optarg ;; -program-transform-name | --program-transform-name \ | --program-transform-nam | --program-transform-na \ | --program-transform-n | --program-transform- \ | --program-transform | --program-transfor \ | --program-transfo | --program-transf \ | --program-trans | --program-tran \ | --progr-tra | --program-tr | --program-t) ac_prev=program_transform_name ;; -program-transform-name=* | 
--program-transform-name=* \ | --program-transform-nam=* | --program-transform-na=* \ | --program-transform-n=* | --program-transform-=* \ | --program-transform=* | --program-transfor=* \ | --program-transfo=* | --program-transf=* \ | --program-trans=* | --program-tran=* \ | --progr-tra=* | --program-tr=* | --program-t=*) program_transform_name=$ac_optarg ;; -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd) ac_prev=pdfdir ;; -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*) pdfdir=$ac_optarg ;; -psdir | --psdir | --psdi | --psd | --ps) ac_prev=psdir ;; -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*) psdir=$ac_optarg ;; -q | -quiet | --quiet | --quie | --qui | --qu | --q \ | -silent | --silent | --silen | --sile | --sil) silent=yes ;; -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) ac_prev=sbindir ;; -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ | --sbi=* | --sb=*) sbindir=$ac_optarg ;; -sharedstatedir | --sharedstatedir | --sharedstatedi \ | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ | --sharedst | --shareds | --shared | --share | --shar \ | --sha | --sh) ac_prev=sharedstatedir ;; -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \ | --sha=* | --sh=*) sharedstatedir=$ac_optarg ;; -site | --site | --sit) ac_prev=site ;; -site=* | --site=* | --sit=*) site=$ac_optarg ;; -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) ac_prev=srcdir ;; -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) srcdir=$ac_optarg ;; -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ | --syscon | --sysco | --sysc | --sys | --sy) ac_prev=sysconfdir ;; -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) sysconfdir=$ac_optarg ;; -target | --target | --targe | 
--targ | --tar | --ta | --t) ac_prev=target_alias ;; -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) target_alias=$ac_optarg ;; -v | -verbose | --verbose | --verbos | --verbo | --verb) verbose=yes ;; -version | --version | --versio | --versi | --vers | -V) ac_init_version=: ;; -with-* | --with-*) ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && { $as_echo "$as_me: error: invalid package name: $ac_useropt" >&2 { (exit 1); exit 1; }; } ac_useropt_orig=$ac_useropt ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "with_$ac_useropt" "*) ;; *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--with-$ac_useropt_orig" ac_unrecognized_sep=', ';; esac eval with_$ac_useropt=\$ac_optarg ;; -without-* | --without-*) ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && { $as_echo "$as_me: error: invalid package name: $ac_useropt" >&2 { (exit 1); exit 1; }; } ac_useropt_orig=$ac_useropt ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "with_$ac_useropt" "*) ;; *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--without-$ac_useropt_orig" ac_unrecognized_sep=', ';; esac eval with_$ac_useropt=no ;; --x) # Obsolete; use --with-x. 
with_x=yes ;; -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ | --x-incl | --x-inc | --x-in | --x-i) ac_prev=x_includes ;; -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) x_includes=$ac_optarg ;; -x-libraries | --x-libraries | --x-librarie | --x-librari \ | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) ac_prev=x_libraries ;; -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) x_libraries=$ac_optarg ;; -*) { $as_echo "$as_me: error: unrecognized option: $ac_option Try \`$0 --help' for more information." >&2 { (exit 1); exit 1; }; } ;; *=*) ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='` # Reject names that are not valid shell variable names. expr "x$ac_envvar" : ".*[^_$as_cr_alnum]" >/dev/null && { $as_echo "$as_me: error: invalid variable name: $ac_envvar" >&2 { (exit 1); exit 1; }; } eval $ac_envvar=\$ac_optarg export $ac_envvar ;; *) # FIXME: should be removed in autoconf 3.0. $as_echo "$as_me: WARNING: you should use --build, --host, --target" >&2 expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null && $as_echo "$as_me: WARNING: invalid host type: $ac_option" >&2 : ${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option} ;; esac done if test -n "$ac_prev"; then ac_option=--`echo $ac_prev | sed 's/_/-/g'` { $as_echo "$as_me: error: missing argument to $ac_option" >&2 { (exit 1); exit 1; }; } fi if test -n "$ac_unrecognized_opts"; then case $enable_option_checking in no) ;; fatal) { $as_echo "$as_me: error: unrecognized options: $ac_unrecognized_opts" >&2 { (exit 1); exit 1; }; } ;; *) $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;; esac fi # Check all directory arguments for consistency. 
# Post-option-parsing setup (this script identifies itself as generated by GNU Autoconf 2.63).
# In order, the code below:
#   1. strips trailing slashes from each installation-directory variable and aborts unless
#      the value is an absolute directory name (NONE or empty is tolerated only for *prefix);
#   2. copies build/host/target from their *_alias variables and, when --host was given,
#      sets cross_compiling to "maybe" (no --build) or "yes" (--build differs from --host);
#   3. sets ac_tool_prefix from host_alias and silences fd 6 when $silent is yes;
#   4. checks that `pwd` and `ls -di` agree on the working directory;
#   5. locates srcdir by probing for $ac_unique_file in this script's directory, then in ..;
#   6. snapshots every variable named in $ac_precious_vars into ac_env_* / ac_cv_env_*;
#   7. prints the --help text (long, short or recursive) or the --version text and exits;
#   8. creates config.log, attaches it to fd 5 and records uname/PATH information there.
for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \ datadir sysconfdir sharedstatedir localstatedir includedir \ oldincludedir docdir infodir htmldir dvidir pdfdir psdir \ libdir localedir mandir do eval ac_val=\$$ac_var # Remove trailing slashes. case $ac_val in */ ) ac_val=`expr "X$ac_val" : 'X\(.*[^/]\)' \| "X$ac_val" : 'X\(.*\)'` eval $ac_var=\$ac_val;; esac # Be sure to have absolute directory names. case $ac_val in [\\/$]* | ?:[\\/]* ) continue;; NONE | '' ) case $ac_var in *prefix ) continue;; esac;; esac { $as_echo "$as_me: error: expected an absolute directory name for --$ac_var: $ac_val" >&2 { (exit 1); exit 1; }; } done # There might be people who depend on the old broken behavior: `$host' # used to hold the argument of --host etc. # FIXME: To remove some day. build=$build_alias host=$host_alias target=$target_alias # FIXME: To remove some day. if test "x$host_alias" != x; then if test "x$build_alias" = x; then cross_compiling=maybe $as_echo "$as_me: WARNING: If you wanted to set the --build type, don't use --host. If a cross compiler is detected then cross compile mode will be used." >&2 elif test "x$build_alias" != "x$host_alias"; then cross_compiling=yes fi fi ac_tool_prefix= test -n "$host_alias" && ac_tool_prefix=$host_alias- test "$silent" = yes && exec 6>/dev/null ac_pwd=`pwd` && test -n "$ac_pwd" && ac_ls_di=`ls -di .` && ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` || { $as_echo "$as_me: error: working directory cannot be determined" >&2 { (exit 1); exit 1; }; } test "X$ac_ls_di" = "X$ac_pwd_ls_di" || { $as_echo "$as_me: error: pwd does not report name of working directory" >&2 { (exit 1); exit 1; }; } # Find the source files, if location was not specified. if test -z "$srcdir"; then ac_srcdir_defaulted=yes # Try the directory containing this script, then the parent directory.
ac_confdir=`$as_dirname -- "$as_myself" || $as_expr X"$as_myself" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$as_myself" : 'X\(//\)[^/]' \| \ X"$as_myself" : 'X\(//\)$' \| \ X"$as_myself" : 'X\(/\)' \| . 2>/dev/null || $as_echo X"$as_myself" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q'` srcdir=$ac_confdir if test ! -r "$srcdir/$ac_unique_file"; then srcdir=.. fi else ac_srcdir_defaulted=no fi if test ! -r "$srcdir/$ac_unique_file"; then test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .." { $as_echo "$as_me: error: cannot find sources ($ac_unique_file) in $srcdir" >&2 { (exit 1); exit 1; }; } fi ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work" ac_abs_confdir=`( cd "$srcdir" && test -r "./$ac_unique_file" || { $as_echo "$as_me: error: $ac_msg" >&2 { (exit 1); exit 1; }; } pwd)` # When building in place, set srcdir=. if test "$ac_abs_confdir" = "$ac_pwd"; then srcdir=. fi # Remove unnecessary trailing slashes from srcdir. # Double slashes in file names in object file debugging info # mess up M-x gdb in Emacs. case $srcdir in */) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;; esac for ac_var in $ac_precious_vars; do eval ac_env_${ac_var}_set=\${${ac_var}+set} eval ac_env_${ac_var}_value=\$${ac_var} eval ac_cv_env_${ac_var}_set=\${${ac_var}+set} eval ac_cv_env_${ac_var}_value=\$${ac_var} done # # Report the --help message. # if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF \`configure' configures this package to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... To assign environment variables (e.g., CC, CFLAGS...), specify them as VAR=VALUE. See below for descriptions of some of the useful variables. Defaults for the options are specified in brackets.
Configuration: -h, --help display this help and exit --help=short display options specific to this package --help=recursive display the short help of all the included packages -V, --version display version information and exit -q, --quiet, --silent do not print \`checking...' messages --cache-file=FILE cache test results in FILE [disabled] -C, --config-cache alias for \`--cache-file=config.cache' -n, --no-create do not create output files --srcdir=DIR find the sources in DIR [configure dir or \`..'] Installation directories: --prefix=PREFIX install architecture-independent files in PREFIX [$ac_default_prefix] --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX [PREFIX] By default, \`make install' will install all the files in \`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify an installation prefix other than \`$ac_default_prefix' using \`--prefix', for instance \`--prefix=\$HOME'. For better control, use the options below. Fine tuning of the installation directories: --bindir=DIR user executables [EPREFIX/bin] --sbindir=DIR system admin executables [EPREFIX/sbin] --libexecdir=DIR program executables [EPREFIX/libexec] --sysconfdir=DIR read-only single-machine data [PREFIX/etc] --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] --localstatedir=DIR modifiable single-machine data [PREFIX/var] --libdir=DIR object code libraries [EPREFIX/lib] --includedir=DIR C header files [PREFIX/include] --oldincludedir=DIR C header files for non-gcc [/usr/include] --datarootdir=DIR read-only arch.-independent data root [PREFIX/share] --datadir=DIR read-only architecture-independent data [DATAROOTDIR] --infodir=DIR info documentation [DATAROOTDIR/info] --localedir=DIR locale-dependent data [DATAROOTDIR/locale] --mandir=DIR man documentation [DATAROOTDIR/man] --docdir=DIR documentation root [DATAROOTDIR/doc/PACKAGE] --htmldir=DIR html documentation [DOCDIR] --dvidir=DIR dvi documentation [DOCDIR] --pdfdir=DIR pdf
documentation [DOCDIR] --psdir=DIR ps documentation [DOCDIR] _ACEOF cat <<\_ACEOF _ACEOF fi if test -n "$ac_init_help"; then cat <<\_ACEOF Optional Packages: --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) --with-libmsr-incdir= Specify directory of libmsr header files (libmsr.h) in a specific location --with-libmsr-libdir= Specify directory of libmsr library (libmsr.a or libmsr.so) in a specific location Some influential environment variables: CC C compiler command CFLAGS C compiler flags LDFLAGS linker flags, e.g. -L if you have libraries in a nonstandard directory LIBS libraries to pass to the linker, e.g. -l CPPFLAGS C/C++/Objective C preprocessor flags, e.g. -I if you have headers in a nonstandard directory CPP C preprocessor Use these variables to override the choices made by `configure' or to help it to find libraries and programs with nonstandard names/locations. _ACEOF ac_status=$? fi if test "$ac_init_help" = "recursive"; then # If there are subdirs, report their specific --help. for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue test -d "$ac_dir" || { cd "$srcdir" && ac_pwd=`pwd` && srcdir=. && test -d "$ac_dir"; } || continue ac_builddir=. case "$ac_dir" in .) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; *) ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` # A ".." for each directory in $ac_dir_suffix. ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` case $ac_top_builddir_sub in "") ac_top_builddir_sub=. ac_top_build_prefix= ;; *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; esac ;; esac ac_abs_top_builddir=$ac_pwd ac_abs_builddir=$ac_pwd$ac_dir_suffix # for backward compatibility: ac_top_builddir=$ac_top_build_prefix case $srcdir in .) # We are building in place. ac_srcdir=. ac_top_srcdir=$ac_top_builddir_sub ac_abs_top_srcdir=$ac_pwd ;; [\\/]* | ?:[\\/]* ) # Absolute name.
ac_srcdir=$srcdir$ac_dir_suffix; ac_top_srcdir=$srcdir ac_abs_top_srcdir=$srcdir ;; *) # Relative name. ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix ac_top_srcdir=$ac_top_build_prefix$srcdir ac_abs_top_srcdir=$ac_pwd/$srcdir ;; esac ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix cd "$ac_dir" || { ac_status=$?; continue; } # Check for guested configure. if test -f "$ac_srcdir/configure.gnu"; then echo && $SHELL "$ac_srcdir/configure.gnu" --help=recursive elif test -f "$ac_srcdir/configure"; then echo && $SHELL "$ac_srcdir/configure" --help=recursive else $as_echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2 fi || ac_status=$? cd "$ac_pwd" || { ac_status=$?; break; } done fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF configure generated by GNU Autoconf 2.63 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. This configure script is free software; the Free Software Foundation gives unlimited permission to copy, distribute and modify it. _ACEOF exit fi cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. It was created by $as_me, which was generated by GNU Autoconf 2.63. Invocation command line was $ $0 $@ _ACEOF exec 5>>config.log { cat <<_ASUNAME ## --------- ## ## Platform.
## ## --------- ## hostname = `(hostname || uname -n) 2>/dev/null | sed 1q` uname -m = `(uname -m) 2>/dev/null || echo unknown` uname -r = `(uname -r) 2>/dev/null || echo unknown` uname -s = `(uname -s) 2>/dev/null || echo unknown` uname -v = `(uname -v) 2>/dev/null || echo unknown` /usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown` /bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown` /bin/arch = `(/bin/arch) 2>/dev/null || echo unknown` /usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown` /usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown` /usr/bin/hostinfo = `(/usr/bin/hostinfo) 2>/dev/null || echo unknown` /bin/machine = `(/bin/machine) 2>/dev/null || echo unknown` /usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown` /bin/universe = `(/bin/universe) 2>/dev/null || echo unknown` _ASUNAME as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. $as_echo "PATH: $as_dir" done IFS=$as_save_IFS } >&5 cat >&5 <<_ACEOF ## ----------- ## ## Core tests. ## ## ----------- ## _ACEOF # Keep a trace of the command line. # Strip out --no-create and --no-recursion so they do not pile up. # Strip out --silent because we don't want to record it for future runs. # Also quote any args containing shell meta-characters. # Make two passes to allow for proper duplicate-argument suppression.
# Command-line recording, exit/signal traps, confdefs.h and site/cache loading.
# In order, the code below:
#   1. builds ac_configure_args from "$@" in two passes, dropping --no-create, --no-recursion
#      and --quiet/--silent, escaping embedded apostrophes and suppressing duplicates;
#   2. installs a trap on exit (0) that appends cache variables, output variables, file
#      substitutions and confdefs.h to fd 5, removes core/conftest*/confdefs* files and
#      $ac_clean_files, then exits with the saved status; signals 1 2 13 15 store their
#      number in ac_signal and exit 1;
#   3. recreates confdefs.h and appends the PACKAGE_* #define lines;
#   4. sources $CONFIG_SITE if set, otherwise share/config.site and etc/config.site under
#      $prefix (or $ac_default_prefix when prefix is NONE);
#   5. sources $cache_file when it is readable and a regular file; when it is not readable
#      the file is created empty;
#   6. compares every precious variable with the value stored by the previous run:
#      whitespace-only differences reuse the old value, any other mismatch marks the
#      cache corrupted and the script exits 1; set variables are appended to
#      ac_configure_args;
#   7. selects the C-language settings (ac_ext, ac_cpp, ac_compile, ac_link).
ac_configure_args= ac_configure_args0= ac_configure_args1= ac_must_keep_next=false for ac_pass in 1 2 do for ac_arg do case $ac_arg in -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;; -q | -quiet | --quiet | --quie | --qui | --qu | --q \ | -silent | --silent | --silen | --sile | --sil) continue ;; *\'*) ac_arg=`$as_echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; esac case $ac_pass in 1) ac_configure_args0="$ac_configure_args0 '$ac_arg'" ;; 2) ac_configure_args1="$ac_configure_args1 '$ac_arg'" if test $ac_must_keep_next = true; then ac_must_keep_next=false # Got value, back to normal. else case $ac_arg in *=* | --config-cache | -C | -disable-* | --disable-* \ | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \ | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \ | -with-* | --with-* | -without-* | --without-* | --x) case "$ac_configure_args0 " in "$ac_configure_args1"*" '$ac_arg' "* ) continue ;; esac ;; -* ) ac_must_keep_next=true ;; esac fi ac_configure_args="$ac_configure_args '$ac_arg'" ;; esac done done $as_unset ac_configure_args0 || test "${ac_configure_args0+set}" != set || { ac_configure_args0=; export ac_configure_args0; } $as_unset ac_configure_args1 || test "${ac_configure_args1+set}" != set || { ac_configure_args1=; export ac_configure_args1; } # When interrupted or exit'd, cleanup temporary files, and complete # config.log. We remove comments because anyway the quotes in there # would cause problems or look ugly. # WARNING: Use '\'' to represent an apostrophe within the trap. # WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug. trap 'exit_status=$? # Save into config.log some information that might help in debugging. { echo cat <<\_ASBOX ## ---------------- ## ## Cache variables.
## ## ---------------- ## _ASBOX echo # The following way of writing the cache mishandles newlines in values, ( for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do eval ac_val=\$$ac_var case $ac_val in #( *${as_nl}*) case $ac_var in #( *_cv_*) { $as_echo "$as_me:$LINENO: WARNING: cache variable $ac_var contains a newline" >&5 $as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; esac case $ac_var in #( _ | IFS | as_nl) ;; #( BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( *) $as_unset $ac_var ;; esac ;; esac done (set) 2>&1 | case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #( *${as_nl}ac_space=\ *) sed -n \ "s/'\''/'\''\\\\'\'''\''/g; s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p" ;; #( *) sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" ;; esac | sort ) echo cat <<\_ASBOX ## ----------------- ## ## Output variables. ## ## ----------------- ## _ASBOX echo for ac_var in $ac_subst_vars do eval ac_val=\$$ac_var case $ac_val in *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; esac $as_echo "$ac_var='\''$ac_val'\''" done | sort echo if test -n "$ac_subst_files"; then cat <<\_ASBOX ## ------------------- ## ## File substitutions. ## ## ------------------- ## _ASBOX echo for ac_var in $ac_subst_files do eval ac_val=\$$ac_var case $ac_val in *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; esac $as_echo "$ac_var='\''$ac_val'\''" done | sort echo fi if test -s confdefs.h; then cat <<\_ASBOX ## ----------- ## ## confdefs.h.
## ## ----------- ## _ASBOX echo cat confdefs.h echo fi test "$ac_signal" != 0 && $as_echo "$as_me: caught signal $ac_signal" $as_echo "$as_me: exit $exit_status" } >&5 rm -f core *.core core.conftest.* && rm -f -r conftest* confdefs* conf$$* $ac_clean_files && exit $exit_status ' 0 for ac_signal in 1 2 13 15; do trap 'ac_signal='$ac_signal'; { (exit 1); exit 1; }' $ac_signal done ac_signal=0 # confdefs.h avoids OS command line length limits that DEFS can exceed. rm -f -r conftest* confdefs.h # Predefined preprocessor variables. cat >>confdefs.h <<_ACEOF #define PACKAGE_NAME "$PACKAGE_NAME" _ACEOF cat >>confdefs.h <<_ACEOF #define PACKAGE_TARNAME "$PACKAGE_TARNAME" _ACEOF cat >>confdefs.h <<_ACEOF #define PACKAGE_VERSION "$PACKAGE_VERSION" _ACEOF cat >>confdefs.h <<_ACEOF #define PACKAGE_STRING "$PACKAGE_STRING" _ACEOF cat >>confdefs.h <<_ACEOF #define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT" _ACEOF # Let the site file select an alternate cache file if it wants to. # Prefer an explicitly selected file to automatically selected ones. ac_site_file1=NONE ac_site_file2=NONE if test -n "$CONFIG_SITE"; then ac_site_file1=$CONFIG_SITE elif test "x$prefix" != xNONE; then ac_site_file1=$prefix/share/config.site ac_site_file2=$prefix/etc/config.site else ac_site_file1=$ac_default_prefix/share/config.site ac_site_file2=$ac_default_prefix/etc/config.site fi for ac_site_file in "$ac_site_file1" "$ac_site_file2" do test "x$ac_site_file" = xNONE && continue if test -r "$ac_site_file"; then { $as_echo "$as_me:$LINENO: loading site script $ac_site_file" >&5 $as_echo "$as_me: loading site script $ac_site_file" >&6;} sed 's/^/| /' "$ac_site_file" >&5 . "$ac_site_file" fi done if test -r "$cache_file"; then # Some versions of bash will fail to source /dev/null (special # files actually), so we avoid doing that.
if test -f "$cache_file"; then { $as_echo "$as_me:$LINENO: loading cache $cache_file" >&5 $as_echo "$as_me: loading cache $cache_file" >&6;} case $cache_file in [\\/]* | ?:[\\/]* ) . "$cache_file";; *) . "./$cache_file";; esac fi else { $as_echo "$as_me:$LINENO: creating cache $cache_file" >&5 $as_echo "$as_me: creating cache $cache_file" >&6;} >$cache_file fi # Check that the precious variables saved in the cache have kept the same # value. ac_cache_corrupted=false for ac_var in $ac_precious_vars; do eval ac_old_set=\$ac_cv_env_${ac_var}_set eval ac_new_set=\$ac_env_${ac_var}_set eval ac_old_val=\$ac_cv_env_${ac_var}_value eval ac_new_val=\$ac_env_${ac_var}_value case $ac_old_set,$ac_new_set in set,) { $as_echo "$as_me:$LINENO: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5 $as_echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;} ac_cache_corrupted=: ;; ,set) { $as_echo "$as_me:$LINENO: error: \`$ac_var' was not set in the previous run" >&5 $as_echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} ac_cache_corrupted=: ;; ,);; *) if test "x$ac_old_val" != "x$ac_new_val"; then # differences in whitespace do not lead to failure.
ac_old_val_w=`echo x $ac_old_val` ac_new_val_w=`echo x $ac_new_val` if test "$ac_old_val_w" != "$ac_new_val_w"; then { $as_echo "$as_me:$LINENO: error: \`$ac_var' has changed since the previous run:" >&5 $as_echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;} ac_cache_corrupted=: else { $as_echo "$as_me:$LINENO: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5 $as_echo "$as_me: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;} eval $ac_var=\$ac_old_val fi { $as_echo "$as_me:$LINENO: former value: \`$ac_old_val'" >&5 $as_echo "$as_me: former value: \`$ac_old_val'" >&2;} { $as_echo "$as_me:$LINENO: current value: \`$ac_new_val'" >&5 $as_echo "$as_me: current value: \`$ac_new_val'" >&2;} fi;; esac # Pass precious variables to config.status. if test "$ac_new_set" = set; then case $ac_new_val in *\'*) ac_arg=$ac_var=`$as_echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; *) ac_arg=$ac_var=$ac_new_val ;; esac case " $ac_configure_args " in *" '$ac_arg' "*) ;; # Avoid dups. Use of quotes ensures accuracy. *) ac_configure_args="$ac_configure_args '$ac_arg'" ;; esac fi done if $ac_cache_corrupted; then { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} { $as_echo "$as_me:$LINENO: error: changes in the environment can compromise the build" >&5 $as_echo "$as_me: error: changes in the environment can compromise the build" >&2;} { { $as_echo "$as_me:$LINENO: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&5 $as_echo "$as_me: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&2;} { (exit 1); exit 1; }; } fi ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu # Check whether --with-libmsr_incdir was given.
# libmsr location options, followed by the start of the C compiler search.
#
# --with-libmsr-incdir / --with-libmsr-libdir are copied into libmsr_incdir /
# libmsr_libdir.  When a directory is given, the matching -I / -L flag is added to
# the flags used by the compile and link tests and libmsr_dotest is set to 1
# (libmsr_dotest is not read in this span; presumably a later check consumes it).
#
# Changes from the previous version of this block:
#   * "-I$libmsr_incdir" is added to CFLAGS only when libmsr_incdir is non-empty.
#     It used to be appended unconditionally, which left a bare "-I" at the end of
#     CFLAGS; with an empty CPPFLAGS that "-I" consumed conftest.$ac_ext in
#     $ac_compile/$ac_link.
#   * LIBS and CPPFLAGS are extended instead of overwritten, so values supplied by
#     the caller are kept.  When they are unset the result is identical to before.
#
# NOTE(review): this script is Autoconf output; presumably the same change is needed
# in the configure.in/configure.ac input (not visible here), otherwise regenerating
# the script will revert it.
#
# NOTE: this physical line ends inside the quoted "checking for ..." message whose
# closing quote is the first character of the next line, so nothing may be appended
# after the last line of this block.
if test "${with_libmsr_incdir+set}" = set; then
  withval=$with_libmsr_incdir; libmsr_incdir=$withval
fi

# Check whether --with-libmsr_libdir was given.
if test "${with_libmsr_libdir+set}" = set; then
  withval=$with_libmsr_libdir; libmsr_libdir=$withval
fi

if test "x$libmsr_libdir" != "x"; then
  # Put the search path first so any -l flags already in LIBS are resolved against it.
  LIBS="-L$libmsr_libdir${LIBS+ $LIBS}"
  libmsr_dotest=1
fi
if test "x$libmsr_incdir" != "x"; then
  CPPFLAGS="${CPPFLAGS+$CPPFLAGS }-I$libmsr_incdir"
  # Kept for compatibility with the previous behaviour, which also exposed the
  # include directory through CFLAGS.
  CFLAGS="$CFLAGS -I$libmsr_incdir"
  libmsr_dotest=1
fi

# Locate a C compiler: try ${ac_tool_prefix}gcc on $PATH (only when a tool prefix is
# set), then plain gcc.  A CC value supplied by the caller takes precedence.
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu
if test -n "$ac_tool_prefix"; then
  # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args.
  set dummy ${ac_tool_prefix}gcc; ac_word=$2
  { $as_echo "$as_me:$LINENO: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word... " >&6; }
  if test "${ac_cv_prog_CC+set}" = set; then
    $as_echo_n "(cached) " >&6
  else
    if test -n "$CC"; then
      ac_cv_prog_CC="$CC" # Let the user override the test.
    else
      as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
      for as_dir in $PATH
      do
        IFS=$as_save_IFS
        test -z "$as_dir" && as_dir=.
        for ac_exec_ext in '' $ac_executable_extensions; do
          if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
            ac_cv_prog_CC="${ac_tool_prefix}gcc"
            $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
            break 2
          fi
        done
      done
      IFS=$as_save_IFS
    fi
  fi
  CC=$ac_cv_prog_CC
  if test -n "$CC"; then
    { $as_echo "$as_me:$LINENO: result: $CC" >&5
    $as_echo "$CC" >&6; }
  else
    { $as_echo "$as_me:$LINENO: result: no" >&5
    $as_echo "no" >&6; }
  fi
fi
if test -z "$ac_cv_prog_CC"; then
  ac_ct_CC=$CC
  # Extract the first word of "gcc", so it can be a program name with args.
  set dummy gcc; ac_word=$2
  { $as_echo "$as_me:$LINENO: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word...
" >&6; } if test "${ac_cv_prog_ac_ct_CC+set}" = set; then $as_echo_n "(cached) " >&6 else if test -n "$ac_ct_CC"; then ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then ac_cv_prog_ac_ct_CC="gcc" $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_CC=$ac_cv_prog_ac_ct_CC if test -n "$ac_ct_CC"; then { $as_echo "$as_me:$LINENO: result: $ac_ct_CC" >&5 $as_echo "$ac_ct_CC" >&6; } else { $as_echo "$as_me:$LINENO: result: no" >&5 $as_echo "no" >&6; } fi if test "x$ac_ct_CC" = x; then CC="" else case $cross_compiling:$ac_tool_warned in yes:) { $as_echo "$as_me:$LINENO: WARNING: using cross tools not prefixed with host triplet" >&5 $as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac CC=$ac_ct_CC fi else CC="$ac_cv_prog_CC" fi if test -z "$CC"; then if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. set dummy ${ac_tool_prefix}cc; ac_word=$2 { $as_echo "$as_me:$LINENO: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if test "${ac_cv_prog_CC+set}" = set; then $as_echo_n "(cached) " >&6 else if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then ac_cv_prog_CC="${ac_tool_prefix}cc" $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then { $as_echo "$as_me:$LINENO: result: $CC" >&5 $as_echo "$CC" >&6; } else { $as_echo "$as_me:$LINENO: result: no" >&5 $as_echo "no" >&6; } fi fi fi if test -z "$CC"; then # Extract the first word of "cc", so it can be a program name with args. set dummy cc; ac_word=$2 { $as_echo "$as_me:$LINENO: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if test "${ac_cv_prog_CC+set}" = set; then $as_echo_n "(cached) " >&6 else if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else ac_prog_rejected=no as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then ac_prog_rejected=yes continue fi ac_cv_prog_CC="cc" $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS if test $ac_prog_rejected = yes; then # We found a bogon in the path, so make sure we never use it. set dummy $ac_cv_prog_CC shift if test $# != 0; then # We chose a different compiler from the bogus one. # However, it has the same basename, so the bogon will be chosen # first if we set CC to just the basename; use the full file name. 
shift ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@" fi fi fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then { $as_echo "$as_me:$LINENO: result: $CC" >&5 $as_echo "$CC" >&6; } else { $as_echo "$as_me:$LINENO: result: no" >&5 $as_echo "no" >&6; } fi fi if test -z "$CC"; then if test -n "$ac_tool_prefix"; then for ac_prog in cl.exe do # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. set dummy $ac_tool_prefix$ac_prog; ac_word=$2 { $as_echo "$as_me:$LINENO: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if test "${ac_cv_prog_CC+set}" = set; then $as_echo_n "(cached) " >&6 else if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then ac_cv_prog_CC="$ac_tool_prefix$ac_prog" $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then { $as_echo "$as_me:$LINENO: result: $CC" >&5 $as_echo "$CC" >&6; } else { $as_echo "$as_me:$LINENO: result: no" >&5 $as_echo "no" >&6; } fi test -n "$CC" && break done fi if test -z "$CC"; then ac_ct_CC=$CC for ac_prog in cl.exe do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 { $as_echo "$as_me:$LINENO: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if test "${ac_cv_prog_ac_ct_CC+set}" = set; then $as_echo_n "(cached) " >&6 else if test -n "$ac_ct_CC"; then ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then ac_cv_prog_ac_ct_CC="$ac_prog" $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_CC=$ac_cv_prog_ac_ct_CC if test -n "$ac_ct_CC"; then { $as_echo "$as_me:$LINENO: result: $ac_ct_CC" >&5 $as_echo "$ac_ct_CC" >&6; } else { $as_echo "$as_me:$LINENO: result: no" >&5 $as_echo "no" >&6; } fi test -n "$ac_ct_CC" && break done if test "x$ac_ct_CC" = x; then CC="" else case $cross_compiling:$ac_tool_warned in yes:) { $as_echo "$as_me:$LINENO: WARNING: using cross tools not prefixed with host triplet" >&5 $as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac CC=$ac_ct_CC fi fi fi test -z "$CC" && { { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} { { $as_echo "$as_me:$LINENO: error: no acceptable C compiler found in \$PATH See \`config.log' for more details." >&5 $as_echo "$as_me: error: no acceptable C compiler found in \$PATH See \`config.log' for more details." >&2;} { (exit 1); exit 1; }; }; } # Provide some information about the compiler. $as_echo "$as_me:$LINENO: checking for C compiler version" >&5 set X $ac_compile ac_compiler=$2 { (ac_try="$ac_compiler --version >&5" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" $as_echo "$ac_try_echo") >&5 (eval "$ac_compiler --version >&5") 2>&5 ac_status=$? $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } { (ac_try="$ac_compiler -v >&5" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" $as_echo "$ac_try_echo") >&5 (eval "$ac_compiler -v >&5") 2>&5 ac_status=$? $as_echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 (exit $ac_status); } { (ac_try="$ac_compiler -V >&5" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" $as_echo "$ac_try_echo") >&5 (eval "$ac_compiler -V >&5") 2>&5 ac_status=$? $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ int main () { ; return 0; } _ACEOF ac_clean_files_save=$ac_clean_files ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out" # Try to create an executable without -o first, disregard a.out. # It will help us diagnose broken compilers, and finding out an intuition # of exeext. { $as_echo "$as_me:$LINENO: checking for C compiler default output file name" >&5 $as_echo_n "checking for C compiler default output file name... " >&6; } ac_link_default=`$as_echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'` # The possible output files: ac_files="a.out conftest.exe conftest a.exe a_out.exe b.out conftest.*" ac_rmfiles= for ac_file in $ac_files do case $ac_file in *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; * ) ac_rmfiles="$ac_rmfiles $ac_file";; esac done rm -f $ac_rmfiles if { (ac_try="$ac_link_default" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" $as_echo "$ac_try_echo") >&5 (eval "$ac_link_default") 2>&5 ac_status=$? $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; then # Autoconf-2.13 could set the ac_cv_exeext variable to `no'. # So ignore a value of `no', otherwise this would lead to `EXEEXT = no' # in a Makefile. We should not override ac_cv_exeext if it was cached, # so that the user can short-circuit this test for compilers unknown to # Autoconf. 
for ac_file in $ac_files '' do test -f "$ac_file" || continue case $ac_file in *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; [ab].out ) # We found the default executable, but exeext='' is most # certainly right. break;; *.* ) if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no; then :; else ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` fi # We set ac_cv_exeext here because the later test for it is not # safe: cross compilers may not add the suffix if given an `-o' # argument, so we may need to know it at that point already. # Even if this section looks crufty: it has the advantage of # actually working. break;; * ) break;; esac done test "$ac_cv_exeext" = no && ac_cv_exeext= else ac_file='' fi { $as_echo "$as_me:$LINENO: result: $ac_file" >&5 $as_echo "$ac_file" >&6; } if test -z "$ac_file"; then $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 { { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} { { $as_echo "$as_me:$LINENO: error: C compiler cannot create executables See \`config.log' for more details." >&5 $as_echo "$as_me: error: C compiler cannot create executables See \`config.log' for more details." >&2;} { (exit 77); exit 77; }; }; } fi ac_exeext=$ac_cv_exeext # Check that the compiler produces executables we can run. If not, either # the compiler is broken, or we cross compile. { $as_echo "$as_me:$LINENO: checking whether the C compiler works" >&5 $as_echo_n "checking whether the C compiler works... " >&6; } # FIXME: These cross compiler hacks should be removed for Autoconf 3.0 # If not cross compiling, check that we can run a simple program. 
if test "$cross_compiling" != yes; then if { ac_try='./$ac_file' { (case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" $as_echo "$ac_try_echo") >&5 (eval "$ac_try") 2>&5 ac_status=$? $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; }; then cross_compiling=no else if test "$cross_compiling" = maybe; then cross_compiling=yes else { { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} { { $as_echo "$as_me:$LINENO: error: cannot run C compiled programs. If you meant to cross compile, use \`--host'. See \`config.log' for more details." >&5 $as_echo "$as_me: error: cannot run C compiled programs. If you meant to cross compile, use \`--host'. See \`config.log' for more details." >&2;} { (exit 1); exit 1; }; }; } fi fi fi { $as_echo "$as_me:$LINENO: result: yes" >&5 $as_echo "yes" >&6; } rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out ac_clean_files=$ac_clean_files_save # Check that the compiler produces executables we can run. If not, either # the compiler is broken, or we cross compile. { $as_echo "$as_me:$LINENO: checking whether we are cross compiling" >&5 $as_echo_n "checking whether we are cross compiling... " >&6; } { $as_echo "$as_me:$LINENO: result: $cross_compiling" >&5 $as_echo "$cross_compiling" >&6; } { $as_echo "$as_me:$LINENO: checking for suffix of executables" >&5 $as_echo_n "checking for suffix of executables... " >&6; } if { (ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" $as_echo "$ac_try_echo") >&5 (eval "$ac_link") 2>&5 ac_status=$? $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; then # If both `conftest.exe' and `conftest' are `present' (well, observable) # catch `conftest.exe'. 
For instance with Cygwin, `ls conftest' will # work properly (i.e., refer to `conftest.exe'), while it won't with # `rm'. for ac_file in conftest.exe conftest conftest.*; do test -f "$ac_file" || continue case $ac_file in *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` break;; * ) break;; esac done else { { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} { { $as_echo "$as_me:$LINENO: error: cannot compute suffix of executables: cannot compile and link See \`config.log' for more details." >&5 $as_echo "$as_me: error: cannot compute suffix of executables: cannot compile and link See \`config.log' for more details." >&2;} { (exit 1); exit 1; }; }; } fi rm -f conftest$ac_cv_exeext { $as_echo "$as_me:$LINENO: result: $ac_cv_exeext" >&5 $as_echo "$ac_cv_exeext" >&6; } rm -f conftest.$ac_ext EXEEXT=$ac_cv_exeext ac_exeext=$EXEEXT { $as_echo "$as_me:$LINENO: checking for suffix of object files" >&5 $as_echo_n "checking for suffix of object files... " >&6; } if test "${ac_cv_objext+set}" = set; then $as_echo_n "(cached) " >&6 else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ int main () { ; return 0; } _ACEOF rm -f conftest.o conftest.obj if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" $as_echo "$ac_try_echo") >&5 (eval "$ac_compile") 2>&5 ac_status=$? $as_echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 (exit $ac_status); }; then for ac_file in conftest.o conftest.obj conftest.*; do test -f "$ac_file" || continue; case $ac_file in *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM ) ;; *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'` break;; esac done else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 { { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} { { $as_echo "$as_me:$LINENO: error: cannot compute suffix of object files: cannot compile See \`config.log' for more details." >&5 $as_echo "$as_me: error: cannot compute suffix of object files: cannot compile See \`config.log' for more details." >&2;} { (exit 1); exit 1; }; }; } fi rm -f conftest.$ac_cv_objext conftest.$ac_ext fi { $as_echo "$as_me:$LINENO: result: $ac_cv_objext" >&5 $as_echo "$ac_cv_objext" >&6; } OBJEXT=$ac_cv_objext ac_objext=$OBJEXT { $as_echo "$as_me:$LINENO: checking whether we are using the GNU C compiler" >&5 $as_echo_n "checking whether we are using the GNU C compiler... " >&6; } if test "${ac_cv_c_compiler_gnu+set}" = set; then $as_echo_n "(cached) " >&6 else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ int main () { #ifndef __GNUC__ choke me #endif ; return 0; } _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" $as_echo "$ac_try_echo") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! 
-s conftest.err } && test -s conftest.$ac_objext; then ac_compiler_gnu=yes else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_compiler_gnu=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext ac_cv_c_compiler_gnu=$ac_compiler_gnu fi { $as_echo "$as_me:$LINENO: result: $ac_cv_c_compiler_gnu" >&5 $as_echo "$ac_cv_c_compiler_gnu" >&6; } if test $ac_compiler_gnu = yes; then GCC=yes else GCC= fi ac_test_CFLAGS=${CFLAGS+set} ac_save_CFLAGS=$CFLAGS { $as_echo "$as_me:$LINENO: checking whether $CC accepts -g" >&5 $as_echo_n "checking whether $CC accepts -g... " >&6; } if test "${ac_cv_prog_cc_g+set}" = set; then $as_echo_n "(cached) " >&6 else ac_save_c_werror_flag=$ac_c_werror_flag ac_c_werror_flag=yes ac_cv_prog_cc_g=no CFLAGS="-g" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ int main () { ; return 0; } _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" $as_echo "$ac_try_echo") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext; then ac_cv_prog_cc_g=yes else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 CFLAGS="" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ int main () { ; return 0; } _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" $as_echo "$ac_try_echo") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext; then : else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_c_werror_flag=$ac_save_c_werror_flag CFLAGS="-g" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ int main () { ; return 0; } _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" $as_echo "$ac_try_echo") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! 
-s conftest.err } && test -s conftest.$ac_objext; then ac_cv_prog_cc_g=yes else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext ac_c_werror_flag=$ac_save_c_werror_flag fi { $as_echo "$as_me:$LINENO: result: $ac_cv_prog_cc_g" >&5 $as_echo "$ac_cv_prog_cc_g" >&6; } if test "$ac_test_CFLAGS" = set; then CFLAGS=$ac_save_CFLAGS elif test $ac_cv_prog_cc_g = yes; then if test "$GCC" = yes; then CFLAGS="-g -O2" else CFLAGS="-g" fi else if test "$GCC" = yes; then CFLAGS="-O2" else CFLAGS= fi fi { $as_echo "$as_me:$LINENO: checking for $CC option to accept ISO C89" >&5 $as_echo_n "checking for $CC option to accept ISO C89... " >&6; } if test "${ac_cv_prog_cc_c89+set}" = set; then $as_echo_n "(cached) " >&6 else ac_cv_prog_cc_c89=no ac_save_CC=$CC cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include #include #include #include /* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */ struct buf { int x; }; FILE * (*rcsopen) (struct buf *, struct stat *, int); static char *e (p, i) char **p; int i; { return p[i]; } static char *f (char * (*g) (char **, int), char **p, ...) { char *s; va_list v; va_start (v,p); s = g (p, va_arg (v,int)); va_end (v); return s; } /* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has function prototypes and stuff, but not '\xHH' hex character constants. These don't provoke an error unfortunately, instead are silently treated as 'x'. The following induces an error, until -std is added to get proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an array size at least. It's necessary to write '\x00'==0 to get something that's true only with -std. */ int osf4_cc_array ['\x00' == 0 ? 
1 : -1]; /* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters inside strings and character constants. */ #define FOO(x) 'x' int xlc6_cc_array[FOO(a) == 'x' ? 1 : -1]; int test (int i, double x); struct s1 {int (*f) (int a);}; struct s2 {int (*f) (double a);}; int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); int argc; char **argv; int main () { return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; ; return 0; } _ACEOF for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \ -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" do CC="$ac_save_CC $ac_arg" rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" $as_echo "$ac_try_echo") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! 
-s conftest.err } && test -s conftest.$ac_objext; then ac_cv_prog_cc_c89=$ac_arg else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 fi rm -f core conftest.err conftest.$ac_objext test "x$ac_cv_prog_cc_c89" != "xno" && break done rm -f conftest.$ac_ext CC=$ac_save_CC fi # AC_CACHE_VAL case "x$ac_cv_prog_cc_c89" in x) { $as_echo "$as_me:$LINENO: result: none needed" >&5 $as_echo "none needed" >&6; } ;; xno) { $as_echo "$as_me:$LINENO: result: unsupported" >&5 $as_echo "unsupported" >&6; } ;; *) CC="$CC $ac_cv_prog_cc_c89" { $as_echo "$as_me:$LINENO: result: $ac_cv_prog_cc_c89" >&5 $as_echo "$ac_cv_prog_cc_c89" >&6; } ;; esac ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu { $as_echo "$as_me:$LINENO: checking how to run the C preprocessor" >&5 $as_echo_n "checking how to run the C preprocessor... " >&6; } # On Suns, sometimes $CPP names a directory. if test -n "$CPP" && test -d "$CPP"; then CPP= fi if test -z "$CPP"; then if test "${ac_cv_prog_CPP+set}" = set; then $as_echo_n "(cached) " >&6 else # Double quotes because CPP needs to be expanded for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp" do ac_preproc_ok=false for ac_c_preproc_warn_flag in '' yes do # Use a header file that comes with gcc, so configuring glibc # with a fresh cross-compiler works. # Prefer to if __STDC__ is defined, since # exists even on freestanding compilers. # On the NeXT, cc -E runs the code through the compiler's parser, # not just through cpp. "Syntax error" is here to catch this case. cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. 
*/ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #ifdef __STDC__ # include #else # include #endif Syntax error _ACEOF if { (ac_try="$ac_cpp conftest.$ac_ext" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" $as_echo "$ac_try_echo") >&5 (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } >/dev/null && { test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || test ! -s conftest.err }; then : else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 # Broken: fails on valid input. continue fi rm -f conftest.err conftest.$ac_ext # OK, works on sane cases. Now check whether nonexistent headers # can be detected and how. cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include _ACEOF if { (ac_try="$ac_cpp conftest.$ac_ext" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" $as_echo "$ac_try_echo") >&5 (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } >/dev/null && { test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || test ! -s conftest.err }; then # Broken: success on invalid input. continue else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 # Passes both tests. ac_preproc_ok=: break fi rm -f conftest.err conftest.$ac_ext done # Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. 
rm -f conftest.err conftest.$ac_ext if $ac_preproc_ok; then break fi done ac_cv_prog_CPP=$CPP fi CPP=$ac_cv_prog_CPP else ac_cv_prog_CPP=$CPP fi { $as_echo "$as_me:$LINENO: result: $CPP" >&5 $as_echo "$CPP" >&6; } ac_preproc_ok=false for ac_c_preproc_warn_flag in '' yes do # Use a header file that comes with gcc, so configuring glibc # with a fresh cross-compiler works. # Prefer to if __STDC__ is defined, since # exists even on freestanding compilers. # On the NeXT, cc -E runs the code through the compiler's parser, # not just through cpp. "Syntax error" is here to catch this case. cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #ifdef __STDC__ # include #else # include #endif Syntax error _ACEOF if { (ac_try="$ac_cpp conftest.$ac_ext" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" $as_echo "$ac_try_echo") >&5 (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } >/dev/null && { test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || test ! -s conftest.err }; then : else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 # Broken: fails on valid input. continue fi rm -f conftest.err conftest.$ac_ext # OK, works on sane cases. Now check whether nonexistent headers # can be detected and how. cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ #include _ACEOF if { (ac_try="$ac_cpp conftest.$ac_ext" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" $as_echo "$ac_try_echo") >&5 (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } >/dev/null && { test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || test ! -s conftest.err }; then # Broken: success on invalid input. continue else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 # Passes both tests. ac_preproc_ok=: break fi rm -f conftest.err conftest.$ac_ext done # Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. rm -f conftest.err conftest.$ac_ext if $ac_preproc_ok; then : else { { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} { { $as_echo "$as_me:$LINENO: error: C preprocessor \"$CPP\" fails sanity check See \`config.log' for more details." >&5 $as_echo "$as_me: error: C preprocessor \"$CPP\" fails sanity check See \`config.log' for more details." >&2;} { (exit 1); exit 1; }; }; } fi ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu { $as_echo "$as_me:$LINENO: checking for grep that handles long lines and -e" >&5 $as_echo_n "checking for grep that handles long lines and -e... " >&6; } if test "${ac_cv_path_GREP+set}" = set; then $as_echo_n "(cached) " >&6 else if test -z "$GREP"; then ac_path_GREP_found=false # Loop through the user's path and test for each of PROGNAME-LIST as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_prog in grep ggrep; do for ac_exec_ext in '' $ac_executable_extensions; do ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext" { test -f "$ac_path_GREP" && $as_test_x "$ac_path_GREP"; } || continue # Check for GNU ac_path_GREP and select it if it is found. # Check for GNU $ac_path_GREP case `"$ac_path_GREP" --version 2>&1` in *GNU*) ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;; *) ac_count=0 $as_echo_n 0123456789 >"conftest.in" while : do cat "conftest.in" "conftest.in" >"conftest.tmp" mv "conftest.tmp" "conftest.in" cp "conftest.in" "conftest.nl" $as_echo 'GREP' >> "conftest.nl" "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break ac_count=`expr $ac_count + 1` if test $ac_count -gt ${ac_path_GREP_max-0}; then # Best one so far, save it but keep looking for a better one ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_max=$ac_count fi # 10*(2^10) chars as input seems more than enough test $ac_count -gt 10 && break done rm -f conftest.in conftest.tmp conftest.nl conftest.out;; esac $ac_path_GREP_found && break 3 done done done IFS=$as_save_IFS if test -z "$ac_cv_path_GREP"; then { { $as_echo "$as_me:$LINENO: error: no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&5 $as_echo "$as_me: error: no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&2;} { (exit 1); exit 1; }; } fi else ac_cv_path_GREP=$GREP fi fi { $as_echo "$as_me:$LINENO: result: $ac_cv_path_GREP" >&5 $as_echo "$ac_cv_path_GREP" >&6; } GREP="$ac_cv_path_GREP" { $as_echo "$as_me:$LINENO: checking for egrep" >&5 $as_echo_n "checking for egrep... 
" >&6; } if test "${ac_cv_path_EGREP+set}" = set; then $as_echo_n "(cached) " >&6 else if echo a | $GREP -E '(a|b)' >/dev/null 2>&1 then ac_cv_path_EGREP="$GREP -E" else if test -z "$EGREP"; then ac_path_EGREP_found=false # Loop through the user's path and test for each of PROGNAME-LIST as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_prog in egrep; do for ac_exec_ext in '' $ac_executable_extensions; do ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext" { test -f "$ac_path_EGREP" && $as_test_x "$ac_path_EGREP"; } || continue # Check for GNU ac_path_EGREP and select it if it is found. # Check for GNU $ac_path_EGREP case `"$ac_path_EGREP" --version 2>&1` in *GNU*) ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;; *) ac_count=0 $as_echo_n 0123456789 >"conftest.in" while : do cat "conftest.in" "conftest.in" >"conftest.tmp" mv "conftest.tmp" "conftest.in" cp "conftest.in" "conftest.nl" $as_echo 'EGREP' >> "conftest.nl" "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break ac_count=`expr $ac_count + 1` if test $ac_count -gt ${ac_path_EGREP_max-0}; then # Best one so far, save it but keep looking for a better one ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_max=$ac_count fi # 10*(2^10) chars as input seems more than enough test $ac_count -gt 10 && break done rm -f conftest.in conftest.tmp conftest.nl conftest.out;; esac $ac_path_EGREP_found && break 3 done done done IFS=$as_save_IFS if test -z "$ac_cv_path_EGREP"; then { { $as_echo "$as_me:$LINENO: error: no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&5 $as_echo "$as_me: error: no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&2;} { (exit 1); exit 1; }; } fi else ac_cv_path_EGREP=$EGREP fi fi fi { $as_echo "$as_me:$LINENO: result: $ac_cv_path_EGREP" >&5 $as_echo "$ac_cv_path_EGREP" >&6; } 
EGREP="$ac_cv_path_EGREP" { $as_echo "$as_me:$LINENO: checking for ANSI C header files" >&5 $as_echo_n "checking for ANSI C header files... " >&6; } if test "${ac_cv_header_stdc+set}" = set; then $as_echo_n "(cached) " >&6 else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include #include #include #include int main () { ; return 0; } _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" $as_echo "$ac_try_echo") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext; then ac_cv_header_stdc=yes else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_cv_header_stdc=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext if test $ac_cv_header_stdc = yes; then # SunOS 4.x string.h does not declare mem*, contrary to ANSI. cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include _ACEOF if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | $EGREP "memchr" >/dev/null 2>&1; then : else ac_cv_header_stdc=no fi rm -f conftest* fi if test $ac_cv_header_stdc = yes; then # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ #include _ACEOF if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | $EGREP "free" >/dev/null 2>&1; then : else ac_cv_header_stdc=no fi rm -f conftest* fi if test $ac_cv_header_stdc = yes; then # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi. if test "$cross_compiling" = yes; then : else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include #include #if ((' ' & 0x0FF) == 0x020) # define ISLOWER(c) ('a' <= (c) && (c) <= 'z') # define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c)) #else # define ISLOWER(c) \ (('a' <= (c) && (c) <= 'i') \ || ('j' <= (c) && (c) <= 'r') \ || ('s' <= (c) && (c) <= 'z')) # define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c)) #endif #define XOR(e, f) (((e) && !(f)) || (!(e) && (f))) int main () { int i; for (i = 0; i < 256; i++) if (XOR (islower (i), ISLOWER (i)) || toupper (i) != TOUPPER (i)) return 2; return 0; } _ACEOF rm -f conftest$ac_exeext if { (ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" $as_echo "$ac_try_echo") >&5 (eval "$ac_link") 2>&5 ac_status=$? $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { ac_try='./conftest$ac_exeext' { (case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" $as_echo "$ac_try_echo") >&5 (eval "$ac_try") 2>&5 ac_status=$? $as_echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 (exit $ac_status); }; }; then : else $as_echo "$as_me: program exited with status $ac_status" >&5 $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ( exit $ac_status ) ac_cv_header_stdc=no fi rm -rf conftest.dSYM rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext fi fi fi { $as_echo "$as_me:$LINENO: result: $ac_cv_header_stdc" >&5 $as_echo "$ac_cv_header_stdc" >&6; } if test $ac_cv_header_stdc = yes; then cat >>confdefs.h <<\_ACEOF #define STDC_HEADERS 1 _ACEOF fi # On IRIX 5.3, sys/types and inttypes.h are conflicting. for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \ inttypes.h stdint.h unistd.h do as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` { $as_echo "$as_me:$LINENO: checking for $ac_header" >&5 $as_echo_n "checking for $ac_header... " >&6; } if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then $as_echo_n "(cached) " >&6 else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ $ac_includes_default #include <$ac_header> _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" $as_echo "$ac_try_echo") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! 
-s conftest.err } && test -s conftest.$ac_objext; then eval "$as_ac_Header=yes" else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 eval "$as_ac_Header=no" fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi ac_res=`eval 'as_val=${'$as_ac_Header'} $as_echo "$as_val"'` { $as_echo "$as_me:$LINENO: result: $ac_res" >&5 $as_echo "$ac_res" >&6; } as_val=`eval 'as_val=${'$as_ac_Header'} $as_echo "$as_val"'` if test "x$as_val" = x""yes; then cat >>confdefs.h <<_ACEOF #define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1 _ACEOF fi done if test "${ac_cv_header_msr_msr_rapl_h+set}" = set; then { $as_echo "$as_me:$LINENO: checking for msr/msr_rapl.h" >&5 $as_echo_n "checking for msr/msr_rapl.h... " >&6; } if test "${ac_cv_header_msr_msr_rapl_h+set}" = set; then $as_echo_n "(cached) " >&6 fi { $as_echo "$as_me:$LINENO: result: $ac_cv_header_msr_msr_rapl_h" >&5 $as_echo "$ac_cv_header_msr_msr_rapl_h" >&6; } else # Is the header compilable? { $as_echo "$as_me:$LINENO: checking msr/msr_rapl.h usability" >&5 $as_echo_n "checking msr/msr_rapl.h usability... " >&6; } cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ $ac_includes_default #include _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" $as_echo "$ac_try_echo") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! 
-s conftest.err } && test -s conftest.$ac_objext; then ac_header_compiler=yes else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_header_compiler=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext { $as_echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 $as_echo "$ac_header_compiler" >&6; } # Is the header present? { $as_echo "$as_me:$LINENO: checking msr/msr_rapl.h presence" >&5 $as_echo_n "checking msr/msr_rapl.h presence... " >&6; } cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include _ACEOF if { (ac_try="$ac_cpp conftest.$ac_ext" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" $as_echo "$ac_try_echo") >&5 (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } >/dev/null && { test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || test ! -s conftest.err }; then ac_header_preproc=yes else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_header_preproc=no fi rm -f conftest.err conftest.$ac_ext { $as_echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 $as_echo "$ac_header_preproc" >&6; } # So? What about this header? case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in yes:no: ) { $as_echo "$as_me:$LINENO: WARNING: msr/msr_rapl.h: accepted by the compiler, rejected by the preprocessor!" >&5 $as_echo "$as_me: WARNING: msr/msr_rapl.h: accepted by the compiler, rejected by the preprocessor!" 
>&2;} { $as_echo "$as_me:$LINENO: WARNING: msr/msr_rapl.h: proceeding with the compiler's result" >&5 $as_echo "$as_me: WARNING: msr/msr_rapl.h: proceeding with the compiler's result" >&2;} ac_header_preproc=yes ;; no:yes:* ) { $as_echo "$as_me:$LINENO: WARNING: msr/msr_rapl.h: present but cannot be compiled" >&5 $as_echo "$as_me: WARNING: msr/msr_rapl.h: present but cannot be compiled" >&2;} { $as_echo "$as_me:$LINENO: WARNING: msr/msr_rapl.h: check for missing prerequisite headers?" >&5 $as_echo "$as_me: WARNING: msr/msr_rapl.h: check for missing prerequisite headers?" >&2;} { $as_echo "$as_me:$LINENO: WARNING: msr/msr_rapl.h: see the Autoconf documentation" >&5 $as_echo "$as_me: WARNING: msr/msr_rapl.h: see the Autoconf documentation" >&2;} { $as_echo "$as_me:$LINENO: WARNING: msr/msr_rapl.h: section \"Present But Cannot Be Compiled\"" >&5 $as_echo "$as_me: WARNING: msr/msr_rapl.h: section \"Present But Cannot Be Compiled\"" >&2;} { $as_echo "$as_me:$LINENO: WARNING: msr/msr_rapl.h: proceeding with the preprocessor's result" >&5 $as_echo "$as_me: WARNING: msr/msr_rapl.h: proceeding with the preprocessor's result" >&2;} { $as_echo "$as_me:$LINENO: WARNING: msr/msr_rapl.h: in the future, the compiler will take precedence" >&5 $as_echo "$as_me: WARNING: msr/msr_rapl.h: in the future, the compiler will take precedence" >&2;} ;; esac { $as_echo "$as_me:$LINENO: checking for msr/msr_rapl.h" >&5 $as_echo_n "checking for msr/msr_rapl.h... 
" >&6; } if test "${ac_cv_header_msr_msr_rapl_h+set}" = set; then $as_echo_n "(cached) " >&6 else ac_cv_header_msr_msr_rapl_h=$ac_header_preproc fi { $as_echo "$as_me:$LINENO: result: $ac_cv_header_msr_msr_rapl_h" >&5 $as_echo "$ac_cv_header_msr_msr_rapl_h" >&6; } fi if test "x$ac_cv_header_msr_msr_rapl_h" = x""yes; then : else { { $as_echo "$as_me:$LINENO: error: libmsr component: msr/msr_rapl.h not found: use configure flags to set the path " >&5 $as_echo "$as_me: error: libmsr component: msr/msr_rapl.h not found: use configure flags to set the path " >&2;} { (exit 1); exit 1; }; } fi LDFLAGS="$LDFLAGS -L$libmsr_libdir -Wl,-rpath,$libmsr_libdir" { $as_echo "$as_me:$LINENO: checking for init_msr in -lmsr" >&5 $as_echo_n "checking for init_msr in -lmsr... " >&6; } if test "${ac_cv_lib_msr_init_msr+set}" = set; then $as_echo_n "(cached) " >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-lmsr $LIBS" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ #ifdef __cplusplus extern "C" #endif char init_msr (); int main () { return init_msr (); ; return 0; } _ACEOF rm -f conftest.$ac_objext conftest$ac_exeext if { (ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" $as_echo "$ac_try_echo") >&5 (eval "$ac_link") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! 
-s conftest.err } && test -s conftest$ac_exeext && { test "$cross_compiling" = yes || $as_test_x conftest$ac_exeext }; then ac_cv_lib_msr_init_msr=yes else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_cv_lib_msr_init_msr=no fi rm -rf conftest.dSYM rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { $as_echo "$as_me:$LINENO: result: $ac_cv_lib_msr_init_msr" >&5 $as_echo "$ac_cv_lib_msr_init_msr" >&6; } if test "x$ac_cv_lib_msr_init_msr" = x""yes; then cat >>confdefs.h <<_ACEOF #define HAVE_LIBMSR 1 _ACEOF LIBS="-lmsr $LIBS" else { { $as_echo "$as_me:$LINENO: error: libmsr component: libmsr.so not found: use configure flags to set the path" >&5 $as_echo "$as_me: error: libmsr component: libmsr.so not found: use configure flags to set the path" >&2;} { (exit 1); exit 1; }; } fi LIBMSR_INCDIR=$libmsr_incdir LIBMSR_LIBDIR=$libmsr_libdir ac_config_files="$ac_config_files Makefile.libmsr" cat >confcache <<\_ACEOF # This file is a shell script that caches the results of configure # tests run on this system so they can be shared between configure # scripts and configure runs, see configure's option --config-cache. # It is not useful on other systems. If it contains results you don't # want to keep, you may remove or edit it. # # config.status only pays attention to the cache file if you give it # the --recheck option to rerun configure. # # `ac_cv_env_foo' variables (set or unset) will be overridden when # loading this file, other *unset* `ac_cv_foo' will be assigned the # following values. _ACEOF # The following way of writing the cache mishandles newlines in values, # but we know of no workaround that is simple, portable, and efficient. # So, we kill variables containing newlines. # Ultrix sh set writes to stderr and can't be redirected directly, # and sets the high bit in the cache file unless we assign to the vars. 
( for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do eval ac_val=\$$ac_var case $ac_val in #( *${as_nl}*) case $ac_var in #( *_cv_*) { $as_echo "$as_me:$LINENO: WARNING: cache variable $ac_var contains a newline" >&5 $as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; esac case $ac_var in #( _ | IFS | as_nl) ;; #( BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( *) $as_unset $ac_var ;; esac ;; esac done (set) 2>&1 | case $as_nl`(ac_space=' '; set) 2>&1` in #( *${as_nl}ac_space=\ *) # `set' does not quote correctly, so add quotes (double-quote # substitution turns \\\\ into \\, and sed turns \\ into \). sed -n \ "s/'/'\\\\''/g; s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p" ;; #( *) # `set' quotes correctly as required by POSIX, so do not add quotes. sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" ;; esac | sort ) | sed ' /^ac_cv_env_/b end t clear :clear s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/ t end s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ :end' >>confcache if diff "$cache_file" confcache >/dev/null 2>&1; then :; else if test -w "$cache_file"; then test "x$cache_file" != "x/dev/null" && { $as_echo "$as_me:$LINENO: updating cache $cache_file" >&5 $as_echo "$as_me: updating cache $cache_file" >&6;} cat confcache >$cache_file else { $as_echo "$as_me:$LINENO: not updating unwritable cache $cache_file" >&5 $as_echo "$as_me: not updating unwritable cache $cache_file" >&6;} fi fi rm -f confcache test "x$prefix" = xNONE && prefix=$ac_default_prefix # Let make expand exec_prefix. test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' # Transform confdefs.h into DEFS. # Protect against shell expansion while executing Makefile rules. # Protect against Makefile macro expansion. # # If the first sed substitution is executed (which looks for macros that # take arguments), then branch to the quote section. Otherwise, # look for a macro that doesn't take arguments. 
ac_script=' :mline /\\$/{ N s,\\\n,, b mline } t clear :clear s/^[ ]*#[ ]*define[ ][ ]*\([^ (][^ (]*([^)]*)\)[ ]*\(.*\)/-D\1=\2/g t quote s/^[ ]*#[ ]*define[ ][ ]*\([^ ][^ ]*\)[ ]*\(.*\)/-D\1=\2/g t quote b any :quote s/[ `~#$^&*(){}\\|;'\''"<>?]/\\&/g s/\[/\\&/g s/\]/\\&/g s/\$/$$/g H :any ${ g s/^\n// s/\n/ /g p } ' DEFS=`sed -n "$ac_script" confdefs.h` ac_libobjs= ac_ltlibobjs= for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue # 1. Remove the extension, and $U if already installed. ac_script='s/\$U\././;s/\.o$//;s/\.obj$//' ac_i=`$as_echo "$ac_i" | sed "$ac_script"` # 2. Prepend LIBOBJDIR. When used with automake>=1.10 LIBOBJDIR # will be set to the directory where LIBOBJS objects are built. ac_libobjs="$ac_libobjs \${LIBOBJDIR}$ac_i\$U.$ac_objext" ac_ltlibobjs="$ac_ltlibobjs \${LIBOBJDIR}$ac_i"'$U.lo' done LIBOBJS=$ac_libobjs LTLIBOBJS=$ac_ltlibobjs : ${CONFIG_STATUS=./config.status} ac_write_fail=0 ac_clean_files_save=$ac_clean_files ac_clean_files="$ac_clean_files $CONFIG_STATUS" { $as_echo "$as_me:$LINENO: creating $CONFIG_STATUS" >&5 $as_echo "$as_me: creating $CONFIG_STATUS" >&6;} cat >$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 #! $SHELL # Generated by $as_me. # Run this file to recreate the current configuration. # Compiler output produced by configure, useful for debugging # configure, is in config.log if it exists. debug=false ac_cs_recheck=false ac_cs_silent=false SHELL=\${CONFIG_SHELL-$SHELL} _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 ## --------------------- ## ## M4sh Initialization. ## ## --------------------- ## # Be more Bourne compatible DUALCASE=1; export DUALCASE # for MKS sh if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then emulate sh NULLCMD=: # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which # is contrary to our usage. Disable this feature. 
alias -g '${1+"$@"}'='"$@"' setopt NO_GLOB_SUBST else case `(set -o) 2>/dev/null` in *posix*) set -o posix ;; esac fi # PATH needs CR # Avoid depending upon Character Ranges. as_cr_letters='abcdefghijklmnopqrstuvwxyz' as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' as_cr_Letters=$as_cr_letters$as_cr_LETTERS as_cr_digits='0123456789' as_cr_alnum=$as_cr_Letters$as_cr_digits as_nl=' ' export as_nl # Printing a long string crashes Solaris 7 /usr/bin/printf. as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo if (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then as_echo='printf %s\n' as_echo_n='printf %s' else if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' as_echo_n='/usr/ucb/echo -n' else as_echo_body='eval expr "X$1" : "X\\(.*\\)"' as_echo_n_body='eval arg=$1; case $arg in *"$as_nl"*) expr "X$arg" : "X\\(.*\\)$as_nl"; arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; esac; expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" ' export as_echo_n_body as_echo_n='sh -c $as_echo_n_body as_echo' fi export as_echo_body as_echo='sh -c $as_echo_body as_echo' fi # The user is always right. if test "${PATH_SEPARATOR+set}" != set; then PATH_SEPARATOR=: (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || PATH_SEPARATOR=';' } fi # Support unset when possible. if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then as_unset=unset else as_unset=false fi # IFS # We need space, tab and new line, in precisely that order. Quoting is # there to prevent editors from complaining about space-tab. # (If _AS_PATH_WALK were called with IFS unset, it would disable word # splitting by setting IFS to empty value.) IFS=" "" $as_nl" # Find who we are. 
Look in the path if we contain no directory separator. case $0 in *[\\/]* ) as_myself=$0 ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break done IFS=$as_save_IFS ;; esac # We did not find ourselves, most probably we were run as `sh COMMAND' # in which case we are not to be found in the path. if test "x$as_myself" = x; then as_myself=$0 fi if test ! -f "$as_myself"; then $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 { (exit 1); exit 1; } fi # Work around bugs in pre-3.0 UWIN ksh. for as_var in ENV MAIL MAILPATH do ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var done PS1='$ ' PS2='> ' PS4='+ ' # NLS nuisances. LC_ALL=C export LC_ALL LANGUAGE=C export LANGUAGE # Required to use basename. if expr a : '\(a\)' >/dev/null 2>&1 && test "X`expr 00001 : '.*\(...\)'`" = X001; then as_expr=expr else as_expr=false fi if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then as_basename=basename else as_basename=false fi # Name of the executable. as_me=`$as_basename -- "$0" || $as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ X"$0" : 'X\(//\)$' \| \ X"$0" : 'X\(/\)' \| . 2>/dev/null || $as_echo X/"$0" | sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/ q } /^X\/\(\/\/\)$/{ s//\1/ q } /^X\/\(\/\).*/{ s//\1/ q } s/.*/./; q'` # CDPATH. $as_unset CDPATH as_lineno_1=$LINENO as_lineno_2=$LINENO test "x$as_lineno_1" != "x$as_lineno_2" && test "x`expr $as_lineno_1 + 1`" = "x$as_lineno_2" || { # Create $as_me.lineno as a copy of $as_myself, but with $LINENO # uniformly replaced by the line number. The first 'sed' inserts a # line-number line after each line using $LINENO; the second 'sed' # does the real work. The second script uses 'N' to pair each # line-number line with the line containing $LINENO, and appends # trailing '-' during substitution so that $LINENO is not a special # case at line end. 
# (Raja R Harinath suggested sed '=', and Paul Eggert wrote the # scripts with optimization help from Paolo Bonzini. Blame Lee # E. McMahon (1931-1989) for sed's syntax. :-) sed -n ' p /[$]LINENO/= ' <$as_myself | sed ' s/[$]LINENO.*/&-/ t lineno b :lineno N :loop s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/ t loop s/-\n.*// ' >$as_me.lineno && chmod +x "$as_me.lineno" || { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2 { (exit 1); exit 1; }; } # Don't try to exec as it changes $[0], causing all sort of problems # (the dirname of $[0] is not the place where we might find the # original and so on. Autoconf is especially sensitive to this). . "./$as_me.lineno" # Exit status is that of the last command. exit } if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then as_dirname=dirname else as_dirname=false fi ECHO_C= ECHO_N= ECHO_T= case `echo -n x` in -n*) case `echo 'x\c'` in *c*) ECHO_T=' ';; # ECHO_T is single tab character. *) ECHO_C='\c';; esac;; *) ECHO_N='-n';; esac if expr a : '\(a\)' >/dev/null 2>&1 && test "X`expr 00001 : '.*\(...\)'`" = X001; then as_expr=expr else as_expr=false fi rm -f conf$$ conf$$.exe conf$$.file if test -d conf$$.dir; then rm -f conf$$.dir/conf$$.file else rm -f conf$$.dir mkdir conf$$.dir 2>/dev/null fi if (echo >conf$$.file) 2>/dev/null; then if ln -s conf$$.file conf$$ 2>/dev/null; then as_ln_s='ln -s' # ... but there are two gotchas: # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. # In both cases, we have to default to `cp -p'. ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || as_ln_s='cp -p' elif ln conf$$.file conf$$ 2>/dev/null; then as_ln_s=ln else as_ln_s='cp -p' fi else as_ln_s='cp -p' fi rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file rmdir conf$$.dir 2>/dev/null if mkdir -p . 
2>/dev/null; then as_mkdir_p=: else test -d ./-p && rmdir ./-p as_mkdir_p=false fi if test -x / >/dev/null 2>&1; then as_test_x='test -x' else if ls -dL / >/dev/null 2>&1; then as_ls_L_option=L else as_ls_L_option= fi as_test_x=' eval sh -c '\'' if test -d "$1"; then test -d "$1/."; else case $1 in -*)set "./$1";; esac; case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in ???[sx]*):;;*)false;;esac;fi '\'' sh ' fi as_executable_p=$as_test_x # Sed expression to map a string onto a valid CPP name. as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" # Sed expression to map a string onto a valid variable name. as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" exec 6>&1 # Save the log message, to keep $[0] and so on meaningful, and to # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" This file was extended by $as_me, which was generated by GNU Autoconf 2.63. Invocation command line was CONFIG_FILES = $CONFIG_FILES CONFIG_HEADERS = $CONFIG_HEADERS CONFIG_LINKS = $CONFIG_LINKS CONFIG_COMMANDS = $CONFIG_COMMANDS $ $0 $@ on `(hostname || uname -n) 2>/dev/null | sed 1q` " _ACEOF case $ac_config_files in *" "*) set x $ac_config_files; shift; ac_config_files=$*;; esac cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 # Files that config.status was made for. config_files="$ac_config_files" _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 ac_cs_usage="\ \`$as_me' instantiates files from templates according to the current configuration. Usage: $0 [OPTION]... [FILE]... -h, --help print this help, then exit -V, --version print version number and configuration settings, then exit -q, --quiet, --silent do not print progress messages -d, --debug don't remove temporary files --recheck update $as_me by reconfiguring in the same conditions --file=FILE[:TEMPLATE] instantiate the configuration file FILE Configuration files: $config_files Report bugs to ." 
_ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_version="\\ config.status configured by $0, generated by GNU Autoconf 2.63, with options \\"`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\" Copyright (C) 2008 Free Software Foundation, Inc. This config.status script is free software; the Free Software Foundation gives unlimited permission to copy, distribute and modify it." ac_pwd='$ac_pwd' srcdir='$srcdir' test -n "\$AWK" || AWK=awk _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # The default lists apply if the user does not specify any file. ac_need_defaults=: while test $# != 0 do case $1 in --*=*) ac_option=`expr "X$1" : 'X\([^=]*\)='` ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'` ac_shift=: ;; *) ac_option=$1 ac_optarg=$2 ac_shift=shift ;; esac case $ac_option in # Handling of the options. -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) ac_cs_recheck=: ;; --version | --versio | --versi | --vers | --ver | --ve | --v | -V ) $as_echo "$ac_cs_version"; exit ;; --debug | --debu | --deb | --de | --d | -d ) debug=: ;; --file | --fil | --fi | --f ) $ac_shift case $ac_optarg in *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; esac CONFIG_FILES="$CONFIG_FILES '$ac_optarg'" ac_need_defaults=false;; --he | --h | --help | --hel | -h ) $as_echo "$ac_cs_usage"; exit ;; -q | -quiet | --quiet | --quie | --qui | --qu | --q \ | -silent | --silent | --silen | --sile | --sil | --si | --s) ac_cs_silent=: ;; # This is an error. -*) { $as_echo "$as_me: error: unrecognized option: $1 Try \`$0 --help' for more information." 
>&2 { (exit 1); exit 1; }; } ;; *) ac_config_targets="$ac_config_targets $1" ac_need_defaults=false ;; esac shift done ac_configure_extra_args= if $ac_cs_silent; then exec 6>/dev/null ac_configure_extra_args="$ac_configure_extra_args --silent" fi _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 if \$ac_cs_recheck; then set X '$SHELL' '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion shift \$as_echo "running CONFIG_SHELL=$SHELL \$*" >&6 CONFIG_SHELL='$SHELL' export CONFIG_SHELL exec "\$@" fi _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 exec 5>>config.log { echo sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX ## Running $as_me. ## _ASBOX $as_echo "$ac_log" } >&5 _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # Handling of arguments. for ac_config_target in $ac_config_targets do case $ac_config_target in "Makefile.libmsr") CONFIG_FILES="$CONFIG_FILES Makefile.libmsr" ;; *) { { $as_echo "$as_me:$LINENO: error: invalid argument: $ac_config_target" >&5 $as_echo "$as_me: error: invalid argument: $ac_config_target" >&2;} { (exit 1); exit 1; }; };; esac done # If the user did not use the arguments to specify the items to instantiate, # then the envvar interface is used. Set only those that are not. # We use the long form for the default assignment because of an extremely # bizarre bug on SunOS 4.1.3. if $ac_need_defaults; then test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files fi # Have a temporary directory for convenience. Make it in the build tree # simply because there is no reason against having it here, and in addition, # creating and moving files from /tmp can sometimes cause problems. # Hook for its removal unless debugging. # Note that there is a small window in which the directory will not be cleaned: # after its creation but before its name has been assigned to `$tmp'. $debug || { tmp= trap 'exit_status=$? { test -z "$tmp" || test ! 
-d "$tmp" || rm -fr "$tmp"; } && exit $exit_status ' 0 trap '{ (exit 1); exit 1; }' 1 2 13 15 } # Create a (secure) tmp directory for tmp files. { tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" } || { tmp=./conf$$-$RANDOM (umask 077 && mkdir "$tmp") } || { $as_echo "$as_me: cannot create a temporary directory in ." >&2 { (exit 1); exit 1; } } # Set up the scripts for CONFIG_FILES section. # No need to generate them if there are no CONFIG_FILES. # This happens for instance with `./config.status config.h'. if test -n "$CONFIG_FILES"; then ac_cr=' ' ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' /dev/null` if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then ac_cs_awk_cr='\\r' else ac_cs_awk_cr=$ac_cr fi echo 'BEGIN {' >"$tmp/subs1.awk" && _ACEOF { echo "cat >conf$$subs.awk <<_ACEOF" && echo "$ac_subst_vars" | sed 's/.*/&!$&$ac_delim/' && echo "_ACEOF" } >conf$$subs.sh || { { $as_echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5 $as_echo "$as_me: error: could not make $CONFIG_STATUS" >&2;} { (exit 1); exit 1; }; } ac_delim_num=`echo "$ac_subst_vars" | grep -c '$'` ac_delim='%!_!# ' for ac_last_try in false false false false false :; do . ./conf$$subs.sh || { { $as_echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5 $as_echo "$as_me: error: could not make $CONFIG_STATUS" >&2;} { (exit 1); exit 1; }; } ac_delim_n=`sed -n "s/.*$ac_delim\$/X/p" conf$$subs.awk | grep -c X` if test $ac_delim_n = $ac_delim_num; then break elif $ac_last_try; then { { $as_echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5 $as_echo "$as_me: error: could not make $CONFIG_STATUS" >&2;} { (exit 1); exit 1; }; } else ac_delim="$ac_delim!$ac_delim _$ac_delim!! 
" fi done rm -f conf$$subs.sh cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 cat >>"\$tmp/subs1.awk" <<\\_ACAWK && _ACEOF sed -n ' h s/^/S["/; s/!.*/"]=/ p g s/^[^!]*!// :repl t repl s/'"$ac_delim"'$// t delim :nl h s/\(.\{148\}\).*/\1/ t more1 s/["\\]/\\&/g; s/^/"/; s/$/\\n"\\/ p n b repl :more1 s/["\\]/\\&/g; s/^/"/; s/$/"\\/ p g s/.\{148\}// t nl :delim h s/\(.\{148\}\).*/\1/ t more2 s/["\\]/\\&/g; s/^/"/; s/$/"/ p b :more2 s/["\\]/\\&/g; s/^/"/; s/$/"\\/ p g s/.\{148\}// t delim ' >$CONFIG_STATUS || ac_write_fail=1 rm -f conf$$subs.awk cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 _ACAWK cat >>"\$tmp/subs1.awk" <<_ACAWK && for (key in S) S_is_set[key] = 1 FS = "" } { line = $ 0 nfields = split(line, field, "@") substed = 0 len = length(field[1]) for (i = 2; i < nfields; i++) { key = field[i] keylen = length(key) if (S_is_set[key]) { value = S[key] line = substr(line, 1, len) "" value "" substr(line, len + keylen + 3) len += length(value) + length(field[++i]) substed = 1 } else len += 1 + keylen } print line } _ACAWK _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 if sed "s/$ac_cr//" < /dev/null > /dev/null 2>&1; then sed "s/$ac_cr\$//; s/$ac_cr/$ac_cs_awk_cr/g" else cat fi < "$tmp/subs1.awk" > "$tmp/subs.awk" \ || { { $as_echo "$as_me:$LINENO: error: could not setup config files machinery" >&5 $as_echo "$as_me: error: could not setup config files machinery" >&2;} { (exit 1); exit 1; }; } _ACEOF # VPATH may cause trouble with some makes, so we remove $(srcdir), # ${srcdir} and @srcdir@ from VPATH if srcdir is ".", strip leading and # trailing colons and then remove the whole line if VPATH becomes empty # (actually we leave an empty line to preserve line numbers). 
if test "x$srcdir" = x.; then ac_vpsub='/^[ ]*VPATH[ ]*=/{ s/:*\$(srcdir):*/:/ s/:*\${srcdir}:*/:/ s/:*@srcdir@:*/:/ s/^\([^=]*=[ ]*\):*/\1/ s/:*$// s/^[^=]*=[ ]*$// }' fi cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 fi # test -n "$CONFIG_FILES" eval set X " :F $CONFIG_FILES " shift for ac_tag do case $ac_tag in :[FHLC]) ac_mode=$ac_tag; continue;; esac case $ac_mode$ac_tag in :[FHL]*:*);; :L* | :C*:*) { { $as_echo "$as_me:$LINENO: error: invalid tag $ac_tag" >&5 $as_echo "$as_me: error: invalid tag $ac_tag" >&2;} { (exit 1); exit 1; }; };; :[FH]-) ac_tag=-:-;; :[FH]*) ac_tag=$ac_tag:$ac_tag.in;; esac ac_save_IFS=$IFS IFS=: set x $ac_tag IFS=$ac_save_IFS shift ac_file=$1 shift case $ac_mode in :L) ac_source=$1;; :[FH]) ac_file_inputs= for ac_f do case $ac_f in -) ac_f="$tmp/stdin";; *) # Look for the file first in the build tree, then in the source tree # (if the path is not absolute). The absolute path cannot be DOS-style, # because $ac_f cannot contain `:'. test -f "$ac_f" || case $ac_f in [\\/$]*) false;; *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";; esac || { { $as_echo "$as_me:$LINENO: error: cannot find input file: $ac_f" >&5 $as_echo "$as_me: error: cannot find input file: $ac_f" >&2;} { (exit 1); exit 1; }; };; esac case $ac_f in *\'*) ac_f=`$as_echo "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac ac_file_inputs="$ac_file_inputs '$ac_f'" done # Let's still pretend it is `configure' which instantiates (i.e., don't # use $as_me), people would be surprised to read: # /* config.h. Generated by config.status. */ configure_input='Generated from '` $as_echo "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g' `' by configure.' if test x"$ac_file" != x-; then configure_input="$ac_file. $configure_input" { $as_echo "$as_me:$LINENO: creating $ac_file" >&5 $as_echo "$as_me: creating $ac_file" >&6;} fi # Neutralize special characters interpreted by sed in replacement strings. 
case $configure_input in #( *\&* | *\|* | *\\* ) ac_sed_conf_input=`$as_echo "$configure_input" | sed 's/[\\\\&|]/\\\\&/g'`;; #( *) ac_sed_conf_input=$configure_input;; esac case $ac_tag in *:-:* | *:-) cat >"$tmp/stdin" \ || { { $as_echo "$as_me:$LINENO: error: could not create $ac_file" >&5 $as_echo "$as_me: error: could not create $ac_file" >&2;} { (exit 1); exit 1; }; } ;; esac ;; esac ac_dir=`$as_dirname -- "$ac_file" || $as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$ac_file" : 'X\(//\)[^/]' \| \ X"$ac_file" : 'X\(//\)$' \| \ X"$ac_file" : 'X\(/\)' \| . 2>/dev/null || $as_echo X"$ac_file" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q'` { as_dir="$ac_dir" case $as_dir in #( -*) as_dir=./$as_dir;; esac test -d "$as_dir" || { $as_mkdir_p && mkdir -p "$as_dir"; } || { as_dirs= while :; do case $as_dir in #( *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( *) as_qdir=$as_dir;; esac as_dirs="'$as_qdir' $as_dirs" as_dir=`$as_dirname -- "$as_dir" || $as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$as_dir" : 'X\(//\)[^/]' \| \ X"$as_dir" : 'X\(//\)$' \| \ X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || $as_echo X"$as_dir" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q'` test -d "$as_dir" && break done test -z "$as_dirs" || eval "mkdir $as_dirs" } || test -d "$as_dir" || { { $as_echo "$as_me:$LINENO: error: cannot create directory $as_dir" >&5 $as_echo "$as_me: error: cannot create directory $as_dir" >&2;} { (exit 1); exit 1; }; }; } ac_builddir=. case "$ac_dir" in .) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; *) ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` # A ".." for each directory in $ac_dir_suffix. 
ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` case $ac_top_builddir_sub in "") ac_top_builddir_sub=. ac_top_build_prefix= ;; *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; esac ;; esac ac_abs_top_builddir=$ac_pwd ac_abs_builddir=$ac_pwd$ac_dir_suffix # for backward compatibility: ac_top_builddir=$ac_top_build_prefix case $srcdir in .) # We are building in place. ac_srcdir=. ac_top_srcdir=$ac_top_builddir_sub ac_abs_top_srcdir=$ac_pwd ;; [\\/]* | ?:[\\/]* ) # Absolute name. ac_srcdir=$srcdir$ac_dir_suffix; ac_top_srcdir=$srcdir ac_abs_top_srcdir=$srcdir ;; *) # Relative name. ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix ac_top_srcdir=$ac_top_build_prefix$srcdir ac_abs_top_srcdir=$ac_pwd/$srcdir ;; esac ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix case $ac_mode in :F) # # CONFIG_FILE # _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # If the template does not know about datarootdir, expand it. # FIXME: This hack should be removed a few years after 2.60. ac_datarootdir_hack=; ac_datarootdir_seen= ac_sed_dataroot=' /datarootdir/ { p q } /@datadir@/p /@docdir@/p /@infodir@/p /@localedir@/p /@mandir@/p ' case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in *datarootdir*) ac_datarootdir_seen=yes;; *@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*) { $as_echo "$as_me:$LINENO: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5 $as_echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;} _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_datarootdir_hack=' s&@datadir@&$datadir&g s&@docdir@&$docdir&g s&@infodir@&$infodir&g s&@localedir@&$localedir&g s&@mandir@&$mandir&g s&\\\${datarootdir}&$datarootdir&g' ;; esac _ACEOF # Neutralize VPATH when `$srcdir' = `.'. # Shell code in configure.ac might set extrasub. # FIXME: do we really want to maintain this feature? 
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_sed_extra="$ac_vpsub $extrasub _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 :t /@[a-zA-Z_][a-zA-Z_0-9]*@/!b s|@configure_input@|$ac_sed_conf_input|;t t s&@top_builddir@&$ac_top_builddir_sub&;t t s&@top_build_prefix@&$ac_top_build_prefix&;t t s&@srcdir@&$ac_srcdir&;t t s&@abs_srcdir@&$ac_abs_srcdir&;t t s&@top_srcdir@&$ac_top_srcdir&;t t s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t s&@builddir@&$ac_builddir&;t t s&@abs_builddir@&$ac_abs_builddir&;t t s&@abs_top_builddir@&$ac_abs_top_builddir&;t t $ac_datarootdir_hack " eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$tmp/subs.awk" >$tmp/out \ || { { $as_echo "$as_me:$LINENO: error: could not create $ac_file" >&5 $as_echo "$as_me: error: could not create $ac_file" >&2;} { (exit 1); exit 1; }; } test -z "$ac_datarootdir_hack$ac_datarootdir_seen" && { ac_out=`sed -n '/\${datarootdir}/p' "$tmp/out"`; test -n "$ac_out"; } && { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' "$tmp/out"`; test -z "$ac_out"; } && { $as_echo "$as_me:$LINENO: WARNING: $ac_file contains a reference to the variable \`datarootdir' which seems to be undefined. Please make sure it is defined." >&5 $as_echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir' which seems to be undefined. Please make sure it is defined." 
>&2;} rm -f "$tmp/stdin" case $ac_file in -) cat "$tmp/out" && rm -f "$tmp/out";; *) rm -f "$ac_file" && mv "$tmp/out" "$ac_file";; esac \ || { { $as_echo "$as_me:$LINENO: error: could not create $ac_file" >&5 $as_echo "$as_me: error: could not create $ac_file" >&2;} { (exit 1); exit 1; }; } ;; esac done # for ac_tag { (exit 0); exit 0; } _ACEOF chmod +x $CONFIG_STATUS ac_clean_files=$ac_clean_files_save test $ac_write_fail = 0 || { { $as_echo "$as_me:$LINENO: error: write failure creating $CONFIG_STATUS" >&5 $as_echo "$as_me: error: write failure creating $CONFIG_STATUS" >&2;} { (exit 1); exit 1; }; } # configure is writing to config.log, and then calls config.status. # config.status does its own redirection, appending to config.log. # Unfortunately, on DOS this fails, as config.log is still kept open # by configure, so config.status won't be able to write to it; its # output is simply discarded. So we exec the FD to /dev/null, # effectively closing config.log, so it can be properly (re)opened and # appended to by config.status. When coming back to configure, we # need to make the FD available again. if test "$no_create" != yes; then ac_cs_success=: ac_config_status_args= test "$silent" = yes && ac_config_status_args="$ac_config_status_args --quiet" exec 5>/dev/null $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false exec 5>>config.log # Use ||, not &&, to avoid exiting from the if with $? = 1, which # would make configure fail if this is the last instruction. 
$ac_cs_success || { (exit 1); exit 1; } fi if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then { $as_echo "$as_me:$LINENO: WARNING: unrecognized options: $ac_unrecognized_opts" >&5 $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;} fi papi-5.6.0/src/perfctr-2.7.x/linux/include/asm-i386/000775 001750 001750 00000000000 13216244370 023572 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/ftests/fmatrixlowpapi.F000664 001750 001750 00000011634 13216244361 022014 0ustar00jshenry1963jshenry1963000000 000000 C **************************************************************************** C C matrixpapi.f C An example of matrix-matrix multiplication and using PAPI low level to C look at the performance. written by Kevin London C March 2000 C **************************************************************************** #include "fpapi_test.h" program fmatrixlowpapi implicit integer (p) INTEGER ncols1,nrows1,ncols2,nrows2 PARAMETER(nrows1=175,ncols1=225,nrows2=ncols1,ncols2=150) INTEGER i,j,k,retval,nchr,numevents,EventSet CHARACTER*(PAPI_MAX_STR_LEN) vstring,mstring C PAPI values of the counters INTEGER event INTEGER*8 values(2) INTEGER*8 starttime,stoptime REAL*8 finaltime INTEGER ncpu,nnodes,totalcpus,vendor,model REAL revision, mhz REAL*8 p(nrows1,ncols1),q(nrows2,ncols2), & r(nrows1,ncols2) integer tests_quiet, get_quiet external get_quiet tests_quiet = get_quiet() EventSet = PAPI_NULL C Setup default values numevents=0 starttime=0 stoptime=0 retval = PAPI_VER_CURRENT call PAPIf_library_init( retval ) if ( retval.NE.PAPI_VER_CURRENT) then call ftest_fail(__FILE__, __LINE__, *'PAPI_library_init', retval) end if C Create the eventset call PAPIf_create_eventset(EventSet,retval) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, *'PAPIf_create_eventset', retval) end if C Total cycles call PAPIf_add_event(EventSet,PAPI_TOT_CYC,retval) if ( retval .NE. 
PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, *'PAPIf_add_event PAPI_TOT_CYC', retval) end if C Total [floating point] instructions call PAPIf_query_event(PAPI_FP_INS, retval) if (retval .NE. PAPI_OK) then event = PAPI_TOT_INS else event = PAPI_FP_INS end if call PAPIf_add_event(EventSet,event,retval) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, *'PAPIf_add_event PAPI_TOT_INS', retval) end if C Grab the hardware info call PAPIf_get_hardware_info( ncpu, nnodes, totalcpus, vendor, . vstring, model, mstring, revision, mhz ) do i=len(mstring),1,-1 if(mstring(i:i).NE.' ') goto 10 end do 10 if(i.LT.1)then nchr=1 else nchr=i end if if (tests_quiet .EQ. 0) then print * print 100, totalcpus,mstring(1:nchr), mhz print * print 101,'ncpu',ncpu, 'nnodes',nnodes, 'totalcpus',totalcpus print 102,'mhz',mhz,'revision',revision print 103,'vendor',vendor,'vstring',vstring print 104,'model',model,'mstring',mstring print * end if 100 format(i5,' CPU(s) ',a,' at ',f7.2,' MHz') 101 format(a9,' =',i6,7x,a9,' =',i5,5x,a9,'=',i5) 102 format(a9,' =',f7.2,6x,a9,' =',f15.5) 103 format(a9,' =',i6,7x,a9,' =',a40) 104 format(a9,' =',i6,7x,a9,' =',a40) C Open matrix file number 1 for reading C OPEN(UNIT=1,FILE='fmt1',STATUS='OLD') C Open matrix file number 2 for reading C OPEN(UNIT=2,FILE='fmt2',STATUS='OLD') C matrix 1: read in the matrix values do i=1, nrows1 do j=1,ncols1 p(i,j) = i*j*1.0 end do end do C matrix 2: read in the matrix values do i=1, nrows2 do j=1,ncols2 q(i,j) = i*j*1.0 end do end do C Initialize the result matrix do i=1,nrows1 do j=1, ncols2 r(i,j) = i*j*1.0 end do end do C Grab the beginning time call PAPIf_get_real_usec( starttime ) C Start the event counters call PAPIf_start( EventSet, retval ) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 
'PAPIf_start', retval) end if C Compute the matrix-matrix multiplication do i=1,nrows1 do j=1,ncols2 do k=1,ncols1 r(i,j)=r(i,j) + p(i,k)*q(k,j) end do end do end do C Stop the counters and put the results in the array values call PAPIf_stop(EventSet,values,retval) if ( retval .NE. PAPI_OK ) then call ftest_fail(__FILE__, __LINE__, . 'PAPIf_stop', retval) end if call PAPIf_get_real_usec( stoptime ) finaltime=(REAL(stoptime)/1000000.0)-(REAL(starttime)/1000000.0) C Make sure the compiler does not optimize away the multiplication call dummy(r) if (tests_quiet .EQ. 0) then print *, 'Time: ', finaltime, 'seconds' print *, 'Cycles: ', values(1) if (event .EQ. PAPI_TOT_INS) then print *, 'Total Instructions: ', values(2) else print *, 'FP Instructions: ', values(2) write(*,'(a,f9.6)') ' Efficiency (fp/cycle):', & real(values(2))/real(values(1)) end if end if call ftests_pass(__FILE__) end papi-5.6.0/src/libpfm-3.y/examples_v3.x/ia64/ita_rr.c000664 001750 001750 00000027345 13216244362 024101 0ustar00jshenry1963jshenry1963000000 000000 /* * ita_rr.c - example of how to use data range restriction with the Itanium PMU * * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux/ia64. */ #include #include #include #include #include #include #include #include #include #include #define N_LOOP 100000000U #if defined(__ECC) && defined(__INTEL_COMPILER) /* if you do not have this file, your compiler is too old */ #include #define clear_psr_ac() __rum(1UL<<3) #elif defined(__GNUC__) static inline void clear_psr_ac(void) { __asm__ __volatile__("rum psr.ac;;" ::: "memory" ); } #else #error "You need to define clear_psr_ac() for your compiler" #endif #define TEST_DATA_COUNT 16 #define NUM_PMCS PFMLIB_MAX_PMCS #define NUM_PMDS PFMLIB_MAX_PMDS #define MAX_PMU_NAME_LEN 32 #define MAX_EVT_NAME_LEN 128 /* an event name together with the count main() expects to read back for it */ typedef struct { char *event_name; unsigned long expected_value; } event_desc_t; /* events programmed by main(); the list is terminated by a NULL event_name */ static event_desc_t event_list[]={ { "misaligned_loads_retired", N_LOOP }, { "misaligned_stores_retired", N_LOOP }, { NULL, 0UL} }; /* long, int, short and byte views overlaid on the same storage */ typedef union { unsigned long l_tab[2]; unsigned int i_tab[4]; unsigned short s_tab[8]; unsigned char c_tab[16]; } test_data_t; /* * do_test - perform one read-modify-write of an unsigned int located one byte * into data->c_tab, i.e. at a deliberately odd address. * * Returns 0 after the access, or -1 (after printing a message) when the * computed address turns out to be even. */ static int do_test(test_data_t *data) { unsigned int *l, v; l = (unsigned int *)(data->c_tab+1); if (((unsigned long)l & 0x1) == 0) { printf("Data is not unaligned, can't run test\n"); return -1; } v = *l; v++; *l = v; return 0; } /* * fatal_error - print a printf-style message on stderr and terminate the * process with exit status 1. Never returns. */ static void fatal_error(char *fmt,...) __attribute__((noreturn)); static void fatal_error(char *fmt, ...) 
{ va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); exit(1); } /* * main - count misaligned loads and stores restricted to one data address range. * * Allocates two buffers (test_data and test_data_fake), asks the library to * program the data debug registers so that only test_data is covered, runs * do_test() N_LOOP times on both buffers, then prints every PMD value next to * the count listed in event_list. argc and argv are not used. * * NOTE: a count mismatch is reported on stdout but main still returns 0. */ int main(int argc, char **argv) { event_desc_t *p; test_data_t *test_data, *test_data_fake; unsigned long range_start, range_end; int ret, type = 0; pfmlib_input_param_t inp; pfmlib_output_param_t outp; pfmlib_ita_input_param_t ita_inp; pfmlib_ita_output_param_t ita_outp; pfarg_pmr_t pd[NUM_PMDS]; pfarg_pmr_t pc[NUM_PMCS]; pfarg_pmr_t dbrs[8]; pfmlib_options_t pfmlib_options; unsigned int i; int id; char name[MAX_EVT_NAME_LEN]; /* * Initialize pfm library (required before we can use it) */ ret = pfm_initialize(); if (ret != PFMLIB_SUCCESS) fatal_error("Cannot initialize library: %s\n", pfm_strerror(ret)); /* * Let's make sure we run this on the right CPU family */ pfm_get_pmu_type(&type); if (type != PFMLIB_ITANIUM_PMU) { char model[MAX_PMU_NAME_LEN]; pfm_get_pmu_name(model, MAX_PMU_NAME_LEN); fatal_error("this program does not work with %s PMU\n", model); } /* * pass options to library (optional) */ memset(&pfmlib_options, 0, sizeof(pfmlib_options)); pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ pfmlib_options.pfm_verbose = 0; /* set to 1 for debug */ pfm_set_options(&pfmlib_options); /* * now let's allocate the data structure we will be monitoring */ test_data = (test_data_t *)malloc(sizeof(test_data_t)*TEST_DATA_COUNT); if (test_data == NULL) { fatal_error("cannot allocate test data structure"); } test_data_fake = (test_data_t *)malloc(sizeof(test_data_t)*TEST_DATA_COUNT); if (test_data_fake == NULL) { fatal_error("cannot allocate test data structure"); } /* * Compute the range we are interested in: [range_start, range_end) spans * the whole test_data allocation. */ range_start = (unsigned long)test_data; range_end = range_start + sizeof(test_data_t)*TEST_DATA_COUNT; memset(pd, 0, sizeof(pd)); memset(pc, 0, sizeof(pc)); memset(dbrs,0, sizeof(dbrs)); /* * prepare parameters to library. This example uses the Itanium-specific * data range restriction feature, so the model-specific input and output 
* structures (ita_inp, ita_outp) are cleared here along with inp and outp. */ memset(&inp,0, sizeof(inp)); memset(&outp,0, sizeof(outp)); memset(&ita_inp,0, sizeof(ita_inp)); memset(&ita_outp,0, sizeof(ita_outp)); /* * find the requested events: one inp.pfp_events[] entry is filled per * event_list entry, and i ends up holding the number of events found. */ p = event_list; for (i=0; p->event_name ; i++, p++) { if (pfm_find_event(p->event_name, &inp.pfp_events[i].event) != PFMLIB_SUCCESS) { fatal_error("Cannot find %s event\n", p->event_name); } } /* * set the privilege mode: * PFM_PLM3 : user level only */ inp.pfp_dfl_plm = PFM_PLM3; /* * how many counters we use */ inp.pfp_event_count = i; /* * We use the library to figure out how to program the debug registers * to cover the data range we are interested in. The rr_end parameter * must point to the byte after the last of the range (C-style range). * * Because of the masking mechanism and therefore alignment constraints used to implement * this feature, it may not be possible to exactly cover a given range. It may be that * the coverage exceeds the desired range. So it is possible to capture noise if * the surrounding addresses are also heavily used. You can figure out the actual * start and end offsets of the generated range by checking the rr_soff and rr_eoff fields * in the pfmlib_ita_output_param_t structure when coming back from the library call. * * Upon return, the pfmlib_ita_output_param_t.pfp_ita_drange.rr_br array is programmed and * the number of entries used to cover the range is in rr_nbr_used. */ /* * We indicate that we are using a Data Range Restriction feature. * In this particular case this will cause pfm_dispatch_events() to * add pmc13 to the list of PMC registers to initialize. */ ita_inp.pfp_ita_drange.rr_used = 1; ita_inp.pfp_ita_drange.rr_limits[0].rr_start = range_start; ita_inp.pfp_ita_drange.rr_limits[0].rr_end = range_end; /* * use the library to find the monitors to use * * upon return, outp.pfp_pmc_count contains the number of entries * used in outp.pfp_pmcs[]. 
*/ if ((ret=pfm_dispatch_events(&inp, &ita_inp, &outp, &ita_outp)) != PFMLIB_SUCCESS) fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); printf("data range : [0x%016lx-0x%016lx): %d pair of debug registers used\n" "start_offset:-0x%lx end_offset:+0x%lx\n", range_start, range_end, ita_outp.pfp_ita_drange.rr_nbr_used >> 1, ita_outp.pfp_ita_drange.rr_infos[0].rr_soff, ita_outp.pfp_ita_drange.rr_infos[0].rr_eoff); printf("fake data range: [0x%016lx-0x%016lx)\n", (unsigned long)test_data_fake, (unsigned long)test_data_fake+sizeof(test_data_t)*TEST_DATA_COUNT); /* * now create the session */ id =pfm_create(0, NULL); if (id == -1) { if (errno == ENOSYS) fatal_error("Your kernel does not have performance monitoring support!\n"); fatal_error("cannot create session %s\n", strerror(errno)); } /* * Now prepare the arguments to initialize the PMDs and PMCs. * We must use pfp_pmc_count to determine the number of PMCs to initialize. * We must use pfp_event_count to determine the number of PMDs to initialize. * Some events cause extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. * * This step is new compared to libpfm-2.x. It is necessary because the library no * longer knows about the kernel data structures. */ for (i=0; i < outp.pfp_pmc_count; i++) { pc[i].reg_num = outp.pfp_pmcs[i].reg_num; pc[i].reg_value = outp.pfp_pmcs[i].reg_value; } /* * the PMCs controlling the events ALWAYS come first, that's why this loop * is safe even when extra PMCs are needed to support a particular event. */ for (i=0; i < inp.pfp_event_count; i++) { pd[i].reg_num = pc[i].reg_num; } /* * propagate the setup for the debug registers from the library to the arguments * to the syscall. The library does not know the type of the syscall * anymore. 
The DBRs are mapped onto PMC numbers starting at PMC264 (dbrs[] has * room for 8 of them, i.e. PMC264 through PMC271). */ for (i=0; i < ita_outp.pfp_ita_drange.rr_nbr_used; i++) { dbrs[i].reg_num = 264+ita_outp.pfp_ita_drange.rr_br[i].reg_num; dbrs[i].reg_value = ita_outp.pfp_ita_drange.rr_br[i].reg_value; } /* * Program the data debug registers. */ if (pfm_write(id, 0, PFM_RW_PMC, dbrs, ita_outp.pfp_ita_drange.rr_nbr_used * sizeof(*dbrs)) == -1) fatal_error("pfm_write_pmrs error errno %d\n",errno); /* * Now program the registers * * We don't use the same element count for the PMC and the PMD writes * because, as we said earlier, pc may contain more elements than * the number of events we specified, i.e., it contains more than the counting monitors. */ if (pfm_write(id, 0, PFM_RW_PMC, pc, outp.pfp_pmc_count * sizeof(*pc)) == -1) fatal_error("pfm_write error errno %d\n",errno); if (pfm_write(id, 0, PFM_RW_PMD, pd, inp.pfp_event_count * sizeof(*pd)) == -1) fatal_error("pfm_write(PMD) error errno %d\n",errno); /* * now we attach the session to the current process */ if (pfm_attach(id, 0, getpid()) == -1) fatal_error("pfm_attach error errno %d\n",errno); /* * Let's make sure that the hardware does the unaligned accesses (do not use the * kernel software handler otherwise the PMU won't see the unaligned fault). */ clear_psr_ac(); /* * Let's roll now. * * The idea behind this test is to have two dynamically allocated data structures * which are accessed in an unaligned fashion. But we want to capture only the unaligned * accesses on one of the two. So the debug registers are programmed to cover the * first one ONLY. Then we activate monitoring and access the two data structures. * This is an artificial example just to demonstrate how to use data address range * restrictions. 
*/ if (pfm_set_state(id, 0, PFM_ST_START)) fatal_error("pfm_set_state error errno %d\n",errno); /* the return value of do_test() is not checked here */ for (i=0; i < N_LOOP; i++) { do_test(test_data); do_test(test_data_fake); } if (pfm_set_state(id, 0, PFM_ST_STOP)) fatal_error("pfm_set_state error errno %d\n",errno); /* * now read the results */ if (pfm_read(id, 0, PFM_RW_PMD, pd, inp.pfp_event_count * sizeof(*pd)) == -1) fatal_error( "pfm_read error errno %d\n",errno); /* * print the results * * It is important to realize that the first event we specified may not * be in PMD4. Not all events can be measured by any monitor. That's why * pd[i].reg_num (copied from pc[i].reg_num earlier) is printed to show * where event i was allocated. * * For this example, we expect to see a value of N_LOOP for both misaligned loads * and misaligned stores (one of each per do_test() call on test_data). But it can * be twice that when test_data and test_data_fake are allocated very close to each * other and the range created with the debug registers is larger than test_data. * * The loop stops at the first mismatch. */ for (i=0; i < inp.pfp_event_count; i++) { pfm_get_full_event_name(&inp.pfp_events[i], name, MAX_EVT_NAME_LEN); printf("PMD%u %20lu %s (expected %lu)\n", pd[i].reg_num, pd[i].reg_value, name, event_list[i].expected_value); if (pd[i].reg_value != event_list[i].expected_value) { printf("error: Result should be %lu for %s\n", event_list[i].expected_value, name); break; } } /* * close the session descriptor and release both test buffers */ close(id); free(test_data); free(test_data_fake); return 0; } papi-5.6.0/src/event_data/ppc970/events000664 001750 001750 00000312326 13216244361 021714 0ustar00jshenry1963jshenry1963000000 000000 { **************************** { THIS IS OPEN SOURCE CODE { **************************** { (C) COPYRIGHT International Business Machines Corp. 2005 { This file is licensed under the University of Tennessee license. { See LICENSE.txt. 
{ { File: events/ppc970/events { Author: Maynard Johnson { maynardj@us.ibm.com { Mods: { { counter 1 } #0,u,g,n,n,PM_BRQ_FULL_CYC,Cycles branch queue full ##10095,60095 The ISU sends a signal indicating that the issue queue that feeds the ifu br unit cannot accept any more group (queue is full of groups). #1,v,g,n,n,PM_CR_MAP_FULL_CYC,Cycles CR logical operation mapper full ##10094,60094 The ISU sends a signal indicating that the cr mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. #2,v,g,n,n,PM_CYC,Processor cycles ##0000F Processor cycles #3,v,g,n,n,PM_DATA_FROM_L2,Data loaded from L2 ##C3087 DL1 was reloaded from the local L2 due to a demand load #4,v,g,n,n,PM_DATA_TABLEWALK_CYC,Cycles doing data tablewalks ##80097 This signal is asserted every cycle when a tablewalk is active. While a tablewalk is active any request attempting to access the TLB will be rejected and retried. #5,v,g,n,n,PM_DSLB_MISS,Data SLB misses ##80095 A SLB miss for a data request occurred. SLB misses trap to the operating system to resolve #6,v,g,n,n,PM_DTLB_MISS,Data TLB misses ##80094 A TLB miss for a data request occurred. Requests that miss the TLB may be retried until the instruction is in the next to complete group (unless HID4 is set to allow speculative tablewalks). This may result in multiple TLB misses for the same instruction. #7,v,g,n,n,PM_FPR_MAP_FULL_CYC,Cycles FPR mapper full ##10091,60091 The ISU sends a signal indicating that the FPR mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. #8,v,g,n,n,PM_FPU0_ALL,FPU0 executed add, mult, sub, cmp or sel instruction ##00093 This signal is active for one cycle when fp0 is executing an add, mult, sub, compare, or fsel kind of instruction. This could be fadd*, fmul*, fsub*, fcmp**, fsel where XYZ* means XYZ, XYZs, XYZ., XYZs. 
and XYZ** means XYZu, XYZo #9,v,g,n,n,PM_FPU0_DENORM,FPU0 received denormalized data ##02098 This signal is active for one cycle when one of the operands is denormalized. #10,v,g,n,n,PM_FPU0_FDIV,FPU0 executed FDIV instruction ##00090 This signal is active for one cycle at the end of the microcode executed when fp0 is executing a divide instruction. This could be fdiv, fdivs, fdiv. fdivs. #11,v,g,n,n,PM_FPU0_FMA,FPU0 executed multiply-add instruction ##00091 This signal is active for one cycle when fp0 is executing multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. #12,v,g,n,n,PM_FPU0_FSQRT,FPU0 executed FSQRT instruction ##00092 This signal is active for one cycle at the end of the microcode executed when fp0 is executing a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. #13,v,g,n,n,PM_FPU0_FULL_CYC,Cycles FPU0 issue queue full ##10093,60093 The issue queue for FPU unit 0 cannot accept any more instructions. Issue is stopped #14,v,g,n,n,PM_FPU0_SINGLE,FPU0 executed single precision instruction ##0209B This signal is active for one cycle when fp0 is executing single precision instruction. #15,v,g,n,n,PM_FPU0_STALL3,FPU0 stalled in pipe3 ##02099 This signal indicates that fp0 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). This signal is active during the entire duration of the stall. #16,v,g,n,n,PM_FPU0_STF,FPU0 executed store instruction ##0209A This signal is active for one cycle when fp0 is executing a store instruction. #17,v,g,n,n,PM_FPU1_ALL,FPU1 executed add, mult, sub, cmp or sel instruction ##00097 This signal is active for one cycle when fp1 is executing an add, mult, sub, compare, or fsel kind of instruction. This could be fadd*, fmul*, fsub*, fcmp**, fsel where XYZ* means XYZ, XYZs, XYZ., XYZs. 
and XYZ** means XYZu, XYZo #18,v,g,n,n,PM_FPU1_DENORM,FPU1 received denormalized data ##0209C This signal is active for one cycle when one of the operands is denormalized. #19,v,g,n,n,PM_FPU1_FDIV,FPU1 executed FDIV instruction ##00094 This signal is active for one cycle at the end of the microcode executed when fp1 is executing a divide instruction. This could be fdiv, fdivs, fdiv. fdivs. #20,v,g,n,n,PM_FPU1_FMA,FPU1 executed multiply-add instruction ##00095 This signal is active for one cycle when fp1 is executing multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. #21,v,g,n,n,PM_FPU1_FSQRT,FPU1 executed FSQRT instruction ##00096 This signal is active for one cycle at the end of the microcode executed when fp1 is executing a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. #22,v,g,n,n,PM_FPU1_FULL_CYC,Cycles FPU1 issue queue full ##10097,60097 The issue queue for FPU unit 1 cannot accept any more instructions. Issue is stopped #23,v,g,n,n,PM_FPU1_SINGLE,FPU1 executed single precision instruction ##0209F This signal is active for one cycle when fp1 is executing single precision instruction. #24,v,g,n,n,PM_FPU1_STALL3,FPU1 stalled in pipe3 ##0209D This signal indicates that fp1 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). This signal is active during the entire duration of the stall. #25,v,g,n,n,PM_FPU1_STF,FPU1 executed store instruction ##0209E This signal is active for one cycle when fp1 is executing a store instruction. #26,v,g,n,n,PM_FPU_DENORM,FPU received denormalized data ##02080 This signal is active for one cycle when one of the operands is denormalized. Combined Unit 0 + Unit 1 #27,v,g,n,n,PM_FPU_FDIV,FPU executed FDIV instruction ##00080 This signal is active for one cycle at the end of the microcode executed when FPU is executing a divide instruction. 
This could be fdiv, fdivs, fdiv. fdivs. Combined Unit 0 + Unit 1 #28,v,g,n,n,PM_GCT_EMPTY_CYC,Cycles GCT empty ##00004 The Global Completion Table is completely empty #29,v,g,n,n,PM_GCT_FULL_CYC,Cycles GCT full ##10090,60090 The ISU sends a signal indicating the gct is full. #30,v,g,n,n,PM_GRP_BR_MPRED,Group experienced a branch mispredict ##1209F,6209F Group experienced a branch mispredict #31,v,g,n,n,PM_GRP_BR_REDIR,Group experienced branch redirect ##1209E,6209E Group experienced branch redirect #32,v,g,n,n,PM_GRP_DISP_REJECT,Group dispatch rejected ##1209C,6209C A group that previously attempted dispatch was rejected. #33,v,g,n,n,PM_GRP_DISP_VALID,Group dispatch valid ##1209B,6209B Dispatch has been attempted for a valid group. Some groups may be rejected. The total number of successful dispatches is the number of dispatch valid minus dispatch reject. #34,v,g,n,n,PM_IC_PREF_INSTALL,Instruction prefetched installed in prefetch ##2209E New line coming into the prefetch buffer #35,v,g,n,n,PM_IC_PREF_REQ,Instruction prefetch requests ##2209D Asserted when a non-canceled prefetch is made to the cache interface unit (CIU). #36,v,g,n,n,PM_IERAT_XLATE_WR,Translation written to ierat ##2209F This signal will be asserted each time the I-ERAT is written. This indicates that an ERAT miss has been serviced. ERAT misses will initiate a sequence resulting in the ERAT being written. ERAT misses that are later ignored will not be counted unless the ERAT is written before the instruction stream is changed, This should be a fairly accurate count of ERAT missed (best available). #37,c,g,n,n,PM_INST_CMPL,Instructions completed ##00009 Number of Eligible Instructions that completed. #38,v,g,n,n,PM_INST_DISP,Instructions dispatched ##12098,12099,1209A,62098,62099,6209A The ISU sends the number of instructions dispatched. #39,v,g,n,n,PM_INST_FROM_L1,Instruction fetched from L1 ##2208D An instruction fetch group was fetched from L1. 
Fetch Groups can contain up to 8 instructions #40,v,g,n,n,PM_INST_FROM_L2,Instructions fetched from L2 ##22086 An instruction fetch group was fetched from L2. Fetch Groups can contain up to 8 instructions #41,u,g,n,n,PM_ISLB_MISS,Instruction SLB misses ##80091 A SLB miss for an instruction fetch has occurred #42,v,g,n,n,PM_ITLB_MISS,Instruction TLB misses ##80090 A TLB miss for an Instruction Fetch has occurred #43,v,g,n,n,PM_LARX_LSU0,Larx executed on LSU0 ##8209F A larx (lwarx or ldarx) was executed on side 0 (there is no corresponding unit 1 event since larx instructions can only execute on unit 0) #44,u,g,n,n,PM_LR_CTR_MAP_FULL_CYC,Cycles LR/CTR mapper full ##10096,60096 The ISU sends a signal indicating that the lr/ctr mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. #45,v,g,n,n,PM_LSU0_DERAT_MISS,LSU0 DERAT misses ##80092 A data request (load or store) from LSU Unit 0 missed the ERAT and resulted in an ERAT reload. Multiple instructions may miss the ERAT entry for the same 4K page, but only one reload will occur. #46,v,g,n,n,PM_LSU0_FLUSH_LRQ,LSU0 LRQ flushes ##C0092 A load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. #47,u,g,n,n,PM_LSU0_FLUSH_SRQ,LSU0 SRQ flushes ##C0093 A store was flushed because younger load hits and older store that is already in the SRQ or in the same group. 
#48,v,g,n,n,PM_LSU0_FLUSH_ULD,LSU0 unaligned load flushes ##C0090 A load was flushed from unit 0 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) #49,v,g,n,n,PM_LSU0_FLUSH_UST,LSU0 unaligned store flushes ##C0091 A store was flushed from unit 0 because it was unaligned (crossed a 4k boundary) #50,v,g,n,n,PM_LSU0_REJECT_ERAT_MISS,LSU0 reject due to ERAT miss ##C609B LSU0 reject due to ERAT miss #51,v,g,n,n,PM_LSU0_REJECT_LMQ_FULL,LSU0 reject due to LMQ full or missed data coming ##C6099 LSU0 reject due to LMQ full or missed data coming #52,v,g,n,n,PM_LSU0_REJECT_RELOAD_CDF,LSU0 reject due to reload CDF or tag update collision ##C609A LSU0 reject due to reload CDF or tag update collision #53,v,g,n,n,PM_LSU0_REJECT_SRQ,LSU0 SRQ rejects ##C6098 LSU0 SRQ rejects #54,u,g,n,n,PM_LSU0_SRQ_STFWD,LSU0 SRQ store forwarded ##C2098 Data from a store instruction was forwarded to a load on unit 0 #55,v,g,n,n,PM_LSU1_DERAT_MISS,LSU1 DERAT misses ##80096 A data request (load or store) from LSU Unit 1 missed the ERAT and resulted in an ERAT reload. Multiple instructions may miss the ERAT entry for the same 4K page, but only one reload will occur. #56,v,g,n,n,PM_LSU1_FLUSH_LRQ,LSU1 LRQ flushes ##C0096 A load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. #57,u,g,n,n,PM_LSU1_FLUSH_SRQ,LSU1 SRQ flushes ##C0097 A store was flushed because younger load hits and older store that is already in the SRQ or in the same group. 
#58,v,g,n,n,PM_LSU1_FLUSH_ULD,LSU1 unaligned load flushes ##C0094 A load was flushed from unit 1 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) #59,u,g,n,n,PM_LSU1_FLUSH_UST,LSU1 unaligned store flushes ##C0095 A store was flushed from unit 1 because it was unaligned (crossed a 4k boundary) #60,v,g,n,n,PM_LSU1_REJECT_ERAT_MISS,LSU1 reject due to ERAT miss ##C609F LSU1 reject due to ERAT miss #61,v,g,n,n,PM_LSU1_REJECT_LMQ_FULL,LSU1 reject due to LMQ full or missed data coming ##C609D LSU1 reject due to LMQ full or missed data coming #62,v,g,n,n,PM_LSU1_REJECT_RELOAD_CDF,LSU1 reject due to reload CDF or tag update collision ##C609E LSU1 reject due to reload CDF or tag update collision #63,v,g,n,n,PM_LSU1_REJECT_SRQ,LSU1 SRQ rejects ##C609C LSU1 SRQ rejects #64,u,g,n,n,PM_LSU1_SRQ_STFWD,LSU1 SRQ store forwarded ##C209C Data from a store instruction was forwarded to a load on unit 1 #65,v,g,n,n,PM_LSU_FLUSH_ULD,LRQ unaligned load flushes ##C0080 A load was flushed because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) #66,v,g,n,n,PM_LSU_LRQ_S0_ALLOC,LRQ slot 0 allocated ##C209E LRQ slot zero was allocated #67,v,g,n,n,PM_LSU_LRQ_S0_VALID,LRQ slot 0 valid ##C209A This signal is asserted every cycle that the Load Request Queue slot zero is valid. The SRQ is 32 entries long and is allocated round-robin. #68,v,g,n,n,PM_LSU_REJECT_SRQ,LSU SRQ rejects ##C6080 LSU SRQ rejects #69,v,g,n,n,PM_LSU_SRQ_S0_ALLOC,SRQ slot 0 allocated ##C209D SRQ Slot zero was allocated #70,v,g,n,n,PM_LSU_SRQ_S0_VALID,SRQ slot 0 valid ##C2099 This signal is asserted every cycle that the Store Request Queue slot zero is valid. The SRQ is 32 entries long and is allocated round-robin. 
#71,c,g,n,n,PM_LSU_SRQ_STFWD,SRQ store forwarded ##C2080 Data from a store instruction was forwarded to a load #72,v,g,n,n,PM_MRK_DATA_FROM_L2,Marked data loaded from L2 ##C7087 DL1 was reloaded from the local L2 due to a marked demand load #73,v,g,n,n,PM_MRK_GRP_DISP,Marked group dispatched ##00002 A group containing a sampled instruction was dispatched #74,v,g,n,n,PM_MRK_IMR_RELOAD,Marked IMR reloaded ##8209A A DL1 reload occurred due to marked load #75,v,g,n,n,PM_MRK_LD_MISS_L1,Marked L1 D cache load misses ##82080 Marked L1 D cache load misses #76,v,g,n,n,PM_MRK_LD_MISS_L1_LSU0,LSU0 L1 D cache load misses ##82098 A marked load, executing on unit 0, missed the dcache #77,v,g,n,n,PM_MRK_LD_MISS_L1_LSU1,LSU1 L1 D cache load misses ##8209C A marked load, executing on unit 1, missed the dcache #78,v,g,n,n,PM_MRK_STCX_FAIL,Marked STCX failed ##8209E A marked stcx (stwcx or stdcx) failed #79,v,g,n,n,PM_MRK_ST_CMPL,Marked store instruction completed ##00003 A sampled store has completed (data home) #80,v,g,n,n,PM_MRK_ST_MISS_L1,Marked L1 D cache store misses ##8209B A marked store missed the dcache #81,v,g,n,n,PM_PMC8_OVERFLOW,PMC8 Overflow ##0000A PMC8 Overflow #82,v,g,n,n,PM_RUN_CYC,Run cycles ##00005 Processor Cycles gated by the run latch #83,u,g,n,n,PM_SNOOP_TLBIE,Snoop TLBIE ##80093 A TLB miss for a data request occurred. Requests that miss the TLB may be retried until the instruction is in the next to complete group (unless HID4 is set to allow speculative tablewalks). This may result in multiple TLB misses for the same instruction. 
#84,v,g,n,n,PM_STCX_FAIL,STCX failed ##82099 A stcx (stwcx or stdcx) failed #85,v,g,n,n,PM_STCX_PASS,Stcx passes ##8209D A stcx (stwcx or stdcx) instruction was successful #86,v,g,n,n,PM_ST_MISS_L1,L1 D cache store misses ##C209B A store missed the dcache #87,v,g,n,n,PM_SUSPENDED,Suspended ##00008 Suspended #88,v,g,n,n,PM_XER_MAP_FULL_CYC,Cycles XER mapper full ##10092,60092 The ISU sends a signal indicating that the xer mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. $$$$$$$$ { counter 2 } #0,u,g,n,n,PM_BRQ_FULL_CYC,Cycles branch queue full ##10095,60095 The ISU sends a signal indicating that the issue queue that feeds the ifu br unit cannot accept any more group (queue is full of groups). #1,v,g,n,n,PM_CR_MAP_FULL_CYC,Cycles CR logical operation mapper full ##10094,60094 The ISU sends a signal indicating that the cr mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. #2,v,g,n,n,PM_CYC,Processor cycles ##0000F Processor cycles #3,v,g,n,n,PM_DATA_TABLEWALK_CYC,Cycles doing data tablewalks ##80097 This signal is asserted every cycle when a tablewalk is active. While a tablewalk is active any request attempting to access the TLB will be rejected and retried. #4,v,g,n,n,PM_DSLB_MISS,Data SLB misses ##80095 A SLB miss for a data request occurred. SLB misses trap to the operating system to resolve #5,v,g,n,n,PM_DTLB_MISS,Data TLB misses ##80094 A TLB miss for a data request occurred. Requests that miss the TLB may be retried until the instruction is in the next to complete group (unless HID4 is set to allow speculative tablewalks). This may result in multiple TLB misses for the same instruction. #6,v,g,n,n,PM_FPR_MAP_FULL_CYC,Cycles FPR mapper full ##10091,60091 The ISU sends a signal indicating that the FPR mapper cannot accept any more groups. Dispatch is stopped. 
Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. #7,v,g,n,n,PM_FPU0_ALL,FPU0 executed add, mult, sub, cmp or sel instruction ##00093 This signal is active for one cycle when fp0 is executing an add, mult, sub, compare, or fsel kind of instruction. This could be fadd*, fmul*, fsub*, fcmp**, fsel where XYZ* means XYZ, XYZs, XYZ., XYZs. and XYZ** means XYZu, XYZo #8,v,g,n,n,PM_FPU0_DENORM,FPU0 received denormalized data ##02098 This signal is active for one cycle when one of the operands is denormalized. #9,v,g,n,n,PM_FPU0_FDIV,FPU0 executed FDIV instruction ##00090 This signal is active for one cycle at the end of the microcode executed when fp0 is executing a divide instruction. This could be fdiv, fdivs, fdiv. fdivs. #10,v,g,n,n,PM_FPU0_FMA,FPU0 executed multiply-add instruction ##00091 This signal is active for one cycle when fp0 is executing multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. #11,v,g,n,n,PM_FPU0_FSQRT,FPU0 executed FSQRT instruction ##00092 This signal is active for one cycle at the end of the microcode executed when fp0 is executing a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. #12,v,g,n,n,PM_FPU0_FULL_CYC,Cycles FPU0 issue queue full ##10093,60093 The issue queue for FPU unit 0 cannot accept any more instructions. Issue is stopped #13,v,g,n,n,PM_FPU0_SINGLE,FPU0 executed single precision instruction ##0209B This signal is active for one cycle when fp0 is executing single precision instruction. #14,v,g,n,n,PM_FPU0_STALL3,FPU0 stalled in pipe3 ##02099 This signal indicates that fp0 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). This signal is active during the entire duration of the stall. 
#15,v,g,n,n,PM_FPU0_STF,FPU0 executed store instruction ##0209A This signal is active for one cycle when fp0 is executing a store instruction. #16,v,g,n,n,PM_FPU1_ALL,FPU1 executed add, mult, sub, cmp or sel instruction ##00097 This signal is active for one cycle when fp1 is executing an add, mult, sub, compare, or fsel kind of instruction. This could be fadd*, fmul*, fsub*, fcmp**, fsel where XYZ* means XYZ, XYZs, XYZ., XYZs. and XYZ** means XYZu, XYZo #17,v,g,n,n,PM_FPU1_DENORM,FPU1 received denormalized data ##0209C This signal is active for one cycle when one of the operands is denormalized. #18,v,g,n,n,PM_FPU1_FDIV,FPU1 executed FDIV instruction ##00094 This signal is active for one cycle at the end of the microcode executed when fp1 is executing a divide instruction. This could be fdiv, fdivs, fdiv. fdivs. #19,v,g,n,n,PM_FPU1_FMA,FPU1 executed multiply-add instruction ##00095 This signal is active for one cycle when fp1 is executing multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. #20,v,g,n,n,PM_FPU1_FSQRT,FPU1 executed FSQRT instruction ##00096 This signal is active for one cycle at the end of the microcode executed when fp1 is executing a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. #21,v,g,n,n,PM_FPU1_FULL_CYC,Cycles FPU1 issue queue full ##10097,60097 The issue queue for FPU unit 1 cannot accept any more instructions. Issue is stopped #22,v,g,n,n,PM_FPU1_SINGLE,FPU1 executed single precision instruction ##0209F This signal is active for one cycle when fp1 is executing single precision instruction. #23,v,g,n,n,PM_FPU1_STALL3,FPU1 stalled in pipe3 ##0209D This signal indicates that fp1 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). This signal is active during the entire duration of the stall. 
#24,v,g,n,n,PM_FPU1_STF,FPU1 executed store instruction ##0209E This signal is active for one cycle when fp1 is executing a store instruction. #25,v,g,n,n,PM_FPU_FMA,FPU executed multiply-add instruction ##00080 This signal is active for one cycle when FPU is executing multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. Combined Unit 0 + Unit 1 #26,v,g,n,n,PM_FPU_STALL3,FPU stalled in pipe3 ##02080 FPU has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). This signal is active during the entire duration of the stall. Combined Unit 0 + Unit 1 #27,v,g,n,n,PM_GCT_EMPTY_SRQ_FULL,GCT empty caused by SRQ full ##0000B GCT empty caused by SRQ full #28,v,g,n,n,PM_GCT_FULL_CYC,Cycles GCT full ##10090,60090 The ISU sends a signal indicating the gct is full. #29,v,g,n,n,PM_GRP_BR_MPRED,Group experienced a branch mispredict ##1209F,6209F Group experienced a branch mispredict #30,v,g,n,n,PM_GRP_BR_REDIR,Group experienced branch redirect ##1209E,6209E Group experienced branch redirect #31,v,g,n,n,PM_GRP_DISP,Group dispatches ##00004 A group was dispatched #32,v,g,n,n,PM_GRP_DISP_REJECT,Group dispatch rejected ##1209C,6209C A group that previously attempted dispatch was rejected. #33,v,g,n,n,PM_GRP_DISP_VALID,Group dispatch valid ##1209B,6209B Dispatch has been attempted for a valid group. Some groups may be rejected. The total number of successful dispatches is the number of dispatch valid minus dispatch reject. #34,v,g,n,n,PM_IC_PREF_INSTALL,Instruction prefetched installed in prefetch ##2209E New line coming into the prefetch buffer #35,v,g,n,n,PM_IC_PREF_REQ,Instruction prefetch requests ##2209D Asserted when a non-canceled prefetch is made to the cache interface unit (CIU). #36,v,g,n,n,PM_IERAT_XLATE_WR,Translation written to ierat ##2209F This signal will be asserted each time the I-ERAT is written. 
This indicates that an ERAT miss has been serviced. ERAT misses will initiate a sequence resulting in the ERAT being written. ERAT misses that are later ignored will not be counted unless the ERAT is written before the instruction stream is changed. This should be a fairly accurate count of ERAT misses (best available). #37,c,g,n,n,PM_INST_CMPL,Instructions completed ##00009 Number of Eligible Instructions that completed. #38,v,g,n,n,PM_INST_DISP,Instructions dispatched ##12098,12099,1209A,62098,62099,6209A The ISU sends the number of instructions dispatched. #39,v,g,n,n,PM_INST_FROM_MEM,Instruction fetched from memory ##22086 Instruction fetched from memory #40,u,g,n,n,PM_ISLB_MISS,Instruction SLB misses ##80091 A SLB miss for an instruction fetch has occurred #41,v,g,n,n,PM_ITLB_MISS,Instruction TLB misses ##80090 A TLB miss for an Instruction Fetch has occurred #42,v,g,n,n,PM_LARX_LSU0,Larx executed on LSU0 ##8209F A larx (lwarx or ldarx) was executed on side 0 (there is no corresponding unit 1 event since larx instructions can only execute on unit 0) #43,u,g,n,n,PM_LR_CTR_MAP_FULL_CYC,Cycles LR/CTR mapper full ##10096,60096 The ISU sends a signal indicating that the lr/ctr mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. #44,v,g,n,n,PM_LSU0_DERAT_MISS,LSU0 DERAT misses ##80092 A data request (load or store) from LSU Unit 0 missed the ERAT and resulted in an ERAT reload. Multiple instructions may miss the ERAT entry for the same 4K page, but only one reload will occur. #45,v,g,n,n,PM_LSU0_FLUSH_LRQ,LSU0 LRQ flushes ##C0092 A load was flushed by unit 0 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte.
#46,u,g,n,n,PM_LSU0_FLUSH_SRQ,LSU0 SRQ flushes ##C0093 A store was flushed because younger load hits and older store that is already in the SRQ or in the same group. #47,v,g,n,n,PM_LSU0_FLUSH_ULD,LSU0 unaligned load flushes ##C0090 A load was flushed from unit 0 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) #48,v,g,n,n,PM_LSU0_FLUSH_UST,LSU0 unaligned store flushes ##C0091 A store was flushed from unit 0 because it was unaligned (crossed a 4k boundary) #49,v,g,n,n,PM_LSU0_REJECT_ERAT_MISS,LSU0 reject due to ERAT miss ##C609B LSU0 reject due to ERAT miss #50,v,g,n,n,PM_LSU0_REJECT_LMQ_FULL,LSU0 reject due to LMQ full or missed data coming ##C6099 LSU0 reject due to LMQ full or missed data coming #51,v,g,n,n,PM_LSU0_REJECT_RELOAD_CDF,LSU0 reject due to reload CDF or tag update collision ##C609A LSU0 reject due to reload CDF or tag update collision #52,v,g,n,n,PM_LSU0_REJECT_SRQ,LSU0 SRQ rejects ##C6098 LSU0 SRQ rejects #53,u,g,n,n,PM_LSU0_SRQ_STFWD,LSU0 SRQ store forwarded ##C2098 Data from a store instruction was forwarded to a load on unit 0 #54,v,g,n,n,PM_LSU1_DERAT_MISS,LSU1 DERAT misses ##80096 A data request (load or store) from LSU Unit 1 missed the ERAT and resulted in an ERAT reload. Multiple instructions may miss the ERAT entry for the same 4K page, but only one reload will occur. #55,v,g,n,n,PM_LSU1_FLUSH_LRQ,LSU1 LRQ flushes ##C0096 A load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. #56,u,g,n,n,PM_LSU1_FLUSH_SRQ,LSU1 SRQ flushes ##C0097 A store was flushed because younger load hits and older store that is already in the SRQ or in the same group. 
#57,v,g,n,n,PM_LSU1_FLUSH_ULD,LSU1 unaligned load flushes ##C0094 A load was flushed from unit 1 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) #58,u,g,n,n,PM_LSU1_FLUSH_UST,LSU1 unaligned store flushes ##C0095 A store was flushed from unit 1 because it was unaligned (crossed a 4k boundary) #59,v,g,n,n,PM_LSU1_REJECT_ERAT_MISS,LSU1 reject due to ERAT miss ##C609F LSU1 reject due to ERAT miss #60,v,g,n,n,PM_LSU1_REJECT_LMQ_FULL,LSU1 reject due to LMQ full or missed data coming ##C609D LSU1 reject due to LMQ full or missed data coming #61,v,g,n,n,PM_LSU1_REJECT_RELOAD_CDF,LSU1 reject due to reload CDF or tag update collision ##C609E LSU1 reject due to reload CDF or tag update collision #62,v,g,n,n,PM_LSU1_REJECT_SRQ,LSU1 SRQ rejects ##C609C LSU1 SRQ rejects #63,u,g,n,n,PM_LSU1_SRQ_STFWD,LSU1 SRQ store forwarded ##C209C Data from a store instruction was forwarded to a load on unit 1 #64,v,g,n,n,PM_LSU_FLUSH_UST,SRQ unaligned store flushes ##C0080 A store was flushed because it was unaligned #65,u,g,n,n,PM_LSU_LMQ_SRQ_EMPTY_CYC,Cycles LMQ and SRQ empty ##00002 Cycles when both the LMQ and SRQ are empty (LSU is idle) #66,v,g,n,n,PM_LSU_LRQ_S0_ALLOC,LRQ slot 0 allocated ##C209E LRQ slot zero was allocated #67,v,g,n,n,PM_LSU_LRQ_S0_VALID,LRQ slot 0 valid ##C209A This signal is asserted every cycle that the Load Request Queue slot zero is valid. The LRQ is 32 entries long and is allocated round-robin. #68,v,g,n,n,PM_LSU_REJECT_LMQ_FULL,LSU reject due to LMQ full or missed data coming ##C6080 LSU reject due to LMQ full or missed data coming #69,v,g,n,n,PM_LSU_SRQ_S0_ALLOC,SRQ slot 0 allocated ##C209D SRQ Slot zero was allocated #70,v,g,n,n,PM_LSU_SRQ_S0_VALID,SRQ slot 0 valid ##C2099 This signal is asserted every cycle that the Store Request Queue slot zero is valid. The SRQ is 32 entries long and is allocated round-robin.
#71,v,g,n,n,PM_MRK_BRU_FIN,Marked instruction BRU processing finished ##00005 The branch unit finished a marked instruction. Instructions that finish may not necessarily complete #72,v,g,n,n,PM_MRK_IMR_RELOAD,Marked IMR reloaded ##8209A A DL1 reload occurred due to marked load #73,v,g,n,n,PM_MRK_LD_MISS_L1_LSU0,LSU0 L1 D cache load misses ##82098 A marked load, executing on unit 0, missed the dcache #74,v,g,n,n,PM_MRK_LD_MISS_L1_LSU1,LSU1 L1 D cache load misses ##8209C A marked load, executing on unit 1, missed the dcache #75,v,g,n,n,PM_MRK_STCX_FAIL,Marked STCX failed ##8209E A marked stcx (stwcx or stdcx) failed #76,v,g,n,n,PM_MRK_ST_MISS_L1,Marked L1 D cache store misses ##8209B A marked store missed the dcache #77,v,g,n,n,PM_PMC1_OVERFLOW,PMC1 Overflow ##0000A PMC1 Overflow #78,u,g,n,n,PM_SNOOP_TLBIE,Snoop TLBIE ##80093 A TLB miss for a data request occurred. Requests that miss the TLB may be retried until the instruction is in the next to complete group (unless HID4 is set to allow speculative tablewalks). This may result in multiple TLB misses for the same instruction. #79,v,g,n,n,PM_STCX_FAIL,STCX failed ##82099 A stcx (stwcx or stdcx) failed #80,v,g,n,n,PM_STCX_PASS,Stcx passes ##8209D A stcx (stwcx or stdcx) instruction was successful #81,v,g,n,n,PM_ST_MISS_L1,L1 D cache store misses ##C209B A store missed the dcache #82,v,g,n,n,PM_SUSPENDED,Suspended ##00008 Suspended #83,v,g,t,n,PM_THRESH_TIMEO,Threshold timeout ##00003 The threshold timer expired #84,v,g,n,n,PM_WORK_HELD,Work held ##00001 RAS Unit has signaled completion to stop and there are groups waiting to complete #85,v,g,n,n,PM_XER_MAP_FULL_CYC,Cycles XER mapper full ##10092,60092 The ISU sends a signal indicating that the xer mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be.
$$$$$$$$ { counter 3 } #0,v,g,n,n,PM_BR_ISSUED,Branches issued ##23098 This signal will be asserted each time the ISU issues a branch instruction. This signal will be asserted each time the ISU selects a branch instruction to issue. #1,v,g,n,n,PM_BR_MPRED_CR,Branch mispredictions due to CR bit setting ##23099 This signal is asserted when the branch execution unit detects a branch mispredict because the CR value is opposite of the predicted value. This signal is asserted after a branch issue event and will result in a branch redirect flush if not overridden by a flush of an older instruction. #2,v,g,n,n,PM_BR_MPRED_TA,Branch mispredictions due to target address ##2309A branch miss predict due to a target address prediction. This signal will be asserted each time the branch execution unit detects an incorrect target address prediction. This signal will be asserted after a valid branch execution unit issue and will cause a branch mispredict flush unless a flush is detected from an older instruction. #3,u,g,n,n,PM_CRQ_FULL_CYC,Cycles CR issue queue full ##11091,61091 The ISU sends a signal indicating that the issue queue that feeds the ifu cr unit cannot accept any more group (queue is full of groups). #4,v,g,n,n,PM_CYC,Processor cycles ##0000F Processor cycles #5,v,g,n,n,PM_DATA_FROM_MEM,Data loaded from memory ##C3087 Data loaded from memory #6,u,g,n,n,PM_DC_INV_L2,L1 D cache entries invalidated from L2 ##C1097 A dcache invalidated was received from the L2 because a line in L2 was castout. #7,u,g,n,n,PM_DC_PREF_OUT_OF_STREAMS,D cache out of streams ##8309A out of streams #8,v,g,n,n,PM_DC_PREF_STREAM_ALLOC,D cache new prefetch stream allocated ##8309F A new Prefetch Stream was allocated #9,v,g,n,n,PM_EE_OFF,Cycles MSR(EE) bit off ##1309B,6309B The number of Cycles MSR(EE) bit was off. 
#10,u,g,n,n,PM_EE_OFF_EXT_INT,Cycles MSR(EE) bit off and external interrupt pending ##1309F,6309F Cycles MSR(EE) bit off and external interrupt pending #11,v,g,n,n,PM_FLUSH_BR_MPRED,Flush caused by branch mispredict ##11096,61096 Flush caused by branch mispredict #12,v,g,n,n,PM_FLUSH_LSU_BR_MPRED,Flush caused by LSU or branch mispredict ##11097,61097 Flush caused by LSU or branch mispredict #13,v,g,n,n,PM_FPU0_FEST,FPU0 executed FEST instruction ##01092 This signal is active for one cycle when fp0 is executing one of the estimate instructions. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. #14,v,g,n,n,PM_FPU0_FIN,FPU0 produced a result ##01093 fp0 finished, produced a result This only indicates finish, not completion. #15,v,g,n,n,PM_FPU0_FMOV_FEST,FPU0 executed FMOV or FEST instructions ##01090 This signal is active for one cycle when fp0 is executing a move kind of instruction or one of the estimate instructions.. This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ #16,v,g,n,n,PM_FPU0_FPSCR,FPU0 executed FPSCR instruction ##03098 This signal is active for one cycle when fp0 is executing fpscr move related instruction. This could be mtfsfi*, mtfsb0*, mtfsb1*. mffs*, mtfsf*, mcrsf* where XYZ* means XYZ, XYZs, XYZ., XYZs #17,v,g,n,n,PM_FPU0_FRSP_FCONV,FPU0 executed FRSP or FCONV instructions ##01091 This signal is active for one cycle when fp0 is executing frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. #18,v,g,n,n,PM_FPU1_FEST,FPU1 executed FEST instruction ##01096 This signal is active for one cycle when fp1 is executing one of the estimate instructions. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. #19,v,g,n,n,PM_FPU1_FIN,FPU1 produced a result ##01097 fp1 finished, produced a result. This only indicates finish, not completion. 
#20,v,g,n,n,PM_FPU1_FMOV_FEST,FPU1 executing FMOV or FEST instructions ##01094 This signal is active for one cycle when fp1 is executing a move kind of instruction or one of the estimate instructions. This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ #21,v,g,n,n,PM_FPU1_FRSP_FCONV,FPU1 executed FRSP or FCONV instructions ##01095 This signal is active for one cycle when fp1 is executing frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. #22,v,g,n,n,PM_FPU_FEST,FPU executed FEST instruction ##01080 This signal is active for one cycle when executing one of the estimate instructions. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. Combined Unit 0 + Unit 1. #23,v,g,n,n,PM_FXLS0_FULL_CYC,Cycles FXU0/LS0 queue full ##11090,61090 The issue queue for FXU/LSU unit 0 cannot accept any more instructions. Issue is stopped #24,v,g,n,n,PM_FXLS1_FULL_CYC,Cycles FXU1/LS1 queue full ##11094,61094 The issue queue for FXU/LSU unit 1 cannot accept any more instructions. Issue is stopped #25,v,g,n,n,PM_FXU0_FIN,FXU0 produced a result ##1309A,6309A The Fixed Point unit 0 finished an instruction and produced a result #26,v,g,n,n,PM_FXU1_FIN,FXU1 produced a result ##1309E,6309E The Fixed Point unit 1 finished an instruction and produced a result #27,v,g,n,n,PM_FXU_FIN,FXU produced a result ##63080 The fixed point unit (Unit 0 + Unit 1) finished a marked instruction. Instructions that finish may not necessarily complete. #28,v,g,n,n,PM_GPR_MAP_FULL_CYC,Cycles GPR mapper full ##1309D,6309D The ISU sends a signal indicating that the gpr mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. #29,v,g,n,n,PM_GRP_DISP_BLK_SB_CYC,Cycles group dispatch blocked by scoreboard ##13099,63099 The ISU sends a signal indicating that dispatch is blocked by scoreboard.
#30,v,g,n,n,PM_HV_CYC,Hypervisor Cycles ##00004 Cycles when the processor is executing in Hypervisor (MSR[HV] = 1 and MSR[PR]=0) #31,c,g,n,n,PM_INST_CMPL,Instructions completed ##00009 Number of Eligible Instructions that completed. #32,v,g,n,n,PM_INST_FROM_PREF,Instructions fetched from prefetch ##2208D An instruction fetch group was fetched from the prefetch buffer. Fetch Groups can contain up to 8 instructions #33,v,g,n,n,PM_L1_DCACHE_RELOAD_VALID,L1 reload data source valid ##C309C The data source information is valid #34,v,g,n,n,PM_L1_PREF,L1 cache data prefetches ##83099 A request to prefetch data into the L1 was made #35,v,g,n,n,PM_L1_WRITE_CYC,Cycles writing to instruction L1 ##2309B This signal is asserted each cycle a cache write is active. #36,v,g,n,n,PM_L2_PREF,L2 cache prefetches ##8309B A request to prefetch data into L2 was made #37,v,g,n,n,PM_LD_MISS_L1,L1 D cache load misses ##C1080 Total DL1 Load references that miss the DL1 #38,v,g,n,n,PM_LD_MISS_L1_LSU0,LSU0 L1 D cache load misses ##C1092 A load, executing on unit 0, missed the dcache #39,v,g,n,n,PM_LD_MISS_L1_LSU1,LSU1 L1 D cache load misses ##C1096 A load, executing on unit 1, missed the dcache #40,v,g,n,n,PM_LD_REF_L1_LSU0,LSU0 L1 D cache load references ##C1090 A load executed on unit 0 #41,v,g,n,n,PM_LD_REF_L1_LSU1,LSU1 L1 D cache load references ##C1094 A load executed on unit 1 #42,v,g,n,n,PM_LSU0_LDF,LSU0 executed Floating Point load instruction ##83098 A floating point load was executed from LSU unit 0 #43,v,g,n,n,PM_LSU1_LDF,LSU1 executed Floating Point load instruction ##8309C A floating point load was executed from LSU unit 1 #44,v,g,n,n,PM_LSU_FLUSH,Flush initiated by LSU ##11095,61095 Flush initiated by LSU #45,u,g,n,n,PM_LSU_LMQ_FULL_CYC,Cycles LMQ full ##C309F The LMQ was full #46,v,g,n,n,PM_LSU_LMQ_LHR_MERGE,LMQ LHR merges ##C709D A dcache miss occurred for the same real cache line address as an earlier request already in the Load Miss Queue and was merged into the LMQ entry.
#47,v,g,n,n,PM_LSU_LMQ_S0_ALLOC,LMQ slot 0 allocated ##C309E The first entry in the LMQ was allocated. #48,v,g,n,n,PM_LSU_LMQ_S0_VALID,LMQ slot 0 valid ##C309D This signal is asserted every cycle when the first entry in the LMQ is valid. The LMQ had eight entries that are allocated FIFO #49,u,g,n,n,PM_LSU_LMQ_SRQ_EMPTY_CYC,Cycles LMQ and SRQ empty ##00002 Cycles when both the LMQ and SRQ are empty (LSU is idle) #50,v,g,n,n,PM_LSU_LRQ_FULL_CYC,Cycles LRQ full ##11092,61092 The ISU sends this signal when the LRQ is full. #51,v,g,n,n,PM_LSU_SRQ_FULL_CYC,Cycles SRQ full ##11093,61093 The ISU sends this signal when the srq is full. #52,u,g,n,n,PM_LSU_SRQ_SYNC_CYC,SRQ sync duration ##8309D This signal is asserted every cycle when a sync is in the SRQ. #53,v,g,n,n,PM_MRK_DATA_FROM_MEM,Marked data loaded from memory ##C7087 Marked data loaded from memory #54,v,g,n,n,PM_MRK_L1_RELOAD_VALID,Marked L1 reload data source valid ##C709C The source information is valid and is for a marked load #55,v,g,n,n,PM_MRK_LSU0_FLUSH_LRQ,LSU0 marked LRQ flushes ##81092 A marked load was flushed by unit 0 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. #56,u,g,n,n,PM_MRK_LSU0_FLUSH_SRQ,LSU0 marked SRQ flushes ##81093 A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group. 
#57,v,g,n,n,PM_MRK_LSU0_FLUSH_ULD,LSU0 marked unaligned load flushes ##81090 A marked load was flushed from unit 0 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) #58,v,g,n,n,PM_MRK_LSU0_FLUSH_UST,LSU0 marked unaligned store flushes ##81091 A marked store was flushed from unit 0 because it was unaligned #59,v,g,n,n,PM_MRK_LSU1_FLUSH_LRQ,LSU1 marked LRQ flushes ##81096 A marked load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. #60,u,g,n,n,PM_MRK_LSU1_FLUSH_SRQ,LSU1 marked SRQ flushes ##81097 A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group. #61,v,g,n,n,PM_MRK_LSU1_FLUSH_ULD,LSU1 marked unaligned load flushes ##81094 A marked load was flushed from unit 1 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) #62,u,g,n,n,PM_MRK_LSU1_FLUSH_UST,LSU1 marked unaligned store flushes ##81095 A marked store was flushed from unit 1 because it was unaligned (crossed a 4k boundary) #63,u,g,n,n,PM_MRK_LSU_SRQ_INST_VALID,Marked instruction valid in SRQ ##C709E This signal is asserted every cycle when a marked request is resident in the Store Request Queue #64,v,g,n,n,PM_MRK_ST_CMPL_INT,Marked store completed with intervention ##00003 A marked store previously sent to the memory subsystem completed (data home) after requiring intervention #65,v,g,n,n,PM_MRK_VMX_FIN,Marked instruction VMX processing finished ##00005 Marked instruction VMX processing finished #66,v,g,n,n,PM_PMC2_OVERFLOW,PMC2 Overflow ##0000A PMC2 Overflow #67,v,g,n,n,PM_STOP_COMPLETION,Completion stopped ##00001 RAS Unit has signaled completion to stop #68,v,g,n,n,PM_ST_MISS_L1,L1 D cache store misses ##C1093 A store missed the dcache #69,v,g,n,n,PM_ST_REF_L1_LSU0,LSU0 L1 D cache store references 
##C1091 A store executed on unit 0 #70,v,g,n,n,PM_ST_REF_L1_LSU1,LSU1 L1 D cache store references ##C1095 A store executed on unit 1 #71,v,g,n,n,PM_SUSPENDED,Suspended ##00008 Suspended $$$$$$$$ { counter 4 } #0,v,g,n,n,PM_0INST_FETCH,No instructions fetched ##2208D No instructions were fetched this cycles (due to IFU hold, redirect, or icache miss) #1,v,g,n,n,PM_BR_ISSUED,Branches issued ##23098 This signal will be asserted each time the ISU issues a branch instruction. This signal will be asserted each time the ISU selects a branch instruction to issue. #2,v,g,n,n,PM_BR_MPRED_CR,Branch mispredictions due to CR bit setting ##23099 This signal is asserted when the branch execution unit detects a branch mispredict because the CR value is opposite of the predicted value. This signal is asserted after a branch issue event and will result in a branch redirect flush if not overridden by a flush of an older instruction. #3,v,g,n,n,PM_BR_MPRED_TA,Branch mispredictions due to target address ##2309A branch miss predict due to a target address prediction. This signal will be asserted each time the branch execution unit detects an incorrect target address prediction. This signal will be asserted after a valid branch execution unit issue and will cause a branch mispredict flush unless a flush is detected from an older instruction. #4,u,g,n,n,PM_CRQ_FULL_CYC,Cycles CR issue queue full ##11091,61091 The ISU sends a signal indicating that the issue queue that feeds the ifu cr unit cannot accept any more group (queue is full of groups). #5,v,g,n,n,PM_CYC,Processor cycles ##0000F Processor cycles #6,u,g,n,n,PM_DC_INV_L2,L1 D cache entries invalidated from L2 ##C1097 A dcache invalidated was received from the L2 because a line in L2 was castout. 
#7,u,g,n,n,PM_DC_PREF_OUT_OF_STREAMS,D cache out of streams ##8309A out of streams #8,v,g,n,n,PM_DC_PREF_STREAM_ALLOC,D cache new prefetch stream allocated ##8309F A new Prefetch Stream was allocated #9,v,g,n,n,PM_EE_OFF,Cycles MSR(EE) bit off ##1309B,6309B The number of Cycles MSR(EE) bit was off. #10,u,g,n,n,PM_EE_OFF_EXT_INT,Cycles MSR(EE) bit off and external interrupt pending ##1309F,6309F Cycles MSR(EE) bit off and external interrupt pending #11,v,g,n,n,PM_FLUSH_BR_MPRED,Flush caused by branch mispredict ##11096,61096 Flush caused by branch mispredict #12,v,g,n,n,PM_FLUSH_LSU_BR_MPRED,Flush caused by LSU or branch mispredict ##11097,61097 Flush caused by LSU or branch mispredict #13,v,g,n,n,PM_FPU0_FEST,FPU0 executed FEST instruction ##01092 This signal is active for one cycle when fp0 is executing one of the estimate instructions. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. #14,v,g,n,n,PM_FPU0_FIN,FPU0 produced a result ##01093 fp0 finished, produced a result This only indicates finish, not completion. #15,v,g,n,n,PM_FPU0_FMOV_FEST,FPU0 executed FMOV or FEST instructions ##01090 This signal is active for one cycle when fp0 is executing a move kind of instruction or one of the estimate instructions.. This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ #16,v,g,n,n,PM_FPU0_FPSCR,FPU0 executed FPSCR instruction ##03098 This signal is active for one cycle when fp0 is executing fpscr move related instruction. This could be mtfsfi*, mtfsb0*, mtfsb1*. mffs*, mtfsf*, mcrsf* where XYZ* means XYZ, XYZs, XYZ., XYZs #17,v,g,n,n,PM_FPU0_FRSP_FCONV,FPU0 executed FRSP or FCONV instructions ##01091 This signal is active for one cycle when fp0 is executing frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. #18,v,g,n,n,PM_FPU1_FEST,FPU1 executed FEST instruction ##01096 This signal is active for one cycle when fp1 is executing one of the estimate instructions. 
This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. #19,v,g,n,n,PM_FPU1_FIN,FPU1 produced a result ##01097 fp1 finished, produced a result. This only indicates finish, not completion. #20,v,g,n,n,PM_FPU1_FMOV_FEST,FPU1 executing FMOV or FEST instructions ##01094 This signal is active for one cycle when fp1 is executing a move kind of instruction or one of the estimate instructions. This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ #21,v,g,n,n,PM_FPU1_FRSP_FCONV,FPU1 executed FRSP or FCONV instructions ##01095 This signal is active for one cycle when fp1 is executing frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. #22,v,g,n,n,PM_FPU_FIN,FPU produced a result ##01080 FPU finished, produced a result This only indicates finish, not completion. Combined Unit 0 + Unit 1 #23,v,g,n,n,PM_FXLS0_FULL_CYC,Cycles FXU0/LS0 queue full ##11090,61090 The issue queue for FXU/LSU unit 0 cannot accept any more instructions. Issue is stopped #24,v,g,n,n,PM_FXLS1_FULL_CYC,Cycles FXU1/LS1 queue full ##11094,61094 The issue queue for FXU/LSU unit 1 cannot accept any more instructions. Issue is stopped #25,v,g,n,n,PM_FXU0_FIN,FXU0 produced a result ##1309A,6309A The Fixed Point unit 0 finished an instruction and produced a result #26,u,g,n,n,PM_FXU1_BUSY_FXU0_IDLE,FXU1 busy FXU0 idle ##00002 FXU0 was idle while FXU1 was busy #27,v,g,n,n,PM_FXU1_FIN,FXU1 produced a result ##1309E,6309E The Fixed Point unit 1 finished an instruction and produced a result #28,v,g,n,n,PM_GPR_MAP_FULL_CYC,Cycles GPR mapper full ##1309D,6309D The ISU sends a signal indicating that the gpr mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be.
#29,v,g,n,n,PM_GRP_DISP_BLK_SB_CYC,Cycles group dispatch blocked by scoreboard ##13099,63099 The ISU sends a signal indicating that dispatch is blocked by scoreboard. #30,c,g,n,n,PM_INST_CMPL,Instructions completed ##00009 Number of Eligible Instructions that completed. #31,v,g,n,n,PM_L1_DCACHE_RELOAD_VALID,L1 reload data source valid ##C309C The data source information is valid #32,v,g,n,n,PM_L1_PREF,L1 cache data prefetches ##83099 A request to prefetch data into the L1 was made #33,v,g,n,n,PM_L1_WRITE_CYC,Cycles writing to instruction L1 ##2309B This signal is asserted each cycle a cache write is active. #34,v,g,n,n,PM_L2_PREF,L2 cache prefetches ##8309B A request to prefetch data into L2 was made #35,v,g,n,n,PM_LD_MISS_L1_LSU0,LSU0 L1 D cache load misses ##C1092 A load, executing on unit 0, missed the dcache #36,v,g,n,n,PM_LD_MISS_L1_LSU1,LSU1 L1 D cache load misses ##C1096 A load, executing on unit 1, missed the dcache #37,v,g,n,n,PM_LD_REF_L1_LSU0,LSU0 L1 D cache load references ##C1090 A load executed on unit 0 #38,v,g,n,n,PM_LD_REF_L1_LSU1,LSU1 L1 D cache load references ##C1094 A load executed on unit 1 #39,v,g,n,n,PM_LSU0_LDF,LSU0 executed Floating Point load instruction ##83098 A floating point load was executed from LSU unit 0 #40,v,g,n,n,PM_LSU1_LDF,LSU1 executed Floating Point load instruction ##8309C A floating point load was executed from LSU unit 1 #41,v,g,n,n,PM_LSU_FLUSH,Flush initiated by LSU ##11095,61095 Flush initiated by LSU #42,u,g,n,n,PM_LSU_LMQ_FULL_CYC,Cycles LMQ full ##C309F The LMQ was full #43,v,g,n,n,PM_LSU_LMQ_LHR_MERGE,LMQ LHR merges ##C709D A dcache miss occurred for the same real cache line address as an earlier request already in the Load Miss Queue and was merged into the LMQ entry. #44,v,g,n,n,PM_LSU_LMQ_S0_ALLOC,LMQ slot 0 allocated ##C309E The first entry in the LMQ was allocated. #45,v,g,n,n,PM_LSU_LMQ_S0_VALID,LMQ slot 0 valid ##C309D This signal is asserted every cycle when the first entry in the LMQ is valid.
The LMQ had eight entries that are allocated FIFO #46,v,g,n,n,PM_LSU_LRQ_FULL_CYC,Cycles LRQ full ##11092,61092 The ISU sends this signal when the LRQ is full. #47,u,g,n,n,PM_LSU_SRQ_EMPTY_CYC,Cycles SRQ empty ##00003 The Store Request Queue is empty #48,v,g,n,n,PM_LSU_SRQ_FULL_CYC,Cycles SRQ full ##11093,61093 The ISU sends this signal when the srq is full. #49,u,g,n,n,PM_LSU_SRQ_SYNC_CYC,SRQ sync duration ##8309D This signal is asserted every cycle when a sync is in the SRQ. #50,v,g,n,n,PM_MRK_CRU_FIN,Marked instruction CRU processing finished ##00005 The Condition Register Unit finished a marked instruction. Instructions that finish may not necessarily complete #51,v,g,n,n,PM_MRK_GRP_CMPL,Marked group completed ##00004 A group containing a sampled instruction completed. Microcoded instructions that span multiple groups will generate this event once per group. #52,v,g,n,n,PM_MRK_L1_RELOAD_VALID,Marked L1 reload data source valid ##C709C The source information is valid and is for a marked load #53,v,g,n,n,PM_MRK_LSU0_FLUSH_LRQ,LSU0 marked LRQ flushes ##81092 A marked load was flushed by unit 0 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. #54,u,g,n,n,PM_MRK_LSU0_FLUSH_SRQ,LSU0 marked SRQ flushes ##81093 A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group.
#55,v,g,n,n,PM_MRK_LSU0_FLUSH_ULD,LSU0 marked unaligned load flushes ##81090 A marked load was flushed from unit 0 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) #56,v,g,n,n,PM_MRK_LSU0_FLUSH_UST,LSU0 marked unaligned store flushes ##81091 A marked store was flushed from unit 0 because it was unaligned #57,v,g,n,n,PM_MRK_LSU1_FLUSH_LRQ,LSU1 marked LRQ flushes ##81096 A marked load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. #58,u,g,n,n,PM_MRK_LSU1_FLUSH_SRQ,LSU1 marked SRQ flushes ##81097 A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group. #59,v,g,n,n,PM_MRK_LSU1_FLUSH_ULD,LSU1 marked unaligned load flushes ##81094 A marked load was flushed from unit 1 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) #60,u,g,n,n,PM_MRK_LSU1_FLUSH_UST,LSU1 marked unaligned store flushes ##81095 A marked store was flushed from unit 1 because it was unaligned (crossed a 4k boundary) #61,u,g,n,n,PM_MRK_LSU_SRQ_INST_VALID,Marked instruction valid in SRQ ##C709E This signal is asserted every cycle when a marked request is resident in the Store Request Queue #62,v,g,n,n,PM_PMC3_OVERFLOW,PMC3 Overflow ##0000A PMC3 Overflow #63,v,g,n,n,PM_ST_MISS_L1,L1 D cache store misses ##C1093 A store missed the dcache #64,v,g,n,n,PM_ST_REF_L1_LSU0,LSU0 L1 D cache store references ##C1091 A store executed on unit 0 #65,v,g,n,n,PM_ST_REF_L1_LSU1,LSU1 L1 D cache store references ##C1095 A store executed on unit 1 #66,v,g,n,n,PM_SUSPENDED,Suspended ##00008 Suspended $$$$$$$$ { counter 5 } #0,v,g,n,n,PM_1PLUS_PPC_CMPL,One or more PPC instruction completed ##00003 A group containing at least one PPC instruction completed. 
For microcoded instructions that span multiple groups, this will only occur once. #1,u,g,n,n,PM_BRQ_FULL_CYC,Cycles branch queue full ##10095,60095 The ISU sends a signal indicating that the issue queue that feeds the ifu br unit cannot accept any more group (queue is full of groups). #2,v,g,n,n,PM_CR_MAP_FULL_CYC,Cycles CR logical operation mapper full ##10094,60094 The ISU sends a signal indicating that the cr mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. #3,v,g,n,n,PM_CYC,Processor cycles ##0000F Processor cycles #4,v,g,n,n,PM_DATA_FROM_L25_SHR,Data loaded from L2.5 shared ##C3087 DL1 was reloaded with shared (T or SL) data from the L2 of a chip on this MCM due to a demand load #5,v,g,n,n,PM_DATA_TABLEWALK_CYC,Cycles doing data tablewalks ##80097 This signal is asserted every cycle when a tablewalk is active. While a tablewalk is active any request attempting to access the TLB will be rejected and retried. #6,v,g,n,n,PM_DSLB_MISS,Data SLB misses ##80095 A SLB miss for a data request occurred. SLB misses trap to the operating system to resolve #7,v,g,n,n,PM_DTLB_MISS,Data TLB misses ##80094 A TLB miss for a data request occurred. Requests that miss the TLB may be retried until the instruction is in the next to complete group (unless HID4 is set to allow speculative tablewalks). This may result in multiple TLB misses for the same instruction. #8,v,g,n,n,PM_FPR_MAP_FULL_CYC,Cycles FPR mapper full ##10091,60091 The ISU sends a signal indicating that the FPR mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. #9,v,g,n,n,PM_FPU0_ALL,FPU0 executed add, mult, sub, cmp or sel instruction ##00093 This signal is active for one cycle when fp0 is executing an add, mult, sub, compare, or fsel kind of instruction. 
This could be fadd*, fmul*, fsub*, fcmp**, fsel where XYZ* means XYZ, XYZs, XYZ., XYZs. and XYZ** means XYZu, XYZo #10,v,g,n,n,PM_FPU0_DENORM,FPU0 received denormalized data ##02098 This signal is active for one cycle when one of the operands is denormalized. #11,v,g,n,n,PM_FPU0_FDIV,FPU0 executed FDIV instruction ##00090 This signal is active for one cycle at the end of the microcode executed when fp0 is executing a divide instruction. This could be fdiv, fdivs, fdiv. fdivs. #12,v,g,n,n,PM_FPU0_FMA,FPU0 executed multiply-add instruction ##00091 This signal is active for one cycle when fp0 is executing multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. #13,v,g,n,n,PM_FPU0_FSQRT,FPU0 executed FSQRT instruction ##00092 This signal is active for one cycle at the end of the microcode executed when fp0 is executing a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. #14,v,g,n,n,PM_FPU0_FULL_CYC,Cycles FPU0 issue queue full ##10093,60093 The issue queue for FPU unit 0 cannot accept any more instructions. Issue is stopped #15,v,g,n,n,PM_FPU0_SINGLE,FPU0 executed single precision instruction ##0209B This signal is active for one cycle when fp0 is executing single precision instruction. #16,v,g,n,n,PM_FPU0_STALL3,FPU0 stalled in pipe3 ##02099 This signal indicates that fp0 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). This signal is active during the entire duration of the stall. #17,v,g,n,n,PM_FPU0_STF,FPU0 executed store instruction ##0209A This signal is active for one cycle when fp0 is executing a store instruction. #18,v,g,n,n,PM_FPU1_ALL,FPU1 executed add, mult, sub, cmp or sel instruction ##00097 This signal is active for one cycle when fp1 is executing an add, mult, sub, compare, or fsel kind of instruction. 
This could be fadd*, fmul*, fsub*, fcmp**, fsel where XYZ* means XYZ, XYZs, XYZ., XYZs. and XYZ** means XYZu, XYZo #19,v,g,n,n,PM_FPU1_DENORM,FPU1 received denormalized data ##0209C This signal is active for one cycle when one of the operands is denormalized. #20,v,g,n,n,PM_FPU1_FDIV,FPU1 executed FDIV instruction ##00094 This signal is active for one cycle at the end of the microcode executed when fp1 is executing a divide instruction. This could be fdiv, fdivs, fdiv. fdivs. #21,v,g,n,n,PM_FPU1_FMA,FPU1 executed multiply-add instruction ##00095 This signal is active for one cycle when fp1 is executing multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. #22,v,g,n,n,PM_FPU1_FSQRT,FPU1 executed FSQRT instruction ##00096 This signal is active for one cycle at the end of the microcode executed when fp1 is executing a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. #23,v,g,n,n,PM_FPU1_FULL_CYC,Cycles FPU1 issue queue full ##10097,60097 The issue queue for FPU unit 1 cannot accept any more instructions. Issue is stopped #24,v,g,n,n,PM_FPU1_SINGLE,FPU1 executed single precision instruction ##0209F This signal is active for one cycle when fp1 is executing single precision instruction. #25,v,g,n,n,PM_FPU1_STALL3,FPU1 stalled in pipe3 ##0209D This signal indicates that fp1 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). This signal is active during the entire duration of the stall. #26,v,g,n,n,PM_FPU1_STF,FPU1 executed store instruction ##0209E This signal is active for one cycle when fp1 is executing a store instruction. #27,v,g,n,n,PM_FPU_ALL,FPU executed add, mult, sub, cmp or sel instruction ##00080 This signal is active for one cycle when FPU is executing an add, mult, sub, compare, or fsel kind of instruction. 
This could be fadd*, fmul*, fsub*, fcmp**, fsel where XYZ* means XYZ, XYZs, XYZ., XYZs. and XYZ** means XYZu, XYZo. Combined Unit 0 + Unit 1 #28,v,g,n,n,PM_FPU_SINGLE,FPU executed single precision instruction ##02080 FPU is executing single precision instruction. Combined Unit 0 + Unit 1 #29,u,g,n,n,PM_FXU_IDLE,FXU idle ##00002 FXU0 and FXU1 are both idle #30,v,g,n,n,PM_GCT_FULL_CYC,Cycles GCT full ##10090,60090 The ISU sends a signal indicating the gct is full. #31,v,g,n,n,PM_GRP_BR_MPRED,Group experienced a branch mispredict ##1209F,6209F Group experienced a branch mispredict #32,v,g,n,n,PM_GRP_BR_REDIR,Group experienced branch redirect ##1209E,6209E Group experienced branch redirect #33,v,g,n,n,PM_GRP_DISP_REJECT,Group dispatch rejected ##1209C,6209C A group that previously attempted dispatch was rejected. #34,v,g,n,n,PM_GRP_DISP_SUCCESS,Group dispatch success ##00001 Number of groups successfully dispatched (not rejected) #35,v,g,n,n,PM_GRP_DISP_VALID,Group dispatch valid ##1209B,6209B Dispatch has been attempted for a valid group. Some groups may be rejected. The total number of successful dispatches is the number of dispatch valid minus dispatch reject. #36,v,g,n,n,PM_GRP_MRK,Group marked in IDU ##00004 A group was sampled (marked) #37,v,g,n,n,PM_IC_PREF_INSTALL,Instruction prefetched installed in prefetch ##2209E New line coming into the prefetch buffer #38,v,g,n,n,PM_IC_PREF_REQ,Instruction prefetch requests ##2209D Asserted when a non-canceled prefetch is made to the cache interface unit (CIU). #39,v,g,n,n,PM_IERAT_XLATE_WR,Translation written to ierat ##2209F This signal will be asserted each time the I-ERAT is written. This indicates that an ERAT miss has been serviced. ERAT misses will initiate a sequence resulting in the ERAT being written. ERAT misses that are later ignored will not be counted unless the ERAT is written before the instruction stream is changed, This should be a fairly accurate count of ERAT missed (best available).
#40,c,g,n,n,PM_INST_CMPL,Instructions completed ##00009 Number of Eligible Instructions that completed. #41,v,g,n,n,PM_INST_DISP,Instructions dispatched ##12098,12099,1209A,62098,62099,6209A The ISU sends the number of instructions dispatched. #42,v,g,n,n,PM_INST_FROM_L25_SHR,Instruction fetched from L2.5 shared ##22086 Instruction fetched from L2.5 shared #43,u,g,n,n,PM_ISLB_MISS,Instruction SLB misses ##80091 A SLB miss for an instruction fetch has occurred #44,v,g,n,n,PM_ITLB_MISS,Instruction TLB misses ##80090 A TLB miss for an Instruction Fetch has occurred #45,v,g,n,n,PM_LARX_LSU0,Larx executed on LSU0 ##8209F A larx (lwarx or ldarx) was executed on side 0 (there is no corresponding unit 1 event since larx instructions can only execute on unit 0) #46,u,g,n,n,PM_LR_CTR_MAP_FULL_CYC,Cycles LR/CTR mapper full ##10096,60096 The ISU sends a signal indicating that the lr/ctr mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. #47,v,g,n,n,PM_LSU0_DERAT_MISS,LSU0 DERAT misses ##80092 A data request (load or store) from LSU Unit 0 missed the ERAT and resulted in an ERAT reload. Multiple instructions may miss the ERAT entry for the same 4K page, but only one reload will occur. #48,v,g,n,n,PM_LSU0_FLUSH_LRQ,LSU0 LRQ flushes ##C0092 A load was flushed by unit 0 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. #49,u,g,n,n,PM_LSU0_FLUSH_SRQ,LSU0 SRQ flushes ##C0093 A store was flushed because a younger load hits an older store that is already in the SRQ or in the same group.
#50,v,g,n,n,PM_LSU0_FLUSH_ULD,LSU0 unaligned load flushes ##C0090 A load was flushed from unit 0 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) #51,v,g,n,n,PM_LSU0_FLUSH_UST,LSU0 unaligned store flushes ##C0091 A store was flushed from unit 0 because it was unaligned (crossed a 4k boundary) #52,v,g,n,n,PM_LSU0_REJECT_ERAT_MISS,LSU0 reject due to ERAT miss ##C609B LSU0 reject due to ERAT miss #53,v,g,n,n,PM_LSU0_REJECT_LMQ_FULL,LSU0 reject due to LMQ full or missed data coming ##C6099 LSU0 reject due to LMQ full or missed data coming #54,v,g,n,n,PM_LSU0_REJECT_RELOAD_CDF,LSU0 reject due to reload CDF or tag update collision ##C609A LSU0 reject due to reload CDF or tag update collision #55,v,g,n,n,PM_LSU0_REJECT_SRQ,LSU0 SRQ rejects ##C6098 LSU0 SRQ rejects #56,u,g,n,n,PM_LSU0_SRQ_STFWD,LSU0 SRQ store forwarded ##C2098 Data from a store instruction was forwarded to a load on unit 0 #57,v,g,n,n,PM_LSU1_DERAT_MISS,LSU1 DERAT misses ##80096 A data request (load or store) from LSU Unit 1 missed the ERAT and resulted in an ERAT reload. Multiple instructions may miss the ERAT entry for the same 4K page, but only one reload will occur. #58,v,g,n,n,PM_LSU1_FLUSH_LRQ,LSU1 LRQ flushes ##C0096 A load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. #59,u,g,n,n,PM_LSU1_FLUSH_SRQ,LSU1 SRQ flushes ##C0097 A store was flushed because younger load hits and older store that is already in the SRQ or in the same group. 
#60,v,g,n,n,PM_LSU1_FLUSH_ULD,LSU1 unaligned load flushes ##C0094 A load was flushed from unit 1 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) #61,u,g,n,n,PM_LSU1_FLUSH_UST,LSU1 unaligned store flushes ##C0095 A store was flushed from unit 1 because it was unaligned (crossed a 4k boundary) #62,v,g,n,n,PM_LSU1_REJECT_ERAT_MISS,LSU1 reject due to ERAT miss ##C609F LSU1 reject due to ERAT miss #63,v,g,n,n,PM_LSU1_REJECT_LMQ_FULL,LSU1 reject due to LMQ full or missed data coming ##C609D LSU1 reject due to LMQ full or missed data coming #64,v,g,n,n,PM_LSU1_REJECT_RELOAD_CDF,LSU1 reject due to reload CDF or tag update collision ##C609E LSU1 reject due to reload CDF or tag update collision #65,v,g,n,n,PM_LSU1_REJECT_SRQ,LSU1 SRQ rejects ##C609C LSU1 SRQ rejects #66,u,g,n,n,PM_LSU1_SRQ_STFWD,LSU1 SRQ store forwarded ##C209C Data from a store instruction was forwarded to a load on unit 1 #67,u,g,n,n,PM_LSU_FLUSH_SRQ,SRQ flushes ##C0080 A store was flushed because younger load hits and older store that is already in the SRQ or in the same group. #68,v,g,n,n,PM_LSU_LRQ_S0_ALLOC,LRQ slot 0 allocated ##C209E LRQ slot zero was allocated #69,v,g,n,n,PM_LSU_LRQ_S0_VALID,LRQ slot 0 valid ##C209A This signal is asserted every cycle that the Load Request Queue slot zero is valid. The SRQ is 32 entries long and is allocated round-robin. #70,v,g,n,n,PM_LSU_REJECT_ERAT_MISS,LSU reject due to ERAT miss ##C6080 LSU reject due to ERAT miss #71,v,g,n,n,PM_LSU_SRQ_S0_ALLOC,SRQ slot 0 allocated ##C209D SRQ Slot zero was allocated #72,v,g,n,n,PM_LSU_SRQ_S0_VALID,SRQ slot 0 valid ##C2099 This signal is asserted every cycle that the Store Request Queue slot zero is valid. The SRQ is 32 entries long and is allocated round-robin. 
#73,v,g,n,n,PM_MRK_DATA_FROM_L25_SHR,Marked data loaded from L2.5 shared ##C7087 DL1 was reloaded with shared (T or SL) data from the L2 of a chip on this MCM due to a marked demand load #74,v,g,n,n,PM_MRK_GRP_TIMEO,Marked group completion timeout ##00005 The sampling timeout expired indicating that the previously sampled instruction is no longer in the processor #75,v,g,n,n,PM_MRK_IMR_RELOAD,Marked IMR reloaded ##8209A A DL1 reload occurred due to marked load #76,v,g,n,n,PM_MRK_LD_MISS_L1_LSU0,LSU0 L1 D cache load misses ##82098 A marked load, executing on unit 0, missed the dcache #77,v,g,n,n,PM_MRK_LD_MISS_L1_LSU1,LSU1 L1 D cache load misses ##8209C A marked load, executing on unit 1, missed the dcache #78,v,g,n,n,PM_MRK_STCX_FAIL,Marked STCX failed ##8209E A marked stcx (stwcx or stdcx) failed #79,v,g,n,n,PM_MRK_ST_MISS_L1,Marked L1 D cache store misses ##8209B A marked store missed the dcache #80,v,g,n,n,PM_PMC4_OVERFLOW,PMC4 Overflow ##0000A PMC4 Overflow #81,u,g,n,n,PM_SNOOP_TLBIE,Snoop TLBIE ##80093 A TLB miss for a data request occurred. Requests that miss the TLB may be retried until the instruction is in the next to complete group (unless HID4 is set to allow speculative tablewalks). This may result in multiple TLB misses for the same instruction. #82,v,g,n,n,PM_STCX_FAIL,STCX failed ##82099 A stcx (stwcx or stdcx) failed #83,v,g,n,n,PM_STCX_PASS,Stcx passes ##8209D A stcx (stwcx or stdcx) instruction was successful #84,v,g,n,n,PM_ST_MISS_L1,L1 D cache store misses ##C209B A store missed the dcache #85,v,g,n,n,PM_SUSPENDED,Suspended ##00008 Suspended #86,v,g,n,n,PM_XER_MAP_FULL_CYC,Cycles XER mapper full ##10092,60092 The ISU sends a signal indicating that the xer mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be.
$$$$$$$$ { counter 6 } #0,u,g,n,n,PM_BRQ_FULL_CYC,Cycles branch queue full ##10095,60095 The ISU sends a signal indicating that the issue queue that feeds the ifu br unit cannot accept any more group (queue is full of groups). #1,v,g,n,n,PM_CR_MAP_FULL_CYC,Cycles CR logical operation mapper full ##10094,60094 The ISU sends a signal indicating that the cr mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. #2,v,g,n,n,PM_CYC,Processor cycles ##0000F Processor cycles #3,v,g,n,n,PM_DATA_FROM_L25_MOD,Data loaded from L2.5 modified ##C3087 DL1 was reloaded with modified (M) data from the L2 of a chip on this MCM due to a demand load #4,v,g,n,n,PM_DATA_TABLEWALK_CYC,Cycles doing data tablewalks ##80097 This signal is asserted every cycle when a tablewalk is active. While a tablewalk is active any request attempting to access the TLB will be rejected and retried. #5,v,g,n,n,PM_DSLB_MISS,Data SLB misses ##80095 A SLB miss for a data request occurred. SLB misses trap to the operating system to resolve #6,v,g,n,n,PM_DTLB_MISS,Data TLB misses ##80094 A TLB miss for a data request occurred. Requests that miss the TLB may be retried until the instruction is in the next to complete group (unless HID4 is set to allow speculative tablewalks). This may result in multiple TLB misses for the same instruction. #7,v,g,n,n,PM_FPR_MAP_FULL_CYC,Cycles FPR mapper full ##10091,60091 The ISU sends a signal indicating that the FPR mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. #8,v,g,n,n,PM_FPU0_ALL,FPU0 executed add, mult, sub, cmp or sel instruction ##00093 This signal is active for one cycle when fp0 is executing an add, mult, sub, compare, or fsel kind of instruction. This could be fadd*, fmul*, fsub*, fcmp**, fsel where XYZ* means XYZ, XYZs, XYZ., XYZs. 
and XYZ** means XYZu, XYZo #9,v,g,n,n,PM_FPU0_DENORM,FPU0 received denormalized data ##02098 This signal is active for one cycle when one of the operands is denormalized. #10,v,g,n,n,PM_FPU0_FDIV,FPU0 executed FDIV instruction ##00090 This signal is active for one cycle at the end of the microcode executed when fp0 is executing a divide instruction. This could be fdiv, fdivs, fdiv. fdivs. #11,v,g,n,n,PM_FPU0_FMA,FPU0 executed multiply-add instruction ##00091 This signal is active for one cycle when fp0 is executing multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. #12,v,g,n,n,PM_FPU0_FSQRT,FPU0 executed FSQRT instruction ##00092 This signal is active for one cycle at the end of the microcode executed when fp0 is executing a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. #13,v,g,n,n,PM_FPU0_FULL_CYC,Cycles FPU0 issue queue full ##10093,60093 The issue queue for FPU unit 0 cannot accept any more instructions. Issue is stopped #14,v,g,n,n,PM_FPU0_SINGLE,FPU0 executed single precision instruction ##0209B This signal is active for one cycle when fp0 is executing single precision instruction. #15,v,g,n,n,PM_FPU0_STALL3,FPU0 stalled in pipe3 ##02099 This signal indicates that fp0 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). This signal is active during the entire duration of the stall. #16,v,g,n,n,PM_FPU0_STF,FPU0 executed store instruction ##0209A This signal is active for one cycle when fp0 is executing a store instruction. #17,v,g,n,n,PM_FPU1_ALL,FPU1 executed add, mult, sub, cmp or sel instruction ##00097 This signal is active for one cycle when fp1 is executing an add, mult, sub, compare, or fsel kind of instruction. This could be fadd*, fmul*, fsub*, fcmp**, fsel where XYZ* means XYZ, XYZs, XYZ., XYZs. 
and XYZ** means XYZu, XYZo #18,v,g,n,n,PM_FPU1_DENORM,FPU1 received denormalized data ##0209C This signal is active for one cycle when one of the operands is denormalized. #19,v,g,n,n,PM_FPU1_FDIV,FPU1 executed FDIV instruction ##00094 This signal is active for one cycle at the end of the microcode executed when fp1 is executing a divide instruction. This could be fdiv, fdivs, fdiv. fdivs. #20,v,g,n,n,PM_FPU1_FMA,FPU1 executed multiply-add instruction ##00095 This signal is active for one cycle when fp1 is executing multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. #21,v,g,n,n,PM_FPU1_FSQRT,FPU1 executed FSQRT instruction ##00096 This signal is active for one cycle at the end of the microcode executed when fp1 is executing a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. #22,v,g,n,n,PM_FPU1_FULL_CYC,Cycles FPU1 issue queue full ##10097,60097 The issue queue for FPU unit 1 cannot accept any more instructions. Issue is stopped #23,v,g,n,n,PM_FPU1_SINGLE,FPU1 executed single precision instruction ##0209F This signal is active for one cycle when fp1 is executing single precision instruction. #24,v,g,n,n,PM_FPU1_STALL3,FPU1 stalled in pipe3 ##0209D This signal indicates that fp1 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). This signal is active during the entire duration of the stall. #25,v,g,n,n,PM_FPU1_STF,FPU1 executed store instruction ##0209E This signal is active for one cycle when fp1 is executing a store instruction. #26,v,g,n,n,PM_FPU_FSQRT,FPU executed FSQRT instruction ##00080 This signal is active for one cycle at the end of the microcode executed when FPU is executing a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. 
Combined Unit 0 + Unit 1 #27,v,g,n,n,PM_FPU_STF,FPU executed store instruction ##02080 FPU is executing a store instruction. Combined Unit 0 + Unit 1 #28,u,g,n,n,PM_FXU_BUSY,FXU busy ##00002 FXU0 and FXU1 are both busy #29,v,g,n,n,PM_GCT_FULL_CYC,Cycles GCT full ##10090,60090 The ISU sends a signal indicating the gct is full. #30,v,g,n,n,PM_GRP_BR_MPRED,Group experienced a branch mispredict ##1209F,6209F Group experienced a branch mispredict #31,v,g,n,n,PM_GRP_BR_REDIR,Group experienced branch redirect ##1209E,6209E Group experienced branch redirect #32,v,g,n,n,PM_GRP_DISP_REJECT,Group dispatch rejected ##1209C,6209C A group that previously attempted dispatch was rejected. #33,v,g,n,n,PM_GRP_DISP_VALID,Group dispatch valid ##1209B,6209B Dispatch has been attempted for a valid group. Some groups may be rejected. The total number of successful dispatches is the number of dispatch valid minus dispatch reject. #34,v,g,n,n,PM_IC_PREF_INSTALL,Instruction prefetched installed in prefetch ##2209E New line coming into the prefetch buffer #35,v,g,n,n,PM_IC_PREF_REQ,Instruction prefetch requests ##2209D Asserted when a non-canceled prefetch is made to the cache interface unit (CIU). #36,v,g,n,n,PM_IERAT_XLATE_WR,Translation written to ierat ##2209F This signal will be asserted each time the I-ERAT is written. This indicates that an ERAT miss has been serviced. ERAT misses will initiate a sequence resulting in the ERAT being written. ERAT misses that are later ignored will not be counted unless the ERAT is written before the instruction stream is changed, This should be a fairly accurate count of ERAT missed (best available). #37,c,g,n,n,PM_INST_CMPL,Instructions completed ##00009 Number of Eligible Instructions that completed. #38,v,g,n,n,PM_INST_DISP,Instructions dispatched ##12098,12099,1209A,62098,62099,6209A The ISU sends the number of instructions dispatched. 
#39,v,g,n,n,PM_INST_FROM_L25_MOD,Instruction fetched from L2.5 modified ##22086 Instruction fetched from L2.5 modified #40,u,g,n,n,PM_ISLB_MISS,Instruction SLB misses ##80091 A SLB miss for an instruction fetch has occurred #41,v,g,n,n,PM_ITLB_MISS,Instruction TLB misses ##80090 A TLB miss for an Instruction Fetch has occurred #42,v,g,n,n,PM_LARX_LSU0,Larx executed on LSU0 ##8209F A larx (lwarx or ldarx) was executed on side 0 (there is no corresponding unit 1 event since larx instructions can only execute on unit 0) #43,u,g,n,n,PM_LR_CTR_MAP_FULL_CYC,Cycles LR/CTR mapper full ##10096,60096 The ISU sends a signal indicating that the lr/ctr mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. #44,v,g,n,n,PM_LSU0_DERAT_MISS,LSU0 DERAT misses ##80092 A data request (load or store) from LSU Unit 0 missed the ERAT and resulted in an ERAT reload. Multiple instructions may miss the ERAT entry for the same 4K page, but only one reload will occur. #45,v,g,n,n,PM_LSU0_FLUSH_LRQ,LSU0 LRQ flushes ##C0092 A load was flushed by unit 0 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. #46,u,g,n,n,PM_LSU0_FLUSH_SRQ,LSU0 SRQ flushes ##C0093 A store was flushed because a younger load hits an older store that is already in the SRQ or in the same group.
#47,v,g,n,n,PM_LSU0_FLUSH_ULD,LSU0 unaligned load flushes ##C0090 A load was flushed from unit 0 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) #48,v,g,n,n,PM_LSU0_FLUSH_UST,LSU0 unaligned store flushes ##C0091 A store was flushed from unit 0 because it was unaligned (crossed a 4k boundary) #49,v,g,n,n,PM_LSU0_REJECT_ERAT_MISS,LSU0 reject due to ERAT miss ##C609B LSU0 reject due to ERAT miss #50,v,g,n,n,PM_LSU0_REJECT_LMQ_FULL,LSU0 reject due to LMQ full or missed data coming ##C6099 LSU0 reject due to LMQ full or missed data coming #51,v,g,n,n,PM_LSU0_REJECT_RELOAD_CDF,LSU0 reject due to reload CDF or tag update collision ##C609A LSU0 reject due to reload CDF or tag update collision #52,v,g,n,n,PM_LSU0_REJECT_SRQ,LSU0 SRQ rejects ##C6098 LSU0 SRQ rejects #53,u,g,n,n,PM_LSU0_SRQ_STFWD,LSU0 SRQ store forwarded ##C2098 Data from a store instruction was forwarded to a load on unit 0 #54,v,g,n,n,PM_LSU1_DERAT_MISS,LSU1 DERAT misses ##80096 A data request (load or store) from LSU Unit 1 missed the ERAT and resulted in an ERAT reload. Multiple instructions may miss the ERAT entry for the same 4K page, but only one reload will occur. #55,v,g,n,n,PM_LSU1_FLUSH_LRQ,LSU1 LRQ flushes ##C0096 A load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. #56,u,g,n,n,PM_LSU1_FLUSH_SRQ,LSU1 SRQ flushes ##C0097 A store was flushed because younger load hits and older store that is already in the SRQ or in the same group. 
#57,v,g,n,n,PM_LSU1_FLUSH_ULD,LSU1 unaligned load flushes ##C0094 A load was flushed from unit 1 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) #58,u,g,n,n,PM_LSU1_FLUSH_UST,LSU1 unaligned store flushes ##C0095 A store was flushed from unit 1 because it was unaligned (crossed a 4k boundary) #59,v,g,n,n,PM_LSU1_REJECT_ERAT_MISS,LSU1 reject due to ERAT miss ##C609F LSU1 reject due to ERAT miss #60,v,g,n,n,PM_LSU1_REJECT_LMQ_FULL,LSU1 reject due to LMQ full or missed data coming ##C609D LSU1 reject due to LMQ full or missed data coming #61,v,g,n,n,PM_LSU1_REJECT_RELOAD_CDF,LSU1 reject due to reload CDF or tag update collision ##C609E LSU1 reject due to reload CDF or tag update collision #62,v,g,n,n,PM_LSU1_REJECT_SRQ,LSU1 SRQ rejects ##C609C LSU1 SRQ rejects #63,u,g,n,n,PM_LSU1_SRQ_STFWD,LSU1 SRQ store forwarded ##C209C Data from a store instruction was forwarded to a load on unit 1 #64,v,g,n,n,PM_LSU_DERAT_MISS,DERAT misses ##80080 Total D-ERAT Misses (Unit 0 + Unit 1). Requests that miss the Derat are rejected and retried until the request hits in the Erat. This may result in multiple erat misses for the same instruction. #65,v,g,n,n,PM_LSU_FLUSH_LRQ,LRQ flushes ##C0080 A load was flushed because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. #66,v,g,n,n,PM_LSU_LRQ_S0_ALLOC,LRQ slot 0 allocated ##C209E LRQ slot zero was allocated #67,v,g,n,n,PM_LSU_LRQ_S0_VALID,LRQ slot 0 valid ##C209A This signal is asserted every cycle that the Load Request Queue slot zero is valid. The SRQ is 32 entries long and is allocated round-robin. 
#68,v,g,n,n,PM_LSU_REJECT_RELOAD_CDF,LSU reject due to reload CDF or tag update collision ##C6080 LSU reject due to reload CDF or tag update collision #69,v,g,n,n,PM_LSU_SRQ_S0_ALLOC,SRQ slot 0 allocated ##C209D SRQ Slot zero was allocated #70,v,g,n,n,PM_LSU_SRQ_S0_VALID,SRQ slot 0 valid ##C2099 This signal is asserted every cycle that the Store Request Queue slot zero is valid. The SRQ is 32 entries long and is allocated round-robin. #71,v,g,n,n,PM_MRK_DATA_FROM_L25_MOD,Marked data loaded from L2.5 modified ##C7087 DL1 was reloaded with modified (M) data from the L2 of a chip on this MCM due to a marked demand load #72,v,g,n,n,PM_MRK_FXU_FIN,Marked instruction FXU processing finished ##00004 Marked instruction FXU processing finished #73,v,g,n,n,PM_MRK_GRP_ISSUED,Marked group issued ##00005 A sampled instruction was issued #74,v,g,n,n,PM_MRK_IMR_RELOAD,Marked IMR reloaded ##8209A A DL1 reload occurred due to marked load #75,v,g,n,n,PM_MRK_LD_MISS_L1_LSU0,LSU0 L1 D cache load misses ##82098 A marked load, executing on unit 0, missed the dcache #76,v,g,n,n,PM_MRK_LD_MISS_L1_LSU1,LSU1 L1 D cache load misses ##8209C A marked load, executing on unit 1, missed the dcache #77,v,g,n,n,PM_MRK_STCX_FAIL,Marked STCX failed ##8209E A marked stcx (stwcx or stdcx) failed #78,v,g,n,n,PM_MRK_ST_GPS,Marked store sent to GPS ##00003 A sampled store has been sent to the memory subsystem #79,v,g,n,n,PM_MRK_ST_MISS_L1,Marked L1 D cache store misses ##8209B A marked store missed the dcache #80,v,g,n,n,PM_PMC5_OVERFLOW,PMC5 Overflow ##0000A PMC5 Overflow #81,u,g,n,n,PM_SNOOP_TLBIE,Snoop TLBIE ##80093 A TLB miss for a data request occurred. Requests that miss the TLB may be retried until the instruction is in the next to complete group (unless HID4 is set to allow speculative tablewalks). This may result in multiple TLB misses for the same instruction.
#82,v,g,n,n,PM_STCX_FAIL,STCX failed ##82099 A stcx (stwcx or stdcx) failed #83,v,g,n,n,PM_STCX_PASS,Stcx passes ##8209D A stcx (stwcx or stdcx) instruction was successful #84,v,g,n,n,PM_ST_MISS_L1,L1 D cache store misses ##C209B A store missed the dcache #85,v,g,n,n,PM_SUSPENDED,Suspended ##00008 Suspended #86,v,g,n,n,PM_XER_MAP_FULL_CYC,Cycles XER mapper full ##10092,60092 The ISU sends a signal indicating that the xer mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. $$$$$$$$ { counter 7 } #0,v,g,n,n,PM_BR_ISSUED,Branches issued ##23098 This signal will be asserted each time the ISU issues a branch instruction. This signal will be asserted each time the ISU selects a branch instruction to issue. #1,v,g,n,n,PM_BR_MPRED_CR,Branch mispredictions due to CR bit setting ##23099 This signal is asserted when the branch execution unit detects a branch mispredict because the CR value is opposite of the predicted value. This signal is asserted after a branch issue event and will result in a branch redirect flush if not overridden by a flush of an older instruction. #2,v,g,n,n,PM_BR_MPRED_TA,Branch mispredictions due to target address ##2309A branch miss predict due to a target address prediction. This signal will be asserted each time the branch execution unit detects an incorrect target address prediction. This signal will be asserted after a valid branch execution unit issue and will cause a branch mispredict flush unless a flush is detected from an older instruction. #3,u,g,n,n,PM_CRQ_FULL_CYC,Cycles CR issue queue full ##11091,61091 The ISU sends a signal indicating that the issue queue that feeds the ifu cr unit cannot accept any more group (queue is full of groups). 
#4,v,g,n,n,PM_CYC,Processor cycles ##0000F Processor cycles #5,u,g,n,n,PM_DC_INV_L2,L1 D cache entries invalidated from L2 ##C1097 A dcache invalidated was received from the L2 because a line in L2 was castout. #6,u,g,n,n,PM_DC_PREF_OUT_OF_STREAMS,D cache out of streams ##8309A out of streams #7,v,g,n,n,PM_DC_PREF_STREAM_ALLOC,D cache new prefetch stream allocated ##8309F A new Prefetch Stream was allocated #8,v,g,n,n,PM_EE_OFF,Cycles MSR(EE) bit off ##1309B,6309B The number of Cycles MSR(EE) bit was off. #9,u,g,n,n,PM_EE_OFF_EXT_INT,Cycles MSR(EE) bit off and external interrupt pending ##1309F,6309F Cycles MSR(EE) bit off and external interrupt pending #10,v,g,n,n,PM_FLUSH_BR_MPRED,Flush caused by branch mispredict ##11096,61096 Flush caused by branch mispredict #11,v,g,n,n,PM_FLUSH_LSU_BR_MPRED,Flush caused by LSU or branch mispredict ##11097,61097 Flush caused by LSU or branch mispredict #12,v,g,n,n,PM_FPU0_FEST,FPU0 executed FEST instruction ##01092 This signal is active for one cycle when fp0 is executing one of the estimate instructions. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. #13,v,g,n,n,PM_FPU0_FIN,FPU0 produced a result ##01093 fp0 finished, produced a result This only indicates finish, not completion. #14,v,g,n,n,PM_FPU0_FMOV_FEST,FPU0 executed FMOV or FEST instructions ##01090 This signal is active for one cycle when fp0 is executing a move kind of instruction or one of the estimate instructions.. This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ #15,v,g,n,n,PM_FPU0_FPSCR,FPU0 executed FPSCR instruction ##03098 This signal is active for one cycle when fp0 is executing fpscr move related instruction. This could be mtfsfi*, mtfsb0*, mtfsb1*. mffs*, mtfsf*, mcrsf* where XYZ* means XYZ, XYZs, XYZ., XYZs #16,v,g,n,n,PM_FPU0_FRSP_FCONV,FPU0 executed FRSP or FCONV instructions ##01091 This signal is active for one cycle when fp0 is executing frsp or convert kind of instruction. 
This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. #17,v,g,n,n,PM_FPU1_FEST,FPU1 executed FEST instruction ##01096 This signal is active for one cycle when fp1 is executing one of the estimate instructions. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. #18,v,g,n,n,PM_FPU1_FIN,FPU1 produced a result ##01097 fp1 finished, produced a result. This only indicates finish, not completion. #19,v,g,n,n,PM_FPU1_FMOV_FEST,FPU1 executing FMOV or FEST instructions ##01094 This signal is active for one cycle when fp1 is executing a move kind of instruction or one of the estimate instructions.. This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ #20,v,g,n,n,PM_FPU1_FRSP_FCONV,FPU1 executed FRSP or FCONV instructions ##01095 This signal is active for one cycle when fp1 is executing frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. #21,v,g,n,n,PM_FPU_FRSP_FCONV,FPU executed FRSP or FCONV instructions ##01080 This signal is active for one cycle when executing frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. Combined Unit 0 + Unit 1 #22,v,g,n,n,PM_FXLS0_FULL_CYC,Cycles FXU0/LS0 queue full ##11090,61090 The issue queue for FXU/LSU unit 0 cannot accept any more instructions. Issue is stopped #23,v,g,n,n,PM_FXLS1_FULL_CYC,Cycles FXU1/LS1 queue full ##11094,61094 The issue queue for FXU/LSU unit 1 cannot accept any more instructions.
Issue is stopped #24,u,g,n,n,PM_FXU0_BUSY_FXU1_IDLE,FXU0 busy FXU1 idle ##00002 FXU0 is busy while FXU1 was idle #25,v,g,n,n,PM_FXU0_FIN,FXU0 produced a result ##1309A,6309A The Fixed Point unit 0 finished an instruction and produced a result #26,v,g,n,n,PM_FXU1_FIN,FXU1 produced a result ##1309E,6309E The Fixed Point unit 1 finished an instruction and produced a result #27,v,g,n,n,PM_GPR_MAP_FULL_CYC,Cycles GPR mapper full ##1309D,6309D The ISU sends a signal indicating that the gpr mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. #28,v,g,n,n,PM_GRP_CMPL,Group completed ##00003 A group completed. Microcoded instructions that span multiple groups will generate this event once per group. #29,v,g,n,n,PM_GRP_DISP_BLK_SB_CYC,Cycles group dispatch blocked by scoreboard ##13099,63099 The ISU sends a signal indicating that dispatch is blocked by scoreboard. #30,c,g,n,n,PM_INST_CMPL,Instructions completed ##00009 Number of Eligible Instructions that completed. #31,v,g,n,n,PM_L1_DCACHE_RELOAD_VALID,L1 reload data source valid ##C309C The data source information is valid #32,v,g,n,n,PM_L1_PREF,L1 cache data prefetches ##83099 A request to prefetch data into the L1 was made #33,v,g,n,n,PM_L1_WRITE_CYC,Cycles writing to instruction L1 ##2309B This signal is asserted each cycle a cache write is active. 
#34,v,g,n,n,PM_L2_PREF,L2 cache prefetches ##8309B A request to prefetch data into L2 was made #35,v,g,n,n,PM_LD_MISS_L1_LSU0,LSU0 L1 D cache load misses ##C1092 A load, executing on unit 0, missed the dcache #36,v,g,n,n,PM_LD_MISS_L1_LSU1,LSU1 L1 D cache load misses ##C1096 A load, executing on unit 1, missed the dcache #37,v,g,n,n,PM_LD_REF_L1_LSU0,LSU0 L1 D cache load references ##C1090 A load executed on unit 0 #38,v,g,n,n,PM_LD_REF_L1_LSU1,LSU1 L1 D cache load references ##C1094 A load executed on unit 1 #39,v,g,n,n,PM_LSU0_LDF,LSU0 executed Floating Point load instruction ##83098 A floating point load was executed from LSU unit 0 #40,v,g,n,n,PM_LSU1_LDF,LSU1 executed Floating Point load instruction ##8309C A floating point load was executed from LSU unit 1 #41,v,g,n,n,PM_LSU_FLUSH,Flush initiated by LSU ##11095,61095 Flush initiated by LSU #42,u,g,n,n,PM_LSU_LMQ_FULL_CYC,Cycles LMQ full ##C309F The LMQ was full #43,v,g,n,n,PM_LSU_LMQ_LHR_MERGE,LMQ LHR merges ##C709D A dcache miss occured for the same real cache line address as an earlier request already in the Load Miss Queue and was merged into the LMQ entry. #44,v,g,n,n,PM_LSU_LMQ_S0_ALLOC,LMQ slot 0 allocated ##C309E The first entry in the LMQ was allocated. #45,v,g,n,n,PM_LSU_LMQ_S0_VALID,LMQ slot 0 valid ##C309D This signal is asserted every cycle when the first entry in the LMQ is valid. The LMQ had eight entries that are allocated FIFO #46,v,g,n,n,PM_LSU_LRQ_FULL_CYC,Cycles LRQ full ##11092,61092 The ISU sends this signal when the LRQ is full. #47,v,g,n,n,PM_LSU_SRQ_FULL_CYC,Cycles SRQ full ##11093,61093 The ISU sends this signal when the srq is full. #48,u,g,n,n,PM_LSU_SRQ_SYNC_CYC,SRQ sync duration ##8309D This signal is asserted every cycle when a sync is in the SRQ. #49,v,g,n,n,PM_MRK_FPU_FIN,Marked instruction FPU processing finished ##00004 One of the Floating Point Units finished a marked instruction. 
Instructions that finish may not necessary complete #50,v,g,n,n,PM_MRK_INST_FIN,Marked instruction finished ##00005 One of the execution units finished a marked instruction. Instructions that finish may not necessary complete #51,v,g,n,n,PM_MRK_L1_RELOAD_VALID,Marked L1 reload data source valid ##C709C The source information is valid and is for a marked load #52,v,g,n,n,PM_MRK_LSU0_FLUSH_LRQ,LSU0 marked LRQ flushes ##81092 A marked load was flushed by unit 0 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. #53,u,g,n,n,PM_MRK_LSU0_FLUSH_SRQ,LSU0 marked SRQ flushes ##81093 A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group. #54,v,g,n,n,PM_MRK_LSU0_FLUSH_ULD,LSU0 marked unaligned load flushes ##81090 A marked load was flushed from unit 0 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) #55,v,g,n,n,PM_MRK_LSU0_FLUSH_UST,LSU0 marked unaligned store flushes ##81091 A marked store was flushed from unit 0 because it was unaligned #56,v,g,n,n,PM_MRK_LSU1_FLUSH_LRQ,LSU1 marked LRQ flushes ##81096 A marked load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. #57,u,g,n,n,PM_MRK_LSU1_FLUSH_SRQ,LSU1 marked SRQ flushes ##81097 A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group. 
#58,v,g,n,n,PM_MRK_LSU1_FLUSH_ULD,LSU1 marked unaligned load flushes ##81094 A marked load was flushed from unit 1 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) #59,u,g,n,n,PM_MRK_LSU1_FLUSH_UST,LSU1 marked unaligned store flushes ##81095 A marked store was flushed from unit 1 because it was unaligned (crossed a 4k boundary) #60,u,g,n,n,PM_MRK_LSU_SRQ_INST_VALID,Marked instruction valid in SRQ ##C709E This signal is asserted every cycle when a marked request is resident in the Store Request Queue #61,v,g,n,n,PM_PMC6_OVERFLOW,PMC6 Overflow ##0000A PMC6 Overflow #62,v,g,n,n,PM_ST_MISS_L1,L1 D cache store misses ##C1093 A store missed the dcache #63,v,g,n,n,PM_ST_REF_L1,L1 D cache store references ##C1080 Total DL1 Store references #64,v,g,n,n,PM_ST_REF_L1_LSU0,LSU0 L1 D cache store references ##C1091 A store executed on unit 0 #65,v,g,n,n,PM_ST_REF_L1_LSU1,LSU1 L1 D cache store references ##C1095 A store executed on unit 1 #66,v,g,n,n,PM_SUSPENDED,Suspended ##00008 Suspended $$$$$$$$ { counter 8 } #0,v,g,n,n,PM_BR_ISSUED,Branches issued ##23098 This signal will be asserted each time the ISU issues a branch instruction. This signal will be asserted each time the ISU selects a branch instruction to issue. #1,v,g,n,n,PM_BR_MPRED_CR,Branch mispredictions due to CR bit setting ##23099 This signal is asserted when the branch execution unit detects a branch mispredict because the CR value is opposite of the predicted value. This signal is asserted after a branch issue event and will result in a branch redirect flush if not overridden by a flush of an older instruction. #2,v,g,n,n,PM_BR_MPRED_TA,Branch mispredictions due to target address ##2309A branch miss predict due to a target address prediction. This signal will be asserted each time the branch execution unit detects an incorrect target address prediction. 
This signal will be asserted after a valid branch execution unit issue and will cause a branch mispredict flush unless a flush is detected from an older instruction. #3,u,g,n,n,PM_CRQ_FULL_CYC,Cycles CR issue queue full ##11091,61091 The ISU sends a signal indicating that the issue queue that feeds the ifu cr unit cannot accept any more group (queue is full of groups). #4,v,g,n,n,PM_CYC,Processor cycles ##0000F Processor cycles #5,u,g,n,n,PM_DC_INV_L2,L1 D cache entries invalidated from L2 ##C1097 A dcache invalidated was received from the L2 because a line in L2 was castout. #6,u,g,n,n,PM_DC_PREF_OUT_OF_STREAMS,D cache out of streams ##8309A out of streams #7,v,g,n,n,PM_DC_PREF_STREAM_ALLOC,D cache new prefetch stream allocated ##8309F A new Prefetch Stream was allocated #8,v,g,n,n,PM_EE_OFF,Cycles MSR(EE) bit off ##1309B,6309B The number of Cycles MSR(EE) bit was off. #9,u,g,n,n,PM_EE_OFF_EXT_INT,Cycles MSR(EE) bit off and external interrupt pending ##1309F,6309F Cycles MSR(EE) bit off and external interrupt pending #10,v,g,n,n,PM_EXT_INT,External interrupts ##00002 An external interrupt occurred #11,v,g,n,n,PM_FLUSH_BR_MPRED,Flush caused by branch mispredict ##11096,61096 Flush caused by branch mispredict #12,v,g,n,n,PM_FLUSH_LSU_BR_MPRED,Flush caused by LSU or branch mispredict ##11097,61097 Flush caused by LSU or branch mispredict #13,v,g,n,n,PM_FPU0_FEST,FPU0 executed FEST instruction ##01092 This signal is active for one cycle when fp0 is executing one of the estimate instructions. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. #14,v,g,n,n,PM_FPU0_FIN,FPU0 produced a result ##01093 fp0 finished, produced a result This only indicates finish, not completion. #15,v,g,n,n,PM_FPU0_FMOV_FEST,FPU0 executed FMOV or FEST instructions ##01090 This signal is active for one cycle when fp0 is executing a move kind of instruction or one of the estimate instructions.. 
This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ #16,v,g,n,n,PM_FPU0_FPSCR,FPU0 executed FPSCR instruction ##03098 This signal is active for one cycle when fp0 is executing fpscr move related instruction. This could be mtfsfi*, mtfsb0*, mtfsb1*. mffs*, mtfsf*, mcrsf* where XYZ* means XYZ, XYZs, XYZ., XYZs #17,v,g,n,n,PM_FPU0_FRSP_FCONV,FPU0 executed FRSP or FCONV instructions ##01091 This signal is active for one cycle when fp0 is executing frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. #18,v,g,n,n,PM_FPU1_FEST,FPU1 executed FEST instruction ##01096 This signal is active for one cycle when fp1 is executing one of the estimate instructions. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. #19,v,g,n,n,PM_FPU1_FIN,FPU1 produced a result ##01097 fp1 finished, produced a result. This only indicates finish, not completion. #20,v,g,n,n,PM_FPU1_FMOV_FEST,FPU1 executing FMOV or FEST instructions ##01094 This signal is active for one cycle when fp1 is executing a move kind of instruction or one of the estimate instructions.. This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ #21,v,g,n,n,PM_FPU1_FRSP_FCONV,FPU1 executed FRSP or FCONV instructions ##01095 This signal is active for one cycle when fp1 is executing frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. #22,v,g,n,n,PM_FPU_FMOV_FEST,FPU executing FMOV or FEST instructions ##01080 This signal is active for one cycle when executing a move kind of instruction or one of the estimate instructions.. This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ . Combined Unit 0 + Unit 1 #23,v,g,n,n,PM_FXLS0_FULL_CYC,Cycles FXU0/LS0 queue full ##11090,61090 The issue queue for FXU/LSU unit 0 cannot accept any more instructions. 
Issue is stopped #24,v,g,n,n,PM_FXLS1_FULL_CYC,Cycles FXU1/LS1 queue full ##11094,61094 The issue queue for FXU/LSU unit 1 cannot accept any more instructions. Issue is stopped #25,v,g,n,n,PM_FXU0_FIN,FXU0 produced a result ##1309A,6309A The Fixed Point unit 0 finished an instruction and produced a result #26,v,g,n,n,PM_FXU1_FIN,FXU1 produced a result ##1309E,6309E The Fixed Point unit 1 finished an instruction and produced a result #27,v,g,n,n,PM_GPR_MAP_FULL_CYC,Cycles GPR mapper full ##1309D,6309D The ISU sends a signal indicating that the gpr mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. #28,v,g,n,n,PM_GRP_DISP_BLK_SB_CYC,Cycles group dispatch blocked by scoreboard ##13099,63099 The ISU sends a signal indicating that dispatch is blocked by scoreboard. #29,v,g,n,n,PM_GRP_DISP_REJECT,Group dispatch rejected ##00003 A group that previously attempted dispatch was rejected. #30,c,g,n,n,PM_INST_CMPL,Instructions completed ##00009 Number of Eligible Instructions that completed. #31,v,g,n,n,PM_L1_DCACHE_RELOAD_VALID,L1 reload data source valid ##C309C The data source information is valid #32,v,g,n,n,PM_L1_PREF,L1 cache data prefetches ##83099 A request to prefetch data into the L1 was made #33,v,g,n,n,PM_L1_WRITE_CYC,Cycles writing to instruction L1 ##2309B This signal is asserted each cycle a cache write is active.
#34,v,g,n,n,PM_L2_PREF,L2 cache prefetches ##8309B A request to prefetch data into L2 was made #35,v,g,n,n,PM_LD_MISS_L1_LSU0,LSU0 L1 D cache load misses ##C1092 A load, executing on unit 0, missed the dcache #36,v,g,n,n,PM_LD_MISS_L1_LSU1,LSU1 L1 D cache load misses ##C1096 A load, executing on unit 1, missed the dcache #37,v,g,n,n,PM_LD_REF_L1,L1 D cache load references ##C1080 Total DL1 Load references #38,v,g,n,n,PM_LD_REF_L1_LSU0,LSU0 L1 D cache load references ##C1090 A load executed on unit 0 #39,v,g,n,n,PM_LD_REF_L1_LSU1,LSU1 L1 D cache load references ##C1094 A load executed on unit 1 #40,v,g,n,n,PM_LSU0_LDF,LSU0 executed Floating Point load instruction ##83098 A floating point load was executed from LSU unit 0 #41,v,g,n,n,PM_LSU1_LDF,LSU1 executed Floating Point load instruction ##8309C A floating point load was executed from LSU unit 1 #42,v,g,n,n,PM_LSU_FLUSH,Flush initiated by LSU ##11095,61095 Flush initiated by LSU #43,v,g,n,n,PM_LSU_LDF,LSU executed Floating Point load instruction ##83080 LSU executed Floating Point load instruction #44,u,g,n,n,PM_LSU_LMQ_FULL_CYC,Cycles LMQ full ##C309F The LMQ was full #45,v,g,n,n,PM_LSU_LMQ_LHR_MERGE,LMQ LHR merges ##C709D A dcache miss occured for the same real cache line address as an earlier request already in the Load Miss Queue and was merged into the LMQ entry. #46,v,g,n,n,PM_LSU_LMQ_S0_ALLOC,LMQ slot 0 allocated ##C309E The first entry in the LMQ was allocated. #47,v,g,n,n,PM_LSU_LMQ_S0_VALID,LMQ slot 0 valid ##C309D This signal is asserted every cycle when the first entry in the LMQ is valid. The LMQ had eight entries that are allocated FIFO #48,v,g,n,n,PM_LSU_LRQ_FULL_CYC,Cycles LRQ full ##11092,61092 The ISU sends this signal when the LRQ is full. #49,v,g,n,n,PM_LSU_SRQ_FULL_CYC,Cycles SRQ full ##11093,61093 The ISU sends this signal when the srq is full. #50,u,g,n,n,PM_LSU_SRQ_SYNC_CYC,SRQ sync duration ##8309D This signal is asserted every cycle when a sync is in the SRQ. 
#51,v,g,n,n,PM_MRK_L1_RELOAD_VALID,Marked L1 reload data source valid ##C709C The source information is valid and is for a marked load #52,v,g,n,n,PM_MRK_LSU0_FLUSH_LRQ,LSU0 marked LRQ flushes ##81092 A marked load was flushed by unit 0 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. #53,u,g,n,n,PM_MRK_LSU0_FLUSH_SRQ,LSU0 marked SRQ flushes ##81093 A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group. #54,v,g,n,n,PM_MRK_LSU0_FLUSH_ULD,LSU0 marked unaligned load flushes ##81090 A marked load was flushed from unit 0 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) #55,v,g,n,n,PM_MRK_LSU0_FLUSH_UST,LSU0 marked unaligned store flushes ##81091 A marked store was flushed from unit 0 because it was unaligned #56,v,g,n,n,PM_MRK_LSU1_FLUSH_LRQ,LSU1 marked LRQ flushes ##81096 A marked load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. #57,u,g,n,n,PM_MRK_LSU1_FLUSH_SRQ,LSU1 marked SRQ flushes ##81097 A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group. #58,v,g,n,n,PM_MRK_LSU1_FLUSH_ULD,LSU1 marked unaligned load flushes ##81094 A marked load was flushed from unit 1 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) #59,u,g,n,n,PM_MRK_LSU1_FLUSH_UST,LSU1 marked unaligned store flushes ##81095 A marked store was flushed from unit 1 because it was unaligned (crossed a 4k boundary) #60,c,g,n,n,PM_MRK_LSU_FIN,Marked instruction LSU processing finished ##00004 One of the Load/Store Units finished a marked instruction. 
Instructions that finish may not necessary complete #61,u,g,n,n,PM_MRK_LSU_SRQ_INST_VALID,Marked instruction valid in SRQ ##C709E This signal is asserted every cycle when a marked request is resident in the Store Request Queue #62,v,g,n,n,PM_PMC7_OVERFLOW,PMC7 Overflow ##0000A PMC7 Overflow #63,v,g,n,n,PM_ST_MISS_L1,L1 D cache store misses ##C1093 A store missed the dcache #64,v,g,n,n,PM_ST_REF_L1_LSU0,LSU0 L1 D cache store references ##C1091 A store executed on unit 0 #65,v,g,n,n,PM_ST_REF_L1_LSU1,LSU1 L1 D cache store references ##C1095 A store executed on unit 1 #66,v,g,n,n,PM_SUSPENDED,Suspended ##00008 Suspended #67,u,g,n,n,PM_TB_BIT_TRANS,Time Base bit transition ##00005 When the selected time base bit (as specified in MMCR0[TBSEL])transitions from 0 to 1 papi-5.6.0/src/libpfm4/lib/pfmlib_intel_ivbep_unc_irp.c000664 001750 001750 00000005756 13216244365 025162 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_intel_ivbep_irp.c : Intel IvyBridge-EP IRP uncore PMU * * Copyright (c) 2014 Google Inc. All rights reserved * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include #include #include #include #include /* private headers */ #include "pfmlib_priv.h" #include "pfmlib_intel_x86_priv.h" #include "pfmlib_intel_snbep_unc_priv.h" #include "events/intel_ivbep_unc_irp_events.h" static void display_irp(void *this, pfmlib_event_desc_t *e, void *val) { const intel_x86_entry_t *pe = this_pe(this); pfm_snbep_unc_reg_t *reg = val; __pfm_vbprintf("[UNC_IRP=0x%"PRIx64" event=0x%x umask=0x%x en=%d " "edge=%d thres=%d] %s\n", reg->val, reg->irp.unc_event, reg->irp.unc_umask, reg->irp.unc_en, reg->irp.unc_edge, reg->irp.unc_thres, pe[e->event].name); } pfmlib_pmu_t intel_ivbep_unc_irp_support = { .desc = "Intel Ivy Bridge-EP IRP uncore", .name = "ivbep_unc_irp", .perf_name = "uncore_irp", .pmu = PFM_PMU_INTEL_IVBEP_UNC_IRP, .pme_count = LIBPFM_ARRAY_SIZE(intel_ivbep_unc_i_pe), .type = PFM_PMU_TYPE_UNCORE, .num_cntrs = 4, .num_fixed_cntrs = 0, .max_encoding = 3, .pe = intel_ivbep_unc_i_pe, .atdesc = snbep_unc_mods, .flags = PFMLIB_PMU_FL_RAW_UMASK, .pmu_detect = pfm_intel_ivbep_unc_detect, .get_event_encoding[PFM_OS_NONE] = pfm_intel_snbep_unc_get_encoding, PFMLIB_ENCODE_PERF(pfm_intel_snbep_unc_get_perf_encoding), PFMLIB_OS_DETECT(pfm_intel_x86_perf_detect), .get_event_first = pfm_intel_x86_get_event_first, .get_event_next = pfm_intel_x86_get_event_next, .event_is_valid = pfm_intel_x86_event_is_valid, .validate_table = pfm_intel_x86_validate_table, .get_event_info = pfm_intel_x86_get_event_info, .get_event_attr_info = pfm_intel_x86_get_event_attr_info, PFMLIB_VALID_PERF_PATTRS(pfm_intel_snbep_unc_perf_validate_pattrs), .get_event_nattrs = pfm_intel_x86_get_event_nattrs, .display_reg = display_irp, }; papi-5.6.0/src/libpfm4/lib/pfmlib_intel_bdw.c000664 001750 001750 
00000007712 13216244365 023104 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_intel_bdw.c : Intel Broadwell core PMU * * Copyright (c) 2014 Google, Inc * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/

/* private headers */
#include "pfmlib_priv.h"
#include "pfmlib_intel_x86_priv.h"
#include "events/intel_bdw_events.h"

/*
 * CPU model numbers referenced by intel_bdw_support.cpu_models.
 * The list ends with a 0 entry (presumably the terminator expected by
 * pfm_intel_x86_model_detect -- confirm in that helper).
 */
static const int bdw_models[] = {
	61, /* Broadwell Core-M */
	71, /* Broadwell + GT3e (Iris Pro graphics) */
	0
};

/*
 * CPU model numbers referenced by intel_bdw_ep_support.cpu_models.
 * Same layout as bdw_models: model numbers followed by a 0 entry.
 */
static const int bdwep_models[] = {
	79, /* Broadwell-EP, Xeon */
	86, /* Broadwell-EP, Xeon D */
	0
};

/*
 * pfm_bdw_init - .pmu_init callback shared by both Broadwell descriptors.
 *
 * Sets the global pfm_intel_x86_cfg.arch_version to 4 and returns
 * PFM_SUCCESS. The 'this' argument is not used.
 */
static int
pfm_bdw_init(void *this)
{
	pfm_intel_x86_cfg.arch_version = 4;
	return PFM_SUCCESS;
}

/*
 * PMU descriptor for Intel Broadwell ("bdw").
 * Uses the intel_bdw_pe event table, cpu family 6 and the bdw_models list;
 * all callbacks are the generic pfm_intel_x86_* helpers.
 */
pfmlib_pmu_t intel_bdw_support={
	.desc			= "Intel Broadwell",
	.name			= "bdw",
	.pmu			= PFM_PMU_INTEL_BDW,
	.pme_count		= LIBPFM_ARRAY_SIZE(intel_bdw_pe),
	.type			= PFM_PMU_TYPE_CORE,
	.supported_plm		= INTEL_X86_PLM,
	.num_cntrs		= 8, /* consider with HT off by default */
	.num_fixed_cntrs	= 3,
	.max_encoding		= 2, /* offcore_response */
	.pe			= intel_bdw_pe,
	.atdesc			= intel_x86_mods,
	.flags			= PFMLIB_PMU_FL_RAW_UMASK
				| INTEL_X86_PMU_FL_ECMASK,
	.cpu_family		= 6,
	.cpu_models		= bdw_models,
	.pmu_detect		= pfm_intel_x86_model_detect,
	.pmu_init		= pfm_bdw_init,
	.get_event_encoding[PFM_OS_NONE] = pfm_intel_x86_get_encoding,
	 PFMLIB_ENCODE_PERF(pfm_intel_x86_get_perf_encoding),
	.get_event_first	= pfm_intel_x86_get_event_first,
	.get_event_next		= pfm_intel_x86_get_event_next,
	.event_is_valid		= pfm_intel_x86_event_is_valid,
	.validate_table		= pfm_intel_x86_validate_table,
	.get_event_info		= pfm_intel_x86_get_event_info,
	.get_event_attr_info	= pfm_intel_x86_get_event_attr_info,
	 PFMLIB_VALID_PERF_PATTRS(pfm_intel_x86_perf_validate_pattrs),
	.get_event_nattrs	= pfm_intel_x86_get_event_nattrs,
	.can_auto_encode	= pfm_intel_x86_can_auto_encode,
};

/*
 * PMU descriptor for Intel Broadwell EP ("bdw_ep").
 * Differs from intel_bdw_support only in .desc, .name, .pmu and
 * .cpu_models (bdwep_models); it shares the intel_bdw_pe event table.
 */
pfmlib_pmu_t intel_bdw_ep_support={
	.desc			= "Intel Broadwell EP",
	.name			= "bdw_ep",
	.pmu			= PFM_PMU_INTEL_BDW_EP,
	.pme_count		= LIBPFM_ARRAY_SIZE(intel_bdw_pe),
	.type			= PFM_PMU_TYPE_CORE,
	.supported_plm		= INTEL_X86_PLM,
	.num_cntrs		= 8, /* consider with HT off by default */
	.num_fixed_cntrs	= 3,
	.max_encoding		= 2, /* offcore_response */
	.pe			= intel_bdw_pe,
	.atdesc			= intel_x86_mods,
	.flags			= PFMLIB_PMU_FL_RAW_UMASK
				| INTEL_X86_PMU_FL_ECMASK,
	.cpu_family		= 6,
	.cpu_models		= bdwep_models,
	.pmu_detect		= pfm_intel_x86_model_detect,
	.pmu_init		= pfm_bdw_init,
	.get_event_encoding[PFM_OS_NONE] = pfm_intel_x86_get_encoding,
	 PFMLIB_ENCODE_PERF(pfm_intel_x86_get_perf_encoding),
	.get_event_first	= pfm_intel_x86_get_event_first,
	.get_event_next		= pfm_intel_x86_get_event_next,
	.event_is_valid		= pfm_intel_x86_event_is_valid,
	.validate_table		= pfm_intel_x86_validate_table,
	.get_event_info		= pfm_intel_x86_get_event_info,
	.get_event_attr_info	= pfm_intel_x86_get_event_attr_info,
	 PFMLIB_VALID_PERF_PATTRS(pfm_intel_x86_perf_validate_pattrs),
	.get_event_nattrs	= pfm_intel_x86_get_event_nattrs,
	.can_auto_encode	= pfm_intel_x86_can_auto_encode,
};
papi-5.6.0/src/perfctr-2.7.x/examples/perfex/x86.h000664 001750 001750 00000000450 13216244370 023445 0ustar00jshenry1963jshenry1963000000 000000 /* $Id: x86.h,v 1.1 2004/01/11 22:07:12 mikpe Exp $ * x86-specific declarations. * * Copyright (C) 1999-2004 Mikael Pettersson */ #define ARCH_LONG_OPTIONS \ { "p4pe", 1, NULL, 1 }, { "p4_pebs_enable", 1, NULL, 1 }, \ { "p4pmv", 1, NULL, 2 }, { "p4_pebs_matrix_vert", 1, NULL, 2 }, papi-5.6.0/src/components/perfctr_ppc/000775 001750 001750 00000000000 13216244360 022015 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/man/man3/PAPIF_get_event_info.3000664 001750 001750 00000001152 13216244355 022144 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPIF_get_event_info" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPIF_get_event_info \- .PP Get the event's name and description info\&.
.SH SYNOPSIS .br .PP .SH "Detailed Description" .PP .PP \fBFortran Interface:\fP .RS 4 #include 'fpapi\&.h' .br \fBPAPIF_get_event_info\fP(C_INT EventCode, C_STRING symbol, C_STRING long_descr, C_STRING short_descr, C_INT count, C_STRING event_note, C_INT flags, C_INT check ) .RE .PP \fBSee Also:\fP .RS 4 \fBPAPI_get_event_info\fP .RE .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. papi-5.6.0/src/examples/add_event/000775 001750 001750 00000000000 13216244361 021071 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/libpfm4/lib/events/amd64_events_fam12h.h000664 001750 001750 00000141737 13216244364 024557 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2011 University of Tennessee * Contributed by Vince Weaver * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. 
* * PMU: amd64_fam12h (AMD64 Fam12h) */ static const amd64_umask_t amd64_fam12h_dispatched_fpu[]={ { .uname = "OPS_ADD", .udesc = "Add pipe ops excluding load ops and SSE move ops", .ucode = 0x1, }, { .uname = "OPS_MULTIPLY", .udesc = "Multiply pipe ops excluding load ops and SSE move ops", .ucode = 0x2, }, { .uname = "OPS_STORE", .udesc = "Store pipe ops excluding load ops and SSE move ops", .ucode = 0x4, }, { .uname = "OPS_ADD_PIPE_LOAD_OPS", .udesc = "Add pipe load ops and SSE move ops", .ucode = 0x8, }, { .uname = "OPS_MULTIPLY_PIPE_LOAD_OPS", .udesc = "Multiply pipe load ops and SSE move ops", .ucode = 0x10, }, { .uname = "OPS_STORE_PIPE_LOAD_OPS", .udesc = "Store pipe load ops and SSE move ops", .ucode = 0x20, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x3f, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam12h_retired_sse_operations[]={ { .uname = "SINGLE_ADD_SUB_OPS", .udesc = "Single precision add/subtract ops", .ucode = 0x1, }, { .uname = "SINGLE_MUL_OPS", .udesc = "Single precision multiply ops", .ucode = 0x2, }, { .uname = "SINGLE_DIV_OPS", .udesc = "Single precision divide/square root ops", .ucode = 0x4, }, { .uname = "DOUBLE_ADD_SUB_OPS", .udesc = "Double precision add/subtract ops", .ucode = 0x8, }, { .uname = "DOUBLE_MUL_OPS", .udesc = "Double precision multiply ops", .ucode = 0x10, }, { .uname = "DOUBLE_DIV_OPS", .udesc = "Double precision divide/square root ops", .ucode = 0x20, }, { .uname = "OP_TYPE", .udesc = "Op type: 0=uops. 
1=FLOPS", .ucode = 0x40, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x7f, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam12h_retired_move_ops[]={ { .uname = "LOW_QW_MOVE_UOPS", .udesc = "Merging low quadword move uops", .ucode = 0x1, }, { .uname = "HIGH_QW_MOVE_UOPS", .udesc = "Merging high quadword move uops", .ucode = 0x2, }, { .uname = "ALL_OTHER_MERGING_MOVE_UOPS", .udesc = "All other merging move uops", .ucode = 0x4, }, { .uname = "ALL_OTHER_MOVE_UOPS", .udesc = "All other move uops", .ucode = 0x8, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0xf, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam12h_retired_serializing_ops[]={ { .uname = "SSE_BOTTOM_EXECUTING_UOPS", .udesc = "SSE bottom-executing uops retired", .ucode = 0x1, }, { .uname = "SSE_BOTTOM_SERIALIZING_UOPS", .udesc = "SSE bottom-serializing uops retired", .ucode = 0x2, }, { .uname = "X87_BOTTOM_EXECUTING_UOPS", .udesc = "X87 bottom-executing uops retired", .ucode = 0x4, }, { .uname = "X87_BOTTOM_SERIALIZING_UOPS", .udesc = "X87 bottom-serializing uops retired", .ucode = 0x8, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0xf, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam12h_fp_scheduler_cycles[]={ { .uname = "BOTTOM_EXECUTE_CYCLES", .udesc = "Number of cycles a bottom-execute uop is in the FP scheduler", .ucode = 0x1, }, { .uname = "BOTTOM_SERIALIZING_CYCLES", .udesc = "Number of cycles a bottom-serializing uop is in the FP scheduler", .ucode = 0x2, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x3, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam12h_segment_register_loads[]={ { .uname = "ES", .udesc = "ES", .ucode = 0x1, }, { .uname = "CS", .udesc = "CS", .ucode = 0x2, }, { .uname = "SS", .udesc = "SS", .ucode = 0x4, }, { .uname = "DS", .udesc = "DS", .ucode = 0x8, }, { .uname 
= "FS", .udesc = "FS", .ucode = 0x10, }, { .uname = "GS", .udesc = "GS", .ucode = 0x20, }, { .uname = "HS", .udesc = "HS", .ucode = 0x40, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x7f, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam12h_locked_ops[]={ { .uname = "EXECUTED", .udesc = "The number of locked instructions executed", .ucode = 0x1, }, { .uname = "CYCLES_SPECULATIVE_PHASE", .udesc = "The number of cycles spent in speculative phase", .ucode = 0x2, }, { .uname = "CYCLES_NON_SPECULATIVE_PHASE", .udesc = "The number of cycles spent in non-speculative phase (including cache miss penalty)", .ucode = 0x4, }, { .uname = "CYCLES_WAITING", .udesc = "The number of cycles waiting for a cache hit (cache miss penalty).", .ucode = 0x8, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0xf, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam12h_cancelled_store_to_load_forward_operations[]={ { .uname = "ADDRESS_MISMATCHES", .udesc = "Address mismatches (starting byte not the same).", .ucode = 0x1, }, { .uname = "STORE_IS_SMALLER_THAN_LOAD", .udesc = "Store is smaller than load.", .ucode = 0x2, }, { .uname = "MISALIGNED", .udesc = "Misaligned.", .ucode = 0x4, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x7, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const amd64_umask_t amd64_fam12h_data_cache_refills[]={ { .uname = "SYSTEM", .udesc = "Refill from the Northbridge", .ucode = 0x1, }, { .uname = "L2_SHARED", .udesc = "Shared-state line from L2", .ucode = 0x2, }, { .uname = "L2_EXCLUSIVE", .udesc = "Exclusive-state line from L2", .ucode = 0x4, }, { .uname = "L2_OWNED", .udesc = "Owned-state line from L2", .ucode = 0x8, }, { .uname = "L2_MODIFIED", .udesc = "Modified-state line from L2", .ucode = 0x10, }, { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x1f, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, }, }; static const 
amd64_umask_t amd64_fam12h_data_cache_refills_from_northbridge[]={
  /*
   * Unit masks for DATA_CACHE_REFILLS_FROM_SYSTEM (event code 0x43).
   *
   * Convention for every table in this group: the individual entries use
   * distinct ucode bits and the final "ALL" entry carries the bitwise OR of
   * those bits.  "ALL" is tagged AMD64_FL_NCOMBO | AMD64_FL_DFL (presumably
   * "not combinable" and "default umask" -- confirm against the flag
   * definitions).
   */
  { .uname = "INVALID", .udesc = "Invalid", .ucode = 0x1, },
  { .uname = "SHARED", .udesc = "Shared", .ucode = 0x2, },
  { .uname = "EXCLUSIVE", .udesc = "Exclusive", .ucode = 0x4, },
  { .uname = "OWNED", .udesc = "Owned", .ucode = 0x8, },
  { .uname = "MODIFIED", .udesc = "Modified", .ucode = 0x10, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x1f, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

/* Unit masks for DATA_CACHE_LINES_EVICTED (event code 0x44). */
static const amd64_umask_t amd64_fam12h_data_cache_lines_evicted[]={
  { .uname = "INVALID", .udesc = "Invalid", .ucode = 0x1, },
  { .uname = "SHARED", .udesc = "Shared", .ucode = 0x2, },
  { .uname = "EXCLUSIVE", .udesc = "Exclusive", .ucode = 0x4, },
  { .uname = "OWNED", .udesc = "Owned", .ucode = 0x8, },
  { .uname = "MODIFIED", .udesc = "Modified", .ucode = 0x10, },
  { .uname = "BY_PREFETCHNTA", .udesc = "Cache line evicted was brought into the cache by a PrefetchNTA instruction.", .ucode = 0x20, },
  { .uname = "NOT_BY_PREFETCHNTA", .udesc = "Cache line evicted was not brought into the cache by a PrefetchNTA instruction.", .ucode = 0x40, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x7f, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

/* Unit masks for L1_DTLB_MISS_AND_L2_DTLB_HIT (event code 0x45). */
static const amd64_umask_t amd64_fam12h_l1_dtlb_miss_and_l2_dtlb_hit[]={
  { .uname = "L2_4K_TLB_HIT", .udesc = "L2 4K TLB hit", .ucode = 0x1, },
  { .uname = "L2_2M_TLB_HIT", .udesc = "L2 2M TLB hit", .ucode = 0x2, },
  { .uname = "L2_1G_TLB_HIT", .udesc = "L2 1G TLB hit", .ucode = 0x4, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x7, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

/* Unit masks for L1_DTLB_AND_L2_DTLB_MISS (event code 0x46). */
static const amd64_umask_t amd64_fam12h_l1_dtlb_and_l2_dtlb_miss[]={
  { .uname = "4K_TLB_RELOAD", .udesc = "4K TLB reload", .ucode = 0x1, },
  { .uname = "2M_TLB_RELOAD", .udesc = "2M TLB reload", .ucode = 0x2, },
  { .uname = "1G_TLB_RELOAD", .udesc = "1G TLB reload", .ucode = 0x4, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x7, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

/* Unit masks for PREFETCH_INSTRUCTIONS_DISPATCHED (event code 0x4b). */
static const amd64_umask_t amd64_fam12h_prefetch_instructions_dispatched[]={
  { .uname = "LOAD", .udesc = "Load (Prefetch, PrefetchT0/T1/T2)", .ucode = 0x1, },
  { .uname = "STORE", .udesc = "Store (PrefetchW)", .ucode = 0x2, },
  { .uname = "NTA", .udesc = "NTA (PrefetchNTA)", .ucode = 0x4, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x7, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

/* Unit masks for DCACHE_MISSES_BY_LOCKED_INSTRUCTIONS (event code 0x4c); only bit 0x2 is defined. */
static const amd64_umask_t amd64_fam12h_dcache_misses_by_locked_instructions[]={
  { .uname = "DATA_CACHE_MISSES_BY_LOCKED_INSTRUCTIONS", .udesc = "Data cache misses by locked instructions", .ucode = 0x2, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x2, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

/* Unit masks for L1_DTLB_HIT (event code 0x4d). */
static const amd64_umask_t amd64_fam12h_l1_dtlb_hit[]={
  { .uname = "L1_4K_TLB_HIT", .udesc = "L1 4K TLB hit", .ucode = 0x1, },
  { .uname = "L1_2M_TLB_HIT", .udesc = "L1 2M TLB hit", .ucode = 0x2, },
  { .uname = "L1_1G_TLB_HIT", .udesc = "L1 1G TLB hit", .ucode = 0x4, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x7, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

/* Unit masks for INEFFECTIVE_SW_PREFETCHES (event code 0x52); bits 0x1 and 0x8 only. */
static const amd64_umask_t amd64_fam12h_ineffective_sw_prefetches[]={
  { .uname = "SW_PREFETCH_HIT_IN_L1", .udesc = "Software prefetch hit in the L1.", .ucode = 0x1, },
  { .uname = "SW_PREFETCH_HIT_IN_L2", .udesc = "Software prefetch hit in L2.", .ucode = 0x8, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x9, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

/* Unit masks for MEMORY_REQUESTS (event code 0x65); bits 0x1, 0x2, 0x4 and 0x80. */
static const amd64_umask_t amd64_fam12h_memory_requests[]={
  { .uname = "NON_CACHEABLE", .udesc = "Requests to non-cacheable (UC) memory", .ucode = 0x1, },
  { .uname = "WRITE_COMBINING", .udesc = "Requests to write-combining (WC) memory or WC buffer flushes to WB memory", .ucode = 0x2, },
  { .uname = "CACHE_DISABLED", .udesc = "Requests to cache-disabled (CD) memory", .ucode = 0x4, },
  { .uname = "STREAMING_STORE", .udesc = "Streaming store (SS) requests", .ucode = 0x80, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x87, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

/* Unit masks for DATA_PREFETCHES (event code 0x67). */
static const amd64_umask_t amd64_fam12h_data_prefetches[]={
  { .uname = "CANCELLED", .udesc = "Cancelled prefetches", .ucode = 0x1, },
  { .uname = "ATTEMPTED", .udesc = "Prefetch attempts", .ucode = 0x2, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x3, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

/* Unit masks for NORTHBRIDGE_READ_RESPONSES (event code 0x6c). */
static const amd64_umask_t amd64_fam12h_northbridge_read_responses[]={
  { .uname = "EXCLUSIVE", .udesc = "Exclusive", .ucode = 0x1, },
  { .uname = "MODIFIED", .udesc = "Modified", .ucode = 0x2, },
  { .uname = "SHARED", .udesc = "Shared", .ucode = 0x4, },
  { .uname = "OWNED", .udesc = "Owned", .ucode = 0x8, },
  { .uname = "DATA_ERROR", .udesc = "Data Error", .ucode = 0x10, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x1f, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

/* Unit masks for OCTWORDS_WRITTEN_TO_SYSTEM (event code 0x6d); only bit 0x1 is defined. */
static const amd64_umask_t amd64_fam12h_octwords_written_to_system[]={
  { .uname = "OCTWORD_WRITE_TRANSFER", .udesc = "Octword write transfer", .ucode = 0x1, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x1, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

/* Unit masks for REQUESTS_TO_L2 (event code 0x7d). */
static const amd64_umask_t amd64_fam12h_requests_to_l2[]={
  { .uname = "INSTRUCTIONS", .udesc = "IC fill", .ucode = 0x1, },
  { .uname = "DATA", .udesc = "DC fill", .ucode = 0x2, },
  { .uname = "TLB_WALK", .udesc = "TLB fill (page table walks)", .ucode = 0x4, },
  { .uname = "SNOOP", .udesc = "Tag snoop request", .ucode = 0x8, },
  { .uname = "CANCELLED", .udesc = "Cancelled request", .ucode = 0x10, },
  { .uname = "HW_PREFETCH_FROM_DC", .udesc = "Hardware prefetch from DC", .ucode = 0x20, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x3f, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

/* Unit masks for L2_CACHE_MISS (event code 0x7e). */
static const amd64_umask_t amd64_fam12h_l2_cache_miss[]={
  { .uname = "INSTRUCTIONS", .udesc = "IC fill", .ucode = 0x1, },
  { .uname = "DATA", .udesc = "DC fill (includes possible replays, whereas EventSelect 041h does not)", .ucode = 0x2, },
  { .uname = "TLB_WALK", .udesc = "TLB page table walk", .ucode = 0x4, },
  { .uname = "HW_PREFETCH_FROM_DC", .udesc = "Hardware prefetch from DC", .ucode = 0x8, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0xf, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

/* Unit masks for L2_FILL_WRITEBACK (event code 0x7f). */
static const amd64_umask_t amd64_fam12h_l2_fill_writeback[]={
  { .uname = "L2_FILLS", .udesc = "L2 fills (victims from L1 caches, TLB page table walks and data prefetches)", .ucode = 0x1, },
  { .uname = "L2_WRITEBACKS", .udesc = "L2 Writebacks to system.", .ucode = 0x2, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x3, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

/* Unit masks for L1_ITLB_MISS_AND_L2_ITLB_MISS (event code 0x85). */
static const amd64_umask_t amd64_fam12h_l1_itlb_miss_and_l2_itlb_miss[]={
  { .uname = "4K_PAGE_FETCHES", .udesc = "Instruction fetches to a 4K page.", .ucode = 0x1, },
  { .uname = "2M_PAGE_FETCHES", .udesc = "Instruction fetches to a 2M page.", .ucode = 0x2, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x3, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

/* Unit masks for INSTRUCTION_CACHE_LINES_INVALIDATED (event code 0x8c). */
static const amd64_umask_t amd64_fam12h_instruction_cache_lines_invalidated[]={
  { .uname = "INVALIDATING_PROBE_NO_IN_FLIGHT", .udesc = "Invalidating probe that did not hit any in-flight instructions.", .ucode = 0x1, },
  { .uname = "INVALIDATING_PROBE_ONE_OR_MORE_IN_FLIGHT", .udesc = "Invalidating probe that hit one or more in-flight instructions.", .ucode = 0x2, },
  { .uname = "SMC_NO_INFLIGHT", .udesc = "SMC that did not hit any in-flight instructions.", .ucode = 0x4, },
  { .uname = "SMC_INFLIGHT", .udesc = "SMC that hit one or more in-flight instructions.", .ucode = 0x8, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0xf, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

/* Unit masks for RETIRED_MMX_AND_FP_INSTRUCTIONS (event code 0xcb). */
static const amd64_umask_t amd64_fam12h_retired_mmx_and_fp_instructions[]={
  { .uname = "X87", .udesc = "X87 instructions", .ucode = 0x1, },
  { .uname = "MMX_AND_3DNOW", .udesc = "MMX and 3DNow! instructions", .ucode = 0x2, },
  { .uname = "SSE_AND_SSE2", .udesc = "SSE and SSE2 instructions", .ucode = 0x4, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x7, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

/* Interrupt-event unit masks: eight single-bit entries, ALL = 0xff. */
static const amd64_umask_t amd64_fam12h_interrupt_events[]={
  { .uname = "FIXED_AND_LPA", .udesc = "Fixed and LPA", .ucode = 0x1, },
  { .uname = "LPA", .udesc = "LPA", .ucode = 0x2, },
  { .uname = "SMI", .udesc = "SMI", .ucode = 0x4, },
  { .uname = "NMI", .udesc = "NMI", .ucode = 0x8, },
  { .uname = "INIT", .udesc = "INIT", .ucode = 0x10, },
  { .uname = "STARTUP", .udesc = "STARTUP", .ucode = 0x20, },
  { .uname = "INT", .udesc = "INT", .ucode = 0x40, },
  { .uname = "EOI", .udesc = "EOI", .ucode = 0x80, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0xff, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

/* Sideband-signal unit masks; bit 0x1 is not defined here, hence ALL = 0x1e. */
static const amd64_umask_t amd64_fam12h_sideband_signals[]={
  { .uname = "STOPGRANT", .udesc = "STOPGRANT", .ucode = 0x2, },
  { .uname = "SHUTDOWN", .udesc = "SHUTDOWN", .ucode = 0x4, },
  { .uname = "WBINVD", .udesc = "WBINVD", .ucode = 0x8, },
  { .uname = "INVD", .udesc = "INVD", .ucode = 0x10, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x1e, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

/* Unit masks for FPU_EXCEPTIONS (event code 0xdb). */
static const amd64_umask_t amd64_fam12h_fpu_exceptions[]={
  { .uname = "X87_RECLASS_MICROFAULTS", .udesc = "X87 reclass microfaults", .ucode = 0x1, },
  { .uname = "SSE_RETYPE_MICROFAULTS", .udesc = "SSE retype microfaults", .ucode = 0x2, },
  { .uname = "SSE_RECLASS_MICROFAULTS", .udesc = "SSE reclass microfaults", .ucode = 0x4, },
  { .uname = "SSE_AND_X87_MICROTRAPS", .udesc = "SSE and x87 microtraps", .ucode = 0x8, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0xf, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

/*
 * Unit masks for DRAM_ACCESSES_PAGE (event code 0xe0).
 * NOTE(review): the DCT0 names omit the PAGE_ infix that the DCT1 names
 * carry.  Left untouched because renaming a uname would presumably break
 * existing event strings -- confirm before normalizing.
 */
static const amd64_umask_t amd64_fam12h_dram_accesses_page[]={
  { .uname = "DCT0_HIT", .udesc = "DCT0 Page hit", .ucode = 0x1, },
  { .uname = "DCT0_MISS", .udesc = "DCT0 Page Miss", .ucode = 0x2, },
  { .uname = "DCT0_CONFLICT", .udesc = "DCT0 Page Conflict", .ucode = 0x4, },
  { .uname = "DCT1_PAGE_HIT", .udesc = "DCT1 Page hit", .ucode = 0x8, },
  { .uname = "DCT1_PAGE_MISS", .udesc = "DCT1 Page Miss", .ucode = 0x10, },
  { .uname = "DCT1_PAGE_CONFLICT", .udesc = "DCT1 Page Conflict", .ucode = 0x20, },
  { .uname = "WRITE_REQUEST", .udesc = "Write request.", .ucode = 0x40, },
  { .uname = "READ_REQUEST", .udesc = "Read request.", .ucode = 0x80, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0xff, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

/* Unit masks shared by MEMORY_CONTROLLER_0_PAGE (0xe1) and MEMORY_CONTROLLER_1_PAGE (0xe5). */
static const amd64_umask_t amd64_fam12h_memory_controller_page_table_events[]={
  { .uname = "PAGE_TABLE_OVERFLOW", .udesc = "Page Table Overflow", .ucode = 0x1, },
  { .uname = "STALE_TABLE_ENTRY_HITS", .udesc = "Number of stale table entry hits. (hit on a page closed too soon).", .ucode = 0x2, },
  { .uname = "PAGE_TABLE_IDLE_CYCLE_LIMIT_INCREMENTED", .udesc = "Page table idle cycle limit incremented.", .ucode = 0x4, },
  { .uname = "PAGE_TABLE_IDLE_CYCLE_LIMIT_DECREMENTED", .udesc = "Page table idle cycle limit decremented.", .ucode = 0x8, },
  { .uname = "PAGE_TABLE_CLOSED_INACTIVITY", .udesc = "Page table is closed due to row inactivity.", .ucode = 0x10, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x1f, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

/* Unit masks for MEMORY_CONTROLLER_SLOT_MISSES (event code 0xe2); upper nibble only. */
static const amd64_umask_t amd64_fam12h_memory_controller_slot_misses[]={
  { .uname = "DCT0_RBD", .udesc = "DCT0 RBD.", .ucode = 0x10, },
  { .uname = "DCT1_RBD", .udesc = "DCT1 RBD.", .ucode = 0x20, },
  { .uname = "DCT0_PREFETCH", .udesc = "DCT0 Prefetch.", .ucode = 0x40, },
  { .uname = "DCT1_PREFETCH", .udesc = "DCT1 Prefetch.", .ucode = 0x80, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0xf0, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

/* Unit masks for MEMORY_CONTROLLER_TURNAROUNDS (event code 0xe3); bit 0x4 is not defined here, hence ALL = 0x1b. */
static const amd64_umask_t amd64_fam12h_memory_controller_turnarounds[]={
  { .uname = "DCT0_READ_TO_WRITE", .udesc = "DCT0 read-to-write turnaround.", .ucode = 0x1, },
  { .uname = "DCT0_WRITE_TO_READ", .udesc = "DCT0 write-to-read turnaround", .ucode = 0x2, },
  { .uname = "DCT1_READ_TO_WRITE", .udesc = "DCT1 read-to-write turnaround.", .ucode = 0x8, },
  { .uname = "DCT1_WRITE_TO_READ", .udesc = "DCT1 write-to-read turnaround", .ucode = 0x10, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x1b, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

/* Unit masks for MEMORY_CONTROLLER_RBD_QUEUE (event code 0xe4); bits 0x4 and 0x8 only. */
static const amd64_umask_t amd64_fam12h_memory_rbd_queue[]={
  { .uname = "COUNTER_REACHED", .udesc = "D18F2x[1,0]94[DcqBypassMax] counter reached.", .ucode = 0x4, },
  { .uname = "BANK_CLOSED", .udesc = "Bank is closed due to bank conflict with an outstanding request in the RBD queue.", .ucode = 0x8, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0xc, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

/* Unit masks for THERMAL_STATUS (event code 0xe8); bits 0x1, 0x4, 0x20, 0x40, 0x80, hence ALL = 0xe5. */
static const amd64_umask_t amd64_fam12h_thermal_status[]={
  { .uname = "MEMHOT_L_ASSERTIONS", .udesc = "MEMHOT_L assertions.", .ucode = 0x1, },
  { .uname = "HTC_TRANSITIONS", .udesc = "Number of times the HTC transitions from inactive to active.", .ucode = 0x4, },
  { .uname = "CLOCKS_HTC_P_STATE_INACTIVE", .udesc = "Number of clocks HTC P-state is inactive.", .ucode = 0x20, },
  { .uname = "CLOCKS_HTC_P_STATE_ACTIVE", .udesc = "Number of clocks HTC P-state is active", .ucode = 0x40, },
  { .uname = "PROCHOT_L_ASSERTIONS", .udesc = "PROCHOT_L asserted by an external source and the assertion causes a P-state change.", .ucode = 0x80, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0xe5, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

/* Unit masks for CPU_IO_REQUESTS_TO_MEMORY_IO (event code 0xe9). */
static const amd64_umask_t amd64_fam12h_cpu_io_requests_to_memory_io[]={
  { .uname = "I_O_TO_I_O", .udesc = "IO to IO", .ucode = 0x1, },
  { .uname = "I_O_TO_MEM", .udesc = "IO to Mem", .ucode = 0x2, },
  { .uname = "CPU_TO_I_O", .udesc = "CPU to IO", .ucode = 0x4, },
  { .uname = "CPU_TO_MEM", .udesc = "CPU to Mem", .ucode = 0x8, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x0f, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

static const amd64_umask_t
amd64_fam12h_cache_block[]={
  /*
   * Unit masks for CACHE_BLOCK (event code 0xea); bit 0x2 is not defined
   * here, hence ALL = 0x3d.
   *
   * Convention for every table in this group: the individual entries use
   * distinct ucode bits and the final "ALL" entry carries the bitwise OR of
   * those bits.  "ALL" is tagged AMD64_FL_NCOMBO | AMD64_FL_DFL (presumably
   * "not combinable" and "default umask" -- confirm against the flag
   * definitions).
   */
  { .uname = "VICTIM_WRITEBACK", .udesc = "Victim Block (Writeback)", .ucode = 0x1, },
  { .uname = "DCACHE_LOAD_MISS", .udesc = "Read Block (Dcache load miss refill)", .ucode = 0x4, },
  { .uname = "SHARED_ICACHE_REFILL", .udesc = "Read Block Shared (Icache refill)", .ucode = 0x8, },
  { .uname = "READ_BLOCK_MODIFIED", .udesc = "Read Block Modified (Dcache store miss refill)", .ucode = 0x10, },
  { .uname = "READ_TO_DIRTY", .udesc = "Change-to-Dirty (first store to clean block already in cache)", .ucode = 0x20, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x3d, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

/* Unit masks for SIZED_COMMANDS (event code 0xeb). */
static const amd64_umask_t amd64_fam12h_sized_commands[]={
  { .uname = "NON_POSTED_WRITE_BYTE", .udesc = "Non-Posted SzWr Byte (1-32 bytes) Legacy or mapped IO, typically 1-4 bytes", .ucode = 0x1, },
  { .uname = "NON_POSTED_WRITE_DWORD", .udesc = "Non-Posted SzWr DW (1-16 dwords) Legacy or mapped IO, typically 1 DWORD", .ucode = 0x2, },
  { .uname = "POSTED_WRITE_BYTE", .udesc = "Posted SzWr Byte (1-32 bytes) Subcache-line DMA writes, size varies; also flushes of partially-filled Write Combining buffer", .ucode = 0x4, },
  { .uname = "POSTED_WRITE_DWORD", .udesc = "Posted SzWr DW (1-16 dwords) Block-oriented DMA writes, often cache-line sized; also processor Write Combining buffer flushes", .ucode = 0x8, },
  { .uname = "READ_BYTE_4_BYTES", .udesc = "SzRd Byte (4 bytes) Legacy or mapped IO", .ucode = 0x10, },
  { .uname = "READ_DWORD_1_16_DWORDS", .udesc = "SzRd DW (1-16 dwords) Block-oriented DMA reads, typically cache-line size", .ucode = 0x20, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x3f, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

/* Probe unit masks; bit 0x40 is not defined here, hence ALL = 0xbf. */
static const amd64_umask_t amd64_fam12h_probe[]={
  { .uname = "MISS", .udesc = "Probe miss", .ucode = 0x1, },
  { .uname = "HIT_CLEAN", .udesc = "Probe hit clean", .ucode = 0x2, },
  { .uname = "HIT_DIRTY_NO_MEMORY_CANCEL", .udesc = "Probe hit dirty without memory cancel (probed by Sized Write or Change2Dirty)", .ucode = 0x4, },
  { .uname = "HIT_DIRTY_WITH_MEMORY_CANCEL", .udesc = "Probe hit dirty with memory cancel (probed by DMA read or cache refill request)", .ucode = 0x8, },
  { .uname = "UPSTREAM_HIGH_PRIORITY_READS", .udesc = "Upstream high priority reads.", .ucode = 0x10, },
  { .uname = "UPSTREAM_LOW_PRIORITY_READS", .udesc = "Upstream low priority reads.", .ucode = 0x20, },
  { .uname = "UPSTREAM_LOW_PRIORITY_WRITES", .udesc = "Upstream low priority writes.", .ucode = 0x80, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0xbf, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

/* DEV hit/miss/error unit masks; bits 0x10-0x40, ALL = 0x70. */
static const amd64_umask_t amd64_fam12h_dev[]={
  { .uname = "DEV_HIT", .udesc = "DEV hit", .ucode = 0x10, },
  { .uname = "DEV_MISS", .udesc = "DEV miss", .ucode = 0x20, },
  { .uname = "DEV_ERROR", .udesc = "DEV error", .ucode = 0x40, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x70, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

/* Memory-controller request-size unit masks; bits 0x8-0x40, ALL = 0x78. */
static const amd64_umask_t amd64_fam12h_memory_controller_requests[]={
  { .uname = "32_BYTES_WRITES", .udesc = "32 Bytes Sized Writes", .ucode = 0x8, },
  { .uname = "64_BYTES_WRITES", .udesc = "64 Bytes Sized Writes", .ucode = 0x10, },
  { .uname = "32_BYTES_READS", .udesc = "32 Bytes Sized Reads", .ucode = 0x20, },
  { .uname = "64_BYTES_READS", .udesc = "64 Byte Sized Reads", .ucode = 0x40, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x78, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

/* Unit masks for PAGE_SIZE_MISMATCHES (event code 0x165). */
static const amd64_umask_t amd64_fam12h_page_size_mismatches[]={
  { .uname = "GUEST_LARGER", .udesc = "Guest page size is larger than the host page size.", .ucode = 0x1, },
  { .uname = "MTRR_MISMATCH", .udesc = "MTRR mismatch.", .ucode = 0x2, },
  { .uname = "HOST_LARGER", .udesc = "Host page size is larger than the guest page size.", .ucode = 0x4, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x7, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

static const amd64_umask_t
amd64_fam12h_retired_x87_ops[]={
  /*
   * Unit masks for RETIRED_X87_OPS (event code 0x1c0).  The three op
   * classes use bits 0x1, 0x2 and 0x4; "ALL" is their OR (0x7) and is
   * tagged AMD64_FL_NCOMBO | AMD64_FL_DFL (presumably "not combinable" and
   * "default umask" -- confirm against the flag definitions).
   */
  { .uname = "ADD_SUB_OPS", .udesc = "Add/subtract ops", .ucode = 0x1, },
  { .uname = "MUL_OPS", .udesc = "Multiply ops", .ucode = 0x2, },
  { .uname = "DIV_OPS", .udesc = "Divide ops", .ucode = 0x4, },
  { .uname = "ALL", .udesc = "All sub-events selected", .ucode = 0x7, .uflags= AMD64_FL_NCOMBO | AMD64_FL_DFL, },
};

static const amd64_entry_t amd64_fam12h_pe[]={ { .name = "DISPATCHED_FPU", .desc = "Dispatched FPU Operations", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x0, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam12h_dispatched_fpu), .ngrp = 1, .umasks = amd64_fam12h_dispatched_fpu, }, { .name = "CYCLES_NO_FPU_OPS_RETIRED", .desc = "Cycles in which the FPU is Empty", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x1, }, { .name = "DISPATCHED_FPU_OPS_FAST_FLAG", .desc = "Dispatched Fast Flag FPU Operations", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x2, }, { .name = "RETIRED_SSE_OPERATIONS", .desc = "Retired SSE Operations", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x3, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam12h_retired_sse_operations), .ngrp = 1, .umasks = amd64_fam12h_retired_sse_operations, }, { .name = "RETIRED_MOVE_OPS", .desc = "Retired Move Ops", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x4, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam12h_retired_move_ops), .ngrp = 1, .umasks = amd64_fam12h_retired_move_ops, }, { .name = "RETIRED_SERIALIZING_OPS", .desc = "Retired Serializing Ops", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x5, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam12h_retired_serializing_ops), .ngrp = 1, .umasks = amd64_fam12h_retired_serializing_ops, }, { .name = "FP_SCHEDULER_CYCLES", .desc = "Number of Cycles that a Serializing uop is in the FP Scheduler", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x6, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam12h_fp_scheduler_cycles), .ngrp = 1, .umasks = amd64_fam12h_fp_scheduler_cycles, }, { .name = "SEGMENT_REGISTER_LOADS", .desc = "Segment Register Loads", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x20, .numasks =
LIBPFM_ARRAY_SIZE(amd64_fam12h_segment_register_loads), .ngrp = 1, .umasks = amd64_fam12h_segment_register_loads, }, { .name = "PIPELINE_RESTART_DUE_TO_SELF_MODIFYING_CODE", .desc = "Pipeline Restart Due to Self-Modifying Code", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x21, }, { .name = "PIPELINE_RESTART_DUE_TO_PROBE_HIT", .desc = "Pipeline Restart Due to Probe Hit", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x22, }, { .name = "LS_BUFFER_2_FULL_CYCLES", .desc = "LS Buffer 2 Full", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x23, }, { .name = "LOCKED_OPS", .desc = "Locked Operations", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x24, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam12h_locked_ops), .ngrp = 1, .umasks = amd64_fam12h_locked_ops, }, { .name = "RETIRED_CLFLUSH_INSTRUCTIONS", .desc = "Retired CLFLUSH Instructions", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x26, }, { .name = "RETIRED_CPUID_INSTRUCTIONS", .desc = "Retired CPUID Instructions", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x27, }, { .name = "CANCELLED_STORE_TO_LOAD_FORWARD_OPERATIONS", .desc = "Cancelled Store to Load Forward Operations", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x2a, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam12h_cancelled_store_to_load_forward_operations), .ngrp = 1, .umasks = amd64_fam12h_cancelled_store_to_load_forward_operations, }, { .name = "SMIS_RECEIVED", .desc = "SMIs Received", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x2b, }, { .name = "DATA_CACHE_ACCESSES", .desc = "Data Cache Accesses", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x40, }, { .name = "DATA_CACHE_MISSES", .desc = "Data Cache Misses", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x41, }, { .name = "DATA_CACHE_REFILLS", .desc = "Data Cache Refills from L2 or Northbridge", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x42, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam12h_data_cache_refills), .ngrp = 1, .umasks = amd64_fam12h_data_cache_refills, }, { .name = "DATA_CACHE_REFILLS_FROM_SYSTEM", .desc = "Data Cache Refills from the Northbridge", .modmsk = AMD64_FAM10H_ATTRS, 
.code = 0x43, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam12h_data_cache_refills_from_northbridge), .ngrp = 1, .umasks = amd64_fam12h_data_cache_refills_from_northbridge, }, { .name = "DATA_CACHE_LINES_EVICTED", .desc = "Data Cache Lines Evicted", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x44, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam12h_data_cache_lines_evicted), .ngrp = 1, .umasks = amd64_fam12h_data_cache_lines_evicted, }, { .name = "L1_DTLB_MISS_AND_L2_DTLB_HIT", .desc = "L1 DTLB Miss and L2 DTLB Hit", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x45, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam12h_l1_dtlb_miss_and_l2_dtlb_hit), .ngrp = 1, .umasks = amd64_fam12h_l1_dtlb_miss_and_l2_dtlb_hit, }, { .name = "L1_DTLB_AND_L2_DTLB_MISS", .desc = "L1 DTLB and L2 DTLB Miss", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x46, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam12h_l1_dtlb_and_l2_dtlb_miss), .ngrp = 1, .umasks = amd64_fam12h_l1_dtlb_and_l2_dtlb_miss, }, { .name = "MISALIGNED_ACCESSES", .desc = "Misaligned Accesses", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x47, }, { .name = "MICROARCHITECTURAL_LATE_CANCEL_OF_AN_ACCESS", .desc = "Microarchitectural Late Cancel of an Access", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x48, }, { .name = "MICROARCHITECTURAL_EARLY_CANCEL_OF_AN_ACCESS", .desc = "Microarchitectural Early Cancel of an Access", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x49, }, { .name = "PREFETCH_INSTRUCTIONS_DISPATCHED", .desc = "Prefetch Instructions Dispatched", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x4b, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam12h_prefetch_instructions_dispatched), .ngrp = 1, .umasks = amd64_fam12h_prefetch_instructions_dispatched, }, { .name = "DCACHE_MISSES_BY_LOCKED_INSTRUCTIONS", .desc = "DCACHE Misses by Locked Instructions", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x4c, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam12h_dcache_misses_by_locked_instructions), .ngrp = 1, .umasks = amd64_fam12h_dcache_misses_by_locked_instructions, }, { .name = "L1_DTLB_HIT", .desc = "L1 DTLB Hit", 
.modmsk = AMD64_FAM10H_ATTRS, .code = 0x4d, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam12h_l1_dtlb_hit), .ngrp = 1, .umasks = amd64_fam12h_l1_dtlb_hit, }, { .name = "INEFFECTIVE_SW_PREFETCHES", .desc = "Ineffective Software Prefetches", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x52, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam12h_ineffective_sw_prefetches), .ngrp = 1, .umasks = amd64_fam12h_ineffective_sw_prefetches, }, { .name = "GLOBAL_TLB_FLUSHES", .desc = "Global TLB Flushes", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x54, }, { .name = "MEMORY_REQUESTS", .desc = "Memory Requests by Type", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x65, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam12h_memory_requests), .ngrp = 1, .umasks = amd64_fam12h_memory_requests, }, { .name = "DATA_PREFETCHES", .desc = "Data Prefetcher", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x67, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam12h_data_prefetches), .ngrp = 1, .umasks = amd64_fam12h_data_prefetches, }, { .name = "NORTHBRIDGE_READ_RESPONSES", .desc = "Northbridge Read Responses by Coherency State", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x6c, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam12h_northbridge_read_responses), .ngrp = 1, .umasks = amd64_fam12h_northbridge_read_responses, }, { .name = "OCTWORDS_WRITTEN_TO_SYSTEM", .desc = "Octwords Written to System", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x6d, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam12h_octwords_written_to_system), .ngrp = 1, .umasks = amd64_fam12h_octwords_written_to_system, }, { .name = "CPU_CLK_UNHALTED", .desc = "CPU Clocks not Halted", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x76, }, { .name = "REQUESTS_TO_L2", .desc = "Requests to L2 Cache", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x7d, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam12h_requests_to_l2), .ngrp = 1, .umasks = amd64_fam12h_requests_to_l2, }, { .name = "L2_CACHE_MISS", .desc = "L2 Cache Misses", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x7e, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam12h_l2_cache_miss), .ngrp = 1, .umasks = 
amd64_fam12h_l2_cache_miss, }, { .name = "L2_FILL_WRITEBACK", .desc = "L2 Fill/Writeback", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x7f, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam12h_l2_fill_writeback), .ngrp = 1, .umasks = amd64_fam12h_l2_fill_writeback, }, { .name = "PAGE_SIZE_MISMATCHES", .desc = "Page Size Mismatches", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x165, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam12h_page_size_mismatches), .ngrp = 1, .umasks = amd64_fam12h_page_size_mismatches, }, { .name = "INSTRUCTION_CACHE_FETCHES", .desc = "Instruction Cache Fetches", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x80, }, { .name = "INSTRUCTION_CACHE_MISSES", .desc = "Instruction Cache Misses", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x81, }, { .name = "INSTRUCTION_CACHE_REFILLS_FROM_L2", .desc = "Instruction Cache Refills from L2", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x82, }, { .name = "INSTRUCTION_CACHE_REFILLS_FROM_SYSTEM", .desc = "Instruction Cache Refills from System", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x83, }, { .name = "L1_ITLB_MISS_AND_L2_ITLB_HIT", .desc = "L1 ITLB Miss and L2 ITLB Hit", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x84, }, { .name = "L1_ITLB_MISS_AND_L2_ITLB_MISS", .desc = "L1 ITLB Miss and L2 ITLB Miss", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x85, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam12h_l1_itlb_miss_and_l2_itlb_miss), .ngrp = 1, .umasks = amd64_fam12h_l1_itlb_miss_and_l2_itlb_miss, }, { .name = "PIPELINE_RESTART_DUE_TO_INSTRUCTION_STREAM_PROBE", .desc = "Pipeline Restart Due to Instruction Stream Probe", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x86, }, { .name = "INSTRUCTION_FETCH_STALL", .desc = "Instruction Fetch Stall", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x87, }, { .name = "RETURN_STACK_HITS", .desc = "Return Stack Hits", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x88, }, { .name = "RETURN_STACK_OVERFLOWS", .desc = "Return Stack Overflows", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x89, }, { .name = "INSTRUCTION_CACHE_VICTIMS", .desc = "Instruction Cache 
Victims", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x8b, }, { .name = "INSTRUCTION_CACHE_LINES_INVALIDATED", .desc = "Instruction Cache Lines Invalidated", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x8c, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam12h_instruction_cache_lines_invalidated), .ngrp = 1, .umasks = amd64_fam12h_instruction_cache_lines_invalidated, }, { .name = "ITLB_RELOADS", .desc = "ITLB Reloads", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x99, }, { .name = "ITLB_RELOADS_ABORTED", .desc = "ITLB Reloads Aborted", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x9a, }, { .name = "RETIRED_INSTRUCTIONS", .desc = "Retired Instructions", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xc0, }, { .name = "RETIRED_UOPS", .desc = "Retired uops", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xc1, }, { .name = "RETIRED_BRANCH_INSTRUCTIONS", .desc = "Retired Branch Instructions", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xc2, }, { .name = "RETIRED_MISPREDICTED_BRANCH_INSTRUCTIONS", .desc = "Retired Mispredicted Branch Instructions", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xc3, }, { .name = "RETIRED_TAKEN_BRANCH_INSTRUCTIONS", .desc = "Retired Taken Branch Instructions", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xc4, }, { .name = "RETIRED_TAKEN_BRANCH_INSTRUCTIONS_MISPREDICTED", .desc = "Retired Taken Branch Instructions Mispredicted", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xc5, }, { .name = "RETIRED_FAR_CONTROL_TRANSFERS", .desc = "Retired Far Control Transfers", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xc6, }, { .name = "RETIRED_BRANCH_RESYNCS", .desc = "Retired Branch Resyncs", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xc7, }, { .name = "RETIRED_NEAR_RETURNS", .desc = "Retired Near Returns", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xc8, }, { .name = "RETIRED_NEAR_RETURNS_MISPREDICTED", .desc = "Retired Near Returns Mispredicted", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xc9, }, { .name = "RETIRED_INDIRECT_BRANCHES_MISPREDICTED", .desc = "Retired Indirect Branches Mispredicted", .modmsk = AMD64_FAM10H_ATTRS, .code = 
0xca, }, { .name = "RETIRED_MMX_AND_FP_INSTRUCTIONS", .desc = "Retired MMX/FP Instructions", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xcb, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam12h_retired_mmx_and_fp_instructions), .ngrp = 1, .umasks = amd64_fam12h_retired_mmx_and_fp_instructions, }, { .name = "INTERRUPTS_MASKED_CYCLES", .desc = "Interrupts-Masked Cycles", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xcd, }, { .name = "INTERRUPTS_MASKED_CYCLES_WITH_INTERRUPT_PENDING", .desc = "Interrupts-Masked Cycles with Interrupt Pending", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xce, }, { .name = "INTERRUPTS_TAKEN", .desc = "Interrupts Taken", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xcf, }, { .name = "DECODER_EMPTY", .desc = "Decoder Empty", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xd0, }, { .name = "DISPATCH_STALLS", .desc = "Dispatch Stalls", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xd1, }, { .name = "DISPATCH_STALL_FOR_BRANCH_ABORT", .desc = "Dispatch Stall for Branch Abort to Retire", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xd2, }, { .name = "DISPATCH_STALL_FOR_SERIALIZATION", .desc = "Dispatch Stall for Serialization", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xd3, }, { .name = "DISPATCH_STALL_FOR_SEGMENT_LOAD", .desc = "Dispatch Stall for Segment Load", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xd4, }, { .name = "DISPATCH_STALL_FOR_REORDER_BUFFER_FULL", .desc = "Dispatch Stall for Reorder Buffer Full", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xd5, }, { .name = "DISPATCH_STALL_FOR_RESERVATION_STATION_FULL", .desc = "Dispatch Stall for Reservation Station Full", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xd6, }, { .name = "DISPATCH_STALL_FOR_FPU_FULL", .desc = "Dispatch Stall for FPU Full", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xd7, }, { .name = "DISPATCH_STALL_FOR_LS_FULL", .desc = "Dispatch Stall for LS Full", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xd8, }, { .name = "DISPATCH_STALL_WAITING_FOR_ALL_QUIET", .desc = "Dispatch Stall Waiting for All Quiet", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xd9, }, 
{ .name = "DISPATCH_STALL_FOR_FAR_TRANSFER_OR_RSYNC", .desc = "Dispatch Stall for Far Transfer or Resync to Retire", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xda, }, { .name = "FPU_EXCEPTIONS", .desc = "FPU Exceptions", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xdb, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam12h_fpu_exceptions), .ngrp = 1, .umasks = amd64_fam12h_fpu_exceptions, }, { .name = "DR0_BREAKPOINT_MATCHES", .desc = "DR0 Breakpoint Matches", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xdc, }, { .name = "DR1_BREAKPOINT_MATCHES", .desc = "DR1 Breakpoint Matches", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xdd, }, { .name = "DR2_BREAKPOINT_MATCHES", .desc = "DR2 Breakpoint Matches", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xde, }, { .name = "DR3_BREAKPOINT_MATCHES", .desc = "DR3 Breakpoint Matches", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xdf, }, { .name = "RETIRED_X87_OPS", .desc = "Retired x87 Floating Point Operations", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x1c0, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam12h_retired_x87_ops), .ngrp = 1, .umasks = amd64_fam12h_retired_x87_ops, }, { .name = "LFENCE_INST_RETIRED", .desc = "LFENCE Instructions Retired", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x1d3, }, { .name = "SFENCE_INST_RETIRED", .desc = "SFENCE Instructions Retired", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x1d4, }, { .name = "MFENCE_INST_RETIRED", .desc = "MFENCE Instructions Retired", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x1d5, }, { .name = "DRAM_ACCESSES_PAGE", .desc = "DRAM Accesses", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xe0, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam12h_dram_accesses_page), .ngrp = 1, .umasks = amd64_fam12h_dram_accesses_page, }, { .name = "MEMORY_CONTROLLER_0_PAGE", .desc = "DRAM Controller 0 Page Table Events", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xe1, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam12h_memory_controller_page_table_events), .ngrp = 1, .umasks = amd64_fam12h_memory_controller_page_table_events, }, { .name = "MEMORY_CONTROLLER_SLOT_MISSES", .desc = 
"Memory Controller DRAM Command Slots Missed", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xe2, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam12h_memory_controller_slot_misses), .ngrp = 1, .umasks = amd64_fam12h_memory_controller_slot_misses, }, { .name = "MEMORY_CONTROLLER_TURNAROUNDS", .desc = "Memory Controller Turnarounds", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xe3, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam12h_memory_controller_turnarounds), .ngrp = 1, .umasks = amd64_fam12h_memory_controller_turnarounds, }, { .name = "MEMORY_CONTROLLER_RBD_QUEUE", .desc = "Memory Controller RBD Queue Events", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xe4, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam12h_memory_rbd_queue), .ngrp = 1, .umasks = amd64_fam12h_memory_rbd_queue, }, { .name = "MEMORY_CONTROLLER_1_PAGE", .desc = "DRAM Controller 1 Page Table Events", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xe5, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam12h_memory_controller_page_table_events), .ngrp = 1, .umasks = amd64_fam12h_memory_controller_page_table_events, }, { .name = "THERMAL_STATUS", .desc = "Thermal Status", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xe8, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam12h_thermal_status), .ngrp = 1, .umasks = amd64_fam12h_thermal_status, }, { .name = "CPU_IO_REQUESTS_TO_MEMORY_IO", .desc = "CPU/IO Requests to Memory/IO", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xe9, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam12h_cpu_io_requests_to_memory_io), .ngrp = 1, .umasks = amd64_fam12h_cpu_io_requests_to_memory_io, }, { .name = "CACHE_BLOCK", .desc = "Cache Block Commands", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xea, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam12h_cache_block), .ngrp = 1, .umasks = amd64_fam12h_cache_block, }, { .name = "SIZED_COMMANDS", .desc = "Sized Commands", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xeb, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam12h_sized_commands), .ngrp = 1, .umasks = amd64_fam12h_sized_commands, }, { .name = "PROBE", .desc = "Probe Responses and Upstream Requests", 
.modmsk = AMD64_FAM10H_ATTRS, .code = 0xec, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam12h_probe), .ngrp = 1, .umasks = amd64_fam12h_probe, }, { .name = "DEV", .desc = "DEV Events", .modmsk = AMD64_FAM10H_ATTRS, .code = 0xee, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam12h_dev), .ngrp = 1, .umasks = amd64_fam12h_dev, }, { .name = "MEMORY_CONTROLLER_REQUESTS", .desc = "Memory Controller Requests", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x1f0, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam12h_memory_controller_requests), .ngrp = 1, .umasks = amd64_fam12h_memory_controller_requests, }, { .name = "SIDEBAND_SIGNALS", .desc = "Sideband Signals and Special Cycles", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x1e9, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam12h_sideband_signals), .ngrp = 1, .umasks = amd64_fam12h_sideband_signals, }, { .name = "INTERRUPT_EVENTS", .desc = "Interrupt Events", .modmsk = AMD64_FAM10H_ATTRS, .code = 0x1ea, .numasks = LIBPFM_ARRAY_SIZE(amd64_fam12h_interrupt_events), .ngrp = 1, .umasks = amd64_fam12h_interrupt_events, }, }; papi-5.6.0/src/validation_tests/papi_ref_cyc.c000664 001750 001750 00000013622 13216244370 023471 0ustar00jshenry1963jshenry1963000000 000000 /* This test exercises the PAPI_TOT_CYC and PAPI_REF_CYC counters. PAPI_TOT_CYC should measure the number of cycles required to do a fixed amount of work. It should be roughly constant for constant work, regardless of the speed state a core is in. PAPI_REF_CYC should measure the number of cycles at a constant reference clock rate, independent of the actual clock rate of the core. */ /* PAPI_REF_CYC has various issues on Intel chips: On older machines PAPI uses UNHALTED_REFERENCE_CYCLES but this means different things on different architectures + On Core2/Atom this maps to the special Fixed Counter 2 CPU_CLK_UNHALTED.REF This counts at the same rate as the TSC (PAPI_get_real_cyc()) And also seems to match PAPI_TOT_CYC It is documented as having a fixed ratio to the CPU_CLK_UNHALTED.BUS (3c/1) event. 
+ On Nehalem/Westemere this also maps to Fixed Counter 2. Again, counts same rate as the TSC and returns CPU_CLK_UNHALTED.REF_P (3c/1) times the "Maximum Non-Turbo Ratio" + Same for Sandybridge/Ivybridge On newer HSW,BDW,SKL machines PAPI uses a different type of event CPU_CLK_THREAD_UNHALTED:REF_XCLK + On Haswell machines this is just the reference clock (100MHz?) + On Sandybridge this is off by a factor of 8x? */ /* NOTE: PAPI_get_virt_cyc() returns a lie! It's just virt_time() * max_theoretical_MHz so no point in checking that */ #include #include #include #include "papi.h" #include "papi_test.h" #include "testcode.h" #define NUM_FLOPS 20000000 static void work (int EventSet, int sleep_test, int quiet) { int retval; long long values[2]; long long elapsed_us, elapsed_cyc, elapsed_virt_us, elapsed_virt_cyc; double cycles_error; int numflops = NUM_FLOPS; /* Gather before stats */ elapsed_us = PAPI_get_real_usec( ); elapsed_cyc = PAPI_get_real_cyc( ); elapsed_virt_us = PAPI_get_virt_usec( ); elapsed_virt_cyc = PAPI_get_virt_cyc( ); /* Start PAPI */ retval = PAPI_start( EventSet ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_start", retval ); } /* our test code */ if (sleep_test) { sleep(2); } else { do_flops( numflops, 1 ); } /* Stop PAPI */ retval = PAPI_stop( EventSet, values ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); } /* Calculate total values */ elapsed_virt_us = PAPI_get_virt_usec( ) - elapsed_virt_us; elapsed_virt_cyc = PAPI_get_virt_cyc( ) - elapsed_virt_cyc; elapsed_us = PAPI_get_real_usec( ) - elapsed_us; elapsed_cyc = PAPI_get_real_cyc( ) - elapsed_cyc; if (!quiet) { printf( "-------------------------------------------------------------------------\n" ); if (sleep_test) printf("Sleeping for 2s\n"); else printf( "Using %d iterations of c += a*b\n", numflops ); printf( "-------------------------------------------------------------------------\n" ); printf( "PAPI_TOT_CYC : \t%10lld\n", values[0] ); 
printf( "PAPI_REF_CYC : \t%10lld\n", values[1] ); printf( "Real usec : \t%10lld\n", elapsed_us ); printf( "Real cycles : \t%10lld\n", elapsed_cyc ); printf( "Virt usec : \t%10lld\n", elapsed_virt_us ); printf( "Virt cycles (estimate) : \t%10lld\n", elapsed_virt_cyc ); printf( "Estimated GHz : \t%10.3lf\n", (double) elapsed_cyc/(double)elapsed_us/1000.0); printf( "-------------------------------------------------------------------------\n" ); } if (sleep_test) { if (!quiet) { printf( "Verification: PAPI_REF_CYC should be much lower than real_usec\n"); } if (values[1]>elapsed_us) { if (!quiet) printf("PAPI_REF_CYC too high!\n"); test_fail( __FILE__, __LINE__, "PAPI_REF_CYC too high", 0 ); } } else { /* PAPI_REF_CYC should be roughly the same as TSC when busy */ /* on Intel chips */ if (!quiet) { printf( "Verification: real_cyc should be roughly PAPI_REF_CYC\n"); printf( " real_usec should be roughly virt_usec (on otherwise idle system)\n"); } cycles_error=100.0* ((double)values[1]-((double)elapsed_cyc)) /values[1]; if ((cycles_error>10.0) || (cycles_error<-10.0)) { if (!quiet) printf("Error of %.2f%%\n",cycles_error); test_fail( __FILE__, __LINE__, "PAPI_REF_CYC validation", 0 ); } cycles_error=100.0* ((double)elapsed_us-(double)elapsed_virt_us) /(double)elapsed_us; if ((cycles_error>10.0) || (cycles_error<-10.0)) { if (!quiet) printf("Error of %.2f%%\n",cycles_error); test_warn( __FILE__, __LINE__, "real_us validation", 0 ); } } } int main( int argc, char **argv ) { int retval; int EventSet = PAPI_NULL; int quiet; /* Set TESTS_QUIET variable */ quiet = tests_quiet( argc, argv ); /* Init the PAPI library */ retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) { test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); } /* Check the ref cycles event */ retval = PAPI_query_named_event("PAPI_REF_CYC"); if (PAPI_OK!=retval) { if (!quiet) printf("No PAPI_REF_CYC available\n"); test_skip( __FILE__, __LINE__, "PAPI_REF_CYC is not defined on 
this platform.", 0); } /* create an eventset */ retval = PAPI_create_eventset( &EventSet ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); } /* add core cycle event */ retval = PAPI_add_named_event( EventSet, "PAPI_TOT_CYC"); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_add_named_event: PAPI_TOT_CYC", retval ); } /* add ref cycle event */ retval = PAPI_add_named_event( EventSet, "PAPI_REF_CYC"); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_add_events: PAPI_REF_CYC", retval ); } if (!quiet) { printf("Test case sleeping: " "Look at TOT and REF cycles.\n"); } work(EventSet, 1, quiet); // do_flops(10*numflops); if (!quiet) { printf( "\nTest case busy:\n" ); } work(EventSet, 0, quiet); test_pass( __FILE__ ); return 0; } papi-5.6.0/src/ctests/virttime.c000664 001750 001750 00000004140 13216244361 020635 0ustar00jshenry1963jshenry1963000000 000000 #include #include #include #include "papi.h" #include "papi_test.h" int main( int argc, char **argv ) { int retval; long long elapsed_us, elapsed_cyc; const PAPI_hw_info_t *hw_info; tests_quiet( argc, argv ); /* Set TESTS_QUIET variable */ retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); hw_info = PAPI_get_hardware_info( ); if ( hw_info == NULL ) test_fail( __FILE__, __LINE__, "PAPI_get_hardware_info", 2 ); elapsed_us = PAPI_get_virt_usec( ); elapsed_cyc = PAPI_get_virt_cyc( ); if (!TESTS_QUIET) { printf( "Testing virt time clock. (CPU Max %d MHz, CPU Min %d MHz)\n", hw_info->cpu_max_mhz, hw_info->cpu_min_mhz ); printf( "Sleeping for 10 seconds.\n" ); } sleep( 10 ); elapsed_us = PAPI_get_virt_usec( ) - elapsed_us; elapsed_cyc = PAPI_get_virt_cyc( ) - elapsed_cyc; if (!TESTS_QUIET) { printf( "%lld us. %lld cyc.\n", elapsed_us, elapsed_cyc ); } /* Elapsed microseconds and elapsed cycles are not as unambiguous as they appear. 
On Pentium III and 4, for example, cycles is a measured value, while useconds is computed from cycles and mhz. MHz is read from /proc/cpuinfo (on linux). Thus, any error in MHz is propagated to useconds. Conversely, on ultrasparc useconds are extracted from a system call (gethrtime()) and cycles are computed from useconds. Also, MHz comes from a scan of system info, Thus any error in gethrtime() propagates to both cycles and useconds, and cycles can be further impacted by errors in reported MHz. Without knowing the error bars on these system values, we can't really specify error ranges for our reported values, but we *DO* know that errors for at least one instance of Pentium 4 (torc17@utk) are on the order of one part per thousand. */ /* The check below is an absolute bound, not a relative one: the test fails when more than 100000 us (0.1 s) of virtual time was accumulated across the sleep( 10 ) above. NOTE(review): this comment used to say "We'll accept 1.5 part per thousand error here (to allow Pentium 4 and Alpha to pass)"; that tolerance does not match the comparison actually made here - confirm which one is intended. */ if ( elapsed_us > 100000 ) test_fail( __FILE__, __LINE__, "Virt time greater than .1 seconds!", PAPI_EMISC ); test_pass( __FILE__ ); return 0; } papi-5.6.0/src/perfctr-2.7.x/examples/perfex/x86.c000664 001750 001750 00000010620 13216244370 023440 0ustar00jshenry1963jshenry1963000000 000000 /* $Id: x86.c,v 1.6 2005/03/14 01:48:42 mikpe Exp $ * x86-specific code. 
* * Copyright (C) 1999-2004 Mikael Pettersson */ #include #include #include "libperfctr.h" #include "arch.h" void do_print(FILE *resfile, const struct perfctr_cpu_control *cpu_control, const struct perfctr_sum_ctrs *sum, const struct perfctr_sum_ctrs *children) { unsigned int nrctrs, i; if( cpu_control->tsc_on ) fprintf(resfile, "tsc\t\t\t%19lld\n", sum->tsc + children->tsc); nrctrs = cpu_control->nractrs; for(i = 0; i < nrctrs; ++i) { fprintf(resfile, "event 0x%08X", cpu_control->evntsel[i]); if( cpu_control->p4.escr[i] ) fprintf(resfile, "/0x%08X", cpu_control->p4.escr[i]); fprintf(resfile, "\t%19lld\n", sum->pmc[i] + children->pmc[i]); } if( cpu_control->p4.pebs_enable ) fprintf(resfile, "PEBS_ENABLE 0x%08X\n", cpu_control->p4.pebs_enable); if( cpu_control->p4.pebs_matrix_vert ) fprintf(resfile, "PEBS_MATRIX_VERT 0x%08X\n", cpu_control->p4.pebs_matrix_vert); } void do_arch_usage(void) { fprintf(stderr, "\t--p4pe=\t\t\tValue for PEBS_ENABLE (P4 only)\n"); fprintf(stderr, "\t--p4_pebs_enable=\tSame as --p4pe=\n"); fprintf(stderr, "\t--p4pmv=\t\t\tValue for PEBS_MATRIX_VERT (P4 only)\n"); fprintf(stderr, "\t--p4_pebs_matrix_vert=\tSame as --p4pmv=\n"); fprintf(stderr, "\n"); fprintf(stderr, "Syntax of event specifiers:\n"); fprintf(stderr, "\tevent ::= evntsel[/escr][@pmc]\n"); fprintf(stderr, "\n"); fprintf(stderr, "\tevntsel, escr, and pmc are decimal or hexadecimal numbers.\n"); fprintf(stderr, "\n"); fprintf(stderr, "\tevntsel is the primary processor-specific event selection code\n"); fprintf(stderr, "\tto use for this counter. This field is mandatory.\n"); fprintf(stderr, "\tOn a P4, evntsel is written to the counter's CCCR register.\n"); fprintf(stderr, "\n"); fprintf(stderr, "\tescr describes the additional event selection data written to\n"); fprintf(stderr, "\tthe counter's associated ESCR register. 
(P4 only)\n"); fprintf(stderr, "\n"); fprintf(stderr, "\tpmc describes which CPU counter to use for this event.\n"); fprintf(stderr, "\tBy default the events use counters 0 and up in the order listed.\n"); fprintf(stderr, "\tOn P4, each event is compatible with only a small subset of the\n"); fprintf(stderr, "\tcounters, and explicit counter assignment is mandatory. Also,\n"); fprintf(stderr, "\ton P4 bit 31 should be set in pmc to enable 'fast rdpmc'.\n"); fprintf(stderr, "\tVIA C3 accepts a single event only, but it must use counter 1.\n"); } static int parse_event_spec(const char *arg, unsigned int *evntsel, unsigned int *escr, unsigned int *pmc) { char *endp; *evntsel = my_strtoul(arg, &endp); if( endp[0] != '/' ) { *escr = 0; } else { arg = endp + 1; *escr = my_strtoul(arg, &endp); } if( endp[0] != '@' ) { *pmc = (unsigned int)-1; } else { arg = endp + 1; *pmc = my_strtoul(arg, &endp); } return endp[0] != '\0'; } unsigned int do_event_spec(unsigned int n, const char *arg, struct perfctr_cpu_control *cpu_control) { unsigned int spec_evntsel, spec_escr, spec_pmc; if( parse_event_spec(arg, &spec_evntsel, &spec_escr, &spec_pmc) ) { fprintf(stderr, "perfex: invalid event specifier: '%s'\n", arg); exit(1); } if( n >= ARRAY_SIZE(cpu_control->evntsel) ) { fprintf(stderr, "perfex: too many event specifiers\n"); exit(1); } if( spec_pmc == (unsigned int)-1 ) spec_pmc = n; cpu_control->evntsel[n] = spec_evntsel; cpu_control->p4.escr[n] = spec_escr; cpu_control->pmc_map[n] = spec_pmc; cpu_control->nractrs = ++n; return n; } static int parse_value(const char *arg, unsigned int *value) { char *endp; *value = my_strtoul(arg, &endp); return endp[0] != '\0'; } int do_arch_option(int ch, const char *arg, struct perfctr_cpu_control *cpu_control) { unsigned int spec_value; switch( ch ) { case 1: if( parse_value(arg, &spec_value) ) { fprintf(stderr, "perfex: invalid value: '%s'\n", arg); exit(1); } cpu_control->p4.pebs_enable = spec_value; return 0; case 2: if( parse_value(arg, 
&spec_value) ) { fprintf(stderr, "perfex: invalid value: '%s'\n", arg); exit(1); } cpu_control->p4.pebs_matrix_vert = spec_value; return 0; } return -1; } papi-5.6.0/src/validation_tests/papi_sp_ops.c000664 001750 001750 00000010666 13216244370 023367 0ustar00jshenry1963jshenry1963000000 000000 /* This file attempts to test the single-precision floating point */ /* performance counter PAPI_SP_OPS */ /* by Vince Weaver, */ /* Note! There are many many many things that can go wrong */ /* when trying to get a sane floating point measurement. */ #include #include #include #include #include "papi.h" #include "papi_test.h" #include "display_error.h" #include "testcode.h" int main(int argc, char **argv) { int num_runs=100,i; long long high=0,low=0,average=0,expected=1500000; double error,double_result; long long count,total=0; int quiet=0,retval,ins_result; int eventset=PAPI_NULL; quiet=tests_quiet(argc,argv); if (!quiet) { printf("\nTesting the PAPI_SP_OPS event.\n\n"); } /* Init the PAPI library */ retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) { test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); } /* Create the eventset */ retval=PAPI_create_eventset(&eventset); if (retval!=PAPI_OK) { test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); } /* Add FP_OPS event */ retval=PAPI_add_named_event(eventset,"PAPI_SP_OPS"); if (retval!=PAPI_OK) { if (!quiet) fprintf(stderr,"PAPI_SP_OPS not available!\n"); test_skip( __FILE__, __LINE__, "adding PAPI_SP_OPS", retval ); } /**************************************/ /* Test a loop with no floating point */ /**************************************/ total=0; expected=0; if (!quiet) { printf("Testing a loop with %lld floating point (%d times):\n", expected,num_runs); } for(i=0;ihigh) high=count; if ((low==0) || (count10) { if (!quiet) printf("Unexpected FP event value\n"); test_fail( __FILE__, __LINE__, "Unexpected FP event", 1 ); } if (!quiet) printf("\n"); 
/*******************************************/ /* Test a single-precision matrix multiply */ /*******************************************/ total=0; high=0; low=0; expected=flops_float_init_matrix(); num_runs=3; if (!quiet) { printf("Testing a matrix multiply with %lld single-precision FP operations (%d times)\n", expected,num_runs); } for(i=0;ihigh) high=count; if ((low==0) || (count 1.0) || (error<-1.0)) { if (!quiet) printf("Instruction count off by more than 1%%\n"); test_fail( __FILE__, __LINE__, "Error too high", 1 ); } if (!quiet) printf("\n"); /*******************************************/ /* Test a double-precision matrix multiply */ /*******************************************/ total=0; high=0; low=0; expected=flops_double_init_matrix(); expected=expected*0; num_runs=3; if (!quiet) { printf("Testing a matrix multiply with %lld double-precision FP operations (%d times)\n", expected,num_runs); } for(i=0;ihigh) high=count; if ((low==0) || (count 1.0) || (error<-1.0)) { if (!quiet) printf("Instruction count off by more than 1%%\n"); test_fail( __FILE__, __LINE__, "Error too high", 1 ); } if (!quiet) printf("\n"); test_pass( __FILE__ ); PAPI_shutdown(); return 0; } papi-5.6.0/src/components/vmware/PAPI-VMwareComponentDocument.pdf000664 001750 001750 00000466025 13216244360 027031 0ustar00jshenry1963jshenry1963000000 000000 %PDF-1.4 %âãÏÓ 5 0 obj <>/Subtype/Link/Rect[193.78 633.15 213.58 645.61]>> endobj 6 0 obj <>/Subtype/Link/Rect[213.58 633.15 220.19 645.61]>> endobj 7 0 obj <>/Subtype/Link/Rect[220.19 633.15 246.59 645.61]>> endobj 8 0 obj <>/Subtype/Link/Rect[246.59 633.15 253.19 645.61]>> endobj 9 0 obj <>/Subtype/Link/Rect[253.19 633.15 272.99 645.61]>> endobj 10 0 obj <>/Subtype/Link/Rect[272.99 633.15 279.6 645.61]>> endobj 11 0 obj <>/Subtype/Link/Rect[279.6 633.15 299.4 645.61]>> endobj 12 0 obj <>/Subtype/Link/Rect[312.6 633.15 352.21 645.61]>> endobj 13 0 obj <>/Subtype/Link/Rect[352.21 633.15 372.01 645.61]>> endobj 14 0 obj 
<>/Subtype/Link/Rect[372.01 633.15 391.81 645.61]>> endobj 15 0 obj <>/Subtype/Link/Rect[391.81 633.15 398.42 645.61]>> endobj 16 0 obj <>/Subtype/Link/Rect[398.42 633.15 418.22 645.61]>> endobj 17 0 obj <>stream xœ•}K¯­¹q]ÍÎ$²ãd@È bXŠ!ï||“-Y–õj©¥–dyf8†Ñ×€2ÉßWqo÷¾‹EPã\¬ó"YU¬*ÖãoûÕ[ÈïÞ_ï_ýóÛ¾zûòío×{iþýSÿïõþÍ[vþOÿÅ¿óö¯o_¿ýû›{ÿoþýÇõooîzÿéÛ?þÓõþÏý×Ý;þ÷ÿÏŸ¾óñ›o]÷ÿ|ï_ä_.üR'èýнûøþÕ¿ôOá_Ý{M·àÞs ·Zß¿úôö½ÿþçWÿz_ÂÝånÁ+ü_R¼«·œ¾â-g…ÿ>ÅÇNOPøos|[éoŸÓúýoQ|9ùxGÍà‹ý¥Á¿§`Ÿ×mü‚âãu»4üšãû6êcýÅg+ö]ô¹^ãÿŠâk^>ÿK`b½;¿ x¿Rÿ+ eý<''ùÛ¥ÙæKŠÏýp«}óC퇾ߺ 6…ÿšáãåWzèéÆë‰y¾Kñþi½t?#tBTøR|zZïo)>?ÑOÙ'¶ký>=¯tÅÛí畺Ž-Ÿ?.ß•°Ö à±Þú•ñ¬wžáíV5þ ŠéÖšÂsrB“ÕšñÑßœ‚ÿ‡×›÷ ÿ#ŠïÌ…?áOñ]³ùªðßâø´~?Q|?ÖåûÆñyý>§§tfÖÇõ7_ã­jü/)¾9¹YÌçÕ €3þK†WßÍ>¿áø.,ÑNp}½ù€ž.]퀂¢œíør»Ú>\7—ìçºøæt°?]Ù–l?ß Ô”}?S‹HOøŸÞñz]Ÿž×Ùµk/~¯÷ë~MÃøAÃÿxРþƧç¿ó-½ú9¼¦éó4<€4üˆî[·ÛÊ ~P0ãÈñM, ÿ0½æOÏ{‚Þ£ÇÏáõ}~OÀ׳þŸžÿfñƒüõçðš¦ÏÓð~Ðð[ºo©­çò5Åwc9—Ÿ=Ö¨ÖðéÅšZܧÃë~…à‹]žÿ§§?HµÛBùógåãîzÿ; oþV_žžŽÜ<ÿÞXÌÇï}‹ý™på—àI»çç½T?~óݰ¤Ÿ~¾oß¾QáüÀöîÅÏgäõÜ÷YÞ–cü’Â}ZŽ‘2I~9½Ÿsx3ú…®{ÝŒÓø‡1¡·øÓó–wa»^ý^ÉgÅæOÀþ ¥9Õ[ÕøÿJñÝ•Šÿ¸õ>=¯©{%õÕÏáõš?¿ÆШ{C©ë¹PÝ #oÙÇîÕkøôbMõý‹ŸÃë5~à³öÕãÓÓßÜꌅ&BÃh¯x%‰©Y•ptî–ýËsÜháûå‹ ~ë?[nÞø/,×îLµûã†|€Ë}öÀþÒp™=°¿3Üd,¿$ÇíõÀRƒ]®®ò\gûye\ýþÀRà®ÜMGq×ì&†x¨õø?Ytº‰{ÊׯjwK½ A /‚RŸ³Ô'üƲ–ú„çŒt·ª'üO¸\ !œðôv|È¡™þ»(NxnÒÝ¥qÂÇ œ ûÉ ÎµÎË N¯á»dZ‰áœÀcÏ þ…AD'¸Å›àü.‚j&æ.«žëŒ»¸Nø?üÁ"²æ¿ÛKV>xØKžë›»m3á¹Ê¹Û6Ÿ£g `ÿžúµ~5CdÏ]õ=u[(ÞÕY¡`oøø„§1Iç¯[ ÿŽ—˜üŒ§‘=ç+T3ž/6øõûÇñÝ8Rp8t1Þ’ÞžR|º`KÍøßr|¾9MÏ/(O" Nã°®8˜‚3žF])·¤É¡L®^¸|ì§UãÍ0OWÞ 9œyºö¾â9-­ßçô´‚»Ê¼þZ—ËŸë®rsÙ~ºt.¼ÏÉqmá}**¾_ɺ_¾Ê­$ûiùèn>lg—Ýàì²î»ìÆp@O·¤ÚÉzó…û~vKja¾ÞÜà0Îxþú\ÞþÍÜï»rXT3}Ôñ]9ø“ý¬áVÂÁz«Xâæ« ¶];ØþnÛ-ªœ?¨]~½*þžãž)Ìô„n+ûéWnÕÛw±UŸÞ\¡ûA‹.ü5Ň ¥öå†~ºÅ®ûCì†@98®nr.ßçOˆÉá1ß~¼)ÞÚvƒ…´[ÈnÕÎü19—U;ožpÿíôwí°hç@ñÝXØÿ/)¾¹[óßïâëµöü>Ã#3k¹-¾Íñ /3¾Q|¿¬—ïÓýŒ.¯òË¿ï#’f<ÍUŠˆKèïÓädŠ-ûIõOŒ/Çfý»|-¶åϘžŒ%¾ŸýöÍO-ùø|ûRýË“#B-ùX€3þW_"a§§dšÃØååà6J`gÿ|r¹;fõ–\‚·;ãÿ#Ç×Õ‘¢ì¼[¯#ÊÎÉäîØ×$ÅÐl¦xÙ‰‡Ã^¨Ú³Yç2pñš2Mç1ððšŠl»n-ÎhÊ1Ýwº%› †uùçžß•ß™àT˜\çÆæÍ¤;'y—œº*N ¾ ‘ KêN × ©ëàëˆÜ5;í¡,ÄP±@|Dm;õbÕé^ZtPð9˜—¥rK’Þ8Áy¬£\E}ʺ½ÕœÅŠä6ši/U¬]+tWL+ êžU váëŽUöl 
/Vó×0tL㯠v·žˆéŠCÃ]Ô~‹%û1ù~‰]ά­=L¾`>U߇W;CoTy¸23$‚9Z´©÷èc·NŠ™ÃPCãŠý”RZî±óÀ‰»Ùï1ß;yû6–Küë>v=P½}¥u½ƒ9 Ôõæ«nCêmç´÷+>*Úé%Œtt}Mn²Ñ×Kx.Iâ2t{P+Tº3H-×jƒº,iµ±ID‘QëFvaªÅ¾3±-×*ܤ –²Ñ< )ߊ]‡…ì~çÄt¯L_ñÔú %-´ó˜Y·–K1KS¨’Ÿo5­B—¦PíÇÔ²df™Ø…I³ ÷n»0黀;“]˜B6 Sì«yc&õqz§Æ0yã6Æ{ÑÈ`1^â–•RŒiÑ¿ü”Ò…:+?Æ.Kê þE÷kìàÛ°O•àQÓ$ökLs5«¢àÌ´tkVÓò ï~²¾ 8-]L£]î$p2£i YlyáFv¸®›R_4&™`Ëf³%×=C»©‘\” Q$¿J¤äχ?`›ÚòÔ…:ЂàŠqWà&e©Û«¢~»Àššà<ºâÄššà”’*‰]3œÞuÝ«*šî¹w·ªjj~Æñuæ3~3 rS[W‹îð—­{ïúÓ4=›ÄIŸ™ñ›¸É…ÔœÏàÎ’W8XotòÞ3áy`&fñk­ÌæºÙv;û8çjúy-WTvÛééŽMÔôlbÃo2ó›ô5°¾ »ÓÌn]¼jµ/¢¾Ù·¹ÑÙ·SŠÜØ¹× 1Ì`¤Å.âHïpDqäqš0Ru¬ì€d‘¬Ù“»é1ŠÛb>_XrသnÊyMÏ?îb%ÙùÓçx;D%š·‹ r?®xÀ>Õ¯ÇÅÙ§FyMðÔÖõí›Î¼^˜iöÓE.Ç¥·‡»¥W’Ç6+9Èå¨öÝD.ǯ$c5‚oëamR9ÂMžû½¡Þ.ê㪙7‰º˜Y ©ÃËYmªêërVÛ¼Œzp/"/£X…ÈË퀞’W;ƒo?$÷ÀÎ µ®ÛÉéoâXy3^Ã1°2Ò2NLøx|ã ÏCÝijä{iéa¾æ"D÷@QÅnÕ.f ßT—U;wJ8¤lO·R³³ WìÂXÂÁq¥$pVîDL¤Ù5[Ìie~þ)n5²ùq•¼n'gŸê—k‹G€º,žÈVÅE¶x —h9`æÖnî€ä”,.µ!““æUfzR·™O˜'u›yqIyŠ÷®±rCê»#ߟ.¼ÉΜõ°ÒËí¥™`¥IÈóM7±»eˆ‘| éuÆ;‰”4£é;·CÓ 7ñ‰Ô} ·ù'ÑL‰s#3úÎ}Ï{žÿœG:Fš¿ñ|²ü½t/ ݬ'äÂHš°FÒÄœê;ƒ8ÍÖ¯Ç&>íœ^nTJš&5ÉXú€ÓGgTòDE Gu3áºø¦'ÈŸ•˜zÝœê(|ø€óÈOupžß„Gçbú¥¤ÕÑ&ŽÓ–á5]®Ã¢›ܪiН‡q8£y3Âà%aÔ¨ÕÑÖ4™™UÇÙ›E Ñ­Á¶Á»6E]Þô¿æðº¨$ž©R⢒8éÕIäÀxo †EkßMÉK“ ¨ugÚªyŠ}¿òŠâžaß]§K³ÞJǘƒmïÒw5»8õ›)ÚUA¼F‹‘ t$³æ@œ£* ÷ž„9ìÖItãqкÔ.zÕ.©hj\ì´t ÕE;-è¢n¿l"êÇ‹}Û£[LN¾í—“Y8b·PËÁ>&ɳ1ÚaR0“ìÛÞÕ€â¯M %ËkµõP\“í»^ÒbŠo¢2+Çð˜Ø“ÖàûØ}Úp ×ÍKÞ¢u©-i…Gù+‘VþJx”´Ëâ1ú¦öJv²Ý:I>-†>ÿzXfžjsÏ(ù€Óê?„Kºâ¨³]££‚ó´ñx3ÁiD·ŽÔ© N™·³ ž'8Ï ¸âM}SWs‰àY©qÝŠšîÖö‹Yæ~ܨ±4¿áLxîM†"ÝP&<Űò5i‘ôqÀ:è>‚WÑ OZTÀ,¼Æ·3Â&3=ÝÕÊþ`{º¯åNè¹w°ã³¯ÌÜßFl̼^¤ì°Ç4ˆdǪb§ሪ¿Ï“2ƒ•íû‰!W= ¦Y¶óƒnìû‰¼ùþAÊ"¾.Ôl’3F¯8O £fô΂L·3ïdLò&`¥=V }À©«Š^)š™×Ý»?™áyM¶…ײ±éÝ‘Ä6°n;L³ƒ}Ĩ«`'½fþ`›Ìëœà<©àx#íˆoè¯S#Ñ»ÑÝÖ¨îÐŒCk‚Mw wóæ}ôh à›Hŵw¬Ã¨V2ž’U¬1ëÆDÉ25ê$GM½õL»\gßõ4BVÒÁ´‹ªj’¢ç~ gb¶ó@ '*:Ù+ÒyAšƒ™ Œ’Ô÷ ¯®ja¹S¹óz]‹$mªcF+Iã¾ ÑGUÄð'71Y Ü áŒÚ}ò]2o$"*©Ùá÷^^púê„›bÖ1ÒAõàP»¨ji,1Ü›sY5%I¤³ÒžF|ÁhÉ„å±ÏJL¹´,qöEk.µ<©¤”ÅßwëïŸ^¹îÙ@þ} À÷)TjÓ'ô— =\ ýŠ.ðŽ&ô× U¤(ù‚¢ 3úç ,íî'ÛáR`¥Å]ÐÐÖuŠë Hÿ ‡K åÿ%…{y:´¨t T»þ"FáMÏ\—î¬e¾ÄçÎÝ/1Á7hç Îßó‚t„Ÿà›æ†Ò~‚ó è£àÜ”N2¬À¼Ô$o—œ¦?¸ð›à4¹ƒ"®`ÿz©0_'8M®pU uóR»©S1ü{tºà›!úy‚oÆúú&a<¡îߺTaš‘•¼çæl§ÅIm£•!Qò_H÷84¡yîzQœ'¯£;»™ühQf•ki¥¾Î}Œ.JÑ.×Ã’}»ÓPÕ™ÒÀ¿Ì 
Ïf}*^C1+Hß$r‚o:øÉ{²•yþTÄì_?›yßaØ;gÿº—är«è_aaZOó²’¢ÇQ#5ûNƈñWœ77LRñÍħ• øK)Àí´ ÿ[íûfÒ{Z®ŽMµ{]ø}“Ñí É}$ÛÙ©É\M3-ý¢Ñ‡´Ièö(µƒüo»RŠHrvZº ®›g9£u“ý^B»³“Ž~× N-™ڢ¨„m-§üëèH­X€Ï HÒ©n‚ÿ-…ÃÆ›Ñ‰¢;NßÀžÂ¥$ÐŽ®Ë L½d-kûñÏ9¼.70½–Ò—˜?ö¹k±ªø×]^t57QD®ïºT T³z”‡Mçð‚ °÷gM+íðó’4ÞƒæšwÎ â*OpÞ ohõ NO´‰V·ÒâÆ¸äÏ_XPõ~Ó8OéØ¿À@QøMã<ɹžñ›ÆvNÜÚ O¹NvÓ»¿)â–¤eóᢊÖÉ„ß$9KHvÆs¯¿kÕâž—‰ç²îÿ¦Ó¿4Fœñ›ç‚$çÏ'\"©RóÏf6€´T±¯·6$aÚ×Û¤§ŠY3 sÞuÀÏ2d1Øå×;ée^/:í5ÍÜ–—1‹Î~À(ëvÉΠâx»‚@ë¼Pí „Öyé€üîª/ûÉÏå¿ñà¼RÙ¼ÜìWýÃ_‘³4î4Ë êÇ}µIºŽ7M>sĨŽõƒÎyËén:á]Gê!Eýðœâî—¸jß΀LLo÷p‰öMxþ4äÄ;µ.­öR< Ç_ëíÂÉñy=ÞÍD÷p;ýˆ³;÷cRwÖßߤQûU;or´+š¨˜¥±‡ ÷‡>\ X=äƒýÉHaŸáüÅ5G Úñu5®¸´”¼\ZJE"ˆ=«ty¶³g•ñOöõ6IÄ4{¡•õ®ãy°Ïãíû‰ºuŸíì»ø.Ú“çýú(6Vñۧéá.}_ =QŽ^œ‘ˆæš—5òºƒÆSÛ b> ?­¾×¦l\ zfü;Çוÿ‘¤ÕtÛ e”ò~;»Õ²ŠË¦ºûºH/^{íðßy $¬¦O[†4Úu­Ì#8ðÔR¿{ÛÁÕ>F ˜eñ>cÀÌl÷ŽöïÇ˾7Ú(³Äö\Y¤Jw†S¦¬’;Ã)O¶ýšà”%Q˜ ‰¡g„º„ à”#ÑGÏkj6©ÒtmÆsgC´õ÷7éà匭ôl‚1#xcÝN´ßƒõ>á7 õÒí„üPWfàõþQÊf̼†õv°;IfùÙw'_bœš—‹ª¢p@fëånfø[8 §È”%;9P î`{ªôô·3O“0vú¡œ§\2ñyÆóœnœ'`"p²ó&Ò¼&Ÿ;Þ¾H\Ôº›(f¿vÅìúpy[;¤ekæáq†äVÑÝôäIV͉¬‰p ëÃÄ“íAš…ÞŸÌÃ0R{=ã¿»‹«MÿÇI¼¤™éoiÕ%îô 72ÊHœðÜÈ@ÉØ°xŠ6Ã7S˜¤¢}ûïé¢VrÂ…AÛ3œß(—LÚžñÜbpa%‡[2Vi†ó ÝØ¦UÖ%T3ÿ¦Lzóà‹0óÍÂ-°~×¶rp\±Iþ„ߌ~öb2›Ï+ÉÈG³6 ùiƒ6‰[Ù™W–ì1ïOwz¸E…öUå€þö¤ö¤É® ‘B®vÉ!=¸Œðs¢dl´³ócä9°á:Ȉ…þŸÖóÝ WrëùòÇ­1¿ÂÌo1¹UÞ7ØFú„§oµ1ÇÕÔÛôÙ½VzøcR)+=›&;a¥gÓ™7¯ò¾™—äWË™7juñøx"Lœ‰vñM×l‰K„v“e€yJ*œgäùQ–4á¹ÑŒ îNá7G²¼9Lxž¡/ =MøÍ\˜,¡¤ O½ÅvEá·al¯éߌÁvs|EÒÁŒÿ.Ň´®÷¶UÌÃöµ^Ò+År‘;é>Á¹ã¤øg‚‡’ J ¬—Ïc–›ç¥’t¿vØ_Qì¸sXÊ!÷ ÇFïý¶y€©è<®š˜_ª÷{æ¦Bó¸d`®!î7Œi3×ËLeåq·<ÀTP˼—’+ÛŒÆnÞ£eU°äY&—3œ>Uc}†S§¶ôo0½yH N­Eé‹9£ùÄR±¤í´4„gf8/ÔºdÒ÷Œ§ŽÒISã÷ÚLoå¦Mr§f<‰Ü)½=<÷Ë'Hœ™î…o3žwE ¿>Ý›êP `æ{ ®q3÷@1תð·VðÚ`Æs·ÙSéà¼rExlÆoÒ¡$}ÆsO¯HS`ûzÇSâŒçQ”šWþÙLÉw™÷o‰ ¿ñ·G'¹èfþDá[=Ð˨{kššëîÑóü€¼G8ͬNdxˆ·«7 @]î•M™\†Uj'¿›i¹lòˆÍxšëŽV˜%ÛÕ?za.Ç»m†¹/DØè@zɱ“_Ÿ¸™·¸„“upZ5­ÂÅGçt ;Ù}”Õy»2ñýr_Œ$þX‰yÕ®LUƒ]™À'k§‹ÇÓ|pÙáñtYï¦5fFÌ×|Y îmáÎMÇ„¬;3W«=Q÷¶hÞ'µU»Q~»Ï'™ñ4Óo§þÀøÁÛéboˆ¼•Ù׋Ö;åà|›GLbÆÓÌ y;ÕçK37âUVc•zCàåÒhs†º­ôèÅHÒŒá‚3œÚÌ] BJ&8ÝEtK 
NµÉP’NuFWçÞÐ0­_‡{ã5ñÛAAS¿éë‘{dÇ{yõšñw%ˆydÿ~GwÂoÜ›„RCóöKmˆf5î.ƈì‹¿yõºðk_/ò_«õñªÃÁþÃ>ªû™ë³!‹Ñ0s‘PŸY·ªíàpkY…eÓ4¤›#g5"ÐvrÚ/o).ªdÓ¥³ÊmeÝNI«,vú½+ëz¹u‡P†Æó>È]×ÇË]ÅÑÉvÆsW/âÉΞȫôî`?!»ª;³^/÷ÍòõXÙ==üÉz»uÔNÖ[ªX#V݉Áˆ)ðÍëݸŒxpQc.âr5n2 /i;0ÿ-,F±J;Ððnün_ûöHSr|X¯ŠMÓ2ÓžLm¨Ù“¶) Q™N87W=øþ˜3<ãiD4éˆúû4’rYÙÿ¯¸³’$Ò9áiÚ„.ç»q&žÔ!§¿=©CÞÊñ’RÅÏ»3:i"cÇ{·º ¼#:têõò~ŽaK&°üc  3owJÝç»›¹°3Ï mOìÉéoqUo¼íåu­ÖUÏÒãÀN]ãç’Ü“ºÚ´•>æÛqôÑ0û.÷>fö¼÷Ñ0Kè£a>[ÄJPÉl`Ëû»ñš¿G”ÛZ¿}þ@ó‡]Ï`Ê-MŠ$&ô&ÝDr¾'ø~:b›áûéˆÍ¼N™ŽÌ+E±KærhÉ^ì;Ó/ ÎŸ¿&é«7Áùcb÷°éÛŒãìí¤#!N­téð¨Û™à<žÅÌêRàÙì§Ôm³¨ø—W³"“'Ûw¦_Mé`#KF®±.Íñ&øæ8,;³g1#>à›RÓî#ÙE¯•E#m(H ²2»ïžÞuºPyxV»Î_¤*øvN£Bo"'ײé|Ž^·<¯ZºáéÍúy®Z#m‚&’¦hþú˜uhæ”®*9ݼî^bòÀùã4OÑÎãaÕ/Çfl‰“L#£$á-ÕÙïö€‘>Še¶4ÚçOõUV&æuºøB!½{×Q~§àFáR >ÃÌáåV¾$3Áy®jtâ2MøMºjî5““$}wÆó¤Ñ,Í9fü9~Üzž'ÆbÆ ÆIñõZ×û÷_ÖõnJ&º^Ê rhî¡©²þ*H‘Ÿñ¼äÀÉ3ï{?|+ÿ ©jM<ÁwópY//ḈÖóò£ÄlÆoJ&ÚºÞ ø,CìëEÈ ØåÅß--«¼ àí5ž—д›ð4ùWD< ‰ÎËz©üb¶Õ²^^b)"'ßG®k´óƒ4CÒx^š„„&ü;Å£;™ÆÓ)´ðÑ~¾ˆ0/ë¥ò…VËzi‰T(~]/ÕW¨%YÖËB\ìNÖlŠyà45C¨~ä‡ÌpC~È 7ä‡ÌpC~È 7ä‡ÌðMèã‚áeþ¼Ã˜ ½X+qhÆ;Ã7ù!r¥Øñ#£nÆoÓÙC=ø~ÀX‡¾I)¸ì»ß]Ÿà쬃HÐÛÉ,Iž¼Í¼†ìô|@~w4¯ñ\m$·êÏó I—ÃTÐ óTñ ìË…}§×Ë“yÆpºÏáëI3pgÕ=©Þ ÑÅ•;ù¿+¨]1s¿Ìd9Ð (¡ vÕƒüVìÌìqÝVûñúÑ—wÆsoùÕÎE,ßßä‡H¶­ý¸FîÕŒçaš\íšÉ×j@ì¢h^Þ ª’yh–ZWUÈ™­ÅUn²%$þf>]<– 凜YÖÑvk¹§·Ùùàf‘Tóƒ›ERÍ«]1«µ,7¦U•o¦¯Ê ûr»ëWÊ{"uÜ»pÁV7/RÇÓÁzÑuË~ó¢‰V8¸y‘õéÒ)î˜PÒ¸¹ú´h¦òÆÝÁîHêF´s5;ïGÿdgìg±Øh¡µØ´ý&ÚÁ¶f?]¤n8g?] 
d]t _o¾ð”f¶K0’µØy1çÕÇá-Ær]u§¿Hk³!€©¯þàjAêÆ"-› %~‘–MÇ­Š°ÖŒ§ïhRØ9]m•®ÍDé=ãiUXòR’j§ã\ó~¤V˜ï"¤V˜‹¬2vP#·b†S f$WÌpfšüNí©&Íf8÷åF¼y­È™ÈÍN½ÌI ¿ÉšºùÏ}3×Vz8ý£~Æóü/ÉÑv|8ÉçöÚ»iÞÙ¼¨ü Ïqè ¿ D ™MxîzgÉX°?KŒߔ~\«°lB Ã@2³gi«¸lJK†œð|í7ãi ä9gç<=e}¾4¹½T²æOj¸²˜fzÚ?8/Ì))ç%ùò‚9% ýœÜ)ÉÎÿˆM´| ï˜T²̦øÃ‰{<áy0 ISpóuI%ÙÙõ³ Œ=Ù â‘_e?à2âÞæý©Òæøà:ráÐ}µˆ>°M|¢¢úüT}`›Zu¿rô¦/x]9z3I$JtÎzá_V ÊûL‡°jÐM9Ljž™é‰#zfehL* šî"Gêf¾1B’ öõ"úw Ñ‹­¹~@ðdÿK^éçüSƒ­ " íþÉyáFõßo·gÁ¢z¥¶ƒïc úA£ÃÙ×›žü ~\Yúr™Ù-Ž\³:Œ‚q "ÆÞ¥öÄVM?Q`Hëz‹-¬þݦ¥¡]öŒ§Õg(/iú¼h -¹Œê*óu:Ê?Ììp/ÿ0û/…u;%FaÝKÄ(J„öÙûÉ¥ ,=¡é%Q%ál†Ó BoÖ¯7yB˜ÐÜżÆ„ç.šs˜kiÿ¾“êÿ¿™Z:4„§Aï{§¾¿™¢* †gü¦`“ÿ„ç.ugIXÌÖÓrÝóÙÎ 9,ôo:õ¹•þMCyä9ã·sQs:àŸ®Áñ¤d¦?K6ጧ7ºY8½Ÿô•Á¥ŸïöQÕÛC/PDªGÞ¸î’ääOËuÑm¢iúiù-’’;À£‘žÞNZ®ëÓ²â½Ê‚$Îxî€G©ñ›nU²Ìß¿0ž;ìiø»æõŽn3~3ê4­ôðlˆ\Åžµ^\¾OÔ':éEÍ?¼[Æè6aßO<ú”ƒõvÿfÙžíÒ’TÖýA·‰“ëKF£êýÙĤ{Šy½è6qi~ØF•'ó}*£Q“! ¬Ïë§.““g<÷wƒ´#0Ë âËõÂÏ+úõzÙÌ-“©ƒ߯hÇqp^]- ±É £UB,žM†,ƒÚì÷5´mvÊgð«š# ¶W•X¯kŽf8M«5G3ügŽš£Î  ¤æh†ojˆÆµ>áyϨ9šñ|lɨ9²“?jŽì›9jŽf<¯!5Göý5Göý5Göý5G3ž×€Œš£OǺÜkŽìøQsd>¯{ÍÑŒçSr X¡Ø¦OÃÑí~úU ö]©·Î=Ý(Öݧ”ÔÍšàÜ1.b›NðÍãzÇ’ OcÑò¯ñ×x¼5XW ×ož tW·izxG¸®'ôc y= tm43º6}^ÜõFSzw@šÒëõòùÍ£ÉüŒçó›1NZÿï\Ñ’øõ÷y¨¡‹ lkóùv%˜ÓÁ~¶,¯å~3­­çËm}tVtvqG"8àgøº±ÙåÝû‘ hþ>º¢ÀȽOø­«âÁvÆ‘8á·-ôjyÙB’ÉSfî§{ÕƒåÞ=]«vC—€|rZ%¯Ú„¿4W/–û„ßxºMžî­Úž¨ÓÒÂ;]4™7Úœêíß$ÿ_«o±}X_| >ŸM|Øo_íêë'¾ÖÛ“¯·…U¼8»µ¼Z|ÿQÔZíû™ºz8О©K{vvm…â‚EºøCüèÒjÖ&xçɾ= =QšýxñÐ9ûñ¦ðt{mC }K£7XUÌÎp^ ýeg8½'îýœn|Ïg|7çÛ>Ái‰$8…7%XÉ‘r½ÚMyÁuÓŸçéá÷·~3>Œl!órÉßG¤l ÁÎ ÑÇ»é{ ‰áÑ<ôküfr|^9y3¶Âß4í¼ô¢4qGÍk­I. 
óÞ£ëA;Øû6 Íô´L²òšŒi­vú¥¢^/o‹ç¤ ÍŒ§}È1‘/hz¸{ï傳nã‚3oO•Vn–àJ;ø~yæíHצëA–&žœëöàBϦ°@zSÚ×›ËJϦ!‰ ¢1‹¸ÐgÈ~A÷»«˜Â–W}¯5ƒû»!ø~™Op®ÓΫ¾¾›g4¯Íë,_|ÛD¸Ù)ǤŸá›ù±m&8ÍZ’‹gFó¢BLh±Sž%£Û¼ç˜ì컈†;/#Êa¦}9&8¯n¬òJ9Áy5[Võuª]“ðöç¯G]Þ¢‚oî'¼ÎOh\E¿+ÅŽ<¶Š~WѼíÈÏfîE_ëŒMß^ T›773sÚë4u¨¼‚0Êô«è¡—ãtàV:\2Z:ø™Ã²3¼ÏïÈD1½Û“EÑÎÑEkFó´KD#¼òؽ´9™àøÞÿåÛß®÷Òüû§7€¿yËóø/þý›·}ûúíß;òË·ÿÄØUj endstream endobj 1 0 obj <>>>/MediaBox[0 0 612 792]/Annots[5 0 R 6 0 R 7 0 R 8 0 R 9 0 R 10 0 R 11 0 R 12 0 R 13 0 R 14 0 R 15 0 R 16 0 R]>> endobj 19 0 obj <>stream xœ•KÏe»qždöMb'Îm` Š¬ØÆÖâm‘ôPdIV"YWK†'†c’e’¿V‘«›ä–_¾…œú<{/EÖ…Åß|ë§áþ”Óýé§ÿøñŸ~|õñûëS®þÓïڟקß~ÜÎþSþþ·ÿüñ‹ýpŸþ߇ÿôƒFýˇ»>ý¯_ÿýõéÛÏÝ'ùçÿþŸÏßùòËç[×ø÷ó½Ò¿¹äG­Aßü®ÿäܧŸþSû”ü­û”Û®ú*­•¿ûøÆ¿û÷ÿó§ÿ"ý·Ø»¦Wðýdë+Äÿ9ÂK|•‡-)õUׯüÂkgü{wWxÕ²ð?ÂüýŠÞ—Wɼ´Îµö¤…ÿ6äýõºn¾óO¯tÚ®× í ÷+¯ßÿkÈGÿ*ÅÂ×þ!„“{ŵñ_a>ïóÈßáu[÷.¯¼î/ ŸÝ«¸…ÿÌçW †Éß–â• íokÑ­ßÿ#ÈWÿÊÕ·ŕøµî¯øŠütðmínšv¿oʲò½ïÝýº/­wõu¯SÿWoK×­ü!â®—qo¶¥è ¿Ô},[ïÿ âÉïš NNŸÊ.î· 7qüo!ž¯]‘CÅésÚ÷!ÜüÒÄ]ÛóKÌ7q¯}/¿êžŸ >\×+T¾;C[,Éñýœ݆þ ®éBÃô Þo³íûO¯¸Î¶ïB¾ísõæuCh‹ëò†æ‡¼/v¨jC[ŒÛ>ŠåéU ç’Я¿ùÅR[]kûáÖî¶¼ªAÞ»K2?ýCÛïÊ/¯Ð¶Æm+ÅãUÚÑamÜ[B »z€GP¯]=àþl[£3lÕñr/øùÛrÏW'±-÷²¶nF±-ßË N¢/ûQ·§­GŸøþ±©ÃÌ««ÛѤ¾ßv;ƒ:‰)½nñ<¶åµiO<¼÷ýòÑÐÙ¿‚¥;sÙ— TW±-¯øåeye~¹DY^õÛòòõkÝÍ:8}ÒÕNu˜œ{]õ“šÙ¸íÖp¼’œ=ƒ¡=>¾²ãçsòyß¾àÙ9…°›¸?ãÅ7^|¥­—L²Û6‘jµrëªp웈¯Û-8¶¥Û‘mk;œ”®™CrÄ›xlþ¹K'ÍÄ}i*×&ÙµòPé¸aà°½ï†3ñpOqMçGK{šÎOk{îƒvf3t3pŠ3|¾8bÝO<öÜYu=¼íHu­Ý‰]1m­¸µ;±7&××m˜ÍMå{Kw–ºÏ~<;Ûj,Õеugäg8'.ƒ¼þÊûjÿÍo°ÿ uÐúƒ?>©A9°*39±X”ý+¨½Z½Åm˜¶¯§ƒ?WTì„;¬“êØ‰ÿË“Öô}wïßÿó£N[¿ÿ§˜/:¨ðÏzuáÑò†Öž‡ç4ÊÞ|ÜœèTçL<žÄ.fµ§`œÂkm<×¹ô6Ûڶŭ݉uà]÷郛ŸÓ+C{Dg®ã…÷ˆ’ÔŒ£ÛSºD'þ ã«š_ËÞì4k:³$^\ïº×fâ±ËÒå×µvÿw°Ëµ©Ø‡NñÐÞÑ npzJf»Ó·Å›*?Z¾­Þ­=؃ãÞžƒ‹öÒS;Ýþå€4áØÝcÉüdöm1nÊÏž|oº‡š»éÜûÍÆ5¨_ò®ªðy¡-ÝmßÅþñª¦Í„c‡èeP#áª/çùž.©;‚íJqûÊ÷Mðn× ¸ým×½2¿ÐÅ[œVyáÑ+4ó6­ßÇÎôPÕ»9ñ08!Þ_Wù…Ö©½§©íÒë|ÀÞYÙF£¡ÿß·QœxßFq¶¥héžRÔ¹OO·ª }ª mݶŃsöRg.»\âLËEœ¿5ñz<¶mwëþƒ³ø~ôrl»îe8´E›1Ñ×­9pWŒáÞ›sp{u]ÓÍo‹wSV¸=Hµô~*Û!ê’ØÖn6tf3)³eîäK*tsÚ&½I‹=ŲK{^UÅMg6ñ,og¤ÿ€ù ^ºÄà]Û´©­õ­=Øó+`…ïŸäÒ~¨:x®«"'zêS;3×Èoj[õvìÝdz̎¯z–Yaå¼²¬ö³ïžå‰†‹¶¦O8UqªÄ‡ÐÈË™pØçµŸÖ&§E]NƒGô+kðhâ±gÐ9U°ðsg N<¶ZåôXÅm 
_kÿà4­fö‰S…n}1´?öXëÄã´«¶ÝÙÂ×WXû†=$­nkÿ!m¯;mèþ¿½Gèþ¿Ë«ÄÍ^Ã5Žk;еò8ŒÑ ?ï ÝÙ–£[‡ ;=$ ðËKœ05ñÓÓK¬r•¢m9ºµ=8S«×Ù†µÚQ ­Q7Xeå£ÓĉÇyZ1kâ«ø}êaº7S{н)‰6ÎÐ?’hã ³§™f~mÜŠÄ«Vå ·QßvÆyåà˽-öƒWåÚ;<Þùú¶QcX-ûÞˆä¡ïŠaþˆ+¦f~>ÉZ­¼Á_z¾cµ•¸VòÍ+ÿÐ6G¿Îσ+¦ @²ðFyc“w]ï8Q+Þzže×Whë}ӞؖnuµÑí¿¯}þàD¿fÏmó¿é‡ÌkgÉÃó†Þi«=xCï·ƒp\{-GÃI,¶­1Žåâ(Ù&6¦]И4;XQn8Ã÷ýÛdÃÆ´Û,YxNÓŠèþiàf·\%^³ðØÍE\+)úS ?ƒåÛÉ6g^ùHÚž·Œ×ýv8¤CãsÜ5 NÉ,×nâÉÐvÞm«Ã“¡Y¥Ñ"lÝU ÜÙÓui4‰]ëIœ¢…Ÿ;’‚·íÁ¯R÷S?ì}ñ“ƒfKí\»v> Áaw—úE{š¿ãþ}Ü?ùÚwÓƒ×#©íAómùnßÇùEmùFÃQ/´å»N2v“Äm¹ã ¹klÐÎñê—]Ùî‰WÙ?‡|gk ¶öˆ-giO·ö`·“/êÕšø_c·JP$«žå2äe˜þrÒe~y©[ÅpøŒ)îê ·gø8Yu%9+Õ2^m¹_ëxÁ´aÉYquÛaxS?x}•²«ìÖjËwÛŽàö(·kå×KjëwS?ð4“®² Øu㼺ÍXu(9+>ÿÛãëþ€?£5銛äâaíì× [eÎø¯^nq•Ìø·^½™gü‡×$ŽÿÄ5ã–nŒÜ=Œná¿ù›3œ3ÿ=È«sbÆá¨:‰1¯} ß ¨lhM¸% læù¨Ç÷™ÿ>æ5qæùîàÛ“ÒþýC^‚ÌÑ0{$ȼ~ÿ+Ì«œùïB¾¨äÇ«^rç§OÕBP3ÿuÄKJÆö}8^¾i§°Nç_B¾)À¶¡Óý齓8ÆÌÿóI>ÝŸrÑ%%~¾ÉE—²~ÿçêJù?=)ñ¢;ã¦m %k9…ÿö¡ª—~江ªu|L ËÉ5=U{ý‚\f§u»oæ×ýâú}œÙƒ¸=fþàu­ª–éö$/É 3½ŠM¯•ux™cÞ0¶wyµ¿áß”`ʆ¹ã>¸‡û×·\ç;¿¸}2]´[{ÕäŠnBôdn§Ø´N~|wU2©ßQÊÛxòê²Üʘy\¿­bs0|ßqÚÐã+JÜW~þH^©=¾Š•5óøòdSâÕ0Ÿå¶btüú’ÛŠ)Æ7j=„™ÇNËf…:K{$¸¿Îìä¼5ÙƒžÏþÖÌ"z½ø¬É|{š~ÈkûqС­÷­ýxþÔõãønf³?ÜÊŸ®+¾,ÊYÒärä'xt }#Õê¶ 7Ç¿éÂCù9§lvlC[‹†­B«ÕECïKøfå÷Ëþ}Üûm_7hfqÿn+‡äÊ“7|ÿ®&ͲVÖ¤5aÈÑÖžrííš9”{ßÙq{$«Î0™Û1<’„W “_ÜÅ—AŠ»Øâ.¾×é0_Ôú˜xXEî+n5þ~xÓm°Æ‡\X¼‹A^ ÖFCÿÇ]bo¥â¯«ä~c5œ cªê,™xœ÷v'ñ&Î<®%×Îå!äÍy·ép÷·Õ[¡û¥ÞmhÔÂ+üòÕZx†æW½»=ó;ïývagî-·¥hVJÕöŠl§¶qŸîMI¬£ûR”~3IS"ö\½O8ÓÄ}‡痞ø6ã𨬉ot[4‘mm .j3|ÅÏÜ÷ã¿/Îß¼ð‡û~·z¿&†#\3œâÚ›‡šNZ„hæ5š¾_ðCM§¾`éæÄn'N<Î ŒÝN¤»Sô½¡9I/(ðÍi‡»«fC;Ü­“óPÁJk†æK†«e²§®Ð‰Ç^ž¢uÁé•.®ëhY\MÝo“óàUyÓ<‡Ô±^K†Ö&¾×’™yœåõv2/€øº×öàÔ´ð¦M¹xoÚáp1m«{U¢6gßW¯‡A;H™~ í¹/ •°óY¼$Ûtøß˜/û|;ÜoŒ»vÃòf¹^J/_IÄ+·a6”¬‡_º9ÕïËoÇJ¨ÈwúUn _÷“Ãѯ²mØÔuU¿ô÷ÛiðZ‡÷ðn@Þw |}2„m·8Ü>Lê“æù²7çøl€3hIÜ»ùÅ(y{Wâg›øm’á(Ò®Ûn¡ºë’C™©$u/øÉpw<ÝžìÕ’cð!w7ÝþÒ=Þôä”0¶á , äÛ0\Uk½ÐÊ!^Zëeæu©ê6ixrG€-ÉÚs†ƒ¤\†ô«jÃYTáÚU!þ~¸·s0NRlç†`P%1º}òãæH¸díNœô–Þ4-nO;ÇÇl˜<·×ð=¼÷½OþÃ]Ë*%îéÅsÚæ‡$º /vú•¼Ÿ«°SQ¢£™ÿ~’Ûg«¸ÐÈL’2oÐÍRgj›øÅç÷Û Éå}>.P†—aº%ÿ¦Û°¼¡GóX§@ ÷® ÿãÉ5Ôöгœ¹Î®q±#ùݶ—ˆºyX\0ÊËùAaýq¹/,6ÍÅqHÙÔk45[Ù.ªO˜’Nk€O_ÆÕ€¼_šqH*j¼S=§ÏT¶7âõ¢›,Y"ÜW%.çÙnkDš¾Œ“[R–Ë®ŸáCÕn}ó3ŒÝ_à|àÃ5Ë[÷ð>÷VGÖÊ>ù¥É‡L™´4ùP*,_ÆcRµÀ¹HÚZßpkûÅ]ÈfH* ¤Ðw"Ùuò(c 
å“-Ÿ¸…2¹®5ÿ±²M–Ë–ÓZ=xƒôõR@I¿c—«÷¢‹Ž^¦<Í#ìƒH«²=ø—òÒ ì¼jk;Гîî8JHmñXÙáÎúÅgøPЪ§q#XºwŽÒGRËÊVÀªw†>Ã'mÕ:;Ÿéå¨ó~7 5ÂëÓ®çñ–sŽÓ£éùLÚkKÌ8.EÑó1'?<ÝSx&—–ˆšâ?ápÈqÊИÚ÷N¶1 ™X¼]=À:ñ¸TëÙ,¨C!ïÄÎ8Þr½7ª¾°²NšÃ Õú2âÌãCgÈzð¥Û#¯LFVînÜy›]ž¡ûSP5G·'M˜xèp×'SŠ¡?ÅÎ64?ëæ3?EïnòâJ­wgèþª/óÍÇRÕTæå­÷~ø3è³¾¢†sÙþO®çz°ëW\úa_“ð—Z¡ìŸþ&/î ÇIÏ®xMöd—{³t¥„e;½O…Ý©?ãø9õëÏ8t›t×þŒc[4I>ôŒC5å./³`æá(‰ã¾¬ß‡jJ F¿v\…’Ž™Vq¡špÅñ¤ƒnk?öã§K®þIk3Ì?€jJŽÎÐARÒy>POÉ#•Íbœy¨§\¹%£Î pÛHóÚCPOÉMÎÌ€xŸ‹aIÆ¡K|ÿø6AcµðuŸ@¸=Ab¦'´d:g蟼èÙùð‘\Éñ «ÀPïû¶n :+¦µ§äìùõûØ0’¤½uaëèÍR¾ýÕK óÌc+¶ ãÊ㼟KoJÓò©§• |ßâèþÃèZ'4ÎCòq_`8ÉK5ÂÇIor Ø.~ƒ!¼-øã›.Úß«_Χû¿?L@O1©é±’v/ÃE›Ô“¢úJúëýAÆ?%ÌÉ`Âá–X£©1îºöÖÀT^‰—›oŽ»ô<2á‡û™ý<2ñ‡÷õá‘™Ç^ú`ÜŒÎZ aæEµ‚žG&Çףߛgq»ÎüáyE}‚žk’ZÓžW¼$GlæE²nyˆoÏ]wyq9ëcç3Ÿ#-úØ9ßÿåÖÓ;½ºªVoå¿_߯‡ÅziÍ™Ça1§ÍÓóG.D&ƒ6ñÍÚº ëËëK´voF4 —_^¸ WÌ&ÕìÓµëf,nÒ éååå>ó‚ãˆó­£ó£ÕV—‹¼.—÷S1t¿T•ã•¡Ú&ç¡h”>Å<óWIÞG ›Ò×¾Wà pWÚ'¶uÛwÒƒ§D¯Î<.üÞÖîeØ[äိ=8®×yqCÝUíÁTš5Ç·?VõLüŸc×GÙ÷ lzßqàø@\çÏñ!€xå 7k5ôO {âˆÕ$Ç™Ç!ó*É[ts׫;µR“a¸$¢½ ×Áwpí'ÏËmiZ=H¡þmùÞ?”‹ôIF®ðyÃÁYêôoÚáPIIoóÍ—@”a¯ç ¯ûåFžI\)ˆºŠ‹ó'ä`¸6çOÈÛÀë÷áÁ*НòòJxýæ‹TÑ÷™×UÉé}!º;õ…Bù__( ülKí\#¯;G0žÖê9`…ÏÁ­ýy>ÃHÂþ½àPIÉEβà81\K/Í8”²&=mN8ŽÝëK%3Žm¹vÞ kkŽ—îÒ‚ã›f⸛àÃ帠QBVT7”7Û“’`隦»³34?ö´ºù1íCu¸‹§oÍ<~¯ÙÅÂß~ÿþ±üµh?vÞ»ô¨ÃóuÿþÁ¬×ùö½À<óØ-Q{/IRðkârÊ—&GÍ<6䮺Ϸ㛃um?¾k%÷42ßÿzanm?.wìõ‘˜™?¸49væu—ôMZë‹ßÀ2ßÄo°Í·Ã“†ú$àÌ®¯©[…Õn^,ƒzóbÙCsòÛò™ð’4dظ|[¾ñ6È[49–—W Ѱ\j–\fºýAªË¯ý §’“±µNÅsp~º‰'àö¼º’ŠÓ9òËQr& Ý¢ÛÅ=8â®­p‰ç¶ãÊRÛó~ ƒ—⤄tåg[è/ŠñÝÓ_ã{¿­®bø|[,ÕÒûmo ksp¡£÷S*Nù§Ý+¯|Ô-aØë$s¾$^YIEèšyå#•ˆ.CJÎÄ6;qŽ‚¸à¿x¥‚t6œe¤¶P1œe$g¢®Ê;2’×;¾R[(6ypSæ¸=÷mRæò  3i1kÉZ=HÎĶYr Ò¾YJN——ᨭï.8öêêûüçÓµoØ®o;c4tŽø=Rá— 2œL’Ï»?lGÅiú *~`Ðýê÷`/~¤¯œ;rdLL8Q\hÆá12&&.‘Ú5ø„Ÿü’6áØ1qéKb3cØ’Bç´ %®ß?Ö'J«¸Ðç$)’„ÇŽ•øJòÚžÃ;dU.ÑÓÝ/9~mÏ1g"CûcÙûçàüˆšdÆÎ6É5NχÛë=ñ‡KñE7hv)JÑé ýŸõ&%?ßúÕ3¾ä}2ŒWÛQ\6´§fÓ|SçÊ:^Øø¾¤è7=¼^^YÅÅ7§›qæ ÓMžæÚ–Ëá}U7-Ýž÷ép¸v_Ô>`‡Kßc\Û¸v¯enùöH *ðêP‹TGCÿÜaߌ7ï«únèögµéîlÖ_µLI¡µLI¡µt z‚™øƒ/ãÒû‰Ç¾ )"íùÕ«¾Ãnä„·NlMûž1Çö§dM¤Ì÷§>·Åo^z]„_\’3±~»¬-–M™\šÐK7§ÙN÷ÚœÃu‘Û´5Ê­þídˆ¿/¶a+’”‰kmÏ¡ @¿¾BO†Z÷cöñ5¬m+ŶåÕo²{]t×Þÿ‡*Ì·i«–œ 
Ë^'9ÛÑüר—áö£?œÎq›û8c"Æ}°pçHþ’ÁäŠ)íþþíöî‘‘öÎ?IîAz²Éeƒæ”·°,š3–hÑœ’ÁaМúÖ:ZøjÆ¥rhÕ£žŒÄ÷¾”HŽïM©§p¬ÒÔ_¥g[z·5’¯ÝJÆòö·¶èñRÏÛ9`„*n¼sCz¡Á‡³¾ôa‡©tg÷„cW†ÓApèʨZZuÆñM”ª p±#ãêžM¶'åjÉ&,vd8­)3ó‡”"O$ÌüÁ‘qéîÏv¾T k{°#£YZ1Ú/uš×þÁŽŒ¨%¤è©)›½ã§ƒ8JÄÒøƒ#CëÑÒs_.‹”µýø-µfiÕh¯¶IöÕÄãÚmíÞÞ0Ú´©†« §Û_ûíº‰?<’Þ/3±ëÑ_=ÉΩ7¸©“CVI¿ÃêÍBñ|ÿx9=&~½HJN|Êå•’y}%·Wjå׋îË÷µRöí ßopaßNïžëÛ´^Þ¦å›zÆ6ÛýR`±~õÊ}ƒ2ÇM4–BÛܯ›åµ­Û:3õ Ýü»»'þð|Ö­Æ1»tõ™tËäÏÑ6ùóÛQøðÜVØ÷ì†l'sW ³­ÍR g›d¹¬íÁY ’åb8ÛŠ§g;âœÚÞ~öRÉrÉe½>eJ/ÉŠ †ù,ž¡{ÝKY4yß»ðm¦èöåŽmoO·¡ýɽª¡;åÞ·a´¤®·çW»äÐ$Ëhµ“üfÇâÛ4òàµÁ’’:"›vÀ£U´Î;?Z5ì[#UG„=©>uDØñzꈰ'Ÿ§Ž«}ž:"ìø>uDØþ|ꈰã5ꈰ³_}CìXI»[óÛô¤}CÎø†&'ÑtßÛ˜QÉs±+)ˆF`Û"®¡µcŽ®!I’›xì èUDض‹g(¬‹šHŽ¥¥9¾Ñ›xì)‘—c×Þ<¼+Ô’›x\Æ"hu_¾¢×Ý–7vgôÄÃ¸Š¼+QW?ÒéšyìÉÓã:^‡û@½R-oÛOV^w‘w%ª7tÑgêé•+Žž+ðzD=.VcíeÄØî÷W¿©>ñØv½Œ;}[ŽÎ 7½ïŽ<¶ÿ¥JIˆ|ÿËˡں#nR¦ä6¨7}æ½ú¿ «EÞ–0(Iˆ‰†Ù/nžMÙ¼6~W¶ørXÖw=øöd}Ù€Ÿ m›Î–ÙÙöébP>¾FCãkÞ—Ö!Ý&îSùpu(èIpâW‡ô’/{lì™Ûpn›FÞЗ’=cèM)κís¸ŠÔ Z{Vo–j«n]Z[{µR7?ŒîtY\‹ú‹?Á~¯ÉÓÿ‰¹Ó+€ÅëþÀéש_î ŸêzÞì¯V<®Üo V²ügúW–’ue¡¿iq ÎôWˆ–ìÍ…þ¤ó¢îRôÑ-ø×!®&áŒÿá’D½â ñ"î@ºc”2r´¨±î¢þÂSÜE…_¿¯]Ôâiu*ì|;+n²þâ÷.ëw.åà Ã*6Þ:{„ðêEÒ=Ù¸‹Çåù±èéaõW‘Òùô×å™åÿ&Äë.*êH‰í]¼ß&ê žL¢öZ=3þg×¢‰ìb•èkÅñ,¾¡ÿ.Âo-‘;ã?ƒx‘Œ¿ÿ!ÂsØEý[ˆ×]T¤®%  )uN±¶‰m¢¢)&‰Ï›¨h΄Kë5±‹O6ßþò€¢tÜ{Ixì½4­{/K÷½—¦3ÏŽ½—ÆûÞËâcï¥ñ¾÷²øØ{i¼šD{/‹½—Æ“IÔ±õÒømul½4Þ·^[/÷­—ÄŸ­—ÆûÖËâcë¥ñjõÙzY|l½4žL¢Ž­—ÆûÖËâcë¥ñ¾õ²øØzi¼o½,>¶^¯&QÇÖKãÕ$êØzIüÙzi¼o½,î4´ËãúÒ*{M„àñ,y4.õË-x1‰ƒIÔXM¢J:ª—˜ƒO&Qó%¿%„Æ‹Þ1åquÒxÛÊ¢ÏQã,¢Ê•$ƒ¨RõÕ jl[™ATq“YD•jü‰V*¾Þ¼me—¿¥4'­NÍãÙ$ªÜ·àÅ$jÛÊ,¢æbUŠWXð*Pi¼êú,žÚV-¸¦}ѸÓ:„´%yuEvs¨Ü ýOGût‚ àaŸNô=Û§,ÝíS²ÙjŸ²ì°Oi¼Û§,>ìSïö)‹ût¡ϱ­Í°ö9ô KpÎÓ_ÃGN¥^J'ü‚xÑSé„ÿÂ%,½â_ƒ¸&½ÍxBxÛ-6QaˆÅÇk—õ[O»¬ð¶]l²zˆßzV›päÃËçZñ ñ¬g5VÔìõXÊ~=kÁºí%좢¸X>›¨Ð-ئdH<.¯¤ßôê )è¾Ë~½hàÏ’ó;ãƒð^›N…q•¦•G~p9—ÖGÎay)q“¹Íå\ºÉúm„ûk—Å}¢ORÆŒ')™W 
ÝñR1Ï×G~p¹7~W¾ßÅé·âȇÅéWiQõÀC/m=ïÐ+;5Ëý¦vòü6&§£6FAõtDÂãtDÓz:bé~:¢é̳ãtDãýtÄâãtDãýtÄâãtDãÕ$êðÞ³øðÞÓx2‰:¼÷4~›DÞ{ïÞ{Þ{ïÞ{¼÷4Þ½÷,>¼÷4^-¢>Þ{Þ{O&Q‡÷žÆ»÷~ÂO1Ó¨¯‹ŸÍWÕº$<´.M«Öeé®ui:óìк4Þµ.‹­Kã]ë²øÐº4^M¢­ËâCëÒx2‰:´.ß&Q‡Ö¥ñ®uY|h]ïZ—Ä­Kã]ë²øÐº4^-¢>Z—Ň֥ñduh]ïZ—ÅGÌ”Æ{Ì”ÅGÌ”Æ{Ì”ÅG̔ƫIÔ3¥ñjuÄLIü‰™Òx™²øˆ™Òx™²øˆ™Òx™²øˆ™Òx1‰:b¦4^M¢Ž˜)‹˜)'“¨#fJã=fÊâ#fJã=fÊâ#fJãÙ"ê3¥ñbõ‰™ÒxµˆúÄLY|ÄLi¼ÇLY|ÄLi¼ÇLY|ÄLi<›D1S/&QG̔ƋIÔ3¥ñ3eñ3%ñ'fJã=fÊâ#fÊZ’—n •D¥:ñ¯žc¦LÄL'šˆ™²t·OÉf«}ʲÃ>¥ñnŸ²ø°Oi¼Û§þ'öé„cGœÛ††L¥lÿmY™ ¦òðá:œ(^êÚ¬žY,•E¹Ð0VÚÝú C¥Ý­?Ñ8RÚýú£‡òÖ¡3|] †¯<¼çü>oaÇ´‰¸Í[‚óít¹Í[Ël§K“¬íx¹É /º¤ºË {^ ¯8ŽÛ^»¬ ñ{—æ´ãe´L²r« N.S¹·ëy!×p7YaœWêS¾ã¶V¡¨b“¬‹Å2åŽìÚ/(P*ɺVÑ}!©:¶É‰B“!»}ú¢‹WblÓÝÁ”h¹eHë®zѽ«x%ºÝr¡wƦ]ÚÅıig3ÊNºÐ0×y#öâå]èÃÃyÜ&-ìÂáãe…Ìy6\¼:‹n•Éëm‘nGzÛFÑ·ÓÛ6ŠOzÛEÿÑž×…r–² ªç`ç`šÖs0K÷s0Mgžç`ïç`ç`ïç`ç`¯&QGœ†ÅǦñduÄihü6‰:â44Þã4~ ŒJŹLبºàHx,8šÖÇÒ}ÁÑtæÙ±àh¼/8 ŽÆû‚cñ±àh¼šD ŽÅÇ‚£ñdu,8¿M¢ŽGã}Á±øŒÒxŒ’ø¥ñeñ¥ñjõ Œ²øŒÒx2‰:£4Þ£,>£4Þ£,>£4Þ£,>£4^M¢ŽÀ(W“¨#0JâO`”Æ{`”ÅG`”Æ{`”ÅG`”Æ{`”ÅG`”Æ‹IÔ¥ñjuFY|Fi<™DQïQQïQQÏQŸÀ(‹¨O`”Æ«EÔ'0Êâ#0Jã=0Êâ#0Jã=0Êâ#0JãÙ$êŒÒx1‰:£4^L¢ŽÀ(÷À(‹À(‰?QïQQÖ’¼ô-YG¢ÅËíEÑ>Fgú¥iµOÙf‹}J³Ý>åqµOi¼Û§<®öéŒÿç³}:ãØÃsoˆ£žÿ²¼²Ð8,ºÎ@-ûDaQ}ãk¡QôO ?¯2°èµOA±ºÖ9Csò4ŸáÓîm˜¨›ƒøbíÛ„1Ñð6aL4Ô}¹îáÆ]V½vYaǧ´Ë »æv»¬°kî{—âÙí¢ÂÆä¼‹ §oQ' ßñE½$3ï ¨^’‡Ñåª^V¿¨]Ç+ºàßæ$Žç¾ÍIÏ5t£\!^—6ŽæúMÁhn*›þÂÑܰÏuͽëÞ‡8š÷>„ÑÜ\÷¹Ž£¹»‚„ý"VÝ/jè²í8øû6aÚ½MDFõoº;Ûn„Áâð¦a6î“~<…­aH7½ÍEŒ¾ßæ"ŽF›æb,û\ÄÑè}.Â!ªû\<Ç£Ùo÷x4»ü{Jã=>ÊãýäOâ#>ÊãýäÏâ=>ÊãÕ"êˆÒxòx2‰Úã£<®ñQïñQ×ø(÷ø(k|”Æ{|”Ç«IÔåñjµÇGY|ÄGy\ã£4Þã£<®ñQïñQ×ø(÷ø(“¨=>ÊãÕ$jÒxòx2‰Úã£<®ñQïñQ×ø(÷ø(g‹¨#>ÊãÅ"êˆòxµˆ:â£4Þã£<®ñQïñQ×ø(÷ø(g“¨=>ÊãÅ$jòx1‰Úã£<®ñQïñQñQ×ø(÷ø(mI^z •C£§ƒ¢çøèñщ&â£,ÝíS²ÙjŸ²ì°Oi¼Û§,>ìSïöé„ÿÂ>p½^a¡ñ;lé•ùoßë`Âøè·i#¤ÙoÓ FHsÙ¦Œ–°M+br¥., Œ¸÷y…"uãÑ݇ua¯´Ï+zs×>¯ Äï}^á;¯nÇkï]Txó2ø]T4]|È»¨ðëí\¼‰ ãzòô5¿âäÆk6à[Ãá{z#øÊ~Zœ—ôL—¢Æë·ásm¥…ÆÏ®•׺Žà‹qm­: ¾ÑÖVQ¦ÛÚ"r«˜è¡ÑÐQâ{%¸ôª+ã‘m~<Õ¸¤û¼Ù– +‡·)ÅÃÒÐî¶­ô/!¶™…#À×6³N·/fvÛÌB³6ä¼Ï,\);l" ¢·Tè%jÜ6DZì׊Ùo˵bv ŽkÅl—Ķ­ý ‹L·CÜúíïC:o³ 
_A[wÃøy¨[wÀxŒ[wC)Ózr‡ÆÓ~t?]X.´ªŠ÷~t?Uô6ȘùU¦‘jÔjë&Óª'¶uÃkïØÖM¢»;]qST0ÊÚÛäš퉆AsïévˆÝüùd"œ¯¯?ô~ø4/9vX—,¬3”„»mÉ™F‡eÉÒݰ$éaW²t7+IzX•ä^zþgââ}´?³Ìh“píÏ04'Çh†Ñ;&}´?£ÏŒ6×äg´?Ó(Íêóh¦Ñ9õóhsß~Fû3´ËâþBÃÈG„›lÉp¡áÓé#¾M¶d„·¿ÐðûÝ&Çr·¿ÐèôĶ¿ÐðÅ÷Ú&[2"Û\KžÀö™O\›ëÁ'¬Íµû‰jsóû js3ö‰i¡¡¹9BÚdŽˆö(ž€6·žx67¿Ÿp6Ùß#šý…F6ÁÌ&Û=bÙ_hø~üe“spD²É‘ì/4: =qln~?aì/4J|¢ØÜèk–®)Ÿx5Ip5K÷h5I`5K÷X5IP5KçMÊ“é´p¡¢çl ¯&Q‡•ÁâÃÌ ñdu4~›D¦w[ƒÅ‡±AãÝÚ ñÇÜ ñno°ø08h¼ZD}L6'“¨Ãê ñnv°ø°;h¼,>,曆ۃƫIÔa}Ðx5‰:ì ï‹„Æ» Ââáñn…°ø0Ch¼˜D†W“¨Ãaña‹Ðx2‰:¬ïæ‹{„Æ»AÂâÃ"¡ñlõ±Ih¼XD}¬¯Q»„ŇaBãÝ2añašÐx·MX|'4žM¢ŽeÅný}N‹éSZìÌžÓbÙ9ØÓbÙ•ÖÓbi:ïóΓœùùWvÍýHçMF8î5у®v=нØÓLÿÝɪ£Ç±çIJ³OrbÙþHþmb’%%„sqY´r—-0_Õ9MÂú¤iÓ,Ý­KšÎ<;¬Kï‡oÖ%÷Ã7‹ë’MøKBç9Òþ Ì ü&zŽÿB#•ó <Ùø/,´YŸÿ‚ÄÝgà¿à0i÷x²áÏÀÁ‘"ù<ð_p˜uÚÃÞ3Ž6ö¦¿ÞÃÞtÛ{Ø{ÆaÎq{ÓéaïGÕ4GØ›¦öžqth„½g§äjØ›íÈöfÛ>ÂÞ33P{Ø{Æae{³Ã4ÂÞtÛ{Ø›3#ìÍj¤ö¦û½‡½éŽìa¼=ì=ã0)¶‡½gfóö°7Ýï=ì=ã0Ÿ·‡½é¶÷°÷ŒÃŒÞö¦gd{³“`„½Ù}i„½Ùù>ÂÞ3Óz{Ø›¦öæq {Óx{óxÙE%òoy¼šDíaoïaoO&Q{Ø›Ç5ìMã=ìÍãö¦ñöæñlu„½y¼XDao¯QGØ›Æ{Ø›Ç5ìMã=ìÍãö¦ñöæñ¼‹z²KoUгrYx˜'4­æ Kwó„¦3Ïó„Æ»yÂâÃ<¡ñnž°ø0Oh¼šDæ ‹ó„Æ“IÔažÐømu˜'4ÞÍæ wó„Äó„Æ»yÂâÃ<¡ñjõ1OX|˜'4žL¢ó„Æ»yÂâÃ<¡ñnž°ø0Oh¼›',>̯&Q‡yBãÕ$ê0OHü1Oh¼›',>Ìïæ ‹ó„Æ»yÂâÃ<¡ñbu˜'4^M¢ó„ŇyBãÉ$ê0Oh¼›',>Ìïæ ‹󄯳EÔÇ<¡ñbõ1Oh¼ZD}Ìæ wó„ŇyBãݲry\³ri¼gåÒ–ä¥ÅK›š¤Ðx‰zWôœ•;ÁDVîDY¹,ÝíS²ÙjŸ²ì°Oi¼Û§,>ìSïöé„ÿ7Â>ðcuÛºàÇò¶•ÿzÓ+n£S‰Û´â83׿ꊟŠÜ†uœªÜæ?”¹}mSìXé6­ü©Òm]ñS¥ÛMÖS¥ÛMÖS¥[Ç·]n®o¢Â4ZàµTº5 «Tº5 «Tºu…V)u› /k ¯Zèõ䛿ɀ߯èÅ-Õn]à¿ÞÖSr|O¶õTW—Ç­RÙuÆa$²jÍà‡9¬m_r+ŽhiGÄ®]T\"÷ÞEʽn—ÈÍ/çéQ“3VºßÅä,+³YÛîä+/j39o~» éÖÃÚ„Ã8çíô°6á°¬n[MeÅa¶l[M~ÅQe6)~{g¾gÚjº ?ßKyE^ÏH¦oYõ Ì%m«É¯8¬$ÛVÓÍk±èºÜú¢ë^ó ‡¥{GÀ|Âa5Þ0gû}Ì'WŸ€9Ûö0Ÿp˜ß8æäŒ|æä$xæ‹–Ž€99ߟ€ù„£üÆ'`NÓ0gñ0§ñ0gñ0§ñ~Ôfñ0§ñ~Ôfñ0§ñjuÌi¼šDsæ4Þæ,>æ4Þæ,>æ4Þæ,>æ4^L¢Ž€9W“¨#`Îâ#`NãÉ$ê˜Óx˜³ø˜Óx˜³ø˜Óx¶ˆúÌi¼XD}æ4^-¢>ssïssïssÏ»¨'»TŠ7 ôœÏKÂÃ<¡i5OXº›'4yv˜'4ÞÍæ wó„ŇyBãÕ$ê0OX|˜'4žL¢ó„Æo“¨Ã<¡ñnž°ø0Oh¼›'$þ˜'4ÞÍæ W‹¨yÂâÃ<¡ñdu˜'4ÞÍæ wó„ŇyBãÝÌO&Q‡yBãÝæ ‹¨yBãÕ"êcž°ø0Oh¼›',>Ìïæ ‹󄯳IÔ‘ÏKãÅ$êÈç¥ñbuäóÒxÏçeñ‘ÏKâO>/÷|^ù¼¬%y}ªUzž![·øA³y'öœÌ;Áç\^VÓ”l²X¦,Ú SšV»”¥»YJÓj•Nô?¥MäðN4‘ÂË~»gðN4‘À;ÑDþîDé»MdïN4‘¼;ÑLîî„©»MdîN4‘¸;ÑDÞ.Ùî‘¶;ÑDÖîDÃò±=iw¢a9Øž³;Ñß„F¥¦ì²ƒ92v'–ší 
»äêùº4­éºä:Ùºì·{².¹F®îD©ºMdêN4‘¨;ÑDž.Ù;ÑD–.û힤;ÑDŽ.9–#E—ìï‘¡;ÑD‚.+eÏÏeÛÝÓs'šÈÎh"9w¢‰Ü܉&RsÙ>陹ìülö¼Ü‰&Òr'šÈÊ%[2’rÉþ9¹ä*)¹ “1{F.Kuñ- Uœä*–¼1²ó$¶µSúT‚×Gzäå†`dWš”à]¥D™žR‚7ò}’ïMJ8«ÚÚñžÕl±­Û³š-¶µs-4L K‹nIº¢xÉI){.ûiMÃ%eLžnD3ÅŜ׃ÆW¿ÿÿà?ýîC¸ß~Üm_zþ”¿ÿíÇ?üâã_ùÕÇÿžœ Î endstream endobj 20 0 obj <>>>/MediaBox[0 0 612 792]>> endobj 21 0 obj <>stream xœ•Me;v–‘˜U”A$`PAB§÷öǶ2ŠH€† Ém1‰:QÔ©™ð¿àâµ–O—íBë<Ö•nI¥§Ž¼üýn¯ß|ùã¾ÄçkÉÏ×þêËŸüðåg_~óåúZZøúëþçõõW_ž;üöOùû_}ù›/?ÿòw_î¯ÿûKøúgúÛ/÷õõ?|ùË_\_ÿªÿóû«ü÷¿þú·¿óí_¾ëÿÿÞ/õo.ùG½A?ýÓøµ}ýá—ý—ä/ﯭ“wxåûë¿þò‡ïïÿóþVÚúÿEó«FLß×ýzž¼¼Rãø_1àíugއôj•ãñzÕû^Oâx ¯Tðú:¥œ^÷ŠÿOo¯|ðëO~Åuü/׫L™ò¼ò:eþ‡‡×ûÕVüß»xŸ ý_=º…×:a~îÒõµNÞÿîÐኯµWþ•K¯6þG½Ó‹OÄ®WXÿ?¹x~=‘·;Þ¯kÅÿÔÅŸWZñçáéÞ7/oðC*»©?óðöíÍóëë:è÷'¾êºü…‹·W(¼ßKz=+þß<¼^¯‹ïF¡æ_s¡Ý¯ƒ^oÏ‹·$^aߺ¼n‰ýÙ¡Ó³ïÐnŸ<ûíþv ›•î`–"î”^ßÑS­¯’¸¡­_£o{ë×h¾*òõÝíÍ–|íÏùÞ÷goâæÀU—hË~êF ,:úôNy˜ M¾¯.Íè§oú˜¾»Â=  gC—|÷^Db<ÆW8Á«pOéuq\¾¯ûæˆ ü7˜ ü7Ú“XïÿF{{É{àaKt࿱ÞfüÛÿ†»:ò=ðßpocøíÀÃ_ü7ÜUïïÿ†ÿøK—o»­?ùð¥b³Õÿôpí¶z· ùR±ÙêΗrﶺ=YžÝT·íõÞMõŸ|©Ï‚Î=OÉÖÅjÀm—‰/¥pÕW]qïà”›©^¿‡.³žƒ¶‡¼›êJáxí¦ú_,ònê§/›©î7QYqÁ=Í'_,®ˆç{¥µâîw%‘Z‘·]´ÖAÛElE>ª]mm¦ºs¦«­ÍTwFÖ¶›ê6¦b›©îw‚~Š=|˜b?Æ®„_ìçXZñàâÏ«ò8ö³,œàE¾¡a¼Ÿe× ^wSýoñÈÔ~”˜Ú²SûQvbj?ÊNL-ý¢”ð~S:ÀûQöœàåuàK'x915õ£ìÀÔÔ²SS?ÊLMý(;05…tdj?ʿҦ~”=x?Ê®¼ß–ðÜoK'xÙMý$Låáh ¿ûQŸ@xèL«>¡´éLÎ}‚qÓ'úã¦O(>ô ÆÛ‘©CžP|ÈŒç#S‡<Áøsdê'7yBñ!O0nòâoy‚q“'òãíÄÔ·<¡ø'ÏG¦y‚q“'òã&O(>ä ÆMžP|ÈŒ·#S‡<Áx;2uȈ¿å ÆMžP|ÈŒ›<¡ø'7yBñ!O0^LòãíÈÔ!O(>ä Æó‘©Cž`Üä Ň<Á¸ÉŠy‚ñrbê[ž`¼ž˜ú–'o'¦¾å Ň<Á¸ÉŠy‚q“'òãåÈÔ~”˜Ú²SûQvbj©G¦ö£ìÄÔ~”=\ª¤~”]Ïý(K'xŸ,Œ÷£,p\Dg¿Â—ÈÐçÏ&Eï£>à¿ü¬O'ú÷?ëSJ›>…ÍV}JÙ¡O1nú”âCŸbÜôé„ÿC O'Ü÷ܽ^±-¸ï¹›_¥ñ_ïûʽŽQsñ"j3~¹‚3ˆƒÚŒÿK¯âÿ0ãÿÄœQüf<»x{mSìÇ?tgzå•ÿco¯¶âà*μÛ] yí¶Vv[½¶‡p了¿žÝÔÇUœa7ÕíøËnë?s%g|ÝÏøÐ/É9s\ܦ"^~Aü¦"ÿõ¾žÊ§°HÎ{ÅÝ÷¶¾žòŠû/bQ\§fÜ{³ÉWÜ÷ùM»©®§e?—6S=§ÿØ—S>øõ¾œ6S]gÕ¾œ"ß²Er> ÷{ ý²¶âÞƒ±HÎÔ¸©ýtªü@ˆ)ë fÂݧÅ~:=+î>\æG/kî…ÄÎ¥w]\ŸG/k´gJØMußhKÙMõŸ¸LXM¸÷¾« « ÿ·®†4a#Þ¼!â~oÞ'â¥-޼׊»®Å·}Å£x°¯x´1¡î¦ºn±1~αéG ÷|AR¿ë¥„'AêgSMxñ‰KoXqÏŸ"=—~ï¡=ó<»©îë«)Þ˜¾šjÀGêgSx­Š[ïøœé«é ¼ßûUo3õÏ\ ™vS}?Ý{ÓXžS˜yõÒ³#ÞQœ¹ßfÒç[ éÏèÏ¿jzÒ\ÚÕ1Ç‹t¥ï¨§ Æ›tÆC–a¤´DÅ´%öøA[RPý‹ñª'Ås/÷ÿ[zÒö'ïmwÃ9Ë­Ú}½ 
ZžDŸ÷ÖÏ]ƒjwÚöÚ7¹ºà~¸hßä*oLëôâ®èÛÐUqÛÃÕ‡)/¸ww׳“Ę–wµFWœwÆ)1¦yÅ]¯GYMë¾áFRöÕWÜu’LEBq¿ç¨Gׄ»Ñ·ÙžiÛ{V„ËCbLkÄ32{V¤“ Ú³â„»>ÄõÙMuç{»wS}Íòâ–J”iÁ½¯úZw _mÆWÆí–ÓXð\—ÓRx[⥒`ŸKˆi^q7"5]ú!‰ššìåî1ÛË/Òl/¿ÔÔÇ^~iÛŸª_&Üû<KÒ/î}”HÓ²âÞ¼X³~\™p×!¸]úq…šÚžW\šòA›~ê¥~Ç»+ÞcD›æŠç»hÓÆI‰4­üL¡éS.Ü6$ÔôYqW(÷ÕtñSO‚MSÆó]¢M+?ÆR~^%ðazÔM“IÞ!VÜ•áòyGÊ;Dä)ïozò¾yócL´iŽ|Ë3?õô}ï¨CšÂ•:¤)´S¥)‘]oöƒ©AijwM$M#¦GÞ#ŽkÞ#Œ[Þ#ŽkÞ#Œ[Þ#Œ[Þ#ŽkÞ#Œ[Þ#ŽKÞ#L[Þ£yð¯[Þ#Œwmz¯sàS4a^q?‘‘æ=šqO8XÞ£™vƒ>5ï6T”éÚ~⣤û wãñ.M«A›.κۺv£Î,û5U”éµNï‰B”iZq7‹HÓÊg˜HӇϰìåiÂ]y—㾇}JTù–'Ò´¬[ž÷U5”k_ÙîëÒ4Lš.MÛÁ”ìÒ4FÞïÍ>kÒžiæu1áîSÒe^þ_>ˆÓmóŸN5ÒŒƒ H3î§4J»©þÓiÛMõ¶È‘iÆÝÇMK€Dg¤ÈÓ›d’‰Î/Ë€„w K„\S á.×Hx¶~"É‹)¿6ŒôGxbYú#:ô#ý=4Fú#Úö‘þˆvùHD·é‘þˆ‘<˜Ü¿,ÿnºæ?¢ëÓòÑùbù°™šÿ[ù|·C»°ëÑ›¢G#y+m“±ëÑx0‡_¼Á¤áG÷¢4üâ`k† hïƒÊ¤pz A O.¤T:v•y_òæ€éuɉ‹iÃô=¾£aÜ6ŠËGé|€7Ý)²ÞŸ(-( ãE?wR<´$Õ£1êrt#WŽ>—~뤿þ˜™p__šwÕ„û¹pÌ»jÂÝô3Õθ ÷¾¹‰ݦ£+v›qþ‹‚4ñ¶Kôèfª{¿Únªûìu§ÝT7óH¸vSÝW²¾”_×"Hã:g>¥ã-o{×£÷ÚW{§¢gË„û3èÑBMͶ2áž‹èÑp`êÓô{à„»/Ú]nkÕm»èÑÌg¤èÑÌç»èÑwÕn×£÷Šÿù=šù¯kF^~Òˆ­ ªèÑð¨Š}Uy.½ø±'z´E<ª1Ýû)é>hv=Z"Þ–ä¹ôŽ|˜º$Íü ”çÒ´Ù®?÷cù´í%©¤ÃTšŠAÚï5«„¥(Óí ñI™Öÿ”˜7ð~×çÒµ#]M%Ï¥·]ŸKWÜ}‡ öª'°(ÓÂ'(ÓÍTWâwiº™êа”vS½7s§7Zy. 
ëõ>ÈÓçÆ»˜ÈÓ+à üvå¥m—çÒ›Oy.½ùyzóQyÊïúŠ›>Ä)ìõ!NáÊât¢=žäOWœ¹oI7‚ÕëDœ¶Ó"Nû~Îñ"i™1ÞïO>À›|dƸݨ1.7êz€‹ß¦“†¦r¼É–ˆñ¬ÝŒ’§é í]žÆuP?yònsÀMÖ[ôR:ãîƒfÕK)n{­òn7㟪¹ÉÛ-báÉòvK§˜<—¦ÆMµTa¸1–* 7FR…Ý|˜$UßôB«r±£¿¯(;:#ãÕäKç{¼õU{ÆÝÇØpí¶+ÂúÑtbjÔWí÷ Æè«6í÷˜ÔߖΙ˜Ôß–.˜ÕßvÆÝÇĬ™–pÏ<ú›ú4ù`2ãne—’÷³É˲ëµßO¼, "OÛŠ{YbÓ/~3îeYoÞ²â^ˆzº4¡ÄŒ»Áþ¢O7[½L¢OÛÉÏßu7ö§já}#þ¼›±ž—¶ÔÍV/o‚Ô#[SÞOb/o‚(Ô²ân‘¨7¿‹GïUñn 5­¸_&HdnL—¨¥áÝ@2Ý ïÂ"Q3?¹SËx×3}J7=Ó§ôh2}J[mú”ö èS¬$»è”ý(AÅE³‚_í³Ãwߵ­<0.Õð Ñ×FŒ÷ u905êwXŒÛG2Œç[¾Kq\½,fÜ—§êeýQ§UŒ—$_×gÜU„õÚÛîšæ½í¾{®]ìhÛ[Ñ»üõÐWGZq÷á¬éõ¿£^icú‰þ¬¸û`Ònª+dBÛMu5[?Ñ7S]­œ®#Sû¾™ê¿oê+nL¶&ÔÔÇ>˜L¸û¾ùè+î÷åÁrÆ}oÞº›ê~tó|Åݵí¦úz3ë×Ç w•‰¸ò¶K2]ùòç»$Ó­Þgâ­i(é$ÐÌF+î:ÐMCIç»f6zp¿ÇhŸq'Ü ©”³iÅÝʬ_×gÜ ©”¼{…·ý¹ÅÃdÆ}Ý"&3î~Eè·ã§ð~ï«é:À«æÍ˜q÷+B­»©~°iÚMõß7/ýÚ>á~°iÖÏpy¤¾šîŠ—Gêwä\ñ¨¦pëçGjª¬¦Š÷wÉl”*Þg’Üô*Þ#å94T¼aKf£§â)&κòq“öÌ£Ux˜ ¬ÂÃT4° 7¦Ô½íþû¦Õà_Z“¶½ñCuh͉v_ MkÂv ­ ‡hMxwP­ »/‡z Enö‰ØPô‘9þùW%?/…-?/§%?/¦5?/§ g-?/Ç5?/Æ-?/Ç5?/Æ-?/Í>ðý"Ò·ŸÏsD~‚ÁÀO´÷=l üD{’k ^1®…ƒ0n…ƒ8®…ƒ(> qܾQÜ q¼˜: aÜ q<™j…ƒ8®)G1n…ƒ8®)G1n…ƒ8®)G1n…ƒ8ÞŽLµÂAoG¦Zá ŠÂA×”£·ÂA×ôQ·ÂA·çlŠ[á Ž×#S­pÇÛ‘©V8ãV8ˆãùÈT+Äq-„q+Äq-„q+Äñrbê(Äñzbê(Äñvbê(„q+Äq-„q+ÄqûbKq+Äñ²›úI—vÍÛú±®)…‡<Á´ÊJ›<Átáì'7yBñ!O0nò„âCž`¼™:ä Ň<Áx>2uÈŒ?G¦y‚q“'òã&O þ–'7yBñ!O0ÞNL}ËŠy‚ñ|dê'7yBñ!O0nò„âCž`Üä Ň<Áx;2uÈŒ·#S‡<ø[ž`Üä Ň<Á¸ÉŠy‚q“'òãõÈÔ!O0ÞŽLò„âCž`<™:ä ÆMžP|ÈŒ›<¡ø'/'¦¾å Æë‰©oy‚ñvbê[žP|ÈŒ›<¡ø'7yBñ!O0^ŽLµº¦¯G¦Z]SŽ×#S­®)ǵ®)Æ­®)ÅG]SŽkŒ[]S¬$»è¼{_2R¶‘«šNì碦ü¹¦)…Mš²&«2…覔6] é!K)mªôý€(ýFû¼—ú˜|£ýd¼º˜Àß~¢äŽžh¿iU¿o´_Ç4ª3ã7Ú/cÚÔ—ñíW1MêÊøv‹˜J ÷B¨aš73Ý2 ×µ™é×鼞ÍN/´&Ü÷f§×+á.›nuÔ63Ýz¤}õ´‡›Ù—O\p·i_>å¡“V=|¦»ªW!¶|DTÖ†[²ï>tjòÝ…öëfuÔüFû±¤×6<~Òg¿é½Yéǖ׺…û%Hƒ$¯‚¿­YŽúSI˜XèXJiH´¿%†ôYh¿úè­j Z5/*mw—…q9Lüì¼UòˆL´_y4I‘‰ö£G›Æà|£ýº£IsaÀ>ékgµÒOU”7+ý÷'­v2Ñ~ÍQ-v2Ñ~ÉѨž¶¬%â—2íoqË}2]Åâ•{-´_nôQiép«²ƒ-éGÉj¥_k4lVú¥Fëf¥_i4nVú…FMé°•–² o´›~6k"<Ú'æÁ£³J\q#ÝÙ$onŒtgK}í”…öÝv«ÆsÁ–ôµ“#µr8â²f?\Öêá†Ë´á…Ëf«:áBó‚0º+Ä$Ù;ïÏ7Aû°„üªÊ_J2¤—2¤˜¶2¤×2¤×2¤˜¶2¤×2¤·2¤×2¤·2¤3Êâ_·2¤3ZÒw[­ 錻ޞV†”ãEÖÏŒ¬õ²â®ck_O±Q†tÆý\GM¢XfÜÍ~+Å^*oŒå÷žq?:TËθ›æÆÊÒ~eHqÛe5­ýîÇž~7L~6"-C:ã® ³2¤¸#­ 錻ážV†”®¦Q†tÆýpO}ÄýÞç:¨n}Ö®8o¹!i7ÖS‹ÒÙhEHéø[Rºìâ]7+]Ò‹÷÷(AJ7»Q‚Ûi%H±¡é!ŽGHB² 
ï˜\vS}OI}¤Å¦>úHKרz©ü€%Hé¢%HéÂ%Hñ$°ì”ÔTÉ©ùÅd” ¥%Hqc¬)íÍ©[ñF­9uoLÔL8tkNÝw¥¢„d7ÞïV‚›*ï+î×pÑ‚Žt'Мº¸äÔ}ðZéZ~½ÆÝT¯R•p¿iŒ¤3î~\°¤´í"`#?LÂÒùk–‘ùà+b³¨ïÒ¥}¶Ô€u)¤ßºãªK)=t)ÆM—RÜt)¥‡.ŸéRŠ]ŠqÓ¥ºt‰.¥¿>té„»ÚnèRúë}±6áŸtiª¼1]—Ö÷ösIªVܪV‹7æiª¿h¿Kbøï‘Rìe»xO7¢LóÍM•Äð«©¾}¤¸ÑŒR¦%à)6j‘Ò)6j‘Òa’R/鯫I”i½ñZ•R/ª(ÓçÆ›ž*S> T™òK¤„p®kÏ aºÞļÐ( à¼ñBUYʧ—¨RÞåV†7ÅÊâ²2¤3î–µ2¤t„†.hïsÿÐ¥ð¾9t)ìÅ¡KáªÈÊGÄf¿± 5 Z¢‚èÒvcz”!å¸Ý0)neH9®ùë1neH1neH9®žg”¶2¤×2¤·2¤3Êâ_·2¤ï›PXGÉ}uìÊôYq_jF•ÉîÊd+C:ãÞ7ÊθïøÛvS}OÞ¬Oî>ôÝ—¾òL¸+¬ )5u”!qWeXÒwú¬ éŒûBVËâŽLš~Æ]—ÒñgÂ}O^-CJ§Ø(CŠq+CŠceHqG–¼›êêÞzo›˜;!«^tè|lzÑ¡ó«éEg¢]1uéEvŠˆÒÕJWyõC‰Ÿ"IŸÕLW^HJ¯HÒ´â®Ö±”^t/–Ò ·]Rz>F’Ò+àu!¥)ðžyÂnª_¦î¦ú¯Ÿq7õ“+oŽxeÄšôËÁ„ûq¤—~9 išiƽïp"I·;˜û’uiF$:L"IÓŠrå­ q7ÕmL¨»©®;lL»©~ŽT­>ŠMµâ£ØT+>ŠMµÚ£¸íV{”ÞÙFíQz†Ú£¸ß». üª!ºôYq÷3F×¥›©î{|×¥›©n4t¿nç¯/cU˜Â{̦°åC˜Â³=¾Ý‰Ú´Ü{H˜^¬ºÏ¿úèÇZHkÒr€Wù^@éÛ°Š;áŠû!£ê¤2ã®þ’`·uCuÅ»E<%ém|Tûj x¿!?‘O‚¾š6S}­™wS]U%ÉñøÉ$Io[Á“ ör,xT¥þhYqïvƒ9ÃQ•ú£¹à)Io?l$ttÝóüS{¸ƒÛŒ”m7oz¿ G~HùÑÂO)?zß|†õ ræ§”}ÖSØ÷ÍÕŠÎôœŒU3»Î¸ÿª™]icFš#jªp±ò_¿ÍŽªÈÍ{Å]Ô`ž¿p1Éuº­‡‡û&ióøY£‘£ëiðG®ÜÔBót›¹^|R~´ÞöGSäÒ£IäfxðŒ¹ùšqû›j1¦3bLgÜMïb1¦3î¾­YŒéŒûÊTcLgÜ•<’%´áž1¦3îúÃZŒ)ÕcJ§˜Ä˜ÆO±4|›j1¦¸1cŠc1¦x˜,Æ·ÝbLgü\e*1¦3íæYÖÓ™®®.•ÔG¸áí»1rU€…˜Òù5BLgÜõá´S:F#Ä”š*¯¦!áu-¯¦OÂ;Þ1Åm·Ó÷e¬~)Ǧæ BsÆÝLº¹ŠÐœñt…©Æ˜âÖ Ä—wÅ?e@ÚLu¿XŒ)î÷¢¡À3þ;Ÿu쌻NΦcgÜõrnë„ñÊ?˜Ži¯Ð騙öò]¶G¯t Ù÷Ê?ˆÅòÒRU‚>*9‰2m7¦‡3/ÇÕ™ãæÌËquæÅ¸9óbÜœy9.μ˜6g^Ž«3/ÆÍ™wÆ3/þusæqïº+Òt›Þ}Wœyïux^‘¦yŽõ&Ò´üºäß_q¯Ê‰HӲ⪹¤ÝV·ÎIצ›­?þòƒ8m|9‰8­ërú”˜7¬¸û–Ø—ÓÃWßpçqW±uqÚVÜ}Lu_Û¾ÚŒz)pßû·é¥tÂ]ßâ.N7SÝýBÝÚÞÅi<èÈ~¡.|×3^ÜõçiW´«?/m¸ùóÒv‹6­7ÿq)һ⮻¥yôbÜ…‹)¾ûŠèìÂãŠCÑ Ó…èÓ„a+BÊi©BŠi-CÊéÂY+DÊq­DŠq+EÊq­EŠq+FŠGóR§¬TÁÑŸ`0ðíVбŸh7¢Ó~¢]É(?±îCéø w=~ÇÀO¸[$f ü„»WÇÀO¸«uÇÀÓJm7Õ}qÎi7ÕíÈçÚMýô¸¾™êW ºL-ÏnªÿZ~了³±½ŸO¸÷º#……žÀÓ4« ‘áÒ¬.3îê?Q]w¤|¿ØLõ£—Ûnª›¼WT×AcDu˜:Tׄû¯å÷nªû…ATWÄ£*^âWä¦fÍ.1ãn•#‰¹ˆx5‰—xXq×!Câ(#^â%¾™êîH»©nJñÚvSý÷õ¼›ê”¸vS?y‰_üˆŒ·=:cÜ)4…Ç5…Æc<25Ö#SS<2µe'¦ö£ìÄÔ~”˜Ú²SË¥ãŒñªÀ·ÏoöÙãåÄÔtÅSÓUOLMw:15ÝíÄÔÒ‘©ý( ÏžÅãý(»NðGÒ«`<«ß7ÇËnê']*á]ýÝòÂCž`Zå ¥Mž`ºpvÈŒ›<¡ø'7yBñ!O0ÞŽLò„âCž`<™:ä ÆŸ#S‡<Á¸ÉŠy‚q“'ËŒ›<¡ø'o'¦¾å Ň<Áx>2uÈŒ›<¡ø'7yBñ!O0nò„âCž`¼™:ä ÆÛ‘©Cž@ü-O0nò„âCž`Üä Ň<Á¸ÉŠy‚ñzdê'oG¦yBñ!O0žLòã&O(>ä 
ÆMžP|ÈŒ—SßòãõÄÔ·<Áx;1õ-O(>ä ÆMžP|ÈŒ›<¡ø'/G¦>áÈÔ§™Ú²SK=2µe'¦ö£ìáR%õ£ìâ¸ú Ÿà¾Iqñ済Π*gý½út‚=Ï€¡O'ú÷?ëSJ›>…ÍV}JÙ¡O1nú”âCŸbÜôé„ÿc O'Ü÷è½Ä»x¢ýXÓü*ü·Ÿu0=þ[Ò¶-´û¤(YÛÚsâ¿ÅÅc¡='~Éâ»N+ωÿÿ‹‰uÝrï–öyåú·¶Ï+¯S$…ï6¯|'áûÚ'–ûóýb|ñÆKßÍV¯×Åéw³Õ uŒa·Õ­©Ënê¿pefÜMõæ®8ý¦“ŽÏq·Õ}ìy´¨ÇŸ½5Þ’å>jL_K7^¡¯¥¼Ðþ›Y}­«É÷ùMÛÎå&¨êkiݹüÓ¼w¹ïÅ«%#gÜu½-4†þzÐ’‘3J6l> 1FÜåª/ןvc(ÓwóÜ^mûšöŸ¿ò6·\?Õ¾ˆÖ¹å† K$ÊB»y¯$e¡]ÛRö¹å¦kªaŸ[¾·oÙç–ÿ–•¶£Ñ5´/¢õhôü Ò•q;$9RÄÃ#> žW’éæ¿Ê6­Üœ81lÓÊÏÚ[7+Ý€è”âÁm+å Ïɉ”ð:NÏ~‰÷‹›î—xßý6lÝí&÷íkgín×¥ºŸ>ü„Hýô)¼û¹ñšG]¾Wå+mVz~,â¦q/]¸ÔKþ²HÈÞðÜ9:*§7K¼›"…%ïnÍœn¢¢)ÝÏìrcZ¶ÞnÉlø`:& Ó¢tÒk8¦‹|¦tÖ//”~Ô|¢ýÐÑG>\Ðß.Z]l¢}·Ë"qúíV•]d¡ÝH²‹,´ë*»ÈB»y|[Û¬ô](Óf¥ïyK@*l·hÁ²Ð~Ú\MùK["«¡Ò± ’æ³Ò± Qs†L´Ÿ˜HS†Ð>Éš1d¢]߯œ7+ý¼A×f¥_ÆåÙ¬t¯¢ ÞPD†eCq½&åcÊ'JÕ¬Ù´Ãík \<Á>¦L´ŸW7oV~ò€|m·¤É½m·<0&¼%K’ÜZfÚû¤ûQúŸº´~9§-IúáœÒY¿›ÓÌy³Ò•DϽYéWcyN¬,ñuÓU,¹qñ¦ûÊiØÄ¾p"-ºrèºVÝ€ÊKÓ"ÃF§[³"ÃÎÍÚ-7îƒëZ²LžíV¥éGN^V¤¯«¤gœhW„ô#'.ôŸ»t“à[8î"ún¼ïˆè[­t‹˜>y³ò“è‹xEŠè+Ï“ª¹ä謚J޶»áåk‰ƒàÈXÞ  þ…«à$mì “{ì||]”Ü·\˜å˜„YŸ0|É’¯tQžU¾#*qù·¡=v)l/¢œ–)˜ÖQNÎÚ‹(ÇõEãö"Êq}Ÿ½ˆr¼™j»7]Žç#SÍc—ãÏ‘©æ±ËqõØÅ¸yìr\=v)>p÷9rèÓ ÿì±;ÓŸ=vño?ë`~öØéÏ»3ýÙcw¦?{ìÎô'Ý™%»3éµ-<7[s¿_¯éPìá„â5ìïº×z²y™Ó.]ªæ´;Ó~¢^uÚq÷ÍÌœvgÜͺjN»3î>²™Ó.n»9íθëAÜÓ}ðë’ózÅݧGÉyÍÇtøíâ~•¹â®±¨ÌSEe´]]wéI=\wgÜõ"6×Ýw½kÍuwÆýD½qŸa®¤ç£”®´›êûï^{·»¢wÞ7Tד8Üû óŸsÅ…—Ž’¹ðÒùe.¼ø·Õƒ÷Šcà;R’:ÛíÁu'–B½øìW{w¾”ýjïz¾èRýîjï¿è~wµ÷ŸtÅ·[ýxiËÍ—îºæÇKÛ=v'Ú-°ø}A„eŸ¶þ‚FÍŒþùW³8SZë”Ö¼èŠßQÊ]p¼é‡KЇ,Æ£½§`ÜŽ Чƒ–ôKñÉå´Ñ§Z0÷Á>ZhÆÝ ¶}ûIåô+΄»Ž·} ó«ß‰ïµíÞ>.µ`ò©ý o+öM(®ø§2¥eÅ]÷Û~o¦º.›áÚMõŽOyøl¼g¤Ìfªç#&eJ7S]éÒoÅϺk¸Y`ûjº2ž²;§wSÒfûâJ;ò±/®ÔÔ®0ïµ1Ÿ*Álû©›’¶^R=Ï™ú¨gú„»>ÄíV×tjj³ç½ w…×eÏ{°íR f;:|iÏ{°ß¥LXqW0„´›êŠxí¦z‡$¥88%¥ÌvJºR*=RQ·=[ü Ü#¥LŽxÇ'¨³#OUŸNx؈KqIxÓ“j0÷A¿W­à6ãn צÜfܯá*÷<~år0%à /å`î€ÇIÊÁä€{RÊÁ4~ÓJ¥7Þ9´RéŠ{Yð¥̳ân‘d_IhÏ$ûJB{&_»©¾0}vSý€Ñ{7Õ×¥ªîáÆ¡…JySJÕo*ôÇkÔo*ðЖ‡Ìû ›é{ºöDœ6ÞÃénÃëî¿C£#葜}áÝЃ÷Öú󯚓¤ïá€ñöª•ã·ÔeÇ´uǛގ(-’Œâé %©õy~Žú\<ýÖ†ûâT‹@á_/úag¢½é}Wý°3ÑÞ&qW}°¥t¿íÊ–2á~é£[ ´Sœf·Éå ª~ÛÍ+îJž~Ûm+îÇœ6½1N¸`Ùo»eÅ}íxé5ж½ßvsâÙµ£Èä ÷ê_‡TT&O¸+Â¥FÒƒç£hǸ⮠z¢†ßÀ)UDïÂ{¦kÇÒx¿×[*ƒãQíÚ1¯¸«Ùûjj¿Þµcäm—B¢õ€VØQŽ^¨qëB?¢4ëãô„û)‚n}œ†³%FKð@#Ïü/Œ(ÏüÏô(ÏüüŒ‘—ÉTyÏHµ»wEf¹÷#É—Ô%Äm¯öx8áÿÇÅíñî½aºJ®¸—WŽËÓd»qÏÈÓd<øõ‘ècÂý¢ ßÀ®‚ 
ßÀn¹Ô¾š6Sÿ¯‹—ÝT¿BFÔïp×çÉÈHyž,+îF§>ßÀ®œ~¾;€}awí¦ºm/Ïnª[=ÕûäZ-Q§±ð®ißÀnän;:‡œhϧlèÀ‰ö|Ćœèñ§´ÕR]—YRæò’ÔÉ&J³î(ŽJ [±ªÃJ)ln»œ–·}L«Û.§ gÍm—ãú¶qsÛ帺íbÜÜv9ÞŽLµ°RŒ[X)Çó‘©VÊñçÈT +帆•bÜÂJ9®a¥a¥×°RŒ[X)ÇÛ‰©#¬ãVÊñ|dª…•r\¿ØaÜÂJ9®a¥·°RŽkX)Æ-¬”ãíÈT +åx;2ÕÂJ)>ÂJ9®a¥·°RŽkX)Æ-¬”ãúîŒq +åx=2ÕÂJ9ÞŽLµ°RŒ[X)Çó‘©VÊqýìŒq +帆•bÜÂJ9^NLa¥¯'¦Ž°RŽ·SGX)Æ-¬”ãVŠq +帆•bÜÂJ9^ŽLµ°RŽ×#S-¬”ãõÈT +帆•bÜÂJ)>ÂJ9®a¥·°R¬$/}„ª¡òßÈQ¥û9¨t‚?Ç”RØ´)k²JSˆeJi¦º”Ò&K¿ÑÞ;ý[•~£?Å’†e?Å’> ÿöcÕ7úS4iXèOѤÏBŠ&½–‘ÿMšÚ&Õ„›ý1ž4,ø§pÒ§ÐN‘hÒ‹7E‚Iÿñ;ov~ %]Ítƒ7ó™ég‰a3Ómw,›•~áu˜h·ªLR¯¸‚„ë`¸_‚K¤ SþÞÿ¶d_|è’}q¡ý×(Ýí:WM|>Ñ~ —(ÏÜígn›•~ìhÚ¬ô ¸\›•þÛlÞ¬ôË·\›•þël‘÷pØß6šÚ}ìK§5le×±áv'ûTõv=—³}©úF{¾ú.{@Ûw*he±ÏTlÆê£lÀ}Rµ|ÁD»þÖ}í„LWƒ‹> ýéAö¿-ï±)Ó9˜Äõ÷Z¶X×ÁR2,´Uúl›½Û’~i»oÚƒ&šo:«ô!–ÿ¶¼ÃÞ¸Oäö¦36Y­±ùẜ>y3ÓOû{Ë(lèf%Q¢y¡ý˜Ò"{´%iï½#ZèâÑÑ…þ#ZhKìõvIx£˜¥TÛÇ‹‰’¢æ>ÿ¦ªIÆ5IaU“65Iá‚Ñ¡&)mjÒCMRÚÔ$¤‡š„cxi i7Vî ã=ÑÞål øD»É—lÄ'Ú­×"C>±®ÛïsÚ1èîêü1ê´1cØ'Ü1ãNh¼mO¸·Ÿ¾ß¶'ÜõqoÛ´1ãm›6f¼mO¸[ßg¼mÓŽoÛ´íãm›Nõñ¶MgïxÛžpWo·mØ‘ï·í w+ÃŒ·m¸<ÞoÛp¾¿ß¶a¿¿ß¶'Ü §oÛ´íãm{Â݈íñ¶ gäûmN‚÷Ûö„»esÆÛ6œïï·mÚïãm›¶}¼mÓ¶·mÚ˜ñ¶=áî—¥ñ¶=ánõœñ¶=áž/Üûm¶ýý¶q{Û¦øxÛÆ¸½mS|¼mc¼™:Þ¶1ÞŽLoÛoÛÏG¦Ž·mŒÛÛ6ÅÇÛ6Æím›âãmãåÄÔ÷Û6Æë‰©ï·mŒ·SßoÛoÛ··mŠ·mŒÛÛ6ÅÇÛ6ÆËnê'1jŸeýìz á!O0­ò„Ò&O0]8;ä ÆMžP|ÈŒ›<¡ø'oG¦yBñ!O0žLòãÏ‘©Cž`Üä Ň<Á¸Éˆ¿å ÆMžP|ÈŒ·Sßò„âCž`<™:ä ÆMžP|ÈŒ›<¡ø'7yBñ!O0ÞŽLòãíÈÔ!O þ–'7yBñ!O0nò„âCž`Üä Ň<Áx=2uÈŒ·#S‡<¡ø'ÏG¦y‚q“'òã&O(>ä ÆË‰©oy‚ñzbê[ž`¼˜ú–'òã&O(>ä ÆMžP|ÈŒ—#S‡ë-Æë‘©ÃõãõÈÔáz‹qók£øp½…øÛõãæÙFñázK•dÿ_¨¯]oï* Îýì{;ÁÀùv¢÷-¥MŸÂf«>¥ìЧ7}Jñ¡O1nút½/øo}:áŸ+ºÌôçŠ.ø·Ÿu0?Wt™éO>¸ë´ú\Ñe¦?Wt™éO]f–Tt™yPÑ…vʨè‚[3*ºàŸ·Š.´ñ£¢ þu«èBiTt™ñèâe7ÕÅ­¢ ïI+é2ón+éB—Q~QöƸþ»å>jL_K7^â’›ÚwàÇž™vß|µœËLû¼mÛ¹üܹyïrßËV˹̸ï«å\ð¯[9—÷xµœ Ð.1q—«¾\ÚuâMßÍsß‹·íkÚþÊÛÜrÓ1õE´Î-× µ¯¡µ[ÜDRZÈe¦]‡b+ä2ãnJ^+ä‚‡È ¹àN”˜n¨Ä„,´ëãzeÜŽÔïrxæ<¯R¿ÍÝü·µ€ í+àB7!+à²+®‡«VpÁ]˜/1'i¸†4¦ÕEš¶Ä<¤á&8¤éè˜ôDƒèMj¥yGÓ–˜s4íóžhÏj¸FÃy2<£'ÚÍâcŽÑí>[š_4Ëá ûdxEÓß6§è‰v¿˜O4Ëáé¶Y â5)mþИÎ'Vš74¦ÕšÒæ iu…¦´yBcºX9ü 1]¬^ИnVhJ› 4¦ÕšÒæiõ¦´¹?cºlV¾õÂϾüæK-|ýõaõå¹Ãoÿ”¿ÿÕ—¿ùòó/×ÉŸ}ù±ëAd endstream endobj 22 0 obj <>>>/MediaBox[0 0 612 792]>> endobj 23 0 obj <>stream xœ•K¯n¹qžd¶3±‘Û(À$² _Öâeq1F&†%˲b«¥ÖŲ„L 
Å0$ò$¿,ÿ/,V}:$O£ÖC4ÐØýìÕ,Þ_Vùû?ÿú#^ŸJ¾>}ýßýúã«ßŸJ Ÿ~×~Ÿ~ûqá?å÷¿ýøÇŸüóÇùéÿ~„O?hÔ?}œÇ§ÿõñ÷¿>>ýCûóó“üó/ÿçßùü—ïoöï÷÷~Ósȵý÷ïÅOõÓ׿i_’_žŸj#ÏðÊ积÷ñ§ÿêþë×ÿ$eýF4¿†ÏãxÝôõ œ>Ï×µAΆðJç^^÷ã+ìà÷ëÚÀSz;xÝ25§-S¯cËÔ+o™ZÎ-S˵eê}n™z—WØè¼5¼®ü~G|¥ü~Ýø™vL gÝ15„¼ej<¶LyËÔtn™š®×9žÃëØÁË+màWxÝ;øý x‰[¦–ºeê¶L½ë–©m Û05¶ElÃÔØV±ƒ/‘±-ci¿^÷Þ–²°ƒ—×µ·¥lÇÔ¶”혚▩m)Û1µ-e;¦¶¥lÇÔ¶”í˜ZÚ>)oàm£´·¥ìÚÁËëØÀÛR–vð²cjjKÙ†©©-e¦¦¶”m˜šÚR¶aj iËÔ¶”¾£Mm)»6ð¶”;xÛ-mà¹í–vð²ej[ÊvLmKÙŽ©m)Û1µÜ[¦¶¥lÇÔ¶”]\ª¤¶”Ïm)K;xÛ-màm) Ñy„WdèÕ¦™&n;úoŸôéÿý£>éÿø¨O1Ýõ)-¶èS̪>åx×§W}Êñ®OGü/ŸõéˆÿkWp6n¢ÿ·K—Wæß¹9ÑÕW›{ø•KçÅJ¯mRÛgE\ƒ]Âa+»‚›è¿zp¼$mã*ÇS|];x]Úǵ³†ŒÇqÊucŽHm¯et¿xÿÎm4¼Ãæ6Î ¼M÷Oœ¹uð¸·MMÚh Öi3ÇEͶ®XÁEÐr¾J_m“æ‰éó8ÅÂñÖaÇÏ(Çpo[”‹ãm÷.­Cñxö¶Çx‘‘Lé¤jãUºÆs·Éˆ»ú÷:Äm‚¿~µÍõܨÞ"!þÖ¥x3ùYÊëœûÀ¯~å ÛðÊ}æ¾åÔnĽÙü¬QNíFÜÛMžµÊ©ÅÑ䀒Öd89 ñï¹x^Mõ¶ð!«©^Í„p­¦zÛ­ÐFÓ5>o} m43î-£!…¡Ò½í_ºÒ6¾ÊÍ+²ísΛ×ÌÕ6Q3îí]ÂÕvQ|Z ¥m*/Ì}.³˜[–¶×)¼^j?”£c£öC9h¦èæ4Ü•”v¬KíT NaÑN•`G;UpOE;U‚ã4jèˆÿíƒ||¥‰©ÍÖs‡qÅbn“uäeÏåU#žðâ_‘÷ÞxÝÂáNPÒjª§Fc©«©® o¢»ò58ÖîùÅ]¬vÏ/­™tœë>Ì•6ʯWÝ(KR¿ Æï¾­§¸Î…ñ³u«ìW^ËîMp"LÃŒ».è¢þ3Z˜¦Ke>pW%ߥχîÍ¢KÃÍ Ót©ˆêwU@Ó¥yÆ]p´fʸÞE—ƌ˂îHiaš.½®÷ƒÄ ¸«Jš.MW¤Œ&>´E—Êa=-LMÇŒ»‹¢Ó´ñù&LïÊm-GwLÐ^Єéb«·‰0åƒO„i§÷'e/<øDš¾xˆ6½æ•É‹IˆGyñÿò MÓŒÿñƒ6½ù2)ÚtžÞ½€‡×Fòýº×‹Ï"LçîâJð¦Kã‰' Ñ¥…¯1ßÝÇ<஌mº4ó…Ftiå}]ti¹ñ@Š÷ÑO§¨©wî§SôëM—V¾ê‰.7nUÑ¥‹©¾ h›¼“¯’©íòŽŒg ¦)ãéT„éÍ—=¦%à>&Âô ¸‰0Í;x¿®áø*)´l|ý «©®“º ÓÅTWÇ6aZ#¯÷&L㌻:¶éÒ •òÖ¥pô¥]˜éRøeÓ¥°ÊM—ÂQmº”êø†JԦĭ!hjÓ“—]´éÉË.ÚtÆ¿~Ò¦õ.ÚtÆ]U"Û<ÞL]šòfŸiæG:Ëjª¯5c?8ðï>HÓ0ã¾ ©ŸœÓÒ¤õ GS—¦x÷Ó•)¯õÜã[h¹›.ÍsQÜpè¦Kkåý«ÄW¬¼5]Z*¯Å;õ0Z86Þº.iÙÎz Ùt)¬Ó¥ýëg] KmºÖ`Þ>¢5ÛzW2“¥ç-ÇRD–& [ +¦{ +¥5…Ó…³–ŠqMa¥¸¥°b\SX)n)¬´5¥áÑöÏ}Dþ3Lþ3í¯wæ]'¦5ügÚõaö†‡¥~7ügÜ›DþÐðŸqWä¾þ3îÇ¿[þ3ï:šß-ÿw# õr­÷<:v¹®I½\‹ãy5õéìâØÁ¯ÕTß§~n™ª—ka\/×â¸n´!n—kq\7Ú×˵8^wLµËµ0®—kqÞýKaÓ'˜îú„ÒªO0]8kúãªO(núã*O(nòãuËT“'7y‚ñ¼eªÉŒ_[¦š<Á¸ÊŠ›<Á¸Êˆ¿å ÆUžPÜä ÆëŽ©oyBq“'Ï[¦š<Á¸ÊŠ›<Á¸ÊŠ›<Á¸ÊŠ›<ÁxÝ2Õä Æë–©&O þ–'WyBq“'WyBq“'WyBq“'¿·L5y‚ñºeªÉŠ›<ÁxÞ2Õä ÆUžPÜä ÆUžPÜä ÆËŽ©oy‚ñ{ÇÔ·<ÁxÝ1õ-O(nòã*O(nòã*O(nòãeËT½û—ã÷–©z÷/Çï-Sõî_Ž÷»1®wÿRÜîþå¸fmR\ïþÅJòø”ë!‰m;ˆZ}¼ûw„Ÿïþéç»1Ýõ)-¶èS̪>åx×§W}Êñ®OG¸ÏFÜåí±^#îç˜öX/üu‰—š{‹{ý¯ÄK͸p’ 
Ɉ»W7îšqÿà,’÷/>VSnΫ­þEÀçj«ðµÚúppuþ,²Nã·ñtg\•!ܲ%á…ý"´‘¿\¼®¶>ÜœVcý›ƒëj¬{qΫ±þMÀÇjkqñkµõ;®Š —Ó)[/i?˜W.i?–W.i÷Öž~y0.·^<ân«^Œ¿ÞV¦:ãn¬mî9¬¸=¯$ñª¸Ò¯ž_8ân†léù…ØÔûØr\ö¦ó<¹ûY¦Aâìp¨·$Œ¸p”\ úõtTÉ¥ eý¸˜êFZžu5Õýzèw+ÐzO±'¾¸ŸdÚßqa$ËdÆÝDMÉ2¹xÍH–ÉÅ[U²L.<p?˜÷[ÏFÜæÍrëþºÜ$>·‘Ì[z€Ê€ûÁ¼¡¨ ¸Ì{¿âÜ ü`Þ(!#îó¶yeébÁ¼IÞ&y/V.ò­WMhKÜ(Üä[6>/7ùòÂ÷›|gܶ•›|güÛ®ä ;íÚoòåíÚoò½7jR‚¬fþO\Íå=: $–WÂÏ0~Émøtt‡6 ÎÈ¿ÞT>yM¶UgÜõ‹¶gÜõ-Ö~ꈻA®G¿ uÄ}—X^Mõ}\ÇjªæÚ†SÜøzN‹©þÍ¿ý&TÚª¢9SÅõ.šóžq×Ù–§P¹©m«sñõ&æ«ïÖÜ7æì»µw£bÛhºgÜ}{¥¦0ãîu¸¥¿K0â®ç²¦ãæÍ$Ïróy&VUVÐÔtô‹àé%µÑtñYLîæ=æié'.®Çx°G¦ Çxãï^¶ô޾õN4I2MæýµÿþiêÇ °Ã'I4™q÷1I4I¼ÚpZvªnX¬„ö<È»1)ð¯·átÞƒÛp 3îÞþÛ†Ó6:MOŸƒ- ¶ªEÃiQÀ˜¾‘åFn£Hγ­ÏÛ!åØ |óêûŸ‡ÎÏ”¾¥I l9GŸù)}IkC8j"¥k_>!r×±Ö…0Ý;hÿ¹˜þÖ1ýö¥ÑçŸio†•ŒÑ4Ñnhl©ý(à3í¿ý²Ö ÿôËѧ×Ï´ýÙÆ‚Ì8Ÿi÷!Œãì“+«Áp¹Âl éŠÅ ÏíÀ òy¦uä1ò‰ö#3û+­ÔʘûV–$}§ K’.yY”ÖI;i¢ýWK¯¾#‡V^±o®>Ón8éu÷½,w‰}kõ™vƒIÛØ ×÷ú¾ê3톒¶±3[é¿ó’+½]’Hɯ:⽬ÓëJŽSÏ6X¹ã©Glf‹m윅öØØ–õ\hË‹ˆœ­ô£0ëb¥¯ Ób¥/ Uг^%2ž¸N.â°u®þ-IÛîf¼ºÆrwg+l6vŽDç“x«S–¤ö÷LaáX'Ú—™ý5Óöu ¦ ~¦ý-ú©)ƒÌLyÑ%âS4cátLýÀVJ<9С&‚±ò’dõiRZ]š¾Ô£É6"ÃDû³¨?“ q8ÎVú/¹ÄÅJWµÞ÷b¥['ó>é߯Øožô!« ‘‘\Ëå6d¸–ËéàZ.Ëœ–ÇÄpAš²Mõ}AD°ø¢ib+dÕ?‹aqÏR¸{g1\0ª¾YLw×,¥Õ3‹é´úei}æ¤côæ`ÐÞíf?jƒôs:ëH?¥³Ž¬›'im>à®ôµFp7ñÑZV¡5û€»OöX»¸ûfÆ‹ãÂh¼øˆ»G/Ž[TãÅq½k¼8.»Æ‹ãÂh¼8®wÇ]]ãÅqEj¼85ÕâÅG܆·Ç‹Ó>cñâ¸0/>⮯_ãÅGÜKb³xñ÷’Ø,^—]ãÅ9ÞãÅ1®ñâïŽ&Œk¼8Ç»Ÿ ã/ÎñºeªÆ‹s¼n™ªñâ·xqŽ÷xqŒk¼8Ç{¼8Æ5^œã=^ã/Îñ{ËTçxÝ2UãÅ1®ñâÏ[¦j¼8Ç{¼8Æ5^œã=^ã/Îñ²cªÅ‹süÞ1ÕâÅ9^wLµxqŒk¼8Ç{¼8Æ5^œãýpã/Îñ²šú$FKßuô1•Â&O0Ýå ¥Už`ºpÖä ÆUžPÜä ÆUžPÜä Æë–©&O(nòãyËT“'¿¶L5y‚q•'7y‚q•'ËŒ«<¡¸ÉŒ×Sßò„â&O0ž·L5y‚q•'7y‚q•'7y‚q•'7y‚ñºeªÉŒ×-SMž@ü-O0®ò„â&O0®ò„â&O0®ò„â&O0~o™jòãuËT“'7y‚ñ¼eªÉŒ«<¡¸ÉŒ«<¡¸ÉŒ—Sßòã÷Ž©oy‚ñºcê[žPÜä ÆUžPÜä ÆUžPÜä ÆË–©šÎÊñ{ËTMgåø½eª¦³r¼§³b\ÓY)né¬ïé¬×tV¬$pC´u™ËÐÇtÖ~NgéçtVL«>…Åîú”²¦O1®ú”â¦Oüß}J¿nútÀA:눃tVüuMgqÎ:â uÄA:눃tÖé¬#NÒYG¤³Òª±tV\KgÅŸ×tVZxKgqÎ:â •¶«¥³ÒvµtV^“šÎŠmÕtV: ,•ã=•ŽnKgÅ_×tV\“šÎ:â uÄA:눃tÖ鬴ì–Î:â ]ÓYG¤³ÒVµtVZï–Î:â ›ªé¬¸ìšÎ:ânµ¦³Ž¸›pªé¬#k:ëˆ×Å{:+®MgÅý]ÓYqEj:눃tÖ鬴0–ÎJëÝÒYiEZ:+þºf³âšÑlVÚL–Í:­6n ¶¦³âšl»½›ÏŸ¼ìTÝÄ`Mgq7·VÓYqÕh:+nVMgŦj:+®H}ЛZçÞîeÇhœ2§ó"°ž´i/×@C•©y£úDn¦>J÷$M²‹yþjW¦6eŠé®L)­ÊÓ…³¦L1®Ê”â¦L1®Ê”â¦Likýu¯@úHoø ?ÐÏÔŒ´Щ 
?Ðn­4<-µ5<-¶5ü€»±ŸÖð´0Öð´0ÖðîÆ­šËœÖ¹¹ÌiÙÍe>àî2æ2p?ù\Mõß³¹¶ZÕ\æîíWß.ówÌÍe>àÞÊóv™Ãz»ÌÜÛf½]æ°ìo—ù€»Ï˜ËöÈ·Ëv‚·Ë|ÀÝhds™Ãþþv™¸/l.sÚLæ2Ǹîµ)n.sŒë^›âæ2ÇxÝ2Õ\æ¯[¦šËâo—9ÆÕeNqs™c\]æ7—9ÆÕeNqs™cüÞ2Õ\æ¯[¦šËœâæ2ÇxÞ2Õ\æW—9ÅÍeŽqu™SÜ\æ/;¦¾]æ¿wL}»Ì1^wL}»Ì)n.sŒ«Ëœâæ2ǸºÌ)n.sŒ—ÕÔ']Úf¦6þ:úÑ a“'˜îò„Ò*O0]8kòã*O(nòã*O(nòãuËT“'7y‚ñ¼eªÉŒ_[¦š<Á¸ÊŠ›<Á¸Êˆ¿å ÆUžPÜä ÆëŽ©oyBq“'Ï[¦š<Á¸ÊŠ›<Á¸ÊŠ›<Á¸ÊŠ›<ÁxÝ2Õä Æë–©&O þ–'WyBq“'WyBq“'WyBq“'¿·L5y‚ñºeªÉŠ›<ÁxÞ2Õä ÆUžPÜä ÆUžPÜä ÆËŽ©oy‚ñ{ÇÔ·<ÁxÝ1õ-O(nòã*O(nòã*O(nòãeËT‹èÅø½eªEôbüÞ2Õ"z1®½·ˆ^ˆ¿#z1®½·ˆ^ª$›èÔÕ ¡mYm#»£Ï½ "zDôRZõ),v×§”5}ŠqÕ§7}:à$¢—â¦Oü1¢w¢zù·¯¹1Ÿ‚yçnõË;w«§PÞ¹$O‘¼s·zäØÇ(Þ¥_=Eñ.ýê)ŠwéWQ¼×ÆçÛTrðÂKïbëSïb«×¤Å»ØúÅ»˜]™WS}ü^Mõ+>ÇÕVÿ ›Ú52uáÒàSŠ—s-Œô[ÖŠ÷ýZñ•ñ` ÷ýš‡“ëÚl£ižºüˆßºL]~Àoî¿îü¶¡´Ô¹ð›{À¿m(ů·¡TfÜ ømCéä-Ú5扫½kÌ÷~ëÚ½\SEcn”½i̹ù _KÿòãwÏ¥ù×”µÒÝXâ;¬ý˼¦¬ýËm£×þåû¿ê²@úÁ¾óøw#}Û0J¼ ²¹-¸$r—÷,Ùcgþmy‚WIBOE© Ùʇߺ3œå š„{JºÖí¼á»nçݨײnçÝ’¾•hÝÈKÝ¡ÂKÝ!>צ6t2.·Eë´—`Ѻ°[´.¥7öMJÆzÉ1ÎãfBÐûõþüÕ¾bRú7&±p—usßà‘È“]"NÜ¢Dœ¸Ú£D̸«$ràÄ#;JäÀ‰ç(‘|5ˆ±©­ˆûoŒm¾Ž¼ìé”Ä:EÆT$Y™öߘ›àЏÄ\VSýLÒ¸šêªÜë^Mõc+»ãuÄÝLÒ»;^GÜñîžW\šÚÆêÅ;|Û:¾®¦£_A;M:ʺs_s9û•tZJò¶ÚŒ»èÈãje—×Õ6j&¦ÕTWïÆºšê¿sÓoŒ ­*™¤¥à‰F2IÏ»¤¤’f¾e–TÒtóf-ýùÃwŸÒoÃÍ›UÜ 7ï4w•DnŒ‹ÇáÆó˜èÓeözÔ0ã?p5ç±n9\ùÛ%*þxàÂCtg¹d—Š$j‰2¬‰DMVÇ(§å$ÓÝ1ÊéÂYuŒr¼;F1®ŽQŽwÇ(ÆÕ1Š[óè1Fw}¤7üƒ†h7)O~ ŸJGÚ}“C~`ý Qmxj¤5ü€{[¿wÃS3­áé×­áÜQ¦®¦ºÊ^#¶GÜ]«,dפ†lãvÒml«†lãÂhÈöˆ»Ë²=ân6¬†lãî«!Û#þ7î)FÙ¦¦ZÈ6-»…l¸ÿ¤L]Mu £!Û#îzê5d{Äý'eòjªŸ"z®¦ºe×mŒkÈ6Ç{È6Æ5d›ã=dã²Íñºeª†ls¼n™ª!Û·mŽ÷mŒkÈ6Ç{Æ5d›ã=dã²Íñ{ËT ÙæxÝ2UC¶1®!ÛÏ[¦jÈ6Ç{È6Æ5d›ã=dã²Íñ²cª…lsüÞ1ÕB¶9^wLµmŒkÈ6Ç{È6Æ5d›ã=dã²Íñ²šú$Ls–PæŽ>f”RØô ¦»>¡´êLΚ>Á¸êŠ›>Á¸êŠ›>ÁxÝ2Õô ÅMž`b\3J±’l¢3eñ(#´u÷`ècFé?g”ŽôsF)¦UŸÂbw}JYÓ§W}JqÓ§2JGüß}:à¥uÂÁ1øëúF̈ƒ7bFü)¯´Î8x#fÄÁ1#ÞˆqòFÌȃ7bhÕØ1¸4öF þ¼¾C ooÄŒxqñkµÕO «­nÚ¢¾3âÿÍ•œý^“úF ¶U߈Á5™k2p7O÷J=ÊxÀ¿ãŠÈcµõºx^Mõ«æ>V[Ÿ"ãÅñ{Ü%œÉ$ÃôLøëöH ídöḦû)¦ý‘˜wÃ.õ‘˜w£˜õ‘˜wÃcÛxÊ—½§:ãnzlOqãëm8•wÓc›è\L}ò‰e^íMsÎkŸ”\zØ;µ³Hv$.·>ƒkEˆÁ@Tfü×.^{%´4I‚ ŸÒ©²jÀÝlÍSeÕ€»9¬Ae-Œ>C;Œ=C{Œ=ƒ+R߇Áe×÷apÙõ}˜÷|9×]ªï¬ïÃŒ¸.jïÃàšÑ÷a°©ú> ®H}fÄÝlãšqÁõ}úeÍ8¥#C3NiY8︴{qry£þDk†žŸô¸Hw4ÈŽçù«]–BØd)¦»,¥´ÊRLΚ,ŸÊRŠ›,ŸÊRŠ›,¥­yô쨛ô‘Þð Ó½áÚ}cC~ Ý´NixÊZø'j 
?àn>ª5<5Ó~ÀÁ1#î†•š¿œšjþòwÓ†Í_NÛßüåîm&ÞþrZv󗸟”|®¦úÑ¿ê/§Ídþòwßñ19lÕ·¿|ÀÝŒQó—CSßþòwSFÍ_NËnþrØLo9¬÷·¿|À½ ƒ·¿ŽÕ·¿Î3o9­w󗸛òlþrZvó—ÃYìí/‡øí/§ÀüåpžyûËÜ¿+¯¦’p^Xö·¿–ýí/§…1ù€{'‡où€{Ñîoù€{Ñîo9-»ùË)nþrŒ×ÕTÎKqó—c‡óBØä ¦»<¡´ÊLΚ<Á¸ÊŠ›<Á¸ÊŠ›<ÁxÝ2Õä ÅMž`±`Üq1vïïÕ`\8è-c Åh/æC¹îàmá™Íto5nÛ¶ÙJ÷öÛŠ ë[#qa§Ò@\XÙ9à‹F,IÖÖÇ]Œ Wß<õ곤ÏCÆ«œrc\ßmø„¦Ö üî Åc–÷M0žú“8—7ª0{VÆÛd"ËÈ€û¿ô—‡ð×e;—Ý¿2õ–«þñ×ïþˆ{«ÚYû“f#î^m[s?1€_ÇÙçðw_‘'œfÜÝ…Ÿa5ÕÆÃYúk2„Ø·‘î†ã…*—¸×û¶55U°A7U€5dyvp¤ÝH¿|½xï=8Ïn8f[—w´­Ë|܉ ¬7oOYUnÞw›$\¦·Û@:gÜmc›oܹD.¦ºâà=Êù^C\†ež#ýWVÒ:¥úO†öüM¼®¦Ê§ Mà¤Mjªv/“°ïšn„î GØ:9Ü;"/·A´!òDlÊÛxéÒ¶Ä|µû9!lŽNLwO'¥ÕÕ‰éÂYsvb\½7w'ÆÕßIqsxÒÖ<º'“>Ò~€Aô+µáúùµümiøõÓ=µáÜÍ#´†pÿ kù÷RpÞ-O«ÜZãuµõé¤ãÞÀ5þšãyËT¿æøµeªÆ_s¼Ç_c\ã¯9Þã¯)nñ×ïñ××økŽ×S-þãÍñ¼eªÆ_s¼Ç_c\ã¯9Þã¯1®ñ×ïñ××økŽ×-S5þšãuËT¿¦¸Å_s¼Ç_c\ã¯9Þã¯1®ñ×ïñ××økŽß[¦jü5Çë–©q¿æxÞ2Uã¯9Þã¯1®ñ×ïñ××økŽ—S-þšã÷Ž©ÍñºcªÅ_c\ã¯9Þã¯1®ñ×׊kü5ÇËjê“0ýÑèŽ>¦‡RØô ¦»>¡´êLΚ>Á¸êŠ›<Á¸ÊŠ›<ÁxÝ2Õä ÅMž`" ?ÂÏ ?Òî+%½áGÚ}`¥7üHûÏÈ”‰Ã`#µáGÜÓ]Öð#îé.kø÷t—5<.»ºÖq ©k}ÄÁÃ0¸0êZqÿã\Mõ¤½¹ÖGÜ{Ï\ë#îh«k›ª®uŽw×:Å͵ÎñîZǸºÖ9^wL5×:ÆÕµÎñ¼eªºÖ9Þ]ëW×:Ç»kãêZçxw­c\]ë¯[¦ªkãuËTu­SÜ\ëï®uŒ«kãݵŽqu­s¼»Ö1®®uŽß[¦ªkãuËTu­c\]ëÏ[¦ªkãݵŽqu­s¼»Ö1®®uŽ—S͵Îñ{ÇTs­s¼î˜j®uŒ«kãݵŽqu­s¼»Ö1®®uŽ—ÕÔ]zÞ‡,Ü_~Y¶ö#[B÷ƒÕozó›Øœÿà..Û_Ìñzxråžðyx=åŠ×ÿ¾‹÷«óFüo<ülƒ¨Tþy¹ªž¼øµâÄÿÄåÛªT2¯Ì3ȱΈåâúÔúÈÿÀçuåx·Ûœ)¼fk¿åâùX?ÿcŸ¿$µ{äáòW”§¿yõ9RñŸûx¿ü„w†6RϹ8?õùÔgšÿ™Ë×~MÍÈÿ§§AÞìú¦ !|ÑñÛ‡Û4||ÓŒðÜùøÀÿµË‡£ïgþû>Ée)#ï^.aœðŸøø-7_ŒüÏ\>¥.æqñ%*"Nü/|þê‹ëÀÿ¥Ë7}“ËÄÿØçï>Lþ\^n×ÈõÙVˆT6ê§õû<—ç§>÷ÐÀÿ•ËWññê©ýâ2\¹vm-?rùóX‹ÿ>½âÜ¿ry9´›Ëó=Ÿï›Ü\r—ÚÉchkJIÅ‘‚o.¹Ôa©wtõÃ{£:Û¢’ëÄÿÜçûý„¼:%r¢lt·ÒïGâõ‰9Ä£%ÜY‚üxùë¹×ÛðZÚË­O9tX†£Û¾rê¯O »ÜÜ~MwàÕÃÙ÷0¸ørU÷Ü=ÝÅHNòÆè•+Ù®ºñý6Ϻaoþbmw»ƒÜʶ³¶Ëµl;ÃQΖáèw‡«Ç_ñò—¼Gwq”#ˆ¼cï]Öú÷í­_¬.îôë«‹[žtôk@ñtžŽ"QM¸>Óû!9µW®þ¾6V»$÷#μ»WMÝ#Í«§ ǺÑý{LÇNõ'‰™Å½­ŸFlìeRÓËâøCŸo{‡ùûçòW\'sÿûWiù¿õù{½îl˜JZGï\¾Iº¼±¸§ûêGSt+Ÿjè§%¸;Ô{­OwΞn÷Ìm4>¸òY—µÔ[9äu-u{Cn;Õ²±α?g€[+§ƒW½޳ȕÞÏ/ýâþwË-ìšpw‰n}ì˜qwƔLj治S‚õÈw÷/U¢oGÚí0ç‘^‰ýlÝW”1­÷~|7á¾ò“Ó»ÀÛINïd«<ðþ¹DèQï¼øL˜yG€ŠºÅ×õûþÁ„\t]&þáàãì{Í÷2ÚjroT§¼B1óßõùõós”Û—çûÇ‹Sм~Åk¿ 
³«lôλßZ:òÇ÷Îàªi,¿ô=Âúy_÷wBtÞ‘¨¾+òÎλŸ3Ðæ’c’kc”¤’°1JVÉÆœâÝê1ŸÒZ=î\Réº?Ç×\wc²Ê&\ûm#xÖâ_y5×/¾DÔçò4·ôÿ”§ÜëÜãnu$ŽãÞè̵?Æ‹/žÍÕéîÃåP%lT”¸Ù¹ú}-—xóÚŒÇÞB$¹/ËBä‹ôðÅBäʈÒÖÂÞmæær·á1~±TøåOqm.w[ÓK…Û;%d§7´u:ÎÍåÆ(ªocß g0yc#5%œWg©k÷ñËç­}L¬1‰Ë/êÏö>œñ|±Ôùgm—}_¼þ“<\8—Ç?585‚?]Çьܿԧ»S’køëÆl%‡6¹n”_\í›Òß÷Û+Iš!îÎrhsÏk—»ñ”\œ6ŠßÖÞw'79S‰×Fk5¹›vz›\Ç¿ÓÛªFasë“¿[þ¬Ï âµWRs–­‰*!/\^?YÇÈ{[‡Ëc§0´{¾OaèìÙOahcÉ)L›ÝÊ7¥ }Áæ~N2àn½Üg_£Ü5SÂÞë„»kJ=À];ëÕ÷_î»Èþf÷ÈûJ]œf'7ö4q6ð¾²7q6ð'g—´6%ód.Ž«Vä $^ûg¼ÖâûÕ“B_pñ÷Óµö5?Â"‡µ³=ÄÜkoó»Ï¥>ã÷.õ¼”QÔgŒËÓ–”:—ß]‚$¢äØ»IµÕÝj«yäý˜†C7„´½ÂQú†öŸ ù›i£<çýÙ±_ôgÿì#ôÇßp}J2Ê\=¾øŽ÷ëÌÕ)[…w79+ÙªþüEw~Y)ò:=/ÏÕ_Ðâå¹R×g¸>府¹þýÃŒÒï)æõßôÖ•6Ê÷w[GÞ?”¬—kîn—Ë×þ ñÈËãã‘Öòøzî¸ûQ!í±é§¸1ýÄp¬«õÃñDìYAty‰¢Ÿ2ï1^ëÎÊ?YI£¼‡?xB)ëÿÀ? h*oÌÏrȱ±ý‘·ã—õÎ?O’²²Ñ`òÚ|Ýh€÷Ê_ÔH' Ød™Ëã€ô;xFÜu%H Êyñ%¥nÌŸò¤à²[}8ÿШüÿcŸïQù´u%yf©}wzKôŸxoHñ\§«‡ãŒ{Ý-=gœëtåßÈ»õ˵\D’7vorÉ2ýû!C¹®»w7ÓƒP®î&Ç—ËcßÃÆò(WZ.íë/ B©|ò”;-câ“[ªç:9<ð©'bskY¯¦ùˆÝ¯JWÓ|ô·„qùs›–òû«]>Ë*Þ]ƒå@æØØ¾å^âî}~C‡×ûü†îúù µµ 09ˆS£´n0Ân'¸{á@»U"g77¦Å7—FÚݵo]¨‘g[Úò„?ÄäÅN·ëžÇ½æ!âŒKiüc ‰ÅœJãò´up.È Ñ6¦­šgá5zÖÿ€û)L±K,Ú ÎÔoòÅeׇ`p§iÛÝ#à±! 
cçÔýÃŽ¶Ù Í$âóä muËôuÿäNîj*ÒX’¥²â‘ÝCr¦ÂøÇF‘³ÑÅjª×Lí§´UƒùÒV zâK{dh‹ä<¸‚-œyÚþ÷Þ¸"C[ðBÚÀûÑ<­÷úöWdì»CÚå"•ÊgɆ:xŸ‘“«yžñ›)‡­Âäk)Œñu9¦pwßÐï])7ãÔgÜ¥OÒ¦6ŸYÍ…ñ;ð—ÁçêcÉ™Ú|m&È|Zê״ܸH¬Î<ø·ê2øü£€6l,”©³Ñ£dO¸Î#1yî{ÄKŽyhŸ‘[]òT‘n8ª^m‹+FòªN^í’Vy£6I\ù)IX)â)2¶‘3/L‘ûìú;>–5û!–絇;Ø}-ÒÀÌíÌgH üIÏarcLÅ´\¸.‹’%®<ÒQ—)Ì?T8û™Ý̤³gAÒF•ûeΩfüó:ÙðþØ4j[ Ò"g¿úøýÇÑþCøô»á~ûq5uòþ)¿ÿíÇ?~üüãŸùÕÇÿÆA›ö endstream endobj 24 0 obj <>>>/MediaBox[0 0 612 792]>> endobj 26 0 obj <>stream xœ•][ËnIqä"ðÝ$š„bŒ&òºúÜr%ž5êèè¨Ñ‹€‘ar“–ß—zj½[»ûÛ>]ÅÀlØ<ïÚÕÕuîêêO_¾úáKª¯­Ô×ýòõ_>xùôåzm#¾~"^¯¿Ôÿð'þþã—ß¾|ôòû—ðú¿/ñõ;‚úÝK¸^ÿãå?u½þZ~^ñßÿü÷¾óÇ_¾ûÖõüÿ»ïýFÿæÂ„ ¯|#¾†ðúáoäSøÛðÚä×xt¡ò“—/þYûÒ‡¿±ïÇöÇÿ.ƒ÷üèy‹ÁÇõeÿ9…çÇøGÞW[àߣðñ(ë×ÈàáêÚìk !?ÚXðß§øx=âp|?ÖG|Š’ü9^Ö›íì 9<®•þ8¾=âJÿw(¾¤GŠvá ¥>R_ðß ø}ÅÿˆãWðÏ(¸åGˆæt†dýГOxúØ…ç'?Ò®[?åøþ(;Yñ º®÷ëßÍaxbHšíZ ã1ÖýýÅ‹2®ìü9…'Y®ƒú$« næëÑšƒú\}Øw7–øHÃAO)¼îÖ·9^¤Ç®‰±æG]•ë/Ž¢zðo<§§•ž_P¼(oq(WåÝ”ñsÔ§Ç×:ÔØž· XqêåùÝA±ã‘ò¿h¬€MÑÿF½s‡5žáŸçÞ¹>âJ{9yçPü/¿ÈÝmØÈÿ*‡7ÈÀŒÿWîm €3>rï©eÆWî Ã#¯ôÓ80ˆ †æX¯è 8 û÷[@lg§_Ó´ÒÿîàêNF€M˜ñÅñù±éÊáûâpWyNÜÁ‰I^¿Ot ×þ}n£B}¬Ëå+ÆÇº[4ŒQ€«iø&wp×£®äb}´uµÜbæøˆÝ¾[QbÓk¥Ÿ;P‰M7kBcM8¸êÁ×7ÖŠó§]ûz9Äc¹øÙƒŸ¢?i8 ·ŠÖFÙ?eO’h3%;ùéª9”ú$Ž9;ù)ùÍA¾¸¢ä°m Á©Ãö'É{³k{ß•VöÓL:‰ïÚl_o¹6[Å—+ÑfZñÔµ¤ͱ»Ut¥ØmCO·q“sGtëZ…“æé©·}w9{ÄÓu·¨'MCÃL³gÉWAê4ãijœCDÝ`ÆÓÔ8‡ŽPÖ¼_Y´Å£]Y"·M»¨çÍ’Ë]þgÑ–ü̲_ÉÁŸÜ}ôH2×=üõÚä™Z·\…þáàí›±åä·7ÆŠ“#äpJ¹—êóØmgæH{ ÀW+iQv˜ÚrµGvcUBÚl!KJhç}‰ou,ET±U»è—”ö¤‘ŠZÉ—õˆö›šÎsF/™ñÈ œWѫ朲‰tXàt“î*º™˜p]šYNxžÙ\ûv;9ZG_ñ¼Î*™ú ç•Öx=ƺ\^iƒ»ð¼Ò*© ÜÕ„ç•P$öÃÁλìn§GÜC³ËŽÖ ²c·òØÉá§â¶ïsöÔkÿ>gOMÛrŸWeæŽ7j¸ð´Û@™¹‰2ýŠ?–éSwpgDZgü¡¬¯‰•œxEͳ¬Ü×*}t|Uz‡.¢ˆÑWÝúÌÉ„—hÐ\Üœà†âæ„¶7'¸©¸9áMÅÍ o*nNxSqÓþý”qð;ÿà+'£v5ƒ$æÍaÁÓê¬T[éáÕG±:W÷,¸]§M?øçcy³:v ß)ô„çåVIúVr¾|ªnÆj—þ(1@öÊ•mýx)4“'<-ÏF”H¢ãû±ã¼Õ,@ñY®Ÿð¼<›Æþ}^žÍe7?¼>[.4bÌx^ -z^<ãy¶Œ]Á¸%¯ÂŸ•ÿ‡¤(¼g¿ÄñŽ•Ÿ¼@+úr­üáWq¼Åƒeÿþ±¹YhÊT «âþ}^ c7¸¼†ó€h—çýÊzÒFšé‘¨y%‡W\% ÞÌñ™“ƒüÒvõåßÿ’WDO¤QƒvéO-îÚÅKŠâBrH›„µ59–+Úµ¹_j}Ò¸KNVög‰S›CšQ²ìÉn=³¤°qý>­Âdñ^Ù!9¦G)ŽõJNš«ƒþ7ñáEq^ãœÅyèø~‰»ñä56Ñ®â0&¹æýüûõ.ߘÙÙ´ƾ޶' Ô·äîqí†ùPPlûÞÒ@¦\’¯;=çØÁÎû"Ž+9úyRU/n3åOÇ•ºÝÔÉtŠc¹¨@ZiAúš+2¯³Ééw|1Á){ÓóÜ 
NW9Ä]-hºHÉvÇ‚æ<1Æ×ºR^ÃxÖ­K}Ö'8õ=ïꉞ—`Äv¶àyyóYOœð¼ä”ª™éAæ±J¡ž54šð¼\ŒÔ;;Ø_²ú63ýbìkpЃarð_Œ}/va­h#’™Ÿbì³c¹¢¸¹Úõ<à¼)Ú5¿¶ÒÔü²ýñº ž'¢áÚ´‘J?*~#Ú¥-Š6^+ùÇ aR6W'¸¡B8¡- nªNxS…p›*„ÞT!´ÿY!œ~`ªšô¬NxS…p›*„ö?+„ÓLBó<+„f  ‡f±Kб¡1;=ïάŠ_kŽ ˆQ°JDà%¼¦}ž–\cN;A‡žÃ±K/q–´Kô¡ h–bÞ€Z›9¤EƒˆFŒìÁKX¸ С„Wv ã%W1¸m¥‡»Š‘µÏiÂSW”$ëþº³4[÷+‰g¼‚ãû¢0×J?ïÚ EÛ&<ïK‹÷ßÌüLضáX/zz=ô#°]éçEªœq‰ÆNº»ƒÿ¢›¼ñd¹O3?QósØôn•Ó¿ÎüÖ´`iž^5Š7/vDâÍ‹•xiå =.É¢‹Ñ¾ÚŒê{±‹Zƒƒ›Y¢±²ª ½ý–%'ÝD™–‹QO QËQ+f1:D-'‰eìš’Ñ/䋿–ì ¿ÔÝþ ¯–=ö䮢‰a8èåJiëñá°ShhÜüÜ¡Bö@ŒÞLB…p‹3häŒEª?[­ÌG‹â×òŠÜ³GѪ»ïz­aûÅ O¯=k…ÐÊÄk÷É÷ÙŠ•M—ùìhœà4øÂPXàT&û}º9ÁéžÂ9ôΫ•ZœÐÔX\ÉvΠÿ±­ßÿÇ=zŸð‡zey¬äê•AOÞ'ü¡^)¾§Û·J3ûä`ê›ë÷yA a]v°'ß·=¬’‰†ÆËÃNuÿ‹žD›É¯YÏ;¬Z«åÊà`¿deqeïP|v4šéMÇÑì„çåSt4®xZÓŽÆäØ®Q´Ì0áy–ŽúæÊÏs}3Úù¯Nþ À›}½T°ÑÃ[&cÔó,3ýbz´«o„«rX7Œ*ví}WT1³'Wù¥ìäó¢Dù¿ü%Ç·Guß®Ý1òÁ -=¶€“/n½¯ÚÅ'!ôû­Ÿwz÷DÛîÙù‘aCÉær(»Þûtøº$Æa󥼄$y_]éç}0’÷mÆÿpñóq¦Ú•Ä·—äà§({-vqCÉfÓ^Þæ”ßg¾Þ\vãÉKÙgÌQÚèçU2´KÑÌÏZµÄl^o›õ9´ÝפÍâÙßDÂ\Y|µÇ·çª!j«j †OµQü[QûÇ¿5*úé­¨QÓ– jÙÎO\SÙT‹ò3½µ/Pìô£Tœìö-÷¦…Q«wÄpÉ‘ìüGçfYñ4šAçfHvþkçf²Û«gòa]î3ù°zM>¬¤@,“ö&œÅæN>&4uÑwò1¡© ÞÉÇ„¦ëN>&ôá@¢jî<áÉDÄ…¥OÅ7 UiÅN% .0ØéOw0håeÈw08áyp}¿’b§GÌSM žûâncrÐ#橯øÃ9FF+ÿ÷ j;?{Û×ËéDZl§'^:ãÊL¦æl§'eÖÜut¥Y_¢¸Ûm½T_ÐJ·­—î/Zé²CßcÖóêÿsŠynÝÁ‘ç´â©;Á¹Çþ4½5c߯®×0ìüé÷¹â„çé͸Ï­~"Ýí)fùAwY[ñ¼X/éJJöõ¢[ ¥Mëþ"½©Íñ}¢84ái8… {W°ï¯^Øsøi éŠÑn0¤«;|uºox›í?îÔ]Ñn¯têVtð_‚‡TüMÓ{«|âR]©výÍAgԘ׋)Z­Ú÷+Ǭé½U~0Ek4»üd‡üc,V v{’Ë~›é)wúmÞ¯z§ßV{‚³•ëmwDíûÕµûqÆÓÆ<"š•ìû;n)šéÇèªáˆ'‘Þ¬âùu ¿Ob¬»õL†¬Îú™ Yy£É•1H†¢,<;õ;šÐÔÞÉЄ¦Ï Mh*^w24¡?ÃÐïNb&<öc@s䌧µ,´\µÏ;ºRÖäÉLO¹+ïž„âèãZÀ³¿zg[žgíÎ¶Ì k€leÂÓV}¡4;è:ØÞL?æ—ÄlütÆŒ§¹:.òälçN îzòÝ(iUEdmÅŸâQ\^ñ‡£ }ƾÞ{&©YÝõêI²ËCú&˜™ž„ì²-xm"»lvþà®G]ñ‡h¼iã€UÞpw£7Çz˵Dž­¹eÖ/D×1Ú÷­H=:èß2ªƒŸ]”jÿ>Šû+;y­É™›(í;ŒmŽ66ÃydîHÐÊ{ö[°Ë2ž¤JÁ®+(ìÏÛ¨ê}cÞ+«Çzq¬¾â]E8W_ÀOJ†ŽS13´`–Ð*œ´/½½GhfÐ3Ø´KÏ`s‚ÓsÄx"ñ}—TÞ?NhC'õÛwü8¡ <ÚÔÉ3áM<ÞÔÉ3áMb¯jm  3»SÔËBf¥ôÆöBØó-ŠÚP´Ž1ã;ÅײK‰‡¬škÁýBÕuþ²Ž–á+ØUgêÛ÷#LŠÃüàbvë©8‰!¡áˆvóƒ#øá0'8‚¿VòOo¹è•¿fõ°dQá}Í*žð|N¨n]ñ< Õ…[œð|¥wP;Áÿ‘G‘ªÑ3ž Ì:EcÆó©‹¥ ;›ñãȠQí„ç1<Ê+ ü¿NAg_Éù,ÇëPÕOGß„~-`1‹ÚD‡|Ä—|D£Dµ;;t`®Y”qik£‡ÏØCßÃç/ĈWÌvêoæbW\Ä›°˜þ0àP»nÍÜGWæµ~ÿÏ\ °s?é™ÿ<€ 
šÏMøÃ„ÃŒ®ÆO³ï˜›ææý*[…eªíÊ…Š%;¾_ïøÚ¼½¸¾Àiƒ5F‡ð÷¨Ùå„çé„Ä›aãó(G·ƒq$ª÷x1"®ß?Œ+Ü}ÖaZ¡>0aæ=†ilšË§¦¼kîáÁˆºk._®Ëu]/ŸÞ‡Ž‰b×\×}åMŸ0M0¬ßçÁ{ÕS×OÓ'¼úZWKÂÓ'ôë­ûÅÓ§¡SÌg<Þq¿.9(£%#Ù„y|exŠz¡Êþ¤¤á¯y÷“&3ž¦O¸0·IMŸp®ž] ®a7Wü‘*„ä¸Åš Ô`°yÊ ž‡ðxº³,x>Ê:V­´™ñ(¯xò§®‘É„ç)E¾í•?öïÓÒ*ǰŸf~–Û^Mx>¿êý >íôðáȘmÖJÍq'ˆòí6ˆž—¾»^˜²ã‡¾;b ¡Ï˜¿YåmU0štÅûYk3?1Û¼¯üäõ<œp­ßçõ¼xW{&<¯ç °­ À_,Ë mL3ž?'øZàðdœh̶MbzQp0HÒØÚ<u­hÿð:ts,@$4® æõŠ,›8PÉ×þÏz15óZ×K%Æv¬Ìû÷¹sÁôèæø~Y?NË‹0´ÛæþÝÉÐÆ•ûÍ í¥åË ÿ_õÔpÂÿýÉ’àh` £ÙÑ÷ôž_¦¯~jŸ`õàõ¨ßAµõ–_1³Ã8q°ã1 {ðxkÍÏzGÐ×j¤ßápíxÜôˆCÑÓ~^­¦_#r.^çGÚñ¸Eäá¿DTÃ#hÓtÀ%þŠ.¼ž·9ð ;Gö‹“öKGf¼=8Ø=ØáâD‹C0á!:´M©ÝCÎ*â⣎—µo< 4ÒÏNðÃy§c&øá¸³À¡Op~$†‡hê ç•¤ÍÆæ¯'})y‚êB}#†W²çMpÞ?–u­™ö»7Áyw]©poDÓŠ7¬w3üp¨[MpçT>Á¿ÀMqݾN;Bj*­ò‹¶À`—̪Yàß?ÙÉõë‡Ú²}ýX§‰víÀ|þÍ"£—³Y³aó¤£p!7'bĈ]õž£ ÎrE±£Ýl`¾ÎJ ¯Öá9›nçL֧جò«Õ;#ën®Om€êXvCëVÃlòM­&ïÐf(œ±Ó‚{žÑ.21] -‡#V<¬be#dWÍãñp×w»¸‚yÛzÐ6©ÏícÔéY3þà‚õpxÆóÓŠ¨÷Ågü¡M?©7˜ðü´"éý‹ð«ñÑËÍúèÉŒç‡â¶ánÌß/z½ÆN~Ñ£Ã8<)}'èà“Ì g¿í/w° 7–íëmYí™YÜðVq,W¯†:Ø9®M|xœ5ôa³4ã(g$;ù¸PWîs³Òc…s_†æf|¬»4ºî/—ð<çnÏxîÞDÙÓp|?먤ÏÊ=ÊÑ,ͱ4uˆViÆe½àP^•·ÒsxöÒªÞ„çAhc-vñ÷£g3ž·5õáÿ»EÒÎÿ»EÒìq°»üà2àhvúÓ=ÉyÆó6q̆ vþ'TU“ÿ˜4^p¤´Õn}ôÔnW/Ì ÙÔw–‰º\ÁÁþª“ã̾“ûVgÁËZܵ…/ÄU‡´u= ´/W´k³Vœþ¡G9vúGß­Õá]Õ U@ëzÑÇ•ªÝZaàíÖ·.‡3Íqþ,,®8V›n²›w7£±:¸Ÿô&ƒÙ–`LaËz0iÌ‘(è3¬ŽX ëëøÔ›Éoúˆ…YYr+Z#3 ›({®zpO>8¶·W-Y]¦&&ûñ¤Ÿ#p.—Cv4ãÅ ªû Š ú|ïçyÔ×{'ø±@túp^ॠüœÙKŒ-e¨5Jì^8_è}SiÆó«>ÏDÝŒOúdÛŒ?Ö¿Áø Ï‹ÎÏŠö„ç]vwI{‚Qîšö„?乯nÏxžØ×¶‘ÃW‹ëñënÒh½?㹤=ËàfaÅǼŠÎíø¦æÌŠÇuú=ø„q¼Ž¡tàZ¬íø÷ý=àËÃÞÐ5r7ã£Nvsà³&~v|Ó`ÇŒOzÝO»±:àuƯŸõÊ»ŸvæªÇ#vüP7dÆ}]Ò¿»‡ìxmñ‰âëíu'<ïðmi÷-¼ËUBü.LøCSÞ(š9M?8¶£¢GVTìÌLa̳_÷¦äK¬SNf< QæN£#´¡,hJ8¦ÕN#dLmXIçÜ”ì´ãY›¶Ày!Z x[ËÏU‚>q<ãbqìäc&„ƒùˆL¯à`gÒGœfüáVŠÞ2™ñ‡p°ïüäÓ¤JÞ¥‡ózujEŸ‹YSИœÒ$?,ÑAOW7ã©ÉDÀ×ýåáx•¤Ø¿“‰¾Êç—9^Ó2ózµÁ.>Ú°²“WêcB<5ã'Úa`'?•]½xÓ=©v󃣬ßÿ?Ž×ò…Y}qò‘=ë-:gÑ,nh4Õ!nµ#^¶‹[ »8š*N¶ÌêŽ1…›w9´ œÛéùáð.éÒÎ63ùéÒÖŠO‹*8˜¨뉃‰+ÛÅo 9¸'…¢Ãxâ xévmÄD—Ú$„ïërù¹ ¦9œKºÇy™ƒ$ñõf é´„„ hzÚµœž¦/pš­9Æ1å䟞vëp8)h'·Ë^|u‡8ùØä‡[%Ößä‡[Ã…DÂlÍ1E¾9‚%\…ëuÏ÷“ufùÏ)îæ‡Ÿ5¤¾{|¼U~³_|¾nxûþâ‚|(~Ö7ûÅOªª°ïo»¾3Äž »=Ä üèðv8œHÅÁO<ââ¼¹à0ÿ›¿‰35?årxj¤ÓMoËœílÓëj3œ§ÓøΓõö š&PCu›iÚX4Ãy:‡N‰ÎžNxž …ºS<åAö:áyv†{ÃAOì¸P3ã}c)iºk^@j'>>>/MediaBox[0 0 612 792]>> endobj 28 0 
obj <>stream xœ•šË‹]EÆwgã{%³p¡ ®®ê×F$˜1F£ŽÞ$>p¢„Dˆ›üû©>×ÄîV¿[ÍÀ ¿iªUõÕwçùvë´qÚsLûéÑvû´]oÏ7¿çögúÓïO·DáõÏöû§ÛÛÃíÏö[Øï*õd#¿ßÛ~ýÍïôÏio_ýþzþòÕZþïï¯Ö{|üÆ·?Ò€n^ÉN´ŸëRí·´g]€¼+²Ÿžm¿?9=iÁþ7[\è;ˆ.Ñk…èJ®Ø×®ÅÉ@ßG4ùè<ÙW'"—FþóÅ…‘¿‚|WVâar2ò0_æýÞèyþ÷H5¹HgöÍž ó­Òž éÍègçSO¿èêåžþÑäKÏVÈRt,=î!®·{úHsp2,þÆ‹‹Ã¡Ü‡¸BßÁx™vzâQßUOÛ§(N†ƒùâÉ»8\èCŒ³K¼€'—†'ð#ÄspÅ~.9º2\Ò»/®’W®fû+uZã5O±C¼ã?Ä£Îð<ðoa>;_Â/ç,4Ç_ÎiØñïái(™ÒñЄ ˆÑŽ73D™Œ“³ÒáØ<ɇRípx0•vÜá—í“f‹Òñ]Êñ”Ögß쪞ÿôRŽûl?yâsGîx<þ˸YlsD?/Ž‹ÈG¿·ói¾Yìt4 6«"tÜ.6/Rq’xm„q<žo0ÏG}íxØÆIÇË)8]æË€C1GšµKáT?‡?5¡›â©õPÇi±ýµ5ƒ‡ÆóÚ8¨6N çTçñub[EKIÏJ´=—…óa=ûk¬Ç“ìÉD\“ý‚é”5ÕñØèQÍëÇ㼇ùäVN_kI ö×´8¬ÔÂfçƒ-Åæ>­<USã…ŽŸ‹'6<gÇC•i­8³f{ÍöêÀ:ÛJ^X?СNÍû 2Ç?Ùa–¹ÂûmVTˆ ñë01UÃkÌ7IÈ[ØŽÒýŽñCaÕü¨©üàýjþòø~.8R²RNXÅþTN`³h–TZÐy¬ÃÁ”îÐKi¦ÔJº³¦o](Wâe~þØNi¾T°ïWÈÏéŽ •Ö|íÕ\´÷.ÑI=Ûƒ¨ˆŸÞþkª:^áEæõq<ªã§Ü¦JÌsëÅ—Õü¦‡¾Šh*NËC% šŠS+…¾ŠhkL+×¥­1o ú*Råøp£ã¡¯=Íã1Œ'j®ÐTV±ý/ÄÂuÅÖêìãw ~ÖåÐW‰!¾GÇC_%†:ëòË£‘ÁT!9d[ÇcSE7Û>.êxlªèfÛHúüüï(-m[1©ãFíÿ?u­_/I J. endstream endobj 29 0 obj <>>>/MediaBox[0 0 612 792]>> endobj 30 0 obj [1 0 R/Fit] endobj 31 0 obj <>stream xœí½y|TEÖ7~ªîÖkÒÙº;ÝÙ7bB„@$7"°š ‘€€ 3dSg„¸‚€ .ˆ+èŒÊ€M‚ǸŒë82n£Î8ò(îòÊ(¢ƒ¤ûýVuwq›yžçýýñû$ñ{Om§–S§Nª{%ĈÈJm¤ëÜeKüßüÍÛH¹“Ș1¯õ¼_Z>Þ“ð·DÚ_ÏûÅ%óþ2ý;‘s?QöçóçΚó·êˆÊögè|$įH "~ñìù¿\rñEO-%ì%*í ÏÅÞÙCtÞ爿ùËY·&þ]]†æžFyëEs[ÿyúnÄŽ¢í¡4‰(MÍ¥4¢ÐÁ(‚ BEž üS"–F䧤¿²|æ§vŒ<ô-Kfƒh©ô FºƒºéJ¤©´‘ÅS6¹ic*ÊÑ:vGhYè:•n¤{C²+BÛ=CߢÿPUДŸFséåj ÝNZEvA“™›fÑøý}¸‰n¦?°_‡¾E«‰tê«¢ª =:N…´N]¯½i}˜6Ð^¦‡Î - ”IkxQèл”KMôz}*b]êXÊ  èjÚÄ’•gº…~KAæàÍÊhíq´4ަӅ´œÖÐ6zųíMípèW¡H§ÊGŸÐ'lÏïS¡‘¡·é,ÚMÏa¼â·K=K}@;+Xº+ô$%Ñ£ÌÆö±'´2íúîËC÷„~Oôg$2í̦+é zžþI_ò•¡•4–¦ å?²tæg¹ø<™¯à+”W錶½]J›)€ÙC{é1Èæot€>`‰,•Îf³ ìKîàsøËÊÊ.å5•©¿ƒ¼³(2ZB÷Ñ#ô'z‰^fê/e ì|¶ÝÊîbx€οQ-ê•êwj·–<ü.4!ô5y)…ΠKi%dûê ]ôgz¾¤¯è(s±al>»‡Øö9·òL>‘·òü>þ2AÙ <¡QG©¨/©ok×hkYFðøýÁ›‚ÿz4ôèN êÏ¥:HôrhÅ}ô8½ŠÚߢwè=¡?¨›ÁÎA+‹Ùjv3{ˆý‘ý…}ŠQ’üÍä#x-Z]È/‚œ®à7ñ›ÑúËøÝÏßæïðÏø×Š¦d*C•EÊ=J@éTö+ª.5W=E¤NTg¨!ÌL™vš6EÛªmמÔëUú½Uÿظ¸Êò§îÂî)8?v@w-ФK!‰»é^èý.ÌÁ èŸÑãt³Â2Xú]ÉêX=ÏÎdg³¹ì ¶ŠÝÈ6±;ؽì÷ÆÀ ô½ˆ×ð)|ŸË¯â«øu|~÷ðçùüM~=÷(YJ‘2H§ÌPÎR.Ä–(+”« Ù Ê6åeåUå#åcåfÍ£P—ª—ª·©¨»Ô¿hgh¿Äï½ÚãZ—öí¸v\çzŠž¦—èçë[õ÷ Ýj4ׯ_YZY+DÏýÔë‡'c 
àÛx¢º’BB:S)#/¾—¢]¼ŠOçw*Ä>`[éèûÅt3»€-¦íìÎ.cl%½ÆÝÊvU…îå*³²qì0¡t¹:‡Î¡Ÿüa•ôwú$x·êT ûÔI1£Ò»ìwtŒi¡ÏaÝX£Y°2ë ïW“°zÍXg+±“aA~¡¿L»˜+^¡T/¥Ãô/úDÛKúQpz·ú~¨"TŒ†UF[±îæÓiX1@KC\ÄÎÆJ·Á–”aU7Ð šC—ÁêmBw†® ]ZH/‚÷ÈŽ±-Xਢçð{½ÅÖbžöÓãü±Ÿàê¢O™—å°2¬‡CÚ2m½¶MÛ¥ýA{Ii_Ew@£ßƒ6Û0‚sé/ô)}Ã,˜›dHåèï0ô½‘~Á›”Çh4K¡V¬Ù|ØñQ‘‘,F-W@zwb=?†µqvâlú½É8ó`Dç¢} ꩇœg¢ôý˜Á+YRæÀjÒgw Æ— =5m„ÕêBŸþNBÚ!Ù¯° µl:êú†Î¤9ha(5°˜G¨–µVùäÍ\4Še²ß‚¯+4†Ò©R{ŸqœÆ(a !} v¯T:•-B/b1ŽnJbiHp2úð*SÔ{Eöâ6>7´JYü½H¿Ãœ˜ê2£–Ȭ™jV<µjÄðÊaCÊ— *-9¥x`QaA~^nNvVf†ß7 =-5%Ùëq'%&Äǹbcœ»Íj1tMU8£c²êZüÜ–€š›5vl±ˆgÍB¬^ -?’êN.ð·Èbþ“Kš(9¯OI3\Òì)É\þ*ª*è“å¼T›åïd3&5"|]mV“?pH†ÇËðzv"œ‘ÿïüZ€µøÇê–Í_3¦¥Õí´ÛFgžk+H;mví\ÍäàGè9NÄÞƒo%z~Š™ª cº>LµYw(œë¹Ì¯•j\Ûayi»èm³èbÕQª>T}(\žuá˜äôøWÑš…LÇyÑ «ÙMFèMÓZQY®çãat†ºLkþrÝı7͆Œ<äáQ@…j¡–o+q £ ­Úq>Ïç*ó´ù–ól+±§ëŒ[¬L±Y­ªaep€ŒDøYºUUýšž¨iºÅf¦¤´‰&ì)éå¶®(ºjídûÌÝàšŠ¥Åáñ¤P'ŸeÚ}LsÚ˜Â:y¶iõYY©µÍÊ­{x6©(aõkLK¶Ÿs®æ‘æñÝÉG›i^äíž0fn퇘8¥ºjü¡¸øÊ’ªî¢¢ªUÚ)E«.{zÕ)^A WUÕª§ŸÞ©óÑSwYË­Îr*jTÊêö)õXB»I Û-ªmO(Iß©«ÃÄO[Ô\$22ü²ŒEÑþ¡­û‘K‚Ïð¬²ð…gØø`‡¶çøîï> 4dVè#ííUœÉÞ0'\c½6ñZ÷fÚ¤?k}MyÍþµbͱæ;ò‰î¥ÚRë5šÅH0<ž§€*9š‘¯Ý¦Ýj}^ù£]«f¡_“]ÄÀ=â¡vÄyË%µaðL·XµÄ˜1ñå1õ3cÙÄXk&yËc;Y¾™_lSb¿ˆ™N_¬*¥4¥%åm1X¬á3J  e]GêŠ)aá.h‚«ùhóøCGQu÷‘¢æE‹è/5³ææf¦éj–Ÿâ\”á÷¸=ZnnV¦çr.ªV3ߨàKŸÿ\Í.eå̹uNYðo)÷-ûÍ‹ÏmY¶§žuøv΋²[6Ÿ¨»èªOƒÇ‚Ÿ~¾QèìÍÐÙYÐYùh¥98jxšg®:סz*=cÝMîùn­Ò34uUêmÚF»æ‹ËaÄâsb]–伊Ýaµ—‹Q™ mÌŸQšÁ3ââýäw•º¸«“¯íðŠŒV¬ûñ®æEG‹0niªåú§æE¬9!£ÌãvÇ'%â†ß¬ 7¸¬b$Rž››—›u3O´åòΖâŠy㯜ýÛîWYþ;¿®;³ªêSF>¬íIË}2øÑŸ¾r˹õ…>õÉãCbâ§ÿqÛ¶GæÅÇY‡Ç.ŒT¡…»IC§ËÊË5Ñù¬IÍêDO9i¦Ö µi4ͧµh­ÚaMmÓ0‹\! 
WÞ‚Ý ÀcVº„n£³1•.TmŽLçEÓVQ1D¡Éƒa2Ö±|mϱ:ôc΂É臃{M»]ɵäÚaĘ‚íÀ´¦ /·ù‡(·v†tD¨ùÛ´SЇnµØÞ·~nSU«Í–ÀÓT—ÕgËâU¿µÄvŸ¯Îµžo[Î/VkÝf{غÇvÔzÌæÞ¬®·n¶=c}ÞöWþ¦ú†õ-ÛGücõë§6çrëŶ+ù:õJë:Ûzn4ÚçòóÕó¬ómËø%ªQËëÕZk½íLË™ÖF›áµ•Ä”óáj¹u„­:ÆP¸CÕ­V[OQ=V#¼ØMWa¨4‡a”é1Ž2i¦¹¥Áâ,·‹‡eŒÝYn1còÊíâ¤;M—Ø-8«Œ6²S]ï© [ƒfVrÈõÚ!‘Úa£¿j±ZË5QQTn·ÙÊŽ G5ŠCåÜaƒÅ4,¾ÓÉœâD±‡“SVsxÊ=S¦–ke†i¬´0Ëc+1 ÙývïäÃÌx̵‰‚d¢•ùÌ!ªqZŠáÈ¢CEE®ªÿãªJIvu/ê^T•âuÁ"Áup:ï’½=Ù2F¬`Â@KèÀN»_˜¼fù#u¥ˆŠ5Ca["ÃØÀöâlg°}ÁCÁw‚ïÿÃçU>>V§^ñÝ ¬â[ˆÔo¡S±8­-7stmwân¯ršÆÎÓÞÐx|\Ž3&†R]9ÐãX²¸¿·nݾôÒô–ôÖô¶t-ÝëgBµaû°tÓN^º=+W¬[iý£«†iÂ1Nk7+™Ã6 ÅÒÅʽ…ýÅL^±mö­Îþ‰{w,}ÎØ![´=îŒwv¬ê\—ÔýWõÉ`Ë)³kæ;mX#1žÝr'½ÈÌ*±–ª¥Zƒµ{Ôz«¡3çàlhÅŠ-M]‰½ª“›6ÝÀ®F+E¿Sbx+oãëáá$[º ¢~RãNnkªDìíx`G;O•\³üŒ$ÿÝàxõºàõÉo¿ýn$zuv™lô*™Ö˜Ã ‹a5\‹Ûzšå4«q¦uºk£ëÖ¸MIw¸p=êþkÒúQÝît8`.œ«Ãîw¾,T‘¯53ÍÔ†Ô–T¥5µ-•ûSKS·¤v¥ª© ÅŸ\šÜ•¬$ ѧUëm5/:Ú¶.‡ä, LBF\"Ä.… qÅð¬La.‡ÜÄòí 7üzE[ Ë/½üÍß¿òÖŠÄtè· ›ñËó6þ^): ~ûöƦYwL[qÆ1Â^0ãÓYL)Ì‚µÚ_)—ìÔ”áå]–7Øü-õ-MæábíV¶‘ߦnÒ6[, Ùõ‹0A-–åÌH&·^@¹ú8:M?³×ÊÏ(‘1Ò•¯Eéä³M»Nø'š©íá³HŪŒ¯´«l¥Ú¦¾«PUµ“ÙMÛJ¥MyW9S Å}%°X÷0;qᯔ2Æ’^þÊ‘æ¢æ#ÍÍEÞC=kñÐÉ+1l¯„ÒÕá’V¨ëaø)Sa‚à€ˆeºk’ŠX5à ÁzdÜÞ}„Õ°Åì<6¼û+mÏwO©§Â¤CáÆ…>VOQGR•±Eæ|#Å’¦¥»SNO›6.ço®wã¬C“ë’ÏÌ—|^î5¹7&ß”rÊîÔgSžKuèº3É­'»óô‚¤¦äåü~¿þ°þŒîx¼ü-OÏ.7ЙmRžmfæã‘œ^¾0ûx6Ï®K&¬4&¶üÔtFé®ô@ú¿ÒÕôôl0™HÅ–ÎiZ†™Wa¦ºðð¦”gtò%«†Ãi(,ò$E¶¤(1%L3Ñ>`P®¥Àšïlò96;8ì_&ÐŒq—;R&–³ò¬ˆë…àdÌô°w=l¢g¦g¡Gñ$^PÝ áÖ,:Ô,<œ¢pì Ô_:,9Ì"ÌÔAØ‘æEEái/Ig‹šE§';ÔõhjzùÔì9Ù¼¹¨Iø†p:•Wx±.j¶'oèÐÁe0=J¢Û“ÝÏÓu,‚!åC‡V ­€ ‚¯Ä„UJJÄ:AÒÐ!ln¨è•—÷uÖ+©9ÁOí.CûÛæß>6ýŽÿxFÃÂú©ìœ¡ŸfW4Öž1f°ËÎß;åö››®}4عîê3Ò*’-uuí«g\WŸ–ãO›4fDð•ø2o^Õˆée¹Ùs!òUІ›¥5N£»vS|è[s½²"õ´T?]Ÿn›îžîmJûÆÐ‡¨#œ#†¤ŽQëõ cRo6n³Ú1p7(“Ю‰b.ìöX²y2,)­ØWWráj˜ÖJmh/9½:,ïEpÉ»«>œ+¶Ñ‡„¥€}^'rt£iŸ§Ï³ÍsÏó.HÓš› çÂ3èâáWB`yI 0%°$a‘áÄtEû“Á`÷î³všñåã.i¾òªóæ^£íé>|sð£à¿‚‡ƒoŸÕt'/¼obëæíÜs—ð²¦aìÕX Éô_æ¤ÆØ¦x8± â¸/ó^’|+¿ÕñŒëï_]ox?Ñ?±|’ðIÒ·z°„aI§ÇŸî®ó698Œáñî ¯²\[»J»&öÚä­ñ¸wÇ?â¶ÆH M-‘& ±bºm…Ù‘.£d dÄWŠN·{ tXm#e´&£ÎÊ¥Y6cBJ1¢ÑÑ<Žxžj’•Êc.´*œI例ÕÃÄ”û±ÕŠ9'%Cî B' ÝÃ1ïÐOv?»zK|õ‹×»MåŠY£fä)O?»ªŠ±É%·ßóð†w˜…Ÿ >vÙڱ엮=z±XG^(ć8Ѻ©ÓĪ~—?®Imójõq/OrÇñÄxw\LB,¹b¹x¢Õkg3í!;·‹‰°é,.ÖÍBnæÑ.Ô{Uë 
‰6ëàjËD¸åŠ%ßU73ŽÇu2ÕtÆ$äòÄ™´ÅÝåæn¡VG¹;Ùsñn¾€Âs#nlŽ7ÃuJ>H^™æEUÝ@5•e±ø‰Øå„ÁÂ.ór•$ NÊ‚¹ÉòÞYyÛÒ‹çŽyêW^ ~t§šÛpÍUS²ŸvUNªçø£Ê8±¦'©-rG-aÌÙËÓW¥óx‡³uÐ5ζAªŸeñ,¥” 惓棕³b››r¦LÇT]ûmÜ· ñ#œƒÝ#ò¬wÖºëókvt{l×c³;œöB‡3/ÆíI*v:‡½smðAq*ê™Í)ò^Å,s©5h¼M h]Ú~í‹ðeÊJm 4 IÁ­ä2ŠÎ%«ß›µÈ< ÏQäîd‘¾ V!ØMànF[°ÂŽ$Ýí(WÊ-åÞò¬Z>Æ2Æ[›åð+%S¬-m› ~«?`ÜïxXØ(Ø_p  † J ñxÁ»z™’V^x›ÌÔŒ ÕHIf³ÝfdHë©®¸¸¼Ô´´Ü<T/Ö•gÎÒÇB‘:y›’š›ž†´…i¬%¥!mWŽbXîíDyr¶V jE¿óP4Ϭª€ì¼ò±0É[ÖEͽ'…4€'©MÁ²'UŽ[RC¡­}µùö‰>>àÁ¹Ã®júÔÜ;wžկĞ5þÛí©Þï­æØÙG–o¾IRŸåk<>YK¶ò&×ô„éî&ï­|“¾Ér«£Óú:ÿ›öwë뎴ô®,/ò?éOYžqhK-×êWY”8©…vQ¢j$V)-©­©<5&ƒNrWÃN¿|QÑcý­ \óâç¹xU&L?kN(ǰ()vnN/;?yM÷ÿdåÁç?¿1øÍæßxá…·Ürá…yæ:¦¯ >ûÅ?ƒO]Úz÷Ö­[îܺUŒwmðê­¯ çœÛÍS†%ŒMàñåJ¥³2¡<µVç—P›ú¯T«8ó4ŇO=G¥Z°~zŸoÜv»+6&z¾‰+ˆ‰‰Íu¹Ä²1í}O8ãUa"]¿wÆaBµÅ~'Î8 âç¹ß ucŽrâ2X¯Q¯eúàߟ¿›ñàñÝ7LÄ»¯Ÿ7ûŠkÎ=o5¦¶aNðÁîàÑà[uÓº?Qvwl¿«ã{7C!W)rì[Íü[5faS´yÚRM)‰oŒ™Ó¯Ú¬±Ÿƒßà9xµc¢ƒ;:ùr³À0 ß ×mùduYK­­VÕš²2~s<Ÿ¿2~Güþx5ÞE¹L‘ãç¼mÁ09®z7K£è1¯G6'»a´»²,,ŠETðL© ™4£q§­lä!uºÇ!ÓãØ¡Ñ£/¨mi:ó´SGL.Qso½ vÈ×§Ôl þc,…>»0ÆBþ¤Ù¥ÇéY–ãÒJ»õòËYŠš{ 8õŠ¢´Ô· O3èöòW¼ò¹VfŠš ÿàNÓsfÜyq5Ū'ëU¼*®ž×Ç}ÄX1Ô8Õî&[R"¯8Áæ&%‘01né%„¹?á%X-=î…¶0ËÉîAoß ¼Åôñš3†ÈKÅ\qÉžxâ¾]™0ü±l;ƒ%û&W½¨%ož6ûœmù– ÷ÀÜ—d]8R`œvøA30N;K5“´ü”’rCV«oR6éò6Ѽxài¡]U5Õj·©ŽTJQÝZ¢5Ù–äpdQ¾š§[ómyŽAT¡´ÖÑiü4m¬1κœ.V—k[/¶-w¬¢Õê*mµuµm•ã-zK}]{Ýú–íuǧô©zP;hýÔvÐñ/ú—zTûÖ8jý—í¨£Xë ½jZS‡—«¹xX;Co˘MÄÑ<1]^q—o(µ›xD¦ØÂà1Ëy2ßLBÀnŠ˜]‡7 AV1éâLy™žù]OÚUÍßߡ۬ g˜e 9üàRÄ™êP4›Ý°Zt‹ahá4º|¡J¶’˜ê¥°ÔXY ù!ó_’0Ia1»ü,Ùùôn–öUR’Çw§x»»S’»½a%èy!ãŠüÊ‰Ë ù¤8ÙKè…·´áØ HXþ]vÓY‰Ûî/¦¾…Ù·›‘rf_ Ä´ÛEì@t»ˆð‹„~%ˆÿX†¢°¦`€Å=û(‹Ýù"K n~ùè.èØXÞ)ðÝÛ|{÷4ù›ÆoøäH÷Ö™±U_[’-ò‹á{߯’ÿ'È3¥íŽ;Þí"K’ÔH&9„v’128F»èرc—º(’Þóã¢G’xeÛ¨Sy‘ZÕÅÔéÔ¤=K3ØGt6ò.F+锦>HÓP~)â‹Aoâ•¡n”ŸÜ ƹÀYÀ™LjÀó<° uÌõHú>o¼D§¢-6³€›µét ònÕ+i¶HG[ëPG·!ý.}m@xò›DYIÿt:ù¾I› ב4B¸én´£è3h.Ú_¬.B¸uCþ*Ði S#ýõÊðû‚GŽUŒñZ†|V }0X œùþRðù¿a;úeu1*Q&ÊTñS)ZŒöGGÆMrÜGϘÐÙ§Æ4Ñ¿Þ@ŸÄ¸>^ö÷ê[_\wS­2XΟ³Á_¢QKPŒKû ô4ïMŒk/ ©sh…BÛÐÏjmmB¼ ¨’XLL½“*G0»èR}#݃t⃀£”Ã?§=‡* ¿FÔ&0u>%õaŽèCèsPŸú¬ábjÎGÛÏGå$dƒøXÌk#Ê+r½ Xl.ýCû%Bæ˜÷oØôàïPöÚ©@›> Œ=<¯´ü‹P“í„ç!L䟙þxxBô! 
©gȺ¶‘·…¾MR€—€ B߀ R”Aû6”·I}…ÎÝú!tC{VêêÑ÷ðäZXY3¿ÿY@2¯?HgG²B>³…Ίõ­[è–Й(•:}ÔûçÄ8…Nõ¢7k]4IôA¶ ÝŠR±îPï%‚*I²O·+¯Óz¡³BߢTÈEèšXbMDhC¯±Œ¬‘à uº¥QYôЗévÔ9]ß=ýŒ&¨oÓåO4A»ôFŒo7Ò0õuذ"šhé¢ÌåDðÞÖ‡n0^g磭ÔíÅët—”ëëmD[ü 쀨t|/=:IçúêR”Fõµ/:#ì.¨šŒu·Ø ¼ÁïAǧ‰½AØg¹?ÀF×…õ5t¨G?Ÿ§;A¯êg=-죟F_½ìKÅÞ"ì»Ü[°NÑë¢ãöQØ8a#…{_´|_Ú‹ÿØŽ¿J;ü͈¬ë (Aû"vd/<Œ#X£믆öÕ¡½Ê ¡½úm¡û BÏé»BwbÜ={jWØ–‰õÝK…œÄ¾ÝGµ\š±g·Ë²h_î£Ó¥ ý¬¿ói6êý“ØWÅ:Tîĺƒ$ô‚˜{¬ùKÝetø=º]=†1waŒÏJz‡Ô'ÁÛ:&ÆgŒ ¦`|¢ xpRöGä±QÊ¢KÊè©Ã…¨SMú¤½‰ò›é2‹n·äÁ>}M)l‰lk'i1¥ÜU¹_ÿëã3èØ4Z­%†þ%õÿÁPH9†5ôÖ—C^%kŸÑXK«¥|Ât­X?Êg”$tã›*ý‰Ï ã÷ÑEúvZ§wAï^Ç^ð:æí3Œå†ðu{è;”ƒ:H´ôIÒ?û”Ú/Ö‹ÑE^ÃDû(#ú ý?´«|€þÞD«aKj,ŸÑotñY1á4…!ã+Àº0dš+LáéÞD—‰t>—žC/8Qˆ‰µ þkïªQÀ)pü‡Oé ^B«” лCØ3ºLÄÕ”¯¢zå[¹ÿ¬ÒlT!˹±L jø»hŽÚNs”Â^àè#ø´Nš¡ ?ëÔ +5èk. =(ÊÉ6¾ ¹ÔK¨Lòõ‚ìk¢Ï÷öêó-ÕåÐÑ_ñ-d¯þоöô3ÒÇ꟧¨|²Ìߨrú;¦ÁIü:ÚláoÃï¢lchäZ×c{ÇÕl5Ð¨ê º´ôSàuàN`ðÔ!t5ê~´Cœ ø`»@‘ððh^oˆv~(½7ÔC{zǵ2ªàaÓžœ'ËßMåêŰå¡=Ê2² è1T`X¨€¿‡ôéàë×òéVu!ÊN&åçúôSÀOi/9š½ÇP÷¿¿÷¢~A±¾ŠÅþü?éߘߕÀyRþ[è©CÃ'7BO±}t;:{® „ã”"åy7ÅEç é«ezŸùƒ® 2p•@4Þw^.ŽzôFT¢0ÊÈPÿò@ß8öS@:6ðûñžv S©rªS§¢/ï}?®»¨D€·"¾ ùRž@O|*ˆ²m–d½G€¿GÊdäM–åG ô’k£«Ò%x%¿œŸ¨ž÷ð’ú4ìÑAøÌS)¥/í½fû®Û¾iQ[òCeú¬Ò«óÿOÀÚyxxæÿi;ÐsFÐUÀEðé^…¿€¯úœ1_¤ëˆºW}÷Ññ™°C؃?„´iç‚þð"m(v£ï ÜŠ¼×€—€-j*]ñ+“æí¾?R_N˜_ðƒ·óÝÐ0ÿw«€;þ3-ûî)ЛA¿Fùøš@á¿´ñúpü/ˆ°ï| ŸÇáÆ/ÿÝÀ2áüÀ9ô—þÈùãߥá;j–>'úÛ÷ ñoÓè|þ í{ÖˆÎÿÏÑèYâ{4"ø|/ô:ûüä'J1ŸÿŠàð…zm¨>¥!ýhø²Òçþc„JûuéO²È¢¤Âwþ«ð…ÿ z§<罌þ,¦3Ä9_ö+ºô²­| ÍÜÀîÑh”yý9 Û‹ýõkø–ë"ç„z{W,lîãl_èkЗOÇ^fîiQÛú=ûý=íÿiü?Ý#ÿ{êÄôA4}^}óK"Èè»ÿ§ø¹½û¿½—ÿÈÝ{ŸþŸÆ£û|Ö‘T&`˜è·ù}¿´¯ðsñŸósÿÓx_¿£W|§ÀOäËx_¿$ï‹ïå_÷ÂþL Ö[}ÖÝ ¬ÓQêÂÐ[ÑõíCßuܳÞ"q}%Õc¢”m¥|Ø‘`]äÜ•…0öÀÐ¥b³§2ËCT†øÃòß4‘6'ÔÞûBëØ.øÒßÈÏåjÄ õ%Y¶1‚¦ŸÓç¾z+üséBf²ïë1G¨Ä;_öÌu‰ø.(øœ‚Wœs•ƒ¡¯Q××?æ þÅ9ï"qÞC<ñXØâT}' ÷òüÝ…óëû°‹ÏÒ‚w|¡n½C–9[Þ-¿M`ççáL¼P}?ô |‡ãÅQã¢è{y·v[ä]Jª¸2¾ï~BEîç¦^ìƒ_Ñ4½†È÷á»ø9({®¸›âÿ%þ-$y‡ìÐbq?%ö+½@î11½î‘‡MêP**#ï©ÎRŽ¡î{%ïZùNæ; ¨Ñtñ.̶n¶>K7[0ëtºËH£»Ô‹è&ÛpÚdwÈi´AìWÑ}²þÀÝŸ¸ËÌîu§)ÇÜ×'ý »Zº·w»Q>ËhÈfrøÝPäžý'}ÔSG€ø¾3ôRäÞ³-²ÇŸ×³ç÷½§¿›NQ d{á;YìÙšõ8dߥŒûö%ÚäÒýc¾PÔ7‰ÜQ‰÷lWDÞÁ•‰‘´©Ò/¨¡30_Å™¶Š¼êšÌ·…žî)ŸIÞ1þYêìÑO5üÞ.5¢s7qñéà[”(ï$_“ïð®Ž`ôô~ùÎì3y6Y߈{ÇM†¬><¬­„>T·£-ùÞ/ôtw¨“¤~&Gt3Uý–Æ©[¥ÎÄGÞ Æª!»VèèЋwoòýž¤RVŸAîkh¢£¸›Û½…|”»åýàYѲ–jšj\ 
}ÝݹíÖ‘Oß!¯>þáŒ{ x¯ ÕüŸT&À®½ÇU„Ñ…¨L…gÛCâݯx'y¯¶¸ãï¶¾ådÙð»\B¾eDÞFà á°L{!‚¯"¸¿P.ô_Àwü¸9Dýýú?á>).èj€gv .ÞªgBV'ct_€WÐ’¾@º 9}IOé ¤ :ª/>êúñcå~¬?–žÛHÏý_èÇÕ›ÕHÏú‰þÕ÷Òëÿƒ~ü˜œ³ûéÙ?Ñ }ô }ûû´xgÔ‡Åþ‰½úbPñï– ºø=Â8÷†æEâÏEÊÍ?ñòEP ˆ³4öãÐçÀÀ¤m…’"ÿö[¤Ð zZ¸-ÁÜn["ÒfðþH_v÷Š‹¾£íà{áödÛèGpOØ Ý)?"Òîá~= KÂåE¾£ä{àBX~!Øñn1¶)' úÄÙ?tOØg vFdyo¸ÝnœCÉ@q$õ »@ÏÉwEâÝöj 'TØZisPb¯½*úÎúVaïtôF­¢t>ê° ¿AØpyž„Ý—çÉ¿Ê÷ês$²±¼†ø¡Ž-ÐÃØÍ«(_´!ßË,ß³„î>‡òM¾F—Ü«kÄ~`;•šôáèÓJAý©ÆŸi~Žø¾Dîyvc>âçÁïïÈ Zf¹›Öo"_¡:ìW££éѳ­~u(¤ ${”Ú‡S“õ1¤__*…êE{Æw:Ѷ{äð 8çÊyòß#§îÁº:Nù†þÅ#´ÚÊé6c¦<»ÏR7Ÿô½@±xÿ¤¿Bµ+))zv×߆\ _:BÅÝHô>@»‹nWŸC]w‘_¾×ŠÜôÐhâ}Ûgt‡øV¢¯_õ£zü›ÈAO‘ñ*öÎ^ã—´—¿¾SØIgŠwcâ=žô;úÐhŸÄ{<ñ. õœ#ý¯‘tŽq)öÖ‡¨NšFk£á§ŸN£-)ä7î£dáŸó ›Â_w8~*Öî ¬ñÐhÌÏó Ë@d}O¬¹¿FlÇäpº\›Hë¾=’~>ðk`A8_ä…V†ÃÝ_„ë—y¿—ï¶J¼ƒãâŽ&‚îð½\³ßŠp/9o”>ý÷iôÝýÍ¿õ§é¿y‡&Ö°ø¦êÞñ÷¥âýðˆhëó•0„/úsÔîKÃïû¥+Þû¡ïDè‹Bׄ¯×—öý~åǾgùq?6²Î¢ôäï^úÒ¦žïr~†ª½¾“ù!úïÞÝÉ÷õ°SQúýï"wr=4â—÷ýç½~Ú?"~lLø^‘®“ßæüz¾áú*ô¥þUo  ˆoz~ú”@dü>ô¥ñûT~cðÐoß Ø_|¡/-¾Þ”Ðçõa„žþ|t] })ÿ¥Ü ¡/Õ ½)öŽ ò|òCß×MÖïB»w¡ihVÜxý…ß4ü೓ßl9ƯÄ^ø“ø3Ú·`¹í\žch瘤_ Då•cT.ÛGr¾¢}޶©÷:¨sÝOáÇç%ô•ÀÿÖ¸ªïÚ³¡¿ïˆ0ÖÒß#ç’¿‡¿ÝC'÷ùzÙï1FÀ83 ù-ͶЗ|¹~üMبžŒ|³ô±Ð-: €v"@;}õà«"ñð÷7¡nýÃЛÆé¡wÄ:ßþ_ê‡äcÌ ý:øŽ±ôÏà9_ž‘„ï%Þ?‹»ñí¬ˆí˰¾„ýë:y'ã{½%Ý&ØŸ'hÞÉ>_hJÄ‹ï`Å7EñzMU‚t&ÎËTlÒ¡ÐÖU<Á†°ïú#ðTäI‘þ»ÞPN¡G{h%tWÄß~ìEaˆôP¤ŸèWÏ7’ð„qÖ%Œmæ;Vú/·¡o·á Îhðä‹óø2º>eŒüæ&rî—wÿ #r™ªÞ(¿"ñ ʉïjùMNø;¥Iz6MRÈïkÂßüŠo}ß¾ïçð9§ø¶FÔ!üAá)ïCް)ʽ8ãÂsV~:- Uý5ÎèÁGýÂ`Ò3A—#œ z)pð`$ýWT¦%¢. aœ¥ÕGÂT‰œ«åÙúå0?Ú@:客rþ%ÒÆ6`\¢Ì>øx"o”,WÆ?AgMI„G#ïmÀ>¿Ë{…/#yÑ2£N”ÑQm|ªàÚЭ&´‡}BÔ©âû¡ÀLÅyèш…ÕšÜ-î_¿÷]@ô=y„jÏQ¥v#•énºBK¦zœjôXìÔû#¾¿ž>ÅÙn¾øžX|K¬¼¾÷ŽÜ˰¾ŸN±^…s/¼¬È7Ç’òíá~“M“{§ü–ž ïm{Ø#“ßOc­Eý\£…®3î/y±E³"w]qb_G¸RÞ Шð7T¡Qa?$ÖÃ؆ž»WAÅ7mB·"¾ (ÿ âß\…*Ä» åTñ½–ä ŒVDdx:ê½»×û§Gú~³ñÿõû­¾ï§~ì}ÑÏ}›ñsßj|/þ¾SéûíÆÏ}Ëñsñcù™÷eêû¡'„-쨑.¡×?* @—CÈ{º”rWòÃX³Çþðù"w¢©HO„ýJT¿•ú·*\üóyç*îæ[{ýká»Î~©²'ô´°sò;D’w–ãzÝÕÖôÜÓŽ¤IÒÖ¦Fîj¯ç4iƒlº°3±ÃdHØy/¹ñŒ°]a¾ «áz„?.l§„ Rf‚g&ÒŽ…m–´™Â¶‰u{¥˜À9ˆl? 
(÷ƒ,ÞÕo Cì9ÁÍbo’¶“‡ë•÷Gþ±÷‰ïÖg¡\ÍÏùKÿ2êc>Ý7þs~!Ê<ß}ó{Þá¼+¾õ‡¿ð,¹ÃÿÏKäÜù6Z,Ï+Òî`.S{Ý¿—GîœËå|]Kq°)©ß;(´PÌmôL9=ù¶$Jg†!÷i!Ǥ­dÔ,Û€‹è.ýq¾g‡máw‘³_ô,çŽèV>ƶûàjñd¿ß'Þ·Dð[ÑïÈ;ÿ_ü±ç?KüqðëÃPàèí߇6üdן€åœ>8Bd…œí1ÿ>œiD1^¢Ø©aĵʼnUü# · À*ó%£ß)*Epȇt?öŒ ¬•ÌkÄßGêG?úÑ~ô£ýèG?úÑ~ô£ýèG?úÑ~ô£ýèG?úÑ~ô£ýèG?úÑ~ô£ýèG?úÑ~ô£ýèG?ú0ñ›èKª¢H'N.*ÿçª~Œ?IñSÛjœÊƒ´@&ž~`  ©<Øa8ËÌNÐøDIÛÝEe»C] ,Ó‹o.kÛ§l§™4ÉÛÛ§‰äífm™¤ƒG„iÉ IÛ-ál#±ÌW“¶€Sl$4¸Ø <èèÐvzвU¹·½Î‡îCE±5‰Ê}ž‰çË@PÐûû0–ûè‹HŠŠ^ý¦ÃêÍÿFr¥*¿W,ž.  ؼ h´ÏÍ@Pºy÷WîUîiwù\56ånZ påvŠeâ$v)›:\R6·uÄ&”™5.åj8”ñÔpT»lˆ£x}{ñ )Âú[L™ å×¢ÓkÑ‘µhr žLÆM@”_Û‘àÕ_Ù'ù~Õ^Zt¸¼e ÂÅÄ”¹Ê…”E>eèÐsAÓAg+sÈ)ûivĺÊÚÐ^5ŠW+IT€ìÅMe µJ ¥ÊbKÛcÂí,mÏ/,ÈG+^Y$VqR9¨E1ÚË|þ½Š)…¿ºÃjý[ÝîJ*{L¹Z1(¥ÚPÊã‹}L±afmr$S;¬Î²õ5e*†9bñ¡ R¾PVta;*ª‰SÆ(iäFÞJ:%Ö)$}@¹‡ê@ïêÈMóuíUn’\7ŠJÑüȰjìpÆ”uÕX•‘È (×c®—¯ïÈVF5¹J>•2^‰ÐJ©ôkZƒY[ƒ™Zƒ™ZƒN­ö‘r-r®E™åRjU–Óz`3ÂB­’Ú!ÐÝ2_¶[IV¼Œk/DÉšÒa=ó¶Ç'ÈbÞGLYõcÊbèùbÔi*K:<Þ²…{•B9”ÞTÁÐÚu}Lñ„§Œn1%)i„Lº2 =ɨñ!.ÙGŒ¿À÷ !ñWùëbºùˈ úb„¾¡ÓPß^üAÔ¤ñPÙLþmFˆó½ü)*ÃÛ¼Sô‚¿ÅwS5蛈ÏÝ :tO{Æs¾NÞÙ‚¾ßÑît‹Áò§Ú‹J"_N$àIâÝe59üIþ¥¡Š¿‚fƒ>Á»(ôqP/h_BÏ>̇ÐÐ]ú4ß'Tœ?Ê¡a í1¢ vCíº ¿o§p¬¡Ä·ÿžo§}¨=7©[;r³}±{Qã÷ñ%íé¾ø¿‡5²#(´…Þ”âù½í¢’õíûü¾Ý|=_oz+̳ؼ_)Í)-.½_ñçø‹ýþûý5.~= ÈfŽõË×âYA~íL`=¿¶]­ÔtcLb\œÚðÜ"C-x¶ÊáéêÉ=,CÕüjšpÔ±X ´—“Šç¥À¯€_—É”%ÀR`9¬I+8ZÁÑ ŽVÉÑ ŽVp´‚£Ur´ÊÖ—‚£-àhG‹ähG 8ZÀÑ"9D[ÀÑ"9ÀÑŽp4HŽp4€£ ’£ àh&8Lp˜à0%‡ &8LÉa‚Ç)9JÁQ ŽRp”JŽRp”‚£¥’£¥à(•~pøÁá‡_røÁá‡~Éá‡~Éᇠ.p¸$‡ .p¸Àá’.9?KÁqÀq$ÇpÇpÀqøòÊþš?‚e?Xöƒe¿dÙ–ý`Ù–ý’e?XöƒedèK¤08Ôf°hox»ÀÛÞ.ÉÛ%Õk) xà€#Ž€ä€#Ž8’#Ž8’c 8¶€c 8¶HŽ-àØŽ-àØ"9¶HÅ] Žÿ\)ÿã©á—³F öZÞÆ $]IŸKº‚Þ”ô2Ú)é¯é~IEWHz)UHºœr%E}’.!Ÿ…µû*bkÜ0™ÀB`3°x0dèeà] ć˜™j¬1ÑØlì07´ÆƒÇêõÍúýq]Û¡й¿&•;¥…i¡äs%ž_ØD𬖡j^ŽvËag‡à·œ—›q‡ü_²— Ùã…lG!»¡ÕXùiL•–ÎOg¦#w¤ïM "7o$,Óõ|îñµçõu²}aR`~ìî®*€2 È|2­åÍÌH•û€< ð‹&Èí&¢ø8‹¹›;Ùýt’U´“—¾½íy¥ íyAmÏ›í«±²G(OxEìaÌÜvÐí¾ƒÈ~(Ll÷íÙÚî+inÏ;ä¬ö¼—|5N6|ª`¡S0nA'·û¦£Ø¤v_HQ{^®(]ˆ†r[Àé hN„+;ÜRV»oHf»¯R”¶Pž˜x¦S±ìžªt C_ìf*3í¾C¾›|Ÿƒý3êñ–¿Sy9§“M7m¾}Åw£p¯½Æ&ÊcØ¡AöÝŸs­ïÔÅrñÝæ;Åw}q§Éסß×Ê&Ú}Wø;ùv3Á׿+õ-)>è[ì;Ý7Ë7ÙלƒôvßÙ¾}¢›ÔÄùöG| ¨pF‘Óî;-§Sv±Îw‰Ïôåù*ýû„|iX¸ÞŠâ}BTn} ä[˜Ó)t|ZE'‹3 
ÃÆzã,c”1ÂÈ22Fº‘h‰·¸,1‡Åf±Xt‹já²$Š¿Í^$þæk¢.ÿô«®Š§*Ã..žò_¬ÁVÅ,œN§@‚RÏë§Œbõ®s©~¶?ptJV'³MšвF±@|=ÕOVTßi„&*ŠêFÃY;»¾ ©¾º“ÑÔÆNIW§âG#“®¾.u71–|õuMMäu/«öVÇŒ«¬«ýGKäYtâÇÛ;˜ØX?¥1°-½)P&¡ô¦úÀåSüg7îæ±Ü9¦v7¤©q·ÚÊcÇLéjkmДŠÍ1(Fy‚ ˜eùE1Ø“Q¢æ(\.ì(—!ÊÙœ”+ËåÚœ²œÊD¹oúÇÔîôûe™¢7e™7s¨Wh xkwææÊRY~Ö(J±Æ,¿ìX¬ÈçC‘bŸ,Âà×ÉŠ|L6(9Q$'RdHO‘!²-…(ã —IÌ–IÌG™¢ÿáÏÜQE¬cÐÒO™›5¦%kÌ\ %°vÙ|o m¶ß¿sÅR‘á(¹-³Ï/謹¥Ysk+²jý;=õÙO‰ìAYµ;é©1Sw>eέmd“5«¶©£ºª±æ¤¶®íi«±ê*«•5жªk~ »FdW‹¶jD[5¢­j³Z¶5fÐû†ÆÕ4úì0íàvt¸%5£i”ÛÕ:R(ôîÞ©{Tb[É^Ôpd 8‘U\S\#²°ÎDV ’c#YÞ#2R÷°­‘,’ã²Fõüah…êC&Õ2¦Ìhª0gýðœ-?2ÛKcÔâ?Ä—Hà·wIZüƒ?K~ègéÒ¥‹ÅciÑb¢ú@á”úÀÐIè‰a ©–Ú&¤MS™¶ÓjÓêBf:Á–ˆæD¨ˆA‚¦ §.ƒoÑ·\–t¤¤—-| ;øJç8¾¼½DŸùòŽÌq~YÒQ2$Lq\´=%£Lümñ ° š¦f\1ësÖ¯¯Ø’³¥xK…ø äÜDßýb+m/¹_¡%E‹£‚@pI„n‰öîiOK— o¢¢¦¢Åò¯|S_Q÷üõ]©u±¬~ItBÂé‹#•`&­/²-0ÉÌ¥’)\I8Öó8ñƒÑÿú(ÁÜ endstream endobj 32 0 obj <> endobj 33 0 obj <>/W [3[277]16[333]29[333]36[722]38[722 722]44[277]48[833]51[666]55[610 722 666]68[556 610 556]72[556 333 610]76[277]78[556 277 889 610 610 610]85[389 556 333 610 556 777]]/Type/Font/Subtype/CIDFontType2/FontDescriptor 32 0 R/DW 1000/CIDToGIDMap/Identity>> endobj 34 0 obj <>stream xœ]“K«Û@ F÷þ³lé¯y4´é¥E4i¹[g,ÃÍØ8Î"ÿ¾c}©.tÀ|Æ#KBS~9¼Ò¸šòç2Å#¯fS¿ðmº/‘Í™/c*êÆôc\ŸoÂxíæ¢Ì‡ÛÊ×C¦b¿7导y[—‡ùp:½~ª>å¥çeL—llóûO6Çû<¿ñ•Ójª‚Èô<äPߺù{weSÊÁwyzÌly¯‘Aœz¾Í]ä¥K.öU^´ÿšœúÿ¶Ûç©óðþyKʦ¢MÕ)›ª'eÛ‰j,)m åIi[¨@Jk¡")íNT+ÿ-þØJF C^­DbµŸIéÔŽ”΋²–”©ZGJß@!o¡GöVƒá펔þ¾#¥P¨NèQ£eRú3Ô@JE¹Š”p5)=C5¤ô:% è—s¤ ¨ÑyRÔèÐOa@Wú) (Û¡ŸÂ€² † ãöo®¶ÉÛ.…r¼/Kžq¹92ÉÛ ‰õrÍÓ¼2ù)þ÷ïâ÷ endstream endobj 2 0 obj <> endobj 35 0 obj <>stream xœ¤½ |Eú7^U}÷ôÌôÜg’™LfrL !a šF.Â}$@$È "AAƒ" ¢¢»Þ¨«x Ô5²¬® »ÞºŠ»‹çš•ŸË².™÷©š™ÜßûÿìûùÏdº««¯ªzžçûõta„‚Z‡ôÙ«V†v?üÔ<„8iÞ²ùKÞYÓø”OÂïªùW®™÷õÄÓ™›šûü‚¹³æ|òÏØK­Ðáœþ  Â^éJÂö¥°]°`ÉÊÕ§/O…í+ªzïÊ¥³g …–)í4`û£%³V/³Ž#ô™Ž-[>wYôƒúWa{B¦_ ‘~~á)äãcŽI} ¿oè:¹0õ ÝO×ä;8»#󃋣]x!Ú…^E‡ðI8ëtµ£ß! 
ýZ‹~‰6!Mƒš[Ðø PÿKìKµ£2´Æa:ÇNE×£ƒÈ½©oÑ èfî]8ëfdFùh0‡–¢ÛðèÔÕh:Îß„jÐhtZ†[S ©ÛSw¥ž@¿B¸ß¥º‘ ùÑløIý]ø(õ'ÔθÝŽã»”}È€»´Â‘£å讉ǩù©3Ђ0ºÚÀ£ztw’8\}.ú{ñZn\åñT[ê0DMhzÄÕx 3Rõ©#È ÷X W½íAûáÛ^AŸ`M8™z"uùP) ýiG¿Ç\²{}²FL€Q*F سý½Žá~,4¡B0„kSï!'ê‡&CkŸ‚3¿Âÿ"×Ã÷îu~xêdq¹“Ž6ú-ú3öã2<O!Åd)y„[Žd¸c?øÎA a¼ïƒ«Žãx?ÑÈQîqþYþ¬˜“ü"eŠÄЃèaô6COCx¾€ÿJ†™äAòî—üÓü¥YÐëËÑtzý Ûñ<OÇ ðZ¼ ߉ïÇGð1ü L&‘Åän×½Â_߉ü þ&a£p«øM²!y8ù‡ä¿R©h<ðÃzhýÝèèÙt} ßãè/XÀ&lo‡ñd||¯Ç·áÇðNü4n‡»ÃÁßâñ?ñY‚à+’ “|øFÈrr ù%yˆ…ï1ò=ù7çáò¹8WÍÕrÜRhÕ&n|÷qæýüQ>ã\!Ü#<*ìž 'EMºQFò;çï.éþ<‰’›“÷$÷$ÛSF. ¡F!ÕBëgÁwÐûà¸лXƒ±óã|1 #3/Â-x5Œäüþkûóøe¥ñÐf3 ²6÷%Õä2¾—“¹¤…l#w‘vò9ÃIœ‰³r.®„Á5qs¹•Üî®{‡ûŒû wš;߯òy|>ããü~&5ÿÿ5ÿµ0Cx[øRTÅ%âF±Cü©¿t±4N/5IwHû¥÷äfàÎß }èEÔ냿àÖsø}èvRÉûÈïÉïŸg¢9\=N%;ñf²·“aµ8ˆ ÂcÐI>cý:y”œ&ƒ¸z< OD‹H¿ôÕD'ÿ ¬jùß .þeèÛïáÊ«E _O~5´#’€{þ–+çãÜÛèî8–øèS^ÅÜEžâƼÂ_,4 0÷zžkÁëÐ>2 !õ¬¼øx ~pa®À?q)Ä‘1ÀE5Ü_ÑMh1ùuoF÷â9ü|t;ªÄkÑ×èIŠbá*±Dtá7ÉB~ qàvDx@U’À˜œhnâ £«ÑQ^EŸsÏAë’ç¹zþ¤0/ X‡6¢–Ôz´Fhàÿˆç#OAQþ @·µ\†õ €*3ÓöƒtÌÕC8g4ðÅd@ˆà{à´d|* ØïQ»8‰t ù‚ê Ä¿œ€¦¥žD÷§æ£«Rw¡>€›RkáŠ;Ñ—è´ßœ¼-C¹ 9ŸãÑÂprTžêC¶ÉDrÏ…ô…ÑŽb/ú¾ÏÃÆÅÂKh ÿ!šˆêR[SïwÂÞ®@—¡ÐË¿Ã.å:Qer ÙÎ-ƒþGãSO¥ò°Š¤®DcÑËèW’€fIq qþ#ô÷:4—LH­äæ&Â8Ü£`Àh] øs‹1dò¤ÁFÝÅÕ˜PS]UYѯ¼¬oŸÒxIqQa,Zɇòrs‚¿Ïëq»œ»M·ZÌšIUdIxŽ`T:,2¼9Ôknãc‘K/íC·#³ bV¯Šæ¶T ¿ð˜¶P3;,tá‘9ïgGé#ž#±ªEµ}JCÃ"¡¶#C#¡¤aSï½nË0ïÂÝܲeS¨mûø†Þ{ÃtÙØ×€sItxó–ápë­0ˆ£&†àn俯†6|3Ü2D{B{•îßÜÈ0ZÓ¼(Ô¦D.‰,ز¨HãßÒ†&¬ ïñû©/Xhˤ†H¸­.iœ54¸Û‰¶LX³×g„|îéSº[·¥v·Åš)hæÞ…¹=ûX‰NK£&ôŒ,¦-ŠŒ†h ÍAK"Чt1wÚ2{ŸF gµÍŠ,lS†4oÑÒzz~›Õ#¡-ÿDÀ‘®ï/¬™•©£ú?-R>éa5ØŸ-·Åãm%%”E¤!@ShãÅl»ºO骉,ÓC°‚áCã`lg5,ƒá‡)oí0аÑÖ:¾!½BWö £,ÞØFšéžÎì×dº§5»§çôæpr;¢æª«MŽõüYu·cØ‚mØýÿ±{nzÿ¨‰‘Qã§5„†miÎŒí¨Il¥÷èÙ—)µ9†4p’)‘ÇöSÎè9˜n4hm|þDÆÔs:$¸’ÕàÐð6½ùÒô²Q ‡ÿË“:R'éYluþ´L3ÛÆ/ÜtÁöÍÓ¶pÐ`P•£&MÛ²E½`°Zú†#3+àx4©!Ò†&ƒdFá¯#Õ9€þm Ùzð_º*³yÁL¹>”;û” Û²ex$4|Kó–Y©Ö+"!=²å9DmY6¬9Ë8©ƒ·Ú†om„±Z€‚PtÉîÞ<~·7OœÖpœÐæI {&Cš/iÜ]û„2X-¡µ´’n„è…¡“{ˆÌŽíåY۞ݫ“³uÍî é:=[G ŽO׬Ž~(Æ ™ÔЛ{˜H6ön$˜Ø‹]B(l Û¢°À tÏ…¸Îs†€Î¢ßI÷Lõáâ`Töòüž]›‹ùòÒþýˆ‡'‡Ç•5õ_ÄÍ.»–»&¼¢ìºþ›Â­e·÷×ûu¤>Ñ”È … ªJ;R_¥¡HÕ"y@‰[« ¹KÊÃ&äÒjú•‡‘«&\^þ–VãÔ´šr-\Ã{*ÄòÄþqºpð yÈ®½ž¼wã¸ÚP.w«ÇãPI®ÙƒÕ 
¨m/~›ƒñ8ô¾=Õ+b@dCµ'ÊcF¬5ÆÅ:ÈÃZâöxòòB¡**Š‹áì_när:ãñ~ýL&U-}ÜŠŽÁtÍP„¾+–ê7èD?ˆoG"`X넱 Â/øoÜêÑO5Õwjé‚u­~þÛÝ{£vÆ»Nu¡ºSPª¶Ž.ôîì¯é„ÍîIl²ôo²¬;l…O¿rï5ÆeáÍQDó£œhY¬f+kÂÕcqe,J}Ç¢r ý£Æâp¨f@UaÅXTYÑLJÝq‡³ÌÚé§™mC<³H¯q>%ñ’õë¡„âM¸I¨îK kÜ·Ç+ŒÅª«jú×TWºh…‹ÚÜž\ârŠ’‹E—Óíqôï_]+ÄÉMO_§îsU^´t唦ÍÓ÷.zxÆ*ïA}nÃæÒI‹eÑÂ5ó¯[´ð–Yw¾Ûn›zhkþC›Mä"×àòg®ì¼fœ}Êkýϵػÿïˆ.ºkòKg”ýb‘¾¹iÆÚh·ÛüЊ+®)ó =“üßþ§ŠÆìSiŸ;ð8#†¹ZB°Šk‘J8Ø@âiàX°Í—‚¥¹|»iÇ}Þ8áÔ ½ zêèRïÒ»»°ÍžèW^ …®öï_³ÿȸ©‰þÜ‘#-·Æê}³¦Ã}㲈,9(5|ËÈ2ŽÔãz¸e¿° ðñËn£¼p¢Iÿ •Õwõ+G-¸ÉQv &Ÿcß>Úúƒ°Ø­çPÔðÚØÚt_@üvØ¿g­<ÝԌҕnÔÁ#GŽÐsQêk’Þ…s'@\êó=Îé2BÎĽ&Ü£Ü áV!ì„£A°9¤rß ò îÀOÃÍù½×z)ÝOuépmà¿MBßxÓ:ýp¿rÜ»p%ÆOoK6ø„ïÏÀšœúš· HG9xònBAÄPý¹¼àÌ5›=JGê›v«•L¦Ãg6CɆ4ZƒÜšKÖ¡2à±#°8ý¡= ìÿóJ§àJ"½ÒWíf3+üÝð™L"½¤Nk®itIëz.yþšíbȧA¤š~ F~vøYq®àÅMd³i³õM‹ H&/æíºÌ7$0É1Ã5Ã7!°XZlší¸ÒµØ×XC®W™®µnï“îÑßô~B>?0}jõ÷4w…b„#Uå FŠ®e[žm¢0cÚ¢ ¼-—¢1Þ‹–xW¦™¸©5¡ôƒáרèÐíý++Ün»K'b$¿0æÐÝ•ýmz,’/‰“¿»}Õž•—,zwÇ{kî<ðôÚµO?}ýÚ˚Ȼ˜Ç=7so2õI2™üÍ®û^Ä'ïýá$^€ý}áFÊ+Ç€gv*zÁq†ÙVµ˜¿ÜAî—ùçx¬ Q œ"`à·TÖz•ö ᜠ(Ý®ë@ºŽÔw†4Èja…Q6|”\Yš0úø5Á0[«„ìH” 8$|¦ƒ¸ߌҢчq‰§?°Q[ß ‚XçI`[‚ŽjЇ#6Q”ªA +ÉÙöÁïNº÷/e+ùë.^›÷üˆ·fÒ¾Õ/Kз\üF†—›nö:âd3e%›þn(º¥\§KYÔCÈÍ¥{sƒØ“«Ñ–çv— ¨O(O·Ê4({ï]Ae]´¥uty¸‚2/鹡f·vCC±ÚHö>_&»ƒLÎuÒ:zí=pi**&™ …ï 6ŠÿÛÝ(?ÓûÑ»±›ý ƒÄ—„WÅ—¤7ä7ƒÒH­Q›dY¬Í±\k¿Öq‹ýeû—þ/'ýÚ«¦$ õ=W:‰$`~Ö PËŸ«ê²(¾ô;ƒA¿ôZÈþ gÎÕAÁîköìÝG{€ØpX1ÑÔžwa´)¯ã—ÈzB:è>Ͷ¯ŽÌ$KÉ `$(ß±;Íì€+§ã^\ºkëº2J g5› &´(+èœåQW8VgÚ$’Ï@äàX”à—ÎÕOôñ~Øyÿu7>„8~úû§/}êÐc3rwí\;»óúÃ_Î[ü‹‡¶8Ž~üÝ®†g^~bó¬~À)SR_ñnà”8nÌÎäótü½A„)«Æ5ØÀÅÕlÕ¬¹ªZìÊ ò¹ÅA¡Ø1k^FöN™?$Å(éá±2Š>GÊèÙuu Dº€~]¯ë¯ÛúáxýQú f·y˜y£™f›j[à&¸¯Ô9縯6¯qn4oqÞø•YBã“f¶ð†ûbJj⽄ièÓ †¦¹x/µ|dQ­ ™fûŠ™¡¥!òRNµJ+b ›bÅôŸz‘î‰mëãíÀöøÞÅÔ6BÐqÓy´*íÀwíΣ"ŬSñ¦4nuŸ ÌÙ•6RÒäQ‚´â–FG›b#œTÓSÌÒQ¢KÉMiÏ»{ñ /<¶®r´ÓnZѱqÑ­Îöðwϯ~kñ¼97nK~óÁk)|“÷þMm7®Ýá|„¬^7ûÆ BûÞ˜¿gÎ̇úæ¾r{gòŸ_A£ý€ºpðÍŒcF{ƒ¶@{@{Z{SFs£Í¿ä9;ð8ÒDNT'! 
„ý-ŽwrÏ™Ñ̼ĽD^B2˜ÀÛ ñ<‚ÞRù2ïEAPœ¼*5‹„jZ1±Âß™†RÁæ4Ì’‘©’ZÃÕÒ6+¡ìd2;«Øã!Âz2= 'öÓsÈ>KÞÊFú{@?„§(¼Ôê_é Á"<]kKÐAN$6õó 2ÌÄà‰@fÐùö@Î{†©2Áå÷Ip|NN-½D#Ž1œšaJh­ãšKhùAX÷I0´m³¾WÚ*]gÃäžî äá_¼þz{²Ïü·ÿÜe¿J»{10Þ,_·ð2£e†å°óðGd^±£QN0¯hæG(ÃeÀ¿U^¡ü Å3ñLÂÕÁj)¾”•Ï’a1j)·Ô֟꣟¦èO-Š [" ¸…YL"âD)Òßn¯™ÅíÛšìÕßz€»ñ·ðgvm½;iOžíøtþ¿ñõK&‚â)÷ *'(-éí äö¥4Ü'“ûöµ‡sE¡(×nÎU4*ÐÔêØÏ¬–¸T3e¬Y ¦¶ÓêåèNÊ\ö(Žb>3x¸—Fw±+º˜Áã:o\húP„ïJ$z, YCÄlCÄtCN0KÈšå™ÌýiÎù´’Þ–žéb ÌÅzz¾Ù›Á½pY¦Ù¥šj7.vtŒ}¥}[.(åxZ‡×ò+åÓríjóµž[Ѽ•ß(¯7mÐ6šoó¼c{ÝaÏ~Þ ùé**£«>¡®Üâ†r½Hƒflï‹{ôŠW¬tù†_a5Àñ+·bdÕ­ÄÚïÜ_á]Ѧ:ìßS°ÂÕcB¹ qmë×cBj¢~U +dúÖÄ:G…„!Qúš¤µ46bê·dÔGyÔ8œçÁ‰sž-/ZvåW¯v~·xɦے§?þ8yúÎ+6.^pó-óæo8rÛÄõ;wÝxÃS\ ø¾EÛ?9¾}޽ť‡7¿œBwÞñž´`ÃM3goÚp.U¿mì“­7>³“ÚÎ@unäcÌ«`„x‰’BÄZž«Å"VªC„Z[;äŒ_ÒBe4 ë뚌~Àà9÷8-Cáñ¿¡³„†v× ¦œ r2^"Ó ’#ÓÀýâqmã z?â‰_æk™ywµ4uÚZ*…ÈWæ¯ï‚ׯ§oNábÆ®eÜÛç’!ëwâö&'_ÛK{·ïàò"ëÝ£P1/)(Êá(G¤(Ï‹QˆGÉQBÈ«ò+Ø'Ó{‚˜v ¯Ôóªe7L€ðS:†«)B…ùçp¿£?îòÝîD?ïq+¹ƒœÂ€ì„ö#z,˜öXH÷X|6Äqµ"òË! ™Õý­­ï¢]þ_zŒq5ýã«æð¹÷6YŸœµ×áÚ½Éy´×K“ã¥÷…÷Ñ4ý˘ʇõ;ŽV›+-Ã,#½CÃà †1e’åÚb‹;ZŒcJIN¬¸Úß?1$:ÅÛ˜3=<¥xÊÈÆ)s½s£óŠWù¯ÍY^p³wƒkέáM1ŸEgAÜD ¯ªµ°Ü4ÎDL’û%r)‚F‘—Ú‡ äÔ<Øûâ@Š/‹“øA\ ÉKûË.-°JXê 7V}ÜŨÀ¾ÝZP®/c!‹§Q€<Ò^7 ¤ŽWP„þãò©$ò÷nË-Ωž²üæg“»Ž|—\÷þûøÿÄ"¾¢a_åOÉgþçóä-ÉŸ†Lšs-~ ?á[—ÏzgÿGÃ&;ÍI÷“¬m¹tÓ,£e‘ñø¨é >Zÿ(®Û>½éÁîY[­Â‹ÆaóOáüç?MÎÿîŸÉGžn»~á'7,ÿòîW>=õ¶âÐÛoîz;ùùŸß*)ôáÑ·Ü7dÃÛó6ß3xÛïþ©n„„F°9$dÁó÷c‹UgÊýÇöLá'fª°3زL/×çË ”f}3·MSx]ìÔOê&YhÄSÈ8}©Mÿ‡öó?, ¯ñfÞ™TEày°eQ’4(Ë¢&aÞ×O†•yc!IsÂ.Âq´ÎE븯9á,%Wä\‘;È2CA²ö­A0!± @Ëdصš+qÆñGùã<·Ç|ƆiœÖ)׸mÖè¶n•ŽJä©U"Ò/¬|˜F)üàÏ Üá÷é]]È[Wën©¥Q•.SˆƒÕ²©¯—­˜|oÒ¶>¼IH¯AÜGµ™&ŽjË?­¡·r²tœ”ú‰"y#^ÞÒ”ö#¸G¸0çs±BQâHåHÃgÏv?¸ãcü?÷ÏV Ï Ç/'‡’iøž×Üv+µîëå[ ” å ì8€x É;àùá‘)‘y‘ÊE\è¿ZX¦¬0Ý$Üd Ý ç-,Éuç(ŠÃž[RR\Œ‚9¹0nyà4"Ù5jˆ` •ÔíTß‹"yQ¦W­E'åqR4¦éšJÓ(_¸èQš¿4'7İ8”ñ³O3Â2>ö™vFätAL{Ý*ó´›âƒfx{¼è&°žÆ°ú®SÇ:ãÁ̆L”ÙÔvO›îÔË®´…{ÙæÁኴû‹ WÔ0Ù„ò=$¶óíóæß|ÇÔÖ×¶&/Z?à²QÃo|$ù)^rylÈ´“îÞšÜ%l<0÷ò'+ _n¿»¹7ÁæžW?riñÙí’6`ñð kúQŸ—úZX%¼ Tywßl²(‡à´ÁÅú÷1“–B¨Â<eN+Ú³ = <ËýÊ|€k7¿a>†Näü#Çf±çØrr¸±ÈV å0OqNuMñ-ç\g¿Õþw¿åàNüÙi{ßâ@Nä׺Ÿ§Á²=E æ÷õ)JèV„ù€#Wã¹¼¢Ç¬—¡XcìÏóÄB2–5ÚÙ—;{ÃÉ8% 4 ô¦ÍL êÄãx9ö0Àƒ³TÚI1j]P<¤¶ß~è¢äo¾ìJ~øà xÈ¡?áÒA¯VúÅÓ±ä«ÿ…~?œ} 
_õÇ/ñäÝ_¼Ýgû]%¸ó¥ä·[^¦ºçÀžiÀÑV»/²P"§¹Ó¦çZ‘ MVpsmÆTŠÊbs^VÃXA’?/Gÿ¯Yï_YÖû)Ëz¹?g½L¹é<Ëõ+²ÆèÏ$Y”™—yÑçõ{‰hRATPn§ÛáæÄç c»^9ÆnÕFq’.ÏzÜD9Ôãö¸í.'þŒ†+2ñBàÊGð¿Ÿv}ãÊc®½óÈÍÉÝ8qç¯ú «¿÷Ê1»’ï]9£¯H=üT2ùô¬Š]ýû ûöɯþU’ ½~ ff™Ð݆KreY’ÇÓT•\’%Ê9º½JšÄ]RCf¢úͼòÿC\µAÓÓ ”´z&°Mõ§NÄ.§ýʡ׮pæ÷_pî.~î}nƒppW²î¹¤y•"0‰ø›¡ ºÍˆ³>Üê?Û èÂC!2â7ýí6LéXmF“ÿÑ|uÐŒ^ÍïÕþi× šÞôó¶ïä>;÷%iëGÛ=pW÷ÀÐCùH èú‚œ¨Åô¨+–“£|,õšsÂÈmu„á`§#$ÁV¾ ã  8ÛiƒE®£ˆÞ8œM¾d?”×Á²ªŽÚ.@·GêK>DIdp¿M–Ü‘<¶ý£ä£í{ñ¸OÅø®Ø á+ö/½ùÐ5á›0¹óú““ºçp÷ËWÀ—ô^Ñ>¿ã—åËZëÇo»ùÑÃÉŸZgÕ`Ðã @”|& ÑÈB§áw¸ªx.WQ·«ÇT¢ „˜dà$‰4êÁ4Œ·abZÅDêþy™æÃLó5µš±™˜B™8r§¡ÂEÿ ö“3ì× qÜé ™qÈ<ÎÜl^fæ5zãM-=ä4¥é¯e!&ð aPrÀ’ð‹Àò‰CäÌ¡CÝ¢p°ûI2íÌp²·»Úø*Ôz½³Ê¡ì½.b콕UéuŸòôº¨8½ŽDÓëœÜôÚëO¾KÌzUHØ&¼ ¯‚±vÚŽÚ_† 4G'‘`Aå6Ä éhoft¾ÏŽÎß³£sÚÐÓ–Çø{ï {ZÁœkjlY^ÛÝ”F¢¢Xi{õ5 5©¯¹YÌzÚÐç’ùâJrµ¸Ù¼Ù&*LÞÚMTÜ:°ß0ñ¹VE‰©ª3u¤¾caV  2¥ÑÒJ›ÖnJ1SSÈCÃ1ÎÑìà8†X°4 ‰ße‰ú§ ¦Œ²ïÏö¤KojI÷ˆZ ‚]ñ:æ~¦£Šý«¡#Næs zAZ6{䢢C¯ÝøÚ¼Ý»sí×s?žóu¼µèsŠ‹`õ (G㤑Ëå×$de`¡Z-öWG¨S¹Ü‡œ´Jý˜û”E ¦‹„­üáþ;YPy\ÍÀ…2µbWq!º£a¯–°ÓÚ½°-gÖ<]ç°uç^»›Ön\äƒ{F£ÉŠÏwˆ®¢*²*p<T§ Àˆ“V»¨ªH <&’IF²Êx±d a-ðv¡Mè¾xá2™Ö™Ê%+¼MâÀÉÛhh¦Ðÿ«2úñ¼2ÚIÍø uuƒ'N#‘j©øÔÖÒ  5äiÄÖ^‡”d½V®³Ý f{ÌvjU4 1Ì¢'÷j6:^' DÝb«’u‹^¥Ð’ªƒl ô}™ÝÄ>4fiSòaÜJ} žþò ŽÏ÷»¡èNˆtXMö„œïLð†3A‡y_Š®Düü§‘^·,oŠ#ê8PîÇa ’ížCä#,ußOnL¡îÓ'Aü‹É‡ÝÏŸ»|õ]’Os _\# %††  €äu‰ÈS†U"Ü­úOÿ‡¹$þ‡¹ôUSZç§E4ì‚æýÄô»à÷!$Z¡%:9‘Ža@2`CHÙb¶1íàNoÑ’f§»«Æ)Y1Y¬Õ$2ÙÕ3‚{f?\Ñ@v¦'?e{r®ý‚‰Z®¬ëìÔ뤡‘x”üÕŸŽ'·ïûžöëä¿’GqìŦ_$¿Lûú|7p·yÑ£t®m±“ŒÒG9§ëÓ¼IË„AoÚ׳Çd]’õ öf¼Ùòcøó{Íÿ¯.àz°¾Þj,qjiJÇœzœÀ´Í ® sÜiš ‡mPîñÙIñ]õWÞÕø÷ä›ÉÍøº—iÝoCòá Å>wÿ’—’ÝÝÏqxë 3nr™)çìߣàEùøœ¶›,ØÞ?8-ož¼$WX҉̖[PC–öƒ¥€Ð‚–-˜²{Gê/{íþ*XŸÜ›_Xe£Û9…UzfmͬaÿG{sbéýp¼žYÓýÆH(D-—/ M4Í. 
.WV[ÖXoV7[ï5?mí°~cùÚªƒ¶ Ù¬N›Íj³jŠ=@Â~·*ÚiÖˆàU·ÇïËõP(aIN ç3zz½V«EÎY³éUb–TÌÙÊgn—ÈŒM¡‚e­\A¾÷¿¥±øÅ£5 æægÀwÂKÃ;Tadh§™‰2–Ý‘NîzòÈz}PÆO1TÙ°&¬ú@›} … ÜÂ4†ÐÇïKØŸìð³Á„fžžŸ¿Àiìªô¸=Ž×—;Ek±¼ð²åð;×¾õn}ÑäÑ©S‡&_5µOxÔŸñŽ›ïsïãÉráàØß­y胜hÁ˜«“-¸ß†­LR÷Õ\eÍš X¶ÔŒÔ×üß„wQ9q…³¹Ùü n%ÏG «¹Dp7R3,ohÁð‰\£4#gjÑ-K„†èxd Ñl!–-f FŠôÁéB4[ˆe …ÔßNKEæX)à £ý­U‘¡ÑaeÓBS"“£Wš™[æ9çzט®5_k]§_]°"º‘ÛbºÅ¼Åz›~sÁMÑ»Ì÷Xïqåf,µ>á˜=ó+±bpÈP±ßÎWô‹¡¹ \æ>k·H ê6÷É-Œâ¨à(v¤g,rû(¹¹nŽa^ÜfO4¥Ã!tÕĦËʺÒ߀Ñ'Z`1›„p0'7 K"ÏG ò¡Nr}üe»;‡ºÜ¨ î0-«ã‡›ñ2¼ ‹àz¶Ž>ô–ôÖÐâË”*ÆÅÂ-2¹˜6ÍLÏ+öW@ŸpÌNÕ7ÝeÏ2¹½gbÄ>‰Ê‚¯_&ØÓT‚ù™],J~>|«ƒÏLç»â§h€éL7Rÿ³å<æ;jrIeE&úXÀcÓÉ-™¯ËéqóƤàªÄf¼hžù»uKŸ™8nÆ ä•ãοþÇ_>þïÂAë®§Ûv$àZ¯Ýxöá7’ÿ¸¨_uÛÔKV 6?♯y|îÒ׿,|g½åÖÛ×O[Y¹¸hоUW]±ò[ʩ堲9¨[ ³@raÀ{ðGé +ö†Ò39/Š!LÊèÔ:Æûp&óabð g°áǬÛò—,HœË‚B2m@Ó+ÊûïïíÁ°çîM_é,ƒ6ïeÓ¶®°8’9ü–d@0ïÚuæ´µ;@ûÓ“}l¨1kß ¿)ónÊn°¡ªøAòpþ2y•õI᫤!bë /µ‹Š3F²öé±Ïˆž ð}a™{Ñrã{œ›4»—¹[ÝœÛÌ‚}YsP eRwÒp¨f9EíC•ϸi8T{àPmrQóì<‚«\¯7eBik€i»8j•¶ŒÀb,laã›ÍIž}ï÷É3ËصîƒýÂÁs»?Kž{üvlþ–{nÏ«û®8Äòt‘zn8ÍdÂgrÔìF2Óî*YÀD(ûìˆþÙ[e%Œy›Œ e.AE\T-Óʵfíùe›Ö©ÔL!mœFxb’I&ÕDÁ8Rpɺ:6#g«Š’§, X„NBnõmHÏd®Œç™©ŠãdÜ*o“acÃLŒ¢ÄL‚ï Bh-$ŒH9x#Û„Nᤠ€G²y¯©ygÚ#i¡YŸôçÕÓ™Ç~_—7}œ™(¤ó„i¯Ã žÅdJüÏÅŽé 30îÒ)Ô)‚Ãú3±8˜QfGcW¦ý‰JLwÿîx]ß¼ü>xëë݇À*ý°uÙêÕ|ñ™átÌ}I«¨m?5bÅ(f+¶Ç¼ Ôß–°÷÷ŽD#l#í#¼ hª­Á>Õ«ß'ßgÍ ¤Q©c¿/´¡ÂPm”k’0I›îš#ÌÑ»V +µë\VÁE=W» ¢FëêÕ< =éàçr<ø‡¢ƒ¯'*f‹Õª9v»Ëíñz]©Ú½ò†èZ³ÛèÚ˜æ÷ „€âÄyYÎuy.—×®)J®ËE»M³ZCºÍ©ë6»¢É^—`µé WÐ$óêV«¢È26yív› É~ǯVðxB,]ð3€ÇïÑ©0Ÿ¯ߺ;m4ù}õÝàNvû}ÝÞ1ÃæýªÇ&Ⱥ“Ô Ó½Ù¸.õ½Ë W I›,úáð¨=œ-õ^±­@lå »êùMs@*KÎs@Æaµ@Í^ÍŒi¦XÞ áH3„Ã+G%8™tãG’×½q¼À?@Åžïþ86ìóÕo’W½”|»Pò8“o‚¬ÖÝ{÷ß ¸Ï»ýÉïÿqk;÷<84M[CsGœ}¸çÖäBâe<܈ó\]ãH²s„Hâó¼ÅHL‡µ…L¦ðûœüð‚t:uôNÕ2ôì™0sØÂ®ˆ­Òu+¾íã“ ¥ñwÿûã»)Ÿ&âvv§:Ãà qIÔ9Þ@…çy.*ÑÀ±¡²»=§<8÷þ—[àp5øiÕaÜž\ññÇø¶ä»ÅBzÔŸ“ ´ÿ†8Ê×ѧ2¸WºN¿r>:¹ðÆ©m~Yê>È_ŒŠP éc”*f¥Ägö—›KJæþ®šÀÀ’‘%M榒Eæ…%Íå[Ì‹p?èÚì*ÊÆŽ ÙS´ô¤ï™¢ý¾—ŠûŽýÑõY‘<Ôs©Æ·QP¶ÛÏ'QTSìŸLKyž ‰LFÃTweLÃïÓº«€§* óV4_°€z$´ï4(n¢·+`7*Èjá‚2ݰ47+½Ôá6˜Œìg…~ ŽÈT•':d{'<´mƒé=Qo~YÁ«âQ‘ä‰u"-Ì`IŠ¢—ù,yQdŽŸha>›)û 蕤S\ø¦ÖSOŠðXüË/©<ϦgoI›‹‰žÌ*fEÑÀ,jIçQó©†}«« ÓÉÂfO¹]ô±¢HŒ% a™Eô ®vÎE/¼ñýÅñ½ç{ß_s¾ÈKåùBB™µÌY.ÔY a´uœ0Oø$çŸü]Ó]^$(XR]A‹É[pÌ„u“aj6µšøôŒ¾‰ñ¨É› 
æžÎzµ'³³0éÇgL4ã˜ÍÆP(£ô4­Ä¶JħcQ̨«ä¢„tb°»·ã6|óy¸Å¦feZLÓLs({aÆ*˜™]ØNY3VÁ4ÄJ9ŒꦷÆ^6eË_°/wDÍÆåŠåtžjÀä>_É )øcÙ)é³–å¨% `hû:—¸tÉ/äÀ¼>Ÿ­Ùç©ö廯x¡ÅHþøÊË‹IÕä;W=÷««W='ìþçcïxkEò‡äã{^|ë‘·½N3&Ç¥¾áº¯üxZÆÆª²Ü`ÅV¦sËûx{Ð$yƒ¼ [\’L{/±ÞK,³WÒiï%ÆáGÞ{=í?nª ?jŠP4œââ™è˜èiv4{$r˜ŸÐŸðk²Ù§." ¹EÂÕÚ2s«ùImŸ²_ݧinm£öWÂYògZ—Zo°rV c¬)g³…ÍЬmh;úÑj5¡óm BÓ ,2çüô¯ÀÏ›Ó$.J ƒQçRF?£ÉÈ «à¨„ó¤:‰HSéAƒW©_ êpÆÎ§³Aé™ñå™GF€–=ÛµüT¼kyv–Ü–(Ó›NÀó–€nØCeÙ2©YψRŽ«ÝóÃóŸ$ÿµüÛ[vý)ïß Ó6?óĆE·ã›=/Å9X}“õ/ì,¾ò7ï~pˆé˜á@³ãé.<ÙxB%¼9j®25 ÕÎêàT2IàœœOæs•ÙÎæ`gÞ{ÂûŽÏ|_:¾tþàù›ïK&yî¼¼¸ŸŠë(?•]©/)0÷u$ÕæQd˜y¸sdpª:Å<ßü¥øµû >eѱ‹³˜t+H¤I²!IÎä­Ä(j³Fuý˜ ë6ÃÖlkµhRžH ¨ÍN%ÇÆ”U›H9ÈÆÖÆ:â6 q[vÃFÝK(ul+í¯JG¥ãRJâ)‰ÆJœ”ËXŽá´”›fEF6¦–$¦}$_nÕ¸Þ¹%-õ]ݽ…Ž=ÞX{‚¹+ôw^Îh>\}A.'4ë"=`îáÞ¿zÑ{75ßS¶·;ôÜÕ«~µóºÕ;6>²õìãbnËøÁÄrf8±¿óÖk¯òÎaJ³Q€¢¹ g. ÙDÓ‡‚.2™kš”ɦ¹Üba©2×$»¨d݆‚1–r‚tYhÿX8ã<íçûÙúúÛëýƒƒãí3|‚³ìKü³‚«ÅÕ®Óä´WGnl5{<ãÜÔóãÜAë6}»NtU $ÏPŽÍ¢Y'HŒ»Òq·¤Çc˜Aë2WМ}œÃœ¥6Óã•Â’ª636ûóèän4VEׯ`ªfópž»R/Œ‚’ª,¥B½(d”J Xшå1PJõÆÄ¦x}÷‰1zK<~º¥Ç¥¤“ø'˜p5Õv·ÔfòÀ3 {l~3+bép³S 3o‡cL‰r—,ýûo“?`çŸÞÇ|îuÏͳ·vBÆk¦Ü²öi<Åóx;ΰ×pQòóä¿õÐ à»7Yð$ ˆHØ*¼‹<Ølä:lõ•ùÊ}†o™ïAí!óÓfÙo.2·ù:}¼ŽG‘?¯*G6sš5¨b‰;<'"õQ'v¦ï‰òˆ#wa6e²·ß€*6uæUmCØgP1ñfädq‰"—ȧ‚ƒJ3‰3aKg&lùS;l“=€žü‹Ì {Üë{Dat«ÈŸŽ÷:£rª–e›w5Ñ E-{>-aK§ö8u›¨H¢ ’®ØÈ&Z8ŽÓEƒœ,§†s%}Ä`¢š‹>E³çÑGþ›VžP1aèÑ£Ü[[W ŸjXÞ|ÅÖsó@".IŽç¾‰ÈE%x©Ñl2 ÎRSÔ9Ú4Ì)*9¾œRSÌYI˜ú;/3 wN‘L LgÔº,}#¥…G..]¸­t{©Ô?Ü¿¸®t¸ixxXñ¤ð¤â…ÒìðìâæÒÖÒO ¿ ÿ=òC¡Íã]dw{QÐ!1M¢‡P9Ó#­¨'¢ƒ¬3*„`ЪËjªÛU­T£^ï1Ö=†§ÙÓêáKaÈÉäRkkžXó0Xó¸Ù>úp(ƒ5z”H·Ó°æ¡FÁe”é=+­8Šòó ^µµ·¦¬|žµÎ:“«ŸÒ֚ϳa+Ã6+Ã6«/^º2Lá->¦¼êÒ†pÝ'NÓg¹O°éº63MÓâ¡éƒÌ€,©!iœóTgJz?2ïSÅ•ë6{-xUÛ§'¯úÃm/_ûäÜO·ÿú»ûŸ\·vç®kWïlðVÌ™VÓv+®ýì>Œ·Þ×znÑOGW?Ë•ü¡óÕw~óúo¨Ç¶ !Žæ:ñ¬È ŒïòT±§û˜y嫹aÜA3Ϫz|UÙ¦Ùœœ€‘5(HN“ªE£²UJÁ v3ã6XBg[:) êXØXj'³í?=N¡dFÅII¢PcbO©™Ç«OïgÓðcXÎSÕ¿ªÍ}ÒM–¹·»ÛÜ)7ï&ÎhzŠS‡6œ¤Ïœ‡€s¾ ¯BHÉ&œ1zËkÙÂ0R8j¡,¿¼%΂QÙÜÞÞÎÿíèѳ.>vöúìÂcÉñx ë³½o ã…¨0ˆ¯6 ‚G‰ç /86›çÔx›`’hM¢´Y·¢{< •横n3áK½ì韭²r“.§s‹-²nɺÀŠE  4GÐ-TºpúùS‡¥Ï™llO.ÈïŸWÓ¿½rð½#ùoÿð‡_w¿eä]üŒ³Û×Ï¡ò ¼ÀýDsÆÈ,# ¦m+qŠ8Má¬æ§ENÉ&ý§§ÕlAÉX¾›‚œÌ]£»r°,±“{í…U uÒ`mXE˜U Fäyk”@ ±Ú ^Ã]­~ÂýU”žqDŒIQ9!PêÌcÍ|£Ø 5*ëø5ÂýÊëâùÄâ·Ò¿ÄË.»ª Çšm¦È°¡Èr4cÆñ|4w¦Ãò4üË 4èh2!•ïÀVCx]É—éV8ļ==E¾ S‘(øŠס± 
94߯“}Fq”NOdœŒì ˜;˜k‚|šùÏáózÓš‘šÍÅ´œfs1ñó3‹`žz4*ÌgÓÎhþ™d—k9¶ÌÄ`Í£œ§làˆâ5Óð=ÒOȪRš“P䜜Zš7¶'‡¦½·'ÄV»Ã™ç`YþI b¹g˜êÜf©{Ütõù%ÁŠmilµÛ”Í_¡ùôVöÏx,;Ýp7§³–-èDí/=ùûÝôḩ1ý “Hé´´JŒ#X ÅÏ|›\„_ý<¹ãá๗q[rU÷’wmr:åË›`QÃäõ¯ûP,Ñ´f@:á´ª:½.ï—^ç§R(¨«'<*ø±°8)pyÂ2¡UH < ¹J¸4ÀÓ+1 weó(Âàf’ÞhÿÓy´Ïé…öiZ§í19cŒe§‹R©ìR»ÐþBì¢àÅ" ,I³-ú¡#sS;KWMëP16S¿AÓŠNe³ÄNeß ò‘Qo2WEùü åÏž/CÂûÂéñÈ¡ˆâ „Ž‹äE5)$,Fü>]=ÅÛ¢Û£$ 8f‰n³aÏ<6/óÖX˜ŽylNÚI{[í¨0¿Á˜èlÙü[6ÏÌÖ› ÍÝÀv¹@Ïåìrš«g£— 0-`Žw€ÊSÎ^8üèõ܈TF¢øÂ4@ò•?ŽÉ_ÎÈC\äÎhàsYù”ádª8M KZ$ ¢xõÞŸ#p:>Ó}¢WȦW¨6ºYÀ¿ey:C´.-Ä6Oï y‹ætÄœš-€ífWVQg\úÔ9›0õ°çr™ºfvtoŽ£âÉE«îÍ»þ­GžÙ™qñ²_¶7̽~ »{ÌÌ+¾°¿»<|åÌw?Ñ}/Ù³zõ¸îìþ8ks}üâÆë ‡À‰²SïÐÿÊ}í8Évˆ<…ÜZ`˜5:¾O?æý›òò!Ùiqºí`saÑmVÍÍRàev–—Ù\&fm™˜µeê±¶LLLùì:ÂÌÚ21k ¶ÿ&¨IÍDãN MÌ 3aø3ñR¡óSËË{ÒK–y·{Û¼^ÞË‘J—›Éæév›-“Xú¿\êÏ .[/ƒ‹ÏHb§aÿ¹7ÆÃÞEÓóa“,®Û»6αYúèà+Ì-ÚUV%•õ˜M´°UµgˆL`h¡(̨œ‰âö"ñ¦Ç®þ¬yÇ8]m/Y|銧øØ½/ [V_±®{ÙxÕ’Áw½ÓÍžpšú†/*š‘/Þïbo´pÐÙæP‘\AK>¶Ã.©>m„x©¶£åÒOލk^Þý…ºÔ7Ün…rÎc\Çç;ó*—)C ¦äÏÍ_«Ü®l(xÒñlé!άxü^Où¨Ò¤ÝUtoéÝåO¨Ok>Q´7öÛ˜»(kóäg ‘l¡ [(Jû!™ch!’-d 94Ïמ›˜&F5•÷‡b.ÞÔ7ÇOƒDù¾RÇöÕùÆúfú^ðõ‰V_žo©ï¸ÏóÝá#¾W€6.à U5œôp&³ëø¸XÇì)¢½NwU:Új±UaÜwFΕ9$'è’øôd's¿Êº¹_J`>Ø×”çÇþŸáðVUÐÓËXdЛ^R¹ò±·ÉùBôL_ˆžåc.ŠEV}dú© NÝL+Á%ô.ôŒ’leIúY$‘¾coU)ñ³[… Kªš+:+H]Ek© âäM[VŒåBéQ¡Ú€{÷ mD¨ÀÊDÝÊšg ±0ÕÇ!ö&ö¼C& •<ë@ùúeÂÀM-õ¡§¯Óaµ|Lf’5oéõ,w<=积ka“¬Ôj¦‰ktÕóœ¢'­§Â>¹ÁY³évÝ¡sb¾9@J‘ÀBXä:a3l‰P~ĬÉÅàD*ªç(OÏ¡=~þ=‘™÷C®_¿õ(ihêyëSa¬°/©®ê_ó qžÌË")4Õí±ÞrÝÚÕÕÑ_¼~ÿØÁJî•i¶6mŵ‹Üî²À†Wï²ðõuG?Æ/Ÿ;ô¢ˆ7Z1rý˜kŠòâ—^7ß;aÆ„šH0Ç¡T^;cÚ£SŸ£rZú‘”÷#}†Q¥æÅ¨‡Ýi †B«#¬™UÌ!·®Ä­*( ÎdÕóQ>6Û£NIò0eX³´Lj•¶I<½]j“:¥c’ÈÊ3™å§I4eŽM ¦-ÿL!“k~†qÕþTËÐ BÆHÛ/ÒA²yqÿÝó~æ±—Qv×ê'NÕ²YšîZ ò¶ÊJýÍt:mÔ“ž¤¡1h[ {{Ë1#ºtíW–nذwß>G¼(wÇ£úÅs#³·béÊäm[»Q_êgž$`ÙôÿS౟În€HB7Mg>iTÚUq.n ;Ü&s ªtG½j¸ú™Uìaö°ÇÎÀ=i ßžKØãÌ„‚3qGsm<Ô6ÓñHyp§{Æø™çI`ÿI?Yæßîoó§ü¼_‹*=Šƒ¾_1¤S¾Px%«8”Å‘‰{ª,ÚI¯Ïô…¬`……•1¾ œO^üOs4›ƒ®Í¾ä„ÈÏë³ÕLó³è#ì`òòZ™e[:ØTR²T0œ›™?+Œ±€“çü‹\ÝÚ÷/|¬nj7Ù®?þöAíµ_ºdlõ rW÷ÞÛú?ñŽÍ$qö ŽŸÆ‹:*þ.33íd¤Ê"{’ÿ Øs*eñÞ9€,ðÅj£|[B¥øn¶%phªdº €t{a3k•:ÍJn¸ Á‚Y8J~´ ¹a[Ÿ×õ­B!XXµbT¤ÄÔªV/E#Ô)x i””yxY(/TV£kð5d¼Z¹FÝ„7‘Ü-Òfy‹ò0ºO¹S}=¦¾‚^”v«o¢ßªŸ ÷ÕïÑ_Õ³è”Z 
ÝQ½È­¡˜Z£ŽE†ª†Ý]%«TeßãH3EjPP–²²TLÄ0”Ž­c†VKA3ÑÔ“Ïâ06ð;?Ge=)’5ª$ËQEu*ŠŠ8B¢éÜ9AU‘šN„%UáÊ4¬åˆa(­ Q:p`Ÿ!´ D€’¡„ˆóMßý‘rS—ß×ÝÔÝä÷vhʼ¨©'‚eK\ø0Í$Îd¾œÿ¤3Ybš£ãç“WþúD4Ïÿþ@ò*>Ö½aþÒI«ÈfµÅHDHx¸ÃÎçdŸ´ÓɆ>ét#1c;Ç^Mȳ<_Z²…´ôŽÎvK:ü ª•–lÛVmFXCX´Âh˜5–±¦Ù0áUÞ¦fâ i ³ÑW‡Ñ?8¢¿ÇÌd3²ÞцH —ðÅ*¹Ì6Ýv»³…Ò/Êc¯ÁcJÿ‹ì´ïICÉ WéÁœt„Ôx1¯ Š5Å!Ÿ]à/š“E¶ëÈÁ9¥ 0倯•J严 UKåA–¡ÜÑêåQ¦!Ö¶ËìÓ­ì‹¥9ò|ûñZi¥|@î^í)¼“ì4=©íCûŃ–ßñˆ+ßðßX¿¶ŸÏ(A{ÒBcK]L'„1•Ζö ÛT‹•·#›,ÉQɵP‡Á"qf¬EÍ©ŒŠRfà¾æ˜±Ó!ª&[LÛ&ñÔ¶+mkm[lªMå)9Ò„ùyâhYüTY:]]?A¿iíÃɱ„RIø?í |ÕÝ÷ÿsfîÜ%¹¹77 Ùï YD†%"¹‰a ¶ J ,*µÁµ>p-*"àÚRDe«+èCHø¾ ´(X—ömÝJUÚZ‹µh‹˜ÌóûŸ™ — ˆ¶}?ïûy?7ðÿœ3çÌ9sÖÿYf®Ûãq%$&züÉÉhßëÛ€Î2*<ËãK íMvºBÎä@ ÔáLu8œIÈç"oRª×›äÂ(·ÔãJ…wÞejÕŠ3 ¹|ɉI^½ÚqþfW€ßDò¤~é÷*üz~Ä«zÛ•'ÞÐ2ß³È#<íbBØ=&Y™Ÿ¼(™7wO'øÊt9#©¢r=ùœòeÊ—³¤J”yñ±Ë/Ï€^ƒÿ\É.Ï8óS«Ö%Ëã·Ø`êLòW1|ÎÔ·&oõ†Câyã0tÚÔd¼±•úúB”Ñ®ïÉ5Ö·ôoïྱÅÉŸ™ƒE~C}K?¹%ÆeÞâ ™¶ë}I~¥åmPqo´Vo´:ûò[iØi†Ôuó.ݤ¿dãp›'¤…hµ{ÕzAæà¶@%x;…'•íab©ù~|—’Ùž¤t“Û\ÕªRß¹kçÓÕZ¿§w¬pÁ¶Í[w=Ýó-40”|@\Û±ê•WŬo‹[Ÿûúu´4>ôCEKãWÞµú¡4Ÿ’ k­ ݋铹¯¬TJùí›ìí¾€âëži¾¤=6³rŠo¥¶ÒõPÒjßÇ}óŸÛN¯ÌRSÜiÞ,ÿepÂmÊ= ®²À$­ÑÙ˜09é§Ê*Ϫ„í¢=ñå„I¿ô¿­rÿÊûŽÿˆ'`W®„D $û2¼P,ø¨pŸùt^òx„._”ä"fÈÜ\=K×U§ËíVtÝíÐT¨|>ôç^Åçóú To‚šè÷è>áóø÷Ñ>·ð‘;•È­ ï>¯â-JTSUÛ­ªBÇH 1‘¤U*Ýó+Ýᜮäåôœ\@‡Ó¯w=¼¡öP|Êíýþg}rÎ+j{«ó~eÙ{oîüX”(ÇGô­íw¢3±ã5etcçåx®üÎqê_PF²”¿[e$דêSÔœL_@OÐSÂ_(!œ²ÊJfYiÖ{Y¯feúYÈAºì6²Û|9Š⚜ʒԉ¾Í5ì #CB%}ûûùàLtÒ½ ={x&ôHz(9¡$P’22½1ИҘ6'0'eNÚÍú Þ›“oI½%íïÝÉËËS–¦®ò<•ð¼WòÎÔ?{þ˜ú…·Ã<ÕÈɳKTzJBN¶æ«óÝîS}™]Ñ7']›ô+|¾D?ÚJh™©))EO* ¾D4†E  ƒ=)¼99AçPŽ?G”åìÎ9í¢ú9Ò"œÚ.ƇªဘØv¥v›OéNò=|I¦V8”Ø7qL¢:6ÑH‰pÑVæCÚˆê­Ù¡[Ñ0"ñ:ø{k(Düw†ÿØG™üíO³2üŸÊ3Êàƒ]¢\Ñ‹g\¤–ÈòƒV/ ­MZ›]”hü‰Œ?)ÑmMªñþ¶ŠJO÷ŠÊ$Ô²çÒ*“­WðY_æÏ* ø¤ô0÷STÈMõ– ßh.è¾(uÈyU#»%;:¯yñ½ÒîÁÒ·v^]SØ÷Ö‰ý;¿÷´¿¤0{ž/W+éxèúÛn½AÌ;ñòæÚÆÖrJÐöD¹JR6‡½v±ß%Jy ¯¢¾vãDš'×T_ ÆIOQâ.óW*•žQÊp1Ü5Ê=Æ?U/Æ»¦¸Çú¯Vfˆ®¹î* \?t/Sîp-uWމìLW±ÒÓUê®t=îzKqrmÙîOë/мºù“ºH‹Ánpyÿ‚|n6xUÆZ%íà“JåÞ%OTù`ˆxDnã³TMÒÏ­4\w¹TÇ›ÏõTŠ]<)c¦ž‹Ó¦·s*rRš"º®Q¹\æ½ Í‡ÁÚž]év¥g_ÀÊYk·J9ìò¤WŠT•~²aé7@Ñ øå/Å9°_~Z‰x¬yr絩ã…ù7ÏU>y@uéÜØqÅÝs>O½µ‰êõ”NxIKÏNHM×¼¡ÒÄ!(úJˆä>™(xþû`"L ©íbr81Ñ›˜•¡Pf·ëÌOVVù¿¬¢²‹ù{•Ç>õó°îSó“¥r‰IŽÃ*ÒÌUt×'Í}¦êö[† +¨Y60¼øÒªk>¾Nô~õ¦‚þ¹K†¬˜3·‚ãwµú±rc?%Ђpñ9?tŠ-νNñ¹KyеÞ%š]?r‰ 
®™6¸W‚J®MNþé‰ßGÊfšâ Ò|ÇD£á­t¼D³ÀœoÐ>¤§ôJºæÇào·FTÁnàg¥¾‘VÁþ\Ÿ»5“a^ó©ð××:w;WP&K Ã¾'î³ÌzÞê 4Pk6~giÄ=Gƒ;ÆXÈá nR kÁå%ºKyÉØ€ë´á/a{PgÉ‘¸Ï¸^ …0/Æyâ¡Cú@>(Ïj=Y†çŸd>7x‰fó3w=âoÅétÌ8ÖGƒ0ÿ(•ÆHwTÜbYÃ(µE çl0N¼J×h‘‚ôzÈq„T%Óé}pÖD—À¬ ž Ž­´šÍàbI³Ñ¡=BëÔc4×nÑWâ9šÞçƒ/©Lü7õÖ‹hÊWîXƒ{þI–‡&ðû@öÓŽÈ2t'XްŽÚéÄiómÈ×KÖ×\#࿌@¾DÀÕ„_ÆiÎù®L쬄Ûàf*ûn<;—IöÃþq¯"«n8)iܬ@º†Ô@ÇÁF–3 \Û‡ûdä‚>àØæÁ ” lB¸ª,¯(3\6eù@Ùp¼„4DÜd™5ŸaÌO³Î¬·îÅáäëÏÐ<‹|¾'×.³ˆËûÞ\§¸ÌØR–ïy²Üÿ…Ÿ“ËT—DÝÓ>¡YQ¶lÉõqæú°RL » W£/æ2Ëñ³%§ —5™&¨–¬ŠzÖ¾²Ž@ªDVY_lK;-ºälz ÷œ®_…6eÔÐHõ~ºJûŒêÔžÔÇÑvx¸mŸÐ¥®=Ôy9æ‡bä*ÆyH™ë؃çÜ„ôÂLå4‘yA”Áý÷‰êOÎÜ­ Åê;ÐØo?J–ýE5MBÜ÷K;ô©,ÙÎ1‰6èŸP¹6míjâ¼âçàøpÞ»®'¯+ íÄ!:_{nÒÈwëd„éIY.Øï<¨TH ç r¢Ì^7|¿õÒO˜Vz<&ÓBú‡.Âe˜Ó÷ÔÓèR©O|BkhêÐzg„ÖC«'Ô‹§pÇáoÇþ²dýº õë.´Mw¡Í!Yþ§'ÔMxž›Ð®5‚4ÚDŽÒpž|ö:Ílc—pýQ7R1—ý'h‡YŸø Ý­•Ò0}­€Ý ÚI„» v·£þöEÝ] ÿA«Ý&„½öì·šuÖ¸¾8Ô¢G¤@2¬§ |õcZ¯Ž¦»PŽk\?A:ÜA½Q¤YiÌç›HóB‹å&ÒÎoJ%_õÓ°½èG¿F D÷¡;´ÛhŽ6‘ÊÕóQw“©·ö+ÔÕãô°ê£iÚzXk§ålÖR¨DmÁóo…nÉö¯ÓX¶¿†yMѪàÿ.ºV›FÍꔽƒäÑf!¯áÏqÊI!üŽûZ(Òu"êÖ8?n<Ãîd[IŒ6’zKQȸÚÄÄYÔã©F#O_>?%¾ˆkW<í8ž!~ò9ù¾ðÇn´‡ùW¯ŒwA‘);lj´ ¬oÓ…êÅt³ò”±é:<†‘Ñfm€r+è£  íà6œŸù¿ÁfÓ Ým½îÀ½_€lÓåBÆ6µ4%ìÖ€UàûZ4Ιì£qd;O1?‡¾(ÇŒL¬{¤ó@„7P»ÀØÉ ,ŽfôE”ê¼Rհσ¿³#õé9*TÉøû¹âôMà¯oT:†£ŸÑÎÈôoÁ»Q2ÄÒêþé¸ý³ Ëeúþ…ÒÌ2DIʛƻ•7ÉÑz9so˜Sìô´ó öJû˜üCY!NóXûXsl¾žË,ÚhZ4v9è*ÐPF«†{kví§¡Œþ \ûÅéfíÉs0…z©«9N(ƒ=N7ëc¨# ×,öƒ:ºÌ¯£ìVú÷Ò†ë.#¶b¼º® aLTºätUW›×íü±ó%6¿°ö‚,†¬„l€mËè:cíì¶äLnbêFß³Ýóÿ'Pw€—À¾ÿÓa)„² ü@zH5ôÈCÐO.ã7:Ж|]ž@;4ò-Ø¡÷îì ¼8O†Ý÷ %:ñÎûC&†Ð²i¥WfÂn›å×eݯÁôâe¢¯ŽÍ¦ÿÁ\œÿ ??ñä «àþÏðw;䋿õŽi0ßž‡ù˜¯“q~däy à%ÃúÈiãл<óøãÛJè,3Ï ÏyAÞ;†øÖÒÎÏsÈØ±†ÿç’Qs1ÒLŒ™>€Þ×=öù¦1Ž-‘ŸÑhŒ蔉¬G³.Ëú³Ô-)ÇoRE¸D©¶dÝ™õWÖY…\/ç 2>xœ/ãeõÑm«rŒÖ?ȶä<¸9.z¯¡íñ¡|±Ñc ÌI`¢‰ñ:ú.úºÝhw¿€|æ\È/ì>Ín[OkcÏѧý»Íßµü'úÔr‹i1œÍÞfÅ(&¶/þ®œ«ïþ§ûò³ôÑÑýô¿j¶ûy÷P*gœac'«—ž¦œÃ|.=÷»šcõŽïlŽÑKls,§]-{¶>“EY]ÄÔ»ï -´çNêþvbëqW}³ÌH£aÑ (±úÐ h/ ÿ¹}”ñ캾¦r׳Tósýfç§M| r­²‚ç·ùWÃ:³_{UºlÑt®ò[nY?—ú!ÒL¶ƒ÷qü© °\cç5!öoz]çjSŒ/´×@ŒxN9€®ÏÂìƒÙ‡¶8UOF»¦'y>ÒéAû>îäŸÑ¡ß"ÝŒ–sË h$ÚùkµC<÷eü\Îéuó‘×Q£ Úót0§ñÜ3Äó%F»5?7]ÿýà$ô‡nî;îD¹&4OãyÜÏéÇjÕYsÈ©ö\2ÏOq¥÷žÇˆžGþÎצR¨ÖÌuª <ÿ¢‘k5KxÞ]½„ž·Ö·Z<iû%Zãj¢á®Er½i¥ú-†Ý#Î{è½T®¯L°ûUîÏ0÷Çs™Y]sšÖ3Çê2~Sé"ž‰×öç޾ôs9eÎcžC·A7h2×+Œ/Ï<ßiüÒš÷œmõñ7tõù±óôSiœºã>{Nö È7é 
íN`¥ql\ì°.gÓ…lÝç“ä\Ÿ¹ÞÃsP)QëpÃe:,ókç™Ã‹:ìãü7vhæú\­vÜ ÊÔŽsîQ®ÏñÜ0ÏA2b ê赨+(ƒÚƒr ïv ¸5žþ®6×ÍôPxÍB8yíȆî8‰ñ‘6î–Èy5cƒH5v@þ@¼"×}ÖZ`¦¶ù<]ÎKÛk‚Z‰œ·.ÑÆä?¸æBùì–”i†?ÆuüŒ<7ׇ×\êkŽÔrëÜNÃa”×îh£Bu>ô—=hërw£‘¯>Z¬~@yÚ š¡&S£ 7^S>„¦Îˆ?Ãþ·÷ÃÌk¿oÑöºš9?M'$ +k-—™ÉˆJ¾µNØhçšç°«¤mûé‰(àÎøœ?FصÔ$ÚÆ:Äá¨~Ô¿àç*‹+œÚ$Ô±S¹0øeY ìYÅbÙgÅ{–µ±À¾ö ñ8›»³ÅãlöűÀ¾øß³Ý· Ø|Cüêc}ýwˆÇÙÒ¹0Ø~C<.‰ö—ÄÆíƱû06}ò7Vÿ1äE(}?Ç9ÆÆ,ËüËÝOÆ¿ÆCce£ÖmžÁcà%ÿ 0®6Ƥs?d޹ÃÇxôͰØoç.3l‰fg›é¿ãY+¾QæÎtð3<6·½;! ÀjËý]V¸-fÜ;<鞯ó3J-'1Tp)®!NÒùœ‰±ò?Ï‹¾^¶Îó¬ôàgÞÎ÷:Ù.ÐWr­ˆ×vÐW;7É>û‡t‘ls_?¥¯2׬?¤§d{g í«¢rÝ =äQªe½ÛpÇLé~™£ }A?™(×óæi‡É¡ý‚2Ghšv-Õ©Û @{‹0äº îÍí6ëêRºȵJ¹&Äk'7ÑÏV©¿øá&Uû#âûíÆ˜í.^?ãöÜÙæûð,ëé&Çé×5´[ÿ q=D³Ð_õiTéø´Ç¶ú5äv$B/°d‚B3Ü9°ç½  :÷èuoÐX¤Y…vW:8)öO˜ó+H¹Näý×¥à"gÄz˜†±uª½oÀq9Ò¤IÆç¹æô4i£“ã(úîQTâtC÷*£»Ü´NÿÏ¡#¬Ò¨ý¼N¿‘Šß£óK¨Ø»ë!Ǔǖ¼gÏ@w[¯Í–úb@®kYó]Ò¾¯·Eh9Õkl=ªK§°æºæìçäþ³ëù-¥o˜s { Ÿ¦Q)¯ãÉ9‘XiÅI®ãíAY²ôYçníT!Ÿ YúÔà¸é’B νpŽ  ÖÏœN©×]Ã}´ã8tÑ*FٿЪï7®K#¬:¾öogÌúÈõ‹íeÝ„]ÇjË~.¸Ì1¯ó5c‘yÞqÔ¼¿¼v«é¾õÐà585Wó;9 Eë©Ö^ª;O“'×î¹ü ?§ü–sh\‡yOÕÖøc僳m3ô¼ß¡Ž>¿! Ûzt¬ÔÌý) M)uC–[òg\ÖX׋•±ûWζŸåôX³žÙòÔ}/¶¼Â’Å]ûrÎ!£÷Éœ”†a™“¾íÜ5ç–eË3ì?0çäNJý´ñS´”yBª¥Ç²þ>Z®óóÞœo k×PNe"Ãû Î„Žž„q^}*–žVô{Ḃ±cçÛLŒ‡->±ØÀ¨ ÆÒ@»?ão’3ﯫÓE¸ÀÕÛĹßDêÿßÒ€œ¨Á®€”:÷…ß´ ÆyÔb™a0vºÛéh§ žíxîÙ]q¶Ã·îû¯æã¿š/ÿ®çþ¦¸Gcíѳ%ïÝÓÏoääo&r/ÍFJ±Ð‘®»À&pÀâAu%‹÷*©3QžfÊýŠ]~N++06e,³µÿFסÙ93ÌzÀ{L¨ñLéãœi–?g3ä¾S÷:‚çðZ{lgYm_¡{,­·öɹmA¿Ëõ¼¯öÍ:Uç3Ìñ´±ý¤î“ h¸xÅø™ã´ Ÿ/;Aëv‹ýëLÝÏØlíƒÔå~àôt4Ûæ1ìá5ƒÇ-}›õؘtþÑ´?/»íUÿç8A™riXޝÇjs0¦ŸC™ê'¸}×›Ô+©†û u t+Þss“µ_–çÞ‡4ñ"]ƪOEÕoÞ_Ãûj€Ü“Ãù´}»ß'ýÛãû9¿4íø;”{pMîéÁ=x¯ëE*FŽ1(ãàvœñ+uäH‹€k߉4GÜN½ÕY¿}' ö×ù8Ï€ôFð¸Î—ö'PN¾‚{ j0ÿÒ±½vÇ-–›ðu9ÞÞFMЉ›p?ÓÝ!éÇD§&åEV“Z‹ûÁÀHI…F¡¦Yç:®ß»Íñ;Ï+°{yÍvã>éÆñ) ÷Ì¢áz XjìtÔ;•©J›BÉÈS/€¼~Í?°õ:@jk`> b÷Øëä–tµþ´’ù¿½¾%b֡ζ^t®½çÚ«qšù;®©ÄîÝ8×^ŽsšcÖ\ε^†²Ê:òpô+»õÆ!˜·ƒûѾ>Æhdr~ÔÔ×–ª ¨Û 0E…Öœ(Ï“æ¡ýÊÓ–Ë9ý;ÍûQ Ú¦ZsnÞøÚzÏAΧòÜë¥j†|"Ëz¯ï?Úš¿•ïMtÍÓö§ ÜÖr›*û ÞÛqÚ›&n[Ä~ê'¾6Û å„¸-’󒵈c­”ò\ô²Ú”Zr‹~x–MTŸ±_¶IIf›¥î×Îíú_³½ÊU³ÌöK4Û ñ>ÜØæµ9ž~Þìs:Ÿ–}ÓWf;)ÛBž‡Ä¹|Å?ù¸ò{0çÒ—,ÝrSŒÜeËsé…–ŸM–ŸÓÝ[k7èKRdŸüõ义®qQ?¹7úr¼2×Y9©çÛóí2ŸGæÚ¾;.àõÎ[{LoΛuŒ’ÓLd?ÍéøGèeô»É0ÐÆÉõžfã˜OŸd¢œ.ëûÙc9{¬A4D[C©ßƒ.Ô—÷$Éþþù¨ñíc —3ëå¦^gG´|3*¿x-؈Ö< ­÷›Ð 
ZÑÒ¿KäÙF”˜Å‹H’‰­&þþñIîe’R‘.é­t{‚(ã/D™p—5›(ûD9ˆîDy ˆbþÝD…[‰Š¯ ê’Z²Ê¤ÉÐ#ƒ>S‰Ê"PÍŽþT1ALú›TèDƒ>"ŒûT]D4Ôƒ*Œû×\BTû9Ñ…Ð,ëè3F"ÌÑx®‹–— ›d2.•h<Ò|Âa¢Iè?θït<ßUЈ›æžÊLOœ;ÿ'Nœ8qâĉ'Nœ8qâĉ'Nœ8qâĉ'Nœ8qâĉ'Nœ8qâĉ'Nœ8qâĉçÿQþÅ&úœªèQr’ ?•ñWMµÙZ.9Hì ñjI[qFðçÕžtµgkinp‡ÚCÍm ·«m´r_Mo5„»•ÉcÇù`3Ø 4š¦æÁÞã"›ÁnðЉpä«!0¬‡ùŠš«æ´†‚þšj&üf"Ž>µP)ˆc¦{ÁZ Kwl3,»ÁgòJXíÖú@?Ľ[ë2)Úæ^].W𯩗KcÛ¤FS^<Δu£LgƒMgç÷7­ûÔš²Çy¦ •GXz¼å{jÒÕtmKJ._[3Z|@›Án Šðï÷â÷´Hæ4DZ¬»Áëà(ÐÅaüûþ½/Þ'ŸxÊ@5˜Ö‚Ýà(pŠ÷pô‹w¹´È#ŸW!ÞÅÑ/ÞÁc½ƒ£O¼³·ÅÛˆÚµVT–ï'¥eÖI°È:é–mÒËÛů[÷D‰*FN£DíR»ÓPê§vo-:?Ø®f´VÍ ¶‹ÛB¥Áu5}ÅAj19ˆRŒÓÁ÷޳7qö&EÀ}`h(e8úAH¿oR_cK¼ÑŠ`ÚÅë­ÅµÁštñšx‰º!Å_/KùK±OÊWÄ/¤Ü™y@ìkÍ RM®üø!ýe¸î/´‚FM²Ø´ âXªÁ0 Ü t±[tom p“]tÀEpÙJKùmpQxn0\|! `ˆŃ/ÀkCk‹E¸xåC0ò¡øžpƇâÛ—ãŒŷ܆3>_}ÎøPÜ4g|(ž2 g|(3g8´‹5Û {+ÆÌSB5>q#RéF¤ÒH¥I7ò?:®qÜníÕ )¶:\Ú³W0²S‰<¯D.U"”ÈL%²P‰Ü¦Dª”ÈJ¤T‰ä(‘<%V"»”AHŠˆÞzб2œ¡D(‘g•H³)V"EJ¤P‰„”Šp»ÈoÕOŠaR´Õp¥ƒ¼`(ZŸÈGŠæ£Ìç£MØãëÀ¦0…º›Ž3óXvoëUmšû .Ÿ_3Rì…ǽȆ½ô; !ƒö¢íÅMöâ>«Á4°Ðáº;"~¯<úp,Õ`XŽ]Fç(4ߊâf±2+ÒcØ$öâ_wüËùá\Ž¿Ô?R½7Gñå)còŒá¥ÙN’®?N¬Ò®[€Òÿ²·[ endstream endobj 36 0 obj <> endobj 37 0 obj <>/W [3[277]7[556]15[277 333 277 277 556 556 556 556 556]26[556 556]29[277]31[583 583 583]35[1015 666 666 722 722 666 610 777 722 277 500 666 556 833 722 777 666]53[722 666 610 722 666 943]62[277]64[277]66[556]68[556 556 500 556 556 277 556 556 222 222 500 222 833 556 556 556]85[333 500 277 556 500 722 500 500 500]97[583]182[222]]/Type/Font/Subtype/CIDFontType2/FontDescriptor 36 0 R/DW 1000/CIDToGIDMap/Identity>> endobj 38 0 obj <>stream xœ]ÕOÚ@ ð{>EŽ­z ™±g@BïÒªÒúGÝmÕkH&©„(°‡ýö ~[¯T$~†€ýŒ&›Ÿ¦Ó­Þ|_.ýc¹Õãi–r½«¿Ê†€Ç endstream endobj 3 0 obj <> endobj 39 0 obj <>stream xœœ| `EÚvUuÏ}õôœ=÷Ù“¹29fB€„4„CA$(W€@AA²ž¨+x"…u½aWqU<á ‚.®ë¢+žë~à~ˆ®kýÕ…ÌüoõLpÝÿûÿI¦ºªú˜êª÷}êyßz»FiÐJÄ nÞUWž6ηBÍéÖ,Xvñ’-|MBê2„^¼xù‚á·m !d„C®~ø’ùs.zïúë9„n…ãQÍ%P¡»ŽÔAù”#—,¹âšýáÝ ¡üœX¼tÞl[µ¡;œik—̹f™ò¿U^„îm‚ãË~1Ù ×ú¡¼ öoa?Gˆ½¹aëcæ"B…Ã¥ïßò7À>ØŸï)Èpö¤Ò·ø™÷Êé$<¾¸E¡Ch úú5ÔUã?¡§„LP1ái¨­GW£÷Ðä·PD¡oP F—òÈŒV <þ%z Dà¬Zô.šÖ‘:&Éþa”ÀÌf|JÃU&¡û„+& Z(w/ô úÌluªPQøïcß(ÌE¿Åuä}ö9ôGÔC,Êß\¸£ðPáaèÞŒ·ç÷…ÊÂ8k2jEW¢ë¡+Ñ£è-ÜLêÉÞÂjhÓ4hà ´ÀI±­ˆGÀÑ· 
ûÑ.ô:ˆ>DŸaŒM¸ ¯ÄïâC Ô³?¿¿pnana)…ÎGMh%ìõâ(N¦3Ó™g™zþ;¤àƒkOBW¡kÐuh-Z‡6£ÐGè/˜!Z2‰LfžEnT¦£¹Ð›ë¡MO¡7Ða¬ÆY<Kø6ü ¹ŠezöƒL±È=xŽÜû¿BAŸnBÏ£ýèmô\ó[èS 8‰'ã™ø—øV|¾oÂÏàçð?ˆ‚|È0ÌìØäß/h ž‚ßu#  8ŒL-:Æó-ô%Ü_§pþ3I’ƒY}O>_]SXQxµð £[FÂ=GS¡ÕËÑÍhúœûú:†þ ½Ä`-æ¡/8Œ/Àâ+¡Ïâop±ÃøÕ’Åd+9Ä$™·Ø©ìs=Ûò¶üÖü7ùBas¡£ðûÂåñ­ßi„hAËÐåòˆm‡ßyEGßÃo(±Úz÷{?\ÿ0> â¤&7gH©gÖ1o°{þüü’üýùÎB¶0d‹A $ ,ü išŒšáÚ7Ao>†ž†‘ééy}؇+ð¹x ž†[ñ%x)^†ÛðuøzèÕ§ð6¼¿ÿ‚¿&,QôS’Ì#7‘õdÙOÞ'GÄ\ÈLcÚ˜ë˜õÌ6æmæ –cSl;žme—³×*‚QÚÕ<í8½¤gnσ=¿Ï—çGæ/Íß‘9ÿ~þo]aoá3¤DÐÆft1´ñ—pÿ·¡»Ð§¡Ÿ¢ÏÑ?`Ì¿ƒ¾`°» Å~yÜ¡Ýã¡åSq3^—àEÐÿ+ñf¼¿€÷á—ñøþ3þC0´¾þ†‚L& à$›Iùþ¾'?2"“bª˜jfÓ w³Š¹îç×Ì'Ìg,aml%{!»‚}MÁ(.RܧxH±_ñºâK%§œQˆ~óGò2;ŒYŒ6¢&Â0_’?“:üKr ÿŽxñËðk^¦‰i"d("xHùdU=¤ *ƒÄŠ8U+½y€¤™©¬ÈèÑ oˆL'·‘Vô~"瀤]żE6’ÙÌCìÝì0üZ¿‰ˆŸDÃÑp< Æî]Ô#”fžgÿD¯¨P3§Kˆ¡°Šý\A˜?Öc¼‰§ãnÜDìÐ[CÉ]( ewÃö\ÐÀ@òwᩨ–=¬!cÉ_ n1Z_†{܃“=ø·0.µ ¿ÀMøa¦Ý€Û 7£Eä"ËHäy2ú|¶æž‚±‰ˆe d:DšaÔ߯<)Ç7€œ.Awàv”Â=xú#ùªÁó™—N =eŸîÆ[˜sÐ|Š}ƒ}ƒ°p¥—¡7+=$Ç#&ƒf¤¦)H ä¿ðó˜Ž@«|XàÌ#%8rÁYGJÅ#¥¾#1¨CuéT`T8ÐñÖÈp  OŸ8 òwŽ 7:ºåüx9¿NÎ   QÎKF:pk`TÇè«.iÕ:.·E§m 7ÎצSh‹VYä:áe[°c–3Ä1jÈ‚ÔhT‡+5Ê?Ó¡lìPÉ?XHïÝØ’Ú×¾¦‹Cs[“ú‹ÂÍ™9­ƒ™ÓLÜ„ßÙá¸ö¨³¿ç§­¸×Í´r. 
Ðb{ûª@ÇÆ‰Óî Ò´¹®ç’èèÖöÑðÓk Ç]€_#·6OëÀ·ÂOèл*Þßüð(ZÓº(С _Ò¾¨†ÆÕÞ.XÜêrI» GkT }Ò´p°£Ánž3ҳŊÚ/XÞ)HáÌ=éÔÎ\ìØ-FS)£7 ÌÌïÛ'çäÃinÜ}=‹i‹Âç‚@tæ %ÓÂpOµ4™_‹ÚçÕÂaðiÆpVÇE0" ;4­íÜZOÏïPD¹p ý{îþêÌš9¥e”ûÑ,•“>Qƒý½ùŽd²#‘ "¢j„1…6“˹têª.²0¼Œ Àº5AßÎi’îéßÑ%¡¹PèX9qZ±@sÝ[‘”I6wVºg_ïÛdºgeïž¾Ó[à ÉÛ%ù¶µØ÷oâì–Q— éÀöÿ°{~qÿ¸ Ãã&NŸÕÞZêÛq“Î(÷×öí+å:,Ó7)创‘÷‚PÎì;˜¦é;Ø(ü+e¡¾¨K¥©”kp`t×zN1mÖƒÿË“º ÇéYò¦ÿ´R3;†$Ï,=£|Fóôí 4˜ɸIÓÛÛµgì ÔÞ>:ÝÞÚ>§«°rn8À…ÛwÛ—jíÑ®Âî;Ü£×4ÃM\‚‡€´4bKß>q‹„o¿pú´]`?nŸ4m+P›ÆÖÍ["°oÚ®B’\Kúji)@KhIß Ì‘îrï’Z)ïeå ¹<¯ #¹NÝ[‡Ñ¼.R¬ãä:ø¤·ÂÜÈ^¬ºƒThô¥ª ë·\+XšaV©€Ì†!.ŠÖíÀHPO¸Î™<Ÿ;Q7¾§î|îdÝx®§5ÔõÔÑoeEµ9hŽÍÁ‹Yt:Àì;-)Ð)`÷!LZò§É|<¾IŠÅõ Ž(F‹–·+• Îa·Ø†Yã5ËFc!š'xÞÜȉ…[é/¶Œï9QÇusGáçêÌüàÁ˜&•¸gy~PMuÌ?* ¹VžÎDÊp(&‘´Ô=ÓyAuÙ¬Y—©Þ¨>)áï.Ç_Ö9ÍZý›ù®Ç7å»ÞÐkÍ‚.„ÇæÆéüi²¢ÔÚ¸†h\\,m±†W:ìœB ­Õj¡ÑÐ^L®¹¼ï›³ÔÞ“´½G¡ÁrsÏh­•´NÌeÕð¹,‰AZï°óv²âß¶öÛËó…ü³!½­}Ÿóø&|ΛÐZ§.”ßA[›Ì"ïàr°ç«%ç+èÏè:&ØÿyýÙ¤ò«ˆê|?Ò¢%Ø‹œIhÞÑž£(Ó-7(ˆKÍÊmÎà…0ƒË{>¬ Z=ùÙMT¬…¬IqIz´îUAvÞf* G¹c(3ž^È̱–Ó¿#+®¹ÚôVáo`^‹ È#iñVµŽýP'—ìÂ>$KÐxè8+*›2,7 †LŽÔ6MD“o'Ô9Ÿ~á÷¦2_*–€X,‘†h4v,h˜Z4X3Ÿ«™¡¹Ts¾F³Z½Zs~@³ ?¥Ùvà×ðš÷ñ1üwÍIüƒÆ¡Ó`]~};£†fhºðVhÔ õ‹3˜»ðž-/@¯œhéé>Ñ]ê—¶–Ü×15EñbŽôÌ4»Í‚–<¦³Í‚"ò¯iQÁ¤·)žt“Äà3¸ï/ÔrËàg;y¢ ï.|‡˜Â‰­iu|¸òe…(Vø'²Ã×VøçQcTÉîˆ+|·ÕkLÓ3…ï¤p\á1ú!~‰ÚçáQ9Ž) ¡°1Xϧê¼BapÕ£.òÇ•‘z£Pñ›ÝX â—ºµØ½ÜIèáИn<>óà¢6.—¦“rNt Á.Ø« PzÜ^·Ïíw³Ê˜X&ÆÅ„È*uz­^£WëUz…’C戄—„“ʨ„ÒlFÂaSPÂnQŸ’P9$ L“d>ÉQméƒk~É%›Ùg¬>³£ÁL»ÏÇ7„º §$ 21«Ç ‰›ƒD0Aâ06„i³Ú ƒ„±ÂqŒ×5¤µØiÎk‚ô"_IȘ¬?=Ëß@´œy˜ƒ&ÅÖù¡ÍnÆ6NÖ˘ÿ¹'CŠÃÿª,ÔÄÄpˆØlV(;ìÕU|ŽùâÆù޽¹Ü;Êä€Ü¸›Ê}#9û¤Æ„P6xÌ“βÁç¬ÙHþòvþÛG¯š Þ]?åò·1Gó¡»ë¦¬¸ú­ú°ÎÙ·ëê?Õ‡„î£Úv ù öÀž-[yµ»«ðƒd2+‘Zã–ÜM|“›Õ˜v“§?$i8½ÞĽ¤QZ£€+¿¤.Ñqï¶î&€ uñN¤Ð¨õ±î!7"3r?IZt±ÙŒ/Fæ^$Ëýÿ©(AÝu\O7wB°în€/Ç`ÄõÔóƒ3NÌ}bÿ…Ê Ô"²9XÔç ¬ÏÕA³¢ê‘u8às¹|=‹iŠù¯­“ U ì§f:,¼ÓÉ[lÅ¥`6Ôè‰ÍЀ.%q`‹’4Nš¶Ó­K*X+B]xÆv­ÞZRŠ4ôÐÆUV¸wNý—”rG²ç˜®5Þ»­ì¶øeOÄ÷è·%4^kÏékl<œð%­1_YXoÕQI1|ÉwÛÿÅ÷ØÙ2uoO~²³Ô‘ŠñQV6¨ÍئÑhõ®.üã6ù·÷à”êÕŸšë£Ã d)˜y¨õÁñ:²Œá_õj%wòUJH(ôu7@ÿåºq©Q±A;=þï´G¢-è”%l–°Ão•0¤¤]7ÞXìoø 6Ü–l”'» P82hÉeA\•*ei¦)á—R©Bªr«zûô!Œ¾k›äîºËž”=gv,Ü5ç‘¿‰3®Ê¸{RÒ•×ûzé%Ê?qC‹S¥upÏú¸}Ȝ˯Èò*«¿/ü…ŽB0ð‹k1êÔª®ªÊ™‡DÎŒ6Öþ)Wo«½—]Ÿ»¯vSî‰Ú]–ÝŽ–Ö·±üÕñ•å_ŽBÆLÏÛn ÁÀ™»`=‰«Mºd™™É@CœHö Á(S }g 
À§ºðb}µ¶Ûùze¸¾¦ $­­žñx3®!™Ý0rãN0¸Z¡4|µ¯,À"¦yôèùÜ1èûñ ¢£ÑsŠÝ€“.e‘‡s4=Ù\$j±²Šh6,a‹Â&áHN”°•åwÉãr#|`SÛÒV‹jÛ°½HCľ ¾ºªÆE,ŽHµC.É£Ô«#ÅAb,W\û}×â/ÊM޳>ôìݯÎÙÑâs Â9më¼~êÝ)ά3;§.pÃç’ÍÙísýùÌ Žçœ¦Ëw.·îBªK¸}ƬuuY«ÆÁ•ÕOÞ{ˤû`nzŸê°/ ¢w$Ìçâ *¼~ºõد÷%»ÉÆwáV‰7_²‚Á‹ |…!A?Ð3v2 «ú >ÈoEF˜|`¾òz¨Ø‘ êì6¦‹Ü,™°Âx±×ëG&Uðí&—¡ ž!é@‡°bY›f«?ÃpDú†£m|ÏÉ–¶º:P‹:`y\]7Í| ªŽƒJZm¬XUžü%·´çûCu½[SeEæpµ¹—OôfJ@Tm6‡1Ãô¼‹ß}~´ßåò–Óüë4}$•ŸŠgÏab§ÿHû.ÿ}/áÙäpOä|?•sè¹ú/)¤skh¥ ­Ü¢aèÄ­èJ¥’!/iôÃÅ6dµÙ Ȅޡ³éÃar±Nk6qZ–Óëvƒ&bòä6‡F°5€>/Ÿxw²6QeZe,O)5oã\°Ø9Èà^@gnìy‚b ÃäŸSÛ¼SÉ.eµØpÛ©×]f'§å…?›ásÙfˆ¢J¼JÉÿ.ô&ú}­g]¬×–LOMÎ' ‘uºVg»óü úAÝú؆äÃé§ðc±íd¯v·~wò-í›IËr¼)H*­i`6[=a_Wá¯[+Âå» cã‡mfuYY„Ö%ÊB» _¡háË­±PÒ >Y&©Ãõñ¸Ò[oQdꕆpþHââq;'Ö3ŸºêììÄÞ…»%]u žû4U¯ªÎ2;@DO´@J¡è˜,¨TNeѬHWºýf«öñ y¬€Cå*°*0úÍ€Hn$iuFB•``ôtbý©%ZpKj[bJ¾èknä‹N0èVªAá„’Â 9LsØ)×Yõ 6'n£u6Zg£ug˜Í}ó7`à ^(”} ƒäiLmË€CEi8ÚP´'̲eZ®X¬<fm£ÞH”f 3ÎʱJE4¡)ÓŒÄÄ-B‘Ê‚ËYjlªãPi„$Ì¡~;J+3}Ø5¼PK’VîÃ0ÈËJZU‡<®2[0ç¡\6&öï fïðÎYSkÝ»ñ/d‹ëgÞpûôÁ.§YïˆU¿‡«¬¹G^úÛß.zyuüáò+.zyу=w­zö³­W5Ý—iqN³CgÁÕŸ'><°~Û«;%) ã,ûJ˜¹È6_¥¤1mµëÔ[‘’ßƒí€ ,¶o×éÁÓï<©Ï­êBÁg¸P,?çPéO˜¹Mƒ†žO¿=kû¼,ÍÇíìBF/·bÐVUw‘$·-bÒ ./;Çðoâ3|Ïð‚§älÊÆ¬C`o´-½v_‘ÈžQb+OË–sMäÉNÊè7ÿ®ÃbqÐ/´çƒÂ `ý™Q=ƒ[v!®°¯ÓãËò]…}’†÷e $ Pê„­¥´uÊ[g¶‘å€Ìnõ Ṍ!Ì1—¨^\_3¸ %mMMU=fÇ éíb’F¨L?6C[ßÅ0’™M)h•‘”  <7xX pßÑ’Á®–«‰ A5Œ [‡§T]8#éì¶HÚI7¥pêü9:½F—Ôˆ 6_ω–JÔ€×r'äÙ´€š–Gå‰E¦±\j‘UႆÆhVáL$ãɲd,)&J‹•·š­œ•UfÄêhƒfxrFí³‘©Â6ªte¸Qu’zXv$…ÙÈZnœõYc¡YVÔ‰d/ÀõÎè¥"Ε&¬šêÒT º`3[ùêª>¿jŸcÄ\tŒØmf•aFÙ«Ì !Á_÷È}Á˜†¤§bTçý÷3sfg]ëCM·¦=çsŽÛ?zAû"«Õ`q޼åÞ%sÍQ+.×±ìý‹¯Û2ç²5!Ò°åæüöWòÿÃ9¹€X?4ë¿oÈÄe¸ £ßÞ4ê7 {ö  6 >‚oštÞ ’bFˆÝËÞ¨ï’5fèQ÷°Ô ôþ\×y©Ñé&¾É>Û5;Õ”þ!aJ¢D"UŽ Ik¹.ò¸d7¬5l0Ãlˆ› ÎìÕšùpœî2ŠbuBã o8‘Ò0r•RY- ¯†¤‹\e·Oáív ïxsÈC«Îñ#ÿJÿ:?ó¶ûãn¿ßãö†Ü.W*‘ð¹]V·ÛÅ›Í>’Û* ‡µ5¾¤©Ü_NÊË5B:%º,¢K ®ÝxJáa’5!º%“¦™±Éíwqw³@(S;*ˆhN‹ün< ™AöÍÚ3•}Ž5™12O0c.˜Y3Û™µØÙUrï´,œ¥“f{d?µ¸è¼Ý";ôA^W)dskU¹3¹ê—`u©û|>ß¶´eNìXñÿT”ÏV½¢ß¢š9Ë`Ã%‰ â³v0L˜a®ëù°í7Aò ép|ù²WéwøÁárõkÔ°Û¸þ ÿ§xUþ­^ƒŽù’‚Ë©Wú ¼Ud^Ï#t d*ÈP3ÈÅP^ 
½ø|bsòÚWuhkíÉGE7$Ÿ‹*¯‹¬ˆ^ž¼2½V»ÖzGdmT=™›Ï­Ð.ã–™—ñË,ª±ñÁs#ã’·U¦¡!Á!цÄÐä(ÓN­ÉOÐu'Ü™°)‘T/ç^ˆ¼–aFÎ^¸-Ð^qo`S`{@RƒIžDÈk'jEc¯º"`dÂeƪ@Ìí1Qíóú*«ªìjbW‡£&½_ŸÑ7è'ègë—êUú.|³OG(1™×™÷™ß617+Í®l¬ ŒrºÄr(´P=vyQ&(˜·•ÖuZdcœ2`/ÙÄäŠ^‘’ðLã[Æ,_$Å[µ:‹˜Œ&¬é4ŽjÃiœâãiщiŒú™jkÁmmm-ð‰šÃfU PJm úȶC Κ¢ë$ˆQ_Â=òꦛ¯mÚ4§g -¿Šã³'Ô¼çê|'~jâ5Ú½#ÿçIÅáÞ~탳3ÏštÇ\:ä¤&ìY4h­§íç,,]3 :aEá0{û,ªE‡¥kÒVœA hbv›}Šc¾õ"ûÂòeÖËíËœÛÚAžšŠ±ö±533r‹—änõ<ÑVWšîFŒÚhw ª „}&bx]x[’ÒÝÁú¢ÉA K’£¨n Š¢kˆ[4Uú+3• •l¥0xÕ€AßMÙROí~Ù_ì}™.•Ö½ƒ)sÞ„Æuè.×™88°¿ÙŠ(­÷¾Ún·;LƒñÈÀxä0/¹fû—*W(³Î˜äy¯.äñçÂa¯‡Ñ()×2ùèVJ™„å³¢ÕåHZ,^W¶œ 8©LærÞòXšú:H")ŠÞ´¿ /–ê\‹ºpDtåõ!¤s:$š<øOÁC<Ãip“f£æmÍÍqB“År”æÒ$Ý3¢=À¤©¹À’á¿áSRZ3v©³4rÝ=Ô“x‚Îd\K[7@[ ÍzŠ®DúèÕ æ{Ë¡º¾L Ñäb2Ù»£¯ž.s`s¯«ÝÜç[ì%s¯  ÿ˜R žLn£Ý~z‘6ØËiMÏXöƒÁ(8I.ï—ç±ü¶þÙ*˜Ö¼•7[Þó5MgÃ(­ÔºP«‹RíšÀƒ’Ḡs®~tdŠ®E?%ò„î‰È ÊÝz v„E},,Fj"Ê4x<ykrª,Õ¦*\US^U•)÷æ´jŒK[°ÏáhL×$ü^Ž ºëÄšŒX³ —c-Á¨‘ê±P X­’ˆ²ß‚òò´cäMj¿š¨…úUKÏ‚49X€“鯬K”i퇶¢Ã²äf9CÅZŠ:V,pûÏ@½áZ0 ¾BŠÂ ”s6߲—Û#ö=Ü‹}~m-üÌÕÊI Ô¥U‘^,,ºâ¡ÈRì\Eq݄ʹî½ê­»&¬þzÍ5*êËqòfV¾sÝ{&Ö`ôéy7M-Ž›†³â­ùûs5Më¶®~°+Ú—VZM.ßK~Áá¼xþ]-W=ðÎÉ@ìÄ‹Á®‚½ôn)è]#~EÒó¿±?—é´ïͰEšª3$KìÔY'çÅÞdÐë ½®T•\…28¯Îdªª½©º´Š35øHC²±¡aD£·®ÈauÊd‰Â ¬Î/ñ×dT¾Ž© —%#eeш794G«Q-®MfkksYïÐpȇ0ÖUb*• ˆ®¨˜LùjÝСZ ³Õ¾HÖi”<þì†ÆçÉÚÆÃ¤±‹ì‘Ü£x_0höU‰¬#Ìò6!&2›,% yìA#ÑXü>’×âAe)ñÕMÖÉÖ.ÕÜ:ÊRe>BSs‰žœ-*ÿ¶ôó…ÿtÖÙ×ɪìrÊPCÔdm°Kdw-P€¤è> þd ¢}KÁŸÔœMuoîyW†Šü'²Ög)©ýQF’^æs þiMvvï1‚©ÉûΤ»2ˆœ‡·õæOÛ{÷ƒÌ}ä÷ï s~ô”ΰ劰>`X¶Œ'㦨ÖWX+l žßùŠF½d•lã<¼|6I–}¼ ’ä—Ëžäñø‘W(ÎÃ:EMivò´³Õ˜m6ÞìuúE'!¢Ú$j4jj™'p˜k;ûæ_:ê0Ø ²!ý¿Ê7Z?Y“>Ã-&¿:k]úˆL0e/;¬¿³ú;“®|nô}úMDßI×ßÀ¬°\k]MÖ0k-íÖìj ÑYu6æAòˆêiÕçÜgÖÏìJ–[ÀíàvXÙ*µç€Ü¿ç/N§×¯2ñ:ðÐÔé P*Ì HÒs G^ ¿ç*3-0úÔjÝ¡¢;Vª°Jˆ=² ¿Wt µ} {ôü’yWŠÎê¦ñGÀÖGÛCœ(mv+¤f… PפTYGsJÀ`;áÓEbBU/™(‘wèÒ"fš‹“C½ü”«ŸÕ³ÌóG½s1Ùðø_½àÜA3ò;©À’ÅÎíq=pdê<\#‹ïwcÆ”ùîšHŽõu3FÍ€ˆ@/ I-δ³Æ5hž†¨×Hæ‘ÁÕ;ÄáãTõÇîÆ“ú °lÖÕ Ü WŠm.ºPNA×%ÈØÛílpu˰ÜS'“c®®øWšÖW•'i¤€$_H³g$B$Ž4$Ö²†¢[ªy•â—û=Á9ÐGùψ=лÐÒöA[eäêêŠ!$»P¨p¤Sˆd©±¶Ð„f ²yôsìh|bcäè-1†§<$/,oìçy¸•ÖŸ—¸áŠáÇåÅý…ƒV‚vLí¢ÅR-ô%èA·b‹ú_J|¸ŸZ“†üÍ{ÌGF£±µûÐÛè]ü¡çïItŸôj£(æùÄÚ1ž©ž'}»|‡Ð!|Èû%þÂk˜æÃzYó,¨‰ç/n1™x‹Wï—© 
‡BM!Š‹¡PTôú32¹ÑUU×TUåj¼B.««YµZÁzun[ñbNlrúÄ·:6«×]^VÔødS’$ã±d²,æ-ï*Ü!y¼<^¯+¦©¯!Ÿ×g…*ÐV¯¤óEE¿ßçóxELËc=wí ÂØD7)ÏÄjÄLF§Ó³Q¯cµµ^ŸÏ;¨Æ“ÐAìÍŽ-=ÛSĤX<“øœ)¶6övìHì8Ôu‘O%›×gc²ÄcÖãa aÁD^.Ù-†µ²¾ –ƒ–Öo,¬EüJÉ^O•Ø%pÝNóàLñ¿¥ Š-Éd›“;æ’c/h-óIOQÇé¦Âƒ\(j>Hu®úåþUêrgRñKnÒùóÔªíÿŸµÉóú/€Ó·á0þi¤G¯Šbü³Á aòhkþEî!yây“¦cr4ý†ÿIæTÅø>7¨.OAÎf=)rèL:Å|IµLyöFâ^*ÙÕk<‚‡¼F°+Ýnlw³:³,dÆ8o4šAc£É¢0鎧ÊÊ’)oTËʇ¨ª•Še€ò[å2XÖ‡”9â£åP°Ú ú¼Þˆ›`û Æ™äF(%Es¦Ô„ÔìÔÒÔÚÔá”2å*'ŒwÓÃ-ülËRËZËq k²`‹riŸQ×F­l®è!¡TãX‘¬Õ•Èš·#;KzgVâc«Ù ç–W[›e·òÿ3±û))—á;þY¯ÆgC9K÷Ü[Äêr@ŒÕŸÅQt*r’1¬ãtýY¾ãÏ™Wûy A—¼¬pði©ð†éUðÇìÇœ?r?ò'ì'åkö¸ø÷í8ÿÎýW¹8o³Ûìkü¿L'-Ì#š{õ“§Oi׿©|S­¾™¬QÜ©^©_mYm»‡<¤PRRWkêôC¸j¾Ú>Ä©N¤>ÃEù¨=ãJT/˜ör[ù­–­¶û^çnAý¬é9nÿ[Ëc¶ÇíÏ;ŸÔS-í-Πܽ–õö‡êQ–Q¶Qö±Îó„é¦éܼ:îbª± ² vžoËâÕ:¥VíVºÕqS̳],`Vm1X¤rI5GµŒ1JÝÈT6"ºÚU ®ÆkKáÝ4(‰.PÊîJºÆK¾%¿/hnK ÈÄv»Öcnà» ';aËu~èä vòj´ºìN»·ÁI LÖ&îú’n]…÷ûÊ:ž–_¡[Mik¡[ Ê6z^q{B2i¶ ü0‹LC,Bƒ¡´%tËÙô¥­“® Ì–a؉>Dsÿ>r˜Ê1¢qýÀ‰‘™C ‚¼*Kh¸0õ©òì%·³ú@þÎXýõêÉ_¿¸åVmzñk2úÉü§q36bž¶1ÿ·§Þ£óo|òeþ<ŠÊV' É @’0J£ã’“u±n•ù-nÞuçܣܻ’Úë*|-qWºnq‘˜:¡^ïº×OÎæ³ÿޫ죧)Ù #_”7E"$q‚Iš€»2i ˆœP~²ßIÝ륞Qj]´ YÃ#ô}¬bHt&j7—ž ùß›k4¢•:?Ï6ÛÎârhk´´.⎢—wà]_<û—1U㚆LÉÿˆõ-{ú¦ü{øHþŠ35ú«'Þ­uY&]xͰyÒ~§þÏ— ßÓhþÍ.,ì—·%­ç°¹•WV2ªäʱ•Ó]Ó*¯\‘º&wgnSâéʃâ{þw‡Å÷Ò߈f0k+GùG¯IÝêoOýÊÿ[ÿæÔë7‚Ç’ßžÂHƒLÿvŒÎ4!†ö‘?H•¡t*ì/G5%{ |™rÚíå´ÇËËÕ`jˆ‰µ¨ý»Éµ(M6J7â㪣$b± ·l_áYë—Á¾n m ½:bC”‘˜Ì‡3ÜqŽpBíØÅg®µ´m9ÚBmq˜÷O–œ2Ýòª!¨B¯0p­è;ðµh\_òêmõë» ' çOlKêsv? 
ÄÖl ´½7ö§äÔ£Ñÿ™´«ìgðóhuŸÈLþ)S?ýÈ·><}å--{xóÒü÷Ÿ]Ö9ñ©åùD›{¦à¼öËérÃþN¦äŽ—r“š×NºÀ.˜¬0'ŒD‘õÕç¹'T·T_m¿Í¾ÊµÚ½fð#´çF'T$žþäˆ÷Çß;Tnz“g mNJñú¡.§IaEx±ª"Ì”géš’Y'ˆuuYs´Qw[~G, 62,(~P^Zí[ê#>×hkTªâ4|i|E|m|Cüù¸".Œzd7öˆ(<Ú Suñi¥âzSï‚SY„.CIe/meÃÂÀÏ^K*Êø]¨(ö†Ä•blJ±€1±/²”Y_œKyVÛÁóÐÑyßG——n[;§ê‚j¿ÙãµÏZw`uÝõ]yšùëG¸Øgs…Ï9çÖ[^¢Ìl-èuŒ§wI “FŸã@íB ]Ð jEŽ.´Z{ ØQB˜êG@„.¼tÇ™}p ÈJΓñ´zzX“§Á3Á3Û³ ´éyÏaÚó÷(¥ÔÔÿr¢ÇÜ ÃßYëg¯*üT„{—ú3dÝDz—J¶?Î?)Ç>K»ïL'Uþ¯TªñÕùÕò,=t!Èãõpß8´yzü…¶ú9}äÈ smèj÷Qå1Ï—þÉ÷ÊïÝ?øO4:Â*±[ç¿ÕýRÉ;‹¶“³[µ`³9/_tˆQ§ã(®@Þ„Y[ôTÇ5ƒVã5½cÄê’·£),ÆãN‘׊¼™xI„‚>Œ—ÂØš€fӇ̪—O­ž ™­YªY¡Y«Qh„Ê̳EvlQ©méwqõQÎÿ/‡¯ü¼— +µ%ßôxoP~Ÿñ/G¹ÔäÎb’LÏW¿[öܵc|.£ÞWä½tÓ…«/–­‹b;¬gÄ–ãs_»†¼#fÐÊöÈ;^9ïÑyrM¯Ì•fž$n–<*¤rV óœc“­Þ»¹·½?:LjŸDOz‰ÞZ\àªmgµyõ6{(A«à—‰‰œØ*¾-²¢OŠb"é %‘Nvý;—ª0}p© ,‹8Q©âÕèÎs½Þj§×+8½A§Ãö£ntÙ™Öá´:N‡=!†1hõŒ¨ ƒz½Ž ¬†'Å g“³ÃyÜÉ:éÎAÄŒm¶m¯±A¹³àÀŽÝøfd'ow¦äH”‹hpà±–-rÌh‹¬+½v$ýËdz­É€rv€‰lMþÇŠÒ0ƒ•P ü“‡°sÆÕÿ®–lº2ß<Üa5¬<Øi1-ŽßàÛ”øÆN+œ¸¶¸•Øa›^oÓÓÓvæËeŠ>À–ÙÇalËÈ·EW”ä´9‰Ý¡P²@­Ë\V¥ÐM„ØâÅ©“Gü°«üt“táR×R÷RÏRïíöÛûû¬_Ø5­\«¹•oµ° æìœC²KÖIÜŸà÷úÊâŽRc¯tŒ&£íÃÍx†}šãvÇ“Ž7Èëöá¶äå3×Äa.gå8‹Õk°Ú‚1Zë‹"Ë"E¸HSd_äíˆ"²®,‰•yƒeH¯”ј4~ 1iöjk¾Ñ@Q×)4¥Â«W°=ÄêíÅÞœàõºo@p"¸á@Wþ_RÖÆ2«‚e}6«æ21§`u:‚ ƒ}Nä„!˜ñÙìp„ˆŽ.r•äsŠcÆ&2¬:&]ô?°ˆ¥hÐüN!3\  Ó[¤ªƒö X9AÊÖd…•È„#YAcYA”Leþ²Ùe+ÊÖ–m(;XöM™ºlY$Ðö±Ã§Ù¥ |áT»äÊ™ìßÈ!ÐÓ¶IÌc[¾U°½?gE ü4‹Ó’ÍoÅû¬Ø*r Œk¬âEØG£dÿêEŬnѯëIö´Q;ÂyLàzÚ\ÎîbŒjËQØëä¾F}øVZ ¡.ÖÙ×¢¦ñY оLÀ\íPýO!\?­(ê̸ˆYˆÙN²’¸.»«DÁÆu¸úÂOHá«­Díè*ßbçz)uª¶´4Ã4jë, Üb©¶XΪc>¸åë¿ßr½_†ÎZ:ƒí_úß7þ}É«E,¥~¦áôËì°¾Õ¬“9ýó_P´ 4m%]G%ÕÒ½| Ö†s’~‚}BùðÚ‰úÙö–ò‰µsô—Ù/+ŸSûhùºÚß…ºø®`W¶«ñuþõàëÙ×?D_e¿iènü'úË…œpÙ*Ì7šùÆ0 sÁluf³<Ïû‚Yk0˜­ s<çÃUVŒ«ðlN4‰Z‹È‹A1 ºFˆbV̉C+Å*1ÐE®‘<ÀÓµj—z(Io²8+666ÔÖ6„Ãåå±FJÍù†á NÄX¡×+¼^½ÝîÅ´ÚlRd W³ …kT•†Úí±^ø%º_;tn©—ñ #÷`Qh±çMaü '@­)=…ñG|¯¿O CNwÊuú*[JÄP&‹Ý*€ÔðóqÔ­ÏQ·>GÝú\ˆ÷5pFƒ½µ7®¾¹„äòê)_8Ô ç€í¨N“·p&O­o8Y.Ãùt»õÌK˜à#¹|ê·’Åèh0›Ìž³ÂF°Þ©u/Y *è„4áœ&kñú°­‚íNØš`ÓáÆ|3>;à𬊨8{àì ò^[´G¿£émùMù§n“Ë'hp@5nÏß.ËøgT¢gá‘xÄ,š;FëdjOO_„âKùżѮ`ü¼o= ?6@æçÌ_2_…¦KŸw:üô9èÏ¥œEhØš°¦J U à «”ÓùYöÙÞéAV¼(tOhSˆýgð‡0Q5a[P³½îƒ\É4-Fú ‚¡d%Ôtr帼‹¼"éªÊË+«¼É*ÔkÄæJF¬`ÙM‡)Žé ‘MD䨓X2‹EÂÞD8Â\ÈŒAS¶TŠÉˆ˜LÄ„+ÈórTð°ØT…«ºÈÞí€ê¢™ƒœd 
ŠˆŸÀ¯¥¡?Õùáønù¹¸-Ý%ú]\’’×ëêz-š@÷ý°÷â¨>“9þü‘²X¢6°d€Å(Àb'rš¥ð»6ôïŒÕÿ»,­ï#þ‚ÿãüÚ*Zúœ&â!¸ú‚>¹©"\~dŸÜ¼@ÊûD%†?¸výBLdfFiðrós4sl­öeÚ6Ý2»²‹ù‚|abÔ¾&?ã0•‘ SK˜ Ì æjÓUÜjr;c*2fJiÂ/F’$Àv*3g"Œ™UøŒƒé„Õ&)3I^ø ž¬Id©9,9ÿRÄ"—Û,ê‡0ž€—âø8f±à‚Q-ºÀR¥U7¾‡N}¯¡Of |ëì§²ÆuØúæ'Sá»-ÄØ Dc8¶3ôÕ\Wáo¥íN‡±0&] Z‚Á\ÿ“]æ  Ûz—¾˜üéÈÓ=ïÐe.ò^Ïr íÝk™Uÿ”í®qWŸ¡ÁQš§ïÙé9Ⱦ•#2lSÍÀ:6“)¾eâÌ'$î=õ˜l-´ø¡çà€g!Ö7…Y‰ÊP ž+M|Zõ¸ÿérFTEýCÙ+,W»®r¯´ÞêºÛz¯k³j£õq×s™íªŒ[¬Û\»|Œ'*mZ,àf4ßã"ו·—?Tþ´qsù«•ïU~V©. u‘ç$W4ŒFCÁPïµ8â5ATÇLµ^“ªéÂG¤éøö2¤­2:M¥¸Ô²“ŠÕëˬsA¯Šî0 @ (j›‚8lNÎn>Ü<T]µŽµA%Ý¿T¹A¹WyXÉ*…A‰=ýŠ‹“ã{Ž•ÂŠÑ½:fZº©•'?WÖç‘l|–ú•4ø^¤ó6[8Žrð ':yu¹º´¤Df…C÷ b)ì£{ÚZ`Øs½ÏCTÙ|¥¯1(.|–”“å}½Ï?LÛùö¯Ÿ>òÁÛ'¬\9wK@Ã9´Æy7mغŒÊÁ«Co9wçÅç_ý‹%{æ-ð¥×î0q·Z0XëäÍZ“+ñȼžC²/é·fnÂРλdêlêLÃØOe?GT†#[(eNÒq™®‡ ;-[„ŒMì¶Ç§b°. ê[t]xÞv1¨ 5Ì“Œ´^¥Ñyƒ&èy¢t%“>`³ÒÇ LÖ¥ÖÃVP‡ø¬»„£½ŽÞ††:JGE$-Eëý§WBŒëЗCš´Hƒ+t‘1eSÊ.*{*´)²ïÒ½àÛÛ¯8 >Ä~¢>ªøRm¶³•¸JQ¯kÄtçú¦àÉŠU‹î"¼@±Xw%¹N{o¹µo·ÿÅÐö¨æøã[u\YWáË->{ñ]-¸­›aŒÍЍ*|–Sx^'îÿ  +óÿÜþÉúWÄe>úñÝwL¿ìç=ïþ!ÿý+ûóÇÿ°I~=Å0yYûõ ýëø–â€Æf&ÐñíA­ÎD—NJ)ȼfû$úQìˆÿHðÑ/cªˆ-f›h‰N-2-FW z;]h¸Übm¶L±]];éR(]gsŹ8uµsq÷9ïum²m‚cÃ"o6 V·k.xEŸ ºÝŒ«t¬Òó[G0¬3U7oôãuþ}~âw¥¬A‘òFÓð“u"# ÉýÆ´MŽÐ˜=Q|7ü-Åeö‡›ÝÔOBC€ècC% r Ð>0Š<B¹,ª®b^•ƒ€äråó÷ìyåý§ç¸ÀÆ™ó{ý@þÖx™1x¨–¼äw9ÜcV~ùëÇÓdu˜“#.ÅÌk°žê ÐÛ›é{¡¿?Ýqnâ’¡Œã¹bðMF&!µÏI«8wÆáv;!ŸÖ*Ó´hA :Ë‚Ðß PÐêCzUÖ"vø5•ô¹»RÑàJ0‡»ðšÎdbeïó2m¥þ¡nï:90¬±£ð‚êÁÏ»™*+ÆuØKJÐiTój 1ýz± %Àj XcÔ'k «#BFõ‘…pNÙ眨rôŠòÀGOYR„˜»?ýÅ;Ë—¿sù'÷ÉåeÞ{߇Þwï‡ìç§–PlùÝëË\}Íák_Ç%yã'Ÿl¤’Läxâ H²€èmi¡Öþ€T‘ä2üüÁò¦ð1ÿ±ð‰û¿Ÿùÿe7ž„'Kj}cÝçùgº§û—ºûop¯q?àyÀ·SaºÒ¾Û³ŸÙÏ¿áyçT¿jv`¶›½A‡Š šuúI®¡^Ô…?“¡ÀP $Hñä.œ@®Â‰­ 5X©?lõ©Q”Rê º‰ë«ieÚžé_ + :e.8×óbUI¸ÿ]3ƒjrEH.ËWWñÔúŹ/â\6s¼ütÁ·c¯ŽÙW½÷ì©SϾ·êÀw¾ùæw ¯?(#Æ®I#R³Êä¸õóÎM ?½ ãíÛ1Ê»ço­¿ç­·@&ƒ.,]¨Å¿Ò¸N‹mø"å•Êuø²?N:p'ÑnR>¡Ú¦Ø®úƒêCÕa—Ê¥6;dÜ6YýVbé´ZÎ9ž‘ OjfE*•©Å9mï Ø0S^fqEþª‹Î,ñ×Ú*Zç2•¹\Ue¨Ó‡†ØxY w-bUœV­ ‡æ‰Ç$Ý Tî­8XA*ºð?:™Ó÷”dÑŒ¬ëé…|Ù9mþYÀÿßFŸÃ®ÞhD\ØGß`‚i4¢ÙUŠF…ä\n…Ju+?v©Õ-=ÚûsË¥ø‚¦õ3殞9 ŒþÙ1qó•3‡g|ðDÖlàE§¦ŽµvBÏ?ûô—™qm:puÏW}oòV|» z¤Á®óì )ªI¸@˜'\!Ü"¨,nšx¬R¯™¦P„ôvp¯ x,ó*éÂ÷ìð( z-Â{0]"`†YV°M°b«à¸¢?Pœë‘G©®ád÷Y‹[°-œ³ü$¼ÔdÝõ+ðXzß=NÙÝ8ö{ý¥0ôQ~âéï 
pŠKÛò70µòyÑ)ÉÑ©Ž™ajö»ó\aZ‰Vâ•d%s¯Éx¾z­zƒz³g·GáQ»éâ¶´Y¡Swágw°lHW¼aɨSº& Þb´¯÷ÑÇÙ’™†ñùõ†€×;ŬàÛ·ãw³1L~¢wÁ±çhÃÉžþ'–è3`¤wÞwÇ#STÕäÈáëǫƒŒ™6­~Rþ{¹4—ÞBï¾ç´¬ùó.]—öËŠ¿æbÐò½0®ëAËs¤kŠƒÛ qúL˜U/o¥ ¼®áb˲?‹ÖD´<žÈ–åG¢õñ†ì"뢰n‡-5’´Nˆý(ûUô«ì©è©¬zHtHvQdQn³usXɅèãº> ÷P¥ß†üØï§?ªçüòƒÏÀ¼ý3Ã~(ò„QºZF‹ŠŠÑÙŠŠêl(Í™uò…Œ­Ñ¨Ó†Ì4Æ,¨b€§ó9Â3ä¶ZR"­ÏŒÆãb4”ŠF¢‘H —µærÙ°ÕÂ[(lE(Œ,¹ˆUÆ¡¡m¨[)MUM§S)¢Ê›‘z(&Z+5¡5KÃ8ü`429·oDQ¨1,ˮ̒@¶"Ûše²¼ƒ,0÷Ãì³L³RC8M@S:)5BÍüZY\@èÐæ(Fµ•Þ¼B— J+òƒ¥(<ÇàUlyÑój)êôÕÑ©CžAÅ­PUÜ:Òòvkh6¦±Ù«Œ¥hÎÿ¼÷ó~³³(ûÉági[ G:]‘¬UŽû2Ó÷;Ù [ùE²_§ŸA‡A[ÕQúÒÌláäÀç½à8ª©tÔQ!è{–:}û‚ûyMŸ?.Øçû‰;ø…Æâ«x~RÖŽ9ù.¼aŽVrœÖÍÿ_•o`:þ §(|Èo»ø:ßÜç®»4jh”4ʉZ¤ì\Ûå¶›m@>ôÓ(g–82DÞi»×l9C„fŽ›ÀíåN¢¡üÒÆŸGÁŸEÀ_‰ßQüë5€:´ÕFß¹œl4IHu¦A¦Zã`ÓS©Þ$™M£4¼¨¯ÑosoM±1\ƒÉdÏ\Õ\Ϫ+<ŠU•g”j”g²JQ¡T/ëçá!xÈèaC†Ô ²™h•/Àã&þmþœgÏñÏð£ª"]ô«:½žNo§÷î¤×ì v“>Ù@‰²H"DE6–€zA ÎüFôFF™aFÞGg!$0† ƒ£ŒË\|Wtžzg@.÷Íe`F¯oÞhÒï«:§“N@ÎûÝ?íꯪNUª:µ~õïûNœ€ÉºÒˆà‚¾f„‚—h¶„=n9T=Ö½ÙMÝ_æÃE…ìºps!-ü²*©:FÈ‚º“u§ë„:ß”Òg½Y4ôÄ“CÎèš*Ê”¡@KáÿC­ýJa‡Ì¬Ñ±U˜#£‘ «—x}bŽFk*(Ñ…ˆVç=!R¬- oŽ?D £õ‹«\joGt#0,j)¦ÿ }ú=,ë=D^ÞÊàžDÑäg5ð×qÙ`tYMzÐU¬¶;\œ¢ÆO±ÃJÌ®¨Ay…©Œž¤Ÿ¬º½~Q¤¶ãšy¦p™ç=ÓÆ—ßV?™{§S6©‘Ÿç<¼Ü+,šÓÑŒà(¿Gù\åµd‹\ó¶îm=¥;e ?5ôèz ÂZýf=]¬_bXö~¦£›B½ärC+Cˆ†Ò ARhVWÈE]“9ëLTÓ*[’,Ä2YÝ•œÖ¶: ±5WOVÛÊD­Žô“s&‹eG^D£GW’ì¢Q ûÏúˆm(6ŽÞn»Ñ[Ãm‡Q&³UçÀg¸Yüãò‘ÿ(^ë äj zƒÎ@u¹ZpCž‚Û–rÜ60ÄèÄ[ÿx8àT†×Z®4¢½1¸ êÁóŠÑ1r]ÞÎm}¤máôÚy|<|ÀY¾¿{Ǭk³±[u¬t¶5•»®¸4ŒÝ¶mjüþÀ_F Äw¤Ïj’8BLà!×ʵ’[ãvzÜÂkä5ÓÛô}íôo›t«ô+ìt)]ªYaX!®4ßn_ê¸ÍcpEkÄ(˜ŒúœpYz_Š»we³«ºˆ ÆÂBD1ûèÙ+Et2“´—1ÍjÝIÝiÝ9ÝeV×GÎ÷zq Êœ[ps»8о–2Z GˆM7b ÎôgGlN‹ÓÓŸ>;îù^sÐ>O¶ó—Yìý®ÉÍö̲3ò¦ÃL™œhD´ô̲3ryˆñé& #Ñr;íž:'³Nöªµ/}J–Ð#Šˆ¬˜Å_µ u–ê5F»ÍÐg²©\ÉÁ‹/žü3‘N½Hs>Ø·ïäÐ ƒ—‰ý$S_~ù×?ùãÙï=w–QÎïã³—iÕ#§Æ‰Ö‰EÕcn$sh»y Á>Ñ­2¯'›Jו›^Ò½ ¾«×ø^Ñ»ã>Ò}(|B™°Iÿ°°[8 èܹ|Êú*ò|¾Ü¼¨[Ù¥LÒ«#¶¤úh…ºsI…5áÊMàHµTDLbI„<ªÑC(Q +ŒX Äà_–pК§¼¯×äù*³‰ïµËÞ/&9ájôƒ¯g«Í&çŒeçŽ1œ©Ö&¬×Ç¥ÿp¸(6BPž‰Ž*³ŒQwý+§ÔZzË ÷ükÇàÀ‰~O©ÕY$õ¿µk÷™3»tFX´{Þüõ§×L?7¨Sx<¯Hp„hÅŽÓonßñæiFľ{û.äz¦ßïóëÄ6øj­ÂÓƒÂLXà_ßñ¯ŒtÀ=þ;Ë¿[ý”ï.Ü[ö£ò_({ªÜþdŒì)ÙÞ_"(çK6HY›M®WÕeYY†g²e8s8ÑoBbˆ¼eL$W42 QQêó‰Ïè o‰U<'^Ñ?®4”>í u‡4§CçB—CBÈ76CDΦ q+\z±SËt*y5²Ð׬²#;Ö¯(}©@D¸ÀYÆ?;ã}س%£zV¡Ú¥T”BÍE:ðNIæôäÁ5œ¤×uþùÁ"üê\×™]»Î0 
¯íf=øÅo2=Jþþ!G™lÙqúôŽo¾©èóÖÜ,Ü…«½KvÞc!eÆéâJ韤‡¤ê~ìÐç*dœÐ«êé-àê§ñ°#ËFõPÆÔ”§O㺠¢q“ÅÉ?5¤Õ›‰œ›˜_€¸NLÙp3ij;’D«þ²žêýcÀηÆfÄÞå˜.æ+xÄ›¥ì…‰@*\C7?Xg´ûÄoÊÎþµ› öœ]í¹£§Å-åf0(uÖPHñUXJŸüisËý>‡hqĪ|5{N’õu¿ƒâÿ… 1 ‹Î<6g©ßáÓ;bþÖýƒU¼s$»‡>¯â7§Óg…AœeMä/òƒÎTn=•®‡6XÑt | æ¿Õ¾îx­áŽwÜïÔ½ßð'Ç…ªO¾t|Võ·ÉäйµuƆÃåvÕº¢;«Ž[Ms7×®¨]™ØX{_â¡Ú‡?sö8ÅGGCôFC¼$V8Nž”¬ò{­½+g"TUŽiÊ'X-9‚‚Ý—˜4)b4Š}¤úˆf¬%ä‡rná„Hú9#ÓƒŒ¹]ú'›K”¸"2Û%ݸÊm«KH‰¯¹Q/è ňéuÊqÆ%¢ÐÅIü¢*ÏyÝY·3ºOÌbuWõ‘HŠ^ÊÚš)œ[à(ðÔ¹BL ‘š0ZR^ºSÞx¼u“®ÉK".ãO$kCBଷsTšKÍsKe1â8N¦÷$œUbîóéÁƒ³· §m³gooÔ̦ërÖ…Ù©÷X#;N´jÙŽëµ¹ð ­&¶Å69qSmrš¬©\–¶ KôC,œÌÊÚbqw¿š*(¦²7‹ß©ËbàVç[T˜¯jÎîQN³ì½^íÌ-[§%&}àPÓ­ ~÷Ê+—™³ðû<±Ý«ŸÜwãÌÁW¼þÌ£…xŽÔíA¿Û—,ª¯NçZÞØ=×®zziÔiñŸÅáë*MmlšVQ®Zž¼½“:€ØV‚ÉVÂkrþbøôIñ¨ø¢ø–xAÔÞiyÀ²Óò”åeÓ;&ÇÀti Y'» Þ%6§Ñe·Úì’SëË)é#?•íÁD~¾>Aèr">“óAMù…ì,+3Ã…‘—!×–Î]“{2W‹À‡½cØA}?…¿fù,£Î†‰À*/J¯Ð¥¼_ñD“Éo È ò~EU¦™ávçèWT…Õ#ß·¸]ˆîsiêÁÚ kç¼\ã4Û¼æðÿYûèAÎl¾‡u†°ˆMîÿqÝ¢ña3û–Bä†Þ@+X ×-ÄÚq¶c›°Šp%Î5GÝ´ØMü«‘¯À9†œ£!jU^¤šÓÔ©Ev=†)ªžÎÏ„£EÄmu†# (=ÞD(´Œ ›U猦pÀãfgc‰Í6œÖ=#°&°'“Š2S•SKeíù¦Ûaf¹•E"³Å6<‚Œ.9˜Y‡ÆIçTZ^™†užN?7"CRú¼úÆ3€e5?áË ÿÇ^Ý$ÏR(A˧½±ŸwÃ%~ŒØ´·±u òÎØ:såóŠW¡#³>H°¯ÜbÄÈyÜ~²_:àÂbØf X,akOn R+]ã¸.³¯p®ˆÂDÏ8$9D˜J ƒ²Ë f›¹Â,˜§qÕ@QÑ.)›(>mˆd‘A™ÞŸƒLwå|®ø'j¤D!{¦¼ ÝsÚÙÓN Kv'ž#1€°Ãét8œ‰€¨8¶„($D£.–pö‘•²ÉAö”ý]°÷“•à FÙ,Kd¬´ZÚ'½)i¤äŽ™QyŸúˆsý_„,ÉTòkþG³/_…Wù*¼ËŒ¨pü êB»|ú&NöâÚëºHU)çºI’½™#˜3jj¦(ç¿ -¬6Ö<†=Y,”É?+vyžqÿÌÓG¹x @m´Ó½Í}Èý+÷Y÷ Û°vÓÓT0h .¯Æë*¦%šbW‘§VSëºVs­k®f®³ÕÕêk-¾¬Ò,w-ó,ó-+Þ¤¹ÛµËýCÏSt¿æç®}ž£ô¸¦ÏÕíyÎ÷\ñkîW<ï»ÏxþÝ}Á7¹î8»ãž-¾-ÅÜÇÝ/k_vþ›ûò‰çoô ÷ßNžæ"]²Ç=‡Êã&VQ–޲tT¶Ù«hyZ6kà ]Äõ’Só¨3¡e¤®±ÕÌé­XÅ/ãÊ%Ã]Ì»x?w13æÊ’ÛS¥•]ÕÚmZÊ$¨ö9%Y3æóöö¡½û"“ hgBøà"í‘øg±HðŽ”¤I}Æt£ç¸‚ÈßÞ¾ö I›«ªü—ŠÃÑbƒÏ ± #4d]D/EÂ(n×l¡€L˜ðàòc}Ë– 1¾®ÚÙ»¤oÛJFþˆ!½Å„æ\ Y3ô6êøúxö,]ŠëíJœ¥ôòÎ=$Q©Ö>×NŒÆŠ.$wH«#«c _"/Ù~'ý.òzìõÊ«^l´À »¢ÂÕ8ÓXaúçÂaE€&HšÀ…Ò‘‘D8á—¨Lä'b‰Ò†Dc¢:Q•HȆÿ¢òò¢T›¶ª” 7>ž²±—EÆø‰¸sr´à&Œùÿq«v5 s%Æ÷Æ/’xºÈãEmÖ¼ •Œ Íó5‰¢_,Õ%tõýÐ'o2ˆðÆþï³”hgÌþL€3ü3‘”‹^Û…Œ€êúÁ;ŠÿŸ[Ú-å–þߎbé? 
²ôÞ+Ř{žÑ¶ÐýcO Yw…P€³M`beCb"Þf 2Þý “%ˆÝ5$ lýGí^³µj|_ú“tU>ü*#p^6J¦”=h’RãÙGA¦¢Ç.º=uvÜ2ëëƒRŠ0«±&מ"Ìj¬ ØÐ‡V#c&ÌŠˆyáº*+Z•N_ ÎÆðîJ†h£+©nc_úT¯ÍÉ(ß§d3zbI´"ÌúJõ 'ŠÌÙ× ¨Úê¿‚Q\£ûÈý…N+žÒÿÊ&E×à±Áã|¼ô[…äþÁgòÿ!ÛÏ–É[¦Ї,6Ÿüfp›ÞmV_GM|E¡ušÝz<˜^kà1ŒFs‰Ø•Y•ã6à¬Ú9xŸfΪJò<"à•¼Ñ¸9â©&ÕöéfÙó…ãÿFMFG‹cjt9Yn¿ÛqwôAǃÑcöŽþèËÑßG-85¥JÉ^éPpš Ù\1„Ì¢ÁÍAÜ £Ñ@4Ƥ)ËφUì ¯t&@­v·Âh$ÀT¦à–ãë!ž ®6%êwT2ÙƒƒòEE\ø Z‹:*+ñ¨3‹Úqúq‚äR‰’€!¨•Œ õ œ ¿g4e¨O~¢t\"/µ@pF® ž ^f'ÓªLä˦ k×hÏi/kuZßøÒ~¾Š+i_kû—¿ ‘! ùQ9-·Êù›Ö-Ê¢üukñ7Eˆ2—¶Ñ©õ[ÒT„"Iæ;a_9¼F½ÐÛ7ú‚~³ËÍ¥Ö’¹dæZU¦Åæ,øïñ±Ç…µ‰×fÉì2òÅy:=¬ !\Ã#¶F_þŒ£Éÿ.‹F"&‹HO¤?súo ‚†Gô\-NTtó!Ó䨰:6kÔm!T¢a³Åi6[Ì9ÔBÜfšC,Ö0xë ›rDÒ®IXÅ”¸šÑë|îöÕ9$ÇçÝE¢»Aåw¾0$d0qøkJ¸Ô)Ê©òÒœ²m—+î⊅îÿìÁõ*³Dy¿V:Í$N\$£-V©&Cß‘Þ袵œßbèºÏ•Ã]ËÀ¤õ\«V }qó¼„&èÍÇ`†È1l£ñ²Ù¤ òõŸeé/2:¾.ðOnAgàeÒD}©áÕüû–¡öŸpùàúGèyá/˜«Ð$ç™h€RÁLôö´(’œyÓzñSÇ{.DÄ>Ńã§G?ïÅ͆ðoÝ}Æ?u§~¹‹û÷v•°0ìë}o¿1èÎõu¡›çÎói>¼Ñ“ã “?¨¬Éûô¼Æ§Ö$%Û ÑXX%Äõ¦Oí¿ÎÁÂe« Ö“Oõ¿¦ïa5^Ù¯T㳋ê7÷²«1~賞þíï¾ï º‚F:ÿÝÁó幃"V$Vtš=äêÁJ èVú/¼&×Èž2¸+§×ü’M¡‡|j¥!J7üN¯HàÉcüc«¬íJ[\”¸"œ¬O®ö}rUóqæ“«ì»ÌZ€{üóþs5ùŸ£Øï§7ÞÊÜ——Û”>3°[ó…~/^²/%žmöMzÄsÞHŸIÿ«æ 5|ø'hÞ`ßýe)ø ""‡ ƒäÚNÆÀyØNâÐOÞ€àCŒÙ/Á;pŠHð|Lä R ‹—zŒ8à÷`‡¹Ð ?Vx6Ã*¼c?´1(‡åЋРÇ`ÌÀ X oÓIð¿H’}å’<Ûa ÞqÞñ{¸æÀ 8'±6.¸v`ÜfŒ= ?€yxZ®ÅRwÂE²“&Éc˜ÆŽ¦óg%Íœ†Í~¼O1ýªa¹eÌ<Õ|InÄZÜÛÈj^kÞ,ä8Ia9ÖõÌi<†p3tC&À¤˜Â$|–5ðù>åCpk2 Ÿ«ïb5ZŽ ÁŽô_ðéß'¤sÙƒõ^Œí®‡Ut6Xp í‡s˜—Ÿ€A+¶b–s3‹›~’Ä2“ä ä0Ž•9ƒmw–y Ûåm¸H“é¸s߉åÁ¾³;ɲXíoÖ+÷bž,u'>%ƒûÒÒSXæv?Áë,}3‡Í˜sʱÕ,Ç6kÅû°|¶a0˜…mÈkÁ¡Ÿðfl­£$»àMØ”þHè·%÷f€Ùð l«Ý°æ±áIóh³ÈüȽËRóßWù¿úG—e%Åç{‚X±ÞleÁ`ì¯ãGÉ ²âÈ`m”i¹L+)-uï¬Â‘» ê°gÁ ¼ãŽ«“ØV™öܬ¶g¦M•öÜ8Ô–(ÀÑÎúô÷¼| GÜ Xƒs’…gãq|%áA¬}¦3A€p|'Ó_âóÔã¶Yš>åót)–ø6Ÿ£mØl†>ŠõX‚ãæÖa1–IŒ] ‹°×ºÈq˜K40™Ü]¸Xq¤ÔÃl˜Jš±î¿ÅzÏÅ>l† ¤};6ð‘܉æÇû!†ío‡»  Ka5`kÅThMë Í]˜Â‹5Rjщµ(ãõhƒЀ†÷Ý\Ýn¬ïvl»M8®nF׉W× ¹ÆCïßÀÖ‘§°þwásÞ“!‚¦s ¾ ùð=¼ë¼›­&'p=8‚g‚?cÝw¬Â’wáü‡õø:j¾óÍä"üvh)Â-Ïàr{.ä»pUþï†&\ªÿ `*ÈùgË‹Vtíç@×iË‚}ÃàÆ<}ˆ%D€\LÄ­9ô‚‘äë ê Ÿ(Ájæ*„@qèÁI„ÓgXƒacacááýE<•ާ»„Fpaþ#ÓhùÝ!„Ь\Xh1†ãU1ÞSŒi‹1ô,Ú„ßÁâg lC8©ÆEù`ŽòÁż¢XÛ ´SÜgE;$D{¸è®rµ¾Û}:FÒ­Øš[±Ý¶²BÙ$®À˜”šbÂ!­p M š"4Åh¢h"hÂh°… 
öÞv4ÛÐ<‚f+š‡Ñtao8ÅOÆé‚êÕÕÕÛªŸ¨>T}²ZœÞŠf!](‹àvãÙY²üõ6ªù`&çö³Ü^Çm™ÛÙ?ß|a¾ùÕùæÝóÍ;ç›[ç›§Í7Ožo®˜oî#‹dOÜü~ܼ=n¾)nž7WÇÍããæ’¸¹ÞNÚÈ\0ï¸ÝÀíJnG¹Gæö˜Áø<™ŽxRt$rèÃHŸ†ô„¾é3 ó]åjžâ$Xà/Cc#ËBeJH¡âäGNh0˜C€žÄå2ýkúzY?Q_®£/ÖécúÞi 6ƒÅc ƒÎ 1P˜„›g´[§ÎƆÙî·QfS…´K‰ÂTèv-´eVié~a1´, w>+ÖGÄoîÖÆH·Ô-³¼Ý5ñ–>}zfwm¼¥Û8c^ëaBiënú`Ù­}$Í‚¾è–«*)ûþր궵±{ZkÈÖ­mà¾3åMIuö‰“›®b-Tí¬·šÞìWœ¬&yÝ?l™ÕÚýL^[w%ó¤óÚZ°åf…ç·£µtBsÓ1ZܶÖcâfZÛ<“…‹››Ú†ÓAÛŽA„9<„Y:J¤5,]s”tAž.8"ÝáI‘æ¦Ã‘H&Í$žfÒÈ4ËF¦YÆÓ,SÓJšHVý9ˆð4ý¹+Ò¿Aš‚«¦ÉjÍ¥ W¾Dþ‘cìói‡76/5/Œ5/EXØÝuçro÷æEáð1h$ï°¨p·P¸pÑâå̽uiy'¶´©»1Ö>;¢¸‡2Å.½õ*™ÝÊ2+eeMyö*Ñϲè)¬¬gYYϲ²¦ÈSxY|Ôã°4@C[ã|Åí¥&ðÂ@¤­Ám[SÇGs"â½/Яòs0ÅÛºsb Ýf5¦~L=‹ÂYÆ¢,lU£¼÷%"~òs5ʆÁöXx›W4á¿£Cõ|ÃGGÇú[:né`.ÿw¬ß€Àßúw@Çz¦1¿>‡ïo!\ÙÚÜ…ð0_£…ŽŽ¶õ s@Ç`¹­gÖpæC¾ ˜3éÁSÐ1úÇFFÀì:6Îy€uØtŒÄl€URÍàÿ+´$ endstream endobj 40 0 obj <> endobj 41 0 obj <>/W [3[600]11[600 600]15[600 600 600 600 600 600 600 600 600 600 600 600 600 600 600]32[600]35[600 600 600 600 600 600]42[600 600 600 600 600 600 600 600 600 600]53[600 600 600 600 600 600 600 600 600]66[600]68[600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600]95[600]178[600 600]181[600]]/Type/Font/Subtype/CIDFontType2/FontDescriptor 40 0 R/DW 1000/CIDToGIDMap/Identity>> endobj 42 0 obj <>stream xœ]ÕOkÛ@ð»>…Ž-=XÚÙµÁ¼KK!‡þ¡IK¯²´ †ZŠsÈ·¯4/@ ùÇQâ÷Fhwï>ÝMç[½û¾\ûûr«Çó4,åéú¼ô¥>•ÇóTµ¡ÎýíõÙ_º¹Ú­ß¿<ÝÊån¯ÕñXï~¬>Ý–—úÝÃÃïÍûj÷mÊrž׉„Ÿ¿ÖÉýó<ÿ)—2Ýê¦ꡌëŸúÒÍ_»K©wváÛðáe.u°÷-¿AÊÓÜõeé¦ÇR›õ…ãçõ…ªLÃgåU§ñí×#ÜÐÀF'¸aÏQ78á†ÞFm7 µpCá(À #Göïiä—hnl9R¸1p”àÆÈQ†…£=ܨàÆÄQ7fެÙDkÐÈ&Únìl¸‘MVl 3KG…ƒ¥£ÂŒÁÒQaÆ`é¨0cØÃf … .Õ =\a 0À•×@®œ8á o€ØÀÆŽ-\á \á #\e9Qá*›ˆ ®²‰˜á*›ˆ{¸Ê&â®rÛ±ƒ«,'rϦ²œÈ=›ÊrâWYޏÊ@"pW+ 71p©fb ÉpÉnb 9ÀM $ÜÄ@r‚›Hz¸‰d€›^¸‰Û–nâ¶µ›¸mµ=ÓÄmk€›XŽF¸™ÛV›Ù—*Ü̾4ÁÍìK3Ü̾t7³/=ÀÍìK;¸™})Wofö¥\½™Ù—rõff_:ÂÍ,çd7Ãö´°‘Þ ÛÓÂF–n3l£í‘ýïÙ¼=½·ƒÅƒþyYÖsÂN; ¶sà<? 
æë¼]U¯?Õ_©ì–B endstream endobj 4 0 obj <> endobj 43 0 obj <>stream xœí{ tTUºî¿÷©)I%© !#ÔÊ@¨$U™B NfBB A"$ ŠV@DE…8kˆÚ8÷‡vhl©Ú¶6ôÕÛíÄ“Z¤ oÛŽmp‚Ô¹ßÞUAè¶ïëµÞ»ë­õ–uòíÿ÷Ÿ½ÿŸ}’ 1"Š¡^RȲ|ÃzÇãÝRƒš‡‰L +zV^ü‹_]=ùgˆŒÿ±rÍ•+v”ÝPJdYXVu/íz{ܧ‰7`̤U¨ˆû„=ˆò (g¯ºxýwæ]u)ÊG‰ô¶5åK);ðŸD¹hŽ-»xé=q?2.#ò–¡Âѳ®»çÛÿm&ÊmDc“téî¦LH›²ŒlDZ(Šÿ oBÛ‡46ê›Ð}¸¾F)‰~…öýÚgTM©MöŸ€ºûP~™]ÃÇs;qگˡƒLÓ}Ê’”Ѷ™ýMYý÷AC˜ïÕfQ ÝH› ´g1*]LWÓ]ôC–Ȳ´K´#d p×i?Ò^¡¥h !ö¥Yw¶ #[麛v³"_÷êÈŸÂ×kí-2Ó­ô‹cNôµ…4ަÓë‘»g]þˆþ£6ýnª‚¦Í`½‹þÐg¬–Ôåê)Ì4»öºö;2Ò ŒÝÊ\–ÅêÙžª¼©|CzJ£Œ^LÝ´’´ŽžÄõ ¬<ÆÊX9«åµ¼“ß·ò—”{t×è6ae6ÓÏ1›ÈT6›µ²ì-öfëJåš0Áî·†êè<êÄýnÁJ½"­>B#ŒÁ‚,À®a±íl?{¿¬´éfê>ÕVh7¸Ý$Ì—“òh:4´a}Ÿ¥]´£ßc:l/e>Üßuü<¾A)Sš•ó•«•~åGÊ!ÝBݳá²ð_µµG´´·µßkÃÐg¥,*¤Ù˜é6j§«°rwÑ£ÐúK:LŸ3«f—°ëØØ£ì'ìYö{›…y<ß¡LRîQžÓ1ªÛªûUØ~,<>¦ÕiÚiÜß2ºžn·=FOÀãvC[ˆ5°óØ<¶ˆù¡ñ&v+{’½ÄþÂu|1ÿ©’«¬U6*W)[•“ºÝFÝoõÂá{Â{4¯v),¾Eû¶&R:M¦FXz­†gôк6_9¿–ß(¯;p?çÏè瘗£ô:ÉbX¼ðIݳzÒ[ ÓMoœ:rzçÈý#†y8?Ü©µLmœÖ íÔ^ÒŽhEä:¨~91u5õÃk†°R¯Ã`­ÿL‡ôð7+Ëf¹l[Ì®ÅLß„¹~˜=†ëixÎN6„ë\ûذ˜ýÃì(û3;Åà¼<—{`ñb¾‚_ÅŸâ/ò—xX‰S2æ³RéÆœ^£Ü¬<{xKùLùR— £ËÕMÓuëîÖíÐýRwDwJß Ÿ£¿Ü`5ÜnØÝ9¾ÝOðau¼ ú9ë@ü›1ã?å¿â…ˆˆýÿ×­ìKz…UÓŸÙ¼üV\×ÒGˆ£…¼†}Oz”Mfw³G¸Â»Ñwm§G”gØÛüzºÑ_DŸ"e|+b·ðqØ ïâ»èOðŒýˆ—Ïxòû±Òi´_ÙÏzè+ö9»ƒŽá^ü|,­doÑv «¥5<Ÿ\´ží‡‡á£WuL>öÛ•bïÕmåŸð­ì5ðmÒæÛÙRÚÎòáoûÙù´“‡t“t/ÂKë¥èÝ ìJøæÃ\GOò_Áwgs÷!z·#Nª`õZO5l1ö%‹!+»Þ~"óVسƒv°% ®zíy‰¸~¾•î‡y{(›~¬ÝI¿`ËÇ»Y,žbïÑyÊ ÝX<1ŽëÆëë4^Fïhóè5ìXå]šI¿g·aߘI¿c)ô¶F+ƒ7î×:`ç ´Šæë«ô6ìÆKùú¥q»á]C¥¡ØÀôõ]úýl}~²¾XŸ¯wêÓõ‰úXÝ1Ýut¿Ð=®»±[¤«3+ïbÿPPnSÊŧÁ'Ç+:þ5ÿ+ÿ˜ÿ¿Ã÷ñ§ùf„•¿×^ÑКµéÚdmL8>~)ülø¡ðÖðáÞpOØ?òòé?ž>xzàôØ#ï`ÿú%{-| π˴EÚyÚˆ·dímzø0Û‚{Ì¡ÄרWïÁº<޹mǧò™ÌBa:IØ¡·Ñ¾‡ž‚]N~Z`h£&¬w."óú¨7vc¯}%k•„'€3~Öd1qìÒyxÒ¾LÏh(ó¡c@Ë“üMæ?FyØe.Áói6ý‰Í Opí¦Ý#‚í)Ó`ÝcxšN~¨œ‚Æ=t¯Ó[uøü°;´óÃçcO»ŠöèþLó‰ÔóÚ.˜ßÖÚ2¯¹inã,ߌé•Ó*¦N™\^VZRìõ¸'æOÈËÍÉve9vÛøq™éi©)c“Ç$Y-‰ ñæ¸Ø“Ñ ×)œQA«Þïæúƒº\×Ì™…¢ìZŠŠ¥gUøƒTÕŸÛ'èðËnŽs{ªè¹âïzª‘žê™žÌ⨤ÊÂGËÜ_ër ±EóÚ‘¿£ÖÕáËü™×åÊB< N'F8êÒVÕ:‚Ìï¨ ÖoXÕW篅¾¸ØWMwla ÄÆ!‡\0ÕÕ3ÀRg0™á©uœLñ°*˜áª­ ¦»j… A%§niW°y^{]m¦ÓÙQXd5Ë]Ë‚äª&ºeª‘4ACMÐ(i«ÅíÐmŽ‚}}·Yh™ßmîru-]ÜT–v«¼µÁÔï§}[„ò¤šö›ÏnÍTúêÒV;D±¯ïfGpû¼ö³["í而å9õþ¾zPß.f1ÍC„ùâV"7Õíª5þ 
ÁWµkUß…~,HF_Z®tfd¨{´£”Qçèkkw9ƒ¾LWÇÒÚqÉÔ×rå®tÕ‘~nKaÁ€Å™Í„ÄhÆv¦ûL›ÌÉî"7»åÌt2a‘kÜ èXî€%í.ÜÈ‘tO¡¾åSÐ Ÿ†QÁ.,Ãê`L¿ÏR!êÅø >Çârô$,»kø/çÖ,Ör,ˆä¿ˆÁ‘bÔÁÐ>šºÝÁ‰…_k°°q†,—lâû]=¦šÛ1¬£Âƒ9w:ŪÞ6¤Ò2‚½óÚ#e-Ë$Õãîr¿hÙ7Ú2v¾hém93Üï‚ûî&ñ¦16hÊ=ó“hIS·ª"ÈRþ›æîHûìV×ìy‹Úu}þèÜÎn;§iŸr¦-š Ž©iW2y4Ç3Ù O\|¦³(´›ƒºü¤'w MpEYÃõA‹f$íˆu:ÿÅACÚq1JŠo‡EÍ V¸Ï-O;§|Žyæ>ërùì¶E}}±çš>×4çcràÁøœ`‚ÌÉLI˜ïvü9Ø@Ϥ"a–ùí‡2ŽvG°m"v–Ê´ãžã•Áf„{0.þ*R½Ô•(Óx©tlN05'Y*OWNîI;z\t‹Íô‰25å-9A«Ì§ä ¦[…VÉt& ýƒÂKåÿÞ†Dù“šLÏI#K¥é4Em‘ûCE&¿¹ÝŸ¹´CDžøÑçÌoäô:Å6¯Ia‘?µmˆÛ`“?ˆÒŽk#‘éŒ ;ë J.³ÌšVXàBŽdΑëÂj„S:üܾ)™.gǦùÅ®*'€ûs¢¹Ï¬+Ø:Q´æ:2±øs;0LAßzßÒ!­w™ËaqõíQR””¾ž:ÿhiÏß–¬¿½~¹ŠU`âT=àb·ÌPÙ-­‹Ú÷XðJ}K[û ô5þêŽl´µïqà9*kù™ZQrˆÍf˜„An’M™{T¢^Ùª“²¼|ˆ‘¬3Ö1Z>Ä#uY‡Xš¶ö³£RNhG¡8p'È•z‰ÂHÓT›ÁxOd½î¸B±ýqEá1FÝqFé¦ÙW¥¹çZNTΩœkù¢rŽe¤’|•#•ÅÞR«Óšã´:Wêè´CÙwZÕÓ)rèöƒí×þ (ÊmG©T®š•½1†±{c-ffbv56i²¶2¤§=·"’!ÑbÝ©ÅkØ,m%jûTsÿö tö oK£µfÛ,»§ðz;k³3û¶oWVNäÐ®Ò !~ÎSRör¤IUkZËìWþ¤ðÅB¥P­)/t4”ºY¡šhóxHH*ð©‰É¾¢"SÚä‚ô)éei|¢Ûc2¹¢ú!ÞUTR&ËàRM†ò^×s®—]Šk³Ûïîu+n4ìn-wÿ­lˆ·«Éþq=ãzÇõÛ7N?îËŠ)EExkâ ÔX«×Â,éS[¦È5îÎd×Ã#îµÃkÝøÀ«Dïýa˰5iªÇ]9ì¶ »òn÷Ú©nÑiÝZ’E·»Ø[s¥êðNʰ•”—r䌲læµ!)ÏœœÍJÇ{²ÉfÇ&åv3·{âµ×²uk;im'³ÂÆÂ—œg*µF½J/«)©)©%©²‡‹çå•–Lž4©¼,×åb÷46ú|áW}ê,‡CäÙ#ÇzÖV5Í›^8£ñ²¯}{GýUW_þ){3žZT8U@ÉšZT4•~ÐÿÃéS;¦^i±O)œÊ®»?3»9¯b9b`ö®î6åIrÓ1uÎëÁäÃÙGò>Nú ùƒìóN%ŸrÅš’c\|RR·ueR÷ØN™ qf–4+iN^GÒ“dšüq¶1#=ÞLzØôÌs¼%Æ’É2‡˜swmÌOâßì¶8ó1C¬Qá†gVœa®M,¬%½¼ÇvÔÆ›mlÜ–Q8fˆÏW“zrå:r½¹=¹ºÜô‚ÿuudéÖΞk ¯ëtÏy_©oä}Ëû–áN,ËT«•:µØKx:«“Ÿâ‹IŒH2Á6˜šàë×Á"Ë!Ây¬˜í¬¼Üܼ\W–alrŠœu¹NFƒ åe$¢þ‰œ¼ÌèÄñcÓŠæmºkçS/õÎó.pMœÞÙþâØ»Yö§óïVVº|³nhœ‘–Èôþøº+n˰̙1±vúùËoüð÷Ìî»_µÒ]ˆùOÙì!5v(i(ùg™¿ÎÔÅiGÕYãle]|Mò¯ ‡ ï$¿“þ¡á£äÒÿÆOþ–t:ù+û×®ÄI†OZ¼:íÂŒ í+\?àÛìý®g컾IoÔ+qc²mÌ$¢gbE™ª9=«¬×tÀÄ›ÐÀR~šdSÇ—ËùOï(³Ø˜jëµñ-6fbij9©I.©Ndƕۉ%â¥ðMR4x‹jN,ÃÆæÑì¡ìt¦uNKœmˆûéò8<´v¹ê}RÎÎü®ì²£q,.#7ûrĤ_M£ºÊíczÆð1j|bÙ˜ôœYkä»±¶#ïc¯ïÄ2Í91,–yÄí¶"";×®CݰXàŸÚÔt§OÜîä  £¥t%I98!Y®¶»ã¯ˆ_„wÔA˜p=ĵCjŒ5Õg/@‚]æÐ ¤è.\ÃÝÉœ)Ò¢® seQĤ)cá0p£nõéŸ9»mÝ‹smùSl¯mù"|„ù\ó›Ò™ÇŸ<÷¯^u¿—]м¬8¹¢`¸œ–òú;,±½´ñâóº6´/\ØŽ9ÝŠ ½Go¥R6Gu3S3ó2'gêÈe<Ñ’TJjœjæÿ¸7µÍRcÕѪûv»¯2Ÿ³ßÇ1@œX³® °Èã-.)%vîc!Òü…*Û¬lµ®¡,[ÓŠ¤¼IaIYv÷çxJê*,¥®Â‚Kš7MMkNó§õ¦Ò ‰]11¼ËKnïIýûD5;œ^'wf”cCÅú/T–±W˜Ã5Ùì 
X¶YvZöZtdi†xÓ¢³¤— 160ôðƒ÷-•ÃX?ásÞGnQiö­EÝˆÈ »£lºâ4‰]`¶[çwo·¥‘'yJ4ôEÜvåÝ,[l¯#[Eºq™H—½ºw~È7%Lîæe+æ°JQÇ÷†F÷Xö¹HçÞ4`ŸRà™fLŸ^8WT`eK±²÷#ÖËÙX5Ó`H1Ü7^ Å„ìë‹Õµ&I›’’ú»âgÆî7°$&&"'£<ÖÒhá;-;­Ü®êÄʉ˜Sãõé™ãÆÛìgÖßE‰Ì@¼\Ý·¿‡:û´Eg~[5Z›n›eŽ7%Y…ž2«ZՀęSfMÈ–ìò–”IiË•ò¹äô2–Ÿ7ÄÆ«Î¬e¾!#=–L“×Ôlò›zL†~ìön¼Z[“ÄâY³È)±Ùéwö8 Îô‰gE™X7a¬Ãk7gx8rÚ‰¤â$4õï—rLd)iVÛ•jR¢…+žàÒ'*VY¬œ1ánr_{-!ŠÅC9Ù¢¦'ú¬"‘±o‰Ü¢ÑãŒ3Èö\ÿ¿¼òÚ­U]¨ÓݹmN÷ŽÞsBVž””Û{;g4–”L?oÍšðëçF*†tžÂ•XïþKµ<&ÉPžž”R¾Â{“÷^ïãE»‹^*z;æPìÛÅÄ|X|Âü…ÇËŒzcŒqÒï$OC~½Ç”-¼£'.ÏI$±”ÈL®É4#¿ž reO(÷Ô{n.¾¯økÒØW®Ø$}œbŽñ˜½©qÉæñiöô oRÅq·yߊû½'ჩïU|íQ©Ì›ª”™cIç6f;SÌé^^äÀÚ{EbÆAœc£Ò,så±!['M´BŠÖ]Í­¢,¥lolŠ´CÊÑ bôóqT«)÷‚\—GuQ!Õ˜Œ¼²ŠJÅ;ÄרuÞ¢d¯·HqN6Úë6׫Sëšê¸½ŽÕ©®œ²:uRyÝÛÓ§WRÕ̲Ô+,ð·£N…œ><ÞΈÍs&Ç©$ UsÝâÀgí±ôZú-AË>ËQ‹Á’1Ëøs>»g6Î q¶ñsí¥ŽRo©R*æ<Æé*+MŸÙ´%z`˜s¢‡vùhÀ~S)†k;ßwc‡öNßðÍ Eîk,/GŽïISqpŸûYgñ³V × gÍ 56ÞW#’z‘Ô‰¤V$YbB!³£Ò•Ni2^5%Þ+K8rz²Ó}8Þ½» R¾ÖÄZ£:¬rŒB‡šŠ ­¾‘Ô‹¤N$îïút0q¬‡q1âìš*Ï&ò¸‚HÉ3È—Öȳ.5r˜)ÇÎ*.¹¢vRô,(µ%—T^^oŸè¼Ñ¼zÝÒÛÞí¸Ï—˜•äEìä”$xnXpÇÜœòò'¾lmíÜôFÃõ•cœ §X“s¦ð‡íö<+ °$Ž—s÷¼K/²Ûâ|u¾ü’ ù)i22’2g]tɬ®Ìq h*©I“/iâKDΙӮ[’XyÒc’Dxl~ JÈ_­‰zûV¹N¿•þXÄö‹úQˆvÄÒà_„èú«öí+¶ƒm£ƒ(|{Ñ>¥f¹R"B3¡OÄçjX11*¾‡ãF„~MãÐêÆ þ†Ö±!šCÒD6 ÷};{^î…Å5ÌËw`'˜ƒi ?S[øaµ†ûTqOOÉ}€#Šc¡;Žò¤ØÀgÃË×r ZH¯ýå1Nô,À}šaŸhµ£ˆÌžöÜ}æJŒ¯ÃÝÆƒ×Šk)™HÁØÐ V¶ÐDÄ©ãU1+ú ÆTk¢2¹·Ô¢Ï}r7j_Âç3 a"塾–rpot×™}²ëÿ>øŸ…rçÿ9táŒ"Ó'ÿ=bp.Ì7ˆïÈ}ïñ=¾Ç÷øßã{ü¼eóÛñî\I⽓…<8M’iþ xÿXh¯š®cä¼€"s>  Xø7p 0‘#Úw °-Ú¢'»¢‘ð ù.BgJ[€mÀvà8 'U ïŠK(±W5(a S° Ðaè·¥c²fK´´P(Q§'a´¦êaˆ¦éÑË¡œF½E9E`;JG´#NQ¤_âpšöBޱÚ>å«]óZK¨ªRù_S3ÐôAà(€y@êQFpÇ_Cñˆìåú½(ïƒ<í=¢ÇzŒÐN xV/Ñã8ú¯§Ý[²Gfâ­2srWEeɪdå$î­_¦‰H=€h¶;hN Ƙ布S+JªÄ-Àò—j½­(ïš×‚y·¡Â4¢ñ ‡Þ0ò˜NªÛ 0œÀüŸÀj *>œT!Y>œÛVR5WäèÔþ9ŒÊç¢òѨ¼)*oŒÊK¢rUT.ˆÊÖ¨œ•Ó£²2*K¢²8*s¢2+*Qi—ò³ÁÖÒþª|å3Lœ_ù+ù1n÷c¸Q3Ò³kúí@Øb¨_§¿@G »”/ùBšOvè=.õf*Ç¥Þ å#hùHêý蜚~`;ö”c’Uªr¼ç²ÐaÔƒõ F=ˆQ¢†ZàT 0 å0Z#Œ*á?"  ©p^@ôç”å%¾/uvåqÞ9Øe÷À áƒpƒAØ~T9]‡¤®CÐu£aô!Œ>$u}[R”EƒJ—}Hù÷Á!~¹ËÙeO¬*Vj ¾žTƒª‘qYIÚ‡ô(ÀáQÕh­†’jô¨Æ-W“^iPÜ”‹‘•|•CNCYÈ ¥@Ê©Q9Eq–ƒ'KñB‹¾é{‚’‡RJy²”R6JÙ0Ó‹4#ó K!³—(cƒcÒ¥;9ÑLQIÉ‹Š“Ï§i²‹sW]C‰¿*N;ÇÁú<%“™ƒÅ%rXæ`}C4ƒý£Êª¤ò5’k,? 
G´+Éùc¢Ò>h«¶ïaU¼«@ð#3fÛŒ©2c~͘3ÖÙŒé1ƒÖ 0Ã#Ìð#3üÈŒÉ4ÃÌ»’’Ô!þê`vé¶çù+tŒ¿¢Îç'Û¦?¦çÛtÇt|›rLáÛø1Î÷ö¹Ýà3,1 [ z»Ñg\b ·õ>îSšx“¢sØYŽTÇoŠßº)þ¦Mñ‹7Å7nНÞ_µ)~ê¦øI›â=È;Ø_X%:>*Ó{ez—Hé”L¿’éQ™^ ÓJ™:djc•ƒñ3ÄN:§ã¾O :› †Ë žt–Ù_`OoŒvöø óÔ>6èlX9è,‡X1è,†¨tÖ@Tívzíß8‡tLM´¿ç\gËÙh:§Úuƒöm²)ξÎé¶w;'Ú»"Õ #¢FˆçìÓ;ì…‘š‚HÍü11cbú‡ØµÔØÿkc¿ßØï5ö»ýý¹Æþlc¿ÝØ?Þ˜lJ2YL &³)Öd2L:7‘)Y|â@üE;Ù` ©Næ-\¤<òoÎLœÉÿ<ŸŽcÂô>98F™Íg·V³ÙÁ}Ëiö2Gð‹V׋·(¨wU³`ÒlšÝVí¾4mv0½uv°uÞ¢ö!>=Ø[;ÛO0½E÷Õvsevˆò%ѼŠ|E4ß‹|C4þÁÉîÙCF­%8Å=;Ó|~ûcwv ä·@K[ûÓDÕ™â_¿öcöïÈR»ñŽŽJÙàKó%ͰN­¯ýŽÄMÏú¾CÚ·YÁÝ|¥j¶?k´×í¥F»Ë(êg·¢²ÿYc± ©L¼wvk{P‹ffcÕZ‹Û÷pŸ^W»‡Ï¢£}Oúvî«kõéÛq“gú!8}è‡ØôEûQŽèG9×/‹Ïýò„ˆôË’ý²Îé7Ðଫp:Gû4È> çöÙ~nŸí²Ïöh%ÒÇyVŸ1SÈ)û8ÇLù‡>YÿBŸ¼ïìó_>ŸîêÚtö‡í¡˜¶Aü—žßU× øƒ·mX•ì]æpì¡i,ý¾\ÿ²å«„\Ú=ÄB®îÚà4W­c eÃ?¶7ˆæWím¨kkØ v×¶¨-u®¥µ»šVúÖœCwë(Ý€oåw([)”ùWÓšïh^#š›×ÁµFp5©M’«nµˆ¾æöUwÔ,ŽÈ]<.^ïÏtvT§XzfȘæLÛ”ù<^ýŸ¢8wGÐìªÆ¢©°ª°J4!ðES‚øÌhSÚ¦iÎÌçÙSÑ& ª­®jBüç®öÿþµ^~.ý>ÿJOm_ŸV·ºöìÔîõîKñã¾ìŒ"” ˜.V¬¿ÔMâ»Yfž¿Àß øm~'¿ôÒQù"ÞªÄ[x¿b¨cëIþÿ…ü``ô-‘ u$j ›E„0ªž'R6AI»týeèqEäw|F"R¤f.sý’’oÊ endstream endobj 44 0 obj <> endobj 45 0 obj <>/W [3[600]29[600]53[600]70[600]72[600 600]81[600]85[600 600]]/Type/Font/Subtype/CIDFontType2/FontDescriptor 44 0 R/DW 1000/CIDToGIDMap/Identity>> endobj 46 0 obj <>stream xœ]‘ËjÄ †÷>…Ë–.Ìeât œMK!‹^褥[GO‚Ð1f‘·¯ÑÖ ~à§ç ¿ì¡{ìŒö”½¹YžÑÓAåp™W'‘^pÔ†”UZúßU¤œ„%,Ÿ·ÅãÔ™a&mKÙ{Ø\¼ÛèMßÝ·„½:…N›1˜CõñÌyµö'4ž€*B«ga_Ä„”Å«ì7‹´Šë2Ý@Î +$:aF$m´OaA£þmŸRÑe¸ž®!³*`W¥‚ÌZDU7ÙTQ8dò:©{ÈäMR'Èä<ª¦„LŽI¥Æ‘ÇÔ¾‰u|Ëß­÷gí‰ç”äê\0~KŒiHÌ?gg»WÑ0É«‹f endstream endobj 25 0 obj <> endobj 18 0 obj <> endobj 47 0 obj <> endobj 48 0 obj <> endobj 49 0 obj <>stream application/pdf iText 2.1.6 by 1T3XT 2012-01-26T10:46:36-08:002012-01-26T10:46:36-08:00Documill Publishor 6.3.9 by Documill (http://www.documill.com/) endstream endobj 50 0 obj <> endobj 51 0 obj <> endobj xref 0 52 0000000000 65535 f 0000012943 00000 n 0000086144 00000 n 0000115785 00000 n 0000143154 00000 n 0000000015 00000 n 0000000146 00000 n 0000000277 00000 n 0000000408 00000 n 0000000539 00000 n 0000000670 00000 n 0000000801 00000 n 0000000931 00000 
n 0000001062 00000 n 0000001194 00000 n 0000001326 00000 n 0000001458 00000 n 0000001590 00000 n 0000154241 00000 n 0000013207 00000 n 0000027786 00000 n 0000027964 00000 n 0000042420 00000 n 0000042580 00000 n 0000056721 00000 n 0000154103 00000 n 0000056899 00000 n 0000064917 00000 n 0000065105 00000 n 0000066599 00000 n 0000066778 00000 n 0000066806 00000 n 0000085129 00000 n 0000085318 00000 n 0000085684 00000 n 0000086276 00000 n 0000114426 00000 n 0000114606 00000 n 0000115127 00000 n 0000115912 00000 n 0000141748 00000 n 0000141930 00000 n 0000142468 00000 n 0000143286 00000 n 0000153313 00000 n 0000153505 00000 n 0000153767 00000 n 0000154340 00000 n 0000154392 00000 n 0000154426 00000 n 0000157275 00000 n 0000157351 00000 n trailer <<380cf0c28cf23324909a56206773f97a>]/Info 51 0 R/Size 52>> startxref 157549 %%EOF papi-5.6.0/src/components/nvml/README000664 001750 001750 00000004134 13216244357 021352 0ustar00jshenry1963jshenry1963000000 000000 The PAPI NVML component provides an interface to the nVidia Management Library (nvml, libnvidia-ml). In versions 8 and later part of the CUDA Toolkit, The NVIDIA Management Library is no longer a separate download is installed with CUDA. On Linux/x86 platforms, it is often found in /usr/lib64/nvidia/libnvidia-ml.so Other download packages may be available at https://developer.nvidia.com/gpu-deployment-kit Before running the NVML component, the configure script for the NVML component must be executed in order to generate the Makefile which contains the configuration settings. This script needs to be executed only once. 
% cd < papi_dir >/src/components/nvml % ./configure --with-nvml-libdir=< nvml_libdir > --with-nvml-incdir=< nvml_incdir > --with-cuda-dir=< cuda_dir > For example, one configuration may look like this %./configure --with-nvml-libdir=/usr/lib64/nvidia --with-nvml-incdir=/usr/local/cuda/include --with-cuda-dir=/usr/local/cuda The NVML component is added to PAPI during the configuration of PAPI by adding the '--with-components=nvml' command line option to configure. % ./configure --with-components="nvml" At build-time the nVidia compiler, nvcc, needs to be in your path, as does the cuda run-time library (libcudart.so). Please refer to http://developer.download.nvidia.com/assets/cuda/files/CUDADownloads/NVML/nvml.pdf for details about the NVML library. Note: Power Limiting using NVML (aka power capping) requires root. PAPI has added support for power limiting using NVML (on supported devices from the Kepler family or later). The executable needs to have root permissions to change the power limits on the device. The power_management_limit can be written to set a limit (in milliWatts) to the power consumption by DEVICE. The value that can be written needs to be between the power_management_limit_constraint_min and power_management_limit_constraint_max. nvml:::DEVICE:power_management_limit nvml:::DEVICE:power_management_limit_constraint_min nvml:::DEVICE:power_management_limit_constraint_max A test for writing of the power_management_limit can be found in the nvml/tests. papi-5.6.0/src/utils/papi_multiplex_cost.c000664 001750 001750 00000045037 13216244370 022723 0ustar00jshenry1963jshenry1963000000 000000 /** file papi_multiplex_cost.c * @brief papi_multiplex_cost utility. * @page papi_multiplex_cost * @section NAME * papi_multiplex_cost - computes execution time costs for basic PAPI operations on multiplexed EventSets. * * @section Synopsis * papi_multiplex_cost [-m, --min < min >] [-x, --max < max >] [-k,-s] * * @section Description * papi_multiplex_cost is a PAPI utility program that computes the * min / max / mean / std. 
deviation of execution times for PAPI start/stop * pairs and for PAPI reads on multiplexed eventsets. * This information provides the basic operating cost to a user's program * for collecting hardware counter data. * Command line options control display capabilities. * * @section Options *
    *
  • -m < Min number of events to test > *
  • -x < Max number of events to test > *
  • -k, Do not time kernel multiplexing *
  • -s, Do not time software multiplexed EventSets *
  • -t THRESHOLD, Test with THRESHOLD iterations of counting loop. *
* * @section Bugs * There are no known bugs in this utility. If you find a bug, * it should be reported to the PAPI Mailing List at . */ /* Open Issues: * Selecting events to add is very primitive right now. * Output format, right now the format targets a gnuplot script I have, * We will probably end up generating a csv per test */ #include #include #include #include #include "papi.h" #include "cost_utils.h" static int first_time = 1; static int skip = 0; static FILE* fp; typedef struct { int first_time; int force_sw; int kernel_mpx; int min; int max; } options_t; static options_t options; void do_output( char *fn, char *message, long long* array, int noc ) { long long min, max; double average, std; std = do_stats( array, &min, &max, &average ); if ( first_time ) { skip = 0; fp = fopen(fn, "w"); if (fp == NULL) { fprintf(stderr,"Unable to open output file, %s, output will not be saved.\n", fn); skip = 1; } else fprintf(fp, "###%s\n#number of events\tmin cycles\tmax cycles\tmean cycles\t\ std deviation\tsw min cycles\tsw max cycles\tsw avg cycles\tsw std dev\n", message); first_time = 0; } if ( !skip ) { fprintf(fp, "%20d\t%10lld\t%10lld\t%10lf\t%10lf", noc, min, max, average, std); std = do_stats( array+num_iters, &min, &max, &average ); fprintf(fp, "\t%10lld\t%10lld\t%10lf\t%10lf\n", min, max, average, std); fflush(fp); } } void init_test(int SoftwareMPX, int KernelMPX, int* Events) { int i; int retval; PAPI_option_t option, itimer; retval = PAPI_assign_eventset_component( SoftwareMPX, 0 ); if (retval != PAPI_OK ) { fprintf(stderr,"Error! PAPI_assign_eventset_component\n"); exit(retval); } retval = PAPI_assign_eventset_component( KernelMPX, 0 ); if (retval != PAPI_OK ) { fprintf(stderr,"Error! PAPI_assign_eventset_component\n"); exit(retval); } retval = PAPI_set_multiplex( KernelMPX ); if (retval != PAPI_OK ) { fprintf(stderr,"Error! 
PAPI_set_multiplex\n"); exit(retval); } PAPI_get_opt(PAPI_DEF_ITIMER,&itimer); memset(&option,0x0,sizeof(option)); option.multiplex.flags = PAPI_MULTIPLEX_FORCE_SW; option.multiplex.eventset = SoftwareMPX; option.multiplex.ns = itimer.itimer.ns; retval = PAPI_set_opt( PAPI_MULTIPLEX, &option ); if (retval != PAPI_OK ) { fprintf(stderr,"Error! PAPI_set_opt\n"); exit(retval); } for (i = 0; i < options.min - 1; i++) { if ( options.kernel_mpx ) { retval = PAPI_add_event( KernelMPX, Events[i]); if (retval != PAPI_OK ) { fprintf(stderr,"Error! PAPI_add_event\n"); exit(retval); } } if ( options.force_sw ) { retval = PAPI_add_event( SoftwareMPX, Events[i]); if (retval != PAPI_OK ) { fprintf(stderr,"Error! PAPI_add_event\n"); exit(retval); } } } } void finalize_test(void) { if (fp) fclose(fp); first_time = 1; } static void usage(void) { printf( "Usage: papi_multiplex_cost [options]\n" "\t-m num, number of events to count\n" "\t-x num, number of events to count\n" "\t-s, Do not run software multiplexing test.\n" "\t-k, Do not attempt kernel multiplexed test.\n" "\t-t THREASHOLD set the threshold for the number " "of iterations. 
Default: 100,000\n" ); } int main( int argc, char **argv ) { int retval, retval_start, retval_stop; int KernelMPX = PAPI_NULL; int SoftwareMPX = PAPI_NULL; int *Events = NULL; int number_of_counters; int i; int c; int dont_loop_forever; long long totcyc, *values = NULL; long long *array = NULL; int event; PAPI_option_t option, itimer; const PAPI_component_info_t *info; PAPI_set_debug(PAPI_QUIET); options.min = 1; options.max = 10; options.force_sw = 1; options.kernel_mpx = 1; while ( ( c=getopt(argc, argv, "hm:x:skt:") ) != -1 ) { switch (c) { case 'h': usage(); exit(0); case 'm': options.min = atoi(optarg); break; case 'x': options.max = atoi(optarg); break; case 's': options.force_sw = 0; break; case 'k': options.kernel_mpx = 0; break; case 't': num_iters = atoi(optarg); default: break; } } printf("This utility benchmarks the overhead of PAPI multiplexing\n"); printf("Warning! This can take a long time (many minutes) to run\n"); printf("The output goes to multiple .dat files in the current directory\n\n"); if ( options.min > options.max ) { fprintf(stderr,"Error! Min # of Events > Max # of Events"); goto cleanup; } values = (long long*)malloc(sizeof(long long) * options.max); array = (long long *)malloc(sizeof(long long) * 2 * num_iters); Events = ( int* )malloc(sizeof(int) * options.max); retval = PAPI_library_init( PAPI_VER_CURRENT ); if (retval != PAPI_VER_CURRENT ) { fprintf(stderr, "Error! PAPI_library_init\n"); exit(retval); } retval = PAPI_set_debug( PAPI_QUIET ); if (retval != PAPI_OK ) { fprintf(stderr,"Error! PAPI_set_debug\n"); exit(retval ); } retval = PAPI_multiplex_init( ); if (retval != PAPI_OK ) { fprintf(stderr,"Error! PAPI_multiplex_init\n"); exit(retval); } info = PAPI_get_component_info(0); options.kernel_mpx &= info->kernel_multiplex; if ( options.kernel_mpx && !info->kernel_multiplex ) { fprintf(stderr,"Error! 
Kernel multiplexing is " "not supported on this platform, bailing!\n"); exit(1); } retval = PAPI_create_eventset( &SoftwareMPX ); if (retval != PAPI_OK) { fprintf(stderr,"Error! PAPI_create_eventset\n"); exit(retval); } retval = PAPI_create_eventset( &KernelMPX ); if (retval != PAPI_OK ) { fprintf(stderr,"PAPI_create_eventset"); exit(retval); } retval = PAPI_assign_eventset_component( KernelMPX, 0 ); if (retval != PAPI_OK ) { fprintf(stderr,"PAPI_assign_eventset_component"); exit(retval); } retval = PAPI_set_multiplex( KernelMPX ); if (retval != PAPI_OK ) { fprintf(stderr,"PAPI_set_multiplex"); exit(retval); } retval = PAPI_assign_eventset_component( SoftwareMPX, 0 ); if (retval != PAPI_OK ) { fprintf(stderr,"PAPI_assign_eventset_component"); exit(retval); } PAPI_get_opt(PAPI_DEF_ITIMER,&itimer); memset(&option,0x0,sizeof(option)); option.multiplex.flags = PAPI_MULTIPLEX_FORCE_SW; option.multiplex.eventset = SoftwareMPX; option.multiplex.ns = itimer.itimer.ns; retval = PAPI_set_opt( PAPI_MULTIPLEX, &option ); if (retval != PAPI_OK) { fprintf(stderr,"PAPI_set_opt"); exit(retval); } if ( !options.kernel_mpx && !options.force_sw ) { fprintf(stderr,"No tests to run."); goto cleanup; } else { fprintf(stderr,"Running test[s]\n"); if (options.kernel_mpx) fprintf(stderr,"\tKernel multiplexing read\n"); if (options.force_sw) fprintf(stderr,"\tSoftware Multiplexing read\n"); } event = 0 | PAPI_NATIVE_MASK; PAPI_enum_event( &event, PAPI_ENUM_FIRST ); /* Find some events to run the tests with. 
*/ for (number_of_counters = 0; number_of_counters < options.max; number_of_counters++) { dont_loop_forever = 0; if ( options.kernel_mpx ) { do { PAPI_enum_event( &event, PAPI_ENUM_EVENTS ); dont_loop_forever++; } while ( ( retval = PAPI_add_event( KernelMPX, event ) ) != PAPI_OK && dont_loop_forever < 512); } else { do { PAPI_enum_event( &event, PAPI_ENUM_EVENTS ); dont_loop_forever++; } while ( ( retval = PAPI_add_event( SoftwareMPX, event) ) != PAPI_OK && dont_loop_forever < 512); } if ( dont_loop_forever == 512 ) fprintf(stderr,"I can't find %d events to count at once.", options.max); Events[number_of_counters] = event; } PAPI_cleanup_eventset( KernelMPX ); PAPI_cleanup_eventset( SoftwareMPX ); /* Start/Stop test */ init_test(SoftwareMPX, KernelMPX, Events); for (number_of_counters = options.min; number_of_counters < options.max; number_of_counters++) { if ( options.kernel_mpx ) { if ( ( retval = PAPI_add_event( KernelMPX, Events[number_of_counters - options.min] ) ) != PAPI_OK ) { fprintf(stderr,"PAPI_add_event"); goto cleanup; } if ( ( retval = PAPI_start( KernelMPX ) ) != PAPI_OK ) { fprintf(stderr,"PAPI_start"); exit(retval); } if ( ( retval = PAPI_stop( KernelMPX, values ) ) != PAPI_OK ) { fprintf(stderr,"PAPI_stop"); exit(retval); } /* KernelMPX Timing loop */ for ( i = 0; i < num_iters; i++ ) { totcyc = PAPI_get_real_cyc(); retval_start=PAPI_start( KernelMPX ); retval_stop=PAPI_stop( KernelMPX, values ); array[i] = PAPI_get_real_cyc() - totcyc; if (retval_start || retval_stop) fprintf(stderr,"PAPI start/stop"); } /* End 1 timing run */ } else memset(array, 0, sizeof(long long) * num_iters ); /* Also test software multiplexing */ if ( options.force_sw ) { if ( ( retval = PAPI_add_event( SoftwareMPX, Events[number_of_counters - options.min] ) ) != PAPI_OK ) { fprintf(stderr,"PAPI_add_event"); goto cleanup; } if ( ( retval = PAPI_start( SoftwareMPX ) ) != PAPI_OK ) { fprintf(stderr,"PAPI_start"); exit(retval); } if ( ( retval = PAPI_stop( SoftwareMPX, 
values ) ) != PAPI_OK ) { fprintf(stderr,"PAPI_stop"); exit(retval); } /* SoftwareMPX Timing Loop */ for ( i = num_iters; i < 2*num_iters; i++ ) { totcyc = PAPI_get_real_cyc(); retval_start=PAPI_start( SoftwareMPX ); retval_stop=PAPI_stop( SoftwareMPX, values ); array[i] = PAPI_get_real_cyc() - totcyc; if (retval_start || retval_stop) fprintf(stderr,"PAPI start/stop"); } /* End 2 timing run */ } else { memset(array+num_iters, 0, sizeof(long long) * num_iters ); } do_output( "papi_startstop.dat", "Multiplexed PAPI_read()", array, number_of_counters ); } /* End counter loop */ PAPI_cleanup_eventset( SoftwareMPX ); PAPI_cleanup_eventset( KernelMPX ); finalize_test(); /* PAPI_read() test */ init_test(SoftwareMPX, KernelMPX, Events); for (number_of_counters = options.min; number_of_counters < options.max; number_of_counters++) { if ( options.kernel_mpx ) { if ( ( retval = PAPI_add_event( KernelMPX, Events[number_of_counters - options.min] ) ) != PAPI_OK ) { fprintf(stderr,"PAPI_add_event"); goto cleanup; } if ( ( retval = PAPI_start( KernelMPX ) ) != PAPI_OK ) { fprintf(stderr,"PAPI_start"); exit(retval); } PAPI_read( KernelMPX, values ); /* KernelMPX Timing loop */ for ( i = 0; i < num_iters; i++ ) { totcyc = PAPI_get_real_cyc(); retval = PAPI_read( KernelMPX, values ); array[i] = PAPI_get_real_cyc() - totcyc; } /* End 1 timing run */ retval_stop=PAPI_stop( KernelMPX, values ); if (retval_stop!=PAPI_OK) fprintf(stderr,"PAPI_stop"); } else memset(array, 0, sizeof(long long) * num_iters ); /* Also test software multiplexing */ if ( options.force_sw ) { if ( ( retval = PAPI_add_event( SoftwareMPX, Events[number_of_counters - options.min] ) ) != PAPI_OK ) { fprintf(stderr,"PAPI_add_event"); goto cleanup; } if ( ( retval = PAPI_start( SoftwareMPX ) ) != PAPI_OK ) { fprintf(stderr,"PAPI_start"); exit(retval); } PAPI_read( SoftwareMPX, values ); /* SoftwareMPX Timing Loop */ for ( i = num_iters; i < 2*num_iters; i++ ) { totcyc = PAPI_get_real_cyc(); retval = PAPI_read( 
SoftwareMPX, values ); array[i] = PAPI_get_real_cyc() - totcyc; } /* End 2 timing run */ retval_stop=PAPI_stop( SoftwareMPX, values ); if (retval_stop!=PAPI_OK) fprintf(stderr,"PAPI_stop"); } else memset(array+num_iters, 0, sizeof(long long) * num_iters ); do_output( "papi_read.dat", "Multiplexed PAPI_read()", array, number_of_counters ); } /* End counter loop */ PAPI_cleanup_eventset( SoftwareMPX ); PAPI_cleanup_eventset( KernelMPX ); finalize_test(); /* PAPI_read_ts() test */ init_test( SoftwareMPX, KernelMPX, Events); for (number_of_counters = options.min; number_of_counters < options.max; number_of_counters++) { if ( options.kernel_mpx ) { if ( (retval = PAPI_add_event( KernelMPX, Events[number_of_counters - options.min] ) ) != PAPI_OK ) { fprintf(stderr,"PAPI_add_event"); goto cleanup; } if ( ( retval = PAPI_start( KernelMPX ) ) != PAPI_OK ) { fprintf(stderr,"PAPI_start"); exit(retval); } PAPI_read_ts( KernelMPX, values, &totcyc ); /* KernelMPX Timing loop */ for ( i = 0; i < num_iters; i++ ) { retval = PAPI_read_ts( KernelMPX, values, &array[i] ); } /* End 1 timing run */ /* post-process the timing array */ for ( i = num_iters - 1; i > 0; i-- ) { array[i] -= array[i - 1]; } array[0] -= totcyc; retval_stop=PAPI_stop( KernelMPX, values ); if (retval_stop!=PAPI_OK) fprintf(stderr,"PAPI_stop"); } else memset(array, 0, sizeof(long long) * num_iters ); /* Also test software multiplexing */ if ( options.force_sw ) { if ( ( retval = PAPI_add_event( SoftwareMPX, Events[number_of_counters - options.min] ) ) != PAPI_OK ) { fprintf(stderr,"PAPI_add_event"); goto cleanup; } if ( ( retval = PAPI_start( SoftwareMPX ) ) != PAPI_OK ) { fprintf(stderr,"PAPI_start"); exit(retval); } PAPI_read_ts( SoftwareMPX, values, &totcyc); /* SoftwareMPX Timing Loop */ for ( i = num_iters; i < 2*num_iters; i++ ) { retval = PAPI_read_ts( SoftwareMPX, values, &array[i]); } /* End 2 timing run */ retval_stop=PAPI_stop( SoftwareMPX, values ); if (retval_stop!=PAPI_OK) 
fprintf(stderr,"PAPI_stop"); /* post-process the timing array */ for ( i = 2*num_iters - 1; i > num_iters; i-- ) { array[i] -= array[i - 1]; } array[num_iters] -= totcyc; } else memset(array+num_iters, 0, sizeof(long long) * num_iters ); do_output( "papi_read_ts.dat", "Multiplexed PAPI_read_ts()", array, number_of_counters ); } /* End counter loop */ PAPI_cleanup_eventset( SoftwareMPX ); PAPI_cleanup_eventset( KernelMPX ); finalize_test(); /* PAPI_accum() test */ init_test(SoftwareMPX, KernelMPX, Events); for (number_of_counters = options.min; number_of_counters < options.max; number_of_counters++) { if ( options.kernel_mpx ) { if ( ( retval = PAPI_add_event( KernelMPX, Events[number_of_counters - options.min] ) ) != PAPI_OK ) { fprintf(stderr,"PAPI_add_event"); goto cleanup; } if ( ( retval = PAPI_start( KernelMPX ) ) != PAPI_OK ) { fprintf(stderr,"PAPI_start"); exit(retval); } PAPI_read( KernelMPX, values ); /* KernelMPX Timing loop */ for ( i = 0; i < num_iters; i++ ) { totcyc = PAPI_get_real_cyc(); retval = PAPI_accum( KernelMPX, values ); array[i] = PAPI_get_real_cyc() - totcyc; } /* End 1 timing run */ retval_stop=PAPI_stop( KernelMPX, values ); if (retval_stop!=PAPI_OK) fprintf(stderr,"PAPI_stop"); } else { memset(array, 0, sizeof(long long) * num_iters ); } /* Also test software multiplexing */ if ( options.force_sw ) { if ( ( retval = PAPI_add_event( SoftwareMPX, Events[number_of_counters - options.min] ) ) != PAPI_OK ) { fprintf(stderr,"PAPI_add_event"); goto cleanup; } if ( ( retval = PAPI_start( SoftwareMPX ) ) != PAPI_OK ) { fprintf(stderr,"PAPI_start"); exit(retval); } PAPI_read( SoftwareMPX, values ); /* SoftwareMPX Timing Loop */ for ( i = num_iters; i < 2*num_iters; i++ ) { totcyc = PAPI_get_real_cyc(); retval = PAPI_accum( SoftwareMPX, values ); array[i] = PAPI_get_real_cyc() - totcyc; } /* End 2 timing run */ retval_stop=PAPI_stop( SoftwareMPX, values ); if (retval_stop!=PAPI_OK) fprintf(stderr,"PAPI_stop"); } else { memset(array+num_iters, 0, 
sizeof(long long) * num_iters ); } do_output( "papi_accum.dat", "Multiplexed PAPI_accum()", array, number_of_counters ); } /* End counter loop */ PAPI_cleanup_eventset( SoftwareMPX ); PAPI_cleanup_eventset( KernelMPX ); finalize_test(); /* PAPI_reset() test */ init_test(SoftwareMPX, KernelMPX, Events); for (number_of_counters = options.min; number_of_counters < options.max; number_of_counters++) { if ( options.kernel_mpx ) { if ( ( retval = PAPI_add_event( KernelMPX, Events[number_of_counters - options.min] ) ) != PAPI_OK ) { fprintf(stderr,"PAPI_add_event"); goto cleanup; } if ( ( retval = PAPI_start( KernelMPX ) ) != PAPI_OK ) { fprintf(stderr,"PAPI_start"); exit(retval); } PAPI_read( KernelMPX, values ); /* KernelMPX Timing loop */ for ( i = 0; i < num_iters; i++ ) { totcyc = PAPI_get_real_cyc(); retval = PAPI_reset( KernelMPX ); array[i] = PAPI_get_real_cyc() - totcyc; } /* End 1 timing run */ retval_stop=PAPI_stop( KernelMPX, values ); if (retval_stop!=PAPI_OK) fprintf(stderr,"PAPI_stop"); } else memset(array, 0, sizeof(long long) * num_iters ); /* Also test software multiplexing */ if ( options.force_sw ) { if ( ( retval = PAPI_add_event( SoftwareMPX, Events[number_of_counters - options.min] ) ) != PAPI_OK ) { fprintf(stderr,"PAPI_add_event"); goto cleanup; } if ( ( retval = PAPI_start( SoftwareMPX ) ) != PAPI_OK ) { fprintf(stderr,"PAPI_start"); exit(retval); } PAPI_read( SoftwareMPX, values ); /* SoftwareMPX Timing Loop */ for ( i = num_iters; i < 2*num_iters; i++ ) { totcyc = PAPI_get_real_cyc(); retval = PAPI_reset( SoftwareMPX ); array[i] = PAPI_get_real_cyc() - totcyc; } /* End 2 timing run */ retval_stop=PAPI_stop( SoftwareMPX, values ); if (retval_stop!=PAPI_OK) fprintf(stderr,"PAPI_stop"); } else { memset(array+num_iters, 0, sizeof(long long) * num_iters ); } do_output( "papi_reset.dat", "Multiplexed PAPI_reset()", array, number_of_counters ); } /* End counter loop */ PAPI_cleanup_eventset( SoftwareMPX ); PAPI_cleanup_eventset( KernelMPX ); 
finalize_test(); return 0; cleanup: if ( KernelMPX != PAPI_NULL) PAPI_cleanup_eventset( KernelMPX ); if ( SoftwareMPX != PAPI_NULL ) PAPI_cleanup_eventset( KernelMPX ); if ( values != NULL ) free(values); if ( array != NULL ) free(array); if ( Events != NULL ) free(Events); PAPI_shutdown(); return 1; } papi-5.6.0/src/libpfm4/lib/pfmlib_intel_snbep_unc_priv.h000664 001750 001750 00000031007 13216244365 025343 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_intel_snbep_unc_priv.c : Intel SandyBridge/IvyBridge-EP common definitions * * Copyright (c) 2012 Google, Inc * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/
#ifndef __PFMLIB_INTEL_SNBEP_UNC_PRIV_H__
#define __PFMLIB_INTEL_SNBEP_UNC_PRIV_H__

/*
 * Intel x86 specific pmu flags (pmu->flags 16 MSB)
 */
#define INTEL_PMU_FL_UNC_OCC	0x10000	/* PMU has occupancy counter filters */
#define INTEL_PMU_FL_UNC_CBO	0x20000	/* PMU is Cbox */

/*
 * Attribute indexes. Each index is turned into a one-bit mask by the
 * matching _SNBEP_UNC_ATTR_* macro below.
 */
#define SNBEP_UNC_ATTR_E	0
#define SNBEP_UNC_ATTR_I	1
#define SNBEP_UNC_ATTR_T8	2
#define SNBEP_UNC_ATTR_T5	3
#define SNBEP_UNC_ATTR_TF	4
#define SNBEP_UNC_ATTR_CF	5
#define SNBEP_UNC_ATTR_NF	6 /* for filter0 */
#define SNBEP_UNC_ATTR_FF	7
#define SNBEP_UNC_ATTR_A	8
#define SNBEP_UNC_ATTR_NF1	9 /* for filter1 */
#define SNBEP_UNC_ATTR_ISOC	10 /* isochronous */
#define SNBEP_UNC_ATTR_NC	11 /* non-coherent */
#define SNBEP_UNC_ATTR_CF1	12 /* core-filter hswep */

/* Bit masks derived from the attribute indexes above. */
#define _SNBEP_UNC_ATTR_I	(1 << SNBEP_UNC_ATTR_I)
#define _SNBEP_UNC_ATTR_E	(1 << SNBEP_UNC_ATTR_E)
#define _SNBEP_UNC_ATTR_T8	(1 << SNBEP_UNC_ATTR_T8)
#define _SNBEP_UNC_ATTR_T5	(1 << SNBEP_UNC_ATTR_T5)
#define _SNBEP_UNC_ATTR_TF	(1 << SNBEP_UNC_ATTR_TF)
#define _SNBEP_UNC_ATTR_CF	(1 << SNBEP_UNC_ATTR_CF)
#define _SNBEP_UNC_ATTR_NF	(1 << SNBEP_UNC_ATTR_NF)
#define _SNBEP_UNC_ATTR_FF	(1 << SNBEP_UNC_ATTR_FF)
#define _SNBEP_UNC_ATTR_A	(1 << SNBEP_UNC_ATTR_A)
#define _SNBEP_UNC_ATTR_NF1	(1 << SNBEP_UNC_ATTR_NF1)
#define _SNBEP_UNC_ATTR_ISOC	(1 << SNBEP_UNC_ATTR_ISOC)
#define _SNBEP_UNC_ATTR_NC	(1 << SNBEP_UNC_ATTR_NC)
#define _SNBEP_UNC_ATTR_CF1	(1 << SNBEP_UNC_ATTR_CF1)

/*
 * Attribute mask combinations. Judging by the names, the prefix selects
 * the processor generation (SNBEP, IVBEP, HSWEP, BDX, KNL) and the middle
 * part the uncore unit the mask applies to.
 */
#define SNBEP_UNC_IRP_ATTRS \
	(_SNBEP_UNC_ATTR_E|_SNBEP_UNC_ATTR_T8)

#define HSWEP_UNC_IRP_ATTRS \
	(_SNBEP_UNC_ATTR_E|_SNBEP_UNC_ATTR_T8|_SNBEP_UNC_ATTR_I)

#define BDX_UNC_IRP_ATTRS HSWEP_UNC_IRP_ATTRS

#define SNBEP_UNC_R3QPI_ATTRS \
	(_SNBEP_UNC_ATTR_I|_SNBEP_UNC_ATTR_E|_SNBEP_UNC_ATTR_T8)

#define HSWEP_UNC_R3QPI_ATTRS SNBEP_UNC_R3QPI_ATTRS
#define BDX_UNC_R3QPI_ATTRS SNBEP_UNC_R3QPI_ATTRS

#define IVBEP_UNC_R3QPI_ATTRS \
	(_SNBEP_UNC_ATTR_E|_SNBEP_UNC_ATTR_T8)

#define SNBEP_UNC_R2PCIE_ATTRS \
	(_SNBEP_UNC_ATTR_I|_SNBEP_UNC_ATTR_E|_SNBEP_UNC_ATTR_T8)

#define HSWEP_UNC_R2PCIE_ATTRS SNBEP_UNC_R2PCIE_ATTRS
#define BDX_UNC_R2PCIE_ATTRS SNBEP_UNC_R2PCIE_ATTRS

#define IVBEP_UNC_R2PCIE_ATTRS \
	(_SNBEP_UNC_ATTR_E|_SNBEP_UNC_ATTR_T8)

#define SNBEP_UNC_QPI_ATTRS \
	(_SNBEP_UNC_ATTR_I|_SNBEP_UNC_ATTR_E|_SNBEP_UNC_ATTR_T8)

#define IVBEP_UNC_QPI_ATTRS \
	(_SNBEP_UNC_ATTR_E|_SNBEP_UNC_ATTR_T8)

#define HSWEP_UNC_QPI_ATTRS SNBEP_UNC_QPI_ATTRS
#define BDX_UNC_QPI_ATTRS SNBEP_UNC_QPI_ATTRS

#define SNBEP_UNC_UBO_ATTRS \
	(_SNBEP_UNC_ATTR_I|_SNBEP_UNC_ATTR_E|_SNBEP_UNC_ATTR_T8)

#define IVBEP_UNC_UBO_ATTRS \
	(_SNBEP_UNC_ATTR_E|_SNBEP_UNC_ATTR_T8)

#define HSWEP_UNC_UBO_ATTRS SNBEP_UNC_UBO_ATTRS
#define BDX_UNC_UBO_ATTRS SNBEP_UNC_UBO_ATTRS

#define SNBEP_UNC_PCU_ATTRS \
	(_SNBEP_UNC_ATTR_I|_SNBEP_UNC_ATTR_E|_SNBEP_UNC_ATTR_T5)

#define IVBEP_UNC_PCU_ATTRS \
	(_SNBEP_UNC_ATTR_E|_SNBEP_UNC_ATTR_T5)

#define HSWEP_UNC_PCU_ATTRS SNBEP_UNC_PCU_ATTRS

/* NOTE(review): BDX uses T8 here while the other PCU masks use T5 and
 * BDX_UNC_PCU_BAND_ATTRS below still derives from the SNBEP (T5) mask --
 * confirm this asymmetry is intended. */
#define BDX_UNC_PCU_ATTRS \
	(_SNBEP_UNC_ATTR_I|_SNBEP_UNC_ATTR_E|_SNBEP_UNC_ATTR_T8)

#define SNBEP_UNC_PCU_BAND_ATTRS \
	(SNBEP_UNC_PCU_ATTRS | _SNBEP_UNC_ATTR_FF)

#define IVBEP_UNC_PCU_BAND_ATTRS \
	(IVBEP_UNC_PCU_ATTRS | _SNBEP_UNC_ATTR_FF)

#define HSWEP_UNC_PCU_BAND_ATTRS SNBEP_UNC_PCU_BAND_ATTRS
#define BDX_UNC_PCU_BAND_ATTRS SNBEP_UNC_PCU_BAND_ATTRS

#define SNBEP_UNC_IMC_ATTRS \
	(_SNBEP_UNC_ATTR_I|_SNBEP_UNC_ATTR_E|_SNBEP_UNC_ATTR_T8)

#define IVBEP_UNC_IMC_ATTRS \
	(_SNBEP_UNC_ATTR_E|_SNBEP_UNC_ATTR_T8)

#define HSWEP_UNC_IMC_ATTRS SNBEP_UNC_IMC_ATTRS

#define BDX_UNC_IMC_ATTRS SNBEP_UNC_IMC_ATTRS

#define SNBEP_UNC_CBO_ATTRS \
	(_SNBEP_UNC_ATTR_I |\
	 _SNBEP_UNC_ATTR_E |\
	 _SNBEP_UNC_ATTR_T8 |\
	 _SNBEP_UNC_ATTR_CF |\
	 _SNBEP_UNC_ATTR_TF)

#define IVBEP_UNC_CBO_ATTRS \
	(_SNBEP_UNC_ATTR_E |\
	 _SNBEP_UNC_ATTR_T8 |\
	 _SNBEP_UNC_ATTR_CF |\
	 _SNBEP_UNC_ATTR_TF)

#define HSWEP_UNC_CBO_ATTRS \
	(_SNBEP_UNC_ATTR_E |\
	 _SNBEP_UNC_ATTR_T8 |\
	 _SNBEP_UNC_ATTR_CF1 |\
	 _SNBEP_UNC_ATTR_TF)

#define BDX_UNC_CBO_ATTRS HSWEP_UNC_CBO_ATTRS

#define SNBEP_UNC_CBO_NID_ATTRS \
	(SNBEP_UNC_CBO_ATTRS|_SNBEP_UNC_ATTR_NF)

#define IVBEP_UNC_CBO_NID_ATTRS \
	(IVBEP_UNC_CBO_ATTRS|_SNBEP_UNC_ATTR_NF1)

#define HSWEP_UNC_CBO_NID_ATTRS \
	(HSWEP_UNC_CBO_ATTRS | _SNBEP_UNC_ATTR_NF1)

#define BDX_UNC_CBO_NID_ATTRS HSWEP_UNC_CBO_NID_ATTRS

#define SNBEP_UNC_HA_ATTRS \
	(_SNBEP_UNC_ATTR_I|_SNBEP_UNC_ATTR_E|_SNBEP_UNC_ATTR_T8)

#define IVBEP_UNC_HA_ATTRS \
	(_SNBEP_UNC_ATTR_E|_SNBEP_UNC_ATTR_T8)

#define HSWEP_UNC_HA_ATTRS \
	(_SNBEP_UNC_ATTR_E|_SNBEP_UNC_ATTR_T8|_SNBEP_UNC_ATTR_I)

#define BDX_UNC_HA_ATTRS \
	(_SNBEP_UNC_ATTR_E|_SNBEP_UNC_ATTR_T8|_SNBEP_UNC_ATTR_I)

#define SNBEP_UNC_HA_OPC_ATTRS \
	(SNBEP_UNC_HA_ATTRS|_SNBEP_UNC_ATTR_A)

#define HSWEP_UNC_SBO_ATTRS \
	(_SNBEP_UNC_ATTR_E|_SNBEP_UNC_ATTR_T8|_SNBEP_UNC_ATTR_I)

#define BDX_UNC_SBO_ATTRS \
	(_SNBEP_UNC_ATTR_E|_SNBEP_UNC_ATTR_T8|_SNBEP_UNC_ATTR_I)

#define KNL_UNC_CHA_TOR_ATTRS _SNBEP_UNC_ATTR_NF1

/*
 * Overlay of the raw 64-bit register value (val) with one bitfield view
 * per register kind. All views are aliases of the same storage.
 */
typedef union {
	uint64_t val;
	struct {
		unsigned long unc_event:8;	/* event code */
		unsigned long unc_umask:8;	/* unit mask */
		unsigned long unc_res1:1;	/* reserved */
		unsigned long unc_rst:1;	/* reset */
		unsigned long unc_edge:1;	/* edge detect */
		unsigned long unc_res2:3;	/* reserved */
		unsigned long unc_en:1;		/* enable */
		unsigned long unc_inv:1;	/* invert counter mask */
		unsigned long unc_thres:8;	/* counter mask */
		unsigned long unc_res3:32;	/* reserved */
	} com; /* covers common fields for cbox, ha, imc, ubox, r2pcie, r3qpi, sbox */
	struct {
		unsigned long unc_event:8;	/* event code */
		unsigned long unc_umask:8;	/* unit mask */
		unsigned long unc_res1:1;	/* reserved */
		unsigned long unc_rst:1;	/* reset */
		unsigned long unc_edge:1;	/* edge detect */
		unsigned long unc_tid:1;	/* tid filter enable */
		unsigned long unc_res2:2;	/* reserved */
		unsigned long unc_en:1;		/* enable */
		unsigned long unc_inv:1;	/* invert counter mask */
		unsigned long unc_thres:8;	/* counter mask */
		unsigned long unc_res3:32;	/* reserved */
	} cbo; /* covers c-box */
	struct {
		unsigned long unc_event:8;	/* event code */
		unsigned long unc_res1:6;	/* reserved */
		unsigned long unc_occ:2;	/* occ select */
		unsigned long unc_res2:1;	/* reserved */
		unsigned long unc_rst:1;	/* reset */
		unsigned long unc_edge:1;	/* edge detect */
		unsigned long unc_res3:1;	/* reserved */
		unsigned long unc_res4:2;	/* reserved */
		unsigned long unc_en:1;		/* enable */
		unsigned long unc_inv:1;	/* invert counter mask */
		unsigned long unc_thres:5;	/* threshold */
		unsigned long unc_res5:1;	/* reserved */
		unsigned long unc_occ_inv:1;	/* occupancy invert */
		unsigned long unc_occ_edge:1;	/* occupancy edge detect */
		unsigned long unc_res6:32;	/* reserved */
	} pcu; /* covers pcu */
	struct {
		unsigned long unc_event:8;	/* event code */
		unsigned long unc_res1:6;	/* reserved */
		unsigned long unc_occ:2;	/* occ select */
		unsigned long unc_res2:1;	/* reserved */
		unsigned long unc_rst:1;	/* reset */
		unsigned long unc_edge:1;	/* edge detect */
		unsigned long unc_res3:1;	/* reserved */
		unsigned long unc_ov_en:1;	/* overflow enable */
		unsigned long unc_sel_ext:1;	/* event_sel extension */
		unsigned long unc_en:1;		/* enable */
		unsigned long unc_res4:1;	/* reserved */
		unsigned long unc_thres:5;	/* threshold */
		unsigned long unc_res5:1;	/* reserved */
		unsigned long unc_occ_inv:1;	/* occupancy invert */
		unsigned long unc_occ_edge:1;	/* occupancy edge detect */
		unsigned long unc_res6:32;	/* reserved */
	} ivbep_pcu; /* covers ivb-ep pcu */
	struct {
		unsigned long unc_event:8;	/* event code */
		unsigned long unc_umask:8;	/* unit mask */
		unsigned long unc_res1:1;	/* reserved */
		unsigned long unc_rst:1;	/* reset */
		unsigned long unc_edge:1;	/* edge detect */
		unsigned long unc_res2:1;	/* reserved */
		unsigned long unc_res3:1;	/* reserved */
		unsigned long unc_event_ext:1;	/* event code extension */
		unsigned long unc_en:1;		/* enable */
		unsigned long unc_inv:1;	/* invert counter mask */
		unsigned long unc_thres:8;	/* threshold */
		unsigned long unc_res4:32;	/* reserved */
	} qpi; /* covers qpi */
	/*
	 * NOTE(review): the widths in cbo_filt add up to 65 bits
	 * (1+3+1+3+2+8+5+9+1+32), one more than every other view in this
	 * union -- confirm whether res3 or res4 is one bit too wide.
	 */
	struct {
		unsigned long tid:1;
		unsigned long cid:3;
		unsigned long res0:1;
		unsigned long res1:3;
		unsigned long res2:2;
		unsigned long nid:8;
		unsigned long state:5;
		unsigned long opc:9;
		unsigned long res3:1;
		unsigned long res4:32;
	} cbo_filt; /* cbox filter */
	struct {
		unsigned long tid:1;
		unsigned long cid:4;
		unsigned long res0:12;
		unsigned long state:6;
		unsigned long res1:9;
		unsigned long res2:32;
	} ivbep_cbo_filt0; /* ivbep cbox filter0 */
	struct {
		unsigned long nid:16;
		unsigned long res0:4;
		unsigned long opc:9;
		unsigned long res1:1;
		unsigned long nc:1;
		unsigned long isoc:1;
		unsigned long res2:32;
	} ivbep_cbo_filt1; /* ivbep cbox filter1 */
	struct {
		unsigned long tid:1;
		unsigned long cid:5;
		unsigned long res0:11;
		unsigned long state:7;
		unsigned long res1:8;
		unsigned long res2:32;
	} hswep_cbo_filt0; /* hswep cbox filter0 */
	struct {
		unsigned long nid:16;
		unsigned long res0:4;
		unsigned long opc:9;
		unsigned long res1:1;
		unsigned long nc:1;
		unsigned long isoc:1;
		unsigned long res2:32;
	} hswep_cbo_filt1; /* hswep cbox filter1 */
	struct {
		unsigned long filt0:8;		/* band0 freq filter */
		unsigned long filt1:8;		/* band1 freq filter */
		unsigned long filt2:8;		/* band2 freq filter */
		unsigned long filt3:8;		/* band3 freq filter */
		unsigned long res1:32;		/* reserved */
	} pcu_filt;
	struct {
		unsigned long res1:6;
		unsigned long lo_addr:26;	/* lo order 26b */
		unsigned long hi_addr:14;	/* hi order 14b */
		unsigned long res2:18;		/* reserved */
	} ha_addr;
	struct {
		unsigned long opc:6;		/* opcode match */
		unsigned long res1:26;		/* reserved */
		unsigned long res2:32;		/* reserved */
	} ha_opc;
	struct {
		unsigned long unc_event:8;	/* event code */
		unsigned long unc_umask:8;	/* unit mask */
		unsigned long unc_res1:1;	/* reserved */
		unsigned long unc_rst:1;	/* reset */
		unsigned long unc_edge:1;	/* edge detect */
		unsigned long unc_res2:3;	/* reserved */
		unsigned long unc_en:1;		/* enable */
		unsigned long unc_res3:1;	/* reserved */
		unsigned long unc_thres:8;	/* counter mask */
		unsigned long unc_res4:32;	/* reserved */
	} irp; /* covers irp */
} pfm_snbep_unc_reg_t;

/* Entry points shared by the uncore PMU descriptions; defined elsewhere. */
extern void pfm_intel_snbep_unc_perf_validate_pattrs(void *this, pfmlib_event_desc_t *e);
extern int pfm_intel_snbep_unc_get_encoding(void *this, pfmlib_event_desc_t *e);
extern const pfmlib_attr_desc_t snbep_unc_mods[];
extern int pfm_intel_snbep_unc_detect(void *this);
extern int pfm_intel_ivbep_unc_detect(void *this);
extern int pfm_intel_hswep_unc_detect(void *this);
extern int pfm_intel_knl_unc_detect(void *this);
extern int pfm_intel_bdx_unc_detect(void *this);
extern int pfm_intel_snbep_unc_get_perf_encoding(void *this, pfmlib_event_desc_t *e);
extern int pfm_intel_snbep_unc_can_auto_encode(void *this, int pidx, int uidx);
extern int pfm_intel_snbep_unc_get_event_attr_info(void *this, int pidx, int attr_idx, pfmlib_event_attr_info_t *info);

/*
 * Return non-zero when 'reg' selects one of the three C-Box events listed
 * below with umask bit 0 set, on a PMU flagged INTEL_PMU_FL_UNC_CBO.
 * 'this' is used as a pfmlib_pmu_t pointer.
 */
static inline int
is_cbo_filt_event(void *this, pfm_intel_x86_reg_t reg)
{
	pfmlib_pmu_t *pmu = this;
	uint64_t sel = reg.sel_event_select;
	/*
	 * umask bit 0 must be 1 (OPCODE)
	 * TOR_INSERT: event code 0x35
	 * TOR_OCCUPANCY: event code 0x36
	 * LLC_LOOKUP : event code 0x34
	 */
	return (pmu->flags & INTEL_PMU_FL_UNC_CBO)
		&& (reg.sel_unit_mask & 0x1)
		&& (sel == 0x35 || sel == 0x36 || sel == 0x34);
}

#endif /* __PFMLIB_INTEL_SNBEP_UNC_PRIV_H__ */
papi-5.6.0/src/libpfm4/lib/pfmlib_intel_hswep_unc_cbo.c000664 001750 001750 00000010010 13216244365 025127 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_intel_hswep_unc_cbo.c : Intel Haswell-EP C-Box uncore PMU * * Copyright (c) 2014 Google Inc.
All rights reserved * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include #include #include #include #include /* private headers */ #include "pfmlib_priv.h" #include "pfmlib_intel_x86_priv.h" #include "pfmlib_intel_snbep_unc_priv.h" #include "events/intel_hswep_unc_cbo_events.h" static void display_cbo(void *this, pfmlib_event_desc_t *e, void *val) { const intel_x86_entry_t *pe = this_pe(this); pfm_snbep_unc_reg_t *reg = val; pfm_snbep_unc_reg_t f; __pfm_vbprintf("[UNC_CBO=0x%"PRIx64" event=0x%x umask=0x%x en=%d " "inv=%d edge=%d thres=%d tid_en=%d] %s\n", reg->val, reg->cbo.unc_event, reg->cbo.unc_umask, reg->cbo.unc_en, reg->cbo.unc_inv, reg->cbo.unc_edge, reg->cbo.unc_thres, reg->cbo.unc_tid, pe[e->event].name); if (e->count == 1) return; f.val = e->codes[1]; __pfm_vbprintf("[UNC_CBOX_FILTER0=0x%"PRIx64" tid=%d core=0x%x" " state=0x%x]\n", f.val, f.ivbep_cbo_filt0.tid, f.ivbep_cbo_filt0.cid, f.ivbep_cbo_filt0.state); if (e->count == 2) return; f.val = e->codes[2]; __pfm_vbprintf("[UNC_CBOX_FILTER1=0x%"PRIx64" nid=%d opc=0x%x" " nc=0x%x isoc=0x%x]\n", f.val, f.ivbep_cbo_filt1.nid, f.ivbep_cbo_filt1.opc, f.ivbep_cbo_filt1.nc, f.ivbep_cbo_filt1.isoc); } #define DEFINE_C_BOX(n) \ pfmlib_pmu_t intel_hswep_unc_cb##n##_support = {\ .desc = "Intel Haswell-EP C-Box "#n" uncore",\ .name = "hswep_unc_cbo"#n,\ .perf_name = "uncore_cbox_"#n,\ .pmu = PFM_PMU_INTEL_HSWEP_UNC_CB##n,\ .pme_count = LIBPFM_ARRAY_SIZE(intel_hswep_unc_c_pe),\ .type = PFM_PMU_TYPE_UNCORE,\ .num_cntrs = 4,\ .num_fixed_cntrs = 0,\ .max_encoding = 2,\ .pe = intel_hswep_unc_c_pe,\ .atdesc = snbep_unc_mods,\ .flags = PFMLIB_PMU_FL_RAW_UMASK|INTEL_PMU_FL_UNC_CBO,\ .pmu_detect = pfm_intel_hswep_unc_detect,\ .get_event_encoding[PFM_OS_NONE] = pfm_intel_snbep_unc_get_encoding,\ PFMLIB_ENCODE_PERF(pfm_intel_snbep_unc_get_perf_encoding),\ PFMLIB_OS_DETECT(pfm_intel_x86_perf_detect), \ .get_event_first = pfm_intel_x86_get_event_first,\ .get_event_next = pfm_intel_x86_get_event_next,\ .event_is_valid = pfm_intel_x86_event_is_valid,\ .validate_table = 
pfm_intel_x86_validate_table,\ .get_event_info = pfm_intel_x86_get_event_info,\ .get_event_attr_info = pfm_intel_x86_get_event_attr_info,\ PFMLIB_VALID_PERF_PATTRS(pfm_intel_snbep_unc_perf_validate_pattrs),\ .get_event_nattrs = pfm_intel_x86_get_event_nattrs,\ .can_auto_encode = pfm_intel_x86_can_auto_encode, \ .display_reg = display_cbo,\ } DEFINE_C_BOX(0); DEFINE_C_BOX(1); DEFINE_C_BOX(2); DEFINE_C_BOX(3); DEFINE_C_BOX(4); DEFINE_C_BOX(5); DEFINE_C_BOX(6); DEFINE_C_BOX(7); DEFINE_C_BOX(8); DEFINE_C_BOX(9); DEFINE_C_BOX(10); DEFINE_C_BOX(11); DEFINE_C_BOX(12); DEFINE_C_BOX(13); DEFINE_C_BOX(14); DEFINE_C_BOX(15); DEFINE_C_BOX(16); DEFINE_C_BOX(17); papi-5.6.0/src/ctests/profile_pthreads.c000664 001750 001750 00000013017 13216244360 022326 0ustar00jshenry1963jshenry1963000000 000000 /* This file performs the following test: profile for pthreads */ #include #include #include #include #include "papi.h" #include "papi_test.h" #include "do_loops.h" #define THR 1000000 #define FLOPS 100000000 unsigned int length; caddr_t my_start, my_end; void * Thread( void *arg ) { int retval, num_tests = 1, i; int EventSet1 = PAPI_NULL, mask1, PAPI_event; int num_events1; long long **values; long long elapsed_us, elapsed_cyc; unsigned short *profbuf; char event_name[PAPI_MAX_STR_LEN]; retval = PAPI_register_thread( ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_register_thread", retval ); } profbuf = ( unsigned short * ) malloc( length * sizeof ( unsigned short ) ); if ( profbuf == NULL ) { test_fail(__FILE__, __LINE__, "Allocate memory",0); } memset( profbuf, 0x00, length * sizeof ( unsigned short ) ); /* add PAPI_TOT_CYC and one of the events in PAPI_FP_INS, PAPI_FP_OPS or PAPI_TOT_INS, depends on the availability of the event on the platform */ EventSet1 = add_two_nonderived_events( &num_events1, &PAPI_event, &mask1 ); values = allocate_test_space( num_tests, num_events1 ); retval = PAPI_event_code_to_name( PAPI_event, event_name ); if (retval != PAPI_OK ) { 
test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); } elapsed_us = PAPI_get_real_usec( ); elapsed_cyc = PAPI_get_real_cyc( ); retval = PAPI_profil( profbuf, length, my_start, 65536, EventSet1, PAPI_event, THR, PAPI_PROFIL_POSIX ); if ( retval ) { test_fail( __FILE__, __LINE__, "PAPI_profil", retval ); } retval = PAPI_start( EventSet1 ); if (retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_start", retval ); } do_flops( *( int * ) arg ); retval = PAPI_stop( EventSet1, values[0] ); if (retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); } elapsed_us = PAPI_get_real_usec( ) - elapsed_us; elapsed_cyc = PAPI_get_real_cyc( ) - elapsed_cyc; /* to remove the profile flag */ retval = PAPI_profil( profbuf, length, my_start, 65536, EventSet1, PAPI_event, 0, PAPI_PROFIL_POSIX ); if ( retval ) { test_fail( __FILE__, __LINE__, "PAPI_profil", retval ); } remove_test_events( &EventSet1, mask1 ); if ( !TESTS_QUIET ) { if ( mask1 == 0x3 ) { printf( "Thread %#x PAPI_TOT_INS : \t%lld\n", ( int ) pthread_self( ), ( values[0] )[0] ); } else { printf( "Thread %#x PAPI_FP_INS : \t%lld\n", ( int ) pthread_self( ), ( values[0] )[0] ); } printf( "Thread %#x PAPI_TOT_CYC: \t%lld\n", ( int ) pthread_self( ), ( values[0] )[1] ); printf( "Thread %#x Real usec : \t%lld\n", ( int ) pthread_self( ), elapsed_us ); printf( "Thread %#x Real cycles : \t%lld\n", ( int ) pthread_self( ), elapsed_cyc ); printf( "Test case: PAPI_profil() for pthreads\n" ); printf( "----Profile buffer for Thread %#x---\n", ( int ) pthread_self( ) ); for ( i = 0; i < ( int ) length; i++ ) { if ( profbuf[i] ) printf( "%#lx\t%d\n", ( unsigned long ) ( my_start + 2 * i ), profbuf[i] ); } } for ( i = 0; i < ( int ) length; i++ ) if ( profbuf[i] ) break; if ( i >= ( int ) length ) { test_fail( __FILE__, __LINE__, "No information in buffers", 1 ); } free_test_space( values, num_tests ); retval = PAPI_unregister_thread( ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, 
"PAPI_unregister_thread", retval ); } return NULL; } int main( int argc, char **argv ) { pthread_t id[NUM_THREADS]; int flops[NUM_THREADS]; int i, rc, retval; pthread_attr_t attr; long long elapsed_us, elapsed_cyc; const PAPI_exe_info_t *prginfo = NULL; int quiet; /* Set TESTS_QUIET variable */ quiet=tests_quiet( argc, argv ); retval = PAPI_library_init( PAPI_VER_CURRENT ); if (retval != PAPI_VER_CURRENT ) { test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); } retval = PAPI_query_event(PAPI_TOT_CYC); if (retval != PAPI_OK) { if (!quiet) printf("Trouble adding event\n"); test_skip(__FILE__,__LINE__,"No events",0); } retval = PAPI_thread_init( ( unsigned long ( * )( void ) ) ( pthread_self )); if (retval != PAPI_OK ) { if ( retval == PAPI_ECMP ) test_skip( __FILE__, __LINE__, "PAPI_thread_init", retval ); else test_fail( __FILE__, __LINE__, "PAPI_thread_init", retval ); } if ( ( prginfo = PAPI_get_executable_info( ) ) == NULL ) { retval = 1; test_fail( __FILE__, __LINE__, "PAPI_get_executable_info", retval ); } my_start = prginfo->address_info.text_start; my_end = prginfo->address_info.text_end; length = ( unsigned int ) ( my_end - my_start ); elapsed_us = PAPI_get_real_usec( ); elapsed_cyc = PAPI_get_real_cyc( ); pthread_attr_init( &attr ); #ifdef PTHREAD_CREATE_UNDETACHED pthread_attr_setdetachstate( &attr, PTHREAD_CREATE_UNDETACHED ); #endif #ifdef PTHREAD_SCOPE_SYSTEM retval = pthread_attr_setscope( &attr, PTHREAD_SCOPE_SYSTEM ); if ( retval != 0 ) test_skip( __FILE__, __LINE__, "pthread_attr_setscope", retval ); #endif for ( i = 0; i < NUM_THREADS; i++ ) { flops[i] = FLOPS * ( i + 1 ); rc = pthread_create( &id[i], &attr, Thread, ( void * ) &flops[i] ); if ( rc ) return ( FAILURE ); } for ( i = 0; i < NUM_THREADS; i++ ) pthread_join( id[i], NULL ); pthread_attr_destroy( &attr ); elapsed_cyc = PAPI_get_real_cyc( ) - elapsed_cyc; elapsed_us = PAPI_get_real_usec( ) - elapsed_us; if ( !quiet ) { printf( "Master real usec : \t%lld\n", elapsed_us ); printf( 
"Master real cycles : \t%lld\n", elapsed_cyc ); } test_pass( __FILE__ ); pthread_exit( NULL ); return 0; } papi-5.6.0/src/components/bgpm/L2unit/Rules.L2unit000664 001750 001750 00000000430 13216244356 024003 0ustar00jshenry1963jshenry1963000000 000000 # $Id$ COMPSRCS += components/bgpm/L2unit/linux-L2unit.c COMPOBJS += linux-L2unit.o linux-L2unit.o: components/bgpm/L2unit/linux-L2unit.c components/bgpm/L2unit/linux-L2unit.h $(HEADERS) $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/bgpm/L2unit/linux-L2unit.c -o linux-L2unit.o papi-5.6.0/man/man3/PAPI_state.3000664 001750 001750 00000004424 13216244356 020171 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPI_state" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPI_state \- .PP Return the counting state of an EventSet\&. .SH SYNOPSIS .br .PP .SH "Detailed Description" .PP .PP \fBC Interface:\fP .RS 4 #include <\fBpapi\&.h\fP> .br int \fBPAPI_state( int EventSet, int * status )\fP; .RE .PP \fBParameters:\fP .RS 4 \fIEventSet\fP -- an integer handle for a PAPI event set as created by \fBPAPI_create_eventset\fP .br \fIstatus\fP -- an integer containing a boolean combination of one or more of the following nonzero constants as defined in the PAPI header file \fBpapi\&.h\fP: .PD 0 .IP "\(bu" 2 PAPI_STOPPED -- EventSet is stopped .IP "\(bu" 2 PAPI_RUNNING -- EventSet is running .IP "\(bu" 2 PAPI_PAUSED -- EventSet temporarily disabled by the library .IP "\(bu" 2 PAPI_NOT_INIT -- EventSet defined, but not initialized .IP "\(bu" 2 PAPI_OVERFLOWING -- EventSet has overflowing enabled .IP "\(bu" 2 PAPI_PROFILING -- EventSet has profiling enabled .IP "\(bu" 2 PAPI_MULTIPLEXING -- EventSet has multiplexing enabled .IP "\(bu" 2 PAPI_ACCUMULATING -- reserved for future use .IP "\(bu" 2 PAPI_HWPROFILING -- reserved for future use .PP .RE .PP \fBReturn values:\fP .RS 4 \fIPAPI_OK\fP .br \fIPAPI_EINVAL\fP One or more of the arguments is invalid\&. 
.br \fIPAPI_ENOEVST\fP The EventSet specified does not exist\&. .RE .PP \fBPAPI_state()\fP returns the counting state of the specified event set\&. .PP \fBExample:\fP .RS 4 .PP .nf * int EventSet = PAPI_NULL; * int status = 0; * int ret; * * ret = PAPI_create_eventset(&EventSet); * if (ret != PAPI_OK) handle_error(ret); * * // Add Total Instructions Executed to our EventSet * ret = PAPI_add_event(EventSet, PAPI_TOT_INS); * if (ret != PAPI_OK) handle_error(ret); * * // Start counting * ret = PAPI_state(EventSet, &status); * if (ret != PAPI_OK) handle_error(ret); * printf("State is now %d\n",status); * ret = PAPI_start(EventSet); * if (ret != PAPI_OK) handle_error(ret); * ret = PAPI_state(EventSet, &status); * if (ret != PAPI_OK) handle_error(ret); * printf("State is now %d\n",status); * .fi .PP .RE .PP \fBSee Also:\fP .RS 4 \fBPAPI_stop\fP \fBPAPI_start\fP .RE .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. papi-5.6.0/man/man3/PAPIF_thread_init.3000664 001750 001750 00000001002 13216244356 021436 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPIF_thread_init" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPIF_thread_init \- .PP Initialize thread support in the PAPI library\&. .SH SYNOPSIS .br .PP .SH "Detailed Description" .PP .PP \fBFortran Interface:\fP .RS 4 #include 'fpapi\&.h' .br \fBPAPIF_thread_init( C_INT FUNCTION handle, C_INT check )\fP .RE .PP \fBSee Also:\fP .RS 4 \fBPAPI_thread_init\fP .RE .PP .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. papi-5.6.0/src/libpfm-3.y/lib/pfmlib_crayx2.c000664 001750 001750 00000035004 13216244363 022661 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2007 Cray Inc. * Contributed by Steve Kaufmann based on code from * Copyright (c) 2001-2006 Hewlett-Packard Development Company, L.P. 
* Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include #include #include #include "pfmlib_priv.h" #include "pfmlib_crayx2_priv.h" #include "crayx2_events.h" #define CRAYX2_NO_REDUNDANT 0 /* if>0 an error if chip:ctr:ev repeated */ typedef enum { CTR_REDUNDANT = -2, /* event on counter repeated */ CTR_CONFLICT = -1, /* event on counter not the same as previous */ CTR_OK = 0 /* event on counter open */ } counter_use_t; static int pfm_crayx2_get_event_code (unsigned int i, unsigned int cnt, int *code) { if (cnt != PFMLIB_CNT_FIRST && cnt > crayx2_support.num_cnt) { DPRINT ("return: count %d exceeded #counters\n", cnt); return PFMLIB_ERR_INVAL; } else if (i >= crayx2_support.pme_count) { DPRINT ("return: event index %d exceeded #events\n", i); return PFMLIB_ERR_INVAL; } *code = crayx2_pe[i].pme_code; DPRINT ("return: event code is %#x\n", *code); return PFMLIB_SUCCESS; } static char * pfm_crayx2_get_event_name (unsigned int i) { if (i >= crayx2_support.pme_count) { DPRINT ("return: event index %d exceeded #events\n", i); return NULL; } DPRINT ("return: event name '%s'\n", crayx2_pe[i].pme_name); return (char *) crayx2_pe[i].pme_name; } static void pfm_crayx2_get_event_counters (unsigned int j, pfmlib_regmask_t *counters) { unsigned int i; memset (counters, 0, sizeof (*counters)); DPRINT ("event counters for %d counters\n", PMU_CRAYX2_NUM_COUNTERS); for (i=0; ipfp_event_count, inp->pfp_dfl_plm, inp->pfp_flags); for (i=0; ipfp_event_count; i++) { DPRINT (" %3d: event %3d plm %#3x flags %#8lx num_masks %d\n", i, inp->pfp_events[i].event, inp->pfp_events[i].plm, inp->pfp_events[i].flags, inp->pfp_events[i].num_masks); for (j=0; jpfp_events[i].num_masks; j++) { DPRINT (" unit-mask-%2d: %d\n", j, inp->pfp_events[i].unit_masks[j]); } } } /* Better have at least one event specified and not exceed limit. 
*/ if (inp->pfp_event_count == 0) { DPRINT ("return: event count is 0\n"); return PFMLIB_ERR_INVAL; } else if (inp->pfp_event_count > PMU_CRAYX2_NUM_COUNTERS) { DPRINT ("return: event count exceeds max %d\n", PMU_CRAYX2_NUM_COUNTERS); return PFMLIB_ERR_TOOMANY; } memset (Pused, 0, sizeof(Pused)); memset (Cused, 0, sizeof(Cused)); memset (Mused, 0, sizeof(Mused)); /* Loop through the input parameters describing the events. */ for (i=0; ipfp_event_count; i++) { unsigned int code, chip, ctr, ev, chipno; counter_use_t ret; /* Acquire details describing this event code: * o which substrate/chip it is on * o which counter on the chip * o which event on the counter */ code = inp->pfp_events[i].event; chip = crayx2_pe[code].pme_chip; ctr = crayx2_pe[code].pme_ctr; ev = crayx2_pe[code].pme_event; chipno = crayx2_pe[code].pme_chipno; DPRINT ("%3d: code %3d chip %1d ctr %2d ev %1d chipno %2d\n", code, i, chip, ctr, ev, chipno); /* These priviledge levels are not recognized. */ if (inp->pfp_events[i].plm != 0) { DPRINT ("%3d: priviledge level %#x per event not allowed\n", i, inp->pfp_events[i].plm); return PFMLIB_ERR_INVAL; } /* No masks exist. */ if (inp->pfp_events[i].num_masks > 0) { DPRINT ("too many masks for event\n"); return PFMLIB_ERR_TOOMANY; } /* The event code. Set-up the event selection mask for * the PMC of the respective chip. Check if more than * one event on the same counter is selected. */ if (chip == PME_CRAYX2_CHIP_CPU) { ret = pfm_crayx2_counter_use (ctr, ev, &Pused[chipno], &Pevents); } else if (chip == PME_CRAYX2_CHIP_CACHE) { ret = pfm_crayx2_counter_use (ctr, ev, &Cused[chipno], &Cevents); } else if (chip == PME_CRAYX2_CHIP_MEMORY) { ret = pfm_crayx2_counter_use (ctr, ev, &Mused[chipno], &Mevents); } else { DPRINT ("return: invalid chip\n"); return PFMLIB_ERR_INVAL; } /* Each chip's counter can only count one event. 
*/ if (ret == CTR_CONFLICT) { DPRINT ("return: ctr conflict\n"); return PFMLIB_ERR_EVTINCOMP; } else if (ret == CTR_REDUNDANT) { #if (CRAYX2_NO_REDUNDANT != 0) DPRINT ("return: ctr redundant\n"); return PFMLIB_ERR_EVTMANY; #else DPRINT ("warning: ctr redundant\n"); #endif /* CRAYX2_NO_REDUNDANT */ } /* Set up the output PMDs. */ outp->pfp_pmds[npmds].reg_num = crayx2_pe[code].pme_base + ctr + chipno*crayx2_pe[code].pme_nctrs; outp->pfp_pmds[npmds].reg_addr = 0; outp->pfp_pmds[npmds].reg_alt_addr = 0; outp->pfp_pmds[npmds].reg_value = 0; npmds++; } outp->pfp_pmd_count = npmds; if (PFMLIB_DEBUG ( )) { DPRINT ("P event mask %#16lx\n", Pevents); DPRINT ("C event mask %#16lx\n", Cevents); DPRINT ("M event mask %#16lx\n", Mevents); DPRINT ("PMDs: pmd_count %d\n", outp->pfp_pmd_count); for (i=0; ipfp_pmd_count; i++) { DPRINT (" %3d: reg_value %3lld reg_num %3d reg_addr %#16llx\n", i, outp->pfp_pmds[i].reg_value, outp->pfp_pmds[i].reg_num, outp->pfp_pmds[i].reg_addr); } } /* Set up the PMC basics for the chips that will be doing * some counting. */ if (pfm_crayx2_chip_use (Pused, PME_CRAYX2_CPU_CHIPS) > 0) { uint64_t Pctrl = PFM_CPU_START; uint64_t Pen = PFM_ENABLE_RW; if (inp->pfp_dfl_plm & (PFM_PLM0 | PFM_PLM1)) { Pen |= PFM_ENABLE_KERNEL; } if (inp->pfp_dfl_plm & PFM_PLM2) { Pen |= PFM_ENABLE_EXL; } if (inp->pfp_dfl_plm & PFM_PLM3) { Pen |= PFM_ENABLE_USER; } /* First of three CPU PMC registers. 
*/ base_pmc = PMU_CRAYX2_CPU_PMC_BASE; outp->pfp_pmcs[npmcs].reg_value = Pctrl; outp->pfp_pmcs[npmcs].reg_num = base_pmc + PMC_CONTROL; outp->pfp_pmcs[npmcs].reg_addr = 0; outp->pfp_pmcs[npmcs].reg_alt_addr = 0; npmcs++; outp->pfp_pmcs[npmcs].reg_value = Pevents; outp->pfp_pmcs[npmcs].reg_num = base_pmc + PMC_EVENTS; outp->pfp_pmcs[npmcs].reg_addr = 0; outp->pfp_pmcs[npmcs].reg_alt_addr = 0; npmcs++; outp->pfp_pmcs[npmcs].reg_value = Pen; outp->pfp_pmcs[npmcs].reg_num = base_pmc + PMC_ENABLE; outp->pfp_pmcs[npmcs].reg_addr = 0; outp->pfp_pmcs[npmcs].reg_alt_addr = 0; npmcs++; } if (pfm_crayx2_chip_use (Cused, PME_CRAYX2_CACHE_CHIPS) > 0) { uint64_t Cctrl = PFM_CACHE_START; uint64_t Cen = PFM_ENABLE_RW; /* domains N/A */ /* Second of three Cache PMC registers. */ base_pmc = PMU_CRAYX2_CACHE_PMC_BASE; outp->pfp_pmcs[npmcs].reg_value = Cctrl; outp->pfp_pmcs[npmcs].reg_num = base_pmc + PMC_CONTROL; outp->pfp_pmcs[npmcs].reg_addr = 0; outp->pfp_pmcs[npmcs].reg_alt_addr = 0; npmcs++; outp->pfp_pmcs[npmcs].reg_value = Cevents; outp->pfp_pmcs[npmcs].reg_num = base_pmc + PMC_EVENTS; outp->pfp_pmcs[npmcs].reg_addr = 0; outp->pfp_pmcs[npmcs].reg_alt_addr = 0; npmcs++; outp->pfp_pmcs[npmcs].reg_value = Cen; outp->pfp_pmcs[npmcs].reg_num = base_pmc + PMC_ENABLE; outp->pfp_pmcs[npmcs].reg_addr = 0; outp->pfp_pmcs[npmcs].reg_alt_addr = 0; npmcs++; } if (pfm_crayx2_chip_use (Mused, PME_CRAYX2_MEMORY_CHIPS) > 0) { uint64_t Mctrl = PFM_MEM_START; uint64_t Men = PFM_ENABLE_RW; /* domains N/A */ /* Third of three Memory PMC registers. 
*/ base_pmc = PMU_CRAYX2_MEMORY_PMC_BASE; outp->pfp_pmcs[npmcs].reg_value = Mctrl; outp->pfp_pmcs[npmcs].reg_num = base_pmc + PMC_CONTROL; outp->pfp_pmcs[npmcs].reg_addr = 0; outp->pfp_pmcs[npmcs].reg_alt_addr = 0; npmcs++; outp->pfp_pmcs[npmcs].reg_value = Mevents; outp->pfp_pmcs[npmcs].reg_num = base_pmc + PMC_EVENTS; outp->pfp_pmcs[npmcs].reg_addr = 0; outp->pfp_pmcs[npmcs].reg_alt_addr = 0; npmcs++; outp->pfp_pmcs[npmcs].reg_value = Men; outp->pfp_pmcs[npmcs].reg_num = base_pmc + PMC_ENABLE; outp->pfp_pmcs[npmcs].reg_addr = 0; outp->pfp_pmcs[npmcs].reg_alt_addr = 0; npmcs++; } outp->pfp_pmc_count = npmcs; if (PFMLIB_DEBUG ( )) { DPRINT ("PMCs: pmc_count %d\n", outp->pfp_pmc_count); for (i=0; ipfp_pmc_count; i++) { DPRINT (" %3d: reg_value %#16llx reg_num %3d reg_addr %#16llx\n", i, outp->pfp_pmcs[i].reg_value, outp->pfp_pmcs[i].reg_num, outp->pfp_pmcs[i].reg_addr); } } return PFMLIB_SUCCESS; } static int pfm_crayx2_pmu_detect (void) { char buffer[128]; int ret; DPRINT ("detect the PMU attributes\n"); ret = __pfm_getcpuinfo_attr ("vendor_id", buffer, sizeof(buffer)); if (ret != 0 || strcasecmp (buffer, "Cray") != 0) { DPRINT ("return: no 'Cray' vendor_id\n"); return PFMLIB_ERR_NOTSUPP; } ret = __pfm_getcpuinfo_attr ("type", buffer, sizeof(buffer)); if (ret != 0 || strcasecmp (buffer, "craynv2") != 0) { DPRINT ("return: no 'craynv2' type\n"); return PFMLIB_ERR_NOTSUPP; } DPRINT ("Cray X2 nv2 found\n"); return PFMLIB_SUCCESS; } static void pfm_crayx2_get_impl_pmcs (pfmlib_regmask_t *impl_pmcs) { unsigned int i; DPRINT ("entered with PMC_COUNT %d\n", PMU_CRAYX2_PMC_COUNT); for (i=0; ievent = PME_CRAYX2_CYCLES; DPRINT ("return: event code for cycles %#x\n", e->event); return PFMLIB_SUCCESS; } static int pfm_crayx2_get_inst_retired (pfmlib_event_t *e) { e->event = PME_CRAYX2_INSTR_GRADUATED; DPRINT ("return: event code for retired instr %#x\n", e->event); return PFMLIB_SUCCESS; } /* Register the constants and the access functions. 
*/ pfm_pmu_support_t crayx2_support = { .pmu_name = PMU_CRAYX2_NAME, .pmu_type = PFMLIB_CRAYX2_PMU, .pme_count = PME_CRAYX2_EVENT_COUNT, .pmc_count = PMU_CRAYX2_PMC_COUNT, .pmd_count = PMU_CRAYX2_PMD_COUNT, .num_cnt = PMU_CRAYX2_NUM_COUNTERS, .get_event_code = pfm_crayx2_get_event_code, .get_event_name = pfm_crayx2_get_event_name, .get_event_counters = pfm_crayx2_get_event_counters, .dispatch_events = pfm_crayx2_dispatch_events, .pmu_detect = pfm_crayx2_pmu_detect, .get_impl_pmcs = pfm_crayx2_get_impl_pmcs, .get_impl_pmds = pfm_crayx2_get_impl_pmds, .get_impl_counters = pfm_crayx2_get_impl_counters, .get_hw_counter_width = pfm_crayx2_get_hw_counter_width, .get_event_desc = pfm_crayx2_get_event_desc, .get_num_event_masks = pfm_crayx2_get_num_event_masks, .get_event_mask_name = pfm_crayx2_get_event_mask_name, .get_event_mask_code = pfm_crayx2_get_event_mask_code, .get_event_mask_desc = pfm_crayx2_get_event_mask_desc, .get_cycle_event = pfm_crayx2_get_cycle_event, .get_inst_retired_event = pfm_crayx2_get_inst_retired }; papi-5.6.0/README000664 001750 001750 00000005643 13216244355 015426 0ustar00jshenry1963jshenry1963000000 000000 PAPI: Performance Application Programming Interface =================================================== ** Innovative Computing Lab ** ** University of Tennessee, Knoxville, TN ** *** [TOC] *** About ----- PAPI provides the tool designer and application engineer with a consistent interface and methodology for use of the performance counter hardware found in most major microprocessors. PAPI enables software engineers to see, in near real time, the relation between software performance and processor events. In addition, PAPI provides access to a collection of components that expose performance measurement opportunities across the hardware and software stack. Getting Started --------------- If this is the first file you've opened in the PAPI tree, we'll try to give you a few tips on where to go from here. * Read the license found in LICENSE.txt. 
It's pretty short, and not very restrictive, but it'll give you an idea of what you can and can't do with the PAPI sources. * Visit the website at: http://icl.utk.edu/papi/ There you can find late-breaking news that may be more current than in these files. You can also find documentation in a greater variety of formats than in the papi/doc/ directory. * Sign up for the PAPI mailing list(s). Instructions are on our home page. * Read the RELEASENOTES.txt file to get an idea of what's new in the current release. Installing PAPI --------------- To install PAPI on your system: * Find the section in INSTALL.txt that pertains to your hardware and operating system. * Follow the directions to install required components and build the PAPI libraries. * Run the test suite when you are finished to verify that everything went ok. NOTE: Although we make every attempt to get all tests to PASS or SKIP on all platforms, there are occasional instances of FAILures due to excessively tight compliance thresholds or platform idiosyncrasies. Don't panic if one or two tests FAIL. Contact us with complete output and we'll see what we can do. Using PAPI ---------- To use PAPI in your own programs: * Read the PAPI Overview found at: http://icl.utk.edu/projects/papi/wiki/Main_Page. * Try out the utility programs in /utils to see what's in your system. * Try a test program. Source for a number of tests in both C and FORTRAN is available in the src/ctests/ and src/ftests/ directories. Find a program that's similar to what you want to do. Make sure you can build it and run it. * Write a test program of your own, exercising the PAPI events and features of interest to you. * Go for broke. Fold PAPI calls into your sources and see what you can learn. Bugs and Questions ------------------ * Visit our FAQ on the PAPI website, or read a snapshot of the FAQ in papi/PAPI_FAQ.html. * Subscribe to the PAPI mailing list; instructions are on our home page. * Read historical postings to the list. 
* Post questions to the list.papi-5.6.0/src/freebsd-context.h000664 001750 001750 00000000226 13216244361 020567 0ustar00jshenry1963jshenry1963000000 000000 #ifndef _PAPI_FreeBSD_CONTEXT_H #define _PAPI_FreeBSD_CONTEXT_H #define GET_OVERFLOW_ADDRESS(ctx) (0x80000000) #endif /* _PAPI_FreeBSD_CONTEXT_H */ papi-5.6.0/src/perfctr-2.7.x/etc/costs/Pentium4Xeon-3.4000664 001750 001750 00000001720 13216244367 024241 0ustar00jshenry1963jshenry1963000000 000000 [data from a 3.4 GHz Pentium 4 Xeon (or maybe non-Xeon)] PERFCTR INIT: vendor 0, family 15, model 3, stepping 4, clock 3393661 kHz PERFCTR INIT: NITER == 64 PERFCTR INIT: loop overhead is 348 cycles PERFCTR INIT: rdtsc cost is 96.1 cycles (6503 total) PERFCTR INIT: rdpmc cost is 223.9 cycles (14679 total) PERFCTR INIT: rdmsr (counter) cost is 354.0 cycles (23009 total) PERFCTR INIT: rdmsr (escr) cost is 375.2 cycles (24361 total) PERFCTR INIT: wrmsr (counter) cost is 990.9 cycles (63767 total) PERFCTR INIT: wrmsr (escr) cost is 980.0 cycles (63070 total) PERFCTR INIT: read cr4 cost is 22.7 cycles (1802 total) PERFCTR INIT: write cr4 cost is 392.2 cycles (25449 total) PERFCTR INIT: rdpmc (fast) cost is 90.3 cycles (6129 total) PERFCTR INIT: rdmsr (cccr) cost is 385.1 cycles (24999 total) PERFCTR INIT: wrmsr (cccr) cost is 938.5 cycles (60418 total) PERFCTR INIT: write LVTPC cost is 13.2 cycles (1199 total) perfctr: driver 2.7.3, cpu type Intel P4 at 3393661 kHz papi-5.6.0/src/libpfm-3.y/lib/000775 001750 001750 00000000000 13216244363 017752 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/libpfm-3.y/docs/man3/pfm_force_pmu.3000664 001750 001750 00000000034 13216244361 023672 0ustar00jshenry1963jshenry1963000000 000000 .so man3/pfm_get_pmu_name.3 papi-5.6.0/src/ctests/failed_events.c000664 001750 001750 00000010421 13216244360 021600 0ustar00jshenry1963jshenry1963000000 000000 /* * File: failed_events.c * Author: Vince Weaver */ /* This test tries adding events that don't exist */ /* We've had issues where 
the name resolution code might do weird */ /* things when passed invalid event names */ #include #include #include #include "papi.h" #include "papi_test.h" #define LARGE_NAME_SIZE 4096 char large_name[LARGE_NAME_SIZE]; int main( int argc, char **argv ) { int i, k, err_count = 0; int retval; PAPI_event_info_t info, info1; const PAPI_component_info_t* cmpinfo; int numcmp, cid; int quiet; int EventSet = PAPI_NULL; /* Set quiet variable */ quiet=tests_quiet( argc, argv ); /* Init PAPI library */ retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) { test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); } if (!quiet) { printf("Test adding invalid events.\n"); } /* Create an eventset */ retval = PAPI_create_eventset( &EventSet ); if ( retval != PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); } /* Simple Event */ if (!quiet) { printf("+ Simple invalid event\t"); } retval=PAPI_add_named_event(EventSet,"INVALID_EVENT"); if (retval==PAPI_OK) { if (!quiet) { printf("Unexpectedly opened!\n"); err_count++; } } else { if (!quiet) printf("OK\n"); } /* Extra Colons */ if (!quiet) { printf("+ Extra colons\t"); } retval=PAPI_add_named_event(EventSet,"INV::::AL:ID:::_E=3V::E=NT"); if (retval==PAPI_OK) { if (!quiet) { printf("Unexpectedly opened!\n"); err_count++; } } else { if (!quiet) printf("OK\n"); } /* Large Invalid Event */ if (!quiet) { printf("+ Large invalid event\t"); } memset(large_name,'A',LARGE_NAME_SIZE); large_name[LARGE_NAME_SIZE-1]=0; retval=PAPI_add_named_event(EventSet,large_name); if (retval==PAPI_OK) { if (!quiet) { printf("Unexpectedly opened!\n"); err_count++; } } else { if (!quiet) printf("OK\n"); } /* Large Unterminated Invalid Event */ if (!quiet) { printf("+ Large unterminated invalid event\t"); } memset(large_name,'A',LARGE_NAME_SIZE); retval=PAPI_add_named_event(EventSet,large_name); if (retval==PAPI_OK) { if (!quiet) { printf("Unexpectedly opened!\n"); err_count++; } } else { if (!quiet) 
printf("OK\n"); } /* Randomly modifying valid events */ if (!quiet) { printf("+ Randomly modifying valid events\t"); } numcmp = PAPI_num_components( ); /* Loop through all components */ for( cid = 0; cid < numcmp; cid++ ) { cmpinfo = PAPI_get_component_info( cid ); if (cmpinfo == NULL) { test_fail( __FILE__, __LINE__, "PAPI_get_component_info", 2 ); } /* Include disabled components */ if (cmpinfo->disabled) { // continue; } /* For platform independence, always ASK FOR the first event */ /* Don't just assume it'll be the first numeric value */ i = 0 | PAPI_NATIVE_MASK; retval = PAPI_enum_cmp_event( &i, PAPI_ENUM_FIRST, cid ); do { retval = PAPI_get_event_info( i, &info ); k = i; if ( PAPI_enum_cmp_event(&k, PAPI_NTV_ENUM_UMASKS, cid )==PAPI_OK ) { do { retval = PAPI_get_event_info( k, &info1 ); /* Skip perf_raw event as it is hard to error out */ if (strstr(info1.symbol,"perf_raw")) { break; } // printf("%s\n",info1.symbol); if (strlen(info1.symbol)>5) { info1.symbol[strlen(info1.symbol)-4]^=0xa5; retval=PAPI_add_named_event(EventSet,info1.symbol); if (retval==PAPI_OK) { if (!quiet) { printf("Unexpectedly opened %s!\n", info1.symbol); err_count++; } } } } while ( PAPI_enum_cmp_event( &k, PAPI_NTV_ENUM_UMASKS, cid ) == PAPI_OK ); } else { /* Event didn't have any umasks */ // printf("%s\n",info1.symbol); if (strlen(info1.symbol)>5) { info1.symbol[strlen(info1.symbol)-4]^=0xa5; retval=PAPI_add_named_event(EventSet,info1.symbol); if (retval==PAPI_OK) { if (!quiet) { printf("Unexpectedly opened %s!\n", info1.symbol); err_count++; } } } } } while ( PAPI_enum_cmp_event( &i, PAPI_ENUM_EVENTS, cid ) == PAPI_OK ); } if ( err_count ) { if (!quiet) { printf( "%d Invalid events added.\n", err_count ); } test_fail( __FILE__, __LINE__, "Invalid events added", 1 ); } test_pass( __FILE__ ); return 0; } papi-5.6.0/src/components/vmware/configure.in000664 001750 001750 00000001002 13216244360 023311 0ustar00jshenry1963jshenry1963000000 000000 AC_INIT AC_ARG_WITH(vmware_incdir, 
[--with-vmware_incdir= Specify path to VMware GuestSDK includes], [VMWARE_INCDIR=$withval CFLAGS="$CFLAGS -I$withval" AC_CHECK_HEADER([vmGuestLib.h], [VMGUESTLIB=1], [AC_MSG_WARN([vmGuestLib.h not found])], )], [AC_MSG_WARN([Component requires path to vmware includes])]) AC_SUBST(VMWARE_INCDIR) AC_SUBST(VMGUESTLIB) AC_CONFIG_FILES([Makefile.vmware]) AC_OUTPUT papi-5.6.0/src/components/vmware/vmware.c000664 001750 001750 00000116422 13216244360 022462 0ustar00jshenry1963jshenry1963000000 000000 /****************************/ /* THIS IS OPEN SOURCE CODE */ /****************************/ /** * @file vmware.c * @author Matt Johnson * mrj@eecs.utk.edu * @author John Nelson * jnelso37@eecs.utk.edu * @author Vince Weaver * vweaver1@eecs.utk.edu * * @ingroup papi_components * * VMware component * * @brief * This is the VMware component for PAPI-V. It will allow user access to * hardware information available from a VMware virtual machine. */ #include #include #include #include #include #include /* Headers required by PAPI */ #include "papi.h" #include "papi_internal.h" #include "papi_vector.h" #include "papi_memory.h" #define VMWARE_MAX_COUNTERS 256 #define VMWARE_CPU_LIMIT_MHZ 0 #define VMWARE_CPU_RESERVATION_MHZ 1 #define VMWARE_CPU_SHARES 2 #define VMWARE_CPU_STOLEN_MS 3 #define VMWARE_CPU_USED_MS 4 #define VMWARE_ELAPSED_MS 5 #define VMWARE_MEM_ACTIVE_MB 6 #define VMWARE_MEM_BALLOONED_MB 7 #define VMWARE_MEM_LIMIT_MB 8 #define VMWARE_MEM_MAPPED_MB 9 #define VMWARE_MEM_OVERHEAD_MB 10 #define VMWARE_MEM_RESERVATION_MB 11 #define VMWARE_MEM_SHARED_MB 12 #define VMWARE_MEM_SHARES 13 #define VMWARE_MEM_SWAPPED_MB 14 #define VMWARE_MEM_TARGET_SIZE_MB 15 #define VMWARE_MEM_USED_MB 16 #define VMWARE_HOST_CPU_MHZ 17 /* The following 3 require VMWARE_PSEUDO_PERFORMANCE env_var to be set.
*/ #define VMWARE_HOST_TSC 18 #define VMWARE_ELAPSED_TIME 19 #define VMWARE_ELAPSED_APPARENT 20 /* Begin PAPI definitions */ papi_vector_t _vmware_vector; void (*_dl_non_dynamic_init)(void) __attribute__((weak)); /** Structure that stores private information for each event */ struct _vmware_register { unsigned int selector; /**< Signifies which counter slot is being used */ /**< Indexed from 1 as 0 has a special meaning */ }; /** This structure is used to build the table of events */ struct _vmware_native_event_entry { char name[PAPI_MAX_STR_LEN]; /**< Name of the counter */ char description[PAPI_HUGE_STR_LEN]; /**< Description of counter */ char units[PAPI_MIN_STR_LEN]; int which_counter; int report_difference; }; struct _vmware_reg_alloc { struct _vmware_register ra_bits; }; inline uint64_t rdpmc(int c) { uint32_t low, high; __asm__ __volatile__("rdpmc" : "=a" (low), "=d" (high) : "c" (c)); return (uint64_t)high << 32 | (uint64_t)low; } #ifdef VMGUESTLIB /* Headers required by VMware */ #include "vmGuestLib.h" /* Functions to dynamically load from the GuestLib library. 
 */

/* Each pointer below is filled in by LOAD_ONE_FUNC(), which looks up the */
/* library symbol named "VM" followed by the pointer's own name.          */

/* Error text, handle management, refresh and session identification */
char const * (*GuestLib_GetErrorText)(VMGuestLibError);
VMGuestLibError (*GuestLib_OpenHandle)(VMGuestLibHandle*);
VMGuestLibError (*GuestLib_CloseHandle)(VMGuestLibHandle);
VMGuestLibError (*GuestLib_UpdateInfo)(VMGuestLibHandle handle);
VMGuestLibError (*GuestLib_GetSessionId)(VMGuestLibHandle handle,
                                         VMSessionId *id);

/* Per-virtual-machine CPU and memory getters */
VMGuestLibError (*GuestLib_GetCpuReservationMHz)(VMGuestLibHandle handle,
                                                 uint32 *cpuReservationMHz);
VMGuestLibError (*GuestLib_GetCpuLimitMHz)(VMGuestLibHandle handle, uint32 *cpuLimitMHz);
VMGuestLibError (*GuestLib_GetCpuShares)(VMGuestLibHandle handle, uint32 *cpuShares);
VMGuestLibError (*GuestLib_GetCpuUsedMs)(VMGuestLibHandle handle, uint64 *cpuUsedMs);
VMGuestLibError (*GuestLib_GetHostProcessorSpeed)(VMGuestLibHandle handle, uint32 *mhz);
VMGuestLibError (*GuestLib_GetMemReservationMB)(VMGuestLibHandle handle,
                                                uint32 *memReservationMB);
VMGuestLibError (*GuestLib_GetMemLimitMB)(VMGuestLibHandle handle, uint32 *memLimitMB);
VMGuestLibError (*GuestLib_GetMemShares)(VMGuestLibHandle handle, uint32 *memShares);
VMGuestLibError (*GuestLib_GetMemMappedMB)(VMGuestLibHandle handle,
                                           uint32 *memMappedMB);
VMGuestLibError (*GuestLib_GetMemActiveMB)(VMGuestLibHandle handle, uint32 *memActiveMB);
VMGuestLibError (*GuestLib_GetMemOverheadMB)(VMGuestLibHandle handle,
                                             uint32 *memOverheadMB);
VMGuestLibError (*GuestLib_GetMemBalloonedMB)(VMGuestLibHandle handle,
                                              uint32 *memBalloonedMB);
VMGuestLibError (*GuestLib_GetMemSwappedMB)(VMGuestLibHandle handle,
                                            uint32 *memSwappedMB);
VMGuestLibError (*GuestLib_GetMemSharedMB)(VMGuestLibHandle handle,
                                           uint32 *memSharedMB);
VMGuestLibError (*GuestLib_GetMemSharedSavedMB)(VMGuestLibHandle handle,
                                                uint32 *memSharedSavedMB);
VMGuestLibError (*GuestLib_GetMemUsedMB)(VMGuestLibHandle handle,
                                         uint32 *memUsedMB);
VMGuestLibError (*GuestLib_GetElapsedMs)(VMGuestLibHandle handle, uint64 *elapsedMs);
VMGuestLibError (*GuestLib_GetResourcePoolPath)(VMGuestLibHandle handle,
                                                size_t *bufferSize,
                                                char *pathBuffer);
VMGuestLibError (*GuestLib_GetCpuStolenMs)(VMGuestLibHandle handle,
                                           uint64 *cpuStolenMs);
VMGuestLibError (*GuestLib_GetMemTargetSizeMB)(VMGuestLibHandle handle,
                                               uint64 *memTargetSizeMB);

/* Host-wide getters; loaded by LoadFunctions() but not called by the */
/* parts of this component visible here                               */
VMGuestLibError (*GuestLib_GetHostNumCpuCores)(VMGuestLibHandle handle,
                                               uint32 *hostNumCpuCores);
VMGuestLibError (*GuestLib_GetHostCpuUsedMs)(VMGuestLibHandle handle,
                                             uint64 *hostCpuUsedMs);
VMGuestLibError (*GuestLib_GetHostMemSwappedMB)(VMGuestLibHandle handle,
                                                uint64 *hostMemSwappedMB);
VMGuestLibError (*GuestLib_GetHostMemSharedMB)(VMGuestLibHandle handle,
                                               uint64 *hostMemSharedMB);
VMGuestLibError (*GuestLib_GetHostMemUsedMB)(VMGuestLibHandle handle,
                                             uint64 *hostMemUsedMB);
VMGuestLibError (*GuestLib_GetHostMemPhysMB)(VMGuestLibHandle handle,
                                             uint64 *hostMemPhysMB);
VMGuestLibError (*GuestLib_GetHostMemPhysFreeMB)(VMGuestLibHandle handle,
                                                 uint64 *hostMemPhysFreeMB);
VMGuestLibError (*GuestLib_GetHostMemKernOvhdMB)(VMGuestLibHandle handle,
                                                 uint64 *hostMemKernOvhdMB);
VMGuestLibError (*GuestLib_GetHostMemMappedMB)(VMGuestLibHandle handle,
                                               uint64 *hostMemMappedMB);
VMGuestLibError (*GuestLib_GetHostMemUnmappedMB)(VMGuestLibHandle handle,
                                                 uint64 *hostMemUnmappedMB);

/* Handle returned by dlopen() for the GuestLib shared library; */
/* NULL until LoadFunctions() succeeds in opening it            */
static void *dlHandle = NULL;

/*
 * Macro to load a single GuestLib function from the shared library.
*/ #define LOAD_ONE_FUNC(funcname) \ do { \ funcname = dlsym(dlHandle, "VM" #funcname); \ if ((dlErrStr = dlerror()) != NULL) { \ fprintf(stderr, "Failed to load \'%s\': \'%s\'\n", \ #funcname, dlErrStr); \ return FALSE; \ } \ } while (0) #endif /** Holds control flags, usually out-of band configuration of the hardware */ struct _vmware_control_state { long long value[VMWARE_MAX_COUNTERS]; int which_counter[VMWARE_MAX_COUNTERS]; int num_events; }; /** Holds per-thread information */ struct _vmware_context { long long values[VMWARE_MAX_COUNTERS]; long long start_values[VMWARE_MAX_COUNTERS]; #ifdef VMGUESTLIB VMGuestLibHandle glHandle; #endif }; /* *----------------------------------------------------------------------------- * * LoadFunctions -- * * Load the functions from the shared library. * * Results: * TRUE on success * FALSE on failure * * Side effects: * None * * Credit: VMware *----------------------------------------------------------------------------- */ static int LoadFunctions(void) { #ifdef VMGUESTLIB /* * First, try to load the shared library. 
*/ /* Attempt to guess if we were statically linked to libc, if so bail */ if ( _dl_non_dynamic_init != NULL ) { strncpy(_vmware_vector.cmp_info.disabled_reason, "The VMware component does not support statically linking of libc.", PAPI_MAX_STR_LEN); return PAPI_ENOSUPP; } char const *dlErrStr; char filename[BUFSIZ]; sprintf(filename,"%s","libvmGuestLib.so"); dlHandle = dlopen(filename, RTLD_NOW); if (!dlHandle) { dlErrStr = dlerror(); fprintf(stderr, "dlopen of %s failed: \'%s\'\n", filename, dlErrStr); sprintf(filename,"%s/lib/lib64/libvmGuestLib.so",VMWARE_INCDIR); dlHandle = dlopen(filename, RTLD_NOW); if (!dlHandle) { dlErrStr = dlerror(); fprintf(stderr, "dlopen of %s failed: \'%s\'\n", filename, dlErrStr); sprintf(filename,"%s/lib/lib32/libvmGuestLib.so",VMWARE_INCDIR); dlHandle = dlopen(filename, RTLD_NOW); if (!dlHandle) { dlErrStr = dlerror(); fprintf(stderr, "dlopen of %s failed: \'%s\'\n", filename, dlErrStr); return PAPI_ECMP; } } } /* Load all the individual library functions. 
*/ LOAD_ONE_FUNC(GuestLib_GetErrorText); LOAD_ONE_FUNC(GuestLib_OpenHandle); LOAD_ONE_FUNC(GuestLib_CloseHandle); LOAD_ONE_FUNC(GuestLib_UpdateInfo); LOAD_ONE_FUNC(GuestLib_GetSessionId); LOAD_ONE_FUNC(GuestLib_GetCpuReservationMHz); LOAD_ONE_FUNC(GuestLib_GetCpuLimitMHz); LOAD_ONE_FUNC(GuestLib_GetCpuShares); LOAD_ONE_FUNC(GuestLib_GetCpuUsedMs); LOAD_ONE_FUNC(GuestLib_GetHostProcessorSpeed); LOAD_ONE_FUNC(GuestLib_GetMemReservationMB); LOAD_ONE_FUNC(GuestLib_GetMemLimitMB); LOAD_ONE_FUNC(GuestLib_GetMemShares); LOAD_ONE_FUNC(GuestLib_GetMemMappedMB); LOAD_ONE_FUNC(GuestLib_GetMemActiveMB); LOAD_ONE_FUNC(GuestLib_GetMemOverheadMB); LOAD_ONE_FUNC(GuestLib_GetMemBalloonedMB); LOAD_ONE_FUNC(GuestLib_GetMemSwappedMB); LOAD_ONE_FUNC(GuestLib_GetMemSharedMB); LOAD_ONE_FUNC(GuestLib_GetMemSharedSavedMB); LOAD_ONE_FUNC(GuestLib_GetMemUsedMB); LOAD_ONE_FUNC(GuestLib_GetElapsedMs); LOAD_ONE_FUNC(GuestLib_GetResourcePoolPath); LOAD_ONE_FUNC(GuestLib_GetCpuStolenMs); LOAD_ONE_FUNC(GuestLib_GetMemTargetSizeMB); LOAD_ONE_FUNC(GuestLib_GetHostNumCpuCores); LOAD_ONE_FUNC(GuestLib_GetHostCpuUsedMs); LOAD_ONE_FUNC(GuestLib_GetHostMemSwappedMB); LOAD_ONE_FUNC(GuestLib_GetHostMemSharedMB); LOAD_ONE_FUNC(GuestLib_GetHostMemUsedMB); LOAD_ONE_FUNC(GuestLib_GetHostMemPhysMB); LOAD_ONE_FUNC(GuestLib_GetHostMemPhysFreeMB); LOAD_ONE_FUNC(GuestLib_GetHostMemKernOvhdMB); LOAD_ONE_FUNC(GuestLib_GetHostMemMappedMB); LOAD_ONE_FUNC(GuestLib_GetHostMemUnmappedMB); #endif return PAPI_OK; } /** This table contains the native events */ static struct _vmware_native_event_entry *_vmware_native_table; /** number of events in the table*/ static int num_events = 0; static int use_pseudo=0; static int use_guestlib=0; /************************************************************************/ /* Below is the actual "hardware implementation" of our VMWARE counters */ /************************************************************************/ /** Code that reads event values. 
You might replace this with code that accesses hardware or reads values from the operatings system. */ static long long _vmware_hardware_read( struct _vmware_context *context, int starting) { int i; if (use_pseudo) { context->values[VMWARE_HOST_TSC]=rdpmc(0x10000); context->values[VMWARE_ELAPSED_TIME]=rdpmc(0x10001); context->values[VMWARE_ELAPSED_APPARENT]=rdpmc(0x10002); } #ifdef VMGUESTLIB static VMSessionId sessionId = 0; VMSessionId tmpSession; uint32_t temp32; uint64_t temp64; VMGuestLibError glError; if (use_guestlib) { glError = GuestLib_UpdateInfo(context->glHandle); if (glError != VMGUESTLIB_ERROR_SUCCESS) { fprintf(stderr,"UpdateInfo failed: %s\n", GuestLib_GetErrorText(glError)); return PAPI_ECMP; } /* Retrieve and check the session ID */ glError = GuestLib_GetSessionId(context->glHandle, &tmpSession); if (glError != VMGUESTLIB_ERROR_SUCCESS) { fprintf(stderr, "Failed to get session ID: %s\n", GuestLib_GetErrorText(glError)); return PAPI_ECMP; } if (tmpSession == 0) { fprintf(stderr, "Error: Got zero sessionId from GuestLib\n"); return PAPI_ECMP; } if (sessionId == 0) { sessionId = tmpSession; } else if (tmpSession != sessionId) { sessionId = tmpSession; } glError = GuestLib_GetCpuLimitMHz(context->glHandle,&temp32); context->values[VMWARE_CPU_LIMIT_MHZ]=temp32; if (glError != VMGUESTLIB_ERROR_SUCCESS) { fprintf(stderr,"Failed to get CPU limit: %s\n", GuestLib_GetErrorText(glError)); return PAPI_ECMP; } glError = GuestLib_GetCpuReservationMHz(context->glHandle,&temp32); context->values[VMWARE_CPU_RESERVATION_MHZ]=temp32; if (glError != VMGUESTLIB_ERROR_SUCCESS) { fprintf(stderr,"Failed to get CPU reservation: %s\n", GuestLib_GetErrorText(glError)); return PAPI_ECMP; } glError = GuestLib_GetCpuShares(context->glHandle,&temp32); context->values[VMWARE_CPU_SHARES]=temp32; if (glError != VMGUESTLIB_ERROR_SUCCESS) { fprintf(stderr,"Failed to get cpu shares: %s\n", GuestLib_GetErrorText(glError)); return PAPI_ECMP; } glError = 
GuestLib_GetCpuStolenMs(context->glHandle,&temp64); context->values[VMWARE_CPU_STOLEN_MS]=temp64; if (glError != VMGUESTLIB_ERROR_SUCCESS) { if (glError == VMGUESTLIB_ERROR_UNSUPPORTED_VERSION) { context->values[VMWARE_CPU_STOLEN_MS]=0; fprintf(stderr, "Skipping CPU stolen, not supported...\n"); } else { fprintf(stderr, "Failed to get CPU stolen: %s\n", GuestLib_GetErrorText(glError)); return PAPI_ECMP; } } glError = GuestLib_GetCpuUsedMs(context->glHandle,&temp64); context->values[VMWARE_CPU_USED_MS]=temp64; if (glError != VMGUESTLIB_ERROR_SUCCESS) { fprintf(stderr, "Failed to get used ms: %s\n", GuestLib_GetErrorText(glError)); return PAPI_ECMP; } glError = GuestLib_GetElapsedMs(context->glHandle, &temp64); context->values[VMWARE_ELAPSED_MS]=temp64; if (glError != VMGUESTLIB_ERROR_SUCCESS) { fprintf(stderr, "Failed to get elapsed ms: %s\n", GuestLib_GetErrorText(glError)); return PAPI_ECMP; } glError = GuestLib_GetMemActiveMB(context->glHandle, &temp32); context->values[VMWARE_MEM_ACTIVE_MB]=temp32; if (glError != VMGUESTLIB_ERROR_SUCCESS) { fprintf(stderr, "Failed to get active mem: %s\n", GuestLib_GetErrorText(glError)); return PAPI_ECMP; } glError = GuestLib_GetMemBalloonedMB(context->glHandle, &temp32); context->values[VMWARE_MEM_BALLOONED_MB]=temp32; if (glError != VMGUESTLIB_ERROR_SUCCESS) { fprintf(stderr, "Failed to get ballooned mem: %s\n", GuestLib_GetErrorText(glError)); return PAPI_ECMP; } glError = GuestLib_GetMemLimitMB(context->glHandle, &temp32); context->values[VMWARE_MEM_LIMIT_MB]=temp32; if (glError != VMGUESTLIB_ERROR_SUCCESS) { fprintf(stderr,"Failed to get mem limit: %s\n", GuestLib_GetErrorText(glError)); return PAPI_ECMP; } glError = GuestLib_GetMemMappedMB(context->glHandle, &temp32); context->values[VMWARE_MEM_MAPPED_MB]=temp32; if (glError != VMGUESTLIB_ERROR_SUCCESS) { fprintf(stderr, "Failed to get mapped mem: %s\n", GuestLib_GetErrorText(glError)); return PAPI_ECMP; } glError = GuestLib_GetMemOverheadMB(context->glHandle, &temp32); 
context->values[VMWARE_MEM_OVERHEAD_MB]=temp32; if (glError != VMGUESTLIB_ERROR_SUCCESS) { fprintf(stderr, "Failed to get overhead mem: %s\n", GuestLib_GetErrorText(glError)); return PAPI_ECMP; } glError = GuestLib_GetMemReservationMB(context->glHandle, &temp32); context->values[VMWARE_MEM_RESERVATION_MB]=temp32; if (glError != VMGUESTLIB_ERROR_SUCCESS) { fprintf(stderr, "Failed to get mem reservation: %s\n", GuestLib_GetErrorText(glError)); return PAPI_ECMP; } glError = GuestLib_GetMemSharedMB(context->glHandle, &temp32); context->values[VMWARE_MEM_SHARED_MB]=temp32; if (glError != VMGUESTLIB_ERROR_SUCCESS) { fprintf(stderr, "Failed to get swapped mem: %s\n", GuestLib_GetErrorText(glError)); return PAPI_ECMP; } glError = GuestLib_GetMemShares(context->glHandle, &temp32); context->values[VMWARE_MEM_SHARES]=temp32; if (glError != VMGUESTLIB_ERROR_SUCCESS) { if (glError == VMGUESTLIB_ERROR_NOT_AVAILABLE) { context->values[VMWARE_MEM_SHARES]=0; fprintf(stderr, "Skipping mem shares, not supported...\n"); } else { fprintf(stderr, "Failed to get mem shares: %s\n", GuestLib_GetErrorText(glError)); return PAPI_ECMP; } } glError = GuestLib_GetMemSwappedMB(context->glHandle, &temp32); context->values[VMWARE_MEM_SWAPPED_MB]=temp32; if (glError != VMGUESTLIB_ERROR_SUCCESS) { fprintf(stderr, "Failed to get swapped mem: %s\n", GuestLib_GetErrorText(glError)); return PAPI_ECMP; } glError = GuestLib_GetMemTargetSizeMB(context->glHandle, &temp64); context->values[VMWARE_MEM_TARGET_SIZE_MB]=temp64; if (glError != VMGUESTLIB_ERROR_SUCCESS) { if (glError == VMGUESTLIB_ERROR_UNSUPPORTED_VERSION) { context->values[VMWARE_MEM_TARGET_SIZE_MB]=0; fprintf(stderr, "Skipping target mem size, not supported...\n"); } else { fprintf(stderr, "Failed to get target mem size: %s\n", GuestLib_GetErrorText(glError)); return PAPI_ECMP; } } glError = GuestLib_GetMemUsedMB(context->glHandle, &temp32); context->values[VMWARE_MEM_USED_MB]=temp32; if (glError != VMGUESTLIB_ERROR_SUCCESS) { fprintf(stderr, 
"Failed to get swapped mem: %s\n", GuestLib_GetErrorText(glError)); return PAPI_ECMP; } glError = GuestLib_GetHostProcessorSpeed(context->glHandle, &temp32); context->values[VMWARE_HOST_CPU_MHZ]=temp32; if (glError != VMGUESTLIB_ERROR_SUCCESS) { fprintf(stderr, "Failed to get host proc speed: %s\n", GuestLib_GetErrorText(glError)); return PAPI_ECMP; } } #endif if (starting) { for(i=0;istart_values[i]=context->values[i]; } } return PAPI_OK; } /********************************************************************/ /* Below are the functions required by the PAPI component interface */ /********************************************************************/ /** This is called whenever a thread is initialized */ int _vmware_init_thread( hwd_context_t *ctx ) { (void) ctx; #ifdef VMGUESTLIB struct _vmware_context *context; VMGuestLibError glError; context=(struct _vmware_context *)ctx; if (use_guestlib) { glError = GuestLib_OpenHandle(&(context->glHandle)); if (glError != VMGUESTLIB_ERROR_SUCCESS) { fprintf(stderr,"OpenHandle failed: %s\n", GuestLib_GetErrorText(glError)); return PAPI_ECMP; } } #endif return PAPI_OK; } /** Initialize hardware counters, setup the function vector table * and get hardware information, this routine is called when the * PAPI process is initialized (IE PAPI_library_init) */ int _vmware_init_component( int cidx ) { (void) cidx; int result; SUBDBG( "_vmware_init_component..." ); /* Initialize and try to load the VMware library */ /* Try to load the library. 
*/ result=LoadFunctions(); if (result!=PAPI_OK) { strncpy(_vmware_vector.cmp_info.disabled_reason, "GuestLibTest: Failed to load shared library", PAPI_MAX_STR_LEN); return PAPI_ECMP; } /* we know in advance how many events we want */ /* for actual hardware this might have to be determined dynamically */ /* Allocate memory for the our event table */ _vmware_native_table = ( struct _vmware_native_event_entry * ) calloc( VMWARE_MAX_COUNTERS, sizeof ( struct _vmware_native_event_entry )); if ( _vmware_native_table == NULL ) { return PAPI_ENOMEM; } #ifdef VMGUESTLIB /* Detect if GuestLib works */ { VMGuestLibError glError; VMGuestLibHandle glHandle; use_guestlib=0; /* try to open */ glError = GuestLib_OpenHandle(&glHandle); if (glError != VMGUESTLIB_ERROR_SUCCESS) { fprintf(stderr,"OpenHandle failed: %s\n", GuestLib_GetErrorText(glError)); } else { /* open worked, try to update */ glError = GuestLib_UpdateInfo(glHandle); if (glError != VMGUESTLIB_ERROR_SUCCESS) { fprintf(stderr,"UpdateInfo failed: %s\n", GuestLib_GetErrorText(glError)); } else { /* update worked, things work! 
*/ use_guestlib=1; } /* shut things down */ glError = GuestLib_CloseHandle(glHandle); } } if (use_guestlib) { /* fill in the event table parameters */ strcpy( _vmware_native_table[num_events].name, "CPU_LIMIT" ); strncpy( _vmware_native_table[num_events].description, "Retrieves the upper limit of processor use in MHz " "available to the virtual machine.", PAPI_HUGE_STR_LEN); strcpy( _vmware_native_table[num_events].units,"MHz"); _vmware_native_table[num_events].which_counter= VMWARE_CPU_LIMIT_MHZ; _vmware_native_table[num_events].report_difference=0; num_events++; strcpy( _vmware_native_table[num_events].name, "CPU_RESERVATION" ); strncpy( _vmware_native_table[num_events].description, "Retrieves the minimum processing power in MHz " "reserved for the virtual machine.", PAPI_HUGE_STR_LEN); strcpy( _vmware_native_table[num_events].units,"MHz"); _vmware_native_table[num_events].which_counter= VMWARE_CPU_RESERVATION_MHZ; _vmware_native_table[num_events].report_difference=0; num_events++; strcpy( _vmware_native_table[num_events].name, "CPU_SHARES" ); strncpy( _vmware_native_table[num_events].description, "Retrieves the number of CPU shares allocated " "to the virtual machine.", PAPI_HUGE_STR_LEN); strcpy( _vmware_native_table[num_events].units,"shares"); _vmware_native_table[num_events].which_counter= VMWARE_CPU_SHARES; _vmware_native_table[num_events].report_difference=0; num_events++; strcpy( _vmware_native_table[num_events].name, "CPU_STOLEN" ); strncpy( _vmware_native_table[num_events].description, "Retrieves the number of milliseconds that the " "virtual machine was in a ready state (able to " "transition to a run state), but was not scheduled to run.", PAPI_HUGE_STR_LEN); strcpy( _vmware_native_table[num_events].units,"ms"); _vmware_native_table[num_events].which_counter= VMWARE_CPU_STOLEN_MS; _vmware_native_table[num_events].report_difference=0; num_events++; strcpy( _vmware_native_table[num_events].name, "CPU_USED" ); strncpy( 
_vmware_native_table[num_events].description, "Retrieves the number of milliseconds during which " "the virtual machine has used the CPU. This value " "includes the time used by the guest operating system " "and the time used by virtualization code for tasks for " "this virtual machine. You can combine this value with " "the elapsed time (VMWARE_ELAPSED) to estimate the " "effective virtual machine CPU speed. This value is a " "subset of elapsedMs.", PAPI_HUGE_STR_LEN ); strcpy( _vmware_native_table[num_events].units,"ms"); _vmware_native_table[num_events].which_counter= VMWARE_CPU_USED_MS; _vmware_native_table[num_events].report_difference=1; num_events++; strcpy( _vmware_native_table[num_events].name, "ELAPSED" ); strncpy( _vmware_native_table[num_events].description, "Retrieves the number of milliseconds that have passed " "in the virtual machine since it last started running on " "the server. The count of elapsed time restarts each time " "the virtual machine is powered on, resumed, or migrated " "using VMotion. This value counts milliseconds, regardless " "of whether the virtual machine is using processing power " "during that time. You can combine this value with the CPU " "time used by the virtual machine (VMWARE_CPU_USED) to " "estimate the effective virtual machine xCPU speed. 
" "cpuUsedMS is a subset of this value.", PAPI_HUGE_STR_LEN ); strcpy( _vmware_native_table[num_events].units,"ms"); _vmware_native_table[num_events].which_counter= VMWARE_ELAPSED_MS; _vmware_native_table[num_events].report_difference=1; num_events++; strcpy( _vmware_native_table[num_events].name, "MEM_ACTIVE" ); strncpy( _vmware_native_table[num_events].description, "Retrieves the amount of memory the virtual machine is " "actively using in MB - Its estimated working set size.", PAPI_HUGE_STR_LEN ); strcpy( _vmware_native_table[num_events].units,"MB"); _vmware_native_table[num_events].which_counter= VMWARE_MEM_ACTIVE_MB; _vmware_native_table[num_events].report_difference=0; num_events++; strcpy( _vmware_native_table[num_events].name, "MEM_BALLOONED" ); strncpy( _vmware_native_table[num_events].description, "Retrieves the amount of memory that has been reclaimed " "from this virtual machine by the vSphere memory balloon " "driver (also referred to as the 'vmemctl' driver) in MB.", PAPI_HUGE_STR_LEN ); strcpy( _vmware_native_table[num_events].units,"MB"); _vmware_native_table[num_events].which_counter= VMWARE_MEM_BALLOONED_MB; _vmware_native_table[num_events].report_difference=0; num_events++; strcpy( _vmware_native_table[num_events].name, "MEM_LIMIT" ); strncpy( _vmware_native_table[num_events].description, "Retrieves the upper limit of memory that is available " "to the virtual machine in MB.", PAPI_HUGE_STR_LEN ); strcpy( _vmware_native_table[num_events].units,"MB"); _vmware_native_table[num_events].which_counter= VMWARE_MEM_LIMIT_MB; _vmware_native_table[num_events].report_difference=0; num_events++; strcpy( _vmware_native_table[num_events].name, "MEM_MAPPED" ); strncpy( _vmware_native_table[num_events].description, "Retrieves the amount of memory that is allocated to " "the virtual machine in MB. 
Memory that is ballooned, " "swapped, or has never been accessed is excluded.", PAPI_HUGE_STR_LEN ); strcpy( _vmware_native_table[num_events].units,"MB"); _vmware_native_table[num_events].which_counter= VMWARE_MEM_MAPPED_MB; _vmware_native_table[num_events].report_difference=0; num_events++; strcpy( _vmware_native_table[num_events].name, "MEM_OVERHEAD" ); strncpy( _vmware_native_table[num_events].description, "Retrieves the amount of 'overhead' memory associated " "with this virtual machine that is currently consumed " "on the host system in MB. Overhead memory is additional " "memory that is reserved for data structures required by " "the virtualization layer.", PAPI_HUGE_STR_LEN ); strcpy( _vmware_native_table[num_events].units,"MB"); _vmware_native_table[num_events].which_counter= VMWARE_MEM_OVERHEAD_MB; _vmware_native_table[num_events].report_difference=0; num_events++; strcpy( _vmware_native_table[num_events].name, "MEM_RESERVATION" ); strncpy( _vmware_native_table[num_events].description, "Retrieves the minimum amount of memory that is " "reserved for the virtual machine in MB.", PAPI_HUGE_STR_LEN ); strcpy( _vmware_native_table[num_events].units,"MB"); _vmware_native_table[num_events].which_counter= VMWARE_MEM_RESERVATION_MB; _vmware_native_table[num_events].report_difference=0; num_events++; strcpy( _vmware_native_table[num_events].name, "MEM_SHARED" ); strncpy( _vmware_native_table[num_events].description, "Retrieves the amount of physical memory associated " "with this virtual machine that is copy-on-write (COW) " "shared on the host in MB.", PAPI_HUGE_STR_LEN ); strcpy( _vmware_native_table[num_events].units,"MB"); _vmware_native_table[num_events].which_counter= VMWARE_MEM_SHARED_MB; _vmware_native_table[num_events].report_difference=0; num_events++; strcpy( _vmware_native_table[num_events].name, "MEM_SHARES" ); strncpy( _vmware_native_table[num_events].description, "Retrieves the number of memory shares allocated to " "the virtual machine.", 
PAPI_HUGE_STR_LEN ); strcpy( _vmware_native_table[num_events].units,"shares"); _vmware_native_table[num_events].which_counter= VMWARE_MEM_SHARES; _vmware_native_table[num_events].report_difference=0; num_events++; strcpy( _vmware_native_table[num_events].name, "MEM_SWAPPED" ); strncpy( _vmware_native_table[num_events].description, "Retrieves the amount of memory that has been reclaimed " "from this virtual machine by transparently swapping " "guest memory to disk in MB.", PAPI_HUGE_STR_LEN ); strcpy( _vmware_native_table[num_events].units,"MB"); _vmware_native_table[num_events].which_counter= VMWARE_MEM_SWAPPED_MB; _vmware_native_table[num_events].report_difference=0; num_events++; strcpy( _vmware_native_table[num_events].name, "MEM_TARGET_SIZE" ); strncpy( _vmware_native_table[num_events].description, "Retrieves the size of the target memory allocation " "for this virtual machine in MB.", PAPI_HUGE_STR_LEN ); strcpy( _vmware_native_table[num_events].units,"MB"); _vmware_native_table[num_events].which_counter= VMWARE_MEM_TARGET_SIZE_MB; _vmware_native_table[num_events].report_difference=0; num_events++; strcpy( _vmware_native_table[num_events].name, "MEM_USED" ); strncpy( _vmware_native_table[num_events].description, "Retrieves the estimated amount of physical host memory " "currently consumed for this virtual machine's " "physical memory.", PAPI_HUGE_STR_LEN ); strcpy( _vmware_native_table[num_events].units,"MB"); _vmware_native_table[num_events].which_counter= VMWARE_MEM_USED_MB; _vmware_native_table[num_events].report_difference=0; num_events++; strcpy( _vmware_native_table[num_events].name, "HOST_CPU" ); strncpy( _vmware_native_table[num_events].description, "Retrieves the speed of the ESX system's physical " "CPU in MHz.", PAPI_HUGE_STR_LEN ); strcpy( _vmware_native_table[num_events].units,"MHz"); _vmware_native_table[num_events].which_counter= VMWARE_HOST_CPU_MHZ; _vmware_native_table[num_events].report_difference=0; num_events++; } #endif /* For VMWare 
Pseudo Performance Counters */ if ( getenv( "PAPI_VMWARE_PSEUDOPERFORMANCE" ) ) { use_pseudo=1; strcpy( _vmware_native_table[num_events].name, "HOST_TSC" ); strncpy( _vmware_native_table[num_events].description, "Physical host TSC", PAPI_HUGE_STR_LEN ); strcpy( _vmware_native_table[num_events].units,"cycles"); _vmware_native_table[num_events].which_counter= VMWARE_HOST_TSC; _vmware_native_table[num_events].report_difference=1; num_events++; strcpy( _vmware_native_table[num_events].name, "ELAPSED_TIME" ); strncpy( _vmware_native_table[num_events].description, "Elapsed real time in ns.", PAPI_HUGE_STR_LEN ); strcpy( _vmware_native_table[num_events].units,"ns"); _vmware_native_table[num_events].which_counter= VMWARE_ELAPSED_TIME; _vmware_native_table[num_events].report_difference=1; num_events++; strcpy( _vmware_native_table[num_events].name, "ELAPSED_APPARENT" ); strncpy( _vmware_native_table[num_events].description, "Elapsed apparent time in ns.", PAPI_HUGE_STR_LEN ); strcpy( _vmware_native_table[num_events].units,"ns"); _vmware_native_table[num_events].which_counter= VMWARE_ELAPSED_APPARENT; _vmware_native_table[num_events].report_difference=1; num_events++; } if (num_events==0) { strncpy(_vmware_vector.cmp_info.disabled_reason, "VMware SDK not installed, and PAPI_VMWARE_PSEUDOPERFORMANCE not set", PAPI_MAX_STR_LEN); return PAPI_ECMP; } _vmware_vector.cmp_info.num_native_events = num_events; return PAPI_OK; } /** Setup the counter control structure */ int _vmware_init_control_state( hwd_control_state_t *ctl ) { (void) ctl; return PAPI_OK; } /** Enumerate Native Events @param EventCode is the event of interest @param modifier is one of PAPI_ENUM_FIRST, PAPI_ENUM_EVENTS */ int _vmware_ntv_enum_events( unsigned int *EventCode, int modifier ) { switch ( modifier ) { /* return EventCode of first event */ case PAPI_ENUM_FIRST: if (num_events==0) return PAPI_ENOEVNT; *EventCode = 0; return PAPI_OK; break; /* return EventCode of passed-in Event */ case PAPI_ENUM_EVENTS:{ 
int index = *EventCode; if ( index < num_events - 1 ) { *EventCode = *EventCode + 1; return PAPI_OK; } else { return PAPI_ENOEVNT; } break; } default: return PAPI_EINVAL; } return PAPI_EINVAL; } int _vmware_ntv_code_to_info(unsigned int EventCode, PAPI_event_info_t *info) { int index = EventCode; if ( ( index < 0) || (index >= num_events )) return PAPI_ENOEVNT; strncpy( info->symbol, _vmware_native_table[index].name, sizeof(info->symbol)); strncpy( info->long_descr, _vmware_native_table[index].description, sizeof(info->symbol)); strncpy( info->units, _vmware_native_table[index].units, sizeof(info->units)); return PAPI_OK; } /** Takes a native event code and passes back the name @param EventCode is the native event code @param name is a pointer for the name to be copied to @param len is the size of the string */ int _vmware_ntv_code_to_name( unsigned int EventCode, char *name, int len ) { int index = EventCode; if ( index >= 0 && index < num_events ) { strncpy( name, _vmware_native_table[index].name, len ); } return PAPI_OK; } /** Takes a native event code and passes back the event description @param EventCode is the native event code @param name is a pointer for the description to be copied to @param len is the size of the string */ int _vmware_ntv_code_to_descr( unsigned int EventCode, char *name, int len ) { int index = EventCode; if ( index >= 0 && index < num_events ) { strncpy( name, _vmware_native_table[index].description, len ); } return PAPI_OK; } /** Triggered by eventset operations like add or remove */ int _vmware_update_control_state( hwd_control_state_t *ctl, NativeInfo_t *native, int count, hwd_context_t *ctx ) { (void) ctx; struct _vmware_control_state *control; int i, index; control=(struct _vmware_control_state *)ctl; for ( i = 0; i < count; i++ ) { index = native[i].ni_event; control->which_counter[i]=_vmware_native_table[index].which_counter; native[i].ni_position = i; } control->num_events=count; return PAPI_OK; } /** Triggered by PAPI_start() 
*/ int _vmware_start( hwd_context_t *ctx, hwd_control_state_t *ctl ) { struct _vmware_context *context; (void) ctl; context=(struct _vmware_context *)ctx; _vmware_hardware_read( context, 1 ); return PAPI_OK; } /** Triggered by PAPI_stop() */ int _vmware_stop( hwd_context_t *ctx, hwd_control_state_t *ctl ) { struct _vmware_context *context; (void) ctl; context=(struct _vmware_context *)ctx; _vmware_hardware_read( context, 0 ); return PAPI_OK; } /** Triggered by PAPI_read() */ int _vmware_read( hwd_context_t *ctx, hwd_control_state_t *ctl, long_long **events, int flags ) { struct _vmware_context *context; struct _vmware_control_state *control; (void) flags; int i; context=(struct _vmware_context *)ctx; control=(struct _vmware_control_state *)ctl; _vmware_hardware_read( context, 0 ); for (i=0; inum_events; i++) { if (_vmware_native_table[ _vmware_native_table[control->which_counter[i]].which_counter]. report_difference) { control->value[i]=context->values[control->which_counter[i]]- context->start_values[control->which_counter[i]]; } else { control->value[i]=context->values[control->which_counter[i]]; } // printf("%d %d %lld-%lld=%lld\n",i,control->which_counter[i], // context->values[control->which_counter[i]], // context->start_values[control->which_counter[i]], // control->value[i]); } *events = control->value; return PAPI_OK; } /** Triggered by PAPI_write(), but only if the counters are running */ /* otherwise, the updated state is written to ESI->hw_start */ int _vmware_write( hwd_context_t * ctx, hwd_control_state_t * ctrl, long_long events[] ) { (void) ctx; (void) ctrl; (void) events; SUBDBG( "_vmware_write... %p %p", ctx, ctrl ); /* FIXME... this should actually carry out the write, though */ /* this is non-trivial as which counter being written has to be */ /* determined somehow. 
*/ return PAPI_OK; } /** Triggered by PAPI_reset */ int _vmware_reset( hwd_context_t *ctx, hwd_control_state_t *ctl ) { (void) ctx; (void) ctl; return PAPI_OK; } /** Shutting down a context */ int _vmware_shutdown_thread( hwd_context_t *ctx ) { (void) ctx; #ifdef VMGUESTLIB VMGuestLibError glError; struct _vmware_context *context; context=(struct _vmware_context *)ctx; if (use_guestlib) { glError = GuestLib_CloseHandle(context->glHandle); if (glError != VMGUESTLIB_ERROR_SUCCESS) { fprintf(stderr, "Failed to CloseHandle: %s\n", GuestLib_GetErrorText(glError)); return PAPI_ECMP; } } #endif return PAPI_OK; } /** Triggered by PAPI_shutdown() */ int _vmware_shutdown_component( void ) { #ifdef VMGUESTLIB if (dlclose(dlHandle)) { fprintf(stderr, "dlclose failed\n"); return EXIT_FAILURE; } #endif return PAPI_OK; } /** This function sets various options in the component @param ctx @param code valid are PAPI_SET_DEFDOM, PAPI_SET_DOMAIN, PAPI_SETDEFGRN, PAPI_SET_GRANUL and PAPI_SET_INHERIT @param option */ int _vmware_ctl( hwd_context_t *ctx, int code, _papi_int_option_t *option ) { (void) ctx; (void) code; (void) option; SUBDBG( "_vmware_ctl..." ); return PAPI_OK; } /** This function has to set the bits needed to count different domains In particular: PAPI_DOM_USER, PAPI_DOM_KERNEL PAPI_DOM_OTHER By default return PAPI_EINVAL if none of those are specified and PAPI_OK with success PAPI_DOM_USER is only user context is counted PAPI_DOM_KERNEL is only the Kernel/OS context is counted PAPI_DOM_OTHER is Exception/transient mode (like user TLB misses) PAPI_DOM_ALL is all of the domains */ int _vmware_set_domain( hwd_control_state_t *ctl, int domain ) { (void) ctl; int found = 0; SUBDBG( "_vmware_set_domain..." 
); if ( PAPI_DOM_USER & domain ) { SUBDBG( " PAPI_DOM_USER " ); found = 1; } if ( PAPI_DOM_KERNEL & domain ) { SUBDBG( " PAPI_DOM_KERNEL " ); found = 1; } if ( PAPI_DOM_OTHER & domain ) { SUBDBG( " PAPI_DOM_OTHER " ); found = 1; } if ( PAPI_DOM_ALL & domain ) { SUBDBG( " PAPI_DOM_ALL " ); found = 1; } if ( !found ) { return ( PAPI_EINVAL ); } return PAPI_OK; } /** Vector that points to entry points for our component */ papi_vector_t _vmware_vector = { .cmp_info = { /* default component information (unspecified values are initialized to 0) */ .name = "vmware", .short_name = "vmware", .description = "Provide support for VMware vmguest and pseudo counters", .version = "5.0", .num_mpx_cntrs = VMWARE_MAX_COUNTERS, .num_cntrs = VMWARE_MAX_COUNTERS, .default_domain = PAPI_DOM_USER, .available_domains = PAPI_DOM_USER, .default_granularity = PAPI_GRN_THR, .available_granularities = PAPI_GRN_THR, .hardware_intr_sig = PAPI_INT_SIGNAL, /* component specific cmp_info initializations */ .fast_real_timer = 0, .fast_virtual_timer = 0, .attach = 0, .attach_must_ptrace = 0, }, /* sizes of framework-opaque component-private structures */ .size = { .context = sizeof ( struct _vmware_context ), .control_state = sizeof ( struct _vmware_control_state ), .reg_value = sizeof ( struct _vmware_register ), .reg_alloc = sizeof ( struct _vmware_reg_alloc ), } , /* function pointers in this component */ .init_thread = _vmware_init_thread, .init_component = _vmware_init_component, .init_control_state = _vmware_init_control_state, .start = _vmware_start, .stop = _vmware_stop, .read = _vmware_read, .write = _vmware_write, .shutdown_thread = _vmware_shutdown_thread, .shutdown_component = _vmware_shutdown_component, .ctl = _vmware_ctl, .update_control_state = _vmware_update_control_state, .set_domain = _vmware_set_domain, .reset = _vmware_reset, .ntv_enum_events = _vmware_ntv_enum_events, .ntv_code_to_name = _vmware_ntv_code_to_name, .ntv_code_to_descr = _vmware_ntv_code_to_descr, .ntv_code_to_info = 
_vmware_ntv_code_to_info, }; papi-5.6.0/src/perfctr-2.7.x/etc/costs/Sempron-3100+000664 001750 001750 00000001467 13216244367 023467 0ustar00jshenry1963jshenry1963000000 000000 [data from a 1.8GHz Sempron 3100+] PERFCTR INIT: vendor 2, family 15, model 12, stepping 0, clock 1804035 kHz PERFCTR INIT: NITER == 64 PERFCTR INIT: loop overhead is 244 cycles PERFCTR INIT: rdtsc cost is 9.4 cycles (849 total) PERFCTR INIT: rdpmc cost is 11.4 cycles (979 total) PERFCTR INIT: rdmsr (counter) cost is 51.1 cycles (3516 total) PERFCTR INIT: rdmsr (evntsel) cost is 57.0 cycles (3897 total) PERFCTR INIT: wrmsr (counter) cost is 72.1 cycles (4859 total) PERFCTR INIT: wrmsr (evntsel) cost is 328.8 cycles (21291 total) PERFCTR INIT: read cr4 cost is 5.8 cycles (621 total) PERFCTR INIT: write cr4 cost is 67.0 cycles (4534 total) PERFCTR INIT: write LVTPC cost is 22.1 cycles (1663 total) PERFCTR INIT: sync_core cost is 164.0 cycles (10742 total) perfctr: driver 2.7.17, cpu type AMD K7/K8 at 1804035 kHz papi-5.6.0/src/perfctr-2.7.x/examples/global/000775 001750 001750 00000000000 13216244370 022617 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/freebsd/map-unknown.h000664 001750 001750 00000001317 13216244361 021361 0ustar00jshenry1963jshenry1963000000 000000 /****************************/ /* THIS IS OPEN SOURCE CODE */ /****************************/ /* * File: map-unknown.h * CVS: $Id$ * Author: Harald Servat * redcrash@gmail.com */ #ifndef FreeBSD_MAP_UNKNOWN #define FreeBSD_MAP_UNKNOWN enum NativeEvent_Value_UnknownProcessor { PNE_UNK_BRANCHES = PAPI_NATIVE_MASK, PNE_UNK_BRANCH_MISPREDICTS, /* PNE_UNK_CYCLES, -- libpmc only supports cycles in system wide mode and this requires root privileges */ PNE_UNK_DC_MISSES, PNE_UNK_IC_MISSES, PNE_UNK_INSTRUCTIONS, PNE_UNK_INTERRUPTS, PNE_UNK_UNHALTED_CYCLES, PNE_UNK_NATNAME_GUARD }; extern Native_Event_LabelDescription_t UnkProcessor_info[]; extern hwi_search_t UnkProcessor_map[]; #endif papi-5.6.0/src/papi_events.xml000664 
001750 001750 00000236132 13216244366 020375 0ustar00jshenry1963jshenry1963000000 000000 Level 1 data cache misses Level 1 instruction cache misses Level 2 data cache misses Level 2 instruction cache misses Level 3 data cache misses Level 3 instruction cache misses Level 1 cache misses Level 2 cache misses Level 3 cache misses Requests for a snoop Requests for exclusive access to shared cache line Requests for exclusive access to clean cache line Requests for cache line invalidation Requests for cache line intervention Level 3 load misses Level 3 store misses Cycles branch units are idle Cycles integer units are idle Cycles floating point units are idle Cycles load/store units are idle Data translation lookaside buffer misses Instruction translation lookaside buffer misses Total translation lookaside buffer misses Level 1 load misses Level 1 store misses Level 2 load misses Level 2 store misses Branch target address cache misses Data prefetch cache misses Level 3 data cache hits Translation lookaside buffer shootdowns Failed store conditional instructions Successful store conditional instructions Total store conditional instructions Cycles Stalled Waiting for memory accesses Cycles Stalled Waiting for memory Reads Cycles Stalled Waiting for memory writes Cycles with no instruction issue Cycles with maximum instruction issue Cycles with no instructions completed Cycles with maximum instructions completed Hardware interrupts Unconditional branch instructions Conditional branch instructions Conditional branch instructions taken Conditional branch instructions not taken Conditional branch instructions mispredicted Conditional branch instructions correctly predicted FMA instructions completed Instructions issued Instructions completed Integer instructions Floating point instructions Load instructions Store instructions Branch instructions Vector/SIMD instructions Cycles stalled on any resource Cycles the FP unit(s) are stalled Total cycles Load/store instructions 
completed Synchronization instructions completed Level 1 data cache hits Level 2 data cache hits Level 1 data cache accesses Level 2 data cache accesses Level 3 data cache accesses Level 1 data cache reads Level 2 data cache reads Level 3 data cache reads Level 1 data cache writes Level 2 data cache writes Level 3 data cache writes Level 1 instruction cache hits Level 2 instruction cache hits Level 3 instruction cache hits Level 1 instruction cache accesses Level 2 instruction cache accesses Level 3 instruction cache accesses Level 1 instruction cache reads Level 2 instruction cache reads Level 3 instruction cache reads Level 1 instruction cache writes Level 2 instruction cache writes Level 3 instruction cache writes Level 1 total cache hits Level 2 total cache hits Level 3 total cache hits Level 1 total cache accesses Level 2 total cache accesses Level 3 total cache accesses Level 1 total cache reads Level 2 total cache reads Level 3 total cache reads Level 1 total cache writes Level 2 total cache writes Level 3 total cache writes Floating point multiply instructions Floating point add instructions Floating point divide instructions Floating point square root instructions Floating point inverse instructions Floating point operations papi-5.6.0/src/libpfm4/lib/pfmlib_amd64.c000664 001750 001750 00000053177 13216244365 022056 0ustar00jshenry1963jshenry1963000000 000000 /* * pfmlib_amd64.c : support for the AMD64 architected PMU * (for both 64 and 32 bit modes) * * Copyright (c) 2009 Google, Inc * Contributed by Stephane Eranian * * Based on: * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. 
* Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include #include #include /* private headers */ #include "pfmlib_priv.h" /* library private */ #include "pfmlib_amd64_priv.h" /* architecture private */ const pfmlib_attr_desc_t amd64_mods[]={ PFM_ATTR_B("k", "monitor at priv level 0"), /* monitor priv level 0 */ PFM_ATTR_B("u", "monitor at priv level 1, 2, 3"), /* monitor priv level 1, 2, 3 */ PFM_ATTR_B("e", "edge level"), /* edge */ PFM_ATTR_B("i", "invert"), /* invert */ PFM_ATTR_I("c", "counter-mask in range [0-255]"), /* counter-mask */ PFM_ATTR_B("h", "monitor in hypervisor"), /* monitor in hypervisor*/ PFM_ATTR_B("g", "measure in guest"), /* monitor in guest */ PFM_ATTR_NULL /* end-marker to avoid exporting number of entries */ }; pfmlib_pmu_t amd64_support; pfm_amd64_config_t pfm_amd64_cfg; static int amd64_num_mods(void *this, int idx) { const amd64_entry_t *pe = this_pe(this); unsigned int mask; mask = pe[idx].modmsk; return pfmlib_popcnt(mask); } static inline int amd64_eflag(void *this, int idx, int flag) { const amd64_entry_t *pe = this_pe(this); return !!(pe[idx].flags & flag); } static inline int amd64_uflag(void *this, int idx, int attr, int flag) { const amd64_entry_t *pe = this_pe(this); return !!(pe[idx].umasks[attr].uflags & flag); } static inline int amd64_event_ibsfetch(void *this, int idx) { return amd64_eflag(this, idx, AMD64_FL_IBSFE); } static inline int amd64_event_ibsop(void *this, int idx) { return amd64_eflag(this, idx, AMD64_FL_IBSOP); } static inline int amd64_from_rev(unsigned int flags) { return ((flags) >> 8) & 0xff; } static inline int amd64_till_rev(unsigned int flags) { int till = (((flags)>>16) & 0xff); if (!till) return 0xff; return till; } static void amd64_get_revision(pfm_amd64_config_t *cfg) { pfm_pmu_t rev = PFM_PMU_NONE; if (cfg->family == 6) { cfg->revision = PFM_PMU_AMD64_K7; return; } if (cfg->family == 15) { switch (cfg->model >> 4) { case 0: if (cfg->model == 5 && cfg->stepping < 2) { rev = PFM_PMU_AMD64_K8_REVB; break; } if (cfg->model == 4 && cfg->stepping 
== 0) { rev = PFM_PMU_AMD64_K8_REVB; break; } rev = PFM_PMU_AMD64_K8_REVC; break; case 1: rev = PFM_PMU_AMD64_K8_REVD; break; case 2: case 3: rev = PFM_PMU_AMD64_K8_REVE; break; case 4: case 5: case 0xc: rev = PFM_PMU_AMD64_K8_REVF; break; case 6: case 7: case 8: rev = PFM_PMU_AMD64_K8_REVG; break; default: rev = PFM_PMU_AMD64_K8_REVB; } } else if (cfg->family == 16) { /* family 10h */ switch (cfg->model) { case 4: case 5: case 6: rev = PFM_PMU_AMD64_FAM10H_SHANGHAI; break; case 8: case 9: rev = PFM_PMU_AMD64_FAM10H_ISTANBUL; break; default: rev = PFM_PMU_AMD64_FAM10H_BARCELONA; } } else if (cfg->family == 17) { /* family 11h */ switch (cfg->model) { default: rev = PFM_PMU_AMD64_FAM11H_TURION; } } else if (cfg->family == 18) { /* family 12h */ switch (cfg->model) { default: rev = PFM_PMU_AMD64_FAM12H_LLANO; } } else if (cfg->family == 20) { /* family 14h */ switch (cfg->model) { default: rev = PFM_PMU_AMD64_FAM14H_BOBCAT; } } else if (cfg->family == 21) { /* family 15h */ rev = PFM_PMU_AMD64_FAM15H_INTERLAGOS; } else if (cfg->family == 23) { /* family 17h */ rev = PFM_PMU_AMD64_FAM17H; } else if (cfg->family == 22) { /* family 16h */ rev = PFM_PMU_AMD64_FAM16H; } cfg->revision = rev; } /* * .byte 0x53 == push ebx. it's universal for 32 and 64 bit * .byte 0x5b == pop ebx. * Some gcc's (4.1.2 on Core2) object to pairing push/pop and ebx in 64 bit mode. * Using the opcode directly avoids this problem. 
*/ static inline void cpuid(unsigned int op, unsigned int *a, unsigned int *b, unsigned int *c, unsigned int *d) { __asm__ __volatile__ (".byte 0x53\n\tcpuid\n\tmovl %%ebx, %%esi\n\t.byte 0x5b" : "=a" (*a), "=S" (*b), "=c" (*c), "=d" (*d) : "a" (op)); } static int amd64_event_valid(void *this, int i) { const amd64_entry_t *pe = this_pe(this); pfmlib_pmu_t *pmu = this; int flags; flags = pe[i].flags; if (pmu->pmu_rev < amd64_from_rev(flags)) return 0; if (pmu->pmu_rev > amd64_till_rev(flags)) return 0; /* no restrictions or matches restrictions */ return 1; } static int amd64_umask_valid(void *this, int i, int attr) { pfmlib_pmu_t *pmu = this; const amd64_entry_t *pe = this_pe(this); int flags; flags = pe[i].umasks[attr].uflags; if (pmu->pmu_rev < amd64_from_rev(flags)) return 0; if (pmu->pmu_rev > amd64_till_rev(flags)) return 0; /* no restrictions or matches restrictions */ return 1; } static unsigned int amd64_num_umasks(void *this, int pidx) { const amd64_entry_t *pe = this_pe(this); unsigned int i, n = 0; /* unit masks + modifiers */ for (i = 0; i < pe[pidx].numasks; i++) if (amd64_umask_valid(this, pidx, i)) n++; return n; } static int amd64_get_umask(void *this, int pidx, int attr_idx) { const amd64_entry_t *pe = this_pe(this); unsigned int i; int n; for (i=0, n = 0; i < pe[pidx].numasks; i++) { if (!amd64_umask_valid(this, pidx, i)) continue; if (n++ == attr_idx) return i; } return -1; } static inline int amd64_attr2mod(void *this, int pidx, int attr_idx) { const amd64_entry_t *pe = this_pe(this); size_t x; int n; n = attr_idx - amd64_num_umasks(this, pidx); pfmlib_for_each_bit(x, pe[pidx].modmsk) { if (n == 0) break; n--; } return x; } void amd64_display_reg(void *this, pfmlib_event_desc_t *e, pfm_amd64_reg_t reg) { pfmlib_pmu_t *pmu = this; if (IS_FAMILY_10H(pmu) || IS_FAMILY_15H(pmu)) __pfm_vbprintf("[0x%"PRIx64" event_sel=0x%x umask=0x%x os=%d usr=%d en=%d int=%d inv=%d edge=%d cnt_mask=%d guest=%d host=%d] %s\n", reg.val, reg.sel_event_mask | 
(reg.sel_event_mask2 << 8), reg.sel_unit_mask, reg.sel_os, reg.sel_usr, reg.sel_en, reg.sel_int, reg.sel_inv, reg.sel_edge, reg.sel_cnt_mask, reg.sel_guest, reg.sel_host, e->fstr); else __pfm_vbprintf("[0x%"PRIx64" event_sel=0x%x umask=0x%x os=%d usr=%d en=%d int=%d inv=%d edge=%d cnt_mask=%d] %s\n", reg.val, reg.sel_event_mask, reg.sel_unit_mask, reg.sel_os, reg.sel_usr, reg.sel_en, reg.sel_int, reg.sel_inv, reg.sel_edge, reg.sel_cnt_mask, e->fstr); } int pfm_amd64_detect(void *this) { unsigned int a, b, c, d; char buffer[128]; if (pfm_amd64_cfg.family) return PFM_SUCCESS; cpuid(0, &a, &b, &c, &d); strncpy(&buffer[0], (char *)(&b), 4); strncpy(&buffer[4], (char *)(&d), 4); strncpy(&buffer[8], (char *)(&c), 4); buffer[12] = '\0'; if (strcmp(buffer, "AuthenticAMD")) return PFM_ERR_NOTSUPP; cpuid(1, &a, &b, &c, &d); pfm_amd64_cfg.family = (a >> 8) & 0x0000000f; // bits 11 - 8 pfm_amd64_cfg.model = (a >> 4) & 0x0000000f; // Bits 7 - 4 if (pfm_amd64_cfg.family == 0xf) { pfm_amd64_cfg.family += (a >> 20) & 0x000000ff; // Extended family pfm_amd64_cfg.model |= (a >> 12) & 0x000000f0; // Extended model } pfm_amd64_cfg.stepping= a & 0x0000000f; // bits 3 - 0 amd64_get_revision(&pfm_amd64_cfg); if (pfm_amd64_cfg.revision == PFM_PMU_NONE) return PFM_ERR_NOTSUPP; return PFM_SUCCESS; } int pfm_amd64_family_detect(void *this) { struct pfmlib_pmu *pmu = this; int ret; ret = pfm_amd64_detect(this); if (ret != PFM_SUCCESS) return ret; ret = pfm_amd64_cfg.revision; return ret == pmu->cpu_family ? 
PFM_SUCCESS : PFM_ERR_NOTSUPP; } static int amd64_add_defaults(void *this, pfmlib_event_desc_t *e, unsigned int msk, uint64_t *umask) { const amd64_entry_t *ent, *pe = this_pe(this); unsigned int i; int j, k, added, omit, numasks_grp; int idx; k = e->nattrs; ent = pe+e->event; for(i=0; msk; msk >>=1, i++) { if (!(msk & 0x1)) continue; added = omit = numasks_grp = 0; for (j = 0; j < e->npattrs; j++) { if (e->pattrs[j].ctrl != PFM_ATTR_CTRL_PMU) continue; if (e->pattrs[j].type != PFM_ATTR_UMASK) continue; idx = e->pattrs[j].idx; if (ent->umasks[idx].grpid != i) continue; /* number of umasks in this group */ numasks_grp++; if (amd64_uflag(this, e->event, idx, AMD64_FL_DFL)) { DPRINT("added default for %s j=%d idx=%d\n", ent->umasks[idx].uname, j, idx); *umask |= ent->umasks[idx].ucode; e->attrs[k].id = j; /* pattrs index */ e->attrs[k].ival = 0; k++; added++; } if (amd64_uflag(this, e->event, idx, AMD64_FL_OMIT)) omit++; } /* * fail if no default was found AND at least one umasks cannot be omitted * in the group */ if (!added && omit != numasks_grp) { DPRINT("no default found for event %s unit mask group %d\n", ent->name, i); return PFM_ERR_UMASK; } } e->nattrs = k; return PFM_SUCCESS; } int pfm_amd64_get_encoding(void *this, pfmlib_event_desc_t *e) { const amd64_entry_t *pe = this_pe(this); pfm_amd64_reg_t reg; pfmlib_event_attr_info_t *a; uint64_t umask = 0; unsigned int plmmsk = 0; int k, ret, grpid; unsigned int grpmsk, ugrpmsk = 0; int grpcounts[AMD64_MAX_GRP]; int ncombo[AMD64_MAX_GRP]; memset(grpcounts, 0, sizeof(grpcounts)); memset(ncombo, 0, sizeof(ncombo)); e->fstr[0] = '\0'; reg.val = 0; /* assume reserved bits are zerooed */ grpmsk = (1 << pe[e->event].ngrp)-1; if (amd64_event_ibsfetch(this, e->event)) reg.ibsfetch.en = 1; else if (amd64_event_ibsop(this, e->event)) reg.ibsop.en = 1; else { reg.sel_event_mask = pe[e->event].code; reg.sel_event_mask2 = pe[e->event].code >> 8; reg.sel_en = 1; /* force enable */ reg.sel_int = 1; /* force APIC */ } for(k=0; k 
< e->nattrs; k++) { a = attr(e, k); if (a->ctrl != PFM_ATTR_CTRL_PMU) continue; if (a->type == PFM_ATTR_UMASK) { grpid = pe[e->event].umasks[a->idx].grpid; ++grpcounts[grpid]; /* * upper layer has removed duplicates * so if we come here more than once, it is for two * diinct umasks */ if (amd64_uflag(this, e->event, a->idx, AMD64_FL_NCOMBO)) ncombo[grpid] = 1; /* * if more than one umask in this group but one is marked * with ncombo, then fail. It is okay to combine umask within * a group as long as none is tagged with NCOMBO */ if (grpcounts[grpid] > 1 && ncombo[grpid]) { DPRINT("event does not support unit mask combination within a group\n"); return PFM_ERR_FEATCOMB; } umask |= pe[e->event].umasks[a->idx].ucode; ugrpmsk |= 1 << pe[e->event].umasks[a->idx].grpid; } else if (a->type == PFM_ATTR_RAW_UMASK) { /* there can only be one RAW_UMASK per event */ /* sanity checks */ if (a->idx & ~0xff) { DPRINT("raw umask is invalid\n"); return PFM_ERR_ATTR; } /* override umask */ umask = a->idx & 0xff; ugrpmsk = grpmsk; } else { /* modifiers */ uint64_t ival = e->attrs[k].ival; switch(a->idx) { //amd64_attr2mod(this, e->osid, e->event, a->idx)) { case AMD64_ATTR_I: /* invert */ reg.sel_inv = !!ival; break; case AMD64_ATTR_E: /* edge */ reg.sel_edge = !!ival; break; case AMD64_ATTR_C: /* counter-mask */ if (ival > 255) return PFM_ERR_ATTR_VAL; reg.sel_cnt_mask = ival; break; case AMD64_ATTR_U: /* USR */ reg.sel_usr = !!ival; plmmsk |= _AMD64_ATTR_U; break; case AMD64_ATTR_K: /* OS */ reg.sel_os = !!ival; plmmsk |= _AMD64_ATTR_K; break; case AMD64_ATTR_G: /* GUEST */ reg.sel_guest = !!ival; plmmsk |= _AMD64_ATTR_G; break; case AMD64_ATTR_H: /* HOST */ reg.sel_host = !!ival; plmmsk |= _AMD64_ATTR_H; break; } } } /* * handle case where no priv level mask was passed. 
* then we use the dfl_plm */ if (!(plmmsk & (_AMD64_ATTR_K|_AMD64_ATTR_U|_AMD64_ATTR_H))) { if (e->dfl_plm & PFM_PLM0) reg.sel_os = 1; if (e->dfl_plm & PFM_PLM3) reg.sel_usr = 1; if ((IS_FAMILY_10H(this) || IS_FAMILY_15H(this)) && e->dfl_plm & PFM_PLMH) reg.sel_host = 1; } /* * check that there is at least of unit mask in each unit * mask group */ if (ugrpmsk != grpmsk) { ugrpmsk ^= grpmsk; ret = amd64_add_defaults(this, e, ugrpmsk, &umask); if (ret != PFM_SUCCESS) return ret; } reg.sel_unit_mask = umask; e->codes[0] = reg.val; e->count = 1; /* * reorder all the attributes such that the fstr appears always * the same regardless of how the attributes were submitted. */ evt_strcat(e->fstr, "%s", pe[e->event].name); pfmlib_sort_attr(e); for (k = 0; k < e->nattrs; k++) { a = attr(e, k); if (a->ctrl != PFM_ATTR_CTRL_PMU) continue; if (a->type == PFM_ATTR_UMASK) evt_strcat(e->fstr, ":%s", pe[e->event].umasks[a->idx].uname); else if (a->type == PFM_ATTR_RAW_UMASK) evt_strcat(e->fstr, ":0x%x", a->idx); } for (k = 0; k < e->npattrs; k++) { int idx; if (e->pattrs[k].ctrl != PFM_ATTR_CTRL_PMU) continue; if (e->pattrs[k].type == PFM_ATTR_UMASK) continue; idx = e->pattrs[k].idx; switch(idx) { case AMD64_ATTR_K: evt_strcat(e->fstr, ":%s=%lu", amd64_mods[idx].name, reg.sel_os); break; case AMD64_ATTR_U: evt_strcat(e->fstr, ":%s=%lu", amd64_mods[idx].name, reg.sel_usr); break; case AMD64_ATTR_E: evt_strcat(e->fstr, ":%s=%lu", amd64_mods[idx].name, reg.sel_edge); break; case AMD64_ATTR_I: evt_strcat(e->fstr, ":%s=%lu", amd64_mods[idx].name, reg.sel_inv); break; case AMD64_ATTR_C: evt_strcat(e->fstr, ":%s=%lu", amd64_mods[idx].name, reg.sel_cnt_mask); break; case AMD64_ATTR_H: evt_strcat(e->fstr, ":%s=%lu", amd64_mods[idx].name, reg.sel_host); break; case AMD64_ATTR_G: evt_strcat(e->fstr, ":%s=%lu", amd64_mods[idx].name, reg.sel_guest); break; } } amd64_display_reg(this, e, reg); return PFM_SUCCESS; } int pfm_amd64_get_event_first(void *this) { pfmlib_pmu_t *pmu = this; int idx; 
for(idx=0; idx < pmu->pme_count; idx++) if (amd64_event_valid(this, idx)) return idx; return -1; } int pfm_amd64_get_event_next(void *this, int idx) { pfmlib_pmu_t *pmu = this; /* basic validity checks on idx down by caller */ if (idx >= (pmu->pme_count-1)) return -1; /* validate event fo this host PMU */ if (!amd64_event_valid(this, idx)) return -1; for(++idx; idx < pmu->pme_count; idx++) { if (amd64_event_valid(this, idx)) return idx; } return -1; } int pfm_amd64_event_is_valid(void *this, int pidx) { pfmlib_pmu_t *pmu = this; if (pidx < 0 || pidx >= pmu->pme_count) return 0; /* valid revision */ return amd64_event_valid(this, pidx); } int pfm_amd64_get_event_attr_info(void *this, int pidx, int attr_idx, pfmlib_event_attr_info_t *info) { const amd64_entry_t *pe = this_pe(this); int numasks, idx; numasks = amd64_num_umasks(this, pidx); if (attr_idx < numasks) { idx = amd64_get_umask(this, pidx, attr_idx); if (idx == -1) return PFM_ERR_ATTR; info->name = pe[pidx].umasks[idx].uname; info->desc = pe[pidx].umasks[idx].udesc; info->code = pe[pidx].umasks[idx].ucode; info->type = PFM_ATTR_UMASK; info->is_dfl = amd64_uflag(this, pidx, idx, AMD64_FL_DFL); } else { idx = amd64_attr2mod(this, pidx, attr_idx); info->name = amd64_mods[idx].name; info->desc = amd64_mods[idx].desc; info->type = amd64_mods[idx].type; info->code = idx; info->is_dfl = 0; } info->is_precise = 0; info->equiv = NULL; info->ctrl = PFM_ATTR_CTRL_PMU; info->idx = idx; /* namespace specific index */ info->dfl_val64 = 0; return PFM_SUCCESS; } int pfm_amd64_get_event_info(void *this, int idx, pfm_event_info_t *info) { pfmlib_pmu_t *pmu = this; const amd64_entry_t *pe = this_pe(this); info->name = pe[idx].name; info->desc = pe[idx].desc; info->equiv = NULL; info->code = pe[idx].code; info->idx = idx; info->pmu = pmu->pmu; info->is_precise = 0; info->nattrs = amd64_num_umasks(this, idx); info->nattrs += amd64_num_mods(this, idx); return PFM_SUCCESS; } int pfm_amd64_validate_table(void *this, FILE *fp) { 
pfmlib_pmu_t *pmu = this; const amd64_entry_t *pe = this_pe(this); const char *name = pmu->name; unsigned int j, k; int i, ndfl; int error = 0; if (!pmu->atdesc) { fprintf(fp, "pmu: %s missing attr_desc\n", pmu->name); error++; } if (!pmu->supported_plm && pmu->type == PFM_PMU_TYPE_CORE) { fprintf(fp, "pmu: %s supported_plm not set\n", pmu->name); error++; } for(i=0; i < pmu->pme_count; i++) { if (!pe[i].name) { fprintf(fp, "pmu: %s event%d: :: no name (prev event was %s)\n", pmu->name, i, i > 1 ? pe[i-1].name : "??"); error++; } if (!pe[i].desc) { fprintf(fp, "pmu: %s event%d: %s :: no description\n", name, i, pe[i].name); error++; } if (pe[i].numasks && pe[i].umasks == NULL) { fprintf(fp, "pmu: %s event%d: %s :: numasks but no umasks\n", pmu->name, i, pe[i].name); error++; } if (pe[i].numasks == 0 && pe[i].umasks) { fprintf(fp, "pmu: %s event%d: %s :: numasks=0 but umasks defined\n", pmu->name, i, pe[i].name); error++; } if (pe[i].numasks && pe[i].ngrp == 0) { fprintf(fp, "pmu: %s event%d: %s :: ngrp cannot be zero\n", name, i, pe[i].name); error++; } if (pe[i].numasks == 0 && pe[i].ngrp) { fprintf(fp, "pmu: %s event%d: %s :: ngrp must be zero\n", name, i, pe[i].name); error++; } if (pe[i].ngrp >= AMD64_MAX_GRP) { fprintf(fp, "pmu: %s event%d: %s :: ngrp too big (max=%d)\n", name, i, pe[i].name, AMD64_MAX_GRP); error++; } for(ndfl = 0, j= 0; j < pe[i].numasks; j++) { if (!pe[i].umasks[j].uname) { fprintf(fp, "pmu: %s event%d: %s umask%d :: no name\n", pmu->name, i, pe[i].name, j); error++; } if (!pe[i].umasks[j].udesc) { fprintf(fp, "pmu: %s event%d:%s umask%d: %s :: no description\n", name, i, pe[i].name, j, pe[i].umasks[j].uname); error++; } if (pe[i].ngrp && pe[i].umasks[j].grpid >= pe[i].ngrp) { fprintf(fp, "pmu: %s event%d: %s umask%d: %s :: invalid grpid %d (must be < %d)\n", name, i, pe[i].name, j, pe[i].umasks[j].uname, pe[i].umasks[j].grpid, pe[i].ngrp); error++; } if (pe[i].umasks[j].uflags & AMD64_FL_DFL) { for(k=0; k < j; k++) if 
((pe[i].umasks[k].uflags == pe[i].umasks[j].uflags) && (pe[i].umasks[k].grpid == pe[i].umasks[j].grpid)) ndfl++; if (pe[i].numasks == 1) ndfl = 1; } } if (pe[i].numasks > 1 && ndfl) { fprintf(fp, "pmu: %s event%d: %s :: more than one default unit mask with same code\n", name, i, pe[i].name); error++; } /* if only one umask, then ought to be default */ if (pe[i].numasks == 1 && ndfl != 1) { fprintf(fp, "pmu: %s event%d: %s, only one umask but no default\n", pmu->name, i, pe[i].name); error++; } if (pe[i].flags & AMD64_FL_NCOMBO) { fprintf(fp, "pmu: %s event%d: %s :: NCOMBO is unit mask only flag\n", name, i, pe[i].name); error++; } for(j=0; j < pe[i].numasks; j++) { if (pe[i].umasks[j].uflags & AMD64_FL_NCOMBO) continue; for(k=j+1; k < pe[i].numasks; k++) { if (pe[i].umasks[k].uflags & AMD64_FL_NCOMBO) continue; if ((pe[i].umasks[j].ucode & pe[i].umasks[k].ucode)) { fprintf(fp, "pmu: %s event%d: %s :: umask %s and %s have overlapping code bits\n", name, i, pe[i].name, pe[i].umasks[j].uname, pe[i].umasks[k].uname); error++; } } } } return error ? PFM_ERR_INVAL : PFM_SUCCESS; } unsigned int pfm_amd64_get_event_nattrs(void *this, int pidx) { unsigned int nattrs; nattrs = amd64_num_umasks(this, pidx); nattrs += amd64_num_mods(this, pidx); return nattrs; } int pfm_amd64_get_num_events(void *this) { pfmlib_pmu_t *pmu = this; int i, num = 0; /* * count actual number of events for specific PMU. * Table may contain more events for the family than * what a specific model actually supports. 
*/ for (i = 0; i < pmu->pme_count; i++) if (amd64_event_valid(this, i)) num++; return num; } papi-5.6.0/src/perfctr-2.7.x/etc/costs/PentiumIII-933000664 001750 001750 00000002645 13216244367 023677 0ustar00jshenry1963jshenry1963000000 000000 [data from a 933MHz Pentium III (Coppermine)] PERFCTR INIT: vendor 0, family 6, model 8, stepping 10, clock 930391 kHz PERFCTR INIT: NITER == 64 PERFCTR INIT: loop overhead is 103 cycles PERFCTR INIT: rdtsc cost is 35.8 cycles (2396 total) PERFCTR INIT: rdpmc cost is 35.4 cycles (2369 total) PERFCTR INIT: rdmsr (counter) cost is 91.2 cycles (5943 total) PERFCTR INIT: rdmsr (evntsel) cost is 72.7 cycles (4758 total) PERFCTR INIT: wrmsr (counter) cost is 80.5 cycles (5261 total) PERFCTR INIT: wrmsr (evntsel) cost is 75.9 cycles (4963 total) PERFCTR INIT: read cr4 cost is 1.7 cycles (215 total) PERFCTR INIT: write cr4 cost is 42.0 cycles (2796 total) PERFCTR INIT: write LVTPC cost is 40.2 cycles (2679 total) perfctr: driver 2.7.3, cpu type Intel P6 at 930391 kHz PERFCTR INIT: vendor 0, family 6, model 8, stepping 3, clock 933389 kHz PERFCTR INIT: NITER == 64 PERFCTR INIT: loop overhead is 102 cycles PERFCTR INIT: rdtsc cost is 32.1 cycles (2162 total) PERFCTR INIT: rdpmc cost is 34.8 cycles (2331 total) PERFCTR INIT: rdmsr (counter) cost is 91.2 cycles (5943 total) PERFCTR INIT: rdmsr (evntsel) cost is 72.7 cycles (4755 total) PERFCTR INIT: wrmsr (counter) cost is 80.5 cycles (5258 total) PERFCTR INIT: wrmsr (evntsel) cost is 75.7 cycles (4953 total) PERFCTR INIT: read %cr4 cost is 1.7 cycles (217 total) PERFCTR INIT: write %cr4 cost is 42.0 cycles (2795 total) perfctr: driver 2.1, cpu type Intel Pentium III at 933389 kHz papi-5.6.0/src/freebsd/map-unknown.c000664 001750 001750 00000002260 13216244361 021352 0ustar00jshenry1963jshenry1963000000 000000 /****************************/ /* THIS IS OPEN SOURCE CODE */ /****************************/ /* * File: map-unknown.c * Author: Harald Servat * redcrash@gmail.com */ #include 
"freebsd.h" #include "papiStdEventDefs.h" #include "map.h" /**************************************************************************** UNKNOWN SUBSTRATE UNKNOWN SUBSTRATE UNKNOWN SUBSTRATE UNKNOWN SUBSTRATE ****************************************************************************/ /* NativeEvent_Value_UnknownProcessor must match UnkProcessor_info */ Native_Event_LabelDescription_t UnkProcessor_info[] = { { "branches", "Measure the number of branches retired." }, { "branch-mispredicts", "Measure the number of retired branches that were mispredicted." }, /* { "cycles", "Measure processor cycles." }, */ { "dc-misses", "Measure the number of data cache misses." }, { "ic-misses", "Measure the number of instruction cache misses." }, { "instructions", "Measure the number of instructions retired." }, { "interrupts", "Measure the number of interrupts seen." }, { "unhalted-cycles", "Measure the number of cycles the processor is not in a halted or sleep state." }, { NULL, NULL } }; papi-5.6.0/src/libpfm-3.y/examples_v2.x/ia64/ita2_dear.c000664 001750 001750 00000025053 13216244362 024444 0ustar00jshenry1963jshenry1963000000 000000 /* * ita2_dear.c - example of how use the D-EAR with the Itanium 2 PMU * * Copyright (c) 2003-2006 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux/ia64. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #define NUM_PMCS PFMLIB_MAX_PMCS #define NUM_PMDS PFMLIB_MAX_PMDS #define MAX_EVT_NAME_LEN 128 #define MAX_PMU_NAME_LEN 32 #define SMPL_PERIOD (40) #define EVENT_NAME "data_ear_cache_lat4" typedef pfm_dfl_smpl_hdr_t dear_hdr_t; typedef pfm_dfl_smpl_entry_t dear_entry_t; typedef pfm_dfl_smpl_arg_t smpl_arg_t; static void *smpl_vaddr; static unsigned long entry_size; static int id; #define BPL (sizeof(uint64_t)<<3) #define LBPL 6 static inline void pfm_bv_set(uint64_t *bv, uint16_t rnum) { bv[rnum>>LBPL] |= 1UL << (rnum&(BPL-1)); } long do_test(unsigned long size) { unsigned long i, sum = 0; int *array; printf("buffer size %.1fMB\n", (size*sizeof(int))/1024.0); array = (int *)malloc(size * sizeof(int)); if (array == NULL ) { printf("line = %d No memory available!\n", __LINE__); exit(1); } for(i=0; ihdr_overflows <= last_ovfl && last_ovfl != ~0UL) { printf("skipping identical set of samples %lu <= %lu\n", hdr->hdr_overflows, last_ovfl); return; } pos = (unsigned long)(hdr+1); /* * walk through all the entries recored in the buffer */ for(i=0; i < hdr->hdr_count; i++) { ret = 0; ent = (dear_entry_t *)pos; /* * print entry header */ safe_printf("Entry %ld PID:%d TID:%d CPU:%d STAMP:0x%lx IIP:0x%016lx\n", smpl_entry++, ent->tgid, ent->pid, ent->cpu, ent->tstamp, ent->ip); /* * 
point to first recorded register (always contiguous with entry header) */ reg = (pfm_ita2_pmd_reg_t*)(ent+1); safe_printf("PMD2 : 0x%016lx\n", reg->pmd_val); reg++; safe_printf("PMD3 : 0x%016lx, latency %u\n", reg->pmd_val, reg->pmd3_ita2_reg.dear_latency); reg++; safe_printf("PMD17: 0x%016lx, valid %c, address 0x%016lx\n", reg->pmd_val, reg->pmd17_ita2_reg.dear_vl ? 'Y': 'N', (reg->pmd17_ita2_reg.dear_iaddr << 4) | (unsigned long)reg->pmd17_ita2_reg.dear_slot); /* * move to next entry */ pos += entry_size; } } static void overflow_handler(int n, struct siginfo *info, struct sigcontext *sc) { /* dangerous */ printf("Notification received\n"); process_smpl_buffer(); /* * And resume monitoring */ if (pfm_restart(id) == -1) { perror("pfm_restart"); exit(1); } } int main(void) { pfarg_pmd_t pd[NUM_PMDS]; pfarg_pmc_t pc[NUM_PMCS]; pfmlib_input_param_t inp; pfmlib_output_param_t outp; pfarg_ctx_t ctx; smpl_arg_t buf_arg; pfarg_load_t load_args; pfmlib_options_t pfmlib_options; struct sigaction act; unsigned int i; int ret, type = 0; /* * Initialize pfm library (required before we can use it) */ ret = pfm_initialize(); if (ret != PFMLIB_SUCCESS) fatal_error("Cannot initialize library: %s\n", pfm_strerror(ret)); /* * Let's make sure we run this on the right CPU */ pfm_get_pmu_type(&type); if (type != PFMLIB_ITANIUM2_PMU) { char model[MAX_PMU_NAME_LEN]; pfm_get_pmu_name(model, MAX_PMU_NAME_LEN); fatal_error("this program does not work with %s PMU\n", model); } /* * Install the overflow handler (SIGIO) */ memset(&act, 0, sizeof(act)); act.sa_handler = (sig_t)overflow_handler; sigaction (SIGIO, &act, 0); /* * pass options to library (optional) */ memset(&pfmlib_options, 0, sizeof(pfmlib_options)); pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ pfmlib_options.pfm_verbose = 1; /* set to 1 for debug */ pfm_set_options(&pfmlib_options); memset(pd, 0, sizeof(pd)); memset(pc, 0, sizeof(pc)); memset(pc, 0, sizeof(pc)); memset(&ctx, 0, sizeof(ctx)); memset(&buf_arg, 0, 
sizeof(buf_arg)); memset(&load_args, 0, sizeof(load_args)); /* * prepare parameters to library. we don't use any Itanium * specific features here. so the pfp_model is NULL. */ memset(&inp,0, sizeof(inp)); memset(&outp,0, sizeof(outp)); /* * To count the number of occurence of this instruction, we must * program a counting monitor with the IA64_TAGGED_INST_RETIRED_PMC8 * event. */ if (pfm_find_full_event(EVENT_NAME, &inp.pfp_events[0]) != PFMLIB_SUCCESS) { fatal_error("cannot find event %s\n", EVENT_NAME); } /* * set the (global) privilege mode: * PFM_PLM0 : kernel level only */ inp.pfp_dfl_plm = PFM_PLM3|PFM_PLM0; /* * how many counters we use */ inp.pfp_event_count = 1; /* * let the library figure out the values for the PMCS * * We use all global settings for this EAR. */ if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) { fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); } /* * prepare context structure. * * format specific parameters MUST be concatenated to the regular * pfarg_ctx_t structure. For convenience, the default sampling * format provides a data structure that already combines the pfarg_ctx_t * with what is needed fot this format. */ /* * the size of the buffer is indicated in bytes (not entries). * * The kernel will record into the buffer up to a certain point. * No partial samples are ever recorded. 
*/ buf_arg.buf_size = getpagesize(); /* * now create the context for self monitoring/per-task */ id = pfm_create_context(&ctx, "default", &buf_arg, sizeof(buf_arg)); if (id == -1) { if (errno == ENOSYS) { fatal_error("Your kernel does not have performance monitoring support!\n"); } fatal_error("Can't create PFM context %s\n", strerror(errno)); } /* * retrieve the virtual address at which the sampling * buffer has been mapped */ smpl_vaddr = mmap(NULL, (size_t)buf_arg.buf_size, PROT_READ, MAP_PRIVATE, id, 0); if (smpl_vaddr == MAP_FAILED) fatal_error("cannot mmap sampling buffer errno %d\n", errno); printf("Sampling buffer mapped at %p\n", smpl_vaddr); /* * Now prepare the argument to initialize the PMDs and PMCS. * We must pfp_pmc_count to determine the number of PMC to intialize. * We must use pfp_event_count to determine the number of PMD to initialize. * Some events causes extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. * * This step is new compared to libpfm-2.x. It is necessary because the library no * longer knows about the kernel data structures. 
*/ for (i=0; i < outp.pfp_pmc_count; i++) { pc[i].reg_num = outp.pfp_pmcs[i].reg_num; pc[i].reg_value = outp.pfp_pmcs[i].reg_value; } /* * figure out pmd mapping from output pmc */ for (i=0; i < outp.pfp_pmd_count; i++) pd[i].reg_num = outp.pfp_pmds[i].reg_num; /* * indicate we want notification when buffer is full */ pd[0].reg_flags |= PFM_REGFL_OVFL_NOTIFY; pfm_bv_set(pd[0].reg_smpl_pmds, 2); pfm_bv_set(pd[0].reg_smpl_pmds, 3); pfm_bv_set(pd[0].reg_smpl_pmds, 17); entry_size = sizeof(dear_entry_t) + 3 * 8; /* * initialize the PMD and the sampling period */ pd[0].reg_value = - SMPL_PERIOD; pd[0].reg_long_reset = - SMPL_PERIOD; pd[0].reg_short_reset = - SMPL_PERIOD; /* * Now program the registers * * We don't use the save variable to indicate the number of elements passed to * the kernel because, as we said earlier, pc may contain more elements than * the number of events we specified, i.e., contains more thann coutning monitors. */ if (pfm_write_pmcs(id, pc, outp.pfp_pmc_count) == -1) fatal_error("pfm_write_pmcs error errno %d\n",errno); if (pfm_write_pmds(id, pd, outp.pfp_pmd_count) == -1) fatal_error("pfm_write_pmds error errno %d\n",errno); /* * attach context to stopped task */ load_args.load_pid = getpid(); if (pfm_load_context(id, &load_args) == -1) fatal_error("pfm_load_context error errno %d\n",errno); /* * setup asynchronous notification on the file descriptor */ ret = fcntl(id, F_SETFL, fcntl(id, F_GETFL, 0) | O_ASYNC); if (ret == -1) fatal_error("cannot set ASYNC: %s\n", strerror(errno)); /* * get ownership of the descriptor */ ret = fcntl(id, F_SETOWN, getpid()); if (ret == -1) fatal_error("cannot setown: %s\n", strerror(errno)); /* * Let's roll now. */ pfm_self_start(id); do_test(100000); pfm_self_stop(id); /* * We must call the processing routine to cover the last entries recorded * in the sampling buffer, i.e. 
which may not be full */ process_smpl_buffer(); /* * let's stop this now */ munmap(smpl_vaddr, (size_t)buf_arg.buf_size); close(id); return 0; } papi-5.6.0/src/perfctr-2.7.x/usr.lib/ppc.h000664 001750 001750 00000002604 13216244370 022054 0ustar00jshenry1963jshenry1963000000 000000 /* $Id: ppc.h,v 1.2 2005/04/09 10:25:47 mikpe Exp $ * PPC32-specific code for performance counters library. * * Copyright (C) 2004 Mikael Pettersson */ #ifndef __LIB_PERFCTR_PPC_H #define __LIB_PERFCTR_PPC_H static __inline__ unsigned long get_tbl(void) { unsigned long tbl; asm volatile("mftb %0" : "=r" (tbl)); return tbl; } #define rdtscl(x) do { (x) = get_tbl(); } while(0) #define SPRN_PVR 0x11F #define PVR_VER(pvr) (((pvr) >> 16) & 0xFFFF) #define SPRN_UPMC1 0x3A9 #define SPRN_UPMC2 0x3AA #define SPRN_UPMC3 0x3AD #define SPRN_UPMC4 0x3AE #define SPRN_UPMC5 0x3A1 #define SPRN_UPMC6 0x3A2 #define __stringify_1(x) #x #define __stringify(x) __stringify_1(x) #define mfspr(rn) ({unsigned int rval; \ asm volatile("mfspr %0," __stringify(rn) \ : "=r" (rval)); rval; }) static __inline__ unsigned int read_pmc(unsigned int pmc) { switch( pmc ) { default: /* impossible, but silences gcc warning */ case 0: return mfspr(SPRN_UPMC1); case 1: return mfspr(SPRN_UPMC2); case 2: return mfspr(SPRN_UPMC3); case 3: return mfspr(SPRN_UPMC4); case 4: return mfspr(SPRN_UPMC5); case 5: return mfspr(SPRN_UPMC6); } } #define rdpmcl(pmc,x) do { (x) = read_pmc((pmc)); } while(0) #define vperfctr_has_rdpmc(vperfctr) ((vperfctr)->have_rdpmc) extern void perfctr_info_cpu_init(struct perfctr_info*); #endif /* __LIB_PERFCTR_PPC_H */ papi-5.6.0/src/ctests/johnmay2.c000664 001750 001750 00000007106 13216244360 020525 0ustar00jshenry1963jshenry1963000000 000000 #include #include #include "papi.h" #include "papi_test.h" #include "do_loops.h" int main( int argc, char **argv ) { int FPEventSet = PAPI_NULL; long long values; int PAPI_event, retval; char event_name[PAPI_MAX_STR_LEN]; int quiet; /* Set TESTS_QUIET variable */ 
quiet=tests_quiet( argc, argv ); /* init PAPI */ retval = PAPI_library_init( PAPI_VER_CURRENT ); if (retval != PAPI_VER_CURRENT ) { test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); } /* Use PAPI_FP_INS if available, otherwise use PAPI_TOT_INS */ if ( PAPI_query_event( PAPI_FP_INS ) == PAPI_OK ) PAPI_event = PAPI_FP_INS; else PAPI_event = PAPI_TOT_INS; retval = PAPI_query_event( PAPI_event ); if (retval != PAPI_OK ) { if (!quiet) printf("Trouble querying event\n"); test_skip( __FILE__, __LINE__, "PAPI_query_event", retval ); } /* Create the eventset */ if ( ( retval = PAPI_create_eventset( &FPEventSet ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); /* Add event to the eventset */ if ( ( retval = PAPI_add_event( FPEventSet, PAPI_event ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_add_event", retval ); /* Start counting */ if ( ( retval = PAPI_start( FPEventSet ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_start", retval ); /* Try to cleanup while running */ /* Fail test if this isn't refused */ if ( ( retval = PAPI_cleanup_eventset( FPEventSet ) ) != PAPI_EISRUN ) test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset", retval ); /* Try to destroy eventset while running */ /* Fail test if this isn't refused */ if ( ( retval = PAPI_destroy_eventset( &FPEventSet ) ) != PAPI_EISRUN ) test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset", retval ); /* do some work */ do_flops( 1000000 ); /* stop counting */ if ( ( retval = PAPI_stop( FPEventSet, &values ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); /* Try to destroy eventset without cleaning first */ /* Fail test if this isn't refused */ if ( ( retval = PAPI_destroy_eventset( &FPEventSet ) ) != PAPI_EINVAL ) test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset", retval ); /* Try to cleanup eventset. */ /* This should pass. 
*/ if ( ( retval = PAPI_cleanup_eventset( FPEventSet ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset", retval ); /* Try to destroy eventset. */ /* This should pass. */ if ( ( retval = PAPI_destroy_eventset( &FPEventSet ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset", retval ); /* Make sure eventset was set to PAPI_NULL */ if ( FPEventSet != PAPI_NULL ) test_fail( __FILE__, __LINE__, "FPEventSet != PAPI_NULL", retval ); if ( !quiet ) { if ( ( retval = PAPI_event_code_to_name( PAPI_event, event_name ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); printf( "Test case John May 2: cleanup / destroy eventset.\n" ); printf( "-------------------------------------------------\n" ); printf( "Test run : \t1\n" ); printf( "%s : \t", event_name ); printf( LLDFMT, values ); printf( "\n" ); printf( "-------------------------------------------------\n" ); printf( "The following messages will appear if PAPI is compiled with debug enabled:\n" ); printf ( "\tPAPI Error Code -10: PAPI_EISRUN: EventSet is currently counting\n" ); printf ( "\tPAPI Error Code -10: PAPI_EISRUN: EventSet is currently counting\n" ); printf( "\tPAPI Error Code -1: PAPI_EINVAL: Invalid argument\n" ); } test_pass( __FILE__ ); return 0; } papi-5.6.0/man/man3/PAPI_sprofil_t.3000664 001750 001750 00000001502 13216244356 021044 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPI_sprofil_t" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPI_sprofil_t \- .SH SYNOPSIS .br .PP .SS "Data Fields" .in +1c .ti -1c .RI "void * \fBpr_base\fP" .br .ti -1c .RI "unsigned \fBpr_size\fP" .br .ti -1c .RI "caddr_t \fBpr_off\fP" .br .ti -1c .RI "unsigned \fBpr_scale\fP" .br .in -1c .SH "Detailed Description" .PP .SH "Field Documentation" .PP .SS "void* PAPI_sprofil_t::pr_base" buffer base .SS "caddr_t PAPI_sprofil_t::pr_off" pc start address (offset) .SS "unsigned PAPI_sprofil_t::pr_scale" pc scaling factor: fixed 
point fraction 0xffff ~= 1, 0x8000 == \&.5, 0x4000 == \&.25, etc\&. also, two extensions 0x1000 == 1, 0x2000 == 2 .SS "unsigned PAPI_sprofil_t::pr_size" buffer size .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. papi-5.6.0/src/libpfm-3.y/lib/niagara2_events.h000664 001750 001750 00000020213 13216244363 023171 0ustar00jshenry1963jshenry1963000000 000000 static pme_sparc_mask_entry_t niagara2_pe[] = { /* PIC0 Niagara-2 events */ { .pme_name = "All_strands_idle", .pme_desc = "Cycles when no strand can be picked for the physical core on which the monitoring strand resides.", .pme_ctrl = PME_CTRL_S0 | PME_CTRL_S1, .pme_val = 0x0, .pme_masks = { { .mask_name = "ignored0", .mask_desc = "Ignored", }, { .mask_name = "ignored1", .mask_desc = "Ignored", }, { .mask_name = "ignored2", .mask_desc = "Ignored", }, { .mask_name = "ignored3", .mask_desc = "Ignored", }, { .mask_name = "ignored4", .mask_desc = "Ignored", }, { .mask_name = "ignored5", .mask_desc = "Ignored", }, { .mask_name = "ignored6", .mask_desc = "Ignored", }, { .mask_name = "ignored7", .mask_desc = "Ignored", }, }, }, { .pme_name = "Instr_cnt", .pme_desc = "Number of instructions completed", .pme_ctrl = PME_CTRL_S0 | PME_CTRL_S1, .pme_val = 0x2, .pme_masks = { { .mask_name = "branches", .mask_desc = "Completed branches", }, { .mask_name = "taken_branches", .mask_desc = "Taken branches, which are always mispredicted", }, { .mask_name = "FGU_arith", .mask_desc = "All FADD, FSUB, FCMP, convert, FMUL, FDIV, FNEG, FABS, FSQRT, FMOV, FPADD, FPSUB, FPACK, FEXPAND, FPMERGE, FMUL8, FMULD8, FALIGNDATA, BSHUFFLE, FZERO, FONE, FSRC, FNOT1, FNOT2, FOR, FNOR, FAND, FNAND, FXOR, FXNOR, FORNOT1, FORNOT2, FANDNOT1, FANDNOT2, PDIST, SIAM", }, { .mask_name = "Loads", .mask_desc = "Load instructions", }, { .mask_name = "Stores", .mask_desc = "Stores instructions", }, { .mask_name = "SW_count", .mask_desc = "Software count 'sethi %hi(fc00), %g0' instructions", }, { .mask_name = "other", .mask_desc 
= "Instructions not covered by other mask bits", }, { .mask_name = "atomics", .mask_desc = "Atomics are LDSTUB/A, CASA/XA, SWAP/A", }, }, }, { .pme_name = "cache", .pme_desc = "Cache events", .pme_ctrl = PME_CTRL_S0 | PME_CTRL_S1, .pme_val = 0x3, .pme_masks = { { .mask_name = "IC_miss", .mask_desc = "I-cache misses. This counts only primary instruction cache misses, and does not count duplicate instruction cache misses.4 Also, only 'true' misses are counted. If a thread encounters an I$ miss, but the thread is redirected (due to a branch misprediction or trap, for example) before the line returns from L2 and is loaded into the I$, then the miss is not counted.", }, { .mask_name = "DC_miss", .mask_desc = "D-cache misses. This counts both primary and duplicate data cache misses.", }, { .mask_name = "ignored0", .mask_desc = "Ignored", }, { .mask_name = "ignored1", .mask_desc = "Ignored", }, { .mask_name = "L2IC_miss", .mask_desc = "L2 cache instruction misses", }, { .mask_name = "L2LD_miss", .mask_desc = "L2 cache load misses. Block loads are treated as one L2 miss event. In reality, each individual load can hit or miss in the L2 since the block load is not atomic.", }, { .mask_name = "ignored2", .mask_desc = "Ignored", }, { .mask_name = "ignored3", .mask_desc = "Ignored", }, }, }, { .pme_name = "TLB", .pme_desc = "TLB events", .pme_ctrl = PME_CTRL_S0 | PME_CTRL_S1, .pme_val = 0x4, .pme_masks = { { .mask_name = "ignored0", .mask_desc = "Ignored", }, { .mask_name = "ignored1", .mask_desc = "Ignored", }, { .mask_name = "ITLB_L2ref", .mask_desc = "ITLB references to L2. For each ITLB miss with hardware tablewalk enabled, count each access the ITLB hardware tablewalk makes to L2.", }, { .mask_name = "DTLB_L2ref", .mask_desc = "DTLB references to L2. 
For each DTLB miss with hardware tablewalk enabled, count each access the DTLB hardware tablewalk makes to L2.", }, { .mask_name = "ITLB_L2miss", .mask_desc = "For each ITLB miss with hardware tablewalk enabled, count each access the ITLB hardware tablewalk makes to L2 which misses in L2. Note: Depending upon the hardware table walk configuration, each ITLB miss may issue from 1 to 4 requests to L2 to search TSBs.", }, { .mask_name = "DTLB_L2miss", .mask_desc = "For each DTLB miss with hardware tablewalk enabled, count each access the DTLB hardware tablewalk makes to L2 which misses in L2. Note: Depending upon the hardware table walk configuration, each DTLB miss may issue from 1 to 4 requests to L2 to search TSBs.", }, { .mask_name = "ignored2", .mask_desc = "Ignored", }, { .mask_name = "ignored3", .mask_desc = "Ignored", }, }, }, { .pme_name = "mem", .pme_desc = "Memory operations", .pme_ctrl = PME_CTRL_S0 | PME_CTRL_S1, .pme_val = 0x5, .pme_masks = { { .mask_name = "stream_load", .mask_desc = "Stream Unit load operations to L2", }, { .mask_name = "stream_store", .mask_desc = "Stream Unit store operations to L2", }, { .mask_name = "cpu_load", .mask_desc = "CPU loads to L2", }, { .mask_name = "cpu_ifetch", .mask_desc = "CPU instruction fetches to L2", }, { .mask_name = "ignored0", .mask_desc = "Ignored", }, { .mask_name = "ignored0", .mask_desc = "Ignored", }, { .mask_name = "cpu_store", .mask_desc = "CPU stores to L2", }, { .mask_name = "mmu_load", .mask_desc = "MMU loads to L2", }, }, }, { .pme_name = "spu_ops", .pme_desc = "Stream Unit operations. 
User, supervisor, and hypervisor counting must all be enabled to properly count these events.", .pme_ctrl = PME_CTRL_S0 | PME_CTRL_S1, .pme_val = 0x6, .pme_masks = { { .mask_name = "DES", .mask_desc = "Increment for each CWQ or ASI operation that uses DES/3DES unit", }, { .mask_name = "AES", .mask_desc = "Increment for each CWQ or ASI operation that uses AES unit", }, { .mask_name = "RC4", .mask_desc = "Increment for each CWQ or ASI operation that uses RC4 unit", }, { .mask_name = "HASH", .mask_desc = "Increment for each CWQ or ASI operation that uses MD5/SHA-1/SHA-256 unit", }, { .mask_name = "MA", .mask_desc = "Increment for each CWQ or ASI modular arithmetic operation", }, { .mask_name = "CSUM", .mask_desc = "Increment for each iSCSI CRC or TCP/IP checksum operation", }, { .mask_name = "ignored0", .mask_desc = "Ignored", }, { .mask_name = "ignored1", .mask_desc = "Ignored", }, }, }, { .pme_name = "spu_busy", .pme_desc = "Stream Unit busy cycles. User, supervisor, and hypervisor counting must all be enabled to properly count these events.", .pme_ctrl = PME_CTRL_S0 | PME_CTRL_S1, .pme_val = 0x07, .pme_masks = { { .mask_name = "DES", .mask_desc = "Cycles the DES/3DES unit is busy", }, { .mask_name = "AES", .mask_desc = "Cycles the AES unit is busy", }, { .mask_name = "RC4", .mask_desc = "Cycles the RC4 unit is busy", }, { .mask_name = "HASH", .mask_desc = "Cycles the MD5/SHA-1/SHA-256 unit is busy", }, { .mask_name = "MA", .mask_desc = "Cycles the modular arithmetic unit is busy", }, { .mask_name = "CSUM", .mask_desc = "Cycles the CRC/MPA/checksum unit is busy", }, { .mask_name = "ignored0", .mask_desc = "Ignored", }, { .mask_name = "ignored1", .mask_desc = "Ignored", }, }, }, { .pme_name = "tlb_miss", .pme_desc = "TLB misses", .pme_ctrl = PME_CTRL_S0 | PME_CTRL_S1, .pme_val = 0xb, .pme_masks = { { .mask_name = "ignored0", .mask_desc = "Ignored", }, { .mask_name = "ignored1", .mask_desc = "Ignored", }, { .mask_name = "ITLB", .mask_desc = "I-TLB misses", }, { 
.mask_name = "DTLB", .mask_desc = "D-TLB misses", }, { .mask_name = "ignored2", .mask_desc = "Ignored", }, { .mask_name = "ignored3", .mask_desc = "Ignored", }, { .mask_name = "ignored4", .mask_desc = "Ignored", }, { .mask_name = "ignored5", .mask_desc = "Ignored", }, }, }, }; #define PME_NIAGARA2_EVENT_COUNT (sizeof(niagara2_pe)/sizeof(pme_sparc_mask_entry_t)) papi-5.6.0/src/libpfm-3.y/examples_ia64_v2.0/self.c000664 001750 001750 00000016304 13216244362 023512 0ustar00jshenry1963jshenry1963000000 000000 /* * self.c - example of a simple self monitoring task * * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux/ia64. 
*/ #include #include #include #include #include #include #include #include #include #include #include #define NUM_PMCS PFMLIB_MAX_PMCS #define NUM_PMDS PFMLIB_MAX_PMDS #define MAX_EVT_NAME_LEN 128 static volatile int quit; void sig_handler(int n) { quit = 1; } /* * our test code (function cannot be made static otherwise it is optimized away) */ void noploop(void) { for(;quit == 0;); } static void fatal_error(char *fmt,...) __attribute__((noreturn)); static void fatal_error(char *fmt, ...) { va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); exit(1); } int main(int argc, char **argv) { char **p; unsigned int i; int ret, ctx_fd; pfmlib_input_param_t inp; pfmlib_output_param_t outp; pfarg_reg_t pd[NUM_PMDS]; pfarg_reg_t pc[NUM_PMCS]; pfarg_context_t ctx[1]; pfarg_load_t load_args; pfmlib_options_t pfmlib_options; unsigned int num_counters; char name[MAX_EVT_NAME_LEN]; /* * Initialize pfm library (required before we can use it) */ if (pfm_initialize() != PFMLIB_SUCCESS) { printf("Can't initialize library\n"); exit(1); } pfm_get_num_counters(&num_counters); /* * check that the user did not specify too many events */ if ((unsigned int)(argc-1) > num_counters) { printf("Too many events specified\n"); exit(1); } /* * pass options to library (optional) */ memset(&pfmlib_options, 0, sizeof(pfmlib_options)); pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ pfm_set_options(&pfmlib_options); memset(pd, 0, sizeof(pd)); memset(pc, 0, sizeof(pc)); memset(ctx, 0, sizeof(ctx)); memset(&load_args, 0, sizeof(load_args)); /* * prepare parameters to library. we don't use any Itanium * specific features here. so the pfp_model is NULL. */ memset(&inp,0, sizeof(inp)); memset(&outp,0, sizeof(outp)); /* * be nice to user! 
*/ if (argc > 1) { p = argv+1; for (i=0; *p ; i++, p++) { if (pfm_find_event(*p, &inp.pfp_events[i].event) != PFMLIB_SUCCESS) { fatal_error("Cannot find %s event\n", *p); } } } else { if (pfm_get_cycle_event(&inp.pfp_events[0]) != PFMLIB_SUCCESS) fatal_error("cannot find cycle event\n"); if (pfm_get_inst_retired_event(&inp.pfp_events[1]) != PFMLIB_SUCCESS) fatal_error("cannot find inst retired event\n"); i = 2; } /* * set the default privilege mode for all counters: * PFM_PLM3 : user level only */ inp.pfp_dfl_plm = PFM_PLM3; if (i > num_counters) { i = num_counters; printf("too many events provided (max=%d events), using first %d event(s)\n", num_counters, i); } /* * how many counters we use */ inp.pfp_event_count = i; /* * let the library figure out the values for the PMCS */ if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) { fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); } /* * now create a new context, per process context. * This just creates a new context with some initial state, it is not * active nor attached to any process. */ if (perfmonctl(0, PFM_CREATE_CONTEXT, ctx, 1) == -1 ) { if (errno == ENOSYS) { fatal_error("Your kernel does not have performance monitoring support!\n"); } fatal_error("Can't create PFM context %s\n", strerror(errno)); } /* * extract the unique identifier for our context, a regular file descriptor */ ctx_fd = ctx[0].ctx_fd; /* * Now prepare the argument to initialize the PMDs and PMCS. * We must pfp_pmc_count to determine the number of PMC to intialize. * We must use pfp_event_count to determine the number of PMD to initialize. * Some events causes extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. * * This step is new compared to libpfm-2.x. It is necessary because the library no * longer knows about the kernel data structures. 
*/ for (i=0; i < outp.pfp_pmc_count; i++) { pc[i].reg_num = outp.pfp_pmcs[i].reg_num; pc[i].reg_value = outp.pfp_pmcs[i].reg_value; } /* * the PMC controlling the event ALWAYS come first, that's why this loop * is safe even when extra PMC are needed to support a particular event. */ for (i=0; i < inp.pfp_event_count; i++) { pd[i].reg_num = pc[i].reg_num; } /* * Now program the registers * * We don't use the same variable to indicate the number of elements passed to * the kernel because, as we said earlier, pc may contain more elements than * the number of events (pmd) we specified, i.e., contains more than counting * monitors. */ if (perfmonctl(ctx_fd, PFM_WRITE_PMCS, pc, outp.pfp_pmc_count) == -1) { fatal_error("perfmonctl error PFM_WRITE_PMCS errno %d\n",errno); } if (perfmonctl(ctx_fd, PFM_WRITE_PMDS, pd, inp.pfp_event_count) == -1) { fatal_error("perfmonctl error PFM_WRITE_PMDS errno %d\n",errno); } /* * now we load (i.e., attach) the context to ourself */ load_args.load_pid = getpid(); if (perfmonctl(ctx_fd, PFM_LOAD_CONTEXT, &load_args, 1) == -1) { fatal_error("perfmonctl error PFM_LOAD_CONTEXT errno %d\n",errno); } /* * Let's roll now */ signal(SIGALRM, sig_handler); pfm_self_start(ctx_fd); alarm(10); noploop(); pfm_self_stop(ctx_fd); /* * now read the results */ if (perfmonctl(ctx_fd, PFM_READ_PMDS, pd, inp.pfp_event_count) == -1) { fatal_error( "perfmonctl error READ_PMDS errno %d\n",errno); return -1; } /* * print the results * * It is important to realize, that the first event we specified may not * be in PMD4. Not all events can be measured by any monitor. That's why * we need to use the pc[] array to figure out where event i was allocated. 
*
*/
	for (i=0; i < inp.pfp_event_count; i++) {
		pfm_get_full_event_name(&inp.pfp_events[i], name, MAX_EVT_NAME_LEN);
		printf("PMD%u %20"PRIu64" %s\n", pd[i].reg_num, pd[i].reg_value, name);
	}
	/*
	 * and destroy our context
	 */
	close(ctx_fd);
	return 0;
}
papi-5.6.0/src/perfctr-2.7.x/usr.lib/ppc.c000664 001750 001750 00000026775 13216244370 022066 0ustar00jshenry1963jshenry1963000000 000000 /* $Id: ppc.c,v 1.20 2007/10/06 13:02:07 mikpe Exp $
 * PPC32-specific perfctr library procedures.
 *
 * Copyright (C) 2004-2007 Mikael Pettersson
 */
#include #include #include #include /* memset() */
#include "libperfctr.h"
#include "ppc.h"

/*
 * Syscall number of vperfctr_open. It is zero until init_sys_vperfctr()
 * has run; the control, write and read syscalls are numbered open+1,
 * open+2 and open+3 respectively.
 */
static unsigned int __NR_vperfctr_open;
#define __NR_vperfctr_control (__NR_vperfctr_open+1)
#define __NR_vperfctr_write (__NR_vperfctr_open+2)
#define __NR_vperfctr_read (__NR_vperfctr_open+3)
#include

/*
 * Lazily pick the vperfctr_open syscall number from the running kernel's
 * version code: 310 for kernels >= 2.6.18, 301 for >= 2.6.16, otherwise 280.
 * Does nothing once __NR_vperfctr_open is non-zero.
 */
static void init_sys_vperfctr(void)
{
    if (!__NR_vperfctr_open) {
	unsigned int nr;
	unsigned int kver = perfctr_linux_version_code();

	if (kver >= PERFCTR_KERNEL_VERSION(2,6,18))
	    nr = 310;
	else if (kver >= PERFCTR_KERNEL_VERSION(2,6,16))
	    nr = 301;
	else
	    nr = 280;
	__NR_vperfctr_open = nr;
    }
}

/*
 * The actual syscalls.
 */

/*
 * Invoke the vperfctr_open syscall with tid and creat.
 * fd_unused is accepted but not forwarded to the kernel.
 */
int _sys_vperfctr_open(int fd_unused, int tid, int creat)
{
    init_sys_vperfctr();
    return syscall(__NR_vperfctr_open, tid, creat);
}

/* Invoke the vperfctr_control syscall with command cmd on fd. */
static int _sys_vperfctr_control(int fd, unsigned int cmd)
{
    init_sys_vperfctr();
    return syscall(__NR_vperfctr_control, fd, cmd);
}

/* Invoke the vperfctr_write syscall: pass argbytes bytes at arg for the given domain. */
static int _sys_vperfctr_write(int fd, unsigned int domain, const void *arg, unsigned int argbytes)
{
    init_sys_vperfctr();
    return syscall(__NR_vperfctr_write, fd, domain, arg, argbytes);
}

/*
 * Invoke the vperfctr_read syscall: request up to argbytes bytes of the
 * given domain into arg. The syscall's return value is passed through;
 * read_packet() compares it against the requested size.
 */
static int _sys_vperfctr_read(int fd, unsigned int domain, void *arg, unsigned int argbytes)
{
    init_sys_vperfctr();
    return syscall(__NR_vperfctr_read, fd, domain, arg, argbytes);
}

/*
 * Simple syscall wrappers.
*/

/* Read the VPERFCTR_DOMAIN_SUM data of fd into *arg (sizeof(*arg) bytes requested). */
int _sys_vperfctr_read_sum(int fd, struct perfctr_sum_ctrs *arg)
{
    return _sys_vperfctr_read(fd, VPERFCTR_DOMAIN_SUM, arg, sizeof(*arg));
}

/* Read the VPERFCTR_DOMAIN_CHILDREN data of fd into *arg (sizeof(*arg) bytes requested). */
int _sys_vperfctr_read_children(int fd, struct perfctr_sum_ctrs *arg)
{
    return _sys_vperfctr_read(fd, VPERFCTR_DOMAIN_CHILDREN, arg, sizeof(*arg));
}

/* Issue the VPERFCTR_CONTROL_UNLINK command on fd. */
int _sys_vperfctr_unlink(int fd)
{
    return _sys_vperfctr_control(fd, VPERFCTR_CONTROL_UNLINK);
}

/* Issue the VPERFCTR_CONTROL_RESUME command on fd. */
int _sys_vperfctr_iresume(int fd)
{
    return _sys_vperfctr_control(fd, VPERFCTR_CONTROL_RESUME);
}

/*
 * Complex syscall wrappers, for transmitting control data
 * in CPU family specific formats.
 */

/* SPR numbers used as the 'nr' field of the register packets built below. */
#define SPRN_MMCR0 0x3B8 /* Monitor Mode Control Register 0 (604 and up) */
#define SPRN_MMCR1 0x3BC /* Monitor Mode Control Register 1 (604e and up) */
#define SPRN_MMCR2 0x3B0 /* Monitor Mode Control Register 2 (7400 and up) */
#define SPRN_PMC1 0x3B9 /* Performance Counter Register 1 (604 and up) */
#define SPRN_PMC2 0x3BA /* Performance Counter Register 2 (604 and up) */
#define SPRN_PMC3 0x3BD /* Performance Counter Register 3 (604e and up) */
#define SPRN_PMC4 0x3BE /* Performance Counter Register 4 (604e and up) */
#define SPRN_PMC5 0x3B1 /* Performance Counter Register 5 (7450 and up) */
#define SPRN_PMC6 0x3B2 /* Performance Counter Register 6 (7450 and up) */
#define MMCR0_PMC1SEL 0x00001FC0 /* PMC1 event selector, 7 bits. */
#define MMCR0_PMC2SEL 0x0000003F /* PMC2 event selector, 6 bits.
*/ #if 0 static void show_regs(const struct perfctr_cpu_reg *regs, unsigned int n) { unsigned int i; fprintf(stderr, "CPU Register Values:\n"); for(i = 0; i < n; ++i) fprintf(stderr, "SPR %#x\t0x%08x\n", regs[i].nr, regs[i].value); } #else #define show_regs(regs, n) do{}while(0) #endif static int read_packet(int fd, unsigned int domain, void *arg, unsigned int argbytes) { int ret; ret = _sys_vperfctr_read(fd, domain, arg, argbytes); if (ret != argbytes && ret >= 0) { errno = EPROTO; return -1; } return ret; } static unsigned int pmc_to_spr(unsigned int pmc) { switch (pmc) { default: /* impossible, but silences gcc warning */ case (1-1): return SPRN_PMC1; case (2-1): return SPRN_PMC2; case (3-1): return SPRN_PMC3; case (4-1): return SPRN_PMC4; case (5-1): return SPRN_PMC5; case (6-1): return SPRN_PMC6; } } static int write_cpu_regs(int fd, const struct perfctr_cpu_control *control) { struct perfctr_cpu_reg regs[3+6]; unsigned int evntsel[6]; unsigned int nrctrs, nractrs, pmc_mask, nr_regs, i, pmc; nractrs = control->nractrs; nrctrs = nractrs + control->nrictrs; if (nrctrs < nractrs || nrctrs > 6) { errno = EINVAL; return -1; } if (!nrctrs) return 0; nr_regs = 0; pmc_mask = 0; memset(evntsel, 0, sizeof evntsel); for(i = 0; i < nrctrs; ++i) { pmc = control->pmc_map[i]; if (pmc >= 6 || (pmc_mask & (1<evntsel[i]; if (i >= nractrs) { unsigned int j = 3 + (i - nractrs); regs[j].nr = pmc_to_spr(pmc); regs[j].value = control->ireset[i]; } } regs[0].nr = SPRN_MMCR0; regs[0].value = (control->ppc.mmcr0 | (evntsel[1-1] << (31-25)) | (evntsel[2-1] << (31-31))); regs[1].nr = SPRN_MMCR1; regs[1].value = (( evntsel[3-1] << (31-4)) | (evntsel[4-1] << (31-9)) | (evntsel[5-1] << (31-14)) | (evntsel[6-1] << (31-20))); regs[2].nr = SPRN_MMCR2; regs[2].value = control->ppc.mmcr2; nr_regs = 3 + (nrctrs - nractrs); show_regs(regs, nr_regs); return _sys_vperfctr_write(fd, PERFCTR_DOMAIN_CPU_REGS, regs, nr_regs*sizeof(regs[0])); } static int read_cpu_regs(int fd, struct perfctr_cpu_control 
*control) { struct perfctr_cpu_reg regs[3+6]; unsigned int evntsel[6]; unsigned int nrctrs, nractrs, pmc_mask, nr_regs, i, pmc; int ret; nractrs = control->nractrs; nrctrs = nractrs + control->nrictrs; if (nrctrs < nractrs || nrctrs > 6) { errno = EINVAL; return -1; } if (!nrctrs) return 0; nr_regs = 0; pmc_mask = 0; for(i = 0; i < nrctrs; ++i) { pmc = control->pmc_map[i]; if (pmc >= 6 || (pmc_mask & (1<= nractrs) { unsigned int j = 3 + (i - nractrs); regs[j].nr = pmc_to_spr(pmc); } } regs[0].nr = SPRN_MMCR0; regs[1].nr = SPRN_MMCR1; regs[2].nr = SPRN_MMCR2; nr_regs = 3 + (nrctrs - nractrs); ret = read_packet(fd, PERFCTR_DOMAIN_CPU_REGS, regs, nr_regs*sizeof(regs[0])); if (ret < 0) return ret; show_regs(regs, nr_regs); evntsel[1-1] = (regs[0].value >> (31-25)) & 0x7F; evntsel[2-1] = (regs[0].value >> (31-31)) & 0x3F; evntsel[3-1] = (regs[1].value >> (31- 4)) & 0x1F; evntsel[4-1] = (regs[1].value >> (31- 9)) & 0x1F; evntsel[5-1] = (regs[1].value >> (31-14)) & 0x1F; evntsel[6-1] = (regs[1].value >> (31-20)) & 0x3F; for(i = 0; i < nrctrs; ++i) { pmc = control->pmc_map[i]; control->evntsel[i] = evntsel[pmc]; if (i >= nractrs) control->ireset[i] = regs[3 + (i - nractrs)].value; } control->ppc.mmcr0 = regs[0].value & ~(MMCR0_PMC1SEL | MMCR0_PMC2SEL); control->ppc.mmcr2 = regs[2].value; return 0; } int _sys_vperfctr_write_control(int fd, unsigned int cpu_type, const struct vperfctr_control *control) { union { struct vperfctr_control_kernel control; struct perfctr_cpu_control_header header; } u; unsigned int nrctrs; int ret; ret = _sys_vperfctr_control(fd, VPERFCTR_CONTROL_CLEAR); if (ret < 0) return ret; u.control.si_signo = control->si_signo; u.control.preserve = control->preserve; ret = _sys_vperfctr_write(fd, VPERFCTR_DOMAIN_CONTROL, &u.control, sizeof u.control); if (ret < 0) return ret; u.header.tsc_on = control->cpu_control.tsc_on; u.header.nractrs = control->cpu_control.nractrs; u.header.nrictrs = control->cpu_control.nrictrs; ret = _sys_vperfctr_write(fd, 
PERFCTR_DOMAIN_CPU_CONTROL, &u.header, sizeof u.header); if (ret < 0) return ret; nrctrs = control->cpu_control.nractrs + control->cpu_control.nrictrs; ret = _sys_vperfctr_write(fd, PERFCTR_DOMAIN_CPU_MAP, &control->cpu_control.pmc_map, nrctrs * sizeof control->cpu_control.pmc_map[0]); if (ret < 0) return ret; ret = write_cpu_regs(fd, &control->cpu_control); if (ret < 0) return ret; return _sys_vperfctr_control(fd, VPERFCTR_CONTROL_RESUME); } int _sys_vperfctr_read_control(int fd, unsigned int cpu_type, struct vperfctr_control *control) { union { struct vperfctr_control_kernel control; struct perfctr_cpu_control_header header; } u; unsigned int nrctrs; int ret; memset(control, 0, sizeof *control); ret = read_packet(fd, VPERFCTR_DOMAIN_CONTROL, &u.control, sizeof u.control); if (ret < 0) return ret; control->si_signo = u.control.si_signo; control->preserve = u.control.preserve; ret = read_packet(fd, PERFCTR_DOMAIN_CPU_CONTROL, &u.header, sizeof u.header); if (ret < 0) return ret; control->cpu_control.tsc_on = u.header.tsc_on; control->cpu_control.nractrs = u.header.nractrs; control->cpu_control.nrictrs = u.header.nrictrs; nrctrs = control->cpu_control.nractrs + control->cpu_control.nrictrs; ret = read_packet(fd, PERFCTR_DOMAIN_CPU_MAP, &control->cpu_control.pmc_map, nrctrs * sizeof control->cpu_control.pmc_map[0]); if (ret < 0) return ret; return read_cpu_regs(fd, &control->cpu_control); } void perfctr_info_cpu_init(struct perfctr_info *info) { unsigned int pvr = mfspr(SPRN_PVR); /* trapped & emulated by the kernel */ unsigned int cpu_type; switch( PVR_VER(pvr) ) { case 0x0004: /* 604 */ cpu_type = PERFCTR_PPC_604; break; case 0x0009: /* 604e */ case 0x000A: /* 604ev */ cpu_type = PERFCTR_PPC_604e; break; case 0x0008: /* 750/740 */ case 0x7000: case 0x7001: /* 750FX */ case 0x7002: /* 750GX */ cpu_type = PERFCTR_PPC_750; break; case 0x000C: /* 7400 */ case 0x800C: /* 7410 */ cpu_type = PERFCTR_PPC_7400; break; case 0x8000: /* 7451/7441 */ case 0x8001: /* 7455/7445 
*/ case 0x8002: /* 7457/7447 */ case 0x8003: /* 7447A */ case 0x8004: /* 7448 */ cpu_type = PERFCTR_PPC_7450; break; default: cpu_type = PERFCTR_PPC_GENERIC; } info->cpu_type = cpu_type; } unsigned int perfctr_info_nrctrs(const struct perfctr_info *info) { switch( info->cpu_type ) { case PERFCTR_PPC_604: return 2; case PERFCTR_PPC_604e: case PERFCTR_PPC_750: case PERFCTR_PPC_7400: return 4; case PERFCTR_PPC_7450: return 6; default: return 0; } } const char *perfctr_info_cpu_name(const struct perfctr_info *info) { switch( info->cpu_type ) { case PERFCTR_PPC_GENERIC: return "Generic PowerPC with TB"; case PERFCTR_PPC_604: return "PowerPC 604"; case PERFCTR_PPC_604e: return "PowerPC 604e"; case PERFCTR_PPC_750: return "PowerPC 750"; case PERFCTR_PPC_7400: return "PowerPC 7400"; case PERFCTR_PPC_7450: return "PowerPC 7450"; default: return "?"; } } void perfctr_cpu_control_print(const struct perfctr_cpu_control *control) { unsigned int i, nractrs, nrictrs, nrctrs; nractrs = control->nractrs; nrictrs = control->nrictrs; nrctrs = control->nractrs + nrictrs; printf("tsc_on\t\t\t%u\n", control->tsc_on); printf("nractrs\t\t\t%u\n", nractrs); if( nrictrs ) printf("nrictrs\t\t\t%u\n", nrictrs); for(i = 0; i < nrctrs; ++i) { printf("pmc_map[%u]\t\t%u\n", i, control->pmc_map[i]); printf("evntsel[%u]\t\t0x%08X\n", i, control->evntsel[i]); if( i >= nractrs ) printf("ireset[%u]\t\t%d\n", i, control->ireset[i]); } if( control->ppc.mmcr0 ) printf("mmcr0\t\t\t0x%08X\n", control->ppc.mmcr0); if( control->ppc.mmcr2 ) printf("mmcr2\t\t\t0x%08X\n", control->ppc.mmcr2); } papi-5.6.0/src/perfctr-2.6.x/usr.lib/ppc.h000775 001750 001750 00000002634 13216244367 022067 0ustar00jshenry1963jshenry1963000000 000000 /* $Id: ppc.h,v 1.1 2004/01/26 13:21:41 mikpe Exp $ * PPC32-specific code for performance counters library. 
*
* Copyright (C) 2004 Mikael Pettersson
*/
#ifndef __LIB_PERFCTR_PPC_H
#define __LIB_PERFCTR_PPC_H

#define PAGE_SIZE 4096

/* Return the value produced by the "mftb" instruction. */
static __inline__ unsigned long get_tbl(void)
{
    unsigned long tbl;
    asm volatile("mftb %0" : "=r" (tbl));
    return tbl;
}
/* rdtscl(x): store get_tbl() into x. */
#define rdtscl(x) do { (x) = get_tbl(); } while(0)

#define SPRN_PVR 0x11F
/* Upper 16 bits of a PVR value. */
#define PVR_VER(pvr) (((pvr) >> 16) & 0xFFFF)

/* SPR numbers passed to mfspr() by read_pmc() below. */
#define SPRN_UPMC1 0x3A9
#define SPRN_UPMC2 0x3AA
#define SPRN_UPMC3 0x3AD
#define SPRN_UPMC4 0x3AE
#define SPRN_UPMC5 0x3A1
#define SPRN_UPMC6 0x3A2

/* Two-level stringification so that rn is macro-expanded before being pasted into the asm string. */
#define __stringify_1(x) #x
#define __stringify(x) __stringify_1(x)
/* mfspr(rn): evaluate to the value read from SPR number rn (rn must expand to a literal). */
#define mfspr(rn) ({unsigned int rval; \
	asm volatile("mfspr %0," __stringify(rn) \
	: "=r" (rval)); rval; })

/* Read the counter with zero-based index pmc (0..5 -> UPMC1..UPMC6) via mfspr. */
static __inline__ unsigned int read_pmc(unsigned int pmc)
{
    switch( pmc ) {
      default: /* impossible, but silences gcc warning */
      case 0:
	return mfspr(SPRN_UPMC1);
      case 1:
	return mfspr(SPRN_UPMC2);
      case 2:
	return mfspr(SPRN_UPMC3);
      case 3:
	return mfspr(SPRN_UPMC4);
      case 4:
	return mfspr(SPRN_UPMC5);
      case 5:
	return mfspr(SPRN_UPMC6);
    }
}
/* rdpmcl(pmc,x): store read_pmc(pmc) into x. */
#define rdpmcl(pmc,x) do { (x) = read_pmc((pmc)); } while(0)

/* Evaluates to the have_rdpmc member of *vperfctr. */
#define vperfctr_has_rdpmc(vperfctr) ((vperfctr)->have_rdpmc)

extern void perfctr_info_cpu_init(struct perfctr_info*);

#endif /* __LIB_PERFCTR_PPC_H */
papi-5.6.0/src/libpfm4/lib/pfmlib_intel_coreduo.c000664 001750 001750 00000005400 13216244365 023760 0ustar00jshenry1963jshenry1963000000 000000 /*
 * pfmlib_intel_coreduo.c : Intel Core Duo/Solo (Yonah)
 *
 * Copyright (c) 2009, Google, Inc
 * Contributed by Stephane Eranian
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do so,
 * subject to the following conditions:
 *
 * The above copyright notice and this
permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* private headers */ #include "pfmlib_priv.h" /* library private */ #include "pfmlib_intel_x86_priv.h" /* architecture private */ #include "events/intel_coreduo_events.h" static int pfm_coreduo_detect(void *this) { int ret; ret = pfm_intel_x86_detect(); if (ret != PFM_SUCCESS) return ret; /* * check for core solo/core duo */ if (pfm_intel_x86_cfg.family != 6) return PFM_ERR_NOTSUPP; if (pfm_intel_x86_cfg.model != 14) return PFM_ERR_NOTSUPP; return PFM_SUCCESS; } static int pfm_coreduo_init(void *this) { pfm_intel_x86_cfg.arch_version = 1; return PFM_SUCCESS; } pfmlib_pmu_t intel_coreduo_support={ .desc = "Intel Core Duo/Core Solo", .name = "coreduo", .pmu = PFM_PMU_COREDUO, .pme_count = LIBPFM_ARRAY_SIZE(intel_coreduo_pe), .type = PFM_PMU_TYPE_CORE, .num_cntrs = 2, .max_encoding = 1, .pe = intel_coreduo_pe, .atdesc = intel_x86_mods, .flags = PFMLIB_PMU_FL_RAW_UMASK, .supported_plm = INTEL_X86_PLM, .pmu_detect = pfm_coreduo_detect, .pmu_init = pfm_coreduo_init, .get_event_encoding[PFM_OS_NONE] = pfm_intel_x86_get_encoding, PFMLIB_ENCODE_PERF(pfm_intel_x86_get_perf_encoding), .get_event_first = pfm_intel_x86_get_event_first, .get_event_next = pfm_intel_x86_get_event_next, .event_is_valid = pfm_intel_x86_event_is_valid, .validate_table = pfm_intel_x86_validate_table, .get_event_info = pfm_intel_x86_get_event_info, .get_event_attr_info = pfm_intel_x86_get_event_attr_info, 
PFMLIB_VALID_PERF_PATTRS(pfm_intel_x86_perf_validate_pattrs), .get_event_nattrs = pfm_intel_x86_get_event_nattrs, }; papi-5.6.0/src/examples/high_level.c000664 001750 001750 00000012412 13216244361 021412 0ustar00jshenry1963jshenry1963000000 000000 /***************************************************************************** * This example code shows how to use most of PAPI's High level functions * * to start,count,read and stop on an event set. We use two preset events * * here: * * PAPI_TOT_INS: Total instructions executed in a period of time * * PAPI_TOT_CYC: Total cpu cycles in a period of time * ******************************************************************************/ #include #include #include "papi.h" #define NUM_EVENTS 2 #define THRESHOLD 10000 #define ERROR_RETURN(retval) { fprintf(stderr, "Error %d %s:line %d: \n", retval,__FILE__,__LINE__); exit(retval); } /* stupid codes to be monitored */ void computation_mult() { double tmp=1.0; int i=1; for( i = 1; i < THRESHOLD; i++ ) { tmp = tmp*i; } } /* stupid codes to be monitored */ void computation_add() { int tmp = 0; int i=0; for( i = 0; i < THRESHOLD; i++ ) { tmp = tmp + i; } } int main() { /*Declaring and initializing the event set with the presets*/ int Events[2] = {PAPI_TOT_INS, PAPI_TOT_CYC}; /*The length of the events array should be no longer than the value returned by PAPI_num_counters.*/ /*declaring place holder for no of hardware counters */ int num_hwcntrs = 0; int retval; char errstring[PAPI_MAX_STR_LEN]; /*This is going to store our list of results*/ long long values[NUM_EVENTS]; /*************************************************************************** * This part initializes the library and compares the version number of the* * header file, to the version of the library, if these don't match then it * * is likely that PAPI won't work correctly.If there is an error, retval * * keeps track of the version number. 
* ***************************************************************************/ if((retval = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT ) { fprintf(stderr, "Error: %d %s\n",retval, errstring); exit(1); } /************************************************************************** * PAPI_num_counters returns the number of hardware counters the platform * * has or a negative number if there is an error * **************************************************************************/ if ((num_hwcntrs = PAPI_num_counters()) < PAPI_OK) { printf("There are no counters available. \n"); exit(1); } printf("There are %d counters in this system\n",num_hwcntrs); /************************************************************************** * PAPI_start_counters initializes the PAPI library (if necessary) and * * starts counting the events named in the events array. This function * * implicitly stops and initializes any counters running as a result of * * a previous call to PAPI_start_counters. 
* **************************************************************************/ if ( (retval = PAPI_start_counters(Events, NUM_EVENTS)) != PAPI_OK) ERROR_RETURN(retval); printf("\nCounter Started: \n"); /* Your code goes here*/ computation_add(); /********************************************************************** * PAPI_read_counters reads the counter values into values array * **********************************************************************/ if ( (retval=PAPI_read_counters(values, NUM_EVENTS)) != PAPI_OK) ERROR_RETURN(retval); printf("Read successfully\n"); printf("The total instructions executed for addition are %lld \n",values[0]); printf("The total cycles used are %lld \n", values[1] ); printf("\nNow we try to use PAPI_accum to accumulate values\n"); /* Do some computation here */ computation_add(); /************************************************************************ * What PAPI_accum_counters does is it adds the running counter values * * to what is in the values array. The hardware counters are reset and * * left running after the call. 
* ************************************************************************/ if ( (retval=PAPI_accum_counters(values, NUM_EVENTS)) != PAPI_OK) ERROR_RETURN(retval); printf("We did an additional %d times addition!\n", THRESHOLD); printf("The total instructions executed for addition are %lld \n", values[0] ); printf("The total cycles used are %lld \n", values[1] ); /*********************************************************************** * Stop counting events(this reads the counters as well as stops them * ***********************************************************************/ printf("\nNow we try to do some multiplications\n"); computation_mult(); /******************* PAPI_stop_counters **********************************/ if ((retval=PAPI_stop_counters(values, NUM_EVENTS)) != PAPI_OK) ERROR_RETURN(retval); printf("The total instruction executed for multiplication are %lld \n", values[0] ); printf("The total cycles used are %lld \n", values[1] ); exit(0); } papi-5.6.0/src/ctests/high-level.c000664 001750 001750 00000007055 13216244360 021025 0ustar00jshenry1963jshenry1963000000 000000 /* These examples show the essentials in using the PAPI high-level interface. The program consists of 4 work-loops. The programmer intends to count the total events for loop 1, 2 and 4, but not include the number of events in loop 3. To accomplish this PAPI_read_counters is used as a counter reset function, while PAPI_accum_counters is used to sum the contributions of loops 2 and 4 into the total count. 
*/ #include #include #include "papi.h" #include "papi_test.h" #include "do_loops.h" #define NUM_EVENTS 2 int main( int argc, char **argv ) { int retval; long long values[NUM_EVENTS], dummyvalues[NUM_EVENTS]; long long myvalues[NUM_EVENTS]; int Events[NUM_EVENTS]; int quiet; /* Set TESTS_QUIET variable */ quiet=tests_quiet( argc, argv ); retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) { test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); } /* query and set up the right events to monitor */ if ( PAPI_query_event( PAPI_FP_INS ) == PAPI_OK ) { Events[0] = PAPI_FP_INS; } else { Events[0] = PAPI_TOT_INS; } Events[1] = PAPI_TOT_CYC; retval = PAPI_start_counters( ( int * ) Events, NUM_EVENTS ); if ( retval != PAPI_OK ) { if (!quiet) printf("Cannot start events\n"); test_skip( __FILE__, __LINE__, "PAPI_start_counters", retval ); } /* Loop 1 */ do_flops( NUM_FLOPS ); retval = PAPI_read_counters( values, NUM_EVENTS ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_read_counters", retval ); if ( !quiet ) printf( TWO12, values[0], values[1], "(Counters continuing...)\n" ); myvalues[0] = values[0]; myvalues[1] = values[1]; /* Loop 2 */ do_flops( NUM_FLOPS ); retval = PAPI_accum_counters( values, NUM_EVENTS ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_accum_counters", retval ); if ( !quiet ) printf( TWO12, values[0], values[1], "(Counters being ''held'')\n" ); /* Loop 3 */ /* Simulated code that should not be counted */ do_flops( NUM_FLOPS ); retval = PAPI_read_counters( dummyvalues, NUM_EVENTS ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_read_counters", retval ); if ( !quiet ) printf( TWO12, dummyvalues[0], dummyvalues[1], "(Skipped counts)\n" ); if ( !quiet ) printf( "%12s %12s (''Continuing'' counting)\n", "xxx", "xxx" ); /* Loop 4 */ do_flops( NUM_FLOPS ); retval = PAPI_accum_counters( values, NUM_EVENTS ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, 
"PAPI_accum_counters", retval ); if ( !quiet ) printf( TWO12, values[0], values[1], "" ); if ( !quiet ) { printf( "----------------------------------\n" ); printf( "Verification: The last line in each experiment should be\n" ); printf( "approximately three times the value of the first line.\n" ); } { long long min, max; min = ( long long ) ( ( double ) myvalues[0] * .9 ); max = ( long long ) ( ( double ) myvalues[0] * 1.1 ); if ( values[0] < ( 3 * min ) || values[0] > ( 3 * max ) ) { retval = 1; if ( PAPI_query_event( PAPI_FP_INS ) == PAPI_OK ) { test_fail( __FILE__, __LINE__, "PAPI_FP_INS", 1 ); } else { test_fail( __FILE__, __LINE__, "PAPI_TOT_INS", 1 ); } } min = ( long long ) ( ( double ) myvalues[1] * .9 ); max = ( long long ) ( ( double ) myvalues[1] * 1.1 ); if ( values[1] < ( 3 * min ) || values[1] > ( 3 * max ) ) { retval = 1; test_fail( __FILE__, __LINE__, "PAPI_TOT_CYC", 1 ); } } /* The values array is not allocated through allocate_test_space * so we need to pass NULL here */ test_pass( __FILE__ ); return 0; } papi-5.6.0/src/perfctr-2.6.x/usr.lib/ppc.c000775 001750 001750 00000005144 13216244367 022061 0ustar00jshenry1963jshenry1963000000 000000 /* $Id: ppc.c,v 1.2.2.3 2004/11/13 16:31:09 mikpe Exp $ * PPC32-specific perfctr library procedures. 
* * Copyright (C) 2004 Mikael Pettersson */ #include #include "libperfctr.h" #include "ppc.h" void perfctr_info_cpu_init(struct perfctr_info *info) { unsigned int pvr = mfspr(SPRN_PVR); /* trapped & emulated by the kernel */ unsigned int cpu_type; switch( PVR_VER(pvr) ) { case 0x0004: /* 604 */ cpu_type = PERFCTR_PPC_604; break; case 0x0009: /* 604e */ case 0x000A: /* 604ev */ cpu_type = PERFCTR_PPC_604e; break; case 0x0008: /* 750/740 */ case 0x7000: case 0x7001: /* 750FX */ case 0x7002: /* 750GX */ cpu_type = PERFCTR_PPC_750; break; case 0x000C: /* 7400 */ case 0x800C: /* 7410 */ cpu_type = PERFCTR_PPC_7400; break; case 0x8000: /* 7451/7441 */ case 0x8001: /* 7455/7445 */ case 0x8002: /* 7457/7447 */ case 0x8003: /* 7447A */ case 0x8004: /* 7448 */ cpu_type = PERFCTR_PPC_7450; break; default: cpu_type = PERFCTR_PPC_GENERIC; } info->cpu_type = cpu_type; } unsigned int perfctr_info_nrctrs(const struct perfctr_info *info) { switch( info->cpu_type ) { case PERFCTR_PPC_604: return 2; case PERFCTR_PPC_604e: case PERFCTR_PPC_750: case PERFCTR_PPC_7400: return 4; case PERFCTR_PPC_7450: return 6; default: return 0; } } const char *perfctr_info_cpu_name(const struct perfctr_info *info) { switch( info->cpu_type ) { case PERFCTR_PPC_GENERIC: return "Generic PowerPC with TB"; case PERFCTR_PPC_604: return "PowerPC 604"; case PERFCTR_PPC_604e: return "PowerPC 604e"; case PERFCTR_PPC_750: return "PowerPC 750"; case PERFCTR_PPC_7400: return "PowerPC 7400"; case PERFCTR_PPC_7450: return "PowerPC 7450"; default: return "?"; } } void perfctr_cpu_control_print(const struct perfctr_cpu_control *control) { unsigned int i, nractrs, nrictrs, nrctrs; nractrs = control->nractrs; nrictrs = control->nrictrs; nrctrs = control->nractrs + nrictrs; printf("tsc_on\t\t\t%u\n", control->tsc_on); printf("nractrs\t\t\t%u\n", nractrs); if( nrictrs ) printf("nrictrs\t\t\t%u\n", nrictrs); for(i = 0; i < nrctrs; ++i) { printf("pmc_map[%u]\t\t%u\n", i, control->pmc_map[i]); 
printf("evntsel[%u]\t\t0x%08X\n", i, control->evntsel[i]); if( i >= nractrs ) printf("ireset[%u]\t\t%d\n", i, control->ireset[i]); } if( control->ppc.mmcr0 ) printf("mmcr0\t\t\t0x%08X\n", control->ppc.mmcr0); if( control->ppc.mmcr2 ) printf("mmcr2\t\t\t0x%08X\n", control->ppc.mmcr2); } papi-5.6.0/src/event_data/000775 001750 001750 00000000000 13216244361 017434 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/perfctr-2.7.x/examples/000775 001750 001750 00000000000 13216244370 021357 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/event_data/power5+/events000664 001750 001750 00001157620 13216244361 022173 0ustar00jshenry1963jshenry1963000000 000000 { File: power5+/events { Date: 12/13/06 { Version: 1.7 { Copyright (c) International Business Machines, 2006. { Contributed by Eric Kjeldergaard 362,356,355,352,1,1 { counter 1 } #0,v,g,n,n,PM_0INST_CLB_CYC,Cycles no instructions in CLB ##400C0 The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. #1,v,g,n,n,PM_1INST_CLB_CYC,Cycles 1 instruction in CLB ##400C1 The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. #2,v,g,n,n,PM_1PLUS_PPC_CMPL,One or more PPC instruction completed ##00013 A group containing at least one PPC instruction completed. 
For microcoded instructions that span multiple groups, this will only occur once. #3,v,g,n,n,PM_2INST_CLB_CYC,Cycles 2 instructions in CLB ##400C2 The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. #4,v,g,n,n,PM_3INST_CLB_CYC,Cycles 3 instructions in CLB ##400C3 The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. #5,v,g,n,n,PM_4INST_CLB_CYC,Cycles 4 instructions in CLB ##400C4 The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. #6,v,g,n,n,PM_5INST_CLB_CYC,Cycles 5 instructions in CLB ##400C5 The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. 
Each thread has its own set of CLB; these events are thread specific. #7,v,g,n,n,PM_6INST_CLB_CYC,Cycles 6 instructions in CLB ##400C6 The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. #8,u,g,n,s,PM_BRQ_FULL_CYC,Cycles branch queue full ##100C5 Cycles when the issue queue that feeds the branch unit is full. This condition will prevent dispatch groups from being dispatched. This event only indicates that the queue was full, not that dispatch was prevented. #9,v,g,n,n,PM_BR_ISSUED,Branches issued ##230E4 A branch instruction was issued to the branch unit. A branch that was incorrectly predicted may issue and execute multiple times. #10,v,g,n,n,PM_BR_MPRED_CR,Branch mispredictions due to CR bit setting ##230E5 A conditional branch instruction was incorrectly predicted as taken or not taken. The branch execution unit detects a branch mispredict because the CR value is opposite of the predicted value. This will result in a branch redirect flush if not overridden by a flush of an older instruction. #11,v,g,n,n,PM_BR_MPRED_TA,Branch mispredictions due to target address ##230E6 A branch instruction target was incorrectly predicted. This will result in a branch mispredict flush unless a flush is detected from an older instruction. #12,v,g,n,n,PM_BR_UNCOND,Unconditional branch ##23087 An unconditional branch was executed. #13,v,g,n,s,PM_CLB_EMPTY_CYC,Cycles CLB empty ##410C6 Cycles when both threads' CLBs are completely empty. #14,v,g,n,n,PM_CLB_FULL_CYC,Cycles CLB full ##220E5 Cycles when both threads' CLBs are full.
#15,u,g,n,s,PM_CRQ_FULL_CYC,Cycles CR issue queue full ##110C1 The issue queue that feeds the Conditional Register unit is full. This condition will prevent dispatch groups from being dispatched. This event only indicates that the queue was full, not that dispatch was prevented. #16,v,g,n,s,PM_CR_MAP_FULL_CYC,Cycles CR logical operation mapper full ##100C4 The Conditional Register mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. This event only indicates that the mapper was full, not that dispatch was prevented. #17,v,g,n,s,PM_CYC,Processor cycles ##0000F Processor cycles #18,v,g,n,n,PM_DATA_FROM_L2,Data loaded from L2 ##C3087 The processor's Data Cache was reloaded from the local L2 due to a demand load. #19,v,g,n,n,PM_DATA_FROM_L25_SHR,Data loaded from L2.5 shared ##C3097 The processor's Data Cache was reloaded with shared (T or SL) data from the L2 of a chip on the same module as this processor is located due to a demand load. #20,v,g,n,n,PM_DATA_FROM_L275_MOD,Data loaded from L2.75 modified ##C30A3 The processor's Data Cache was reloaded with modified (M) data from the L2 on a different module than this processor is located due to a demand load. #21,v,g,n,n,PM_DATA_FROM_L3,Data loaded from L3 ##C308E The processor's Data Cache was reloaded from the local L3 due to a demand load. #22,v,g,n,n,PM_DATA_FROM_L35_SHR,Data loaded from L3.5 shared ##C309E The processor's Data Cache was reloaded with shared (S) data from the L3 of a chip on the same module as this processor is located due to a demand load. #23,v,g,n,n,PM_DATA_FROM_L375_MOD,Data loaded from L3.75 modified ##C30A7 The processor's Data Cache was reloaded with modified (M) data from the L3 of a chip on the same module as this processor is located due to a demand load. #24,v,g,n,n,PM_DATA_FROM_RMEM,Data loaded from remote memory ##C30A1 The processor's Data Cache was reloaded from memory attached to a different module than this processor is located on.
#25,v,g,n,n,PM_DATA_TABLEWALK_CYC,Cycles doing data tablewalks ##800C7 Cycles a translation tablewalk is active. While a tablewalk is active any request attempting to access the TLB will be rejected and retried. #26,u,g,n,s,PM_DC_INV_L2,L1 D cache entries invalidated from L2 ##C10C7 A dcache invalidated was received from the L2 because a line in L2 was castout. #27,v,g,n,n,PM_DC_PREF_OUT_OF_STREAMS,D cache out of prefetch streams ##C50C2 A new prefetch stream was detected but no more stream entries were available. #28,v,g,n,n,PM_DC_PREF_DST,DST (Data Stream Touch) stream start ##830E6 A prefetch stream was started using the DST instruction. #29,v,g,n,n,PM_DC_PREF_STREAM_ALLOC,D cache new prefetch stream allocated ##830E7 A new Prefetch Stream was allocated. #30,v,g,n,n,PM_DSLB_MISS,Data SLB misses ##800C5 A SLB miss for a data request occurred. SLB misses trap to the operating system to resolve. #31,v,g,n,n,PM_DTLB_MISS,Data TLB misses ##800C4,C20E0 Data TLB misses, all page sizes. #32,v,g,n,n,PM_DTLB_MISS_4K,Data TLB miss for 4K page ##C208D Data TLB references to 4KB pages that missed the TLB. Page size is determined at TLB reload time. #33,v,g,n,n,PM_DTLB_REF,Data TLB references ##C20E4 Total number of Data TLB references for all page sizes. Page size is determined at TLB reload time. #34,v,g,n,n,PM_DTLB_REF_4K,Data TLB reference for 4K page ##C2086 Data TLB references for 4KB pages. Includes hits + misses. #35,v,g,n,n,PM_EE_OFF,Cycles MSR(EE) bit off ##130E3 Cycles MSR(EE) bit was off indicating that interrupts due to external exceptions were masked. #36,u,g,n,n,PM_EE_OFF_EXT_INT,Cycles MSR(EE) bit off and external interrupt pending ##130E7 Cycles when an interrupt due to an external exception is pending but external exceptions were masked. #37,v,g,n,s,PM_FAB_CMD_ISSUED,Fabric command issued ##700C7 Incremented when a chip issues a command on its SnoopA address bus. 
Each of the two address busses (SnoopA and SnoopB) is capable of one transaction per fabric cycle (one fabric cycle = 2 cpu cycles in normal 2:1 mode), but each chip can only drive the SnoopA bus, and can only drive one transaction every two fabric cycles (i.e., every four cpu cycles). In MCM-based systems, two chips interleave their accesses to each of the two fabric busses (SnoopA, SnoopB) to reach a peak capability of one transaction per cpu clock cycle. The two chips that drive SnoopB are wired so that the chips refer to the bus as SnoopA but it is connected to the other two chips as SnoopB. Note that this event will only be recorded by the FBC on the chip that sourced the operation. The signal is delivered at FBC speed and the count must be scaled. #38,v,g,n,n,PM_FAB_CMD_RETRIED,Fabric command retried ##710C7 Incremented when a command issued by a chip on its SnoopA address bus is retried for any reason. The overwhelming majority of retries are due to running out of memory controller queues but retries can also be caused by trying to reference addresses that are in a transient cache state -- e.g. a line is transient after issuing a DCLAIM instruction to a shared line but before the associated store completes. Each chip reports its own counts. The signal is delivered at FBC speed and the count must be scaled accordingly. #39,v,g,n,s,PM_FAB_DCLAIM_ISSUED,dclaim issued ##720E7 A DCLAIM command was issued. Each chip reports its own counts. The signal is delivered at FBC speed and the count must be scaled accordingly. #40,v,g,n,s,PM_FAB_DCLAIM_RETRIED,dclaim retried ##730E7 A DCLAIM command was retried. Each chip reports its own counts. The signal is delivered at FBC speed and the count must be scaled accordingly. #41,v,g,n,s,PM_FAB_HOLDtoNN_EMPTY,Hold buffer to NN empty ##722E7 Fabric cycles when the Next Node out hold-buffers are empty. The signal is delivered at FBC speed and the count must be scaled accordingly. 
#42,v,g,n,s,PM_FAB_HOLDtoVN_EMPTY,Hold buffer to VN empty ##721E7 Fabric cycles when the Vertical Node out hold-buffers are empty. The signal is delivered at FBC speed and the count must be scaled accordingly. #43,v,g,n,s,PM_FAB_M1toP1_SIDECAR_EMPTY,M1 to P1 sidecar empty ##702C7 Fabric cycles when the Minus-1 hip/hop sidecars (sidecars for chip to chip data transfer) are empty. The signal is delivered at FBC speed and the count must be scaled accordingly. #44,v,g,n,s,PM_FAB_M1toVNorNN_SIDECAR_EMPTY,M1 to VN/NN sidecar empty ##712C7 Fabric cycles when the Minus-1 jump sidecar (sidecars for mcm to mcm data transfer) is empty. The signal is delivered at FBC speed and the count must be scaled accordingly. #45,v,g,n,s,PM_FAB_P1toM1_SIDECAR_EMPTY,P1 to M1 sidecar empty ##701C7 Fabric cycles when the Plus-1 hip/hop sidecars (sidecars for chip to chip data transfer) are empty. The signal is delivered at FBC speed and the count must be scaled accordingly. #46,v,g,n,s,PM_FAB_P1toVNorNN_SIDECAR_EMPTY,P1 to VN/NN sidecar empty ##711C7 Fabric cycles when the Plus-1 jump sidecar (sidecars for mcm to mcm data transfer) is empty. The signal is delivered at FBC speed and the count must be scaled accordingly. #47,v,g,n,s,PM_FAB_PNtoNN_DIRECT,PN to NN beat went straight to its destination ##703C7 Fabric Data beats that the base chip takes the inbound PN data and passes it through to the outbound NN bus without going into a sidecar. The signal is delivered at FBC speed and the count must be scaled. #48,v,g,n,s,PM_FAB_PNtoNN_SIDECAR,PN to NN beat went to sidecar first ##713C7 Fabric Data beats that the base chip takes the inbound PN data and forwards it on to the outbound NN data bus after going into a sidecar first. The signal is delivered at FBC speed and the count must be scaled. 
#49,v,g,n,s,PM_FAB_PNtoVN_DIRECT,PN to VN beat went straight to its destination ##723E7 Fabric Data beats that the base chip takes the inbound PN data and passes it through to the outbound VN bus without going into a sidecar. The signal is delivered at FBC speed and the count must be scaled accordingly. #50,v,g,n,s,PM_FAB_PNtoVN_SIDECAR,PN to VN beat went to sidecar first ##733E7 Fabric data beats that the base chip takes the inbound PN data and forwards it on to the outbound VN data bus after going into a sidecar first. The signal is delivered at FBC speed and the count must be scaled accordingly. #51,v,g,n,s,PM_FAB_VBYPASS_EMPTY,Vertical bypass buffer empty ##731E7 Fabric cycles when the Middle Bypass sidecar is empty. The signal is delivered at FBC speed and the count must be scaled accordingly. #52,v,g,n,n,PM_FLUSH,Flushes ##110C7 Flushes occurred including LSU and Branch flushes. #53,v,g,n,n,PM_FLUSH_BR_MPRED,Flush caused by branch mispredict ##110C6 A flush was caused by a branch mispredict. #54,v,g,n,s,PM_FLUSH_IMBAL,Flush caused by thread GCT imbalance ##330E3 This thread has been flushed at dispatch because it is stalled and a GCT imbalance exists. GCT thresholds are set in the TSCR register. This allows the other thread to have more machine resources for it to make progress while this thread is stalled. #55,v,g,n,s,PM_FLUSH_SB,Flush caused by scoreboard operation ##330E2 This thread has been flushed at dispatch because its scoreboard bit is set indicating that a non-renamed resource is being updated. This allows the other thread to have more machine resources for it to make progress while this thread is stalled. #56,v,g,n,s,PM_FLUSH_SYNC,Flush caused by sync ##330E1 This thread has been flushed at dispatch due to a sync, lwsync, ptesync, or tlbsync instruction. This allows the other thread to have more machine resources for it to make progress until the sync finishes. 
#57,v,g,n,s,PM_FPR_MAP_FULL_CYC,Cycles FPR mapper full ##100C1 The Floating Point Register mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. This event only indicates that the mapper was full, not that dispatch was prevented. #58,v,g,n,n,PM_FPU0_1FLOP,FPU0 executed add, mult, sub, cmp or sel instruction ##000C3 The floating point unit has executed an add, mult, sub, compare, fsel, fneg, fabs, fnabs, fres, or frsqrte kind of instruction. These are single FLOP operations. #59,v,g,n,n,PM_FPU0_DENORM,FPU0 received denormalized data ##020E0 FPU0 has encountered a denormalized operand. #60,v,g,n,n,PM_FPU0_FDIV,FPU0 executed FDIV instruction ##000C0 FPU0 has executed a divide instruction. This could be fdiv, fdivs, fdiv., fdivs. #61,v,g,n,n,PM_FPU0_FEST,FPU0 executed FEST instruction ##010C2 FPU0 has executed an estimate instruction. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. #62,v,g,n,n,PM_FPU0_FIN,FPU0 produced a result ##010C3 FPU0 finished, produced a result. This only indicates finish, not completion. Floating Point Stores are included in this count but not Floating Point Loads. #63,v,g,n,n,PM_FPU0_FMA,FPU0 executed multiply-add instruction ##000C1 The floating point unit has executed a multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. #64,v,g,n,n,PM_FPU0_FMOV_FEST,FPU0 executed FMOV or FEST instructions ##010C0 FPU0 has executed a move kind of instruction or one of the estimate instructions. This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ. #65,v,g,n,n,PM_FPU0_FPSCR,FPU0 executed FPSCR instruction ##030E0 FPU0 has executed FPSCR move related instruction. This could be mtfsfi*, mtfsb0*, mtfsb1*, mffs*, mtfsf*, mcrsf* where XYZ* means XYZ, XYZs, XYZ., XYZs. #66,v,g,n,n,PM_FPU0_FRSP_FCONV,FPU0 executed FRSP or FCONV instructions ##010C1 FPU0 has executed a frsp or convert kind of instruction. 
This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. #67,v,g,n,n,PM_FPU0_FSQRT,FPU0 executed FSQRT instruction ##000C2 FPU0 has executed a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. #68,v,g,n,s,PM_FPU0_FULL_CYC,Cycles FPU0 issue queue full ##100C3 The issue queue for FPU0 cannot accept any more instructions. Dispatch to this issue queue is stopped. #69,v,g,n,n,PM_FPU0_SINGLE,FPU0 executed single precision instruction ##020E3 FPU0 has executed a single precision instruction. #70,v,g,n,n,PM_FPU0_STALL3,FPU0 stalled in pipe3 ##020E1 FPU0 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). #71,v,g,n,n,PM_FPU0_STF,FPU0 executed store instruction ##020E2 FPU0 has executed a Floating Point Store instruction. #72,v,g,n,n,PM_FPU1_1FLOP,FPU1 executed add, mult, sub, cmp or sel instruction ##000C7 The floating point unit has executed an add, mult, sub, compare, fsel, fneg, fabs, fnabs, fres, or frsqrte kind of instruction. These are single FLOP operations. #73,v,g,n,n,PM_FPU1_DENORM,FPU1 received denormalized data ##020E4 FPU1 has encountered a denormalized operand. #74,v,g,n,n,PM_FPU1_FDIV,FPU1 executed FDIV instruction ##000C4 FPU1 has executed a divide instruction. This could be fdiv, fdivs, fdiv., fdivs. #75,v,g,n,n,PM_FPU1_FEST,FPU1 executed FEST instruction ##010C6 FPU1 has executed an estimate instruction. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. #76,v,g,n,n,PM_FPU1_FIN,FPU1 produced a result ##010C7 FPU1 finished, produced a result. This only indicates finish, not completion. Floating Point Stores are included in this count but not Floating Point Loads. #77,v,g,n,n,PM_FPU1_FMA,FPU1 executed multiply-add instruction ##000C5 The floating point unit has executed a multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. 
#78,v,g,n,n,PM_FPU1_FMOV_FEST,FPU1 executed FMOV or FEST instructions ##010C4 FPU1 has executed a move kind of instruction or one of the estimate instructions. This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ. #79,v,g,n,n,PM_FPU1_FRSP_FCONV,FPU1 executed FRSP or FCONV instructions ##010C5 FPU1 has executed a frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. #80,v,g,n,n,PM_FPU1_FSQRT,FPU1 executed FSQRT instruction ##000C6 FPU1 has executed a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. #81,v,g,n,s,PM_FPU1_FULL_CYC,Cycles FPU1 issue queue full ##100C7 The issue queue for FPU1 cannot accept any more instructions. Dispatch to this issue queue is stopped #82,v,g,n,n,PM_FPU1_SINGLE,FPU1 executed single precision instruction ##020E7 FPU1 has executed a single precision instruction. #83,v,g,n,n,PM_FPU1_STALL3,FPU1 stalled in pipe3 ##020E5 FPU1 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). #84,v,g,n,n,PM_FPU1_STF,FPU1 executed store instruction ##020E6 FPU1 has executed a Floating Point Store instruction. #85,v,g,n,n,PM_FPU_1FLOP,FPU executed one flop instruction ##00090 The floating point unit has executed an add, mult, sub, compare, fsel, fneg, fabs, fnabs, fres, or frsqrte kind of instruction. These are single FLOP operations. #86,v,g,n,n,PM_FPU_DENORM,FPU received denormalized data ##02088 The floating point unit has encountered a denormalized operand. Combined Unit 0 + Unit 1. #87,v,g,n,n,PM_FPU_FDIV,FPU executed FDIV instruction ##00088 The floating point unit has executed a divide instruction. This could be fdiv, fdivs, fdiv., fdivs.. Combined Unit 0 + Unit 1. #88,v,g,n,n,PM_FPU_FEST,FPU executed FEST instruction ##010A8 The floating point unit has executed an estimate instructions. 
This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. Combined Unit 0 + Unit 1. #89,c,g,n,n,PM_FPU_FULL_CYC,Cycles FPU issue queue full ##10090 Cycles when one or both FPU issue queues are full. Combined Unit 0 + 1. Use with caution since this is the sum of cycles when Unit 0 was full plus Unit 1 full. It does not indicate when both units were full. #90,v,g,n,n,PM_FPU_SINGLE,FPU executed single precision instruction ##02090 FPU is executing single precision instruction. Combined Unit 0 + Unit 1. #91,v,g,n,s,PM_FXLS0_FULL_CYC,Cycles FXU0/LS0 queue full ##110C0 The issue queue that feeds the Fixed Point unit 0 / Load Store Unit 0 is full. This condition will prevent dispatch groups from being dispatched. This event only indicates that the queue was full, not that dispatch was prevented. #92,v,g,n,s,PM_FXLS1_FULL_CYC,Cycles FXU1/LS1 queue full ##110C4 The issue queue that feeds the Fixed Point unit 1 / Load Store Unit 1 is full. This condition will prevent dispatch groups from being dispatched. This event only indicates that the queue was full, not that dispatch was prevented. #93,c,g,n,n,PM_FXLS_FULL_CYC,Cycles FXLS queue is full ##110A8 Cycles when the issue queues for one or both FXU/LSU units is full. Use with caution since this is the sum of cycles when Unit 0 was full plus Unit 1 full. It does not indicate when both units were full. #94,v,g,n,n,PM_FXU0_FIN,FXU0 produced a result ##130E2 The Fixed Point unit 0 finished an instruction and produced a result. Instructions that finish may not necessarily complete. #95,v,g,n,n,PM_FXU1_FIN,FXU1 produced a result ##130E6 The Fixed Point unit 1 finished an instruction and produced a result. Instructions that finish may not necessarily complete. #96,u,g,n,n,PM_FXU_IDLE,FXU idle ##00012 FXU0 and FXU1 are both idle. #97,v,g,n,n,PM_GCT_FULL_CYC,Cycles GCT full ##100C0 The Global Completion Table is completely full. 
#98,v,g,n,n,PM_GCT_NOSLOT_CYC,Cycles no GCT slot allocated ##00004 Cycles when the Global Completion Table has no slots from this thread. #99,v,g,n,s,PM_GCT_USAGE_00to59_CYC,Cycles GCT less than 60% full ##0001F Cycles when the Global Completion Table has fewer than 60% of its slots used. The GCT has 20 entries shared between threads. #100,v,g,n,s,PM_GPR_MAP_FULL_CYC,Cycles GPR mapper full ##130E5 The General Purpose Register mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. This event only indicates that the mapper was full, not that dispatch was prevented. #101,v,g,n,n,PM_GRP_BR_REDIR,Group experienced branch redirect ##120E6 Number of groups, counted at dispatch, that have encountered a branch redirect. Every group constructed from a fetch group that has been redirected will count. #102,v,g,n,n,PM_GRP_BR_REDIR_NONSPEC,Group experienced non-speculative branch redirect ##12091 Number of groups, counted at completion, that have encountered a branch redirect. #103,v,g,n,n,PM_GRP_DISP_BLK_SB_CYC,Cycles group dispatch blocked by scoreboard ##130E1 A scoreboard operation on a non-renamed resource has blocked dispatch. #104,v,g,n,n,PM_GRP_DISP_REJECT,Group dispatch rejected ##120E4 A group that previously attempted dispatch was rejected. #105,v,g,n,n,PM_GRP_DISP_VALID,Group dispatch valid ##120E3 A group is available for dispatch. This does not mean it was successfully dispatched. #106,v,g,n,n,PM_GRP_IC_MISS,Group experienced I cache miss ##120E7 Number of groups, counted at dispatch, that have encountered an icache miss redirect. Every group constructed from a fetch group that missed the instruction cache will count. 
#107,c,g,n,n,PM_GRP_IC_MISS_BR_REDIR_NONSPEC,Group experienced non-speculative I cache miss or branch redirect ##120E5 Group experienced non-speculative I cache miss or branch redirect #108,v,g,n,n,PM_GRP_IC_MISS_NONSPEC,Group experienced non-speculative I cache miss ##12099 Number of groups, counted at completion, that have encountered an instruction cache miss. #109,v,g,n,n,PM_GRP_MRK,Group marked in IDU ##00014 A group was sampled (marked). The group is called a marked group. One instruction within the group is tagged for detailed monitoring. The sampled instruction is called a marked instruction. Events associated with the marked instruction are annotated with the marked term. #110,v,g,n,n,PM_IC_DEMAND_L2_BHT_REDIRECT,L2 I cache demand request due to BHT redirect ##230E0 A demand (not prefetch) miss to the instruction cache was sent to the L2 as a result of a branch prediction redirect (CR mispredict). #111,v,g,n,n,PM_IC_DEMAND_L2_BR_REDIRECT,L2 I cache demand request due to branch redirect ##230E1 A demand (not prefetch) miss to the instruction cache was sent to the L2 as a result of a branch prediction redirect (either ALL mispredicted or Target). #112,v,g,n,n,PM_IC_PREF_REQ,Instruction prefetch requests ##220E6 An instruction prefetch request has been made. #113,v,g,n,n,PM_IERAT_XLATE_WR,Translation written to ierat ##220E7 An entry was written into the IERAT as a result of an IERAT miss. This event can be used to count IERAT misses. ERAT misses that are later ignored will not be counted unless the ERAT is written before the instruction stream is changed. #114,v,g,n,n,PM_IERAT_XLATE_WR_LP,Large page translation written to ierat ##210C6 An entry was written into the IERAT as a result of an IERAT miss. This event can be used to count IERAT misses. ERAT misses that are later ignored will not be counted unless the ERAT is written before the instruction stream is changed. 
#115,v,g,n,n,PM_IOPS_CMPL,Internal operations completed ##00001 Number of internal operations that completed. #116,v,g,n,n,PM_INST_DISP_ATTEMPT,Instructions dispatch attempted ##120E1 Number of PowerPC Instructions dispatched (attempted, not filtered by success). #117,v,g,n,n,PM_INST_FETCH_CYC,Cycles at least 1 instruction fetched ##220E4 Cycles when at least one instruction was sent from the fetch unit to the decode unit. #118,v,g,n,n,PM_INST_FROM_L2,Instruction fetched from L2 ##22086 An instruction fetch group was fetched from L2. Fetch Groups can contain up to 8 instructions #119,v,g,n,n,PM_INST_FROM_L25_SHR,Instruction fetched from L2.5 shared ##22096 An instruction fetch group was fetched with shared (T or SL) data from the L2 of a chip on the same module as this processor is located. Fetch groups can contain up to 8 instructions. #120,v,g,n,n,PM_INST_FROM_L2MISS,Instruction fetched missed L2 ##2209B An instruction fetch group was fetched from beyond the local L2. #121,v,g,n,n,PM_INST_FROM_L3,Instruction fetched from L3 ##2208D An instruction fetch group was fetched from the local L3. Fetch groups can contain up to 8 instructions #122,v,g,n,n,PM_INST_FROM_L35_SHR,Instruction fetched from L3.5 shared ##2209D An instruction fetch group was fetched with shared (S) data from the L3 of a chip on the same module as this processor is located. Fetch groups can contain up to 8 instructions #123,u,g,n,n,PM_ISLB_MISS,Instruction SLB misses ##800C1 A SLB miss for an instruction fetch has occurred #124,v,g,n,n,PM_ITLB_MISS,Instruction TLB misses ##800C0 A TLB miss for an Instruction Fetch has occurred #125,v,g,n,n,PM_L1_DCACHE_RELOAD_VALID,L1 reload data source valid ##C30E4 The data source information is valid, the data cache has been reloaded. Prior to POWER5+ this included data cache reloads due to prefetch activity. With POWER5+ this now only includes reloads due to demand loads. 
#126,v,g,n,n,PM_L1_PREF,L1 cache data prefetches ##C70E7 A request to prefetch data into the L1 was made #127,v,g,n,n,PM_L1_WRITE_CYC,Cycles writing to instruction L1 ##230E7 Cycles that a cache line was written to the instruction cache. #128,v,g,n,s,PM_L2SA_MOD_INV,L2 slice A transition from modified to invalid ##730E0 A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C. #129,v,g,n,s,PM_L2SA_MOD_TAG,L2 slice A transition from modified to tagged ##720E0 A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C. #130,v,g,n,s,PM_L2SA_RCLD_DISP,L2 slice A RC load dispatch attempt ##701C0 A Read/Claim dispatch for a Load was attempted #131,v,g,n,s,PM_L2SA_RCLD_DISP_FAIL_ADDR,L2 slice A RC load dispatch attempt failed due to address collision with RC/CO/SN/SQ ##711C0 A Read/Claim dispatch for a load failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time. #132,v,g,n,s,PM_L2SA_RCLD_DISP_FAIL_OTHER,L2 slice A RC load dispatch attempt failed due to other reasons ##731E0 A Read/Claim dispatch for a load failed for some reason other than Full or Collision conditions. #133,v,g,n,s,PM_L2SA_RCLD_DISP_FAIL_RC_FULL,L2 slice A RC load dispatch attempt failed due to all RC full ##721E0 A Read/Claim dispatch for a load failed because all RC machines are busy. #134,v,g,n,s,PM_L2SA_RCST_DISP,L2 slice A RC store dispatch attempt ##702C0 A Read/Claim dispatch for a Store was attempted. 
#135,v,g,n,s,PM_L2SA_RCST_DISP_FAIL_ADDR,L2 slice A RC store dispatch attempt failed due to address collision with RC/CO/SN/SQ ##712C0 A Read/Claim dispatch for a store failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time. #136,v,g,n,s,PM_L2SA_RCST_DISP_FAIL_OTHER,L2 slice A RC store dispatch attempt failed due to other reasons ##732E0 A Read/Claim dispatch for a store failed for some reason other than Full or Collision conditions. Rejected dispatches do not count because they have not yet been attempted. #137,v,g,n,s,PM_L2SA_RCST_DISP_FAIL_RC_FULL,L2 slice A RC store dispatch attempt failed due to all RC full ##722E0 A Read/Claim dispatch for a store failed because all RC machines are busy. #138,v,g,n,s,PM_L2SA_RC_DISP_FAIL_CO_BUSY,L2 slice A RC dispatch attempt failed due to RC/CO pair chosen was miss and CO already busy ##703C0 A Read/Claim Dispatch was rejected at dispatch because the Castout Machine was busy. In the case of an RC starting up on a miss and the victim is valid, the CO machine must be available for the RC to process the access. If the CO is still busy working on an old castout, then the RC must not-ack the access if it is a miss(re-issued by the CIU). If it is a miss and the CO is available to process the castout, the RC will accept the access. Once the RC has finished, it can restart and process new accesses that result in a hit (or miss that doesn't need a CO) even though the CO is still processing a castout from a previous access. #139,v,g,n,s,PM_L2SA_RC_DISP_FAIL_CO_BUSY_ALL,L2 slice A RC dispatch attempt failed due to all CO busy ##713C0 A Read/Claim dispatch was rejected because all Castout machines were busy. #140,v,g,n,s,PM_L2SA_SHR_INV,L2 slice A transition from shared to invalid ##710C0 A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. 
This transition was caused by any external snoop request. The event is provided on each of the three slices A, B, and C. NOTE: For this event to be useful the tablewalk duration event should also be counted. #141,v,g,n,s,PM_L2SA_SHR_MOD,L2 slice A transition from shared to modified ##700C0 A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A, B, and C. #142,v,g,n,n,PM_L2SA_ST_HIT,L2 slice A store hits ##733E0 A store request made from the core hit in the L2 directory. This event is provided on each of the three L2 slices A, B, and C. #143,v,g,n,n,PM_L2SA_ST_REQ,L2 slice A store requests ##723E0 A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. The event is provided on each of the three slices A, B, and C. #144,v,g,n,s,PM_L2SB_MOD_INV,L2 slice B transition from modified to invalid ##730E1 A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C. #145,v,g,n,s,PM_L2SB_MOD_TAG,L2 slice B transition from modified to tagged ##720E1 A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C. 
#146,v,g,n,s,PM_L2SB_RCLD_DISP,L2 slice B RC load dispatch attempt ##701C1 A Read/Claim dispatch for a Load was attempted #147,v,g,n,s,PM_L2SB_RCLD_DISP_FAIL_ADDR,L2 slice B RC load dispatch attempt failed due to address collision with RC/CO/SN/SQ ##711C1 A Read/Claim dispatch for a load failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time. #148,v,g,n,s,PM_L2SB_RCLD_DISP_FAIL_OTHER,L2 slice B RC load dispatch attempt failed due to other reasons ##731E1 A Read/Claim dispatch for a load failed for some reason other than Full or Collision conditions. #149,v,g,n,s,PM_L2SB_RCLD_DISP_FAIL_RC_FULL,L2 slice B RC load dispatch attempt failed due to all RC full ##721E1 A Read/Claim dispatch for a load failed because all RC machines are busy. #150,v,g,n,s,PM_L2SB_RCST_DISP,L2 slice B RC store dispatch attempt ##702C1 A Read/Claim dispatch for a Store was attempted. #151,v,g,n,s,PM_L2SB_RCST_DISP_FAIL_ADDR,L2 slice B RC store dispatch attempt failed due to address collision with RC/CO/SN/SQ ##712C1 A Read/Claim dispatch for a store failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time. #152,v,g,n,s,PM_L2SB_RCST_DISP_FAIL_OTHER,L2 slice B RC store dispatch attempt failed due to other reasons ##732E1 A Read/Claim dispatch for a store failed for some reason other than Full or Collision conditions. Rejected dispatches do not count because they have not yet been attempted. #153,v,g,n,s,PM_L2SB_RCST_DISP_FAIL_RC_FULL,L2 slice B RC store dispatch attempt failed due to all RC full ##722E2 A Read/Claim dispatch for a store failed because all RC machines are busy. #154,v,g,n,s,PM_L2SB_RC_DISP_FAIL_CO_BUSY,L2 slice B RC dispatch attempt failed due to RC/CO pair chosen was miss and CO already busy ##703C1 A Read/Claim Dispatch was rejected at dispatch because the Castout Machine was busy. 
In the case of an RC starting up on a miss and the victim is valid, the CO machine must be available for the RC to process the access. If the CO is still busy working on an old castout, then the RC must not-ack the access if it is a miss(re-issued by the CIU). If it is a miss and the CO is available to process the castout, the RC will accept the access. Once the RC has finished, it can restart and process new accesses that result in a hit (or miss that doesn't need a CO) even though the CO is still processing a castout from a previous access. #155,v,g,n,s,PM_L2SB_RC_DISP_FAIL_CO_BUSY_ALL,L2 slice B RC dispatch attempt failed due to all CO busy ##713C1 A Read/Claim dispatch was rejected because all Castout machines were busy. #156,v,g,n,s,PM_L2SB_SHR_INV,L2 slice B transition from shared to invalid ##710C1 A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. This transition was caused by any external snoop request. The event is provided on each of the three slices A, B, and C. NOTE: For this event to be useful the tablewalk duration event should also be counted. #157,v,g,n,s,PM_L2SB_SHR_MOD,L2 slice B transition from shared to modified ##700C1 A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A, B, and C. #158,v,g,n,n,PM_L2SB_ST_HIT,L2 slice B store hits ##733E1 A store request made from the core hit in the L2 directory. This event is provided on each of the three L2 slices A, B and C. #159,v,g,n,n,PM_L2SB_ST_REQ,L2 slice B store requests ##723E1 A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. The event is provided on each of the three slices A, B, and C. 
#160,v,g,n,s,PM_L2SC_MOD_INV,L2 slice C transition from modified to invalid ##730E2 A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C. #161,v,g,n,s,PM_L2SC_MOD_TAG,L2 slice C transition from modified to tagged ##720E2 A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C. #162,v,g,n,s,PM_L2SC_RCLD_DISP,L2 slice C RC load dispatch attempt ##701C2 A Read/Claim dispatch for a Load was attempted #163,v,g,n,s,PM_L2SC_RCLD_DISP_FAIL_ADDR,L2 slice C RC load dispatch attempt failed due to address collision with RC/CO/SN/SQ ##711C2 A Read/Claim dispatch for a load failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time. #164,v,g,n,s,PM_L2SC_RCLD_DISP_FAIL_OTHER,L2 slice C RC load dispatch attempt failed due to other reasons ##731E2 A Read/Claim dispatch for a load failed for some reason other than Full or Collision conditions. #165,v,g,n,s,PM_L2SC_RCLD_DISP_FAIL_RC_FULL,L2 slice C RC load dispatch attempt failed due to all RC full ##721E2 A Read/Claim dispatch for a load failed because all RC machines are busy. #166,v,g,n,s,PM_L2SC_RCST_DISP,L2 slice C RC store dispatch attempt ##702C2 A Read/Claim dispatch for a Store was attempted. #167,v,g,n,s,PM_L2SC_RCST_DISP_FAIL_ADDR,L2 slice C RC store dispatch attempt failed due to address collision with RC/CO/SN/SQ ##712C2 A Read/Claim dispatch for a store failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time. 
#168,v,g,n,s,PM_L2SC_RCST_DISP_FAIL_OTHER,L2 slice C RC store dispatch attempt failed due to other reasons ##732E2 A Read/Claim dispatch for a store failed for some reason other than Full or Collision conditions. Rejected dispatches do not count because they have not yet been attempted. #169,v,g,n,s,PM_L2SC_RCST_DISP_FAIL_RC_FULL,L2 slice C RC store dispatch attempt failed due to all RC full ##722E2 A Read/Claim dispatch for a store failed because all RC machines are busy. #170,v,g,n,s,PM_L2SC_RC_DISP_FAIL_CO_BUSY,L2 slice C RC dispatch attempt failed due to RC/CO pair chosen was miss and CO already busy ##703C2 A Read/Claim Dispatch was rejected at dispatch because the Castout Machine was busy. In the case of an RC starting up on a miss and the victim is valid, the CO machine must be available for the RC to process the access. If the CO is still busy working on an old castout, then the RC must not-ack the access if it is a miss(re-issued by the CIU). If it is a miss and the CO is available to process the castout, the RC will accept the access. Once the RC has finished, it can restart and process new accesses that result in a hit (or miss that doesn't need a CO) even though the CO is still processing a castout from a previous access. #171,v,g,n,s,PM_L2SC_RC_DISP_FAIL_CO_BUSY_ALL,L2 slice C RC dispatch attempt failed due to all CO busy ##713C2 A Read/Claim dispatch was rejected because all Castout machines were busy. #172,v,g,n,s,PM_L2SC_SHR_INV,L2 slice C transition from shared to invalid ##710C2 A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. This transition was caused by any external snoop request. The event is provided on each of the three slices A, B, and C. NOTE: For this event to be useful the tablewalk duration event should also be counted. 
#173,v,g,n,s,PM_L2SC_SHR_MOD,L2 slice C transition from shared to modified ##700C2 A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A, B, and C. #174,v,g,n,n,PM_L2SC_ST_HIT,L2 slice C store hits ##733E2 A store request made from the core hit in the L2 directory. The event is provided on each of the three slices A, B, and C. #175,v,g,n,n,PM_L2SC_ST_REQ,L2 slice C store requests ##723E2 A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. The event is provided on each of the three slices A, B, and C. #176,v,g,n,n,PM_L2_PREF,L2 cache prefetches ##C50C3 A request to prefetch data into L2 was made #177,v,g,n,s,PM_L3SA_ALL_BUSY,L3 slice A active for every cycle all CI/CO machines busy ##721E3 Cycles All Castin/Castout machines are busy. #178,v,g,n,s,PM_L3SA_HIT,L3 slice A hits ##711C3 Number of attempts made by this chip cores that resulted in an L3 hit. Reported per L3 slice #179,v,g,n,s,PM_L3SA_MOD_INV,L3 slice A transition from modified to invalid ##730E3 L3 snooper detects someone doing a store to a line that is truly M in this L3 (i.e. L3 going M=>I) Mu|Me are not included since they are formed due to a prev read op. Tx is not included since it is considered shared at this point. #180,v,g,n,s,PM_L3SA_MOD_TAG,L3 slice A transition from modified to TAG ##720E3 L3 snooper detects someone doing a read to a line that is truly M in this L3(i.e. L3 going M->T or M->I(go_Mu case) Mu|Me are not included since they are formed due to a prev read op). Tx is not included since it is considered shared at this point. #181,v,g,n,s,PM_L3SA_REF,L3 slice A references ##701C3 Number of attempts made by this chip cores to find data in the L3. 
Reported per L3 slice #182,v,g,n,s,PM_L3SA_SHR_INV,L3 slice A transition from shared to invalid ##710C3 L3 snooper detects someone doing a store to a line that is Sx in this L3(i.e. invalidate hit SX and dispatched). #183,v,g,n,s,PM_L3SA_SNOOP_RETRY,L3 slice A snoop retries ##731E3 Number of times an L3 retried a snoop because it got two in at the same time (one on snp_a, one on snp_b) #184,v,g,n,s,PM_L3SB_ALL_BUSY,L3 slice B active for every cycle all CI/CO machines busy ##721E4 Cycles All Castin/Castout machines are busy. #185,v,g,n,s,PM_L3SB_HIT,L3 slice B hits ##711C4 Number of attempts made by this chip cores that resulted in an L3 hit. Reported per L3 slice #186,v,g,n,s,PM_L3SB_MOD_INV,L3 slice B transition from modified to invalid ##730E4 L3 snooper detects someone doing a store to a line that is truly M in this L3 (i.e. L3 going M=>I). Mu|Me are not included since they are formed due to a prev read op. Tx is not included since it is considered shared at this point. #187,v,g,n,s,PM_L3SB_MOD_TAG,L3 slice B transition from modified to TAG ##720E4 L3 snooper detects someone doing a read to a line that is truly M in this L3(i.e. L3 going M->T or M->I(go_Mu case); Mu|Me are not included since they are formed due to a prev read op). Tx is not included since it is considered shared at this point. #188,v,g,n,s,PM_L3SB_REF,L3 slice B references ##701C4 Number of attempts made by this chip cores to find data in the L3. Reported per L3 slice #189,v,g,n,s,PM_L3SB_SHR_INV,L3 slice B transition from shared to invalid ##710C4 L3 snooper detects someone doing a store to a line that is Sx in this L3(i.e. invalidate hit SX and dispatched). #190,v,g,n,s,PM_L3SB_SNOOP_RETRY,L3 slice B snoop retries ##731E4 Number of times an L3 retried a snoop because it got two in at the same time (one on snp_a, one on snp_b) #191,v,g,n,s,PM_L3SC_ALL_BUSY,L3 slice C active for every cycle all CI/CO machines busy ##721E5 Cycles All Castin/Castout machines are busy. 
#192,v,g,n,s,PM_L3SC_HIT,L3 slice C hits ##711C5 Number of attempts made by this chip cores that resulted in an L3 hit. Reported per L3 Slice #193,v,g,n,s,PM_L3SC_MOD_INV,L3 slice C transition from modified to invalid ##730E5 L3 snooper detects someone doing a store to a line that is truly M in this L3 (i.e. L3 going M=>I) Mu|Me are not included since they are formed due to a previous read op Tx is not included since it is considered shared at this point. #194,v,g,n,s,PM_L3SC_MOD_TAG,L3 slice C transition from modified to TAG ##720E5 L3 snooper detects someone doing a read to a line that is truly M in this L3(i.e. L3 going M->T or M->I(go_Mu case); Mu|Me are not included since they are formed due to a prev read op). Tx is not included since it is considered shared at this point. #195,v,g,n,s,PM_L3SC_REF,L3 slice C references ##701C5 Number of attempts made by this chip cores to find data in the L3. Reported per L3 slice. #196,v,g,n,s,PM_L3SC_SHR_INV,L3 slice C transition from shared to invalid ##710C5 L3 snooper detects someone doing a store to a line that is Sx in this L3(i.e. invalidate hit SX and dispatched). #197,v,g,n,s,PM_L3SC_SNOOP_RETRY,L3 slice C snoop retries ##731E5 Number of times an L3 retried a snoop because it got two in at the same time (one on snp_a, one on snp_b) #198,v,g,n,n,PM_LARX_LSU0,Larx executed on LSU0 ##820E7 A larx (lwarx or ldarx) was executed on side 0 (there is no corresponding unit 1 event since larx instructions can only execute on unit 0) #199,v,g,n,n,PM_LD_MISS_L1_LSU0,LSU0 L1 D cache load misses ##C10C2 Load references that miss the Level 1 Data cache, by unit 0. #200,v,g,n,n,PM_LD_MISS_L1_LSU1,LSU1 L1 D cache load misses ##C10C5,C10C6 Load references that miss the Level 1 Data cache, by unit 1. #201,v,g,n,n,PM_LD_REF_L1,L1 D cache load references ##C10A8 Load references to the Level 1 Data Cache. Combined unit 0 + 1. 
#202,v,g,n,n,PM_LD_REF_L1_LSU0,LSU0 L1 D cache load references ##C10C0 Load references to Level 1 Data Cache, by unit 0. #203,v,g,n,n,PM_BR_PRED_TA,A conditional branch was predicted, target prediction ##230E3 The target address of a branch instruction was predicted. #204,u,g,n,s,PM_LR_CTR_MAP_FULL_CYC,Cycles LR/CTR mapper full ##100C6 The LR/CTR mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. This event only indicates that the mapper was full, not that dispatch was prevented. #205,v,g,n,n,PM_LSU0_BUSY_REJECT,LSU0 busy due to reject ##C20E1 Total cycles the Load Store Unit 0 is busy rejecting instructions. #206,v,g,n,n,PM_LSU0_DERAT_MISS,LSU0 DERAT misses ##800C2 Total D-ERAT Misses by LSU0. Requests that miss the Derat are rejected and retried until the request hits in the Erat. This may result in multiple erat misses for the same instruction. #207,v,g,n,n,PM_LSU0_FLUSH_LRQ,LSU0 LRQ flushes ##C00C2 A load was flushed by unit 0 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. #208,u,g,n,n,PM_LSU0_FLUSH_SRQ,LSU0 SRQ lhs flushes ##C00C3 A store was flushed by unit 0 because younger load hits and older store that is already in the SRQ or in the same group. #209,v,g,n,n,PM_LSU0_FLUSH_ULD,LSU0 unaligned load flushes ##C00C0 A load was flushed from unit 0 because it was unaligned (crossed a 64 byte boundary, or 32 byte if it missed the L1) #210,v,g,n,n,PM_LSU0_FLUSH_UST,LSU0 unaligned store flushes ##C00C1 A store was flushed from unit 0 because it was unaligned (crossed a 4K boundary). #211,v,g,n,n,PM_LSU0_LDF,LSU0 executed Floating Point load instruction ##C50C0 A floating point load was executed by LSU0 #212,v,g,n,n,PM_LSU0_NCLD,LSU0 non-cacheable loads ##C50C1 A non-cacheable load was executed by unit 0. 
#213,v,g,n,n,PM_LSU0_REJECT_ERAT_MISS,LSU0 reject due to ERAT miss ##C40C3 Total cycles the Load Store Unit 0 is busy rejecting instructions due to an ERAT miss. Requests that miss the Derat are rejected and retried until the request hits in the Erat. #214,v,g,n,n,PM_LSU0_REJECT_LMQ_FULL,LSU0 reject due to LMQ full or missed data coming ##C40C1 Total cycles the Load Store Unit 0 is busy rejecting instructions because the Load Miss Queue was full. The LMQ has eight entries. If all eight entries are full, subsequent load instructions are rejected. #215,v,g,n,n,PM_LSU0_REJECT_RELOAD_CDF,LSU0 reject due to reload CDF or tag update collision ##C40C2 Total cycles the Load Store Unit 0 is busy rejecting instructions because of Critical Data Forward. When critical data arrives from the storage system it is formatted and immediately forwarded, bypassing the data cache, to the destination register using the result bus. Any instruction that requires the result bus in the same cycle is rejected. Tag update rejects are caused when an instruction requires access to the Dcache directory or ERAT in the same system when they are being updated. #216,v,g,n,n,PM_LSU0_REJECT_SRQ,LSU0 SRQ lhs rejects ##C40C0 Total cycles the Load Store Unit 0 is busy rejecting instructions because of Load Hit Store conditions. Loads are rejected when data is needed from a previous store instruction but store forwarding is not possible because the data is not fully contained in the Store Data Queue or is not yet available in the Store Data Queue. #217,u,g,n,n,PM_LSU0_SRQ_STFWD,LSU0 SRQ store forwarded ##C60E1 Data from a store instruction was forwarded to a load on unit 0. A load that misses L1 but becomes a store forward is treated as a load miss and it causes the DL1 load miss event to be counted. It does not go into the LMQ. If a load that hits L1 but becomes a store forward, then it's not treated as a load miss. 
#218,v,g,n,n,PM_LSU1_BUSY_REJECT,LSU1 busy due to reject ##C20E5 Total cycles the Load Store Unit 1 is busy rejecting instructions. #219,v,g,n,n,PM_LSU1_DERAT_MISS,LSU1 DERAT misses ##800C6 A data request (load or store) from LSU Unit 1 missed the ERAT and resulted in an ERAT reload. Multiple instructions may miss the ERAT entry for the same 4K page, but only one reload will occur. #220,v,g,n,n,PM_LSU1_FLUSH_LRQ,LSU1 LRQ flushes ##C00C6 A load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. #221,u,g,n,n,PM_LSU1_FLUSH_SRQ,LSU1 SRQ lhs flushes ##C00C7 A store was flushed because younger load hits and older store that is already in the SRQ or in the same group. #222,v,g,n,n,PM_LSU1_FLUSH_ULD,LSU1 unaligned load flushes ##C00C4 A load was flushed from unit 1 because it was unaligned (crossed a 64 byte boundary, or 32 byte if it missed the L1). #223,u,g,n,n,PM_LSU1_FLUSH_UST,LSU1 unaligned store flushes ##C00C5 A store was flushed from unit 1 because it was unaligned (crossed a 4K boundary) #224,v,g,n,n,PM_LSU1_LDF,LSU1 executed Floating Point load instruction ##C50C4 A floating point load was executed by LSU1 #225,v,g,n,n,PM_LSU1_NCLD,LSU1 non-cacheable loads ##C50C5 A non-cacheable load was executed by unit 1. #226,v,g,n,n,PM_LSU1_REJECT_ERAT_MISS,LSU1 reject due to ERAT miss ##C40C7 Total cycles the Load Store Unit 1 is busy rejecting instructions due to an ERAT miss. Requests that miss the Derat are rejected and retried until the request hits in the Erat. #227,v,g,n,n,PM_LSU1_REJECT_LMQ_FULL,LSU1 reject due to LMQ full or missed data coming ##C40C5 Total cycles the Load Store Unit 1 is busy rejecting instructions because the Load Miss Queue was full. The LMQ has eight entries. If all eight entries are full, subsequent load instructions are rejected. 
#228,v,g,n,n,PM_LSU1_REJECT_RELOAD_CDF,LSU1 reject due to reload CDF or tag update collision ##C40C6 Total cycles the Load Store Unit 1 is busy rejecting instructions because of Critical Data Forward. When critical data arrives from the storage system it is formatted and immediately forwarded, bypassing the data cache, to the destination register using the result bus. Any instruction that requires the result bus in the same cycle is rejected. Tag update rejects are caused when an instruction requires access to the Dcache directory or ERAT in the same system when they are being updated. #229,v,g,n,n,PM_LSU1_REJECT_SRQ,LSU1 SRQ lhs rejects ##C40C4 Total cycles the Load Store Unit 1 is busy rejecting instructions because of Load Hit Store conditions. Loads are rejected when data is needed from a previous store instruction but store forwarding is not possible because the data is not fully contained in the Store Data Queue or is not yet available in the Store Data Queue. #230,u,g,n,n,PM_LSU1_SRQ_STFWD,LSU1 SRQ store forwarded ##C60E5 Data from a store instruction was forwarded to a load on unit 1. A load that misses L1 but becomes a store forward is treated as a load miss and it causes the DL1 load miss event to be counted. It does not go into the LMQ. If a load that hits L1 but becomes a store forward, then it's not treated as a load miss. #231,v,g,n,n,PM_LSU_FLUSH,Flush initiated by LSU ##110C5 A flush was initiated by the Load Store Unit #232,v,g,n,s,PM_LSU_FLUSH_LRQ_FULL,Flush caused by LRQ full ##320E7 This thread was flushed at dispatch because its Load Request Queue was full. This allows the other thread to have more machine resources for it to make progress while this thread is stalled. #233,u,g,n,n,PM_LSU_FLUSH_SRQ,SRQ flushes ##C0090 A store was flushed because younger load hits and older store that is already in the SRQ or in the same group. Combined Unit 0 + 1. 
#234,v,g,n,s,PM_LSU_FLUSH_SRQ_FULL,Flush caused by SRQ full ##330E0 This thread was flushed at dispatch because its Store Request Queue was full. This allows the other thread to have more machine resources for it to make progress while this thread is stalled. #235,v,g,n,n,PM_LSU_FLUSH_ULD,LRQ unaligned load flushes ##C0088 A load was flushed because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1). Combined Unit 0 + 1. #236,v,g,n,n,PM_LSU_LDF,LSU executed Floating Point load instruction ##C50A8 LSU executed Floating Point load instruction. Combined Unit 0 + 1. #237,u,g,n,s,PM_LSU_LMQ_FULL_CYC,Cycles LMQ full ##C30E7 The Load Miss Queue was full. #238,v,g,n,n,PM_LSU_LMQ_LHR_MERGE,LMQ LHR merges ##C70E5 A data cache miss occurred for the same real cache line address as an earlier request already in the Load Miss Queue and was merged into the LMQ entry. #239,v,g,n,s,PM_LSU_LMQ_S0_ALLOC,LMQ slot 0 allocated ##C30E6 The first entry in the LMQ was allocated. #240,v,g,n,n,PM_LSU_LMQ_S0_VALID,LMQ slot 0 valid ##C30E5 This signal is asserted every cycle when the first entry in the LMQ is valid. The LMQ had eight entries that are allocated FIFO #241,v,g,n,s,PM_LSU_LRQ_FULL_CYC,Cycles LRQ full ##110C2 Cycles when the LRQ is full. #242,v,g,n,n,PM_LSU_LRQ_S0_ALLOC,LRQ slot 0 allocated ##C60E7 LRQ slot zero was allocated #243,v,g,n,n,PM_LSU_LRQ_S0_VALID,LRQ slot 0 valid ##C60E6 This signal is asserted every cycle that the Load Request Queue slot zero is valid. The LRQ is 32 entries long and is allocated round-robin. In SMT mode the LRQ is split between the two threads (16 entries each). #244,v,g,n,n,PM_LSU_REJECT_ERAT_MISS,LSU reject due to ERAT miss ##C4090 Total cycles the Load Store Unit is busy rejecting instructions due to an ERAT miss. Combined unit 0 + 1. Requests that miss the Derat are rejected and retried until the request hits in the Erat. 
#245,v,g,n,n,PM_LSU_REJECT_SRQ,LSU SRQ lhs rejects ##C4088 Total cycles the Load Store Unit is busy rejecting instructions because of Load Hit Store conditions. Loads are rejected when data is needed from a previous store instruction but store forwarding is not possible because the data is not fully contained in the Store Data Queue or is not yet available in the Store Data Queue. Combined Unit 0 + 1. #246,v,g,n,s,PM_LSU_SRQ_FULL_CYC,Cycles SRQ full ##110C3 Cycles the Store Request Queue is full. #247,v,g,n,n,PM_LSU_SRQ_S0_ALLOC,SRQ slot 0 allocated ##C20E7 SRQ Slot zero was allocated #248,v,g,n,n,PM_LSU_SRQ_S0_VALID,SRQ slot 0 valid ##C20E6 This signal is asserted every cycle that the Store Request Queue slot zero is valid. The SRQ is 32 entries long and is allocated round-robin. In SMT mode the SRQ is split between the two threads (16 entries each). #249,u,g,n,n,PM_LSU_SRQ_SYNC_CYC,SRQ sync duration ##830E5 Cycles that a sync instruction is active in the Store Request Queue. #250,v,g,n,n,PM_LWSYNC_HELD,LWSYNC held at dispatch ##130E0 Cycles a LWSYNC instruction was held at dispatch. LWSYNC instructions are held at dispatch until all previous loads are done and all previous stores have issued. LWSYNC enters the Store Request Queue and is sent to the storage subsystem but does not wait for a response. #251,c,g,n,n,PM_MEM_FAST_PATH_RD_DISP,Fast path memory read dispatched ##731E6 Fast path memory read dispatched #252,v,g,n,n,PM_IC_PREF_INSTALL,Instruction prefetched installed in prefetch buffer ##210C7 A prefetch buffer entry (line) is allocated but the request is not a demand fetch. #253,v,g,n,s,PM_MEM_HI_PRIO_WR_CMPL,High priority write completed ##726E6 A memory write, which was upgraded to high priority, completed. Writes can be upgraded to high priority to ensure that read traffic does not lock out writes. This event is sent from the Memory Controller clock domain and must be scaled accordingly. 
#254,v,g,n,s,PM_MEM_NONSPEC_RD_CANCEL,Non speculative memory read cancelled ##711C6 A non-speculative read was cancelled because the combined response indicated it was sourced from another L2 or L3. This event is sent from the Memory Controller clock domain and must be scaled accordingly #255,v,g,n,s,PM_MEM_LO_PRIO_WR_CMPL,Low priority write completed ##736E6 A memory write, which was not upgraded to high priority, completed. This event is sent from the Memory Controller clock domain and must be scaled accordingly #256,v,g,n,s,PM_MEM_PWQ_DISP,Memory partial-write queue dispatched ##704C6 Number of Partial Writes dispatched. The MC provides resources to gather partial cacheline writes (Partial line DMA writes & CI-stores) to up to four different cachelines at a time. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #257,v,g,n,n,PM_MEM_PWQ_DISP_Q2or3,Memory partial-write queue dispatched to Write Queue 2 or 3 ##734E6 Memory partial-write queue dispatched to Write Queue 2 or 3. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #258,v,g,n,s,PM_MEM_PW_CMPL,Memory partial-write completed ##724E6 Number of Partial Writes completed. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #259,v,g,n,s,PM_MEM_PW_GATH,Memory partial-write gathered ##714C6 Two or more partial-writes have been merged into a single memory write. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #260,v,g,n,n,PM_MEM_RQ_DISP_Q0to3,Memory read queue dispatched to queues 0-3 ##702C6 A memory operation was dispatched to read queue 0,1,2, or 3. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #261,v,g,n,s,PM_MEM_RQ_DISP,Memory read queue dispatched ##701C6 A memory read was dispatched. This event is sent from the Memory Controller clock domain and must be scaled accordingly. 
#262,v,g,n,n,PM_MEM_RQ_DISP_Q4to7,Memory read queue dispatched to queues 4-7 ##712C6 A memory operation was dispatched to read queue 4,5,6 or 7. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #263,v,g,n,n,PM_MEM_RQ_DISP_Q8to11,Memory read queue dispatched to queues 8-11 ##722E6 A memory operation was dispatched to read queue 8,9,10 or 11. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #264,v,g,n,s,PM_MEM_SPEC_RD_CANCEL,Speculative memory read cancelled ##721E6 Speculative memory read cancelled (i.e. cresp = sourced by L2/L3) #265,v,g,n,n,PM_MEM_WQ_DISP_Q0to7,Memory write queue dispatched to queues 0-7 ##723E6 A memory operation was dispatched to a write queue in the range between 0 and 7. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #266,v,g,n,n,PM_MEM_WQ_DISP_Q8to15,Memory write queue dispatched to queues 8-15 ##733E6 A memory operation was dispatched to a write queue in the range between 8 and 15. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #267,v,g,n,s,PM_MEM_WQ_DISP_DCLAIM,Memory write queue dispatched due to dclaim/flush ##713C6 A memory dclaim or flush operation was dispatched to a write queue. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #268,v,g,n,s,PM_MEM_WQ_DISP_WRITE,Memory write queue dispatched due to write ##703C6 A memory write was dispatched to a write queue. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #269,v,g,n,n,PM_MRK_DATA_FROM_L2,Marked data loaded from L2 ##C7087 The processor's Data Cache was reloaded from the local L2 due to a marked load. 
#270,v,g,n,n,PM_MRK_DATA_FROM_L25_SHR,Marked data loaded from L2.5 shared ##C7097 The processor's Data Cache was reloaded with shared (T or SL) data from the L2 of a chip on the same module as this processor is located due to a marked load. #271,v,g,n,n,PM_MRK_DATA_FROM_L275_MOD,Marked data loaded from L2.75 modified ##C70A3 The processor's Data Cache was reloaded with modified (M) data from the L2 on a different module than this processor is located due to a marked load. #272,v,g,n,n,PM_MRK_DATA_FROM_L3,Marked data loaded from L3 ##C708E The processor's Data Cache was reloaded from the local L3 due to a marked load. #273,v,g,n,n,PM_MRK_DATA_FROM_L35_SHR,Marked data loaded from L3.5 shared ##C709E The processor's Data Cache was reloaded with shared (S) data from the L3 of a chip on the same module as this processor is located due to a marked load. #274,v,g,n,n,PM_MRK_DATA_FROM_L375_MOD,Marked data loaded from L3.75 modified ##C70A7 The processor's Data Cache was reloaded with modified (M) data from the L3 of a chip on a different module than this processor is located due to a marked load. #275,v,g,n,n,PM_MRK_DATA_FROM_RMEM,Marked data loaded from remote memory ##C70A1 The processor's Data Cache was reloaded due to a marked load from memory attached to a different module than this processor is located on. #276,v,g,n,n,PM_MRK_DSLB_MISS,Marked Data SLB misses ##C50C7 A Data SLB miss was caused by a marked instruction. #277,v,g,n,n,PM_MRK_DTLB_MISS,Marked Data TLB misses ##C50C6,C60E0 Data TLB references by a marked instruction that missed the TLB (all page sizes). #278,v,g,n,n,PM_MRK_DTLB_MISS_4K,Marked Data TLB misses for 4K page ##C608D Data TLB references to 4KB pages by a marked instruction that missed the TLB. Page size is determined at TLB reload time. #279,v,g,n,n,PM_MRK_DTLB_REF,Marked Data TLB reference ##C60E4 Total number of Data TLB references by a marked instruction for all page sizes. Page size is determined at TLB reload time. 
#280,v,g,n,n,PM_MRK_DTLB_REF_4K,Marked Data TLB reference for 4K page ##C6086 Data TLB references by a marked instruction for 4KB pages. #281,v,g,n,n,PM_MRK_GRP_DISP,Marked group dispatched ##00002 A group containing a sampled instruction was dispatched #282,v,g,n,n,PM_MRK_GRP_ISSUED,Marked group issued ##00015 A sampled instruction was issued. #283,v,g,n,n,PM_MRK_IMR_RELOAD,Marked IMR reloaded ##820E2 A DL1 reload occurred due to marked load #284,v,g,n,n,PM_MRK_L1_RELOAD_VALID,Marked L1 reload data source valid ##C70E4 The source information is valid and is for a marked load #285,v,g,n,n,PM_MRK_LD_MISS_L1,Marked L1 D cache load misses ##82088 Marked L1 D cache load misses #286,v,g,n,n,PM_MRK_LD_MISS_L1_LSU0,LSU0 marked L1 D cache load misses ##820E0 Load references that miss the Level 1 Data cache, by LSU0. #287,v,g,n,n,PM_MRK_LD_MISS_L1_LSU1,LSU1 marked L1 D cache load misses ##820E4 Load references that miss the Level 1 Data cache, by LSU1. #288,v,g,n,n,PM_MRK_LSU0_FLUSH_LRQ,LSU0 marked LRQ flushes ##810C2 A marked load was flushed by unit 0 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. #289,u,g,n,n,PM_MRK_LSU0_FLUSH_SRQ,LSU0 marked SRQ lhs flushes ##810C3 A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group. 
#290,v,g,n,n,PM_MRK_LSU0_FLUSH_ULD,LSU0 marked unaligned load flushes ##810C1 A marked load was flushed from unit 0 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) #291,v,g,n,n,PM_MRK_LSU0_FLUSH_UST,LSU0 marked unaligned store flushes ##810C0 A marked store was flushed from unit 0 because it was unaligned #292,v,g,n,n,PM_MRK_LSU1_FLUSH_LRQ,LSU1 marked LRQ flushes ##810C6 A marked load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. #293,u,g,n,n,PM_MRK_LSU1_FLUSH_SRQ,LSU1 marked SRQ lhs flushes ##810C7 A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group. #294,v,g,n,n,PM_MRK_LSU1_FLUSH_ULD,LSU1 marked unaligned load flushes ##810C4 A marked load was flushed from unit 1 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) #295,u,g,n,n,PM_MRK_LSU1_FLUSH_UST,LSU1 marked unaligned store flushes ##810C5 A marked store was flushed from unit 1 because it was unaligned (crossed a 4k boundary) #296,v,g,n,n,PM_MRK_LSU_FLUSH_ULD,Marked unaligned load flushes ##810A8 A marked load was flushed because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) #297,u,g,n,n,PM_MRK_LSU_SRQ_INST_VALID,Marked instruction valid in SRQ ##C70E6 This signal is asserted every cycle when a marked request is resident in the Store Request Queue #298,v,g,n,n,PM_MRK_STCX_FAIL,Marked STCX failed ##820E6 A marked stcx (stwcx or stdcx) failed #299,v,g,n,n,PM_MRK_ST_CMPL,Marked store instruction completed ##00003 A sampled store has completed (data home) #300,v,g,n,n,PM_MRK_ST_MISS_L1,Marked L1 D cache store misses ##820E3 A marked store missed the dcache #301,v,g,n,n,PM_PMC4_OVERFLOW,PMC4 Overflow ##0000A Overflows from PMC4 are counted. 
This effectively widens the PMC. The Overflow from the original PMC will not trigger an exception even if the PMU is configured to generate exceptions on overflow. #302,v,g,n,n,PM_PMC5_OVERFLOW,PMC5 Overflow ##0001A Overflows from PMC5 are counted. This effectively widens the PMC. The Overflow from the original PMC will not trigger an exception even if the PMU is configured to generate exceptions on overflow. #303,v,g,n,n,PM_INST_CMPL,Instructions completed ##00009 Number of PowerPC instructions that completed. #304,v,g,n,n,PM_PTEG_FROM_L2,PTEG loaded from L2 ##83087 A Page Table Entry was loaded into the TLB from the local L2 due to a demand load #305,v,g,n,n,PM_PTEG_FROM_L25_SHR,PTEG loaded from L2.5 shared ##83097 A Page Table Entry was loaded into the TLB with shared (T or SL) data from the L2 of a chip on the same module as this processor is located due to a demand load. #306,v,g,n,n,PM_PTEG_FROM_L275_MOD,PTEG loaded from L2.75 modified ##830A3 A Page Table Entry was loaded into the TLB with modified (M) data from the L2 on a different module than this processor is located due to a demand load. #307,v,g,n,n,PM_PTEG_FROM_L3,PTEG loaded from L3 ##8308E A Page Table Entry was loaded into the TLB from the local L3 due to a demand load. #308,v,g,n,n,PM_PTEG_FROM_L35_SHR,PTEG loaded from L3.5 shared ##8309E A Page Table Entry was loaded into the TLB with shared (S) data from the L3 of a chip on the same module as this processor is located, due to a demand load. #309,v,g,n,n,PM_PTEG_FROM_L375_MOD,PTEG loaded from L3.75 modified ##830A7 A Page Table Entry was loaded into the TLB with modified (M) data from the L3 of a chip on a different module than this processor is located, due to a demand load. #310,v,g,n,n,PM_PTEG_FROM_RMEM,PTEG loaded from remote memory ##830A1 A Page Table Entry was loaded into the TLB from memory attached to a different module than this processor is located on. 
#311,v,g,n,n,PM_PTEG_RELOAD_VALID,PTEG reload valid ##830E4 A Page Table Entry was loaded into the TLB. #312,v,g,n,n,PM_RUN_CYC,Run cycles ##00005 Processor Cycles gated by the run latch. Operating systems use the run latch to indicate when they are doing useful work. The run latch is typically cleared in the OS idle loop. Gating by the run latch filters out the idle loop. #313,v,g,n,s,PM_SNOOP_DCLAIM_RETRY_QFULL,Snoop dclaim/flush retry due to write/dclaim queues full ##720E6 The memory controller A memory write was dispatched to a write queue. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #314,v,g,n,s,PM_SNOOP_PARTIAL_RTRY_QFULL,Snoop partial write retry due to partial-write queues full ##730E6 A snoop request for a partial write to memory was retried because the write queues that handle partial writes were full. When this happens the active writes are changed to high priority. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #315,v,g,n,s,PM_SNOOP_PW_RETRY_RQ,Snoop partial-write retry due to collision with active read queue ##707C6 A snoop request for a partial write to memory was retried because it matched the cache line of an active read. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #316,v,g,n,s,PM_SNOOP_PW_RETRY_WQ_PWQ,Snoop partial-write retry due to collision with active write or partial-write queue ##717C6 A snoop request for a partial write to memory was retried because it matched the cache line of an active write or partial write. When this happens the snoop request is retried and the active write is changed to high priority. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #317,v,g,n,s,PM_SNOOP_RD_RETRY_QFULL,Snoop read retry due to read queue full ##700C6 A snoop request for a read from memory was retried because the read queues were full. 
This event is sent from the Memory Controller clock domain and must be scaled accordingly. #318,v,g,n,s,PM_SNOOP_RD_RETRY_RQ,Snoop read retry due to collision with active read queue ##705C6 A snoop request for a read from memory was retried because it matched the cache line of an active read. The snoop request is retried because the L2 may be able to source data via intervention for the 2nd read faster than the MC. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #319,v,g,n,s,PM_SNOOP_RD_RETRY_WQ,Snoop read retry due to collision with active write queue ##715C6 A snoop request for a read from memory was retried because it matched the cache line of an active write. The snoop request is retried and the active write is changed to high priority. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #320,v,g,n,s,PM_SNOOP_RETRY_1AHEAD,Snoop retry due to one ahead collision ##725E6 Snoop retry due to one ahead collision #321,u,g,n,s,PM_SNOOP_TLBIE,Snoop TLBIE ##800C3 A tlbie was snooped from another processor. #322,v,g,n,s,PM_SNOOP_WR_RETRY_QFULL,Snoop write retry due to write queue full ##710C6 A snoop request for a write to memory was retried because the write queues were full. When this happens the snoop request is retried and the writes in the write reorder queue are changed to high priority. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #323,v,g,n,s,PM_SNOOP_WR_RETRY_RQ,Snoop write/dclaim retry due to collision with active read queue ##706C6 A snoop request for a write or dclaim to memory was retried because it matched the cacheline of an active read.
This event is sent from the Memory Controller clock domain and must be scaled accordingly #324,v,g,n,s,PM_SNOOP_WR_RETRY_WQ,Snoop write/dclaim retry due to collision with active write queue ##716C6 A snoop request for a write or dclaim to memory was retried because it matched the cache line of an active write. The snoop request is retried and the active write is changed to high priority. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #325,v,g,n,n,PM_STCX_FAIL,STCX failed ##820E1 A stcx (stwcx or stdcx) failed #326,v,g,n,n,PM_STCX_PASS,Stcx passes ##820E5 A stcx (stwcx or stdcx) instruction was successful #327,v,g,n,n,PM_ST_MISS_L1,L1 D cache store misses ##C10C3 A store missed the dcache. Combined Unit 0 + 1. #328,v,g,n,n,PM_ST_REF_L1_LSU0,LSU0 L1 D cache store references ##C10C1 Store references to the Data Cache by LSU0. #329,v,g,n,n,PM_ST_REF_L1_LSU1,LSU1 L1 D cache store references ##C10C4 Store references to the Data Cache by LSU1. #330,v,g,n,n,PM_SUSPENDED,Suspended ##00000 The counter is suspended (does not count). #331,u,g,n,n,PM_TB_BIT_TRANS,Time Base bit transition ##00018 When the selected time base bit (as specified in MMCR0[TBSEL])transitions from 0 to 1 #332,v,g,n,s,PM_THRD_L2MISS_BOTH_CYC,Cycles both threads in L2 misses ##410C7 Cycles that both threads have L2 miss pending. If only one thread has a L2 miss pending the other thread is given priority at decode. If both threads have L2 miss pending decode priority is determined by the number of GCT entries used. #333,v,g,n,s,PM_THRD_ONE_RUN_CYC,One of the threads in run cycles ##0000B At least one thread has set its run latch. Operating systems use the run latch to indicate when they are doing useful work. The run latch is typically cleared in the OS idle loop. This event does not respect FCWAIT. #334,v,g,n,n,PM_THRD_PRIO_1_CYC,Cycles thread running at priority level 1 ##420E0 Cycles this thread was running at priority level 1. 
Priority level 1 is the lowest and indicates the thread is sleeping. #335,v,g,n,n,PM_THRD_PRIO_2_CYC,Cycles thread running at priority level 2 ##420E1 Cycles this thread was running at priority level 2. #336,v,g,n,n,PM_THRD_PRIO_3_CYC,Cycles thread running at priority level 3 ##420E2 Cycles this thread was running at priority level 3. #337,v,g,n,n,PM_THRD_PRIO_4_CYC,Cycles thread running at priority level 4 ##420E3 Cycles this thread was running at priority level 4. #338,v,g,n,n,PM_THRD_PRIO_5_CYC,Cycles thread running at priority level 5 ##420E4 Cycles this thread was running at priority level 5. #339,v,g,n,n,PM_THRD_PRIO_6_CYC,Cycles thread running at priority level 6 ##420E5 Cycles this thread was running at priority level 6. #340,v,g,n,n,PM_THRD_PRIO_7_CYC,Cycles thread running at priority level 7 ##420E6 Cycles this thread was running at priority level 7. #341,v,g,n,n,PM_THRD_PRIO_DIFF_0_CYC,Cycles no thread priority difference ##430E3 Cycles when this thread's priority is equal to the other thread's priority. #342,v,g,n,n,PM_THRD_PRIO_DIFF_1or2_CYC,Cycles thread priority difference is 1 or 2 ##430E4 Cycles when this thread's priority is higher than the other thread's priority by 1 or 2. #343,v,g,n,n,PM_THRD_PRIO_DIFF_3or4_CYC,Cycles thread priority difference is 3 or 4 ##430E5 Cycles when this thread's priority is higher than the other thread's priority by 3 or 4. #344,v,g,n,n,PM_THRD_PRIO_DIFF_5or6_CYC,Cycles thread priority difference is 5 or 6 ##430E6 Cycles when this thread's priority is higher than the other thread's priority by 5 or 6. #345,v,g,n,n,PM_THRD_PRIO_DIFF_minus1or2_CYC,Cycles thread priority difference is -1 or -2 ##430E2 Cycles when this thread's priority is lower than the other thread's priority by 1 or 2. #346,v,g,n,n,PM_THRD_PRIO_DIFF_minus3or4_CYC,Cycles thread priority difference is -3 or -4 ##430E1 Cycles when this thread's priority is lower than the other thread's priority by 3 or 4. 
#347,v,g,n,n,PM_THRD_PRIO_DIFF_minus5or6_CYC,Cycles thread priority difference is -5 or -6 ##430E0 Cycles when this thread's priority is lower than the other thread's priority by 5 or 6. #348,v,g,n,s,PM_THRD_SEL_OVER_CLB_EMPTY,Thread selection overrides caused by CLB empty ##410C2 Thread selection was overridden because one thread's CLB was empty. #349,v,g,n,s,PM_THRD_SEL_OVER_GCT_IMBAL,Thread selection overrides caused by GCT imbalance ##410C4 Thread selection was overridden because of a GCT imbalance. #350,v,g,n,s,PM_THRD_SEL_OVER_ISU_HOLD,Thread selection overrides caused by ISU holds ##410C5 Thread selection was overridden because of an ISU hold. #351,v,g,n,s,PM_THRD_SEL_OVER_L2MISS,Thread selection overrides caused by L2 misses ##410C3 Thread selection was overridden because one thread had a L2 miss pending. #352,v,g,n,s,PM_THRD_SEL_T0,Decode selected thread 0 ##410C0 Thread selection picked thread 0 for decode. #353,v,g,n,s,PM_THRD_SEL_T1,Decode selected thread 1 ##410C1 Thread selection picked thread 1 for decode. #354,v,g,n,s,PM_THRD_SMT_HANG,SMT hang detected ##330E7 A hung thread was detected #355,v,g,n,n,PM_TLBIE_HELD,TLBIE held at dispatch ##130E4 Cycles a TLBIE instruction was held at dispatch. #356,v,g,n,n,PM_TLB_MISS,TLB misses ##80088 Total of Data TLB misses + Instruction TLB misses #357,v,g,n,s,PM_XER_MAP_FULL_CYC,Cycles XER mapper full ##100C2 The XER mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. This event only indicates that the mapper was full, not that dispatch was prevented. #358,v,g,n,n,PM_BR_PRED_CR,A conditional branch was predicted, CR prediction ##230E2 A conditional branch instruction was predicted as taken or not taken. #359,v,g,n,n,PM_MEM_RQ_DISP_Q12to15,Memory read queue dispatched to queues 12-15 ##732E6 A memory operation was dispatched to read queue 12,13,14 or 15. This event is sent from the Memory Controller clock domain and must be scaled accordingly.
#360,v,g,n,n,PM_MEM_RQ_DISP_Q16to19,Memory read queue dispatched to queues 16-19 ##727E6 A memory operation was dispatched to read queue 16,17,18 or 19. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #361,v,g,n,n,PM_SNOOP_RETRY_AB_COLLISION,Snoop retry due to a b collision ##735E6 Snoop retry due to a b collision $$$$$$$$ { counter 2 } #0,v,g,n,n,PM_0INST_CLB_CYC,Cycles no instructions in CLB ##400C0 The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. #1,v,g,n,n,PM_1INST_CLB_CYC,Cycles 1 instruction in CLB ##400C1 The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. #2,v,g,n,n,PM_2INST_CLB_CYC,Cycles 2 instructions in CLB ##400C2 The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. #3,v,g,n,n,PM_3INST_CLB_CYC,Cycles 3 instructions in CLB ##400C3 The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. 
Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. #4,v,g,n,n,PM_4INST_CLB_CYC,Cycles 4 instructions in CLB ##400C4 The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. #5,v,g,n,n,PM_5INST_CLB_CYC,Cycles 5 instructions in CLB ##400C5 The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. #6,v,g,n,n,PM_6INST_CLB_CYC,Cycles 6 instructions in CLB ##400C6 The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. #7,u,g,n,s,PM_BRQ_FULL_CYC,Cycles branch queue full ##100C5 Cycles when the issue queue that feeds the branch unit is full. 
This condition will prevent dispatch groups from being dispatched. This event only indicates that the queue was full, not that dispatch was prevented. #8,v,g,n,n,PM_BR_ISSUED,Branches issued ##230E4 A branch instruction was issued to the branch unit. A branch that was incorrectly predicted may issue and execute multiple times. #9,v,g,n,n,PM_BR_MPRED_CR,Branch mispredictions due to CR bit setting ##230E5 A conditional branch instruction was incorrectly predicted as taken or not taken. The branch execution unit detects a branch mispredict because the CR value is opposite of the predicted value. This will result in a branch redirect flush if not overridden by a flush of an older instruction. #10,v,g,n,n,PM_BR_MPRED_TA,Branch mispredictions due to target address ##230E6 A branch instruction target was incorrectly predicted. This will result in a branch mispredict flush unless a flush is detected from an older instruction. #11,v,g,n,n,PM_BR_PRED_TA,A conditional branch was predicted, target prediction ##23087,230E3 The target address of a branch instruction was predicted. #12,v,g,n,s,PM_CLB_EMPTY_CYC,Cycles CLB empty ##410C6 Cycles when both threads' CLBs are completely empty. #13,v,g,n,n,PM_CLB_FULL_CYC,Cycles CLB full ##220E5 Cycles when both threads' CLBs are full. #14,v,g,n,n,PM_CMPLU_STALL_DCACHE_MISS,Completion stall caused by D cache miss ##1109A Following a completion stall (any period when no groups completed) the last instruction to finish before completion resumes suffered a Data Cache Miss. Data Cache Miss has higher priority than any other Load/Store delay, so if an instruction encounters multiple delays only the Data Cache Miss will be reported and the entire delay period will be charged to Data Cache Miss. This is a subset of PM_CMPLU_STALL_LSU.
#15,v,g,n,n,PM_CMPLU_STALL_FDIV,Completion stall caused by FDIV or FQRT instruction ##1109B Following a completion stall (any period when no groups completed) the last instruction to finish before completion resumes was a floating point divide or square root instruction. This is a subset of PM_CMPLU_STALL_FPU. #16,v,g,n,n,PM_CMPLU_STALL_FXU,Completion stall caused by FXU instruction ##11099 Following a completion stall (any period when no groups completed) the last instruction to finish before completion resumes was a fixed point instruction. #17,v,g,n,n,PM_CMPLU_STALL_LSU,Completion stall caused by LSU instruction ##11098 Following a completion stall (any period when no groups completed) the last instruction to finish before completion resumes was a load/store instruction. #18,u,g,n,s,PM_CRQ_FULL_CYC,Cycles CR issue queue full ##110C1 The issue queue that feeds the Conditional Register unit is full. This condition will prevent dispatch groups from being dispatched. This event only indicates that the queue was full, not that dispatch was prevented. #19,v,g,n,s,PM_CR_MAP_FULL_CYC,Cycles CR logical operation mapper full ##100C4 The Conditional Register mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. This event only indicates that the mapper was full, not that dispatch was prevented. #20,v,g,n,s,PM_CYC,Processor cycles ##0000F Processor cycles #21,v,g,n,n,PM_DATA_FROM_L25_MOD,Data loaded from L2.5 modified ##C3097 The processor's Data Cache was reloaded with modified (M) data from the L2 of a chip on the same module as this processor is located due to a demand load. #22,v,g,n,n,PM_DATA_FROM_L35_MOD,Data loaded from L3.5 modified ##C309E The processor's Data Cache was reloaded with modified (M) data from the L3 of a chip on the same module as this processor is located due to a demand load. 
#23,v,g,n,n,PM_DATA_FROM_LMEM,Data loaded from local memory ##C3087 The processor's Data Cache was reloaded from memory attached to the same module this processor is located on. #24,v,g,n,n,PM_DATA_TABLEWALK_CYC,Cycles doing data tablewalks ##800C7 Cycles a translation tablewalk is active. While a tablewalk is active any request attempting to access the TLB will be rejected and retried. #25,u,g,n,s,PM_DC_INV_L2,L1 D cache entries invalidated from L2 ##C10C7 A dcache invalidate was received from the L2 because a line in L2 was castout. #26,v,g,n,n,PM_DC_PREF_OUT_OF_STREAMS,D cache out of prefetch streams ##C50C2 A new prefetch stream was detected but no more stream entries were available. #27,v,g,n,n,PM_DC_PREF_DST,DST (Data Stream Touch) stream start ##830E6 A prefetch stream was started using the DST instruction. #28,v,g,n,n,PM_DC_PREF_STREAM_ALLOC,D cache new prefetch stream allocated ##830E7 A new Prefetch Stream was allocated. #29,v,g,n,n,PM_DSLB_MISS,Data SLB misses ##800C5 A SLB miss for a data request occurred. SLB misses trap to the operating system to resolve. #30,v,g,n,n,PM_DTLB_MISS,Data TLB misses ##800C4,C20E0 Data TLB misses, all page sizes. #31,v,g,n,n,PM_DTLB_MISS_64K,Data TLB miss for 64K page ##C208D Data TLB references to 64KB pages that missed the TLB. Page size is determined at TLB reload time. #32,v,g,n,n,PM_DTLB_REF,Data TLB references ##C20E4 Total number of Data TLB references for all page sizes. Page size is determined at TLB reload time. #33,v,g,n,n,PM_DTLB_REF_64K,Data TLB reference for 64K page ##C2086 Data TLB references for 64KB pages. Includes hits + misses. #34,v,g,n,n,PM_EE_OFF,Cycles MSR(EE) bit off ##130E3 Cycles MSR(EE) bit was off indicating that interrupts due to external exceptions were masked. #35,u,g,n,n,PM_EE_OFF_EXT_INT,Cycles MSR(EE) bit off and external interrupt pending ##130E7 Cycles when an interrupt due to an external exception is pending but external exceptions were masked.
#36,v,g,n,s,PM_FAB_CMD_ISSUED,Fabric command issued ##700C7 Incremented when a chip issues a command on its SnoopA address bus. Each of the two address busses (SnoopA and SnoopB) is capable of one transaction per fabric cycle (one fabric cycle = 2 cpu cycles in normal 2:1 mode), but each chip can only drive the SnoopA bus, and can only drive one transaction every two fabric cycles (i.e., every four cpu cycles). In MCM-based systems, two chips interleave their accesses to each of the two fabric busses (SnoopA, SnoopB) to reach a peak capability of one transaction per cpu clock cycle. The two chips that drive SnoopB are wired so that the chips refer to the bus as SnoopA but it is connected to the other two chips as SnoopB. Note that this event will only be recorded by the FBC on the chip that sourced the operation. The signal is delivered at FBC speed and the count must be scaled. #37,v,g,n,n,PM_FAB_CMD_RETRIED,Fabric command retried ##710C7 Incremented when a command issued by a chip on its SnoopA address bus is retried for any reason. The overwhelming majority of retries are due to running out of memory controller queues but retries can also be caused by trying to reference addresses that are in a transient cache state -- e.g. a line is transient after issuing a DCLAIM instruction to a shared line but before the associated store completes. Each chip reports its own counts. The signal is delivered at FBC speed and the count must be scaled accordingly. #38,v,g,n,s,PM_FAB_DCLAIM_ISSUED,dclaim issued ##720E7 A DCLAIM command was issued. Each chip reports its own counts. The signal is delivered at FBC speed and the count must be scaled accordingly. #39,v,g,n,s,PM_FAB_DCLAIM_RETRIED,dclaim retried ##730E7 A DCLAIM command was retried. Each chip reports its own counts. The signal is delivered at FBC speed and the count must be scaled accordingly. #40,v,g,n,s,PM_FAB_HOLDtoNN_EMPTY,Hold buffer to NN empty ##722E7 Fabric cycles when the Next Node out hold-buffers are empty.
The signal is delivered at FBC speed and the count must be scaled accordingly. #41,v,g,n,s,PM_FAB_HOLDtoVN_EMPTY,Hold buffer to VN empty ##721E7 Fabric cycles when the Vertical Node out hold-buffers are empty. The signal is delivered at FBC speed and the count must be scaled accordingly. #42,v,g,n,s,PM_FAB_M1toP1_SIDECAR_EMPTY,M1 to P1 sidecar empty ##702C7 Fabric cycles when the Minus-1 hip/hop sidecars (sidecars for chip to chip data transfer) are empty. The signal is delivered at FBC speed and the count must be scaled accordingly. #43,v,g,n,s,PM_FAB_M1toVNorNN_SIDECAR_EMPTY,M1 to VN/NN sidecar empty ##712C7 Fabric cycles when the Minus-1 jump sidecar (sidecars for mcm to mcm data transfer) is empty. The signal is delivered at FBC speed and the count must be scaled accordingly. #44,v,g,n,s,PM_FAB_P1toM1_SIDECAR_EMPTY,P1 to M1 sidecar empty ##701C7 Fabric cycles when the Plus-1 hip/hop sidecars (sidecars for chip to chip data transfer) are empty. The signal is delivered at FBC speed and the count must be scaled accordingly. #45,v,g,n,s,PM_FAB_P1toVNorNN_SIDECAR_EMPTY,P1 to VN/NN sidecar empty ##711C7 Fabric cycles when the Plus-1 jump sidecar (sidecars for mcm to mcm data transfer) is empty. The signal is delivered at FBC speed and the count must be scaled accordingly. #46,v,g,n,s,PM_FAB_PNtoNN_DIRECT,PN to NN beat went straight to its destination ##703C7 Fabric Data beats that the base chip takes the inbound PN data and passes it through to the outbound NN bus without going into a sidecar. The signal is delivered at FBC speed and the count must be scaled. #47,v,g,n,s,PM_FAB_PNtoNN_SIDECAR,PN to NN beat went to sidecar first ##713C7 Fabric Data beats that the base chip takes the inbound PN data and forwards it on to the outbound NN data bus after going into a sidecar first. The signal is delivered at FBC speed and the count must be scaled.
#48,v,g,n,s,PM_FAB_PNtoVN_DIRECT,PN to VN beat went straight to its destination ##723E7 Fabric Data beats that the base chip takes the inbound PN data and passes it through to the outbound VN bus without going into a sidecar. The signal is delivered at FBC speed and the count must be scaled accordingly. #49,v,g,n,s,PM_FAB_PNtoVN_SIDECAR,PN to VN beat went to sidecar first ##733E7 Fabric data beats that the base chip takes the inbound PN data and forwards it on to the outbound VN data bus after going into a sidecar first. The signal is delivered at FBC speed and the count must be scaled accordingly. #50,v,g,n,s,PM_FAB_VBYPASS_EMPTY,Vertical bypass buffer empty ##731E7 Fabric cycles when the Middle Bypass sidecar is empty. The signal is delivered at FBC speed and the count must be scaled accordingly. #51,v,g,n,n,PM_FLUSH,Flushes ##110C7 Flushes occurred including LSU and Branch flushes. #52,v,g,n,n,PM_FLUSH_BR_MPRED,Flush caused by branch mispredict ##110C6 A flush was caused by a branch mispredict. #53,v,g,n,s,PM_FLUSH_IMBAL,Flush caused by thread GCT imbalance ##330E3 This thread has been flushed at dispatch because it is stalled and a GCT imbalance exists. GCT thresholds are set in the TSCR register. This allows the other thread to have more machine resources for it to make progress while this thread is stalled. #54,v,g,n,s,PM_FLUSH_SB,Flush caused by scoreboard operation ##330E2 This thread has been flushed at dispatch because its scoreboard bit is set indicating that a non-renamed resource is being updated. This allows the other thread to have more machine resources for it to make progress while this thread is stalled. #55,v,g,n,s,PM_FLUSH_SYNC,Flush caused by sync ##330E1 This thread has been flushed at dispatch due to a sync, lwsync, ptesync, or tlbsync instruction. This allows the other thread to have more machine resources for it to make progress until the sync finishes. 
#56,v,g,n,s,PM_FPR_MAP_FULL_CYC,Cycles FPR mapper full ##100C1 The Floating Point Register mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. This event only indicates that the mapper was full, not that dispatch was prevented. #57,v,g,n,n,PM_FPU0_1FLOP,FPU0 executed add, mult, sub, cmp or sel instruction ##000C3 The floating point unit has executed an add, mult, sub, compare, fsel, fneg, fabs, fnabs, fres, or frsqrte kind of instruction. These are single FLOP operations. #58,v,g,n,n,PM_FPU0_DENORM,FPU0 received denormalized data ##020E0 FPU0 has encountered a denormalized operand. #59,v,g,n,n,PM_FPU0_FDIV,FPU0 executed FDIV instruction ##000C0 FPU0 has executed a divide instruction. This could be fdiv, fdivs, fdiv., fdivs. #60,v,g,n,n,PM_FPU0_FEST,FPU0 executed FEST instruction ##010C2 FPU0 has executed an estimate instruction. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. #61,v,g,n,n,PM_FPU0_FIN,FPU0 produced a result ##010C3 FPU0 finished, produced a result. This only indicates finish, not completion. Floating Point Stores are included in this count but not Floating Point Loads. #62,v,g,n,n,PM_FPU0_FMA,FPU0 executed multiply-add instruction ##000C1 The floating point unit has executed a multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. #63,v,g,n,n,PM_FPU0_FMOV_FEST,FPU0 executed FMOV or FEST instructions ##010C0 FPU0 has executed a move kind of instruction or one of the estimate instructions. This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ. #64,v,g,n,n,PM_FPU0_FPSCR,FPU0 executed FPSCR instruction ##030E0 FPU0 has executed FPSCR move related instruction. This could be mtfsfi*, mtfsb0*, mtfsb1*, mffs*, mtfsf*, mcrsf* where XYZ* means XYZ, XYZs, XYZ., XYZs. #65,v,g,n,n,PM_FPU0_FRSP_FCONV,FPU0 executed FRSP or FCONV instructions ##010C1 FPU0 has executed a frsp or convert kind of instruction.
This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. #66,v,g,n,n,PM_FPU0_FSQRT,FPU0 executed FSQRT instruction ##000C2 FPU0 has executed a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. #67,v,g,n,s,PM_FPU0_FULL_CYC,Cycles FPU0 issue queue full ##100C3 The issue queue for FPU0 cannot accept any more instructions. Dispatch to this issue queue is stopped. #68,v,g,n,n,PM_FPU0_SINGLE,FPU0 executed single precision instruction ##020E3 FPU0 has executed a single precision instruction. #69,v,g,n,n,PM_FPU0_STALL3,FPU0 stalled in pipe3 ##020E1 FPU0 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). #70,v,g,n,n,PM_FPU0_STF,FPU0 executed store instruction ##020E2 FPU0 has executed a Floating Point Store instruction. #71,v,g,n,n,PM_FPU1_1FLOP,FPU1 executed add, mult, sub, cmp or sel instruction ##000C7 The floating point unit has executed an add, mult, sub, compare, fsel, fneg, fabs, fnabs, fres, or frsqrte kind of instruction. These are single FLOP operations. #72,v,g,n,n,PM_FPU1_DENORM,FPU1 received denormalized data ##020E4 FPU1 has encountered a denormalized operand. #73,v,g,n,n,PM_FPU1_FDIV,FPU1 executed FDIV instruction ##000C4 FPU1 has executed a divide instruction. This could be fdiv, fdivs, fdiv., fdivs. #74,v,g,n,n,PM_FPU1_FEST,FPU1 executed FEST instruction ##010C6 FPU1 has executed an estimate instruction. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. #75,v,g,n,n,PM_FPU1_FIN,FPU1 produced a result ##010C7 FPU1 finished, produced a result. This only indicates finish, not completion. Floating Point Stores are included in this count but not Floating Point Loads. #76,v,g,n,n,PM_FPU1_FMA,FPU1 executed multiply-add instruction ##000C5 The floating point unit has executed a multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs.
#77,v,g,n,n,PM_FPU1_FMOV_FEST,FPU1 executed FMOV or FEST instructions ##010C4 FPU1 has executed a move kind of instruction or one of the estimate instructions. This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ. #78,v,g,n,n,PM_FPU1_FRSP_FCONV,FPU1 executed FRSP or FCONV instructions ##010C5 FPU1 has executed a frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. #79,v,g,n,n,PM_FPU1_FSQRT,FPU1 executed FSQRT instruction ##000C6 FPU1 has executed a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. #80,v,g,n,s,PM_FPU1_FULL_CYC,Cycles FPU1 issue queue full ##100C7 The issue queue for FPU1 cannot accept any more instructions. Dispatch to this issue queue is stopped #81,v,g,n,n,PM_FPU1_SINGLE,FPU1 executed single precision instruction ##020E7 FPU1 has executed a single precision instruction. #82,v,g,n,n,PM_FPU1_STALL3,FPU1 stalled in pipe3 ##020E5 FPU1 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). #83,v,g,n,n,PM_FPU1_STF,FPU1 executed store instruction ##020E6 FPU1 has executed a Floating Point Store instruction. #84,v,g,n,n,PM_FPU_FMA,FPU executed multiply-add instruction ##00088 This signal is active for one cycle when FPU is executing multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. Combined Unit 0 + Unit 1. #85,v,g,n,n,PM_FPU_FRSP_FCONV,FPU executed FRSP or FCONV instructions ##010A8 The floating point unit has executed a frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. Combined Unit 0 + Unit 1. #86,v,g,n,n,PM_FPU_FSQRT,FPU executed FSQRT instruction ##00090 The floating point unit has executed a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. Combined Unit 0 + Unit 1. 
#87,v,g,n,n,PM_FPU_STALL3,FPU stalled in pipe3 ##02088 FPU has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). This signal is active during the entire duration of the stall. Combined Unit 0 + Unit 1. #88,v,g,n,n,PM_FPU_STF,FPU executed store instruction ##02090 FPU has executed a store instruction. Combined Unit 0 + Unit 1. #89,v,g,n,s,PM_FXLS0_FULL_CYC,Cycles FXU0/LS0 queue full ##110C0 The issue queue that feeds the Fixed Point unit 0 / Load Store Unit 0 is full. This condition will prevent dispatch groups from being dispatched. This event only indicates that the queue was full, not that dispatch was prevented. #90,v,g,n,s,PM_FXLS1_FULL_CYC,Cycles FXU1/LS1 queue full ##110C4 The issue queue that feeds the Fixed Point unit 1 / Load Store Unit 1 is full. This condition will prevent dispatch groups from being dispatched. This event only indicates that the queue was full, not that dispatch was prevented. #91,v,g,n,n,PM_FXU0_FIN,FXU0 produced a result ##130E2 The Fixed Point unit 0 finished an instruction and produced a result. Instructions that finish may not necessarily complete. #92,v,g,n,n,PM_FXU1_FIN,FXU1 produced a result ##130E6 The Fixed Point unit 1 finished an instruction and produced a result. Instructions that finish may not necessarily complete. #93,u,g,n,n,PM_FXU_BUSY,FXU busy ##00012 Cycles when both FXU0 and FXU1 are busy. #94,v,g,n,n,PM_MRK_FXU_FIN,Marked instruction FXU processing finished ##00014 One of the Fixed Point Units finished a marked instruction. Instructions that finish may not necessarily complete. #95,v,g,n,s,PM_GCT_EMPTY_CYC,Cycles GCT empty ##00004 The Global Completion Table is completely empty #96,v,g,n,n,PM_GCT_FULL_CYC,Cycles GCT full ##100C0 The Global Completion Table is completely full.
#97,v,g,n,n,PM_GCT_NOSLOT_IC_MISS,No slot in GCT caused by I cache miss ##1009C Cycles when the Global Completion Table has no slots from this thread because of an Instruction Cache miss. #98,v,g,n,s,PM_GCT_USAGE_60to79_CYC,Cycles GCT 60-79% full ##0001F Cycles when the Global Completion Table has between 60% and 79% of its slots used. The GCT has 20 entries shared between threads. #99,v,g,n,s,PM_GPR_MAP_FULL_CYC,Cycles GPR mapper full ##130E5 The General Purpose Register mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. This event only indicates that the mapper was full, not that dispatch was prevented. #100,v,g,n,n,PM_GRP_BR_REDIR,Group experienced branch redirect ##120E6 Number of groups, counted at dispatch, that have encountered a branch redirect. Every group constructed from a fetch group that has been redirected will count. #101,c,g,n,n,PM_GRP_IC_MISS_BR_REDIR_NONSPEC,Group experienced non-speculative I cache miss or branch redirect ##120E5 Group experienced non-speculative I cache miss or branch redirect #102,v,g,n,n,PM_GRP_DISP,Group dispatches ##00002 A group was dispatched #103,v,g,n,n,PM_GRP_DISP_BLK_SB_CYC,Cycles group dispatch blocked by scoreboard ##130E1 A scoreboard operation on a non-renamed resource has blocked dispatch. #104,v,g,n,n,PM_GRP_DISP_REJECT,Group dispatch rejected ##120E4 A group that previously attempted dispatch was rejected. #105,v,g,n,n,PM_GRP_DISP_VALID,Group dispatch valid ##120E3 A group is available for dispatch. This does not mean it was successfully dispatched. #106,v,g,n,n,PM_GRP_IC_MISS,Group experienced I cache miss ##120E7 Number of groups, counted at dispatch, that have encountered an icache miss redirect. Every group constructed from a fetch group that missed the instruction cache will count.
#107,v,g,n,n,PM_HV_CYC,Hypervisor Cycles ##0000B Cycles when the processor is executing in Hypervisor (MSR[HV] = 1 and MSR[PR]=0) #108,v,g,n,n,PM_IC_DEMAND_L2_BHT_REDIRECT,L2 I cache demand request due to BHT redirect ##230E0 A demand (not prefetch) miss to the instruction cache was sent to the L2 as a result of a branch prediction redirect (CR mispredict). #109,v,g,n,n,PM_IC_DEMAND_L2_BR_REDIRECT,L2 I cache demand request due to branch redirect ##230E1 A demand (not prefetch) miss to the instruction cache was sent to the L2 as a result of a branch prediction redirect (either ALL mispredicted or Target). #110,v,g,n,n,PM_IC_PREF_REQ,Instruction prefetch requests ##220E6 An instruction prefetch request has been made. #111,v,g,n,n,PM_IERAT_XLATE_WR,Translation written to ierat ##220E7 An entry was written into the IERAT as a result of an IERAT miss. This event can be used to count IERAT misses. ERAT misses that are later ignored will not be counted unless the ERAT is written before the instruction stream is changed. #112,v,g,n,n,PM_IERAT_XLATE_WR_LP,Large page translation written to ierat ##210C6 An entry was written into the IERAT as a result of an IERAT miss. This event can be used to count IERAT misses. ERAT misses that are later ignored will not be counted unless the ERAT is written before the instruction stream is changed. #113,v,g,n,n,PM_IOPS_CMPL,Internal operations completed ##00001 Number of internal operations that completed. #114,v,g,n,n,PM_INST_DISP_ATTEMPT,Instructions dispatch attempted ##120E1 Number of PowerPC Instructions dispatched (attempted, not filtered by success). #115,v,g,n,n,PM_INST_FETCH_CYC,Cycles at least 1 instruction fetched ##220E4 Cycles when at least one instruction was sent from the fetch unit to the decode unit. #116,v,g,n,n,PM_INST_FROM_L1,Instruction fetched from L1 ##2208D An instruction fetch group was fetched from L1.
Fetch Groups can contain up to 8 instructions #117,v,g,n,n,PM_INST_FROM_L25_MOD,Instruction fetched from L2.5 modified ##22096 An instruction fetch group was fetched with modified (M) data from the L2 of a chip on the same module as this processor is located. Fetch groups can contain up to 8 instructions. #118,v,g,n,n,PM_INST_FROM_L35_MOD,Instruction fetched from L3.5 modified ##2209D An instruction fetch group was fetched with modified (M) data from the L3 of a chip on the same module as this processor is located. Fetch groups can contain up to 8 instructions #119,v,g,n,n,PM_INST_FROM_LMEM,Instruction fetched from local memory ##22086 An instruction fetch group was fetched from memory attached to the same module this processor is located on. Fetch groups can contain up to 8 instructions #120,u,g,n,n,PM_ISLB_MISS,Instruction SLB misses ##800C1 A SLB miss for an instruction fetch has occurred #121,v,g,n,n,PM_ITLB_MISS,Instruction TLB misses ##800C0 A TLB miss for an Instruction Fetch has occurred #122,v,g,n,n,PM_L1_DCACHE_RELOAD_VALID,L1 reload data source valid ##C30E4 The data source information is valid, the data cache has been reloaded. Prior to POWER5+ this included data cache reloads due to prefetch activity. With POWER5+ this now only includes reloads due to demand loads. #123,v,g,n,n,PM_L1_PREF,L1 cache data prefetches ##C70E7 A request to prefetch data into the L1 was made #124,v,g,n,n,PM_L1_WRITE_CYC,Cycles writing to instruction L1 ##230E7 Cycles that a cache line was written to the instruction cache. #125,v,g,n,s,PM_L2SA_MOD_INV,L2 slice A transition from modified to invalid ##730E0 A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C.
#126,v,g,n,s,PM_L2SA_MOD_TAG,L2 slice A transition from modified to tagged ##720E0 A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C. #127,v,g,n,s,PM_L2SA_RCLD_DISP,L2 slice A RC load dispatch attempt ##701C0 A Read/Claim dispatch for a Load was attempted #128,v,g,n,s,PM_L2SA_RCLD_DISP_FAIL_ADDR,L2 slice A RC load dispatch attempt failed due to address collision with RC/CO/SN/SQ ##711C0 A Read/Claim dispatch for a load failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time. #129,v,g,n,s,PM_L2SA_RCLD_DISP_FAIL_OTHER,L2 slice A RC load dispatch attempt failed due to other reasons ##731E0 A Read/Claim dispatch for a load failed for some reason other than Full or Collision conditions. #130,v,g,n,s,PM_L2SA_RCLD_DISP_FAIL_RC_FULL,L2 slice A RC load dispatch attempt failed due to all RC full ##721E0 A Read/Claim dispatch for a load failed because all RC machines are busy. #131,v,g,n,s,PM_L2SA_RCST_DISP,L2 slice A RC store dispatch attempt ##702C0 A Read/Claim dispatch for a Store was attempted. #132,v,g,n,s,PM_L2SA_RCST_DISP_FAIL_ADDR,L2 slice A RC store dispatch attempt failed due to address collision with RC/CO/SN/SQ ##712C0 A Read/Claim dispatch for a store failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time. #133,v,g,n,s,PM_L2SA_RCST_DISP_FAIL_OTHER,L2 slice A RC store dispatch attempt failed due to other reasons ##732E0 A Read/Claim dispatch for a store failed for some reason other than Full or Collision conditions. Rejected dispatches do not count because they have not yet been attempted. 
#134,v,g,n,s,PM_L2SA_RCST_DISP_FAIL_RC_FULL,L2 slice A RC store dispatch attempt failed due to all RC full ##722E0 A Read/Claim dispatch for a store failed because all RC machines are busy. #135,v,g,n,s,PM_L2SA_RC_DISP_FAIL_CO_BUSY,L2 slice A RC dispatch attempt failed due to RC/CO pair chosen was miss and CO already busy ##703C0 A Read/Claim Dispatch was rejected at dispatch because the Castout Machine was busy. In the case of an RC starting up on a miss and the victim is valid, the CO machine must be available for the RC to process the access. If the CO is still busy working on an old castout, then the RC must not-ack the access if it is a miss(re-issued by the CIU). If it is a miss and the CO is available to process the castout, the RC will accept the access. Once the RC has finished, it can restart and process new accesses that result in a hit (or miss that doesn't need a CO) even though the CO is still processing a castout from a previous access. #136,v,g,n,s,PM_L2SA_RC_DISP_FAIL_CO_BUSY_ALL,L2 slice A RC dispatch attempt failed due to all CO busy ##713C0 A Read/Claim dispatch was rejected because all Castout machines were busy. #137,v,g,n,s,PM_L2SA_SHR_INV,L2 slice A transition from shared to invalid ##710C0 A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. This transition was caused by any external snoop request. The event is provided on each of the three slices A, B, and C. NOTE: For this event to be useful the tablewalk duration event should also be counted. #138,v,g,n,s,PM_L2SA_SHR_MOD,L2 slice A transition from shared to modified ##700C0 A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A, B, and C. 
#139,v,g,n,n,PM_L2SA_ST_HIT,L2 slice A store hits ##733E0 A store request made from the core hit in the L2 directory. This event is provided on each of the three L2 slices A, B, and C. #140,v,g,n,n,PM_L2SA_ST_REQ,L2 slice A store requests ##723E0 A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. The event is provided on each of the three slices A, B, and C. #141,v,g,n,s,PM_L2SB_MOD_INV,L2 slice B transition from modified to invalid ##730E1 A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C. #142,v,g,n,s,PM_L2SB_MOD_TAG,L2 slice B transition from modified to tagged ##720E1 A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C. #143,v,g,n,s,PM_L2SB_RCLD_DISP,L2 slice B RC load dispatch attempt ##701C1 A Read/Claim dispatch for a Load was attempted #144,v,g,n,s,PM_L2SB_RCLD_DISP_FAIL_ADDR,L2 slice B RC load dispatch attempt failed due to address collision with RC/CO/SN/SQ ##711C1 A Read/Claim dispatch for a load failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time. #145,v,g,n,s,PM_L2SB_RCLD_DISP_FAIL_OTHER,L2 slice B RC load dispatch attempt failed due to other reasons ##731E1 A Read/Claim dispatch for a load failed for some reason other than Full or Collision conditions. #146,v,g,n,s,PM_L2SB_RCLD_DISP_FAIL_RC_FULL,L2 slice B RC load dispatch attempt failed due to all RC full ##721E1 A Read/Claim dispatch for a load failed because all RC machines are busy. 
#147,v,g,n,s,PM_L2SB_RCST_DISP,L2 slice B RC store dispatch attempt ##702C1 A Read/Claim dispatch for a Store was attempted. #148,v,g,n,s,PM_L2SB_RCST_DISP_FAIL_ADDR,L2 slice B RC store dispatch attempt failed due to address collision with RC/CO/SN/SQ ##712C1 A Read/Claim dispatch for a store failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time. #149,v,g,n,s,PM_L2SB_RCST_DISP_FAIL_OTHER,L2 slice B RC store dispatch attempt failed due to other reasons ##732E1 A Read/Claim dispatch for a store failed for some reason other than Full or Collision conditions. Rejected dispatches do not count because they have not yet been attempted. #150,v,g,n,s,PM_L2SB_RCST_DISP_FAIL_RC_FULL,L2 slice B RC store dispatch attempt failed due to all RC full ##722E2 A Read/Claim dispatch for a store failed because all RC machines are busy. #151,v,g,n,s,PM_L2SB_RC_DISP_FAIL_CO_BUSY,L2 slice B RC dispatch attempt failed due to RC/CO pair chosen was miss and CO already busy ##703C1 A Read/Claim Dispatch was rejected at dispatch because the Castout Machine was busy. In the case of an RC starting up on a miss and the victim is valid, the CO machine must be available for the RC to process the access. If the CO is still busy working on an old castout, then the RC must not-ack the access if it is a miss(re-issued by the CIU). If it is a miss and the CO is available to process the castout, the RC will accept the access. Once the RC has finished, it can restart and process new accesses that result in a hit (or miss that doesn't need a CO) even though the CO is still processing a castout from a previous access. #152,v,g,n,s,PM_L2SB_RC_DISP_FAIL_CO_BUSY_ALL,L2 slice B RC dispatch attempt failed due to all CO busy ##713C1 A Read/Claim dispatch was rejected because all Castout machines were busy. 
#153,v,g,n,s,PM_L2SB_SHR_INV,L2 slice B transition from shared to invalid ##710C1 A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. This transition was caused by any external snoop request. The event is provided on each of the three slices A, B, and C. NOTE: For this event to be useful the tablewalk duration event should also be counted. #154,v,g,n,s,PM_L2SB_SHR_MOD,L2 slice B transition from shared to modified ##700C1 A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A, B, and C. #155,v,g,n,n,PM_L2SB_ST_HIT,L2 slice B store hits ##733E1 A store request made from the core hit in the L2 directory. This event is provided on each of the three L2 slices A, B and C. #156,v,g,n,n,PM_L2SB_ST_REQ,L2 slice B store requests ##723E1 A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. The event is provided on each of the three slices A, B, and C. #157,v,g,n,s,PM_L2SC_MOD_INV,L2 slice C transition from modified to invalid ##730E2 A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C. #158,v,g,n,s,PM_L2SC_MOD_TAG,L2 slice C transition from modified to tagged ##720E2 A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C. 
#159,v,g,n,s,PM_L2SC_RCLD_DISP,L2 slice C RC load dispatch attempt ##701C2 A Read/Claim dispatch for a Load was attempted #160,v,g,n,s,PM_L2SC_RCLD_DISP_FAIL_ADDR,L2 slice C RC load dispatch attempt failed due to address collision with RC/CO/SN/SQ ##711C2 A Read/Claim dispatch for a load failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time. #161,v,g,n,s,PM_L2SC_RCLD_DISP_FAIL_OTHER,L2 slice C RC load dispatch attempt failed due to other reasons ##731E2 A Read/Claim dispatch for a load failed for some reason other than Full or Collision conditions. #162,v,g,n,s,PM_L2SC_RCLD_DISP_FAIL_RC_FULL,L2 slice C RC load dispatch attempt failed due to all RC full ##721E2 A Read/Claim dispatch for a load failed because all RC machines are busy. #163,v,g,n,s,PM_L2SC_RCST_DISP,L2 slice C RC store dispatch attempt ##702C2 A Read/Claim dispatch for a Store was attempted. #164,v,g,n,s,PM_L2SC_RCST_DISP_FAIL_ADDR,L2 slice C RC store dispatch attempt failed due to address collision with RC/CO/SN/SQ ##712C2 A Read/Claim dispatch for a store failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time. #165,v,g,n,s,PM_L2SC_RCST_DISP_FAIL_OTHER,L2 slice C RC store dispatch attempt failed due to other reasons ##732E2 A Read/Claim dispatch for a store failed for some reason other than Full or Collision conditions. Rejected dispatches do not count because they have not yet been attempted. #166,v,g,n,s,PM_L2SC_RCST_DISP_FAIL_RC_FULL,L2 slice C RC store dispatch attempt failed due to all RC full ##722E1 A Read/Claim dispatch for a store failed because all RC machines are busy. #167,v,g,n,s,PM_L2SC_RC_DISP_FAIL_CO_BUSY,L2 slice C RC dispatch attempt failed due to RC/CO pair chosen was miss and CO already busy ##703C2 A Read/Claim Dispatch was rejected at dispatch because the Castout Machine was busy. 
In the case of an RC starting up on a miss and the victim is valid, the CO machine must be available for the RC to process the access. If the CO is still busy working on an old castout, then the RC must not-ack the access if it is a miss(re-issued by the CIU). If it is a miss and the CO is available to process the castout, the RC will accept the access. Once the RC has finished, it can restart and process new accesses that result in a hit (or miss that doesn't need a CO) even though the CO is still processing a castout from a previous access. #168,v,g,n,s,PM_L2SC_RC_DISP_FAIL_CO_BUSY_ALL,L2 slice C RC dispatch attempt failed due to all CO busy ##713C2 A Read/Claim dispatch was rejected because all Castout machines were busy. #169,v,g,n,s,PM_L2SC_SHR_INV,L2 slice C transition from shared to invalid ##710C2 A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. This transition was caused by any external snoop request. The event is provided on each of the three slices A, B, and C. NOTE: For this event to be useful the tablewalk duration event should also be counted. #170,v,g,n,s,PM_L2SC_SHR_MOD,L2 slice C transition from shared to modified ##700C2 A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A, B, and C. #171,v,g,n,n,PM_L2SC_ST_HIT,L2 slice C store hits ##733E2 A store request made from the core hit in the L2 directory. The event is provided on each of the three slices A, B, and C. #172,v,g,n,n,PM_L2SC_ST_REQ,L2 slice C store requests ##723E2 A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. The event is provided on each of the three slices A, B, and C. 
#173,v,g,n,n,PM_L2_PREF,L2 cache prefetches ##C50C3 A request to prefetch data into L2 was made #174,v,g,n,s,PM_L3SA_ALL_BUSY,L3 slice A active for every cycle all CI/CO machines busy ##721E3 Cycles All Castin/Castout machines are busy. #175,v,g,n,s,PM_L3SA_HIT,L3 slice A hits ##711C3 Number of attempts made by this chip cores that resulted in an L3 hit. Reported per L3 slice #176,v,g,n,s,PM_L3SA_MOD_INV,L3 slice A transition from modified to invalid ##730E3 L3 snooper detects someone doing a store to a line that is truly M in this L3 (i.e. L3 going M=>I) Mu|Me are not included since they are formed due to a prev read op. Tx is not included since it is considered shared at this point. #177,v,g,n,s,PM_L3SA_MOD_TAG,L3 slice A transition from modified to TAG ##720E3 L3 snooper detects someone doing a read to a line that is truly M in this L3(i.e. L3 going M->T or M->I(go_Mu case) Mu|Me are not included since they are formed due to a prev read op). Tx is not included since it is considered shared at this point. #178,v,g,n,s,PM_L3SA_REF,L3 slice A references ##701C3 Number of attempts made by this chip cores to find data in the L3. Reported per L3 slice #179,v,g,n,s,PM_L3SA_SHR_INV,L3 slice A transition from shared to invalid ##710C3 L3 snooper detects someone doing a store to a line that is Sx in this L3(i.e. invalidate hit SX and dispatched). #180,v,g,n,s,PM_L3SA_SNOOP_RETRY,L3 slice A snoop retries ##731E3 Number of times an L3 retried a snoop because it got two in at the same time (one on snp_a, one on snp_b) #181,v,g,n,s,PM_L3SB_ALL_BUSY,L3 slice B active for every cycle all CI/CO machines busy ##721E4 Cycles All Castin/Castout machines are busy. #182,v,g,n,s,PM_L3SB_HIT,L3 slice B hits ##711C4 Number of attempts made by this chip cores that resulted in an L3 hit. Reported per L3 slice #183,v,g,n,s,PM_L3SB_MOD_INV,L3 slice B transition from modified to invalid ##730E4 L3 snooper detects someone doing a store to a line that is truly M in this L3 (i.e. 
L3 going M=>I). Mu|Me are not included since they are formed due to a prev read op. Tx is not included since it is considered shared at this point. #184,v,g,n,s,PM_L3SB_MOD_TAG,L3 slice B transition from modified to TAG ##720E4 L3 snooper detects someone doing a read to a line that is truly M in this L3(i.e. L3 going M->T or M->I(go_Mu case); Mu|Me are not included since they are formed due to a prev read op). Tx is not included since it is considered shared at this point. #185,v,g,n,s,PM_L3SB_REF,L3 slice B references ##701C4 Number of attempts made by this chip cores to find data in the L3. Reported per L3 slice #186,v,g,n,s,PM_L3SB_SHR_INV,L3 slice B transition from shared to invalid ##710C4 L3 snooper detects someone doing a store to a line that is Sx in this L3(i.e. invalidate hit SX and dispatched). #187,v,g,n,s,PM_L3SB_SNOOP_RETRY,L3 slice B snoop retries ##731E4 Number of times an L3 retried a snoop because it got two in at the same time (one on snp_a, one on snp_b) #188,v,g,n,s,PM_L3SC_ALL_BUSY,L3 slice C active for every cycle all CI/CO machines busy ##721E5 Cycles All Castin/Castout machines are busy. #189,v,g,n,s,PM_L3SC_HIT,L3 slice C hits ##711C5 Number of attempts made by this chip cores that resulted in an L3 hit. Reported per L3 Slice #190,v,g,n,s,PM_L3SC_MOD_INV,L3 slice C transition from modified to invalid ##730E5 L3 snooper detects someone doing a store to a line that is truly M in this L3 (i.e. L3 going M=>I) Mu|Me are not included since they are formed due to a previous read op Tx is not included since it is considered shared at this point. #191,v,g,n,s,PM_L3SC_MOD_TAG,L3 slice C transition from modified to TAG ##720E5 L3 snooper detects someone doing a read to a line that is truly M in this L3(i.e. L3 going M->T or M->I(go_Mu case); Mu|Me are not included since they are formed due to a prev read op). Tx is not included since it is considered shared at this point. 
#192,v,g,n,s,PM_L3SC_REF,L3 slice C references ##701C5 Number of attempts made by this chip cores to find data in the L3. Reported per L3 slice. #193,v,g,n,s,PM_L3SC_SHR_INV,L3 slice C transition from shared to invalid ##710C5 L3 snooper detects someone doing a store to a line that is Sx in this L3(i.e. invalidate hit SX and dispatched). #194,v,g,n,s,PM_L3SC_SNOOP_RETRY,L3 slice C snoop retries ##731E5 Number of times an L3 retried a snoop because it got two in at the same time (one on snp_a, one on snp_b) #195,v,g,n,n,PM_LARX_LSU0,Larx executed on LSU0 ##820E7 A larx (lwarx or ldarx) was executed on side 0 (there is no corresponding unit 1 event since larx instructions can only execute on unit 0) #196,v,g,n,n,PM_LD_MISS_L1_LSU0,LSU0 L1 D cache load misses ##C10C2 Load references that miss the Level 1 Data cache, by unit 0. #197,v,g,n,n,PM_LD_MISS_L1_LSU1,LSU1 L1 D cache load misses ##C10C5 Load references that miss the Level 1 Data cache, by unit 1. #198,v,g,n,n,PM_LD_REF_L1_LSU0,LSU0 L1 D cache load references ##C10C0 Load references to Level 1 Data Cache, by unit 0. #199,v,g,n,n,PM_LD_MISS_L1_LSU1,LSU1 L1 D cache load misses ##C10C6 Load references that miss the Level 1 Data cache, by unit 1. #200,u,g,n,s,PM_LR_CTR_MAP_FULL_CYC,Cycles LR/CTR mapper full ##100C6 The LR/CTR mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. This event only indicates that the mapper was full, not that dispatch was prevented. #201,v,g,n,n,PM_LSU0_BUSY_REJECT,LSU0 busy due to reject ##C20E1 Total cycles the Load Store Unit 0 is busy rejecting instructions. #202,v,g,n,n,PM_LSU0_DERAT_MISS,LSU0 DERAT misses ##800C2 Total D-ERAT Misses by LSU0. Requests that miss the Derat are rejected and retried until the request hits in the Erat. This may result in multiple erat misses for the same instruction. 
#203,v,g,n,n,PM_LSU0_FLUSH_LRQ,LSU0 LRQ flushes ##C00C2 A load was flushed by unit 0 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. #204,u,g,n,n,PM_LSU0_FLUSH_SRQ,LSU0 SRQ lhs flushes ##C00C3 A store was flushed by unit 0 because younger load hits and older store that is already in the SRQ or in the same group. #205,v,g,n,n,PM_LSU0_FLUSH_ULD,LSU0 unaligned load flushes ##C00C0 A load was flushed from unit 0 because it was unaligned (crossed a 64 byte boundary, or 32 byte if it missed the L1) #206,v,g,n,n,PM_LSU0_FLUSH_UST,LSU0 unaligned store flushes ##C00C1 A store was flushed from unit 0 because it was unaligned (crossed a 4K boundary). #207,v,g,n,n,PM_LSU0_LDF,LSU0 executed Floating Point load instruction ##C50C0 A floating point load was executed by LSU0 #208,v,g,n,n,PM_LSU0_NCLD,LSU0 non-cacheable loads ##C50C1 A non-cacheable load was executed by unit 0. #209,v,g,n,n,PM_LSU0_REJECT_ERAT_MISS,LSU0 reject due to ERAT miss ##C40C3 Total cycles the Load Store Unit 0 is busy rejecting instructions due to an ERAT miss. Requests that miss the Derat are rejected and retried until the request hits in the Erat. #210,v,g,n,n,PM_LSU0_REJECT_LMQ_FULL,LSU0 reject due to LMQ full or missed data coming ##C40C1 Total cycles the Load Store Unit 0 is busy rejecting instructions because the Load Miss Queue was full. The LMQ has eight entries. If all eight entries are full, subsequent load instructions are rejected. #211,v,g,n,n,PM_LSU0_REJECT_RELOAD_CDF,LSU0 reject due to reload CDF or tag update collision ##C40C2 Total cycles the Load Store Unit 0 is busy rejecting instructions because of Critical Data Forward. When critical data arrives from the storage system it is formatted and immediately forwarded, bypassing the data cache, to the destination register using the result bus. 
Any instruction the requires the result bus in the same cycle is rejected. Tag update rejects are caused when an instruction requires access to the Dcache directory or ERAT in the same system when they are being updated. #212,v,g,n,n,PM_LSU0_REJECT_SRQ,LSU0 SRQ lhs rejects ##C40C0 Total cycles the Load Store Unit 0 is busy rejecting instructions because of Load Hit Store conditions. Loads are rejected when data is needed from a previous store instruction but store forwarding is not possible because the data is not fully contained in the Store Data Queue or is not yet available in the Store Data Queue. #213,u,g,n,n,PM_LSU0_SRQ_STFWD,LSU0 SRQ store forwarded ##C60E1 Data from a store instruction was forwarded to a load on unit 0. A load that misses L1 but becomes a store forward is treated as a load miss and it causes the DL1 load miss event to be counted. It does not go into the LMQ. If a load that hits L1 but becomes a store forward, then it's not treated as a load miss. #214,v,g,n,n,PM_LSU1_BUSY_REJECT,LSU1 busy due to reject ##C20E5 Total cycles the Load Store Unit 1 is busy rejecting instructions. #215,v,g,n,n,PM_LSU1_DERAT_MISS,LSU1 DERAT misses ##800C6 A data request (load or store) from LSU Unit 1 missed the ERAT and resulted in an ERAT reload. Multiple instructions may miss the ERAT entry for the same 4K page, but only one reload will occur. #216,v,g,n,n,PM_LSU1_FLUSH_LRQ,LSU1 LRQ flushes ##C00C6 A load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. #217,u,g,n,n,PM_LSU1_FLUSH_SRQ,LSU1 SRQ lhs flushes ##C00C7 A store was flushed because younger load hits and older store that is already in the SRQ or in the same group. 
#218,v,g,n,n,PM_LSU1_FLUSH_ULD,LSU1 unaligned load flushes ##C00C4 A load was flushed from unit 1 because it was unaligned (crossed a 64 byte boundary, or 32 byte if it missed the L1). #219,u,g,n,n,PM_LSU1_FLUSH_UST,LSU1 unaligned store flushes ##C00C5 A store was flushed from unit 1 because it was unaligned (crossed a 4K boundary) #220,v,g,n,n,PM_LSU1_LDF,LSU1 executed Floating Point load instruction ##C50C4 A floating point load was executed by LSU1 #221,v,g,n,n,PM_LSU1_NCLD,LSU1 non-cacheable loads ##C50C5 A non-cacheable load was executed by Unit 1. #222,v,g,n,n,PM_LSU1_REJECT_ERAT_MISS,LSU1 reject due to ERAT miss ##C40C7 Total cycles the Load Store Unit 1 is busy rejecting instructions due to an ERAT miss. Requests that miss the Derat are rejected and retried until the request hits in the Erat. #223,v,g,n,n,PM_LSU1_REJECT_LMQ_FULL,LSU1 reject due to LMQ full or missed data coming ##C40C5 Total cycles the Load Store Unit 1 is busy rejecting instructions because the Load Miss Queue was full. The LMQ has eight entries. If all eight entries are full, subsequent load instructions are rejected. #224,v,g,n,n,PM_LSU1_REJECT_RELOAD_CDF,LSU1 reject due to reload CDF or tag update collision ##C40C6 Total cycles the Load Store Unit 1 is busy rejecting instructions because of Critical Data Forward. When critical data arrives from the storage system it is formatted and immediately forwarded, bypassing the data cache, to the destination register using the result bus. Any instruction that requires the result bus in the same cycle is rejected. Tag update rejects are caused when an instruction requires access to the Dcache directory or ERAT in the same system when they are being updated. #225,v,g,n,n,PM_LSU1_REJECT_SRQ,LSU1 SRQ lhs rejects ##C40C4 Total cycles the Load Store Unit 1 is busy rejecting instructions because of Load Hit Store conditions.
Loads are rejected when data is needed from a previous store instruction but store forwarding is not possible because the data is not fully contained in the Store Data Queue or is not yet available in the Store Data Queue. #226,u,g,n,n,PM_LSU1_SRQ_STFWD,LSU1 SRQ store forwarded ##C60E5 Data from a store instruction was forwarded to a load on unit 1. A load that misses L1 but becomes a store forward is treated as a load miss and it causes the DL1 load miss event to be counted. It does not go into the LMQ. If a load that hits L1 but becomes a store forward, then it's not treated as a load miss. #227,v,g,n,n,PM_LSU_BUSY_REJECT,LSU busy due to reject ##C2088 Total cycles the Load Store Unit is busy rejecting instructions. Combined unit 0 + 1. #228,v,g,n,n,PM_LSU_DERAT_MISS,DERAT misses ##80090 Total D-ERAT Misses. Requests that miss the Derat are rejected and retried until the request hits in the Erat. This may result in multiple erat misses for the same instruction. Combined Unit 0 + 1. #229,v,g,n,n,PM_LSU_FLUSH,Flush initiated by LSU ##110C5 A flush was initiated by the Load Store Unit #230,v,g,n,n,PM_LSU_FLUSH_LRQ,LRQ flushes ##C0090 A load was flushed because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. Combined Units 0 and 1. #231,v,g,n,s,PM_LSU_FLUSH_LRQ_FULL,Flush caused by LRQ full ##320E7 This thread was flushed at dispatch because its Load Request Queue was full. This allows the other thread to have more machine resources for it to make progress while this thread is stalled. #232,v,g,n,s,PM_LSU_FLUSH_SRQ_FULL,Flush caused by SRQ full ##330E0 This thread was flushed at dispatch because its Store Request Queue was full. This allows the other thread to have more machine resources for it to make progress while this thread is stalled. 
#233,v,g,n,n,PM_LSU_FLUSH_UST,SRQ unaligned store flushes ##C0088 A store was flushed because it was unaligned (crossed a 4K boundary). Combined Unit 0 + 1. #234,u,g,n,s,PM_LSU_LMQ_FULL_CYC,Cycles LMQ full ##C30E7 The Load Miss Queue was full. #235,v,g,n,n,PM_LSU_LMQ_LHR_MERGE,LMQ LHR merges ##C70E5 A data cache miss occurred for the same real cache line address as an earlier request already in the Load Miss Queue and was merged into the LMQ entry. #236,v,g,n,s,PM_LSU_LMQ_S0_ALLOC,LMQ slot 0 allocated ##C30E6 The first entry in the LMQ was allocated. #237,v,g,n,n,PM_LSU_LMQ_S0_VALID,LMQ slot 0 valid ##C30E5 This signal is asserted every cycle when the first entry in the LMQ is valid. The LMQ had eight entries that are allocated FIFO #238,u,g,n,n,PM_LSU_LMQ_SRQ_EMPTY_CYC,Cycles LMQ and SRQ empty ##00015 Cycles when both the LMQ and SRQ are empty (LSU is idle) #239,v,g,n,s,PM_LSU_LRQ_FULL_CYC,Cycles LRQ full ##110C2 Cycles when the LRQ is full. #240,v,g,n,n,PM_LSU_LRQ_S0_ALLOC,LRQ slot 0 allocated ##C60E7 LRQ slot zero was allocated #241,v,g,n,n,PM_LSU_LRQ_S0_VALID,LRQ slot 0 valid ##C60E6 This signal is asserted every cycle that the Load Request Queue slot zero is valid. The LRQ is 32 entries long and is allocated round-robin. In SMT mode the LRQ is split between the two threads (16 entries each). #242,v,g,n,n,PM_LSU_REJECT_LMQ_FULL,LSU reject due to LMQ full or missed data coming ##C4088 Total cycles the Load Store Unit is busy rejecting instructions because the Load Miss Queue was full. The LMQ has eight entries. If all the eight entries are full, subsequent load instructions are rejected. Combined unit 0 + 1. #243,v,g,n,n,PM_LSU_REJECT_RELOAD_CDF,LSU reject due to reload CDF or tag update collision ##C4090 Total cycles the Load Store Unit is busy rejecting instructions because of Critical Data Forward. 
When critical data arrives from the storage system it is formatted and immediately forwarded, bypassing the data cache, to the destination register using the result bus. Any instruction that requires the result bus in the same cycle is rejected. Tag update rejects are caused when an instruction requires access to the Dcache directory or ERAT in the same system when they are being updated. Combined Unit 0 + 1. #244,v,g,n,s,PM_LSU_SRQ_FULL_CYC,Cycles SRQ full ##110C3 Cycles the Store Request Queue is full. #245,v,g,n,n,PM_LSU_SRQ_S0_ALLOC,SRQ slot 0 allocated ##C20E7 SRQ Slot zero was allocated #246,v,g,n,n,PM_LSU_SRQ_S0_VALID,SRQ slot 0 valid ##C20E6 This signal is asserted every cycle that the Store Request Queue slot zero is valid. The SRQ is 32 entries long and is allocated round-robin. In SMT mode the SRQ is split between the two threads (16 entries each). #247,c,g,n,n,PM_LSU_SRQ_STFWD,SRQ store forwarded ##C6088 Data from a store instruction was forwarded to a load. A load that misses L1 but becomes a store forward is treated as a load miss and it causes the DL1 load miss event to be counted. It does not go into the LMQ. If a load that hits L1 but becomes a store forward, then it's not treated as a load miss. Combined Unit 0 + 1. #248,u,g,n,n,PM_LSU_SRQ_SYNC_CYC,SRQ sync duration ##830E5 Cycles that a sync instruction is active in the Store Request Queue. #249,v,g,n,n,PM_LWSYNC_HELD,LWSYNC held at dispatch ##130E0 Cycles a LWSYNC instruction was held at dispatch. LWSYNC instructions are held at dispatch until all previous loads are done and all previous stores have issued. LWSYNC enters the Store Request Queue and is sent to the storage subsystem but does not wait for a response. #250,v,g,n,n,PM_MEM_PWQ_DISP_Q2or3,Memory partial-write queue dispatched to Write Queue 2 or 3 ##734E6 Memory partial-write queue dispatched to Write Queue 2 or 3. This event is sent from the Memory Controller clock domain and must be scaled accordingly. 
#251,v,g,n,n,PM_IC_PREF_INSTALL,Instruction prefetched installed in prefetch buffer ##210C7 A prefetch buffer entry (line) is allocated but the request is not a demand fetch. #252,v,g,n,s,PM_MEM_HI_PRIO_WR_CMPL,High priority write completed ##726E6 A memory write, which was upgraded to high priority, completed. Writes can be upgraded to high priority to ensure that read traffic does not lock out writes. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #253,v,g,n,s,PM_MEM_NONSPEC_RD_CANCEL,Non speculative memory read cancelled ##711C6 A non-speculative read was cancelled because the combined response indicated it was sourced from another L2 or L3. This event is sent from the Memory Controller clock domain and must be scaled accordingly #254,v,g,n,s,PM_MEM_LO_PRIO_WR_CMPL,Low priority write completed ##736E6 A memory write, which was not upgraded to high priority, completed. This event is sent from the Memory Controller clock domain and must be scaled accordingly #255,v,g,n,s,PM_MEM_PWQ_DISP,Memory partial-write queue dispatched ##704C6 Number of Partial Writes dispatched. The MC provides resources to gather partial cacheline writes (Partial line DMA writes & CI-stores) to up to four different cachelines at a time. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #256,v,g,n,n,PM_MEM_RQ_DISP_Q0to3,Memory read queue dispatched to queues 0-3 ##702C6 A memory operation was dispatched to read queue 0,1,2, or 3. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #257,v,g,n,s,PM_MEM_PW_CMPL,Memory partial-write completed ##724E6 Number of Partial Writes completed. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #258,v,g,n,s,PM_MEM_PW_GATH,Memory partial-write gathered ##714C6 Two or more partial-writes have been merged into a single memory write. 
This event is sent from the Memory Controller clock domain and must be scaled accordingly. #259,v,g,n,n,PM_MEM_RQ_DISP_Q4to7,Memory read queue dispatched to queues 4-7 ##712C6 A memory operation was dispatched to read queue 4,5,6 or 7. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #260,v,g,n,s,PM_MEM_RQ_DISP,Memory read queue dispatched ##701C6 A memory read was dispatched. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #261,v,g,n,n,PM_MEM_RQ_DISP_Q8to11,Memory read queue dispatched to queues 8-11 ##722E6 A memory operation was dispatched to read queue 8,9,10 or 11. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #262,v,g,n,n,PM_MEM_RQ_DISP_Q12to15,Memory read queue dispatched to queues 12-15 ##732E6 A memory operation was dispatched to read queue 12,13,14 or 15. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #263,v,g,n,s,PM_MEM_SPEC_RD_CANCEL,Speculative memory read cancelled ##721E6 Speculative memory read cancelled (i.e. cresp = sourced by L2/L3) #264,v,g,n,n,PM_MEM_WQ_DISP_Q0to7,Memory write queue dispatched to queues 0-7 ##723E6 A memory operation was dispatched to a write queue in the range between 0 and 7. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #265,v,g,n,n,PM_MEM_WQ_DISP_Q8to15,Memory write queue dispatched to queues 8-15 ##733E6 A memory operation was dispatched to a write queue in the range between 8 and 15. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #266,v,g,n,s,PM_MEM_WQ_DISP_DCLAIM,Memory write queue dispatched due to dclaim/flush ##713C6 A memory dclaim or flush operation was dispatched to a write queue. This event is sent from the Memory Controller clock domain and must be scaled accordingly. 
#267,v,g,n,s,PM_MEM_WQ_DISP_WRITE,Memory write queue dispatched due to write ##703C6 A memory write was dispatched to a write queue. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #268,v,g,n,n,PM_MRK_BRU_FIN,Marked instruction BRU processing finished ##00005 The branch unit finished a marked instruction. Instructions that finish may not necessarily complete. #269,v,g,n,n,PM_MRK_DATA_FROM_L25_MOD,Marked data loaded from L2.5 modified ##C7097 The processor's Data Cache was reloaded with modified (M) data from the L2 of a chip on the same module as this processor is located due to a marked load. #270,v,g,n,n,PM_MRK_DATA_FROM_L25_SHR_CYC,Marked load latency from L2.5 shared ##C70A2 Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. To calculate average latency divide this count by the number of marked misses to the same level. #271,v,g,n,n,PM_MRK_DATA_FROM_L275_SHR_CYC,Marked load latency from L2.75 shared ##C70A3 Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. To calculate average latency divide this count by the number of marked misses to the same level. #272,v,g,n,n,PM_MRK_DATA_FROM_L2_CYC,Marked load latency from L2 ##C70A0 Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. To calculate average latency divide this count by the number of marked misses to the same level. #273,v,g,n,n,PM_MRK_DATA_FROM_L35_MOD,Marked data loaded from L3.5 modified ##C709E The processor's Data Cache was reloaded with modified (M) data from the L3 of a chip on the same module as this processor is located due to a marked load. 
#274,v,g,n,n,PM_MRK_DATA_FROM_L35_SHR_CYC,Marked load latency from L3.5 shared ##C70A6 Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. To calculate average latency divide this count by the number of marked misses to the same level. #275,v,g,n,n,PM_MRK_DATA_FROM_L375_SHR_CYC,Marked load latency from L3.75 shared ##C70A7 Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. To calculate average latency divide this count by the number of marked misses to the same level. #276,v,g,n,n,PM_MRK_DATA_FROM_L3_CYC,Marked load latency from L3 ##C70A4 Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. To calculate average latency divide this count by the number of marked misses to the same level. #277,v,g,n,n,PM_MRK_DATA_FROM_LMEM,Marked data loaded from local memory ##C7087 The processor's Data Cache was reloaded due to a marked load from memory attached to the same module this proccessor is located on. #278,v,g,n,n,PM_MRK_DSLB_MISS,Marked Data SLB misses ##C50C7 A Data SLB miss was caused by a marked instruction. #279,v,g,n,n,PM_MRK_DTLB_MISS,Marked Data TLB misses ##C50C6,C60E0 Data TLB references by a marked instruction that missed the TLB (all page sizes). #280,v,g,n,n,PM_MRK_DTLB_MISS_64K,Marked Data TLB misses for 64K page ##C608D Data TLB references to 64KB pages by a marked instruction that missed the TLB. Page size is determined at TLB reload time. #281,v,g,n,n,PM_MRK_DTLB_REF,Marked Data TLB reference ##C60E4 Total number of Data TLB references by a marked instruction for all page sizes. Page size is determined at TLB reload time. 
#282,v,g,n,n,PM_MRK_DTLB_REF_64K,Marked Data TLB reference for 64K page ##C6086 Data TLB references by a marked instruction for 64KB pages. #283,v,g,n,n,PM_MRK_GRP_BR_REDIR,Group experienced marked branch redirect ##12091 A group containing a marked (sampled) instruction experienced a branch redirect. #284,v,g,n,n,PM_MRK_IMR_RELOAD,Marked IMR reloaded ##820E2 A DL1 reload occurred due to marked load #285,v,g,n,n,PM_MRK_L1_RELOAD_VALID,Marked L1 reload data source valid ##C70E4 The source information is valid and is for a marked load #286,v,g,n,n,PM_MRK_LD_MISS_L1_LSU0,LSU0 marked L1 D cache load misses ##820E0 Load references that miss the Level 1 Data cache, by LSU0. #287,v,g,n,n,PM_MRK_LD_MISS_L1_LSU1,LSU1 marked L1 D cache load misses ##820E4 Load references that miss the Level 1 Data cache, by LSU1. #288,v,g,n,n,PM_MRK_LSU0_FLUSH_LRQ,LSU0 marked LRQ flushes ##810C2 A marked load was flushed by unit 0 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. #289,u,g,n,n,PM_MRK_LSU0_FLUSH_SRQ,LSU0 marked SRQ lhs flushes ##810C3 A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group. #290,v,g,n,n,PM_MRK_LSU0_FLUSH_ULD,LSU0 marked unaligned load flushes ##810C1 A marked load was flushed from unit 0 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) #291,v,g,n,n,PM_MRK_LSU0_FLUSH_UST,LSU0 marked unaligned store flushes ##810C0 A marked store was flushed from unit 0 because it was unaligned #292,v,g,n,n,PM_MRK_LSU1_FLUSH_LRQ,LSU1 marked LRQ flushes ##810C6 A marked load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. 
#293,u,g,n,n,PM_MRK_LSU1_FLUSH_SRQ,LSU1 marked SRQ lhs flushes ##810C7 A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group. #294,v,g,n,n,PM_MRK_LSU1_FLUSH_ULD,LSU1 marked unaligned load flushes ##810C4 A marked load was flushed from unit 1 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) #295,u,g,n,n,PM_MRK_LSU1_FLUSH_UST,LSU1 marked unaligned store flushes ##810C5 A marked store was flushed from unit 1 because it was unaligned (crossed a 4k boundary) #296,v,g,n,n,PM_MRK_LSU_FLUSH_UST,Marked unaligned store flushes ##810A8 A marked store was flushed because it was unaligned #297,u,g,n,n,PM_MRK_LSU_SRQ_INST_VALID,Marked instruction valid in SRQ ##C70E6 This signal is asserted every cycle when a marked request is resident in the Store Request Queue #298,v,g,n,n,PM_MRK_STCX_FAIL,Marked STCX failed ##820E6 A marked stcx (stwcx or stdcx) failed #299,v,g,n,n,PM_MRK_ST_GPS,Marked store sent to GPS ##00003 A sampled store has been sent to the memory subsystem #300,v,g,n,n,PM_MRK_ST_MISS_L1,Marked L1 D cache store misses ##820E3 A marked store missed the dcache #301,v,g,n,n,PM_PMC1_OVERFLOW,PMC1 Overflow ##0000A Overflows from PMC1 are counted. This effectively widens the PMC. The Overflow from the original PMC will not trigger an exception even if the PMU is configured to generate exceptions on overflow. #302,v,g,n,n,PM_INST_CMPL,Instructions completed ##00009 Number of PowerPC instructions that completed. #303,v,g,n,n,PM_PTEG_FROM_L25_MOD,PTEG loaded from L2.5 modified ##83097 A Page Table Entry was loaded into the TLB with modified (M) data from the L2 of a chip on the same module as this processor is located due to a demand load. #304,v,g,n,n,PM_PTEG_FROM_L35_MOD,PTEG loaded from L3.5 modified ##8309E A Page Table Entry was loaded into the TLB with modified (M) data from the L3 of a chip on the same module as this processor is located, due to a demand load. 
#305,v,g,n,n,PM_PTEG_FROM_LMEM,PTEG loaded from local memory ##83087 A Page Table Entry was loaded into the TLB from memory attached to the same module this proccessor is located on. #306,v,g,n,n,PM_PTEG_RELOAD_VALID,PTEG reload valid ##830E4 A Page Table Entry was loaded into the TLB. #307,v,g,n,n,PM_SLB_MISS,SLB misses ##80088 Total of all Segment Lookaside Buffer (SLB) misses, Instructions + Data. #308,v,g,n,s,PM_SNOOP_DCLAIM_RETRY_QFULL,Snoop dclaim/flush retry due to write/dclaim queues full ##720E6 The memory controller A memory write was dispatched to a write queue. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #309,v,g,n,s,PM_SNOOP_PARTIAL_RTRY_QFULL,Snoop partial write retry due to partial-write queues full ##730E6 A snoop request for a partial write to memory was retried because the write queues that handle partial writes were full. When this happens the active writes are changed to high priority. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #310,v,g,n,s,PM_SNOOP_PW_RETRY_RQ,Snoop partial-write retry due to collision with active read queue ##707C6 A snoop request for a partial write to memory was retried because it matched the cache line of an active read. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #311,v,g,n,s,PM_SNOOP_PW_RETRY_WQ_PWQ,Snoop partial-write retry due to collision with active write or partial-write queue ##717C6 A snoop request for a partial write to memory was retried because it matched the cache line of an active write or partial write. When this happens the snoop request is retried and the active write is changed to high priority. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #312,v,g,n,s,PM_SNOOP_RD_RETRY_QFULL,Snoop read retry due to read queue full ##700C6 A snoop request for a read from memory was retried because the read queues were full. 
This event is sent from the Memory Controller clock domain and must be scaled accordingly. #313,v,g,n,s,PM_SNOOP_RD_RETRY_RQ,Snoop read retry due to collision with active read queue ##705C6 A snoop request for a read from memory was retried because it matched the cache line of an active read. The snoop request is retried because the L2 may be able to source data via intervention for the 2nd read faster than the MC. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #314,v,g,n,s,PM_SNOOP_RD_RETRY_WQ,Snoop read retry due to collision with active write queue ##715C6 A snoop request for a read from memory was retried because it matched the cache line of an active write. The snoop request is retried and the active write is changed to high priority. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #315,v,g,n,s,PM_SNOOP_RETRY_1AHEAD,Snoop retry due to one ahead collision ##725E6 Snoop retry due to one ahead collision #316,u,g,n,s,PM_SNOOP_TLBIE,Snoop TLBIE ##800C3 A tlbie was snooped from another processor. #317,v,g,n,s,PM_SNOOP_WR_RETRY_QFULL,Snoop read retry due to read queue full ##710C6 A snoop request for a write to memory was retried because the write queues were full. When this happens the snoop request is retried and the writes in the write reorder queue are changed to high priority. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #318,v,g,n,s,PM_SNOOP_WR_RETRY_RQ,Snoop write/dclaim retry due to collision with active read queue ##706C6 A snoop request for a write or dclaim to memory was retried because it matched the cacheline of an active read. 
This event is sent from the Memory Controller clock domain and must be scaled accordingly #319,v,g,n,s,PM_SNOOP_WR_RETRY_WQ,Snoop write/dclaim retry due to collision with active write queue ##716C6 A snoop request for a write or dclaim to memory was retried because it matched the cache line of an active write. The snoop request is retried and the active write is changed to high priority. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #320,v,g,n,n,PM_STCX_FAIL,STCX failed ##820E1 A stcx (stwcx or stdcx) failed #321,v,g,n,n,PM_STCX_PASS,Stcx passes ##820E5 A stcx (stwcx or stdcx) instruction was successful #322,v,g,n,n,PM_ST_MISS_L1,L1 D cache store misses ##C10C3 A store missed the dcache. Combined Unit 0 + 1. #323,v,g,n,n,PM_ST_REF_L1,L1 D cache store references ##C10A8 Store references to the Data Cache. Combined Unit 0 + 1. #324,v,g,n,n,PM_ST_REF_L1_LSU0,LSU0 L1 D cache store references ##C10C1 Store references to the Data Cache by LSU0. #325,v,g,n,n,PM_ST_REF_L1_LSU1,LSU1 L1 D cache store references ##C10C4 Store references to the Data Cache by LSU1. #326,v,g,n,n,PM_SUSPENDED,Suspended ##00000 The counter is suspended (does not count). #327,v,g,n,n,PM_THRD_GRP_CMPL_BOTH_CYC,Cycles group completed by both threads ##00013 Cycles that both threads completed. #328,v,g,n,s,PM_THRD_L2MISS_BOTH_CYC,Cycles both threads in L2 misses ##410C7 Cycles that both threads have L2 miss pending. If only one thread has a L2 miss pending the other thread is given priority at decode. If both threads have L2 miss pending decode priority is determined by the number of GCT entries used. #329,v,g,n,n,PM_THRD_PRIO_1_CYC,Cycles thread running at priority level 1 ##420E0 Cycles this thread was running at priority level 1. Priority level 1 is the lowest and indicates the thread is sleeping. #330,v,g,n,n,PM_THRD_PRIO_2_CYC,Cycles thread running at priority level 2 ##420E1 Cycles this thread was running at priority level 2. 
#331,v,g,n,n,PM_THRD_PRIO_3_CYC,Cycles thread running at priority level 3 ##420E2 Cycles this thread was running at priority level 3. #332,v,g,n,n,PM_THRD_PRIO_4_CYC,Cycles thread running at priority level 4 ##420E3 Cycles this thread was running at priority level 4. #333,v,g,n,n,PM_THRD_PRIO_5_CYC,Cycles thread running at priority level 5 ##420E4 Cycles this thread was running at priority level 5. #334,v,g,n,n,PM_THRD_PRIO_6_CYC,Cycles thread running at priority level 6 ##420E5 Cycles this thread was running at priority level 6. #335,v,g,n,n,PM_THRD_PRIO_7_CYC,Cycles thread running at priority level 7 ##420E6 Cycles this thread was running at priority level 7. #336,v,g,n,n,PM_THRD_PRIO_DIFF_0_CYC,Cycles no thread priority difference ##430E3 Cycles when this thread's priority is equal to the other thread's priority. #337,v,g,n,n,PM_THRD_PRIO_DIFF_1or2_CYC,Cycles thread priority difference is 1 or 2 ##430E4 Cycles when this thread's priority is higher than the other thread's priority by 1 or 2. #338,v,g,n,n,PM_THRD_PRIO_DIFF_3or4_CYC,Cycles thread priority difference is 3 or 4 ##430E5 Cycles when this thread's priority is higher than the other thread's priority by 3 or 4. #339,v,g,n,n,PM_THRD_PRIO_DIFF_5or6_CYC,Cycles thread priority difference is 5 or 6 ##430E6 Cycles when this thread's priority is higher than the other thread's priority by 5 or 6. #340,v,g,n,n,PM_THRD_PRIO_DIFF_minus1or2_CYC,Cycles thread priority difference is -1 or -2 ##430E2 Cycles when this thread's priority is lower than the other thread's priority by 1 or 2. #341,v,g,n,n,PM_THRD_PRIO_DIFF_minus3or4_CYC,Cycles thread priority difference is -3 or -4 ##430E1 Cycles when this thread's priority is lower than the other thread's priority by 3 or 4. #342,v,g,n,n,PM_THRD_PRIO_DIFF_minus5or6_CYC,Cycles thread priority difference is -5 or -6 ##430E0 Cycles when this thread's priority is lower than the other thread's priority by 5 or 6. 
#343,v,g,n,s,PM_THRD_SEL_OVER_CLB_EMPTY,Thread selection overrides caused by CLB empty ##410C2 Thread selection was overridden because one thread's CLB was empty. #344,v,g,n,s,PM_THRD_SEL_OVER_GCT_IMBAL,Thread selection overrides caused by GCT imbalance ##410C4 Thread selection was overridden because of a GCT imbalance. #345,v,g,n,s,PM_THRD_SEL_OVER_ISU_HOLD,Thread selection overrides caused by ISU holds ##410C5 Thread selection was overridden because of an ISU hold. #346,v,g,n,s,PM_THRD_SEL_OVER_L2MISS,Thread selection overrides caused by L2 misses ##410C3 Thread selection was overridden because one thread had a L2 miss pending. #347,v,g,n,s,PM_THRD_SEL_T0,Decode selected thread 0 ##410C0 Thread selection picked thread 0 for decode. #348,v,g,n,s,PM_THRD_SEL_T1,Decode selected thread 1 ##410C1 Thread selection picked thread 1 for decode. #349,v,g,n,s,PM_THRD_SMT_HANG,SMT hang detected ##330E7 A hung thread was detected #350,v,g,n,n,PM_TLBIE_HELD,TLBIE held at dispatch ##130E4 Cycles a TLBIE instruction was held at dispatch. #351,v,g,n,s,PM_XER_MAP_FULL_CYC,Cycles XER mapper full ##100C2 The XER mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. This event only indicates that the mapper was full, not that dispatch was prevented. #352,v,g,n,n,PM_BR_PRED_CR,A conditional branch was predicted, CR prediction ##230E2 A conditional branch instruction was predicted as taken or not taken. #353,v,g,n,n,PM_MEM_RQ_DISP_Q16to19,Memory read queue dispatched to queues 16-19 ##727E6 A memory operation was dispatched to read queue 16,17,18 or 19. This event is sent from the Memory Controller clock domain and must be scaled accordingly. 
#354,c,g,n,n,PM_MEM_FAST_PATH_RD_DISP,Fast path memory read dispatched ##731E6 Fast path memory read dispatched #355,v,g,n,n,PM_SNOOP_RETRY_AB_COLLISION,Snoop retry due to a b collision ##735E6 Snoop retry due to a b collision $$$$$$$$ { counter 3 } #0,v,g,n,n,PM_0INST_CLB_CYC,Cycles no instructions in CLB ##400C0 The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. #1,v,g,n,n,PM_1INST_CLB_CYC,Cycles 1 instruction in CLB ##400C1 The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. #2,v,g,n,n,PM_2INST_CLB_CYC,Cycles 2 instructions in CLB ##400C2 The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. #3,v,g,n,n,PM_3INST_CLB_CYC,Cycles 3 instructions in CLB ##400C3 The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. 
Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. #4,v,g,n,n,PM_4INST_CLB_CYC,Cycles 4 instructions in CLB ##400C4 The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. #5,v,g,n,n,PM_5INST_CLB_CYC,Cycles 5 instructions in CLB ##400C5 The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. #6,v,g,n,n,PM_6INST_CLB_CYC,Cycles 6 instructions in CLB ##400C6 The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. #7,u,g,n,s,PM_BRQ_FULL_CYC,Cycles branch queue full ##100C5 Cycles when the issue queue that feeds the branch unit is full. 
This condition will prevent dispatch groups from being dispatched. This event only indicates that the queue was full, not that dispatch was prevented. #8,v,g,n,n,PM_BR_ISSUED,Branches issued ##230E4 A branch instruction was issued to the branch unit. A branch that was incorrectly predicted may issue and execute multiple times. #9,v,g,n,n,PM_BR_MPRED_CR,Branch mispredictions due to CR bit setting ##230E5 A conditional branch instruction was incorrectly predicted as taken or not taken. The branch execution unit detects a branch mispredict because the CR value is opposite of the predicted value. This will result in a branch redirect flush if not overridden by a flush of an older instruction. #10,v,g,n,n,PM_BR_MPRED_TA,Branch mispredictions due to target address ##230E6 A branch instruction target was incorrectly predicted. This will result in a branch mispredict flush unless a flush is detected from an older instruction. #11,v,g,n,n,PM_BR_PRED_CR,A conditional branch was predicted, CR prediction ##23087,230E2 A conditional branch instruction was predicted as taken or not taken. #12,v,g,n,s,PM_CLB_EMPTY_CYC,Cycles CLB empty ##410C6 Cycles when both thread's CLB is completely empty. #13,v,g,n,n,PM_CLB_FULL_CYC,Cycles CLB full ##220E5 Cycles when both thread's CLB is full. #14,u,g,n,s,PM_CRQ_FULL_CYC,Cycles CR issue queue full ##110C1 The issue queue that feeds the Conditional Register unit is full. This condition will prevent dispatch groups from being dispatched. This event only indicates that the queue was full, not that dispatch was prevented. #15,v,g,n,s,PM_CR_MAP_FULL_CYC,Cycles CR logical operation mapper full ##100C4 The Conditional Register mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. This event only indicates that the mapper was full, not that dispatch was prevented. 
#16,v,g,n,s,PM_CYC,Processor cycles ##0000F Processor cycles #17,v,g,n,n,PM_DATA_FROM_L25_MOD,Data loaded from L2.5 modified ##C30A2 The processor's Data Cache was reloaded with modified (M) data from the L2 of a chip on the same module as this processor is located due to a demand load. #18,v,g,n,n,PM_DATA_FROM_L275_SHR,Data loaded from L2.75 shared ##C3097 The processor's Data Cache was reloaded with shared (T) data from the L2 on a different module than this processor is located due to a demand load. #19,v,g,n,n,PM_DATA_FROM_L2MISS,Data loaded missed L2 ##C309B The processor's Data Cache was reloaded but not from the local L2. #20,v,g,n,n,PM_DATA_FROM_L3,Data loaded from L3 ##C30AF The processor's Data Cache was reloaded from the local L3 due to a demand load. #21,v,g,n,n,PM_DATA_FROM_L35_MOD,Data loaded from L3.5 modified ##C30A6 The processor's Data Cache was reloaded with modified (M) data from the L3 of a chip on the same module as this processor is located due to a demand load. #22,v,g,n,n,PM_DATA_FROM_L375_SHR,Data loaded from L3.75 shared ##C309E The processor's Data Cache was reloaded with shared (S) data from the L3 of a chip on a different module than this processor is located due to a demand load. #23,v,g,n,n,PM_DATA_FROM_LMEM,Data loaded from local memory ##C30A0 The processor's Data Cache was reloaded from memory attached to the same module this processor is located on. #24,v,g,n,n,PM_DATA_TABLEWALK_CYC,Cycles doing data tablewalks ##800C7 Cycles a translation tablewalk is active. While a tablewalk is active any request attempting to access the TLB will be rejected and retried. #25,u,g,n,s,PM_DC_INV_L2,L1 D cache entries invalidated from L2 ##C10C7 A dcache invalidated was received from the L2 because a line in L2 was castout. #26,v,g,n,n,PM_DC_PREF_OUT_OF_STREAMS,D cache out of prefetch streams ##C50C2 A new prefetch stream was detected but no more stream entries were available. 
#27,v,g,n,n,PM_DC_PREF_DST,DST (Data Stream Touch) stream start ##830E6 A prefetch stream was started using the DST instruction. #28,v,g,n,n,PM_DC_PREF_STREAM_ALLOC,D cache new prefetch stream allocated ##830E7 A new Prefetch Stream was allocated. #29,v,g,n,n,PM_DSLB_MISS,Data SLB misses ##800C5 A SLB miss for a data request occurred. SLB misses trap to the operating system to resolve. #30,v,g,n,n,PM_DTLB_MISS,Data TLB misses ##800C4,C20E0 Data TLB misses, all page sizes. #31,v,g,n,n,PM_DTLB_MISS_16M,Data TLB miss for 16M page ##C208D Data TLB references to 16MB pages that missed the TLB. Page size is determined at TLB reload time. #32,v,g,n,n,PM_DTLB_REF,Data TLB references ##C20E4 Total number of Data TLB references for all page sizes. Page size is determined at TLB reload time. #33,v,g,n,n,PM_DTLB_REF_16M,Data TLB reference for 16M page ##C2086 Data TLB references for 16MB pages. Includes hits + misses. #34,v,g,n,n,PM_EE_OFF,Cycles MSR(EE) bit off ##130E3 Cycles MSR(EE) bit was off indicating that interrupts due to external exceptions were masked. #35,u,g,n,n,PM_EE_OFF_EXT_INT,Cycles MSR(EE) bit off and external interrupt pending ##130E7 Cycles when an interrupt due to an external exception is pending but external exceptions were masked. #36,v,g,n,s,PM_FAB_CMD_ISSUED,Fabric command issued ##700C7 Incremented when a chip issues a command on its SnoopA address bus. Each of the two address busses (SnoopA and SnoopB) is capable of one transaction per fabric cycle (one fabric cycle = 2 cpu cycles in normal 2:1 mode), but each chip can only drive the SnoopA bus, and can only drive one transaction every two fabric cycles (i.e., every four cpu cycles). In MCM-based systems, two chips interleave their accesses to each of the two fabric busses (SnoopA, SnoopB) to reach a peak capability of one transaction per cpu clock cycle. The two chips that drive SnoopB are wired so that the chips refer to the bus as SnoopA but it is connected to the other two chips as SnoopB. 
Note that this event will only be recorded by the FBC on the chip that sourced the operation. The signal is delivered at FBC speed and the count must be scaled. #37,v,g,n,n,PM_FAB_CMD_RETRIED,Fabric command retried ##710C7 Incremented when a command issued by a chip on its SnoopA address bus is retried for any reason. The overwhelming majority of retries are due to running out of memory controller queues but retries can also be caused by trying to reference addresses that are in a transient cache state -- e.g. a line is transient after issuing a DCLAIM instruction to a shared line but before the associated store completes. Each chip reports its own counts. The signal is delivered at FBC speed and the count must be scaled accordingly. #38,v,g,n,s,PM_FAB_DCLAIM_ISSUED,dclaim issued ##720E7 A DCLAIM command was issued. Each chip reports its own counts. The signal is delivered at FBC speed and the count must be scaled accordingly. #39,v,g,n,s,PM_FAB_DCLAIM_RETRIED,dclaim retried ##730E7 A DCLAIM command was retried. Each chip reports its own counts. The signal is delivered at FBC speed and the count must be scaled accordingly. #40,v,g,n,s,PM_FAB_HOLDtoNN_EMPTY,Hold buffer to NN empty ##722E7 Fabric cycles when the Next Node out hold-buffers are empty. The signal is delivered at FBC speed and the count must be scaled accordingly. #41,v,g,n,s,PM_FAB_HOLDtoVN_EMPTY,Hold buffer to VN empty ##721E7 Fabric cycles when the Vertical Node out hold-buffers are empty. The signal is delivered at FBC speed and the count must be scaled accordingly. #42,v,g,n,s,PM_FAB_M1toP1_SIDECAR_EMPTY,M1 to P1 sidecar empty ##702C7 Fabric cycles when the Minus-1 hip/hop sidecars (sidecars for chip to chip data transfer) are empty. The signal is delivered at FBC speed and the count must be scaled accordingly. #43,v,g,n,s,PM_FAB_M1toVNorNN_SIDECAR_EMPTY,M1 to VN/NN sidecar empty ##712C7 Fabric cycles when the Minus-1 jump sidecar (sidecars for mcm to mcm data transfer) is empty. 
The signal is delivered at FBC speed and the count must be scaled accordingly. #44,v,g,n,s,PM_FAB_P1toM1_SIDECAR_EMPTY,P1 to M1 sidecar empty ##701C7 Fabric cycles when the Plus-1 hip/hop sidecars (sidecars for chip to chip data transfer) are empty. The signal is delivered at FBC speed and the count must be scaled accordingly. #45,v,g,n,s,PM_FAB_P1toVNorNN_SIDECAR_EMPTY,P1 to VN/NN sidecar empty ##711C7 Fabric cycles when the Plus-1 jump sidecar (sidecars for mcm to mcm data transfer) is empty. The signal is delivered at FBC speed and the count must be scaled accordingly. #46,v,g,n,s,PM_FAB_PNtoNN_DIRECT,PN to NN beat went straight to its destination ##703C7 Fabric Data beats that the base chip takes the inbound PN data and passes it through to the outbound NN bus without going into a sidecar. The signal is delivered at FBC speed and the count must be scaled. #47,v,g,n,s,PM_FAB_PNtoNN_SIDECAR,PN to NN beat went to sidecar first ##713C7 Fabric Data beats that the base chip takes the inbound PN data and forwards it on to the outbound NN data bus after going into a sidecar first. The signal is delivered at FBC speed and the count must be scaled. #48,v,g,n,s,PM_FAB_PNtoVN_DIRECT,PN to VN beat went straight to its destination ##723E7 Fabric Data beats that the base chip takes the inbound PN data and passes it through to the outbound VN bus without going into a sidecar. The signal is delivered at FBC speed and the count must be scaled accordingly. #49,v,g,n,s,PM_FAB_PNtoVN_SIDECAR,PN to VN beat went to sidecar first ##733E7 Fabric data beats that the base chip takes the inbound PN data and forwards it on to the outbound VN data bus after going into a sidecar first. The signal is delivered at FBC speed and the count must be scaled accordingly. #50,v,g,n,s,PM_FAB_VBYPASS_EMPTY,Vertical bypass buffer empty ##731E7 Fabric cycles when the Middle Bypass sidecar is empty. The signal is delivered at FBC speed and the count must be scaled accordingly. 
#51,v,g,n,n,PM_FLUSH,Flushes ##110C7 Flushes occurred including LSU and Branch flushes. #52,v,g,n,n,PM_FLUSH_BR_MPRED,Flush caused by branch mispredict ##110C6 A flush was caused by a branch mispredict. #53,v,g,n,s,PM_FLUSH_IMBAL,Flush caused by thread GCT imbalance ##330E3 This thread has been flushed at dispatch because it is stalled and a GCT imbalance exists. GCT thresholds are set in the TSCR register. This allows the other thread to have more machine resources for it to make progress while this thread is stalled. #54,v,g,n,s,PM_FLUSH_SB,Flush caused by scoreboard operation ##330E2 This thread has been flushed at dispatch because its scoreboard bit is set indicating that a non-renamed resource is being updated. This allows the other thread to have more machine resources for it to make progress while this thread is stalled. #55,v,g,n,s,PM_FLUSH_SYNC,Flush caused by sync ##330E1 This thread has been flushed at dispatch due to a sync, lwsync, ptesync, or tlbsync instruction. This allows the other thread to have more machine resources for it to make progress until the sync finishes. #56,v,g,n,s,PM_FPR_MAP_FULL_CYC,Cycles FPR mapper full ##100C1 The Floating Point Register mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. This event only indicates that the mapper was full, not that dispatch was prevented. #57,v,g,n,n,PM_FPU0_1FLOP,FPU0 executed add, mult, sub, cmp or sel instruction ##000C3 The floating point unit has executed an add, mult, sub, compare, fsel, fneg, fabs, fnabs, fres, or frsqrte kind of instruction. These are single FLOP operations. #58,v,g,n,n,PM_FPU0_DENORM,FPU0 received denormalized data ##020E0 FPU0 has encountered a denormalized operand. #59,v,g,n,n,PM_FPU0_FDIV,FPU0 executed FDIV instruction ##000C0 FPU0 has executed a divide instruction. This could be fdiv, fdivs, fdiv. fdivs. #60,v,g,n,n,PM_FPU0_FEST,FPU0 executed FEST instruction ##010C2 FPU0 has executed an estimate instruction. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. 
#61,v,g,n,n,PM_FPU0_FIN,FPU0 produced a result ##010C3 FPU0 finished, produced a result. This only indicates finish, not completion. Floating Point Stores are included in this count but not Floating Point Loads. #62,v,g,n,n,PM_FPU0_FMA,FPU0 executed multiply-add instruction ##000C1 The floating point unit has executed a multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. #63,v,g,n,n,PM_FPU0_FMOV_FEST,FPU0 executed FMOV or FEST instructions ##010C0 FPU0 has executed a move kind of instruction or one of the estimate instructions. This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ. #64,v,g,n,n,PM_FPU0_FPSCR,FPU0 executed FPSCR instruction ##030E0 FPU0 has executed FPSCR move related instruction. This could be mtfsfi*, mtfsb0*, mtfsb1*, mffs*, mtfsf*, mcrsf* where XYZ* means XYZ, XYZs, XYZ., XYZs. #65,v,g,n,n,PM_FPU0_FRSP_FCONV,FPU0 executed FRSP or FCONV instructions ##010C1 FPU0 has executed a frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. #66,v,g,n,n,PM_FPU0_FSQRT,FPU0 executed FSQRT instruction ##000C2 FPU0 has executed a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. #67,v,g,n,s,PM_FPU0_FULL_CYC,Cycles FPU0 issue queue full ##100C3 The issue queue for FPU0 cannot accept any more instruction. Dispatch to this issue queue is stopped. #68,v,g,n,n,PM_FPU0_SINGLE,FPU0 executed single precision instruction ##020E3 FPU0 has executed a single precision instruction. #69,v,g,n,n,PM_FPU0_STALL3,FPU0 stalled in pipe3 ##020E1 FPU0 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). #70,v,g,n,n,PM_FPU0_STF,FPU0 executed store instruction ##020E2 FPU0 has executed a Floating Point Store instruction. 
#71,v,g,n,n,PM_FPU1_1FLOP,FPU1 executed add, mult, sub, cmp or sel instruction ##000C7 The floating point unit has executed an add, mult, sub, compare, fsel, fneg, fabs, fnabs, fres, or frsqrte kind of instruction. These are single FLOP operations. #72,v,g,n,n,PM_FPU1_DENORM,FPU1 received denormalized data ##020E4 FPU1 has encountered a denormalized operand. #73,v,g,n,n,PM_FPU1_FDIV,FPU1 executed FDIV instruction ##000C4 FPU1 has executed a divide instruction. This could be fdiv, fdivs, fdiv. fdivs. #74,v,g,n,n,PM_FPU1_FEST,FPU1 executed FEST instruction ##010C6 FPU1 has executed an estimate instruction. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. #75,v,g,n,n,PM_FPU1_FIN,FPU1 produced a result ##010C7 FPU1 finished, produced a result. This only indicates finish, not completion. Floating Point Stores are included in this count but not Floating Point Loads. #76,v,g,n,n,PM_FPU1_FMA,FPU1 executed multiply-add instruction ##000C5 The floating point unit has executed a multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. #77,v,g,n,n,PM_FPU1_FMOV_FEST,FPU1 executed FMOV or FEST instructions ##010C4 FPU1 has executed a move kind of instruction or one of the estimate instructions. This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ. #78,v,g,n,n,PM_FPU1_FRSP_FCONV,FPU1 executed FRSP or FCONV instructions ##010C5 FPU1 has executed a frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. #79,v,g,n,n,PM_FPU1_FSQRT,FPU1 executed FSQRT instruction ##000C6 FPU1 has executed a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. #80,v,g,n,s,PM_FPU1_FULL_CYC,Cycles FPU1 issue queue full ##100C7 The issue queue for FPU1 cannot accept any more instructions. 
Dispatch to this issue queue is stopped #81,v,g,n,n,PM_FPU1_SINGLE,FPU1 executed single precision instruction ##020E7 FPU1 has executed a single precision instruction. #82,v,g,n,n,PM_FPU1_STALL3,FPU1 stalled in pipe3 ##020E5 FPU1 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). #83,v,g,n,n,PM_FPU1_STF,FPU1 executed store instruction ##020E6 FPU1 has executed a Floating Point Store instruction. #84,v,g,n,n,PM_FPU_FMOV_FEST,FPU executed FMOV or FEST instructions ##01088 The floating point unit has executed a move kind of instruction or one of the estimate instructions. This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ.. Combined Unit 0 + Unit 1. #85,v,g,n,n,PM_FPU_FRSP_FCONV,FPU executed FRSP or FCONV instructions ##01090 The floating point unit has executed a frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. Combined Unit 0 + Unit 1. #86,v,g,n,n,PM_FPU_FSQRT,FPU executed FSQRT instruction ##000A8 The floating point unit has executed a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. Combined Unit 0 + Unit 1. #87,v,g,n,n,PM_FPU_STF,FPU executed store instruction ##020A8 FPU has executed a store instruction. Combined Unit 0 + Unit 1. #88,v,g,n,s,PM_FXLS0_FULL_CYC,Cycles FXU0/LS0 queue full ##110C0 The issue queue that feeds the Fixed Point unit 0 / Load Store Unit 0 is full. This condition will prevent dispatch groups from being dispatched. This event only indicates that the queue was full, not that dispatch was prevented. #89,v,g,n,s,PM_FXLS1_FULL_CYC,Cycles FXU1/LS1 queue full ##110C4 The issue queue that feeds the Fixed Point unit 1 / Load Store Unit 1 is full. This condition will prevent dispatch groups from being dispatched. This event only indicates that the queue was full, not that dispatch was prevented. 
#90,u,g,n,n,PM_FXU0_BUSY_FXU1_IDLE,FXU0 busy FXU1 idle ##00012 FXU0 is busy while FXU1 was idle #91,v,g,n,n,PM_FXU0_FIN,FXU0 produced a result ##130E2 The Fixed Point unit 0 finished an instruction and produced a result. Instructions that finish may not necessarily complete. #92,v,g,n,n,PM_FXU1_FIN,FXU1 produced a result ##130E6 The Fixed Point unit 1 finished an instruction and produced a result. Instructions that finish may not necessarily complete. #93,v,g,n,n,PM_FXU_FIN,FXU produced a result ##13088 The fixed point unit (Unit 0 + Unit 1) finished an instruction. Instructions that finish may not necessarily complete. #94,v,g,n,n,PM_GCT_FULL_CYC,Cycles GCT full ##100C0 The Global Completion Table is completely full. #95,v,g,n,n,PM_GCT_NOSLOT_SRQ_FULL,No slot in GCT caused by SRQ full ##10084 Cycles when the Global Completion Table has no slots from this thread because the Store Request Queue (SRQ) is full. This happens when the storage subsystem can not process the stores in the SRQ. Groups can not be dispatched until a SRQ entry is available. #96,v,g,n,s,PM_GCT_USAGE_80to99_CYC,Cycles GCT 80-99% full ##0001F Cycles when the Global Completion Table has between 80% and 99% of its slots used. The GCT has 20 entries shared between threads #97,v,g,n,s,PM_GPR_MAP_FULL_CYC,Cycles GPR mapper full ##130E5 The General Purpose Register mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. This event only indicates that the mapper was full, not that dispatch was prevented. #98,v,g,n,n,PM_GRP_BR_REDIR,Group experienced branch redirect ##120E6 Number of groups, counted at dispatch, that have encountered a branch redirect. Every group constructed from a fetch group that has been redirected will count. 
#99,c,g,n,n,PM_GRP_IC_MISS_BR_REDIR_NONSPEC,Group experienced non-speculative I cache miss or branch redirect ##120E5 Group experienced non-speculative I cache miss or branch redirect #100,v,g,n,n,PM_GRP_CMPL,Group completed ##00013 A group completed. Microcoded instructions that span multiple groups will generate this event once per group. #101,v,g,n,n,PM_GRP_DISP_BLK_SB_CYC,Cycles group dispatch blocked by scoreboard ##130E1 A scoreboard operation on a non-renamed resource has blocked dispatch. #102,v,g,n,n,PM_GRP_DISP_REJECT,Group dispatch rejected ##120E4 A group that previously attempted dispatch was rejected. #103,v,g,n,n,PM_GRP_DISP_SUCCESS,Group dispatch success ##00002 Number of groups successfully dispatched (not rejected) #104,v,g,n,n,PM_GRP_DISP_VALID,Group dispatch valid ##120E3 A group is available for dispatch. This does not mean it was successfully dispatched. #105,v,g,n,n,PM_GRP_IC_MISS,Group experienced I cache miss ##120E7 Number of groups, counted at dispatch, that have encountered an icache miss redirect. Every group constructed from a fetch group that missed the instruction cache will count. #106,v,g,n,n,PM_IC_DEMAND_L2_BHT_REDIRECT,L2 I cache demand request due to BHT redirect ##230E0 A demand (not prefetch) miss to the instruction cache was sent to the L2 as a result of a branch prediction redirect (CR mispredict). #107,v,g,n,n,PM_IC_DEMAND_L2_BR_REDIRECT,L2 I cache demand request due to branch redirect ##230E1 A demand (not prefetch) miss to the instruction cache was sent to the L2 as a result of a branch prediction redirect (either ALL mispredicted or Target). #108,v,g,n,n,PM_IC_PREF_INSTALL,Instruction prefetched installed in prefetch buffer ##210C7 A prefetch buffer entry (line) is allocated but the request is not a demand fetch. #109,v,g,n,n,PM_IC_PREF_REQ,Instruction prefetch requests ##220E6 An instruction prefetch request has been made. 
#110,v,g,n,n,PM_IERAT_XLATE_WR,Translation written to ierat ##220E7 An entry was written into the IERAT as a result of an IERAT miss. This event can be used to count IERAT misses. ERAT misses that are later ignored will not be counted unless the ERAT is written before the instruction stream is changed. #111,v,g,n,n,PM_IERAT_XLATE_WR_LP,Large page translation written to ierat ##210C6 An entry was written into the IERAT as a result of an IERAT miss. This event can be used to count IERAT misses. ERAT misses that are later ignored will not be counted unless the ERAT is written before the instruction stream is changed. #112,v,g,n,n,PM_IOPS_CMPL,Internal operations completed ##00001 Number of internal operations that completed. #113,v,g,n,n,PM_INST_DISP,Instructions dispatched ##00009 Number of PowerPC instructions successfully dispatched. #114,v,g,n,n,PM_INST_FETCH_CYC,Cycles at least 1 instruction fetched ##220E4 Cycles when at least one instruction was sent from the fetch unit to the decode unit. #115,v,g,n,n,PM_INST_FROM_L275_SHR,Instruction fetched from L2.75 shared ##22096 An instruction fetch group was fetched with shared (T) data from the L2 on a different module than this processor is located. Fetch groups can contain up to 8 instructions #116,v,g,n,n,PM_INST_FROM_L3,Instruction fetched from L3 ##220AE An instruction fetch group was fetched from the local L3. Fetch groups can contain up to 8 instructions #117,v,g,n,n,PM_INST_FROM_L375_SHR,Instruction fetched from L3.75 shared ##2209D An instruction fetch group was fetched with shared (S) data from the L3 of a chip on a different module than this processor is located. Fetch groups can contain up to 8 instructions #118,v,g,n,n,PM_INST_FROM_PREF,Instruction fetched from prefetch ##2208D An instruction fetch group was fetched from the prefetch buffer. 
Fetch groups can contain up to 8 instructions #119,u,g,n,n,PM_ISLB_MISS,Instruction SLB misses ##800C1 A SLB miss for an instruction fetch has occurred #120,v,g,n,n,PM_ITLB_MISS,Instruction TLB misses ##800C0 A TLB miss for an Instruction Fetch has occurred #121,v,g,n,n,PM_L1_DCACHE_RELOAD_VALID,L1 reload data source valid ##C30E4 The data source information is valid, the data cache has been reloaded. Prior to POWER5+ this included data cache reloads due to prefetch activity. With POWER5+ this now only includes reloads due to demand loads. #122,v,g,n,n,PM_L1_PREF,L1 cache data prefetches ##C70E7 A request to prefetch data into the L1 was made #123,v,g,n,n,PM_L1_WRITE_CYC,Cycles writing to instruction L1 ##230E7 Cycles that a cache line was written to the instruction cache. #124,v,g,n,s,PM_L2SA_MOD_INV,L2 slice A transition from modified to invalid ##730E0 A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C. #125,v,g,n,s,PM_L2SA_MOD_TAG,L2 slice A transition from modified to tagged ##720E0 A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C. #126,v,g,n,s,PM_L2SA_RCLD_DISP,L2 slice A RC load dispatch attempt ##701C0 A Read/Claim dispatch for a Load was attempted #127,v,g,n,s,PM_L2SA_RCLD_DISP_FAIL_ADDR,L2 slice A RC load dispatch attempt failed due to address collision with RC/CO/SN/SQ ##711C0 A Read/Claim dispatch for a load failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time. 
#128,v,g,n,s,PM_L2SA_RCLD_DISP_FAIL_OTHER,L2 slice A RC load dispatch attempt failed due to other reasons ##731E0 A Read/Claim dispatch for a load failed for some reason other than Full or Collision conditions. #129,v,g,n,s,PM_L2SA_RCLD_DISP_FAIL_RC_FULL,L2 slice A RC load dispatch attempt failed due to all RC full ##721E0 A Read/Claim dispatch for a load failed because all RC machines are busy. #130,v,g,n,s,PM_L2SA_RCST_DISP,L2 slice A RC store dispatch attempt ##702C0 A Read/Claim dispatch for a Store was attempted. #131,v,g,n,s,PM_L2SA_RCST_DISP_FAIL_ADDR,L2 slice A RC store dispatch attempt failed due to address collision with RC/CO/SN/SQ ##712C0 A Read/Claim dispatch for a store failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time. #132,v,g,n,s,PM_L2SA_RCST_DISP_FAIL_OTHER,L2 slice A RC store dispatch attempt failed due to other reasons ##732E0 A Read/Claim dispatch for a store failed for some reason other than Full or Collision conditions. Rejected dispatches do not count because they have not yet been attempted. #133,v,g,n,s,PM_L2SA_RCST_DISP_FAIL_RC_FULL,L2 slice A RC store dispatch attempt failed due to all RC full ##722E0 A Read/Claim dispatch for a store failed because all RC machines are busy. #134,v,g,n,s,PM_L2SA_RC_DISP_FAIL_CO_BUSY,L2 slice A RC dispatch attempt failed due to RC/CO pair chosen was miss and CO already busy ##703C0 A Read/Claim Dispatch was rejected at dispatch because the Castout Machine was busy. In the case of an RC starting up on a miss and the victim is valid, the CO machine must be available for the RC to process the access. If the CO is still busy working on an old castout, then the RC must not-ack the access if it is a miss(re-issued by the CIU). If it is a miss and the CO is available to process the castout, the RC will accept the access. 
Once the RC has finished, it can restart and process new accesses that result in a hit (or miss that doesn't need a CO) even though the CO is still processing a castout from a previous access. #135,v,g,n,s,PM_L2SA_RC_DISP_FAIL_CO_BUSY_ALL,L2 slice A RC dispatch attempt failed due to all CO busy ##713C0 A Read/Claim dispatch was rejected because all Castout machines were busy. #136,v,g,n,s,PM_L2SA_SHR_INV,L2 slice A transition from shared to invalid ##710C0 A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. This transition was caused by any external snoop request. The event is provided on each of the three slices A, B, and C. NOTE: For this event to be useful the tablewalk duration event should also be counted. #137,v,g,n,s,PM_L2SA_SHR_MOD,L2 slice A transition from shared to modified ##700C0 A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A, B, and C. #138,v,g,n,n,PM_L2SA_ST_HIT,L2 slice A store hits ##733E0 A store request made from the core hit in the L2 directory. This event is provided on each of the three L2 slices A, B, and C. #139,v,g,n,n,PM_L2SA_ST_REQ,L2 slice A store requests ##723E0 A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. The event is provided on each of the three slices A, B, and C. #140,v,g,n,s,PM_L2SB_MOD_INV,L2 slice B transition from modified to invalid ##730E1 A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C. 
#141,v,g,n,s,PM_L2SB_MOD_TAG,L2 slice B transition from modified to tagged ##720E1 A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C. #142,v,g,n,s,PM_L2SB_RCLD_DISP,L2 slice B RC load dispatch attempt ##701C1 A Read/Claim dispatch for a Load was attempted #143,v,g,n,s,PM_L2SB_RCLD_DISP_FAIL_ADDR,L2 slice B RC load dispatch attempt failed due to address collision with RC/CO/SN/SQ ##711C1 A Read/Claim dispatch for a load failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time. #144,v,g,n,s,PM_L2SB_RCLD_DISP_FAIL_OTHER,L2 slice B RC load dispatch attempt failed due to other reasons ##731E1 A Read/Claim dispatch for a load failed for some reason other than Full or Collision conditions. #145,v,g,n,s,PM_L2SB_RCLD_DISP_FAIL_RC_FULL,L2 slice B RC load dispatch attempt failed due to all RC full ##721E1 A Read/Claim dispatch for a load failed because all RC machines are busy. #146,v,g,n,s,PM_L2SB_RCST_DISP,L2 slice B RC store dispatch attempt ##702C1 A Read/Claim dispatch for a Store was attempted. #147,v,g,n,s,PM_L2SB_RCST_DISP_FAIL_ADDR,L2 slice B RC store dispatch attempt failed due to address collision with RC/CO/SN/SQ ##712C1 A Read/Claim dispatch for a store failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time. #148,v,g,n,s,PM_L2SB_RCST_DISP_FAIL_OTHER,L2 slice B RC store dispatch attempt failed due to other reasons ##732E1 A Read/Claim dispatch for a store failed for some reason other than Full or Collision conditions. Rejected dispatches do not count because they have not yet been attempted. 
#149,v,g,n,s,PM_L2SB_RCST_DISP_FAIL_RC_FULL,L2 slice B RC store dispatch attempt failed due to all RC full ##722E2 A Read/Claim dispatch for a store failed because all RC machines are busy. #150,v,g,n,s,PM_L2SB_RC_DISP_FAIL_CO_BUSY,L2 slice B RC dispatch attempt failed due to RC/CO pair chosen was miss and CO already busy ##703C1 A Read/Claim Dispatch was rejected at dispatch because the Castout Machine was busy. In the case of an RC starting up on a miss and the victim is valid, the CO machine must be available for the RC to process the access. If the CO is still busy working on an old castout, then the RC must not-ack the access if it is a miss(re-issued by the CIU). If it is a miss and the CO is available to process the castout, the RC will accept the access. Once the RC has finished, it can restart and process new accesses that result in a hit (or miss that doesn't need a CO) even though the CO is still processing a castout from a previous access. #151,v,g,n,s,PM_L2SB_RC_DISP_FAIL_CO_BUSY_ALL,L2 slice B RC dispatch attempt failed due to all CO busy ##713C1 A Read/Claim dispatch was rejected because all Castout machines were busy. #152,v,g,n,s,PM_L2SB_SHR_INV,L2 slice B transition from shared to invalid ##710C1 A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. This transition was caused by any external snoop request. The event is provided on each of the three slices A, B, and C. NOTE: For this event to be useful the tablewalk duration event should also be counted. #153,v,g,n,s,PM_L2SB_SHR_MOD,L2 slice B transition from shared to modified ##700C1 A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A, B, and C. 
#154,v,g,n,n,PM_L2SB_ST_HIT,L2 slice B store hits ##733E1 A store request made from the core hit in the L2 directory. This event is provided on each of the three L2 slices A, B and C. #155,v,g,n,n,PM_L2SB_ST_REQ,L2 slice B store requests ##723E1 A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. The event is provided on each of the three slices A, B, and C. #156,v,g,n,s,PM_L2SC_MOD_INV,L2 slice C transition from modified to invalid ##730E2 A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C. #157,v,g,n,s,PM_L2SC_MOD_TAG,L2 slice C transition from modified to tagged ##720E2 A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C. #158,v,g,n,s,PM_L2SC_RCLD_DISP,L2 slice C RC load dispatch attempt ##701C2 A Read/Claim dispatch for a Load was attempted #159,v,g,n,s,PM_L2SC_RCLD_DISP_FAIL_ADDR,L2 slice C RC load dispatch attempt failed due to address collision with RC/CO/SN/SQ ##711C2 A Read/Claim dispatch for a load failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time. #160,v,g,n,s,PM_L2SC_RCLD_DISP_FAIL_OTHER,L2 slice C RC load dispatch attempt failed due to other reasons ##731E2 A Read/Claim dispatch for a load failed for some reason other than Full or Collision conditions. #161,v,g,n,s,PM_L2SC_RCLD_DISP_FAIL_RC_FULL,L2 slice C RC load dispatch attempt failed due to all RC full ##721E2 A Read/Claim dispatch for a load failed because all RC machines are busy. 
#162,v,g,n,s,PM_L2SC_RCST_DISP,L2 slice C RC store dispatch attempt ##702C2 A Read/Claim dispatch for a Store was attempted. #163,v,g,n,s,PM_L2SC_RCST_DISP_FAIL_ADDR,L2 slice C RC store dispatch attempt failed due to address collision with RC/CO/SN/SQ ##712C2 A Read/Claim dispatch for a store failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time. #164,v,g,n,s,PM_L2SC_RCST_DISP_FAIL_OTHER,L2 slice C RC store dispatch attempt failed due to other reasons ##732E2 A Read/Claim dispatch for a store failed for some reason other than Full or Collision conditions. Rejected dispatches do not count because they have not yet been attempted. #165,v,g,n,s,PM_L2SC_RCST_DISP_FAIL_RC_FULL,L2 slice C RC store dispatch attempt failed due to all RC full ##722E1 A Read/Claim dispatch for a store failed because all RC machines are busy. #166,v,g,n,s,PM_L2SC_RC_DISP_FAIL_CO_BUSY,L2 slice C RC dispatch attempt failed due to RC/CO pair chosen was miss and CO already busy ##703C2 A Read/Claim Dispatch was rejected at dispatch because the Castout Machine was busy. In the case of an RC starting up on a miss and the victim is valid, the CO machine must be available for the RC to process the access. If the CO is still busy working on an old castout, then the RC must not-ack the access if it is a miss(re-issued by the CIU). If it is a miss and the CO is available to process the castout, the RC will accept the access. Once the RC has finished, it can restart and process new accesses that result in a hit (or miss that doesn't need a CO) even though the CO is still processing a castout from a previous access. #167,v,g,n,s,PM_L2SC_RC_DISP_FAIL_CO_BUSY_ALL,L2 slice C RC dispatch attempt failed due to all CO busy ##713C2 A Read/Claim dispatch was rejected because all Castout machines were busy. 
#168,v,g,n,s,PM_L2SC_SHR_INV,L2 slice C transition from shared to invalid ##710C2 A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. This transition was caused by any external snoop request. The event is provided on each of the three slices A, B, and C. NOTE: For this event to be useful the tablewalk duration event should also be counted. #169,v,g,n,s,PM_L2SC_SHR_MOD,L2 slice C transition from shared to modified ##700C2 A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A, B, and C. #170,v,g,n,n,PM_L2SC_ST_HIT,L2 slice C store hits ##733E2 A store request made from the core hit in the L2 directory. The event is provided on each of the three slices A, B, and C. #171,v,g,n,n,PM_L2SC_ST_REQ,L2 slice C store requests ##723E2 A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. The event is provided on each of the three slices A, B, and C. #172,v,g,n,n,PM_L2_PREF,L2 cache prefetches ##C50C3 A request to prefetch data into L2 was made #173,v,g,n,s,PM_L3SA_ALL_BUSY,L3 slice A active for every cycle all CI/CO machines busy ##721E3 Cycles All Castin/Castout machines are busy. #174,v,g,n,s,PM_L3SA_HIT,L3 slice A hits ##711C3 Number of attempts made by this chip cores that resulted in an L3 hit. Reported per L3 slice #175,v,g,n,s,PM_L3SA_MOD_INV,L3 slice A transition from modified to invalid ##730E3 L3 snooper detects someone doing a store to a line that is truly M in this L3 (i.e. L3 going M=>I) Mu|Me are not included since they are formed due to a prev read op. Tx is not included since it is considered shared at this point. 
#176,v,g,n,s,PM_L3SA_MOD_TAG,L3 slice A transition from modified to TAG ##720E3 L3 snooper detects someone doing a read to a line that is truly M in this L3(i.e. L3 going M->T or M->I(go_Mu case) Mu|Me are not included since they are formed due to a prev read op). Tx is not included since it is considered shared at this point. #177,v,g,n,s,PM_L3SA_REF,L3 slice A references ##701C3 Number of attempts made by this chip cores to find data in the L3. Reported per L3 slice #178,v,g,n,s,PM_L3SA_SHR_INV,L3 slice A transition from shared to invalid ##710C3 L3 snooper detects someone doing a store to a line that is Sx in this L3(i.e. invalidate hit SX and dispatched). #179,v,g,n,s,PM_L3SA_SNOOP_RETRY,L3 slice A snoop retries ##731E3 Number of times an L3 retried a snoop because it got two in at the same time (one on snp_a, one on snp_b) #180,v,g,n,s,PM_L3SB_ALL_BUSY,L3 slice B active for every cycle all CI/CO machines busy ##721E4 Cycles All Castin/Castout machines are busy. #181,v,g,n,s,PM_L3SB_HIT,L3 slice B hits ##711C4 Number of attempts made by this chip cores that resulted in an L3 hit. Reported per L3 slice #182,v,g,n,s,PM_L3SB_MOD_INV,L3 slice B transition from modified to invalid ##730E4 L3 snooper detects someone doing a store to a line that is truly M in this L3 (i.e. L3 going M=>I). Mu|Me are not included since they are formed due to a prev read op. Tx is not included since it is considered shared at this point. #183,v,g,n,s,PM_L3SB_MOD_TAG,L3 slice B transition from modified to TAG ##720E4 L3 snooper detects someone doing a read to a line that is truly M in this L3(i.e. L3 going M->T or M->I(go_Mu case); Mu|Me are not included since they are formed due to a prev read op). Tx is not included since it is considered shared at this point. #184,v,g,n,s,PM_L3SB_REF,L3 slice B references ##701C4 Number of attempts made by this chip cores to find data in the L3. 
Reported per L3 slice #185,v,g,n,s,PM_L3SB_SHR_INV,L3 slice B transition from shared to invalid ##710C4 L3 snooper detects someone doing a store to a line that is Sx in this L3(i.e. invalidate hit SX and dispatched). #186,v,g,n,s,PM_L3SB_SNOOP_RETRY,L3 slice B snoop retries ##731E4 Number of times an L3 retried a snoop because it got two in at the same time (one on snp_a, one on snp_b) #187,v,g,n,s,PM_L3SC_ALL_BUSY,L3 slice C active for every cycle all CI/CO machines busy ##721E5 Cycles All Castin/Castout machines are busy. #188,v,g,n,s,PM_L3SC_HIT,L3 slice C hits ##711C5 Number of attempts made by this chip cores that resulted in an L3 hit. Reported per L3 Slice #189,v,g,n,s,PM_L3SC_MOD_INV,L3 slice C transition from modified to invalid ##730E5 L3 snooper detects someone doing a store to a line that is truly M in this L3 (i.e. L3 going M=>I) Mu|Me are not included since they are formed due to a previous read op Tx is not included since it is considered shared at this point. #190,v,g,n,s,PM_L3SC_MOD_TAG,L3 slice C transition from modified to TAG ##720E5 L3 snooper detects someone doing a read to a line that is truly M in this L3(i.e. L3 going M->T or M->I(go_Mu case); Mu|Me are not included since they are formed due to a prev read op). Tx is not included since it is considered shared at this point. #191,v,g,n,s,PM_L3SC_REF,L3 slice C references ##701C5 Number of attempts made by this chip cores to find data in the L3. Reported per L3 slice. #192,v,g,n,s,PM_L3SC_SHR_INV,L3 slice C transition from shared to invalid ##710C5 L3 snooper detects someone doing a store to a line that is Sx in this L3(i.e. invalidate hit SX and dispatched). 
#193,v,g,n,s,PM_L3SC_SNOOP_RETRY,L3 slice C snoop retries ##731E5 Number of times an L3 retried a snoop because it got two in at the same time (one on snp_a, one on snp_b) #194,v,g,n,n,PM_LARX_LSU0,Larx executed on LSU0 ##820E7 A larx (lwarx or ldarx) was executed on side 0 (there is no corresponding unit 1 event since larx instructions can only execute on unit 0) #195,v,g,n,n,PM_LD_MISS_L1,L1 D cache load misses ##C1088 Load references that miss the Level 1 Data cache. Combined unit 0 + 1. #196,v,g,n,n,PM_LD_MISS_L1_LSU0,LSU0 L1 D cache load misses ##C10C2 Load references that miss the Level 1 Data cache, by unit 0. #197,v,g,n,n,PM_LD_MISS_L1_LSU1,LSU1 L1 D cache load misses ##C10C6 Load references that miss the Level 1 Data cache, by unit 1. #198,v,g,n,n,PM_LD_REF_L1_LSU0,LSU0 L1 D cache load references ##C10C0 Load references to Level 1 Data Cache, by unit 0. #199,v,g,n,n,PM_LD_MISS_L1_LSU1,LSU1 L1 D cache load misses ##C10C5 Load references that miss the Level 1 Data cache, by unit 1. #200,u,g,n,s,PM_LR_CTR_MAP_FULL_CYC,Cycles LR/CTR mapper full ##100C6 The LR/CTR mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. This event only indicates that the mapper was full, not that dispatch was prevented. #201,v,g,n,n,PM_LSU0_BUSY_REJECT,LSU0 busy due to reject ##C20E1 Total cycles the Load Store Unit 0 is busy rejecting instructions. #202,v,g,n,n,PM_LSU0_DERAT_MISS,LSU0 DERAT misses ##800C2 Total D-ERAT Misses by LSU0. Requests that miss the Derat are rejected and retried until the request hits in the Erat. This may result in multiple erat misses for the same instruction. #203,v,g,n,n,PM_LSU0_FLUSH_LRQ,LSU0 LRQ flushes ##C00C2 A load was flushed by unit 0 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. 
#204,u,g,n,n,PM_LSU0_FLUSH_SRQ,LSU0 SRQ lhs flushes ##C00C3 A store was flushed by unit 0 because a younger load hits an older store that is already in the SRQ or in the same group. #205,v,g,n,n,PM_LSU0_FLUSH_ULD,LSU0 unaligned load flushes ##C00C0 A load was flushed from unit 0 because it was unaligned (crossed a 64 byte boundary, or 32 byte if it missed the L1) #206,v,g,n,n,PM_LSU0_FLUSH_UST,LSU0 unaligned store flushes ##C00C1 A store was flushed from unit 0 because it was unaligned (crossed a 4K boundary). #207,v,g,n,n,PM_LSU0_LDF,LSU0 executed Floating Point load instruction ##C50C0 A floating point load was executed by LSU0 #208,v,g,n,n,PM_LSU0_NCLD,LSU0 non-cacheable loads ##C50C1 A non-cacheable load was executed by unit 0. #209,v,g,n,n,PM_LSU0_REJECT_ERAT_MISS,LSU0 reject due to ERAT miss ##C40C3 Total cycles the Load Store Unit 0 is busy rejecting instructions due to an ERAT miss. Requests that miss the Derat are rejected and retried until the request hits in the Erat. #210,v,g,n,n,PM_LSU0_REJECT_LMQ_FULL,LSU0 reject due to LMQ full or missed data coming ##C40C1 Total cycles the Load Store Unit 0 is busy rejecting instructions because the Load Miss Queue was full. The LMQ has eight entries. If all eight entries are full, subsequent load instructions are rejected. #211,v,g,n,n,PM_LSU0_REJECT_RELOAD_CDF,LSU0 reject due to reload CDF or tag update collision ##C40C2 Total cycles the Load Store Unit 0 is busy rejecting instructions because of Critical Data Forward. When critical data arrives from the storage system it is formatted and immediately forwarded, bypassing the data cache, to the destination register using the result bus. Any instruction that requires the result bus in the same cycle is rejected. Tag update rejects are caused when an instruction requires access to the Dcache directory or ERAT in the same system when they are being updated.
#212,v,g,n,n,PM_LSU0_REJECT_SRQ,LSU0 SRQ lhs rejects ##C40C0 Total cycles the Load Store Unit 0 is busy rejecting instructions because of Load Hit Store conditions. Loads are rejected when data is needed from a previous store instruction but store forwarding is not possible because the data is not fully contained in the Store Data Queue or is not yet available in the Store Data Queue. #213,u,g,n,n,PM_LSU0_SRQ_STFWD,LSU0 SRQ store forwarded ##C60E1 Data from a store instruction was forwarded to a load on unit 0. A load that misses L1 but becomes a store forward is treated as a load miss and it causes the DL1 load miss event to be counted. It does not go into the LMQ. If a load that hits L1 but becomes a store forward, then it's not treated as a load miss. #214,v,g,n,n,PM_LSU1_BUSY_REJECT,LSU1 busy due to reject ##C20E5 Total cycles the Load Store Unit 1 is busy rejecting instructions. #215,v,g,n,n,PM_LSU1_DERAT_MISS,LSU1 DERAT misses ##800C6 A data request (load or store) from LSU Unit 1 missed the ERAT and resulted in an ERAT reload. Multiple instructions may miss the ERAT entry for the same 4K page, but only one reload will occur. #216,v,g,n,n,PM_LSU1_FLUSH_LRQ,LSU1 LRQ flushes ##C00C6 A load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. #217,u,g,n,n,PM_LSU1_FLUSH_SRQ,LSU1 SRQ lhs flushes ##C00C7 A store was flushed because a younger load hits an older store that is already in the SRQ or in the same group. #218,v,g,n,n,PM_LSU1_FLUSH_ULD,LSU1 unaligned load flushes ##C00C4 A load was flushed from unit 1 because it was unaligned (crossed a 64 byte boundary, or 32 byte if it missed the L1).
#219,u,g,n,n,PM_LSU1_FLUSH_UST,LSU1 unaligned store flushes ##C00C5 A store was flushed from unit 1 because it was unaligned (crossed a 4K boundary) #220,v,g,n,n,PM_LSU1_LDF,LSU1 executed Floating Point load instruction ##C50C4 A floating point load was executed by LSU1 #221,v,g,n,n,PM_LSU1_NCLD,LSU1 non-cacheable loads ##C50C5 A non-cacheable load was executed by unit 1. #222,v,g,n,n,PM_LSU1_REJECT_ERAT_MISS,LSU1 reject due to ERAT miss ##C40C7 Total cycles the Load Store Unit 1 is busy rejecting instructions due to an ERAT miss. Requests that miss the Derat are rejected and retried until the request hits in the Erat. #223,v,g,n,n,PM_LSU1_REJECT_LMQ_FULL,LSU1 reject due to LMQ full or missed data coming ##C40C5 Total cycles the Load Store Unit 1 is busy rejecting instructions because the Load Miss Queue was full. The LMQ has eight entries. If all eight entries are full, subsequent load instructions are rejected. #224,v,g,n,n,PM_LSU1_REJECT_RELOAD_CDF,LSU1 reject due to reload CDF or tag update collision ##C40C6 Total cycles the Load Store Unit 1 is busy rejecting instructions because of Critical Data Forward. When critical data arrives from the storage system it is formatted and immediately forwarded, bypassing the data cache, to the destination register using the result bus. Any instruction that requires the result bus in the same cycle is rejected. Tag update rejects are caused when an instruction requires access to the Dcache directory or ERAT in the same system when they are being updated. #225,v,g,n,n,PM_LSU1_REJECT_SRQ,LSU1 SRQ lhs rejects ##C40C4 Total cycles the Load Store Unit 1 is busy rejecting instructions because of Load Hit Store conditions. Loads are rejected when data is needed from a previous store instruction but store forwarding is not possible because the data is not fully contained in the Store Data Queue or is not yet available in the Store Data Queue.
#226,u,g,n,n,PM_LSU1_SRQ_STFWD,LSU1 SRQ store forwarded ##C60E5 Data from a store instruction was forwarded to a load on unit 1. A load that misses L1 but becomes a store forward is treated as a load miss and it causes the DL1 load miss event to be counted. It does not go into the LMQ. If a load that hits L1 but becomes a store forward, then it's not treated as a load miss. #227,v,g,n,n,PM_LSU_DERAT_MISS,DERAT misses ##800A8 Total D-ERAT Misses. Requests that miss the Derat are rejected and retried until the request hits in the Erat. This may result in multiple erat misses for the same instruction. Combined Unit 0 + 1. #228,v,g,n,n,PM_LSU_FLUSH,Flush initiated by LSU ##110C5 A flush was initiated by the Load Store Unit #229,v,g,n,n,PM_LSU_FLUSH_LRQ,LRQ flushes ##C00A8 A load was flushed because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. Combined Units 0 and 1. #230,v,g,n,s,PM_LSU_FLUSH_LRQ_FULL,Flush caused by LRQ full ##320E7 This thread was flushed at dispatch because its Load Request Queue was full. This allows the other thread to have more machine resources for it to make progress while this thread is stalled. #231,v,g,n,s,PM_LSU_FLUSH_SRQ_FULL,Flush caused by SRQ full ##330E0 This thread was flushed at dispatch because its Store Request Queue was full. This allows the other thread to have more machine resources for it to make progress while this thread is stalled. #232,u,g,n,s,PM_LSU_LMQ_FULL_CYC,Cycles LMQ full ##C30E7 The Load Miss Queue was full. #233,v,g,n,n,PM_LSU_LMQ_LHR_MERGE,LMQ LHR merges ##C70E5 A data cache miss occurred for the same real cache line address as an earlier request already in the Load Miss Queue and was merged into the LMQ entry. #234,v,g,n,s,PM_LSU_LMQ_S0_ALLOC,LMQ slot 0 allocated ##C30E6 The first entry in the LMQ was allocated. 
#235,v,g,n,n,PM_LSU_LMQ_S0_VALID,LMQ slot 0 valid ##C30E5 This signal is asserted every cycle when the first entry in the LMQ is valid. The LMQ has eight entries that are allocated FIFO. #236,u,g,n,n,PM_LSU_LMQ_SRQ_EMPTY_CYC,Cycles LMQ and SRQ empty ##00015 Cycles when both the LMQ and SRQ are empty (LSU is idle) #237,v,g,n,s,PM_LSU_LRQ_FULL_CYC,Cycles LRQ full ##110C2 Cycles when the LRQ is full. #238,v,g,n,n,PM_LSU_LRQ_S0_ALLOC,LRQ slot 0 allocated ##C60E7 LRQ slot zero was allocated #239,v,g,n,n,PM_LSU_LRQ_S0_VALID,LRQ slot 0 valid ##C60E6 This signal is asserted every cycle that the Load Request Queue slot zero is valid. The LRQ is 32 entries long and is allocated round-robin. In SMT mode the LRQ is split between the two threads (16 entries each). #240,v,g,n,n,PM_LSU_REJECT_RELOAD_CDF,LSU reject due to reload CDF or tag update collision ##C40A8 Total cycles the Load Store Unit is busy rejecting instructions because of Critical Data Forward. When critical data arrives from the storage system it is formatted and immediately forwarded, bypassing the data cache, to the destination register using the result bus. Any instruction that requires the result bus in the same cycle is rejected. Tag update rejects are caused when an instruction requires access to the Dcache directory or ERAT in the same system when they are being updated. Combined Unit 0 + 1. #241,v,g,n,s,PM_LSU_SRQ_FULL_CYC,Cycles SRQ full ##110C3 Cycles the Store Request Queue is full. #242,v,g,n,n,PM_LSU_SRQ_S0_ALLOC,SRQ slot 0 allocated ##C20E7 SRQ Slot zero was allocated #243,v,g,n,n,PM_LSU_SRQ_S0_VALID,SRQ slot 0 valid ##C20E6 This signal is asserted every cycle that the Store Request Queue slot zero is valid. The SRQ is 32 entries long and is allocated round-robin. In SMT mode the SRQ is split between the two threads (16 entries each). #244,u,g,n,n,PM_LSU_SRQ_SYNC_CYC,SRQ sync duration ##830E5 Cycles that a sync instruction is active in the Store Request Queue.
#245,v,g,n,n,PM_LWSYNC_HELD,LWSYNC held at dispatch ##130E0 Cycles a LWSYNC instruction was held at dispatch. LWSYNC instructions are held at dispatch until all previous loads are done and all previous stores have issued. LWSYNC enters the Store Request Queue and is sent to the storage subsystem but does not wait for a response. #246,c,g,n,n,PM_MEM_FAST_PATH_RD_DISP,Fast path memory read dispatched ##731E6 Fast path memory read dispatched #247,v,g,n,n,PM_MEM_RQ_DISP_Q8to11,Memory read queue dispatched to queues 8-11 ##722E6 A memory operation was dispatched to read queue 8,9,10 or 11. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #248,v,g,n,s,PM_MEM_HI_PRIO_WR_CMPL,High priority write completed ##726E6 A memory write, which was upgraded to high priority, completed. Writes can be upgraded to high priority to ensure that read traffic does not lock out writes. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #249,v,g,n,s,PM_MEM_NONSPEC_RD_CANCEL,Non speculative memory read cancelled ##711C6 A non-speculative read was cancelled because the combined response indicated it was sourced from another L2 or L3. This event is sent from the Memory Controller clock domain and must be scaled accordingly #250,v,g,n,s,PM_MEM_LO_PRIO_WR_CMPL,Low priority write completed ##736E6 A memory write, which was not upgraded to high priority, completed. This event is sent from the Memory Controller clock domain and must be scaled accordingly #251,v,g,n,s,PM_MEM_PWQ_DISP,Memory partial-write queue dispatched ##704C6 Number of Partial Writes dispatched. The MC provides resources to gather partial cacheline writes (Partial line DMA writes & CI-stores) to up to four different cachelines at a time. This event is sent from the Memory Controller clock domain and must be scaled accordingly.
#252,v,g,n,n,PM_MEM_PWQ_DISP_Q2or3,Memory partial-write queue dispatched to Write Queue 2 or 3 ##734E6 Memory partial-write queue dispatched to Write Queue 2 or 3. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #253,v,g,n,s,PM_MEM_PW_CMPL,Memory partial-write completed ##724E6 Number of Partial Writes completed. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #254,v,g,n,s,PM_MEM_PW_GATH,Memory partial-write gathered ##714C6 Two or more partial-writes have been merged into a single memory write. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #255,v,g,n,n,PM_MEM_RQ_DISP_Q12to15,Memory read queue dispatched to queues 12-15 ##732E6 A memory operation was dispatched to read queue 12,13,14 or 15. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #256,v,g,n,s,PM_MEM_RQ_DISP,Memory read queue dispatched ##701C6 A memory read was dispatched. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #257,v,g,n,n,PM_MEM_RQ_DISP_Q0to3,Memory read queue dispatched to queues 0-3 ##702C6 A memory operation was dispatched to read queue 0,1,2, or 3. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #258,v,g,n,n,PM_MEM_RQ_DISP_Q4to7,Memory read queue dispatched to queues 4-7 ##712C6 A memory operation was dispatched to read queue 4,5,6 or 7. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #259,v,g,n,s,PM_MEM_SPEC_RD_CANCEL,Speculative memory read cancelled ##721E6 Speculative memory read cancelled (i.e. cresp = sourced by L2/L3) #260,v,g,n,n,PM_MEM_WQ_DISP_Q0to7,Memory write queue dispatched to queues 0-7 ##723E6 A memory operation was dispatched to a write queue in the range between 0 and 7. This event is sent from the Memory Controller clock domain and must be scaled accordingly. 
#261,v,g,n,n,PM_MEM_WQ_DISP_Q8to15,Memory write queue dispatched to queues 8-15 ##733E6 A memory operation was dispatched to a write queue in the range between 8 and 15. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #262,v,g,n,s,PM_MEM_WQ_DISP_DCLAIM,Memory write queue dispatched due to dclaim/flush ##713C6 A memory dclaim or flush operation was dispatched to a write queue. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #263,v,g,n,s,PM_MEM_WQ_DISP_WRITE,Memory write queue dispatched due to write ##703C6 A memory write was dispatched to a write queue. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #264,v,g,n,n,PM_MRK_DATA_FROM_L25_MOD,Marked data loaded from L2.5 modified ##C70A2 The processor's Data Cache was reloaded with modified (M) data from the L2 of a chip on the same module as this processor is located due to a marked load. #265,v,g,n,n,PM_MRK_DATA_FROM_L275_SHR,Marked data loaded from L2.75 shared ##C7097 The processor's Data Cache was reloaded with shared (T) data from the L2 on a different module than this processor is located due to a marked load. #266,v,g,n,n,PM_MRK_DATA_FROM_L2MISS,Marked data loaded missed L2 ##C709B DL1 was reloaded from beyond L2 due to a marked demand load. #267,v,g,n,n,PM_MRK_DATA_FROM_L3,Marked data loaded from L3 ##C70AF The processor's Data Cache was reloaded from the local L3 due to a marked load. #268,v,g,n,n,PM_MRK_DATA_FROM_L35_MOD,Marked data loaded from L3.5 modified ##C70A6 The processor's Data Cache was reloaded with modified (M) data from the L3 of a chip on the same module as this processor is located due to a marked load. #269,v,g,n,n,PM_MRK_DATA_FROM_L375_SHR,Marked data loaded from L3.75 shared ##C709E The processor's Data Cache was reloaded with shared (S) data from the L3 of a chip on a different module than this processor is located due to a marked load. 
#270,v,g,n,n,PM_MRK_DATA_FROM_LMEM,Marked data loaded from local memory ##C70A0 The processor's Data Cache was reloaded due to a marked load from memory attached to the same module this processor is located on. #271,v,g,n,n,PM_MRK_DSLB_MISS,Marked Data SLB misses ##C50C7 A Data SLB miss was caused by a marked instruction. #272,v,g,n,n,PM_MRK_DTLB_MISS,Marked Data TLB misses ##C50C6,C60E0 Data TLB references by a marked instruction that missed the TLB (all page sizes). #273,v,g,n,n,PM_MRK_DTLB_MISS_16M,Marked Data TLB misses for 16M page ##C608D Marked Data TLB misses for 16M page #274,v,g,n,n,PM_MRK_DTLB_REF,Marked Data TLB reference ##C60E4 Total number of Data TLB references by a marked instruction for all page sizes. Page size is determined at TLB reload time. #275,v,g,n,n,PM_MRK_DTLB_REF_16M,Marked Data TLB reference for 16M page ##C6086 Data TLB references by a marked instruction for 16MB pages. #276,v,g,n,n,PM_MRK_FPU_FIN,Marked instruction FPU processing finished ##00014 One of the Floating Point Units finished a marked instruction. Instructions that finish may not necessarily complete #277,v,g,n,n,PM_MRK_IMR_RELOAD,Marked IMR reloaded ##820E2 A DL1 reload occurred due to marked load #278,v,g,n,n,PM_MRK_INST_FIN,Marked instruction finished ##00005 One of the execution units finished a marked instruction. Instructions that finish may not necessarily complete #279,v,g,n,n,PM_MRK_L1_RELOAD_VALID,Marked L1 reload data source valid ##C70E4 The source information is valid and is for a marked load #280,v,g,n,n,PM_MRK_LD_MISS_L1_LSU0,LSU0 marked L1 D cache load misses ##820E0 Load references that miss the Level 1 Data cache, by LSU0. #281,v,g,n,n,PM_MRK_LD_MISS_L1_LSU1,LSU1 marked L1 D cache load misses ##820E4 Load references that miss the Level 1 Data cache, by LSU1.
#282,v,g,n,n,PM_MRK_LSU0_FLUSH_LRQ,LSU0 marked LRQ flushes ##810C2 A marked load was flushed by unit 0 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. #283,u,g,n,n,PM_MRK_LSU0_FLUSH_SRQ,LSU0 marked SRQ lhs flushes ##810C3 A marked store was flushed because a younger load hits an older store that is already in the SRQ or in the same group. #284,v,g,n,n,PM_MRK_LSU0_FLUSH_ULD,LSU0 marked unaligned load flushes ##810C1 A marked load was flushed from unit 0 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) #285,v,g,n,n,PM_MRK_LSU0_FLUSH_UST,LSU0 marked unaligned store flushes ##810C0 A marked store was flushed from unit 0 because it was unaligned #286,v,g,n,n,PM_MRK_LSU1_FLUSH_LRQ,LSU1 marked LRQ flushes ##810C6 A marked load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. #287,u,g,n,n,PM_MRK_LSU1_FLUSH_SRQ,LSU1 marked SRQ lhs flushes ##810C7 A marked store was flushed because a younger load hits an older store that is already in the SRQ or in the same group.
#288,v,g,n,n,PM_MRK_LSU1_FLUSH_ULD,LSU1 marked unaligned load flushes ##810C4 A marked load was flushed from unit 1 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) #289,u,g,n,n,PM_MRK_LSU1_FLUSH_UST,LSU1 marked unaligned store flushes ##810C5 A marked store was flushed from unit 1 because it was unaligned (crossed a 4k boundary) #290,v,g,n,n,PM_MRK_LSU_FLUSH_LRQ,Marked LRQ flushes ##81088 A marked load was flushed because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. #291,v,g,n,n,PM_MRK_LSU_FLUSH_UST,Marked unaligned store flushes ##81090 A marked store was flushed because it was unaligned #292,u,g,n,n,PM_MRK_LSU_SRQ_INST_VALID,Marked instruction valid in SRQ ##C70E6 This signal is asserted every cycle when a marked request is resident in the Store Request Queue #293,v,g,n,n,PM_MRK_STCX_FAIL,Marked STCX failed ##820E6 A marked stcx (stwcx or stdcx) failed #294,v,g,n,n,PM_MRK_ST_CMPL_INT,Marked store completed with intervention ##00003 A marked store previously sent to the memory subsystem completed (data home) after requiring intervention #295,v,g,n,n,PM_MRK_ST_MISS_L1,Marked L1 D cache store misses ##820E3 A marked store missed the dcache #296,v,g,n,n,PM_PMC2_OVERFLOW,PMC2 Overflow ##0000A Overflows from PMC2 are counted. This effectively widens the PMC. The Overflow from the original PMC will not trigger an exception even if the PMU is configured to generate exceptions on overflow. #297,v,g,n,n,PM_PMC6_OVERFLOW,PMC6 Overflow ##0001A Overflows from PMC6 are counted. This effectively widens the PMC. The Overflow from the original PMC will not trigger an exception even if the PMU is configured to generate exceptions on overflow. 
#298,v,g,n,n,PM_PTEG_FROM_L25_MOD,PTEG loaded from L2.5 modified ##830A2 A Page Table Entry was loaded into the TLB with modified (M) data from the L2 of a chip on the same module as this processor is located due to a demand load. #299,v,g,n,n,PM_PTEG_FROM_L275_SHR,PTEG loaded from L2.75 shared ##83097 A Page Table Entry was loaded into the TLB with shared (T) data from the L2 on a different module than this processor is located due to a demand load. #300,v,g,n,n,PM_PTEG_FROM_L2MISS,PTEG loaded from L2 miss ##8309B A Page Table Entry was loaded into the TLB but not from the local L2. #301,v,g,n,n,PM_PTEG_FROM_L3,PTEG loaded from L3 ##830AF A Page Table Entry was loaded into the TLB from the local L3 due to a demand load. #302,v,g,n,n,PM_PTEG_FROM_L35_MOD,PTEG loaded from L3.5 modified ##830A6 A Page Table Entry was loaded into the TLB with modified (M) data from the L3 of a chip on the same module as this processor is located, due to a demand load. #303,v,g,n,n,PM_PTEG_FROM_L375_SHR,PTEG loaded from L3.75 shared ##8309E A Page Table Entry was loaded into the TLB with shared (S) data from the L3 of a chip on a different module than this processor is located, due to a demand load. #304,v,g,n,n,PM_PTEG_FROM_LMEM,PTEG loaded from local memory ##830A0 A Page Table Entry was loaded into the TLB from memory attached to the same module this processor is located on. #305,v,g,n,n,PM_PTEG_RELOAD_VALID,PTEG reload valid ##830E4 A Page Table Entry was loaded into the TLB. #306,v,g,n,s,PM_SNOOP_DCLAIM_RETRY_QFULL,Snoop dclaim/flush retry due to write/dclaim queues full ##720E6 A snoop request for a dclaim or flush to memory was retried because the write/dclaim queues were full. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #307,v,g,n,s,PM_SNOOP_PARTIAL_RTRY_QFULL,Snoop partial write retry due to partial-write queues full ##730E6 A snoop request for a partial write to memory was retried because the write queues that handle partial writes were full.
When this happens the active writes are changed to high priority. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #308,v,g,n,s,PM_SNOOP_PW_RETRY_RQ,Snoop partial-write retry due to collision with active read queue ##707C6 A snoop request for a partial write to memory was retried because it matched the cache line of an active read. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #309,v,g,n,s,PM_SNOOP_PW_RETRY_WQ_PWQ,Snoop partial-write retry due to collision with active write or partial-write queue ##717C6 A snoop request for a partial write to memory was retried because it matched the cache line of an active write or partial write. When this happens the snoop request is retried and the active write is changed to high priority. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #310,v,g,n,s,PM_SNOOP_RD_RETRY_QFULL,Snoop read retry due to read queue full ##700C6 A snoop request for a read from memory was retried because the read queues were full. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #311,v,g,n,s,PM_SNOOP_RD_RETRY_RQ,Snoop read retry due to collision with active read queue ##705C6 A snoop request for a read from memory was retried because it matched the cache line of an active read. The snoop request is retried because the L2 may be able to source data via intervention for the 2nd read faster than the MC. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #312,v,g,n,s,PM_SNOOP_RD_RETRY_WQ,Snoop read retry due to collision with active write queue ##715C6 A snoop request for a read from memory was retried because it matched the cache line of an active write. The snoop request is retried and the active write is changed to high priority. This event is sent from the Memory Controller clock domain and must be scaled accordingly. 
#313,v,g,n,s,PM_SNOOP_RETRY_1AHEAD,Snoop retry due to one ahead collision ##725E6 Snoop retry due to one ahead collision #314,u,g,n,s,PM_SNOOP_TLBIE,Snoop TLBIE ##800C3 A tlbie was snooped from another processor. #315,v,g,n,s,PM_SNOOP_WR_RETRY_QFULL,Snoop write retry due to write queue full ##710C6 A snoop request for a write to memory was retried because the write queues were full. When this happens the snoop request is retried and the writes in the write reorder queue are changed to high priority. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #316,v,g,n,s,PM_SNOOP_WR_RETRY_RQ,Snoop write/dclaim retry due to collision with active read queue ##706C6 A snoop request for a write or dclaim to memory was retried because it matched the cacheline of an active read. This event is sent from the Memory Controller clock domain and must be scaled accordingly #317,v,g,n,s,PM_SNOOP_WR_RETRY_WQ,Snoop write/dclaim retry due to collision with active write queue ##716C6 A snoop request for a write or dclaim to memory was retried because it matched the cache line of an active write. The snoop request is retried and the active write is changed to high priority. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #318,v,g,n,n,PM_STCX_FAIL,STCX failed ##820E1 A stcx (stwcx or stdcx) failed #319,v,g,n,n,PM_STCX_PASS,Stcx passes ##820E5 A stcx (stwcx or stdcx) instruction was successful #320,v,g,n,n,PM_STOP_COMPLETION,Completion stopped ##00018 RAS Unit has signaled completion to stop #321,v,g,n,n,PM_ST_MISS_L1,L1 D cache store misses ##C10C3 A store missed the dcache. Combined Unit 0 + 1. #322,v,g,n,n,PM_ST_REF_L1,L1 D cache store references ##C1090 Store references to the Data Cache. Combined Unit 0 + 1. #323,v,g,n,n,PM_ST_REF_L1_LSU0,LSU0 L1 D cache store references ##C10C1 Store references to the Data Cache by LSU0.
#324,v,g,n,n,PM_ST_REF_L1_LSU1,LSU1 L1 D cache store references ##C10C4 Store references to the Data Cache by LSU1. #325,v,g,n,n,PM_SUSPENDED,Suspended ##00000 The counter is suspended (does not count). #326,v,g,n,s,PM_THRD_L2MISS_BOTH_CYC,Cycles both threads in L2 misses ##410C7 Cycles that both threads have L2 miss pending. If only one thread has a L2 miss pending the other thread is given priority at decode. If both threads have L2 miss pending decode priority is determined by the number of GCT entries used. #327,v,g,n,n,PM_THRD_PRIO_1_CYC,Cycles thread running at priority level 1 ##420E0 Cycles this thread was running at priority level 1. Priority level 1 is the lowest and indicates the thread is sleeping. #328,v,g,n,n,PM_THRD_PRIO_2_CYC,Cycles thread running at priority level 2 ##420E1 Cycles this thread was running at priority level 2. #329,v,g,n,n,PM_THRD_PRIO_3_CYC,Cycles thread running at priority level 3 ##420E2 Cycles this thread was running at priority level 3. #330,v,g,n,n,PM_THRD_PRIO_4_CYC,Cycles thread running at priority level 4 ##420E3 Cycles this thread was running at priority level 4. #331,v,g,n,n,PM_THRD_PRIO_5_CYC,Cycles thread running at priority level 5 ##420E4 Cycles this thread was running at priority level 5. #332,v,g,n,n,PM_THRD_PRIO_6_CYC,Cycles thread running at priority level 6 ##420E5 Cycles this thread was running at priority level 6. #333,v,g,n,n,PM_THRD_PRIO_7_CYC,Cycles thread running at priority level 7 ##420E6 Cycles this thread was running at priority level 7. #334,v,g,n,n,PM_THRD_PRIO_DIFF_0_CYC,Cycles no thread priority difference ##430E3 Cycles when this thread's priority is equal to the other thread's priority. #335,v,g,n,n,PM_THRD_PRIO_DIFF_1or2_CYC,Cycles thread priority difference is 1 or 2 ##430E4 Cycles when this thread's priority is higher than the other thread's priority by 1 or 2. 
#336,v,g,n,n,PM_THRD_PRIO_DIFF_3or4_CYC,Cycles thread priority difference is 3 or 4 ##430E5 Cycles when this thread's priority is higher than the other thread's priority by 3 or 4. #337,v,g,n,n,PM_THRD_PRIO_DIFF_5or6_CYC,Cycles thread priority difference is 5 or 6 ##430E6 Cycles when this thread's priority is higher than the other thread's priority by 5 or 6. #338,v,g,n,n,PM_THRD_PRIO_DIFF_minus1or2_CYC,Cycles thread priority difference is -1 or -2 ##430E2 Cycles when this thread's priority is lower than the other thread's priority by 1 or 2. #339,v,g,n,n,PM_THRD_PRIO_DIFF_minus3or4_CYC,Cycles thread priority difference is -3 or -4 ##430E1 Cycles when this thread's priority is lower than the other thread's priority by 3 or 4. #340,v,g,n,n,PM_THRD_PRIO_DIFF_minus5or6_CYC,Cycles thread priority difference is -5 or -6 ##430E0 Cycles when this thread's priority is lower than the other thread's priority by 5 or 6. #341,v,g,n,s,PM_THRD_SEL_OVER_CLB_EMPTY,Thread selection overrides caused by CLB empty ##410C2 Thread selection was overridden because one thread's CLB was empty. #342,v,g,n,s,PM_THRD_SEL_OVER_GCT_IMBAL,Thread selection overrides caused by GCT imbalance ##410C4 Thread selection was overridden because of a GCT imbalance. #343,v,g,n,s,PM_THRD_SEL_OVER_ISU_HOLD,Thread selection overrides caused by ISU holds ##410C5 Thread selection was overridden because of an ISU hold. #344,v,g,n,s,PM_THRD_SEL_OVER_L2MISS,Thread selection overrides caused by L2 misses ##410C3 Thread selection was overridden because one thread had an L2 miss pending. #345,v,g,n,s,PM_THRD_SEL_T0,Decode selected thread 0 ##410C0 Thread selection picked thread 0 for decode. #346,v,g,n,s,PM_THRD_SEL_T1,Decode selected thread 1 ##410C1 Thread selection picked thread 1 for decode. 
#347,v,g,n,s,PM_THRD_SMT_HANG,SMT hang detected ##330E7 A hung thread was detected #348,v,g,t,n,PM_THRESH_TIMEO,Threshold timeout ##0000B The threshold timer expired #349,v,g,n,n,PM_TLBIE_HELD,TLBIE held at dispatch ##130E4 Cycles a TLBIE instruction was held at dispatch. #350,v,g,n,s,PM_XER_MAP_FULL_CYC,Cycles XER mapper full ##100C2 The XER mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. This event only indicates that the mapper was full, not that dispatch was prevented. #351,v,g,n,n,PM_BR_PRED_TA,A conditional branch was predicted, target prediction ##230E3 The target address of a branch instruction was predicted. #352,v,g,n,n,PM_MEM_RQ_DISP_Q16to19,Memory read queue dispatched to queues 16-19 ##727E6 A memory operation was dispatched to read queue 16,17,18 or 19. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #353,v,g,n,n,PM_SNOOP_RETRY_AB_COLLISION,Snoop retry due to a b collision ##735E6 Snoop retry due to a b collision #354,v,g,n,n,PM_INST_DISP_ATTEMPT,Instructions dispatch attempted ##120E1 Number of PowerPC Instructions dispatched (attempted, not filtered by success. $$$$$$$$ { counter 4 } #0,v,g,n,n,PM_0INST_CLB_CYC,Cycles no instructions in CLB ##400C0 The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. #1,v,g,n,n,PM_0INST_FETCH,No instructions fetched ##2208D No instructions were fetched this cycles (due to IFU hold, redirect, or icache miss) #2,v,g,n,n,PM_1INST_CLB_CYC,Cycles 1 instruction in CLB ##400C1 The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. 
Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. #3,v,g,n,n,PM_2INST_CLB_CYC,Cycles 2 instructions in CLB ##400C2 The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. #4,v,g,n,n,PM_3INST_CLB_CYC,Cycles 3 instructions in CLB ##400C3 The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. #5,v,g,n,n,PM_4INST_CLB_CYC,Cycles 4 instructions in CLB ##400C4 The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. #6,v,g,n,n,PM_5INST_CLB_CYC,Cycles 5 instructions in CLB ##400C5 The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. 
Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. #7,v,g,n,n,PM_6INST_CLB_CYC,Cycles 6 instructions in CLB ##400C6 The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. #8,u,g,n,s,PM_BRQ_FULL_CYC,Cycles branch queue full ##100C5 Cycles when the issue queue that feeds the branch unit is full. This condition will prevent dispatch groups from being dispatched. This event only indicates that the queue was full, not that dispatch was prevented. #9,v,g,n,n,PM_BR_ISSUED,Branches issued ##230E4 A branch instruction was issued to the branch unit. A branch that was incorrectly predicted may issue and execute multiple times. #10,v,g,n,n,PM_BR_MPRED_CR,Branch mispredictions due to CR bit setting ##230E5 A conditional branch instruction was incorrectly predicted as taken or not taken. The branch execution unit detects a branch mispredict because the CR value is opposite of the predicted value. This will result in a branch redirect flush if not overridden by a flush of an older instruction. #11,v,g,n,n,PM_BR_MPRED_TA,Branch mispredictions due to target address ##230E6 A branch instruction target was incorrectly predicted. This will result in a branch mispredict flush unless a flush is detected from an older instruction. 
#12,v,g,n,n,PM_BR_PRED_CR_TA,A conditional branch was predicted, CR and target prediction ##23087 Both the condition (taken or not taken) and the target address of a branch instruction was predicted. #13,v,g,n,s,PM_CLB_EMPTY_CYC,Cycles CLB empty ##410C6 Cycles when both thread's CLB is completely empty. #14,v,g,n,n,PM_CLB_FULL_CYC,Cycles CLB full ##220E5 Cycles when both thread's CLB is full. #15,v,g,n,n,PM_CMPLU_STALL_DIV,Completion stall caused by DIV instruction ##11099 Following a completion stall (any period when no groups completed) the last instruction to finish before completion resumes was a fixed point divide instruction. This is a subset of PM_CMPLU_STALL_FXU. #16,v,g,n,n,PM_CMPLU_STALL_ERAT_MISS,Completion stall caused by ERAT miss ##1109B Following a completion stall (any period when no groups completed) the last instruction to finish before completion resumes suffered an ERAT miss. This is a subset of PM_CMPLU_STALL_REJECT. #17,v,g,n,n,PM_CMPLU_STALL_FPU,Completion stall caused by FPU instruction ##11098 Following a completion stall (any period when no groups completed) the last instruction to finish before completion resumes was a floating point instruction. #18,v,g,n,n,PM_CMPLU_STALL_REJECT,Completion stall caused by reject ##1109A Following a completion stall (any period when no groups completed) the last instruction to finish before completion resumes suffered a load/store reject. This is a subset of PM_CMPLU_STALL_LSU. #19,u,g,n,s,PM_CRQ_FULL_CYC,Cycles CR issue queue full ##110C1 The issue queue that feeds the Conditional Register unit is full. This condition will prevent dispatch groups from being dispatched. This event only indicates that the queue was full, not that dispatch was prevented. #20,v,g,n,s,PM_CR_MAP_FULL_CYC,Cycles CR logical operation mapper full ##100C4 The Conditional Register mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. 
This event only indicates that the mapper was full, not that dispatch was prevented. #21,v,g,n,s,PM_CYC,Processor cycles ##0000F Processor cycles #22,v,g,n,n,PM_DATA_FROM_L275_MOD,Data loaded from L2.75 modified ##C3097 The processor's Data Cache was reloaded with modified (M) data from the L2 on a different module than this processor is located due to a demand load. #23,v,g,n,n,PM_DATA_FROM_L375_MOD,Data loaded from L3.75 modified ##C309E The processor's Data Cache was reloaded with modified (M) data from the L3 of a chip on the same module as this processor is located due to a demand load. #24,v,g,n,n,PM_DATA_FROM_RMEM,Data loaded from remote memory ##C3087 The processor's Data Cache was reloaded from memory attached to a different module than this processor is located on. #25,v,g,n,n,PM_DATA_TABLEWALK_CYC,Cycles doing data tablewalks ##800C7 Cycles a translation tablewalk is active. While a tablewalk is active any request attempting to access the TLB will be rejected and retried. #26,u,g,n,s,PM_DC_INV_L2,L1 D cache entries invalidated from L2 ##C10C7 A dcache invalidated was received from the L2 because a line in L2 was castout. #27,v,g,n,n,PM_DC_PREF_OUT_OF_STREAMS,D cache out of prefetch streams ##C50C2 A new prefetch stream was detected but no more stream entries were available. #28,v,g,n,n,PM_DC_PREF_DST,DST (Data Stream Touch) stream start ##830E6 A prefetch stream was started using the DST instruction. #29,v,g,n,n,PM_DC_PREF_STREAM_ALLOC,D cache new prefetch stream allocated ##830E7 A new Prefetch Stream was allocated. #30,v,g,n,n,PM_DSLB_MISS,Data SLB misses ##800C5 A SLB miss for a data request occurred. SLB misses trap to the operating system to resolve. #31,v,g,n,n,PM_DTLB_MISS,Data TLB misses ##800C4,C20E0 Data TLB misses, all page sizes. #32,v,g,n,n,PM_DTLB_MISS_16G,Data TLB miss for 16G page ##C208D Data TLB references to 16GB pages that missed the TLB. Page size is determined at TLB reload time. 
#33,v,g,n,n,PM_DTLB_REF,Data TLB references ##C20E4 Total number of Data TLB references for all page sizes. Page size is determined at TLB reload time. #34,v,g,n,n,PM_DTLB_REF_16G,Data TLB reference for 16G page ##C2086 Data TLB references for 16GB pages. Includes hits + misses. #35,v,g,n,n,PM_EE_OFF,Cycles MSR(EE) bit off ##130E3 Cycles MSR(EE) bit was off indicating that interrupts due to external exceptions were masked. #36,u,g,n,n,PM_EE_OFF_EXT_INT,Cycles MSR(EE) bit off and external interrupt pending ##130E7 Cycles when an interrupt due to an external exception is pending but external exceptions were masked. #37,v,g,n,n,PM_EXT_INT,External interrupts ##00003 An interrupt due to an external exception occurred #38,v,g,n,s,PM_FAB_CMD_ISSUED,Fabric command issued ##700C7 Incremented when a chip issues a command on its SnoopA address bus. Each of the two address busses (SnoopA and SnoopB) is capable of one transaction per fabric cycle (one fabric cycle = 2 cpu cycles in normal 2:1 mode), but each chip can only drive the SnoopA bus, and can only drive one transaction every two fabric cycles (i.e., every four cpu cycles). In MCM-based systems, two chips interleave their accesses to each of the two fabric busses (SnoopA, SnoopB) to reach a peak capability of one transaction per cpu clock cycle. The two chips that drive SnoopB are wired so that the chips refer to the bus as SnoopA but it is connected to the other two chips as SnoopB. Note that this event will only be recorded by the FBC on the chip that sourced the operation. The signal is delivered at FBC speed and the count must be scaled. #39,v,g,n,n,PM_FAB_CMD_RETRIED,Fabric command retried ##710C7 Incremented when a command issued by a chip on its SnoopA address bus is retried for any reason. The overwhelming majority of retries are due to running out of memory controller queues but retries can also be caused by trying to reference addresses that are in a transient cache state -- e.g. 
a line is transient after issuing a DCLAIM instruction to a shared line but before the associated store completes. Each chip reports its own counts. The signal is delivered at FBC speed and the count must be scaled accordingly. #40,v,g,n,s,PM_FAB_DCLAIM_ISSUED,dclaim issued ##720E7 A DCLAIM command was issued. Each chip reports its own counts. The signal is delivered at FBC speed and the count must be scaled accordingly. #41,v,g,n,s,PM_FAB_DCLAIM_RETRIED,dclaim retried ##730E7 A DCLAIM command was retried. Each chip reports its own counts. The signal is delivered at FBC speed and the count must be scaled accordingly. #42,v,g,n,s,PM_FAB_HOLDtoNN_EMPTY,Hold buffer to NN empty ##722E7 Fabric cycles when the Next Node out hold-buffers are empty. The signal is delivered at FBC speed and the count must be scaled accordingly. #43,v,g,n,s,PM_FAB_HOLDtoVN_EMPTY,Hold buffer to VN empty ##721E7 Fabric cycles when the Vertical Node out hold-buffers are empty. The signal is delivered at FBC speed and the count must be scaled accordingly. #44,v,g,n,s,PM_FAB_M1toP1_SIDECAR_EMPTY,M1 to P1 sidecar empty ##702C7 Fabric cycles when the Minus-1 hip/hop sidecars (sidecars for chip to chip data transfer) are empty. The signal is delivered at FBC speed and the count must be scaled accordingly. #45,v,g,n,s,PM_FAB_M1toVNorNN_SIDECAR_EMPTY,M1 to VN/NN sidecar empty ##712C7 Fabric cycles when the Minus-1 jump sidecar (sidecars for mcm to mcm data transfer) is empty. The signal is delivered at FBC speed and the count must be scaled accordingly. #46,v,g,n,s,PM_FAB_P1toM1_SIDECAR_EMPTY,P1 to M1 sidecar empty ##701C7 Fabric cycles when the Plus-1 hip/hop sidecars (sidecars for chip to chip data transfer) are empty. The signal is delivered at FBC speed and the count must be scaled accordingly. #47,v,g,n,s,PM_FAB_P1toVNorNN_SIDECAR_EMPTY,P1 to VN/NN sidecar empty ##711C7 Fabric cycles when the Plus-1 jump sidecar (sidecars for mcm to mcm data transfer) is empty. 
The signal is delivered at FBC speed and the count must be scaled accordingly. #48,v,g,n,s,PM_FAB_PNtoNN_DIRECT,PN to NN beat went straight to its destination ##703C7 Fabric Data beats that the base chip takes the inbound PN data and passes it through to the outbound NN bus without going into a sidecar. The signal is delivered at FBC speed and the count must be scaled. #49,v,g,n,s,PM_FAB_PNtoNN_SIDECAR,PN to NN beat went to sidecar first ##713C7 Fabric Data beats that the base chip takes the inbound PN data and forwards it on to the outbound NN data bus after going into a sidecar first. The signal is delivered at FBC speed and the count must be scaled. #50,v,g,n,s,PM_FAB_PNtoVN_DIRECT,PN to VN beat went straight to its destination ##723E7 Fabric Data beats that the base chip takes the inbound PN data and passes it through to the outbound VN bus without going into a sidecar. The signal is delivered at FBC speed and the count must be scaled accordingly. #51,v,g,n,s,PM_FAB_PNtoVN_SIDECAR,PN to VN beat went to sidecar first ##733E7 Fabric data beats that the base chip takes the inbound PN data and forwards it on to the outbound VN data bus after going into a sidecar first. The signal is delivered at FBC speed and the count must be scaled accordingly. #52,v,g,n,s,PM_FAB_VBYPASS_EMPTY,Vertical bypass buffer empty ##731E7 Fabric cycles when the Middle Bypass sidecar is empty. The signal is delivered at FBC speed and the count must be scaled accordingly. #53,v,g,n,n,PM_FLUSH,Flushes ##110C7 Flushes occurred including LSU and Branch flushes. #54,v,g,n,n,PM_FLUSH_BR_MPRED,Flush caused by branch mispredict ##110C6 A flush was caused by a branch mispredict. #55,v,g,n,s,PM_FLUSH_IMBAL,Flush caused by thread GCT imbalance ##330E3 This thread has been flushed at dispatch because it is stalled and a GCT imbalance exists. GCT thresholds are set in the TSCR register. This allows the other thread to have more machine resources for it to make progress while this thread is stalled. 
#56,v,g,n,s,PM_FLUSH_SB,Flush caused by scoreboard operation ##330E2 This thread has been flushed at dispatch because its scoreboard bit is set indicating that a non-renamed resource is being updated. This allows the other thread to have more machine resources for it to make progress while this thread is stalled. #57,v,g,n,s,PM_FLUSH_SYNC,Flush caused by sync ##330E1 This thread has been flushed at dispatch due to a sync, lwsync, ptesync, or tlbsync instruction. This allows the other thread to have more machine resources for it to make progress until the sync finishes. #58,v,g,n,s,PM_FPR_MAP_FULL_CYC,Cycles FPR mapper full ##100C1 The Floating Point Register mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. This event only indicates that the mapper was full, not that dispatch was prevented. #59,v,g,n,n,PM_FPU0_1FLOP,FPU0 executed add, mult, sub, cmp or sel instruction ##000C3 The floating point unit has executed an add, mult, sub, compare, fsel, fneg, fabs, fnabs, fres, or frsqrte kind of instruction. These are single FLOP operations. #60,v,g,n,n,PM_FPU0_DENORM,FPU0 received denormalized data ##020E0 FPU0 has encountered a denormalized operand. #61,v,g,n,n,PM_FPU0_FDIV,FPU0 executed FDIV instruction ##000C0 FPU0 has executed a divide instruction. This could be fdiv, fdivs, fdiv., fdivs. #62,v,g,n,n,PM_FPU0_FEST,FPU0 executed FEST instruction ##010C2 FPU0 has executed an estimate instruction. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. #63,v,g,n,n,PM_FPU0_FIN,FPU0 produced a result ##010C3 FPU0 finished, produced a result. This only indicates finish, not completion. Floating Point Stores are included in this count but not Floating Point Loads. #64,v,g,n,n,PM_FPU0_FMA,FPU0 executed multiply-add instruction ##000C1 The floating point unit has executed a multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. 
#65,v,g,n,n,PM_FPU0_FMOV_FEST,FPU0 executed FMOV or FEST instructions ##010C0 FPU0 has executed a move kind of instruction or one of the estimate instructions. This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ. #66,v,g,n,n,PM_FPU0_FPSCR,FPU0 executed FPSCR instruction ##030E0 FPU0 has executed FPSCR move related instruction. This could be mtfsfi*, mtfsb0*, mtfsb1*, mffs*, mtfsf*, mcrsf* where XYZ* means XYZ, XYZs, XYZ., XYZs. #67,v,g,n,n,PM_FPU0_FRSP_FCONV,FPU0 executed FRSP or FCONV instructions ##010C1 FPU0 has executed a frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. #68,v,g,n,n,PM_FPU0_FSQRT,FPU0 executed FSQRT instruction ##000C2 FPU0 has executed a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. #69,v,g,n,s,PM_FPU0_FULL_CYC,Cycles FPU0 issue queue full ##100C3 The issue queue for FPU0 cannot accept any more instruction. Dispatch to this issue queue is stopped. #70,v,g,n,n,PM_FPU0_SINGLE,FPU0 executed single precision instruction ##020E3 FPU0 has executed a single precision instruction. #71,v,g,n,n,PM_FPU0_STALL3,FPU0 stalled in pipe3 ##020E1 FPU0 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). #72,v,g,n,n,PM_FPU0_STF,FPU0 executed store instruction ##020E2 FPU0 has executed a Floating Point Store instruction. #73,v,g,n,n,PM_FPU1_1FLOP,FPU1 executed add, mult, sub, cmp or sel instruction ##000C7 The floating point unit has executed an add, mult, sub, compare, fsel, fneg, fabs, fnabs, fres, or frsqrte kind of instruction. These are single FLOP operations. #74,v,g,n,n,PM_FPU1_DENORM,FPU1 received denormalized data ##020E4 FPU1 has encountered a denormalized operand. #75,v,g,n,n,PM_FPU1_FDIV,FPU1 executed FDIV instruction ##000C4 FPU1 has executed a divide instruction. This could be fdiv, fdivs, fdiv. fdivs. 
#76,v,g,n,n,PM_FPU1_FEST,FPU1 executed FEST instruction ##010C6 FPU1 has executed an estimate instruction. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. #77,v,g,n,n,PM_FPU1_FIN,FPU1 produced a result ##010C7 FPU1 finished, produced a result. This only indicates finish, not completion. Floating Point Stores are included in this count but not Floating Point Loads. #78,v,g,n,n,PM_FPU1_FMA,FPU1 executed multiply-add instruction ##000C5 The floating point unit has executed a multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. #79,v,g,n,n,PM_FPU1_FMOV_FEST,FPU1 executed FMOV or FEST instructions ##010C4 FPU1 has executed a move kind of instruction or one of the estimate instructions. This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ. #80,v,g,n,n,PM_FPU1_FRSP_FCONV,FPU1 executed FRSP or FCONV instructions ##010C5 FPU1 has executed a frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. #81,v,g,n,n,PM_FPU1_FSQRT,FPU1 executed FSQRT instruction ##000C6 FPU1 has executed a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. #82,v,g,n,s,PM_FPU1_FULL_CYC,Cycles FPU1 issue queue full ##100C7 The issue queue for FPU1 cannot accept any more instructions. Dispatch to this issue queue is stopped. #83,v,g,n,n,PM_FPU1_SINGLE,FPU1 executed single precision instruction ##020E7 FPU1 has executed a single precision instruction. #84,v,g,n,n,PM_FPU1_STALL3,FPU1 stalled in pipe3 ##020E5 FPU1 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). #85,v,g,n,n,PM_FPU1_STF,FPU1 executed store instruction ##020E6 FPU1 has executed a Floating Point Store instruction. 
#86,v,g,n,n,PM_FPU_1FLOP,FPU executed one flop instruction ##000A8 The floating point unit has executed an add, mult, sub, compare, fsel, fneg, fabs, fnabs, fres, or frsqrte kind of instruction. These are single FLOP operations. #87,v,g,n,n,PM_FPU_FEST,FPU executed FEST instruction ##01090 The floating point unit has executed an estimate instructions. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. Combined Unit 0 + Unit 1. #88,v,g,n,n,PM_FPU_FIN,FPU produced a result ##01088 FPU finished, produced a result. This only indicates finish, not completion. Combined Unit 0 + Unit 1. Floating Point Stores are included in this count but not Floating Point Loads., , , XYZs #89,c,g,n,n,PM_FPU_FULL_CYC,Cycles FPU issue queue full ##100A8 Cycles when one or both FPU issue queues are full. Combined Unit 0 + 1. Use with caution since this is the sum of cycles when Unit 0 was full plus Unit 1 full. It does not indicate when both units were full. #90,v,g,n,n,PM_FPU_SINGLE,FPU executed single precision instruction ##020A8 FPU is executing single precision instruction. Combined Unit 0 + Unit 1. #91,v,g,n,s,PM_FXLS0_FULL_CYC,Cycles FXU0/LS0 queue full ##110C0 The issue queue that feeds the Fixed Point unit 0 / Load Store Unit 0 is full. This condition will prevent dispatch groups from being dispatched. This event only indicates that the queue was full, not that dispatch was prevented. #92,v,g,n,s,PM_FXLS1_FULL_CYC,Cycles FXU1/LS1 queue full ##110C4 The issue queue that feeds the Fixed Point unit 1 / Load Store Unit 1 is full. This condition will prevent dispatch groups from being dispatched. This event only indicates that the queue was full, not that dispatch was prevented. #93,c,g,n,n,PM_FXLS_FULL_CYC,Cycles FXLS queue is full ##11090 Cycles when the issue queues for one or both FXU/LSU units is full. Use with caution since this is the sum of cycles when Unit 0 was full plus Unit 1 full. It does not indicate when both units were full. 
#94,v,g,n,n,PM_FXU0_FIN,FXU0 produced a result ##130E2 The Fixed Point unit 0 finished an instruction and produced a result. Instructions that finish may not necessarily complete. #95,u,g,n,n,PM_FXU1_BUSY_FXU0_IDLE,FXU1 busy FXU0 idle ##00012 FXU0 was idle while FXU1 was busy. #96,v,g,n,n,PM_FXU1_FIN,FXU1 produced a result ##130E6 The Fixed Point unit 1 finished an instruction and produced a result. Instructions that finish may not necessarily complete. #97,v,g,n,n,PM_GCT_FULL_CYC,Cycles GCT full ##0001F,100C0 The Global Completion Table is completely full. #98,v,g,n,n,PM_GCT_NOSLOT_BR_MPRED,No slot in GCT caused by branch mispredict ##1009C Cycles when the Global Completion Table has no slots from this thread because of a branch misprediction. #99,v,g,n,s,PM_GPR_MAP_FULL_CYC,Cycles GPR mapper full ##130E5 The General Purpose Register mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. This event only indicates that the mapper was full, not that dispatch was prevented. #100,v,g,n,n,PM_GRP_BR_REDIR,Group experienced branch redirect ##120E6 Number of groups, counted at dispatch, that have encountered a branch redirect. Every group constructed from a fetch group that has been redirected will count. #101,c,g,n,n,PM_GRP_IC_MISS_BR_REDIR_NONSPEC,Group experienced non-speculative I cache miss or branch redirect ##120E5 Group experienced non-speculative I cache miss or branch redirect #102,v,g,n,n,PM_GRP_DISP_BLK_SB_CYC,Cycles group dispatch blocked by scoreboard ##130E1 A scoreboard operation on a non-renamed resource has blocked dispatch. #103,v,g,n,n,PM_GRP_DISP_REJECT,Group dispatch rejected ##00002,120E4 A group that previously attempted dispatch was rejected. #104,v,g,n,n,PM_GRP_DISP_VALID,Group dispatch valid ##120E3 A group is available for dispatch. This does not mean it was successfully dispatched. 
#105,v,g,n,n,PM_GRP_IC_MISS,Group experienced I cache miss ##120E7 Number of groups, counted at dispatch, that have encountered an icache miss redirect. Every group constructed from a fetch group that missed the instruction cache will count. #106,v,g,n,n,PM_IC_DEMAND_L2_BHT_REDIRECT,L2 I cache demand request due to BHT redirect ##230E0 A demand (not prefetch) miss to the instruction cache was sent to the L2 as a result of a branch prediction redirect (CR mispredict). #107,v,g,n,n,PM_IC_DEMAND_L2_BR_REDIRECT,L2 I cache demand request due to branch redirect ##230E1 A demand (not prefetch) miss to the instruction cache was sent to the L2 as a result of a branch prediction redirect (either ALL mispredicted or Target). #108,v,g,n,n,PM_IC_PREF_INSTALL,Instruction prefetched installed in prefetch buffer ##210C7 A prefetch buffer entry (line) is allocated but the request is not a demand fetch. #109,v,g,n,n,PM_IC_PREF_REQ,Instruction prefetch requests ##220E6 An instruction prefetch request has been made. #110,v,g,n,n,PM_IERAT_XLATE_WR,Translation written to ierat ##220E7 An entry was written into the IERAT as a result of an IERAT miss. This event can be used to count IERAT misses. An ERAT miss that are later ignored will not be counted unless the ERAT is written before the instruction stream is changed. #111,v,g,n,n,PM_IERAT_XLATE_WR_LP,Large page translation written to ierat ##210C6 An entry was written into the IERAT as a result of an IERAT miss. This event can be used to count IERAT misses. An ERAT miss that are later ignored will not be counted unless the ERAT is written before the instruction stream is changed. #112,v,g,n,n,PM_IOPS_CMPL,Internal operations completed ##00001 Number of internal operations that completed. #113,v,g,n,n,PM_INST_DISP,Instructions dispatched ##00009 Number of PowerPC instructions successfully dispatched. 
#114,v,g,n,n,PM_INST_FETCH_CYC,Cycles at least 1 instruction fetched ##220E4 Cycles when at least one instruction was sent from the fetch unit to the decode unit. #115,v,g,n,n,PM_INST_FROM_L275_MOD,Instruction fetched from L2.75 modified ##22096 An instruction fetch group was fetched with modified (M) data from the L2 on a different module than this processor is located. Fetch groups can contain up to 8 instructions #116,v,g,n,n,PM_INST_FROM_L375_MOD,Instruction fetched from L3.75 modified ##2209D An instruction fetch group was fetched with modified (M) data from the L3 of a chip on a different module than this processor is located. Fetch groups can contain up to 8 instructions #117,v,g,n,n,PM_INST_FROM_RMEM,Instruction fetched from remote memory ##22086 An instruction fetch group was fetched from memory attached to a different module than this processor is located on. Fetch groups can contain up to 8 instructions #118,u,g,n,n,PM_ISLB_MISS,Instruction SLB misses ##800C1 A SLB miss for an instruction fetch has occurred #119,v,g,n,n,PM_ITLB_MISS,Instruction TLB misses ##800C0 A TLB miss for an Instruction Fetch has occurred #120,v,g,n,n,PM_L1_DCACHE_RELOAD_VALID,L1 reload data source valid ##C30E4 The data source information is valid, the data cache has been reloaded. Prior to POWER5+ this included data cache reloads due to prefetch activity. With POWER5+ this now only includes reloads due to demand loads. #121,v,g,n,n,PM_L1_PREF,L1 cache data prefetches ##C70E7 A request to prefetch data into the L1 was made #122,v,g,n,n,PM_L1_WRITE_CYC,Cycles writing to instruction L1 ##230E7 Cycles that a cache line was written to the instruction cache. #123,v,g,n,s,PM_L2SA_MOD_INV,L2 slice A transition from modified to invalid ##730E0 A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2.
The event is provided on each of the three slices A, B, and C. #124,v,g,n,s,PM_L2SA_MOD_TAG,L2 slice A transition from modified to tagged ##720E0 A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C. #125,v,g,n,s,PM_L2SA_RCLD_DISP,L2 slice A RC load dispatch attempt ##701C0 A Read/Claim dispatch for a Load was attempted #126,v,g,n,s,PM_L2SA_RCLD_DISP_FAIL_ADDR,L2 slice A RC load dispatch attempt failed due to address collision with RC/CO/SN/SQ ##711C0 A Read/Claim dispatch for a load failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time. #127,v,g,n,s,PM_L2SA_RCLD_DISP_FAIL_OTHER,L2 slice A RC load dispatch attempt failed due to other reasons ##731E0 A Read/Claim dispatch for a load failed for some reason other than Full or Collision conditions. #128,v,g,n,s,PM_L2SA_RCLD_DISP_FAIL_RC_FULL,L2 slice A RC load dispatch attempt failed due to all RC full ##721E0 A Read/Claim dispatch for a load failed because all RC machines are busy. #129,v,g,n,s,PM_L2SA_RCST_DISP,L2 slice A RC store dispatch attempt ##702C0 A Read/Claim dispatch for a Store was attempted. #130,v,g,n,s,PM_L2SA_RCST_DISP_FAIL_ADDR,L2 slice A RC store dispatch attempt failed due to address collision with RC/CO/SN/SQ ##712C0 A Read/Claim dispatch for a store failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time. #131,v,g,n,s,PM_L2SA_RCST_DISP_FAIL_OTHER,L2 slice A RC store dispatch attempt failed due to other reasons ##732E0 A Read/Claim dispatch for a store failed for some reason other than Full or Collision conditions. Rejected dispatches do not count because they have not yet been attempted. 
#132,v,g,n,s,PM_L2SA_RCST_DISP_FAIL_RC_FULL,L2 slice A RC store dispatch attempt failed due to all RC full ##722E0 A Read/Claim dispatch for a store failed because all RC machines are busy. #133,v,g,n,s,PM_L2SA_RC_DISP_FAIL_CO_BUSY,L2 slice A RC dispatch attempt failed due to RC/CO pair chosen was miss and CO already busy ##703C0 A Read/Claim Dispatch was rejected at dispatch because the Castout Machine was busy. In the case of an RC starting up on a miss and the victim is valid, the CO machine must be available for the RC to process the access. If the CO is still busy working on an old castout, then the RC must not-ack the access if it is a miss(re-issued by the CIU). If it is a miss and the CO is available to process the castout, the RC will accept the access. Once the RC has finished, it can restart and process new accesses that result in a hit (or miss that doesn't need a CO) even though the CO is still processing a castout from a previous access. #134,v,g,n,s,PM_L2SA_RC_DISP_FAIL_CO_BUSY_ALL,L2 slice A RC dispatch attempt failed due to all CO busy ##713C0 A Read/Claim dispatch was rejected because all Castout machines were busy. #135,v,g,n,s,PM_L2SA_SHR_INV,L2 slice A transition from shared to invalid ##710C0 A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. This transition was caused by any external snoop request. The event is provided on each of the three slices A, B, and C. NOTE: For this event to be useful the tablewalk duration event should also be counted. #136,v,g,n,s,PM_L2SA_SHR_MOD,L2 slice A transition from shared to modified ##700C0 A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A, B, and C. 
#137,v,g,n,n,PM_L2SA_ST_HIT,L2 slice A store hits ##733E0 A store request made from the core hit in the L2 directory. This event is provided on each of the three L2 slices A, B, and C. #138,v,g,n,n,PM_L2SA_ST_REQ,L2 slice A store requests ##723E0 A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. The event is provided on each of the three slices A, B, and C. #139,v,g,n,s,PM_L2SB_MOD_INV,L2 slice B transition from modified to invalid ##730E1 A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C. #140,v,g,n,s,PM_L2SB_MOD_TAG,L2 slice B transition from modified to tagged ##720E1 A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C. #141,v,g,n,s,PM_L2SB_RCLD_DISP,L2 slice B RC load dispatch attempt ##701C1 A Read/Claim dispatch for a Load was attempted #142,v,g,n,s,PM_L2SB_RCLD_DISP_FAIL_ADDR,L2 slice B RC load dispatch attempt failed due to address collision with RC/CO/SN/SQ ##711C1 A Read/Claim dispatch for a load failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time. #143,v,g,n,s,PM_L2SB_RCLD_DISP_FAIL_OTHER,L2 slice B RC load dispatch attempt failed due to other reasons ##731E1 A Read/Claim dispatch for a load failed for some reason other than Full or Collision conditions. #144,v,g,n,s,PM_L2SB_RCLD_DISP_FAIL_RC_FULL,L2 slice B RC load dispatch attempt failed due to all RC full ##721E1 A Read/Claim dispatch for a load failed because all RC machines are busy. 
#145,v,g,n,s,PM_L2SB_RCST_DISP,L2 slice B RC store dispatch attempt ##702C1 A Read/Claim dispatch for a Store was attempted. #146,v,g,n,s,PM_L2SB_RCST_DISP_FAIL_ADDR,L2 slice B RC store dispatch attempt failed due to address collision with RC/CO/SN/SQ ##712C1 A Read/Claim dispatch for a store failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time. #147,v,g,n,s,PM_L2SB_RCST_DISP_FAIL_OTHER,L2 slice B RC store dispatch attempt failed due to other reasons ##732E1 A Read/Claim dispatch for a store failed for some reason other than Full or Collision conditions. Rejected dispatches do not count because they have not yet been attempted. #148,v,g,n,s,PM_L2SB_RCST_DISP_FAIL_RC_FULL,L2 slice B RC store dispatch attempt failed due to all RC full ##722E2 A Read/Claim dispatch for a store failed because all RC machines are busy. #149,v,g,n,s,PM_L2SB_RC_DISP_FAIL_CO_BUSY,L2 slice B RC dispatch attempt failed due to RC/CO pair chosen was miss and CO already busy ##703C1 A Read/Claim Dispatch was rejected at dispatch because the Castout Machine was busy. In the case of an RC starting up on a miss and the victim is valid, the CO machine must be available for the RC to process the access. If the CO is still busy working on an old castout, then the RC must not-ack the access if it is a miss(re-issued by the CIU). If it is a miss and the CO is available to process the castout, the RC will accept the access. Once the RC has finished, it can restart and process new accesses that result in a hit (or miss that doesn't need a CO) even though the CO is still processing a castout from a previous access. #150,v,g,n,s,PM_L2SB_RC_DISP_FAIL_CO_BUSY_ALL,L2 slice B RC dispatch attempt failed due to all CO busy ##713C1 A Read/Claim dispatch was rejected because all Castout machines were busy. 
#151,v,g,n,s,PM_L2SB_SHR_INV,L2 slice B transition from shared to invalid ##710C1 A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. This transition was caused by any external snoop request. The event is provided on each of the three slices A, B, and C. NOTE: For this event to be useful the tablewalk duration event should also be counted. #152,v,g,n,s,PM_L2SB_SHR_MOD,L2 slice B transition from shared to modified ##700C1 A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A, B, and C. #153,v,g,n,n,PM_L2SB_ST_HIT,L2 slice B store hits ##733E1 A store request made from the core hit in the L2 directory. This event is provided on each of the three L2 slices A, B and C. #154,v,g,n,n,PM_L2SB_ST_REQ,L2 slice B store requests ##723E1 A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. The event is provided on each of the three slices A, B, and C. #155,v,g,n,s,PM_L2SC_MOD_INV,L2 slice C transition from modified to invalid ##730E2 A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C. #156,v,g,n,s,PM_L2SC_MOD_TAG,L2 slice C transition from modified to tagged ##720E2 A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C. 
#157,v,g,n,s,PM_L2SC_RCLD_DISP,L2 slice C RC load dispatch attempt ##701C2 A Read/Claim dispatch for a Load was attempted #158,v,g,n,s,PM_L2SC_RCLD_DISP_FAIL_ADDR,L2 slice C RC load dispatch attempt failed due to address collision with RC/CO/SN/SQ ##711C2 A Read/Claim dispatch for a load failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time. #159,v,g,n,s,PM_L2SC_RCLD_DISP_FAIL_OTHER,L2 slice C RC load dispatch attempt failed due to other reasons ##731E2 A Read/Claim dispatch for a load failed for some reason other than Full or Collision conditions. #160,v,g,n,s,PM_L2SC_RCLD_DISP_FAIL_RC_FULL,L2 slice C RC load dispatch attempt failed due to all RC full ##721E2 A Read/Claim dispatch for a load failed because all RC machines are busy. #161,v,g,n,s,PM_L2SC_RCST_DISP,L2 slice C RC store dispatch attempt ##702C2 A Read/Claim dispatch for a Store was attempted. #162,v,g,n,s,PM_L2SC_RCST_DISP_FAIL_ADDR,L2 slice C RC store dispatch attempt failed due to address collision with RC/CO/SN/SQ ##712C2 A Read/Claim dispatch for a store failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time. #163,v,g,n,s,PM_L2SC_RCST_DISP_FAIL_OTHER,L2 slice C RC store dispatch attempt failed due to other reasons ##732E2 A Read/Claim dispatch for a store failed for some reason other than Full or Collision conditions. Rejected dispatches do not count because they have not yet been attempted. #164,v,g,n,s,PM_L2SC_RCST_DISP_FAIL_RC_FULL,L2 slice C RC store dispatch attempt failed due to all RC full ##722E1 A Read/Claim dispatch for a store failed because all RC machines are busy. #165,v,g,n,s,PM_L2SC_RC_DISP_FAIL_CO_BUSY,L2 slice C RC dispatch attempt failed due to RC/CO pair chosen was miss and CO already busy ##703C2 A Read/Claim Dispatch was rejected at dispatch because the Castout Machine was busy. 
In the case of an RC starting up on a miss and the victim is valid, the CO machine must be available for the RC to process the access. If the CO is still busy working on an old castout, then the RC must not-ack the access if it is a miss(re-issued by the CIU). If it is a miss and the CO is available to process the castout, the RC will accept the access. Once the RC has finished, it can restart and process new accesses that result in a hit (or miss that doesn't need a CO) even though the CO is still processing a castout from a previous access. #166,v,g,n,s,PM_L2SC_RC_DISP_FAIL_CO_BUSY_ALL,L2 slice C RC dispatch attempt failed due to all CO busy ##713C2 A Read/Claim dispatch was rejected because all Castout machines were busy. #167,v,g,n,s,PM_L2SC_SHR_INV,L2 slice C transition from shared to invalid ##710C2 A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. This transition was caused by any external snoop request. The event is provided on each of the three slices A, B, and C. NOTE: For this event to be useful the tablewalk duration event should also be counted. #168,v,g,n,s,PM_L2SC_SHR_MOD,L2 slice C transition from shared to modified ##700C2 A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A, B, and C. #169,v,g,n,n,PM_L2SC_ST_HIT,L2 slice C store hits ##733E2 A store request made from the core hit in the L2 directory. The event is provided on each of the three slices A, B, and C. #170,v,g,n,n,PM_L2SC_ST_REQ,L2 slice C store requests ##723E2 A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. The event is provided on each of the three slices A, B, and C. 
#171,v,g,n,n,PM_L2_PREF,L2 cache prefetches ##C50C3 A request to prefetch data into L2 was made #172,v,g,n,s,PM_L3SA_ALL_BUSY,L3 slice A active for every cycle all CI/CO machines busy ##721E3 Cycles All Castin/Castout machines are busy. #173,v,g,n,s,PM_L3SA_HIT,L3 slice A hits ##711C3 Number of attempts made by this chip cores that resulted in an L3 hit. Reported per L3 slice #174,v,g,n,s,PM_L3SA_MOD_INV,L3 slice A transition from modified to invalid ##730E3 L3 snooper detects someone doing a store to a line that is truly M in this L3 (i.e. L3 going M=>I) Mu|Me are not included since they are formed due to a prev read op. Tx is not included since it is considered shared at this point. #175,v,g,n,s,PM_L3SA_MOD_TAG,L3 slice A transition from modified to TAG ##720E3 L3 snooper detects someone doing a read to a line that is truly M in this L3(i.e. L3 going M->T or M->I(go_Mu case) Mu|Me are not included since they are formed due to a prev read op). Tx is not included since it is considered shared at this point. #176,v,g,n,s,PM_L3SA_REF,L3 slice A references ##701C3 Number of attempts made by this chip cores to find data in the L3. Reported per L3 slice #177,v,g,n,s,PM_L3SA_SHR_INV,L3 slice A transition from shared to invalid ##710C3 L3 snooper detects someone doing a store to a line that is Sx in this L3(i.e. invalidate hit SX and dispatched). #178,v,g,n,s,PM_L3SA_SNOOP_RETRY,L3 slice A snoop retries ##731E3 Number of times an L3 retried a snoop because it got two in at the same time (one on snp_a, one on snp_b) #179,v,g,n,s,PM_L3SB_ALL_BUSY,L3 slice B active for every cycle all CI/CO machines busy ##721E4 Cycles All Castin/Castout machines are busy. #180,v,g,n,s,PM_L3SB_HIT,L3 slice B hits ##711C4 Number of attempts made by this chip cores that resulted in an L3 hit. Reported per L3 slice #181,v,g,n,s,PM_L3SB_MOD_INV,L3 slice B transition from modified to invalid ##730E4 L3 snooper detects someone doing a store to a line that is truly M in this L3 (i.e. 
L3 going M=>I). Mu|Me are not included since they are formed due to a prev read op. Tx is not included since it is considered shared at this point. #182,v,g,n,s,PM_L3SB_MOD_TAG,L3 slice B transition from modified to TAG ##720E4 L3 snooper detects someone doing a read to a line that is truly M in this L3(i.e. L3 going M->T or M->I(go_Mu case); Mu|Me are not included since they are formed due to a prev read op). Tx is not included since it is considered shared at this point. #183,v,g,n,s,PM_L3SB_REF,L3 slice B references ##701C4 Number of attempts made by this chip cores to find data in the L3. Reported per L3 slice #184,v,g,n,s,PM_L3SB_SHR_INV,L3 slice B transition from shared to invalid ##710C4 L3 snooper detects someone doing a store to a line that is Sx in this L3(i.e. invalidate hit SX and dispatched). #185,v,g,n,s,PM_L3SB_SNOOP_RETRY,L3 slice B snoop retries ##731E4 Number of times an L3 retried a snoop because it got two in at the same time (one on snp_a, one on snp_b) #186,v,g,n,s,PM_L3SC_ALL_BUSY,L3 slice C active for every cycle all CI/CO machines busy ##721E5 Cycles All Castin/Castout machines are busy. #187,v,g,n,s,PM_L3SC_HIT,L3 slice C hits ##711C5 Number of attempts made by this chip cores that resulted in an L3 hit. Reported per L3 Slice #188,v,g,n,s,PM_L3SC_MOD_INV,L3 slice C transition from modified to invalid ##730E5 L3 snooper detects someone doing a store to a line that is truly M in this L3 (i.e. L3 going M=>I) Mu|Me are not included since they are formed due to a previous read op Tx is not included since it is considered shared at this point. #189,v,g,n,s,PM_L3SC_MOD_TAG,L3 slice C transition from modified to TAG ##720E5 L3 snooper detects someone doing a read to a line that is truly M in this L3(i.e. L3 going M->T or M->I(go_Mu case); Mu|Me are not included since they are formed due to a prev read op). Tx is not included since it is considered shared at this point. 
#190,v,g,n,s,PM_L3SC_REF,L3 slice C references ##701C5 Number of attempts made by this chip cores to find data in the L3. Reported per L3 slice. #191,v,g,n,s,PM_L3SC_SHR_INV,L3 slice C transition from shared to invalid ##710C5 L3 snooper detects someone doing a store to a line that is Sx in this L3(i.e. invalidate hit SX and dispatched). #192,v,g,n,s,PM_L3SC_SNOOP_RETRY,L3 slice C snoop retries ##731E5 Number of times an L3 retried a snoop because it got two in at the same time (one on snp_a, one on snp_b) #193,v,g,n,n,PM_LARX_LSU0,Larx executed on LSU0 ##820E7 A larx (lwarx or ldarx) was executed on side 0 (there is no corresponding unit 1 event since larx instructions can only execute on unit 0) #194,v,g,n,n,PM_LD_MISS_L1_LSU0,LSU0 L1 D cache load misses ##C10C2 Load references that miss the Level 1 Data cache, by unit 0. #195,v,g,n,n,PM_LD_MISS_L1_LSU1,LSU1 L1 D cache load misses ##C10C6 Load references that miss the Level 1 Data cache, by unit 1. #196,v,g,n,n,PM_LD_REF_L1,L1 D cache load references ##C1090 Load references to the Level 1 Data Cache. Combined unit 0 + 1. #197,v,g,n,n,PM_LD_REF_L1_LSU0,LSU0 L1 D cache load references ##C10C0 Load references to Level 1 Data Cache, by unit 0. #198,v,g,n,n,PM_LD_MISS_L1_LSU1,LSU1 L1 D cache load misses ##C10C5 Load references that miss the Level 1 Data cache, by unit 1. #199,u,g,n,s,PM_LR_CTR_MAP_FULL_CYC,Cycles LR/CTR mapper full ##100C6 The LR/CTR mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. This event only indicates that the mapper was full, not that dispatch was prevented. #200,v,g,n,n,PM_LSU0_BUSY_REJECT,LSU0 busy due to reject ##C20E1 Total cycles the Load Store Unit 0 is busy rejecting instructions. #201,v,g,n,n,PM_LSU0_DERAT_MISS,LSU0 DERAT misses ##800C2 Total D-ERAT Misses by LSU0. Requests that miss the Derat are rejected and retried until the request hits in the Erat. This may result in multiple erat misses for the same instruction. 
#202,v,g,n,n,PM_LSU0_FLUSH_LRQ,LSU0 LRQ flushes ##C00C2 A load was flushed by unit 0 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. #203,u,g,n,n,PM_LSU0_FLUSH_SRQ,LSU0 SRQ lhs flushes ##C00C3 A store was flushed by unit 0 because younger load hits and older store that is already in the SRQ or in the same group. #204,v,g,n,n,PM_LSU0_FLUSH_ULD,LSU0 unaligned load flushes ##C00C0 A load was flushed from unit 0 because it was unaligned (crossed a 64 byte boundary, or 32 byte if it missed the L1) #205,v,g,n,n,PM_LSU0_FLUSH_UST,LSU0 unaligned store flushes ##C00C1 A store was flushed from unit 0 because it was unaligned (crossed a 4K boundary). #206,v,g,n,n,PM_LSU0_LDF,LSU0 executed Floating Point load instruction ##C50C0 A floating point load was executed by LSU0 #207,v,g,n,n,PM_LSU0_NCLD,LSU0 non-cacheable loads ##C50C1 A non-cacheable load was executed by unit 0. #208,v,g,n,n,PM_LSU0_REJECT_ERAT_MISS,LSU0 reject due to ERAT miss ##C40C3 Total cycles the Load Store Unit 0 is busy rejecting instructions due to an ERAT miss. Requests that miss the Derat are rejected and retried until the request hits in the Erat. #209,v,g,n,n,PM_LSU0_REJECT_LMQ_FULL,LSU0 reject due to LMQ full or missed data coming ##C40C1 Total cycles the Load Store Unit 0 is busy rejecting instructions because the Load Miss Queue was full. The LMQ has eight entries. If all eight entries are full, subsequent load instructions are rejected. #210,v,g,n,n,PM_LSU0_REJECT_RELOAD_CDF,LSU0 reject due to reload CDF or tag update collision ##C40C2 Total cycles the Load Store Unit 0 is busy rejecting instructions because of Critical Data Forward. When critical data arrives from the storage system it is formatted and immediately forwarded, bypassing the data cache, to the destination register using the result bus. 
Any instruction that requires the result bus in the same cycle is rejected. Tag update rejects are caused when an instruction requires access to the Dcache directory or ERAT in the same system when they are being updated. #211,v,g,n,n,PM_LSU0_REJECT_SRQ,LSU0 SRQ lhs rejects ##C40C0 Total cycles the Load Store Unit 0 is busy rejecting instructions because of Load Hit Store conditions. Loads are rejected when data is needed from a previous store instruction but store forwarding is not possible because the data is not fully contained in the Store Data Queue or is not yet available in the Store Data Queue. #212,u,g,n,n,PM_LSU0_SRQ_STFWD,LSU0 SRQ store forwarded ##C60E1 Data from a store instruction was forwarded to a load on unit 0. A load that misses L1 but becomes a store forward is treated as a load miss and it causes the DL1 load miss event to be counted. It does not go into the LMQ. If a load that hits L1 but becomes a store forward, then it's not treated as a load miss. #213,v,g,n,n,PM_LSU1_BUSY_REJECT,LSU1 busy due to reject ##C20E5 Total cycles the Load Store Unit 1 is busy rejecting instructions. #214,v,g,n,n,PM_LSU1_DERAT_MISS,LSU1 DERAT misses ##800C6 A data request (load or store) from LSU Unit 1 missed the ERAT and resulted in an ERAT reload. Multiple instructions may miss the ERAT entry for the same 4K page, but only one reload will occur. #215,v,g,n,n,PM_LSU1_FLUSH_LRQ,LSU1 LRQ flushes ##C00C6 A load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. #216,u,g,n,n,PM_LSU1_FLUSH_SRQ,LSU1 SRQ lhs flushes ##C00C7 A store was flushed because younger load hits and older store that is already in the SRQ or in the same group.
#217,v,g,n,n,PM_LSU1_FLUSH_ULD,LSU1 unaligned load flushes ##C00C4 A load was flushed from unit 1 because it was unaligned (crossed a 64 byte boundary, or 32 byte if it missed the L1). #218,u,g,n,n,PM_LSU1_FLUSH_UST,LSU1 unaligned store flushes ##C00C5 A store was flushed from unit 1 because it was unaligned (crossed a 4K boundary) #219,v,g,n,n,PM_LSU1_LDF,LSU1 executed Floating Point load instruction ##C50C4 A floating point load was executed by LSU1 #220,v,g,n,n,PM_LSU1_NCLD,LSU1 non-cacheable loads ##C50C5 A non-cacheable load was executed by unit 1. #221,v,g,n,n,PM_LSU1_REJECT_ERAT_MISS,LSU1 reject due to ERAT miss ##C40C7 Total cycles the Load Store Unit 1 is busy rejecting instructions due to an ERAT miss. Requests that miss the Derat are rejected and retried until the request hits in the Erat. #222,v,g,n,n,PM_LSU1_REJECT_LMQ_FULL,LSU1 reject due to LMQ full or missed data coming ##C40C5 Total cycles the Load Store Unit 1 is busy rejecting instructions because the Load Miss Queue was full. The LMQ has eight entries. If all eight entries are full, subsequent load instructions are rejected. #223,v,g,n,n,PM_LSU1_REJECT_RELOAD_CDF,LSU1 reject due to reload CDF or tag update collision ##C40C6 Total cycles the Load Store Unit 1 is busy rejecting instructions because of Critical Data Forward. When critical data arrives from the storage system it is formatted and immediately forwarded, bypassing the data cache, to the destination register using the result bus. Any instruction that requires the result bus in the same cycle is rejected. Tag update rejects are caused when an instruction requires access to the Dcache directory or ERAT in the same system when they are being updated. #224,v,g,n,n,PM_LSU1_REJECT_SRQ,LSU1 SRQ lhs rejects ##C40C4 Total cycles the Load Store Unit 1 is busy rejecting instructions because of Load Hit Store conditions.
Loads are rejected when data is needed from a previous store instruction but store forwarding is not possible because the data is not fully contained in the Store Data Queue or is not yet available in the Store Data Queue. #225,u,g,n,n,PM_LSU1_SRQ_STFWD,LSU1 SRQ store forwarded ##C60E5 Data from a store instruction was forwarded to a load on unit 1. A load that misses L1 but becomes a store forward is treated as a load miss and it causes the DL1 load miss event to be counted. It does not go into the LMQ. If a load that hits L1 but becomes a store forward, then it's not treated as a load miss. #226,v,g,n,n,PM_LSU_FLUSH,Flush initiated by LSU ##110C5 A flush was initiated by the Load Store Unit #227,v,g,n,s,PM_LSU_FLUSH_LRQ_FULL,Flush caused by LRQ full ##320E7 This thread was flushed at dispatch because its Load Request Queue was full. This allows the other thread to have more machine resources for it to make progress while this thread is stalled. #228,u,g,n,n,PM_LSU_FLUSH_SRQ,SRQ flushes ##C00A8 A store was flushed because younger load hits and older store that is already in the SRQ or in the same group. Combined Unit 0 + 1. #229,v,g,n,s,PM_LSU_FLUSH_SRQ_FULL,Flush caused by SRQ full ##330E0 This thread was flushed at dispatch because its Store Request Queue was full. This allows the other thread to have more machine resources for it to make progress while this thread is stalled. #230,v,g,n,n,PM_LSU_LDF,LSU executed Floating Point load instruction ##C5090 LSU executed Floating Point load instruction. Combined Unit 0 + 1. #231,u,g,n,s,PM_LSU_LMQ_FULL_CYC,Cycles LMQ full ##C30E7 The Load Miss Queue was full. #232,v,g,n,n,PM_LSU_LMQ_LHR_MERGE,LMQ LHR merges ##C70E5 A data cache miss occurred for the same real cache line address as an earlier request already in the Load Miss Queue and was merged into the LMQ entry. #233,v,g,n,s,PM_LSU_LMQ_S0_ALLOC,LMQ slot 0 allocated ##C30E6 The first entry in the LMQ was allocated. 
#234,v,g,n,n,PM_LSU_LMQ_S0_VALID,LMQ slot 0 valid ##C30E5 This signal is asserted every cycle when the first entry in the LMQ is valid. The LMQ had eight entries that are allocated FIFO #235,v,g,n,s,PM_LSU_LRQ_FULL_CYC,Cycles LRQ full ##110C2 Cycles when the LRQ is full. #236,v,g,n,n,PM_LSU_LRQ_S0_ALLOC,LRQ slot 0 allocated ##C60E7 LRQ slot zero was allocated #237,v,g,n,n,PM_LSU_LRQ_S0_VALID,LRQ slot 0 valid ##C60E6 This signal is asserted every cycle that the Load Request Queue slot zero is valid. The LRQ is 32 entries long and is allocated round-robin. In SMT mode the LRQ is split between the two threads (16 entries each). #238,v,g,n,n,PM_LSU_REJECT_ERAT_MISS,LSU reject due to ERAT miss ##C40A8 Total cycles the Load Store Unit is busy rejecting instructions due to an ERAT miss. Combined unit 0 + 1. Requests that miss the Derat are rejected and retried until the request hits in the Erat. #239,u,g,n,n,PM_LSU_SRQ_EMPTY_CYC,Cycles SRQ empty ##00015 Cycles the Store Request Queue is empty #240,v,g,n,s,PM_LSU_SRQ_FULL_CYC,Cycles SRQ full ##110C3 Cycles the Store Request Queue is full. #241,v,g,n,n,PM_LSU_SRQ_S0_ALLOC,SRQ slot 0 allocated ##C20E7 SRQ Slot zero was allocated #242,v,g,n,n,PM_LSU_SRQ_S0_VALID,SRQ slot 0 valid ##C20E6 This signal is asserted every cycle that the Store Request Queue slot zero is valid. The SRQ is 32 entries long and is allocated round-robin. In SMT mode the SRQ is split between the two threads (16 entries each). #243,u,g,n,n,PM_LSU_SRQ_SYNC_CYC,SRQ sync duration ##830E5 Cycles that a sync instruction is active in the Store Request Queue. #244,v,g,n,n,PM_LWSYNC_HELD,LWSYNC held at dispatch ##130E0 Cycles a LWSYNC instruction was held at dispatch. LWSYNC instructions are held at dispatch until all previous loads are done and all previous stores have issued. LWSYNC enters the Store Request Queue and is sent to the storage subsystem but does not wait for a response.
#245,c,g,n,n,PM_MEM_FAST_PATH_RD_DISP,Fast path memory read dispatched ##731E6 Fast path memory read dispatched #246,v,g,n,n,PM_MEM_RQ_DISP_Q16to19,Memory read queue dispatched to queues 16-19 ##727E6 A memory operation was dispatched to read queue 16,17,18 or 19. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #247,v,g,n,s,PM_MEM_HI_PRIO_WR_CMPL,High priority write completed ##726E6 A memory write, which was upgraded to high priority, completed. Writes can be upgraded to high priority to ensure that read traffic does not lock out writes. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #248,v,g,n,n,PM_MEM_RQ_DISP_Q12to15,Memory read queue dispatched to queues 12-15 ##732E6 A memory operation was dispatched to read queue 12,13,14 or 15. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #249,v,g,n,s,PM_MEM_LO_PRIO_WR_CMPL,Low priority write completed ##736E6 A memory write, which was not upgraded to high priority, completed. This event is sent from the Memory Controller clock domain and must be scaled accordingly #250,v,g,n,s,PM_MEM_PWQ_DISP,Memory partial-write queue dispatched ##704C6 Number of Partial Writes dispatched. The MC provides resources to gather partial cacheline writes (Partial line DMA writes & CI-stores) to up to four different cachelines at a time. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #251,v,g,n,n,PM_MEM_PWQ_DISP_Q2or3,Memory partial-write queue dispatched to Write Queue 2 or 3 ##734E6 Memory partial-write queue dispatched to Write Queue 2 or 3. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #252,v,g,n,s,PM_MEM_PW_CMPL,Memory partial-write completed ##724E6 Number of Partial Writes completed. This event is sent from the Memory Controller clock domain and must be scaled accordingly. 
#253,v,g,n,s,PM_MEM_PW_GATH,Memory partial-write gathered ##714C6 Two or more partial-writes have been merged into a single memory write. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #254,v,g,n,n,PM_INST_DISP_ATTEMPT,Instructions dispatch attempted ##120E1 Number of PowerPC Instructions dispatched (attempted, not filtered by success). #255,v,g,n,s,PM_MEM_RQ_DISP,Memory read queue dispatched ##701C6 A memory read was dispatched. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #256,v,g,n,n,PM_MEM_RQ_DISP_Q0to3,Memory read queue dispatched to queues 0-3 ##702C6 A memory operation was dispatched to read queue 0,1,2, or 3. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #257,v,g,n,n,PM_MEM_RQ_DISP_Q4to7,Memory read queue dispatched to queues 4-7 ##712C6 A memory operation was dispatched to read queue 4,5,6 or 7. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #258,v,g,n,s,PM_MEM_SPEC_RD_CANCEL,Speculative memory read cancelled ##721E6 Speculative memory read cancelled (i.e. cresp = sourced by L2/L3) #259,v,g,n,n,PM_MEM_WQ_DISP_Q0to7,Memory write queue dispatched to queues 0-7 ##723E6 A memory operation was dispatched to a write queue in the range between 0 and 7. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #260,v,g,n,n,PM_MEM_WQ_DISP_Q8to15,Memory write queue dispatched to queues 8-15 ##733E6 A memory operation was dispatched to a write queue in the range between 8 and 15. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #261,v,g,n,s,PM_MEM_WQ_DISP_DCLAIM,Memory write queue dispatched due to dclaim/flush ##713C6 A memory dclaim or flush operation was dispatched to a write queue. This event is sent from the Memory Controller clock domain and must be scaled accordingly. 
#262,v,g,n,s,PM_MEM_WQ_DISP_WRITE,Memory write queue dispatched due to write ##703C6 A memory write was dispatched to a write queue. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #263,v,g,n,n,PM_MRK_CRU_FIN,Marked instruction CRU processing finished ##00005 The Condition Register Unit finished a marked instruction. Instructions that finish may not necessary complete. #264,v,g,n,n,PM_MRK_DATA_FROM_L25_MOD_CYC,Marked load latency from L2.5 modified ##C70A2 Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. To calculate average latency divide this count by the number of marked misses to the same level. #265,v,g,n,n,PM_MRK_DATA_FROM_L275_MOD,Marked data loaded from L2.75 modified ##C7097 The processor's Data Cache was reloaded with modified (M) data from the L2 on a different module than this processor is located due to a marked load. #266,v,g,n,n,PM_MRK_DATA_FROM_L275_MOD_CYC,Marked load latency from L2.75 modified ##C70A3 Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. To calculate average latency divide this count by the number of marked misses to the same level. #267,v,g,n,n,PM_MRK_DATA_FROM_L35_MOD_CYC,Marked load latency from L3.5 modified ##C70A6 Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. To calculate average latency divide this count by the number of marked misses to the same level. 
#268,v,g,n,n,PM_MRK_DATA_FROM_L375_MOD,Marked data loaded from L3.75 modified ##C709E The processor's Data Cache was reloaded with modified (M) data from the L3 of a chip on a different module than this processor is located due to a marked load. #269,v,g,n,n,PM_MRK_DATA_FROM_L375_MOD_CYC,Marked load latency from L3.75 modified ##C70A7 Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. To calculate average latency divide this count by the number of marked misses to the same level. #270,v,g,n,n,PM_MRK_DATA_FROM_LMEM_CYC,Marked load latency from local memory ##C70A0 Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. To calculate average latency divide this count by the number of marked misses to the same level. #271,v,g,n,n,PM_MRK_DATA_FROM_RMEM,Marked data loaded from remote memory ##C7087 The processor's Data Cache was reloaded due to a marked load from memory attached to a different module than this processor is located on. #272,v,g,n,n,PM_MRK_DATA_FROM_RMEM_CYC,Marked load latency from remote memory ##C70A1 Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. To calculate average latency divide this count by the number of marked misses to the same level. #273,v,g,n,n,PM_MRK_DSLB_MISS,Marked Data SLB misses ##C50C7 A Data SLB miss was caused by a marked instruction. #274,v,g,n,n,PM_MRK_DTLB_MISS,Marked Data TLB misses ##C50C6,C60E0 Data TLB references by a marked instruction that missed the TLB (all page sizes). 
#275,v,g,n,n,PM_MRK_DTLB_MISS_16G,Marked Data TLB misses for 16G page ##C608D Data TLB references to 16GB pages by a marked instruction that missed the TLB. Page size is determined at TLB reload time. #276,v,g,n,n,PM_MRK_DTLB_REF,Marked Data TLB reference ##C60E4 Total number of Data TLB references by a marked instruction for all page sizes. Page size is determined at TLB reload time. #277,v,g,n,n,PM_MRK_DTLB_REF_16G,Marked Data TLB reference for 16G page ##C6086 Data TLB references by a marked instruction for 16GB pages. #278,v,g,n,n,PM_MRK_GRP_CMPL,Marked group completed ##00013 A group containing a sampled instruction completed. Microcoded instructions that span multiple groups will generate this event once per group. #279,v,g,n,n,PM_MRK_GRP_IC_MISS,Group experienced marked I cache miss ##12091 A group containing a marked (sampled) instruction experienced an instruction cache miss. #280,v,g,n,n,PM_MRK_GRP_TIMEO,Marked group completion timeout ##0000B The sampling timeout expired indicating that the previously sampled instruction is no longer in the processor #281,v,g,n,n,PM_MRK_IMR_RELOAD,Marked IMR reloaded ##820E2 A DL1 reload occurred due to marked load #282,v,g,n,n,PM_MRK_L1_RELOAD_VALID,Marked L1 reload data source valid ##C70E4 The source information is valid and is for a marked load #283,v,g,n,n,PM_MRK_LD_MISS_L1_LSU0,LSU0 marked L1 D cache load misses ##820E0 Load references that miss the Level 1 Data cache, by LSU0. #284,v,g,n,n,PM_MRK_LD_MISS_L1_LSU1,LSU1 marked L1 D cache load misses ##820E4 Load references that miss the Level 1 Data cache, by LSU1. #285,v,g,n,n,PM_MRK_LSU0_FLUSH_LRQ,LSU0 marked LRQ flushes ##810C2 A marked load was flushed by unit 0 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. 
#286,u,g,n,n,PM_MRK_LSU0_FLUSH_SRQ,LSU0 marked SRQ lhs flushes ##810C3 A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group. #287,v,g,n,n,PM_MRK_LSU0_FLUSH_ULD,LSU0 marked unaligned load flushes ##810C1 A marked load was flushed from unit 0 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) #288,v,g,n,n,PM_MRK_LSU0_FLUSH_UST,LSU0 marked unaligned store flushes ##810C0 A marked store was flushed from unit 0 because it was unaligned #289,v,g,n,n,PM_MRK_LSU1_FLUSH_LRQ,LSU1 marked LRQ flushes ##810C6 A marked load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. #290,u,g,n,n,PM_MRK_LSU1_FLUSH_SRQ,LSU1 marked SRQ lhs flushes ##810C7 A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group. #291,v,g,n,n,PM_MRK_LSU1_FLUSH_ULD,LSU1 marked unaligned load flushes ##810C4 A marked load was flushed from unit 1 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) #292,u,g,n,n,PM_MRK_LSU1_FLUSH_UST,LSU1 marked unaligned store flushes ##810C5 A marked store was flushed from unit 1 because it was unaligned (crossed a 4k boundary) #293,c,g,n,n,PM_MRK_LSU_FIN,Marked instruction LSU processing finished ##00014 One of the Load/Store Units finished a marked instruction. Instructions that finish may not necessary complete #294,v,g,n,n,PM_MRK_LSU_FLUSH_SRQ,Marked SRQ lhs flushes ##81088 A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group. 
#295,v,g,n,n,PM_MRK_LSU_FLUSH_ULD,Marked unaligned load flushes ##81090 A marked load was flushed because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) #296,u,g,n,n,PM_MRK_LSU_SRQ_INST_VALID,Marked instruction valid in SRQ ##C70E6 This signal is asserted every cycle when a marked request is resident in the Store Request Queue #297,v,g,n,n,PM_MRK_STCX_FAIL,Marked STCX failed ##820E6 A marked stcx (stwcx or stdcx) failed #298,v,g,n,n,PM_MRK_ST_MISS_L1,Marked L1 D cache store misses ##820E3 A marked store missed the dcache #299,v,g,n,n,PM_PMC3_OVERFLOW,PMC3 Overflow ##0000A Overflows from PMC3 are counted. This effectively widens the PMC. The Overflow from the original PMC will not trigger an exception even if the PMU is configured to generate exceptions on overflow. #300,v,g,n,n,PM_PTEG_FROM_L275_MOD,PTEG loaded from L2.75 modified ##83097 A Page Table Entry was loaded into the TLB with modified (M) data from the L2 on a different module than this processor is located due to a demand load. #301,v,g,n,n,PM_PTEG_FROM_L375_MOD,PTEG loaded from L3.75 modified ##8309E A Page Table Entry was loaded into the TLB with modified (M) data from the L3 of a chip on a different module than this processor is located, due to a demand load. #302,v,g,n,n,PM_PTEG_FROM_RMEM,PTEG loaded from remote memory ##83087 A Page Table Entry was loaded into the TLB from memory attached to a different module than this processor is located on. #303,v,g,n,n,PM_PTEG_RELOAD_VALID,PTEG reload valid ##830E4 A Page Table Entry was loaded into the TLB. #304,v,g,n,s,PM_SNOOP_DCLAIM_RETRY_QFULL,Snoop dclaim/flush retry due to write/dclaim queues full ##720E6 A snoop request for a dclaim or flush to memory was retried because the write/dclaim queues were full. This event is sent from the Memory Controller clock domain and must be scaled accordingly. 
#305,v,g,n,s,PM_SNOOP_PARTIAL_RTRY_QFULL,Snoop partial write retry due to partial-write queues full ##730E6 A snoop request for a partial write to memory was retried because the write queues that handle partial writes were full. When this happens the active writes are changed to high priority. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #306,v,g,n,s,PM_SNOOP_PW_RETRY_RQ,Snoop partial-write retry due to collision with active read queue ##707C6 A snoop request for a partial write to memory was retried because it matched the cache line of an active read. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #307,v,g,n,s,PM_SNOOP_PW_RETRY_WQ_PWQ,Snoop partial-write retry due to collision with active write or partial-write queue ##717C6 A snoop request for a partial write to memory was retried because it matched the cache line of an active write or partial write. When this happens the snoop request is retried and the active write is changed to high priority. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #308,v,g,n,s,PM_SNOOP_RD_RETRY_QFULL,Snoop read retry due to read queue full ##700C6 A snoop request for a read from memory was retried because the read queues were full. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #309,v,g,n,s,PM_SNOOP_RD_RETRY_RQ,Snoop read retry due to collision with active read queue ##705C6 A snoop request for a read from memory was retried because it matched the cache line of an active read. The snoop request is retried because the L2 may be able to source data via intervention for the 2nd read faster than the MC. This event is sent from the Memory Controller clock domain and must be scaled accordingly. 
#310,v,g,n,s,PM_SNOOP_RD_RETRY_WQ,Snoop read retry due to collision with active write queue ##715C6 A snoop request for a read from memory was retried because it matched the cache line of an active write. The snoop request is retried and the active write is changed to high priority. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #311,v,g,n,s,PM_SNOOP_RETRY_1AHEAD,Snoop retry due to one ahead collision ##725E6 Snoop retry due to one ahead collision #312,u,g,n,s,PM_SNOOP_TLBIE,Snoop TLBIE ##800C3 A tlbie was snooped from another processor. #313,v,g,n,s,PM_SNOOP_WR_RETRY_QFULL,Snoop read retry due to read queue full ##710C6 A snoop request for a write to memory was retried because the write queues were full. When this happens the snoop request is retried and the writes in the write reorder queue are changed to high priority. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #314,v,g,n,s,PM_SNOOP_WR_RETRY_RQ,Snoop write/dclaim retry due to collision with active read queue ##706C6 A snoop request for a write or dclaim to memory was retried because it matched the cacheline of an active read. This event is sent from the Memory Controller clock domain and must be scaled accordingly #315,v,g,n,s,PM_SNOOP_WR_RETRY_WQ,Snoop write/dclaim retry due to collision with active write queue ##716C6 A snoop request for a write or dclaim to memory was retried because it matched the cache line of an active write. The snoop request is retried and the active write is changed to high priority. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #316,v,g,n,n,PM_STCX_FAIL,STCX failed ##820E1 A stcx (stwcx or stdcx) failed #317,v,g,n,n,PM_STCX_PASS,Stcx passes ##820E5 A stcx (stwcx or stdcx) instruction was successful #318,v,g,n,n,PM_ST_MISS_L1,L1 D cache store misses ##C10C3 A store missed the dcache. Combined Unit 0 + 1. 
#319,v,g,n,n,PM_ST_REF_L1_LSU0,LSU0 L1 D cache store references ##C10C1 Store references to the Data Cache by LSU0. #320,v,g,n,n,PM_ST_REF_L1_LSU1,LSU1 L1 D cache store references ##C10C4 Store references to the Data Cache by LSU1. #321,v,g,n,n,PM_SUSPENDED,Suspended ##00000 The counter is suspended (does not count). #322,v,g,n,s,PM_THRD_L2MISS_BOTH_CYC,Cycles both threads in L2 misses ##41084,410C7 Cycles that both threads have L2 miss pending. If only one thread has a L2 miss pending the other thread is given priority at decode. If both threads have L2 miss pending decode priority is determined by the number of GCT entries used. #323,v,g,n,n,PM_THRD_PRIO_1_CYC,Cycles thread running at priority level 1 ##420E0 Cycles this thread was running at priority level 1. Priority level 1 is the lowest and indicates the thread is sleeping. #324,v,g,n,n,PM_THRD_PRIO_2_CYC,Cycles thread running at priority level 2 ##420E1 Cycles this thread was running at priority level 2. #325,v,g,n,n,PM_THRD_PRIO_3_CYC,Cycles thread running at priority level 3 ##420E2 Cycles this thread was running at priority level 3. #326,v,g,n,n,PM_THRD_PRIO_4_CYC,Cycles thread running at priority level 4 ##420E3 Cycles this thread was running at priority level 4. #327,v,g,n,n,PM_THRD_PRIO_5_CYC,Cycles thread running at priority level 5 ##420E4 Cycles this thread was running at priority level 5. #328,v,g,n,n,PM_THRD_PRIO_6_CYC,Cycles thread running at priority level 6 ##420E5 Cycles this thread was running at priority level 6. #329,v,g,n,n,PM_THRD_PRIO_7_CYC,Cycles thread running at priority level 7 ##420E6 Cycles this thread was running at priority level 7. #330,v,g,n,n,PM_THRD_PRIO_DIFF_0_CYC,Cycles no thread priority difference ##430E3 Cycles when this thread's priority is equal to the other thread's priority. #331,v,g,n,n,PM_THRD_PRIO_DIFF_1or2_CYC,Cycles thread priority difference is 1 or 2 ##430E4 Cycles when this thread's priority is higher than the other thread's priority by 1 or 2. 
#332,v,g,n,n,PM_THRD_PRIO_DIFF_3or4_CYC,Cycles thread priority difference is 3 or 4 ##430E5 Cycles when this thread's priority is higher than the other thread's priority by 3 or 4. #333,v,g,n,n,PM_THRD_PRIO_DIFF_5or6_CYC,Cycles thread priority difference is 5 or 6 ##430E6 Cycles when this thread's priority is higher than the other thread's priority by 5 or 6. #334,v,g,n,n,PM_THRD_PRIO_DIFF_minus1or2_CYC,Cycles thread priority difference is -1 or -2 ##430E2 Cycles when this thread's priority is lower than the other thread's priority by 1 or 2. #335,v,g,n,n,PM_THRD_PRIO_DIFF_minus3or4_CYC,Cycles thread priority difference is -3 or -4 ##430E1 Cycles when this thread's priority is lower than the other thread's priority by 3 or 4. #336,v,g,n,n,PM_THRD_PRIO_DIFF_minus5or6_CYC,Cycles thread priority difference is -5 or -6 ##430E0 Cycles when this thread's priority is lower than the other thread's priority by 5 or 6. #337,v,g,n,s,PM_THRD_SEL_OVER_CLB_EMPTY,Thread selection overrides caused by CLB empty ##410C2 Thread selection was overridden because one thread's CLB was empty. #338,v,g,n,s,PM_THRD_SEL_OVER_GCT_IMBAL,Thread selection overrides caused by GCT imbalance ##410C4 Thread selection was overridden because of a GCT imbalance. #339,v,g,n,s,PM_THRD_SEL_OVER_ISU_HOLD,Thread selection overrides caused by ISU holds ##410C5 Thread selection was overridden because of an ISU hold. #340,v,g,n,s,PM_THRD_SEL_OVER_L2MISS,Thread selection overrides caused by L2 misses ##410C3 Thread selection was overridden because one thread had an L2 miss pending. #341,v,g,n,s,PM_THRD_SEL_T0,Decode selected thread 0 ##410C0 Thread selection picked thread 0 for decode. #342,v,g,n,s,PM_THRD_SEL_T1,Decode selected thread 1 ##410C1 Thread selection picked thread 1 for decode. #343,v,g,n,s,PM_THRD_SMT_HANG,SMT hang detected ##330E7 A hung thread was detected #344,v,g,n,n,PM_TLBIE_HELD,TLBIE held at dispatch ##130E4 Cycles a TLBIE instruction was held at dispatch. 
#345,v,g,n,n,PM_WORK_HELD,Work held ##0000C RAS Unit has signaled completion to stop and there are groups waiting to complete #346,v,g,n,s,PM_XER_MAP_FULL_CYC,Cycles XER mapper full ##100C2 The XER mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. This event only indicates that the mapper was full, not that dispatch was prevented. #347,v,g,n,n,PM_BR_PRED_CR,A conditional branch was predicted, CR prediction ##230E2 A conditional branch instruction was predicted as taken or not taken. #348,v,g,n,n,PM_BR_PRED_TA,A conditional branch was predicted, target prediction ##230E3 The target address of a branch instruction was predicted. #349,v,g,n,n,PM_MEM_RQ_DISP_Q8to11,Memory read queue dispatched to queues 8-11 ##722E6 A memory operation was dispatched to read queue 8,9,10 or 11. This event is sent from the Memory Controller clock domain and must be scaled accordingly. #350,v,g,n,n,PM_SNOOP_RETRY_AB_COLLISION,Snoop retry due to a b collision ##735E6 Snoop retry due to a b collision #351,v,g,n,s,PM_MEM_NONSPEC_RD_CANCEL,Non speculative memory read cancelled ##711C6 A non-speculative read was cancelled because the combined response indicated it was sourced from another L2 or L3. This event is sent from the Memory Controller clock domain and must be scaled accordingly $$$$$$$$ { counter 5 } #0,v,g,n,n,PM_RUN_INST_CMPL,Run instructions completed ##00009 Number of run instructions completed. $$$$$$$$ { counter 6 } #0,v,g,n,n,PM_RUN_CYC,Run cycles ##00005 Processor Cycles gated by the run latch. Operating systems use the run latch to indicate when they are doing useful work. The run latch is typically cleared in the OS idle loop. Gating by the run latch filters out the idle loop. 
papi-5.6.0/src/aix.h000664 001750 001750 00000006135 13216244356 016265 0ustar00jshenry1963jshenry1963000000 000000 #ifndef _PAPI_AIX_H /* _PAPI_AIX */ #define _PAPI_AIX_H /****************************/ /* THIS IS OPEN SOURCE CODE */ /****************************/ /* * File: aix.h * Author: Maynard Johnson * maynardj@us.ibm.com * Mods: * */ #include #include #include #include #include #include #include #include #if defined( _AIXVERSION_510) || defined(_AIXVERSION_520) #include #include #endif #include #include #include #include #include #include #include #include #include "pmapi.h" #define ANY_THREAD_GETS_SIGNAL #define POWER_MAX_COUNTERS MAX_COUNTERS #define MAX_COUNTER_TERMS MAX_COUNTERS #define MAX_MPX_COUNTERS 32 #define INVALID_EVENT -2 #define POWER_MAX_COUNTERS_MAPPING 8 /* Declared without a type, so they default to int. NOTE(review): presumably linker-provided segment boundary symbols -- confirm */ extern _text; extern _etext; extern _edata; extern _end; extern _data; /* globals */ /* PMINFO_T / PMEVENTS_T select the pmapi info and events types: the *2_t variants are used only when both PM_INITIALIZE and _AIXVERSION_510 are defined. The pminfo object itself is defined here only under PM_INITIALIZE. */ #ifdef PM_INITIALIZE #ifdef _AIXVERSION_510 #define PMINFO_T pm_info2_t #define PMEVENTS_T pm_events2_t #else #define PMINFO_T pm_info_t #define PMEVENTS_T pm_events_t #endif PMINFO_T pminfo; #else #define PMINFO_T pm_info_t #define PMEVENTS_T pm_events_t /*pm_info_t pminfo;*/ #endif #include "aix-context.h" /* define the vector structure at the bottom of this file */ #define PM_INIT_FLAGS PM_VERIFIED|PM_UNVERIFIED|PM_CAVEAT|PM_GET_GROUPS /* NOTE(review): unlike PMINFO_T above, these typedefs pick the *2_t types on PM_INITIALIZE alone, without checking _AIXVERSION_510 -- confirm this is intended */ #ifdef PM_INITIALIZE typedef pm_info2_t hwd_pminfo_t; typedef pm_events2_t hwd_pmevents_t; #else typedef pm_info_t hwd_pminfo_t; typedef pm_events_t hwd_pmevents_t; #endif #include "ppc64_events.h" /* Control state: the counter programming buffer, the selected group id, and the buffer the counters are read into. */ typedef struct ppc64_pmapi_control { /* Buffer to pass to the kernel to control the counters */ pm_prog_t counter_cmd; int group_id; /* Space to read the counters */ pm_data_t state; } ppc64_pmapi_control_t; /* Register-allocation record: a position, GROUP_INTS group words, and one command slot per counter. NOTE(review): ra_group looks like a bitmask of candidate groups -- confirm */ typedef struct ppc64_reg_alloc { int ra_position; unsigned int ra_group[GROUP_INTS]; int ra_counter_cmd[MAX_COUNTERS]; } ppc64_reg_alloc_t; /* Context state; currently it only wraps the control structure. */ typedef struct ppc64_pmapi_context { /* this structure is a work in progress */ ppc64_pmapi_control_t cntrl; } 
ppc64_pmapi_context_t; /* Override void* definitions from PAPI framework layer */ /* typedefs to conform to hardware independent PAPI code. */ #undef hwd_control_state_t #undef hwd_reg_alloc_t #undef hwd_context_t typedef ppc64_pmapi_control_t hwd_control_state_t; typedef ppc64_reg_alloc_t hwd_reg_alloc_t; typedef ppc64_pmapi_context_t hwd_context_t; /* typedef struct hwd_groups { // group number from the pmapi pm_groups_t struct //int group_id; // Buffer containing counter cmds for this group unsigned char counter_cmd[POWER_MAX_COUNTERS]; } hwd_groups_t; */ /* prototypes */ extern int _aix_set_granularity( hwd_control_state_t * this_state, int domain ); extern int _papi_hwd_init_preset_search_map( hwd_pminfo_t * info ); extern int _aix_get_memory_info( PAPI_hw_info_t * mem_info, int type ); extern int _aix_get_dmem_info( PAPI_dmem_info_t * d ); /* Machine dependent info structure */ extern pm_groups_info_t pmgroups; #endif /* _PAPI_AIX */ papi-5.6.0/src/perfctr-2.6.x/linux/drivers/000775 001750 001750 00000000000 13216244366 022362 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/ctests/overflow_one_and_read.c000664 001750 001750 00000007264 13216244360 023324 0ustar00jshenry1963jshenry1963000000 000000 /* * File: overflow_one_and_read.c : based on overflow_twoevents.c * Mods: Philip Mucci * mucci@cs.utk.edu * Kevin London * london@cs.utk.edu */ /* This file performs the following test: overflow dispatch on 1 counter. * In the handler read events. */ #include #include #include "papi.h" #include "papi_test.h" #include "do_loops.h" #define OVER_FMT "handler(%d) Overflow at %p! 
vector=%#llx\n" #define OUT_FMT "%-12s : %16lld%16lld\n" typedef struct { long long mask; int count; } ocount_t; /* there are three possible vectors, one counter overflows, the other counter overflows, both overflow */ /*not used*/ ocount_t overflow_counts[3] = { {0, 0}, {0, 0}, {0, 0} }; /*not used*/ int total_unknown = 0; /*added*/ long long dummyvalues[2]; void handler( int EventSet, void *address, long long overflow_vector, void *context ) { int retval; ( void ) context; if ( !TESTS_QUIET ) { fprintf( stderr, OVER_FMT, EventSet, address, overflow_vector ); } if ( ( retval = PAPI_read( EventSet, dummyvalues ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_read", retval ); if ( !TESTS_QUIET ) { fprintf( stderr, TWO12, dummyvalues[0], dummyvalues[1], "(Reading counters)\n" ); } if ( dummyvalues[1] == 0 ) test_fail( __FILE__, __LINE__, "Total Cycles == 0", 1 ); } int main( int argc, char **argv ) { int EventSet; long long **values = NULL; int retval; int PAPI_event; char event_name[PAPI_MAX_STR_LEN]; int num_events1, mask1; int quiet; /* Set TESTS_QUIET variable */ quiet=tests_quiet( argc, argv ); retval = PAPI_library_init( PAPI_VER_CURRENT ); if ( retval != PAPI_VER_CURRENT ) { test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); } /* add PAPI_TOT_CYC and one of the events in PAPI_FP_INS, PAPI_FP_OPS or PAPI_TOT_INS, depends on the availability of the event on the platform */ /* NOTE: Only adding one overflow on PAPI_event -- no overflow for PAPI_TOT_CYC*/ EventSet = add_two_nonderived_events( &num_events1, &PAPI_event, &mask1 ); if (num_events1==0) { if (!quiet) printf("Trouble adding events\n"); test_skip(__FILE__,__LINE__,"Adding event",1); } values = allocate_test_space( 2, num_events1 ); if ( ( retval = PAPI_event_code_to_name( PAPI_event, event_name ) ) != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); retval = PAPI_start( EventSet ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_start", retval ); 
do_flops( NUM_FLOPS ); retval = PAPI_stop( EventSet, values[0] ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); retval = PAPI_overflow( EventSet, PAPI_event, THRESHOLD, 0, handler ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_overflow", retval ); retval = PAPI_start( EventSet ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_start", retval ); do_flops( NUM_FLOPS ); retval = PAPI_stop( EventSet, values[1] ); if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); remove_test_events( &EventSet, mask1 ); if ( !TESTS_QUIET ) { printf ( "Test case: Overflow dispatch of 1st event in set with 2 events.\n" ); printf ( "---------------------------------------------------------------\n" ); printf( "Threshold for overflow is: %d\n", THRESHOLD ); printf( "Using %d iterations of c += a*b\n", NUM_FLOPS ); printf( "-----------------------------------------------\n" ); printf( "Test type : %16d%16d\n", 1, 2 ); printf( OUT_FMT, event_name, ( values[0] )[0], ( values[1] )[0] ); printf( OUT_FMT, "PAPI_TOT_CYC", ( values[0] )[1], ( values[1] )[1] ); } test_pass( __FILE__ ); return 0; } papi-5.6.0/src/libpfm4/lib/events/intel_hswep_unc_cbo_events.h000664 001750 001750 00000105564 13216244364 026515 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2014 Google Inc. 
All rights reserved * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. 
* * PMU: hswep_unc_cbo (Intel Haswell-EP C-Box uncore PMU) */ /* CBO_FILT_MESIF(a, b, c, d): expands to one umask initializer named "STATE_<a>" with description "<b> cacheline state", setting bit (17 + c) in ufilters[0] and assigning it to group d. */ #define CBO_FILT_MESIF(a, b, c, d) \ { .uname = "STATE_"#a,\ .udesc = #b" cacheline state",\ .ufilters[0] = 1ULL << (17 + (c)),\ .grpid = d, \ } /* CBO_FILT_MESIFS(d): the six single-state entries I, S, E, M, F, D (bit offsets 0..5 above bit 17), plus STATE_MP (bit 17+6) and STATE_MESIFD, which sets all seven bits and is the group default; all placed in group d. */ #define CBO_FILT_MESIFS(d) \ CBO_FILT_MESIF(I, Invalid, 0, d), \ CBO_FILT_MESIF(S, Shared, 1, d), \ CBO_FILT_MESIF(E, Exclusive, 2, d), \ CBO_FILT_MESIF(M, Modified, 3, d), \ CBO_FILT_MESIF(F, Forward, 4, d), \ CBO_FILT_MESIF(D, Debug, 5, d), \ { .uname = "STATE_MP",\ .udesc = "Cacheline is modified but never written, was forwarded in modified state",\ .ufilters[0] = 0x1ULL << (17+6),\ .grpid = d, \ .uflags = INTEL_X86_NCOMBO, \ }, \ { .uname = "STATE_MESIFD",\ .udesc = "Any cache line state",\ .ufilters[0] = 0x7fULL << 17,\ .grpid = d, \ .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, \ } /* CBO_FILT_OPC(d): opcode-filter umasks for group d. Each entry stores its opcode value shifted left by 20 in ufilters[1] and is flagged INTEL_X86_NCOMBO. */ #define CBO_FILT_OPC(d) \ { .uname = "OPC_RFO",\ .udesc = "Demand data RFO (combine with any OPCODE umask)",\ .ufilters[1] = 0x180ULL << 20, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_CRD",\ .udesc = "Demand code read (combine with any OPCODE umask)",\ .ufilters[1] = 0x181ULL << 20, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_DRD",\ .udesc = "Demand data read (combine with any OPCODE umask)",\ .ufilters[1] = 0x182ULL << 20, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_PRD",\ .udesc = "Partial reads (UC) (combine with any OPCODE umask)",\ .ufilters[1] = 0x187ULL << 20, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_WCILF",\ .udesc = "Full Stream store (combine with any OPCODE umask)", \ .ufilters[1] = 0x18cULL << 20, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_WCIL",\ .udesc = "Partial Stream store (combine with any OPCODE umask)", \ .ufilters[1] = 0x18dULL << 20, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_WIL",\ .udesc = "Write Invalidate Line (Partial) (combine with any OPCODE umask)", \ .ufilters[1] = 0x18fULL << 20, \ .uflags = 
INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_PF_RFO",\ .udesc = "Prefetch RFO into LLC but do not pass to L2 (includes hints) (combine with any OPCODE umask)", \ .ufilters[1] = 0x190ULL << 20, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_PF_CODE",\ .udesc = "Prefetch code into LLC but do not pass to L2 (includes hints) (combine with any OPCODE umask)", \ .ufilters[1] = 0x191ULL << 20, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_PF_DATA",\ .udesc = "Prefetch data into LLC but do not pass to L2 (includes hints) (combine with any OPCODE umask)", \ .ufilters[1] = 0x192ULL << 20, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_PCIWIL",\ .udesc = "PCIe write (partial, non-allocating) - partial line MMIO write transactions from IIO (P2P). Not used for coherent transacions. Uncacheable. (combine with any OPCODE umask)", \ .ufilters[1] = 0x193ULL << 20, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_PCIWIF",\ .udesc = "PCIe write (full, non-allocating) - full line MMIO write transactions from IIO (P2P). Not used for coherent transacions. Uncacheable. 
(combine with any OPCODE umask)", \ .ufilters[1] = 0x194ULL << 20, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_PCIITOM",\ .udesc = "PCIe write (allocating) (combine with any OPCODE umask)", \ .ufilters[1] = 0x19cULL << 20, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_PCIRDCUR",\ .udesc = "PCIe read current (combine with any OPCODE umask)", \ .ufilters[1] = 0x19eULL << 20, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_WBMTOI",\ .udesc = "Request writeback modified invalidate line (combine with any OPCODE umask)", \ .ufilters[1] = 0x1c4ULL << 20, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_WBMTOE",\ .udesc = "Request writeback modified set to exclusive (combine with any OPCODE umask)", \ .ufilters[1] = 0x1c5ULL << 20, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_ITOM",\ .udesc = "Request invalidate line. Request exclusive ownership of the line (combine with any OPCODE umask)", \ .ufilters[1] = 0x1c8ULL << 20, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_PCINSRD",\ .udesc = "PCIe non-snoop read (combine with any OPCODE umask)", \ .ufilters[1] = 0x1e4ULL << 20, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_PCINSWR",\ .udesc = "PCIe non-snoop write (partial) (combine with any OPCODE umask)", \ .ufilters[1] = 0x1e5ULL << 20, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ }, \ { .uname = "OPC_PCINSWRF",\ .udesc = "PCIe non-snoop write (full) (combine with any OPCODE umask)", \ .ufilters[1] = 0x1e6ULL << 20, \ .uflags = INTEL_X86_NCOMBO, \ .grpid = d, \ } /* Umasks for the LLC lookup event: request-type selectors in group 0, the NID match in group 1 (requires the nf= modifier), and the cacheline-state filters in group 2. */ static const intel_x86_umask_t hswep_unc_c_llc_lookup[]={ { .uname = "DATA_READ", .udesc = "Data read requests", .grpid = 0, .ucode = 0x300, .uflags = INTEL_X86_NCOMBO, }, { .uname = "WRITE", .udesc = "Write requests. 
Includes all write transactions (cached, uncached)", .grpid = 0, .ucode = 0x500, .uflags = INTEL_X86_NCOMBO, }, { .uname = "REMOTE_SNOOP", .udesc = "External snoop request", .grpid = 0, .ucode = 0x900, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ANY", .udesc = "Any request", .grpid = 0, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, .ucode = 0x1100, }, { .uname = "NID", .udesc = "Match a given RTID destination NID (must provide nf=X modifier)", .umodmsk_req = _SNBEP_UNC_ATTR_NF1, .grpid = 1, .ucode = 0x4100, .uflags = INTEL_X86_GRP_DFL_NONE }, CBO_FILT_MESIFS(2), }; static const intel_x86_umask_t hswep_unc_c_llc_victims[]={ { .uname = "STATE_M", .udesc = "Lines in M state", .ucode = 0x100, .grpid = 0, }, { .uname = "STATE_E", .udesc = "Lines in E state", .ucode = 0x200, .grpid = 0, }, { .uname = "STATE_S", .udesc = "Lines in S state", .ucode = 0x400, .grpid = 0, }, { .uname = "STATE_F", .udesc = "Lines in F state", .ucode = 0x800, .grpid = 0, }, { .uname = "MISS", .udesc = "TBD", .ucode = 0x1000, .grpid = 0, }, { .uname = "NID", .udesc = "Victimized Lines matching the NID filter (must provide nf=X modifier)", .ucode = 0x4000, .uflags = INTEL_X86_GRP_DFL_NONE, .umodmsk_req = _SNBEP_UNC_ATTR_NF1, .grpid = 1, }, }; static const intel_x86_umask_t hswep_unc_c_ring_ad_used[]={ { .uname = "UP_EVEN", .udesc = "Up and Even ring polarity filter", .ucode = 0x100, }, { .uname = "UP_ODD", .udesc = "Up and odd ring polarity filter", .ucode = 0x200, }, { .uname = "DOWN_EVEN", .udesc = "Down and even ring polarity filter", .ucode = 0x400, }, { .uname = "DOWN_ODD", .udesc = "Down and odd ring polarity filter", .ucode = 0x800, }, { .uname = "UP", .udesc = "Up ring polarity filter", .ucode = 0x3300, }, { .uname = "DOWN", .udesc = "Down ring polarity filter", .ucode = 0xcc00, }, { .uname = "ALL", .udesc = "up or down ring polarity filter", .ucode = 0xcc00, }, }; static const intel_x86_umask_t hswep_unc_c_ring_bounces[]={ { .uname = "AD_IRQ", .udesc = "TBD", .ucode = 0x200, }, { .uname = 
"AK", .udesc = "Acknowledgments to core", .ucode = 0x400, }, { .uname = "BL", .udesc = "Data responses to core", .ucode = 0x800, }, { .uname = "IV", .udesc = "Snoops of processor cache", .ucode = 0x1000, }, }; static const intel_x86_umask_t hswep_unc_c_ring_iv_used[]={ { .uname = "ANY", .udesc = "Any filter", .ucode = 0x0f00, .uflags = INTEL_X86_DFL, }, { .uname = "UP", .udesc = "Filter on any up polarity", .ucode = 0x0300, }, { .uname = "DN", .udesc = "Filter on any up polarity", .ucode = 0x0c00, }, { .uname = "DOWN", .udesc = "Filter on any down polarity", .ucode = 0xcc00, }, }; static const intel_x86_umask_t hswep_unc_c_rxr_ext_starved[]={ { .uname = "IRQ", .udesc = "Irq externally starved, therefore blocking the IPQ", .ucode = 0x100, }, { .uname = "IPQ", .udesc = "IPQ externally starved, therefore blocking the IRQ", .ucode = 0x200, }, { .uname = "PRQ", .udesc = "IRQ is blocking the ingress queue and causing starvation", .ucode = 0x400, }, { .uname = "ISMQ_BIDS", .udesc = "Number of time the ISMQ bids", .ucode = 0x800, }, }; static const intel_x86_umask_t hswep_unc_c_rxr_inserts[]={ { .uname = "IRQ", .udesc = "IRQ", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "IRQ_REJECTED", .udesc = "IRQ rejected", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "IPQ", .udesc = "IPQ", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO, }, { .uname = "PRQ", .udesc = "PRQ", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "PRQ_REJECTED", .udesc = "PRQ rejected", .ucode = 0x2000, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t hswep_unc_c_rxr_ipq_retry[]={ { .uname = "ADDR_CONFLICT", .udesc = "Address conflict", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ANY", .udesc = "Any Reject", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "FULL", .udesc = "No Egress credits", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "QPI_CREDITS", .udesc = "No QPI credits", .ucode = 0x1000, .uflags = 
INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t hswep_unc_c_rxr_ipq_retry2[]={ { .uname = "AD_SBO", .udesc = "Count number of time that a request from the IPQ was retried because it lacked credits to send an AD packet to SBO", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "TARGET", .udesc = "Count number of times that a request from the IPQ was retried filtered by the target NodeId", .ucode = 0x100, .umodmsk_req = _SNBEP_UNC_ATTR_NF1, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t hswep_unc_c_rxr_irq_retry[]={ { .uname = "ADDR_CONFLICT", .udesc = "Address conflict", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO, }, { .uname = "ANY", .udesc = "Any reject", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "FULL", .udesc = "No Egress credits", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "QPI_CREDITS", .udesc = "No QPI credits", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "RTID", .udesc = "No RTIDs", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO, }, { .uname = "IIO_CREDITS", .udesc = "No IIO Credits", .ucode = 0x2000, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t hswep_unc_c_rxr_irq_retry2[]={ { .uname = "AD_SBO", .udesc = "Count number of time that a request from the IRQ was retried because it lacked credits to send an AD packet to SBO", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "BL_SBO", .udesc = "Count number of time that a request from the IRQ was retried because it lacked credits to send an BL packet to SBO", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "TARGET", .udesc = "Count number of times that a request from the IRQ was retried filtered by the target NodeId", .ucode = 0x100, .umodmsk_req = _SNBEP_UNC_ATTR_NF1, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t hswep_unc_c_rxr_ismq_retry[]={ { .uname = "ANY", .udesc = "Any reject", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL, }, { .uname = "FULL", 
.udesc = "No Egress credits", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "IIO_CREDITS", .udesc = "No IIO credits", .ucode = 0x2000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "QPI_CREDITS", .udesc = "NO QPI credits", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "RTID", .udesc = "No RTIDs", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO, }, { .uname = "WB_CREDITS", .udesc = "No WB credits", .ucode = 0x8000, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t hswep_unc_c_rxr_ismq_retry2[]={ { .uname = "AD_SBO", .udesc = "Count number of time that a request from the ISMQ was retried because it lacked credits to send an AD packet to SBO", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "BL_SBO", .udesc = "Count number of time that a request from the ISMQ was retried because it lacked credits to send an BL packet to SBO", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "TARGET", .udesc = "Count number of times that a request from the ISMQ was retried filtered by the target NodeId", .ucode = 0x100, .umodmsk_req = _SNBEP_UNC_ATTR_NF1, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t hswep_unc_c_tor_inserts[]={ { .uname = "OPCODE", .udesc = "Number of transactions inserted into the TOR that match an opcode (must provide opc_* umask)", .ucode = 0x100, .grpid = 0, .uflags = INTEL_X86_NCOMBO, }, { .uname = "MISS_OPCODE", .udesc = "Number of miss transactions inserted into the TOR that match an opcode (must provide opc_* umask)", .ucode = 0x300, .grpid = 0, .uflags = INTEL_X86_NCOMBO, }, { .uname = "EVICTION", .udesc = "Number of Evictions transactions inserted into TOR", .ucode = 0x400, .grpid = 0, .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, }, { .uname = "ALL", .udesc = "Number of transactions inserted in TOR", .ucode = 0x800, .grpid = 0, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL | INTEL_X86_EXCL_GRP_GT, }, { .uname = "WB", .udesc = "Number of write transactions inserted into the TOR", .ucode = 
0x1000, .grpid = 0, .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, }, { .uname = "LOCAL_OPCODE", .udesc = "Number of opcode-matched transactions inserted into the TOR that are satisfied by locally homed memory", .ucode = 0x2100, .grpid = 0, .uflags = INTEL_X86_NCOMBO, }, { .uname = "MISS_LOCAL_OPCODE", .udesc = "Number of miss opcode-matched transactions inserted into the TOR that are satisfied by locally homed memory", .ucode = 0x2300, .grpid = 0, .uflags = INTEL_X86_NCOMBO, }, { .uname = "LOCAL", .udesc = "Number of transactions inserted into the TOR that are satisfied by locally homed memory", .ucode = 0x2800, .grpid = 0, .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, }, { .uname = "MISS_LOCAL", .udesc = "Number of miss transactions inserted into the TOR that are satisfied by locally homed memory", .ucode = 0x2a00, .grpid = 0, .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, }, { .uname = "NID_OPCODE", .udesc = "Number of transactions inserted into the TOR that match a NID and opcode (must provide opc_* umask and nf=X modifier)", .ucode = 0x4100, .grpid = 0, .umodmsk_req = _SNBEP_UNC_ATTR_NF1, .uflags = INTEL_X86_NCOMBO, }, { .uname = "NID_MISS_OPCODE", .udesc = "Number of NID and opcode matched miss transactions inserted into the TOR (must provide opc_* umask and nf=X modifier)", .ucode = 0x4300, .grpid = 0, .umodmsk_req = _SNBEP_UNC_ATTR_NF1, .uflags = INTEL_X86_NCOMBO, }, { .uname = "NID_EVICTION", .udesc = "Number of NID-matched eviction transactions inserted into the TOR (must provide nf=X modifier)", .ucode = 0x4400, .grpid = 0, .umodmsk_req = _SNBEP_UNC_ATTR_NF1, .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, }, { .uname = "NID_ALL", .udesc = "Number of NID-matched transactions inserted into the TOR (must provide nf=X modifier)", .ucode = 0x4800, .grpid = 0, .umodmsk_req = _SNBEP_UNC_ATTR_NF1, .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, }, { .uname = "NID_MISS_ALL", .udesc = "Number of NID-matched miss transactions that were 
inserted into the TOR (must provide nf=X modifier)", .ucode = 0x4a00, .grpid = 0, .umodmsk_req = _SNBEP_UNC_ATTR_NF1, .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, }, { .uname = "NID_WB", .udesc = "Number of NID-matched write back transactions inserted into the TOR (must provide nf=X modifier)", .ucode = 0x5000, .grpid = 0, .umodmsk_req = _SNBEP_UNC_ATTR_NF1, .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, }, { .uname = "REMOTE_OPCODE", .udesc = "Number of opcode-matched transactions inserted into the TOR that are satisfied by remote caches or memory", .ucode = 0x8100, .grpid = 0, .uflags = INTEL_X86_NCOMBO, }, { .uname = "MISS_REMOTE_OPCODE", .udesc = "Number of miss opcode-matched transactions inserted into the TOR that are satisfied by remote caches or memory", .ucode = 0x8300, .grpid = 0, .uflags = INTEL_X86_NCOMBO, }, { .uname = "REMOTE", .udesc = "Number of transactions inserted into the TOR that are satisfied by remote caches or memory", .ucode = 0x8800, .grpid = 0, .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, }, { .uname = "MISS_REMOTE", .udesc = "Number of miss transactions inserted into the TOR that are satisfied by remote caches or memory", .ucode = 0x8a00, .grpid = 0, .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, }, CBO_FILT_OPC(1) }; static const intel_x86_umask_t hswep_unc_c_tor_occupancy[]={ { .uname = "OPCODE", .udesc = "Number of TOR entries that match an opcode (must provide opc_* umask)", .ucode = 0x100, .grpid = 0, .uflags = INTEL_X86_NCOMBO, }, { .uname = "MISS_OPCODE", .udesc = "Number of TOR entries that match a NID and an opcode (must provide opc_* umask)", .ucode = 0x300, .grpid = 0, .uflags = INTEL_X86_NCOMBO, }, { .uname = "EVICTION", .udesc = "Number of outstanding eviction transactions in the TOR", .ucode = 0x400, .grpid = 0, .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, }, { .uname = "ALL", .udesc = "All valid TOR entries", .ucode = 0x800, .grpid = 0, .uflags = INTEL_X86_NCOMBO | INTEL_X86_DFL | 
INTEL_X86_EXCL_GRP_GT, }, { .uname = "MISS_ALL", .udesc = "Number of outstanding miss requests in the TOR", .ucode = 0xa00, .grpid = 0, .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, }, { .uname = "WB", .udesc = "Number of write transactions in the TOR. Does not include RFO, but actual operations that contain data being sent from the core", .ucode = 0x1000, .grpid = 0, .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, }, { .uname = "LOCAL_OPCODE", .udesc = "Number of opcode-matched transactions in the TOR that are satisfied by locally homed memory", .ucode = 0x2100, .grpid = 0, .uflags = INTEL_X86_NCOMBO, }, { .uname = "MISS_LOCAL_OPCODE", .udesc = "Number of miss opcode-matched transactions in the TOR that are satisfied by locally homed memory", .ucode = 0x2300, .grpid = 0, .uflags = INTEL_X86_NCOMBO, }, { .uname = "LOCAL", .udesc = "Number of transactions in the TOR that are satisfied by locally homed memory", .ucode = 0x2800, .grpid = 0, .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, }, { .uname = "MISS_LOCAL", .udesc = "Number of miss transactions in the TOR that are satisfied by locally homed memory", .ucode = 0x2a00, .grpid = 0, .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, }, { .uname = "NID_OPCODE", .udesc = "Number of NID-matched TOR entries that an opcode (must provide nf=X modifier and opc_* umask)", .ucode = 0x4100, .grpid = 0, .umodmsk_req = _SNBEP_UNC_ATTR_NF1, .uflags = INTEL_X86_NCOMBO, }, { .uname = "NID_MISS_OPCODE", .udesc = "Number of NID-matched outstanding miss requests in the TOR that an opcode (must provide nf=X modifier and opc_* umask)", .ucode = 0x4300, .grpid = 0, .umodmsk_req = _SNBEP_UNC_ATTR_NF1, .uflags = INTEL_X86_NCOMBO, }, { .uname = "NID_EVICTION", .udesc = "Number of NID-matched outstanding requests in the TOR (must provide a nf=X modifier)", .ucode = 0x4400, .grpid = 0, .umodmsk_req = _SNBEP_UNC_ATTR_NF1, .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, }, { .uname = "NID_ALL", .udesc = "Number of 
NID-matched outstanding requests in the TOR (must provide nf=X modifier)", .ucode = 0x4800, .grpid = 0, .umodmsk_req = _SNBEP_UNC_ATTR_NF1, .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, }, { .uname = "NID_MISS_ALL", .udesc = "Number of NID-matched outstanding miss requests in the TOR (must provide a nf=X modifier)", .ucode = 0x4a00, .grpid = 0, .umodmsk_req = _SNBEP_UNC_ATTR_NF1, .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, }, { .uname = "NID_WB", .udesc = "Number of NID-matched write transactions in the TOR (must provide a nf=X modifier)", .ucode = 0x5000, .grpid = 0, .umodmsk_req = _SNBEP_UNC_ATTR_NF1, .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, }, { .uname = "REMOTE_OPCODE", .udesc = "Number of opcode-matched transactions in the TOR that are satisfied by remote caches or memory", .ucode = 0x8100, .grpid = 0, .uflags = INTEL_X86_NCOMBO, }, { .uname = "MISS_REMOTE_OPCODE", .udesc = "Number of miss opcode-matched transactions in the TOR that are satisfied by remote caches or memory", .ucode = 0x8300, .grpid = 0, .uflags = INTEL_X86_NCOMBO, }, { .uname = "REMOTE", .udesc = "Number of transactions in the TOR that are satisfied by remote caches or memory", .ucode = 0x8800, .grpid = 0, .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, }, { .uname = "MISS_REMOTE", .udesc = "Number of miss transactions inserted into the TOR that are satisfied by remote caches or memory", .ucode = 0x8a00, .grpid = 0, .uflags = INTEL_X86_NCOMBO | INTEL_X86_EXCL_GRP_GT, }, CBO_FILT_OPC(1) }; static const intel_x86_umask_t hswep_unc_c_txr_inserts[]={ { .uname = "AD_CACHE", .udesc = "Counts the number of ring transactions from Cachebo to AD ring", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "AK_CACHE", .udesc = "Counts the number of ring transactions from Cachebo to AK ring", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "BL_CACHE", .udesc = "Counts the number of ring transactions from Cachebo to BL ring", .ucode = 0x400, .uflags = 
INTEL_X86_NCOMBO, }, { .uname = "IV_CACHE", .udesc = "Counts the number of ring transactions from Cachebo ton IV ring", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO, }, { .uname = "AD_CORE", .udesc = "Counts the number of ring transactions from Corebo to AD ring", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "AK_CORE", .udesc = "Counts the number of ring transactions from Corebo to AK ring", .ucode = 0x2000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "BL_CORE", .udesc = "Counts the number of ring transactions from Corebo to BL ring", .ucode = 0x4000, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t hswep_unc_c_txr_ads_used[]={ { .uname = "AD", .udesc = "onto AD ring", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "AK", .udesc = "Onto AK ring", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "BL", .udesc = "Onto BL ring", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO, } }; static const intel_x86_umask_t hswep_unc_c_misc[]={ { .uname = "RSPI_WAS_FSE", .udesc = "Counts the number of times when a SNoop hit in FSE states and triggered a silent eviction. This is useful because this information is lost in the PRE encodings", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "WC_ALIASING", .udesc = "Counts the number of times a USWC write (WCIL(F)) transaction hits in the LLC in M state, triggering a WBMTOI followed by the USWC write. This occurs when there is WC aliasing", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, }, { .uname = "STARTED", .udesc = "TBD", .ucode = 0x400, .uflags = INTEL_X86_NCOMBO, }, { .uname = "RFO_HIT_S", .udesc = "Counts the number of times that an RFO hits in S state. 
This is useful for determining if it might be good for a workload to use RSPIWB instead of RSPSWB", .ucode = 0x800, .uflags = INTEL_X86_NCOMBO, }, { .uname = "CVZERO_PREFETCH_VICTIM", .udesc = "Counts the number of clean victims with raw CV=0 (core valid)", .ucode = 0x1000, .uflags = INTEL_X86_NCOMBO, }, { .uname = "CVZERO_PREFETCH_MISS", .udesc = "Counts the number of Demand Data Read requests hitting non-modified state lines with raw CV=0 (core valid)", .ucode = 0x2000, .uflags = INTEL_X86_NCOMBO, }, }; static const intel_x86_umask_t hswep_unc_c_sbo_credits_acquired[]={ { .uname = "AD", .udesc = "for AD ring", .ucode = 0x100, .uflags = INTEL_X86_NCOMBO, }, { .uname = "BL", .udesc = "for BL ring", .ucode = 0x200, .uflags = INTEL_X86_NCOMBO, } }; static const intel_x86_entry_t intel_hswep_unc_c_pe[]={ { .name = "UNC_C_CLOCKTICKS", .desc = "C-box Uncore clockticks", .modmsk = 0x0, .cntmsk = 0xf, .code = 0x00, .flags = INTEL_X86_FIXED, }, { .name = "UNC_C_COUNTER0_OCCUPANCY", .desc = "Counter 0 occupancy. Counts the occupancy related information by filtering CB0 occupancy count captured in counter 0.", .modmsk = HSWEP_UNC_CBO_ATTRS, .cntmsk = 0xe, .code = 0x1f, }, { .name = "UNC_C_LLC_LOOKUP", .desc = "Cache lookups", .modmsk = HSWEP_UNC_CBO_NID_ATTRS, .cntmsk = 0xf, .code = 0x34, .ngrp = 3, .flags = INTEL_X86_NO_AUTOENCODE, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_c_llc_lookup), .umasks = hswep_unc_c_llc_lookup, }, { .name = "UNC_C_LLC_VICTIMS", .desc = "Lines victimized", .modmsk = HSWEP_UNC_CBO_NID_ATTRS, .cntmsk = 0xf, .code = 0x37, .flags = INTEL_X86_NO_AUTOENCODE, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_c_llc_victims), .ngrp = 2, .umasks = hswep_unc_c_llc_victims, }, { .name = "UNC_C_MISC", .desc = "Miscellaneous C-Box events", .modmsk = HSWEP_UNC_CBO_ATTRS, .cntmsk = 0xf, .code = 0x39, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_c_misc), .ngrp = 1, .umasks = hswep_unc_c_misc, }, { .name = "UNC_C_RING_AD_USED", .desc = "Address ring in use. 
Counts number of cycles ring is being used at this ring stop", .modmsk = HSWEP_UNC_CBO_ATTRS, .cntmsk = 0xf, .code = 0x1b, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_c_ring_ad_used), .ngrp = 1, .umasks = hswep_unc_c_ring_ad_used, }, { .name = "UNC_C_RING_AK_USED", .desc = "Acknowledgement ring in use. Counts number of cycles ring is being used at this ring stop", .modmsk = HSWEP_UNC_CBO_ATTRS, .cntmsk = 0xf, .code = 0x1c, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_c_ring_ad_used), /* identical to RING_AD_USED */ .ngrp = 1, .umasks = hswep_unc_c_ring_ad_used, }, { .name = "UNC_C_RING_BL_USED", .desc = "Bus or Data ring in use. Counts number of cycles ring is being used at this ring stop", .modmsk = HSWEP_UNC_CBO_ATTRS, .cntmsk = 0xf, .code = 0x1d, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_c_ring_ad_used), /* identical to RING_AD_USED */ .ngrp = 1, .umasks = hswep_unc_c_ring_ad_used, }, { .name = "UNC_C_RING_BOUNCES", .desc = "Number of LLC responses that bounced in the ring", .modmsk = HSWEP_UNC_CBO_ATTRS, .cntmsk = 0xf, .code = 0x05, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_c_ring_bounces), .ngrp = 1, .umasks = hswep_unc_c_ring_bounces, }, { .name = "UNC_C_FAST_ASSERTED", .desc = "Number of cycles in which the local distress or incoming distress signals are asserted (FaST). Incoming distress includes both up and down", .modmsk = HSWEP_UNC_CBO_ATTRS, .cntmsk = 0x3, .code = 0x09, }, { .name = "UNC_C_BOUNCE_CONTROL", .desc = "Bounce control", .modmsk = HSWEP_UNC_CBO_ATTRS, .cntmsk = 0xf, .code = 0x0a, }, { .name = "UNC_C_RING_IV_USED", .desc = "Invalidate ring in use. 
Counts number of cycles ring is being used at this ring stop", .modmsk = HSWEP_UNC_CBO_ATTRS, .cntmsk = 0xf, .code = 0x1e, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_c_ring_iv_used), .ngrp = 1, .umasks = hswep_unc_c_ring_iv_used, }, { .name = "UNC_C_RING_SRC_THRTL", .desc = "TDB", .modmsk = HSWEP_UNC_CBO_ATTRS, .cntmsk = 0xf, .code = 0x07, }, { .name = "UNC_C_RXR_EXT_STARVED", .desc = "Ingress arbiter blocking cycles", .modmsk = HSWEP_UNC_CBO_ATTRS, .cntmsk = 0xf, .code = 0x12, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_c_rxr_ext_starved), .ngrp = 1, .umasks = hswep_unc_c_rxr_ext_starved, }, { .name = "UNC_C_RXR_INSERTS", .desc = "Ingress Allocations", .code = 0x13, .cntmsk = 0xf, .ngrp = 1, .modmsk = HSWEP_UNC_CBO_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_c_rxr_inserts), .umasks = hswep_unc_c_rxr_inserts }, { .name = "UNC_C_RXR_IPQ_RETRY", .desc = "Probe Queue Retries", .code = 0x31, .cntmsk = 0xf, .ngrp = 1, .modmsk = HSWEP_UNC_CBO_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_c_rxr_ipq_retry), .umasks = hswep_unc_c_rxr_ipq_retry }, { .name = "UNC_C_RXR_IPQ_RETRY2", .desc = "Probe Queue Retries", .code = 0x28, .cntmsk = 0xf, .ngrp = 1, .modmsk = HSWEP_UNC_CBO_NID_ATTRS, .flags = INTEL_X86_NO_AUTOENCODE, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_c_rxr_ipq_retry2), .umasks = hswep_unc_c_rxr_ipq_retry2 }, { .name = "UNC_C_RXR_IRQ_RETRY", .desc = "Ingress Request Queue Rejects", .code = 0x32, .cntmsk = 0xf, .ngrp = 1, .modmsk = HSWEP_UNC_CBO_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_c_rxr_irq_retry), .umasks = hswep_unc_c_rxr_irq_retry }, { .name = "UNC_C_RXR_IRQ_RETRY2", .desc = "Ingress Request Queue Rejects", .code = 0x29, .cntmsk = 0xf, .ngrp = 1, .modmsk = HSWEP_UNC_CBO_NID_ATTRS, .flags = INTEL_X86_NO_AUTOENCODE, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_c_rxr_irq_retry2), .umasks = hswep_unc_c_rxr_irq_retry2 }, { .name = "UNC_C_RXR_ISMQ_RETRY", .desc = "ISMQ Retries", .code = 0x33, .cntmsk = 0xf, .ngrp = 1, .modmsk = HSWEP_UNC_CBO_ATTRS, .numasks = 
LIBPFM_ARRAY_SIZE(hswep_unc_c_rxr_ismq_retry), .umasks = hswep_unc_c_rxr_ismq_retry }, { .name = "UNC_C_RXR_ISMQ_RETRY2", .desc = "ISMQ Retries", .code = 0x2a, .cntmsk = 0xf, .ngrp = 1, .modmsk = HSWEP_UNC_CBO_NID_ATTRS, .flags = INTEL_X86_NO_AUTOENCODE, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_c_rxr_ismq_retry2), .umasks = hswep_unc_c_rxr_ismq_retry2 }, { .name = "UNC_C_RXR_OCCUPANCY", .desc = "Ingress Occupancy", .code = 0x11, .cntmsk = 0x1, .ngrp = 1, .modmsk = HSWEP_UNC_CBO_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_c_rxr_inserts), .umasks = hswep_unc_c_rxr_inserts, /* identical to hswep_unc_c_rxr_inserts */ }, { .name = "UNC_C_TOR_INSERTS", .desc = "TOR Inserts", .code = 0x35, .cntmsk = 0xf, .ngrp = 2, .modmsk = HSWEP_UNC_CBO_NID_ATTRS | _SNBEP_UNC_ATTR_ISOC | _SNBEP_UNC_ATTR_NC, .flags = INTEL_X86_NO_AUTOENCODE, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_c_tor_inserts), .umasks = hswep_unc_c_tor_inserts }, { .name = "UNC_C_TOR_OCCUPANCY", .desc = "TOR Occupancy", .code = 0x36, .cntmsk = 0x1, .ngrp = 2, .modmsk = HSWEP_UNC_CBO_NID_ATTRS | _SNBEP_UNC_ATTR_ISOC | _SNBEP_UNC_ATTR_NC, .flags = INTEL_X86_NO_AUTOENCODE, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_c_tor_occupancy), .umasks = hswep_unc_c_tor_occupancy }, { .name = "UNC_C_TXR_ADS_USED", .desc = "Egress events", .code = 0x04, .cntmsk = 0xf, .ngrp = 1, .modmsk = HSWEP_UNC_CBO_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_c_txr_ads_used), .umasks = hswep_unc_c_txr_ads_used }, { .name = "UNC_C_TXR_INSERTS", .desc = "Egress allocations", .code = 0x02, .cntmsk = 0xf, .ngrp = 1, .modmsk = HSWEP_UNC_CBO_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_c_txr_inserts), .umasks = hswep_unc_c_txr_inserts }, { .name = "UNC_C_SBO_CREDITS_ACQUIRED", .desc = "SBO credits acquired", .code = 0x3d, .cntmsk = 0xf, .ngrp = 1, .modmsk = HSWEP_UNC_CBO_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_c_sbo_credits_acquired), .umasks = hswep_unc_c_sbo_credits_acquired }, { .name = "UNC_C_SBO_CREDITS_OCCUPANCY", .desc = "SBO credits 
occupancy", .code = 0x3e, .cntmsk = 0xf, .ngrp = 1, .modmsk = HSWEP_UNC_CBO_ATTRS, .numasks = LIBPFM_ARRAY_SIZE(hswep_unc_c_sbo_credits_acquired), /* shared */ .umasks = hswep_unc_c_sbo_credits_acquired }, }; papi-5.6.0/src/components/lustre/fake_proc/000775 001750 001750 00000000000 13216244357 022763 5ustar00jshenry1963jshenry1963000000 000000 papi-5.6.0/src/aix.c000664 001750 001750 00000104000 13216244356 016246 0ustar00jshenry1963jshenry1963000000 000000 /* This file handles the OS dependent part of the POWER5 and POWER6 architectures. It supports both AIX 4 and AIX 5. The switch between AIX 4 and 5 is driven by the system defined value _AIX_VERSION_510. Other routines also include minor conditionally compiled differences. */ #include #include "papi.h" #include "papi_internal.h" #include "papi_lock.h" #include "papi_memory.h" #include "extras.h" #include "aix.h" #include "papi_vector.h" /* Advance declarations */ papi_vector_t _aix_vector; /* Locking variables */ volatile int lock_var[PAPI_MAX_LOCK] = { 0 }; atomic_p lock[PAPI_MAX_LOCK]; /* some heap information, start_of_text, start_of_data ..... 
ref: http://publibn.boulder.ibm.com/doc_link/en_US/a_doc_lib/aixprggd/genprogc/sys_mem_alloc.htm#HDRA9E4A4C9921SYLV */ #define START_OF_TEXT &_text #define END_OF_TEXT &_etext #define START_OF_DATA &_data #define END_OF_DATA &_edata #define START_OF_BSS &_edata #define END_OF_BSS &_end static int maxgroups = 0; struct utsname AixVer; native_event_entry_t native_table[PAPI_MAX_NATIVE_EVENTS]; hwd_pminfo_t pminfo; pm_groups_info_t pmgroups; native_event_entry_t native_table[PAPI_MAX_NATIVE_EVENTS]; PPC64_native_map_t native_name_map[PAPI_MAX_NATIVE_EVENTS]; hwd_groups_t group_map[MAX_GROUPS] = { 0 }; /* to initialize the native_table */ void aix_initialize_native_table( ) { int i, j; memset( native_table, 0, PAPI_MAX_NATIVE_EVENTS * sizeof ( native_event_entry_t ) ); memset( native_name_map, 0, PAPI_MAX_NATIVE_EVENTS * sizeof ( PPC64_native_map_t ) ); for ( i = 0; i < PAPI_MAX_NATIVE_EVENTS; i++ ) { native_name_map[i].index = -1; for ( j = 0; j < MAX_COUNTERS; j++ ) native_table[i].resources.counter_cmd[j] = -1; } } /* to setup native_table group value */ static void aix_ppc64_setup_gps( int total ) { int i, j, gnum; for ( i = 0; i < total; i++ ) { for ( j = 0; j < MAX_COUNTERS; j++ ) { /* native_table[i].resources.rgg[j]=-1; */ if ( native_table[i].resources.selector & ( 1 << j ) ) { for ( gnum = 0; gnum < pmgroups.maxgroups; gnum++ ) { if ( native_table[i].resources.counter_cmd[j] == pmgroups.event_groups[gnum].events[j] ) { /* could use gnum instead of pmgroups.event_groups[gnum].group_id */ native_table[i].resources.group[pmgroups. event_groups[gnum]. 
group_id / 32] |= 1 << ( pmgroups.event_groups[gnum].group_id % 32 ); } } } } } for ( gnum = 0; gnum < pmgroups.maxgroups; gnum++ ) { for ( i = 0; i < MAX_COUNTERS; i++ ) { /*group_map[gnum].counter_cmd[i] = pmgroups.event_groups[gnum].events[i]; */ if (pmgroups.event_groups[gnum].group_id >=MAX_GROUPS) { fprintf(stderr,"ERROR, group number trying to go past MAX GROUPS\n"); continue; } group_map[pmgroups.event_groups[gnum].group_id].counter_cmd[i] = pmgroups.event_groups[gnum].events[i]; } } } /* to setup native_table values, and return number of entries */ int aix_ppc64_setup_native_table( ) { hwd_pmevents_t *wevp; hwd_pminfo_t *info; int pmc, ev, i, j, index; info = &pminfo; index = 0; aix_initialize_native_table( ); for ( pmc = 0; pmc < info->maxpmcs; pmc++ ) { wevp = info->list_events[pmc]; for ( ev = 0; ev < info->maxevents[pmc]; ev++, wevp++ ) { for ( i = 0; i < index; i++ ) { if ( strcmp( wevp->short_name, native_table[i].name ) == 0 ) { native_table[i].resources.selector |= 1 << pmc; native_table[i].resources.counter_cmd[pmc] = wevp->event_id; break; } } if ( i == index ) { /*native_table[i].index=i; */ native_table[i].resources.selector |= 1 << pmc; native_table[i].resources.counter_cmd[pmc] = wevp->event_id; native_table[i].name = wevp->short_name; native_table[i].description = wevp->description; native_name_map[i].name = native_table[i].name; native_name_map[i].index = i; index++; } } } aix_ppc64_setup_gps( index ); return index; } /* Reports the elements of the hwd_register_t struct as an array of names and a matching array of values. Maximum string length is name_len; Maximum number of values is count. 
*/ static void copy_value( unsigned int val, char *nam, char *names, unsigned int *values, int len ) { *values = val; strncpy( names, nam, len ); names[len - 1] = '\0'; } /* this function recusively does Modified Bipartite Graph counter allocation success return 1 fail return 0 */ static int do_counter_allocation( ppc64_reg_alloc_t * event_list, int size ) { int i, j, group = -1; unsigned int map[GROUP_INTS]; for ( i = 0; i < GROUP_INTS; i++ ) map[i] = event_list[0].ra_group[i]; for ( i = 1; i < size; i++ ) { for ( j = 0; j < GROUP_INTS; j++ ) map[j] &= event_list[i].ra_group[j]; } for ( i = 0; i < GROUP_INTS; i++ ) { if ( map[i] ) { group = ffs( map[i] ) - 1 + i * 32; break; } } if ( group < 0 ) return group; /* allocation fail */ else { for ( i = 0; i < size; i++ ) { for ( j = 0; j < MAX_COUNTERS; j++ ) { if ( event_list[i].ra_counter_cmd[j] >= 0 && event_list[i].ra_counter_cmd[j] == group_map[group].counter_cmd[j] ) event_list[i].ra_position = j; } } return group; } } /* this function will be called when there are counters available success return 1 fail return 0 */ int _aix_allocate_registers( EventSetInfo_t * ESI ) { hwd_control_state_t *this_state = ESI->ctl_state; unsigned char selector; int i, j, natNum, index; ppc64_reg_alloc_t event_list[MAX_COUNTERS]; int position, group; /* not yet successfully mapped, but have enough slots for events */ /* Initialize the local structure needed for counter allocation and optimization. */ natNum = ESI->NativeCount; for ( i = 0; i < natNum; i++ ) { /* CAUTION: Since this is in the hardware layer, it's ok to access the native table directly, but in general this is a bad idea */ event_list[i].ra_position = -1; /* calculate native event rank, which is number of counters it can live on, this is power3 specific */ for ( j = 0; j < MAX_COUNTERS; j++ ) { if ( ( index = native_name_map[ESI->NativeInfoArray[i]. 
ni_event & PAPI_NATIVE_AND_MASK].index ) < 0 ) return PAPI_ECNFLCT; event_list[i].ra_counter_cmd[j] = native_table[index].resources.counter_cmd[j]; } for ( j = 0; j < GROUP_INTS; j++ ) { if ( ( index = native_name_map[ESI->NativeInfoArray[i]. ni_event & PAPI_NATIVE_AND_MASK].index ) < 0 ) return PAPI_ECNFLCT; event_list[i].ra_group[j] = native_table[index].resources.group[j]; } /*event_list[i].ra_mod = -1; */ } if ( ( group = do_counter_allocation( event_list, natNum ) ) >= 0 ) { /* successfully mapped */ /* copy counter allocations info back into NativeInfoArray */ this_state->group_id = group; for ( i = 0; i < natNum; i++ ) ESI->NativeInfoArray[i].ni_position = event_list[i].ra_position; /* update the control structure based on the NativeInfoArray */ /*_papi_hwd_update_control_state(this_state, ESI->NativeInfoArray, natNum);*/ return PAPI_OK; } else { return PAPI_ECNFLCT; } } int _aix_init_control_state( hwd_control_state_t * ptr ) { int i; for ( i = 0; i < _aix_vector.cmp_info.num_cntrs; i++ ) { ptr->counter_cmd.events[i] = COUNT_NOTHING; } ptr->counter_cmd.mode.b.is_group = 1; _aix_vector.set_domain( ptr, _aix_vector.cmp_info.default_domain ); _aix_set_granularity( ptr, _aix_vector.cmp_info.default_granularity ); /*setup_native_table(); */ return ( PAPI_OK ); } /* This function updates the control structure with whatever resources are allocated for all the native events in the native info structure array. */ int _aix_update_control_state( hwd_control_state_t * this_state, NativeInfo_t * native, int count, hwd_context_t * context ) { this_state->counter_cmd.events[0] = this_state->group_id; return PAPI_OK; } /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ /* The following is for any POWER hardware */ /* Trims trailing blank space and line endings from a string (in place). 
Returns pointer to start address */ static char * trim_string( char *in ) { int len, i = 0; char *start = in; if ( in == NULL ) return ( in ); len = strlen( in ); if ( len == 0 ) return ( in ); /* Trim right */ i = strlen( start ) - 1; while ( i >= 0 ) { if ( isblank( start[i] ) || ( start[i] == '\r' ) || ( start[i] == '\n' ) ) start[i] = '\0'; else break; i--; } return ( start ); } /* Routines to support an opaque native event table */ int _aix_ntv_code_to_name( unsigned int EventCode, char *ntv_name, int len ) { if ( ( EventCode & PAPI_NATIVE_AND_MASK ) >= _aix_vector.cmp_info.num_native_events ) return ( PAPI_ENOEVNT ); strncpy( ntv_name, native_name_map[EventCode & PAPI_NATIVE_AND_MASK].name, len ); trim_string( ntv_name ); if ( strlen( native_name_map[EventCode & PAPI_NATIVE_AND_MASK].name ) > len - 1 ) return ( PAPI_EBUF ); return ( PAPI_OK ); } int _aix_ntv_code_to_descr( unsigned int EventCode, char *ntv_descr, int len ) { if ( ( EventCode & PAPI_NATIVE_AND_MASK ) >= _aix_vector.cmp_info.num_native_events ) return ( PAPI_ENOEVNT ); strncpy( ntv_descr, native_table[native_name_map[EventCode & PAPI_NATIVE_AND_MASK]. index].description, len ); trim_string( ntv_descr ); if ( strlen ( native_table [native_name_map[EventCode & PAPI_NATIVE_AND_MASK].index]. description ) > len - 1 ) return ( PAPI_EBUF ); return ( PAPI_OK ); } int _aix_ntv_code_to_bits( unsigned int EventCode, hwd_register_t * bits ) { bits = &native_table[EventCode & PAPI_NATIVE_AND_MASK].resources; /* it is not right, different type */ return ( PAPI_OK ); } /* this function return the next native event code. modifier = PAPI_ENUM_FIRST returns first native event code modifier = PAPI_ENUM_EVENTS returns next native event code modifier = PAPI_NTV_ENUM_GROUPS return groups in which this native event lives, in bits 16 - 23 of event code terminating with PAPI_ENOEVNT at the end of the list. 
function return value: PAPI_OK successful, event code is valid PAPI_EINVAL bad modifier PAPI_ENOEVNT end of list or fail, event code is invalid */ int _aix_ntv_enum_events( unsigned int *EventCode, int modifier ) { if ( modifier == PAPI_ENUM_FIRST ) { *EventCode = PAPI_NATIVE_MASK; return ( PAPI_OK ); } if ( modifier == PAPI_ENUM_EVENTS ) { int index = *EventCode & PAPI_NATIVE_AND_MASK; if ( native_table[index + 1].resources.selector ) { *EventCode = *EventCode + 1; return ( PAPI_OK ); } else return ( PAPI_ENOEVNT ); } else if ( modifier == PAPI_NTV_ENUM_GROUPS ) { #if defined(_POWER5) || defined(_POWER6) unsigned int group = ( *EventCode & PAPI_NTV_GROUP_AND_MASK ) >> PAPI_NTV_GROUP_SHIFT; int index = *EventCode & 0x000000FF; int i; unsigned int tmpg; *EventCode = *EventCode & ( ~PAPI_NTV_GROUP_SHIFT ); for ( i = 0; i < GROUP_INTS; i++ ) { tmpg = native_table[index].resources.group[i]; if ( group != 0 ) { while ( ( ffs( tmpg ) + i * 32 ) <= group && tmpg != 0 ) tmpg = tmpg ^ ( 1 << ( ffs( tmpg ) - 1 ) ); } if ( tmpg != 0 ) { group = ffs( tmpg ) + i * 32; *EventCode = *EventCode | ( group << PAPI_NTV_GROUP_SHIFT ); return ( PAPI_OK ); } } #endif return ( PAPI_ENOEVNT ); } else return ( PAPI_EINVAL ); } static void set_config( hwd_control_state_t * ptr, int arg1, int arg2 ) { ptr->counter_cmd.events[arg1] = arg2; } static void unset_config( hwd_control_state_t * ptr, int arg1 ) { ptr->counter_cmd.events[arg1] = 0; } int init_domain( ) { int domain = 0; domain = PAPI_DOM_USER | PAPI_DOM_KERNEL | PAPI_DOM_OTHER; #ifdef PM_INITIALIZE #ifdef _AIXVERSION_510 if ( pminfo.proc_feature.b.hypervisor ) { domain |= PAPI_DOM_SUPERVISOR; } #endif #endif return ( domain ); } static int _aix_set_domain( hwd_control_state_t * this_state, int domain ) { pm_mode_t *mode = &( this_state->counter_cmd.mode ); int did = 0; mode->b.user = 0; mode->b.kernel = 0; if ( domain & PAPI_DOM_USER ) { did++; mode->b.user = 1; } if ( domain & PAPI_DOM_KERNEL ) { did++; mode->b.kernel = 1; } #ifdef 
PM_INITIALIZE #ifdef _AIXVERSION_510 if ( ( domain & PAPI_DOM_SUPERVISOR ) && pminfo.proc_feature.b.hypervisor ) { did++; mode->b.hypervisor = 1; } #endif #endif if ( did ) return ( PAPI_OK ); else return ( PAPI_EINVAL ); /* switch (domain) { case PAPI_DOM_USER: mode->b.user = 1; mode->b.kernel = 0; break; case PAPI_DOM_KERNEL: mode->b.user = 0; mode->b.kernel = 1; break; case PAPI_DOM_ALL: mode->b.user = 1; mode->b.kernel = 1; break; default: return(PAPI_EINVAL); } return(PAPI_OK); */ } int _aix_set_granularity( hwd_control_state_t * this_state, int domain ) { pm_mode_t *mode = &( this_state->counter_cmd.mode ); switch ( domain ) { case PAPI_GRN_THR: mode->b.process = 0; mode->b.proctree = 0; break; /* case PAPI_GRN_PROC: mode->b.process = 1; mode->b.proctree = 0; break; case PAPI_GRN_PROCG: mode->b.process = 0; mode->b.proctree = 1; break; */ default: return ( PAPI_EINVAL ); } return ( PAPI_OK ); } static int set_default_domain( EventSetInfo_t * zero, int domain ) { hwd_control_state_t *current_state = zero->ctl_state; return ( _aix_set_domain( current_state, domain ) ); } static int set_default_granularity( EventSetInfo_t * zero, int granularity ) { hwd_control_state_t *current_state = zero->ctl_state; return ( _aix_set_granularity( current_state, granularity ) ); } /* Initialize the system-specific settings */ /* Machine info structure. -1 is unused. 
*/ int _aix_mdi_init( ) { int retval; if ( ( retval = uname( &AixVer ) ) < 0 ) return ( PAPI_ESYS ); if ( AixVer.version[0] == '4' ) { _papi_hwi_system_info.exe_info.address_info.text_start = ( caddr_t ) START_OF_TEXT; _papi_hwi_system_info.exe_info.address_info.text_end = ( caddr_t ) END_OF_TEXT; _papi_hwi_system_info.exe_info.address_info.data_start = ( caddr_t ) START_OF_DATA; _papi_hwi_system_info.exe_info.address_info.data_end = ( caddr_t ) END_OF_DATA; _papi_hwi_system_info.exe_info.address_info.bss_start = ( caddr_t ) START_OF_BSS; _papi_hwi_system_info.exe_info.address_info.bss_end = ( caddr_t ) END_OF_BSS; } else { _aix_update_shlib_info( &_papi_hwi_system_info ); } /* _papi_hwi_system_info.supports_64bit_counters = 1; _papi_hwi_system_info.supports_real_usec = 1; _papi_hwi_system_info.sub_info.fast_real_timer = 1; _papi_hwi_system_info.sub_info->available_domains = init_domain();*/ return ( PAPI_OK ); } static int _aix_get_system_info( papi_mdi_t *mdi ) { int retval; /* pm_info_t pminfo; */ struct procsinfo psi = { 0 }; pid_t pid; char maxargs[PAPI_HUGE_STR_LEN]; char pname[PAPI_HUGE_STR_LEN]; pid = getpid( ); if ( pid == -1 ) return ( PAPI_ESYS ); _papi_hwi_system_info.pid = pid; psi.pi_pid = pid; retval = getargs( &psi, sizeof ( psi ), maxargs, PAPI_HUGE_STR_LEN ); if ( retval == -1 ) return ( PAPI_ESYS ); if ( realpath( maxargs, pname ) ) strncpy( _papi_hwi_system_info.exe_info.fullname, pname, PAPI_HUGE_STR_LEN ); else strncpy( _papi_hwi_system_info.exe_info.fullname, maxargs, PAPI_HUGE_STR_LEN ); strcpy( _papi_hwi_system_info.exe_info.address_info.name, basename( maxargs ) ); #ifdef _POWER7 /* we pass PM_POWER7 for the same reasons as below (power6 case) */ retval = pm_initialize( PM_INIT_FLAGS , &pminfo, &pmgroups, PM_POWER7); #elif defined(_POWER6) /* problem with pm_initialize(): it cannot be called multiple times with PM_CURRENT; use instead the actual proc type - here PM_POWER6 - and multiple invocations are no longer a problem */ retval = 
pm_initialize( PM_INIT_FLAGS, &pminfo, &pmgroups, PM_POWER6 ); #else #ifdef _AIXVERSION_510 #ifdef PM_INITIALIZE SUBDBG( "Calling AIX 5 version of pm_initialize...\n" ); /*#if defined(_POWER5) retval = pm_initialize(PM_INIT_FLAGS, &pminfo, &pmgroups, PM_POWER5); #endif*/ retval = pm_initialize( PM_INIT_FLAGS, &pminfo, &pmgroups, PM_CURRENT ); #else SUBDBG( "Calling AIX 5 version of pm_init...\n" ); retval = pm_init( PM_INIT_FLAGS, &pminfo, &pmgroups ); #endif #else SUBDBG( "Calling AIX 4 version of pm_init...\n" ); retval = pm_init( PM_INIT_FLAGS, &pminfo ); #endif #endif SUBDBG( "...Back from pm_init\n" ); if ( retval > 0 ) return ( retval ); _aix_mdi_init( ); _papi_hwi_system_info.hw_info.nnodes = 1; _papi_hwi_system_info.hw_info.ncpu = _system_configuration.ncpus; _papi_hwi_system_info.hw_info.totalcpus = _papi_hwi_system_info.hw_info.ncpu * _papi_hwi_system_info.hw_info.nnodes; _papi_hwi_system_info.hw_info.vendor = -1; strcpy( _papi_hwi_system_info.hw_info.vendor_string, "IBM" ); _papi_hwi_system_info.hw_info.model = _system_configuration.implementation; strcpy( _papi_hwi_system_info.hw_info.model_string, pminfo.proc_name ); _papi_hwi_system_info.hw_info.revision = ( float ) _system_configuration.version; _papi_hwi_system_info.hw_info.mhz = ( float ) ( pm_cycles( ) / 1000000.0 ); _papi_hwi_system_info.hw_info.cpu_max_mhz=_papi_hwi_system_info.hw_info.mhz; _papi_hwi_system_info.hw_info.cpu_min_mhz=_papi_hwi_system_info.hw_info.mhz; /* _papi_hwi_system_info.num_gp_cntrs = pminfo.maxpmcs;*/ _aix_vector.cmp_info.num_cntrs = pminfo.maxpmcs; _aix_vector.cmp_info.num_mpx_cntrs = MAX_MPX_COUNTERS; // pminfo.maxpmcs, _aix_vector.cmp_info.available_granularities = PAPI_GRN_THR; /* This field doesn't appear to exist in the PAPI 3.0 structure _papi_hwi_system_info.cpunum = mycpu(); */ _aix_vector.cmp_info.available_domains = init_domain( ); return PAPI_OK; } /* Low level functions, should not handle errors, just return codes. 
*/ /* At init time, the higher level library should always allocate and reserve EventSet zero. */ long long _aix_get_real_usec( void ) { timebasestruct_t t; long long retval; read_real_time( &t, TIMEBASE_SZ ); time_base_to_time( &t, TIMEBASE_SZ ); retval = ( t.tb_high * 1000000 ) + t.tb_low / 1000; return ( retval ); } long long _aix_get_real_cycles( void ) { return ( _aix_get_real_usec( ) * ( long long ) _papi_hwi_system_info.hw_info.cpu_max_mhz ); } long long _aix_get_virt_usec( void ) { long long retval; struct tms buffer; times( &buffer ); SUBDBG( "user %d system %d\n", ( int ) buffer.tms_utime, ( int ) buffer.tms_stime ); retval = ( long long ) ( ( buffer.tms_utime + buffer.tms_stime ) * ( 1000000 / CLK_TCK ) ); return ( retval ); } static void _aix_lock_init( void ) { int i; for ( i = 0; i < PAPI_MAX_LOCK; i++ ) lock[i] = ( int * ) ( lock_var + i ); } int _aix_shutdown_thread( hwd_context_t * ctx ) { return ( PAPI_OK ); } int _aix_init_component( int cidx ) { int retval = PAPI_OK, procidx; /* Fill in what we can of the papi_system_info. 
*/ retval = _papi_os_vector.get_system_info( &_papi_hwi_system_info ); if ( retval ) return ( retval ); /* Setup memory info */ retval = _papi_os_vector.get_memory_info( &_papi_hwi_system_info.hw_info, 0 ); if ( retval ) return ( retval ); SUBDBG( "Found %d %s %s CPUs at %d Mhz.\n", _papi_hwi_system_info.hw_info.totalcpus, _papi_hwi_system_info.hw_info.vendor_string, _papi_hwi_system_info.hw_info.model_string, _papi_hwi_system_info.hw_info.cpu_max_mhz ); _aix_vector.cmp_info.CmpIdx = cidx; _aix_vector.cmp_info.num_native_events = aix_ppc64_setup_native_table( ); procidx = pm_get_procindex( ); switch ( procidx ) { case PM_POWER5: _papi_load_preset_table( "POWER5", 0, cidx ); break; case PM_POWER5_II: _papi_load_preset_table( "POWER5+", 0, cidx ); break; case PM_POWER6: _papi_load_preset_table( "POWER6", 0, cidx ); break; case PM_PowerPC970: _papi_load_preset_table( "PPC970", 0, cidx ); break; case PM_POWER7: _papi_load_preset_table( "POWER7", 0, cidx ); break; default: fprintf( stderr, "%s is not supported!\n", pminfo.proc_name ); return PAPI_ENOIMPL; } _aix_lock_init( ); return ( retval ); } int _aix_init_thread( hwd_context_t * context ) { int retval; /* Initialize our global control state. */ _aix_init_control_state( &context->cntrl ); } /* Go from highest counter to lowest counter. Why? Because there are usually more counters on #1, so we try the least probable first. 
*/ static int get_avail_hwcntr_bits( int cntr_avail_bits ) { int tmp = 0, i = 1 << ( POWER_MAX_COUNTERS - 1 ); while ( i ) { tmp = i & cntr_avail_bits; if ( tmp ) return ( tmp ); i = i >> 1; } return ( 0 ); } static void set_hwcntr_codes( int selector, unsigned char *from, int *to ) { int useme, i; for ( i = 0; i < _aix_vector.cmp_info.num_cntrs; i++ ) { useme = ( 1 << i ) & selector; if ( useme ) { to[i] = from[i]; } } } #ifdef DEBUG void dump_cmd( pm_prog_t * t ) { SUBDBG( "mode.b.threshold %d\n", t->mode.b.threshold ); SUBDBG( "mode.b.spare %d\n", t->mode.b.spare ); SUBDBG( "mode.b.process %d\n", t->mode.b.process ); SUBDBG( "mode.b.kernel %d\n", t->mode.b.kernel ); SUBDBG( "mode.b.user %d\n", t->mode.b.user ); SUBDBG( "mode.b.count %d\n", t->mode.b.count ); SUBDBG( "mode.b.proctree %d\n", t->mode.b.proctree ); SUBDBG( "events[0] %d\n", t->events[0] ); SUBDBG( "events[1] %d\n", t->events[1] ); SUBDBG( "events[2] %d\n", t->events[2] ); SUBDBG( "events[3] %d\n", t->events[3] ); SUBDBG( "events[4] %d\n", t->events[4] ); SUBDBG( "events[5] %d\n", t->events[5] ); SUBDBG( "events[6] %d\n", t->events[6] ); SUBDBG( "events[7] %d\n", t->events[7] ); SUBDBG( "reserved %d\n", t->reserved ); } void dump_data( long long *vals ) { int i; for ( i = 0; i < MAX_COUNTERS; i++ ) { SUBDBG( "counter[%d] = %lld\n", i, vals[i] ); } } #endif int _aix_reset( hwd_context_t * ESI, hwd_control_state_t * zero ) { int retval = pm_reset_data_mythread( ); if ( retval > 0 ) { if ( _papi_hwi_error_level != PAPI_QUIET ) pm_error( "PAPI Error: pm_reset_data_mythread", retval ); return ( retval ); } return ( PAPI_OK ); } int _aix_read( hwd_context_t * ctx, hwd_control_state_t * spc, long long **vals, int flags ) { int retval; retval = pm_get_data_mythread( &spc->state ); if ( retval > 0 ) { if ( _papi_hwi_error_level != PAPI_QUIET ) pm_error( "PAPI Error: pm_get_data_mythread", retval ); return ( retval ); } *vals = spc->state.accu; #ifdef DEBUG if ( ISLEVEL( DEBUG_SUBSTRATE ) ) dump_data( *vals ); 
#endif return ( PAPI_OK ); } static int round_requested_ns( int ns ) { if ( ns <= _papi_os_info.itimer_res_ns ) { return _papi_os_info.itimer_res_ns; } else { int leftover_ns = ns % _papi_os_info.itimer_res_ns; return ( ns - leftover_ns + _papi_os_info.itimer_res_ns ); } } int _aix_ctl( hwd_context_t * ctx, int code, _papi_int_option_t * option ) { switch ( code ) { /* I don't understand what it means to set the default domain case PAPI_DEFDOM: return(set_default_domain(zero, option->domain.domain)); */ case PAPI_DOMAIN: return ( _aix_set_domain ( option->domain.ESI->ctl_state, option->domain.domain ) ); /* I don't understand what it means to set the default granularity case PAPI_DEFGRN: return(set_default_granularity(zero, option->granularity.granularity)); */ case PAPI_GRANUL: return ( _aix_set_granularity ( option->domain.ESI->ctl_state, option->granularity.granularity ) ); #if 0 case PAPI_INHERIT: return ( set_inherit( option->inherit.inherit ) ); #endif case PAPI_DEF_ITIMER: { /* flags are currently ignored, eventually the flags will be able to specify whether or not we use POSIX itimers (clock_gettimer) */ if ( ( option->itimer.itimer_num == ITIMER_REAL ) && ( option->itimer.itimer_sig != SIGALRM ) ) return PAPI_EINVAL; if ( ( option->itimer.itimer_num == ITIMER_VIRTUAL ) && ( option->itimer.itimer_sig != SIGVTALRM ) ) return PAPI_EINVAL; if ( ( option->itimer.itimer_num == ITIMER_PROF ) && ( option->itimer.itimer_sig != SIGPROF ) ) return PAPI_EINVAL; if ( option->itimer.ns > 0 ) option->itimer.ns = round_requested_ns( option->itimer.ns ); /* At this point, we assume the user knows what he or she is doing, they maybe doing something arch specific */ return PAPI_OK; } case PAPI_DEF_MPX_NS: { option->multiplex.ns = round_requested_ns( option->multiplex.ns ); return ( PAPI_OK ); } case PAPI_DEF_ITIMER_NS: { option->itimer.ns = round_requested_ns( option->itimer.ns ); return ( PAPI_OK ); } default: return ( PAPI_ENOSUPP ); } } void _aix_dispatch_timer( int 
signal, siginfo_t * si, void *i ) { _papi_hwi_context_t ctx; ThreadInfo_t *t = NULL; caddr_t address; ctx.si = si; ctx.ucontext = ( hwd_ucontext_t * ) i; address = ( caddr_t ) GET_OVERFLOW_ADDRESS( ( &ctx ) ); _papi_hwi_dispatch_overflow_signal( ( void * ) &ctx, address, NULL, 0, 0, &t, _aix_vector.cmp_info.CmpIdx ); } int _aix_set_overflow( EventSetInfo_t * ESI, int EventIndex, int threshold ) { hwd_control_state_t *this_state = ESI->ctl_state; return ( PAPI_OK ); } void * _aix_get_overflow_address( void *context ) { void *location; struct sigcontext *info = ( struct sigcontext * ) context; location = ( void * ) info->sc_jmpbuf.jmp_context.iar; return ( location ); } /* Copy the current control_state into the new thread context */ /*int _papi_hwd_start(EventSetInfo_t *ESI, EventSetInfo_t *zero)*/ int _aix_start( hwd_context_t * ctx, hwd_control_state_t * cntrl ) { int i, retval; hwd_control_state_t *current_state = &ctx->cntrl; /* If we are nested, merge the global counter structure with the current eventset */ SUBDBG( "Start\n" ); /* Copy the global counter structure to the current eventset */ SUBDBG( "Copying states\n" ); memcpy( current_state, cntrl, sizeof ( hwd_control_state_t ) ); retval = pm_set_program_mythread( ¤t_state->counter_cmd ); if ( retval > 0 ) { if ( retval == 13 ) { retval = pm_delete_program_mythread( ); if ( retval > 0 ) { if ( _papi_hwi_error_level != PAPI_QUIET ) pm_error( "PAPI Error: pm_delete_program_mythread", retval ); return ( retval ); } retval = pm_set_program_mythread( ¤t_state->counter_cmd ); if ( retval > 0 ) { if ( _papi_hwi_error_level != PAPI_QUIET ) pm_error( "PAPI Error: pm_set_program_mythread", retval ); return ( retval ); } } else { if ( _papi_hwi_error_level != PAPI_QUIET ) pm_error( "PAPI Error: pm_set_program_mythread", retval ); return ( retval ); } } /* Set up the new merged control structure */ #if 0 dump_cmd( ¤t_state->counter_cmd ); #endif /* Start the counters */ retval = pm_start_mythread( ); if ( retval > 0 ) { 
if ( _papi_hwi_error_level != PAPI_QUIET ) pm_error( "pm_start_mythread()", retval ); return ( retval ); } return ( PAPI_OK ); } int _aix_stop( hwd_context_t * ctx, hwd_control_state_t * cntrl ) { int retval; retval = pm_stop_mythread( ); if ( retval > 0 ) { if ( _papi_hwi_error_level != PAPI_QUIET ) pm_error( "pm_stop_mythread()", retval ); return ( retval ); } retval = pm_delete_program_mythread( ); if ( retval > 0 ) { if ( _papi_hwi_error_level != PAPI_QUIET ) pm_error( "pm_delete_program_mythread()", retval ); return ( retval ); } return ( PAPI_OK ); } int _aix_update_shlib_info( papi_mdi_t *mdi ) { #if ( ( defined( _AIXVERSION_510) || defined(_AIXVERSION_520))) struct ma_msg_s { long flag; char *name; } ma_msgs[] = { { MA_MAINEXEC, "MAINEXEC"}, { MA_KERNTEXT, "KERNTEXT"}, { MA_READ, "READ"}, { MA_WRITE, "WRITE"}, { MA_EXEC, "EXEC"}, { MA_SHARED, "SHARED"}, { MA_BREAK, "BREAK"}, { MA_STACK, "STACK"},}; char fname[80], name[PAPI_HUGE_STR_LEN]; prmap_t newp; int count, t_index, retval, i, j, not_first_flag_bit; FILE *map_f; void *vaddr; prmap_t *tmp1 = NULL; PAPI_address_map_t *tmp2 = NULL; sprintf( fname, "/proc/%d/map", getpid( ) ); map_f = fopen( fname, "r" ); if ( !map_f ) { PAPIERROR( "fopen(%s) returned < 0", fname ); return ( PAPI_OK ); } /* count the entries we need */ count = 0; t_index = 0; while ( ( retval = fread( &newp, sizeof ( prmap_t ), 1, map_f ) ) > 0 ) { if ( newp.pr_pathoff > 0 && newp.pr_mapname[0] != '\0' ) { if ( newp.pr_mflags & MA_STACK ) continue; count++; SUBDBG( "count=%d offset=%ld map=%s\n", count, newp.pr_pathoff, newp.pr_mapname ); if ( ( newp.pr_mflags & MA_READ ) && ( newp.pr_mflags & MA_EXEC ) ) t_index++; } } rewind( map_f ); tmp1 = ( prmap_t * ) papi_calloc( ( count + 1 ), sizeof ( prmap_t ) ); if ( tmp1 == NULL ) return ( PAPI_ENOMEM ); tmp2 = ( PAPI_address_map_t * ) papi_calloc( t_index, sizeof ( PAPI_address_map_t ) ); if ( tmp2 == NULL ) return ( PAPI_ENOMEM ); i = 0; t_index = -1; while ( ( retval = fread( &tmp1[i], 
sizeof ( prmap_t ), 1, map_f ) ) > 0 ) { if ( tmp1[i].pr_pathoff > 0 && tmp1[i].pr_mapname[0] != '\0' ) if ( !( tmp1[i].pr_mflags & MA_STACK ) ) i++; } for ( i = 0; i < count; i++ ) { char c; int cc = 0; retval = fseek( map_f, tmp1[i].pr_pathoff, SEEK_SET ); if ( retval != 0 ) return ( PAPI_ESYS ); while ( fscanf( map_f, "%c", &c ) != EOF ) { name[cc] = c; /* how many char are hold in /proc/xxxx/map */ cc++; if ( c == '\0' ) break; } /* currently /proc/xxxx/map file holds only 33 char per line (incl NULL char); * if executable name > 32 char, compare first 32 char only */ if ( strncmp( _papi_hwi_system_info.exe_info.address_info.name, basename( name ), cc - 1 ) == 0 ) { if ( strlen( _papi_hwi_system_info.exe_info.address_info.name ) != cc - 1 ) PAPIERROR ( "executable name too long (%d char). Match of first %d char only", strlen( _papi_hwi_system_info.exe_info.address_info. name ), cc - 1 ); if ( tmp1[i].pr_mflags & MA_READ ) { if ( tmp1[i].pr_mflags & MA_EXEC ) { _papi_hwi_system_info.exe_info.address_info. text_start = ( caddr_t ) tmp1[i].pr_vaddr; _papi_hwi_system_info.exe_info.address_info. text_end = ( caddr_t ) ( tmp1[i].pr_vaddr + tmp1[i].pr_size ); } else if ( tmp1[i].pr_mflags & MA_WRITE ) { _papi_hwi_system_info.exe_info.address_info. data_start = ( caddr_t ) tmp1[i].pr_vaddr; _papi_hwi_system_info.exe_info.address_info. 
data_end = ( caddr_t ) ( tmp1[i].pr_vaddr + tmp1[i].pr_size ); } } } else { if ( ( _papi_hwi_system_info.exe_info.address_info.text_start == 0 ) && ( _papi_hwi_system_info.exe_info.address_info.text_end == 0 ) && ( _papi_hwi_system_info.exe_info.address_info.data_start == 0 ) && ( _papi_hwi_system_info.exe_info.address_info.data_end == 0 ) ) PAPIERROR( "executable name not recognized" ); if ( tmp1[i].pr_mflags & MA_READ ) { if ( tmp1[i].pr_mflags & MA_EXEC ) { t_index++; tmp2[t_index].text_start = ( caddr_t ) tmp1[i].pr_vaddr; tmp2[t_index].text_end = ( caddr_t ) ( tmp1[i].pr_vaddr + tmp1[i].pr_size ); strncpy( tmp2[t_index].name, name, PAPI_MAX_STR_LEN ); } else if ( tmp1[i].pr_mflags & MA_WRITE ) { tmp2[t_index].data_start = ( caddr_t ) tmp1[i].pr_vaddr; tmp2[t_index].data_end = ( caddr_t ) ( tmp1[i].pr_vaddr + tmp1[i].pr_size ); } } } } fclose( map_f ); if ( _papi_hwi_system_info.shlib_info.map ) papi_free( _papi_hwi_system_info.shlib_info.map ); _papi_hwi_system_info.shlib_info.map = tmp2; _papi_hwi_system_info.shlib_info.count = t_index + 1; papi_free( tmp1 ); return PAPI_OK; #else return PAPI_ENOIMPL; #endif } int _aix_ntv_name_to_code( const char *name, unsigned int *evtcode ) { int i; for ( i = 0; i < PAPI_MAX_NATIVE_EVENTS; i++ ) if ( strcmp( name, native_name_map[i].name ) == 0 ) { *evtcode = native_name_map[i].index | PAPI_NATIVE_MASK; return PAPI_OK; } return PAPI_ENOEVNT; } PAPI_os_info_t _papi_os_info; int _papi_hwi_init_os(void) { struct utsname uname_buffer; uname(&uname_buffer); strncpy(_papi_os_info.name,uname_buffer.sysname,PAPI_MAX_STR_LEN); strncpy(_papi_os_info.version,uname_buffer.release,PAPI_MAX_STR_LEN); _papi_os_info.itimer_sig = PAPI_INT_MPX_SIGNAL; _papi_os_info.itimer_num = PAPI_INT_ITIMER; _papi_os_info.itimer_res_ns = 1; _papi_os_info.itimer_ns = 1000 * PAPI_INT_MPX_DEF_US; return PAPI_OK; } papi_vector_t _aix_vector = { .cmp_info = { /* default component information (unspecified values are initialized to 0) */ .name = "aix", 
.description = "AIX pmapi CPU counters", .default_domain = PAPI_DOM_USER, .available_domains = PAPI_DOM_USER | PAPI_DOM_KERNEL, .default_granularity = PAPI_GRN_THR, .available_granularities = PAPI_GRN_THR, .hardware_intr_sig = PAPI_INT_SIGNAL, /* component specific cmp_info initializations */ .fast_real_timer = 1, .fast_virtual_timer = 1, .attach = 1, .attach_must_ptrace = 1, .cntr_umasks = 1, } , /* sizes of framework-opaque component-private structures these are remapped in pmapi_ppc64.h, ppc64_events.h */ .size = { .context = sizeof ( hwd_context_t ), .control_state = sizeof ( hwd_control_state_t ), .reg_value = sizeof ( hwd_register_t ), .reg_alloc = sizeof ( hwd_reg_alloc_t ), } , /* function pointers in this component */ .init_control_state = _aix_init_control_state, .start = _aix_start, .stop = _aix_stop, .read = _aix_read, .allocate_registers = _aix_allocate_registers, .update_control_state = _aix_update_control_state, .set_domain = _aix_set_domain, .reset = _aix_reset, .set_overflow = _aix_set_overflow, /* .stop_profiling = _aix_stop_profiling, */ .ntv_enum_events = _aix_ntv_enum_events, .ntv_name_to_code = _aix_ntv_name_to_code, .ntv_code_to_name = _aix_ntv_code_to_name, .ntv_code_to_descr = _aix_ntv_code_to_descr, .ntv_code_to_bits = _aix_ntv_code_to_bits, .init_component = _aix_init_component, .ctl = _aix_ctl, .dispatch_timer = _aix_dispatch_timer, .init_thread = _aix_init_thread, .shutdown_thread = _aix_shutdown_thread, }; papi_os_vector_t _papi_os_vector = { .get_memory_info = _aix_get_memory_info, .get_dmem_info = _aix_get_dmem_info, .get_real_usec = _aix_get_real_usec, .get_real_cycles = _aix_get_real_cycles, .get_virt_usec = _aix_get_virt_usec, .update_shlib_info = _aix_update_shlib_info, .get_system_info = _aix_get_system_info, }; papi-5.6.0/src/perfctr-2.7.x/linux/Documentation/perfctr/low-level-api.txt000664 001750 001750 00000021261 13216244370 030376 0ustar00jshenry1963jshenry1963000000 000000 $Id: low-level-api.txt,v 1.1 2004/07/02 18:57:05 
mikpe Exp $ PERFCTR LOW-LEVEL DRIVERS API ============================= This document describes the common low-level API. See low-level-$ARCH.txt for architecture-specific documentation. General Model ============= The model is that of a processor with: - A non-programmable clock-like counter, the "TSC". The TSC frequency is assumed to be constant, but it is not assumed to be identical to the core frequency. The TSC may be absent. - A set of programmable counters, the "perfctrs" or "pmcs". Control data may be per-counter, global, or both. The counters are not assumed to be interchangeable. A normal counter that simply counts events is referred to as an "accumulation-mode" or "a-mode" counter. Its total count is computed by adding the counts for the individual periods during which the counter is active. Two per-counter state variables are used for this: "sum", which is the total count up to but not including the current period, and "start", which records the value of the hardware counter at the start of the current period. At the end of a period, the hardware counter's value is read again, and the increment relative to the start value is added to the sum. This strategy is used because it avoids a number of hardware problems. A counter that has been programmed to generate an interrupt on overflow is referred to as an "interrupt-mode" or "i-mode" counter. I-mode counters are initialised to specific values, and after overflowing are reset to their (re)start values. The total event count is available just as for a-mode counters. The set of counters may be empty, in which case only the TSC (which must be present) can be sampled. Contents of <asm-$ARCH/perfctr.h> ================================= "struct perfctr_sum_ctrs" ------------------------- struct perfctr_sum_ctrs { unsigned long long tsc; unsigned long long pmc[..]; /* one per counter */ }; Architecture-specific container for counter values. Used in the kernel/user API, but not by the low-level drivers. 
"struct perfctr_cpu_control" ---------------------------- This struct includes at least the following fields: unsigned int tsc_on; unsigned int nractrs; /* # of a-mode counters */ unsigned int nrictrs; /* # of i-mode counters */ unsigned int pmc_map[..]; /* one per counter: virt-to-phys mapping */ unsigned int evntsel[..]; /* one per counter: hw control data */ int ireset[..]; /* one per counter: i-mode (re)start value */ Architecture-specific container for control data. Used both in the kernel/user API and by the low-level drivers (embedded in "struct perfctr_cpu_state"). "tsc_on" is non-zero if the TSC should be sampled. "nractrs" is the number of a-mode counters, corresponding to elements 0..nractrs-1 in the per-counter arrays. "nrictrs" is the number of i-mode counters, corresponding to elements nractrs..nractrs+nrictrs-1 in the per-counter arrays. "nractrs+nrictrs" is the total number of counters to program and sample. A-mode and i-mode counters are separated in order to allow quick enumeration of either set, which is needed in some low-level driver operations. "pmc_map[]" maps each counter to its corresponding hardware counter identification. No two counters may map to the same hardware counter. This mapping is present because the hardware may have asymmetric counters or other addressing quirks, which means that a counter's index may not suffice to address its hardware counter. "evntsel[]" contains the per-counter control data. Architecture-specific global control data, if any, is placed in architecture-specific fields. "ireset[]" contains the (re)start values for the i-mode counters. Only indices nractrs..nractrs+nrictrs-1 are used. 
"struct perfctr_cpu_state" -------------------------- This struct includes at least the following fields: unsigned int cstatus; unsigned int tsc_start; unsigned long long tsc_sum; struct { unsigned int map; unsigned int start; unsigned long long sum; } pmc[..]; /* one per counter; the size is not part of the user ABI */ #ifdef __KERNEL__ struct perfctr_cpu_control control; #endif This type records the state and control data for a collection of counters. It is used by many low-level operations, and may be exported to user-space via mmap(). "cstatus" is a re-encoding of control.tsc_on/nractrs/nrictrs, used because it reduces overheads in key low-level operations. Operations on cstatus values include: - unsigned int perfctr_mk_cstatus(unsigned int tsc_on, unsigned int nractrs, unsigned int nrictrs); Construct a cstatus value. - unsigned int perfctr_cstatus_enabled(unsigned int cstatus); Check if any part (tsc_on, nractrs, nrictrs) of the cstatus is non-zero. - int perfctr_cstatus_has_tsc(unsigned int cstatus); Check if the tsc_on part of the cstatus is non-zero. - unsigned int perfctr_cstatus_nrctrs(unsigned int cstatus); Retrieve nractrs+nrictrs from the cstatus. - unsigned int perfctr_cstatus_has_ictrs(unsigned int cstatus); Check if the nrictrs part of cstatus is non-zero. "tsc_start" and "tsc_sum" record the state of the TSC. "pmc[]" contains the per-counter state, in the "start" and "sum" fields. The "map" field contains the corresponding hardware counter identification, from the counter's entry in "control.pmc_map[]"; it is copied into pmc[] to reduce overheads in key low-level operations. "control" contains the control data which determines the behaviour of the counters. User-space overflow signal handler items ---------------------------------------- After a counter has overflowed, a user-space signal handler may be invoked with a "struct siginfo" identifying the source of the signal and the set of overflown counters. #define SI_PMC_OVF .. 
Value to be stored in "si.si_code". #define si_pmc_ovf_mask .. Field in which to store a bit-mask of the overflown counters. Kernel-internal API ------------------- /* Driver init/exit. perfctr_cpu_init() performs hardware detection and may fail. */ extern int perfctr_cpu_init(void); extern void perfctr_cpu_exit(void); /* CPU type name. Set if perfctr_cpu_init() was successful. */ extern char *perfctr_cpu_name; /* Hardware reservation. A high-level driver must reserve the hardware before it may use it, and release it afterwards. "service" is a unique string identifying the high-level driver. perfctr_cpu_reserve() returns NULL on success; if another high-level driver has reserved the hardware, then that driver's "service" string is returned. */ extern const char *perfctr_cpu_reserve(const char *service); extern void perfctr_cpu_release(const char *service); /* PRE: state has no running interrupt-mode counters. Check that the new control data is valid. Update the low-level driver's private control data. is_global should be zero for per-process counters and non-zero for global-mode counters. Returns a negative error code if the control data is invalid. */ extern int perfctr_cpu_update_control(struct perfctr_cpu_state *state, int is_global); /* Stop i-mode counters. Update sums and start values. Read a-mode counters. Subtract from start and accumulate into sums. Must be called with preemption disabled. */ extern void perfctr_cpu_suspend(struct perfctr_cpu_state *state); /* Reset i-mode counters to their start values. Write control registers. Read a-mode counters and update their start values. Must be called with preemption disabled. */ extern void perfctr_cpu_resume(struct perfctr_cpu_state *state); /* Perform an efficient combined suspend/resume operation. Must be called with preemption disabled. */ extern void perfctr_cpu_sample(struct perfctr_cpu_state *state); /* The type of a perfctr overflow interrupt handler. 
It will be called in IRQ context, with preemption disabled. */ typedef void (*perfctr_ihandler_t)(unsigned long pc); /* Install a perfctr overflow interrupt handler. Should be called after perfctr_cpu_reserve() but before any counter state has been activated. */ extern void perfctr_cpu_set_ihandler(perfctr_ihandler_t); /* PRE: The state has been suspended and sampled by perfctr_cpu_suspend(). Should be called from the high-level driver's perfctr_ihandler_t, and preemption must not have been enabled. Identify which counters have overflown, reset their start values from ireset[], and perform any necessary hardware cleanup. Returns a bit-mask of the overflown counters. */ extern unsigned int perfctr_cpu_identify_overflow(struct perfctr_cpu_state*); /* Call perfctr_cpu_ireload() just before perfctr_cpu_resume() to bypass internal caching and force a reload of the i-mode pmcs. This ensures that perfctr_cpu_identify_overflow()'s state changes are propagated to the hardware. */ extern void perfctr_cpu_ireload(struct perfctr_cpu_state*); papi-5.6.0/src/libpfm-3.y/include/perfmon/pfmlib_itanium.h000664 001750 001750 00000031660 13216244362 025452 0ustar00jshenry1963jshenry1963000000 000000 /* * Itanium PMU specific types and definitions * * Copyright (c) 2001-2006 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef __PFMLIB_ITANIUM_H__ #define __PFMLIB_ITANIUM_H__ #include #include #if BYTE_ORDER != LITTLE_ENDIAN #error "this file only supports little endian environments" #endif #ifdef __cplusplus extern "C" { #endif #define PMU_ITA_FIRST_COUNTER 4 /* index of first PMC/PMD counter */ #define PMU_ITA_NUM_COUNTERS 4 /* total numbers of PMC/PMD pairs used as counting monitors */ #define PMU_ITA_NUM_PMCS 14 /* total number of PMCS defined */ #define PMU_ITA_NUM_PMDS 18 /* total number of PMDS defined */ #define PMU_ITA_NUM_BTB 8 /* total number of PMDS in BTB */ #define PMU_ITA_COUNTER_WIDTH 32 /* hardware counter bit width */ /* * This structure provides a detailed way to setup a PMC register. 
*/
/*
 * Image of one Itanium PMC register.
 *
 * pmc_val accesses the complete register value; every other member is an
 * alternative bit-field view of that same value. The bit-fields of each
 * view add up to 64 bits.
 */
typedef union {
	unsigned long pmc_val;			/* complete register value */

	/* This is the Itanium-specific PMC layout for counter config */
	struct {
		unsigned long pmc_plm:4;	/* privilege level mask */
		unsigned long pmc_ev:1;		/* external visibility */
		unsigned long pmc_oi:1;		/* overflow interrupt */
		unsigned long pmc_pm:1;		/* privileged monitor */
		unsigned long pmc_ig1:1;	/* reserved */
		unsigned long pmc_es:7;		/* event select */
		unsigned long pmc_ig2:1;	/* reserved */
		unsigned long pmc_umask:4;	/* unit mask */
		unsigned long pmc_thres:3;	/* threshold */
		unsigned long pmc_ig3:1;	/* reserved (missing from table on p6-17) */
		unsigned long pmc_ism:2;	/* instruction set mask */
		unsigned long pmc_ig4:38;	/* reserved */
	} pmc_ita_count_reg;

	/* Opcode matcher */
	struct {
		unsigned long ignored1:3;	/* ignored bits */
		unsigned long mask:27;		/* mask encoding bits {40:27}{12:0} */
		unsigned long ignored2:3;	/* ignored bits */
		unsigned long match:27;		/* match encoding bits {40:27}{12:0} */
		unsigned long b:1;		/* B-syllable */
		unsigned long f:1;		/* F-syllable */
		unsigned long i:1;		/* I-syllable */
		unsigned long m:1;		/* M-syllable */
	} pmc8_9_ita_reg;

	/* Instruction Event Address Registers */
	struct {
		unsigned long iear_plm:4;	/* privilege level mask */
		unsigned long iear_ig1:2;	/* reserved */
		unsigned long iear_pm:1;	/* privileged monitor */
		unsigned long iear_tlb:1;	/* cache/tlb mode */
		unsigned long iear_ig2:8;	/* reserved */
		unsigned long iear_umask:4;	/* unit mask */
		unsigned long iear_ig3:4;	/* reserved */
		unsigned long iear_ism:2;	/* instruction set */
		unsigned long iear_ig4:38;	/* reserved */
	} pmc10_ita_reg;

	/* Data Event Address Registers */
	struct {
		unsigned long dear_plm:4;	/* privilege level mask */
		unsigned long dear_ig1:2;	/* reserved */
		unsigned long dear_pm:1;	/* privileged monitor */
		unsigned long dear_tlb:1;	/* cache/tlb mode */
		unsigned long dear_ig2:8;	/* reserved */
		unsigned long dear_umask:4;	/* unit mask */
		unsigned long dear_ig3:4;	/* reserved */
		unsigned long dear_ism:2;	/* instruction set */
		unsigned long dear_ig4:2;	/* reserved */
		unsigned long dear_pt:1;	/* pass tags */
		unsigned long dear_ig5:35;	/* reserved */
	} pmc11_ita_reg;

	/* Branch Trace Buffer registers */
	struct {
		unsigned long btbc_plm:4;	/* privilege level */
		unsigned long btbc_ig1:2;	/* ignored bits */
		unsigned long btbc_pm:1;	/* privileged monitor */
		unsigned long btbc_tar:1;	/* target address register */
		unsigned long btbc_tm:2;	/* taken mask */
		unsigned long btbc_ptm:2;	/* predicted taken address mask */
		unsigned long btbc_ppm:2;	/* predicted predicate mask */
		unsigned long btbc_bpt:1;	/* branch prediction table */
		unsigned long btbc_bac:1;	/* branch address calculator */
		unsigned long btbc_ig2:48;	/* ignored bits */
	} pmc12_ita_reg;

	/* PMC13: only the "tag all" bit of the irange_ view is named */
	struct {
		unsigned long irange_ta:1;	/* tag all bit */
		unsigned long irange_ig:63;	/* ignored bits */
	} pmc13_ita_reg;
} pfm_ita_pmc_reg_t;

/*
 * Image of one Itanium PMD register.
 *
 * pmd_val accesses the whole value; the remaining members are alternative
 * bit-field views, named after the PMD register(s) they describe.
 */
typedef union {
	unsigned long pmd_val;			/* counter value */

	/* counting pmd register */
	struct {
		unsigned long pmd_count:32;	/* 32-bit hardware counter */
		unsigned long pmd_sxt32:32;	/* sign extension of bit 32 */
	} pmd_ita_counter_reg;

	/* PMD0: iear_ view (valid bit, tlb miss bit, cache line address) */
	struct {
		unsigned long iear_v:1;		/* valid bit */
		unsigned long iear_tlb:1;	/* tlb miss bit */
		unsigned long iear_ig1:3;	/* reserved */
		unsigned long iear_icla:59;	/* instruction cache line address {60:51} sxt {50}*/
	} pmd0_ita_reg;

	/* PMD1: iear_ latency */
	struct {
		unsigned long iear_lat:12;	/* latency */
		unsigned long iear_ig1:52;	/* reserved */
	} pmd1_ita_reg;

	/* PMD2: dear_ data address, not a bit-field: uses the whole register */
	struct {
		unsigned long dear_daddr;	/* data address */
	} pmd2_ita_reg;

	/* PMD3: dear_ latency and level */
	struct {
		unsigned long dear_latency:12;	/* latency */
		unsigned long dear_ig1:50;	/* reserved */
		unsigned long dear_level:2;	/* level */
	} pmd3_ita_reg;

	/* PMD8-PMD15: one btb_ entry each */
	struct {
		unsigned long btb_b:1;		/* branch bit */
		unsigned long btb_mp:1;		/* mispredict bit */
		unsigned long btb_slot:2;	/* which slot, 3=not taken branch */
		unsigned long btb_addr:60;	/* b=1, bundle address, b=0 target address */
	} pmd8_15_ita_reg;

	/* PMD16: btbi_ index and full bit */
	struct {
		unsigned long btbi_bbi:3;	/* branch buffer index */
		unsigned long btbi_full:1;	/* full bit (sticky) */
		unsigned long btbi_ignored:60;	/* ignored bits */
	} pmd16_ita_reg;

	/* PMD17: dear_ instruction address */
	struct {
		unsigned long dear_vl:1;	/* valid bit */
		unsigned long dear_ig1:1;	/* reserved */
		unsigned long dear_slot:2;	/* slot number */
		unsigned long dear_iaddr:60;	/* instruction address */
	} pmd17_ita_reg;
} pfm_ita_pmd_reg_t;

/*
 * type definition for Itanium instruction set support
 */
typedef enum {
	PFMLIB_ITA_ISM_BOTH=0,	/* IA-32 and IA-64 (default) */
	PFMLIB_ITA_ISM_IA32=1,	/* IA-32 only */
	PFMLIB_ITA_ISM_IA64=2	/* IA-64 only */
} pfmlib_ita_ism_t;

/*
 * Per-counter extensions: flags (PFMLIB_ITA_FL_* below), threshold and
 * instruction set selection.
 */
typedef struct {
	unsigned int	 flags;	/* counter specific flags */
	unsigned int	 thres;	/* per event threshold */
	pfmlib_ita_ism_t ism;	/* per event instruction set */
} pfmlib_ita_counter_t;

/*
 * counter specific flags
 */
#define PFMLIB_ITA_FL_EVT_NO_QUALCHECK	0x1 /* don't check qualifier constraints */

/*
 * Opcode matcher request. The same type is used for both the PMC8 and the
 * PMC9 configuration in pfmlib_ita_input_param_t.
 */
typedef struct {
	unsigned char	opcm_used;	/* set to 1 if this opcode matcher is used */
	unsigned long	pmc_val;	/* value of opcode matcher for PMC8 or PMC9 */
} pfmlib_ita_opcm_t;

/*
 *
 * The BTB can be configured via 4 different methods:
 *
 * 	- BRANCH_EVENT is in the event list, pfp_ita_btb.btb_used == 0:
 * 	  The BTB will be configured (PMC12) to record all branches AND a counting
 * 	  monitor will be setup to count BRANCH_EVENT.
 *
 * 	- BRANCH_EVENT is in the event list, pfp_ita_btb.btb_used == 1:
 * 	  The BTB will be configured (PMC12) according to information in pfp_ita_btb AND
 * 	  a counter will be setup to count BRANCH_EVENT.
 *
 * 	- BRANCH_EVENT is NOT in the event list, pfp_ita_btb.btb_used == 0:
 * 	  Nothing is programmed
 *
 * 	- BRANCH_EVENT is NOT in the event list, pfp_ita_btb.btb_used == 1:
 * 	  The BTB will be configured (PMC12) according to information in pfp_ita_btb.
 * 	  This is the free running BTB mode.
*/
/*
 * User-supplied BTB settings. How btb_used is interpreted is described by
 * the four configuration methods in the comment just above.
 */
typedef struct {
	unsigned char btb_used;	/* set to 1 if the BTB is used */

	unsigned char btb_tar;	/* presumably PMC12 btbc_tar (target address register) -- TODO confirm */
	unsigned char btb_tac;	/* NOTE(review): pmc12_ita_reg declares no btbc_tac field; mapping not visible in this header */
	unsigned char btb_bac;	/* presumably PMC12 btbc_bac (branch address calculator) -- TODO confirm */
	unsigned char btb_tm;	/* presumably PMC12 btbc_tm (taken mask) -- TODO confirm */
	unsigned char btb_ptm;	/* presumably PMC12 btbc_ptm (predicted taken address mask) -- TODO confirm */
	unsigned char btb_ppm;	/* presumably PMC12 btbc_ppm (predicted predicate mask) -- TODO confirm */
	unsigned int  btb_plm;	/* BTB privilege level mask */
} pfmlib_ita_btb_t;

/*
 * There are four ways to configure EAR:
 *
 * 	- an EAR event is in the event list AND pfp_ita_ear.ear_used = 0:
 * 	  The EAR will be programmed (PMC10 or PMC11) based on the information encoded in the
 * 	  event (umask, cache, tlb). A counting monitor will be programmed to
 * 	  count DATA_EAR_EVENTS or INSTRUCTION_EAR_EVENTS depending on the type of EAR.
 *
 * 	- an EAR event is in the event list AND pfp_ita_ear.ear_used = 1:
 * 	  The EAR will be programmed (PMC10 or PMC11) according to the information in the
 * 	  pfp_ita_ear structure because it contains more detailed information
 * 	  (such as priv level and instruction set). A counting monitor will be programmed
 * 	  to count DATA_EAR_EVENTS or INSTRUCTION_EAR_EVENTS depending on the type of EAR.
 *
 * 	- no EAR event is in the event list AND pfp_ita_ear.ear_used = 0:
 * 	  Nothing is programmed.
 *
 * 	- no EAR event is in the event list AND pfp_ita_ear.ear_used = 1:
 * 	  The EAR will be programmed (PMC10 or PMC11) according to the information in the
 * 	  pfp_ita_ear structure. This is the free running mode for EAR
 */
typedef enum {
	PFMLIB_ITA_EAR_CACHE_MODE=0,	/* Cache mode : I-EAR and D-EAR */
	PFMLIB_ITA_EAR_TLB_MODE =1,	/* TLB mode   : I-EAR and D-EAR */
} pfmlib_ita_ear_mode_t;

/*
 * EAR settings. The same type describes both the I-EAR (PMC10) and the
 * D-EAR (PMC11): pfmlib_ita_input_param_t carries one instance for each.
 */
typedef struct {
	unsigned char		ear_used;	/* when set will force definition of PMC[10] (I-EAR) or PMC[11] (D-EAR) */

	pfmlib_ita_ear_mode_t	ear_mode;	/* EAR mode */
	pfmlib_ita_ism_t	ear_ism;	/* instruction set */
	unsigned int		ear_plm;	/* EAR privilege level mask */
	unsigned long		ear_umask;	/* umask value for PMC10 or PMC11 */
} pfmlib_ita_ear_t;

/*
 * describes one range. rr_plm is ignored for data ranges
 * a range is interpreted as unused (not defined) when rr_start = rr_end = 0.
* if rr_plm is not set it will use the default settings set in the generic
 * library param structure.
 */
typedef struct {
	unsigned int		rr_flags;	/* currently unused */
	unsigned int		rr_plm;		/* privilege level (ignored for data ranges) */
	unsigned long		rr_start;	/* start address */
	unsigned long		rr_end;		/* end address (not included) */
} pfmlib_ita_input_rr_desc_t;

/*
 * Per-range result: offsets between the requested range and the range
 * actually covered.
 */
typedef struct {
	unsigned long		rr_soff;	/* output: start offset from actual start */
	unsigned long		rr_eoff;	/* output: end offset from actual end */
} pfmlib_ita_output_rr_desc_t;

/*
 * rr_used must be set to true for the library to configure the debug registers.
 * If using less than 4 intervals, must mark the end with entry: rr_limits[x].rr_start = rr_limits[x].rr_end = 0
 */
typedef struct {
	unsigned char			rr_used;	/* set if address range restriction is used */
	unsigned int			rr_flags;	/* set of flags for all ranges */
	/* NOTE(review): labelled "(output)" although this is the input structure;
	 * pfmlib_ita_output_rr_t has a field of the same name -- confirm which one is filled in */
	unsigned int			rr_nbr_used;	/* how many registers were used (output) */
	pfmlib_ita_input_rr_desc_t	rr_limits[4];	/* at most 4 distinct intervals */
} pfmlib_ita_input_rr_t;

/*
 * Range-restriction results: per-interval offsets plus the debug register
 * requests to configure.
 */
typedef struct {
	unsigned int			rr_nbr_used;	/* how many registers were used (output) */
	pfmlib_ita_output_rr_desc_t	rr_infos[4];	/* at most 4 distinct intervals */
	pfmlib_reg_t			rr_br[8];	/* array of debug reg requests to configure */
} pfmlib_ita_output_rr_t;

/*
 * Itanium specific parameters for the library
 */
typedef struct {
	pfmlib_ita_counter_t	pfp_ita_counters[PMU_ITA_NUM_COUNTERS];	/* extended counter features */

	unsigned long		pfp_ita_flags;		/* Itanium specific flags */

	pfmlib_ita_opcm_t	pfp_ita_pmc8;		/* PMC8 (opcode matcher) configuration */
	pfmlib_ita_opcm_t	pfp_ita_pmc9;		/* PMC9 (opcode matcher) configuration */
	pfmlib_ita_ear_t	pfp_ita_iear;		/* IEAR configuration */
	pfmlib_ita_ear_t	pfp_ita_dear;		/* DEAR configuration */
	pfmlib_ita_btb_t	pfp_ita_btb;		/* BTB configuration */
	pfmlib_ita_input_rr_t	pfp_ita_drange;		/* data range restrictions */
	pfmlib_ita_input_rr_t	pfp_ita_irange;		/* code range restrictions */
	unsigned long		reserved[1];		/* for future use */
} pfmlib_ita_input_param_t;

/*
 * Itanium specific results returned by the library: the output side of the
 * data and code range restrictions.
 */
typedef struct {
	pfmlib_ita_output_rr_t	pfp_ita_drange;		/* data range restrictions */
	pfmlib_ita_output_rr_t	pfp_ita_irange;		/* code range restrictions */
	unsigned long		reserved[6];		/* for future use */
} pfmlib_ita_output_param_t;

/*
 * Itanium specific query functions. Each takes an unsigned int i; the
 * pfm_ita_get_* variants additionally store a result through their pointer
 * argument. Their definitions are not part of this header.
 * NOTE(review): i is presumably an event index and the int result a
 * boolean or status code -- confirm against the implementation.
 */
extern int pfm_ita_is_ear(unsigned int i);
extern int pfm_ita_is_dear(unsigned int i);
extern int pfm_ita_is_dear_tlb(unsigned int i);
extern int pfm_ita_is_dear_cache(unsigned int i);
extern int pfm_ita_is_iear(unsigned int i);
extern int pfm_ita_is_iear_tlb(unsigned int i);
extern int pfm_ita_is_iear_cache(unsigned int i);
extern int pfm_ita_is_btb(unsigned int i);
extern int pfm_ita_support_opcm(unsigned int i);
extern int pfm_ita_support_iarr(unsigned int i);
extern int pfm_ita_support_darr(unsigned int i);
extern int pfm_ita_get_ear_mode(unsigned int i, pfmlib_ita_ear_mode_t *m);
extern int pfm_ita_get_event_maxincr(unsigned int i, unsigned int *maxincr);
extern int pfm_ita_get_event_umask(unsigned int i, unsigned long *umask);

#ifdef __cplusplus /* extern C */
}
#endif

#endif /* __PFMLIB_ITANIUM_H__ */
papi-5.6.0/man/man3/PAPI_get_virt_nsec.3000664 001750 001750 00000002316 13216244356 021702 0ustar00jshenry1963jshenry1963000000 000000 .TH "PAPI_get_virt_nsec" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- .ad l .nh .SH NAME PAPI_get_virt_nsec \- .PP Get virtual time counter values in nanoseconds\&. .SH SYNOPSIS .br .PP .SH "Detailed Description" .PP .PP \fBReturn values:\fP .RS 4 \fIPAPI_ECNFLCT\fP If there is no master event set\&. This will happen if the library has not been initialized, or for threaded applications, if there has been no thread id function defined by the \fBPAPI_thread_init\fP function\&. .br \fIPAPI_ENOMEM\fP For threaded applications, if there has not yet been any thread specific master event created for the current thread, and if the allocation of such an event set fails, the call will return PAPI_ENOMEM or PAPI_ESYS \&. 
.RE .PP This function returns the total number of virtual units from some arbitrary starting point\&. Virtual units accrue every time the process is running in user-mode on behalf of the process\&. Like the real time counters, this count is guaranteed to exist on every platform PAPI supports\&. However on some platforms, the resolution can be as bad as 1/Hz as defined by the operating system\&. .SH "Author" .PP Generated automatically by Doxygen for PAPI from the source code\&. papi-5.6.0/src/libpfm-3.y/include/perfmon/pfmlib_crayx2.h000664 001750 001750 00000006671 13216244362 025220 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2007 Cray Inc. * Contributed by Steve Kaufmann based on code from * Copyright (c) 2001-2006 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef __PFMLIB_CRAYX2_H__ #define __PFMLIB_CRAYX2_H__ 1 /* * Allows to be included on its own. 
*/
#define PFM_MAX_HW_PMCS 12
#define PFM_MAX_HW_PMDS 512
#include #include

/* Privilege level mask for Cray-X2:
 *
 *	PFM_PLM0 = Kernel
 *	PFM_PLM1 = Kernel
 *	PFM_PLM2 = Exception
 *	PFM_PLM3 = User
 */

/*	The performance control (PMC) registers appear as follows:
 *	PMC0 control for CPU chip
 *	PMC1 events on CPU chip
 *	PMC2 enable for CPU chip
 *	PMC3 control for L2 Cache chip
 *	PMC4 events on L2 Cache chip
 *	PMC5 enable for L2 Cache chip
 *	PMC6 control for Memory chip
 *	PMC7 events on Memory chip
 *	PMC8 enable for Memory chip
 *
 *	The performance data (PMD) registers appear for
 *	CPU (32), L2 Cache (16), and Memory (28*16) chips contiguously.
 *	There are four events per chip.
 *
 *	PMD0 P chip, counter 0
 *	...
 *	PMD31 P chip, counter 31
 *	PMD32 C chip, counter 0
 *	...
 *	PMD47 C chip, counter 15
 *	PMD48 M chip 0, counter 0
 *	...
 *	PMD495 M chip 15, counter 27
 */

#ifdef __cplusplus
extern "C" {
#endif

/*
 * The PMU_CRAYX2_* names below are aliases for PFM_* constants that are not
 * defined in this header.
 * NOTE(review): presumably they come from one of the headers included
 * above -- confirm.
 */

/*	PMC counts
 */
#define PMU_CRAYX2_CPU_PMC_COUNT	PFM_CPU_PMC_COUNT
#define PMU_CRAYX2_CACHE_PMC_COUNT	PFM_CACHE_PMC_COUNT
#define PMU_CRAYX2_MEMORY_PMC_COUNT	PFM_MEM_PMC_COUNT

/*	PMC bases
 */
#define PMU_CRAYX2_CPU_PMC_BASE		PFM_CPU_PMC
#define PMU_CRAYX2_CACHE_PMC_BASE	PFM_CACHE_PMC
#define PMU_CRAYX2_MEMORY_PMC_BASE	PFM_MEM_PMC

/*	PMD counts
 */
#define PMU_CRAYX2_CPU_PMD_COUNT	PFM_CPU_PMD_COUNT
#define PMU_CRAYX2_CACHE_PMD_COUNT	PFM_CACHE_PMD_COUNT
#define PMU_CRAYX2_MEMORY_PMD_COUNT	PFM_MEM_PMD_COUNT

/*	PMD bases
 */
#define PMU_CRAYX2_CPU_PMD_BASE		PFM_CPU_PMD
#define PMU_CRAYX2_CACHE_PMD_BASE	PFM_CACHE_PMD
#define PMU_CRAYX2_MEMORY_PMD_BASE	PFM_MEM_PMD

/*	Total number of PMCs and PMDs
 */
#define PMU_CRAYX2_PMC_COUNT		PFM_PMC_COUNT
#define PMU_CRAYX2_PMD_COUNT		PFM_PMD_COUNT
#define PMU_CRAYX2_NUM_COUNTERS		PFM_PMD_COUNT

/*	Counter width (can also be acquired via /sys/kernel/perfmon)
 */
#define PMU_CRAYX2_COUNTER_WIDTH	63

/*	PMU name (can also be acquired via /sys/kernel/perfmon)
 */
#define PMU_CRAYX2_NAME			"Cray X2"

#ifdef __cplusplus
}
#endif /* extern C */

#endif /* 
__PFMLIB_CRAYX2_H__ */ papi-5.6.0/src/linux-bgp-lock.h000664 001750 001750 00000000112 13216244366 020325 0ustar00jshenry1963jshenry1963000000 000000 extern void _papi_hwd_lock( int ); extern void _papi_hwd_unlock( int ); papi-5.6.0/src/perfctr-2.6.x/patches/patch-kernel-2.6.18-194.el5-redhat000664 001750 001750 00000041600 13216244367 026474 0ustar00jshenry1963jshenry1963000000 000000 --- linux-2.6.18-194.el5.perfctr26/CREDITS.~1~ 2010-05-07 12:06:35.000000000 +0200 +++ linux-2.6.18-194.el5.perfctr26/CREDITS 2010-05-07 14:15:59.000000000 +0200 @@ -2635,9 +2635,10 @@ S: Ottawa, Ontario S: Canada K2P 0X8 N: Mikael Pettersson -E: mikpe@csd.uu.se -W: http://www.csd.uu.se/~mikpe/ +E: mikpe@it.uu.se +W: http://user.it.uu.se/~mikpe/linux/ D: Miscellaneous fixes +D: Performance-monitoring counters driver N: Reed H. Petty E: rhp@draper.net --- linux-2.6.18-194.el5.perfctr26/Documentation/ioctl-number.txt.~1~ 2006-09-20 05:42:06.000000000 +0200 +++ linux-2.6.18-194.el5.perfctr26/Documentation/ioctl-number.txt 2010-05-07 14:15:59.000000000 +0200 @@ -187,6 +187,8 @@ Code Seq# Include File Comments 0xB1 00-1F PPPoX 0xCB 00-1F CBM serial IEC bus in development: +0xD0 all performance counters see drivers/perfctr/ + 0xDD 00-3F ZFCP device driver see drivers/s390/scsi/ 0xF3 00-3F video/sisfb.h sisfb (in development) --- linux-2.6.18-194.el5.perfctr26/MAINTAINERS.~1~ 2010-05-07 12:07:28.000000000 +0200 +++ linux-2.6.18-194.el5.perfctr26/MAINTAINERS 2010-05-07 14:15:59.000000000 +0200 @@ -2348,6 +2348,12 @@ M: nagar@watson.ibm.com L: linux-kernel@vger.kernel.org S: Maintained +PERFORMANCE-MONITORING COUNTERS DRIVER +P: Mikael Pettersson +M: mikpe@it.uu.se +W: http://user.it.uu.se/~mikpe/linux/perfctr/ +S: Maintained + PERSONALITY HANDLING P: Christoph Hellwig M: hch@infradead.org --- linux-2.6.18-194.el5.perfctr26/arch/i386/Kconfig.~1~ 2010-05-07 12:07:00.000000000 +0200 +++ linux-2.6.18-194.el5.perfctr26/arch/i386/Kconfig 2010-05-07 14:15:59.000000000 +0200 @@ -773,6 +773,8 @@ 
config VGA_NOPROBE data. Say N here unless you are absolutely sure this is what you want. +source "drivers/perfctr/Kconfig" + source kernel/Kconfig.hz config KEXEC --- linux-2.6.18-194.el5.perfctr26/arch/i386/kernel/entry.S.~1~ 2010-05-07 12:06:36.000000000 +0200 +++ linux-2.6.18-194.el5.perfctr26/arch/i386/kernel/entry.S 2010-05-07 14:15:59.000000000 +0200 @@ -586,6 +586,22 @@ ENTRY(name) \ /* The include is where all of the SMP etc. interrupts come from */ #include "entry_arch.h" +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +ENTRY(perfctr_interrupt) + RING0_INT_FRAME + pushl $~(LOCAL_PERFCTR_VECTOR) + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + TRACE_IRQS_OFF + pushl %esp + CFI_ADJUST_CFA_OFFSET 4 + call smp_perfctr_interrupt + addl $4, %esp + CFI_ADJUST_CFA_OFFSET -4 + jmp ret_from_intr + CFI_ENDPROC +#endif + ENTRY(divide_error) RING0_INT_FRAME pushl $0 # no error code --- linux-2.6.18-194.el5.perfctr26/arch/i386/kernel/i8259.c.~1~ 2006-09-20 05:42:06.000000000 +0200 +++ linux-2.6.18-194.el5.perfctr26/arch/i386/kernel/i8259.c 2010-05-07 14:15:59.000000000 +0200 @@ -22,6 +22,7 @@ #include #include #include +#include #include @@ -420,6 +421,8 @@ void __init init_IRQ(void) */ intr_init_hook(); + perfctr_vector_init(); + /* * Set the clock to HZ Hz, we already have a valid * vector now: --- linux-2.6.18-194.el5.perfctr26/arch/i386/kernel/process.c.~1~ 2010-05-07 12:06:57.000000000 +0200 +++ linux-2.6.18-194.el5.perfctr26/arch/i386/kernel/process.c 2010-05-07 14:15:59.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -375,6 +376,7 @@ void exit_thread(void) tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET; put_cpu(); } + perfctr_exit_thread(¤t->thread); } void flush_thread(void) @@ -427,6 +429,8 @@ int copy_thread(int nr, unsigned long cl savesegment(fs,p->thread.fs); savesegment(gs,p->thread.gs); + perfctr_copy_task(p, regs); + tsk = current; if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { 
p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); @@ -694,6 +698,8 @@ struct task_struct fastcall * __switch_t disable_tsc(prev_p, next_p); + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.18-194.el5.perfctr26/arch/powerpc/Kconfig.~1~ 2010-05-07 12:07:03.000000000 +0200 +++ linux-2.6.18-194.el5.perfctr26/arch/powerpc/Kconfig 2010-05-07 14:15:59.000000000 +0200 @@ -335,6 +335,11 @@ config NOT_COHERENT_CACHE bool depends on 4xx || 8xx || E200 default y + +if PPC32 +source "drivers/perfctr/Kconfig" +endif + endmenu source "init/Kconfig" --- linux-2.6.18-194.el5.perfctr26/arch/powerpc/kernel/process.c.~1~ 2010-05-07 12:06:57.000000000 +0200 +++ linux-2.6.18-194.el5.perfctr26/arch/powerpc/kernel/process.c 2010-05-07 14:15:59.000000000 +0200 @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -338,7 +339,9 @@ struct task_struct *__switch_to(struct t account_process_vtime(current); calculate_steal_time(); + perfctr_suspend_thread(&prev->thread); last = _switch(old_thread, new_thread); + perfctr_resume_thread(¤t->thread); local_irq_restore(flags); @@ -470,6 +473,7 @@ void show_regs(struct pt_regs * regs) void exit_thread(void) { discard_lazy_cpu_state(); + perfctr_exit_thread(¤t->thread); } void flush_thread(void) @@ -582,6 +586,8 @@ int copy_thread(int nr, unsigned long cl p->thread.last_syscall = -1; #endif + perfctr_copy_task(p, regs); + return 0; } --- linux-2.6.18-194.el5.perfctr26/arch/x86_64/Kconfig.~1~ 2010-05-07 12:07:32.000000000 +0200 +++ linux-2.6.18-194.el5.perfctr26/arch/x86_64/Kconfig 2010-05-07 14:15:59.000000000 +0200 @@ -606,6 +606,8 @@ config SECCOMP If unsure, say Y. Only embedded should say N here. 
+source "drivers/perfctr/Kconfig" + source kernel/Kconfig.hz config REORDER --- linux-2.6.18-194.el5.perfctr26/arch/x86_64/kernel/entry.S.~1~ 2010-05-07 12:06:54.000000000 +0200 +++ linux-2.6.18-194.el5.perfctr26/arch/x86_64/kernel/entry.S 2010-05-07 14:15:59.000000000 +0200 @@ -699,6 +699,12 @@ ENTRY(spurious_interrupt) END(spurious_interrupt) #endif +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +ENTRY(perfctr_interrupt) + apicinterrupt LOCAL_PERFCTR_VECTOR,smp_perfctr_interrupt +END(perfctr_interrupt) +#endif + /* * Exception entry points. */ --- linux-2.6.18-194.el5.perfctr26/arch/x86_64/kernel/i8259.c.~1~ 2010-05-07 12:06:33.000000000 +0200 +++ linux-2.6.18-194.el5.perfctr26/arch/x86_64/kernel/i8259.c 2010-05-07 14:15:59.000000000 +0200 @@ -22,6 +22,7 @@ #include #include #include +#include /* * Common place to define all x86 IRQ vectors @@ -591,6 +592,8 @@ void __init init_IRQ(void) set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); #endif + perfctr_vector_init(); + /* * Set the clock to HZ Hz, we already have a valid * vector now: --- linux-2.6.18-194.el5.perfctr26/arch/x86_64/kernel/process.c.~1~ 2010-05-07 12:06:57.000000000 +0200 +++ linux-2.6.18-194.el5.perfctr26/arch/x86_64/kernel/process.c 2010-05-07 14:15:59.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -360,6 +361,7 @@ void exit_thread(void) t->io_bitmap_max = 0; put_cpu(); } + perfctr_exit_thread(&me->thread); } void flush_thread(void) @@ -464,6 +466,8 @@ int copy_thread(int nr, unsigned long cl asm("mov %%es,%0" : "=m" (p->thread.es)); asm("mov %%ds,%0" : "=m" (p->thread.ds)); + perfctr_copy_task(p, regs); + if (unlikely(me->thread.io_bitmap_ptr != NULL)) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) { @@ -620,6 +624,8 @@ __switch_to(struct task_struct *prev_p, } } + perfctr_resume_thread(next); + return prev_p; } --- linux-2.6.18-194.el5.perfctr26/drivers/Makefile.~1~ 
2010-05-07 12:07:18.000000000 +0200 +++ linux-2.6.18-194.el5.perfctr26/drivers/Makefile 2010-05-07 14:15:59.000000000 +0200 @@ -76,6 +76,7 @@ obj-$(CONFIG_NEW_LEDS) += leds/ obj-$(CONFIG_INFINIBAND) += infiniband/ obj-$(CONFIG_IPATH_CORE) += infiniband/ obj-$(CONFIG_SGI_SN) += sn/ +obj-$(CONFIG_KPERFCTR) += perfctr/ obj-y += firmware/ obj-$(CONFIG_CRYPTO) += crypto/ obj-$(CONFIG_SUPERH) += sh/ --- linux-2.6.18-194.el5.perfctr26/fs/exec.c.~1~ 2010-05-07 12:07:32.000000000 +0200 +++ linux-2.6.18-194.el5.perfctr26/fs/exec.c 2010-05-07 14:16:26.000000000 +0200 @@ -50,6 +50,7 @@ #include #include #include +#include #include #include @@ -1070,6 +1071,7 @@ int flush_old_exec(struct linux_binprm * set_task_comm(current, tcomm); current->flags &= ~PF_RANDOMIZE; + perfctr_flush_thread(¤t->thread); flush_thread(); /* Set the new mm task size. We have to do that late because it may --- linux-2.6.18-194.el5.perfctr26/include/asm-i386/mach-default/irq_vectors.h.~1~ 2006-09-20 05:42:06.000000000 +0200 +++ linux-2.6.18-194.el5.perfctr26/include/asm-i386/mach-default/irq_vectors.h 2010-05-07 14:15:59.000000000 +0200 @@ -56,14 +56,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 --- linux-2.6.18-194.el5.perfctr26/include/asm-i386/mach-visws/irq_vectors.h.~1~ 2006-09-20 05:42:06.000000000 +0200 +++ linux-2.6.18-194.el5.perfctr26/include/asm-i386/mach-visws/irq_vectors.h 2010-05-07 14:15:59.000000000 +0200 @@ -35,14 +35,15 @@ * sources per level' errata. 
*/ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 --- linux-2.6.18-194.el5.perfctr26/include/asm-i386/processor.h.~1~ 2010-05-07 12:07:11.000000000 +0200 +++ linux-2.6.18-194.el5.perfctr26/include/asm-i386/processor.h 2010-05-07 14:15:59.000000000 +0200 @@ -483,6 +483,8 @@ struct thread_struct { unsigned long iopl; /* max allowed port in the bitmap, in bytes: */ unsigned long io_bitmap_max; +/* performance counters */ + struct vperfctr *perfctr; }; #define INIT_THREAD { \ --- linux-2.6.18-194.el5.perfctr26/include/asm-i386/system.h.~1~ 2006-09-20 05:42:06.000000000 +0200 +++ linux-2.6.18-194.el5.perfctr26/include/asm-i386/system.h 2010-05-07 14:15:59.000000000 +0200 @@ -17,6 +17,7 @@ extern struct task_struct * FASTCALL(__s */ #define switch_to(prev,next,last) do { \ unsigned long esi,edi; \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile("pushfl\n\t" /* Save flags */ \ "pushl %%ebp\n\t" \ "movl %%esp,%0\n\t" /* save ESP */ \ --- linux-2.6.18-194.el5.perfctr26/include/asm-powerpc/processor.h.~1~ 2006-09-20 05:42:06.000000000 +0200 +++ linux-2.6.18-194.el5.perfctr26/include/asm-powerpc/processor.h 2010-05-07 14:15:59.000000000 +0200 @@ -169,6 +169,9 @@ struct thread_struct { unsigned long spefscr; /* SPE & eFP status */ int used_spe; /* set if process has used spe */ #endif /* CONFIG_SPE */ +#ifdef CONFIG_PERFCTR_VIRTUAL + struct vperfctr *perfctr; /* performance counters */ +#endif }; #define ARCH_MIN_TASKALIGN 16 --- linux-2.6.18-194.el5.perfctr26/include/asm-x86_64/hw_irq.h.~1~ 2006-09-20 05:42:06.000000000 +0200 +++ linux-2.6.18-194.el5.perfctr26/include/asm-x86_64/hw_irq.h 
2010-05-07 14:15:59.000000000 +0200 @@ -64,14 +64,15 @@ struct hw_interrupt_type; * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in irq.h */ #ifndef __ASSEMBLY__ --- linux-2.6.18-194.el5.perfctr26/include/asm-x86_64/irq.h.~1~ 2006-09-20 05:42:06.000000000 +0200 +++ linux-2.6.18-194.el5.perfctr26/include/asm-x86_64/irq.h 2010-05-07 14:15:59.000000000 +0200 @@ -29,7 +29,7 @@ */ #define NR_VECTORS 256 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in hw_irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in hw_irq.h */ #ifdef CONFIG_PCI_MSI #define NR_IRQS FIRST_SYSTEM_VECTOR --- linux-2.6.18-194.el5.perfctr26/include/asm-x86_64/processor.h.~1~ 2010-05-07 12:07:21.000000000 +0200 +++ linux-2.6.18-194.el5.perfctr26/include/asm-x86_64/processor.h 2010-05-07 14:15:59.000000000 +0200 @@ -284,6 +284,8 @@ struct thread_struct { unsigned io_bitmap_max; /* cached TLS descriptors. 
*/ u64 tls_array[GDT_ENTRY_TLS_ENTRIES]; +/* performance counters */ + struct vperfctr *perfctr; } __attribute__((aligned(16))); #define INIT_THREAD { \ --- linux-2.6.18-194.el5.perfctr26/include/asm-x86_64/system.h.~1~ 2010-05-07 12:06:32.000000000 +0200 +++ linux-2.6.18-194.el5.perfctr26/include/asm-x86_64/system.h 2010-05-07 14:15:59.000000000 +0200 @@ -21,7 +21,8 @@ ,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15" /* Save restore flags to clear handle leaking NT */ -#define switch_to(prev,next,last) \ +#define switch_to(prev,next,last) do { \ + perfctr_suspend_thread(&(prev)->thread); \ asm volatile(SAVE_CONTEXT \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ @@ -41,7 +42,8 @@ [tif_fork] "i" (TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, thread_info)), \ [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ - : "memory", "cc" __EXTRA_CLOBBER) + : "memory", "cc" __EXTRA_CLOBBER); \ +} while (0) extern void load_gs_index(unsigned); --- linux-2.6.18-194.el5.perfctr26/include/linux/config.h.~1~ 2010-05-07 12:06:22.000000000 +0200 +++ linux-2.6.18-194.el5.perfctr26/include/linux/config.h 2010-05-07 14:15:59.000000000 +0200 @@ -3,6 +3,8 @@ /* This file is no longer in use and kept only for backward compatibility. * autoconf.h is now included via -imacros on the commandline */ +#define HAVE_EXPORT___put_task_struct 1 +#define DONT_HAVE_i_blksize 1 #warning Including config.h is deprecated. 
#include #if !defined (__KERNEL__) && !defined(__KERNGLUE__) --- linux-2.6.18-194.el5.perfctr26/kernel/exit.c.~1~ 2010-05-07 12:07:12.000000000 +0200 +++ linux-2.6.18-194.el5.perfctr26/kernel/exit.c 2010-05-07 14:15:59.000000000 +0200 @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -198,6 +199,7 @@ repeat: } } + perfctr_release_task(p); sched_exit(p); write_unlock_irq(&tasklist_lock); proc_flush_task(p); --- linux-2.6.18-194.el5.perfctr26/kernel/sched.c.~1~ 2010-05-07 12:07:29.000000000 +0200 +++ linux-2.6.18-194.el5.perfctr26/kernel/sched.c 2010-05-07 14:15:59.000000000 +0200 @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -5160,6 +5161,8 @@ int set_cpus_allowed(struct task_struct struct rq *rq; int ret = 0; + perfctr_set_cpus_allowed(p, new_mask); + rq = task_rq_lock(p, &flags); if (!cpus_intersects(new_mask, cpu_online_map)) { ret = -EINVAL; --- linux-2.6.18-194.el5.perfctr26/kernel/timer.c.~1~ 2010-05-07 12:07:32.000000000 +0200 +++ linux-2.6.18-194.el5.perfctr26/kernel/timer.c 2010-05-07 14:15:59.000000000 +0200 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -1328,6 +1329,7 @@ void update_process_times(int user_tick, account_user_time(p, jiffies_to_cputime(1)); else account_system_time(p, HARDIRQ_OFFSET, jiffies_to_cputime(1)); + perfctr_sample_thread(&p->thread); run_local_timers(regs); if (rcu_pending(cpu)) rcu_check_callbacks(cpu, user_tick); papi-5.6.0/src/libpfm-3.y/examples_v2.x/notify_self2.c000664 001750 001750 00000022045 13216244362 024450 0ustar00jshenry1963jshenry1963000000 000000 /* * notify_self2.c - example of how you can use overflow notifications with F_SETSIG * * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. 
* Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. */ #ifndef _GNU_SOURCE #define _GNU_SOURCE /* for getline */ #endif #include #include #include #include #include #include #include #include #include #include #include #include #include "detect_pmcs.h" #define SMPL_PERIOD 1000000000ULL static volatile unsigned long notification_received; #define NUM_PMCS PFMLIB_MAX_PMCS #define NUM_PMDS PFMLIB_MAX_PMDS static pfarg_pmd_t pd[NUM_PMDS]; static int ctx_fd; static char *event1_name; static void fatal_error(char *fmt,...) __attribute__((noreturn)); static void fatal_error(char *fmt, ...) { va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); exit(1); } static void warning(char *fmt, ...) 
{ va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); } static void sigio_handler(int n, struct siginfo *info, void *data) { pfarg_msg_t msg; int fd; int r =0; if (info == NULL) fatal_error("info is NULL\n"); fd = info->si_fd; if (info->si_code < 0) fatal_error("signal not generated by kernel\n"); if (info->si_code != POLL_IN) fatal_error("unexpected si_code=0x%x\n", info->si_code); if (fd != ctx_fd) fatal_error("handler does not get valid file descriptor\n"); if (event1_name && pfm_read_pmds(fd, pd+1, 1)) fatal_error("pfm_read_pmds: %s", strerror(errno)); retry: r = read(fd, &msg, sizeof(msg)); if (r != sizeof(msg)) { if(r == -1 && errno == EINTR) { warning("read interrupted, retrying\n"); goto retry; } fatal_error("cannot read overflow message: %s\n", strerror(errno)); } if (msg.type != PFM_MSG_OVFL) fatal_error("unexpected msg type: %d\n",msg.type); /* * increment our notification counter */ notification_received++; /* * XXX: risky to do printf() in signal handler! */ if (event1_name) printf("Notification %lu: %"PRIu64" %s\n", notification_received, pd[1].reg_value, event1_name); else printf("Notification %lu\n", notification_received); /* * And resume monitoring */ if (pfm_restart(fd)) fatal_error("pfm_restart: %d\n", errno); } /* * infinite loop waiting for notification to get out */ void busyloop(void) { /* * busy loop to burn CPU cycles */ for(;notification_received < 20;) ; } #define BPL (sizeof(uint64_t)<<3) #define LBPL 6 static inline void pfm_bv_set(uint64_t *bv, uint16_t rnum) { bv[rnum>>LBPL] |= 1UL << (rnum&(BPL-1)); } int main(int argc, char **argv) { pfarg_ctx_t ctx; pfmlib_input_param_t inp; pfmlib_output_param_t outp; pfarg_pmc_t pc[NUM_PMCS]; pfarg_load_t load_args; pfmlib_options_t pfmlib_options; struct sigaction act; unsigned int i, num_counters; size_t len; int ret; /* * pass options to library (optional) */ memset(&pfmlib_options, 0, sizeof(pfmlib_options)); pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ 
pfmlib_options.pfm_verbose = 1; /* set to 1 for verbose */ pfm_set_options(&pfmlib_options); /* * Initialize pfm library (required before we can use it) */ ret = pfm_initialize(); if (ret != PFMLIB_SUCCESS) fatal_error("Cannot initialize library: %s\n", pfm_strerror(ret)); /* * Install the signal handler (SIGIO) * * SA_SIGINFO required on some platforms * to get siginfo passed to handler. */ memset(&act, 0, sizeof(act)); act.sa_sigaction = sigio_handler; act.sa_flags = SA_SIGINFO; sigaction (SIGIO, &act, 0); memset(pc, 0, sizeof(pc)); memset(&ctx, 0, sizeof(ctx)); memset(&load_args, 0, sizeof(load_args)); memset(&inp,0, sizeof(inp)); memset(&outp,0, sizeof(outp)); pfm_get_num_counters(&num_counters); if (pfm_get_cycle_event(&inp.pfp_events[0]) != PFMLIB_SUCCESS) fatal_error("cannot find cycle event\n"); if (pfm_get_inst_retired_event(&inp.pfp_events[1]) != PFMLIB_SUCCESS) fatal_error("cannot find inst retired event\n"); i = 2; /* * set the default privilege mode for all counters: * PFM_PLM3 : user level only */ inp.pfp_dfl_plm = PFM_PLM3; if (i > num_counters) { i = num_counters; printf("too many events provided (max=%d events), using first %d event(s)\n", num_counters, i); } inp.pfp_event_count = i; /* * how many counters we use */ if (i > 1) { pfm_get_max_event_name_len(&len); event1_name = malloc(len+1); if (event1_name == NULL) fatal_error("cannot allocate event name\n"); pfm_get_full_event_name(&inp.pfp_events[1], event1_name, len+1); } /* * now create the context for self monitoring/per-task */ ctx_fd = pfm_create_context(&ctx, NULL, NULL, 0); if (ctx_fd == -1) { if (errno == ENOSYS) { fatal_error("Your kernel does not have performance monitoring support!\n"); } fatal_error("Can't create PFM context %s\n", strerror(errno)); } /* * build the pfp_unavail_pmcs bitmask by looking * at what perfmon has available. It is not always * the case that all PMU registers are actually available * to applications. 
For instance, on IA-32 platforms, some * registers may be reserved for the NMI watchdog timer. * * With this bitmap, the library knows which registers NOT to * use. Of course, it is possible that no valid assignment may * be possible if certain PMU registers are not available. */ detect_unavail_pmcs(ctx_fd, &inp.pfp_unavail_pmcs); /* * let the library figure out the values for the PMCS */ if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) fatal_error("Cannot configure events: %s\n", pfm_strerror(ret)); /* * Now prepare the argument to initialize the PMDs and PMCS. */ for (i=0; i < outp.pfp_pmc_count; i++) { pc[i].reg_num = outp.pfp_pmcs[i].reg_num; pc[i].reg_value = outp.pfp_pmcs[i].reg_value; } for (i=0; i < outp.pfp_pmd_count; i++) pd[i].reg_num = outp.pfp_pmds[i].reg_num; /* * We want to get notified when the counter used for our first * event overflows */ pd[0].reg_flags |= PFM_REGFL_OVFL_NOTIFY; if (inp.pfp_event_count > 1) pfm_bv_set(pd[0].reg_reset_pmds, pd[1].reg_num); /* * we arm the first counter, such that it will overflow * after SMPL_PERIOD events have been observed */ pd[0].reg_value = - SMPL_PERIOD; pd[0].reg_long_reset = - SMPL_PERIOD; pd[0].reg_short_reset = - SMPL_PERIOD; /* * Now program the registers */ if (pfm_write_pmcs(ctx_fd, pc, outp.pfp_pmc_count)) fatal_error("pfm_write_pmcs error errno %d\n",errno); if (pfm_write_pmds(ctx_fd, pd, outp.pfp_pmd_count)) fatal_error("pfm_write_pmds error errno %d\n",errno); /* * we want to monitor ourself */ load_args.load_pid = getpid(); if (pfm_load_context(ctx_fd, &load_args)) fatal_error("pfm_load_context error errno %d\n",errno); /* * setup asynchronous notification on the file descriptor */ ret = fcntl(ctx_fd, F_SETFL, fcntl(ctx_fd, F_GETFL, 0) | O_ASYNC); if (ret == -1) fatal_error("cannot set ASYNC: %s\n", strerror(errno)); /* * get ownership of the descriptor */ ret = fcntl(ctx_fd, F_SETOWN, getpid()); if (ret == -1) fatal_error("cannot setown: %s\n", strerror(errno)); 
#ifndef _GNU_SOURCE #error "this program must be compiled with -D_GNU_SOURCE" #else /* * when you explicitly declare that you want a particular signal, * even when you use the default signal, the kernel will send more * information concerning the event to the signal handler. * * In particular, it will send the file descriptor from which the * event is originating which can be quite useful when monitoring * multiple tasks from a single thread. */ ret = fcntl(ctx_fd, F_SETSIG, SIGIO); if (ret == -1) fatal_error("cannot setsig: %s\n", strerror(errno)); #endif /* * Let's roll now */ pfm_self_start(ctx_fd); busyloop(); pfm_self_stop(ctx_fd); /* * free our context */ close(ctx_fd); if (event1_name) free(event1_name); return 0; } papi-5.6.0/src/components/appio/README000664 001750 001750 00000006152 13216244356 021507 0ustar00jshenry1963jshenry1963000000 000000 COMPONENT appio SUMMARY Application I/O component DESCRIPTION This application I/O component enables PAPI-C to determine I/O used by the application. This is to be distinguished from system-wide I/O statistics. The goal of this component is to help the programmer attribute the I/O (read/write) to files and sockets, to the source code. Listed below are the events measured by the component: Event names ----------- READ_BYTES READ_CALLS READ_ERR READ_INTERRUPTED READ_WOULD_BLOCK READ_SHORT READ_EOF READ_BLOCK_SIZE READ_USEC WRITE_BYTES WRITE_CALLS WRITE_ERR WRITE_INTERRUPTED WRITE_WOULD_BLOCK WRITE_SHORT WRITE_BLOCK_SIZE WRITE_USEC OPEN_CALLS OPEN_ERR OPEN_FDS SELECT_USEC RECV_BYTES RECV_CALLS RECV_ERR RECV_INTERRUPTED RECV_WOULD_BLOCK RECV_SHORT RECV_EOF RECV_BLOCK_SIZE RECV_USEC SOCK_READ_BYTES SOCK_READ_CALLS SOCK_READ_ERR SOCK_READ_SHORT SOCK_READ_WOULD_BLOCK SOCK_READ_USEC SOCK_WRITE_BYTES SOCK_WRITE_CALLS SOCK_WRITE_ERR SOCK_WRITE_SHORT SOCK_WRITE_WOULD_BLOCK SOCK_WRITE_USEC SEEK_CALLS SEEK_ABS_BLOCK_SIZE SEEK_USEC The component works by intercepting I/O system calls on Linux. 
At present, the code uses features available in libc on Linux, and is unlikely to work on other platforms without modifications. The code works for static and shared executables. The component has been tested on 32 and 64-bit Linux. It's also been tested to work for multithreaded programs. Limitations and future work: --------------------------- The most important aspect to note is that the code is likely to only work on Linux, given the low-level dependencies on libc features. At present the component intercepts the open(), close(), read(), write(), fread() and fwrite(). In the future it's expected that these will be expanded to cover lseek(), select(), and other I/O calls. While READ_* and WRITE_* calls will not distinguish between file and network I/O, the user can explicitly determine network statistics using SOCK_* calls. Threads are handled using thread-specific structures in the backend. However, no aggregation is currently performed across threads. There is also NO global structure that has the statistics of all the threads. This means the user can call a PAPI read to get statistics for a running thread. However, if the thread has joined, then its statistics can no longer be queried. TESTING: ------- Tests lie in the tests/ sub-directory. All but one test take no argument. The iozone test (appio_test_iozone) needs arguments just like iozone does. It is not built by default as part of the PAPI tests. To build it: cd appio/tests; make appio_test_iozone An example run for the iozone test could be: ./appio_test_iozone -s 100m -r 64 -i 0 -i 1 -t 1 AUTHOR The code is written by Tushar Mohan and Philip Mucci . The component leverages code written by Jose Pedro Oliveira for the PAPI net component. 
SEE ALSO # vim:set ai ts=4 sw=4 sts=4 et: papi-5.6.0/src/papi_libpfm_events.h000664 001750 001750 00000003337 13216244366 021354 0ustar00jshenry1963jshenry1963000000 000000 #ifndef _PAPI_LIBPFM_EVENTS_H #define _PAPI_LIBPFM_EVENTS_H #include "papi.h" /* For PAPI_event_info_t */ #include "papi_vector.h" /* For papi_vector_t */ /* * File: papi_libpfm_events.h */ /* Prototypes for libpfm name library access */ int _papi_libpfm_error( int pfm_error ); int _papi_libpfm_setup_presets( char *name, int type, int cidx ); int _papi_libpfm_ntv_enum_events( unsigned int *EventCode, int modifier ); int _papi_libpfm_ntv_name_to_code( const char *ntv_name, unsigned int *EventCode ); int _papi_libpfm_ntv_code_to_name( unsigned int EventCode, char *name, int len ); int _papi_libpfm_ntv_code_to_descr( unsigned int EventCode, char *name, int len ); int _papi_libpfm_ntv_code_to_bits( unsigned int EventCode, hwd_register_t * bits ); int _papi_libpfm_ntv_code_to_bits_perfctr( unsigned int EventCode, hwd_register_t * bits ); int _papi_libpfm_shutdown(void); int _papi_libpfm_init(papi_vector_t *my_vector, int cidx); int _pfm_decode_native_event( unsigned int EventCode, unsigned int *event, unsigned int *umask ); unsigned int _pfm_convert_umask( unsigned int event, unsigned int umask ); int prepare_umask( unsigned int foo, unsigned int *values ); int _papi_libpfm_ntv_code_to_info(unsigned int EventCode, PAPI_event_info_t *info); /* Gross perfctr/perf_events compatability hack */ /* need to think up a better way to handle this */ #ifndef __PERFMON_PERF_EVENT_H__ struct perf_event_attr { int config; int type; }; #define PERF_TYPE_RAW 4; #endif /* !__PERFMON_PERF_EVENT_H__ */ extern int _papi_libpfm_setup_counters( struct perf_event_attr *attr, hwd_register_t *ni_bits ); #endif // _PAPI_LIBPFM_EVENTS_H papi-5.6.0/src/components/appio/tests/iozone/gnu3d.dem000664 001750 001750 00000005312 13216244356 025000 0ustar00jshenry1963jshenry1963000000 000000 # # $Id: 3D plot of performance # # 
Processes files that were created by Generate_Graphs # and displays the results. Also, saves a postscript copy. # # Don Capps dirs = "write rewrite read reread randread randwrite bkwdread recrewrite strideread fwrite frewrite fread freread" titles = "Write ReWrite Read Reread Random_read Random_write Read_Backwards Record_rewrite Stride_read Fwrite Frewrite Fread Freread" file(n) = sprintf("%s/iozone_gen_out.gnuplot", word(dirs,n)) outfile(n) = sprintf("%s/%s.ps", word(dirs,n), word(dirs,n)) title(n) = word(titles,n) set title "Iozone performance" set grid lt 2 lw 1 set surface set parametric set xtics set ytics set logscale x 2 set logscale y 2 set xlabel "File size in 2^n KBytes" set ylabel "Record size in 2^n Kbytes" set zlabel "Kbytes/sec" set style data lines set dgrid3d 80,80,3 i = 1 set terminal x11 set output splot file(i) title title(i) pause -1 "Hit return to continue" set terminal postscript color set output outfile(i) replot i = 2 set terminal x11 set output replot pause -1 "Hit return to continue" set terminal postscript color set output outfile(i) replot i = 3 set terminal x11 set output replot pause -1 "Hit return to continue" set terminal postscript color set output outfile(i) replot i = 4 set terminal x11 set output replot pause -1 "Hit return to continue" set terminal postscript color set output outfile(i) replot i = 5 set terminal x11 set output replot pause -1 "Hit return to continue" set terminal postscript color set output outfile(i) replot i = 6 set terminal x11 set output replot pause -1 "Hit return to continue" set terminal postscript color set output outfile(i) replot i = 7 set terminal x11 set output replot pause -1 "Hit return to continue" set terminal postscript color set output outfile(i) replot i = 8 set terminal x11 set output replot pause -1 "Hit return to continue" set terminal postscript color set output outfile(i) replot i = 9 set terminal x11 set output replot pause -1 "Hit return to continue" set terminal postscript color set 
output outfile(i) replot i = 10 set terminal x11 set output replot pause -1 "Hit return to continue" set terminal postscript color set output outfile(i) replot i = 11 set terminal x11 set output replot pause -1 "Hit return to continue" set terminal postscript color set output outfile(i) replot i = 12 set terminal x11 set output replot pause -1 "Hit return to continue" set terminal postscript color set output outfile(i) replot i = 13 set terminal x11 set output replot pause -1 "Hit return to continue" set terminal postscript color set output outfile(i) replot papi-5.6.0/src/libpfm-3.y/docs/man3/pfm_set_options.3000664 001750 001750 00000003454 13216244362 024273 0ustar00jshenry1963jshenry1963000000 000000 .TH LIBPFM 3 "November, 2003" "" "Linux Programmer's Manual" .SH NAME pfm_set_options \- set performance monitoring library debug options .SH SYNOPSIS .nf .B #include .sp .BI "int pfm_set_options(pfmlib_options_t *"opt); .sp .SH DESCRIPTION This function can be called at any time to adjust the level of debug of the library. In both cases, extra output will be generated on standard error when the library gets called. This can be useful to figure out how the PMC registers are initialized for instance. .sp The opt argument to this function is a pointer to a .B pfmlib_options_t structure which is defined as follows: .sp .nf typedef struct { unsigned int pfm_debug:1; unsigned int pfm_verbose:1; } pfmlib_options_t; .fi .sp .sp Setting \fBpfm_debug\fR to 1 will enable debug messages whereas setting \fBpfm_verbose\fR will enable verbose messages. .SH ENVIRONMENT VARIABLES Setting library options with this function has lower priority than with environment variables. As such, the call to this function may not have any actual effects. A user can set the following environment variables to control verbosity and debug output: .TP .B LIBPFM_VERBOSE Enable verbose output. Value must be 0 or 1. When not set, verbosity level can be controlled with this function. 
.TP .B LIBPFM_DEBUG Enable debug output. Value must be 0 or 1. When not set, debug level can be controlled with this function. .LP .SH RETURN The function returns whether or not it was successful. A return value of \fBPFMLIB_SUCCESS\fR indicates success, otherwise the value is the error code. .sp When environment variables exist, they take precedence and this function returns \fBPFMLIB_SUCCESS\fR. .SH ERRORS .TP .B PFMLIB_ERR_INVAL the argument is invalid, most likely a NULL pointer. .SH AUTHOR Stephane Eranian .PP papi-5.6.0/src/perfctr-2.7.x/linux/Documentation/perfctr/low-level-x86.txt000664 001750 001750 00000032400 13216244370 030247 0ustar00jshenry1963jshenry1963000000 000000 $Id: low-level-x86.txt,v 1.2 2004/07/11 17:12:28 mikpe Exp $ PERFCTRS X86 LOW-LEVEL API ========================== See low-level-api.txt for the common low-level API. This document only describes x86-specific behaviour. For detailed hardware control register layouts, see the manufacturers' documentation. Contents ======== - Supported processors - Contents of - Processor-specific Notes - Implementation Notes Supported processors ==================== - Intel P5, P5MMX, P6, P4. - AMD K7, K8. (P6 clones, with some changes) - Cyrix 6x86MX, MII, and III. (good P5 clones) - Centaur WinChip C6, 2, and 3. (bad P5 clones) - VIA C3. (bad P6 clone) - Any generic x86 with a TSC. Contents of ================================ "struct perfctr_sum_ctrs" ------------------------- struct perfctr_sum_ctrs { unsigned long long tsc; unsigned long long pmc[18]; }; The pmc[] array has room for 18 counters. 
"struct perfctr_cpu_control" ---------------------------- struct perfctr_cpu_control { unsigned int tsc_on; unsigned int nractrs; /* # of a-mode counters */ unsigned int nrictrs; /* # of i-mode counters */ unsigned int pmc_map[18]; unsigned int evntsel[18]; /* one per counter, even on P5 */ struct { unsigned int escr[18]; unsigned int pebs_enable; /* for replay tagging */ unsigned int pebs_matrix_vert; /* for replay tagging */ } p4; int ireset[18]; /* < 0, for i-mode counters */ unsigned int _reserved1; unsigned int _reserved2; unsigned int _reserved3; unsigned int _reserved4; }; The per-counter arrays have room for 18 elements. ireset[] values must be negative, since overflow occurs on the negative-to-non-negative transition. The p4 sub-struct contains P4-specific control data: - escr[]: the control data to write to the ESCR register associated with the counter - pebs_enable: the control data to write to the PEBS_ENABLE MSR - pebs_matrix_vert: the control data to write to the PEBS_MATRIX_VERT MSR "struct perfctr_cpu_state" -------------------------- struct perfctr_cpu_state { unsigned int cstatus; struct { /* k1 is opaque in the user ABI */ unsigned int id; int isuspend_cpu; } k1; /* The two tsc fields must be inlined. Placing them in a sub-struct causes unwanted internal padding on x86-64. */ unsigned int tsc_start; unsigned long long tsc_sum; struct { unsigned int map; unsigned int start; unsigned long long sum; } pmc[18]; /* the size is not part of the user ABI */ #ifdef __KERNEL__ struct perfctr_cpu_control control; unsigned int p4_escr_map[18]; #endif }; The k1 sub-struct is used by the low-level driver for caching purposes. "id" identifies the control data, and "isuspend_cpu" identifies the CPU on which the i-mode counters were last suspended. The pmc[] array has room for 18 elements. p4_escr_map[] is computed from control by the low-level driver, and provides the MSR number for the counter's associated ESCR. 
User-space overflow signal handler items ---------------------------------------- #ifdef __KERNEL__ #define SI_PMC_OVF (__SI_FAULT|'P') #else #define SI_PMC_OVF ('P') #endif #define si_pmc_ovf_mask _sifields._pad[0] Kernel-internal API ------------------- In perfctr_cpu_update_control(), the is_global parameter controls whether monitoring the other thread (T1) on HT P4s is permitted or not. On other processors the parameter is ignored. SMP kernels define CONFIG_PERFCTR_CPUS_FORBIDDEN_MASK and "extern cpumask_t perfctr_cpus_forbidden_mask;". On HT P4s, resource conflicts can occur because both threads (T0 and T1) in a processor share the same perfctr registers. To prevent conflicts, only thread 0 in each processor is allowed to access the counters. perfctr_cpus_forbidden_mask contains the smp_processor_id()s of each processor's thread 1, and it is the responsibility of the high-level driver to ensure that it never accesses the perfctr state from a forbidden thread. Overflow interrupt handling requires local APIC support in the kernel. Processor-specific Notes ======================== General ------- pmc_map[] contains a counter number, as used by the RDPMC instruction. It never contains an MSR number. Counters are 32, 40, or 48 bits wide. The driver always only reads the low 32 bits. This avoids performance issues, and errata on some processors. Writing to counters or their control registers tends to be very expensive. This is why a-mode counters only use read operations on the counter registers. Caching of control register contents is done to avoid writing them. "Suspend CPU" is recorded for i-mode counters to avoid writing the counter registers when the counters are resumed (their control registers must be written at both suspend and resume, however). Some processors are unable to stop the counters (Centaur/VIA), and some are unable to reinitialise them to arbitrary values (P6). 
Storing the counters' total counts in the hardware counters would break as soon as context-switches occur. This is another reason why the accumulate-differences method for maintaining the counter values is used. Intel P5 -------- The hardware stores both counters' control data in a single control register, the CESR MSR. The evntsel values are limited to 16 bits each, and are combined by the low-level driver to form the value for the CESR. Apart from that, the evntsel values are direct images of the CESR. Bits 0xFE00 in an evntsel value are reserved. At least one evntsel CPL bit (0x00C0) must be set. For Cyrix' P5 clones, evntsel bits 0xFA00 are reserved. For Centaur's P5 clones, evntsel bits 0xFF00 are reserved. It has no CPL bits to set. The TSC is broken and cannot be used. Intel P6 -------- The evntsel values are mapped directly onto the counters' EVNTSEL control registers. The global enable bit (22) in EVNTSEL0 must be set. That bit is reserved in EVNTSEL1. Bits 21 and 19 (0x00280000) in each evntsel are reserved. For an i-mode counter, bit 20 (0x00100000) of its evntsel must be set. For a-mode counters, that bit must not be set. Hardware quirk: Counters are 40 bits wide, but writing to a counter only writes the low 32 bits: remaining bits are sign-extended from bit 31. AMD K7/K8 --------- Similar to Intel P6. The main difference is that each evntsel has its own enable bit, which must be set. VIA C3 ------ Superficially similar to Intel P6, but only PERFCTR1/EVNTSEL1 are programmable. pmc_map[0] must be 1, if nractrs == 1. Bits 0xFFFFFE00 in the evntsel are reserved. There are no auxiliary control bits to set. Generic ------- Only permits TSC sampling, with tsc_on == 1 and nractrs == nrictrs == 0 in the control data. Intel P4 -------- For each counter, its evntsel[] value is mapped onto its CCCR control register, and its p4.escr[] value is mapped onto its associated ESCR control register. 
The ESCR register number is computed from the hardware counter number (from pmc_map[]) and the ESCR SELECT field in the CCCR, and is cached in p4_escr_map[]. pmc_map[] contains the value to pass to RDPMC when reading the counter. It is strongly recommended to set bit 31 (fast rdpmc). In each evntsel/CCCR value: - the OVF, OVF_PMI_T1 and hardware-reserved bits (0xB80007FF) are reserved and must not be set - bit 11 (EXTENDED_CASCADE) is only permitted on P4 models >= 2, and for counters 12 and 15-17 - bits 16 and 17 (ACTIVE_THREAD) must both be set on non-HT processors - at least one of bits 12 (ENABLE), 30 (CASCADE), or 11 (EXTENDED_CASCADE) must be set - bit 26 (OVF_PMI_T0) must be clear for a-mode counters, and set for i-mode counters; if bit 25 (FORCE_OVF) also is set, then the corresponding ireset[] value must be exactly -1 In each p4.escr[] value: - bit 32 is reserved and must not be set - the CPL_T1 field (bits 0 and 1) must be zero except on HT processors when global-mode counters are used - IQ_ESCR0 and IQ_ESCR1 can only be used on P4 models <= 2 PEBS is not supported, but the replay tagging bits in PEBS_ENABLE and PEBS_MATRIX_VERT may be used. If p4.pebs_enable is zero, then p4.pebs_matrix_vert must also be zero. 
If p4.pebs_enable is non-zero: - only bits 24, 10, 9, 2, 1, and 0 may be set; note that in contrast to Intel's documentation, bit 25 (ENABLE_PEBS_MY_THR) is not needed and must not be set - bit 24 (UOP_TAG) must be set - at least one of bits 10, 9, 2, 1, or 0 must be set - in p4.pebs_matrix_vert, all bits except 1 and 0 must be clear, and at least one of bits 1 and 0 must be set Implementation Notes ==================== Caching ------- Each 'struct perfctr_cpu_state' contains two cache-related fields: - 'id': a unique identifier for the control data contents - 'isuspend_cpu': the identity of the CPU on which a state containing interrupt-mode counters was last suspended To this the driver adds a per-CPU cache, recording: - the 'id' of the control data currently in that CPU - the current contents of each control register When perfctr_cpu_update_control() has validated the new control data, it also updates the id field. The driver's internal 'write_control' function, called from the perfctr_cpu_resume() API function, first checks if the state's id matches that of the CPU's cache, and if so, returns. Otherwise it checks each control register in the state and updates those that do not match the cache. Finally, it writes the state's id to the cache. Tests on various x86 processor types have shown that MSR writes are very expensive: the purpose of these cache checks is to avoid MSR writes whenever possible. Unlike accumulation-mode counters, interrupt-mode counters must be physically stopped when suspended, primarily to avoid overflow interrupts in contexts not expecting them, and secondarily to avoid increments to the counters themselves (see below). 
When suspending interrupt-mode counters, the driver: - records the CPU identity in the per-CPU cache - stops each interrupt-mode counter by disabling its control register - lets the cache and state id values remain the same Later, when resuming interrupt-mode counters, the driver: - if the state and cache id values match: * the cache id is cleared, to force a reload of the control registers stopped at suspend (see below) * if the state's "suspend" CPU identity matches the current CPU, the counter registers are still valid, and the procedure returns - if the procedure did not return above, it then loops over each interrupt-mode counter: * the counter's control register is physically disabled, unless the cache indicates that it already is disabled; this is necessary to prevent premature events and overflow interrupts if the CPU's registers previously belonged to some other state * then the counter register itself is restored After this interrupt-mode specific resume code is complete, the driver continues by calling 'write_control' as described above. The state and cache ids will not match, forcing write_control to reload the disabled interrupt-mode control registers. Call-site Backpatching ---------------------- The x86 family of processors is quite diverse in how their performance counters work and are accessed. There are three main designs (P5, P6, and P4) with several variations. To handle this the processor type detection and initialisation code sets up a number of function pointers to point to the correct procedures for the actual CPU type. Calls via function pointers are more expensive than direct calls, so the driver actually performs direct calls to wrappers that backpatch the original call sites to instead call the actual CPU-specific functions in the future. 
Unsynchronised code backpatching in SMP systems doesn't work on Intel P6 processors due to an erratum, so the driver performs a "finalise backpatching" step after the CPU-specific function pointers have been set up. This step invokes the API procedures on a temporary state object, set up to force every backpatchable call site to be invoked and adjusted. Several low-level API procedures are called in the context-switch path by the per-process perfctrs kernel extension, which motivates the efforts to reduce runtime overheads as much as possible. Overflow Interrupts ------------------- The x86 hardware enables overflow interrupts via the local APIC's LVTPC entry, which is only present in P6/K7/K8/P4. The low-level driver supports overflow interrupts as follows: - It reserves a local APIC vector, 0xee, as LOCAL_PERFCTR_VECTOR. - It adds a local APIC exception handler to entry.S, which invokes the driver's smp_perfctr_interrupt() procedure. - It adds code to i8259.c to bind the LOCAL_PERFCTR_VECTOR interrupt gate to the exception handler in entry.S. - During processor type detection, it records whether the processor supports the local APIC, and sets up function pointers for the suspend and resume operations on interrupt-mode counters. - When the low-level driver is activated, it enables overflow interrupts by writing LOCAL_PERFCTR_VECTOR to each CPU's APIC_LVTPC. - Overflow interrupts now end up in smp_perfctr_interrupt(), which ACKs the interrupt and invokes the interrupt handler installed by the high-level service/driver. - When the low-level driver is deactivated, it disables overflow interrupts by masking APIC_LVTPC in each CPU. It then releases the local APIC back to the NMI watchdog. At compile-time, the low-level driver indicates overflow interrupt support by enabling CONFIG_PERFCTR_INTERRUPT_SUPPORT. If the feature is also available at runtime, it sets the PERFCTR_FEATURE_PCINT flag in the perfctr_info object. 
papi-5.6.0/src/libpfm-3.y/examples_v3.x/self.c000664 001750 001750 00000015653 13216244362 023006 0ustar00jshenry1963jshenry1963000000 000000 /* * self.c - example of a simple self monitoring task * * Copyright (c) 2002-2007 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * This file is part of libpfm, a performance monitoring support library for * applications on Linux. */ #include #include #include #include #include #include #include #include #include #include #include #include "detect_pmcs.h" #define NUM_PMCS PFMLIB_MAX_PMCS #define NUM_PMDS PFMLIB_MAX_PMDS /* * stop flag: written by sig_handler(), polled by noploop() */ static volatile int quit; /* * signal handler: raises the quit flag so that noploop() returns */ void sig_handler(int n) { quit = 1; } /* * our test code (function cannot be made static otherwise it is optimized away) */ void noploop(void) { for(;quit == 0;); } static void fatal_error(char *fmt,...) __attribute__((noreturn)); static void fatal_error(char *fmt, ...) 
{ va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); exit(1); } int main(int argc, char **argv) { char **p; unsigned int i; int ret, ctx_fd; pfmlib_input_param_t inp; pfmlib_output_param_t outp; pfarg_pmr_t pd[NUM_PMDS]; pfarg_pmr_t pc[NUM_PMCS]; pfarg_sinfo_t sif; pfmlib_options_t pfmlib_options; unsigned int num_counters; size_t len; char *name; /* * pass options to library (optional) */ memset(&pfmlib_options, 0, sizeof(pfmlib_options)); pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ pfmlib_options.pfm_verbose = 1; /* set to 1 for verbose */ pfm_set_options(&pfmlib_options); /* * Initialize pfm library (required before we can use it) */ ret = pfm_initialize(); if (ret != PFMLIB_SUCCESS) fatal_error("Cannot initialize library: %s\n", pfm_strerror(ret)); pfm_get_max_event_name_len(&len); name = malloc(len+1); if (!name) fatal_error("cannot allocate event name buffer\n"); pfm_get_num_counters(&num_counters); memset(pd, 0, sizeof(pd)); memset(pc, 0, sizeof(pc)); memset(&sif, 0, sizeof(sif)); /* * prepare parameters to library. */ memset(&inp,0, sizeof(inp)); memset(&outp,0, sizeof(outp)); /* * be nice to user! 
*/ if (argc > 1) { p = argv+1; for (i=0; *p ; i++, p++) { ret = pfm_find_full_event(*p, &inp.pfp_events[i]); if (ret != PFMLIB_SUCCESS) fatal_error("event %s: %s\n", *p, pfm_strerror(ret)); } } else { if (pfm_get_cycle_event(&inp.pfp_events[0]) != PFMLIB_SUCCESS) fatal_error("cannot find cycle event\n"); if (pfm_get_inst_retired_event(&inp.pfp_events[1]) != PFMLIB_SUCCESS) fatal_error("cannot find inst retired event\n"); i = 2; } /* * set the default privilege mode for all counters: * PFM_PLM3 : user level only */ inp.pfp_dfl_plm = PFM_PLM3; if (i > num_counters) { i = num_counters; printf("too many events provided (max=%d events), using first %d event(s)\n", num_counters, i); } /* * how many counters we use */ inp.pfp_event_count = i; /* * now create a new per-thread session * This just creates a new session with some initial state, it is not * active nor attached to any thread yet. */ ctx_fd = pfm_create(0, &sif); if (ctx_fd == -1) { if (errno == ENOSYS) fatal_error("Your kernel does not have performance monitoring support!\n"); fatal_error("cannot create session %s\n", strerror(errno)); } /* * build the pfp_unavail_pmcs bitmask by looking * at what perfmon has available. It is not always * the case that all PMU registers are actually available * to applications. For instance, on IA-32 platforms, some * registers may be reserved for the NMI watchdog timer. * * With this bitmap, the library knows which registers NOT to * use. Of source, it is possible that no valid assignement may * be possible if certain PMU registers are not available. */ detect_unavail_pmu_regs(&sif, &inp.pfp_unavail_pmcs, NULL); /* * let the library figure out the values for the PMCS */ if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); /* * Now prepare the argument to initialize the PMDs and PMCS. * We use pfp_pmc_count to determine the number of PMC to intialize. 
* We use pfp_pmd_count to determine the number of PMD to initialize. * Some events/features may cause extra PMCs to be used, leading to: * - pfp_pmc_count may be >= pfp_event_count * - pfp_pmd_count may be >= pfp_event_count */ for (i=0; i < outp.pfp_pmc_count; i++) { pc[i].reg_num = outp.pfp_pmcs[i].reg_num; pc[i].reg_value = outp.pfp_pmcs[i].reg_value; } for (i=0; i < outp.pfp_pmd_count; i++) { pd[i].reg_num = outp.pfp_pmds[i].reg_num; } /* * Now program the registers */ if (pfm_write(ctx_fd, 0, PFM_RW_PMC, pc, outp.pfp_pmc_count * sizeof(*pc))) fatal_error("pfm_write error errno %d\n",errno); if (pfm_write(ctx_fd, 0, PFM_RW_PMD, pd, outp.pfp_pmd_count * sizeof(*pd))) fatal_error("pfm_write(PMD) error errno %d\n",errno); /* * now we attach the session to ourself */ if (pfm_attach(ctx_fd, 0, getpid())) fatal_error("pfm_attacherror errno %d\n",errno); /* * Let's roll now */ if (pfm_set_state(ctx_fd, 0, PFM_ST_START)) fatal_error("pfm_set_state(start) error errno %d\n",errno); signal(SIGALRM, sig_handler); alarm(10); noploop(); if (pfm_set_state(ctx_fd, 0, PFM_ST_STOP)) fatal_error("pfm_set_state(stop) error errno %d\n",errno); /* * now read the results. We use pfp_event_count because * libpfm guarantees that counters for the events always * come first. */ if (pfm_read(ctx_fd, 0, PFM_RW_PMD, pd, inp.pfp_event_count * sizeof(*pd))) fatal_error( "pfm_read error errno %d\n",errno); /* * print the results */ for (i=0; i < inp.pfp_event_count; i++) { pfm_get_full_event_name(&inp.pfp_events[i], name, len+1); printf("PMD%-3u %20"PRIu64" %s\n", pd[i].reg_num, pd[i].reg_value, name); } free(name); /* * and destroy our session */ close(ctx_fd); return 0; } papi-5.6.0/src/libpfm-3.y/include/perfmon/pfmlib_os.h000664 001750 001750 00000003442 13216244362 024422 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2003-2006 Hewlett-Packard Development Company, L.P. 
* Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #ifndef __PFMLIB_OS_H__ #define __PFMLIB_OS_H__ #ifdef __linux__ #ifdef __ia64__ #include #endif #ifdef __x86_64__ #include #endif #ifdef __i386__ #include #endif #if defined(__mips__) #include #endif #ifdef __powerpc__ #include #endif #ifdef __sparc__ #include #endif #ifdef __cell__ #include #endif #ifdef __crayx2 #include #endif #endif /* __linux__ */ #endif /* __PFMLIB_OS_H__ */ papi-5.6.0/src/components/example/tests/Makefile000664 001750 001750 00000001101 13216244357 023742 0ustar00jshenry1963jshenry1963000000 000000 NAME=example include ../../Makefile_comp_tests.target %.o:%.c $(CC) $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c -o $@ $< TESTS = example_basic example_multiple_components example_tests: $(TESTS) example_basic: example_basic.o $(UTILOBJS) $(PAPILIB) $(CC) $(CFLAGS) $(INCLUDE) -o example_basic example_basic.o $(UTILOBJS) $(PAPILIB) $(LDFLAGS) example_multiple_components: example_multiple_components.o $(UTILOBJS) $(PAPILIB) $(CC) $(CFLAGS) $(INCLUDE) -o example_multiple_components example_multiple_components.o $(UTILOBJS) $(PAPILIB) $(LDFLAGS) clean: rm -f $(TESTS) *.o papi-5.6.0/src/components/infiniband/tests/Makefile000664 001750 001750 00000000742 13216244357 024422 0ustar00jshenry1963jshenry1963000000 000000 NAME=infiniband include ../../Makefile_comp_tests.target TESTS = infiniband_list_events infiniband_values_by_code infiniband_tests: $(TESTS) %.o:%.c $(CC) $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c -o $@ $< infiniband_list_events: infiniband_list_events.o $(UTILOBJS) $(PAPILIB) $(CC) $(CFLAGS) $(INCLUDE) -o $@ $^ $(LDFLAGS) infiniband_values_by_code: infiniband_values_by_code.o $(UTILOBJS) $(PAPILIB) $(CC) $(CFLAGS) $(INCLUDE) -o $@ $^ $(LDFLAGS) clean: rm -f $(TESTS) *.o papi-5.6.0/src/components/bgpm/IOunit/Rules.IOunit000664 001750 001750 00000000432 13216244356 024071 0ustar00jshenry1963jshenry1963000000 000000 # $Id$ COMPSRCS += components/bgpm/IOunit/linux-IOunit.c COMPOBJS += linux-IOunit.o linux-IOunit.o: components/bgpm/IOunit/linux-IOunit.c 
components/bgpm/IOunit/linux-IOunit.h $(HEADERS) $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/bgpm/IOunit/linux-IOunit.c -o linux-IOunit.o papi-5.6.0/src/libpfm-3.y/lib/crayx2_events.h000664 001750 001750 00003111527 13216244363 022731 0ustar00jshenry1963jshenry1963000000 000000 /* * Copyright (c) 2007 Cray Inc. * Contributed by Steve Kaufmann based on code from * Copyright (c) 2001-2006 Hewlett-Packard Development Company, L.P. * Contributed by Stephane Eranian * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #ifndef __CRAYX2_EVENTS_H__ #define __CRAYX2_EVENTS_H__ 1 #include "pfmlib_crayx2_priv.h" /* ***************************************************************** ******* THIS TABLE IS GENERATED AUTOMATICALLY ******* MODIFICATIONS REQUIRED FOR THE EVENT NAMES ******* OR EVENT DESCRIPTIONS SHOULD BE MADE TO ******* THE TEXT FILE AND THE TABLE REGENERATED ******* Sat Nov 10 14:40:30 CST 2007 ***************************************************************** */ static pme_crayx2_entry_t crayx2_pe[ ] = { /* P Counter 0 Event 0 */ { .pme_name = "CYCLES", .pme_desc = "Cycles.", .pme_code = 0, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 0, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 0 Event 1 */ { .pme_name = "CYCLES", .pme_desc = "Cycles.", .pme_code = 1, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 0, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 0 Event 2 */ { .pme_name = "CYCLES", .pme_desc = "Cycles.", .pme_code = 2, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 0, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 0 Event 3 */ { .pme_name = "CYCLES", .pme_desc = "Cycles.", .pme_code = 3, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 0, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 1 Event 0 */ { .pme_name = "INST_GRAD", .pme_desc = "Number of instructions graduated.", .pme_code = 4, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 1, 
.pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 1 Event 1 */ { .pme_name = "INST_GRAD", .pme_desc = "Number of instructions graduated.", .pme_code = 5, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 1, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 1 Event 2 */ { .pme_name = "INST_GRAD", .pme_desc = "Number of instructions graduated.", .pme_code = 6, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 1, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 1 Event 3 */ { .pme_name = "INST_GRAD", .pme_desc = "Number of instructions graduated.", .pme_code = 7, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 1, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 2 Event 0 */ { .pme_name = "INST_DISPATCH", .pme_desc = "Number of instructions dispatched.", .pme_code = 8, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 2, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 2 Event 1 */ { .pme_name = "ITLB_MISS", .pme_desc = "Number of Instruction TLB misses.", .pme_code = 9, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 2, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 2 Event 2 */ { .pme_name = "JB_CORRECT", .pme_desc = "Number of jumps and 
branches predicted correctly.", .pme_code = 10, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 2, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 2 Event 3 */ { .pme_name = "STALL_VU_FUG1", .pme_desc = "CPs VU stalled waiting for FUG 1.", .pme_code = 11, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 2, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 3 Event 0 */ { .pme_name = "INST_SYNCS", .pme_desc = "Number of synchronization instructions graduated g=02.", .pme_code = 12, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 3, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 3 Event 1 */ { .pme_name = "INST_GSYNCS", .pme_desc = "Number of Gsync instructions graduated g=02 & f=0-3.", .pme_code = 13, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 3, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 3 Event 2 */ { .pme_name = "STALL_DU_ICACHE", .pme_desc = "CPs dispatch stalled waiting for instruction from Icache.", .pme_code = 14, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 3, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 3 Event 3 */ { .pme_name = "STALL_VU_FUG2", .pme_desc = "CPs VU stalled waiting for FUG 2.", .pme_code = 15, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 3, .pme_event = 3, .pme_chipno 
= 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 4 Event 0 */ { .pme_name = "INST_AMO", .pme_desc = "Number of AMO instructions graduated g=04.", .pme_code = 16, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 4, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 4 Event 1 */ { .pme_name = "ICACHE_FETCH", .pme_desc = "Number of instruction fetch requests to memory.", .pme_code = 17, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 4, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 4 Event 2 */ { .pme_name = "STALL_DU_BRANCH_PRED", .pme_desc = "CPs Dispatch stalled waiting for branch prediction register.", .pme_code = 18, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 4, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 4 Event 3 */ { .pme_name = "STALL_VU_FUG3", .pme_desc = "CPs VU stalled waiting for FUG 3.", .pme_code = 19, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 4, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 5 Event 0 */ { .pme_name = "INST_A", .pme_desc = "Number of A register instructions graduated g=05,40,42,43.", .pme_code = 20, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 5, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 5 Event 1 */ { 
.pme_name = "ICACHE_HIT", .pme_desc = "Number of Icache hits.", .pme_code = 21, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 5, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 5 Event 2 */ { .pme_name = "STALL_DU_AREG", .pme_desc = "CPs instruction dispatch stalled waiting for free A register.", .pme_code = 22, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 5, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 5 Event 3 */ { .pme_name = "STALL_VU", .pme_desc = "CPs VU is stalled with a valid instruction.", .pme_code = 23, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 5, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 6 Event 0 */ { .pme_name = "INST_S_INT", .pme_desc = "Number of S register integer instructions graduated g=60,62 & t1=1,63.", .pme_code = 24, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 6, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 6 Event 1 */ { .pme_name = "INST_MSYNCS", .pme_desc = "Number of Msync instructions graduated g=02 & f=20-22.", .pme_code = 25, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 6, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 6 Event 2 */ { .pme_name = "STALL_DU_ACT_LIST_FULL", .pme_desc = "CPs dispatch stalled waiting for active list entry.", .pme_code = 26, .pme_flags = 0x0, 
.pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 6, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 6 Event 3 */ { .pme_name = "STALL_VU_NO_INST", .pme_desc = "CPs VU has no valid instruction.", .pme_code = 27, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 6, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 7 Event 0 */ { .pme_name = "INST_S_FP", .pme_desc = "Number of S register FP instructions graduated g=62 & t1=0.", .pme_code = 28, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 7, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 7 Event 1 */ { .pme_name = "STLB_MISS", .pme_desc = "Number of Scalar TLB misses.", .pme_code = 29, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 7, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 7 Event 2 */ { .pme_name = "STALL_DU_SREG", .pme_desc = "CPs instruction dispatch stalled waiting for free S register.", .pme_code = 30, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 7, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 7 Event 3 */ { .pme_name = "STALL_VU_VR", .pme_desc = "CPs VU is stalled waiting for busy V Reg.", .pme_code = 31, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 7, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = 
PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 8 Event 0 */ { .pme_name = "INST_MISC", .pme_desc = "Number of Misc. scalar instructions graduated g=00, 01, 03, 06, 34.", .pme_code = 32, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 8, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 8 Event 1 */ { .pme_name = "VTLB_MISS", .pme_desc = "Number of vector TLB misses.", .pme_code = 33, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 8, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 8 Event 2 */ { .pme_name = "STALL_DU_INST", .pme_desc = "CPs dispatch stalled due to an instruction such as a Gsync or Lsync FP that stops dispatch until it executes.", .pme_code = 34, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 8, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 8 Event 3 */ { .pme_name = "STALL_VLSU_NO_INST", .pme_desc = "CPs VLSU has no valid instruction.", .pme_code = 35, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 8, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 9 Event 0 */ { .pme_name = "INST_JB", .pme_desc = "Number of Jump and Branch instructions graduated g=50-57, 70-76.", .pme_code = 36, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 9, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 9 Event 1 
*/ { .pme_name = "ICACHE_MISS", .pme_desc = "Number of Icache misses.", .pme_code = 37, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 9, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 9 Event 2 */ { .pme_name = "STALL_GRAD", .pme_desc = "CPs no instructions graduate for any reason.", .pme_code = 38, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 9, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 9 Event 3 */ { .pme_name = "STALL_VLSU_LB", .pme_desc = "CPs VLSU stalled waiting for load buffers (LB).", .pme_code = 39, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 9, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 10 Event 0 */ { .pme_name = "INST_MEM", .pme_desc = "Number of A and S register load and store instructions graduated g=41, 44-47, 61, 64-67.", .pme_code = 40, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 10, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 10 Event 1 */ { .pme_name = "ICACHE_HIT_PEND", .pme_desc = "Number of Icache hits to blocks with allocations pending.", .pme_code = 41, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 10, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 10 Event 2 */ { .pme_name = "STALL_GRAD_NO_INST", .pme_desc = "CPs no instructions graduated due to empty active list.", .pme_code = 42, 
.pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 10, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 10 Event 3 */ { .pme_name = "STALL_VLSU_SB", .pme_desc = "CPs VLSU stalled waiting for store buffer (SB).", .pme_code = 43, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 10, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 11 Event 0 */ { .pme_name = "INST_VFUG1", .pme_desc = "Number of vector FUG 1 instructions graduated g=20-27, f=0-7,60-77 Add, sub, compare.", .pme_code = 44, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 11, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 11 Event 1 */ { .pme_name = "TLB_MISS", .pme_desc = "Total number of TLB misses including ITLB, STLB, and VTLB.", .pme_code = 45, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 11, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 11 Event 2 */ { .pme_name = "STALL_GRAD_AX_INST", .pme_desc = "CPs no instructions graduate and an A FUG instruction is at the head of the active list g=5, 40, 42, 43.", .pme_code = 46, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 11, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 11 Event 3 */ { .pme_name = "STALL_VLSU_RB", .pme_desc = "CPs VLSU stalled waiting for request buffer (RB).", .pme_code = 47, .pme_flags = 0x0, .pme_numasks = 
0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 11, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 12 Event 0 */ { .pme_name = "INST_VFUG2", .pme_desc = "Number of vector FUG 2 instructions graduated g=20-27, f=30-37 (multiply, shift).", .pme_code = 48, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 12, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 12 Event 1 */ { .pme_name = "DCACHE_HIT", .pme_desc = "Number of A or S loads that hit in the Dcache.", .pme_code = 49, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 12, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 12 Event 2 */ { .pme_name = "STALL_GRAD_SX_INST", .pme_desc = "CPs no instructions graduate and an S FUG instruction is at the head of the active list g=60, 62, 63.", .pme_code = 50, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 12, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 12 Event 3 */ { .pme_name = "STALL_VLSU_VM", .pme_desc = "CPs VLSU stalled waiting for VU vector mask (VM).", .pme_code = 51, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 12, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 13 Event 0 */ { .pme_name = "INST_VFUG3", .pme_desc = "Number of vector FUG 3 instructions graduated g=20-27, f=10-27, 40-57, 77 div, sqrt, abs, cpsign, compress, merge, logical, bmm.", .pme_code = 52, 
.pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 13, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 13 Event 1 */ { .pme_name = "DCACHE_MISS", .pme_desc = "Number of A or S loads that miss in the Dcache.", .pme_code = 53, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 13, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 13 Event 2 */ { .pme_name = "STALL_GRAD_FP_INST", .pme_desc = "CPs no instructions graduate and an S FP instruction is at the head of the active list g=62, t1=0.", .pme_code = 54, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 13, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 13 Event 3 */ { .pme_name = "STALL_VLSU_SREF", .pme_desc = "CPs VLSU stalled waiting for prior scalar instruction reference sent.", .pme_code = 55, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 13, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 14 Event 0 */ { .pme_name = "VOPS_EXT_FUG3", .pme_desc = "Number of vector FUG 3 external operations g=20-27 f=25,57,77 compress, merge, bmm.", .pme_code = 56, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 14, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 14 Event 1 */ { .pme_name = "DCACHE_HIT_PEND", .pme_desc = "Number of scalar loads that hit in the Dcache and in the FOQ and the load is merged with 
a pending allocation.", .pme_code = 57, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 14, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 14 Event 2 */ { .pme_name = "STALL_GRAD_LOAD_INST", .pme_desc = "CPs no instructions graduate and a scalar load is at the head of the active list.", .pme_code = 58, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 14, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 14 Event 3 */ { .pme_name = "STALL_VLSU_INDEX", .pme_desc = "CPS VLSU stalled waiting for busy scatter or gather index register.", .pme_code = 59, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 14, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 15 Event 0 */ { .pme_name = "VOPS_LOG_FUG3", .pme_desc = "Number of vector FUG 3 logical operations.", .pme_code = 60, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 15, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 15 Event 1 */ { .pme_name = "DCACHE_HIT_WORD", .pme_desc = "Number of scalar loads that hit in the Dcache and hit in the FOQ and were not merged with a pending allocation.", .pme_code = 61, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 15, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 15 Event 2 */ { .pme_name = "STALL_GRAD_STORE_INST", .pme_desc = "CPs no instructions graduate and 
a scalar store is at the head of the active list.", .pme_code = 62, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 15, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 15 Event 3 */ { .pme_name = "STALL_VLSU_FOM", .pme_desc = "CPs VLSU stalled in forced order mode.", .pme_code = 63, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 15, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 16 Event 0 */ { .pme_name = "INST_V", .pme_desc = "Number of elemental vector instructions graduated g=20-27, 30-33.", .pme_code = 64, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 16, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 16 Event 1 */ { .pme_name = "INST_V_INT", .pme_desc = "Number of elemental vector integer instructions graduated g=20-27 & t1=", .pme_code = 65, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 16, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 16 Event 2 */ { .pme_name = "INST_V_FP", .pme_desc = "Number of elemental vector FP instructions graduated g=20-27 & t1=0.", .pme_code = 66, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 16, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 16 Event 3 */ { .pme_name = "INST_V_MEM", .pme_desc = "Number of elemental vector memory instructions graduated g=30-33.", .pme_code = 67, .pme_flags = 
0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 16, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 17 Event 0 */ { .pme_name = "VOPS_VL", .pme_desc = "Inst_V * Current VL.", .pme_code = 68, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 17, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 17 Event 1 */ { .pme_name = "DCACHE_INVAL_V", .pme_desc = "Number of Dcache invalidates due to vector stores.", .pme_code = 69, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 17, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 17 Event 2 */ { .pme_name = "VOPS_VL_32-BIT", .pme_desc = "Inst_V * Current VL for 32-bit operations only.", .pme_code = 70, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 17, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 17 Event 3 */ { .pme_name = "STALL_VLSU", .pme_desc = "Stall vector load store for any reason.", .pme_code = 71, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 17, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 18 Event 0 */ { .pme_name = "VOPS_INT_ADD", .pme_desc = "Number of selected vector integer add operations g=20-27 & f=0-3 & t1=", .pme_code = 72, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 18, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = 
PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 18 Event 1 */ { .pme_name = "DCACHE_INVAL_L2", .pme_desc = "Number of Dcache invalidates from L2 cache.", .pme_code = 73, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 18, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 18 Event 2 */ { .pme_name = "STALL_GRAD_XFER_INST", .pme_desc = "Number of CPs no instruction graduates and an A to S or S to A move is at the head of the active list.", .pme_code = 74, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 18, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 18 Event 3 */ { .pme_name = "STALL_VU_VM", .pme_desc = "CPs VU stalled waiting for vector mask.", .pme_code = 75, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 18, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 19 Event 0 */ { .pme_name = "VOPS_FP_ADD", .pme_desc = "Number of selected vector FP add operations g=20-27 & f=0-3 & t1=0.", .pme_code = 76, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 19, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 19 Event 1 */ { .pme_name = "DCACHE_INVALIDATE", .pme_desc = "Total Number of Dcache invalidates.", .pme_code = 77, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 19, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P 
Counter 19 Event 2 */ { .pme_name = "STALL_GRAD_VXFER_INST", .pme_desc = "CPs no instruction graduates and a V to A or V to S move is at the head of the active list.", .pme_code = 78, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 19, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 19 Event 3 */ { .pme_name = "STALL_VU_VR_MEM", .pme_desc = "CPs VU is stalled waiting on a busy vector register being loaded from memory.", .pme_code = 79, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 19, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 20 Event 0 */ { .pme_name = "VOPS_INT_LOG", .pme_desc = "Number of selected vector integer logical operations g=20-27 & f=10-27 & t1=1.", .pme_code = 80, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 20, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 20 Event 1 */ { .pme_name = "BRANCH_PRED", .pme_desc = "Number of branches predicted.", .pme_code = 81, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 20, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 20 Event 2 */ { .pme_name = "STALL_GRAD_VLSU_INST", .pme_desc = "Number of CPs no instruction graduates and a vector load, store, or AMO instruction is at the head of the active list.", .pme_code = 82, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 20, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, 
.pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 20 Event 3 */ { .pme_name = "STALL_VU_TLB", .pme_desc = "CPs VU stalled waiting for a memory translation.", .pme_code = 83, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 20, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 21 Event 0 */ { .pme_name = "VOPS_FP_DIV", .pme_desc = "Number of selected vector FP divide and sqrt operations g=20-27 & f=10-11 & t1=0.", .pme_code = 84, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 21, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 21 Event 1 */ { .pme_name = "BRANCH_CORRECT", .pme_desc = "Number of branches predicted correctly.", .pme_code = 85, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 21, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 21 Event 2 */ { .pme_name = "STALL_SLSQ_DEST", .pme_desc = "SLS issue stall for FOQ, PARB, ORB full or Lsync vs active.", .pme_code = 86, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 21, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 21 Event 3 */ { .pme_name = "STALL_VLSU_VK_PORT", .pme_desc = "CPs VLSU stalled waiting for scatter or gather index register read port.", .pme_code = 87, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 21, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 22 Event 0 */ { 
.pme_name = "VOPS_INT_SHIFT", .pme_desc = "Number of selected vector integer shift operations g=20-27 & f=30-37 & t1=", .pme_code = 88, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 22, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 22 Event 1 */ { .pme_name = "JTB_PRED", .pme_desc = "Number of jumps predicted g=57 & f=0,20.", .pme_code = 89, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 22, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 22 Event 2 */ { .pme_name = "STALL_GRAD_ARQ_DEST", .pme_desc = "Stall arq issue due to vdispatch, control unit, or A to S full.", .pme_code = 90, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 22, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 22 Event 3 */ { .pme_name = "STALL_VLSU_ADR_PORT", .pme_desc = "CPs VLSU stalled waiting for address read port.", .pme_code = 91, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 22, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 23 Event 0 */ { .pme_name = "VOPS_FP_MULT", .pme_desc = "Number of selected vector FP multiply operations g=20-27 & f=30-37 & t1=0.", .pme_code = 92, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 23, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 23 Event 1 */ { .pme_name = "JTB_CORRECT", .pme_desc = "Number of jumps predicted 
correctly g=57 & f=0,20.", .pme_code = 93, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 23, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 23 Event 2 */ { .pme_name = "STALL_SRQ_DEST", .pme_desc = "Stall srq issue due to vdispatch or S to A full.", .pme_code = 94, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 23, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 23 Event 3 */ { .pme_name = "STALL_VLSU_MISC", .pme_desc = "CPs VLSU stalled due to miscellaneous instructions.", .pme_code = 95, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 23, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 24 Event 0 */ { .pme_name = "VOPS_LOAD_INDEX", .pme_desc = "Number of selected vector load indexed references g=30-33 & f2=1 & f0=0.", .pme_code = 96, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 24, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 24 Event 1 */ { .pme_name = "VOPS_INT_MISC", .pme_desc = "Number of selected vector integer misc. 
operations g=20-27 & f=40-77 & t1=", .pme_code = 97, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 24, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 24 Event 2 */ { .pme_name = "INST_LSYNCVS", .pme_desc = "Number of LsyncVS instructions graduated.", .pme_code = 98, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 24, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 24 Event 3 */ { .pme_name = "VOPS_VL_64-BIT", .pme_desc = "Inst_V * Current VL for 64-bit operations only.", .pme_code = 99, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 24, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 25 Event 0 */ { .pme_name = "VOPS_STORE_INDEX", .pme_desc = "Number of selected vector store indexed references g=30-33 & f2=1 & f0=1", .pme_code = 100, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 25, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 25 Event 1 */ { .pme_name = "JRS_PRED", .pme_desc = "Number of return jumps predicted g=57, f=40.", .pme_code = 101, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 25, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 25 Event 2 */ { .pme_name = "STALL_SLSQ_PARB", .pme_desc = "Number of CPs SLS issue stalled due to PARB full.", .pme_code = 102, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = 
PME_CRAYX2_CHIP_CPU, .pme_ctr = 25, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 25 Event 3 */ { .pme_name = "", .pme_desc = "", .pme_code = 103, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 25, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 26 Event 0 */ { .pme_name = "VOPS_LOADS", .pme_desc = "Number of selected vector load references g=30-33 & f0=0.", .pme_code = 104, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 26, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 26 Event 1 */ { .pme_name = "JRS_CORRECT", .pme_desc = "Number of return jumps predicted correctly g=57, f=40.", .pme_code = 105, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 26, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 26 Event 2 */ { .pme_name = "STALL_SLSQ_ORB", .pme_desc = "Number of CPs SLS issue stalled due to all ORB entries in use.", .pme_code = 106, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 26, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 26 Event 3 */ { .pme_name = "STALL_VU_MISC", .pme_desc = "CPs VU stalled due to miscellaneous instructions.", .pme_code = 107, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 26, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = 
PME_CRAYX2_CPU_CHIPS }, /* P Counter 27 Event 0 */ { .pme_name = "VOPS_STORE", .pme_desc = "Number of selected vector store references g=30-33 & f0=", .pme_code = 108, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 27, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 27 Event 1 */ { .pme_name = "INST_MEM_ALLOC", .pme_desc = "Number of A and S register memory instructions that allocate.", .pme_code = 109, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 27, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 27 Event 2 */ { .pme_name = "STALL_SLSQ_FOQ", .pme_desc = "Number of CPs SLS issue stalled due to full FOQ.", .pme_code = 110, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 27, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 27 Event 3 */ { .pme_name = "STALL_VDU_NO_INST_VU", .pme_desc = "CPs VDU and VU have no valid instructions.", .pme_code = 111, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 27, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 28 Event 0 */ { .pme_name = "VOPS_LOAD_STRIDE", .pme_desc = "Number of selected vector load references that were stride >2 or <-2.", .pme_code = 112, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 28, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 28 Event 1 */ { .pme_name = "INST_SYSCALL", 
.pme_desc = "Number of syscall instructions graduated g=01.", .pme_code = 113, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 28, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 28 Event 2 */ { .pme_name = "STALL_SLSQ_LSYNC_VS", .pme_desc = "Number of CPs SLS issue is stalled due to active Lsync vs instruction.", .pme_code = 114, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 28, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 28 Event 3 */ { .pme_name = "STALL_VDU_SOP_VU", .pme_desc = "Number of CPs vector issue has no instructions and the next instruction is waiting on an S reg operand.", .pme_code = 115, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 28, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 29 Event 0 */ { .pme_name = "VOPS_STORE_STRIDE", .pme_desc = "Number of selected vector store references that were stride >2 or <-2.", .pme_code = 116, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 29, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 29 Event 1 */ { .pme_name = "", .pme_desc = "", .pme_code = 117, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 29, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 29 Event 2 */ { .pme_name = "", .pme_desc = "", .pme_code = 118, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = 
PME_CRAYX2_CHIP_CPU, .pme_ctr = 29, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 29 Event 3 */ { .pme_name = "STALL_VDU_NO_INST_VLSU", .pme_desc = "CPs VDU and VLSU have no valid instructions.", .pme_code = 119, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 29, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 30 Event 0 */ { .pme_name = "VOPS_LOAD_ALLOC", .pme_desc = "Number of selected vector load references that were marked allocate (cache line requests count as 1).", .pme_code = 120, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 30, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 30 Event 1 */ { .pme_name = "INST_LOAD", .pme_desc = "Number of A or S memory loads g=44, 45, 41 & f0=0, 64, 65, 61 & f0=0.", .pme_code = 121, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 30, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 30 Event 2 */ { .pme_name = "EXCEPTIONS_TAKEN", .pme_desc = "Taken exception count.", .pme_code = 122, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 30, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 30 Event 3 */ { .pme_name = "STALL_VDU_SCM_VLSU", .pme_desc = "CPs VDU stalled waiting for scalar commit and VLSU has no valid instruction.", .pme_code = 123, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 30, 
.pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 31 Event 0 */ { .pme_name = "VOPS_STORE_ALLOC", .pme_desc = "Number of selected vector stores references that were marked allocate (cache line requests count as 1).", .pme_code = 124, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 31, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 31 Event 1 */ { .pme_name = "BRANCH_TAKEN", .pme_desc = "Number of taken branches.", .pme_code = 125, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 31, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 31 Event 2 */ { .pme_name = "INST_LSYNCSV", .pme_desc = "Number of graduated Lsync SV instructions.", .pme_code = 126, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 31, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* P Counter 31 Event 3 */ { .pme_name = "STALL_VDU_SCM_VU", .pme_desc = "CPs VDU stalled waiting for scalar commit and VU has no valid instruction.", .pme_code = 127, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CPU, .pme_ctr = 31, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CPU_PMD_BASE, .pme_nctrs = PME_CRAYX2_CPU_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CPU_CHIPS }, /* C Counter 0 Event 0 */ { .pme_name = "REQUESTS", .pme_desc = "Processor requests processed.", .pme_code = 128, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 0, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = 
PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 0 Event 1 */ { .pme_name = "L2_MISSES", .pme_desc = "Cache line allocations.", .pme_code = 129, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 0, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 0 Event 2 */ { .pme_name = "M_OUT_BUSY", .pme_desc = "Cycles W chip output port busy.", .pme_code = 130, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 0, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 0 Event 3 */ { .pme_name = "REPLAYED", .pme_desc = "Requests sent to replay queue.", .pme_code = 131, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 0, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 1 Event 0 */ { .pme_name = "ALLOC_REQUESTS", .pme_desc = "Allocating requests (Read, ReadUC, ReadShared, ReadUCShared, ReadMod, SWrite, VWrite).", .pme_code = 132, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 1, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 1 Event 1 */ { .pme_name = "", .pme_desc = "", .pme_code = 133, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 1, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 1 Event 2 */ { .pme_name = "M_OUT_BLOCK", .pme_desc = "CyclesWchip output port blocked (something to send 
but no flow control credits).", .pme_code = 134, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 1, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 1 Event 3 */ { .pme_name = "LS/VS", .pme_desc = "Replayed Ls or Vs Requests sent to the replay queue.", .pme_code = 135, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 1, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 2 Event 0 */ { .pme_name = "DWORDS_ALLOCATED", .pme_desc = "Dwords written into L2 from L3 (excluding updates).", .pme_code = 136, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 2, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 2 Event 1 */ { .pme_name = "", .pme_desc = "", .pme_code = 137, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 2, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 2 Event 2 */ { .pme_name = "NW_OUT_BUSY", .pme_desc = "Cycles NIF output port busy.", .pme_code = 138, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 2, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 2 Event 3 */ { .pme_name = "REPLAY_PENDING", .pme_desc = "Requests sent to replay queue because the line was in PendingReq state.", .pme_code = 139, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 2, .pme_event = 3, 
.pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 3 Event 0 */ { .pme_name = "DWORDS_EVICTED", .pme_desc = "Dwords written back to L3.", .pme_code = 140, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 3, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 3 Event 1 */ { .pme_name = "CACHE_LINE_EVICTIONS", .pme_desc = "Cache lines evicted due to new allocations.", .pme_code = 141, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 3, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 3 Event 2 */ { .pme_name = "NW_OUT_BLOCK", .pme_desc = "Cycles NIF output port blocked (something to send but no flow control credits).", .pme_code = 142, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 3, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 3 Event 3 */ { .pme_name = "REPLAY_ALLOC", .pme_desc = "Requests sent to replay queue because a line could not be allocated due to all ways pending.", .pme_code = 143, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 3, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 4 Event 0 */ { .pme_name = "ALLOC_WRITE_TO_L2", .pme_desc = "Dwords written to L2 by local allocating write requests.", .pme_code = 144, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 4, .pme_event = 0, .pme_chipno = 0, .pme_base = 
PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 4 Event 1 */ { .pme_name = "DROPS", .pme_desc = "Drops sent to directory.", .pme_code = 145, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 4, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 4 Event 2 */ { .pme_name = "", .pme_desc = "", .pme_code = 146, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 4, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 4 Event 3 */ { .pme_name = "REPLAY_WAKEUPS", .pme_desc = "Replay queue wakeups.", .pme_code = 147, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 4, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 5 Event 0 */ { .pme_name = "NON_ALLOC_WRITE_TO_L2", .pme_desc = "Dwords written to L2 by local non-allocating write requests.", .pme_code = 148, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 5, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 5 Event 1 */ { .pme_name = "WRITE_BACKS", .pme_desc = "WriteBacks sent to directory.", .pme_code = 149, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 5, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 5 Event 2 */ { .pme_name = "", .pme_desc = "", .pme_code = 150, .pme_flags = 0x0, .pme_numasks 
= 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 5, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 5 Event 3 */ { .pme_name = "REPLAY_MATCHES", .pme_desc = "Requests matched during replay wakeups (Replay_Matches/Replay_Wakeups=avg. number of matches per wakeup).", .pme_code = 151, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 5, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 6 Event 0 */ { .pme_name = "NON_ALLOC_WRITE_TO_L3", .pme_desc = "Dwords written to L3 by local non-allocating write requests.", .pme_code = 152, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 6, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 6 Event 1 */ { .pme_name = "FWD_REQ", .pme_desc = "Forwarded requests received (FlushReq, FwdRead, FwdReadShared, FwdGet).", .pme_code = 153, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 6, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 6 Event 2 */ { .pme_name = "", .pme_desc = "", .pme_code = 154, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 6, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 6 Event 3 */ { .pme_name = "", .pme_desc = "", .pme_code = 155, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 6, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, 
.pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 7 Event 0 */ { .pme_name = "ALLOC_READ_FROM_L2", .pme_desc = "Dwords read from L2 by local allocating read requests.", .pme_code = 156, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 7, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 7 Event 1 */ { .pme_name = "FWD_READ_ALL", .pme_desc = "FwdReads and FwdReadShared received.", .pme_code = 157, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 7, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 7 Event 2 */ { .pme_name = "STALL_RP_FULL_NW", .pme_desc = "Cycles NW request queue stalled due to replay queue full.", .pme_code = 158, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 7, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 7 Event 3 */ { .pme_name = "ALLOC_NO_FILL", .pme_desc = "ReadMods sent to directory when the entire line is dirty.", .pme_code = 159, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 7, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 8 Event 0 */ { .pme_name = "NON_ALLOC_READ_FROM_L2", .pme_desc = "Dwords read from L2 by local non-allocating read requests.", .pme_code = 160, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 8, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = 
PME_CRAYX2_CACHE_CHIPS }, /* C Counter 8 Event 1 */ { .pme_name = "FWD_READ_SHARED_RECV", .pme_desc = "FwdReadShareds received.", .pme_code = 161, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 8, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 8 Event 2 */ { .pme_name = "STALL_RP_FULL_PROC", .pme_desc = "Cycles Ls/Vs request queue stalled due to replay queue full.", .pme_code = 162, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 8, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 8 Event 3 */ { .pme_name = "UPGRADES", .pme_desc = "ReadMods sent to directory when the line was currently in ShClean state.", .pme_code = 163, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 8, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 9 Event 0 */ { .pme_name = "NON_ALLOC_READ_FROM_L3", .pme_desc = "Dwords read from L3 by local non-allocating read requests.", .pme_code = 164, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 9, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 9 Event 1 */ { .pme_name = "FWD_GET_RECV", .pme_desc = "FwdGets received.", .pme_code = 165, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 9, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 9 Event 2 */ { .pme_name = "STALL_TB_FULL", .pme_desc 
= "Cycles bank request queue stalled due to transient buffer full.", .pme_code = 166, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 9, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 9 Event 3 */ { .pme_name = "", .pme_desc = "", .pme_code = 167, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 9, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 10 Event 0 */ { .pme_name = "NETWORK_WRITE_TO_L2", .pme_desc = "Dwords written to L2 by remote write requests.", .pme_code = 168, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 10, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 10 Event 1 */ { .pme_name = "FLUSH_REQ", .pme_desc = "FlushReqs received.", .pme_code = 169, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 10, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 10 Event 2 */ { .pme_name = "STALL_VWRITENA", .pme_desc = "Cycles bank request queue stalled due to VWriteNA bit being set.", .pme_code = 170, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 10, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 10 Event 3 */ { .pme_name = "", .pme_desc = "", .pme_code = 171, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 10, .pme_event = 3, .pme_chipno = 0, .pme_base = 
PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 11 Event 0 */ { .pme_name = "NETWORK_WRITE_TO_L3", .pme_desc = "Dwords written to L3 by remote write requests.", .pme_code = 172, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 11, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 11 Event 1 */ { .pme_name = "UPDATES_RECV", .pme_desc = "Updates received.", .pme_code = 173, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 11, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 11 Event 2 */ { .pme_name = "PROT_ENGINE_IDLE_NO_REQUEST", .pme_desc = "Cycles protocol engine idle due to no new requests to process.", .pme_code = 174, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 11, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 11 Event 3 */ { .pme_name = "READ_DATA_TO_VECTOR_UNIT_PIPE_0_3", .pme_desc = "Swords delivered to vector unit via pipes 0 - 3.", .pme_code = 175, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 11, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 12 Event 0 */ { .pme_name = "NETWORK_READ_FROM_L2", .pme_desc = "Dwords read from L2 by remote read requests.", .pme_code = 176, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 12, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = 
PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 12 Event 1 */ { .pme_name = "", .pme_desc = "", .pme_code = 177, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 12, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 12 Event 2 */ { .pme_name = "UPDATE_NACK_SENT", .pme_desc = "UpdateNacks sent.", .pme_code = 178, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 12, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 12 Event 3 */ { .pme_name = "READ_DATA_TO_VECTOR_UNIT_PIPE_4_7", .pme_desc = "Swords delivered to vector unit via pipes 4 - 7.", .pme_code = 179, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 12, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 13 Event 0 */ { .pme_name = "NETWORK_READ_FROM_L3", .pme_desc = "Dwords read from L3 by remote read requests.", .pme_code = 180, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 13, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 13 Event 1 */ { .pme_name = "NACKS_SENT", .pme_desc = "FlushAcks and UpdateNacks sent (these happen when there's a race b/w a forwarded request and an eviction by the current owner).", .pme_code = 181, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 13, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C 
Counter 13 Event 2 */ { .pme_name = "INVAL_RECV", .pme_desc = "Inval packets received from the directory.", .pme_code = 182, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 13, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 13 Event 3 */ { .pme_name = "READ_DATA_TO_SCALAR_UNIT", .pme_desc = "Dwords delivered to scalar unit.", .pme_code = 183, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 13, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 14 Event 0 */ { .pme_name = "REMOTE_READS", .pme_desc = "Dwords read from remote nodes.", .pme_code = 184, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 14, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 14 Event 1 */ { .pme_name = "LOCAL_INVAL", .pme_desc = "Local writes that cause invals of other Dcaches.", .pme_code = 185, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 14, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 14 Event 2 */ { .pme_name = "MARKED_REQS", .pme_desc = "Memory requests sent with TID 0.", .pme_code = 186, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 14, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 14 Event 3 */ { .pme_name = "READ_DATA_TO_ICACHE", .pme_desc = "Dwords delivered to Icache.", .pme_code = 187, .pme_flags = 0x0, 
.pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 14, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 15 Event 0 */ { .pme_name = "REMOTE_WRITES", .pme_desc = "Dwords written to remote nodes.", .pme_code = 188, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 15, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 15 Event 1 */ { .pme_name = "DCACHE_INVAL_EVENTS", .pme_desc = "State transitions (evictions, directory Invals or forwards, processor writes) requiring Dcache invals.", .pme_code = 189, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 15, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 15 Event 2 */ { .pme_name = "MARKED_CYCLES", .pme_desc = "Cycles with a TID 0 request outstanding.", .pme_code = 190, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 15, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* C Counter 15 Event 3 */ { .pme_name = "READ_DATA_TO_NIF", .pme_desc = "Dwords delivered to NIF.", .pme_code = 191, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_CACHE, .pme_ctr = 15, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_CACHE_PMD_BASE, .pme_nctrs = PME_CRAYX2_CACHE_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_CACHE_CHIPS }, /* M Counter 0 Event 0 */ { .pme_name = "W_IN_IDLE_0@0", .pme_desc = "Wclk cycles BW2MD input port 0 is idle (no flits in either VC0 or VC2). 
(M chip 0)", .pme_code = 192, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_0@1", .pme_desc = "Wclk cycles BW2MD input port 0 is idle (no flits in either VC0 or VC2). (M chip 1)", .pme_code = 193, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 0, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_0@2", .pme_desc = "Wclk cycles BW2MD input port 0 is idle (no flits in either VC0 or VC2). (M chip 2)", .pme_code = 194, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 0, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_0@3", .pme_desc = "Wclk cycles BW2MD input port 0 is idle (no flits in either VC0 or VC2). (M chip 3)", .pme_code = 195, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 0, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_0@4", .pme_desc = "Wclk cycles BW2MD input port 0 is idle (no flits in either VC0 or VC2). (M chip 4)", .pme_code = 196, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 0, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_0@5", .pme_desc = "Wclk cycles BW2MD input port 0 is idle (no flits in either VC0 or VC2). 
(M chip 5)", .pme_code = 197, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 0, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_0@6", .pme_desc = "Wclk cycles BW2MD input port 0 is idle (no flits in either VC0 or VC2). (M chip 6)", .pme_code = 198, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 0, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_0@7", .pme_desc = "Wclk cycles BW2MD input port 0 is idle (no flits in either VC0 or VC2). (M chip 7)", .pme_code = 199, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 0, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_0@8", .pme_desc = "Wclk cycles BW2MD input port 0 is idle (no flits in either VC0 or VC2). (M chip 8)", .pme_code = 200, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 0, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_0@9", .pme_desc = "Wclk cycles BW2MD input port 0 is idle (no flits in either VC0 or VC2). (M chip 9)", .pme_code = 201, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 0, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_0@10", .pme_desc = "Wclk cycles BW2MD input port 0 is idle (no flits in either VC0 or VC2). 
(M chip 10)", .pme_code = 202, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 0, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_0@11", .pme_desc = "Wclk cycles BW2MD input port 0 is idle (no flits in either VC0 or VC2). (M chip 11)", .pme_code = 203, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 0, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_0@12", .pme_desc = "Wclk cycles BW2MD input port 0 is idle (no flits in either VC0 or VC2). (M chip 12)", .pme_code = 204, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 0, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_0@13", .pme_desc = "Wclk cycles BW2MD input port 0 is idle (no flits in either VC0 or VC2). (M chip 13)", .pme_code = 205, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 0, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_0@14", .pme_desc = "Wclk cycles BW2MD input port 0 is idle (no flits in either VC0 or VC2). (M chip 14)", .pme_code = 206, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 0, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_0@15", .pme_desc = "Wclk cycles BW2MD input port 0 is idle (no flits in either VC0 or VC2). 
(M chip 15)", .pme_code = 207, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 0, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 0 Event 1 */ { .pme_name = "STALL_REPLAY_FULL@0", .pme_desc = "Wclk cycles protocol engine request queue stalled due to replay queue full (sum of 4 engines). (M chip 0)", .pme_code = 208, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_REPLAY_FULL@1", .pme_desc = "Wclk cycles protocol engine request queue stalled due to replay queue full (sum of 4 engines). (M chip 1)", .pme_code = 209, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 1, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_REPLAY_FULL@2", .pme_desc = "Wclk cycles protocol engine request queue stalled due to replay queue full (sum of 4 engines). (M chip 2)", .pme_code = 210, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 1, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_REPLAY_FULL@3", .pme_desc = "Wclk cycles protocol engine request queue stalled due to replay queue full (sum of 4 engines). 
(M chip 3)", .pme_code = 211, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 1, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_REPLAY_FULL@4", .pme_desc = "Wclk cycles protocol engine request queue stalled due to replay queue full (sum of 4 engines). (M chip 4)", .pme_code = 212, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 1, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_REPLAY_FULL@5", .pme_desc = "Wclk cycles protocol engine request queue stalled due to replay queue full (sum of 4 engines). (M chip 5)", .pme_code = 213, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 1, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_REPLAY_FULL@6", .pme_desc = "Wclk cycles protocol engine request queue stalled due to replay queue full (sum of 4 engines). (M chip 6)", .pme_code = 214, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 1, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_REPLAY_FULL@7", .pme_desc = "Wclk cycles protocol engine request queue stalled due to replay queue full (sum of 4 engines). 
(M chip 7)", .pme_code = 215, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 1, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_REPLAY_FULL@8", .pme_desc = "Wclk cycles protocol engine request queue stalled due to replay queue full (sum of 4 engines). (M chip 8)", .pme_code = 216, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 1, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_REPLAY_FULL@9", .pme_desc = "Wclk cycles protocol engine request queue stalled due to replay queue full (sum of 4 engines). (M chip 9)", .pme_code = 217, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 1, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_REPLAY_FULL@10", .pme_desc = "Wclk cycles protocol engine request queue stalled due to replay queue full (sum of 4 engines). (M chip 10)", .pme_code = 218, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 1, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_REPLAY_FULL@11", .pme_desc = "Wclk cycles protocol engine request queue stalled due to replay queue full (sum of 4 engines). 
(M chip 11)", .pme_code = 219, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 1, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_REPLAY_FULL@12", .pme_desc = "Wclk cycles protocol engine request queue stalled due to replay queue full (sum of 4 engines). (M chip 12)", .pme_code = 220, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 1, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_REPLAY_FULL@13", .pme_desc = "Wclk cycles protocol engine request queue stalled due to replay queue full (sum of 4 engines). (M chip 13)", .pme_code = 221, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 1, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_REPLAY_FULL@14", .pme_desc = "Wclk cycles protocol engine request queue stalled due to replay queue full (sum of 4 engines). (M chip 14)", .pme_code = 222, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 1, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_REPLAY_FULL@15", .pme_desc = "Wclk cycles protocol engine request queue stalled due to replay queue full (sum of 4 engines). 
(M chip 15)", .pme_code = 223, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 1, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 0 Event 2 */ { .pme_name = "W_OUT_IDLE_0@0", .pme_desc = "Wclk cycles MD2BW output port 0 is idle (no flits flowing). (M chip 0)", .pme_code = 224, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_0@1", .pme_desc = "Wclk cycles MD2BW output port 0 is idle (no flits flowing). (M chip 1)", .pme_code = 225, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 2, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_0@2", .pme_desc = "Wclk cycles MD2BW output port 0 is idle (no flits flowing). (M chip 2)", .pme_code = 226, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 2, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_0@3", .pme_desc = "Wclk cycles MD2BW output port 0 is idle (no flits flowing). (M chip 3)", .pme_code = 227, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 2, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_0@4", .pme_desc = "Wclk cycles MD2BW output port 0 is idle (no flits flowing). 
(M chip 4)", .pme_code = 228, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 2, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_0@5", .pme_desc = "Wclk cycles MD2BW output port 0 is idle (no flits flowing). (M chip 5)", .pme_code = 229, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 2, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_0@6", .pme_desc = "Wclk cycles MD2BW output port 0 is idle (no flits flowing). (M chip 6)", .pme_code = 230, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 2, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_0@7", .pme_desc = "Wclk cycles MD2BW output port 0 is idle (no flits flowing). (M chip 7)", .pme_code = 231, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 2, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_0@8", .pme_desc = "Wclk cycles MD2BW output port 0 is idle (no flits flowing). (M chip 8)", .pme_code = 232, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 2, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_0@9", .pme_desc = "Wclk cycles MD2BW output port 0 is idle (no flits flowing). 
(M chip 9)", .pme_code = 233, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 2, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_0@10", .pme_desc = "Wclk cycles MD2BW output port 0 is idle (no flits flowing). (M chip 10)", .pme_code = 234, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 2, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_0@11", .pme_desc = "Wclk cycles MD2BW output port 0 is idle (no flits flowing). (M chip 11)", .pme_code = 235, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 2, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_0@12", .pme_desc = "Wclk cycles MD2BW output port 0 is idle (no flits flowing). (M chip 12)", .pme_code = 236, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 2, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_0@13", .pme_desc = "Wclk cycles MD2BW output port 0 is idle (no flits flowing). (M chip 13)", .pme_code = 237, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 2, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_0@14", .pme_desc = "Wclk cycles MD2BW output port 0 is idle (no flits flowing). 
(M chip 14)", .pme_code = 238, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 2, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_0@15", .pme_desc = "Wclk cycles MD2BW output port 0 is idle (no flits flowing). (M chip 15)", .pme_code = 239, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 2, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 0 Event 3 */ { .pme_name = "@0", .pme_desc = "", .pme_code = 240, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@1", .pme_desc = "", .pme_code = 241, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 3, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@2", .pme_desc = "", .pme_code = 242, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 3, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@3", .pme_desc = "", .pme_code = 243, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 3, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@4", .pme_desc = "", .pme_code = 244, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = 
PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 3, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@5", .pme_desc = "", .pme_code = 245, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 3, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@6", .pme_desc = "", .pme_code = 246, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 3, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@7", .pme_desc = "", .pme_code = 247, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 3, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@8", .pme_desc = "", .pme_code = 248, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 3, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@9", .pme_desc = "", .pme_code = 249, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 3, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@10", .pme_desc = "", .pme_code = 250, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 3, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { 
.pme_name = "@11", .pme_desc = "", .pme_code = 251, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 3, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@12", .pme_desc = "", .pme_code = 252, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 3, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@13", .pme_desc = "", .pme_code = 253, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 3, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@14", .pme_desc = "", .pme_code = 254, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 3, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@15", .pme_desc = "", .pme_code = 255, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 0, .pme_event = 3, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 1 Event 0 */ { .pme_name = "W_IN_IDLE_1@0", .pme_desc = "Wclk cycles BW2MD input port 1 is idle (no flits in either VC0 or VC2). 
(M chip 0)", .pme_code = 256, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_1@1", .pme_desc = "Wclk cycles BW2MD input port 1 is idle (no flits in either VC0 or VC2). (M chip 1)", .pme_code = 257, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 0, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_1@2", .pme_desc = "Wclk cycles BW2MD input port 1 is idle (no flits in either VC0 or VC2). (M chip 2)", .pme_code = 258, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 0, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_1@3", .pme_desc = "Wclk cycles BW2MD input port 1 is idle (no flits in either VC0 or VC2). (M chip 3)", .pme_code = 259, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 0, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_1@4", .pme_desc = "Wclk cycles BW2MD input port 1 is idle (no flits in either VC0 or VC2). (M chip 4)", .pme_code = 260, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 0, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_1@5", .pme_desc = "Wclk cycles BW2MD input port 1 is idle (no flits in either VC0 or VC2). 
(M chip 5)", .pme_code = 261, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 0, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_1@6", .pme_desc = "Wclk cycles BW2MD input port 1 is idle (no flits in either VC0 or VC2). (M chip 6)", .pme_code = 262, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 0, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_1@7", .pme_desc = "Wclk cycles BW2MD input port 1 is idle (no flits in either VC0 or VC2). (M chip 7)", .pme_code = 263, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 0, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_1@8", .pme_desc = "Wclk cycles BW2MD input port 1 is idle (no flits in either VC0 or VC2). (M chip 8)", .pme_code = 264, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 0, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_1@9", .pme_desc = "Wclk cycles BW2MD input port 1 is idle (no flits in either VC0 or VC2). (M chip 9)", .pme_code = 265, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 0, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_1@10", .pme_desc = "Wclk cycles BW2MD input port 1 is idle (no flits in either VC0 or VC2). 
(M chip 10)", .pme_code = 266, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 0, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_1@11", .pme_desc = "Wclk cycles BW2MD input port 1 is idle (no flits in either VC0 or VC2). (M chip 11)", .pme_code = 267, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 0, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_1@12", .pme_desc = "Wclk cycles BW2MD input port 1 is idle (no flits in either VC0 or VC2). (M chip 12)", .pme_code = 268, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 0, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_1@13", .pme_desc = "Wclk cycles BW2MD input port 1 is idle (no flits in either VC0 or VC2). (M chip 13)", .pme_code = 269, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 0, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_1@14", .pme_desc = "Wclk cycles BW2MD input port 1 is idle (no flits in either VC0 or VC2). (M chip 14)", .pme_code = 270, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 0, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_1@15", .pme_desc = "Wclk cycles BW2MD input port 1 is idle (no flits in either VC0 or VC2). 
(M chip 15)", .pme_code = 271, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 0, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 1 Event 1 */ { .pme_name = "STALL_TDB_FULL@0", .pme_desc = "Wclk cycles protocol engine request queue stalled due to transient directory buffer full (sum of 4 engines). (M chip 0)", .pme_code = 272, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_TDB_FULL@1", .pme_desc = "Wclk cycles protocol engine request queue stalled due to transient directory buffer full (sum of 4 engines). (M chip 1)", .pme_code = 273, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 1, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_TDB_FULL@2", .pme_desc = "Wclk cycles protocol engine request queue stalled due to transient directory buffer full (sum of 4 engines). (M chip 2)", .pme_code = 274, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 1, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_TDB_FULL@3", .pme_desc = "Wclk cycles protocol engine request queue stalled due to transient directory buffer full (sum of 4 engines). 
(M chip 3)", .pme_code = 275, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 1, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_TDB_FULL@4", .pme_desc = "Wclk cycles protocol engine request queue stalled due to transient directory buffer full (sum of 4 engines). (M chip 4)", .pme_code = 276, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 1, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_TDB_FULL@5", .pme_desc = "Wclk cycles protocol engine request queue stalled due to transient directory buffer full (sum of 4 engines). (M chip 5)", .pme_code = 277, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 1, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_TDB_FULL@6", .pme_desc = "Wclk cycles protocol engine request queue stalled due to transient directory buffer full (sum of 4 engines). (M chip 6)", .pme_code = 278, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 1, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_TDB_FULL@7", .pme_desc = "Wclk cycles protocol engine request queue stalled due to transient directory buffer full (sum of 4 engines). 
(M chip 7)", .pme_code = 279, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 1, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_TDB_FULL@8", .pme_desc = "Wclk cycles protocol engine request queue stalled due to transient directory buffer full (sum of 4 engines). (M chip 8)", .pme_code = 280, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 1, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_TDB_FULL@9", .pme_desc = "Wclk cycles protocol engine request queue stalled due to transient directory buffer full (sum of 4 engines). (M chip 9)", .pme_code = 281, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 1, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_TDB_FULL@10", .pme_desc = "Wclk cycles protocol engine request queue stalled due to transient directory buffer full (sum of 4 engines). (M chip 10)", .pme_code = 282, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 1, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_TDB_FULL@11", .pme_desc = "Wclk cycles protocol engine request queue stalled due to transient directory buffer full (sum of 4 engines). 
(M chip 11)", .pme_code = 283, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 1, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_TDB_FULL@12", .pme_desc = "Wclk cycles protocol engine request queue stalled due to transient directory buffer full (sum of 4 engines). (M chip 12)", .pme_code = 284, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 1, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_TDB_FULL@13", .pme_desc = "Wclk cycles protocol engine request queue stalled due to transient directory buffer full (sum of 4 engines). (M chip 13)", .pme_code = 285, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 1, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_TDB_FULL@14", .pme_desc = "Wclk cycles protocol engine request queue stalled due to transient directory buffer full (sum of 4 engines). (M chip 14)", .pme_code = 286, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 1, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_TDB_FULL@15", .pme_desc = "Wclk cycles protocol engine request queue stalled due to transient directory buffer full (sum of 4 engines). 
(M chip 15)", .pme_code = 287, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 1, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 1 Event 2 */ { .pme_name = "W_OUT_IDLE_1@0", .pme_desc = "Wclk cycles MD2BW output port 1 is idle (no flits flowing). (M chip 0)", .pme_code = 288, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_1@1", .pme_desc = "Wclk cycles MD2BW output port 1 is idle (no flits flowing). (M chip 1)", .pme_code = 289, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 2, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_1@2", .pme_desc = "Wclk cycles MD2BW output port 1 is idle (no flits flowing). (M chip 2)", .pme_code = 290, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 2, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_1@3", .pme_desc = "Wclk cycles MD2BW output port 1 is idle (no flits flowing). (M chip 3)", .pme_code = 291, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 2, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_1@4", .pme_desc = "Wclk cycles MD2BW output port 1 is idle (no flits flowing). 
(M chip 4)", .pme_code = 292, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 2, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_1@5", .pme_desc = "Wclk cycles MD2BW output port 1 is idle (no flits flowing). (M chip 5)", .pme_code = 293, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 2, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_1@6", .pme_desc = "Wclk cycles MD2BW output port 1 is idle (no flits flowing). (M chip 6)", .pme_code = 294, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 2, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_1@7", .pme_desc = "Wclk cycles MD2BW output port 1 is idle (no flits flowing). (M chip 7)", .pme_code = 295, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 2, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_1@8", .pme_desc = "Wclk cycles MD2BW output port 1 is idle (no flits flowing). (M chip 8)", .pme_code = 296, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 2, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_1@9", .pme_desc = "Wclk cycles MD2BW output port 1 is idle (no flits flowing). 
(M chip 9)", .pme_code = 297, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 2, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_1@10", .pme_desc = "Wclk cycles MD2BW output port 1 is idle (no flits flowing). (M chip 10)", .pme_code = 298, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 2, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_1@11", .pme_desc = "Wclk cycles MD2BW output port 1 is idle (no flits flowing). (M chip 11)", .pme_code = 299, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 2, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_1@12", .pme_desc = "Wclk cycles MD2BW output port 1 is idle (no flits flowing). (M chip 12)", .pme_code = 300, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 2, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_1@13", .pme_desc = "Wclk cycles MD2BW output port 1 is idle (no flits flowing). (M chip 13)", .pme_code = 301, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 2, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_1@14", .pme_desc = "Wclk cycles MD2BW output port 1 is idle (no flits flowing). 
(M chip 14)", .pme_code = 302, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 2, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_1@15", .pme_desc = "Wclk cycles MD2BW output port 1 is idle (no flits flowing). (M chip 15)", .pme_code = 303, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 2, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 1 Event 3 */ { .pme_name = "FWD_READ_SHARED_SENT@0", .pme_desc = "FwdReadShared packets sent (Exclusive -> PendFwd transition). (M chip 0)", .pme_code = 304, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_READ_SHARED_SENT@1", .pme_desc = "FwdReadShared packets sent (Exclusive -> PendFwd transition). (M chip 1)", .pme_code = 305, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 3, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_READ_SHARED_SENT@2", .pme_desc = "FwdReadShared packets sent (Exclusive -> PendFwd transition). (M chip 2)", .pme_code = 306, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 3, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_READ_SHARED_SENT@3", .pme_desc = "FwdReadShared packets sent (Exclusive -> PendFwd transition). 
(M chip 3)", .pme_code = 307, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 3, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_READ_SHARED_SENT@4", .pme_desc = "FwdReadShared packets sent (Exclusive -> PendFwd transition). (M chip 4)", .pme_code = 308, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 3, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_READ_SHARED_SENT@5", .pme_desc = "FwdReadShared packets sent (Exclusive -> PendFwd transition). (M chip 5)", .pme_code = 309, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 3, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_READ_SHARED_SENT@6", .pme_desc = "FwdReadShared packets sent (Exclusive -> PendFwd transition). (M chip 6)", .pme_code = 310, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 3, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_READ_SHARED_SENT@7", .pme_desc = "FwdReadShared packets sent (Exclusive -> PendFwd transition). (M chip 7)", .pme_code = 311, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 3, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_READ_SHARED_SENT@8", .pme_desc = "FwdReadShared packets sent (Exclusive -> PendFwd transition). 
(M chip 8)", .pme_code = 312, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 3, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_READ_SHARED_SENT@9", .pme_desc = "FwdReadShared packets sent (Exclusive -> PendFwd transition). (M chip 9)", .pme_code = 313, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 3, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_READ_SHARED_SENT@10", .pme_desc = "FwdReadShared packets sent (Exclusive -> PendFwd transition). (M chip 10)", .pme_code = 314, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 3, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_READ_SHARED_SENT@11", .pme_desc = "FwdReadShared packets sent (Exclusive -> PendFwd transition). (M chip 11)", .pme_code = 315, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 3, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_READ_SHARED_SENT@12", .pme_desc = "FwdReadShared packets sent (Exclusive -> PendFwd transition). (M chip 12)", .pme_code = 316, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 3, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_READ_SHARED_SENT@13", .pme_desc = "FwdReadShared packets sent (Exclusive -> PendFwd transition). 
(M chip 13)", .pme_code = 317, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 3, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_READ_SHARED_SENT@14", .pme_desc = "FwdReadShared packets sent (Exclusive -> PendFwd transition). (M chip 14)", .pme_code = 318, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 3, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_READ_SHARED_SENT@15", .pme_desc = "FwdReadShared packets sent (Exclusive -> PendFwd transition). (M chip 15)", .pme_code = 319, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 1, .pme_event = 3, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 2 Event 0 */ { .pme_name = "UPDATES_SENT@0", .pme_desc = "Puts that cause an Update to be sent to owner. (M chip 0)", .pme_code = 320, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "UPDATES_SENT@1", .pme_desc = "Puts that cause an Update to be sent to owner. (M chip 1)", .pme_code = 321, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 0, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "UPDATES_SENT@2", .pme_desc = "Puts that cause an Update to be sent to owner. 
(M chip 2)", .pme_code = 322, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 0, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "UPDATES_SENT@3", .pme_desc = "Puts that cause an Update to be sent to owner. (M chip 3)", .pme_code = 323, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 0, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "UPDATES_SENT@4", .pme_desc = "Puts that cause an Update to be sent to owner. (M chip 4)", .pme_code = 324, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 0, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "UPDATES_SENT@5", .pme_desc = "Puts that cause an Update to be sent to owner. (M chip 5)", .pme_code = 325, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 0, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "UPDATES_SENT@6", .pme_desc = "Puts that cause an Update to be sent to owner. (M chip 6)", .pme_code = 326, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 0, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "UPDATES_SENT@7", .pme_desc = "Puts that cause an Update to be sent to owner. 
(M chip 7)", .pme_code = 327, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 0, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "UPDATES_SENT@8", .pme_desc = "Puts that cause an Update to be sent to owner. (M chip 8)", .pme_code = 328, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 0, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "UPDATES_SENT@9", .pme_desc = "Puts that cause an Update to be sent to owner. (M chip 9)", .pme_code = 329, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 0, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "UPDATES_SENT@10", .pme_desc = "Puts that cause an Update to be sent to owner. (M chip 10)", .pme_code = 330, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 0, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "UPDATES_SENT@11", .pme_desc = "Puts that cause an Update to be sent to owner. (M chip 11)", .pme_code = 331, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 0, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "UPDATES_SENT@12", .pme_desc = "Puts that cause an Update to be sent to owner. 
(M chip 12)", .pme_code = 332, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 0, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "UPDATES_SENT@13", .pme_desc = "Puts that cause an Update to be sent to owner. (M chip 13)", .pme_code = 333, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 0, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "UPDATES_SENT@14", .pme_desc = "Puts that cause an Update to be sent to owner. (M chip 14)", .pme_code = 334, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 0, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "UPDATES_SENT@15", .pme_desc = "Puts that cause an Update to be sent to owner. (M chip 15)", .pme_code = 335, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 0, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 2 Event 1 */ { .pme_name = "STALL_MM_RESPQ@0", .pme_desc = "Wclk cycles protocol engine request queue stalled due to MM VN1 response queue full (sum of 4 engines). (M chip 0)", .pme_code = 336, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_MM_RESPQ@1", .pme_desc = "Wclk cycles protocol engine request queue stalled due to MM VN1 response queue full (sum of 4 engines). 
(M chip 1)", .pme_code = 337, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 1, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_MM_RESPQ@2", .pme_desc = "Wclk cycles protocol engine request queue stalled due to MM VN1 response queue full (sum of 4 engines). (M chip 2)", .pme_code = 338, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 1, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_MM_RESPQ@3", .pme_desc = "Wclk cycles protocol engine request queue stalled due to MM VN1 response queue full (sum of 4 engines). (M chip 3)", .pme_code = 339, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 1, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_MM_RESPQ@4", .pme_desc = "Wclk cycles protocol engine request queue stalled due to MM VN1 response queue full (sum of 4 engines). (M chip 4)", .pme_code = 340, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 1, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_MM_RESPQ@5", .pme_desc = "Wclk cycles protocol engine request queue stalled due to MM VN1 response queue full (sum of 4 engines). 
(M chip 5)", .pme_code = 341, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 1, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_MM_RESPQ@6", .pme_desc = "Wclk cycles protocol engine request queue stalled due to MM VN1 response queue full (sum of 4 engines). (M chip 6)", .pme_code = 342, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 1, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_MM_RESPQ@7", .pme_desc = "Wclk cycles protocol engine request queue stalled due to MM VN1 response queue full (sum of 4 engines). (M chip 7)", .pme_code = 343, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 1, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_MM_RESPQ@8", .pme_desc = "Wclk cycles protocol engine request queue stalled due to MM VN1 response queue full (sum of 4 engines). (M chip 8)", .pme_code = 344, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 1, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_MM_RESPQ@9", .pme_desc = "Wclk cycles protocol engine request queue stalled due to MM VN1 response queue full (sum of 4 engines). 
(M chip 9)", .pme_code = 345, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 1, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_MM_RESPQ@10", .pme_desc = "Wclk cycles protocol engine request queue stalled due to MM VN1 response queue full (sum of 4 engines). (M chip 10)", .pme_code = 346, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 1, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_MM_RESPQ@11", .pme_desc = "Wclk cycles protocol engine request queue stalled due to MM VN1 response queue full (sum of 4 engines). (M chip 11)", .pme_code = 347, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 1, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_MM_RESPQ@12", .pme_desc = "Wclk cycles protocol engine request queue stalled due to MM VN1 response queue full (sum of 4 engines). (M chip 12)", .pme_code = 348, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 1, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_MM_RESPQ@13", .pme_desc = "Wclk cycles protocol engine request queue stalled due to MM VN1 response queue full (sum of 4 engines). 
(M chip 13)", .pme_code = 349, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 1, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_MM_RESPQ@14", .pme_desc = "Wclk cycles protocol engine request queue stalled due to MM VN1 response queue full (sum of 4 engines). (M chip 14)", .pme_code = 350, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 1, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_MM_RESPQ@15", .pme_desc = "Wclk cycles protocol engine request queue stalled due to MM VN1 response queue full (sum of 4 engines). (M chip 15)", .pme_code = 351, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 1, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 2 Event 2 */ { .pme_name = "W_OUT_IDLE_2@0", .pme_desc = "Wclk cycles MD2BW output port 2 is idle (no flits flowing). (M chip 0)", .pme_code = 352, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_2@1", .pme_desc = "Wclk cycles MD2BW output port 2 is idle (no flits flowing). 
(M chip 1)", .pme_code = 353, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 2, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_2@2", .pme_desc = "Wclk cycles MD2BW output port 2 is idle (no flits flowing). (M chip 2)", .pme_code = 354, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 2, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_2@3", .pme_desc = "Wclk cycles MD2BW output port 2 is idle (no flits flowing). (M chip 3)", .pme_code = 355, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 2, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_2@4", .pme_desc = "Wclk cycles MD2BW output port 2 is idle (no flits flowing). (M chip 4)", .pme_code = 356, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 2, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_2@5", .pme_desc = "Wclk cycles MD2BW output port 2 is idle (no flits flowing). (M chip 5)", .pme_code = 357, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 2, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_2@6", .pme_desc = "Wclk cycles MD2BW output port 2 is idle (no flits flowing). 
(M chip 6)", .pme_code = 358, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 2, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_2@7", .pme_desc = "Wclk cycles MD2BW output port 2 is idle (no flits flowing). (M chip 7)", .pme_code = 359, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 2, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_2@8", .pme_desc = "Wclk cycles MD2BW output port 2 is idle (no flits flowing). (M chip 8)", .pme_code = 360, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 2, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_2@9", .pme_desc = "Wclk cycles MD2BW output port 2 is idle (no flits flowing). (M chip 9)", .pme_code = 361, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 2, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_2@10", .pme_desc = "Wclk cycles MD2BW output port 2 is idle (no flits flowing). (M chip 10)", .pme_code = 362, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 2, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_2@11", .pme_desc = "Wclk cycles MD2BW output port 2 is idle (no flits flowing). 
(M chip 11)", .pme_code = 363, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 2, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_2@12", .pme_desc = "Wclk cycles MD2BW output port 2 is idle (no flits flowing). (M chip 12)", .pme_code = 364, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 2, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_2@13", .pme_desc = "Wclk cycles MD2BW output port 2 is idle (no flits flowing). (M chip 13)", .pme_code = 365, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 2, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_2@14", .pme_desc = "Wclk cycles MD2BW output port 2 is idle (no flits flowing). (M chip 14)", .pme_code = 366, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 2, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_2@15", .pme_desc = "Wclk cycles MD2BW output port 2 is idle (no flits flowing). (M chip 15)", .pme_code = 367, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 2, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 2 Event 3 */ { .pme_name = "W_IN_IDLE_2@0", .pme_desc = "Wclk cycles BW2MD input port 2 is idle (no flits in either VC0 or VC2). 
(M chip 0)", .pme_code = 368, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_2@1", .pme_desc = "Wclk cycles BW2MD input port 2 is idle (no flits in either VC0 or VC2). (M chip 1)", .pme_code = 369, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 3, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_2@2", .pme_desc = "Wclk cycles BW2MD input port 2 is idle (no flits in either VC0 or VC2). (M chip 2)", .pme_code = 370, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 3, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_2@3", .pme_desc = "Wclk cycles BW2MD input port 2 is idle (no flits in either VC0 or VC2). (M chip 3)", .pme_code = 371, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 3, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_2@4", .pme_desc = "Wclk cycles BW2MD input port 2 is idle (no flits in either VC0 or VC2). (M chip 4)", .pme_code = 372, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 3, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_2@5", .pme_desc = "Wclk cycles BW2MD input port 2 is idle (no flits in either VC0 or VC2). 
(M chip 5)", .pme_code = 373, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 3, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_2@6", .pme_desc = "Wclk cycles BW2MD input port 2 is idle (no flits in either VC0 or VC2). (M chip 6)", .pme_code = 374, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 3, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_2@7", .pme_desc = "Wclk cycles BW2MD input port 2 is idle (no flits in either VC0 or VC2). (M chip 7)", .pme_code = 375, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 3, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_2@8", .pme_desc = "Wclk cycles BW2MD input port 2 is idle (no flits in either VC0 or VC2). (M chip 8)", .pme_code = 376, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 3, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_2@9", .pme_desc = "Wclk cycles BW2MD input port 2 is idle (no flits in either VC0 or VC2). (M chip 9)", .pme_code = 377, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 3, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_2@10", .pme_desc = "Wclk cycles BW2MD input port 2 is idle (no flits in either VC0 or VC2). 
(M chip 10)", .pme_code = 378, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 3, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_2@11", .pme_desc = "Wclk cycles BW2MD input port 2 is idle (no flits in either VC0 or VC2). (M chip 11)", .pme_code = 379, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 3, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_2@12", .pme_desc = "Wclk cycles BW2MD input port 2 is idle (no flits in either VC0 or VC2). (M chip 12)", .pme_code = 380, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 3, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_2@13", .pme_desc = "Wclk cycles BW2MD input port 2 is idle (no flits in either VC0 or VC2). (M chip 13)", .pme_code = 381, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 3, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_2@14", .pme_desc = "Wclk cycles BW2MD input port 2 is idle (no flits in either VC0 or VC2). (M chip 14)", .pme_code = 382, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 3, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_2@15", .pme_desc = "Wclk cycles BW2MD input port 2 is idle (no flits in either VC0 or VC2). 
(M chip 15)", .pme_code = 383, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 2, .pme_event = 3, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 3 Event 0 */ { .pme_name = "NON_CACHED@0", .pme_desc = "Read requests satisfied from non-cached state. (M chip 0)", .pme_code = 384, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "NON_CACHED@1", .pme_desc = "Read requests satisfied from non-cached state. (M chip 1)", .pme_code = 385, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 0, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "NON_CACHED@2", .pme_desc = "Read requests satisfied from non-cached state. (M chip 2)", .pme_code = 386, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 0, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "NON_CACHED@3", .pme_desc = "Read requests satisfied from non-cached state. (M chip 3)", .pme_code = 387, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 0, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "NON_CACHED@4", .pme_desc = "Read requests satisfied from non-cached state. 
(M chip 4)", .pme_code = 388, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 0, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "NON_CACHED@5", .pme_desc = "Read requests satisfied from non-cached state. (M chip 5)", .pme_code = 389, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 0, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "NON_CACHED@6", .pme_desc = "Read requests satisfied from non-cached state. (M chip 6)", .pme_code = 390, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 0, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "NON_CACHED@7", .pme_desc = "Read requests satisfied from non-cached state. (M chip 7)", .pme_code = 391, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 0, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "NON_CACHED@8", .pme_desc = "Read requests satisfied from non-cached state. (M chip 8)", .pme_code = 392, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 0, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "NON_CACHED@9", .pme_desc = "Read requests satisfied from non-cached state. 
(M chip 9)", .pme_code = 393, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 0, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "NON_CACHED@10", .pme_desc = "Read requests satisfied from non-cached state. (M chip 10)", .pme_code = 394, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 0, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "NON_CACHED@11", .pme_desc = "Read requests satisfied from non-cached state. (M chip 11)", .pme_code = 395, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 0, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "NON_CACHED@12", .pme_desc = "Read requests satisfied from non-cached state. (M chip 12)", .pme_code = 396, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 0, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "NON_CACHED@13", .pme_desc = "Read requests satisfied from non-cached state. (M chip 13)", .pme_code = 397, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 0, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "NON_CACHED@14", .pme_desc = "Read requests satisfied from non-cached state. 
(M chip 14)", .pme_code = 398, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 0, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "NON_CACHED@15", .pme_desc = "Read requests satisfied from non-cached state. (M chip 15)", .pme_code = 399, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 0, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 3 Event 1 */ { .pme_name = "STALL_ASSOC@0", .pme_desc = "Wclk cycles protocol engine request queue stalled due to temporary over-subscription of directory ways. (M chip 0)", .pme_code = 400, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_ASSOC@1", .pme_desc = "Wclk cycles protocol engine request queue stalled due to temporary over-subscription of directory ways. (M chip 1)", .pme_code = 401, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 1, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_ASSOC@2", .pme_desc = "Wclk cycles protocol engine request queue stalled due to temporary over-subscription of directory ways. 
(M chip 2)", .pme_code = 402, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 1, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_ASSOC@3", .pme_desc = "Wclk cycles protocol engine request queue stalled due to temporary over-subscription of directory ways. (M chip 3)", .pme_code = 403, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 1, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_ASSOC@4", .pme_desc = "Wclk cycles protocol engine request queue stalled due to temporary over-subscription of directory ways. (M chip 4)", .pme_code = 404, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 1, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_ASSOC@5", .pme_desc = "Wclk cycles protocol engine request queue stalled due to temporary over-subscription of directory ways. (M chip 5)", .pme_code = 405, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 1, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_ASSOC@6", .pme_desc = "Wclk cycles protocol engine request queue stalled due to temporary over-subscription of directory ways. 
(M chip 6)", .pme_code = 406, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 1, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_ASSOC@7", .pme_desc = "Wclk cycles protocol engine request queue stalled due to temporary over-subscription of directory ways. (M chip 7)", .pme_code = 407, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 1, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_ASSOC@8", .pme_desc = "Wclk cycles protocol engine request queue stalled due to temporary over-subscription of directory ways. (M chip 8)", .pme_code = 408, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 1, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_ASSOC@9", .pme_desc = "Wclk cycles protocol engine request queue stalled due to temporary over-subscription of directory ways. (M chip 9)", .pme_code = 409, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 1, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_ASSOC@10", .pme_desc = "Wclk cycles protocol engine request queue stalled due to temporary over-subscription of directory ways. 
(M chip 10)", .pme_code = 410, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 1, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_ASSOC@11", .pme_desc = "Wclk cycles protocol engine request queue stalled due to temporary over-subscription of directory ways. (M chip 11)", .pme_code = 411, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 1, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_ASSOC@12", .pme_desc = "Wclk cycles protocol engine request queue stalled due to temporary over-subscription of directory ways. (M chip 12)", .pme_code = 412, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 1, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_ASSOC@13", .pme_desc = "Wclk cycles protocol engine request queue stalled due to temporary over-subscription of directory ways. (M chip 13)", .pme_code = 413, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 1, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_ASSOC@14", .pme_desc = "Wclk cycles protocol engine request queue stalled due to temporary over-subscription of directory ways. 
(M chip 14)", .pme_code = 414, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 1, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_ASSOC@15", .pme_desc = "Wclk cycles protocol engine request queue stalled due to temporary over-subscription of directory ways. (M chip 15)", .pme_code = 415, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 1, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 3 Event 2 */ { .pme_name = "W_OUT_IDLE_3@0", .pme_desc = "Wclk cycles MD2BW output port 3 is idle (no flits flowing). (M chip 0)", .pme_code = 416, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_3@1", .pme_desc = "Wclk cycles MD2BW output port 3 is idle (no flits flowing). (M chip 1)", .pme_code = 417, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 2, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_3@2", .pme_desc = "Wclk cycles MD2BW output port 3 is idle (no flits flowing). (M chip 2)", .pme_code = 418, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 2, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_3@3", .pme_desc = "Wclk cycles MD2BW output port 3 is idle (no flits flowing). 
(M chip 3)", .pme_code = 419, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 2, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_3@4", .pme_desc = "Wclk cycles MD2BW output port 3 is idle (no flits flowing). (M chip 4)", .pme_code = 420, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 2, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_3@5", .pme_desc = "Wclk cycles MD2BW output port 3 is idle (no flits flowing). (M chip 5)", .pme_code = 421, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 2, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_3@6", .pme_desc = "Wclk cycles MD2BW output port 3 is idle (no flits flowing). (M chip 6)", .pme_code = 422, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 2, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_3@7", .pme_desc = "Wclk cycles MD2BW output port 3 is idle (no flits flowing). (M chip 7)", .pme_code = 423, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 2, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_3@8", .pme_desc = "Wclk cycles MD2BW output port 3 is idle (no flits flowing). 
(M chip 8)", .pme_code = 424, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 2, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_3@9", .pme_desc = "Wclk cycles MD2BW output port 3 is idle (no flits flowing). (M chip 9)", .pme_code = 425, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 2, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_3@10", .pme_desc = "Wclk cycles MD2BW output port 3 is idle (no flits flowing). (M chip 10)", .pme_code = 426, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 2, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_3@11", .pme_desc = "Wclk cycles MD2BW output port 3 is idle (no flits flowing). (M chip 11)", .pme_code = 427, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 2, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_3@12", .pme_desc = "Wclk cycles MD2BW output port 3 is idle (no flits flowing). (M chip 12)", .pme_code = 428, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 2, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_3@13", .pme_desc = "Wclk cycles MD2BW output port 3 is idle (no flits flowing). 
(M chip 13)", .pme_code = 429, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 2, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_3@14", .pme_desc = "Wclk cycles MD2BW output port 3 is idle (no flits flowing). (M chip 14)", .pme_code = 430, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 2, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_IDLE_3@15", .pme_desc = "Wclk cycles MD2BW output port 3 is idle (no flits flowing). (M chip 15)", .pme_code = 431, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 2, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 3 Event 3 */ { .pme_name = "W_IN_IDLE_3@0", .pme_desc = "Wclk cycles BW2MD input port 3 is idle (no flits in either VC0 or VC2). (M chip 0)", .pme_code = 432, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_3@1", .pme_desc = "Wclk cycles BW2MD input port 3 is idle (no flits in either VC0 or VC2). (M chip 1)", .pme_code = 433, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 3, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_3@2", .pme_desc = "Wclk cycles BW2MD input port 3 is idle (no flits in either VC0 or VC2). 
(M chip 2)", .pme_code = 434, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 3, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_3@3", .pme_desc = "Wclk cycles BW2MD input port 3 is idle (no flits in either VC0 or VC2). (M chip 3)", .pme_code = 435, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 3, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_3@4", .pme_desc = "Wclk cycles BW2MD input port 3 is idle (no flits in either VC0 or VC2). (M chip 4)", .pme_code = 436, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 3, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_3@5", .pme_desc = "Wclk cycles BW2MD input port 3 is idle (no flits in either VC0 or VC2). (M chip 5)", .pme_code = 437, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 3, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_3@6", .pme_desc = "Wclk cycles BW2MD input port 3 is idle (no flits in either VC0 or VC2). (M chip 6)", .pme_code = 438, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 3, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_3@7", .pme_desc = "Wclk cycles BW2MD input port 3 is idle (no flits in either VC0 or VC2). 
(M chip 7)", .pme_code = 439, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 3, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_3@8", .pme_desc = "Wclk cycles BW2MD input port 3 is idle (no flits in either VC0 or VC2). (M chip 8)", .pme_code = 440, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 3, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_3@9", .pme_desc = "Wclk cycles BW2MD input port 3 is idle (no flits in either VC0 or VC2). (M chip 9)", .pme_code = 441, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 3, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_3@10", .pme_desc = "Wclk cycles BW2MD input port 3 is idle (no flits in either VC0 or VC2). (M chip 10)", .pme_code = 442, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 3, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_3@11", .pme_desc = "Wclk cycles BW2MD input port 3 is idle (no flits in either VC0 or VC2). (M chip 11)", .pme_code = 443, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 3, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_3@12", .pme_desc = "Wclk cycles BW2MD input port 3 is idle (no flits in either VC0 or VC2). 
(M chip 12)", .pme_code = 444, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 3, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_3@13", .pme_desc = "Wclk cycles BW2MD input port 3 is idle (no flits in either VC0 or VC2). (M chip 13)", .pme_code = 445, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 3, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_3@14", .pme_desc = "Wclk cycles BW2MD input port 3 is idle (no flits in either VC0 or VC2). (M chip 14)", .pme_code = 446, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 3, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_IDLE_3@15", .pme_desc = "Wclk cycles BW2MD input port 3 is idle (no flits in either VC0 or VC2). (M chip 15)", .pme_code = 447, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 3, .pme_event = 3, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 4 Event 0 */ { .pme_name = "READ_REQ_SHARED@0", .pme_desc = "Read requests satisfied from the Shared state. (M chip 0)", .pme_code = 448, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "READ_REQ_SHARED@1", .pme_desc = "Read requests satisfied from the Shared state. 
(M chip 1)", .pme_code = 449, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 0, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "READ_REQ_SHARED@2", .pme_desc = "Read requests satisfied from the Shared state. (M chip 2)", .pme_code = 450, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 0, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "READ_REQ_SHARED@3", .pme_desc = "Read requests satisfied from the Shared state. (M chip 3)", .pme_code = 451, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 0, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "READ_REQ_SHARED@4", .pme_desc = "Read requests satisfied from the Shared state. (M chip 4)", .pme_code = 452, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 0, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "READ_REQ_SHARED@5", .pme_desc = "Read requests satisfied from the Shared state. (M chip 5)", .pme_code = 453, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 0, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "READ_REQ_SHARED@6", .pme_desc = "Read requests satisfied from the Shared state. 
(M chip 6)", .pme_code = 454, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 0, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "READ_REQ_SHARED@7", .pme_desc = "Read requests satisfied from the Shared state. (M chip 7)", .pme_code = 455, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 0, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "READ_REQ_SHARED@8", .pme_desc = "Read requests satisfied from the Shared state. (M chip 8)", .pme_code = 456, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 0, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "READ_REQ_SHARED@9", .pme_desc = "Read requests satisfied from the Shared state. (M chip 9)", .pme_code = 457, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 0, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "READ_REQ_SHARED@10", .pme_desc = "Read requests satisfied from the Shared state. (M chip 10)", .pme_code = 458, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 0, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "READ_REQ_SHARED@11", .pme_desc = "Read requests satisfied from the Shared state. 
(M chip 11)", .pme_code = 459, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 0, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "READ_REQ_SHARED@12", .pme_desc = "Read requests satisfied from the Shared state. (M chip 12)", .pme_code = 460, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 0, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "READ_REQ_SHARED@13", .pme_desc = "Read requests satisfied from the Shared state. (M chip 13)", .pme_code = 461, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 0, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "READ_REQ_SHARED@14", .pme_desc = "Read requests satisfied from the Shared state. (M chip 14)", .pme_code = 462, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 0, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "READ_REQ_SHARED@15", .pme_desc = "Read requests satisfied from the Shared state. (M chip 15)", .pme_code = 463, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 0, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 4 Event 1 */ { .pme_name = "STALL_VN1_BLOCKED@0", .pme_desc = "Wclk cycles protocol engine request queue stalled due to virtual network 1 output blocked. 
(M chip 0)", .pme_code = 464, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_VN1_BLOCKED@1", .pme_desc = "Wclk cycles protocol engine request queue stalled due to virtual network 1 output blocked. (M chip 1)", .pme_code = 465, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 1, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_VN1_BLOCKED@2", .pme_desc = "Wclk cycles protocol engine request queue stalled due to virtual network 1 output blocked. (M chip 2)", .pme_code = 466, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 1, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_VN1_BLOCKED@3", .pme_desc = "Wclk cycles protocol engine request queue stalled due to virtual network 1 output blocked. (M chip 3)", .pme_code = 467, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 1, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_VN1_BLOCKED@4", .pme_desc = "Wclk cycles protocol engine request queue stalled due to virtual network 1 output blocked. 
(M chip 4)", .pme_code = 468, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 1, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_VN1_BLOCKED@5", .pme_desc = "Wclk cycles protocol engine request queue stalled due to virtual network 1 output blocked. (M chip 5)", .pme_code = 469, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 1, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_VN1_BLOCKED@6", .pme_desc = "Wclk cycles protocol engine request queue stalled due to virtual network 1 output blocked. (M chip 6)", .pme_code = 470, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 1, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_VN1_BLOCKED@7", .pme_desc = "Wclk cycles protocol engine request queue stalled due to virtual network 1 output blocked. (M chip 7)", .pme_code = 471, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 1, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_VN1_BLOCKED@8", .pme_desc = "Wclk cycles protocol engine request queue stalled due to virtual network 1 output blocked. 
(M chip 8)", .pme_code = 472, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 1, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_VN1_BLOCKED@9", .pme_desc = "Wclk cycles protocol engine request queue stalled due to virtual network 1 output blocked. (M chip 9)", .pme_code = 473, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 1, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_VN1_BLOCKED@10", .pme_desc = "Wclk cycles protocol engine request queue stalled due to virtual network 1 output blocked. (M chip 10)", .pme_code = 474, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 1, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_VN1_BLOCKED@11", .pme_desc = "Wclk cycles protocol engine request queue stalled due to virtual network 1 output blocked. (M chip 11)", .pme_code = 475, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 1, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_VN1_BLOCKED@12", .pme_desc = "Wclk cycles protocol engine request queue stalled due to virtual network 1 output blocked. 
(M chip 12)", .pme_code = 476, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 1, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_VN1_BLOCKED@13", .pme_desc = "Wclk cycles protocol engine request queue stalled due to virtual network 1 output blocked. (M chip 13)", .pme_code = 477, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 1, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_VN1_BLOCKED@14", .pme_desc = "Wclk cycles protocol engine request queue stalled due to virtual network 1 output blocked. (M chip 14)", .pme_code = 478, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 1, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_VN1_BLOCKED@15", .pme_desc = "Wclk cycles protocol engine request queue stalled due to virtual network 1 output blocked. (M chip 15)", .pme_code = 479, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 1, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 4 Event 2 */ { .pme_name = "W_IN_FLOWING_0@0", .pme_desc = "Wclk cycles BW2MD input port 0 has a flit flowing (on either VC0 or VC2). 
(M chip 0)", .pme_code = 480, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_0@1", .pme_desc = "Wclk cycles BW2MD input port 0 has a flit flowing (on either VC0 or VC2). (M chip 1)", .pme_code = 481, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 2, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_0@2", .pme_desc = "Wclk cycles BW2MD input port 0 has a flit flowing (on either VC0 or VC2). (M chip 2)", .pme_code = 482, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 2, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_0@3", .pme_desc = "Wclk cycles BW2MD input port 0 has a flit flowing (on either VC0 or VC2). (M chip 3)", .pme_code = 483, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 2, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_0@4", .pme_desc = "Wclk cycles BW2MD input port 0 has a flit flowing (on either VC0 or VC2). (M chip 4)", .pme_code = 484, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 2, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_0@5", .pme_desc = "Wclk cycles BW2MD input port 0 has a flit flowing (on either VC0 or VC2). 
(M chip 5)", .pme_code = 485, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 2, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_0@6", .pme_desc = "Wclk cycles BW2MD input port 0 has a flit flowing (on either VC0 or VC2). (M chip 6)", .pme_code = 486, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 2, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_0@7", .pme_desc = "Wclk cycles BW2MD input port 0 has a flit flowing (on either VC0 or VC2). (M chip 7)", .pme_code = 487, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 2, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_0@8", .pme_desc = "Wclk cycles BW2MD input port 0 has a flit flowing (on either VC0 or VC2). (M chip 8)", .pme_code = 488, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 2, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_0@9", .pme_desc = "Wclk cycles BW2MD input port 0 has a flit flowing (on either VC0 or VC2). (M chip 9)", .pme_code = 489, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 2, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_0@10", .pme_desc = "Wclk cycles BW2MD input port 0 has a flit flowing (on either VC0 or VC2). 
(M chip 10)", .pme_code = 490, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 2, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_0@11", .pme_desc = "Wclk cycles BW2MD input port 0 has a flit flowing (on either VC0 or VC2). (M chip 11)", .pme_code = 491, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 2, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_0@12", .pme_desc = "Wclk cycles BW2MD input port 0 has a flit flowing (on either VC0 or VC2). (M chip 12)", .pme_code = 492, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 2, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_0@13", .pme_desc = "Wclk cycles BW2MD input port 0 has a flit flowing (on either VC0 or VC2). (M chip 13)", .pme_code = 493, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 2, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_0@14", .pme_desc = "Wclk cycles BW2MD input port 0 has a flit flowing (on either VC0 or VC2). (M chip 14)", .pme_code = 494, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 2, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_0@15", .pme_desc = "Wclk cycles BW2MD input port 0 has a flit flowing (on either VC0 or VC2). 
(M chip 15)", .pme_code = 495, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 2, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 4 Event 3 */ { .pme_name = "W_OUT_FLOWING_0@0", .pme_desc = "Wclk cycles MD2BW output port 0 has a flit flowing. (M chip 0)", .pme_code = 496, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_0@1", .pme_desc = "Wclk cycles MD2BW output port 0 has a flit flowing. (M chip 1)", .pme_code = 497, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 3, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_0@2", .pme_desc = "Wclk cycles MD2BW output port 0 has a flit flowing. (M chip 2)", .pme_code = 498, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 3, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_0@3", .pme_desc = "Wclk cycles MD2BW output port 0 has a flit flowing. (M chip 3)", .pme_code = 499, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 3, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_0@4", .pme_desc = "Wclk cycles MD2BW output port 0 has a flit flowing. 
(M chip 4)", .pme_code = 500, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 3, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_0@5", .pme_desc = "Wclk cycles MD2BW output port 0 has a flit flowing. (M chip 5)", .pme_code = 501, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 3, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_0@6", .pme_desc = "Wclk cycles MD2BW output port 0 has a flit flowing. (M chip 6)", .pme_code = 502, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 3, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_0@7", .pme_desc = "Wclk cycles MD2BW output port 0 has a flit flowing. (M chip 7)", .pme_code = 503, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 3, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_0@8", .pme_desc = "Wclk cycles MD2BW output port 0 has a flit flowing. (M chip 8)", .pme_code = 504, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 3, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_0@9", .pme_desc = "Wclk cycles MD2BW output port 0 has a flit flowing. 
(M chip 9)", .pme_code = 505, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 3, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_0@10", .pme_desc = "Wclk cycles MD2BW output port 0 has a flit flowing. (M chip 10)", .pme_code = 506, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 3, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_0@11", .pme_desc = "Wclk cycles MD2BW output port 0 has a flit flowing. (M chip 11)", .pme_code = 507, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 3, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_0@12", .pme_desc = "Wclk cycles MD2BW output port 0 has a flit flowing. (M chip 12)", .pme_code = 508, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 3, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_0@13", .pme_desc = "Wclk cycles MD2BW output port 0 has a flit flowing. (M chip 13)", .pme_code = 509, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 3, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_0@14", .pme_desc = "Wclk cycles MD2BW output port 0 has a flit flowing. 
(M chip 14)", .pme_code = 510, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 3, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_0@15", .pme_desc = "Wclk cycles MD2BW output port 0 has a flit flowing. (M chip 15)", .pme_code = 511, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 4, .pme_event = 3, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 5 Event 0 */ { .pme_name = "FWD_REQ_TO_OWNER@0", .pme_desc = "Requests forwarded to current owner (FwdRead, FwdReadShared, FlushReq, FwdGet, Update). (M chip 0)", .pme_code = 512, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_REQ_TO_OWNER@1", .pme_desc = "Requests forwarded to current owner (FwdRead, FwdReadShared, FlushReq, FwdGet, Update). (M chip 1)", .pme_code = 513, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 0, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_REQ_TO_OWNER@2", .pme_desc = "Requests forwarded to current owner (FwdRead, FwdReadShared, FlushReq, FwdGet, Update). 
(M chip 2)", .pme_code = 514, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 0, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_REQ_TO_OWNER@3", .pme_desc = "Requests forwarded to current owner (FwdRead, FwdReadShared, FlushReq, FwdGet, Update). (M chip 3)", .pme_code = 515, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 0, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_REQ_TO_OWNER@4", .pme_desc = "Requests forwarded to current owner (FwdRead, FwdReadShared, FlushReq, FwdGet, Update). (M chip 4)", .pme_code = 516, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 0, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_REQ_TO_OWNER@5", .pme_desc = "Requests forwarded to current owner (FwdRead, FwdReadShared, FlushReq, FwdGet, Update). (M chip 5)", .pme_code = 517, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 0, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_REQ_TO_OWNER@6", .pme_desc = "Requests forwarded to current owner (FwdRead, FwdReadShared, FlushReq, FwdGet, Update). 
(M chip 6)", .pme_code = 518, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 0, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_REQ_TO_OWNER@7", .pme_desc = "Requests forwarded to current owner (FwdRead, FwdReadShared, FlushReq, FwdGet, Update). (M chip 7)", .pme_code = 519, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 0, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_REQ_TO_OWNER@8", .pme_desc = "Requests forwarded to current owner (FwdRead, FwdReadShared, FlushReq, FwdGet, Update). (M chip 8)", .pme_code = 520, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 0, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_REQ_TO_OWNER@9", .pme_desc = "Requests forwarded to current owner (FwdRead, FwdReadShared, FlushReq, FwdGet, Update). (M chip 9)", .pme_code = 521, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 0, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_REQ_TO_OWNER@10", .pme_desc = "Requests forwarded to current owner (FwdRead, FwdReadShared, FlushReq, FwdGet, Update). 
(M chip 10)", .pme_code = 522, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 0, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_REQ_TO_OWNER@11", .pme_desc = "Requests forwarded to current owner (FwdRead, FwdReadShared, FlushReq, FwdGet, Update). (M chip 11)", .pme_code = 523, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 0, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_REQ_TO_OWNER@12", .pme_desc = "Requests forwarded to current owner (FwdRead, FwdReadShared, FlushReq, FwdGet, Update). (M chip 12)", .pme_code = 524, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 0, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_REQ_TO_OWNER@13", .pme_desc = "Requests forwarded to current owner (FwdRead, FwdReadShared, FlushReq, FwdGet, Update). (M chip 13)", .pme_code = 525, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 0, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_REQ_TO_OWNER@14", .pme_desc = "Requests forwarded to current owner (FwdRead, FwdReadShared, FlushReq, FwdGet, Update). 
(M chip 14)", .pme_code = 526, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 0, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_REQ_TO_OWNER@15", .pme_desc = "Requests forwarded to current owner (FwdRead, FwdReadShared, FlushReq, FwdGet, Update). (M chip 15)", .pme_code = 527, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 0, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 5 Event 1 */ { .pme_name = "PROT_ENGINE_IDLE_NO_PACKETS@0", .pme_desc = "Wclk cycles protocol engine idle due to no new packets to process. Note: The maximum packet acceptance rate into the MD is 1 packet every 2 Wclk periods. (M chip 0)", .pme_code = 528, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "PROT_ENGINE_IDLE_NO_PACKETS@1", .pme_desc = "Wclk cycles protocol engine idle due to no new packets to process. Note: The maximum packet acceptance rate into the MD is 1 packet every 2 Wclk periods. (M chip 1)", .pme_code = 529, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 1, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "PROT_ENGINE_IDLE_NO_PACKETS@2", .pme_desc = "Wclk cycles protocol engine idle due to no new packets to process. Note: The maximum packet acceptance rate into the MD is 1 packet every 2 Wclk periods. 
(M chip 2)", .pme_code = 530, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 1, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "PROT_ENGINE_IDLE_NO_PACKETS@3", .pme_desc = "Wclk cycles protocol engine idle due to no new packets to process. Note: The maximum packet acceptance rate into the MD is 1 packet every 2 Wclk periods. (M chip 3)", .pme_code = 531, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 1, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "PROT_ENGINE_IDLE_NO_PACKETS@4", .pme_desc = "Wclk cycles protocol engine idle due to no new packets to process. Note: The maximum packet acceptance rate into the MD is 1 packet every 2 Wclk periods. (M chip 4)", .pme_code = 532, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 1, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "PROT_ENGINE_IDLE_NO_PACKETS@5", .pme_desc = "Wclk cycles protocol engine idle due to no new packets to process. Note: The maximum packet acceptance rate into the MD is 1 packet every 2 Wclk periods. (M chip 5)", .pme_code = 533, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 1, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "PROT_ENGINE_IDLE_NO_PACKETS@6", .pme_desc = "Wclk cycles protocol engine idle due to no new packets to process. Note: The maximum packet acceptance rate into the MD is 1 packet every 2 Wclk periods. 
(M chip 6)", .pme_code = 534, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 1, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "PROT_ENGINE_IDLE_NO_PACKETS@7", .pme_desc = "Wclk cycles protocol engine idle due to no new packets to process. Note: The maximum packet acceptance rate into the MD is 1 packet every 2 Wclk periods. (M chip 7)", .pme_code = 535, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 1, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "PROT_ENGINE_IDLE_NO_PACKETS@8", .pme_desc = "Wclk cycles protocol engine idle due to no new packets to process. Note: The maximum packet acceptance rate into the MD is 1 packet every 2 Wclk periods. (M chip 8)", .pme_code = 536, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 1, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "PROT_ENGINE_IDLE_NO_PACKETS@9", .pme_desc = "Wclk cycles protocol engine idle due to no new packets to process. Note: The maximum packet acceptance rate into the MD is 1 packet every 2 Wclk periods. (M chip 9)", .pme_code = 537, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 1, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "PROT_ENGINE_IDLE_NO_PACKETS@10", .pme_desc = "Wclk cycles protocol engine idle due to no new packets to process. Note: The maximum packet acceptance rate into the MD is 1 packet every 2 Wclk periods. 
(M chip 10)", .pme_code = 538, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 1, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "PROT_ENGINE_IDLE_NO_PACKETS@11", .pme_desc = "Wclk cycles protocol engine idle due to no new packets to process. Note: The maximum packet acceptance rate into the MD is 1 packet every 2 Wclk periods. (M chip 11)", .pme_code = 539, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 1, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "PROT_ENGINE_IDLE_NO_PACKETS@12", .pme_desc = "Wclk cycles protocol engine idle due to no new packets to process. Note: The maximum packet acceptance rate into the MD is 1 packet every 2 Wclk periods. (M chip 12)", .pme_code = 540, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 1, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "PROT_ENGINE_IDLE_NO_PACKETS@13", .pme_desc = "Wclk cycles protocol engine idle due to no new packets to process. Note: The maximum packet acceptance rate into the MD is 1 packet every 2 Wclk periods. (M chip 13)", .pme_code = 541, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 1, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "PROT_ENGINE_IDLE_NO_PACKETS@14", .pme_desc = "Wclk cycles protocol engine idle due to no new packets to process. Note: The maximum packet acceptance rate into the MD is 1 packet every 2 Wclk periods. 
(M chip 14)", .pme_code = 542, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 1, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "PROT_ENGINE_IDLE_NO_PACKETS@15", .pme_desc = "Wclk cycles protocol engine idle due to no new packets to process. Note: The maximum packet acceptance rate into the MD is 1 packet every 2 Wclk periods. (M chip 15)", .pme_code = 543, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 1, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 5 Event 2 */ { .pme_name = "W_IN_FLOWING_1@0", .pme_desc = "Wclk cycles BW2MD input port 1 has a flit flowing (on either VC0 or VC2). (M chip 0)", .pme_code = 544, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_1@1", .pme_desc = "Wclk cycles BW2MD input port 1 has a flit flowing (on either VC0 or VC2). (M chip 1)", .pme_code = 545, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 2, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_1@2", .pme_desc = "Wclk cycles BW2MD input port 1 has a flit flowing (on either VC0 or VC2). 
(M chip 2)", .pme_code = 546, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 2, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_1@3", .pme_desc = "Wclk cycles BW2MD input port 1 has a flit flowing (on either VC0 or VC2). (M chip 3)", .pme_code = 547, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 2, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_1@4", .pme_desc = "Wclk cycles BW2MD input port 1 has a flit flowing (on either VC0 or VC2). (M chip 4)", .pme_code = 548, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 2, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_1@5", .pme_desc = "Wclk cycles BW2MD input port 1 has a flit flowing (on either VC0 or VC2). (M chip 5)", .pme_code = 549, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 2, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_1@6", .pme_desc = "Wclk cycles BW2MD input port 1 has a flit flowing (on either VC0 or VC2). (M chip 6)", .pme_code = 550, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 2, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_1@7", .pme_desc = "Wclk cycles BW2MD input port 1 has a flit flowing (on either VC0 or VC2). 
(M chip 7)", .pme_code = 551, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 2, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_1@8", .pme_desc = "Wclk cycles BW2MD input port 1 has a flit flowing (on either VC0 or VC2). (M chip 8)", .pme_code = 552, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 2, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_1@9", .pme_desc = "Wclk cycles BW2MD input port 1 has a flit flowing (on either VC0 or VC2). (M chip 9)", .pme_code = 553, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 2, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_1@10", .pme_desc = "Wclk cycles BW2MD input port 1 has a flit flowing (on either VC0 or VC2). (M chip 10)", .pme_code = 554, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 2, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_1@11", .pme_desc = "Wclk cycles BW2MD input port 1 has a flit flowing (on either VC0 or VC2). (M chip 11)", .pme_code = 555, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 2, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_1@12", .pme_desc = "Wclk cycles BW2MD input port 1 has a flit flowing (on either VC0 or VC2). 
(M chip 12)", .pme_code = 556, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 2, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_1@13", .pme_desc = "Wclk cycles BW2MD input port 1 has a flit flowing (on either VC0 or VC2). (M chip 13)", .pme_code = 557, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 2, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_1@14", .pme_desc = "Wclk cycles BW2MD input port 1 has a flit flowing (on either VC0 or VC2). (M chip 14)", .pme_code = 558, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 2, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_1@15", .pme_desc = "Wclk cycles BW2MD input port 1 has a flit flowing (on either VC0 or VC2). (M chip 15)", .pme_code = 559, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 2, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 5 Event 3 */ { .pme_name = "FWD_READ@0", .pme_desc = "FwdRead packets sent (Exclusive -> PendFwd transition). (M chip 0)", .pme_code = 560, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_READ@1", .pme_desc = "FwdRead packets sent (Exclusive -> PendFwd transition). 
(M chip 1)", .pme_code = 561, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 3, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_READ@2", .pme_desc = "FwdRead packets sent (Exclusive -> PendFwd transition). (M chip 2)", .pme_code = 562, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 3, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_READ@3", .pme_desc = "FwdRead packets sent (Exclusive -> PendFwd transition). (M chip 3)", .pme_code = 563, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 3, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_READ@4", .pme_desc = "FwdRead packets sent (Exclusive -> PendFwd transition). (M chip 4)", .pme_code = 564, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 3, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_READ@5", .pme_desc = "FwdRead packets sent (Exclusive -> PendFwd transition). (M chip 5)", .pme_code = 565, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 3, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_READ@6", .pme_desc = "FwdRead packets sent (Exclusive -> PendFwd transition). 
(M chip 6)", .pme_code = 566, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 3, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_READ@7", .pme_desc = "FwdRead packets sent (Exclusive -> PendFwd transition). (M chip 7)", .pme_code = 567, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 3, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_READ@8", .pme_desc = "FwdRead packets sent (Exclusive -> PendFwd transition). (M chip 8)", .pme_code = 568, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 3, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_READ@9", .pme_desc = "FwdRead packets sent (Exclusive -> PendFwd transition). (M chip 9)", .pme_code = 569, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 3, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_READ@10", .pme_desc = "FwdRead packets sent (Exclusive -> PendFwd transition). (M chip 10)", .pme_code = 570, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 3, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_READ@11", .pme_desc = "FwdRead packets sent (Exclusive -> PendFwd transition). 
(M chip 11)", .pme_code = 571, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 3, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_READ@12", .pme_desc = "FwdRead packets sent (Exclusive -> PendFwd transition). (M chip 12)", .pme_code = 572, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 3, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_READ@13", .pme_desc = "FwdRead packets sent (Exclusive -> PendFwd transition). (M chip 13)", .pme_code = 573, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 3, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_READ@14", .pme_desc = "FwdRead packets sent (Exclusive -> PendFwd transition). (M chip 14)", .pme_code = 574, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 3, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_READ@15", .pme_desc = "FwdRead packets sent (Exclusive -> PendFwd transition). (M chip 15)", .pme_code = 575, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 5, .pme_event = 3, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 6 Event 0 */ { .pme_name = "SUPPLY_INV@0", .pme_desc = "SupplyInv packets received. 
(M chip 0)", .pme_code = 576, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SUPPLY_INV@1", .pme_desc = "SupplyInv packets received. (M chip 1)", .pme_code = 577, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 0, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SUPPLY_INV@2", .pme_desc = "SupplyInv packets received. (M chip 2)", .pme_code = 578, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 0, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SUPPLY_INV@3", .pme_desc = "SupplyInv packets received. (M chip 3)", .pme_code = 579, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 0, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SUPPLY_INV@4", .pme_desc = "SupplyInv packets received. (M chip 4)", .pme_code = 580, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 0, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SUPPLY_INV@5", .pme_desc = "SupplyInv packets received. 
(M chip 5)", .pme_code = 581, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 0, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SUPPLY_INV@6", .pme_desc = "SupplyInv packets received. (M chip 6)", .pme_code = 582, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 0, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SUPPLY_INV@7", .pme_desc = "SupplyInv packets received. (M chip 7)", .pme_code = 583, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 0, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SUPPLY_INV@8", .pme_desc = "SupplyInv packets received. (M chip 8)", .pme_code = 584, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 0, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SUPPLY_INV@9", .pme_desc = "SupplyInv packets received. (M chip 9)", .pme_code = 585, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 0, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SUPPLY_INV@10", .pme_desc = "SupplyInv packets received. 
(M chip 10)", .pme_code = 586, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 0, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SUPPLY_INV@11", .pme_desc = "SupplyInv packets received. (M chip 11)", .pme_code = 587, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 0, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SUPPLY_INV@12", .pme_desc = "SupplyInv packets received. (M chip 12)", .pme_code = 588, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 0, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SUPPLY_INV@13", .pme_desc = "SupplyInv packets received. (M chip 13)", .pme_code = 589, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 0, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SUPPLY_INV@14", .pme_desc = "SupplyInv packets received. (M chip 14)", .pme_code = 590, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 0, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SUPPLY_INV@15", .pme_desc = "SupplyInv packets received. 
(M chip 15)", .pme_code = 591, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 0, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 6 Event 1 */ { .pme_name = "NUM_REPLAY@0", .pme_desc = "Requests sent through replay queue. (M chip 0)", .pme_code = 592, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "NUM_REPLAY@1", .pme_desc = "Requests sent through replay queue. (M chip 1)", .pme_code = 593, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 1, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "NUM_REPLAY@2", .pme_desc = "Requests sent through replay queue. (M chip 2)", .pme_code = 594, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 1, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "NUM_REPLAY@3", .pme_desc = "Requests sent through replay queue. (M chip 3)", .pme_code = 595, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 1, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "NUM_REPLAY@4", .pme_desc = "Requests sent through replay queue. 
(M chip 4)", .pme_code = 596, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 1, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "NUM_REPLAY@5", .pme_desc = "Requests sent through replay queue. (M chip 5)", .pme_code = 597, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 1, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "NUM_REPLAY@6", .pme_desc = "Requests sent through replay queue. (M chip 6)", .pme_code = 598, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 1, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "NUM_REPLAY@7", .pme_desc = "Requests sent through replay queue. (M chip 7)", .pme_code = 599, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 1, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "NUM_REPLAY@8", .pme_desc = "Requests sent through replay queue. (M chip 8)", .pme_code = 600, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 1, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "NUM_REPLAY@9", .pme_desc = "Requests sent through replay queue. 
(M chip 9)", .pme_code = 601, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 1, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "NUM_REPLAY@10", .pme_desc = "Requests sent through replay queue. (M chip 10)", .pme_code = 602, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 1, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "NUM_REPLAY@11", .pme_desc = "Requests sent through replay queue. (M chip 11)", .pme_code = 603, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 1, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "NUM_REPLAY@12", .pme_desc = "Requests sent through replay queue. (M chip 12)", .pme_code = 604, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 1, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "NUM_REPLAY@13", .pme_desc = "Requests sent through replay queue. (M chip 13)", .pme_code = 605, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 1, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "NUM_REPLAY@14", .pme_desc = "Requests sent through replay queue. 
(M chip 14)", .pme_code = 606, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 1, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "NUM_REPLAY@15", .pme_desc = "Requests sent through replay queue. (M chip 15)", .pme_code = 607, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 1, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 6 Event 2 */ { .pme_name = "W_IN_FLOWING_2@0", .pme_desc = "Wclk cycles BW2MD input port 2 has a flit flowing (on either VC0 or VC2). (M chip 0)", .pme_code = 608, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_2@1", .pme_desc = "Wclk cycles BW2MD input port 2 has a flit flowing (on either VC0 or VC2). (M chip 1)", .pme_code = 609, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 2, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_2@2", .pme_desc = "Wclk cycles BW2MD input port 2 has a flit flowing (on either VC0 or VC2). (M chip 2)", .pme_code = 610, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 2, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_2@3", .pme_desc = "Wclk cycles BW2MD input port 2 has a flit flowing (on either VC0 or VC2). 
(M chip 3)", .pme_code = 611, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 2, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_2@4", .pme_desc = "Wclk cycles BW2MD input port 2 has a flit flowing (on either VC0 or VC2). (M chip 4)", .pme_code = 612, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 2, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_2@5", .pme_desc = "Wclk cycles BW2MD input port 2 has a flit flowing (on either VC0 or VC2). (M chip 5)", .pme_code = 613, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 2, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_2@6", .pme_desc = "Wclk cycles BW2MD input port 2 has a flit flowing (on either VC0 or VC2). (M chip 6)", .pme_code = 614, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 2, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_2@7", .pme_desc = "Wclk cycles BW2MD input port 2 has a flit flowing (on either VC0 or VC2). (M chip 7)", .pme_code = 615, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 2, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_2@8", .pme_desc = "Wclk cycles BW2MD input port 2 has a flit flowing (on either VC0 or VC2). 
(M chip 8)", .pme_code = 616, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 2, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_2@9", .pme_desc = "Wclk cycles BW2MD input port 2 has a flit flowing (on either VC0 or VC2). (M chip 9)", .pme_code = 617, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 2, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_2@10", .pme_desc = "Wclk cycles BW2MD input port 2 has a flit flowing (on either VC0 or VC2). (M chip 10)", .pme_code = 618, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 2, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_2@11", .pme_desc = "Wclk cycles BW2MD input port 2 has a flit flowing (on either VC0 or VC2). (M chip 11)", .pme_code = 619, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 2, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_2@12", .pme_desc = "Wclk cycles BW2MD input port 2 has a flit flowing (on either VC0 or VC2). (M chip 12)", .pme_code = 620, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 2, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_2@13", .pme_desc = "Wclk cycles BW2MD input port 2 has a flit flowing (on either VC0 or VC2). 
(M chip 13)", .pme_code = 621, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 2, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_2@14", .pme_desc = "Wclk cycles BW2MD input port 2 has a flit flowing (on either VC0 or VC2). (M chip 14)", .pme_code = 622, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 2, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_2@15", .pme_desc = "Wclk cycles BW2MD input port 2 has a flit flowing (on either VC0 or VC2). (M chip 15)", .pme_code = 623, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 2, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 6 Event 3 */ { .pme_name = "INVAL_1@0", .pme_desc = "Invalidations sent to a single BW. (M chip 0)", .pme_code = 624, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_1@1", .pme_desc = "Invalidations sent to a single BW. (M chip 1)", .pme_code = 625, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 3, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_1@2", .pme_desc = "Invalidations sent to a single BW. 
(M chip 2)", .pme_code = 626, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 3, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_1@3", .pme_desc = "Invalidations sent to a single BW. (M chip 3)", .pme_code = 627, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 3, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_1@4", .pme_desc = "Invalidations sent to a single BW. (M chip 4)", .pme_code = 628, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 3, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_1@5", .pme_desc = "Invalidations sent to a single BW. (M chip 5)", .pme_code = 629, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 3, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_1@6", .pme_desc = "Invalidations sent to a single BW. (M chip 6)", .pme_code = 630, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 3, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_1@7", .pme_desc = "Invalidations sent to a single BW. 
(M chip 7)", .pme_code = 631, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 3, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_1@8", .pme_desc = "Invalidations sent to a single BW. (M chip 8)", .pme_code = 632, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 3, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_1@9", .pme_desc = "Invalidations sent to a single BW. (M chip 9)", .pme_code = 633, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 3, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_1@10", .pme_desc = "Invalidations sent to a single BW. (M chip 10)", .pme_code = 634, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 3, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_1@11", .pme_desc = "Invalidations sent to a single BW. (M chip 11)", .pme_code = 635, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 3, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_1@12", .pme_desc = "Invalidations sent to a single BW. 
(M chip 12)", .pme_code = 636, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 3, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_1@13", .pme_desc = "Invalidations sent to a single BW. (M chip 13)", .pme_code = 637, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 3, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_1@14", .pme_desc = "Invalidations sent to a single BW. (M chip 14)", .pme_code = 638, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 3, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_1@15", .pme_desc = "Invalidations sent to a single BW. (M chip 15)", .pme_code = 639, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 6, .pme_event = 3, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 7 Event 0 */ { .pme_name = "REQUEST_GETS_4DWORDS_L3_HIT@0", .pme_desc = "NGet or Get Full cache line requests to MDs - L3 hit. (M chip 0)", .pme_code = 640, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_GETS_4DWORDS_L3_HIT@1", .pme_desc = "NGet or Get Full cache line requests to MDs - L3 hit. 
(M chip 1)", .pme_code = 641, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 0, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_GETS_4DWORDS_L3_HIT@2", .pme_desc = "NGet or Get Full cache line requests to MDs - L3 hit. (M chip 2)", .pme_code = 642, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 0, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_GETS_4DWORDS_L3_HIT@3", .pme_desc = "NGet or Get Full cache line requests to MDs - L3 hit. (M chip 3)", .pme_code = 643, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 0, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_GETS_4DWORDS_L3_HIT@4", .pme_desc = "NGet or Get Full cache line requests to MDs - L3 hit. (M chip 4)", .pme_code = 644, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 0, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_GETS_4DWORDS_L3_HIT@5", .pme_desc = "NGet or Get Full cache line requests to MDs - L3 hit. (M chip 5)", .pme_code = 645, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 0, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_GETS_4DWORDS_L3_HIT@6", .pme_desc = "NGet or Get Full cache line requests to MDs - L3 hit. 
(M chip 6)", .pme_code = 646, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 0, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_GETS_4DWORDS_L3_HIT@7", .pme_desc = "NGet or Get Full cache line requests to MDs - L3 hit. (M chip 7)", .pme_code = 647, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 0, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_GETS_4DWORDS_L3_HIT@8", .pme_desc = "NGet or Get Full cache line requests to MDs - L3 hit. (M chip 8)", .pme_code = 648, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 0, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_GETS_4DWORDS_L3_HIT@9", .pme_desc = "NGet or Get Full cache line requests to MDs - L3 hit. (M chip 9)", .pme_code = 649, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 0, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_GETS_4DWORDS_L3_HIT@10", .pme_desc = "NGet or Get Full cache line requests to MDs - L3 hit. (M chip 10)", .pme_code = 650, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 0, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_GETS_4DWORDS_L3_HIT@11", .pme_desc = "NGet or Get Full cache line requests to MDs - L3 hit. 
(M chip 11)", .pme_code = 651, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 0, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_GETS_4DWORDS_L3_HIT@12", .pme_desc = "NGet or Get Full cache line requests to MDs - L3 hit. (M chip 12)", .pme_code = 652, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 0, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_GETS_4DWORDS_L3_HIT@13", .pme_desc = "NGet or Get Full cache line requests to MDs - L3 hit. (M chip 13)", .pme_code = 653, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 0, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_GETS_4DWORDS_L3_HIT@14", .pme_desc = "NGet or Get Full cache line requests to MDs - L3 hit. (M chip 14)", .pme_code = 654, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 0, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_GETS_4DWORDS_L3_HIT@15", .pme_desc = "NGet or Get Full cache line requests to MDs - L3 hit. 
(M chip 15)", .pme_code = 655, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 0, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 7 Event 1 */ { .pme_name = "@0", .pme_desc = "", .pme_code = 656, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@1", .pme_desc = "", .pme_code = 657, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 1, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@2", .pme_desc = "", .pme_code = 658, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 1, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@3", .pme_desc = "", .pme_code = 659, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 1, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@4", .pme_desc = "", .pme_code = 660, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 1, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@5", .pme_desc = "", .pme_code = 661, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 1, .pme_chipno = 5, .pme_base = 
PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@6", .pme_desc = "", .pme_code = 662, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 1, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@7", .pme_desc = "", .pme_code = 663, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 1, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@8", .pme_desc = "", .pme_code = 664, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 1, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@9", .pme_desc = "", .pme_code = 665, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 1, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@10", .pme_desc = "", .pme_code = 666, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 1, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@11", .pme_desc = "", .pme_code = 667, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 1, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@12", .pme_desc = "", .pme_code = 668, .pme_flags = 0x0, .pme_numasks = 
0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 1, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@13", .pme_desc = "", .pme_code = 669, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 1, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@14", .pme_desc = "", .pme_code = 670, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 1, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@15", .pme_desc = "", .pme_code = 671, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 1, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 7 Event 2 */ { .pme_name = "W_IN_FLOWING_3@0", .pme_desc = "Wclk cycles BW2MD input port 3 has a flit flowing (on either VC0 or VC2). (M chip 0)", .pme_code = 672, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_3@1", .pme_desc = "Wclk cycles BW2MD input port 3 has a flit flowing (on either VC0 or VC2). 
(M chip 1)", .pme_code = 673, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 2, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_3@2", .pme_desc = "Wclk cycles BW2MD input port 3 has a flit flowing (on either VC0 or VC2). (M chip 2)", .pme_code = 674, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 2, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_3@3", .pme_desc = "Wclk cycles BW2MD input port 3 has a flit flowing (on either VC0 or VC2). (M chip 3)", .pme_code = 675, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 2, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_3@4", .pme_desc = "Wclk cycles BW2MD input port 3 has a flit flowing (on either VC0 or VC2). (M chip 4)", .pme_code = 676, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 2, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_3@5", .pme_desc = "Wclk cycles BW2MD input port 3 has a flit flowing (on either VC0 or VC2). (M chip 5)", .pme_code = 677, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 2, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_3@6", .pme_desc = "Wclk cycles BW2MD input port 3 has a flit flowing (on either VC0 or VC2). 
(M chip 6)", .pme_code = 678, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 2, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_3@7", .pme_desc = "Wclk cycles BW2MD input port 3 has a flit flowing (on either VC0 or VC2). (M chip 7)", .pme_code = 679, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 2, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_3@8", .pme_desc = "Wclk cycles BW2MD input port 3 has a flit flowing (on either VC0 or VC2). (M chip 8)", .pme_code = 680, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 2, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_3@9", .pme_desc = "Wclk cycles BW2MD input port 3 has a flit flowing (on either VC0 or VC2). (M chip 9)", .pme_code = 681, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 2, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_3@10", .pme_desc = "Wclk cycles BW2MD input port 3 has a flit flowing (on either VC0 or VC2). (M chip 10)", .pme_code = 682, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 2, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_3@11", .pme_desc = "Wclk cycles BW2MD input port 3 has a flit flowing (on either VC0 or VC2). 
(M chip 11)", .pme_code = 683, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 2, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_3@12", .pme_desc = "Wclk cycles BW2MD input port 3 has a flit flowing (on either VC0 or VC2). (M chip 12)", .pme_code = 684, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 2, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_3@13", .pme_desc = "Wclk cycles BW2MD input port 3 has a flit flowing (on either VC0 or VC2). (M chip 13)", .pme_code = 685, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 2, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_3@14", .pme_desc = "Wclk cycles BW2MD input port 3 has a flit flowing (on either VC0 or VC2). (M chip 14)", .pme_code = 686, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 2, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_FLOWING_3@15", .pme_desc = "Wclk cycles BW2MD input port 3 has a flit flowing (on either VC0 or VC2). (M chip 15)", .pme_code = 687, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 2, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 7 Event 3 */ { .pme_name = "INVAL_2@0", .pme_desc = "Invalidations sent to two BWs. 
(M chip 0)", .pme_code = 688, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_2@1", .pme_desc = "Invalidations sent to two BWs. (M chip 1)", .pme_code = 689, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 3, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_2@2", .pme_desc = "Invalidations sent to two BWs. (M chip 2)", .pme_code = 690, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 3, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_2@3", .pme_desc = "Invalidations sent to two BWs. (M chip 3)", .pme_code = 691, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 3, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_2@4", .pme_desc = "Invalidations sent to two BWs. (M chip 4)", .pme_code = 692, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 3, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_2@5", .pme_desc = "Invalidations sent to two BWs. 
(M chip 5)", .pme_code = 693, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 3, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_2@6", .pme_desc = "Invalidations sent to two BWs. (M chip 6)", .pme_code = 694, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 3, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_2@7", .pme_desc = "Invalidations sent to two BWs. (M chip 7)", .pme_code = 695, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 3, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_2@8", .pme_desc = "Invalidations sent to two BWs. (M chip 8)", .pme_code = 696, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 3, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_2@9", .pme_desc = "Invalidations sent to two BWs. (M chip 9)", .pme_code = 697, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 3, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_2@10", .pme_desc = "Invalidations sent to two BWs. 
(M chip 10)", .pme_code = 698, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 3, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_2@11", .pme_desc = "Invalidations sent to two BWs. (M chip 11)", .pme_code = 699, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 3, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_2@12", .pme_desc = "Invalidations sent to two BWs. (M chip 12)", .pme_code = 700, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 3, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_2@13", .pme_desc = "Invalidations sent to two BWs. (M chip 13)", .pme_code = 701, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 3, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_2@14", .pme_desc = "Invalidations sent to two BWs. (M chip 14)", .pme_code = 702, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 3, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_2@15", .pme_desc = "Invalidations sent to two BWs. 
(M chip 15)", .pme_code = 703, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 7, .pme_event = 3, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 8 Event 0 */ { .pme_name = "SUPPLY_SH@0", .pme_desc = "SupplySh packets received. (M chip 0)", .pme_code = 704, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SUPPLY_SH@1", .pme_desc = "SupplySh packets received. (M chip 1)", .pme_code = 705, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 0, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SUPPLY_SH@2", .pme_desc = "SupplySh packets received. (M chip 2)", .pme_code = 706, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 0, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SUPPLY_SH@3", .pme_desc = "SupplySh packets received. (M chip 3)", .pme_code = 707, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 0, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SUPPLY_SH@4", .pme_desc = "SupplySh packets received. 
(M chip 4)", .pme_code = 708, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 0, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SUPPLY_SH@5", .pme_desc = "SupplySh packets received. (M chip 5)", .pme_code = 709, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 0, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SUPPLY_SH@6", .pme_desc = "SupplySh packets received. (M chip 6)", .pme_code = 710, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 0, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SUPPLY_SH@7", .pme_desc = "SupplySh packets received. (M chip 7)", .pme_code = 711, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 0, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SUPPLY_SH@8", .pme_desc = "SupplySh packets received. (M chip 8)", .pme_code = 712, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 0, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SUPPLY_SH@9", .pme_desc = "SupplySh packets received. 
(M chip 9)", .pme_code = 713, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 0, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SUPPLY_SH@10", .pme_desc = "SupplySh packets received. (M chip 10)", .pme_code = 714, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 0, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SUPPLY_SH@11", .pme_desc = "SupplySh packets received. (M chip 11)", .pme_code = 715, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 0, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SUPPLY_SH@12", .pme_desc = "SupplySh packets received. (M chip 12)", .pme_code = 716, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 0, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SUPPLY_SH@13", .pme_desc = "SupplySh packets received. (M chip 13)", .pme_code = 717, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 0, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SUPPLY_SH@14", .pme_desc = "SupplySh packets received. 
(M chip 14)", .pme_code = 718, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 0, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SUPPLY_SH@15", .pme_desc = "SupplySh packets received. (M chip 15)", .pme_code = 719, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 0, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 8 Event 1 */ { .pme_name = "STALL_MM@0", .pme_desc = "Wclk cycles protocol engine request queue stalled due to back-pressure from memory manager. (M chip 0)", .pme_code = 720, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_MM@1", .pme_desc = "Wclk cycles protocol engine request queue stalled due to back-pressure from memory manager. (M chip 1)", .pme_code = 721, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 1, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_MM@2", .pme_desc = "Wclk cycles protocol engine request queue stalled due to back-pressure from memory manager. 
(M chip 2)", .pme_code = 722, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 1, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_MM@3", .pme_desc = "Wclk cycles protocol engine request queue stalled due to back-pressure from memory manager. (M chip 3)", .pme_code = 723, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 1, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_MM@4", .pme_desc = "Wclk cycles protocol engine request queue stalled due to back-pressure from memory manager. (M chip 4)", .pme_code = 724, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 1, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_MM@5", .pme_desc = "Wclk cycles protocol engine request queue stalled due to back-pressure from memory manager. (M chip 5)", .pme_code = 725, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 1, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_MM@6", .pme_desc = "Wclk cycles protocol engine request queue stalled due to back-pressure from memory manager. 
(M chip 6)", .pme_code = 726, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 1, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_MM@7", .pme_desc = "Wclk cycles protocol engine request queue stalled due to back-pressure from memory manager. (M chip 7)", .pme_code = 727, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 1, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_MM@8", .pme_desc = "Wclk cycles protocol engine request queue stalled due to back-pressure from memory manager. (M chip 8)", .pme_code = 728, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 1, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_MM@9", .pme_desc = "Wclk cycles protocol engine request queue stalled due to back-pressure from memory manager. (M chip 9)", .pme_code = 729, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 1, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_MM@10", .pme_desc = "Wclk cycles protocol engine request queue stalled due to back-pressure from memory manager. 
(M chip 10)", .pme_code = 730, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 1, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_MM@11", .pme_desc = "Wclk cycles protocol engine request queue stalled due to back-pressure from memory manager. (M chip 11)", .pme_code = 731, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 1, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_MM@12", .pme_desc = "Wclk cycles protocol engine request queue stalled due to back-pressure from memory manager. (M chip 12)", .pme_code = 732, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 1, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_MM@13", .pme_desc = "Wclk cycles protocol engine request queue stalled due to back-pressure from memory manager. (M chip 13)", .pme_code = 733, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 1, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_MM@14", .pme_desc = "Wclk cycles protocol engine request queue stalled due to back-pressure from memory manager. 
(M chip 14)", .pme_code = 734, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 1, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "STALL_MM@15", .pme_desc = "Wclk cycles protocol engine request queue stalled due to back-pressure from memory manager. (M chip 15)", .pme_code = 735, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 1, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 8 Event 2 */ { .pme_name = "W_IN_WAITING_0@0", .pme_desc = "Wclk cycles BW2MD input port 0 has a packet waiting that failed to win arbitration (on either VC0 or VC2). (M chip 0)", .pme_code = 736, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_0@1", .pme_desc = "Wclk cycles BW2MD input port 0 has a packet waiting that failed to win arbitration (on either VC0 or VC2). (M chip 1)", .pme_code = 737, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 2, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_0@2", .pme_desc = "Wclk cycles BW2MD input port 0 has a packet waiting that failed to win arbitration (on either VC0 or VC2). 
(M chip 2)", .pme_code = 738, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 2, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_0@3", .pme_desc = "Wclk cycles BW2MD input port 0 has a packet waiting that failed to win arbitration (on either VC0 or VC2). (M chip 3)", .pme_code = 739, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 2, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_0@4", .pme_desc = "Wclk cycles BW2MD input port 0 has a packet waiting that failed to win arbitration (on either VC0 or VC2). (M chip 4)", .pme_code = 740, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 2, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_0@5", .pme_desc = "Wclk cycles BW2MD input port 0 has a packet waiting that failed to win arbitration (on either VC0 or VC2). (M chip 5)", .pme_code = 741, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 2, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_0@6", .pme_desc = "Wclk cycles BW2MD input port 0 has a packet waiting that failed to win arbitration (on either VC0 or VC2). 
(M chip 6)", .pme_code = 742, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 2, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_0@7", .pme_desc = "Wclk cycles BW2MD input port 0 has a packet waiting that failed to win arbitration (on either VC0 or VC2). (M chip 7)", .pme_code = 743, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 2, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_0@8", .pme_desc = "Wclk cycles BW2MD input port 0 has a packet waiting that failed to win arbitration (on either VC0 or VC2). (M chip 8)", .pme_code = 744, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 2, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_0@9", .pme_desc = "Wclk cycles BW2MD input port 0 has a packet waiting that failed to win arbitration (on either VC0 or VC2). (M chip 9)", .pme_code = 745, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 2, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_0@10", .pme_desc = "Wclk cycles BW2MD input port 0 has a packet waiting that failed to win arbitration (on either VC0 or VC2). 
(M chip 10)", .pme_code = 746, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 2, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_0@11", .pme_desc = "Wclk cycles BW2MD input port 0 has a packet waiting that failed to win arbitration (on either VC0 or VC2). (M chip 11)", .pme_code = 747, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 2, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_0@12", .pme_desc = "Wclk cycles BW2MD input port 0 has a packet waiting that failed to win arbitration (on either VC0 or VC2). (M chip 12)", .pme_code = 748, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 2, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_0@13", .pme_desc = "Wclk cycles BW2MD input port 0 has a packet waiting that failed to win arbitration (on either VC0 or VC2). (M chip 13)", .pme_code = 749, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 2, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_0@14", .pme_desc = "Wclk cycles BW2MD input port 0 has a packet waiting that failed to win arbitration (on either VC0 or VC2). 
(M chip 14)", .pme_code = 750, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 2, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_0@15", .pme_desc = "Wclk cycles BW2MD input port 0 has a packet waiting that failed to win arbitration (on either VC0 or VC2). (M chip 15)", .pme_code = 751, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 2, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 8 Event 3 */ { .pme_name = "W_OUT_FLOWING_1@0", .pme_desc = "Wclk cycles MD2BW output port 1 has a flit flowing. (M chip 0)", .pme_code = 752, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_1@1", .pme_desc = "Wclk cycles MD2BW output port 1 has a flit flowing. (M chip 1)", .pme_code = 753, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 3, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_1@2", .pme_desc = "Wclk cycles MD2BW output port 1 has a flit flowing. (M chip 2)", .pme_code = 754, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 3, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_1@3", .pme_desc = "Wclk cycles MD2BW output port 1 has a flit flowing. 
(M chip 3)", .pme_code = 755, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 3, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_1@4", .pme_desc = "Wclk cycles MD2BW output port 1 has a flit flowing. (M chip 4)", .pme_code = 756, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 3, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_1@5", .pme_desc = "Wclk cycles MD2BW output port 1 has a flit flowing. (M chip 5)", .pme_code = 757, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 3, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_1@6", .pme_desc = "Wclk cycles MD2BW output port 1 has a flit flowing. (M chip 6)", .pme_code = 758, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 3, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_1@7", .pme_desc = "Wclk cycles MD2BW output port 1 has a flit flowing. (M chip 7)", .pme_code = 759, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 3, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_1@8", .pme_desc = "Wclk cycles MD2BW output port 1 has a flit flowing. 
(M chip 8)", .pme_code = 760, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 3, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_1@9", .pme_desc = "Wclk cycles MD2BW output port 1 has a flit flowing. (M chip 9)", .pme_code = 761, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 3, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_1@10", .pme_desc = "Wclk cycles MD2BW output port 1 has a flit flowing. (M chip 10)", .pme_code = 762, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 3, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_1@11", .pme_desc = "Wclk cycles MD2BW output port 1 has a flit flowing. (M chip 11)", .pme_code = 763, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 3, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_1@12", .pme_desc = "Wclk cycles MD2BW output port 1 has a flit flowing. (M chip 12)", .pme_code = 764, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 3, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_1@13", .pme_desc = "Wclk cycles MD2BW output port 1 has a flit flowing. 
(M chip 13)", .pme_code = 765, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 3, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_1@14", .pme_desc = "Wclk cycles MD2BW output port 1 has a flit flowing. (M chip 14)", .pme_code = 766, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 3, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_1@15", .pme_desc = "Wclk cycles MD2BW output port 1 has a flit flowing. (M chip 15)", .pme_code = 767, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 8, .pme_event = 3, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 9 Event 0 */ { .pme_name = "REQUEST_GETS_4DWORDS_L3_MISS@0", .pme_desc = "NGet or Get Full cache line requests to MDs - L3 miss. (M chip 0)", .pme_code = 768, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_GETS_4DWORDS_L3_MISS@1", .pme_desc = "NGet or Get Full cache line requests to MDs - L3 miss. (M chip 1)", .pme_code = 769, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 0, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_GETS_4DWORDS_L3_MISS@2", .pme_desc = "NGet or Get Full cache line requests to MDs - L3 miss. 
(M chip 2)", .pme_code = 770, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 0, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_GETS_4DWORDS_L3_MISS@3", .pme_desc = "NGet or Get Full cache line requests to MDs - L3 miss. (M chip 3)", .pme_code = 771, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 0, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_GETS_4DWORDS_L3_MISS@4", .pme_desc = "NGet or Get Full cache line requests to MDs - L3 miss. (M chip 4)", .pme_code = 772, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 0, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_GETS_4DWORDS_L3_MISS@5", .pme_desc = "NGet or Get Full cache line requests to MDs - L3 miss. (M chip 5)", .pme_code = 773, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 0, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_GETS_4DWORDS_L3_MISS@6", .pme_desc = "NGet or Get Full cache line requests to MDs - L3 miss. (M chip 6)", .pme_code = 774, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 0, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_GETS_4DWORDS_L3_MISS@7", .pme_desc = "NGet or Get Full cache line requests to MDs - L3 miss. 
(M chip 7)", .pme_code = 775, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 0, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_GETS_4DWORDS_L3_MISS@8", .pme_desc = "NGet or Get Full cache line requests to MDs - L3 miss. (M chip 8)", .pme_code = 776, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 0, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_GETS_4DWORDS_L3_MISS@9", .pme_desc = "NGet or Get Full cache line requests to MDs - L3 miss. (M chip 9)", .pme_code = 777, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 0, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_GETS_4DWORDS_L3_MISS@10", .pme_desc = "NGet or Get Full cache line requests to MDs - L3 miss. (M chip 10)", .pme_code = 778, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 0, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_GETS_4DWORDS_L3_MISS@11", .pme_desc = "NGet or Get Full cache line requests to MDs - L3 miss. (M chip 11)", .pme_code = 779, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 0, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_GETS_4DWORDS_L3_MISS@12", .pme_desc = "NGet or Get Full cache line requests to MDs - L3 miss. 
(M chip 12)", .pme_code = 780, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 0, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_GETS_4DWORDS_L3_MISS@13", .pme_desc = "NGet or Get Full cache line requests to MDs - L3 miss. (M chip 13)", .pme_code = 781, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 0, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_GETS_4DWORDS_L3_MISS@14", .pme_desc = "NGet or Get Full cache line requests to MDs - L3 miss. (M chip 14)", .pme_code = 782, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 0, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_GETS_4DWORDS_L3_MISS@15", .pme_desc = "NGet or Get Full cache line requests to MDs - L3 miss. (M chip 15)", .pme_code = 783, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 0, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 9 Event 1 */ { .pme_name = "SECTION_BUSY@0", .pme_desc = "Wclk cycles MD pipeline busy. (M chip 0)", .pme_code = 784, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SECTION_BUSY@1", .pme_desc = "Wclk cycles MD pipeline busy. 
(M chip 1)", .pme_code = 785, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 1, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SECTION_BUSY@2", .pme_desc = "Wclk cycles MD pipeline busy. (M chip 2)", .pme_code = 786, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 1, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SECTION_BUSY@3", .pme_desc = "Wclk cycles MD pipeline busy. (M chip 3)", .pme_code = 787, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 1, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SECTION_BUSY@4", .pme_desc = "Wclk cycles MD pipeline busy. (M chip 4)", .pme_code = 788, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 1, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SECTION_BUSY@5", .pme_desc = "Wclk cycles MD pipeline busy. (M chip 5)", .pme_code = 789, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 1, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SECTION_BUSY@6", .pme_desc = "Wclk cycles MD pipeline busy. 
(M chip 6)", .pme_code = 790, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 1, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SECTION_BUSY@7", .pme_desc = "Wclk cycles MD pipeline busy. (M chip 7)", .pme_code = 791, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 1, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SECTION_BUSY@8", .pme_desc = "Wclk cycles MD pipeline busy. (M chip 8)", .pme_code = 792, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 1, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SECTION_BUSY@9", .pme_desc = "Wclk cycles MD pipeline busy. (M chip 9)", .pme_code = 793, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 1, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SECTION_BUSY@10", .pme_desc = "Wclk cycles MD pipeline busy. (M chip 10)", .pme_code = 794, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 1, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SECTION_BUSY@11", .pme_desc = "Wclk cycles MD pipeline busy. 
(M chip 11)", .pme_code = 795, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 1, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SECTION_BUSY@12", .pme_desc = "Wclk cycles MD pipeline busy. (M chip 12)", .pme_code = 796, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 1, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SECTION_BUSY@13", .pme_desc = "Wclk cycles MD pipeline busy. (M chip 13)", .pme_code = 797, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 1, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SECTION_BUSY@14", .pme_desc = "Wclk cycles MD pipeline busy. (M chip 14)", .pme_code = 798, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 1, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SECTION_BUSY@15", .pme_desc = "Wclk cycles MD pipeline busy. (M chip 15)", .pme_code = 799, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 1, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 9 Event 2 */ { .pme_name = "W_IN_WAITING_1@0", .pme_desc = "Wclk cycles BW2MD input port 1 has a packet waiting that failed to win arbitration (on either VC0 or VC2). 
(M chip 0)", .pme_code = 800, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_1@1", .pme_desc = "Wclk cycles BW2MD input port 1 has a packet waiting that failed to win arbitration (on either VC0 or VC2). (M chip 1)", .pme_code = 801, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 2, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_1@2", .pme_desc = "Wclk cycles BW2MD input port 1 has a packet waiting that failed to win arbitration (on either VC0 or VC2). (M chip 2)", .pme_code = 802, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 2, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_1@3", .pme_desc = "Wclk cycles BW2MD input port 1 has a packet waiting that failed to win arbitration (on either VC0 or VC2). (M chip 3)", .pme_code = 803, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 2, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_1@4", .pme_desc = "Wclk cycles BW2MD input port 1 has a packet waiting that failed to win arbitration (on either VC0 or VC2). 
(M chip 4)", .pme_code = 804, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 2, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_1@5", .pme_desc = "Wclk cycles BW2MD input port 1 has a packet waiting that failed to win arbitration (on either VC0 or VC2). (M chip 5)", .pme_code = 805, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 2, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_1@6", .pme_desc = "Wclk cycles BW2MD input port 1 has a packet waiting that failed to win arbitration (on either VC0 or VC2). (M chip 6)", .pme_code = 806, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 2, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_1@7", .pme_desc = "Wclk cycles BW2MD input port 1 has a packet waiting that failed to win arbitration (on either VC0 or VC2). (M chip 7)", .pme_code = 807, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 2, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_1@8", .pme_desc = "Wclk cycles BW2MD input port 1 has a packet waiting that failed to win arbitration (on either VC0 or VC2). 
(M chip 8)", .pme_code = 808, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 2, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_1@9", .pme_desc = "Wclk cycles BW2MD input port 1 has a packet waiting that failed to win arbitration (on either VC0 or VC2). (M chip 9)", .pme_code = 809, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 2, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_1@10", .pme_desc = "Wclk cycles BW2MD input port 1 has a packet waiting that failed to win arbitration (on either VC0 or VC2). (M chip 10)", .pme_code = 810, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 2, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_1@11", .pme_desc = "Wclk cycles BW2MD input port 1 has a packet waiting that failed to win arbitration (on either VC0 or VC2). (M chip 11)", .pme_code = 811, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 2, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_1@12", .pme_desc = "Wclk cycles BW2MD input port 1 has a packet waiting that failed to win arbitration (on either VC0 or VC2). 
(M chip 12)", .pme_code = 812, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 2, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_1@13", .pme_desc = "Wclk cycles BW2MD input port 1 has a packet waiting that failed to win arbitration (on either VC0 or VC2). (M chip 13)", .pme_code = 813, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 2, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_1@14", .pme_desc = "Wclk cycles BW2MD input port 1 has a packet waiting that failed to win arbitration (on either VC0 or VC2). (M chip 14)", .pme_code = 814, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 2, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_1@15", .pme_desc = "Wclk cycles BW2MD input port 1 has a packet waiting that failed to win arbitration (on either VC0 or VC2). (M chip 15)", .pme_code = 815, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 2, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 9 Event 3 */ { .pme_name = "W_OUT_FLOWING_2@0", .pme_desc = "Wclk cycles MD2BW output port 2 has a flit flowing. 
(M chip 0)", .pme_code = 816, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_2@1", .pme_desc = "Wclk cycles MD2BW output port 2 has a flit flowing. (M chip 1)", .pme_code = 817, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 3, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_2@2", .pme_desc = "Wclk cycles MD2BW output port 2 has a flit flowing. (M chip 2)", .pme_code = 818, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 3, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_2@3", .pme_desc = "Wclk cycles MD2BW output port 2 has a flit flowing. (M chip 3)", .pme_code = 819, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 3, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_2@4", .pme_desc = "Wclk cycles MD2BW output port 2 has a flit flowing. (M chip 4)", .pme_code = 820, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 3, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_2@5", .pme_desc = "Wclk cycles MD2BW output port 2 has a flit flowing. 
(M chip 5)", .pme_code = 821, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 3, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_2@6", .pme_desc = "Wclk cycles MD2BW output port 2 has a flit flowing. (M chip 6)", .pme_code = 822, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 3, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_2@7", .pme_desc = "Wclk cycles MD2BW output port 2 has a flit flowing. (M chip 7)", .pme_code = 823, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 3, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_2@8", .pme_desc = "Wclk cycles MD2BW output port 2 has a flit flowing. (M chip 8)", .pme_code = 824, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 3, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_2@9", .pme_desc = "Wclk cycles MD2BW output port 2 has a flit flowing. (M chip 9)", .pme_code = 825, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 3, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_2@10", .pme_desc = "Wclk cycles MD2BW output port 2 has a flit flowing. 
(M chip 10)", .pme_code = 826, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 3, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_2@11", .pme_desc = "Wclk cycles MD2BW output port 2 has a flit flowing. (M chip 11)", .pme_code = 827, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 3, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_2@12", .pme_desc = "Wclk cycles MD2BW output port 2 has a flit flowing. (M chip 12)", .pme_code = 828, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 3, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_2@13", .pme_desc = "Wclk cycles MD2BW output port 2 has a flit flowing. (M chip 13)", .pme_code = 829, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 3, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_2@14", .pme_desc = "Wclk cycles MD2BW output port 2 has a flit flowing. (M chip 14)", .pme_code = 830, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 3, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_2@15", .pme_desc = "Wclk cycles MD2BW output port 2 has a flit flowing. 
(M chip 15)", .pme_code = 831, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 9, .pme_event = 3, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 10 Event 0 */ { .pme_name = "SUPPLY_EXCL@0", .pme_desc = "SupplyExcl packets received. (M chip 0)", .pme_code = 832, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SUPPLY_EXCL@1", .pme_desc = "SupplyExcl packets received. (M chip 1)", .pme_code = 833, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 0, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SUPPLY_EXCL@2", .pme_desc = "SupplyExcl packets received. (M chip 2)", .pme_code = 834, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 0, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SUPPLY_EXCL@3", .pme_desc = "SupplyExcl packets received. (M chip 3)", .pme_code = 835, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 0, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SUPPLY_EXCL@4", .pme_desc = "SupplyExcl packets received. 
(M chip 4)", .pme_code = 836, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 0, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SUPPLY_EXCL@5", .pme_desc = "SupplyExcl packets received. (M chip 5)", .pme_code = 837, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 0, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SUPPLY_EXCL@6", .pme_desc = "SupplyExcl packets received. (M chip 6)", .pme_code = 838, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 0, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SUPPLY_EXCL@7", .pme_desc = "SupplyExcl packets received. (M chip 7)", .pme_code = 839, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 0, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SUPPLY_EXCL@8", .pme_desc = "SupplyExcl packets received. (M chip 8)", .pme_code = 840, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 0, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SUPPLY_EXCL@9", .pme_desc = "SupplyExcl packets received. 
(M chip 9)", .pme_code = 841, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 0, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SUPPLY_EXCL@10", .pme_desc = "SupplyExcl packets received. (M chip 10)", .pme_code = 842, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 0, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SUPPLY_EXCL@11", .pme_desc = "SupplyExcl packets received. (M chip 11)", .pme_code = 843, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 0, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SUPPLY_EXCL@12", .pme_desc = "SupplyExcl packets received. (M chip 12)", .pme_code = 844, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 0, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SUPPLY_EXCL@13", .pme_desc = "SupplyExcl packets received. (M chip 13)", .pme_code = 845, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 0, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SUPPLY_EXCL@14", .pme_desc = "SupplyExcl packets received. 
(M chip 14)", .pme_code = 846, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 0, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "SUPPLY_EXCL@15", .pme_desc = "SupplyExcl packets received. (M chip 15)", .pme_code = 847, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 0, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 10 Event 1 */ { .pme_name = "W_OUT_FLOWING_3@0", .pme_desc = "Wclk cycles MD2BW output port 3 has a flit flowing. (M chip 0)", .pme_code = 848, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_3@1", .pme_desc = "Wclk cycles MD2BW output port 3 has a flit flowing. (M chip 1)", .pme_code = 849, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 1, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_3@2", .pme_desc = "Wclk cycles MD2BW output port 3 has a flit flowing. (M chip 2)", .pme_code = 850, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 1, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_3@3", .pme_desc = "Wclk cycles MD2BW output port 3 has a flit flowing. 
(M chip 3)", .pme_code = 851, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 1, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_3@4", .pme_desc = "Wclk cycles MD2BW output port 3 has a flit flowing. (M chip 4)", .pme_code = 852, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 1, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_3@5", .pme_desc = "Wclk cycles MD2BW output port 3 has a flit flowing. (M chip 5)", .pme_code = 853, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 1, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_3@6", .pme_desc = "Wclk cycles MD2BW output port 3 has a flit flowing. (M chip 6)", .pme_code = 854, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 1, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_3@7", .pme_desc = "Wclk cycles MD2BW output port 3 has a flit flowing. (M chip 7)", .pme_code = 855, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 1, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_3@8", .pme_desc = "Wclk cycles MD2BW output port 3 has a flit flowing. 
(M chip 8)", .pme_code = 856, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 1, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_3@9", .pme_desc = "Wclk cycles MD2BW output port 3 has a flit flowing. (M chip 9)", .pme_code = 857, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 1, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_3@10", .pme_desc = "Wclk cycles MD2BW output port 3 has a flit flowing. (M chip 10)", .pme_code = 858, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 1, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_3@11", .pme_desc = "Wclk cycles MD2BW output port 3 has a flit flowing. (M chip 11)", .pme_code = 859, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 1, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_3@12", .pme_desc = "Wclk cycles MD2BW output port 3 has a flit flowing. (M chip 12)", .pme_code = 860, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 1, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_3@13", .pme_desc = "Wclk cycles MD2BW output port 3 has a flit flowing. 
(M chip 13)", .pme_code = 861, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 1, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_3@14", .pme_desc = "Wclk cycles MD2BW output port 3 has a flit flowing. (M chip 14)", .pme_code = 862, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 1, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_FLOWING_3@15", .pme_desc = "Wclk cycles MD2BW output port 3 has a flit flowing. (M chip 15)", .pme_code = 863, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 1, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 10 Event 2 */ { .pme_name = "W_IN_WAITING_2@0", .pme_desc = "Wclk cycles BW2MD input port 2 has a packet waiting that failed to win arbitration (on either VC0 or VC2). (M chip 0)", .pme_code = 864, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_2@1", .pme_desc = "Wclk cycles BW2MD input port 2 has a packet waiting that failed to win arbitration (on either VC0 or VC2). 
(M chip 1)", .pme_code = 865, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 2, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_2@2", .pme_desc = "Wclk cycles BW2MD input port 2 has a packet waiting that failed to win arbitration (on either VC0 or VC2). (M chip 2)", .pme_code = 866, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 2, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_2@3", .pme_desc = "Wclk cycles BW2MD input port 2 has a packet waiting that failed to win arbitration (on either VC0 or VC2). (M chip 3)", .pme_code = 867, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 2, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_2@4", .pme_desc = "Wclk cycles BW2MD input port 2 has a packet waiting that failed to win arbitration (on either VC0 or VC2). (M chip 4)", .pme_code = 868, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 2, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_2@5", .pme_desc = "Wclk cycles BW2MD input port 2 has a packet waiting that failed to win arbitration (on either VC0 or VC2). 
(M chip 5)", .pme_code = 869, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 2, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_2@6", .pme_desc = "Wclk cycles BW2MD input port 2 has a packet waiting that failed to win arbitration (on either VC0 or VC2). (M chip 6)", .pme_code = 870, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 2, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_2@7", .pme_desc = "Wclk cycles BW2MD input port 2 has a packet waiting that failed to win arbitration (on either VC0 or VC2). (M chip 7)", .pme_code = 871, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 2, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_2@8", .pme_desc = "Wclk cycles BW2MD input port 2 has a packet waiting that failed to win arbitration (on either VC0 or VC2). (M chip 8)", .pme_code = 872, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 2, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_2@9", .pme_desc = "Wclk cycles BW2MD input port 2 has a packet waiting that failed to win arbitration (on either VC0 or VC2). 
(M chip 9)", .pme_code = 873, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 2, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_2@10", .pme_desc = "Wclk cycles BW2MD input port 2 has a packet waiting that failed to win arbitration (on either VC0 or VC2). (M chip 10)", .pme_code = 874, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 2, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_2@11", .pme_desc = "Wclk cycles BW2MD input port 2 has a packet waiting that failed to win arbitration (on either VC0 or VC2). (M chip 11)", .pme_code = 875, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 2, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_2@12", .pme_desc = "Wclk cycles BW2MD input port 2 has a packet waiting that failed to win arbitration (on either VC0 or VC2). (M chip 12)", .pme_code = 876, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 2, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_2@13", .pme_desc = "Wclk cycles BW2MD input port 2 has a packet waiting that failed to win arbitration (on either VC0 or VC2). 
(M chip 13)", .pme_code = 877, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 2, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_2@14", .pme_desc = "Wclk cycles BW2MD input port 2 has a packet waiting that failed to win arbitration (on either VC0 or VC2). (M chip 14)", .pme_code = 878, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 2, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_2@15", .pme_desc = "Wclk cycles BW2MD input port 2 has a packet waiting that failed to win arbitration (on either VC0 or VC2). (M chip 15)", .pme_code = 879, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 2, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 10 Event 3 */ { .pme_name = "INVAL_3@0", .pme_desc = "Invalidations sent to three BWs. (M chip 0)", .pme_code = 880, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_3@1", .pme_desc = "Invalidations sent to three BWs. (M chip 1)", .pme_code = 881, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 3, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_3@2", .pme_desc = "Invalidations sent to three BWs. 
(M chip 2)", .pme_code = 882, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 3, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_3@3", .pme_desc = "Invalidations sent to three BWs. (M chip 3)", .pme_code = 883, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 3, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_3@4", .pme_desc = "Invalidations sent to three BWs. (M chip 4)", .pme_code = 884, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 3, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_3@5", .pme_desc = "Invalidations sent to three BWs. (M chip 5)", .pme_code = 885, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 3, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_3@6", .pme_desc = "Invalidations sent to three BWs. (M chip 6)", .pme_code = 886, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 3, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_3@7", .pme_desc = "Invalidations sent to three BWs. 
(M chip 7)", .pme_code = 887, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 3, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_3@8", .pme_desc = "Invalidations sent to three BWs. (M chip 8)", .pme_code = 888, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 3, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_3@9", .pme_desc = "Invalidations sent to three BWs. (M chip 9)", .pme_code = 889, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 3, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_3@10", .pme_desc = "Invalidations sent to three BWs. (M chip 10)", .pme_code = 890, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 3, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_3@11", .pme_desc = "Invalidations sent to three BWs. (M chip 11)", .pme_code = 891, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 3, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_3@12", .pme_desc = "Invalidations sent to three BWs. 
(M chip 12)", .pme_code = 892, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 3, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_3@13", .pme_desc = "Invalidations sent to three BWs. (M chip 13)", .pme_code = 893, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 3, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_3@14", .pme_desc = "Invalidations sent to three BWs. (M chip 14)", .pme_code = 894, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 3, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_3@15", .pme_desc = "Invalidations sent to three BWs. (M chip 15)", .pme_code = 895, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 10, .pme_event = 3, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 11 Event 0 */ { .pme_name = "NACKS_RECV@0", .pme_desc = "FlushAck and Update Nack packets received (race between forwarded request and eviction by owner). (M chip 0)", .pme_code = 896, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "NACKS_RECV@1", .pme_desc = "FlushAck and Update Nack packets received (race between forwarded request and eviction by owner). 
(M chip 1)", .pme_code = 897, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 0, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "NACKS_RECV@2", .pme_desc = "FlushAck and Update Nack packets received (race between forwarded request and eviction by owner). (M chip 2)", .pme_code = 898, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 0, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "NACKS_RECV@3", .pme_desc = "FlushAck and Update Nack packets received (race between forwarded request and eviction by owner). (M chip 3)", .pme_code = 899, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 0, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "NACKS_RECV@4", .pme_desc = "FlushAck and Update Nack packets received (race between forwarded request and eviction by owner). (M chip 4)", .pme_code = 900, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 0, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "NACKS_RECV@5", .pme_desc = "FlushAck and Update Nack packets received (race between forwarded request and eviction by owner). 
(M chip 5)", .pme_code = 901, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 0, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "NACKS_RECV@6", .pme_desc = "FlushAck and Update Nack packets received (race between forwarded request and eviction by owner). (M chip 6)", .pme_code = 902, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 0, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "NACKS_RECV@7", .pme_desc = "FlushAck and Update Nack packets received (race between forwarded request and eviction by owner). (M chip 7)", .pme_code = 903, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 0, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "NACKS_RECV@8", .pme_desc = "FlushAck and Update Nack packets received (race between forwarded request and eviction by owner). (M chip 8)", .pme_code = 904, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 0, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "NACKS_RECV@9", .pme_desc = "FlushAck and Update Nack packets received (race between forwarded request and eviction by owner). 
(M chip 9)", .pme_code = 905, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 0, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "NACKS_RECV@10", .pme_desc = "FlushAck and Update Nack packets received (race between forwarded request and eviction by owner). (M chip 10)", .pme_code = 906, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 0, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "NACKS_RECV@11", .pme_desc = "FlushAck and Update Nack packets received (race between forwarded request and eviction by owner). (M chip 11)", .pme_code = 907, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 0, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "NACKS_RECV@12", .pme_desc = "FlushAck and Update Nack packets received (race between forwarded request and eviction by owner). (M chip 12)", .pme_code = 908, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 0, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "NACKS_RECV@13", .pme_desc = "FlushAck and Update Nack packets received (race between forwarded request and eviction by owner). 
(M chip 13)", .pme_code = 909, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 0, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "NACKS_RECV@14", .pme_desc = "FlushAck and Update Nack packets received (race between forwarded request and eviction by owner). (M chip 14)", .pme_code = 910, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 0, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "NACKS_RECV@15", .pme_desc = "FlushAck and Update Nack packets received (race between forwarded request and eviction by owner). (M chip 15)", .pme_code = 911, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 0, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 11 Event 1 */ { .pme_name = "W_OUT_BLOCK_CRED_0@0", .pme_desc = "Wclk cycles MD2BW output port 0 is blocked due to lack of credits. (M chip 0)", .pme_code = 912, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_0@1", .pme_desc = "Wclk cycles MD2BW output port 0 is blocked due to lack of credits. 
(M chip 1)", .pme_code = 913, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 1, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_0@2", .pme_desc = "Wclk cycles MD2BW output port 0 is blocked due to lack of credits. (M chip 2)", .pme_code = 914, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 1, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_0@3", .pme_desc = "Wclk cycles MD2BW output port 0 is blocked due to lack of credits. (M chip 3)", .pme_code = 915, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 1, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_0@4", .pme_desc = "Wclk cycles MD2BW output port 0 is blocked due to lack of credits. (M chip 4)", .pme_code = 916, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 1, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_0@5", .pme_desc = "Wclk cycles MD2BW output port 0 is blocked due to lack of credits. (M chip 5)", .pme_code = 917, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 1, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_0@6", .pme_desc = "Wclk cycles MD2BW output port 0 is blocked due to lack of credits. 
(M chip 6)", .pme_code = 918, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 1, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_0@7", .pme_desc = "Wclk cycles MD2BW output port 0 is blocked due to lack of credits. (M chip 7)", .pme_code = 919, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 1, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_0@8", .pme_desc = "Wclk cycles MD2BW output port 0 is blocked due to lack of credits. (M chip 8)", .pme_code = 920, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 1, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_0@9", .pme_desc = "Wclk cycles MD2BW output port 0 is blocked due to lack of credits. (M chip 9)", .pme_code = 921, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 1, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_0@10", .pme_desc = "Wclk cycles MD2BW output port 0 is blocked due to lack of credits. (M chip 10)", .pme_code = 922, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 1, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_0@11", .pme_desc = "Wclk cycles MD2BW output port 0 is blocked due to lack of credits. 
(M chip 11)", .pme_code = 923, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 1, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_0@12", .pme_desc = "Wclk cycles MD2BW output port 0 is blocked due to lack of credits. (M chip 12)", .pme_code = 924, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 1, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_0@13", .pme_desc = "Wclk cycles MD2BW output port 0 is blocked due to lack of credits. (M chip 13)", .pme_code = 925, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 1, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_0@14", .pme_desc = "Wclk cycles MD2BW output port 0 is blocked due to lack of credits. (M chip 14)", .pme_code = 926, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 1, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_0@15", .pme_desc = "Wclk cycles MD2BW output port 0 is blocked due to lack of credits. 
(M chip 15)", .pme_code = 927, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 1, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 11 Event 2 */ { .pme_name = "W_IN_WAITING_3@0", .pme_desc = "Wclk cycles BW2MD input port 3 has a packet waiting that failed to win arbitration (on either VC0 or VC2). (M chip 0)", .pme_code = 928, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_3@1", .pme_desc = "Wclk cycles BW2MD input port 3 has a packet waiting that failed to win arbitration (on either VC0 or VC2). (M chip 1)", .pme_code = 929, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 2, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_3@2", .pme_desc = "Wclk cycles BW2MD input port 3 has a packet waiting that failed to win arbitration (on either VC0 or VC2). (M chip 2)", .pme_code = 930, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 2, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_3@3", .pme_desc = "Wclk cycles BW2MD input port 3 has a packet waiting that failed to win arbitration (on either VC0 or VC2). 
(M chip 3)", .pme_code = 931, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 2, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_3@4", .pme_desc = "Wclk cycles BW2MD input port 3 has a packet waiting that failed to win arbitration (on either VC0 or VC2). (M chip 4)", .pme_code = 932, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 2, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_3@5", .pme_desc = "Wclk cycles BW2MD input port 3 has a packet waiting that failed to win arbitration (on either VC0 or VC2). (M chip 5)", .pme_code = 933, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 2, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_3@6", .pme_desc = "Wclk cycles BW2MD input port 3 has a packet waiting that failed to win arbitration (on either VC0 or VC2). (M chip 6)", .pme_code = 934, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 2, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_3@7", .pme_desc = "Wclk cycles BW2MD input port 3 has a packet waiting that failed to win arbitration (on either VC0 or VC2). 
(M chip 7)", .pme_code = 935, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 2, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_3@8", .pme_desc = "Wclk cycles BW2MD input port 3 has a packet waiting that failed to win arbitration (on either VC0 or VC2). (M chip 8)", .pme_code = 936, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 2, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_3@9", .pme_desc = "Wclk cycles BW2MD input port 3 has a packet waiting that failed to win arbitration (on either VC0 or VC2). (M chip 9)", .pme_code = 937, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 2, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_3@10", .pme_desc = "Wclk cycles BW2MD input port 3 has a packet waiting that failed to win arbitration (on either VC0 or VC2). (M chip 10)", .pme_code = 938, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 2, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_3@11", .pme_desc = "Wclk cycles BW2MD input port 3 has a packet waiting that failed to win arbitration (on either VC0 or VC2). 
(M chip 11)", .pme_code = 939, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 2, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_3@12", .pme_desc = "Wclk cycles BW2MD input port 3 has a packet waiting that failed to win arbitration (on either VC0 or VC2). (M chip 12)", .pme_code = 940, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 2, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_3@13", .pme_desc = "Wclk cycles BW2MD input port 3 has a packet waiting that failed to win arbitration (on either VC0 or VC2). (M chip 13)", .pme_code = 941, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 2, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_3@14", .pme_desc = "Wclk cycles BW2MD input port 3 has a packet waiting that failed to win arbitration (on either VC0 or VC2). (M chip 14)", .pme_code = 942, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 2, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_WAITING_3@15", .pme_desc = "Wclk cycles BW2MD input port 3 has a packet waiting that failed to win arbitration (on either VC0 or VC2). 
(M chip 15)", .pme_code = 943, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 2, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 11 Event 3 */ { .pme_name = "INVAL_4@0", .pme_desc = "Invalidations sent to four BWs. (M chip 0)", .pme_code = 944, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_4@1", .pme_desc = "Invalidations sent to four BWs. (M chip 1)", .pme_code = 945, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 3, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_4@2", .pme_desc = "Invalidations sent to four BWs. (M chip 2)", .pme_code = 946, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 3, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_4@3", .pme_desc = "Invalidations sent to four BWs. (M chip 3)", .pme_code = 947, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 3, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_4@4", .pme_desc = "Invalidations sent to four BWs. 
(M chip 4)", .pme_code = 948, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 3, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_4@5", .pme_desc = "Invalidations sent to four BWs. (M chip 5)", .pme_code = 949, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 3, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_4@6", .pme_desc = "Invalidations sent to four BWs. (M chip 6)", .pme_code = 950, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 3, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_4@7", .pme_desc = "Invalidations sent to four BWs. (M chip 7)", .pme_code = 951, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 3, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_4@8", .pme_desc = "Invalidations sent to four BWs. (M chip 8)", .pme_code = 952, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 3, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_4@9", .pme_desc = "Invalidations sent to four BWs. 
(M chip 9)", .pme_code = 953, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 3, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_4@10", .pme_desc = "Invalidations sent to four BWs. (M chip 10)", .pme_code = 954, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 3, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_4@11", .pme_desc = "Invalidations sent to four BWs. (M chip 11)", .pme_code = 955, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 3, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_4@12", .pme_desc = "Invalidations sent to four BWs. (M chip 12)", .pme_code = 956, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 3, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_4@13", .pme_desc = "Invalidations sent to four BWs. (M chip 13)", .pme_code = 957, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 3, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_4@14", .pme_desc = "Invalidations sent to four BWs. 
(M chip 14)", .pme_code = 958, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 3, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_4@15", .pme_desc = "Invalidations sent to four BWs. (M chip 15)", .pme_code = 959, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 11, .pme_event = 3, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 12 Event 0 */ { .pme_name = "UPDATE_NACK_RECV@0", .pme_desc = "UpdateNacks received. (M chip 0)", .pme_code = 960, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "UPDATE_NACK_RECV@1", .pme_desc = "UpdateNacks received. (M chip 1)", .pme_code = 961, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 0, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "UPDATE_NACK_RECV@2", .pme_desc = "UpdateNacks received. (M chip 2)", .pme_code = 962, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 0, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "UPDATE_NACK_RECV@3", .pme_desc = "UpdateNacks received. 
(M chip 3)", .pme_code = 963, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 0, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "UPDATE_NACK_RECV@4", .pme_desc = "UpdateNacks received. (M chip 4)", .pme_code = 964, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 0, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "UPDATE_NACK_RECV@5", .pme_desc = "UpdateNacks received. (M chip 5)", .pme_code = 965, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 0, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "UPDATE_NACK_RECV@6", .pme_desc = "UpdateNacks received. (M chip 6)", .pme_code = 966, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 0, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "UPDATE_NACK_RECV@7", .pme_desc = "UpdateNacks received. (M chip 7)", .pme_code = 967, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 0, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "UPDATE_NACK_RECV@8", .pme_desc = "UpdateNacks received. 
(M chip 8)", .pme_code = 968, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 0, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "UPDATE_NACK_RECV@9", .pme_desc = "UpdateNacks received. (M chip 9)", .pme_code = 969, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 0, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "UPDATE_NACK_RECV@10", .pme_desc = "UpdateNacks received. (M chip 10)", .pme_code = 970, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 0, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "UPDATE_NACK_RECV@11", .pme_desc = "UpdateNacks received. (M chip 11)", .pme_code = 971, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 0, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "UPDATE_NACK_RECV@12", .pme_desc = "UpdateNacks received. (M chip 12)", .pme_code = 972, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 0, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "UPDATE_NACK_RECV@13", .pme_desc = "UpdateNacks received. 
(M chip 13)", .pme_code = 973, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 0, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "UPDATE_NACK_RECV@14", .pme_desc = "UpdateNacks received. (M chip 14)", .pme_code = 974, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 0, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "UPDATE_NACK_RECV@15", .pme_desc = "UpdateNacks received. (M chip 15)", .pme_code = 975, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 0, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 12 Event 1 */ { .pme_name = "W_OUT_BLOCK_CRED_1@0", .pme_desc = "Wclk cycles MD2BW output port 1 is blocked due to lack of credits. (M chip 0)", .pme_code = 976, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_1@1", .pme_desc = "Wclk cycles MD2BW output port 1 is blocked due to lack of credits. (M chip 1)", .pme_code = 977, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 1, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_1@2", .pme_desc = "Wclk cycles MD2BW output port 1 is blocked due to lack of credits. 
(M chip 2)", .pme_code = 978, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 1, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_1@3", .pme_desc = "Wclk cycles MD2BW output port 1 is blocked due to lack of credits. (M chip 3)", .pme_code = 979, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 1, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_1@4", .pme_desc = "Wclk cycles MD2BW output port 1 is blocked due to lack of credits. (M chip 4)", .pme_code = 980, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 1, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_1@5", .pme_desc = "Wclk cycles MD2BW output port 1 is blocked due to lack of credits. (M chip 5)", .pme_code = 981, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 1, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_1@6", .pme_desc = "Wclk cycles MD2BW output port 1 is blocked due to lack of credits. (M chip 6)", .pme_code = 982, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 1, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_1@7", .pme_desc = "Wclk cycles MD2BW output port 1 is blocked due to lack of credits. 
(M chip 7)", .pme_code = 983, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 1, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_1@8", .pme_desc = "Wclk cycles MD2BW output port 1 is blocked due to lack of credits. (M chip 8)", .pme_code = 984, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 1, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_1@9", .pme_desc = "Wclk cycles MD2BW output port 1 is blocked due to lack of credits. (M chip 9)", .pme_code = 985, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 1, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_1@10", .pme_desc = "Wclk cycles MD2BW output port 1 is blocked due to lack of credits. (M chip 10)", .pme_code = 986, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 1, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_1@11", .pme_desc = "Wclk cycles MD2BW output port 1 is blocked due to lack of credits. (M chip 11)", .pme_code = 987, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 1, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_1@12", .pme_desc = "Wclk cycles MD2BW output port 1 is blocked due to lack of credits. 
(M chip 12)", .pme_code = 988, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 1, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_1@13", .pme_desc = "Wclk cycles MD2BW output port 1 is blocked due to lack of credits. (M chip 13)", .pme_code = 989, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 1, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_1@14", .pme_desc = "Wclk cycles MD2BW output port 1 is blocked due to lack of credits. (M chip 14)", .pme_code = 990, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 1, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_1@15", .pme_desc = "Wclk cycles MD2BW output port 1 is blocked due to lack of credits. (M chip 15)", .pme_code = 991, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 1, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 12 Event 2 */ { .pme_name = "W_IN_BLOCKED_0@0", .pme_desc = "Wclk cycles BW2MD input port 0 has a packet waiting that is blocked due to MD full. 
(M chip 0)", .pme_code = 992, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_0@1", .pme_desc = "Wclk cycles BW2MD input port 0 has a packet waiting that is blocked due to MD full. (M chip 1)", .pme_code = 993, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 2, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_0@2", .pme_desc = "Wclk cycles BW2MD input port 0 has a packet waiting that is blocked due to MD full. (M chip 2)", .pme_code = 994, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 2, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_0@3", .pme_desc = "Wclk cycles BW2MD input port 0 has a packet waiting that is blocked due to MD full. (M chip 3)", .pme_code = 995, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 2, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_0@4", .pme_desc = "Wclk cycles BW2MD input port 0 has a packet waiting that is blocked due to MD full. 
(M chip 4)", .pme_code = 996, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 2, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_0@5", .pme_desc = "Wclk cycles BW2MD input port 0 has a packet waiting that is blocked due to MD full. (M chip 5)", .pme_code = 997, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 2, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_0@6", .pme_desc = "Wclk cycles BW2MD input port 0 has a packet waiting that is blocked due to MD full. (M chip 6)", .pme_code = 998, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 2, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_0@7", .pme_desc = "Wclk cycles BW2MD input port 0 has a packet waiting that is blocked due to MD full. (M chip 7)", .pme_code = 999, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 2, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_0@8", .pme_desc = "Wclk cycles BW2MD input port 0 has a packet waiting that is blocked due to MD full. 
(M chip 8)", .pme_code = 1000, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 2, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_0@9", .pme_desc = "Wclk cycles BW2MD input port 0 has a packet waiting that is blocked due to MD full. (M chip 9)", .pme_code = 1001, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 2, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_0@10", .pme_desc = "Wclk cycles BW2MD input port 0 has a packet waiting that is blocked due to MD full. (M chip 10)", .pme_code = 1002, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 2, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_0@11", .pme_desc = "Wclk cycles BW2MD input port 0 has a packet waiting that is blocked due to MD full. (M chip 11)", .pme_code = 1003, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 2, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_0@12", .pme_desc = "Wclk cycles BW2MD input port 0 has a packet waiting that is blocked due to MD full. 
(M chip 12)", .pme_code = 1004, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 2, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_0@13", .pme_desc = "Wclk cycles BW2MD input port 0 has a packet waiting that is blocked due to MD full. (M chip 13)", .pme_code = 1005, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 2, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_0@14", .pme_desc = "Wclk cycles BW2MD input port 0 has a packet waiting that is blocked due to MD full. (M chip 14)", .pme_code = 1006, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 2, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_0@15", .pme_desc = "Wclk cycles BW2MD input port 0 has a packet waiting that is blocked due to MD full. (M chip 15)", .pme_code = 1007, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 2, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 12 Event 3 */ { .pme_name = "FWD_GET_SENT@0", .pme_desc = "FwdGet packets sent (Exclusive -> PendFwd transition). 
(M chip 0)", .pme_code = 1008, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_GET_SENT@1", .pme_desc = "FwdGet packets sent (Exclusive -> PendFwd transition). (M chip 1)", .pme_code = 1009, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 3, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_GET_SENT@2", .pme_desc = "FwdGet packets sent (Exclusive -> PendFwd transition). (M chip 2)", .pme_code = 1010, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 3, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_GET_SENT@3", .pme_desc = "FwdGet packets sent (Exclusive -> PendFwd transition). (M chip 3)", .pme_code = 1011, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 3, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_GET_SENT@4", .pme_desc = "FwdGet packets sent (Exclusive -> PendFwd transition). (M chip 4)", .pme_code = 1012, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 3, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_GET_SENT@5", .pme_desc = "FwdGet packets sent (Exclusive -> PendFwd transition). 
(M chip 5)", .pme_code = 1013, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 3, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_GET_SENT@6", .pme_desc = "FwdGet packets sent (Exclusive -> PendFwd transition). (M chip 6)", .pme_code = 1014, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 3, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_GET_SENT@7", .pme_desc = "FwdGet packets sent (Exclusive -> PendFwd transition). (M chip 7)", .pme_code = 1015, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 3, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_GET_SENT@8", .pme_desc = "FwdGet packets sent (Exclusive -> PendFwd transition). (M chip 8)", .pme_code = 1016, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 3, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_GET_SENT@9", .pme_desc = "FwdGet packets sent (Exclusive -> PendFwd transition). (M chip 9)", .pme_code = 1017, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 3, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_GET_SENT@10", .pme_desc = "FwdGet packets sent (Exclusive -> PendFwd transition). 
(M chip 10)", .pme_code = 1018, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 3, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_GET_SENT@11", .pme_desc = "FwdGet packets sent (Exclusive -> PendFwd transition). (M chip 11)", .pme_code = 1019, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 3, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_GET_SENT@12", .pme_desc = "FwdGet packets sent (Exclusive -> PendFwd transition). (M chip 12)", .pme_code = 1020, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 3, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_GET_SENT@13", .pme_desc = "FwdGet packets sent (Exclusive -> PendFwd transition). (M chip 13)", .pme_code = 1021, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 3, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_GET_SENT@14", .pme_desc = "FwdGet packets sent (Exclusive -> PendFwd transition). (M chip 14)", .pme_code = 1022, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 3, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FWD_GET_SENT@15", .pme_desc = "FwdGet packets sent (Exclusive -> PendFwd transition). 
(M chip 15)", .pme_code = 1023, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 12, .pme_event = 3, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 13 Event 0 */ { .pme_name = "PEND_DROP@0", .pme_desc = "Times entering PendDrop state (from Shared). (M chip 0)", .pme_code = 1024, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "PEND_DROP@1", .pme_desc = "Times entering PendDrop state (from Shared). (M chip 1)", .pme_code = 1025, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 0, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "PEND_DROP@2", .pme_desc = "Times entering PendDrop state (from Shared). (M chip 2)", .pme_code = 1026, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 0, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "PEND_DROP@3", .pme_desc = "Times entering PendDrop state (from Shared). (M chip 3)", .pme_code = 1027, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 0, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "PEND_DROP@4", .pme_desc = "Times entering PendDrop state (from Shared). 
(M chip 4)", .pme_code = 1028, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 0, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "PEND_DROP@5", .pme_desc = "Times entering PendDrop state (from Shared). (M chip 5)", .pme_code = 1029, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 0, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "PEND_DROP@6", .pme_desc = "Times entering PendDrop state (from Shared). (M chip 6)", .pme_code = 1030, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 0, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "PEND_DROP@7", .pme_desc = "Times entering PendDrop state (from Shared). (M chip 7)", .pme_code = 1031, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 0, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "PEND_DROP@8", .pme_desc = "Times entering PendDrop state (from Shared). (M chip 8)", .pme_code = 1032, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 0, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "PEND_DROP@9", .pme_desc = "Times entering PendDrop state (from Shared). 
(M chip 9)", .pme_code = 1033, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 0, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "PEND_DROP@10", .pme_desc = "Times entering PendDrop state (from Shared). (M chip 10)", .pme_code = 1034, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 0, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "PEND_DROP@11", .pme_desc = "Times entering PendDrop state (from Shared). (M chip 11)", .pme_code = 1035, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 0, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "PEND_DROP@12", .pme_desc = "Times entering PendDrop state (from Shared). (M chip 12)", .pme_code = 1036, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 0, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "PEND_DROP@13", .pme_desc = "Times entering PendDrop state (from Shared). (M chip 13)", .pme_code = 1037, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 0, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "PEND_DROP@14", .pme_desc = "Times entering PendDrop state (from Shared). 
(M chip 14)", .pme_code = 1038, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 0, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "PEND_DROP@15", .pme_desc = "Times entering PendDrop state (from Shared). (M chip 15)", .pme_code = 1039, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 0, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 13 Event 1 */ { .pme_name = "LINE_EVICTIONS@0", .pme_desc = "Counts lines that are evicted. Note: doesn't count AMO forced evictions. Also note that the counter will increment if the line is not dirty and it is evicted. (M chip 0)", .pme_code = 1040, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "LINE_EVICTIONS@1", .pme_desc = "Counts lines that are evicted. Note: doesn't count AMO forced evictions. Also note that the counter will increment if the line is not dirty and it is evicted. (M chip 1)", .pme_code = 1041, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 1, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "LINE_EVICTIONS@2", .pme_desc = "Counts lines that are evicted. Note: doesn't count AMO forced evictions. Also note that the counter will increment if the line is not dirty and it is evicted. 
(M chip 2)", .pme_code = 1042, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 1, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "LINE_EVICTIONS@3", .pme_desc = "Counts lines that are evicted. Note: doesn't count AMO forced evictions. Also note that the counter will increment if the line is not dirty and it is evicted. (M chip 3)", .pme_code = 1043, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 1, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "LINE_EVICTIONS@4", .pme_desc = "Counts lines that are evicted. Note: doesn't count AMO forced evictions. Also note that the counter will increment if the line is not dirty and it is evicted. (M chip 4)", .pme_code = 1044, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 1, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "LINE_EVICTIONS@5", .pme_desc = "Counts lines that are evicted. Note: doesn't count AMO forced evictions. Also note that the counter will increment if the line is not dirty and it is evicted. (M chip 5)", .pme_code = 1045, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 1, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "LINE_EVICTIONS@6", .pme_desc = "Counts lines that are evicted. Note: doesn't count AMO forced evictions. Also note that the counter will increment if the line is not dirty and it is evicted. 
(M chip 6)", .pme_code = 1046, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 1, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "LINE_EVICTIONS@7", .pme_desc = "Counts lines that are evicted. Note: doesn't count AMO forced evictions. Also note that the counter will increment if the line is not dirty and it is evicted. (M chip 7)", .pme_code = 1047, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 1, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "LINE_EVICTIONS@8", .pme_desc = "Counts lines that are evicted. Note: doesn't count AMO forced evictions. Also note that the counter will increment if the line is not dirty and it is evicted. (M chip 8)", .pme_code = 1048, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 1, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "LINE_EVICTIONS@9", .pme_desc = "Counts lines that are evicted. Note: doesn't count AMO forced evictions. Also note that the counter will increment if the line is not dirty and it is evicted. (M chip 9)", .pme_code = 1049, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 1, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "LINE_EVICTIONS@10", .pme_desc = "Counts lines that are evicted. Note: doesn't count AMO forced evictions. Also note that the counter will increment if the line is not dirty and it is evicted. 
(M chip 10)", .pme_code = 1050, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 1, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "LINE_EVICTIONS@11", .pme_desc = "Counts lines that are evicted. Note: doesn't count AMO forced evictions. Also note that the counter will increment if the line is not dirty and it is evicted. (M chip 11)", .pme_code = 1051, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 1, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "LINE_EVICTIONS@12", .pme_desc = "Counts lines that are evicted. Note: doesn't count AMO forced evictions. Also note that the counter will increment if the line is not dirty and it is evicted. (M chip 12)", .pme_code = 1052, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 1, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "LINE_EVICTIONS@13", .pme_desc = "Counts lines that are evicted. Note: doesn't count AMO forced evictions. Also note that the counter will increment if the line is not dirty and it is evicted. (M chip 13)", .pme_code = 1053, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 1, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "LINE_EVICTIONS@14", .pme_desc = "Counts lines that are evicted. Note: doesn't count AMO forced evictions. Also note that the counter will increment if the line is not dirty and it is evicted. 
(M chip 14)", .pme_code = 1054, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 1, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "LINE_EVICTIONS@15", .pme_desc = "Counts lines that are evicted. Note: doesn't count AMO forced evictions. Also note that the counter will increment if the line is not dirty and it is evicted. (M chip 15)", .pme_code = 1055, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 1, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 13 Event 2 */ { .pme_name = "W_IN_BLOCKED_1@0", .pme_desc = "Wclk cycles BW2MD input port 1 has a packet waiting that is blocked due to MD full. (M chip 0)", .pme_code = 1056, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_1@1", .pme_desc = "Wclk cycles BW2MD input port 1 has a packet waiting that is blocked due to MD full. (M chip 1)", .pme_code = 1057, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 2, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_1@2", .pme_desc = "Wclk cycles BW2MD input port 1 has a packet waiting that is blocked due to MD full. 
(M chip 2)", .pme_code = 1058, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 2, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_1@3", .pme_desc = "Wclk cycles BW2MD input port 1 has a packet waiting that is blocked due to MD full. (M chip 3)", .pme_code = 1059, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 2, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_1@4", .pme_desc = "Wclk cycles BW2MD input port 1 has a packet waiting that is blocked due to MD full. (M chip 4)", .pme_code = 1060, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 2, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_1@5", .pme_desc = "Wclk cycles BW2MD input port 1 has a packet waiting that is blocked due to MD full. (M chip 5)", .pme_code = 1061, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 2, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_1@6", .pme_desc = "Wclk cycles BW2MD input port 1 has a packet waiting that is blocked due to MD full. 
(M chip 6)", .pme_code = 1062, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 2, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_1@7", .pme_desc = "Wclk cycles BW2MD input port 1 has a packet waiting that is blocked due to MD full. (M chip 7)", .pme_code = 1063, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 2, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_1@8", .pme_desc = "Wclk cycles BW2MD input port 1 has a packet waiting that is blocked due to MD full. (M chip 8)", .pme_code = 1064, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 2, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_1@9", .pme_desc = "Wclk cycles BW2MD input port 1 has a packet waiting that is blocked due to MD full. (M chip 9)", .pme_code = 1065, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 2, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_1@10", .pme_desc = "Wclk cycles BW2MD input port 1 has a packet waiting that is blocked due to MD full. 
(M chip 10)", .pme_code = 1066, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 2, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_1@11", .pme_desc = "Wclk cycles BW2MD input port 1 has a packet waiting that is blocked due to MD full. (M chip 11)", .pme_code = 1067, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 2, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_1@12", .pme_desc = "Wclk cycles BW2MD input port 1 has a packet waiting that is blocked due to MD full. (M chip 12)", .pme_code = 1068, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 2, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_1@13", .pme_desc = "Wclk cycles BW2MD input port 1 has a packet waiting that is blocked due to MD full. (M chip 13)", .pme_code = 1069, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 2, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_1@14", .pme_desc = "Wclk cycles BW2MD input port 1 has a packet waiting that is blocked due to MD full. 
(M chip 14)", .pme_code = 1070, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 2, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_1@15", .pme_desc = "Wclk cycles BW2MD input port 1 has a packet waiting that is blocked due to MD full. (M chip 15)", .pme_code = 1071, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 2, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 13 Event 3 */ { .pme_name = "FLUSH_REQ_PACKETS@0", .pme_desc = "FlushReq packets sent (Exclusive -> PendFwd transition). (M chip 0)", .pme_code = 1072, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FLUSH_REQ_PACKETS@1", .pme_desc = "FlushReq packets sent (Exclusive -> PendFwd transition). (M chip 1)", .pme_code = 1073, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 3, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FLUSH_REQ_PACKETS@2", .pme_desc = "FlushReq packets sent (Exclusive -> PendFwd transition). (M chip 2)", .pme_code = 1074, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 3, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FLUSH_REQ_PACKETS@3", .pme_desc = "FlushReq packets sent (Exclusive -> PendFwd transition). 
(M chip 3)", .pme_code = 1075, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 3, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FLUSH_REQ_PACKETS@4", .pme_desc = "FlushReq packets sent (Exclusive -> PendFwd transition). (M chip 4)", .pme_code = 1076, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 3, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FLUSH_REQ_PACKETS@5", .pme_desc = "FlushReq packets sent (Exclusive -> PendFwd transition). (M chip 5)", .pme_code = 1077, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 3, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FLUSH_REQ_PACKETS@6", .pme_desc = "FlushReq packets sent (Exclusive -> PendFwd transition). (M chip 6)", .pme_code = 1078, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 3, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FLUSH_REQ_PACKETS@7", .pme_desc = "FlushReq packets sent (Exclusive -> PendFwd transition). (M chip 7)", .pme_code = 1079, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 3, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FLUSH_REQ_PACKETS@8", .pme_desc = "FlushReq packets sent (Exclusive -> PendFwd transition). 
(M chip 8)", .pme_code = 1080, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 3, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FLUSH_REQ_PACKETS@9", .pme_desc = "FlushReq packets sent (Exclusive -> PendFwd transition). (M chip 9)", .pme_code = 1081, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 3, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FLUSH_REQ_PACKETS@10", .pme_desc = "FlushReq packets sent (Exclusive -> PendFwd transition). (M chip 10)", .pme_code = 1082, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 3, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FLUSH_REQ_PACKETS@11", .pme_desc = "FlushReq packets sent (Exclusive -> PendFwd transition). (M chip 11)", .pme_code = 1083, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 3, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FLUSH_REQ_PACKETS@12", .pme_desc = "FlushReq packets sent (Exclusive -> PendFwd transition). (M chip 12)", .pme_code = 1084, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 3, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FLUSH_REQ_PACKETS@13", .pme_desc = "FlushReq packets sent (Exclusive -> PendFwd transition). 
(M chip 13)", .pme_code = 1085, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 3, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FLUSH_REQ_PACKETS@14", .pme_desc = "FlushReq packets sent (Exclusive -> PendFwd transition). (M chip 14)", .pme_code = 1086, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 3, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "FLUSH_REQ_PACKETS@15", .pme_desc = "FlushReq packets sent (Exclusive -> PendFwd transition). (M chip 15)", .pme_code = 1087, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 13, .pme_event = 3, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 14 Event 0 */ { .pme_name = "INVAL_EVENTS@0", .pme_desc = "Invalidation events (any number of sharers). (M chip 0)", .pme_code = 1088, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_EVENTS@1", .pme_desc = "Invalidation events (any number of sharers). (M chip 1)", .pme_code = 1089, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 0, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_EVENTS@2", .pme_desc = "Invalidation events (any number of sharers). 
(M chip 2)", .pme_code = 1090, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 0, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_EVENTS@3", .pme_desc = "Invalidation events (any number of sharers). (M chip 3)", .pme_code = 1091, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 0, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_EVENTS@4", .pme_desc = "Invalidation events (any number of sharers). (M chip 4)", .pme_code = 1092, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 0, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_EVENTS@5", .pme_desc = "Invalidation events (any number of sharers). (M chip 5)", .pme_code = 1093, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 0, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_EVENTS@6", .pme_desc = "Invalidation events (any number of sharers). (M chip 6)", .pme_code = 1094, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 0, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_EVENTS@7", .pme_desc = "Invalidation events (any number of sharers). 
(M chip 7)", .pme_code = 1095, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 0, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_EVENTS@8", .pme_desc = "Invalidation events (any number of sharers). (M chip 8)", .pme_code = 1096, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 0, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_EVENTS@9", .pme_desc = "Invalidation events (any number of sharers). (M chip 9)", .pme_code = 1097, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 0, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_EVENTS@10", .pme_desc = "Invalidation events (any number of sharers). (M chip 10)", .pme_code = 1098, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 0, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_EVENTS@11", .pme_desc = "Invalidation events (any number of sharers). (M chip 11)", .pme_code = 1099, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 0, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_EVENTS@12", .pme_desc = "Invalidation events (any number of sharers). 
(M chip 12)", .pme_code = 1100, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 0, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_EVENTS@13", .pme_desc = "Invalidation events (any number of sharers). (M chip 13)", .pme_code = 1101, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 0, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_EVENTS@14", .pme_desc = "Invalidation events (any number of sharers). (M chip 14)", .pme_code = 1102, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 0, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "INVAL_EVENTS@15", .pme_desc = "Invalidation events (any number of sharers). (M chip 15)", .pme_code = 1103, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 0, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 14 Event 1 */ { .pme_name = "L3_LINE_HIT_GLOBAL@0", .pme_desc = "Allocating read requests that hit out of L3 cached data and state was global. (M chip 0)", .pme_code = 1104, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "L3_LINE_HIT_GLOBAL@1", .pme_desc = "Allocating read requests that hit out of L3 cached data and state was global. 
(M chip 1)", .pme_code = 1105, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 1, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "L3_LINE_HIT_GLOBAL@2", .pme_desc = "Allocating read requests that hit out of L3 cached data and state was global. (M chip 2)", .pme_code = 1106, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 1, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "L3_LINE_HIT_GLOBAL@3", .pme_desc = "Allocating read requests that hit out of L3 cached data and state was global. (M chip 3)", .pme_code = 1107, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 1, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "L3_LINE_HIT_GLOBAL@4", .pme_desc = "Allocating read requests that hit out of L3 cached data and state was global. (M chip 4)", .pme_code = 1108, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 1, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "L3_LINE_HIT_GLOBAL@5", .pme_desc = "Allocating read requests that hit out of L3 cached data and state was global. 
(M chip 5)", .pme_code = 1109, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 1, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "L3_LINE_HIT_GLOBAL@6", .pme_desc = "Allocating read requests that hit out of L3 cached data and state was global. (M chip 6)", .pme_code = 1110, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 1, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "L3_LINE_HIT_GLOBAL@7", .pme_desc = "Allocating read requests that hit out of L3 cached data and state was global. (M chip 7)", .pme_code = 1111, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 1, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "L3_LINE_HIT_GLOBAL@8", .pme_desc = "Allocating read requests that hit out of L3 cached data and state was global. (M chip 8)", .pme_code = 1112, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 1, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "L3_LINE_HIT_GLOBAL@9", .pme_desc = "Allocating read requests that hit out of L3 cached data and state was global. 
(M chip 9)", .pme_code = 1113, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 1, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "L3_LINE_HIT_GLOBAL@10", .pme_desc = "Allocating read requests that hit out of L3 cached data and state was global. (M chip 10)", .pme_code = 1114, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 1, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "L3_LINE_HIT_GLOBAL@11", .pme_desc = "Allocating read requests that hit out of L3 cached data and state was global. (M chip 11)", .pme_code = 1115, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 1, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "L3_LINE_HIT_GLOBAL@12", .pme_desc = "Allocating read requests that hit out of L3 cached data and state was global. (M chip 12)", .pme_code = 1116, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 1, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "L3_LINE_HIT_GLOBAL@13", .pme_desc = "Allocating read requests that hit out of L3 cached data and state was global. 
(M chip 13)", .pme_code = 1117, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 1, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "L3_LINE_HIT_GLOBAL@14", .pme_desc = "Allocating read requests that hit out of L3 cached data and state was global. (M chip 14)", .pme_code = 1118, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 1, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "L3_LINE_HIT_GLOBAL@15", .pme_desc = "Allocating read requests that hit out of L3 cached data and state was global. (M chip 15)", .pme_code = 1119, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 1, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 14 Event 2 */ { .pme_name = "W_IN_BLOCKED_2@0", .pme_desc = "Wclk cycles BW2MD input port 2 has a packet waiting that is blocked due to MD full. (M chip 0)", .pme_code = 1120, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_2@1", .pme_desc = "Wclk cycles BW2MD input port 2 has a packet waiting that is blocked due to MD full. 
(M chip 1)", .pme_code = 1121, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 2, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_2@2", .pme_desc = "Wclk cycles BW2MD input port 2 has a packet waiting that is blocked due to MD full. (M chip 2)", .pme_code = 1122, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 2, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_2@3", .pme_desc = "Wclk cycles BW2MD input port 2 has a packet waiting that is blocked due to MD full. (M chip 3)", .pme_code = 1123, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 2, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_2@4", .pme_desc = "Wclk cycles BW2MD input port 2 has a packet waiting that is blocked due to MD full. (M chip 4)", .pme_code = 1124, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 2, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_2@5", .pme_desc = "Wclk cycles BW2MD input port 2 has a packet waiting that is blocked due to MD full. 
(M chip 5)", .pme_code = 1125, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 2, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_2@6", .pme_desc = "Wclk cycles BW2MD input port 2 has a packet waiting that is blocked due to MD full. (M chip 6)", .pme_code = 1126, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 2, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_2@7", .pme_desc = "Wclk cycles BW2MD input port 2 has a packet waiting that is blocked due to MD full. (M chip 7)", .pme_code = 1127, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 2, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_2@8", .pme_desc = "Wclk cycles BW2MD input port 2 has a packet waiting that is blocked due to MD full. (M chip 8)", .pme_code = 1128, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 2, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_2@9", .pme_desc = "Wclk cycles BW2MD input port 2 has a packet waiting that is blocked due to MD full. 
(M chip 9)", .pme_code = 1129, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 2, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_2@10", .pme_desc = "Wclk cycles BW2MD input port 2 has a packet waiting that is blocked due to MD full. (M chip 10)", .pme_code = 1130, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 2, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_2@11", .pme_desc = "Wclk cycles BW2MD input port 2 has a packet waiting that is blocked due to MD full. (M chip 11)", .pme_code = 1131, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 2, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_2@12", .pme_desc = "Wclk cycles BW2MD input port 2 has a packet waiting that is blocked due to MD full. (M chip 12)", .pme_code = 1132, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 2, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_2@13", .pme_desc = "Wclk cycles BW2MD input port 2 has a packet waiting that is blocked due to MD full. 
(M chip 13)", .pme_code = 1133, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 2, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_2@14", .pme_desc = "Wclk cycles BW2MD input port 2 has a packet waiting that is blocked due to MD full. (M chip 14)", .pme_code = 1134, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 2, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_2@15", .pme_desc = "Wclk cycles BW2MD input port 2 has a packet waiting that is blocked due to MD full. (M chip 15)", .pme_code = 1135, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 2, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 14 Event 3 */ { .pme_name = "W_OUT_BLOCK_CRED_2@0", .pme_desc = "Wclk cycles MD2BW output port 2 is blocked due to lack of credits. (M chip 0)", .pme_code = 1136, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_2@1", .pme_desc = "Wclk cycles MD2BW output port 2 is blocked due to lack of credits. 
(M chip 1)", .pme_code = 1137, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 3, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_2@2", .pme_desc = "Wclk cycles MD2BW output port 2 is blocked due to lack of credits. (M chip 2)", .pme_code = 1138, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 3, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_2@3", .pme_desc = "Wclk cycles MD2BW output port 2 is blocked due to lack of credits. (M chip 3)", .pme_code = 1139, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 3, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_2@4", .pme_desc = "Wclk cycles MD2BW output port 2 is blocked due to lack of credits. (M chip 4)", .pme_code = 1140, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 3, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_2@5", .pme_desc = "Wclk cycles MD2BW output port 2 is blocked due to lack of credits. (M chip 5)", .pme_code = 1141, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 3, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_2@6", .pme_desc = "Wclk cycles MD2BW output port 2 is blocked due to lack of credits. 
(M chip 6)", .pme_code = 1142, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 3, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_2@7", .pme_desc = "Wclk cycles MD2BW output port 2 is blocked due to lack of credits. (M chip 7)", .pme_code = 1143, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 3, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_2@8", .pme_desc = "Wclk cycles MD2BW output port 2 is blocked due to lack of credits. (M chip 8)", .pme_code = 1144, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 3, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_2@9", .pme_desc = "Wclk cycles MD2BW output port 2 is blocked due to lack of credits. (M chip 9)", .pme_code = 1145, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 3, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_2@10", .pme_desc = "Wclk cycles MD2BW output port 2 is blocked due to lack of credits. (M chip 10)", .pme_code = 1146, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 3, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_2@11", .pme_desc = "Wclk cycles MD2BW output port 2 is blocked due to lack of credits. 
(M chip 11)", .pme_code = 1147, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 3, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_2@12", .pme_desc = "Wclk cycles MD2BW output port 2 is blocked due to lack of credits. (M chip 12)", .pme_code = 1148, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 3, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_2@13", .pme_desc = "Wclk cycles MD2BW output port 2 is blocked due to lack of credits. (M chip 13)", .pme_code = 1149, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 3, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_2@14", .pme_desc = "Wclk cycles MD2BW output port 2 is blocked due to lack of credits. (M chip 14)", .pme_code = 1150, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 3, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_2@15", .pme_desc = "Wclk cycles MD2BW output port 2 is blocked due to lack of credits. (M chip 15)", .pme_code = 1151, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 14, .pme_event = 3, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 15 Event 0 */ { .pme_name = "REQUEST_ALLOC_NO_FILL@0", .pme_desc = "Allocating no fill requests. 
(M chip 0)", .pme_code = 1152, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_ALLOC_NO_FILL@1", .pme_desc = "Allocating no fill requests. (M chip 1)", .pme_code = 1153, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 0, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_ALLOC_NO_FILL@2", .pme_desc = "Allocating no fill requests. (M chip 2)", .pme_code = 1154, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 0, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_ALLOC_NO_FILL@3", .pme_desc = "Allocating no fill requests. (M chip 3)", .pme_code = 1155, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 0, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_ALLOC_NO_FILL@4", .pme_desc = "Allocating no fill requests. (M chip 4)", .pme_code = 1156, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 0, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_ALLOC_NO_FILL@5", .pme_desc = "Allocating no fill requests. 
(M chip 5)", .pme_code = 1157, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 0, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_ALLOC_NO_FILL@6", .pme_desc = "Allocating no fill requests. (M chip 6)", .pme_code = 1158, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 0, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_ALLOC_NO_FILL@7", .pme_desc = "Allocating no fill requests. (M chip 7)", .pme_code = 1159, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 0, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_ALLOC_NO_FILL@8", .pme_desc = "Allocating no fill requests. (M chip 8)", .pme_code = 1160, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 0, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_ALLOC_NO_FILL@9", .pme_desc = "Allocating no fill requests. (M chip 9)", .pme_code = 1161, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 0, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_ALLOC_NO_FILL@10", .pme_desc = "Allocating no fill requests. 
(M chip 10)", .pme_code = 1162, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 0, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_ALLOC_NO_FILL@11", .pme_desc = "Allocating no fill requests. (M chip 11)", .pme_code = 1163, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 0, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_ALLOC_NO_FILL@12", .pme_desc = "Allocating no fill requests. (M chip 12)", .pme_code = 1164, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 0, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_ALLOC_NO_FILL@13", .pme_desc = "Allocating no fill requests. (M chip 13)", .pme_code = 1165, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 0, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_ALLOC_NO_FILL@14", .pme_desc = "Allocating no fill requests. (M chip 14)", .pme_code = 1166, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 0, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_ALLOC_NO_FILL@15", .pme_desc = "Allocating no fill requests. 
(M chip 15)", .pme_code = 1167, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 0, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 15 Event 1 */ { .pme_name = "L3_LINE_HIT_SHARED@0", .pme_desc = "Allocating read requests that hit out of L3 cached data and state was shared. (M chip 0)", .pme_code = 1168, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "L3_LINE_HIT_SHARED@1", .pme_desc = "Allocating read requests that hit out of L3 cached data and state was shared. (M chip 1)", .pme_code = 1169, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 1, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "L3_LINE_HIT_SHARED@2", .pme_desc = "Allocating read requests that hit out of L3 cached data and state was shared. (M chip 2)", .pme_code = 1170, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 1, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "L3_LINE_HIT_SHARED@3", .pme_desc = "Allocating read requests that hit out of L3 cached data and state was shared. 
(M chip 3)", .pme_code = 1171, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 1, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "L3_LINE_HIT_SHARED@4", .pme_desc = "Allocating read requests that hit out of L3 cached data and state was shared. (M chip 4)", .pme_code = 1172, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 1, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "L3_LINE_HIT_SHARED@5", .pme_desc = "Allocating read requests that hit out of L3 cached data and state was shared. (M chip 5)", .pme_code = 1173, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 1, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "L3_LINE_HIT_SHARED@6", .pme_desc = "Allocating read requests that hit out of L3 cached data and state was shared. (M chip 6)", .pme_code = 1174, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 1, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "L3_LINE_HIT_SHARED@7", .pme_desc = "Allocating read requests that hit out of L3 cached data and state was shared. 
(M chip 7)", .pme_code = 1175, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 1, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "L3_LINE_HIT_SHARED@8", .pme_desc = "Allocating read requests that hit out of L3 cached data and state was shared. (M chip 8)", .pme_code = 1176, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 1, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "L3_LINE_HIT_SHARED@9", .pme_desc = "Allocating read requests that hit out of L3 cached data and state was shared. (M chip 9)", .pme_code = 1177, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 1, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "L3_LINE_HIT_SHARED@10", .pme_desc = "Allocating read requests that hit out of L3 cached data and state was shared. (M chip 10)", .pme_code = 1178, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 1, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "L3_LINE_HIT_SHARED@11", .pme_desc = "Allocating read requests that hit out of L3 cached data and state was shared. 
(M chip 11)", .pme_code = 1179, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 1, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "L3_LINE_HIT_SHARED@12", .pme_desc = "Allocating read requests that hit out of L3 cached data and state was shared. (M chip 12)", .pme_code = 1180, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 1, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "L3_LINE_HIT_SHARED@13", .pme_desc = "Allocating read requests that hit out of L3 cached data and state was shared. (M chip 13)", .pme_code = 1181, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 1, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "L3_LINE_HIT_SHARED@14", .pme_desc = "Allocating read requests that hit out of L3 cached data and state was shared. (M chip 14)", .pme_code = 1182, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 1, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "L3_LINE_HIT_SHARED@15", .pme_desc = "Allocating read requests that hit out of L3 cached data and state was shared. 
(M chip 15)", .pme_code = 1183, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 1, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 15 Event 2 */ { .pme_name = "W_IN_BLOCKED_3@0", .pme_desc = "Wclk cycles BW2MD input port 3 has a packet waiting that is blocked due to MD full. (M chip 0)", .pme_code = 1184, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_3@1", .pme_desc = "Wclk cycles BW2MD input port 3 has a packet waiting that is blocked due to MD full. (M chip 1)", .pme_code = 1185, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 2, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_3@2", .pme_desc = "Wclk cycles BW2MD input port 3 has a packet waiting that is blocked due to MD full. (M chip 2)", .pme_code = 1186, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 2, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_3@3", .pme_desc = "Wclk cycles BW2MD input port 3 has a packet waiting that is blocked due to MD full. 
(M chip 3)", .pme_code = 1187, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 2, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_3@4", .pme_desc = "Wclk cycles BW2MD input port 3 has a packet waiting that is blocked due to MD full. (M chip 4)", .pme_code = 1188, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 2, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_3@5", .pme_desc = "Wclk cycles BW2MD input port 3 has a packet waiting that is blocked due to MD full. (M chip 5)", .pme_code = 1189, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 2, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_3@6", .pme_desc = "Wclk cycles BW2MD input port 3 has a packet waiting that is blocked due to MD full. (M chip 6)", .pme_code = 1190, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 2, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_3@7", .pme_desc = "Wclk cycles BW2MD input port 3 has a packet waiting that is blocked due to MD full. 
(M chip 7)", .pme_code = 1191, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 2, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_3@8", .pme_desc = "Wclk cycles BW2MD input port 3 has a packet waiting that is blocked due to MD full. (M chip 8)", .pme_code = 1192, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 2, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_3@9", .pme_desc = "Wclk cycles BW2MD input port 3 has a packet waiting that is blocked due to MD full. (M chip 9)", .pme_code = 1193, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 2, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_3@10", .pme_desc = "Wclk cycles BW2MD input port 3 has a packet waiting that is blocked due to MD full. (M chip 10)", .pme_code = 1194, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 2, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_3@11", .pme_desc = "Wclk cycles BW2MD input port 3 has a packet waiting that is blocked due to MD full. 
(M chip 11)", .pme_code = 1195, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 2, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_3@12", .pme_desc = "Wclk cycles BW2MD input port 3 has a packet waiting that is blocked due to MD full. (M chip 12)", .pme_code = 1196, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 2, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_3@13", .pme_desc = "Wclk cycles BW2MD input port 3 has a packet waiting that is blocked due to MD full. (M chip 13)", .pme_code = 1197, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 2, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_3@14", .pme_desc = "Wclk cycles BW2MD input port 3 has a packet waiting that is blocked due to MD full. (M chip 14)", .pme_code = 1198, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 2, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_IN_BLOCKED_3@15", .pme_desc = "Wclk cycles BW2MD input port 3 has a packet waiting that is blocked due to MD full. 
(M chip 15)", .pme_code = 1199, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 2, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 15 Event 3 */ { .pme_name = "W_OUT_BLOCK_CRED_3@0", .pme_desc = "Wclk cycles MD2BW output port 3 is blocked due to lack of credits. (M chip 0)", .pme_code = 1200, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_3@1", .pme_desc = "Wclk cycles MD2BW output port 3 is blocked due to lack of credits. (M chip 1)", .pme_code = 1201, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 3, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_3@2", .pme_desc = "Wclk cycles MD2BW output port 3 is blocked due to lack of credits. (M chip 2)", .pme_code = 1202, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 3, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_3@3", .pme_desc = "Wclk cycles MD2BW output port 3 is blocked due to lack of credits. 
(M chip 3)", .pme_code = 1203, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 3, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_3@4", .pme_desc = "Wclk cycles MD2BW output port 3 is blocked due to lack of credits. (M chip 4)", .pme_code = 1204, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 3, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_3@5", .pme_desc = "Wclk cycles MD2BW output port 3 is blocked due to lack of credits. (M chip 5)", .pme_code = 1205, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 3, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_3@6", .pme_desc = "Wclk cycles MD2BW output port 3 is blocked due to lack of credits. (M chip 6)", .pme_code = 1206, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 3, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_3@7", .pme_desc = "Wclk cycles MD2BW output port 3 is blocked due to lack of credits. (M chip 7)", .pme_code = 1207, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 3, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_3@8", .pme_desc = "Wclk cycles MD2BW output port 3 is blocked due to lack of credits. 
(M chip 8)", .pme_code = 1208, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 3, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_3@9", .pme_desc = "Wclk cycles MD2BW output port 3 is blocked due to lack of credits. (M chip 9)", .pme_code = 1209, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 3, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_3@10", .pme_desc = "Wclk cycles MD2BW output port 3 is blocked due to lack of credits. (M chip 10)", .pme_code = 1210, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 3, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_3@11", .pme_desc = "Wclk cycles MD2BW output port 3 is blocked due to lack of credits. (M chip 11)", .pme_code = 1211, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 3, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_3@12", .pme_desc = "Wclk cycles MD2BW output port 3 is blocked due to lack of credits. (M chip 12)", .pme_code = 1212, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 3, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_3@13", .pme_desc = "Wclk cycles MD2BW output port 3 is blocked due to lack of credits. 
(M chip 13)", .pme_code = 1213, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 3, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_3@14", .pme_desc = "Wclk cycles MD2BW output port 3 is blocked due to lack of credits. (M chip 14)", .pme_code = 1214, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 3, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CRED_3@15", .pme_desc = "Wclk cycles MD2BW output port 3 is blocked due to lack of credits. (M chip 15)", .pme_code = 1215, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 15, .pme_event = 3, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 16 Event 0 */ { .pme_name = "REQUEST_1DWORD_L3_HIT@0", .pme_desc = "Single DWord Get and NGet requests to MDs - L3 hit. (M chip 0)", .pme_code = 1216, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1DWORD_L3_HIT@1", .pme_desc = "Single DWord Get and NGet requests to MDs - L3 hit. (M chip 1)", .pme_code = 1217, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 0, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1DWORD_L3_HIT@2", .pme_desc = "Single DWord Get and NGet requests to MDs - L3 hit. 
(M chip 2)", .pme_code = 1218, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 0, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1DWORD_L3_HIT@3", .pme_desc = "Single DWord Get and NGet requests to MDs - L3 hit. (M chip 3)", .pme_code = 1219, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 0, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1DWORD_L3_HIT@4", .pme_desc = "Single DWord Get and NGet requests to MDs - L3 hit. (M chip 4)", .pme_code = 1220, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 0, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1DWORD_L3_HIT@5", .pme_desc = "Single DWord Get and NGet requests to MDs - L3 hit. (M chip 5)", .pme_code = 1221, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 0, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1DWORD_L3_HIT@6", .pme_desc = "Single DWord Get and NGet requests to MDs - L3 hit. (M chip 6)", .pme_code = 1222, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 0, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1DWORD_L3_HIT@7", .pme_desc = "Single DWord Get and NGet requests to MDs - L3 hit. 
(M chip 7)", .pme_code = 1223, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 0, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1DWORD_L3_HIT@8", .pme_desc = "Single DWord Get and NGet requests to MDs - L3 hit. (M chip 8)", .pme_code = 1224, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 0, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1DWORD_L3_HIT@9", .pme_desc = "Single DWord Get and NGet requests to MDs - L3 hit. (M chip 9)", .pme_code = 1225, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 0, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1DWORD_L3_HIT@10", .pme_desc = "Single DWord Get and NGet requests to MDs - L3 hit. (M chip 10)", .pme_code = 1226, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 0, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1DWORD_L3_HIT@11", .pme_desc = "Single DWord Get and NGet requests to MDs - L3 hit. (M chip 11)", .pme_code = 1227, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 0, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1DWORD_L3_HIT@12", .pme_desc = "Single DWord Get and NGet requests to MDs - L3 hit. 
(M chip 12)", .pme_code = 1228, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 0, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1DWORD_L3_HIT@13", .pme_desc = "Single DWord Get and NGet requests to MDs - L3 hit. (M chip 13)", .pme_code = 1229, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 0, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1DWORD_L3_HIT@14", .pme_desc = "Single DWord Get and NGet requests to MDs - L3 hit. (M chip 14)", .pme_code = 1230, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 0, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1DWORD_L3_HIT@15", .pme_desc = "Single DWord Get and NGet requests to MDs - L3 hit. (M chip 15)", .pme_code = 1231, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 0, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 16 Event 1 */ { .pme_name = "AMOS@0", .pme_desc = "AMOs to local memory (memory manager). (M chip 0)", .pme_code = 1232, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "AMOS@1", .pme_desc = "AMOs to local memory (memory manager). 
(M chip 1)", .pme_code = 1233, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 1, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "AMOS@2", .pme_desc = "AMOs to local memory (memory manager). (M chip 2)", .pme_code = 1234, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 1, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "AMOS@3", .pme_desc = "AMOs to local memory (memory manager). (M chip 3)", .pme_code = 1235, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 1, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "AMOS@4", .pme_desc = "AMOs to local memory (memory manager). (M chip 4)", .pme_code = 1236, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 1, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "AMOS@5", .pme_desc = "AMOs to local memory (memory manager). (M chip 5)", .pme_code = 1237, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 1, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "AMOS@6", .pme_desc = "AMOs to local memory (memory manager). 
(M chip 6)", .pme_code = 1238, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 1, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "AMOS@7", .pme_desc = "AMOs to local memory (memory manager). (M chip 7)", .pme_code = 1239, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 1, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "AMOS@8", .pme_desc = "AMOs to local memory (memory manager). (M chip 8)", .pme_code = 1240, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 1, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "AMOS@9", .pme_desc = "AMOs to local memory (memory manager). (M chip 9)", .pme_code = 1241, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 1, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "AMOS@10", .pme_desc = "AMOs to local memory (memory manager). (M chip 10)", .pme_code = 1242, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 1, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "AMOS@11", .pme_desc = "AMOs to local memory (memory manager). 
(M chip 11)", .pme_code = 1243, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 1, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "AMOS@12", .pme_desc = "AMOs to local memory (memory manager). (M chip 12)", .pme_code = 1244, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 1, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "AMOS@13", .pme_desc = "AMOs to local memory (memory manager). (M chip 13)", .pme_code = 1245, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 1, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "AMOS@14", .pme_desc = "AMOs to local memory (memory manager). (M chip 14)", .pme_code = 1246, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 1, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "AMOS@15", .pme_desc = "AMOs to local memory (memory manager). (M chip 15)", .pme_code = 1247, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 1, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 16 Event 2 */ { .pme_name = "MM0_ANY_BANK_BUSY@0", .pme_desc = "Wclk cycles that any back is busy in MM0. 
(M chip 0)", .pme_code = 1248, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM0_ANY_BANK_BUSY@1", .pme_desc = "Wclk cycles that any back is busy in MM0. (M chip 1)", .pme_code = 1249, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 2, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM0_ANY_BANK_BUSY@2", .pme_desc = "Wclk cycles that any back is busy in MM0. (M chip 2)", .pme_code = 1250, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 2, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM0_ANY_BANK_BUSY@3", .pme_desc = "Wclk cycles that any back is busy in MM0. (M chip 3)", .pme_code = 1251, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 2, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM0_ANY_BANK_BUSY@4", .pme_desc = "Wclk cycles that any back is busy in MM0. (M chip 4)", .pme_code = 1252, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 2, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM0_ANY_BANK_BUSY@5", .pme_desc = "Wclk cycles that any back is busy in MM0. 
(M chip 5)", .pme_code = 1253, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 2, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM0_ANY_BANK_BUSY@6", .pme_desc = "Wclk cycles that any back is busy in MM0. (M chip 6)", .pme_code = 1254, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 2, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM0_ANY_BANK_BUSY@7", .pme_desc = "Wclk cycles that any back is busy in MM0. (M chip 7)", .pme_code = 1255, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 2, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM0_ANY_BANK_BUSY@8", .pme_desc = "Wclk cycles that any back is busy in MM0. (M chip 8)", .pme_code = 1256, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 2, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM0_ANY_BANK_BUSY@9", .pme_desc = "Wclk cycles that any back is busy in MM0. (M chip 9)", .pme_code = 1257, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 2, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM0_ANY_BANK_BUSY@10", .pme_desc = "Wclk cycles that any back is busy in MM0. 
(M chip 10)", .pme_code = 1258, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 2, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM0_ANY_BANK_BUSY@11", .pme_desc = "Wclk cycles that any back is busy in MM0. (M chip 11)", .pme_code = 1259, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 2, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM0_ANY_BANK_BUSY@12", .pme_desc = "Wclk cycles that any back is busy in MM0. (M chip 12)", .pme_code = 1260, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 2, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM0_ANY_BANK_BUSY@13", .pme_desc = "Wclk cycles that any back is busy in MM0. (M chip 13)", .pme_code = 1261, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 2, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM0_ANY_BANK_BUSY@14", .pme_desc = "Wclk cycles that any back is busy in MM0. (M chip 14)", .pme_code = 1262, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 2, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM0_ANY_BANK_BUSY@15", .pme_desc = "Wclk cycles that any back is busy in MM0. 
(M chip 15)", .pme_code = 1263, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 2, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 16 Event 3 */ { .pme_name = "W_OUT_BLOCK_CHN_0@0", .pme_desc = "Wclk cycles MD2BW output port 0 is blocked due to channel back-pressure. (M chip 0)", .pme_code = 1264, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_0@1", .pme_desc = "Wclk cycles MD2BW output port 0 is blocked due to channel back-pressure. (M chip 1)", .pme_code = 1265, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 3, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_0@2", .pme_desc = "Wclk cycles MD2BW output port 0 is blocked due to channel back-pressure. (M chip 2)", .pme_code = 1266, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 3, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_0@3", .pme_desc = "Wclk cycles MD2BW output port 0 is blocked due to channel back-pressure. 
(M chip 3)", .pme_code = 1267, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 3, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_0@4", .pme_desc = "Wclk cycles MD2BW output port 0 is blocked due to channel back-pressure. (M chip 4)", .pme_code = 1268, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 3, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_0@5", .pme_desc = "Wclk cycles MD2BW output port 0 is blocked due to channel back-pressure. (M chip 5)", .pme_code = 1269, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 3, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_0@6", .pme_desc = "Wclk cycles MD2BW output port 0 is blocked due to channel back-pressure. (M chip 6)", .pme_code = 1270, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 3, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_0@7", .pme_desc = "Wclk cycles MD2BW output port 0 is blocked due to channel back-pressure. 
(M chip 7)", .pme_code = 1271, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 3, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_0@8", .pme_desc = "Wclk cycles MD2BW output port 0 is blocked due to channel back-pressure. (M chip 8)", .pme_code = 1272, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 3, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_0@9", .pme_desc = "Wclk cycles MD2BW output port 0 is blocked due to channel back-pressure. (M chip 9)", .pme_code = 1273, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 3, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_0@10", .pme_desc = "Wclk cycles MD2BW output port 0 is blocked due to channel back-pressure. (M chip 10)", .pme_code = 1274, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 3, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_0@11", .pme_desc = "Wclk cycles MD2BW output port 0 is blocked due to channel back-pressure. 
(M chip 11)", .pme_code = 1275, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 3, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_0@12", .pme_desc = "Wclk cycles MD2BW output port 0 is blocked due to channel back-pressure. (M chip 12)", .pme_code = 1276, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 3, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_0@13", .pme_desc = "Wclk cycles MD2BW output port 0 is blocked due to channel back-pressure. (M chip 13)", .pme_code = 1277, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 3, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_0@14", .pme_desc = "Wclk cycles MD2BW output port 0 is blocked due to channel back-pressure. (M chip 14)", .pme_code = 1278, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 3, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_0@15", .pme_desc = "Wclk cycles MD2BW output port 0 is blocked due to channel back-pressure. 
(M chip 15)", .pme_code = 1279, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 16, .pme_event = 3, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 17 Event 0 */ { .pme_name = "REQUEST_4DWORDS_L3_HIT@0", .pme_desc = "Allocating read requests to MDs - L3 hit. (M chip 0)", .pme_code = 1280, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_4DWORDS_L3_HIT@1", .pme_desc = "Allocating read requests to MDs - L3 hit. (M chip 1)", .pme_code = 1281, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 0, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_4DWORDS_L3_HIT@2", .pme_desc = "Allocating read requests to MDs - L3 hit. (M chip 2)", .pme_code = 1282, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 0, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_4DWORDS_L3_HIT@3", .pme_desc = "Allocating read requests to MDs - L3 hit. (M chip 3)", .pme_code = 1283, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 0, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_4DWORDS_L3_HIT@4", .pme_desc = "Allocating read requests to MDs - L3 hit. 
(M chip 4)", .pme_code = 1284, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 0, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_4DWORDS_L3_HIT@5", .pme_desc = "Allocating read requests to MDs - L3 hit. (M chip 5)", .pme_code = 1285, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 0, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_4DWORDS_L3_HIT@6", .pme_desc = "Allocating read requests to MDs - L3 hit. (M chip 6)", .pme_code = 1286, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 0, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_4DWORDS_L3_HIT@7", .pme_desc = "Allocating read requests to MDs - L3 hit. (M chip 7)", .pme_code = 1287, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 0, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_4DWORDS_L3_HIT@8", .pme_desc = "Allocating read requests to MDs - L3 hit. (M chip 8)", .pme_code = 1288, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 0, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_4DWORDS_L3_HIT@9", .pme_desc = "Allocating read requests to MDs - L3 hit. 
(M chip 9)", .pme_code = 1289, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 0, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_4DWORDS_L3_HIT@10", .pme_desc = "Allocating read requests to MDs - L3 hit. (M chip 10)", .pme_code = 1290, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 0, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_4DWORDS_L3_HIT@11", .pme_desc = "Allocating read requests to MDs - L3 hit. (M chip 11)", .pme_code = 1291, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 0, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_4DWORDS_L3_HIT@12", .pme_desc = "Allocating read requests to MDs - L3 hit. (M chip 12)", .pme_code = 1292, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 0, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_4DWORDS_L3_HIT@13", .pme_desc = "Allocating read requests to MDs - L3 hit. (M chip 13)", .pme_code = 1293, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 0, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_4DWORDS_L3_HIT@14", .pme_desc = "Allocating read requests to MDs - L3 hit. 
(M chip 14)", .pme_code = 1294, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 0, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_4DWORDS_L3_HIT@15", .pme_desc = "Allocating read requests to MDs - L3 hit. (M chip 15)", .pme_code = 1295, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 0, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 17 Event 1 */ { .pme_name = "AMO_MISSES@0", .pme_desc = "Misses in AMO cache (memory manager). (M chip 0)", .pme_code = 1296, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "AMO_MISSES@1", .pme_desc = "Misses in AMO cache (memory manager). (M chip 1)", .pme_code = 1297, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 1, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "AMO_MISSES@2", .pme_desc = "Misses in AMO cache (memory manager). (M chip 2)", .pme_code = 1298, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 1, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "AMO_MISSES@3", .pme_desc = "Misses in AMO cache (memory manager). 
(M chip 3)", .pme_code = 1299, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 1, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "AMO_MISSES@4", .pme_desc = "Misses in AMO cache (memory manager). (M chip 4)", .pme_code = 1300, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 1, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "AMO_MISSES@5", .pme_desc = "Misses in AMO cache (memory manager). (M chip 5)", .pme_code = 1301, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 1, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "AMO_MISSES@6", .pme_desc = "Misses in AMO cache (memory manager). (M chip 6)", .pme_code = 1302, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 1, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "AMO_MISSES@7", .pme_desc = "Misses in AMO cache (memory manager). (M chip 7)", .pme_code = 1303, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 1, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "AMO_MISSES@8", .pme_desc = "Misses in AMO cache (memory manager). 
(M chip 8)", .pme_code = 1304, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 1, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "AMO_MISSES@9", .pme_desc = "Misses in AMO cache (memory manager). (M chip 9)", .pme_code = 1305, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 1, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "AMO_MISSES@10", .pme_desc = "Misses in AMO cache (memory manager). (M chip 10)", .pme_code = 1306, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 1, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "AMO_MISSES@11", .pme_desc = "Misses in AMO cache (memory manager). (M chip 11)", .pme_code = 1307, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 1, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "AMO_MISSES@12", .pme_desc = "Misses in AMO cache (memory manager). (M chip 12)", .pme_code = 1308, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 1, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "AMO_MISSES@13", .pme_desc = "Misses in AMO cache (memory manager). 
(M chip 13)", .pme_code = 1309, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 1, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "AMO_MISSES@14", .pme_desc = "Misses in AMO cache (memory manager). (M chip 14)", .pme_code = 1310, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 1, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "AMO_MISSES@15", .pme_desc = "Misses in AMO cache (memory manager). (M chip 15)", .pme_code = 1311, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 1, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 17 Event 2 */ { .pme_name = "MM0_ACCUM_BANK_BUSY@0", .pme_desc = "Accumulation of the MM0 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 0)", .pme_code = 1312, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM0_ACCUM_BANK_BUSY@1", .pme_desc = "Accumulation of the MM0 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. 
(M chip 1)", .pme_code = 1313, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 2, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM0_ACCUM_BANK_BUSY@2", .pme_desc = "Accumulation of the MM0 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 2)", .pme_code = 1314, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 2, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM0_ACCUM_BANK_BUSY@3", .pme_desc = "Accumulation of the MM0 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 3)", .pme_code = 1315, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 2, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM0_ACCUM_BANK_BUSY@4", .pme_desc = "Accumulation of the MM0 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 4)", .pme_code = 1316, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 2, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM0_ACCUM_BANK_BUSY@5", .pme_desc = "Accumulation of the MM0 memory banks are busy in Mclks. 
There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 5)", .pme_code = 1317, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 2, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM0_ACCUM_BANK_BUSY@6", .pme_desc = "Accumulation of the MM0 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 6)", .pme_code = 1318, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 2, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM0_ACCUM_BANK_BUSY@7", .pme_desc = "Accumulation of the MM0 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 7)", .pme_code = 1319, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 2, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM0_ACCUM_BANK_BUSY@8", .pme_desc = "Accumulation of the MM0 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. 
(M chip 8)", .pme_code = 1320, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 2, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM0_ACCUM_BANK_BUSY@9", .pme_desc = "Accumulation of the MM0 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 9)", .pme_code = 1321, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 2, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM0_ACCUM_BANK_BUSY@10", .pme_desc = "Accumulation of the MM0 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 10)", .pme_code = 1322, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 2, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM0_ACCUM_BANK_BUSY@11", .pme_desc = "Accumulation of the MM0 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 11)", .pme_code = 1323, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 2, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM0_ACCUM_BANK_BUSY@12", .pme_desc = "Accumulation of the MM0 memory banks are busy in Mclks. 
There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 12)", .pme_code = 1324, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 2, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM0_ACCUM_BANK_BUSY@13", .pme_desc = "Accumulation of the MM0 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 13)", .pme_code = 1325, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 2, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM0_ACCUM_BANK_BUSY@14", .pme_desc = "Accumulation of the MM0 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 14)", .pme_code = 1326, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 2, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM0_ACCUM_BANK_BUSY@15", .pme_desc = "Accumulation of the MM0 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. 
(M chip 15)", .pme_code = 1327, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 2, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 17 Event 3 */ { .pme_name = "W_OUT_BLOCK_CHN_1@0", .pme_desc = "Wclk cycles MD2BW output port 1 is blocked due to channel back-pressure. (M chip 0)", .pme_code = 1328, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_1@1", .pme_desc = "Wclk cycles MD2BW output port 1 is blocked due to channel back-pressure. (M chip 1)", .pme_code = 1329, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 3, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_1@2", .pme_desc = "Wclk cycles MD2BW output port 1 is blocked due to channel back-pressure. (M chip 2)", .pme_code = 1330, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 3, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_1@3", .pme_desc = "Wclk cycles MD2BW output port 1 is blocked due to channel back-pressure. 
(M chip 3)", .pme_code = 1331, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 3, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_1@4", .pme_desc = "Wclk cycles MD2BW output port 1 is blocked due to channel back-pressure. (M chip 4)", .pme_code = 1332, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 3, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_1@5", .pme_desc = "Wclk cycles MD2BW output port 1 is blocked due to channel back-pressure. (M chip 5)", .pme_code = 1333, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 3, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_1@6", .pme_desc = "Wclk cycles MD2BW output port 1 is blocked due to channel back-pressure. (M chip 6)", .pme_code = 1334, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 3, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_1@7", .pme_desc = "Wclk cycles MD2BW output port 1 is blocked due to channel back-pressure. 
(M chip 7)", .pme_code = 1335, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 3, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_1@8", .pme_desc = "Wclk cycles MD2BW output port 1 is blocked due to channel back-pressure. (M chip 8)", .pme_code = 1336, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 3, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_1@9", .pme_desc = "Wclk cycles MD2BW output port 1 is blocked due to channel back-pressure. (M chip 9)", .pme_code = 1337, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 3, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_1@10", .pme_desc = "Wclk cycles MD2BW output port 1 is blocked due to channel back-pressure. (M chip 10)", .pme_code = 1338, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 3, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_1@11", .pme_desc = "Wclk cycles MD2BW output port 1 is blocked due to channel back-pressure. 
(M chip 11)", .pme_code = 1339, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 3, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_1@12", .pme_desc = "Wclk cycles MD2BW output port 1 is blocked due to channel back-pressure. (M chip 12)", .pme_code = 1340, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 3, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_1@13", .pme_desc = "Wclk cycles MD2BW output port 1 is blocked due to channel back-pressure. (M chip 13)", .pme_code = 1341, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 3, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_1@14", .pme_desc = "Wclk cycles MD2BW output port 1 is blocked due to channel back-pressure. (M chip 14)", .pme_code = 1342, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 3, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_1@15", .pme_desc = "Wclk cycles MD2BW output port 1 is blocked due to channel back-pressure. 
(M chip 15)", .pme_code = 1343, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 17, .pme_event = 3, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 18 Event 0 */ { .pme_name = "REQUEST_1DWORD@0", .pme_desc = "Single DWord Get and NGet requests to MDs. (M chip 0)", .pme_code = 1344, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1DWORD@1", .pme_desc = "Single DWord Get and NGet requests to MDs. (M chip 1)", .pme_code = 1345, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 0, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1DWORD@2", .pme_desc = "Single DWord Get and NGet requests to MDs. (M chip 2)", .pme_code = 1346, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 0, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1DWORD@3", .pme_desc = "Single DWord Get and NGet requests to MDs. (M chip 3)", .pme_code = 1347, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 0, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1DWORD@4", .pme_desc = "Single DWord Get and NGet requests to MDs. 
(M chip 4)", .pme_code = 1348, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 0, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1DWORD@5", .pme_desc = "Single DWord Get and NGet requests to MDs. (M chip 5)", .pme_code = 1349, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 0, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1DWORD@6", .pme_desc = "Single DWord Get and NGet requests to MDs. (M chip 6)", .pme_code = 1350, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 0, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1DWORD@7", .pme_desc = "Single DWord Get and NGet requests to MDs. (M chip 7)", .pme_code = 1351, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 0, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1DWORD@8", .pme_desc = "Single DWord Get and NGet requests to MDs. (M chip 8)", .pme_code = 1352, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 0, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1DWORD@9", .pme_desc = "Single DWord Get and NGet requests to MDs. 
(M chip 9)", .pme_code = 1353, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 0, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1DWORD@10", .pme_desc = "Single DWord Get and NGet requests to MDs. (M chip 10)", .pme_code = 1354, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 0, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1DWORD@11", .pme_desc = "Single DWord Get and NGet requests to MDs. (M chip 11)", .pme_code = 1355, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 0, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1DWORD@12", .pme_desc = "Single DWord Get and NGet requests to MDs. (M chip 12)", .pme_code = 1356, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 0, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1DWORD@13", .pme_desc = "Single DWord Get and NGet requests to MDs. (M chip 13)", .pme_code = 1357, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 0, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1DWORD@14", .pme_desc = "Single DWord Get and NGet requests to MDs. 
(M chip 14)", .pme_code = 1358, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 0, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1DWORD@15", .pme_desc = "Single DWord Get and NGet requests to MDs. (M chip 15)", .pme_code = 1359, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 0, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 18 Event 1 */ { .pme_name = "RETRIES_MM@0", .pme_desc = "Memory Manager retries. (M chip 0)", .pme_code = 1360, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "RETRIES_MM@1", .pme_desc = "Memory Manager retries. (M chip 1)", .pme_code = 1361, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 1, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "RETRIES_MM@2", .pme_desc = "Memory Manager retries. (M chip 2)", .pme_code = 1362, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 1, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "RETRIES_MM@3", .pme_desc = "Memory Manager retries. 
(M chip 3)", .pme_code = 1363, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 1, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "RETRIES_MM@4", .pme_desc = "Memory Manager retries. (M chip 4)", .pme_code = 1364, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 1, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "RETRIES_MM@5", .pme_desc = "Memory Manager retries. (M chip 5)", .pme_code = 1365, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 1, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "RETRIES_MM@6", .pme_desc = "Memory Manager retries. (M chip 6)", .pme_code = 1366, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 1, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "RETRIES_MM@7", .pme_desc = "Memory Manager retries. (M chip 7)", .pme_code = 1367, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 1, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "RETRIES_MM@8", .pme_desc = "Memory Manager retries. 
(M chip 8)", .pme_code = 1368, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 1, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "RETRIES_MM@9", .pme_desc = "Memory Manager retries. (M chip 9)", .pme_code = 1369, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 1, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "RETRIES_MM@10", .pme_desc = "Memory Manager retries. (M chip 10)", .pme_code = 1370, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 1, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "RETRIES_MM@11", .pme_desc = "Memory Manager retries. (M chip 11)", .pme_code = 1371, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 1, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "RETRIES_MM@12", .pme_desc = "Memory Manager retries. (M chip 12)", .pme_code = 1372, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 1, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "RETRIES_MM@13", .pme_desc = "Memory Manager retries. 
(M chip 13)", .pme_code = 1373, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 1, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "RETRIES_MM@14", .pme_desc = "Memory Manager retries. (M chip 14)", .pme_code = 1374, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 1, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "RETRIES_MM@15", .pme_desc = "Memory Manager retries. (M chip 15)", .pme_code = 1375, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 1, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 18 Event 2 */ { .pme_name = "MM1_ANY_BANK_BUSY@0", .pme_desc = "Wclk cycles that any bank is busy in MM1. (M chip 0)", .pme_code = 1376, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM1_ANY_BANK_BUSY@1", .pme_desc = "Wclk cycles that any bank is busy in MM1. (M chip 1)", .pme_code = 1377, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 2, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM1_ANY_BANK_BUSY@2", .pme_desc = "Wclk cycles that any bank is busy in MM1. 
(M chip 2)", .pme_code = 1378, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 2, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM1_ANY_BANK_BUSY@3", .pme_desc = "Wclk cycles that any bank is busy in MM1. (M chip 3)", .pme_code = 1379, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 2, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM1_ANY_BANK_BUSY@4", .pme_desc = "Wclk cycles that any bank is busy in MM1. (M chip 4)", .pme_code = 1380, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 2, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM1_ANY_BANK_BUSY@5", .pme_desc = "Wclk cycles that any bank is busy in MM1. (M chip 5)", .pme_code = 1381, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 2, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM1_ANY_BANK_BUSY@6", .pme_desc = "Wclk cycles that any bank is busy in MM1. (M chip 6)", .pme_code = 1382, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 2, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM1_ANY_BANK_BUSY@7", .pme_desc = "Wclk cycles that any bank is busy in MM1. 
(M chip 7)", .pme_code = 1383, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 2, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM1_ANY_BANK_BUSY@8", .pme_desc = "Wclk cycles that any bank is busy in MM1. (M chip 8)", .pme_code = 1384, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 2, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM1_ANY_BANK_BUSY@9", .pme_desc = "Wclk cycles that any bank is busy in MM1. (M chip 9)", .pme_code = 1385, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 2, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM1_ANY_BANK_BUSY@10", .pme_desc = "Wclk cycles that any bank is busy in MM1. (M chip 10)", .pme_code = 1386, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 2, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM1_ANY_BANK_BUSY@11", .pme_desc = "Wclk cycles that any bank is busy in MM1. (M chip 11)", .pme_code = 1387, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 2, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM1_ANY_BANK_BUSY@12", .pme_desc = "Wclk cycles that any bank is busy in MM1. 
(M chip 12)", .pme_code = 1388, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 2, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM1_ANY_BANK_BUSY@13", .pme_desc = "Wclk cycles that any bank is busy in MM1. (M chip 13)", .pme_code = 1389, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 2, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM1_ANY_BANK_BUSY@14", .pme_desc = "Wclk cycles that any bank is busy in MM1. (M chip 14)", .pme_code = 1390, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 2, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM1_ANY_BANK_BUSY@15", .pme_desc = "Wclk cycles that any bank is busy in MM1. (M chip 15)", .pme_code = 1391, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 2, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 18 Event 3 */ { .pme_name = "W_OUT_BLOCK_CHN_2@0", .pme_desc = "Wclk cycles MD2BW output port 2 is blocked due to channel back-pressure. (M chip 0)", .pme_code = 1392, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_2@1", .pme_desc = "Wclk cycles MD2BW output port 2 is blocked due to channel back-pressure. 
(M chip 1)", .pme_code = 1393, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 3, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_2@2", .pme_desc = "Wclk cycles MD2BW output port 2 is blocked due to channel back-pressure. (M chip 2)", .pme_code = 1394, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 3, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_2@3", .pme_desc = "Wclk cycles MD2BW output port 2 is blocked due to channel back-pressure. (M chip 3)", .pme_code = 1395, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 3, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_2@4", .pme_desc = "Wclk cycles MD2BW output port 2 is blocked due to channel back-pressure. (M chip 4)", .pme_code = 1396, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 3, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_2@5", .pme_desc = "Wclk cycles MD2BW output port 2 is blocked due to channel back-pressure. 
(M chip 5)", .pme_code = 1397, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 3, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_2@6", .pme_desc = "Wclk cycles MD2BW output port 2 is blocked due to channel back-pressure. (M chip 6)", .pme_code = 1398, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 3, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_2@7", .pme_desc = "Wclk cycles MD2BW output port 2 is blocked due to channel back-pressure. (M chip 7)", .pme_code = 1399, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 3, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_2@8", .pme_desc = "Wclk cycles MD2BW output port 2 is blocked due to channel back-pressure. (M chip 8)", .pme_code = 1400, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 3, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_2@9", .pme_desc = "Wclk cycles MD2BW output port 2 is blocked due to channel back-pressure. 
(M chip 9)", .pme_code = 1401, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 3, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_2@10", .pme_desc = "Wclk cycles MD2BW output port 2 is blocked due to channel back-pressure. (M chip 10)", .pme_code = 1402, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 3, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_2@11", .pme_desc = "Wclk cycles MD2BW output port 2 is blocked due to channel back-pressure. (M chip 11)", .pme_code = 1403, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 3, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_2@12", .pme_desc = "Wclk cycles MD2BW output port 2 is blocked due to channel back-pressure. (M chip 12)", .pme_code = 1404, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 3, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_2@13", .pme_desc = "Wclk cycles MD2BW output port 2 is blocked due to channel back-pressure. 
(M chip 13)", .pme_code = 1405, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 3, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_2@14", .pme_desc = "Wclk cycles MD2BW output port 2 is blocked due to channel back-pressure. (M chip 14)", .pme_code = 1406, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 3, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_2@15", .pme_desc = "Wclk cycles MD2BW output port 2 is blocked due to channel back-pressure. (M chip 15)", .pme_code = 1407, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 18, .pme_event = 3, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 19 Event 0 */ { .pme_name = "REQUEST_4DWORDS@0", .pme_desc = "Allocating read, Get and NGet full cache line requests to MDs. (M chip 0)", .pme_code = 1408, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_4DWORDS@1", .pme_desc = "Allocating read, Get and NGet full cache line requests to MDs. 
(M chip 1)", .pme_code = 1409, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 0, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_4DWORDS@2", .pme_desc = "Allocating read, Get and NGet full cache line requests to MDs. (M chip 2)", .pme_code = 1410, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 0, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_4DWORDS@3", .pme_desc = "Allocating read, Get and NGet full cache line requests to MDs. (M chip 3)", .pme_code = 1411, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 0, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_4DWORDS@4", .pme_desc = "Allocating read, Get and NGet full cache line requests to MDs. (M chip 4)", .pme_code = 1412, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 0, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_4DWORDS@5", .pme_desc = "Allocating read, Get and NGet full cache line requests to MDs. (M chip 5)", .pme_code = 1413, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 0, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_4DWORDS@6", .pme_desc = "Allocating read, Get and NGet full cache line requests to MDs. 
(M chip 6)", .pme_code = 1414, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 0, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_4DWORDS@7", .pme_desc = "Allocating read, Get and NGet full cache line requests to MDs. (M chip 7)", .pme_code = 1415, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 0, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_4DWORDS@8", .pme_desc = "Allocating read, Get and NGet full cache line requests to MDs. (M chip 8)", .pme_code = 1416, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 0, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_4DWORDS@9", .pme_desc = "Allocating read, Get and NGet full cache line requests to MDs. (M chip 9)", .pme_code = 1417, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 0, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_4DWORDS@10", .pme_desc = "Allocating read, Get and NGet full cache line requests to MDs. (M chip 10)", .pme_code = 1418, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 0, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_4DWORDS@11", .pme_desc = "Allocating read, Get and NGet full cache line requests to MDs. 
(M chip 11)", .pme_code = 1419, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 0, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_4DWORDS@12", .pme_desc = "Allocating read, Get and NGet full cache line requests to MDs. (M chip 12)", .pme_code = 1420, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 0, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_4DWORDS@13", .pme_desc = "Allocating read, Get and NGet full cache line requests to MDs. (M chip 13)", .pme_code = 1421, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 0, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_4DWORDS@14", .pme_desc = "Allocating read, Get and NGet full cache line requests to MDs. (M chip 14)", .pme_code = 1422, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 0, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_4DWORDS@15", .pme_desc = "Allocating read, Get and NGet full cache line requests to MDs. 
(M chip 15)", .pme_code = 1423, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 0, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 19 Event 1 */ { .pme_name = "@0", .pme_desc = "", .pme_code = 1424, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@1", .pme_desc = "", .pme_code = 1425, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 1, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@2", .pme_desc = "", .pme_code = 1426, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 1, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@3", .pme_desc = "", .pme_code = 1427, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 1, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@4", .pme_desc = "", .pme_code = 1428, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 1, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@5", .pme_desc = "", .pme_code = 1429, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 1, .pme_chipno = 5, .pme_base = 
PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@6", .pme_desc = "", .pme_code = 1430, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 1, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@7", .pme_desc = "", .pme_code = 1431, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 1, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@8", .pme_desc = "", .pme_code = 1432, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 1, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@9", .pme_desc = "", .pme_code = 1433, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 1, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@10", .pme_desc = "", .pme_code = 1434, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 1, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@11", .pme_desc = "", .pme_code = 1435, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 1, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@12", .pme_desc = "", .pme_code = 1436, .pme_flags = 0x0, 
.pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 1, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@13", .pme_desc = "", .pme_code = 1437, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 1, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@14", .pme_desc = "", .pme_code = 1438, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 1, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@15", .pme_desc = "", .pme_code = 1439, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 1, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 19 Event 2 */ { .pme_name = "MM1_ACCUM_BANK_BUSY@0", .pme_desc = "Accumulation of the MM1 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 0)", .pme_code = 1440, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM1_ACCUM_BANK_BUSY@1", .pme_desc = "Accumulation of the MM1 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. 
(M chip 1)", .pme_code = 1441, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 2, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM1_ACCUM_BANK_BUSY@2", .pme_desc = "Accumulation of the MM1 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 2)", .pme_code = 1442, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 2, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM1_ACCUM_BANK_BUSY@3", .pme_desc = "Accumulation of the MM1 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 3)", .pme_code = 1443, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 2, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM1_ACCUM_BANK_BUSY@4", .pme_desc = "Accumulation of the MM1 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 4)", .pme_code = 1444, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 2, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM1_ACCUM_BANK_BUSY@5", .pme_desc = "Accumulation of the MM1 memory banks are busy in Mclks. 
There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 5)", .pme_code = 1445, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 2, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM1_ACCUM_BANK_BUSY@6", .pme_desc = "Accumulation of the MM1 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 6)", .pme_code = 1446, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 2, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM1_ACCUM_BANK_BUSY@7", .pme_desc = "Accumulation of the MM1 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 7)", .pme_code = 1447, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 2, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM1_ACCUM_BANK_BUSY@8", .pme_desc = "Accumulation of the MM1 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. 
(M chip 8)", .pme_code = 1448, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 2, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM1_ACCUM_BANK_BUSY@9", .pme_desc = "Accumulation of the MM1 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 9)", .pme_code = 1449, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 2, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM1_ACCUM_BANK_BUSY@10", .pme_desc = "Accumulation of the MM1 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 10)", .pme_code = 1450, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 2, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM1_ACCUM_BANK_BUSY@11", .pme_desc = "Accumulation of the MM1 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 11)", .pme_code = 1451, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 2, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM1_ACCUM_BANK_BUSY@12", .pme_desc = "Accumulation of the MM1 memory banks are busy in Mclks. 
There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 12)", .pme_code = 1452, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 2, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM1_ACCUM_BANK_BUSY@13", .pme_desc = "Accumulation of the MM1 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 13)", .pme_code = 1453, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 2, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM1_ACCUM_BANK_BUSY@14", .pme_desc = "Accumulation of the MM1 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 14)", .pme_code = 1454, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 2, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM1_ACCUM_BANK_BUSY@15", .pme_desc = "Accumulation of the MM1 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. 
(M chip 15)", .pme_code = 1455, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 2, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 19 Event 3 */ { .pme_name = "W_OUT_BLOCK_CHN_3@0", .pme_desc = "Wclk cycles MD2BW output port 3 is blocked due to channel back-pressure. (M chip 0)", .pme_code = 1456, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_3@1", .pme_desc = "Wclk cycles MD2BW output port 3 is blocked due to channel back-pressure. (M chip 1)", .pme_code = 1457, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 3, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_3@2", .pme_desc = "Wclk cycles MD2BW output port 3 is blocked due to channel back-pressure. (M chip 2)", .pme_code = 1458, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 3, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_3@3", .pme_desc = "Wclk cycles MD2BW output port 3 is blocked due to channel back-pressure. 
(M chip 3)", .pme_code = 1459, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 3, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_3@4", .pme_desc = "Wclk cycles MD2BW output port 3 is blocked due to channel back-pressure. (M chip 4)", .pme_code = 1460, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 3, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_3@5", .pme_desc = "Wclk cycles MD2BW output port 3 is blocked due to channel back-pressure. (M chip 5)", .pme_code = 1461, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 3, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_3@6", .pme_desc = "Wclk cycles MD2BW output port 3 is blocked due to channel back-pressure. (M chip 6)", .pme_code = 1462, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 3, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_3@7", .pme_desc = "Wclk cycles MD2BW output port 3 is blocked due to channel back-pressure. 
(M chip 7)", .pme_code = 1463, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 3, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_3@8", .pme_desc = "Wclk cycles MD2BW output port 3 is blocked due to channel back-pressure. (M chip 8)", .pme_code = 1464, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 3, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_3@9", .pme_desc = "Wclk cycles MD2BW output port 3 is blocked due to channel back-pressure. (M chip 9)", .pme_code = 1465, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 3, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_3@10", .pme_desc = "Wclk cycles MD2BW output port 3 is blocked due to channel back-pressure. (M chip 10)", .pme_code = 1466, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 3, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_3@11", .pme_desc = "Wclk cycles MD2BW output port 3 is blocked due to channel back-pressure. 
(M chip 11)", .pme_code = 1467, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 3, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_3@12", .pme_desc = "Wclk cycles MD2BW output port 3 is blocked due to channel back-pressure. (M chip 12)", .pme_code = 1468, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 3, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_3@13", .pme_desc = "Wclk cycles MD2BW output port 3 is blocked due to channel back-pressure. (M chip 13)", .pme_code = 1469, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 3, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_3@14", .pme_desc = "Wclk cycles MD2BW output port 3 is blocked due to channel back-pressure. (M chip 14)", .pme_code = 1470, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 3, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_BLOCK_CHN_3@15", .pme_desc = "Wclk cycles MD2BW output port 3 is blocked due to channel back-pressure. 
(M chip 15)", .pme_code = 1471, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 19, .pme_event = 3, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 20 Event 0 */ { .pme_name = "REQUESTS_0@0", .pme_desc = "Read or write requests from port 0 to MDs. (M chip 0)", .pme_code = 1472, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_0@1", .pme_desc = "Read or write requests from port 0 to MDs. (M chip 1)", .pme_code = 1473, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 0, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_0@2", .pme_desc = "Read or write requests from port 0 to MDs. (M chip 2)", .pme_code = 1474, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 0, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_0@3", .pme_desc = "Read or write requests from port 0 to MDs. (M chip 3)", .pme_code = 1475, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 0, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_0@4", .pme_desc = "Read or write requests from port 0 to MDs. 
(M chip 4)", .pme_code = 1476, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 0, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_0@5", .pme_desc = "Read or write requests from port 0 to MDs. (M chip 5)", .pme_code = 1477, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 0, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_0@6", .pme_desc = "Read or write requests from port 0 to MDs. (M chip 6)", .pme_code = 1478, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 0, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_0@7", .pme_desc = "Read or write requests from port 0 to MDs. (M chip 7)", .pme_code = 1479, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 0, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_0@8", .pme_desc = "Read or write requests from port 0 to MDs. (M chip 8)", .pme_code = 1480, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 0, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_0@9", .pme_desc = "Read or write requests from port 0 to MDs. 
(M chip 9)", .pme_code = 1481, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 0, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_0@10", .pme_desc = "Read or write requests from port 0 to MDs. (M chip 10)", .pme_code = 1482, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 0, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_0@11", .pme_desc = "Read or write requests from port 0 to MDs. (M chip 11)", .pme_code = 1483, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 0, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_0@12", .pme_desc = "Read or write requests from port 0 to MDs. (M chip 12)", .pme_code = 1484, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 0, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_0@13", .pme_desc = "Read or write requests from port 0 to MDs. (M chip 13)", .pme_code = 1485, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 0, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_0@14", .pme_desc = "Read or write requests from port 0 to MDs. 
(M chip 14)", .pme_code = 1486, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 0, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_0@15", .pme_desc = "Read or write requests from port 0 to MDs. (M chip 15)", .pme_code = 1487, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 0, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 20 Event 1 */ { .pme_name = "REQUEST_1DWORD_L3_MISS@0", .pme_desc = "Single DWord get requests to MDs - L3 miss. (M chip 0)", .pme_code = 1488, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1DWORD_L3_MISS@1", .pme_desc = "Single DWord get requests to MDs - L3 miss. (M chip 1)", .pme_code = 1489, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 1, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1DWORD_L3_MISS@2", .pme_desc = "Single DWord get requests to MDs - L3 miss. (M chip 2)", .pme_code = 1490, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 1, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1DWORD_L3_MISS@3", .pme_desc = "Single DWord get requests to MDs - L3 miss. 
(M chip 3)", .pme_code = 1491, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 1, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1DWORD_L3_MISS@4", .pme_desc = "Single DWord get requests to MDs - L3 miss. (M chip 4)", .pme_code = 1492, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 1, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1DWORD_L3_MISS@5", .pme_desc = "Single DWord get requests to MDs - L3 miss. (M chip 5)", .pme_code = 1493, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 1, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1DWORD_L3_MISS@6", .pme_desc = "Single DWord get requests to MDs - L3 miss. (M chip 6)", .pme_code = 1494, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 1, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1DWORD_L3_MISS@7", .pme_desc = "Single DWord get requests to MDs - L3 miss. (M chip 7)", .pme_code = 1495, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 1, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1DWORD_L3_MISS@8", .pme_desc = "Single DWord get requests to MDs - L3 miss. 
(M chip 8)", .pme_code = 1496, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 1, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1DWORD_L3_MISS@9", .pme_desc = "Single DWord get requests to MDs - L3 miss. (M chip 9)", .pme_code = 1497, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 1, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1DWORD_L3_MISS@10", .pme_desc = "Single DWord get requests to MDs - L3 miss. (M chip 10)", .pme_code = 1498, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 1, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1DWORD_L3_MISS@11", .pme_desc = "Single DWord get requests to MDs - L3 miss. (M chip 11)", .pme_code = 1499, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 1, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1DWORD_L3_MISS@12", .pme_desc = "Single DWord get requests to MDs - L3 miss. (M chip 12)", .pme_code = 1500, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 1, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1DWORD_L3_MISS@13", .pme_desc = "Single DWord get requests to MDs - L3 miss. 
(M chip 13)", .pme_code = 1501, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 1, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1DWORD_L3_MISS@14", .pme_desc = "Single DWord get requests to MDs - L3 miss. (M chip 14)", .pme_code = 1502, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 1, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1DWORD_L3_MISS@15", .pme_desc = "Single DWord get requests to MDs - L3 miss. (M chip 15)", .pme_code = 1503, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 1, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 20 Event 2 */ { .pme_name = "MM2_ANY_BANK_BUSY@0", .pme_desc = "Wclk cycles that any bank is busy in MM2. (M chip 0)", .pme_code = 1504, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM2_ANY_BANK_BUSY@1", .pme_desc = "Wclk cycles that any bank is busy in MM2. (M chip 1)", .pme_code = 1505, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 2, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM2_ANY_BANK_BUSY@2", .pme_desc = "Wclk cycles that any bank is busy in MM2. 
(M chip 2)", .pme_code = 1506, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 2, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM2_ANY_BANK_BUSY@3", .pme_desc = "Wclk cycles that any bank is busy in MM2. (M chip 3)", .pme_code = 1507, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 2, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM2_ANY_BANK_BUSY@4", .pme_desc = "Wclk cycles that any bank is busy in MM2. (M chip 4)", .pme_code = 1508, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 2, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM2_ANY_BANK_BUSY@5", .pme_desc = "Wclk cycles that any bank is busy in MM2. (M chip 5)", .pme_code = 1509, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 2, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM2_ANY_BANK_BUSY@6", .pme_desc = "Wclk cycles that any bank is busy in MM2. (M chip 6)", .pme_code = 1510, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 2, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM2_ANY_BANK_BUSY@7", .pme_desc = "Wclk cycles that any bank is busy in MM2. 
(M chip 7)", .pme_code = 1511, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 2, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM2_ANY_BANK_BUSY@8", .pme_desc = "Wclk cycles that any bank is busy in MM2. (M chip 8)", .pme_code = 1512, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 2, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM2_ANY_BANK_BUSY@9", .pme_desc = "Wclk cycles that any bank is busy in MM2. (M chip 9)", .pme_code = 1513, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 2, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM2_ANY_BANK_BUSY@10", .pme_desc = "Wclk cycles that any bank is busy in MM2. (M chip 10)", .pme_code = 1514, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 2, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM2_ANY_BANK_BUSY@11", .pme_desc = "Wclk cycles that any bank is busy in MM2. (M chip 11)", .pme_code = 1515, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 2, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM2_ANY_BANK_BUSY@12", .pme_desc = "Wclk cycles that any bank is busy in MM2. 
(M chip 12)", .pme_code = 1516, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 2, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM2_ANY_BANK_BUSY@13", .pme_desc = "Wclk cycles that any bank is busy in MM2. (M chip 13)", .pme_code = 1517, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 2, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM2_ANY_BANK_BUSY@14", .pme_desc = "Wclk cycles that any bank is busy in MM2. (M chip 14)", .pme_code = 1518, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 2, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM2_ANY_BANK_BUSY@15", .pme_desc = "Wclk cycles that any bank is busy in MM2. (M chip 15)", .pme_code = 1519, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 2, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 20 Event 3 */ { .pme_name = "W_OUT_QUEUE_BP_0@0", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 0 is full and asserting back-pressure to the MD (Wclk cycles). 
(M chip 0)", .pme_code = 1520, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_0@1", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 0 is full and asserting back-pressure to the MD (Wclk cycles). (M chip 1)", .pme_code = 1521, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 3, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_0@2", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 0 is full and asserting back-pressure to the MD (Wclk cycles). (M chip 2)", .pme_code = 1522, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 3, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_0@3", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 0 is full and asserting back-pressure to the MD (Wclk cycles). (M chip 3)", .pme_code = 1523, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 3, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_0@4", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 0 is full and asserting back-pressure to the MD (Wclk cycles). 
(M chip 4)", .pme_code = 1524, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 3, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_0@5", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 0 is full and asserting back-pressure to the MD (Wclk cycles). (M chip 5)", .pme_code = 1525, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 3, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_0@6", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 0 is full and asserting back-pressure to the MD (Wclk cycles). (M chip 6)", .pme_code = 1526, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 3, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_0@7", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 0 is full and asserting back-pressure to the MD (Wclk cycles). (M chip 7)", .pme_code = 1527, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 3, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_0@8", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 0 is full and asserting back-pressure to the MD (Wclk cycles). 
(M chip 8)", .pme_code = 1528, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 3, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_0@9", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 0 is full and asserting back-pressure to the MD (Wclk cycles). (M chip 9)", .pme_code = 1529, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 3, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_0@10", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 0 is full and asserting back-pressure to the MD (Wclk cycles). (M chip 10)", .pme_code = 1530, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 3, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_0@11", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 0 is full and asserting back-pressure to the MD (Wclk cycles). (M chip 11)", .pme_code = 1531, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 3, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_0@12", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 0 is full and asserting back-pressure to the MD (Wclk cycles). 
(M chip 12)", .pme_code = 1532, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 3, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_0@13", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 0 is full and asserting back-pressure to the MD (Wclk cycles). (M chip 13)", .pme_code = 1533, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 3, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_0@14", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 0 is full and asserting back-pressure to the MD (Wclk cycles). (M chip 14)", .pme_code = 1534, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 3, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_0@15", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 0 is full and asserting back-pressure to the MD (Wclk cycles). (M chip 15)", .pme_code = 1535, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 20, .pme_event = 3, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 21 Event 0 */ { .pme_name = "REQUESTS_1@0", .pme_desc = "Read or write requests from port 1 to MDs. 
(M chip 0)", .pme_code = 1536, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_1@1", .pme_desc = "Read or write requests from port 1 to MDs. (M chip 1)", .pme_code = 1537, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 0, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_1@2", .pme_desc = "Read or write requests from port 1 to MDs. (M chip 2)", .pme_code = 1538, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 0, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_1@3", .pme_desc = "Read or write requests from port 1 to MDs. (M chip 3)", .pme_code = 1539, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 0, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_1@4", .pme_desc = "Read or write requests from port 1 to MDs. (M chip 4)", .pme_code = 1540, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 0, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_1@5", .pme_desc = "Read or write requests from port 1 to MDs. 
(M chip 5)", .pme_code = 1541, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 0, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_1@6", .pme_desc = "Read or write requests from port 1 to MDs. (M chip 6)", .pme_code = 1542, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 0, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_1@7", .pme_desc = "Read or write requests from port 1 to MDs. (M chip 7)", .pme_code = 1543, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 0, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_1@8", .pme_desc = "Read or write requests from port 1 to MDs. (M chip 8)", .pme_code = 1544, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 0, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_1@9", .pme_desc = "Read or write requests from port 1 to MDs. (M chip 9)", .pme_code = 1545, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 0, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_1@10", .pme_desc = "Read or write requests from port 1 to MDs. 
(M chip 10)", .pme_code = 1546, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 0, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_1@11", .pme_desc = "Read or write requests from port 1 to MDs. (M chip 11)", .pme_code = 1547, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 0, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_1@12", .pme_desc = "Read or write requests from port 1 to MDs. (M chip 12)", .pme_code = 1548, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 0, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_1@13", .pme_desc = "Read or write requests from port 1 to MDs. (M chip 13)", .pme_code = 1549, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 0, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_1@14", .pme_desc = "Read or write requests from port 1 to MDs. (M chip 14)", .pme_code = 1550, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 0, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_1@15", .pme_desc = "Read or write requests from port 1 to MDs. 
(M chip 15)", .pme_code = 1551, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 0, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 21 Event 1 */ { .pme_name = "REQUEST_4DWORDS_L3_MISS@0", .pme_desc = "Allocating read requests to MDs - L3 miss. (M chip 0)", .pme_code = 1552, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_4DWORDS_L3_MISS@1", .pme_desc = "Allocating read requests to MDs - L3 miss. (M chip 1)", .pme_code = 1553, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 1, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_4DWORDS_L3_MISS@2", .pme_desc = "Allocating read requests to MDs - L3 miss. (M chip 2)", .pme_code = 1554, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 1, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_4DWORDS_L3_MISS@3", .pme_desc = "Allocating read requests to MDs - L3 miss. (M chip 3)", .pme_code = 1555, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 1, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_4DWORDS_L3_MISS@4", .pme_desc = "Allocating read requests to MDs - L3 miss. 
(M chip 4)", .pme_code = 1556, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 1, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_4DWORDS_L3_MISS@5", .pme_desc = "Allocating read requests to MDs - L3 miss. (M chip 5)", .pme_code = 1557, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 1, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_4DWORDS_L3_MISS@6", .pme_desc = "Allocating read requests to MDs - L3 miss. (M chip 6)", .pme_code = 1558, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 1, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_4DWORDS_L3_MISS@7", .pme_desc = "Allocating read requests to MDs - L3 miss. (M chip 7)", .pme_code = 1559, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 1, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_4DWORDS_L3_MISS@8", .pme_desc = "Allocating read requests to MDs - L3 miss. (M chip 8)", .pme_code = 1560, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 1, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_4DWORDS_L3_MISS@9", .pme_desc = "Allocating read requests to MDs - L3 miss. 
(M chip 9)", .pme_code = 1561, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 1, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_4DWORDS_L3_MISS@10", .pme_desc = "Allocating read requests to MDs - L3 miss. (M chip 10)", .pme_code = 1562, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 1, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_4DWORDS_L3_MISS@11", .pme_desc = "Allocating read requests to MDs - L3 miss. (M chip 11)", .pme_code = 1563, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 1, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_4DWORDS_L3_MISS@12", .pme_desc = "Allocating read requests to MDs - L3 miss. (M chip 12)", .pme_code = 1564, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 1, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_4DWORDS_L3_MISS@13", .pme_desc = "Allocating read requests to MDs - L3 miss. (M chip 13)", .pme_code = 1565, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 1, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_4DWORDS_L3_MISS@14", .pme_desc = "Allocating read requests to MDs - L3 miss. 
(M chip 14)", .pme_code = 1566, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 1, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_4DWORDS_L3_MISS@15", .pme_desc = "Allocating read requests to MDs - L3 miss. (M chip 15)", .pme_code = 1567, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 1, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 21 Event 2 */ { .pme_name = "MM2_ACCUM_BANK_BUSY@0", .pme_desc = "Accumulation of the MM2 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 0)", .pme_code = 1568, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM2_ACCUM_BANK_BUSY@1", .pme_desc = "Accumulation of the MM2 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 1)", .pme_code = 1569, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 2, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM2_ACCUM_BANK_BUSY@2", .pme_desc = "Accumulation of the MM2 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. 
(M chip 2)", .pme_code = 1570, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 2, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM2_ACCUM_BANK_BUSY@3", .pme_desc = "Accumulation of the MM2 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 3)", .pme_code = 1571, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 2, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM2_ACCUM_BANK_BUSY@4", .pme_desc = "Accumulation of the MM2 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 4)", .pme_code = 1572, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 2, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM2_ACCUM_BANK_BUSY@5", .pme_desc = "Accumulation of the MM2 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 5)", .pme_code = 1573, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 2, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM2_ACCUM_BANK_BUSY@6", .pme_desc = "Accumulation of the MM2 memory banks are busy in Mclks. 
There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 6)", .pme_code = 1574, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 2, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM2_ACCUM_BANK_BUSY@7", .pme_desc = "Accumulation of the MM2 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 7)", .pme_code = 1575, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 2, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM2_ACCUM_BANK_BUSY@8", .pme_desc = "Accumulation of the MM2 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 8)", .pme_code = 1576, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 2, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM2_ACCUM_BANK_BUSY@9", .pme_desc = "Accumulation of the MM2 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. 
(M chip 9)", .pme_code = 1577, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 2, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM2_ACCUM_BANK_BUSY@10", .pme_desc = "Accumulation of the MM2 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 10)", .pme_code = 1578, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 2, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM2_ACCUM_BANK_BUSY@11", .pme_desc = "Accumulation of the MM2 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 11)", .pme_code = 1579, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 2, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM2_ACCUM_BANK_BUSY@12", .pme_desc = "Accumulation of the MM2 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 12)", .pme_code = 1580, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 2, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM2_ACCUM_BANK_BUSY@13", .pme_desc = "Accumulation of the MM2 memory banks are busy in Mclks. 
There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 13)", .pme_code = 1581, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 2, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM2_ACCUM_BANK_BUSY@14", .pme_desc = "Accumulation of the MM2 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 14)", .pme_code = 1582, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 2, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM2_ACCUM_BANK_BUSY@15", .pme_desc = "Accumulation of the MM2 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 15)", .pme_code = 1583, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 2, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 21 Event 3 */ { .pme_name = "W_OUT_QUEUE_BP_1@0", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 1 is full and asserting back-pressure to the MD (Wclk cycles). 
(M chip 0)", .pme_code = 1584, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_1@1", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 1 is full and asserting back-pressure to the MD (Wclk cycles). (M chip 1)", .pme_code = 1585, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 3, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_1@2", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 1 is full and asserting back-pressure to the MD (Wclk cycles). (M chip 2)", .pme_code = 1586, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 3, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_1@3", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 1 is full and asserting back-pressure to the MD (Wclk cycles). (M chip 3)", .pme_code = 1587, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 3, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_1@4", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 1 is full and asserting back-pressure to the MD (Wclk cycles). 
(M chip 4)", .pme_code = 1588, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 3, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_1@5", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 1 is full and asserting back-pressure to the MD (Wclk cycles). (M chip 5)", .pme_code = 1589, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 3, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_1@6", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 1 is full and asserting back-pressure to the MD (Wclk cycles). (M chip 6)", .pme_code = 1590, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 3, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_1@7", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 1 is full and asserting back-pressure to the MD (Wclk cycles). (M chip 7)", .pme_code = 1591, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 3, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_1@8", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 1 is full and asserting back-pressure to the MD (Wclk cycles). 
(M chip 8)", .pme_code = 1592, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 3, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_1@9", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 1 is full and asserting back-pressure to the MD (Wclk cycles). (M chip 9)", .pme_code = 1593, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 3, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_1@10", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 1 is full and asserting back-pressure to the MD (Wclk cycles). (M chip 10)", .pme_code = 1594, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 3, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_1@11", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 1 is full and asserting back-pressure to the MD (Wclk cycles). (M chip 11)", .pme_code = 1595, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 3, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_1@12", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 1 is full and asserting back-pressure to the MD (Wclk cycles). 
(M chip 12)", .pme_code = 1596, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 3, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_1@13", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 1 is full and asserting back-pressure to the MD (Wclk cycles). (M chip 13)", .pme_code = 1597, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 3, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_1@14", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 1 is full and asserting back-pressure to the MD (Wclk cycles). (M chip 14)", .pme_code = 1598, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 3, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_1@15", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 1 is full and asserting back-pressure to the MD (Wclk cycles). (M chip 15)", .pme_code = 1599, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 21, .pme_event = 3, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 22 Event 0 */ { .pme_name = "REQUESTS_2@0", .pme_desc = "Read or write requests from port 2 to MDs. 
(M chip 0)", .pme_code = 1600, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_2@1", .pme_desc = "Read or write requests from port 2 to MDs. (M chip 1)", .pme_code = 1601, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 0, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_2@2", .pme_desc = "Read or write requests from port 2 to MDs. (M chip 2)", .pme_code = 1602, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 0, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_2@3", .pme_desc = "Read or write requests from port 2 to MDs. (M chip 3)", .pme_code = 1603, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 0, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_2@4", .pme_desc = "Read or write requests from port 2 to MDs. (M chip 4)", .pme_code = 1604, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 0, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_2@5", .pme_desc = "Read or write requests from port 2 to MDs. 
(M chip 5)", .pme_code = 1605, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 0, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_2@6", .pme_desc = "Read or write requests from port 2 to MDs. (M chip 6)", .pme_code = 1606, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 0, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_2@7", .pme_desc = "Read or write requests from port 2 to MDs. (M chip 7)", .pme_code = 1607, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 0, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_2@8", .pme_desc = "Read or write requests from port 2 to MDs. (M chip 8)", .pme_code = 1608, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 0, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_2@9", .pme_desc = "Read or write requests from port 2 to MDs. (M chip 9)", .pme_code = 1609, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 0, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_2@10", .pme_desc = "Read or write requests from port 2 to MDs. 
(M chip 10)", .pme_code = 1610, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 0, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_2@11", .pme_desc = "Read or write requests from port 2 to MDs. (M chip 11)", .pme_code = 1611, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 0, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_2@12", .pme_desc = "Read or write requests from port 2 to MDs. (M chip 12)", .pme_code = 1612, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 0, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_2@13", .pme_desc = "Read or write requests from port 2 to MDs. (M chip 13)", .pme_code = 1613, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 0, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_2@14", .pme_desc = "Read or write requests from port 2 to MDs. (M chip 14)", .pme_code = 1614, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 0, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_2@15", .pme_desc = "Read or write requests from port 2 to MDs. 
(M chip 15)", .pme_code = 1615, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 0, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 22 Event 1 */ { .pme_name = "REQUEST_1SWORD@0", .pme_desc = "Single SWord requests to MDs. (M chip 0)", .pme_code = 1616, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1SWORD@1", .pme_desc = "Single SWord requests to MDs. (M chip 1)", .pme_code = 1617, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 1, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1SWORD@2", .pme_desc = "Single SWord requests to MDs. (M chip 2)", .pme_code = 1618, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 1, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1SWORD@3", .pme_desc = "Single SWord requests to MDs. (M chip 3)", .pme_code = 1619, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 1, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1SWORD@4", .pme_desc = "Single SWord requests to MDs. 
(M chip 4)", .pme_code = 1620, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 1, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1SWORD@5", .pme_desc = "Single SWord requests to MDs. (M chip 5)", .pme_code = 1621, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 1, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1SWORD@6", .pme_desc = "Single SWord requests to MDs. (M chip 6)", .pme_code = 1622, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 1, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1SWORD@7", .pme_desc = "Single SWord requests to MDs. (M chip 7)", .pme_code = 1623, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 1, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1SWORD@8", .pme_desc = "Single SWord requests to MDs. (M chip 8)", .pme_code = 1624, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 1, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1SWORD@9", .pme_desc = "Single SWord requests to MDs. 
(M chip 9)", .pme_code = 1625, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 1, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1SWORD@10", .pme_desc = "Single SWord requests to MDs. (M chip 10)", .pme_code = 1626, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 1, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1SWORD@11", .pme_desc = "Single SWord requests to MDs. (M chip 11)", .pme_code = 1627, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 1, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1SWORD@12", .pme_desc = "Single SWord requests to MDs. (M chip 12)", .pme_code = 1628, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 1, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1SWORD@13", .pme_desc = "Single SWord requests to MDs. (M chip 13)", .pme_code = 1629, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 1, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1SWORD@14", .pme_desc = "Single SWord requests to MDs. 
(M chip 14)", .pme_code = 1630, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 1, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUEST_1SWORD@15", .pme_desc = "Single SWord requests to MDs. (M chip 15)", .pme_code = 1631, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 1, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 22 Event 2 */ { .pme_name = "MM3_ANY_BANK_BUSY@0", .pme_desc = "Wclk cycles that any bank is busy in MM3. (M chip 0)", .pme_code = 1632, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM3_ANY_BANK_BUSY@1", .pme_desc = "Wclk cycles that any bank is busy in MM3. (M chip 1)", .pme_code = 1633, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 2, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM3_ANY_BANK_BUSY@2", .pme_desc = "Wclk cycles that any bank is busy in MM3. (M chip 2)", .pme_code = 1634, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 2, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM3_ANY_BANK_BUSY@3", .pme_desc = "Wclk cycles that any bank is busy in MM3. 
(M chip 3)", .pme_code = 1635, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 2, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM3_ANY_BANK_BUSY@4", .pme_desc = "Wclk cycles that any bank is busy in MM3. (M chip 4)", .pme_code = 1636, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 2, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM3_ANY_BANK_BUSY@5", .pme_desc = "Wclk cycles that any bank is busy in MM3. (M chip 5)", .pme_code = 1637, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 2, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM3_ANY_BANK_BUSY@6", .pme_desc = "Wclk cycles that any bank is busy in MM3. (M chip 6)", .pme_code = 1638, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 2, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM3_ANY_BANK_BUSY@7", .pme_desc = "Wclk cycles that any bank is busy in MM3. (M chip 7)", .pme_code = 1639, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 2, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM3_ANY_BANK_BUSY@8", .pme_desc = "Wclk cycles that any bank is busy in MM3. 
(M chip 8)", .pme_code = 1640, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 2, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM3_ANY_BANK_BUSY@9", .pme_desc = "Wclk cycles that any bank is busy in MM3. (M chip 9)", .pme_code = 1641, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 2, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM3_ANY_BANK_BUSY@10", .pme_desc = "Wclk cycles that any bank is busy in MM3. (M chip 10)", .pme_code = 1642, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 2, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM3_ANY_BANK_BUSY@11", .pme_desc = "Wclk cycles that any bank is busy in MM3. (M chip 11)", .pme_code = 1643, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 2, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM3_ANY_BANK_BUSY@12", .pme_desc = "Wclk cycles that any bank is busy in MM3. (M chip 12)", .pme_code = 1644, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 2, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM3_ANY_BANK_BUSY@13", .pme_desc = "Wclk cycles that any bank is busy in MM3. 
(M chip 13)", .pme_code = 1645, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 2, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM3_ANY_BANK_BUSY@14", .pme_desc = "Wclk cycles that any bank is busy in MM3. (M chip 14)", .pme_code = 1646, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 2, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM3_ANY_BANK_BUSY@15", .pme_desc = "Wclk cycles that any bank is busy in MM3. (M chip 15)", .pme_code = 1647, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 2, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 22 Event 3 */ { .pme_name = "W_OUT_QUEUE_BP_2@0", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 2 is full and asserting back-pressure to the MD (Wclk cycles). (M chip 0)", .pme_code = 1648, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_2@1", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 2 is full and asserting back-pressure to the MD (Wclk cycles). 
(M chip 1)", .pme_code = 1649, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 3, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_2@2", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 2 is full and asserting back-pressure to the MD (Wclk cycles). (M chip 2)", .pme_code = 1650, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 3, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_2@3", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 2 is full and asserting back-pressure to the MD (Wclk cycles). (M chip 3)", .pme_code = 1651, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 3, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_2@4", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 2 is full and asserting back-pressure to the MD (Wclk cycles). (M chip 4)", .pme_code = 1652, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 3, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_2@5", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 2 is full and asserting back-pressure to the MD (Wclk cycles). 
(M chip 5)", .pme_code = 1653, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 3, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_2@6", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 2 is full and asserting back-pressure to the MD (Wclk cycles). (M chip 6)", .pme_code = 1654, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 3, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_2@7", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 2 is full and asserting back-pressure to the MD (Wclk cycles). (M chip 7)", .pme_code = 1655, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 3, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_2@8", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 2 is full and asserting back-pressure to the MD (Wclk cycles). (M chip 8)", .pme_code = 1656, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 3, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_2@9", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 2 is full and asserting back-pressure to the MD (Wclk cycles). 
(M chip 9)", .pme_code = 1657, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 3, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_2@10", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 2 is full and asserting back-pressure to the MD (Wclk cycles). (M chip 10)", .pme_code = 1658, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 3, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_2@11", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 2 is full and asserting back-pressure to the MD (Wclk cycles). (M chip 11)", .pme_code = 1659, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 3, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_2@12", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 2 is full and asserting back-pressure to the MD (Wclk cycles). (M chip 12)", .pme_code = 1660, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 3, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_2@13", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 2 is full and asserting back-pressure to the MD (Wclk cycles). 
(M chip 13)", .pme_code = 1661, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 3, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_2@14", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 2 is full and asserting back-pressure to the MD (Wclk cycles). (M chip 14)", .pme_code = 1662, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 3, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_2@15", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 2 is full and asserting back-pressure to the MD (Wclk cycles). (M chip 15)", .pme_code = 1663, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 22, .pme_event = 3, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 23 Event 0 */ { .pme_name = "REQUESTS_3@0", .pme_desc = "Read or write requests from port 3 to MDs. (M chip 0)", .pme_code = 1664, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_3@1", .pme_desc = "Read or write requests from port 3 to MDs. 
(M chip 1)", .pme_code = 1665, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 0, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_3@2", .pme_desc = "Read or write requests from port 3 to MDs. (M chip 2)", .pme_code = 1666, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 0, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_3@3", .pme_desc = "Read or write requests from port 3 to MDs. (M chip 3)", .pme_code = 1667, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 0, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_3@4", .pme_desc = "Read or write requests from port 3 to MDs. (M chip 4)", .pme_code = 1668, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 0, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_3@5", .pme_desc = "Read or write requests from port 3 to MDs. (M chip 5)", .pme_code = 1669, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 0, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_3@6", .pme_desc = "Read or write requests from port 3 to MDs. 
(M chip 6)", .pme_code = 1670, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 0, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_3@7", .pme_desc = "Read or write requests from port 3 to MDs. (M chip 7)", .pme_code = 1671, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 0, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_3@8", .pme_desc = "Read or write requests from port 3 to MDs. (M chip 8)", .pme_code = 1672, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 0, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_3@9", .pme_desc = "Read or write requests from port 3 to MDs. (M chip 9)", .pme_code = 1673, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 0, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_3@10", .pme_desc = "Read or write requests from port 3 to MDs. (M chip 10)", .pme_code = 1674, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 0, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_3@11", .pme_desc = "Read or write requests from port 3 to MDs. 
(M chip 11)", .pme_code = 1675, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 0, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_3@12", .pme_desc = "Read or write requests from port 3 to MDs. (M chip 12)", .pme_code = 1676, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 0, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_3@13", .pme_desc = "Read or write requests from port 3 to MDs. (M chip 13)", .pme_code = 1677, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 0, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_3@14", .pme_desc = "Read or write requests from port 3 to MDs. (M chip 14)", .pme_code = 1678, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 0, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "REQUESTS_3@15", .pme_desc = "Read or write requests from port 3 to MDs. 
(M chip 15)", .pme_code = 1679, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 0, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 23 Event 1 */ { .pme_name = "@0", .pme_desc = "", .pme_code = 1680, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@1", .pme_desc = "", .pme_code = 1681, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 1, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@2", .pme_desc = "", .pme_code = 1682, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 1, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@3", .pme_desc = "", .pme_code = 1683, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 1, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@4", .pme_desc = "", .pme_code = 1684, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 1, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@5", .pme_desc = "", .pme_code = 1685, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 1, .pme_chipno = 5, .pme_base = 
PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@6", .pme_desc = "", .pme_code = 1686, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 1, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@7", .pme_desc = "", .pme_code = 1687, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 1, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@8", .pme_desc = "", .pme_code = 1688, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 1, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@9", .pme_desc = "", .pme_code = 1689, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 1, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@10", .pme_desc = "", .pme_code = 1690, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 1, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@11", .pme_desc = "", .pme_code = 1691, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 1, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@12", .pme_desc = "", .pme_code = 1692, .pme_flags = 0x0, 
.pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 1, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@13", .pme_desc = "", .pme_code = 1693, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 1, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@14", .pme_desc = "", .pme_code = 1694, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 1, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@15", .pme_desc = "", .pme_code = 1695, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 1, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 23 Event 2 */ { .pme_name = "MM3_ACCUM_BANK_BUSY@0", .pme_desc = "Accumulation of the MM3 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 0)", .pme_code = 1696, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM3_ACCUM_BANK_BUSY@1", .pme_desc = "Accumulation of the MM3 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. 
(M chip 1)", .pme_code = 1697, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 2, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM3_ACCUM_BANK_BUSY@2", .pme_desc = "Accumulation of the MM3 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 2)", .pme_code = 1698, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 2, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM3_ACCUM_BANK_BUSY@3", .pme_desc = "Accumulation of the MM3 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 3)", .pme_code = 1699, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 2, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM3_ACCUM_BANK_BUSY@4", .pme_desc = "Accumulation of the MM3 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 4)", .pme_code = 1700, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 2, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM3_ACCUM_BANK_BUSY@5", .pme_desc = "Accumulation of the MM3 memory banks are busy in Mclks. 
There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 5)", .pme_code = 1701, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 2, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM3_ACCUM_BANK_BUSY@6", .pme_desc = "Accumulation of the MM3 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 6)", .pme_code = 1702, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 2, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM3_ACCUM_BANK_BUSY@7", .pme_desc = "Accumulation of the MM3 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 7)", .pme_code = 1703, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 2, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM3_ACCUM_BANK_BUSY@8", .pme_desc = "Accumulation of the MM3 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. 
(M chip 8)", .pme_code = 1704, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 2, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM3_ACCUM_BANK_BUSY@9", .pme_desc = "Accumulation of the MM3 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 9)", .pme_code = 1705, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 2, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM3_ACCUM_BANK_BUSY@10", .pme_desc = "Accumulation of the MM3 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 10)", .pme_code = 1706, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 2, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM3_ACCUM_BANK_BUSY@11", .pme_desc = "Accumulation of the MM3 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 11)", .pme_code = 1707, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 2, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM3_ACCUM_BANK_BUSY@12", .pme_desc = "Accumulation of the MM3 memory banks are busy in Mclks. 
There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 12)", .pme_code = 1708, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 2, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM3_ACCUM_BANK_BUSY@13", .pme_desc = "Accumulation of the MM3 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 13)", .pme_code = 1709, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 2, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM3_ACCUM_BANK_BUSY@14", .pme_desc = "Accumulation of the MM3 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. (M chip 14)", .pme_code = 1710, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 2, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "MM3_ACCUM_BANK_BUSY@15", .pme_desc = "Accumulation of the MM3 memory banks are busy in Mclks. There are 8 banks per MM and this counter will be +1 every Mclk that 1 bank is busy, +2 every Mclk that 2 banks are busy, etc. 
(M chip 15)", .pme_code = 1711, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 2, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 23 Event 3 */ { .pme_name = "W_OUT_QUEUE_BP_3@0", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 3 is full and asserting back-pressure to the MD (Wclk cycles). (M chip 0)", .pme_code = 1712, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_3@1", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 3 is full and asserting back-pressure to the MD (Wclk cycles). (M chip 1)", .pme_code = 1713, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 3, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_3@2", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 3 is full and asserting back-pressure to the MD (Wclk cycles). (M chip 2)", .pme_code = 1714, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 3, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_3@3", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 3 is full and asserting back-pressure to the MD (Wclk cycles). 
(M chip 3)", .pme_code = 1715, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 3, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_3@4", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 3 is full and asserting back-pressure to the MD (Wclk cycles). (M chip 4)", .pme_code = 1716, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 3, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_3@5", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 3 is full and asserting back-pressure to the MD (Wclk cycles). (M chip 5)", .pme_code = 1717, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 3, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_3@6", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 3 is full and asserting back-pressure to the MD (Wclk cycles). (M chip 6)", .pme_code = 1718, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 3, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_3@7", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 3 is full and asserting back-pressure to the MD (Wclk cycles). 
(M chip 7)", .pme_code = 1719, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 3, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_3@8", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 3 is full and asserting back-pressure to the MD (Wclk cycles). (M chip 8)", .pme_code = 1720, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 3, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_3@9", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 3 is full and asserting back-pressure to the MD (Wclk cycles). (M chip 9)", .pme_code = 1721, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 3, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_3@10", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 3 is full and asserting back-pressure to the MD (Wclk cycles). (M chip 10)", .pme_code = 1722, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 3, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_3@11", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 3 is full and asserting back-pressure to the MD (Wclk cycles). 
(M chip 11)", .pme_code = 1723, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 3, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_3@12", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 3 is full and asserting back-pressure to the MD (Wclk cycles). (M chip 12)", .pme_code = 1724, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 3, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_3@13", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 3 is full and asserting back-pressure to the MD (Wclk cycles). (M chip 13)", .pme_code = 1725, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 3, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_3@14", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 3 is full and asserting back-pressure to the MD (Wclk cycles). (M chip 14)", .pme_code = 1726, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 3, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_OUT_QUEUE_BP_3@15", .pme_desc = "One of the input FIFOs that is destined for MD2BW output port 3 is full and asserting back-pressure to the MD (Wclk cycles). 
(M chip 15)", .pme_code = 1727, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 23, .pme_event = 3, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 24 Event 0 */ { .pme_name = "W_SWORD_PUTS@0", .pme_desc = "Count of the total number of SWords that are written to memory or the L3 cache with Put commands. Counts up to 2 SWords per memory directory per clock period. (M chip 0)", .pme_code = 1728, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_PUTS@1", .pme_desc = "Count of the total number of SWords that are written to memory or the L3 cache with Put commands. Counts up to 2 SWords per memory directory per clock period. (M chip 1)", .pme_code = 1729, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 0, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_PUTS@2", .pme_desc = "Count of the total number of SWords that are written to memory or the L3 cache with Put commands. Counts up to 2 SWords per memory directory per clock period. (M chip 2)", .pme_code = 1730, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 0, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_PUTS@3", .pme_desc = "Count of the total number of SWords that are written to memory or the L3 cache with Put commands. Counts up to 2 SWords per memory directory per clock period. 
(M chip 3)", .pme_code = 1731, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 0, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_PUTS@4", .pme_desc = "Count of the total number of SWords that are written to memory or the L3 cache with Put commands. Counts up to 2 SWords per memory directory per clock period. (M chip 4)", .pme_code = 1732, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 0, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_PUTS@5", .pme_desc = "Count of the total number of SWords that are written to memory or the L3 cache with Put commands. Counts up to 2 SWords per memory directory per clock period. (M chip 5)", .pme_code = 1733, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 0, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_PUTS@6", .pme_desc = "Count of the total number of SWords that are written to memory or the L3 cache with Put commands. Counts up to 2 SWords per memory directory per clock period. (M chip 6)", .pme_code = 1734, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 0, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_PUTS@7", .pme_desc = "Count of the total number of SWords that are written to memory or the L3 cache with Put commands. Counts up to 2 SWords per memory directory per clock period. 
(M chip 7)", .pme_code = 1735, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 0, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_PUTS@8", .pme_desc = "Count of the total number of SWords that are written to memory or the L3 cache with Put commands. Counts up to 2 SWords per memory directory per clock period. (M chip 8)", .pme_code = 1736, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 0, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_PUTS@9", .pme_desc = "Count of the total number of SWords that are written to memory or the L3 cache with Put commands. Counts up to 2 SWords per memory directory per clock period. (M chip 9)", .pme_code = 1737, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 0, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_PUTS@10", .pme_desc = "Count of the total number of SWords that are written to memory or the L3 cache with Put commands. Counts up to 2 SWords per memory directory per clock period. (M chip 10)", .pme_code = 1738, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 0, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_PUTS@11", .pme_desc = "Count of the total number of SWords that are written to memory or the L3 cache with Put commands. Counts up to 2 SWords per memory directory per clock period. 
(M chip 11)", .pme_code = 1739, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 0, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_PUTS@12", .pme_desc = "Count of the total number of SWords that are written to memory or the L3 cache with Put commands. Counts up to 2 SWords per memory directory per clock period. (M chip 12)", .pme_code = 1740, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 0, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_PUTS@13", .pme_desc = "Count of the total number of SWords that are written to memory or the L3 cache with Put commands. Counts up to 2 SWords per memory directory per clock period. (M chip 13)", .pme_code = 1741, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 0, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_PUTS@14", .pme_desc = "Count of the total number of SWords that are written to memory or the L3 cache with Put commands. Counts up to 2 SWords per memory directory per clock period. (M chip 14)", .pme_code = 1742, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 0, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_PUTS@15", .pme_desc = "Count of the total number of SWords that are written to memory or the L3 cache with Put commands. Counts up to 2 SWords per memory directory per clock period. 
(M chip 15)", .pme_code = 1743, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 0, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 24 Event 1 */ { .pme_name = "@0", .pme_desc = "", .pme_code = 1744, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@1", .pme_desc = "", .pme_code = 1745, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 1, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@2", .pme_desc = "", .pme_code = 1746, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 1, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@3", .pme_desc = "", .pme_code = 1747, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 1, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@4", .pme_desc = "", .pme_code = 1748, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 1, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@5", .pme_desc = "", .pme_code = 1749, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 1, .pme_chipno = 5, .pme_base = 
PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@6", .pme_desc = "", .pme_code = 1750, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 1, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@7", .pme_desc = "", .pme_code = 1751, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 1, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@8", .pme_desc = "", .pme_code = 1752, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 1, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@9", .pme_desc = "", .pme_code = 1753, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 1, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@10", .pme_desc = "", .pme_code = 1754, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 1, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@11", .pme_desc = "", .pme_code = 1755, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 1, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@12", .pme_desc = "", .pme_code = 1756, .pme_flags = 0x0, 
.pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 1, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@13", .pme_desc = "", .pme_code = 1757, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 1, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@14", .pme_desc = "", .pme_code = 1758, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 1, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@15", .pme_desc = "", .pme_code = 1759, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 1, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 24 Event 2 */ { .pme_name = "@0", .pme_desc = "", .pme_code = 1760, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@1", .pme_desc = "", .pme_code = 1761, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 2, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@2", .pme_desc = "", .pme_code = 1762, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 2, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = 
PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@3", .pme_desc = "", .pme_code = 1763, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 2, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@4", .pme_desc = "", .pme_code = 1764, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 2, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@5", .pme_desc = "", .pme_code = 1765, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 2, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@6", .pme_desc = "", .pme_code = 1766, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 2, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@7", .pme_desc = "", .pme_code = 1767, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 2, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@8", .pme_desc = "", .pme_code = 1768, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 2, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@9", .pme_desc = "", .pme_code = 1769, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = 
PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 2, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@10", .pme_desc = "", .pme_code = 1770, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 2, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@11", .pme_desc = "", .pme_code = 1771, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 2, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@12", .pme_desc = "", .pme_code = 1772, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 2, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@13", .pme_desc = "", .pme_code = 1773, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 2, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@14", .pme_desc = "", .pme_code = 1774, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 2, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@15", .pme_desc = "", .pme_code = 1775, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 2, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = 
PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 24 Event 3 */ { .pme_name = "@0", .pme_desc = "", .pme_code = 1776, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@1", .pme_desc = "", .pme_code = 1777, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 3, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@2", .pme_desc = "", .pme_code = 1778, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 3, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@3", .pme_desc = "", .pme_code = 1779, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 3, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@4", .pme_desc = "", .pme_code = 1780, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 3, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@5", .pme_desc = "", .pme_code = 1781, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 3, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@6", .pme_desc = "", .pme_code = 1782, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 
24, .pme_event = 3, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@7", .pme_desc = "", .pme_code = 1783, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 3, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@8", .pme_desc = "", .pme_code = 1784, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 3, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@9", .pme_desc = "", .pme_code = 1785, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 3, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@10", .pme_desc = "", .pme_code = 1786, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 3, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@11", .pme_desc = "", .pme_code = 1787, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 3, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@12", .pme_desc = "", .pme_code = 1788, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 3, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@13", 
.pme_desc = "", .pme_code = 1789, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 3, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@14", .pme_desc = "", .pme_code = 1790, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 3, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@15", .pme_desc = "", .pme_code = 1791, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 24, .pme_event = 3, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 25 Event 0 */ { .pme_name = "W_SWORD_NPUTS@0", .pme_desc = "Count of the total number of SWords that are written to memory or the L3 cache with NPut commands. Counts up to 2 SWords per memory directory per clock period. (M chip 0)", .pme_code = 1792, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_NPUTS@1", .pme_desc = "Count of the total number of SWords that are written to memory or the L3 cache with NPut commands. Counts up to 2 SWords per memory directory per clock period. 
(M chip 1)", .pme_code = 1793, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 0, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_NPUTS@2", .pme_desc = "Count of the total number of SWords that are written to memory or the L3 cache with NPut commands. Counts up to 2 SWords per memory directory per clock period. (M chip 2)", .pme_code = 1794, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 0, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_NPUTS@3", .pme_desc = "Count of the total number of SWords that are written to memory or the L3 cache with NPut commands. Counts up to 2 SWords per memory directory per clock period. (M chip 3)", .pme_code = 1795, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 0, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_NPUTS@4", .pme_desc = "Count of the total number of SWords that are written to memory or the L3 cache with NPut commands. Counts up to 2 SWords per memory directory per clock period. (M chip 4)", .pme_code = 1796, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 0, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_NPUTS@5", .pme_desc = "Count of the total number of SWords that are written to memory or the L3 cache with NPut commands. Counts up to 2 SWords per memory directory per clock period. 
(M chip 5)", .pme_code = 1797, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 0, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_NPUTS@6", .pme_desc = "Count of the total number of SWords that are written to memory or the L3 cache with NPut commands. Counts up to 2 SWords per memory directory per clock period. (M chip 6)", .pme_code = 1798, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 0, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_NPUTS@7", .pme_desc = "Count of the total number of SWords that are written to memory or the L3 cache with NPut commands. Counts up to 2 SWords per memory directory per clock period. (M chip 7)", .pme_code = 1799, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 0, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_NPUTS@8", .pme_desc = "Count of the total number of SWords that are written to memory or the L3 cache with NPut commands. Counts up to 2 SWords per memory directory per clock period. (M chip 8)", .pme_code = 1800, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 0, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_NPUTS@9", .pme_desc = "Count of the total number of SWords that are written to memory or the L3 cache with NPut commands. Counts up to 2 SWords per memory directory per clock period. 
(M chip 9)", .pme_code = 1801, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 0, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_NPUTS@10", .pme_desc = "Count of the total number of SWords that are written to memory or the L3 cache with NPut commands. Counts up to 2 SWords per memory directory per clock period. (M chip 10)", .pme_code = 1802, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 0, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_NPUTS@11", .pme_desc = "Count of the total number of SWords that are written to memory or the L3 cache with NPut commands. Counts up to 2 SWords per memory directory per clock period. (M chip 11)", .pme_code = 1803, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 0, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_NPUTS@12", .pme_desc = "Count of the total number of SWords that are written to memory or the L3 cache with NPut commands. Counts up to 2 SWords per memory directory per clock period. (M chip 12)", .pme_code = 1804, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 0, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_NPUTS@13", .pme_desc = "Count of the total number of SWords that are written to memory or the L3 cache with NPut commands. Counts up to 2 SWords per memory directory per clock period. 
(M chip 13)", .pme_code = 1805, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 0, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_NPUTS@14", .pme_desc = "Count of the total number of SWords that are written to memory or the L3 cache with NPut commands. Counts up to 2 SWords per memory directory per clock period. (M chip 14)", .pme_code = 1806, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 0, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_NPUTS@15", .pme_desc = "Count of the total number of SWords that are written to memory or the L3 cache with NPut commands. Counts up to 2 SWords per memory directory per clock period. (M chip 15)", .pme_code = 1807, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 0, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 25 Event 1 */ { .pme_name = "@0", .pme_desc = "", .pme_code = 1808, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@1", .pme_desc = "", .pme_code = 1809, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 1, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@2", .pme_desc = "", .pme_code = 1810, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = 
PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 1, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@3", .pme_desc = "", .pme_code = 1811, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 1, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@4", .pme_desc = "", .pme_code = 1812, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 1, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@5", .pme_desc = "", .pme_code = 1813, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 1, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@6", .pme_desc = "", .pme_code = 1814, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 1, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@7", .pme_desc = "", .pme_code = 1815, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 1, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@8", .pme_desc = "", .pme_code = 1816, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 1, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = 
PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@9", .pme_desc = "", .pme_code = 1817, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 1, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@10", .pme_desc = "", .pme_code = 1818, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 1, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@11", .pme_desc = "", .pme_code = 1819, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 1, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@12", .pme_desc = "", .pme_code = 1820, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 1, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@13", .pme_desc = "", .pme_code = 1821, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 1, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@14", .pme_desc = "", .pme_code = 1822, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 1, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@15", .pme_desc = "", .pme_code = 1823, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 
1, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 25 Event 2 */ { .pme_name = "@0", .pme_desc = "", .pme_code = 1824, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@1", .pme_desc = "", .pme_code = 1825, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 2, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@2", .pme_desc = "", .pme_code = 1826, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 2, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@3", .pme_desc = "", .pme_code = 1827, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 2, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@4", .pme_desc = "", .pme_code = 1828, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 2, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@5", .pme_desc = "", .pme_code = 1829, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 2, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = 
"@6", .pme_desc = "", .pme_code = 1830, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 2, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@7", .pme_desc = "", .pme_code = 1831, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 2, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@8", .pme_desc = "", .pme_code = 1832, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 2, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@9", .pme_desc = "", .pme_code = 1833, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 2, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@10", .pme_desc = "", .pme_code = 1834, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 2, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@11", .pme_desc = "", .pme_code = 1835, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 2, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@12", .pme_desc = "", .pme_code = 1836, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 2, .pme_chipno = 12, .pme_base = 
PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@13", .pme_desc = "", .pme_code = 1837, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 2, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@14", .pme_desc = "", .pme_code = 1838, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 2, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@15", .pme_desc = "", .pme_code = 1839, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 2, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 25 Event 3 */ { .pme_name = "@0", .pme_desc = "", .pme_code = 1840, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@1", .pme_desc = "", .pme_code = 1841, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 3, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@2", .pme_desc = "", .pme_code = 1842, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 3, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@3", .pme_desc = "", .pme_code 
= 1843, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 3, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@4", .pme_desc = "", .pme_code = 1844, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 3, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@5", .pme_desc = "", .pme_code = 1845, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 3, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@6", .pme_desc = "", .pme_code = 1846, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 3, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@7", .pme_desc = "", .pme_code = 1847, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 3, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@8", .pme_desc = "", .pme_code = 1848, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 3, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@9", .pme_desc = "", .pme_code = 1849, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 3, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = 
PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@10", .pme_desc = "", .pme_code = 1850, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 3, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@11", .pme_desc = "", .pme_code = 1851, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 3, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@12", .pme_desc = "", .pme_code = 1852, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 3, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@13", .pme_desc = "", .pme_code = 1853, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 3, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@14", .pme_desc = "", .pme_code = 1854, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 3, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@15", .pme_desc = "", .pme_code = 1855, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 25, .pme_event = 3, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 26 Event 0 */ { .pme_name = "W_SWORD_GETS@0", .pme_desc = "Count of the total number of SWords 
that are read from memory or the L3 cache with Get commands. Counts up to 2 SWords per memory directory per clock period. (M chip 0)", .pme_code = 1856, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_GETS@1", .pme_desc = "Count of the total number of SWords that are read from memory or the L3 cache with Get commands. Counts up to 2 SWords per memory directory per clock period. (M chip 1)", .pme_code = 1857, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 0, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_GETS@2", .pme_desc = "Count of the total number of SWords that are read from memory or the L3 cache with Get commands. Counts up to 2 SWords per memory directory per clock period. (M chip 2)", .pme_code = 1858, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 0, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_GETS@3", .pme_desc = "Count of the total number of SWords that are read from memory or the L3 cache with Get commands. Counts up to 2 SWords per memory directory per clock period. (M chip 3)", .pme_code = 1859, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 0, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_GETS@4", .pme_desc = "Count of the total number of SWords that are read from memory or the L3 cache with Get commands. 
Counts up to 2 SWords per memory directory per clock period. (M chip 4)", .pme_code = 1860, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 0, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_GETS@5", .pme_desc = "Count of the total number of SWords that are read from memory or the L3 cache with Get commands. Counts up to 2 SWords per memory directory per clock period. (M chip 5)", .pme_code = 1861, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 0, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_GETS@6", .pme_desc = "Count of the total number of SWords that are read from memory or the L3 cache with Get commands. Counts up to 2 SWords per memory directory per clock period. (M chip 6)", .pme_code = 1862, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 0, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_GETS@7", .pme_desc = "Count of the total number of SWords that are read from memory or the L3 cache with Get commands. Counts up to 2 SWords per memory directory per clock period. (M chip 7)", .pme_code = 1863, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 0, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_GETS@8", .pme_desc = "Count of the total number of SWords that are read from memory or the L3 cache with Get commands. Counts up to 2 SWords per memory directory per clock period. 
(M chip 8)", .pme_code = 1864, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 0, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_GETS@9", .pme_desc = "Count of the total number of SWords that are read from memory or the L3 cache with Get commands. Counts up to 2 SWords per memory directory per clock period. (M chip 9)", .pme_code = 1865, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 0, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_GETS@10", .pme_desc = "Count of the total number of SWords that are read from memory or the L3 cache with Get commands. Counts up to 2 SWords per memory directory per clock period. (M chip 10)", .pme_code = 1866, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 0, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_GETS@11", .pme_desc = "Count of the total number of SWords that are read from memory or the L3 cache with Get commands. Counts up to 2 SWords per memory directory per clock period. (M chip 11)", .pme_code = 1867, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 0, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_GETS@12", .pme_desc = "Count of the total number of SWords that are read from memory or the L3 cache with Get commands. Counts up to 2 SWords per memory directory per clock period. 
(M chip 12)", .pme_code = 1868, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 0, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_GETS@13", .pme_desc = "Count of the total number of SWords that are read from memory or the L3 cache with Get commands. Counts up to 2 SWords per memory directory per clock period. (M chip 13)", .pme_code = 1869, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 0, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_GETS@14", .pme_desc = "Count of the total number of SWords that are read from memory or the L3 cache with Get commands. Counts up to 2 SWords per memory directory per clock period. (M chip 14)", .pme_code = 1870, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 0, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_GETS@15", .pme_desc = "Count of the total number of SWords that are read from memory or the L3 cache with Get commands. Counts up to 2 SWords per memory directory per clock period. 
(M chip 15)", .pme_code = 1871, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 0, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 26 Event 1 */ { .pme_name = "@0", .pme_desc = "", .pme_code = 1872, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@1", .pme_desc = "", .pme_code = 1873, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 1, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@2", .pme_desc = "", .pme_code = 1874, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 1, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@3", .pme_desc = "", .pme_code = 1875, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 1, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@4", .pme_desc = "", .pme_code = 1876, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 1, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@5", .pme_desc = "", .pme_code = 1877, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 1, .pme_chipno = 5, .pme_base = 
PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@6", .pme_desc = "", .pme_code = 1878, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 1, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@7", .pme_desc = "", .pme_code = 1879, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 1, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@8", .pme_desc = "", .pme_code = 1880, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 1, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@9", .pme_desc = "", .pme_code = 1881, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 1, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@10", .pme_desc = "", .pme_code = 1882, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 1, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@11", .pme_desc = "", .pme_code = 1883, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 1, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@12", .pme_desc = "", .pme_code = 1884, .pme_flags = 0x0, 
.pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 1, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@13", .pme_desc = "", .pme_code = 1885, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 1, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@14", .pme_desc = "", .pme_code = 1886, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 1, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@15", .pme_desc = "", .pme_code = 1887, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 1, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 26 Event 2 */ { .pme_name = "@0", .pme_desc = "", .pme_code = 1888, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@1", .pme_desc = "", .pme_code = 1889, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 2, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@2", .pme_desc = "", .pme_code = 1890, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 2, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = 
PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@3", .pme_desc = "", .pme_code = 1891, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 2, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@4", .pme_desc = "", .pme_code = 1892, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 2, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@5", .pme_desc = "", .pme_code = 1893, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 2, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@6", .pme_desc = "", .pme_code = 1894, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 2, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@7", .pme_desc = "", .pme_code = 1895, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 2, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@8", .pme_desc = "", .pme_code = 1896, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 2, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@9", .pme_desc = "", .pme_code = 1897, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = 
PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 2, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@10", .pme_desc = "", .pme_code = 1898, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 2, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@11", .pme_desc = "", .pme_code = 1899, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 2, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@12", .pme_desc = "", .pme_code = 1900, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 2, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@13", .pme_desc = "", .pme_code = 1901, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 2, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@14", .pme_desc = "", .pme_code = 1902, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 2, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@15", .pme_desc = "", .pme_code = 1903, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 2, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = 
PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 26 Event 3 */ { .pme_name = "@0", .pme_desc = "", .pme_code = 1904, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@1", .pme_desc = "", .pme_code = 1905, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 3, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@2", .pme_desc = "", .pme_code = 1906, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 3, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@3", .pme_desc = "", .pme_code = 1907, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 3, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@4", .pme_desc = "", .pme_code = 1908, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 3, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@5", .pme_desc = "", .pme_code = 1909, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 3, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@6", .pme_desc = "", .pme_code = 1910, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 
26, .pme_event = 3, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@7", .pme_desc = "", .pme_code = 1911, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 3, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@8", .pme_desc = "", .pme_code = 1912, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 3, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@9", .pme_desc = "", .pme_code = 1913, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 3, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@10", .pme_desc = "", .pme_code = 1914, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 3, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@11", .pme_desc = "", .pme_code = 1915, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 3, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@12", .pme_desc = "", .pme_code = 1916, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 3, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@13", 
.pme_desc = "", .pme_code = 1917, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 3, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@14", .pme_desc = "", .pme_code = 1918, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 3, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@15", .pme_desc = "", .pme_code = 1919, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 26, .pme_event = 3, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 27 Event 0 */ { .pme_name = "W_SWORD_NGETS@0", .pme_desc = "Count of the total number of SWords that are read from memory or the L3 cache with NGet commands. Counts up to 2 SWords per memory directory per clock period. (M chip 0)", .pme_code = 1920, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 0, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_NGETS@1", .pme_desc = "Count of the total number of SWords that are read from memory or the L3 cache with NGet commands. Counts up to 2 SWords per memory directory per clock period. 
(M chip 1)", .pme_code = 1921, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 0, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_NGETS@2", .pme_desc = "Count of the total number of SWords that are read from memory or the L3 cache with NGet commands. Counts up to 2 SWords per memory directory per clock period. (M chip 2)", .pme_code = 1922, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 0, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_NGETS@3", .pme_desc = "Count of the total number of SWords that are read from memory or the L3 cache with NGet commands. Counts up to 2 SWords per memory directory per clock period. (M chip 3)", .pme_code = 1923, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 0, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_NGETS@4", .pme_desc = "Count of the total number of SWords that are read from memory or the L3 cache with NGet commands. Counts up to 2 SWords per memory directory per clock period. (M chip 4)", .pme_code = 1924, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 0, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_NGETS@5", .pme_desc = "Count of the total number of SWords that are read from memory or the L3 cache with NGet commands. Counts up to 2 SWords per memory directory per clock period. 
(M chip 5)", .pme_code = 1925, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 0, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_NGETS@6", .pme_desc = "Count of the total number of SWords that are read from memory or the L3 cache with NGet commands. Counts up to 2 SWords per memory directory per clock period. (M chip 6)", .pme_code = 1926, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 0, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_NGETS@7", .pme_desc = "Count of the total number of SWords that are read from memory or the L3 cache with NGet commands. Counts up to 2 SWords per memory directory per clock period. (M chip 7)", .pme_code = 1927, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 0, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_NGETS@8", .pme_desc = "Count of the total number of SWords that are read from memory or the L3 cache with NGet commands. Counts up to 2 SWords per memory directory per clock period. (M chip 8)", .pme_code = 1928, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 0, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_NGETS@9", .pme_desc = "Count of the total number of SWords that are read from memory or the L3 cache with NGet commands. Counts up to 2 SWords per memory directory per clock period. 
(M chip 9)", .pme_code = 1929, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 0, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_NGETS@10", .pme_desc = "Count of the total number of SWords that are read from memory or the L3 cache with NGet commands. Counts up to 2 SWords per memory directory per clock period. (M chip 10)", .pme_code = 1930, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 0, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_NGETS@11", .pme_desc = "Count of the total number of SWords that are read from memory or the L3 cache with NGet commands. Counts up to 2 SWords per memory directory per clock period. (M chip 11)", .pme_code = 1931, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 0, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_NGETS@12", .pme_desc = "Count of the total number of SWords that are read from memory or the L3 cache with NGet commands. Counts up to 2 SWords per memory directory per clock period. (M chip 12)", .pme_code = 1932, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 0, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_NGETS@13", .pme_desc = "Count of the total number of SWords that are read from memory or the L3 cache with NGet commands. Counts up to 2 SWords per memory directory per clock period. 
(M chip 13)", .pme_code = 1933, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 0, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_NGETS@14", .pme_desc = "Count of the total number of SWords that are read from memory or the L3 cache with NGet commands. Counts up to 2 SWords per memory directory per clock period. (M chip 14)", .pme_code = 1934, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 0, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "W_SWORD_NGETS@15", .pme_desc = "Count of the total number of SWords that are read from memory or the L3 cache with NGet commands. Counts up to 2 SWords per memory directory per clock period. (M chip 15)", .pme_code = 1935, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 0, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 27 Event 1 */ { .pme_name = "@0", .pme_desc = "", .pme_code = 1936, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 1, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@1", .pme_desc = "", .pme_code = 1937, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 1, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@2", .pme_desc = "", .pme_code = 1938, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = 
PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 1, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@3", .pme_desc = "", .pme_code = 1939, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 1, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@4", .pme_desc = "", .pme_code = 1940, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 1, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@5", .pme_desc = "", .pme_code = 1941, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 1, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@6", .pme_desc = "", .pme_code = 1942, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 1, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@7", .pme_desc = "", .pme_code = 1943, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 1, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@8", .pme_desc = "", .pme_code = 1944, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 1, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = 
PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@9", .pme_desc = "", .pme_code = 1945, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 1, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@10", .pme_desc = "", .pme_code = 1946, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 1, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@11", .pme_desc = "", .pme_code = 1947, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 1, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@12", .pme_desc = "", .pme_code = 1948, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 1, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@13", .pme_desc = "", .pme_code = 1949, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 1, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@14", .pme_desc = "", .pme_code = 1950, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 1, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@15", .pme_desc = "", .pme_code = 1951, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 
1, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 27 Event 2 */ { .pme_name = "@0", .pme_desc = "", .pme_code = 1952, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 2, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@1", .pme_desc = "", .pme_code = 1953, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 2, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@2", .pme_desc = "", .pme_code = 1954, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 2, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@3", .pme_desc = "", .pme_code = 1955, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 2, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@4", .pme_desc = "", .pme_code = 1956, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 2, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@5", .pme_desc = "", .pme_code = 1957, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 2, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = 
"@6", .pme_desc = "", .pme_code = 1958, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 2, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@7", .pme_desc = "", .pme_code = 1959, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 2, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@8", .pme_desc = "", .pme_code = 1960, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 2, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@9", .pme_desc = "", .pme_code = 1961, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 2, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@10", .pme_desc = "", .pme_code = 1962, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 2, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@11", .pme_desc = "", .pme_code = 1963, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 2, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@12", .pme_desc = "", .pme_code = 1964, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 2, .pme_chipno = 12, .pme_base = 
PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@13", .pme_desc = "", .pme_code = 1965, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 2, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@14", .pme_desc = "", .pme_code = 1966, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 2, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@15", .pme_desc = "", .pme_code = 1967, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 2, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, /* M Counter 27 Event 3 */ { .pme_name = "@0", .pme_desc = "", .pme_code = 1968, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 3, .pme_chipno = 0, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@1", .pme_desc = "", .pme_code = 1969, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 3, .pme_chipno = 1, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@2", .pme_desc = "", .pme_code = 1970, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 3, .pme_chipno = 2, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@3", .pme_desc = "", .pme_code 
= 1971, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 3, .pme_chipno = 3, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@4", .pme_desc = "", .pme_code = 1972, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 3, .pme_chipno = 4, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@5", .pme_desc = "", .pme_code = 1973, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 3, .pme_chipno = 5, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@6", .pme_desc = "", .pme_code = 1974, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 3, .pme_chipno = 6, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@7", .pme_desc = "", .pme_code = 1975, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 3, .pme_chipno = 7, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@8", .pme_desc = "", .pme_code = 1976, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 3, .pme_chipno = 8, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@9", .pme_desc = "", .pme_code = 1977, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 3, .pme_chipno = 9, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = 
PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@10", .pme_desc = "", .pme_code = 1978, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 3, .pme_chipno = 10, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@11", .pme_desc = "", .pme_code = 1979, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 3, .pme_chipno = 11, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@12", .pme_desc = "", .pme_code = 1980, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 3, .pme_chipno = 12, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@13", .pme_desc = "", .pme_code = 1981, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 3, .pme_chipno = 13, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@14", .pme_desc = "", .pme_code = 1982, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 3, .pme_chipno = 14, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, { .pme_name = "@15", .pme_desc = "", .pme_code = 1983, .pme_flags = 0x0, .pme_numasks = 0, .pme_chip = PME_CRAYX2_CHIP_MEMORY, .pme_ctr = 27, .pme_event = 3, .pme_chipno = 15, .pme_base = PMU_CRAYX2_MEMORY_PMD_BASE, .pme_nctrs = PME_CRAYX2_MEMORY_CTRS_PER_CHIP, .pme_nchips = PME_CRAYX2_MEMORY_CHIPS }, }; #define PME_CRAYX2_CYCLES 0 #define PME_CRAYX2_INSTR_GRADUATED 4 #define PME_CRAYX2_EVENT_COUNT 
(sizeof(crayx2_pe)/sizeof(pme_crayx2_entry_t)) #endif /* __CRAYX2_EVENTS_H__ */ papi-5.6.0/src/libpfm-3.y/examples_v2.x/pfmsetup.c000664 001750 001750 00000136404 13216244362 023715 0ustar00jshenry1963jshenry1963000000 000000 /* * (C) Copyright IBM Corp. 2006 * Contributed by Kevin Corry * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sellcopies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * * pfmsetup * * Very simple command-line tool to drive the perfmon2 kernel API. Inspired * by the dmsetup tool from device-mapper. * * Compile with: * gcc -Wall -o pfmsetup pfmsetup.c -lpfm * * Run with: * pfmsetup * * Available commands for the command_file: * * create_context [options] * Create a new context for accessing the performance counters. Each new * context automatically gets one event-set with an ID of 0. * - options: --system * --no-overflow-msg * --block-on-notify * --sampler * - : specify an integer that you want to associate with * the new context for use in other commands. 
*
 * load_context <context_id> <event_set_id> <program_id>
 *   Attach the specified context and event-set to the specified program.
 *   - <context_id>: ID that you specified when creating the context.
 *   - <event_set_id>: ID that you specified when creating an event-set
 *                     within the given context. All contexts automatically
 *                     have an event-set with ID of 0.
 *   - <program_id>: ID that you specified when starting a program
 *                   with the run_program command, or the number of
 *                   the CPU to attach to for system-wide mode.
 *
 * unload_context <context_id>
 *   Detach the specified context from the program that it's currently
 *   attached to.
 *   - <context_id>: ID that you specified when creating the context.
 *
 * close_context <context_id>
 *   Clean up the specified context. After this call, the context_id will no
 *   longer be valid.
 *   - <context_id>: ID that you specified when creating the context.
 *
 * write_pmc <context_id> <event_set_id> <<pmc_id> <pmc_value>>+
 *   Write one or more control register values.
 *   - <context_id>: ID that you specified when creating the context.
 *   - <event_set_id>: ID that you specified when creating an event-set
 *                     within the given context. All contexts automatically
 *                     have an event-set with ID of 0.
 *   - <pmc_id>: ID of the desired control register. See the register
 *               mappings in the Perfmon kernel code to determine which
 *               PMC represents the control register you're interested in.
 *   - <pmc_value>: Value to write into the specified PMC. You need to know
 *                  the exact numeric value - no translations are done from
 *                  event names or masks. Multiple PMC id/value pairs can
 *                  be given in one write_pmc command.
 *
 * write_pmd <context_id> <event_set_id> <<pmd_id> <pmd_value>>+
 *   Write one or more data register values.
 *   - <context_id>: ID that you specified when creating the context.
 *   - <event_set_id>: ID that you specified when creating an event-set
 *                     within the given context. All contexts automatically
 *                     have an event-set with ID of 0.
 *   - <pmd_id>: ID of the desired data register. See the register
 *               mappings in the Perfmon kernel code to determine which
 *               PMD represents the data register you're interested in.
 *   - <pmd_value>: Value to write into the specified PMD. Multiple PMD
 *                  id/value pairs can be given in one write_pmd command.
*
 * read_pmd <context_id> <event_set_id> <pmd_id>+
 *   Read one or more data register values.
 *   - <context_id>: ID that you specified when creating the context.
 *   - <event_set_id>: ID that you specified when creating an event-set
 *                     within the given context. All contexts automatically
 *                     have an event-set with ID of 0.
 *   - <pmd_id>: ID of the desired data register. See the register
 *               mappings in the Perfmon kernel code to determine which
 *               PMD represents the data register you're interested in.
 *               Multiple PMD IDs can be given in one read_pmd command.
 *
 * start_counting <context_id> <event_set_id>
 *   Start counting using the specified context and event-set.
 *   - <context_id>: ID that you specified when creating the context.
 *   - <event_set_id>: ID that you specified when creating an event-set
 *                     within the given context. All contexts automatically
 *                     have an event-set with ID of 0.
 *
 * stop_counting <context_id>
 *   Stop counting on the specified context.
 *   - <context_id>: ID that you specified when creating the context.
 *
 * restart_counting <context_id>
 *   Restart counting on the specified context.
 *   - <context_id>: ID that you specified when creating the context.
 *
 * create_eventset [options] <context_id> <event_set_id>
 *   Create a new event-set for an existing context.
 *   - options: --next-set
 *              --timeout
 *              --switch-on-overflow
 *              --exclude-idle
 *   - <context_id>: ID that you specified when creating the context.
 *   - <event_set_id>: specify an integer that you want to associate with
 *                     the new event-set for use in other commands.
 *
 * delete_eventset <context_id> <event_set_id>
 *   Delete an existing event-set from an existing context.
 *   - <context_id>: ID that you specified when creating the context.
 *   - <event_set_id>: ID that you specified when creating the event-set.
 *
 * getinfo_eventset <context_id> <event_set_id>
 *   Display information about an event-set.
 *   - <context_id>: ID that you specified when creating the context.
 *   - <event_set_id>: ID that you specified when creating the event-set.
 *
 * run_program <program_id> <program name and arguments>
 *   First step in starting a program to monitor. In order to allow time to
 *   set up the counters to monitor the program, this command only forks a
 *   child process. It then suspends itself using ptrace.
You must call the * resume_program command to wake up the new child process and exec the * desired program. * - : Specify an integer that you want to associate with * the program for use in other commands. * - : Specify the program and its arguments * exactly as you would on the command * line. * * resume_program * When a program is 'run', a child process is forked, but the child is * ptrace'd before exec'ing the specified program. This gives you time to * do any necessary setup to monitor the program. This resume_program * command wakes up the child process and finishes exec'ing the desired * program. If a context has been loaded and started for this program, * then the counters will have actually started following this command. * - : ID that you specified when starting the program. * * wait_on_program * Wait for a program to complete and exit. After this call, the program_id * will no longer be valid. * - : ID that you specified when starting the program. * * sleep #include #include #include #include #include #include #include #include #include #include #include #define FALSE 0 #define TRUE 1 #define WHITESPACE " \t\n" #define MAX_TOKENS 32 #define PFMSETUP_NAME "pfmsetup" #define USAGE(f, x...) printf(PFMSETUP_NAME ": USAGE: " f "\n" , ## x) #define LOG_ERROR(f, x...) printf(PFMSETUP_NAME ": Error: %s: " f "\n", __FUNCTION__ , ## x) #define LOG_INFO(f, x...) printf(PFMSETUP_NAME ": " f "\n" , ## x) typedef int (*command_fn)(int argc, char **argv); struct command { const char *full_name; const char *short_name; const char *help; command_fn fn; int min_args; }; struct context { int id; int fd; int cpu; pfarg_ctx_t ctx_arg; pfm_dfl_smpl_arg_t smpl_arg; struct event_set *event_sets; struct context *next; }; struct event_set { int id; struct event_set *next; }; struct program { int id; pid_t pid; struct program *next; }; /* Global list of all contexts that have been created. List is ordered by * context id. 
Each context contains a list of event-sets belonging to that * context, which is ordered by event-set id. */ static struct context *contexts = NULL; /* Global list of all programs that have been started. * List is ordered by program id. */ static struct program *programs = NULL; /* * Routines to manipulate the context, event-set, and program lists. */ static struct context *find_context(int ctx_id) { struct context *ctx; for (ctx = contexts; ctx; ctx = ctx->next) { if (ctx->id == ctx_id) { break; } } return ctx; } static void insert_context(struct context *ctx) { struct context **next_ctx; for (next_ctx = &contexts; *next_ctx && (*next_ctx)->id < ctx->id; next_ctx = &((*next_ctx)->next)) { ; } ctx->next = *next_ctx; *next_ctx = ctx; } static void remove_context(struct context *ctx) { struct context **next_ctx; for (next_ctx = &contexts; *next_ctx; next_ctx = &((*next_ctx)->next)) { if (*next_ctx == ctx) { *next_ctx = ctx->next; break; } } } static struct event_set *find_event_set(struct context *ctx, int event_set_id) { struct event_set *evt; for (evt = ctx->event_sets; evt; evt = evt->next) { if (evt->id == event_set_id) { break; } } return evt; } static void insert_event_set(struct context *ctx, struct event_set *evt) { struct event_set **next_evt; for (next_evt = &ctx->event_sets; *next_evt && (*next_evt)->id < evt->id; next_evt = &((*next_evt)->next)) { ; } evt->next = *next_evt; *next_evt = evt; } static void remove_event_set(struct context *ctx, struct event_set *evt) { struct event_set **next_evt; for (next_evt = &ctx->event_sets; *next_evt; next_evt = &((*next_evt)->next)) { if (*next_evt == evt) { *next_evt = evt->next; break; } } } static struct program *find_program(int program_id) { struct program *prog; for (prog = programs; prog; prog = prog->next) { if (prog->id == program_id) { break; } } return prog; } static void insert_program(struct program *prog) { struct program **next_prog; for (next_prog = &programs; *next_prog && (*next_prog)->id < 
prog->id; next_prog = &((*next_prog)->next)) { ; } prog->next = *next_prog; *next_prog = prog; } static void remove_program(struct program *prog) { struct program **next_prog; for (next_prog = &programs; *next_prog; next_prog = &((*next_prog)->next)) { if (*next_prog == prog) { *next_prog = prog->next; break; } } } /** * set_affinity * * When loading or unloading a system-wide context, we must pin the pfmsetup * process to that CPU before making the system call. Also, get the current * affinity and return it to the caller so we can change it back later. **/ static int set_affinity(int cpu, cpu_set_t *old_cpu_set) { cpu_set_t new_cpu_set; int rc; rc = sched_getaffinity(0, sizeof(*old_cpu_set), old_cpu_set); if (rc) { rc = errno; LOG_ERROR("Can't get current process affinity mask: %d\n", rc); return rc; } CPU_ZERO(&new_cpu_set); CPU_SET(cpu, &new_cpu_set); rc = sched_setaffinity(0, sizeof(new_cpu_set), &new_cpu_set); if (rc) { rc = errno; LOG_ERROR("Can't set process affinity to CPU %d: %d\n", cpu, rc); return rc; } return 0; } /** * revert_affinity * * Reset the process affinity to the specified mask. **/ static void revert_affinity(cpu_set_t *old_cpu_set) { int rc; rc = sched_setaffinity(0, sizeof(*old_cpu_set), old_cpu_set); if (rc) { /* Not a fatal error if we can't reset the affinity. */ LOG_INFO("Can't revert process affinity to original value.\n"); } } /** * create_context * * Arguments: [options] * Options: --system * --no-overflow-msg * --block-on-notify * --sampler * * Call the pfm_create_context system-call to create a new perfmon context. * Add a new entry to the global 'contexts' list. 
**/ static int create_context(int argc, char **argv) { pfarg_ctx_t ctx_arg; pfm_dfl_smpl_arg_t smpl_arg; struct context *new_ctx = NULL; char *sampler_name = NULL; void *smpl_p; int no_overflow_msg = FALSE; int block_on_notify = FALSE; int system_wide = FALSE; int c, ctx_id = 0; int rc; size_t sz; struct option long_opts[] = { {"sampler", required_argument, NULL, 1}, {"system", no_argument, NULL, 2}, {"no-overflow-msg", no_argument, NULL, 3}, {"block-on-notify", no_argument, NULL, 4}, {NULL, 0, NULL, 0} }; memset(&ctx_arg, 0, sizeof(ctx_arg)); opterr = 0; optind = 0; while ((c = getopt_long_only(argc, argv, "", long_opts, NULL)) != EOF) { switch (c) { case 1: sampler_name = optarg; break; case 2: system_wide = TRUE; break; case 3: no_overflow_msg = TRUE; break; case 4: block_on_notify = TRUE; break; default: LOG_ERROR("invalid option: %c", optopt); rc = EINVAL; goto error; } } if (argc < optind + 1) { USAGE("create_context [options] "); rc = EINVAL; goto error; } ctx_id = strtoul(argv[optind], NULL, 0); if (ctx_id <= 0) { LOG_ERROR("Invalid context ID (%s). Must be a positive " "integer.", argv[optind]); rc = EINVAL; goto error; } /* Make sure we don't already have a context with this ID. */ new_ctx = find_context(ctx_id); if (new_ctx) { LOG_ERROR("Context with ID %d already exists.", ctx_id); rc = EINVAL; goto error; } if (sampler_name) { smpl_arg.buf_size = getpagesize(); smpl_p = &smpl_arg; sz = sizeof(smpl_arg); } else { smpl_p = NULL; sz = 0; } ctx_arg.ctx_flags = (system_wide ? PFM_FL_SYSTEM_WIDE : 0) | (no_overflow_msg ? PFM_FL_OVFL_NO_MSG : 0) | (block_on_notify ? PFM_FL_NOTIFY_BLOCK : 0); rc = pfm_create_context(&ctx_arg, sampler_name, smpl_p, sz); if (rc == -1) { rc = errno; LOG_ERROR("pfm_create_context system call returned " "an error: %d.", rc); goto error; } /* Allocate and initialize a new context structure and add it to the * global list. Every new context automatically gets one event_set * with an event ID of 0. 
*/ new_ctx = calloc(1, sizeof(*new_ctx)); if (!new_ctx) { LOG_ERROR("Can't allocate structure for new context %d.", ctx_id); rc = ENOMEM; goto error; } new_ctx->event_sets = calloc(1, sizeof(*(new_ctx->event_sets))); if (!new_ctx->event_sets) { LOG_ERROR("Can't allocate event-set structure for new " "context %d.", ctx_id); rc = ENOMEM; goto error; } new_ctx->id = ctx_id; new_ctx->fd = rc; new_ctx->cpu = -1; new_ctx->ctx_arg = ctx_arg; new_ctx->smpl_arg = smpl_arg; insert_context(new_ctx); LOG_INFO("Created context %d with file-descriptor %d.", new_ctx->id, new_ctx->fd); return 0; error: if (new_ctx) { close(new_ctx->fd); free(new_ctx->event_sets); free(new_ctx); } return rc; } /** * load_context * * Arguments: * * Call the pfm_load_context system-call to load a perfmon context into the * system's performance monitoring unit. **/ static int load_context(int argc, char **argv) { struct context *ctx; struct event_set *evt; struct program *prog; pfarg_load_t load_arg; cpu_set_t old_cpu_set; int ctx_id, event_set_id, program_id; int system_wide, rc; ctx_id = strtoul(argv[1], NULL, 0); event_set_id = strtoul(argv[2], NULL, 0); program_id = strtoul(argv[3], NULL, 0); if (ctx_id <= 0 || event_set_id < 0 || program_id < 0) { LOG_ERROR("context ID, event-set ID, and program/CPU ID must " "be positive integers."); return EINVAL; } /* Find the context, event_set, and program in the global lists. 
*/ ctx = find_context(ctx_id); if (!ctx) { LOG_ERROR("Can't find context with ID %d.", ctx_id); return EINVAL; } evt = find_event_set(ctx, event_set_id); if (!evt) { LOG_ERROR("Can't find event-set with ID %d in context %d.", event_set_id, ctx_id); return EINVAL; } load_arg.load_set = evt->id; system_wide = ctx->ctx_arg.ctx_flags & PFM_FL_SYSTEM_WIDE; if (system_wide) { if (ctx->cpu >= 0) { LOG_ERROR("Trying to load context %d which is already " "loaded on CPU %d.\n", ctx_id, ctx->cpu); return EBUSY; } rc = set_affinity(program_id, &old_cpu_set); if (rc) { return rc; } /* Specify the CPU as the PID. */ load_arg.load_pid = program_id; } else { prog = find_program(program_id); if (!prog) { LOG_ERROR("Can't find program with ID %d.", program_id); return EINVAL; } load_arg.load_pid = prog->pid; } rc = pfm_load_context(ctx->fd, &load_arg); if (rc) { rc = errno; LOG_ERROR("pfm_load_context system call returned " "an error: %d.", rc); return rc; } if (system_wide) { /* Keep track of which CPU this context is loaded on. */ ctx->cpu = program_id; revert_affinity(&old_cpu_set); } LOG_INFO("Loaded context %d, event-set %d onto %s %d.", ctx_id, event_set_id, system_wide ? "cpu" : "program", program_id); return 0; } /** * unload_context * * Arguments: * * Call the pfm_unload_context system-call to unload a perfmon context from * the system's performance monitoring unit. **/ static int unload_context(int argc, char **argv) { struct context *ctx; cpu_set_t old_cpu_set; int system_wide; int ctx_id; int rc; ctx_id = strtoul(argv[1], NULL, 0); if (ctx_id <= 0) { LOG_ERROR("context ID must be a positive integer."); return EINVAL; } ctx = find_context(ctx_id); if (!ctx) { LOG_ERROR("Can't find context with ID %d.", ctx_id); return EINVAL; } system_wide = ctx->ctx_arg.ctx_flags & PFM_FL_SYSTEM_WIDE; if (system_wide) { if (ctx->cpu < 0) { /* This context isn't loaded on any CPU. 
*/ LOG_ERROR("Trying to unload context %d that isn't " "loaded.\n", ctx_id); return EINVAL; } rc = set_affinity(ctx->cpu, &old_cpu_set); if (rc) { return rc; } } rc = pfm_unload_context(ctx->fd); if (rc) { rc = errno; LOG_ERROR("pfm_unload_context system call returned " "an error: %d.", rc); return rc; } if (system_wide) { ctx->cpu = -1; revert_affinity(&old_cpu_set); } LOG_INFO("Unloaded context %d.", ctx_id); return 0; } /** * close_context * * Arguments: * * Close the context's file descriptor, remove it from the global list, and * free the context data structures. **/ static int close_context(int argc, char **argv) { struct context *ctx; struct event_set *evt, *next_evt; int ctx_id; ctx_id = strtoul(argv[1], NULL, 0); if (ctx_id <= 0) { LOG_ERROR("context ID must be a positive integer."); return EINVAL; } ctx = find_context(ctx_id); if (!ctx) { LOG_ERROR("Can't find context with ID %d.", ctx_id); return EINVAL; } /* There's no perfmon system-call to delete a context. We simply call * close on the file handle. */ close(ctx->fd); remove_context(ctx); for (evt = ctx->event_sets; evt; evt = next_evt) { next_evt = evt->next; free(evt); } free(ctx); LOG_INFO("Closed and freed context %d.", ctx_id); return 0; } /** * write_pmc * * Arguments: < >+ * * Write values to one or more control registers. 
**/
static int write_pmc(int argc, char **argv)
{
	struct context *ctx;
	struct event_set *evt;
	pfarg_pmc_t *pmc_args = NULL;
	cpu_set_t old_cpu_set;
	int ctx_id, event_set_id;
	int pmc_id, num_pmcs;
	unsigned long long pmc_value;
	int system_wide, affinity_set, i, rc;

	/* argv[1] = context ID, argv[2] = event-set ID, followed by
	 * (register number, value) pairs starting at argv[3].
	 * Returns 0 on success, otherwise EINVAL, ENOMEM, the set_affinity()
	 * result, or the errno value of the failed system call.
	 */
	ctx_id = strtoul(argv[1], NULL, 0);
	event_set_id = strtoul(argv[2], NULL, 0);

	if (ctx_id <= 0 || event_set_id < 0) {
		LOG_ERROR("context ID and event-set ID must be "
			  "positive integers.");
		return EINVAL;
	}

	ctx = find_context(ctx_id);
	if (!ctx) {
		LOG_ERROR("Can't find context with ID %d.", ctx_id);
		return EINVAL;
	}

	evt = find_event_set(ctx, event_set_id);
	if (!evt) {
		LOG_ERROR("Can't find event-set with ID %d in context %d.",
			  event_set_id, ctx_id);
		return EINVAL;
	}

	/* Allocate an array of PMC structures. */
	num_pmcs = (argc - 3) / 2;
	pmc_args = calloc(num_pmcs, sizeof(*pmc_args));
	if (!pmc_args) {
		LOG_ERROR("Can't allocate PMC argument array.");
		return ENOMEM;
	}

	for (i = 0; i < num_pmcs; i++) {
		pmc_id = strtoul(argv[3 + i*2], NULL, 0);
		pmc_value = strtoull(argv[4 + i*2], NULL, 0);

		if (pmc_id < 0) {
			LOG_ERROR("PMC ID must be a positive integer.");
			rc = EINVAL;
			goto out;
		}

		pmc_args[i].reg_num = pmc_id;
		pmc_args[i].reg_set = evt->id;
		pmc_args[i].reg_value = pmc_value;
	}

	/* Only a system-wide context that is loaded on a CPU needs the
	 * affinity change around the system call.
	 */
	system_wide = ctx->ctx_arg.ctx_flags & PFM_FL_SYSTEM_WIDE;
	affinity_set = system_wide && ctx->cpu >= 0;
	if (affinity_set) {
		rc = set_affinity(ctx->cpu, &old_cpu_set);
		if (rc) {
			goto out;
		}
	}

	rc = pfm_write_pmcs(ctx->fd, pmc_args, num_pmcs);
	if (rc) {
		/* Capture errno before any other call can overwrite it. */
		rc = errno;
	}

	if (affinity_set) {
		/* Undo set_affinity() on the failure path as well. */
		revert_affinity(&old_cpu_set);
	}

	if (rc) {
		LOG_ERROR("pfm_write_pmcs system call returned "
			  "an error: %d.", rc);
	}

out:
	free(pmc_args);
	return rc;
}

/**
 * write_pmd
 *
 * Arguments: < >+
 *
 * FIXME: Add options for other fields in pfarg_pmd_t.
**/
static int write_pmd(int argc, char **argv)
{
	struct context *ctx;
	struct event_set *evt;
	pfarg_pmd_t *pmd_args = NULL;
	cpu_set_t old_cpu_set;
	int ctx_id, event_set_id;
	int pmd_id, num_pmds;
	unsigned long long pmd_value;
	int system_wide, affinity_set, i, rc;

	/* argv[1] = context ID, argv[2] = event-set ID, followed by
	 * (register number, value) pairs starting at argv[3].
	 * Returns 0 on success, otherwise EINVAL, ENOMEM, the set_affinity()
	 * result, or the errno value of the failed system call.
	 */
	ctx_id = strtoul(argv[1], NULL, 0);
	event_set_id = strtoul(argv[2], NULL, 0);

	if (ctx_id <= 0 || event_set_id < 0) {
		LOG_ERROR("context ID and event-set ID must be "
			  "positive integers.");
		return EINVAL;
	}

	ctx = find_context(ctx_id);
	if (!ctx) {
		LOG_ERROR("Can't find context with ID %d.", ctx_id);
		return EINVAL;
	}

	evt = find_event_set(ctx, event_set_id);
	if (!evt) {
		LOG_ERROR("Can't find event-set with ID %d in context %d.",
			  event_set_id, ctx_id);
		return EINVAL;
	}

	/* Allocate an array of PMD structures. */
	num_pmds = (argc - 3) / 2;
	pmd_args = calloc(num_pmds, sizeof(*pmd_args));
	if (!pmd_args) {
		LOG_ERROR("Can't allocate PMD argument array.");
		return ENOMEM;
	}

	for (i = 0; i < num_pmds; i++) {
		pmd_id = strtoul(argv[3 + i*2], NULL, 0);
		pmd_value = strtoull(argv[4 + i*2], NULL, 0);

		if (pmd_id < 0) {
			LOG_ERROR("PMD ID must be a positive integer.");
			rc = EINVAL;
			goto out;
		}

		pmd_args[i].reg_num = pmd_id;
		pmd_args[i].reg_set = evt->id;
		pmd_args[i].reg_value = pmd_value;
	}

	/* Only a system-wide context that is loaded on a CPU needs the
	 * affinity change around the system call.
	 */
	system_wide = ctx->ctx_arg.ctx_flags & PFM_FL_SYSTEM_WIDE;
	affinity_set = system_wide && ctx->cpu >= 0;
	if (affinity_set) {
		rc = set_affinity(ctx->cpu, &old_cpu_set);
		if (rc) {
			goto out;
		}
	}

	rc = pfm_write_pmds(ctx->fd, pmd_args, num_pmds);
	if (rc) {
		/* Capture errno before any other call can overwrite it. */
		rc = errno;
	}

	if (affinity_set) {
		/* Undo set_affinity() on the failure path as well. */
		revert_affinity(&old_cpu_set);
	}

	if (rc) {
		LOG_ERROR("pfm_write_pmds system call returned "
			  "an error: %d.", rc);
	}

out:
	free(pmd_args);
	return rc;
}

/**
 * read_pmd
 *
 * Arguments: +
 *
 * FIXME: Add options for other fields in pfarg_pmd_t.
**/
static int read_pmd(int argc, char **argv)
{
	struct context *ctx;
	struct event_set *evt;
	pfarg_pmd_t *pmd_args = NULL;
	cpu_set_t old_cpu_set;
	int ctx_id, event_set_id;
	int pmd_id, num_pmds;
	int system_wide, affinity_set, i, rc;

	/* argv[1] = context ID, argv[2] = event-set ID, followed by one
	 * register number per argument starting at argv[3].
	 * Returns 0 on success, otherwise EINVAL, ENOMEM, the set_affinity()
	 * result, or the errno value of the failed system call.
	 * NOTE(review): the values read back are not reported anywhere in
	 * this function; the array is simply freed.
	 */
	ctx_id = strtoul(argv[1], NULL, 0);
	event_set_id = strtoul(argv[2], NULL, 0);

	if (ctx_id <= 0 || event_set_id < 0) {
		LOG_ERROR("context ID and event-set ID must be "
			  "positive integers.");
		return EINVAL;
	}

	ctx = find_context(ctx_id);
	if (!ctx) {
		LOG_ERROR("Can't find context with ID %d.", ctx_id);
		return EINVAL;
	}

	evt = find_event_set(ctx, event_set_id);
	if (!evt) {
		LOG_ERROR("Can't find event-set with ID %d in context %d.",
			  event_set_id, ctx_id);
		return EINVAL;
	}

	/* Allocate an array of PMD structures. */
	num_pmds = argc - 3;
	pmd_args = calloc(num_pmds, sizeof(*pmd_args));
	if (!pmd_args) {
		LOG_ERROR("Can't allocate PMD argument array.");
		return ENOMEM;
	}

	for (i = 0; i < num_pmds; i++) {
		pmd_id = strtoul(argv[3 + i], NULL, 0);
		if (pmd_id < 0) {
			LOG_ERROR("PMD ID must be a positive integer.");
			rc = EINVAL;
			goto out;
		}

		pmd_args[i].reg_num = pmd_id;
		pmd_args[i].reg_set = evt->id;
	}

	/* Only a system-wide context that is loaded on a CPU needs the
	 * affinity change around the system call.
	 */
	system_wide = ctx->ctx_arg.ctx_flags & PFM_FL_SYSTEM_WIDE;
	affinity_set = system_wide && ctx->cpu >= 0;
	if (affinity_set) {
		rc = set_affinity(ctx->cpu, &old_cpu_set);
		if (rc) {
			goto out;
		}
	}

	rc = pfm_read_pmds(ctx->fd, pmd_args, num_pmds);
	if (rc) {
		/* Capture errno before any other call can overwrite it. */
		rc = errno;
	}

	if (affinity_set) {
		/* Undo set_affinity() on the failure path as well. */
		revert_affinity(&old_cpu_set);
	}

	if (rc) {
		LOG_ERROR("pfm_read_pmds system call returned "
			  "an error: %d.", rc);
	}

out:
	free(pmd_args);
	return rc;
}

/**
 * start_counting
 *
 * Arguments:
 *
 * Call the pfm_start system-call to start counting for a perfmon context
 * that was previously stopped.
**/
static int start_counting(int argc, char **argv)
{
	pfarg_start_t start_arg;
	struct context *ctx;
	struct event_set *evt;
	cpu_set_t old_cpu_set;
	int ctx_id, event_set_id;
	int system_wide, affinity_set, rc;

	/* argv[1] = context ID, argv[2] = event-set ID.
	 * Returns 0 on success, otherwise EINVAL, the set_affinity() result,
	 * or the errno value of the failed system call.
	 */
	memset(&start_arg, 0, sizeof(start_arg));

	ctx_id = strtoul(argv[1], NULL, 0);
	event_set_id = strtoul(argv[2], NULL, 0);

	if (ctx_id <= 0 || event_set_id < 0) {
		LOG_ERROR("context ID and event-set ID must be "
			  "positive integers.");
		return EINVAL;
	}

	ctx = find_context(ctx_id);
	if (!ctx) {
		LOG_ERROR("Can't find context with ID %d.", ctx_id);
		return EINVAL;
	}

	evt = find_event_set(ctx, event_set_id);
	if (!evt) {
		LOG_ERROR("Can't find event-set with ID %d in context %d.",
			  event_set_id, ctx_id);
		return EINVAL;
	}

	start_arg.start_set = evt->id;

	/* Only a system-wide context that is loaded on a CPU needs the
	 * affinity change around the system call.
	 */
	system_wide = ctx->ctx_arg.ctx_flags & PFM_FL_SYSTEM_WIDE;
	affinity_set = system_wide && ctx->cpu >= 0;
	if (affinity_set) {
		rc = set_affinity(ctx->cpu, &old_cpu_set);
		if (rc) {
			return rc;
		}
	}

	rc = pfm_start(ctx->fd, &start_arg);
	if (rc) {
		/* Capture errno before any other call can overwrite it. */
		rc = errno;
	}

	if (affinity_set) {
		/* Undo set_affinity() on the failure path as well. */
		revert_affinity(&old_cpu_set);
	}

	if (rc) {
		LOG_ERROR("pfm_start system call returned an error: %d.", rc);
		return rc;
	}

	LOG_INFO("Started counting for context %d, event-set %d.",
		 ctx_id, event_set_id);

	return 0;
}

/**
 * stop_counting
 *
 * Arguments:
 *
 * Call the pfm_stop system-call to stop counting for a perfmon context that
 * was previously loaded.
**/ static int stop_counting(int argc, char **argv) { struct context *ctx; cpu_set_t old_cpu_set; int system_wide; int ctx_id; int rc; ctx_id = strtoul(argv[1], NULL, 0); if (ctx_id <= 0) { LOG_ERROR("context ID must be a positive integer."); return EINVAL; } ctx = find_context(ctx_id); if (!ctx) { LOG_ERROR("Can't find context with ID %d.", ctx_id); return EINVAL; } system_wide = ctx->ctx_arg.ctx_flags & PFM_FL_SYSTEM_WIDE; if (system_wide && ctx->cpu >= 0) { rc = set_affinity(ctx->cpu, &old_cpu_set); if (rc) { return rc; } } rc = pfm_stop(ctx->fd); if (rc) { rc = errno; LOG_ERROR("pfm_stop system call returned an error: %d.", rc); return rc; } if (system_wide && ctx->cpu >= 0) { revert_affinity(&old_cpu_set); } LOG_INFO("Stopped counting for context %d.", ctx_id); return 0; } /** * restart_counting * * Arguments: * * Call the pfm_restart system-call to clear the data counters and start * counting from zero for a perfmon context that was previously loaded. **/ static int restart_counting(int argc, char **argv) { struct context *ctx; cpu_set_t old_cpu_set; int system_wide; int ctx_id; int rc; ctx_id = strtoul(argv[1], NULL, 0); if (ctx_id <= 0) { LOG_ERROR("context ID must be a positive integer."); return EINVAL; } ctx = find_context(ctx_id); if (!ctx) { LOG_ERROR("Can't find context with ID %d.", ctx_id); return EINVAL; } system_wide = ctx->ctx_arg.ctx_flags & PFM_FL_SYSTEM_WIDE; if (system_wide && ctx->cpu >= 0) { rc = set_affinity(ctx->cpu, &old_cpu_set); if (rc) { return rc; } } rc = pfm_restart(ctx->fd); if (rc) { rc = errno; LOG_ERROR("pfm_restart system call returned an error: %d.", rc); return rc; } if (system_wide && ctx->cpu >= 0) { revert_affinity(&old_cpu_set); } LOG_INFO("Restarted counting for context %d.", ctx_id); return 0; } /** * create_eventset * * Arguments: [options] * Options: --timeout * --switch-on-overflow * --exclude-idle **/ static int create_eventset(int argc, char **argv) { pfarg_setdesc_t set_arg; struct context *ctx; struct event_set 
*evt; cpu_set_t old_cpu_set; int ctx_id, event_set_id; unsigned long timeout = 0; int switch_on_overflow = FALSE; int switch_on_timeout = FALSE; int exclude_idle = FALSE; int new_set = FALSE; int system_wide,c, rc; struct option long_opts[] = { {"next-set", required_argument, NULL, 1}, {"timeout", required_argument, NULL, 2}, {"switch-on-overflow", no_argument, NULL, 3}, {"exclude-idle", no_argument, NULL, 4}, {NULL, 0, NULL, 0} }; memset(&set_arg, 0, sizeof(set_arg)); opterr = 0; optind = 0; while ((c = getopt_long_only(argc, argv, "", long_opts, NULL)) != EOF) { switch (c) { case 1: timeout = strtoul(optarg, NULL, 0); if (!timeout) { LOG_ERROR("timeout must be a " "non-zero integer."); return EINVAL; } switch_on_timeout = TRUE; break; case 2: switch_on_overflow = TRUE; break; case 3: exclude_idle = TRUE; break; default: LOG_ERROR("invalid option: %c", optopt); return EINVAL; } } (void) exclude_idle; if (argc < optind + 2) { USAGE("create_eventset [options] "); return EINVAL; } ctx_id = strtoul(argv[optind], NULL, 0); event_set_id = strtoul(argv[optind+1], NULL, 0); if (ctx_id <= 0 || event_set_id < 0) { LOG_ERROR("context ID and event-set ID must be " "positive integers."); return EINVAL; } ctx = find_context(ctx_id); if (!ctx) { LOG_ERROR("Can't find context with ID %d.", ctx_id); return EINVAL; } if (switch_on_timeout && switch_on_overflow) { LOG_ERROR("Cannot switch set %d (context %d) on both " "timeout and overflow.", event_set_id, ctx_id); return EINVAL; } evt = find_event_set(ctx, event_set_id); if (!evt) { evt = calloc(1, sizeof(*evt)); if (!evt) { LOG_ERROR("Can't allocate structure for new event-set " "%d in context %d.", event_set_id, ctx_id); return ENOMEM; } evt->id = event_set_id; new_set = TRUE; } set_arg.set_id = event_set_id; set_arg.set_timeout = timeout; /* in nanseconds */ set_arg.set_flags = (switch_on_overflow ? PFM_SETFL_OVFL_SWITCH : 0) | (switch_on_timeout ? 
PFM_SETFL_TIME_SWITCH : 0); system_wide = ctx->ctx_arg.ctx_flags & PFM_FL_SYSTEM_WIDE; if (system_wide && ctx->cpu >= 0) { rc = set_affinity(ctx->cpu, &old_cpu_set); if (rc) { free(evt); return rc; } } rc = pfm_create_evtsets(ctx->fd, &set_arg, 1); if (rc) { rc = errno; LOG_ERROR("pfm_create_evtsets system call returned " "an error: %d.", rc); free(evt); return rc; } if (system_wide && ctx->cpu >= 0) { revert_affinity(&old_cpu_set); } if (new_set) { insert_event_set(ctx, evt); } LOG_INFO("%s event-set %d in context %d.", new_set ? "Created" : "Modified", event_set_id, ctx_id); if (switch_on_timeout) { LOG_INFO(" Actual timeout set to %llu ns.", (unsigned long long)set_arg.set_timeout); } return 0; } /** * delete_eventset * * Arguments: **/ static int delete_eventset(int argc, char **argv) { pfarg_setdesc_t set_arg; struct context *ctx; struct event_set *evt; cpu_set_t old_cpu_set; int ctx_id, event_set_id; int system_wide, rc; memset(&set_arg, 0, sizeof(set_arg)); ctx_id = strtoul(argv[1], NULL, 0); event_set_id = strtoul(argv[2], NULL, 0); if (ctx_id <= 0 || event_set_id < 0) { LOG_ERROR("context ID and event-set ID must be " "positive integers."); return EINVAL; } ctx = find_context(ctx_id); if (!ctx) { LOG_ERROR("Can't find context with ID %d.", ctx_id); return EINVAL; } evt = find_event_set(ctx, event_set_id); if (!evt) { LOG_ERROR("Can't find event-set with ID %d in context %d.", event_set_id, ctx_id); return EINVAL; } set_arg.set_id = evt->id; system_wide = ctx->ctx_arg.ctx_flags & PFM_FL_SYSTEM_WIDE; if (system_wide && ctx->cpu >= 0) { rc = set_affinity(ctx->cpu, &old_cpu_set); if (rc) { return rc; } } rc = pfm_delete_evtsets(ctx->fd, &set_arg, 1); if (rc) { rc = errno; LOG_ERROR("pfm_delete_evtsets system call returned " "an error: %d.", rc); return rc; } if (system_wide && ctx->cpu >= 0) { revert_affinity(&old_cpu_set); } remove_event_set(ctx, evt); free(evt); LOG_INFO("Deleted event-set %d from context %d.", event_set_id, ctx_id); return 0; } /** * 
getinfo_eventset
 *
 * Arguments: argv[1] = context ID, argv[2] = event-set ID
 *
 * Call the pfm_getinfo_evtsets system-call for the event-set and log the
 * flags, run count and timeout it reports.
 *
 * Returns 0 on success, EINVAL for bad or unknown IDs, the set_affinity()
 * result if that fails, or the errno value of a failed system call.
 **/
static int getinfo_eventset(int argc, char **argv)
{
	pfarg_setinfo_t set_arg;
	struct context *ctx;
	struct event_set *evt;
	cpu_set_t old_cpu_set;
	int ctx_id, event_set_id;
	int system_wide, affinity_set, rc;

	memset(&set_arg, 0, sizeof(set_arg));

	ctx_id = strtoul(argv[1], NULL, 0);
	event_set_id = strtoul(argv[2], NULL, 0);

	if (ctx_id <= 0 || event_set_id < 0) {
		LOG_ERROR("context ID and event-set ID must be "
			  "positive integers.");
		return EINVAL;
	}

	ctx = find_context(ctx_id);
	if (!ctx) {
		LOG_ERROR("Can't find context with ID %d.", ctx_id);
		return EINVAL;
	}

	evt = find_event_set(ctx, event_set_id);
	if (!evt) {
		LOG_ERROR("Can't find event-set with ID %d in context %d.",
			  event_set_id, ctx_id);
		return EINVAL;
	}

	set_arg.set_id = evt->id;

	/* Only a system-wide context that is loaded on a CPU needs the
	 * affinity change around the system call.
	 */
	system_wide = ctx->ctx_arg.ctx_flags & PFM_FL_SYSTEM_WIDE;
	affinity_set = system_wide && ctx->cpu >= 0;
	if (affinity_set) {
		rc = set_affinity(ctx->cpu, &old_cpu_set);
		if (rc) {
			return rc;
		}
	}

	rc = pfm_getinfo_evtsets(ctx->fd, &set_arg, 1);
	if (rc) {
		/* Capture errno before any other call can overwrite it. */
		rc = errno;
	}

	if (affinity_set) {
		/* Undo set_affinity() on the failure path as well. */
		revert_affinity(&old_cpu_set);
	}

	if (rc) {
		LOG_ERROR("pfm_getinfo_evtsets system call returned "
			  "an error: %d.", rc);
		return rc;
	}

	LOG_INFO("Got info for event-set %d in context %d.",
		 event_set_id, ctx_id);
	LOG_INFO(" Flags: 0x%x", set_arg.set_flags);
	LOG_INFO(" Runs: %llu", (unsigned long long)set_arg.set_runs);
	LOG_INFO(" Timeout: %"PRIu64, set_arg.set_timeout);

	return 0;
}

/**
 * run_program
 *
 * Arguments:
 *
 * Start the specified program. After fork'ing but before exec'ing, ptrace
 * the child so it will remain suspended until a corresponding resume_program
 * command. We do this so we can load a context for the program before it
 * actually starts running. This logic is taken from the task.c example in
 * the libpfm source code tree.
**/
static int run_program(int argc, char **argv)
{
	struct program *prog;
	int program_id;
	pid_t pid;
	int rc;

	/* argv[1] = program ID; argv[2] onward is the command line handed
	 * to execvp().
	 * Returns 0 on success, EINVAL for a bad/duplicate ID or a failed
	 * fork, ENOMEM on allocation failure, or the child's exit status if
	 * it exited before stopping.
	 */
	program_id = strtoul(argv[1], NULL, 0);
	if (program_id <= 0) {
		LOG_ERROR("program ID must be a positive integer.");
		return EINVAL;
	}

	/* Make sure we haven't already started a program with this ID. */
	prog = find_program(program_id);
	if (prog) {
		LOG_ERROR("Program with ID %d already exists.", program_id);
		return EINVAL;
	}

	prog = calloc(1, sizeof(*prog));
	if (!prog) {
		LOG_ERROR("Can't allocate new program structure to run '%s'.",
			  argv[2]);
		return ENOMEM;
	}

	prog->id = program_id;

	pid = fork();
	if (pid == -1) {
		/* Error fork'ing. The structure was never inserted into the
		 * program list, so release it here.
		 */
		LOG_ERROR("Unable to fork child process.");
		free(prog);
		return EINVAL;
	} else if (!pid) {
		/* Child */

		/* This will cause the program to stop before executing the
		 * first user level instruction. We can only load a context
		 * if the program is in the STOPPED state. This child
		 * process will sit here until we've process a resume_program
		 * command.
		 */
		rc = ptrace(PTRACE_TRACEME, 0, NULL, NULL);
		if (rc) {
			rc = errno;
			LOG_ERROR("Error ptrace'ing '%s': %d", argv[2], rc);
			exit(rc);
		}

		/* execvp() only returns on failure. */
		execvp(argv[2], argv + 2);

		rc = errno;
		LOG_ERROR("Error exec'ing '%s': %d", argv[2], rc);
		exit(rc);
	}

	/* Parent */
	prog->pid = pid;
	insert_program(prog);

	/* Wait for the child to exec. */
	waitpid(pid, &rc, WUNTRACED);

	/* Check if process exited early. */
	if (WIFEXITED(rc)) {
		LOG_ERROR("Program '%s' exited too early with status "
			  "%d", argv[2], WEXITSTATUS(rc));
		return WEXITSTATUS(rc);
	}

	LOG_INFO("Started program %d: '%s'.", program_id, argv[2]);

	return 0;
}

/**
 * resume_program
 *
 * Arguments:
 *
 * A program started with run_program must be 'resumed' before it actually
 * begins running. This allows us to load a context to the process and
 * start the counters before the program executes any code.
**/ static int resume_program(int argc, char **argv) { struct program *prog; int program_id; int rc; program_id = strtoul(argv[1], NULL, 0); if (program_id <= 0) { LOG_ERROR("program ID must be a positive integer."); return EINVAL; } prog = find_program(program_id); if (!prog) { LOG_ERROR("Can't find program with ID %d.", program_id); return EINVAL; } /* Call ptrace to resume execution of the process. If a context has * been loaded and the counters started, this is where monitoring * is effectively activated. */ rc = ptrace(PTRACE_DETACH, prog->pid, NULL, 0); if (rc) { rc = errno; LOG_ERROR("Error detaching program %d.\n", prog->id); return rc; } LOG_INFO("Resumed program %d.", program_id); return 0; } /** * wait_on_program * * Arguments: * * Wait for the specified program to complete and exit. **/ static int wait_on_program(int argc, char **argv) { struct program *prog; int program_id; int rc; program_id = strtoul(argv[1], NULL, 0); if (program_id <= 0) { LOG_ERROR("program ID must be a positive integer."); return EINVAL; } prog = find_program(program_id); if (!prog) { LOG_ERROR("Can't find program with ID %d.", program_id); return EINVAL; } waitpid(prog->pid, &rc, 0); /* The program has exitted, but if there was a context loaded on that * process, it will still have the latest counts available to read. */ remove_program(prog); free(prog); LOG_INFO("Waited for program %d to complete.", program_id); return 0; } /** * _sleep * * Arguments: