pax_global_header00006660000000000000000000000064126173231520014514gustar00rootroot0000000000000052 comment=63d759f2380ee0cb1b04daed964ca23ad0365709 mcelog-128+dfsg/000077500000000000000000000000001261732315200135555ustar00rootroot00000000000000mcelog-128+dfsg/.gitignore000066400000000000000000000001011261732315200155350ustar00rootroot00000000000000*.o *~ mcelog dbquery *.orig *.rej .gdb_history .depend tsc core mcelog-128+dfsg/CHANGES000066400000000000000000000115651261732315200145600ustar00rootroot00000000000000 Changes file is obsolete. Please see git log on https://git.kernel.org/cgit/utils/cpu/mce/mcelog.git/ for newer changes. Add Linux Kongress 2010 paper Add Sandy Bridge Support Write pid file by default in daemon mode Reopen log files on SIGUSR1 in daemon mode Default --daemon mode to logging to /var/log/mcelog Add Core i3/5 and more Westmere support Add Xeon75xx support Add Intel architectural mca fallback for new family 6 Intel CPUs Add --pidfile option to write daemon pid into file. Enable DIMM, socket, cache tracking and per page tracking by default Support changing mcelog daemon to other user Disabled by default because this breaks the advanced triggers which need root rights. Allow to disable memory error logging Add per socket memory accounting/trigger support Add per page memory error accounting and predictive page offlining Support for accounting errors per memory page and offlining pages and executing shell script triggers when error thresholds are exceeded. Offlining requires a new kernel that supports page offlining. Works with specific Intel systems with integrated memory controller Add cache error ``yellow bit trigger'' support An cache error threshold indication on Intel CPUs can execute a special trigger program to offline affected CPUs. Add in memory dimm database in daemon mode Errors are accounted per DIMM and can execute shell scripts when error thresholds are exceeded. This can be all configured using the configuration file. 
Add mcelog --client to query a running mcelog daemon for errors Works only on specific Intel systems for now with integrated memory controllers that report DIMM errors. Keep stdout open in daemon mode when no syslog Add --foreground option Avoid %a scanf extension that broke old glibc version Fixes to Tulsa and Dunngton decoding (Youkang Song) Fixes to Nehalem decoding and (Ted Barragy, Hidetoshi Seto) Fix parallel logging in daemon mode Fixes to the manpage (Thomas Renninger) More consistent number printing (Huang Ying) mcelog builds now with more gcc warning options enabled Improved man page; now actually describing what a machine check is Fix and finally document TSC decoding (still doesn't work everywhere and is obsolete now) Add support for --ascii --file option to specify input file Add support for /etc/mcelog.conf config file. This can set all options on the command line and some more. Add new --cpu=cputype option and deprecate old k8/core2/generic/intel-cpu options Use official Nehalem names Support re-parsing mcelog output with --ascii Add Intel Tulsa (Xeon 71xx) decoding support Add Dunnington (Xeon 7400) decoding support Add Nehalem decoding support Add --no-syslog option Various bug fixes Add --logfile option Implement daemon mode Various bug fixes to the decoder Add --intel-family=... argument Better decoding of core2 events and explicit decoding of pre Core P6 cores Add decoding of broadcast timeouts Add --version argument Fix parsing of multiple fields on a line in --ascii Add --raw mode for easier machine decoding (Ying Huang) Skip printk timestamp headers in --ascii (Ying Huang) Support more than 255 CPUs (requires updated kernel) Update for upcoming kernel interface "struct mce" extensions Support AMD Fam10h/11h CPUs (Joachim Deguara) Add switch to use LOG_ERR for syslog messages Various misc. 
cleanups mcelog now logs summaries of some serious events into syslog by default Fix syslog multiline logging Separate decoding output and error messages Automatic dependency generation in Makefile Many cleanups in DMI decoding Add DIMM database and error triggers Automatic sanity check for DMI information and enable by default Add support for decoding Intel Core2 machine checks Simple decoding of the TSC value into uptime for Intel CPUs Add the MCE design paper. Decode Intel thermal events properly Add some "RAMs" to K8 ECC strings in futile hope that users will get the hint Allow modifier command line options after --ascii Don't print decoded address twice for --ascii Fix SMBIOS anchor scan to work on more machines and don't crash when no anchor found. Fix --ascii reparsing of mcelog output. Add --filter and filter out known broken K8 GART errors Add --ignorenodev argument and use in cron script (avoids cron errors in Xen guest kernels) Add new --dmi argument to look up machine check addresses in SMBIOS (warning unreliable due to wide spread bios bugs) Fix argument decoding (support --, allow arguments in any order) Clarify --ascii in the manpage Support for AMD K8 Revision F machine check DRAM error thresholding from Jacob Shin Add P4 decoder contributed by "Guo, Racing" for Intel P4 and Xeon. Add K8 decoder from 2.4 kernel code to decode Opteron/Athlon64 logs. (code mostly from Eric Morton and Andi Kleen) Add --ascii function to decode fatal kernel output. 
Improve manpage Fix 32bit bugs Fix uninitialized variable in check_cpu Minor cleanups mcelog-128+dfsg/Makefile000066400000000000000000000100061261732315200152120ustar00rootroot00000000000000CFLAGS := -g -Os prefix := /usr etcprefix := MANDIR := ${prefix}/share/man # Define appropiately for your distribution # DOCDIR := /usr/share/doc/packages/mcelog # Note when changing prefix: some of the non-critical files like # the manpage or the init script have hardcoded prefixes # Warning flags added implicitely to CFLAGS in the default rule # this is done so that even when CFLAGS are overriden we still get # the additional warnings # Some warnings require the global optimizer and are only output with # -O2/-Os, so that should be tested occasionally WARNINGS := -Wall -Wextra -Wno-missing-field-initializers -Wno-unused-parameter \ -Wstrict-prototypes -Wformat-security -Wmissing-declarations \ -Wdeclaration-after-statement # The on disk database has still many problems (partly in this code and partly # due to missing support from BIOS), so it's disabled by default. 
# Install the binary, man pages, default config file and trigger scripts.
# dbquery intentionally not installed by default.
# DOCDIR is optional; when set, the extra documentation in ${DOC} is
# installed there as well.
install: mcelog mcelog.conf mcelog.conf.5 mcelog.triggers.5
	mkdir -p $(DESTDIR)${etcprefix}/etc/mcelog $(DESTDIR)${prefix}/sbin $(DESTDIR)$(MANDIR)/man5 $(DESTDIR)$(MANDIR)/man8
	install -m 755 -p mcelog $(DESTDIR)${prefix}/sbin/mcelog
	install -m 644 -p mcelog.8 $(DESTDIR)$(MANDIR)/man8
	install -m 644 -p mcelog.conf.5 $(DESTDIR)$(MANDIR)/man5
	install -m 644 -p mcelog.triggers.5 $(DESTDIR)$(MANDIR)/man5
	install -m 644 -p -b mcelog.conf $(DESTDIR)${etcprefix}/etc/mcelog/mcelog.conf
	# "|| exit 1" makes a failed trigger install abort the rule; without it
	# only the status of the last loop iteration would be seen by make.
	for i in ${TRIGGERS} ; do \
		install -m 755 -p -b triggers/$$i $(DESTDIR)${etcprefix}/etc/mcelog || exit 1 ; \
	done
ifdef DOCDIR
	# -m is required here: "install -d 755 DIR" would create a directory
	# literally named "755" in addition to DIR.
	install -d -m 755 $(DESTDIR)${DOCDIR}
	install -m 644 -p ${DOC} $(DESTDIR)${DOCDIR}
else
	echo
	echo "Consider defining DOCDIR to install additional documentation"
endif
$(CFLAGS) $(CPPFLAGS) $(WARNINGS) $(ADD_DEFINES) -o $@ $< version.tmp: FORCE ( echo -n "char version[] = \"" ; \ if type -p git >/dev/null; then \ if [ -d .git ] ; then \ git describe --tags HEAD | tr -d '\n'; \ else \ echo -n "unknown" ; \ fi ; \ else echo -n "unknown" ; fi ; \ echo '";' \ ) > version.tmp version.c: version.tmp cmp version.tmp version.c || mv version.tmp version.c .depend: ${SRC} ${CC} -MM -I. ${SRC} > .depend.X && mv .depend.X .depend include .depend Makefile: .depend .PHONY: iccverify src test # run the icc static verifier over sources. you need the intel compiler installed for this DISABLED_DIAGS := -diag-disable 188,271,869,2259,981,12072,181,12331,1572 iccverify: icc -Wall -diag-enable sv3 $(DISABLED_DIAGS) $(ADD_DEFINES) $(SRC) clangverify: clang --analyze $(ADD_DEFINES) $(SRC) src: echo $(SRC) config-test: config.c gcc -DTEST=1 config.c -o config-test test: $(MAKE) -C tests test DEBUG="" VALGRIND=valgrind --leak-check=full valgrind-test: $(MAKE) -C tests test DEBUG="${VALGRIND}" test-clean: $(MAKE) -C tests clean mcelog-128+dfsg/README000066400000000000000000000110021261732315200144270ustar00rootroot00000000000000mcelog is the user space backend for logging machine check errors reported by the hardware to the kernel. The kernel does the immediate actions (like killing processes etc.) and mcelog decodes the errors and manages various other advanced error responses like offlining memory, CPUs or triggering events. In addition mcelog also handles corrected errors, by logging and accounting them. It primarily handles machine checks and thermal events, which are reported for errors detected by the CPU. For more details on what mcelog can do and the underlying theory see http://www.mcelog.org It is recommended that mcelog runs on all x86 machines, both 64bit (since early 2.6) and 32bit (since 2.6.32) mcelog can run in several modi: cronjob, trigger, daemon cronjob is the old method. mcelog runs every 5 minutes from cron and checks for errors. 
The disadvantage of this is that it can delay error reporting significantly (up to 10 minutes) and does not allow mcelog to keep extended state. trigger is a newer method where the kernel runs mcelog on an error. This is configured with echo /usr/sbin/mcelog > /sys/devices/system/machinecheck/machinecheck0/trigger This is faster, but still doesn't allow mcelog to keep state, and has relatively high overhead for each error because a program has to be initialized from scratch. In daemon mode mcelog runs continuously as a daemon in the background and waits for errors. It is enabled by running mcelog --daemon & from an init script. This is the fastest and most feature-ful. The recommended mode is daemon, because several new functions (like page error predictive failure analysis) require a continuously running daemon. Documentation: The primary reference documentation is the man pages. lk10-mcelog.pdf has an overview of the errors mcelog handles (originally from Linux Kongress 2010) mce.pdf is a very old paper describing the first releases of mcelog (some parts are obsolete) For distributors: You can run mcelog from systemd or similar daemons. An example systemd unit file is in mcelog.service. For older distributions using init scripts: Please install an init script by default that runs mcelog in daemon mode. The mcelog.init script is a good starting point. Also install a logrotate file (mcelog.logrotate) or equivalent when mcelog is running in daemon mode. These two are not in make install. The installation also requires a config file (/etc/mcelog.conf) and the default triggers. These are all installed by "make install" /dev/mcelog is needed for mcelog operation. If it's not there it can be created with mknod /dev/mcelog c 10 227 Normally it should be created automatically in udev. Security: mcelog needs to run as root because it might trigger actions like page-offlining, which require CAP_SYS_ADMIN. Also it opens /dev/mcelog and a unix socket for client support.
It also opens /dev/mem to parse the BIOS DMI tables. It is careful to close the file descriptor and unmap any mappings after using them. There is support for changing the user in daemon mode after opening the device and the sockets, but that would stop triggers from doing corrective actions that require root. In principle it would be possible to only keep CAP_SYS_ADMIN for page-offlining, but that would prevent triggers from doing root-only actions not covered by it (and CAP_SYS_ADMIN is not that different from full root). In daemon mode mcelog listens to a unix socket and processes requests from mcelog --client. This can be disabled in the configuration file. The uid/gid of the requestor is checked on access and is configurable (default 0/0 only). The command parsing code is very straightforward (server.c). The client parsing/reply is currently done with full privileges of the daemon. Testing: There is a simple test suite in tests/. The test suite requires root to run and access to mce-inject and a kernel with MCE injection support (CONFIG_X86_MCE_INJECT). It will kill any running mcelog daemon. Run it with "make test" The test suite requires the mce-inject tool, available from git://git.kernel.org/pub/utils/cpu/mce/mce-inject.git The mce-inject executable must be either in $PATH or in the ../mce-inject directory. You can also test under valgrind with "make valgrind-test". For this valgrind needs to be installed of course. Advanced valgrind options can be specified with make VALGRIND="valgrind --option" valgrind-test Other checks: make iccverify and make clangverify run the static verifiers in icc and clang respectively. License: This program is licensed under the terms of the GNU General Public License, v.2 mcelog-128+dfsg/README.releases000066400000000000000000000005571261732315200162460ustar00rootroot00000000000000 mcelog used to do releases, but has now switched to a rolling release scheme.
That means the git tree is always kept stable and can be used directly in production. To simplify package management, which likes to have increasing version numbers, the commits are regularly tagged with a number. The number starts (arbitrarily) with 100. The tags are named vXXX (e.g. v100) mcelog-128+dfsg/TODO000066400000000000000000000001631261732315200142450ustar00rootroot00000000000000 - unified error output for memory errors - support replacement DIMM table - decode syndromes on K8? (from EDAC) mcelog-128+dfsg/TODO-diskdb000066400000000000000000000014441261732315200155060ustar00rootroot00000000000000 diskdb was an experimental attempt to track errors per DIMM on disk. It ran into problems unfortunately. diskdb is not compiled by default now. It can be enabled with make CONFIG_DISKDB=1 It is now replaced with a new memory-only database that relies on daemon mode. Open fundamental issues: - DIMM tracking over boot doesn't work due to SMBIOS not reporting serial numbers Code problems: - Missing aging - For Intel Nehalem CE errors need reverse smbios translation - SMBIOS interleaving decoding missing - Some crash races in db.c (see comments there) - Need lock timeout - Default enable/disable heuristics (smbios check etc.)
- write db test suite (with crash) General: - Missing CPU database Missing: - rename to different name without memory Old: - add ifdef for memory because it's broken mcelog-128+dfsg/bitfield.c000066400000000000000000000023511261732315200155040ustar00rootroot00000000000000#include #include #include "mcelog.h" #include "bitfield.h" char *reserved_3bits[8]; char *reserved_1bit[2]; char *reserved_2bits[4]; static u64 bitmask(u64 i) { u64 mask = 1; while (mask < i) mask = (mask << 1) | 1; return mask; } void decode_bitfield(u64 status, struct field *fields) { struct field *f; int linelen = 0; char *delim = ""; char buf[60]; int len; for (f = fields; f->str; f++) { u64 v = (status >> f->start_bit) & bitmask(f->stringlen - 1); char *s = NULL; if (v < f->stringlen) s = f->str[v]; if (!s) { if (v == 0) continue; s = buf; buf[(sizeof buf)-1] = 0; snprintf(buf, (sizeof buf) - 1, "<%u:%llx>", f->start_bit, v); } len = strlen(s); if (linelen + len > 75) { delim = "\n"; linelen = 0; } Wprintf("%s%s", delim, s); delim = " "; linelen += len + 1; } if (linelen > 0) Wprintf("\n"); } void decode_numfield(u64 status, struct numfield *fields) { struct numfield *f; for (f = fields; f->name; f++) { u64 mask = (1ULL << (f->end - f->start + 1)) - 1; u64 v = (status >> f->start) & mask; if (v > 0 || f->force) { char fmt[30]; snprintf(fmt, 30, "%%s: %s\n", f->fmt ? 
f->fmt : "%Lu"); Wprintf(fmt, f->name, v); } } } mcelog-128+dfsg/bitfield.h000066400000000000000000000017601261732315200155140ustar00rootroot00000000000000/* Generic bitfield decoder */ struct field { unsigned start_bit; char **str; unsigned stringlen; }; struct numfield { unsigned start, end; char *name; char *fmt; int force; }; #define FIELD(start_bit, name) { start_bit, name, NELE(name) } #define SBITFIELD(start_bit, string) { start_bit, ((char * [2]) { NULL, string }), 2 } #define NUMBER(start, end, name) { start, end, name, "%Lu", 0 } #define NUMBERFORCE(start, end, name) { start, end, name, "%Lu", 1 } #define HEXNUMBER(start, end, name) { start, end, name, "%Lx", 0 } #define HEXNUMBERFORCE(start, end, name) { start, end, name, "%Lx", 1 } void decode_bitfield(u64 status, struct field *fields); void decode_numfield(u64 status, struct numfield *fields); extern char *reserved_3bits[8]; extern char *reserved_1bit[2]; extern char *reserved_2bits[4]; #define MASK(x) ((1ULL << (1 + (x))) - 1) #define EXTRACT(v, a, b) (((v) >> (a)) & MASK((b)-(a))) static inline int test_prefix(int nr, __u32 value) { return ((value >> nr) == 1); } mcelog-128+dfsg/bus.c000066400000000000000000000063421261732315200145170ustar00rootroot00000000000000/* Copyright (C) 20014 Intel Corporation Author: Rui Wang Handle 'Bus and Interconnect' error threshold indications. mcelog is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2. mcelog is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should find a copy of v2 of the GNU General Public License somewhere on your Linux system. 
*/ #define _GNU_SOURCE 1 #include #include #include #include #include #include "memutil.h" #include "mcelog.h" #include "config.h" #include "trigger.h" #include "bus.h" static char *bus_trigger, *iomca_trigger; enum { MAX_ENV = 20, }; void bus_setup(void) { bus_trigger = config_string("socket", "bus-uc-threshold-trigger"); if (bus_trigger && trigger_check(bus_trigger) < 0) { SYSERRprintf("Cannot access bus threshold trigger `%s'", bus_trigger); exit(1); } iomca_trigger = config_string("socket", "iomca-threshold-trigger"); if (iomca_trigger && trigger_check(iomca_trigger) < 0) { SYSERRprintf("Cannot access iomca threshold trigger `%s'", iomca_trigger); exit(1); } } void run_bus_trigger(int socket, int cpu, char *level, char *pp, char *rrrr, char *ii, char *timeout) { int ei = 0; char *env[MAX_ENV]; int i; char *msg; char *location; if (!bus_trigger) return; if (socket >= 0) asprintf(&location, "CPU %d on socket %d", cpu, socket); else asprintf(&location, "CPU %d", cpu); asprintf(&msg, "%s received Bus and Interconnect Errors in %s", location, ii); asprintf(&env[ei++], "LOCATION=%s", location); free(location); if (socket >= 0) asprintf(&env[ei++], "SOCKETID=%d", socket); asprintf(&env[ei++], "MESSAGE=%s", msg); asprintf(&env[ei++], "CPU=%d", cpu); asprintf(&env[ei++], "LEVEL=%s", level); asprintf(&env[ei++], "PARTICIPATION=%s", pp); asprintf(&env[ei++], "REQUEST=%s", rrrr); asprintf(&env[ei++], "ORIGIN=%s", ii); asprintf(&env[ei++], "TIMEOUT=%s", timeout); env[ei] = NULL; assert(ei < MAX_ENV); run_trigger(bus_trigger, NULL, env); for (i = 0; i < ei; i++) free(env[i]); free(msg); } void run_iomca_trigger(int socket, int cpu, int seg, int bus, int dev, int fn) { int ei = 0; char *env[MAX_ENV]; int i; char *msg; char *location; if (!iomca_trigger) return; if (socket >= 0) asprintf(&location, "CPU %d on socket %d", cpu, socket); else asprintf(&location, "CPU %d", cpu); asprintf(&msg, "%s received IO MCA Errors from %x:%02x:%02x.%x", location, seg, bus, dev, fn); 
asprintf(&env[ei++], "LOCATION=%s", location); free(location); if (socket >= 0) asprintf(&env[ei++], "SOCKETID=%d", socket); asprintf(&env[ei++], "MESSAGE=%s", msg); asprintf(&env[ei++], "CPU=%d", cpu); asprintf(&env[ei++], "SEG=%x", seg); asprintf(&env[ei++], "BUS=%02x", bus); asprintf(&env[ei++], "DEVICE=%02x", dev); asprintf(&env[ei++], "FUNCTION=%x", fn); env[ei] = NULL; assert(ei < MAX_ENV); run_trigger(iomca_trigger, NULL, env); for (i = 0; i < ei; i++) free(env[i]); free(msg); } mcelog-128+dfsg/bus.h000066400000000000000000000003171261732315200145200ustar00rootroot00000000000000void bus_setup(void); void run_bus_trigger(int socket, int cpu, char *level, char *pp, char *rrrr, char *ii, char *timeout); void run_iomca_trigger(int socket, int cpu, int seg, int bus, int dev, int fn); mcelog-128+dfsg/cache.c000066400000000000000000000106421261732315200147670ustar00rootroot00000000000000/* Copyright (C) 2008 Intel Corporation Author: Andi Kleen Parse sysfs exported CPU cache topology mcelog is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2. mcelog is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should find a copy of v2 of the GNU General Public License somewhere on your Linux system; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #define _GNU_SOURCE 1 #include #include #include #include #include #include #include "mcelog.h" #include "memutil.h" #include "sysfs.h" #include "cache.h" struct cache { unsigned level; /* Numerical values must match MCACOD */ enum { INSTR, DATA, UNIFIED } type; unsigned *cpumap; unsigned cpumaplen; }; struct cache **caches; static unsigned cachelen; #define PREFIX "/sys/devices/system/cpu" #define MIN_CPUS 8 #define MIN_INDEX 4 static struct map type_map[] = { { "Instruction", INSTR }, { "Data", DATA }, { "Unified", UNIFIED }, { }, }; static void more_cpus(int cpu) { int old = cachelen; if (!cachelen) cachelen = MIN_CPUS/2; cachelen *= 2; caches = xrealloc(caches, cachelen * sizeof(struct cache *)); memset(caches + old, 0, (cachelen - old) * sizeof(struct cache *)); } static unsigned cpumap_len(char *s) { unsigned len = 0, width = 0; do { if (isxdigit(*s)) width++; else { len += round_up(width * 4, BITS_PER_INT) / 8; width = 0; } } while (*s++); return len; } static void parse_cpumap(char *map, unsigned *buf, unsigned len) { char *s; int c; c = 0; s = map + strlen(map); for (;;) { s = memrchr(map, ',', s - map); if (!s) s = map; else s++; buf[c++] = strtoul(s, NULL, 16); if (s == map) break; s--; } assert(len == c * sizeof(unsigned)); } static void read_cpu_map(struct cache *c, char *cfn) { char *map = read_field(cfn, "shared_cpu_map"); if (map[0] == 0) { c->cpumap = NULL; goto out; } c->cpumaplen = cpumap_len(map); c->cpumap = xalloc(c->cpumaplen); parse_cpumap(map, c->cpumap, c->cpumaplen); out: free(map); } static int read_caches(void) { DIR *cpus = opendir(PREFIX); struct dirent *de; if (!cpus) { Wprintf("Cannot read cache topology from %s", PREFIX); return -1; } while ((de = readdir(cpus)) != NULL) { unsigned cpu; if (sscanf(de->d_name, "cpu%u", &cpu) == 1) 
{ struct stat st; char *fn; int i; int numindex; asprintf(&fn, "%s/%s/cache", PREFIX, de->d_name); if (!stat(fn, &st)) { numindex = st.st_nlink - 2; if (numindex < 0) numindex = MIN_INDEX; if (cachelen <= cpu) more_cpus(cpu); caches[cpu] = xalloc(sizeof(struct cache) * (numindex+1)); for (i = 0; i < numindex; i++) { char *cfn; struct cache *c = caches[cpu] + i; asprintf(&cfn, "%s/index%d", fn, i); c->type = read_field_map(cfn, "type", type_map); c->level = read_field_num(cfn, "level"); read_cpu_map(c, cfn); free(cfn); } } free(fn); } } closedir(cpus); return 0; } int cache_to_cpus(int cpu, unsigned level, unsigned type, int *cpulen, unsigned **cpumap) { struct cache *c; if (!caches) { if (read_caches() < 0) return -1; if (!caches) { Wprintf("No caches found in sysfs"); return -1; } } for (c = caches[cpu]; c && c->cpumap; c++) { //printf("%d level %d type %d\n", cpu, c->level, c->type); if (c->level == level && c->type == type) { *cpumap = c->cpumap; *cpulen = c->cpumaplen; return 0; } } Wprintf("Cannot find sysfs cache for CPU %d", cpu); return -1; } #ifdef TEST main() { int cpulen; unsigned *cpumap; cache_to_cpus(1, 1, INSTR, &cpulen, &cpumap); printf("%d %x\n", cpulen, cpumap[0]); cache_to_cpus(1, 1, DATA, &cpulen, &cpumap); printf("%d %x\n", cpulen, cpumap[0]); cache_to_cpus(1, 2, UNIFIED, &cpulen, &cpumap); printf("%d %x\n", cpulen, cpumap[0]); cache_to_cpus(0, 1, INSTR, &cpulen, &cpumap); printf("%d %x\n", cpulen, cpumap[0]); cache_to_cpus(0, 1, DATA, &cpulen, &cpumap); printf("%d %x\n", cpulen, cpumap[0]); cache_to_cpus(0, 2, UNIFIED, &cpulen, &cpumap); printf("%d %x\n", cpulen, cpumap[0]); } #endif mcelog-128+dfsg/cache.h000066400000000000000000000001401261732315200147640ustar00rootroot00000000000000int cache_to_cpus(int cpu, unsigned level, unsigned type, int *cpulen, unsigned **cpumap); mcelog-128+dfsg/client.c000066400000000000000000000034741261732315200152070ustar00rootroot00000000000000/* Copyright (C) 2009 Intel Corporation Author: Andi Kleen Client 
code to talk to the mcelog server. mcelog is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2. mcelog is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should find a copy of v2 of the GNU General Public License somewhere on your Linux system; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include #include "mcelog.h" #include "client.h" #include "paths.h" #include "config.h" /* Send a command to the mcelog server and dump output */ void ask_server(char *command) { struct sockaddr_un sun; int fd; FILE * fp; int n; char buf[1024]; char *path = config_string("server", "socket-path"); if (!path) path = SOCKET_PATH; fd = socket(PF_UNIX, SOCK_STREAM, 0); if (fd < 0) SYSERRprintf("client socket"); sun.sun_family = AF_UNIX; sun.sun_path[sizeof(sun.sun_path)-1] = 0; strncpy(sun.sun_path, path, sizeof(sun.sun_path)-1); if (connect(fd, (struct sockaddr *)&sun, sizeof(struct sockaddr_un)) < 0) SYSERRprintf("client connect"); n = strlen(command); if (write(fd, command, n) != n) SYSERRprintf("client command write"); if ((fp = fdopen(fd, "r")) != NULL) { while (fgets(buf, sizeof buf, fp)) { n = strlen(buf); if (n >= 5 && !memcmp(buf + n - 5, "done\n", 5)) { fclose(fp); return; } fputs(buf, stdout); } fclose(fp); } SYSERRprintf("client read"); } mcelog-128+dfsg/client.h000066400000000000000000000000401261732315200151760ustar00rootroot00000000000000void ask_server(char *command); mcelog-128+dfsg/config-intro.man000066400000000000000000000003621261732315200166510ustar00rootroot00000000000000.SH NAME mcelog.conf \- mcelog.conf reference .SH SYNOPSIS .B /etc/mcelog.conf .SH DESCRIPTION /etc/mcelog.conf is the main 
configuration file for .B mcelog(8). This is configuration file separated into sections including a default section. mcelog-128+dfsg/config.c000066400000000000000000000203451261732315200151720ustar00rootroot00000000000000/* Copyright (C) 2009 Intel Corporation Simple config file parser mcelog is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2. mcelog is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should find a copy of v2 of the GNU General Public License somewhere on your Linux system; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Author: Andi Kleen */ #define _GNU_SOURCE 1 #include #include #include #include #include #include #include #include #include "memutil.h" #include "mcelog.h" #include "config.h" #include "leaky-bucket.h" #include "trigger.h" #ifdef TEST #define Eprintf printf #define Wprintf printf #define xalloc(x) calloc(x,1) #endif /* ISSUES: doesn't detect misspelled options (this would require a major revamp!) 
/*
 * Remove leading/trailing white space.
 *
 * Trailing white space is cut off in place by writing a NUL terminator
 * into S; leading white space is skipped by returning a pointer into S.
 * The returned pointer therefore must not be passed to free().
 */
static char *strstrip(char *s)
{
	char *end;

	/* <ctype.h> functions require an unsigned char value */
	while (isspace((unsigned char)*s))
		s++;

	/* Walk back from the terminator without ever pointing before s */
	end = s + strlen(s);
	while (end > s && isspace((unsigned char)end[-1]))
		end--;
	*end = 0;

	return s;
}
NULL) { *val++ = 0; name = strstrip(s); val = strstrip(val); opt = xalloc(sizeof(struct opt)); opt->name = xstrdup(name); opt->val = xstrdup(val); h = hash(name); if (!hdr) hdr = new_header(hdr, "global"); //printf("[%s] \"%s\" = \"%s\"\n", hdr->name, name, val); if (hdr->optslast[h] == NULL) hdr->opts[h] = opt; else hdr->optslast[h]->next = opt; hdr->optslast[h] = opt; } else if (!empty(s)) { parse_error(lineno, "config file line not field nor header"); } lineno++; } fclose(f); free(line); return 0; } char *config_string(const char *header, const char *name) { struct header *hdr; unsigned h = hash(name); for (hdr = hlist; hdr; hdr = hdr->next) { if (!strcmp(hdr->name, header)) { struct opt *o; for (o = hdr->opts[h]; o; o = o->next) { if (!strcmp(o->name, name)) return o->val; } } } if (strcmp(header, "global")) return config_string("global", name); return NULL; } int config_number(const char *header, const char *name, char *fmt, void *val) { char *str = config_string(header, name); if (str == NULL) return -1; if (sscanf(str, fmt, val) != 1) { unparseable("numerical", header, name); return -1; } return 0; } int config_choice(const char *header, const char *name, const struct config_choice *c) { char *str = config_string(header, name); if (!str) return -1; for (; c->name; c++) { if (!strcasecmp(str, c->name)) return c->val; } unparseable("choice", header, name); return -1; } int config_bool(const char *header, const char *name) { static const struct config_choice bool_choices[] = { { "yes", 1 }, { "true", 1 }, { "1", 1 }, { "on", 1 }, { "no", 0 }, { "false", 0 }, { "0", 0 }, { "off", 0 }, {} }; return config_choice(header, name, bool_choices); } static char *match_arg(char **av, char *arg) { int len = strlen(arg); if (!strncmp(*av, arg, len)) { if ((*av)[len] == '=') { return len + 1 + *av; } else { if (av[1] == NULL) usage(); return av[1]; } } return NULL; } /* Look for the config file argument before parsing the other options because we want to read the config 
file first so that command line options can conveniently override it. */ const char *config_file(char **av, const char *deffn) { char *arg; while (*++av) { if (!strcmp(*av, "--")) break; if ((arg = match_arg(av, "--conf")) != NULL) return arg; } return deffn; } /* Use getopt_long struct option array to process config file */ void config_options(struct option *opts, int (*func)(int)) { for (; opts->name; opts++) { if (!opts->has_arg) { if (config_bool("global", opts->name) != 1) continue; if (opts->flag) { *(opts->flag) = opts->val; continue; } } else { char *s = config_string("global", opts->name); if (s == NULL) continue; optarg = s; } func(opts->val); } } int config_trigger(const char *header, const char *base, struct bucket_conf *bc) { char *s; char *name; int n; asprintf(&name, "%s-threshold", base); s = config_string(header, name); if (s) { if (bucket_conf_init(bc, s) < 0) { unparseable("trigger", header, name); return -1; } } free(name); asprintf(&name, "%s-trigger", base); s = config_string(header, name); if (s) { /* no $PATH */ if (trigger_check(s) != 0) { SYSERRprintf("Trigger `%s' not executable\n", s); exit(1); } bc->trigger = s; } free(name); bc->log = 0; asprintf(&name, "%s-log", base); n = config_bool(header, name); if (n >= 0) bc->log = n; free(name); return 0; } void config_cred(char *header, char *base, struct config_cred *cred) { char *s; char *name; asprintf(&name, "%s-user", base); if ((s = config_string(header, name)) != NULL) { struct passwd *pw; if (!strcmp(s, "*")) cred->uid = -1U; else if ((pw = getpwnam(s)) == NULL) Eprintf("Unknown user `%s' in %s:%s config entry\n", s, header, name); else cred->uid = pw->pw_uid; } free(name); asprintf(&name, "%s-group", base); if ((s = config_string(header, name)) != NULL) { struct group *gr; if (!strcmp(s, "*")) cred->gid = -1U; else if ((gr = getgrnam(s)) == NULL) Eprintf("Unknown group `%s' in %s:%s config entry\n", header, name, s); else cred->gid = gr->gr_gid; } free(name); } #ifdef TEST int 
main(int ac, char **av) { if (!av[1]) printf("need config file\n"), exit(1); if (parse_config_file(av[1]) < 0) printf("cannot parse config file\n"), exit(1); char *type; char *header; char *name; int n; while (scanf("%as %as %as", &type, &header, &name) == 3) { switch (type[0]) { case 'n': if (config_number(header, name, "%d", &n) < 0) printf("Cannot parse number %s %s\n", header, name); else printf("res %d\n", n); break; case 's': printf("res %s\n", config_string(header, name)); break; case 'b': printf("res %d\n", config_bool(header, name)); break; default: printf("unknown type %s\n", type); break; } free(type); free(header); free(name); } return 0; } #endif mcelog-128+dfsg/config.h000066400000000000000000000013521261732315200151740ustar00rootroot00000000000000#include struct config_choice { char *name; int val; }; int config_choice(const char *header, const char *name, const struct config_choice *c); char *config_string(const char *header, const char *name); int config_number(const char *header, const char *name, char *fmt, void *val); int config_bool(const char *header, const char *name); int parse_config_file(const char *fn); const char *config_file(char **av, const char *deffn); struct option; void config_options(struct option *opts, int (*func)(int)); struct bucket_conf; int config_trigger(const char *header, const char *name, struct bucket_conf *bc); struct config_cred { uid_t uid; gid_t gid; }; void config_cred(char *header, char *name, struct config_cred *cred); mcelog-128+dfsg/core2.c000066400000000000000000000057101261732315200147360ustar00rootroot00000000000000#include #include #include #include "mcelog.h" #include "core2.h" #include "bitfield.h" /* Decode P6 family (Core2) model specific errors. 
The generic errors are decoded in p4.c */ /* [19..24] */ static char *bus_queue_req_type[] = { [0] = "BQ_DCU_READ_TYPE", [2] = "BQ_IFU_DEMAND_TYPE", [3] = "BQ_IFU_DEMAND_NC_TYPE", [4] = "BQ_DCU_RFO_TYPE", [5] = "BQ_DCU_RFO_LOCK_TYPE", [6] = "BQ_DCU_ITOM_TYPE", [8] = "BQ_DCU_WB_TYPE", [10] = "BC_DCU_WCEVICT_TYPE", [11] = "BQ_DCU_WCLINE_TYPE", [12] = "BQ_DCU_BTM_TYPE", [13] = "BQ_DCU_INTACK_TYPE", [14] = "BQ_DCU_INVALL2_TYPE", [15] = "BQ_DCU_FLUSHL2_TYPE", [16] = "BQ_DCU_PART_RD_TYPE", [18] = "BQ_DCU_PART_WR_TYPE", [20] = "BQ_DCU_SPEC_CYC_TYPE", [24] = "BQ_DCU_IO_RD_TYPE", [25] = "BQ_DCU_IO_WR_TYPE", [28] = "BQ_DCU_LOCK_RD_TYPE", [30] = "BQ_DCU_SPLOCK_RD_TYPE", [29] = "BQ_DCU_LOCK_WR_TYPE", }; /* [25..27] */ static char *bus_queue_error_type[] = { [0] = "BQ_ERR_HARD_TYPE", [1] = "BQ_ERR_DOUBLE_TYPE", [2] = "BQ_ERR_AERR2_TYPE", [4] = "BQ_ERR_SINGLE_TYPE", [5] = "BQ_ERR_AERR1_TYPE", }; static struct field p6_shared_status[] = { FIELD(16, reserved_3bits), FIELD(19, bus_queue_req_type), FIELD(25, bus_queue_error_type), FIELD(25, bus_queue_error_type), SBITFIELD(30, "internal BINIT"), SBITFIELD(36, "received parity error on response transaction"), SBITFIELD(38, "timeout BINIT (ROB timeout)." " No micro-instruction retired for some time"), FIELD(39, reserved_3bits), SBITFIELD(42, "bus transaction received hard error response"), SBITFIELD(43, "failure that caused IERR"), /* The following are reserved for Core in the SDM. 
Let's keep them here anyways*/ SBITFIELD(44, "two failing bus transactions with address parity error (AERR)"), SBITFIELD(45, "uncorrectable ECC error"), SBITFIELD(46, "correctable ECC error"), /* [47..54]: ECC syndrome */ FIELD(55, reserved_2bits), {}, }; static struct field p6old_status[] = { SBITFIELD(28, "FRC error"), SBITFIELD(29, "BERR on this CPU"), FIELD(31, reserved_1bit), FIELD(32, reserved_3bits), SBITFIELD(35, "BINIT received from external bus"), SBITFIELD(37, "Received hard error reponse on split transaction (Bus BINIT)"), {} }; static struct field core2_status[] = { SBITFIELD(28, "MCE driven"), SBITFIELD(29, "MCE is observed"), SBITFIELD(31, "BINIT observed"), FIELD(32, reserved_2bits), SBITFIELD(34, "PIC or FSB data parity error"), FIELD(35, reserved_1bit), SBITFIELD(37, "FSB address parity error detected"), {} }; static struct numfield p6old_status_numbers[] = { HEXNUMBER(47, 54, "ECC syndrome"), {} }; void core2_decode_model(u64 status) { decode_bitfield(status, p6_shared_status); decode_bitfield(status, core2_status); /* Normally reserved, but let's parse anyways: */ decode_numfield(status, p6old_status_numbers); } void p6old_decode_model(u64 status) { decode_bitfield(status, p6_shared_status); decode_bitfield(status, p6old_status); decode_numfield(status, p6old_status_numbers); } mcelog-128+dfsg/core2.h000066400000000000000000000001121261732315200147320ustar00rootroot00000000000000void core2_decode_model(u64 status); void p6old_decode_model(u64 status); mcelog-128+dfsg/db.c000066400000000000000000000274231261732315200143160ustar00rootroot00000000000000/* Copyright (C) 2006 Andi Kleen, SuSE Labs. Dumb database manager. not suitable for large datasets, but human readable files and simple. assumes groups and entries-per-group are max low double digits. the in memory presentation could be easily optimized with a few hashes, but that shouldn't be needed for now. 
Note: obsolete, new design uses in memory databases only mcelog is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2. mcelog is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should find a copy of v2 of the GNU General Public License somewhere on your Linux system; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* TBD: add lock file to protect final rename timeout for locks */ #define _GNU_SOURCE 1 #include #include #include #include #include #include #include #include #include #include #include "db.h" #include "memutil.h" /* file format # comment [group1] entry1: value entry2: value # comment # comment2 [group2] entry: value value is anything before new line, but first will be skipped spaces are allowed in entry names or groups comments are preserved, but moved in front of the group blank lines allowed. code doesnt check for unique records/entries right now. first wins. */ struct entry { char *name; char *val; }; struct group { struct group *next; char *name; struct entry *entries; char *comment; int numentries; }; #define ENTRY_CHUNK (128 / sizeof(struct entry)) struct database { struct group *groups; FILE *fh; char *fn; int dirty; }; static int read_db(struct database *db); static FILE *open_file(char *fn, int wr); static void free_group(struct group *g); static void DBerror(char *fmt, ...) 
{ va_list ap; va_start(ap,fmt); vfprintf(stderr, fmt, ap); va_end(ap); exit(1); } #define DB_NEW(p) ((p) = xalloc(sizeof(*(p)))) static struct group *alloc_group(char *name) { struct group *g; DB_NEW(g); g->entries = xalloc(ENTRY_CHUNK * sizeof(struct entry)); g->name = name; return g; } static char *cleanline(char *s) { char *p; while (isspace(*s)) s++; if (*s == 0) return NULL; p = strchr(s, '\n'); if (p) *p = 0; return s; } struct database *open_db(char *fn, int wr) { struct database *db; DB_NEW(db); db->fh = open_file(fn, wr); if (!db->fh) { DBerror("Cannot open database %s\n", fn); free(db); return NULL; } db->fn = xstrdup(fn); if (read_db(db) < 0) { free(db->fn); free(db); return NULL; } return db; } static int read_db(struct database *db) { char *line = NULL; size_t linesz = 0; struct group *group = NULL, **pgroup = &db->groups; int linenr = 0; while (getline(&line, &linesz, db->fh) > 0) { char *s; s = strchr(line, '#'); if (s) { struct group *cmt; DB_NEW(cmt); *pgroup = cmt; pgroup = &cmt->next; cmt->comment = xstrdup(s + 1); *s = 0; } s = cleanline(line); linenr++; if (!s) continue; if (*s == '[') { int n; char *name; ++s; n = strcspn(s, "]"); if (s[n] == 0) goto parse_error; name = xalloc(n + 1); memcpy(name, s, n); group = alloc_group(name); *pgroup = group; pgroup = &group->next; } else { char *p; if (!group) goto parse_error; p = s + strcspn(s, ":"); if (*p != ':') goto parse_error; *p++ = 0; if (*p == ' ') p++; else goto parse_error; change_entry(db, group, line, p); } } if (ferror(db->fh)) { DBerror("IO error while reading database %s: %s\n", db->fn, strerror(errno)); goto error; } free(line); return 0; parse_error: DBerror("Parse error in database %s at line %d\n", db->fn, linenr); error: free(line); return -1; } /* Crash safety strategy: While the database is opened hold a exclusive flock on the file When writing write to a temporary file (.out). Only when the file is written rename to another temporary file (.complete). 
Then sync and swap tmp file with main file, then sync directory (later is linux specific) During open if the main file doesn't exist and a .complete file does rename the .complete file to main first; or open the .complete file if the file system is read only. */ /* Flush directory. Useful on ext2, on journaling file systems the later fsync would usually force earlier transactions on the metadata too. */ static int flush_dir(char *fn) { int err, fd; char *p; char dir[strlen(fn) + 1]; strcpy(dir, fn); p = strrchr(dir, '/'); if (p) *p = 0; else strcpy(dir, "."); fd = open(dir, O_DIRECTORY|O_RDONLY); if (fd < 0) return -1; err = 0; if (fsync(fd) < 0) err = -1; if (close(fd) < 0) err = -1; return err; } static int force_rename(char *a, char *b) { unlink(b); /* ignore error */ return rename(a, b); } static int rewrite_db(struct database *db) { FILE *fhtmp; int err; int tmplen = strlen(db->fn) + 10; char fn_complete[tmplen], fn_old[tmplen], fn_out[tmplen]; sprintf(fn_complete, "%s.complete", db->fn); sprintf(fn_old, "%s~", db->fn); sprintf(fn_out, "%s.out", db->fn); fhtmp = fopen(fn_out, "w"); if (!fhtmp) { DBerror("Cannot open `%s' output file: %s\n", fn_out, strerror(errno)); return -1; } dump_database(db, fhtmp); err = 0; /* Finish the output file */ if (ferror(fhtmp) || fflush(fhtmp) != 0 || fsync(fileno(fhtmp)) != 0 || fclose(fhtmp)) err = -1; /* Rename to .complete */ else if (force_rename(fn_out, fn_complete)) err = -1; /* RED-PEN: need to do retry for race */ /* Move to final name */ else if (force_rename(db->fn, fn_old) || rename(fn_complete, db->fn)) err = -1; /* Hit disk */ else if (flush_dir(db->fn)) err = -1; if (err) { DBerror("Error writing to database %s: %s\n", db->fn, strerror(errno)); } return err; } int sync_db(struct database *db) { if (!db->dirty) return 0; /* RED-PEN window without lock */ if (rewrite_db(db)) return -1; fclose(db->fh); db->dirty = 0; db->fh = open_file(db->fn, 1); if (!db->fh) return -1; return 0; } static void free_group(struct 
group *g) { free(g->entries); free(g->name); free(g->comment); free(g); } static void free_data(struct database *db) { struct group *g, *gnext; for (g = db->groups; g; g = gnext) { gnext = g->next; free_group(g); } } int close_db(struct database *db) { if (db->dirty && rewrite_db(db)) return -1; if (fclose(db->fh)) return -1; free_data(db); free(db->fn); free(db); return 0; } static FILE *open_file(char *fn, int wr) { char tmp[strlen(fn) + 10]; FILE *fh; if (access(fn, wr ? (R_OK|W_OK) : R_OK)) { switch (errno) { case EROFS: wr = 0; break; case ENOENT: /* No main DB file */ sprintf(tmp, "%s.complete", fn); /* Handle race */ if (!access(tmp, R_OK)) { if (rename(tmp, fn) < 0 && errno == EEXIST) return open_file(fn, wr); } else creat(fn, 0644); break; } } fh = fopen(fn, wr ? "r+" : "r"); if (fh) { if (flock(fileno(fh), wr ? LOCK_EX : LOCK_SH) < 0) { fclose(fh); return NULL; } } return fh; } void dump_group(struct group *g, FILE *out) { struct entry *e; fprintf(out, "[%s]\n", g->name); for (e = &g->entries[0]; e->name && !ferror(out); e++) fprintf(out, "%s: %s\n", e->name, e->val); } void dump_database(struct database *db, FILE *out) { struct group *g; for (g = db->groups; g && !ferror(out); g = g->next) { if (g->comment) { fprintf(out, "#%s", g->comment); continue; } dump_group(g, out); } } struct group *find_group(struct database *db, char *name) { struct group *g; for (g = db->groups; g; g = g->next) if (g->name && !strcmp(g->name, name)) return g; return NULL; } int delete_group(struct database *db, struct group *group) { struct group *g, **gprev; gprev = &db->groups; for (g = *gprev; g; gprev = &g->next, g = g->next) { if (g == group) { *gprev = g->next; free_group(g); return 0; } } db->dirty = 1; return -1; } char *entry_val(struct group *g, char *entry) { struct entry *e; for (e = &g->entries[0]; e->name; e++) if (!strcmp(e->name, entry)) return e->val; return NULL; } struct group *add_group(struct database *db, char *name, int *existed) { struct group *g, 
**gprev = &db->groups; for (g = *gprev; g; gprev = &g->next, g = g->next) if (g->name && !strcmp(g->name, name)) break; if (existed) *existed = (g != NULL); if (!g) { g = alloc_group(xstrdup(name)); g->next = *gprev; *gprev = g; } db->dirty = 1; return g; } void change_entry(struct database *db, struct group *g, char *entry, char *newval) { int i; struct entry *e, *entries; db->dirty = 1; entries = &g->entries[0]; for (e = entries; e->name; e++) { if (!strcmp(e->name, entry)) { free(e->val); e->val = xstrdup(newval); return; } } i = e - entries; assert(i == g->numentries); if (i > 0 && (i % ENTRY_CHUNK) == 0) { int new = (i + ENTRY_CHUNK) * sizeof(struct entry); g->entries = xrealloc(g->entries, new); } entries = &g->entries[0]; e = &entries[i]; e->name = xstrdup(entry); e->val = xstrdup(newval); g->numentries++; } void delete_entry(struct database *db, struct group *g, char *entry) { struct entry *e; for (e = &g->entries[0]; e->name; e++) if (!strcmp(e->name, entry)) break; if (e->name == NULL) return; while ((++e)->name) e[-1] = e[0]; g->numentries--; } struct group * clone_group(struct database *db, struct group *gold, char *newname) { struct entry *e; struct group *gnew = add_group(db, newname, NULL); for (e = &gold->entries[0]; e->name; e++) change_entry(db, gnew, e->name, e->val); return gnew; } static char *save_comment(char *c) { int len = strlen(c); char *s = xalloc(len + 2); strcpy(s, c); if (len == 0 || c[len - 1] != '\n') s[len] = '\n'; return s; } void add_comment(struct database *db, struct group *group, char *comment) { struct group *g; struct group **gprev = &db->groups; for (g = *gprev; g; gprev = &g->next, g = g->next) { if ((group && g == group) || (!group && g->comment == NULL)) break; } DB_NEW(g); g->comment = save_comment(comment); g->next = *gprev; *gprev = g; db->dirty = 1; } struct group *first_group(struct database *db) { return next_group(db->groups); } struct group *next_group(struct group *g) { struct group *n; if (!g) return NULL; n = 
g->next; while (n && n->comment) n = n->next; return n; } char *group_name(struct group *g) { return g->name; } struct group *find_entry(struct database *db, struct group *prev, char *entry, char *value) { int previ = 0; struct entry *e; struct group *g; if (prev) g = prev->next; else g = db->groups; for (; g; g = g->next) { if (g->comment) continue; /* Short cut when entry is at the same place as previous */ if (previ < g->numentries) { e = &g->entries[previ]; if (!strcmp(e->name, entry)) { if (!strcmp(e->val, value)) return g; continue; } } for (e = &g->entries[0]; e->name; e++) { if (strcmp(e->name, entry)) continue; if (!strcmp(e->val, value)) return g; previ = e - &g->entries[0]; break; } } return NULL; } void rename_group(struct database *db, struct group *g, char *newname) { free(g->name); g->name = xstrdup(newname); db->dirty = 1; } unsigned long entry_num(struct group *g, char *entry) { char *e = entry_val(g, entry); unsigned long val = 0; if (e) sscanf(e, "%lu", &val); return val; } void change_entry_num(struct database *db, struct group *g, char *entry, unsigned long val) { char buf[20]; sprintf(buf, "%lu", val); change_entry(db, g, entry, buf); } mcelog-128+dfsg/db.h000066400000000000000000000023641261732315200143200ustar00rootroot00000000000000#include struct database; struct group; struct database *open_db(char *fn, int wr); int sync_db(struct database *db); int close_db(struct database *db); struct group *find_group(struct database *db, char *name); char *entry_val(struct group *g, char *entry); struct group *add_group(struct database *db, char *name, int *existed); int delete_group(struct database *db, struct group *g); void change_entry(struct database *db, struct group *g, char *entry, char *newval); void add_comment(struct database *db, struct group *group, char *comment); struct group *first_group(struct database *db); struct group *next_group(struct group *g); void dump_group(struct group *g, FILE *out); void dump_database(struct database *db, 
FILE *out); struct group *find_entry(struct database *db, struct group *prev, char *entry, char *value); void rename_group(struct database *db, struct group *group, char *newname); char *group_name(struct group *g); unsigned long entry_num(struct group *g, char *entry); void change_entry_num(struct database *db, struct group *g, char *entry, unsigned long val); void delete_entry(struct database *db, struct group *g, char *entry); struct group * clone_group(struct database *db, struct group *gold, char *newname); mcelog-128+dfsg/dbquery.c000066400000000000000000000047521261732315200154040ustar00rootroot00000000000000/* Access db files. This is for testing and debugging only. */ #define _GNU_SOURCE 1 #include #include #include #include #include #include #include "db.h" #define C(x) if (x) printf(#x " failed: %s\n", strerror(errno)) #define NEEDGROUP if (!group) { printf("need group first\n"); break; } void Eprintf(char *fmt, ...) { va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); } void usage(void) { printf( "s sync\n" "q close/quit\n" "ggroup find group\n" "G delete group\n" "agroup add group\n" "ventry dump entry\n" "centry,val change entry to val\n" "fentry,val find entry with value and dump its group\n" "Ccomment add comment\n" "Lnewname clone group to newname\n" "d dump group\n" "D dump database\n"); } int main(int ac, char **av) { struct database *db; struct group *group = NULL; char *line = NULL; size_t linesz = 0; if (!av[1]) { printf("%s database\n", av[0]); exit(1); } printf("dbtest\n"); db = open_db(av[1], 1); while (printf("> "), fflush(stdout), getline(&line, &linesz, stdin) > 0) { char *p = line + strlen(line) - 1; while (p >= line && isspace(*p)) *p-- = 0; switch (line[0]) { case 's': C(sync_db(db)); break; case 'q': C(close_db(db)); exit(0); case 'g': group = find_group(db, line + 1); if (group) printf("found\n"); break; case 'G': NEEDGROUP; C(delete_group(db, group)); group = NULL; break; case 'a': { int existed = 0; group = 
add_group(db, line + 1, &existed); if (existed) printf("existed\n"); break; } case 'v': NEEDGROUP; printf("%s\n", entry_val(group, line + 1)); break; case 'c': { p = line + 1; char *entry = strsep(&p, ","); NEEDGROUP; change_entry(db, group, entry, strsep(&p, "")); break; } case 'L': NEEDGROUP; clone_group(db, group, line + 1); break; case 'f': { struct group *g; p = line + 1; char *entry = strsep(&p, ","); char *val = strsep(&p, ""); g = NULL; int nr = 0; while ((g = find_entry(db, g, entry, val)) != NULL) { if (nr == 0) group = g; nr++; dump_group(group, stdout); } if (nr == 0) printf("not found\n"); break; } case 'C': NEEDGROUP; add_comment(db, group, line + 1); break; case 'd': NEEDGROUP; dump_group(group, stdout); break; case 'D': dump_database(db, stdout); break; default: usage(); break; } } return 0; } mcelog-128+dfsg/dimm.c000066400000000000000000000243421261732315200146540ustar00rootroot00000000000000/* Copyright (C) 2006 Andi Kleen, SuSE Labs. Manage dimm database. this is used to keep track of the error counts per DIMM so that we can take action when one starts to experience a unusual large number of them. Note: obsolete, not used anymore, new design is in memdb.c mcelog is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2. mcelog is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should find a copy of v2 of the GNU General Public License somewhere on your Linux system; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* TBD: Put error trigger information into database? 
*/ #include #include #include #include #include #include #include #include "dmi.h" #include "mcelog.h" #include "db.h" #include "dimm.h" /* the algorithms are mostly brute force, only the generally small number of dimms saves us. advantage it is a quite simple and straight forward. */ struct database *dimm_db; struct key { char *name; size_t offset; enum { D_STR, D_BYTE, D_WORD, D_SIZE } type; int cmp; }; static unsigned key_sizes[] = { [D_STR] = 1, [D_BYTE] = 1, [D_WORD] = 2, [D_SIZE] = 2, }; #define O(x) offsetof(struct dmi_memdev, x) static struct key keys[] = { { "Locator", O(device_locator), D_STR, 0 }, { "Manufacturer", O(manufacturer), D_STR, 1}, { "Serial Number", O(serial_number),D_STR, 1}, { "Part Number", O(part_number), D_STR,10}, { "Asset Tag", O(asset_tag), D_STR, 0}, { "Speed", O(speed), D_WORD, 0}, { "Size", O(speed), D_SIZE, 1}, { "Form Factor", O(form_factor), D_STR, 0 }, { "Type Details", O(type_details), D_WORD, 1 }, { "Memory Type", O(memory_type), D_BYTE, 1 }, { "Total Width", O(total_width), D_WORD, 0 }, { "Data Width", O(data_width), D_WORD, 0 }, { "Device Set", O(device_set), D_BYTE, 0 }, { "Handle", O(header.handle), D_WORD, 0 }, { "Bank Locator", O(bank_locator), D_STR, 0 }, { "Array Handle", O(array_handle), D_WORD, 0 }, { }, }; static void fmt_size(struct dmi_memdev *a, char *buf) { char *unit; unit = buf + sprintf(buf, "%u ", a->size); dmi_dimm_size(a->size, unit); *++unit = 0; } static char *d_string(struct dmi_memdev *d, struct key *k, char *buf) { unsigned char *p; if (k->offset + key_sizes[k->type] > d->header.length) return NULL; p = (unsigned char *)d + k->offset; switch (k->type) { case D_BYTE: sprintf(buf, "%u", *p); break; case D_WORD: sprintf(buf, "%u", *(unsigned short *)p); break; case D_STR: return dmi_getstring(&d->header, *p); case D_SIZE: fmt_size(d, buf); break; default: abort(); } return buf; } static int cmp_dimm(struct dmi_memdev *a, struct group *b) { int i; for (i = 0; keys[i].name; i++) { char buf[100]; struct 
key *k = &keys[i]; if (!k->cmp) continue; char *s = d_string(a, k, buf); if (!s) continue; char *s2 = entry_val(b, k->name); if (!s2) continue; if (strcmp(s, s2)) return 0; } return 1; } static void d_to_group(struct dmi_memdev *de, struct group *g) { char buf[100]; int i; for (i = 0; keys[i].name; i++) { struct key *k = &keys[i]; char *s = d_string(de, k, buf); if (s) change_entry(dimm_db, g, k->name, s); } } /* TBD get this into syslog somehow without spamming? */ static void unique_warning(void) { static int warned; if (warned) return; warned = 1; Wprintf("Cannot uniquely identify your memory modules\n"); Wprintf("When changing them you should manage them using command line mcelog\n"); } static struct dmi_memdev *matching_dimm_group(struct group *g) { int i; struct dmi_memdev *match = NULL; int nmatch = 0; for (i = 0; dmi_dimms[i]; i++) { if (cmp_dimm(dmi_dimms[i], g)) { match = dmi_dimms[i]; nmatch++; } } if (nmatch > 1) { unique_warning(); return NULL; } return match; } static struct group *matching_dimm_dmi(struct dmi_memdev *d) { struct group *match = NULL, *g; int nmatch = 0; for (g = first_group(dimm_db); g; g = next_group(g)) { if (!cmp_dimm(d, g)) { match = g; nmatch++; } } if (nmatch > 1) { unique_warning(); return NULL; } return match; } void create_dimm_name(struct dmi_memdev *d, char *buf) { int i = 1; do { sprintf(buf, "dimm%d", i++); } while (find_group(dimm_db, buf)); } static char *timestamp(void) { static char buf[20]; time_t now; time(&now); sprintf(buf, "%lu", now); return buf; } static void remove_dimm(struct group *g) { char *loc = entry_val(g, "Locator"); Wprintf("Removing %s who was at %s\n", group_name(g), loc); change_entry(dimm_db, g, "old locator", loc); change_entry(dimm_db, g, "Locator", "removed"); change_entry(dimm_db, g, "removed at", timestamp()); } static void disable_leftover_dimms(void) { int i; struct group *g; /* Disable any left over dimms in the database. 
don't remove them because the information might be still useful later */ for (g = first_group(dimm_db); g; g = next_group(g)) { char *gloc = entry_val(g, "Locator"); if (!gloc || !strcmp(gloc, "removed")) continue; for (i = 0; dmi_dimms[i]; i++) { struct dmi_memdev *d = dmi_dimms[i]; char *loc = dmi_getstring(&d->header, d->device_locator); if (!strcmp(loc, gloc)) break; } if (dmi_dimms[i] == NULL) remove_dimm(g); } } void move_dimm(struct group *g, struct dmi_memdev *newpos, char *loc) { char *newloc = dmi_getstring(&newpos->header, newpos->device_locator); Wprintf("%s seems to have moved from %s to %s\n", group_name(g), loc, newloc); change_entry(dimm_db, g, "old locator", loc); change_entry(dimm_db, g, "Locator", newloc); delete_entry(dimm_db, g, "removed at"); change_entry(dimm_db, g, "moved at", timestamp()); } void new_dimm(struct dmi_memdev *d, char *loc) { struct group *g; char name[100]; create_dimm_name(d, name); g = add_group(dimm_db, name, NULL); d_to_group(d, g); change_entry(dimm_db, g, "added at", timestamp()); Wprintf("Found new %s at %s\n", name, loc); /* Run uniqueness check */ (void)matching_dimm_group(g); } /* check if reported dimms are at their places */ void check_dimm_positions(void) { int i; struct group *g; struct dmi_memdev *d; struct dmi_memdev *match; for (i = 0; (d = dmi_dimms[i]) != NULL; i++) { char *loc = dmi_getstring(&d->header, d->device_locator); g = find_entry(dimm_db, NULL, "Locator", loc); /* In the database, but somewhere else? */ if (g && !cmp_dimm(d, g)) { match = matching_dimm_group(g); if (match) move_dimm(g, match, loc); else remove_dimm(g); g = NULL; /* In DMI but somewhere else? 
*/ } else if (!g) { g = matching_dimm_dmi(d); if (g) move_dimm(g, d, loc); } if (!g) new_dimm(d, loc); } } /* synchronize database with smbios */ int sync_dimms(void) { if (!dmi_dimms) return -1; check_dimm_positions(); disable_leftover_dimms(); sync_db(dimm_db); return 0; } void gc_dimms(void) { struct group *g; while ((g = find_entry(dimm_db, NULL, "Locator", "removed")) != NULL) { Wprintf("Purging removed %s which was at %s\n", group_name(g), entry_val(g, "Old Locator")); delete_group(dimm_db, g); } sync_db(dimm_db); } static unsigned long inc_val(struct group *g, char *entry) { unsigned long val = entry_num(g, entry) + 1; change_entry_num(dimm_db, g, entry, val); return val; } static void run_trigger(char *trigger, char *loc, unsigned long val, unsigned long max) { pid_t pid; Lprintf("Running error trigger because memory at %s had %lu errors\n", loc, max); close_dimm_db(); if ((pid = fork()) == 0) { char valbuf[20], maxbuf[20]; char *argv[] = { trigger, loc, valbuf, maxbuf, NULL }; char *env[] = { "PATH=/sbin:/usr/bin", NULL }; sprintf(valbuf, "%lu", val); sprintf(maxbuf, "%lu", max); execve(trigger, argv, env); _exit(1); } int status; if (waitpid(pid, &status, 0) || !WIFEXITED(status) || WEXITSTATUS(status) != 0) Eprintf("Cannot run error trigger %s for %s\n", trigger, loc); open_dimm_db(NULL); } void new_error(unsigned long long addr, unsigned long max_error, char *trigger) { struct dmi_memdev **devs; int i; devs = dmi_find_addr(addr); if (devs[0] == NULL) { Wprintf("No memory found for address %Lx\n", addr); exit(1); } for (i = 0; devs[i]; i++) { struct dmi_memdev *d = devs[i]; char *loc = dmi_getstring(&d->header, d->device_locator); struct group *g = find_entry(dimm_db, NULL, "Locator", loc); if (!g) { // shouldn't happen Eprintf("No record found for %Lx\n", addr); return; } unsigned long val = inc_val(g, "corrected errors"); if (val == max_error) { Lprintf("Large number of corrected errors in memory at %s", loc); Lprintf("Consider replacing it"); if 
(trigger && trigger[0]) run_trigger(trigger, loc, val, max_error); } } free(devs); } void reset_dimm(char *locator) { struct group *g; if (locator) { g = find_entry(dimm_db, NULL, "Locator", locator); if (!g) { fprintf(stderr, "Locator %s not found\n", locator); exit(1); } change_entry(dimm_db, g, "corrected errors", "0"); } else { for (g = first_group(dimm_db); g; g = next_group(g)) change_entry(dimm_db, g, "corrected errors", "0"); } sync_db(dimm_db); } struct group *lookup_dimm(char *locator) { struct group *g = find_entry(dimm_db, NULL, "Locator", locator); return g; } void dump_all_dimms(void) { dump_database(dimm_db, stdout); } void dump_dimm(char *locator) { struct group *g = lookup_dimm(locator); if (g) dump_group(g, stdout); else fprintf(stderr, "%s not found\n", locator); } void close_dimm_db(void) { if (dimm_db) { close_db(dimm_db); dimm_db = NULL; } } int open_dimm_db(char *fn) { static char *old_db_name; if (dmi_dimms < 0) return -1; if (dimm_db) return 0; if (!fn) { fn = old_db_name; } else { old_db_name = strdup(fn); if (!old_db_name) exit(ENOMEM); atexit(close_dimm_db); } dimm_db = open_db(fn, 1); if (!dimm_db) { Eprintf("Cannot open dimm database %s: %s", fn, strerror(errno)); return -1; } if (sync_dimms() < 0) return -1; return 0; } mcelog-128+dfsg/dimm.h000066400000000000000000000003671261732315200146620ustar00rootroot00000000000000void close_dimm_db(void); int open_dimm_db(char *fn); void new_error(unsigned long long addr, unsigned long max_error, char *trigger); void reset_dimm(char *locator); void gc_dimms(void); void dump_all_dimms(void); void dump_dimm(char *locator); mcelog-128+dfsg/diskdb.c000066400000000000000000000034471261732315200151710ustar00rootroot00000000000000/* High level interface to disk based DIMM database */ /* Note: obsolete: new design is in memdb.c */ #include #include #include #include "mcelog.h" #include "diskdb.h" #include "paths.h" #include "dimm.h" #include "dmi.h" char *error_trigger; unsigned error_thresh = 20; char 
*dimm_db_fn = DIMM_DB_FILENAME; static void checkdimmdb(void) { if (open_dimm_db(dimm_db_fn) < 0) exit(1); } int diskdb_modifier(int opt) { char *end; switch (opt) { case O_DATABASE: dimm_db_fn = optarg; checkdmi(); checkdimmdb(); break; case O_ERROR_TRIGGER: checkdmi(); open_dimm_db(dimm_db_fn); error_thresh = strtoul(optarg, &end, 0); if (end == optarg || *end != ',') usage(); error_trigger = end + 1; break; default: return 0; } return 1; } void diskdb_resolve_addr(u64 addr) { if (open_dimm_db(dimm_db_fn) >= 0) new_error(addr, error_thresh, error_trigger); } void diskdb_usage(void) { fprintf(stderr, "Manage disk DIMM error database\n" " mcelog [options] --drop-old-memory|--reset-memory locator\n" " mcelog --dump-memory locator\n" " old can be either locator or name\n" "Disk database options:" "--database fn Set filename of DIMM database (default " DIMM_DB_FILENAME ")\n" "--error-trigger cmd,thresh Run cmd on exceeding thresh errors per DIMM\n"); } static void dimm_common(int ac, char **av) { no_syslog(); checkdmi(); checkdimmdb(); argsleft(ac, av); } int diskdb_cmd(int opt, int ac, char **av) { char *arg = optarg; switch (opt) { case O_DUMP_MEMORY: dimm_common(ac, av); if (arg) dump_dimm(arg); else dump_all_dimms(); return 1; case O_RESET_MEMORY: dimm_common(ac, av); reset_dimm(arg); return 1; case O_DROP_OLD_MEMORY: dimm_common(ac, av); gc_dimms(); return 1; } return 0; } mcelog-128+dfsg/diskdb.h000066400000000000000000000014431261732315200151700ustar00rootroot00000000000000 #ifdef CONFIG_DISKDB enum diskdb_options { O_DATABASE = O_DISKDB, O_ERROR_TRIGGER, O_DUMP_MEMORY, O_RESET_MEMORY, O_DROP_OLD_MEMORY, }; void diskdb_resolve_addr(u64 addr); int diskdb_modifier(int opt); int diskdb_cmd(int opt, int ac, char **av); void diskdb_usage(void); #define DISKDB_OPTIONS \ { "database", 1, NULL, O_DATABASE }, \ { "error-trigger", 1, NULL, O_ERROR_TRIGGER }, \ { "dump-memory", 2, NULL, O_DUMP_MEMORY }, \ { "reset-memory", 2, NULL, O_RESET_MEMORY }, \ { "drop-old-memory", 
0, NULL, O_DROP_OLD_MEMORY }, #else static inline void diskdb_resolve_addr(u64 addr) {} static inline int diskdb_modifier(int opt) { return 0; } static inline int diskdb_cmd(int opt, int ac, char **av) { return 0; } static inline void diskdb_usage(void) {} #define DISKDB_OPTIONS #endif mcelog-128+dfsg/dmi.c000066400000000000000000000355431261732315200145040ustar00rootroot00000000000000/* Copyright (C) 2006 Andi Kleen, SuSE Labs. Use SMBIOS/DMI to map address to DIMM description. For reference see the SMBIOS specification 2.4 dmi is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2. dmi is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should find a copy of v2 of the GNU General Public License somewhere on your Linux system; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* Notebook add an option to dump existing errors in SMBIOS? implement code to look up PCI resources too. 
*/ #define _GNU_SOURCE 1 #include #include #include #include #include #include #include #include #include "mcelog.h" #include "dmi.h" #include "memutil.h" static int verbose = 0; int dmi_forced; int do_dmi; struct anchor { char str[4]; /* _SM_ */ char csum; char entry_length; char major; char minor; short maxlength; char rev; char fmt[5]; char str2[5]; /* _DMI_ */ char csum2; unsigned short length; unsigned table; unsigned short numentries; char bcdrev; } __attribute__((packed)); static struct dmi_entry *entries; static int entrieslen; static int numentries; static int dmi_length; static struct dmi_entry **handle_to_entry; struct dmi_memdev **dmi_dimms; struct dmi_memarray **dmi_arrays; struct dmi_memdev_addr **dmi_ranges; struct dmi_memarray_addr **dmi_array_ranges; static void collect_dmi_dimms(void); static struct dmi_entry **dmi_collect(int type, int minsize, int *len); static void dump_ranges(struct dmi_memdev_addr **, struct dmi_memdev **); static unsigned checksum(unsigned char *s, int len) { unsigned char csum = 0; int i; for (i = 0; i < len; i++) csum += s[i]; return csum; } /* Check if entry is valid */ static int check_entry(struct dmi_entry *e, struct dmi_entry **next) { char *end = (char *)entries + dmi_length; char *s; if (!e) return 0; s = (char *)e + e->length; if (verbose > 3) printf("length %d handle %x\n", e->length, e->handle); do { if (verbose > 3) printf("string %s\n", s); while (s < end-1 && *s) s++; if (s >= end-1) { if (verbose > 0) printf("handle %x length %d truncated\n", e->handle, e->length); return 0; } s++; } while (*s); if (s >= end) *next = NULL; else *next = (struct dmi_entry *)(s + 1); return 1; } /* Relies on sanity checks in check_entry */ char *dmi_getstring(struct dmi_entry *e, unsigned number) { char *s = (char *)e + e->length; if (number == 0) return ""; do { if (--number == 0) return s; while (*s) s++; s++; } while (*s); return NULL; } static void fill_handles(void) { int i; struct dmi_entry *e, *next; e = entries; 
handle_to_entry = xalloc(sizeof(void *) * 0xffff); for (i = 0; i < numentries; i++, e = next) { if (!check_entry(e, &next)) break; handle_to_entry[e->handle] = e; } } static int get_efi_base_addr(size_t *address) { FILE *efi_systab; const char *filename; char linebuf[64]; int ret = 0; *address = 0; /* Prevent compiler warning */ /* Linux 2.6.7 and up: /sys/firmware/efi/systab */ filename = "/sys/firmware/efi/systab"; if ((efi_systab = fopen(filename, "r")) != NULL) goto check_symbol; /* Linux up to 2.6.6: /proc/efi/systab */ filename = "/proc/efi/systab"; if ((efi_systab = fopen(filename, "r")) != NULL) goto check_symbol; /* Failed to open EFI interfaces */ return ret; check_symbol: while ((fgets(linebuf, sizeof(linebuf) - 1, efi_systab)) != NULL) { char *addrp = strchr(linebuf, '='); if (!addrp) break; *(addrp++) = '\0'; if (strcmp(linebuf, "SMBIOS") == 0) { *address = strtoul(addrp, NULL, 0); ret = 1; break; } } if (fclose(efi_systab) != 0) perror(filename); if (!ret || !*address){ Lprintf("No valid SMBIOS entry point: Continue without DMI decoding"); return 0; } if (verbose) printf("%s: SMBIOS entry point at 0x%08lx\n", filename, (unsigned long)*address); return ret; } int opendmi(void) { struct anchor *a, *abase; void *p, *q; int pagesize = getpagesize(); int memfd; unsigned corr; int err = -1; const int segsize = 0x10000; size_t entry_point_addr = 0; size_t length = 0; if (entries) return 0; memfd = open("/dev/mem", O_RDONLY); if (memfd < 0) { Eprintf("Cannot open /dev/mem for DMI decoding: %s", strerror(errno)); return -1; } /* * On EFI-based systems, the SMBIOS Entry Point structure can be * located by looking in the EFI Configuration Table. */ if (get_efi_base_addr(&entry_point_addr)) { size_t addr_start = round_down(entry_point_addr, pagesize); size_t addr_end = round_up(entry_point_addr + 0x20, pagesize); length = addr_end - addr_start; /* mmap() the address of SMBIOS structure table entry point. 
/*
 * Decode a raw 16-bit DIMM size value into a number plus a unit string.
 *
 * size: raw value; bit 15 set selects kilobytes, bit 15 clear selects
 *       megabytes. The low 15 bits carry the amount.
 * unit: receives "KB", "MB" or "GB" (needs room for 3 bytes).
 *
 * Megabyte amounts of 1024 or more are reported in whole gigabytes
 * (integer division, any remainder is dropped). Returns the amount in the
 * unit written to unit.
 */
unsigned dmi_dimm_size(unsigned short size, char *unit)
{
	unsigned value = size & 0x7fffu;
	const char *suffix;

	if (size & 0x8000u) {
		suffix = "KB";
	} else if (value < 1024) {
		suffix = "MB";
	} else {
		suffix = "GB";
		value /= 1024;
	}
	strcpy(unit, suffix);
	return value;
}
/*
 * Names for the SMBIOS Memory Device "Memory Type" field, indexed by the
 * raw field value. Values past the end of the table are printed
 * numerically by LOOKUP().
 *
 * Extended past DDR2 with the values later SMBIOS revisions assign
 * (DDR3, DDR4, ...). The reserved values 0x15-0x17 keep the numeric form
 * they had before the table was extended.
 */
static char *memory_types[] = {
	"?",
	"Other",
	"Unknown",
	"DRAM",
	"EDRAM",
	"VRAM",
	"SRAM",
	"RAM",
	"ROM",
	"FLASH",
	"EEPROM",
	"FEPROM",
	"EPROM",
	"CDRAM",
	"3DRAM",
	"SDRAM",
	"SGRAM",
	"RDRAM",
	"DDR",
	"DDR2",
	"DDR2 FB-DIMM",	/* 0x14 */
	"<21>",		/* 0x15 reserved */
	"<22>",		/* 0x16 reserved */
	"<23>",		/* 0x17 reserved */
	"DDR3",		/* 0x18 */
	"FBD2",		/* 0x19 */
	"DDR4",		/* 0x1a */
	"LPDDR",	/* 0x1b */
	"LPDDR2",	/* 0x1c */
	"LPDDR3",	/* 0x1d */
	"LPDDR4",	/* 0x1e */
};
Take with a grain of salt!\n"); } static int cmp_range(const void *a, const void *b) { struct dmi_memdev_addr *ap = *(struct dmi_memdev_addr **)a; struct dmi_memdev_addr *bp = *(struct dmi_memdev_addr **)b; return (int)ap->start_addr - (int)bp->end_addr; } static int cmp_arr_range(const void *a, const void *b) { struct dmi_memarray_addr *ap = *(struct dmi_memarray_addr **)a; struct dmi_memarray_addr *bp = *(struct dmi_memarray_addr **)b; return (int)ap->start_addr - (int)bp->end_addr; } #define COLLECT(var, id, ele) { \ typedef typeof (**(var)) T; \ var = (T **)dmi_collect(id, \ offsetof(T, ele) + sizeof_field(T, ele), \ &len); \ } static void collect_dmi_dimms(void) { int len; COLLECT(dmi_ranges, DMI_MEMORY_MAPPED_ADDR, dev_handle); qsort(dmi_ranges, len, sizeof(struct dmi_entry *), cmp_range); COLLECT(dmi_dimms, DMI_MEMORY_DEVICE, device_locator); if (verbose > 1) dump_ranges(dmi_ranges, dmi_dimms); COLLECT(dmi_arrays, DMI_MEMORY_ARRAY, location); COLLECT(dmi_array_ranges, DMI_MEMORY_ARRAY_ADDR, array_handle); qsort(dmi_array_ranges, len, sizeof(struct dmi_entry *),cmp_arr_range); } #undef COLLECT static struct dmi_entry ** dmi_collect(int type, int minsize, int *len) { struct dmi_entry **r; struct dmi_entry *e, *next; int i, k; r = xalloc(sizeof(struct dmi_entry *) * (numentries + 1)); k = 0; e = entries; next = NULL; for (i = 0; i < numentries; i++, e = next) { if (!check_entry(e, &next)) break; if (e->type != type) continue; if (e->length < minsize) { if (verbose > 0) printf("hnd %x size %d expected %d\n", e->handle, e->length, minsize); continue; } if (type == DMI_MEMORY_DEVICE && ((struct dmi_memdev *)e)->size == 0) { if (verbose > 0) printf("entry %x disabled\n", e->handle); continue; } r[k++] = e; } *len = k; return r; } #define FAILED " SMBIOS DIMM sanity check failed\n" int dmi_sanity_check(void) { int i, k; int numdmi_dimms = 0; int numranges = 0; if (dmi_ranges[0] == NULL) return 0; for (k = 0; dmi_dimms[k]; k++) numdmi_dimms++; /* Do we have multiple 
ranges? */ for (k = 1; dmi_ranges[k]; k++) { if (dmi_ranges[k]->start_addr <= dmi_ranges[k-1]->end_addr) { return 0; } if (dmi_ranges[k]->start_addr >= dmi_ranges[k-1]->end_addr) numranges++; } if (numranges == 1 && numdmi_dimms > 2) { if (verbose > 0) printf("Not enough unique address ranges." FAILED); return 0; } /* Unique locators? */ for (k = 0; dmi_dimms[k]; k++) { char *loc; loc = dmi_getstring(&dmi_dimms[k]->header, dmi_dimms[k]->device_locator); if (!loc) { if (verbose > 0) printf("Missing locator." FAILED); return 0; } for (i = 0; i < k; i++) { char *b = dmi_getstring(&dmi_dimms[i]->header, dmi_dimms[i]->device_locator); if (!strcmp(b, loc)) { if (verbose > 0) printf("Ambiguous locators `%s'<->`%s'." FAILED, b, loc); return 0; } } } return 1; } #define DMIGET(p, member) \ (offsetof(typeof(*(p)), member) + sizeof((p)->member) <= (p)->header.length ? \ (p)->member : 0) static void dump_ranges(struct dmi_memdev_addr **ranges, struct dmi_memdev **dmi_dimms) { int i; printf("RANGES\n"); for (i = 0; ranges[i]; i++) printf("range %x-%x h %x a %x row %u ilpos %u ildepth %u\n", ranges[i]->start_addr, ranges[i]->end_addr, ranges[i]->dev_handle, DMIGET(ranges[i], memarray_handle), DMIGET(ranges[i], row), DMIGET(ranges[i], interleave_pos), DMIGET(ranges[i], interleave_depth)); printf("DMI_DIMMS\n"); for (i = 0; dmi_dimms[i]; i++) printf("dimm h %x width %u datawidth %u size %u set %u\n", dmi_dimms[i]->header.handle, dmi_dimms[i]->total_width, DMIGET(dmi_dimms[i],data_width), DMIGET(dmi_dimms[i],size), DMIGET(dmi_dimms[i],device_set)); } struct dmi_memdev **dmi_find_addr(unsigned long long addr) { struct dmi_memdev **devs; int i, k; devs = xalloc(sizeof(void *) * (numentries+1)); k = 0; for (i = 0; dmi_ranges[i]; i++) { struct dmi_memdev_addr *da = dmi_ranges[i]; if (addr < ((unsigned long long)da->start_addr)*1024 || addr >= ((unsigned long long)da->end_addr)*1024) continue; devs[k] = (struct dmi_memdev *)handle_to_entry[da->dev_handle]; if (devs[k]) k++; } #if 0 /* 
Need to implement proper decoding of interleaving sets before enabling this. */ int j, w; for (i = 0; dmi_array_ranges[i]; i++) { struct dmi_memarray_addr *d = dmi_array_ranges[i]; if (addr < ((unsigned long long)d->start_addr)*1024 || addr >= ((unsigned long long)d->end_addr)*1024) continue; for (w = 0; dmi_dimms[w]; w++) { struct dmi_memdev *m = dmi_dimms[w]; if (m->array_handle == d->array_handle) { for (j = 0; j < k; j++) { if (devs[j] == m) break; } if (j == k) devs[k++] = m; } } } #endif devs[k] = NULL; return devs; } void dmi_decodeaddr(unsigned long long addr) { struct dmi_memdev **devs = dmi_find_addr(addr); if (devs[0]) { int i; warnuser(); for (i = 0; devs[i]; i++) dump_memdev(devs[i], addr); } else { Wprintf("No DIMM found for %llx in SMBIOS\n", addr); } free(devs); } void dmi_set_verbosity(int v) { verbose = v; } void checkdmi(void) { static int dmi_checked; if (dmi_checked) return; dmi_checked = 1; if (dmi_forced && !do_dmi) return; if (opendmi() < 0) { if (dmi_forced) exit(1); do_dmi = 0; return; } if (!dmi_forced) do_dmi = dmi_sanity_check(); } #define FREE(x) free(x), (x) = NULL void closedmi(void) { if (!entries) return; munmap(entries, entrieslen); entries = NULL; FREE(dmi_dimms); FREE(dmi_arrays); FREE(dmi_ranges); FREE(dmi_array_ranges); FREE(handle_to_entry); } mcelog-128+dfsg/dmi.h000066400000000000000000000037561261732315200145120ustar00rootroot00000000000000 struct dmi_entry { unsigned char type; unsigned char length; unsigned short handle; }; enum { DMI_MEMORY_ARRAY = 16, DMI_MEMORY_DEVICE = 17, DMI_MEMORY_ARRAY_ADDR = 19, DMI_MEMORY_MAPPED_ADDR = 20, }; struct dmi_memdev_addr { struct dmi_entry header; unsigned start_addr; unsigned end_addr; unsigned short dev_handle; unsigned short memarray_handle; unsigned char row; unsigned char interleave_pos; unsigned char interleave_depth; } __attribute__((packed)); struct dmi_memdev { struct dmi_entry header; unsigned short array_handle; unsigned short memerr_handle; unsigned short total_width; 
/*
 * Raw layout of an SMBIOS Memory Array Mapped Address entry
 * (DMI_MEMORY_ARRAY_ADDR). Packed so the members overlay the table bytes
 * directly.
 *
 * partition_width is a single byte in the SMBIOS layout; declaring it
 * wider would make the struct longer than the entry it overlays.
 */
struct dmi_memarray_addr {
	struct dmi_entry header;
	unsigned int start_addr;
	unsigned int end_addr;
	unsigned short array_handle;
	unsigned char partition_width;
} __attribute__((packed));
You should find a copy of v2 of the GNU General Public License somewhere on your Linux system; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Author: Andi Kleen */ #include #include "mcelog.h" #include "bitfield.h" #include "dunnington.h" /* Follows Intel IA32 SDM 3b Appendix E.2.1 ++ */ static struct field dunnington_bus_status[] = { SBITFIELD(16, "Parity error detected during FSB request phase"), FIELD(17, reserved_3bits), SBITFIELD(20, "Hard Failure response received for a local transaction"), SBITFIELD(21, "Parity error on FSB response field detected"), SBITFIELD(22, "Parity data error on inbound data detected"), FIELD(23, reserved_3bits), FIELD(25, reserved_3bits), FIELD(28, reserved_3bits), FIELD(31, reserved_1bit), {} }; static char *dnt_front_error[0xf] = { [0x1] = "Inclusion error from core 0", [0x2] = "Inclusion error from core 1", [0x3] = "Write Exclusive error from core 0", [0x4] = "Write Exclusive error from core 1", [0x5] = "Inclusion error from FSB", [0x6] = "SNP stall error from FSB", [0x7] = "Write stall error from FSB", [0x8] = "FSB Arbiter Timeout error", [0xA] = "Inclusion error from core 2", [0xB] = "Write exclusive error from core 2", }; static char *dnt_int_error[0xf] = { [0x2] = "Internal timeout error", [0x3] = "Internal timeout error", [0x4] = "Intel Cache Safe Technology Queue full error\n" "or disabled ways in a set overflow", [0x5] = "Quiet cycle timeout error (correctable)", }; struct field dnt_int_status[] = { FIELD(8, dnt_int_error), {} }; struct field dnt_front_status[] = { FIELD(0, dnt_front_error), {} }; struct field dnt_cecc[] = { SBITFIELD(1, "Correctable ECC event on outgoing core 0 data"), SBITFIELD(2, "Correctable ECC event on outgoing core 1 data"), SBITFIELD(3, "Correctable ECC event on outgoing core 2 data"), {} }; struct field dnt_uecc[] = { SBITFIELD(1, "Uncorrectable ECC event on outgoing core 0 data"), SBITFIELD(2, "Uncorrectable ECC event on outgoing core 1 
data"), SBITFIELD(3, "Uncorrectable ECC event on outgoing core 2 data"), {} }; static void dunnington_decode_bus(u64 status) { decode_bitfield(status, dunnington_bus_status); } static void dunnington_decode_internal(u64 status) { u32 mca = (status >> 16) & 0xffff; if ((mca & 0xfff0) == 0) decode_bitfield(mca, dnt_front_status); else if ((mca & 0xf0ff) == 0) decode_bitfield(mca, dnt_int_status); else if ((mca & 0xfff0) == 0xc000) decode_bitfield(mca, dnt_cecc); else if ((mca & 0xfff0) == 0xe000) decode_bitfield(mca, dnt_uecc); } void dunnington_decode_model(u64 status) { if ((status & 0xffff) == 0xe0f) dunnington_decode_bus(status); else if ((status & 0xffff) == (1 << 10)) dunnington_decode_internal(status); } mcelog-128+dfsg/dunnington.h000066400000000000000000000000531261732315200161070ustar00rootroot00000000000000void dunnington_decode_model(u64 status); mcelog-128+dfsg/eventloop.c000066400000000000000000000065341261732315200157440ustar00rootroot00000000000000/* Copyright (C) 2009 Intel Corporation Author: Andi Kleen Event loop for mcelog daemon mode. mcelog is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2. mcelog is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should find a copy of v2 of the GNU General Public License somewhere on your Linux system; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #define _GNU_SOURCE 1 #include #include #include #include #include #include #include "mcelog.h" #include "eventloop.h" #define MAX_POLLFD 10 static int max_pollfd; struct pollcb { poll_cb_t cb; int fd; void *data; }; static struct pollfd pollfds[MAX_POLLFD]; static struct pollcb pollcbs[MAX_POLLFD]; static sigset_t event_sigs; static int closeonexec(int fd) { int flags = fcntl(fd, F_GETFD); if (flags < 0 || fcntl(fd, F_SETFD, flags | FD_CLOEXEC) < 0) { SYSERRprintf("Cannot set FD_CLOEXEC flag on fd"); return -1; } return 0; } int register_pollcb(int fd, int events, poll_cb_t cb, void *data) { int i = max_pollfd; if (closeonexec(fd) < 0) return -1; if (i >= MAX_POLLFD) { Eprintf("poll table overflow"); return -1; } max_pollfd++; pollfds[i].fd = fd; pollfds[i].events = events; pollcbs[i].cb = cb; pollcbs[i].data = data; return 0; } /* Could mark free and put into a free list */ void unregister_pollcb(struct pollfd *pfd) { int i = pfd - pollfds; assert(i >= 0 && i < max_pollfd); memmove(pollfds + i, pollfds + i + 1, (max_pollfd - i - 1) * sizeof(struct pollfd)); memmove(pollcbs + i, pollcbs + i + 1, (max_pollfd - i - 1) * sizeof(struct pollcb)); max_pollfd--; } static void poll_callbacks(int n) { int k; for (k = 0; k < max_pollfd && n > 0; k++) { struct pollfd *f = pollfds + k; if (f->revents) { struct pollcb *c = pollcbs + k; c->cb(f, c->data); n--; } } } /* Run signal handler only directly after event loop */ int event_signal(int sig) { static int first = 1; sigset_t mask; if (first && sigprocmask(SIG_BLOCK, NULL, &event_sigs) < 0) return -1; first = 0; if (sigprocmask(SIG_BLOCK, NULL, &mask) < 0) return -1; sigaddset(&mask, sig); if (sigprocmask(SIG_BLOCK, &mask, NULL) < 0) return -1; return 0; } /* Handle old glibc without ppoll. 
*/ static int ppoll_fallback(struct pollfd *pfd, nfds_t nfds, const struct timespec *ts, const sigset_t *ss) { sigset_t origmask; int ready; sigprocmask(SIG_SETMASK, ss, &origmask); ready = poll(pfd, nfds, ts ? ts->tv_sec : -1); sigprocmask(SIG_SETMASK, &origmask, NULL); return ready; } static int (*ppoll_vec)(struct pollfd *, nfds_t, const struct timespec *, const sigset_t *); void eventloop(void) { #if __GLIBC__ == 2 && __GLIBC_MINOR__ >= 5 || __GLIBC__ > 2 ppoll_vec = ppoll; #endif if (!ppoll_vec) ppoll_vec = ppoll_fallback; for (;;) { int n = ppoll_vec(pollfds, max_pollfd, NULL, &event_sigs); if (n <= 0) { if (n < 0 && errno != EINTR) SYSERRprintf("poll error"); continue; } poll_callbacks(n); } } mcelog-128+dfsg/eventloop.h000066400000000000000000000003571261732315200157460ustar00rootroot00000000000000#include typedef void (*poll_cb_t)(struct pollfd *pfd, void *data); int register_pollcb(int fd, int events, poll_cb_t cb, void *data); void unregister_pollcb(struct pollfd *pfd); void eventloop(void); int event_signal(int sig); mcelog-128+dfsg/genconfig.py000077500000000000000000000033441261732315200160750ustar00rootroot00000000000000#!/usr/bin/python # generate man config documentation from mcelog.conf example # genconfig.py mcelog.conf intro.html import sys import re import string import argparse ap = argparse.ArgumentParser(description="generate man config documentation from mcelog.conf example") ap.add_argument('config', type=argparse.FileType('r'), help="mcelog example config file") ap.add_argument('intro', type=argparse.FileType('r'), help="intro file") args = ap.parse_args() def parse(f): lineno = 1 explanation = 0 header = 1 for line in f: lineno += 1 # skip first comment if header: if not re.match('^#', line): header = 0 continue # explanation m = re.match('^#\s(.*)', line) if m: explanation += 1 s = m.group(1) if explanation == 1: s = string.capitalize(s) print s continue if explanation: print ".PP" explanation = 0 # empty line: new option if 
re.match('\s+', line): new_option() continue # group m = re.match('\[(.*)\]', line) if m: start_group(m.group(1)) continue # config option m = re.match('^(#?)([a-z-]+) = (.*)', line) if m: config_option(m.group(1), m.group(2), m.group(3)) continue print >>sys.stderr, "Unparseable line %d" % (lineno-1) def config_option(enabled, name, value): print ".B %s = %s" % (name, value) print ".PP" def start_group(name): print ".SS \"The %s config section\"" % (name) def new_option(): print ".PP" print """ .\\" Auto generated mcelog.conf manpage. Do not edit. .TH "mcelog.conf" 5 "mcelog" """ print args.intro.read() parse(args.config) print """ .SH SEE ALSO .BR mcelog (8), .BR mcelog.triggers (5) .B http://www.mcelog.org """ mcelog-128+dfsg/haswell.c000066400000000000000000000114251261732315200153630ustar00rootroot00000000000000/* Copyright (C) 2013 Intel Corporation Decode Intel Haswell specific machine check errors. mcelog is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2. mcelog is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should find a copy of v2 of the GNU General Public License somewhere on your Linux system; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Author: Tony Luck */ #include "mcelog.h" #include "bitfield.h" #include "haswell.h" #include "memdb.h" /* See IA32 SDM Vol3B Table 16-20 */ static char *pcu_1[] = { [0x00] = "No Error", [0x09] = "MC_MESSAGE_CHANNEL_TIMEOUT", [0x0D] = "MC_IMC_FORCE_SR_S3_TIMEOUT", [0x0E] = "MC_CPD_UNCPD_SD_TIMEOUT", [0x13] = "MC_DMI_TRAINING_TIMEOUT", [0x15] = "MC_DMI_CPU_RESET_ACK_TIMEOUT", [0x1E] = "MC_VR_ICC_MAX_LT_FUSED_ICC_MAX", [0x25] = "MC_SVID_COMMAN_TIMEOUT", [0x29] = "MC_VR_VOUT_MAC_LT_FUSED_SVID", [0x2B] = "MC_PKGC_WATCHDOG_HANG_CBZ_DOWN", [0x2C] = "MC_PKGC_WATCHDOG_HANG_CBZ_UP", [0x39] = "MC_PKGC_WATCHDOG_HANG_C3_UP_SF", [0x44] = "MC_CRITICAL_VR_FAILED", [0x45] = "MC_ICC_MAX_NOTSUPPORTED", [0x46] = "MC_VID_RAMP_DOWN_FAILED", [0x47] = "MC_EXCL_MODE_NO_PMREQ_CMP", [0x48] = "MC_SVID_READ_REG_ICC_MAX_FAILED", [0x49] = "MC_SVID_WRITE_REG_VOUT_MAX_FAILED", [0x4B] = "MC_BOOT_VID_TIMEOUT_DRAM_0", [0x4C] = "MC_BOOT_VID_TIMEOUT_DRAM_1", [0x4D] = "MC_BOOT_VID_TIMEOUT_DRAM_2", [0x4E] = "MC_BOOT_VID_TIMEOUT_DRAM_3", [0x4F] = "MC_SVID_COMMAND_ERROR", [0x52] = "MC_FIVR_CATAS_OVERVOL_FAULT", [0x53] = "MC_FIVR_CATAS_OVERCUR_FAULT", [0x57] = "MC_SVID_PKGC_REQUEST_FAILED", [0x58] = "MC_SVID_IMON_REQUEST_FAILED", [0x59] = "MC_SVID_ALERT_REQUEST_FAILED", [0x60] = "MC_INVALID_PKGS_REQ_PCH", [0x61] = "MC_INVALID_PKGS_REQ_QPI", [0x62] = "MC_INVALID_PKGS_RSP_QPI", [0x63] = "MC_INVALID_PKGS_RSP_PCH", [0x64] = "MC_INVALID_PKG_STATE_CONFIG", [0x67] = "MC_HA_IMC_RW_BLOCK_ACK_TIMEOUT", [0x68] = "MC_IMC_RW_SMBUS_TIMEOUT", [0x69] = "MC_HA_FAILSTS_CHANGE_DETECTED", [0x6A] = "MC_MSGCH_PMREQ_CMP_TIMEOUT", [0x70] = "MC_WATCHDOG_TIMEOUT_PKGC_SLAVE", [0x71] = "MC_WATCHDOG_TIMEOUT_PKGC_MASTER", [0x72] = "MC_WATCHDOG_TIMEOUT_PKGS_MASTER", [0x7C] = "MC_BIOS_RST_CPL_INVALID_SEQ", [0x7D] = 
/*
 * QPI error descriptions, indexed by the error code that qpi_mc extracts
 * starting at status bit 16. Codes without an entry are left NULL.
 * See IA32 SDM Vol3B Table 16-21.
 */
static char *qpi[] = {
	[0x02] = "Intel QPI physical layer detected drift buffer alarm",
	[0x03] = "Intel QPI physical layer detected latency buffer rollover",
	[0x10] = "Intel QPI link layer detected control error from R3QPI",
	[0x11] = "Rx entered LLR abort state on CRC error",
	[0x12] = "Unsupported or undefined packet",
	[0x13] = "Intel QPI link layer control error",
	[0x15] = "RBT used un-initialized value",
	[0x20] = "Intel QPI physical layer detected a QPI in-band reset but aborted initialization",
	[0x21] = "Link failover data self healing",
	[0x22] = "Phy detected in-band reset (no width change)",
	[0x23] = "Link failover clock failover",
	[0x30] = "Rx detected CRC error - successful LLR after Phy re-init",
	[0x31] = "Rx detected CRC error - successful LLR without Phy re-init",
};
#!/bin/bash
# GENCACHE cpu level ctype track
# generate a cache error record on stdout. All fields are optional;
# the defaults are cpu 0, level 0, ctype 0 and track 1.
# see SDM 3a chapter 15 for details
#
# level (number or name):
# 0 / L0 level 0
# 1 / L1 level 1
# 2 / L2 level 2
# 3 / LG level generic
#
# ctype (number or name):
# 0 / instr   instruction
# 1 / data    data
# 2 / generic generic
#
# track (number or name):
# 0 / none   no tracking
# 1 / green  below threshold
# 2 / yellow above threshold
# 3 reserved

cpu=${1:-0}
level=${2:-0}
ctype=${3:-0}
track=${4:-1}

# map symbolic names to their numeric codes; reject anything else
case "$ctype" in
instr) ctype=0 ;;
data) ctype=1 ;;
generic) ctype=2 ;;
[0-3]) ;;
*) echo "Unknown ctype $ctype" ; exit 1
esac

case "$level" in
L0) level=0 ;;
L1) level=1 ;;
L2) level=2 ;;
LG) level=3 ;;
[0-3]) ;;
*) echo "Unknown Cache $level" ; exit 1
esac

case "$track" in
none) track=0 ;;
green) track=1 ;;
yellow) track=2 ;;
[0-3]) ;;
*) echo "Unknown tracking flag $track" ; exit 1
esac

echo "# cache error on cpu $cpu level $level type $ctype track $track"
echo "CPU $cpu 2" # XXX use cpu in socket
echo "# nehalem"
echo "PROCESSOR 0:0x106a0"
printf "MCGCAP 0x%x\n" $[1 << 11]
# STATUS is printed as two 32 bit halves: the tracking flag goes at bit 53
# of the full value, level and ctype go into the low bits
printf "STATUS 0x%08x%08x\n" \
 $[0x88000000 + ($track << (53-32))] $[0x100 + $level + ($ctype << 2)]
#!/bin/bash
# GENPAGE pfn socketid channel dimm corr-err-cnt
# generate a memory error on a page. All fields are optional.
# dimm/channel can be out of sync with the address
# XXX page max 44bit for now
# suitable to be fed into mce-inject or mcelog --ascii
# Note: DIMMs only work when mcelog is in Nehalem mode
# this cannot be forced through mce-inject, but only on the command line

page=${1:-$RANDOM}
socketid=${2:-0}
channel=${3:-0}
dimm=${4:-0}
corr_err_cnt=${5:-0}

# Arguments are passed as printf parameters rather than being spliced into
# the format string, so a stray '%' or '\' in an argument cannot corrupt
# the output.
printf "# memory error on page %08x000 socket %s ch %s dimm %s \n" \
	"$page" "$socketid" "$channel" "$dimm"
echo "CPU 0 2" # XXX use cpu in socket
echo "# nehalem"
echo "PROCESSOR 0:0x106a0"
echo "SOCKETID $socketid"
# $(( )) replaces the obsolete $[ ] arithmetic expansion.
# High word: fixed status bits, status bit 58, and the error count shifted
# left by 6. Low word: 0xb0 plus the channel.
printf "STATUS 0x%08x%08x\n" \
	"$(( 0x88000000 + (1 << (58-32)) + ($corr_err_cnt << 6) ))" "$(( 0xb0 + $channel ))"
printf "MISC 0x%08x\n" "$(( ($channel << 18) + ($dimm << 16) ))"
printf "ADDR 0x%08x000\n" "$page"
mcelog-128+dfsg/input/full1000066400000000000000000000001671261732315200156660ustar00rootroot00000000000000CPU 0 BANK 1 STATUS 1234 TSC 3062652eaab RIP 3f:<5678> PROCESSOR 0:6f5 TIME 123456789 SOCKETID 1 MCGCAP 0xabc APICID f mcelog-128+dfsg/input/iomca000066400000000000000000000001011261732315200157170ustar00rootroot00000000000000CPU 0 BANK 1 STATUS 0x9c00000000000e0b MISC 0xabcdef ADDR 0xabcd mcelog-128+dfsg/input/simple1000066400000000000000000000001021261732315200162020ustar00rootroot00000000000000CPU 0 2 STATUS 1234 TSC 3062652eaab RIP 3f:<5678> MCGSTATUS 0x123 mcelog-128+dfsg/input/unknown000066400000000000000000000001011261732315200163260ustar00rootroot00000000000000CPU 0 BANK 1 STATUS 0x9c0000000000040b MISC 0xabcdef ADDR 0xabcd mcelog-128+dfsg/input/xen000066400000000000000000000002121261732315200154240ustar00rootroot00000000000000(XEN) MCE: The hardware reports a non fatal, correctable incident occurred on CPU 1. (XEN) Bank 2: d400008000040150 at 182c480179cf0 mcelog-128+dfsg/intel.c000066400000000000000000000110001261732315200150240ustar00rootroot00000000000000/* Copyright (C) 2009 Intel Corporation Author: Andi Kleen Common Intel CPU code. mcelog is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2. mcelog is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should find a copy of v2 of the GNU General Public License somewhere on your Linux system; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include "mcelog.h" #include "intel.h" #include "bitfield.h" #include "nehalem.h" #include "memdb.h" #include "page.h" #include "sandy-bridge.h" #include "ivy-bridge.h" #include "haswell.h" #include "xeon75xx.h" int memory_error_support; void intel_cpu_init(enum cputype cpu) { if (cpu == CPU_NEHALEM || cpu == CPU_XEON75XX || cpu == CPU_INTEL || cpu == CPU_SANDY_BRIDGE || cpu == CPU_SANDY_BRIDGE_EP || cpu == CPU_IVY_BRIDGE || cpu == CPU_IVY_BRIDGE_EPEX || cpu == CPU_HASWELL || cpu == CPU_HASWELL_EPEX || cpu == CPU_BROADWELL || cpu == CPU_KNIGHTS_LANDING || cpu == CPU_SKYLAKE) memory_error_support = 1; } enum cputype select_intel_cputype(int family, int model) { if (family == 15) { if (model == 6) return CPU_TULSA; return CPU_P4; } if (family == 6) { if (model >= 0x1a && model != 28) memory_error_support = 1; if (model < 0xf) return CPU_P6OLD; else if (model == 0xf || model == 0x17) /* Merom/Penryn */ return CPU_CORE2; else if (model == 0x1d) return CPU_DUNNINGTON; else if (model == 0x1a || model == 0x2c || model == 0x1e || model == 0x25) return CPU_NEHALEM; else if (model == 0x2e || model == 0x2f) return CPU_XEON75XX; else if (model == 0x2a) return CPU_SANDY_BRIDGE; else if (model == 0x2d) return CPU_SANDY_BRIDGE_EP; else if (model == 0x3a) return CPU_IVY_BRIDGE; else if (model == 0x3e) return CPU_IVY_BRIDGE_EPEX; else if (model == 0x3c || model == 0x45 || model == 0x46) return CPU_HASWELL; else if (model == 0x3f) return CPU_HASWELL_EPEX; else if (model == 0x3d || model == 0x4f || model == 0x56) return CPU_BROADWELL; else if (model == 0x57) return CPU_KNIGHTS_LANDING; else if (model == 0x1c || model == 0x26 || model == 0x27 || model == 0x35 || model == 0x36 || model == 0x36 || model == 0x37 || model == 0x4a || model == 0x4c || model == 0x4d || model == 
0x5a || model == 0x5d) return CPU_ATOM; else if (model == 0x4e || model == 0x5e) return CPU_SKYLAKE; if (model > 0x1a) { Eprintf("Family 6 Model %x CPU: only decoding architectural errors\n", model); return CPU_INTEL; } } if (family > 6) { Eprintf("Family %u Model %x CPU: only decoding architectural errors\n", family, model); return CPU_INTEL; } Eprintf("Unknown Intel CPU type family %x model %x\n", family, model); return family == 6 ? CPU_P6OLD : CPU_GENERIC; } int is_intel_cpu(int cpu) { switch (cpu) { CASE_INTEL_CPUS: return 1; } return 0; } static int intel_memory_error(struct mce *m, unsigned recordlen) { u32 mca = m->status & 0xffff; if ((mca >> 7) == 1) { unsigned corr_err_cnt = 0; int channel[2] = { (mca & 0xf) == 0xf ? -1 : (int)(mca & 0xf), -1 }; int dimm[2] = { -1, -1 }; switch (cputype) { case CPU_NEHALEM: nehalem_memerr_misc(m, channel, dimm); break; case CPU_XEON75XX: xeon75xx_memory_error(m, recordlen, channel, dimm); break; case CPU_SANDY_BRIDGE_EP: sandy_bridge_ep_memerr_misc(m, channel, dimm); break; case CPU_IVY_BRIDGE_EPEX: ivy_bridge_ep_memerr_misc(m, channel, dimm); break; default: break; } if (recordlen > offsetof(struct mce, mcgcap) && m->mcgcap & MCG_CMCI_P) corr_err_cnt = EXTRACT(m->status, 38, 52); memory_error(m, channel[0], dimm[0], corr_err_cnt, recordlen); account_page_error(m, channel[0], dimm[0]); /* * When both DIMMs have a error account the error twice to the page. 
*/ if (channel[1] != -1) { memory_error(m, channel[1], dimm[1], corr_err_cnt, recordlen); account_page_error(m, channel[1], dimm[1]); } return 1; } return 0; } /* No bugs known, but filter out memory errors if the user asked for it */ int mce_filter_intel(struct mce *m, unsigned recordlen) { if (intel_memory_error(m, recordlen) == 1) return !filter_memory_errors; return 1; } mcelog-128+dfsg/intel.h000066400000000000000000000011631261732315200150420ustar00rootroot00000000000000enum cputype select_intel_cputype(int family, int model); int is_intel_cpu(int cpu); int mce_filter_intel(struct mce *m, unsigned recordlen); void intel_cpu_init(enum cputype cpu); extern int memory_error_support; #define CASE_INTEL_CPUS \ case CPU_P6OLD: \ case CPU_CORE2: \ case CPU_NEHALEM: \ case CPU_DUNNINGTON: \ case CPU_TULSA: \ case CPU_P4: \ case CPU_INTEL: \ case CPU_XEON75XX: \ case CPU_SANDY_BRIDGE_EP: \ case CPU_SANDY_BRIDGE: \ case CPU_IVY_BRIDGE: \ case CPU_IVY_BRIDGE_EPEX: \ case CPU_HASWELL: \ case CPU_HASWELL_EPEX: \ case CPU_BROADWELL: \ case CPU_KNIGHTS_LANDING: \ case CPU_SKYLAKE mcelog-128+dfsg/ivy-bridge.c000066400000000000000000000103671261732315200157710ustar00rootroot00000000000000/* Copyright (C) 2013 Intel Corporation Decode Intel Ivy Bridge specific machine check errors. mcelog is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2. mcelog is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should find a copy of v2 of the GNU General Public License somewhere on your Linux system; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Author: Tony Luck */ #include "mcelog.h" #include "bitfield.h" #include "ivy-bridge.h" #include "memdb.h" /* See IA32 SDM Vol3B Table 16-17 */ static char *pcu_1[] = { [0] = "No error", [1] = "Non_IMem_Sel", [2] = "I_Parity_Error", [3] = "Bad_OpCode", [4] = "I_Stack_Underflow", [5] = "I_Stack_Overflow", [6] = "D_Stack_Underflow", [7] = "D_Stack_Overflow", [8] = "Non-DMem_Sel", [9] = "D_Parity_Error" }; static char *pcu_2[] = { [0x00] = "No Error", [0x0D] = "MC_IMC_FORCE_SR_S3_TIMEOUT", [0x0E] = "MC_MC_CPD_UNCPD_ST_TIMEOUT", [0x0F] = "MC_PKGS_SAFE_WP_TIMEOUT", [0x43] = "MC_PECI_MAILBOX_QUIESCE_TIMEOUT", [0x44] = "MC_CRITICAL_VR_FAILED", [0x45] = "MC_ICC_MAX-NOTSUPPORTED", [0x5C] = "MC_MORE_THAN_ONE_LT_AGENT", [0x60] = "MC_INVALID_PKGS_REQ_PCH", [0x61] = "MC_INVALID_PKGS_REQ_QPI", [0x62] = "MC_INVALID_PKGS_RES_QPI", [0x63] = "MC_INVALID_PKGC_RES_PCH", [0x64] = "MC_INVALID_PKG_STATE_CONFIG", [0x70] = "MC_WATCHDG_TIMEOUT_PKGC_SLAVE", [0x71] = "MC_WATCHDG_TIMEOUT_PKGC_MASTER", [0x72] = "MC_WATCHDG_TIMEOUT_PKGS_MASTER", [0x7A] = "MC_HA_FAILSTS_CHANGE_DETECTED", [0x7B] = "MC_PCIE_R2PCIE-RW_BLOCK_ACK_TIMEOUT", [0x81] = "MC_RECOVERABLE_DIE_THERMAL_TOO_HOT", }; static struct field pcu_mc4[] = { FIELD(16, pcu_1), FIELD(24, pcu_2), {} }; /* See IA32 SDM Vol3B Table 16-18 */ static struct field memctrl_mc9[] = { SBITFIELD(16, "Address parity error"), SBITFIELD(17, "HA Wrt buffer Data parity error"), SBITFIELD(18, "HA Wrt byte enable parity error"), SBITFIELD(19, "Corrected patrol scrub error"), SBITFIELD(20, "Uncorrected patrol scrub error"), SBITFIELD(21, "Corrected spare error"), SBITFIELD(22, "Uncorrected spare error"), SBITFIELD(23, "Corrected memory read error"), SBITFIELD(24, "iMC, WDB, parity errors"), {} }; void ivb_decode_model(int cputype, int bank, u64 status, u64 
/*
 * Convert a failing rank number into a DIMM slot number.
 *
 * Ranks 0-3 map to DIMM 0 and ranks 4-5 to DIMM 1. Ranks 6-7 map to DIMM 2
 * when get_memdimm(socket, channel, 2, 0) returns non-zero, otherwise to
 * DIMM 1. Any other rank yields -1.
 */
static int failrank2dimm(unsigned failrank, int socket, int channel)
{
	if (failrank <= 3)
		return 0;
	if (failrank <= 5)
		return 1;
	if (failrank <= 7)
		return get_memdimm(socket, channel, 2, 0) ? 2 : 1;
	return -1;
}
/*
 * Human readable names of the K8 machine check banks, indexed by bank
 * number. The last entry used to be misspelled "fixed-issue reoder".
 */
static char *k8bank[] = {
	"data cache",
	"instruction cache",
	"bus unit",
	"load/store unit",
	"northbridge",
	"fixed-issue reorder"
};
/*
 * Names of the northbridge extended error codes, indexed by the extended
 * error code taken from the bank status. Entry 15 used to be misspelled
 * "Tablew Walk Data Error".
 */
static char *nbextendederr[] = {
	"RAM ECC error",
	"CRC error",
	"Sync error",
	"Master abort",
	"Target abort",
	"GART error",
	"RMW error",
	"Watchdog error",
	"RAM Chipkill ECC error",
	"DEV Error",
	"Link Data Error",
	"Link Protocol Error",
	"NB Array Error",
	"DRAM Parity Error",
	"Link Retry",
	"Table Walk Data Error",
	"L3 Cache Data Error",
	"L3 Cache Tag Error",
	"L3 Cache LRU Error"
};
/*
 * Decode the status word of the instruction cache bank.
 *
 * Prints an ECC message when status bit 45 or 46 is set and a TLB parity
 * message when the low 16 bits match the TLB error pattern 0x001x, then
 * hands the status to the generic decoder. err is not used.
 */
static void decode_k8_ic_mc(u64 status, int *err)
{
	const unsigned short ext_code = (status >> 16) & 0x0f;
	const unsigned short mca_code = status & 0xffff;
	const int tlb_error = (mca_code & 0xFFF0) == 0x0010;

	if (status & (3ULL << 45))
		Wprintf(" Instruction cache ECC error\n");

	if (tlb_error) {
		/* extended error code 0 selects the physical array */
		const char *array = ext_code ? "virtual" : "physical";

		Wprintf(" TLB parity error in %s array\n", array);
	}

	decode_k8_generic_errcode(status);
}
/*
 * Filter hook for K8 machine checks.
 *
 * Returns 0 (drop the record) only for a bank 4 event whose extended error
 * code is 5 (the GART error entry of nbextendederr) and whose status bit 61
 * is set. Every other record returns 1.
 */
int mce_filter_k8(struct mce *m)
{
	unsigned short ext_code;

	/* Only bank 4 can carry GART errors */
	if (m->bank != 4)
		return 1;

	ext_code = (m->status >> 16) & 0x0f;
	if (ext_code != 5)
		return 1;

	return (m->status & (1ULL << 61)) ? 0 : 1;
}
/*
 * Convert a time unit character into its length in seconds.
 *
 * unit: 'd' (days), 'h' (hours), 'm' (minutes), 's' (seconds) or 0 for
 *       "no unit given", which is also treated as seconds.
 * out:  receives the number of seconds per unit; left untouched on error.
 *
 * Returns 0 on success and -1 for an unknown unit.
 *
 * 's' is listed as a valid unit in the rate syntax comment ("[hmds]") and
 * in struct bucket_conf, but used to be rejected here.
 */
static int timeconv(char unit, int *out)
{
	unsigned corr = 1;

	switch (unit) {
	case 'd':
		corr *= 24;
		/* fallthrough */
	case 'h':
		corr *= 60;
		/* fallthrough */
	case 'm':
		corr *= 60;
		/* fallthrough */
	case 's':
	case 0:
		break;
	default:
		return -1;
	}
	*out = corr;
	return 0;
}
Caller must free string */ char *bucket_output(const struct bucket_conf *c, struct leaky_bucket *b) { char *buf; if (c->capacity == 0) { asprintf(&buf, "not enabled"); } else { int unit = 0; //bucket_age(c, b, bucket_time()); timeconv(c->tunit, &unit); asprintf(&buf, "%u in %u%c", b->count + b->excess, c->agetime/unit, c->tunit); } return buf; } /* Parse user specified capacity / rate string */ /* capacity / time time: number [hmds] capacity: number [kmg] */ static int parse_rate(const char *rate, struct bucket_conf *c) { char cunit[2], tunit[2]; unsigned cap, t; int n; int unit; cunit[0] = 0; tunit[0] = 0; n = sscanf(rate, "%u %1s / %u %1s", &cap, cunit, &t, tunit); if (n != 4) { cunit[0] = 0; tunit[0] = 0; if (n <= 2) { n = sscanf(rate, "%u / %u %1s", &cap, &t, tunit); if (n < 2) return -1; } else return -1; } if (t == 0 || cap == 0) return -1; switch (tolower(cunit[0])) { case 'g': cap *= 1000; case 'm': cap *= 1000; case 'k': cap *= 1000; case 0: break; default: return -1; } c->tunit = tolower(tunit[0]); if (timeconv(c->tunit, &unit) < 0) return -1; c->agetime = unit * t; c->capacity = cap; return 0; } /* Initialize leaky bucket conf for given user rate/capacity string. <0 on error */ int bucket_conf_init(struct bucket_conf *c, const char *rate) { if (parse_rate(rate, c) < 0) return -1; c->trigger = NULL; return 0; } /* Initialize leaky bucket instance. */ void bucket_init(struct leaky_bucket *b) { b->count = 0; b->excess = 0; b->tstamp = bucket_time(); } #ifdef TEST_LEAKY_BUCKET /* Stolen from the cpp documentation */ #define xstr(_s) str(_s) #define str(_s) #_s #define THRESHOLD_EVENTS_PER_PERIOD 100 #define EVENTS_PER_LOGGED_EVENT 10 #define SECONDS_PER_EVENT 86 /* Needs to be SECONDS_PER_EVENT * EVENTS_PER_LOGGED_EVENT * THRESHOLD_EVENTS_PER_PERIOD */ #define THRESHOLD_PERIOD 86000 #if THRESHOLD_PERIOD != (SECONDS_PER_EVENT * EVENTS_PER_LOGGED_EVENT * THRESHOLD_EVENTS_PER_PERIOD) # error THRESHOLD_PERIOD is Wrong! 
#endif #define RATE_STRING xstr(THRESHOLD_EVENTS_PER_PERIOD) " / " xstr(THRESHOLD_PERIOD) #define EVENTS_PER_PERIOD_IN_TEST (THRESHOLD_EVENTS_PER_PERIOD * EVENTS_PER_LOGGED_EVENT) #define PERIODS_TO_TEST 3 #define TOTAL_SECONDS_FOR_TEST (PERIODS_TO_TEST * THRESHOLD_PERIOD) #define TOTAL_EVENTS (PERIODS_TO_TEST * EVENTS_PER_PERIOD_IN_TEST) int main(int argc, char **argv) { struct bucket_conf c; struct leaky_bucket b; time_t start_time; time_t event_time; int ret; int i; #ifdef TEST_LEAKY_BUCKET_DEBUG printf("Testing with a rate of " RATE_STRING "\n"); #endif ret = bucket_conf_init(&c, RATE_STRING); if (ret) return ret; bucket_init(&b); start_time = b.tstamp; for (i = 1; i <= TOTAL_EVENTS; i++) { event_time = start_time + i * SECONDS_PER_EVENT; ret = __bucket_account(&c, &b, 1, event_time); #ifdef TEST_LEAKY_BUCKET_DEBUG if (ret) printf("Logging entry %d at %ld %ld\n", i, event_time - start_time, b.tstamp); #else if (i < THRESHOLD_EVENTS_PER_PERIOD) { if (!ret){ fprintf(stderr, "Did not log initial events - FAIL.\n"); return -1; } } else { if (!(i % EVENTS_PER_LOGGED_EVENT) && !ret) { fprintf(stderr, "Did not log initial events - FAIL.\n"); return -1; } } #endif } return 0; } #endif mcelog-128+dfsg/leaky-bucket.h000066400000000000000000000015011261732315200163030ustar00rootroot00000000000000#ifndef LEAKY_BUCKET_H #define LEAKY_BUCKET_H 1 #include /* Leaky bucket algorithm for triggers */ struct bucket_conf { unsigned capacity; unsigned agetime; unsigned char tunit; /* 'd','h','m','s' */ unsigned char log; char *trigger; }; struct leaky_bucket { unsigned count; unsigned excess; time_t tstamp; }; int bucket_account(const struct bucket_conf *c, struct leaky_bucket *b, unsigned inc); int __bucket_account(const struct bucket_conf *c, struct leaky_bucket *b, unsigned inc, time_t time); char *bucket_output(const struct bucket_conf *c, struct leaky_bucket *b); int bucket_conf_init(struct bucket_conf *c, const char *rate); void bucket_init(struct leaky_bucket *b); time_t 
bucket_time(void); void bucket_age(const struct bucket_conf *c, struct leaky_bucket *b, time_t now); #endif mcelog-128+dfsg/list.h000066400000000000000000000142071261732315200147050ustar00rootroot00000000000000/* Stripped down version of the Linux 2.6.30 list.h */ #include /* * These are non-NULL pointers that will result in page faults * under normal circumstances, used to verify that nobody uses * non-initialized list entries. */ #define LIST_POISON1 ((void *) 0x00100100) #define LIST_POISON2 ((void *) 0x00200200) /** * container_of - cast a member of a structure out to the containing structure * @ptr: the pointer to the member. * @type: the type of the container struct this is embedded in. * @member: the name of the member within the struct. * */ #define container_of(ptr, type, member) ({ \ const typeof( ((type *)0)->member ) *__mptr = (ptr); \ (type *)( (char *)__mptr - offsetof(type,member) );}) struct list_head { struct list_head *next, *prev; }; #define LIST_HEAD_INIT(name) { &(name), &(name) } #define LIST_HEAD(name) \ struct list_head name = LIST_HEAD_INIT(name) static inline void INIT_LIST_HEAD(struct list_head *list) { list->next = list; list->prev = list; } /* * Insert a new entry between two known consecutive entries. * * This is only for internal list manipulation where we know * the prev/next entries already! */ static inline void __list_add(struct list_head *new, struct list_head *prev, struct list_head *next) { next->prev = new; new->next = next; new->prev = prev; prev->next = new; } /** * list_add - add a new entry * @new: new entry to be added * @head: list head to add it after * * Insert a new entry after the specified head. * This is good for implementing stacks. */ static inline void list_add(struct list_head *new, struct list_head *head) { __list_add(new, head, head->next); } /** * list_add_tail - add a new entry * @new: new entry to be added * @head: list head to add it before * * Insert a new entry before the specified head. 
* This is useful for implementing queues. */ static inline void list_add_tail(struct list_head *new, struct list_head *head) { __list_add(new, head->prev, head); } /* * Delete a list entry by making the prev/next entries * point to each other. * * This is only for internal list manipulation where we know * the prev/next entries already! */ static inline void __list_del(struct list_head * prev, struct list_head * next) { next->prev = prev; prev->next = next; } static inline void list_del(struct list_head *entry) { __list_del(entry->prev, entry->next); entry->next = LIST_POISON1; entry->prev = LIST_POISON2; } /** * list_is_last - tests whether @list is the last entry in list @head * @list: the entry to test * @head: the head of the list */ static inline int list_is_last(const struct list_head *list, const struct list_head *head) { return list->next == head; } /** * list_empty - tests whether a list is empty * @head: the list to test. */ static inline int list_empty(const struct list_head *head) { return head->next == head; } /** * list_entry - get the struct for this entry * @ptr: the &struct list_head pointer. * @type: the type of the struct this is embedded in. * @member: the name of the list_struct within the struct. */ #define list_entry(ptr, type, member) \ container_of(ptr, type, member) /** * list_first_entry - get the first element from a list * @ptr: the list head to take the element from. * @type: the type of the struct this is embedded in. * @member: the name of the list_struct within the struct. * * Note, that list is expected to be not empty. */ #define list_first_entry(ptr, type, member) \ list_entry((ptr)->next, type, member) /** * list_for_each - iterate over a list * @pos: the &struct list_head to use as a loop cursor. * @head: the head for your list. 
*/ #define list_for_each(pos, head) \ for (pos = (head)->next; pos != (head); \ pos = pos->next) /** * list_for_each_prev - iterate over a list backwards * @pos: the &struct list_head to use as a loop cursor. * @head: the head for your list. */ #define list_for_each_prev(pos, head) \ for (pos = (head)->prev; pos != (head); \ pos = pos->prev) /** * list_for_each_safe - iterate over a list safe against removal of list entry * @pos: the &struct list_head to use as a loop cursor. * @n: another &struct list_head to use as temporary storage * @head: the head for your list. */ #define list_for_each_safe(pos, n, head) \ for (pos = (head)->next, n = pos->next; pos != (head); \ pos = n, n = pos->next) /** * list_for_each_prev_safe - iterate over a list backwards safe against removal of list entry * @pos: the &struct list_head to use as a loop cursor. * @n: another &struct list_head to use as temporary storage * @head: the head for your list. */ #define list_for_each_prev_safe(pos, n, head) \ for (pos = (head)->prev, n = pos->prev; \ pos != (head); \ pos = n, n = pos->prev) /** * list_for_each_entry - iterate over list of given type * @pos: the type * to use as a loop cursor. * @head: the head for your list. * @member: the name of the list_struct within the struct. */ #define list_for_each_entry(pos, head, member) \ for (pos = list_entry((head)->next, typeof(*pos), member); \ &pos->member != (head); \ pos = list_entry(pos->member.next, typeof(*pos), member)) /** * list_for_each_entry_reverse - iterate backwards over list of given type. * @pos: the type * to use as a loop cursor. * @head: the head for your list. * @member: the name of the list_struct within the struct. 
*/ #define list_for_each_entry_reverse(pos, head, member) \ for (pos = list_entry((head)->prev, typeof(*pos), member); \ &pos->member != (head); \ pos = list_entry(pos->member.prev, typeof(*pos), member)) /** * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry * @pos: the type * to use as a loop cursor. * @n: another type * to use as temporary storage * @head: the head for your list. * @member: the name of the list_struct within the struct. */ #define list_for_each_entry_safe(pos, n, head, member) \ for (pos = list_entry((head)->next, typeof(*pos), member), \ n = list_entry(pos->member.next, typeof(*pos), member); \ &pos->member != (head); \ pos = n, n = list_entry(n->member.next, typeof(*n), member)) mcelog-128+dfsg/mcelog.8000066400000000000000000000241151261732315200151170ustar00rootroot00000000000000.TH MCELOG 8 "Mar 2015" "" "Linux's Administrator's Manual" .SH NAME mcelog \- Decode kernel machine check log on x86 machines .SH SYNOPSIS mcelog [options] [device] .br mcelog [options] \-\-daemon .br mcelog [options] \-\-client .br mcelog [options] \-\-ascii .br .\"mcelog [options] \-\-drop-old-memory .\".br .\"mcelog [options] \-\-reset-memory locator .\".br .\"mcelog [options] \-\-dump-memory[=locator] .br mcelog [options] \-\-is\-cpu\-supported .br mcelog \-\-version .SH DESCRIPTION X86 CPUs report errors detected by the CPU as .I machine check events (MCEs). These can be data corruption detected in the CPU caches, in main memory by an integrated memory controller, data transfer errors on the front side bus or CPU interconnect or other internal errors. Possible causes can be cosmic radiation, instable power supplies, cooling problems, broken hardware, running systems out of specification, or bad luck. Most errors can be corrected by the CPU by internal error correction mechanisms. Uncorrected errors cause machine check exceptions which may kill processes or panic the machine. 
A small number of corrected errors is usually not a cause for worry, but a large number can indicate future failure. When a corrected or recovered error happens the x86 kernel writes a record describing the MCE into an internal ring buffer available through the .I /dev/mcelog device. .I mcelog retrieves errors from .I /dev/mcelog, decodes them into a human readable format and prints them on the standard output or optionally into the system log. Optionally it can also take further actions like keeping statistics or triggering shell scripts on specific events. By default mcelog supports offlining memory pages with persistent corrected errors, offlining CPU cores if they developed cache problems, and otherwise logging specific events to the system log after they crossed a threshold. The normal operating modes for mcelog are running as a regular cron job (traditional way, deprecated), running as a trigger directly executed by the kernel, or running as a daemon with the .I \-\-daemon option. When an uncorrected machine check error happens that the kernel cannot recover from then it will usually panic the system. In this case when there was a warm reset after the panic mcelog should pick up the machine check errors after reboot. This is not possible after a cold reset. In addition mcelog can be used on the command line to decode the kernel output for a fatal machine check panic in text format using the .I \-\-ascii option. This is typically used to decode the panic console output of a fatal machine check, if the system was power cycled or mcelog didn't run immediately after reboot. When the panic triggers a kdump kexec crash kernel the crash kernel boot up script should log the machine checks to disk, otherwise they might be lost. Note that after mcelog retrieves an error the kernel doesn't store it anymore (different from .I dmesg(1)), so the output should always be saved somewhere and mcelog should not be run in uncontrolled ways. 
When invoked with the .I \-\-is\-cpu\-supported option mcelog exits with code 0 if the current CPU is supported, 1 otherwise. .SH OPTIONS When the .B \-\-syslog option is specified output is redirected to the system log. The .B \-\-syslog-error option causes the normal machine checks to be logged as .I LOG_ERR (implies .I \-\-syslog ). Normally only fatal errors or high level remarks are logged with error level. High level one line summaries of specific errors are also logged to the syslog by default unless mcelog operates in .I \-\-ascii mode. When the .B \-\-logfile=file option is specified log output is appended to the specified file. With the .B \-\-no-syslog option mcelog will never log anything to the syslog. When the .B \-\-cpu=cputype option is specified the CPU type to decode for is set to .I cputype. See .I mcelog \-\-help for a list of valid CPUs. Note that specifying an incorrect CPU can lead to incorrect decoding output. The default is either the CPU of the machine that reported the machine check (needs a newer kernel version) or the CPU of the machine mcelog is running on, so normally this option doesn't have to be used. Older versions of mcelog had separate options for different CPU types. These are still implemented, but deprecated and undocumented now. With the .B \-\-dmi option mcelog will look up the DIMMs reported in machine checks in the .I SMBIOS/DMI tables of the BIOS and map the DIMMs to board identifiers. This only works when the BIOS reports the identifiers correctly. Unfortunately often the information reported by the BIOS is either subtly or obviously wrong or useless. This option requires that mcelog has read access to /dev/mem (normally requires root) and runs on the same machine in the same hardware configuration as when the machine check event happened. When .B \-\-ignorenodev is specified then mcelog will exit silently when the device cannot be opened. This is useful in virtualized environments with limited devices. 
When .B \-\-filter is specified .I mcelog will filter out known broken machine check events (default on). When the .B \-\-no-filter option is specified mcelog does not filter events. When .B \-\-raw is specified .I mcelog will not decode, but just dump the mcelog in a raw hex format. This can be useful for automatic post processing. When a device is specified the machine check logs are read from device instead of the default .I /dev/mcelog. With the .B \-\-ascii option mcelog decodes a fatal machine check panic generated by the kernel ("CPU n: Machine Check Exception ...") in ASCII from standard input and exits afterwards. Note that when the panic comes from a different machine than where mcelog is running on you might need to specify the correct cputype on older kernels. On newer kernels which output the .I PROCESSOR field this is not needed anymore. When the .B \-\-file filename option is specified .I mcelog \-\-ascii will read the ASCII machine check record from input file .I filename instead of standard input. With the .B \-\-config-file file option mcelog reads the specified config file. Default is .I /etc/mcelog/mcelog.conf See also .I CONFIG FILE below. With the .B \-\-daemon option mcelog will run in the background. This gives the fastest reaction time and is the recommended operating mode. If an output option isn't selected ( .I \-\-logfile or .I \-\-syslog or .I \-\-syslog-error ), this option implies .I \-\-logfile=/var/log/mcelog. Important messages will be logged as one-liner summaries to syslog unless .I \-\-no-syslog is given. The option .I \-\-foreground will prevent mcelog from giving up the terminal in daemon mode. This is intended for debugging. With the .B \-\-client option mcelog will query a running daemon for accumulated errors. With the .B \-\-cpumhz=mhz option assume the CPU has .I mhz frequency for decoding the time of the event using the CPU time stamp counter. This also forces decoding. Note this can be unreliable. 
In particular, the decoded time is wrong on systems with CPU frequency scaling or deep C states, where the CPU time stamp counter does not increase linearly. By default the frequency of the current CPU is used when mcelog determines it is safe to use. Newer kernels report the time directly in the event and don't need this anymore. The .B \-\-pidfile file option writes the process id of the daemon into file .I file. Only valid in daemon mode. Mcelog will enable extended error reporting from the memory controller on processors that support it unless you tell it not to with the .B \-\-no-imc-log option. You might need this option when decoding old logs from a system where this mode was not enabled. .\".B \-\-database filename .\"specifies the memory module error database file. Default is .\"/var/lib/memory-errors. It is only used together with DMI decoding. .\" .\" .\".B \-\-error\-trigger=cmd,thresh .\"When a memory module accumulates .\".I thresh .\"errors in the err database run command .\".I cmd. .\" .\".B \-\-drop-old-memory .\"Drop old DIMMs in the memory module database that are not plugged in .\"anymore. .\" .\".B \-\-reset\-memory=locator .\"When the DIMMs have suitable unique serial numbers mcelog .\"will automatically detect changed DIMMs. When the DIMMs don't .\"have those the user will have to use this option when changing .\"a DIMM to reset the error count in the error database. .\".I Locator .\"is the memory slot identifier printed on the motherboard. .\" .\".B \-\-dump-memory[=locator] .\"Dump error database information for memory module located .\"at .\".I locator. .\"When no locator is specified dump all. .B \-\-version displays the version of mcelog and exits. .SH CONFIG FILE mcelog supports a config file to set defaults. Command line options override the config file. By default the config file is read from .I /etc/mcelog/mcelog.conf unless overridden with the .I --config-file option. 
The general format is .I optionname = value White space is not allowed in value currently, except at the end where it is dropped. Comments start with #. All command line options that are not commands can be specified in the config file. For example, to enable the .I --no-syslog option use .I no-syslog = yes (or no to disable). When the option has an argument use .I logfile = /tmp/logfile For more information on the config file please see .B mcelog.conf(5). .SH NOTES The kernel prefers old messages over new. If the log buffer overflows only old ones will be kept. The exact output in the log file depends on the CPU, unless the \-\-raw option is used. mcelog will report serious errors to the syslog during decoding. .SH SIGNALS When .I mcelog runs in daemon mode and receives a .I SIGUSR1 it will close and reopen the log files. This can be used to rotate logs without restarting the daemon. .SH FILES /dev/mcelog (char 10, minor 227) /etc/mcelog/mcelog.conf /var/log/mcelog /var/run/mcelog.pid .\"/var/lib/memory-errors .SH SEE ALSO .BR mcelog.conf(5), .BR mcelog.triggers(5) http://www.mcelog.org AMD x86-64 architecture programmer's manual, Volume 2, System programming Intel 64 and IA32 Architectures Software Developer's manual, Volume 3, System programming guide Chapters 15 and 16. http://www.intel.com/sdm Datasheet of your CPU. mcelog-128+dfsg/mcelog.c000066400000000000000000001001531261732315200151670ustar00rootroot00000000000000/* Copyright (C) 2004,2005,2006 Andi Kleen, SuSE Labs. Copyright (C) 2008 Intel Corporation Authors: Andi Kleen, Ying Huang Decode IA32/x86-64 machine check events in /dev/mcelog. mcelog is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2. mcelog is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more details. You should find a copy of v2 of the GNU General Public License somewhere on your Linux system; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #define _GNU_SOURCE 1 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "mcelog.h" #include "paths.h" #include "k8.h" #include "intel.h" #include "p4.h" #include "dmi.h" #include "dimm.h" #include "tsc.h" #include "version.h" #include "config.h" #include "diskdb.h" #include "memutil.h" #include "eventloop.h" #include "memdb.h" #include "server.h" #include "trigger.h" #include "client.h" #include "msg.h" #include "yellow.h" #include "page.h" #include "bus.h" #include "unknown.h" enum cputype cputype = CPU_GENERIC; char *logfn = LOG_DEV_FILENAME; int ignore_nodev; int filter_bogus = 1; int cpu_forced; static double cpumhz; static int cpumhz_forced; int ascii_mode; int dump_raw_ascii; int daemon_mode; static char *inputfile; char *processor_flags; static int foreground; int filter_memory_errors; static struct config_cred runcred = { .uid = -1U, .gid = -1U }; static int numerrors; static char pidfile_default[] = PID_FILE; static char logfile_default[] = LOG_FILE; static char *pidfile = pidfile_default; static char *logfile; static int debug_numerrors; int imc_log = -1; static int check_only = 0; static int is_cpu_supported(void); static void disclaimer(void) { Wprintf("Hardware event. This is not a software error.\n"); } static char *extended_bankname(unsigned bank) { static char buf[64]; switch (bank) { case MCE_THERMAL_BANK: return "THERMAL EVENT"; case MCE_TIMEOUT_BANK: return "Timeout waiting for exception on other CPUs"; case K8_MCE_THRESHOLD_BASE ... 
K8_MCE_THRESHOLD_TOP: return k8_bank_name(bank); /* add more extended banks here */ default: sprintf(buf, "Undecoded extended event %x", bank); return buf; } } static char *bankname(unsigned bank) { static char numeric[64]; if (bank >= MCE_EXTENDED_BANK) return extended_bankname(bank); switch (cputype) { case CPU_K8: return k8_bank_name(bank); CASE_INTEL_CPUS: return intel_bank_name(bank); /* add banks of other cpu types here */ default: sprintf(numeric, "BANK %d", bank); return numeric; } } static void resolveaddr(unsigned long long addr) { if (addr && do_dmi && dmi_forced) dmi_decodeaddr(addr); /* Should check for PCI resources here too */ } static int mce_filter(struct mce *m, unsigned recordlen) { if (!filter_bogus) return 1; /* Filter out known broken MCEs */ switch (cputype) { case CPU_K8: return mce_filter_k8(m); /* add more buggy CPUs here */ CASE_INTEL_CPUS: return mce_filter_intel(m, recordlen); default: case CPU_GENERIC: return 1; } } static void print_tsc(int cpunum, __u64 tsc, unsigned long time) { int ret = -1; char *buf = NULL; if (cpumhz_forced) ret = decode_tsc_forced(&buf, cpumhz, tsc); else if (!time) ret = decode_tsc_current(&buf, cpunum, cputype, cpumhz, tsc); Wprintf("TSC %llx %s", tsc, ret >= 0 && buf ? 
buf : ""); free(buf); } struct cpuid1 { unsigned stepping : 4; unsigned model : 4; unsigned family : 4; unsigned type : 2; unsigned res1 : 2; unsigned ext_model : 4; unsigned ext_family : 8; unsigned res2 : 4; }; static void parse_cpuid(u32 cpuid, u32 *family, u32 *model) { union { struct cpuid1 c; u32 v; } c; /* Algorithm from IA32 SDM 2a 3-191 */ c.v = cpuid; *family = c.c.family; if (*family == 0xf) *family += c.c.ext_family; *model = c.c.model; if (*family == 6 || *family == 0xf) *model += c.c.ext_model << 4; } static u32 unparse_cpuid(unsigned family, unsigned model) { union { struct cpuid1 c; u32 v; } c; c.c.family = family; if (family >= 0xf) { c.c.family = 0xf; c.c.ext_family = family - 0xf; } c.c.model = model & 0xf; if (family == 6 || family == 0xf) c.c.ext_model = model >> 4; return c.v; } static char *cputype_name[] = { [CPU_GENERIC] = "generic CPU", [CPU_P6OLD] = "Intel PPro/P2/P3/old Xeon", [CPU_CORE2] = "Intel Core", /* 65nm and 45nm */ [CPU_K8] = "AMD K8 and derivates", [CPU_P4] = "Intel P4", [CPU_NEHALEM] = "Intel Xeon 5500 series / Core i3/5/7 (\"Nehalem/Westmere\")", [CPU_DUNNINGTON] = "Intel Xeon 7400 series", [CPU_TULSA] = "Intel Xeon 7100 series", [CPU_INTEL] = "Intel generic architectural MCA", [CPU_XEON75XX] = "Intel Xeon 7500 series", [CPU_SANDY_BRIDGE] = "Sandy Bridge", /* Fill in better name */ [CPU_SANDY_BRIDGE_EP] = "Sandy Bridge EP", /* Fill in better name */ [CPU_IVY_BRIDGE] = "Ivy Bridge", /* Fill in better name */ [CPU_IVY_BRIDGE_EPEX] = "Intel Xeon v2 (Ivy Bridge) EP/EX", /* Fill in better name */ [CPU_HASWELL] = "Haswell", /* Fill in better name */ [CPU_HASWELL_EPEX] = "Intel Xeon v3 (Haswell) EP/EX", [CPU_BROADWELL] = "Broadwell", [CPU_KNIGHTS_LANDING] = "Knights Landing", [CPU_ATOM] = "ATOM", [CPU_SKYLAKE] "Skylake", }; static struct config_choice cpu_choices[] = { { "generic", CPU_GENERIC }, { "p6old", CPU_P6OLD }, { "core2", CPU_CORE2 }, { "k8", CPU_K8 }, { "p4", CPU_P4 }, { "dunnington", CPU_DUNNINGTON }, { "xeon74xx", 
CPU_DUNNINGTON }, { "xeon7400", CPU_DUNNINGTON }, { "xeon5500", CPU_NEHALEM }, { "xeon5200", CPU_CORE2 }, { "xeon5000", CPU_P4 }, { "xeon5100", CPU_CORE2 }, { "xeon3100", CPU_CORE2 }, { "xeon3200", CPU_CORE2 }, { "core_i7", CPU_NEHALEM }, { "core_i5", CPU_NEHALEM }, { "core_i3", CPU_NEHALEM }, { "nehalem", CPU_NEHALEM }, { "westmere", CPU_NEHALEM }, { "xeon71xx", CPU_TULSA }, { "xeon7100", CPU_TULSA }, { "tulsa", CPU_TULSA }, { "intel", CPU_INTEL }, { "xeon75xx", CPU_XEON75XX }, { "xeon7500", CPU_XEON75XX }, { "xeon7200", CPU_CORE2 }, { "xeon7100", CPU_P4 }, { "sandybridge", CPU_SANDY_BRIDGE }, /* Fill in better name */ { "sandybridge-ep", CPU_SANDY_BRIDGE_EP }, /* Fill in better name */ { "ivybridge", CPU_IVY_BRIDGE }, /* Fill in better name */ { "ivybridge-ep", CPU_IVY_BRIDGE_EPEX }, { "ivybridge-ex", CPU_IVY_BRIDGE_EPEX }, { "haswell", CPU_HASWELL }, /* Fill in better name */ { "haswell-ep", CPU_HASWELL_EPEX }, { "haswell-ex", CPU_HASWELL_EPEX }, { "broadwell", CPU_BROADWELL }, { "knightslanding", CPU_KNIGHTS_LANDING }, { "xeon-v2", CPU_IVY_BRIDGE_EPEX }, { "xeon-v3", CPU_HASWELL_EPEX }, { "atom", CPU_ATOM }, { "skylake", CPU_SKYLAKE }, { NULL } }; static void print_cputypes(void) { struct config_choice *c; fprintf(stderr, "Valid CPUs:"); for (c = cpu_choices; c->name; c++) fprintf(stderr, " %s", c->name); fputc('\n', stderr); } static enum cputype lookup_cputype(char *name) { struct config_choice *c; for (c = cpu_choices; c->name; c++) { if (!strcasecmp(name, c->name)) return c->val; } fprintf(stderr, "Unknown CPU type `%s' specified\n", name); print_cputypes(); exit(1); } static char *vendor[] = { [0] = "Intel", [1] = "Cyrix", [2] = "AMD", [3] = "UMC", [4] = "vendor 4", [5] = "Centaur", [6] = "vendor 6", [7] = "Transmeta", [8] = "NSC" }; static unsigned cpuvendor_to_num(char *name) { unsigned i; unsigned v; char *end; v = strtoul(name, &end, 0); if (end > name) return v; for (i = 0; i < NELE(vendor); i++) if (!strcmp(name, vendor[i])) return i; return 0; } 
static char *cpuvendor_name(u32 cpuvendor) { return (cpuvendor < NELE(vendor)) ? vendor[cpuvendor] : "Unknown vendor"; } static enum cputype setup_cpuid(u32 cpuvendor, u32 cpuid) { u32 family, model; parse_cpuid(cpuid, &family, &model); switch (cpuvendor) { case X86_VENDOR_INTEL: return select_intel_cputype(family, model); case X86_VENDOR_AMD: if (family >= 15 && family <= 17) return CPU_K8; /* FALL THROUGH */ default: Eprintf("Unknown CPU type vendor %u family %x model %x", cpuvendor, family, model); return CPU_GENERIC; } } static void mce_cpuid(struct mce *m) { static int warned; if (m->cpuid) { enum cputype t = setup_cpuid(m->cpuvendor, m->cpuid); if (!cpu_forced) cputype = t; else if (t != cputype && t != CPU_GENERIC && !warned) { Eprintf("Forced cputype %s does not match cpu type %s from mcelog\n", cputype_name[cputype], cputype_name[t]); warned = 1; } } else if (cputype == CPU_GENERIC && !cpu_forced) { is_cpu_supported(); } } static void mce_prepare(struct mce *m) { mce_cpuid(m); if (!m->time) m->time = time(NULL); } static void dump_mce(struct mce *m, unsigned recordlen) { int n; int ismemerr = 0; unsigned cpu = m->extcpu ? m->extcpu : m->cpu; /* should not happen */ if (!m->finished) Wprintf("not finished?\n"); Wprintf("CPU %d %s ", cpu, bankname(m->bank)); if (m->tsc) print_tsc(cpu, m->tsc, m->time); Wprintf("\n"); if (m->ip) Wprintf("RIP%s %02x:%llx\n", !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" 
: "", m->cs, m->ip); n = 0; if (m->status & MCI_STATUS_MISCV) n += Wprintf("MISC %llx ", m->misc); if (m->status & MCI_STATUS_ADDRV) n += Wprintf("ADDR %llx ", m->addr); if (n > 0) Wprintf("\n"); if (m->time) { time_t t = m->time; Wprintf("TIME %llu %s", m->time, ctime(&t)); } switch (cputype) { case CPU_K8: decode_k8_mc(m, &ismemerr); break; CASE_INTEL_CPUS: decode_intel_mc(m, cputype, &ismemerr, recordlen); break; /* add handlers for other CPUs here */ default: break; } /* decode all status bits here */ Wprintf("STATUS %llx MCGSTATUS %llx\n", m->status, m->mcgstatus); n = 0; if (recordlen >= offsetof(struct mce, cpuid) && m->mcgcap) n += Wprintf("MCGCAP %llx ", m->mcgcap); if (recordlen >= offsetof(struct mce, apicid)) n += Wprintf("APICID %x ", m->apicid); if (recordlen >= offsetof(struct mce, socketid)) n += Wprintf("SOCKETID %x ", m->socketid); if (n > 0) Wprintf("\n"); if (recordlen >= offsetof(struct mce, cpuid) && m->cpuid) { u32 fam, mod; parse_cpuid(m->cpuid, &fam, &mod); Wprintf("CPUID Vendor %s Family %u Model %u\n", cpuvendor_name(m->cpuvendor), fam, mod); } if (cputype != CPU_SANDY_BRIDGE_EP && cputype != CPU_IVY_BRIDGE_EPEX && cputype != CPU_HASWELL_EPEX && cputype != CPU_BROADWELL && cputype != CPU_KNIGHTS_LANDING && cputype != CPU_SKYLAKE) resolveaddr(m->addr); if (!ascii_mode && ismemerr && (m->status & MCI_STATUS_ADDRV)) { diskdb_resolve_addr(m->addr); } } static void dump_mce_raw_ascii(struct mce *m, unsigned recordlen) { /* should not happen */ if (!m->finished) Wprintf("not finished?\n"); Wprintf("CPU %u\n", m->extcpu ? 
m->extcpu : m->cpu); Wprintf("BANK %d\n", m->bank); Wprintf("TSC %#llx\n", m->tsc); Wprintf("RIP %#02x:%#llx\n", m->cs, m->ip); Wprintf("MISC %#llx\n", m->misc); Wprintf("ADDR %#llx\n", m->addr); Wprintf("STATUS %#llx\n", m->status); Wprintf("MCGSTATUS %#llx\n", m->mcgstatus); if (recordlen >= offsetof(struct mce, cpuid)) Wprintf("PROCESSOR %u:%#x\n", m->cpuvendor, m->cpuid); #define CPRINT(str, field) \ if (recordlen >= offsetof(struct mce, field)) \ Wprintf(str "\n", m->field) CPRINT("TIME %llu", time); CPRINT("SOCKETID %u", socketid); CPRINT("APICID %u", apicid); CPRINT("MCGCAP %#llx", mcgcap); #undef CPRINT Wprintf("\n"); } int is_cpu_supported(void) { enum { VENDOR = 1, FAMILY = 2, MODEL = 4, MHZ = 8, FLAGS = 16, ALL = 0x1f } seen = 0; FILE *f; static int checked; if (checked) return 1; checked = 1; f = fopen("/proc/cpuinfo","r"); if (f != NULL) { int family = 0; int model = 0; char vendor[64] = { 0 }; char *line = NULL; size_t linelen = 0; double mhz; while (getdelim(&line, &linelen, '\n', f) > 0 && seen != ALL) { if (sscanf(line, "vendor_id : %63[^\n]", vendor) == 1) seen |= VENDOR; if (sscanf(line, "cpu family : %d", &family) == 1) seen |= FAMILY; if (sscanf(line, "model : %d", &model) == 1) seen |= MODEL; /* We use only Mhz of the first CPU, assuming they are the same (there are more sanity checks later to make this not as wrong as it sounds) */ if (sscanf(line, "cpu MHz : %lf", &mhz) == 1) { if (!cpumhz_forced) cpumhz = mhz; seen |= MHZ; } if (!strncmp(line, "flags", 5) && isspace(line[6])) { processor_flags = line; line = NULL; linelen = 0; seen |= FLAGS; } } if (seen == ALL) { if (!strcmp(vendor,"AuthenticAMD")) { if (family == 15) { cputype = CPU_K8; } else if (family >= 16) { SYSERRprintf("ERROR: AMD Processor family %d: mcelog does not support this processor. 
Please use the edac_mce_amd module instead.\n", family); return 0; } } else if (!strcmp(vendor,"GenuineIntel")) cputype = select_intel_cputype(family, model); /* Add checks for other CPUs here */ } else { Eprintf("warning: Cannot parse /proc/cpuinfo\n"); } fclose(f); free(line); } else Eprintf("warning: Cannot open /proc/cpuinfo\n"); return 1; } static char *skipspace(char *s) { while (isspace(*s)) ++s; return s; } static char *skip_syslog(char *s) { char *p; /* Handle syslog output */ p = strstr(s, "mcelog: "); if (p) return p + sizeof("mcelog: ") - 1; return s; } static char *skipgunk(char *s) { s = skip_syslog(s); s = skipspace(s); if (*s == '<') { s += strcspn(s, ">"); if (*s == '>') ++s; } s = skipspace(s); if (*s == '[') { s += strcspn(s, "]"); if (*s == ']') ++s; } s = skipspace(s); if (strncmp(s, "mce: [Hardware Error]:", 22) == 0) s += 22; return skipspace(s); } static inline int urange(unsigned val, unsigned lo, unsigned hi) { return val >= lo && val <= hi; } static int is_short(char *name) { return strlen(name) == 3 && isupper(name[0]) && islower(name[1]) && islower(name[2]); } static unsigned skip_date(char *s) { unsigned day, hour, min, year, sec; char dayname[11]; char month[11]; unsigned next; if (sscanf(s, "%10s %10s %u %u:%u:%u %u%n", dayname, month, &day, &hour, &min, &sec, &year, &next) != 7) return 0; if (!is_short(dayname) || !is_short(month) || !urange(day, 1, 31) || !urange(hour, 0, 24) || !urange(min, 0, 59) || !urange(sec, 0, 59) || year < 1900) return 0; return next; } static void dump_mce_final(struct mce *m, char *symbol, int missing, int recordlen, int dseen) { m->finished = 1; if (m->cpuid) mce_cpuid(m); if (!dump_raw_ascii) { if (!dseen) disclaimer(); dump_mce(m, recordlen); if (symbol[0]) Wprintf("RIP: %s\n", symbol); if (missing) Wprintf("(Fields were incomplete)\n"); } else dump_mce_raw_ascii(m, recordlen); flushlog(); } static char *skip_patterns[] = { "MCA:*", "MCi_MISC register valid*", "MC? 
status*", "Unsupported new Family*", "Kernel does not support page offline interface", NULL }; static int match_patterns(char *s, char **pat) { for (; *pat; pat++) if (!fnmatch(*pat, s, 0)) return 0; return 1; } #define FIELD(f) \ if (recordlen < endof_field(struct mce, f)) \ recordlen = endof_field(struct mce, f) /* Decode ASCII input for fatal messages */ static void decodefatal(FILE *inf) { struct mce m; char *line = NULL; size_t linelen = 0; int missing; char symbol[100]; int data; int next; char *s = NULL; unsigned cpuvendor; unsigned recordlen; int disclaimer_seen; ascii_mode = 1; if (do_dmi && dmi_forced) Wprintf( "WARNING: with --dmi mcelog --ascii must run on the same machine with the\n" " same BIOS/memory configuration as where the machine check occurred.\n"); restart: missing = 0; data = 0; next = 0; disclaimer_seen = 0; recordlen = 0; memset(&m, 0, sizeof(struct mce)); symbol[0] = '\0'; while (next > 0 || getdelim(&line, &linelen, '\n', inf) > 0) { int n = 0; char *start; s = next > 0 ? 
s + next : line; s = skipgunk(s); start = s; next = 0; if (!strncmp(s, "CPU ", 4)) { unsigned cpu = 0, bank = 0; n = sscanf(s, "CPU %u: Machine Check Exception: %16Lx Bank %d: %016Lx%n", &cpu, &m.mcgstatus, &bank, &m.status, &next); if (n == 1) { n = sscanf(s, "CPU %u BANK %u%n", &cpu, &bank, &next); if (n != 2) n = sscanf(s, "CPU %u %u%n", &cpu, &bank, &next); m.cpu = cpu; if (n < 2) missing++; else { m.bank = bank; FIELD(bank); } } else if (n <= 0) { missing++; } else if (n > 1) { FIELD(mcgstatus); m.cpu = cpu; if (n > 2) { m.bank = bank; FIELD(bank); } else if (n > 3) FIELD(status); if (n < 4) missing++; } } else if (!strncmp(s, "STATUS", 6)) { if ((n = sscanf(s,"STATUS %llx%n", &m.status, &next)) < 1) missing++; else FIELD(status); } else if (!strncmp(s, "MCGSTATUS", 6)) { if ((n = sscanf(s,"MCGSTATUS %llx%n", &m.mcgstatus, &next)) < 1) missing++; else FIELD(mcgstatus); } else if (!strncmp(s, "RIP", 3)) { unsigned cs = 0; if (!strncmp(s, "RIP !INEXACT!", 13)) s += 13; else s += 3; n = sscanf(s, "%02x:<%016Lx> {%99s}%n", &cs, &m.ip, symbol, &next); m.cs = cs; if (n < 2) missing++; else FIELD(ip); } else if (!strncmp(s, "TSC",3)) { if ((n = sscanf(s, "TSC %llx%n", &m.tsc, &next)) < 1) missing++; else FIELD(tsc); } else if (!strncmp(s, "ADDR",4)) { if ((n = sscanf(s, "ADDR %llx%n", &m.addr, &next)) < 1) missing++; else FIELD(addr); } else if (!strncmp(s, "MISC",4)) { if ((n = sscanf(s, "MISC %llx%n", &m.misc, &next)) < 1) missing++; else FIELD(misc); } else if (!strncmp(s, "PROCESSOR", 9)) { if ((n = sscanf(s, "PROCESSOR %u:%x%n", &cpuvendor, &m.cpuid, &next)) < 2) missing++; else { m.cpuvendor = cpuvendor; FIELD(cpuid); FIELD(cpuvendor); } } else if (!strncmp(s, "TIME", 4)) { if ((n = sscanf(s, "TIME %llu%n", &m.time, &next)) < 1) missing++; else FIELD(time); next += skip_date(s + next); } else if (!strncmp(s, "MCGCAP", 6)) { if ((n = sscanf(s, "MCGCAP %llx%n", &m.mcgcap, &next)) != 1) missing++; else FIELD(mcgcap); } else if (!strncmp(s, "APICID", 6)) { if ((n = 
sscanf(s, "APICID %x%n", &m.apicid, &next)) != 1) missing++; else FIELD(apicid); } else if (!strncmp(s, "SOCKETID", 8)) { if ((n = sscanf(s, "SOCKETID %u%n", &m.socketid, &next)) != 1) missing++; else FIELD(socketid); } else if (!strncmp(s, "CPUID", 5)) { unsigned fam, mod; char vendor[31]; if ((n = sscanf(s, "CPUID Vendor %30s Family %u Model %u\n", vendor, &fam, &mod)) < 3) missing++; else { m.cpuvendor = cpuvendor_to_num(vendor); m.cpuid = unparse_cpuid(fam, mod); FIELD(cpuid); FIELD(cpuvendor); } } else if (strstr(s, "HARDWARE ERROR")) disclaimer_seen = 1; else if (!strncmp(s, "(XEN)", 5)) { char *w; unsigned bank, cpu; if (strstr(s, "The hardware reports a non fatal, correctable incident occurred")) { w = strstr(s, "CPU"); if (w && sscanf(w, "CPU %d", &cpu)) { m.cpu = cpu; FIELD(cpu); } } else if ((n = sscanf(s, "(XEN) Bank %d: %llx at %llx", &bank, &m.status, &m.addr) >= 1)) { m.bank = bank; FIELD(bank); if (n >= 2) FIELD(status); if (n >= 3) FIELD(addr); } } else if (!match_patterns(s, skip_patterns)) n = 0; else { s = skipspace(s); if (*s && data) dump_mce_final(&m, symbol, missing, recordlen, disclaimer_seen); if (!dump_raw_ascii) Wprintf("%s", start); if (*s && data) goto restart; } if (n > 0) data = 1; } free(line); if (data) dump_mce_final(&m, symbol, missing, recordlen, disclaimer_seen); } static void remove_pidfile(void) { unlink(pidfile); if (pidfile != pidfile_default) free(pidfile); } static void signal_exit(int sig) { remove_pidfile(); _exit(EXIT_SUCCESS); } static void setup_pidfile(char *s) { char cwd[PATH_MAX]; char *c; if (*s != '/') { c = getcwd(cwd, PATH_MAX); if (!c) return; asprintf(&pidfile, "%s/%s", cwd, s); } else { asprintf(&pidfile, "%s", s); } return; } static void write_pidfile(void) { FILE *f; atexit(remove_pidfile); signal(SIGTERM, signal_exit); signal(SIGINT, signal_exit); signal(SIGQUIT, signal_exit); f = fopen(pidfile, "w"); if (!f) { Eprintf("Cannot open pidfile `%s'", pidfile); return; } fprintf(f, "%u", getpid()); fclose(f); 
} void usage(void) { fprintf(stderr, "Usage:\n" " mcelog [options] [mcelogdevice]\n" "Decode machine check error records from current kernel.\n" " mcelog [options] --daemon\n" "Run mcelog in daemon mode, waiting for errors from the kernel.\n" " mcelog [options] --client\n" "Query a currently running mcelog daemon for errors\n" " mcelog [options] --ascii < log\n" " mcelog [options] --ascii --file log\n" "Decode machine check ASCII output from kernel logs\n" "Options:\n" "--cpu CPU Set CPU type CPU to decode (see below for valid types)\n" "--cpumhz MHZ Set CPU Mhz to decode time (output unreliable, not needed on new kernels)\n" "--raw (with --ascii) Dump in raw ASCII format for machine processing\n" "--daemon Run in background waiting for events (needs newer kernel)\n" "--ignorenodev Exit silently when the device cannot be opened\n" "--file filename With --ascii read machine check log from filename instead of stdin\n" "--syslog Log decoded machine checks in syslog (default stdout or syslog for daemon)\n" "--syslog-error Log decoded machine checks in syslog with error level\n" "--no-syslog Never log anything to syslog\n" "--logfile filename Append log output to logfile instead of stdout\n" "--dmi Use SMBIOS information to decode DIMMs (needs root)\n" "--no-dmi Don't use SMBIOS information\n" "--dmi-verbose Dump SMBIOS information (for debugging)\n" "--filter Inhibit known bogus events (default on)\n" "--no-filter Don't inhibit known broken events\n" "--config-file filename Read config information from config file instead of " CONFIG_FILENAME "\n" "--foreground Keep in foreground (for debugging)\n" "--num-errors N Only process N errors (for testing)\n" "--pidfile file Write pid of daemon into file\n" "--no-imc-log Disable extended iMC logging\n" "--is-cpu-supported Exit with return code indicating whether the CPU is supported\n" ); diskdb_usage(); print_cputypes(); exit(1); } enum options { O_LOGFILE = O_COMMON, O_K8, O_P4, O_GENERIC, O_CORE2, O_INTEL_CPU, O_FILTER, 
O_DMI, O_NO_DMI, O_DMI_VERBOSE, O_SYSLOG, O_NO_SYSLOG, O_CPUMHZ, O_SYSLOG_ERROR, O_RAW, O_DAEMON, O_ASCII, O_CLIENT, O_VERSION, O_CONFIG_FILE, O_CPU, O_FILE, O_FOREGROUND, O_NUMERRORS, O_PIDFILE, O_DEBUG_NUMERRORS, O_NO_IMC_LOG, O_IS_CPU_SUPPORTED, }; static struct option options[] = { { "logfile", 1, NULL, O_LOGFILE }, { "k8", 0, NULL, O_K8 }, { "p4", 0, NULL, O_P4 }, { "generic", 0, NULL, O_GENERIC }, { "core2", 0, NULL, O_CORE2 }, { "intel-cpu", 1, NULL, O_INTEL_CPU }, { "ignorenodev", 0, &ignore_nodev, 1 }, { "filter", 0, &filter_bogus, 1 }, { "no-filter", 0, &filter_bogus, 0 }, { "dmi", 0, NULL, O_DMI }, { "no-dmi", 0, NULL, O_NO_DMI }, { "dmi-verbose", 1, NULL, O_DMI_VERBOSE }, { "syslog", 0, NULL, O_SYSLOG }, { "cpumhz", 1, NULL, O_CPUMHZ }, { "syslog-error", 0, NULL, O_SYSLOG_ERROR }, { "dump-raw-ascii", 0, &dump_raw_ascii, 1 }, { "raw", 0, &dump_raw_ascii, 1 }, { "no-syslog", 0, NULL, O_NO_SYSLOG }, { "daemon", 0, NULL, O_DAEMON }, { "ascii", 0, NULL, O_ASCII }, { "file", 1, NULL, O_FILE }, { "version", 0, NULL, O_VERSION }, { "config-file", 1, NULL, O_CONFIG_FILE }, { "cpu", 1, NULL, O_CPU }, { "foreground", 0, NULL, O_FOREGROUND }, { "client", 0, NULL, O_CLIENT }, { "num-errors", 1, NULL, O_NUMERRORS }, { "pidfile", 1, NULL, O_PIDFILE }, { "debug-numerrors", 0, NULL, O_DEBUG_NUMERRORS }, /* undocumented: for testing */ { "no-imc-log", 0, NULL, O_NO_IMC_LOG }, { "is-cpu-supported", 0, NULL, O_IS_CPU_SUPPORTED }, DISKDB_OPTIONS {} }; static int modifier(int opt) { int v; switch (opt) { case O_LOGFILE: logfile = optarg; break; case O_K8: cputype = CPU_K8; cpu_forced = 1; break; case O_P4: cputype = CPU_P4; cpu_forced = 1; break; case O_GENERIC: cputype = CPU_GENERIC; cpu_forced = 1; break; case O_CORE2: cputype = CPU_CORE2; cpu_forced = 1; break; case O_INTEL_CPU: { unsigned fam, mod; if (sscanf(optarg, "%i,%i", &fam, &mod) != 2) usage(); cputype = select_intel_cputype(fam, mod); if (cputype == CPU_GENERIC) { fprintf(stderr, "Unknown Intel CPU\n"); usage(); 
} cpu_forced = 1; break; } case O_CPU: cputype = lookup_cputype(optarg); cpu_forced = 1; intel_cpu_init(cputype); break; case O_DMI: do_dmi = 1; dmi_forced = 1; break; case O_NO_DMI: dmi_forced = 1; do_dmi = 0; break; case O_DMI_VERBOSE: if (sscanf(optarg, "%i", &v) != 1) usage(); dmi_set_verbosity(v); break; case O_SYSLOG: openlog("mcelog", 0, LOG_DAEMON); syslog_opt = SYSLOG_ALL|SYSLOG_FORCE; break; case O_NO_SYSLOG: syslog_opt = SYSLOG_FORCE; break; case O_CPUMHZ: cpumhz_forced = 1; if (sscanf(optarg, "%lf", &cpumhz) != 1) usage(); break; case O_SYSLOG_ERROR: syslog_level = LOG_ERR; syslog_opt = SYSLOG_ALL|SYSLOG_FORCE; break; case O_DAEMON: daemon_mode = 1; if (!(syslog_opt & SYSLOG_FORCE)) syslog_opt = SYSLOG_REMARK|SYSLOG_ERROR; break; case O_FILE: inputfile = optarg; break; case O_FOREGROUND: foreground = 1; if (!(syslog_opt & SYSLOG_FORCE)) syslog_opt = SYSLOG_FORCE; break; case O_NUMERRORS: numerrors = atoi(optarg); break; case O_PIDFILE: setup_pidfile(optarg); break; case O_CONFIG_FILE: /* parsed in config.c */ break; case O_DEBUG_NUMERRORS: debug_numerrors = 1; break; case O_NO_IMC_LOG: imc_log = 0; break; case O_IS_CPU_SUPPORTED: check_only = 1; break; case 0: break; default: return 0; } return 1; } static void modifier_finish(void) { if(!foreground && daemon_mode && !logfile && !(syslog_opt & SYSLOG_LOG)) { logfile = logfile_default; } if (logfile) { if (open_logfile(logfile) < 0) { if (daemon_mode && !(syslog_opt & SYSLOG_FORCE)) syslog_opt = SYSLOG_ALL; SYSERRprintf("Cannot open logfile %s", logfile); if (!daemon_mode) exit(1); } } } void argsleft(int ac, char **av) { int opt; while ((opt = getopt_long(ac, av, "", options, NULL)) != -1) { if (modifier(opt) != 1) usage(); } } void no_syslog(void) { if (!(syslog_opt & SYSLOG_FORCE)) syslog_opt = 0; } static int combined_modifier(int opt) { int r = modifier(opt); if (r == 0) r = diskdb_modifier(opt); return r; } static void general_setup(void) { trigger_setup(); yellow_setup(); bus_setup(); 
unknown_setup(); config_cred("global", "run-credentials", &runcred); if (config_bool("global", "filter-memory-errors") == 1) filter_memory_errors = 1; } static void drop_cred(void) { if (runcred.uid != -1U && runcred.gid == -1U) { struct passwd *pw = getpwuid(runcred.uid); if (pw) runcred.gid = pw->pw_gid; } if (runcred.gid != -1U) { if (setgid(runcred.gid) < 0) SYSERRprintf("Cannot change group to %d", runcred.gid); } if (runcred.uid != -1U) { if (setuid(runcred.uid) < 0) SYSERRprintf("Cannot change user to %d", runcred.uid); } } static void process(int fd, unsigned recordlen, unsigned loglen, char *buf) { int i; int len, count; int finish = 0, flags; if (recordlen == 0) { Wprintf("no data in mce record\n"); return; } len = read(fd, buf, recordlen * loglen); if (len < 0) { SYSERRprintf("mcelog read"); return; } count = len / (int)recordlen; if (count == (int)loglen) { if ((ioctl(fd, MCE_GETCLEAR_FLAGS, &flags) == 0) && (flags & (1 << MCE_OVERFLOW))) Eprintf("Warning: MCE buffer is overflowed.\n"); } for (i = 0; (i < count) && !finish; i++) { struct mce *mce = (struct mce *)(buf + i*recordlen); mce_prepare(mce); if (numerrors > 0 && --numerrors == 0) finish = 1; if (!mce_filter(mce, recordlen)) continue; if (!dump_raw_ascii) { disclaimer(); Wprintf("MCE %d\n", i); dump_mce(mce, recordlen); } else dump_mce_raw_ascii(mce, recordlen); flushlog(); } if (debug_numerrors && numerrors <= 0) finish = 1; if (recordlen > sizeof(struct mce)) { Eprintf("warning: %lu bytes ignored in each record\n", (unsigned long)recordlen - sizeof(struct mce)); Eprintf("consider an update\n"); } if (finish) exit(0); } static void noargs(int ac, char **av) { if (getopt_long(ac, av, "", options, NULL) != -1) usage(); } static void parse_config(char **av) { static const char config_fn[] = CONFIG_FILENAME; const char *fn = config_file(av, config_fn); if (!fn) usage(); if (parse_config_file(fn) < 0) { /* If it's the default file don't complain if it isn't there */ if (fn != config_fn) { 
fprintf(stderr, "Cannot open config file %s\n", fn); exit(1); } return; } config_options(options, combined_modifier); } static void ascii_command(int ac, char **av) { FILE *f = stdin; argsleft(ac, av); if (inputfile) { f = fopen(inputfile, "r"); if (!f) { fprintf(stderr, "Cannot open input file `%s': %s\n", inputfile, strerror(errno)); exit(1); } /* f closed by exit */ } no_syslog(); checkdmi(); decodefatal(f); } static void client_command(int ac, char **av) { argsleft(ac, av); no_syslog(); // XXX modifiers ask_server("dump all bios\n"); ask_server("pages\n"); } struct mcefd_data { unsigned loglen; unsigned recordlen; char *buf; }; static void process_mcefd(struct pollfd *pfd, void *data) { struct mcefd_data *d = (struct mcefd_data *)data; assert((pfd->revents & POLLIN) != 0); process(pfd->fd, d->recordlen, d->loglen, d->buf); } static void handle_sigusr1(int sig) { reopenlog(); } int main(int ac, char **av) { struct mcefd_data d = {}; int opt; int fd; parse_config(av); while ((opt = getopt_long(ac, av, "", options, NULL)) != -1) { if (opt == '?') { usage(); } else if (combined_modifier(opt) > 0) { continue; } else if (opt == O_ASCII) { ascii_command(ac, av); exit(0); } else if (opt == O_CLIENT) { client_command(ac, av); exit(0); } else if (opt == O_VERSION) { noargs(ac, av); fprintf(stderr, "mcelog %s\n", MCELOG_VERSION); exit(0); } else if (diskdb_cmd(opt, ac, av)) { exit(0); } else if (opt == 0) break; } /* before doing anything else let's see if the CPUs are supported */ if (!cpu_forced && !is_cpu_supported()) { if (!check_only) fprintf(stderr, "CPU is unsupported\n"); exit(1); } if (check_only) exit(0); /* If the user didn't tell us not to use iMC logging, check if CPU supports it */ if (imc_log == -1) { switch (cputype) { case CPU_SANDY_BRIDGE_EP: case CPU_IVY_BRIDGE_EPEX: case CPU_HASWELL_EPEX: imc_log = 1; break; default: imc_log = 0; break; } } modifier_finish(); if (av[optind]) logfn = av[optind++]; if (av[optind]) usage(); checkdmi(); general_setup(); fd 
= open(logfn, O_RDONLY); if (fd < 0) { if (ignore_nodev) exit(0); SYSERRprintf("Cannot open `%s'", logfn); exit(1); } if (ioctl(fd, MCE_GET_RECORD_LEN, &d.recordlen) < 0) err("MCE_GET_RECORD_LEN"); if (ioctl(fd, MCE_GET_LOG_LEN, &d.loglen) < 0) err("MCE_GET_LOG_LEN"); d.buf = xalloc(d.recordlen * d.loglen); if (daemon_mode) { prefill_memdb(do_dmi); if (!do_dmi) closedmi(); server_setup(); page_setup(); if (imc_log) set_imc_log(cputype); drop_cred(); register_pollcb(fd, POLLIN, process_mcefd, &d); if (!foreground && daemon(0, need_stdout()) < 0) err("daemon"); if (pidfile) write_pidfile(); signal(SIGUSR1, handle_sigusr1); event_signal(SIGUSR1); eventloop(); } else { process(fd, d.recordlen, d.loglen, d.buf); } trigger_wait(); exit(0); } mcelog-128+dfsg/mcelog.conf000066400000000000000000000145071261732315200157010ustar00rootroot00000000000000# # Example config file for mcelog # mcelog is the user space backend that decodes and process machine check events # (cpu hardware errors) reported by the CPU to the kernel # # general format #optionname = value # white space is not allowed in value currently, except at the end where it is dropped # # In general all command line options that are not commands work here. # See man mcelog or mcelog --help for a list. # e.g. to enable the --no-syslog option use #no-syslog = yes (or no to disable) # when the option has a argument #logfile = /tmp/logfile # below are the options which are not command line options. # Set CPU type for which mcelog decodes events: #cpu = type # For valid values for type please see mcelog --help. # If this value is set incorrectly the decoded output will be likely incorrect. # By default when this parameter is not set mcelog uses the CPU it is running on # on very new kernels the mcelog events reported by the kernel also carry # the CPU type which is used too when available and not overriden. # Enable daemon mode: #daemon = yes # By default mcelog just processes the currently pending events and exits. 
# In daemon mode it will keep running as a daemon in the background and poll # the kernel for events and then decode them. # Filter out known broken events by default. filter = yes # Don't log memory errors individually. # They still get accounted if that is enabled. #filter-memory-errors = yes # output in undecoded raw format to be easier machine readable # (default is decoded). #raw = yes # Set CPU Mhz to decode uptime from time stamp counter (output # unreliable, not needed on new kernels which report the event time # directly). A lot of systems don't have a linear time stamp clock # and the output is wrong then. # Normally mcelog tries to figure out if the TSC is reliable # and only uses the current frequency then. # Setting a frequency forces timestamp decoding. # This setting is obsolete with modern kernels which report the time # directly. #cpumhz = 1800.00 # log output options # Log decoded machine checks in syslog (default stdout or syslog for daemon) #syslog = yes # Log decoded machine checks in syslog with error level #syslog-error = yes # Never log anything to syslog #no-syslog = yes # Append log output to logfile instead of stdout. Only when no syslog logging is active #logfile = filename # Use SMBIOS information to decode DIMMs (needs root). # This function is not recommended to use right now and generally not needed. # The exception is memdb prepopulation, which is configured separately below. #dmi = no # When in daemon mode run as this user after set up. # Note that the triggers will run as this user too. # Setting this to non root will mean that triggers cannot take some corrective # action, like offlining objects. #run-credentials-user = root # group to run as daemon with # default to the group of the run-credentials-user #run-credentials-group = nobody [server] # user allowed to access client socket. # when set to * match any # root is always allowed to access. 
# default: root only client-user = root # group allowed to access mcelog # When no group is configured any group matches (but still user checking). # when set to * match any #client-group = root # Path to the unix socket for client<->server communication. # When no socket-path is configured the server will not start #socket-path = /var/run/mcelog-client # When mcelog starts it checks if a server is already running. This configures the timeout # for this check. #initial-ping-timeout = 2 # [dimm] # Is the in memory DIMM error tracking enabled? # Only works on systems with integrated memory controller and # which are supported. # Only takes effect in daemon mode. dimm-tracking-enabled = yes # Use DMI information from the BIOS to prepopulate DIMM database. # Note this might not work with all BIOS and requires mcelog to run as root. # Alternative is to let mcelog create DIMM objects on demand. dmi-prepopulate = yes # # Execute these triggers when the rate of corrected or uncorrected # Errors per DIMM exceeds the threshold. # Note when the hardware does not report DIMMs this might also # be per channel. # The default of 10/24h is reasonable for server quality # DDR3 DIMMs as of 2009/10. #uc-error-trigger = dimm-error-trigger uc-error-threshold = 1 / 24h #ce-error-trigger = dimm-error-trigger ce-error-threshold = 10 / 24h [socket] # Enable memory error accounting per socket. socket-tracking-enabled = yes # Threshold and trigger for uncorrected memory errors on a socket. # mem-uc-error-trigger = socket-memory-error-trigger mem-uc-error-threshold = 100 / 24h # Trigger script for corrected memory errors on a socket. mem-ce-error-trigger = socket-memory-error-trigger # Threshold on when to trigger a correct error for the socket. mem-ce-error-threshold = 100 / 24h # Log socket error threshold explicitely? 
mem-ce-error-log = yes # Trigger script for uncorrected bus error events bus-uc-threshold-trigger = bus-error-trigger # Trigger script for uncorrected IOMCA errors iomca-threshold-trigger = iomca-error-trigger # Trigger script for other uncategorized errors unknown-threshold-trigger = unknown-error-trigger [cache] # Processing of cache error thresholds reported by Intel CPUs. cache-threshold-trigger = cache-error-trigger # Should cache threshold events be logged explicitly? cache-threshold-log = yes [page] # Memory error accounting per 4K memory page. # Threshold for the corrected memory errors trigger script. memory-ce-threshold = 10 / 24h # Trigger script for corrected errors. # memory-ce-trigger = page-error-trigger # Should page threshold events be logged explicitly? memory-ce-log = yes # specify the internal action in mcelog to exceeding a page error threshold # this is done in addition to executing the trigger script if available # off no action # account only account errors # soft try to soft-offline page without killing any processes # This requires an up-to-date kernel. Might not be successful. # hard try to hard-offline page by killing processes # Requires an up-to-date kernel. Might not be successful. # soft-then-hard First try to soft offline, then try hard offlining #memory-ce-action = off|account|soft|hard|soft-then-hard memory-ce-action = soft [trigger] # Maximum number of running triggers children-max = 2 # execute triggers in this directory directory = /etc/mcelog mcelog-128+dfsg/mcelog.conf.5000066400000000000000000000157631261732315200160510ustar00rootroot00000000000000 ." Auto generated mcelog.conf manpage. Do not edit. .TH "mcelog.conf" 5 "mcelog" .SH NAME mcelog.conf \- mcelog.conf reference .SH SYNOPSIS .B /etc/mcelog.conf .SH DESCRIPTION /etc/mcelog.conf is the main configuration file for .B mcelog(8). This is configuration file separated into sections including a default section. 
General format .PP .B optionname = value .PP White space is not allowed in value currently, except at the end where it is dropped .PP .PP In general all command line options that are not commands work here. See man mcelog or mcelog --help for a list. e.g. to enable the --no-syslog option use .PP .B no-syslog = yes (or no to disable) .PP When the option has a argument .PP .B logfile = /tmp/logfile .PP Below are the options which are not command line options. .PP .PP Set cpu type for which mcelog decodes events: .PP .B cpu = type .PP For valid values for type please see mcelog --help. If this value is set incorrectly the decoded output will be likely incorrect. By default when this parameter is not set mcelog uses the CPU it is running on on very new kernels the mcelog events reported by the kernel also carry the CPU type which is used too when available and not overriden. .PP .PP Enable daemon mode: .PP .B daemon = yes .PP By default mcelog just processes the currently pending events and exits. In daemon mode it will keep running as a daemon in the background and poll the kernel for events and then decode them. .PP .PP Filter out known broken events by default. .PP .B filter = yes .PP Don't log memory errors individually. They still get accounted if that is enabled. .PP .B filter-memory-errors = yes .PP .PP Output in undecoded raw format to be easier machine readable (default is decoded). .PP .B raw = yes .PP .PP Set cpu mhz to decode uptime from time stamp counter (output unreliable, not needed on new kernels which report the event time directly. A lot of systems don't have a linear time stamp clock and the output is wrong then. Normally mcelog tries to figure out if it the TSC is reliable and only uses the current frequency then. Setting a frequency forces timestamp decoding. This setting is obsolete with modern kernels which report the time directly. 
.PP .B cpumhz = 1800.00 .PP .PP Log output options Log decoded machine checks in syslog (default stdout or syslog for daemon) .PP .B syslog = yes .PP Log decoded machine checks in syslog with error level .PP .B syslog-error = yes .PP Never log anything to syslog .PP .B no-syslog = yes .PP Append log output to logfile instead of stdout. only when no syslog logging is active .PP .B logfile = filename .PP .PP Use smbios information to decode dimms (needs root). This function is not recommended to use right now and generally not needed. The exception is memdb prepopulation, which is configured separately below. .PP .B dmi = no .PP .PP When in daemon mode run as this user after set up. Note that the triggers will run as this user too. Setting this to non root will mean that triggers cannot take some corrective action, like offlining objects. .PP .B run-credentials-user = root .PP .PP Group to run as daemon with default to the group of the run-credentials-user .PP .B run-credentials-group = nobody .PP .PP .SS "The server config section" User allowed to access client socket. when set to * match any root is always allowed to access. default: root only .PP .B client-user = root .PP Group allowed to access mcelog When no group is configured any group matches (but still user checking). when set to * match any .PP .B client-group = root .PP Path to the unix socket for client<->server communication. When no socket-path is configured the server will not start .PP .B socket-path = /var/run/mcelog-client .PP When mcelog starts it checks if a server is already running. this configures the timeout for this check. .PP .B initial-ping-timeout = 2 .PP .PP .SS "The dimm config section" Is the in memory dimm error tracking enabled? Only works on systems with integrated memory controller and which are supported. Only takes effect in daemon mode. .PP .B dimm-tracking-enabled = yes .PP Use dmi information from the bios to prepopulate dimm database. 
Note this might not work with all BIOS and requires mcelog to run as root. Alternative is to let mcelog create DIMM objects on demand. .PP .B dmi-prepopulate = yes .PP Execute these triggers when the rate of corrected or uncorrected Errors per DIMM exceeds the threshold. Note when the hardware does not report DIMMs this might also be per channel. The default of 10/24h is reasonable for server quality DDR3 DIMMs as of 2009/10. .PP .B uc-error-trigger = dimm-error-trigger .PP .B uc-error-threshold = 1 / 24h .PP .B ce-error-trigger = dimm-error-trigger .PP .B ce-error-threshold = 10 / 24h .PP .PP .SS "The socket config section" Enable memory error accounting per socket. .PP .B socket-tracking-enabled = yes .PP .PP Threshold and trigger for uncorrected memory errors on a socket. mem-uc-error-trigger = socket-memory-error-trigger .PP .PP .B mem-uc-error-threshold = 100 / 24h .PP .PP Trigger script for corrected memory errors on a socket. .PP .B mem-ce-error-trigger = socket-memory-error-trigger .PP .PP Threshold on when to trigger a correct error for the socket. .PP .PP .B mem-ce-error-threshold = 100 / 24h .PP .PP log socket error threshold explicitely? .PP .B mem-ce-error-log = yes .PP .PP Trigger script for uncorrected bus error events .PP .B bus-uc-threshold-trigger = bus-error-trigger .PP .PP Trigger script for uncorrected iomca erors .PP .B iomca-threshold-trigger = iomca-error-trigger .PP .PP Trigger script for other uncategorized errors .PP .B unknown-threshold-trigger = unknown-error-trigger .PP .PP .SS "The cache config section" Processing of cache error thresholds reported by intel cpus. .PP .B cache-threshold-trigger = cache-error-trigger .PP .PP Should cache threshold events be logged explicitely? .PP .B cache-threshold-log = yes .PP .PP .SS "The page config section" Memory error accouting per 4k memory page. Threshold for the correct memory errors trigger script. .PP .B memory-ce-threshold = 10 / 24h .PP .PP Trigger script for corrected errors. 
memory-ce-trigger = page-error-trigger .PP .PP Should page threshold events be logged explicitely? .PP .B memory-ce-log = yes .PP .PP Specify the internal action in mcelog to exceeding a page error threshold this is done in addition to executing the trigger script if available off no action account only account errors soft try to soft-offline page without killing any processes This requires an uptodate kernel. Might not be successfull. hard try to hard-offline page by killing processes Requires an uptodate kernel. Might not be successfull. soft-then-hard First try to soft offline, then try hard offlining .PP .B memory-ce-action = off|account|soft|hard|soft-then-hard .PP .B memory-ce-action = soft .PP .PP .SS "The trigger config section" Maximum number of running triggers .PP .B children-max = 2 .PP Execute triggers in this directory .PP .B directory = /etc/mcelog .PP .SH SEE ALSO .BR mcelog (8) , .B http://www.mcelog.org mcelog-128+dfsg/mcelog.cron000077500000000000000000000001071261732315200157070ustar00rootroot00000000000000#!/bin/bash /usr/sbin/mcelog --ignorenodev --filter >> /var/log/mcelog mcelog-128+dfsg/mcelog.h000066400000000000000000000111601261732315200151730ustar00rootroot00000000000000typedef unsigned long long u64; typedef unsigned int u32; typedef unsigned short u16; typedef unsigned char u8; #define __u64 u64 #define __u32 u32 #define __u16 u16 #define __u8 u8 /* kernel structure: */ /* Fields are zero when not available */ struct mce { __u64 status; __u64 misc; __u64 addr; __u64 mcgstatus; __u64 ip; __u64 tsc; /* cpu time stamp counter */ __u64 time; /* wall time_t when error was detected */ __u8 cpuvendor; /* cpu vendor as encoded in system.h */ __u8 pad1; __u16 pad2; __u32 cpuid; /* CPUID 1 EAX */ __u8 cs; /* code segment */ __u8 bank; /* machine check bank */ __u8 cpu; /* cpu number; obsolete; use extcpu now */ __u8 finished; /* entry is valid */ __u32 extcpu; /* linux cpu number that detected the error */ __u32 socketid; /* CPU socket ID */ 
__u32 apicid; /* CPU initial apic ID */ __u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */ }; #define X86_VENDOR_INTEL 0 #define X86_VENDOR_CYRIX 1 #define X86_VENDOR_AMD 2 #define X86_VENDOR_UMC 3 #define X86_VENDOR_CENTAUR 5 #define X86_VENDOR_TRANSMETA 7 #define X86_VENDOR_NSC 8 #define X86_VENDOR_NUM 9 #define MCE_OVERFLOW 0 /* bit 0 in flags means overflow */ #define MCE_GET_RECORD_LEN _IOR('M', 1, int) #define MCE_GET_LOG_LEN _IOR('M', 2, int) #define MCE_GETCLEAR_FLAGS _IOR('M', 3, int) /* Software defined banks */ #define MCE_EXTENDED_BANK 128 #define MCE_THERMAL_BANK (MCE_EXTENDED_BANK + 0) #define MCE_TIMEOUT_BANK (MCE_EXTENDED_BANK + 90) #define MCI_THRESHOLD_OVER (1ULL<<48) /* threshold error count overflow */ #define MCI_STATUS_VAL (1ULL<<63) /* valid error */ #define MCI_STATUS_OVER (1ULL<<62) /* previous errors lost */ #define MCI_STATUS_UC (1ULL<<61) /* uncorrected error */ #define MCI_STATUS_EN (1ULL<<60) /* error enabled */ #define MCI_STATUS_MISCV (1ULL<<59) /* misc error reg. valid */ #define MCI_STATUS_ADDRV (1ULL<<58) /* addr reg. 
valid */ #define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */ #define MCI_STATUS_S (1ULL<<56) /* signalled */ #define MCI_STATUS_AR (1ULL<<55) /* action-required */ #define MCI_STATUS_FWST (1ULL<<37) /* Firmware updated status indicator */ #define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */ #define MCG_STATUS_EIPV (1ULL<<1) /* eip points to correct instruction */ #define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */ #define MCG_STATUS_LMCES (1ULL<<3) /* local machine check signaled */ #define MCG_CMCI_P (1ULL<<10) /* CMCI supported */ #define MCG_TES_P (1ULL<<11) /* Yellow bit cache threshold supported */ #define MCG_SER_P (1ULL<<24) /* MCA recovery / new status */ #define MCG_ELOG_P (1ULL<<26) /* Extended error log supported */ #define MCG_LMCE_P (1ULL<<27) /* Local machine check supported */ #define NELE(x) (sizeof(x)/sizeof(*(x))) #define err(x) perror(x),exit(1) #define sizeof_field(t, f) (sizeof(((t *)0)->f)) #define endof_field(t, f) (sizeof(((t *)0)->f) + offsetof(t, f)) #define round_up(x,y) (((x) + (y) - 1) & ~((y)-1)) #define round_down(x,y) ((x) & ~((y)-1)) #define BITS_PER_INT (sizeof(unsigned) * 8) #define BITS_PER_LONG (sizeof(unsigned long) * 8) #ifdef __GNUC__ #define PRINTFLIKE __attribute__((format(printf,1,2))) #define noreturn __attribute__((noreturn)) #else #define PRINTFLIKE #define noreturn #endif int Wprintf(char *fmt, ...) PRINTFLIKE; void Eprintf(char *fmt, ...) PRINTFLIKE; void SYSERRprintf(char *fmt, ...) PRINTFLIKE; void Lprintf(char *fmt, ...) PRINTFLIKE; void Gprintf(char *fmt, ...) 
PRINTFLIKE; extern int open_logfile(char *fn); /* Don't forget to update mcelog.c:cputype_name[] too */ enum cputype { CPU_GENERIC, CPU_P6OLD, CPU_CORE2, /* 65nm and 45nm */ CPU_K8, CPU_P4, CPU_NEHALEM, CPU_DUNNINGTON, CPU_TULSA, CPU_INTEL, /* Intel architectural errors */ CPU_XEON75XX, CPU_SANDY_BRIDGE, CPU_SANDY_BRIDGE_EP, CPU_IVY_BRIDGE, CPU_IVY_BRIDGE_EPEX, CPU_HASWELL, CPU_HASWELL_EPEX, CPU_BROADWELL, CPU_KNIGHTS_LANDING, CPU_ATOM, CPU_SKYLAKE, }; enum option_ranges { O_COMMON = 500, O_DISKDB = 1000, }; enum syslog_opt { SYSLOG_LOG = (1 << 0), /* normal decoding output to syslog */ SYSLOG_REMARK = (1 << 1), /* special warnings to syslog */ SYSLOG_ERROR = (1 << 2), /* errors during operation to syslog */ SYSLOG_ALL = SYSLOG_LOG|SYSLOG_REMARK|SYSLOG_ERROR, SYSLOG_FORCE = (1 << 3), }; extern void usage(void); extern void no_syslog(void); extern void argsleft(int ac, char **av); extern char *processor_flags; extern int force_tsc; extern enum syslog_opt syslog_opt; extern int syslog_level; extern enum cputype cputype; extern int filter_memory_errors; extern int imc_log; extern void set_imc_log(int cputype); mcelog-128+dfsg/mcelog.init000077500000000000000000000040011261732315200157060ustar00rootroot00000000000000#!/bin/sh # # Startup script for mcelog # # This should be customized for distribution standards # (using rc_status etc.) # The paths are hardcoded and are not automatically adjusted # for different prefix # ### BEGIN INIT INFO # Provides: mcelog # Default-Start: 3 5 # Default-Stop: 0 1 2 6 # Short-Description: mcelog hardware error logging # Description: Start the mcelog hardware error logging. # This logs and handles CPU hardware errors on x86 systems. ### END INIT INFO # put this is sysconfig # mcelog mode # valid values: daemon, trigger, cron # Recommended value daemon MCELOG_MODE=daemon # additional options to pass to the daemon # this only works in daemon mode # see the manpage for details. 
settings can be also
# set in /etc/mcelog.conf
MCELOG_OPTIONS=""

# private settings
MCELOG=${MCELOG:-/usr/sbin/mcelog}
TRIGGER=/sys/devices/system/machinecheck/machinecheck0/trigger

# Bail out early when the binary or the device is missing. This uses if/then
# rather than "( ...; exit N )": an exit inside a subshell only leaves the
# subshell, and the script would carry on regardless.
if [ ! -x "$MCELOG" ] ; then
	echo "mcelog not found"
	exit 1
fi
if [ ! -r /dev/mcelog ] ; then
	echo "/dev/mcelog not active"
	exit 0
fi

# Validate the configured mode; cron mode needs nothing from this script.
case "$MCELOG_MODE" in
daemon)
	;;
trigger)
	;;
cron)
	echo "mcelog not started"
	exit 0
	;;
*)
	echo "Unknown mcelog mode $MCELOG_MODE. Valid daemon/trigger/cron"
	exit 1
esac

case "$1" in
start)
	if [ "$MCELOG_MODE" = "daemon" ] ; then
		echo "Starting mcelog daemon"
		startproc $MCELOG --daemon $MCELOG_OPTIONS
	elif [ -f "$TRIGGER" ] ; then
		# trigger mode: write the mcelog path into the sysfs trigger file
		echo $MCELOG > "$TRIGGER"
	else
		echo No machine check capability
	fi
	;;
stop)
	if [ "$MCELOG_MODE" = "daemon" ] ; then
		echo "Stopping mcelog"
		killproc -TERM $MCELOG
	elif [ "$MCELOG_MODE" = "trigger" -a -f "$TRIGGER" ]; then
		echo "" > "$TRIGGER"
	else
		echo mcelog not running
	fi
	;;
try-restart)
	$0 status > /dev/null && $0 restart
	;;
restart)
	$0 stop
	$0 start
	;;
reload)
	$0 try-restart
	;;
force-reload)
	$0 try-restart
	;;
status)
	if [ "$MCELOG_MODE" = "daemon" ] ; then
		echo "Checking for mcelog"
		checkproc $MCELOG
	fi
	;;
*)
	echo "Usage: $0 {start|stop|try-restart|restart|status|force-reload|reload}"
	exit 1
esac
mcelog-128+dfsg/mcelog.logrotate000066400000000000000000000004131261732315200167430ustar00rootroot00000000000000/var/log/mcelog { compress dateext maxage 365 rotate 99 size=+2048k notifempty missingok copytruncate postrotate chmod 644 /var/log/mcelog [ -r /var/run/mcelog.pid ] && kill -USR1 `cat /var/run/mcelog.pid` endscript } mcelog-128+dfsg/mcelog.service000066400000000000000000000003261261732315200164060ustar00rootroot00000000000000[Unit] Description=Machine Check Exception Logging Daemon After=syslog.target [Service] ExecStart=/usr/sbin/mcelog --ignorenodev --daemon --foreground StandardOutput=syslog [Install] WantedBy=multi-user.target 
mcelog-128+dfsg/mcelog.triggers.5000066400000000000000000000153331261732315200167430ustar00rootroot00000000000000'\" t .TH "mcelog.triggers" 5 "mcelog" .SH NAME mcelog.triggers \- mcelog trigger scripts reference .SH SYNOPSIS .B /etc/mcelog/bus-error-trigger .br .B /etc/mcelog/cache-error-trigger .br .B /etc/mcelog/dimm-error-trigger .br .B /etc/mcelog/iomca-error-trigger .br .B /etc/mcelog/page-error-trigger .br .B /etc/mcelog/socket-memory-error-trigger .br .B /etc/mcelog/unknown-error-trigger .br .SH DESCRIPTION .BR mcelog(8) maintains thresholds of errors using a .I leaky-bucket algorithm. When the number of errors in a specific time window exceeds a pre-configured threshold a .I trigger will be executed. Triggers are usually shell scripts in the .B /etc/mcelog directory but can be also other internal actions. Thresholds and triggers can be configured in .BR mcelog.conf(5) Trigger will run as the user configured for mcelog in .I mcelog.conf, by default root. The default trigger action can be overridden by specifying a different trigger script in the configuration file. Actions in addition to the default trigger (like notifying an administrator) can be put into the respective .I /etc/mcelog/*.local script which is executed after the default action. This allows updating the default scripts without overriding local actions. All trigger actions are also logged to syslog. .PP .B "The DIMM and socket memory error triggers" .PP The .B /etc/mcelog/dimm-error-trigger and .B /etc/mcelog/socket-memory-error-trigger scripts are executed when a DIMM or a CPU socket exceeds a configured corrected or uncorrected memory error threshold. The thresholds are configured in the .B mcelog.conf .I [dimm] and .I [socket] sections. The default triggers log a warning message in the system log. The triggers are only executed when mcelog runs as a daemon. Arguments are passed as environment variables .TS tab(:); l l. 
THRESHOLD:human readable threshold status MESSAGE:Human readable consolidated error message TOTALCOUNT:total corrected or uncorrected count of errors for current DIMM depending on what triggered the event LOCATION:Consolidated location as a single string DMI_LOCATION:DIMM location from DMI/SMBIOS if available DMI_NAME:DIMM identifier from DMI/SMBIOS if available DIMM:DIMM number reported by hardware CHANNEL:Channel number reported by hardware SOCKETID:Socket ID of CPU that includes the memory controller with the DIMM CECOUNT:Total corrected error count for DIMM UCCOUNT:Total uncorrected error count for DIMM LASTEVENT:Time stamp of event that triggered threshold (in time_t format, seconds) THRESHOLD_COUNT:Total number of events in current threshold time period of specific type .TE After the default action local actions in .B /etc/mcelog/dimm-error-trigger.local or respective .B /etc/mcelog/socket-memory-error-trigger.local are executed. .PP .B "The page error trigger" .PP The .B /etc/mcelog/page-error-trigger script is executed by mcelog in daemon mode when a page in memory exceeds a pre-configured corrected or uncorrected error threshold. mcelog internally also implements offlining the page through the kernel. This is configured through the .I [page] section of .BR mcelog.conf(5) .PP The environment arguments are the same as for the .I dimm-error-trigger script .PP After the default action local actions in .I /etc/mcelog/page-error-trigger.local are executed. .PP .B "The cache error trigger" .PP The .I /etc/mcelog/cache-error-trigger shell script is called for cache error handling in daemon mode when a CPU reports excessive corrected cache errors. This could be an indication for future uncorrected errors. .PP This trigger is configured through the .B [cache] section in the .BR mcelog.conf(5) configuration file. The threshold is defined by the CPU. The default trigger offlines the affected CPU cores, unless it is the last core running. 
.PP Arguments are passed as environment variables .TS tab(:); l l. MESSAGE:Human readable error message CPU:Linux CPU number that triggered the error LEVEL:Cache level affected by error TYPE:Cache type affected by error (Data,Instruction,Generic) AFFECTED_CPUS:List of CPUs sharing the affected cache SOCKETID:Socket ID of affected CPU .TE .PP After the default action local actions in .I /etc/mcelog/cache-error-trigger.local are executed. .PP .B "The bus-uc-threshold-trigger" .PP The .B bus-uc-threshold-trigger runs on uncorrected errors on a IO bus. It is configured through the .B bus-uc-threshold-trigger and .B bus-uc-threshold-trigger-threshold options in .I /etc/mcelog.conf(5). By default it logs a message with the error location to the system log. After the default action local actions in .I /etc/mcelog/bus-uc-error-trigger.local are executed. .PP Arguments are passed as environment variables .TS tab(:); l l. MESSAGE:Human readable consolidated error message. LOCATION:Consolidated location as a single string SOCKETID:Socket ID of CPU that includes the memory controller with the DIMM LEVEL:Interconnect level PARTICIPATION:Processor Participation (Originator, Responder or Observer) REQUEST:Request type (read, write, prefetch, etc.) ORIGIN :Memory or IO TIMEOUT:The request timed out or not .TE .PP .B "The iomca-error-trigger" .PP The .B iomca-error-trigger runs when a socket receives bus or interconnect errors. It is configured through the .B iomca-error-trigger and .B iomca-error-trigger-threshold options in .I /etc/mcelog.conf. By default it logs a message with the error location to the system log. After the default action local actions in .I /etc/mcelog/iomca-error-trigger.local are executed. .PP Arguments are passed as environment variables .TS tab(:); l l. 
MESSAGE:Human readable consolidated error message LOCATION:Consolidated location as a single string SOCKETID:Socket ID of CPU that includes the memory controller with the DIMM CPU:Linux CPU number that triggered the error SET:PCI segment number BUS:PCI bus number DEVICE:PCI device number FUNCTION:PCI function number .TE .PP .B "The unknown-error-trigger" .PP The .B unknown-error-trigger runs on any errors not otherwise categorized. It is configured through the .B unknown-error-trigger and .B unknown-error-trigger-threshold options in .I /etc/mcelog.conf. By default it logs a message to the system log. After the default action local actions in .I /etc/mcelog/unknown-error-trigger.local are executed. .PP Arguments are passed as environment variables .TS tab(:); l l. MESSAGE:Human readable consolidated error message LOCATION:Consolidated location as a single string SOCKETID:Socket ID of CPU that includes the memory controller with the DIMM CPU:Linux CPU number that triggered the error STATUS:IA32_MCi_STATUS register value ADDR:IA32_MCi_ADDR register value MISC:IA32_MCi_MISC register value MCGSTATUS:IA32_MCG_STATUS register value MCGCAP:IA32_MCG_CAP register value .TE .SH SEE ALSO http://www.mcelog.org .B mcelog(8), .B mcelog.conf(5) mcelog-128+dfsg/memdb.c000066400000000000000000000257601261732315200150170ustar00rootroot00000000000000/* Copyright (C) 2009 Intel Corporation Author: Andi Kleen Simple in memory error database for mcelog running in daemon mode mcelog is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2. mcelog is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should find a copy of v2 of the GNU General Public License somewhere on your Linux system; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #define _GNU_SOURCE 1 #include #include #include #include #include #include #include "mcelog.h" #include "memutil.h" #include "config.h" #include "dmi.h" #include "memdb.h" #include "leaky-bucket.h" #include "trigger.h" #include "intel.h" #include "page.h" struct memdimm { struct memdimm *next; int channel; /* -1: unknown */ int dimm; /* -1: unknown */ int socketid; struct err_type ce; struct err_type uc; char *name; char *location; struct dmi_memdev *memdev; }; struct err_triggers { struct bucket_conf ce_bucket_conf; struct bucket_conf uc_bucket_conf; char *type; }; #define SHASH 17 static int md_numdimms; static struct memdimm *md_dimms[SHASH]; static struct err_triggers dimms = { .type = "DIMM" }; static struct err_triggers sockets = { .type = "Socket" }; static int memdb_enabled; static int sockdb_enabled; #define FNV32_OFFSET 2166136261U #define FNV32_PRIME 0x01000193 #define O(x) ((x) & 0xff) /* FNV 1a 32bit, max 16k sockets, 8bit dimm/channel */ static unsigned dimmhash(unsigned socket, int dimm, unsigned ch) { unsigned hash = FNV32_OFFSET; hash = (hash ^ O(socket)) * FNV32_PRIME; hash = (hash ^ O(socket >> 8)) * FNV32_PRIME; hash = (hash ^ O(dimm)) * FNV32_PRIME; hash = (hash ^ O(ch)) * FNV32_PRIME; return hash % SHASH; } /* Search DIMM in hash table */ struct memdimm *get_memdimm(int socketid, int channel, int dimm, int insert) { struct memdimm *md; unsigned h; h = dimmhash(socketid, dimm, channel); for (md = md_dimms[h]; md; md = md->next) { if (md->socketid == socketid && md->channel == channel && md->dimm == dimm) break; } if (md || !insert) return md; md = xalloc(sizeof(struct memdimm)); md->next = md_dimms[h]; md_dimms[h] = md; md->socketid = socketid; md->channel = channel; md->dimm = dimm; md_numdimms++; bucket_init(&md->ce.bucket); 
bucket_init(&md->uc.bucket); return md; } enum { NUMLEN = 30, MAX_ENV = 20, }; static char *number(char *buf, long num) { snprintf(buf, NUMLEN, "%ld", num); return buf; } static char *format_location(struct memdimm *md) { char numbuf[NUMLEN], numbuf2[NUMLEN]; char *location; asprintf(&location, "SOCKET:%d CHANNEL:%s DIMM:%s [%s%s%s]", md->socketid, md->channel == -1 ? "?" : number(numbuf, md->channel), md->dimm == -1 ? "?" : number(numbuf2, md->dimm), md->location ? md->location : "", md->location && md->name ? " " : "", md->name ? md->name : ""); return location; } /* Run a user defined trigger when a error threshold is crossed. */ void memdb_trigger(char *msg, struct memdimm *md, time_t t, struct err_type *et, struct bucket_conf *bc) { struct leaky_bucket *bucket = &et->bucket; char *env[MAX_ENV]; int ei = 0; int i; char *location = format_location(md); char *thresh = bucket_output(bc, bucket); char *out; asprintf(&out, "%s: %s", msg, thresh); if (bc->log) { Gprintf("%s\n", out); Gprintf("Location %s\n", location); } if (bc->trigger == NULL) goto out; asprintf(&env[ei++], "PATH=%s", getenv("PATH") ?: "/sbin:/usr/sbin:/bin:/usr/bin"); asprintf(&env[ei++], "THRESHOLD=%s", thresh); asprintf(&env[ei++], "TOTALCOUNT=%lu", et->count); asprintf(&env[ei++], "LOCATION=%s", location); if (md->location) asprintf(&env[ei++], "DMI_LOCATION=%s", md->location); if (md->name) asprintf(&env[ei++], "DMI_NAME=%s", md->name); if (md->dimm != -1) asprintf(&env[ei++], "DIMM=%d", md->dimm); if (md->channel != -1) asprintf(&env[ei++], "CHANNEL=%d", md->channel); asprintf(&env[ei++], "SOCKETID=%d", md->socketid); asprintf(&env[ei++], "CECOUNT=%lu", md->ce.count); asprintf(&env[ei++], "UCCOUNT=%lu", md->uc.count); if (t) asprintf(&env[ei++], "LASTEVENT=%lu", t); asprintf(&env[ei++], "AGETIME=%u", bc->agetime); // XXX human readable version of agetime asprintf(&env[ei++], "MESSAGE=%s", out); asprintf(&env[ei++], "THRESHOLD_COUNT=%d", bucket->count); env[ei] = NULL; assert(ei < MAX_ENV); 
run_trigger(bc->trigger, NULL, env); for (i = 0; i < ei; i++) free(env[i]); out: free(location); free(out); free(thresh); } /* * Lost some errors. Assume they were CE. Only works for the sockets because * we have no clues where they are. */ static void account_over(struct err_triggers *t, struct memdimm *md, struct mce *m, unsigned corr_err_cnt) { if (corr_err_cnt && --corr_err_cnt > 0) { md->ce.count += corr_err_cnt; if (__bucket_account(&t->ce_bucket_conf, &md->ce.bucket, corr_err_cnt, m->time)) { char *msg; asprintf(&msg, "Fallback %s memory error count %d exceeded threshold", t->type, corr_err_cnt); memdb_trigger(msg, md, 0, &md->ce, &t->ce_bucket_conf); free(msg); } } } static void account_memdb(struct err_triggers *t, struct memdimm *md, struct mce *m) { char *msg; asprintf(&msg, "%scorrected %s memory error count exceeded threshold", (m->status & MCI_STATUS_UC) ? "Un" : "", t->type); if (m->status & MCI_STATUS_UC) { md->uc.count++; if (__bucket_account(&t->uc_bucket_conf, &md->uc.bucket, 1, m->time)) memdb_trigger(msg, md, m->time, &md->uc, &t->uc_bucket_conf); } else { md->ce.count++; if (__bucket_account(&t->ce_bucket_conf, &md->ce.bucket, 1, m->time)) memdb_trigger(msg, md, m->time, &md->ce, &t->ce_bucket_conf); } free(msg); } /* * A memory error happened, record it in the memdb database and run * triggers if needed. 
* ch/dimm == -1: Unspecified DIMM on the channel */ void memory_error(struct mce *m, int ch, int dimm, unsigned corr_err_cnt, unsigned recordlen) { struct memdimm *md; if (recordlen < offsetof(struct mce, socketid)) { static int warned; if (!warned) { Eprintf("Cannot account memory errors because kernel does not report socketid"); warned = 1; } return; } if (memdb_enabled && (ch != -1 || dimm != -1)) { md = get_memdimm(m->socketid, ch, dimm, 1); account_memdb(&dimms, md, m); } if (sockdb_enabled) { md = get_memdimm(m->socketid, -1, -1, 1); account_over(&sockets, md, m, corr_err_cnt); account_memdb(&sockets, md, m); } } /* Compare two dimms for sorting. */ static int cmp_dimm(const void *a, const void *b) { const struct memdimm *ma = *(void **)a; const struct memdimm *mb = *(void **)b; if (ma->socketid != mb->socketid) return ma->socketid - mb->socketid; if (ma->channel != mb->channel) return ma->channel - mb->channel; return ma->dimm - mb->dimm; } /* Dump CE or UC errors */ static void dump_errtype(char *name, struct err_type *e, FILE *f, enum printflags flags, struct bucket_conf *bc) { int all = (flags & DUMP_ALL); char *s; bucket_age(bc, &e->bucket, bucket_time()); if (e->count || e->bucket.count || all) fprintf(f, "%s:\n", name); if (e->count || all) { fprintf(f, "\t%lu total\n", e->count); } if (bc->capacity && (e->bucket.count || all)) { s = bucket_output(bc, &e->bucket); fprintf(f, "\t%s\n", s); free(s); } } static void dump_bios(struct memdimm *md, FILE *f) { int n = 0; if (md->name) n += fprintf(f, "DMI_NAME \"%s\"", md->name); if (md->location) { if (n > 0) fputc(' ', f); n += fprintf(f, "DMI_LOCATION \"%s\"", md->location); } if (n > 0) fputc('\n', f); } static void dump_dimm(struct memdimm *md, FILE *f, enum printflags flags) { if (md->ce.count + md->uc.count > 0 || (flags & DUMP_ALL)) { fprintf(f, "SOCKET %u", md->socketid); if (md->channel == -1) fprintf(f, " CHANNEL any"); else fprintf(f, " CHANNEL %d", md->channel); if (md->dimm == -1) fprintf(f, " 
/*
 * Parse a DMI bank locator string into socket / channel / DIMM numbers.
 *
 * Recognised formats:
 *   "<prefix>_Node<N>_Channel<C>_Dimm<D>"
 *   "NODE <N> CHANNEL <C> DIMM <D>"
 *   "Node<N>_Bank<D>"   (new AMI BIOS, no channel information)
 *   "A<N>_BANK<D>"      (old AMI BIOS, no channel information)
 *
 * Returns 1 and fills all three outputs on a match, 0 otherwise. For the
 * formats without a channel, *channel is set to (unsigned)-1, i.e. the
 * "unknown" value -1 once stored into struct memdimm's int field, instead
 * of being left uninitialised. On failure the outputs are not modified.
 */
static int parse_dimm_addr(char *bl, unsigned *socketid, unsigned *channel,
			   unsigned *dimm)
{
	unsigned s, c, d;

	if (!bl)
		return 0;

	/* Parse into locals so a partial match never leaks into the outputs. */
	if (sscanf(bl + strcspn(bl, "_"), "_Node%u_Channel%u_Dimm%u",
		   &s, &c, &d) == 3)
		goto found;
	if (sscanf(bl, "NODE %u CHANNEL %u DIMM %u", &s, &c, &d) == 3)
		goto found;
	/* Add more DMI formats here */

	c = (unsigned)-1;	/* the formats below carry no channel */
	/* For new AMI BIOS Node0_Bank0 */
	if (sscanf(bl, "Node%u_Bank%u", &s, &d) == 2)
		goto found;
	/* For old AMI BIOS A1_BANK0 */
	if (sscanf(bl, "A%u_BANK%u", &s, &d) == 2)
		goto found;

	return 0;

found:
	*socketid = s;
	*channel = c;
	*dimm = d;
	return 1;
}
memdb_config(); if (!memdb_enabled) return; initialized = 1; if (config_bool("dimm", "dmi-prepopulate") == 0 || !do_dmi) return; if (opendmi() < 0) return; for (i = 0; dmi_dimms[i]; i++) { struct memdimm *md; struct dmi_memdev *d = dmi_dimms[i]; char *bl; bl = dmi_getstring(&d->header, d->bank_locator); if (!parse_dimm_addr(bl, &socketid, &channel, &dimm)) { missed++; continue; } md = get_memdimm(socketid, channel, dimm, 1); if (md->memdev) { /* dups -- likely parse error */ missed++; continue; } md->memdev = d; md->location = xstrdup(bl); md->name = xstrdup(dmi_getstring(&d->header, d->device_locator)); } if (missed) { static int warned; if (!warned) { Eprintf("failed to prefill DIMM database from DMI data"); warned = 1; } } } mcelog-128+dfsg/memdb.h000066400000000000000000000011451261732315200150130ustar00rootroot00000000000000#include #include "leaky-bucket.h" struct err_type { struct leaky_bucket bucket; unsigned long count; }; enum printflags { DUMP_ALL = (1 << 0), DUMP_BIOS = (1 << 1), }; void prefill_memdb(int do_dmi); void memdb_config(void); void dump_memory_errors(FILE *f, enum printflags flags); void memory_error(struct mce *m, int channel, int dimm, unsigned corr_err_cnt, unsigned recordlen); struct memdimm; void memdb_trigger(char *msg, struct memdimm *md, time_t t, struct err_type *et, struct bucket_conf *bc); struct memdimm *get_memdimm(int socketid, int channel, int dimm, int insert); mcelog-128+dfsg/memutil.c000066400000000000000000000030171261732315200153760ustar00rootroot00000000000000/* Copyright (C) 2008 Intel Corporation Author: Andi Kleen Memory allocation utilities mcelog is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2. mcelog is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
/*
 * Allocation wrappers: on allocation failure they call Enomem() instead of
 * returning NULL to the caller.
 *
 * A zero-byte request is rounded up to one byte. The C library is allowed
 * to return NULL for a successful zero-size allocation (and realloc(p, 0)
 * may free p and return NULL), which would otherwise be misreported here
 * as "out of memory".
 */

/* Allocate size bytes of zero-filled memory. */
void *xalloc(size_t size)
{
	void *m = calloc(1, size ? size : 1);
	if (!m)
		Enomem();
	return m;
}

/* Allocate size bytes without clearing them. */
void *xalloc_nonzero(size_t size)
{
	void *m = malloc(size ? size : 1);
	if (!m)
		Enomem();
	return m;
}

/* Resize the allocation old to size bytes; old may be NULL. */
void *xrealloc(void *old, size_t size)
{
	void *m = realloc(old, size ? size : 1);
	if (!m)
		Enomem();
	return m;
}
syslog */ void Lprintf(char *fmt, ...) { va_list ap; if (syslog_opt & SYSLOG_REMARK) { va_start(ap, fmt); opensyslog(); vsyslog(LOG_ERR, fmt, ap); va_end(ap); } if (output_fh || !(syslog_opt & SYSLOG_REMARK)) { va_start(ap, fmt); opensyslog(); vfprintf(output_fh ? output_fh : stdout, fmt, ap); va_end(ap); } } /* For errors during operation */ void Eprintf(char *fmt, ...) { FILE *f = output_fh ? output_fh : stderr; va_list ap; if (!(syslog_opt & SYSLOG_ERROR) || output_fh) { va_start(ap, fmt); fputs("mcelog: ", f); vfprintf(f, fmt, ap); if (*fmt && fmt[strlen(fmt)-1] != '\n') fputc('\n', f); va_end(ap); } if (syslog_opt & SYSLOG_ERROR) { va_start(ap, fmt); opensyslog(); vsyslog(LOG_ERR, fmt, ap); va_end(ap); } } void SYSERRprintf(char *fmt, ...) { char *err = strerror(errno); va_list ap; FILE *f = output_fh ? output_fh : stderr; if (!(syslog_opt & SYSLOG_ERROR) || output_fh) { va_start(ap, fmt); fputs("mcelog: ", f); vfprintf(f, fmt, ap); fprintf(f, ": %s\n", err); va_end(ap); } if (syslog_opt & SYSLOG_ERROR) { char *fmt2; va_start(ap, fmt); opensyslog(); asprintf(&fmt2, "%s: %s\n", fmt, err); vsyslog(LOG_ERR, fmt2, ap); free(fmt2); va_end(ap); } } /* Write to syslog with line buffering */ static int vlinesyslog(char *fmt, va_list ap) { static char line[200]; int n; int lend = strlen(line); int w = vsnprintf(line + lend, sizeof(line)-lend, fmt, ap); while (line[n = strcspn(line, "\n")] != 0) { line[n] = 0; syslog(syslog_level, "%s", line); memmove(line, line + n + 1, strlen(line + n + 1) + 1); } return w; } /* For decoded machine check output */ int Wprintf(char *fmt, ...) { int n = 0; va_list ap; if (syslog_opt & SYSLOG_LOG) { va_start(ap,fmt); opensyslog(); n = vlinesyslog(fmt, ap); va_end(ap); } if (!(syslog_opt & SYSLOG_LOG) || output_fh) { va_start(ap,fmt); n = vfprintf(output_fh ? output_fh : stdout, fmt, ap); va_end(ap); } return n; } /* For output that should reach both syslog and normal log */ void Gprintf(char *fmt, ...) 
/*
 * Set the given bit(s) in MSR number msr on one CPU through
 * /dev/cpu/<cpu>/msr, then read the register back to check that the
 * setting stuck.
 *
 * A missing device node (ENOENT) only produces a warning. Any other
 * open/read/write failure terminates the program with exit status 1.
 * If the bit reads back as clear, a note is logged that DIMM detection
 * is not available on that CPU.
 */
static void domsr(int cpu, int msr, int bit)
{
	char fpath[32];
	unsigned long long data;
	int fd;

	snprintf(fpath, sizeof fpath, "/dev/cpu/%d/msr", cpu);
	fd = open(fpath, O_RDWR);
	if (fd == -1) {
		/*
		 * SYSERRprintf() appends ": <strerror>\n" itself, so the
		 * formats passed to it must not end in a newline.
		 */
		switch (errno) {
		case ENOENT:
			SYSERRprintf("Warning: cpu %d offline?, imc_log not set", cpu);
			return;
		default:
			SYSERRprintf("Cannot open %s to set imc_log", fpath);
			exit(1);
		}
	}
	if (pread(fd, &data, sizeof data, msr) != sizeof data) {
		SYSERRprintf("Cannot read MSR_ERROR_CONTROL from %s", fpath);
		exit(1);
	}
	data |= bit;
	if (pwrite(fd, &data, sizeof data, msr) != sizeof data) {
		SYSERRprintf("Cannot write MSR_ERROR_CONTROL to %s", fpath);
		exit(1);
	}
	/* Read back: the write may be accepted without the bit being set. */
	if (pread(fd, &data, sizeof data, msr) != sizeof data) {
		SYSERRprintf("Cannot re-read MSR_ERROR_CONTROL from %s", fpath);
		exit(1);
	}
	if ((data & bit) == 0)
		Lprintf("No DIMM detection available on cpu %d (normal in virtual environments)\n", cpu);
	close(fd);
}
0x2; /* MemError Log Enable */ break; default: return; } for (cpu = 0; cpu < ncpus; cpu++) domsr(cpu, msr, bit); } mcelog-128+dfsg/nehalem.c000066400000000000000000000132551261732315200153400ustar00rootroot00000000000000/* Copyright (C) 2008 Intel Corporation Decode Intel Nehalem specific machine check errors. mcelog is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2. mcelog is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should find a copy of v2 of the GNU General Public License somewhere on your Linux system; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Author: Andi Kleen */ #include #include #include "mcelog.h" #include "nehalem.h" #include "bitfield.h" #include "memdb.h" #include "xeon75xx.h" /* See IA32 SDM Vol3B Appendix E.3.2 ff */ /* MC1_STATUS error */ static struct field qpi_status[] = { SBITFIELD(16, "QPI header had bad parity"), SBITFIELD(17, "QPI Data packet had bad parity"), SBITFIELD(18, "Number of QPI retries exceeded"), SBITFIELD(19, "Received QPI data packet that was poisoned by sender"), SBITFIELD(20, "QPI reserved 20"), SBITFIELD(21, "QPI reserved 21"), SBITFIELD(22, "QPI received unsupported message encoding"), SBITFIELD(23, "QPI credit type is not supported"), SBITFIELD(24, "Sender sent too many QPI flits to the receiver"), SBITFIELD(25, "QPI Sender sent a failed response to receiver"), SBITFIELD(26, "Clock jitter detected in internal QPI clocking"), {} }; static struct field qpi_misc[] = { SBITFIELD(14, "QPI misc reserved 14"), SBITFIELD(15, "QPI misc reserved 15"), SBITFIELD(24, "QPI Interleave/Head Indication Bit (IIB)"), {} }; static struct numfield qpi_numbers[] = { HEXNUMBER(0, 7, "QPI class 
and opcode of packet with error"), HEXNUMBER(8, 13, "QPI Request Transaction ID"), NUMBERFORCE(16, 18, "QPI Requestor/Home Node ID (RHNID)"), HEXNUMBER(19, 23, "QPI miscreserved 19-23"), {}, }; static struct field nhm_memory_status[] = { SBITFIELD(16, "Memory read ECC error"), SBITFIELD(17, "Memory ECC error occurred during scrub"), SBITFIELD(18, "Memory write parity error"), SBITFIELD(19, "Memory error in half of redundant memory"), SBITFIELD(20, "Memory reserved 20"), SBITFIELD(21, "Memory access out of range"), SBITFIELD(22, "Memory internal RTID invalid"), SBITFIELD(23, "Memory address parity error"), SBITFIELD(24, "Memory byte enable parity error"), {} }; static struct numfield nhm_memory_status_numbers[] = { HEXNUMBER(25, 37, "Memory MISC reserved 25..37"), NUMBERFORCE(38, 52, "Memory corrected error count (CORE_ERR_CNT)"), HEXNUMBER(53, 56, "Memory MISC reserved 53..56"), {} }; static struct numfield nhm_memory_misc_numbers[] = { HEXNUMBERFORCE(0, 7, "Memory transaction Tracker ID (RTId)"), NUMBERFORCE(16, 17, "Memory DIMM ID of error"), NUMBERFORCE(18, 19, "Memory channel ID of error"), HEXNUMBERFORCE(32, 63, "Memory ECC syndrome"), {} }; static char *internal_errors[] = { [0x0] = "No Error", [0x3] = "Reset firmware did not complete", [0x8] = "Received an invalid CMPD", [0xa] = "Invalid Power Management Request", [0xd] = "Invalid S-state transition", [0x11] = "VID controller does not match POC controller selected", [0x1a] = "MSID from POC does not match CPU MSID", }; static struct field internal_error_status[] = { FIELD(24, internal_errors), {} }; static struct numfield internal_error_numbers[] = { HEXNUMBER(16, 23, "Internal machine check status reserved 16..23"), HEXNUMBER(32, 56, "Internal machine check status reserved 32..56"), {}, }; /* Generic architectural memory controller encoding */ static char *mmm_mnemonic[] = { "GEN", "RD", "WR", "AC", "MS", "RES5", "RES6", "RES7" }; static char *mmm_desc[] = { "Generic undefined request", "Memory read error", 
"Memory write error", "Address/Command error", "Memory scrubbing error", "Reserved 5", "Reserved 6", "Reserved 7" }; void decode_memory_controller(u32 status, u8 bank) { char channel[30]; if ((status & 0xf) == 0xf) strcpy(channel, "unspecified"); else { if (cputype == CPU_KNIGHTS_LANDING) /* Fix for Knights Landing MIC */ sprintf(channel, "%u", (status & 0xf) + 3 * (bank == 15)); else sprintf(channel, "%u", status & 0xf); } Wprintf("MEMORY CONTROLLER %s_CHANNEL%s_ERR\n", mmm_mnemonic[(status >> 4) & 7], channel); Wprintf("Transaction: %s\n", mmm_desc[(status >> 4) & 7]); } void nehalem_decode_model(u64 status, u64 misc) { u32 mca = status & 0xffff; if ((mca >> 11) == 1) { /* bus and interconnect QPI */ decode_bitfield(status, qpi_status); if (status & MCI_STATUS_MISCV) { decode_numfield(misc, qpi_numbers); decode_bitfield(misc, qpi_misc); } } else if (mca == 0x0001) { /* internal unspecified */ decode_bitfield(status, internal_error_status); decode_numfield(status, internal_error_numbers); } else if ((mca >> 7) == 1) { /* memory controller */ decode_bitfield(status, nhm_memory_status); decode_numfield(status, nhm_memory_status_numbers); if (status & MCI_STATUS_MISCV) decode_numfield(misc, nhm_memory_misc_numbers); } } /* Only core errors supported. 
/*
 * Nehalem-EP specific DIMM decoding.
 *
 * When the status register flags MISC as valid, extract the channel
 * (MISC bits 18-19) and the DIMM (MISC bits 16-17). Otherwise *channel
 * and *dimm are left as the caller set them.
 */
void nehalem_memerr_misc(struct mce *m, int *channel, int *dimm)
{
	if (!(m->status & MCI_STATUS_MISCV))
		return;

	*channel = EXTRACT(m->misc, 18, 19);
	*dimm = EXTRACT(m->misc, 16, 17);
}
/*
 * Translate a two-bit "LL" level field into its name.
 * Returns "UNKNOWN" for any value outside the table.
 */
static char* get_LL_str(__u8 ll)
{
	static char* LL[] = {"Level-0", "Level-1", "Level-2", "Level-3"};

	/* ">=": an index equal to the element count is already out of bounds */
	if (ll >= sizeof(LL) / sizeof(LL[0])) {
		return "UNKNOWN";
	}

	return LL[ll];
}
TLB_TT_MASK 0xc /*bit 2, bit 3*/ #define TLB_TT_SHIFT 0x2 #define CACHE_LL_MASK 0x3 /*bit 0, bit 1*/ #define CACHE_LL_SHIFT 0x0 #define CACHE_TT_MASK 0xc /*bit 2, bit 3*/ #define CACHE_TT_SHIFT 0x2 #define CACHE_RRRR_MASK 0xF0 /*bit 4, bit 5, bit 6, bit 7 */ #define CACHE_RRRR_SHIFT 0x4 #define BUS_LL_MASK 0x3 /* bit 0, bit 1*/ #define BUS_LL_SHIFT 0x0 #define BUS_II_MASK 0xc /*bit 2, bit 3*/ #define BUS_II_SHIFT 0x2 #define BUS_RRRR_MASK 0xF0 /*bit 4, bit 5, bit 6, bit 7 */ #define BUS_RRRR_SHIFT 0x4 #define BUS_T_MASK 0x100 /*bit 8*/ #define BUS_T_SHIFT 0x8 #define BUS_PP_MASK 0x600 /*bit 9, bit 10*/ #define BUS_PP_SHIFT 0x9 u32 mca; int ret = 0; static char *msg[] = { [0] = "No Error", [1] = "Unclassified", [2] = "Microcode ROM parity error", [3] = "External error", [4] = "FRC error", [5] = "Internal parity error", [6] = "SMM Handler Code Access Violation", }; mca = status & 0xffff; if (mca & (1UL << 12)) { Wprintf("corrected filtering (some unreported errors in same region)\n"); mca &= ~(1UL << 12); } if (mca < NELE(msg)) { Wprintf("%s\n", msg[mca]); return ret; } if ((mca >> 2) == 3) { unsigned levelnum; char *level; levelnum = mca & 3; level = get_LL_str(levelnum); Wprintf("%s Generic cache hierarchy error\n", level); if (track == 2) run_yellow_trigger(cpu, -1, levelnum, "unknown", level, socket); } else if (test_prefix(4, mca)) { unsigned levelnum, typenum; char *level, *type; typenum = (mca & TLB_TT_MASK) >> TLB_TT_SHIFT; type = get_TT_str(typenum); levelnum = (mca & TLB_LL_MASK) >> TLB_LL_SHIFT; level = get_LL_str(levelnum); Wprintf("%s TLB %s Error\n", type, level); if (track == 2) run_yellow_trigger(cpu, typenum, levelnum, type, level, socket); } else if (test_prefix(8, mca)) { unsigned typenum = (mca & CACHE_TT_MASK) >> CACHE_TT_SHIFT; unsigned levelnum = (mca & CACHE_LL_MASK) >> CACHE_LL_SHIFT; char *type = get_TT_str(typenum); char *level = get_LL_str(levelnum); Wprintf("%s CACHE %s %s Error\n", type, level, get_RRRR_str((mca & CACHE_RRRR_MASK) >> 
CACHE_RRRR_SHIFT)); if (track == 2) run_yellow_trigger(cpu, typenum, levelnum, type, level,socket); } else if (test_prefix(10, mca)) { if (mca == 0x400) Wprintf("Internal Timer error\n"); else Wprintf("Internal unclassified error: %x\n", mca & 0xffff); ret = 1; } else if (test_prefix(11, mca)) { char *level, *pp, *rrrr, *ii, *timeout; level = get_LL_str((mca & BUS_LL_MASK) >> BUS_LL_SHIFT); pp = get_PP_str((mca & BUS_PP_MASK) >> BUS_PP_SHIFT); rrrr = get_RRRR_str((mca & BUS_RRRR_MASK) >> BUS_RRRR_SHIFT); ii = get_II_str((mca & BUS_II_MASK) >> BUS_II_SHIFT); timeout = get_T_str((mca & BUS_T_MASK) >> BUS_T_SHIFT); Wprintf("BUS error: %d %d %s %s %s %s %s\n", socket, cpu, level, pp, rrrr, ii, timeout); run_bus_trigger(socket, cpu, level, pp, rrrr, ii, timeout); /* IO MCA - reported as bus/interconnect with specific PP,T,RRRR,II,LL values * and MISCV set. MISC register points to root port that reported the error * need to cross check with AER logs for more details. * See: http://www.intel.com/content/www/us/en/architecture-and-technology/enhanced-mca-logging-xeon-paper.html */ if ((status & MCI_STATUS_MISCV) && (status & 0xefff) == 0x0e0b) { int seg, bus, dev, fn; seg = EXTRACT(misc, 32, 39); bus = EXTRACT(misc, 24, 31); dev = EXTRACT(misc, 19, 23); fn = EXTRACT(misc, 16, 18); Wprintf("IO MCA reported by root port %x:%02x:%02x.%x\n", seg, bus, dev, fn); run_iomca_trigger(socket, cpu, seg, bus, dev, fn); } } else if (test_prefix(7, mca)) { decode_memory_controller(mca, bank); *ismemerr = 1; } else { Wprintf("Unknown Error %x\n", mca); ret = 1; } return ret; } static void p4_decode_model(__u32 model) { static struct { int value; char *str; }MD []= { {16, "FSB address parity"}, {17, "Response hard fail"}, {18, "Response parity"}, {19, "PIC and FSB data parity"}, {20, "Invalid PIC request(Signature=0xF04H)"}, {21, "Pad state machine"}, {22, "Pad strobe glitch"}, {23, "Pad address glitch"} }; unsigned i; Wprintf("Model:"); for (i = 0; i < NELE(MD); i++) { if (model & (1 << 
MD[i].value)) Wprintf("%s\n",MD[i].str); } Wprintf("\n"); } static void decode_tracking(u64 track) { static char *msg[] = { [1] = "green", [2] = "yellow\n" "Large number of corrected cache errors. System operating, but might lead\n" "to uncorrected errors soon", [3] ="res3" }; if (track) { Wprintf("Threshold based error status: %s\n", msg[track]); } } static const char *arstate[4] = { [0] = "UCNA", [1] = "AR", [2] = "SRAO", [3] = "SRAR" }; static int decode_mci(__u64 status, __u64 misc, int cpu, unsigned mcgcap, int *ismemerr, int socket, __u8 bank) { u64 track = 0; Wprintf("MCi status:\n"); if (!(status & MCI_STATUS_VAL)) Wprintf("Machine check not valid\n"); if (status & MCI_STATUS_OVER) Wprintf("Error overflow\n"); if (status & MCI_STATUS_UC) Wprintf("Uncorrected error\n"); else Wprintf("Corrected error\n"); if (status & MCI_STATUS_EN) Wprintf("Error enabled\n"); if (status & MCI_STATUS_MISCV) Wprintf("MCi_MISC register valid\n"); if (status & MCI_STATUS_ADDRV) Wprintf("MCi_ADDR register valid\n"); if (status & MCI_STATUS_PCC) Wprintf("Processor context corrupt\n"); if (status & (MCI_STATUS_S|MCI_STATUS_AR)) Wprintf("%s\n", arstate[(status >> 55) & 3]); if ((mcgcap & MCG_SER_P) && (status & MCI_STATUS_FWST)) { Wprintf("Firmware may have updated this error\n"); } if ((mcgcap == 0 || (mcgcap & MCG_TES_P)) && !(status & MCI_STATUS_UC)) { track = (status >> 53) & 3; decode_tracking(track); } Wprintf("MCA: "); return decode_mca(status, misc, track, cpu, ismemerr, socket, bank); } static void decode_mcg(__u64 mcgstatus) { Wprintf("MCG status:"); if (mcgstatus & MCG_STATUS_RIPV) Wprintf("RIPV "); if (mcgstatus & MCG_STATUS_EIPV) Wprintf("EIPV "); if (mcgstatus & MCG_STATUS_MCIP) Wprintf("MCIP "); if (mcgstatus & MCG_STATUS_LMCES) Wprintf("LMCE "); Wprintf("\n"); } static void decode_thermal(struct mce *log, int cpu) { if (log->status & 1) { Gprintf( "Processor %d heated above trip temperature. Throttling enabled.\n", cpu); Gprintf( "Please check your system cooling. 
Performance will be impacted\n"); } else { Gprintf("Processor %d below trip temperature. Throttling disabled\n", cpu); } } void decode_intel_mc(struct mce *log, int cputype, int *ismemerr, unsigned size) { int socket = size > offsetof(struct mce, socketid) ? (int)log->socketid : -1; int cpu = log->extcpu ? log->extcpu : log->cpu; if (log->bank == MCE_THERMAL_BANK) { decode_thermal(log, cpu); run_unknown_trigger(socket, cpu, log); return; } decode_mcg(log->mcgstatus); if (decode_mci(log->status, log->misc, cpu, log->mcgcap, ismemerr, socket, log->bank)) run_unknown_trigger(socket, cpu, log); if (test_prefix(11, (log->status & 0xffffL))) { switch (cputype) { case CPU_P6OLD: p6old_decode_model(log->status); break; case CPU_DUNNINGTON: case CPU_CORE2: core2_decode_model(log->status); break; case CPU_TULSA: case CPU_P4: p4_decode_model(log->status & 0xffff0000L); break; case CPU_NEHALEM: case CPU_XEON75XX: core2_decode_model(log->status); break; } } /* Model specific addon information */ switch (cputype) { case CPU_NEHALEM: nehalem_decode_model(log->status, log->misc); break; case CPU_DUNNINGTON: dunnington_decode_model(log->status); break; case CPU_TULSA: tulsa_decode_model(log->status, log->misc); break; case CPU_XEON75XX: xeon75xx_decode_model(log, size); break; case CPU_SANDY_BRIDGE: case CPU_SANDY_BRIDGE_EP: snb_decode_model(cputype, log->bank, log->status, log->misc); break; case CPU_IVY_BRIDGE_EPEX: ivb_decode_model(cputype, log->bank, log->status, log->misc); break; case CPU_HASWELL_EPEX: hsw_decode_model(cputype, log->bank, log->status, log->misc); break; } } char *intel_bank_name(int num) { static char bname[64]; sprintf(bname, "BANK %d", num); return bname; } mcelog-128+dfsg/p4.h000066400000000000000000000001571261732315200142540ustar00rootroot00000000000000char *intel_bank_name(int num); void decode_intel_mc(struct mce *log, int cpu, int *ismemerr, unsigned len); 
mcelog-128+dfsg/page.c000066400000000000000000000150471261732315200146440ustar00rootroot00000000000000/* Copyright (C) 2009 Intel Corporation Author: Andi Kleen Memory error accounting per page mcelog is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2. mcelog is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should find a copy of v2 of the GNU General Public License somewhere on your Linux system; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* NB investigate other data structures. Primary consideration would be space efficiency. rbtree nodes are rather large. Do we need aging? Right now the only way to get rid of old nodes is to restart. */ #define _GNU_SOURCE 1 #include #include #include #include #include #include #include "memutil.h" #include "mcelog.h" #include "rbtree.h" #include "leaky-bucket.h" #include "page.h" #include "config.h" #include "memdb.h" #include "sysfs.h" #define PAGE_SHIFT 12 #define PAGE_SIZE (1UL << PAGE_SHIFT) enum { PAGE_ONLINE = 0, PAGE_OFFLINE = 1, PAGE_OFFLINE_FAILED = 2 }; struct mempage { struct rb_node nd; u64 addr; struct err_type ce; char offlined; char triggered; // 2(32bit)-6(64bit) bytes of padding to play with here }; static struct rb_root mempage_root; static struct bucket_conf page_trigger_conf; static const char *page_state[] = { [PAGE_ONLINE] = "online", [PAGE_OFFLINE] = "offline", [PAGE_OFFLINE_FAILED] = "offline-failed", }; static struct mempage *mempage_lookup(u64 addr) { struct rb_node *n = mempage_root.rb_node; while (n) { struct mempage *mp = rb_entry(n, struct mempage, nd); if (addr < mp->addr) n = n->rb_left; else if (addr > mp->addr) n = n->rb_right; else return mp; } 
return NULL; } static struct mempage * mempage_insert_lookup(u64 addr, struct rb_node * node) { struct rb_node **p = &mempage_root.rb_node; struct rb_node *parent = NULL; struct mempage *mp; while (*p) { parent = *p; mp = rb_entry(parent, struct mempage, nd); if (addr < mp->addr) p = &(*p)->rb_left; else if (addr > mp->addr) p = &(*p)->rb_right; else return mp; } rb_link_node(node, parent, p); return NULL; } static struct mempage *mempage_insert(u64 addr, struct mempage *mp) { mp->addr = addr; mp = mempage_insert_lookup(addr, &mp->nd); if (mp != NULL) rb_insert_color(&mp->nd, &mempage_root); return mp; } /* Following arrays need to be all kept in sync with the enum */ enum otype { OFFLINE_OFF, OFFLINE_ACCOUNT, OFFLINE_SOFT, OFFLINE_HARD, OFFLINE_SOFT_THEN_HARD }; static const char *kernel_offline[] = { [OFFLINE_SOFT] = "/sys/devices/system/memory/soft_offline_page", [OFFLINE_HARD] = "/sys/devices/system/memory/hard_offline_page", [OFFLINE_SOFT_THEN_HARD] = "/sys/devices/system/memory/soft_offline_page" }; static struct config_choice offline_choice[] = { { "off", OFFLINE_OFF }, { "account", OFFLINE_ACCOUNT }, { "soft", OFFLINE_SOFT }, { "hard", OFFLINE_HARD }, { "soft-then-hard", OFFLINE_SOFT_THEN_HARD }, {} }; static enum otype offline = OFFLINE_OFF; static int do_memory_offline(u64 addr, enum otype type) { return sysfs_write(kernel_offline[type], "%#llx", addr); } static int memory_offline(u64 addr) { if (offline == OFFLINE_SOFT_THEN_HARD) { if (do_memory_offline(addr, OFFLINE_SOFT) < 0) { Lprintf("Soft offlining of page %llx failed, trying hard offlining\n", addr); return do_memory_offline(addr, OFFLINE_HARD); } return 0; } return do_memory_offline(addr, offline); } static void offline_action(struct mempage *mp, u64 addr) { if (offline <= OFFLINE_ACCOUNT) return; Lprintf("Offlining page %llx\n", addr); if (memory_offline(addr) < 0) { Lprintf("Offlining page %llx failed: %s\n", addr, strerror(errno)); mp->offlined = PAGE_OFFLINE_FAILED; } else mp->offlined = 
PAGE_OFFLINE; } void account_page_error(struct mce *m, int channel, int dimm) { u64 addr = m->addr; struct mempage *mp; time_t t; unsigned cpu = m->extcpu ? m->extcpu : m->cpu; if (offline == OFFLINE_OFF) return; if (!(m->status & MCI_STATUS_ADDRV) || (m->status & MCI_STATUS_UC)) return; switch (cputype) { case CPU_SANDY_BRIDGE_EP: /* * On SNB-EP platform we see corrected errors reported with * address in Bank 5 from hardware (depending on BIOS setting), * in the meanwhile, a duplicate record constructed from * information found by "firmware first" APEI code. Ignore the * duplicate information so that we don't double count errors. * * NOTE: the record from APEI fake this error from CPU 0 BANK 1. */ if (m->bank == 1 && cpu == 0) return; default: break; } t = m->time; addr &= ~((u64)PAGE_SIZE - 1); mp = mempage_lookup(addr); if (!mp) { mp = xalloc(sizeof(struct mempage)); bucket_init(&mp->ce.bucket); mempage_insert(addr, mp); } ++mp->ce.count; if (__bucket_account(&page_trigger_conf, &mp->ce.bucket, 1, t)) { struct memdimm *md; char *msg; char *thresh; if (mp->offlined != PAGE_ONLINE) return; /* Only do triggers and messages for online pages */ thresh = bucket_output(&page_trigger_conf, &mp->ce.bucket); md = get_memdimm(m->socketid, channel, dimm, 1); asprintf(&msg, "Corrected memory errors on page %llx exceed threshold %s", addr, thresh); free(thresh); memdb_trigger(msg, md, t, &mp->ce, &page_trigger_conf); free(msg); mp->triggered = 1; offline_action(mp, addr); } } void dump_page_errors(FILE *f) { char *msg; struct rb_node *r; long k; k = 0; for (r = rb_first(&mempage_root); r; r = rb_next(r)) { struct mempage *p = rb_entry(r, struct mempage, nd); if (k++ == 0) fprintf(f, "Per page corrected memory statistics:\n"); msg = bucket_output(&page_trigger_conf, &p->ce.bucket); fprintf(f, "%llx: total %lu seen \"%s\" %s%s\n", p->addr, p->ce.count, msg, page_state[(unsigned)p->offlined], p->triggered ? 
" triggered" : ""); free(msg); fputc('\n', f); } } void page_setup(void) { int n; config_trigger("page", "memory-ce", &page_trigger_conf); n = config_choice("page", "memory-ce-action", offline_choice); if (n >= 0) offline = n; if (offline > OFFLINE_ACCOUNT && !sysfs_available(kernel_offline[offline], W_OK)) { Lprintf("Kernel does not support page offline interface\n"); offline = OFFLINE_ACCOUNT; } } mcelog-128+dfsg/page.h000066400000000000000000000002551261732315200146440ustar00rootroot00000000000000#include #include struct memdimm; void account_page_error(struct mce *m, int channel, int dimm); void dump_page_errors(FILE *); void page_setup(void); mcelog-128+dfsg/paths.h000066400000000000000000000004461261732315200150510ustar00rootroot00000000000000#define PREFIX "" #define LOG_DEV_FILENAME "/dev/mcelog" #define DIMM_DB_FILENAME PREFIX "/var/lib/memory-errors" #define CONFIG_FILENAME PREFIX "/etc/mcelog/mcelog.conf" #define SOCKET_PATH "/var/run/mcelog-client" #define LOG_FILE "/var/log/mcelog" #define PID_FILE "/var/run/mcelog.pid" mcelog-128+dfsg/rbtree.c000066400000000000000000000205161261732315200152100ustar00rootroot00000000000000/* Red Black Trees (C) 1999 Andrea Arcangeli (C) 2002 David Woodhouse Taken from the Linux 2.6.30 source with some minor modificatons. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA linux/lib/rbtree.c */ #include "rbtree.h" static void __rb_rotate_left(struct rb_node *node, struct rb_root *root) { struct rb_node *right = node->rb_right; struct rb_node *parent = rb_parent(node); if ((node->rb_right = right->rb_left)) rb_set_parent(right->rb_left, node); right->rb_left = node; rb_set_parent(right, parent); if (parent) { if (node == parent->rb_left) parent->rb_left = right; else parent->rb_right = right; } else root->rb_node = right; rb_set_parent(node, right); } static void __rb_rotate_right(struct rb_node *node, struct rb_root *root) { struct rb_node *left = node->rb_left; struct rb_node *parent = rb_parent(node); if ((node->rb_left = left->rb_right)) rb_set_parent(left->rb_right, node); left->rb_right = node; rb_set_parent(left, parent); if (parent) { if (node == parent->rb_right) parent->rb_right = left; else parent->rb_left = left; } else root->rb_node = left; rb_set_parent(node, left); } void rb_insert_color(struct rb_node *node, struct rb_root *root) { struct rb_node *parent, *gparent; while ((parent = rb_parent(node)) && rb_is_red(parent)) { gparent = rb_parent(parent); if (parent == gparent->rb_left) { { register struct rb_node *uncle = gparent->rb_right; if (uncle && rb_is_red(uncle)) { rb_set_black(uncle); rb_set_black(parent); rb_set_red(gparent); node = gparent; continue; } } if (parent->rb_right == node) { struct rb_node *tmp; __rb_rotate_left(parent, root); tmp = parent; parent = node; node = tmp; } rb_set_black(parent); rb_set_red(gparent); __rb_rotate_right(gparent, root); } else { { struct rb_node *uncle = gparent->rb_left; if (uncle && rb_is_red(uncle)) { rb_set_black(uncle); rb_set_black(parent); rb_set_red(gparent); node = gparent; continue; } } if (parent->rb_left == node) { struct rb_node *tmp; __rb_rotate_right(parent, root); 
tmp = parent; parent = node; node = tmp; } rb_set_black(parent); rb_set_red(gparent); __rb_rotate_left(gparent, root); } } rb_set_black(root->rb_node); } static void __rb_erase_color(struct rb_node *node, struct rb_node *parent, struct rb_root *root) { struct rb_node *other; while ((!node || rb_is_black(node)) && node != root->rb_node) { if (parent->rb_left == node) { other = parent->rb_right; if (rb_is_red(other)) { rb_set_black(other); rb_set_red(parent); __rb_rotate_left(parent, root); other = parent->rb_right; } if ((!other->rb_left || rb_is_black(other->rb_left)) && (!other->rb_right || rb_is_black(other->rb_right))) { rb_set_red(other); node = parent; parent = rb_parent(node); } else { if (!other->rb_right || rb_is_black(other->rb_right)) { rb_set_black(other->rb_left); rb_set_red(other); __rb_rotate_right(other, root); other = parent->rb_right; } rb_set_color(other, rb_color(parent)); rb_set_black(parent); rb_set_black(other->rb_right); __rb_rotate_left(parent, root); node = root->rb_node; break; } } else { other = parent->rb_left; if (rb_is_red(other)) { rb_set_black(other); rb_set_red(parent); __rb_rotate_right(parent, root); other = parent->rb_left; } if ((!other->rb_left || rb_is_black(other->rb_left)) && (!other->rb_right || rb_is_black(other->rb_right))) { rb_set_red(other); node = parent; parent = rb_parent(node); } else { if (!other->rb_left || rb_is_black(other->rb_left)) { rb_set_black(other->rb_right); rb_set_red(other); __rb_rotate_left(other, root); other = parent->rb_left; } rb_set_color(other, rb_color(parent)); rb_set_black(parent); rb_set_black(other->rb_left); __rb_rotate_right(parent, root); node = root->rb_node; break; } } } if (node) rb_set_black(node); } void rb_erase(struct rb_node *node, struct rb_root *root) { struct rb_node *child, *parent; int color; if (!node->rb_left) child = node->rb_right; else if (!node->rb_right) child = node->rb_left; else { struct rb_node *old = node, *left; node = node->rb_right; while ((left = 
node->rb_left) != NULL) node = left; child = node->rb_right; parent = rb_parent(node); color = rb_color(node); if (child) rb_set_parent(child, parent); if (parent == old) { parent->rb_right = child; parent = node; } else parent->rb_left = child; node->rb_parent_color = old->rb_parent_color; node->rb_right = old->rb_right; node->rb_left = old->rb_left; if (rb_parent(old)) { if (rb_parent(old)->rb_left == old) rb_parent(old)->rb_left = node; else rb_parent(old)->rb_right = node; } else root->rb_node = node; rb_set_parent(old->rb_left, node); if (old->rb_right) rb_set_parent(old->rb_right, node); goto color; } parent = rb_parent(node); color = rb_color(node); if (child) rb_set_parent(child, parent); if (parent) { if (parent->rb_left == node) parent->rb_left = child; else parent->rb_right = child; } else root->rb_node = child; color: if (color == RB_BLACK) __rb_erase_color(child, parent, root); } /* * This function returns the first node (in sort order) of the tree. */ struct rb_node *rb_first(const struct rb_root *root) { struct rb_node *n; n = root->rb_node; if (!n) return NULL; while (n->rb_left) n = n->rb_left; return n; } struct rb_node *rb_last(const struct rb_root *root) { struct rb_node *n; n = root->rb_node; if (!n) return NULL; while (n->rb_right) n = n->rb_right; return n; } struct rb_node *rb_next(const struct rb_node *node) { struct rb_node *parent; if (rb_parent(node) == node) return NULL; /* If we have a right-hand child, go down and then left as far as we can. */ if (node->rb_right) { node = node->rb_right; while (node->rb_left) node=node->rb_left; return (struct rb_node *)node; } /* No right-hand children. Everything down and left is smaller than us, so any 'next' node must be in the general direction of our parent. Go up the tree; any time the ancestor is a right-hand child of its parent, keep going up. First time it's a left-hand child of its parent, said parent is our 'next' node. 
*/ while ((parent = rb_parent(node)) && node == parent->rb_right) node = parent; return parent; } struct rb_node *rb_prev(const struct rb_node *node) { struct rb_node *parent; if (rb_parent(node) == node) return NULL; /* If we have a left-hand child, go down and then right as far as we can. */ if (node->rb_left) { node = node->rb_left; while (node->rb_right) node=node->rb_right; return (struct rb_node *)node; } /* No left-hand children. Go up till we find an ancestor which is a right-hand child of its parent */ while ((parent = rb_parent(node)) && node == parent->rb_left) node = parent; return parent; } void rb_replace_node(struct rb_node *victim, struct rb_node *new, struct rb_root *root) { struct rb_node *parent = rb_parent(victim); /* Set the surrounding nodes to point to the replacement */ if (parent) { if (victim == parent->rb_left) parent->rb_left = new; else parent->rb_right = new; } else { root->rb_node = new; } if (victim->rb_left) rb_set_parent(victim->rb_left, new); if (victim->rb_right) rb_set_parent(victim->rb_right, new); /* Copy the pointers/colour from the victim to the replacement */ *new = *victim; } mcelog-128+dfsg/rbtree.h000066400000000000000000000117561261732315200152230ustar00rootroot00000000000000/* Red Black Trees (C) 1999 Andrea Arcangeli Taken from the Linux 2.6.30 source. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA linux/include/linux/rbtree.h To use rbtrees you'll have to implement your own insert and search cores. This will avoid us to use callbacks and to drop drammatically performances. I know it's not the cleaner way, but in C (not in C++) to get performances and genericity... Some example of insert and search follows here. The search is a plain normal search over an ordered tree. The insert instead must be implemented int two steps: as first thing the code must insert the element in order as a red leaf in the tree, then the support library function rb_insert_color() must be called. Such function will do the not trivial work to rebalance the rbtree if necessary. ----------------------------------------------------------------------- static inline struct page * rb_search_page_cache(struct inode * inode, unsigned long offset) { struct rb_node * n = inode->i_rb_page_cache.rb_node; struct page * page; while (n) { page = rb_entry(n, struct page, rb_page_cache); if (offset < page->offset) n = n->rb_left; else if (offset > page->offset) n = n->rb_right; else return page; } return NULL; } static inline struct page * __rb_insert_page_cache(struct inode * inode, unsigned long offset, struct rb_node * node) { struct rb_node ** p = &inode->i_rb_page_cache.rb_node; struct rb_node * parent = NULL; struct page * page; while (*p) { parent = *p; page = rb_entry(parent, struct page, rb_page_cache); if (offset < page->offset) p = &(*p)->rb_left; else if (offset > page->offset) p = &(*p)->rb_right; else return page; } rb_link_node(node, parent, p); return NULL; } static inline struct page * rb_insert_page_cache(struct inode * inode, unsigned long offset, struct rb_node * node) { struct page * ret; if ((ret = __rb_insert_page_cache(inode, offset, node))) goto out; rb_insert_color(node, 
&inode->i_rb_page_cache); out: return ret; } ----------------------------------------------------------------------- */ #ifndef _LINUX_RBTREE_H #define _LINUX_RBTREE_H #include #define container_of(ptr, type, member) ({ \ const typeof( ((type *)0)->member ) *__mptr = (ptr); \ (type *)( (char *)__mptr - offsetof(type,member) );}) struct rb_node { unsigned long rb_parent_color; #define RB_RED 0 #define RB_BLACK 1 struct rb_node *rb_right; struct rb_node *rb_left; } __attribute__((aligned(sizeof(long)))); /* The alignment might seem pointless, but allegedly CRIS needs it */ struct rb_root { struct rb_node *rb_node; }; #define rb_parent(r) ((struct rb_node *)((r)->rb_parent_color & ~3)) #define rb_color(r) ((r)->rb_parent_color & 1) #define rb_is_red(r) (!rb_color(r)) #define rb_is_black(r) rb_color(r) #define rb_set_red(r) do { (r)->rb_parent_color &= ~1; } while (0) #define rb_set_black(r) do { (r)->rb_parent_color |= 1; } while (0) static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p) { rb->rb_parent_color = (rb->rb_parent_color & 3) | (unsigned long)p; } static inline void rb_set_color(struct rb_node *rb, int color) { rb->rb_parent_color = (rb->rb_parent_color & ~1) | color; } #define RB_ROOT (struct rb_root) { NULL, } #define rb_entry(ptr, type, member) container_of(ptr, type, member) #define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL) #define RB_EMPTY_NODE(node) (rb_parent(node) == node) #define RB_CLEAR_NODE(node) (rb_set_parent(node, node)) extern void rb_insert_color(struct rb_node *, struct rb_root *); extern void rb_erase(struct rb_node *, struct rb_root *); /* Find logical next and previous nodes in a tree */ extern struct rb_node *rb_next(const struct rb_node *); extern struct rb_node *rb_prev(const struct rb_node *); extern struct rb_node *rb_first(const struct rb_root *); extern struct rb_node *rb_last(const struct rb_root *); /* Fast replacement of a single node without remove/rebalance/add/rebalance */ extern void 
rb_replace_node(struct rb_node *victim, struct rb_node *new, struct rb_root *root); static inline void rb_link_node(struct rb_node * node, struct rb_node * parent, struct rb_node ** rb_link) { node->rb_parent_color = (unsigned long )parent; node->rb_left = node->rb_right = NULL; *rb_link = node; } #endif /* _LINUX_RBTREE_H */ mcelog-128+dfsg/sandy-bridge.c000066400000000000000000000110121261732315200162640ustar00rootroot00000000000000/* Copyright (C) 2010 Intel Corporation Decode Intel Sandy Bridge specific machine check errors. mcelog is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2. mcelog is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should find a copy of v2 of the GNU General Public License somewhere on your Linux system; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Author: Andi Kleen */ #include "mcelog.h" #include "bitfield.h" #include "sandy-bridge.h" #include "memdb.h" /* See IA32 SDM Vol3B Table 16.4.1 */ static char *pcu_1[] = { [0] = "No error", [1] = "Non_IMem_Sel", [2] = "I_Parity_Error", [3] = "Bad_OpCode", [4] = "I_Stack_Underflow", [5] = "I_Stack_Overflow", [6] = "D_Stack_Underflow", [7] = "D_Stack_Overflow", [8] = "Non-DMem_Sel", [9] = "D_Parity_Error" }; static char *pcu_2[] = { [0x00] = "No Error", [0x0D] = "MC_IMC_FORCE_SR_S3_TIMEOUT", [0x0E] = "MC_MC_CPD_UNCPD_ST_TIMEOUT", [0x0F] = "MC_PKGS_SAFE_WP_TIMEOUT", [0x43] = "MC_PECI_MAILBOX_QUIESCE_TIMEOUT", [0x5C] = "MC_MORE_THAN_ONE_LT_AGENT", [0x60] = "MC_INVALID_PKGS_REQ_PCH", [0x61] = "MC_INVALID_PKGS_REQ_QPI", [0x62] = "MC_INVALID_PKGS_RES_QPI", [0x63] = "MC_INVALID_PKGC_RES_PCH", [0x64] = "MC_INVALID_PKG_STATE_CONFIG", [0x70] = 
"MC_WATCHDG_TIMEOUT_PKGC_SLAVE", [0x71] = "MC_WATCHDG_TIMEOUT_PKGC_MASTER", [0x72] = "MC_WATCHDG_TIMEOUT_PKGS_MASTER", [0x7A] = "MC_HA_FAILSTS_CHANGE_DETECTED", [0x81] = "MC_RECOVERABLE_DIE_THERMAL_TOO_HOT", }; static struct field pcu_mc4[] = { FIELD(16, pcu_1), FIELD(24, pcu_2), {} }; static struct field memctrl_mc8[] = { SBITFIELD(16, "Address parity error"), SBITFIELD(17, "HA Wrt buffer Data parity error"), SBITFIELD(18, "HA Wrt byte enable parity error"), SBITFIELD(19, "Corrected patrol scrub error"), SBITFIELD(20, "Uncorrected patrol scrub error"), SBITFIELD(21, "Corrected spare error"), SBITFIELD(22, "Uncorrected spare error"), {} }; void snb_decode_model(int cputype, int bank, u64 status, u64 misc) { switch (bank) { case 4: Wprintf("PCU: "); decode_bitfield(status, pcu_mc4); Wprintf("\n"); break; case 6: case 7: if (cputype == CPU_SANDY_BRIDGE_EP) { /* MCACOD already decoded */ Wprintf("QPI\n"); } break; case 8: case 9: case 10: case 11: Wprintf("MemCtrl: "); decode_bitfield(status, memctrl_mc8); Wprintf("\n"); break; } } /* * Sandy Bridge EP and EP4S processors (family 6, model 45) support additional * logging for corrected errors in the integrated memory controller (IMC) * banks. The mode is off by default, but can be enabled by setting the * "MemError Log Enable" * bit in MSR_ERROR_CONTROL (MSR 0x17f). * The documentation in the August 2012 edition of Intel's Software developer * manual has some minor errors because the worng version of table 16-16 * "Intel IMC MC Error Codes for IA32_MCi_MISC (i= 8, 11)" was included. * Corrections are: * Bit 62 is the "VALID" bit for the "first-device" bits in MISC and STATUS * Bit 63 is the "VALID" bit for the "second-device" bits in MISC * Bits 58:56 and 61:59 should be marked as "reserved". * There should also be a footnote explaining how the "failing rank" fields * can be converted to a DIMM number within a channel for systems with either * two or three DIMMs per channel. 
*/ static int failrank2dimm(unsigned failrank, int socket, int channel) { switch (failrank) { case 0: case 1: case 2: case 3: return 0; case 4: case 5: return 1; case 6: case 7: if (get_memdimm(socket, channel, 2, 0)) return 2; else return 1; } return -1; } void sandy_bridge_ep_memerr_misc(struct mce *m, int *channel, int *dimm) { u64 status = m->status; unsigned failrank, chan; /* Ignore unless this is an corrected extended error from an iMC bank */ if (!imc_log || m->bank < 8 || m->bank > 11 || (status & MCI_STATUS_UC) || !test_prefix(7, status & 0xefff)) return; chan = EXTRACT(status, 0, 3); if (chan == 0xf) return; if (EXTRACT(m->misc, 62, 62)) { failrank = EXTRACT(m->misc, 46, 50); dimm[0] = failrank2dimm(failrank, m->socketid, chan); channel[0] = chan; } if (EXTRACT(m->misc, 63, 63)) { failrank = EXTRACT(m->misc, 51, 55); dimm[1] = failrank2dimm(failrank, m->socketid, chan); channel[1] = chan; } } mcelog-128+dfsg/sandy-bridge.h000066400000000000000000000002161261732315200162750ustar00rootroot00000000000000void snb_decode_model(int cputype, int bank, u64 status, u64 misc); void sandy_bridge_ep_memerr_misc(struct mce *m, int *channel, int *dimm); mcelog-128+dfsg/server.c000066400000000000000000000175501261732315200152370ustar00rootroot00000000000000/* Copyright (C) 2009 Intel Corporation Author: Andi Kleen Simple event-driven unix network server for client access. Process commands and buffer output. mcelog is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2. mcelog is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should find a copy of v2 of the GNU General Public License somewhere on your Linux system; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #define _GNU_SOURCE 1 #include #include #include #include #include #include #include #include #include #include #include #include #include "mcelog.h" #include "server.h" #include "eventloop.h" #include "config.h" #include "memdb.h" #include "memutil.h" #include "paths.h" #include "page.h" #define PAIR(x) x, sizeof(x)-1 struct clientcon { char *inbuf; /* 0 terminated */ char *inptr; char *outbuf; size_t outcur; size_t outlen; }; static char *client_path = SOCKET_PATH; static int initial_ping_timeout = 2; static struct config_cred acc = { .uid = 0, .gid = -1U }; static void free_outbuf(struct clientcon *cc) { free(cc->outbuf); cc->outbuf = NULL; cc->outcur = cc->outlen = 0; } static void free_inbuf(struct clientcon *cc) { free(cc->inbuf); cc->inbuf = NULL; cc->inptr = NULL; } static void free_cc(struct clientcon *cc) { free(cc->outbuf); free(cc->inbuf); free(cc); } static void sendstring(int fd, char *str) { send(fd, str, strlen(str), MSG_DONTWAIT|MSG_NOSIGNAL); } static void dispatch_dump(FILE *fh, char *s) { char *p; enum printflags printflags = 0; while ((p = strsep(&s, " ")) != NULL) { if (!strcmp(p, "dump")) ; else if (!strcmp(p, "bios")) printflags |= DUMP_BIOS; else if (!strcmp(p, "all")) printflags |= DUMP_ALL; else fprintf(fh, "Unknown dump parameter\n"); } dump_memory_errors(fh, printflags); fprintf(fh, "done\n"); } static void dispatch_pages(FILE *fh) { dump_page_errors(fh); fprintf(fh, "done\n"); } static void dispatch_commands(char *line, FILE *fh) { char *s; while ((s = strsep(&line, "\n")) != NULL) { while (isspace(*s)) line++; if (!strncmp(s, "dump", 4)) dispatch_dump(fh, s); else if (!strncmp(s, "pages", 5)) dispatch_pages(fh); else if (!strcmp(s, "ping")) fprintf(fh, "pong\n"); else if (*s != 0) fprintf(fh, "Unknown command\n"); } } /* assumes 
commands don't cross records */ static void process_cmd(struct clientcon *cc) { FILE *fh; assert(cc->outbuf == NULL); fh = open_memstream(&cc->outbuf, &cc->outlen); if (!fh) Enomem(); cc->outcur = 0; dispatch_commands(cc->inbuf, fh); if (ferror(fh) || fclose(fh) != 0) Enomem(); } /* check if client is allowed to access */ static int access_check(int fd, struct msghdr *msg) { struct cmsghdr *cmsg; struct ucred *uc; /* check credentials */ cmsg = CMSG_FIRSTHDR(msg); if (cmsg == NULL || cmsg->cmsg_level != SOL_SOCKET || cmsg->cmsg_type != SCM_CREDENTIALS) { Eprintf("Did not receive credentials over client unix socket %p\n", cmsg); return -1; } uc = (struct ucred *)CMSG_DATA(cmsg); if (uc->uid == 0 || (acc.uid != -1U && uc->uid == acc.uid) || (acc.gid != -1U && uc->gid == acc.gid)) return 0; Eprintf("rejected client access from pid:%u uid:%u gid:%u\n", uc->pid, uc->uid, uc->gid); sendstring(fd, "permission denied\n"); return -1; } /* retrieve commands from client */ static int client_input(int fd, struct clientcon *cc) { char ctlbuf[CMSG_SPACE(sizeof(struct ucred))]; struct iovec miov; struct msghdr msg = { .msg_iov = &miov, .msg_iovlen = 1, .msg_control = ctlbuf, .msg_controllen = sizeof(ctlbuf), }; int n, n2; assert(cc->inbuf == NULL); if (ioctl(fd, FIONREAD, &n) < 0) return -1; if (n == 0) return 0; cc->inbuf = xalloc_nonzero(n + 1); cc->inbuf[n] = 0; cc->inptr = cc->inbuf; miov.iov_base = cc->inbuf; miov.iov_len = n; n2 = recvmsg(fd, &msg, 0); if (n2 < n) return -1; return access_check(fd, &msg) == 0 ? n : -1; } /* process input/out on client socket */ static void client_event(struct pollfd *pfd, void *data) { int events = pfd->revents; struct clientcon *cc = (struct clientcon *)data; int n; if (events & ~(POLLIN|POLLOUT)) /* error/close */ goto error; if (events & POLLOUT) { if (cc->outcur < cc->outlen) { n = send(pfd->fd, cc->outbuf + cc->outcur, cc->outlen - cc->outcur, MSG_DONTWAIT|MSG_NOSIGNAL); if (n < 0) { /* EAGAIN here? 
but should not happen */ goto error; } cc->outcur += n; } if (cc->outcur == cc->outlen) free_outbuf(cc); } if (events & POLLIN) { n = client_input(pfd->fd, cc); if (n < 0) goto error; process_cmd(cc); free_inbuf(cc); } pfd->events = cc->outbuf ? POLLOUT : POLLIN; return; error: if (pfd->revents & POLLERR) SYSERRprintf("error while reading from client"); close(pfd->fd); unregister_pollcb(pfd); free_cc(cc); } /* accept a new client */ static void client_accept(struct pollfd *pfd, void *data) { struct clientcon *cc = NULL; int nfd = accept(pfd->fd, NULL, 0); int on; if (nfd < 0) { SYSERRprintf("accept failed on client socket"); return; } on = 1; if (setsockopt(nfd, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on)) < 0) { SYSERRprintf("Cannot enable credentials passing on client socket"); goto cleanup; } cc = xalloc(sizeof(struct clientcon)); if (register_pollcb(nfd, POLLIN, client_event, cc) < 0) { sendstring(nfd, "mcelog server too busy\n"); goto cleanup; } return; cleanup: free(cc); close(nfd); } static void server_config(void) { char *s; long v; config_cred("server", "client", &acc); if ((s = config_string("server", "socket-path")) != NULL) client_path = s; if (config_number("server", "initial-ping-timeout", "%u", &v) == 0) initial_ping_timeout = v; } static sigjmp_buf ping_timeout_ctx; static void ping_timeout(int sig) { siglongjmp(ping_timeout_ctx, 1); } /* server still running? 
*/ static int server_ping(struct sockaddr_un *un) { struct sigaction oldsa; struct sigaction sa = { .sa_handler = ping_timeout }; int ret, n; char buf[10]; int fd = socket(PF_UNIX, SOCK_STREAM, 0); if (fd < 0) return 0; sigaction(SIGALRM, &sa, &oldsa); if (sigsetjmp(ping_timeout_ctx, 1) == 0) { ret = -1; alarm(initial_ping_timeout); if (connect(fd, un, sizeof(struct sockaddr_un)) < 0) goto cleanup; if (write(fd, PAIR("ping\n")) < 0) goto cleanup; if ((n = read(fd, buf, 10)) < 0) goto cleanup; if (n == 5 && !memcmp(buf, "pong\n", 5)) ret = 0; } else ret = -1; cleanup: sigaction(SIGALRM, &oldsa, NULL); alarm(0); close(fd); return ret; } void server_setup(void) { int fd; struct sockaddr_un adr; server_config(); if (client_path[0] == 0) return; if (strlen(client_path) >= sizeof(adr.sun_path) - 1) { Eprintf("Client socket path `%s' too long for unix socket", client_path); return; } memset(&adr, 0, sizeof(struct sockaddr_un)); adr.sun_family = AF_UNIX; strncpy(adr.sun_path, client_path, sizeof(adr.sun_path) - 1); if (access(client_path, F_OK) == 0) { if (server_ping(&adr) == 0) { Eprintf("mcelog server already running\n"); exit(1); } unlink(client_path); } fd = socket(PF_UNIX, SOCK_STREAM, 0); if (fd < 0) { SYSERRprintf("cannot open listening socket"); return; } if (bind(fd, (struct sockaddr *)&adr, sizeof(struct sockaddr_un)) < 0) { SYSERRprintf("Cannot bind to client unix socket `%s'", client_path); goto cleanup; } listen(fd, 10); register_pollcb(fd, POLLIN, client_accept, NULL); return; cleanup: close(fd); exit(1); } mcelog-128+dfsg/server.h000066400000000000000000000000311261732315200152260ustar00rootroot00000000000000void server_setup(void); mcelog-128+dfsg/sysfs.c000066400000000000000000000046711261732315200151000ustar00rootroot00000000000000/* Copyright (C) 2008 Intel Corporation Author: Andi Kleen Read/Write sysfs values mcelog is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free 
Software Foundation; version 2. mcelog is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should find a copy of v2 of the GNU General Public License somewhere on your Linux system; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #define _GNU_SOURCE 1 #include #include #include #include #include #include #include #include "mcelog.h" #include "sysfs.h" #include "memutil.h" char *read_field(char *base, char *name) { char *fn, *val; int n, fd; struct stat st; char *s; char *buf = NULL; asprintf(&fn, "%s/%s", base, name); fd = open(fn, O_RDONLY); if (fstat(fd, &st) < 0) goto bad; buf = xalloc(st.st_size); free(fn); if (fd < 0) goto bad; n = read(fd, buf, st.st_size); close(fd); if (n < 0) goto bad; val = xalloc(n); memcpy(val, buf, n); free(buf); s = strchr(val, '\n'); if (s) *s = 0; return val; bad: SYSERRprintf("Cannot read sysfs field %s/%s", base, name); return xstrdup(""); } unsigned read_field_num(char *base, char *name) { unsigned num; char *val = read_field(base, name); int n = sscanf(val, "%u", &num); free(val); if (n != 1) { Eprintf("Cannot parse number in sysfs field %s/%s\n", base,name); return 0; } return num; } unsigned read_field_map(char *base, char *name, struct map *map) { char *val = read_field(base, name); for (; map->name; map++) { if (!strcmp(val, map->name)) break; } free(val); if (map->name) return map->value; Eprintf("sysfs field %s/%s has unknown string value `%s'\n", base, name, val); return -1; } int sysfs_write(const char *name, const char *fmt, ...) 
{ int e; int n; char *buf; va_list ap; int fd = open(name, O_WRONLY); if (fd < 0) return -1; va_start(ap, fmt); n = vasprintf(&buf, fmt, ap); va_end(ap); n = write(fd, buf, n); e = errno; close(fd); free(buf); errno = e; return n; } int sysfs_available(const char *name, int flags) { return access(name, flags) == 0; } mcelog-128+dfsg/sysfs.h000066400000000000000000000005351261732315200151000ustar00rootroot00000000000000 struct map { char *name; int value; }; char *read_field(char *base, char *name); unsigned read_field_num(char *base, char *name); unsigned read_field_map(char *base, char *name, struct map *map); int sysfs_write(const char *name, const char *format, ...) __attribute__((format(printf,2,3))); int sysfs_available(const char *name, int flags); mcelog-128+dfsg/tests/000077500000000000000000000000001261732315200147175ustar00rootroot00000000000000mcelog-128+dfsg/tests/Makefile000066400000000000000000000003061261732315200163560ustar00rootroot00000000000000.PHONY: test clean DEBUG = test: ./test cache "${DEBUG}" ./test page "${DEBUG}" ./test memdb "${DEBUG}" ./test socket "${DEBUG}" ./test pfa "${DEBUG}" clean: rm -f */*log rm -f */results* mcelog-128+dfsg/tests/cache/000077500000000000000000000000001261732315200157625ustar00rootroot00000000000000mcelog-128+dfsg/tests/cache/cache.conf000066400000000000000000000001651261732315200176760ustar00rootroot00000000000000# trigger: 2 num-errors = 3 [cache] cache-threshold-trigger = ../trigger [trigger] directory = . 
children-max = 3 mcelog-128+dfsg/tests/cache/inject000077500000000000000000000003131261732315200171610ustar00rootroot00000000000000#!/bin/sh PATH=$PATH:$(pwd)/../../../mce-inject ../../input/GENCACHE 1 1 data green | mce-inject ../../input/GENCACHE 1 1 data yellow | mce-inject ../../input/GENCACHE 1 2 generic yellow | mce-inject mcelog-128+dfsg/tests/memdb/000077500000000000000000000000001261732315200160035ustar00rootroot00000000000000mcelog-128+dfsg/tests/memdb/inject000077500000000000000000000003221261732315200172020ustar00rootroot00000000000000#!/bin/sh B=$(pwd)/../.. PATH=$PATH:$B/../mce-inject $B/input/GENMEM 0 1 0 2 | mce-inject #$B/input/GENMEM 0 0 0 0 1 | mce-inject #$B/input/GENMEM 0 1 0 0 1 | mce-inject $B/input/GENMEM 0 2 0 3 | mce-inject mcelog-128+dfsg/tests/memdb/memdb-1.conf000066400000000000000000000004051261732315200200730ustar00rootroot00000000000000# trigger: 2 cpu = nehalem dmi = yes filter-memory-errors = yes num-errors = 2 [dimm] dimm-tracking-enabled = yes ce-error-trigger = ../trigger ce-error-threshold = 1 / 1min uc-error-trigger = ../trigger uc-error-threshold = 1 / 1min [trigger] directory = . mcelog-128+dfsg/tests/memdb/memdb-2.conf000066400000000000000000000004071261732315200200760ustar00rootroot00000000000000# trigger: 0 cpu = nehalem dmi = yes #filter-memory-errors = yes num-errors = 2 [dimm] dimm-tracking-enabled = yes ce-error-trigger = ../trigger ce-error-threshold = 2 / 1min uc-error-trigger = ../trigger uc-error-threshold = 1 / 1min [trigger] directory = . 
#!/bin/bash
# Inject six corrected memory errors for the page tests: three on one random
# page ($P) and three more through GENPAGE without a page argument.
# bash is required because $RANDOM is not available in a plain POSIX sh
# (there it expands to nothing and all injections would lose their page).

PATH=$PATH:$(pwd)/../../../mce-inject

P=$RANDOM

../../input/GENPAGE $P | mce-inject
../../input/GENPAGE $P 0 1 1 5 | mce-inject
../../input/GENPAGE $P | mce-inject
../../input/GENPAGE | mce-inject
../../input/GENPAGE | mce-inject
../../input/GENPAGE | mce-inject
mcelog-128+dfsg/tests/page/page-soft-then-hard.conf000066400000000000000000000003251261732315200222370ustar00rootroot00000000000000# trigger: 4 num-errors = 6 [page] memory-ce-threshold = 1 / 1h memory-ce-trigger = ../trigger #memory-ce-action = off|account|soft|hard|soft-then-hard memory-ce-action = soft-then-hard [trigger] directory = . mcelog-128+dfsg/tests/page/page-soft.conf000066400000000000000000000003131261732315200203640ustar00rootroot00000000000000# trigger: 1 num-errors = 6 [page] memory-ce-threshold = 2 / 1h memory-ce-trigger = ../trigger #memory-ce-action = off|account|soft|hard|soft-then-hard memory-ce-action = soft [trigger] directory = . mcelog-128+dfsg/tests/pfa/000077500000000000000000000000001261732315200154655ustar00rootroot00000000000000mcelog-128+dfsg/tests/pfa/PFA_test_howto000066400000000000000000000157771261732315200203160ustar00rootroot00000000000000This README file describes the steps of testing PFA (Predictive Failure Analysis) functionality of mcelog under Linux which is facilitated by using mce-inject. PFA is a RAS Feature. PFA capable system can monitor corrected hardware errors and take corrective action in advance before uncorrected error happen. For example, PFA should offline a memory page if more than 10 errors per hour on a memory page are found. It mostly focuses on memory errors. 0. 
Preparation work
*******************************

- Install the Linux kernel with full MCE injection support

  Make sure the following configuration options are enabled:

	CONFIG_X86_MCE=y
	CONFIG_X86_MCE_INTEL=y
	CONFIG_X86_MCE_INJECT=y or CONFIG_X86_MCE_INJECT=m

- Build mcelog and install it in /usr/bin (or rather first in your $PATH)

	# cd $HOME/mcelog
	# make
	# make install

- Get the mce-inject git version from
  git://git.kernel.org/pub/scm/utils/cpu/mce/mce-inject.git
  and install it in /usr/bin (or rather first in your $PATH)

	# cd $HOME
	# git clone git://git.kernel.org/pub/scm/utils/cpu/mce/mce-inject.git
	# cd mce-inject
	# make
	# make install

- Install the page-types tool, which ships with the Linux kernel source
  (2.6.32 or newer).

	# cd $KERNEL_SRC/Documentation/vm/
	# gcc -o page-types page-types.c
	# cp page-types /usr/bin/

1. Start PFA test
*******************************

The PFA test cases in mcelog are in the following directories:

- mcelog/tests/pfa	#page level pfa test cases

You can run all PFA test cases simply by typing:

	# cd mcelog/tests
	# ./test pfa

All the test cases in the specified subdirectory will be run and the test
results will be saved in the file:

	mcelog/tests/pfa/results

When you examine the content of the file, you will find results such as:

- if a case passed:
	"*.conf: triggers trigger as expected"
- if a case failed:
	"*.conf: triggers did not trigger as expected: $expected_num != $actual_got_num"

You can refer to the "*.log" file in the specific subdirectory for the log
saved by mcelog.

2. Modify or add new test cases
*******************************

If you want to modify the existing test cases or add your own case, the
following description gives a more detailed look which might help:

- To add or run a page level PFA test, you first need a configuration file in
  the mcelog/tests/pfa/ directory defining mainly the threshold and trigger
  actions you want, and the number of trigger events you expect to happen.
- A typical configuration file looks as follows:

	mcelog/tests/pfa/page-account.conf
	----------------------------------------------------------
	# trigger: 5
	# num-errors = 3
	[page]
	memory-ce-threshold = 2 / 1h
	memory-ce-trigger = ../trigger
	#memory-ce-action = off|account|soft|hard|soft-then-hard
	memory-ce-action = account
	[trigger]
	directory = .
	------------------------------------------------------------

- "# trigger: 5"
  Specifies the number of triggers you expect to get, based on the threshold
  defined in "memory-ce-threshold" described below. At the end, the
  mcelog/tests/test harness compares this number with the number of actual
  trigger events found in the log to verify the test results.
  Please note that the leading "#" is needed for the mcelog/tests/test harness
  to read this line.

- "# num-errors = 3"
  "num-errors" is a mcelog configuration option. If uncommented, it makes
  mcelog stop processing the machine check records read from /dev/mcelog and
  return (for debugging purposes) once that number is reached, even though
  there might be:
	- still some unprocessed records left in the buffer, which will be
	  ignored
	- or not enough records in /dev/mcelog, in which case the program will
	  not return.
  If it is not set, as in this example, mcelog returns only after it has
  finished processing all the records. When you are not sure what the correct
  num-errors number should be, it is not recommended to set this option.

- "memory-ce-threshold = 2 / 1h"
  Defines the threshold for memory corrected errors per page. Here it means
  that if 2 corrected errors are detected in one page within 1 hour, the
  trigger defined in "memory-ce-trigger" described below will be called.

- "memory-ce-trigger = ../trigger"
  Specifies the trigger to run when the threshold is exceeded. Here
  mcelog/tests/trigger will be called, which simply prints some text for
  testing.

- "memory-ce-action = account"
  Specifies the internal action mcelog takes when a memory corrected error
  threshold is exceeded.
  This is done in addition to executing the trigger script if available.
	- off: No action
	- account: only account errors
	- soft: try to soft-offline page without killing any processes
	  This requires an updated kernel. Might not be successful
	- hard: try to hard-offline page without killing any processes
	  This requires an updated kernel. Might not be successful
	- soft-then-hard: First try to soft offline, then try hard offlining

  The offline action is based on the sysfs_write action of:
	/sys/devices/system/memory/soft_offline_page
  or
	/sys/devices/system/memory/hard_offline_page

  Please note that offlining does not work for all pages, but only for pages
  in the Linux page cache or free pages. Also, if an offline action (soft,
  hard, or soft-then-hard) is chosen in "memory-ce-action", the trigger will
  fire only once for each page, no matter whether the offline action
  succeeded or failed.

3. Influencing factors of the trigger results
*******************************

The correct expectation of triggers depends on 4 factors:

- The expected number of triggers defined in the "pfa/*.conf" file
  As described above, in our example the trigger expectation is defined to be
  5, which means the "mcelog/tests/pfa/inject" script will randomly choose
  5 free pages to inject in turn and do the MCE injection on each page
  $memory-ce-threshold times.

- The threshold defined in "memory-ce-threshold" of the "pfa/*.conf" file
  As described above, for "memory-ce-threshold = 2 / 1h" in our example, the
  "mcelog/tests/pfa/inject" script will do the MCE injection 2 times in a row
  for each chosen page to make the trigger happen.

- The "memory-ce-action" defined in the "pfa/*.conf" file
  As described above. If the "memory-ce-action" is soft/hard/soft-then-hard,
  no matter whether the offlining action succeeds or not, the
  triggers_per_page calculation changes to:
	triggers_per_page = INT(injections_per_page / memory-ce-threshold) >= 1? 
#!/bin/bash
# Inject corrected memory errors for the page level PFA tests.
#
# usage: inject conf_file
#
# For as many pages as the "# trigger: N" line of conf_file expects triggers,
# pick a random page of one of the types in $page_type and inject
# memory-ce-threshold errors on it.
#
# bash is required: the script uses "function", "local" and $RANDOM, none of
# which a plain POSIX /bin/sh has to provide.

PATH=$PATH:$(pwd)/../../../mce-inject

page_type="slab buddy mmap anonymous nopage huge"

# Print the address (0x...) of a random page of type $1 as listed by
# page-types, or 0 when no page of that type is listed.
function get_free_page()
{
	local rand=0
	local cnt page

	cnt=$(page-types -Nl -b "$1" | tee "page_$1" | wc -l)
	if [ "$cnt" -gt 1 ]; then
		rand=$(expr $RANDOM % $cnt + 1)
		if [ "$rand" -eq 1 ]; then
			# skip the title line of output
			rand=2
		fi
		page=$(awk -v line="$rand" 'NR == line {print $1}' "page_$1")
		echo "0x${page}"
	else
		echo 0
	fi
	rm -f "page_$1"
}

if [ "$1" = "" ]; then
	echo "usage $0 conf_file"
	exit 1
fi

if [ ! -f "$1" ]; then
	echo "configure file not exists: $1"
	exit 1
fi

if ! which page-types > /dev/null 2>&1; then
	echo "please install page-types tool first"
	exit 1
fi

echo "+++ start the injection for $1 +++"

# expected number of triggers and errors needed per page to reach the threshold
NUMT="$(awk '/# trigger: / { print $3}' "$1")"
THRESHOLD="$(awk '/memory-ce-threshold = / { print $3}' "$1")"

if [ "$NUMT" -eq 0 ]; then
	echo "No injection will be done!"
	exit 0
fi

if [ "$THRESHOLD" -eq 0 ]; then
	echo "Threshold should not be 0!"
	exit 1
fi

trigger_cnt=0
while [ "$trigger_cnt" -lt "$NUMT" ]; do
	for i in ${page_type}; do
		P=$(get_free_page $i)
		if [ "$P" = "0" ]; then
			continue
		fi
		if [ "$trigger_cnt" -ge "$NUMT" ]; then
			exit 0;
		fi
		inject_cnt=0
		while [ "$inject_cnt" -lt "$THRESHOLD" ]; do
			echo "inject for page type $i at physical address ${P}000 [ NO. $inject_cnt ]"
			../../input/GENPAGE $P | mce-inject
			inject_cnt=$(($inject_cnt+1))
		done
		if [ "$inject_cnt" -eq "$THRESHOLD" ]; then
			trigger_cnt=$(($trigger_cnt+1))
		fi
	done
	# a full pass over all page types without a single usable page
	if [ "$trigger_cnt" -eq 0 ]; then
		echo "None available free pages found!"
		exit 1
	fi
done
mcelog-128+dfsg/tests/server/000077500000000000000000000000001261732315200162255ustar00rootroot00000000000000mcelog-128+dfsg/tests/server/inject000066400000000000000000000003131261732315200174210ustar00rootroot00000000000000#!/bin/sh PATH=$PATH:$(pwd)/../../../mce-inject ../../input/GENCACHE 1 1 data green | mce-inject ../../input/GENCACHE 1 1 data yellow | mce-inject ../../input/GENCACHE 1 2 generic yellow | mce-inject mcelog-128+dfsg/tests/server/server.conf000066400000000000000000000002361261732315200204030ustar00rootroot00000000000000# trigger: 2 num-errors = 3 [cache] cache-threshold-trigger = ../trigger [server] socket-path = /tmp/mcelog-client [trigger] directory = . children-max = 3 mcelog-128+dfsg/tests/socket/000077500000000000000000000000001261732315200162075ustar00rootroot00000000000000mcelog-128+dfsg/tests/socket/inject000077500000000000000000000003221261732315200174060ustar00rootroot00000000000000#!/bin/sh B=$(pwd)/../.. PATH=$PATH:$B/../mce-inject $B/input/GENMEM 0 1 0 2 | mce-inject #$B/input/GENMEM 0 0 0 0 1 | mce-inject #$B/input/GENMEM 0 1 0 0 1 | mce-inject $B/input/GENMEM 0 2 0 3 | mce-inject mcelog-128+dfsg/tests/socket/socket-1.conf000066400000000000000000000004421261732315200205040ustar00rootroot00000000000000# trigger: 2 cpu = nehalem dmi = yes num-errors = 2 [dimm] dimm-tracking-enabled = yes [socket] socket-tracking-enabled = yes mem-ce-error-trigger = ../trigger mem-ce-error-threshold = 1 / 1min mem-uc-error-trigger = ../trigger mem-uc-error-threshold = 1 / 1min [trigger] directory = . mcelog-128+dfsg/tests/socket/socket-2.conf000066400000000000000000000004421261732315200205050ustar00rootroot00000000000000# trigger: 1 cpu = nehalem dmi = yes num-errors = 2 [dimm] dimm-tracking-enabled = yes [socket] socket-tracking-enabled = yes mem-ce-error-trigger = ../trigger mem-ce-error-threshold = 2 / 1min mem-uc-error-trigger = ../trigger mem-uc-error-threshold = 1 / 1min [trigger] directory = . 
#!/bin/bash
# simple test harness for mcelog daemon trigger test cases
# ./test subdir [debugger]
# run mcelog test in specific sub directory
# requires root rights and a built mce-inject in ../../mce-inject or $PATH
# warning: this kills any other running mcelogs
#
# For every *.conf in subdir: run ./inject, run mcelog with that config and
# compare the number of "Running trigger" lines in the log with the
# "# trigger: N" line of the config. Verdicts are appended to subdir/results.

D=${2:-}

if [ "$1" = "" ] ; then
	echo "usage $0 testdir"
	exit 1
fi

if [ "$(whoami)" != "root" ] ; then
	echo "Must run as root"
	exit 1
fi

# /dev/mce-inject is a device node, so test for existence, not for a regular file
[ ! -e /dev/mce-inject ] && modprobe mce-inject

echo "++++++++++++ running $1 test +++++++++++++++++++"
# disable trigger
echo -n "" > /sys/devices/system/machinecheck/machinecheck0/trigger
killall mcelog || true

#killwatchdog() {
#	kill %1 || true
#}
#
#watchdog() {
#	sleep 10
#	echo timeout waiting for mcelog
#	killall mcelog
#}

# never run the rm commands below in the wrong directory
cd "$1" || exit 1
#trap killwatchdog 0
#watchdog &

rm -f *.log
rm -f results
for conf in *.conf
do
	# an unmatched glob stays literal; skip it
	[ -e "$conf" ] || continue
	log="${conf%.conf}.log"
	./inject "$conf"
	# $D is deliberately unquoted: it may hold a debugger command with arguments
	$D ../../mcelog --foreground --daemon --debug-numerrors --config "$conf" --logfile "$log" >> result

	# let triggers finish
	sleep 1

	NUMT="$(awk '/# trigger: / { print $3 }' "$conf")"
	NUMC="$(grep -c 'Running trigger' "$log" || true)"
	if [ "$NUMT" != 0 ] ; then
		if [ "$NUMC" = 0 ] ; then
			echo "$conf: no triggers at all" >> results
		fi
	fi
	if [ "$NUMT" != "" ] ; then
		if [ "$NUMC" != "$NUMT" ] ; then
			echo "$conf: triggers did not trigger as expected: $NUMT != $NUMC" >> results
		else
			echo "$conf: triggers trigger as expected" >> results
		fi
	else
		echo "$conf: did not declare number of triggers" >> results
	fi
done

#trap "" 0
#killwatchdog
mcelog-128+dfsg/trigger.c000066400000000000000000000075671261732315200154030ustar00rootroot00000000000000/* Copyright (C) 2009 Intel Corporation Author: Andi Kleen Manage trigger commands running as separate processes. mcelog is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2. mcelog is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should find a copy of v2 of the GNU General Public License somewhere on your Linux system; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #define _GNU_SOURCE 1 #include #include #include #include #include #include #include "trigger.h" #include "eventloop.h" #include "list.h" #include "mcelog.h" #include "memutil.h" #include "config.h" struct child { struct list_head nd; pid_t child; const char *name; }; static LIST_HEAD(childlist); static int num_children; static int children_max = 4; static char *trigger_dir; pid_t mcelog_fork(const char *name) { pid_t child; struct child *c; child = fork(); if (child <= 0) return child; num_children++; c = xalloc(sizeof(struct child)); c->name = name; c->child = child; list_add_tail(&c->nd, &childlist); return child; } // note: trigger must be allocated, e.g. 
from config void run_trigger(char *trigger, char *argv[], char **env) { pid_t child; char *fallback_argv[] = { trigger, NULL, }; if (!argv) argv = fallback_argv; Lprintf("Running trigger `%s'\n", trigger); if (children_max > 0 && num_children >= children_max) { Eprintf("Too many trigger children running already\n"); return; } child = mcelog_fork(trigger); if (child < 0) { SYSERRprintf("Cannot create process for trigger"); return; } if (child == 0) { if (trigger_dir) chdir(trigger_dir); execve(trigger, argv, env); _exit(127); } } /* Clean up child on SIGCHLD */ static void finish_child(pid_t child, int status) { struct child *c, *tmpc; list_for_each_entry_safe (c, tmpc, &childlist, nd) { if (c->child == child) { if (WIFEXITED(status) && WEXITSTATUS(status)) { Eprintf("Trigger `%s' exited with status %d\n", c->name, WEXITSTATUS(status)); } else if (WIFSIGNALED(status)) { Eprintf("Trigger `%s' died with signal %s\n", c->name, strsignal(WTERMSIG(status))); } list_del(&c->nd); free(c); num_children--; return; } } abort(); } /* Runs only directly after ppoll */ static void child_handler(int sig, siginfo_t *si, void *ctx) { int status; pid_t pid; if (waitpid(si->si_pid, &status, WNOHANG) < 0) { SYSERRprintf("Cannot collect child %d", si->si_pid); return; } finish_child(si->si_pid, status); /* Check other child(ren)'s status to avoid zombie process */ while ((pid = waitpid(-1, &status, WNOHANG)) > 0) { finish_child(pid, status); } } void trigger_setup(void) { char *s; struct sigaction sa = { .sa_sigaction = child_handler, .sa_flags = SA_SIGINFO|SA_NOCLDSTOP|SA_RESTART, }; sigaction(SIGCHLD, &sa, NULL); event_signal(SIGCHLD); config_number("trigger", "children-max", "%d", &children_max); s = config_string("trigger", "directory"); if (s) { if (access(s, R_OK|X_OK) < 0) SYSERRprintf("Cannot access trigger directory `%s'", s); trigger_dir = s; } } void trigger_wait(void) { int sig; sigset_t mask; sigemptyset(&mask); sigaddset(&mask, SIGCHLD); while (num_children > 0) { if 
(sigwait(&mask, &sig) < 0) SYSERRprintf("sigwait waiting for children"); } } int trigger_check(char *s) { char *name; int rc; if (trigger_dir) asprintf(&name, "%s/%s", trigger_dir, s); else name = s; rc = access(name, R_OK|X_OK); if (trigger_dir) free(name); return rc; } mcelog-128+dfsg/trigger.h000066400000000000000000000002651261732315200153740ustar00rootroot00000000000000void run_trigger(char *trigger, char *argv[], char **env); void trigger_setup(void); void trigger_wait(void); int trigger_check(char *); pid_t mcelog_fork(const char *thread_name); mcelog-128+dfsg/triggers/000077500000000000000000000000001261732315200154035ustar00rootroot00000000000000mcelog-128+dfsg/triggers/bus-error-trigger000077500000000000000000000014411261732315200207120ustar00rootroot00000000000000#!/bin/sh # This shell script can be executed by mcelog in daemon mode when a sockets # receives Bus and Interconnect errors # # environment: # MESSAGE Human readable consolidated error message # LOCATION Consolidated location as a single string # SOCKETID Socket ID of CPU that includes the memory controller with the DIMM # LEVEL Interconnect level # PARTICIPATION Processor Participation (Originator, Responder or Observer) # REQUEST Request type (read, write, prefetch, etc.) # ORIGIN Memory or IO # TIMEOUT The request timed out or not # # note: will run as mcelog configured user # this can be changed in mcelog.conf logger -s -p daemon.err -t mcelog "$MESSAGE" logger -s -p daemon.err -t mcelog "Location: $LOCATION" [ -x ./bus-error-trigger.local ] && . ./bus-error-trigger.local exit 0 mcelog-128+dfsg/triggers/cache-error-trigger000077500000000000000000000020131261732315200211600ustar00rootroot00000000000000#!/bin/bash # cache error trigger. This shell script is executed by mcelog in daemon mode # when a CPU reports excessive corrected cache errors. This could be a indication # for future uncorrected errors. 
# # environment: # MESSAGE Human readable error message # CPU Linux CPU number that triggered the error # LEVEL Cache level affected by error # TYPE Cache type affected by error (Data,Instruction,Generic) # AFFECTED_CPUS List of CPUs sharing the affected cache # SOCKETID Socket ID of affected CPU # # note: will run as mcelog configured user # this can be changed in mcelog.conf # # offline the CPUs sharing the affected cache # EXIT=0 for i in $AFFECTED_CPUS ; do logger -s -p daemon.crit -t mcelog "Offlining CPU $i due to cache error threshold" F=$(printf "/sys/devices/system/cpu/cpu%d/online" $i) echo 0 > $F if [ "$(< $F)" != "0" ] ; then logger -s -p daemon.warn -t mcelog "Offlining CPU $i failed" EXIT=1 fi done [ -x ./cache-error-trigger.local ] && . ./cache-error-trigger.local exit $EXIT mcelog-128+dfsg/triggers/dimm-error-trigger000077500000000000000000000022751261732315200210550ustar00rootroot00000000000000#!/bin/sh # This shell script can be executed by mcelog in daemon mode when a DIMM # exceeds a pre-configured error threshold # # environment: # THRESHOLD human readable threshold status # MESSAGE Human readable consolidated error message # TOTALCOUNT total count of errors for current DIMM of CE/UC depending on # what triggered the event # LOCATION Consolidated location as a single string # DMI_LOCATION DIMM location from DMI/SMBIOS if available # DMI_NAME DIMM identifier from DMI/SMBIOS if available # DIMM DIMM number reported by hardware # CHANNEL Channel number reported by hardware # SOCKETID Socket ID of CPU that includes the memory controller with the DIMM # CECOUNT Total corrected error count for DIMM # UCCOUNT Total uncorrected error count for DIMM # LASTEVENT Time stamp of event that triggered threshold (in time_t format, seconds) # THRESHOLD_COUNT Total number of events in current threshold time period of specific type # # note: will run as mcelog configured user # this can be changed in mcelog.conf logger -s -p daemon.err -t mcelog "$MESSAGE" logger
-s -p daemon.err -t mcelog "Location: $LOCATION" [ -x ./dimm-error-trigger.local ] && . ./dimm-error-trigger.local exit 0 mcelog-128+dfsg/triggers/iomca-error-trigger000077500000000000000000000013461261732315200212150ustar00rootroot00000000000000#!/bin/sh # This shell script can be executed by mcelog in daemon mode when a socket # receives Bus and Interconnect errors # # environment: # MESSAGE Human readable consolidated error message # LOCATION Consolidated location as a single string # SOCKETID Socket ID of CPU that includes the memory controller with the DIMM # CPU Linux CPU number that triggered the error # SET PCI segment number # BUS PCI bus number # DEVICE PCI device number # FUNCTION PCI function number # # note: will run as mcelog configured user # this can be changed in mcelog.conf logger -s -p daemon.err -t mcelog "$MESSAGE" logger -s -p daemon.err -t mcelog "Location: $LOCATION" [ -x ./iomca-error-trigger.local ] && . ./iomca-error-trigger.local exit 0 mcelog-128+dfsg/triggers/page-error-trigger000077500000000000000000000024341261732315200210400ustar00rootroot00000000000000#!/bin/sh # This shell script can be executed by mcelog in daemon mode when a page # in memory exceeds a pre-configured corrected error threshold. # mcelog internally also supports offlining the page through the kernel.
# # environment: # THRESHOLD human readable threshold status # MESSAGE Human readable consolidated error message # TOTALCOUNT total count of errors for current DIMM of CE/UC depending on # what triggered the event # LOCATION Consolidated location as a single string # DMI_LOCATION DIMM location from DMI/SMBIOS if available # DMI_NAME DIMM identifier from DMI/SMBIOS if available # DIMM DIMM number reported by hardware # CHANNEL Channel number reported by hardware # SOCKETID Socket ID of CPU that includes the memory controller with the DIMM # CECOUNT Total corrected error count for DIMM # UCCOUNT Total uncorrected error count for DIMM # LASTEVENT Time stamp of event that triggered threshold (in time_t format, seconds) # THRESHOLD_COUNT Total number of events in current threshold time period of specific type # # note: will run as mcelog configured user # this can be changed in mcelog.conf logger -s -p daemon.err -t mcelog "$MESSAGE" logger -s -p daemon.err -t mcelog "Location: $LOCATION" [ -x ./page-error-trigger.local ] && .
./page-error-trigger.local exit 0 mcelog-128+dfsg/triggers/socket-memory-error-trigger000077500000000000000000000020411261732315200227140ustar00rootroot00000000000000#!/bin/sh # This shell script can be executed by mcelog in daemon mode when a socket # exceeds a pre-configured error threshold for memory errors # # environment: # THRESHOLD human readable threshold status # MESSAGE Human readable consolidated error message # TOTALCOUNT total count of errors for current socket of CE/UC depending on # what triggered the event # LOCATION Consolidated location as a single string # SOCKETID Socket ID of CPU that includes the memory controller with the DIMM # CECOUNT Total corrected error count for socket # UCCOUNT Total uncorrected error count for socket # LASTEVENT Time stamp of event that triggered threshold (in time_t format, seconds) # THRESHOLD_COUNT Total number of events in current threshold time period of specific type # # note: will run as mcelog configured user # this can be changed in mcelog.conf logger -s -p daemon.err -t mcelog "$MESSAGE" logger -s -p daemon.err -t mcelog "Location: $LOCATION" [ -x ./socket-memory-error-trigger.local ] && . ./socket-memory-error-trigger.local exit 0 mcelog-128+dfsg/triggers/unknown-error-trigger000077500000000000000000000016631261732315200216260ustar00rootroot00000000000000#!/bin/sh # This shell script is executed by mcelog in daemon mode when # a not otherwise handled machine check error happens.
# # environment: # MESSAGE Human readable consolidated error message # LOCATION Consolidated location as a single string # SOCKETID Socket ID of CPU that includes the memory controller with the DIMM # CPU Linux CPU number that triggered the error # STATUS IA32_MCi_STATUS register value # ADDR IA32_MCi_ADDR register value # MISC IA32_MCi_MISC register value # MCGSTATUS IA32_MCG_STATUS register value # MCGCAP IA32_MCG_CAP register value # For details on the register layout please see the Intel SDM http://www.intel.com/sdm # volume 3, chapter 15 # # note: will run as mcelog configured user # this can be changed in mcelog.conf logger -s -p daemon.err -t mcelog "$MESSAGE" logger -s -p daemon.err -t mcelog "Location: $LOCATION" [ -x ./unknown-error-trigger.local ] && . ./unknown-error-trigger.local exit 0 mcelog-128+dfsg/tsc.c000066400000000000000000000103121261732315200145070ustar00rootroot00000000000000/* Copyright (C) 2006 Andi Kleen, SuSE Labs. Decode TSC value into human readable uptime mcelog is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2. dmi is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should find a copy of v2 of the GNU General Public License somewhere on your Linux system; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #define _GNU_SOURCE 1 #include #include #include #include #include #include "mcelog.h" #include "tsc.h" #include "intel.h" static unsigned scale(u64 *tsc, unsigned unit, double mhz) { u64 v = (u64)(mhz * 1000000) * unit; unsigned u = *tsc / v; *tsc -= u * v; return u; } static int fmt_tsc(char **buf, u64 tsc, double mhz) { unsigned days, hours, mins, secs; if (mhz == 0.0) return -1; days = scale(&tsc, 3600 * 24, mhz); hours = scale(&tsc, 3600, mhz); mins = scale(&tsc, 60, mhz); secs = scale(&tsc, 1, mhz); asprintf(buf, "[at %.0f Mhz %u days %u:%u:%u uptime (unreliable)]", mhz, days, hours, mins, secs); return 0; } static double cpufreq_mhz(int cpu, double infomhz) { double mhz; FILE *f; char *fn; asprintf(&fn, "/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq", cpu); f = fopen(fn, "r"); free(fn); if (!f) { /* /sys exists, but no cpufreq -- use value from cpuinfo */ if (access("/sys/devices", F_OK) == 0) return infomhz; /* /sys not mounted. 
We don't know if cpufreq is active or not, so must fallback */ return 0.0; } if (fscanf(f, "%lf", &mhz) != 1) mhz = 0.0; mhz /= 1000; fclose(f); return mhz; } int decode_tsc_forced(char **buf, double mhz, u64 tsc) { return fmt_tsc(buf, tsc, mhz); } static int deep_sleep_states(int cpu) { int ret; char *fn; FILE *f; char *line = NULL; size_t linelen = 0; /* When cpuidle is active assume there are deep sleep states */ asprintf(&fn, "/sys/devices/system/cpu/cpu%d/cpuidle", cpu); ret = access(fn, X_OK); free(fn); if (ret == 0) return 1; asprintf(&fn, "/proc/acpi/processor/CPU%d/power", cpu); f = fopen(fn, "r"); free(fn); if (!f) return 0; while ((getline(&line, &linelen, f)) > 0) { int n; if ((sscanf(line, " C%d:", &n)) == 1) { if (n > 1) { char *p = strstr(line, "usage"); if (p && sscanf(p, "usage[%d]", &n) == 1 && n > 0) return 1; } } } free(line); fclose(f); return 0; } /* Try to figure out if this CPU has a somewhat reliable TSC clock */ static int tsc_reliable(int cputype, int cpunum) { if (!processor_flags) return 0; /* Trust the kernel */ if (strstr(processor_flags, "nonstop_tsc")) return 1; /* TSC does not change frequency TBD: really old kernels don't set that */ if (!strstr(processor_flags, "constant_tsc")) return 0; /* We don't know the frequency on non Intel CPUs because the kernel doesn't report them (e.g. AMD GH TSC doesn't run at highest P-state). But then the kernel can just report the real time too. Also a lot of AMD and VIA CPUs have unreliable TSC, so would need special rules here too. 
*/ if (!is_intel_cpu(cputype)) return 0; if (deep_sleep_states(cpunum) && cputype != CPU_NEHALEM) return 0; return 1; } int decode_tsc_current(char **buf, int cpunum, enum cputype cputype, double mhz, unsigned long long tsc) { double cmhz; if (!tsc_reliable(cputype, cpunum)) return -1; cmhz = cpufreq_mhz(cpunum, mhz); if (cmhz != 0.0) mhz = cmhz; return fmt_tsc(buf, tsc, mhz); } #ifdef STANDALONE int is_intel_cpu(int cpu) { return 1; } /* claim this TSC is reliable always */ char *processor_flags = "nonstop_tsc"; static inline u64 rdtscll(void) { unsigned a,b; asm volatile("rdtsc" : "=a" (a), "=d" (b)); return (u64)a | (((u64)b) << 32); } int main(void) { char *buf; u64 tsc = rdtscll(); printf("%Lx tsc\n", tsc); if (decode_tsc_current(&buf, 0, CPU_CORE2, 0.0, tsc) >= 0) printf("%s\n", buf); else printf("failed\n"); return 0; } #endif mcelog-128+dfsg/tsc.h000066400000000000000000000002761261732315200145240ustar00rootroot00000000000000enum cputype; int decode_tsc_current(char **buf, int cpunum, enum cputype cputype, double mhz, unsigned long long tsc); int decode_tsc_forced(char **buf, double mhz, __u64 tsc); mcelog-128+dfsg/tulsa.c000066400000000000000000000102611261732315200150510ustar00rootroot00000000000000/* Copyright (C) 2009 Intel Corporation Decode Intel Xeon MP 7100 series (Tulsa) specific machine check errors. mcelog is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2. mcelog is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should find a copy of v2 of the GNU General Public License somewhere on your Linux system; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Author: Andi Kleen */ #include #include #include "mcelog.h" #include "bitfield.h" #include "tulsa.h" /* See IA32 SDM Vol3B Appendix E.4.1 ff */ static struct numfield corr_numbers[] = { NUMBER(32, 39, "Corrected events"), {} }; static struct numfield ecc_numbers[] = { HEXNUMBER(44, 51, "ECC syndrome"), {}, }; static struct field tls_bus_status[] = { SBITFIELD(16, "Parity error detected during FSB request phase"), SBITFIELD(17, "Partity error detected on Core 0 request's address field"), SBITFIELD(18, "Partity error detected on Core 1 request's address field"), FIELD(19, reserved_1bit), SBITFIELD(20, "Parity error on FSB response field detected"), SBITFIELD(21, "FSB data parity error on inbound date detected"), SBITFIELD(22, "Data parity error on data received from Core 0 detected"), SBITFIELD(23, "Data parity error on data received from Core 1 detected"), SBITFIELD(24, "Detected an Enhanced Defer parity error phase A or phase B"), SBITFIELD(25, "Data ECC event to error on inbound data correctable or uncorrectable"), SBITFIELD(26, "Pad logic detected a data strobe glitch or sequencing error"), SBITFIELD(27, "Pad logic detected a request strobe glitch or sequencing error"), FIELD(28, reserved_3bits), FIELD(31, reserved_1bit), {} }; static char *tls_front_error[0xf] = { [0x1] = "Inclusion error from core 0", [0x2] = "Inclusion error from core 1", [0x3] = "Write Exclusive error from core 0", [0x4] = "Write Exclusive error from core 1", [0x5] = "Inclusion error from FSB", [0x6] = "SNP stall error from FSB", [0x7] = "Write stall error from FSB", [0x8] = "FSB Arbiter Timeout error", [0x9] = "CBC OOD Queue Underflow/overflow", }; static char *tls_int_error[0xf] = { [0x1] = "Enhanced Intel SpeedStep Technology TM1-TM2 Error", [0x2] = "Internal timeout error", [0x3] = 
"Internal timeout error", [0x4] = "Intel Cache Safe Technology Queue full error\n" "or disabled ways in a set overflow", }; struct field tls_int_status[] = { FIELD(8, tls_int_error), {} }; struct field tls_front_status[] = { FIELD(0, tls_front_error), {} }; struct field tls_cecc[] = { SBITFIELD(0, "Correctable ECC event on outgoing FSB data"), SBITFIELD(1, "Correctable ECC event on outgoing core 0 data"), SBITFIELD(2, "Correctable ECC event on outgoing core 1 data"), {} }; struct field tls_uecc[] = { SBITFIELD(0, "Uncorrectable ECC event on outgoing FSB data"), SBITFIELD(1, "Uncorrectable ECC event on outgoing core 0 data"), SBITFIELD(2, "Uncorrectable ECC event on outgoing core 1 data"), {} }; static void tulsa_decode_bus(u64 status) { decode_bitfield(status, tls_bus_status); } static void tulsa_decode_internal(u64 status) { u32 mca = (status >> 16) & 0xffff; if ((mca & 0xfff0) == 0) decode_bitfield(mca, tls_front_status); else if ((mca & 0xf0ff) == 0) decode_bitfield(mca, tls_int_status); else if ((mca & 0xfff0) == 0xc000) decode_bitfield(mca, tls_cecc); else if ((mca & 0xfff0) == 0xe000) decode_bitfield(mca, tls_uecc); } void tulsa_decode_model(u64 status, u64 misc) { decode_numfield(status, corr_numbers); if (status & (1ULL << 52)) decode_numfield(status, ecc_numbers); /* MISC register not documented in the SDM. Let's just dump hex for now. */ if (status & MCI_STATUS_MISCV) Wprintf("MISC format %llx value %llx\n", (status >> 40) & 3, misc); if ((status & 0xffff) == 0xe0f) tulsa_decode_bus(status); else if ((status & 0xffff) == (1 << 10)) tulsa_decode_internal(status); } mcelog-128+dfsg/tulsa.h000066400000000000000000000000571261732315200150600ustar00rootroot00000000000000void tulsa_decode_model(u64 status, u64 misc); mcelog-128+dfsg/unknown.c000066400000000000000000000041261261732315200154230ustar00rootroot00000000000000/* Copyright (C) 20014 Intel Corporation Author: Rui Wang Handle all other unknown error requests. 
mcelog is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2. mcelog is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should find a copy of v2 of the GNU General Public License somewhere on your Linux system. */ #define _GNU_SOURCE 1 #include #include #include #include #include #include "memutil.h" #include "mcelog.h" #include "config.h" #include "trigger.h" #include "unknown.h" static char *unknown_trigger; enum { MAX_ENV = 20, }; void unknown_setup(void) { unknown_trigger = config_string("socket", "unknown-threshold-trigger"); if (unknown_trigger && trigger_check(unknown_trigger) < 0) { SYSERRprintf("Cannot access unknown threshold trigger `%s'", unknown_trigger); exit(1); } } void run_unknown_trigger(int socket, int cpu, struct mce *log) { int ei = 0; char *env[MAX_ENV]; int i; char *msg; char *location; if (!unknown_trigger) return; if (socket >= 0) asprintf(&location, "CPU %d on socket %d", cpu, socket); else asprintf(&location, "CPU %d", cpu); asprintf(&msg, "%s received unknown error", location); asprintf(&env[ei++], "LOCATION=%s", location); free(location); if (socket >= 0) asprintf(&env[ei++], "SOCKETID=%d", socket); asprintf(&env[ei++], "MESSAGE=%s", msg); asprintf(&env[ei++], "CPU=%d", cpu); asprintf(&env[ei++], "STATUS=%llx", log->status); asprintf(&env[ei++], "MISC=%llx", log->misc); asprintf(&env[ei++], "ADDR=%llx", log->addr); asprintf(&env[ei++], "MCGSTATUS=%llx", log->mcgstatus); asprintf(&env[ei++], "MCGCAP=%llx", log->mcgcap); env[ei] = NULL; assert(ei < MAX_ENV); run_trigger(unknown_trigger, NULL, env); for (i = 0; i < ei; i++) free(env[i]); free(msg); } mcelog-128+dfsg/unknown.h000066400000000000000000000001321261732315200154210ustar00rootroot00000000000000void 
unknown_setup(void); void run_unknown_trigger(int socket, int cpu, struct mce *log); mcelog-128+dfsg/version.h000066400000000000000000000000671261732315200154160ustar00rootroot00000000000000extern char version[]; #define MCELOG_VERSION version mcelog-128+dfsg/xeon75xx.c000066400000000000000000000023021261732315200154230ustar00rootroot00000000000000/* Copyright (C) 2009/2010 Intel Corporation Decode Intel Xeon75xx memory errors. Requires the mce-75xx.ko driver load. The core errors are the same as Nehalem. mcelog is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2. mcelog is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should find a copy of v2 of the GNU General Public License somewhere on your Linux system; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Author: Andi Kleen */ #include #include #include "mcelog.h" #include "xeon75xx.h" /* This used to decode the old xeon 75xx memory error aux format. But that has never been merged into mainline kernels, so removed it again. */ void xeon75xx_memory_error(struct mce *m, unsigned msize, int *channel, int *dimm) { } void xeon75xx_decode_dimm(struct mce *m, unsigned msize) { } mcelog-128+dfsg/xeon75xx.h000066400000000000000000000002161261732315200154320ustar00rootroot00000000000000void xeon75xx_memory_error(struct mce *m, unsigned msize, int *channel, int *dimm); void xeon75xx_decode_dimm(struct mce *m, unsigned msize); mcelog-128+dfsg/yellow.c000066400000000000000000000060441261732315200152400ustar00rootroot00000000000000/* Copyright (C) 2009 Intel Corporation Author: Andi Kleen Handle 'yellow bit' cache error threshold indications. 
mcelog is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2. mcelog is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should find a copy of v2 of the GNU General Public License somewhere on your Linux system; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #define _GNU_SOURCE 1 #include #include #include #include #include #include "memutil.h" #include "mcelog.h" #include "config.h" #include "trigger.h" #include "yellow.h" #include "cache.h" #define BITS_PER_U (sizeof(unsigned) * 8) #define test_bit(i, a) (((unsigned *)(a))[(i) / BITS_PER_U] & (1U << ((i) % BITS_PER_U))) static char *yellow_trigger; static int yellow_log = 1; enum { MAX_ENV = 10, }; static char *cpulist(char *prefix, unsigned *cpumask, unsigned cpumasklen) { unsigned i, k; char *buf = NULL; size_t size = 0; FILE *f = open_memstream(&buf, &size); if (!f) Enomem(); fprintf(f, "%s", prefix); k = 0; for (i = 0; i < cpumasklen * 8; i++) { if (test_bit(i, cpumask)) { fprintf(f, "%s%u", k > 0 ? 
" " : "", i); k++; } } fclose(f); return buf; } void run_yellow_trigger(int cpu, int tnum, int lnum, char *ts, char *ls, int socket) { int ei = 0; char *env[MAX_ENV]; unsigned *cpumask; int cpumasklen; int i; char *msg; char *location; if (socket >= 0) asprintf(&location, "CPU %d on socket %d", cpu, socket); else asprintf(&location, "CPU %d", cpu); asprintf(&msg, "%s has large number of corrected cache errors in %s %s", location, ls, ts); free(location); if (yellow_log) { Lprintf("%s\n", msg); Lprintf("System operating correctly, but might lead to uncorrected cache errors soon\n"); } if (!yellow_trigger) goto out; if (socket >= 0) asprintf(&env[ei++], "SOCKETID=%d", socket); asprintf(&env[ei++], "MESSAGE=%s", msg); asprintf(&env[ei++], "CPU=%d", cpu); asprintf(&env[ei++], "LEVEL=%d", lnum); asprintf(&env[ei++], "TYPE=%s", ts); if (cache_to_cpus(cpu, lnum, tnum, &cpumasklen, &cpumask) >= 0) env[ei++] = cpulist("AFFECTED_CPUS=", cpumask, cpumasklen); else asprintf(&env[ei++], "AFFECTED_CPUS=unknown"); env[ei] = NULL; assert(ei < MAX_ENV); run_trigger(yellow_trigger, NULL, env); for (i = 0; i < ei; i++) free(env[i]); out: free(msg); } void yellow_setup(void) { int n; yellow_trigger = config_string("cache", "cache-threshold-trigger"); if (yellow_trigger && trigger_check(yellow_trigger) < 0) { SYSERRprintf("Cannot access cache threshold trigger `%s'", yellow_trigger); exit(1); } n = config_bool("cache", "cache-threshold-log"); if (n >= 0) yellow_log = n; } mcelog-128+dfsg/yellow.h000066400000000000000000000001571261732315200152440ustar00rootroot00000000000000void yellow_setup(void); void run_yellow_trigger(int cpu, int tnum, int lnum, char *ts, char *ls, int socket);