Debian mcelog: Cannot open `/dev/mcelog': No such device
Hi,
On Debian with Linux 6.1.0-17-amd64 #1 SMP PREEMPT_DYNAMIC Debian 6.1.69-1 (2023-12-30) x86_64 GNU/Linux, mcelog can't access /dev/mcelog and the daemon does not start.
Is this a bug, or am I missing something? I tried on two different Debian servers.
I always get this:
systemctl status mcelog.service
× mcelog.service - Machine Check Exception Logging Daemon
Loaded: loaded (/lib/systemd/system/mcelog.service; enabled; preset: enabled)
Active: failed (Result: exit-code) since Thu 2024-01-18 21:26:35 KST; 5s ago
Duration: 9ms
Process: 2799 ExecStart=/usr/sbin/mcelog --daemon --foreground (code=exited, status=1/FAILURE)
Main PID: 2799 (code=exited, status=1/FAILURE)
CPU: 4ms
janv. 18 21:26:35 serveur-k systemd[1]: Started mcelog.service - Machine Check Exception Logging Daemon.
janv. 18 21:26:35 serveur-k systemd[1]: mcelog.service: Main process exited, code=exited, status=1/FAILURE
janv. 18 21:26:35 serveur-k mcelog[2799]: mcelog: Cannot open `/dev/mcelog': No such device
janv. 18 21:26:35 serveur-k systemd[1]: mcelog.service: Failed with result 'exit-code'.
Here is the strace of the command:
strace -o /tmp/mcelog.out -f /usr/sbin/mcelog --daemon --foreground
...
cat /tmp/mcelog.out
2883 execve("/usr/sbin/mcelog", ["/usr/sbin/mcelog", "--daemon", "--foreground"], 0x7ffc8ce71a48 /* 25 vars */) = 0
2883 brk(NULL) = 0x5640e378d000
2883 mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f821b6af000
2883 access("/etc/ld.so.preload", R_OK) = -1 ENOENT (Aucun fichier ou dossier de ce type)
2883 openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
2883 newfstatat(3, "", {st_mode=S_IFREG|0644, st_size=20082, ...}, AT_EMPTY_PATH) = 0
2883 mmap(NULL, 20082, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f821b6aa000
2883 close(3) = 0
2883 openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
2883 read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\220s\2\0\0\0\0\0"..., 832) = 832
2883 pread64(3, "\6\0\0\0\4\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0"..., 784, 64) = 784
2883 newfstatat(3, "", {st_mode=S_IFREG|0755, st_size=1922136, ...}, AT_EMPTY_PATH) = 0
2883 pread64(3, "\6\0\0\0\4\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0"..., 784, 64) = 784
2883 mmap(NULL, 1970000, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f821b4c9000
2883 mmap(0x7f821b4ef000, 1396736, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x26000) = 0x7f821b4ef000
2883 mmap(0x7f821b644000, 339968, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x17b000) = 0x7f821b644000
2883 mmap(0x7f821b697000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1ce000) = 0x7f821b697000
2883 mmap(0x7f821b69d000, 53072, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f821b69d000
2883 close(3) = 0
2883 mmap(NULL, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f821b4c6000
2883 arch_prctl(ARCH_SET_FS, 0x7f821b4c6740) = 0
2883 set_tid_address(0x7f821b4c6a10) = 2883
2883 set_robust_list(0x7f821b4c6a20, 24) = 0
2883 rseq(0x7f821b4c7060, 0x20, 0, 0x53053053) = 0
2883 mprotect(0x7f821b697000, 16384, PROT_READ) = 0
2883 mprotect(0x5640e36bc000, 4096, PROT_READ) = 0
2883 mprotect(0x7f821b6e1000, 8192, PROT_READ) = 0
2883 prlimit64(0, RLIMIT_STACK, NULL, {rlim_cur=8192*1024, rlim_max=RLIM64_INFINITY}) = 0
2883 munmap(0x7f821b6aa000, 20082) = 0
2883 getrandom("\x42\x20\x91\x14\x79\x7a\x45\xdf", 8, GRND_NONBLOCK) = 8
2883 brk(NULL) = 0x5640e378d000
2883 brk(0x5640e37ae000) = 0x5640e37ae000
2883 openat(AT_FDCWD, "/etc/mcelog/mcelog.conf", O_RDONLY) = 3
2883 newfstatat(3, "", {st_mode=S_IFREG|0644, st_size=1609, ...}, AT_EMPTY_PATH) = 0
2883 read(3, "#\n# config file for mcelog\n# For"..., 4096) = 1609
2883 read(3, "", 4096) = 0
2883 close(3) = 0
2883 openat(AT_FDCWD, "/proc/cpuinfo", O_RDONLY) = 3
2883 newfstatat(3, "", {st_mode=S_IFREG|0444, st_size=0, ...}, AT_EMPTY_PATH) = 0
2883 read(3, "processor\t: 0\nvendor_id\t: Genuin"..., 1024) = 1024
2883 read(3, "map clflushopt clwb intel_pt avx"..., 1024) = 1024
2883 close(3) = 0
2883 access("/sys/firmware/dmi/entries/0-0/raw", R_OK) = 0
2883 openat(AT_FDCWD, "/sys/firmware/dmi/entries/16-0/raw", O_RDONLY) = 3
2883 read(3, "\20\0276\0\3\3\5\0\0\0`\376\377\f\0\0\0\0\0\0\0\0\0\0\0", 1024) = 25
2883 read(3, "", 1024) = 0
2883 close(3) = 0
2883 openat(AT_FDCWD, "/sys/firmware/dmi/entries/16-1/raw", O_RDONLY) = -1 ENOENT (Aucun fichier ou dossier de ce type)
2883 openat(AT_FDCWD, "/sys/firmware/dmi/entries/17-0/raw", O_RDONLY) = 3
2883 read(3, "\21(7\0006\0\376\377H\0@\0\377\177\t\0\1\2\32\200\0j\n\3\4\5\6\2\0\0\1\0"..., 1024) = 111
2883 read(3, "", 1024) = 0
2883 close(3) = 0
2883 openat(AT_FDCWD, "/sys/firmware/dmi/entries/17-1/raw", O_RDONLY) = 3
2883 read(3, "\21(8\0006\0\376\377H\0@\0\377\177\t\0\1\2\32\200\0j\n\3\4\5\6\2\0\0\1\0"..., 1024) = 111
2883 read(3, "", 1024) = 0
2883 close(3) = 0
2883 openat(AT_FDCWD, "/sys/firmware/dmi/entries/17-2/raw", O_RDONLY) = 3
2883 read(3, "\21(9\0006\0\376\377\0\0\0\0\0\0\2\0\1\2\2\4\0\0\0\3\4\5\6\0\0\0\0\0"..., 1024) = 88
2883 read(3, "", 1024) = 0
2883 close(3) = 0
2883 openat(AT_FDCWD, "/sys/firmware/dmi/entries/17-3/raw", O_RDONLY) = 3
2883 read(3, "\21(:\0006\0\376\377\0\0\0\0\0\0\2\0\1\2\2\4\0\0\0\3\4\5\6\0\0\0\0\0"..., 1024) = 88
2883 read(3, "", 1024) = 0
2883 close(3) = 0
2883 openat(AT_FDCWD, "/sys/firmware/dmi/entries/17-4/raw", O_RDONLY) = 3
2883 read(3, "\21(;\0006\0\376\377\0\0\0\0\0\0\2\0\1\2\2\4\0\0\0\3\4\5\6\0\0\0\0\0"..., 1024) = 88
2883 read(3, "", 1024) = 0
2883 close(3) = 0
2883 openat(AT_FDCWD, "/sys/firmware/dmi/entries/17-5/raw", O_RDONLY) = 3
2883 read(3, "\21(<\0006\0\376\377\0\0\0\0\0\0\2\0\1\2\2\4\0\0\0\3\4\5\6\0\0\0\0\0"..., 1024) = 88
2883 read(3, "", 1024) = 0
2883 close(3) = 0
2883 openat(AT_FDCWD, "/sys/firmware/dmi/entries/17-6/raw", O_RDONLY) = 3
2883 read(3, "\21(=\0006\0\376\377H\0@\0\377\177\t\0\1\2\32\200\0j\n\3\4\5\6\2\0\0\1\0"..., 1024) = 111
2883 read(3, "", 1024) = 0
2883 close(3) = 0
2883 openat(AT_FDCWD, "/sys/firmware/dmi/entries/17-7/raw", O_RDONLY) = 3
2883 read(3, "\21(>\0006\0\376\377H\0@\0\377\177\t\0\1\2\32\200\0j\n\3\4\5\6\2\0\0\1\0"..., 1024) = 111
2883 read(3, "", 1024) = 0
2883 close(3) = 0
2883 openat(AT_FDCWD, "/sys/firmware/dmi/entries/17-8/raw", O_RDONLY) = 3
2883 read(3, "\21(?\0006\0\376\377\0\0\0\0\0\0\2\0\1\2\2\4\0\0\0\3\4\5\6\0\0\0\0\0"..., 1024) = 88
2883 read(3, "", 1024) = 0
2883 close(3) = 0
2883 openat(AT_FDCWD, "/sys/firmware/dmi/entries/17-9/raw", O_RDONLY) = 3
2883 read(3, "\21(@\0006\0\376\377\0\0\0\0\0\0\2\0\1\2\2\4\0\0\0\3\4\5\6\0\0\0\0\0"..., 1024) = 88
2883 read(3, "", 1024) = 0
2883 close(3) = 0
2883 openat(AT_FDCWD, "/sys/firmware/dmi/entries/17-10/raw", O_RDONLY) = 3
2883 read(3, "\21(A\0006\0\376\377\0\0\0\0\0\0\2\0\1\2\2\4\0\0\0\3\4\5\6\0\0\0\0\0"..., 1024) = 88
2883 read(3, "", 1024) = 0
2883 close(3) = 0
2883 openat(AT_FDCWD, "/sys/firmware/dmi/entries/17-11/raw", O_RDONLY) = 3
2883 read(3, "\21(B\0006\0\376\377\0\0\0\0\0\0\2\0\1\2\2\4\0\0\0\3\4\5\6\0\0\0\0\0"..., 1024) = 88
2883 read(3, "", 1024) = 0
2883 close(3) = 0
2883 openat(AT_FDCWD, "/sys/firmware/dmi/entries/17-12/raw", O_RDONLY) = -1 ENOENT (Aucun fichier ou dossier de ce type)
2883 openat(AT_FDCWD, "/sys/firmware/dmi/entries/19-0/raw", O_RDONLY) = 3
2883 read(3, "\23\37C\0\0\0\0\0\377\377\37\0006\0\2\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1024) = 33
2883 read(3, "", 1024) = 0
2883 close(3) = 0
2883 openat(AT_FDCWD, "/sys/firmware/dmi/entries/19-1/raw", O_RDONLY) = 3
2883 read(3, "\23\37D\0\0\0@\0\377\377\37\0106\0\2\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1024) = 33
2883 read(3, "", 1024) = 0
2883 close(3) = 0
2883 openat(AT_FDCWD, "/sys/firmware/dmi/entries/19-2/raw", O_RDONLY) = 3
2883 read(3, "\23\37E\0\0\0 \10\377\377\37\0206\0\2\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1024) = 33
2883 read(3, "", 1024) = 0
2883 close(3) = 0
2883 openat(AT_FDCWD, "/sys/firmware/dmi/entries/19-3/raw", O_RDONLY) = -1 ENOENT (Aucun fichier ou dossier de ce type)
2883 openat(AT_FDCWD, "/sys/firmware/dmi/entries/20-0/raw", O_RDONLY) = -1 ENOENT (Aucun fichier ou dossier de ce type)
2883 mmap(NULL, 528384, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f821b445000
2883 rt_sigaction(SIGCHLD, {sa_handler=0x5640e36ac808, sa_mask=[], sa_flags=SA_RESTORER|SA_RESTART|SA_SIGINFO|SA_NOCLDSTOP, sa_restorer=0x7f821b504fd0}, NULL, 8) = 0
2883 rt_sigprocmask(SIG_BLOCK, NULL, [], 8) = 0
2883 rt_sigprocmask(SIG_BLOCK, NULL, [], 8) = 0
2883 rt_sigprocmask(SIG_BLOCK, [CHLD], NULL, 8) = 0
2883 access("/etc/mcelog", R_OK|X_OK) = 0
2883 access("/etc/mcelog/cache-error-trigger", R_OK|X_OK) = 0
2883 socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0) = 3
2883 connect(3, {sa_family=AF_UNIX, sun_path="/var/run/nscd/socket"}, 110) = -1 ENOENT (Aucun fichier ou dossier de ce type)
2883 close(3) = 0
2883 socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0) = 3
2883 connect(3, {sa_family=AF_UNIX, sun_path="/var/run/nscd/socket"}, 110) = -1 ENOENT (Aucun fichier ou dossier de ce type)
2883 close(3) = 0
2883 newfstatat(AT_FDCWD, "/etc/nsswitch.conf", {st_mode=S_IFREG|0644, st_size=526, ...}, 0) = 0
2883 newfstatat(AT_FDCWD, "/", {st_mode=S_IFDIR|0755, st_size=4096, ...}, 0) = 0
2883 openat(AT_FDCWD, "/etc/nsswitch.conf", O_RDONLY|O_CLOEXEC) = 3
2883 newfstatat(3, "", {st_mode=S_IFREG|0644, st_size=526, ...}, AT_EMPTY_PATH) = 0
2883 read(3, "# /etc/nsswitch.conf\n#\n# Example"..., 4096) = 526
2883 read(3, "", 4096) = 0
2883 newfstatat(3, "", {st_mode=S_IFREG|0644, st_size=526, ...}, AT_EMPTY_PATH) = 0
2883 close(3) = 0
2883 openat(AT_FDCWD, "/etc/passwd", O_RDONLY|O_CLOEXEC) = 3
2883 newfstatat(3, "", {st_mode=S_IFREG|0644, st_size=1195, ...}, AT_EMPTY_PATH) = 0
2883 lseek(3, 0, SEEK_SET) = 0
2883 read(3, "root:x:0:0:root:/root:/bin/bash\n"..., 4096) = 1195
2883 close(3) = 0
2883 openat(AT_FDCWD, "/dev/mcelog", O_RDONLY) = -1 ENODEV (Aucun périphérique de ce type)
2883 write(2, "mcelog: ", 8) = 8
2883 write(2, "Cannot open `/dev/mcelog'", 25) = 25
2883 write(2, ": No such device\n", 17) = 17
2883 exit_group(1) = ?
2883 +++ exited with 1 +++
I installed mcelog from git:
git clone git://git.kernel.org/pub/scm/utils/cpu/mce/mcelog.git
apt install build-essential
cd mcelog && make && make install
cp mcelog.service /usr/lib/systemd/system
systemctl enable mcelog.service
echo 'ACTION=="add", KERNEL=="mcelog", SUBSYSTEM=="misc", TAG+="systemd", ENV{SYSTEMD_WANTS}+="mcelog.service"' > /usr/lib/udev/rules.d/55-mce.rules
reboot
CONFIG_X86_MCE is enabled:
cat /boot/config-6.1.0-17-amd64| grep MCE
CONFIG_X86_MCE=y
# CONFIG_X86_MCELOG_LEGACY is not set
CONFIG_X86_MCE_INTEL=y
CONFIG_X86_MCE_AMD=y
CONFIG_X86_MCE_THRESHOLD=y
CONFIG_X86_MCE_INJECT=m
CONFIG_IR_MCE_KBD_DECODER=m
CONFIG_IR_MCEUSB=m
CONFIG_EDAC_DECODE_MCE=m
CONFIG_XEN_MCE_LOG=y
Here is the config:
cat /etc/mcelog/mcelog.conf
#
# config file for mcelog
# For further options, see the mcelog manpage and documentation
#
# Filter out known broken events by default
filter = yes
# don't log memory errors individually
#filter-memory-errors = yes
# output in undecoded raw format to be easier machine readable
#raw = yes
# Added by me
run-credentials-user = daemon
# Not working when run as systemd service, must be run manually
[server]
# An upstream bug prevents this from being disabled
# Only allow root to connect by default
client-user = root
# Path to socket client uses to connect
socket-path = /var/run/mcelog-client
[dimm]
# Enable DIMM-tracking
dimm-tracking-enabled = yes
# Disable DIMM DMI pre-population unless supported on your system
dmi-prepopulate = no
# execute these triggers when the rate of corrected or uncorrected
# errors per DIMM exceeds the threshold
uc-error-trigger = dimm-error-trigger
uc-error-threshold = 1 / 24h
ce-error-trigger = dimm-error-trigger
ce-error-threshold = 10 / 24h
[socket]
# Memory error accounting per socket
socket-tracing-enabled = yes
mem-uc-error-threshold = 100 / 24h
mem-ce-error-trigger = socket-memory-error-trigger
mem-ce-error-threshold = 100 / 24h
mem-ce-error-log = yes
[cache]
# Attempt to off-line CPUs causing cache errors
cache-threshold-trigger = cache-error-trigger
cache-threshold-log = yes
[page]
# Try to soft-offline a 4K page if it exceeds the threshold
memory-ce-threshold = 10 / 24h
memory-ce-trigger = page-error-trigger
memory-ce-log = yes
memory-ce-action = soft
[trigger]
# Maximum number of running triggers
children-max = 2
directory = /etc/mcelog
Regards
On Debian with Linux 6.1.0-17-amd64 #1 SMP PREEMPT_DYNAMIC Debian 6.1.69-1 (2023-12-30) x86_64 GNU/Linux, mcelog can't access /dev/mcelog and the daemon does not start. Is this a bug? or Am I Missing something? I tried on two different Debian server.
Does Debian build the Linux kernel with:
CONFIG_X86_MCELOG_LEGACY=y
That's required to support the /dev/mcelog device file.
-Tony