NetBSD Problem Report #52347
From martin@duskware.de Wed Jun 28 15:57:01 2017
Return-Path: <martin@duskware.de>
Received: from mail.netbsd.org (mail.netbsd.org [199.233.217.200])
(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
(Client CN "mail.netbsd.org", Issuer "Postmaster NetBSD.org" (verified OK))
by mollari.NetBSD.org (Postfix) with ESMTPS id 05BE47A2BA
for <gnats-bugs@gnats.NetBSD.org>; Wed, 28 Jun 2017 15:57:01 +0000 (UTC)
Message-Id: <20170628155649.08FDA5CC761@emmas.aprisoft.de>
Date: Wed, 28 Jun 2017 17:56:49 +0200 (CEST)
From: martin@NetBSD.org
Reply-To: martin@NetBSD.org
To: gnats-bugs@NetBSD.org
Subject: ww mutex class mismatch
X-Send-Pr-Version: 3.95
>Number: 52347
>Category: kern
>Synopsis: ww mutex class mismatch
>Confidential: no
>Severity: critical
>Priority: high
>Responsible: kern-bug-people
>State: open
>Class: sw-bug
>Submitter-Id: net
>Arrival-Date: Wed Jun 28 16:00:00 +0000 2017
>Last-Modified: Thu Jul 06 08:50:01 +0000 2017
>Originator: Martin Husemann
>Release: NetBSD 8.99.1
>Organization:
The NetBSD Foundation, Inc.
>Environment:
System: NetBSD night-owl.duskware.de 8.99.1 NetBSD 8.99.1 (NIGHT-OWL) #516: Mon Jun 26 15:40:28 CEST 2017 martin@night-owl.duskware.de:/usr/src/sys/arch/amd64/compile/NIGHT-OWL amd64
Architecture: x86_64
Machine: amd64
>Description:
While using something that uses gtk3, I got this crash (seen it before, but
last time no crash dump):
> fgrep "savecore: reboot" /var/log/messages
Jun 28 17:42:32 night-owl savecore: reboot after panic: panic: kernel diagnostic assertion "(ctx->wwx_class == mutex->wwm_u.ctx->wwx_class)" failed: file "../../../../external/bsd/drm2/linux/linux_ww_mutex.c", line 304 ww mutex class mismatch: 0xffffffff812b6bc0 != 0x0
> gdb netbsd.gdb
[..]
Reading symbols from netbsd.gdb...done.
warning: ../../../../gdbscripts/kdump: No such file or directory
(gdb) target kvm /tmp/netbsd.core
0xffffffff80229485 in cpu_reboot (howto=howto@entry=256,
bootstr=bootstr@entry=0x0) at ../../../../arch/amd64/amd64/machdep.c:674
674 dumpsys();
(gdb) bt
#0 0xffffffff80229485 in cpu_reboot (howto=howto@entry=256,
bootstr=bootstr@entry=0x0) at ../../../../arch/amd64/amd64/machdep.c:674
#1 0xffffffff80632474 in db_sync_cmd (addr=<optimized out>,
have_addr=<optimized out>, count=<optimized out>, modif=<optimized out>)
at ../../../../ddb/db_command.c:1380
#2 0xffffffff80632c3e in db_command (
last_cmdp=last_cmdp@entry=0xffffffff812657a0 <db_last_command>)
at ../../../../ddb/db_command.c:914
#3 0xffffffff80632fa5 in db_command_loop ()
at ../../../../ddb/db_command.c:572
#4 0xffffffff806367be in db_trap (type=type@entry=1, code=code@entry=0)
at ../../../../ddb/db_trap.c:90
#5 0xffffffff80225f32 in kdb_trap (type=type@entry=1, code=code@entry=0,
regs=regs@entry=0xffffe40041379670)
at ../../../../arch/amd64/amd64/db_interface.c:234
#6 0xffffffff8022a80e in trap (frame=0xffffe40041379670)
at ../../../../arch/amd64/amd64/trap.c:293
#7 0xffffffff8020108e in alltraps ()
#8 0xffffffff80224945 in breakpoint ()
#9 0xffffffff80814bc3 in vpanic (
fmt=0xffffffff80f58218 "kernel %sassertion \"%s\" failed: file \"%s\", line %d ww mutex class mismatch: %p != %p", ap=ap@entry=0xffffe400413797a8)
at ../../../../kern/subr_prf.c:340
#10 0xffffffff80ad9485 in kern_assert (
fmt=fmt@entry=0xffffffff80f58218 "kernel %sassertion \"%s\" failed: file \"%s\", line %d ww mutex class mismatch: %p != %p")
at ../../../../../../lib/libkern/kern_assert.c:51
#11 0xffffffff80aaaadc in ww_mutex_lock_wait_sig (
mutex=mutex@entry=0xffffe4012d67b620, ctx=ctx@entry=0xffffe40041379af8)
at ../../../../external/bsd/drm2/linux/linux_ww_mutex.c:302
#12 0xffffffff80aab9aa in linux_ww_mutex_lock_slow_interruptible (
mutex=0xffffe4012d67b620, ctx=ctx@entry=0xffffe40041379af8)
at ../../../../external/bsd/drm2/linux/linux_ww_mutex.c:732
#13 0xffffffff80ab2d86 in ttm_eu_reserve_buffers (
ticket=ticket@entry=0xffffe40041379af8, list=list@entry=0xffffe40041379a20)
at ../../../../external/bsd/drm2/dist/drm/ttm/ttm_execbuf_util.c:157
#14 0xffffffff8099ac53 in radeon_bo_list_validate (rdev=0xffff800007067000,
ticket=ticket@entry=0xffffe40041379af8,
head=head@entry=0xffffe40041379a20, ring=0)
at ../../../../external/bsd/drm2/dist/drm/radeon/radeon_object.c:453
#15 0xffffffff80986231 in radeon_cs_parser_relocs (p=0xffffe400413799d0)
at ../../../../external/bsd/drm2/dist/drm/radeon/radeon_cs.c:180
#16 radeon_cs_ioctl (dev=<optimized out>, data=<optimized out>,
filp=<optimized out>)
at ../../../../external/bsd/drm2/dist/drm/radeon/radeon_cs.c:631
#17 0xffffffff806d8c13 in drm_ioctl (fp=<optimized out>, cmd=<optimized out>,
data=0xffffe40041379de0) at ../../../../external/bsd/drm2/drm/drm_drv.c:676
#18 0xffffffff80820311 in sys_ioctl (l=<optimized out>,
uap=0xffffe40041379f00, retval=<optimized out>)
at ../../../../kern/sys_generic.c:671
#19 0xffffffff8024b2ac in sy_call (rval=0xffffe40041379eb0,
uap=0xffffe40041379f00, l=0xffffe40118907180,
sy=0xffffffff81272bd0 <sysent+1296>) at ../../../../sys/syscallvar.h:65
#20 sy_invoke (code=54, rval=0xffffe40041379eb0, uap=0xffffe40041379f00,
l=0xffffe40118907180, sy=0xffffffff81272bd0 <sysent+1296>)
at ../../../../sys/syscallvar.h:94
#21 syscall (frame=0xffffe40041379f00)
at ../../../../arch/x86/x86/syscall.c:144
#22 0xffffffff80200771 in Xsyscall ()
(gdb) up 11
#11 0xffffffff80aaaadc in ww_mutex_lock_wait_sig (
mutex=mutex@entry=0xffffe4012d67b620, ctx=ctx@entry=0xffffe40041379af8)
at ../../../../external/bsd/drm2/linux/linux_ww_mutex.c:302
302 KASSERTMSG((ctx->wwx_class == mutex->wwm_u.ctx->wwx_class),
(gdb) p *ctx
$1 = {wwx_class = 0xffffffff812b6bc0 <reservation_ww_class>,
wwx_owner = 0xffffe40118907180, wwx_ticket = 50953, wwx_acquired = 0,
wwx_acquire_done = false, wwx_rb_node = {rb_nodes = {0x0, 0x0}, rb_info = 0}}
(gdb) p *mutex
$2 = {wwm_state = WW_CTX, wwm_u = {owner = 0xffffe4004134faf8,
ctx = 0xffffe4004134faf8}, wwm_lock = {u = {mtxa_owner = 67073}},
wwm_class = 0xffffffff812b6bc0 <reservation_ww_class>, wwm_waiters = {
rbt_root = 0x0, rbt_ops = 0xffffffff80e91c00 <ww_acquire_ctx_rb_ops>,
rbt_minmax = {0xffffe4012d67b640, 0xffffe4012d67b640}}, wwm_cv = {
cv_opaque = {0x0, 0xffffe4012d67b660, 0xffffffff80f587d5}}}
(gdb) p *mutex->wwm_u.ctx
$3 = {wwx_class = 0xffffe4004134fd60, wwx_owner = 0xffffe40107170940,
wwx_ticket = 18446713288477965536, wwx_acquired = 2149602382,
wwx_acquire_done = 255, wwx_rb_node = {rb_nodes = {0x0, 0xffffe400bcd5f284},
rb_info = 88}}
(gdb) p *mutex->wwm_u.ctx->wwx_class
$4 = {wwc_ticket = 18446713288477949952}
(gdb) quit
>How-To-Repeat:
Just use gtk3 base stuff with a radeon on -current?
>Fix:
n/a
>Audit-Trail:
From: coypu@sdf.org
To: gnats-bugs@NetBSD.org
Cc:
Subject: Re: kern/52347: ww mutex class mismatch
Date: Wed, 28 Jun 2017 17:26:50 +0000
One thing I see in linux is that it has (and we don't)
in radeon_cs_ioctl:
if (rdev->in_reset) {
up_read(&rdev->exclusive_lock);
r = radeon_gpu_reset(rdev);
if (!r)
r = -EAGAIN;
return r;
}
And in radeon_gpu_reset (which we do have)
rdev->in_reset = true;
... precarious things, I assume ...
drm_helper_resume_force_mode(rdev->ddev);
rdev->in_reset = false;
We do drm_helper_resume_force_mode etc. without in_reset, and without
anything blocking entry to radeon_cs_ioctl.
It's possible I misunderstand rwlocks (hence the question to tech-kern)
and the lock prevents this too.
From: coypu@sdf.org
To: gnats-bugs@NetBSD.org
Cc:
Subject: Re: kern/52347: ww mutex class mismatch
Date: Wed, 28 Jun 2017 22:16:21 +0000
Never mind the previous, they reduced the lock and added another
confusing thing and it's confusing but we shouldn't be able to enter
drm_ioctl.
From: Martin Husemann <martin@duskware.de>
To: gnats-bugs@NetBSD.org
Cc:
Subject: Re: kern/52347: ww mutex class mismatch
Date: Thu, 6 Jul 2017 10:06:27 +0200
Taylor asked me to run a lockdebug kernel and indeed that fires ~immediately.
No crash dump, so manual transcription:
LOCKDEBUG: Wait/wound mutex error: linux_wm_mutex_unlock,826: not locked
..
linux_wm_mutex_unlock() at netbsd:linux_wm_mutex_unlock+0x64
ttm_eu_fence_buffer_objects() at netbsd:radeon_cs_parser_fini+0x1d5
radeon_cs_ioctl() at netbsd:radeon_cs_ioctl+0x6d2
drm_ioctl() at netbsd:drm_ioctl+0x11e
sys_ioctl() at netbsd:sys_ioctl+0x101
and the source lines:
(gdb) list *(radeon_cs_parser_fini+0x1d5)
0xffffffff809887af is in radeon_cs_parser_fini (../../../../external/bsd/drm2/dist/drm/radeon/radeon_cs.c:411).
406 } else if (backoff) {
407 ttm_eu_backoff_reservation(&parser->ticket,
408 &parser->validated);
409 }
410
411 if (parser->relocs != NULL) {
412 for (i = 0; i < parser->nrelocs; i++) {
413 if (parser->relocs[i].gobj)
414 drm_gem_object_unreference_unlocked(parser->relocs[i].gobj);
415 }
(gdb) list *(radeon_cs_ioctl+0x6d2)
0xffffffff80989345 is in radeon_cs_ioctl (../../../../external/bsd/drm2/include/linux/rwsem.h:84).
79
80 static inline void
81 up_read(struct rw_semaphore *rwsem)
82 {
83
84 rw_exit(&rwsem->rws_lock);
85 }
86
87 static inline void
88 up_write(struct rw_semaphore *rwsem)
(gdb) list *(drm_ioctl+0x11e)
0xffffffff806d8a03 is in drm_ioctl (../../../../external/bsd/drm2/drm/drm_drv.c:676).
671
672 if (!ISSET(ioctl->flags, DRM_UNLOCKED))
673 mutex_lock(&drm_global_mutex);
674
675 /* XXX errno Linux->NetBSD */
676 error = -(*ioctl->func)(dev, data, file);
677
678 if (!ISSET(ioctl->flags, DRM_UNLOCKED))
679 mutex_unlock(&drm_global_mutex);
680
Martin
From: Martin Husemann <martin@duskware.de>
To: gnats-bugs@NetBSD.org
Cc:
Subject: Re: kern/52347: ww mutex class mismatch
Date: Thu, 6 Jul 2017 10:46:01 +0200
Additional gdb output:
0xffffffff80989339 <radeon_cs_ioctl+1734>: lea -0x3a0(%rbp),%rdi
0xffffffff80989340 <radeon_cs_ioctl+1741>:
callq 0xffffffff809885da <radeon_cs_parser_fini>
0xffffffff80989345 <radeon_cs_ioctl+1746>: mov -0x3a8(%rbp),%rdi
(gdb) list *(0xffffffff80989340)
0xffffffff80989340 is in radeon_cs_ioctl (../../../../external/bsd/drm2/dist/drm/radeon/radeon_cs.c:654).
649 r = radeon_cs_ib_vm_chunk(rdev, &parser);
650 if (r) {
651 goto out;
652 }
653 out:
654 radeon_cs_parser_fini(&parser, r, true);
655 up_read(&rdev->exclusive_lock);
656 r = radeon_cs_handle_lockup(rdev, r);
657 return r;
658 }
Martin
(Contact us)
$NetBSD: query-full-pr,v 1.39 2013/11/01 18:47:49 spz Exp $
$NetBSD: gnats_config.sh,v 1.8 2006/05/07 09:23:38 tsutsui Exp $
Copyright © 1994-2014
The NetBSD Foundation, Inc. ALL RIGHTS RESERVED.