NetBSD Problem Report #55664
From www@netbsd.org Wed Sep 16 22:06:13 2020
Return-Path: <www@netbsd.org>
Received: from mail.netbsd.org (mail.netbsd.org [199.233.217.200])
(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
(Client CN "mail.NetBSD.org", Issuer "mail.NetBSD.org CA" (not verified))
by mollari.NetBSD.org (Postfix) with ESMTPS id 2561E1A9239
for <gnats-bugs@gnats.NetBSD.org>; Wed, 16 Sep 2020 22:06:13 +0000 (UTC)
Message-Id: <20200916220611.E4CB81A923A@mollari.NetBSD.org>
Date: Wed, 16 Sep 2020 22:06:11 +0000 (UTC)
From: nruslan_devel@yahoo.com
Reply-To: nruslan_devel@yahoo.com
To: gnats-bugs@NetBSD.org
Subject: rump race condition
X-Send-Pr-Version: www-1.0
>Number: 55664
>Category: kern
>Synopsis: rump race condition
>Confidential: no
>Severity: serious
>Priority: medium
>Responsible: kern-bug-people
>State: open
>Class: sw-bug
>Submitter-Id: net
>Arrival-Date: Wed Sep 16 22:10:00 +0000 2020
>Last-Modified: Mon Nov 02 17:15:01 +0000 2020
>Originator: Ruslan Nikolaev
>Release: master
>Organization:
Virginia Tech
>Environment:
>Description:
While working on the SMP version of rumprun (https://github.com/ssrg-vt/rumprun-smp), we found several issues:
1. A race condition (bug) in sys/rump/librump/rumpkern/intr.c since rumpuser_cv_signal() is called without holding a mutex
2. sleepq is implemented using a single (global) conditional variable; that should be done per each sleepq separately
Below I attach a patch (relocated) for the master branch. Please let us know what you think. We also have some other fixes for rump but they should probably be considered separately.
>How-To-Repeat:
>Fix:
diff --git a/sys/kern/kern_condvar.c b/sys/kern/kern_condvar.c
index cb58236fcb05..460e077b526f 100644
--- a/sys/kern/kern_condvar.c
+++ b/sys/kern/kern_condvar.c
@@ -102,6 +102,7 @@ void
cv_destroy(kcondvar_t *cv)
{
+ sleepq_destroy(CV_SLEEPQ(cv));
#ifdef DIAGNOSTIC
KASSERT(cv_is_valid(cv));
KASSERT(!cv_has_waiters(cv));
diff --git a/sys/rump/include/rump-sys/kern.h b/sys/rump/include/rump-sys/kern.h
index cac0b96d0698..a54012df9d01 100644
--- a/sys/rump/include/rump-sys/kern.h
+++ b/sys/rump/include/rump-sys/kern.h
@@ -174,6 +174,7 @@ void rump_syscall_boot_establish(const struct rump_onesyscall *, size_t);
void rump_schedlock_cv_wait(struct rumpuser_cv *);
int rump_schedlock_cv_timedwait(struct rumpuser_cv *,
const struct timespec *);
+void rump_schedlock_cv_signal(struct cpu_info *, struct rumpuser_cv *);
void rump_user_schedule(int, void *);
void rump_user_unschedule(int, int *, void *);
diff --git a/sys/rump/librump/rumpkern/intr.c b/sys/rump/librump/rumpkern/intr.c
index 651665ff11bb..829154f187b2 100644
--- a/sys/rump/librump/rumpkern/intr.c
+++ b/sys/rump/librump/rumpkern/intr.c
@@ -464,7 +464,7 @@ rump_softint_run(struct cpu_info *ci)
for (i = 0; i < SOFTINT_COUNT; i++) {
if (!TAILQ_EMPTY(&si_lvl[i].si_pending))
- rumpuser_cv_signal(si_lvl[i].si_cv);
+ rump_schedlock_cv_signal(ci, si_lvl[i].si_cv);
}
}
diff --git a/sys/rump/librump/rumpkern/scheduler.c b/sys/rump/librump/rumpkern/scheduler.c
index 8b4a519c8cda..231cacba3ac9 100644
--- a/sys/rump/librump/rumpkern/scheduler.c
+++ b/sys/rump/librump/rumpkern/scheduler.c
@@ -179,6 +179,16 @@ rump_scheduler_init(int numcpu)
mutex_init(&unruntime_lock, MUTEX_DEFAULT, IPL_SCHED);
}
+void
+rump_schedlock_cv_signal(struct cpu_info *ci, struct rumpuser_cv *cv)
+{
+ struct rumpcpu *rcpu = cpuinfo_to_rumpcpu(ci);
+
+ rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
+ rumpuser_cv_signal(cv);
+ rumpuser_mutex_exit(rcpu->rcpu_mtx);
+}
+
/*
* condvar ops using scheduler lock as the rumpuser interlock.
*/
diff --git a/sys/rump/librump/rumpkern/sleepq.c b/sys/rump/librump/rumpkern/sleepq.c
index 4ce5142226cb..b047a03166b8 100644
--- a/sys/rump/librump/rumpkern/sleepq.c
+++ b/sys/rump/librump/rumpkern/sleepq.c
@@ -40,25 +40,20 @@ __KERNEL_RCSID(0, "$NetBSD: sleepq.c,v 1.20 2020/04/25 15:42:15 bouyer Exp $");
#include <rump-sys/kern.h>
syncobj_t sleep_syncobj;
-static kcondvar_t sq_cv;
-static int
-sqinit1(void)
+void
+sleepq_init(sleepq_t *sq)
{
- cv_init(&sq_cv, "sleepq");
-
- return 0;
+ LIST_INIT(sq);
+ cv_init(&sq->sq_cv, "sleepq");
}
void
-sleepq_init(sleepq_t *sq)
+sleepq_destroy(sleepq_t *sq)
{
- static ONCE_DECL(sqctl);
-
- RUN_ONCE(&sqctl, sqinit1);
- LIST_INIT(sq);
+ cv_destroy(&sq->sq_cv);
}
void
@@ -83,7 +78,7 @@ sleepq_block(int timo, bool catch)
while (l->l_wchan) {
l->l_mutex = mp; /* keep sleepq lock until woken up */
- error = cv_timedwait(&sq_cv, mp, timo);
+ error = cv_timedwait(&l->l_sleepq->sq_cv, mp, timo);
if (error == EWOULDBLOCK || error == EINTR) {
if (l->l_wchan) {
LIST_REMOVE(l, l_sleepchain);
@@ -118,7 +113,7 @@ sleepq_wake(sleepq_t *sq, wchan_t wchan, u_int expected, kmutex_t *mp)
}
}
if (found)
- cv_broadcast(&sq_cv);
+ cv_broadcast(&sq->sq_cv);
mutex_spin_exit(mp);
}
@@ -130,7 +125,7 @@ sleepq_unsleep(struct lwp *l, bool cleanup)
l->l_wchan = NULL;
l->l_wmesg = NULL;
LIST_REMOVE(l, l_sleepchain);
- cv_broadcast(&sq_cv);
+ cv_broadcast(&l->l_sleepq->sq_cv);
if (cleanup) {
mutex_spin_exit(l->l_mutex);
diff --git a/sys/sys/sleepq.h b/sys/sys/sleepq.h
index c721837bb635..c3f502cf0246 100644
--- a/sys/sys/sleepq.h
+++ b/sys/sys/sleepq.h
@@ -45,13 +45,21 @@
* Generic sleep queues.
*/
+#ifdef _RUMPKERNEL
+# include <sys/condvar.h>
+struct sleepq {
+ LIST_HEAD(, lwp); /* anonymous struct */
+ kcondvar_t sq_cv;
+};
+#else
+LIST_HEAD(sleepq, lwp);
+#endif
+
#define SLEEPTAB_HASH_SHIFT 7
#define SLEEPTAB_HASH_SIZE (1 << SLEEPTAB_HASH_SHIFT)
#define SLEEPTAB_HASH_MASK (SLEEPTAB_HASH_SIZE - 1)
#define SLEEPTAB_HASH(wchan) (((uintptr_t)(wchan) >> 8) & SLEEPTAB_HASH_MASK)
-LIST_HEAD(sleepq, lwp);
-
typedef struct sleepq sleepq_t;
typedef struct sleeptab {
@@ -72,6 +80,16 @@ void sleepq_changepri(lwp_t *, pri_t);
void sleepq_lendpri(lwp_t *, pri_t);
int sleepq_block(int, bool);
+#ifdef _RUMPKERNEL
+void
+sleepq_destroy(sleepq_t *);
+#else
+static inline void
+sleepq_destroy(sleepq_t *sq)
+{
+}
+#endif
+
void sleeptab_init(sleeptab_t *);
extern sleeptab_t sleeptab;
>Audit-Trail:
From: Ruslan Nikolaev <nruslan_devel@yahoo.com>
To: gnats-bugs@netbsd.org, kern-bug-people@netbsd.org,
gnats-admin@netbsd.org, netbsd-bugs@netbsd.org
Cc:
Subject: Re: kern/55664: rump race condition
Date: Wed, 23 Sep 2020 15:30:27 -0400
I was wondering if there is any update on that.
From: Christos Zoulas <christos@zoulas.com>
To: gnats-bugs@netbsd.org
Cc: kern-bug-people@netbsd.org,
gnats-admin@netbsd.org,
netbsd-bugs@netbsd.org,
nruslan_devel@yahoo.com
Subject: Re: kern/55664: rump race condition
Date: Sun, 1 Nov 2020 12:47:42 -0500
--Apple-Mail=_4CED2A92-2D99-4473-BC8B-D343C781E17E
Content-Transfer-Encoding: quoted-printable
Content-Type: text/plain;
charset=us-ascii
I just looked at it. Part 1 is fine, but the implementation of part 2 =
(adding an additional kcondvar_t to struct sleepq depending on #ifdef =
_RUMPKERN) is something that we don't do in general. We try to keep the =
code to have the least amount of #ifdef _RUMPKERN and the data =
structures be the same between the two implementations. Perhaps there is =
a different way to do this?
christos
> On Sep 23, 2020, at 5:40 PM, Ruslan Nikolaev <nruslan_devel@yahoo.com> =
wrote:
>=20
> The following reply was made to PR kern/55664; it has been noted by =
GNATS.
>=20
> From: Ruslan Nikolaev <nruslan_devel@yahoo.com>
> To: gnats-bugs@netbsd.org, kern-bug-people@netbsd.org,
> gnats-admin@netbsd.org, netbsd-bugs@netbsd.org
> Cc:
> Subject: Re: kern/55664: rump race condition
> Date: Wed, 23 Sep 2020 15:30:27 -0400
>=20
> I was wondering if there is any update on that.
>=20
--Apple-Mail=_4CED2A92-2D99-4473-BC8B-D343C781E17E
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment;
filename=signature.asc
Content-Type: application/pgp-signature;
name=signature.asc
Content-Description: Message signed with OpenPGP
-----BEGIN PGP SIGNATURE-----
Comment: GPGTools - http://gpgtools.org
iF0EARECAB0WIQS+BJlbqPkO0MDBdsRxESqxbLM7OgUCX570vgAKCRBxESqxbLM7
OnWhAJ9eTujWG26/Lwe9pTkPW3reP2esjgCeI7vbS4+HKh1GbKkkVZ1yS32MDUc=
=NeXb
-----END PGP SIGNATURE-----
--Apple-Mail=_4CED2A92-2D99-4473-BC8B-D343C781E17E--
From: Ruslan Nikolaev <nruslan_devel@yahoo.com>
To: gnats-bugs@netbsd.org, kern-bug-people@netbsd.org,
gnats-admin@netbsd.org, netbsd-bugs@netbsd.org
Cc:
Subject: Re: kern/55664: rump race condition
Date: Sun, 1 Nov 2020 14:39:06 -0500
Good point, Christos! I was also a bit concerned about this... but
eventually ended up with this implementation to avoid too many intrusive
changes in the code. The reason was that each sleepq gets the 1:1
mapping to the corresponding rump conditional variable. Therefore, it
was easier to just keep it inside sleepq. Do you have any specific
suggestions?
On 11/1/20 12:50 PM, Christos Zoulas wrote:
> The following reply was made to PR kern/55664; it has been noted by GNATS.
>
> From: Christos Zoulas <christos@zoulas.com>
> To: gnats-bugs@netbsd.org
> Cc: kern-bug-people@netbsd.org,
> gnats-admin@netbsd.org,
> netbsd-bugs@netbsd.org,
> nruslan_devel@yahoo.com
> Subject: Re: kern/55664: rump race condition
> Date: Sun, 1 Nov 2020 12:47:42 -0500
>
> --Apple-Mail=_4CED2A92-2D99-4473-BC8B-D343C781E17E
> Content-Transfer-Encoding: quoted-printable
> Content-Type: text/plain;
> charset=us-ascii
>
> I just looked at it. Part 1 is fine, but the implementation of part 2 =
> (adding an additional kcondvar_t to struct sleepq depending on #ifdef =
> _RUMPKERN) is something that we don't do in general. We try to keep the =
> code to have the least amount of #ifdef _RUMPKERN and the data =
> structures be the same between the two implementations. Perhaps there is =
> a different way to do this?
>
> christos
>
From: "Christos Zoulas" <christos@netbsd.org>
To: gnats-bugs@gnats.NetBSD.org
Cc:
Subject: PR/55664 CVS commit: src/sys
Date: Sun, 1 Nov 2020 15:55:16 -0500
Module Name: src
Committed By: christos
Date: Sun Nov 1 20:55:16 UTC 2020
Modified Files:
src/sys/kern: kern_condvar.c
src/sys/sys: sleepq.h
Added Files:
src/sys/sys: sleeptab.h
Log Message:
PR/55664: Ruslan Nikolaev: Split out sleepq guts and turnstiles not used
in rump into a separate header file. Add a sleepq_destroy() empty hook.
To generate a diff of this commit:
cvs rdiff -u -r1.52 -r1.53 src/sys/kern/kern_condvar.c
cvs rdiff -u -r1.32 -r1.33 src/sys/sys/sleepq.h
cvs rdiff -u -r0 -r1.1 src/sys/sys/sleeptab.h
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
From: "Christos Zoulas" <christos@netbsd.org>
To: gnats-bugs@gnats.NetBSD.org
Cc:
Subject: PR/55664 CVS commit: src/sys/rump
Date: Sun, 1 Nov 2020 15:58:39 -0500
Module Name: src
Committed By: christos
Date: Sun Nov 1 20:58:38 UTC 2020
Modified Files:
src/sys/rump/include/rump-sys: kern.h
src/sys/rump/librump/rumpkern: intr.c scheduler.c sleepq.c
Added Files:
src/sys/rump/include/sys: sleeptab.h
Log Message:
PR/55664: Ruslan Nikolaev: Fix:
1. A race condition (bug) in sys/rump/librump/rumpkern/intr.c since
rumpuser_cv_signal() is called without holding a mutex
2. sleepq is implemented using a single (global) conditional
variable; that should be done per each sleepq separately
To generate a diff of this commit:
cvs rdiff -u -r1.4 -r1.5 src/sys/rump/include/rump-sys/kern.h
cvs rdiff -u -r0 -r1.1 src/sys/rump/include/sys/sleeptab.h
cvs rdiff -u -r1.55 -r1.56 src/sys/rump/librump/rumpkern/intr.c
cvs rdiff -u -r1.51 -r1.52 src/sys/rump/librump/rumpkern/scheduler.c
cvs rdiff -u -r1.20 -r1.21 src/sys/rump/librump/rumpkern/sleepq.c
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
From: Ruslan Nikolaev <nruslan_devel@yahoo.com>
To: Christos Zoulas <christos@zoulas.com>, gnats-bugs@netbsd.org
Cc: kern-bug-people@netbsd.org, gnats-admin@netbsd.org, netbsd-bugs@netbsd.org
Subject: Re: kern/55664: rump race condition
Date: Mon, 2 Nov 2020 10:43:11 -0500
Christos, thank you for merging this change! Just one comment. Since
sleepq_destroy() is a macro for the kernel, should it also be a macro
for rump to be consistent in its sleeptab.h? Something like this:
#define sleepq_destroy(a) (cv_destroy(&(a)->sq_cv))
Also wanted to let you know that I have just posted another problem and
patch to kern/55777. I� have one more fix for rump SMP which I will post
shortly after that.
(Contact us)
$NetBSD: query-full-pr,v 1.46 2020/01/03 16:35:01 leot Exp $
$NetBSD: gnats_config.sh,v 1.9 2014/08/02 14:16:04 spz Exp $
Copyright © 1994-2020
The NetBSD Foundation, Inc. ALL RIGHTS RESERVED.