NetBSD Problem Report #36513

From doshita@iij.ad.jp  Wed Jun 20 02:11:38 2007
Return-Path: <doshita@iij.ad.jp>
Received: from mail.netbsd.org (mail.netbsd.org [204.152.190.11])
	by narn.NetBSD.org (Postfix) with ESMTP id 3C26663B882
	for <gnats-bugs@gnats.NetBSD.org>; Wed, 20 Jun 2007 02:11:38 +0000 (UTC)
Message-Id: <200706200211.l5K2BZ3J003782@rotr.iij.ad.jp>
Date: Wed, 20 Jun 2007 11:11:35 +0900 (JST)
From: Hiroki Doshita <doshita@iij.ad.jp>
Reply-To: doshita@iij.ad.jp
To: gnats-bugs@NetBSD.org
Subject: Pre-cache load length exceeds source memory area in memcpy
X-Send-Pr-Version: 3.95

>Number:         36513
>Category:       port-arm
>Synopsis:       Pre-cache load length exceeds source memory area in memcpy
>Confidential:   no
>Severity:       non-critical
>Priority:       medium
>Responsible:    scw
>State:          closed
>Class:          sw-bug
>Submitter-Id:   net
>Arrival-Date:   Wed Jun 20 02:15:00 +0000 2007
>Closed-Date:    Sat Jun 23 09:17:27 +0000 2007
>Last-Modified:  Sun Aug 26 20:30:01 +0000 2007
>Originator:     Hiroki Doshita
>Release:        NetBSD 3.1
>Organization:
	Internet Initiative Japan, Inc.
>Environment:
Architecture: arm
Machine: armeb
>Description:
Pre-cache load length exceeds source memory area in memcpy.
Unrelated area is written back to memory, which causes
some troubles.
>How-To-Repeat:
>Fix:
Index: memcpy_xscale.S
===================================================================
RCS file: /cvs/cvsroot/src/common/lib/libc/arch/arm/string/memcpy_xscale.S,v
retrieving revision 1.1
diff -u -r1.1 memcpy_xscale.S
--- memcpy_xscale.S	20 Dec 2005 19:28:49 -0000	1.1
+++ memcpy_xscale.S	20 Jun 2007 02:08:52 -0000
@@ -247,13 +247,50 @@
 	str	r5, [r3], #0x04
 	str	r6, [r3], #0x04
 	str	r7, [r3], #0x04
-.Lmemcpy_bad1:
 	subs	r2, r2, #0x10
+
+.Lmemcpy_bad1:
+	cmp	r2, #0x20
 	bge	.Lmemcpy_bad1_loop16
+	cmp	r2, #0x10
+	blt	.Lmemcpy_bad1_loop16_short

-	adds	r2, r2, #0x10
+	/* copy last 16 bytes (without preload) */
+#ifdef __ARMEB__
+	mov	r4, ip, lsl #8
+#else
+	mov	r4, ip, lsr #8
+#endif
+	ldr	r5, [r1], #0x04
+	ldr	r6, [r1], #0x04
+	ldr	r7, [r1], #0x04
+	ldr	ip, [r1], #0x04
+#ifdef __ARMEB__
+	orr	r4, r4, r5, lsr #24
+	mov	r5, r5, lsl #8
+	orr	r5, r5, r6, lsr #24
+	mov	r6, r6, lsl #8
+	orr	r6, r6, r7, lsr #24
+	mov	r7, r7, lsl #8
+	orr	r7, r7, ip, lsr #24
+#else
+	orr	r4, r4, r5, lsl #24
+	mov	r5, r5, lsr #8
+	orr	r5, r5, r6, lsl #24
+	mov	r6, r6, lsr #8
+	orr	r6, r6, r7, lsl #24
+	mov	r7, r7, lsr #8
+	orr	r7, r7, ip, lsl #24
+#endif
+	str	r4, [r3], #0x04
+	str	r5, [r3], #0x04
+	str	r6, [r3], #0x04
+	str	r7, [r3], #0x04
+	subs	r2, r2, #0x10
 	ldmeqfd	sp!, {r4-r7}
 	bxeq	lr			/* Return now if done */
+
+.Lmemcpy_bad1_loop16_short:
 	subs	r2, r2, #0x04
 	sublt	r1, r1, #0x03
 	blt	.Lmemcpy_bad_done
@@ -308,13 +345,50 @@
 	str	r5, [r3], #0x04
 	str	r6, [r3], #0x04
 	str	r7, [r3], #0x04
-.Lmemcpy_bad2:
 	subs	r2, r2, #0x10
+
+.Lmemcpy_bad2:
+	cmp	r2, #0x20
 	bge	.Lmemcpy_bad2_loop16
+	cmp	r2, #0x10
+	blt	.Lmemcpy_bad2_loop16_short

-	adds	r2, r2, #0x10
+	/* copy last 16 bytes (without preload) */
+#ifdef __ARMEB__
+	mov	r4, ip, lsl #16
+#else
+	mov	r4, ip, lsr #16
+#endif
+	ldr	r5, [r1], #0x04
+	ldr	r6, [r1], #0x04
+	ldr	r7, [r1], #0x04
+	ldr	ip, [r1], #0x04
+#ifdef __ARMEB__
+	orr	r4, r4, r5, lsr #16
+	mov	r5, r5, lsl #16
+	orr	r5, r5, r6, lsr #16
+	mov	r6, r6, lsl #16
+	orr	r6, r6, r7, lsr #16
+	mov	r7, r7, lsl #16
+	orr	r7, r7, ip, lsr #16
+#else
+	orr	r4, r4, r5, lsl #16
+	mov	r5, r5, lsr #16
+	orr	r5, r5, r6, lsl #16
+	mov	r6, r6, lsr #16
+	orr	r6, r6, r7, lsl #16
+	mov	r7, r7, lsr #16
+	orr	r7, r7, ip, lsl #16
+#endif
+	str	r4, [r3], #0x04
+	str	r5, [r3], #0x04
+	str	r6, [r3], #0x04
+	str	r7, [r3], #0x04
+	subs	r2, r2, #0x10
 	ldmeqfd	sp!, {r4-r7}
 	bxeq	lr			/* Return now if done */
+
+.Lmemcpy_bad2_loop16_short:
 	subs	r2, r2, #0x04
 	sublt	r1, r1, #0x02
 	blt	.Lmemcpy_bad_done
@@ -369,13 +443,50 @@
 	str	r5, [r3], #0x04
 	str	r6, [r3], #0x04
 	str	r7, [r3], #0x04
-.Lmemcpy_bad3:
 	subs	r2, r2, #0x10
+
+.Lmemcpy_bad3:
+	cmp	r2, #0x20
 	bge	.Lmemcpy_bad3_loop16
+	cmp	r2, #0x10
+	blt	.Lmemcpy_bad3_loop16_short

-	adds	r2, r2, #0x10
+	/* copy last 16 bytes (without preload) */
+#ifdef __ARMEB__
+	mov	r4, ip, lsl #24
+#else
+	mov	r4, ip, lsr #24
+#endif
+	ldr	r5, [r1], #0x04
+	ldr	r6, [r1], #0x04
+	ldr	r7, [r1], #0x04
+	ldr	ip, [r1], #0x04
+#ifdef __ARMEB__
+	orr	r4, r4, r5, lsr #8
+	mov	r5, r5, lsl #24
+	orr	r5, r5, r6, lsr #8
+	mov	r6, r6, lsl #24
+	orr	r6, r6, r7, lsr #8
+	mov	r7, r7, lsl #24
+	orr	r7, r7, ip, lsr #8
+#else
+	orr	r4, r4, r5, lsl #8
+	mov	r5, r5, lsr #24
+	orr	r5, r5, r6, lsl #8
+	mov	r6, r6, lsr #24
+	orr	r6, r6, r7, lsl #8
+	mov	r7, r7, lsr #24
+	orr	r7, r7, ip, lsl #8
+#endif
+	str	r4, [r3], #0x04
+	str	r5, [r3], #0x04
+	str	r6, [r3], #0x04
+	str	r7, [r3], #0x04
+	subs	r2, r2, #0x10
 	ldmeqfd	sp!, {r4-r7}
 	bxeq	lr			/* Return now if done */
+
+.Lmemcpy_bad3_loop16_short:
 	subs	r2, r2, #0x04
 	sublt	r1, r1, #0x01
 	blt	.Lmemcpy_bad_done




>Release-Note:

>Audit-Trail:

Responsible-Changed-From-To: port-arm-maintainer->scw
Responsible-Changed-By: scw@netbsd.org
Responsible-Changed-When: Wed, 20 Jun 2007 08:08:53 +0100
Responsible-Changed-Why:
I'll take this one.


From: Steve Woodford <scw@netbsd.org>
To: gnats-bugs@netbsd.org
Cc: port-arm-maintainer@netbsd.org, gnats-admin@netbsd.org,
	netbsd-bugs@netbsd.org
Subject: Re: port-arm/36513: Pre-cache load length exceeds source memory area in memcpy
Date: Wed, 20 Jun 2007 09:03:32 +0100

 On Wednesday 20 June 2007 03:15, Hiroki Doshita wrote:

 > Pre-cache load length exceeds source memory area in memcpy.
 > Unrelated area is written back to memory, which causes
 > some troubles.

 Can you elaborate on the "some troubles" part? The source cacheline will 
 not be marked dirty by pld, assuming it was already clean or not 
 resident in the cache to begin with, so a write-back will not occur.

 The only case I can think of where this *may* be an issue would be 
 copying from a cacheable memory region which is shared with a 
 DMA-capable device...

 Cheers, Steve

From: "Masao Uebayashi" <uebayasi@gmail.com>
To: "Steve Woodford" <scw@netbsd.org>
Cc: gnats-bugs@netbsd.org, port-arm-maintainer@netbsd.org,
	gnats-admin@netbsd.org, netbsd-bugs@netbsd.org
Subject: Re: port-arm/36513: Pre-cache load length exceeds source memory area in memcpy
Date: Wed, 20 Jun 2007 17:49:06 +0900

 I'm not Doshita-san, but I was the one who translated the report (and introduced the confusion), so...

 > > Pre-cache load length exceeds source memory area in memcpy.
 > > Unrelated area is written back to memory, which causes
 > > some troubles.
 >
 > Can you elaborate on the "some troubles" part? The source cacheline will
 > not be marked dirty by pld, assuming it was already clean or not
 > resident in the cache to begin with, so a write-back will not occur.

 "Write-back" was wrong.  This is all about reads (cache fills).

 > The only case I can think of where this *may* be an issue would be
 > copying from a cacheable memory region which is shared with a
 > DMA-capable device...

 The place where he hit the problem was a memcpy() in m_dup(9).  The memcpy()
 does its job correctly, but something slow happens there and the system loses
 lots of packets.  We guess it's some kind of exception, but we have not figured it out yet.

 Masao

From: Steve Woodford <scw@netbsd.org>
To: gnats-bugs@NetBSD.org
Cc: 
Subject: PR/36513 CVS commit: src/common/lib/libc/arch/arm/string
Date: Thu, 21 Jun 2007 21:37:04 +0000 (UTC)

 Module Name:	src
 Committed By:	scw
 Date:		Thu Jun 21 21:37:04 UTC 2007

 Modified Files:
 	src/common/lib/libc/arch/arm/string: memcpy_xscale.S

 Log Message:
 Apply the patch, with some minor tweaks, supplied in PR/36513.
 This prevents a possible prefetch past the end of the source buffer.

 Note that the semantics of the pld instruction mean that it is unlikely
 that this would have caused any problems except in very specific
 circumstances in some types of device drivers.


 To generate a diff of this commit:
 cvs rdiff -r1.1 -r1.2 src/common/lib/libc/arch/arm/string/memcpy_xscale.S

 Please note that diffs are not public domain; they are subject to the
 copyright notices on the relevant files.

From: "Liam J. Foy" <liamjfoy@netbsd.org>
To: gnats-bugs@NetBSD.org
Cc: 
Subject: PR/36513 CVS commit: [netbsd-4] src/common/lib/libc/arch/arm/string
Date: Fri, 22 Jun 2007 13:07:43 +0000 (UTC)

 Module Name:	src
 Committed By:	liamjfoy
 Date:		Fri Jun 22 13:07:43 UTC 2007

 Modified Files:
 	src/common/lib/libc/arch/arm/string [netbsd-4]: memcpy_xscale.S

 Log Message:
 Pull up following revision(s) (requested by scw in ticket #741):
 	common/lib/libc/arch/arm/string/memcpy_xscale.S: revision 1.2
 Apply the patch, with some minor tweaks, supplied in PR/36513.
 This prevents a possible prefetch past the end of the source buffer.
 Note that the semantics of the pld instruction mean that it is unlikely
 that this would have caused any problems except in very specific
 circumstances in some types of device drivers.


 To generate a diff of this commit:
 cvs rdiff -r1.1 -r1.1.6.1 src/common/lib/libc/arch/arm/string/memcpy_xscale.S

 Please note that diffs are not public domain; they are subject to the
 copyright notices on the relevant files.

State-Changed-From-To: open->closed
State-Changed-By: scw@netbsd.org
State-Changed-When: Sat, 23 Jun 2007 10:17:27 +0100
State-Changed-Why:
Fix applied, and pulled up into 3.x and 4.x release branches.
Thanks for the report!


From: Manuel Bouyer <bouyer@netbsd.org>
To: gnats-bugs@NetBSD.org
Cc: 
Subject: PR/36513 CVS commit: [netbsd-3] src
Date: Sun, 26 Aug 2007 20:25:09 +0000 (UTC)

 Module Name:	src
 Committed By:	bouyer
 Date:		Sun Aug 26 20:25:09 UTC 2007

 Modified Files:
 	src/lib/libc/arch/arm/string [netbsd-3]: memcpy_xscale.S
 	src/sys/lib/libkern/arch/arm [netbsd-3]: memcpy_xscale.S

 Log Message:
 Apply patch, requested by scw in ticket #1806:
 	lib/libc/arch/arm/string/memcpy_xscale.S: patch
 	sys/lib/libkern/arch/arm/memcpy_xscale.S: patch
 Apply the patch, with some minor tweaks, supplied in PR/36513.
 This prevents a possible prefetch past the end of the source buffer.


 To generate a diff of this commit:
 cvs rdiff -r1.2 -r1.2.2.1 src/lib/libc/arch/arm/string/memcpy_xscale.S
 cvs rdiff -r1.2 -r1.2.2.1 src/sys/lib/libkern/arch/arm/memcpy_xscale.S

 Please note that diffs are not public domain; they are subject to the
 copyright notices on the relevant files.

>Unformatted:

NetBSD Home
NetBSD PR Database Search

(Contact us) $NetBSD: query-full-pr,v 1.39 2013/11/01 18:47:49 spz Exp $
$NetBSD: gnats_config.sh,v 1.8 2006/05/07 09:23:38 tsutsui Exp $
Copyright © 1994-2007 The NetBSD Foundation, Inc. ALL RIGHTS RESERVED.