NetBSD Problem Report #16653

Received: (qmail 26583 invoked from network); 4 May 2002 02:08:10 -0000
Message-Id: <20020504020812.2B77C1112F@www.netbsd.org>
Date: Fri,  3 May 2002 19:08:12 -0700 (PDT)
From: svs@ropnet.ru
Sender: nobody@netbsd.org
Reply-To: svs@ropnet.ru
To: gnats-bugs@gnats.netbsd.org
Subject: msdosfs mistakenly assumes CP437 as on-disk file name character set.
X-Send-Pr-Version: www-1.0

>Number:         16653
>Notify-List:    gson@gson.org
>Category:       kern
>Synopsis:       msdosfs mistakenly assumes CP437 as on-disk file name character set.
>Confidential:   no
>Severity:       non-critical
>Priority:       medium
>Responsible:    kern-bug-people
>State:          open
>Class:          sw-bug
>Submitter-Id:   net
>Arrival-Date:   Sat May 04 02:09:00 +0000 2002
>Closed-Date:    
>Last-Modified:  Sat Jan 17 14:50:44 +0000 2015
>Originator:     Sergey Svishchev
>Release:        1.5ZC
>Organization:
>Environment:
>Description:
Built-in conversion tables, when applied to file names in a code page other than CP437 (e.g. CP866), make these files inaccessible.
>How-To-Repeat:
Mount a filesystem created under localized (Russian) version of Windows.  Try to access files that contain Cyrillic characters.   Lose.

>Fix:
Not a real fix (should probably import FreeBSD's msdosfs code), but a workaround:

Index: sys/msdosfs/msdosfs_conv.c
===================================================================
RCS file: /cvsroot/syssrc/sys/msdosfs/msdosfs_conv.c,v
retrieving revision 1.32
diff -u -r1.32 msdosfs_conv.c
--- msdosfs_conv.c	2002/01/08 20:44:13	1.32
+++ msdosfs_conv.c	2002/04/29 22:21:38
@@ -66,6 +66,16 @@
 #include <msdosfs/direntry.h>
 #include <msdosfs/denode.h>

+#ifndef MSDOSFS_NOCONV
+#define U2L(x) u2l[x]
+#define UNIX2DOS(x) unix2dos[x]
+#define DOS2UNIX(x) dos2unix[x]
+#else
+#define U2L(x) (x)
+#define UNIX2DOS(x) (x)
+#define DOS2UNIX(x) (x)
+#endif
+
 /*
  * Days in each month in a regular year.
  */
@@ -232,6 +242,7 @@
 	tsp->tv_nsec = (dh % 100) * 10000000;
 }

+#ifndef MSDOSFS_NOCONV
 static const u_char
 unix2dos[256] = {
 	0,    0,    0,    0,    0,    0,    0,    0,	/* 00-07 */
@@ -339,6 +350,7 @@
 	0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* f0-f7 */
 	0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* f8-ff */
 };
+#endif

 /*
  * DOS filenames are made of 2 parts, the name part and the extension part.
@@ -369,10 +381,10 @@
 	 * directory slot. Another dos quirk.
 	 */
 	if (*dn == SLOT_E5)
-		c = dos2unix[0xe5];
+		c = DOS2UNIX(0xe5);
 	else
-		c = dos2unix[*dn];
-	*un++ = lower ? u2l[c] : c;
+		c = DOS2UNIX(*dn);
+	*un++ = lower ? U2L(c) : c;

 	/*
 	 * Copy the rest into the unix filename string, ignoring
@@ -383,8 +395,8 @@
 		;

 	for (i = 1; i <= j; i++) {
-		c = dos2unix[dn[i]];
-		*un++ = lower ? u2l[c] : c;
+		c = DOS2UNIX(dn[i]);
+		*un++ = lower ? U2L(c) : c;
 		thislong++;
 	}
 	dn += 8;
@@ -397,8 +409,8 @@
 		*un++ = '.';
 		thislong++;
 		for (i = 0; i < 3 && *dn != ' '; i++) {
-			c = dos2unix[*dn++];
-			*un++ = lower ? u2l[c] : c;
+			c = DOS2UNIX(*dn++);
+			*un++ = lower ? U2L(c) : c;
 			thislong++;
 		}
 	}
@@ -493,7 +505,7 @@
 		else
 			l = unlen - (dp - un);
 		for (i = 0, j = 8; i < l && j < 11; i++, j++) {
-			if (dp[i] != (dn[j] = unix2dos[dp[i]])
+			if (dp[i] != (dn[j] = UNIX2DOS(dp[i]))
 			    && conv != 3)
 				conv = 2;
 			if (!dn[j]) {
@@ -518,7 +530,7 @@
 		if ((*un == ' ') && shortlen)
 			dn[j] = ' ';
 		else
-			dn[j] = unix2dos[*un];
+			dn[j] = UNIX2DOS(*un);
 		if ((*un != dn[j])
 		    && conv != 3)
 			conv = 2;
@@ -693,7 +705,7 @@
 				return chksum;
 			return -1;
 		}
-		if (u2l[*cp++] != u2l[*un++] || *cp++)
+		if (U2L(*cp++) != U2L(*un++) || *cp++)
 			return -1;
 	}
 	for (cp = wep->wePart2, i = sizeof(wep->wePart2)/2; --i >= 0;) {
@@ -702,7 +714,7 @@
 				return chksum;
 			return -1;
 		}
-		if (u2l[*cp++] != u2l[*un++] || *cp++)
+		if (U2L(*cp++) != U2L(*un++) || *cp++)
 			return -1;
 	}
 	for (cp = wep->wePart3, i = sizeof(wep->wePart3)/2; --i >= 0;) {
@@ -711,7 +723,7 @@
 				return chksum;
 			return -1;
 		}
-		if (u2l[*cp++] != u2l[*un++] || *cp++)
+		if (U2L(*cp++) != U2L(*un++) || *cp++)
 			return -1;
 	}
 	return chksum;

Use "options MSDOSFS_NOCONV" when compiling a new kernel.
>Release-Note:
>Audit-Trail:

From: gson@gson.org (Andreas Gustafsson)
To: gnats-bugs@gnats.netbsd.org
Cc:  
Subject: Re: kern/16653
Date: Sun, 22 Sep 2002 18:21:58 -0700 (PDT)

 I too am suffering from the bug reported in PR kern/16653.

 My Libretto L2 notebook came with a Japanese Windows ME in a DOS
 partition which I shrunk and left on the disk when I installed
 NetBSD.  I can access this DOS partition using msdosfs except for
 files whose names contain Japanese characters.  Those appear when
 reading a directory, but any attempt to stat() or otherwise access
 them fails.  For example, if I run "ls" in /msdos/WINDOWS/SYSTEM,
 these files appear, but if I run "ls -l", I get a bunch of error
 messages like

    ls: ????????.dll: No such file or directory
    ls: ????????.scr: No such file or directory
    ls: ??????~1.dll: No such file or directory

 followed by a listing containing only those files not containing
 Japanese characters (I have replaced all 8-bit characters in the error
 messages with question marks to avoid them causing problems in mail
 readers).

 Another effect of this bug is that the mail sent by /etc/daily
 contains hundreds of lines of error messages, beginning with
 the lines

    Setuid/device find errors:
    find: /msdos/WINDOWS/SYSTEM/???l????.SCR: No such file or directory
    find: /msdos/WINDOWS/SYSTEM/?W????~1.SCR: No such file or directory

 After applying the patch in kern/16653 and building a kernel with
 options MSDOSFS_NOCONV, "ls -l" shows all files as expected and the
 /etc/daily errors disappear.

 Could someone explain why msdosfs is doing character set conversions
 in the first place?
 -- 
 Andreas Gustafsson, gson@gson.org
Responsible-Changed-From-To: kern-bug-people->jdolecek 
Responsible-Changed-By: jdolecek 
Responsible-Changed-When: Fri Mar 26 08:22:09 UTC 2004 
Responsible-Changed-Why:  
I take care of msdosfs, I'd look at this one too. 
From: Hauke Fath <hf@spg.tu-darmstadt.de>
To: gnats-bugs@netbsd.org
Cc: Hauke Fath <hf@spg.tu-darmstadt.de>
Subject: Re: kern/16653 (msdosfs mistakenly assumes CP437 as on-disk file
 name character set)
Date: Mon, 2 Jan 2006 16:56:43 +0100

 Since the bug is going into its fifth year and is definitely in 
 NetBSD 3.0: Any changes for the better on the radar?

 We've got a few native Russian speakers in the group, and I could 
 well do without the "can I have Windows XP | RedHat Linux which do 
 the job better" kind of remarks...

 	hauke

 -- 
 /~\  The ASCII Ribbon Campaign                    Hauke Fath
 \ /    No HTML/RTF in email	        Institut für Nachrichtentechnik
   X     No Word docs in email	                  TU Darmstadt
 / \  Respect for open standards              Ruf +49-6151-16-3281

From: Sergey Svishchev <svs+pr@grep.ru>
To: gnats-bugs@netbsd.org
Cc: 
Subject: Re: kern/16653
Date: Sun, 8 Oct 2006 13:39:52 +0400

 Regenerated for 3.0 and -current:

 begin 644 msdosfs_noconv_3.0.diff.gz
 M'XL("````````VUS9&]S9G-?;F]C;VYV7S,N,"YD:69F`-5666_;1A!^)G_%
 M!$%C413%2]3ER'7A-$"`U"JJMB@0"`)#+JUUY*7*0Y;:^+]W9M:29<E.6O2E
 M)>"A=NYOCJ7?J52LAW!3IGF9E;,D5ZMV8H[^_6/^=#&!3"[$$-Q5OG`#-UF5
 M]=(MB\0M-Z6;E>Y]4/=Q\-;*+$152+&2Z@H*?)4R5^"W0S.5609.#4Z!I\.<
 M'<<Y8!E!%]Z*CQ!X7@1!,(SZPR@"Q\/'('>V;1]:]&&<5&30!6\P#'M#OZ\-
 MS/-S<+K=5A=LI'X7SL]->"E5LJA3`:_WT*2R$*HJ-NWYV7,:0N6I8+EIOY09
 MMB"#'R9OQI.WD]GE^&)\^2ORD2F5@%^"]XVU!76P^+">[K$OW_T6H`7+E%P'
 MZ/J1`LH"4B(%E`6DI!7$HA1'_O'O:>>/!/M.M4`H;(IIF^`V38`FO(DW)4@%
 M(D[F<).K:DZG&/MX52_B`C8B+MJDZ7)%@S"DD@:=L-7CDAI5N73.JM5,E2*!
 M$332.7P#ON=9Z-SW]'-JPMT7:@=E%5<R`>QI64$]2^9Q83Y4*8BZ4_3\)P;S
 M6H#/WZ.&VP3/<[S>-O>PXU'N8>1O<_?6&6HC]9D&3$.F':81TR[37@MK!IGG
 M9.R1K?LL&3"-F7YDFC!-F0JFF;;N.[@19'UWNFL&/#1CK%=0Q3>BA+@0<!/C
 M+.89!+",BZIL03470&(^0ZQ2YHAU)12O';';&F[/:_D>XNT']&;`&(/&/:GR
 M8@/E(J_:\)W*T4-!,P>_U[+XU&8]`B@S:#13!:,13-Z/?YY]'UFF8QC4YMV$
 M>FL131&+9N_FC=@6MMW@X3TT0J=HXQC-6N%*CV"1WV(&W_+2)%,80G+L$6TL
 MXA[:T#XDEK;!2AI42L)YD2\W7)M"X%!)5>5\HOB[&N/@%7AIM4!>J9Q^Z<+U
 M.ZT^UFW0Q1>7S="NL[R`AL38_BE(>#V":WS;MH6C>8@P51_DE#'^(Y!LQC"_
 MC-,PJKG$_JDKVZ8C;I>!C;)'T#_5&`9]PM#QO1V&K<.3]LD3'A[`>0P.0GCU
 M"JC[+]`$3O:1'C73MC74KV$]ZBBZU(*OH#U,]HXQ$]#.H(/;;$=>=[O4>N3(
 M:($.:X6M!@?OI2726ED'8%MP#50UQKP@S-?XP_<9+PH?0-,VI$OL#U6$.G4]
 M9?_WMQ2+IA:C>5IU=TNSR+(X1[JK,"A]SD@YU%R#SR,(-'KR]X+=4#($._('
 M##O<W66LU,`ZTKIBPRQR6\[SHL(":*_;1*B=NT(Y>X(=&'2CE]HX2KZYK>$N
 M'J:M<R/NTW@>X/!G6?>LM]<SP\#_(>I"03+_5-8W&O8]R_'O>^[HH#14S62)
 M0T<1^$33,X7/GX'YW`32I"G2'-+D$VE:>YI'<>ZVBYXL,>-;@5^W6_$CWJKX
 M=:")*>4?(L\:^P++#4[!<22<T?9L6]3S0D;I1_\GE.%S*,-G4/H!HPPZ_U&4
 ,![G\!?2-G]5`"P``
 `
 end

From: Sergey Svishchev <svs+pr@grep.ru>
To: gnats-bugs@netbsd.org
Cc: 
Subject: Re: kern/16653
Date: Sat, 12 May 2007 13:35:45 +0400

 This patch breaks long filename support (invalid characters may be stored in
 short file names).  Don't use it.

 -- 
 Sergey Svishchev

From: Aleksey Cheusov <cheusov@tut.by>
To: gnats-bugs@gnats.netbsd.org
Cc: 
Subject: Re: kern/16653: msdosfs mistakenly assumes CP437 as on-disk file name character set.
Date: Sun, 07 Dec 2008 13:50:15 +0200

 This bug seems identical to kern/36370.
 I still hope that somebody can review, improve and apply patch
 suggested there. It supports mount_msdos -D and -L options
 and works fine with long filenames.

 -- 
 Best regards, Aleksey Cheusov.

Responsible-Changed-From-To: jdolecek->kern-bug-people
Responsible-Changed-By: wiz@NetBSD.org
Responsible-Changed-When: Sun, 15 Apr 2012 21:40:21 +0000
Responsible-Changed-Why:
Back to role account, jdolecek left


>Unformatted:

NetBSD Home
NetBSD PR Database Search

(Contact us) $NetBSD: query-full-pr,v 1.39 2013/11/01 18:47:49 spz Exp $
$NetBSD: gnats_config.sh,v 1.8 2006/05/07 09:23:38 tsutsui Exp $
Copyright © 1994-2007 The NetBSD Foundation, Inc. ALL RIGHTS RESERVED.