NetBSD Problem Report #54683
From woods@future.weird.com Fri Nov 8 01:15:20 2019
Return-Path: <woods@future.weird.com>
Received: from mail.netbsd.org (mail.netbsd.org [199.233.217.200])
(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
(Client CN "mail.NetBSD.org", Issuer "mail.NetBSD.org CA" (not verified))
by mollari.NetBSD.org (Postfix) with ESMTPS id A8BCF7A247
for <gnats-bugs@gnats.NetBSD.org>; Fri, 8 Nov 2019 01:15:20 +0000 (UTC)
Message-Id: <20191107233634.A09D45803E@future.weird.com>
Date: Thu, 7 Nov 2019 15:36:34 -0800 (PST)
From: "Greg A. Woods" <woods@planix.ca>
Reply-To: "Greg A. Woods" <woods@planix.ca>
To: gnats-bugs@NetBSD.org
Subject: glob(3) always ignores symlinks to directories
X-Send-Pr-Version: 3.95
>Number: 54683
>Category: lib
>Synopsis: glob(3) always ignores symlinks to directories
>Confidential: no
>Severity: serious
>Priority: medium
>Responsible: lib-bug-people
>State: open
>Class: sw-bug
>Submitter-Id: net
>Arrival-Date: Fri Nov 08 01:20:00 +0000 2019
>Originator: Greg A. Woods
>Release: NetBSD 8.99.32
>Organization:
Planix, Inc.; Kelowna, BC; Canada
>Environment:
System: NetBSD future 8.99.32 NetBSD 8.99.32 (XEN3_DOMU) #0: Mon Feb 4 15:01:05 PST 2019 woods@future:/build/woods/future/current-amd64-amd64-obj/building/work/woods/m-NetBSD-current/sys/arch/amd64/compile/XEN3_DOMU amd64
Architecture: x86_64
Machine: amd64
>Description:
While working on converting some old CVS repositories, and also
while testing the latest cvsconvert (from cvs-fast-export), I
discovered that NetBSD's in-tree CVS does not work properly if a
module directory is a symlink to some other directory (this
method is used in cvsconvert to set up a CVS proxy repository
for a directory or tree of RCS files).
As expected, testing with pkgsrc/devel/scmcvs shows that it does not
exhibit this problem.
So I initially patched the in-tree (src/external/gpl2/xcvs) CVS
to use the dist/lib/glob.c and friends it comes with to be sure
the problem was not somewhere else in the in-tree CVS, and
indeed this patched version also passed all tests A-OK.
I then discovered that the ultimate cause is that the NetBSD libc
glob(3) does not find files matching a pattern within a subdirectory
IFF that subdirectory is actually a symlink to a real directory.
This bug was introduced in revision 1.27 of src/lib/libc/gen/glob.c
The test program below fails on native NetBSD, but passes on
GNU/Linux, macOS, FreeBSD, and likely OpenBSD.
BTW, it saddens me greatly how much the code, e.g. glob.c, has
diverged between NetBSD and FreeBSD (and even OpenBSD).
>How-To-Repeat:
Run "make check" for the latest cvs-fast-export or,
mimic the cvsconvert test suite check that fails:
mkdir $HOME/tmp/somedir
touch $HOME/tmp/somedir/file1
ci $HOME/tmp/somedir/file1 </dev/null
touch $HOME/tmp/somedir/file2
ci $HOME/tmp/somedir/file2 </dev/null
cvs -d :local:$HOME/tmp/test-repo init
ln -s $HOME/tmp/somedir $HOME/tmp/test-repo
cvs -d :local:$HOME/tmp/test-repo checkout -d $HOME/tmp/test-checkout somedir
ls $HOME/tmp/test-checkout
# observe there is no "file1" and "file2"
ls $HOME/tmp/test-repo/somedir/*,v
# observe that the shell can glob this pattern
This bug also seems to affect the in-tree version of csh (as it
also seems to use libc glob(3)):
$ csh
% ls $HOME/tmp/test-repo/somedir/*,v
ls: No match.
#include <errno.h>
#include <fcntl.h>
#include <glob.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
int main(void);
/*
 * Create an empty file named "fn" inside the directory "dn".
 *
 * Returns the malloc()ed pathname "dn/fn", which the caller must free(), or
 * NULL if the pathname could not be allocated.  A failure to create the file
 * itself is reported on stderr, but the pathname is still returned so that
 * the caller can run its normal unlink()/free() cleanup.
 */
static char *
makefile(const char *dn,
	 const char *fn)
{
	size_t len = strlen(dn) + 1 + strlen(fn) + 1;	/* "dn" + '/' + "fn" + NUL */
	char *pn = malloc(len);
	int fd;

	if (pn == NULL) {
		fprintf(stderr, "%s: ERROR: malloc(%zu) failed: %s\n", __func__, len, strerror(errno));
		return NULL;
	}
	snprintf(pn, len, "%s/%s", dn, fn);
	/*
	 * open() with O_CREAT must be given the mode argument, otherwise the
	 * permissions of the new file are whatever garbage is on the stack.
	 */
	fd = open(pn, O_WRONLY | O_CREAT, 0600);
	if (fd == -1) {
		fprintf(stderr, "%s: ERROR: open(%s) failed: %s\n", __func__, pn, strerror(errno));
	} else {
		close(fd);
	}
	return pn;
}
/*
 * NetBSD vs. glibc: glob(3) of files in a symlink to a directory
 *
 * Creates a temporary directory holding "foo.1" and "foo.2", creates a
 * symlink pointing at that directory, and then globs for the two files
 * through the symlink.  Everything created is removed again before returning.
 *
 * Returns true if the test failed (including when it could not be set up),
 * false if glob() found exactly the two files.
 */
static bool
test_in_symlink_to_dir(void)
{
	char dfn[128] = "";
	char lfn[128] = "";
	char *fn1 = NULL;
	char *fn2 = NULL;
	bool have_link = false;
	bool failed = true;
	int ret;
	size_t i;
	glob_t pglob;

	strlcpy(dfn, "/tmp/testsymlink_dir.XXXXXX", sizeof(dfn));
	if (mkdtemp(dfn) == NULL) {
		fprintf(stderr, "%s: ERROR: mkdtemp(%s) failed: %s\n", __func__, dfn, strerror(errno));
		return true;
	}
	fn1 = makefile(dfn, "foo.1");
	fn2 = makefile(dfn, "foo.2");
	if (fn1 == NULL || fn2 == NULL) {
		fprintf(stderr, "%s: ERROR: could not create the test files in %s\n", __func__, dfn);
		goto cleanup;
	}
	strlcpy(lfn, "/tmp/testsymlink_lnk.XXXXXX", sizeof(lfn));
	/*
	 * mktemp() only chooses a name; symlink() below fails with EEXIST if
	 * somebody else grabbed that name in the meantime, so check both.
	 */
	if (mktemp(lfn) == NULL || lfn[0] == '\0') {
		fprintf(stderr, "%s: ERROR: mktemp() failed: %s\n", __func__, strerror(errno));
		goto cleanup;
	}
	if (symlink(dfn, lfn) == -1) {
		fprintf(stderr, "%s: ERROR: symlink(%s, %s) failed: %s\n", __func__, dfn, lfn, strerror(errno));
		goto cleanup;
	}
	have_link = true;

	/* make sure gl_pathc/gl_pathv are sane even if glob() bails out early */
	memset(&pglob, 0, sizeof(pglob));
#if 0
	/*
	 * On NetBSD this does work, but CVS does not chdir() to the module
	 * directory:
	 */
	if (chdir(lfn) == -1) {
		fprintf(stderr, "%s: ERROR: chdir(%s) failed: %s\n", __func__, lfn, strerror(errno));
		goto cleanup;
	}
	ret = glob("*.*", 0, 0, &pglob);
#else
	/*
	 * This is effectively what CVS does to find "*,v" files in modules, and
	 * if a module directory is a symlink then this should still work....
	 *
	 * XXX However on NetBSD this does not work!
	 *
	 * But it does work with:
	 *
	 * - GlibC/gnulib (i.e. on linux, and with the glob() included in CVS sources)
	 * - macOS (at least 10.13.x)
	 * - FreeBSD-12.0
	 * - probably OpenBSD (based on inspection of code)
	 */
	ret = glob("/tmp/testsymlink_lnk.*/*.*", 0, 0, &pglob);
#endif
	if (ret != 0) {
		fprintf(stderr, "%s: ERROR: glob() failed: %d: %s\n", __func__, ret, strerror(errno));
	}
	if (pglob.gl_pathc != 2) {
		fprintf(stderr, "%s: ERROR: pglob.gl_pathc = %ju, but expected 2!\n", __func__, (uintmax_t) pglob.gl_pathc);
	}
	for (i = 0; i < pglob.gl_pathc; i++) {
		printf("%s: pglob.gl_pathv[%ju] = \"%s\"\n", __func__, (uintmax_t) i, pglob.gl_pathv[i]);
	}
	/* record the verdict before the results are released */
	failed = (pglob.gl_pathc != 2);
	globfree(&pglob);

cleanup:
	if (fn1 != NULL) {
		unlink(fn1);
		free(fn1);
	}
	if (fn2 != NULL) {
		unlink(fn2);
		free(fn2);
	}
	if (have_link) {
		unlink(lfn);
	}
	rmdir(dfn);
	return failed;
}
/*
 * NetBSD vs. glibc: for fun also test https://sourceware.org/bugzilla/show_bug.cgi?id=866
 *
 * Creates two symlinks in /tmp, one dangling ("/no_such_file_exists") and one
 * pointing at an existing directory ("/usr"), changes directory to /tmp, and
 * checks that glob("testsymlink.*") returns both of them.  Both symlinks are
 * removed again before returning.
 *
 * Note that this changes the working directory of the process to /tmp.
 *
 * Returns true if the test failed (including when it could not be set up),
 * false if glob() returned exactly two matches.
 */
static bool
test_dangling_symlink_matching(void)
{
	char lfn1[128] = "";
	char lfn2[128] = "";
	bool have_link1 = false;
	bool have_link2 = false;
	bool failed = true;
	int ret;
	size_t i;
	glob_t pglob;

	/*
	 * mktemp() only chooses a name; symlink() fails with EEXIST if that
	 * name has been taken in the meantime, so both results are checked.
	 */
	strlcpy(lfn1, "/tmp/testsymlink.XXXXXX", sizeof(lfn1));
	if (mktemp(lfn1) == NULL || lfn1[0] == '\0') {
		fprintf(stderr, "%s: ERROR: mktemp() failed: %s\n", __func__, strerror(errno));
		goto cleanup;
	}
	if (symlink("/no_such_file_exists", lfn1) == -1) {
		fprintf(stderr, "%s: ERROR: symlink(%s) failed: %s\n", __func__, lfn1, strerror(errno));
		goto cleanup;
	}
	have_link1 = true;

	/* the first link already exists, so this picks a different name */
	strlcpy(lfn2, "/tmp/testsymlink.XXXXXX", sizeof(lfn2));
	if (mktemp(lfn2) == NULL || lfn2[0] == '\0') {
		fprintf(stderr, "%s: ERROR: mktemp() failed: %s\n", __func__, strerror(errno));
		goto cleanup;
	}
	if (symlink("/usr", lfn2) == -1) { /* an existing file! */
		fprintf(stderr, "%s: ERROR: symlink(%s) failed: %s\n", __func__, lfn2, strerror(errno));
		goto cleanup;
	}
	have_link2 = true;

	if (chdir("/tmp") == -1) {
		fprintf(stderr, "%s: ERROR: chdir(/tmp) failed: %s\n", __func__, strerror(errno));
		goto cleanup;
	}

	/* make sure gl_pathc/gl_pathv are sane even if glob() bails out early */
	memset(&pglob, 0, sizeof(pglob));
	ret = glob("testsymlink.*", 0, 0, &pglob);
	if (ret != 0) {
		fprintf(stderr, "%s: ERROR: glob() failed: %d: %s\n", __func__, ret, strerror(errno));
	}
	if (pglob.gl_pathc != 2) {
		fprintf(stderr, "%s: ERROR: pglob.gl_pathc = %ju, but expected 2!\n", __func__, (uintmax_t) pglob.gl_pathc);
	}
	for (i = 0; i < pglob.gl_pathc; i++) {
		printf("%s: pglob.gl_pathv[%ju] = \"%s\"\n", __func__, (uintmax_t) i, pglob.gl_pathv[i]);
	}
	/* record the verdict before the results are released */
	failed = (pglob.gl_pathc != 2);
	globfree(&pglob);

cleanup:
	if (have_link1) {
		unlink(lfn1);
	}
	if (have_link2) {
		unlink(lfn2);
	}
	return failed;
}
/*
 * Run both glob(3) symlink tests.  Both are always run, even if the first one
 * fails; the exit status is EXIT_FAILURE if either of them failed.
 */
int
main(void)
{
	bool failed = false;

	failed |= test_in_symlink_to_dir();
	failed |= test_dangling_symlink_matching();
	exit(failed ? EXIT_FAILURE : EXIT_SUCCESS);
}
/*
* Local Variables:
* eval: (make-local-variable 'compile-command)
* compile-command: (let ((fn (file-name-sans-extension (file-name-nondirectory (buffer-file-name))))) (concat "rm " fn "; " (default-value 'compile-command) " " fn " && ./" fn))
* End:
*/
>Fix:
The following patch fixes the 1.38 version of glob.c such that the test
program above passes all tests:
- remove incorrect "#ifdef S_IFLINK" and "#ifdef S_IFLNK" -- they were
probably intended to both be "#ifdef S_ISLNK", but they were
misspelled and not protecting all S_ISLNK() uses anyway
- fix resulting compiler warning about parens around boolean expression
term
- always set chase_symlinks if GLOB_STAR is not given (and change it to
a bool)
- fix indentation of some complex expressions to ease readability
- also add descriptions of all GLOB_* flags and sort them.
--- glob.c.~1.38.~ 2018-12-08 19:17:13.836217312 -0800
+++ glob.c 2019-11-07 11:14:45.489123783 -0800
@@ -46,23 +46,45 @@
*
* The [!...] convention to negate a range is supported (SysV, Posix, ksh).
*
+ * POSIX defined flags:
+ *
+ * GLOB_APPEND:
+ * Append pathnames to the ones from a previous call (or calls)
+ * GLOB_DOOFFS:
+ * prepend initial gl_offs NULL pointers to the beginning of gl_pathv
+ * GLOB_ERR:
+ * return error when an unreadable directory is encountered
+ * GLOB_MARK:
+ * append a slash to matches which are directories
+ * GLOB_NOCHECK:
+ * if no matches return pattern with gl_pathc = 1 and gl_matchc = 0
+ * GLOB_NOESCAPE:
+ * disable backslash escaping in pattern
+ * GLOB_NOSORT:
+ * do not sort gl_pathv
+ *
* Optional extra services, controlled by flags not defined by POSIX:
*
+ * GLOB_ALTDIRFUNC:
+ * Use alternately specified directory access functions.
+ * GLOB_BRACE:
+ * expand {1,2}{a,b} to 1a 1b 2a 2b
+ * GLOB_LIMIT:
+ * Limit the total number of returned pathnames to initial gl_matchc
* GLOB_MAGCHAR:
* Set in gl_flags if pattern contained a globbing character.
* GLOB_NOMAGIC:
* Same as GLOB_NOCHECK, but it will only append pattern if it did
* not contain any magic characters. [Used in csh style globbing]
- * GLOB_ALTDIRFUNC:
- * Use alternately specified directory access functions.
- * GLOB_TILDE:
- * expand ~user/foo to the /home/dir/of/user/foo
- * GLOB_BRACE:
- * expand {1,2}{a,b} to 1a 1b 2a 2b
- * GLOB_PERIOD:
- * allow metacharacters to match leading dots in filenames.
* GLOB_NO_DOTDIRS:
* . and .. are hidden from wildcards, even if GLOB_PERIOD is set.
+ * GLOB_PERIOD:
+ * allow metacharacters to match leading dots in filenames.
+ * GLOB_STAR:
+ * "**" will do a recursive match without following symbolic links.
+ * "***" will do a recursive match following symbolic links.
+ * GLOB_TILDE:
+ * expand ~user/foo to the /home/dir/of/user/foo
* gl_matchc:
* Number of matches in the current invocation of glob.
*/
@@ -77,6 +99,7 @@
#include <errno.h>
#include <glob.h>
#include <pwd.h>
+#include <stdbool.h>
#include <stdio.h>
#include <stddef.h>
#include <stdlib.h>
@@ -527,9 +550,9 @@
* to avoid exponential behavior
*/
if (bufnext == patbuf || bufnext[-1] != M_ALL ||
- ((pglob->gl_flags & GLOB_STAR) != 0 &&
- (bufnext - 1 == patbuf || bufnext[-2] != M_ALL ||
- bufnext - 2 == patbuf || bufnext[-3] != M_ALL)))
+ ((pglob->gl_flags & GLOB_STAR) != 0 &&
+ (bufnext - 1 == patbuf || bufnext[-2] != M_ALL ||
+ bufnext - 2 == patbuf || bufnext[-3] != M_ALL)))
*bufnext++ = M_ALL;
break;
default:
@@ -639,10 +662,11 @@
return GLOB_NOSPACE;
}
if (((pglob->gl_flags & GLOB_MARK) &&
- pathend[-1] != SEP) && (S_ISDIR(sb.st_mode) ||
- (S_ISLNK(sb.st_mode) &&
- (g_stat(pathbuf, &sb, pglob) == 0) &&
- S_ISDIR(sb.st_mode)))) {
+ pathend[-1] != SEP) &&
+ (S_ISDIR(sb.st_mode) ||
+ (S_ISLNK(sb.st_mode) &&
+ (g_stat(pathbuf, &sb, pglob) == 0) &&
+ S_ISDIR(sb.st_mode)))) {
if (pathend >= pathlim)
return GLOB_ABORTED;
*pathend++ = SEP;
@@ -688,8 +712,8 @@
__gl_stat_t sbuf;
int error;
char buf[MAXPATHLEN];
- int globstar = 0;
- int chase_symlinks = 0;
+ bool globstar = false;
+ bool chase_symlinks = (pglob->gl_flags & GLOB_STAR) == 0;
const Char *termstar = NULL;
/*
@@ -708,17 +732,21 @@
*pathend = EOS;
errno = 0;
-
+
while (pglobstar < restpattern) {
if ((pglobstar[0] & M_MASK) == M_ALL &&
(pglobstar[1] & M_MASK) == M_ALL) {
globstar = 1;
+ /*
+ * XXX a single star OR three stars should probably
+ * chase symlinks even when GLOB_STAR is set....
+ */
chase_symlinks = (pglobstar[2] & M_MASK) == M_ALL;
termstar = pglobstar + (2 + chase_symlinks);
break;
}
pglobstar++;
- }
+ }
if (globstar) {
error = pglobstar == pattern && termstar == restpattern ?
@@ -734,12 +762,11 @@
*pathend = EOS;
}
- if (*pathbuf && (g_lstat(pathbuf, &sbuf, pglob) ||
- !S_ISDIR(sbuf.st_mode)
-#ifdef S_IFLINK
- && ((globstar && !chase_symlinks) || !S_ISLNK(sbuf.st_mode))
-#endif
- ))
+ if (*pathbuf &&
+ (g_lstat(pathbuf, &sbuf, pglob) ||
+ (!S_ISDIR(sbuf.st_mode) &&
+ ((globstar && !chase_symlinks) ||
+ !S_ISLNK(sbuf.st_mode)))))
return 0;
if ((dirp = g_opendir(pathbuf, pglob)) == NULL) {
@@ -825,12 +852,10 @@
}
if (globstar) {
-#ifdef S_IFLNK
if (!chase_symlinks &&
(g_lstat(pathbuf, &sbuf, pglob) ||
S_ISLNK(sbuf.st_mode)))
continue;
-#endif
if (!match(pathend, pattern, termstar))
continue;
(Contact us)
$NetBSD: query-full-pr,v 1.43 2018/01/16 07:36:43 maya Exp $
$NetBSD: gnats_config.sh,v 1.9 2014/08/02 14:16:04 spz Exp $
Copyright © 1994-2017
The NetBSD Foundation, Inc. ALL RIGHTS RESERVED.