NetBSD Problem Report #54683

From woods@future.weird.com  Fri Nov  8 01:15:20 2019
Return-Path: <woods@future.weird.com>
Received: from mail.netbsd.org (mail.netbsd.org [199.233.217.200])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(Client CN "mail.NetBSD.org", Issuer "mail.NetBSD.org CA" (not verified))
	by mollari.NetBSD.org (Postfix) with ESMTPS id A8BCF7A247
	for <gnats-bugs@gnats.NetBSD.org>; Fri,  8 Nov 2019 01:15:20 +0000 (UTC)
Message-Id: <20191107233634.A09D45803E@future.weird.com>
Date: Thu,  7 Nov 2019 15:36:34 -0800 (PST)
From: "Greg A. Woods" <woods@planix.ca>
Reply-To: "Greg A. Woods" <woods@planix.ca>
To: gnats-bugs@NetBSD.org
Subject: glob(3) always ignores symlinks to directories
X-Send-Pr-Version: 3.95

>Number:         54683
>Category:       lib
>Synopsis:       glob(3) always ignores symlinks to directories
>Confidential:   no
>Severity:       serious
>Priority:       medium
>Responsible:    lib-bug-people
>State:          open
>Class:          sw-bug
>Submitter-Id:   net
>Arrival-Date:   Fri Nov 08 01:20:00 +0000 2019
>Originator:     Greg A. Woods
>Release:        NetBSD 8.99.32
>Organization:
Planix, Inc.; Kelowna, BC; Canada
>Environment:
System: NetBSD future 8.99.32 NetBSD 8.99.32 (XEN3_DOMU) #0: Mon Feb 4 15:01:05 PST 2019 woods@future:/build/woods/future/current-amd64-amd64-obj/building/work/woods/m-NetBSD-current/sys/arch/amd64/compile/XEN3_DOMU amd64
Architecture: x86_64
Machine: amd64
>Description:

	While working on converting some old CVS repositories, and also
	while testing the latest cvsconvert (from cvs-fast-export), I
	discovered that NetBSD's in-tree CVS does not work properly if a
	module directory is a symlink to some other directory (this
	method is used in cvsconvert to set up a CVS proxy repository
	for a directory or tree of RCS files).

	As expected testing with pkgsrc/devel/scmcvs shows it does not
	exhibit this problem.

	So I initially patched the in-tree (src/external/gpl2/xcvs) CVS
	to use the dist/lib/glob.c and friends it comes with to be sure
	the problem was not somewhere else in the in-tree CVS, and
	indeed this patched version also passed all tests A-OK.

	I then discovered the ultimate cause is the NetBSD libc glob(3)
	is not finding files matching a pattern within a subdirectory
	IFF that subdirectory is actually a symlink to a real directory.

	This bug was introduced in revision 1.27 of src/lib/libc/gen/glob.c

	The test program below fails on native NetBSD, but passes on
	GNU/Linux, macOS, FreeBSD, and likely OpenBSD.

	BTW, it saddens me greatly how much the code, e.g. glob.c, has
	diverged so much between NetBSD and FreeBSD (and even OpenBSD).

>How-To-Repeat:

	Run "make check" for the latest cvs-fast-export or,

	mimic the cvsconvert test suite check that fails:

		mkdir $HOME/tmp/somedir
		touch $HOME/tmp/somedir/file1
		ci $HOME/tmp/somedir/file1 </dev/null
		touch $HOME/tmp/somedir/file2
		ci $HOME/tmp/somedir/file2 </dev/null
		cvs -d :local:$HOME/tmp/test-repo init
		ln -s $HOME/tmp/somedir $HOME/tmp/test-repo
		cvs -d :local:$HOME/tmp/test-repo checkout -d $HOME/tmp/test-checkout somedir
		ls $HOME/tmp/test-checkout
		# observe there is no "file1" and "file2"
		ls $HOME/tmp/test-repo/somedir/*,v
		# observe that the shell can glob this pattern

	This bug also seem to affect the in-tree version of csh (as it
	also seems to use libc glob(3)):

		$ csh
		% ls $HOME/tmp/test-repo/somedir/*,v
		ls: No match.

#include <errno.h>
#include <fcntl.h>
#include <glob.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int main(void);

static char *
makefile(const char *dn,
	 const char *fn)
{
	char *pn = NULL;

	asprintf(&pn, "%s/%s", dn, fn);
	close(open(pn, O_CREAT));

	return pn;
}

/*
 * NetBSD v.s. GlibC:  glob(3) of files in a symlink to a directory
 */
static bool
test_in_symlink_to_dir(void)
{
	char dfn[128] = "";
	char lfn[128] = "";
	char *fn1;
	char *fn2;
	int ret;
	size_t i;
	glob_t pglob;

	strlcpy(dfn, "/tmp/testsymlink_dir.XXXXXX", sizeof(dfn));
	if (mkdtemp(dfn) == NULL) {
		fprintf(stderr, "%s: ERROR: mkdtemp(%s) failed: %s\n", __func__, dfn, strerror(errno));
		return true;
	}
	fn1 = makefile(dfn, "foo.1");
	fn2 = makefile(dfn, "foo.2");

	strlcpy(lfn, "/tmp/testsymlink_lnk.XXXXXX", sizeof(lfn));
	mktemp(lfn);
	symlink(dfn, lfn);
#if 0
	/*
	 * On NetBSD this does work, but CVS does not chdir() to the module
	 * directory:
	 */
	chdir(lfn);
	ret = glob("*.*", 0, 0, &pglob);
#else
        /*
	 * This is effectively what CVS does to find "*,v" files in modules, and
	 * if a module directory is a symlink then this should still work....
	 *
	 * XXX However on NetBSD this does not work!
	 *
	 * But it does work with:
	 *
	 * - GlibC/gnulib (i.e. on linux, and with the glob() included in CVS sources)
	 * - macOS (at least 10.13.x)
	 * - FreeBSD-12.0
	 * - probably OpenBSD (based on inspection of code)
	 */
	ret = glob("/tmp/testsymlink_lnk.*/*.*", 0, 0, &pglob);
#endif
	if (ret != 0) {
		fprintf(stderr, "%s: ERROR: glob() failed: %d: %s\n", __func__, ret, strerror(errno));
	}
	if (pglob.gl_pathc != 2) {
		fprintf(stderr, "%s: ERROR: pglob.gl_pathc = %ju, but expected 2!\n", __func__, (uintmax_t) pglob.gl_pathc);
	}
	for (i = 0; i < pglob.gl_pathc; i++) {
		printf("%s: pglob.gl_pathv[%ju] = \"%s\"\n", __func__, (uintmax_t) i, pglob.gl_pathv[i]);
	}
	unlink(fn1);
	free(fn1);
	unlink(fn2);
	free(fn2);
	unlink(lfn);
	rmdir(dfn);

	return (pglob.gl_pathc != 2);
}

/*
 * NetBSD v.s. GlibC:  for fun also test https://sourceware.org/bugzilla/show_bug.cgi?id=866
 */
static bool
test_dangling_symlink_matching(void)
{
	char lfn1[128] = "";
	char lfn2[128] = "";
	int ret;
	size_t i;
	glob_t pglob;

	strlcpy(lfn1, "/tmp/testsymlink.XXXXXX", sizeof(lfn1));
	mktemp(lfn1);
	symlink("/no_such_file_exists", lfn1);

	strlcpy(lfn2, "/tmp/testsymlink.XXXXXX", sizeof(lfn2));
	mktemp(lfn2);
	symlink("/usr", lfn2);          /* an existing file! */

	chdir("/tmp");
	ret = glob("testsymlink.*", 0, 0, &pglob);
	if (ret != 0) {
		fprintf(stderr, "%s: ERROR: glob() failed: %d: %s\n", __func__, ret, strerror(errno));
	}
	if (pglob.gl_pathc != 2) {
		fprintf(stderr, "%s: ERROR: pglob.gl_pathc = %ju, but expected 2!\n", __func__, (uintmax_t) pglob.gl_pathc);
	}
	for (i = 0; i < pglob.gl_pathc; i++) {
		printf("%s: pglob.gl_pathv[%ju] = \"%s\"\n", __func__, (uintmax_t) i, pglob.gl_pathv[i]);
	}
	unlink(lfn1);
	unlink(lfn2);

	return (pglob.gl_pathc != 2);
}

int
main()
{
	bool failed = false;

	failed |= test_in_symlink_to_dir();
	failed |= test_dangling_symlink_matching();

	exit(failed);
}

/*
 * Local Variables:
 * eval: (make-local-variable 'compile-command)
 * compile-command: (let ((fn (file-name-sans-extension (file-name-nondirectory (buffer-file-name))))) (concat "rm " fn "; " (default-value 'compile-command) " " fn " && ./" fn))
 * End:
 */

>Fix:

The following patch fixes the 1.38 version of glob.c such that the test
program above passes all tests:

- remove incorrect "#ifdef S_IFLINK" and "ifdef S_IFLNK" -- they were
  probably intended to both be "#ifdef S_ISLNK", but they were
  misspelled and not protecting all S_ISLNK() uses anyway

- fix resulting compiler warning about parens around boolean expression
  term

- always set chase_symlinks if GLOB_STAR is not given (and change it to
  a bool)

- fix indentation of some complex expressions to ease readability

- also add descriptions of all GLOB_* flags and sort them.


--- glob.c.~1.38.~	2018-12-08 19:17:13.836217312 -0800
+++ glob.c	2019-11-07 11:14:45.489123783 -0800
@@ -46,23 +46,45 @@
  *
  * The [!...] convention to negate a range is supported (SysV, Posix, ksh).
  *
+ * POSIX defined flags:
+ *
+ * GLOB_APPEND:
+ *	Append pathnames to the ones from a previous call (or calls)
+ * GLOB_DOOFFS:
+ *	prepend initial gl_offs NULL pointers to the beginning of gl_pathv
+ * GLOB_ERR:
+ *	return error when an unreadable directory is encountered
+ * GLOB_MARK:
+ *	append a slash to matches which are directories
+ * GLOB_NOCHECK:
+ *	if no matches return pattern with gl_pathc = 1 and gl_matchc = 0
+ * GLOB_NOESCAPE:
+ *	disable backslash escaping in pattern
+ * GLOB_NOSORT:
+ *	do not sort gl_pathv
+ *
  * Optional extra services, controlled by flags not defined by POSIX:
  *
+ * GLOB_ALTDIRFUNC:
+ *	Use alternately specified directory access functions.
+ * GLOB_BRACE:
+ *	expand {1,2}{a,b} to 1a 1b 2a 2b 
+ * GLOB_LIMIT:
+ *	Limit the total number of returned pathnames to initial gl_matchc
  * GLOB_MAGCHAR:
  *	Set in gl_flags if pattern contained a globbing character.
  * GLOB_NOMAGIC:
  *	Same as GLOB_NOCHECK, but it will only append pattern if it did
  *	not contain any magic characters.  [Used in csh style globbing]
- * GLOB_ALTDIRFUNC:
- *	Use alternately specified directory access functions.
- * GLOB_TILDE:
- *	expand ~user/foo to the /home/dir/of/user/foo
- * GLOB_BRACE:
- *	expand {1,2}{a,b} to 1a 1b 2a 2b 
- * GLOB_PERIOD:
- *	allow metacharacters to match leading dots in filenames.
  * GLOB_NO_DOTDIRS:
  *	. and .. are hidden from wildcards, even if GLOB_PERIOD is set.
+ * GLOB_PERIOD:
+ *	allow metacharacters to match leading dots in filenames.
+ * GLOB_STAR:
+ *	"**" will do a recursive match without following symbolic links.
+ *	"***" will do a recursive match following symbolic links.
+ * GLOB_TILDE:
+ *	expand ~user/foo to the /home/dir/of/user/foo
  * gl_matchc:
  *	Number of matches in the current invocation of glob.
  */
@@ -77,6 +99,7 @@
 #include <errno.h>
 #include <glob.h>
 #include <pwd.h>
+#include <stdbool.h>
 #include <stdio.h>
 #include <stddef.h>
 #include <stdlib.h>
@@ -527,9 +550,9 @@
 			 * to avoid exponential behavior
 			 */
 			if (bufnext == patbuf || bufnext[-1] != M_ALL ||
-			    ((pglob->gl_flags & GLOB_STAR) != 0 && 
-			    (bufnext - 1 == patbuf || bufnext[-2] != M_ALL ||
-			    bufnext - 2 == patbuf || bufnext[-3] != M_ALL)))
+			    ((pglob->gl_flags & GLOB_STAR) != 0 &&
+			     (bufnext - 1 == patbuf || bufnext[-2] != M_ALL ||
+			      bufnext - 2 == patbuf || bufnext[-3] != M_ALL)))
 				*bufnext++ = M_ALL;
 			break;
 		default:
@@ -639,10 +662,11 @@
 				return GLOB_NOSPACE;
 			}
 			if (((pglob->gl_flags & GLOB_MARK) &&
-			    pathend[-1] != SEP) && (S_ISDIR(sb.st_mode) ||
-			    (S_ISLNK(sb.st_mode) &&
-			    (g_stat(pathbuf, &sb, pglob) == 0) &&
-			    S_ISDIR(sb.st_mode)))) {
+			     pathend[-1] != SEP) &&
+			    (S_ISDIR(sb.st_mode) ||
+			     (S_ISLNK(sb.st_mode) &&
+			      (g_stat(pathbuf, &sb, pglob) == 0) &&
+			      S_ISDIR(sb.st_mode)))) {
 				if (pathend >= pathlim)
 					return GLOB_ABORTED;
 				*pathend++ = SEP;
@@ -688,8 +712,8 @@
 	__gl_stat_t sbuf;
 	int error;
 	char buf[MAXPATHLEN];
-	int globstar = 0;
-	int chase_symlinks = 0;
+	bool globstar = false;
+	bool chase_symlinks = (pglob->gl_flags & GLOB_STAR) == 0;
 	const Char *termstar = NULL;

 	/*
@@ -708,17 +732,21 @@

 	*pathend = EOS;
 	errno = 0;
-	    
+
 	while (pglobstar < restpattern) {
 		if ((pglobstar[0] & M_MASK) == M_ALL &&
 		    (pglobstar[1] & M_MASK) == M_ALL) {
 			globstar = 1;
+			/*
+			 * XXX a single star OR three stars should probably
+			 * chase symlinks even when GLOB_STAR is set....
+			 */
 			chase_symlinks = (pglobstar[2] & M_MASK) == M_ALL;
 			termstar = pglobstar + (2 + chase_symlinks);
 			break;
 		}
 		pglobstar++;
-	} 
+	}

 	if (globstar) {
 		error = pglobstar == pattern && termstar == restpattern ?
@@ -734,12 +762,11 @@
 		*pathend = EOS;
 	}

-	if (*pathbuf && (g_lstat(pathbuf, &sbuf, pglob) ||
-	    !S_ISDIR(sbuf.st_mode)
-#ifdef S_IFLINK
-	     && ((globstar && !chase_symlinks) || !S_ISLNK(sbuf.st_mode))
-#endif
-	    ))
+	if (*pathbuf &&
+	    (g_lstat(pathbuf, &sbuf, pglob) ||
+	     (!S_ISDIR(sbuf.st_mode) &&
+	      ((globstar && !chase_symlinks) ||
+	       !S_ISLNK(sbuf.st_mode)))))
 		return 0;

 	if ((dirp = g_opendir(pathbuf, pglob)) == NULL) {
@@ -825,12 +852,10 @@
 		}

 		if (globstar) {
-#ifdef S_IFLNK
 			if (!chase_symlinks &&
 			    (g_lstat(pathbuf, &sbuf, pglob) ||
 			    S_ISLNK(sbuf.st_mode)))
 				continue;
-#endif

 			if (!match(pathend, pattern, termstar))
 				continue;

NetBSD Home
NetBSD PR Database Search

(Contact us) $NetBSD: query-full-pr,v 1.43 2018/01/16 07:36:43 maya Exp $
$NetBSD: gnats_config.sh,v 1.9 2014/08/02 14:16:04 spz Exp $
Copyright © 1994-2017 The NetBSD Foundation, Inc. ALL RIGHTS RESERVED.