NetBSD Problem Report #54435

From www@netbsd.org  Sat Aug  3 14:48:19 2019
Return-Path: <www@netbsd.org>
Received: from mail.netbsd.org (mail.netbsd.org [199.233.217.200])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(Client CN "mail.NetBSD.org", Issuer "mail.NetBSD.org CA" (not verified))
	by mollari.NetBSD.org (Postfix) with ESMTPS id 0800D7A1AB
	for <gnats-bugs@gnats.NetBSD.org>; Sat,  3 Aug 2019 14:48:19 +0000 (UTC)
Message-Id: <20190803144817.927E57A1AB@mollari.NetBSD.org>
Date: Sat,  3 Aug 2019 14:48:17 +0000 (UTC)
From: uwe@stderr.spb.ru
Reply-To: uwe@stderr.spb.ru
To: gnats-bugs@NetBSD.org
Subject: reading TCP urgent data with MSG_OOB doesn't clear poll(2) status
X-Send-Pr-Version: www-1.0

>Number:         54435
>Category:       kern
>Synopsis:       reading TCP urgent data with MSG_OOB doesn't clear poll(2) status
>Confidential:   no
>Severity:       non-critical
>Priority:       low
>Responsible:    kern-bug-people
>State:          feedback
>Class:          sw-bug
>Submitter-Id:   net
>Arrival-Date:   Sat Aug 03 14:50:00 +0000 2019
>Closed-Date:    
>Last-Modified:  Sat May 01 21:56:58 +0000 2021
>Originator:     Valery Ushakov
>Release:        NetBSD-8
>Organization:
>Environment:
NetBSD pony 8.1_STABLE NetBSD 8.1_STABLE (GENERIC) #0: Sat Jun 15 07:30:17 MSK 2019  uwe@sampo:/home/uwe/work/netbsd/build8/obj/macppc/sys/arch/macppc/compile/GENERIC macppc

>Description:
It looks like POLLPRI | POLLRDBAND status in not cleared from the
socket when TCP urgent data is read with MSG_OOB

>How-To-Repeat:
cat >> oobserv.c <<'__EOF__'
#include <sys/types.h>
#include <sys/socket.h>

#include <netinet/in.h>
#include <arpa/inet.h>

#include <err.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>


int
main(void)
{
    int status;

    int server = socket(PF_INET, SOCK_STREAM, 0);
    if (server < 0)
	err(EXIT_FAILURE, "socket");

    struct sockaddr_in sin;
    memset(&sin, 0, sizeof(sin));
#if !defined(__linux__) && !defined(__sun__)
    sin.sin_len = sizeof(sin);
#endif
    sin.sin_family = AF_INET;
    sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
    sin.sin_port = htons(12345);

    status = bind(server, (struct sockaddr *)&sin, sizeof(sin));
    if (status < 0)
	err(EXIT_FAILURE, "bind");

    status = listen(server, 1);
    if (status < 0)
 	err(EXIT_FAILURE, "listen");

    for (;;) {
	int client = accept(server, NULL, 0);
	if (client < 0)
	    err(EXIT_FAILURE, "accept");

	send(client, "a", 1, 0);
	send(client, "b", 1, MSG_OOB);
	send(client, "c", 1, 0);

	close(client);
    }

    return 0;
}
__EOF__

cat >> oobrecv.c <<'__EOF__'
#include <sys/types.h>
#include <sys/socket.h>

#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <poll.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <netinet/in.h>
#include <arpa/inet.h>

int usage(void);
int getsocket(void);
void oobtest(int);
int pollsock(int);

bool oobinline = false;

int
usage(void)
{
    fprintf(stderr, "usage: oobrecv [oob | inline]\n");
    fprintf(stderr, "default is \"%s\"\n",
	    oobinline ? "inline" : "oob");

    return EXIT_FAILURE;
}


int
main(int argc, char **argv)
{
    if (argc > 1) {
	if (strcasecmp(argv[1], "oob") == 0)
	    oobinline = false;
	else if (strcasecmp(argv[1], "inline") == 0)
	    oobinline = true;
	else
	    return usage();
    }

    oobtest(getsocket());
    return 0;
}


int
getsocket(void)
{
    int s;
    int status;

    s = socket(PF_INET, SOCK_STREAM, 0);
    if (s < 0)
	err(EXIT_FAILURE, "socket");

    struct sockaddr_in sin;
    memset(&sin, 0, sizeof(sin));
#if !defined(__linux__) && !defined(__sun__)
    sin.sin_len = sizeof(sin);
#endif
    sin.sin_family = AF_INET;
    sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
    sin.sin_port = htons(12345);

    if (oobinline) {
	int one = 1;
	status = setsockopt(s, SOL_SOCKET, SO_OOBINLINE,
			    (char *)&one, sizeof(one));
	if (status < 0)
	    err(EXIT_FAILURE, "SO_OOBINLINE");
    }

    status = connect(s, (struct sockaddr *)&sin, sizeof(sin));
    if (status < 0)
	err(EXIT_FAILURE, "connect");

    return s;
}


void
oobtest(int s)
{
    char buf[128];
    int status;

    printf("reading urgent data with %s\n",
	   oobinline ? "SO_OOBINLINE" : "MSG_OOB");

    for (;;) {
	int revents = pollsock(s);
	if (revents == 0)
	    continue;

	if (revents & POLLNVAL) {
	    errno = EBADF;
	    err(EXIT_FAILURE, "poll");
	}

	if (revents & POLLERR) {
	    int sockerr;
	    socklen_t optlen = (socklen_t)sizeof(sockerr);

            status = getsockopt(s, SOL_SOCKET, SO_ERROR,
				(char *)&sockerr, &optlen);
	    if (status < 0)
		err(EXIT_FAILURE, "SO_ERROR");

	    errno = sockerr;
	    err(EXIT_FAILURE, NULL);
	}

	int flags = 0;
	if (!oobinline && (revents & (POLLPRI | POLLRDBAND)))
	    flags = MSG_OOB;

	ssize_t nread = recv(s, buf, sizeof(buf), flags);
	if (nread < 0)
	    err(EXIT_FAILURE, "recv%s",
		flags & MSG_OOB ? "(MSG_OOB)" : "");

	printf("recv(%s) = %zd\n",
	       flags & MSG_OOB ? "MSG_OOB" : "",
	       nread);

	if (nread == 0)
	    return;

	fwrite(buf, 1, nread, stdout);
	printf("\n");
    }
}


int
pollsock(int s)
{
    struct pollfd fds[1];

    fds[0].fd = s;
    fds[0].events = POLLIN | POLLPRI | POLLRDBAND;
    fds[0].revents = 0;

    int nready = poll(fds, 1, -1);
    if (nready < 0)
	err(EXIT_FAILURE, "poll");

    if (nready == 0)
	return 0;

    int revents = fds[0].revents;

    printf("poll: revents = 0x%x", revents);
    if (fds[0].revents != 0) {
	printf(":");
	if (revents & POLLNVAL)   printf(" NVAL");
	if (revents & POLLERR) 	  printf(" ERR");
	if (revents & POLLHUP) 	  printf(" HUP");
	if (revents & POLLIN)  	  printf(" IN");
	if (revents & POLLPRI) 	  printf(" PRI");
	if (revents & POLLRDNORM) printf(" RDNORM");
	if (revents & POLLRDBAND) printf(" RDBAND");
    }
    printf("\n");

    return revents;
}
__EOF__

cc -o oobserv oobserv.c
cc -o oobrecv oobrecv.c

./oobserv &
./oobrecv oob


The server sends "a", then send "b" as urgent/MSG_OOB, then "c".

The test invocation fails with:

reading urgent data with MSG_OOB
poll: revents = 0x1: IN
recv() = 1
a
poll: revents = 0x82: PRI RDBAND
recv(MSG_OOB) = 1
b
poll: revents = 0x82: PRI RDBAND
oobrecv: recv(MSG_OOB): Invalid argument

So after "b" was read (with MSG_OOB), the poll still reports POLLPRI.

This doesn't happen with SO_OOBINLINE:

$ ./oobrecv inline
reading urgent data with SO_OOBINLINE
poll: revents = 0x1: IN
recv() = 1
a
poll: revents = 0x83: IN PRI RDBAND
recv() = 1
b
poll: revents = 0x1: IN
recv() = 1
c
poll: revents = 0x1: IN
recv() = 0

here POLLPRI | POLLRDBAND are cleared after "b" was read.

>Fix:

>Release-Note:

>Audit-Trail:
From: Taylor R Campbell <riastradh@NetBSD.org>
To: gnats-bugs@NetBSD.org
Cc: 
Subject: Re: kern/54435: reading TCP urgent data with MSG_OOB doesn't clear poll(2) status
Date: Sat, 3 Aug 2019 19:37:34 +0000

 This is a multi-part message in MIME format.
 --=_zuStfGoWPMcVlXnvkpRI2k8NISfYH+LY

 poll on a socket returns POLLPRI|POLLRDBAND as long as so->so_oobmark
 is nonzero or so->so_state has SS_RCVATMARK is set.

 - so_oobmark is the pointer to how many bytes away the urgent data are.
 - SS_RCVATMARK is set when the urgent data are clamouring at the gate.

 These are updated by normal reads, but not by the recv(MSG_OOB) call
 that actually returns the urgent data.

 Candidate fix: set so_oobmark = 0, clear SS_RCVATMARK when we do
 recv(MSG_OOB).

 --=_zuStfGoWPMcVlXnvkpRI2k8NISfYH+LY
 Content-Type: text/plain; charset="ISO-8859-1"; name="oobfix"
 Content-Transfer-Encoding: quoted-printable
 Content-Disposition: attachment; filename="oobfix.patch"

 From 8f07765d4d7f455359d1a4d131b4b9ad3973b224 Mon Sep 17 00:00:00 2001
 From: Taylor R Campbell <riastradh@NetBSD.org>
 Date: Sat, 3 Aug 2019 19:31:42 +0000
 Subject: [PATCH] Clear the oobmark when we read oob data.

 Candidate fix for spurious POLLPRI/POLLRDBAND availability after
 recv(MSG_OOB) just consumed it.
 ---
  sys/kern/uipc_socket.c | 3 +++
  1 file changed, 3 insertions(+)

 diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
 index c89dc0f4d576..f19d86321a21 100644
 --- a/sys/kern/uipc_socket.c
 +++ b/sys/kern/uipc_socket.c
 @@ -1179,6 +1179,9 @@ soreceive(struct socket *so, struct mbuf **paddr, str=
 uct uio *uio,
  			    MIN(uio->uio_resid, m->m_len), uio);
  			m =3D m_free(m);
  		} while (uio->uio_resid > 0 && error =3D=3D 0 && m);
 +		/* We consumed the oob data, no more oobmark.  */
 +		so->so_oobmark =3D 0;
 +		so->so_state &=3D ~SS_RCVATMARK;
  bad:
  		if (m !=3D NULL)
  			m_freem(m);

 --=_zuStfGoWPMcVlXnvkpRI2k8NISfYH+LY--

From: mlelstv@serpens.de (Michael van Elst)
To: gnats-bugs@netbsd.org
Cc: 
Subject: Re: kern/54435: reading TCP urgent data with MSG_OOB doesn't clear poll(2) status
Date: Sat, 3 Aug 2019 20:20:34 -0000 (UTC)

 riastradh@NetBSD.org (Taylor R Campbell) writes:

 > poll on a socket returns POLLPRI|POLLRDBAND as long as so->so_oobmark
 > is nonzero or so->so_state has SS_RCVATMARK is set.
 > 
 > - so_oobmark is the pointer to how many bytes away the urgent data are.
 > - SS_RCVATMARK is set when the urgent data are clamouring at the gate.
 > 
 > These are updated by normal reads, but not by the recv(MSG_OOB) call
 > that actually returns the urgent data.


 That's how it is supposed to be.

 OOB data is not delivered in the stream. Instead you get information
 that 'urgent data' lies ahead. recv(MSG_OOB) can be used to peek at
 that data, but it doesn't consume the stream or that data. That's
 why the poll condition persists. It's even possible that you cannot
 peek at the data, because it hasn't been received, then recv(MSG_OOB)
 is supposed to return EWOULDBLOCK.

 The correct answer to 'urgent data' is to consume the stream to
 the point where the OOB data lies ahead. Then, since a normal
 recv() won't deliver the urgent data to the application, use
 recv(MSG_OOB) to read it. At that point, the poll condition
 already has been cleared by reading the stream.

 To read ahead you can use the SIOCATMARK ioctl that returns the
 state of the RCVATMARK flag. The recv() is automatically truncated
 so that you don't read past the urgent data.

 An alternative to recv(MSG_OOB) is the SO_OOBINLINE option. Then you
 don't need recv(MSG_OOB) to get at the urgent data but read it
 directly. There is even an RFC telling that this behaviour was
 originally intended, but everyone copied the MSG_OOB interface.

 -- 
 -- 
                                 Michael van Elst
 Internet: mlelstv@serpens.de
                                 "A potential Snark may lurk in every tree."

From: Valery Ushakov <uwe@stderr.spb.ru>
To: gnats-bugs@netbsd.org
Cc: Michael van Elst <mlelstv@serpens.de>
Subject: Re: kern/54435: reading TCP urgent data with MSG_OOB doesn't clear
 poll(2) status
Date: Sat, 3 Aug 2019 23:51:59 +0300

 On Sat, Aug 03, 2019 at 20:25:01 +0000, Michael van Elst wrote:

 >  OOB data is not delivered in the stream. Instead you get information
 >  that 'urgent data' lies ahead. recv(MSG_OOB) can be used to peek at
 >  that data, but it doesn't consume the stream or that data.

 Can you cite a chapter and verse for that?


 >  The correct answer to 'urgent data' is to consume the stream to
 >  the point where the OOB data lies ahead. Then, since a normal
 >  recv() won't deliver the urgent data to the application, use
 >  recv(MSG_OOB) to read it. At that point, the poll condition
 >  already has been cleared by reading the stream.

 Except that this is not what happens, as far as I can tell.  Consider
 the output from the test quoted in the PR:

 1) we read non-urgent "a"

   poll: revents = 0x1: IN
   recv() = 1
   a


 2) At this point we have urgent "b" and there's no normal data
 preceding it.

   poll: revents = 0x82: PRI RDBAND
   recv(MSG_OOB) = 1
   b

 the test "used recv(MSG_OOB) to read it".


 3) "At that point, the poll condition already has been cleared by
    reading the stream" - unfortunately, that's not what happens:

   poll: revents = 0x82: PRI RDBAND
   oobrecv: recv(MSG_OOB): Invalid argument

 The test had consumed urgent "b" and tries to read further - using
 MSG_OOB since that's how the test interprets POLLPRI.  It fails b/c
 there's no urgent data.  A modified test that uses plain recv() at
 this point would read (non-urgent) "c".

 E.g. on linux the same test would work with

   reading urgent data with MSG_OOB
   poll: revents = 0x1: IN
   recv() = 1
   a
   poll: revents = 0x2: PRI
   recv(MSG_OOB) = 1
   b
   poll: revents = 0x1: IN
   recv() = 1
   c
   poll: revents = 0x1: IN
   recv() = 0

 or, dependent on timing:

   reading urgent data with MSG_OOB
   poll: revents = 0x3: IN PRI
   recv(MSG_OOB) = 1
   b
   poll: revents = 0x1: IN
   recv() = 1
   a
   poll: revents = 0x1: IN
   recv() = 1
   c
   poll: revents = 0x1: IN
   recv() = 0

 -uwe

From: mlelstv@serpens.de (Michael van Elst)
To: gnats-bugs@netbsd.org
Cc: 
Subject: Re: kern/54435: reading TCP urgent data with MSG_OOB doesn't clear poll(2) status
Date: Sat, 3 Aug 2019 22:11:05 -0000 (UTC)

 uwe@stderr.spb.ru (Valery Ushakov) writes:

 > Except that this is not what happens, as far as I can tell.  Consider
 > the output from the test quoted in the PR:

 Without error handling:

 int mark;
 char oob;
 char buffer[BUFLEN];

 while (1) {
 	/* check if we reached urgent data */
 	ioctl(sock, SIOCATMARK, &mark);
 	if (mark)
 		break;
 	/* consume stream */
 	(void) read(sock, buffer, sizeof(buffer));
 }
 recv(sock, &oob, 1, MSG_OOB);

 -- 
 -- 
                                 Michael van Elst
 Internet: mlelstv@serpens.de
                                 "A potential Snark may lurk in every tree."

From: Valery Ushakov <uwe@stderr.spb.ru>
To: gnats-bugs@netbsd.org
Cc: Michael van Elst <mlelstv@serpens.de>
Subject: Re: kern/54435: reading TCP urgent data with MSG_OOB doesn't clear
 poll(2) status
Date: Sun, 4 Aug 2019 02:20:40 +0300

 On Sat, Aug 03, 2019 at 22:15:01 +0000, Michael van Elst wrote:

 >  uwe@stderr.spb.ru (Valery Ushakov) writes:
 >  
 >  > Except that this is not what happens, as far as I can tell.  Consider
 >  > the output from the test quoted in the PR:
 >  
 >  Without error handling:
 [...]

 Right, right.  That is exactly what fails.  SIOCATMARK in that code
 just makes sure we read oob "in-band", but the failed output already
 was doing it if by chance.  If I modify the test to actually sync the
 oob byte position with SIOCATMARK it fails just the same.  The
 modified test does a 1s delay (to make sure the oob data is
 available), adds SIOCATMARK loop if POLLPRI is set and falls back to
 reading w/out MSG_OOB if MSG_OOB fails:

   $ ./oobrecv oob delay
   reading urgent data with MSG_OOB after delay
   poll: revents = 0x83: IN PRI RDBAND
   recv() = 1 (reading to mark)
   a
   at mark
   recv(MSG_OOB) = 1
   b
   poll: revents = 0x83: IN PRI RDBAND
   at mark
   recv(MSG_OOB) failed, retrying
   recv() = 1
   c
   poll: revents = 0x1: IN
   recv() = 0

 -uwe

From: Valery Ushakov <uwe@stderr.spb.ru>
To: gnats-bugs@netbsd.org
Cc: 
Subject: Re: kern/54435: reading TCP urgent data with MSG_OOB doesn't clear
 poll(2) status
Date: Sun, 4 Aug 2019 02:36:37 +0300

 Updated test, so that we are on the same page.

 /* oobrecv.c */
 #include <sys/types.h>
 #include <sys/socket.h>
 #include <sys/ioctl.h>

 #include <err.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <poll.h>
 #include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>

 #include <netinet/in.h>
 #include <arpa/inet.h>

 int usage(void);
 int getsocket(void);
 void oobtest(int);
 int pollsock(int);

 bool oobinline = false;
 bool oobsync = false;
 bool delay = false;

 int
 usage(void)
 {
     fprintf(stderr, "usage: oobrecv [oob | inline | sync | delay | sleep] ...\n");
     fprintf(stderr, "default is: %s%s%s\n",
 	    oobinline ? "inline" : "oob",
 	    oobsync ? " sync" : "",
 	    delay ? " delay" : "");

     return EXIT_FAILURE;
 }


 int
 main(int argc, char **argv)
 {
     int i;

     for (i = 1; i < argc; ++i) {
 	if (strcasecmp(argv[i], "oob") == 0)
 	    oobinline = false;
 	else if (strcasecmp(argv[i], "inline") == 0)
 	    oobinline = true;
 	else if (strcasecmp(argv[i], "delay") == 0
 		 || strcasecmp(argv[i], "sleep") == 0)
 	    delay = true;
 	else if (strcasecmp(argv[i], "sync") == 0)
 	    oobsync = true;
 	else
 	    return usage();
     }

     oobtest(getsocket());
     return 0;
 }


 int
 getsocket(void)
 {
     int s;
     int status;

     s = socket(PF_INET, SOCK_STREAM, 0);
     if (s < 0)
 	err(EXIT_FAILURE, "socket");

     struct sockaddr_in sin;
     memset(&sin, 0, sizeof(sin));
 #if !defined(__linux__) && !defined(__sun__)
     sin.sin_len = sizeof(sin);
 #endif
     sin.sin_family = AF_INET;
     sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
     sin.sin_port = htons(12345);

     if (oobinline) {
 	int one = 1;
 	status = setsockopt(s, SOL_SOCKET, SO_OOBINLINE,
 			    (char *)&one, sizeof(one));
 	if (status < 0)
 	    err(EXIT_FAILURE, "SO_OOBINLINE");
     }

     status = connect(s, (struct sockaddr *)&sin, sizeof(sin));
     if (status < 0)
 	err(EXIT_FAILURE, "connect");

     return s;
 }


 void
 oobtest(int s)
 {
     char buf[128];
     int status;

     printf("reading urgent data with %s",
 	   oobinline ? "SO_OOBINLINE" : "MSG_OOB");
     if (oobsync)
 	printf(" with SIOCATMARK");
     if (delay)
 	printf(" after delay");
     printf("\n");

     if (delay)
 	sleep(1);

     for (;;) {
 	int revents = pollsock(s);
 	if (revents == 0)
 	    continue;

 	if (revents & POLLNVAL) {
 	    errno = EBADF;
 	    err(EXIT_FAILURE, "poll");
 	}

 	if (revents & POLLERR) {
 	    int sockerr;
 	    socklen_t optlen = (socklen_t)sizeof(sockerr);

             status = getsockopt(s, SOL_SOCKET, SO_ERROR,
 				(char *)&sockerr, &optlen);
 	    if (status < 0)
 		err(EXIT_FAILURE, "SO_ERROR");

 	    errno = sockerr;
 	    err(EXIT_FAILURE, NULL);
 	}

 	ssize_t nread;

 	if (oobsync && (revents & (POLLPRI | POLLRDBAND))) {
 	    for (;;) {
 		int atmark;
 		status = ioctl(s, SIOCATMARK, &atmark);
 		if (status < 0)
 		    err(EXIT_FAILURE, "SIOCATMARK");

 		if (atmark) {
 		    printf("<at mark>\n");
 		    break;
 		}

 		nread = recv(s, buf, sizeof(buf), 0);
 		if (nread < 0)
 		    err(EXIT_FAILURE, "recv");

 		printf("recv() = %zd (reading to mark)\n", nread);
 		fwrite(buf, 1, nread, stdout);
 		printf("\n");
 	    }
 	}

 	int flags = 0;
 	if (!oobinline && (revents & (POLLPRI | POLLRDBAND)))
 	    flags |= MSG_OOB;

       retry:
 	nread = recv(s, buf, sizeof(buf), flags);

 	if (nread < 0) {
 	    if (flags & MSG_OOB) {
 		flags &= ~MSG_OOB;
 		printf("recv(MSG_OOB) failed, retrying\n");
 		goto retry;
 	    }
 	    else {
 		err(EXIT_FAILURE, "recv%s",
 		    flags & MSG_OOB ? "(MSG_OOB)" : "");
 	    }
 	}

 	printf("recv(%s) = %zd\n",
 	       flags & MSG_OOB ? "MSG_OOB" : "",
 	       nread);

 	if (nread == 0)
 	    return;

 	fwrite(buf, 1, nread, stdout);
 	printf("\n");
     }
 }


 int
 pollsock(int s)
 {
     struct pollfd fds[1];

     fds[0].fd = s;
     fds[0].events = POLLIN | POLLPRI | POLLRDBAND;
     fds[0].revents = 0;

     int nready = poll(fds, 1, -1);
     if (nready < 0)
 	err(EXIT_FAILURE, "poll");

     if (nready == 0)
 	return 0;

     int revents = fds[0].revents;

     printf("poll: revents = 0x%x", revents);
     if (fds[0].revents != 0) {
 	printf(":");
 	if (revents & POLLNVAL)   printf(" NVAL");
 	if (revents & POLLERR) 	  printf(" ERR");
 	if (revents & POLLHUP) 	  printf(" HUP");
 	if (revents & POLLIN)  	  printf(" IN");
 	if (revents & POLLPRI) 	  printf(" PRI");
 	if (revents & POLLRDNORM) printf(" RDNORM");
 	if (revents & POLLRDBAND) printf(" RDBAND");
     }
     printf("\n");

     return revents;
 }

From: mlelstv@serpens.de (Michael van Elst)
To: gnats-bugs@netbsd.org
Cc: 
Subject: Re: kern/54435: reading TCP urgent data with MSG_OOB doesn't clear poll(2) status
Date: Sun, 4 Aug 2019 04:36:55 -0000 (UTC)

 uwe@stderr.spb.ru (Valery Ushakov) writes:

 > Right, right.  That is exactly what fails.  SIOCATMARK in that code
 > just makes sure we read oob "in-band", but the failed output already
 > was doing it if by chance.  If I modify the test to actually sync the
 > oob byte position with SIOCATMARK it fails just the same.

 If you change the test to
 - read OOB data when you get (POLLPRI | POLLRDBAND)
 - read regular data when you get POLLIN
 - do both when you get both conditions

 then

 % ./oobrecv oob delay sync
 reading urgent data with MSG_OOB with SIOCATMARK after delay
 poll: revents = 0x83: IN PRI RDBAND
 recv() = 1 (reading to mark)
 a
 <at mark>
 recv(MSG_OOB) = 1
 b
 recv() = 1
 c
 poll: revents = 0x1: IN
 recv() = 0


 If anyone would actually use urgent data, we would probably
 have a different interface. Then we wouldn't be limited to
 a single byte either. TCP after all doesn't impose such
 a limit.


 -- 
 -- 
                                 Michael van Elst
 Internet: mlelstv@serpens.de
                                 "A potential Snark may lurk in every tree."

From: Robert Elz <kre@munnari.OZ.AU>
To: gnats-bugs@netbsd.org
Cc: 
Subject: Re: kern/54435: reading TCP urgent data with MSG_OOB doesn't clear poll(2) status
Date: Sun, 04 Aug 2019 13:08:17 +0700

     Date:        Sun,  4 Aug 2019 04:40:01 +0000 (UTC)
     From:        mlelstv@serpens.de (Michael van Elst)
     Message-ID:  <20190804044001.73A2A7A1AD@mollari.NetBSD.org>

   |  If anyone would actually use urgent data, we would probably
   |  have a different interface. Then we wouldn't be limited to
   |  a single byte either. TCP after all doesn't impose such
   |  a limit.

 I am sure that many of you already know this, perhaps all of you,
 but TCP has no concepy of OOB data at all, in the (sending side of)
 the test program:

 	send(client, "b", 1, MSG_OOB);

 does not send "b" as out of band data, it cannot, as TCP has
 no method to do that.   What it does is send "b" and also set
 the "urgent" pointer to the position in the stream immediately
 after the 'b'.    If anything can be said to be sent OOB it is
 that urgent pointer position - that arrives at the receiver with
 the very next packet sent (or even resent), regardless of how
 far back in the transmit queue "b" happens to be.

 The model is that the receiving application is being informed
 that there is something important, something that it will want
 to notice soon, coming in the data stream, and so it should
 read the data as fast as possible until it has processed that
 urgent data, and then, if appropriate, go back to really process
 all of the data that was skipped while looking for the urgent
 part,   The only assistance TCP provides for finding that urgent
 data is that it stops telling the receiving data that there is
 some coming after the receiver has read past the location of the
 urgent pointer - beyond that it is all up to the applcation protocol
 to define a method to determine just what the data that is supposed
 to be urgent was.

 The sockets API attempted to provide a mechanism to make this
 possible, and relatively easy to handle in code, while also allowing
 the same interface to handle the "out of band data" mechanism in
 transport protocols that actually have a concept of that (like
 everything ultimately derived from HDLC: X.25, OSI TP, ...)

 For TCP, the OOBINLINE method is clearly the right one, all TCP
 data is simply sequential TCP data, all that matters is that the
 receiver is told when some urgent date is coming in the future
 and when the read pointer has read past that urgent pointer, so
 whatever data was urgent has now been received.

 So, there is really no concept of "only 1 byte of urgent data" vs
 more than one, there cannot be, in TCP it is (depending upon how
 you view it) either "all the data from when the urgent pointer was
 first set, until that the receiver has read to that place in the
 stream" or "no data at all, just a marker between bytes".

 The !OOBINLINE model is really only appropriate for transport protocols
 which have a true OOB mechanism, where out of band data is delivered
 early, out of the normal flow control and seqencing rules ... in such
 a model, with the test programs, particularly with that delay added in
 the receiver, "b" would be read before "a".

 None of this has any particular bearing on what the !OOBINLINE
 interface should look like for TCP applications  -  or whether NetBSD
 provides the same one as other systems (but I would test FreeBSD,
 MacOS. and/or Solaris, rather than linux to determine that) but it
 really should not matter too much, since no application that is
 using TCP should really be using that interface anyway.

 kre

From: Valery Ushakov <uwe@stderr.spb.ru>
To: gnats-bugs@netbsd.org
Cc: Michael van Elst <mlelstv@serpens.de>
Subject: Re: kern/54435: reading TCP urgent data with MSG_OOB doesn't clear
 poll(2) status
Date: Sun, 4 Aug 2019 22:07:39 +0300

 On Sun, Aug 04, 2019 at 04:40:01 +0000, Michael van Elst wrote:

 >  If you change the test to
 >  - read OOB data when you get (POLLPRI | POLLRDBAND)
 >  - read regular data when you get POLLIN
 >  - do both when you get both conditions
 >  
 >  then
 >  
 >  % ./oobrecv oob delay sync
 >  reading urgent data with MSG_OOB with SIOCATMARK after delay
 >  poll: revents = 0x83: IN PRI RDBAND
 >  recv() = 1 (reading to mark)
 >  a
 >  <at mark>
 >  recv(MSG_OOB) = 1
 >  b

 So far so good.


 >  recv() = 1
 >  c
 >  poll: revents = 0x1: IN
 >  recv() = 0

 But this is cheating :) You continue reading from the socket which
 does succeed and which clears POLLPRI.  The bug I'm reporting is
 exactly that if you poll the socket in this state you will get POLLPRI
 even though there is no urgent/oob data.


 -uwe

From: mlelstv@serpens.de (Michael van Elst)
To: gnats-bugs@netbsd.org
Cc: 
Subject: Re: kern/54435: reading TCP urgent data with MSG_OOB doesn't clear poll(2) status
Date: Mon, 5 Aug 2019 04:09:15 -0000 (UTC)

 uwe@stderr.spb.ru (Valery Ushakov) writes:

 > But this is cheating :) You continue reading from the socket which
 > does succeed and which clears POLLPRI.  The bug I'm reporting is
 > exactly that if you poll the socket in this state you will get POLLPRI
 > even though there is no urgent/oob data.

 And I tell you that this is expected behaviour. It's not a second stream
 that you can just read independently (poll/select, then read) and a
 program that acts normally on POLLIN wouldn't even see that.

 -- 
 -- 
                                 Michael van Elst
 Internet: mlelstv@serpens.de
                                 "A potential Snark may lurk in every tree."

From: Valery Ushakov <uwe@stderr.spb.ru>
To: gnats-bugs@netbsd.org
Cc: Michael van Elst <mlelstv@serpens.de>
Subject: Re: kern/54435: reading TCP urgent data with MSG_OOB doesn't clear
 poll(2) status
Date: Mon, 5 Aug 2019 10:21:10 +0300

 On Mon, Aug 05, 2019 at 04:15:01 +0000, Michael van Elst wrote:

 >  And I tell you that this is expected behaviour. It's not a second
 >  stream that you can just read independently (poll/select, then
 >  read) and a program that acts normally on POLLIN wouldn't even see
 >  that.

 I asked for a reference (a book, an old mail, personal communication
 from someone who implemented it, something) and you just keep
 restating the same thing without any further argument than "b/c I say
 so".  You say it's expected, I say it doesn't make sense to me.

   read           - returns "a"
   read(MSG_OOB)  - returns "b"
   poll           - ?

 Are you saying it's illegal to call poll there?

 Are you saying it's correct for poll to return POLLPRI there when
 there's no MSG_OOB data to read?

 -uwe

From: Michael van Elst <mlelstv@serpens.de>
To: Valery Ushakov <uwe@stderr.spb.ru>
Cc: gnats-bugs@netbsd.org
Subject: Re: kern/54435: reading TCP urgent data with MSG_OOB doesn't clear
 poll(2) status
Date: Mon, 5 Aug 2019 11:22:50 +0200

 On Mon, Aug 05, 2019 at 10:21:10AM +0300, Valery Ushakov wrote:

 > I asked for a reference (a book, an old mail, personal communication
 > from someone who implemented it, something) and you just keep
 > restating the same thing without any further argument than "b/c I say
 > so".  You say it's expected, I say it doesn't make sense to me.

 The closest thing to documentation is probably "An Advanced 4.4BSD
 Interprocess Communication Tutorial" [Leffler,1986], but it doesn't
 tell you when the "exceptional condition" for select (equivalent to
 POLLPRI/POLLRDBAND) goes away. It however is pretty clear on the fact
 that you cannot use it to determine when recv(MSG_OOB) will succeed.

 It makes sense to me to clear the OOB condition when you continue
 reading the stream beyond the mark. That's a clear indication that
 processing of OOB data has finished.

 It also make some sense to clear the OOB condition with some action on
 the stream instead of some action on the protocol (e.g. reading OOB
 data) because that avoids further knowledge of the protocol.

 So I can understand why someone made the decision for this behaviour
 a long time ago. It's not the only decision about OOB processing
 that was made and that makes it different from reading regular data.


 >   read           - returns "a"
 >   read(MSG_OOB)  - returns "b"
 >   poll           - ?
 > 
 > Are you saying it's illegal to call poll there?

 You can call poll here and it works as specified.


 > Are you saying it's correct for poll to return POLLPRI there when
 > there's no MSG_OOB data to read?

 It would be ok (but not that useful) for poll to return POLLPRI at
 almost any time. It doesn't tell you that something can be read,
 but just that an attempt to read won't block (and recv(MSG_OOB) is
 guaranteed not to block). The case where you get POLLPRI _before_
 you can read the OOB data is well documented, getting POLLPRI
 _after_ you can read OOB data isn't that different. It's even
 possible that OOB data gets lost if multiple chunks exist.

 This is not a good interface, just an attempt to get OOB and urgent
 data accessible to userland. It doesn't have to "make sense" and
 changing just a small aspect of it probably does more harm than
 keeping the established behaviour, in particular when there is
 almost no user of that interface.


 Greetings,
 -- 
                                 Michael van Elst
 Internet: mlelstv@serpens.de
                                 "A potential Snark may lurk in every tree."

From: Robert Elz <kre@munnari.OZ.AU>
To: gnats-bugs@netbsd.org
Cc: 
Subject: Re: kern/54435: reading TCP urgent data with MSG_OOB doesn't clear poll(2) status
Date: Mon, 05 Aug 2019 21:06:08 +0700

     Date:        Mon,  5 Aug 2019 09:25:02 +0000 (UTC)
     From:        Michael van Elst <mlelstv@serpens.de>
     Message-ID:  <20190805092502.0D7C67A1AD@mollari.NetBSD.org>

   |  The closest thing to documentation is probably "An Advanced 4.4BSD
   |  Interprocess Communication Tutorial" [Leffler,1986],

 Available in: src/share/doc/psd/21.ipc   Section 5.1 is the relevant part.

 It is clear that the intent is to support protocols that actually
 have OOB data, unlike TCP which doesn't:

 	This is more difficult when the underlying protocol delivers the
 	urgent data in-band with the normal data, and only sends notification
 	of its presence ahead of time (e.g., the TCP protocol used to
 	implement streams in the Internet domain).

   |  This is not a good interface,

 Certainly not for TCP, where SO_OOBINLINE should always be used really.

   |  It doesn't have to "make sense" and
   |  changing just a small aspect of it probably does more harm than
   |  keeping the established behaviour, in particular when there is
   |  almost no user of that interface.

 The only user of recv*(MSG_OOB) I can find in the NetBSD src tree is
 rlogin - which uses SIGURG and in its handler, simply reads from the
 stream (discarding it) until SIOCATMARK says the urgent pointer has
 been reached, after which it does one recv(MSG_OOB).   That example is
 also used in the Tutorial.   It is the sole example of how MSG_OOB
 should be used for recv()...

 There are several users of send(MSG_OOB) but the receivers for them don't
 use recv(MSG_OOB).   (telnet is one example).

 If we still had some reason to support protocols which actually have OOB
 data as part of the protocol, and we had applications that used those
 protocols, there might be more use of recv(MSG_OOB) as for such protocols,
 where OOB data is delivered independantly of the normal data and the
 application needs to say which it wants to read at any point.

 kre

From: Valery Ushakov <uwe@stderr.spb.ru>
To: gnats-bugs@netbsd.org
Cc: Michael van Elst <mlelstv@serpens.de>
Subject: Re: kern/54435: reading TCP urgent data with MSG_OOB doesn't clear
 poll(2) status
Date: Thu, 8 Aug 2019 01:29:14 +0300

 On Mon, Aug 05, 2019 at 09:25:02 +0000, Michael van Elst wrote:

 >  ... getting POLLPRI _after_ you can read OOB data isn't that
 >  different.  It's even possible that OOB data gets lost if multiple
 >  chunks exist.

 That still makes absolutely no sense to me.  The kernel knows whether
 there is or there is no more OOB data, doesn't it?  Yes, it's
 technically correct ("the best kind of correct") to return POLLPRI b/c
 MSG_OOB never blocks, but it's useless and harmful.

 Yes, reading past the mark would have cleared it (at least for TCP,
 where urgent data are not really out of band), but what if there's no
 normal data to read?

 Change the test to

     send("a")
     send("B", MSG_OOB)
     /* ride into the sunset */

 our current behaviour just spams the reader with POLLPRI causing it to
 get stuck in busy-wait:

   reading urgent data with MSG_OOB with SIOCATMARK

   poll: revents = 0x1: IN
   recv() = 1
   a

   poll: revents = 0x82: PRI RDBAND
   <at mark>
   recv(MSG_OOB) = 1
   B

   poll: revents = 0x82: PRI RDBAND
   <at mark>
   oobrecv: recv(MSG_OOB): Invalid argument

   poll: revents = 0x82: PRI RDBAND
   <at mark>
   oobrecv: recv(MSG_OOB): Invalid argument

   poll: revents = 0x82: PRI RDBAND
   <at mark>
   oobrecv: recv(MSG_OOB): Invalid argument
   ...

 which is obviously bad.  Solaris that probably copied that code from
 BSD has the same problem.


 >  This is not a good interface, just an attempt to get OOB and urgent
 >  data accessible to userland. It doesn't have to "make sense" and
 >  changing just a small aspect of it probably does more harm than
 >  keeping the established behaviour, in particular when there is
 >  almost no user of that interface.

 We may call it an "established behaviour" or we may call it a buggy
 corner case that was never properly tested, in particular *because*
 "there is almost no users of that interface".  rlogin that kre@
 mentioned seems to be the only user of recv(MSG_OOB) in our tree and
 it doesn't even use poll/select, it uses SIGURG.

 Yes, poll behaviour (as in which events are reported when) varies
 (sometimes *wildly*) between different systems (cf. things like
 POLLHUP for half-closed TCP connections) and changing it can break
 code that was written to assume that specific behaviour, but in this
 case the stars are algined.  There's probably no existing callers at
 all and our existing behaviour easily causes a busy loop with POLLPRI
 spam.  Linux behaviour in this area makese sense and is useful, so I
 don't see why we don't adopt it.

 -uwe

From: Valery Ushakov <uwe@stderr.spb.ru>
To: gnats-bugs@netbsd.org
Cc: Robert Elz <kre@munnari.OZ.AU>
Subject: Re: kern/54435: reading TCP urgent data with MSG_OOB doesn't clear
 poll(2) status
Date: Thu, 8 Aug 2019 01:51:52 +0300

 On Mon, Aug 05, 2019 at 14:10:01 +0000, Robert Elz wrote:

 >  Certainly not for TCP, where SO_OOBINLINE should always be used really.

 There's even an RFC for that - https://tools.ietf.org/html/rfc6093

 But I think TCP urgent data is the only "out-of-band" thing we have,
 so it has to be the guinea pig for this out of necessity.

 And since we do provide recv(MSG_OOB) we might as well provide
 non-buggy poll to go with it.

 -uwe

From: Robert Elz <kre@munnari.OZ.AU>
To: Valery Ushakov <uwe@stderr.spb.ru>
Cc: gnats-bugs@netbsd.org
Subject: Re: kern/54435: reading TCP urgent data with MSG_OOB doesn't clear poll(2) status
Date: Thu, 08 Aug 2019 07:32:10 +0700

     Date:        Thu, 8 Aug 2019 01:51:52 +0300
     From:        Valery Ushakov <uwe@stderr.spb.ru>
     Message-ID:  <20190807225152.GB1102@pony.stderr.spb.ru>

   | But I think TCP urgent data is the only "out-of-band" thing we have,
   | so it has to be the guinea pig for this out of necessity.

 It is now (unless sctp provides something different than TCP, I don't
 know sctp at all) - there used to be OSI and X.25 implementations for
 BSD though, and those really provide OOB data, for which recv(MSG_OOB)
 is the right interface.

   | And since we do provide recv(MSG_OOB) we might as well provide
   | non-buggy poll to go with it.

 Again, Michael says that what we have isn't buggy.   From the example in
 the tutorial (which remains in rlogin to this day) the method used for
 urgent data using recv(MSG_OOB) is to read (using recv() or read()) until
 SOICATMARK returns true, and then do the recv(MSG_OOB).   Have you tried
 that to see if that works?   It is not what your test program did.

 I also asked (earlier) what FreeBSD/MacOS/Solaris do in this area - that
 linux is different would not be all that strange.  It often is.

 kre

From: Valery Ushakov <uwe@stderr.spb.ru>
To: gnats-bugs@netbsd.org
Cc: Robert Elz <kre@munnari.OZ.AU>
Subject: Re: kern/54435: reading TCP urgent data with MSG_OOB doesn't clear
 poll(2) status
Date: Thu, 8 Aug 2019 04:21:46 +0300

 On Thu, Aug 08, 2019 at 00:35:01 +0000, Robert Elz wrote:

 >    | And since we do provide recv(MSG_OOB) we might as well provide
 >    | non-buggy poll to go with it.
 >  
 >  Again, Michael says that what we have isn't buggy.   From the example in
 >  the tutorial (which remains in rlogin to this day) the method used for
 >  urgent data using recv(MSG_OOB) is to read (using recv() or read()) until
 >  SOICATMARK returns true, and then do the recv(MSG_OOB).   Have you tried
 >  that to see if that works?   It is not what your test program did.

 See my previous reply to the PR.  If the sender sends

     send("a")
     send("B", MSG_OOB)
     /* stops sending, keeps connection open */

 and the receiver

     recv until SIOCATMARK -> "a"
     recv(MSG_OOB) -> "B"

 and then calls poll with POLLIN|POLLPRI to decide what to do next, it
 will end up in an busy-wait loop as poll will immediately return
 POLLPRI (only), but recv(MSG_OOB) will fail, then the receiver calls
 poll and the loop repeats.


 Aside - I wonder if the notion of "at mark" made sense for those other
 protocols that had real out of band data (of which I know nothing).

 It's also not entirely obvious to me what given POLLIN | POLLRDBAND
 the right action is to read/drain the normal (in band) stream first.
 This is likely highly application and protocol dependent, but it would
 seem natural to want to read real out of band data out of band,
 i.e. calling recv(MSG_OOB) as soon as POLLRDBAND is reported.  So I'm
 somewhat worried that we are imposing the semantic of TCP not-really
 OOB urgent data - which happens to be the only user of this interface
 - on the way that interface behaves.


 >  I also asked (earlier) what FreeBSD/MacOS/Solaris do in this area - that
 >  linux is different would not be all that strange.  It often is.

 As I noted in the previous reply Solaris seems to behave the same,
 with POLLPRI spammed after the urgent data has been read.  I'm
 travelling, so I don't have a macos system handy.  I'll test when I
 get back home.


 -uwe

From: Robert Elz <kre@munnari.OZ.AU>
To: Valery Ushakov <uwe@stderr.spb.ru>
Cc: gnats-bugs@netbsd.org
Subject: Re: kern/54435: reading TCP urgent data with MSG_OOB doesn't clear poll(2) status
Date: Thu, 08 Aug 2019 10:19:08 +0700

     Date:        Thu, 8 Aug 2019 04:21:46 +0300
     From:        Valery Ushakov <uwe@stderr.spb.ru>
     Message-ID:  <20190808012146.GC1102@pony.stderr.spb.ru>

   | See my previous reply to the PR.

 Yes, I saw that after I sent my reply (... not that it really
 matters, but just as an explanation - I was reading backwards
 through the messages ... that way I see what has been answered
 before I see the questions which means I don't need to think
 about whether I should attempt an answer in many cases) - your
 two messages were close together, with the same Subject (just
 like happened this time) - and I just assumed that (like happened
 this time) there was one copy sent via gnats, and another to either
 one of the lists, or direct to me, so I just skipped the earlier one 
 initially, but then noticed it later).

 Your point in it that it is entirely possible that no-one has ever
 written a poll/select application that deals with MSG_OOB with TCP
 without using SO_OOBINLINE is probably correct though.  Not that it
 should really matter, as no-one should be writing TCP applications
 that use OOB without setting SO_OOBINLINE in the receiver.  It simply
 is the wrong way.

   | and then calls poll with POLLIN|POLLPRI to decide what to do next, it
   | will end up in an busy-wait loop as poll will immediately return
   | POLLPRI (only), but recv(MSG_OOB) will fail, then the receiver calls
   | poll and the loop repeats.

 That looks like a bug somewhere - certainly in code that would do
 a recv(), get an EINVAL, and then repeat the same recv() again (regardless
 of an intervening poll()) - but it is possible the kernel also has a
 bug, that one I would need to look into more closely to see why that
 "no more data" is an issue.

   | Aside - I wonder if the notion of "at mark" made sense for those other
   | protocols that had real out of band data (of which I know nothing).

 Not really, as best I recall them.   I am fairly certain it would be
 meaningless for X.25, which had OOB data, but there was no way the
 receiver could know (from its transport protocol - or network layer
 protcol it would usually be called, there was just one - that is
 not considering anything that application protcol may have layered
 on top) how much regular data the sender had queued before it sent
 the OOB data.   OOB (just one packet permitted until that was ack'd)
 was sent outside all of the normal sequencing and flow control rules
 (which is why it is called "out of band").

 Whether OSI TP had any mechanism to mark where in the regular packet
 stream an OOB packet was sent I have no idea, I never looked at it in
 enough detail to pick up on that kind of nuance.

 This also indicates something of an oddity in the development of all
 of this in BSD ... the API was designed to cope with OSI (which at the
 time was a plausible mass use protocol - or its advocates thought so
 anyway) but the only procotol implemented (at the time) which could be
 used to test any of this was TCP, so while the API looks like it was
 designed for OOB data, it was really only ever (initially) used with
 urgent data.

   | It's also not entirely obvious to me what given POLLIN | POLLRDBAND
   | the right action is to read/drain the normal (in band) stream first.

 It is the nature of TCP.   Despite what the BSD API seems to promise,
 the urgent data cannot be guaranteed to be delivered until all previous
 data has been read.   What's more, and also despite the apparent API,
 an application could do

 	send("a"); send("b"); send("c", MSG_OOB);

 where the whole combination "abc" is intended to be urgent - it doesn't
 send the OOB flag until the end, as until then all the urgent data isn't
 available, it doesn't want the received to read to "b" and assume it
 has all of it, so the urgent pointer should never be there.

 Remember that all there is is data, and a "mark" that says, "somewhere
 before here there is data that the application protocol needs to see
 and process before it processes other data that is arriving earlier".
 There is also just one mark - if the application sends more urgent data
 the mark just moves further down the stream - the earlier urgent data
 is still there in the queue, but is no longer marked any way (the sender
 might know where it is, but has no (TCP) way to convey that to the
 recipient, the receiver cannot keep track of all the various urgent
 data pieces, as there's no guarantee anything will be sent to it at all
 in between 2 OOB writes from the sender.  That is, if my example had
 been

 	send("a", MSG_OOB); send("b"); send("c", MSG_OOB);

 (and no more MSG_OOB) we know the receiver will get the urgent
 pointer after "c", but the one after "a" might never have been
 sent to it - the sending TCP sees both, but after the "c" OOB
 has been queued, has no way to convey the earlier "a" mark to the
 receiver - there is just one urgent pointer and it is now after "c".

   | This is likely highly application and protocol dependent,

 Absolutely.   The application has to provide some design for
 what data should be sent as urgent, and how the receiver is to
 recognise that.   Further to do that, the application has to know
 exactly what its transport prococol provices.   For TCP transport
 telnet has all this done properly.  rlogin is simply a mess.

 There is simply no way to write a transport protocol agnostic
 application which uses any kind of out of band or urgent data.

   | but it would
   | seem natural to want to read real out of band data out of band,

 Yes, it would - but TCP has no such concept.  There is no out of band
 data.  If you don't understand that, you cannot sanely attempt to use
 MSG_OOB with a TCP transport.

   | i.e. calling recv(MSG_OOB) as soon as POLLRDBAND is reported.

 You can do that - and various hacks that are in the sockets TCP
 stack sometimes pretend to allow it to work.   But it is all nonsense,
 nothing in TCP makes any of the attempts to do that meaningful in any
 way at all, which is why not using SO_OOBINLINE with a TCP transport
 is simply crazy.

   | So I'm
   | somewhat worried that we are imposing the semantic of TCP not-really
   | OOB urgent data - which happens to be the only user of this interface
   | - on the way that interface behaves.

 It all depends upon what the transport procotol implements.   Since we
 currently have nothing but TCP (and SCTP, just in case that is different,
 though I doubt it - but as I recall it does keep record markers rather than
 just providing a byte stream, so it might be) it is all very hard to tell
 what might happen with a transport protocol that actually has OOB data
 (I never once used either X.25 or OSI procotocls on a BSD system - I had
 my own X.25 once, but it was all imolemented in a plug in board, the system
 just saw something not all that much different to a modem - can make calls,
 transport bytes, and hang up... which is just what applications like uucp
 wanted to be able to do.)

 But I would not be surprised if you are somewhat correct.   TCP is all that
 existed to be used for testing when all this was developed (the others came
 later) and is all that exists again now (the others have vanished again).
 Until we get another implemented transport protocol which supports genuine
 OOB data, we will probably never really know.   When do you expect one of
 those?

 kre


From: Christos Zoulas <christos@zoulas.com>
To: gnats-bugs@netbsd.org
Cc: kern-bug-people@netbsd.org,
 gnats-admin@netbsd.org,
 netbsd-bugs@netbsd.org
Subject: Re: kern/54435: reading TCP urgent data with MSG_OOB doesn't clear
 poll(2) status
Date: Thu, 8 Aug 2019 19:07:54 +0300

 Darwin vpn1-1.astron.com 18.7.0 Darwin Kernel Version 18.7.0: Thu Jun 20 =
 18:42:21 PDT 2019; root:xnu-4903.270.47~4/RELEASE_X86_64 x86_64
 reading urgent data with MSG_OOB
 poll: revents =3D 0x93: HUP IN PRI RDBAND
 recv(MSG_OOB) =3D 1
 b
 poll: revents =3D 0x93: HUP IN PRI RDBAND
 recv(MSG_OOB) failed, retrying
 recv() =3D 1
 a
 poll: revents =3D 0x93: HUP IN PRI RDBAND
 recv(MSG_OOB) failed, retrying
 recv() =3D 1
 c
 poll: revents =3D 0x93: HUP IN PRI RDBAND
 recv(MSG_OOB) failed, retrying
 recv() =3D 0

 FreeBSD mb1.astron.com 12.0-CURRENT FreeBSD 12.0-CURRENT #0 r318137: Wed =
 May 10 15:09:31 UTC 2017     =
 root@releng3.nyi.freebsd.org:/usr/obj/usr/src/sys/GENERIC  amd64
 reading urgent data with MSG_OOB
 poll: revents =3D 0x1: IN
 recv() =3D 1
 a
 poll: revents =3D 0x83: IN PRI RDBAND
 recv(MSG_OOB) =3D 1
 b
 poll: revents =3D 0x83: IN PRI RDBAND
 recv(MSG_OOB) failed, retrying
 recv() =3D 1
 c
 poll: revents =3D 0x1: IN
 recv() =3D 0

 Linux mb1 4.9.0-8-amd64 #1 SMP Debian 4.9.144-3.1 (2019-02-19) x86_64 =
 GNU/Linux
 reading urgent data with MSG_OOB
 poll: revents =3D 0x3: IN PRI
 recv(MSG_OOB) =3D 1
 b
 poll: revents =3D 0x1: IN
 recv() =3D 1
 a
 poll: revents =3D 0x1: IN
 recv() =3D 1
 c
 poll: revents =3D 0x1: IN
 recv() =3D 0

 SunOS openindiana 5.11 illumos-6ccda740e0 i86pc i386 i86pc
 christos@openindiana:~/oob$ ./oobrecv=20
 reading urgent data with MSG_OOB
 poll: revents =3D 0x83: IN PRI RDBAND
 recv(MSG_OOB) =3D 1
 b
 poll: revents =3D 0x83: IN PRI RDBAND
 recv(MSG_OOB) failed, retrying
 recv() =3D 1
 a
 poll: revents =3D 0x83: IN PRI RDBAND
 recv(MSG_OOB) failed, retrying
 recv() =3D 1
 c
 poll: revents =3D 0x1: IN
 recv() =3D 0

 ------------------------------------------
 for portability on Solaris:
 #include <sys/param.h>
 #ifdef BSD4_4
 for socklen
 #endif

 #include <sys/sockio.h> for SIOCATMARK.

 I support the change to make us behave like Linux.

 christos

 > On Aug 3, 2019, at 5:50 PM, uwe@stderr.spb.ru wrote:
 >=20
 >> Number:         54435
 >> Category:       kern
 >> Synopsis:       reading TCP urgent data with MSG_OOB doesn't clear =
 poll(2) status
 >> Confidential:   no
 >> Severity:       non-critical
 >> Priority:       low
 >> Responsible:    kern-bug-people
 >> State:          open
 >> Class:          sw-bug
 >> Submitter-Id:   net
 >> Arrival-Date:   Sat Aug 03 14:50:00 +0000 2019
 >> Originator:     Valery Ushakov
 >> Release:        NetBSD-8
 >> Organization:
 >> Environment:
 > NetBSD pony 8.1_STABLE NetBSD 8.1_STABLE (GENERIC) #0: Sat Jun 15 =
 07:30:17 MSK 2019  =
 uwe@sampo:/home/uwe/work/netbsd/build8/obj/macppc/sys/arch/macppc/compile/=
 GENERIC macppc
 >=20
 >> Description:
 > It looks like POLLPRI | POLLRDBAND status in not cleared from the
 > socket when TCP urgent data is read with MSG_OOB
 >=20
 >> How-To-Repeat:
 > cat >> oobserv.c <<'__EOF__'
 > #include <sys/types.h>
 > #include <sys/socket.h>
 >=20
 > #include <netinet/in.h>
 > #include <arpa/inet.h>
 >=20
 > #include <err.h>
 > #include <errno.h>
 > #include <stdio.h>
 > #include <stdlib.h>
 > #include <string.h>
 > #include <unistd.h>
 >=20
 >=20
 > int
 > main(void)
 > {
 >    int status;
 >=20
 >    int server =3D socket(PF_INET, SOCK_STREAM, 0);
 >    if (server < 0)
 > 	err(EXIT_FAILURE, "socket");
 >=20
 >    struct sockaddr_in sin;
 >    memset(&sin, 0, sizeof(sin));
 > #if !defined(__linux__) && !defined(__sun__)
 >    sin.sin_len =3D sizeof(sin);
 > #endif
 >    sin.sin_family =3D AF_INET;
 >    sin.sin_addr.s_addr =3D htonl(INADDR_LOOPBACK);
 >    sin.sin_port =3D htons(12345);
 >=20
 >    status =3D bind(server, (struct sockaddr *)&sin, sizeof(sin));
 >    if (status < 0)
 > 	err(EXIT_FAILURE, "bind");
 >=20
 >    status =3D listen(server, 1);
 >    if (status < 0)
 > 	err(EXIT_FAILURE, "listen");
 >=20
 >    for (;;) {
 > 	int client =3D accept(server, NULL, 0);
 > 	if (client < 0)
 > 	    err(EXIT_FAILURE, "accept");
 >=20
 > 	send(client, "a", 1, 0);
 > 	send(client, "b", 1, MSG_OOB);
 > 	send(client, "c", 1, 0);
 >=20
 > 	close(client);
 >    }
 >=20
 >    return 0;
 > }
 > __EOF__
 >=20
 > cat >> oobrecv.c <<'__EOF__'
 > #include <sys/types.h>
 > #include <sys/socket.h>
 >=20
 > #include <err.h>
 > #include <errno.h>
 > #include <fcntl.h>
 > #include <poll.h>
 > #include <stdbool.h>
 > #include <stdio.h>
 > #include <stdlib.h>
 > #include <string.h>
 > #include <unistd.h>
 >=20
 > #include <netinet/in.h>
 > #include <arpa/inet.h>
 >=20
 > int usage(void);
 > int getsocket(void);
 > void oobtest(int);
 > int pollsock(int);
 >=20
 > bool oobinline =3D false;
 >=20
 > int
 > usage(void)
 > {
 >    fprintf(stderr, "usage: oobrecv [oob | inline]\n");
 >    fprintf(stderr, "default is \"%s\"\n",
 > 	    oobinline ? "inline" : "oob");
 >=20
 >    return EXIT_FAILURE;
 > }
 >=20
 >=20
 > int
 > main(int argc, char **argv)
 > {
 >    if (argc > 1) {
 > 	if (strcasecmp(argv[1], "oob") =3D=3D 0)
 > 	    oobinline =3D false;
 > 	else if (strcasecmp(argv[1], "inline") =3D=3D 0)
 > 	    oobinline =3D true;
 > 	else
 > 	    return usage();
 >    }
 >=20
 >    oobtest(getsocket());
 >    return 0;
 > }
 >=20
 >=20
 > int
 > getsocket(void)
 > {
 >    int s;
 >    int status;
 >=20
 >    s =3D socket(PF_INET, SOCK_STREAM, 0);
 >    if (s < 0)
 > 	err(EXIT_FAILURE, "socket");
 >=20
 >    struct sockaddr_in sin;
 >    memset(&sin, 0, sizeof(sin));
 > #if !defined(__linux__) && !defined(__sun__)
 >    sin.sin_len =3D sizeof(sin);
 > #endif
 >    sin.sin_family =3D AF_INET;
 >    sin.sin_addr.s_addr =3D htonl(INADDR_LOOPBACK);
 >    sin.sin_port =3D htons(12345);
 >=20
 >    if (oobinline) {
 > 	int one =3D 1;
 > 	status =3D setsockopt(s, SOL_SOCKET, SO_OOBINLINE,
 > 			    (char *)&one, sizeof(one));
 > 	if (status < 0)
 > 	    err(EXIT_FAILURE, "SO_OOBINLINE");
 >    }
 >=20
 >    status =3D connect(s, (struct sockaddr *)&sin, sizeof(sin));
 >    if (status < 0)
 > 	err(EXIT_FAILURE, "connect");
 >=20
 >    return s;
 > }
 >=20
 >=20
 > void
 > oobtest(int s)
 > {
 >    char buf[128];
 >    int status;
 >=20
 >    printf("reading urgent data with %s\n",
 > 	   oobinline ? "SO_OOBINLINE" : "MSG_OOB");
 >=20
 >    for (;;) {
 > 	int revents =3D pollsock(s);
 > 	if (revents =3D=3D 0)
 > 	    continue;
 >=20
 > 	if (revents & POLLNVAL) {
 > 	    errno =3D EBADF;
 > 	    err(EXIT_FAILURE, "poll");
 > 	}
 >=20
 > 	if (revents & POLLERR) {
 > 	    int sockerr;
 > 	    socklen_t optlen =3D (socklen_t)sizeof(sockerr);
 >=20
 >            status =3D getsockopt(s, SOL_SOCKET, SO_ERROR,
 > 				(char *)&sockerr, &optlen);
 > 	    if (status < 0)
 > 		err(EXIT_FAILURE, "SO_ERROR");
 >=20
 > 	    errno =3D sockerr;
 > 	    err(EXIT_FAILURE, NULL);
 > 	}
 >=20
 > 	int flags =3D 0;
 > 	if (!oobinline && (revents & (POLLPRI | POLLRDBAND)))
 > 	    flags =3D MSG_OOB;
 >=20
 > 	ssize_t nread =3D recv(s, buf, sizeof(buf), flags);
 > 	if (nread < 0)
 > 	    err(EXIT_FAILURE, "recv%s",
 > 		flags & MSG_OOB ? "(MSG_OOB)" : "");
 >=20
 > 	printf("recv(%s) =3D %zd\n",
 > 	       flags & MSG_OOB ? "MSG_OOB" : "",
 > 	       nread);
 >=20
 > 	if (nread =3D=3D 0)
 > 	    return;
 >=20
 > 	fwrite(buf, 1, nread, stdout);
 > 	printf("\n");
 >    }
 > }
 >=20
 >=20
 > int
 > pollsock(int s)
 > {
 >    struct pollfd fds[1];
 >=20
 >    fds[0].fd =3D s;
 >    fds[0].events =3D POLLIN | POLLPRI | POLLRDBAND;
 >    fds[0].revents =3D 0;
 >=20
 >    int nready =3D poll(fds, 1, -1);
 >    if (nready < 0)
 > 	err(EXIT_FAILURE, "poll");
 >=20
 >    if (nready =3D=3D 0)
 > 	return 0;
 >=20
 >    int revents =3D fds[0].revents;
 >=20
 >    printf("poll: revents =3D 0x%x", revents);
 >    if (fds[0].revents !=3D 0) {
 > 	printf(":");
 > 	if (revents & POLLNVAL)   printf(" NVAL");
 > 	if (revents & POLLERR) 	  printf(" ERR");
 > 	if (revents & POLLHUP) 	  printf(" HUP");
 > 	if (revents & POLLIN)  	  printf(" IN");
 > 	if (revents & POLLPRI) 	  printf(" PRI");
 > 	if (revents & POLLRDNORM) printf(" RDNORM");
 > 	if (revents & POLLRDBAND) printf(" RDBAND");
 >    }
 >    printf("\n");
 >=20
 >    return revents;
 > }
 > __EOF__
 >=20
 > cc -o oobserv oobserv.c
 > cc -o oobrecv oobrecv.c
 >=20
 > ./oobserv &
 > ./oobrecv oob
 >=20
 >=20
 > The server sends "a", then send "b" as urgent/MSG_OOB, then "c".
 >=20
 > The test invocation fails with:
 >=20
 > reading urgent data with MSG_OOB
 > poll: revents =3D 0x1: IN
 > recv() =3D 1
 > a
 > poll: revents =3D 0x82: PRI RDBAND
 > recv(MSG_OOB) =3D 1
 > b
 > poll: revents =3D 0x82: PRI RDBAND
 > oobrecv: recv(MSG_OOB): Invalid argument
 >=20
 > So after "b" was read (with MSG_OOB), the poll still reports POLLPRI.
 >=20
 > This doesn't happen with SO_OOBINLINE:
 >=20
 > $ ./oobrecv inline
 > reading urgent data with SO_OOBINLINE
 > poll: revents =3D 0x1: IN
 > recv() =3D 1
 > a
 > poll: revents =3D 0x83: IN PRI RDBAND
 > recv() =3D 1
 > b
 > poll: revents =3D 0x1: IN
 > recv() =3D 1
 > c
 > poll: revents =3D 0x1: IN
 > recv() =3D 0
 >=20
 > here POLLPRI | POLLRDBAND are cleared after "b" was read.
 >=20
 >> Fix:

From: Valery Ushakov <uwe@stderr.spb.ru>
To: gnats-bugs@netbsd.org
Cc: Christos Zoulas <christos@zoulas.com>
Subject: Re: kern/54435: reading TCP urgent data with MSG_OOB doesn't clear
 poll(2) status
Date: Thu, 8 Aug 2019 19:58:36 +0300

 On Thu, Aug 08, 2019 at 16:10:02 +0000, Christos Zoulas wrote:

 >  Darwin vpn1-1.astron.com 18.7.0 Darwin Kernel Version 18.7.0: Thu Jun 20 =
 >  18:42:21 PDT 2019; root:xnu-4903.270.47~4/RELEASE_X86_64 x86_64
 >  reading urgent data with MSG_OOB
 >  poll: revents =3D 0x93: HUP IN PRI RDBAND

 Ah, macos spamming POLLHUP on peer's half-close...

 I've put my current test code at http://www.stderr.spb.ru/~uwe/netbsd/oob/
 Not pasting it here as I'm still playing with it.

 oobserv can now take the send pattern on the command line

 n - send a byte
 u - send(MSG_OOB) a byte
 + - sleep for 1 second
 0 - sleep forever

 So you can do ./oobserv nu0 to serve the busy-wait example I mentioned
 earlier.

 oobrecv has dropped the stupid "retry" experiment.

 -uwe

From: "Christos Zoulas" <christos@netbsd.org>
To: gnats-bugs@gnats.NetBSD.org
Cc: 
Subject: PR/54435 CVS commit: src/sys/kern
Date: Mon, 17 Feb 2020 19:40:50 -0500

 Module Name:	src
 Committed By:	christos
 Date:		Tue Feb 18 00:40:50 UTC 2020

 Modified Files:
 	src/sys/kern: uipc_socket.c

 Log Message:
 PR/54435: Valery Ushakov: Clear urgent status after reading urgent data, so
 that poll(2) works.


 To generate a diff of this commit:
 cvs rdiff -u -r1.285 -r1.286 src/sys/kern/uipc_socket.c

 Please note that diffs are not public domain; they are subject to the
 copyright notices on the relevant files.

From: Rin Okuyama <rokuyama.rk@gmail.com>
To: "gnats-bugs@NetBSD.org" <gnats-bugs@NetBSD.org>
Cc: Christos Zoulas <christos@astron.com>
Subject: Re: kern/54435 (reading TCP urgent data with MSG_OOB doesn't clear
 poll(2) status)
Date: Sun, 3 May 2020 23:34:49 +0900

 On 2020/02/18 9:40, Christos Zoulas wrote:
 > Module Name:	src
 > Committed By:	christos
 > Date:		Tue Feb 18 00:40:50 UTC 2020
 > 
 > Modified Files:
 > 	src/sys/kern: uipc_socket.c
 > 
 > Log Message:
 > PR/54435: Valery Ushakov: Clear urgent status after reading urgent data, so
 > that poll(2) works.
 > 
 > 
 > To generate a diff of this commit:
 > cvs rdiff -u -r1.285 -r1.286 src/sys/kern/uipc_socket.c

 ...

 > Index: src/sys/kern/uipc_socket.c
 > diff -u src/sys/kern/uipc_socket.c:1.285 src/sys/kern/uipc_socket.c:1.286
 > --- src/sys/kern/uipc_socket.c:1.285	Mon Oct 14 12:27:03 2019
 > +++ src/sys/kern/uipc_socket.c	Mon Feb 17 19:40:50 2020
 ...
 > @@ -1180,6 +1180,9 @@ soreceive(struct socket *so, struct mbuf
 >   			    MIN(uio->uio_resid, m->m_len), uio);
 >   			m = m_free(m);
 >   		} while (uio->uio_resid > 0 && error == 0 && m);
 > +		/* We consumed the oob data, no more oobmark.  */
 > +		so->so_oobmark = 0;
 > +		so->so_state &= ~SS_RCVATMARK;
 >   bad:
 >   		if (m != NULL)
 >   			m_freem(m);
 > 

 This breaks rlogin. When both server and client run on recent NetBSD,
 session freezes by hitting ^C:

 ---
 % rlogin localhost
 rlogin: rcmd: connect to address ::1: Connection refused
 Trying 127.0.0.1...
 Last login: Sun May  3 21:39:37 2020 from localhost on pts/4
 ...
 % cat
 (^C then session freezes and does not respond any input)
 ---

 Reverting this commit makes ^C working again as expected.

 Thanks,
 rin

From: "Christos Zoulas" <christos@netbsd.org>
To: gnats-bugs@gnats.NetBSD.org
Cc: 
Subject: PR/54435 CVS commit: src/usr.bin/rlogin
Date: Sun, 3 May 2020 12:32:16 -0400

 Module Name:	src
 Committed By:	christos
 Date:		Sun May  3 16:32:16 UTC 2020

 Modified Files:
 	src/usr.bin/rlogin: rlogin.c

 Log Message:
 PR/54435: Adjust for new kernel behavior of soreceive(9) clearing MSG_OOB
 when receiving the oob message. This made SIOCATMARK return always 0 since
 the oob message was cleared. Instead, use recvmsg(2) to determine if
 the message was oob or not. This works with both the old and new kernel
 and it is not racy.


 To generate a diff of this commit:
 cvs rdiff -u -r1.46 -r1.47 src/usr.bin/rlogin/rlogin.c

 Please note that diffs are not public domain; they are subject to the
 copyright notices on the relevant files.

From: Rin Okuyama <rokuyama.rk@gmail.com>
To: gnats-bugs@netbsd.org
Cc: Christos Zoulas <christos@astron.com>
Subject: Re: PR/54435 CVS commit: src/usr.bin/rlogin
Date: Tue, 5 May 2020 00:42:03 +0900

 Thank you for fixing it! Now, it works fine for me.

 Just FYI:

 Previously, our rlogin(1) on OS X also freezes in the same way as on
 NetBSD. (Since they removed rlogin from their distribution, I ported
 it to OS X.) Now, it works fine also on OS X.

From: Christos Zoulas <christos@zoulas.com>
To: gnats-bugs@netbsd.org
Cc: kern-bug-people@netbsd.org,
 gnats-admin@netbsd.org,
 netbsd-bugs@netbsd.org,
 Valery Ushakov <uwe@stderr.spb.ru>,
 matthew green <mrg@NetBSD.org>
Subject: Re: PR/54435 CVS commit: src/usr.bin/rlogin
Date: Mon, 4 May 2020 12:54:28 -0400

 --Apple-Mail=_16CF0346-9C4A-471E-A6C8-95922D8E510D
 Content-Transfer-Encoding: quoted-printable
 Content-Type: text/plain;
 	charset=us-ascii

 Yes, and it should work on Linux too... :-) This is why I prefer the =
 change to reset after read,
 and consider the original behavior an implementation accident.

 christos

 > On May 4, 2020, at 11:45 AM, Rin Okuyama <rokuyama.rk@gmail.com> =
 wrote:
 >=20
 > The following reply was made to PR kern/54435; it has been noted by =
 GNATS.
 >=20
 > From: Rin Okuyama <rokuyama.rk@gmail.com>
 > To: gnats-bugs@netbsd.org
 > Cc: Christos Zoulas <christos@astron.com>
 > Subject: Re: PR/54435 CVS commit: src/usr.bin/rlogin
 > Date: Tue, 5 May 2020 00:42:03 +0900
 >=20
 > Thank you for fixing it! Now, it works fine for me.
 >=20
 > Just FYI:
 >=20
 > Previously, our rlogin(1) on OS X also freezes in the same way as on
 > NetBSD. (Since they removed rlogin from their distribution, I ported
 > it to OS X.) Now, it works fine also on OS X.
 >=20


 --Apple-Mail=_16CF0346-9C4A-471E-A6C8-95922D8E510D
 Content-Transfer-Encoding: 7bit
 Content-Disposition: attachment;
 	filename=signature.asc
 Content-Type: application/pgp-signature;
 	name=signature.asc
 Content-Description: Message signed with OpenPGP

 -----BEGIN PGP SIGNATURE-----
 Comment: GPGTools - http://gpgtools.org

 iF0EARECAB0WIQS+BJlbqPkO0MDBdsRxESqxbLM7OgUCXrBIxAAKCRBxESqxbLM7
 OtmcAKCcZyGm136HNC9no8CrIrUyu8so1wCgx8ulAw0MS4RjR1zXPzc6t+3shF4=
 =OQJw
 -----END PGP SIGNATURE-----

 --Apple-Mail=_16CF0346-9C4A-471E-A6C8-95922D8E510D--

From: matthew green <mrg@eterna.com.au>
To: Christos Zoulas <christos@zoulas.com>
Cc: kern-bug-people@netbsd.org, gnats-admin@netbsd.org,
    netbsd-bugs@netbsd.org, Valery Ushakov <uwe@stderr.spb.ru>,
    gnats-bugs@netbsd.org
Subject: re: PR/54435 CVS commit: src/usr.bin/rlogin
Date: Tue, 05 May 2020 03:36:12 +1000

 > Yes, and it should work on Linux too... :-) This is why I prefer the 
 > change to reset after read,
 > and consider the original behavior an implementation accident.

 i still consider this a binary compat issue and IMO, it
 needs to be fixed -- *particularly* since rtools are one
 of the few actual consumers of OOB using these apis.


 .mrg.

From: Chuck Silvers <chuq@chuq.com>
To: gnats-bugs@netbsd.org
Cc: kern-bug-people@netbsd.org, netbsd-bugs@netbsd.org,
	Valery Ushakov <uwe@stderr.spb.ru>,
	Taylor R Campbell <riastradh@NetBSD.org>,
	Michael van Elst <mlelstv@serpens.de>,
	Robert Elz <kre@munnari.OZ.AU>,
	Christos Zoulas <christos@zoulas.com>,
	Rin Okuyama <rokuyama.rk@gmail.com>,
	matthew green <mrg@eterna.com.au>
Subject: Re: kern/54435 (reading TCP urgent data with MSG_OOB doesn't clear
 poll(2) status)
Date: Thu, 19 Nov 2020 10:40:44 -0800

 There's still a problem with the change that was committed for this PR.
 With the current code, when recv(MSG_OOB) is used to retrieve the
 OOB data, we clear SS_RCVATMARK immediately, which is fine for poll()
 deciding whether to report POLLRDBAND, but that also means that
 the application can no longer use SIOCATMARK tell where in the
 stream the OOB data was, which is a critical part of using this API.
 If SIOCATMARK always reports "not at the mark", the application will
 think that entire rest of the stream came before the OOB byte that
 was read, which breaks any use of this interface.  This is exactly
 the problem that this change causes for rlogin.

 The real underlying problem is that the current code assumes that the
 conditions under which SIOCATMARK should report "at the mark" and the
 conditions under which poll() should report POLLRDBAND are the same,
 but they are actually not the same, since SIOCATMARK must report "at
 the mark" at some point after poll() stops reporting POLLRDBAND for
 this interface to work at all.  We need to add some extra state so
 that these two conditions can each be reported at the correct time.
 I have a patch below that does that.

 To demonstrate this, I adjusted uwe's oobrecv program to report the
 results of SIOCATMARK before every recv() call, not just the calls
 that are done when poll() returns POLLRDBAND, and I also have it
 print "not at mark" explicitly just to make the status super-clear.
 Here are the results of "./oobserv nun" and "./oobrecv oob":



 netbsd HEAD

 # ./oobrecv oob
 reading urgent data with MSG_OOB
 poll: revents = 0x83: IN PRI RDBAND
 <not at mark>
 recv(MSG_OOB) = 1
 B
 poll: revents = 0x1: IN
 <not at mark>
 recv() = 2
 ac
 poll: revents = 0x1: IN
 <not at mark>
 recv() = 0



 netbsd HEAD + my patch

 # ./oobrecv 
 reading urgent data with MSG_OOB
 poll: revents = 0x83: IN PRI RDBAND
 <not at mark>
 recv(MSG_OOB) = 1
 B
 poll: revents = 0x1: IN
 <not at mark>
 recv() = 1
 a
 poll: revents = 0x1: IN
 <at mark>
 recv() = 1
 c
 poll: revents = 0x1: IN
 <not at mark>
 recv() = 0



 linux (5.7.17-200.fc32.x86_64)

 # ./oobrecv 
 reading urgent data with MSG_OOB
 poll: revents = 0x3: IN PRI
 <not at mark>
 recv(MSG_OOB) = 1
 B
 poll: revents = 0x1: IN
 <not at mark>
 recv() = 1
 a
 poll: revents = 0x1: IN
 <at mark>
 recv() = 1
 c
 poll: revents = 0x1: IN
 <not at mark>
 recv() = 0



 The netbsd HEAD result is clearly wrong, since we never see "at mark",
 and the mark should actually occur in between the two bytes returned by
 the second recv(), so returning those two bytes together is also wrong.
 The result with my patch is correct, and matches linux.

 With the patch, an rlogin binary from netbsd 9.0 also works again.

 The patch and modified test program are at:
 https://ftp.netbsd.org/pub/NetBSD/misc/chs/diff.oob.1
 https://ftp.netbsd.org/pub/NetBSD/misc/chs/oobrecv.c

 I'll commit the patch this weekend.

 -Chuck

From: Robert Elz <kre@munnari.OZ.AU>
To: Chuck Silvers <chuq@chuq.com>
Cc: gnats-bugs@netbsd.org, kern-bug-people@netbsd.org, netbsd-bugs@netbsd.org,
        Valery Ushakov <uwe@stderr.spb.ru>,
        Taylor R Campbell <riastradh@NetBSD.org>,
        Michael van Elst <mlelstv@serpens.de>,
        Christos Zoulas <christos@zoulas.com>,
        Rin Okuyama <rokuyama.rk@gmail.com>, matthew green <mrg@eterna.com.au>
Subject: Re: kern/54435 (reading TCP urgent data with MSG_OOB doesn't clear poll(2) status)
Date: Fri, 20 Nov 2020 03:31:11 +0700

     Date:        Thu, 19 Nov 2020 10:40:44 -0800
     From:        Chuck Silvers <chuq@chuq.com>
     Message-ID:  <20201119184044.GA24468@spathi.chuq.com>


   | The netbsd HEAD result is clearly wrong, since we never see "at mark",
   | and the mark should actually occur in between the two bytes returned by
   | the second recv(), so returning those two bytes together is also wrong.

 That latter is not correct.   If both bytes are there, they can both be
 returned.  The urgent pointer is not some kind of record marker.

 Once again, there is *no* out of band data in TCP, all data is (are)
 just data.

 All we have is an out of band "something urgent follows" single bit
 (actually a pointer that indicates somewhere in the window which is
 beyond where the "urgent" data was written, but the API offers just
 an "urgent data coming" single bit.)

 To use this, the application protocol needs to mark the data somehow
 (in band) to let the application determine what is what, the urgent
 pointer simply cannot achieve that.  Attempts to make it do so will
 simply lead to frustration.

 The rlogin "protocol" is a mess.   It isn't an example of anything
 useful, for anyone who wants to see uses of urgent data in TCP, look
 at telnet - there it is all done properly, and simply works.

 This is not a comment about your patch, and certainly restoring binary
 compat so old binaries work on new kernels is a very good thing (even
 when the old binary is rlogin).

 kre

From: "Chuck Silvers" <chs@netbsd.org>
To: gnats-bugs@gnats.NetBSD.org
Cc: 
Subject: PR/54435 CVS commit: src/sys
Date: Mon, 23 Nov 2020 00:52:53 +0000

 Module Name:	src
 Committed By:	chs
 Date:		Mon Nov 23 00:52:53 UTC 2020

 Modified Files:
 	src/sys/kern: uipc_socket.c
 	src/sys/netinet: tcp_usrreq.c
 	src/sys/sys: socketvar.h

 Log Message:
 Restore correct functioning of SIOCATMARK by removing the previous
 change that was done to fix poll(POLLPRI | POLLRDBAND) and instead
 add a separate flag to track when poll() should indicate that a
 MSG_OOB byte is available.  Re-fixes PR 54435 properly.


 To generate a diff of this commit:
 cvs rdiff -u -r1.292 -r1.293 src/sys/kern/uipc_socket.c
 cvs rdiff -u -r1.227 -r1.228 src/sys/netinet/tcp_usrreq.c
 cvs rdiff -u -r1.162 -r1.163 src/sys/sys/socketvar.h

 Please note that diffs are not public domain; they are subject to the
 copyright notices on the relevant files.

State-Changed-From-To: open->feedback
State-Changed-By: dholland@NetBSD.org
State-Changed-When: Sat, 01 May 2021 21:56:58 +0000
State-Changed-Why:
is this fixed?


>Unformatted:

NetBSD Home
NetBSD PR Database Search

(Contact us) $NetBSD: query-full-pr,v 1.46 2020/01/03 16:35:01 leot Exp $
$NetBSD: gnats_config.sh,v 1.9 2014/08/02 14:16:04 spz Exp $
Copyright © 1994-2020 The NetBSD Foundation, Inc. ALL RIGHTS RESERVED.