NetBSD Problem Report #47455

From dholland@macaran.localdomain  Wed Jan 16 00:40:28 2013
Return-Path: <dholland@macaran.localdomain>
Received: from mail.netbsd.org (mail.netbsd.org [149.20.53.66])
	by www.NetBSD.org (Postfix) with ESMTP id 9BBCB63EDB2
	for <gnats-bugs@gnats.NetBSD.org>; Wed, 16 Jan 2013 00:40:27 +0000 (UTC)
Message-Id: <20130116004123.DF3926E239@macaran.localdomain>
Date: Tue, 15 Jan 2013 19:41:23 -0500 (EST)
From: dholland@eecs.harvard.edu
Reply-To: dholland@eecs.harvard.edu
To: gnats-bugs@gnats.NetBSD.org
Subject: ATA TRIM doesn't work on piixide(4)
X-Send-Pr-Version: 3.95

>Number:         47455
>Category:       kern
>Synopsis:       ATA TRIM doesn't work on piixide(4)
>Confidential:   no
>Severity:       serious
>Priority:       medium
>Responsible:    kern-bug-people
>State:          open
>Class:          sw-bug
>Submitter-Id:   net
>Arrival-Date:   Wed Jan 16 00:45:00 +0000 2013
>Originator:     David A. Holland
>Release:        6.99.16 (20130108)
>Organization:
>Environment:
System: NetBSD amberdon 6.99.16 NetBSD 6.99.16 (AMBERDON) #10: Mon Jan 14 22:04:43 EST 2013  root@amberdon:/usr/src/sys/arch/amd64/compile/AMBERDON amd64
Architecture: x86_64
Machine: amd64
>Description:

Trying to do TRIM on a SATA-attached SSD connected through piixide(4)
results in a timeout, and the device wedges such that further access
hangs and it won't probe after a reboot. (It needs to be power-cycled.)

This happened using both 65535 blocks at a time (as reported by
DIOCGDISCARDPARAMS) and one at a time. Ordinary accesses to the device
were fine.

I was able to get around this by changing the hardware from piixide to
ahcisata in the BIOS; that makes it work reliably. But this may not be
an option for everyone and it wasn't the default setting.

The kernel messages:

Jan 11 20:13:18 amberdon /netbsd: wd: maxtrimsize 65535
Jan 11 20:13:48 amberdon /netbsd: piixide1:1:0: lost interrupt
Jan 11 20:13:48 amberdon /netbsd:       type: ata tc_bcount: 512 tc_skip: 0
Jan 11 20:13:48 amberdon /netbsd: pii: wd_trim: status=0x12a<TIMEOU>

I don't know where that "pii" comes from; this is what came out both
on the console and in the system log. In other cases it was "pi" or
"piix".

Sometimes this appeared as well:

Jan 14 21:03:24 amberdon /netbsd: wd1: wd_flushcache: status=0x5128<TIMEOU>

>How-To-Repeat:

Here's the source I was using to call DIOCDISCARD:

   ------
/*
 * wdtrim.c - do TRIM on an (ATA) disk device (the entire device)
 * usage: wdtrim /dev/rwd1d
 */

#include <sys/types.h>
#include <sys/param.h> /* for DEV_BSIZE */
#include <sys/dkio.h>
#include <sys/ioctl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <err.h>
#include <util.h>

/*
 * Parse an unsigned integer from s and return it.
 *
 * The number is converted by strtoul() with base 0, so decimal, 0x-prefixed
 * hex and 0-prefixed octal are all accepted. After the number, any run of
 * spaces followed by an optional '\r' and an optional '\n' is tolerated, so
 * a line read with fgets() can be passed in directly.
 *
 * This function does not return on bad input: a conversion error, input
 * containing no digits at all, or trailing garbage terminates the program
 * with exit status 1.
 */
static unsigned long atoul(const char *s) {
   char *t;
   unsigned long ret;

   errno = 0;
   ret = strtoul(s, &t, 0);
   if (errno) {
      err(1, "Invalid number %s", s);
   }
   if (t == s) {
      /*
       * Nothing was converted. strtoul() returns 0 here and need not set
       * errno, so without this check an empty line would be read as 0.
       */
      errx(1, "Invalid number %s: No digits", s);
   }
   while (*t == ' ') {
      t++;
   }
   if (*t == '\r') {
      t++;
   }
   if (*t == '\n') {
      t++;
   }
   if (*t != 0) {
      errx(1, "Invalid number %s: Trailing garbage", s);
   }
   return ret;
}

/*
 * Discard the block range [startpos, endpos) on the disk open on fd.
 *
 * The maximum request size is obtained with DIOCGDISCARDPARAMS, and the
 * range is then issued as consecutive DIOCDISCARD requests of at most that
 * size. Progress is printed to stdout as the loop advances.
 *
 * Any ioctl failure, or a reported maximum size of zero, terminates the
 * program with exit status 1.
 */
static void trim(int fd, unsigned long startpos, unsigned long endpos) {
   struct disk_discard_params ddp;
   struct disk_discard_range ddr;

   /* these are in DEV_BSIZE units, like startpos and endpos */
   unsigned long pos, amt, maxamt;

   if (ioctl(fd, DIOCGDISCARDPARAMS, &ddp) < 0) {
      err(1, "DIOCGDISCARDPARAMS");
   }
   maxamt = ddp.maxsize;
   if (maxamt == 0) {
      /* a zero step would never advance pos, so the loop would never end */
      errx(1, "DIOCGDISCARDPARAMS: maximum discard size is 0");
   }

   printf("Discarding from block %lu to block %lu in groups of %lu\n",
	  startpos, endpos, maxamt);

   for (pos = startpos; pos < endpos; pos += amt) {
      amt = maxamt;
      /* clamp the final chunk; compare by subtraction so pos + amt cannot wrap */
      if (amt > endpos - pos) {
	 amt = endpos - pos;
      }
      /* '\r' keeps the progress report on a single terminal line */
      printf("%lu of %lu (%lu)  \r", pos, endpos, amt);
      fflush(stdout);
      ddr.bno = pos;
      ddr.size = amt;
      if (ioctl(fd, DIOCDISCARD, &ddr) < 0) {
	 err(1, "DIOCDISCARD (%lu at %lu)", amt, pos);
      }
   }
   printf("%lu of %lu             \n", pos, endpos);

   printf("Done.\n");
}

/*
 * Discard every block of the disk named by dev.
 *
 * Steps:
 *  1. Open the device read/write with opendisk(), which also yields the
 *     resolved device path.
 *  2. Run "atactl <path> identify" through a shell pipeline and read two
 *     numbers from its "Capacity" line; they are treated as the block
 *     count and the block size.
 *  3. Refuse to continue unless the block size equals DEV_BSIZE.
 *  4. Print the device summary and a warning, wait ten seconds, then call
 *     trim() on blocks [0, blocks).
 *
 * Any failure terminates the program with exit status 1.
 */
static void go(const char *dev) {
   int fd;
   FILE *f;
   char name[PATH_MAX];
   char cmd[PATH_MAX*2];
   char buf[256];
   unsigned long blocks, blocksize;

   fd = opendisk(dev, O_RDWR, name, sizeof(name), 0);
   if (fd < 0) {
      err(1, "%s", dev);
   }
   snprintf(cmd, sizeof(cmd),
	    "/sbin/atactl %s identify"
	    " | grep '^Capacity '"
	    " | awk '{ print $4; print $6 }'",
	    name);
   f = popen(cmd, "r");
   if (f == NULL) {
      err(1, "popen");
   }
   /* awk prints the two fields on separate lines: first one, then the other */
   if (fgets(buf, sizeof(buf), f) == NULL) {
      errx(1, "Error reading device size");
   }
   blocks = atoul(buf);
   if (fgets(buf, sizeof(buf), f) == NULL) {
      errx(1, "Error reading device size");
   }
   blocksize = atoul(buf);
   pclose(f);

   if (blocksize != DEV_BSIZE) {
      errx(1, "Reported block size is %lu, not DEV_BSIZE", blocksize);
   }

   printf("Using %s (%lu KB)\n", name, blocks / (1024 / DEV_BSIZE));
   snprintf(cmd, sizeof(cmd), "/sbin/atactl %s identify | head -4", name);
   /*
    * Flush before running the child: it writes straight to the same
    * descriptor, so anything still in our stdio buffer would otherwise
    * come out after the child's output when stdout is not a terminal.
    */
   fflush(stdout);
   system(cmd);
   printf("\n *** Will erase this disk in ten seconds ***\n\n");
   /* the warning must be visible before the countdown starts */
   fflush(stdout);
   sleep(10);

   trim(fd, 0, blocks);
   close(fd);
}

/*
 * Entry point: expects exactly one argument, the disk device to trim.
 * With any other argument count, prints a usage message to stderr and
 * exits with status 1.
 */
int main(int argc, char *argv[]) {
   if (argc == 2) {
      go(argv[1]);
      return 0;
   }

   fprintf(stderr, "wdtrim: usage: wdtrim device\n");
   exit(1);
}


>Fix:

not a clue.

NetBSD Home
NetBSD PR Database Search

(Contact us) $NetBSD: query-full-pr,v 1.39 2013/11/01 18:47:49 spz Exp $
$NetBSD: gnats_config.sh,v 1.8 2006/05/07 09:23:38 tsutsui Exp $
Copyright © 1994-2007 The NetBSD Foundation, Inc. ALL RIGHTS RESERVED.