Merge ../linux-2.6
[safe/jmp/linux-2.6] / drivers / scsi / sd.c
index bb5b242..3b01e5d 100644 (file)
@@ -49,6 +49,7 @@
 #include <linux/blkpg.h>
 #include <linux/kref.h>
 #include <linux/delay.h>
+#include <linux/mutex.h>
 #include <asm/uaccess.h>
 
 #include <scsi/scsi.h>
  */
 #define SD_MAJORS      16
 
+MODULE_AUTHOR("Eric Youngdale");
+MODULE_DESCRIPTION("SCSI disk (sd) driver");
+MODULE_LICENSE("GPL");
+
+MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK0_MAJOR);
+MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK1_MAJOR);
+MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK2_MAJOR);
+MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK3_MAJOR);
+MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK4_MAJOR);
+MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK5_MAJOR);
+MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK6_MAJOR);
+MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK7_MAJOR);
+MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK8_MAJOR);
+MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK9_MAJOR);
+MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK10_MAJOR);
+MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK11_MAJOR);
+MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK12_MAJOR);
+MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK13_MAJOR);
+MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK14_MAJOR);
+MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK15_MAJOR);
+
 /*
  * This is limited by the naming scheme enforced in sd_probe,
  * add another character to it if you really need more disks.
 #define SD_MAX_RETRIES         5
 #define SD_PASSTHROUGH_RETRIES 1
 
+/*
+ * Size of the initial data buffer for mode and read capacity data
+ */
+#define SD_BUF_SIZE            512
+
 static void scsi_disk_release(struct kref *kref);
 
 struct scsi_disk {
@@ -102,6 +129,7 @@ struct scsi_disk {
        u8              write_prot;
        unsigned        WCE : 1;        /* state of disk WCE bit */
        unsigned        RCD : 1;        /* state of disk RCD bit, unused */
+       unsigned        DPOFUA : 1;     /* state of disk DPOFUA bit */
 };
 
 static DEFINE_IDR(sd_index_idr);
@@ -110,7 +138,7 @@ static DEFINE_SPINLOCK(sd_index_lock);
 /* This semaphore is used to mediate the 0->1 reference get in the
  * face of object destruction (i.e. we can't allow a get on an
  * object after last put) */
-static DECLARE_MUTEX(sd_ref_sem);
+static DEFINE_MUTEX(sd_ref_mutex);
 
 static int sd_revalidate_disk(struct gendisk *disk);
 static void sd_rw_intr(struct scsi_cmnd * SCpnt);
@@ -121,8 +149,7 @@ static void sd_shutdown(struct device *dev);
 static void sd_rescan(struct device *);
 static int sd_init_command(struct scsi_cmnd *);
 static int sd_issue_flush(struct device *, sector_t *);
-static void sd_end_flush(request_queue_t *, struct request *);
-static int sd_prepare_flush(request_queue_t *, struct request *);
+static void sd_prepare_flush(request_queue_t *, struct request *);
 static void sd_read_capacity(struct scsi_disk *sdkp, char *diskname,
                             unsigned char *buffer);
 
@@ -137,8 +164,6 @@ static struct scsi_driver sd_template = {
        .rescan                 = sd_rescan,
        .init_command           = sd_init_command,
        .issue_flush            = sd_issue_flush,
-       .prepare_flush          = sd_prepare_flush,
-       .end_flush              = sd_end_flush,
 };
 
 /*
@@ -195,9 +220,9 @@ static struct scsi_disk *scsi_disk_get(struct gendisk *disk)
 {
        struct scsi_disk *sdkp;
 
-       down(&sd_ref_sem);
+       mutex_lock(&sd_ref_mutex);
        sdkp = __scsi_disk_get(disk);
-       up(&sd_ref_sem);
+       mutex_unlock(&sd_ref_mutex);
        return sdkp;
 }
 
@@ -205,11 +230,11 @@ static struct scsi_disk *scsi_disk_get_from_dev(struct device *dev)
 {
        struct scsi_disk *sdkp;
 
-       down(&sd_ref_sem);
+       mutex_lock(&sd_ref_mutex);
        sdkp = dev_get_drvdata(dev);
        if (sdkp)
                sdkp = __scsi_disk_get(sdkp->disk);
-       up(&sd_ref_sem);
+       mutex_unlock(&sd_ref_mutex);
        return sdkp;
 }
 
@@ -217,10 +242,10 @@ static void scsi_disk_put(struct scsi_disk *sdkp)
 {
        struct scsi_device *sdev = sdkp->device;
 
-       down(&sd_ref_sem);
+       mutex_lock(&sd_ref_mutex);
        kref_put(&sdkp->kref, scsi_disk_release);
        scsi_device_put(sdev);
-       up(&sd_ref_sem);
+       mutex_unlock(&sd_ref_mutex);
 }
 
 /**
@@ -233,48 +258,12 @@ static void scsi_disk_put(struct scsi_disk *sdkp)
  **/
 static int sd_init_command(struct scsi_cmnd * SCpnt)
 {
-       unsigned int this_count, timeout;
-       struct gendisk *disk;
-       sector_t block;
        struct scsi_device *sdp = SCpnt->device;
        struct request *rq = SCpnt->request;
-
-       timeout = sdp->timeout;
-
-       /*
-        * SG_IO from block layer already setup, just copy cdb basically
-        */
-       if (blk_pc_request(rq)) {
-               if (sizeof(rq->cmd) > sizeof(SCpnt->cmnd))
-                       return 0;
-
-               memcpy(SCpnt->cmnd, rq->cmd, sizeof(SCpnt->cmnd));
-               SCpnt->cmd_len = rq->cmd_len;
-               if (rq_data_dir(rq) == WRITE)
-                       SCpnt->sc_data_direction = DMA_TO_DEVICE;
-               else if (rq->data_len)
-                       SCpnt->sc_data_direction = DMA_FROM_DEVICE;
-               else
-                       SCpnt->sc_data_direction = DMA_NONE;
-
-               this_count = rq->data_len;
-               if (rq->timeout)
-                       timeout = rq->timeout;
-
-               SCpnt->transfersize = rq->data_len;
-               SCpnt->allowed = SD_PASSTHROUGH_RETRIES;
-               goto queue;
-       }
-
-       /*
-        * we only do REQ_CMD and REQ_BLOCK_PC
-        */
-       if (!blk_fs_request(rq))
-               return 0;
-
-       disk = rq->rq_disk;
-       block = rq->sector;
-       this_count = SCpnt->request_bufflen >> 9;
+       struct gendisk *disk = rq->rq_disk;
+       sector_t block = rq->sector;
+       unsigned int this_count = SCpnt->request_bufflen >> 9;
+       unsigned int timeout = sdp->timeout;
 
        SCSI_LOG_HLQUEUE(1, printk("sd_init_command: disk=%s, block=%llu, "
                            "count=%d\n", disk->disk_name,
@@ -360,6 +349,7 @@ static int sd_init_command(struct scsi_cmnd * SCpnt)
        
        if (block > 0xffffffff) {
                SCpnt->cmnd[0] += READ_16 - READ_6;
+               SCpnt->cmnd[1] |= blk_fua_rq(rq) ? 0x8 : 0;
                SCpnt->cmnd[2] = sizeof(block) > 4 ? (unsigned char) (block >> 56) & 0xff : 0;
                SCpnt->cmnd[3] = sizeof(block) > 4 ? (unsigned char) (block >> 48) & 0xff : 0;
                SCpnt->cmnd[4] = sizeof(block) > 4 ? (unsigned char) (block >> 40) & 0xff : 0;
@@ -379,6 +369,7 @@ static int sd_init_command(struct scsi_cmnd * SCpnt)
                        this_count = 0xffff;
 
                SCpnt->cmnd[0] += READ_10 - READ_6;
+               SCpnt->cmnd[1] |= blk_fua_rq(rq) ? 0x8 : 0;
                SCpnt->cmnd[2] = (unsigned char) (block >> 24) & 0xff;
                SCpnt->cmnd[3] = (unsigned char) (block >> 16) & 0xff;
                SCpnt->cmnd[4] = (unsigned char) (block >> 8) & 0xff;
@@ -387,6 +378,17 @@ static int sd_init_command(struct scsi_cmnd * SCpnt)
                SCpnt->cmnd[7] = (unsigned char) (this_count >> 8) & 0xff;
                SCpnt->cmnd[8] = (unsigned char) this_count & 0xff;
        } else {
+               if (unlikely(blk_fua_rq(rq))) {
+                       /*
+                        * This happens only if this drive failed
+                        * 10byte rw command with ILLEGAL_REQUEST
+                        * during operation and thus turned off
+                        * use_10_for_rw.
+                        */
+                       printk(KERN_ERR "sd: FUA write on READ/WRITE(6) drive\n");
+                       return 0;
+               }
+
                SCpnt->cmnd[1] |= (unsigned char) ((block >> 16) & 0x1f);
                SCpnt->cmnd[2] = (unsigned char) ((block >> 8) & 0xff);
                SCpnt->cmnd[3] = (unsigned char) block & 0xff;
@@ -404,8 +406,6 @@ static int sd_init_command(struct scsi_cmnd * SCpnt)
        SCpnt->transfersize = sdp->sector_size;
        SCpnt->underflow = this_count << 9;
        SCpnt->allowed = SD_MAX_RETRIES;
-
-queue:
        SCpnt->timeout_per_command = timeout;
 
        /*
@@ -530,7 +530,7 @@ static int sd_release(struct inode *inode, struct file *filp)
        return 0;
 }
 
-static int sd_hdio_getgeo(struct block_device *bdev, struct hd_geometry __user *loc)
+static int sd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 {
        struct scsi_disk *sdkp = scsi_disk(bdev->bd_disk);
        struct scsi_device *sdp = sdkp->device;
@@ -548,15 +548,9 @@ static int sd_hdio_getgeo(struct block_device *bdev, struct hd_geometry __user *
        else
                scsicam_bios_param(bdev, sdkp->capacity, diskinfo);
 
-       if (put_user(diskinfo[0], &loc->heads))
-               return -EFAULT;
-       if (put_user(diskinfo[1], &loc->sectors))
-               return -EFAULT;
-       if (put_user(diskinfo[2], &loc->cylinders))
-               return -EFAULT;
-       if (put_user((unsigned)get_start_sect(bdev),
-                    (unsigned long __user *)&loc->start))
-               return -EFAULT;
+       geo->heads = diskinfo[0];
+       geo->sectors = diskinfo[1];
+       geo->cylinders = diskinfo[2];
        return 0;
 }
 
@@ -596,12 +590,6 @@ static int sd_ioctl(struct inode * inode, struct file * filp,
        if (!scsi_block_when_processing_errors(sdp) || !error)
                return error;
 
-       if (cmd == HDIO_GETGEO) {
-               if (!arg)
-                       return -EINVAL;
-               return sd_hdio_getgeo(bdev, p);
-       }
-
        /*
         * Send SCSI addressing ioctls directly to mid level, send other
         * ioctls to block level and then onto mid level if they can't be
@@ -743,46 +731,13 @@ static int sd_issue_flush(struct device *dev, sector_t *error_sector)
        return ret;
 }
 
-static void sd_end_flush(request_queue_t *q, struct request *flush_rq)
+static void sd_prepare_flush(request_queue_t *q, struct request *rq)
 {
-       struct request *rq = flush_rq->end_io_data;
-       struct scsi_cmnd *cmd = rq->special;
-       unsigned int bytes = rq->hard_nr_sectors << 9;
-
-       if (!flush_rq->errors) {
-               spin_unlock(q->queue_lock);
-               scsi_io_completion(cmd, bytes, 0);
-               spin_lock(q->queue_lock);
-       } else if (blk_barrier_postflush(rq)) {
-               spin_unlock(q->queue_lock);
-               scsi_io_completion(cmd, 0, bytes);
-               spin_lock(q->queue_lock);
-       } else {
-               /*
-                * force journal abort of barriers
-                */
-               end_that_request_first(rq, -EOPNOTSUPP, rq->hard_nr_sectors);
-               end_that_request_last(rq);
-       }
-}
-
-static int sd_prepare_flush(request_queue_t *q, struct request *rq)
-{
-       struct scsi_device *sdev = q->queuedata;
-       struct scsi_disk *sdkp = scsi_disk_get_from_dev(&sdev->sdev_gendev);
-       int ret = 0;
-
-       if (sdkp) {
-               if (sdkp->WCE) {
-                       memset(rq->cmd, 0, sizeof(rq->cmd));
-                       rq->flags |= REQ_BLOCK_PC | REQ_SOFTBARRIER;
-                       rq->timeout = SD_TIMEOUT;
-                       rq->cmd[0] = SYNCHRONIZE_CACHE;
-                       ret = 1;
-               }
-               scsi_disk_put(sdkp);
-       }
-       return ret;
+       memset(rq->cmd, 0, sizeof(rq->cmd));
+       rq->flags |= REQ_BLOCK_PC;
+       rq->timeout = SD_TIMEOUT;
+       rq->cmd[0] = SYNCHRONIZE_CACHE;
+       rq->cmd_len = 10;
 }
 
 static void sd_rescan(struct device *dev)
@@ -836,6 +791,7 @@ static struct block_device_operations sd_fops = {
        .open                   = sd_open,
        .release                = sd_release,
        .ioctl                  = sd_ioctl,
+       .getgeo                 = sd_getgeo,
 #ifdef CONFIG_COMPAT
        .compat_ioctl           = sd_compat_ioctl,
 #endif
@@ -883,15 +839,7 @@ static void sd_rw_intr(struct scsi_cmnd * SCpnt)
           relatively rare error condition, no care is taken to avoid
           unnecessary additional work such as memcpy's that could be avoided.
         */
-
-       /* 
-        * If SG_IO from block layer then set good_bytes to stop retries;
-        * else if errors, check them, and if necessary prepare for
-        * (partial) retries.
-        */
-       if (blk_pc_request(SCpnt->request))
-               good_bytes = this_count;
-       else if (driver_byte(result) != 0 &&
+       if (driver_byte(result) != 0 &&
                 sense_valid && !sense_deferred) {
                switch (sshdr.sense_key) {
                case MEDIUM_ERROR:
@@ -1317,7 +1265,7 @@ sd_do_mode_sense(struct scsi_device *sdp, int dbd, int modepage,
 
 /*
  * read write protect setting, if possible - called only in sd_revalidate_disk()
- * called with buffer of length 512
+ * called with buffer of length SD_BUF_SIZE
  */
 static void
 sd_read_write_protect_flag(struct scsi_disk *sdkp, char *diskname,
@@ -1375,7 +1323,7 @@ sd_read_write_protect_flag(struct scsi_disk *sdkp, char *diskname,
 
 /*
  * sd_read_cache_type - called only from sd_revalidate_disk()
- * called with buffer of length 512
+ * called with buffer of length SD_BUF_SIZE
  */
 static void
 sd_read_cache_type(struct scsi_disk *sdkp, char *diskname,
@@ -1406,6 +1354,12 @@ sd_read_cache_type(struct scsi_disk *sdkp, char *diskname,
        if (!scsi_status_is_good(res))
                goto bad_sense;
 
+       if (!data.header_length) {
+               modepage = 6;
+               printk(KERN_ERR "%s: missing header in MODE_SENSE response\n",
+                      diskname);
+       }
+
        /* that went OK, now ask for the proper length */
        len = data.length;
 
@@ -1420,6 +1374,8 @@ sd_read_cache_type(struct scsi_disk *sdkp, char *diskname,
 
        /* Take headers and block descriptors into account */
        len += data.header_length + data.block_descriptor_length;
+       if (len > SD_BUF_SIZE)
+               goto bad_sense;
 
        /* Get the data */
        res = sd_do_mode_sense(sdp, dbd, modepage, buffer, len, &data, &sshdr);
@@ -1432,6 +1388,12 @@ sd_read_cache_type(struct scsi_disk *sdkp, char *diskname,
                int ct = 0;
                int offset = data.header_length + data.block_descriptor_length;
 
+               if (offset >= SD_BUF_SIZE - 2) {
+                       printk(KERN_ERR "%s: malformed MODE SENSE response",
+                               diskname);
+                       goto defaults;
+               }
+
                if ((buffer[offset] & 0x3f) != modepage) {
                        printk(KERN_ERR "%s: got wrong page\n", diskname);
                        goto defaults;
@@ -1445,10 +1407,18 @@ sd_read_cache_type(struct scsi_disk *sdkp, char *diskname,
                        sdkp->RCD = 0;
                }
 
+               sdkp->DPOFUA = (data.device_specific & 0x10) != 0;
+               if (sdkp->DPOFUA && !sdkp->device->use_10_for_rw) {
+                       printk(KERN_NOTICE "SCSI device %s: uses "
+                              "READ/WRITE(6), disabling FUA\n", diskname);
+                       sdkp->DPOFUA = 0;
+               }
+
                ct =  sdkp->RCD + 2*sdkp->WCE;
 
-               printk(KERN_NOTICE "SCSI device %s: drive cache: %s\n",
-                      diskname, types[ct]);
+               printk(KERN_NOTICE "SCSI device %s: drive cache: %s%s\n",
+                      diskname, types[ct],
+                      sdkp->DPOFUA ? " w/ FUA" : "");
 
                return;
        }
@@ -1468,6 +1438,7 @@ defaults:
               diskname);
        sdkp->WCE = 0;
        sdkp->RCD = 0;
+       sdkp->DPOFUA = 0;
 }
 
 /**
@@ -1480,6 +1451,7 @@ static int sd_revalidate_disk(struct gendisk *disk)
        struct scsi_disk *sdkp = scsi_disk(disk);
        struct scsi_device *sdp = sdkp->device;
        unsigned char *buffer;
+       unsigned ordered;
 
        SCSI_LOG_HLQUEUE(3, printk("sd_revalidate_disk: disk=%s\n", disk->disk_name));
 
@@ -1490,7 +1462,7 @@ static int sd_revalidate_disk(struct gendisk *disk)
        if (!scsi_device_online(sdp))
                goto out;
 
-       buffer = kmalloc(512, GFP_KERNEL | __GFP_DMA);
+       buffer = kmalloc(SD_BUF_SIZE, GFP_KERNEL | __GFP_DMA);
        if (!buffer) {
                printk(KERN_WARNING "(sd_revalidate_disk:) Memory allocation "
                       "failure.\n");
@@ -1513,12 +1485,24 @@ static int sd_revalidate_disk(struct gendisk *disk)
         */
        if (sdkp->media_present) {
                sd_read_capacity(sdkp, disk->disk_name, buffer);
-               if (sdp->removable)
-                       sd_read_write_protect_flag(sdkp, disk->disk_name,
-                                                  buffer);
+               sd_read_write_protect_flag(sdkp, disk->disk_name, buffer);
                sd_read_cache_type(sdkp, disk->disk_name, buffer);
        }
-               
+
+       /*
+        * We now have all cache related info, determine how we deal
+        * with ordered requests.  Note that as the current SCSI
+        * dispatch function can alter request order, we cannot use
+        * QUEUE_ORDERED_TAG_* even when ordered tag is supported.
+        */
+       if (sdkp->WCE)
+               ordered = sdkp->DPOFUA
+                       ? QUEUE_ORDERED_DRAIN_FUA : QUEUE_ORDERED_DRAIN_FLUSH;
+       else
+               ordered = QUEUE_ORDERED_DRAIN;
+
+       blk_queue_ordered(sdkp->disk->queue, ordered, sd_prepare_flush);
+
        set_capacity(disk, sdkp->capacity);
        kfree(buffer);
 
@@ -1560,11 +1544,10 @@ static int sd_probe(struct device *dev)
                                        "sd_attach\n"));
 
        error = -ENOMEM;
-       sdkp = kmalloc(sizeof(*sdkp), GFP_KERNEL);
+       sdkp = kzalloc(sizeof(*sdkp), GFP_KERNEL);
        if (!sdkp)
                goto out;
 
-       memset (sdkp, 0, sizeof(*sdkp));
        kref_init(&sdkp->kref);
 
        gd = alloc_disk(16);
@@ -1615,9 +1598,8 @@ static int sd_probe(struct device *dev)
                        'a' + m1, 'a' + m2, 'a' + m3);
        }
 
-       strcpy(gd->devfs_name, sdp->devfs_name);
-
        gd->private_data = &sdkp->driver;
+       gd->queue = sdkp->device->request_queue;
 
        sd_revalidate_disk(gd);
 
@@ -1625,7 +1607,6 @@ static int sd_probe(struct device *dev)
        gd->flags = GENHD_FL_DRIVERFS;
        if (sdp->removable)
                gd->flags |= GENHD_FL_REMOVABLE;
-       gd->queue = sdkp->device->request_queue;
 
        dev_set_drvdata(dev, sdkp);
        add_disk(gd);
@@ -1661,10 +1642,10 @@ static int sd_remove(struct device *dev)
        del_gendisk(sdkp->disk);
        sd_shutdown(dev);
 
-       down(&sd_ref_sem);
+       mutex_lock(&sd_ref_mutex);
        dev_set_drvdata(dev, NULL);
        kref_put(&sdkp->kref, scsi_disk_release);
-       up(&sd_ref_sem);
+       mutex_unlock(&sd_ref_mutex);
 
        return 0;
 }
@@ -1673,7 +1654,7 @@ static int sd_remove(struct device *dev)
  *     scsi_disk_release - Called to free the scsi_disk structure
  *     @kref: pointer to embedded kref
  *
- *     sd_ref_sem must be held entering this routine.  Because it is
+ *     sd_ref_mutex must be held entering this routine.  Because it is
  *     called on last put, you should always use the scsi_disk_get()
  *     scsi_disk_put() helpers which manipulate the semaphore directly
  *     and never do a direct kref_put().
@@ -1753,9 +1734,5 @@ static void __exit exit_sd(void)
                unregister_blkdev(sd_major(i), "sd");
 }
 
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Eric Youngdale");
-MODULE_DESCRIPTION("SCSI disk (sd) driver");
-
 module_init(init_sd);
 module_exit(exit_sd);