allow userspace to modify scsi command filter on per device basis
[safe/jmp/linux-2.6] / drivers / scsi / sg.c
index bfa86b3..f7abcca 100644 (file)
@@ -18,8 +18,8 @@
  *
  */
 
-static int sg_version_num = 30533;     /* 2 digits for each component */
-#define SG_VERSION_STR "3.5.33"
+static int sg_version_num = 30534;     /* 2 digits for each component */
+#define SG_VERSION_STR "3.5.34"
 
 /*
  *  D. P. Gilbert (dgilbert@interlog.com, dougg@triode.net.au), notes:
@@ -28,7 +28,6 @@ static int sg_version_num = 30533;    /* 2 digits for each component */
  *        (otherwise the macros compile to empty statements).
  *
  */
-#include <linux/config.h>
 #include <linux/module.h>
 
 #include <linux/fs.h>
@@ -42,13 +41,14 @@ static int sg_version_num = 30533;  /* 2 digits for each component */
 #include <linux/fcntl.h>
 #include <linux/init.h>
 #include <linux/poll.h>
-#include <linux/smp_lock.h>
 #include <linux/moduleparam.h>
 #include <linux/cdev.h>
+#include <linux/idr.h>
 #include <linux/seq_file.h>
 #include <linux/blkdev.h>
 #include <linux/delay.h>
 #include <linux/scatterlist.h>
+#include <linux/blktrace_api.h>
 
 #include "scsi.h"
 #include <scsi/scsi_dbg.h>
@@ -61,7 +61,7 @@ static int sg_version_num = 30533;    /* 2 digits for each component */
 
 #ifdef CONFIG_SCSI_PROC_FS
 #include <linux/proc_fs.h>
-static char *sg_version_date = "20050908";
+static char *sg_version_date = "20061027";
 
 static int sg_proc_init(void);
 static void sg_proc_cleanup(void);
@@ -95,25 +95,27 @@ int sg_big_buff = SG_DEF_RESERVED_SIZE;
 static int def_reserved_size = -1;     /* picks up init parameter */
 static int sg_allow_dio = SG_ALLOW_DIO_DEF;
 
+static int scatter_elem_sz = SG_SCATTER_SZ;
+static int scatter_elem_sz_prev = SG_SCATTER_SZ;
+
 #define SG_SECTOR_SZ 512
 #define SG_SECTOR_MSK (SG_SECTOR_SZ - 1)
 
-#define SG_DEV_ARR_LUMP 32     /* amount to over allocate sg_dev_arr by */
-
-static int sg_add(struct class_device *, struct class_interface *);
-static void sg_remove(struct class_device *, struct class_interface *);
+static int sg_add(struct device *, struct class_interface *);
+static void sg_remove(struct device *, struct class_interface *);
 
-static DEFINE_RWLOCK(sg_dev_arr_lock); /* Also used to lock
+static DEFINE_IDR(sg_index_idr);
+static DEFINE_RWLOCK(sg_index_lock);   /* Also used to lock
                                                           file descriptor list for device */
 
 static struct class_interface sg_interface = {
-       .add            = sg_add,
-       .remove         = sg_remove,
+       .add_dev        = sg_add,
+       .remove_dev     = sg_remove,
 };
 
 typedef struct sg_scatter_hold { /* holding area for scsi scatter gather info */
        unsigned short k_use_sg; /* Count of kernel scatter-gather pieces */
-       unsigned short sglist_len; /* size of malloc'd scatter-gather list ++ */
+       unsigned sglist_len; /* size of malloc'd scatter-gather list ++ */
        unsigned bufflen;       /* Size of (aggregate) data buffer */
        unsigned b_malloc_len;  /* actual len malloc'ed in buffer */
        struct scatterlist *buffer;/* scatter list */
@@ -161,6 +163,7 @@ typedef struct sg_device { /* holds the state of each scsi generic device */
        struct scsi_device *device;
        wait_queue_head_t o_excl_wait;  /* queue open() when O_EXCL in use */
        int sg_tablesize;       /* adapter's max scatter-gather table size */
+       u32 index;              /* device index number */
        Sg_fd *headfp;          /* first open fd belonging to this device */
        volatile char detached; /* 0->attached, 1->detached pending removal */
        volatile char exclude;  /* opened for exclusive access */
@@ -179,8 +182,9 @@ static int sg_build_sgat(Sg_scatter_hold * schp, const Sg_fd * sfp,
                         int tablesize);
 static ssize_t sg_new_read(Sg_fd * sfp, char __user *buf, size_t count,
                           Sg_request * srp);
-static ssize_t sg_new_write(Sg_fd * sfp, const char __user *buf, size_t count,
-                           int blocking, int read_only, Sg_request ** o_srp);
+static ssize_t sg_new_write(Sg_fd *sfp, struct file *file,
+                       const char __user *buf, size_t count, int blocking,
+                       int read_only, Sg_request **o_srp);
 static int sg_common_write(Sg_fd * sfp, Sg_request * srp,
                           unsigned char *cmnd, int timeout, int blocking);
 static int sg_u_iovec(sg_io_hdr_t * hp, int sg_num, int ind,
@@ -201,17 +205,12 @@ static Sg_request *sg_get_rq_mark(Sg_fd * sfp, int pack_id);
 static Sg_request *sg_add_request(Sg_fd * sfp);
 static int sg_remove_request(Sg_fd * sfp, Sg_request * srp);
 static int sg_res_in_use(Sg_fd * sfp);
-static int sg_allow_access(unsigned char opcode, char dev_type);
 static int sg_build_direct(Sg_request * srp, Sg_fd * sfp, int dxfer_len);
 static Sg_device *sg_get_dev(int dev);
 #ifdef CONFIG_SCSI_PROC_FS
 static int sg_last_dev(void);
 #endif
 
-static Sg_device **sg_dev_arr = NULL;
-static int sg_dev_max;
-static int sg_nr_dev;
-
 #define SZ_SG_HEADER sizeof(struct sg_header)
 #define SZ_SG_IO_HDR sizeof(sg_io_hdr_t)
 #define SZ_SG_IOVEC sizeof(sg_iovec_t)
@@ -545,7 +544,7 @@ sg_write(struct file *filp, const char __user *buf, size_t count, loff_t * ppos)
                return -EFAULT;
        blocking = !(filp->f_flags & O_NONBLOCK);
        if (old_hdr.reply_len < 0)
-               return sg_new_write(sfp, buf, count, blocking, 0, NULL);
+               return sg_new_write(sfp, filp, buf, count, blocking, 0, NULL);
        if (count < (SZ_SG_HEADER + 6))
                return -EIO;    /* The minimum scsi command length is 6 bytes. */
 
@@ -604,8 +603,9 @@ sg_write(struct file *filp, const char __user *buf, size_t count, loff_t * ppos)
         * but is is possible that the app intended SG_DXFER_TO_DEV, because there
         * is a non-zero input_size, so emit a warning.
         */
-       if (hp->dxfer_direction == SG_DXFER_TO_FROM_DEV)
-               if (printk_ratelimit())
+       if (hp->dxfer_direction == SG_DXFER_TO_FROM_DEV) {
+               static char cmd[TASK_COMM_LEN];
+               if (strcmp(current->comm, cmd) && printk_ratelimit()) {
                        printk(KERN_WARNING
                               "sg_write: data in/out %d/%d bytes for SCSI command 0x%x--"
                               "guessing data in;\n" KERN_WARNING "   "
@@ -613,13 +613,17 @@ sg_write(struct file *filp, const char __user *buf, size_t count, loff_t * ppos)
                               old_hdr.reply_len - (int)SZ_SG_HEADER,
                               input_size, (unsigned int) cmnd[0],
                               current->comm);
+                       strcpy(cmd, current->comm);
+               }
+       }
        k = sg_common_write(sfp, srp, cmnd, sfp->timeout, blocking);
        return (k < 0) ? k : count;
 }
 
 static ssize_t
-sg_new_write(Sg_fd * sfp, const char __user *buf, size_t count,
-            int blocking, int read_only, Sg_request ** o_srp)
+sg_new_write(Sg_fd *sfp, struct file *file, const char __user *buf,
+                size_t count, int blocking, int read_only,
+                Sg_request **o_srp)
 {
        int k;
        Sg_request *srp;
@@ -675,8 +679,7 @@ sg_new_write(Sg_fd * sfp, const char __user *buf, size_t count,
                sg_remove_request(sfp, srp);
                return -EFAULT;
        }
-       if (read_only &&
-           (!sg_allow_access(cmnd[0], sfp->parentdp->device->type))) {
+       if (read_only && (!blk_verify_command(file, cmnd))) {
                sg_remove_request(sfp, srp);
                return -EPERM;
        }
@@ -708,12 +711,12 @@ sg_common_write(Sg_fd * sfp, Sg_request * srp,
                          (int) cmnd[0], (int) hp->cmd_len));
 
        if ((k = sg_start_req(srp))) {
-               SCSI_LOG_TIMEOUT(1, printk("sg_write: start_req err=%d\n", k));
+               SCSI_LOG_TIMEOUT(1, printk("sg_common_write: start_req err=%d\n", k));
                sg_finish_rem_req(srp);
                return k;       /* probably out of space --> ENOMEM */
        }
        if ((k = sg_write_xfer(srp))) {
-               SCSI_LOG_TIMEOUT(1, printk("sg_write: write_xfer, bad address\n"));
+               SCSI_LOG_TIMEOUT(1, printk("sg_common_write: write_xfer, bad address\n"));
                sg_finish_rem_req(srp);
                return k;
        }
@@ -744,7 +747,7 @@ sg_common_write(Sg_fd * sfp, Sg_request * srp,
                                hp->dxfer_len, srp->data.k_use_sg, timeout,
                                SG_DEFAULT_RETRIES, srp, sg_cmd_done,
                                GFP_ATOMIC)) {
-               SCSI_LOG_TIMEOUT(1, printk("sg_write: scsi_execute_async failed\n"));
+               SCSI_LOG_TIMEOUT(1, printk("sg_common_write: scsi_execute_async failed\n"));
                /*
                 * most likely out of mem, but could also be a bad map
                 */
@@ -796,7 +799,7 @@ sg_ioctl(struct inode *inode, struct file *filp,
                        if (!access_ok(VERIFY_WRITE, p, SZ_SG_IO_HDR))
                                return -EFAULT;
                        result =
-                           sg_new_write(sfp, p, SZ_SG_IO_HDR,
+                           sg_new_write(sfp, filp, p, SZ_SG_IO_HDR,
                                         blocking, read_only, &srp);
                        if (result < 0)
                                return result;
@@ -915,6 +918,8 @@ sg_ioctl(struct inode *inode, struct file *filp,
                        return result;
                 if (val < 0)
                         return -EINVAL;
+               val = min_t(int, val,
+                               sdp->device->request_queue->max_sectors * 512);
                if (val != sfp->reserve.bufflen) {
                        if (sg_res_in_use(sfp) || sfp->mmap_called)
                                return -EBUSY;
@@ -923,7 +928,8 @@ sg_ioctl(struct inode *inode, struct file *filp,
                }
                return 0;
        case SG_GET_RESERVED_SIZE:
-               val = (int) sfp->reserve.bufflen;
+               val = min_t(int, sfp->reserve.bufflen,
+                               sdp->device->request_queue->max_sectors * 512);
                return put_user(val, ip);
        case SG_SET_COMMAND_Q:
                result = get_user(val, ip);
@@ -1042,7 +1048,7 @@ sg_ioctl(struct inode *inode, struct file *filp,
 
                        if (copy_from_user(&opcode, siocp->data, 1))
                                return -EFAULT;
-                       if (!sg_allow_access(opcode, sdp->device->type))
+                       if (!blk_verify_command(filp, &opcode))
                                return -EPERM;
                }
                return sg_scsi_ioctl(filp, sdp->device->request_queue, NULL, p);
@@ -1059,6 +1065,20 @@ sg_ioctl(struct inode *inode, struct file *filp,
                if (sdp->detached)
                        return -ENODEV;
                return scsi_ioctl(sdp->device, cmd_in, p);
+       case BLKSECTGET:
+               return put_user(sdp->device->request_queue->max_sectors * 512,
+                               ip);
+       case BLKTRACESETUP:
+               return blk_trace_setup(sdp->device->request_queue,
+                                      sdp->disk->disk_name,
+                                      sdp->device->sdev_gendev.devt,
+                                      (char *)arg);
+       case BLKTRACESTART:
+               return blk_trace_startstop(sdp->device->request_queue, 1);
+       case BLKTRACESTOP:
+               return blk_trace_startstop(sdp->device->request_queue, 0);
+       case BLKTRACETEARDOWN:
+               return blk_trace_remove(sdp->device->request_queue);
        default:
                if (read_only)
                        return -EPERM;  /* don't know so take safe approach */
@@ -1140,46 +1160,45 @@ sg_fasync(int fd, struct file *filp, int mode)
        return (retval < 0) ? retval : 0;
 }
 
-static struct page *
-sg_vma_nopage(struct vm_area_struct *vma, unsigned long addr, int *type)
+static int
+sg_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
        Sg_fd *sfp;
-       struct page *page = NOPAGE_SIGBUS;
        unsigned long offset, len, sa;
        Sg_scatter_hold *rsv_schp;
        struct scatterlist *sg;
        int k;
 
        if ((NULL == vma) || (!(sfp = (Sg_fd *) vma->vm_private_data)))
-               return page;
+               return VM_FAULT_SIGBUS;
        rsv_schp = &sfp->reserve;
-       offset = addr - vma->vm_start;
+       offset = vmf->pgoff << PAGE_SHIFT;
        if (offset >= rsv_schp->bufflen)
-               return page;
-       SCSI_LOG_TIMEOUT(3, printk("sg_vma_nopage: offset=%lu, scatg=%d\n",
+               return VM_FAULT_SIGBUS;
+       SCSI_LOG_TIMEOUT(3, printk("sg_vma_fault: offset=%lu, scatg=%d\n",
                                   offset, rsv_schp->k_use_sg));
        sg = rsv_schp->buffer;
        sa = vma->vm_start;
        for (k = 0; (k < rsv_schp->k_use_sg) && (sa < vma->vm_end);
-            ++k, ++sg) {
+            ++k, sg = sg_next(sg)) {
                len = vma->vm_end - sa;
                len = (len < sg->length) ? len : sg->length;
                if (offset < len) {
-                       page = sg->page;
+                       struct page *page;
+                       page = virt_to_page(page_address(sg_page(sg)) + offset);
                        get_page(page); /* increment page count */
-                       break;
+                       vmf->page = page;
+                       return 0; /* success */
                }
                sa += len;
                offset -= len;
        }
 
-       if (type)
-               *type = VM_FAULT_MINOR;
-       return page;
+       return VM_FAULT_SIGBUS;
 }
 
 static struct vm_operations_struct sg_mmap_vm_ops = {
-       .nopage = sg_vma_nopage,
+       .fault = sg_vma_fault,
 };
 
 static int
@@ -1205,7 +1224,7 @@ sg_mmap(struct file *filp, struct vm_area_struct *vma)
        sa = vma->vm_start;
        sg = rsv_schp->buffer;
        for (k = 0; (k < rsv_schp->k_use_sg) && (sa < vma->vm_end);
-            ++k, ++sg) {
+            ++k, sg = sg_next(sg)) {
                len = vma->vm_end - sa;
                len = (len < sg->length) ? len : sg->length;
                sa += len;
@@ -1281,7 +1300,7 @@ sg_cmd_done(void *data, char *sense, int result, int resid)
                sg_finish_rem_req(srp);
                srp = NULL;
                if (NULL == sfp->headrp) {
-                       SCSI_LOG_TIMEOUT(1, printk("sg...bh: already closed, final cleanup\n"));
+                       SCSI_LOG_TIMEOUT(1, printk("sg_cmd_done: already closed, final cleanup\n"));
                        if (0 == sg_remove_sfp(sdp, sfp)) {     /* device still present */
                                scsi_device_put(sdp->device);
                        }
@@ -1324,40 +1343,35 @@ static struct class *sg_sysfs_class;
 
 static int sg_sysfs_valid = 0;
 
-static int sg_alloc(struct gendisk *disk, struct scsi_device *scsidp)
+static Sg_device *sg_alloc(struct gendisk *disk, struct scsi_device *scsidp)
 {
        struct request_queue *q = scsidp->request_queue;
        Sg_device *sdp;
        unsigned long iflags;
-       void *old_sg_dev_arr = NULL;
-       int k, error;
+       int error;
+       u32 k;
 
        sdp = kzalloc(sizeof(Sg_device), GFP_KERNEL);
        if (!sdp) {
                printk(KERN_WARNING "kmalloc Sg_device failure\n");
-               return -ENOMEM;
+               return ERR_PTR(-ENOMEM);
+       }
+       error = -ENOMEM;
+       if (!idr_pre_get(&sg_index_idr, GFP_KERNEL)) {
+               printk(KERN_WARNING "idr expansion Sg_device failure\n");
+               goto out;
        }
 
-       write_lock_irqsave(&sg_dev_arr_lock, iflags);
-       if (unlikely(sg_nr_dev >= sg_dev_max)) {        /* try to resize */
-               Sg_device **tmp_da;
-               int tmp_dev_max = sg_nr_dev + SG_DEV_ARR_LUMP;
-               write_unlock_irqrestore(&sg_dev_arr_lock, iflags);
-
-               tmp_da = kzalloc(tmp_dev_max * sizeof(Sg_device *), GFP_KERNEL);
-               if (unlikely(!tmp_da))
-                       goto expand_failed;
+       write_lock_irqsave(&sg_index_lock, iflags);
+       error = idr_get_new(&sg_index_idr, sdp, &k);
+       write_unlock_irqrestore(&sg_index_lock, iflags);
 
-               write_lock_irqsave(&sg_dev_arr_lock, iflags);
-               memcpy(tmp_da, sg_dev_arr, sg_dev_max * sizeof(Sg_device *));
-               old_sg_dev_arr = sg_dev_arr;
-               sg_dev_arr = tmp_da;
-               sg_dev_max = tmp_dev_max;
+       if (error) {
+               printk(KERN_WARNING "idr allocation Sg_device failure: %d\n",
+                      error);
+               goto out;
        }
 
-       for (k = 0; k < sg_dev_max; k++)
-               if (!sg_dev_arr[k])
-                       break;
        if (unlikely(k >= SG_MAX_DEVS))
                goto overflow;
 
@@ -1368,25 +1382,17 @@ static int sg_alloc(struct gendisk *disk, struct scsi_device *scsidp)
        sdp->device = scsidp;
        init_waitqueue_head(&sdp->o_excl_wait);
        sdp->sg_tablesize = min(q->max_hw_segments, q->max_phys_segments);
+       sdp->index = k;
 
-       sg_nr_dev++;
-       sg_dev_arr[k] = sdp;
-       write_unlock_irqrestore(&sg_dev_arr_lock, iflags);
-       error = k;
-
+       error = 0;
  out:
-       if (error < 0)
+       if (error) {
                kfree(sdp);
-       kfree(old_sg_dev_arr);
-       return error;
-
- expand_failed:
-       printk(KERN_WARNING "sg_alloc: device array cannot be resized\n");
-       error = -ENOMEM;
-       goto out;
+               return ERR_PTR(error);
+       }
+       return sdp;
 
  overflow:
-       write_unlock_irqrestore(&sg_dev_arr_lock, iflags);
        sdev_printk(KERN_WARNING, scsidp,
                    "Unable to attach sg device type=%d, minor "
                    "number exceeds %d\n", scsidp->type, SG_MAX_DEVS - 1);
@@ -1395,13 +1401,14 @@ static int sg_alloc(struct gendisk *disk, struct scsi_device *scsidp)
 }
 
 static int
-sg_add(struct class_device *cl_dev, struct class_interface *cl_intf)
+sg_add(struct device *cl_dev, struct class_interface *cl_intf)
 {
-       struct scsi_device *scsidp = to_scsi_device(cl_dev->dev);
+       struct scsi_device *scsidp = to_scsi_device(cl_dev->parent);
        struct gendisk *disk;
        Sg_device *sdp = NULL;
        struct cdev * cdev = NULL;
-       int error, k;
+       int error;
+       unsigned long iflags;
 
        disk = alloc_disk(1);
        if (!disk) {
@@ -1419,43 +1426,55 @@ sg_add(struct class_device *cl_dev, struct class_interface *cl_intf)
        cdev->owner = THIS_MODULE;
        cdev->ops = &sg_fops;
 
-       error = sg_alloc(disk, scsidp);
-       if (error < 0) {
+       sdp = sg_alloc(disk, scsidp);
+       if (IS_ERR(sdp)) {
                printk(KERN_WARNING "sg_alloc failed\n");
+               error = PTR_ERR(sdp);
                goto out;
        }
-       k = error;
-       sdp = sg_dev_arr[k];
 
-       error = cdev_add(cdev, MKDEV(SCSI_GENERIC_MAJOR, k), 1);
+       error = cdev_add(cdev, MKDEV(SCSI_GENERIC_MAJOR, sdp->index), 1);
        if (error)
-               goto out;
+               goto cdev_add_err;
 
        sdp->cdev = cdev;
        if (sg_sysfs_valid) {
-               struct class_device * sg_class_member;
-
-               sg_class_member = class_device_create(sg_sysfs_class, NULL,
-                               MKDEV(SCSI_GENERIC_MAJOR, k), 
-                               cl_dev->dev, "%s", 
-                               disk->disk_name);
-               if (IS_ERR(sg_class_member))
-                       printk(KERN_WARNING "sg_add: "
-                               "class_device_create failed\n");
-               class_set_devdata(sg_class_member, sdp);
-               error = sysfs_create_link(&scsidp->sdev_gendev.kobj, 
+               struct device *sg_class_member;
+
+               sg_class_member = device_create_drvdata(sg_sysfs_class,
+                                                       cl_dev->parent,
+                                                       MKDEV(SCSI_GENERIC_MAJOR,
+                                                             sdp->index),
+                                                       sdp,
+                                                       "%s", disk->disk_name);
+               if (IS_ERR(sg_class_member)) {
+                       printk(KERN_ERR "sg_add: "
+                              "device_create failed\n");
+                       error = PTR_ERR(sg_class_member);
+                       goto cdev_add_err;
+               }
+               error = sysfs_create_link(&scsidp->sdev_gendev.kobj,
                                          &sg_class_member->kobj, "generic");
                if (error)
                        printk(KERN_ERR "sg_add: unable to make symlink "
-                                       "'generic' back to sg%d\n", k);
+                                       "'generic' back to sg%d\n", sdp->index);
        } else
-               printk(KERN_WARNING "sg_add: sg_sys INvalid\n");
+               printk(KERN_WARNING "sg_add: sg_sys Invalid\n");
 
        sdev_printk(KERN_NOTICE, scsidp,
-                   "Attached scsi generic sg%d type %d\n", k,scsidp->type);
+                   "Attached scsi generic sg%d type %d\n", sdp->index,
+                   scsidp->type);
+
+       dev_set_drvdata(cl_dev, sdp);
 
        return 0;
 
+cdev_add_err:
+       write_lock_irqsave(&sg_index_lock, iflags);
+       idr_remove(&sg_index_idr, sdp->index);
+       write_unlock_irqrestore(&sg_index_lock, iflags);
+       kfree(sdp);
+
 out:
        put_disk(disk);
        if (cdev)
@@ -1464,77 +1483,67 @@ out:
 }
 
 static void
-sg_remove(struct class_device *cl_dev, struct class_interface *cl_intf)
+sg_remove(struct device *cl_dev, struct class_interface *cl_intf)
 {
-       struct scsi_device *scsidp = to_scsi_device(cl_dev->dev);
-       Sg_device *sdp = NULL;
+       struct scsi_device *scsidp = to_scsi_device(cl_dev->parent);
+       Sg_device *sdp = dev_get_drvdata(cl_dev);
        unsigned long iflags;
        Sg_fd *sfp;
        Sg_fd *tsfp;
        Sg_request *srp;
        Sg_request *tsrp;
-       int k, delay;
+       int delay;
 
-       if (NULL == sg_dev_arr)
+       if (!sdp)
                return;
+
        delay = 0;
-       write_lock_irqsave(&sg_dev_arr_lock, iflags);
-       for (k = 0; k < sg_dev_max; k++) {
-               sdp = sg_dev_arr[k];
-               if ((NULL == sdp) || (sdp->device != scsidp))
-                       continue;       /* dirty but lowers nesting */
-               if (sdp->headfp) {
-                       sdp->detached = 1;
-                       for (sfp = sdp->headfp; sfp; sfp = tsfp) {
-                               tsfp = sfp->nextfp;
-                               for (srp = sfp->headrp; srp; srp = tsrp) {
-                                       tsrp = srp->nextrp;
-                                       if (sfp->closed || (0 == sg_srp_done(srp, sfp)))
-                                               sg_finish_rem_req(srp);
-                               }
-                               if (sfp->closed) {
-                                       scsi_device_put(sdp->device);
-                                       __sg_remove_sfp(sdp, sfp);
-                               } else {
-                                       delay = 1;
-                                       wake_up_interruptible(&sfp->read_wait);
-                                       kill_fasync(&sfp->async_qp, SIGPOLL,
-                                                   POLL_HUP);
-                               }
+       write_lock_irqsave(&sg_index_lock, iflags);
+       if (sdp->headfp) {
+               sdp->detached = 1;
+               for (sfp = sdp->headfp; sfp; sfp = tsfp) {
+                       tsfp = sfp->nextfp;
+                       for (srp = sfp->headrp; srp; srp = tsrp) {
+                               tsrp = srp->nextrp;
+                               if (sfp->closed || (0 == sg_srp_done(srp, sfp)))
+                                       sg_finish_rem_req(srp);
                        }
-                       SCSI_LOG_TIMEOUT(3, printk("sg_detach: dev=%d, dirty\n", k));
-                       if (NULL == sdp->headfp) {
-                               sg_dev_arr[k] = NULL;
+                       if (sfp->closed) {
+                               scsi_device_put(sdp->device);
+                               __sg_remove_sfp(sdp, sfp);
+                       } else {
+                               delay = 1;
+                               wake_up_interruptible(&sfp->read_wait);
+                               kill_fasync(&sfp->async_qp, SIGPOLL,
+                                           POLL_HUP);
                        }
-               } else {        /* nothing active, simple case */
-                       SCSI_LOG_TIMEOUT(3, printk("sg_detach: dev=%d\n", k));
-                       sg_dev_arr[k] = NULL;
                }
-               sg_nr_dev--;
-               break;
-       }
-       write_unlock_irqrestore(&sg_dev_arr_lock, iflags);
-
-       if (sdp) {
-               sysfs_remove_link(&scsidp->sdev_gendev.kobj, "generic");
-               class_device_destroy(sg_sysfs_class, MKDEV(SCSI_GENERIC_MAJOR, k));
-               cdev_del(sdp->cdev);
-               sdp->cdev = NULL;
-               put_disk(sdp->disk);
-               sdp->disk = NULL;
-               if (NULL == sdp->headfp)
-                       kfree((char *) sdp);
-       }
+               SCSI_LOG_TIMEOUT(3, printk("sg_remove: dev=%d, dirty\n", sdp->index));
+               if (NULL == sdp->headfp) {
+                       idr_remove(&sg_index_idr, sdp->index);
+               }
+       } else {        /* nothing active, simple case */
+               SCSI_LOG_TIMEOUT(3, printk("sg_remove: dev=%d\n", sdp->index));
+               idr_remove(&sg_index_idr, sdp->index);
+       }
+       write_unlock_irqrestore(&sg_index_lock, iflags);
+
+       sysfs_remove_link(&scsidp->sdev_gendev.kobj, "generic");
+       device_destroy(sg_sysfs_class, MKDEV(SCSI_GENERIC_MAJOR, sdp->index));
+       cdev_del(sdp->cdev);
+       sdp->cdev = NULL;
+       put_disk(sdp->disk);
+       sdp->disk = NULL;
+       if (NULL == sdp->headfp)
+               kfree(sdp);
 
        if (delay)
                msleep(10);     /* dirty detach so delay device destruction */
 }
 
-/* Set 'perm' (4th argument) to 0 to disable module_param's definition
- * of sysfs parameters (which module_param doesn't yet support).
- * Sysfs parameters defined explicitly below.
- */
-module_param_named(def_reserved_size, def_reserved_size, int, S_IRUGO);
+module_param_named(scatter_elem_sz, scatter_elem_sz, int, S_IRUGO | S_IWUSR);
+module_param_named(def_reserved_size, def_reserved_size, int,
+                  S_IRUGO | S_IWUSR);
 module_param_named(allow_dio, sg_allow_dio, int, S_IRUGO | S_IWUSR);
 
 MODULE_AUTHOR("Douglas Gilbert");
@@ -1543,6 +1552,8 @@ MODULE_LICENSE("GPL");
 MODULE_VERSION(SG_VERSION_STR);
 MODULE_ALIAS_CHARDEV_MAJOR(SCSI_GENERIC_MAJOR);
 
+MODULE_PARM_DESC(scatter_elem_sz, "scatter gather element "
+                "size (default: max(SG_SCATTER_SZ, PAGE_SIZE))");
 MODULE_PARM_DESC(def_reserved_size, "size of buffer reserved for each fd");
 MODULE_PARM_DESC(allow_dio, "allow direct I/O (default: 0 (disallow))");
 
@@ -1551,8 +1562,14 @@ init_sg(void)
 {
        int rc;
 
+       if (scatter_elem_sz < PAGE_SIZE) {
+               scatter_elem_sz = PAGE_SIZE;
+               scatter_elem_sz_prev = scatter_elem_sz;
+       }
        if (def_reserved_size >= 0)
                sg_big_buff = def_reserved_size;
+       else
+               def_reserved_size = sg_big_buff;
 
        rc = register_chrdev_region(MKDEV(SCSI_GENERIC_MAJOR, 0), 
                                    SG_MAX_DEVS, "sg");
@@ -1588,9 +1605,7 @@ exit_sg(void)
        sg_sysfs_valid = 0;
        unregister_chrdev_region(MKDEV(SCSI_GENERIC_MAJOR, 0),
                                 SG_MAX_DEVS);
-       kfree((char *)sg_dev_arr);
-       sg_dev_arr = NULL;
-       sg_dev_max = 0;
+       idr_destroy(&sg_index_idr);
 }
 
 static int
@@ -1657,6 +1672,7 @@ sg_build_sgat(Sg_scatter_hold * schp, const Sg_fd * sfp, int tablesize)
        schp->buffer = kzalloc(sg_bufflen, gfp_flags);
        if (!schp->buffer)
                return -ENOMEM;
+       sg_init_table(schp->buffer, tablesize);
        schp->sglist_len = sg_bufflen;
        return tablesize;       /* number of scat_gath elements allocated */
 }
@@ -1722,16 +1738,12 @@ st_map_user_pages(struct scatterlist *sgl, const unsigned int max_pages,
                   goto out_unlock; */
         }
 
-       sgl[0].page = pages[0];
-       sgl[0].offset = uaddr & ~PAGE_MASK;
+       sg_set_page(sgl, pages[0], 0, uaddr & ~PAGE_MASK);
        if (nr_pages > 1) {
                sgl[0].length = PAGE_SIZE - sgl[0].offset;
                count -= sgl[0].length;
-               for (i=1; i < nr_pages ; i++) {
-                       sgl[i].page = pages[i]; 
-                       sgl[i].length = count < PAGE_SIZE ? count : PAGE_SIZE;
-                       count -= PAGE_SIZE;
-               }
+               for (i=1; i < nr_pages ; i++)
+                       sg_set_page(&sgl[i], pages[i], count < PAGE_SIZE ? count : PAGE_SIZE, 0);
        }
        else {
                sgl[0].length = count;
@@ -1759,7 +1771,7 @@ st_unmap_user_pages(struct scatterlist *sgl, const unsigned int nr_pages,
        int i;
 
        for (i=0; i < nr_pages; i++) {
-               struct page *page = sgl[i].page;
+               struct page *page = sg_page(&sgl[i]);
 
                if (dirtied)
                        SetPageDirty(page);
@@ -1799,8 +1811,10 @@ sg_build_direct(Sg_request * srp, Sg_fd * sfp, int dxfer_len)
        res = st_map_user_pages(schp->buffer, mx_sc_elems,
                                (unsigned long)hp->dxferp, dxfer_len, 
                                (SG_DXFER_TO_DEV == hp->dxfer_direction) ? 1 : 0);
-       if (res <= 0)
+       if (res <= 0) {
+               sg_remove_scat(schp);
                return 1;
+       }
        schp->k_use_sg = res;
        schp->dio_in_use = 1;
        hp->info |= SG_INFO_DIRECT_IO;
@@ -1819,7 +1833,7 @@ sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size)
        int blk_size = buff_size;
        struct page *p = NULL;
 
-       if ((blk_size < 0) || (!sfp))
+       if (blk_size < 0)
                return -EFAULT;
        if (0 == blk_size)
                ++blk_size;     /* don't know why */
@@ -1833,24 +1847,39 @@ sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size)
        if (mx_sc_elems < 0)
                return mx_sc_elems;     /* most likely -ENOMEM */
 
+       num = scatter_elem_sz;
+       if (unlikely(num != scatter_elem_sz_prev)) {
+               if (num < PAGE_SIZE) {
+                       scatter_elem_sz = PAGE_SIZE;
+                       scatter_elem_sz_prev = PAGE_SIZE;
+               } else
+                       scatter_elem_sz_prev = num;
+       }
        for (k = 0, sg = schp->buffer, rem_sz = blk_size;
             (rem_sz > 0) && (k < mx_sc_elems);
-            ++k, rem_sz -= ret_sz, ++sg) {
+            ++k, rem_sz -= ret_sz, sg = sg_next(sg)) {
                
-               num = (rem_sz > SG_SCATTER_SZ) ? SG_SCATTER_SZ : rem_sz;
+               num = (rem_sz > scatter_elem_sz_prev) ?
+                     scatter_elem_sz_prev : rem_sz;
                p = sg_page_malloc(num, sfp->low_dma, &ret_sz);
                if (!p)
                        return -ENOMEM;
 
-               sg->page = p;
-               sg->length = ret_sz;
+               if (num == scatter_elem_sz_prev) {
+                       if (unlikely(ret_sz > scatter_elem_sz_prev)) {
+                               scatter_elem_sz = ret_sz;
+                               scatter_elem_sz_prev = ret_sz;
+                       }
+               }
+               sg_set_page(sg, p, (ret_sz > num) ? num : ret_sz, 0);
 
-               SCSI_LOG_TIMEOUT(5, printk("sg_build_build: k=%d, a=0x%p, len=%d\n",
-                                 k, p, ret_sz));
+               SCSI_LOG_TIMEOUT(5, printk("sg_build_indirect: k=%d, num=%d, "
+                                "ret_sz=%d\n", k, num, ret_sz));
        }               /* end of for loop */
 
        schp->k_use_sg = k;
-       SCSI_LOG_TIMEOUT(5, printk("sg_build_indirect: k_use_sg=%d, rem_sz=%d\n", k, rem_sz));
+       SCSI_LOG_TIMEOUT(5, printk("sg_build_indirect: k_use_sg=%d, "
+                        "rem_sz=%d\n", k, rem_sz));
 
        schp->bufflen = blk_size;
        if (rem_sz > 0) /* must have failed */
@@ -1894,14 +1923,14 @@ sg_write_xfer(Sg_request * srp)
                onum = 1;
 
        ksglen = sg->length;
-       p = page_address(sg->page);
+       p = page_address(sg_page(sg));
        for (j = 0, k = 0; j < onum; ++j) {
                res = sg_u_iovec(hp, iovec_count, j, 1, &usglen, &up);
                if (res)
                        return res;
 
-               for (; p; ++sg, ksglen = sg->length,
-                    p = page_address(sg->page)) {
+               for (; p; sg = sg_next(sg), ksglen = sg->length,
+                    p = page_address(sg_page(sg))) {
                        if (usglen <= 0)
                                break;
                        if (ksglen > usglen) {
@@ -1978,12 +2007,12 @@ sg_remove_scat(Sg_scatter_hold * schp)
                } else {
                        int k;
 
-                       for (k = 0; (k < schp->k_use_sg) && sg->page;
-                            ++k, ++sg) {
+                       for (k = 0; (k < schp->k_use_sg) && sg_page(sg);
+                            ++k, sg = sg_next(sg)) {
                                SCSI_LOG_TIMEOUT(5, printk(
-                                   "sg_remove_scat: k=%d, a=0x%p, len=%d\n",
-                                   k, sg->page, sg->length));
-                               sg_page_free(sg->page, sg->length);
+                                   "sg_remove_scat: k=%d, pg=0x%p, len=%d\n",
+                                   k, sg_page(sg), sg->length));
+                               sg_page_free(sg_page(sg), sg->length);
                        }
                }
                kfree(schp->buffer);
@@ -2025,15 +2054,15 @@ sg_read_xfer(Sg_request * srp)
        } else
                onum = 1;
 
-       p = page_address(sg->page);
+       p = page_address(sg_page(sg));
        ksglen = sg->length;
        for (j = 0, k = 0; j < onum; ++j) {
                res = sg_u_iovec(hp, iovec_count, j, 0, &usglen, &up);
                if (res)
                        return res;
 
-               for (; p; ++sg, ksglen = sg->length,
-                    p = page_address(sg->page)) {
+               for (; p; sg = sg_next(sg), ksglen = sg->length,
+                    p = page_address(sg_page(sg))) {
                        if (usglen <= 0)
                                break;
                        if (ksglen > usglen) {
@@ -2079,15 +2108,15 @@ sg_read_oxfer(Sg_request * srp, char __user *outp, int num_read_xfer)
        if ((!outp) || (num_read_xfer <= 0))
                return 0;
 
-       for (k = 0; (k < schp->k_use_sg) && sg->page; ++k, ++sg) {
+       for (k = 0; (k < schp->k_use_sg) && sg_page(sg); ++k, sg = sg_next(sg)) {
                num = sg->length;
                if (num > num_read_xfer) {
-                       if (__copy_to_user(outp, page_address(sg->page),
+                       if (__copy_to_user(outp, page_address(sg_page(sg)),
                                           num_read_xfer))
                                return -EFAULT;
                        break;
                } else {
-                       if (__copy_to_user(outp, page_address(sg->page),
+                       if (__copy_to_user(outp, page_address(sg_page(sg)),
                                           num))
                                return -EFAULT;
                        num_read_xfer -= num;
@@ -2129,7 +2158,7 @@ sg_link_reserve(Sg_fd * sfp, Sg_request * srp, int size)
        SCSI_LOG_TIMEOUT(4, printk("sg_link_reserve: size=%d\n", size));
        rem = size;
 
-       for (k = 0; k < rsv_schp->k_use_sg; ++k, ++sg) {
+       for (k = 0; k < rsv_schp->k_use_sg; ++k, sg = sg_next(sg)) {
                num = sg->length;
                if (rem <= num) {
                        sfp->save_scat_len = num;
@@ -2292,10 +2321,10 @@ sg_get_nth_sfp(Sg_device * sdp, int nth)
        unsigned long iflags;
        int k;
 
-       read_lock_irqsave(&sg_dev_arr_lock, iflags);
+       read_lock_irqsave(&sg_index_lock, iflags);
        for (k = 0, resp = sdp->headfp; resp && (k < nth);
             ++k, resp = resp->nextfp) ;
-       read_unlock_irqrestore(&sg_dev_arr_lock, iflags);
+       read_unlock_irqrestore(&sg_index_lock, iflags);
        return resp;
 }
 #endif
@@ -2305,6 +2334,7 @@ sg_add_sfp(Sg_device * sdp, int dev)
 {
        Sg_fd *sfp;
        unsigned long iflags;
+       int bufflen;
 
        sfp = kzalloc(sizeof(*sfp), GFP_ATOMIC | __GFP_NOWARN);
        if (!sfp)
@@ -2321,7 +2351,7 @@ sg_add_sfp(Sg_device * sdp, int dev)
        sfp->cmd_q = SG_DEF_COMMAND_Q;
        sfp->keep_orphan = SG_DEF_KEEP_ORPHAN;
        sfp->parentdp = sdp;
-       write_lock_irqsave(&sg_dev_arr_lock, iflags);
+       write_lock_irqsave(&sg_index_lock, iflags);
        if (!sdp->headfp)
                sdp->headfp = sfp;
        else {                  /* add to tail of existing list */
@@ -2330,9 +2360,14 @@ sg_add_sfp(Sg_device * sdp, int dev)
                        pfp = pfp->nextfp;
                pfp->nextfp = sfp;
        }
-       write_unlock_irqrestore(&sg_dev_arr_lock, iflags);
+       write_unlock_irqrestore(&sg_index_lock, iflags);
        SCSI_LOG_TIMEOUT(3, printk("sg_add_sfp: sfp=0x%p\n", sfp));
-       sg_build_reserve(sfp, sg_big_buff);
+       if (unlikely(sg_big_buff != def_reserved_size))
+               sg_big_buff = def_reserved_size;
+
+       bufflen = min_t(int, sg_big_buff,
+                       sdp->device->request_queue->max_sectors * 512);
+       sg_build_reserve(sfp, bufflen);
        SCSI_LOG_TIMEOUT(3, printk("sg_add_sfp:   bufflen=%d, k_use_sg=%d\n",
                           sfp->reserve.bufflen, sfp->reserve.k_use_sg));
        return sfp;
@@ -2386,22 +2421,14 @@ sg_remove_sfp(Sg_device * sdp, Sg_fd * sfp)
        if (0 == dirty) {
                unsigned long iflags;
 
-               write_lock_irqsave(&sg_dev_arr_lock, iflags);
+               write_lock_irqsave(&sg_index_lock, iflags);
                __sg_remove_sfp(sdp, sfp);
                if (sdp->detached && (NULL == sdp->headfp)) {
-                       int k, maxd;
-
-                       maxd = sg_dev_max;
-                       for (k = 0; k < maxd; ++k) {
-                               if (sdp == sg_dev_arr[k])
-                                       break;
-                       }
-                       if (k < maxd)
-                               sg_dev_arr[k] = NULL;
-                       kfree((char *) sdp);
+                       idr_remove(&sg_index_idr, sdp->index);
+                       kfree(sdp);
                        res = 1;
                }
-               write_unlock_irqrestore(&sg_dev_arr_lock, iflags);
+               write_unlock_irqrestore(&sg_index_lock, iflags);
        } else {
                /* MOD_INC's to inhibit unloading sg and associated adapter driver */
                /* only bump the access_count if we actually succeeded in
@@ -2428,16 +2455,16 @@ sg_res_in_use(Sg_fd * sfp)
        return srp ? 1 : 0;
 }
 
-/* If retSzp==NULL want exact size or fail */
+/* The size fetched (value output via retSzp) set when non-NULL return */
 static struct page *
 sg_page_malloc(int rqSz, int lowDma, int *retSzp)
 {
        struct page *resp = NULL;
        gfp_t page_mask;
        int order, a_size;
-       int resSz = rqSz;
+       int resSz;
 
-       if (rqSz <= 0)
+       if ((rqSz <= 0) || (NULL == retSzp))
                return resp;
 
        if (lowDma)
@@ -2447,8 +2474,9 @@ sg_page_malloc(int rqSz, int lowDma, int *retSzp)
 
        for (order = 0, a_size = PAGE_SIZE; a_size < rqSz;
             order++, a_size <<= 1) ;
+       resSz = a_size;         /* rounded up if necessary */
        resp = alloc_pages(page_mask, order);
-       while ((!resp) && order && retSzp) {
+       while ((!resp) && order) {
                --order;
                a_size >>= 1;   /* divide by 2, until PAGE_SIZE */
                resp =  alloc_pages(page_mask, order);  /* try half */
@@ -2457,8 +2485,7 @@ sg_page_malloc(int rqSz, int lowDma, int *retSzp)
        if (resp) {
                if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO))
                        memset(page_address(resp), 0, resSz);
-               if (retSzp)
-                       *retSzp = resSz;
+               *retSzp = resSz;
        }
        return resp;
 }
@@ -2479,38 +2506,27 @@ sg_page_free(struct page *page, int size)
 #define MAINTENANCE_IN_CMD 0xa3
 #endif
 
-static unsigned char allow_ops[] = { TEST_UNIT_READY, REQUEST_SENSE,
-       INQUIRY, READ_CAPACITY, READ_BUFFER, READ_6, READ_10, READ_12,
-       READ_16, MODE_SENSE, MODE_SENSE_10, LOG_SENSE, REPORT_LUNS,
-       SERVICE_ACTION_IN, RECEIVE_DIAGNOSTIC, READ_LONG, MAINTENANCE_IN_CMD
-};
-
+#ifdef CONFIG_SCSI_PROC_FS
 static int
-sg_allow_access(unsigned char opcode, char dev_type)
+sg_idr_max_id(int id, void *p, void *data)
 {
-       int k;
+       int *k = data;
+
+       if (*k < id)
+               *k = id;
 
-       if (TYPE_SCANNER == dev_type)   /* TYPE_ROM maybe burner */
-               return 1;
-       for (k = 0; k < sizeof (allow_ops); ++k) {
-               if (opcode == allow_ops[k])
-                       return 1;
-       }
        return 0;
 }
 
-#ifdef CONFIG_SCSI_PROC_FS
 static int
 sg_last_dev(void)
 {
-       int k;
+       int k = -1;
        unsigned long iflags;
 
-       read_lock_irqsave(&sg_dev_arr_lock, iflags);
-       for (k = sg_dev_max - 1; k >= 0; --k)
-               if (sg_dev_arr[k] && sg_dev_arr[k]->device)
-                       break;
-       read_unlock_irqrestore(&sg_dev_arr_lock, iflags);
+       read_lock_irqsave(&sg_index_lock, iflags);
+       idr_for_each(&sg_index_idr, sg_idr_max_id, &k);
+       read_unlock_irqrestore(&sg_index_lock, iflags);
        return k + 1;           /* origin 1 */
 }
 #endif
@@ -2518,15 +2534,13 @@ sg_last_dev(void)
 static Sg_device *
 sg_get_dev(int dev)
 {
-       Sg_device *sdp = NULL;
+       Sg_device *sdp;
        unsigned long iflags;
 
-       if (sg_dev_arr && (dev >= 0)) {
-               read_lock_irqsave(&sg_dev_arr_lock, iflags);
-               if (dev < sg_dev_max)
-                       sdp = sg_dev_arr[dev];
-               read_unlock_irqrestore(&sg_dev_arr_lock, iflags);
-       }
+       read_lock_irqsave(&sg_index_lock, iflags);
+       sdp = idr_find(&sg_index_idr, dev);
+       read_unlock_irqrestore(&sg_index_lock, iflags);
+
        return sdp;
 }
 
@@ -2633,9 +2647,7 @@ static int
 sg_proc_init(void)
 {
        int k, mask;
-       int num_leaves =
-           sizeof (sg_proc_leaf_arr) / sizeof (sg_proc_leaf_arr[0]);
-       struct proc_dir_entry *pdep;
+       int num_leaves = ARRAY_SIZE(sg_proc_leaf_arr);
        struct sg_proc_leaf * leaf;
 
        sg_proc_sgp = proc_mkdir(sg_proc_sg_dirname, NULL);
@@ -2644,13 +2656,10 @@ sg_proc_init(void)
        for (k = 0; k < num_leaves; ++k) {
                leaf = &sg_proc_leaf_arr[k];
                mask = leaf->fops->write ? S_IRUGO | S_IWUSR : S_IRUGO;
-               pdep = create_proc_entry(leaf->name, mask, sg_proc_sgp);
-               if (pdep) {
-                       leaf->fops->owner = THIS_MODULE,
-                       leaf->fops->read = seq_read,
-                       leaf->fops->llseek = seq_lseek,
-                       pdep->proc_fops = leaf->fops;
-               }
+               leaf->fops->owner = THIS_MODULE;
+               leaf->fops->read = seq_read;
+               leaf->fops->llseek = seq_lseek;
+               proc_create(leaf->name, mask, sg_proc_sgp, leaf->fops);
        }
        return 0;
 }
@@ -2659,8 +2668,7 @@ static void
 sg_proc_cleanup(void)
 {
        int k;
-       int num_leaves =
-           sizeof (sg_proc_leaf_arr) / sizeof (sg_proc_leaf_arr[0]);
+       int num_leaves = ARRAY_SIZE(sg_proc_leaf_arr);
 
        if (!sg_proc_sgp)
                return;
@@ -2762,8 +2770,6 @@ static void * dev_seq_start(struct seq_file *s, loff_t *pos)
        if (! it)
                return NULL;
 
-       if (NULL == sg_dev_arr)
-               return NULL;
        it->index = *pos;
        it->max = sg_last_dev();
        if (it->index >= it->max)
@@ -2899,8 +2905,8 @@ static int sg_proc_seq_show_debug(struct seq_file *s, void *v)
        Sg_device *sdp;
 
        if (it && (0 == it->index)) {
-               seq_printf(s, "dev_max(currently)=%d max_active_device=%d "
-                          "(origin 1)\n", sg_dev_max, (int)it->max);
+               seq_printf(s, "max_active_device=%d(origin 1)\n",
+                          (int)it->max);
                seq_printf(s, " def_reserved_size=%d\n", sg_big_buff);
        }
        sdp = it ? sg_get_dev(it->index) : NULL;