PCI: hotplug: pciehp: Removed check for hotplug of display devices
[safe/jmp/linux-2.6] / drivers / block / nbd.c
index b4c0888..218d091 100644 (file)
@@ -4,7 +4,7 @@
  * Note that you can not swap over this thing, yet. Seems to work but
  * deadlocks sometimes - you can not swap over TCP in general.
  * 
- * Copyright 1997-2000 Pavel Machek <pavel@ucw.cz>
+ * Copyright 1997-2000, 2008 Pavel Machek <pavel@suse.cz>
  * Parts copyright 2001 Steven Whitehouse <steve@chygwyn.com>
  *
  * This file is released under GPLv2 or later.
 #include <linux/compiler.h>
 #include <linux/err.h>
 #include <linux/kernel.h>
+#include <linux/slab.h>
 #include <net/sock.h>
 #include <linux/net.h>
+#include <linux/kthread.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -54,7 +56,8 @@ static unsigned int debugflags;
 #endif /* NDEBUG */
 
 static unsigned int nbds_max = 16;
-static struct nbd_device nbd_dev[MAX_NBD];
+static struct nbd_device *nbd_dev;
+static int max_part;
 
 /*
  * Use just one lock (or at most 1 per NIC). Two arguments for this:
@@ -100,17 +103,15 @@ static const char *nbdcmd_to_ascii(int cmd)
 
 static void nbd_end_request(struct request *req)
 {
-       int uptodate = (req->errors == 0) ? 1 : 0;
+       int error = req->errors ? -EIO : 0;
        struct request_queue *q = req->q;
        unsigned long flags;
 
        dprintk(DBG_BLKDEV, "%s: request %p: %s\n", req->rq_disk->disk_name,
-                       req, uptodate? "done": "failed");
+                       req, error ? "failed" : "done");
 
        spin_lock_irqsave(q->queue_lock, flags);
-       if (!end_that_request_first(req, uptodate, req->nr_sectors)) {
-               end_that_request_last(req, uptodate);
-       }
+       __blk_end_request_all(req, error);
        spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
@@ -155,6 +156,12 @@ static int sock_xmit(struct nbd_device *lo, int send, void *buf, int size,
        struct kvec iov;
        sigset_t blocked, oldset;
 
+       if (unlikely(!sock)) {
+               printk(KERN_ERR "%s: Attempted %s on closed socket in sock_xmit\n",
+                      lo->disk->disk_name, (send ? "send" : "recv"));
+               return -EINVAL;
+       }
+
        /* Allow interception of SIGKILL only
         * Don't allow other signals to interrupt the transmission */
        siginitsetinv(&blocked, sigmask(SIGKILL));
@@ -225,19 +232,19 @@ static int nbd_send_req(struct nbd_device *lo, struct request *req)
 {
        int result, flags;
        struct nbd_request request;
-       unsigned long size = req->nr_sectors << 9;
+       unsigned long size = blk_rq_bytes(req);
 
        request.magic = htonl(NBD_REQUEST_MAGIC);
        request.type = htonl(nbd_cmd(req));
-       request.from = cpu_to_be64((u64) req->sector << 9);
+       request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
        request.len = htonl(size);
        memcpy(request.handle, &req, sizeof(req));
 
-       dprintk(DBG_TX, "%s: request %p: sending control (%s@%llu,%luB)\n",
+       dprintk(DBG_TX, "%s: request %p: sending control (%s@%llu,%uB)\n",
                        lo->disk->disk_name, req,
                        nbdcmd_to_ascii(nbd_cmd(req)),
-                       (unsigned long long)req->sector << 9,
-                       req->nr_sectors << 9);
+                       (unsigned long long)blk_rq_pos(req) << 9,
+                       blk_rq_bytes(req));
        result = sock_xmit(lo, 1, &request, sizeof(request),
                        (nbd_cmd(req) == NBD_CMD_WRITE) ? MSG_MORE : 0);
        if (result <= 0) {
@@ -270,7 +277,7 @@ static int nbd_send_req(struct nbd_device *lo, struct request *req)
        return 0;
 
 error_out:
-       return 1;
+       return -EIO;
 }
 
 static struct request *nbd_find_request(struct nbd_device *lo,
@@ -333,7 +340,7 @@ static struct request *nbd_read_stat(struct nbd_device *lo)
        }
 
        req = nbd_find_request(lo, *(struct request **)reply.handle);
-       if (unlikely(IS_ERR(req))) {
+       if (IS_ERR(req)) {
                result = PTR_ERR(req);
                if (result != -ENOENT)
                        goto harderror;
@@ -375,14 +382,17 @@ harderror:
        return NULL;
 }
 
-static ssize_t pid_show(struct gendisk *disk, char *page)
+static ssize_t pid_show(struct device *dev,
+                       struct device_attribute *attr, char *buf)
 {
-       return sprintf(page, "%ld\n",
+       struct gendisk *disk = dev_to_disk(dev);
+
+       return sprintf(buf, "%ld\n",
                (long) ((struct nbd_device *)disk->private_data)->pid);
 }
 
-static struct disk_attribute pid_attr = {
-       .attr = { .name = "pid", .mode = S_IRUGO },
+static struct device_attribute pid_attr = {
+       .attr = { .name = "pid", .mode = S_IRUGO},
        .show = pid_show,
 };
 
@@ -394,16 +404,18 @@ static int nbd_do_it(struct nbd_device *lo)
        BUG_ON(lo->magic != LO_MAGIC);
 
        lo->pid = current->pid;
-       ret = sysfs_create_file(&lo->disk->kobj, &pid_attr.attr);
+       ret = sysfs_create_file(&disk_to_dev(lo->disk)->kobj, &pid_attr.attr);
        if (ret) {
                printk(KERN_ERR "nbd: sysfs_create_file failed!");
+               lo->pid = 0;
                return ret;
        }
 
        while ((req = nbd_read_stat(lo)) != NULL)
                nbd_end_request(req);
 
-       sysfs_remove_file(&lo->disk->kobj, &pid_attr.attr);
+       sysfs_remove_file(&disk_to_dev(lo->disk)->kobj, &pid_attr.attr);
+       lo->pid = 0;
        return 0;
 }
 
@@ -434,6 +446,83 @@ static void nbd_clear_que(struct nbd_device *lo)
 }
 
 
+static void nbd_handle_req(struct nbd_device *lo, struct request *req)
+{      /* Transmit req via nbd_send_req(), or complete it with an error. */
+       if (!blk_fs_request(req))
+               goto error_out;
+       /* Default to a read; a write is refused when NBD_READ_ONLY is set. */
+       nbd_cmd(req) = NBD_CMD_READ;
+       if (rq_data_dir(req) == WRITE) {
+               nbd_cmd(req) = NBD_CMD_WRITE;
+               if (lo->flags & NBD_READ_ONLY) {
+                       printk(KERN_ERR "%s: Write on read-only\n",
+                                       lo->disk->disk_name);
+                       goto error_out;
+               }
+       }
+
+       req->errors = 0;
+       /* tx_lock covers the socket check and the send below. */
+       mutex_lock(&lo->tx_lock);
+       if (unlikely(!lo->sock)) {
+               mutex_unlock(&lo->tx_lock);
+               printk(KERN_ERR "%s: Attempted send on closed socket\n",
+                      lo->disk->disk_name);
+               goto error_out;
+       }
+
+       lo->active_req = req;
+       /* Sent: add to queue_head. Failed: complete it with an error now. */
+       if (nbd_send_req(lo, req) != 0) {
+               printk(KERN_ERR "%s: Request send failed\n",
+                               lo->disk->disk_name);
+               req->errors++;
+               nbd_end_request(req);
+       } else {
+               spin_lock(&lo->queue_lock);
+               list_add(&req->queuelist, &lo->queue_head);
+               spin_unlock(&lo->queue_lock);
+       }
+
+       lo->active_req = NULL;
+       mutex_unlock(&lo->tx_lock);
+       wake_up_all(&lo->active_wq);
+
+       return;
+       /* Common failure exit: mark an error and complete the request. */
+error_out:
+       req->errors++;
+       nbd_end_request(req);
+}
+
+static int nbd_thread(void *data)
+{      /* Worker: handles waiting_queue entries until stopped and drained. */
+       struct nbd_device *lo = data;
+       struct request *req;
+
+       set_user_nice(current, -20);
+       while (!kthread_should_stop() || !list_empty(&lo->waiting_queue)) {
+               /* sleep until a stop is requested or a request is queued */
+               wait_event_interruptible(lo->waiting_wq,
+                                        kthread_should_stop() ||
+                                        !list_empty(&lo->waiting_queue));
+
+               /* extract request, if any; the wait may end with none queued */
+               if (list_empty(&lo->waiting_queue))
+                       continue;
+               /* unlink the head of waiting_queue under queue_lock */
+               spin_lock_irq(&lo->queue_lock);
+               req = list_entry(lo->waiting_queue.next, struct request,
+                                queuelist);
+               list_del_init(&req->queuelist);
+               spin_unlock_irq(&lo->queue_lock);
+
+               /* handle request */
+               nbd_handle_req(lo, req);
+       }
+       return 0;
+}
+
 /*
  * We always wait for result of write, for now. It would be nice to make it optional
  * in future
@@ -441,168 +530,145 @@ static void nbd_clear_que(struct nbd_device *lo)
  *   { printk( "Warning: Ignoring result!\n"); nbd_end_request( req ); }
  */
 
-static void do_nbd_request(struct request_queue * q)
+static void do_nbd_request(struct request_queue *q)
 {
        struct request *req;
        
-       while ((req = elv_next_request(q)) != NULL) {
+       while ((req = blk_fetch_request(q)) != NULL) {
                struct nbd_device *lo;
 
-               blkdev_dequeue_request(req);
+               spin_unlock_irq(q->queue_lock);
+
                dprintk(DBG_BLKDEV, "%s: request %p: dequeued (flags=%x)\n",
                                req->rq_disk->disk_name, req, req->cmd_type);
 
-               if (!blk_fs_request(req))
-                       goto error_out;
-
                lo = req->rq_disk->private_data;
 
                BUG_ON(lo->magic != LO_MAGIC);
 
-               nbd_cmd(req) = NBD_CMD_READ;
-               if (rq_data_dir(req) == WRITE) {
-                       nbd_cmd(req) = NBD_CMD_WRITE;
-                       if (lo->flags & NBD_READ_ONLY) {
-                               printk(KERN_ERR "%s: Write on read-only\n",
-                                               lo->disk->disk_name);
-                               goto error_out;
-                       }
-               }
-
-               req->errors = 0;
-               spin_unlock_irq(q->queue_lock);
-
-               mutex_lock(&lo->tx_lock);
                if (unlikely(!lo->sock)) {
-                       mutex_unlock(&lo->tx_lock);
                        printk(KERN_ERR "%s: Attempted send on closed socket\n",
-                              lo->disk->disk_name);
+                               lo->disk->disk_name);
                        req->errors++;
                        nbd_end_request(req);
                        spin_lock_irq(q->queue_lock);
                        continue;
                }
 
-               lo->active_req = req;
-
-               if (nbd_send_req(lo, req) != 0) {
-                       printk(KERN_ERR "%s: Request send failed\n",
-                                       lo->disk->disk_name);
-                       req->errors++;
-                       nbd_end_request(req);
-               } else {
-                       spin_lock(&lo->queue_lock);
-                       list_add(&req->queuelist, &lo->queue_head);
-                       spin_unlock(&lo->queue_lock);
-               }
+               spin_lock_irq(&lo->queue_lock);
+               list_add_tail(&req->queuelist, &lo->waiting_queue);
+               spin_unlock_irq(&lo->queue_lock);
 
-               lo->active_req = NULL;
-               mutex_unlock(&lo->tx_lock);
-               wake_up_all(&lo->active_wq);
+               wake_up(&lo->waiting_wq);
 
                spin_lock_irq(q->queue_lock);
-               continue;
-
-error_out:
-               req->errors++;
-               spin_unlock(q->queue_lock);
-               nbd_end_request(req);
-               spin_lock(q->queue_lock);
        }
 }
 
-static int nbd_ioctl(struct inode *inode, struct file *file,
-                    unsigned int cmd, unsigned long arg)
-{
-       struct nbd_device *lo = inode->i_bdev->bd_disk->private_data;
-       int error;
-       struct request sreq ;
-
-       if (!capable(CAP_SYS_ADMIN))
-               return -EPERM;
-
-       BUG_ON(lo->magic != LO_MAGIC);
-
-       /* Anyone capable of this syscall can do *real bad* things */
-       dprintk(DBG_IOCTL, "%s: nbd_ioctl cmd=%s(0x%x) arg=%lu\n",
-                       lo->disk->disk_name, ioctl_cmd_to_ascii(cmd), cmd, arg);
+/* Must be called with tx_lock held */
 
+static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo,
+                      unsigned int cmd, unsigned long arg)
+{
        switch (cmd) {
-       case NBD_DISCONNECT:
+       case NBD_DISCONNECT: {
+               struct request sreq;
+
                printk(KERN_INFO "%s: NBD_DISCONNECT\n", lo->disk->disk_name);
+
+               blk_rq_init(NULL, &sreq);
                sreq.cmd_type = REQ_TYPE_SPECIAL;
                nbd_cmd(&sreq) = NBD_CMD_DISC;
-               /*
-                * Set these to sane values in case server implementation
-                * fails to check the request type first and also to keep
-                * debugging output cleaner.
-                */
-               sreq.sector = 0;
-               sreq.nr_sectors = 0;
-                if (!lo->sock)
+               if (!lo->sock)
                        return -EINVAL;
-               mutex_lock(&lo->tx_lock);
-                nbd_send_req(lo, &sreq);
-               mutex_unlock(&lo->tx_lock);
+               nbd_send_req(lo, &sreq);
                 return 0;
+       }
  
-       case NBD_CLEAR_SOCK:
-               error = 0;
-               mutex_lock(&lo->tx_lock);
+       case NBD_CLEAR_SOCK: {
+               struct file *file;
+
                lo->sock = NULL;
-               mutex_unlock(&lo->tx_lock);
                file = lo->file;
                lo->file = NULL;
                nbd_clear_que(lo);
                BUG_ON(!list_empty(&lo->queue_head));
                if (file)
                        fput(file);
-               return error;
-       case NBD_SET_SOCK:
+               return 0;
+       }
+
+       case NBD_SET_SOCK: {
+               struct file *file;
                if (lo->file)
                        return -EBUSY;
-               error = -EINVAL;
                file = fget(arg);
                if (file) {
-                       inode = file->f_path.dentry->d_inode;
+                       struct inode *inode = file->f_path.dentry->d_inode;
                        if (S_ISSOCK(inode->i_mode)) {
                                lo->file = file;
                                lo->sock = SOCKET_I(inode);
-                               error = 0;
+                               if (max_part > 0)
+                                       bdev->bd_invalidated = 1;
+                               return 0;
                        } else {
                                fput(file);
                        }
                }
-               return error;
+               return -EINVAL;
+       }
+
        case NBD_SET_BLKSIZE:
                lo->blksize = arg;
                lo->bytesize &= ~(lo->blksize-1);
-               inode->i_bdev->bd_inode->i_size = lo->bytesize;
-               set_blocksize(inode->i_bdev, lo->blksize);
+               bdev->bd_inode->i_size = lo->bytesize;
+               set_blocksize(bdev, lo->blksize);
                set_capacity(lo->disk, lo->bytesize >> 9);
                return 0;
+
        case NBD_SET_SIZE:
                lo->bytesize = arg & ~(lo->blksize-1);
-               inode->i_bdev->bd_inode->i_size = lo->bytesize;
-               set_blocksize(inode->i_bdev, lo->blksize);
+               bdev->bd_inode->i_size = lo->bytesize;
+               set_blocksize(bdev, lo->blksize);
                set_capacity(lo->disk, lo->bytesize >> 9);
                return 0;
+
        case NBD_SET_TIMEOUT:
                lo->xmit_timeout = arg * HZ;
                return 0;
+
        case NBD_SET_SIZE_BLOCKS:
                lo->bytesize = ((u64) arg) * lo->blksize;
-               inode->i_bdev->bd_inode->i_size = lo->bytesize;
-               set_blocksize(inode->i_bdev, lo->blksize);
+               bdev->bd_inode->i_size = lo->bytesize;
+               set_blocksize(bdev, lo->blksize);
                set_capacity(lo->disk, lo->bytesize >> 9);
                return 0;
-       case NBD_DO_IT:
+
+       case NBD_DO_IT: {
+               struct task_struct *thread;
+               struct file *file;
+               int error;
+
+               if (lo->pid)
+                       return -EBUSY;
                if (!lo->file)
                        return -EINVAL;
+
+               mutex_unlock(&lo->tx_lock);
+
+               thread = kthread_create(nbd_thread, lo, lo->disk->disk_name);
+               if (IS_ERR(thread)) {
+                       mutex_lock(&lo->tx_lock);
+                       return PTR_ERR(thread);
+               }
+               wake_up_process(thread);
                error = nbd_do_it(lo);
+               kthread_stop(thread);
+
+               mutex_lock(&lo->tx_lock);
                if (error)
                        return error;
-               sock_shutdown(lo, 1);
+               sock_shutdown(lo, 0);
                file = lo->file;
                lo->file = NULL;
                nbd_clear_que(lo);
@@ -610,9 +676,13 @@ static int nbd_ioctl(struct inode *inode, struct file *file,
                if (file)
                        fput(file);
                lo->bytesize = 0;
-               inode->i_bdev->bd_inode->i_size = 0;
+               bdev->bd_inode->i_size = 0;
                set_capacity(lo->disk, 0);
+               if (max_part > 0)
+                       ioctl_by_bdev(bdev, BLKRRPART, 0);
                return lo->harderror;
+       }
+
        case NBD_CLEAR_QUE:
                /*
                 * This is for compatibility only.  The queue is always cleared
@@ -620,20 +690,43 @@ static int nbd_ioctl(struct inode *inode, struct file *file,
                 */
                BUG_ON(!lo->sock && !list_empty(&lo->queue_head));
                return 0;
+
        case NBD_PRINT_DEBUG:
                printk(KERN_INFO "%s: next = %p, prev = %p, head = %p\n",
-                       inode->i_bdev->bd_disk->disk_name,
+                       bdev->bd_disk->disk_name,
                        lo->queue_head.next, lo->queue_head.prev,
                        &lo->queue_head);
                return 0;
        }
-       return -EINVAL;
+       return -ENOTTY;
 }
 
-static struct block_device_operations nbd_fops =
+static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
+                    unsigned int cmd, unsigned long arg)
+{      /* mode is unused; every command requires CAP_SYS_ADMIN. */
+       struct nbd_device *lo = bdev->bd_disk->private_data;
+       int error;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       BUG_ON(lo->magic != LO_MAGIC);
+
+       /* Anyone capable of this syscall can do *real bad* things */
+       dprintk(DBG_IOCTL, "%s: nbd_ioctl cmd=%s(0x%x) arg=%lu\n",
+                       lo->disk->disk_name, ioctl_cmd_to_ascii(cmd), cmd, arg);
+       /* __nbd_ioctl() needs tx_lock held (NBD_DO_IT drops it while running). */
+       mutex_lock(&lo->tx_lock);
+       error = __nbd_ioctl(bdev, lo, cmd, arg);
+       mutex_unlock(&lo->tx_lock);
+
+       return error;
+}
+
+static const struct block_device_operations nbd_fops =
 {
        .owner =        THIS_MODULE,
-       .ioctl =        nbd_ioctl,
+       .locked_ioctl = nbd_ioctl,
 };
 
 /*
@@ -645,17 +738,25 @@ static int __init nbd_init(void)
 {
        int err = -ENOMEM;
        int i;
+       int part_shift;
 
        BUILD_BUG_ON(sizeof(struct nbd_request) != 28);
 
-       if (nbds_max > MAX_NBD) {
-               printk(KERN_CRIT "nbd: cannot allocate more than %u nbds; %u requested.\n", MAX_NBD,
-                               nbds_max);
+       if (max_part < 0) {
+               printk(KERN_CRIT "nbd: max_part must be >= 0\n");
                return -EINVAL;
        }
 
+       nbd_dev = kcalloc(nbds_max, sizeof(*nbd_dev), GFP_KERNEL);
+       if (!nbd_dev)
+               return -ENOMEM;
+
+       part_shift = 0;
+       if (max_part > 0)
+               part_shift = fls(max_part);
+
        for (i = 0; i < nbds_max; i++) {
-               struct gendisk *disk = alloc_disk(1);
+               struct gendisk *disk = alloc_disk(1 << part_shift);
                if (!disk)
                        goto out;
                nbd_dev[i].disk = disk;
@@ -669,6 +770,10 @@ static int __init nbd_init(void)
                        put_disk(disk);
                        goto out;
                }
+               /*
+                * Tell the block layer that we are not a rotational device
+                */
+               queue_flag_set_unlocked(QUEUE_FLAG_NONROT, disk->queue);
        }
 
        if (register_blkdev(NBD_MAJOR, "nbd")) {
@@ -684,17 +789,18 @@ static int __init nbd_init(void)
                nbd_dev[i].file = NULL;
                nbd_dev[i].magic = LO_MAGIC;
                nbd_dev[i].flags = 0;
+               INIT_LIST_HEAD(&nbd_dev[i].waiting_queue);
                spin_lock_init(&nbd_dev[i].queue_lock);
                INIT_LIST_HEAD(&nbd_dev[i].queue_head);
                mutex_init(&nbd_dev[i].tx_lock);
                init_waitqueue_head(&nbd_dev[i].active_wq);
+               init_waitqueue_head(&nbd_dev[i].waiting_wq);
                nbd_dev[i].blksize = 1024;
                nbd_dev[i].bytesize = 0;
                disk->major = NBD_MAJOR;
-               disk->first_minor = i;
+               disk->first_minor = i << part_shift;
                disk->fops = &nbd_fops;
                disk->private_data = &nbd_dev[i];
-               disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO;
                sprintf(disk->disk_name, "nbd%d", i);
                set_capacity(disk, 0);
                add_disk(disk);
@@ -706,6 +812,7 @@ out:
                blk_cleanup_queue(nbd_dev[i].disk->queue);
                put_disk(nbd_dev[i].disk);
        }
+       kfree(nbd_dev);
        return err;
 }
 
@@ -722,6 +829,7 @@ static void __exit nbd_cleanup(void)
                }
        }
        unregister_blkdev(NBD_MAJOR, "nbd");
+       kfree(nbd_dev);
        printk(KERN_INFO "nbd: unregistered device at major %d\n", NBD_MAJOR);
 }
 
@@ -732,7 +840,9 @@ MODULE_DESCRIPTION("Network Block Device");
 MODULE_LICENSE("GPL");
 
 module_param(nbds_max, int, 0444);
-MODULE_PARM_DESC(nbds_max, "How many network block devices to initialize.");
+MODULE_PARM_DESC(nbds_max, "number of network block devices to initialize (default: 16)");
+module_param(max_part, int, 0444);
+MODULE_PARM_DESC(max_part, "number of partitions per device (default: 0)");
 #ifndef NDEBUG
 module_param(debugflags, int, 0644);
 MODULE_PARM_DESC(debugflags, "flags for controlling debug output");