* Note that you can not swap over this thing, yet. Seems to work but
* deadlocks sometimes - you can not swap over TCP in general.
*
- * Copyright 1997-2000 Pavel Machek <pavel@ucw.cz>
+ * Copyright 1997-2000, 2008 Pavel Machek <pavel@suse.cz>
* Parts copyright 2001 Steven Whitehouse <steve@chygwyn.com>
*
* This file is released under GPLv2 or later.
#include <linux/kernel.h>
#include <net/sock.h>
#include <linux/net.h>
+#include <linux/kthread.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#endif /* NDEBUG */
static unsigned int nbds_max = 16;
-static struct nbd_device nbd_dev[MAX_NBD];
+static struct nbd_device *nbd_dev;
+static int max_part;
/*
* Use just one lock (or at most 1 per NIC). Two arguments for this:
struct kvec iov;
sigset_t blocked, oldset;
+ if (unlikely(!sock)) {
+ printk(KERN_ERR "%s: Attempted %s on closed socket in sock_xmit\n",
+ lo->disk->disk_name, (send ? "send" : "recv"));
+ return -EINVAL;
+ }
+
/* Allow interception of SIGKILL only
* Don't allow other signals to interrupt the transmission */
siginitsetinv(&blocked, sigmask(SIGKILL));
return 0;
error_out:
- return 1;
+ return -EIO;
}
static struct request *nbd_find_request(struct nbd_device *lo,
}
req = nbd_find_request(lo, *(struct request **)reply.handle);
- if (unlikely(IS_ERR(req))) {
+ if (IS_ERR(req)) {
result = PTR_ERR(req);
if (result != -ENOENT)
goto harderror;
}
static struct device_attribute pid_attr = {
- .attr = { .name = "pid", .mode = S_IRUGO, .owner = THIS_MODULE },
+ .attr = { .name = "pid", .mode = S_IRUGO},
.show = pid_show,
};
BUG_ON(lo->magic != LO_MAGIC);
lo->pid = current->pid;
- ret = sysfs_create_file(&lo->disk->dev.kobj, &pid_attr.attr);
+ ret = sysfs_create_file(&disk_to_dev(lo->disk)->kobj, &pid_attr.attr);
if (ret) {
printk(KERN_ERR "nbd: sysfs_create_file failed!");
+ lo->pid = 0;
return ret;
}
while ((req = nbd_read_stat(lo)) != NULL)
nbd_end_request(req);
- sysfs_remove_file(&lo->disk->dev.kobj, &pid_attr.attr);
+ sysfs_remove_file(&disk_to_dev(lo->disk)->kobj, &pid_attr.attr);
+ lo->pid = 0;
return 0;
}
}
+static void nbd_handle_req(struct nbd_device *lo, struct request *req)
+{
+ if (!blk_fs_request(req))
+ goto error_out;
+
+ nbd_cmd(req) = NBD_CMD_READ;
+ if (rq_data_dir(req) == WRITE) {
+ nbd_cmd(req) = NBD_CMD_WRITE;
+ if (lo->flags & NBD_READ_ONLY) {
+ printk(KERN_ERR "%s: Write on read-only\n",
+ lo->disk->disk_name);
+ goto error_out;
+ }
+ }
+
+ req->errors = 0;
+
+ mutex_lock(&lo->tx_lock);
+ if (unlikely(!lo->sock)) {
+ mutex_unlock(&lo->tx_lock);
+ printk(KERN_ERR "%s: Attempted send on closed socket\n",
+ lo->disk->disk_name);
+ goto error_out;
+ }
+
+ lo->active_req = req;
+
+ if (nbd_send_req(lo, req) != 0) {
+ printk(KERN_ERR "%s: Request send failed\n",
+ lo->disk->disk_name);
+ req->errors++;
+ nbd_end_request(req);
+ } else {
+ spin_lock(&lo->queue_lock);
+ list_add(&req->queuelist, &lo->queue_head);
+ spin_unlock(&lo->queue_lock);
+ }
+
+ lo->active_req = NULL;
+ mutex_unlock(&lo->tx_lock);
+ wake_up_all(&lo->active_wq);
+
+ return;
+
+error_out:
+ req->errors++;
+ nbd_end_request(req);
+}
+
+static int nbd_thread(void *data)
+{
+ struct nbd_device *lo = data;
+ struct request *req;
+
+ set_user_nice(current, -20);
+ while (!kthread_should_stop() || !list_empty(&lo->waiting_queue)) {
+ /* wait for something to do */
+ wait_event_interruptible(lo->waiting_wq,
+ kthread_should_stop() ||
+ !list_empty(&lo->waiting_queue));
+
+ /* extract request */
+ if (list_empty(&lo->waiting_queue))
+ continue;
+
+ spin_lock_irq(&lo->queue_lock);
+ req = list_entry(lo->waiting_queue.next, struct request,
+ queuelist);
+ list_del_init(&req->queuelist);
+ spin_unlock_irq(&lo->queue_lock);
+
+ /* handle request */
+ nbd_handle_req(lo, req);
+ }
+ return 0;
+}
+
/*
* We always wait for result of write, for now. It would be nice to make it optional
* in future
* { printk( "Warning: Ignoring result!\n"); nbd_end_request( req ); }
*/
-static void do_nbd_request(struct request_queue * q)
+static void do_nbd_request(struct request_queue *q)
{
struct request *req;
struct nbd_device *lo;
blkdev_dequeue_request(req);
+
+ spin_unlock_irq(q->queue_lock);
+
dprintk(DBG_BLKDEV, "%s: request %p: dequeued (flags=%x)\n",
req->rq_disk->disk_name, req, req->cmd_type);
- if (!blk_fs_request(req))
- goto error_out;
-
lo = req->rq_disk->private_data;
BUG_ON(lo->magic != LO_MAGIC);
- nbd_cmd(req) = NBD_CMD_READ;
- if (rq_data_dir(req) == WRITE) {
- nbd_cmd(req) = NBD_CMD_WRITE;
- if (lo->flags & NBD_READ_ONLY) {
- printk(KERN_ERR "%s: Write on read-only\n",
- lo->disk->disk_name);
- goto error_out;
- }
- }
-
- req->errors = 0;
- spin_unlock_irq(q->queue_lock);
-
- mutex_lock(&lo->tx_lock);
if (unlikely(!lo->sock)) {
- mutex_unlock(&lo->tx_lock);
printk(KERN_ERR "%s: Attempted send on closed socket\n",
- lo->disk->disk_name);
+ lo->disk->disk_name);
req->errors++;
nbd_end_request(req);
spin_lock_irq(q->queue_lock);
continue;
}
- lo->active_req = req;
+ spin_lock_irq(&lo->queue_lock);
+ list_add_tail(&req->queuelist, &lo->waiting_queue);
+ spin_unlock_irq(&lo->queue_lock);
- if (nbd_send_req(lo, req) != 0) {
- printk(KERN_ERR "%s: Request send failed\n",
- lo->disk->disk_name);
- req->errors++;
- nbd_end_request(req);
- } else {
- spin_lock(&lo->queue_lock);
- list_add(&req->queuelist, &lo->queue_head);
- spin_unlock(&lo->queue_lock);
- }
-
- lo->active_req = NULL;
- mutex_unlock(&lo->tx_lock);
- wake_up_all(&lo->active_wq);
+ wake_up(&lo->waiting_wq);
spin_lock_irq(q->queue_lock);
- continue;
-
-error_out:
- req->errors++;
- spin_unlock(q->queue_lock);
- nbd_end_request(req);
- spin_lock(q->queue_lock);
}
}
-static int nbd_ioctl(struct inode *inode, struct file *file,
- unsigned int cmd, unsigned long arg)
-{
- struct nbd_device *lo = inode->i_bdev->bd_disk->private_data;
- int error;
- struct request sreq ;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- BUG_ON(lo->magic != LO_MAGIC);
-
- /* Anyone capable of this syscall can do *real bad* things */
- dprintk(DBG_IOCTL, "%s: nbd_ioctl cmd=%s(0x%x) arg=%lu\n",
- lo->disk->disk_name, ioctl_cmd_to_ascii(cmd), cmd, arg);
+/* Must be called with tx_lock held */
+static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo,
+ unsigned int cmd, unsigned long arg)
+{
switch (cmd) {
- case NBD_DISCONNECT:
+ case NBD_DISCONNECT: {
+ struct request sreq;
+
printk(KERN_INFO "%s: NBD_DISCONNECT\n", lo->disk->disk_name);
+
+ blk_rq_init(NULL, &sreq);
sreq.cmd_type = REQ_TYPE_SPECIAL;
nbd_cmd(&sreq) = NBD_CMD_DISC;
/*
*/
sreq.sector = 0;
sreq.nr_sectors = 0;
- if (!lo->sock)
+ if (!lo->sock)
return -EINVAL;
- mutex_lock(&lo->tx_lock);
- nbd_send_req(lo, &sreq);
- mutex_unlock(&lo->tx_lock);
+ nbd_send_req(lo, &sreq);
return 0;
+ }
- case NBD_CLEAR_SOCK:
- error = 0;
- mutex_lock(&lo->tx_lock);
+ case NBD_CLEAR_SOCK: {
+ struct file *file;
+
lo->sock = NULL;
- mutex_unlock(&lo->tx_lock);
file = lo->file;
lo->file = NULL;
nbd_clear_que(lo);
BUG_ON(!list_empty(&lo->queue_head));
if (file)
fput(file);
- return error;
- case NBD_SET_SOCK:
+ return 0;
+ }
+
+ case NBD_SET_SOCK: {
+ struct file *file;
if (lo->file)
return -EBUSY;
- error = -EINVAL;
file = fget(arg);
if (file) {
- inode = file->f_path.dentry->d_inode;
+ struct inode *inode = file->f_path.dentry->d_inode;
if (S_ISSOCK(inode->i_mode)) {
lo->file = file;
lo->sock = SOCKET_I(inode);
- error = 0;
+ if (max_part > 0)
+ bdev->bd_invalidated = 1;
+ return 0;
} else {
fput(file);
}
}
- return error;
+ return -EINVAL;
+ }
+
case NBD_SET_BLKSIZE:
lo->blksize = arg;
lo->bytesize &= ~(lo->blksize-1);
- inode->i_bdev->bd_inode->i_size = lo->bytesize;
- set_blocksize(inode->i_bdev, lo->blksize);
+ bdev->bd_inode->i_size = lo->bytesize;
+ set_blocksize(bdev, lo->blksize);
set_capacity(lo->disk, lo->bytesize >> 9);
return 0;
+
case NBD_SET_SIZE:
lo->bytesize = arg & ~(lo->blksize-1);
- inode->i_bdev->bd_inode->i_size = lo->bytesize;
- set_blocksize(inode->i_bdev, lo->blksize);
+ bdev->bd_inode->i_size = lo->bytesize;
+ set_blocksize(bdev, lo->blksize);
set_capacity(lo->disk, lo->bytesize >> 9);
return 0;
+
case NBD_SET_TIMEOUT:
lo->xmit_timeout = arg * HZ;
return 0;
+
case NBD_SET_SIZE_BLOCKS:
lo->bytesize = ((u64) arg) * lo->blksize;
- inode->i_bdev->bd_inode->i_size = lo->bytesize;
- set_blocksize(inode->i_bdev, lo->blksize);
+ bdev->bd_inode->i_size = lo->bytesize;
+ set_blocksize(bdev, lo->blksize);
set_capacity(lo->disk, lo->bytesize >> 9);
return 0;
- case NBD_DO_IT:
+
+ case NBD_DO_IT: {
+ struct task_struct *thread;
+ struct file *file;
+ int error;
+
+ if (lo->pid)
+ return -EBUSY;
if (!lo->file)
return -EINVAL;
+
+ mutex_unlock(&lo->tx_lock);
+
+ thread = kthread_create(nbd_thread, lo, lo->disk->disk_name);
+ if (IS_ERR(thread)) {
+ mutex_lock(&lo->tx_lock);
+ return PTR_ERR(thread);
+ }
+ wake_up_process(thread);
error = nbd_do_it(lo);
+ kthread_stop(thread);
+
+ mutex_lock(&lo->tx_lock);
if (error)
return error;
- sock_shutdown(lo, 1);
+ sock_shutdown(lo, 0);
file = lo->file;
lo->file = NULL;
nbd_clear_que(lo);
if (file)
fput(file);
lo->bytesize = 0;
- inode->i_bdev->bd_inode->i_size = 0;
+ bdev->bd_inode->i_size = 0;
set_capacity(lo->disk, 0);
+ if (max_part > 0)
+ ioctl_by_bdev(bdev, BLKRRPART, 0);
return lo->harderror;
+ }
+
case NBD_CLEAR_QUE:
/*
* This is for compatibility only. The queue is always cleared
*/
BUG_ON(!lo->sock && !list_empty(&lo->queue_head));
return 0;
+
case NBD_PRINT_DEBUG:
printk(KERN_INFO "%s: next = %p, prev = %p, head = %p\n",
- inode->i_bdev->bd_disk->disk_name,
+ bdev->bd_disk->disk_name,
lo->queue_head.next, lo->queue_head.prev,
&lo->queue_head);
return 0;
}
- return -EINVAL;
+ return -ENOTTY;
+}
+
+static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long arg)
+{
+ struct nbd_device *lo = bdev->bd_disk->private_data;
+ int error;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ BUG_ON(lo->magic != LO_MAGIC);
+
+ /* Anyone capable of this syscall can do *real bad* things */
+ dprintk(DBG_IOCTL, "%s: nbd_ioctl cmd=%s(0x%x) arg=%lu\n",
+ lo->disk->disk_name, ioctl_cmd_to_ascii(cmd), cmd, arg);
+
+ mutex_lock(&lo->tx_lock);
+ error = __nbd_ioctl(bdev, lo, cmd, arg);
+ mutex_unlock(&lo->tx_lock);
+
+ return error;
}
static struct block_device_operations nbd_fops =
{
.owner = THIS_MODULE,
- .ioctl = nbd_ioctl,
+ .locked_ioctl = nbd_ioctl,
};
/*
{
int err = -ENOMEM;
int i;
+ int part_shift;
BUILD_BUG_ON(sizeof(struct nbd_request) != 28);
- if (nbds_max > MAX_NBD) {
- printk(KERN_CRIT "nbd: cannot allocate more than %u nbds; %u requested.\n", MAX_NBD,
- nbds_max);
+ if (max_part < 0) {
+ printk(KERN_CRIT "nbd: max_part must be >= 0\n");
return -EINVAL;
}
+ nbd_dev = kcalloc(nbds_max, sizeof(*nbd_dev), GFP_KERNEL);
+ if (!nbd_dev)
+ return -ENOMEM;
+
+ part_shift = 0;
+ if (max_part > 0)
+ part_shift = fls(max_part);
+
for (i = 0; i < nbds_max; i++) {
- struct gendisk *disk = alloc_disk(1);
+ struct gendisk *disk = alloc_disk(1 << part_shift);
if (!disk)
goto out;
nbd_dev[i].disk = disk;
put_disk(disk);
goto out;
}
+ /*
+ * Tell the block layer that we are not a rotational device
+ */
+ queue_flag_set_unlocked(QUEUE_FLAG_NONROT, disk->queue);
}
if (register_blkdev(NBD_MAJOR, "nbd")) {
nbd_dev[i].file = NULL;
nbd_dev[i].magic = LO_MAGIC;
nbd_dev[i].flags = 0;
+ INIT_LIST_HEAD(&nbd_dev[i].waiting_queue);
spin_lock_init(&nbd_dev[i].queue_lock);
INIT_LIST_HEAD(&nbd_dev[i].queue_head);
mutex_init(&nbd_dev[i].tx_lock);
init_waitqueue_head(&nbd_dev[i].active_wq);
+ init_waitqueue_head(&nbd_dev[i].waiting_wq);
nbd_dev[i].blksize = 1024;
nbd_dev[i].bytesize = 0;
disk->major = NBD_MAJOR;
- disk->first_minor = i;
+ disk->first_minor = i << part_shift;
disk->fops = &nbd_fops;
disk->private_data = &nbd_dev[i];
- disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO;
sprintf(disk->disk_name, "nbd%d", i);
set_capacity(disk, 0);
add_disk(disk);
blk_cleanup_queue(nbd_dev[i].disk->queue);
put_disk(nbd_dev[i].disk);
}
+ kfree(nbd_dev);
return err;
}
}
}
unregister_blkdev(NBD_MAJOR, "nbd");
+ kfree(nbd_dev);
printk(KERN_INFO "nbd: unregistered device at major %d\n", NBD_MAJOR);
}
MODULE_LICENSE("GPL");
module_param(nbds_max, int, 0444);
-MODULE_PARM_DESC(nbds_max, "How many network block devices to initialize.");
+MODULE_PARM_DESC(nbds_max, "number of network block devices to initialize (default: 16)");
+module_param(max_part, int, 0444);
+MODULE_PARM_DESC(max_part, "number of partitions per device (default: 0)");
#ifndef NDEBUG
module_param(debugflags, int, 0644);
MODULE_PARM_DESC(debugflags, "flags for controlling debug output");