Btrfs: always pin metadata in discard mode
[safe/jmp/linux-2.6] / fs / sysfs / bin.c
index 66f6e58..2524714 100644 (file)
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/mutex.h>
+#include <linux/mm.h>
 
 #include <asm/uaccess.h>
 
 #include "sysfs.h"
 
+/*
+ * There's one bin_buffer for each open file.
+ *
+ * filp->private_data points to bin_buffer and
+ * sysfs_dirent->s_bin_attr.buffers points to a list of the bin_buffers.
+ * sysfs_dirent->s_bin_attr.buffers is protected by sysfs_bin_lock
+ */
+static DEFINE_MUTEX(sysfs_bin_lock);   /* held around every add/del/walk of an s_bin_attr.buffers list in this file */
+
 struct bin_buffer {
-       struct mutex    mutex;
-       void            *buffer;
-       int             mmapped;
+       struct mutex                    mutex;  /* taken by write() and mmap() */
+       void                            *buffer;        /* kfree()d in release() */
+       int                             mmapped;        /* set to 1 by mmap() on success */
+       struct vm_operations_struct     *vm_ops;        /* vma->vm_ops as left by attr->mmap(); bin_vm_ops forwards to it */
+       struct file                     *file;  /* owning open file, set in open() */
+       struct hlist_node               list;   /* link in attr_sd->s_bin_attr.buffers */
 };
 
 static int
@@ -63,6 +76,9 @@ read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off)
        int count = min_t(size_t, bytes, PAGE_SIZE);
        char *temp;
 
+       if (!bytes)
+               return 0;
+
        if (size) {
                if (offs > size)
                        return 0;
@@ -131,6 +147,9 @@ static ssize_t write(struct file *file, const char __user *userbuf,
        int count = min_t(size_t, bytes, PAGE_SIZE);
        char *temp;
 
+       if (!bytes)
+               return 0;
+
        if (size) {
                if (offs > size)
                        return 0;
@@ -138,14 +157,9 @@ static ssize_t write(struct file *file, const char __user *userbuf,
                        count = size - offs;
        }
 
-       temp = kmalloc(count, GFP_KERNEL);
-       if (!temp)
-               return -ENOMEM;
-
-       if (copy_from_user(temp, userbuf, count)) {
-               count = -EFAULT;
-               goto out_free;
-       }
+       temp = memdup_user(userbuf, count);
+       if (IS_ERR(temp))
+               return PTR_ERR(temp);
 
        mutex_lock(&bb->mutex);
 
@@ -157,11 +171,179 @@ static ssize_t write(struct file *file, const char __user *userbuf,
        if (count > 0)
                *off = offs + count;
 
-out_free:
        kfree(temp);
        return count;
 }
 
+static void bin_vma_open(struct vm_area_struct *vma)
+{       /* VMA open hook: relay to the open op saved in bb->vm_ops, if any. */
+       struct file *file = vma->vm_file;
+       struct bin_buffer *bb = file->private_data;
+       struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+
+       if (!bb->vm_ops || !bb->vm_ops->open)
+               return; /* no saved open op to forward to */
+
+       if (!sysfs_get_active_two(attr_sd))
+               return; /* NOTE(review): presumably the attribute is going away - confirm */
+
+       bb->vm_ops->open(vma);
+
+       sysfs_put_active_two(attr_sd);  /* pairs with sysfs_get_active_two() above */
+}
+
+static void bin_vma_close(struct vm_area_struct *vma)
+{       /* VMA close hook: relay to the close op saved in bb->vm_ops, if any. */
+       struct file *file = vma->vm_file;
+       struct bin_buffer *bb = file->private_data;
+       struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+
+       if (!bb->vm_ops || !bb->vm_ops->close)
+               return; /* no saved close op to forward to */
+
+       if (!sysfs_get_active_two(attr_sd))
+               return; /* saved close op is skipped when no active reference is obtained */
+
+       bb->vm_ops->close(vma);
+
+       sysfs_put_active_two(attr_sd);  /* pairs with sysfs_get_active_two() above */
+}
+
+static int bin_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{       /* Fault hook: relay to the fault op saved in bb->vm_ops. */
+       struct file *file = vma->vm_file;
+       struct bin_buffer *bb = file->private_data;
+       struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+       int ret;
+
+       if (!bb->vm_ops || !bb->vm_ops->fault)
+               return VM_FAULT_SIGBUS; /* no saved handler to serve the fault */
+
+       if (!sysfs_get_active_two(attr_sd))
+               return VM_FAULT_SIGBUS; /* no active reference obtained */
+
+       ret = bb->vm_ops->fault(vma, vmf);
+
+       sysfs_put_active_two(attr_sd);  /* pairs with sysfs_get_active_two() above */
+       return ret;     /* saved handler's result, passed through unchanged */
+}
+
+static int bin_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+{       /* page_mkwrite hook: relay to the op saved in bb->vm_ops. */
+       struct file *file = vma->vm_file;
+       struct bin_buffer *bb = file->private_data;
+       struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+       int ret;
+
+       if (!bb->vm_ops)
+               return VM_FAULT_SIGBUS; /* no vm_ops were saved at all */
+
+       if (!bb->vm_ops->page_mkwrite)
+               return 0;       /* saved vm_ops lack page_mkwrite: report 0 rather than an error */
+
+       if (!sysfs_get_active_two(attr_sd))
+               return VM_FAULT_SIGBUS; /* no active reference obtained */
+
+       ret = bb->vm_ops->page_mkwrite(vma, vmf);
+
+       sysfs_put_active_two(attr_sd);  /* pairs with sysfs_get_active_two() above */
+       return ret;     /* saved handler's result, passed through unchanged */
+}
+
+static int bin_access(struct vm_area_struct *vma, unsigned long addr,
+                 void *buf, int len, int write)
+{       /* access hook: relay to the access op saved in bb->vm_ops. */
+       struct file *file = vma->vm_file;
+       struct bin_buffer *bb = file->private_data;
+       struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+       int ret;
+
+       if (!bb->vm_ops || !bb->vm_ops->access)
+               return -EINVAL; /* no saved access op */
+
+       if (!sysfs_get_active_two(attr_sd))
+               return -EINVAL; /* no active reference obtained */
+
+       ret = bb->vm_ops->access(vma, addr, buf, len, write);
+
+       sysfs_put_active_two(attr_sd);  /* pairs with sysfs_get_active_two() above */
+       return ret;     /* saved handler's result, passed through unchanged */
+}
+
+#ifdef CONFIG_NUMA
+static int bin_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
+{       /* set_policy hook (CONFIG_NUMA only): relay to the op saved in bb->vm_ops. */
+       struct file *file = vma->vm_file;
+       struct bin_buffer *bb = file->private_data;
+       struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+       int ret;
+
+       if (!bb->vm_ops || !bb->vm_ops->set_policy)
+               return 0;       /* nothing to forward to: reported as success */
+
+       if (!sysfs_get_active_two(attr_sd))
+               return -EINVAL; /* no active reference obtained */
+
+       ret = bb->vm_ops->set_policy(vma, new);
+
+       sysfs_put_active_two(attr_sd);  /* pairs with sysfs_get_active_two() above */
+       return ret;     /* saved handler's result, passed through unchanged */
+}
+
+static struct mempolicy *bin_get_policy(struct vm_area_struct *vma,
+                                       unsigned long addr)
+{       /* get_policy hook (CONFIG_NUMA only): relay to the op saved in bb->vm_ops. */
+       struct file *file = vma->vm_file;
+       struct bin_buffer *bb = file->private_data;
+       struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+       struct mempolicy *pol;
+
+       if (!bb->vm_ops || !bb->vm_ops->get_policy)
+               return vma->vm_policy;  /* no saved op: fall back to the vma's own policy */
+
+       if (!sysfs_get_active_two(attr_sd))
+               return vma->vm_policy;  /* same fallback when no active reference is obtained */
+
+       pol = bb->vm_ops->get_policy(vma, addr);
+
+       sysfs_put_active_two(attr_sd);  /* pairs with sysfs_get_active_two() above */
+       return pol;     /* policy returned by the saved handler */
+}
+
+static int bin_migrate(struct vm_area_struct *vma, const nodemask_t *from,
+                       const nodemask_t *to, unsigned long flags)
+{       /* migrate hook (CONFIG_NUMA only): relay to the op saved in bb->vm_ops. */
+       struct file *file = vma->vm_file;
+       struct bin_buffer *bb = file->private_data;
+       struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+       int ret;
+
+       if (!bb->vm_ops || !bb->vm_ops->migrate)
+               return 0;       /* nothing to forward to */
+
+       if (!sysfs_get_active_two(attr_sd))
+               return 0;       /* unlike bin_set_policy(), a missing active reference also yields 0 */
+
+       ret = bb->vm_ops->migrate(vma, from, to, flags);
+
+       sysfs_put_active_two(attr_sd);  /* pairs with sysfs_get_active_two() above */
+       return ret;     /* saved handler's result, passed through unchanged */
+}
+#endif
+
+static struct vm_operations_struct bin_vm_ops = {      /* wrapper ops that mmap() installs in vma->vm_ops */
+       .open           = bin_vma_open,
+       .close          = bin_vma_close,
+       .fault          = bin_fault,
+       .page_mkwrite   = bin_page_mkwrite,
+       .access         = bin_access,
+#ifdef CONFIG_NUMA
+       .set_policy     = bin_set_policy,       /* these three wrappers are only built under CONFIG_NUMA */
+       .get_policy     = bin_get_policy,
+       .migrate        = bin_migrate,
+#endif
+};
+
 static int mmap(struct file *file, struct vm_area_struct *vma)
 {
        struct bin_buffer *bb = file->private_data;
@@ -173,18 +355,37 @@ static int mmap(struct file *file, struct vm_area_struct *vma)
        mutex_lock(&bb->mutex);
 
        /* need attr_sd for attr, its parent for kobj */
+       rc = -ENODEV;
        if (!sysfs_get_active_two(attr_sd))
-               return -ENODEV;
+               goto out_unlock;
 
        rc = -EINVAL;
-       if (attr->mmap)
-               rc = attr->mmap(kobj, attr, vma);
+       if (!attr->mmap)
+               goto out_put;
+
+       rc = attr->mmap(kobj, attr, vma);
+       if (rc)
+               goto out_put;
 
-       if (rc == 0 && !bb->mmapped)
-               bb->mmapped = 1;
-       else
-               sysfs_put_active_two(attr_sd);
+       /*
+        * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup()
+        * to satisfy versions of X which crash if the mmap fails: that
+        * substitutes a new vm_file, and we don't then want bin_vm_ops.
+        */
+       if (vma->vm_file != file)
+               goto out_put;
 
+       rc = -EINVAL;
+       if (bb->mmapped && bb->vm_ops != vma->vm_ops)
+               goto out_put;
+
+       rc = 0;
+       bb->mmapped = 1;
+       bb->vm_ops = vma->vm_ops;
+       vma->vm_ops = &bin_vm_ops;
+out_put:
+       sysfs_put_active_two(attr_sd);
+out_unlock:
        mutex_unlock(&bb->mutex);
 
        return rc;
@@ -217,8 +418,13 @@ static int open(struct inode * inode, struct file * file)
                goto err_out;
 
        mutex_init(&bb->mutex);
+       bb->file = file;
        file->private_data = bb;
 
+       mutex_lock(&sysfs_bin_lock);
+       hlist_add_head(&bb->list, &attr_sd->s_bin_attr.buffers);
+       mutex_unlock(&sysfs_bin_lock);
+
        /* open succeeded, put active references */
        sysfs_put_active_two(attr_sd);
        return 0;
@@ -231,11 +437,12 @@ static int open(struct inode * inode, struct file * file)
 
 static int release(struct inode * inode, struct file * file)
 {
-       struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
        struct bin_buffer *bb = file->private_data;
 
-       if (bb->mmapped)
-               sysfs_put_active_two(attr_sd);
+       mutex_lock(&sysfs_bin_lock);
+       hlist_del(&bb->list);
+       mutex_unlock(&sysfs_bin_lock);
+
        kfree(bb->buffer);
        kfree(bb);
        return 0;
@@ -250,6 +457,26 @@ const struct file_operations bin_fops = {
        .release        = release,
 };
 
+
+void unmap_bin_file(struct sysfs_dirent *attr_sd)
+{       /* Call unmap_mapping_range() for every open file linked on attr_sd. */
+       struct bin_buffer *bb;
+       struct hlist_node *tmp; /* iteration cursor for hlist_for_each_entry() */
+
+       if (sysfs_type(attr_sd) != SYSFS_KOBJ_BIN_ATTR)
+               return; /* only binary-attribute dirents are handled here */
+
+       mutex_lock(&sysfs_bin_lock);    /* protects s_bin_attr.buffers while it is walked */
+
+       hlist_for_each_entry(bb, tmp, &attr_sd->s_bin_attr.buffers, list) {
+               struct inode *inode = bb->file->f_path.dentry->d_inode;
+
+               unmap_mapping_range(inode->i_mapping, 0, 0, 1); /* NOTE(review): (0, 0, 1) presumably means whole file, private COW pages included - confirm */
+       }
+
+       mutex_unlock(&sysfs_bin_lock);
+}
+
 /**
  *     sysfs_create_bin_file - create binary file for object.
  *     @kobj:  object.