Btrfs: always pin metadata in discard mode
[safe/jmp/linux-2.6] / fs / sysfs / bin.c
index d4aaa88..2524714 100644 (file)
@@ -1,46 +1,83 @@
 /*
- * bin.c - binary file operations for sysfs.
+ * fs/sysfs/bin.c - sysfs binary file implementation
  *
  * Copyright (c) 2003 Patrick Mochel
  * Copyright (c) 2003 Matthew Wilcox
  * Copyright (c) 2004 Silicon Graphics, Inc.
+ * Copyright (c) 2007 SUSE Linux Products GmbH
+ * Copyright (c) 2007 Tejun Heo <teheo@suse.de>
+ *
+ * This file is released under the GPLv2.
+ *
+ * Please see Documentation/filesystems/sysfs.txt for more information.
  */
 
 #undef DEBUG
 
 #include <linux/errno.h>
 #include <linux/fs.h>
+#include <linux/kernel.h>
 #include <linux/kobject.h>
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/mm.h>
 
 #include <asm/uaccess.h>
 
 #include "sysfs.h"
 
+/*
+ * There's one bin_buffer for each open file.
+ *
+ * filp->private_data points to the bin_buffer, and
+ * sysfs_dirent->s_bin_attr.buffers points to the list of bin_buffers.
+ * sysfs_dirent->s_bin_attr.buffers is protected by sysfs_bin_lock.
+ */
+static DEFINE_MUTEX(sysfs_bin_lock);
+
+struct bin_buffer {
+       struct mutex                    mutex;
+       void                            *buffer;
+       int                             mmapped;
+       struct vm_operations_struct     *vm_ops;
+       struct file                     *file;
+       struct hlist_node               list;
+};
+
 static int
 fill_read(struct dentry *dentry, char *buffer, loff_t off, size_t count)
 {
-       struct bin_attribute * attr = to_bin_attr(dentry);
-       struct kobject * kobj = to_kobj(dentry->d_parent);
+       struct sysfs_dirent *attr_sd = dentry->d_fsdata;
+       struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr;
+       struct kobject *kobj = attr_sd->s_parent->s_dir.kobj;
+       int rc;
 
-       if (!attr->read)
-               return -EINVAL;
+       /* need attr_sd for attr, its parent for kobj */
+       if (!sysfs_get_active_two(attr_sd))
+               return -ENODEV;
 
-       return attr->read(kobj, buffer, off, count);
+       rc = -EIO;
+       if (attr->read)
+               rc = attr->read(kobj, attr, buffer, off, count);
+
+       sysfs_put_active_two(attr_sd);
+
+       return rc;
 }
 
 static ssize_t
-read(struct file * file, char __user * userbuf, size_t count, loff_t * off)
+read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off)
 {
-       char *buffer = file->private_data;
-       struct dentry *dentry = file->f_dentry;
+       struct bin_buffer *bb = file->private_data;
+       struct dentry *dentry = file->f_path.dentry;
        int size = dentry->d_inode->i_size;
        loff_t offs = *off;
-       int ret;
+       int count = min_t(size_t, bytes, PAGE_SIZE);
+       char *temp;
 
-       if (count > PAGE_SIZE)
-               count = PAGE_SIZE;
+       if (!bytes)
+               return 0;
 
        if (size) {
                if (offs > size)
@@ -49,43 +86,70 @@ read(struct file * file, char __user * userbuf, size_t count, loff_t * off)
                        count = size - offs;
        }
 
-       ret = fill_read(dentry, buffer, offs, count);
-       if (ret < 0) 
-               return ret;
-       count = ret;
+       temp = kmalloc(count, GFP_KERNEL);
+       if (!temp)
+               return -ENOMEM;
 
-       if (copy_to_user(userbuf, buffer, count))
-               return -EFAULT;
+       mutex_lock(&bb->mutex);
 
-       pr_debug("offs = %lld, *off = %lld, count = %zd\n", offs, *off, count);
+       count = fill_read(dentry, bb->buffer, offs, count);
+       if (count < 0) {
+               mutex_unlock(&bb->mutex);
+               goto out_free;
+       }
+
+       memcpy(temp, bb->buffer, count);
+
+       mutex_unlock(&bb->mutex);
+
+       if (copy_to_user(userbuf, temp, count)) {
+               count = -EFAULT;
+               goto out_free;
+       }
+
+       pr_debug("offs = %lld, *off = %lld, count = %d\n", offs, *off, count);
 
        *off = offs + count;
 
+ out_free:
+       kfree(temp);
        return count;
 }
 
 static int
 flush_write(struct dentry *dentry, char *buffer, loff_t offset, size_t count)
 {
-       struct bin_attribute *attr = to_bin_attr(dentry);
-       struct kobject *kobj = to_kobj(dentry->d_parent);
+       struct sysfs_dirent *attr_sd = dentry->d_fsdata;
+       struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr;
+       struct kobject *kobj = attr_sd->s_parent->s_dir.kobj;
+       int rc;
 
-       if (!attr->write)
-               return -EINVAL;
+       /* need attr_sd for attr, its parent for kobj */
+       if (!sysfs_get_active_two(attr_sd))
+               return -ENODEV;
+
+       rc = -EIO;
+       if (attr->write)
+               rc = attr->write(kobj, attr, buffer, offset, count);
 
-       return attr->write(kobj, buffer, offset, count);
+       sysfs_put_active_two(attr_sd);
+
+       return rc;
 }
 
-static ssize_t write(struct file * file, const char __user * userbuf,
-                    size_t count, loff_t * off)
+static ssize_t write(struct file *file, const char __user *userbuf,
+                    size_t bytes, loff_t *off)
 {
-       char *buffer = file->private_data;
-       struct dentry *dentry = file->f_dentry;
+       struct bin_buffer *bb = file->private_data;
+       struct dentry *dentry = file->f_path.dentry;
        int size = dentry->d_inode->i_size;
        loff_t offs = *off;
+       int count = min_t(size_t, bytes, PAGE_SIZE);
+       char *temp;
+
+       if (!bytes)
+               return 0;
 
-       if (count > PAGE_SIZE)
-               count = PAGE_SIZE;
        if (size) {
                if (offs > size)
                        return 0;
@@ -93,77 +157,298 @@ static ssize_t write(struct file * file, const char __user * userbuf,
                        count = size - offs;
        }
 
-       if (copy_from_user(buffer, userbuf, count))
-               return -EFAULT;
+       temp = memdup_user(userbuf, count);
+       if (IS_ERR(temp))
+               return PTR_ERR(temp);
+
+       mutex_lock(&bb->mutex);
+
+       memcpy(bb->buffer, temp, count);
+
+       count = flush_write(dentry, bb->buffer, offs, count);
+       mutex_unlock(&bb->mutex);
 
-       count = flush_write(dentry, buffer, offs, count);
        if (count > 0)
                *off = offs + count;
+
+       kfree(temp);
        return count;
 }
 
-static int mmap(struct file *file, struct vm_area_struct *vma)
+static void bin_vma_open(struct vm_area_struct *vma)
 {
-       struct dentry *dentry = file->f_dentry;
-       struct bin_attribute *attr = to_bin_attr(dentry);
-       struct kobject *kobj = to_kobj(dentry->d_parent);
+       struct file *file = vma->vm_file;
+       struct bin_buffer *bb = file->private_data;
+       struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
 
-       if (!attr->mmap)
+       if (!bb->vm_ops || !bb->vm_ops->open)
+               return;
+
+       if (!sysfs_get_active_two(attr_sd))
+               return;
+
+       bb->vm_ops->open(vma);
+
+       sysfs_put_active_two(attr_sd);
+}
+
+static void bin_vma_close(struct vm_area_struct *vma)
+{
+       struct file *file = vma->vm_file;
+       struct bin_buffer *bb = file->private_data;
+       struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+
+       if (!bb->vm_ops || !bb->vm_ops->close)
+               return;
+
+       if (!sysfs_get_active_two(attr_sd))
+               return;
+
+       bb->vm_ops->close(vma);
+
+       sysfs_put_active_two(attr_sd);
+}
+
+static int bin_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+       struct file *file = vma->vm_file;
+       struct bin_buffer *bb = file->private_data;
+       struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+       int ret;
+
+       if (!bb->vm_ops || !bb->vm_ops->fault)
+               return VM_FAULT_SIGBUS;
+
+       if (!sysfs_get_active_two(attr_sd))
+               return VM_FAULT_SIGBUS;
+
+       ret = bb->vm_ops->fault(vma, vmf);
+
+       sysfs_put_active_two(attr_sd);
+       return ret;
+}
+
+static int bin_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+       struct file *file = vma->vm_file;
+       struct bin_buffer *bb = file->private_data;
+       struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+       int ret;
+
+       if (!bb->vm_ops)
+               return VM_FAULT_SIGBUS;
+
+       if (!bb->vm_ops->page_mkwrite)
+               return 0;
+
+       if (!sysfs_get_active_two(attr_sd))
+               return VM_FAULT_SIGBUS;
+
+       ret = bb->vm_ops->page_mkwrite(vma, vmf);
+
+       sysfs_put_active_two(attr_sd);
+       return ret;
+}
+
+static int bin_access(struct vm_area_struct *vma, unsigned long addr,
+                 void *buf, int len, int write)
+{
+       struct file *file = vma->vm_file;
+       struct bin_buffer *bb = file->private_data;
+       struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+       int ret;
+
+       if (!bb->vm_ops || !bb->vm_ops->access)
+               return -EINVAL;
+
+       if (!sysfs_get_active_two(attr_sd))
                return -EINVAL;
 
-       return attr->mmap(kobj, attr, vma);
+       ret = bb->vm_ops->access(vma, addr, buf, len, write);
+
+       sysfs_put_active_two(attr_sd);
+       return ret;
 }
 
-static int open(struct inode * inode, struct file * file)
+#ifdef CONFIG_NUMA
+static int bin_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
+{
+       struct file *file = vma->vm_file;
+       struct bin_buffer *bb = file->private_data;
+       struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+       int ret;
+
+       if (!bb->vm_ops || !bb->vm_ops->set_policy)
+               return 0;
+
+       if (!sysfs_get_active_two(attr_sd))
+               return -EINVAL;
+
+       ret = bb->vm_ops->set_policy(vma, new);
+
+       sysfs_put_active_two(attr_sd);
+       return ret;
+}
+
+static struct mempolicy *bin_get_policy(struct vm_area_struct *vma,
+                                       unsigned long addr)
+{
+       struct file *file = vma->vm_file;
+       struct bin_buffer *bb = file->private_data;
+       struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+       struct mempolicy *pol;
+
+       if (!bb->vm_ops || !bb->vm_ops->get_policy)
+               return vma->vm_policy;
+
+       if (!sysfs_get_active_two(attr_sd))
+               return vma->vm_policy;
+
+       pol = bb->vm_ops->get_policy(vma, addr);
+
+       sysfs_put_active_two(attr_sd);
+       return pol;
+}
+
+static int bin_migrate(struct vm_area_struct *vma, const nodemask_t *from,
+                       const nodemask_t *to, unsigned long flags)
+{
+       struct file *file = vma->vm_file;
+       struct bin_buffer *bb = file->private_data;
+       struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+       int ret;
+
+       if (!bb->vm_ops || !bb->vm_ops->migrate)
+               return 0;
+
+       if (!sysfs_get_active_two(attr_sd))
+               return 0;
+
+       ret = bb->vm_ops->migrate(vma, from, to, flags);
+
+       sysfs_put_active_two(attr_sd);
+       return ret;
+}
+#endif
+
+static struct vm_operations_struct bin_vm_ops = {
+       .open           = bin_vma_open,
+       .close          = bin_vma_close,
+       .fault          = bin_fault,
+       .page_mkwrite   = bin_page_mkwrite,
+       .access         = bin_access,
+#ifdef CONFIG_NUMA
+       .set_policy     = bin_set_policy,
+       .get_policy     = bin_get_policy,
+       .migrate        = bin_migrate,
+#endif
+};
+
+static int mmap(struct file *file, struct vm_area_struct *vma)
 {
-       struct kobject *kobj = sysfs_get_kobject(file->f_dentry->d_parent);
-       struct bin_attribute * attr = to_bin_attr(file->f_dentry);
-       int error = -EINVAL;
+       struct bin_buffer *bb = file->private_data;
+       struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+       struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr;
+       struct kobject *kobj = attr_sd->s_parent->s_dir.kobj;
+       int rc;
+
+       mutex_lock(&bb->mutex);
+
+       /* need attr_sd for attr, its parent for kobj */
+       rc = -ENODEV;
+       if (!sysfs_get_active_two(attr_sd))
+               goto out_unlock;
 
-       if (!kobj || !attr)
-               goto Done;
+       rc = -EINVAL;
+       if (!attr->mmap)
+               goto out_put;
+
+       rc = attr->mmap(kobj, attr, vma);
+       if (rc)
+               goto out_put;
+
+       /*
+        * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup()
+        * to satisfy versions of X which crash if the mmap fails: that
+        * substitutes a new vm_file, and we don't then want bin_vm_ops.
+        */
+       if (vma->vm_file != file)
+               goto out_put;
+
+       rc = -EINVAL;
+       if (bb->mmapped && bb->vm_ops != vma->vm_ops)
+               goto out_put;
+
+       rc = 0;
+       bb->mmapped = 1;
+       bb->vm_ops = vma->vm_ops;
+       vma->vm_ops = &bin_vm_ops;
+out_put:
+       sysfs_put_active_two(attr_sd);
+out_unlock:
+       mutex_unlock(&bb->mutex);
+
+       return rc;
+}
 
-       /* Grab the module reference for this attribute if we have one */
-       error = -ENODEV;
-       if (!try_module_get(attr->attr.owner)) 
-               goto Done;
+static int open(struct inode * inode, struct file * file)
+{
+       struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+       struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr;
+       struct bin_buffer *bb = NULL;
+       int error;
+
+       /* binary file operations require both @attr_sd and its parent */
+       if (!sysfs_get_active_two(attr_sd))
+               return -ENODEV;
 
        error = -EACCES;
        if ((file->f_mode & FMODE_WRITE) && !(attr->write || attr->mmap))
-               goto Error;
+               goto err_out;
        if ((file->f_mode & FMODE_READ) && !(attr->read || attr->mmap))
-               goto Error;
+               goto err_out;
 
        error = -ENOMEM;
-       file->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL);
-       if (!file->private_data)
-               goto Error;
-
-       error = 0;
-    goto Done;
-
- Error:
-       module_put(attr->attr.owner);
- Done:
-       if (error && kobj)
-               kobject_put(kobj);
+       bb = kzalloc(sizeof(*bb), GFP_KERNEL);
+       if (!bb)
+               goto err_out;
+
+       bb->buffer = kmalloc(PAGE_SIZE, GFP_KERNEL);
+       if (!bb->buffer)
+               goto err_out;
+
+       mutex_init(&bb->mutex);
+       bb->file = file;
+       file->private_data = bb;
+
+       mutex_lock(&sysfs_bin_lock);
+       hlist_add_head(&bb->list, &attr_sd->s_bin_attr.buffers);
+       mutex_unlock(&sysfs_bin_lock);
+
+       /* open succeeded, put active references */
+       sysfs_put_active_two(attr_sd);
+       return 0;
+
+ err_out:
+       sysfs_put_active_two(attr_sd);
+       kfree(bb);
        return error;
 }
 
 static int release(struct inode * inode, struct file * file)
 {
-       struct kobject * kobj = to_kobj(file->f_dentry->d_parent);
-       struct bin_attribute * attr = to_bin_attr(file->f_dentry);
-       u8 * buffer = file->private_data;
+       struct bin_buffer *bb = file->private_data;
 
-       if (kobj) 
-               kobject_put(kobj);
-       module_put(attr->attr.owner);
-       kfree(buffer);
+       mutex_lock(&sysfs_bin_lock);
+       hlist_del(&bb->list);
+       mutex_unlock(&sysfs_bin_lock);
+
+       kfree(bb->buffer);
+       kfree(bb);
        return 0;
 }
 
-struct file_operations bin_fops = {
+const struct file_operations bin_fops = {
        .read           = read,
        .write          = write,
        .mmap           = mmap,
@@ -172,18 +457,37 @@ struct file_operations bin_fops = {
        .release        = release,
 };
 
+
+void unmap_bin_file(struct sysfs_dirent *attr_sd)
+{
+       struct bin_buffer *bb;
+       struct hlist_node *tmp;
+
+       if (sysfs_type(attr_sd) != SYSFS_KOBJ_BIN_ATTR)
+               return;
+
+       mutex_lock(&sysfs_bin_lock);
+
+       hlist_for_each_entry(bb, tmp, &attr_sd->s_bin_attr.buffers, list) {
+               struct inode *inode = bb->file->f_path.dentry->d_inode;
+
+               unmap_mapping_range(inode->i_mapping, 0, 0, 1);
+       }
+
+       mutex_unlock(&sysfs_bin_lock);
+}
+
 /**
  *     sysfs_create_bin_file - create binary file for object.
  *     @kobj:  object.
  *     @attr:  attribute descriptor.
- *
  */
 
 int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr)
 {
-       BUG_ON(!kobj || !kobj->dentry || !attr);
+       BUG_ON(!kobj || !kobj->sd || !attr);
 
-       return sysfs_add_file(kobj->dentry, &attr->attr, SYSFS_KOBJ_BIN_ATTR);
+       return sysfs_add_file(kobj->sd, &attr->attr, SYSFS_KOBJ_BIN_ATTR);
 }
 
 
@@ -191,13 +495,11 @@ int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr)
  *     sysfs_remove_bin_file - remove binary file for object.
  *     @kobj:  object.
  *     @attr:  attribute descriptor.
- *
  */
 
-int sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr)
+void sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr)
 {
-       sysfs_hash_and_remove(kobj->dentry,attr->attr.name);
-       return 0;
+       sysfs_hash_and_remove(kobj->sd, attr->attr.name);
 }
 
 EXPORT_SYMBOL_GPL(sysfs_create_bin_file);